def _save_data(url, session, db): """Store data into movie_new for tweeting message later""" with sqlite3.connect(db) as conn: conn.text_factory = str cursor = conn.cursor() insert = """INSERT INTO movie_new (title, download_url, movie_url, douban_url, douban_title, douban_id, lpic_url, download_url_md5, upload_date) VALUES (?,?,?,?,?,?,?,?,?)""" query = """SELECT * from movie_new WHERE download_url_md5=?""" for item in parse_sourcelist(session=session, url=url): movie_url = item title, upload_date, download_url = parse_detail(item) download_url_md5 = md5(download_url).hexdigest() douban_title, douban_url, douban_id, lpic_url = \ parse_douban(title, api_key=API_KEY) data = (title, download_url, movie_url, douban_url, douban_title,\ douban_id, lpic_url, download_url_md5, upload_date) #test = datetime.strptime('2013-05-28', '%Y-%m-%d').date() == \ # datetime.strptime(upload_date, '%Y-%m-%d').date() #if test and not cursor.execute(query, (download_url_md5,)).fetchall(): if check_update(upload_date) and not cursor.execute(query, (download_url_md5, )).fetchall(): print "Insert data", douban_title, "into database" cursor.execute(insert, data) conn.commit() else: print "Can't insert data for duplicate content or no update"
def create_db(): with sqlite3.connect(_database) as conn: # print 'Creating schema' # with open(_schema, 'rt') as f: # schema = f.read() # conn.executescript(schema) conn.text_factory = str cursor = conn.cursor() f = open(_detail) for line in f.readlines()[1083:]: (title, download_url, movie_url) = line.split('|') try: (douban_title, douban_url, douban_id, lpic_url) = parse_douban(title, api_key=API_KEY) print title, '<><><>', douban_title data = (title, download_url, movie_url, douban_url,\ douban_title, douban_id, lpic_url) insert = """INSERT INTO movie (title, download_url, movie_url, douban_url, douban_title, douban_id, lpic_url) VALUES(?,?,?,?,?,?,?)""" cursor.execute(insert, data) #douban API request limit 40 request per second per IP time.sleep(1.5) conn.commit() except TypeError: print "%s not found in Douban" % title
def _save_data(url, session, db): """Store data into movie_new for tweeting message later""" with sqlite3.connect(db) as conn: conn.text_factory = str cursor = conn.cursor() insert = """INSERT INTO movie_new (title, download_url, movie_url, douban_url, douban_title, douban_id, lpic_url, download_url_md5, upload_date) VALUES (?,?,?,?,?,?,?,?,?)""" query = """SELECT * from movie_new WHERE download_url_md5=?""" for item in parse_sourcelist(session=session, url=url): movie_url = item title, upload_date, download_url = parse_detail(item) download_url_md5 = md5(download_url).hexdigest() douban_title, douban_url, douban_id, lpic_url = \ parse_douban(title, api_key=API_KEY) data = (title, download_url, movie_url, douban_url, douban_title,\ douban_id, lpic_url, download_url_md5, upload_date) #test = datetime.strptime('2013-05-28', '%Y-%m-%d').date() == \ # datetime.strptime(upload_date, '%Y-%m-%d').date() #if test and not cursor.execute(query, (download_url_md5,)).fetchall(): if check_update(upload_date) and not cursor.execute( query, (download_url_md5, )).fetchall(): print "Insert data", douban_title, "into database" cursor.execute(insert, data) conn.commit() else: print "Can't insert data for duplicate content or no update"
def create_db(): with sqlite3.connect(_database) as conn: # print 'Creating schema' # with open(_schema, 'rt') as f: # schema = f.read() # conn.executescript(schema) conn.text_factory = str cursor = conn.cursor() f = open(_detail) for line in f.readlines()[1083:]: (title, download_url, movie_url) = line.split("|") try: (douban_title, douban_url, douban_id, lpic_url) = parse_douban(title, api_key=API_KEY) print title, "<><><>", douban_title data = (title, download_url, movie_url, douban_url, douban_title, douban_id, lpic_url) insert = """INSERT INTO movie (title, download_url, movie_url, douban_url, douban_title, douban_id, lpic_url) VALUES(?,?,?,?,?,?,?)""" cursor.execute(insert, data) # douban API request limit 40 request per second per IP time.sleep(1.5) conn.commit() except TypeError: print "%s not found in Douban" % title
activate_this = 'venv/bin/activate_this.py' execfile(activate_this, dict(__file__=activate_this)) import requests from strs2weibo import parse_douban, parse_detail, parse_sourcelist,\ contruct_status, retrieve_image, ROOT if __name__ == '__main__': session = requests.Session() session.get(ROOT) url = 'http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=1&page=1' for detail_url in parse_sourcelist(session, url): title = parse_detail(detail_url)[0] download_url = parse_detail(detail_url)[2] (douban_title, douban_url, douban_id, lpic_url ) = parse_douban(title) pic = retrieve_image(lpic_url) topic = u'电影传送门' status = construct_status(topic, douban_title, download_url, douban_url) print status # weibo_upload(status, pic)
#!usr/bin/env python # -*- coding: utf-8 -*- activate_this = 'venv/bin/activate_this.py' execfile(activate_this, dict(__file__=activate_this)) import requests from strs2weibo import parse_douban, parse_detail, parse_sourcelist,\ contruct_status, retrieve_image, ROOT if __name__ == '__main__': session = requests.Session() session.get(ROOT) url = 'http://strs.gdufs.edu.cn/web/VOD/vod_sourcelist.asp?Groupid=1&page=1' for detail_url in parse_sourcelist(session, url): title = parse_detail(detail_url)[0] download_url = parse_detail(detail_url)[2] (douban_title, douban_url, douban_id, lpic_url) = parse_douban(title) pic = retrieve_image(lpic_url) topic = u'电影传送门' status = construct_status(topic, douban_title, download_url, douban_url) print status # weibo_upload(status, pic)