def write_predictdb():
    """Write the current prediction scores to the film's UpdateFilm row.

    Takes no arguments: ``scoredict``, ``filmid``, ``cursor`` and ``db`` are
    all resolved from the enclosing module scope.  The eight score values are
    looked up in ``scoredict`` in column order, followed by ``filmid`` for the
    WHERE clause, and the change is committed immediately.
    """
    # Order matters: it must line up with the %s placeholders in the SQL.
    score_columns = (
        'linear_predict', 'linear_test',
        'lasso_predict', 'lasso_test',
        'knn_predict', 'knn_test',
        'poly_predict', 'poly_test',
    )
    statement = '''UPDATE UpdateFilm SET linear_predict=%s,linear_test=%s, lasso_predict=%s,lasso_test=%s,knn_predict=%s,knn_test=%s,poly_predict=%s,poly_test=%s WHERE imdb_filmID = %s'''
    cursor.execute(
        statement,
        tuple(scoredict[column] for column in score_columns) + (filmid,))
    db.commit()
"""Mark Oscar best-picture winners in FilmDB.

Reads the IMDb search listing for best-picture winners.  Films whose ID is
already present in FilmDB get ``Oscar = 1``; every other ID is printed and
handed to ``moviescrawler.crawl_imdb`` with the ``'Oscar'`` tag.
"""
from crawler_util import page_read
from film_update import moviescrawler
from db_helper.save import cursor, db


def get_exist_list():
    """Return the imdb_filmID of every row in FilmDB as a list.

    The SELECT is issued here rather than by the caller, so the function no
    longer depends on whatever statement was last executed on the shared
    cursor.
    """
    cursor.execute('SELECT imdb_filmID FROM FilmDB')
    return [row[0] for row in cursor.fetchall()]


exist_films = get_exist_list()
# Set copy for constant-time membership tests inside the loop below;
# exist_films itself stays a list.
_known_ids = set(exist_films)

soup = page_read.page_read_nolog(
    'http://www.imdb.com/search/title?count=100&'
    'groups=oscar_best_picture_winners&title_type=feature&sort=release_date,desc'
)
imdb_href = 'http://www.imdb.com'

for item in soup.select('.lister-item-header'):
    # The film ID is the path segment that follows "title/" in the link.
    the_filmid = item.a.get('href').split('title/')[1].split('/')[0]
    if the_filmid in _known_ids:
        cursor.execute('''UPDATE FilmDB SET Oscar = 1 WHERE imdb_filmID=%s''',
                       (the_filmid, ))
    else:
        print(the_filmid)
        moviescrawler.crawl_imdb(the_filmid, 'Oscar')
    # Commit after every film so work already done survives a failure on a
    # later entry.
    db.commit()
def get_exist_list():
    """Return the imdb_filmID of every row in FilmDB as a list.

    Runs the SELECT on the module-level ``cursor`` and collects the first
    column of each row by iterating the cursor directly.
    """
    cursor.execute('SELECT imdb_filmID FROM FilmDB')
    film_ids = []
    for row in cursor:
        film_ids.append(row[0])
    return film_ids
"""Tag IMDb Top 250 films in FilmDB.

Reads the IMDb Top 250 chart page.  Films whose ID is already present in
FilmDB get ``filmType = 'Top250'``; every other ID is handed to
``moviescrawler.crawl_imdb`` with the ``'Top250'`` tag.
"""
from crawler_util import page_read
from film_update import moviescrawler
from db_helper.save import cursor, db


def get_exist_list():
    """Return the imdb_filmID of every row in FilmDB as a list."""
    cursor.execute('SELECT imdb_filmID FROM FilmDB')
    return [row[0] for row in cursor]


exist_films = get_exist_list()
# Set copy for constant-time membership tests inside the loop below;
# exist_films itself stays a list.
_known_ids = set(exist_films)

soup = page_read.page_read_nolog(
    'http://www.imdb.com/chart/top/?ref_=nv_mv_250_6')

for item in soup.select('.titleColumn'):
    ref = item.a.get('href').strip()
    # The film ID is the path segment that follows "title/" in the link.
    film_id = ref.split('title/')[1].split('/')[0]
    if film_id in _known_ids:
        cursor.execute(
            '''UPDATE FilmDB SET filmType = 'Top250' WHERE imdb_filmID=%s''',
            (film_id, ))
        print(film_id)
    else:
        moviescrawler.crawl_imdb(film_id, 'Top250')
    # Commit after every film so work already done survives a failure on a
    # later entry.
    db.commit()
def write_predictdb(): cursor.execute( '''UPDATE UpdateFilm SET linear_predict=%s,linear_test=%s, lasso_predict=%s,lasso_test=%s,knn_predict=%s,knn_test=%s,poly_predict=%s,poly_test=%s WHERE imdb_filmID = %s''', (scoredict['linear_predict'], scoredict['linear_test'], scoredict['lasso_predict'], scoredict['lasso_test'], scoredict['knn_predict'], scoredict['knn_test'], scoredict['poly_predict'], scoredict['poly_test'], filmid)) db.commit() cursor.execute('SELECT click_times,gross ' 'FROM FilmDB,TrailerClick ' 'WHERE FilmDB.imdb_filmID=TrailerClick.imdb_filmID ' 'AND gross>4*TrailerClick.click_times') clicks = cursor.fetchall() X_R1 = list() y_R1 = list() for click_time, gross in clicks: X_R1.append(click_time) y_R1.append(gross) X_data = np.array(X_R1).reshape(-1, 1) y_data = np.array(y_R1).reshape(-1, 1) X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2, random_state=4) cursor.execute( 'SELECT TrailerClick.imdb_filmID,max(click_times) FROM UpdateFilm,TrailerClick WHERE TrailerClick.imdb_filmID=UpdateFilm.imdb_filmID GROUP BY UpdateFilm.imdb_filmID'