def generate_csv():
    """Dump the cast of every movie to ``actors.csv``.

    One CSV row is written per (id, imdb_id) pair returned from the
    ``movie`` table: the movie's imdb_id followed by the ``person_id``
    values found in ``actors`` for that movie, ordered by ``index``.
    """
    cast_query = ("SELECT person_id FROM actors "
                  "WHERE movie_id = %s ORDER BY `index` ASC ;")
    db = IMDB()
    movie_rows = db.fetch_vec('SELECT id, imdb_id FROM movie;')
    # Buffering is 0, so the output file is opened unbuffered.
    with open('actors.csv', 'wb', 0) as out_fp:
        write_row = csv.writer(out_fp).writerow
        for movie_pk, imdb_id in movie_rows:
            cast = db.fetch_vec(cast_query, movie_pk)
            write_row([imdb_id] + cast)
def fill_actors(actors_csv='http_linkers/actors.csv'):
    """Rebuild the ``actors`` table from a CSV file.

    The table is emptied first. Each CSV row is read as a movie id in the
    first column followed by person ids; every person id is inserted with
    its 1-based position in the row as ``index``.

    :param actors_csv: path of the CSV file to load.
    """
    insert_sql = ('INSERT INTO actors (movie_id,person_id,`index`) '
                  'VALUES (%s,%s,%s);')
    db = IMDB()
    db.execute_query('DELETE FROM actors;')
    with open(actors_csv, 'rb') as csv_fp:
        for record in csv.reader(csv_fp):
            cast = record[1:]
            for position, person_id in enumerate(cast, 1):
                db.execute_query(insert_sql, record[0], person_id, position)
def process_all_movies():
    """Run ``process_movie`` on every movie that has no rows in ``actors``.

    Opens ``http_cast_log.txt`` and ``http_cast_failed.txt`` (truncating
    both) and hands them to ``process_movie`` together with both
    connections and a dict holding the movie's 'id' and 'imdb id'.
    """
    http_conn = IMDb()
    imdb = IMDB()
    pending_sql = ("SELECT id,imdb_id FROM movie WHERE id NOT IN "
                   "(SELECT distinct movie_id from actors);")
    with open("http_cast_log.txt", 'wb', 0) as log:
        with open("http_cast_failed.txt", 'wb', 0) as fail_fp:
            for row in imdb.fetch_vec(pending_sql):
                movie = {'id': row[0], 'imdb id': row[1]}
                process_movie(http_conn, imdb, movie, log, fail_fp)
def fill_info():
    """Call ``get_movie_business`` for every title that has an imdb_id.

    Failures for individual titles are reported on stdout and do not stop
    the loop.
    """
    imdb_conn = IMDb('sql', DB_URI)
    cust_conn = IMDB()
    ids = cust_conn.fetch_vec(
        'SELECT imdb_id FROM title WHERE NOT imdb_id IS NULL;')
    for imdb_id in ids:
        try:
            imdb_conn.get_movie_business(imdb_id)
        except Exception:
            # Deliberately best-effort per title, but not a bare ``except``:
            # KeyboardInterrupt/SystemExit must still be able to stop the run.
            print("err %s \n" % imdb_id)
def update_stars_csv():
    """Rewrite ``stars/stars_old.csv`` as ``stars/stars.csv`` keyed by imdb_id.

    For each row of the old file the first column is looked up as
    ``title.id`` and replaced with the matching ``imdb_id``; the second
    column is copied unchanged. Only the first row seen for a given id is
    kept.
    """
    conn = IMDB()
    query = "SELECT imdb_id FROM title WHERE id = %s ;"
    # A set keeps the duplicate check O(1) per row instead of scanning a list.
    seen = set()
    with open('stars/stars_old.csv', 'rb') as old_fp, \
            open('stars/stars.csv', 'wb', 0) as stars_fp:
        reader = csv.reader(old_fp)
        writer = csv.writer(stars_fp)
        for row in reader:
            sql_id = row[0]
            if sql_id in seen:
                continue
            imdb_id = conn.fetch_scalar(query, sql_id)
            writer.writerow([imdb_id, row[1]])
            seen.add(sql_id)
def fill_stars(stars_csv='http_linkers/stars.csv'):
    """Rebuild the ``stars`` table from a CSV file.

    The table is emptied first. For every CSV row the first column is taken
    as the imdb id and the second column is split on single quotes; pieces
    containing ``[``, ``]`` or ``,`` are discarded and the rest are passed
    to ``process_movie`` as the stars.
    (Presumably the second column is the text form of a Python list of
    ids -- confirm against the code that writes the CSV.)

    Ids that were processed are written to ``stars/db_success.txt``, ids
    that raised to ``stars/db_failed.txt``, and the error plus traceback
    to ``stars/db_log.txt``. All three files are truncated on each run.

    :param stars_csv: path of the CSV file to load.
    """
    conn = IMDB()
    conn.execute_query('DELETE FROM stars;')
    with open(stars_csv, 'rb') as fp, \
            open('stars/db_success.txt', 'wb', 0) as succ_fp, \
            open('stars/db_failed.txt', 'wb', 0) as fail_fp, \
            open('stars/db_log.txt', 'wb', 0) as log_fp:
        reader = csv.reader(fp)
        pr = re.compile(r'.*[\[\]\,].*')
        for row in reader:
            if not row:
                # A blank line carries no movie; nothing to process or fail.
                continue
            # Bound before the ``try`` so the handler below always reports
            # the id of the row that actually failed.
            imdb_id = row[0]
            try:
                stars = [x for x in row[1].split('\'')
                         if pr.match(x) is None]
                # NOTE(review): called with (conn, imdb_id, stars); confirm
                # this matches the process_movie definition in scope.
                process_movie(conn, imdb_id, stars)
                succ_fp.write(imdb_id + '\n')
            except Exception as e:
                fail_fp.write(imdb_id + '\n')
                log_fp.write(
                    " AT <{}> : {} \n {} \n".format(imdb_id, e,
                                                    traceback.format_exc()))
def create_stars_csv(limit=99999):
    """Fetch stars for titles not yet done and append them to the CSV.

    Ids listed in ``stars/success.txt`` are skipped, so the function can be
    re-run to resume. For each remaining title with an imdb_id,
    ``get_stars`` is called (after a one-second pause) and:

    * a non-empty result is appended to ``stars/stars.csv`` as
      ``[sql_id, stars]`` and the id is appended to ``stars/success.txt``;
    * an empty result or an exception records the id in
      ``stars/failed.txt``; exceptions are also written with their
      traceback to ``stars/log.txt``.

    ``failed.txt`` and ``log.txt`` are truncated on each run.

    :param limit: maximum number of titles to attempt in this run.
    """
    conn = IMDB()
    try:
        with open('stars/success.txt', 'rb', 0) as fp:
            # Set, not list: it is probed once per title below.
            done = set(line.strip() for line in fp)
    except IOError:
        # The resume file could not be read: create it and start from scratch.
        with open('stars/success.txt', 'wb', 0):
            done = set()

    with open("stars/log.txt", 'wb', 0) as log_fp, \
            open("stars/failed.txt", 'wb', 0) as fail_fp, \
            open("stars/stars.csv", 'ab', 0) as stars_fp, \
            open('stars/success.txt', 'ab', 0) as suc_fp:
        csv_writer = csv.writer(stars_fp)
        query = "SELECT id,imdb_id FROM title WHERE NOT imdb_id IS NULL;"
        res = conn.fetch_vec(query)
        ids = [(str(x), str(y)) for x, y in res if str(x) not in done]
        for sql_id, imdb_id in ids:
            if limit <= 0:
                return
            # Pause between lookups (presumably to throttle requests made
            # by get_stars -- confirm).
            time.sleep(1)
            limit -= 1
            try:
                stars = get_stars(imdb_id)
                if not stars:
                    fail_fp.write(sql_id + '\n')
                else:
                    csv_writer.writerow([sql_id, stars])
                    suc_fp.write(sql_id + '\n')
            except Exception as e:
                fail_fp.write(sql_id + '\n')
                log_fp.write(
                    " AT <{}> : {} \n {} \n".format(sql_id, e,
                                                    traceback.format_exc()))
def fill_movie_imdbid():
    """Resolve missing imdb ids, then sync and prune the ``movie`` table.

    For every ``title`` row whose imdb_id is NULL the id is printed and
    ``get_imdbMovieID`` is called on the SQL-backed IMDb connection
    (presumably this stores the resolved id in ``title`` -- confirm).
    Afterwards ``movie.imdb_id`` is copied from ``title``, movies still
    lacking an imdb_id are copied into ``missing_movies`` and then deleted
    from ``movie``.
    """
    imdb_conn = IMDb('sql', DB_URI)
    cust_conn = IMDB()
    unresolved = cust_conn.fetch_vec(
        'SELECT id FROM title WHERE imdb_id IS NULL;')
    for title_id in unresolved:
        print(title_id)
        imdb_conn.get_imdbMovieID(title_id)

    # NOTE(review): these bulk statements are taken to run once, after the
    # loop above -- confirm against the original indentation.
    follow_up = (
        'UPDATE movie,title SET movie.imdb_id=title.imdb_id '
        'WHERE title.id=movie.id;',
        "INSERT INTO missing_movies (SELECT * FROM movie WHERE "
        "imdb_id IS NULL);",
        "DELETE FROM movie WHERE imdb_id IS NULL;",
    )
    for statement in follow_up:
        cust_conn.execute_query(statement)