def generate_csv():
    """Dump the cast of every movie to ``actors.csv``.

    Each CSV row starts with the movie's ``imdb_id`` and is followed by the
    ``person_id`` values stored for that movie in the ``actors`` table,
    in ascending ``index`` order.
    """
    movies_sql = 'SELECT id, imdb_id FROM movie;'
    cast_sql = ("SELECT person_id FROM actors "
                "WHERE movie_id = %s ORDER BY `index` ASC ;")

    db = IMDB()
    movie_rows = db.fetch_vec(movies_sql)

    # Buffering is disabled (0), so every row reaches the file immediately.
    with open('actors.csv', 'wb', 0) as out_fp:
        rows_out = csv.writer(out_fp)
        for movie_pk, movie_imdb_id in movie_rows:
            cast = db.fetch_vec(cast_sql, movie_pk)
            # NOTE(review): the concatenation assumes fetch_vec returns a
            # list for a single-column query -- confirm against IMDB.
            rows_out.writerow([movie_imdb_id] + cast)
def process_all_movies():
    """Run ``process_movie`` for every movie that has no rows in ``actors``.

    Two unbuffered files are (re)created for the run and passed to
    ``process_movie``: ``http_cast_log.txt`` and ``http_cast_failed.txt``.
    """
    web = IMDb()
    db = IMDB()

    # Only movies whose id does not appear in the actors table yet.
    pending_sql = ("SELECT id,imdb_id FROM movie WHERE id NOT IN "
                   "(SELECT distinct movie_id from actors);")

    with open("http_cast_log.txt", 'wb', 0) as log:
        with open("http_cast_failed.txt", 'wb', 0) as fail_fp:
            for row in db.fetch_vec(pending_sql):
                movie = {'id': row[0], 'imdb id': row[1]}
                process_movie(web, db, movie, log, fail_fp)
def fill_info():
    """Call ``get_movie_business`` for every title that has an IMDb id.

    The ids are read from ``title.imdb_id`` (non-NULL rows only).  The
    return value of each call is discarded.
    NOTE(review): presumably the call is made for its side effects on the
    SQL database -- confirm against the IMDb access system in use.

    A failure for one title is reported on stdout and the loop continues.
    """
    imdb_conn = IMDb('sql', DB_URI)
    cust_conn = IMDB()
    imdb_ids = cust_conn.fetch_vec(
        'SELECT imdb_id FROM title WHERE NOT imdb_id IS NULL;')
    for imdb_id in imdb_ids:
        try:
            imdb_conn.get_movie_business(imdb_id)
        except Exception:
            # Best effort per title.  Catching Exception rather than using a
            # bare except keeps Ctrl-C / SystemExit able to stop the run.
            print("err %s \n" % imdb_id)
def fill_movie_imdbid():
    """Resolve missing IMDb ids, then sync and prune the ``movie`` table.

    For every ``title`` row without an ``imdb_id`` the row id is printed
    and passed to ``get_imdbMovieID``.  Afterwards three statements run
    once, in order: copy ``title.imdb_id`` into ``movie``, copy movies that
    still have no ``imdb_id`` into ``missing_movies``, and delete those
    movies from ``movie``.
    """
    imdb_conn = IMDb('sql', DB_URI)
    cust_conn = IMDB()

    unresolved = cust_conn.fetch_vec(
        'SELECT id FROM title WHERE imdb_id IS NULL;')
    for title_id in unresolved:
        print(title_id)
        # NOTE(review): the result is discarded; this relies on
        # get_imdbMovieID storing the id in title.imdb_id -- confirm.
        imdb_conn.get_imdbMovieID(title_id)

    # Table-wide clean-up, executed once after all lookups are done.
    cleanup_statements = (
        'UPDATE movie,title SET movie.imdb_id=title.imdb_id WHERE title.id=movie.id;',
        "INSERT INTO missing_movies (SELECT * FROM movie WHERE "
        "imdb_id IS NULL);",
        "DELETE FROM movie WHERE imdb_id IS NULL;",
    )
    for statement in cleanup_statements:
        cust_conn.execute_query(statement)
def create_stars_csv(limit=99999):
    """Look up the stars of not-yet-processed titles and append them to a CSV.

    Files used, all unbuffered and inside the ``stars/`` directory:

    * ``success.txt`` -- checkpoint of SQL ids already written; read at
      start-up and appended to, so an interrupted run can be resumed.
    * ``stars.csv``   -- appended with one ``[sql_id, stars]`` row per success.
    * ``failed.txt``  -- recreated each run; SQL ids with no stars or an error.
    * ``log.txt``     -- recreated each run; exception text and traceback.

    :param limit: maximum number of titles to process in this call.
    :return: None.
    """
    conn = IMDB()

    # Ids handled by earlier runs.  A set keeps the membership test in the
    # filter below O(1) instead of scanning a list for every title.
    try:
        with open('stars/success.txt', 'rb', 0) as fp:
            done = set(line.strip() for line in fp)
    except IOError:
        # No readable checkpoint yet.  The append-mode open below creates the
        # file, and an existing checkpoint is never truncated on error.
        done = set()

    with open("stars/log.txt", 'wb', 0) as log_fp, \
            open("stars/failed.txt", 'wb', 0) as fail_fp, \
            open("stars/stars.csv", 'ab', 0) as stars_fp, \
            open('stars/success.txt', 'ab', 0) as suc_fp:
        csv_writer = csv.writer(stars_fp)
        query = "SELECT id,imdb_id FROM title WHERE NOT imdb_id IS NULL;"
        res = conn.fetch_vec(query)
        ids = [(str(x), str(y)) for x, y in res if str(x) not in done]

        for sql_id, imdb_id in ids:
            if limit <= 0:
                return
            # Pause one second before every lookup.
            # NOTE(review): presumably rate limiting for get_stars -- confirm.
            time.sleep(1)
            limit -= 1
            try:
                stars = get_stars(imdb_id)
                if not stars:
                    fail_fp.write(sql_id + '\n')
                else:
                    csv_writer.writerow([sql_id, stars])
                    suc_fp.write(sql_id + '\n')
            except Exception as e:
                # Best effort: record the failure and move on to the next
                # title.  Errors raised while writing the row land here too.
                fail_fp.write(sql_id + '\n')
                log_fp.write(
                    " AT <{}> : {} \n {} \n".format(
                        sql_id, e, traceback.format_exc()))