예제 #1
0
def generate_csv():
    """Dump the cast of every movie to ``actors.csv``.

    One CSV row is written per record returned by the ``movie`` query: the
    movie's ``imdb_id`` followed by the ``person_id`` values fetched from
    ``actors`` for that movie, ordered by the ``index`` column.
    """
    cast_sql = ("SELECT person_id FROM actors "
                "WHERE movie_id = %s ORDER BY `index` ASC ;")
    db = IMDB()
    movie_rows = db.fetch_vec('SELECT id, imdb_id FROM movie;')
    # Buffering is disabled (third argument 0) so rows reach disk as written.
    with open('actors.csv', 'wb', 0) as out_fp:
        out = csv.writer(out_fp)
        for movie_pk, imdb_id in movie_rows:
            cast = db.fetch_vec(cast_sql, movie_pk)
            # NOTE(review): assumes fetch_vec returns a list here, since it is
            # concatenated onto a list - confirm against IMDB.fetch_vec.
            out.writerow([imdb_id] + cast)
예제 #2
0
def fill_actors(actors_csv='http_linkers/actors.csv'):
    """Rebuild the ``actors`` table from a CSV file.

    Every existing row in ``actors`` is deleted first. For each CSV row the
    first column is inserted as ``movie_id`` and each following column as a
    ``person_id``, with ``index`` set to the column's 1-based position among
    the persons of that row.

    :param actors_csv: path of the CSV file to load.
    """
    insert_sql = ('INSERT INTO actors (movie_id,person_id,`index`) '
                  'VALUES (%s,%s,%s);')
    db = IMDB()
    db.execute_query('DELETE FROM actors;')
    with open(actors_csv, 'rb') as src:
        for record in csv.reader(src):
            # record[0] is only read when there is at least one person, so
            # rows without persons (including blank rows) insert nothing.
            for position, person_id in enumerate(record[1:], 1):
                db.execute_query(insert_sql, record[0], person_id, position)
예제 #3
0
def process_all_movies():
    """Run ``process_movie`` for each movie that has no rows in ``actors``.

    Movies are selected from ``movie`` where the id does not appear as a
    ``movie_id`` in ``actors``. Each one is handed to ``process_movie`` as a
    dict with keys ``'id'`` and ``'imdb id'``, together with both connections
    and the two unbuffered files ``http_cast_log.txt`` and
    ``http_cast_failed.txt``.
    """
    web = IMDb()
    db = IMDB()
    pending_sql = ("SELECT id,imdb_id FROM movie WHERE id NOT IN "
                   "(SELECT distinct movie_id from actors);")
    with open("http_cast_log.txt", 'wb', 0) as log:
        with open("http_cast_failed.txt", 'wb', 0) as fail_fp:
            for record in db.fetch_vec(pending_sql):
                process_movie(
                    web,
                    db,
                    {'id': record[0], 'imdb id': record[1]},
                    log,
                    fail_fp,
                )
예제 #4
0
def fill_info():
    """Call ``get_movie_business`` for every title that has an IMDb id.

    The ids are read from the ``title`` table through the ``IMDB()``
    connection and passed one at a time to the ``IMDb('sql', DB_URI)``
    connection. A failure for one id is reported on stdout and does not stop
    processing of the remaining ids.
    """
    imdb_conn = IMDb('sql', DB_URI)
    cust_conn = IMDB()

    imdb_ids = cust_conn.fetch_vec(
        'SELECT imdb_id FROM title WHERE NOT imdb_id IS NULL;')
    for imdb_id in imdb_ids:
        try:
            imdb_conn.get_movie_business(imdb_id)
        except Exception:
            # Best-effort: report and move on. ``Exception`` is caught instead
            # of using a bare ``except`` so that KeyboardInterrupt and
            # SystemExit can still stop this long-running loop.
            print("err %s \n" % imdb_id)
예제 #5
0
def update_stars_csv():
    """Rewrite ``stars/stars_old.csv`` as ``stars/stars.csv`` keyed by IMDb id.

    For each input row the first column is looked up as ``title.id`` and
    replaced by the matching ``imdb_id``; the second column is copied
    unchanged. Only the first row seen for a given first-column value is
    written; later duplicates are skipped.
    """
    conn = IMDB()
    query = "SELECT imdb_id FROM title WHERE id = %s ;"
    # A set keeps the duplicate check O(1) per row; the original list made the
    # whole pass quadratic in the number of rows.
    seen = set()
    with open('stars/stars_old.csv', 'rb') as old_fp, \
            open('stars/stars.csv', 'wb', 0) as stars_fp:
        reader = csv.reader(old_fp)
        writer = csv.writer(stars_fp)
        for row in reader:
            title_id = row[0]
            if title_id in seen:
                continue
            imdb_id = conn.fetch_scalar(query, title_id)
            writer.writerow([imdb_id, row[1]])
            seen.add(title_id)
예제 #6
0
def fill_stars(stars_csv='http_linkers/stars.csv'):
    """Rebuild the ``stars`` table from a CSV file, logging per-row outcome.

    All rows in ``stars`` are deleted first. For each non-blank CSV row the
    first column is taken as the IMDb id and the second column is split into
    star names, which are passed to ``process_movie``. Ids that succeed are
    appended to ``stars/db_success.txt``; ids that raise are appended to
    ``stars/db_failed.txt`` and the exception plus traceback are written to
    ``stars/db_log.txt``. A failing row never aborts the run.

    :param stars_csv: path of the CSV file to load.
    """
    conn = IMDB()
    conn.execute_query('DELETE FROM stars;')
    with open(stars_csv, 'rb') as fp, \
            open('stars/db_success.txt', 'wb', 0) as succ_fp, \
            open('stars/db_failed.txt', 'wb', 0) as fail_fp, \
            open('stars/db_log.txt', 'wb', 0) as log_fp:
        reader = csv.reader(fp)
        # Matches any fragment containing '[', ']' or ','.
        # NOTE(review): presumably the second column holds a list repr such as
        # "['A', 'B']", so splitting on the quote and dropping these fragments
        # leaves only the names - confirm against the CSV producer.
        pr = re.compile(r'.*[\[\]\,].*')
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; there is no id to
                # process or to report, so skip them.
                continue
            # Bind the id before entering the try block so the error handler
            # always reports the id of the row that actually failed (it was
            # previously assigned inside the try and could be unbound or left
            # over from the preceding row).
            imdb_id = row[0]
            try:
                stars = [x for x in row[1].split('\'') if pr.match(x) is None]
                process_movie(conn, imdb_id, stars)
                succ_fp.write(imdb_id + '\n')
            except Exception as e:
                fail_fp.write(imdb_id + '\n')
                log_fp.write(
                        " AT <{}> : {}  \n {} \n".format(imdb_id, e,
                                traceback.format_exc()))
예제 #7
0
def create_stars_csv(limit=99999):
    """Fetch stars for titles not yet processed and append them to a CSV.

    Title ids already listed in ``stars/success.txt`` are skipped. For each
    remaining title with a non-NULL ``imdb_id``, ``get_stars`` is called; a
    non-empty result is appended to ``stars/stars.csv`` as
    ``[sql_id, stars]`` and the id is appended to ``stars/success.txt``.
    Empty results and exceptions append the id to ``stars/failed.txt``;
    exceptions are additionally written with their traceback to
    ``stars/log.txt``. ``log.txt`` and ``failed.txt`` are truncated at the
    start of every call, while ``stars.csv`` and ``success.txt`` are appended
    to.

    :param limit: maximum number of titles to attempt in this call; the
        function returns as soon as the budget is used up.
    """
    conn = IMDB()

    try:
        with open('stars/success.txt', 'rb', 0) as fp:
            # A set makes the "already done" filter below O(1) per title.
            done = {line.strip() for line in fp}
    except IOError:
        # No readable progress file yet: start from scratch. The file is
        # created by the append-mode open below, so it is deliberately not
        # reopened for writing here - that would truncate recorded progress
        # if the read had failed for any other reason.
        done = set()
    with open("stars/log.txt", 'wb', 0) as log_fp, \
            open("stars/failed.txt", 'wb', 0) as fail_fp, \
            open("stars/stars.csv", 'ab', 0) as stars_fp, \
            open('stars/success.txt', 'ab', 0) as suc_fp:
        csv_writer = csv.writer(stars_fp)
        query = "SELECT id,imdb_id FROM title WHERE NOT imdb_id IS NULL;"
        res = conn.fetch_vec(query)
        ids = [(str(x), str(y)) for x, y in res if str(x) not in done]

        for sql_id, imdb_id in ids:
            if limit <= 0:
                return
            # NOTE(review): presumably throttles requests made by get_stars -
            # confirm before changing the delay.
            time.sleep(1)
            limit -= 1
            try:
                stars = get_stars(imdb_id)
                if not stars:
                    fail_fp.write(sql_id + '\n')
                else:
                    csv_writer.writerow([sql_id, stars])
                    suc_fp.write(sql_id + '\n')
            except Exception as e:
                fail_fp.write(sql_id + '\n')
                log_fp.write(
                        " AT <{}> : {}  \n {} \n".format(sql_id, e,
                                traceback.format_exc()))
예제 #8
0
def fill_movie_imdbid():
    """Resolve missing IMDb ids for titles and sync them into ``movie``.

    Each ``title.id`` whose ``imdb_id`` is NULL is printed and passed to
    ``get_imdbMovieID`` on the ``IMDb('sql', DB_URI)`` connection. Afterwards
    three statements run in order on the ``IMDB()`` connection: copy
    ``title.imdb_id`` into ``movie.imdb_id`` for matching ids, copy movies
    that still have a NULL ``imdb_id`` into ``missing_movies``, and delete
    those movies from ``movie``.
    """
    imdb_conn = IMDb('sql', DB_URI)
    cust_conn = IMDB()

    unresolved = cust_conn.fetch_vec('SELECT id FROM title WHERE imdb_id IS NULL;')
    for title_id in unresolved:
        print(title_id)
        imdb_conn.get_imdbMovieID(title_id)

    # Order matters: archive the unresolved movies before deleting them.
    follow_up_statements = (
        'UPDATE movie,title SET movie.imdb_id=title.imdb_id WHERE title.id=movie.id;',
        "INSERT INTO missing_movies (SELECT * FROM movie WHERE "
        "imdb_id IS NULL);",
        "DELETE FROM movie WHERE imdb_id IS NULL;",
    )
    for statement in follow_up_statements:
        cust_conn.execute_query(statement)