def get_trailer_comments():
    """
    Collect the comments of every trailer in ``filtered_trailers`` and
    store them in the ``trailer_comments`` table.

    For each trailer row the comments are requested through
    ``yt.get_video_comments`` (parsed with ``custom_parser``) and one record
    per comment is inserted via ``database_helper.insert_data``.
    """
    #fields copied one-to-one from each parsed comment into the db record
    comment_fields = (
        'commentId',
        'channelUrl',
        'channelId',
        'channelName',
        'displayText',
        'originalText',
        'likeCount',
        'publishDate',
        'updateDate',
        'replyCount',
        'parentId',
    )

    with tqdm(total=len(filtered_trailers)) as progress:
        for _, trailer in filtered_trailers.iterrows():
            comments = yt.get_video_comments(trailer['youtubeId'],
                                             parser=custom_parser,
                                             part=['snippet'])
            for comment in comments:
                #trailer identifiers first, then the comment's own fields
                record = {
                    'trailerId': trailer['id'],
                    'trailerYoutubeId': trailer['youtubeId'],
                }
                record.update(
                    {field: comment[field] for field in comment_fields})
                database_helper.insert_data("trailer_comments", record)

            #one tick per trailer, regardless of how many comments it had
            progress.update(1)
Example #2
0
def get_synopsis():
    """
    Function which uses imdb to collect the synopsis of every movie.

    All movies are read from the db; rows with a falsy ``imdbId`` are
    skipped. For the rest the movie is looked up through ``ia.get_movie``
    (requesting only the synopsis info) and its ``synopsis`` entry is
    inserted into the ``synopsis`` table. When the lookup of the synopsis
    or the insert fails, the movie's title and imdb id are printed and the
    loop carries on with the next movie.
    """
    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    with tqdm(total=len(movies_df)) as pbar:
        for _, row in movies_df.iterrows():

            #if imdb id exists use it to look up the API
            if row['imdbId']:

                #get synopsis and update the db
                movie = ia.get_movie(str(row['imdbId']), info='synopsis')
                try:
                    synopsis = movie['synopsis']
                    database_helper.insert_data("synopsis", {
                        "movieId": row["movieId"],
                        "summary": synopsis
                    })
                except Exception:
                    #best-effort: report the movie and keep going. Catching
                    #Exception (not a bare except) keeps Ctrl-C / SystemExit
                    #able to stop this long-running loop.
                    #f-string so a non-string imdbId cannot raise TypeError
                    #inside the handler itself
                    print(f"{row['title']} ({row['imdbId']})")

            pbar.update(1)
Example #3
0
def get_actors():
    """
    Function which uses imdb to collect movie actors.

    All movies are read from the db; rows with a falsy ``imdbId`` are
    skipped. For every member of a movie's ``cast`` list the person is
    inserted into ``people`` (unless a row with that imdb id already
    exists) and a movie/person link carrying the character name is
    inserted into ``actors``.
    """

    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    with tqdm(total=len(movies_df)) as pbar:
        for _, row in movies_df.iterrows():

            #if imdbid exists use it to look up the API
            if row['imdbId']:
                movie = ia.get_movie(str(row['imdbId']))

                #get list of cast, skip movies for which none was returned
                cast_list = movie.get('cast')
                if cast_list is not None:
                    for cast_member in cast_list:

                        #Try to get the name of the character
                        role = cast_member.currentRole
                        if isinstance(role, list):
                            #several roles: store them comma separated
                            character_name = ','.join(
                                [x['name'] for x in role])
                        else:
                            try:
                                character_name = role['name']
                            except Exception:
                                #role without a usable name; Exception
                                #rather than a bare except so Ctrl-C still
                                #interrupts the run
                                character_name = "Unknown"

                        #first check if the person exists
                        imdb_id = cast_member.personID
                        person_df = database_helper.select_query(
                            "people", {'imdbId': imdb_id})
                        if person_df.empty:
                            database_helper.insert_data(
                                "people", {
                                    "imdbId": imdb_id,
                                    "fullName": cast_member["name"]
                                })

                        #add movie actor link
                        database_helper.insert_data(
                            "actors", {
                                "p_imdbId": imdb_id,
                                "m_imdbId": row['imdbId'],
                                "role": character_name
                            })

            pbar.update(1)
Example #4
0
def load_trailers_from_csv():
    """
    Read the manually collected trailers csv and insert each of its rows
    into the ``trailers`` table.
    """
    trailers_df = pd.read_csv("../../ProjectData/trailers.csv")

    #columns copied one-to-one from the csv into the trailers table
    columns = ("movieId", "youtubeId", "url", "title", "channelTitle")

    with tqdm(total=len(trailers_df)) as progress:
        for _, trailer in trailers_df.iterrows():
            record = {column: trailer[column] for column in columns}
            database_helper.insert_data("trailers", record)
            progress.update(1)
Example #5
0
def add_box_office():
    """
    Function which adds the weekend box office data from the BFI into the db.

    Every row of the raw BFI data is matched to a movie by looking up its
    ``Film`` title in the ``movies`` table, and one ``weekend_box_office``
    record is inserted per row. A ``% change on last week`` value that
    cannot be converted to a float is stored as ``None``.

    NOTE(review): the first match returned for the title is used via
    ``.iloc[0]``; presumably ``select_query`` returns a DataFrame, in which
    case a title missing from ``movies`` raises IndexError - confirm that
    the movies table is always populated before this runs.
    """

    #get full film set
    film_df = bfi_helper.get_raw_data()

    with tqdm(total=len(film_df)) as pbar:
        for _, row in film_df.iterrows():

            #get the movie id and use it to insert weekend data into the db
            movie_df = database_helper.select_query("movies",
                                                    {"title": row['Film']})
            movie_id = int(movie_df.iloc[0]['movieId'])

            #non-numeric text raises ValueError, a missing (None) cell
            #raises TypeError; both mean "no percentage available"
            try:
                percentage_change = float(row['% change on last week'])
            except (TypeError, ValueError):
                percentage_change = None

            insert_params = {
                "movieId": movie_id,
                "weeksOnRelease": row['Weeks on release'],
                "noOfcinemas": row['Number of cinemas'],
                "weekendGross": row['Weekend Gross'],
                "percentageChange": percentage_change,
                "siteAverage": row['Site average'],
                "grossToDate": row['Total Gross to date'],
                "weekendStart": row['weekendStart'],
                "weekendEnd": row['weekendEnd'],
                "rank": row['Rank']
            }
            database_helper.insert_data("weekend_box_office", insert_params)

            pbar.update(1)
Example #6
0
def get_writers():
    """
    Function which uses imdb id to get list of writers.

    All movies are read from the db; rows with a falsy ``imdbId`` are
    skipped. For every entry of a movie's ``writer`` list the person is
    inserted into ``people`` (unless a row with that imdb id already
    exists) and a movie/person link is inserted into ``writers``.
    """

    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    with tqdm(total=len(movies_df)) as pbar:
        for _, row in movies_df.iterrows():

            #if imdbid exists use it to look up the API
            if row['imdbId']:
                movie = ia.get_movie(str(row['imdbId']))

                #get list of writers, skip movies for which none was returned
                writers = movie.get('writer')
                if writers is not None:
                    for writer in writers:
                        #first check if the person exists
                        imdb_id = writer.personID
                        person_df = database_helper.select_query(
                            "people", {'imdbId': imdb_id})
                        if person_df.empty:
                            database_helper.insert_data(
                                "people", {
                                    "imdbId": imdb_id,
                                    "fullName": writer["name"]
                                })

                        #add movie writer link
                        database_helper.insert_data("writers", {
                            "p_imdbId": imdb_id,
                            "m_imdbId": row['imdbId']
                        })

            pbar.update(1)
Example #7
0
def add_movies_to_db():
    """
    Function which creates a unique list of movies from BFI data and inserts into DB.

    The raw BFI data is reduced to its distinct
    (``Film``, ``Country of Origin``, ``Distributor``) combinations and each
    title that is not yet present in the ``movies`` table is inserted once.
    """

    #get full film set
    film_df = bfi_helper.get_raw_data()

    #only the movie-level columns matter here; de-duplicating on them
    #(rather than on whole rows) avoids one db lookup per weekly row.
    #drop_duplicates keeps the first occurrence, so the row that ends up
    #inserted for each title is the same as when scanning every raw row.
    film_df_sub = film_df[['Film', 'Country of Origin',
                           'Distributor']].drop_duplicates()

    with tqdm(total=len(film_df_sub)) as pbar:
        for _, row in film_df_sub.iterrows():

            #check that the movie has not been added yet (a title can still
            #appear more than once with a different distributor/country,
            #and may already exist from an earlier run)
            existing = database_helper.select_query("movies",
                                                    {"title": row["Film"]})
            if existing.empty:
                #insert into db
                database_helper.insert_data(
                    "movies", {
                        "title": row['Film'],
                        "distributor": row['Distributor'],
                        "country": row['Country of Origin']
                    })
            pbar.update(1)