Beispiel #1
0
def get_mojo_rank_info():
    """
    Store weekend box office rank summaries (from mojo info) in the db.

    Each row returned by ``movie_helper.get_highest_mojo_rank()`` is written
    to the ``movies`` table, matched on ``movieId``. All values are cast to
    ``int`` before being passed to the database helper. A tqdm progress bar
    advances once per row.
    """
    #columns copied from the rank summary into the movies table
    rank_columns = (
        "best_rank",
        "weekends_at_best_rank",
        "weekends_in_top_3",
        "weekends_in_top_5",
        "weekends_in_top_10",
        "weekends_in_top_15",
    )

    #get movies from the db together with their calculated rank info
    summary_df = movie_helper.get_highest_mojo_rank()

    with tqdm(total=len(summary_df)) as progress:
        for _, summary in summary_df.iterrows():

            #cast every value to int before handing it to the db helper
            rank_values = {
                column: int(summary[column])
                for column in rank_columns
            }
            movie_filter = {"movieId": int(summary["movieId"])}

            database_helper.update_data("movies",
                                        update_params=rank_values,
                                        select_params=movie_filter)
            progress.update(1)
Beispiel #2
0
def get_mojo_data():
    """
    Scrape each movie's financial summary from BoxOfficeMojo via its imdb id.

    Movies whose ``imdbId`` is falsy are skipped. For the rest, the stats
    returned by ``mojo_helper.get_mojo_stats()`` are written to the matching
    ``movies`` row (selected by ``movieId``). A tqdm progress bar advances
    once per movie, skipped or not.
    """
    #db column -> key in the stats returned by the mojo helper
    column_to_stat = (
        ("budget_usd", "Budget"),
        ("uk_gross_usd", "UK"),
        ("domestic_gross_usd", "Domestic"),
        ("worldwide_gross_usd", "Worldwide"),
        ("international_gross_usd", "International"),
    )

    #get all movies from db
    catalogue_df = movie_helper.get_movies_df()

    with tqdm(total=len(catalogue_df)) as progress:
        for _, film in catalogue_df.iterrows():
            imdb_id = film['imdbId']

            #only movies with an imdb id can be looked up on box office mojo
            if imdb_id:
                financials = mojo_helper.get_mojo_stats(imdb_id)
                column_values = {
                    column: financials[stat]
                    for column, stat in column_to_stat
                }
                database_helper.update_data(
                    "movies",
                    update_params=column_values,
                    select_params={"movieId": film["movieId"]})

            progress.update(1)
Beispiel #3
0
def get_mojo_run_info():
    """
    Store weekend box office run summaries (from mojo info) in the db.

    Each row returned by ``movie_helper.get_movie_run_info()`` is written to
    the ``movies`` table, matched on ``movieId``. Values are passed through
    unchanged. A tqdm progress bar advances once per row.
    """
    #columns copied from the run summary into the movies table
    run_columns = (
        "end_weekend",
        "total_weekends",
        "total_release_weeks",
        "first_run_end",
        "first_run_weeks",
    )

    #get movies from the db together with their calculated run info
    summary_df = movie_helper.get_movie_run_info()

    with tqdm(total=len(summary_df)) as progress:
        for _, summary in summary_df.iterrows():

            run_values = {column: summary[column] for column in run_columns}
            movie_filter = {"movieId": summary["movieId"]}

            #update the database
            database_helper.update_data("movies",
                                        update_params=run_values,
                                        select_params=movie_filter)
            progress.update(1)
Beispiel #4
0
def get_release_dates():
    """
    Function which uses imdb to collect the uk release date of films.

    For every movie returned by ``movie_helper.get_movies_df()`` the imdb
    'release dates' info is requested. The first entry that contains 'UK'
    and has no bracketed note is split on '::', parsed with the format
    '%d %B %Y' and stored in the ``ukReleaseDate`` column of the ``movies``
    table (matched on ``movieId``).

    Movies without an imdb id are skipped, like in the other collectors.
    Movies for which no usable UK entry is found (including the case where
    imdb returns no release dates at all) are reported on the console.
    """
    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    with tqdm(total=len(movies_df)) as pbar:
        for index, row in movies_df.iterrows():

            #without an imdb id there is nothing to look up
            if not row['imdbId']:
                pbar.update(1)
                continue

            #get list of release dates from API; fall back to an empty list
            #so a movie without release info does not abort the whole run
            movie = ia.get_movie(str(row['imdbId']), info='release dates')
            release_dates = movie.get('release dates') or []

            #try to extract UK release dates (string from imdb is a mess)
            uk = [i for i in release_dates if 'UK' in i and '(' not in i]

            if uk:
                #if successful update the db with the release date
                date_string = uk[0].split('::')[1]
                date = datetime.strptime(date_string, '%d %B %Y')
                database_helper.update_data(
                    "movies",
                    update_params={"ukReleaseDate": date},
                    select_params={"movieId": row["movieId"]})
            else:
                #if no uk release date found print to console
                print("No UK release for ", row['title'])

            pbar.update(1)
Beispiel #5
0
def get_cast_notes():
    """
    Function which uses imdb to collect cast notes eg Credited/Uncredited

    For every movie with an imdb id the cast list is fetched from imdb and
    the ``notes`` of each cast member are written to the ``actors`` table.
    Rows are matched on the person's imdb id (``p_imdbId``) together with the
    movie's imdb id (``m_imdbId``). Movies without an imdb id or without a
    cast list are skipped. A tqdm progress bar advances once per movie.
    """

    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    with tqdm(total=len(movies_df)) as pbar:
        for index, row in movies_df.iterrows():

            #no imdb id means there is nothing to look up
            if not row['imdbId']:
                pbar.update(1)
                continue

            movie = ia.get_movie(str(row['imdbId']))
            cast_list = movie.get('cast')

            #imdb returned no cast for this movie
            if cast_list is None:
                pbar.update(1)
                continue

            for cast_member in cast_list:
                updates = {'notes': cast_member.notes}
                selects = {
                    "p_imdbId": cast_member.personID,
                    "m_imdbId": row['imdbId']
                }
                database_helper.update_data("actors",
                                            update_params=updates,
                                            select_params=selects)

            pbar.update(1)
Beispiel #6
0
def get_keywords():
    """
    Function which uses imdb id to collect plot keywords

    For every movie with an imdb id the 'keywords' info is requested from
    imdb and stored as one comma-delimited string in the ``keywords`` column
    of the ``movies`` table (matched on ``movieId``). When no keywords can be
    built for a movie, ``None`` is stored instead. Movies without an imdb id
    are skipped. A tqdm progress bar advances once per movie.
    """

    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    with tqdm(total=len(movies_df)) as pbar:
        for index, row in movies_df.iterrows():

            #if imdbid exists use it to look up the API
            if row['imdbId']:

                #get list of keywords and create delimited string
                movie = ia.get_movie(str(row['imdbId']), info='keywords')
                try:
                    keywords = ",".join(movie['keywords'])
                except (KeyError, TypeError):
                    #KeyError: imdb returned no keywords for this movie
                    #TypeError: the value is not a list of strings
                    #anything else (eg Ctrl+C) is no longer swallowed
                    keywords = None

                #update the movies table in the db
                database_helper.update_data(
                    "movies",
                    update_params={"keywords": keywords},
                    select_params={"movieId": row["movieId"]})
            pbar.update(1)
Beispiel #7
0
def update_tweet_sentiments():
    """
    Assign sentiment scores and classification to the tweets of every movie.

    Iterates over ``movies`` (a name taken from the enclosing scope; it is
    not defined inside this function). For each movie the frame returned by
    ``tweet_helper.get_tweet_sentiments_scores()`` is written row by row to
    the ``movie_tweets2019`` table, matched on the tweet ``id``. A tqdm
    progress bar advances once per movie.
    """
    #sentiment columns copied from the scored frame into the tweets table
    sentiment_columns = (
        "negative_scr",
        "positive_scr",
        "neutral_scr",
        "compound_scr",
        "senti_class",
    )

    with tqdm(total=len(movies)) as progress:
        for film in movies:

            #score the tweets collected for this movie
            scored_df = tweet_helper.get_tweet_sentiments_scores(film.movieId)

            #write the scores and classes back, one tweet at a time
            for _, tweet in scored_df.iterrows():
                tweet_values = {
                    column: tweet[column]
                    for column in sentiment_columns
                }
                database_helper.update_data(
                    "movie_tweets2019",
                    update_params=tweet_values,
                    select_params={"id": tweet["id"]})

            progress.update(1)
Beispiel #8
0
def get_trailer_metadata():
    """
    Collect trailer metadata from YouTube using each trailer's youtubeId.

    Every row of the ``trailers`` table is looked up through
    ``yt.get_video_metadata()`` and the returned fields are written back to
    the same row (matched on ``youtubeId``). A tqdm progress bar advances
    once per trailer.
    """
    #db column -> key in the metadata returned by the youtube api wrapper
    column_to_field = (
        ('title', 'video_title'),
        ('channelTitle', 'channel_title'),
        ('channelId', 'channel_id'),
        ('categoryId', 'video_category'),
        ('commentCount', 'video_comment_count'),
        ('description', 'video_description'),
        ('likeCount', 'video_like_count'),
        ('dislikeCount', 'video_dislike_count'),
        ('viewCount', 'video_view_count'),
        ('publishDate', 'video_publish_date'),
        ('tags', 'video_tags'),
    )

    #get all trailers from the database
    trailer_rows = database_helper.select_query("trailers")

    with tqdm(total=len(trailer_rows)) as progress:
        for _, trailer in trailer_rows.iterrows():

            #request the video meta data for this trailer
            metadata = yt.get_video_metadata(trailer['youtubeId'])

            #map the api fields onto the db columns and store them
            column_values = {
                column: metadata[field]
                for column, field in column_to_field
            }
            database_helper.update_data(
                "trailers",
                update_params=column_values,
                select_params={"youtubeId": trailer["youtubeId"]})

            progress.update(1)
Beispiel #9
0
def get_metaData():
    """
    Function which uses imdbId to retrieve metadata from IMDb for each movie

    For every movie with an imdb id the year, genres, rating, votes and
    certificates are read from imdb and written to the ``movies`` table
    (matched on ``movieId``). Genres and certificates are stored as
    comma-delimited strings, or ``None`` when imdb has none for the movie.
    Movies without an imdb id are skipped. A tqdm progress bar advances once
    per movie.
    """
    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    #get movie meta data
    with tqdm(total=len(movies_df)) as pbar:
        for index, row in movies_df.iterrows():

            #if an imdbid exists use it to look up the API
            if row['imdbId']:

                #get base meta data from imdb
                movie = ia.get_movie(str(row['imdbId']))
                year = movie['year']

                #create delimited list of genre strings
                #reset per movie: otherwise a movie without genres raises
                #NameError or silently reuses the previous movie's genres
                genres = None
                genre_list = movie.get('genres')
                if genre_list:
                    genres = ','.join(genre_list)

                rating = movie.get('rating')
                votes = movie.get('votes')

                #create delimited list of movie certificates
                certificates = None
                certificate_list = movie.get('certificates')
                if certificate_list:
                    certificates = ','.join(certificate_list)

                #update database with collected meta data
                update_params = {
                    "year": year,
                    "genres": genres,
                    "rating": rating,
                    "votes": votes,
                    "certificates": certificates
                }
                select_params = {"movieId": row["movieId"]}
                database_helper.update_data("movies",
                                            update_params=update_params,
                                            select_params=select_params)

            pbar.update(1)
Beispiel #10
0
def get_imdbIds():
    """
    Function which uses the movie title from BFI to get the imdb id from IMDb api

    For every movie in the db, imdb is searched by title and only results of
    kind 'movie' are considered. The first result is used by default. When
    several movies match, the title is printed so it can be checked manually
    and the first result from 2019 is preferred if there is one. The imdb id
    and url of the chosen result are stored in the ``movies`` table (matched
    on ``movieId``). Titles without any movie result are left untouched.
    """

    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    for index, row in movies_df.iterrows():

        #use the api to search imdb for films with the title
        search_results = ia.search_movie(row['title'])

        #only interested in movie objects
        movie_results = [
            result for result in search_results
            if result.get('kind') == 'movie'
        ]

        #nothing usable found for this title
        if not movie_results:
            continue

        #take the first result by default
        movie = movie_results[0]

        if len(movie_results) > 1:
            #flag issue to console so movie can be manually checked
            print("Check: ", row['title'])

            #try to get the one from 2019
            year_results = [
                result for result in movie_results
                if result.get('year') == 2019
            ]
            if year_results:
                movie = year_results[0]

        #extract imdb url and id using API
        movie_url = ia.get_imdbURL(movie)
        movie_id = ia.get_imdbID(movie)

        #update database
        #select_params must be a dict (column -> value); the original used a
        #comma here, which built a set instead
        database_helper.update_data(
            "movies",
            update_params={
                "imdbId": movie_id,
                "url": movie_url
            },
            select_params={"movieId": row["movieId"]})
Beispiel #11
0
def get_trailer_release_dates():
    """
    Update the trailer release dates that get_trailer_metadata() could not retrieve.

    Every row of the ``trailers`` table is passed to
    ``youtube_helper.get_trailer_release()`` and the result is stored in the
    ``publishDate`` column of the same row (matched on ``youtubeId``). A tqdm
    progress bar advances once per trailer.
    """

    #get all trailers from the db
    trailer_rows = database_helper.select_query("trailers")

    with tqdm(total=len(trailer_rows)) as progress:
        for _, trailer in trailer_rows.iterrows():

            #customized api request which returns the trailer's release date
            published = youtube_helper.get_trailer_release(
                trailer['youtubeId'], yt)

            #update the database
            database_helper.update_data(
                "trailers",
                update_params={'publishDate': published},
                select_params={"youtubeId": trailer["youtubeId"]})

            progress.update(1)
Beispiel #12
0
def get_critical_period():
    """
    Store each film's critical period (from release date and weekend box office info).

    Each row returned by ``movie_helper.get_critical_period()`` supplies the
    ``critical_start`` and ``critical_end`` values written to the ``movies``
    table. Rows are matched on ``movieId``, which is cast to ``int``. A tqdm
    progress bar advances once per row.
    """
    period_columns = ("critical_start", "critical_end")

    #get movies from the db together with their calculated critical period
    period_df = movie_helper.get_critical_period()

    with tqdm(total=len(period_df)) as progress:
        for _, period in period_df.iterrows():

            period_values = {
                column: period[column]
                for column in period_columns
            }
            movie_filter = {"movieId": int(period["movieId"])}

            #update the database
            database_helper.update_data("movies",
                                        update_params=period_values,
                                        select_params=movie_filter)
            progress.update(1)
#NOTE(review): this is the tail of a top-level script section whose start is
#not visible here. greta_res, greta, year, genres, rating, votes and
#certificates are expected to be assigned earlier - confirm before reuse.
#join the certificates into one comma-delimited string when imdb returned any
if (greta_res.get('certificates')):
    certificates = ','.join(greta_res.get('certificates'))

#update database: write the hard-coded imdb id/url plus the collected meta
#data to the movies row selected by greta's movieId
update_params = {
    "imdbId": '2639336',
    "url": 'https://www.imdb.com/title/tt2639336/',
    "year": year,
    "genres": genres,
    "rating": rating,
    "votes": votes,
    "certificates": certificates
}
#movieId is cast to int before being used as the row selector
select_params = {"movieId": int(greta["movieId"])}
database_helper.update_data("movies",
                            update_params=update_params,
                            select_params=select_params)

#"Kobiety Mafii 2"
# kobiety_mafii = database_helper.select_query("movies", { "movieId" : 262 })
# kobiety_mafii = kobiety_mafii.iloc[0]

# kobiety_mafii_res = ia.get_movie('8858420')
# year = kobiety_mafii_res['year']
# if (kobiety_mafii_res.get('genres')):
#     genres = ','.join(kobiety_mafii_res.get('genres'))
# rating = kobiety_mafii_res.get('rating')
# votes = kobiety_mafii_res.get('votes')
# certificates = None
# if (kobiety_mafii_res.get('certificates')):
#     certificates = ','.join(kobiety_mafii_res.get('certificates'))