Example #1
0
def get_mojo_data():
    """
    Scrape the BoxOfficeMojo financial summary of every movie that has an imdb id.

    All movies are read through movie_helper.get_movies_df(). For each row
    with an imdb id the stats returned by mojo_helper.get_mojo_stats() are
    written to the budget and gross columns of the "movies" table, selected
    by movieId. Rows without an imdb id are skipped but still advance the
    progress bar.
    """

    #pairs of (movies table column, key in the stats returned by mojo_helper)
    column_to_stat = (
        ("budget_usd", "Budget"),
        ("uk_gross_usd", "UK"),
        ("domestic_gross_usd", "Domestic"),
        ("worldwide_gross_usd", "Worldwide"),
        ("international_gross_usd", "International"),
    )

    films = movie_helper.get_movies_df()

    with tqdm(total=len(films)) as progress:
        for _, film in films.iterrows():
            imdb_id = film['imdbId']

            #only movies with an imdb id can be looked up on box office mojo
            if imdb_id:
                mojo_stats = mojo_helper.get_mojo_stats(imdb_id)
                database_helper.update_data(
                    "movies",
                    update_params={
                        column: mojo_stats[stat_key]
                        for column, stat_key in column_to_stat
                    },
                    select_params={"movieId": film["movieId"]})

            progress.update(1)
Example #2
0
def get_release_dates():
    """
    Function which uses imdb to collect the uk release date of films.

    For every movie with an imdb id the 'release dates' info set is fetched
    from the imdb API. The first entry that contains 'UK' and no bracketed
    qualifier is split on '::', parsed as '<day> <month name> <year>' and
    stored in the ukReleaseDate column of the "movies" table, selected by
    movieId. Movies that were looked up but have no such entry are printed
    to the console. Rows without an imdb id are skipped.

    Raises:
        ValueError: if the selected UK entry is not in '%d %B %Y' format.
    """
    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    with tqdm(total=len(movies_df)) as pbar:
        for index, row in movies_df.iterrows():

            #if imdb id exists use it to look up the API (same guard as the
            #other collectors; avoids sending the string 'None' to imdb)
            if (row['imdbId']):

                #get list of release dates from API, a title may come back
                #without any release date info at all
                movie = ia.get_movie(str(row['imdbId']), info='release dates')
                release_dates = movie.get('release dates') or []

                #try to extract UK release dates (string from imdb is a mess)
                uk = [
                    i for i in release_dates if 'UK' in i and '(' not in i
                ]
                if uk:
                    #if successful update the db with the release date
                    date_string = uk[0].split('::')[1]
                    date = datetime.strptime(date_string, '%d %B %Y')
                    database_helper.update_data(
                        "movies",
                        update_params={"ukReleaseDate": date},
                        select_params={"movieId": row["movieId"]})
                else:
                    #if no uk release date found print to console
                    print("No UK release for ", row['title'])

            pbar.update(1)
Example #3
0
def get_cast_notes():
    """
    Collect the imdb cast note (eg Credited/Uncredited) of every cast member.

    For each movie with an imdb id the cast list is fetched from the imdb API
    and the note of each cast member is written to the notes column of the
    "actors" table, selected by the person and movie imdb ids.
    """

    films = movie_helper.get_movies_df()

    with tqdm(total=len(films)) as progress:
        for _, film in films.iterrows():

            #movies without an imdb id cannot be looked up
            if film['imdbId']:
                cast = ia.get_movie(str(film['imdbId'])).get('cast')

                #a missing cast list is handled like an empty one
                for member in (cast if cast is not None else []):
                    person_id = member.personID
                    database_helper.update_data(
                        "actors",
                        update_params={'notes': member.notes},
                        select_params={
                            "p_imdbId": person_id,
                            "m_imdbId": film['imdbId']
                        })

            progress.update(1)
Example #4
0
def get_keywords():
    """
    Function which uses imdb id to collect plot keywords.

    For every movie with an imdb id the 'keywords' info set is fetched from
    the imdb API and stored as a comma delimited string in the keywords
    column of the "movies" table, selected by movieId. When the keyword list
    is missing or cannot be joined the column is set to None.
    """

    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    with tqdm(total=len(movies_df)) as pbar:
        for index, row in movies_df.iterrows():

            #if imdb id exists use it to look up the API
            if (row['imdbId']):

                #get list of keywords and create delimited string
                movie = ia.get_movie(str(row['imdbId']), info='keywords')

                #only a missing or unjoinable keyword list means "no keywords";
                #anything else (eg Ctrl-C) must not be silently swallowed
                try:
                    keywords = ",".join(movie['keywords'])
                except (KeyError, TypeError):
                    keywords = None

                #update the movies table in the db
                database_helper.update_data(
                    "movies",
                    update_params={"keywords": keywords},
                    select_params={"movieId": row["movieId"]})
            pbar.update(1)
Example #5
0
def get_synopsis():
    """
    Function which uses imdb to collect the long form synopsis.

    For every movie with an imdb id the 'synopsis' info set is fetched from
    the imdb API and inserted into the "synopsis" table together with the
    movieId. Movies for which imdb returns no synopsis are printed to the
    console as 'title (imdbId)'. Errors raised by the database insert are
    not caught here, so they are no longer reported as a missing synopsis.
    """
    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    with tqdm(total=len(movies_df)) as pbar:
        for index, row in movies_df.iterrows():

            #if imdb id exists use it to look up the API
            if (row['imdbId']):

                movie = ia.get_movie(str(row['imdbId']), info='synopsis')

                #keep the try limited to the lookup so that only a missing
                #synopsis is handled here
                try:
                    synopsis = movie['synopsis']
                except KeyError:
                    #print to console if synopsis does not exist; str() keeps
                    #the message working when the imdb id is not a string
                    print(row['title'] + ' (' + str(row['imdbId']) + ')')
                else:
                    #synopsis found so store it in the db
                    database_helper.insert_data("synopsis", {
                        "movieId": row["movieId"],
                        "summary": synopsis
                    })

            pbar.update(1)
Example #6
0
def get_actors():
    """
    Function which uses imdb to collect movie actors.

    For every movie with an imdb id the cast list is fetched from the imdb
    API. Each cast member is inserted into the "people" table when no row
    with their imdb id exists yet, and is then linked to the movie through a
    row in the "actors" table holding the character name(s). When the
    character name cannot be read the role is stored as "Unknown".
    """

    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    with tqdm(total=len(movies_df)) as pbar:
        for index, row in movies_df.iterrows():

            #if imdb id exists use it to look up the API
            if (row['imdbId']):
                movie = ia.get_movie(str(row['imdbId']))

                #get list of cast
                cast_list = movie.get('cast')
                if (cast_list is not None):
                    for cast_member in cast_list:

                        #try to get the name of the character, a list of roles
                        #is flattened into one comma delimited string
                        role = cast_member.currentRole
                        if (isinstance(role, list)):
                            character_name = ','.join(
                                [x['name'] for x in role])
                        else:
                            #only a role without a readable name falls back
                            #to "Unknown"; other errors are not swallowed
                            try:
                                character_name = role['name']
                            except (KeyError, TypeError):
                                character_name = "Unknown"

                        #first check if the person exists
                        imdb_id = cast_member.personID
                        person_df = database_helper.select_query(
                            "people", {'imdbId': imdb_id})
                        if (person_df.empty):
                            database_helper.insert_data(
                                "people", {
                                    "imdbId": imdb_id,
                                    "fullName": cast_member["name"]
                                })

                        #add movie actor link
                        database_helper.insert_data(
                            "actors", {
                                "p_imdbId": imdb_id,
                                "m_imdbId": row['imdbId'],
                                "role": character_name
                            })

            pbar.update(1)
Example #7
0
def get_metaData():
    """
    Function which uses imdbId to retrieve metadata from IMDb for each movie.

    For every movie with an imdb id the year, genres, rating, votes and
    certificates are fetched from the imdb API and written to the "movies"
    table, selected by movieId. Genres and certificates are stored as comma
    delimited strings, or None when imdb returns none for the movie.
    """
    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    #get movie meta data
    with tqdm(total=len(movies_df)) as pbar:
        for index, row in movies_df.iterrows():

            #if an imdbid exists use it to look up the API
            if (row['imdbId']):

                #get base meta data from imdb
                movie = ia.get_movie(str(row['imdbId']))
                year = movie['year']

                #create delimited list of genre strings; reset for every
                #movie so a title without genres neither raises NameError
                #nor inherits the genres of the previous movie
                genres = None
                if (movie.get('genres')):
                    genres = ','.join(movie.get('genres'))

                rating = movie.get('rating')
                votes = movie.get('votes')

                #create delimited list of movie certificates
                certificates = None
                if (movie.get('certificates')):
                    certificates = ','.join(movie.get('certificates'))

                #update database with collected meta data
                update_params = {
                    "year": year,
                    "genres": genres,
                    "rating": rating,
                    "votes": votes,
                    "certificates": certificates
                }
                select_params = {"movieId": row["movieId"]}
                database_helper.update_data("movies",
                                            update_params=update_params,
                                            select_params=select_params)

            pbar.update(1)
Example #8
0
def get_imdbIds(preferred_year=2019):
    """
    Function which uses the movie title from BFI to get the imdb id from IMDb api.

    Every movie title is searched on imdb and only results of kind 'movie'
    are considered. The first result is used by default. When several movies
    match, the title is printed to the console so it can be checked manually
    and the first result from preferred_year is used instead, if there is
    one. The imdb id and url of the chosen result are written to the
    "movies" table, selected by movieId. Titles with no movie result are
    left untouched.

    Args:
        preferred_year: release year used to pick a result when the search
            returns more than one movie. Defaults to 2019.
    """

    #get all movies from db
    movies_df = movie_helper.get_movies_df()

    for index, row in movies_df.iterrows():

        #use the api to search imdb for films with the title
        search_results = ia.search_movie(row['title'])

        #only interested in movie objects
        movie_results = [
            x for x in search_results if x.get('kind') == 'movie'
        ]

        #nothing to store when imdb returns no movie for the title
        if not movie_results:
            continue

        #take the first result by default
        movie = movie_results[0]

        if (len(movie_results) > 1):
            #flag issue to console so movie can be manually checked
            print("Check: ", row['title'])

            #try to get the one from the preferred year
            year_results = [
                x for x in movie_results if x.get('year') == preferred_year
            ]
            if year_results:
                movie = year_results[0]

        #extract imdb url and id using API
        movie_url = ia.get_imdbURL(movie)
        movie_id = ia.get_imdbID(movie)

        #update database; select_params must be a dict like in every other
        #update_data call (the previous set literal was a typo)
        database_helper.update_data(
            "movies",
            update_params={
                "imdbId": movie_id,
                "url": movie_url
            },
            select_params={"movieId": row["movieId"]})
Example #9
0
def get_mojo_box_office():
    """
    Function which uses imdb id to scrape movie weekend box office data from BoxOfficeMojo.

    For every movie with an imdb id the dataframe returned by
    mojo_helper.get_uk_box_office_df() is tagged with the movieId and bulk
    inserted, with all of its columns, into the "weekend_box_office_mojo"
    table. Rows without an imdb id are skipped but still advance the
    progress bar.
    """

    #get movies from db
    movies_df = movie_helper.get_movies_df()

    with tqdm(total=len(movies_df)) as pbar:
        for index, row in movies_df.iterrows():

            #if imdb id exists use it to scrape box office mojo (same guard
            #as get_mojo_data, there is nothing to scrape without an id)
            if (row['imdbId']):

                #get df of box office info for each weekend
                weekend_df = mojo_helper.get_uk_box_office_df(row['imdbId'])
                weekend_df["movieId"] = row["movieId"]

                #insert into the database
                database_helper.bulk_insert_df(
                    "weekend_box_office_mojo", weekend_df,
                    weekend_df.columns.values.tolist())

            pbar.update(1)
Example #10
0
def gen_bottom_20_tweet_count():
    """
    Plot a horizontal bar chart of the 20 movies with the lowest tweet count.
    """

    def tweets_for(film):
        #the tweet total is read from the 'count' entry of the helper result
        return movie_helper.count_tweets(film['movieId'])['count']

    #get movies from db and attach a tweet count to every row
    films = movie_helper.get_movies_df()
    films["tweet_count"] = films.apply(tweets_for, axis=1)

    #ascending sort puts the least tweeted movies first
    least_tweeted = films.sort_values(by='tweet_count').head(20)

    #do bar plot
    plt.barh(least_tweeted["title"],
             least_tweeted["tweet_count"],
             color='green')
    plt.ylabel('Movie Title')
    plt.xlabel('Tweet Count')
    plt.title('Bottom 20 Movies')
    plt.show()
Example #11
0
def get_writers():
    """
    Collect the writers of every movie that has an imdb id.

    Each writer returned by the imdb API is inserted into the "people" table
    when no row with their imdb id exists yet, and is then linked to the
    movie through a row in the "writers" table.
    """

    films = movie_helper.get_movies_df()

    with tqdm(total=len(films)) as progress:
        for _, film in films.iterrows():

            #movies without an imdb id cannot be looked up
            if film['imdbId']:
                imdb_movie = ia.get_movie(str(film['imdbId']))
                credited = imdb_movie.get('writer')

                #a missing writer list is handled like an empty one
                for person in (credited if credited is not None else []):
                    person_id = person.personID

                    #only insert the person when no row exists for them yet
                    known = database_helper.select_query(
                        "people", {'imdbId': person_id})
                    if known.empty:
                        database_helper.insert_data(
                            "people", {
                                "imdbId": person_id,
                                "fullName": person["name"]
                            })

                    #add movie writer link
                    database_helper.insert_data("writers", {
                        "p_imdbId": person_id,
                        "m_imdbId": film['imdbId']
                    })

            progress.update(1)
Example #12
0
from movie import Movie
import database_helper
import movie_helper
import tweet_helper
import osmnx as ox
import geopandas as gpd
from geopandas.tools import sjoin
import seaborn as sns
import matplotlib.dates as mdates
import numpy as np
from colour import Color
import scipy.signal
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nameparser import HumanName

#module-level movies dataframe; this call runs when the module is imported
movies_df = movie_helper.get_movies_df()


def gen_top_20_tweet_count():
    """
    Function to plot the top 20 movies by tweet count    
    """

    #get movies from db and count tweets
    movies_df = movie_helper.get_movies_df()
    movies_df["tweet_count"] = movies_df.apply(
        lambda row: movie_helper.count_tweets(row['movieId'])['count'], axis=1)

    #sort by tweet count and take top 20
    movies_df = movies_df.sort_values(by='tweet_count',
                                      ascending=False).head(20)