def get_trailer_metadata():
    """Function which uses youtubeId to collect trailer metadata"""

    #get all trailers from the database
    trailers_df = database_helper.select_query("trailers")
    with tqdm(total=len(trailers_df)) as pbar:
        for index, row in trailers_df.iterrows():
            #use the youtube id to make an api request for video metadata
            trailer_data = yt.get_video_metadata(row['youtubeId'])

            #update the db with the collected metadata
            update_params = {
                'title': trailer_data['video_title'],
                'channelTitle': trailer_data['channel_title'],
                'channelId': trailer_data['channel_id'],
                'categoryId': trailer_data['video_category'],
                'commentCount': trailer_data['video_comment_count'],
                'description': trailer_data['video_description'],
                'likeCount': trailer_data['video_like_count'],
                'dislikeCount': trailer_data['video_dislike_count'],
                'viewCount': trailer_data['video_view_count'],
                'publishDate': trailer_data['video_publish_date'],
                'tags': trailer_data['video_tags']
            }
            select_params = {"youtubeId": row["youtubeId"]}
            database_helper.update_data("trailers",
                                        update_params=update_params,
                                        select_params=select_params)
            pbar.update(1)
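# Example usage (a sketch -- assumes database_helper and the YouTubeHelper
# client `yt` are configured as elsewhere in this project):
#
#   get_trailer_metadata()
#
# Updates are matched on youtubeId, so re-running the function simply
# refreshes the stored metadata rather than duplicating rows.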
def get_youtube_trailers():
    """Attempt to collect movie trailers from YouTube (does not work due to API limits)"""

    movies_df = database_helper.select_query("movies")
    with tqdm(total=len(movies_df)) as pbar:
        for index, row in movies_df.iterrows():
            #strip bracketed suffixes and punctuation from the title before searching
            title = re.sub(r"\s*\(.*\)\s*", "", row["title"])
            title = re.sub(r'[^\w\s]', '', title)
            print(title)
            # if (row['distributor']):
            #     try:
            #         yt_search = yt.search(q=title + " trailer", max_results=10, parser=None)
            #         distributor_trailers = list(filter(lambda x: row['distributor'].lower() in x['snippet']['channelTitle'].lower(), yt_search))
            #         if (len(distributor_trailers) > 0):
            #             #add trailers to db
            #             for trailer in distributor_trailers:
            #                 database_helper.insert_data("trailers", {"movieId": row["movieId"], "youtubeId": trailer['id']['videoId']})
            #         else:
            #             print("Couldn't find trailer for " + row["title"])
            #     except Exception as error:
            #         print(error)
            pbar.update(1)
def __init__(self, db_row):
    """
    Director box office class constructor

    :param db_row: pandas series object corresponding to row from which object should be built
    """
    self.movie_imdbId = db_row.m_imdbId
    person_df = database_helper.select_query("people", {"imdbId": db_row.p_imdbId})
    Person.__init__(self, person_df.iloc[0])
def get_actors():
    """Function which uses imdb to collect movie actors"""

    #get all movies from db
    movies_df = movie_helper.get_movies_df()
    with tqdm(total=len(movies_df)) as pbar:
        for index, row in movies_df.iterrows():
            #if an imdbId exists use it to look up the API
            if (row['imdbId']):
                movie = ia.get_movie(str(row['imdbId']))

                #get list of cast
                cast_list = movie.get('cast')
                if (cast_list is not None):
                    for cast_member in cast_list:
                        #try to get the name of the character
                        #currentRole is a list when an actor plays multiple roles
                        character_name = ""
                        if (isinstance(cast_member.currentRole, list)):
                            character_name = ','.join(
                                [x['name'] for x in cast_member.currentRole])
                        else:
                            try:
                                character_name = cast_member.currentRole['name']
                            except (KeyError, TypeError):
                                character_name = "Unknown"

                        #first check if the person exists
                        imdb_id = cast_member.personID
                        person_df = database_helper.select_query(
                            "people", {'imdbId': imdb_id})
                        if (person_df.empty):
                            database_helper.insert_data(
                                "people", {
                                    "imdbId": imdb_id,
                                    "fullName": cast_member["name"]
                                })

                        #add movie actor link
                        database_helper.insert_data(
                            "actors", {
                                "p_imdbId": imdb_id,
                                "m_imdbId": row['imdbId'],
                                "role": character_name
                            })
            pbar.update(1)
def check_synopsis():
    """Check the database to make sure a synopsis has been collected for every movie."""

    movies_df = database_helper.select_query("movies", {"enabled": '1'})
    movies = []
    with tqdm(total=len(movies_df)) as pbar:
        for index, row in movies_df.iterrows():
            movie = Movie(row)
            movies.append(movie)

            #if there is no synopsis print the movie to the command line
            if (movie.synopsis == ''):
                print(movie.title + " (" + movie.imdbId + ") no synopsis")
            pbar.update(1)
def __init__(self, db_row):
    """
    Actor box office class constructor

    :param db_row: pandas series object corresponding to row from which object should be built
    """
    self.actorId = db_row.id
    self.movie_imdbId = db_row.m_imdbId
    self.role = db_row.role
    self.credited = db_row.notes != '(uncredited)'

    #get person entry
    person_df = database_helper.select_query("people", {"imdbId": db_row.p_imdbId})
    Person.__init__(self, person_df.iloc[0])
def get_hashtags_from_trailers():
    """Function to extract the movie hashtags from trailer descriptions"""

    #get all the trailers from the db
    trailers_df = database_helper.select_query("trailers")
    with tqdm(total=len(trailers_df)) as pbar:
        for index, row in trailers_df.iterrows():
            #extract hashtags from the description and print to the console for inspection
            if ('#' in row.description):
                hashtags = re.findall(r"#(\w+)", row.description)
                print(row.title)
                print(hashtags)
            pbar.update(1)
def get_trailer_release_dates():
    """Function to specifically update the trailer release dates which could not be retrieved by get_trailer_metadata()"""

    #get all trailers from the db
    trailers_df = database_helper.select_query("trailers")
    with tqdm(total=len(trailers_df)) as pbar:
        for index, row in trailers_df.iterrows():
            #use a customized api request to correctly retrieve the release dates of the trailers
            trailer_date = youtube_helper.get_trailer_release(row['youtubeId'], yt)

            #update the database
            update_params = {'publishDate': trailer_date}
            select_params = {"youtubeId": row["youtubeId"]}
            database_helper.update_data("trailers",
                                        update_params=update_params,
                                        select_params=select_params)
            pbar.update(1)
def add_box_office():
    """Function which adds the weekend box office data from the BFI into the db"""

    #get full film set
    film_df = bfi_helper.get_raw_data()

    with tqdm(total=len(film_df)) as pbar:
        for index, row in film_df.iterrows():
            #get the movie id and use it to insert weekend data into the db
            movie_df = database_helper.select_query("movies", {"title": row['Film']})
            movie_id = int(movie_df.iloc[0]['movieId'])

            #the percentage change column is not always numeric
            percentage_change = None
            try:
                percentage_change = float(row['% change on last week'])
            except ValueError:
                percentage_change = None

            insert_params = {
                "movieId": movie_id,
                "weeksOnRelease": row['Weeks on release'],
                "noOfcinemas": row['Number of cinemas'],
                "weekendGross": row['Weekend Gross'],
                "percentageChange": percentage_change,
                "siteAverage": row['Site average'],
                "grossToDate": row['Total Gross to date'],
                "weekendStart": row['weekendStart'],
                "weekendEnd": row['weekendEnd'],
                "rank": row['Rank']
            }
            database_helper.insert_data("weekend_box_office", insert_params)
            pbar.update(1)
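# Example usage (a sketch -- assumes bfi_helper and database_helper are
# configured, and that add_movies_to_db() has already been run so every BFI
# title can be matched in the movies table):
#
#   add_movies_to_db()
#   add_box_office()
#
# The ValueError fallback above exists because the BFI "% change on last week"
# column is not always numeric (presumably blank or "-" for new releases).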
def get_writers():
    """Function which uses imdb id to get list of writers"""

    #get all movies from db
    movies_df = movie_helper.get_movies_df()
    with tqdm(total=len(movies_df)) as pbar:
        for index, row in movies_df.iterrows():
            #if an imdbId exists use it to look up the API
            if (row['imdbId']):
                movie = ia.get_movie(str(row['imdbId']))

                #get list of writers
                writers = movie.get('writer')
                if (writers is not None):
                    for writer in writers:
                        #first check if the person exists
                        imdb_id = writer.personID
                        person_df = database_helper.select_query(
                            "people", {'imdbId': imdb_id})
                        if (person_df.empty):
                            database_helper.insert_data(
                                "people", {
                                    "imdbId": imdb_id,
                                    "fullName": writer["name"]
                                })

                        #add movie writer link
                        database_helper.insert_data("writers", {
                            "p_imdbId": imdb_id,
                            "m_imdbId": row['imdbId']
                        })
            pbar.update(1)
def add_movies_to_db():
    """Function which creates a unique list of movies from BFI data and inserts into DB"""

    #get the full film set and drop duplicate weekend rows
    film_df = bfi_helper.get_raw_data()
    film_df_unq = film_df.drop_duplicates()

    with tqdm(total=len(film_df_unq)) as pbar:
        for index, row in film_df_unq.iterrows():
            #check that the movie has not been added yet
            existing = database_helper.select_query("movies", {"title": row["Film"]})
            if (existing.empty):
                #insert into db
                database_helper.insert_data(
                    "movies", {
                        "title": row['Film'],
                        "distributor": row['Distributor'],
                        "country": row['Country of Origin']
                    })
            pbar.update(1)
def plot_chi_sqrd_surface(movieId=0,
                          normalize_by="All",
                          start_date=None,
                          end_date=None,
                          critical_period=False):
    """
    Function for generating expectation maps

    :param movieId: integer movieId for creating expectation score for movie tweets
    :param normalize_by: string val indicating if scores should be normalized by all tweets, or movie tweets
    :param start_date: datetime of start date for filtering tweets
    :param end_date: datetime of end_date for filtering tweets
    :param critical_period: bool indicating if movie tweets should only be counted over critical period
    """

    #fix dates so we include the start of start_date and the end of end_date
    if not start_date == None:
        start_date = datetime.combine(start_date.date(), datetime.min.time())

    if not end_date == None:
        end_date = datetime.combine(end_date.date(), datetime.max.time())

    #use gb regions for normalizing and for plotting
    #the Ordnance Survey data contained only the shape files for GB, so populations
    #have to be normalized by this rather than by the fishnet, which also covers NI
    gb_regions = database_helper.get_geo_data("select * from uk_regions", "geombng")
    gb_regions_count = database_helper.select_region_tweets(start_date=start_date,
                                                            end_date=end_date)

    #check if we are using the entire population of tweets or just the movie population
    if normalize_by == "Movies":
        #normalize by movie tweets
        gb_regions_count = database_helper.select_movie_region_tweets(
            start_date=start_date, end_date=end_date)
        gb_regions_count = gb_regions_count.drop(columns=['movieid'])
        gb_regions_count = gb_regions_count.groupby(
            by="cellid").size().reset_index(name="tweet_count")

    total_gb_tweets = gb_regions_count["tweet_count"].sum()

    #first step: get total tweets in the uk fishnet
    uk_fishnet_count = database_helper.select_fishnet_count(start_date=start_date,
                                                            end_date=end_date)

    #now get total movie tweets in the uk fishnet
    movie_fishnet_tweets = database_helper.select_movie_fishnet_tweets(
        movieId, start_date=start_date, end_date=end_date)

    #now get movie tweets per cell
    movie_cell_tweets = movie_fishnet_tweets.groupby(
        by="cellid").size().reset_index(name="movie_tweets")

    #now group with total fishnet counts
    fishnet_movie_comb = uk_fishnet_count.merge(movie_cell_tweets,
                                                how='left',
                                                on='cellid')

    #attach results to geodataframe so they can be plotted
    uk_fishnet = database_helper.get_geo_data("select * from uk_fishnet", "geombng")
    uk_fishnet = uk_fishnet.rename(columns={"id": "cellid"})
    uk_fishnet = uk_fishnet.merge(fishnet_movie_comb, how='left', on='cellid')

    #replace na with 0
    uk_fishnet = uk_fishnet.fillna(0)

    #get total gb tweets for the movie
    gb_movie_fishnet = sjoin(gb_regions, uk_fishnet, how='inner')
    gb_movie_fishnet = gb_movie_fishnet[[
        "cellid", "movie_tweets"
    ]].drop_duplicates().reset_index(drop=True)
    gb_movie_total = gb_movie_fishnet["movie_tweets"].sum()

    #do expectation calculation
    uk_fishnet['surf_expectation'] = uk_fishnet.apply(
        lambda row: calc_surface_expectation(total_gb_tweets, gb_movie_total,
                                             row["tweet_count"],
                                             row["movie_tweets"]),
        axis=1)

    #replace na with 0 (not all fishnet cells have tweets)
    uk_fishnet = uk_fishnet.fillna(0)

    #get cell colors
    uk_fishnet["color"] = uk_fishnet.apply(
        lambda row: get_cell_color(row["surf_expectation"]), axis=1)
    uk_fishnet["label"] = uk_fishnet.apply(
        lambda row: get_cell_label(row["surf_expectation"]), axis=1)

    #return uk_fishnet

    #now do plots
    fig, ax = plt.subplots(1, figsize=(9, 9))

    #this takes time, may be useful to create the overlay and store it in the db,
    #then use pandas join/merge to input the expectation
    overlay = gpd.overlay(gb_regions, uk_fishnet, how='intersection')
    map_ax = overlay.plot(color=overlay['color'], ax=ax)

    title = "Movie Tweets Expectation Map"

    #get movie
    if movieId > 0:
        movies_df = database_helper.select_query("movies", {"movieId": movieId})
        title = movies_df.iloc[0]["title"] + " Tweet Expectation"

    if critical_period:
        title = "{0} (Critical Period)".format(title)
    elif (start_date != None) and (end_date != None):
        title = "{0} ({1} - {2})".format(title, start_date.date(), end_date.date())

    ax.set_axis_off()
    #plt.axis('equal')

    legend_elements = [
        Line2D([0], [0], marker='s', color='red', label='Above Expected',
               markerfacecolor='red', markersize=15),
        Line2D([0], [0], marker='s', color='white', label='At Expected',
               markerfacecolor='white', markersize=15),
        Line2D([0], [0], marker='s', color='blue', label='Below Expected',
               markerfacecolor='blue', markersize=15)
    ]
    ax.legend(handles=legend_elements, loc="upper left")

    plt.title(title)
    plt.show()
    plt.clf()
    plt.cla()
    plt.close()

    return overlay
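# The helpers below are referenced by plot_chi_sqrd_surface() but defined
# elsewhere in the project. This is a minimal sketch of what they plausibly
# compute, assuming a signed chi-square residual per cell: the expected count
# for a cell is the movie's total scaled by the cell's share of all tweets,
# and the score is (observed - expected) / sqrt(expected). The thresholds in
# get_cell_color()/get_cell_label() are illustrative, not the project's
# actual cut-offs.

import math


def calc_surface_expectation(total_tweets, total_movie_tweets, cell_tweets,
                             cell_movie_tweets):
    """Signed chi residual of observed vs expected movie tweets in a cell."""
    if total_tweets == 0 or cell_tweets == 0:
        return 0
    expected = total_movie_tweets * (cell_tweets / total_tweets)
    if expected == 0:
        return 0
    return (cell_movie_tweets - expected) / math.sqrt(expected)


def get_cell_color(expectation):
    """Map an expectation score to a plot colour (illustrative thresholds)."""
    if expectation > 1:
        return "red"    #above expected
    if expectation < -1:
        return "blue"   #below expected
    return "white"      #roughly at expected


def get_cell_label(expectation):
    """Map an expectation score to a legend label (illustrative thresholds)."""
    if expectation > 1:
        return "Above Expected"
    if expectation < -1:
        return "Below Expected"
    return "At Expected"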
def get_most_popular_movie_per_region(start_date=None,
                                      end_date=None,
                                      senti_class=None,
                                      ignore_list=[28, 121],
                                      senti_percentage=False,
                                      critical_period=False):
    """
    Function to get the most popular movie per region by tweet count

    :param start_date: datetime of start date for filtering tweets
    :param end_date: datetime of end_date for filtering tweets
    :param senti_class: string to filter tweets by sentiment
    :param ignore_list: integer list of movie ids to ignore
    :param senti_percentage: bool indicating favourites should be based on sentiment percentage
    :param critical_period: bool indicating if tweets should be filtered to critical period
    :return: dataframe of regions and their favourite movies
    """

    #get all regional tweets according to date and sentiment filters
    region_movie_tweets = database_helper.select_movie_region_tweets(
        start_date=start_date, end_date=end_date, senti_class=senti_class)

    #check if we need to filter by the critical period
    if critical_period:
        movies_df = movie_helper.get_movies_df()
        small_movies_df = movies_df[["movieId", "critical_start", "critical_end"]]
        small_movies_df = small_movies_df.rename(columns={"movieId": "movieid"})

        region_movie_tweets = region_movie_tweets.merge(small_movies_df,
                                                        how="left",
                                                        on="movieid")
        region_movie_tweets = region_movie_tweets[
            (region_movie_tweets["created_at"] >= region_movie_tweets["critical_start"])
            & (region_movie_tweets["created_at"] <= region_movie_tweets["critical_end"])]

    #group tweets by region and movie
    region_movie_grouped = region_movie_tweets.groupby(
        by=["unit_id", "movieid"]).size().reset_index(name="tweet_count")

    #check if we should use sentiment percentage (i.e. film with highest percentage of positive tweets)
    group_col = "tweet_count"
    if (senti_percentage) and (not senti_class == None):
        #calculate sentiment tweets as a percentage of all tweets
        region_movie_all = database_helper.select_movie_region_tweets(
            start_date=start_date, end_date=end_date)

        if critical_period:
            movies_df = movie_helper.get_movies_df()
            small_movies_df = movies_df[["movieId", "critical_start", "critical_end"]]
            small_movies_df = small_movies_df.rename(columns={"movieId": "movieid"})

            region_movie_all = region_movie_all.merge(small_movies_df,
                                                      how="left",
                                                      on="movieid")
            region_movie_all = region_movie_all[
                (region_movie_all["created_at"] >= region_movie_all["critical_start"])
                & (region_movie_all["created_at"] <= region_movie_all["critical_end"])]

        region_movie_all_grouped = region_movie_all.groupby(
            by=["unit_id", "movieid"]).size().reset_index(name="tweet_count_all")

        #use threshold of 20 tweets per region?
        region_movie_all_grouped = region_movie_all_grouped[
            region_movie_all_grouped["tweet_count_all"] >= 20]

        region_movie_grouped = region_movie_grouped.merge(
            region_movie_all_grouped, how="left", on=["unit_id", "movieid"])
        region_movie_grouped["senti_percentage"] = (
            region_movie_grouped["tweet_count"] /
            region_movie_grouped["tweet_count_all"]) * 100
        group_col = "senti_percentage"

    #remove ignored movies from the list
    if len(ignore_list) > 0:
        region_movie_grouped = region_movie_grouped[
            ~region_movie_grouped["movieid"].isin(ignore_list)]

    #get the movies with the highest count per region
    most_popular_per_region = region_movie_grouped.loc[
        region_movie_grouped.groupby(['unit_id'])[group_col].idxmax()]

    #this is slow but really helps with generating the figures
    #attach movie titles to results
    movies_df = movie_helper.get_movies_df()
    movie_titles = movies_df[["movieId", "title"]]

    #attach region names
    gb_regions = database_helper.select_query("tweets_region_count")
    gb_regions = gb_regions[["unit_id", "region"]]

    most_popular_per_region = most_popular_per_region.merge(gb_regions,
                                                            how="left",
                                                            on="unit_id")
    most_popular_per_region = most_popular_per_region.merge(
        movie_titles, how="left", left_on="movieid",
        right_on="movieId").drop(columns="movieId")

    return most_popular_per_region
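# Example usage (hypothetical values): favourite movie per region over the
# critical period, ranked by the share of positive tweets rather than raw
# counts ("positive" is an assumed label for whatever senti_class values
# database_helper.select_movie_region_tweets() understands):
#
#   favourites = get_most_popular_movie_per_region(senti_class="positive",
#                                                  senti_percentage=True,
#                                                  critical_period=True)
#   print(favourites[["region", "title", "senti_percentage"]])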
def plot_region_tweets_bar(movieId=0,
                           normalize=False,
                           start_date=None,
                           end_date=None,
                           critical_period=True):
    """
    Function for generating bar plot of regional movie tweets

    :param movieId: integer movieId for creating expectation score for movie tweets
    :param normalize: bool indicating if tweet counts should be normalized
    :param start_date: datetime of start date for filtering tweets
    :param end_date: datetime of end_date for filtering tweets
    :param critical_period: bool indicating if tweets should be filtered to critical period
    """

    #select movie tweets with region cell id and unit id attached
    region_movie_tweets = database_helper.select_movie_region_tweets(
        movieId, start_date=start_date, end_date=end_date)

    #group by region unit_id to get a per region tweet count
    tweet_freq = region_movie_tweets.drop(columns=['movieid']).groupby(
        by="unit_id").size().reset_index(name="movie_tweet_count")

    plot_col = "movie_tweet_count"
    title = "Regional Movie Tweets"
    ylabel = "Movie Tweets"
    movie_title = ""

    if movieId > 0:
        movies_df = database_helper.select_query("movies", {"movieId": movieId})
        movie_title = movies_df.iloc[0]["title"]
        title = movie_title + " Tweets"

    #if normalize, generate the column (movie tweets per million tweets)
    tweet_region_counts = database_helper.select_query("tweets_region_count")
    if normalize:
        tweet_freq = tweet_region_counts.merge(tweet_freq, on="unit_id", how="left")

        #fill na with 0
        tweet_freq = tweet_freq.fillna(0)
        tweet_freq["norm_count"] = (tweet_freq['movie_tweet_count'] /
                                    tweet_freq['tweet_count']) * 1000000
        plot_col = "norm_count"
        title = "Regional Movie Tweets (per million tweets)"
        if movieId > 0:
            title = movie_title + " Tweets (per million tweets)"
        ylabel = "Movie Tweets (per million tweets)"
    else:
        regions = tweet_region_counts[["unit_id", "region"]]
        tweet_freq = tweet_freq.merge(regions, on="unit_id", how="left")

    #check if we need to adjust the title for the critical period
    if critical_period:
        title = "{0} (Critical Period)".format(title)
    elif (start_date != None) and (end_date != None):
        title = "{0} ({1} - {2})".format(title, start_date.date(), end_date.date())

    #create bar plot
    ax = sns.barplot(x="region", y=plot_col, data=tweet_freq)
    ax.set(xlabel='Region', ylabel=ylabel)
    plt.title(title)
    plt.xticks(rotation=90)
    plt.show()

    return tweet_freq
def plot_movie_tweets_map(movieId=0,
                          normalize=False,
                          start_date=None,
                          end_date=None,
                          critical_period=False):
    """
    Function for generating heatmap of movie tweets

    :param movieId: integer movieId for creating expectation score for movie tweets
    :param normalize: bool indicating if tweet counts should be normalized
    :param start_date: datetime of start date for filtering tweets
    :param end_date: datetime of end_date for filtering tweets
    :param critical_period: bool indicating if tweets should be filtered to critical period
    """

    #select movie tweets with region cell id and unit id attached
    region_movie_tweets = database_helper.select_movie_region_tweets(
        movieId, start_date=start_date, end_date=end_date)

    #group by region unit_id to get a per region tweet count
    tweet_freq = region_movie_tweets.drop(columns=['movieid']).groupby(
        by="unit_id").size().reset_index(name="movie_tweet_count")

    map_col = "movie_tweet_count"
    title = "Regional Movie Tweets"
    movie_title = ""

    if movieId > 0:
        movies_df = database_helper.select_query("movies", {"movieId": movieId})
        movie_title = movies_df.iloc[0]["title"]
        title = movie_title + " Tweets"

    #if normalize, generate the column (movie tweets per million tweets)
    if normalize:
        tweet_region_counts = database_helper.select_query("tweets_region_count")
        tweet_freq = tweet_region_counts.merge(tweet_freq, on="unit_id", how="left")

        #fill na with 0
        tweet_freq = tweet_freq.fillna(0)
        tweet_freq["norm_count"] = (tweet_freq['movie_tweet_count'] /
                                    tweet_freq['tweet_count']) * 1000000
        map_col = "norm_count"
        #title = "Regional Movie Tweets (per million tweets)"
        #if movieId > 0:
        #    title = movie_title + " Tweets (per million tweets)"

    #check if we need to adjust the title for the critical period
    if critical_period:
        title = "{0} (Critical Period)".format(title)
    elif (start_date != None) and (end_date != None):
        title = "{0} ({1} - {2})".format(title, start_date.date(), end_date.date())

    #merge with shape file
    gb = gpd.read_file("../../ProjectData/Data/GB/european_region_region.shp")
    map_freq = gb.merge(tweet_freq, left_on='UNIT_ID', right_on='unit_id')

    #plot
    fig, ax = plt.subplots(1, 1, figsize=(11, 9))
    ax.axis('off')
    ax.set_title(title)
    fig.set_dpi(100)
    map_freq.plot(column=map_col, ax=ax, legend=True, cmap='OrRd')
    plt.show()

    return map_freq
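# Example usage (hypothetical movieId): normalized bar chart and heatmap for
# a single film:
#
#   plot_region_tweets_bar(movieId=42, normalize=True)
#   plot_movie_tweets_map(movieId=42, normalize=True)
#
# Note that in both functions critical_period only adjusts the plot title;
# any actual date filtering is done through start_date/end_date.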
""" from tqdm import tqdm import pandas as pd from selenium import webdriver from webdriver_manager.chrome import ChromeDriverManager from selenium.webdriver.common.keys import Keys import time import sys sys.path.insert(1, '/home/andy/Documents/MscProject/MscProj/Utils') import database_helper from youtube_helper import YouTubeHelper yt = YouTubeHelper().yt trailers_df = database_helper.select_query("trailers") #filter out this list of selected trailers filter_ids = [ 95, 103, 81, 93, 89, 36, 239, 71, 30, 41, 14, 80, 70, 350, 59, 65, 64, 110, 124, 368, 372, 123 ] filtered_trailers = trailers_df[~trailers_df.id.isin(filter_ids)] def custom_parser(json): snippet = json['snippet']['topLevelComment']['snippet'] comment = { 'commentId': json['id'], 'channelUrl': '',
""" import imdb from tqdm import tqdm import pandas as pd import sys # insert at 1, 0 is the script path (or '' in REPL) sys.path.insert(1, '/home/andy/Documents/MscProject/MscProj/Utils') import database_helper #initialize imdb ia = imdb.IMDb() #greta greta = database_helper.select_query("movies", {"movieId": 234}) greta = greta.iloc[0] greta_res = ia.get_movie('2639336') year = greta_res['year'] if (greta_res.get('genres')): genres = ','.join(greta_res.get('genres')) rating = greta_res.get('rating') votes = greta_res.get('votes') certificates = None if (greta_res.get('certificates')): certificates = ','.join(greta_res.get('certificates')) #update database update_params = { "imdbId": '2639336',