def review_extracter(movie):
    """Pickle the text of the user reviews IMDb returns for a movie.

    The first search hit for ``movie`` is treated as the intended title.
    The text of every review returned for that title is collected into a
    list, which is pickled to ``"<movie>_imdb.pickle"`` in the current
    working directory.

    Args:
        movie: Movie title to search for; also used as the file-name prefix.

    Raises:
        IndexError: If the search returns no titles.
    """
    # Look the title up once; the search and review fetch are network calls.
    imdb = Imdb()
    best_match = imdb.search_for_title(movie)[0]
    reviews = imdb.get_title_user_reviews(best_match["imdb_id"])

    # Keep only the review text of each entry.
    review_list = [entry["reviewText"] for entry in reviews["reviews"]]

    # Persist the list so later analysis does not need to hit IMDb again.
    with open(f"{movie}_imdb.pickle", "wb") as f:
        pickle.dump(review_list, f)
def main():
    """Interactively search IMDb for a movie and print one title's reviews.

    Prompts for a movie name, prints the search matches via
    ``help_print_movie``, then prompts for the id of the wanted title and
    prints its user reviews via ``help_print_review``.
    """
    client = Imdb()

    # Whitespace-separated words are joined with '+' before searching.
    typed_name = input('Movie Name: ')
    query = '+'.join(typed_name.split())
    # print(client.search_for_name("Christian Bale"))
    matches = client.search_for_title(query)
    help_print_movie(matches)

    # The user picks one of the printed matches by its id.
    chosen_id = input('IMBD ID: ')
    payload = client.get_title_user_reviews(chosen_id)
    help_print_review(payload['reviews'])
def average_reviewscore(title):
    """Print the mean VADER compound score of a title's IMDb user reviews.

    The first search hit for ``title`` is used. Every review returned for
    it is scored with NLTK's ``SentimentIntensityAnalyzer`` and the
    arithmetic mean of the ``compound`` scores is printed. If the title has
    no reviews, a short notice is printed instead of dividing by zero.

    Args:
        title: Movie title to search for, as a string.

    Raises:
        IndexError: If the search returns no titles.
    """
    imdb = Imdb()
    title_id = imdb.search_for_title(title)[0]['imdb_id']
    payload = imdb.get_title_user_reviews(title_id)

    # A response without a 'reviews' key is treated like an empty list.
    reviews = payload['reviews'] if 'reviews' in payload else []
    if not reviews:
        print(f"No user reviews found for {title!r}")
        return

    # Build the analyzer once; constructing it per review repeats its setup.
    analyzer = SentimentIntensityAnalyzer()
    compound_scores = [
        analyzer.polarity_scores(entry['reviewText'])['compound']
        for entry in reviews
    ]

    average = sum(compound_scores) / len(compound_scores)
    print(average)
import csv

import openpyxl
import pandas as pd
from imdbpie import Imdb

imdb = Imdb()

review = []
movie = []

# Raw strings keep the Windows backslashes literal without relying on
# unrecognised escape sequences such as "\I".
path = r'H:\IFS\IMDB\test.xlsx'  # Excel sheet containing the names of the movies
path1 = r'H:\IFS\IMDB\test1.xlsx'  # Excel sheet receiving each movie's total user-review count

# 'sheet_name' is the current keyword; the old 'sheetname' spelling is rejected
# by modern pandas.
df = pd.read_excel(path, sheet_name='Sheet1')

for row in df['Movies']:
    movie.append(row)
    try:
        # The first search hit is taken as the intended title.
        Id = imdb.search_for_title(row)[0]['imdb_id']
        review.append(imdb.get_title_user_reviews(Id)['totalReviews'])
    except IndexError:
        # No search hit for this name: keep the row, flag it as invalid.
        review.append("INVALID")

df = pd.DataFrame({'Movies': movie, 'Review': review})

# The context manager saves and closes the workbook; ExcelWriter.save() no
# longer exists in current pandas releases.
with pd.ExcelWriter(path1, engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='Sheet1')
from imdbpie import Imdb
import pprint
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk

nltk.download('vader_lexicon')

imdb = Imdb()

# Short name -> IMDb title id, in the order the reviews are fetched.
_TITLE_IDS = (
    ("endgame", "tt4154796"),
    ("avatar", "tt0499549"),
    ("titanic", "tt0120338"),
    ("starwars", "tt2488496"),
    ("infinity", "tt4154756"),
    ("jurassic", "tt0369610"),
    ("lionking", "tt6105098"),
    ("avengers", "tt0848228"),
    ("furious", "tt2820852"),
    ("frozen", "tt4520988"),
)

# One user-review payload per title, parallel to ``movies_string``.
movies = [imdb.get_title_user_reviews(title_id) for _, title_id in _TITLE_IDS]
movies_string = [short_name for short_name, _ in _TITLE_IDS]

# Each payload is also exposed under its own module-level name.
(
    endgame,
    avatar,
    titanic,
    starwars,
    infinity,
    jurassic,
    lionking,
    avengers,
    furious,
    frozen,
) = movies

# Debugging hint: a single review's text is at payload['reviews'][i]['reviewText']
# and its author's name at payload['reviews'][i]['author']['displayName'];
# pprint.pprint(payload) dumps the whole structure.
from imdbpie import Imdb
import random

imdb = Imdb()


def _first_match(title):
    """Return the first search result IMDb gives for ``title``."""
    return imdb.search_for_title(title)[0]


# For each film: the top search hit, then the user reviews for its title id.
parasite = _first_match("Parasite")
reviews = imdb.get_title_user_reviews("tt6751668")

joker = _first_match("Joker")
review2 = imdb.get_title_user_reviews("tt7286456")

unplanned = _first_match("Unplanned")
review3 = imdb.get_title_user_reviews("tt9024106")

thegodfather = _first_match("The Godfather")
reviews3 = imdb.get_title_user_reviews("tt0068646")

disastermovie = _first_match("Disaster Movie")
reviews2 = imdb.get_title_user_reviews("tt1213644")

# Debugging hint: pprint.pprint(<payload>) dumps a payload; the text of the
# first review is at <payload>['reviews'][0]['reviewText'].
from collections import Counter

from imdbpie import Imdb
from nltk import FreqDist

imdb = Imdb()
print(imdb.search_for_title("Inception")[0])
reviews = imdb.get_title_user_reviews("tt1375666")

# Collect the text of up to the first 23 reviews. Slicing, instead of
# indexing positions 0..22 one by one, keeps this working when IMDb returns
# fewer than 23 reviews.
blackbook = [entry['reviewText'] for entry in reviews['reviews'][:23]]
if 'rating' in rat: ratings[title] = rat['rating'] print(ratings) with open('ratings.pickle','wb') as f: pickle.dump(ratings,f) print('ratings has been saved.') # create a dictionary that has a list of reviews for each of the titles that have reviews. userratings = {} for title in movienumbers: rat = imdb.get_title_user_reviews(title) # skip movies that don't have reviews if 'reviews' in rat: dummylist = [] for entry in rat['reviews']: dummylist.append(entry['reviewText']) userratings[title] = dummylist print(userratings) with open('userratings.pickle','wb') as f: pickle.dump(userratings,f) print('userratings has been saved.') # create a dictionary that has a list of reviews for each of the titles that have reviews.
import pickle

imdb = Imdb()

movie_names = [
    "The Shawshank Redemption",
    "The Godfather",
    "Disaster Movie",
    "Saving Christmas",
]

for movie in movie_names:
    # Fetch the review data for the first search hit of each listed movie.
    movie_title = imdb.search_for_title(movie)[0]
    movie_data = imdb.get_title_user_reviews(movie_title["imdb_id"])
    review_data = movie_data["reviews"]

    # Standardised file name: spaces become underscores, plus ".pickle".
    save_pickle_name = "_".join(movie.split(" ")) + ".pickle"

    # The with-block closes the stream on exit, so no explicit close is needed.
    with open(save_pickle_name, "wb") as file_stream:
        pickle.dump(review_data, file_stream)

    print(f"{movie}'s review data has been saved to {save_pickle_name}")
# Sentiment Analysis import nltk # nltk.download('vader_lexicon') from nltk.sentiment.vader import SentimentIntensityAnalyzer from imdbpie import Imdb imdb = Imdb() # print all movies that contain the word "Joker" # print(imdb.search_for_title("Joker")) # Focus on "Joker" (2019), obtain "imdb_id" # print(imdb.search_for_title("Joker")[0]['imdb_id']) reviews = imdb.get_title_user_reviews("tt7286456") # print(reviews) # print(reviews['reviews']) # print(reviews['reviews'][0]['reviewText']) review1 = reviews['reviews'][0]['reviewText'] review2 = reviews['reviews'][1]['reviewText'] review3 = reviews['reviews'][2]['reviewText'] score1 = SentimentIntensityAnalyzer().polarity_scores(review1) score2 = SentimentIntensityAnalyzer().polarity_scores(review2) score3 = SentimentIntensityAnalyzer().polarity_scores(review3) # print(score1) def sentiment_analysis(d): '''
# Natural-language sentiment scoring (NLTK VADER) and the IMDb client.
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from imdbpie import Imdb

imdb = Imdb()

# The user types the movie title on the line after the prompt.
print("please enter a movie title:")
name = input()

# Resolve the title to the id of its first search hit, then fetch its reviews.
first_hit = imdb.search_for_title(name)[0]
movie_id = first_hit['imdb_id']
reviews = imdb.get_title_user_reviews(movie_id)

# Show the first review, then score it. The score is a dict of polarity
# values: pos, neg, neu and compound.
review_1 = reviews['reviews'][0]['reviewText']
print(review_1)
analyzer = SentimentIntensityAnalyzer()
score = analyzer.polarity_scores(review_1)
print(score)
import matplotlib.pyplot as plt
import nltk
import nltk.corpus
from nltk.corpus import stopwords

# print(stopwords.words('english'))

# ---- main program ----
movieName = "The Fault In Our Stars"
author = []   # display name of each review's author, filled by countWords()
wordCnt = []  # word count of each review's text, filled by countWords()

imdb = Imdb()
# The first search result is taken as the closest match.
movieDict = imdb.search_for_title(movieName)[0]
id = movieDict['imdb_id']
reviews = imdb.get_title_user_reviews(id)
# pprint.pprint(reviews['reviews'][0])
allReviews = reviews['reviews']  # the individual review entries


def countWords():
    """Append each review's author name and word count to the module lists.

    Walks ``allReviews`` once; for every entry the author's display name
    goes into ``author`` and the number of whitespace-separated words in
    the review text goes into ``wordCnt``.
    """
    for entry in allReviews:
        # Each entry's 'author' mapping holds the display name.
        writer_name = entry['author']['displayName']
        words = entry['reviewText'].split()
        author.append(writer_name)
        wordCnt.append(len(words))
class Movie:
    """A single IMDb title with its metadata, cast and analysed user reviews.

    Instances start empty and are filled either from a raw IMDb title
    payload (``SetAfterInit``) or from a flat dictionary of the shape
    produced by ``GetJSONSearch`` (``SetAfterSearch``).
    """

    def __init__(self):
        self.imdb = Imdb()
        self.reviews = None
        self.chartdata = None
        self.director = None
        self.commentbasedrating = 0
        self.title = None
        self.poster = None
        self.durationMin = None
        self.rating = 0
        self.id = None
        self.summary = None
        self.outline = None
        self.cast = None
        self.directors = None

    def SetAfterInit(self, dict):
        """Populate the movie from a nested IMDb title payload.

        Only the keys that are present are copied; attributes whose source
        key is missing keep their current value.

        Args:
            dict: Payload with optional ``'base'``, ``'ratings'`` and
                ``'plot'`` sections.
        """
        self.dict = dict

        if 'base' in dict:
            base = dict['base']
            # The id is the third '/'-separated piece of the 'id' path.
            # Reading it inside the guard avoids a KeyError on payloads
            # that lack 'base' or 'id'.
            if 'id' in base:
                self.id = base['id'].split('/')[2]
            if 'title' in base:
                self.title = base['title']
            if 'runningTimeInMinutes' in base:
                self.durationMin = base['runningTimeInMinutes']
            if 'image' in base and 'url' in base['image']:
                self.poster = base['image']['url']

        if 'ratings' in dict and 'rating' in dict['ratings']:
            self.rating = float(dict['ratings']['rating'])

        if 'plot' in dict:
            plot = dict['plot']
            if 'outline' in plot and 'text' in plot['outline']:
                self.outline = plot['outline']['text']
            if 'summaries' in plot and len(plot['summaries']) > 0:
                # Only the first summary is kept.
                self.summary = plot['summaries'][0]['text']

    def SetAfterSearch(self, dict):
        """Populate the movie from a flat dictionary.

        Args:
            dict: Mapping with the keys ``id``, ``title``, ``poster``,
                ``durationMin``, ``rating``, ``outline`` and ``summary``.

        Raises:
            KeyError: If any of those keys is missing.
        """
        self.id = dict['id']
        self.title = dict['title']
        self.poster = dict['poster']
        self.durationMin = dict['durationMin']
        self.rating = dict['rating']
        self.outline = dict['outline']
        self.summary = dict['summary']

    def AnalyzeReviews(self):
        """Fetch the reviews, then derive the comment rating and chart data."""
        self.GetReviews()
        if self.reviews is not None:
            self.SetCommentbasedrating()
            self.GenerateChartData()

    def GetReviews(self):
        """Load this title's user reviews into ``self.reviews``.

        ``self.reviews`` is left untouched when the response carries no
        ``'reviews'`` key; otherwise it becomes a list of ``UserReview``
        objects, possibly empty.
        """
        reviews_temp_load = self.imdb.get_title_user_reviews(self.id)
        if 'reviews' in reviews_temp_load:
            self.reviews = [
                UserReview(review['helpfulnessScore'], review['reviewText'])
                for review in reviews_temp_load['reviews']
            ]

    def SetCommentbasedrating(self):
        """Set ``commentbasedrating`` to the mean of the reviews' ratings.

        With no reviews the rating is set to 0, the same value ``__init__``
        uses, instead of dividing by zero.
        """
        if not self.reviews:
            self.commentbasedrating = 0
            return
        scoreSum = sum(review.commentbasedrating for review in self.reviews)
        self.commentbasedrating = scoreSum / len(self.reviews)

    def GenerateChartData(self):
        """Build the ``ChartData`` for the loaded reviews and store it."""
        cd = ChartData(self.reviews)
        cd.SetDataHelpfulness(self.reviews)
        cd.SetDataCommentbasedrating(self.reviews)
        self.chartdata = cd

    def SetCast(self):
        """Fetch the title's credits and store its cast and directors."""
        credits = self.imdb.get_title_credits(self.id)
        if 'credits' in credits:
            if 'cast' in credits['credits']:
                self.cast = credits['credits']['cast']
            if 'director' in credits['credits']:
                self.directors = credits['credits']['director']

    def GetJSONSearch(self):
        """Return the movie as a plain dictionary.

        Reviews and chart data are converted through their own
        ``GetJSON`` methods; when absent they become ``[]`` and ``{}``.
        """
        listReviews = []
        if self.reviews is not None:
            listReviews = [review.GetJSON() for review in self.reviews]

        cd = {}
        if self.chartdata is not None:
            cd = self.chartdata.GetJSON()

        return {
            "id": self.id,
            "title": self.title,
            "poster": self.poster,
            "durationMin": self.durationMin,
            "rating": self.rating,
            "outline": self.outline,
            "summary": self.summary,
            "cast": self.cast,
            "directors": self.directors,
            "reviews": listReviews,
            "commentbasedrating": self.commentbasedrating,
            "chartdata": cd,
        }
# Sarah Zazyczny Text Mining from imdbpie import Imdb from nltk import * from nltk.sentiment.vader import SentimentIntensityAnalyzer imdb = Imdb() # print(imdb.search_for_title("Clueless")[0]) reviews = imdb.get_title_user_reviews("tt0112697") import pprint # pprint.pprint(reviews) # scores for reviewA, reviewB, reviewC reviewA = (reviews['reviews'][0]['reviewText']) Ascore = SentimentIntensityAnalyzer().polarity_scores(reviewA) reviewB = (reviews['reviews'][1]['reviewText']) Bscore = SentimentIntensityAnalyzer().polarity_scores(reviewB) reviewC = (reviews['reviews'][2]['reviewText']) Cscore = SentimentIntensityAnalyzer().polarity_scores(reviewC) # test any review within by changing the [0] reviewtest = (reviews['reviews'][0]['reviewText']) scoretest = SentimentIntensityAnalyzer().polarity_scores(reviewtest) # SIMPLIFIED WITH FUNCTION # function to print movie review sentiment score def review_sentiment_score(n): """
from imdbpie import Imdb

imdb = Imdb()

# Show the top search hit for the film, then fetch its user reviews by id.
print(imdb.search_for_title("Lords of Dogtown")[0])
reviews = imdb.get_title_user_reviews("tt0355702")
# import pprint; pprint.pprint(reviews)  # dumps the whole payload

# Print who wrote the first review, followed by its text.
first_review = reviews['reviews'][0]
print(first_review['author']['displayName'])
print(first_review['reviewText'])
import string

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import imdbpie
from imdbpie import Imdb

nltk.download('vader_lexicon')

# Example of scoring one sentence with the analyzer:
# sentence = "I don't like reading classic books because they are boring."
# score = SentimentIntensityAnalyzer().polarity_scores(sentence)
# print(score)

imdb = Imdb()

# Show the top search hit for the film, then fetch its user reviews by id.
print(imdb.search_for_title("The Pink Panther")[0])
reviews = imdb.get_title_user_reviews("tt0383216")
# import pprint
# pprint.pprint(reviews)

# Print who wrote the first review, followed by its text.
first_review = reviews['reviews'][0]
print(first_review['author']['displayName'])
print(first_review['reviewText'])

print(string.punctuation)
# for line in reviews:
#     if line.