def review_extracter(movie):
    """Pickle the review texts IMDb returns for the first match of *movie*.

    Searches IMDb for ``movie``, takes the first search result, fetches that
    title's user reviews and writes the list of ``reviewText`` strings to
    ``"<movie>_imdb.pickle"`` (a pickle file, not plain text).

    Args:
        movie: Title to search for; also used verbatim in the output file name.

    Raises:
        IndexError: If the title search comes back as an empty list.
    """
    # Query IMDb once; the first search hit is treated as the match.
    imdb = Imdb()
    best_match = imdb.search_for_title(movie)[0]
    reviews = imdb.get_title_user_reviews(best_match["imdb_id"])

    # Keep only the text of each returned review.
    review_list = [entry["reviewText"] for entry in reviews["reviews"]]

    # Pickle the list of review texts.
    with open(f"{movie}_imdb.pickle", "wb") as f:
        pickle.dump(review_list, f)
Exemple #2
0
def main():
    """Interactively search for a movie and print one title's user reviews.

    Prompts for a movie name, prints the search results through
    ``help_print_movie``, then prompts for an IMDb id and prints that
    title's reviews through ``help_print_review``.
    """
    client = Imdb()
    typed_title = str(input('Movie Name: '))
    # Whitespace-separated words are re-joined with '+' before searching.
    query = '+'.join(typed_title.split())
    search_results = client.search_for_title(query)
    help_print_movie(search_results)
    chosen_id = str(input('IMBD ID: '))
    reviews_payload = client.get_title_user_reviews(chosen_id)
    help_print_review(reviews_payload['reviews'])
Exemple #3
0
def average_reviewscore(title):
    """
    Print the average compound sentiment score of a title's user reviews.

    Searches IMDb for ``title``, takes the first search result, fetches its
    user reviews, scores each ``reviewText`` with
    ``SentimentIntensityAnalyzer.polarity_scores`` and prints the mean of the
    ``compound`` values. If no reviews come back, a short notice is printed
    instead of dividing by zero.

    Args:
        title: Movie title to search for, as a string.

    Raises:
        IndexError: If the title search comes back as an empty list.
    """
    imdb = Imdb()
    imdb_id = imdb.search_for_title(title)[0]['imdb_id']
    # The 'reviews' key may be absent when a title has no reviews.
    reviews = imdb.get_title_user_reviews(imdb_id).get('reviews', [])
    if not reviews:
        print(f"No reviews found for {title!r}.")
        return

    # Build the analyzer once instead of once per review.
    analyzer = SentimentIntensityAnalyzer()
    compound_scores = [
        analyzer.polarity_scores(entry['reviewText'])['compound']
        for entry in reviews
    ]
    print(sum(compound_scores) / len(compound_scores))
Exemple #4
0
import csv
from imdbpie import Imdb
imdb = Imdb()
import openpyxl
import pandas as pd
review = []
movie = []
# Raw strings keep the Windows backslashes literal (no invalid "\I" escape).
path = r'H:\IFS\IMDB\test.xlsx'  # Excel sheet containing the names of the movies
path1 = r'H:\IFS\IMDB\test1.xlsx'  # Excel sheet receiving the total user-review count for each movie
df = pd.read_excel(path, sheet_name='Sheet1')
for row in df['Movies']:
    movie.append(row)
    try:
        Id = imdb.search_for_title(row)[0]['imdb_id']
        review.append(imdb.get_title_user_reviews(Id)['totalReviews'])
    except IndexError:
        # No search hit for this title: keep both lists aligned with a marker.
        review.append("INVALID")

df = pd.DataFrame({'Movies': movie, 'Review': review})
# The context manager saves and closes the workbook on exit.
with pd.ExcelWriter(path1, engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='Sheet1')
Exemple #5
0
from imdbpie import Imdb
import pprint
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
nltk.download('vader_lexicon')

imdb = Imdb()

# IMDb ids in fetch order; the trailing comment is the variable each result
# is bound to below.
_TITLE_IDS = (
    "tt4154796",  # endgame
    "tt0499549",  # avatar
    "tt0120338",  # titanic
    "tt2488496",  # starwars
    "tt4154756",  # infinity
    "tt0369610",  # jurassic
    "tt6105098",  # lionking
    "tt0848228",  # avengers
    "tt2820852",  # furious
    "tt4520988",  # frozen
)

# One user-review payload per id, in the same order as _TITLE_IDS.
movies = [imdb.get_title_user_reviews(title_id) for title_id in _TITLE_IDS]
(
    endgame, avatar, titanic, starwars, infinity,
    jurassic, lionking, avengers, furious, frozen,
) = movies

# Labels matching `movies` position by position.
movies_string = [
    "endgame",
    "avatar",
    "titanic",
    "starwars",
    "infinity",
    "jurassic",
    "lionking",
    "avengers",
    "furious",
    "frozen",
]
# pprint.pprint(avengers)
# print(avengers_reviews['reviews'][1]['author']['displayName'])
# print(endgame['reviews'][1]['reviewText'])
# print(len(avengers_reviews))
Exemple #6
0
from imdbpie import Imdb
import random

imdb = Imdb()

# For each title: keep the first search hit, then fetch user reviews by a
# hard-coded id (presumably the id of that same title -- the search result is
# not used for the lookup). Note the review variable names do not follow the
# order of the titles.

# Parasite -> reviews
parasite = imdb.search_for_title("Parasite")[0]
reviews = imdb.get_title_user_reviews("tt6751668")

# Joker -> review2
joker = imdb.search_for_title("Joker")[0]
review2 = imdb.get_title_user_reviews("tt7286456")

# Unplanned -> review3
unplanned = imdb.search_for_title("Unplanned")[0]
review3 = imdb.get_title_user_reviews("tt9024106")

# The Godfather -> reviews3
thegodfather = imdb.search_for_title("The Godfather")[0]
reviews3 = imdb.get_title_user_reviews("tt0068646")

# Disaster Movie -> reviews2
disastermovie = imdb.search_for_title("Disaster Movie")[0]
reviews2 = imdb.get_title_user_reviews("tt1213644")

# print(reviews)
# import pprint
# pprint.pprint(reviews)
# pprint.pprint(review2)
# pprint.pprint(review3)
# pprint.pprint(reviews3)
# pprint.pprint(reviews2)

# print(reviews['reviews'][0:]['author']['displayName'])
# print(reviews['reviews'][0]['reviewText'])

from imdbpie import Imdb
from collections import Counter
from nltk import FreqDist

imdb = Imdb()
# Show the first search hit for the title.
print(imdb.search_for_title("Inception")[0])
reviews = imdb.get_title_user_reviews("tt1375666")

# Text of the first 23 reviews. Slicing tolerates a shorter result, where
# indexing entries 0..22 one by one would raise IndexError.
blackbook = [entry['reviewText'] for entry in reviews['reviews'][:23]]
Exemple #8
0
    if 'rating' in rat:
        ratings[title] = rat['rating']

print(ratings)

with open('ratings.pickle', 'wb') as f:
    pickle.dump(ratings, f)

print('ratings has been saved.')

# Map each entry of `movienumbers` to the list of its review texts. Entries
# whose response has no 'reviews' key are left out of the dictionary.
userratings = {}

for title in movienumbers:
    rat = imdb.get_title_user_reviews(title)
    if 'reviews' not in rat:
        continue
    userratings[title] = [entry['reviewText'] for entry in rat['reviews']]

print(userratings)

with open('userratings.pickle', 'wb') as f:
    pickle.dump(userratings, f)

print('userratings has been saved.')
# create a dictionary that has a list of reviews for each of the titles that have reviews.
Exemple #9
0
import pickle

imdb = Imdb()

movie_names = [
    "The Shawshank Redemption",
    "The Godfather",
    "Disaster Movie",
    "Saving Christmas",
]

# Fetch the user reviews of every listed movie from IMDb and store each
# result as a pickle file with a standardized name (spaces -> underscores).
for movie in movie_names:
    search_results = imdb.search_for_title(movie)
    if not search_results:
        # Nothing matched: skip this title instead of aborting the whole run.
        print(f"No IMDb match found for {movie}; skipping.")
        continue

    movie_title = search_results[0]
    movie_data = imdb.get_title_user_reviews(movie_title["imdb_id"])
    review_data = movie_data["reviews"]
    save_pickle_name = movie.replace(" ", "_") + ".pickle"

    # The with-block closes the stream on exit; no explicit close() is needed.
    with open(save_pickle_name, "wb") as file_stream:
        pickle.dump(review_data, file_stream)

    print(f"{movie}'s review data has been saved to {save_pickle_name}")
# Sentiment Analysis
import nltk
# nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from imdbpie import Imdb

imdb = Imdb()

# The id below is hard-coded for "Joker" (2019); it can be looked up with
# imdb.search_for_title("Joker")[0]['imdb_id'].
reviews = imdb.get_title_user_reviews("tt7286456")

# Text of the first three reviews.
review1 = reviews['reviews'][0]['reviewText']
review2 = reviews['reviews'][1]['reviewText']
review3 = reviews['reviews'][2]['reviewText']

# Build the analyzer once and reuse it for all three reviews.
analyzer = SentimentIntensityAnalyzer()
score1 = analyzer.polarity_scores(review1)
score2 = analyzer.polarity_scores(review2)
score3 = analyzer.polarity_scores(review3)
# print(score1)
      
def sentiment_analysis(d):
    '''
Exemple #11
0
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from imdbpie import Imdb
"""import natural language processing package and Imdb package"""

imdb = Imdb()

# Ask the user for a movie title.
print("please enter a movie title:")
name = input()

# Resolve the title to an IMDb id via the first search hit, then fetch the
# user reviews for that id.
movie_id = imdb.search_for_title(name)[0]['imdb_id']
reviews = imdb.get_title_user_reviews(movie_id)

# Show the text of the first review.
review_1 = reviews['reviews'][0]['reviewText']
print(review_1)

# Feed that review into the sentiment analyzer. The output is a set of
# polarity scores: pos, neg, neu, compound.
score = SentimentIntensityAnalyzer().polarity_scores(review_1)
print(score)
Exemple #12
0
import matplotlib.pyplot as plt
import nltk
import nltk.corpus
from nltk.corpus import stopwords
# print(stopwords.words('english'))

# main Program

movieName = "The Fault In Our Stars"
author = []  # filled by countWords(): display name of each review's author
wordCnt = []  # filled by countWords(): word count of each review's text
imdb = Imdb()
# The first search hit is taken as the closest match.
movieDict = imdb.search_for_title(movieName)[0]
id = movieDict['imdb_id']
reviews = imdb.get_title_user_reviews(id)

# The review entries returned for the title; countWords() iterates over these.
allReviews = reviews['reviews']


def countWords():
    """Record each review's author name and word count.

    Walks the module-level ``allReviews`` and, for every entry, appends the
    author's ``displayName`` to ``author`` and the number of
    whitespace-separated words in ``reviewText`` to ``wordCnt``. Both lists
    grow in step, so equal indexes refer to the same review.
    """
    for entry in allReviews:
        display_name = entry['author']['displayName']
        author.append(display_name)
        words = entry['reviewText'].split()
        wordCnt.append(len(words))

Exemple #13
0
class Movie:
    """One IMDb title: its metadata, user reviews and derived ratings.

    Holds an ``Imdb`` client plus the fields that the ``Set*`` / ``Get*``
    methods fill in. Every field starts as ``None`` (``0`` for the two
    ratings) until one of those methods populates it.
    """

    def __init__(self):
        self.imdb = Imdb()
        self.reviews = None  # list of UserReview once GetReviews() finds reviews
        self.chartdata = None  # ChartData built by GenerateChartData()
        self.director = None  # not written by any method of this class
        self.commentbasedrating = 0  # mean of the reviews' commentbasedrating
        self.title = None
        self.poster = None  # poster image URL
        self.durationMin = None
        self.rating = 0
        self.id = None
        self.summary = None
        self.outline = None
        self.cast = None
        self.directors = None

    def SetAfterInit(self, dict):
        """Populate the metadata fields from a title payload.

        Reads ``dict['base']`` (``id``, ``title``, ``runningTimeInMinutes``,
        ``image.url``), ``dict['ratings']['rating']`` and ``dict['plot']``
        (``outline.text`` and the text of the first ``summaries`` entry).
        Any key that is missing leaves the matching attribute untouched.

        Args:
            dict: The title payload; also stored unchanged as ``self.dict``.
        """
        self.dict = dict

        if 'base' in dict:
            base = dict['base']
            if 'id' in base:
                # Keep the third '/'-separated segment of the id
                # (presumably ids look like "/title/<id>/").
                self.id = base['id'].split('/')[2]
            if 'title' in base:
                self.title = base['title']
            if 'runningTimeInMinutes' in base:
                self.durationMin = base['runningTimeInMinutes']
            if 'image' in base and 'url' in base['image']:
                self.poster = base['image']['url']

        if 'ratings' in dict and 'rating' in dict['ratings']:
            self.rating = float(dict['ratings']['rating'])

        if 'plot' in dict:
            plot = dict['plot']
            if 'outline' in plot and 'text' in plot['outline']:
                self.outline = plot['outline']['text']
            if 'summaries' in plot and len(plot['summaries']) > 0:
                self.summary = plot['summaries'][0]['text']

    def SetAfterSearch(self, dict):
        """Copy the metadata fields from a flat dict.

        Args:
            dict: Mapping with the keys ``id``, ``title``, ``poster``,
                ``durationMin``, ``rating``, ``outline`` and ``summary``.

        Raises:
            KeyError: If any of those keys is missing.
        """
        self.id = dict['id']
        self.title = dict['title']
        self.poster = dict['poster']
        self.durationMin = dict['durationMin']
        self.rating = dict['rating']
        self.outline = dict['outline']
        self.summary = dict['summary']

    def AnalyzeReviews(self):
        """Fetch the reviews and, if any were loaded, derive rating and chart data."""
        self.GetReviews()
        if self.reviews is not None:
            self.SetCommentbasedrating()
            self.GenerateChartData()

    def GetReviews(self):
        """Load the title's user reviews into ``self.reviews``.

        Each review becomes a ``UserReview`` built from its
        ``helpfulnessScore`` and ``reviewText``. When the response has no
        ``reviews`` key, ``self.reviews`` is left unchanged.
        """
        payload = self.imdb.get_title_user_reviews(self.id)
        if 'reviews' in payload:
            self.reviews = [
                UserReview(entry['helpfulnessScore'], entry['reviewText'])
                for entry in payload['reviews']
            ]

    def SetCommentbasedrating(self):
        """Set ``commentbasedrating`` to the mean over ``self.reviews``.

        With no reviews (``None`` or an empty list) the rating is set to 0
        rather than dividing by zero.
        """
        if not self.reviews:
            self.commentbasedrating = 0
            return
        score_sum = sum(review.commentbasedrating for review in self.reviews)
        self.commentbasedrating = score_sum / len(self.reviews)

    def GenerateChartData(self):
        """Build a ``ChartData`` from ``self.reviews`` and store it in ``self.chartdata``."""
        cd = ChartData(self.reviews)
        cd.SetDataHelpfulness(self.reviews)
        cd.SetDataCommentbasedrating(self.reviews)
        self.chartdata = cd

    def SetCast(self):
        """Fill ``cast`` and ``directors`` from the title's credits, when present."""
        credits_payload = self.imdb.get_title_credits(self.id)
        if 'credits' in credits_payload:
            title_credits = credits_payload['credits']
            if 'cast' in title_credits:
                self.cast = title_credits['cast']
            if 'director' in title_credits:
                self.directors = title_credits['director']

    def GetJSONSearch(self):
        """Return the movie as a plain dict.

        ``reviews`` holds each review's ``GetJSON()`` result (an empty list
        when no reviews are loaded) and ``chartdata`` holds the chart data's
        ``GetJSON()`` result (an empty dict when none was generated).
        """
        review_items = []
        if self.reviews is not None:
            review_items = [review.GetJSON() for review in self.reviews]

        chart = {}
        if self.chartdata is not None:
            chart = self.chartdata.GetJSON()

        return {
            "id": self.id,
            "title": self.title,
            "poster": self.poster,
            "durationMin": self.durationMin,
            "rating": self.rating,
            "outline": self.outline,
            "summary": self.summary,
            "cast": self.cast,
            "directors": self.directors,
            "reviews": review_items,
            "commentbasedrating": self.commentbasedrating,
            "chartdata": chart,
        }
Exemple #14
0
# Sarah Zazyczny Text Mining 

from imdbpie import Imdb
from nltk import * 
from nltk.sentiment.vader import SentimentIntensityAnalyzer

imdb = Imdb()
# The id below is hard-coded; the title can be looked up with
# imdb.search_for_title("Clueless")[0].
reviews = imdb.get_title_user_reviews("tt0112697")

import pprint

# Build the analyzer once and reuse it for every review scored below.
analyzer = SentimentIntensityAnalyzer()

# Scores for reviewA, reviewB, reviewC (the first three reviews).
reviewA = reviews['reviews'][0]['reviewText']
Ascore = analyzer.polarity_scores(reviewA)

reviewB = reviews['reviews'][1]['reviewText']
Bscore = analyzer.polarity_scores(reviewB)

reviewC = reviews['reviews'][2]['reviewText']
Cscore = analyzer.polarity_scores(reviewC)

# Test any review by changing the [0] index.
reviewtest = reviews['reviews'][0]['reviewText']
scoretest = analyzer.polarity_scores(reviewtest)

# SIMPLIFIED WITH FUNCTION
# function to print movie review sentiment score
def review_sentiment_score(n):
    """
Exemple #15
0

from imdbpie import Imdb

imdb = Imdb()
# Show the first search hit for the title.
print(imdb.search_for_title("Lords of Dogtown")[0])
# Fetch the user reviews by hard-coded id.
reviews = imdb.get_title_user_reviews("tt0355702")

# Print the author's display name and the text of the first review.
first_review = reviews['reviews'][0]
print(first_review['author']['displayName'])
print(first_review['reviewText'])
import nltk
nltk.download('vader_lexicon')

from nltk.sentiment.vader import SentimentIntensityAnalyzer
# sentence = "I don't like reading classic books because they are boring."
# score = SentimentIntensityAnalyzer().polarity_scores(sentence)
# print(score)

import imdbpie
from imdbpie import Imdb

imdb = Imdb()
# Show the first search hit for the title.
print(imdb.search_for_title("The Pink Panther")[0])
# Fetch the user reviews by hard-coded id.
reviews = imdb.get_title_user_reviews("tt0383216")

# Print the author's display name and the text of the first review.
first_review = reviews['reviews'][0]
print(first_review['author']['displayName'])
print(first_review['reviewText'])

import string
# Show the punctuation characters defined by the string module.
print(string.punctuation)

# for line in reviews:
#     if line.