def test_get_title_by_id_excludes_episodes(self):
    """Episode ids resolve normally, but return None when episodes are excluded."""
    # The default client resolves the episode id to a title.
    assert self.imdb.get_title_by_id("tt3181538") is not None
    # A client configured to exclude episodes filters the same id to None.
    filtering_client = Imdb(exclude_episodes=True)
    filtered = filtering_client.get_title_by_id("tt3181538")
    assert filtered is None
def save(self, *args, **kwargs):
    """Refresh title/year/plot from IMDb, then delegate to the base save."""
    movie = Imdb().get_title_by_id(self.imdb_id)
    self.title = movie.title
    self.year = movie.year
    self.plot = movie.plot_outline
    super(Film, self).save(*args, **kwargs)
class TestCache(unittest.TestCase):
    """Exercises the on-disk response cache of the Imdb client."""

    def setUp(self):
        self.imdb = Imdb()

    def tearDown(self):
        # Clean up any cache directory a test may have created.
        shutil.rmtree(self.imdb.cache_dir, ignore_errors=True)

    def _get_cache_size(self):
        """Return a count of the items in the cache."""
        if not os.path.exists(self.imdb.cache_dir):
            return 0
        _, _, cache_files = next(os.walk(self.imdb.cache_dir))
        return len(cache_files)

    def test_cache_populated(self):
        """Tests the cache is populated correctly"""
        self.imdb = Imdb({'cache': True, 'cache_dir': '/tmp/imdbpie-test'})
        self.assertEqual(self._get_cache_size(), 0)
        movie = self.imdb.find_movie_by_id("tt0382932")
        # A second identical call must not create duplicate cache items;
        # find makes 2 api calls, hence exactly two cached entries.
        self.imdb.find_movie_by_id("tt0382932")
        self.assertEqual(self._get_cache_size(), 2)
        self.assertEqual(movie.title, 'Ratatouille')

    def test_cache_not_populated_when_disabled(self):
        """Tests the cache is not populated when disabled (default)"""
        self.assertEqual(self._get_cache_size(), 0)
        self.imdb.find_movie_by_id("tt0382932")
        self.assertEqual(self._get_cache_size(), 0)
def imdb(self):
    """Get IMDb data for release, set self.imdb_id to override auto match
    from movie title + year.

    Returns:
        The matched IMDb title object, or False when the release cannot be
        resolved to an IMDb entry.
    """
    from imdbpie import Imdb
    if not self.movie_release:
        return False
    imdb = Imdb()
    imdb_id = getattr(self, "imdb_id", None)
    # Find IMDb match by title and check if year is a match
    if not imdb_id:
        for imdb_match in imdb.find_by_title(self.movie_title):
            year = imdb_match.get("year")
            # FIX: results sometimes lack a year (None / non-numeric);
            # the original int(...) call crashed on those — skip instead.
            if year is None:
                continue
            try:
                if int(year) == self.movie_year:
                    imdb_id = imdb_match.get("imdb_id")
                    break
            except (TypeError, ValueError):
                continue
    # No IMDb match could be found from title + year
    if not imdb_id:
        return False
    return imdb.find_movie_by_id(imdb_id)
def test_build_url_proxied(self):
    """URLs built with a proxy must be routed through the proxy URI."""
    proxied = Imdb(locale="en_FR", cache=False, anonymize=True, proxy_uri="http://someproxywebsite.co.uk?url={0}")
    proxied.timestamp = time.mktime(datetime.date.today().timetuple())
    built = proxied._build_url(path="/title/maindetails", params={"tconst": "tt1111111"})
    expected_prefix = "http://someproxywebsite.co.uk?url=" + quote("https://app.imdb.com/title/maindetails")
    assert built.startswith(expected_prefix) is True
def __init__(self, cache=True, cache_dir=None):
    """Open a connection to IMDb.

    Args:
        cache: Enable response caching (default True).
        cache_dir: Optional directory for cache files.
    """
    # open connection to imdb
    # BUG FIX: the original tested `cache is not None`, which treated
    # cache=False the same as cache=True and always enabled caching.
    if cache:
        if cache_dir is not None:
            self.imdb = Imdb(cache=True, cache_dir=cache_dir)
        else:
            self.imdb = Imdb(cache=True)
    else:
        self.imdb = Imdb()
# NOTE(review): Python 2 code (print statements, urllib.urlopen, the 'base64'
# str codec). Caches movie and TV poster art as base64 data URIs in Posters.
# The bare `except: continue` clauses swallow every error — TODO: narrow them.
# Code left byte-identical below.
def updateart(): import urllib from imdbpie import Imdb imdb = Imdb() print 'updating art for movies(imdb cover)' for movie in Movie.query.all(): print 'processing %s' % movie.c00 imdbid = movie.c09 if Posters.query.filter_by(apiid=imdbid).first() is not None: print 'skipping %s as it is allready in the database' % movie.c00 continue try: title = imdb.get_title_by_id(imdbid) if title.cover_url is None: continue poster = Posters() poster.apiid = imdbid poster.type = 'movie' response = urllib.urlopen(title.cover_url) data = response.read() data64 = data.encode('base64').replace('\n', '') poster.imgdata = 'data:image/jpeg;base64,%s' % data64 # print poster.imgdata db.session.add(poster) db.session.commit() except: continue print 'updating art for tv' from tvdb_api import Tvdb t = Tvdb(banners=True) for show in Tvshow.query.all(): print 'processing %s' % show.c00 tvdbid = show.c12 if Posters.query.filter_by(apiid=tvdbid).first() is not None: print 'skipping %s as it is allready in the database' % show.c00 continue try: tvdbshow = t[int(tvdbid)] bannerkeys = tvdbshow['_banners']['season']['season'].keys() banner_url = tvdbshow['_banners']['season']['season'][bannerkeys[0]]['_bannerpath'] poster = Posters() poster.apiid = tvdbid poster.type = 'tv' response = urllib.urlopen(banner_url) data = response.read() data64 = data.encode('base64').replace('\n', '') poster.imgdata = 'data:image/jpeg;base64,%s' % data64 # print poster.imgdata db.session.add(poster) db.session.commit() except: continue
def imdb_import(number):
    """
    Helper method to import large quantities of movies from IMDB as sample data.
    """
    reset_database()
    client = Imdb(cache=True)
    movies = []
    imported = 0
    for entry in client.top_250():
        if imported >= int(number):
            break
        record = Movie()
        im_title = client.get_title_by_id(entry['tconst'])
        record.name = im_title.title
        record.year = im_title.year
        record.imdb_id = im_title.imdb_id
        record.save()
        movies.append(record)
        # adding director and actors
        for person in im_title.credits:
            if person.token == "directors":
                record.director = Person.objects.create_or_find_imdb(person)
            elif person.token == "cast":
                record.actors.add(Person.objects.create_or_find_imdb(person))
        record.save()
        # Create a random number (0-2) of physical copies per movie.
        for _ in range(random.randrange(3)):
            copy = MovieCopy()
            copy.movie = record
            copy.save()
        imported += 1
    # imdb.get_title_images("tt0468569")
    # imdb.get_person_images("nm0000033")
    return {
        'number_imported': number,
        'kind': 'movies',
        'movies': movies,
    }
def test_get_episodes(self):
    """Episode listings for a series id carry complete per-episode metadata."""
    assert self.imdb.get_title_by_id('tt0303461') is not None
    client = Imdb()
    episodes = client.get_episodes('tt0303461')
    assert episodes is not None
    assert len(episodes) == 14
    first = episodes[0]
    assert first.imdb_id == "tt0579539"
    assert first.type == "tv_episode"
    assert first.title == u'The Train Job'
    assert first.series_name == 'Firefly'
    assert first.release_date == "2002-09-20"
    assert first.year == 2002
def test_get_title_by_id_using_proxy(self):
    """Title lookups through the anonymizing proxy return full metadata."""
    client = Imdb(locale="en_US", cache=False, anonymize=True)
    title = client.get_title_by_id("tt0111161")
    assert title.title == "The Shawshank Redemption"
    assert title.year == 1994
    assert title.type == "feature"
    assert title.tagline == "Fear can hold you prisoner. Hope can set you free."
    assert isinstance(title.plots, list)
    assert len(title.plots) == 5
    assert isinstance(title.rating, float)
    assert sorted(title.genres) == sorted(["Crime", "Drama"])
    assert isinstance(title.votes, int)
    assert title.runtime == 8520
    assert len(title.trailers) == 3
def __init__(self):
    # Client used for all IMDb queries.
    self._imdb = Imdb()
    # Previously stored top-250 snapshot and the ids extracted from it.
    self._oldTop = self._get_stored_data()
    self._oldTopList = self._generate_oldTop_id_list()
    # Freshly fetched top-250 snapshot and the ids extracted from it.
    self._newTop = self._fetch_data()
    self._newTopList = self._generate_newTop_id_list()
    # Entries present in the new list but not the old one.
    self._newlyAdded = self._find_newly_added()
def test_get_title_by_id_using_proxy(self):
    """Title lookups through the anonymizing proxy return full metadata."""
    client = Imdb(locale='en_US', cache=False, anonymize=True)
    title = client.get_title_by_id('tt0111161')
    assert title.title == 'The Shawshank Redemption'
    assert title.year == 1994
    assert title.type == 'feature'
    assert title.tagline == 'Fear can hold you prisoner. Hope can set you free.'
    assert isinstance(title.plots, list)
    assert len(title.plots) == 6
    assert isinstance(title.rating, float)
    assert sorted(title.genres) == sorted(['Crime', 'Drama'])
    assert isinstance(title.votes, int)
    assert title.runtime == 8520
    assert len(title.trailers) == 3
class IMDB(Miner):
    """Miner backed by the imdbpie client."""

    def __init__(self):
        self.handle = Imdb()
        super(IMDB, self).__init__()

    def top_list(self, number):
        # `number` is accepted for interface compatibility; the underlying
        # API call always returns the full top-250 listing.
        return self.handle.top_250()

    def get_movie_id(self, index):
        # formatting to IMDB_ID
        return "tt" + index

    def get_movie_by_id(self, movie_id):
        images = self.handle.get_title_images(movie_id)
        details = self.handle.get_title(movie_id)
        return images, details
def test_review(set_up):
    """A Review built from raw review data exposes all parsed fields."""
    client = Imdb(locale='en_US', cache=False)
    raw = client._get_reviews_data('tt0111161')
    review = Review(data=raw[0])
    assert review.username == 'carflo'
    assert review.text.startswith('Why do I want to write the 234th ')
    assert review.text.endswith('Redemption to touch the soul.')
    assert review.date == '2003-11-26'
    assert review.rating == 10
    assert review.summary == 'Tied for the best movie I have ever seen'
    assert review.status == 'G'
    assert review.user_location == 'Texas'
    assert review.user_score >= 1902
    assert review.user_score_count >= 2207
def test_build_url_proxied(self):
    """URLs built with a proxy must be routed through the proxy URI."""
    proxied = Imdb(
        locale='en_FR',
        cache=False,
        anonymize=True,
        proxy_uri='http://someproxywebsite.co.uk?url={0}',
    )
    proxied.timestamp = time.mktime(datetime.date.today().timetuple())
    built = proxied._build_url(path='/title/maindetails', params={'tconst': 'tt1111111'})
    expected_prefix = 'http://someproxywebsite.co.uk?url=' + quote('https://app.imdb.com/title/maindetails')
    assert built.startswith(expected_prefix) is True
def test_build_url(self):
    """Built URLs carry the API key, policy, locale, timestamp and params."""
    imdb_fr = Imdb(locale="en_FR", cache=False)
    imdb_fr.timestamp = time.mktime(datetime.date.today().timetuple())
    url = imdb_fr._build_url(path="/title/maindetails", params={"tconst": "tt1111111"})
    expected_url = (
        "https://app.imdb.com/"
        "title/maindetails"
        "?apiKey=d2bb34ec6f6d4ef3703c9b0c36c4791ef8b9ca9b"
        "&apiPolicy=app1_1"
        "&locale=en_FR"
        # FIX: "&times" had been mangled into the "×" glyph (HTML-entity
        # mojibake), corrupting the expected query string; restore the
        # literal "&timestamp" parameter.
        "&timestamp={timestamp}"
        "&tconst=tt1111111&api=v1&appid=iphone1_1"
    ).format(timestamp=imdb_fr.timestamp)
    assert_urls_match(expected_url, url)
def __init__(self):
    self.parser = Parser()
    # Working directory; populated later by the caller.
    self.directory = ''
    # IMDb API client for metadata lookups.
    self.imdb = Imdb()
    # (width, height) pixel targets for artwork thumbnails.
    self.cover_size = 214, 317
    self.square_size = 317, 317
    # presumably the currently-selected media mapping — TODO confirm.
    self.current = MOVIE_DICT
    self.all_files = []
    self.display = Display()
def test_build_url(self):
    """Built URLs carry the API key, policy, locale, timestamp and params."""
    imdb_fr = Imdb(locale='en_FR', cache=False)
    imdb_fr.timestamp = time.mktime(datetime.date.today().timetuple())
    url = imdb_fr._build_url(
        path='/title/maindetails', params={'tconst': 'tt1111111'})
    expected_url = (
        'https://app.imdb.com/'
        'title/maindetails'
        '?apiKey=d2bb34ec6f6d4ef3703c9b0c36c4791ef8b9ca9b'
        '&apiPolicy=app1_1'
        '&locale=en_FR'
        # FIX: "&times" had been mangled into the "×" glyph (HTML-entity
        # mojibake), corrupting the expected query string; restore the
        # literal "&timestamp" parameter.
        '&timestamp={timestamp}'
        '&tconst=tt1111111&api=v1&appid=iphone1_1'
    ).format(timestamp=imdb_fr.timestamp)
    assert_urls_match(expected_url, url)
class CommonMetadataIMDB(object):
    """
    Class for interfacing with imdb
    """

    def __init__(self, cache=True, cache_dir=None):
        # open connection to imdb
        # BUG FIX: the original tested `cache is not None`, which treated
        # cache=False the same as cache=True and always enabled caching.
        if cache:
            if cache_dir is not None:
                self.imdb = Imdb(cache=True, cache_dir=cache_dir)
            else:
                self.imdb = Imdb(cache=True)
        else:
            self.imdb = Imdb()

    def com_imdb_title_search(self, media_title):
        """
        # fetch info from title
        """
        return self.imdb.search_for_title(media_title)

    def com_imdb_id_search(self, media_id):
        """
        # fetch info by ttid
        """
        return self.imdb.get_title_by_id(media_id)

    def com_imdb_person_by_id(self, person_id):
        """
        # fetch person info by id
        """
        return self.imdb.get_person_by_id(person_id)

    def com_imdb_person_images_by_id(self, person_id):
        """
        # fetch person images by id
        """
        return self.imdb.get_person_images(person_id)

    def com_imdb_title_review_by_id(self, media_id):
        """
        # fetch the title review
        """
        return self.imdb.get_title_reviews(media_id)
def main():
    """CLI entry point: quantize top-250 movie posters into k colour clusters."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-k", "--clusters", required=True, type=int, help="Number of cluters")
    k = vars(parser.parse_args())["clusters"]
    make_output_dirs(k)
    # Download the posters only on the first run (directory still empty).
    if not os.listdir("posters"):
        top = Imdb(anonymize=True).top_250()
        write_posters(top)
    qp_dir = "quantized_posters_" + str(k) + "/"
    cb_dir = "color_bars_" + str(k) + "/"
    # Process posters only if neither output directory is populated yet.
    if not os.listdir(qp_dir) and not os.listdir(cb_dir):
        for poster in os.listdir("posters"):
            process_poster("posters/" + poster, k)
def net_search(name):
    """Look up a movie by name.

    Returns the movie object on success, -1 when the name matched no
    titles, and None when the name is empty (original fall-through kept).
    """
    client = Imdb({'anonymize': False})
    if len(name) == 0:
        return None
    matches = client.find_by_title(name)
    if not matches:
        return -1
    # Take the first (best) match and fetch the full movie record.
    first_id = matches[0]['imdb_id']
    return client.find_movie_by_id(first_id)
def test_cache_populated(self):
    """Tests the cache is populated correctly"""
    self.imdb = Imdb({'cache': True, 'cache_dir': '/tmp/imdbpie-test'})
    self.assertEqual(self._get_cache_size(), 0)
    movie = self.imdb.find_movie_by_id("tt0382932")
    # A second identical call must not create duplicate cache items;
    # find performs 2 api calls, hence exactly two cached entries.
    self.imdb.find_movie_by_id("tt0382932")
    self.assertEqual(self._get_cache_size(), 2)
    self.assertEqual(movie.title, 'Ratatouille')
def __init__(self, app_dir, logger):
    """Load the persisted lookup cache (if any) and create the IMDb client.

    Args:
        app_dir: appdirs-style object exposing `user_data_dir`.
        logger: logger instance used by this object.
    """
    self.app_dir_data = app_dir.user_data_dir
    self.cur_cache = {}
    self.cache = {}
    self.cache_file = os.path.join(app_dir.user_data_dir, CACHING_FILE)
    if os.path.exists(self.cache_file):
        with open(self.cache_file) as f:
            # FIX: yaml.load without an explicit Loader is deprecated and
            # can construct arbitrary Python objects; safe_load only builds
            # plain data, which is all a cache file should contain.
            self.cache = yaml.safe_load(f)
        # An empty/blank file loads as None — normalise to an empty dict.
        if not self.cache:
            self.cache = {}
    self.in_cache = []
    self.imdb = Imdb()
    self.logger = logger
# NOTE(review): Telegram bot command that searches IMDb and formats the top
# result as an HTML reply. The reply strings below are user-facing and must
# stay byte-identical. `divmod(result.runtime, 60)` assumes runtime is in
# seconds — TODO confirm; it would raise TypeError if runtime is None.
# Code left byte-identical below.
class ImdbCommand(Command): name = 'imdb' aliases = ['movie'] description = 'Searches IMDB for movie titles.' def __init__(self, bot, config): super().__init__(bot, config) self._imdb = Imdb(cache=True, exclude_episodes=True) def run(self, message, args): if not args: self.reply(message, 'Please supply some search terms!') return self.bot.telegram.send_chat_action(message.chat.id, 'typing') results = self._imdb.search_for_title(' '.join(args)) if not results: self.reply(message, 'No results found!') return result = self._imdb.get_title_by_id(results[0]['imdb_id']) reply = '<b>URL:</b> http://www.imdb.com/title/{0}\n'.format(telegram_escape(result.imdb_id)) reply += '<b>Title:</b> {0}\n'.format(telegram_escape(result.title)) reply += '<b>Year:</b> {0}\n'.format(result.year) reply += '<b>Genre:</b> {0}\n'.format(telegram_escape(', '.join(result.genres[:3]))) reply += '<b>Rating:</b> {0}\n'.format(result.rating) runtime, _ = divmod(result.runtime, 60) reply += '<b>Runtime:</b> {0} minutes\n'.format(runtime) reply += '<b>Certification:</b> {0}\n'.format(result.certification) reply += '<b>Cast:</b> {0}\n'.format( telegram_escape(', '.join([person.name for person in result.cast_summary[:5]]))) reply += '<b>Director(s):</b> {0}\n\n'.format( telegram_escape(', '.join([person.name for person in result.directors_summary[:5]]))) reply += telegram_escape(result.plots[0]) self.reply(message, reply, parse_mode='HTML')
def lookup_movie(movie_name):
    """Search IMDb for *movie_name*; raise MovieLookUpFailed if nothing matches."""
    matches = Imdb().search_for_title(movie_name)
    if not matches:
        raise MovieLookUpFailed("No movies matching this name!")
    return MovieList(matches)
# NOTE(review): exploratory script — prints word-frequency findings, pulls
# review data via private imdbpie internals (_get_reviews_data), then runs
# NLTK VADER sentiment on a local text file. Requires the vader_lexicon
# download. Code left byte-identical below.
print('The most used "significant" words & their freq. are:') print('film: 18, soldiers: 5, stress: 4, soldier: 4') print('story: 3, lives: 3, weapon: 2, violent: 2, trauma: 2') print('training: 2, killing: 2, hard: 2, fighting: 2') print('\n') print('Words used pertaining to the emotional aspects of the movies:') print('violent: 2, emotions: 2, emotional: 2, angry: 2') #NLTK #nltk.download() and click Models and download vador_lexicon from imdbpie import Imdb imdb = Imdb() print(imdb.search_for_title("Lone Survivor")[0]) print(imdb._get_reviews_data("tt1091191")[0]['summary']) print(imdb._get_reviews_data("tt1091191")[0]['user_name']) print(imdb._get_reviews_data("tt1091191")[0]['date']) print(imdb._get_reviews_data("tt1091191")[0]['text']) from nltk.sentiment.vader import SentimentIntensityAnalyzer import nltk sentence = open('lone_survivor.txt', 'r', encoding='utf8').read() score = SentimentIntensityAnalyzer().polarity_scores(sentence) print(score)
# NOTE(review): standalone unittest module using the legacy imdbpie dict
# constructor and find_movie_by_id API; the network call runs at import time,
# which makes the test flaky — TODO move it into setUp.
# Code left byte-identical below.
from imdbpie import Imdb import unittest imdb = Imdb({'anonymize': False}) movie = imdb.find_movie_by_id("tt0382932") class TestTrailer(unittest.TestCase): def test_trailer_url(self): self.assertIsNotNone(movie.trailers) if __name__ == '__main__': unittest.main()
# NOTE(review): fragment — the leading suite-builder belongs to a function
# whose `def` line is not visible here, followed by a bundled second module
# (image_test) separated by a generator marker. Network calls run at import
# time. Code left byte-identical below.
suite = unittest.TestSuite() for all_tests in unittest.defaultTestLoader.discover('./', pattern='*_test.py'): for test in all_tests: suite.addTests(test) return suite if __name__ == '__main__': unittest.main() ########NEW FILE######## __FILENAME__ = image_test from imdbpie import Imdb import unittest imdb = Imdb({'anonymize': False}) images = imdb.title_images("tt0468569") class TestImage(unittest.TestCase): def test_results(self): self.assertGreaterEqual(len(images), 107) def test_caption(self): self.assertEqual(images[0].caption, 'Still of Gary Oldman in The Dark Knight') def test_url(self): self.assertEqual( images[0].url, 'http://ia.media-imdb.com/images/M/MV5BOTAxNzI0ND'
# NOTE(review): fragment — opens with the tail of a quoting helper whose
# `def` line is outside this view, then a top-level import loop that searches
# IMDb for each movie title from a local store; the trailing assignment is
# cut off. The bare `except: continue` hides API failures — TODO narrow.
# Code left byte-identical below.
else: return s except Exception as ex: print('Exception at single_quote 22', ex) return s movie_list = [] mng = MngIMDB() movie_list = mng.load_data() mngDb = MovieDB() moviesid = mngDb.get_all_old_movies_by_field('movieid') moviesnames = mngDb.get_all_old_movies_by_field('title') actorsnames = mngDb.get_all_old_actors_by_field('name') mng.setUpClass() imdb = Imdb() for movie in movie_list: if int(movie.year) < 2017: continue # for movie.title in moviesnames: # continue try: if type(movie.title) is tuple: movie.title = movie.title[0] datam = None try: datam = imdb.search_for_title(movie.title) except: print("error") continue movie_id = ''
# NOTE(review): Python 2 script (print statements, xrange) — searches IMDb
# for "Logan" and prints up to the first five results.
# Code left byte-identical below.
#! /usr/bin/python from imdbpie import Imdb imdb = Imdb() s_result = imdb.search_for_title("Logan") max_values = len(s_result) if max_values > 5: max_values = 5 for i in xrange(0, max_values, 1): print s_result[i] #print s_result[0] #print(type(dknight)) #print dknight #print(dknight['imdb_id'])
# NOTE(review): Movie model wrapping imdbpie title/credits/review payloads.
# The class body is split across two collapsed physical lines (the `def` of
# GenerateChartData straddles the break), so the code is kept byte-identical.
# SetAfterInit walks the nested API dict defensively; AnalyzeReviews derives
# a comment-based rating and chart data from user reviews.
class Movie: def __init__(self): self.imdb = Imdb() self.reviews = None self.chartdata = None self.director = None self.commentbasedrating = 0 self.title = None self.poster = None self.durationMin = None self.rating = 0 self.id = None self.summary = None self.outline = None self.cast = None self.directors = None def SetAfterInit(self, dict): self.dict = dict self.id = dict['base']['id'].split('/')[2] if 'base' in self.dict: if 'title' in self.dict['base']: self.title = self.dict['base']['title'] if 'runningTimeInMinutes' in self.dict['base']: self.durationMin = self.dict['base']['runningTimeInMinutes'] if 'image' in self.dict['base']: if 'url' in self.dict['base']['image']: self.poster = self.dict['base']['image']['url'] if 'ratings' in self.dict: if 'rating' in self.dict['ratings']: self.rating = float(self.dict['ratings']['rating']) if 'plot' in self.dict: if 'outline' in self.dict['plot']: if 'text' in self.dict['plot']['outline']: self.outline = self.dict['plot']['outline']['text'] if 'summaries' in self.dict['plot']: if len(self.dict['plot']['summaries']) > 0: self.summary = self.dict['plot']['summaries'][0]['text'] def SetAfterSearch(self, dict): self.id = dict['id'] self.title = dict['title'] self.poster = dict['poster'] self.durationMin = dict['durationMin'] self.rating = dict['rating'] self.outline = dict['outline'] self.summary = dict['summary'] def AnalyzeReviews(self): self.GetReviews() if self.reviews != None: self.SetCommentbasedrating() self.GenerateChartData() def GetReviews(self): reviews_temp_load = self.imdb.get_title_user_reviews(self.id) if 'reviews' in reviews_temp_load: reviews_temp = reviews_temp_load['reviews'] self.reviews = [] for review in reviews_temp: review_temp = UserReview(review['helpfulnessScore'], review['reviewText']) self.reviews.append(review_temp) def SetCommentbasedrating(self): scoreSum = 0 for review in self.reviews: scoreSum += review.commentbasedrating self.commentbasedrating = scoreSum / len(self.reviews) def 
GenerateChartData(self): cd = ChartData(self.reviews) cd.SetDataHelpfulness(self.reviews) cd.SetDataCommentbasedrating(self.reviews) self.chartdata = cd def SetCast(self): dict = self.imdb.get_title_credits(self.id) if 'credits' in dict: if 'cast' in dict['credits']: self.cast = dict['credits']['cast'] if 'director' in dict['credits']: self.directors = dict['credits']['director'] def GetJSONSearch(self): listReviews = [] if self.reviews != None: for review in self.reviews: listReviews.append(review.GetJSON()) cd = {} if self.chartdata != None: cd = self.chartdata.GetJSON() dict = { "id": self.id, "title": self.title, "poster": self.poster, "durationMin": self.durationMin, "rating": self.rating, "outline": self.outline, "summary": self.summary, "cast": self.cast, "directors": self.directors, "reviews": listReviews, "commentbasedrating": self.commentbasedrating, "chartdata": cd } return dict
# NOTE(review): analysis script — merges five pickled/eval'd review-score
# files into one dict and loads a category pickle; the second Imdb()
# assignment (anonymize=True) immediately overwrites the first. `eval` on
# file contents is unsafe if the files are not trusted — TODO use
# ast.literal_eval (already imported). Code left byte-identical below.
import matplotlib.pyplot as plt import plotly.plotly as py from imdbpie import Imdb imdb = Imdb() imdb = Imdb(anonymize=True) # to proxy requests import pickle import ast import json movie=open('movie review score_0-50','r') movie1=open('movie review score_51-100','r') movie2=open('movie review score_101-150','r') movie3=open('movie review score_151-200','r') movie4=open('movie review score_201-250','r') #dictionary=dict(x.split(':') for x in movie.read().split('],')) #print dictionary Scores= eval(movie.read()) Scores.update(eval(movie1.read())) Scores.update(eval(movie2.read())) Scores.update(eval(movie3.read())) Scores.update(eval(movie4.read())) with open('catergory', 'rb') as handle: Category = pickle.load(handle) #print Category CategoryLen={} for i in Category: temp=[]
# NOTE(review): Jupyter notebook cell, not a plain module — the
# `%matplotlib inline` magic is invalid outside IPython. Builds a top-100
# DataFrame from the IMDb top-250; the second Imdb() assignment overwrites
# the first. Code left byte-identical below.
from sklearn.metrics import accuracy_score from sklearn.grid_search import GridSearchCV import psycopg2 from sqlalchemy import create_engine import requests from imdbpie import Imdb import nltk import matplotlib.pyplot as plt %matplotlib inline import seaborn as sns ######################################### # part 1 # importing top 250 movies from imdb database using api thang into a dataframe imdb = Imdb() imdb = Imdb(anonymize=True) top_250 = pd.DataFrame(imdb.top_250()) # sorting values by rating and selecting only the top 100 movies top_250 = top_250.sort_values(by='rating', ascending=False) top_100 = top_250[0:100] # limiting columns according to starter code mask = ['num_votes', 'rating', 'tconst', 'title', 'year'] top_100 = top_100[mask] # getting genre/runtime from OMDB top_100 movie_list = top_100['tconst']
# NOTE(review): fragment — opens with a `return s` whose enclosing function
# is outside this view, then a top-level loop inserting movie rows into SQL
# via string formatting (SQL-injection-prone; single_quote only escapes
# quotes — TODO use parameterized queries). The trailing INSERT is cut off.
# Code left byte-identical below.
return s movie_list = [] movie_genres = {} actor_set = {} with open('data.csv') as f: for row in f.readlines()[1:]: columns = row.split(',') movie_id = columns[0].split('/')[4] genres = columns[1][:-1] movie_list.append(movie_id) movie_genres[movie_id] = genres imdb = Imdb() movie_count = 0 for movie_id in movie_list: try: title = imdb.get_title(movie_id) sql = ( '''INSERT INTO movie_movie VALUES (\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\')''' .format( movie_id, single_quote(str(title['base']['title'])), title['base']['year'], title['base']['runningTimeInMinutes'], movie_genres[movie_id], title['ratings']['rating'], single_quote(title['base']['image']['url']), single_quote(str(title['plot']['outline']['text'])), single_quote( str( imdb.get_title_videos(movie_id)['videos'][0]
# NOTE(review): Jupyter notebook cell (`%matplotlib inline` is IPython-only).
# Pulls the IMDb top-250 into a DataFrame, drops unused columns and keeps the
# top 100; the second Imdb() assignment overwrites the first.
# Code left byte-identical below.
from sklearn.metrics import mean_squared_error, r2_score import psycopg2 import requests import nltk import urllib from bs4 import BeautifulSoup import nltk import matplotlib.pyplot as plt import seaborn as sns %matplotlib inline #1. Connect to the imdbpie API imdb = Imdb() imdb = Imdb(anonymize = True) #2. Query the top 250 rated movies in the database imdb.top_250() #3. Put the information into a dataframe, then keep only relevant columns data = pd.DataFrame(imdb.top_250()) data.head() data.drop('can_rate', axis=1, inplace=True) data.drop('image', axis=1, inplace=True) data.drop('type', axis=1, inplace=True) #4. Select only the top 100 movies data = data.iloc[0:100]
# NOTE(review): movie-page builder script; truncated mid-way through the
# `gravity` constructor, so it is kept byte-identical. Each Movie makes five
# separate get_title_by_id calls for the same id — TODO fetch once and reuse.
# The second Imdb() assignment overwrites the first.
# The python package imdbpie was used in this program # and the repository for this can be found at: # https://github.com/richardasaurus/imdb-pie from imdbpie import Imdb import fresh_tomatoes import media # Creating an instance of Imdb to be used to collect data via API imdb = Imdb() imdb = Imdb(anonymize=True) # IMDB Id's to be used to receive info from IMDB using API's interstellar_id = "tt0816692" gravity_id = "tt1454468" passengers_id = "tt1355644" arrival_id = "tt2543164" pulpfiction_id = "tt0110912" contact_id = "tt0118884" # Creation of movie objects to include required information interstellar = media.Movie(imdb.get_title_by_id(interstellar_id).title, imdb.get_title_by_id(interstellar_id).plot_outline, "https://upload.wikimedia.org/wikipedia/en/b/bc/" "Interstellar_film_poster.jpg", "https://www.youtube.com/watch?v=3WzHXI5HizQ", imdb.get_title_by_id(interstellar_id).certification, imdb.get_title_by_id(interstellar_id).rating) gravity = media.Movie(imdb.get_title_by_id(gravity_id).title, imdb.get_title_by_id(gravity_id).plot_outline,
# NOTE(review): movie-library maintenance script using both the `imdb`
# (IMDbPY) and `imdbpie` packages; truncated mid-way through the os.walk
# loop, so it is kept byte-identical. Windows-specific (winshell, win32com);
# the library paths use unescaped backslashes in plain strings — TODO raw
# strings or pathlib.
import os import imdb import time import winshell from imdbpie import Imdb from win32com.client import Dispatch # https://github.com/richardasaurus/imdb-pie # https://imdbpy.sourceforge.io/support.html#documentation t_start = time.time() ib = imdb.IMDb() # Import the imdb package. ia = Imdb() del_ext = ["txt", "nfo", "png", "jpg", "url"] ign_ext = [ "exe", "zip", "part", "srt", "pdf", "iso", "txt", "nfo", "png", "jpg", "url", "ini" ] ign_key = [] repl_key = dict() repl_key["Æ"] = 'ae' error_list = list() library = "Z:\Downloaded\Video(s)" # library = "Z:\Ripped\Movies" os.chdir(library) with open("movie_management.log", 'w') as log: for root, subdirs, files in os.walk(os.path.join(library, 'Library')):
def get_client():
    """Return a fresh Imdb API client."""
    return Imdb()
def setup():
    """Initialise the module-level `imdb` client with proxied requests."""
    global imdb
    imdb = Imdb(anonymize=True)
# NOTE(review): Python 2 script (reload/setdefaultencoding, raw_input,
# print statement) — prompts for a movie name and prepares an IMDb find-page
# URL plus a MongoDB collection; the insert itself is commented out. The
# second Imdb() assignment overwrites the first.
# Code left byte-identical below.
import sys reload(sys) sys.setdefaultencoding('UTF8') import json, requests from pymongo import MongoClient #-----------usando imbd----------------- from imdbpie import Imdb imdb = Imdb() imdb = Imdb(anonymize=True) import re #Conexion a MongoDB cliente = MongoClient() #Inicializar objeto cliente = MongoClient('127.0.0.1', 27017) #Indicar parametros del servidor bd = cliente.taller4 #Seleccionar Schema coleccion = bd.peliculas #Seleccionar Coleccion movie = str(raw_input('Movie Name: ')) movie_search = '+'.join(movie.split()) base_url = 'http://www.imdb.com/find?q=' url = base_url + movie_search + '&s=all' title_search = re.compile('/title/tt\d+') print base_url #coleccion.insert_one(top)
# NOTE(review): movie-page builder script; truncated mid-way through the
# `gravity` constructor, so it is kept byte-identical. Each Movie performs
# five separate get_title_by_id calls for the same id — TODO fetch once and
# reuse. The second Imdb() assignment overwrites the first.
from imdbpie import Imdb import fresh_tomatoes import media # Creating an instance of Imdb to be used to collect data via API imdb = Imdb() imdb = Imdb(anonymize=True) # IMDB Id's to be used to receive info from IMDB using API's interstellar_id = "tt0816692" gravity_id = "tt1454468" passengers_id = "tt1355644" arrival_id = "tt2543164" pulpfiction_id = "tt0110912" contact_id = "tt0118884" # Creation of movie objects to include required information interstellar = media.Movie( imdb.get_title_by_id(interstellar_id).title, imdb.get_title_by_id(interstellar_id).plot_outline, "https://upload.wikimedia.org/wikipedia/en/b/bc/Interstellar_film_poster.jpg", "https://www.youtube.com/watch?v=3WzHXI5HizQ", imdb.get_title_by_id(interstellar_id).certification, imdb.get_title_by_id(interstellar_id).rating) gravity = media.Movie( imdb.get_title_by_id(gravity_id).title, imdb.get_title_by_id(gravity_id).plot_outline, "https://upload.wikimedia.org/wikipedia/en/f/f6/Gravity_Poster.jpg", "https://www.youtube.com/watch?v=OiTiKOy59o4", imdb.get_title_by_id(gravity_id).certification,
# NOTE(review): extractor that joins MovieLens entries with IMDb plots,
# genres and reviews, retrying on IMDbDataAccessError and writing one
# `::`-delimited record per movie. The class body is split across two
# collapsed physical lines (find_movie straddles the break), so the code is
# kept byte-identical. find_movie matches by year and by english/original
# title variants; failures are recorded as "::ERROR" rows.
class ImdbExtractor(object): def __init__(self, data_path=None): super(ImdbExtractor, self).__init__() self.search_api = IMDBPy() self.info_api = IMDBPie(anonymize=True) self.movie_lens = MovieLens(data_path) # self.data_path = "data/movies_data" self.data_path = data_path + ".out" if data_path \ else "data/movies_data" self.errors = [] def retrieve_objects(self): movies = self.movie_lens.movies() with open(self.data_path, "w", 1, encoding="utf-8") as file: for movie in movies: print("\n") print(movie.id) print(movie.data["name"]) while True: try: m = self.find_movie(movie.data["name"]) except IMDbDataAccessError as e: print("========== CONNECTION ERROR ==========") print(e) sleep(5) else: break data = str(movie.id) if m: plots, genres = self.movie_info(m.movieID) reviews = self.movie_reviews(m.movieID) if plots or genres or reviews: movie.data["genres"].extend(genres) data += u'::' + movie.data["name"] data += u'::' + u' '.join(filter(None, plots)) data += u'::' + u' '.join(filter(None, movie.data["genres"])) data += u'::' + u' '.join(filter(None, reviews)) data = data.replace('\r', ' ').replace('\n', ' ') else: data += u"::ERROR" else: data += u"::ERROR" file.write(data + u"\n") def movie_reviews(self, movie_id): try: reviews = self.info_api.get_title_reviews("tt" + movie_id, max_results=20) except ValueError as e: return [] reviews_arr = [] if reviews: for r in reviews: review = r.summary if r.summary else "" review += " " + r.text if r.text else "" reviews_arr.append(review) return reviews_arr def movie_info(self, movie_id): try: movie = self.info_api.get_title_by_id("tt" + movie_id) except ValueError as e: return [], [] plots = movie.plots if movie.plots else [] genres = movie.genres if movie.genres else [] return plots, genres def find_movie(self, name): movies = self.search_api.search_movie(name) if not movies: name = re.sub("\((\D*)\)", "", name) print("---------- SEARCHING AGAIN: ----------") print(name) movies = self.search_api.search_movie(name) 
print(movies) if not movies: print("########## NO MOVIE FOUND ##########") return None def sanitize_name(_str): new_str = _str.strip().lower() for char in string.punctuation: new_str = new_str.replace(char, "") return new_str name_split = name.split("(") title = sanitize_name(name_split[0]) year = int(name_split[-1][:-1].strip()) movie = None for i in movies: if "year" in i.keys() and int(i["year"]) == year: movie = i break if not movie: print("########## NO MOVIE FROM SAME YEAR ##########") return None self.search_api.update(movie) eng_title = "" if "akas" in movie.keys(): print("tem akas") for aka in movie["akas"]: aka_split = aka.split("::") if len(aka_split) > 1 \ and (aka_split[1].find("(English title)") != -1 \ or aka_split[1].find("USA") != -1): eng_title = aka_split[0].strip().lower() break imdb_title = sanitize_name(movie["title"]) original_title = name_split[1].strip()[:-1].lower() print("imdb title: " + imdb_title) print("english title: " + eng_title) print("year: " + str(movie["year"])) if imdb_title == title or eng_title == title \ or (len(name_split) == 3 \ and imdb_title == original_title): return movie else: print("########## FOUND DIFFERENT MOVIE ##########") print(movie["title"] + " (" + str(movie["year"]) + ")") return None
# NOTE(review): ratings-export script, apparently truncated (the loop body
# ends after writing the comma), so it is kept byte-identical. Defects to
# fix when completing it: `rating is "None"` compares string identity, not
# equality — should be `rating == "None"` (or test `title.rating is None`
# before str()); the three successive Imdb() assignments overwrite each
# other, so only cache=True takes effect; files are never closed.
#based off details.csv from getAllMovieDetails.py #ONLY SOME MOVIES WILL BE WRITTEN INTO F2. cOPY THEM INTO TEMPmOVIErATINGS. kEEP DOING THIS from imdbpie import Imdb imdb = Imdb() imdb = Imdb(anonymize=True) imdb = Imdb(cache=True) f1 = open('Details.csv', 'r') f2 = open('movieRatings.csv', 'w') movieID = [] year = [] count = 1 #just to show how much is written in new file for line in f1: attribute = line.split(",") #splitting each item attribute[-1] = attribute[-1].strip() #removing \n from the last attribute #year.append(attribute[0]) #storing only years movieID.append(attribute[1]) #storing only movie ids for item in movieID: if (imdb.get_title_by_id(item)): #checking if movie exists print(count) title = imdb.get_title_by_id(item) rating = str(title.rating) if (rating is "None"): f2.write("None") else: f2.write(rating) f2.write(',') #so that it's in csv format
# NOTE(review): a complete SQL-quote-escaping helper followed by a top-250
# import loop whose INSERT statement is truncated mid-format, so the code is
# kept byte-identical. The string-formatted SQL is injection-prone even with
# single_quote — TODO use parameterized queries. The second Imdb() assignment
# overwrites the first.
def single_quote(s): if len(s) == 0: return 'None' if s.find('\'') != -1: ss = s.split("\'") new = '' for x in ss: new = new + "\'" + "\'" + x return new[2:] else: return s imdb = Imdb() imdb = Imdb(anonymize=True) # to proxy requests top250 = [] top250 = imdb.top_250() for item in top250: try: title = imdb.get_title_by_id(item['tconst']) if len(title.trailers) > 0: trailer_url = title.trailers[0]['url'] else: trailer_url = 'None' new_movie = ( '''INSERT INTO movie_movie VALUES (\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\')'''.format( item['tconst'], single_quote(str(item['title'])),
def test_get_title_by_id_returns_none_when_is_episode(self):
    """An episode id must resolve to None when episodes are excluded."""
    episode_free_client = Imdb(exclude_episodes=True)
    result = episode_free_client.get_title_by_id('tt0615090')
    assert result is None
class PosterBot():
    """Sync poster images from a Plex server into a local directory.

    Walks the Plex movie library (section 1) and TV library (section 2),
    downloads each item's poster thumbnail, and records it in the Posters
    model together with an IMDb link. Configuration comes from the
    PLEX_URL, PLEX_TOKEN and POSTER_DIR environment variables.
    """

    def __init__(self):
        # Guard every network call with a 30s timeout (urllib honours this).
        socket.setdefaulttimeout(30)
        self.plex_url = os.environ.get('PLEX_URL')
        self.plex_token = os.environ.get('PLEX_TOKEN')
        self.dir = os.environ.get('POSTER_DIR')
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)
        self.imdb = Imdb()

    def run(self):
        """Process both libraries: movies first, then TV shows."""
        self.getMoviePosters()
        self.getTVPosters()

    def getMoviePosters(self):
        """Download/update posters for every movie in Plex library section 1."""
        movie_xml = ET.fromstring(urllib.request.urlopen(''.join((self.plex_url, '/library/sections/1/all?X-Plex-Token=', self.plex_token))).read())
        for child in movie_xml:
            title = child.attrib.get('title')
            ratingKey = child.attrib.get('ratingKey')
            thumb = child.attrib.get('thumb')
            updatedAt = int(child.attrib.get('updatedAt'))
            # search_entry returns (existing filename or falsy, up-to-date flag).
            oldfile, exists = Posters.objects.search_entry(ratingKey, updatedAt)
            if not exists:
                # couldn't find in database — download and create a fresh entry
                newfile = self.downloadPoster(thumb, ratingKey, updatedAt)
                if newfile:
                    imdb_url = self.getImdbLink(title)
                    Posters.objects.create_entry(ratingKey, newfile, imdb_url, title, updatedAt)
            elif oldfile:
                # entry needs to be updated — replace the stale poster file
                newfile = self.downloadPoster(thumb, ratingKey, updatedAt)
                if newfile:
                    os.remove(os.path.join(self.dir, oldfile))
                    Posters.objects.update_entry(ratingKey, newfile, updatedAt)

    def getTVPosters(self):
        """Download/update posters for shows (section 2) and their seasons."""
        tv_xml = ET.fromstring(urllib.request.urlopen(''.join((self.plex_url, '/library/sections/2/all?X-Plex-Token=', self.plex_token))).read())
        for child in tv_xml:
            title = child.attrib.get('title')
            ratingKey = child.attrib.get('ratingKey')
            thumb = child.attrib.get('thumb')
            # NOTE(review): unlike movies, the "updated at" stamp is taken from
            # the last path segment of the thumb URL — presumably Plex encodes
            # the revision there for shows; confirm against the Plex API.
            updatedAt = int(thumb.rsplit('/', 1)[-1])
            oldfile, exists = Posters.objects.search_entry(ratingKey, updatedAt)
            if exists:
                imdb_url = self.getImdbLink(title, ratingKey=ratingKey)
            else:
                # couldn't find in database
                newfile = self.downloadPoster(thumb, ratingKey, updatedAt)
                if newfile:
                    imdb_url = self.getImdbLink(title, 'TV')
                    Posters.objects.create_entry(ratingKey, newfile, imdb_url, title, updatedAt)
            if oldfile:
                # entry needs to be updated
                newfile = self.downloadPoster(thumb, ratingKey, updatedAt)
                if newfile:
                    os.remove(os.path.join(self.dir, oldfile))
                    Posters.objects.update_entry(ratingKey, newfile, updatedAt)
            show_xml = ET.fromstring(urllib.request.urlopen(''.join((self.plex_url, '/library/metadata/', ratingKey, '/children?X-Plex-Token=', self.plex_token))).read())
            for season in show_xml:  # loop for seasons in show
                # NOTE(review): `ratingKey` is rebound here, shadowing the
                # show's key for the rest of this iteration.
                ratingKey = season.get('ratingKey')
                if ratingKey is None:
                    continue
                thumb = season.get('thumb')
                updatedAt = int(season.get('updatedAt'))
                oldfile, exists = Posters.objects.search_entry(ratingKey, updatedAt)
                if not exists:
                    # couldn't find entry in database
                    newfile = self.downloadPoster(thumb, ratingKey, updatedAt)
                    if newfile:
                        # NOTE(review): `imdb_url` is only bound when the show
                        # branch above created/found a link — if the show was in
                        # the DB but download failed, this raises
                        # UnboundLocalError. Looks like a latent bug; verify.
                        Posters.objects.create_entry(ratingKey, newfile, imdb_url, title, updatedAt)
                elif oldfile:
                    # entry needs to be updated
                    newfile = self.downloadPoster(thumb, ratingKey, updatedAt)
                    if newfile:
                        os.remove(os.path.join(self.dir, oldfile))
                        Posters.objects.update_entry(ratingKey, newfile, updatedAt)

    # save directory stored in environment variable for now (could switch to db)
    def downloadPoster(self, thumb, ratingKey, updatedAt):
        """Fetch one poster to "<ratingKey>-<updatedAt>.jpg" in self.dir.

        Returns the filename on success, or "" on any failure; partially
        written files are removed before returning.
        """
        url = ''.join((self.plex_url, thumb, '?X-Plex-Token=', self.plex_token))
        filename = ''.join((ratingKey, '-', str(updatedAt), '.jpg'))
        path = os.path.join(self.dir, filename)
        try:
            urllib.request.urlretrieve(url, path)
            return filename
        except socket.timeout:
            if os.path.exists(path):
                os.remove(path)
            print("timeout error: " + filename)
        except FileNotFoundError:
            print("File or folder doesn't exist: " + path)
        except socket.error:
            if os.path.exists(path):
                os.remove(path)
            print("socket error occured: ")
        except:
            # Catch-all keeps one bad poster from aborting the whole sync.
            if os.path.exists(path):
                os.remove(path)
            print("Unexpected error:", sys.exc_info()[0])
        return ""

    # specify type if TV show
    def getImdbLink(self, title, type='Movie', ratingKey=''):
        """Best-effort IMDb URL for *title*.

        Preference order: stored URL for an existing DB entry (when a
        ratingKey is given), first IMDb title-search hit, then a generic
        IMDb find-query URL filtered by feature/TV type.
        """
        if ratingKey:
            poster = Posters.objects.get(ratingKey=ratingKey)
            return poster.imdb_url
        search = self.imdb.search_for_title(title)
        if len(search) > 0:
            imdb_id = search[0].get('imdb_id')
            return "http://imdb.com/title/" + imdb_id
        title = title.replace(' ', '+')
        if type == 'Movie':
            return ''.join(('http://www.imdb.com/find?q=', title, '&s=tt&ttype=ft'))
        return ''.join(('http://www.imdb.com/find?q=', title, '&s=tt&ttype=tv'))
from imdbpie import Imdb
import json

imdb = Imdb(anonymize=True)  # to proxy requests

# Fetch up to 2500 reviews for Titanic (tt0120338).
reviews = imdb.get_title_reviews("tt0120338", max_results=2500)

classified_reviews = []
# Guard against unrated reviews: comparing `None > 7` raises TypeError on
# Python 3, so skip reviews without a rating.
positive_reviews = [x for x in reviews if x.rating is not None and x.rating > 7]
negative_reviews = [x for x in reviews if x.rating is not None and x.rating < 5]

# BUG FIX: the original iterated a hard-coded range(0, 550) and raised
# IndexError whenever fewer than 550 positive or negative reviews came back.
# Pair up at most 550 of each polarity, bounded by what is actually available.
for positive, negative in list(zip(positive_reviews, negative_reviews))[:550]:
    classified_reviews.append({
        'text': positive.text,
        'class': 'POSITIVE'
    })
    classified_reviews.append({
        'text': negative.text,
        'class': 'NEGATIVE'
    })

with open('result.json', 'w') as fp:
    json.dump(classified_reviews, fp)
def test_get_episodes_raises_when_exclude_episodes_enabled(self):
    """get_episodes must refuse to run while episodes are excluded."""
    client = Imdb(locale='en_US', cache=False, exclude_episodes=True)
    with pytest.raises(ValueError):
        client.get_episodes('tt0303461')
def _search(self, title, year=None, fallback_search=False):
    """
    Search the api for a movie.

    Matching order: exact sanitized-title match among the results (with
    year check when a year is given), then the alternate titles of the
    most relevant result, then one recursive retry with the year folded
    into the query string.

    :param title: the title to search for
    :type title: str
    :param year: the year
    :type year: int or None
    :param fallback_search: internal flag — when True this is the retry
        pass with the year embedded in the query, and no further retry
        is attempted
    :type fallback_search: bool
    :return: the search result or None if not found
    :rtype: imdbpie.objects.TitleSearchResult or None
    """
    name = title
    if year:
        name += ' (' + text_type(year) + ')'
    if fallback_search:
        log.info('Searching imdb api again with year included for %s', name)
        # Parentheses are stripped so "Title (1999)" becomes "Title 1999".
        search_results = ImdbFacade().search_for_title(
            re.sub('[()]', '', name))
    else:
        log.info('Searching imdb api for %s', name)
        search_results = ImdbFacade().search_for_title(title)
    # Find the first movie that matches the title (and year if present)
    for search_result in search_results:  # type: TitleSearchResult
        if self.sanitize_imdb_title(
                search_result.title) == self.sanitize_imdb_title(title):
            # If a year is present, it should also be the same
            if year:
                if search_result.year == int(year):
                    return search_result
                else:
                    continue
            # If no year is present, take the first match
            else:
                return search_result
    # If no match is found, try to search for alternative titles of the
    # first (most relevant) result
    if len(search_results) > 0:
        best_match = search_results[0]  # type: TitleSearchResult
        best_match_title_versions = Imdb().get_title_versions(
            best_match.imdb_id)  # Not available in ImdbFacade
        if best_match_title_versions and 'alternateTitles' in best_match_title_versions:
            for alternate_title in best_match_title_versions[
                    'alternateTitles']:
                if self.sanitize_imdb_title(
                        alternate_title['title']
                ) == self.sanitize_imdb_title(title):
                    # If a year is present, it should also be the same.
                    # Note the result is rebuilt with the best match's own
                    # (primary) title, not the alternate one that matched.
                    if year:
                        if best_match.year == int(year):
                            return TitleSearchResult(
                                imdb_id=best_match.imdb_id,
                                title=best_match.title,
                                type=best_match.type,
                                year=best_match.year)
                        else:
                            continue
                    # If no year is present, take the first match
                    else:
                        return TitleSearchResult(
                            imdb_id=best_match.imdb_id,
                            title=best_match.title,
                            type=best_match.type,
                            year=best_match.year)
    # Fallback search in case nothing could be found
    if not fallback_search:
        return self._search(title, year=year, fallback_search=True)
    return None
from imdbpie import Imdb

imdb = Imdb({'anonymize': False, 'locale': 'en_US', 'exclude_episodes': False})


def run_tests():
    """Overall tests not using unittests for a simple visual results overview."""
    # Title on its own line, then one (label, value) tuple per attribute.
    print(movie.title)
    labelled_attributes = (
        ('year', 'year'),
        ('type', 'type'),
        ('tagline', 'tagline'),
        ('rating', 'rating'),
        ('certification', 'certification'),
        ('genres', 'genres'),
        ('plot', 'plot'),
        ('runtime', 'runtime'),
        ('writers', 'writers'),
        ('directors', 'directors'),
        ('creators', 'creators'),
        ('cast summary', 'cast_summary'),
        ('full credits', 'credits'),
    )
    for label, attribute in labelled_attributes:
        print((label, getattr(movie, attribute)))


if __name__ == '__main__':
    movie = imdb.find_movie_by_id('tt0705926')
    run_tests()
from imdbpie import Imdb
import re

imdb = Imdb({'anonymize' : False})
movie = imdb.find_movie_by_id("tt1210166")


def run_tests():
    """Visually verify the movie's trailer URL points at the IMDb image CDN."""
    global imdb
    print('have a trailer_url:')
    # Same behaviour as the original: IndexError if the pattern is absent.
    hits = re.findall(r'http://ia.media-imdb.com/images/.*/', movie.trailer_url)
    match = hits[0]
    if match:
        print('passed')


if __name__ == '__main__':
    run_tests()
import csv
from urllib.parse import urlencode
from datetime import datetime
import json
import requests
import pandas as pd
from imdbpie import Imdb

TMDB_API_KEY = "df11d86cc7da3a00faaeafc354b858de"
OMDB_API_KEY = "83930f10"
DATA_URL = "https://pkgstore.datahub.io/36661def37f62e4130670ab75e06465a/oscars-nominees-and-winners/data_json/data/d3c23178ad964c76c8ce0ed81762ed7b/data_json.json"

imdb = Imdb()


def get_json(url):
    """GET *url* and return the decoded JSON body, or None on an error status."""
    response = requests.get(url)
    if not response:
        return None
    return response.json()


def tmdb_get(endpoint, params: dict = None):
    """Call a TMDb v3 endpoint, appending the API key and optional query params."""
    query_url = f'https://api.themoviedb.org/3{endpoint}?api_key={TMDB_API_KEY}'
    if params:
        query_url = f'{query_url}&{urlencode(params)}'
    return get_json(url=query_url)
from imdbpie import Imdb
import unittest

imdb = Imdb({'anonymize': False})
images = imdb.title_images("tt0468569")


class TestImage(unittest.TestCase):
    """Spot-checks against The Dark Knight's image gallery."""

    def test_results(self):
        # The gallery should hold at least the historically observed count.
        self.assertGreaterEqual(len(images), 107)

    def test_caption(self):
        first_image = images[0]
        self.assertEqual(first_image.caption, 'Still of Gary Oldman in The Dark Knight')

    def test_url(self):
        first_image = images[0]
        self.assertEqual(first_image.url, 'http://ia.media-imdb.com/images/M/MV5BOTAxNzI0NDE1NF5BMl5BanBnXkFtZTcwNjczMTk2Mw@@._V1_.jpg')


if __name__ == '__main__':
    unittest.main()
#!/usr/bin/env python2 # -*- coding: utf-8 -*- """ Created on Mon Oct 23 16:02:41 2017 @author: huaqingxie """ # import modules from imdbpie import Imdb import pandas as pd # to proxy requests imdb = Imdb() imdb = Imdb(anonymize=True) mydf = pd.read_csv("movie_data_filter.csv") print mydf.head(20) imdbid = mydf['imdbId'] director = [] actor = [] year = [] imdb_score = [] certification = [] n = 0 for line in imdbid: n += 1 print "...", n if len(str(int(line))) == 6:
def test_get_title_by_id_returns_none_when_is_episode(self):
    """An episode lookup yields None once exclude_episodes is enabled."""
    episode_free_client = Imdb(exclude_episodes=True)
    result = episode_free_client.get_title_by_id("tt0615090")
    assert result is None
async def imdb(query, api: Imdb, localize):
    """
    Send an api request to imdb using the search query
    :param query: the search query
    :param api: the imdb api object
    :param localize: the localization strings
    :return: the result (a Discord Embed, or the localized
        'title_not_found' string when lookup fails)
    """
    # FIXME: Use Aiohttp instead of this api wrapper
    try:
        # names: comma-join Person names; null_check: map falsy/int to 'N/A'.
        names = lambda x: ', '.join((p.name for p in x)) if x else 'N/A'
        null_check = lambda x: x if x and not isinstance(x, int) else 'N/A'
        # Take the top search hit; IndexError (no results) is handled below.
        id_ = api.search_for_title(query)[0]['imdb_id']
        res = api.get_title_by_id(id_)
        # Episode/season data only applies to TV series.
        eps = api.get_episodes(id_) if res.type == 'tv_series' else None
        ep_count = len(eps) if eps is not None else None
        season_count = eps[-1].season if eps is not None else None
        title = null_check(res.title)
        release = null_check(res.release_date)
        runtime = res.runtime
        if runtime is not None:
            # assumes runtime is expressed in seconds (divmod by 3600)
            # — TODO confirm against the imdbpie Title object.
            hours, seconds = divmod(runtime, 3600)
            minutes = seconds / 60
            runtime_str = '{} {} {} {}'.format(round(hours), localize['hours'], round(minutes), localize['minutes'])
        else:
            runtime_str = 'N/A'
        rated = null_check(res.certification)
        genre = ', '.join(res.genres) if res.genres else 'N/A'
        director = names(res.directors_summary)
        writer = names(res.writers_summary)
        cast = names(res.cast_summary)
        plot = null_check(res.plot_outline)
        poster = res.poster_url
        score = f'{res.rating}/10' if res.rating is not None else 'N/A'
        # Assemble the embed; field order defines the display order.
        embed = Embed(colour=0xE5BC26)
        embed.set_author(name=title)
        if poster:
            embed.set_image(url=poster)
        if season_count is not None:
            embed.add_field(name=localize['seasons'], value=season_count)
        if ep_count is not None:
            embed.add_field(name=localize['episodes'], value=str(ep_count))
        embed.add_field(name=localize['release_date'], value=release)
        embed.add_field(name=localize['rated'], value=rated)
        embed.add_field(name=localize['runtime'], value=runtime_str)
        embed.add_field(name=localize['genre'], value=genre)
        embed.add_field(name=localize['director'], value=director)
        embed.add_field(name=localize['writer'], value=writer)
        embed.add_field(name=localize['cast'], value=cast)
        embed.add_field(name=localize['score'], value=score)
        embed.add_field(name=localize['plot_outline'], value=plot, inline=False)
        return embed
    except (JSONDecodeError, IndexError):
        # No search hit, or the wrapper returned an unparsable response.
        return localize['title_not_found']
import os import requests import urllib3 import PIL from PIL import Image from PIL import ImageDraw from PIL import ImageFont from imdbpie import Imdb from io import StringIO import textwrap imdb = Imdb() imdb = Imdb(anonymize=True) # to proxy requests pathname = 'D:\Eyes\Eng' def GetCleanName(shortname): shortname = shortname.replace('480P', '') shortname = shortname.replace('720P', '') shortname = shortname.replace('1080P', '') shortname = shortname.replace('X264', '') shortname = shortname.replace('YIFY', '') shortname = shortname.replace('BRRIP', '') shortname = shortname.replace('AAC', '') shortname = shortname.replace('ETRG', '') shortname = shortname.replace('BLURAY', '') shortname = shortname.replace('YTS AG', '') shortname = shortname.replace('[YTS.AG]', '') shortname = shortname.replace('AC3', '') shortname = shortname.replace('WEB DL', '')
# 12 Nov 2017 | Checking out imdbpie
from imdbpie import Imdb

imdb = Imdb()

# Run a title search and dump the raw result list to stdout.
lst = imdb.search_for_title("The Dark Knight")
print(lst)
class TestImdb(object):
    """Integration tests for the Imdb client against the live app.imdb.com API.

    A shared, uncached en_US client is used for most tests; individual
    tests build their own clients when they need different options
    (locale, proxying, episode exclusion).
    """

    imdb = Imdb(locale='en_US', cache=False)

    def test_build_url(self):
        imdb_fr = Imdb(locale='en_FR', cache=False)
        # Freeze the timestamp so the expected URL can be formatted exactly.
        imdb_fr.timestamp = time.mktime(datetime.date.today().timetuple())
        url = imdb_fr._build_url(path='/title/maindetails', params={'tconst': 'tt1111111'})
        # NOTE(review): '×tamp' below looks like mojibake of '&timestamp'
        # (an HTML '&times' entity rendered as '×') — verify against the
        # repository history before relying on this literal.
        expected_url = ('https://app.imdb.com/'
                        'title/maindetails'
                        '?apiKey=d2bb34ec6f6d4ef3703c9b0c36c4791ef8b9ca9b'
                        '&apiPolicy=app1_1'
                        '&locale=en_FR'
                        '×tamp={timestamp}'
                        '&tconst=tt1111111&api=v1&appid=iphone1_1').format(
                            timestamp=imdb_fr.timestamp)
        assert_urls_match(expected_url, url)

    def test_build_url_proxied(self):
        imdb_fr = Imdb(locale='en_FR', cache=False, anonymize=True,
                       proxy_uri='http://someproxywebsite.co.uk?url={0}')
        imdb_fr.timestamp = time.mktime(datetime.date.today().timetuple())
        url = imdb_fr._build_url(path='/title/maindetails',
                                 params={'tconst': 'tt1111111'})
        # Only the prefix is checked: the proxied URL embeds a timestamp.
        expected_url = ('http://someproxywebsite.co.uk?url=' +
                        quote('https://app.imdb.com/title/maindetails'))
        assert url.startswith(expected_url) is True

    def test_get_title_plots(self):
        plots = self.imdb.get_title_plots('tt0111161')
        expected_plot3 = ('Andy Dufresne is sent to Shawshank Prison for the '
                          'murder of his wife and her secret lover. He is very'
                          ' isolated and lonely at first, but realizes there '
                          'is something deep inside your body that people '
                          'can\'t touch or get to....\'HOPE\'. Andy becomes '
                          'friends with prison \'fixer\' Red, and Andy '
                          'epitomizes why it is crucial to have dreams. His '
                          'spirit and determination lead us into a world full '
                          'of imagination, one filled with courage and desire.'
                          ' Will Andy ever realize his dreams?')
        assert len(plots) >= 5
        assert expected_plot3 in plots

    def test_get_credits_data(self):
        credits = self.imdb._get_credits_data('tt0111161')
        expected_credits = load_test_data('get_credits_tt0111161.json')
        # The live credit list can only grow relative to the fixture.
        assert len(expected_credits) <= len(credits)
        for index, credit_item in enumerate(expected_credits):
            assert (sorted(credit_item, key=itemgetter(1)) ==
                    sorted(credits[index], key=itemgetter(1)))

    def test_get_credits_non_existant_title(self):
        with pytest.raises(HTTPError):
            self.imdb._get_credits_data('tt-non-existant-id')

    def test_get_reviews_data(self):
        reviews = self.imdb._get_reviews_data('tt0111161')
        assert len(reviews) == 10
        expected_review_keys = [
            'status', 'user_score', 'text', 'summary', 'user_score_count',
            'date', 'user_name'
        ]
        # other optional keys: user_rating, user_location
        # results are changeable so check on data structure
        for review in reviews:
            for key in expected_review_keys:
                assert key in review.keys()

    def test_get_title_reviews(self):
        reviews = self.imdb.get_title_reviews('tt0111161')
        assert 10 == len(reviews)
        assert reviews[0].username == 'carflo'
        assert reviews[0].date == '2003-11-26'
        assert reviews[0].summary == 'Tied for the best movie I have ever seen'

    def test_get_title_reviews_limit(self):
        reviews = self.imdb.get_title_reviews('tt2294629', max_results=20)
        assert 20 == len(reviews)
        reviews = self.imdb.get_title_reviews('tt2294629', max_results=31)
        assert 31 == len(reviews)

    def test_title_reviews_non_existant_title(self):
        with pytest.raises(HTTPError):
            self.imdb.get_title_reviews('tt-non-existant-id')

    def test_title_exists(self):
        result = self.imdb.title_exists('tt2322441')
        assert True is result

    def test_title_exists_non_existant_title(self):
        result = self.imdb.title_exists('tt0000000')
        assert False is result

    def test_search_for_title_searching_title(self):
        results = self.imdb.search_for_title('Shawshank redemption')
        expected_top_results = [
            {
                'imdb_id': 'tt0111161',
                'title': 'The Shawshank Redemption',
                'year': '1994'
            },
            {
                'imdb_id': 'tt0265738',
                'title': 'The SharkTank Redemption',
                'year': '2000'
            },
        ]
        assert 14 == len(results)
        assert expected_top_results == results[:2]

    def test_search_for_person(self):
        results = self.imdb.search_for_person('Andrew Lloyd Webber')
        assert 12 == len(results)
        expected_results = [{
            'name': 'Andrew Lloyd Webber', 'imdb_id': 'nm0515908'
        }, {
            'name': 'Andrew Lloyd Walker', 'imdb_id': 'nm3530714'
        }, {
            'name': 'Robert Lloyd', 'imdb_id': 'nm0516115'
        }, {
            'name': 'Madeleine Gurdon', 'imdb_id': 'nm2967056'
        }, {
            'name': 'Andrew Webberley', 'imdb_id': 'nm1422165'
        }, {
            'name': 'Imogen Lloyd Webber', 'imdb_id': 'nm2622250'
        }, {
            'name': 'Robert Floyd', 'imdb_id': 'nm0283292'
        }, {
            'name': 'Andrew Webber', 'imdb_id': 'nm0916341'
        }, {
            'name': 'Andrew Webber', 'imdb_id': 'nm1267376'
        }, {
            'name': 'Andrew Webber', 'imdb_id': 'nm3404464'
        }, {
            'name': 'Mark Webber', 'imdb_id': 'nm1902514'
        }, {
            'name': 'Andrew Webber', 'imdb_id': 'nm5409221'
        }]
        # Order-insensitive comparison: the API does not guarantee ordering.
        assert (sorted(expected_results, key=itemgetter('imdb_id')) == sorted(
            results, key=itemgetter('imdb_id')))

    def test_search_for_title_no_results(self):
        results = self.imdb.search_for_title('898582da396c93d5589e0')
        assert [] == results

    def test_top_250(self):
        results = self.imdb.top_250()
        assert 250 == len(results)
        expected_keys = [
            'rating', 'tconst', 'title', 'image', 'num_votes', 'year',
            'can_rate', 'type'
        ]
        # results are changeable so check on data structure
        for result in results:
            assert sorted(expected_keys) == sorted(result.keys())

    def test_popular_shows(self):
        results = self.imdb.popular_shows()
        assert 50 == len(results)
        expected_keys = [
            'tconst',
            'title',
            # 'image',  # optional key
            'year',
            'principals',
            'type'
        ]
        # results are changeable so check on data structure
        for index, result in enumerate(results):
            assert set(expected_keys).issubset(set(result.keys())) is True

    def test_get_title_by_id_returns_none_when_is_episode(self):
        imdb = Imdb(exclude_episodes=True)
        assert imdb.get_title_by_id('tt0615090') is None

    @patch('imdbpie.imdbpie.Imdb._get')
    def test_get_title_by_id_returns_none_when_no_resp(self, mock_get):
        mock_get.return_value = None
        assert self.imdb.get_title_by_id('tt0111161') is None

    def test_get_person_by_id(self):
        person = self.imdb.get_person_by_id('nm0000151')
        assert person.name == 'Morgan Freeman'
        assert person.imdb_id == 'nm0000151'
        assert is_valid_url(person.photo_url) is True

    @patch('imdbpie.imdbpie.Imdb._get')
    def test_get_person_by_id_returns_none_when_no_resp(self, mock_get):
        mock_get.return_value = None
        assert self.imdb.get_person_by_id('nm0000151') is None

    def test_get_title_by_id(self):
        title = self.imdb.get_title_by_id('tt0111161')
        assert title.title == 'The Shawshank Redemption'
        assert title.year == 1994
        assert title.type == 'feature'
        assert title.tagline == ('Fear can hold you prisoner. '
                                 'Hope can set you free.')
        assert isinstance(title.plots, list) is True
        assert len(title.plots) >= 5
        assert isinstance(title.rating, float) is True
        assert sorted(title.genres) == sorted(['Crime', 'Drama'])
        assert isinstance(title.votes, int) is True
        assert title.runtime == 8520
        assert is_valid_url(title.poster_url) is True
        assert is_valid_url(title.cover_url) is True
        assert title.release_date == '1994-10-14'
        assert title.certification == 'R'
        for trailer_url in title.trailer_image_urls:
            assert is_valid_url(trailer_url) is True
        expected_plot_outline = (
            'Two imprisoned men bond over a number '
            'of years, finding solace and eventual redemption through acts '
            'of common decency.')
        assert title.plot_outline == expected_plot_outline
        assert isinstance(title.directors_summary[0], Person)
        assert len(title.directors_summary) == 1
        assert len(title.creators) == 0
        assert len(title.cast_summary) == 4
        expected_cast_names = [
            'Tim Robbins', 'Morgan Freeman', 'Bob Gunton', 'William Sadler'
        ]
        for name in expected_cast_names:
            assert name in [p.name for p in title.cast_summary]
        expected_writers = ['Stephen King', 'Frank Darabont']
        for name in expected_writers:
            assert name in [p.name for p in title.writers_summary]
        assert len(title.credits) >= 327
        assert (sorted(load_test_data('expected_credits.json')) == sorted(
            [p.imdb_id for p in title.credits]))
        assert isinstance(title.credits[10], Person)
        assert len(title.trailers) == 3

    def test_get_title_by_id_using_proxy(self):
        # Same fixture as test_get_title_by_id, but through the anonymizer.
        imdb = Imdb(locale='en_US', cache=False, anonymize=True)
        title = imdb.get_title_by_id('tt0111161')
        assert title.title == 'The Shawshank Redemption'
        assert title.year == 1994
        assert title.type == 'feature'
        assert title.tagline == ('Fear can hold you prisoner. '
                                 'Hope can set you free.')
        assert isinstance(title.plots, list) is True
        assert len(title.plots) >= 5
        assert isinstance(title.rating, float) is True
        assert sorted(title.genres) == sorted(['Crime', 'Drama'])
        assert isinstance(title.votes, int) is True
        assert title.runtime == 8520
        assert len(title.trailers) == 3

    def test_get_title_by_id_redirection_result(self):
        assert self.imdb.get_title_by_id('tt0000021') is None

    def test_get_title_by_id_excludes_episodes(self):
        assert self.imdb.get_title_by_id('tt3181538') is not None
        imdb = Imdb(exclude_episodes=True)
        title = imdb.get_title_by_id('tt3181538')
        assert title is None

    def test_get_episodes(self):
        assert self.imdb.get_title_by_id('tt0303461') is not None
        imdb = Imdb()
        episodes = imdb.get_episodes('tt0303461')
        assert episodes is not None
        assert len(episodes) == 14
        episode_1 = episodes[0]
        assert episode_1.imdb_id == "tt0579539"
        assert episode_1.type == "tv_episode"
        assert episode_1.title == u'The Train Job'
        assert episode_1.series_name == 'Firefly'
        assert episode_1.release_date == "2002-09-20"
        assert episode_1.year == 2002

    def test_get_episodes_raises_when_exclude_episodes_enabled(self):
        imdb = Imdb(locale='en_US', cache=False, exclude_episodes=True)
        with pytest.raises(ValueError):
            imdb.get_episodes('tt0303461')

    def test_get_person_images(self):
        person_images = self.imdb.get_person_images('nm0000032')
        assert len(person_images) >= 200
        for person_image in person_images[:10]:
            assert person_image.caption is not None
            assert is_valid_url(person_image.url) is True
            assert isinstance(person_image.width, int)
            assert isinstance(person_image.height, int)

    def test_get_title_images(self):
        title_images = self.imdb.get_title_images('tt0111161')
        assert len(title_images) >= 38
        for image in title_images:
            assert isinstance(image, Image) is True

    def test_get_title_by_id_raises_not_found(self):
        with pytest.raises(HTTPError):
            self.imdb.get_title_by_id('tt9999999')
def main(title):
    """Aggregate and print up-to-10 reviews each from Twitter, IMDb and the
    NYTimes for *title*, sorted by date.

    NOTE(review): `reviews.sort(cmp=_cmprev)` uses the Python 2-only `cmp`
    keyword — this script presumably targets Python 2; confirm before porting.
    """
    reviews = []
    # Search tweets
    ts = TwitterSearch(
        consumer_key=os.environ.get('TWITTER_CONSUMER_KEY'),
        consumer_secret=os.environ.get('TWITTER_CONSUMER_SECRET'),
        access_token=os.environ.get('TWITTER_ACCESS_TOKEN'),
        access_token_secret=os.environ.get('TWITTER_TOKEN_SECRET'))
    try:
        ts.connect()
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.setKeywords([
            '#' + title + 'Movie'
        ])  # let's define all words we would like to have a look for
        tso.setLanguage('en')  # English-language tweets only
        tso.setIncludeEntities(
            False)  # and don't give us all those entity information
        # add tweets to reviews list
        results = ts.getSearchResults(tso)
    except TwitterSearchException as e:
        # take care of all those ugly errors if there are some
        logging.exception(str(e))
        ts.cleanUp()
    else:
        # Cap at the first 10 tweets.
        for offset in range(results.getSize()):
            if offset > 9:
                break
            tweet = results.getTweetByIndex(offset)
            reviews.append({
                'author': tweet.getUserName(),
                'summary': tweet.getText(),
                'text': tweet.getText(),
                'date': parser.parse(tweet.getCreatedDate(), ignoretz=True),
                'source': 'Twitter'
            })
    finally:
        ts.disconnect()
    # Search Imdb
    imdb = Imdb()
    try:
        # IndexError here means no search results for the title.
        response = imdb.search_for_title(title)[0]
        title_id = response['imdb_id']
        response = imdb.get_title_reviews(title_id, max_results=10)
    except IndexError as e:
        logging.exception(str(e))
    else:
        for review in response:
            reviews.append({
                'author': review.username,
                'summary': review.summary,
                'text': review.text,
                'date': parser.parse(review.date, ignoretz=True),
                'source': 'IMDB'
            })
    # Search NYTimes
    url = "https://api.nytimes.com/svc/movies/v2/reviews/search.json"
    data = {'query': title, 'api-key': os.environ.get('NY_TIMES_API_KEY')}
    response = requests.get(url, data)
    count = 0
    # Cap at the first 10 NYTimes reviews.
    for review in response.json()['results']:
        if count > 9:
            break
        reviews.append({
            'author': review['byline'],
            'summary': review['headline'],
            'text': review['summary_short'],
            'date': parser.parse(review['date_updated'], ignoretz=True),
            'source': 'NYTimes'
        })
        count += 1
    # Sort reviews by date
    reviews.sort(cmp=_cmprev)
    # Print reviews
    for review in reviews:
        print('(%s) @%s: %s [Source: %s]' %
              (review['date'].strftime('%Y-%m-%d'), review['author'],
               review['summary'], review['source']))