def imdb_rating(movieTitle, year=None):
    """Look up the IMDb rating for a film title.

    Args:
        movieTitle: title string to search IMDb for.
        year: optional release year; a hit is accepted within +/- 2 years.

    Returns:
        The rating as a float, or 0.00 when the title cannot be found,
        has no rating, or is not a feature film / documentary.
    """
    imdb = Imdb()
    try:
        results = imdb.search_for_title(movieTitle)
    except Exception:  # was a bare except; keep the best-effort behavior
        print('WARNING: Could not find the title %s' % movieTitle)
        return 0.00
    if not results:
        # search succeeded but returned nothing; the original raised
        # IndexError on results[0] in this case
        print('WARNING: Could not find the title %s' % movieTitle)
        return 0.00
    if year is None:
        bestHit = results[0]
    else:
        bestHit = None
        for result in results:
            # some hits carry no year; skip them instead of crashing int()
            if result.get('year') is None:
                continue
            movieYear = int(result.get('year'))
            if abs(movieYear - year) <= 2:
                bestHit = result
                break
        if bestHit is None:
            print('WARNING: Could not get match for %s' % movieTitle)
            return 0.00
    print('-=MATCH=- %s --from %s --=-- %s' % (movieTitle, year, bestHit))
    idBestHit = bestHit.get('imdb_id')
    # fetch the title once instead of twice (each call is a network request)
    title = imdb.get_title_by_id(idBestHit)
    rating = title.rating
    movType = title.type
    if movType.lower() not in ('feature', 'documentary'):
        print('WARNING: This is not a feature-film or docu: %s' % movieTitle)
        return 0.00
    if rating is None:
        print('WARNING: Could not get rating from title %s' % bestHit.get('title'))
        return 0.00
    return float(rating)
def save(self, *args, **kwargs):
    """Refresh title, year and plot from IMDb, then persist the model."""
    client = Imdb()
    record = client.get_title_by_id(self.imdb_id)
    self.title = record.title
    self.year = record.year
    self.plot = record.plot_outline
    super(Film, self).save(*args, **kwargs)
def metadata(s):
    """Search IMDb for *s* and return the Title object of the best hit.

    The original built a non-proxied ``Imdb()`` client and immediately
    overwrote it; only the anonymized client is kept.
    """
    imdb = Imdb(anonymize=True)  # to proxy requests
    names = imdb.search_for_title(s)
    title = imdb.get_title_by_id(names[0][u'imdb_id'])
    # cast_rating is called for its result/side effect but the value is
    # discarded by the original code as well -- kept for behavior parity
    cast = cast_rating(title.cast_summary)
    return title
def test(title):
    """Return a short formatted summary for the first IMDb hit on *title*.

    Returns 'Movie not found.' when the search yields no results.
    The large block of commented-out interactive-selection / attribute-dump
    code was dead and has been removed.
    """
    imdb = Imdb()
    try:
        search = imdb.search_for_title(title)[0]
    except IndexError:
        # an empty result list makes [0] raise IndexError
        return 'Movie not found.'
    movie = imdb.get_title_by_id(search['imdb_id'])
    return '{} ({})\n{}\nIMDB: {}'.format(movie.title, movie.year,
                                          movie.plot_outline, movie.rating)
def test_get_title_by_id_excludes_episodes(self):
    # The default client (episodes included) resolves this episode id.
    assert self.imdb.get_title_by_id('tt3181538') is not None
    # With exclude_episodes=True the very same id must come back as None.
    filtered_client = Imdb(exclude_episodes=True)
    assert filtered_client.get_title_by_id('tt3181538') is None
def add_from_imdb(imdb_id, youtube_id):
    """Retrieves movie data from the inofficial IMDb REST API as well as
    the IMDB lib imdbpie.

    The merged record is cached as <module dir>/imdb/<imdb_id>.json and
    updated with the imdbpie rating/title (and optional youtube_id).
    """
    file_name = os.path.dirname(__file__) + "/imdb/" + imdb_id + ".json"
    if not os.path.isdir(os.path.dirname(file_name)):
        os.mkdir(os.path.dirname(file_name))
    if os.path.isfile(file_name):
        # reuse the cached copy; 'with' guarantees the handle is closed
        # (the original never closed it)
        with open(file_name) as imdb_saved:
            imdb_save = json.loads(imdb_saved.read())
    else:
        # NOTE(review): if __api_key is declared inside class Movie it is
        # stored name-mangled as _Movie__api_key, so this lookup only works
        # when this function itself lives inside Movie -- confirm.
        response = urllib.urlopen('http://imdb.wemakesites.net/api/' + imdb_id
                                  + '?api_key=' + Movie.__api_key)
        json_response = response.read()
        imdb_data = json.loads(json_response)
        imdb_save = imdb_data['data']
    # single proxied client; the original created and discarded a
    # non-proxied one first
    imdb = Imdb(anonymize=True)  # to proxy requests
    movie = imdb.get_title_by_id(imdb_id)
    if not movie.title:
        movie.title = raw_input("Movie Title not defined. Please set: ")
    imdb_save['rating'] = movie.rating
    imdb_save['title'] = movie.title
    if youtube_id:
        imdb_save['youtube_id'] = youtube_id
    with open(file_name, 'w') as imdb_file:
        imdb_file.write(json.dumps(imdb_save))
def test_get_title_by_id_excludes_episodes(self):
    """Episode ids resolve normally but vanish when episodes are excluded."""
    episode_id = "tt3181538"
    assert self.imdb.get_title_by_id(episode_id) is not None
    no_episodes = Imdb(exclude_episodes=True)
    assert no_episodes.get_title_by_id(episode_id) is None
def episodeBuilder(episode_id):
    """Fetch an episode by IMDb id; return (plot outline, title).

    The original instantiated a non-proxied ``Imdb()`` client and
    immediately overwrote it; only the anonymized client is kept.
    """
    imdb = Imdb(anonymize=True)
    episode = imdb.get_title_by_id(episode_id)
    return episode.plot_outline, episode.title
class imdb_api:
    """Small interactive wrapper around imdbpie: search, pick a hit, fetch."""

    def __init__(self, anonymize=False):
        self.imdb = Imdb(anonymize=anonymize)

    def search(self, title, imdb_id=""):
        """Resolve *title* (or a known imdb_id) to [title] or [title, episodes]."""
        if not imdb_id:
            print("Searching")
            results = self.imdb.search_for_title(title)
            chosen = self.search_select(results)
            title_id = results[chosen]["imdb_id"]
        else:
            title_id = imdb_id
        print("Looking up series")
        title = self.imdb.get_title_by_id(title_id)
        if title.type == "tv_series":
            print("Detected TV series, downloading episode list")
            episodes = self.imdb.get_episodes(title_id)
            print(episodes)
            return [title, episodes]
        else:
            return [title]

    def search_select(self, results):
        """Interactively pick one search result; return its list index."""
        def get_input(allow_0=True):
            # loop until the user types a number; "0" means "show more"
            print("Which show do you want? type 0 to see more")
            out = 0
            while out == 0:
                inp = input("> ")
                if inp == "0" and allow_0:
                    return out
                else:
                    try:
                        out = int(inp)
                    except ValueError:
                        print("Not a number")
            return out

        # BUG FIX: with zero or one result `chosen` was never assigned and
        # `return chosen - 1` raised UnboundLocalError; default to the first
        # (and only) entry instead.
        chosen = 1
        if len(results) > 1:
            chosen = 0
            for index, show in enumerate(results):
                print("[{0}] ({3})\t{1}\t{2}".format(str(index + 1),
                                                     show["title"],
                                                     show["year"],
                                                     show["imdb_id"]))
                # pause every 10 entries to let the user choose
                if ((index + 1) % 10) == 0:
                    chosen = get_input()
                    if chosen != 0:
                        break
            if chosen == 0:
                print("No more")
                chosen = get_input(allow_0=False)
        return chosen - 1

#imdb = imdb_api()
#x = imdb.search("Person of Interest")
#print(x)
def getMovieInfo(title):
    """Return a list of formatted facts about the top search hit for *title*."""
    client = Imdb(anonymize=True)
    top_hit = client.search_for_title(title)[0]
    movie = client.get_title_by_id(top_hit['imdb_id'])
    facts = []
    facts.append('Title: ' + movie.title)
    facts.append('Rating: ' + str(movie.rating))
    facts.append('Runtime: ' + str(int(movie.runtime) / 60))
    facts.append('Release Date: ' + movie.release_date)
    facts.append('Certification: ' + movie.certification)
    return facts
def updateart():
    """Download and cache poster art: IMDb cover images for movies and TVDB
    season banners for TV shows, stored base64-inlined in the Posters table.
    Runs on Python 2 (print statements, str.encode('base64'))."""
    import urllib
    from imdbpie import Imdb
    imdb = Imdb()
    print 'updating art for movies(imdb cover)'
    for movie in Movie.query.all():
        print 'processing %s' % movie.c00
        imdbid = movie.c09  # presumably c09 holds the IMDb id -- TODO confirm schema
        if Posters.query.filter_by(apiid=imdbid).first() is not None:
            print 'skipping %s as it is allready in the database' % movie.c00
            continue
        try:
            title = imdb.get_title_by_id(imdbid)
            if title.cover_url is None:
                continue
            poster = Posters()
            poster.apiid = imdbid
            poster.type = 'movie'
            response = urllib.urlopen(title.cover_url)
            data = response.read()
            # Python 2 base64 codec; strip newlines to embed as a data URI
            data64 = data.encode('base64').replace('\n', '')
            poster.imgdata = 'data:image/jpeg;base64,%s' % data64
            # print poster.imgdata
            db.session.add(poster)
            db.session.commit()
        except:
            # NOTE(review): bare except silently drops any failure for this
            # movie -- deliberate best-effort, but consider logging
            continue
    print 'updating art for tv'
    from tvdb_api import Tvdb
    t = Tvdb(banners=True)
    for show in Tvshow.query.all():
        print 'processing %s' % show.c00
        tvdbid = show.c12  # presumably c12 holds the TVDB id -- TODO confirm schema
        if Posters.query.filter_by(apiid=tvdbid).first() is not None:
            print 'skipping %s as it is allready in the database' % show.c00
            continue
        try:
            tvdbshow = t[int(tvdbid)]
            # first available season banner is used
            bannerkeys = tvdbshow['_banners']['season']['season'].keys()
            banner_url = tvdbshow['_banners']['season']['season'][bannerkeys[0]]['_bannerpath']
            poster = Posters()
            poster.apiid = tvdbid
            poster.type = 'tv'
            response = urllib.urlopen(banner_url)
            data = response.read()
            data64 = data.encode('base64').replace('\n', '')
            poster.imgdata = 'data:image/jpeg;base64,%s' % data64
            # print poster.imgdata
            db.session.add(poster)
            db.session.commit()
        except:
            # NOTE(review): bare except, same best-effort pattern as above
            continue
def identify_movies(movies):
    """ identifying the movies from IMDB """
    client = Imdb()
    matched_ids = [
        info.get('imdb_id')
        for key, vals in movies.items()
        for val in vals
        for info in client.search_for_title(val)
        if key == info.get('year') and val == info.get('title').lower()
    ]
    return [client.get_title_by_id(mid) for mid in matched_ids]
def imdb_import(number):
    """
    Helper method to import large quantities of movies from IMDB
    as sample data.
    """
    reset_database()
    client = Imdb(cache=True)
    imported = []
    for position, entry in enumerate(client.top_250()):
        # stop once the requested number of movies has been imported
        if position >= int(number):
            break
        movie = Movie()
        details = client.get_title_by_id(entry['tconst'])
        movie.name = details.title
        movie.year = details.year
        movie.imdb_id = details.imdb_id
        movie.save()
        imported.append(movie)
        # adding director and actors
        for person in details.credits:
            if person.token == "directors":
                movie.director = Person.objects.create_or_find_imdb(person)
            elif person.token == "cast":
                movie.actors.add(Person.objects.create_or_find_imdb(person))
        movie.save()
        # create 0-2 physical copies at random
        for _ in range(random.randrange(3)):
            copy = MovieCopy()
            copy.movie = movie
            copy.save()
    # imdb.get_title_images("tt0468569")
    # imdb.get_person_images("nm0000033")
    return {
        'number_imported': number,
        'kind': 'movies',
        'movies': imported,
    }
def retrieve_imdb_info(movies_df): imdb = Imdb(anonymize=True) # movies_df = movies_df.head() start = time.time() start_row = movies_df.loc[movies_df['imdbId'] == 2267968].iloc[0] start_row_index = start_row.name indices = movies_df.index[start_row_index + 1:] # indices = indices[:2] with open('imdb_movies.csv', 'a') as csv_file: csv_writer = csv.writer(csv_file, delimiter=',') for index in indices: row = movies_df.iloc[index] imdb_id = row['imdbId'] imdb_id = imdb_id_conversion_dict.get(imdb_id, imdb_id) imdb_id_str = 'tt%07d' % imdb_id # print imdb_id, imdb_id_str imdb_title = imdb.get_title_by_id(imdb_id_str) elapsed_time = int(round(time.time() - start, 0)) minutes = elapsed_time // 60 seconds = elapsed_time % 60 print '%s %d:%02d %s' % (index, minutes, seconds, imdb_title) imdb_movie = ImdbMovie(imdb_id, imdb_title.title, imdb_title.type, imdb_title.year, imdb_title.tagline, imdb_title.rating, imdb_title.certification, imdb_title.genres, imdb_title.votes, imdb_title.runtime, imdb_title.release_date, convert_person_list(imdb_title.directors_summary), convert_person_list(imdb_title.creators), convert_person_list(imdb_title.cast_summary), convert_person_list(imdb_title.writers_summary), ) csv_writer.writerow(imdb_movie)
def test_get_title_by_id_using_proxy(self):
    """Fetching tt0111161 via the anonymizing proxy yields the full record."""
    proxied = Imdb(locale="en_US", cache=False, anonymize=True)
    title = proxied.get_title_by_id("tt0111161")
    expected_tagline = "Fear can hold you prisoner. Hope can set you free."
    assert title.title == "The Shawshank Redemption"
    assert title.year == 1994
    assert title.type == "feature"
    assert title.tagline == expected_tagline
    assert isinstance(title.plots, list) is True
    assert len(title.plots) == 5
    assert isinstance(title.rating, float) is True
    assert sorted(title.genres) == sorted(["Crime", "Drama"])
    assert isinstance(title.votes, int) is True
    assert title.runtime == 8520
    assert len(title.trailers) == 3
def test_get_title_by_id_using_proxy(self):
    """The anonymized client must return the complete Shawshank record."""
    anon_client = Imdb(locale='en_US', cache=False, anonymize=True)
    fetched = anon_client.get_title_by_id('tt0111161')
    assert fetched.title == 'The Shawshank Redemption'
    assert fetched.year == 1994
    assert fetched.type == 'feature'
    assert fetched.tagline == ('Fear can hold you prisoner. '
                               'Hope can set you free.')
    assert isinstance(fetched.plots, list) is True
    assert len(fetched.plots) == 6
    assert isinstance(fetched.rating, float) is True
    assert sorted(fetched.genres) == sorted(['Crime', 'Drama'])
    assert isinstance(fetched.votes, int) is True
    assert fetched.runtime == 8520
    assert len(fetched.trailers) == 3
def test_get_title_by_id_using_proxy(self):
    # Build a fresh, cache-less, anonymized client and fetch Shawshank.
    client = Imdb(locale='en_US', cache=False, anonymize=True)
    result = client.get_title_by_id('tt0111161')
    assert result.title == 'The Shawshank Redemption'
    assert result.year == 1994
    assert result.type == 'feature'
    assert result.tagline == ('Fear can hold you prisoner. '
                              'Hope can set you free.')
    assert isinstance(result.plots, list) is True
    assert len(result.plots) == 5
    assert isinstance(result.rating, float) is True
    assert sorted(result.genres) == sorted(['Crime', 'Drama'])
    assert isinstance(result.votes, int) is True
    assert result.runtime == 8520
    assert len(result.trailers) == 3
def seasonBuilder(title):
    """Look up *title* on IMDb and return overview data plus an episode map.

    Returns:
        (show_title, year, cover_url, plot_outline, seasons) where seasons is
        {season_number: {episode_number: [title, release_date, imdb_id]}}.
        Prints a message and returns None when the search has no results.
    """
    # single proxied client; the original also built and discarded a
    # non-proxied one first
    imdb = Imdb(anonymize=True)
    title_json = imdb.search_for_title(title)
    if title_json == []:
        print('No Results Found')
        return None  # explicit: the original fell off the end returning None
    # get imdb id to get more information
    title_id = title_json[0]['imdb_id']
    result = imdb.get_title_by_id(title_id)
    show_title = result.title
    year = result.year
    image_url = result.cover_url
    description = result.plot_outline
    # build season dict to send back to main file
    seasons = {}
    episodes = {}
    season_counter = 1
    for e in imdb.get_episodes(title_id):
        # NOTE: assumes episodes arrive grouped by season in ascending order
        if e.season > season_counter:
            # season rollover: store the finished season, start the next
            seasons[season_counter] = episodes
            episodes = {}
            season_counter += 1
        episodes[e.episode] = [e.title, e.release_date, e.imdb_id]
    # BUG FIX: the original never stored the final (or only) season
    if episodes:
        seasons[season_counter] = episodes
    return show_title, year, image_url, description, seasons
class CommonMetadataIMDB(object):
    """
    Class for interfacing with imdb
    """
    def __init__(self, cache=True, cache_dir=None):
        """Open a connection to imdb.

        Args:
            cache: enable imdbpie's response cache. BUG FIX: the original
                tested ``cache is not None`` so ``cache=False`` still built
                a caching client; a plain truth test honors False.
            cache_dir: optional directory for the cache (only used when
                caching is enabled).
        """
        if cache:
            if cache_dir is not None:
                self.imdb = Imdb(cache=True, cache_dir=cache_dir)
            else:
                self.imdb = Imdb(cache=True)
        else:
            self.imdb = Imdb()

    def com_imdb_title_search(self, media_title):
        """
        # fetch info from title
        """
        return self.imdb.search_for_title(media_title)

    def com_imdb_id_search(self, media_id):
        """
        # fetch info by ttid
        """
        return self.imdb.get_title_by_id(media_id)

    def com_imdb_person_by_id(self, person_id):
        """
        # fetch person info by id
        """
        return self.imdb.get_person_by_id(person_id)

    def com_imdb_person_images_by_id(self, person_id):
        """
        # fetch person images by id
        """
        return self.imdb.get_person_images(person_id)

    def com_imdb_title_review_by_id(self, media_id):
        """
        # fetch the title review
        """
        return self.imdb.get_title_reviews(media_id)
def explore_imdb():
    """Dump the interesting attributes of title tt0468569 to stdout --
    a quick exploration of the imdbpie Title object. Python 2 (print)."""
    imdb = Imdb(anonymize=True)
    title = imdb.get_title_by_id("tt0468569")
    print 'title: %s' % title.title
    print 'type: %s' % title.type
    print 'year: %s' % title.year
    print 'tagline: %s' % title.tagline
    print 'rating: %s' % title.rating
    print 'certification: %s' % title.certification
    print 'genres: %s' % title.genres
    print 'num_votes: %s' % title.votes
    print 'runtime: %s' % title.runtime
    print 'release_date: %s' % title.release_date
    print 'directors_summary: %s' % title.directors_summary
    print 'creators: %s' % title.creators
    print 'cast_summary: %s' % title.cast_summary
    print 'writers_summary: %s' % title.writers_summary
class ImdbCommand(Command):
    """Bot command that searches IMDb and replies with an HTML summary."""

    name = 'imdb'
    aliases = ['movie']
    description = 'Searches IMDB for movie titles.'

    def __init__(self, bot, config):
        super().__init__(bot, config)
        # caching client that never resolves episode ids
        self._imdb = Imdb(cache=True, exclude_episodes=True)

    def run(self, message, args):
        """Search for the joined args; reply with the top hit's details."""
        if not args:
            self.reply(message, 'Please supply some search terms!')
            return
        self.bot.telegram.send_chat_action(message.chat.id, 'typing')
        hits = self._imdb.search_for_title(' '.join(args))
        if not hits:
            self.reply(message, 'No results found!')
            return
        movie = self._imdb.get_title_by_id(hits[0]['imdb_id'])
        minutes, _ = divmod(movie.runtime, 60)
        cast = ', '.join([person.name for person in movie.cast_summary[:5]])
        directors = ', '.join([person.name for person in movie.directors_summary[:5]])
        pieces = [
            '<b>URL:</b> http://www.imdb.com/title/{0}\n'.format(telegram_escape(movie.imdb_id)),
            '<b>Title:</b> {0}\n'.format(telegram_escape(movie.title)),
            '<b>Year:</b> {0}\n'.format(movie.year),
            '<b>Genre:</b> {0}\n'.format(telegram_escape(', '.join(movie.genres[:3]))),
            '<b>Rating:</b> {0}\n'.format(movie.rating),
            '<b>Runtime:</b> {0} minutes\n'.format(minutes),
            '<b>Certification:</b> {0}\n'.format(movie.certification),
            '<b>Cast:</b> {0}\n'.format(telegram_escape(cast)),
            '<b>Director(s):</b> {0}\n\n'.format(telegram_escape(directors)),
            telegram_escape(movie.plots[0]),
        ]
        self.reply(message, ''.join(pieces), parse_mode='HTML')
# ##### Import the Top 100 Actors and drop unwanted columns

# In[ ]:

top_actors = pd.read_csv("top_100_actors.csv")
top_actors.drop(['created', 'modified'], inplace=True, axis=1)

# ##### Pull selected movie information and add columns to top_250 dataframe

# In[ ]:

# BUG FIX: the original called pd.concat(...) and discarded its return value
# (concat is not in-place), then wrote via chained indexing
# top_250.ix[index]['genres'] = ..., which assigns to a temporary copy and is
# also deprecated. Create the column once and assign with .at instead.
if 'genres' not in top_250.columns:
    top_250['genres'] = None
for index, row in top_250.iterrows():
    movie = imdb.get_title_by_id(row['tconst'])
    print(movie.genres)
    print(index)
    top_250.at[index, 'genres'] = movie.genres
# top_250.set_value(index, 'genres', movie.genres)
# top_250.iloc[index]['certification'] = movie.certification
# top_250.iloc[index]['runtime'] = movie.runtime
# top_250.iloc[index]['writers_summary'] = movie.writers_summary
# top_250.iloc[index]['directors_summary'] = movie.directors_summary
# top_250.iloc[index]['creators'] = movie.creators
# top_250.iloc[index]['cast_summary'] = movie.cast_summary
# top_250.iloc[index]['credits'] = movie.credits
def getMovieInfo(title):
    """Summarize the first IMDb search hit for *title* as display strings."""
    proxied = Imdb(anonymize=True)
    best = proxied.search_for_title(title)[0]
    movie = proxied.get_title_by_id(best['imdb_id'])
    return [
        'Title: ' + movie.title,
        'Rating: ' + str(movie.rating),
        'Runtime: ' + str(int(movie.runtime) / 60),
        'Release Date: ' + movie.release_date,
        'Certification: ' + movie.certification,
    ]
import psycopg2
from imdbpie import Imdb
import random

# single proxied client; the original instantiated a plain client first and
# immediately discarded it
imdb = Imdb(anonymize=True)
# network call kept for behavior parity even though the result is unused
variable = imdb.search_for_title("The Dark Knight")[0]
# conn = psycopg2.connect()
# cur = conn.cursor()
title = imdb.get_title_by_id("tt0468569")
print (title.title)
print (title.rating)
print (title.runtime)

listOfPopularMovies = imdb.top_250()
# print 15 randomly chosen entries from the top-250 list
# (replaces the manual `x = 0; while x < 15: ... x = x + 1` counter)
for _ in range(15):
    temp = random.randint(1, 249)
    t = listOfPopularMovies[temp]
    tid = t["tconst"]
    print (tid)
    print (t["title"] + " is the " + str(temp) + "th rated movie")
    print ("It's score is: " + str(t["rating"]))
help="""list of movie's ids to download. Eg:\n tt1289401 - Ghostbusters\n tt1386697 - Suicide Squad\n tt2975590 - Batman v Superman: Dawn of Justice\n tt2094766 - Assassin's Creed""")
args = parser.parse_args()
movies_ids = args.movies_ids
output = args.output

# one client, one title lookup per requested id
imdb = Imdb()
imdbid_to_movie = {}
for movie_id in movies_ids:
    imdbid_to_movie[movie_id] = imdb.get_title_by_id(movie_id)

# Python 2 print statements: summarize each movie before downloading
print "Movies to download:\n"
for movie_id in movies_ids:
    movie = imdbid_to_movie[movie_id]
    print "%s (%s):" % (movie.title, movie.imdb_id)
    print "\tYear:", movie.year
    print "\tTagline:", movie.tagline
    print "\tRating:", movie.rating
    print "\tGenres:", ", ".join(movie.genres)
    print "\tDirectors:", ", ".join([person.name for person in movie.directors_summary])
    print "\n"

reviews_list = []
        new = ''
        for x in ss:
            new = new + "\'" + "\'" + x
        return new[2:]
    else:
        return s

# single proxied client used from here on; the first assignment is
# immediately overwritten -- NOTE(review): dead instantiation
imdb = Imdb()
imdb = Imdb(anonymize=True)  # to proxy requests
top250 = []
top250 = imdb.top_250()
for item in top250:
    try:
        title = imdb.get_title_by_id(item['tconst'])
        # first trailer URL when available, else the string 'None'
        if len(title.trailers) > 0:
            trailer_url = title.trailers[0]['url']
        else:
            trailer_url = 'None'
        # NOTE(review): SQL built by string formatting -- values are passed
        # through single_quote() escaping; prefer parameterized queries
        new_movie = (
            '''INSERT INTO movie_movie VALUES (\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\')'''
            .format(
                item['tconst'],
                single_quote(str(item['title'])),
                item['year'],
                title.release_date,
                item['rating'],
                single_quote(item['image']['url']),
                single_quote(str(title.plot_outline)),
                single_quote(str(trailer_url)),
def test_get_title_by_id_returns_none_when_is_episode(self):
    """An episode id must resolve to None when episodes are excluded."""
    episode_free = Imdb(exclude_episodes=True)
    result = episode_free.get_title_by_id('tt0615090')
    assert result is None
class ImdbClient:
    """Aggregates three data sources: imdbpy (IMDb scraping), imdbpie
    (IMDb API) and thetvdb, plus tweets stored in MongoDB. Python 2
    (print statements, sys.maxint)."""

    def __init__(self):
        self.imdbpy = IMDb()
        # NOTE(review): the first imdbpie client is immediately overwritten
        self.imdb = Imdb(exclude_episodes=False)
        self.imdb = Imdb(anonymize=True)  # to proxy requests
        self.db = api.TVDB('B43FF87DE395DF56')

    def get_tweets_from_mongo(self, show, limit):
        """Return up to *limit* tweet texts stored for *show*."""
        # Connect to mongo
        client = MongoClient()
        # access movie stream db
        movies = client['movieratings_stream']
        # colletion of tweets
        tweets = movies['tweets']
        tweet_text = []
        counter = 0
        # iterate through cursor that takes the 'limit' most recent tweets with hashtag 'show'
        for tweet in tweets.find({'show_title': show}):  # .sort('created_at', pymongo.DESCENDING):
            if counter < limit:
                tweet_text.append(tweet.get("tweet_text"))
                counter += 1
            else:
                break
        return tweet_text

    def get_show_id(self, show_title):
        """Fuzzy-match *show_title* against search hits; return an imdb id
        (first hit whose title scores >= 90) or None."""
        title_list = list(self.imdb.search_for_title(show_title))
        index = 0
        show_id = None
        while index < len(title_list) and show_id is None:
            if title_list[index] is not None:
                result = title_list[index][u'title'].lower()
                query = show_title.lower()
                # if result in query:
                if fuzz.ratio(result, query) >= 90:
                    # print title_list
                    show_id = title_list[index][u'imdb_id']
            index += 1
        return show_id

    # TODO: get rid of usage of this
    def searchShow(self, tvshow):
        """Return all reviews for the show, or [] when no id matched."""
        title_id = self.get_show_id(tvshow)
        print(title_id)
        reviews = []
        print(tvshow)
        if title_id is not None and title_id != '':
            reviews = self.imdb.get_title_reviews(title_id, max_results=sys.maxint)
            print reviews
        else:
            print("Invalid show id")
        return reviews

    def fetch_reviews(self, episode_id):
        """Return every review for a known episode id."""
        reviews = self.imdb.get_title_reviews(episode_id, max_results=sys.maxint)
        return reviews

    def getCurrentImdbRating(self, tvshow):
        """Resolve the show by name and return its rating as a float."""
        tvshowid = self.get_show_id(tvshow)
        title = self.imdb.get_title_by_id(tvshowid)
        return float(title.rating)

    # dont use this, use example from
    # http://imdbpy.sourceforge.net/docs/README.series.txt
    def get_all_episode_names(self, tvshow):
        """Collect non-empty episode names across all seasons via TVDB."""
        result = self.db.search(tvshow, 'en')
        show = result[0]
        res = []
        for x in range(1, len(show)):
            season = show[x]
            for y in range(1, len(season) + 1):
                if season[y].EpisodeName is not None and season[y].EpisodeName != '':
                    res.append(season[y].EpisodeName)
        return res

    def get_show(self, show_id):
        """Fetch a show via imdbpy (numeric id: 'tt' stripped) with episodes."""
        show = self.imdbpy.get_movie(show_id.replace('t', ''))
        self.imdbpy.update(show, 'episodes')
        print("show_show(" + show_id + "): " + str(show))
        return show

    # episode names for a specific season of tvshow
    def get_specific_episode_names(self, tvshow, season):
        """NOTE(review): the *season* parameter is shadowed by show[1], so
        this always reads season 1 -- confirm intent."""
        result = self.db.search(tvshow, 'en')
        show = result[0]
        res = []
        season = show[1]
        for x in range(1, len(season) + 1):
            if season[x].EpisodeName is not None:
                print season[x].EpisodeName
                res.append(season[x].EpisodeName)
        return res

    def get_all_episode_reviews(self, episodelist, tvshow):
        """Fetch reviews for every '<episode> <show>' combination."""
        reviews = []
        for episode in episodelist:
            curEpisode = episode + " " + tvshow
            reviews.append(self.searchShow(curEpisode))  # call searchshow for each
        print("Episodes:\n" + str(reviews))
        return reviews
class IMDBGraph:
    """Builds a NetworkX graph of movies, people, genres and years fetched
    from IMDb via imdbpie."""

    def __init__(self, anonymize=True):
        self._imdb = Imdb(anonymize=anonymize, cache=True)
        self._graph = nx.Graph()

    def _add_node(self, name, nodetype):
        ''' Add simple node without attributes '''
        if name not in self._graph.nodes():
            self._graph.add_node(name, node=nodetype)

    def addPerson(self, idname):
        ''' add New actor/actress no the graph '''
        actor = self._imdb.get_person_by_id(idname)
        self._graph.add_node(actor.name)

    def addMovie(self, idname):
        ''' Add only the movie's title node. '''
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')
        logging.info("Loading {0}".format(idname))

    def addMovieAndConnect(self, idname):
        ''' Add a movie plus year/genre/person nodes and connect them. '''
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')
        self._add_node(movie.year, 'year')
        logging.info("Loading {0}".format(idname))
        for genre in movie.genres:
            self._add_node(genre, 'genre')
            self._graph.add_edge(movie.title, genre)
        for person in movie.credits:
            self._add_node(person.name, 'actor')
            # edge weight mixes rating and vote count
            self._graph.add_edge(movie.title, person.name,
                                 weight=movie.rating + movie.votes,
                                 rating=movie.rating, votes=movie.votes)
        for person in movie.cast_summary:
            self._add_node(person.name, "actor")
            self._graph.add_edge(movie.title, person.name)

    def addPopular(self, limit=2):
        ''' Add popular movies and shows '''
        shows = self._imdb.popular_shows()
        #movies = self._imdb.top_250()
        if limit > len(shows):
            limit = len(shows)
        for show in shows[:limit]:
            self.addMovie(show['tconst'])

    def removeNode(self, nodename):
        self._graph.remove_node(nodename)

    def addEdge(self, innode, outnode, prop=None):
        ''' Connect two existing nodes; raises when either is missing. '''
        if innode not in self._graph:
            raise Exception("{0} not in graph".format(innode))
        if outnode not in self._graph:
            raise Exception("{0} not in graph".format(outnode))
        self._graph.add_edge(innode, outnode, prop=prop)

    def components(self):
        ''' Return (connected components, degree view).

        BUG FIX: the original computed both values and silently discarded
        them, always returning None.
        '''
        comp = nx.connected_components(self._graph)
        degree = nx.degree(self._graph)
        return comp, degree

    def avg_degree(self):
        ''' Return average number of degree for each node '''
        return nx.average_neighbor_degree(self._graph)

    def avg_degree_connectivity(self):
        return nx.average_degree_connectivity(self._graph)

    def clustering(self):
        ''' Compute a bipartite clustering coefficient for nodes. '''
        return nx.clustering(self._graph)

    def get_item(self, item):
        ''' Getting node from the graph '''
        return self._graph.node[item]

    def filter_edges(self, param, func):
        ''' Yield (node, neighbor, value) for edges whose *param* attribute
        satisfies *func*. NOTE(review): adjacency_iter()/node[] are the
        NetworkX 1.x spellings. '''
        for n, nbrs in self._graph.adjacency_iter():
            for nbr, attr in nbrs.items():
                if len(attr) == 0 or param not in attr:
                    continue
                data = attr[param]
                if func(data):
                    yield (n, nbr, data)

    def cliques(self):
        ''' return all cluques from the graph '''
        return nx.find_cliques(self._graph)

    def stat(self):
        ''' Return basic statistics of the graph '''
        return {
            'nodes': self._graph.number_of_nodes(),
            'edges': self._graph.number_of_edges(),
            'density': nx.density(self._graph)
        }

    def save(self, outpath):
        ''' save graph to the file '''
        pass
def start_process(filenames, mode):
    """Tag (and optionally sub/remux) a batch of movie files.

    Args:
        filenames: list of files to be processed (extension assumed 4 chars).
        mode: 1 = mp4 to tagged mp4; 2 = mp4 with sub to subbed+tagged mp4;
              3 = mkv to tagged mp4; 4 = mkv with sub to subbed+tagged mp4.

    Metadata comes from TMDb (search/poster) and IMDb (title/rating/plot);
    the result is written via ffmpeg + mutagen MP4 tags. Failures are
    collected in the module-level errored_files list.
    """
    searchindex = 0
    for filename in filenames:
        try:
            title = filename[:-4]
            print('\nFetching movie data for "' + title + '"')
            search = tmdb.Search()
            response = search.movie(query=title)
            # getting a Movies object from the id that we got from the search
            # results
            try:
                # sometimes blank search results are returned
                tmdb_movie = tmdb.Movies(search.results[searchindex]['id'])
            except IndexError:
                # keep prompting until a search returns results
                while len(search.results) == 0:
                    title = input("\nCould not find the movie, Enter"
                                  " alternate movie title >> ")
                    searchindex = int(input('Search result index >> '))
                    response = search.movie(query=title)
                    try:
                        tmdb_movie = (tmdb.Movies(
                            search.results[searchindex]['id']))
                    except IndexError:
                        continue
            # we get the info about the movie
            response = tmdb_movie.info()
            # making an imdb object
            imdb = Imdb()
            # tmdb_movie.imdb_id is the imdb id of the movie that we searched
            # before using tmdb
            imdb_movie = imdb.get_title_by_id(tmdb_movie.imdb_id)
            # using imdb provided movie name and year for the output file,
            # stripping characters that are invalid in filenames
            newfilename = (imdb_movie.title + ' (' + str(imdb_movie.year) +
                           ').mp4')
            newfilename = (newfilename.replace(':', ' -').replace('/', ' ').replace(
                '?', ''))
            command = ""
            if mode == 1:
                # it is required to rename it as its already an mp4 file that
                # wasn't proccessed by ffmpeg
                os.rename(filename, newfilename)
            if mode == 2 or mode == 4:
                command = ('ffmpeg -i "' + filename +
                           '" -sub_charenc UTF-8 -i "' + filename[:-4] +
                           '.srt" ' + '-map 0 -map 1 -c copy -c:s mov_text '
                           '-metadata:s:s:0 handler="English Subtitle" '
                           '-metadata:s:s:0 language=eng '
                           '-metadata:s:a:0 handler="" '
                           '-metadata:s:v:0 handler="" "' + newfilename + '"')
                subprocess.run(command)
            if mode == 3:
                command = ('ffmpeg -i "' + filename +
                           '" -c copy -c:s mov_text '
                           '-metadata:s:s:0 handler="English" '
                           '-metadata:s:s:0 language=eng '
                           '-metadata:s:a:0 handler="" '
                           '-metadata:s:v:0 handler="" '
                           '"' + newfilename + '"')
                subprocess.run(command)
            # the poster is fetched from tmdb only if there is no file
            # named " filename + '.jpg' " in the working directory
            # this way user can provide their own poster image to be used
            poster_filename = filename[:-4] + '.jpg'
            if not os.path.isfile(poster_filename):
                print('\nFetching the movie poster...')
                path = search.results[searchindex]['poster_path']
                poster_path = r'https://image.tmdb.org/t/p/w640' + path
                uo = urllib.request.urlopen(poster_path)
                with open(poster_filename, "wb") as poster_file:
                    poster_file.write(uo.read())
                    poster_file.close()
            imdb_rating_and_plot = str('IMDb rating [' +
                                       str(float(imdb_movie.rating)) +
                                       '/10] - ' + imdb_movie.plot_outline)
            # setting the genres of the movie. I use ';' as a delimeter
            # to searate the multiple genre values
            genre = ';'.join(imdb_movie.genres)
            # Going overboard and adding directors name to artist tag of
            # the mp4 file
            directors = imdb_movie.directors_summary
            director = directors[0].name
            video = MP4(newfilename)
            with open(poster_filename, "rb") as f:
                video["covr"] = [
                    MP4Cover(f.read(), imageformat=MP4Cover.FORMAT_JPEG)
                ]
            video['\xa9day'] = str(imdb_movie.year)
            video['\xa9nam'] = imdb_movie.title
            video['\xa9cmt'] = imdb_rating_and_plot
            video['\xa9gen'] = genre
            video['\xa9ART'] = director
            print('\nAdding poster and tagging file...')
            try:
                video.save()
            # I have encountered this error in a previous version
            # of the script; now I handle it by removing the metadata
            # of the file. That seems to solve the problem
            except OverflowError:
                remove_meta_command = ('ffmpeg -i "' + newfilename +
                                       '" -codec copy -map_metadata -1 "' +
                                       newfilename[:-4] + 'new.mp4"')
                subprocess.run(remove_meta_command)
                video_new = MP4(newfilename[:-4] + 'new.mp4')
                with open(poster_filename, "rb") as f:
                    video_new["covr"] = [
                        MP4Cover(f.read(), imageformat=MP4Cover.FORMAT_JPEG)
                    ]
                video_new['\xa9day'] = str(imdb_movie.year)
                video_new['\xa9nam'] = imdb_movie.title
                video_new['\xa9cmt'] = imdb_rating_and_plot
                video_new['\xa9gen'] = genre
                video_new['\xa9ART'] = director
                print('\nAdding poster and tagging file...')
                try:
                    video_new.save()
                    if not os.path.exists('auto fixed files'):
                        os.makedirs('auto fixed files')
                    os.rename(newfilename[:-4] + 'new.mp4',
                              'auto fixed files\\' + newfilename[:-4] + '.mp4')
                    os.remove(newfilename)
                except OverflowError:
                    errored_files.append(filename +
                                         (' - Could not save even after'
                                          'striping metadata'))
                    continue
            os.remove(poster_filename)
            print('\n' + filename +
                  (' was proccesed successfuly!\n\n===================='
                   '======================================'))
        except Exception as e:
            # any unexpected failure: record it and move on to the next file
            print('\nSome error occured while processing ' + filename +
                  '\n\n====================================================')
            errored_files.append(filename + ' - ' + str(e))
from imdbpie import Imdb
import json

imdb = Imdb(anonymize=True, cache=True)

# Python 2 script: scan the tt0000000..tt9999998 id space for existing
# titles and dump [title, reviews] as JSON.
for i in xrange(0, 9999999):
    cnt = 0
    movie_id = "tt" + str(i).zfill(7)
    # NOTE(review): 'w' mode re-truncates reviews.json on every iteration,
    # and cnt is reset to 0 each pass so `cnt > 10` can never fire --
    # both look like they were meant to live outside the loop; confirm.
    with open('reviews.json', 'w') as fp:
        if imdb.title_exists(movie_id):
            cnt += 1
            title = imdb.get_title_by_id(movie_id)
            print movie_id, title.title
            reviews = imdb.get_title_reviews(movie_id)
            # if reviews == None:
            #     reviews = []
            # else:
            #     reviews = [review.__dict__ for review in reviews]
            # objects are serialized via their __dict__
            json.dump([title, reviews], fp, indent=2,
                      default=lambda o: o.__dict__)
            if cnt > 10:
                break
imdb = Imdb(cache=True)

# Read movie ids out of Details.csv (second column) and write each movie's
# rating and year to movieRatings.csv. Files are now closed via `with`
# (the original never closed either handle).
movieID = []
year = []
count = 1  # just to show how much is written in new file
with open('Details.csv', 'r') as f1, open('movieRatings.csv', 'w') as f2:
    for line in f1:
        attribute = line.split(",")  # splitting each item
        attribute[-1] = attribute[-1].strip()  # removing \n from the last attribute
        # year.append(attribute[0])  # storing only years
        movieID.append(attribute[1])  # storing only movie ids
    for item in movieID:
        # single lookup; the original fetched the same title twice
        title = imdb.get_title_by_id(item)
        if title:  # checking if movie exists
            print(count)
            count += 1  # BUG FIX: the counter was never incremented
            rating = str(title.rating)
            # BUG FIX: `rating is "None"` compared identity with a string
            # literal and only worked by accident of interning
            if rating == "None":
                f2.write("None")
            else:
                f2.write(rating)
            f2.write(',')  # so that it's in csv format
            year = str(title.year)
            if year == "None":
                f2.write("None")
            else:
                f2.write(year)
            f2.write(',')
class ImdbClient:
    # Aggregates three data sources — IMDbPY (self.imdbpy), imdb-pie
    # (self.imdb) and thetvdb (self.db) — plus a MongoDB collection of
    # tweets about shows. Written for Python 2 (print statements, sys.maxint).

    def __init__(self):
        self.imdbpy = IMDb()
        self.imdb = Imdb(exclude_episodes=False)
        # NOTE(review): this immediately overwrites the client created on the
        # previous line; only the anonymized client is ever used.
        self.imdb = Imdb(anonymize=True)  # to proxy requests
        self.db = api.TVDB('B43FF87DE395DF56')

    def readFromMongo(self, show, limit):
        """Return up to `limit` tweet texts stored for `show` in MongoDB."""
        # Connect to mongo
        client = MongoClient()
        # access movie stream db
        movies = client['movieratings_stream']
        # colletion of tweets
        tweets = movies['tweets']
        tweet_text = []
        counter = 0
        # iterate through cursor that takes the 'limit' most recent tweets with hashtag 'show'
        for tweet in tweets.find({'show_title': show}):  # .sort('created_at', pymongo.DESCENDING):
            if counter < limit:
                tweet_text.append(tweet.get("tweet_text"))
                counter += 1
            else:
                break
        return tweet_text

    def getTitle(self, show_title):
        # NOTE(review): the active body below is leftover IMDbPY tutorial
        # code for "The 4400". It ignores `show_title`, references the
        # undefined name `i` (i.update(e) would raise NameError), and has no
        # return statement, so callers receive None. The intended
        # title-search implementation is the commented block at the bottom.
        m = self.imdbpy.get_movie('0389564')  # The 4400.
        m['kind']  # kind is 'tv series'.
        self.imdbpy.update(m, 'episodes')  # retrieves episodes information.
        m['episodes']  # a dictionary with the format:
        # {#season_number: {
        #     #episode_number: Movie object,
        #     #episode_number: Movie object,
        #     ...
        # },
        # ...
        # }
        # season_number always starts with 1, episode_number
        # depends on the series' numbering schema: some series
        # have a 'episode 0', while others starts counting from 1.
        m['episodes'][1][1]  # <Movie id:0502803[http] title:_"The 4400" Pilot (2004)_>
        e = m['episodes'][1][2]  # second episode of the first season.
        e['kind']  # kind is 'episode'.
        e['season'], e['episode']  # return 1, 2.
        e['episode of']  # <Movie id:0389564[http] title:_"4400, The" (2004)_>
        # XXX: beware that e['episode of'] and m _are not_ the
        # same object, while both represents the same series.
        # This is to avoid circular references; the
        # e['episode of'] object only contains basics
        # information (title, movieID, year, ....)
        i.update(e)  # retrieve normal information about this episode (cast, ...)
        e['title']  # 'The New and Improved Carl Morrissey'
        e['series title']  # 'The 4400'
        e['long imdb episode title']  # '"The 4400" The New and Improved Carl Morrissey (2004)'
        # print(show_title)
        # sleep(3)
        # title_list = list(self.imdb.search_for_title(show_title))
        # print(list(self.imdb.search_for_title("Days Gone Bye The Walking Dead")))
        # print(title_list)
        # sleep(3)
        # index = 0
        # show_id = None
        # while show_id is None:
        #     print ("title_list", title_list[index][u'title'])
        #     print ("show title", show_title)
        #     result = title_list[index][u'title'].lower()
        #     query = show_title.lower()
        #     if result in query:
        #         print title_list
        #         show_id = title_list[index][u'imdb_id']
        #     # endless loop
        #     index += 1
        # return show_id

    def searchShow(self, tvshow):
        # Fetch every IMDB review for `tvshow`.
        # NOTE(review): getTitle() currently returns None (see above), so
        # title_id is None when this runs; sys.maxint is Python 2 only.
        print tvshow
        title_id = self.getTitle(tvshow)
        # if tvshow is not self.tvshow:
        print title_id
        print tvshow
        # print('title: ', title_id)
        reviews = self.imdb.get_title_reviews(title_id, max_results=sys.maxint)
        title = self.imdb.get_title_by_id(title_id)
        print title_id
        print tvshow
        # print("title: " + str(title.data))
        # print len(reviews)
        return reviews

    def getCurrentImdbRating(self, tvshow):
        # Current IMDB rating of the show, as a float.
        tvshowid = self.getTitle(tvshow)
        title = self.imdb.get_title_by_id(tvshowid)
        return float(title.rating)

    def get_all_episode_names(self, tvshow):
        # Episode names across every season of the first thetvdb search hit.
        result = self.db.search(tvshow, 'en')
        show = result[0]
        res = []
        for x in range(1, len(show)):
            season = show[x]
            for y in range(1, len(season) + 1):
                if season[y].EpisodeName is not None:
                    res.append(season[y].EpisodeName)
        return res

    def get_specific_episode_names(self, tvshow, season):
        # NOTE(review): the `season` parameter is immediately shadowed by
        # show[1], so this always returns season 1's episode names.
        result = self.db.search(tvshow, 'en')
        show = result[0]
        res = []
        season = show[1]
        for x in range(1, len(season) + 1):
            if season[x].EpisodeName is not None:
                print season[x].EpisodeName
                res.append(season[x].EpisodeName)
        return res

    def get_all_episodes(self, episodelist, tvshow):
        # NOTE(review): `searchshow` (lowercase) is undefined — the method is
        # named searchShow — so this raises NameError if executed. `reviews`
        # is also re-created on every iteration, keeping only the last result.
        for episode in episodelist:
            currEpisode = episode + " " + tvshow
            reviews = []
            reviews.append(searchshow(currEpisode))
        # call searchshow for each
        # get list of all episode names given a tv show
        # create review list, for each episode name, call searchshow append
        # call method that trains
def check_helios():
    """Scrape the Helios Wroclaw repertoire and mail a per-film IMDB summary.

    Fetches the repertoire page, follows each movie link to read its title,
    then queries IMDB for genres, rating and the best-quality trailer, and
    appends everything to the outgoing mail body via UpdateMailBody().
    """
    base_url = 'http://www.helios.pl'
    response = requests.get('http://www.helios.pl/3,Wroclaw/Repertuar/')
    imdb = Imdb(anonymize=True)

    UpdateMailBody('=== Helios Films ===<br>')

    # Bail out entirely when the repertoire page is unreachable.
    if response.status_code == 200:
        print('Successful connectet to Helios website')
    else:
        exit()

    listing_page = bs(response.text, "html.parser")

    # Absolute links to every movie's detail page.
    movie_links = [base_url + anchor.get('href')
                   for anchor in listing_page.findAll('a', {'class': 'movie-link'})]

    # Pull each movie's name (first <h2>) out of its detail page.
    movie_names = []
    for link in movie_links:
        detail_page = bs(requests.get(link).text, "html.parser")
        heading = detail_page.find('h2')
        if heading.string:
            movie_names.append(heading.string)
    movie_names = remove_duplicates(movie_names)

    # Rank per trailer format; a higher-ranked format seen later replaces a
    # lower one, and the first trailer of the winning format is kept.
    format_rank = {'HD 1080p': 3, 'HD 720p': 2, 'HD 480p': 1}

    for name in movie_names:
        hits = imdb.search_for_title(name)
        if not hits:
            continue
        title = imdb.get_title_by_id(hits[0]['imdb_id'])
        UpdateMailBody(name + '<br> Genres: ' + str(title.genres) + '<br>')
        UpdateMailBody(' IMDB Rated: ' + str(title.rating) + '<br>')

        best_rank = 0
        best_url = None
        for trailer in title.trailers:
            rank = format_rank.get(trailer['format'], 0)
            if rank > best_rank:
                best_url = trailer['url']
                best_rank = rank

        if best_rank == 0:
            UpdateMailBody('No Normal quality trailer<br><br>')
        else:
            UpdateMailBody('<a href = "' + best_url + '" > (Trailer) </a><br><br>')
class IMDBGraph:
    # Builds an undirected NetworkX graph of movies, people, genres and years
    # fetched through imdb-pie. Node identity is the display name/title, with
    # a 'node' attribute recording the kind ('movie', 'actor', 'genre', ...).

    def __init__(self, anonymize=True):
        # anonymize: proxy IMDB requests; responses are cached locally.
        self._imdb = Imdb(anonymize=anonymize, cache=True)
        self._graph = nx.Graph()

    def _add_node(self, name, nodetype):
        ''' Add simple node without attributes '''
        # Skip existing nodes so a later add can't clobber the 'node' type.
        if name not in self._graph.nodes():
            self._graph.add_node(name, node=nodetype)

    def addPerson(self, idname):
        ''' add New actor/actress no the graph '''
        # NOTE(review): bypasses _add_node, so this node gets no 'node' type
        # attribute, unlike every other node in the graph.
        actor = self._imdb.get_person_by_id(idname)
        self._graph.add_node(actor.name)

    def addMovie(self, idname):
        # Add just the movie-title node, without any edges.
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')
        logging.info("Loading {0}".format(idname))

    def addMovieAndConnect(self, idname):
        # Add a movie plus its year, genres and people, with edges to each.
        movie = self._imdb.get_title_by_id(idname)
        self._add_node(movie.title, 'movie')
        self._add_node(movie.year, 'year')
        logging.info("Loading {0}".format(idname))
        for genre in movie.genres:
            self._add_node(genre, 'genre')
            self._graph.add_edge(movie.title, genre)
        for person in movie.credits:
            self._add_node(person.name, 'actor')
            # Edge weight mixes rating and vote count; both also kept as
            # separate edge attributes.
            self._graph.add_edge(movie.title, person.name,
                                 weight=movie.rating + movie.votes,
                                 rating=movie.rating,
                                 votes=movie.votes)
        for person in movie.cast_summary:
            self._add_node(person.name, "actor")
            # NOTE(review): re-adding an edge already created from
            # movie.credits overwrites its weight/rating/votes attributes.
            self._graph.add_edge(movie.title, person.name)

    def addPopular(self, limit=2):
        ''' Add popular movies and shows '''
        shows = self._imdb.popular_shows()
        #movies = self._imdb.top_250()
        if limit > len(shows):
            limit = len(shows)
        for show in shows[:limit]:
            self.addMovie(show['tconst'])

    def removeNode(self, nodename):
        # Remove a node and all its incident edges.
        self._graph.remove_node(nodename)

    def addEdge(self, innode, outnode, prop=None):
        # Connect two nodes; both endpoints must already exist in the graph.
        if innode not in self._graph:
            raise Exception("{0} not in graph".format(innode))
        if outnode not in self._graph:
            raise Exception("{0} not in graph".format(outnode))
        self._graph.add_edge(innode, outnode, prop=prop)

    def components(self):
        # NOTE(review): computes components and degrees but discards both
        # and implicitly returns None.
        comp = nx.connected_components(self._graph)
        degree = nx.degree(self._graph)

    def avg_degree(self):
        ''' Return average number of degree for each node '''
        return nx.average_neighbor_degree(self._graph)

    def avg_degree_connectivity(self):
        return nx.average_degree_connectivity(self._graph)

    def clustering(self):
        ''' Compute a bipartite clustering coefficient for nodes. '''
        return nx.clustering(self._graph)

    def get_item(self, item):
        ''' Getting node from the graph '''
        return self._graph.node[item]

    def filter_edges(self, param, func):
        # Yield (node, neighbour, value) for every edge whose `param`
        # attribute satisfies the predicate `func`.
        # NOTE(review): adjacency_iter() is the old NetworkX 1.x API.
        for n, nbrs in self._graph.adjacency_iter():
            for nbr, attr in nbrs.items():
                if len(attr) == 0 or param not in attr:
                    continue
                data = attr[param]
                if func(data):
                    yield (n, nbr, data)

    def cliques(self):
        ''' return all cluques from the graph '''
        return nx.find_cliques(self._graph)

    def stat(self):
        ''' Return basic statistics of the graph '''
        return {'nodes': self._graph.number_of_nodes(),
                'edges': self._graph.number_of_edges(),
                'density': nx.density(self._graph)}

    def save(self, outpath):
        ''' save graph to the file '''
        # NOTE(review): not implemented.
        pass
# Scrape the IMDB Top 250 into a pandas DataFrame and dump it to movies.csv,
# showing progress with a Halo spinner.
imdb = Imdb(anonymize=True)
movies = imdb.top_250()

cols = [
    "Title", "Actors", "Director", "Genres", "Rating", "Running Time",
    "Year", "Certification", "Writers"
]
df = pd.DataFrame(columns=cols)

spinner = Halo(text='Loading', spinner='dots')
spinner.start()

for row_index, chart_entry in enumerate(movies):
    details = imdb.get_title_by_id(chart_entry["tconst"])
    # 250 movies in total, so (n / 2.5) is the percentage done.
    spinner.text = "Running - " + str((row_index + 1) / 2.5) + "%"
    # One row per movie; people and genre lists flattened to CSV-friendly
    # comma-separated strings.
    df.loc[row_index] = [
        details.title,
        ', '.join(person.name for person in details.cast_summary),
        details.directors_summary[0].name,
        ', '.join(genre for genre in details.genres),
        details.rating,
        details.runtime,
        details.year,
        details.certification,
        ', '.join(person.name for person in details.writers_summary),
    ]

df.to_csv("movies.csv")
# https://github.com/richardasaurus/imdb-pie
# Demo of the imdb-pie API: search, charts, episodes and credits.
from imdbpie import Imdb

# Single anonymized (proxied) client. The previous version created a plain
# client and immediately threw it away by rebinding the name.
imdb = Imdb(anonymize=True)

print(imdb.search_for_title("The Dark Knight"))
print()
print(imdb.search_for_person("Christian Bale"))
print()
print(imdb.get_episodes('tt0096697'))

# Iterate the chart directly instead of indexing with range(len(...)).
top250 = imdb.top_250()
for chart_entry in top250:
    print(chart_entry)
print()

title = imdb.get_title_by_id("tt1210166")
for person in title.credits:
    # check if they are a writer
    if person.token == 'writers':
        print(person.name + ' is a writer')
    else:
        print(person.name + ' is not a writer')
class Quiz:
    # Telegram movie-guessing quiz: picks a random movie, posts one of its
    # trailer images to the chat and awards a point for the first correct
    # title guess.
    movies_type = ''
    imdb = ''       # replaced with an Imdb client in __init__
    movie = None    # currently active movie; None between rounds

    def __init__(self, session):
        self.session = session
        self.imdb = Imdb()
        # NOTE(review): immediately overwrites the client created above;
        # only the caching client is ever used.
        self.imdb = Imdb(cache=True)

    def set_level(self, level):
        # Difficulty levels are not implemented.
        pass

    def rand_movie(self, rand_type=None):
        # Pick a random movie and store it in self.movie. "pop" draws from
        # the Top 250; None draws a random ttNNNNNNN id. Loops until a movie
        # with at least one trailer image is found.
        movie_id = ''
        while self.movie is None:
            if rand_type == "pop":
                # NOTE(review): randrange excludes its stop value, so the
                # last chart entry can never be selected here.
                pop_movies = self.imdb.top_250()
                number = randrange(0, len(pop_movies) - 1)
                movie_id = pop_movies[number]['tconst']
            if rand_type is None:
                number = str(randrange(1, 99999))
                # Zero-pad to the 7 digits of an IMDB title id.
                if len(number) < 7:
                    number = '0' * (7 - len(number)) + number
                movie_id = "tt"+number  # formatting to IMDB_ID
            self.movie = self.imdb.get_title_by_id(movie_id)
            if self.movie is not None:
                # Retry when the movie has no trailer image to show.
                if len(self.movie.trailer_image_urls) < 1:
                    self.movie = None

    def get_movie_photo(self):
        # Random trailer image URL of the active movie; choice() raises
        # IndexError (not ValueError) on an empty sequence, but rand_movie
        # guarantees at least one image.
        try:
            return choice(self.movie.trailer_image_urls)
        except ValueError as e:
            raise e

    def get_question(self, rand_type=None):
        # Select a movie and return the image URL for the question.
        try:
            self.rand_movie(rand_type)
            return self.get_movie_photo()
        except ValueError as e:
            # NOTE(review): raising a plain string is a TypeError on
            # Python 3; this was presumably meant to wrap the localized
            # message in an exception type.
            raise(_("not_possible_find_movie"))

    def show(self, update, rand_type):
        # Post a new question into the chat and mark the session as running.
        chat_id = update.message.chat_id
        movie_img = self.get_question(rand_type)
        self.session.messenger.send_msg(chat_id, "CINEMONSTER", "title")
        self.session.messenger.send_photo(chat_id, movie_img,
                                          caption=_("question_which_movie"))
        self.session.update_counter()
        self.session.status = "running"

    def check_resps(self, update):
        # Award a point when the message text matches the movie title
        # (case-insensitive) and close the round.
        chat_id = update.message.chat_id
        if str.lower(self.movie.title) == str.lower(update.message.text):
            player = Player(update.message.from_user.id)
            player.name = update.message.from_user.first_name+" "+update.message.from_user.last_name
            try:
                self.session.player_add(player)
            except ValueError as e:
                # Player already registered in this session.
                pass
            self.session.players[update.message.from_user.id].add_points(1)
            self.session.messenger.send_msg(chat_id,
                                            msg=(player.name, _("correct_answer")),
                                            type_msg='bold')
            self.movie = None

    def check_expiration(self):
        # End the round when the session timer has run out.
        try:
            self.session.update_timer()
        except ValueError as e:
            pass
        if self.session.status == "timed_out":
            self.session.messenger.send_msg(chat_id=self.session.chat_id,
                                            msg=(_("times_up"), self.movie.title),
                                            type_msg='bold')
            self.session.status = "stop"
            self.movie = None
login_conf = {'inputEmail': 'arac', 'inputPassword': '******'} # for output out = rating.copy() directors = [] casts = [] writers = [] imdb_rating = [] cover_url = [] for index, row in out.iterrows(): try: mid = (row['movieId'].astype(int)) imdbid = get_imdb_id(mid) title = imdb.get_title_by_id(imdbid) except: print(imdbid) break idirectors = [x.name for x in title.directors_summary] icasts = [x.name for x in title.cast_summary] iwriters = [x.name for x in title.writers_summary] iimdb_rating = title.rating icover_url = title.cover_url directors.append(idirectors) casts.append(icasts) writers.append(iwriters) imdb_rating.append(iimdb_rating) cover_url.append(icover_url) out['director'] = directors
new = '' for x in ss: new = new + "\'" + "\'" + x return new[2:] else: return s imdb = Imdb() imdb = Imdb(anonymize=True) # to proxy requests top250 = [] top250 = imdb.top_250() for item in top250: try: title = imdb.get_title_by_id(item['tconst']) if len(title.trailers) > 0: trailer_url = title.trailers[0]['url'] else: trailer_url = 'None' new_movie = ( '''INSERT INTO movie_movie VALUES (\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\',\'{}\')'''.format( item['tconst'], single_quote(str(item['title'])), item['year'], title.release_date, item['rating'], single_quote(item['image']['url']), single_quote(str(title.plot_outline)), single_quote(str(trailer_url)), ))
class googleIt():
    # Resolves movie titles to IMDB ids by running a Bing web search
    # restricted to a target domain (imdb by default) and scraping the URLs
    # of the results.
    BING_API_KEY = "SjCn0rSMC6ipl8HJiI2vAYQj1REMPA+raOMPSd5K9A0"
    domainSearch = ""
    imdb = object()  # replaced with an Imdb client in __init__

    def __init__(self, domainSearch="imdb"):
        self.domainSearch = domainSearch
        self.imdb = Imdb()

    def _makeSearchTerm(self, movieName):
        # Compose the Bing query, e.g. "Inception :imdb".
        return movieName + " :" + self.domainSearch
        # bing advanced search doesn't work w our request soooo.....
        #return "site:" + self.domainSearch + " " + movieName

    def _GetMovieResearch(self, term, limit=50, format='json'):
        # Run the Bing web search and return up to `limit` results.
        bing = PyBingWebSearch(self.BING_API_KEY, term, web_only=False)
        return bing.search(limit, format)

    def _findImdbLinks(self, researchResults):
        # get all the links that contains the domainSearch name (imdb by default)
        for link in researchResults:
            if (re.search(self.domainSearch, link.url)):
                yield link.url
                #imdblist.append(link.url)

    def getMovieID(self, movieTitle):
        # Resolve a movie title to an IMDB id via web search.
        assert isinstance(movieTitle, str)
        # get the fifty firsts results of a research
        researchResults = self._GetMovieResearch(
            self._makeSearchTerm(movieTitle))
        # find all the links from imdb
        imDBlinks = self._findImdbLinks(researchResults)
        # TODO make pattern to find the imdb main url (ex: http://www.imdb.com/title/tt0330373/)
        # check http://daringfireball.net/2010/07/improved_regex_for_matching_urls
        # if you give the format as http://www.imdb.com/title/tt0330373/, return the id
        # mess up if incorrect url. This is why we need a regex here
        movieId = next(imDBlinks).split("title/")[1][:-1]
        # check wether the id is only made of min letters and digit
        assert (re.match("^[a-z0-9]*$", movieId))
        # and if it matches the right size (all id have the same size)
        assert (len(movieId) == 9)
        return movieId

    def getMovieImage(self, movieId="", movieTitle=""):
        # Not implemented.
        pass

    def getMovieInfo(self, movieId="", movieTitle=""):
        """ Return information about a movie
        Movie ID (string) can be found for example in a imdb url
        Movie Title (string) is simply the move Title
        Only one parameter is required. With both parameters, we'll do a double check ♥ !!
        Note that using the ID return only one movie, but the Title may return many
        Returned movie object properties :
            movie.imdb_id
            movie.title
            movie.type
            movie.year
            movie.tagline
            movie.plots
            movie.plot_outline
            movie.rating
            movie.genres
            movie.votes
            movie.runtime
            movie.poster_url
            movie.cover_url
            movie.release_date
            movie.certification
            movie.trailer_image_urls
            movie.directors_summary
            movie.creators
            movie.cast_summary
            movie.writers_summary
            movie.credits
            movie.trailers
        """
        def _hasNumber(inputStr):
            """ Check if given sting contains any number (1-9). Return boolean
            Do you really need more details ?
            """
            return any(char.isdigit() for char in inputStr)

        # At least one of the two identifiers is required.
        try:
            assert (movieId or movieTitle)
        except AssertionError:
            return False
        assert isinstance(movieTitle, str)
        movieTitle = movieTitle.lower()
        if movieTitle and movieId:
            # NOTE(review): this branch's only statement is the string
            # expression below (dead debug code), so it does nothing and
            # falls through returning None.
            """if(movieId == l.get("imdb_id")): theChosenOne = self.imdb.search_for_title(movieId) print(theChosenOne) print("welcome to the grind") print(theChosenOne.title) print(theChosenOne.type) print(theChosenOne.year) print(theChosenOne.genres) print(theChosenOne.trailers) print(theChosenOne.poster_url) sys.exit("f**k tamère")"""
        elif movieTitle:
            foundMovie = self.imdb.search_for_title(movieTitle)
            # if many movies found
            if (isinstance(foundMovie, list)):
                def generator(movieList):
                    # Lazily fetch the full title object for each search hit.
                    for l in movieList:
                        yield self.imdb.get_title_by_id(l.get("imdb_id"))
                for m in generator(foundMovie):
                    print(m)
                    print(m.title)
                    print(m.type)
                    print(m.year)
                # NOTE(review): debug placeholder return value.
                return "nique ta mère"
        elif movieId:
            return self.imdb.get_title_by_id(movieId)
            # Dead code below (after return): abandoned title-matching idea.
            """
            # check if found movie correspond to the given one (if one given)
            if movieTitle:
                # if the given movie name contains a #, we won't do a Levenshtein
                # Harry Potter 4 === Harry Potter and the Goblet of Fire
                if not _hasNumber(movieTitle):
                    # If no # in it, we'd do a little Levenshtein distance mesure
                    # both movies name are in lowercase
                    if Levenshtein.distance(foundMovieTitle, movieTitle) > 3:
                        raise ValueError("Given movie title doesn't match the id.")
            """
        else:
            #wtf
            return False
def main():
    """Sync movie ratings in Plex's own SQLite database from IMDB or RT.

    Walks every movie in the configured Plex library, resolves its IMDB id
    (directly from the item guid, or via TMDB), fetches a rating from the
    source selected by RATING_SOURCE and writes it straight into the
    metadata_items table, including the rating-image hint in extra_data.
    Honors DRY_RUN (no writes) and RT_MATCH_YEAR (match RT hit by year).
    """
    # Connect to the Plex server
    print("Connecting to the Plex server at '{base_url}'...".format(
        base_url=PLEX_URL))
    try:
        plex = PlexServer(PLEX_URL, PLEX_TOKEN)
    except:
        print("No Plex server found at: {base_url}".format(base_url=PLEX_URL))
        print("Exiting script.")
        return
    # Get list of movies from the Plex server
    print(
        "Retrieving a list of movies from the '{library}' library in Plex...".
        format(library=MOVIE_LIBRARY_NAME))
    try:
        movie_library = plex.library.section(MOVIE_LIBRARY_NAME)
    except:
        print("The '{library}' library does not exist in Plex.".format(
            library=MOVIE_LIBRARY_NAME))
        print("Exiting script.")
        return
    imdb = Imdb()
    # Direct connection to Plex's SQLite metadata database.
    conn_db = sqlite3.connect(PLEX_DATABASE_FILE)
    db = conn_db.cursor()
    if RATING_SOURCE == 'imdb':
        print("Using IDMB ratings.")
    elif RATING_SOURCE == 'rt':
        print("Using Rotten Tomatoes critic ratings.")
    else:
        print("Invalid rating source. Must be 'imdb' or 'rt'.")
        print("Exiting script.")
        return
    for plex_movie in movie_library.all():
        # Resolve an IMDB id from the metadata-agent guid.
        if 'imdb://' in plex_movie.guid:
            imdb_id = plex_movie.guid.split('imdb://')[1].split('?')[0]
        elif 'themoviedb://' in plex_movie.guid:
            tmdb_id = plex_movie.guid.split('themoviedb://')[1].split('?')[0]
            imdb_id = get_imdb_id_from_tmdb(tmdb_id)
        else:
            imdb_id = None
        if not imdb_id:
            print("Missing IMDB ID. Skipping movie '{pm.title}'.".format(
                pm=plex_movie))
            continue
        if RATING_SOURCE == 'imdb':
            if imdb.title_exists(imdb_id):
                imdb_movie = imdb.get_title_by_id(imdb_id)
            else:
                print(
                    "Movie not found on IMDB. Skipping movie '{pm.title} ({imdb_id})'."
                    .format(pm=plex_movie, imdb_id=imdb_id))
                continue
            print("{im.rating}\t{pm.title}".format(pm=plex_movie,
                                                   im=imdb_movie))
            if not DRY_RUN:
                # Skip items whose rating field the user locked
                # (lockedFields=5 in user_fields).
                db_execute(
                    db, "UPDATE metadata_items SET rating = ? WHERE id = ? "
                    "AND user_fields NOT LIKE ?", [
                        imdb_movie.rating, plex_movie.ratingKey,
                        '%lockedFields=5%'
                    ])
                extra_data = db_execute(
                    db, "SELECT extra_data FROM metadata_items WHERE id = ?",
                    [plex_movie.ratingKey]).fetchone()[0]
                if extra_data:
                    # Strip any previous rating-image hints before
                    # prepending the IMDB one.
                    extra_data = re.sub(
                        r"at%3AratingImage=.+?&|at%3AaudienceRatingImage=.+?&",
                        '', extra_data)
                    db_execute(
                        db,
                        "UPDATE metadata_items SET extra_data = ? WHERE id = ?",
                        [extra_data, plex_movie.ratingKey])
                db_execute(
                    db,
                    "UPDATE metadata_items SET extra_data = ? || extra_data WHERE id = ?",
                    [
                        'at%3AratingImage=imdb%3A%2F%2Fimage%2Erating&',
                        plex_movie.ratingKey
                    ])
        elif RATING_SOURCE == 'rt':
            rt_client_result = RottenTomatoesClient.search(
                term=plex_movie.title, limit=5)
            if RT_MATCH_YEAR:
                rt_movie = next((m for m in rt_client_result['movies']
                                 if m['year'] == plex_movie.year), None)
            else:
                rt_movie = next((m for m in rt_client_result['movies']), None)
            if rt_movie is None:
                print(
                    "Movie not found on RottenTomatoes. Skipping movie '{pm.title} ({imdb_id})'."
                    .format(pm=plex_movie, imdb_id=imdb_id))
                continue
            # RT meter is 0-100; Plex audience rating is 0-10.
            rt_rating = rt_movie['meterScore'] / 10.0
            tomato = 'ripe' if rt_rating >= 6 else 'rotten'
            print("{rt_rating}\t{pm.title}".format(pm=plex_movie,
                                                   rt_rating=rt_rating))
            if not DRY_RUN:
                db_execute(
                    db,
                    "UPDATE metadata_items SET audience_rating = ? WHERE id = ?",
                    [rt_rating, plex_movie.ratingKey])
                extra_data = db_execute(
                    db, "SELECT extra_data FROM metadata_items WHERE id = ?",
                    [plex_movie.ratingKey]).fetchone()[0]
                if extra_data:
                    extra_data = re.sub(
                        r"at%3AratingImage=.+?&|at%3AaudienceRatingImage=.+?&",
                        '', extra_data)
                    db_execute(
                        db,
                        "UPDATE metadata_items SET extra_data = ? WHERE id = ?",
                        [extra_data, plex_movie.ratingKey])
                db_execute(
                    db,
                    "UPDATE metadata_items SET extra_data = ? || extra_data WHERE id = ?",
                    [
                        'at%3AaudienceRatingImage=rottentomatoes%3A%2F%2Fimage%2Erating%2E{}&'
                        .format(tomato), plex_movie.ratingKey
                    ])
    conn_db.commit()
    db.close()
async def imdb(query, api: Imdb, localize):
    """
    Send an api request to imdb using the search query
    :param query: the search query
    :param api: the imdb api object
    :param localize: the localization strings
    :return: the result
    """
    # FIXME: Use Aiohttp instead of this api wrapper
    def join_names(people):
        # Comma-separated person names, or 'N/A' for an empty summary.
        return ', '.join(p.name for p in people) if people else 'N/A'

    def text_or_na(value):
        # Fall back to 'N/A' for falsy values and for bare ints.
        return value if value and not isinstance(value, int) else 'N/A'

    try:
        id_ = api.search_for_title(query)[0]['imdb_id']
        res = api.get_title_by_id(id_)

        # Episode info only exists for TV series.
        eps = api.get_episodes(id_) if res.type == 'tv_series' else None
        ep_count = len(eps) if eps is not None else None
        season_count = eps[-1].season if eps is not None else None

        title = text_or_na(res.title)
        release = text_or_na(res.release_date)

        runtime = res.runtime
        if runtime is None:
            runtime_str = 'N/A'
        else:
            # Runtime arrives in seconds; render as "H hours M minutes".
            hours, seconds = divmod(runtime, 3600)
            minutes = seconds / 60
            runtime_str = '{} {} {} {}'.format(
                round(hours), localize['hours'],
                round(minutes), localize['minutes'])

        rated = text_or_na(res.certification)
        genre = ', '.join(res.genres) if res.genres else 'N/A'
        director = join_names(res.directors_summary)
        writer = join_names(res.writers_summary)
        cast = join_names(res.cast_summary)
        plot = text_or_na(res.plot_outline)
        poster = res.poster_url
        score = f'{res.rating}/10' if res.rating is not None else 'N/A'

        embed = Embed(colour=0xE5BC26)
        embed.set_author(name=title)
        if poster:
            embed.set_image(url=poster)
        if season_count is not None:
            embed.add_field(name=localize['seasons'], value=season_count)
        if ep_count is not None:
            embed.add_field(name=localize['episodes'], value=str(ep_count))
        embed.add_field(name=localize['release_date'], value=release)
        embed.add_field(name=localize['rated'], value=rated)
        embed.add_field(name=localize['runtime'], value=runtime_str)
        embed.add_field(name=localize['genre'], value=genre)
        embed.add_field(name=localize['director'], value=director)
        embed.add_field(name=localize['writer'], value=writer)
        embed.add_field(name=localize['cast'], value=cast)
        embed.add_field(name=localize['score'], value=score)
        embed.add_field(name=localize['plot_outline'], value=plot,
                        inline=False)
        return embed
    except (JSONDecodeError, IndexError):
        return localize['title_not_found']
def __init__(self, id, freq):
    """Fetch movie `id` from IMDB and populate the record's fields, plus one
    0/1 indicator attribute per known genre.

    :param id: IMDB title id, e.g. 'tt0111161'
    :param freq: externally supplied frequency count, stored unchanged

    Fixes over the previous version: the plain Imdb() client created and
    instantly overwritten is gone; `movie` is initialised to None so an
    HTTPError no longer causes a NameError at the None-check below; the
    18-branch genre if/elif chain is a single mapping table.
    """
    imdb = Imdb(anonymize=True)  # to proxy requests
    print(id)
    movie = None
    try:
        movie = imdb.get_title_by_id(id)
    except urllib2.HTTPError as err:
        # Best-effort: log the failure; the None check below reports it.
        print(err)
    if movie is None:
        print("bu None Abiii")
    print(movie)
    self.id = id
    self.title = movie.title
    self.freq = freq
    self.year = movie.year
    # self.tagline = movie.tagline
    self.rating = movie.rating
    self.type = movie.type
    self.cast_summary = '|'.join(p.name for p in movie.cast_summary)
    self.writers_summary = '|'.join(p.name for p in movie.writers_summary)
    self.creators = movie.creators
    self.directors_summary = '|'.join(p.name for p in movie.directors_summary)
    self.runtime = movie.runtime
    self.votes = movie.votes
    self.certification = movie.certification
    # Genre name (lowercased) -> indicator attribute; note 'sci-fi' maps to
    # the attribute name sci_fi.
    genre_attrs = {
        'action': 'action', 'animation': 'animation', 'comedy': 'comedy',
        'horror': 'horror', 'adventure': 'adventure', 'drama': 'drama',
        'thriller': 'thriller', 'romance': 'romance', 'sci-fi': 'sci_fi',
        'western': 'western', 'mystery': 'mystery', 'history': 'history',
        'crime': 'crime', 'biography': 'biography', 'fantasy': 'fantasy',
        'war': 'war', 'family': 'family', 'music': 'music',
    }
    # All indicators default to 0, then flip to 1 for the movie's genres.
    for attr in genre_attrs.values():
        setattr(self, attr, 0)
    for g in movie.genres:
        attr = genre_attrs.get(g.lower())
        if attr is not None:
            setattr(self, attr, 1)
# Scrape IMDB's popular shows into a pandas DataFrame and dump it to
# shows.csv, showing progress with a Halo spinner.
# Fix over the previous version: the `genres = ...` join was duplicated.
imdb = Imdb(anonymize=True)
series = imdb.popular_shows()
cols = [
    "Title", "Actors", "Genres", "Rating", "Running Time", "Year",
    "Certification", "Writers"
]
df = pd.DataFrame(columns=cols)
spinner = Halo(text='Loading', spinner='dots')
spinner.start()
for j, el in enumerate(series):
    show = imdb.get_title_by_id(el["tconst"])
    title = show.title
    # Principal actors come from the chart entry, not the title object.
    actors = ', '.join(i["name"] for i in el["principals"])
    genres = ', '.join(i for i in show.genres)
    rating = show.rating
    rtime = show.runtime
    year = show.year
    cert = show.certification
    writers = ', '.join(i.name for i in show.writers_summary)
    # popular_shows() yields 50 entries, so (n / 0.5) is the percentage done.
    spinner.text = "Running - " + str((j + 1) / 0.5) + "%"
    df.loc[j] = [title, actors, genres, rating, rtime, year, cert, writers]
df.to_csv("shows.csv")
spinner.stop()
# Top 100 by rating, sliced from the already-loaded top-250 frame.
top100 = top250.sort_values('rating', ascending=False)[0:100]
top100.tail()

'''4. Get the genres and runtime for each movie and add them to the dataframe There can be multiple genres per movie, so this will need some finessing.'''
#I wonder if the certification is significant too (i.e. PG-13...)
#genre - probably going to end up making genre a series of dummies so that each movie is still just one row...

#Pulling runtime and classification to add to DF
#loop through the DF by id, and for each id, pull the rating and add it...

# Fetch the runtime for one title id from the IMDB API (one call per movie).
def runtime_pull(title_id):
    return imdb.get_title_by_id(title_id).runtime

# Runtime column for both frames.
top100['runtime'] = top100.tconst.apply(runtime_pull)
top250['runtime'] = top250[u'tconst'].apply(runtime_pull)

# Fetch the genre list for one title id from the IMDB API.
def genres_pull(title_id):
    return imdb.get_title_by_id(title_id).genres

# Genre column for the top-100 frame.
top100['genre'] = top100.tconst.apply(genres_pull)
import datapr as dp from imdbpie import Imdb from pymongo import MongoClient import time imdb = Imdb() imdb = Imdb(anonymize=True) # to proxy requests dataset = dp.input() client = MongoClient() db = client.pluto user = "******" for index, row in dataset.iterrows(): film = imdb.get_title_by_id("tt" + row['imdbId']) result = db['user' + str(row['userId']) + 'Film'].insert_one( { "title": film.title, "imdbId": "tt" + row['imdbId'], "year": film.year, "poster": film.poster_url, "imdbRating": film.rating, "genres": film.genres, "plot": film.plot_outline, "runtime": int(film.runtime)/60 }) print ("+1 new film") if (str(row['userId']) != user): result = db['testusers'].insert_one( {
def test_get_title_by_id_returns_none_when_is_episode(self):
    # With exclude_episodes enabled, looking up an episode id must yield None.
    client = Imdb(exclude_episodes=True)
    result = client.get_title_by_id("tt0615090")
    assert result is None
df = pd.DataFrame(columns = ["Title","TitleID", "Year", "ActorFirstName", "ActorSurName"]) while(currentRows<numObs): #get a random number; n = random.randint(1000000,3000000) if (n not in importedTitle): titleID = "tt" + str(n) else: #already done so skip print("skipping loop as already imported") continue try: title = imdb.get_title_by_id(titleID) except: #debug print("no Match found:" + str(failed)) failed = failed+1 #break out of loop continue #title found #check it is within the selected year range if (title is not None): if (title.year is not None): if (title.year<=maxYear and title.year>=minYear): #continue
class IMDBcon:
    # Drives the whole icon-setting workflow: walks a directory tree,
    # resolves each movie folder/file against IMDb, downloads the cover art,
    # squares it into a .png, and applies it as the item's icon via a
    # generated helper script run under python2.6.
    # NOTE(review): Parser, Display, MOVIE_DICT, PROCESSES, TEMP_DIR,
    # MAGIC_SCRIPT(_STRING), EMPTY_PNG, TEMP_JPG/TEMP_PNG and
    # ACCEPTED_EXTENSIONS are defined elsewhere in this file — not visible here.
    def __init__(self):
        self.parser = Parser()
        self.directory = ''
        self.imdb = Imdb()
        self.cover_size = 214, 317    # width, height of the downloaded cover
        self.square_size = 317, 317   # final square canvas the cover is pasted onto
        self.current = MOVIE_DICT     # the item currently being processed
        self.all_files = []           # every discovered movie item (dicts)
        self.display = Display()

    def update_display(self, process, args=None):
        """Send process to self.display to print to screen"""
        # PROCESSES maps a short key to a printable message; args (if any)
        # are %-interpolated into that message.
        if args:
            process = PROCESSES[process] % args
        else:
            process = PROCESSES[process]
        self.display.update_current_process(process)

    def make_empty_square(self):
        """Make transparent .png image"""
        # Fully transparent RGBA canvas used as the background for covers.
        image = Image.new('RGBA', self.square_size, (0, 0, 0, 0))
        image.save(EMPTY_PNG, 'PNG')

    def make_magic_script(self):
        """Make temporary magic 'set_icon.py' script"""
        with open(MAGIC_SCRIPT, 'w') as script:
            script.write(MAGIC_SCRIPT_STRING)

    def make_temp_files(self):
        """Make temporary files"""
        # Start from a clean slate: drop any stale temp dir from a prior run.
        if os.path.isdir(TEMP_DIR):
            shutil.rmtree(TEMP_DIR)
        os.mkdir(TEMP_DIR)
        self.make_empty_square()
        self.make_magic_script()

    def remove_temp_dir(self):
        """Remove temporary directory"""
        self.display.current_title = ''
        self.update_display('clean')
        shutil.rmtree(TEMP_DIR)

    def set_current(self, dict_item=None, path=''):
        """Set self.current 'title' and 'path'"""
        if dict_item:
            self.current = dict_item
        elif path:
            # Derive the title from the file/dir name, extension stripped.
            self.current['path'] = path
            self.current['title'] = os.path.splitext(os.path.basename(path))[0]
        self.display.current_title = self.current['title']

    def set_id(self, imdb_id):
        # Pin an explicit IMDb id for the current item; for directories it is
        # also persisted to a hidden '.imdb_id' file for future runs.
        if os.path.isdir(self.current['path']):
            id_path = os.path.join(self.current['path'], '.imdb_id')
            with open(id_path, 'w') as id_file:
                id_file.write(imdb_id)
        self.current['imdb_id'] = imdb_id

    def get_current_title(self):
        """Set self.current.imdb to Imdb Title object"""
        # Resolution order: explicit id on the item -> cached '.imdb_id'
        # file in the directory -> IMDb title search (first hit wins).
        # Returns True only when a title with a cover image was found;
        # every failure path records an error message and returns False.
        self.update_display('search')
        imdb_id = os.path.join(self.current['path'], '.imdb_id')
        # User can use preset imdb_id for full accuracy
        if self.current['imdb_id']:
            try:
                self.current['imdb_obj'] = self.imdb.get_title_by_id(self.current['imdb_id'])
            except HTTPError:
                error = 'Bad IMDB id for "%s" (%s)' % (
                    self.current['title'],
                    self.current['imdb_id'])
                self.display.errors_caught.append(error)
                return False
        elif os.path.isfile(imdb_id):
            try:
                # join/split strips any stray whitespace/newlines in the file.
                with open(imdb_id) as id_file:
                    self.current['imdb_obj'] = self.imdb.get_title_by_id(
                        ''.join(id_file.read().split()))
            except HTTPError:
                error = 'Bad IMDB id for "%s"' % self.current['title']
                self.display.errors_caught.append(error)
                return False
        else:
            try:
                titles = self.imdb.search_for_title(self.current['title'])
                temp = titles[0]  # Not an Imdb Title object
                self.current['imdb_obj'] = self.imdb.get_title_by_id(temp['imdb_id'])
                # Cache the resolved id so the search is skipped next run.
                if os.path.isdir(self.current['path']):
                    with open(imdb_id, 'w') as id_file:
                        id_file.write(temp['imdb_id'])
            except IndexError:
                # titles[0] raised: the search produced no results.
                error = 'No Titles Found for "%s"' % self.current['title']
                self.display.errors_caught.append(error)
                return False
        if self.current['imdb_obj'].cover_url:
            return True
        else:
            error = 'No Cover Image Found for "%s"' % self.current['title']
            self.display.errors_caught.append(error)
            return False

    def retrieve_cover(self):
        """Download .jpg cover file from IMDB"""
        url = self.current['imdb_obj'].cover_url
        self.update_display('download', url)
        urlretrieve(url, TEMP_JPG)

    def resize_icon(self):
        """Set .jpg cover to self.cover_size"""
        self.update_display('resize', str(self.cover_size))
        image = Image.open(TEMP_JPG)
        resized = image.resize(self.cover_size, Image.ANTIALIAS)
        resized.save(TEMP_JPG)

    def square_icon(self):
        """Convert .jpg cover to .png squared cover"""
        # Paste the 214-wide cover onto the 317x317 transparent canvas,
        # offset 50px right so it sits roughly centered.
        self.update_display('square')
        background = Image.open(EMPTY_PNG)
        cover = Image.open(TEMP_JPG)
        offset = (50, 0)
        background.paste(cover, offset)
        background.save(TEMP_PNG)
        os.remove(TEMP_JPG)

    def set_icon_magic(self):
        """Run 'set_icon.py' script"""
        # Shells out to python2.6 because the icon-setting magic lives in a
        # separate generated script (MAGIC_SCRIPT).
        self.update_display('set_icon')
        os.system('python2.6 %s "%s" "%s"' % (MAGIC_SCRIPT, TEMP_PNG, self.current['path']))

    def set_icon(self):
        """Set directory icon to matching IMDB cover image"""
        if not self.get_current_title():
            return
        self.retrieve_cover()
        self.resize_icon()
        self.square_icon()
        self.set_icon_magic()
        # Same-titled items share one lookup/cover: reuse the prepared
        # TEMP_PNG for every recorded duplicate before deleting it.
        for item in self.current['duplicates']:
            self.set_current(dict_item=item)
            self.set_icon_magic()
        os.remove(TEMP_PNG)

    def exit_message(self):
        """Display exit message along with any errors"""
        self.display.update_current_process('')
        print(PROCESSES['complete'])
        if self.display.errors_caught:
            for error in self.display.errors_caught:
                print(error)
        else:
            print('No Errors.')
        print  # py2 bare print: emits a trailing blank line

    def is_duplicate(self, item):
        # True if an item with the same title was already collected; the new
        # item is attached to the existing one's 'duplicates' list so it gets
        # the same icon without a second IMDb lookup.
        for existing in self.all_files:
            if item['title'] == existing['title']:
                existing['duplicates'].append(item)
                return True
        return False

    def find_all(self):
        """Get list of all subdirectories and their files in directory"""
        for root, dirs, files in os.walk(self.directory):
            for directory in dirs:
                item = {
                    'path': os.path.join(root, directory),
                    'title': directory,
                    'imdb_id': None,
                    'imdb_obj': None,
                    'duplicates': []
                }
                if not self.is_duplicate(item):
                    self.all_files.append(item)
            # Loose files are only scanned in '-a' (all) mode.
            if not self.parser.tag == '-a':
                continue
            for filename in files:
                split = os.path.splitext(filename)
                title, ext = split
                # ext[1:] drops the leading dot before the whitelist check.
                if ext[1:] not in ACCEPTED_EXTENSIONS:
                    continue
                item = {
                    'path': os.path.join(root, filename),
                    'title': title,
                    'imdb_id': None,
                    'duplicates': []
                }
                if not self.is_duplicate(item):
                    self.all_files.append(item)
        self.display.total_processes = len(self.all_files)

    def set_icons(self):
        """Set icons for all sub-directories in directory"""
        self.find_all()
        for item in self.all_files:
            self.set_current(dict_item=item)
            self.set_icon()
            self.display.completed_processes += 1

    def run(self):
        # Entry point: dispatch on the parsed command-line tag.
        #   -m / -a : process a whole directory (files too with -a)
        #   -s      : process a single path
        #   -id     : pin an explicit IMDb id, then process the path
        if not self.parser.valid:
            return
        tag, arg1, arg2 = self.parser.parsed
        self.make_temp_files()
        print ''
        if tag in ('-m', '-a'):
            self.directory = arg1
            self.set_icons()
        if tag == '-s':
            self.set_current(path=arg1)
            self.set_icon()
        if tag == '-id':
            self.set_current(path=arg2)
            self.set_id(arg1)
            self.set_icon()
        self.remove_temp_dir()
        self.exit_message()
print(('type', movie.type)) print(('tagline', movie.tagline)) print(('rating', movie.rating)) print(('certification', movie.certification)) print(('genres', movie.genres)) print(('runtime', movie.runtime)) print(('writers summary', movie.writers_summary)) print(('directors summary', movie.directors_summary)) print(('creators', movie.creators)) print(('cast summary', movie.cast_summary)) print(('full credits', movie.credits)) print(('cert', movie.certification)) #if __name__ == '__main__': movie = imdb.get_title_by_id('tt0705926') #movie_tests() foo = imdb.search_for_title() # x = 0 # for i in foo: # print(i['title']) # print(x) # In[140]: def person_tests(): print(('name', person.name)) print(('name', person.name))