class IMDBApi(object):
    """Callable lookup of a movie title, optionally narrowed to one year.

    Despite the name, every request goes through the Rotten Tomatoes
    client (``RT``).  Calling the instance runs the search and returns the
    detailed info record of each matching result.
    """

    def __init__(self, title, year=0):
        """Store the query and build the Rotten Tomatoes client.

        ``year`` is coerced with ``int()``; the default ``0`` means
        "do not filter by year".
        """
        self.query = {'title': title, 'year': int(year)}
        # NOTE(review): ``rotten`` is not defined in this block --
        # presumably the module-level API key; confirm.
        self.rt = RT(rotten)

    def __call__(self):
        """Search for the stored title and return a list of info records.

        Side effect: ``self.partial`` is set to a shallow copy of the
        unfiltered search results, so callers can still inspect every
        candidate when the year filter discards them.
        """
        wanted_year = self.query['year']
        found = self.rt.search(self.query['title'])
        self.partial = copy.copy(found)

        # A falsy year (0) disables the filter entirely.
        if wanted_year:
            found = [hit for hit in found if hit['year'] == wanted_year]

        return [self.rt.info(hit['id']) for hit in found]
# Collect Rotten Tomatoes critic reviews for every title listed in
# movies.txt (one title per line) and write them to reviews-date.txt as
# "<quote>\t<date>" lines.  Progress is printed per title: the running
# review count when reviews were written, otherwise the title itself.
import io
import time

from rottentomatoes import RT

rt = RT()

# Sample titles for a quick manual run:
#movies = ['fight club','gravity','toy story 3','american hustle','skyfall','jack and jill','basic instinct 2','white out','lost souls','babylon','argo','bears','her','up','a beautiful mind','braveheart','the hurt locker','gambit','paranoia','getaway']

# Read all titles up front so the input file is closed before the slow
# API loop starts.
with open('movies.txt') as titles:
    movies = [line.strip('\n') for line in titles]

count = 0  # total number of reviews written so far, across all movies

# io.open with an explicit encoding behaves the same on Python 2 and 3 and
# does not fail on non-ASCII review quotes; the context manager closes
# the file even if an API call raises.
with io.open('reviews-date.txt', 'w', encoding='utf-8') as f:
    for u in movies:
        movlst = rt.search(u, page_limit=1)
        if movlst and movlst[0][u'id'] != '':
            review = rt.info(movlst[0][u'id'], 'reviews')
            # Each element of the 'reviews' list is a dict for one review.
            rlt = review[u'reviews']
            for a in rlt:
                # The original also wrote the date to an undefined ``f2``
                # handle (NameError on the first review); the date is
                # already part of this line, so that write was dropped.
                f.write(a[u'quote'] + u'\t' + a[u'date'] + u'\n')
                count += 1
            if rlt:
                print(count)
            else:
                print(u)
        else:
            # No search hit, or a hit without a usable id.
            print(u)
        # Pause between titles (presumably to respect API rate limits).
        time.sleep(5)
# Collect Rotten Tomatoes critic reviews for every title listed in
# movies.txt (one title per line) and write them to reviews-date.txt as
# "<quote>\t<date>" lines.  Progress is printed per title: the running
# review count when reviews were written, otherwise the title itself.
import io
import time

from rottentomatoes import RT

rt = RT()

# Sample titles for a quick manual run:
# movies = ['fight club','gravity','toy story 3','american hustle','skyfall','jack and jill','basic instinct 2','white out','lost souls','babylon','argo','bears','her','up','a beautiful mind','braveheart','the hurt locker','gambit','paranoia','getaway']

# Read all titles up front so the input file is closed before the slow
# API loop starts.
with open("movies.txt") as titles:
    movies = [line.strip("\n") for line in titles]

count = 0  # total number of reviews written so far, across all movies

# io.open with an explicit encoding behaves the same on Python 2 and 3 and
# does not fail on non-ASCII review quotes; the context manager closes
# the file even if an API call raises.
with io.open("reviews-date.txt", "w", encoding="utf-8") as f:
    for u in movies:
        movlst = rt.search(u, page_limit=1)
        if movlst and movlst[0][u"id"] != "":
            review = rt.info(movlst[0][u"id"], "reviews")
            # Each element of the "reviews" list is a dict for one review.
            rlt = review[u"reviews"]
            for a in rlt:
                # The original also wrote the date to an undefined ``f2``
                # handle (NameError on the first review); the date is
                # already part of this line, so that write was dropped.
                f.write(a[u"quote"] + u"\t" + a[u"date"] + u"\n")
                count += 1
            if rlt:
                print(count)
            else:
                print(u)
        else:
            # No search hit, or a hit without a usable id.
            print(u)
        # Pause between titles (presumably to respect API rate limits).
        time.sleep(5)
class MovieInfo(object):
    '''
    Aggregates information about a single movie from Rotten Tomatoes,
    TMDB, Amazon product search and YouTube.

    Rotten Tomatoes data is preferred where both sources offer a value;
    TMDB is used as the fallback.
    '''

    def __init__(self, movie, rotten_tomatoe_api_key, tmdb_api_key,
                 aws_access_key, aws_secret_key, affiliate_key, rt_id=None,
                 tmdb_id=None):
        '''
        Builds the service clients and fetches the base records.

        movie   -- movie title used for every search
        rt_id   -- optional Rotten Tomatoes id; when given, the record is
                   fetched directly instead of taking the first search hit
        tmdb_id -- accepted for interface compatibility; currently unused

        Raises IndexError when no rt_id is given and the Rotten Tomatoes
        search returns no results.
        '''
        self._movie = movie

        # amazon
        self._amazon_product_search = AmazonProductSearch(
            aws_access_key, aws_secret_key, affiliate_key)

        # rotten tomatoes
        self._rt = RT(rotten_tomatoe_api_key)
        if rt_id:
            self._rt_data = self._rt.info(rt_id)
        else:
            self._rt_data = self._rt.search(movie)[0]

        # tmdb: the best match is chosen using the Rotten Tomatoes
        # theatrical release date as a hint
        self._tmdb = tmdb
        self._tmdb.configure(tmdb_api_key)
        best_match = self._tmdb.Movies(
            movie, limit=True,
            expected_release_date=self._rt_data['release_dates']['theater']
        ).get_best_match()
        self._tmdb_data = self._tmdb.Movie(best_match[1]['id'])

        # youtube
        self._yt_service = gdata.youtube.service.YouTubeService()

    def get_amazon_purchase_links(self, top_cast, runtime):
        '''
        Returns the result of the Amazon item search for this movie,
        using the given top cast and runtime as search hints
        '''
        return self._amazon_product_search.item_search(
            self._movie, top_cast, runtime)

    @property
    def cast(self):
        '''
        Returns the names of the full cast for this movie
        '''
        full_cast = self._rt.info(self._rt_data['id'], 'cast')
        return [member['name'] for member in full_cast['cast']]

    @property
    def critic_reviews(self):
        '''
        Returns a list of critic reviews for this movie, as found under
        the 'reviews' key of the Rotten Tomatoes response.
        '''
        reviews = self._rt.info(self._rt_data['id'], 'reviews')
        return reviews['reviews']

    @property
    def critics_score(self):
        '''
        Returns the Rotten Tomatoes critics score for this movie
        '''
        return self._rt_data['ratings']['critics_score']

    @property
    def director(self):
        '''
        Returns the director information reported by TMDB
        '''
        return self._tmdb_data.get_director()

    @property
    def genres(self):
        '''
        Returns the lower-cased genre names of this movie, supplied by tmdb
        '''
        return [genre['name'].lower()
                for genre in self._tmdb_data.get_genres()]

    @property
    def imdb_id(self):
        '''
        Returns the imdb id of this movie: "tt" plus the Rotten Tomatoes
        alternate id when available, otherwise the id reported by TMDB
        '''
        try:
            return "tt" + self._rt_data['alternate_ids']['imdb']
        except (KeyError, TypeError):
            # Rotten Tomatoes has no (usable) imdb id for this movie.
            return self._tmdb_data.get_imdb_id()

    @property
    def poster(self):
        '''
        Returns the poster of the movie as reported by TMDB
        '''
        return self._tmdb_data.get_poster()

    @property
    def runtime(self):
        '''
        Returns the runtime of this movie in minutes, as an int
        '''
        try:
            return int(self._rt_data['runtime'])
        except (KeyError, TypeError, ValueError):
            # Missing, None or empty runtime on Rotten Tomatoes.
            return int(self._tmdb_data.get_runtime())

    @property
    def release_date(self):
        '''
        Returns this movie's release date as parsed by parser.parse,
        preferring the Rotten Tomatoes theatrical date over TMDB's.
        trailers relies on str() of the result starting with the year.
        '''
        try:
            return parser.parse(self._rt_data['release_dates']['theater'])
        except (KeyError, TypeError, ValueError, AttributeError,
                OverflowError):
            # Missing or unparsable theatrical date: fall back to TMDB.
            return parser.parse(self._tmdb_data.get_release_date())

    @property
    def similar_movies(self):
        '''
        Returns the 'movies' list of the Rotten Tomatoes 'similar'
        response for this movie, unmodified.
        NOTE(review): previously documented as a list of imdb ids --
        confirm what callers expect.
        '''
        movies = self._rt.info(self._rt_data['id'], 'similar')['movies']
        # TODO: if movies is None or empty, search google for the current
        # movie title and parse the DOM for similar titles
        return movies

    @property
    def synopsis(self):
        '''
        Returns this movie's synopsis, falling back to the TMDB overview
        when Rotten Tomatoes has none
        '''
        synopsis = self._rt_data['synopsis']
        if not synopsis:
            synopsis = self._tmdb_data.get_overview()
        return synopsis

    @property
    def title(self):
        '''
        Returns this movie's title
        '''
        return self._rt_data['title']

    @property
    def trailers(self, limit=3):
        '''
        This function returns a list of trailers for this movie. We will
        use TMDB's data if it returns 3 (limit) or more trailers. If not,
        we will query youtube with the search term:
        "{movie_name} trailer {release_year}" to find trailers for this
        movie.

        Because this is a property, normal attribute access always uses
        the default limit.

        Returns a list of youtube ids of the trailers
        '''
        trailers = self._tmdb_data.get_trailers()['youtube']
        # ">=" so that exactly `limit` trailers count as enough, matching
        # the documented "3 or more" rule (the original used ">").
        if len(trailers) >= limit:
            return [trailer['source'] for trailer in trailers]

        release_year = str(self.release_date).split('-')[0]
        query = gdata.youtube.service.YouTubeVideoQuery()
        query.vq = "{title} trailer {release_year} ".format(
            title=self._movie, release_year=release_year)
        query.orderby = 'relevance'
        feed = self._yt_service.YouTubeQuery(query)
        # Only the three most relevant results are considered.
        entries = self._remove_long_youtube_videos(feed.entry[:3])
        entries = self._remove_unrelated_videos(entries)
        return self._remove_duplicate_youtube_videos(entries)

    def _remove_duplicate_youtube_videos(self, entries, threshold=5):
        '''
        This method removes duplicate videos by measuring the runtime of
        the youtube videos. If two videos are within 5 seconds (the
        threshold) of each other in runtime, we assume that one of the
        videos is a duplicate of the other; the earlier entry wins.

        Returns the list of youtube ids of the videos that were kept.
        '''
        kept_runtimes = []
        yt_ids = []
        for entry in entries:
            runtime = int(entry.media.duration.seconds)
            if any(abs(runtime - seen) <= threshold
                   for seen in kept_runtimes):
                continue
            yt_ids.append(self._extract_youtube_id(entry.media.player.url))
            kept_runtimes.append(runtime)
        return yt_ids

    def _remove_long_youtube_videos(self, entries, max_seconds=600):
        '''
        Returns the entries whose duration is below max_seconds, as a list
        '''
        return [entry for entry in entries
                if int(entry.media.duration.seconds) < max_seconds]

    def _remove_unrelated_videos(self, entries):
        '''
        Returns, as a list, the entries whose title has a fuzzy match
        ratio above 20 against the movie title (case-insensitive)
        '''
        wanted = self._movie.lower()
        related = []
        for entry in entries:
            title = entry.media.title.text
            # Only byte strings need decoding; text is used as-is.
            if isinstance(title, bytes):
                title = title.decode('utf-8')
            if fuzzywuzzy.fuzz.ratio(title.lower(), wanted) > 20:
                related.append(entry)
        return related

    def _extract_youtube_id(self, youtube_url):
        '''
        Returns the value of the "v=" parameter of a youtube url, without
        any parameters that follow it.

        Raises IndexError when the url contains no "v=".
        '''
        video_id = youtube_url.split('v=')[1]
        return video_id.split('&')[0]
class MovieInfo(object):
    '''
    Aggregates information about a single movie from Rotten Tomatoes,
    TMDB, Amazon product search and YouTube.

    Rotten Tomatoes data is preferred where both sources offer a value;
    TMDB is used as the fallback.
    '''

    def __init__(self, movie, rotten_tomatoe_api_key, tmdb_api_key,
                 aws_access_key, aws_secret_key, affiliate_key, rt_id=None,
                 tmdb_id=None):
        '''
        Builds the service clients and fetches the base records.

        movie   -- movie title used for every search
        rt_id   -- optional Rotten Tomatoes id; when given, the record is
                   fetched directly instead of taking the first search hit
        tmdb_id -- accepted for interface compatibility; currently unused

        Raises IndexError when no rt_id is given and the Rotten Tomatoes
        search returns no results.
        '''
        self._movie = movie

        # amazon
        self._amazon_product_search = AmazonProductSearch(
            aws_access_key, aws_secret_key, affiliate_key)

        # rotten tomatoes
        self._rt = RT(rotten_tomatoe_api_key)
        if rt_id:
            self._rt_data = self._rt.info(rt_id)
        else:
            self._rt_data = self._rt.search(movie)[0]

        # tmdb: the best match is chosen using the Rotten Tomatoes
        # theatrical release date as a hint
        self._tmdb = tmdb
        self._tmdb.configure(tmdb_api_key)
        best_match = self._tmdb.Movies(
            movie, limit=True,
            expected_release_date=self._rt_data['release_dates']['theater']
        ).get_best_match()
        self._tmdb_data = self._tmdb.Movie(best_match[1]['id'])

        # youtube
        self._yt_service = gdata.youtube.service.YouTubeService()

    def get_amazon_purchase_links(self, top_cast, runtime):
        '''
        Returns the result of the Amazon item search for this movie,
        using the given top cast and runtime as search hints
        '''
        return self._amazon_product_search.item_search(
            self._movie, top_cast, runtime)

    @property
    def cast(self):
        '''
        Returns the names of the full cast for this movie
        '''
        full_cast = self._rt.info(self._rt_data['id'], 'cast')
        return [member['name'] for member in full_cast['cast']]

    @property
    def critic_reviews(self):
        '''
        Returns a list of critic reviews for this movie, as found under
        the 'reviews' key of the Rotten Tomatoes response.
        '''
        reviews = self._rt.info(self._rt_data['id'], 'reviews')
        return reviews['reviews']

    @property
    def critics_score(self):
        '''
        Returns the Rotten Tomatoes critics score for this movie
        '''
        return self._rt_data['ratings']['critics_score']

    @property
    def director(self):
        '''
        Returns the director information reported by TMDB
        '''
        return self._tmdb_data.get_director()

    @property
    def genres(self):
        '''
        Returns the lower-cased genre names of this movie, supplied by tmdb
        '''
        return [genre['name'].lower()
                for genre in self._tmdb_data.get_genres()]

    @property
    def imdb_id(self):
        '''
        Returns the imdb id of this movie: "tt" plus the Rotten Tomatoes
        alternate id when available, otherwise the id reported by TMDB
        '''
        try:
            return "tt" + self._rt_data['alternate_ids']['imdb']
        except (KeyError, TypeError):
            # Rotten Tomatoes has no (usable) imdb id for this movie.
            return self._tmdb_data.get_imdb_id()

    @property
    def poster(self):
        '''
        Returns the poster of the movie as reported by TMDB
        '''
        return self._tmdb_data.get_poster()

    @property
    def runtime(self):
        '''
        Returns the runtime of this movie in minutes, as an int
        '''
        try:
            return int(self._rt_data['runtime'])
        except (KeyError, TypeError, ValueError):
            # Missing, None or empty runtime on Rotten Tomatoes.
            return int(self._tmdb_data.get_runtime())

    @property
    def release_date(self):
        '''
        Returns this movie's release date as parsed by parser.parse,
        preferring the Rotten Tomatoes theatrical date over TMDB's.
        trailers relies on str() of the result starting with the year.
        '''
        try:
            return parser.parse(self._rt_data['release_dates']['theater'])
        except (KeyError, TypeError, ValueError, AttributeError,
                OverflowError):
            # Missing or unparsable theatrical date: fall back to TMDB.
            return parser.parse(self._tmdb_data.get_release_date())

    @property
    def similar_movies(self):
        '''
        Returns the 'movies' list of the Rotten Tomatoes 'similar'
        response for this movie, unmodified.
        NOTE(review): previously documented as a list of imdb ids --
        confirm what callers expect.
        '''
        movies = self._rt.info(self._rt_data['id'], 'similar')['movies']
        # TODO: if movies is None or empty, search google for the current
        # movie title and parse the DOM for similar titles
        return movies

    @property
    def synopsis(self):
        '''
        Returns this movie's synopsis, falling back to the TMDB overview
        when Rotten Tomatoes has none
        '''
        synopsis = self._rt_data['synopsis']
        if not synopsis:
            synopsis = self._tmdb_data.get_overview()
        return synopsis

    @property
    def title(self):
        '''
        Returns this movie's title
        '''
        return self._rt_data['title']

    @property
    def trailers(self, limit=3):
        '''
        This function returns a list of trailers for this movie. We will
        use TMDB's data if it returns 3 (limit) or more trailers. If not,
        we will query youtube with the search term:
        "{movie_name} trailer {release_year}" to find trailers for this
        movie.

        Because this is a property, normal attribute access always uses
        the default limit.

        Returns a list of youtube ids of the trailers
        '''
        trailers = self._tmdb_data.get_trailers()['youtube']
        # ">=" so that exactly `limit` trailers count as enough, matching
        # the documented "3 or more" rule (the original used ">").
        if len(trailers) >= limit:
            return [trailer['source'] for trailer in trailers]

        release_year = str(self.release_date).split('-')[0]
        query = gdata.youtube.service.YouTubeVideoQuery()
        query.vq = "{title} trailer {release_year} ".format(
            title=self._movie, release_year=release_year)
        query.orderby = 'relevance'
        feed = self._yt_service.YouTubeQuery(query)
        # Only the three most relevant results are considered.
        entries = self._remove_long_youtube_videos(feed.entry[:3])
        entries = self._remove_unrelated_videos(entries)
        return self._remove_duplicate_youtube_videos(entries)

    def _remove_duplicate_youtube_videos(self, entries, threshold=5):
        '''
        This method removes duplicate videos by measuring the runtime of
        the youtube videos. If two videos are within 5 seconds (the
        threshold) of each other in runtime, we assume that one of the
        videos is a duplicate of the other; the earlier entry wins.

        Returns the list of youtube ids of the videos that were kept.
        '''
        kept_runtimes = []
        yt_ids = []
        for entry in entries:
            runtime = int(entry.media.duration.seconds)
            if any(abs(runtime - seen) <= threshold
                   for seen in kept_runtimes):
                continue
            yt_ids.append(self._extract_youtube_id(entry.media.player.url))
            kept_runtimes.append(runtime)
        return yt_ids

    def _remove_long_youtube_videos(self, entries, max_seconds=600):
        '''
        Returns the entries whose duration is below max_seconds, as a list
        '''
        return [entry for entry in entries
                if int(entry.media.duration.seconds) < max_seconds]

    def _remove_unrelated_videos(self, entries):
        '''
        Returns, as a list, the entries whose title has a fuzzy match
        ratio above 20 against the movie title (case-insensitive)
        '''
        wanted = self._movie.lower()
        related = []
        for entry in entries:
            title = entry.media.title.text
            # Only byte strings need decoding; text is used as-is.
            if isinstance(title, bytes):
                title = title.decode('utf-8')
            if fuzzywuzzy.fuzz.ratio(title.lower(), wanted) > 20:
                related.append(entry)
        return related

    def _extract_youtube_id(self, youtube_url):
        '''
        Returns the value of the "v=" parameter of a youtube url, without
        any parameters that follow it.

        Raises IndexError when the url contains no "v=".
        '''
        video_id = youtube_url.split('v=')[1]
        return video_id.split('&')[0]