def get_rottentomatoes_ratings(title, year, result=None): print "Processing {} - {}".format(title, year) result = result or defaultdict(lambda: "N/A", { 'title': title, 'year': year }) notes = [] try: rt = RT(RT_API_KEY) movie_list = rt.search(process_title(title)) if year: movie_list = filter_year(movie_list, year) if not movie_list: raise Exception("No results found.") try: movie = movie_list[0] result['rt_matched_title'] = movie['title'] result['rt_audience_score'] = movie['ratings']['audience_score'] result['rt_critics_score'] = movie['ratings']['critics_score'] except KeyError: notes.append("Results not found: {}".format(title)) except Exception as e: notes.append("Exception encountered: {}".format(e)) traceback.print_exc() finally: # result['Title'] = title # result['Year'] = year result['rt_notes'] = '|'.join(notes) return result
def main(): assert os.environ["RT_KEY"], "Your Rotten Tomatoes API key should be stored in the RT_KEY env var!" rt = RT() # NOTE: you should have your API key stored in RT_KEY before this will work movies = [] link_template = "" for country in BOX_OFFICE_COUNTRIES: print "requesting box office hits for {}".format(country) r = rt.lists('movies', 'box_office', limit=LIMIT, country=country) movies += r['movies'] link_template = link_template or r['link_template'] time.sleep(10) # respect our API limits! # to maintain compatibility with movies.json fields, our top level dict # should have the following fields: # total (int) # movies (list) # link_template (string) total = len(movies) result = { "total": total, "movies": movies, "link_template": link_template, } with open(OUTPUT_FILE, "w") as f: json.dump(result, f, indent=2, sort_keys=True)
def get_rottentomatoes_ratings(title, year, result=None): print "Processing {} - {}".format(title, year) result = result or defaultdict(lambda: "N/A", {'title':title, 'year': year}) notes = [] try: rt = RT(RT_API_KEY) movie_list = rt.search(process_title(title)) if year: movie_list = filter_year(movie_list, year) if not movie_list: raise Exception("No results found.") try: movie = movie_list[0] result['rt_matched_title'] = movie['title'] result['rt_audience_score'] = movie['ratings']['audience_score'] result['rt_critics_score'] = movie['ratings']['critics_score'] except KeyError: notes.append("Results not found: {}".format(title)) except Exception as e: notes.append("Exception encountered: {}".format(e)) traceback.print_exc() finally: # result['Title'] = title # result['Year'] = year result['rt_notes'] = '|'.join(notes) return result
def __init__(self, movie, rotten_tomatoe_api_key, tmdb_api_key, aws_access_key, aws_secret_key, affiliate_key, rt_id=None, tmdb_id=None): self._movie = movie # amazon self._amazon_product_search = AmazonProductSearch( aws_access_key, aws_secret_key, affiliate_key) # rotten tomatoes self._rt = RT(rotten_tomatoe_api_key) if rt_id: self._rt_data = self._rt.info(rt_id) else: self._rt_data = self._rt.search(movie)[0] # tmdb self._tmdb = tmdb self._tmdb.configure(tmdb_api_key) movie = self._tmdb.Movies( movie, limit=True, expected_release_date=self._rt_data['release_dates'] ['theater']).get_best_match() self._tmdb_data = self._tmdb.Movie(movie[1]['id']) # youtube self._yt_service = gdata.youtube.service.YouTubeService()
def test(request):
    """Scrape movies, store any new ones (with RT scores when available),
    then render index.html."""
    titles = []
    img_locations = []
    movie_locations = []
    extract_movies(titles, img_locations, movie_locations)
    for i in range(len(titles)):
        # only insert titles we have not stored yet (and skip the 'LOL' stub)
        if (len(Movie.objects.filter(name=titles[i])) == 0 and titles[i] != 'LOL'):
            # Perf fix: the original issued up to four identical RT API
            # searches per title; search once and reuse the result.
            results = RT(RT_KEY).search(titles[i])
            if len(results) > 0 and results[0]['ratings']['critics_score'] != -1:
                ratings = results[0]['ratings']
                movie = Movie(name=titles[i],
                              url=movie_locations[i] + '?country=' + country,
                              pic_url=img_locations[i], country=country,
                              is_american=False,
                              audience_score=ratings['audience_score'],
                              critics_score=ratings['critics_score'])
                movie.save()
            else:
                # no RT match (or unrated): store without scores
                movie = Movie(name=titles[i],
                              url=movie_locations[i] + '?country=' + country,
                              pic_url=img_locations[i], country=country,
                              is_american=False)
                movie.save()
    get_rotten_tomates()
    t = get_template('index.html')
    html = t.render(Context({}))
    return HttpResponse(html)
def get_rotten_tomates(request):
    """Attach the first three RT critic reviews to every Movie, then render
    index.html."""
    # Perf fix: the original built a fresh RT client for every call and
    # fetched the review list twice per movie; do each once.
    api = RT('bt7f4pcbku6m9mqzuhhncc9e')
    for movie in Movie.objects.all():
        print("got here")
        search_results = api.search(movie.name)
        if len(search_results) == 0:
            # NOTE(review): `break` aborts processing of *all* remaining
            # movies on the first miss — preserved, but `continue` may
            # have been intended.
            break
        movie_id = search_results[0]['id']
        reviewblob = api.info(movie_id, 'reviews')['reviews']
        if len(reviewblob) > 2:
            movie.quote1 = reviewblob[0]['quote']
            print(movie.name)
            print(movie.quote1)
            movie.quote2 = reviewblob[1]['quote']
            movie.quote3 = reviewblob[2]['quote']
            movie.critic1 = reviewblob[0]['critic']
            movie.critic2 = reviewblob[1]['critic']
            movie.critic3 = reviewblob[2]['critic']
            movie.fresh1 = 'fresh' in reviewblob[0]['freshness']
            movie.fresh2 = 'fresh' in reviewblob[1]['freshness']
            movie.fresh3 = 'fresh' in reviewblob[2]['freshness']
            movie.save()
    t = get_template('index.html')
    html = t.render(Context({}))
    return HttpResponse(html)
def backend(movieTitles1, movieTitles2, database, svd_model_file):
    """ Returns recommended movies for two people
    movieTitles1: list (!) of movies person 1 wants to watch
    movieTitles2: list (!) of movies person 2 wants to watch
    """
    # movie id <-> title lookup table
    movies_df = createMovieIDTitleDataFrame(database)
    # resolve the input titles to ids
    # TO DO: Fix handling of not finding a movie
    ids_one = [getMovieID(movies_df, name) for name in movieTitles1]
    ids_two = [getMovieID(movies_df, name) for name in movieTitles2]
    # item-item similarities from the trained SVD model
    sims = getSimilarityMatrix(svd_model_file)
    # recommended movie ids, converted back to titles
    rec_ids = getRecMovieIDs(ids_one, ids_two, sims)
    rec_titles = movies_df.Title.loc[rec_ids].tolist()
    # fetch poster / page urls from Rotten Tomatoes (first hit per title)
    rt = RT()
    rec_info = [rt.search(name)[0] for name in rec_titles]
    poster_urls = [hit['posters']['original'] for hit in rec_info]
    movie_urls = [hit['links']['alternate'] for hit in rec_info]
    return zip(rec_titles, poster_urls, movie_urls)
def sync_RT_movies(list = 'box_office'): # Lists could be: box_office, in_theaters, opening, and upcoming from rottentomatoes import RT rt = RT() movies = rt.movies(list) for movie in movies: movie_obj, created = Movie.objects.get_or_create(RT_movie_id = movie['id']) movie_obj.title = movie['title'] movie_obj.poster_detailed = movie['posters']['detailed'] movie_obj.save() print movie_obj print "********"
def dump_rt(self): rttable = dict.fromkeys(KEYS_RTTABLE) rt = RT("wy3s6eaj82m5ztwmsjuhnm38") info = rt.feeling_lucky(search_term=self.name) try: rttable["rt_id"] = info["id"] rttable["name"] = info["title"] rttable["url"] = info["links"]["alternate"] rttable["rating_audience"] = info["ratings"]["audience_score"] rttable["rating_critics"] = info["ratings"]["critics_score"] rttable["reviews_audience"] = info["ratings"]["audience_rating"] rttable["reviews_critics"] = info["ratings"]["critics_rating"] except Exception as e: print "Error Fetching from RT : %s" % e return rttable
def movie_rat(f):
    """Return [title, 'audience_score/100', synopsis] for the movie whose
    name is spread across the tokens f[2:]."""
    # Idiom fix: join the remaining tokens instead of concatenating in a loop.
    movie = ' '.join(f[2:])
    rt = RT('53uhmdfpu5sybbb5y529skkh')  # amitbj96
    # Perf fix: the original searched the API three times for the same
    # film; resolve the canonical title once, then fetch its record once.
    title = rt.search(movie)[0]['title']
    best = rt.search(title, page_limit=1)[0]
    ratings = best['ratings']
    synopsis = str(best['synopsis'])
    if synopsis == '':
        synopsis = "No Synopsis Found."
    # [movie name, rating out of 100, synopsis]
    return [title, str(ratings['audience_score']) + "/100", synopsis]
class IMDBApi(object):
    """Searches Rotten Tomatoes for a title (optionally filtered by year)
    and returns the full info record for each match."""

    def __init__(self, title, year=0):
        self.query = {'title': title, 'year': int(year)}
        self.rt = RT(rotten)

    def __call__(self):
        """Run the search; stashes the unfiltered hits on self.partial."""
        results = self.rt.search(self.query['title'])
        self.partial = copy.copy(results)
        if self.query['year']:
            # keep only hits from the requested year
            results = [x for x in results if x['year'] == self.query['year']]
        return [self.rt.info(x['id']) for x in results]
def _getAllInfo(self, search_params):
    """Search Rotten Tomatoes for search_params and build MovieInfo
    entries; returns [] (after logging) on any failure."""
    found = []
    try:
        pretty_title = str(search_params)
        source = RT(self.key)
        for raw in source.search(pretty_title):
            # fall back to the requested title/year when the API omits them
            name = utils.sanitizeString(raw.get("title", search_params.title))
            year = str(raw.get("year", search_params.year))
            # no genre without more effort
            found.append(movie_types.MovieInfo(name, year))
    except Exception as ex:
        # bad need to find a better exception
        utils.logWarning("Lib: {} Title: {} Error {}: {}".format(self.display_name,
                                                                 search_params.title,
                                                                 type(ex), ex),
                         title="{} lookup".format(self.display_name))
    return found
def rottenTomatoes(search_string, channel):
    '''Takes a string and outputs details from rotten tomatoes'''
    # Look up the json data from rotten tomatoes
    hits = RT().search(search_string, page_limit=1)
    if not hits:
        # Respond if you don't find a match
        ircMessage("no u", channel)
        return
    movie = hits[0]
    ratings = movie['ratings']
    # Not all movies have a synopsis
    summary = ""
    if movie['synopsis'] != "":
        summary = "Summary: " + movie['synopsis'][:200] + "..."
    # Movies without critics scores are replaced with N/A
    critics = ratings['critics_score'] if ratings['critics_score'] > 0 else "N/A"
    message = "{0} ({1}): Critics: {2} Audience: {3} URL: http://rottentomatoes.com/m/{4} {5}".format(
        movie['title'], movie['year'], critics, ratings['audience_score'], movie['id'], summary)
    ircMessage(message, channel)
def synopsis_fetcher(movies):
    """For each movie lacking a stored synopsis ('sin'), fetch one from
    Rotten Tomatoes or IMDb and persist it; returns the collected records.

    movies: iterable of dicts with at least 'm_id' and 'm_title' keys
        (assumed from usage below — TODO confirm against caller).
    """
    print movies
    client = MongoClient()
    db = client.recommender_db
    movies_db = db.movies
    synopsis = []
    for movie in movies:
        print movie['m_title']
        synopse = {}
        movie_in_db = movies_db.find_one({'m_id':movie['m_id']})
        if not movie_in_db.has_key('sin'):
            print movie['m_title']
            # NOTE(review): this search result is never used — a wasted
            # API call; check_in_rotten() performs its own search.
            s_rotten = RT().search(movie['m_title'])
            sy_from_rotten = check_in_rotten(movie)
            if sy_from_rotten:
                synopse = sy_from_rotten
            else:
                # fall back to an IMDb title search
                search_imdb = search_title_imdb(movie['m_title'])
                if search_imdb:
                    synopse = {'m_id': movie['m_id'], 'sin':search_imdb}
        else:
            # already have a synopsis stored; keep the db record as-is
            synopsis.append(movie_in_db)
            print "synopse append----------------------------"
        if synopse:
            synopsis.append(synopse)
            print "----------------fetched synopse--------------"
            print synopse
            print "---------------------------------------------"
            # persist the newly fetched synopsis
            movies_db.update({"m_id":synopse['m_id']}, {'$set':{"sin":synopse['sin'] }})
    return synopsis
class RTAdapter(Adapter):
    """Rotten Tomatoes Adapter

    Implements the Rotten Tomatoes adapter.
    """

    def __init__(self):
        self.config = {
            'api_key': '8yvmeqtydvquk9bxv4mvemhm',
        }
        self.rt = RT(self.config['api_key'])

    def get_similar_film_titles(self, title):
        """Return up to five matching titles; raise when there are none."""
        candidates = self.rt.search(title)[:5]
        if not candidates:
            raise FilmNotFoundError()
        return [candidate['title'] for candidate in candidates]

    def get_film(self, title):
        """Return the RT record whose title safely matches `title`."""
        candidates = self.rt.search(title)
        names = [candidate.get('title', None) for candidate in candidates]
        matched = safe_find_film(title, names)
        if not matched:
            raise FilmNotFoundError()
        return candidates[names.index(matched)]

    def get_film_score(self, title):
        """Return the critics score scaled to [0, 1], or None if unrated."""
        record = self.get_film(title)
        if not 'ratings' in record:
            return None
        return record['ratings']['critics_score'] / 100.0

    def __repr__(self):
        return 'Rotten Tomatoes'
def get_movie_rating(movie):
    """Returns a Rotten Tomatoes (title, critics_score) pair for the given
    movie title, or None when the search comes back empty."""
    try:
        # Bug fix: RT.search was called on the class, not an instance;
        # that raises TypeError (uncaught below) instead of searching.
        # Also renamed the local: the original shadowed the `json` module.
        data = RT().search(movie)[0]
        title = data['title']
        rating = data['ratings']['critics_score']
        return (title, rating)
    except IndexError:
        # no search results at all
        return None
def sms():
    """Twilio SMS endpoint: look up the texted title on Rotten Tomatoes
    and reply with the critics score plus a watch/skip verdict."""
    response = twiml.Response()
    body = request.form['Body']
    results = RT().search(body)
    if results:
        top = results[0]
        rating = top['ratings']['critics_score']
        title = top['title']
        msg = 'The film ' + str(title) + ' received a ' + \
            str(rating) + ' on the TOMATOMETER.'
        if rating > 50:
            msg = msg + ' I would watch it.'
        else:
            msg = msg + ' Skip this one.'
    else:
        msg = "We didn't find " + body + " on RT... maybe check your spelling."
    response.sms(msg)
    return str(response)
def get_rotten_tomates():
    """Store every critic review RT has for each Movie as Review rows."""
    # Perf fix: the original built a new RT client for every API call and
    # re-fetched the full review list on *each* inner-loop iteration
    # (plus once more for the range bound) — N+1 network calls per movie.
    api = RT('bt7f4pcbku6m9mqzuhhncc9e')
    for movie in Movie.objects.all():
        search_results = api.search(movie.name)
        if len(search_results) == 0:
            # NOTE(review): `break` aborts all remaining movies on the
            # first miss — preserved, but `continue` may be intended.
            break
        movie_id = search_results[0]['id']
        reviews = api.info(movie_id, 'reviews')['reviews']
        for entry in reviews:
            quote = entry['quote']
            fresh_bool = 'fresh' in entry['freshness']
            name = entry['critic']
            print(quote)
            print(name)
            review = Review(name=name, body=quote, fresh=fresh_bool, movie=movie)
            review.save()
def check_in_rotten(movie):
    """Return {'m_id': ..., 'sin': synopsis} for `movie`, trying the RT
    synopsis first and falling back to the IMDb plot via alternate_ids;
    returns {} when nothing usable is found.

    movie: dict with 'm_id' and 'm_title' keys (assumed from usage —
        TODO confirm against caller).
    """
    s_rotten = RT().search(movie['m_title'])
    synopse = {}
    if s_rotten:
        m_rotten = s_rotten[0]
        r_synopse = m_rotten['synopsis']
        if r_synopse:
            synopse = {'m_id': movie['m_id'], 'sin':r_synopse}
        elif m_rotten.has_key('alternate_ids'):
            # RT had no synopsis; try the linked IMDb record instead
            i_id = m_rotten['alternate_ids']['imdb']
            imdb_request_plot = get_imdb_plot(i_id)
            if imdb_request_plot:
                print imdb_request_plot
                synopse = {'m_id': movie['m_id'], 'sin':imdb_request_plot}
            else:
                # NOTE(review): else-attachment reconstructed from a
                # flattened source — confirm this belongs to the
                # imdb_request_plot check and not an outer if.
                print "no movie plot or synopse "
    return synopse
def __init__(self, movie, rotten_tomatoe_api_key, tmdb_api_key, aws_access_key, aws_secret_key, affiliate_key, rt_id=None, tmdb_id=None): self._movie = movie # amazon self._amazon_product_search = AmazonProductSearch(aws_access_key, aws_secret_key, affiliate_key) # rotten tomatoes self._rt = RT(rotten_tomatoe_api_key) if rt_id: self._rt_data = self._rt.info(rt_id) else: self._rt_data = self._rt.search(movie)[0] # tmdb self._tmdb = tmdb self._tmdb.configure(tmdb_api_key) movie = self._tmdb.Movies(movie, limit=True, expected_release_date=self._rt_data['release_dates']['theater']).get_best_match() self._tmdb_data = self._tmdb.Movie(movie[1]['id']) # youtube self._yt_service = gdata.youtube.service.YouTubeService()
def test_empty_feeling_lucky_method_fails(self):
    # feeling_lucky with no search term should blow up with TypeError
    with self.assertRaises(TypeError):
        RT().feeling_lucky()
def test_movies_box_office(self):
    # the box_office list should hit the lists/movies endpoint
    RT().movies('box_office')
    requested = call_args('path')
    self.assertEqual(requested, '/api/public/v1.0/lists/movies/box_office.json')
def test_dvds_new_releases(self):
    # the new_releases list should hit the lists/dvds endpoint
    RT().dvds('new_releases')
    requested = call_args('path')
    self.assertEqual(requested, '/api/public/v1.0/lists/dvds/new_releases.json')
def test_movies_in_theaters(self):
    # the in_theaters list should hit the lists/movies endpoint
    RT().movies('in_theaters')
    requested = call_args('path')
    self.assertEqual(requested, '/api/public/v1.0/lists/movies/in_theaters.json')
def test_movies_opening(self):
    # the opening list should hit the lists/movies endpoint
    RT().movies('opening')
    requested = call_args('path')
    self.assertEqual(requested, '/api/public/v1.0/lists/movies/opening.json')
def test_initialized_api_key(self):
    # a key handed to the constructor is stored verbatim
    rt = RT('called_api_key')
    self.assertEqual(rt.api_key, 'called_api_key')
def __init__(self):
    # hard-coded RT API key; the client is built once and reused
    self.config = {
        'api_key': '8yvmeqtydvquk9bxv4mvemhm',
    }
    self.rt = RT(self.config['api_key'])
def test_empty_search_url_keys(self):
    # an empty query should send only the api key
    RT().search('')
    params = call_args()
    self.assertEqual(params.keys(), ['apikey'])
def test_search_url_keys_with_page_arg(self):
    # a page kwarg should be forwarded alongside q and apikey
    RT().search('some movie', page=2)
    params = call_args()
    self.assertEqual(params.keys(), ['q', 'apikey', 'page'])
# -*- coding: utf-8 -*- import time from rottentomatoes import RT rt = RT("uhaar3a93r8jqamjzmg5kum8") import MySQLdb as mbd # set up the connection to the MySQL database con = mbd.connect('localhost', 'simmerin', 'simmerin', 'movies') j = 0 # a counter for debugging purposes with con: cur = con.cursor() # drop the table if it exists to prevent adding movies multiple times cur.execute("DROP TABLE IF EXISTS rtid") # create the table as detailed in the documentation cur.execute("CREATE TABLE rtid(id INT PRIMARY KEY AUTO_INCREMENT, \ rtid INT)") # bring in the titles and years from the Box Office Mojo data cur.execute("SELECT * FROM mojo") titles = cur.fetchall() # loop through each title and pull out the rt id for the first film # returned from the RT search with the correct year, if no matches # use NULL for the rt id for movie in titles: print j title = movie[1] year = movie[5] startTime = time.time() searchResults = rt.search(title) correctFilm = 0
def test_version_argument_with_string(self):
    # a string version should be stored unchanged
    rt = RT(version='2.5')
    self.assertEqual(rt.version, '2.5')
#output the list of titles so I don't have to grab it again #write_path = 'E:\Netflix Movies\\' write_path = 'C:\Other Projects\Netflix Movies\\' write_file = "titles.csv" f = write_path+write_file with open(f, 'w+') as my_file: my_file.write('\n'.join(netflix_titles)) #Read back in the csv file: netflix_titles = [line.rstrip('\n') for line in open(f)] #set up RT api package from rottentomatoes import RT rt = RT('4cbst6rnnvresrd9e8q83hhs') #just testing how RT's API works fight_clubs = rt.search('101 dalmations') for club in fight_clubs: print "title="+club["title"]+" & ID="+club["id"] + " & released="+str(club["year"]) print "Critics' Score: "+str(club["ratings"]["critics_score"]) print "Audience Score: "+str(club["ratings"]["audience_score"]) if club["title"] in netflix_titles: print "On Netflix!" #scrape http://www.rottentomatoes.com/top/bestofrt/?year=2012 for best movies from lxml import html import requests
# Script: rate the movie files found in `dirname` and keep/delete by
# `threshold`. NOTE(review): chunk appears truncated — keepcount /
# deletecount are used further on.
from rottentomatoes import RT
import os
import sys
import requests
import json

if len(sys.argv) != 3:
    print "Usage: ./movie_rater.py dirname threshold"
    exit(1)

searchdir = sys.argv[1]
keepthreshold = int(sys.argv[2])
myrt = RT()
movielist = os.listdir(searchdir)
movielist.sort()

def omdbsearch(title):
    """Query the OMDb API for `title` and return the decoded JSON dict."""
    # NOTE(review): title is interpolated into the URL unescaped —
    # spaces/special characters should be URL-encoded (requests' params=
    # would handle this); confirm before relying on odd titles.
    data = json.loads(requests.get("http://omdbapi.com/?t=" + title + "&r=json").text)
    #print str(data)
    return data

keepcount = 0
deletecount = 0
# Script: build media.Movie instances for every movie currently opening,
# attaching a YouTube trailer id and a comma-separated cast string.
# NOTE(review): chunk appears truncated — movie_collection is never
# appended to within this view.
import config
import media
import my_movie_list
import youtube_search

# Rotten Tomatoes API for Python
# docs: https://github.com/zachwill/rottentomatoes
from rottentomatoes import RT

# initialize a rotten tomatoes instance
rt = RT( config.api_key )

# get a list of movie openings from RT
opening_movies = rt.lists('movies', 'opening')

# iterate through opening movies and create instances of our Movie class for each
movie_collection = []
for mov in opening_movies['movies']:
    # Find youtube trailer (first search hit only)
    options = youtube_search.Options( mov['title'] + ' trailer', 1 )
    trailer_id = youtube_search.search(options)
    # Convert cast names to a comma separated list
    cast = ''
    # direct __len__() call — len(...) would be the idiomatic form
    l = mov['abridged_cast'].__len__() - 1
    for i, actor in enumerate( mov['abridged_cast'] ):
        cast += actor['name']
        if i < l:
            cast += ', '
    movie = media.Movie(mov['title'], mov['synopsis'], mov['posters']['thumbnail'], trailer_id,
                        mov['mpaa_rating'], mov['release_dates']['theater'],
                        mov['ratings']['critics_score'], cast)
def test_first_json_loads_movies_result_is_returned(self):
    # feeling_lucky should hand back only the first search result
    self.assertEqual(RT().feeling_lucky('some movie'), 'first_result')
def test_search_url_keys_for_lion_king(self):
    # both the api key and the query string should appear in the params
    RT().search('the lion king')
    params = call_args()
    assert 'my_api_key' in params['apikey']
    assert 'the lion king' in params['q']
def test_uninitialized_api_key(self):
    # with no explicit key, the module default should be used
    rt = RT()
    self.assertEqual(rt.api_key, 'my_api_key')
class MovieInfo(object):
    """Aggregates one movie's metadata from Rotten Tomatoes, TMDB, Amazon
    Product Search and YouTube, preferring RT data with TMDB fallbacks."""

    def __init__(self, movie, rotten_tomatoe_api_key, tmdb_api_key,
                 aws_access_key, aws_secret_key, affiliate_key,
                 rt_id=None, tmdb_id=None):
        """Wire up all external data sources for `movie`.

        rt_id: optional Rotten Tomatoes id; when given, skips the search.
        tmdb_id: accepted but currently unused — NOTE(review): confirm.
        """
        self._movie = movie
        # amazon
        self._amazon_product_search = AmazonProductSearch(aws_access_key, aws_secret_key, affiliate_key)
        # rotten tomatoes: explicit id wins, otherwise first search hit
        self._rt = RT(rotten_tomatoe_api_key)
        if rt_id:
            self._rt_data = self._rt.info(rt_id)
        else:
            self._rt_data = self._rt.search(movie)[0]
        # tmdb: disambiguate by the RT theatrical release date
        self._tmdb = tmdb
        self._tmdb.configure(tmdb_api_key)
        # NOTE: `movie` is rebound to the (score, record) best-match pair
        movie = self._tmdb.Movies(movie, limit=True, expected_release_date=self._rt_data['release_dates']['theater']).get_best_match()
        self._tmdb_data = self._tmdb.Movie(movie[1]['id'])
        # youtube
        self._yt_service = gdata.youtube.service.YouTubeService()

    def get_amazon_purchase_links(self, top_cast, runtime):
        """Return Amazon purchase products matching this movie."""
        products = self._amazon_product_search.item_search(self._movie, top_cast, runtime)
        return products

    @property
    def cast(self):
        ''' Returns the names of the full cast for this movie '''
        full_cast = self._rt.info(self._rt_data['id'], 'cast')
        names = [cast['name'] for cast in full_cast['cast']]
        return names

    @property
    def critic_reviews(self):
        ''' Returns a list of critic reviews for this movie. The list is
        composed of json documents. '''
        reviews = self._rt.info(self._rt_data['id'], 'reviews')
        return reviews['reviews']

    @property
    def critics_score(self):
        ''' Returns the rotten tomatoe critic score for this movie '''
        return self._rt_data['ratings']['critics_score']

    @property
    def director(self):
        ''' Returns a list of directors for this movie '''
        return self._tmdb_data.get_director()

    @property
    def genres(self):
        ''' Returns the genres of this movie, supplied by tmdb '''
        genres = self._tmdb_data.get_genres()
        genres = [genre['name'].lower() for genre in genres]
        return genres

    @property
    def imdb_id(self):
        ''' Returns this movie's IMDb id (prefixed "tt"), falling back to
        TMDB when Rotten Tomatoes lacks the alternate id. '''
        try:
            return "tt" + self._rt_data['alternate_ids']['imdb']
        except:
            return self._tmdb_data.get_imdb_id()

    @property
    def poster(self):
        ''' Returns the poster of the movie, in its original size '''
        return self._tmdb_data.get_poster()

    @property
    def runtime(self):
        ''' Return the runtime of this movie in minutes '''
        try:
            return int(self._rt_data['runtime'])
        except:
            return int(self._tmdb_data.get_runtime())

    @property
    def release_date(self):
        ''' Returns this movie's release date in {year}-{month}-{day} format '''
        try:
            return parser.parse(self._rt_data['release_dates']['theater'])
        except:
            return parser.parse(self._tmdb_data.get_release_date())

    @property
    def similar_movies(self):
        ''' Returns a list of imdb ids of movies that are similar to this one '''
        # NOTE(review): `movies` is fetched but never returned — this
        # property currently always returns None.
        movies = self._rt.info(self._rt_data['id'], 'similar')['movies']
        # if movie is none or len(movies) == 0:
        # search google current movie title +
        # parse DOM for
        # return movies

    @property
    def synopsis(self):
        ''' Returns this movie's synopsis '''
        synopsis = self._rt_data['synopsis']
        if len(synopsis) == 0:
            synopsis = self._tmdb_data.get_overview()
        return synopsis

    @property
    def title(self):
        ''' Returns this movie's title '''
        return self._rt_data['title']

    @property
    def trailers(self, limit=3):
        ''' This function returns a list of trailers for this movie. We
        will use TMDB's data if it returns 3 or more trailers.
        If not, we will query youtube with the search term:
        "{movie_name} trailer {release_year}" to find trailers for this
        movie. Returns a list of youtube ids of the trailers.

        NOTE(review): `limit` can never be passed — properties are
        accessed without arguments.
        '''
        trailers = self._tmdb_data.get_trailers()['youtube']
        if len(trailers) > limit:
            return [trailer['source'] for trailer in trailers]
        else:
            release_year = str(self.release_date).split('-')[0]
            query = gdata.youtube.service.YouTubeVideoQuery()
            query.vq = "{title} trailer {release_year} ".format(
                title=self._movie, release_year=release_year)
            query.orderby = 'relevance'
            feed = self._yt_service.YouTubeQuery(query)
            # filter the top three hits down to plausible, distinct trailers
            entries = self._remove_long_youtube_videos(feed.entry[:3])
            entries = self._remove_unrelated_videos(entries)
            unique_entries = self._remove_duplicate_youtube_videos(entries)
            return unique_entries

    def _remove_duplicate_youtube_videos(self, entries, threshold=5):
        ''' This method removes duplicate videos by measuring the runtime
        of the youtube videos. If two videos are within 5 seconds (the
        threshold) of each other in runtime, we assume that one of the
        videos is a duplicate of the other. '''
        limit = 3
        videos = []
        for entry in entries:
            runtime = int(entry.media.duration.seconds)
            similar = [runtime >= int(video["runtime"]) - threshold and
                       runtime <= int(video["runtime"]) + threshold
                       for video in videos]
            if not any(similar):
                video_id = self._extract_youtube_id(entry.media.player.url)
                videos.append({"yt_id": video_id, "runtime": runtime})
        yt_ids = [video['yt_id'] for video in videos]
        return yt_ids

    def _remove_long_youtube_videos(self, entries, max_seconds=600):
        # drop anything 10+ minutes long — unlikely to be a trailer
        entries = filter(lambda entry: int(entry.media.duration.seconds) < max_seconds, entries)
        return entries

    def _remove_unrelated_videos(self, entries):
        # keep videos whose title fuzzily matches the movie name
        entries = filter(lambda entry: fuzzywuzzy.fuzz.ratio(entry.media.title.text.decode('utf-8').lower(), self._movie.lower()) > 20, entries)
        return entries

    def _extract_youtube_id(self, youtube_url):
        # pull the v= parameter out of a watch URL, stopping at any '&'
        video_id = youtube_url.split('v=')[1]
        ampersand_position = video_id.find('&')
        if(ampersand_position != -1):
            video_id = video_id[0:ampersand_position]
        return video_id
def test_version_argument_with_float(self):
    # numeric versions should be normalized to their string form
    rt = RT(version=2.5)
    self.assertEqual(rt.version, '2.5')
# Script: for every stored Movie lacking similar_movies, query Google for
# its page and (presumably, beyond this chunk) scrape related titles.
# NOTE(review): chunk appears truncated after set_handle_robots(False).
import mechanize
import settings
import time
from bs4 import BeautifulSoup
from Movie import db, Movie
from movie_crawler import save_movie_info_to_mongo
from rottentomatoes import RT

if __name__ == "__main__":
    # keep only mongo settings with truthy values, lower-cased keys
    db_conn_settings = dict([(k.lower(), v) for k, v in settings.ProdConfig.MONGODB_SETTINGS.items() if v])
    db.connect(**db_conn_settings)
    rotten_tomatoe_api_key = settings.Config.ROTTEN_TOMATOES_API_KEY
    rt = RT(rotten_tomatoe_api_key)
    # timeout(False) keeps the mongo cursor alive for the long crawl
    movies = Movie.objects.timeout(False)
    for index, movie in enumerate(movies):
        print index
        if len(movie.similar_movies) == 0:
            print "updating: %s %s" % (movie.title, movie.release_date)
            # creating our search term
            search_term = movie.title.replace(" ", "+")
            url = "https://www.google.com/search?q=%s+Movie" % search_term
            # querying google (browser-like headers to avoid bot blocking)
            br = mechanize.Browser()
            br.addheaders = [('user-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.69 Safari/537.36'),
                             ('accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')]
            br.set_handle_robots(False)
def test_nonempty_search_url_path(self):
    # searching should hit the movies endpoint
    RT().search('some movie')
    self.assertEqual(call_args('path'), '/api/public/v1.0/movies')
# Tiny exploration script: fetch the in-theaters list and drop into the
# debugger to inspect the response interactively.
from rottentomatoes import RT
rt = RT()
movies = rt.movies('in_theaters')
import pdb; pdb.set_trace()
def test_nonempty_search_url_keys(self):
    # a plain search should send exactly q and apikey
    RT().search('some movie')
    params = call_args()
    self.assertEqual(params.keys(), ['q', 'apikey'])
def test_search_results_for_standard_datatype(self):
    # search should return the plain movie list from the payload
    found = RT().search('some movie')
    self.assertEqual(found, ['first_result', 'second_result'])
def test_search_url_keys_with_multiple_kwargs(self):
    # every extra kwarg should be forwarded as a query parameter
    RT().search('some movie', page=2, page_limit=5)
    params = call_args()
    self.assertEqual(params.keys(), ['q', 'apikey', 'page', 'page_limit'])
#!/usr/bin/python from rottentomatoes import RT import sys import pprint #api_key = 'bpbjuznunrqvkeuqjk9m2dmm' #RT(api_key).search('gone with the wind') rt = RT() dump = rt.search(sys.argv[1]) pprint.pprint(dump) if dump: rating = dump[0]['ratings']['critics_score'] title = dump[0]['title'] msg = 'The film ' + str(title) + ' received a ' + \ str(rating) + ' on the TOMATOMETER.' if rating > 50: msg = msg + ' I would watch it.' else: msg = msg + ' Skip this one.' else: msg = 'nothing here!' print msg
def test_search_url_keys_for_ronin(self):
    # both the api key and the query string should appear in the params
    RT().search('ronin')
    params = call_args()
    assert 'my_api_key' in params['apikey']
    assert 'ronin' in params['q']
from rottentomatoes import RT import time rt = RT() # movies = ['fight club','gravity','toy story 3','american hustle','skyfall','jack and jill','basic instinct 2','white out','lost souls','babylon','argo','bears','her','up','a beautiful mind','braveheart','the hurt locker','gambit','paranoia','getaway'] movies = [line.strip("\n") for line in open("movies.txt")] f = open("reviews-date.txt", "w") count = 0 for u in movies: movlst = rt.search(u, page_limit=1) if movlst: if movlst[0][u"id"] != "": review = rt.info(movlst[0][u"id"], "reviews") review.viewkeys() rlt = review[u"reviews"] # review is a dict, value of each key is list, each list element is a dict for a in rlt: f.write(a[u"quote"] + "\t" + a[u"date"] + "\n") f2.write(a[u"date"] + "\n") count = count + 1 # print a[u'quote'] if len(rlt) == 0: print u else: print count else: print u else: print u time.sleep(5) f.close()
#!/usr/bin/env python
# Script: search RT for each listed movie and dump all hits to test2.csv.
from rottentomatoes import RT
import json, csv

rt = RT()
#movies = ['toy story 3', 'the lion king', 'the matrix', 'the dark knight', 'inception', 'titanic', 'the godfather', 'the little mermaid', 'the shining', 'avatar']
movies = ['pocahontas', 'shawshank redemption']
# Bug fix: the original rebound the name `json` (the imported module)
# to this results list, shadowing the stdlib module for the rest of
# the script; use a distinct name.
rows = []
for movie in movies:
    rows = rows + rt.search(movie)

with open('test2.csv', 'wb+') as f:
    dict_writer = csv.DictWriter(
        f,
        fieldnames=[
            'ratings', 'abridged_directors', 'links', 'title',
            'critics_consensus', 'release_dates', 'abridged_cast',
            'synopsis', 'mpaa_rating', 'year', 'alternate_ids',
            'posters', 'runtime', 'id'
        ])
    dict_writer.writeheader()
    dict_writer.writerows(rows)
def __init__(self, title, year=0):
    """Record the search query (year coerced to int) and build the RT
    client with the module-level key."""
    self.query = {'title': title, 'year': int(year)}
    self.rt = RT(rotten)