def add_failed(self, entry, reason=None, **kwargs):
    """Record a failure of ``entry`` in the failed list (displayed with --failed).

    An existing row with the same title and original url gets its counter
    incremented and its timestamp and reason refreshed; otherwise a new
    ``FailedEntry`` is created. Afterwards only the 25 rows with the newest
    ``tof`` values are kept.

    :param entry: mapping providing the ``'title'`` and ``'original_url'`` keys
    :param reason: failure description, falls back to ``'Unknown'`` when empty
    :param kwargs: accepted for call compatibility, not used here
    """
    if not reason:
        reason = 'Unknown'
    session = Session()
    try:
        title = entry['title']
        url = entry['original_url']

        # Look for an already recorded failure of the same title/url pair
        lookup = session.query(FailedEntry)
        lookup = lookup.filter(FailedEntry.title == title)
        lookup = lookup.filter(FailedEntry.url == url)
        record = lookup.first()

        if record:
            record.count += 1
            record.tof = datetime.now()
            record.reason = reason
        else:
            record = FailedEntry(title, url, reason)
        session.merge(record)
        log.debug('Marking %s in failed list. Has failed %s times.' % (record.title, record.count))

        # Keep the table at 25 rows at most: everything past the 25 newest is dropped
        newest_first = session.query(FailedEntry).order_by(FailedEntry.tof.desc())
        for stale in newest_first[25:]:
            session.delete(stale)
        session.commit()
    finally:
        session.close()
def add_failed(self, entry):
    """Record a failure of ``entry`` in the failed list (displayed with --failed).

    An existing row with the same title and url gets its counter incremented
    and its timestamp refreshed; otherwise a new ``FailedEntry`` is created.
    Afterwards only the 25 rows with the newest ``tof`` values are kept.

    :param entry: mapping providing the ``'title'`` and ``'url'`` keys
    """
    session = Session()
    try:
        title = entry['title']
        url = entry['url']

        # Look for an already recorded failure of the same title/url pair
        lookup = session.query(FailedEntry)
        lookup = lookup.filter(FailedEntry.title == title)
        lookup = lookup.filter(FailedEntry.url == url)
        record = lookup.first()

        if record:
            record.count += 1
            record.tof = datetime.now()
        else:
            record = FailedEntry(title, url)
        session.merge(record)

        # Keep the table at 25 rows at most: everything past the 25 newest is dropped
        newest_first = session.query(FailedEntry).order_by(FailedEntry.tof.desc())
        for stale in newest_first[25:]:
            session.delete(stale)
        session.commit()
    finally:
        session.close()
def add_failed(self, entry, reason=None, **kwargs):
    """Add ``entry`` to the internal failed list, which is displayed with --failed.

    When a row with the same title and original url already exists, its failure
    counter is bumped and its timestamp and reason are updated. Otherwise a
    fresh ``FailedEntry`` row is built. The list is then trimmed so that only
    the 25 rows with the most recent ``tof`` remain.

    :param entry: mapping providing the ``'title'`` and ``'original_url'`` keys
    :param reason: failure description, ``'Unknown'`` is used when it is empty
    :param kwargs: accepted for call compatibility, not used here
    """
    if not reason:
        reason = 'Unknown'
    db = Session()
    try:
        entry_title = entry['title']
        entry_url = entry['original_url']

        # Does this title/url pair already have a failure row?
        existing = db.query(FailedEntry)
        existing = existing.filter(FailedEntry.title == entry_title)
        existing = existing.filter(FailedEntry.url == entry_url)
        failure = existing.first()

        if failure:
            failure.count += 1
            failure.tof = datetime.now()
            failure.reason = reason
        else:
            failure = FailedEntry(entry_title, entry_url, reason)
        db.merge(failure)
        log.debug('Marking %s in failed list. Has failed %s times.' % (failure.title, failure.count))

        # Trim the list: rows beyond the 25 newest ones are deleted
        by_recency = db.query(FailedEntry).order_by(FailedEntry.tof.desc())
        for outdated in by_recency[25:]:
            db.delete(outdated)
        db.commit()
    finally:
        db.close()
def lookup_movie(
    title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None, only_cached=False, session=None
):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.

    The local cache is consulted first, in this order: ``rottentomatoes_id``, ``imdb_id`` (through
    the stored ``imdb``/``flexget_imdb`` alternate ids), lower-cased ``title`` (plus ``year`` when
    given) and finally the saved search results. A cached movie flagged as expired is refreshed
    unless ``only_cached`` is set. On a cache miss the online lookups are tried in the order
    imdb alias, rottentomatoes id, title search.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie (leading ``t`` characters are stripped before it is
        compared with stored alternate ids)
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string; when given, the
        parsed values replace ``title`` and ``year``
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup
    """

    if smart_match:
        # If smart_match was specified, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == "" and not (rottentomatoes_id or imdb_id or title):
            raise PluginError("Failed to parse name from %s" % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = "%s %s" % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError("No criteria specified for rotten tomatoes lookup")

    def id_str():
        return "<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>" % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug("Looking up rotten tomatoes information for %s" % id_str())

    movie = None

    # Try to lookup from cache
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = (
            session.query(RottenTomatoesAlternateId)
            .filter(RottenTomatoesAlternateId.name.in_(["imdb", "flexget_imdb"]))
            .filter(RottenTomatoesAlternateId.id == imdb_id.lstrip("t"))
            .first()
        )
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug("No matches in movie cache found, checking search cache.")
            found = (
                session.query(RottenTomatoesSearchResult)
                .filter(func.lower(RottenTomatoesSearchResult.search) == search_string)
                .first()
            )
            if found and found.movie:
                log.debug("Movie found in search cache.")
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug("Cache has expired for %s, attempting to refresh from Rotten Tomatoes." % id_str())
            try:
                # First stored imdb-style alternate id, or None when the movie has none.
                # (Indexing the result of filter() raised IndexError when alternate ids existed
                # but none was imdb-style, and does not work at all on Python 3.)
                imdb_alt_id = next(
                    (
                        alt_id.id
                        for alt_id in (movie.alternate_ids or [])
                        if alt_id.name in ["imdb", "flexget_imdb"]
                    ),
                    None,
                )
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, "imdb")
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error("Error refreshing movie details from Rotten Tomatoes, cached info being used.")
        else:
            log.debug("Movie %s information restored from cache." % id_str())
    else:
        if only_cached:
            raise PluginError("Movie %s not found from cache" % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug("Movie %s not found in cache, looking up from rotten tomatoes." % id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug("Using IMDB alias %s." % imdb_id)
                result = movies_alias(imdb_id, "imdb")
                if result:
                    # Sanity-check the alias result against the other supplied criteria
                    mismatch = []
                    if (
                        title
                        and difflib.SequenceMatcher(
                            lambda x: x == " ", re.sub(r"\s+\(.*\)$", "", result["title"].lower()), title.lower()
                        ).ratio()
                        < MIN_MATCH
                    ):
                        mismatch.append("the title (%s <-?-> %s)" % (title, result["title"]))
                    result["year"] = int(result["year"])
                    if year and fabs(result["year"] - year) > 1:
                        mismatch.append("the year (%s <-?-> %s)" % (year, result["year"]))
                        release_year = None
                        if result.get("release_dates", {}).get("theater"):
                            log.debug("Checking year against theater release date")
                            release_year = time.strptime(result["release_dates"].get("theater"), "%Y-%m-%d").tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append("the theater release (%s)" % release_year)
                        elif result.get("release_dates", {}).get("dvd"):
                            log.debug("Checking year against dvd release date")
                            release_year = time.strptime(result["release_dates"].get("dvd"), "%Y-%m-%d").tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append("the DVD release (%s)" % release_year)
                    if mismatch:
                        log.warning(
                            "Rotten Tomatoes had an imdb alias for %s but it didn't match %s."
                            % (imdb_id, ", or ".join(mismatch))
                        )
                    else:
                        log.debug("imdb_id %s maps to rt_id %s, checking db for info." % (imdb_id, result["id"]))
                        movie = (
                            session.query(RottenTomatoesMovie)
                            .filter(RottenTomatoesMovie.id == result.get("id"))
                            .first()
                        )
                        if movie:
                            log.debug(
                                "Movie %s was in database, but did not have the imdb_id stored, "
                                "forcing an update" % movie
                            )
                            movie = set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug("%s was not in database, setting info." % result["title"])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError("set_movie_details returned %s" % movie)
                            session.add(movie)
                else:
                    log.debug("IMDB alias %s returned no results." % imdb_id)

            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
                    session.add(movie)

            if not movie and title:
                log.verbose("Searching from rt `%s`" % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get("movies")
                    if results:
                        # Score every hit by title similarity and put the best match first
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(lambda x: x == " ", movie_res["title"].lower(), title.lower())
                            movie_res["match"] = seq.ratio()
                        results.sort(key=lambda x: x["match"], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy because hits are removed from the list)
                        for movie_res in results[:]:
                            if year and movie_res.get("year"):
                                movie_res["year"] = int(movie_res["year"])
                                if movie_res["year"] != year:
                                    # Give the hit a second chance through its release dates
                                    release_year = False
                                    if movie_res.get("release_dates", {}).get("theater"):
                                        log.debug("Checking year against theater release date")
                                        release_year = time.strptime(
                                            movie_res["release_dates"].get("theater"), "%Y-%m-%d"
                                        ).tm_year
                                    elif movie_res.get("release_dates", {}).get("dvd"):
                                        log.debug("Checking year against dvd release date")
                                        release_year = time.strptime(
                                            movie_res["release_dates"].get("dvd"), "%Y-%m-%d"
                                        ).tm_year
                                    if not (release_year and release_year == year):
                                        log.debug(
                                            "removing %s - %s (wrong year: %s)"
                                            % (
                                                movie_res["title"],
                                                movie_res["id"],
                                                str(release_year or movie_res["year"]),
                                            )
                                        )
                                        results.remove(movie_res)
                                        continue
                            if movie_res["match"] < MIN_MATCH:
                                log.debug("removing %s (min_match)" % movie_res["title"])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError("no appropiate results")

                        if len(results) == 1:
                            log.debug("SUCCESS: only one movie remains")
                        else:
                            # Check min difference between best two hits
                            diff = results[0]["match"] - results[1]["match"]
                            if diff < MIN_DIFF:
                                # The year is only coerced to int when a year criterion was given,
                                # so it may be missing or a string here: format it with %s and
                                # read it with .get() so this log line cannot mask the PluginError.
                                log.debug(
                                    "unable to determine correct movie, min_diff too small"
                                    "(`%s (%s) - %s` <-?-> `%s (%s) - %s`)"
                                    % (
                                        results[0]["title"],
                                        results[0].get("year"),
                                        results[0]["id"],
                                        results[1]["title"],
                                        results[1].get("year"),
                                        results[1]["id"],
                                    )
                                )
                                for r in results:
                                    log.debug("remain: %s (match: %s) %s" % (r["title"], r["match"], r["id"]))
                                raise PluginError("min_diff")

                        imdb_alt_id = results[0].get("alternate_ids", {}).get("imdb")
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get("id"))

                        if not result:
                            # Fall back to the (less detailed) search hit itself
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            # any() instead of `not filter(...)`: a filter object is always truthy
                            # on Python 3, which silently disabled this branch.
                            if imdb_id and not any(
                                alt_id.name == "imdb" and alt_id.id == imdb_id.lstrip("t")
                                for alt_id in movie.alternate_ids
                            ):
                                log.warning("Adding flexget_imdb alternate id %s for movie %s" % (imdb_id, movie))
                                movie.alternate_ids.append(
                                    RottenTomatoesAlternateId("flexget_imdb", imdb_id.lstrip("t"))
                                )
                            session.add(movie)
                            session.commit()
                        except IntegrityError:
                            log.warning(
                                "Found movie %s in database after search even though we "
                                "already looked, updating it with search result." % movie
                            )
                            session.rollback()
                            movie = (
                                session.query(RottenTomatoesMovie)
                                .filter(RottenTomatoesMovie.id == result["id"])
                                .first()
                            )
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not any(
                                alt_id.name == "imdb" and alt_id.id == imdb_id.lstrip("t")
                                for alt_id in movie.alternate_ids
                            ):
                                log.warning("Adding flexget_imdb alternate id %s for movie %s" % (imdb_id, movie))
                                movie.alternate_ids.append(
                                    RottenTomatoesAlternateId("flexget_imdb", imdb_id.lstrip("t"))
                                )
                            session.merge(movie)
                            session.commit()

                        # Remember which search string led to this movie when it differs from the title
                        if title.lower() != movie.title.lower():
                            log.debug("Saving search result for '%s'" % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError("Error looking up movie from RottenTomatoes")

    if not movie:
        raise PluginError("No results found from rotten tomatoes for %s" % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ["alternate_ids", "cast", "directors", "genres", "links", "posters", "release_dates"]:
            getattr(movie, attr)
    session.commit()
    return movie
def lookup_movie(title=None, year=None, rottentomatoes_id=None, smart_match=None,
                 only_cached=False, session=None):
    """
    Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.

    The local cache is consulted first: by ``rottentomatoes_id``, then by lower-cased ``title``
    (plus ``year`` when given), then through the saved search results. A cached movie flagged as
    expired is refreshed unless ``only_cached`` is set. On a cache miss the online lookups are
    tried in the order rottentomatoes id, title search.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param string title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string; when given, the
        parsed values replace ``title`` and ``year``
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup
    """

    if smart_match:
        # If smart_match was specified, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or title):
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not rottentomatoes_id:
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s>' % (title, year, rottentomatoes_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    # Try to lookup from cache
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug('No matches in movie cache found, checking search cache.')
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                log.debug('Movie found in search cache.')
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                result = movies_info(movie.id)
                movie = _set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = _set_movie_details(movie, session, result)
                    session.add(movie)

            if not movie and title:
                # TODO: Extract to method
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # Score every hit by title similarity and put the best match first
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(
                                lambda x: x == ' ', movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy because hits are removed from the list)
                        for movie_res in results[:]:
                            if year and movie_res.get('year'):
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    # Give the hit a second chance through its release dates
                                    release_year = False
                                    if movie_res.get('release_dates', {}).get('theater'):
                                        log.debug('Checking year against theater release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('theater'),
                                                                     '%Y-%m-%d').tm_year
                                    elif movie_res.get('release_dates', {}).get('dvd'):
                                        log.debug('Checking year against dvd release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('dvd'),
                                                                     '%Y-%m-%d').tm_year
                                    if not (release_year and release_year == year):
                                        log.debug('removing %s - %s (wrong year: %s)' %
                                                  (movie_res['title'], movie_res['id'],
                                                   str(release_year or movie_res['year'])))
                                        results.remove(movie_res)
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                # The year is only coerced to int when a year criterion was given,
                                # so it may be missing or a string here: format it with %s and
                                # read it with .get() so this log line cannot mask the PluginError.
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%s) - %s` <-?-> `%s (%s) - %s`)' %
                                          (results[0]['title'], results[0].get('year'), results[0]['id'],
                                           results[1]['title'], results[1].get('year'), results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        result = movies_info(results[0].get('id'))

                        if not result:
                            # Fall back to the (less detailed) search hit itself
                            result = results[0]

                        # The best hit may already be cached under a different title
                        movie = session.query(RottenTomatoesMovie).filter(
                            RottenTomatoesMovie.id == result['id']).first()

                        if not movie:
                            movie = RottenTomatoesMovie()
                            movie = _set_movie_details(movie, session, result)
                            session.add(movie)
                            session.commit()

                        # Remember which search string led to this movie when it differs from the title
                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
    session.commit()
    return movie
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None,
                 only_cached=False, session=None):
    """Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.

    The local cache is consulted first, in this order: ``rottentomatoes_id``, ``imdb_id`` (through
    the stored ``imdb``/``flexget_imdb`` alternate ids), lower-cased ``title`` (plus ``year`` when
    given) and finally the saved search results. A cached movie flagged as expired is refreshed
    unless ``only_cached`` is set. On a cache miss the online lookups are tried in the order
    imdb alias, rottentomatoes id, title search.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie (leading ``t`` characters are stripped before it is
        compared with stored alternate ids)
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string; when given, the
        parsed values replace ``title`` and ``year``
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup
    """

    if smart_match:
        # If smart_match was specified, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or imdb_id or title):
            # Report the string that failed to parse (the name used here before was undefined)
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    # Try to lookup from cache
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = session.query(RottenTomatoesAlternateId).\
            filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
            filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                # First stored imdb-style alternate id, or None when the movie has none.
                # (Indexing the result of filter() raised IndexError when alternate ids existed
                # but none was imdb-style, and does not work at all on Python 3.)
                imdb_alt_id = next(
                    (alt_id.id for alt_id in (movie.alternate_ids or [])
                     if alt_id.name in ['imdb', 'flexget_imdb']),
                    None)
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Lookups using imdb_id
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    # Sanity-check the alias result against the other supplied criteria
                    mismatch = []
                    if title and difflib.SequenceMatcher(lambda x: x == ' ',
                                                         re.sub(r'\s+\(.*\)$', '', result['title'].lower()),
                                                         title.lower()).ratio() < MIN_MATCH:
                        mismatch.append('the title (%s <-?-> %s)' % (title, result['title']))
                    # Normalise the year to int so the numeric comparison below cannot fail on a string
                    if result.get('year'):
                        result['year'] = int(result['year'])
                    if year and fabs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' % (year, result['year']))
                        release_year = None
                        if result.get('release_dates', {}).get('theater'):
                            log.debug('Checking year against theater release date')
                            release_year = time.strptime(result['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' % release_year)
                        elif result.get('release_dates', {}).get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(result['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' % release_year)
                    if mismatch:
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' %
                                    (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).\
                            filter(RottenTomatoesMovie.id == result.get('id')).first()
                        if movie:
                            log.debug('Movie %s was in database, but did not have the imdb_id stored, '
                                      'forcing an update' % movie)
                            movie = set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug('%s was not in database, setting info.' % result['title'])
                            movie = RottenTomatoesMovie()
                            movie = set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError('set_movie_details returned %s' % movie)
                            session.add(movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)

            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = set_movie_details(movie, session, result)
                    session.add(movie)

            if not movie and title:
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # Score every hit by title similarity and put the best match first
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(lambda x: x == ' ', movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy because hits are removed from the list)
                        for movie_res in results[:]:
                            if year and movie_res.get('year'):
                                # Compare as int: a year delivered as a string would never equal `year`
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    # Give the hit a second chance through its release dates
                                    release_year = False
                                    if movie_res.get('release_dates', {}).get('theater'):
                                        log.debug('Checking year against theater release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('theater'),
                                                                     '%Y-%m-%d').tm_year
                                    elif movie_res.get('release_dates', {}).get('dvd'):
                                        log.debug('Checking year against dvd release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('dvd'),
                                                                     '%Y-%m-%d').tm_year
                                    if not (release_year and release_year == year):
                                        log.debug('removing %s - %s (wrong year: %s)' %
                                                  (movie_res['title'], movie_res['id'],
                                                   str(release_year or movie_res['year'])))
                                        results.remove(movie_res)
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                # The year is only coerced to int when a year criterion was given,
                                # so it may be missing or a string here: format it with %s and
                                # read it with .get() so this log line cannot mask the PluginError.
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%s) - %s` <-?-> `%s (%s) - %s`)' %
                                          (results[0]['title'], results[0].get('year'), results[0]['id'],
                                           results[1]['title'], results[1].get('year'), results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb')
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))

                        if not result:
                            # Fall back to the (less detailed) search hit itself
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        try:
                            movie = set_movie_details(movie, session, result)
                            # any() instead of `not filter(...)`: a filter object is always truthy
                            # on Python 3, which silently disabled this branch.
                            if imdb_id and not any(alt_id.name == 'imdb' and alt_id.id == imdb_id.lstrip('t')
                                                   for alt_id in movie.alternate_ids):
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' % (imdb_id, movie))
                                movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',
                                                                                     imdb_id.lstrip('t')))
                            session.add(movie)
                            # Commit here so that a duplicate-key conflict is raised inside this
                            # try block, where the IntegrityError handler below can deal with it.
                            session.commit()
                        except IntegrityError:
                            log.warning('Found movie %s in database after search even though we '
                                        'already looked, updating it with search result.' % movie)
                            session.rollback()
                            movie = session.query(RottenTomatoesMovie).\
                                filter(RottenTomatoesMovie.id == result['id']).first()
                            movie = set_movie_details(movie, session, result)
                            if imdb_id and not any(alt_id.name == 'imdb' and alt_id.id == imdb_id.lstrip('t')
                                                   for alt_id in movie.alternate_ids):
                                log.warning('Adding flexget_imdb alternate id %s for movie %s' % (imdb_id, movie))
                                movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',
                                                                                     imdb_id.lstrip('t')))
                            session.merge(movie)
                            session.commit()

                        # Remember which search string led to this movie when it differs from the title
                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
    session.commit()
    return movie
def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None,
                 only_cached=False, session=None):
    """
    Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.
    Any combination of criteria can be passed, the most specific criteria specified will be used.

    The local cache (movie table, then search-result table) is consulted first. An expired
    cache entry is refreshed online unless `only_cached` is set. When nothing is cached, the
    online lookup is attempted by imdb alias, then by rottentomatoes id, then by title search.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup
    """

    if smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or imdb_id):
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id)

    def has_imdb_alt_id(candidate):
        # True when the movie already carries the requested imdb id as an 'imdb' alternate id.
        wanted = imdb_id.lstrip('t')
        return any(alt.name == 'imdb' and alt.id == wanted for alt in candidate.alternate_ids)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    # Try to lookup from cache
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        alt_id = session.query(RottenTomatoesAlternateId).\
            filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
            filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).\
            filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug('No matches in movie cache found, checking search cache.')
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                log.debug('Movie found in search cache.')
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                # First stored imdb-style alternate id, or None when the movie has none.
                # (Indexing the filtered list used to raise IndexError in that case.)
                imdb_alt_id = next(
                    (alt.id for alt in (movie.alternate_ids or ())
                     if alt.name in ('imdb', 'flexget_imdb')),
                    None)
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = _set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Lookups using imdb_id
            # TODO: extract to method
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    mismatch = []
                    # Only compare titles when a title was actually supplied; a lookup by
                    # imdb_id alone has title=None and must not call title.lower().
                    if title:
                        # Strip a trailing parenthesised suffix from the RT title before comparing.
                        result_title = re.sub(r'\s+\(.*\)$', '', result['title'].lower())
                        ratio = difflib.SequenceMatcher(lambda x: x == ' ', result_title, title.lower()).ratio()
                        if ratio < MIN_MATCH:
                            mismatch.append('the title (%s <-?-> %s)' % (title, result['title']))
                    result['year'] = int(result['year'])
                    if year and fabs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' % (year, result['year']))
                        release_year = None
                        if result.get('release_dates', {}).get('theater'):
                            log.debug('Checking year against theater release date')
                            release_year = time.strptime(result['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' % release_year)
                        elif result.get('release_dates', {}).get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(result['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' % release_year)
                    if mismatch:
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' %
                                    (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).\
                            filter(RottenTomatoesMovie.id == result.get('id')).first()
                        if movie:
                            log.debug('Movie %s was in database, but did not have the imdb_id stored, '
                                      'forcing an update' % movie)
                            movie = _set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug('%s was not in database, setting info.' % result['title'])
                            movie = RottenTomatoesMovie()
                            movie = _set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError('set_movie_details returned %s' % movie)
                            session.add(movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)

            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = _set_movie_details(movie, session, result)
                    session.add(movie)

            if not movie and title:
                # TODO: Extract to method
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # Score every hit by title similarity and put the best match first.
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(
                                lambda x: x == ' ', movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy because entries are removed from `results`).
                        for movie_res in results[:]:
                            if year and movie_res.get('year'):
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    # Fall back to the theater/dvd release year before rejecting.
                                    release_year = False
                                    if movie_res.get('release_dates', {}).get('theater'):
                                        log.debug('Checking year against theater release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('theater'),
                                                                     '%Y-%m-%d').tm_year
                                    elif movie_res.get('release_dates', {}).get('dvd'):
                                        log.debug('Checking year against dvd release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('dvd'),
                                                                     '%Y-%m-%d').tm_year
                                    if not (release_year and release_year == year):
                                        log.debug('removing %s - %s (wrong year: %s)' %
                                                  (movie_res['title'], movie_res['id'],
                                                   str(release_year or movie_res['year'])))
                                        results.remove(movie_res)
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                # Years are formatted with %s and read with .get(): a hit without
                                # an integer year must not mask the PluginError raised below.
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%s) - %s` <-?-> `%s (%s) - %s`)' %
                                          (results[0]['title'], results[0].get('year'), results[0]['id'],
                                           results[1]['title'], results[1].get('year'), results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        # Fetch full details for the best hit, preferring its imdb alias.
                        imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb')
                        if imdb_alt_id:
                            # NOTE(review): other call sites pass 'imdb' as a second argument;
                            # confirm the default of movies_alias matches.
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))

                        if not result:
                            # Detailed lookup returned nothing, use the search hit itself.
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        movie = _set_movie_details(movie, session, result)
                        if imdb_id and not has_imdb_alt_id(movie):
                            log.warning('Adding flexget_imdb alternate id %s for movie %s' % (imdb_id, movie))
                            movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',
                                                                                 imdb_id.lstrip('t')))
                        session.add(movie)
                        session.commit()

                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
        session.commit()
        return movie