def lookup_movie(title=None, year=None, rottentomatoes_id=None, imdb_id=None, smart_match=None,
                 only_cached=False, session=None):
    """
    Do a lookup from Rotten Tomatoes for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.
    The local cache is consulted first (by rottentomatoes_id, then imdb_id, then title/year, then
    the stored search results); only on a cache miss is Rotten Tomatoes queried.

    :param rottentomatoes_id: rottentomatoes_id of desired movie
    :param imdb_id: imdb_id of desired movie
    :param title: title of desired movie
    :param year: release year of desired movie
    :param smart_match: attempt to clean and parse title and year from a string; when given, the parsed
        values replace any `title` and `year` that were passed
    :param only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
        (and an expired cache entry is returned without being refreshed)
    :param session: optionally specify a session to use, if specified, returned Movie will be live in that session
    :returns: The Movie object populated with data from Rotten Tomatoes
    :raises: PluginError if a match cannot be found or there are other problems with the lookup

    """

    if smart_match:
        # Parse the free-form string into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year
        if title == '' and not (rottentomatoes_id or imdb_id):
            raise PluginError('Failed to parse name from %s' % smart_match)

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (rottentomatoes_id or imdb_id):
        raise PluginError('No criteria specified for rotten tomatoes lookup')

    def id_str():
        return '<title=%s,year=%s,rottentomatoes_id=%s,imdb_id=%s>' % (title, year, rottentomatoes_id, imdb_id)

    if not session:
        session = Session()

    log.debug('Looking up rotten tomatoes information for %s' % id_str())

    movie = None

    # Try to lookup from cache
    if rottentomatoes_id:
        movie = session.query(RottenTomatoesMovie).\
            filter(RottenTomatoesMovie.id == rottentomatoes_id).first()
    if not movie and imdb_id:
        # Alternate ids are stored without the leading 't' characters of the imdb id
        alt_id = session.query(RottenTomatoesAlternateId).\
            filter(RottenTomatoesAlternateId.name.in_(['imdb', 'flexget_imdb'])).\
            filter(RottenTomatoesAlternateId.id == imdb_id.lstrip('t')).first()
        if alt_id:
            movie = session.query(RottenTomatoesMovie).filter(RottenTomatoesMovie.id == alt_id.movie_id).first()
    if not movie and title:
        movie_filter = session.query(RottenTomatoesMovie).filter(func.lower(RottenTomatoesMovie.title) == title.lower())
        if year:
            movie_filter = movie_filter.filter(RottenTomatoesMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            log.debug('No matches in movie cache found, checking search cache.')
            found = session.query(RottenTomatoesSearchResult).\
                filter(func.lower(RottenTomatoesSearchResult.search) == search_string).first()
            if found and found.movie:
                log.debug('Movie found in search cache.')
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        if movie.expired and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from Rotten Tomatoes.' % id_str())
            try:
                # First stored imdb-style alternate id, or None when the movie has none.
                # next() with a default avoids an IndexError when alternate ids exist but none
                # of them is an imdb id, and works whether or not filter() returns a list.
                imdb_alt_id = next(
                    (alt.id for alt in (movie.alternate_ids or []) if alt.name in ('imdb', 'flexget_imdb')),
                    None)
                if imdb_alt_id:
                    result = movies_alias(imdb_alt_id, 'imdb')
                else:
                    result = movies_info(movie.id)
                movie = _set_movie_details(movie, session, result)
                session.merge(movie)
            except URLError:
                # Best effort: keep serving the stale cache entry
                log.error('Error refreshing movie details from Rotten Tomatoes, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise PluginError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from Rotten Tomatoes
        log.debug('Movie %s not found in cache, looking up from rotten tomatoes.' % id_str())
        try:
            # Lookups using imdb_id
            # TODO: extract to method
            if imdb_id:
                log.debug('Using IMDB alias %s.' % imdb_id)
                result = movies_alias(imdb_id, 'imdb')
                if result:
                    # Sanity check the alias result against whatever title/year we were given
                    mismatch = []
                    if title:
                        # Only compare titles when one was supplied; drop a trailing
                        # parenthesised suffix from the returned title before comparing
                        rt_title = re.sub(r'\s+\(.*\)$', '', result['title'].lower())
                        ratio = difflib.SequenceMatcher(lambda x: x == ' ', rt_title, title.lower()).ratio()
                        if ratio < MIN_MATCH:
                            mismatch.append('the title (%s <-?-> %s)' % (title, result['title']))
                    result['year'] = int(result['year'])
                    if year and fabs(result['year'] - year) > 1:
                        mismatch.append('the year (%s <-?-> %s)' % (year, result['year']))
                        release_year = None
                        if result.get('release_dates', {}).get('theater'):
                            log.debug('Checking year against theater release date')
                            release_year = time.strptime(result['release_dates'].get('theater'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the theater release (%s)' % release_year)
                        elif result.get('release_dates', {}).get('dvd'):
                            log.debug('Checking year against dvd release date')
                            release_year = time.strptime(result['release_dates'].get('dvd'), '%Y-%m-%d').tm_year
                            if fabs(release_year - year) > 1:
                                mismatch.append('the DVD release (%s)' % release_year)
                    if mismatch:
                        log.warning('Rotten Tomatoes had an imdb alias for %s but it didn\'t match %s.' %
                                    (imdb_id, ', or '.join(mismatch)))
                    else:
                        log.debug('imdb_id %s maps to rt_id %s, checking db for info.' % (imdb_id, result['id']))
                        movie = session.query(RottenTomatoesMovie).\
                            filter(RottenTomatoesMovie.id == result.get('id')).first()
                        if movie:
                            log.debug('Movie %s was in database, but did not have the imdb_id stored, '
                                      'forcing an update' % movie)
                            movie = _set_movie_details(movie, session, result)
                            session.merge(movie)
                        else:
                            log.debug('%s was not in database, setting info.' % result['title'])
                            movie = RottenTomatoesMovie()
                            movie = _set_movie_details(movie, session, result)
                            if not movie:
                                raise PluginError('set_movie_details returned %s' % movie)
                            session.add(movie)
                else:
                    log.debug('IMDB alias %s returned no results.' % imdb_id)

            if not movie and rottentomatoes_id:
                result = movies_info(rottentomatoes_id)
                if result:
                    movie = RottenTomatoesMovie()
                    movie = _set_movie_details(movie, session, result)
                    session.add(movie)

            if not movie and title:
                # TODO: Extract to method
                log.verbose('Searching from rt `%s`' % search_string)
                results = movies_search(search_string)
                if results:
                    results = results.get('movies')
                    if results:
                        # Score every hit against the requested title, best match first
                        for movie_res in results:
                            seq = difflib.SequenceMatcher(
                                lambda x: x == ' ', movie_res['title'].lower(), title.lower())
                            movie_res['match'] = seq.ratio()
                        results.sort(key=lambda x: x['match'], reverse=True)

                        # Remove all movies below MIN_MATCH, and different year
                        # (iterate over a copy because entries are removed from `results`)
                        for movie_res in results[:]:

                            if year and movie_res.get('year'):
                                movie_res['year'] = int(movie_res['year'])
                                if movie_res['year'] != year:
                                    # Listed year differs; accept the hit only if a release date matches
                                    release_year = False
                                    if movie_res.get('release_dates', {}).get('theater'):
                                        log.debug('Checking year against theater release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('theater'),
                                                                     '%Y-%m-%d').tm_year
                                    elif movie_res.get('release_dates', {}).get('dvd'):
                                        log.debug('Checking year against dvd release date')
                                        release_year = time.strptime(movie_res['release_dates'].get('dvd'),
                                                                     '%Y-%m-%d').tm_year
                                    if not (release_year and release_year == year):
                                        log.debug('removing %s - %s (wrong year: %s)' %
                                                  (movie_res['title'], movie_res['id'],
                                                   str(release_year or movie_res['year'])))
                                        results.remove(movie_res)
                                        continue
                            if movie_res['match'] < MIN_MATCH:
                                log.debug('removing %s (min_match)' % movie_res['title'])
                                results.remove(movie_res)
                                continue

                        if not results:
                            raise PluginError('no appropiate results')

                        if len(results) == 1:
                            log.debug('SUCCESS: only one movie remains')
                        else:
                            # Check min difference between best two hits
                            diff = results[0]['match'] - results[1]['match']
                            if diff < MIN_DIFF:
                                # %s and .get() are used for the year: it is only cast to int above
                                # when a year criterion was supplied, so it may be missing or a string
                                log.debug('unable to determine correct movie, min_diff too small'
                                          '(`%s (%s) - %s` <-?-> `%s (%s) - %s`)' %
                                          (results[0]['title'], results[0].get('year'), results[0]['id'],
                                           results[1]['title'], results[1].get('year'), results[1]['id']))
                                for r in results:
                                    log.debug('remain: %s (match: %s) %s' % (r['title'], r['match'], r['id']))
                                raise PluginError('min_diff')

                        # Fetch the details for the best hit, falling back to the
                        # search result itself when the detail request returns nothing
                        imdb_alt_id = results[0].get('alternate_ids', {}).get('imdb')
                        if imdb_alt_id:
                            result = movies_alias(imdb_alt_id)
                        else:
                            result = movies_info(results[0].get('id'))

                        if not result:
                            result = results[0]

                        movie = RottenTomatoesMovie()
                        movie = _set_movie_details(movie, session, result)
                        # Remember the caller's imdb_id when Rotten Tomatoes does not already list it,
                        # so the next lookup by that imdb_id can be answered from the cache
                        if imdb_id and not any(alt.name == 'imdb' and alt.id == imdb_id.lstrip('t')
                                               for alt in movie.alternate_ids):
                            log.warning('Adding flexget_imdb alternate id %s for movie %s' %
                                        (imdb_id, movie))
                            movie.alternate_ids.append(RottenTomatoesAlternateId('flexget_imdb',
                                                                                 imdb_id.lstrip('t')))
                        session.add(movie)
                        session.commit()

                        # Cache the search string only when it is not simply the movie's own title
                        if title.lower() != movie.title.lower():
                            log.debug('Saving search result for \'%s\'' % search_string)
                            session.add(RottenTomatoesSearchResult(search=search_string, movie=movie))
        except URLError:
            raise PluginError('Error looking up movie from RottenTomatoes')

    if not movie:
        raise PluginError('No results found from rotten tomatoes for %s' % id_str())
    else:
        # Access attributes to force the relationships to eager load before we detach from session
        for attr in ['alternate_ids', 'cast', 'directors', 'genres', 'links', 'posters', 'release_dates']:
            getattr(movie, attr)
        session.commit()
        return movie
def lookup(title=None, year=None, tmdb_id=None, imdb_id=None, smart_match=None, only_cached=False, session=None):
    """Do a lookup from tmdb for the movie matching the passed arguments.

    Any combination of criteria can be passed, the most specific criteria specified will be used.
    The local cache is consulted first (by tmdb_id, then imdb_id, then title/year, then the stored
    search results); only on a cache miss is tmdb queried.

    Returns:
        The Movie object populated with data from tmdb

    Raises:
        LookupError if a match cannot be found or there are other problems with the lookup

    Args:
        tmdb_id: tmdb_id of desired movie
        imdb_id: imdb_id of desired movie
        title: title of desired movie
        year: release year of desired movie
        smart_match: attempt to clean and parse title and year from a string; only used when none of
            tmdb_id, imdb_id and title were given
        only_cached: if this is specified, an online lookup will not occur if the movie is not in the cache
            (and an expired cache entry is returned without being refreshed)
        session: optionally specify a session to use, if specified, returned Movie will be live in that session.
            NOTE(review): the body calls `session.query` unconditionally, so presumably a decorator or the
            caller always supplies a session - confirm.
    """

    if not (tmdb_id or imdb_id or title) and smart_match:
        # If smart_match was specified, and we don't have more specific criteria, parse it into a title and year
        title_parser = MovieParser()
        title_parser.parse(smart_match)
        title = title_parser.name
        year = title_parser.year

    if title:
        search_string = title.lower()
        if year:
            search_string = '%s %s' % (search_string, year)
    elif not (tmdb_id or imdb_id):
        raise LookupError('No criteria specified for tmdb lookup')

    def id_str():
        return '<title=%s,tmdb_id=%s,imdb_id=%s>' % (title, tmdb_id, imdb_id)

    log.debug('Looking up tmdb information for %r' % {
        'title': title,
        'tmdb_id': tmdb_id,
        'imdb_id': imdb_id
    })

    # Try the cache first, most specific criterion first
    movie = None
    if tmdb_id:
        movie = session.query(TMDBMovie).filter(TMDBMovie.id == tmdb_id).first()
    if not movie and imdb_id:
        movie = session.query(TMDBMovie).filter(TMDBMovie.imdb_id == imdb_id).first()
    if not movie and title:
        movie_filter = session.query(TMDBMovie).filter(func.lower(TMDBMovie.name) == title.lower())
        if year:
            movie_filter = movie_filter.filter(TMDBMovie.year == year)
        movie = movie_filter.first()
        if not movie:
            found = session.query(TMDBSearchResult).\
                filter(func.lower(TMDBSearchResult.search) == search_string).first()
            if found and found.movie:
                movie = found.movie

    if movie:
        # Movie found in cache, check if cache has expired.
        refresh_time = timedelta(days=2)
        if movie.released:
            if movie.released > datetime.now() - timedelta(days=7):
                # Movie is less than a week old, expire after 1 day
                refresh_time = timedelta(days=1)
            else:
                # Older movies are refreshed less often: 5 extra days per year of age
                age_in_years = (datetime.now() - movie.released).days / 365
                refresh_time += timedelta(days=age_in_years * 5)
        if movie.updated < datetime.now() - refresh_time and not only_cached:
            log.debug('Cache has expired for %s, attempting to refresh from TMDb.' % id_str())
            try:
                ApiTmdb.get_movie_details(movie, session)
            except URLError:
                # Best effort: keep serving the stale cache entry
                log.error('Error refreshing movie details from TMDb, cached info being used.')
        else:
            log.debug('Movie %s information restored from cache.' % id_str())
    else:
        if only_cached:
            raise LookupError('Movie %s not found from cache' % id_str())
        # There was no movie found in the cache, do a lookup from tmdb
        log.debug('Movie %s not found in cache, looking up from tmdb.' % id_str())
        try:
            if imdb_id and not tmdb_id:
                result = get_first_result('imdbLookup', imdb_id)
                if result:
                    movie = session.query(TMDBMovie).filter(TMDBMovie.id == result['id']).first()
                    if movie:
                        # Movie was in database, but did not have the imdb_id stored, force an update
                        ApiTmdb.get_movie_details(movie, session)
                    else:
                        tmdb_id = result['id']
            if tmdb_id:
                movie = TMDBMovie()
                movie.id = tmdb_id
                ApiTmdb.get_movie_details(movie, session)
                if movie.name:
                    session.merge(movie)
                else:
                    # Details came back without a name: treat as not found
                    movie = None
            elif title and not movie:
                # Only fall back to a title search when the imdb lookup above did not already
                # resolve a movie; otherwise a looser title match could replace the imdb match
                result = get_first_result('search', search_string)
                if result:
                    movie = session.query(TMDBMovie).filter(TMDBMovie.id == result['id']).first()
                    if not movie:
                        movie = TMDBMovie(result)
                        ApiTmdb.get_movie_details(movie, session)
                        session.merge(movie)
                    # Cache the search string only when it is not simply the movie's own name
                    if title.lower() != movie.name.lower():
                        session.merge(TMDBSearchResult(search=search_string, movie=movie))
        except URLError:
            raise LookupError('Error looking up movie from TMDb')
        if not movie:
            raise LookupError('No results found from tmdb for %s' % id_str())
        else:
            # Access attributes to force the relationships to eager load before we detach from session
            movie.genres
            movie.posters

    return movie
def parse(self, data):
    """Parse `data` with a freshly created MovieParser.

    A new MovieParser is created, `data` is assigned to its `data` attribute and
    its `parse()` method is called. `self` is not used.

    :param data: value handed to the parser through its `data` attribute
    :returns: the MovieParser instance after `parse()` has been called on it
    """
    parser = MovieParser()
    parser.data = data
    parser.parse()
    return parser