def parse_what(self, what):
    """Resolve an imdb id or a movie title into both an imdb id and a title.

    When an imdb id can be extracted from ``what``, the imdb page for that id
    is parsed to obtain the title. Otherwise imdb is searched for ``what`` and
    the match supplies both values. The results are also written to
    ``self.options`` under the ``imdb_id`` and ``title`` keys.

    :param what: imdb id or movie title
    :returns: dict with ``title`` and ``imdb_id`` keys
    :raises QueueError: if parsing the imdb page fails or the search finds
        no match
    """
    found_id = extract_id(what)
    if found_id:
        # An id was supplied: parse the imdb page to learn the title
        page = ImdbParser()
        try:
            page.parse('http://www.imdb.com/title/%s' % found_id)
        except Exception:
            raise QueueError('Error parsing info from imdb for %s' % found_id)
        # Keep the raw input as the title when the page yields no name
        movie_title = page.name if page.name else what
    else:
        # Only a title was supplied: search imdb to find the id
        console('Searching imdb for %s' % what)
        match = ImdbSearch().smart_match(what)
        if not match:
            raise QueueError(
                'ERROR: Unable to find any such movie from imdb, use imdb url instead.'
            )
        found_id = extract_id(match['url'])
        movie_title = match['name']

    self.options['imdb_id'] = found_id
    self.options['title'] = movie_title
    return {'title': movie_title, 'imdb_id': found_id}
def lookup(self, entry, search_allowed=True, session=None):
    """
    Perform imdb lookup for entry.

    The entry must carry at least one of ``imdb_id``, ``imdb_url`` or
    ``title``. The imdb url is resolved (from the id, from a cached search
    result, or from a fresh imdb search), then the movie details are taken
    from the database cache or parsed anew, and finally applied to the entry
    through ``self.field_map``.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :param session: Database session used for all cache queries.
        NOTE(review): the ``None`` default is never replaced in this
        function, so a session is presumably injected by the caller or a
        decorator -- confirm.
    :raises PluginError: Failure reason
    """
    from flexget.manager import manager

    if entry.get('imdb_id', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
    elif entry.get('imdb_url', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
    elif entry.get('title', eval_lazy=False):
        log.debug('lookup for %s' % entry['title'])
    else:
        raise plugin.PluginError(
            'looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.'
        )

    # if imdb_id is included, build the url.
    if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False):
        entry['imdb_url'] = make_url(entry['imdb_id'])

    # make sure imdb url is valid
    if entry.get('imdb_url', eval_lazy=False):
        imdb_id = extract_id(entry['imdb_url'])
        if imdb_id:
            entry['imdb_url'] = make_url(imdb_id)
        else:
            log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
            del entry['imdb_url']

    # no imdb_url, check if there is cached result for it or if the
    # search is known to fail
    if not entry.get('imdb_url', eval_lazy=False):
        # Everything below keys on the title; without one (e.g. only an
        # invalid url was passed) fail with the documented error instead of
        # a KeyError.
        if not entry.get('title', eval_lazy=False):
            raise plugin.PluginError(
                'looking up IMDB for entry failed, imdb_url is invalid and no title passed.'
            )
        result = session.query(SearchResult).filter(
            SearchResult.title == entry['title']).first()
        if result:
            # TODO: 1.2 this should really be checking task.options.retry
            if result.fails and not manager.options.execute.retry:
                # this movie cannot be found, not worth trying again ...
                log.debug('%s will fail lookup' % entry['title'])
                raise plugin.PluginError('IMDB lookup failed for %s' % entry['title'])
            else:
                if result.url:
                    log.trace('Setting imdb url for %s from db' % entry['title'])
                    entry['imdb_id'] = result.imdb_id
                    entry['imdb_url'] = result.url

    # no imdb url, but information required, try searching
    if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
        log.verbose('Searching from imdb `%s`' % entry['title'])
        search = ImdbSearch()
        search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
        search_result = search.smart_match(search_name)
        if search_result:
            entry['imdb_url'] = search_result['url']
            # store url for this movie, so we don't have to search on every run
            result = SearchResult(entry['title'], entry['imdb_url'])
            session.add(result)
            session.commit()
            log.verbose('Found %s' % (entry['imdb_url']))
        else:
            log_once('IMDB lookup failed for %s' % entry['title'], log, logging.WARN,
                     session=session)
            # store FAIL for this title
            result = SearchResult(entry['title'])
            result.fails = True
            session.add(result)
            session.commit()
            raise plugin.PluginError('Title `%s` lookup failed' % entry['title'])

    # Still no url: searching was not allowed and nothing usable was cached.
    # Report that as a lookup failure rather than hitting a KeyError below.
    if not entry.get('imdb_url', eval_lazy=False):
        raise plugin.PluginError(
            'IMDB lookup failed for %s, no imdb url available and search is not allowed'
            % entry['title'])

    # check if this imdb page has been parsed & cached
    movie = session.query(Movie).filter(Movie.url == entry['imdb_url']).first()

    # If we have a movie from cache, we are done
    if movie and not movie.expired:
        entry.update_using_map(self.field_map, movie)
        return

    # Movie was not found in cache, or was expired
    if movie is not None:
        if movie.expired:
            log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
        # Remove the old movie, we'll store another one later.
        session.query(MovieLanguage).filter(MovieLanguage.movie_id == movie.id).delete()
        session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()
        session.commit()

    # search and store to cache
    if 'title' in entry:
        log.verbose('Parsing imdb for `%s`' % entry['title'])
    else:
        # An entry may carry only imdb_url, so derive the id from the url
        # instead of assuming an imdb_id field exists.
        log.verbose('Parsing imdb for `%s`' % extract_id(entry['imdb_url']))
    try:
        movie = self._parse_new_movie(entry['imdb_url'], session)
    except UnicodeDecodeError:
        log.error(
            'Unable to determine encoding for %s. Installing chardet library may help.'
            % entry['imdb_url'])
        # store cache so this will not be tried again
        movie = Movie()
        movie.url = entry['imdb_url']
        session.add(movie)
        session.commit()
        raise plugin.PluginError('UnicodeDecodeError')
    except ValueError as e:
        # TODO: might be a little too broad catch, what was this for anyway? ;P
        if manager.options.debug:
            log.exception(e)
        raise plugin.PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

    for att in [
        'title', 'score', 'votes', 'year', 'genres', 'languages', 'actors',
        'directors', 'mpaa_rating'
    ]:
        log.trace('movie.%s: %s' % (att, getattr(movie, att)))

    # Update the entry fields
    entry.update_using_map(self.field_map, movie)
def lookup(self, entry, search_allowed=True):
    """
    Perform imdb lookup for entry.

    The entry must carry at least one of ``imdb_url``, ``imdb_id`` or
    ``title``. The imdb url is resolved (from the id, from a cached search
    result, or from a fresh imdb search), then the movie details are taken
    from the database cache or parsed anew, and finally applied to the entry
    through ``self.field_map``. A session is opened for the call and
    committed in all cases, including when the lookup fails.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :raises PluginError: Failure reason
    """
    from flexget.manager import manager

    if entry.get('imdb_url', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
    elif entry.get('imdb_id', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
    elif entry.get('title', eval_lazy=False):
        log.debug('lookup for %s' % entry['title'])
    else:
        raise PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.')

    session = Session()

    try:
        # entry sanity checks
        for field in ['imdb_votes', 'imdb_score']:
            if entry.get(field, eval_lazy=False):
                value = entry[field]
                if not isinstance(value, (int, float)):
                    raise PluginError('Entry field %s should be a number!' % field)

        # if imdb_id is included, build the url.
        if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False):
            entry['imdb_url'] = make_url(entry['imdb_id'])

        # make sure imdb url is valid
        if entry.get('imdb_url', eval_lazy=False):
            imdb_id = extract_id(entry['imdb_url'])
            if imdb_id:
                entry['imdb_url'] = make_url(imdb_id)
            else:
                log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                del entry['imdb_url']

        # no imdb_url, check if there is cached result for it or if the
        # search is known to fail
        if not entry.get('imdb_url', eval_lazy=False):
            # Everything below keys on the title; without one (e.g. only an
            # invalid url was passed) fail with the documented error instead
            # of a KeyError.
            if not entry.get('title', eval_lazy=False):
                raise PluginError(
                    'looking up IMDB for entry failed, imdb_url is invalid and no title passed.')
            result = session.query(SearchResult).filter(
                SearchResult.title == entry['title']).first()
            if result:
                if result.fails and not manager.options.retry:
                    # this movie cannot be found, not worth trying again ...
                    log.debug('%s will fail lookup' % entry['title'])
                    raise PluginError('Title `%s` lookup fails' % entry['title'])
                else:
                    if result.url:
                        log.trace('Setting imdb url for %s from db' % entry['title'])
                        entry['imdb_url'] = result.url

        # no imdb url, but information required, try searching
        if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
            log.verbose('Searching from imdb `%s`' % entry['title'])

            search = ImdbSearch()
            search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
            search_result = search.smart_match(search_name)
            if search_result:
                entry['imdb_url'] = search_result['url']
                # store url for this movie, so we don't have to search on
                # every run
                result = SearchResult(entry['title'], entry['imdb_url'])
                session.add(result)
                log.verbose('Found %s' % (entry['imdb_url']))
            else:
                log_once('Imdb lookup failed for %s' % entry['title'], log)
                # store FAIL for this title
                result = SearchResult(entry['title'])
                result.fails = True
                session.add(result)
                raise PluginError('Title `%s` lookup failed' % entry['title'])

        # Still no url: searching was not allowed and nothing usable was
        # cached. Report that as a lookup failure rather than hitting a
        # KeyError below.
        if not entry.get('imdb_url', eval_lazy=False):
            raise PluginError(
                'IMDB lookup failed for %s, no imdb url available and search is not allowed'
                % entry['title'])

        # check if this imdb page has been parsed & cached
        movie = (
            session.query(Movie)
            .options(joinedload_all(Movie.genres),
                     joinedload_all(Movie.languages),
                     joinedload_all(Movie.actors),
                     joinedload_all(Movie.directors))
            .filter(Movie.url == entry['imdb_url'])
            .first()
        )

        # movie details need to be parsed when nothing is cached or the
        # cached details have expired
        if not movie or movie.expired:
            if movie is not None:
                if movie.expired:
                    log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
                # Remove the old movie, we'll store another one later.
                session.query(MovieLanguage).filter(MovieLanguage.movie_id == movie.id).delete()
                session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()

            # search and store to cache
            if 'title' in entry:
                log.verbose('Parsing imdb for `%s`' % entry['title'])
            else:
                # An entry may carry only imdb_url, so derive the id from the
                # url instead of assuming an imdb_id field exists.
                log.verbose('Parsing imdb for `%s`' % extract_id(entry['imdb_url']))
            try:
                movie = self._parse_new_movie(entry['imdb_url'], session)
            except UnicodeDecodeError:
                log.error('Unable to determine encoding for %s. Installing chardet library may help.' %
                          entry['imdb_url'])
                # store cache so this will not be tried again
                movie = Movie()
                movie.url = entry['imdb_url']
                session.add(movie)
                raise PluginError('UnicodeDecodeError')
            except ValueError as e:
                # TODO: might be a little too broad catch, what was this for anyway? ;P
                if manager.options.debug:
                    log.exception(e)
                raise PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

            for att in ['title', 'score', 'votes', 'year', 'genres', 'languages', 'actors',
                        'directors', 'mpaa_rating']:
                log.trace('movie.%s: %s' % (att, getattr(movie, att)))

        # store to entry
        entry.update_using_map(self.field_map, movie)
    finally:
        # Runs on failures too, so SearchResult / Movie rows added before a
        # PluginError was raised are still committed.
        log.trace('committing session')
        session.commit()
def lookup(self, entry, search_allowed=True):
    """
    Perform imdb lookup for entry.

    The entry must carry at least one of ``imdb_url``, ``imdb_id`` or
    ``title``. The imdb url is resolved (from the id, from a cached search
    result, or from a fresh imdb search), then the movie details are taken
    from the database cache or parsed anew, and finally applied to the entry
    through ``self.field_map``. A session is opened for the call and
    committed in all cases, including when the lookup fails.

    When imdb was actually contacted, the call sleeps for 3 seconds before
    returning, unless debug mode or unit tests are active.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :raises PluginError: Failure reason
    """
    from flexget.manager import manager

    if entry.get("imdb_url", eval_lazy=False):
        log.debug("No title passed. Lookup for %s" % entry["imdb_url"])
    elif entry.get("imdb_id", eval_lazy=False):
        log.debug("No title passed. Lookup for %s" % entry["imdb_id"])
    elif entry.get("title", eval_lazy=False):
        log.debug("lookup for %s" % entry["title"])
    else:
        raise PluginError("looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.")

    # set whenever a request to imdb is about to be made
    take_a_break = False
    session = Session()

    try:
        # entry sanity checks
        for field in ["imdb_votes", "imdb_score"]:
            if entry.get(field, eval_lazy=False):
                value = entry[field]
                if not isinstance(value, (int, float)):
                    raise PluginError("Entry field %s should be a number!" % field)

        # if imdb_id is included, build the url.
        if entry.get("imdb_id", eval_lazy=False) and not entry.get("imdb_url", eval_lazy=False):
            entry["imdb_url"] = make_url(entry["imdb_id"])

        # make sure imdb url is valid
        if entry.get("imdb_url", eval_lazy=False):
            imdb_id = extract_id(entry["imdb_url"])
            if imdb_id:
                entry["imdb_url"] = make_url(imdb_id)
            else:
                log.debug("imdb url %s is invalid, removing it" % entry["imdb_url"])
                del entry["imdb_url"]

        # no imdb_url, check if there is cached result for it or if the
        # search is known to fail
        if not entry.get("imdb_url", eval_lazy=False):
            # Everything below keys on the title; without one (e.g. only an
            # invalid url was passed) fail with the documented error instead
            # of a KeyError.
            if not entry.get("title", eval_lazy=False):
                raise PluginError(
                    "looking up IMDB for entry failed, imdb_url is invalid and no title passed."
                )
            result = session.query(SearchResult).filter(SearchResult.title == entry["title"]).first()
            if result:
                if result.fails and not manager.options.retry:
                    # this movie cannot be found, not worth trying again ...
                    log.debug("%s will fail lookup" % entry["title"])
                    raise PluginError("Title `%s` lookup fails" % entry["title"])
                else:
                    if result.url:
                        log.trace("Setting imdb url for %s from db" % entry["title"])
                        entry["imdb_url"] = result.url

        # no imdb url, but information required, try searching
        if not entry.get("imdb_url", eval_lazy=False) and search_allowed:
            log.verbose("Searching from imdb `%s`" % entry["title"])

            take_a_break = True
            search = ImdbSearch()
            search_result = search.smart_match(entry["title"])
            if search_result:
                entry["imdb_url"] = search_result["url"]
                # store url for this movie, so we don't have to search on
                # every run
                result = SearchResult(entry["title"], entry["imdb_url"])
                session.add(result)
                log.verbose("Found %s" % (entry["imdb_url"]))
            else:
                log_once("Imdb lookup failed for %s" % entry["title"], log)
                # store FAIL for this title
                result = SearchResult(entry["title"])
                result.fails = True
                session.add(result)
                raise PluginError("Title `%s` lookup failed" % entry["title"])

        # Still no url: searching was not allowed and nothing usable was
        # cached. Report that as a lookup failure rather than hitting a
        # KeyError below.
        if not entry.get("imdb_url", eval_lazy=False):
            raise PluginError(
                "IMDB lookup failed for %s, no imdb url available and search is not allowed"
                % entry["title"]
            )

        # check if this imdb page has been parsed & cached
        movie = (
            session.query(Movie)
            .options(
                joinedload_all(Movie.genres),
                joinedload_all(Movie.languages),
                joinedload_all(Movie.actors),
                joinedload_all(Movie.directors),
            )
            .filter(Movie.url == entry["imdb_url"])
            .first()
        )

        # movie details need to be parsed when nothing is cached or the
        # cached details have expired
        if not movie or movie.expired:
            if movie is not None:
                if movie.expired:
                    log.verbose("Movie `%s` details expired, refreshing ..." % movie.title)
                # Remove the old movie, we'll store another one later.
                session.query(Movie).filter(Movie.url == entry["imdb_url"]).delete()

            # search and store to cache
            if "title" in entry:
                log.verbose("Parsing imdb for `%s`" % entry["title"])
            else:
                # An entry may carry only imdb_url, so derive the id from the
                # url instead of assuming an imdb_id field exists.
                log.verbose("Parsing imdb for `%s`" % extract_id(entry["imdb_url"]))
            try:
                take_a_break = True
                movie = self._parse_new_movie(entry["imdb_url"], session)
            except UnicodeDecodeError:
                log.error(
                    "Unable to determine encoding for %s. Installing chardet library may help."
                    % entry["imdb_url"]
                )
                # store cache so this will not be tried again
                movie = Movie()
                movie.url = entry["imdb_url"]
                session.add(movie)
                raise PluginError("UnicodeDecodeError")
            except ValueError as e:
                # TODO: might be a little too broad catch, what was this for anyway? ;P
                if manager.options.debug:
                    log.exception(e)
                raise PluginError("Invalid parameter: %s" % entry["imdb_url"], log)

            for att in ["title", "score", "votes", "year", "genres", "languages", "actors",
                        "directors", "mpaa_rating"]:
                log.trace("movie.%s: %s" % (att, getattr(movie, att)))

        # store to entry
        entry.update_using_map(self.field_map, movie)

        # give imdb a little break between requests (see: http://flexget.com/ticket/129#comment:1)
        if take_a_break and not manager.options.debug and not manager.unit_test:
            import time

            time.sleep(3)
    finally:
        # Runs on failures too, so SearchResult / Movie rows added before a
        # PluginError was raised are still committed.
        log.trace("committing session")
        session.commit()
def lookup(self, entry, search_allowed=True):
    """Perform imdb lookup for entry.

    The entry must carry at least one of ``imdb_url``, ``imdb_id`` or
    ``title``. The imdb url is resolved (from the id, from a cached search
    result, or from a fresh imdb search), then the movie details are taken
    from the database cache or parsed from imdb and stored, and finally
    applied to the entry through ``self.field_map``. A session is opened for
    the call and committed in all cases, including when the lookup fails.

    Cached details are refreshed after 2 days plus 5 days for every year of
    the movie's age. When imdb was actually contacted, the call sleeps for
    3 seconds before returning, unless debug mode or unit tests are active.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :raises PluginError: Failure reason
    """
    from flexget.manager import manager

    if entry.get('imdb_url', lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
    elif entry.get('imdb_id', lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
    elif entry.get('title', lazy=False):
        log.debug('lookup for %s' % entry['title'])
    else:
        raise PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.')

    # set whenever a request to imdb is about to be made
    take_a_break = False
    session = Session()

    try:
        # entry sanity checks
        for field in ['imdb_votes', 'imdb_score']:
            if entry.get(field, lazy=False):
                value = entry[field]
                if not isinstance(value, (int, float)):
                    raise PluginError('Entry field %s should be a number!' % field)

        # if imdb_id is included, build the url.
        if entry.get('imdb_id', lazy=False) and not entry.get('imdb_url', lazy=False):
            entry['imdb_url'] = make_url(entry['imdb_id'])

        # make sure imdb url is valid
        if entry.get('imdb_url', lazy=False):
            imdb_id = extract_id(entry['imdb_url'])
            if imdb_id:
                entry['imdb_url'] = make_url(imdb_id)
            else:
                log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                del entry['imdb_url']

        # no imdb_url, check if there is cached result for it or if the
        # search is known to fail
        if not entry.get('imdb_url', lazy=False):
            # Everything below keys on the title; without one (e.g. only an
            # invalid url was passed) fail with the documented error instead
            # of a KeyError.
            if not entry.get('title', lazy=False):
                raise PluginError(
                    'looking up IMDB for entry failed, imdb_url is invalid and no title passed.')
            result = session.query(SearchResult).filter(
                SearchResult.title == entry['title']).first()
            if result:
                if result.fails and not manager.options.retry:
                    # this movie cannot be found, not worth trying again ...
                    log.debug('%s will fail lookup' % entry['title'])
                    raise PluginError('Title `%s` lookup fails' % entry['title'])
                else:
                    if result.url:
                        log.trace('Setting imdb url for %s from db' % entry['title'])
                        entry['imdb_url'] = result.url

        # no imdb url, but information required, try searching
        if not entry.get('imdb_url', lazy=False) and search_allowed:
            log.verbose('Searching from imdb `%s`' % entry['title'])

            take_a_break = True
            search = ImdbSearch()
            search_result = search.smart_match(entry['title'])
            if search_result:
                entry['imdb_url'] = search_result['url']
                # store url for this movie, so we don't have to search on
                # every run
                result = SearchResult(entry['title'], entry['imdb_url'])
                session.add(result)
                log.verbose('Found %s' % (entry['imdb_url']))
            else:
                log_once('Imdb lookup failed for %s' % entry['title'], log)
                # store FAIL for this title
                result = SearchResult(entry['title'])
                result.fails = True
                session.add(result)
                raise PluginError('Title `%s` lookup failed' % entry['title'])

        # Still no url: searching was not allowed and nothing usable was
        # cached. Report that as a lookup failure rather than hitting a
        # KeyError below.
        if not entry.get('imdb_url', lazy=False):
            raise PluginError(
                'IMDB lookup failed for %s, no imdb url available and search is not allowed'
                % entry['title'])

        # check if this imdb page has been parsed & cached
        # Each relationship gets its own option: several keys passed to one
        # joinedload_all call are treated as a single chained path.
        movie = (
            session.query(Movie)
            .options(joinedload_all(Movie.genres),
                     joinedload_all(Movie.languages),
                     joinedload_all(Movie.actors),
                     joinedload_all(Movie.directors))
            .filter(Movie.url == entry['imdb_url'])
            .first()
        )

        # base refresh interval in days, extended by the movie's age below
        refresh_interval = 2
        if movie:
            if movie.year:
                age = (datetime.now().year - movie.year)
                refresh_interval += age * 5
                log.debug('cached movie `%s` age %i refresh interval %i days' %
                          (movie.title, age, refresh_interval))

        if not movie or movie.updated is None or \
           movie.updated < datetime.now() - timedelta(days=refresh_interval):
            # Remove the old movie, we'll store another one later.
            session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()

            # search and store to cache
            if 'title' in entry:
                log.verbose('Parsing imdb for `%s`' % entry['title'])
            else:
                # An entry may carry only imdb_url, so derive the id from the
                # url instead of assuming an imdb_id field exists.
                log.verbose('Parsing imdb for `%s`' % extract_id(entry['imdb_url']))
            try:
                take_a_break = True
                imdb = ImdbParser()
                imdb.parse(entry['imdb_url'])
                # store to database
                movie = Movie()
                movie.photo = imdb.photo
                movie.title = imdb.name
                movie.score = imdb.score
                movie.votes = imdb.votes
                movie.year = imdb.year
                movie.mpaa_rating = imdb.mpaa_rating
                movie.plot_outline = imdb.plot_outline
                movie.url = entry['imdb_url']
                # reuse existing rows for genres, languages and people,
                # creating them only when missing
                for name in imdb.genres:
                    genre = session.query(Genre).filter(Genre.name == name).first()
                    if not genre:
                        genre = Genre(name)
                    movie.genres.append(genre)  # pylint:disable=E1101
                for name in imdb.languages:
                    language = session.query(Language).filter(Language.name == name).first()
                    if not language:
                        language = Language(name)
                    movie.languages.append(language)  # pylint:disable=E1101
                for imdb_id, name in imdb.actors.items():
                    actor = session.query(Actor).filter(Actor.imdb_id == imdb_id).first()
                    if not actor:
                        actor = Actor(imdb_id, name)
                    movie.actors.append(actor)  # pylint:disable=E1101
                for imdb_id, name in imdb.directors.items():
                    director = session.query(Director).filter(Director.imdb_id == imdb_id).first()
                    if not director:
                        director = Director(imdb_id, name)
                    movie.directors.append(director)  # pylint:disable=E1101
                # so that we can track how long since we've updated the info later
                movie.updated = datetime.now()
                session.add(movie)

            except UnicodeDecodeError:
                log.error('Unable to determine encoding for %s. Installing chardet library may help.' %
                          entry['imdb_url'])
                # store cache so this will not be tried again
                movie = Movie()
                movie.url = entry['imdb_url']
                session.add(movie)
                raise PluginError('UnicodeDecodeError')
            except ValueError as e:
                # TODO: might be a little too broad catch, what was this for anyway? ;P
                if manager.options.debug:
                    log.exception(e)
                raise PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

            for att in ['title', 'score', 'votes', 'year', 'genres', 'languages', 'actors',
                        'directors', 'mpaa_rating']:
                log.trace('movie.%s: %s' % (att, getattr(movie, att)))

        # store to entry
        entry.update_using_map(self.field_map, movie)

        # give imdb a little break between requests (see: http://flexget.com/ticket/129#comment:1)
        if (take_a_break and
                not manager.options.debug and
                not manager.unit_test):
            import time
            time.sleep(3)
    finally:
        # Runs on failures too, so SearchResult / Movie rows added before a
        # PluginError was raised are still committed.
        log.trace('committing session')
        session.commit()
def lookup(self, entry, search_allowed=True, session=None):
    """
    Perform imdb lookup for entry.

    The entry must carry at least one of ``imdb_id``, ``imdb_url`` or
    ``title``. The imdb url is resolved (from the id, from a cached search
    result, or from a fresh imdb search), then the movie details are taken
    from the database cache or parsed anew, and finally applied to the entry
    through ``self.field_map``.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :param session: Database session used for all cache queries.
        NOTE(review): the ``None`` default is never replaced in this
        function, so a session is presumably injected by the caller or a
        decorator -- confirm.
    :raises PluginError: Failure reason
    """
    from flexget.manager import manager

    if entry.get("imdb_id", eval_lazy=False):
        log.debug("No title passed. Lookup for %s" % entry["imdb_id"])
    elif entry.get("imdb_url", eval_lazy=False):
        log.debug("No title passed. Lookup for %s" % entry["imdb_url"])
    elif entry.get("title", eval_lazy=False):
        log.debug("lookup for %s" % entry["title"])
    else:
        raise plugin.PluginError("looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.")

    # if imdb_id is included, build the url.
    if entry.get("imdb_id", eval_lazy=False) and not entry.get("imdb_url", eval_lazy=False):
        entry["imdb_url"] = make_url(entry["imdb_id"])

    # make sure imdb url is valid
    if entry.get("imdb_url", eval_lazy=False):
        imdb_id = extract_id(entry["imdb_url"])
        if imdb_id:
            entry["imdb_url"] = make_url(imdb_id)
        else:
            log.debug("imdb url %s is invalid, removing it" % entry["imdb_url"])
            del entry["imdb_url"]

    # no imdb_url, check if there is cached result for it or if the
    # search is known to fail
    if not entry.get("imdb_url", eval_lazy=False):
        # Everything below keys on the title; without one (e.g. only an
        # invalid url was passed) fail with the documented error instead of
        # a KeyError.
        if not entry.get("title", eval_lazy=False):
            raise plugin.PluginError(
                "looking up IMDB for entry failed, imdb_url is invalid and no title passed."
            )
        result = session.query(SearchResult).filter(SearchResult.title == entry["title"]).first()
        if result:
            # TODO: 1.2 this should really be checking task.options.retry
            if result.fails and not manager.options.execute.retry:
                # this movie cannot be found, not worth trying again ...
                log.debug("%s will fail lookup" % entry["title"])
                raise plugin.PluginError("IMDB lookup failed for %s" % entry["title"])
            else:
                if result.url:
                    log.trace("Setting imdb url for %s from db" % entry["title"])
                    entry["imdb_id"] = result.imdb_id
                    entry["imdb_url"] = result.url

    # no imdb url, but information required, try searching
    if not entry.get("imdb_url", eval_lazy=False) and search_allowed:
        log.verbose("Searching from imdb `%s`" % entry["title"])
        search = ImdbSearch()
        search_name = entry.get("movie_name", entry["title"], eval_lazy=False)
        search_result = search.smart_match(search_name)
        if search_result:
            entry["imdb_url"] = search_result["url"]
            # store url for this movie, so we don't have to search on every run
            result = SearchResult(entry["title"], entry["imdb_url"])
            session.add(result)
            session.commit()
            log.verbose("Found %s" % (entry["imdb_url"]))
        else:
            log_once("IMDB lookup failed for %s" % entry["title"], log, logging.WARN, session=session)
            # store FAIL for this title
            result = SearchResult(entry["title"])
            result.fails = True
            session.add(result)
            session.commit()
            raise plugin.PluginError("Title `%s` lookup failed" % entry["title"])

    # Still no url: searching was not allowed and nothing usable was cached.
    # Report that as a lookup failure rather than hitting a KeyError below.
    if not entry.get("imdb_url", eval_lazy=False):
        raise plugin.PluginError(
            "IMDB lookup failed for %s, no imdb url available and search is not allowed"
            % entry["title"]
        )

    # check if this imdb page has been parsed & cached
    movie = session.query(Movie).filter(Movie.url == entry["imdb_url"]).first()

    # If we have a movie from cache, we are done
    if movie and not movie.expired:
        entry.update_using_map(self.field_map, movie)
        return

    # Movie was not found in cache, or was expired
    if movie is not None:
        if movie.expired:
            log.verbose("Movie `%s` details expired, refreshing ..." % movie.title)
        # Remove the old movie, we'll store another one later.
        session.query(MovieLanguage).filter(MovieLanguage.movie_id == movie.id).delete()
        session.query(Movie).filter(Movie.url == entry["imdb_url"]).delete()
        session.commit()

    # search and store to cache
    if "title" in entry:
        log.verbose("Parsing imdb for `%s`" % entry["title"])
    else:
        # An entry may carry only imdb_url, so derive the id from the url
        # instead of assuming an imdb_id field exists.
        log.verbose("Parsing imdb for `%s`" % extract_id(entry["imdb_url"]))
    try:
        movie = self._parse_new_movie(entry["imdb_url"], session)
    except UnicodeDecodeError:
        log.error("Unable to determine encoding for %s. Installing chardet library may help." % entry["imdb_url"])
        # store cache so this will not be tried again
        movie = Movie()
        movie.url = entry["imdb_url"]
        session.add(movie)
        session.commit()
        raise plugin.PluginError("UnicodeDecodeError")
    except ValueError as e:
        # TODO: might be a little too broad catch, what was this for anyway? ;P
        if manager.options.debug:
            log.exception(e)
        raise plugin.PluginError("Invalid parameter: %s" % entry["imdb_url"], log)

    for att in [
        "title",
        "score",
        "votes",
        "year",
        "genres",
        "languages",
        "actors",
        "directors",
        "writers",
        "mpaa_rating",
    ]:
        log.trace("movie.%s: %s" % (att, getattr(movie, att)))

    # Update the entry fields
    entry.update_using_map(self.field_map, movie)