def __setitem__(self, key, value):
    """Store an entry field, coercing values to unicode and validating the
    special keys `url`, `title` and `imdb_url`.

    :raises EntryUnicodeError: value could not be decoded to unicode
    :raises PluginError: url/title set to a non-string value
    """
    # Enforce unicode compatibility. Check for all subclasses of basestring,
    # so that NavigableStrings are also cast
    if isinstance(value, basestring) and type(value) is not unicode:
        try:
            value = unicode(value)
        except UnicodeDecodeError:
            raise EntryUnicodeError(key, value)

    # url and original_url handling
    if key == 'url':
        if not isinstance(value, basestring):
            raise PluginError('Tried to set %r url to %r' % (self.get('title'), value))
        # remember the very first url the entry was given
        if 'original_url' not in self:
            self['original_url'] = value

    # title handling
    if key == 'title':
        if not isinstance(value, basestring):
            raise PluginError('Tried to set title to %r' % value)

    # TODO: HACK! Implement via plugin once #348 (entry events) is implemented
    # enforces imdb_url in same format
    if key == 'imdb_url' and isinstance(value, basestring):
        imdb_id = extract_id(value)
        if imdb_id:
            value = make_url(imdb_id)
        else:
            log.debug('Tried to set imdb_id to invalid imdb url: %s' % value)
            value = None

    try:
        # %r on exotic values can itself raise; guard the trace log only
        log.trace('ENTRY SET: %s = %r' % (key, value))
    except Exception as e:
        log.debug('trying to debug key `%s` value threw exception: %s' % (key, e))
    # NOTE(review): no dict.__setitem__(self, key, value) call is visible in this
    # chunk, so nothing is actually stored (and the nested self['original_url']
    # assignment would recurse). The sibling revision of this method ends with
    # that call -- this chunk is probably truncated; confirm against the full file.
def on_task_metainfo(self, task, config):
    """Populate imdb_id/imdb_url on entries by scanning their description field
    for a single unambiguous imdb title url."""
    # check if disabled (value set to false)
    if 'scan_imdb' in task.config:
        if not task.config['scan_imdb']:
            return

    for entry in task.entries:
        # Don't override already populated imdb_ids
        if entry.get('imdb_id', eval_lazy=False):
            continue
        if 'description' not in entry:
            continue
        # Dot escaped: the previous pattern `imdb.com` also matched e.g. `imdbXcom`
        urls = re.findall(r'\bimdb\.com/title/tt\d+\b', entry['description'])
        # Find unique imdb ids
        imdb_ids = [_f for _f in set(extract_id(url) for url in urls) if _f]
        if not imdb_ids:
            continue
        if len(imdb_ids) > 1:
            # ambiguous description -- better to set nothing than a wrong id
            log.debug('Found multiple imdb ids; not using any of: %s' % ' '.join(imdb_ids))
            continue
        entry['imdb_id'] = imdb_ids[0]
        entry['imdb_url'] = make_url(entry['imdb_id'])
        log.debug('Found imdb url in description %s' % entry['imdb_url'])
def __setitem__(self, key, value):
    """Store an entry field after unicode coercion and key-specific validation."""
    # Coerce any basestring subclass (e.g. NavigableString) to plain unicode.
    if isinstance(value, basestring) and type(value) is not unicode:
        try:
            value = unicode(value)
        except UnicodeDecodeError:
            raise EntryUnicodeError(key, value)

    if key == 'url':
        if not isinstance(value, basestring):
            raise PluginError('Tried to set %r url to %r' % (self.get('title'), value))
        # Keep a record of the first url this entry ever had.
        self.setdefault('original_url', value)
    elif key == 'title':
        if not isinstance(value, basestring):
            raise PluginError('Tried to set title to %r' % value)
    elif key == 'imdb_url' and isinstance(value, basestring):
        # TODO: HACK! Implement via plugin once #348 (entry events) is implemented
        # Canonicalize imdb urls; anything unparseable becomes None.
        imdb_id = extract_id(value)
        if not imdb_id:
            log.debug('Tried to set imdb_id to invalid imdb url: %s' % value)
            value = None
        else:
            value = make_url(imdb_id)

    try:
        log.trace('ENTRY SET: %s = %r' % (key, value))
    except Exception as e:
        log.debug('trying to debug key `%s` value threw exception: %s' % (key, e))

    dict.__setitem__(self, key, value)
def on_task_metainfo(self, task, config):
    """Populate imdb_id/imdb_url on entries by scanning their description field
    for a single unambiguous imdb title url."""
    # check if disabled (value set to false)
    if 'scan_imdb' in task.config:
        if not task.config['scan_imdb']:
            return

    for entry in task.entries:
        # Don't override already populated imdb_ids
        if entry.get('imdb_id', eval_lazy=False):
            continue
        if 'description' not in entry:
            continue
        # Dot escaped so `imdb.com` is matched literally (`.` matched any char).
        urls = re.findall(r'\bimdb\.com/title/tt\d+\b', entry['description'])
        # Find unique imdb ids; a list comprehension (instead of filter()) keeps
        # len() below working on Python 3 too, where filter() is lazy.
        imdb_ids = [imdb_id for imdb_id in set(extract_id(url) for url in urls) if imdb_id]
        if not imdb_ids:
            continue
        if len(imdb_ids) > 1:
            log.debug('Found multiple imdb ids; not using any of: %s' % ' '.join(imdb_ids))
            continue
        entry['imdb_id'] = imdb_ids[0]
        entry['imdb_url'] = make_url(entry['imdb_id'])
        log.debug('Found imdb url in description %s' % entry['imdb_url'])
def lookup(self, entry, search_allowed=True, session=None):
    """
    Perform imdb lookup for entry.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :param session: SQLAlchemy session used for the SearchResult/Movie caches
    :raises PluginError: Failure reason
    """
    from flexget.manager import manager

    # At least one of imdb_id / imdb_url / title must already be on the entry.
    if entry.get('imdb_id', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
    elif entry.get('imdb_url', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
    elif entry.get('title', eval_lazy=False):
        log.debug('lookup for %s' % entry['title'])
    else:
        raise plugin.PluginError(
            'looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.'
        )

    # if imdb_id is included, build the url.
    if entry.get('imdb_id', eval_lazy=False) and not entry.get(
            'imdb_url', eval_lazy=False):
        entry['imdb_url'] = make_url(entry['imdb_id'])

    # make sure imdb url is valid
    if entry.get('imdb_url', eval_lazy=False):
        imdb_id = extract_id(entry['imdb_url'])
        if imdb_id:
            # normalize to canonical url form
            entry['imdb_url'] = make_url(imdb_id)
        else:
            log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
            del (entry['imdb_url'])

    # no imdb_url, check if there is cached result for it or if the
    # search is known to fail
    if not entry.get('imdb_url', eval_lazy=False):
        result = session.query(SearchResult).filter(
            SearchResult.title == entry['title']).first()
        if result:
            # TODO: 1.2 this should really be checking task.options.retry
            if result.fails and not manager.options.execute.retry:
                # this movie cannot be found, not worth trying again ...
                log.debug('%s will fail lookup' % entry['title'])
                raise plugin.PluginError('IMDB lookup failed for %s' % entry['title'])
            else:
                if result.url:
                    log.trace('Setting imdb url for %s from db' % entry['title'])
                    entry['imdb_id'] = result.imdb_id
                    entry['imdb_url'] = result.url

    movie = None
    # no imdb url, but information required, try searching
    if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
        log.verbose('Searching from imdb `%s`' % entry['title'])
        search = ImdbSearch()
        search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
        search_result = search.smart_match(search_name)
        if search_result:
            entry['imdb_url'] = search_result['url']
            # store url for this movie, so we don't have to search on every run
            result = SearchResult(entry['title'], entry['imdb_url'])
            session.add(result)
            session.commit()
            log.verbose('Found %s' % (entry['imdb_url']))
        else:
            log_once('IMDB lookup failed for %s' % entry['title'], log,
                     logging.WARN, session=session)
            # store FAIL for this title
            result = SearchResult(entry['title'])
            result.fails = True
            session.add(result)
            session.commit()
            raise plugin.PluginError('Title `%s` lookup failed' % entry['title'])

    # check if this imdb page has been parsed & cached
    movie = session.query(Movie).filter(
        Movie.url == entry['imdb_url']).first()

    # If we have a movie from cache, we are done
    if movie and not movie.expired:
        entry.update_using_map(self.field_map, movie)
        return

    # Movie was not found in cache, or was expired
    if movie is not None:
        if movie.expired:
            log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
        # Remove the old movie, we'll store another one later.
        session.query(MovieLanguage).filter(
            MovieLanguage.movie_id == movie.id).delete()
        session.query(Movie).filter(
            Movie.url == entry['imdb_url']).delete()
        session.commit()

    # search and store to cache
    if 'title' in entry:
        log.verbose('Parsing imdb for `%s`' % entry['title'])
    else:
        log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
    try:
        movie = self._parse_new_movie(entry['imdb_url'], session)
    except UnicodeDecodeError:
        log.error(
            'Unable to determine encoding for %s. Installing chardet library may help.'
            % entry['imdb_url'])
        # store cache so this will not be tried again
        movie = Movie()
        movie.url = entry['imdb_url']
        session.add(movie)
        session.commit()
        raise plugin.PluginError('UnicodeDecodeError')
    except ValueError as e:
        # TODO: might be a little too broad catch, what was this for anyway? ;P
        if manager.options.debug:
            log.exception(e)
        raise plugin.PluginError(
            'Invalid parameter: %s' % entry['imdb_url'], log)

    for att in [
            'title', 'score', 'votes', 'year', 'genres', 'languages',
            'actors', 'directors', 'mpaa_rating'
    ]:
        log.trace('movie.%s: %s' % (att, getattr(movie, att)))

    # Update the entry fields
    entry.update_using_map(self.field_map, movie)
def lookup(self, entry, search_allowed=True):
    """
    Perform imdb lookup for entry.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :raises PluginError: Failure reason
    """
    from flexget.manager import manager

    # At least one of imdb_url / imdb_id / title must already be on the entry.
    if entry.get('imdb_url', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
    elif entry.get('imdb_id', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
    elif entry.get('title', eval_lazy=False):
        log.debug('lookup for %s' % entry['title'])
    else:
        raise PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.')

    session = Session()

    try:
        # entry sanity checks
        for field in ['imdb_votes', 'imdb_score']:
            if entry.get(field, eval_lazy=False):
                value = entry[field]
                if not isinstance(value, (int, float)):
                    raise PluginError('Entry field %s should be a number!' % field)

        # if imdb_id is included, build the url.
        if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False):
            entry['imdb_url'] = make_url(entry['imdb_id'])

        # make sure imdb url is valid
        if entry.get('imdb_url', eval_lazy=False):
            imdb_id = extract_id(entry['imdb_url'])
            if imdb_id:
                entry['imdb_url'] = make_url(imdb_id)
            else:
                log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                del(entry['imdb_url'])

        # no imdb_url, check if there is cached result for it or if the
        # search is known to fail
        if not entry.get('imdb_url', eval_lazy=False):
            result = session.query(SearchResult).\
                filter(SearchResult.title == entry['title']).first()
            if result:
                if result.fails and not manager.options.retry:
                    # this movie cannot be found, not worth trying again ...
                    log.debug('%s will fail lookup' % entry['title'])
                    raise PluginError('Title `%s` lookup fails' % entry['title'])
                else:
                    if result.url:
                        log.trace('Setting imdb url for %s from db' % entry['title'])
                        entry['imdb_url'] = result.url

        # no imdb url, but information required, try searching
        if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
            log.verbose('Searching from imdb `%s`' % entry['title'])
            search = ImdbSearch()
            search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
            search_result = search.smart_match(search_name)
            if search_result:
                entry['imdb_url'] = search_result['url']
                # store url for this movie, so we don't have to search on
                # every run
                result = SearchResult(entry['title'], entry['imdb_url'])
                session.add(result)
                log.verbose('Found %s' % (entry['imdb_url']))
            else:
                log_once('Imdb lookup failed for %s' % entry['title'], log)
                # store FAIL for this title
                result = SearchResult(entry['title'])
                result.fails = True
                session.add(result)
                raise PluginError('Title `%s` lookup failed' % entry['title'])

        # check if this imdb page has been parsed & cached
        movie = session.query(Movie).\
            options(joinedload_all(Movie.genres),
                    joinedload_all(Movie.languages),
                    joinedload_all(Movie.actors),
                    joinedload_all(Movie.directors)).\
            filter(Movie.url == entry['imdb_url']).first()

        # determine whether or not movie details needs to be parsed
        req_parse = False
        if not movie:
            req_parse = True
        elif movie.expired:
            req_parse = True

        if req_parse:
            if movie is not None:
                if movie.expired:
                    log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
                # Remove the old movie, we'll store another one later.
                session.query(MovieLanguage).filter(MovieLanguage.movie_id == movie.id).delete()
                session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()

            # search and store to cache
            if 'title' in entry:
                log.verbose('Parsing imdb for `%s`' % entry['title'])
            else:
                log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
            try:
                movie = self._parse_new_movie(entry['imdb_url'], session)
            except UnicodeDecodeError:
                log.error('Unable to determine encoding for %s. Installing chardet library may help.' %
                          entry['imdb_url'])
                # store cache so this will not be tried again
                movie = Movie()
                movie.url = entry['imdb_url']
                session.add(movie)
                raise PluginError('UnicodeDecodeError')
            except ValueError as e:
                # TODO: might be a little too broad catch, what was this for anyway? ;P
                if manager.options.debug:
                    log.exception(e)
                raise PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

        for att in ['title', 'score', 'votes', 'year', 'genres', 'languages', 'actors',
                    'directors', 'mpaa_rating']:
            log.trace('movie.%s: %s' % (att, getattr(movie, att)))

        # store to entry
        entry.update_using_map(self.field_map, movie)
    finally:
        # commit whatever cache rows we added, even if the lookup raised
        log.trace('committing session')
        session.commit()
def on_task_input(self, task, config):
    """Return an Entry per movie in the configured imdb list (CSV export),
    logging in first when credentials are configured."""
    sess = requests.Session()

    if config.get('username') and config.get('password'):
        log.verbose('Logging in ...')
        # Log in to imdb with our handler
        params = {'login': config['username'], 'password': config['password']}
        try:
            # First get the login page so we can get the hidden input value
            soup = get_soup(sess.get('https://secure.imdb.com/register-imdb/login').content)
            # Fix for bs4 bug. see #2313 and github#118
            auxsoup = soup.find('div', id='nb20').next_sibling.next_sibling
            tag = auxsoup.find('input', attrs={'name': '49e6c'})
            if tag:
                params['49e6c'] = tag['value']
            else:
                log.warning('Unable to find required info for imdb login, maybe their login method has changed.')
            # Now we do the actual login with appropriate parameters
            r = sess.post('https://secure.imdb.com/register-imdb/login', data=params, raise_status=False)
        except requests.RequestException as e:
            raise plugin.PluginError('Unable to login to imdb: %s' % e.message)

        # IMDb redirects us upon a successful login.
        # removed - doesn't happen always?
        # if r.status_code != 302:
        #     log.warning('It appears logging in to IMDb was unsuccessful.')

        # try to automatically figure out user_id from watchlist redirect url
        if not 'user_id' in config:
            log.verbose('Getting user_id ...')
            try:
                response = sess.get('http://www.imdb.com/list/watchlist')
            except requests.RequestException as e:
                log.error('Error retrieving user ID from imdb: %s' % e.message)
                user_id = ''
            else:
                log.debug('redirected to %s' % response.url)
                # the redirect target ends .../<user_id>/watchlist
                user_id = response.url.split('/')[-2]
            if re.match(USER_ID_RE, user_id):
                config['user_id'] = user_id
            else:
                raise plugin.PluginError('Couldn\'t figure out user_id, please configure it manually.')

    if not 'user_id' in config:
        raise plugin.PluginError('Configuration option `user_id` required.')

    log.verbose('Retrieving list %s ...' % config['list'])

    # Get the imdb list in csv format
    try:
        url = 'http://www.imdb.com/list/export'
        params = {'list_id': config['list'], 'author_id': config['user_id']}
        log.debug('Requesting %s' % url)
        opener = sess.get(url, params=params)
        mime_type = parse_header(opener.headers['content-type'])[0]
        log.debug('mime_type: %s' % mime_type)
        if mime_type != 'text/csv':
            raise plugin.PluginError('Didn\'t get CSV export as response. Probably specified list `%s` '
                                     'does not exist.' % config['list'])
        csv_rows = csv.reader(opener.iter_lines())
    except requests.RequestException as e:
        raise plugin.PluginError('Unable to get imdb list: %s' % e.message)

    # Create an Entry for each movie in the list
    entries = []
    for row in csv_rows:
        if not row or row[0] == 'position':
            # Don't use blank rows or the headings row
            continue
        try:
            # column 5 is the title (utf-8 bytes), column 1 the imdb id
            title = decode_html(row[5]).decode('utf-8')
            entries.append(Entry(title=title, url=make_url(row[1]), imdb_id=row[1], imdb_name=title))
        except IndexError:
            log.critical('IndexError! Unable to handle row: %s' % row)
    return entries
def on_task_input(self, task, config):
    """Produce an Entry per movie in the configured imdb list CSV export,
    optionally logging in first with the configured credentials."""
    session = requests.Session()

    if config.get('username') and config.get('password'):
        log.verbose('Logging in ...')
        # Log in to imdb with our handler
        payload = {'login': config['username'], 'password': config['password']}
        try:
            # First get the login page so we can get the hidden input value
            soup = get_soup(session.get('https://secure.imdb.com/register-imdb/login').content)
            # Fix for bs4 bug. see #2313 and github#118
            container = soup.find('div', id='nb20').next_sibling.next_sibling
            hidden_input = container.find('input', attrs={'name': '49e6c'})
            if hidden_input:
                payload['49e6c'] = hidden_input['value']
            else:
                log.warning('Unable to find required info for imdb login, maybe their login method has changed.')
            # Now we do the actual login with appropriate parameters
            login_response = session.post('https://secure.imdb.com/register-imdb/login',
                                          data=payload, raise_status=False)
        except requests.RequestException as e:
            raise plugin.PluginError('Unable to login to imdb: %s' % e.message)

        # IMDb redirects us upon a successful login.
        # removed - doesn't happen always?
        # if login_response.status_code != 302:
        #     log.warning('It appears logging in to IMDb was unsuccessful.')

        # try to automatically figure out user_id from watchlist redirect url
        if 'user_id' not in config:
            log.verbose('Getting user_id ...')
            try:
                watchlist = session.get('http://www.imdb.com/list/watchlist')
            except requests.RequestException as e:
                log.error('Error retrieving user ID from imdb: %s' % e.message)
                user_id = ''
            else:
                log.debug('redirected to %s' % watchlist.url)
                user_id = watchlist.url.split('/')[-2]
            if re.match(USER_ID_RE, user_id):
                config['user_id'] = user_id
            else:
                raise plugin.PluginError('Couldn\'t figure out user_id, please configure it manually.')

    if 'user_id' not in config:
        raise plugin.PluginError('Configuration option `user_id` required.')

    log.verbose('Retrieving list %s ...' % config['list'])

    # Get the imdb list in csv format
    try:
        export_url = 'http://www.imdb.com/list/export'
        query = {'list_id': config['list'], 'author_id': config['user_id']}
        log.debug('Requesting %s' % export_url)
        export = session.get(export_url, params=query)
        mime_type = parse_header(export.headers['content-type'])[0]
        log.debug('mime_type: %s' % mime_type)
        if mime_type != 'text/csv':
            raise plugin.PluginError(
                'Didn\'t get CSV export as response. Probably specified list `%s` '
                'does not exist.' % config['list'])
        csv_rows = csv.reader(export.iter_lines())
    except requests.RequestException as e:
        raise plugin.PluginError('Unable to get imdb list: %s' % e.message)

    # Create an Entry for each movie in the list
    entries = []
    for row in csv_rows:
        # Don't use blank rows or the headings row
        if not row or row[0] == 'position':
            continue
        try:
            title = decode_html(row[5]).decode('utf-8')
            entries.append(Entry(title=title,
                                 url=make_url(row[1]),
                                 imdb_id=row[1],
                                 imdb_name=title))
        except IndexError:
            log.critical('IndexError! Unable to handle row: %s' % row)
    return entries
def lookup(self, entry, search_allowed=True):
    """
    Perform imdb lookup for entry.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :raises PluginError: Failure reason
    """
    from flexget.manager import manager

    # At least one of imdb_url / imdb_id / title must already be on the entry.
    if entry.get("imdb_url", eval_lazy=False):
        log.debug("No title passed. Lookup for %s" % entry["imdb_url"])
    elif entry.get("imdb_id", eval_lazy=False):
        log.debug("No title passed. Lookup for %s" % entry["imdb_id"])
    elif entry.get("title", eval_lazy=False):
        log.debug("lookup for %s" % entry["title"])
    else:
        raise PluginError("looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.")

    # set whenever we actually hit imdb.com, so we can pause afterwards
    take_a_break = False
    session = Session()

    try:
        # entry sanity checks
        for field in ["imdb_votes", "imdb_score"]:
            if entry.get(field, eval_lazy=False):
                value = entry[field]
                if not isinstance(value, (int, float)):
                    raise PluginError("Entry field %s should be a number!" % field)

        # if imdb_id is included, build the url.
        if entry.get("imdb_id", eval_lazy=False) and not entry.get("imdb_url", eval_lazy=False):
            entry["imdb_url"] = make_url(entry["imdb_id"])

        # make sure imdb url is valid
        if entry.get("imdb_url", eval_lazy=False):
            imdb_id = extract_id(entry["imdb_url"])
            if imdb_id:
                entry["imdb_url"] = make_url(imdb_id)
            else:
                log.debug("imdb url %s is invalid, removing it" % entry["imdb_url"])
                del (entry["imdb_url"])

        # no imdb_url, check if there is cached result for it or if the
        # search is known to fail
        if not entry.get("imdb_url", eval_lazy=False):
            result = session.query(SearchResult).filter(SearchResult.title == entry["title"]).first()
            if result:
                if result.fails and not manager.options.retry:
                    # this movie cannot be found, not worth trying again ...
                    log.debug("%s will fail lookup" % entry["title"])
                    raise PluginError("Title `%s` lookup fails" % entry["title"])
                else:
                    if result.url:
                        log.trace("Setting imdb url for %s from db" % entry["title"])
                        entry["imdb_url"] = result.url

        # no imdb url, but information required, try searching
        if not entry.get("imdb_url", eval_lazy=False) and search_allowed:
            log.verbose("Searching from imdb `%s`" % entry["title"])
            take_a_break = True
            search = ImdbSearch()
            search_result = search.smart_match(entry["title"])
            if search_result:
                entry["imdb_url"] = search_result["url"]
                # store url for this movie, so we don't have to search on
                # every run
                result = SearchResult(entry["title"], entry["imdb_url"])
                session.add(result)
                log.verbose("Found %s" % (entry["imdb_url"]))
            else:
                log_once("Imdb lookup failed for %s" % entry["title"], log)
                # store FAIL for this title
                result = SearchResult(entry["title"])
                result.fails = True
                session.add(result)
                raise PluginError("Title `%s` lookup failed" % entry["title"])

        # check if this imdb page has been parsed & cached
        movie = (
            session.query(Movie)
            .options(
                joinedload_all(Movie.genres),
                joinedload_all(Movie.languages),
                joinedload_all(Movie.actors),
                joinedload_all(Movie.directors),
            )
            .filter(Movie.url == entry["imdb_url"])
            .first()
        )

        # determine whether or not movie details needs to be parsed
        req_parse = False
        if not movie:
            req_parse = True
        elif movie.expired:
            req_parse = True

        if req_parse:
            if movie is not None:
                if movie.expired:
                    log.verbose("Movie `%s` details expired, refreshing ..." % movie.title)
                # Remove the old movie, we'll store another one later.
                session.query(Movie).filter(Movie.url == entry["imdb_url"]).delete()

            # search and store to cache
            if "title" in entry:
                log.verbose("Parsing imdb for `%s`" % entry["title"])
            else:
                log.verbose("Parsing imdb for `%s`" % entry["imdb_id"])
            try:
                take_a_break = True
                movie = self._parse_new_movie(entry["imdb_url"], session)
            except UnicodeDecodeError:
                log.error("Unable to determine encoding for %s. Installing chardet library may help."
                          % entry["imdb_url"])
                # store cache so this will not be tried again
                movie = Movie()
                movie.url = entry["imdb_url"]
                session.add(movie)
                raise PluginError("UnicodeDecodeError")
            except ValueError as e:  # was `except ValueError, e` -- Python-2-only syntax
                # TODO: might be a little too broad catch, what was this for anyway? ;P
                if manager.options.debug:
                    log.exception(e)
                raise PluginError("Invalid parameter: %s" % entry["imdb_url"], log)

        for att in ["title", "score", "votes", "year", "genres", "languages", "actors",
                    "directors", "mpaa_rating"]:
            log.trace("movie.%s: %s" % (att, getattr(movie, att)))

        # store to entry
        entry.update_using_map(self.field_map, movie)

        # give imdb a little break between requests (see: http://flexget.com/ticket/129#comment:1)
        if take_a_break and not manager.options.debug and not manager.unit_test:
            import time
            time.sleep(3)
    finally:
        # NOTE(review): no `finally` closer was visible in this chunk's `try:`;
        # restored to match the sibling revision so cached rows are committed.
        log.trace("committing session")
        session.commit()
# Get the imdb list in csv format try: url = 'http://www.imdb.com/list/export' params = {'list_id': config['list'], 'author_id': config['user_id']} log.debug('Requesting %s' % url) opener = sess.get(url, params=params) mime_type = parse_header(opener.headers['content-type'])[0] log.debug('mime_type: %s' % mime_type) if mime_type != 'text/csv': raise PluginError('Didn\'t get CSV export as response. Probably specified list `%s` does not exists.' % config['list']) csv_rows = csv.reader(opener.iter_lines()) except requests.RequestException, e: raise PluginError('Unable to get imdb list: %s' % e.message) # Create an Entry for each movie in the list entries = [] for row in csv_rows: if not row or row[0] == 'position': # Don't use blank rows or the headings row continue try: title = decode_html(row[5]).decode('utf-8') entries.append(Entry(title=title, url=make_url(row[1]), imdb_id=row[1], imdb_name=title)) except IndexError: log.critical('IndexError! Unable to handle row: %s' % row) return entries register_plugin(ImdbList, 'imdb_list', api_ver=2)
def lookup(self, entry, search_allowed=True):
    """Perform imdb lookup for entry. Raises PluginError with failure reason."""
    from flexget.manager import manager

    # At least one of imdb_url / imdb_id / title must already be on the entry.
    if entry.get('imdb_url', lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
    elif entry.get('imdb_id', lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
    elif entry.get('title', lazy=False):
        log.debug('lookup for %s' % entry['title'])
    else:
        raise PluginError('looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.')

    # set whenever we actually hit imdb.com, so we can pause afterwards
    take_a_break = False
    session = Session()

    try:
        # entry sanity checks
        for field in ['imdb_votes', 'imdb_score']:
            if entry.get(field, lazy=False):
                value = entry[field]
                if not isinstance(value, (int, float)):
                    raise PluginError('Entry field %s should be a number!' % field)

        # if imdb_id is included, build the url.
        if entry.get('imdb_id', lazy=False) and not entry.get('imdb_url', lazy=False):
            entry['imdb_url'] = make_url(entry['imdb_id'])

        # make sure imdb url is valid
        if entry.get('imdb_url', lazy=False):
            imdb_id = extract_id(entry['imdb_url'])
            if imdb_id:
                entry['imdb_url'] = make_url(imdb_id)
            else:
                log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                del(entry['imdb_url'])

        # no imdb_url, check if there is cached result for it or if the
        # search is known to fail
        if not entry.get('imdb_url', lazy=False):
            result = session.query(SearchResult).\
                filter(SearchResult.title == entry['title']).first()
            if result:
                if result.fails and not manager.options.retry:
                    # this movie cannot be found, not worth trying again ...
                    log.debug('%s will fail lookup' % entry['title'])
                    raise PluginError('Title `%s` lookup fails' % entry['title'])
                else:
                    if result.url:
                        log.trace('Setting imdb url for %s from db' % entry['title'])
                        entry['imdb_url'] = result.url

        # no imdb url, but information required, try searching
        if not entry.get('imdb_url', lazy=False) and search_allowed:
            log.verbose('Searching from imdb `%s`' % entry['title'])
            take_a_break = True
            search = ImdbSearch()
            search_result = search.smart_match(entry['title'])
            if search_result:
                entry['imdb_url'] = search_result['url']
                # store url for this movie, so we don't have to search on
                # every run
                result = SearchResult(entry['title'], entry['imdb_url'])
                session.add(result)
                log.verbose('Found %s' % (entry['imdb_url']))
            else:
                log_once('Imdb lookup failed for %s' % entry['title'], log)
                # store FAIL for this title
                result = SearchResult(entry['title'])
                result.fails = True
                session.add(result)
                raise PluginError('Title `%s` lookup failed' % entry['title'])

        # check if this imdb page has been parsed & cached
        movie = session.query(Movie).\
            options(joinedload_all(Movie.genres, Movie.languages,
                                   Movie.actors, Movie.directors)).\
            filter(Movie.url == entry['imdb_url']).first()

        # age-scaled refresh interval: details of older movies change rarely
        refresh_interval = 2
        if movie:
            if movie.year:
                age = (datetime.now().year - movie.year)
                refresh_interval += age * 5
                log.debug('cached movie `%s` age %i refresh interval %i days' %
                          (movie.title, age, refresh_interval))

        if not movie or movie.updated is None or \
           movie.updated < datetime.now() - timedelta(days=refresh_interval):
            # Remove the old movie, we'll store another one later.
            session.query(Movie).filter(Movie.url == entry['imdb_url']).delete()

            # search and store to cache
            if 'title' in entry:
                log.verbose('Parsing imdb for `%s`' % entry['title'])
            else:
                log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
            try:
                take_a_break = True
                imdb = ImdbParser()
                imdb.parse(entry['imdb_url'])
                # store to database
                movie = Movie()
                movie.photo = imdb.photo
                movie.title = imdb.name
                movie.score = imdb.score
                movie.votes = imdb.votes
                movie.year = imdb.year
                movie.mpaa_rating = imdb.mpaa_rating
                movie.plot_outline = imdb.plot_outline
                movie.url = entry['imdb_url']
                # re-use existing lookup-table rows where possible
                for name in imdb.genres:
                    genre = session.query(Genre).\
                        filter(Genre.name == name).first()
                    if not genre:
                        genre = Genre(name)
                    movie.genres.append(genre)  # pylint:disable=E1101
                for name in imdb.languages:
                    language = session.query(Language).\
                        filter(Language.name == name).first()
                    if not language:
                        language = Language(name)
                    movie.languages.append(language)  # pylint:disable=E1101
                for imdb_id, name in imdb.actors.iteritems():
                    actor = session.query(Actor).\
                        filter(Actor.imdb_id == imdb_id).first()
                    if not actor:
                        actor = Actor(imdb_id, name)
                    movie.actors.append(actor)  # pylint:disable=E1101
                for imdb_id, name in imdb.directors.iteritems():
                    director = session.query(Director).\
                        filter(Director.imdb_id == imdb_id).first()
                    if not director:
                        director = Director(imdb_id, name)
                    movie.directors.append(director)  # pylint:disable=E1101
                # so that we can track how long since we've updated the info later
                movie.updated = datetime.now()
                session.add(movie)
            except UnicodeDecodeError:
                log.error('Unable to determine encoding for %s. Installing chardet library may help.' %
                          entry['imdb_url'])
                # store cache so this will not be tried again
                movie = Movie()
                movie.url = entry['imdb_url']
                session.add(movie)
                raise PluginError('UnicodeDecodeError')
            except ValueError as e:  # was `except ValueError, e` -- Python-2-only syntax
                # TODO: might be a little too broad catch, what was this for anyway? ;P
                if manager.options.debug:
                    log.exception(e)
                raise PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

        for att in ['title', 'score', 'votes', 'year', 'genres', 'languages', 'actors',
                    'directors', 'mpaa_rating']:
            log.trace('movie.%s: %s' % (att, getattr(movie, att)))

        # store to entry
        entry.update_using_map(self.field_map, movie)

        # give imdb a little break between requests (see: http://flexget.com/ticket/129#comment:1)
        if (take_a_break and
                not manager.options.debug and
                not manager.unit_test):
            import time
            time.sleep(3)
    finally:
        # NOTE(review): no `finally` closer was visible in this chunk's `try:`;
        # restored to match the sibling revision so cached rows are committed.
        log.trace('committing session')
        session.commit()
def lookup(self, entry, search_allowed=True, session=None):
    """
    Perform imdb lookup for entry.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :param session: SQLAlchemy session used for the SearchResult/Movie caches
    :raises PluginError: Failure reason
    """
    from flexget.manager import manager

    # At least one of imdb_id / imdb_url / title must already be on the entry.
    if entry.get("imdb_id", eval_lazy=False):
        log.debug("No title passed. Lookup for %s" % entry["imdb_id"])
    elif entry.get("imdb_url", eval_lazy=False):
        log.debug("No title passed. Lookup for %s" % entry["imdb_url"])
    elif entry.get("title", eval_lazy=False):
        log.debug("lookup for %s" % entry["title"])
    else:
        raise plugin.PluginError("looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.")

    # if imdb_id is included, build the url.
    if entry.get("imdb_id", eval_lazy=False) and not entry.get("imdb_url", eval_lazy=False):
        entry["imdb_url"] = make_url(entry["imdb_id"])

    # make sure imdb url is valid
    if entry.get("imdb_url", eval_lazy=False):
        imdb_id = extract_id(entry["imdb_url"])
        if imdb_id:
            # normalize to canonical url form
            entry["imdb_url"] = make_url(imdb_id)
        else:
            log.debug("imdb url %s is invalid, removing it" % entry["imdb_url"])
            del (entry["imdb_url"])

    # no imdb_url, check if there is cached result for it or if the
    # search is known to fail
    if not entry.get("imdb_url", eval_lazy=False):
        result = session.query(SearchResult).filter(SearchResult.title == entry["title"]).first()
        if result:
            # TODO: 1.2 this should really be checking task.options.retry
            if result.fails and not manager.options.execute.retry:
                # this movie cannot be found, not worth trying again ...
                log.debug("%s will fail lookup" % entry["title"])
                raise plugin.PluginError("IMDB lookup failed for %s" % entry["title"])
            else:
                if result.url:
                    log.trace("Setting imdb url for %s from db" % entry["title"])
                    entry["imdb_id"] = result.imdb_id
                    entry["imdb_url"] = result.url

    # no imdb url, but information required, try searching
    if not entry.get("imdb_url", eval_lazy=False) and search_allowed:
        log.verbose("Searching from imdb `%s`" % entry["title"])
        search = ImdbSearch()
        search_name = entry.get("movie_name", entry["title"], eval_lazy=False)
        search_result = search.smart_match(search_name)
        if search_result:
            entry["imdb_url"] = search_result["url"]
            # store url for this movie, so we don't have to search on every run
            result = SearchResult(entry["title"], entry["imdb_url"])
            session.add(result)
            session.commit()
            log.verbose("Found %s" % (entry["imdb_url"]))
        else:
            log_once("IMDB lookup failed for %s" % entry["title"], log,
                     logging.WARN, session=session)
            # store FAIL for this title
            result = SearchResult(entry["title"])
            result.fails = True
            session.add(result)
            session.commit()
            raise plugin.PluginError("Title `%s` lookup failed" % entry["title"])

    # check if this imdb page has been parsed & cached
    movie = session.query(Movie).filter(Movie.url == entry["imdb_url"]).first()

    # If we have a movie from cache, we are done
    if movie and not movie.expired:
        entry.update_using_map(self.field_map, movie)
        return

    # Movie was not found in cache, or was expired
    if movie is not None:
        if movie.expired:
            log.verbose("Movie `%s` details expired, refreshing ..." % movie.title)
        # Remove the old movie, we'll store another one later.
        session.query(MovieLanguage).filter(MovieLanguage.movie_id == movie.id).delete()
        session.query(Movie).filter(Movie.url == entry["imdb_url"]).delete()
        session.commit()

    # search and store to cache
    if "title" in entry:
        log.verbose("Parsing imdb for `%s`" % entry["title"])
    else:
        log.verbose("Parsing imdb for `%s`" % entry["imdb_id"])
    try:
        movie = self._parse_new_movie(entry["imdb_url"], session)
    except UnicodeDecodeError:
        log.error("Unable to determine encoding for %s. Installing chardet library may help." %
                  entry["imdb_url"])
        # store cache so this will not be tried again
        movie = Movie()
        movie.url = entry["imdb_url"]
        session.add(movie)
        session.commit()
        raise plugin.PluginError("UnicodeDecodeError")
    except ValueError as e:
        # TODO: might be a little too broad catch, what was this for anyway? ;P
        if manager.options.debug:
            log.exception(e)
        raise plugin.PluginError("Invalid parameter: %s" % entry["imdb_url"], log)

    for att in [
        "title",
        "score",
        "votes",
        "year",
        "genres",
        "languages",
        "actors",
        "directors",
        "writers",
        "mpaa_rating",
    ]:
        log.trace("movie.%s: %s" % (att, getattr(movie, att)))

    # Update the entry fields
    entry.update_using_map(self.field_map, movie)
# NOTE(review): fragment -- tail of on_task_input from an older revision; the
# opening `try:` and the enclosing `def` are not part of this chunk.
    opener = sess.get(url, params=params)
    mime_type = parse_header(opener.headers['content-type'])[0]
    log.debug('mime_type: %s' % mime_type)
    if mime_type != 'text/csv':
        raise PluginError(
            'Didn\'t get CSV export as response. Probably specified list `%s` does not exist.' %
            config['list'])
    csv_rows = csv.reader(opener.iter_lines())
except requests.RequestException, e:
    raise PluginError('Unable to get imdb list: %s' % e.message)

# Create an Entry for each movie in the list
entries = []
for row in csv_rows:
    if not row or row[0] == 'position':
        # Don't use blank rows or the headings row
        continue
    try:
        # column 5 is the title (utf-8 bytes), column 1 the imdb id
        title = decode_html(row[5]).decode('utf-8')
        entries.append(
            Entry(title=title,
                  url=make_url(row[1]),
                  imdb_id=row[1],
                  imdb_name=title))
    except IndexError:
        log.critical('IndexError! Unable to handle row: %s' % row)
return entries

register_plugin(ImdbList, 'imdb_list', api_ver=2)