def seen_search(options, session=None):
    """CLI: search the seen database and print matching entries as a table."""
    value = options.search_term
    if is_imdb_url(value):
        console('IMDB url detected, parsing ID')
        parsed = extract_id(value)
        if parsed:
            value = parsed
        else:
            console("Could not parse IMDB ID")
    else:
        value = '%' + options.search_term + '%'

    matches = db.search(value=value, status=None, session=session)
    porcelain = options.table_type == 'porcelain'

    rows = []
    for item in matches.all():
        rows.append(['Title', item.title])
        for field in item.fields:
            if field.field.lower() == 'title':
                continue
            rows.append(['{}'.format(field.field.upper()), str(field.value)])
        rows.append(['Task', item.task])
        rows.append(['Added', item.added.strftime('%Y-%m-%d %H:%M')])
        if not porcelain:
            # blank separator row between entries
            rows.append(['', ''])

    if not rows:
        console('No results found for search')
        return
    if not porcelain:
        # drop trailing separator row
        del rows[-1]

    try:
        table = TerminalTable(options.table_type, rows, wrap_columns=[1])
        table.table.inner_heading_row_border = False
        console(table.output)
    except TerminalTableError as e:
        console('ERROR: %s' % str(e))
def upgrade(ver, session):
    """Schema migration for the seen plugin tables.

    :param ver: current schema version (None for a fresh / pre-versioned db)
    :param session: SQLAlchemy session bound to the database being upgraded
    :return: the schema version after applying all pending migration steps
    """
    if ver is None:
        log.info('Converting seen imdb_url to imdb_id for seen movies.')
        field_table = table_schema('seen_field', session)
        for row in session.execute(
            select([field_table.c.id, field_table.c.value], field_table.c.field == 'imdb_url')
        ):
            # Replace each stored imdb_url field with the bare imdb_id.
            new_values = {'field': 'imdb_id', 'value': extract_id(row['value'])}
            session.execute(update(field_table, field_table.c.id == row['id'], new_values))
        ver = 1
    if ver == 1:
        field_table = table_schema('seen_field', session)
        log.info('Adding index to seen_field table.')
        Index('ix_seen_field_seen_entry_id', field_table.c.seen_entry_id).create(bind=session.bind)
        ver = 2
    if ver == 2:
        log.info('Adding local column to seen_entry table')
        table_add_column('seen_entry', 'local', Boolean, session, default=False)
        ver = 3
    if ver == 3:
        # setting the default to False in the last migration was broken, fix the data
        log.info('Repairing seen table')
        entry_table = table_schema('seen_entry', session)
        session.execute(update(entry_table, entry_table.c.local == None, {'local': False}))
        ver = 4
    return ver
def on_task_metainfo(self, task, config):
    """Populate imdb_id/imdb_url on entries by scanning their description for imdb links."""
    # check if disabled (value set to false)
    if 'scan_imdb' in task.config:
        if not task.config['scan_imdb']:
            return
    for entry in task.entries:
        # Don't override already populated imdb_ids
        if entry.get('imdb_id', eval_lazy=False):
            continue
        if not entry.get('description'):
            continue
        # Fix: escape the dot so 'imdb.com' no longer matches e.g. 'imdbxcom'.
        urls = re.findall(r'\bimdb\.com/title/tt\d+\b', entry['description'])
        # Find unique imdb ids
        imdb_ids = [_f for _f in set(extract_id(url) for url in urls) if _f]
        if not imdb_ids:
            continue
        if len(imdb_ids) > 1:
            logger.debug('Found multiple imdb ids; not using any of: {}', ' '.join(imdb_ids))
            continue
        entry['imdb_id'] = imdb_ids[0]
        entry['imdb_url'] = make_url(entry['imdb_id'])
        logger.debug('Found imdb url in description {}', entry['imdb_url'])
def on_task_metainfo(self, task, config):
    """Populate imdb_id/imdb_url on entries by scanning their description for imdb links."""
    # check if disabled (value set to false)
    if 'scan_imdb' in task.config:
        if not task.config['scan_imdb']:
            return
    for entry in task.entries:
        # Don't override already populated imdb_ids
        if entry.get('imdb_id', eval_lazy=False):
            continue
        if 'description' not in entry:
            continue
        # Fix: escape the dot so 'imdb.com' no longer matches e.g. 'imdbxcom'.
        urls = re.findall(r'\bimdb\.com/title/tt\d+\b', entry['description'])
        # Find unique imdb ids
        imdb_ids = [_f for _f in set(extract_id(url) for url in urls) if _f]
        if not imdb_ids:
            continue
        if len(imdb_ids) > 1:
            log.debug('Found multiple imdb ids; not using any of: %s' % ' '.join(imdb_ids))
            continue
        entry['imdb_id'] = imdb_ids[0]
        entry['imdb_url'] = make_url(entry['imdb_id'])
        log.debug('Found imdb url in description %s' % entry['imdb_url'])
def parse_site(self, url, task):
    """Parse configured url and return releases array.

    :param url: page url to fetch and scrape
    :param task: Task instance, used for HTTP requests and url-rewriter checks
    :return: list of release dicts with 'title', optional 'imdb_id' and 'url'
    :raises PluginError: when the input page cannot be fetched
    """
    try:
        page = task.requests.get(url).content
    except RequestException as e:
        raise plugin.PluginError('Error getting input page: %s' % e)
    soup = get_soup(page)

    releases = []
    for entry in soup.find_all('div', attrs={'class': 'entry'}):
        release = {}
        title = entry.find('h2')
        if not title:
            log.debug('No h2 entrytitle')
            continue
        release['title'] = title.a.contents[0].strip()

        log.debug('Processing title %s' % (release['title']))

        for link in entry.find_all('a'):
            # no content in the link
            if not link.contents:
                continue
            link_name = link.contents[0]
            if link_name is None:
                continue
            # skip links whose first child is a tag rather than text
            if not isinstance(link_name, NavigableString):
                continue
            link_name = link_name.strip().lower()
            if link.has_attr('href'):
                link_href = link['href']
            else:
                continue
            log.debug('found link %s -> %s' % (link_name, link_href))
            # handle imdb link
            if link_name.lower() == 'imdb':
                log.debug('found imdb link %s' % link_href)
                release['imdb_id'] = extract_id(link_href)

            # test if entry with this url would be rewritable by known plugins (ie. downloadable)
            temp = {}
            temp['title'] = release['title']
            temp['url'] = link_href
            urlrewriting = plugin.get('urlrewriting', self)
            if urlrewriting.url_rewritable(task, temp):
                release['url'] = link_href
                log.trace('--> accepting %s (resolvable)' % link_href)
            else:
                log.trace('<-- ignoring %s (non-resolvable)' % link_href)

        # reject if no torrent link
        if 'url' not in release:
            from flexget.utils.log import log_once

            log_once(
                '%s skipped due to missing or unsupported (unresolvable) download link'
                % (release['title']),
                log,
            )
        else:
            releases.append(release)

    return releases
def seen_search(options, session=None):
    """Search the seen database and render matches as a two-column table."""
    term = options.search_term
    if is_imdb_url(term):
        console('IMDB url detected, parsing ID')
        parsed_id = extract_id(term)
        if parsed_id:
            term = parsed_id
        else:
            console("Could not parse IMDB ID")
    else:
        term = '%' + options.search_term + '%'

    found = db.search(value=term, status=None, session=session)
    table = TerminalTable('Field', 'Value', table_type=options.table_type)
    for item in found.all():
        table.add_row('Title', item.title)
        for field in item.fields:
            if field.field.lower() == 'title':
                continue
            table.add_row('{}'.format(field.field.upper()), str(field.value))
        table.add_row('Task', item.task)
        table.add_row('Added', item.added.strftime('%Y-%m-%d %H:%M'), end_section=True)

    if not table.rows:
        console('No results found for search')
        return
    console(table)
def seen_add(manager: Manager, options):
    """CLI: mark a value as seen, optionally scoped to a single task.

    Bug fix: the original `if options.task and not options.task in manager.tasks ... else`
    routed a missing --task option through the else branch, setting task to None
    (and local to True) instead of keeping the default task.
    """
    DEFAULT_TASK = 'cli_add'

    seen_name = options.add_value
    if is_imdb_url(seen_name):
        console('IMDB url detected, try to parse ID')
        imdb_id = extract_id(seen_name)
        if imdb_id:
            seen_name = imdb_id
        else:
            console("Could not parse IMDB ID")

    task = DEFAULT_TASK
    local = None
    if options.task:
        if options.task not in manager.tasks:
            console(f"Task `{options.task}` not in config")
            return
        task = options.task
        local = True

    db.add(seen_name, task, {'cli_add': seen_name}, local=local)
    if task == DEFAULT_TASK:
        console(f'Added `{seen_name}` as seen. This will affect all tasks.')
    else:
        console(f'Added `{seen_name}` as seen. This will affect `{task}` task.')
def seen_forget(manager: Manager, options):
    """Remove matching entries from the seen database."""
    name = options.forget_value
    if is_imdb_url(name):
        parsed = extract_id(name)
        if parsed:
            name = parsed

    selected_tasks = None
    if options.tasks:
        selected_tasks = []
        for pattern in options.tasks:
            try:
                matched = manager.matching_tasks(pattern)
            except ValueError as e:
                console(e)
                continue
            selected_tasks.extend(t for t in matched if t not in selected_tasks)

    # If tasks are specified it should use pattern matching as search
    if selected_tasks:
        # escape literal SQL wildcards, then map shell-style wildcards to SQL ones
        name = name.replace("%", "\\%").replace("_", "\\_")
        name = name.replace("*", "%").replace("?", "_")

    count, fcount = db.forget(name, tasks=selected_tasks, test=options.test)
    console(f'Removed {count} titles ({fcount} fields)')
    manager.config_changed()
def parse_html_list(self, task, config, url, params, headers):
    """Scrape an imdb list's HTML pages into entries.

    :param task: Task instance (provides the requests session via fetch_page)
    :param config: plugin configuration; config['list'] used for log messages
    :param url: imdb list url
    :param params: query params dict; 'page' is mutated while paginating
    :param headers: HTTP headers for the requests
    :return: list of entries, or None when the list is empty
    :raises PluginError: when the item count cannot be parsed as a number
    """
    page = self.fetch_page(task, url, params, headers)
    soup = get_soup(page.text)
    try:
        item_text = soup.find('div', class_='lister-total-num-results').string.split()
        total_item_count = int(item_text[0].replace(',', ''))
        log.verbose('imdb list contains %d items', total_item_count)
    except AttributeError:
        # the results-count div is missing entirely -> treat as an empty list
        total_item_count = 0
    except (ValueError, TypeError) as e:
        # TODO Something is wrong if we get a ValueError, I think
        raise plugin.PluginError(
            'Received invalid movie count: %s ; %s'
            % (soup.find('div', class_='lister-total-num-results').string, e)
        )

    if not total_item_count:
        log.verbose('No movies were found in imdb list: %s', config['list'])
        return

    entries = []
    items_processed = 0
    page_no = 1
    while items_processed < total_item_count:
        # Fetch the next page unless we've just begun
        if items_processed:
            page_no += 1
            params['page'] = page_no
            page = self.fetch_page(task, url, params, headers)
            soup = get_soup(page.text)

        items = soup.find_all('div', class_='lister-item')
        if not items:
            log.debug('no items found on page: %s, aborting.', url)
            break
        log.debug('%d items found on page %d', len(items), page_no)

        for item in items:
            items_processed += 1
            a = item.find('h3', class_='lister-item-header').find('a')
            if not a:
                log.debug('no title link found for row, skipping')
                continue

            link = ('http://www.imdb.com' + a.get('href')).rstrip('/')
            entry = Entry()
            entry['title'] = a.text
            try:
                year = int(item.find('span', class_='lister-item-year').text)
                entry['title'] += ' (%s)' % year
                entry['imdb_year'] = year
            except (ValueError, TypeError):
                # year span missing or not a plain integer; leave title as-is
                pass
            entry['url'] = link
            entry['imdb_id'] = extract_id(link)
            entry['imdb_name'] = entry['title']
            entries.append(entry)

    return entries
def seen_forget(manager, options):
    """Remove all seen entries matching the given value."""
    target = options.forget_value
    if is_imdb_url(target):
        parsed = extract_id(target)
        if parsed:
            target = parsed

    count, fcount = db.forget(target)
    console('Removed %s titles (%s fields)' % (count, fcount))
    manager.config_changed()
def seen_add(options):
    """Mark a value as seen; applies to all tasks."""
    name = options.add_value
    if is_imdb_url(name):
        console('IMDB url detected, try to parse ID')
        parsed = extract_id(name)
        if parsed:
            name = parsed
        else:
            console("Could not parse IMDB ID")

    db.add(name, 'cli_add', {'cli_add': name})
    console('Added %s as seen. This will affect all tasks.' % name)
def search(self, task, entry, config):
    """Search PirateTheNet for releases matching the entry.

    Logs in on first use (module-level session), then queries once per search
    string, filtering results by imdb_id when the entry carries one.

    :return: set of candidate entries
    :raises PluginError: when login fails
    """
    if not session.cookies:
        try:
            login_params = {
                'username': config['username'],
                'password': config['password'],
                'loginkey': config['login_key'],
            }
            r = session.post(
                'https://piratethenet.org/takelogin.php', data=login_params, verify=False
            )
        except requests.RequestException as e:
            log.error('Error while logging in to PtN: %s', e)
            raise plugin.PluginError('Could not log in to PtN')
        passkey = re.search(r'passkey=([\d\w]+)"', r.text)
        if not passkey:
            log.error("It doesn't look like PtN login worked properly.")
            raise plugin.PluginError('PTN cookie info invalid')

    search_params = default_search_params.copy()
    if 'movie_name' in entry:
        if 'movie_year' in entry:
            search_params['advancedsearchparameters'] = '[year=%s]' % entry['movie_year']
        searches = [entry['movie_name']]
    else:
        searches = entry.get('search_strings', [entry['title']])

    results = set()
    for search in searches:
        search_params['searchstring'] = search
        try:
            r = session.get('http://piratethenet.org/torrentsutils.php', params=search_params)
        except requests.RequestException as e:
            log.error('Error searching ptn: %s' % e)
            continue
        # html5parser doesn't work properly for some reason
        soup = get_soup(r.text, parser='html.parser')
        for movie in soup.select('.torrentstd'):
            # Fix: raw string — '\.' in a non-raw literal is a deprecated escape sequence.
            imdb_id = movie.find('a', href=re.compile(r'.*imdb\.com/title/tt'))
            if imdb_id:
                imdb_id = extract_id(imdb_id['href'])
            if imdb_id and 'imdb_id' in entry and imdb_id != entry['imdb_id']:
                continue
            results.update(self.create_entries(movie, imdb_id=imdb_id))
    return results
def search(self, task, entry, config):
    """Search PirateTheNet for releases matching the entry.

    :return: set of candidate entries
    :raises PluginError: when login fails
    """
    if not session.cookies:
        try:
            login_params = {
                'username': config['username'],
                'password': config['password'],
                'loginkey': config['login_key'],
            }
            r = session.post(
                'https://piratethenet.org/takelogin.php', data=login_params, verify=False
            )
        except requests.RequestException as e:
            log.error('Error while logging in to PtN: %s', e)
            raise plugin.PluginError('Could not log in to PtN')
        passkey = re.search(r'passkey=([\d\w]+)"', r.text)
        if not passkey:
            log.error("It doesn't look like PtN login worked properly.")
            raise plugin.PluginError('PTN cookie info invalid')

    search_params = default_search_params.copy()
    if 'movie_name' in entry:
        if 'movie_year' in entry:
            search_params['advancedsearchparameters'] = '[year=%s]' % entry['movie_year']
        searches = [entry['movie_name']]
    else:
        searches = entry.get('search_strings', [entry['title']])

    results = set()
    for search in searches:
        search_params['searchstring'] = search
        try:
            r = session.get('http://piratethenet.org/torrentsutils.php', params=search_params)
        except requests.RequestException as e:
            log.error('Error searching ptn: %s' % e)
            continue
        # html5parser doesn't work properly for some reason
        soup = get_soup(r.text, parser='html.parser')
        for movie in soup.select('.torrentstd'):
            # Fix: raw string — '\.' in a non-raw literal is a deprecated escape sequence.
            imdb_id = movie.find('a', href=re.compile(r'.*imdb\.com/title/tt'))
            if imdb_id:
                imdb_id = extract_id(imdb_id['href'])
            if imdb_id and 'imdb_id' in entry and imdb_id != entry['imdb_id']:
                continue
            results.update(self.create_entries(movie, imdb_id=imdb_id))
    return results
def parse_rlslog(self, rlslog_url, task): """ :param rlslog_url: Url to parse from :param task: Task instance :return: List of release dictionaries """ # BeautifulSoup doesn't seem to work if data is already decoded to unicode :/ soup = get_soup(task.requests.get(rlslog_url, timeout=25).content) releases = [] for entry in soup.find_all('div', attrs={'class': 'entry'}): release = {} h3 = entry.find('h3', attrs={'class': 'entrytitle'}) if not h3: logger.debug('FAIL: No h3 entrytitle') continue release['title'] = h3.a.contents[0].strip() entrybody = entry.find('div', attrs={'class': 'entrybody'}) if not entrybody: logger.debug('FAIL: No entrybody') continue logger.trace('Processing title {}', release['title']) # find imdb url link_imdb = entrybody.find('a', text=re.compile(r'imdb', re.IGNORECASE)) if link_imdb: release['imdb_id'] = extract_id(link_imdb['href']) release['imdb_url'] = link_imdb['href'] # find google search url google = entrybody.find('a', href=re.compile(r'google', re.IGNORECASE)) if google: release['url'] = google['href'] releases.append(release) else: log_once( '%s skipped due to missing or unsupported download link' % (release['title']), logger, ) return releases
def lazy_loader(self, entry, language):
    """Look up TMDB info for the entry and populate its fields."""
    tmdb_lookup = plugin.get('api_tmdb', self).lookup
    # prefer an explicit imdb_id; otherwise try to pull one out of imdb_url
    imdb_id = entry.get('imdb_id', eval_lazy=False) or extract_id(
        entry.get('imdb_url', eval_lazy=False)
    )
    try:
        with Session() as session:
            movie_info = tmdb_lookup(
                smart_match=entry['title'],
                tmdb_id=entry.get('tmdb_id', eval_lazy=False),
                imdb_id=imdb_id,
                language=language,
                session=session,
            )
            entry.update_using_map(self.field_map, movie_info)
    except LookupError:
        log_once('TMDB lookup failed for %s' % entry['title'], log, logging.WARN)
def parse_rlslog(self, rlslog_url, task): """ :param rlslog_url: Url to parse from :param task: Task instance :return: List of release dictionaries """ # BeautifulSoup doesn't seem to work if data is already decoded to unicode :/ soup = get_soup(task.requests.get(rlslog_url, timeout=25).content) releases = [] for entry in soup.find_all('div', attrs={'class': 'entry'}): release = {} h3 = entry.find('h3', attrs={'class': 'entrytitle'}) if not h3: log.debug('FAIL: No h3 entrytitle') continue release['title'] = h3.a.contents[0].strip() entrybody = entry.find('div', attrs={'class': 'entrybody'}) if not entrybody: log.debug('FAIL: No entrybody') continue log.trace('Processing title %s' % (release['title'])) # find imdb url link_imdb = entrybody.find('a', text=re.compile(r'imdb', re.IGNORECASE)) if link_imdb: release['imdb_id'] = extract_id(link_imdb['href']) release['imdb_url'] = link_imdb['href'] # find google search url google = entrybody.find('a', href=re.compile(r'google', re.IGNORECASE)) if google: release['url'] = google['href'] releases.append(release) else: log_once( '%s skipped due to missing or unsupported download link' % (release['title']), log, ) return releases
def lazy_loader(self, entry, language):
    """Look up TMDB info for the entry and populate its fields."""
    tmdb_lookup = plugin.get('api_tmdb', self).lookup
    # prefer an explicit imdb_id; otherwise try to pull one out of imdb_url
    imdb_id = entry.get('imdb_id', eval_lazy=False) or extract_id(
        entry.get('imdb_url', eval_lazy=False)
    )
    try:
        with Session() as session:
            movie_info = tmdb_lookup(
                smart_match=entry['title'],
                tmdb_id=entry.get('tmdb_id', eval_lazy=False),
                imdb_id=imdb_id,
                language=language,
                session=session,
            )
            entry.update_using_map(self.field_map, movie_info)
    except LookupError:
        log_once('TMDB lookup failed for %s' % entry['title'], log, logging.WARN)
def seen_search(manager: Manager, options, session=None):
    """Search the seen database, optionally restricted to tasks, and print results."""
    term = options.search_term
    if is_imdb_url(term):
        console('IMDB url detected, parsing ID')
        parsed_id = extract_id(term)
        if parsed_id:
            term = parsed_id
        else:
            console("Could not parse IMDB ID")
    else:
        # escape literal SQL wildcards, then map shell-style wildcards to SQL ones
        term = term.replace("%", "\\%").replace("_", "\\_")
        term = term.replace("*", "%").replace("?", "_")

    selected_tasks = None
    if options.tasks:
        selected_tasks = []
        for pattern in options.tasks:
            try:
                matched = manager.matching_tasks(pattern)
            except ValueError as e:
                console(e)
                continue
            selected_tasks.extend(t for t in matched if t not in selected_tasks)

    found = db.search(value=term, status=None, tasks=selected_tasks, session=session)
    table = TerminalTable('Field', 'Value', table_type=options.table_type)
    for item in found.all():
        table.add_row('Title', item.title)
        for field in item.fields:
            if field.field.lower() == 'title':
                continue
            table.add_row('{}'.format(field.field.upper()), str(field.value))
        table.add_row('Task', item.task)
        if item.local:
            table.add_row('Local', 'Yes')
        table.add_row('Added', item.added.strftime('%Y-%m-%d %H:%M'), end_section=True)

    if not table.rows:
        console('No results found for search')
        return
    console(table)
class ImdbLookup:
    """
    Retrieves imdb information for entries.

    Also provides imdb lookup functionality to all other imdb related plugins.

    Example::

        imdb_lookup: yes
    """

    # Maps entry field names to attributes (or callables) of the cached db.Movie.
    field_map = {
        'imdb_url': 'url',
        'imdb_id': lambda movie: extract_id(movie.url),
        'imdb_name': 'title',
        'imdb_original_name': 'original_title',
        'imdb_photo': 'photo',
        'imdb_plot_outline': 'plot_outline',
        'imdb_score': 'score',
        'imdb_votes': 'votes',
        'imdb_meta_score': 'meta_score',
        'imdb_year': 'year',
        'imdb_genres': lambda movie: [genre.name for genre in movie.genres],
        'imdb_languages': lambda movie: [lang.language.name for lang in movie.languages],
        'imdb_actors': lambda movie: dict(
            (actor.imdb_id, actor.name) for actor in movie.actors),
        'imdb_directors': lambda movie: dict(
            (director.imdb_id, director.name) for director in movie.directors),
        'imdb_writers': lambda movie: dict(
            (writer.imdb_id, writer.name) for writer in movie.writers),
        'imdb_mpaa_rating': 'mpaa_rating',
        # Generic fields filled by all movie lookup plugins:
        'movie_name': 'title',
        'movie_year': 'year',
    }

    schema = {'type': 'boolean'}

    @plugin.priority(130)
    def on_task_metainfo(self, task, config):
        # Register lazy imdb fields on every entry unless the plugin is disabled.
        if not config:
            return
        for entry in task.entries:
            self.register_lazy_fields(entry)

    def register_lazy_fields(self, entry):
        # All field_map keys will be resolved on first access via lazy_loader.
        entry.register_lazy_func(self.lazy_loader, self.field_map)

    def lazy_loader(self, entry):
        """Does the lookup for this entry and populates the entry fields."""
        try:
            self.lookup(entry)
        except plugin.PluginError as e:
            log_once(str(e.value).capitalize(), logger=log)

    @with_session
    def imdb_id_lookup(self, movie_title=None, movie_year=None, raw_title=None, session=None):
        """
        Perform faster lookup providing just imdb_id.
        Falls back to using basic lookup if data cannot be found from cache.

        .. note:: API will be changed, it's dumb to return None on errors AND
           raise PluginError on some else

        :param movie_title: Name of the movie
        :param raw_title: Raw entry title
        :return: imdb id or None
        :raises PluginError: Failure reason
        """
        if movie_title:
            log.debug('imdb_id_lookup: trying with title: %s' % movie_title)
            query = session.query(
                db.Movie).filter(db.Movie.title == movie_title)
            if movie_year is not None:
                query = query.filter(db.Movie.year == movie_year)
            movie = query.first()
            if movie:
                log.debug('--> success! got %s returning %s' % (movie, movie.imdb_id))
                return movie.imdb_id
        if raw_title:
            log.debug('imdb_id_lookup: trying cache with: %s' % raw_title)
            result = (session.query(db.SearchResult).filter(
                db.SearchResult.title == raw_title).first())
            if result:
                # this title is hopeless, give up ..
                if result.fails:
                    return None
                log.debug('--> success! got %s returning %s' % (result, result.imdb_id))
                return result.imdb_id
        if raw_title:
            # last hope with hacky lookup
            fake_entry = Entry(raw_title, '')
            self.lookup(fake_entry)
            return fake_entry['imdb_id']

    @plugin.internet(log)
    @with_session
    def lookup(self, entry, search_allowed=True, session=None):
        """
        Perform imdb lookup for entry.

        :param entry: Entry instance
        :param search_allowed: Allow fallback to search
        :raises PluginError: Failure reason
        """
        from flexget.manager import manager

        if entry.get('imdb_id', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
        elif entry.get('imdb_url', eval_lazy=False):
            log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
        elif entry.get('title', eval_lazy=False):
            log.debug('lookup for %s' % entry['title'])
        else:
            raise plugin.PluginError(
                'looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.'
            )

        # if imdb_id is included, build the url.
        if entry.get('imdb_id', eval_lazy=False) and not entry.get(
                'imdb_url', eval_lazy=False):
            entry['imdb_url'] = make_url(entry['imdb_id'])

        # make sure imdb url is valid
        if entry.get('imdb_url', eval_lazy=False):
            imdb_id = extract_id(entry['imdb_url'])
            if imdb_id:
                entry['imdb_url'] = make_url(imdb_id)
            else:
                log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
                entry['imdb_url'] = ''

        # no imdb_url, check if there is cached result for it or if the
        # search is known to fail
        if not entry.get('imdb_url', eval_lazy=False):
            result = (session.query(db.SearchResult).filter(
                db.SearchResult.title == entry['title']).first())
            if result:
                # TODO: 1.2 this should really be checking task.options.retry
                if result.fails and not manager.options.execute.retry:
                    # this movie cannot be found, not worth trying again ...
                    log.debug('%s will fail lookup' % entry['title'])
                    raise plugin.PluginError('IMDB lookup failed for %s' % entry['title'])
                else:
                    if result.url:
                        log.trace('Setting imdb url for %s from db' % entry['title'])
                        entry['imdb_id'] = result.imdb_id
                        entry['imdb_url'] = result.url

        # no imdb url, but information required, try searching
        if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
            log.verbose('Searching from imdb `%s`' % entry['title'])
            search = ImdbSearch()
            search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
            search_result = search.smart_match(search_name)
            if search_result:
                entry['imdb_url'] = search_result['url']
                # store url for this movie, so we don't have to search on every run
                result = db.SearchResult(entry['title'], entry['imdb_url'])
                session.add(result)
                session.commit()
                log.verbose('Found %s' % (entry['imdb_url']))
            else:
                log_once(
                    'IMDB lookup failed for %s' % entry['title'],
                    log,
                    logging.WARN,
                    session=session,
                )
                # store FAIL for this title
                result = db.SearchResult(entry['title'])
                result.fails = True
                session.add(result)
                session.commit()
                raise plugin.PluginError('Title `%s` lookup failed' % entry['title'])

        # check if this imdb page has been parsed & cached
        movie = session.query(
            db.Movie).filter(db.Movie.url == entry['imdb_url']).first()

        # If we have a movie from cache, we are done
        if movie and not movie.expired:
            entry.update_using_map(self.field_map, movie)
            return

        # Movie was not found in cache, or was expired
        if movie is not None:
            if movie.expired:
                log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
            # Remove the old movie, we'll store another one later.
            session.query(db.MovieLanguage).filter(
                db.MovieLanguage.movie_id == movie.id).delete()
            session.query(
                db.Movie).filter(db.Movie.url == entry['imdb_url']).delete()
            session.commit()

        # search and store to cache
        if 'title' in entry:
            log.verbose('Parsing imdb for `%s`' % entry['title'])
        else:
            log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
        try:
            movie = self._parse_new_movie(entry['imdb_url'], session)
        except UnicodeDecodeError:
            log.error(
                'Unable to determine encoding for %s. Installing chardet library may help.'
                % entry['imdb_url'])
            # store cache so this will not be tried again
            movie = db.Movie()
            movie.url = entry['imdb_url']
            session.add(movie)
            session.commit()
            raise plugin.PluginError('UnicodeDecodeError')
        except ValueError as e:
            # TODO: might be a little too broad catch, what was this for anyway? ;P
            if manager.options.debug:
                log.exception(e)
            raise plugin.PluginError(
                'Invalid parameter: %s' % entry['imdb_url'], log)

        for att in [
            'title',
            'score',
            'votes',
            'meta_score',
            'year',
            'genres',
            'languages',
            'actors',
            'directors',
            'writers',
            'mpaa_rating',
        ]:
            log.trace('movie.%s: %s' % (att, getattr(movie, att)))

        # Update the entry fields
        entry.update_using_map(self.field_map, movie)

    def _parse_new_movie(self, imdb_url, session):
        """
        Get Movie object by parsing imdb page and save movie into the database.

        :param imdb_url: IMDB url
        :param session: Session to be used
        :return: Newly added Movie
        """
        parser = ImdbParser()
        parser.parse(imdb_url)
        # store to database
        movie = db.Movie()
        movie.photo = parser.photo
        movie.title = parser.name
        movie.original_title = parser.original_name
        movie.score = parser.score
        movie.votes = parser.votes
        movie.meta_score = parser.meta_score
        movie.year = parser.year
        movie.mpaa_rating = parser.mpaa_rating
        movie.plot_outline = parser.plot_outline
        movie.url = imdb_url
        for name in parser.genres:
            genre = session.query(
                db.Genre).filter(db.Genre.name == name).first()
            if not genre:
                genre = db.Genre(name)
            movie.genres.append(genre)  # pylint:disable=E1101
        for index, name in enumerate(parser.languages):
            language = session.query(
                db.Language).filter(db.Language.name == name).first()
            if not language:
                language = db.Language(name)
            # prominence preserves the order languages appeared on the imdb page
            movie.languages.append(db.MovieLanguage(language, prominence=index))
        for imdb_id, name in parser.actors.items():
            actor = session.query(
                db.Actor).filter(db.Actor.imdb_id == imdb_id).first()
            if not actor:
                actor = db.Actor(imdb_id, name)
            movie.actors.append(actor)  # pylint:disable=E1101
        for imdb_id, name in parser.directors.items():
            director = session.query(
                db.Director).filter(db.Director.imdb_id == imdb_id).first()
            if not director:
                director = db.Director(imdb_id, name)
            movie.directors.append(director)  # pylint:disable=E1101
        for imdb_id, name in parser.writers.items():
            writer = session.query(
                db.Writer).filter(db.Writer.imdb_id == imdb_id).first()
            if not writer:
                writer = db.Writer(imdb_id, name)
            movie.writers.append(writer)  # pylint:disable=E1101
        # so that we can track how long since we've updated the info later
        movie.updated = datetime.now()
        session.add(movie)
        return movie

    @property
    def movie_identifier(self):
        """Returns the plugin main identifier type"""
        return 'imdb_id'
def lookup(self, entry, search_allowed=True, session=None):
    """
    Perform imdb lookup for entry.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :raises PluginError: Failure reason
    """
    from flexget.manager import manager

    if entry.get('imdb_id', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
    elif entry.get('imdb_url', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
    elif entry.get('title', eval_lazy=False):
        log.debug('lookup for %s' % entry['title'])
    else:
        raise plugin.PluginError(
            'looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.'
        )

    # if imdb_id is included, build the url.
    if entry.get('imdb_id', eval_lazy=False) and not entry.get(
            'imdb_url', eval_lazy=False):
        entry['imdb_url'] = make_url(entry['imdb_id'])

    # make sure imdb url is valid
    if entry.get('imdb_url', eval_lazy=False):
        imdb_id = extract_id(entry['imdb_url'])
        if imdb_id:
            entry['imdb_url'] = make_url(imdb_id)
        else:
            log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
            entry['imdb_url'] = ''

    # no imdb_url, check if there is cached result for it or if the
    # search is known to fail
    if not entry.get('imdb_url', eval_lazy=False):
        result = (session.query(db.SearchResult).filter(
            db.SearchResult.title == entry['title']).first())
        if result:
            # TODO: 1.2 this should really be checking task.options.retry
            if result.fails and not manager.options.execute.retry:
                # this movie cannot be found, not worth trying again ...
                log.debug('%s will fail lookup' % entry['title'])
                raise plugin.PluginError('IMDB lookup failed for %s' % entry['title'])
            else:
                if result.url:
                    log.trace('Setting imdb url for %s from db' % entry['title'])
                    entry['imdb_id'] = result.imdb_id
                    entry['imdb_url'] = result.url

    # no imdb url, but information required, try searching
    if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
        log.verbose('Searching from imdb `%s`' % entry['title'])
        search = ImdbSearch()
        search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
        search_result = search.smart_match(search_name)
        if search_result:
            entry['imdb_url'] = search_result['url']
            # store url for this movie, so we don't have to search on every run
            result = db.SearchResult(entry['title'], entry['imdb_url'])
            session.add(result)
            session.commit()
            log.verbose('Found %s' % (entry['imdb_url']))
        else:
            log_once(
                'IMDB lookup failed for %s' % entry['title'],
                log,
                logging.WARN,
                session=session,
            )
            # store FAIL for this title
            result = db.SearchResult(entry['title'])
            result.fails = True
            session.add(result)
            session.commit()
            raise plugin.PluginError('Title `%s` lookup failed' % entry['title'])

    # check if this imdb page has been parsed & cached
    movie = session.query(
        db.Movie).filter(db.Movie.url == entry['imdb_url']).first()

    # If we have a movie from cache, we are done
    if movie and not movie.expired:
        entry.update_using_map(self.field_map, movie)
        return

    # Movie was not found in cache, or was expired
    if movie is not None:
        if movie.expired:
            log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
        # Remove the old movie, we'll store another one later.
        session.query(db.MovieLanguage).filter(
            db.MovieLanguage.movie_id == movie.id).delete()
        session.query(
            db.Movie).filter(db.Movie.url == entry['imdb_url']).delete()
        session.commit()

    # search and store to cache
    if 'title' in entry:
        log.verbose('Parsing imdb for `%s`' % entry['title'])
    else:
        log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
    try:
        movie = self._parse_new_movie(entry['imdb_url'], session)
    except UnicodeDecodeError:
        log.error(
            'Unable to determine encoding for %s. Installing chardet library may help.'
            % entry['imdb_url'])
        # store cache so this will not be tried again
        movie = db.Movie()
        movie.url = entry['imdb_url']
        session.add(movie)
        session.commit()
        raise plugin.PluginError('UnicodeDecodeError')
    except ValueError as e:
        # TODO: might be a little too broad catch, what was this for anyway? ;P
        if manager.options.debug:
            log.exception(e)
        raise plugin.PluginError(
            'Invalid parameter: %s' % entry['imdb_url'], log)

    for att in [
        'title',
        'score',
        'votes',
        'meta_score',
        'year',
        'genres',
        'languages',
        'actors',
        'directors',
        'writers',
        'mpaa_rating',
    ]:
        log.trace('movie.%s: %s' % (att, getattr(movie, att)))

    # Update the entry fields
    entry.update_using_map(self.field_map, movie)
def imdb_id(self):
    """IMDB identifier parsed out of this record's url."""
    source_url = self.url
    return extract_id(source_url)
def parse_html_list(self, task, config, url, params, headers):
    """Scrape an imdb list's HTML pages into entries.

    :param task: Task instance (provides the requests session via fetch_page)
    :param config: plugin configuration; config['list'] used for log messages
    :param url: imdb list url
    :param params: query params dict; 'page' is mutated while paginating
    :param headers: HTTP headers for the requests
    :return: list of entries, or None when the list is empty
    :raises PluginError: when the item count cannot be parsed as a number
    """
    page = self.fetch_page(task, url, params, headers)
    soup = get_soup(page.text)
    try:
        item_text = soup.find(
            'div', class_='lister-total-num-results').string.split()
        total_item_count = int(item_text[0].replace(',', ''))
        logger.verbose('imdb list contains {} items', total_item_count)
    except AttributeError:
        # the results-count div is missing entirely -> treat as an empty list
        total_item_count = 0
    except (ValueError, TypeError) as e:
        # TODO Something is wrong if we get a ValueError, I think
        raise plugin.PluginError(
            'Received invalid movie count: %s ; %s' % (soup.find(
                'div', class_='lister-total-num-results').string, e))

    if not total_item_count:
        logger.verbose('No movies were found in imdb list: {}', config['list'])
        return

    entries = []
    items_processed = 0
    page_no = 1
    while items_processed < total_item_count:
        # Fetch the next page unless we've just begun
        if items_processed:
            page_no += 1
            params['page'] = page_no
            page = self.fetch_page(task, url, params, headers)
            soup = get_soup(page.text)

        items = soup.find_all('div', class_='lister-item')
        if not items:
            logger.debug('no items found on page: {}, aborting.', url)
            break
        logger.debug('{} items found on page {}', len(items), page_no)

        for item in items:
            items_processed += 1
            a = item.find('h3', class_='lister-item-header').find('a')
            if not a:
                logger.debug('no title link found for row, skipping')
                continue

            link = ('http://www.imdb.com' + a.get('href')).rstrip('/')
            entry = Entry()
            entry['title'] = a.text
            try:
                year = int(
                    item.find('span', class_='lister-item-year').text)
                entry['title'] += ' (%s)' % year
                entry['imdb_year'] = year
            except (ValueError, TypeError):
                # year span missing or not a plain integer; leave title as-is
                pass
            entry['url'] = link
            entry['imdb_id'] = extract_id(link)
            entry['imdb_name'] = entry['title']
            entries.append(entry)

    return entries
def lookup(self, entry, search_allowed=True, session=None):
    """
    Perform imdb lookup for entry.

    Resolution order: a usable ``imdb_url`` is derived from ``imdb_id`` or
    validated from the entry; failing that, a cached search result is
    consulted; failing that (and if allowed), an IMDB title search is run
    and its outcome — success or failure — is cached. Finally the movie
    details are read from the local cache or freshly parsed from IMDB.

    :param entry: Entry instance
    :param search_allowed: Allow fallback to search
    :param session: SQLAlchemy session used for all cache reads/writes
    :raises PluginError: Failure reason
    """
    # Imported here (not at module top); presumably avoids a circular import — TODO confirm.
    from flexget.manager import manager

    # Log what we have to work with; with none of the three fields the lookup cannot proceed.
    if entry.get('imdb_id', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_id'])
    elif entry.get('imdb_url', eval_lazy=False):
        log.debug('No title passed. Lookup for %s' % entry['imdb_url'])
    elif entry.get('title', eval_lazy=False):
        log.debug('lookup for %s' % entry['title'])
    else:
        raise plugin.PluginError(
            'looking up IMDB for entry failed, no title, imdb_url or imdb_id passed.'
        )

    # if imdb_id is included, build the url.
    if entry.get('imdb_id', eval_lazy=False) and not entry.get('imdb_url', eval_lazy=False):
        entry['imdb_url'] = make_url(entry['imdb_id'])

    # make sure imdb url is valid: round-trip it through extract_id/make_url to
    # canonicalize, and discard it entirely when no id can be extracted.
    if entry.get('imdb_url', eval_lazy=False):
        imdb_id = extract_id(entry['imdb_url'])
        if imdb_id:
            entry['imdb_url'] = make_url(imdb_id)
        else:
            log.debug('imdb url %s is invalid, removing it' % entry['imdb_url'])
            entry['imdb_url'] = ''

    # no imdb_url, check if there is cached result for it or if the
    # search is known to fail
    if not entry.get('imdb_url', eval_lazy=False):
        result = (
            session.query(db.SearchResult)
            .filter(db.SearchResult.title == entry['title'])
            .first()
        )
        if result:
            # TODO: 1.2 this should really be checking task.options.retry
            if result.fails and not manager.options.execute.retry:
                # this movie cannot be found, not worth trying again ...
                log.debug('%s will fail lookup' % entry['title'])
                raise plugin.PluginError('IMDB lookup failed for %s' % entry['title'])
            else:
                if result.url:
                    log.trace('Setting imdb url for %s from db' % entry['title'])
                    entry['imdb_id'] = result.imdb_id
                    entry['imdb_url'] = result.url

    # no imdb url, but information required, try searching
    if not entry.get('imdb_url', eval_lazy=False) and search_allowed:
        log.verbose('Searching from imdb `%s`' % entry['title'])
        search = ImdbSearch()
        # Prefer an explicit movie_name over the raw entry title for searching.
        search_name = entry.get('movie_name', entry['title'], eval_lazy=False)
        search_result = search.smart_match(search_name)
        if search_result:
            entry['imdb_url'] = search_result['url']
            # store url for this movie, so we don't have to search on every run
            result = db.SearchResult(entry['title'], entry['imdb_url'])
            session.add(result)
            session.commit()
            log.verbose('Found %s' % (entry['imdb_url']))
        else:
            log_once(
                'IMDB lookup failed for %s' % entry['title'],
                log,
                logging.WARN,
                session=session,
            )
            # store FAIL for this title so future runs can skip the search
            result = db.SearchResult(entry['title'])
            result.fails = True
            session.add(result)
            session.commit()
            raise plugin.PluginError('Title `%s` lookup failed' % entry['title'])

    # check if this imdb page has been parsed & cached
    movie = session.query(db.Movie).filter(db.Movie.url == entry['imdb_url']).first()

    # If we have a movie from cache, we are done
    if movie and not movie.expired:
        entry.update_using_map(self.field_map, movie)
        return

    # Movie was not found in cache, or was expired
    if movie is not None:
        if movie.expired:
            log.verbose('Movie `%s` details expired, refreshing ...' % movie.title)
        # Remove the old movie, we'll store another one later.
        session.query(db.MovieLanguage).filter(db.MovieLanguage.movie_id == movie.id).delete()
        session.query(db.Movie).filter(db.Movie.url == entry['imdb_url']).delete()
        session.commit()

    # search and store to cache
    if 'title' in entry:
        log.verbose('Parsing imdb for `%s`' % entry['title'])
    else:
        log.verbose('Parsing imdb for `%s`' % entry['imdb_id'])
    try:
        movie = self._parse_new_movie(entry['imdb_url'], session)
    except UnicodeDecodeError:
        log.error(
            'Unable to determine encoding for %s. Installing chardet library may help.'
            % entry['imdb_url']
        )
        # store cache so this will not be tried again
        movie = db.Movie()
        movie.url = entry['imdb_url']
        session.add(movie)
        session.commit()
        raise plugin.PluginError('UnicodeDecodeError')
    except ValueError as e:
        # TODO: might be a little too broad catch, what was this for anyway? ;P
        if manager.options.debug:
            log.exception(e)
        raise plugin.PluginError('Invalid parameter: %s' % entry['imdb_url'], log)

    # Trace-dump the parsed attributes for debugging.
    for att in [
        'title',
        'score',
        'votes',
        'meta_score',
        'year',
        'genres',
        'languages',
        'actors',
        'directors',
        'writers',
        'mpaa_rating',
    ]:
        log.trace('movie.%s: %s' % (att, getattr(movie, att)))

    # Update the entry fields
    entry.update_using_map(self.field_map, movie)
def parse_site(self, url, task):
    """Parse configured url and return releases array"""
    try:
        page = task.requests.get(url).content
    except RequestException as e:
        raise plugin.PluginError('Error getting input page: %s' % e)
    soup = get_soup(page)

    releases = []
    for entry_div in soup.find_all('div', attrs={'class': 'entry'}):
        heading = entry_div.find('h2')
        if not heading:
            log.debug('No h2 entrytitle')
            continue
        release = {'title': heading.a.contents[0].strip()}
        log.debug('Processing title %s' % (release['title']))

        for anchor in entry_div.find_all('a'):
            # skip anchors with no content at all
            if not anchor.contents:
                continue
            text = anchor.contents[0]
            # only plain text anchors count (None isn't a NavigableString either)
            if not isinstance(text, NavigableString):
                continue
            text = text.strip().lower()
            # an anchor without a target is useless here
            if not anchor.has_attr('href'):
                continue
            href = anchor['href']
            log.debug('found link %s -> %s' % (text, href))

            # handle imdb link
            if text.lower() == 'imdb':
                log.debug('found imdb link %s' % href)
                release['imdb_id'] = extract_id(href)

            # test if entry with this url would be rewritable by known plugins (ie. downloadable)
            candidate = {'title': release['title'], 'url': href}
            urlrewriting = plugin.get('urlrewriting', self)
            if urlrewriting.url_rewritable(task, candidate):
                release['url'] = href
                log.trace('--> accepting %s (resolvable)' % href)
            else:
                log.trace('<-- ignoring %s (non-resolvable)' % href)

        if 'url' in release:
            releases.append(release)
        else:
            # reject if no torrent link
            from flexget.utils.log import log_once

            log_once(
                '%s skipped due to missing or unsupported (unresolvable) download link'
                % (release['title']),
                log,
            )
    return releases