def on_task_input(self, task, config):
    if not config:
        return
    config = self.prepare_config(config)
    entries = []
    queue_name = config.get('queue_name')
    with Session() as session:
        for queue_item in queue_get(session=session, downloaded=False, queue_name=queue_name):
            entry = Entry()
            # make sure the entry has IMDB fields filled
            entry['url'] = ''
            if queue_item.imdb_id:
                entry['imdb_id'] = queue_item.imdb_id
                entry['imdb_url'] = make_imdb_url(queue_item.imdb_id)
            if queue_item.tmdb_id:
                entry['tmdb_id'] = queue_item.tmdb_id
            # check if title is a imdb url (leftovers from old database?)
            # TODO: maybe this should be fixed at the queue_get ...
            if 'http://' in queue_item.title:
                plugin.get_plugin_by_name('tmdb_lookup').instance.lookup(entry)
                log.debug('queue contains url instead of title')
                if entry.get('movie_name'):
                    entry['title'] = entry['movie_name']
                else:
                    log.error('Found imdb url in imdb queue, but lookup failed: %s' % queue_item.title)
                    continue
            else:
                # normal title
                entry['title'] = queue_item.title

            # Add the year and quality if configured to (make sure not to double it up)
            if config.get('year') and entry.get('movie_year') \
                    and str(entry['movie_year']) not in entry['title']:
                plugin.get_plugin_by_name('tmdb_lookup').instance.lookup(entry)
                entry['title'] += ' %s' % entry['movie_year']

            # TODO: qualities can now be ranges.. how should we handle this?
            if config.get('quality') and queue_item.quality != 'ANY':
                log.info('quality option of emit_movie_queue is disabled while we figure out how to handle ranges')
                # entry['title'] += ' %s' % queue_item.quality

            entries.append(entry)
            if entry.get('imdb_id'):
                log.debug('Added title and IMDB id to new entry: %s - %s',
                          entry['title'], entry['imdb_id'])
            elif entry.get('tmdb_id'):
                log.debug('Added title and TMDB id to new entry: %s - %s',
                          entry['title'], entry['tmdb_id'])
            else:
                # should this ever happen though?
                log.debug('Added title to new entry: %s', entry['title'])

    return entries
def entries_from_lines(self, lines):
    """
    :param lines: list of lines
    :return list: list of entries generated from lines
    """
    entries = []
    for line in lines:
        entry = Entry(irc_raw_message=line)

        # Use the message as title
        entry['title'] = line

        # find a url...
        url_match = URL_MATCHER.findall(line)
        if url_match:
            # We have a URL(s)!, generate an entry
            urls = list(url_match)
            url = urls[-1]
            entry.update({'urls': urls, 'url': url})

        if not entry.get('url'):
            log.error('Parsing message failed. No url found.')
            continue
        entries.append(entry)

    return entries
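# A minimal standalone sketch of the same line-to-entry idea, assuming a simple
# URL regex; URL_MATCHER in the plugin above is the real (stricter) pattern.
import re

URL_RE = re.compile(r'https?://\S+')  # hypothetical pattern, for illustration only

def urls_from_line(line):
    """Return (urls, last_url) found in a message line, or ([], None)."""
    urls = URL_RE.findall(line)
    return urls, (urls[-1] if urls else None)

# usage:
# urls, url = urls_from_line('New release! http://example.com/t/123')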
def on_task_input(self, task, config):
    entries = []
    with Session() as session:
        digest_entries = session.query(DigestEntry).filter(DigestEntry.list == config['list'])
        # Remove any entries older than the expire time, if defined.
        if isinstance(config.get('expire'), basestring):
            expire_time = parse_timedelta(config['expire'])
            digest_entries.filter(DigestEntry.added < datetime.now() - expire_time).delete()
        for index, digest_entry in enumerate(digest_entries.order_by(DigestEntry.added.desc()).all()):
            # Just remove any entries past the limit, if set.
            if 0 < config.get('limit', -1) <= index:
                session.delete(digest_entry)
                continue
            entry = Entry(digest_entry.entry)
            if config.get('restore_state') and entry.get('digest_state'):
                # Not sure this is the best way, but we don't want hooks running on this task
                # (like backlog hooking entry.fail)
                entry._state = entry['digest_state']
            entries.append(entry)
            # If expire is 'True', we remove it after it is output once.
            if config.get('expire', True) is True:
                session.delete(digest_entry)
    return entries
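# Standalone sketch of the expiry rule above: entries added before
# `now - expire` are dropped. Uses plain datetime instead of FlexGet's
# parse_timedelta; the '7 days' string format is an illustrative assumption.
from datetime import datetime, timedelta

def expired(added, expire='7 days'):
    amount, unit = expire.split()
    cutoff = datetime.now() - timedelta(**{unit: int(amount)})
    return added < cutoff

# expired(datetime(2000, 1, 1)) -> True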
def __init__(self, path: str, fields: list, encoding: str):
    self.filename = path
    self.fields = fields
    self.encoding = encoding
    self.entries = []
    try:
        # Close the handle once parsed instead of leaking it.
        with open(self.filename, encoding=self.encoding) as content:
            try:
                # TODO: use the load from our serialization system if that goes in
                entries = load_yaml(content)
            except Exception as exc:
                raise PluginError(f'Error opening yaml file `{self.filename}`: {exc}')
    except FileNotFoundError:
        entries = []
    if not entries:
        return
    if not isinstance(entries, list):
        raise PluginError(f'List `{self.filename}` must be a yaml list')
    for entry in entries:
        if not isinstance(entry, dict):
            raise PluginError(f'Elements of `{self.filename}` must be dictionaries')
        entry = Entry(**entry)
        if not entry.get('url'):
            entry['url'] = f'mock://localhost/entry_list/{random.random()}'
        self.entries.append(entry)
def lookup_movie(title, session, identifiers=None):
    try:
        imdb_lookup = plugin.get('imdb_lookup', 'movie_list').lookup
    except DependencyError:
        imdb_lookup = None
    try:
        tmdb_lookup = plugin.get('tmdb_lookup', 'movie_list').lookup
    except DependencyError:
        tmdb_lookup = None

    if not (imdb_lookup or tmdb_lookup):
        return

    entry = Entry(title=title)
    if identifiers:
        for identifier in identifiers:
            for key, value in identifier.items():
                entry[key] = value
    if imdb_lookup:
        try:
            imdb_lookup(entry, session=session)
        # IMDB lookup raises PluginError instead of the normal ValueError
        except PluginError:
            if tmdb_lookup:
                tmdb_lookup(entry)
    else:
        tmdb_lookup(entry)
    # Return only if lookup was successful
    if entry.get('movie_name'):
        return entry
    return
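# A minimal, dependency-free sketch of the fallback pattern the function above
# uses: try the primary lookup, fall back to the secondary on failure. All
# names here are illustrative, not FlexGet API.
def lookup_with_fallback(entry, primary, secondary):
    """Run primary(entry); on LookupError fall back to secondary(entry)."""
    try:
        primary(entry)
    except LookupError:
        if secondary:
            secondary(entry)
    return entry if entry.get('movie_name') else None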
def entries_from_lines(self, lines):
    """
    :param lines: list of lines
    :return list: list of entries generated from lines
    """
    entries = []
    for line in lines:
        entry = Entry(irc_raw_message=line)

        # Use the message as title
        entry['title'] = line

        # find a url...
        url_match = URL_MATCHER.findall(line)
        if url_match:
            # We have a URL(s)!, generate an entry
            urls = list(url_match)
            url = urls[-1]
            entry.update({'urls': urls, 'url': url})

        if not entry.get('url'):
            logger.error('Parsing message failed. No url found.')
            continue
        entries.append(entry)

    return entries
def on_task_input(self, task):
    if not task.manager.options.inject:
        return
    options = self.parse_arguments(task.manager.options.inject)

    # disable other inputs
    log.info('Disabling the rest of the input phase.')
    task.disable_phase('input')

    # create our injected entry
    entry = Entry(options['entry'], injected=True)
    if 'url' not in entry:
        # string.letters does not exist on python 3; use ascii_letters
        entry['url'] = 'http://localhost/inject/%s' % ''.join(
            random.choice(string.ascii_letters + string.digits) for _ in range(1, 30)
        )
    if entry.get('immortal'):
        log.debug('Injected entry is immortal')
    task.all_entries.append(entry)
    if options.get('accept', False):
        log.debug('accepting the injection')
        entry.accept('--inject accepted')
def on_task_input(self, task, config):
    if not config:
        return
    config = self.prepare_config(config)
    entries = []

    with Session() as session:
        for queue_item in queue_get(session=session, downloaded=False):
            entry = Entry()
            # make sure the entry has IMDB fields filled
            entry["url"] = ""
            if queue_item.imdb_id:
                entry["imdb_id"] = queue_item.imdb_id
                entry["imdb_url"] = make_imdb_url(queue_item.imdb_id)
            if queue_item.tmdb_id:
                entry["tmdb_id"] = queue_item.tmdb_id
            plugin.get_plugin_by_name("tmdb_lookup").instance.lookup(entry)
            # check if title is a imdb url (leftovers from old database?)
            # TODO: maybe this should be fixed at the queue_get ...
            if "http://" in queue_item.title:
                log.debug("queue contains url instead of title")
                if entry.get("movie_name"):
                    entry["title"] = entry["movie_name"]
                else:
                    log.error("Found imdb url in imdb queue, but lookup failed: %s" % queue_item.title)
                    continue
            else:
                # normal title
                entry["title"] = queue_item.title

            # Add the year and quality if configured to (make sure not to double it up)
            if (
                config.get("year")
                and entry.get("movie_year")
                and unicode(entry["movie_year"]) not in entry["title"]
            ):
                entry["title"] += " %s" % entry["movie_year"]

            # TODO: qualities can now be ranges.. how should we handle this?
            if config.get("quality") and queue_item.quality != "ANY":
                log.info("quality option of emit_movie_queue is disabled while we figure out how to handle ranges")
                # entry['title'] += ' %s' % queue_item.quality

            entries.append(entry)
            log.debug("Added title and IMDB id to new entry: %s - %s" % (entry["title"], entry.get("imdb_id")))

    return entries
def on_task_input(self, task, config):
    if not config:
        return
    config = self.prepare_config(config)
    entries = []

    for queue_item in queue_get():
        entry = Entry()
        # make sure the entry has IMDB fields filled
        entry['url'] = ''
        if queue_item.imdb_id:
            entry['imdb_url'] = 'http://www.imdb.com/title/' + queue_item.imdb_id
            entry['imdb_id'] = queue_item.imdb_id
        if queue_item.tmdb_id:
            entry['tmdb_id'] = queue_item.tmdb_id
        get_plugin_by_name('tmdb_lookup').instance.lookup(entry)
        # check if title is a imdb url (leftovers from old database?)
        # TODO: maybe this should be fixed at the queue_get ...
        if 'http://' in queue_item.title:
            log.debug('queue contains url instead of title')
            if entry.get('movie_name'):
                entry['title'] = entry['movie_name']
            else:
                log.error('Found imdb url in imdb queue, but lookup failed: %s' % queue_item.title)
                continue
        else:
            # normal title
            entry['title'] = queue_item.title

        # Add the year and quality if configured to
        if config.get('year') and entry.get('movie_year'):
            entry['title'] += ' %s' % entry['movie_year']
        # TODO: qualities can now be ranges.. how should we handle this?
        # if config.get('quality') and queue_item.quality != 'ANY':
        #     entry['title'] += ' %s' % queue_item.quality

        entries.append(entry)
        log.debug('Added title and IMDB id to new entry: %s - %s' % (entry['title'], entry.get('imdb_id')))

    return entries
def parse_what(what, lookup=True, session=None):
    """
    Determines what information was provided by the search string `what`.
    If `lookup` is true, will fill in other information from tmdb.

    :param what: Can be one of:
        <Movie Title>: Search based on title
        imdb_id=<IMDB id>: search based on imdb id
        tmdb_id=<TMDB id>: search based on tmdb id
    :param bool lookup: Whether missing info should be filled in from tmdb.
    :param session: An existing session that will be used for lookups if provided.
    :rtype: dict
    :return: A dictionary with 'title', 'imdb_id' and 'tmdb_id' keys
    """
    tmdb_lookup = plugin.get_plugin_by_name('api_tmdb').instance.lookup

    result = {'title': None, 'imdb_id': None, 'tmdb_id': None}
    result['imdb_id'] = extract_id(what)
    if not result['imdb_id']:
        if isinstance(what, int):
            result['tmdb_id'] = what
        elif what.startswith('tmdb_id='):
            result['tmdb_id'] = what[8:]
        else:
            result['title'] = what

    if not lookup:
        # If not doing an online lookup we can return here
        return result

    search_entry = Entry(title=result['title'] or '')
    for field in ['imdb_id', 'tmdb_id']:
        if result.get(field):
            search_entry[field] = result[field]
    # Put lazy lookup fields on the search entry
    plugin.get_plugin_by_name('imdb_lookup').instance.register_lazy_fields(search_entry)
    plugin.get_plugin_by_name('tmdb_lookup').instance.lookup(search_entry)

    try:
        # Both ids are optional, but if movie_name was populated at least one of them will be there
        return {
            'title': search_entry['movie_name'],
            'imdb_id': search_entry.get('imdb_id'),
            'tmdb_id': search_entry.get('tmdb_id'),
        }
    except KeyError as e:
        raise QueueError(e.message)
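# Illustrative-only sketch of the `what` forms parse_what() accepts; the id
# extraction below is simplified and does not use FlexGet's extract_id().
def classify_what(what):
    if isinstance(what, int):
        return {'tmdb_id': what}
    if what.startswith('imdb_id='):
        return {'imdb_id': what[len('imdb_id='):]}
    if what.startswith('tmdb_id='):
        return {'tmdb_id': what[len('tmdb_id='):]}
    return {'title': what}

# classify_what('imdb_id=tt0078748') -> {'imdb_id': 'tt0078748'}
# classify_what(348)                 -> {'tmdb_id': 348}
# classify_what('Alien')             -> {'title': 'Alien'}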
def on_task_input(self, task, config):
    config = get_config(config)
    log.debug('Starting MyAnimeList plugin')

    # Retrieve username and remove invalid characters
    username = safe_username(config['username'])
    status = config.get('list', 'watching')

    url = self.API_URL % username
    log.verbose('Retrieving MyAnimeList on %s.', url)
    headers = {'User-Agent': config.get('user-agent', self.user_agent)}
    log.debug('Using %s', headers)
    resp = task.requests.get(url, headers=headers)

    if not resp or resp.status_code != 200:
        log.warning('No data returned from MyAnimeList.')
        return

    content_type = resp.headers.get('content-type')
    if content_type == 'application/xml; charset=UTF-8':
        data = parse_xml(resp.text.encode('utf-8'))
        log.debug('Parsed xml to list of dicts')
    else:
        log.warning('Content type not xml: %s' % content_type)
        data = ''

    if not isinstance(data, list):
        raise plugin.PluginError('Incompatible response: %r.' % data)

    entries = []
    for item in data:
        if item['my_status'] == maps['my_status'][status]:
            entry = Entry()
            entry.update_using_map(anime_map, item, ignore_none=True)
            names = item['series_synonyms']
            if names and ';' in names:
                log.debug('Parsing series_synonyms: %s', names)
                names = [n.strip() for n in names.split(';')]
                names = [n for n in names if n and n != item['series_title']]
                if names:
                    entry['configure_series_alternate_name'] = names
                    log.debug('Added alternate names: %r', names)
            if entry.isvalid():
                entries.append(entry)
                log.debug('Appended entry: %s', entry.get('title'))
            else:
                log.debug('Invalid entry? %s', entry)

    log.debug('Returning %s entries', len(entries))
    return entries
def on_feed_input(self, feed, config):
    if not config:
        return
    config = self.prepare_config(config)
    entries = []

    imdb_entries = queue_get()

    for imdb_entry in imdb_entries:
        entry = Entry()
        # make sure the entry has IMDB fields filled
        entry['url'] = ''
        entry['imaginary'] = True
        entry['imdb_url'] = 'http://www.imdb.com/title/' + imdb_entry.imdb_id
        entry['imdb_id'] = imdb_entry.imdb_id
        get_plugin_by_name('tmdb_lookup').instance.lookup(entry)
        # check if title is a imdb url (leftovers from old database?)
        # TODO: maybe this should be fixed at the queue_get ...
        if 'http://' in imdb_entry.title:
            log.debug('queue contains url instead of title')
            if entry.get('movie_name'):
                entry['title'] = entry['movie_name']
            else:
                log.error('Found imdb url in imdb queue, but lookup failed: %s' % imdb_entry.title)
                continue
        else:
            # normal title
            entry['title'] = imdb_entry.title

        # Add the year and quality if configured to
        if config.get('year') and entry.get('movie_year'):
            entry['title'] += ' %s' % entry['movie_year']
        if config.get('quality') and imdb_entry.quality != 'ANY':
            entry['title'] += ' %s' % imdb_entry.quality

        entries.append(entry)
        log.debug('Added title and IMDB id to new entry: %s - %s' % (entry['title'], entry['imdb_id']))

    return entries
def lookup_movie(title, session, identifiers=None):
    entry = Entry(title=title)
    if identifiers:
        for identifier in identifiers:
            for key, value in identifier.items():
                entry[key] = value
    try:
        imdb_lookup(entry, session=session)
    except PluginError:
        tmdb_lookup(entry)
    if entry.get('movie_name'):
        return entry
def on_feed_input(self, feed, config): if not config: return config = self.prepare_config(config) entries = [] imdb_entries = queue_get() for imdb_entry in imdb_entries: entry = Entry() # make sure the entry has IMDB fields filled entry["url"] = "" entry["imdb_url"] = "http://www.imdb.com/title/" + imdb_entry.imdb_id entry["imdb_id"] = imdb_entry.imdb_id get_plugin_by_name("tmdb_lookup").instance.lookup(entry) # check if title is a imdb url (leftovers from old database?) # TODO: maybe this should be fixed at the queue_get ... if "http://" in imdb_entry.title: log.debug("queue contains url instead of title") if entry.get("movie_name"): entry["title"] = entry["movie_name"] else: log.error("Found imdb url in imdb queue, but lookup failed: %s" % entry["title"]) continue else: # normal title entry["title"] = imdb_entry.title # Add the year and quality if configured to if config.get("year") and entry.get("movie_year"): entry["title"] += " %s" % entry["movie_year"] if config.get("quality") and imdb_entry.quality != "ANY": entry["title"] += " %s" % imdb_entry.quality entries.append(entry) log.debug("Added title and IMDB id to new entry: %s - %s" % (entry["title"], entry["imdb_id"])) return entries
def _get_sftp_config(cls, entry: Entry):
    """
    Parses a url and returns a hashable config, source path, and destination path
    """
    # parse url
    parsed = urlparse(entry['url'])
    host: str = parsed.hostname
    username: str = parsed.username
    password: str = parsed.password
    port: int = parsed.port or DEFAULT_SFTP_PORT

    # get private key info if it exists
    private_key: str = entry.get('private_key')
    private_key_pass: str = entry.get('private_key_pass')

    config: Optional[SftpConfig] = None

    if parsed.scheme == 'sftp':
        config = SftpConfig(host, port, username, password, private_key, private_key_pass)
    else:
        logger.warning('Scheme does not match SFTP: {}', entry['url'])

    return config
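# Standalone illustration of the URL fields the method above relies on;
# urlparse exposes hostname/username/password/port directly.
from urllib.parse import urlparse

parsed = urlparse('sftp://user:secret@example.com:2222/srv/files')
# parsed.scheme   == 'sftp'
# parsed.hostname == 'example.com'
# parsed.username == 'user'
# parsed.password == 'secret'
# parsed.port     == 2222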
def on_task_input(self, task, config):
    config = get_config(config)
    log.debug('Starting MyAnimeList plugin')

    # Retrieve username and remove invalid characters
    username = safe_username(config['username'])
    status = config.get('list', 'watching')

    url = self.API_URL % username
    log.verbose('Retrieving MyAnimeList on %s.', url)
    headers = {'User-Agent': config.get('user-agent', self.user_agent)}
    log.debug('Using %s', headers)
    resp = task.requests.get(url, headers=headers)

    if not resp or resp.status_code != 200:
        log.warning('No data returned from MyAnimeList.')
        return

    content_type = resp.headers.get('content-type')
    if content_type == 'application/xml; charset=UTF-8':
        data = parse_xml(resp.text.encode('utf-8'))
        log.debug('Parsed xml to list of dicts')
    else:
        log.warning('Content type not xml: %s' % content_type)
        data = ''

    if not isinstance(data, list):
        raise PluginError('Incompatible items in response: %r.' % data)

    entries = []
    for item in data:
        if item['my_status'] == self.watched_map.get(status):
            entry = Entry()
            entry.update_using_map(self.anime_map, item, ignore_none=True)
            mal_url = 'http://myanimelist.net/anime/%s' % entry['mal_id']
            entry['url'] = mal_url
            entry['mal_url'] = mal_url
            entry['mal_type'] = self.type_map.get(entry['mal_type'])
            entry['mal_my_status'] = self.inv_watched_map.get(entry['mal_my_status'])
            entry['mal_status'] = self.status_map.get(entry['mal_status'])
            entries.append(entry)
            log.debug('Appended entry: %s', entry.get('title'))

    log.debug('Returning %s entries', len(entries))
    return entries
def on_task_input(self, task):
    if not task.manager.options.inject:
        return
    options = self.parse_arguments(task.manager.options.inject)

    # disable other inputs
    log.info('Disabling the rest of the input phase.')
    task.disable_phase('input')

    # create our injected entry
    entry = Entry(options['entry'], injected=True)
    if 'url' not in entry:
        entry['url'] = 'http://localhost/inject/%s' % ''.join(
            [random.choice(string.letters + string.digits) for x in range(1, 30)])
    if entry.get('immortal'):
        log.debug('Injected entry is immortal')
    task.all_entries.append(entry)
    if options.get('accept', False):
        log.debug('accepting the injection')
        task.accept(entry, '--inject accepted')
def on_feed_input(self, feed):
    if not InputInject.options:
        return

    # disable other inputs
    log.info('Disabling the rest of the input phase.')
    feed.disable_phase('input')

    # create our injected entry
    import string
    import random

    entry = Entry(InputInject.options['entry'], injected=True)
    if 'url' not in entry:
        entry['url'] = 'http://localhost/inject/%s' % ''.join(
            [random.choice(string.letters + string.digits) for x in range(1, 30)])
    if entry.get('immortal'):
        log.debug('Injected entry is immortal')
    feed.entries.append(entry)
    if InputInject.options.get('accept', False):
        log.debug('accepting the injection')
        feed.accept(entry, '--inject accepted')
def items(self):
    if self._items is None:
        if self.config['list'] in ['collection', 'watched'] and self.config['type'] == 'auto':
            raise plugin.PluginError('`type` cannot be `auto` for %s list.' % self.config['list'])
        endpoint = self.get_list_endpoint()

        log.verbose('Retrieving `%s` list `%s`', self.config['type'], self.config['list'])
        try:
            result = self.session.get(get_api_url(endpoint))
            try:
                data = result.json()
            except ValueError:
                log.debug('Could not decode json from response: %s', result.text)
                raise plugin.PluginError('Error getting list from trakt.')
        except RequestException as e:
            raise plugin.PluginError('Could not retrieve list from trakt (%s)' % e)

        if not data:
            log.warning('No data returned from trakt for %s list %s.', self.config['type'], self.config['list'])
            return []

        entries = []
        list_type = (self.config['type']).rstrip('s')
        for item in data:
            if self.config['type'] == 'auto':
                list_type = item['type']
            # Collection and watched lists don't return 'type' along with the items (right now)
            if 'type' in item and item['type'] != list_type:
                log.debug('Skipping %s because it is not a %s',
                          item[item['type']].get('title', 'unknown'), list_type)
                continue
            if list_type != 'episode' and not item[list_type]['title']:
                # Skip shows/movies with no title
                log.warning('Item in trakt list does not appear to have a title, skipping.')
                continue
            entry = Entry()
            if list_type == 'episode':
                entry['url'] = 'https://trakt.tv/shows/%s/seasons/%s/episodes/%s' % (
                    item['show']['ids']['slug'],
                    item['episode']['season'],
                    item['episode']['number'])
            else:
                entry['url'] = 'https://trakt.tv/%ss/%s' % (list_type, item[list_type]['ids'].get('slug'))
            entry.update_using_map(field_maps[list_type], item)

            # get movie name translation
            language = self.config.get('language')
            if list_type == 'movie' and language:
                endpoint = ['movies', entry['trakt_movie_id'], 'translations', language]
                try:
                    result = self.session.get(get_api_url(endpoint))
                    try:
                        translation = result.json()
                    except ValueError:
                        raise plugin.PluginError('Error decoding movie translation from trakt: %s.' % result.text)
                except RequestException as e:
                    raise plugin.PluginError('Could not retrieve movie translation from trakt: %s' % str(e))
                if not translation:
                    log.warning('No translation data returned from trakt for movie %s.', entry['title'])
                else:
                    log.verbose('Found `%s` translation for movie `%s`: %s',
                                language, entry['movie_name'], translation[0]['title'])
                    entry['title'] = translation[0]['title']
                    if entry.get('movie_year'):
                        entry['title'] += ' (' + str(entry['movie_year']) + ')'
                    entry['movie_name'] = translation[0]['title']

            # Override the title if strip_dates is on. TODO: a better way?
            if self.config.get('strip_dates'):
                if list_type in ['show', 'movie']:
                    entry['title'] = item[list_type]['title']
                elif list_type == 'episode':
                    entry['title'] = '{show[title]} S{episode[season]:02}E{episode[number]:02}'.format(**item)
                    if item['episode']['title']:
                        entry['title'] += ' {episode[title]}'.format(**item)

            if entry.isvalid():
                if self.config.get('strip_dates'):
                    # Remove year from end of name if present
                    entry['title'] = split_title_year(entry['title'])[0]
                entries.append(entry)
            else:
                log.debug('Invalid entry created? %s', entry)

        self._items = entries
    return self._items
def on_task_input(self, task, config):
    """Creates an entry for each item in your uoccin watchlist.

    Example::

        uoccin_emit:
          path: /path/to/gdrive/uoccin
          type: series
          tags: [ 'favorite', 'hires' ]
          check_tags: all

    Options path and type are required while the others are for filtering:
    - 'any' will include all the items marked with one or more tags in the list
    - 'all' will only include the items marked with all the listed tags
    - 'none' will only include the items not marked with any of the listed tags.

    The entries created will have a valid imdb/tvdb url and id.
    """
    imdb_lookup = plugin.get_plugin_by_name('imdb_lookup').instance
    udata = load_uoccin_data(config['path'])
    section = udata['movies'] if config['type'] == 'movies' else udata['series']
    entries = []
    for eid, itm in list(section.items()):
        if not itm['watchlist']:
            continue
        if 'tags' in config:
            n = len(set(config['tags']) & set(itm.get('tags', [])))
            if config['check_tags'] == 'any' and n <= 0:
                continue
            if config['check_tags'] == 'all' and n != len(config['tags']):
                continue
            if config['check_tags'] == 'none' and n > 0:
                continue
        if config['type'] == 'movies':
            entry = Entry()
            entry['url'] = 'http://www.imdb.com/title/' + eid
            entry['imdb_id'] = eid
            if itm['name'] != 'N/A':
                entry['title'] = itm['name']
            else:
                try:
                    imdb_lookup.lookup(entry)
                except plugin.PluginError as e:
                    self.log.trace('entry %s imdb failed (%s)' % (entry['imdb_id'], e.value))
                    continue
                entry['title'] = entry.get('imdb_name')
            if 'tags' in itm:
                entry['uoccin_tags'] = itm['tags']
            if entry.isvalid():
                entries.append(entry)
            else:
                self.log.debug('Invalid entry created? %s' % entry)
        else:
            sname = itm['name']
            try:
                sname = lookup_series(tvdb_id=eid).name
            except LookupError:
                self.log.warning('Unable to lookup series %s from tvdb, using raw name.' % eid)
            surl = 'http://thetvdb.com/?tab=series&id=' + eid
            if config['type'] == 'series':
                entry = Entry()
                entry['url'] = surl
                entry['title'] = sname
                entry['tvdb_id'] = eid
                if 'tags' in itm:
                    entry['uoccin_tags'] = itm['tags']
                if entry.isvalid():
                    entries.append(entry)
                else:
                    self.log.debug('Invalid entry created? %s' % entry)
            elif config['ep_flags'] == 'collected':
                slist = itm.get('collected', {})
                for sno in list(slist.keys()):
                    for eno in slist[sno]:
                        entry = Entry()
                        entry['url'] = surl
                        entry['title'] = '%s S%02dE%02d' % (sname, int(sno), int(eno))
                        entry['tvdb_id'] = eid
                        if entry.isvalid():
                            entries.append(entry)
                        else:
                            self.log.debug('Invalid entry created? %s' % entry)
            else:
                slist = itm.get('watched', {})
                for sno in list(slist.keys()):
                    for eno in slist[sno]:
                        entry = Entry()
                        entry['url'] = surl
                        entry['title'] = '%s S%02dE%02d' % (sname, int(sno), eno)
                        entry['tvdb_id'] = eid
                        if entry.isvalid():
                            entries.append(entry)
                        else:
                            self.log.debug('Invalid entry created? %s' % entry)
    entries.sort(key=lambda x: x['title'])
    return entries
def process(self):
    imdb_lookup = plugin.get_plugin_by_name('imdb_lookup').instance
    self.changes.sort()
    udata = load_uoccin_data(self.folder)
    for line in self.changes:
        tmp = line.split('|')
        typ = tmp[1]
        tid = tmp[2]
        fld = tmp[3]
        val = tmp[4]
        self.log.verbose('processing: type=%s, target=%s, field=%s, value=%s' % (typ, tid, fld, val))
        if typ == 'movie':
            # default
            mov = udata['movies'].setdefault(
                tid, {'name': 'N/A', 'watchlist': False, 'collected': False, 'watched': False})
            # movie title is unknown at this time
            fake = Entry()
            fake['url'] = 'http://www.imdb.com/title/' + tid
            fake['imdb_id'] = tid
            try:
                imdb_lookup.lookup(fake)
                mov['name'] = fake.get('imdb_name')
            except plugin.PluginError:
                self.log.warning('Unable to lookup movie %s from imdb, using raw name.' % tid)
            # setting
            if fld == 'watchlist':
                mov['watchlist'] = val == 'true'
            elif fld == 'collected':
                mov['collected'] = val == 'true'
            elif fld == 'watched':
                mov['watched'] = val == 'true'
            elif fld == 'tags':
                mov['tags'] = re.split(r',\s*', val)
            elif fld == 'subtitles':
                mov['subtitles'] = re.split(r',\s*', val)
            elif fld == 'rating':
                mov['rating'] = int(val)
            # cleaning
            if not (mov['watchlist'] or mov['collected'] or mov['watched']):
                self.log.verbose('deleting unused section: movies\\%s' % tid)
                udata['movies'].pop(tid)
        elif typ == 'series':
            tmp = tid.split('.')
            sid = tmp[0]
            sno = tmp[1] if len(tmp) > 2 else None
            eno = tmp[2] if len(tmp) > 2 else None
            # default
            ser = udata['series'].setdefault(
                sid, {'name': 'N/A', 'watchlist': False, 'collected': {}, 'watched': {}})
            # series name is unknown at this time
            try:
                series = lookup_series(tvdb_id=sid)
                ser['name'] = series.seriesname
            except LookupError:
                self.log.warning('Unable to lookup series %s from tvdb, using raw name.' % sid)
            # setting
            if fld == 'watchlist':
                ser['watchlist'] = val == 'true'
            elif fld == 'tags':
                ser['tags'] = re.split(r',\s*', val)
            elif fld == 'rating':
                ser['rating'] = int(val)
            elif sno is None or eno is None:
                self.log.warning('invalid line "%s": season and episode numbers are required' % line)
            elif fld == 'collected':
                season = ser['collected'].setdefault(sno, {})
                if val == 'true':
                    season.setdefault(eno, [])
                else:
                    if eno in season:
                        season.pop(eno)
                    if not season:
                        self.log.verbose('deleting unused section: series\\%s\\collected\\%s' % (sid, sno))
                        ser['collected'].pop(sno)
            elif fld == 'subtitles':
                ser['collected'].setdefault(sno, {})[eno] = re.split(r',\s*', val)
            elif fld == 'watched':
                season = ser['watched'].setdefault(sno, [])
                if val == 'true':
                    season = ser['watched'][sno] = list(set(season) | set([int(eno)]))
                elif int(eno) in season:
                    season.remove(int(eno))
                season.sort()
                if not season:
                    self.log.debug('deleting unused section: series\\%s\\watched\\%s' % (sid, sno))
                    ser['watched'].pop(sno)
            # cleaning
            if not (ser['watchlist'] or ser['collected'] or ser['watched']):
                self.log.debug('deleting unused section: series\\%s' % sid)
                udata['series'].pop(sid)
        else:
            self.log.warning('invalid element type "%s"' % typ)
    # save the updated uoccin.json
    ufile = os.path.join(self.folder, 'uoccin.json')
    try:
        text = json.dumps(udata, sort_keys=True, indent=4, separators=(',', ': '))
        with open(ufile, 'w') as f:
            f.write(text)
    except Exception as err:
        self.log.debug('error writing %s: %s' % (ufile, err))
        raise plugin.PluginError('error writing %s: %s' % (ufile, err))
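# The process() method above leans on dict.setdefault to build nested state
# lazily; a minimal, runnable demonstration of that pattern (ids are made up):
udata = {'series': {}}
ser = udata['series'].setdefault('75760', {'name': 'N/A', 'collected': {}, 'watched': {}})
season = ser['collected'].setdefault('1', {})
season.setdefault('5', [])  # mark S01E05 as collected, with no subtitles yet
# udata now holds {'series': {'75760': {..., 'collected': {'1': {'5': []}}}}}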
def on_task_input(self, task, config):
    config = self.prepare_config(config)
    passkeys = config.get('passkeys')
    limit = config.get('limit')
    show_detail = config.get('show_detail')

    torrent_dict, torrents_hashes = self.get_torrents_data(task, config)
    if not torrents_hashes:
        return torrents_hashes

    try:
        data = {'sign': config['iyuu'], 'version': config['version']}
        sites_response = task.requests.get(
            'http://api.iyuu.cn/index.php?s=App.Api.Sites', timeout=60, params=data
        ).json()
        if sites_response.get('ret') != 200:
            raise plugin.PluginError(
                'http://api.iyuu.cn/index.php?s=App.Api.Sites: {}'.format(sites_response))
        sites_json = self.modify_sites(sites_response['data']['sites'])

        reseed_response = task.requests.post(
            'http://api.iyuu.cn/index.php?s=App.Api.Infohash', json=torrents_hashes, timeout=60
        ).json()
        if reseed_response.get('ret') != 200:
            raise plugin.PluginError(
                'http://api.iyuu.cn/index.php?s=App.Api.Infohash Error: {}'.format(reseed_response))
        reseed_json = reseed_response['data']
    except (RequestException, JSONDecodeError) as e:
        raise plugin.PluginError('Error when trying to send request to iyuu: {}'.format(e))

    entries = []
    site_limit = {}
    if sites_json and reseed_json:
        for info_hash, seeds_data in reseed_json.items():
            client_torrent = torrent_dict[info_hash]
            for torrent in seeds_data['torrent']:
                site = sites_json.get(str(torrent['sid']))
                if not site:
                    continue
                if torrent['info_hash'] in torrent_dict.keys():
                    continue
                site_name = self._get_site_name(site['base_url'])
                passkey = passkeys.get(site_name)
                if not passkey:
                    if show_detail:
                        logger.info('no passkey, skip site: {}, title: {}'.format(
                            site_name, client_torrent['title']))
                    continue
                if not site_limit.get(site_name):
                    site_limit[site_name] = 1
                else:
                    if site_limit[site_name] >= limit:
                        logger.info('site_limit:{} >= limit: {}, skip site: {}, title: {}'.format(
                            site_limit[site_name], limit, site_name, client_torrent['title']))
                        continue
                    site_limit[site_name] = site_limit[site_name] + 1
                torrent_id = str(torrent['torrent_id'])
                entry = Entry(title=client_torrent['title'], torrent_info_hash=torrent['info_hash'])
                entry['autoTMM'] = client_torrent['qbittorrent_auto_tmm']
                entry['category'] = client_torrent['qbittorrent_category']
                entry['savepath'] = client_torrent['qbittorrent_save_path']
                entry['paused'] = 'true'
                entry['class_name'] = site_name
                Executor.build_reseed(entry, config, site, passkey, torrent_id)
                if show_detail:
                    logger.info(
                        f"accept site: {site_name}, title: {client_torrent['title']}, url: {entry.get('url', None)}")
                if entry.get('url'):
                    entries.append(entry)
    return entries
def on_task_input(self, task, config):
    config = self.build_config(config)

    log.debug("Requesting task `%s` url `%s`", task.name, config["url"])

    # Used to identify which etag/modified to use
    url_hash = str(hash(config["url"]))

    # set etag and last modified headers if config has not changed since
    # last run and if caching wasn't disabled with --no-cache argument.
    all_entries = (
        config["all_entries"]
        or task.config_modified
        or task.manager.options.nocache
        or task.manager.options.retry
    )
    headers = {}
    if not all_entries:
        etag = task.simple_persistence.get("%s_etag" % url_hash, None)
        if etag:
            log.debug("Sending etag %s for task %s", etag, task.name)
            headers["If-None-Match"] = etag
        modified = task.simple_persistence.get("%s_modified" % url_hash, None)
        if modified:
            if not isinstance(modified, basestring):
                log.debug("Invalid date was stored for last modified time.")
            else:
                headers["If-Modified-Since"] = modified
                log.debug("Sending last-modified %s for task %s", headers["If-Modified-Since"], task.name)

    # Get the feed content
    if config["url"].startswith(("http", "https", "ftp", "file")):
        # Get feed using requests library
        auth = None
        if "username" in config and "password" in config:
            auth = (config["username"], config["password"])
        try:
            # Use the raw response so feedparser can read the headers and status values
            response = task.requests.get(config["url"], timeout=60, headers=headers, raise_status=False, auth=auth)
            content = response.content
        except RequestException as e:
            raise PluginError("Unable to download the RSS for task %s (%s): %s" % (task.name, config["url"], e))
        if config.get("ascii"):
            # convert content to ascii (cleanup), can also help with parsing problems on malformed feeds
            content = response.text.encode("ascii", "ignore")

        # status checks
        status = response.status_code
        if status == 304:
            log.verbose("%s hasn't changed since last run. Not creating entries.", config["url"])
            # Let details plugin know that it is ok if this feed doesn't produce any entries
            task.no_entries_ok = True
            return []
        elif status == 401:
            raise PluginError(
                "Authentication needed for task %s (%s): %s"
                % (task.name, config["url"], response.headers["www-authenticate"]),
                log,
            )
        elif status == 404:
            raise PluginError("RSS Feed %s (%s) not found" % (task.name, config["url"]), log)
        elif status == 500:
            raise PluginError("Internal server exception on task %s (%s)" % (task.name, config["url"]), log)
        elif status != 200:
            raise PluginError("HTTP error %s received from %s" % (status, config["url"]), log)

        # update etag and last modified
        if not config["all_entries"]:
            etag = response.headers.get("etag")
            if etag:
                task.simple_persistence["%s_etag" % url_hash] = etag
                log.debug("etag %s saved for task %s", etag, task.name)
            if response.headers.get("last-modified"):
                modified = response.headers["last-modified"]
                task.simple_persistence["%s_modified" % url_hash] = modified
                log.debug("last modified %s saved for task %s", modified, task.name)
    else:
        # This is a file, open it
        with open(config["url"], "rb") as f:
            content = f.read()
        if config.get("ascii"):
            # Just assuming utf-8 file in this case
            content = content.decode("utf-8", "ignore").encode("ascii", "ignore")

    if not content:
        log.error("No data received for rss feed.")
        return

    try:
        rss = feedparser.parse(content)
    except LookupError as e:
        raise PluginError("Unable to parse the RSS (from %s): %s" % (config["url"], e))

    # check for bozo
    ex = rss.get("bozo_exception", False)
    if ex or rss.get("bozo"):
        if rss.entries:
            msg = "Bozo error %s while parsing feed, but entries were produced, ignoring the error." % type(ex)
            if config.get("silent", False):
                log.debug(msg)
            else:
                log.verbose(msg)
        else:
            if isinstance(ex, feedparser.NonXMLContentType):
                # see: http://www.feedparser.org/docs/character-encoding.html#advanced.encoding.nonxml
                log.debug("ignoring feedparser.NonXMLContentType")
            elif isinstance(ex, feedparser.CharacterEncodingOverride):
                # see: ticket 88
                log.debug("ignoring feedparser.CharacterEncodingOverride")
            elif isinstance(ex, UnicodeEncodeError):
                raise PluginError("Feed has UnicodeEncodeError while parsing...")
            elif isinstance(ex, (xml.sax._exceptions.SAXParseException, xml.sax._exceptions.SAXException)):
                # save invalid data for review, this is a bit ugly but users seem to really confused when
                # html pages (login pages) are received
                self.process_invalid_content(task, content, config["url"])
                if task.manager.options.debug:
                    log.exception(ex)
                raise PluginError("Received invalid RSS content from task %s (%s)" % (task.name, config["url"]))
            elif isinstance(ex, httplib.BadStatusLine) or isinstance(ex, IOError):
                raise ex  # let the @internet decorator handle
            else:
                # all other bozo errors
                self.process_invalid_content(task, content, config["url"])
                raise PluginError(
                    "Unhandled bozo_exception. Type: %s (task: %s)" % (ex.__class__.__name__, task.name), log
                )

    log.debug("encoding %s", rss.encoding)

    last_entry_id = ""
    if not all_entries:
        # Test to make sure entries are in descending order
        if rss.entries and rss.entries[0].get("published_parsed") and rss.entries[-1].get("published_parsed"):
            if rss.entries[0]["published_parsed"] < rss.entries[-1]["published_parsed"]:
                # Sort them if they are not
                rss.entries.sort(key=lambda x: x["published_parsed"], reverse=True)
        last_entry_id = task.simple_persistence.get("%s_last_entry" % url_hash)

    # new entries to be created
    entries = []

    # field name for url can be configured by setting link.
    # default value is auto but for example guid is used in some feeds
    ignored = 0
    for entry in rss.entries:
        # Check if title field is overridden in config
        title_field = config.get("title", "title")
        # ignore entries without title
        if not entry.get(title_field):
            log.debug("skipping entry without title")
            ignored += 1
            continue

        # Set the title from the source field
        entry.title = entry[title_field]

        # Check we haven't already processed this entry in a previous run
        if last_entry_id == entry.title + entry.get("guid", ""):
            log.verbose("Not processing entries from last run.")
            # Let details plugin know that it is ok if this task doesn't produce any entries
            task.no_entries_ok = True
            break

        # remove annoying zero width spaces
        entry.title = entry.title.replace(u"\u200B", u"")

        # Dict with fields to grab mapping from rss field name to FlexGet field name
        fields = {"guid": "guid", "author": "author", "description": "description", "infohash": "torrent_info_hash"}
        # extend the dict of fields to grab with other_fields list in config
        for field_map in config.get("other_fields", []):
            fields.update(field_map)

        # helper
        # TODO: confusing? refactor into class member ...
        def add_entry(ea):
            ea["title"] = entry.title

            for rss_field, flexget_field in fields.iteritems():
                if rss_field in entry:
                    if not isinstance(getattr(entry, rss_field), basestring):
                        # Error if this field is not a string
                        log.error("Cannot grab non text field `%s` from rss.", rss_field)
                        # Remove field from list of fields to avoid repeated error
                        config["other_fields"].remove(rss_field)
                        continue
                    if not getattr(entry, rss_field):
                        log.debug("Not grabbing blank field %s from rss for %s.", rss_field, ea["title"])
                        continue
                    try:
                        ea[flexget_field] = decode_html(entry[rss_field])
                        if rss_field in config.get("other_fields", []):
                            # Print a debug message for custom added fields
                            log.debug("Field `%s` set to `%s` for `%s`", rss_field, ea[rss_field], ea["title"])
                    except UnicodeDecodeError:
                        log.warning("Failed to decode entry `%s` field `%s`", ea["title"], rss_field)

            # Also grab pubdate if available
            if hasattr(entry, "published_parsed") and entry.published_parsed:
                ea["rss_pubdate"] = datetime(*entry.published_parsed[:6])

            # store basic auth info
            if "username" in config and "password" in config:
                ea["basic_auth_username"] = config["username"]
                ea["basic_auth_password"] = config["password"]
            entries.append(ea)

        # create from enclosures if present
        enclosures = entry.get("enclosures", [])
        if len(enclosures) > 1 and not config.get("group_links"):
            # There is more than 1 enclosure, create an Entry for each of them
            log.debug("adding %i entries from enclosures", len(enclosures))
            for enclosure in enclosures:
                if "href" not in enclosure:
                    log.debug("RSS-entry `%s` enclosure does not have URL", entry.title)
                    continue
                # There is a valid url for this enclosure, create an Entry for it
                ee = Entry()
                self.add_enclosure_info(ee, enclosure, config.get("filename", True), True)
                add_entry(ee)
            # If we created entries for enclosures, we should not create an Entry for the main rss item
            continue

        # create flexget entry
        e = Entry()

        if not isinstance(config.get("link"), list):
            # If the link field is not a list, search for first valid url
            if config["link"] == "auto":
                # Auto mode, check for a single enclosure url first
                if len(entry.get("enclosures", [])) == 1 and entry["enclosures"][0].get("href"):
                    self.add_enclosure_info(e, entry["enclosures"][0], config.get("filename", True))
                else:
                    # If there is no enclosure url, check link, then guid field for urls
                    for field in ["link", "guid"]:
                        if entry.get(field):
                            e["url"] = entry[field]
                            break
            else:
                if entry.get(config["link"]):
                    e["url"] = entry[config["link"]]
        else:
            # If link was passed as a list, we create a list of urls
            for field in config["link"]:
                if entry.get(field):
                    e.setdefault("url", entry[field])
                    if entry[field] not in e.setdefault("urls", []):
                        e["urls"].append(entry[field])

        if config.get("group_links"):
            # Append a list of urls from enclosures to the urls field if group_links is enabled
            e.setdefault("urls", [e["url"]]).extend(
                [enc.href for enc in entry.get("enclosures", []) if enc.get("href") not in e["urls"]]
            )

        if not e.get("url"):
            log.debug("%s does not have link (%s) or enclosure", entry.title, config["link"])
            ignored += 1
            continue

        add_entry(e)

    # Save last spot in rss
    if rss.entries:
        log.debug("Saving location in rss feed.")
        task.simple_persistence["%s_last_entry" % url_hash] = rss.entries[0].title + rss.entries[0].get("guid", "")

    if ignored:
        if not config.get("silent"):
            log.warning("Skipped %s RSS-entries without required information (title, link or enclosures)", ignored)

    return entries
def on_task_input(self, task, config): config = self.build_config(config) log.debug('Requesting task `%s` url `%s`', task.name, config['url']) # Used to identify which etag/modified to use url_hash = hashlib.md5(config['url'].encode('utf-8')).hexdigest() # set etag and last modified headers if config has not changed since # last run and if caching wasn't disabled with --no-cache argument. all_entries = (config['all_entries'] or task.config_modified or task.options.nocache or task.options.retry) headers = task.requests.headers if not all_entries: etag = task.simple_persistence.get('%s_etag' % url_hash, None) if etag: log.debug('Sending etag %s for task %s', etag, task.name) headers['If-None-Match'] = etag modified = task.simple_persistence.get('%s_modified' % url_hash, None) if modified: if not isinstance(modified, str): log.debug( 'Invalid date was stored for last modified time.') else: headers['If-Modified-Since'] = modified log.debug( 'Sending last-modified %s for task %s', headers['If-Modified-Since'], task.name, ) # Get the feed content if config['url'].startswith(('http', 'https', 'ftp', 'file')): # Get feed using requests library auth = None if 'username' in config and 'password' in config: auth = (config['username'], config['password']) try: # Use the raw response so feedparser can read the headers and status values response = task.requests.get(config['url'], timeout=60, headers=headers, raise_status=False, auth=auth) content = response.content except RequestException as e: raise plugin.PluginError( 'Unable to download the RSS for task %s (%s): %s' % (task.name, config['url'], e)) if config.get('ascii'): # convert content to ascii (cleanup), can also help with parsing problems on malformed feeds content = response.text.encode('ascii', 'ignore') # status checks status = response.status_code if status == 304: log.verbose( '%s hasn\'t changed since last run. 
Not creating entries.', config['url']) # Let details plugin know that it is ok if this feed doesn't produce any entries task.no_entries_ok = True return [] elif status == 401: raise plugin.PluginError( 'Authentication needed for task %s (%s): %s' % (task.name, config['url'], response.headers['www-authenticate']), log, ) elif status == 404: raise plugin.PluginError( 'RSS Feed %s (%s) not found' % (task.name, config['url']), log) elif status == 500: raise plugin.PluginError( 'Internal server exception on task %s (%s)' % (task.name, config['url']), log) elif status != 200: raise plugin.PluginError( 'HTTP error %s received from %s' % (status, config['url']), log) # update etag and last modified if not config['all_entries']: etag = response.headers.get('etag') if etag: task.simple_persistence['%s_etag' % url_hash] = etag log.debug('etag %s saved for task %s', etag, task.name) if response.headers.get('last-modified'): modified = response.headers['last-modified'] task.simple_persistence['%s_modified' % url_hash] = modified log.debug('last modified %s saved for task %s', modified, task.name) else: # This is a file, open it with open(config['url'], 'rb') as f: content = f.read() if config.get('ascii'): # Just assuming utf-8 file in this case content = content.decode('utf-8', 'ignore').encode('ascii', 'ignore') if not content: log.error('No data recieved for rss feed.') return [] if config.get('escape'): log.debug("Trying to escape unescaped in RSS") content = self.escape_content(content) try: rss = feedparser.parse(content) except LookupError as e: raise plugin.PluginError('Unable to parse the RSS (from %s): %s' % (config['url'], e)) # check for bozo ex = rss.get('bozo_exception', False) if ex or rss.get('bozo'): if rss.entries: msg = ( 'Bozo error %s while parsing feed, but entries were produced, ignoring the error.' % type(ex)) if config.get('silent', False): log.debug(msg) else: log.verbose(msg) else: if isinstance(ex, feedparser.NonXMLContentType): # see: http://www.feedparser.org/docs/character-encoding.html#advanced.encoding.nonxml log.debug('ignoring feedparser.NonXMLContentType') elif isinstance(ex, feedparser.CharacterEncodingOverride): # see: ticket 88 log.debug('ignoring feedparser.CharacterEncodingOverride') elif isinstance(ex, UnicodeEncodeError): raise plugin.PluginError( 'Feed has UnicodeEncodeError while parsing...') elif isinstance(ex, (xml.sax._exceptions.SAXParseException, xml.sax._exceptions.SAXException)): # save invalid data for review, this is a bit ugly but users seem to really confused when # html pages (login pages) are received self.process_invalid_content(task, content, config['url']) if task.options.debug: log.error('bozo error parsing rss: %s' % ex) raise plugin.PluginError( 'Received invalid RSS content from task %s (%s)' % (task.name, config['url'])) elif isinstance(ex, http.client.BadStatusLine) or isinstance( ex, IOError): raise ex # let the @internet decorator handle else: # all other bozo errors self.process_invalid_content(task, content, config['url']) raise plugin.PluginError( 'Unhandled bozo_exception. 
Type: %s (task: %s)' % (ex.__class__.__name__, task.name), log, ) log.debug('encoding %s', rss.encoding) last_entry_id = '' if not all_entries: # Test to make sure entries are in descending order if (rss.entries and rss.entries[0].get('published_parsed') and rss.entries[-1].get('published_parsed')): if rss.entries[0]['published_parsed'] < rss.entries[-1][ 'published_parsed']: # Sort them if they are not rss.entries.sort(key=lambda x: x['published_parsed'], reverse=True) last_entry_id = task.simple_persistence.get('%s_last_entry' % url_hash) # new entries to be created entries = [] # Dict with fields to grab mapping from rss field name to FlexGet field name fields = { 'guid': 'guid', 'author': 'author', 'description': 'description', 'infohash': 'torrent_info_hash', } # extend the dict of fields to grab with other_fields list in config for field_map in config.get('other_fields', []): fields.update(field_map) # field name for url can be configured by setting link. # default value is auto but for example guid is used in some feeds ignored = 0 for entry in rss.entries: # Check if title field is overridden in config title_field = config.get('title', 'title') # ignore entries without title if not entry.get(title_field): log.debug('skipping entry without title') ignored += 1 continue # Set the title from the source field entry.title = entry[title_field] # Check we haven't already processed this entry in a previous run if last_entry_id == entry.title + entry.get('guid', ''): log.verbose('Not processing entries from last run.') # Let details plugin know that it is ok if this task doesn't produce any entries task.no_entries_ok = True break # remove annoying zero width spaces entry.title = entry.title.replace(u'\u200B', u'') # helper # TODO: confusing? refactor into class member ... 
def add_entry(ea):
    ea['title'] = entry.title
    # fields dict may be modified during this loop, so loop over a copy (fields.items())
    for rss_field, flexget_field in list(fields.items()):
        if rss_field in entry:
            if rss_field == 'content':
                content_str = ''
                for content in entry[rss_field]:
                    try:
                        content_str += decode_html(content.value)
                    except UnicodeDecodeError:
                        log.warning(
                            'Failed to decode entry `%s` field `%s`', ea['title'], rss_field
                        )
                ea[flexget_field] = content_str
                log.debug(
                    'Field `%s` set to `%s` for `%s`', rss_field, ea[rss_field], ea['title']
                )
                continue
            if not isinstance(getattr(entry, rss_field), str):
                # Error if this field is not a string
                log.error('Cannot grab non text field `%s` from rss.', rss_field)
                # Remove field from list of fields to avoid repeated error
                del fields[rss_field]
                continue
            if not getattr(entry, rss_field):
                log.debug('Not grabbing blank field %s from rss for %s.', rss_field, ea['title'])
                continue
            try:
                ea[flexget_field] = decode_html(entry[rss_field])
                if rss_field in config.get('other_fields', []):
                    # Print a debug message for custom added fields
                    log.debug(
                        'Field `%s` set to `%s` for `%s`', rss_field, ea[rss_field], ea['title']
                    )
            except UnicodeDecodeError:
                log.warning('Failed to decode entry `%s` field `%s`', ea['title'], rss_field)
    # Also grab pubdate if available
    if hasattr(entry, 'published_parsed') and entry.published_parsed:
        ea['rss_pubdate'] = datetime(*entry.published_parsed[:6])
    # store basic auth info
    if 'username' in config and 'password' in config:
        ea['download_auth'] = (config['username'], config['password'])
    entries.append(ea)

# create from enclosures if present
enclosures = entry.get('enclosures', [])
if len(enclosures) > 1 and not config.get('group_links'):
    # There is more than 1 enclosure, create an Entry for each of them
    log.debug('adding %i entries from enclosures', len(enclosures))
    for enclosure in enclosures:
        if 'href' not in enclosure:
            log.debug('RSS-entry `%s` enclosure does not have URL', entry.title)
            continue
        # There is a valid url for this enclosure, create an Entry for it
        ee = Entry()
        self.add_enclosure_info(ee, enclosure, config.get('filename', True), True)
        add_entry(ee)
    # If we created entries for enclosures, we should not create an Entry for the main rss item
    continue

# create flexget entry
e = Entry()
if not isinstance(config.get('link'), list):
    # If the link field is not a list, search for first valid url
    if config['link'] == 'auto':
        # Auto mode, check for a single enclosure url first
        if len(entry.get('enclosures', [])) == 1 and entry['enclosures'][0].get('href'):
            self.add_enclosure_info(e, entry['enclosures'][0], config.get('filename', True))
        else:
            # If there is no enclosure url, check link, then guid field for urls
            for field in ['link', 'guid']:
                if entry.get(field):
                    e['url'] = entry[field]
                    break
    else:
        if entry.get(config['link']):
            e['url'] = entry[config['link']]
else:
    # If link was passed as a list, we create a list of urls
    for field in config['link']:
        if entry.get(field):
            e.setdefault('url', entry[field])
            if entry[field] not in e.setdefault('urls', []):
                e['urls'].append(entry[field])

if config.get('group_links'):
    # Append a list of urls from enclosures to the urls field if group_links is enabled
    e.setdefault('urls', [e['url']]).extend(
        [enc.href for enc in entry.get('enclosures', []) if enc.get('href') not in e['urls']]
    )

if not e.get('url'):
    log.debug('%s does not have link (%s) or enclosure', entry.title, config['link'])
    ignored += 1
    continue

add_entry(e)

# Save last spot in rss
if rss.entries:
    log.debug('Saving location in rss feed.')
    try:
        entry_id = rss.entries[0].title + rss.entries[0].get('guid', '')
    except AttributeError:
        entry_id = ''
    if entry_id.strip():
        task.simple_persistence['%s_last_entry' % url_hash] = entry_id
    else:
        log.debug('rss feed location saving skipped: no title information in first entry')

if ignored:
    if not config.get('silent'):
        log.warning(
            'Skipped %s RSS-entries without required information (title, link or enclosures)',
            ignored,
        )
return entries
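# A minimal standalone sketch (not the plugin's code) of the enclosure expansion above:
# one RSS item with several enclosures becomes one entry per enclosure that has a URL.
# Plain dicts stand in for feedparser enclosures and FlexGet Entry objects.
def entries_from_enclosures_sketch(title, enclosures):
    entries = []
    for enclosure in enclosures:
        href = enclosure.get('href')
        if not href:
            # mirror the plugin: skip enclosures without a usable URL
            continue
        entries.append({'title': title, 'url': href})
    return entries

# entries_from_enclosures_sketch('Show S01E01', [{'href': 'http://example/a.torrent'}, {}])
# -> [{'title': 'Show S01E01', 'url': 'http://example/a.torrent'}]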
def parse_privmsg(self, nickname, channel, message):
    """
    Parses a public message and generates an entry if the message matches the regex or contains a url
    :param nickname: Nickname of who sent the message
    :param channel: Channel where the message was received
    :param message: Message text
    :return: an Entry, or None if no valid entry could be generated
    """
    # Clean up the message
    message = MESSAGE_CLEAN.sub('', message)

    # If we have announcers defined, ignore any messages not from them
    if self.announcer_list and nickname not in self.announcer_list:
        log.debug('Ignoring message: from non-announcer %s', nickname)
        return

    # If it's listed in ignore lines, skip it
    for (rx, expected) in self.ignore_lines:
        if rx.match(message) and expected:
            log.debug('Ignoring message: matched ignore line')
            return

    # Create the entry
    entry = Entry(irc_raw_message=message)

    # Run the config regex patterns over the message
    matched_linepatterns = self.match_message_patterns(self.linepatterns, message)
    matched_multilinepatterns = self.match_message_patterns(
        self.multilinepatterns, message, multiline=True
    )

    if matched_linepatterns:
        match_found = True
        entry.update(matched_linepatterns)
    elif matched_multilinepatterns:
        match_found = True
        entry.update(matched_multilinepatterns)
    else:
        log.warning('Received message doesn\'t match any regexes.')
        return None

    # Generate the entry and process it through the linematched rules
    if self.tracker_config is not None and match_found:
        entry.update(self.process_tracker_config_rules(entry))
    elif self.tracker_config is not None:
        log.error('Failed to parse message. Skipping.')
        return None

    # If we have a torrentname, use it as the title
    entry['title'] = entry.get('irc_torrentname', message)

    # If we have a URL, use it
    if 'irc_torrenturl' in entry:
        entry['url'] = entry['irc_torrenturl']
    else:
        # find a url...
        url_match = URL_MATCHER.findall(message)
        if url_match:
            # We have URL(s); generate an entry
            urls = list(url_match)
            url = urls[-1]
            entry.update({'urls': urls, 'url': url})

    log.debug('Entry after processing: %s', dict(entry))
    if not entry.get('url'):
        log.error('Parsing message failed. No url found.')
        return None
    return entry
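# Hedged sketch of the URL fallback above. URL_MATCHER is defined elsewhere in the
# module; the pattern below is an illustrative stand-in, not the real constant.
import re

URL_MATCHER_SKETCH = re.compile(r'(https?://\S+)', re.IGNORECASE)

def urls_from_message_sketch(message):
    """Return (urls, primary_url) from a raw IRC line; the last URL wins."""
    urls = URL_MATCHER_SKETCH.findall(message)
    return urls, (urls[-1] if urls else None)

# urls_from_message_sketch('New: Foo.S01E01 http://a/t http://b/t')
# -> (['http://a/t', 'http://b/t'], 'http://b/t')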
def items(self):
    if self._items is None:
        if (
            self.config['list'] in ['collection', 'watched', 'trending', 'popular']
            and self.config['type'] == 'auto'
        ):
            raise plugin.PluginError(
                '`type` cannot be `auto` for %s list.' % self.config['list']
            )

        endpoint = self.get_list_endpoint()

        log.verbose('Retrieving `%s` list `%s`', self.config['type'], self.config['list'])
        try:
            result = self.session.get(db.get_api_url(endpoint))
            try:
                data = result.json()
            except ValueError:
                log.debug('Could not decode json from response: %s', result.text)
                raise plugin.PluginError('Error getting list from trakt.')
            current_page = int(result.headers.get('X-Pagination-Page', 1))
            current_page_count = int(result.headers.get('X-Pagination-Page-Count', 1))
            if current_page < current_page_count:
                # The response is paginated; re-fetch everything at 1000 items per page,
                # starting the collection over from an empty list
                data = []
                limit = 1000
                pagination_item_count = int(result.headers.get('X-Pagination-Item-Count', 0))
                number_of_pages = math.ceil(pagination_item_count / limit)
                log.debug(
                    'Response is paginated. Number of items: %s, number of pages: %s',
                    pagination_item_count,
                    number_of_pages,
                )
                page = int(result.headers.get('X-Pagination-Page'))
                while page <= number_of_pages:
                    paginated_result = self.session.get(
                        db.get_api_url(endpoint), params={'limit': limit, 'page': page}
                    )
                    page += 1
                    try:
                        data.extend(paginated_result.json())
                    except ValueError:
                        log.debug(
                            'Could not decode json from response: %s', paginated_result.text
                        )
                        raise plugin.PluginError('Error getting list from trakt.')
        except RequestException as e:
            raise plugin.PluginError('Could not retrieve list from trakt (%s)' % e)

        if not data:
            log.warning(
                'No data returned from trakt for %s list %s.',
                self.config['type'],
                self.config['list'],
            )
            return []

        entries = []
        list_type = (self.config['type']).rstrip('s')
        for item in data:
            if self.config['type'] == 'auto':
                list_type = item['type']
            if self.config['list'] == 'popular':
                item = {list_type: item}
            # Collection and watched lists don't return 'type' along with the items (right now)
            if 'type' in item and item['type'] != list_type:
                log.debug(
                    'Skipping %s because it is not a %s',
                    item[item['type']].get('title', 'unknown'),
                    list_type,
                )
                continue
            if list_type != 'episode' and not item[list_type]['title']:
                # Skip shows/movies with no title
                log.warning('Item in trakt list does not appear to have a title, skipping.')
                continue
            entry = Entry()
            if list_type == 'episode':
                entry['url'] = 'https://trakt.tv/shows/%s/seasons/%s/episodes/%s' % (
                    item['show']['ids']['slug'],
                    item['episode']['season'],
                    item['episode']['number'],
                )
            else:
                entry['url'] = 'https://trakt.tv/%ss/%s' % (
                    list_type,
                    item[list_type]['ids'].get('slug'),
                )
            entry.update_using_map(field_maps[list_type], item)
            # get movie name translation
            language = self.config.get('language')
            if list_type == 'movie' and language:
                endpoint = ['movies', entry['trakt_movie_id'], 'translations', language]
                try:
                    result = self.session.get(db.get_api_url(endpoint))
                    try:
                        translation = result.json()
                    except ValueError:
                        raise plugin.PluginError(
                            'Error decoding movie translation from trakt: %s.' % result.text
                        )
                except RequestException as e:
                    raise plugin.PluginError(
                        'Could not retrieve movie translation from trakt: %s' % str(e)
                    )
                if not translation:
                    log.warning(
                        'No translation data returned from trakt for movie %s.', entry['title']
                    )
                else:
                    log.verbose(
                        'Found `%s` translation for movie `%s`: %s',
                        language,
                        entry['movie_name'],
                        translation[0]['title'],
                    )
                    entry['title'] = translation[0]['title']
                    if entry.get('movie_year'):
                        entry['title'] += ' (' + str(entry['movie_year']) + ')'
                    entry['movie_name'] = translation[0]['title']
            # Override the title if strip_dates is on. TODO: a better way?
            if self.config.get('strip_dates'):
                if list_type in ['show', 'movie']:
                    entry['title'] = item[list_type]['title']
                elif list_type == 'episode':
                    entry['title'] = '{show[title]} S{episode[season]:02}E{episode[number]:02}'.format(**item)
                    if item['episode']['title']:
                        entry['title'] += ' {episode[title]}'.format(**item)
            if entry.isvalid():
                if self.config.get('strip_dates'):
                    # Remove year from end of name if present
                    entry['title'] = split_title_year(entry['title'])[0]
                entries.append(entry)
                if self.config.get('limit') and len(entries) >= self.config.get('limit'):
                    break
            else:
                log.debug('Invalid entry created? %s', entry)

        self._items = entries
    return self._items
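# Hedged sketch of the pagination scheme above: trakt reports paging state in
# X-Pagination-* response headers. `session` and `url` are placeholders here, not
# the plugin's actual objects or endpoints.
import requests

def fetch_all_pages_sketch(session, url, limit=1000):
    items, page = [], 1
    while True:
        resp = session.get(url, params={'limit': limit, 'page': page})
        items.extend(resp.json())
        # stop once the reported page count is exhausted
        if page >= int(resp.headers.get('X-Pagination-Page-Count', 1)):
            return items
        page += 1

# usage (placeholder URL): fetch_all_pages_sketch(requests.Session(), 'https://api.example/list')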
def items(self):
    if self._items is None:
        if self.config['list'] in ['collection', 'watched'] and self.config['type'] == 'auto':
            raise plugin.PluginError(
                '`type` cannot be `auto` for %s list.' % self.config['list'])

        endpoint = self.get_list_endpoint()

        log.verbose('Retrieving `%s` list `%s`', self.config['type'], self.config['list'])
        try:
            result = self.session.get(get_api_url(endpoint))
            try:
                data = result.json()
            except ValueError:
                log.debug('Could not decode json from response: %s', result.text)
                raise plugin.PluginError('Error getting list from trakt.')
        except RequestException as e:
            raise plugin.PluginError('Could not retrieve list from trakt (%s)' % e)

        if not data:
            log.warning('No data returned from trakt for %s list %s.',
                        self.config['type'], self.config['list'])
            return []

        entries = []
        list_type = (self.config['type']).rstrip('s')
        for item in data:
            if self.config['type'] == 'auto':
                list_type = item['type']
            # Collection and watched lists don't return 'type' along with the items (right now)
            if 'type' in item and item['type'] != list_type:
                log.debug('Skipping %s because it is not a %s',
                          item[item['type']].get('title', 'unknown'), list_type)
                continue
            if list_type != 'episode' and not item[list_type]['title']:
                # Skip shows/movies with no title
                log.warning('Item in trakt list does not appear to have a title, skipping.')
                continue
            entry = Entry()
            if list_type == 'episode':
                entry['url'] = 'https://trakt.tv/shows/%s/seasons/%s/episodes/%s' % (
                    item['show']['ids']['slug'],
                    item['episode']['season'],
                    item['episode']['number'])
            else:
                entry['url'] = 'https://trakt.tv/%ss/%s' % (
                    list_type, item[list_type]['ids'].get('slug'))
            entry.update_using_map(field_maps[list_type], item)
            # get movie name translation
            language = self.config.get('language')
            if list_type == 'movie' and language:
                endpoint = ['movies', entry['trakt_movie_id'], 'translations', language]
                try:
                    result = self.session.get(get_api_url(endpoint))
                    try:
                        translation = result.json()
                    except ValueError:
                        raise plugin.PluginError(
                            'Error decoding movie translation from trakt: %s.' % result.text)
                except RequestException as e:
                    raise plugin.PluginError(
                        'Could not retrieve movie translation from trakt: %s' % str(e))
                if not translation:
                    log.warning('No translation data returned from trakt for movie %s.',
                                entry['title'])
                else:
                    log.verbose('Found `%s` translation for movie `%s`: %s',
                                language, entry['movie_name'], translation[0]['title'])
                    entry['title'] = translation[0]['title']
                    if entry.get('movie_year'):
                        entry['title'] += ' (' + str(entry['movie_year']) + ')'
                    entry['movie_name'] = translation[0]['title']
            # Override the title if strip_dates is on. TODO: a better way?
            if self.config.get('strip_dates'):
                if list_type in ['show', 'movie']:
                    entry['title'] = item[list_type]['title']
                elif list_type == 'episode':
                    entry['title'] = '{show[title]} S{episode[season]:02}E{episode[number]:02}'.format(**item)
                    if item['episode']['title']:
                        entry['title'] += ' {episode[title]}'.format(**item)
            if entry.isvalid():
                if self.config.get('strip_dates'):
                    # Remove year from end of name if present
                    entry['title'] = split_title_year(entry['title'])[0]
                entries.append(entry)
            else:
                log.debug('Invalid entry created? %s', entry)

        self._items = entries
    return self._items
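# Hedged sketch of the type normalization both items() variants share: the plural
# config value is singularized with rstrip('s'), and mixed lists are filtered by the
# item's own type. Standalone over plain dicts; not the plugin's code.
def filter_by_type_sketch(items, config_type):
    list_type = config_type.rstrip('s')  # 'movies' -> 'movie'; 'auto' is unchanged
    kept = []
    for item in items:
        item_type = item.get('type', list_type)
        # in auto mode every item's own type is accepted
        if list_type == 'auto' or item_type == list_type:
            kept.append(item)
    return kept

# filter_by_type_sketch([{'type': 'movie'}, {'type': 'show'}], 'movies') -> [{'type': 'movie'}]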
def on_task_input(self, task, config):
    """Creates an entry for each item in your uoccin watchlist.

    Example::

        from_uoccin:
          path: /path/to/gdrive/uoccin
          type: series
          tags: [ 'favorite', 'hires' ]
          check_tags: all

    Options path and type are required, while the others are for filtering:
    - 'any' will include all the items marked with one or more tags in the list
    - 'all' will only include the items marked with all the listed tags
    - 'none' will only include the items not marked with any of the listed tags.

    The entries created will have a valid imdb/tvdb url and id.
    """
    imdb_lookup = plugin.get_plugin_by_name('imdb_lookup').instance
    udata = load_uoccin_data(config['path'])
    section = udata['movies'] if config['type'] == 'movies' else udata['series']
    entries = []
    for eid, itm in list(section.items()):
        if not itm['watchlist']:
            continue
        if 'tags' in config:
            n = len(set(config['tags']) & set(itm.get('tags', [])))
            if config['check_tags'] == 'any' and n <= 0:
                continue
            if config['check_tags'] == 'all' and n != len(config['tags']):
                continue
            if config['check_tags'] == 'none' and n > 0:
                continue
        if config['type'] == 'movies':
            entry = Entry()
            entry['url'] = 'http://www.imdb.com/title/' + eid
            entry['imdb_id'] = eid
            if itm['name'] != 'N/A':
                entry['title'] = itm['name']
            else:
                try:
                    imdb_lookup.lookup(entry)
                except plugin.PluginError as e:
                    self.log.trace('entry %s imdb failed (%s)' % (entry['imdb_id'], e.value))
                    continue
                entry['title'] = entry.get('imdb_name')
            if 'tags' in itm:
                entry['uoccin_tags'] = itm['tags']
            if entry.isvalid():
                entries.append(entry)
            else:
                self.log.debug('Invalid entry created? %s' % entry)
        else:
            sname = itm['name']
            try:
                sname = lookup_series(tvdb_id=eid).seriesname
            except LookupError:
                self.log.warning('Unable to lookup series %s from tvdb, using raw name.' % eid)
            surl = 'http://thetvdb.com/?tab=series&id=' + eid
            if config['type'] == 'series':
                entry = Entry()
                entry['url'] = surl
                entry['title'] = sname
                entry['tvdb_id'] = eid
                if 'tags' in itm:
                    entry['uoccin_tags'] = itm['tags']
                if entry.isvalid():
                    entries.append(entry)
                else:
                    self.log.debug('Invalid entry created? %s' % entry)
            elif config['ep_flags'] == 'collected':
                slist = itm.get('collected', {})
                for sno in list(slist.keys()):
                    for eno in slist[sno]:
                        entry = Entry()
                        entry['url'] = surl
                        entry['title'] = '%s S%02dE%02d' % (sname, int(sno), int(eno))
                        entry['tvdb_id'] = eid
                        if entry.isvalid():
                            entries.append(entry)
                        else:
                            self.log.debug('Invalid entry created? %s' % entry)
            else:
                slist = itm.get('watched', {})
                for sno in list(slist.keys()):
                    for eno in slist[sno]:
                        entry = Entry()
                        entry['url'] = surl
                        # cast eno to int too, matching the collected branch above
                        entry['title'] = '%s S%02dE%02d' % (sname, int(sno), int(eno))
                        entry['tvdb_id'] = eid
                        if entry.isvalid():
                            entries.append(entry)
                        else:
                            self.log.debug('Invalid entry created? %s' % entry)
    entries.sort(key=lambda x: x['title'])
    return entries
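# Hedged sketch of the check_tags filter above: count the overlap between the
# configured tags and the item's tags, then apply the any/all/none mode.
def tags_match_sketch(config_tags, item_tags, mode):
    n = len(set(config_tags) & set(item_tags or []))
    if mode == 'any':
        return n > 0
    if mode == 'all':
        return n == len(config_tags)
    if mode == 'none':
        return n == 0
    raise ValueError('unknown check_tags mode: %s' % mode)

# tags_match_sketch(['favorite', 'hires'], ['favorite'], 'any') -> True
# tags_match_sketch(['favorite', 'hires'], ['favorite'], 'all') -> False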
def get_items(self):
    """Iterator over retrieved items from the trakt api."""
    if (
        self.config['list'] in ['collection', 'watched', 'trending', 'popular']
        and self.config['type'] == 'auto'
    ):
        raise plugin.PluginError('`type` cannot be `auto` for %s list.' % self.config['list'])

    limit_per_page = 1000
    endpoint = self.get_list_endpoint()
    list_type = (self.config['type']).rstrip('s')

    log.verbose('Retrieving `%s` list `%s`', self.config['type'], self.config['list'])
    try:
        page = 1
        collecting_finished = False
        while not collecting_finished:
            result = self.session.get(
                db.get_api_url(endpoint), params={'limit': limit_per_page, 'page': page}
            )
            page = int(result.headers.get('X-Pagination-Page', 1))
            number_of_pages = int(result.headers.get('X-Pagination-Page-Count', 1))
            if page == 2:
                # If there is more than one page (more than 1000 items) warn user they may want to limit
                log.verbose(
                    'There are a large number of items in trakt `%s` list. You may want to enable `limit`'
                    ' plugin to reduce the amount of entries in this task.',
                    self.config['list'],
                )
            collecting_finished = page >= number_of_pages
            page += 1
            try:
                trakt_items = result.json()
            except ValueError:
                log.debug('Could not decode json from response: %s', result.text)
                raise plugin.PluginError('Error getting list from trakt.')
            if not trakt_items:
                log.warning(
                    'No data returned from trakt for %s list %s.',
                    self.config['type'],
                    self.config['list'],
                )
                return
            for item in trakt_items:
                if self.config['type'] == 'auto':
                    list_type = item['type']
                if self.config['list'] == 'popular':
                    item = {list_type: item}
                # Collection and watched lists don't return 'type' along with the items (right now)
                if 'type' in item and item['type'] != list_type:
                    log.debug(
                        'Skipping %s because it is not a %s',
                        item[item['type']].get('title', 'unknown'),
                        list_type,
                    )
                    continue
                if list_type not in item:
                    # Issue 2445
                    log.warning("Item type can not be determined, skipping item %s", item)
                    continue
                if list_type != 'episode' and not item[list_type]['title']:
                    # Skip shows/movies with no title
                    log.warning('Item in trakt list does not appear to have a title, skipping.')
                    continue
                entry = Entry()
                if list_type == 'episode':
                    entry['url'] = 'https://trakt.tv/shows/%s/seasons/%s/episodes/%s' % (
                        item['show']['ids']['slug'],
                        item['episode']['season'],
                        item['episode']['number'],
                    )
                else:
                    entry['url'] = 'https://trakt.tv/%ss/%s' % (
                        list_type,
                        item[list_type]['ids'].get('slug'),
                    )
                # Pass the strip dates option in so it can be used in the update maps
                item['strip_dates'] = self.config.get('strip_dates')
                entry.update_using_map(field_maps[list_type], item)
                # get movie name translation
                language = self.config.get('language')
                if list_type == 'movie' and language:
                    endpoint = ['movies', entry['trakt_movie_id'], 'translations', language]
                    try:
                        result = self.session.get(db.get_api_url(endpoint))
                        try:
                            translation = result.json()
                        except ValueError:
                            raise plugin.PluginError(
                                'Error decoding movie translation from trakt: %s.' % result.text
                            )
                    except RequestException as e:
                        raise plugin.PluginError(
                            'Could not retrieve movie translation from trakt: %s' % str(e)
                        )
                    if not translation:
                        log.warning(
                            'No translation data returned from trakt for movie %s.',
                            entry['title'],
                        )
                    else:
                        log.verbose(
                            'Found `%s` translation for movie `%s`: %s',
                            language,
                            entry['movie_name'],
                            translation[0]['title'],
                        )
                        entry['title'] = translation[0]['title']
                        if entry.get('movie_year') and not self.config.get('strip_dates'):
                            entry['title'] += ' ({})'.format(entry['movie_year'])
                        entry['movie_name'] = translation[0]['title']
                if entry.isvalid():
                    yield entry
                else:
                    log.debug('Invalid entry created? %s', entry)
    except RequestException as e:
        raise plugin.PluginError('Could not retrieve list from trakt (%s)' % e)
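# Hedged note on the generator variant above: because get_items() yields entries
# lazily, a caller can cap consumption without a dedicated limit option. A minimal
# sketch with itertools; `items` stands in for a get_items() iterator.
from itertools import islice

def first_n_sketch(items, n):
    """Take at most n entries from a lazy iterator such as get_items()."""
    return list(islice(items, n))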
def on_task_input(self, task, config):
    config = self.build_config(config)

    log.debug('Requesting task `%s` url `%s`', task.name, config['url'])

    # Used to identify which etag/modified to use
    url_hash = str(hash(config['url']))

    # set etag and last modified headers if config has not changed since
    # last run and if caching wasn't disabled with --no-cache argument.
    all_entries = (config['all_entries'] or task.config_modified or
                   task.options.nocache or task.options.retry)
    headers = {}
    if not all_entries:
        etag = task.simple_persistence.get('%s_etag' % url_hash, None)
        if etag:
            log.debug('Sending etag %s for task %s', etag, task.name)
            headers['If-None-Match'] = etag
        modified = task.simple_persistence.get('%s_modified' % url_hash, None)
        if modified:
            if not isinstance(modified, basestring):
                log.debug('Invalid date was stored for last modified time.')
            else:
                headers['If-Modified-Since'] = modified
                log.debug('Sending last-modified %s for task %s',
                          headers['If-Modified-Since'], task.name)

    # Get the feed content
    if config['url'].startswith(('http', 'https', 'ftp', 'file')):
        # Get feed using requests library
        auth = None
        if 'username' in config and 'password' in config:
            auth = (config['username'], config['password'])
        try:
            # Use the raw response so feedparser can read the headers and status values
            response = task.requests.get(config['url'], timeout=60, headers=headers,
                                         raise_status=False, auth=auth)
            content = response.content
        except RequestException as e:
            raise plugin.PluginError('Unable to download the RSS for task %s (%s): %s' %
                                     (task.name, config['url'], e))
        if config.get('ascii'):
            # convert content to ascii (cleanup), can also help with parsing problems on malformed feeds
            content = response.text.encode('ascii', 'ignore')

        # status checks
        status = response.status_code
        if status == 304:
            log.verbose('%s hasn\'t changed since last run. Not creating entries.', config['url'])
            # Let details plugin know that it is ok if this feed doesn't produce any entries
            task.no_entries_ok = True
            return []
        elif status == 401:
            raise plugin.PluginError('Authentication needed for task %s (%s): %s' %
                                     (task.name, config['url'],
                                      response.headers['www-authenticate']), log)
        elif status == 404:
            raise plugin.PluginError('RSS Feed %s (%s) not found' % (task.name, config['url']), log)
        elif status == 500:
            raise plugin.PluginError('Internal server exception on task %s (%s)' %
                                     (task.name, config['url']), log)
        elif status != 200:
            raise plugin.PluginError('HTTP error %s received from %s' % (status, config['url']), log)

        # update etag and last modified
        if not config['all_entries']:
            etag = response.headers.get('etag')
            if etag:
                task.simple_persistence['%s_etag' % url_hash] = etag
                log.debug('etag %s saved for task %s', etag, task.name)
            if response.headers.get('last-modified'):
                modified = response.headers['last-modified']
                task.simple_persistence['%s_modified' % url_hash] = modified
                log.debug('last modified %s saved for task %s', modified, task.name)
    else:
        # This is a file, open it
        with open(config['url'], 'rb') as f:
            content = f.read()
        if config.get('ascii'):
            # Just assuming utf-8 file in this case
            content = content.decode('utf-8', 'ignore').encode('ascii', 'ignore')

    if not content:
        log.error('No data received for rss feed.')
        return []

    try:
        rss = feedparser.parse(content)
    except LookupError as e:
        raise plugin.PluginError('Unable to parse the RSS (from %s): %s' % (config['url'], e))

    # check for bozo
    ex = rss.get('bozo_exception', False)
    if ex or rss.get('bozo'):
        if rss.entries:
            msg = 'Bozo error %s while parsing feed, but entries were produced, ignoring the error.' % type(ex)
            if config.get('silent', False):
                log.debug(msg)
            else:
                log.verbose(msg)
        else:
            if isinstance(ex, feedparser.NonXMLContentType):
                # see: http://www.feedparser.org/docs/character-encoding.html#advanced.encoding.nonxml
                log.debug('ignoring feedparser.NonXMLContentType')
            elif isinstance(ex, feedparser.CharacterEncodingOverride):
                # see: ticket 88
                log.debug('ignoring feedparser.CharacterEncodingOverride')
            elif isinstance(ex, UnicodeEncodeError):
                raise plugin.PluginError('Feed has UnicodeEncodeError while parsing...')
            elif isinstance(ex, (xml.sax._exceptions.SAXParseException,
                                 xml.sax._exceptions.SAXException)):
                # save invalid data for review; this is a bit ugly, but users seem to be really
                # confused when html pages (login pages) are received
                self.process_invalid_content(task, content, config['url'])
                if task.options.debug:
                    log.error('bozo error parsing rss: %s' % ex)
                raise plugin.PluginError('Received invalid RSS content from task %s (%s)' %
                                         (task.name, config['url']))
            elif isinstance(ex, http.client.BadStatusLine) or isinstance(ex, IOError):
                raise ex  # let the @internet decorator handle
            else:
                # all other bozo errors
                self.process_invalid_content(task, content, config['url'])
                raise plugin.PluginError('Unhandled bozo_exception. Type: %s (task: %s)' %
                                         (ex.__class__.__name__, task.name), log)

    log.debug('encoding %s', rss.encoding)

    last_entry_id = ''
    if not all_entries:
        # Test to make sure entries are in descending order
        if (rss.entries and rss.entries[0].get('published_parsed') and
                rss.entries[-1].get('published_parsed')):
            if rss.entries[0]['published_parsed'] < rss.entries[-1]['published_parsed']:
                # Sort them if they are not
                rss.entries.sort(key=lambda x: x['published_parsed'], reverse=True)
        last_entry_id = task.simple_persistence.get('%s_last_entry' % url_hash)

    # new entries to be created
    entries = []

    # Dict with fields to grab, mapping from rss field name to FlexGet field name
    fields = {'guid': 'guid',
              'author': 'author',
              'description': 'description',
              'infohash': 'torrent_info_hash'}
    # extend the dict of fields to grab with other_fields list in config
    for field_map in config.get('other_fields', []):
        fields.update(field_map)

    # field name for url can be configured by setting link.
    # default value is auto but for example guid is used in some feeds
    ignored = 0
    for entry in rss.entries:
        # Check if title field is overridden in config
        title_field = config.get('title', 'title')
        # ignore entries without title
        if not entry.get(title_field):
            log.debug('skipping entry without title')
            ignored += 1
            continue

        # Set the title from the source field
        entry.title = entry[title_field]

        # Check we haven't already processed this entry in a previous run
        if last_entry_id == entry.title + entry.get('guid', ''):
            log.verbose('Not processing entries from last run.')
            # Let details plugin know that it is ok if this task doesn't produce any entries
            task.no_entries_ok = True
            break

        # remove annoying zero width spaces
        entry.title = entry.title.replace(u'\u200B', u'')

        # helper
        # TODO: confusing? refactor into class member ...
def add_entry(ea):
    ea['title'] = entry.title
    # fields dict may be modified during this loop, so loop over a copy (fields.items())
    for rss_field, flexget_field in list(fields.items()):
        if rss_field in entry:
            if not isinstance(getattr(entry, rss_field), basestring):
                # Error if this field is not a string
                log.error('Cannot grab non text field `%s` from rss.', rss_field)
                # Remove field from list of fields to avoid repeated error
                del fields[rss_field]
                continue
            if not getattr(entry, rss_field):
                log.debug('Not grabbing blank field %s from rss for %s.', rss_field, ea['title'])
                continue
            try:
                ea[flexget_field] = decode_html(entry[rss_field])
                if rss_field in config.get('other_fields', []):
                    # Print a debug message for custom added fields
                    log.debug('Field `%s` set to `%s` for `%s`',
                              rss_field, ea[rss_field], ea['title'])
            except UnicodeDecodeError:
                log.warning('Failed to decode entry `%s` field `%s`', ea['title'], rss_field)
    # Also grab pubdate if available
    if hasattr(entry, 'published_parsed') and entry.published_parsed:
        ea['rss_pubdate'] = datetime(*entry.published_parsed[:6])
    # store basic auth info
    if 'username' in config and 'password' in config:
        ea['download_auth'] = (config['username'], config['password'])
    entries.append(ea)

# create from enclosures if present
enclosures = entry.get('enclosures', [])
if len(enclosures) > 1 and not config.get('group_links'):
    # There is more than 1 enclosure, create an Entry for each of them
    log.debug('adding %i entries from enclosures', len(enclosures))
    for enclosure in enclosures:
        if 'href' not in enclosure:
            log.debug('RSS-entry `%s` enclosure does not have URL', entry.title)
            continue
        # There is a valid url for this enclosure, create an Entry for it
        ee = Entry()
        self.add_enclosure_info(ee, enclosure, config.get('filename', True), True)
        add_entry(ee)
    # If we created entries for enclosures, we should not create an Entry for the main rss item
    continue

# create flexget entry
e = Entry()
if not isinstance(config.get('link'), list):
    # If the link field is not a list, search for first valid url
    if config['link'] == 'auto':
        # Auto mode, check for a single enclosure url first
        if len(entry.get('enclosures', [])) == 1 and entry['enclosures'][0].get('href'):
            self.add_enclosure_info(e, entry['enclosures'][0], config.get('filename', True))
        else:
            # If there is no enclosure url, check link, then guid field for urls
            for field in ['link', 'guid']:
                if entry.get(field):
                    e['url'] = entry[field]
                    break
    else:
        if entry.get(config['link']):
            e['url'] = entry[config['link']]
else:
    # If link was passed as a list, we create a list of urls
    for field in config['link']:
        if entry.get(field):
            e.setdefault('url', entry[field])
            if entry[field] not in e.setdefault('urls', []):
                e['urls'].append(entry[field])

if config.get('group_links'):
    # Append a list of urls from enclosures to the urls field if group_links is enabled
    e.setdefault('urls', [e['url']]).extend(
        [enc.href for enc in entry.get('enclosures', []) if enc.get('href') not in e['urls']])

if not e.get('url'):
    log.debug('%s does not have link (%s) or enclosure', entry.title, config['link'])
    ignored += 1
    continue

add_entry(e)

# Save last spot in rss
if rss.entries:
    log.debug('Saving location in rss feed.')
    try:
        task.simple_persistence['%s_last_entry' % url_hash] = (rss.entries[0].title +
                                                               rss.entries[0].get('guid', ''))
    except AttributeError:
        log.debug('rss feed location saving skipped: no title information in first entry')

if ignored:
    if not config.get('silent'):
        log.warning('Skipped %s RSS-entries without required information (title, link or enclosures)',
                    ignored)

return entries
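# Hedged sketch of the conditional-GET handshake used above: send stored validators,
# treat 304 as "nothing new", and refresh the validators on success. `store` is a
# plain dict standing in for task.simple_persistence; this is not FlexGet's API.
import requests

def fetch_if_changed_sketch(url, store):
    headers = {}
    if store.get('etag'):
        headers['If-None-Match'] = store['etag']
    if store.get('modified'):
        headers['If-Modified-Since'] = store['modified']
    resp = requests.get(url, headers=headers, timeout=60)
    if resp.status_code == 304:
        return None  # unchanged since the last run
    if resp.headers.get('etag'):
        store['etag'] = resp.headers['etag']
    if resp.headers.get('last-modified'):
        store['modified'] = resp.headers['last-modified']
    return resp.content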
    else:
        if entry.get(config['link']):
            e['url'] = entry[config['link']]
else:
    # If link was passed as a list, we create a list of urls
    for field in config['link']:
        if entry.get(field):
            e.setdefault('url', entry[field])
            if entry[field] not in e.setdefault('urls', []):
                e['urls'].append(entry[field])

if config.get('group_links'):
    # Append a list of urls from enclosures to the urls field if group_links is enabled
    e.setdefault('urls', [e['url']]).extend(
        [enc.href for enc in entry.get('enclosures', []) if enc.get('href') not in e['urls']])

if not e.get('url'):
    log.debug('%s does not have link (%s) or enclosure' % (entry.title, config['link']))
    ignored += 1
    continue

add_entry(e)

if ignored:
    if not config.get('silent'):
        log.warning('Skipped %s RSS-entries without required information (title, link or enclosures)' % ignored)

return entries

register_plugin(InputRSS, 'rss', api_ver=2)
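# Hedged sketch of the multi-link merge in the fragment above: the first field that
# yields a URL becomes `url`, and every further unique URL is appended to `urls`.
# Plain dicts stand in for feedparser entries and FlexGet Entry objects.
def merge_link_fields_sketch(rss_entry, link_fields):
    e = {}
    for field in link_fields:
        value = rss_entry.get(field)
        if not value:
            continue
        e.setdefault('url', value)
        if value not in e.setdefault('urls', []):
            e['urls'].append(value)
    return e

# merge_link_fields_sketch({'link': 'http://a', 'guid': 'http://b'}, ['link', 'guid'])
# -> {'url': 'http://a', 'urls': ['http://a', 'http://b']}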