def test_negative_matching(self, execute_task):
    """Quality detection should pick out only the real quality tokens from titles."""
    task = execute_task('test_negative_matching')
    expectations = [
        ('Testfile.HDTV', '1080p HDTV'),
        ('Testfile.xvid.mp3', 'xvid mp3'),
    ]
    for title, expected in expectations:
        found = task.find_entry('entries', title=title)
        assert found.get('quality') == qualities.Quality(expected)
def test_entry_serialization(self):
    """Round-trip an Entry through serialization and back.

    Checks that every supported field type (scalars, containers, dates,
    Quality objects, nested containers) survives the round trip and that
    lazy fields remain lazy until accessed.
    """
    payload = {
        'title': 'blah',
        'url': 'http://blah',
        'listfield': ['a', 'b', 1, 2],
        'dictfield': {'a': 1, 'b': 2},
        'intfield': 5,
        'floatfield': 5.5,
        'datefield': datetime.date(1999, 9, 9),
        'datetimefield': datetime.datetime(1999, 9, 9, 9, 9),
        'qualityfield': qualities.Quality('720p hdtv'),
        'nestedlist': [qualities.Quality('1080p')],
        'nesteddict': {'a': datetime.date(1999, 9, 9)},
    }
    entry1 = entry.Entry(payload)
    entry1.add_lazy_fields('lazy function', ['lazyfield'])
    assert entry1.is_lazy('lazyfield')

    serialized = serialization.dumps(entry1)
    print(serialized)
    entry2 = serialization.loads(serialized)

    # Laziness must be preserved across serialization.
    assert entry2.is_lazy('lazyfield')
    # Use the underlying dict, so we compare all fields.
    assert dict(entry1) == dict(entry2)
    assert entry2['lazyfield'] == 'value a'
def matches(self, task, config, entry):
    """Return the queued movie matching this entry, if one is waiting.

    Looks up the entry's imdb/tmdb ids (registering lazy lookup fields via
    the lookup plugins when available), then queries the movie queue for an
    un-downloaded movie with a matching id whose quality requirement allows
    this entry's quality.  Returns the QueuedMovie row or None.

    NOTE(review): the lookup-plugin calls below mutate `entry` (they attach
    lazy fields), so the statement order here matters.
    """
    # Tell tmdb_lookup to add lazy lookup fields if not already present
    try:
        get_plugin_by_name('tmdb_lookup').instance.lookup(entry)
    except DependencyError:
        log.debug('tmdb_lookup is not available, queue will not work if movie ids are not populated')
    try:
        get_plugin_by_name('imdb_lookup').instance.register_lazy_fields(entry)
    except DependencyError:
        log.debug('imdb_lookup is not available, queue will not work if movie ids are not populated')
    # make sure the entry has a movie id field filled
    conditions = []
    # Check if a movie id is already populated before incurring a lazy lookup:
    # the first pass (lazy=False) only reads ids already present; the second
    # pass (lazy=True) triggers the potentially expensive lookups.
    for lazy in [False, True]:
        if entry.get('imdb_id', eval_lazy=lazy):
            conditions.append(QueuedMovie.imdb_id == entry['imdb_id'])
        if entry.get('tmdb_id', eval_lazy=lazy):
            conditions.append(QueuedMovie.tmdb_id == entry['tmdb_id'])
        if conditions:
            break
    if not conditions:
        log.verbose('IMDB and TMDB lookups failed for %s.' % entry['title'])
        return
    quality = entry.get('quality', qualities.Quality())
    # `== None` is intentional: SQLAlchemy column comparison, not identity.
    movie = task.session.query(QueuedMovie).filter(QueuedMovie.downloaded == None). \
        filter(or_(*conditions)).first()
    if movie and movie.quality_req.allows(quality):
        return movie
def reset(self):
    """Clear all parse results so this parser instance can be reused."""
    # parsing results
    self.name = None
    self.year = None
    # index of the year token within the split title, if one was found
    self.year_pos = None
    self.quality = qualities.Quality()
    self.proper_count = 0
def get_quality(self, entry):
    """Lazily populate entry['quality'] from the title or description field."""
    # Respect an already-set quality; eval_lazy=False avoids triggering
    # this very function recursively.
    if entry.get('quality', eval_lazy=False):
        log.debug('Quality is already set to %s for %s, skipping quality detection.' % (entry['quality'], entry['title']))
        return
    quality = qualities.Quality()
    field_name = None
    for field_name in ['title', 'description']:
        if field_name not in entry:
            continue
        quality = qualities.Quality(entry[field_name])
        # The first field that yields a quality wins.
        if quality:
            break
    entry['quality'] = quality
    if quality:
        log.trace('Found quality %s (%s) for %s from field %s' % (entry['quality'], quality, entry['title'], field_name))
def get_quality(self, entry):
    """Lazily populate entry['quality'] by parsing the entry title."""
    # Don't clobber a quality that is already set (no lazy eval to avoid recursion).
    if entry.get('quality', eval_lazy=False):
        log.debug('Quality is already set to %s for %s, skipping quality detection.' % (entry['quality'], entry['title']))
        return
    detected = qualities.Quality(entry['title'])
    entry['quality'] = detected
    if detected:
        log.trace('Found quality %s for %s' % (entry['quality'], entry['title']))
def to_old_quality(self, assumed_quality=None):
    """Build a legacy Quality object from the stored old_* components.

    Joins the non-empty resolution/source/codec/audio parts into a quality
    string, then layers `assumed_quality` on top via `old_assume_quality`.
    """
    components = [self.old_resolution, self.old_source, self.old_codec, self.old_audio]
    quality_text = ' '.join(part for part in components if part)
    legacy_quality = qualities.Quality(quality_text)
    return old_assume_quality(legacy_quality, assumed_quality)
def on_task_metainfo(self, task, config):
    """Attach quality information to every task entry.

    Entries carrying a raw string quality are converted to a proper Quality
    object immediately; all others get a lazy quality-detection hook.
    """
    # check if disabled (value set to false)
    if config is False:
        return
    for entry in task.entries:
        current = entry.get('quality', eval_lazy=False)
        if isinstance(current, str):
            # A plain string slipped in; re-instantiate it as a Quality object.
            log.debug('Quality is already set to %s for %s, but has not been instantiated properly.' % (entry['quality'], entry['title']))
            entry['quality'] = qualities.Quality(entry.get('quality', eval_lazy=False))
        else:
            entry.register_lazy_func(self.get_quality, ['quality'])
def get_quality(self, entry):
    """Lazily populate entry['quality'] by parsing the entry title."""
    # Keep any quality that is already present (no lazy eval to avoid recursion).
    if entry.get('quality', eval_lazy=False):
        logger.debug(
            'Quality is already set to {} for {}, skipping quality detection.',
            entry['quality'],
            entry['title'],
        )
        return
    detected = qualities.Quality(entry['title'])
    entry['quality'] = detected
    if detected:
        logger.trace('Found quality {} for {}', entry['quality'], entry['title'])
def _quality(self, guessit_result):
    """Generate a FlexGet Quality from a guessit result.

    Collects resolution/source/codec/audio components (each normalized to a
    lower-case list by `normalize_component`), applies FlexGet-specific
    adjustments (hr, preair, screener variants, r5, 10bit, dtshd, dd5.1),
    then flattens everything into a single quality string.

    :param guessit_result: mapping of guessit properties for a title
    :returns: qualities.Quality built from the detected components
    :raises ParseWarning: if a component is neither a str nor a list
    """
    resolution = normalize_component(guessit_result.get('screen_size'))
    other = normalize_component(guessit_result.get('other'))
    if not resolution and 'hr' in other:
        resolution.append('hr')

    source = normalize_component(guessit_result.get('format'))
    if 'preair' in other:
        source.append('preair')
    if 'screener' in other:
        # BluRay screeners get their own tag; everything else counts as dvdscr.
        if 'bluray' in source:
            source.append('bdscr')
        else:
            source.append('dvdscr')
    if 'r5' in other:
        source.append('r5')

    codec = normalize_component(guessit_result.get('video_codec'))
    if '10bit' in normalize_component(guessit_result.get('video_profile')):
        codec.append('10bit')

    audio = normalize_component(guessit_result.get('audio_codec'))
    audio_profile = normalize_component(guessit_result.get('audio_profile'))
    audio_channels = normalize_component(guessit_result.get('audio_channels'))
    # unlike the other components, audio can be a bit iffy with multiple codecs, so we limit it to one
    # BUGFIX: normalize_component lower-cases values (see 'bluray'/'preair'/'dts'
    # comparisons above), so the profile names must be lower case too — the old
    # upper-case 'HD'/'HDMA' literals could never match and dtshd was never detected.
    if 'dts' in audio and any(hd in audio_profile for hd in ['hd', 'hdma']):
        audio = ['dtshd']
    elif '5.1' in audio_channels and any(dd in audio for dd in ['ac3', 'dolbydigital']):
        audio = ['dd5.1']

    # Make sure everything are strings (guessit will return lists when there are multiples)
    flattened_qualities = []
    for component in (resolution, source, codec, audio):
        if isinstance(component, list):
            flattened_qualities.append(' '.join(component))
        elif isinstance(component, str):
            flattened_qualities.append(component)
        else:
            raise ParseWarning(
                self,
                'Guessit quality returned type {}: {}. Expected str or list.'.format(
                    type(component), component))

    return qualities.Quality(' '.join(flattened_qualities))
def matches(self, task, config, entry):
    """Return the queued movie matching this entry in the configured queue, if any.

    Only applies when the configured action is 'accept'.  Registers lazy
    imdb/tmdb lookup fields on the entry, resolves a movie id (preferring
    already-populated ids over lazy lookups), then queries the queue for an
    un-downloaded movie with a matching id whose quality requirement allows
    the entry's quality.  Returns the QueuedMovie row or None.

    NOTE(review): the lookup-plugin calls mutate `entry` (attach lazy
    fields), so statement order here matters.
    """
    if not config:
        return
    if not isinstance(config, dict):
        config = {'action': config}
    # only the accept action is applied in the 'matches' section
    if config.get('action') != 'accept':
        return

    queue_name = config.get('queue_name', 'default')

    # Tell tmdb_lookup to add lazy lookup fields if not already present
    try:
        plugin.get_plugin_by_name('imdb_lookup').instance.register_lazy_fields(entry)
    except plugin.DependencyError:
        log.debug('imdb_lookup is not available, queue will not work if movie ids are not populated')
    try:
        plugin.get_plugin_by_name('tmdb_lookup').instance.lookup(entry)
    except plugin.DependencyError:
        log.debug('tmdb_lookup is not available, queue will not work if movie ids are not populated')

    conditions = []
    # Check if a movie id is already populated before incurring a lazy lookup.
    # First pass (lazy=False) reads ids that are already present; second pass
    # (lazy=True) triggers the expensive lookups.  The tmdb lazy lookup is
    # skipped when an imdb condition was already found (`lazy and not conditions`).
    for lazy in [False, True]:
        if entry.get('imdb_id', eval_lazy=lazy):
            conditions.append(QueuedMovie.imdb_id == entry['imdb_id'])
        if entry.get('tmdb_id', eval_lazy=lazy and not conditions):
            conditions.append(QueuedMovie.tmdb_id == entry['tmdb_id'])
        if conditions:
            break
    if not conditions:
        log_once('IMDB and TMDB lookups failed for %s.' % entry['title'], log, logging.WARN)
        return

    quality = entry.get('quality', qualities.Quality())
    # `== None` is intentional: SQLAlchemy column comparison, not identity.
    movie = task.session.query(QueuedMovie).filter(
        QueuedMovie.downloaded == None).filter(
        QueuedMovie.queue_name == queue_name).filter(
        or_(*conditions)).first()
    if movie and movie.quality_req.allows(quality):
        return movie
def _reset(self):
    """Clear all per-parse state so this parser instance can be reused."""
    # parse produces these
    self.season = None
    self.episode = None
    # number of episodes covered by the release (packs span more than one)
    self.episodes = 1
    self.id = None
    # one of: 'ep', 'date', 'id', 'sequence', 'special'
    self.id_type = None
    self.id_groups = None
    self.quality = qualities.Quality()
    self.proper_count = 0
    self.special = False
    # TODO: group is only produced with allow_groups
    self.group = None
    # false if item does not match series
    self.valid = False
def assume(self, entry, quality):
    """Fill the unknown components of the entry's quality from `quality`.

    Components already known on the entry are kept; unknown ones are taken
    from the assumed quality, and `assumed_quality` is flagged on the entry
    whenever an assumption is actually applied.
    """
    newquality = qualities.Quality()
    log.debug('Current qualities: %s', entry.get('quality'))
    for component in entry.get('quality').components:
        assumed = getattr(quality, component.type)
        log.debug('\t%s: %s vs %s', component.type, component.name, assumed.name)
        if component.name != 'unknown':
            # The entry already knows this component — keep it.
            log.debug('\t%s: keeping %s', component.type, component.name)
            setattr(newquality, component.type, component)
        elif assumed.name != 'unknown':
            # Entry is missing this component; take the assumed value.
            log.debug('\t%s: assuming %s', component.type, assumed.name)
            setattr(newquality, component.type, assumed)
            entry['assumed_quality'] = True
        elif component.name == 'unknown' and assumed.name == 'unknown':
            # Neither side knows anything about this component.
            log.debug('\t%s: got nothing', component.type)
    entry['quality'] = newquality
    log.debug('Quality updated: %s', entry.get('quality'))
def _quality(self, guessit_result):
    """Generate a FlexGet Quality from a guessit result.

    Reads the raw guessit properties (screen_size, format, video_codec,
    audio_codec, ...), applies FlexGet-specific adjustments (hr, preair,
    screener variants, r5, 10bit, dtshd, dd5.1) and joins the resulting
    components into a quality string.

    :param guessit_result: mapping of guessit properties for a title
    :returns: qualities.Quality built from the detected components
    :raises ParseWarning: if a component is neither a str nor a list
    """
    resolution = guessit_result.get('screen_size', '')
    other = lower_to_list(guessit_result.get('other'))
    if not resolution and 'hr' in other:
        resolution = 'hr'

    source = guessit_result.get('format', '').replace('-', '')
    if 'preair' in other:
        source = 'preair'
    if 'screener' in other:
        # BluRay screeners get their own tag; everything else counts as dvdscr.
        if source == 'BluRay':
            source = 'bdscr'
        else:
            source = 'dvdscr'
    if 'r5' in other:
        source = 'r5'

    codec = guessit_result.get('video_codec', '')
    if guessit_result.get('video_profile') == '10bit':
        codec = '10bit'

    audio = guessit_result.get('audio_codec', '')
    if audio == 'DTS' and guessit_result.get('audio_profile') in ['HD', 'HDMA']:
        audio = 'dtshd'
    # BUGFIX: parenthesize the alternation.  The old expression parsed as
    # `(channels == '5.1' and audio == 'AC3') or audio == 'DolbyDigital'`,
    # so every DolbyDigital track was tagged dd5.1 regardless of channel count.
    elif guessit_result.get('audio_channels') == '5.1' and (
            audio == 'AC3' or audio == 'DolbyDigital'):
        audio = 'dd5.1'

    # Make sure everything are strings (guessit will return lists when there are multiples)
    flattened_qualities = []
    for component in (resolution, source, codec, audio):
        if isinstance(component, list):
            flattened_qualities.append(' '.join(component))
        elif isinstance(component, str):
            flattened_qualities.append(component)
        else:
            raise ParseWarning(
                self,
                'Guessit quality returned type {}: {}. Expected str or list.'.format(
                    type(component), component))

    return qualities.Quality(' '.join(flattened_qualities))
def __init__(self):
    """Set up per-comparison state for comparing two movie titles (a vs b)."""
    # Year and quality extracted from each side of the comparison.
    self.a_year, self.b_year = None, None
    self.a_quality, self.b_quality = qualities.Quality(
    ), qualities.Quality()
    self.parser = MovieParser()
    # cutoff=0.9: minimum similarity ratio required by the base comparator —
    # presumably a SequenceMatcher-style threshold; verify against base class.
    super(MovieComparator, self).__init__(cutoff=0.9)
def on_task_filter(self, task, config):
    """Hold back entries until the target quality is reached or the wait expires.

    Groups accepted/undecided entries by identifier, tracks the best release
    seen per identifier in the database, and either releases the best entry
    (target quality reached, or the configured wait period elapsed) or applies
    the configured waiting action.

    :param task: the running task
    :param config: timeframe plugin config (wait, target, identified_by,
        on_waiting, on_reached)
    """
    if not config:
        return

    identified_by = (
        '{{ id }}' if config['identified_by'] == 'auto' else config['identified_by']
    )

    grouped_entries = group_entries(task.accepted + task.undecided, identified_by)
    if not grouped_entries:
        return

    action_on_waiting = (
        entry_actions[config['on_waiting']] if config['on_waiting'] != 'do_nothing' else None
    )
    action_on_reached = (
        entry_actions[config['on_reached']] if config['on_reached'] != 'do_nothing' else None
    )

    # These depend only on config — hoisted out of the per-identifier loop
    # (previously rebuilt for every identifier on every run).
    target_requirement = qualities.Requirements(config['target'])
    target_quality = qualities.Quality(config['target'])
    wait_period = parse_timedelta(config['wait'])

    with Session() as session:
        # Prefetch Data
        existing_ids = session.query(EntryTimeFrame).filter(
            EntryTimeFrame.id.in_(grouped_entries.keys())).all()
        existing_ids = {e.id: e for e in existing_ids}

        for identifier, entries in grouped_entries.items():
            if not entries:
                continue

            id_timeframe = existing_ids.get(identifier)
            if not id_timeframe:
                id_timeframe = EntryTimeFrame()
                id_timeframe.id = identifier
                id_timeframe.status = 'waiting'
                id_timeframe.first_seen = datetime.now()
                session.add(id_timeframe)

            if id_timeframe.status == 'accepted':
                log.debug('Previously accepted %s with %s skipping', identifier,
                          id_timeframe.title)
                continue

            # Sort entities in order of quality and best proper
            entries.sort(key=lambda e: (e['quality'], e.get('proper_count', 0)),
                         reverse=True)
            best_entry = entries[0]

            log.debug('Current best for identifier %s is %s', identifier,
                      best_entry['title'])

            id_timeframe.title = best_entry['title']
            id_timeframe.quality = best_entry['quality']
            id_timeframe.proper_count = best_entry.get('proper_count', 0)

            # Check we hit target or better
            if target_requirement.allows(best_entry['quality']) \
                    or best_entry['quality'] >= target_quality:
                log.debug('timeframe reach target quality %s or higher for %s'
                          % (target_quality, identifier))
                if action_on_reached:
                    action_on_reached(best_entry,
                                      'timeframe reached target quality or higher')
                continue

            # Check if passed wait time
            expires = id_timeframe.first_seen + wait_period
            if expires <= datetime.now():
                log.debug('timeframe expired, releasing quality restriction for %s'
                          % identifier)
                if action_on_reached:
                    action_on_reached(best_entry, 'timeframe wait expired')
                continue

            # Verbose waiting, add to backlog
            if action_on_waiting:
                for entry in entries:
                    action_on_waiting(entry, 'timeframe waiting')
            diff = expires - datetime.now()
            hours, remainder = divmod(diff.seconds, 3600)
            hours += diff.days * 24
            minutes, _ = divmod(remainder, 60)

            log.info(
                '`%s`: timeframe waiting for %02dh:%02dmin. Currently best is `%s`.',
                identifier, hours, minutes, best_entry['title'])

            # add best entry to backlog (backlog is able to handle duplicate adds)
            if self.backlog:
                self.backlog.instance.add_backlog(task, best_entry, session=session)
def test_default(self):
    """An entry without detectable quality should get the assumed default."""
    self.execute_task('test_default')
    found = self.task.find_entry('entries', title='Testfile.noquality')
    expected = qualities.Quality('720p h264')
    assert found.get('quality') == expected, 'Testfile.noquality quality not \'720p h264\''
def parse(self, data=None, field=None, quality=None):
    """Parse a release title against this series.

    Fills in name, season/episode (or date/id/sequence/special) identifier,
    quality, proper count and validity on self.  The stages run in a fixed
    order: name match -> group check -> quality -> proper/special words ->
    date -> episode/season-pack -> SEE numbering -> id -> sequence -> special.
    Raises ParseWarning when no data is supplied, the title looks like an
    episode pack, or no identifier of the expected style can be found.
    """
    # Clear the output variables before parsing
    self._reset()
    self.field = field
    if quality:
        self.quality = quality
    if data:
        self.data = data
    if not self.data:
        raise ParseWarning(self, 'No data supplied to parse.')
    if not self.name:
        logger.trace('No name for series `{}` supplied, guessing name.', self.data)
        if not self.guess_name():
            logger.trace('Could not determine a series name')
            return
        logger.trace('Series name for {} guessed to be {}', self.data, self.name)

    # check if data appears to be unwanted (abort)
    if self.parse_unwanted(self.remove_dirt(self.data)):
        raise ParseWarning(
            self, '`{data}` appears to be an episode pack'.format(data=self.data))

    name = self.remove_dirt(self.name)

    logger.trace('name: {} data: {}', name, self.data)

    # name end position
    name_start = 0
    name_end = 0

    # regexp name matching
    if not self.name_regexps:
        # if we don't have name_regexps, generate one from the name
        self.name_regexps = ReList(
            name_to_re(name, self.ignore_prefixes, self)
            for name in [self.name] + self.alternate_names)
        # With auto regex generation, the first regex group captures the name
        self.re_from_name = True
    # try all specified regexps on this data
    for name_re in self.name_regexps:
        match = re.search(name_re, self.data)
        if match:
            match_start, match_end = match.span(1 if self.re_from_name else 0)
            # Always pick the longest matching regex
            if match_end > name_end:
                name_start, name_end = match_start, match_end
                logger.trace('NAME SUCCESS: {} matched to {}', name_re.pattern, self.data)
    if not name_end:
        # leave this invalid
        logger.trace(
            'FAIL: name regexps {} do not match {}',
            [regexp.pattern for regexp in self.name_regexps],
            self.data,
        )
        return

    # remove series name from raw data, move any prefix to end of string
    data_stripped = self.data[name_end:] + ' ' + self.data[:name_start]
    data_stripped = data_stripped.lower()
    logger.trace('data stripped: {}', data_stripped)

    # allow group(s)
    if self.allow_groups:
        for group in self.allow_groups:
            group = group.lower()
            for fmt in ['[%s]', '-%s', '(%s)']:
                if fmt % group in data_stripped:
                    logger.trace('{} is from group {}', self.data, group)
                    self.group = group
                    data_stripped = data_stripped.replace(fmt % group, '')
                    break
            if self.group:
                break
        else:
            # for-else: no configured group matched, so the release is rejected
            logger.trace('{} is not from groups {}', self.data, self.allow_groups)
            return  # leave invalid

    # Find quality and clean from data
    logger.trace('parsing quality ->')
    quality = qualities.Quality(data_stripped)
    if quality:
        # Remove quality string from data
        logger.trace('quality detected, using remaining data `{}`', quality.clean_text)
        data_stripped = quality.clean_text
    # Don't override passed in quality
    if not self.quality:
        self.quality = quality

    # Remove unwanted words from data for ep / id parsing
    data_stripped = self.remove_words(data_stripped, self.remove, not_in_word=True)

    data_parts = re.split(r'[\W_]+', data_stripped)

    # Pull proper/fastsub/special markers out before identifier parsing.
    for part in data_parts[:]:
        if part in self.propers:
            self.proper_count += 1
            data_parts.remove(part)
        elif part == 'fastsub':
            # Subtract 5 to leave room for fastsub propers before the normal release
            self.proper_count -= 5
            data_parts.remove(part)
        elif part in self.specials:
            self.special = True
            data_parts.remove(part)

    data_stripped = ' '.join(data_parts).strip()

    logger.trace("data for date/ep/id parsing '{}'", data_stripped)

    # Try date mode before ep mode
    if self.identified_by in ['date', 'auto']:
        date_match = self.parse_date(data_stripped)
        if date_match:
            if self.strict_name:
                if date_match['match'].start() > 1:
                    return
            self.id = date_match['date']
            self.id_groups = date_match['match'].groups()
            self.id_type = 'date'
            self.valid = True
            # keep parsing if this looks like a special and specials are preferred
            if not (self.special and self.prefer_specials):
                return
        else:
            logger.trace('-> no luck with date_regexps')

    if self.identified_by in ['ep', 'auto'] and not self.valid:
        ep_match = self.parse_episode(data_stripped)
        if ep_match:
            # strict_name
            if self.strict_name:
                if ep_match['match'].start() > 1:
                    return

            if ep_match['end_episode'] and ep_match['end_episode'] > ep_match['episode'] + 2:
                # This is a pack of too many episodes, ignore it.
                logger.trace(
                    'Series pack contains too many episodes ({}). Rejecting',
                    ep_match['end_episode'] - ep_match['episode'],
                )
                return
            self.season = ep_match['season']
            self.episode = ep_match['episode']
            if ep_match['end_episode']:
                self.episodes = (ep_match['end_episode'] - ep_match['episode']) + 1
            self.id = (self.season, self.episode)
            self.id_type = 'ep'
            self.valid = True
            if not (self.special and self.prefer_specials):
                return
        else:
            season_pack_match = self.parse_season_packs(data_stripped)
            # If a title looks like a special, give it precedence over season pack
            if season_pack_match and not self.special:
                if self.strict_name and season_pack_match['match'].start() > 1:
                    return
                self.season = season_pack_match['season']
                self.season_pack = True
                self.id = (season_pack_match['season'], 0)
                self.id_type = 'ep'
                self.valid = True
            else:
                logger.trace('-> no luck with ep_regexps')

        if self.identified_by == 'ep' and not self.season_pack:
            # we should be getting season, ep !
            # try to look up idiotic numbering scheme 101,102,103,201,202
            # ressu: Added matching for 0101, 0102... It will fail on
            # season 11 though
            logger.trace('ep identifier expected. Attempting SEE format parsing.')
            match = re.search(
                self.re_not_in_word(r'(\d?\d)(\d\d)'),
                data_stripped,
                re.IGNORECASE | re.UNICODE,
            )
            if match:
                # strict_name
                if self.strict_name:
                    if match.start() > 1:
                        return
                self.season = int(match.group(1))
                self.episode = int(match.group(2))
                self.id = (self.season, self.episode)
                logger.trace(self)
                self.id_type = 'ep'
                self.valid = True
                return
            else:
                logger.trace('-> no luck with SEE')

    # Check id regexps
    if self.identified_by in ['id', 'auto'] and not self.valid:
        for id_re in self.id_regexps:
            match = re.search(id_re, data_stripped)
            if match:
                # strict_name
                if self.strict_name:
                    if match.start() > 1:
                        return
                found_id = '-'.join(g for g in match.groups() if g)
                if not found_id:
                    # If match groups were all blank, don't accept this match
                    continue
                self.id = found_id
                self.id_type = 'id'
                self.valid = True
                logger.trace("found id '{}' with regexp '{}'", self.id, id_re.pattern)
                if not (self.special and self.prefer_specials):
                    return
                else:
                    break
        else:
            logger.trace('-> no luck with id_regexps')

    # Other modes are done, check for unwanted sequence ids
    if self.parse_unwanted_sequence(data_stripped):
        return

    # Check sequences last as they contain the broadest matches
    if self.identified_by in ['sequence', 'auto'] and not self.valid:
        for sequence_re in self.sequence_regexps:
            match = re.search(sequence_re, data_stripped)
            if match:
                # strict_name
                if self.strict_name:
                    if match.start() > 1:
                        return
                # First matching group is the sequence number
                try:
                    self.id = int(match.group(1))
                except ValueError:
                    self.id = self.roman_to_int(match.group(1))
                self.season = 0
                self.episode = self.id
                # If anime style version was found, overwrite the proper count with it
                if 'version' in match.groupdict():
                    if match.group('version'):
                        self.proper_count = int(match.group('version')) - 1
                self.id_type = 'sequence'
                self.valid = True
                logger.trace("found id '{}' with regexp '{}'", self.id, sequence_re.pattern)
                if not (self.special and self.prefer_specials):
                    return
                else:
                    break
        else:
            logger.trace('-> no luck with sequence_regexps')

    # No id found, check if this is a special
    if self.special or self.assume_special:
        # Attempt to set id as the title of the special
        self.id = data_stripped or 'special'
        self.id_type = 'special'
        self.valid = True
        logger.trace("found special, setting id to '{}'", self.id)
        return
    if self.valid:
        return

    msg = 'Title `%s` looks like series `%s` but cannot find ' % (self.data, self.name)
    if self.identified_by == 'auto':
        msg += 'any series numbering.'
    else:
        msg += 'a(n) `%s` style identifier.' % self.identified_by
    raise ParseWarning(self, msg)
def on_task_input(self, task, config):
    """Produce entries from the lostfilm.tv RSS feed.

    Downloads the RSS feed (trying each configured site mirror in turn),
    parses each item for series/season/episode information, follows the
    per-item redirect and download pages, and emits one Entry per available
    quality of each episode.

    Fixes over the previous revision:
      * the lostfilm-id warning crashed with AttributeError
        (``'…'.item['description']`` instead of passing the value to the logger)
      * ``eposode_num`` typo left ``episode_num`` stale after a
        season/episode mismatch with the download page
      * ``item.get['title']`` subscripted a bound method (TypeError)
        instead of calling ``item.get('title')``
      * ``response`` could be unbound after an exception on the first
        redirect attempt; it is now initialized per loop iteration
      * torrent-link extraction no longer crashes when the link div/anchor
        is missing — the existing "does not have a link" guard now fires

    :param task: the running task (supplies requests session, proxies, ...)
    :param config: plugin configuration dict
    :returns: list of Entry objects (may be empty)
    :raises PluginError: when the RSS feed cannot be fetched from any mirror
    """
    config = self.build_config(config)
    logger.trace('Config is {}', config)
    if not config['enabled']:
        return
    if config.get('lf_session') is not None:
        task.requests.cookies.set('lf_session', config['lf_session'])
        logger.debug('lf_session is set')
    task.requests.headers.update({'Cache-Control': 'no-cache', 'Pragma': 'no-cache'})

    prefilter_list = set()
    if config['prefilter']:
        prefilter_list = self._get_series(task)
        if prefilter_list:
            logger.verbose('Generated pre-filter list with {} entries', len(prefilter_list))
        else:
            logger.warning('Pre-filter list is empty. No series names are configured?')

    proxy_handler = None
    if task.requests.proxies is not None:
        proxy_handler = ProxyHandler(task.requests.proxies)

    site_urls = config['site_urls']
    tried_urls = []
    while site_urls:
        rss_url = site_urls[0] + "rss.xml"  # If RSS url changes, update it here
        logger.trace('Trying to get and parse the RSS feed: {}', rss_url)
        try:
            rss = feedparser.parse(
                rss_url,
                handlers=[proxy_handler],
                request_headers={'Cache-Control': 'no-cache', 'Pragma': 'no-cache'})
            status = rss.get('status')
            if status == 200:
                logger.verbose('Received RSS feed from {}', rss_url)
                break
            logger.info(
                'Received {} status instead of 200 (OK) when trying to download the RSS feed {}',
                status, rss_url)
        except Exception as e:
            logger.info('Cannot get or parse the RSS feed {}. Error: {}', rss_url, e)
            rss = None
        tried_urls.append(site_urls.pop(0))
    if not rss:
        raise PluginError('Cannot get the RSS feed')
    # Use failed site locations as the last resort option for the redirect page
    site_urls.extend(tried_urls)

    entries = []
    for idx, item in enumerate(rss.entries, 1):
        series_name_rus = series_name_org = None
        episode_name_rus = episode_name_org = None
        season_num = episode_num = None
        perfect_match = False

        if item.get('title') is None:
            logger.warning('RSS item doesn\'t have a title')
        else:
            logger.trace('Got RSS item title: {}', item['title'])
            title_match = RSS_TITLE_REGEXP.fullmatch(item['title'])
            if title_match is not None:
                if title_match['sr_org'] is not None:
                    series_name_org = title_match['sr_org']
                    series_name_rus = title_match['sr_rus']
                    if title_match['ep_rus'] is not None:
                        perfect_match = True
                else:
                    series_name_org = title_match['sr_rus']
                    series_name_rus = None
                season_num = int(title_match['season'])
                episode_num = int(title_match['episode'])
                episode_name_rus = title_match['ep_rus']
            else:
                logger.warning('Cannot parse RSS item title: {}', item['title'])

        # Skip series names that are not configured.
        # Do not filter out the current item if it is not matched perfectly.
        # It's better to process an extra item which will be filtered later by
        # series plugin than throw out actually needed item because incorrectly
        # matched name was not found in the pre-filter list.
        if prefilter_list:
            if perfect_match:
                try:
                    folded_name = self._simplify_name(series_name_org)
                except TextProcessingError:
                    logger.warning('RSS item series name "{}" could be wrong', series_name_org)
                    folded_name = None
                if folded_name and folded_name not in prefilter_list:
                    if idx != len(rss.entries) or entries or task.no_entries_ok:
                        logger.debug(
                            'Skipping "{}" as "{}" was not found in the list of configured series',
                            item['title'], series_name_org)
                        continue
                    else:
                        logger.debug(
                            'Force adding the last RSS item to the result to avoid warning of empty output')
                else:
                    logger.trace('"{}" was found in the list of configured series', series_name_org)
            else:
                logger.debug('Not skipping RSS item as series names may be detected incorrectly')

        if item.get('description') is None:
            logger.warning('RSS item doesn\'t have a description, skipping')
            continue
        lostfilm_id_match = RSS_LF_ID_REGEXP.search(item['description'])
        if lostfilm_id_match is None or lostfilm_id_match['id'] is None:
            # BUGFIX: was `'…skipping' .item['description']` — AttributeError on str;
            # the description is now passed to the logger as intended.
            logger.warning(
                'RSS item doesn\'t have lostfilm id in the description: {}, skipping',
                item['description'])
            continue
        lostfilm_id = int(lostfilm_id_match['id'])

        if not series_name_org or season_num is None or episode_num is None:
            if item.get('link') is None:
                logger.warning('RSS item doesn\'t have a link, skipping')
                continue
            link_match = RSS_LINK_REGEXP.fullmatch(item['link'])
            if link_match is None:
                logger.warning('Cannot parse RSS item link, skipping: {}', item['link'])
                continue
            series_name_org = link_match['sr_org2'].replace('_', ' ')
            season_num = int(link_match['season'])
            episode_num = int(link_match['episode'])
            logger.verbose('Using imprecise information from RSS item \'link\'')

        logger.trace(
            ('Processing RSS entry: names: series "{}", series ru "{}", episode ru "{}"; '
             'numbers: season "{}", episode "{}", lostfilm id "{}"; perfect detect: {}'),
            series_name_org, series_name_rus, episode_name_rus,
            season_num, episode_num, lostfilm_id, perfect_match)

        params = {'c': lostfilm_id, 's': season_num, 'e': episode_num}
        tried_urls = []
        response = None
        while site_urls:
            redirect_url = site_urls[0] + 'v_search.php'
            logger.trace('Trying to get the redirect page: {}', redirect_url)
            # Initialize per attempt so a raising first attempt cannot leave
            # `response` unbound (or a stale value from a previous RSS item).
            response = None
            try:
                response = task.requests.get(redirect_url, params=params)
                if response.status_code == 200:
                    logger.debug('The redirect page is downloaded from {}', redirect_url)
                    break
                logger.verbose('Got status {} while retrieving the redirect page {}',
                               response.status_code, redirect_url)
            except RequestException as e:
                logger.verbose('Failed to get the redirect page from {}. Error: {}',
                               redirect_url, e)
            except cf_exceptions as e:
                logger.verbose(
                    'Cannot bypass CF page protection to get the redirect page {}. Error: {}',
                    redirect_url, e)
            except Exception as e:
                # Catch other errors related to download to avoid crash
                logger.warning(
                    'Got unexpected exception when trying to get the redirect page {}. Error: {}',
                    redirect_url, e)
            tried_urls.append(site_urls.pop(0))
        # Use failed site locations as the last resort option for the next attempts
        site_urls.extend(tried_urls)
        if not response:
            if config.get('lf_session') is not None:
                logger.error('Failed to get the redirect page. '
                             'Check whether "lf_session" parameter is correct.')
            else:
                logger.error('Failed to get the redirect page. '
                             'Specify your "lf_session" cookie value in plugin parameters.')
            continue

        page = get_soup(response.content)
        # The redirect page is expected to be a bare <meta http-equiv="refresh"> stub.
        download_page_url = None
        find_item = page.find('html', recursive=False)
        if find_item is not None:
            find_item = find_item.find('head', recursive=False)
            if find_item is not None:
                find_item = find_item.find('meta', attrs={'http-equiv': "refresh"},
                                           recursive=False)
                if find_item is not None and find_item.has_attr('content') and \
                        find_item['content'].startswith('0; url=http'):
                    # strip the leading '0; url=' prefix
                    download_page_url = find_item['content'][7:]
        if not download_page_url:
            if config.get('lf_session') is not None:
                logger.error('Links were not found on lostfilm.tv torrent download page. '
                             'Check whether "lf_session" parameter is correct.')
            else:
                logger.error('Links were not found on lostfilm.tv torrent download page. '
                             'Specify your "lf_session" cookie value in plugin parameters.')
            continue

        try:
            response = task.requests.get(download_page_url)
        except RequestException as e:
            logger.error('Failed to get the download page {}. Error: {}',
                         download_page_url, e)
            continue
        except cf_exceptions as e:
            logger.error(
                'Cannot pass CF page protection to get the download page {}. Error: {}',
                download_page_url, e)
            continue
        except Exception as e:
            # Catch other errors related to download to avoid crash
            logger.error(
                'Got unexpected exception when trying to get the download page {}. Error: {}',
                download_page_url, e)
            continue

        page = get_soup(response.content)
        if not perfect_match:
            logger.trace('Trying to find series names in the final torrents download page')
            find_item = page.find('div', class_='inner-box--subtitle')
            if find_item is not None:
                title_org_div = find_item.text.strip()
                if title_org_div.endswith(', сериал') and len(title_org_div) != 8:
                    series_name_org = title_org_div[:-8]
                else:
                    logger.info('Cannot parse text on the final download page for original series name')
            else:
                logger.info('Cannot parse the final download page for original series name')
            find_item = page.find('div', class_='inner-box--title')
            if find_item is not None and find_item.text.strip():
                series_name_rus = find_item.text.strip()
            else:
                logger.info('Cannot parse the final download page for russian series name')

        find_item = page.find('div', class_='inner-box--text')
        if find_item is not None:
            info_match = PAGE_TEXT_REGEXP.fullmatch(find_item.text.strip())
            if info_match is not None:
                if int(info_match['season']) != season_num or \
                        int(info_match['episode']) != episode_num:
                    logger.warning(
                        ('Using season number ({}) and episode number ({}) from download page instead of '
                         'season number ({}) and episode number ({}) in RSS item'),
                        int(info_match['season']), int(info_match['episode']),
                        season_num, episode_num)
                    season_num = int(info_match['season'])
                    # BUGFIX: was `eposode_num = …` — the typo left episode_num stale.
                    episode_num = int(info_match['episode'])
                if info_match['ep_org'] is not None:
                    episode_name_org = info_match['ep_org'].strip()
                if not perfect_match and info_match['ep_rus'] is not None and \
                        info_match['ep_rus'].strip():
                    episode_name_rus = info_match['ep_rus'].strip()
            else:
                logger.info('Cannot parse text on the final download page for episode names')
        else:
            logger.info('Cannot parse the final download page for episode names')

        r_type = ''
        find_item = page.find('div', class_='inner-box--link main')
        if find_item:
            find_item = find_item.find('a')
            if find_item:
                info_match = PAGE_LINKMAIN_REGEXP.search(find_item.text)
                if info_match:
                    r_type = info_match['tp']
                    logger.debug('Found rip type "{}"', r_type)

        if not series_name_org:
            # BUGFIX: was `item.get['title']` — subscripting a bound method (TypeError).
            find_item = item.get('title')
            if find_item:
                logger.warning(
                    ('Unable to detect series name. Full RSS item title will be used in hope '
                     'that series parser will be able to detect something: {}'), find_item)
                series_name_org = None
            else:
                logger.error('Unable to detect series name. Skipping RSS item.')
                continue

        d_items = page.find_all('div', class_='inner-box--item')
        if not d_items:
            logger.error('No download links were found on the download page')
            continue

        episode_id = 'S{:02d}E{:02d}'.format(season_num, episode_num)
        for d_item in d_items:
            # Guarded lookup so a missing div/anchor hits the warning below
            # instead of raising AttributeError.
            link_div = d_item.find('div', class_='inner-box--link sub')
            find_item = link_div.a['href'] if link_div and link_div.a else None
            if not find_item:
                logger.warning('Download item does not have a link')
                continue
            torrent_link = find_item
            find_item = d_item.find('div', class_='inner-box--label')
            if not find_item:
                logger.warning('Download item does not have quality indicator')
                continue
            lf_quality = find_item.text.strip()
            if quality_map.get(lf_quality):
                quality = quality_map.get(lf_quality)
            else:
                logger.info('Download item has unknown quality indicator: {}', lf_quality)
                quality = lf_quality

            if series_name_org:
                new_title = '.'.join([series_name_org, episode_id, quality, r_type, 'LostFilm.TV'])
            else:
                new_title = '{} {}'.format(item['title'], quality).strip()

            new_entry = Entry()
            new_entry['title'] = new_title
            new_entry['url'] = torrent_link
            if series_name_org:
                new_entry['series_name'] = series_name_org
                new_entry['series_name_org'] = series_name_org
                if perfect_match:
                    new_entry['series_exact'] = True
            new_entry['series_id'] = episode_id
            new_entry['series_id_type'] = 'ep'
            new_entry['series_season'] = season_num
            new_entry['series_episode'] = episode_num
            new_entry['series_episodes'] = 1
            new_entry['season_pack'] = None
            new_entry['proper'] = False
            new_entry['proper_count'] = 0
            new_entry['special'] = False
            new_entry['release_group'] = 'LostFilm.TV'
            if quality_map.get(lf_quality):
                if r_type:
                    new_entry['quality'] = qualities.Quality('.'.join([quality, r_type]))
                else:
                    new_entry['quality'] = qualities.Quality(quality)
            if series_name_rus:
                new_entry['series_name_rus'] = series_name_rus
            if episode_name_rus:
                new_entry['episode_name_rus'] = episode_name_rus
            if episode_name_org:
                new_entry['episode_name_org'] = episode_name_org
            new_entry['lostfilm_id'] = lostfilm_id
            entries.append(new_entry)
            logger.trace(
                ('Added new entry: names: series "{}", series ru "{}", episode "{}", episode ru "{}"; '
                 'numbers: season "{}", episode "{}", lostfilm id "{}"; quality: "{}", perfect detect: {}'),
                series_name_org, series_name_rus, episode_name_org, episode_name_rus,
                season_num, episode_num, lostfilm_id, quality, perfect_match)

    return entries
def parse(self, data=None):
    """Parse movie name. Populates name, year, quality and proper_count attributes.

    :param data: title string to parse; defaults to ``self.data`` when omitted.
    """
    # Reset before parsing, so the parser can be reused.
    self.reset()

    if data is None:
        data = self.data

    # Move anything in leading brackets to the end
    data = re.sub(r'^\[(.*?)\](.*)', r'\2 \1', data)

    for char in '[]()_,.':
        data = data.replace(char, ' ')

    # if there are no spaces
    if data.find(' ') == -1:
        data = data.replace('-', ' ')

    # remove unwanted words (imax, ..)
    self.remove_words(data, self.remove)

    data = self.strip_spaces(data)

    # split to parts
    parts = data.split(' ')
    # 256 acts as a "no cut point found yet" sentinel position
    cut_part = 256
    all_caps = True
    for part_pos, part in enumerate(parts):
        cut = False
        # Don't let the first word be cutoff word
        if part_pos < 1:
            continue
        # check for year
        num = str_to_int(part)
        if num is not None:
            if 1930 < num <= datetime.now().year:
                if self.year_pos == cut_part:
                    # Looks like a year, but we already set the cutpoint to a year, let's move it forward
                    cut_part = part_pos

                self.year = num
                self.year_pos = part_pos
                cut = True
        # Don't consider all caps words cut words if the whole title has been all caps
        if not part.isupper():
            all_caps = False
        # if length > 3 and whole word in uppers, consider as cut word (most likely a group name)
        if len(part) > 3 and part.isupper() and part.isalpha() and not all_caps:
            cut = True
        # check for cutoff words
        if part.lower() in self.cutoffs:
            cut = True
        # check for propers
        if part.lower() in self.propers:
            # 'real' and 'final' are too common in movie titles, only cut if it comes after year
            if part.lower() not in ['real', 'final'] or self.year:
                self.proper_count += 1
                cut = True
        # update cut position
        # FIX: use the loop's own position instead of parts.index(part); index()
        # returns the FIRST occurrence, which mis-places the cut when the same
        # word appears more than once in the title.
        if cut and part_pos < cut_part:
            cut_part = part_pos

    if cut_part != 256:
        log.debug('parts: %s, cut is: %s', parts, parts[cut_part])

    # calculate cut positon from cut_part
    abs_cut = len(' '.join(parts[:cut_part]))

    log.debug('after parts check, cut data would be: `%s` abs_cut: %i', data[:abs_cut], abs_cut)

    # parse quality
    quality = qualities.Quality(data)
    if quality:
        self.quality = quality
        # remaining string is same as data but quality information removed
        # find out position where there is first difference, this is earliest
        # quality bit, anything after that has no relevance to the movie name
        dp = diff_pos(data, quality.clean_text)
        if dp is not None:
            log.debug('quality start: %s', dp)
            if dp < abs_cut:
                log.debug('quality cut is even shorter')
                abs_cut = dp

    # make cut
    data = data[:abs_cut].strip()
    log.debug('data cut to `%s` - this will be the name', data)

    # save results
    self.name = data
def test_matching(self, execute_task): task = execute_task('test_matching') entry = task.find_entry('entries', title='Testfile.HDTV') assert entry.get('quality') == qualities.Quality('720p HDTV')
def test_priority(self, execute_task): task = execute_task('test_priority') entry = task.find_entry('entries', title='Testfile[h264-720p]') assert entry.get('quality') != qualities.Quality('720p h264 mp3') assert entry.get('quality') == qualities.Quality('720p h264 flac')
def test_simple(self, execute_task): task = execute_task('test_simple') entry = task.find_entry('entries', title='Testfile.noquality') assert entry.get('quality') == qualities.Quality( '720p h264'), 'Testfile.noquality quality not \'720p h264\''
def getter(self): return qualities.Quality(getattr(self, text_attr))
def test_no_clobber(self, execute_task): task = execute_task('test_no_clobber') entry = task.find_entry('entries', title='Testfile[h264-720p]') assert entry.get('quality') != qualities.Quality('720p xvid') assert entry.get('quality') == qualities.Quality('720p h264')
def parse(self, data=None, field=None, quality=None):
    """Parse series name and identifier (episode / date / id / sequence) from data.

    Populates season, episode(s), id, id_type, quality, proper_count and sets
    ``self.valid`` when an identifier was found.

    :param data: title string to parse; defaults to previously set ``self.data``.
    :param field: name of the entry field the data came from.
    :param quality: pre-parsed quality to use instead of parsing it from data.
    :raises ParseWarning: when the name matches but no identifier can be found.
    """
    # Clear the output variables before parsing
    self._reset()
    self.field = field
    self.quality = quality or qualities.Quality()
    if data:
        self.data = data
    if not self.name or not self.data:
        raise Exception('SeriesParser initialization error, name: %s data: %s' %
                        (repr(self.name), repr(self.data)))

    name = self.remove_dirt(self.name)

    # check if data appears to be unwanted (abort)
    if self.parse_unwanted(self.remove_dirt(self.data)):
        return

    log.debug('name: %s data: %s' % (name, self.data))

    # name end position
    name_start = 0
    name_end = 0

    # regexp name matching
    if not self.name_regexps:
        # if we don't have name_regexps, generate one from the name
        self.name_regexps = ReList([self.name_to_re(self.name)])
        self.re_from_name = True
    # try all specified regexps on this data
    for name_re in self.name_regexps:
        match = re.search(name_re, self.data)
        if match:
            if self.re_from_name:
                name_start, name_end = match.span(1)
            else:
                name_start, name_end = match.span()
            log.debug('NAME SUCCESS: %s matched to %s' % (name_re.pattern, self.data))
            break
    else:
        # leave this invalid
        log.debug('FAIL: name regexps %s do not match %s' %
                  ([regexp.pattern for regexp in self.name_regexps], self.data))
        return

    # remove series name from raw data, move any prefix to end of string
    data_stripped = self.data[name_end:] + ' ' + self.data[:name_start]
    data_stripped = data_stripped.lower()
    log.debug('data stripped: %s' % data_stripped)

    # allow group(s)
    if self.allow_groups:
        for group in self.allow_groups:
            group = group.lower()
            for fmt in ['[%s]', '-%s']:
                if fmt % group in data_stripped:
                    log.debug('%s is from group %s' % (self.data, group))
                    self.group = group
                    data_stripped = data_stripped.replace(fmt % group, '')
                    break
            if self.group:
                break
        else:
            log.debug('%s is not from groups %s' % (self.data, self.allow_groups))
            return  # leave invalid

    # Find quality and clean from data
    log.debug('parsing quality ->')
    quality = qualities.Quality(data_stripped)
    if quality:
        self.quality = quality
        # Remove quality string from data
        log.debug('quality detected, using remaining data `%s`' % quality.clean_text)
        data_stripped = quality.clean_text
    # Remove unwanted words from data for ep / id parsing
    data_stripped = self.remove_words(data_stripped, self.remove, not_in_word=True)

    # FIX: raw string for the regex — '\W' in a plain string literal is an
    # invalid escape sequence (SyntaxWarning on modern Python).
    data_parts = re.split(r'[\W_]+', data_stripped)

    for part in data_parts[:]:
        if part in self.propers:
            self.proper_count += 1
            data_parts.remove(part)
        elif part == 'fastsub':
            # Subtract 5 to leave room for fastsub propers before the normal release
            self.proper_count -= 5
            data_parts.remove(part)
        elif part in self.specials:
            self.special = True
            data_parts.remove(part)

    data_stripped = ' '.join(data_parts).strip()

    log.debug("data for date/ep/id parsing '%s'" % data_stripped)

    # Try date mode before ep mode
    if self.identified_by in ['date', 'auto']:
        date_match = self.parse_date(data_stripped)
        if date_match:
            if self.strict_name:
                if date_match['match'].start() > 1:
                    return
            self.id = date_match['date']
            self.id_groups = date_match['match'].groups()
            self.id_type = 'date'
            self.valid = True
            return
        log.debug('-> no luck with date_regexps')

    if self.identified_by in ['ep', 'auto']:
        ep_match = self.parse_episode(data_stripped)
        if ep_match:
            # strict_name
            if self.strict_name:
                if ep_match['match'].start() > 1:
                    return

            if ep_match['end_episode'] > ep_match['episode'] + 2:
                # This is a pack of too many episodes, ignore it.
                log.debug('Series pack contains too many episodes (%d). Rejecting' %
                          (ep_match['end_episode'] - ep_match['episode']))
                return

            self.season = ep_match['season']
            self.episode = ep_match['episode']
            if ep_match['end_episode']:
                self.episodes = (ep_match['end_episode'] - ep_match['episode']) + 1
            else:
                self.episodes = 1
            self.id_type = 'ep'
            self.valid = True
            return

        log.debug('-> no luck with ep_regexps')

        if self.identified_by == 'ep':
            # we should be getting season, ep !
            # try to look up idiotic numbering scheme 101,102,103,201,202
            # ressu: Added matching for 0101, 0102... It will fail on
            # season 11 though
            log.debug('expect_ep enabled')
            match = re.search(self.re_not_in_word(r'(0?\d)(\d\d)'), data_stripped,
                              re.IGNORECASE | re.UNICODE)
            if match:
                # strict_name
                if self.strict_name:
                    if match.start() > 1:
                        return

                self.season = int(match.group(1))
                self.episode = int(match.group(2))
                log.debug(self)
                self.id_type = 'ep'
                self.valid = True
                return
            log.debug('-> no luck with the expect_ep')

    # Ep mode is done, check for unwanted ids
    if self.parse_unwanted_id(data_stripped):
        return

    # Check id regexps
    if self.identified_by in ['id', 'auto']:
        for id_re in self.id_regexps:
            match = re.search(id_re, data_stripped)
            if match:
                # strict_name
                if self.strict_name:
                    if match.start() > 1:
                        return
                id = '-'.join(g for g in match.groups() if g)
                if not id:
                    # If match groups were all blank, don't accept this match
                    continue
                self.id = id
                self.id_type = 'id'
                self.valid = True
                log.debug('found id \'%s\' with regexp \'%s\'' % (self.id, id_re.pattern))
                return
        log.debug('-> no luck with id_regexps')

    # Check sequences last as they contain the broadest matches
    if self.identified_by in ['sequence', 'auto']:
        for sequence_re in self.sequence_regexps:
            match = re.search(sequence_re, data_stripped)
            if match:
                # strict_name
                if self.strict_name:
                    if match.start() > 1:
                        return
                # First matching group is the sequence number
                try:
                    self.id = int(match.group(1))
                except ValueError:
                    self.id = self.roman_to_int(match.group(1))
                self.season = 0
                self.episode = self.id
                # If anime style version was found, overwrite the proper count with it
                if 'version' in match.groupdict():
                    if match.group('version'):
                        self.proper_count = int(match.group('version')) - 1
                self.id_type = 'sequence'
                self.valid = True
                log.debug('found id \'%s\' with regexp \'%s\'' % (self.id, sequence_re.pattern))
                return
        log.debug('-> no luck with sequence_regexps')

    # No id found, check if this is a special
    if self.special:
        # Attempt to set id as the title of the special
        self.id = data_stripped
        self.id_type = 'special'
        self.valid = True
        log.debug('found special, setting id to \'%s\'' % self.id)
        return

    msg = 'Title `%s` looks like series `%s` but I cannot find ' % (
        self.data, self.name)
    if self.identified_by == 'auto':
        msg += 'any series numbering.'
    else:
        msg += 'a(n) `%s` style identifier.' % self.identified_by
    raise ParseWarning(msg)
def on_task_input(self, task, config):
    """Creates an entry for each item in your couchpotato wanted list.

    Syntax:

    couchpotato:
      base_url: <value>
      port: <value> (Default is 80)
      api_key: <value>
      include_data: <value> (Boolean, default is False.)

    Options base_url and api_key are required.
    When the include_data property is set to true, the movie's CouchPotato
    quality profile is converted to a flexget quality and stored on the entry.
    """
    parsedurl = urlparse(config.get('base_url'))
    url = '%s://%s:%s%s/api/%s/movie.list?status=active' \
          % (parsedurl.scheme, parsedurl.netloc, config.get('port'), parsedurl.path, config.get('api_key'))
    try:
        json = task.requests.get(url).json()
    except RequestException as e:
        # FIX: report the underlying error like the profile.list handler below does
        raise plugin.PluginError('Unable to connect to Couchpotato at %s://%s:%s%s. Error: %s'
                                 % (parsedurl.scheme, parsedurl.netloc, config.get('port'),
                                    parsedurl.path, e))
    entries = []

    # Converts quality from CP format to Flexget format
    # TODO: Not all values have exact matches in flexget, need to update flexget qualities
    cp_to_flexget = {
        'BR-Disk': 'remux',  # Not a perfect match, but as close as currently possible
        '1080p': '1080p',
        '720p': '720p',
        'brrip': 'bluray',
        'dvdr': 'dvdrip',  # Not a perfect match, but as close as currently possible
        'dvdrip': 'dvdrip',
        'scr': 'dvdscr',
        'r5': 'r5',
        'tc': 'tc',
        'ts': 'ts',
        'cam': 'cam',
    }

    # Gets profile and quality lists if include_data is TRUE
    profile_json = None
    if config.get('include_data'):
        profile_url = '%s://%s:%s%s/api/%s/profile.list' \
                      % (parsedurl.scheme, parsedurl.netloc, config.get('port'), parsedurl.path,
                         config.get('api_key'))
        try:
            profile_json = task.requests.get(profile_url).json()
        except RequestException as e:
            raise plugin.PluginError('Unable to connect to Couchpotato at %s://%s:%s%s. Error: %s'
                                     % (parsedurl.scheme, parsedurl.netloc, config.get('port'),
                                        parsedurl.path, e))

    for movie in json['movies']:
        if movie['status'] != 'active':
            continue
        quality = ''
        if config.get('include_data') and profile_json:
            for profile in profile_json['list']:
                if profile['_id'] == movie['profile_id']:  # Matches movie profile with profile JSON
                    # Creates a string of flexget qualities from CP's qualities list.
                    # FIX: skip qualities with no flexget equivalent instead of
                    # crashing with an uncaught KeyError.
                    known = [cp_to_flexget[q] for q in profile['qualities'] if q in cp_to_flexget]
                    unknown = [q for q in profile['qualities'] if q not in cp_to_flexget]
                    if unknown:
                        log.debug('Ignoring unknown CouchPotato qualities: %s' % ', '.join(unknown))
                    try:
                        # Return the best components from the list
                        quality = qualities.Quality(', '.join(known))
                    except ValueError as e:
                        log.debug(e)
        title = movie['title']
        imdb = movie['info'].get('imdb')
        tmdb = movie['info'].get('tmdb_id')
        entry = Entry(title=title, url='', imdb_id=imdb, tmdb_id=tmdb, quality=quality)
        if entry.isvalid():
            entries.append(entry)
        else:
            log.error('Invalid entry created? %s' % entry)
        # Test mode logging
        if task.options.test:
            log.info("Test mode. Entry includes:")
            log.info("    Title: %s" % entry["title"])
            log.info("    URL: %s" % entry["url"])
            log.info("    IMDB ID: %s" % entry["imdb_id"])
            log.info("    TMDB ID: %s" % entry["tmdb_id"])
            log.info("    Quality: %s" % entry["quality"])

    return entries