def correctWords(self, rel_name, media):
    """Check a release name against the required / ignored word filters.

    Args:
        rel_name: Release name; it is simplified before any comparison.
        media: Media document, forwarded to the
            ``searcher.get_search_title`` event and to ``self.containsWords``.

    Returns:
        ``False`` when required words are configured but none matched, when
        an ignored word matched, or when the name carries an adult-content
        tag that is not also a word of the media title; ``True`` otherwise.
    """
    media_title = fire_event('searcher.get_search_title', media, single=True)
    # Raw strings: '\W' is not a valid str escape and warns on current Python.
    media_words = re.split(r'\W+', simplify_string(media_title))

    rel_name = simplify_string(rel_name)
    rel_words = re.split(r'\W+', rel_name)

    required_words, contains_required = self.containsWords(
        rel_name, rel_words, 'required', media)
    if len(required_words) > 0 and not contains_required:
        log.info2('Wrong: Required word missing: %s', rel_name)
        return False

    ignored_words, contains_ignored = self.containsWords(
        rel_name, rel_words, 'ignored', media)
    if len(ignored_words) > 0 and contains_ignored:
        log.info2("Wrong: '%s' contains 'ignored words'", rel_name)
        return False

    # Reject adult-content tags, unless the tag is part of the media title
    pron_tags = ['xxx', 'sex', 'anal', 't**s', 'f**k', 'p**n', 'orgy', 'milf', 'boobs', 'erotica', 'erotic', 'c**k', 'dick']
    # Parenthesised explicitly: '-' binds tighter than '&', the result is the
    # same set either way.
    pron_words = (set(rel_words) & set(pron_tags)) - set(media_words)
    if pron_words:
        log.info('Wrong: %s, probably pr0n', rel_name)
        return False

    return True
def createStringIdentifier(self, file_path, folder='', exclude_filename=False):
    """Derive a simplified identifier string from a file path.

    Args:
        file_path: Path of the file to identify.
        folder: Root folder; every occurrence of it is removed from
            ``file_path`` before processing.
        exclude_filename: When true, the last path component is cut off
            (f.e. for DVD files) so only the directory part is used.

    Returns:
        The simplified identifier with duplicate words removed. When
        ``self.findYear`` finds a year that the identifier does not already
        start with, the year is appended after the leading name part.
    """
    # Strip the root folder and the extension
    identifier = file_path.replace(folder, '').lstrip(os.path.sep)
    identifier = os.path.splitext(identifier)[0]

    # Exclude file name path if needed (f.e. for DVD files)
    if exclude_filename:
        identifier = identifier[:len(identifier) - len(os.path.split(identifier)[-1])]

    # Make sure the identifier is lower case as all regex is with lower case tags
    identifier = identifier.lower()

    # Keep one path component: the parent folder when its name is longer than
    # the file name, the file name otherwise. Best effort: on failure the
    # identifier is left untouched.
    try:
        path_split = split_string(identifier, os.path.sep)
        if len(path_split) > 1 and len(path_split[-2]) > len(path_split[-1]):
            identifier = path_split[-2]
        else:
            identifier = path_split[-1]
    except Exception:
        pass

    # multipart
    identifier = self.removeMultipart(identifier)

    # remove cptag
    identifier = self.removeCPTag(identifier)

    # simplify the string
    identifier = simplify_string(identifier)

    year = self.findYear(file_path)

    # groups, release tags, scenename cleaner
    identifier = re.sub(self.clean, '::', identifier).strip(':')

    # Year
    if year and identifier[:4] != year:
        split_by = ':::' if ':::' in identifier else year
        identifier = '%s %s' % (identifier.split(split_by)[0].strip(), year)
    else:
        identifier = identifier.split('::')[0]

    # Remove duplicate words, keeping the first occurrence of each
    out = []
    for word in identifier.split():
        if word not in out:
            out.append(word)
    identifier = ' '.join(out)

    return simplify_string(identifier)
def namePositionScore(nzb_name, movie_name):
    """Score a release name by where the movie name, year and quality sit.

    Args:
        nzb_name: Release name to score.
        movie_name: Movie title to look for.

    Returns:
        An integer score: +10 when the simplified release name starts with
        the simplified movie name, +10 when the four characters after the
        name equal the year reported by ``scanner.name_year``, -20 when the
        name is followed by text but no known quality word is found, and
        -10 for every word between name/year and quality that is not a
        ``name_scores`` key.
    """
    score = 0

    # Raw string: '\W' is not a valid str escape and warns on current Python.
    nzb_words = re.split(r'\W+', simplify_string(nzb_name))
    qualities = fire_event('quality.all', single=True)

    # Prefer the quoted part of the name when there is one.
    # NOTE(review): inside a character class '\1' is the character chr(1),
    # not a backreference, so this matches greedily up to the last matching
    # quote. Left as is to preserve behaviour.
    try:
        nzb_name = re.search(r'([\'"])[^\1]*\1', nzb_name).group(0)
    except Exception:
        pass  # no quoted section (or unusable input): keep the full name

    name_year = fire_event('scanner.name_year', nzb_name, single=True)

    # Give points for movies beginning with the correct name
    split_by = simplify_string(movie_name)
    name_split = []
    if len(split_by) > 0:
        name_split = simplify_string(nzb_name).split(split_by)
        if name_split[0].strip() == '':
            score += 10

    # If year is second in line, give more points
    if len(name_split) > 1 and name_year:
        after_name = name_split[1].strip()

        if try_int(after_name[:4]) == name_year.get('year', None):
            score += 10
            after_name = after_name[4:]

        # Give -point to crap between year and quality
        found_quality = None
        for quality in qualities:
            # Main in words
            if quality['identifier'] in nzb_words:
                found_quality = quality['identifier']

            # Alt in words (only the first matching alternative per quality)
            for alt in quality['alternative']:
                if alt in nzb_words:
                    found_quality = alt
                    break

        if not found_quality:
            return score - 20

        allowed = []
        for value in name_scores:
            # Entries are 'name:score'; only the name matters here
            name, _ = value.split(':')
            allowed.append(name)

        inbetween = re.split(r'\W+', after_name.split(found_quality)[0].strip())
        score -= 10 * len(set(inbetween) - set(allowed))

    return score
def nameScore(name, year, preferred_words):
    """Calculate score for words in the NZB name.

    Args:
        name: Release name; compared in lower case.
        year: Expected year; +5 when its string form occurs in the name.
        preferred_words: Iterable of words; each one present in the
            simplified name adds 100.

    Returns:
        The integer score, or 0 when scoring fails (the failure is logged).
    """
    try:
        score = 0
        name = name.lower()

        # give points for the cool stuff
        for value in name_scores:
            parts = value.split(':')
            # Last field is the score, the field before it the word
            add = int(parts[-1])
            if parts[-2] in name:
                score += add

        # points if the year is correct
        if str(year) in name:
            score += 5

        # Contains preferred word
        # Raw string: '\W' is not a valid str escape and warns on current Python.
        nzb_words = re.split(r'\W+', simplify_string(name))
        score += 100 * len(set(nzb_words) & set(preferred_words))

        return score
    except Exception:
        # Exception, not bare except: let KeyboardInterrupt/SystemExit through
        log.error('Failed doing nameScore: %s', traceback.format_exc())

    return 0
def duplicateScore(nzb_name, movie_name):
    """Penalise words that are repeated in a release name.

    Args:
        nzb_name: Release name to inspect.
        movie_name: Movie title; words of the title are never penalised.

    Returns:
        -4 for every distinct word that occurs more than once in the
        simplified release name and is not a word of the simplified movie
        name; 0 when there are none or when scoring fails (failure is logged).
    """
    from collections import Counter

    try:
        # Raw strings: '\W' is not a valid str escape and warns on current Python.
        nzb_words = re.split(r'\W+', simplify_string(nzb_name))
        movie_words = re.split(r'\W+', simplify_string(movie_name))

        # minus for duplicates: one counting pass, not a count() per position
        duplicates = {word for word, seen in Counter(nzb_words).items() if seen > 1}

        return len(duplicates - set(movie_words)) * -4
    except Exception:
        # Exception, not bare except: let KeyboardInterrupt/SystemExit through
        log.error('Failed doing duplicateScore: %s', traceback.format_exc())

    return 0
def correctName(self, check_name, movie_name):
    """Tell whether a release name can belong to the given movie name.

    Candidates are the full ``check_name``, its quoted part (if any) and the
    longest ``[...]`` section (if any). Each candidate is parsed through the
    ``scanner.name_year`` event.

    Args:
        check_name: Release name to verify.
        movie_name: Movie title to compare against.

    Returns:
        ``True`` as soon as a candidate's parsed name is non-empty and all of
        its words occur in the simplified movie name; ``False`` otherwise.
    """
    check_names = [check_name]

    # Match names between quotes
    # NOTE(review): inside a character class '\1' is the character chr(1),
    # not a backreference, so this matches greedily up to the last matching
    # quote. Left as is to preserve behaviour.
    try:
        check_names.append(re.search(r'([\'"])[^\1]*\1', check_name).group(0))
    except (AttributeError, TypeError):
        pass  # no quoted part, or check_name is not a string

    # Match longest name between []
    try:
        check_names.append(
            max(re.findall(r'[^[]*\[([^]]*)\]', check_name), key=len).strip())
    except (ValueError, TypeError):
        pass  # no bracketed part, or check_name is not a string

    for candidate in remove_duplicate(check_names):
        check_movie = fire_event('scanner.name_year', candidate, single=True)

        # Best effort per candidate: an unparsable one is simply skipped
        try:
            # Raw strings: '\W' is not a valid str escape and warns on current Python.
            check_words = remove_empty(re.split(r'\W+', check_movie.get('name', '')))
            movie_words = remove_empty(re.split(r'\W+', simplify_string(movie_name)))

            if len(check_words) > 0 and len(movie_words) > 0 \
                    and set(check_words) <= set(movie_words):
                return True
        except Exception:
            pass

    return False
def simplify_value(self, value):
    """Simplify a string, or every item of a list, via ``simplify_string``.

    Falsy values (``None``, ``''``, ``[]``, ``0``, ...) are handed back
    unchanged. Lists are processed item by item, recursively.

    Raises:
        ValueError: for a truthy value that is neither ``str`` nor ``list``.
    """
    if value:
        if isinstance(value, list):
            return list(map(self.simplify_value, value))
        if isinstance(value, str):
            return simplify_string(value)
        raise ValueError("Unsupported value type")

    # Nothing to simplify
    return value
def first(self, title):
    """Return the lower-case first letter of a title, or '#'.

    The title is converted with ``to_unicode`` and ``simplify_string``, and
    one leading article ('the ', 'an ' or 'a ') is dropped before the first
    character is taken. '#' is returned when the remaining title is empty or
    does not start with an ASCII letter.
    """
    cleaned = simplify_string(to_unicode(title))

    # Drop at most one leading article
    for article in ['the ', 'an ', 'a ']:
        cut = len(article)
        if cleaned[:cut] == article:
            cleaned = cleaned[cut:]
            break

    if cleaned and len(cleaned) > 0 and cleaned[0] in ascii_letters:
        initial = cleaned[0]
    else:
        initial = '#'

    return str(initial).lower()
def nameRatioScore(nzb_name, movie_name):
    """Score a release name by how few extra words it carries.

    Args:
        nzb_name: Release name; turned into an identifier through the
            ``scanner.create_file_identifier`` event.
        movie_name: Movie title to compare against.

    Returns:
        10 minus the number of distinct identifier words that are not words
        of the simplified movie name; 0 when scoring fails (failure is logged).
    """
    try:
        # Raw strings: '\W' is not a valid str escape and warns on current Python.
        nzb_words = re.split(
            r'\W+',
            fire_event('scanner.create_file_identifier', nzb_name, single=True))
        movie_words = re.split(r'\W+', simplify_string(movie_name))

        left_over = set(nzb_words) - set(movie_words)
        return 10 - len(left_over)
    except Exception:
        # Exception, not bare except: let KeyboardInterrupt/SystemExit through
        log.error('Failed doing nameRatioScore: %s', traceback.format_exc())

    return 0
def simplify(self, title):
    """Build a fixed-width (32 character) key from a title.

    The title is converted with ``to_unicode``; a '#' marker is prepended
    when that converted title is empty or does not start with an ASCII
    letter. The title is then simplified, one leading article ('the ',
    'an ' or 'a ') is dropped, and the result is space-padded and cut to
    exactly 32 characters.
    """
    title = to_unicode(title)

    # The marker is decided on the title before it is simplified
    starts_with_letter = title and len(title) > 0 and title[0] in ascii_letters
    nr_prefix = '' if starts_with_letter else '#'

    title = simplify_string(title)
    for article in ['the ', 'an ', 'a ']:
        cut = len(article)
        if title[:cut] == article:
            title = title[cut:]
            break

    padded = str(nr_prefix + title).ljust(32, ' ')
    return padded[:32]
def make_key_value(self, data):
    """Produce the set of search keys for a media document's title.

    Only documents with ``_t == 'media'`` and a non-empty ``title`` yield
    keys; for anything else the method returns ``None``.

    For every suffix of the simplified, lower-cased title (starting at each
    word), the suffix cut to 32 characters is added, followed by each of its
    shortened prefixes that is still longer than ``self.__l`` characters.
    Every key is right-justified to 32 characters with '_'.

    Returns:
        A ``(keys, None)`` tuple, or ``None`` when the document is skipped.
    """
    if data.get('_t') != 'media' or not len(data.get('title', '')) > 0:
        return None

    keys = set()
    title = str(simplify_string(data.get('title').lower()))
    min_len = self.__l
    words = title.split()

    for start in range(len(words)):
        suffix = ' '.join(words[start:])[:32].strip()
        keys.add(suffix.rjust(32, '_'))

        # Progressively trim characters off the end of the suffix
        trim_limit = max(min_len, min(len(suffix), 32))
        for trim in range(1, trim_limit):
            shortened = suffix[:-trim].strip()
            if len(shortened) > min_len:
                keys.add(shortened.rjust(32, '_'))

    return keys, None
def correctRelease(self, nzb=None, media=None, quality=None, **kwargs):
    """Decide whether a found release is an acceptable match for a movie.

    Args:
        nzb: Release dict; the keys read here are ``name``, ``size``,
            ``age``, ``seeders``, ``description``, ``get_more_info`` and
            ``extra_check``.
        media: Media document; ``type`` must be ``'movie'``, and
            ``info.year`` / ``info.titles`` are used for matching.
        quality: Quality dict the release is being searched for.
        **kwargs: ``imdb_results`` (default ``False``) marks results that
            were already looked up by IMDB id, which skips the name checks.

    Returns:
        ``None`` when the media is not a movie, ``True`` when the release is
        accepted and ``False`` (with a log line for most rejections) when a
        check fails.
    """
    if media.get('type') != 'movie':
        return

    media_title = fire_event('searcher.get_search_title', media, single=True)

    imdb_results = kwargs.get('imdb_results', False)
    retention = Env.setting('retention', section='nzb')

    # Retention only applies to releases without a seeders count
    if nzb.get('seeders') is None and 0 < retention < nzb.get('age', 0):
        log.info2(
            'Wrong: Outside retention, age is %s, needs %s or lower: %s',
            (nzb['age'], retention, nzb['name']))
        return False

    # Check for required and ignored words
    if not fire_event('searcher.correct_words', nzb['name'], media, single=True):
        return False

    # NOTE(review): the fallback branch indexes `quality` although it only
    # runs when `quality` is falsy, so it cannot succeed for None. Behaviour
    # is kept as is; confirm what the intended fallback was.
    preferred_quality = quality if quality else fire_event(
        'quality.single', identifier=quality['identifier'], single=True)

    # Contains lower quality string
    contains_other = fire_event(
        'searcher.contains_other_quality', nzb,
        movie_year=media['info']['year'],
        preferred_quality=preferred_quality, single=True)
    if contains_other and isinstance(contains_other, dict):
        log.info2(
            'Wrong: %s, looking for %s, found %s',
            (nzb['name'], quality['label'], list(contains_other)))
        return False

    # 3D flag does not match the wanted quality
    if not fire_event('searcher.correct_3d', nzb,
                      preferred_quality=preferred_quality, single=True):
        log.info2(
            'Wrong: %s, %slooking for %s in 3D',
            (nzb['name'],
             ('' if preferred_quality['custom'].get('3d') else 'NOT '),
             quality['label']))
        return False

    # File too small
    if nzb['size'] and try_int(preferred_quality['size_min']) > try_int(nzb['size']):
        log.info2(
            'Wrong: "%s" is too small to be %s. %sMB instead of the minimal of %sMB.',
            (nzb['name'], preferred_quality['label'], nzb['size'],
             preferred_quality['size_min']))
        return False

    # File too large
    if nzb['size'] and try_int(preferred_quality['size_max']) < try_int(nzb['size']):
        log.info2(
            'Wrong: "%s" is too large to be %s. %sMB instead of the maximum of %sMB.',
            (nzb['name'], preferred_quality['label'], nzb['size'],
             preferred_quality['size_max']))
        return False

    # Provider specific functions
    get_more = nzb.get('get_more_info')
    if get_more:
        get_more(nzb)

    extra_check = nzb.get('extra_check')
    if extra_check and not extra_check(nzb):
        return False

    if imdb_results:
        return True

    # Check if nzb contains imdb link
    if get_imdb(nzb.get('description', '')) == get_identifier(media):
        return True

    for raw_title in media['info']['titles']:
        for movie_title in possible_titles(raw_title):
            # Raw string: '\W' is not a valid str escape and warns on current Python.
            movie_words = re.split(r'\W+', simplify_string(movie_title))

            if fire_event('searcher.correct_name', nzb['name'], movie_title, single=True):
                # No IMDB link: at least check the year. Titles longer than
                # two words get a year range of 1, shorter ones must match
                # the year exactly.
                year_range = 1 if len(movie_words) > 2 else 0
                if fire_event('searcher.correct_year', nzb['name'],
                              media['info']['year'], year_range, single=True):
                    return True

    log.info("Wrong: %s, undetermined naming. Looking for '%s (%s)'",
             (nzb['name'], media_title, media['info']['year']))
    return False
def getReleaseNameYear(self, release_name, file_name=None):
    """Guess the movie name and year from a release name.

    Two guesses are made: one through ``guess_movie_info`` on ``file_name``
    (when given), and one by cleaning ``release_name`` and splitting it on
    the year found by ``self.findYear``.

    Args:
        release_name: Release (folder or file) name.
        file_name: Optional file name, used for the guessit-based guess and
            as the first place to look for a year.

    Returns:
        A dict with the chosen guess (``name`` / ``year`` when detected) and
        the alternative guess under ``other``. The cleaned-name guess wins
        when both agree on the year and its name is longer, or when there is
        no guessit guess; otherwise the guessit guess wins.
    """
    release_name = release_name.strip(' .-_')

    # Use guessit first
    guess = {}
    if file_name:
        try:
            guessit = guess_movie_info(to_unicode(file_name))
            if guessit.get('title') and guessit.get('year'):
                guess = {
                    'name': guessit.get('title'),
                    'year': guessit.get('year'),
                }
        except Exception:
            log.debug('Could not detect via guessit "%s": %s',
                      (file_name, traceback.format_exc()))

    # Backup to simple
    release_name = os.path.basename(release_name.replace('\\', '/'))
    # Raw string: '\W' is not a valid str escape and warns on current Python.
    cleaned = ' '.join(re.split(r'\W+', simplify_string(release_name)))
    cleaned = re.sub(self.clean, ' ', cleaned)

    # First source that yields a year wins
    year = None
    for year_str in [file_name, release_name, cleaned]:
        if not year_str:
            continue
        year = self.findYear(year_str)
        if year:
            break

    cp_guess = {}

    if year:
        # Split name on year
        try:
            movie_name = cleaned.rsplit(year, 1).pop(0).strip()
            if movie_name:
                cp_guess = {
                    'name': movie_name,
                    'year': int(year),
                }
        except Exception:
            pass

    if not cp_guess:
        # Split name on multiple spaces
        # NOTE(review): the separator as seen here is a single space, which
        # contradicts the comment above; whitespace in this file looks
        # collapsed, so confirm the intended literal against upstream.
        try:
            movie_name = cleaned.split(' ').pop(0).strip()
            cp_guess = {
                'name': movie_name,
                # int(None) raises when no year was found; the guess is then
                # dropped by the handler below
                'year': int(year) if movie_name[:4] != year else 0,
            }
        except Exception:
            pass

    if cp_guess.get('year') == guess.get('year') and len(
            cp_guess.get('name', '')) > len(guess.get('name', '')):
        cp_guess['other'] = guess
        return cp_guess
    elif guess == {}:
        cp_guess['other'] = guess
        return cp_guess

    guess['other'] = cp_guess
    return guess
def getUrl(self, url):
    """Fetch ``url`` through ``self.getCache``.

    The cache key is the ``md5`` of the simplified url; the original url is
    passed along as the ``url`` keyword argument.
    """
    cache_key = md5(simplify_string(url))
    return self.getCache(cache_key, url=url)