def correctWords(self, rel_name, media):
    """Check a release name against required, ignored and adult-content words.

    Returns False (after logging the reason) when required words are
    configured but not matched, when ignored words are configured and
    matched, or when the name contains an adult tag that is not also a word
    of the media title. Returns True otherwise.
    """
    title = fireEvent('searcher.get_search_title', media, single = True)
    title_words = re.split('\W+', simplifyString(title))

    rel_name = simplifyString(rel_name)
    name_words = re.split('\W+', rel_name)

    # Required words: some are configured, but the release does not match them
    required, has_required = self.containsWords(rel_name, name_words, 'required', media)
    if len(required) > 0 and not has_required:
        log.info2('Wrong: Required word missing: %s', rel_name)
        return False

    # Ignored words: some are configured and the release matches them
    ignored, has_ignored = self.containsWords(rel_name, name_words, 'ignored', media)
    if len(ignored) > 0 and has_ignored:
        log.info2("Wrong: '%s' contains 'ignored words'", rel_name)
        return False

    # Adult tags only count when they are not part of the title itself
    pron_tags = ['xxx', 'sex', 'anal', 't**s', 'f**k', 'p**n', 'orgy', 'milf', 'boobs', 'erotica', 'erotic', 'c**k', 'dick']
    suspicious = (set(pron_tags) - set(title_words)).intersection(name_words)
    if suspicious:
        log.info('Wrong: %s, probably pr0n', rel_name)
        return False

    return True
def namePositionScore(nzb_name, movie_name):
    """Score where the movie name and year sit inside an NZB name.

    +10 when the simplified NZB name starts with the simplified movie name,
    +10 when the four characters following the name equal the detected year.
    When the name splits the NZB name and a name/year was detected, the words
    between the name (and year) and the quality are inspected: the result is
    ``score - 20`` if no quality identifier or alternative is found among the
    NZB words, otherwise 10 is subtracted for every distinct in-between word
    that is not a tag listed in ``name_scores``.
    """
    points = 0

    nzb_words = re.split('\W+', simplifyString(nzb_name))
    qualities = fireEvent('quality.all', single = True)

    # Narrow the name down to its quoted part when there is one
    try:
        nzb_name = re.search(r'([\'"])[^\1]*\1', nzb_name).group(0)
    except:
        pass

    name_year = fireEvent('scanner.name_year', nzb_name, single = True)

    # Reward releases that begin with the movie name
    simple_movie = simplifyString(movie_name)
    pieces = []
    if len(simple_movie) > 0:
        pieces = simplifyString(nzb_name).split(simple_movie)
        if not pieces[0].strip():
            points += 10

    # Nothing after the name to inspect
    if not (len(pieces) > 1 and name_year):
        return points

    # Reward a year that directly follows the name
    after_name = pieces[1].strip()
    if tryInt(after_name[:4]) == name_year.get('year', None):
        points += 10
        after_name = after_name[4:]

    # Look for the quality tag; later matches overwrite earlier ones
    found_quality = None
    for quality in qualities:
        identifier = quality['identifier']
        if identifier in nzb_words:
            found_quality = identifier

        for alternative in quality['alternative']:
            if alternative in nzb_words:
                found_quality = alternative
                break

    if not found_quality:
        return points - 20

    # Tags from name_scores are allowed between the year and the quality
    allowed = []
    for entry in name_scores:
        tag, _points = entry.split(':')
        allowed.append(tag)

    between = re.split('\W+', after_name.split(found_quality)[0].strip())
    unexpected = set(between) - set(allowed)

    return points - 10 * len(unexpected)
def possibleTitles(raw_title):
    """Return the de-duplicated title variants derived from *raw_title*.

    The variants are, in order: the lower-cased safe string, the lower-cased
    raw title, the simplified title, and the simplified title with '&'
    replaced by 'and'.
    """
    variants = [
        toSafeString(raw_title).lower(),
        raw_title.lower(),
        simplifyString(raw_title),
        # same title with the ampersand spelled out
        simplifyString(raw_title.replace('&', 'and')),
    ]
    return removeDuplicate(variants)
def duplicateScore(nzb_name, movie_name):
    """Penalise words that are repeated inside the NZB name.

    Returns -4 for every distinct word that occurs more than once in the
    simplified NZB name and is not a word of the simplified movie name.
    Returns 0 when scoring fails; the traceback is logged in that case.
    """
    try:
        nzb_words = re.split(r'\W+', simplifyString(nzb_name))
        movie_words = re.split(r'\W+', simplifyString(movie_name))

        # Collect repeated words in a single pass instead of re-counting the
        # remaining list for every word.
        seen = set()
        repeated = set()
        for word in nzb_words:
            if word in seen:
                repeated.add(word)
            else:
                seen.add(word)

        # Repeats that belong to the movie title are not penalised
        return len(repeated - set(movie_words)) * -4
    except Exception:
        log.error('Failed doing duplicateScore: %s', traceback.format_exc())
        return 0
def correctName(self, check_name, movie_name):
    """Tell whether a release name only contains words of the movie name.

    The full name, its quoted part and its longest bracketed part are each
    parsed through the 'scanner.name_year' event. Returns True as soon as one
    of them yields a non-empty word list that is entirely contained in the
    (non-empty) word list of the simplified movie name, False otherwise.
    """
    candidates = [check_name]

    # Part of the name between quotes
    try:
        candidates.append(re.search(r'([\'"])[^\1]*\1', check_name).group(0))
    except:
        pass

    # Longest part of the name between square brackets
    try:
        bracketed = re.findall(r'[^[]*\[([^]]*)\]', check_name)
        candidates.append(max(bracketed, key = len).strip())
    except:
        pass

    for candidate in removeDuplicate(candidates):
        parsed = fireEvent('scanner.name_year', candidate, single = True)

        try:
            check_words = removeEmpty(re.split('\W+', parsed.get('name', '')))
            movie_words = removeEmpty(re.split('\W+', simplifyString(movie_name)))

            if len(check_words) > 0 and len(movie_words) > 0 and set(check_words) <= set(movie_words):
                return True
        except:
            # best effort: an unparsable candidate is simply skipped
            pass

    return False
def nameScore(name, year, preferred_words):
    """Score the words of an NZB name.

    Adds the points of every ``name_scores`` entry ('...:tag:points') whose
    tag occurs in the lower-cased name, 5 when ``str(year)`` occurs in it and
    100 for each distinct preferred word among its simplified words.
    Returns 0 when scoring fails; the traceback is logged in that case.
    """
    try:
        lowered = name.lower()
        total = 0

        # Tag bonuses: last field is the points, the one before it the tag
        for entry in name_scores:
            fields = entry.split(':')
            bonus = int(fields.pop())
            tag = fields.pop()
            if tag in lowered:
                total += bonus

        # Bonus for the correct year
        if str(year) in lowered:
            total += 5

        # Bonus per preferred word
        words = re.split('\W+', simplifyString(lowered))
        total += 100 * len(set(words) & set(preferred_words))

        return total
    except:
        log.error('Failed doing nameScore: %s', traceback.format_exc())
        return 0
def createStringIdentifier(self, file_path, folder = '', exclude_filename = False):
    """Build a simplified identifier string for a file path.

    The folder and extension are removed, the longer of the last two path
    segments is kept, multipart and CP tags are stripped, and everything
    matched by ``self.clean`` is cut off. When a year is found in
    ``file_path`` (and the identifier does not start with it) the result is
    "<name> <year>". Repeated words are dropped before the final simplify.
    """
    # Strip the root folder and the extension
    relative = file_path.replace(folder, '').lstrip(os.path.sep)
    ident = os.path.splitext(relative)[0]

    # Drop the file name part if requested (f.e. for DVD files)
    if exclude_filename:
        tail = os.path.split(ident)[-1]
        ident = ident[:len(ident) - len(tail)]

    # All regexes work on lower case tags
    ident = ident.lower()

    # Keep the parent folder name when it is longer than the file name
    try:
        segments = splitString(ident, os.path.sep)
        if len(segments) > 1 and len(segments[-2]) > len(segments[-1]):
            ident = segments[-2]
        else:
            ident = segments[-1]
    except:
        pass

    # Remove multipart markers and the cp tag, then simplify
    ident = simplifyString(self.removeCPTag(self.removeMultipart(ident)))

    year = self.findYear(file_path)

    # Groups, release tags and scene names become '::' markers
    ident = re.sub(self.clean, '::', ident).strip(':')

    if year and ident[:4] != year:
        # Cut at the first run of markers if there is one, else at the year
        separator = ':::' if ':::' in ident else year
        ident = '%s %s' % (ident.split(separator)[0].strip(), year)
    else:
        ident = ident.split('::')[0]

    # Keep only the first occurrence of every word
    unique_words = []
    for word in ident.split():
        if word not in unique_words:
            unique_words.append(word)

    return simplifyString(' '.join(unique_words))
def first(self, title):
    """Return the lower-cased first character of a title, or '#'.

    The title is converted to unicode and simplified, and one leading
    'the ', 'an ' or 'a ' is removed first. '#' is returned when the title is
    empty or does not start with a character from ``ascii_letters``.
    """
    title = simplifyString(toUnicode(title))

    # Strip a single leading article
    for article in ('the ', 'an ', 'a '):
        if title[:len(article)] == article:
            title = title[len(article):]
            break

    if title and title[0] in ascii_letters:
        initial = title[0]
    else:
        initial = '#'

    return str(initial).lower()
def nameRatioScore(nzb_name, movie_name):
    """Score an NZB name by how few words it has besides the movie name.

    Returns 10 minus the number of distinct words of the NZB file identifier
    that are not words of the simplified movie name. Returns 0 when scoring
    fails; the traceback is logged in that case.
    """
    try:
        identifier = fireEvent('scanner.create_file_identifier', nzb_name, single = True)
        nzb_words = re.split('\W+', identifier)
        movie_words = re.split('\W+', simplifyString(movie_name))

        extra_words = set(nzb_words).difference(movie_words)
        return 10 - len(extra_words)
    except:
        log.error('Failed doing nameRatioScore: %s', traceback.format_exc())
        return 0
def simplifyValue(self, value):
    """Simplify a string, or every item of a list, recursively.

    Falsy values are returned unchanged. Raises ValueError for any other
    truthy value that is neither a string nor a list.
    """
    if value:
        if isinstance(value, basestring):
            return simplifyString(value)

        if isinstance(value, list):
            return [self.simplifyValue(item) for item in value]

        raise ValueError("Unsupported value type")

    # Nothing to simplify
    return value
def simplify(self, title):
    """Return a fixed-width (32 character) simplified form of a title.

    A '#' is put in front when the unicode title is empty or does not start
    with a character from ``ascii_letters``. The title is simplified and one
    leading 'the ', 'an ' or 'a ' is removed; the result is padded with
    spaces and cut to 32 characters.
    """
    title = toUnicode(title)

    # Decided on the title before it is simplified
    starts_with_letter = title and len(title) > 0 and title[0] in ascii_letters
    nr_prefix = '' if starts_with_letter else '#'

    title = simplifyString(title)

    # Strip a single leading article
    for article in ('the ', 'an ', 'a '):
        if title[:len(article)] == article:
            title = title[len(article):]
            break

    padded = str(nr_prefix + title).ljust(32, ' ')
    return padded[:32]
def make_key_value(self, data):
    """Build the set of index keys for a media document's title.

    Returns None unless ``data['_t']`` is 'media' and the title is non-empty.
    Otherwise returns ``(keys, None)``, where keys holds, for every word
    offset of the simplified title, the remaining title (cut to 32
    characters) and each of its shortened prefixes longer than ``self.__l``,
    all right-justified to 32 characters with '_'.
    """
    if data.get('_t') != 'media' or len(data.get('title', '')) <= 0:
        return None

    keys = set()
    title = str(simplifyString(data.get('title').lower()))
    min_length = self.__l
    words = title.split()

    for start in range(len(words)):
        # Title starting at this word
        combo = ' '.join(words[start:])[:32].strip()
        keys.add(combo.rjust(32, '_'))

        # Progressively drop characters from the end
        for cut in range(1, max(min_length, min(len(combo), 32))):
            shortened = combo[:-cut].strip()
            if len(shortened) > min_length:
                keys.add(shortened.rjust(32, '_'))

    return keys, None
def getImdb(txt, check_inside = False, multiple = False):
    """Extract IMDB id(s) ('tt' followed by 4 to 7 digits) from a text.

    txt: the text to search. With ``check_inside`` it is passed through
        ``ss()`` instead of ``simplifyString()`` and, when it is the path of
        an existing file, the content of that file is searched instead.
    multiple: return every id found instead of only the first one.

    Returns the first id normalised to 'tt%07d', or False when none is found.
    With ``multiple`` a de-duplicated list of normalised ids is returned,
    which is empty when none is found.
    """
    if not check_inside:
        txt = simplifyString(txt)
    else:
        txt = ss(txt)

    if check_inside and os.path.isfile(txt):
        # Context manager closes the file even when reading fails
        with open(txt, 'r') as handle:
            txt = handle.read()

    try:
        # NOTE(review): the pattern stops at 7 digits, so a longer id would be
        # matched only up to its 7th digit - confirm whether longer ids occur.
        ids = re.findall(r'(tt\d{4,7})', txt)

        if multiple:
            return removeDuplicate(['tt%07d' % tryInt(x[2:]) for x in ids]) if len(ids) > 0 else []

        # Raises IndexError when nothing was found
        return 'tt%07d' % tryInt(ids[0][2:])
    except IndexError:
        pass

    return False
def getUrl(self, url):
    """Return ``self.getCache`` for *url*, keyed by the md5 of its simplified form."""
    cache_key = md5(simplifyString(url))
    return self.getCache(cache_key, url = url)
def getReleaseNameYear(self, release_name, file_name = None): release_name = release_name.strip(' .-_') # Use guessit first guess = {} if file_name: try: guessit = guess_movie_info(toUnicode(file_name)) if guessit.get('title') and guessit.get('year'): guess = { 'name': guessit.get('title'), 'year': guessit.get('year'), } except: log.debug('Could not detect via guessit "%s": %s', (file_name, traceback.format_exc())) # Backup to simple release_name = os.path.basename(release_name.replace('\\', '/')) cleaned = ' '.join(re.split('\W+', simplifyString(release_name))) cleaned = re.sub(self.clean, ' ', cleaned) year = None for year_str in [file_name, release_name, cleaned]: if not year_str: continue year = self.findYear(year_str) if year: break cp_guess = {} if year: # Split name on year try: movie_name = cleaned.rsplit(year, 1).pop(0).strip() if movie_name: cp_guess = { 'name': movie_name, 'year': int(year), } except: pass if not cp_guess: # Split name on multiple spaces try: movie_name = cleaned.split(' ').pop(0).strip() cp_guess = { 'name': movie_name, 'year': int(year) if movie_name[:4] != year else 0, } except: pass if cp_guess.get('year') == guess.get('year') and len(cp_guess.get('name', '')) > len(guess.get('name', '')): cp_guess['other'] = guess return cp_guess elif guess == {}: cp_guess['other'] = guess return cp_guess guess['other'] = cp_guess return guess
def correctRelease(self, nzb = None, media = None, quality = None, **kwargs):
    """Decide whether a found release is an acceptable match for a movie.

    Returns None when the media is not of type 'movie'. Returns False (with
    the reason logged) when the release is outside retention, fails the
    required/ignored word check, contains another quality, fails the 3D
    check, is smaller or larger than the preferred quality allows, or fails
    the provider's ``extra_check``. Returns True when ``imdb_results`` is
    set, when the IMDB id in the description equals the media identifier, or
    when the name matches one of the titles and the year is correct.
    """
    if media.get('type') != 'movie':
        return

    media_title = fireEvent('searcher.get_search_title', media, single = True)

    imdb_results = kwargs.get('imdb_results', False)
    retention = Env.setting('retention', section = 'nzb')

    # Retention only applies to results without a 'seeders' value
    no_seeders = nzb.get('seeders') is None
    if no_seeders and 0 < retention < nzb.get('age', 0):
        log.info2('Wrong: Outside retention, age is %s, needs %s or lower: %s', (nzb['age'], retention, nzb['name']))
        return False

    # Required and ignored words
    if not fireEvent('searcher.correct_words', nzb['name'], media, single = True):
        return False

    if quality:
        preferred_quality = quality
    else:
        # NOTE(review): this branch indexes a falsy `quality`, so it looks like
        # it can only fail - confirm what the intended fallback is.
        preferred_quality = fireEvent('quality.single', identifier = quality['identifier'], single = True)

    # Release name mentions another quality
    contains_other = fireEvent('searcher.contains_other_quality', nzb, movie_year = media['info']['year'], preferred_quality = preferred_quality, single = True)
    if contains_other and isinstance(contains_other, dict):
        found = [x for x in contains_other] if contains_other else 'no quality'
        log.info2('Wrong: %s, looking for %s, found %s', (nzb['name'], quality['label'], found))
        return False

    # 3D or not 3D, depending on the preferred quality
    if not fireEvent('searcher.correct_3d', nzb, preferred_quality = preferred_quality, single = True):
        wanted_3d = '' if preferred_quality['custom'].get('3d') else 'NOT '
        log.info2('Wrong: %s, %slooking for %s in 3D', (nzb['name'], wanted_3d, quality['label']))
        return False

    # Size has to be within the limits of the preferred quality
    release_size = nzb['size']
    if release_size:
        if tryInt(preferred_quality['size_min']) > tryInt(release_size):
            log.info2('Wrong: "%s" is too small to be %s. %sMB instead of the minimal of %sMB.', (nzb['name'], preferred_quality['label'], release_size, preferred_quality['size_min']))
            return False

        if tryInt(preferred_quality['size_max']) < tryInt(release_size):
            log.info2('Wrong: "%s" is too large to be %s. %sMB instead of the maximum of %sMB.', (nzb['name'], preferred_quality['label'], release_size, preferred_quality['size_max']))
            return False

    # Provider specific functions
    get_more = nzb.get('get_more_info')
    if get_more:
        get_more(nzb)

    extra_check = nzb.get('extra_check')
    if extra_check and not extra_check(nzb):
        return False

    if imdb_results:
        return True

    # IMDB id in the description matches the media
    if getImdb(nzb.get('description', '')) == getIdentifier(media):
        return True

    # Without an IMDB link, fall back to matching the name and the year
    for raw_title in media['info']['titles']:
        for movie_title in possibleTitles(raw_title):
            movie_words = re.split('\W+', simplifyString(movie_title))

            if not fireEvent('searcher.correct_name', nzb['name'], movie_title, single = True):
                continue

            # Titles of more than two words get a year range of 1, others 0
            year_range = 1 if len(movie_words) > 2 else 0
            if fireEvent('searcher.correct_year', nzb['name'], media['info']['year'], year_range, single = True):
                return True

    log.info("Wrong: %s, undetermined naming. Looking for '%s (%s)'", (nzb['name'], media_title, media['info']['year']))
    return False