def check_title(title, aliases, release_title, hdlr, year, years=None):
    """Decide whether a release name refers to the requested title.

    The text of the release name that precedes ``hdlr`` is stripped of the
    year, parentheses and resolution tags and compared, via
    ``cleantitle.get``, with the title and each of its aliases. The release
    must also contain one of ``years`` (when given) or ``hdlr`` itself.

    Args:
        title: Title being searched for.
        aliases: Alias data; it is passed through ``jsloads`` and then
            ``aliases_to_array``. Anything that fails to decode is treated
            as "no aliases".
        release_title: Raw release name; normalised with
            ``release_title_format`` before any comparison.
        hdlr: Marker string that ends the title portion of the release name
            (compared lower-cased).
        year: Year string removed from aliases and from the release name.
        years: Optional iterable of acceptable year strings. Per the original
            author's note it is supplied for movies only; episode scrapers
            pass ``None``.

    Returns:
        ``True`` when the release matches, ``False`` otherwise. If an
        unexpected error occurs the error is logged and the verdict reached
        so far is returned (``True`` unless a check had already failed).
    """
    try:
        aliases = aliases_to_array(jsloads(aliases))
    except Exception:
        # Missing or malformed alias data simply means there are no aliases.
        aliases = None

    title_list = []
    if aliases:
        for item in aliases:
            try:
                alias = item.replace('!', '').replace('(', '').replace(')', '').replace('&', 'and').replace(year, '')
                if years:
                    for i in years:
                        alias = alias.replace(i, '')
                if alias in title_list:
                    continue
                title_list.append(alias)
            except Exception:
                # A single bad alias must not stop the remaining ones.
                from fenomscrapers.modules import log_utils
                log_utils.error()

    # Set before the try block so the error handler can always return it.
    match = True
    try:
        title = title.replace('!', '').replace('(', '').replace(')', '').replace('&', 'and')
        title_list.append(title)

        # NOTE(review): the original comment says release_title_format()
        # lower-cases its result; the comparisons below rely on that.
        release_title = release_title_format(release_title)
        h = hdlr.lower()

        # Keep only the text in front of the handler, minus year and brackets.
        t = release_title.split(h)[0].replace(year, '').replace('(', '').replace(')', '').replace('&', 'and')
        if years:
            for i in years:
                t = t.split(i)[0]
        t = t.split('2160p')[0].split('4k')[0].split('1080p')[0].split('720p')[0]

        # Normalise the candidate once instead of once per alias.
        cleaned = cleantitle.get(t)
        if all(cleantitle.get(i) != cleaned for i in title_list):
            match = False

        if years:
            if not any(value in release_title for value in years):
                match = False
        elif h not in release_title:
            match = False
        return match
    except Exception:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        return match
def search(self, title, year):
    """Query the site's search page and return the first matching result URL.

    The search URL is built from ``self.base_link`` and ``self.search_link``
    and fetched with ``self.scraper`` using ``self.headers``. Each ``<li>``
    inside the ``div.list_items`` container is inspected for an
    ``a.title`` anchor; the first anchor whose text contains both the
    normalised ``title`` and ``year`` wins.

    Args:
        title: Title to search for.
        year: Year string that must appear in the result's text.

    Returns:
        The absolute URL of the first matching result, or ``None`` when
        nothing matches, the response is empty, or any error occurs.
    """
    try:
        url = urljoin(self.base_link, self.search_link % (quote_plus(title)))
        html = py_tools.ensure_str(self.scraper.get(url, headers=self.headers).content, errors='replace')
        if not html:
            return None

        container = dom_parser.parse_dom(html, 'div', {'class': 'list_items'})[0]
        wanted = cleantitle.get(title)
        for item in dom_parser.parse_dom(container.content, 'li'):
            anchors = dom_parser.parse_dom(item, 'a', {'class': 'title'})
            if not anchors:
                # A list item without a title link (e.g. filler markup) used
                # to raise IndexError and throw away every other result.
                continue
            href = anchors[0].attrs['href']
            label = anchors[0].content
            if wanted in cleantitle.get(label) and year in label:
                return urljoin(self.base_link, href)
        return None
    except Exception:
        # Best-effort lookup: any failure is reported as "not found".
        return None
def _highest_season_in_range(haystack, first, last_fmt, joiner, total):
    """Return the highest season N in 2..total whose range text is in haystack.

    The text probed for each N is ``first + joiner + (last_fmt % N)``, for
    example ``'s01' + '-' + 's05'``. Returns ``None`` when nothing occurs.
    """
    found = None
    for number in range(2, total + 1):
        if (first + joiner + (last_fmt % number)) in haystack:
            # Keep going: "1.to.2" is a prefix of "1.to.20", so the first hit
            # is not necessarily the real end of the range.
            found = number
    return found


def filter_show_pack(show_title, aliases, imdb, year, season, release_title, total_seasons):
    """Decide whether a release is a multi-season show pack for the title.

    The release is rejected when its leading title portion does not match
    the show title or an alias, or when it looks like a single episode, a
    single season, or a season range that does not begin at season 1.
    Otherwise the last season covered by the pack is worked out from any
    "1.2.3", "1.to.9", "s01-s09" style range found in the name.

    Args:
        show_title: Show title being searched for.
        aliases: Alias data; passed through ``jsloads`` and then
            ``aliases_to_array``. Undecodable input means "no aliases".
        imdb: Not read by this function.
        year: Year string; removed from aliases and used as a title
            delimiter in the release name.
        season: Not read by this function.
        release_title: Raw release name; normalised with
            ``release_title_format``.
        total_seasons: Number of seasons of the show (anything ``int()``
            accepts).

    Returns:
        A ``(valid, last_season)`` tuple. ``(False, 0)`` means the release
        is not an acceptable show pack. ``(True, n)`` means it is, with
        ``n`` the highest season detected in the name, or ``total_seasons``
        unchanged when no explicit range is present. An unexpected error is
        logged and reported as ``(False, 0)`` so callers can always unpack
        the result.
    """
    try:
        aliases = aliases_to_array(jsloads(aliases))
    except Exception:
        # Missing or malformed alias data simply means there are no aliases.
        aliases = None

    title_list = []
    if aliases:
        for item in aliases:
            try:
                alias = item.replace('!', '').replace('(', '').replace(')', '').replace('&', 'and').replace(year, '')
                if alias in title_list:
                    continue
                title_list.append(alias)
            except Exception:
                # A single bad alias must not stop the remaining ones.
                from fenomscrapers.modules import log_utils
                log_utils.error()

    try:
        show_title = show_title.replace('!', '').replace('(', '').replace(')', '').replace('&', 'and')
        title_list.append(show_title)

        # "s1"/"s01" are delimiters so that only packs starting at season 1
        # leave a clean title in front of them.
        split_list = [
            '.all.seasons', 'seasons', 'season', 'the.complete', 'complete',
            'all.torrent', 'total.series', 'tv.series', 'series', 'edited',
            's1', 's01', year,
        ]
        release_title = release_title_format(release_title)
        dot_release_title = release_title.replace('-', '.')

        # Cut the name at every delimiter to isolate the leading title text.
        t = dot_release_title
        for i in split_list:
            t = t.split(i)[0]
        cleaned = cleantitle.get(t)
        if all(cleantitle.get(x) != cleaned for x in title_list):
            return False, 0

        # Single episodes (handled by the single-episode scrape).
        episode_regex = [
            r's\d{1,3}e\d{1,3}',
            r's[0-3]{1}[0-9]{1}[.-]e\d{1,2}',
            r's\d{1,3}[.-]\d{1,3}e\d{1,3}',
            r'season[.-]?\d{1,3}[.-]?ep[.-]?\d{1,3}',
            r'season[.-]?\d{1,3}[.-]?episode[.-]?\d{1,3}',
        ]
        if any(re.search(item, release_title) for item in episode_regex):
            return False, 0

        # Season ranges that do not begin at 1, e.g. "seasons.5-6",
        # "seasons5.to.6", "seasons.5.thru.6", "season.2-9.s02-s09.1080p".
        season_range_regex = [
            r'(?:season|seasons|s)[.-]?(?:0?[2-9]{1}|[1-3]{1}[0-9]{1})(?:[.-]?to[.-]?|[.-]?thru[.-]?|[.-])(?:season|seasons|s|)[.-]?(?:0?[3-9]{1}(?!\d{2}p)|[1-3]{1}[0-9]{1}(?!\d{2}p))',
        ]
        if any(re.search(item, release_title) for item in season_range_regex):
            return False, 0

        # Single seasons (handled by the season-pack scrape).
        season_regex = [
            # "season.1.01.complete": second number repeats the first with a leading 0
            r'season[.-]?([1-9]{1})[.-]0{1}\1[.-]?complete',
            # "season.9.10.complete": first number > 1 followed by another number
            r'season[.-]?([2-9]{1})[.-](?:[0-9]+)[.-]?complete',
            # "season.02.s02"
            r'season[.-]?\d{1,2}[.-]s\d{1,2}',
            # "season.02.complete"
            r'season[.-]?\d{1,2}[.-]complete',
            # "season.02.1080p" and "season02.1080p"
            r'season[.-]?\d{1,2}[.-]\d{3,4}p{0,1}',
            # "season.02." not followed by "to", "thru" or another 1-2 digit number and separator (a range)
            r'season[.-]?\d{1,2}[.-](?!thru|to|\d{1,2}[.-])',
            # name ends with "season.1", "season.01" or "season01", optional trailing dot
            r'season[.-]?\d{1,2}[.]?$',
            # single season followed by a 4 digit year, e.g. "season.1.1971"
            r'season[.-]?\d{1,2}[.-](?:19|20)[0-9]{2}',
            # single season, 3 digits, then a year, e.g. "season.1.004.1971"
            r'season[.-]?\d{1,2}[.-]\d{3}[.-]{1,2}(?:19|20)[0-9]{2}',
            # ".s01.complete" not preceded by "thru", "to" or a 2 digit number
            r'(?<!thru)(?<!to)(?<!\d{2})[.-]s\d{2}[.-]complete',
            # ".s02." not preceded by "thru", "to" or "sNN", and not followed by
            # ".thru", ".to", ".sNN" or ".NN"
            r'(?<!thru)(?<!to)(?<!s\d{2})[.-]s\d{2}(?![.-]thru)(?![.-]to)(?![.-]s\d{2})(?![.-]\d{2})',
        ]
        if any(re.search(item, release_title) for item in season_regex):
            return False, 0

        # Spelled-out single seasons ("complete.first.season", "season.one").
        spelled_regex = [r'(complete[.-]%s[.-]season)' % x for x in season_ordinal_list]
        spelled_regex += [r'(complete[.-]%s[.-]season)' % x for x in season_ordinal2_list]
        spelled_regex += [r'(season[.-]%s)' % x for x in season_list]
        if any(re.search(item, release_title) for item in spelled_regex):
            return False, 0

        # From here on nothing is filtered out; only the last season covered
        # by the pack is determined so the caller can filter on it.
        total = int(total_seasons)

        # Enumerated seasons: "1.2.3.4" or "1.2.3.and.4" (dots or dashes).
        enumerated = []
        running = '1'
        for number in range(2, total + 1):
            enumerated.append('%s.and.%s' % (running, number))
            running = '%s.%s' % (running, number)
            enumerated.append(running)
        keys = [i for i in enumerated if i in dot_release_title]
        if keys:
            return True, int(keys[-1].split('.')[-1])

        # Explicit ranges, tried in this order for each spelling of the
        # first/last season: "A.to.B" and "A.thru.B" (dots or dashes in the
        # name), then "A-B" and "A~B" in the unmodified name. The "s01"
        # spelling is additionally tried as "s01.s09".
        families = (('1', '%d'), ('01', '%02d'), ('s1', 's%d'), ('s01', 's%02d'))
        for first, last_fmt in families:
            probes = [
                ('.to.', dot_release_title),
                ('.thru.', dot_release_title),
                ('-', release_title),
                ('~', release_title),
            ]
            if first == 's01':
                probes.append(('.', release_title))
            for joiner, haystack in probes:
                last_season = _highest_season_in_range(haystack, first, last_fmt, joiner, total)
                if last_season is not None:
                    return True, last_season

        return True, total_seasons
    except Exception:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        # Always hand back a tuple; a pack that could not be validated is
        # rejected rather than accepted blindly.
        return False, 0
def filter_season_pack(show_title, aliases, year, season, release_title):
    """Decide whether a release is a pack of exactly the requested season.

    The release is rejected when its leading title portion does not match
    the show title or an alias, when it looks like a single episode, when
    it does not mention the requested season in one of the recognised
    ".sN." / ".season.N." spellings, or when that mention is part of a
    multi-season range.

    Args:
        show_title: Show title being searched for.
        aliases: Alias data; passed through ``jsloads`` and then
            ``aliases_to_array``. Undecodable input means "no aliases".
        year: Year string; removed from aliases and used as a title
            delimiter in the release name.
        season: Season number as a string (``zfill`` is called on it).
        release_title: Raw release name; normalised with
            ``release_title_format``.

    Returns:
        ``True`` when the release is an acceptable pack for ``season``,
        ``False`` otherwise. An unexpected error is logged and reported as
        ``True`` (the release is kept).
    """
    try:
        aliases = aliases_to_array(jsloads(aliases))
    except Exception:
        # Missing or malformed alias data simply means there are no aliases.
        aliases = None

    title_list = []
    if aliases:
        for item in aliases:
            try:
                alias = item.replace('!', '').replace('(', '').replace(')', '').replace('&', 'and').replace(year, '')
                if alias in title_list:
                    continue
                title_list.append(alias)
            except Exception:
                # A single bad alias must not stop the remaining ones.
                from fenomscrapers.modules import log_utils
                log_utils.error()

    try:
        show_title = show_title.replace('!', '').replace('(', '').replace(')', '').replace('&', 'and')
        title_list.append(show_title)

        # Every accepted spelling of the requested season, dot-delimited.
        season_fill = season.zfill(2)
        season_check = '.s%s.' % season
        season_fill_check = '.s%s.' % season_fill
        season_full_check = '.season.%s.' % season
        season_full_check_ns = '.season%s.' % season
        season_full_fill_check = '.season.%s.' % season_fill
        season_full_fill_check_ns = '.season%s.' % season_fill
        string_list = [
            season_check, season_fill_check, season_full_check,
            season_full_check_ns, season_full_fill_check,
            season_full_fill_check_ns,
        ]
        split_list = [
            season_check, season_fill_check, '.' + season + '.season',
            'total.season', 'season', 'the.complete', 'complete', year,
        ]

        release_title = release_title_format(release_title)
        rt = release_title.replace('-', '.')

        # Cut the name at every delimiter to isolate the leading title text.
        t = rt
        for i in split_list:
            t = t.split(i)[0]
        cleaned = cleantitle.get(t)
        if all(cleantitle.get(x) != cleaned for x in title_list):
            return False

        # Single episodes (handled by the single-episode scrape).
        episode_regex = [
            r's\d{1,3}e\d{1,3}',
            r's[0-3]{1}[0-9]{1}[.-]e\d{1,2}',
            r's\d{1,3}[.-]\d{1,3}e\d{1,3}',
            r'season[.-]?\d{1,3}[.-]?ep[.-]?\d{1,3}',
            r'season[.-]?\d{1,3}[.-]?episode[.-]?\d{1,3}',
        ]
        if any(re.search(item, release_title) for item in episode_regex):
            return False

        # The requested season has to be named somewhere in the release.
        if not any(i in rt for i in string_list):
            return False

        # Season ranges (handled by the show-pack scrape) plus a few
        # non-conforming layouts. The leading "." of each prefix is left
        # unescaped, so it matches any single character in the regex.
        range_regex = [
            # ".s1-s9.", ".s1-s39."
            season_check.rstrip('.') + r'[.-]s([2-9]{1}|[1-3]{1}[0-9]{1})(?:[.-]|$)',
            # ".s01-s09.", ".s01-s39."
            season_fill_check.rstrip('.') + r'[.-]s\d{2}(?:[.-]|$)',
            # ".s01.09."
            season_fill_check.rstrip('.') + r'[.-]\d{2}(?:[.-]|$)',
            # reverse range: another "sNN" directly in front of the season
            r'\Ws\d{2}\W%s' % season_fill_check.lstrip('.'),
            # ".season.1.to.9.", ".season.1.to.39"
            season_full_check.rstrip('.') + r'[.-]to[.-]([2-9]{1}|[1-3]{1}[0-9]{1})(?:[.-]|$)',
            # ".season.1.season.9.", ".season.1.season.39"
            season_full_check.rstrip('.') + r'[.-]season[.-]([2-9]{1}|[1-3]{1}[0-9]{1})(?:[.-]|$)',
            # "season.1.9.", "season.1.39."
            season_full_check.rstrip('.') + r'[.-]([2-9]{1}|[1-3]{1}[0-9]{1})(?:[.-]|$)',
            # "season.1.9.09."
            season_full_check.rstrip('.') + r'[.-]\d{1}[.-]\d{1,2}(?:[.-]|$)',
            # single season, 3 digits, then a year, e.g. "season.1.004.1971"
            season_full_check.rstrip('.') + r'[.-]\d{3}[.-](?:19|20)[0-9]{2}(?:[.-]|$)',
            # 2 digit season followed by a 3 digit range, e.g. "season.10.001-025."
            season_full_fill_check.rstrip('.') + r'[.-]\d{3}[.-]\d{3}(?:[.-]|$)',
            # 2 digit season range, e.g. "season.01-season.09."
            season_full_fill_check.rstrip('.') + r'[.-]season[.-]\d{2}(?:[.-]|$)',
        ]
        if any(re.search(item, release_title) for item in range_regex):
            return False
        return True
    except Exception:
        from fenomscrapers.modules import log_utils
        log_utils.error()
        # Fail open: a release that could not be evaluated is kept.
        return True