def get_sources(self, item):
	try:
		quality, info = source_utils.get_release_quality(item[1], item[2])
		if item[3] != '0': info.insert(0, item[3])
		info = ' | '.join(info)
		data = client.request(item[2], timeout='10')
		data = client.parseDOM(data, 'a', ret='href')
		if not data: return
		url = [i for i in data if 'magnet:' in i][0]
		url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
		url = source_utils.strip_non_ascii_and_unprintable(url)
		hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
		self.sources.append({'provider': '1337x', 'source': 'torrent', 'seeders': item[5], 'hash': hash, 'name': item[0], 'name_info': item[1],
					'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': item[4]})
	except:
		source_utils.scraper_error('1337X')
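# A minimal, self-contained sketch (stdlib only) of the magnet-link normalization
# pattern the scrapers in this file repeat: unquote the href, restore '&amp;' entities,
# drop the tracker list, then pull out the btih hash and display name. The helper name
# _normalize_magnet and the sample link are illustrative assumptions, not addon code.
import re
from urllib.parse import unquote_plus

def _normalize_magnet(link):
	# normalize the link, then strip everything from the first tracker parameter on
	url = unquote_plus(link).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
	hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
	name = url.split('&dn=')[1]
	return url, hash, name

# usage (hypothetical link):
# url, hash, name = _normalize_magnet('magnet:?xt=urn:btih:abc123&dn=Some.Show.S01E01.720p&tr=udp%3A%2F%2Ftracker.example%3A80')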
def sources(self, data, hostDict):
	sources = []
	if not data: return sources
	try:
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', '%s %s' % (title, hdlr))
		url = '%s%s' % (self.base_link, self.search_link % quote_plus(query))
		# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
		r = client.request(url, timeout='5')
		if not r: return sources
		if any(value in str(r) for value in ['something went wrong', 'Connection timed out', '521: Web server is down', '503 Service Unavailable']): return sources
		table = client.parseDOM(r, 'tbody')
		rows = client.parseDOM(table, 'tr')
	except:
		source_utils.scraper_error('BITCQ')
		return sources
	for row in rows:
		try:
			if 'magnet' not in row: continue
			url = re.findall(r'href="(magnet:.+?)"', row, re.DOTALL)[0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
			name = source_utils.clean_name(url.split('&dn=')[1])
			if not source_utils.check_title(title, aliases, name, hdlr, year): continue
			name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
			if source_utils.remove_lang(name_info): continue
			if not episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
				ep_strings = [r'(?:\.|\-)s\d{2}e\d{2}(?:\.|\-|$)', r'(?:\.|\-)s\d{2}(?:\.|\-|$)', r'(?:\.|\-)season(?:\.|\-)\d{1,2}(?:\.|\-|$)']
				if any(re.search(item, name.lower()) for item in ep_strings): continue
			try:
				seeders = int(re.search(r'<td>(\d+)<', row).group(1))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row).group(0)
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			sources.append({'provider': 'bitcq', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
					'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('BITCQ')
			return sources
	return sources
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(link, timeout='5')
		if not r: return
		if any(value in r for value in ['something went wrong', 'Connection timed out', '521: Web server is down', '503 Service Unavailable']): return
		table = client.parseDOM(r, 'table', attrs={'id': 'table'})
		table_body = client.parseDOM(table, 'tbody')
		rows = client.parseDOM(table_body, 'tr')
	except:
		source_utils.scraper_error('TORRENTZ2')
		return
	for row in rows:
		try:
			if 'magnet:' not in row: continue
			url = re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', row, re.DOTALL | re.I)[0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			name = url.split('&dn=')[1]
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				# seeders = int(client.parseDOM(row, 'td', attrs={'data-title': 'Seeds'})[0])
				seeders = int(client.parseDOM(row, 'td', attrs={'data-title': 'Last Updated'})[0]) # keep an eye on this, looks like they gaffed their col's (seeders and size)
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row.replace(u'\xa0', u' ').replace(u'&nbsp;', u' '))[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'torrentz2', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
					'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('TORRENTZ2')
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(link, timeout='5')
		if not r: return
		if any(value in str(r) for value in ['something went wrong', 'Connection timed out', '521: Web server is down', '503 Service Unavailable']): return
		table = client.parseDOM(r, 'tbody')
		rows = client.parseDOM(table, 'tr')
	except:
		source_utils.scraper_error('BITCQ')
		return
	for row in rows:
		try:
			if 'magnet' not in row: continue
			url = re.findall(r'href="(magnet:.+?)"', row, re.DOTALL)[0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
			name = source_utils.clean_name(url.split('&dn=')[1])
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(re.search(r'<td>(\d+)<', row).group(1))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row).group(0)
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'bitcq', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
					'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('BITCQ')
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = py_tools.ensure_str(self.scraper.get(link).content, errors='replace')
		if not r: return
		posts = client.parseDOM(r, 'div', attrs={'class': 'tgxtable'})
		if not posts: return
	except:
		source_utils.scraper_error('TORRENTGALAXY')
		return
	for post in posts:
		try:
			links = zip(
				re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', post, re.DOTALL | re.I),
				re.findall(r'<span\s*class\s*=\s*["\']badge\s*badge-secondary["\']\s*style\s*=\s*["\']border-radius:4px;["\']>(.*?)</span>', post, re.DOTALL | re.I),
				re.findall(r'<span\s*title\s*=\s*["\']Seeders/Leechers["\']>\[<font\s*color\s*=\s*["\']green["\']><b>(.*?)<', post, re.DOTALL | re.I))
			for link in links:
				url = unquote_plus(link[0]).split('&tr')[0].replace(' ', '.')
				url = source_utils.strip_non_ascii_and_unprintable(url)
				hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
				name = url.split('&dn=')[1]
				name = source_utils.clean_name(name)
				if not self.search_series:
					if not self.bypass_filter:
						if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
					package = 'season'
				elif self.search_series:
					if not self.bypass_filter:
						valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
						if not valid: continue
					else: last_season = self.total_seasons
					package = 'show'
				name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
				if source_utils.remove_lang(name_info): continue
				try:
					seeders = int(link[2])
					if self.min_seeders > seeders: continue
				except: seeders = 0
				quality, info = source_utils.get_release_quality(name_info, url)
				try:
					dsize, isize = source_utils._size(link[1])
					info.insert(0, isize)
				except: dsize = 0
				info = ' | '.join(info)
				item = {'provider': 'torrentgalaxy', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
						'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
				if self.search_series: item.update({'last_season': last_season})
				self.sources.append(item)
		except:
			source_utils.scraper_error('TORRENTGALAXY')
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(link, timeout='5')
		if not r: return
		posts = client.parseDOM(r, 'div', attrs={'class': 'media'})
	except:
		source_utils.scraper_error('BTDB')
		return
	for post in posts:
		try:
			if 'magnet:' not in post: continue
			url = re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', post, re.DOTALL | re.I)[0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			if url in str(self.sources): return
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			name = url.split('&dn=')[1]
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(re.findall(r'Seeders.*?["\']>([0-9]+|[0-9]+,[0-9]+)</strong>', post, re.DOTALL | re.I)[0].replace(',', ''))
				if self.min_seeders > seeders: return
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post)[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'btdb', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
					'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('BTDB')
def get_sources(self, url):
	try:
		r = client.request(url, timeout='5')
		if not r: return
		posts = client.parseDOM(r, 'div', attrs={'class': 'media'})
	except:
		source_utils.scraper_error('BTDB')
		return
	for post in posts:
		try:
			if 'magnet:' not in post: continue
			url = re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', post, re.DOTALL | re.I)[0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			name = url.split('&dn=')[1]
			name = source_utils.clean_name(name)
			if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year): continue
			name_info = source_utils.info_from_name(name, self.title, self.year, self.hdlr, self.episode_title)
			if source_utils.remove_lang(name_info): continue
			if not self.episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
				ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
				if any(re.search(item, name.lower()) for item in ep_strings): continue
			try:
				seeders = int(re.findall(r'Seeders.*?["\']>([0-9]+|[0-9]+,[0-9]+)</strong>', post, re.DOTALL | re.I)[0].replace(',', ''))
				if self.min_seeders > seeders: return
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post)[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			self.sources.append({'provider': 'btdb', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
						'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('BTDB')
def get_sources(self, link):
	try:
		url = re.compile(r'href\s*=\s*["\'](.+?)["\']', re.I).findall(link)[0]
		url = urljoin(self.base_link, url)
		result = client.request(url, timeout='10')
		if not result or 'magnet' not in result: return
		url = re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', result, re.DOTALL | re.I)[0]
		url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&xl=')[0]
		url = source_utils.strip_non_ascii_and_unprintable(url)
		if url in str(self.sources): return
		hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
		name = url.split('&dn=')[1]
		name = source_utils.clean_name(name)
		if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year): return
		name_info = source_utils.info_from_name(name, self.title, self.year, self.hdlr, self.episode_title)
		if source_utils.remove_lang(name_info): return
		if not self.episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
			ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
			if any(re.search(item, name.lower()) for item in ep_strings): return
		try:
			seeders = int(re.findall(r'>Seeds:.*?["\']>([0-9]+|[0-9]+,[0-9]+)</', result, re.DOTALL | re.I)[0].replace(',', ''))
			if self.min_seeders > seeders: return
		except: seeders = 0
		quality, info = source_utils.get_release_quality(name_info, url)
		try:
			size = re.findall(r'>Total Size:.*>(\d.*?)<', result, re.I)[0].strip()
			dsize, isize = source_utils._size(size)
			info.insert(0, isize)
		except: dsize = 0
		info = ' | '.join(info)
		self.sources.append({'provider': 'ettv', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
					'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
	except:
		source_utils.scraper_error('ETTV')
def sources(self, data, hostDict):
	sources = []
	if not data: return sources
	try:
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		query = '%s %s' % (title, hdlr)
		query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
		url = '%s%s' % (self.base_link, self.search_link % quote_plus(query))
		# log_utils.log('url = %s' % url, __name__, log_utils.LOGDEBUG)
		r = client.request(url, timeout='5')
		if not r or 'Error 404' in r: return sources
		r = client.parseDOM(r, 'div', attrs={'id': 'content'})
		r1 = client.parseDOM(r, 'h2')
		posts = zip(client.parseDOM(r1, 'a', ret='href'), client.parseDOM(r1, 'a'))
	except:
		source_utils.scraper_error('MYVIDEOLINK')
		return sources
	items = []
	for post in posts:
		try:
			name = source_utils.strip_non_ascii_and_unprintable(post[1])
			if '<' in name: name = re.sub(r'<.*?>', '', name)
			name = client.replaceHTMLCodes(name)
			name = source_utils.clean_name(name)
			if 'tvshowtitle' in data:
				if not source_utils.check_title(title, aliases, name, hdlr, year):
					if not source_utils.check_title(title, aliases, name, 'S%02d' % int(data['season']), year):
						if not source_utils.check_title(title, aliases, name, 'Season.%d' % int(data['season']), year):
							if not source_utils.check_title(title, aliases, name, 'S%d' % int(data['season']), year): continue
			else:
				if not source_utils.check_title(title, aliases, name, hdlr, year): continue
			name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
			link = post[0]
			results = client.request(link, timeout='5')
			results = client.parseDOM(results, 'div', attrs={'class': 'entry-content cf'})[0]
			if 'tvshowtitle' in data:
				isSeasonList = False
				if 'Season' in name or 'S%02d' % int(data['season']) in name: isSeasonList = True
				results = re.sub(r'\n', '', results)
				results = re.sub(r'\t', '', results).replace('> <', '><')
				test = re.findall(r'<p><b>(.*?)</ul>', results, re.DOTALL) # parsing this site for episodes is a bitch, f**k it this is close as I'm doing
				for x in test:
					test2 = re.search(r'(.*?)</b>', x).group(1)
					if hdlr in test2:
						if isSeasonList:
							name = re.sub(r'\.Season\.\d+', '.%s.' % test2.replace(' ', '.'), name)
							name = re.sub(r'\.S\d+', '.%s' % test2.replace(' ', '.'), name)
						else: name = test2
						links = client.parseDOM(x, 'a', ret='href')
						break
					else:
						try: test3 = re.search(r'<p><b>(.*?)</b></p>', x).group(1)
						except: continue
						if hdlr in test3:
							if isSeasonList:
								name = re.sub(r'\.Season\.\d+', '.%s.' % test3.replace(' ', '.'), name)
								name = re.sub(r'\.S\d+', '.%s' % test3.replace(' ', '.'), name)
							else: name = test3
							links = client.parseDOM(x, 'a', ret='href')
							break
			else:
				links = client.parseDOM(results, 'a', attrs={'class': 'autohyperlink'}, ret='href')
			for link in links:
				try:
					url = py_tools.ensure_text(client.replaceHTMLCodes(str(link)), errors='replace')
					if url.endswith(('.rar', '.zip', '.iso', '.part', '.png', '.jpg', '.bmp', '.gif')): continue
					if url in str(sources): continue
					valid, host = source_utils.is_host_valid(url, hostDict)
					if not valid: continue
					quality, info = source_utils.get_release_quality(name_info, url)
					try:
						size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', results).group(0)
						dsize, isize = source_utils._size(size)
						info.insert(0, isize)
					except: dsize = 0
					info = ' | '.join(info)
					sources.append({'provider': 'myvideolink', 'source': host, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en',
							'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
				except:
					source_utils.scraper_error('MYVIDEOLINK')
		except:
			source_utils.scraper_error('MYVIDEOLINK')
	return sources
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(link, timeout='5')
		if not r or '<table' not in r: return
		table = client.parseDOM(r, 'table', attrs={'class': 'rtable'})
		rows = client.parseDOM(table, 'tr')
	except:
		source_utils.scraper_error('BTSCENE')
		return
	for row in rows:
		try:
			if 'magnet:' not in row: continue
			url = re.search(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', row, re.I).group(1)
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
			name = url.split('&dn=')[1]
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(client.parseDOM(row, 'td', attrs={'class': 'seeds is-hidden-sm-mobile'})[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row).group(0)
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'btscene', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
					'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('BTSCENE')
def sources(self, url, hostDict):
	sources = []
	if not url: return sources
	try:
		data = parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		hdlr2 = 'S%d - %d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		query = '%s %s' % (title, hdlr)
		query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
		query2 = '%s %s' % (title, hdlr2)
		query2 = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query2)
		urls = []
		url = self.search_link % quote_plus(query)
		url = urljoin(self.base_link, url)
		urls.append(url)
		url2 = self.search_link % quote_plus(query2)
		url2 = urljoin(self.base_link, url2)
		urls.append(url2)
		# log_utils.log('urls = %s' % urls, log_utils.LOGDEBUG)
	except:
		source_utils.scraper_error('NYAA')
		return sources
	for url in urls:
		try:
			r = client.request(url, timeout='5')
			if not r or 'magnet' not in r: return sources
			r = re.sub(r'\n', '', r)
			r = re.sub(r'\t', '', r)
			tbody = client.parseDOM(r, 'tbody')
			rows = client.parseDOM(tbody, 'tr')
			for row in rows:
				links = zip(
					re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', row, re.DOTALL | re.I),
					re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row, re.DOTALL),
					[re.findall(r'<td class\s*=\s*["\']text-center["\']>([0-9]+)</td>', row, re.DOTALL)])
				for link in links:
					url = unquote_plus(link[0]).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
					url = source_utils.strip_non_ascii_and_unprintable(url)
					hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
					name = url.split('&dn=')[1]
					name = source_utils.clean_name(name)
					if hdlr not in name and hdlr2 not in name: continue
					if source_utils.remove_lang(name): continue
					if hdlr in name:
						t = name.split(hdlr)[0].replace(year, '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
					if hdlr2 in name:
						t = name.split(hdlr2)[0].replace(year, '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
					# if cleantitle.get(t) != cleantitle.get(title): continue # Anime title matching is a bitch!
					try:
						seeders = int(link[2][0])
						if self.min_seeders > seeders: continue
					except: seeders = 0
					quality, info = source_utils.get_release_quality(name, url)
					try:
						size = link[1]
						dsize, isize = source_utils._size(size)
						info.insert(0, isize)
					except: dsize = 0
					info = ' | '.join(info)
					sources.append({'provider': 'nyaa', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality,
							'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('NYAA')
			return sources
	return sources
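# A minimal stdlib sketch of the zip-of-captures pattern used by the nyaa and
# torrentgalaxy scrapers above: separate re.findall passes over the same row HTML are
# zipped into (magnet, size, seeders) tuples. This only works when every regex matches
# the same rows in the same order; the sample row below is an illustrative assumption.
import re

row = '<td><a href="magnet:?xt=urn:btih:abc&dn=X"></a></td><td>1.4 GB</td><td class="text-center">57</td>'
links = zip(
	re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', row, re.I),
	re.findall(r'((?:\d+\.\d+|\d+)\s*(?:GB|GiB|MB|MiB))', row),
	re.findall(r'class\s*=\s*["\']text-center["\']>([0-9]+)</td>', row))
for magnet, size, seeders in links:
	print(magnet, size, seeders)  # -> magnet:?xt=urn:btih:abc&dn=X 1.4 GB 57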
def get_sources_packs(self, link):
	try:
		# log_utils.log('link = %s' % link)
		r = py_tools.ensure_str(self.scraper.get(link).content, errors='replace')
		if not r: return
		posts = client.parseDOM(r, 'tr', attrs={'class': 'tlr'})
		posts += client.parseDOM(r, 'tr', attrs={'class': 'tlz'})
	except:
		source_utils.scraper_error('EXTRATORRENT')
		return
	for post in posts:
		try:
			post = re.sub(r'\n', '', post)
			post = re.sub(r'\t', '', post)
			url = re.search(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', post, re.I).group(1)
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			if url in str(self.sources): continue
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
			name = url.split('&dn=')[1]
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(client.parseDOM(post, 'td', attrs={'class': 'sn'})[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post).group(0)
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'extratorrent', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
					'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('EXTRATORRENT')
def sources(self, url, hostDict):
	sources = []
	if not url: return sources
	try:
		data = parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else ('(' + year + ')')
		query = title
		query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', query)
		if 'tvshowtitle' in data: url = self.show_link % query.replace(' ', '-')
		else: url = self.search_link % quote_plus(query)
		url = urljoin(self.base_link, url)
		# log_utils.log('url = %s' % url, __name__, log_utils.LOGDEBUG)
		r = client.request(url, timeout='5')
		if not r: return sources
		r = r.replace('\r', '').replace('\n', '').replace('\t', '')
		r = client.parseDOM(r, 'div', attrs={'class': 'card'})
		if not r: return sources
	except:
		source_utils.scraper_error('TOPNOW')
		return sources
	for i in r:
		try:
			if 'magnet:' not in i: continue
			name = client.parseDOM(i, 'img', attrs={'class': 'thumbnails'}, ret='alt')[0].replace(u'\xa0', u' ')
			if not source_utils.check_title(title, aliases, name, hdlr.replace('(', '').replace(')', ''), year): continue
			url = re.search(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', i, re.DOTALL | re.I).group(1)
			try: url = unquote_plus(url).decode('utf8').replace('&amp;', '&').replace(' ', '.')
			except: url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.')
			url = re.sub(r'(&tr=.+)&dn=', '&dn=', url) # some links on topnow have &tr= before &dn=
			url = url.split('&tr=')[0].replace(' ', '.')
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			release_name = url.split('&dn=')[1]
			release_name = source_utils.clean_name(release_name)
			name_info = source_utils.info_from_name(release_name, title, year, hdlr, episode_title)
			if source_utils.remove_lang(name_info): continue
			seeders = 0 # seeders not available on topnow
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', i)[-1] # file size is no longer available on topnow's new site
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			sources.append({'provider': 'topnow', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': release_name, 'name_info': name_info,
					'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('TOPNOW')
	return sources
def get_sources_packs(self, link, url):
	try:
		self.headers.update({'Referer': link})
		query_data = {
			'query': url, 'offset': 0, 'limit': 99, 'filters[field]': 'seeds', 'filters[sort]': 'desc',
			'filters[time]': 4, 'filters[category]': 4, 'filters[adult]': False, 'filters[risky]': False}
		api_url = '%s%s' % (self.base_link, self.api_search_link)
		rjson = client.request(api_url, post=query_data, headers=self.headers, timeout='5')
		if not rjson: return
		files = jsloads(rjson)
		error = files.get('error')
		if error: return
	except:
		source_utils.scraper_error('BITLORD')
		return
	for file in files.get('content'):
		try:
			name = source_utils.clean_name(file.get('name'))
			url = unquote_plus(file.get('magnet')).replace('&amp;', '&').replace(' ', '.')
			url = re.sub(r'(&tr=.+)&dn=', '&dn=', url) # some links on bitlord have &tr= before &dn=
			url = url.split('&tr=')[0].split('&xl=')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = file.get('seeds')
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = file.get('size')
				size = str(size) + ' GB' if len(str(size)) <= 2 else str(size) + ' MB' # bitlord size is all over the place between MB and GB
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'bitlord', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
					'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('BITLORD')
def get_sources(self, link):
	try:
		url = re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', link, re.DOTALL | re.I)[0]
		url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
		url = source_utils.strip_non_ascii_and_unprintable(url)
		if url in str(self.sources): return
		hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
		name = url.split('&dn=')[1]
		name = source_utils.clean_name(name)
		if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year): return
		name_info = source_utils.info_from_name(name, self.title, self.year, self.hdlr, self.episode_title)
		if source_utils.remove_lang(name_info): return
		if not self.episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
			ep_strings = [r'(?:\.|\-)s\d{2}e\d{2}(?:\.|\-|$)', r'(?:\.|\-)s\d{2}(?:\.|\-|$)', r'(?:\.|\-)season(?:\.|\-)\d{1,2}(?:\.|\-|$)']
			if any(re.search(item, name.lower()) for item in ep_strings): return
		try:
			seeders = int(client.parseDOM(link, 'td', attrs={'class': 'sy'})[0].replace(',', ''))
			if self.min_seeders > seeders: return
		except: seeders = 0
		quality, info = source_utils.get_release_quality(name_info, url)
		try:
			size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', link)[0]
			dsize, isize = source_utils._size(size)
			info.insert(0, isize)
		except: dsize = 0
		info = ' | '.join(info)
		self.sources.append({'provider': 'extratorrent', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
					'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
	except:
		source_utils.scraper_error('EXTRATORRENT')
def get_sources(self, url):
	try:
		r = client.request(url, timeout='5')
		if not r: return
		results = jsloads(r)['results']
	except:
		source_utils.scraper_error('SOLIDTORRENTS')
		return
	for item in results:
		try:
			url = unquote_plus(item['magnet']).replace(' ', '.')
			url = re.sub(r'(&tr=.+)&dn=', '&dn=', url) # some links on solidtorrents have &tr= before &dn=
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = item['infohash'].lower()
			if url in str(self.sources): continue
			name = source_utils.clean_name(item['title'])
			if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year): continue
			name_info = source_utils.info_from_name(name, self.title, self.year, self.hdlr, self.episode_title)
			if source_utils.remove_lang(name_info): continue
			if not self.episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
				ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
				if any(re.search(item, name.lower()) for item in ep_strings): continue
			try:
				seeders = int(item['swarm']['seeders'])
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				dsize, isize = source_utils.convert_size(item["size"], to='GB')
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			self.sources.append({'provider': 'solidtorrents', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
						'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('SOLIDTORRENTS')
def sources(self, data, hostDict):
	sources = []
	if not data: return sources
	try:
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		query = '%s %s' % (title, hdlr)
		query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', query)
		url = '%s%s' % (self.base_link, self.search_link % quote_plus(query))
		# log_utils.log('url = %s' % url, __name__, log_utils.LOGDEBUG)
		api_url = '%s%s' % (self.base_link, self.api_search_link)
		headers = cache.get(self._get_token_and_cookies, 1)
		if not headers: return sources
		headers.update({'Referer': url})
		query_data = {
			'query': query, 'offset': 0, 'limit': 99, 'filters[field]': 'seeds', 'filters[sort]': 'desc',
			'filters[time]': 4, 'filters[category]': 3 if 'tvshowtitle' not in data else 4, 'filters[adult]': False, 'filters[risky]': False}
		rjson = client.request(api_url, post=query_data, headers=headers, timeout='5')
		if not rjson: return sources
		files = jsloads(rjson)
		error = files.get('error')
		if error: return sources
	except:
		source_utils.scraper_error('BITLORD')
		return sources
	for file in files.get('content'):
		try:
			name = source_utils.clean_name(file.get('name'))
			if not source_utils.check_title(title, aliases, name, hdlr, year): continue
			name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
			if source_utils.remove_lang(name_info): continue
			url = unquote_plus(file.get('magnet')).replace('&amp;', '&').replace(' ', '.')
			url = re.sub(r'(&tr=.+)&dn=', '&dn=', url) # some links on bitlord have &tr= before &dn=
			url = url.split('&tr=')[0].split('&xl=')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
			if not episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
				ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
				if any(re.search(item, name.lower()) for item in ep_strings): continue
			try:
				seeders = file.get('seeds')
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = file.get('size')
				size = str(size) + ' GB' if len(str(size)) <= 2 else str(size) + ' MB' # bitlord size is all over the place between MB and GB
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			sources.append({'provider': 'bitlord', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
					'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('BITLORD')
	return sources
def sources(self, url, hostDict):
	sources = []
	if not url: return sources
	try:
		data = parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		query = '"^%s" %s' % (title, hdlr)
		query = re.sub(r'[^A-Za-z0-9\s\.\-\"\^]+', '', query)
		url = self.search_link % quote_plus(query)
		url = urljoin(self.base_link, url)
		# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
		r = client.request(url, timeout='5')
		if not r: return sources
		if any(value in r for value in ['something went wrong', 'Connection timed out', '521: Web server is down', '503 Service Unavailable']): return sources
		table = client.parseDOM(r, 'table', attrs={'id': 'table'})
		table_body = client.parseDOM(table, 'tbody')
		rows = client.parseDOM(table_body, 'tr')
	except:
		source_utils.scraper_error('TORRENTZ2')
		return sources
	for row in rows:
		try:
			if 'magnet:' not in row: continue
			url = re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', row, re.DOTALL | re.I)[0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			name = url.split('&dn=')[1]
			name = source_utils.clean_name(name)
			if not source_utils.check_title(title, aliases, name, hdlr, year): continue
			name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
			if source_utils.remove_lang(name_info): continue
			if not episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
				ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
				if any(re.search(item, name.lower()) for item in ep_strings): continue
			try:
				# seeders = int(client.parseDOM(row, 'td', attrs={'data-title': 'Seeds'})[0])
				seeders = int(client.parseDOM(row, 'td', attrs={'data-title': 'Last Updated'})[0]) # keep an eye on this, looks like they gaffed their col's (seeders and size)
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row.replace(u'\xa0', u' ').replace(u'&nbsp;', u' '))[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			sources.append({'provider': 'torrentz2', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
					'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('TORRENTZ2')
	return sources
def sources(self, url, hostDict):
	sources = []
	if not url: return sources
	try:
		scraper = cfscrape.create_scraper(delay=5)
		data = parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		isSeasonQuery = False
		query = '%s %s' % (title, hdlr)
		query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
		# query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', query)
		query = re.sub(r'\s', '-', query)
		if int(year) >= 2021: self.base_link = self.base_new
		else: self.base_link = self.base_old
		url = urljoin(self.base_link, query)
		# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
		# r = scraper.get(url).content
		r = py_tools.ensure_str(scraper.get(url).content, errors='replace')
		if not r or 'nothing was found' in r:
			if 'tvshowtitle' in data:
				season = re.search(r'S(.*?)E', hdlr).group(1)
				query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', title)
				# query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', title)
				query = re.sub(r'\s', '-', query)
				query = query + "-S" + season
				url = urljoin(self.base_link, query)
				# r = scraper.get(url).content
				r = py_tools.ensure_str(scraper.get(url).content, errors='replace')
				isSeasonQuery = True
			else: return sources
		if not r or 'nothing was found' in r: return sources # may need to add fallback to use self.search_link if nothing found
		posts = client.parseDOM(r, "div", attrs={"class": "content"})
		if not posts: return sources
	except:
		source_utils.scraper_error('RLSBB')
		return sources
	release_title = re.sub(r'[^A-Za-z0-9\s\.-]+', '', title).replace(' ', '.')
	items = []
	count = 0
	for post in posts:
		if count >= 300: break # to limit large link list and slow scrape time
		try:
			post_titles = re.findall(r'(?:.*>|>\sRelease Name.*|\s)(%s.*?)<' % release_title, post, re.I) # parse all matching release_titles in each post(content) group
			items = []
			if len(post_titles) > 1:
				index = 0
				for name in post_titles:
					start = post_titles[index].replace('[', '\\[').replace('(', '\\(').replace(')', '\\)').replace('+', '\\+').replace(' \\ ', ' \\\\ ')
					end = (post_titles[index + 1].replace('[', '\\[').replace('(', '\\(').replace(')', '\\)').replace('+', '\\+')).replace(' \\ ', ' \\\\ ') if index + 1 < len(post_titles) else ''
					try:
						container = re.findall(r'(?:%s)([\S\s]+)(?:%s)' % (start, end), post, re.I)[0] # parse all data between release_titles in multi post(content) group
					except:
						source_utils.scraper_error('RLSBB')
						continue
					try: size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', container)[0].replace(',', '.')
					except: size = '0'
					container = client.parseDOM(container, 'a', ret='href')
					items.append((name, size, container))
					index += 1
			elif len(post_titles) == 1:
				name = post_titles[0]
				container = client.parseDOM(post, 'a', ret='href') # parse all links in a single post(content) group
				try: size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post)[0].replace(',', '.')
				except: size = '0'
				items.append((name, size, container))
			else: continue
			for group_name, size, links in items:
				for i in links:
					name = group_name
					# if isSeasonQuery and hdlr not in name.upper():
						# name = i.rsplit("/", 1)[-1]
						# if hdlr not in name.upper(): continue
					if hdlr not in name.upper():
						name = i.rsplit("/", 1)[-1]
						if hdlr not in name.upper(): continue
					name = client.replaceHTMLCodes(name)
					name = source_utils.strip_non_ascii_and_unprintable(name)
					name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
					url = py_tools.ensure_text(client.replaceHTMLCodes(str(i)), errors='replace')
					if url in str(sources): continue
					if url.endswith(('.rar', '.zip', '.iso', '.part', '.png', '.jpg', '.bmp', '.gif')): continue
					valid, host = source_utils.is_host_valid(url, hostDict)
					if not valid: continue
					quality, info = source_utils.get_release_quality(name, url)
					try:
						if size == '0':
							try: size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', name)[0].replace(',', '.')
							except: raise Exception()
						dsize, isize = source_utils._size(size)
						info.insert(0, isize)
					except: dsize = 0
					info = ' | '.join(info)
					sources.append({'provider': 'rlsbb', 'source': host, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en',
							'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
					count += 1
		except:
			source_utils.scraper_error('RLSBB')
	return sources
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		# For some reason Zooqle returns 404 even though the response has a body.
		# This is probably a bug on Zooqle's server and the error should just be ignored.
		html = client.request(link, ignoreErrors=404, timeout='5')
		if not html: return
		html = html.replace('&nbsp;', ' ')
		try: table = client.parseDOM(html, 'table', attrs={'class': 'table table-condensed table-torrents vmiddle'})[0]
		except: return
		rows = client.parseDOM(table, 'tr')
		if not rows: return
	except:
		source_utils.scraper_error('ZOOQLE')
		return
	for row in rows:
		try:
			try:
				if 'magnet:' not in row: continue
				url = re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', row, re.DOTALL | re.I)[0]
				url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
				url = source_utils.strip_non_ascii_and_unprintable(url)
				if url in str(self.sources): continue
			except: continue
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			try:
				name = re.findall(r'<a class\s*=\s*["\'].+?>(.+?)</a>', row, re.DOTALL | re.I)[0]
				name = client.cleanHTML(name)
				name = unquote_plus(name)
				name = source_utils.clean_name(name)
			except: continue
			# some titles have foreign title translation in front so remove it
			if './.' in name: name = name.split('./.', 1)[1]
			if '.com.' in name.lower():
				try: name = re.sub(r'(.*?)\W{2,10}', '', name)
				except: name = name.split('-.', 1)[1].lstrip()
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(re.findall(r'["\']Seeders:\s*([0-9]+|[0-9]+,[0-9]+)\s*\|', row, re.DOTALL | re.I)[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row)[-1]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'zooqle', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
					'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('ZOOQLE')
def get_sources(self, url):
	try:
		# For some reason Zooqle returns 404 even though the response has a body.
		# This is probably a bug on Zooqle's server and the error should just be ignored.
		html = client.request(url, ignoreErrors=404, timeout='5')
		if not html: return
		html = html.replace('&nbsp;', ' ')
		try: table = client.parseDOM(html, 'table', attrs={'class': 'table table-condensed table-torrents vmiddle'})[0]
		except: return
		rows = client.parseDOM(table, 'tr')
		if not rows: return
	except:
		source_utils.scraper_error('ZOOQLE')
		return
	for row in rows:
		try:
			try:
				if 'magnet:' not in row: continue
				url = re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', row, re.DOTALL | re.I)[0]
				url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
				url = source_utils.strip_non_ascii_and_unprintable(url)
				if url in str(self.sources): continue
			except: continue
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			try:
				name = re.findall(r'<a class\s*=\s*["\'].+?>(.+?)</a>', row, re.DOTALL | re.I)[0]
				name = client.cleanHTML(name)
				name = unquote_plus(name)
				name = source_utils.clean_name(name)
			except: continue
			# some titles have foreign title translation in front so remove it
			if './.' in name: name = name.split('./.', 1)[1]
			if '.com.' in name.lower():
				try: name = re.sub(r'(.*?)\W{2,10}', '', name)
				except: name = name.split('-.', 1)[1].lstrip()
			if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year): continue
			name_info = source_utils.info_from_name(name, self.title, self.year, self.hdlr, self.episode_title)
			if source_utils.remove_lang(name_info): continue
			if not self.episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
				ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
				if any(re.search(item, name.lower()) for item in ep_strings): continue
			try:
				seeders = int(re.findall(r'["\']Seeders:\s*([0-9]+|[0-9]+,[0-9]+)\s*\|', row, re.DOTALL | re.I)[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row)[-1]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			self.sources.append({'provider': 'zooqle', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
						'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('ZOOQLE')
def get_sources(self, row):
	row = re.sub(r'\n', '', row)
	row = re.sub(r'\t', '', row)
	data = re.compile(r'<a\s*href\s*=\s*["\'](/torrent_details/.+?)["\']><span>(.+?)</span>.*?<td\s*class\s*=\s*["\']size-row["\']>(.+?)</td><td\s*class\s*=\s*["\']sn["\']>([0-9]+)</td>').findall(row)
	if not data: return
	for items in data:
		try:
			# items[1] does not contain full info like the &dn= portion of the magnet
			link = urljoin(self.base_link, items[0])
			link = client.request(link, timeout='5')
			if not link or 'Download Magnet link' not in link: continue
			link = unquote_plus(client.parseDOM(link, 'a', attrs={'title': 'Download Magnet link'}, ret='href')[0])
			if not link: continue
			url = re.compile(r'(magnet:.*)').findall(link)[0].replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(unquote_plus(url)) # many links dbl quoted so we must unquote again
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			name = unquote_plus(url.split('&dn=')[1])
			name = source_utils.clean_name(name)
			if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year): continue
			name_info = source_utils.info_from_name(name, self.title, self.year, self.hdlr, self.episode_title)
			if source_utils.remove_lang(name_info): continue
			if not self.episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
				ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
				if any(re.search(item, name.lower()) for item in ep_strings): continue
			try:
				seeders = int(items[3].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', items[2])[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			self.sources.append({'provider': 'isohunt2', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
						'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('ISOHUNT2')
def sources(self, url, hostDict):
	sources = []
	if not url: return sources
	try:
		scraper = cfscrape.create_scraper()
		data = parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		if 'tvshowtitle' in data:
			query = '%s %s' % (title, hdlr)
			query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', query)
			url = self.search_link % quote_plus(query)
		else:
			url = self.search_link % data['imdb']
		url = urljoin(self.base_link, url)
		# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
		r = py_tools.ensure_str(scraper.get(url).content, errors='replace')
		posts = client.parseDOM(r, 'div', attrs={'class': 'tgxtable'})
		if not posts: return sources
	except:
		source_utils.scraper_error('TORRENTGALAXY')
		return sources
	for post in posts:
		try:
			links = zip(
				re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', post, re.DOTALL | re.I),
				re.findall(r'<span\s*class\s*=\s*["\']badge\s*badge-secondary["\']\s*style\s*=\s*["\']border-radius:4px;["\']>(.*?)</span>', post, re.DOTALL | re.I),
				re.findall(r'<span\s*title\s*=\s*["\']Seeders/Leechers["\']>\[<font\s*color\s*=\s*["\']green["\']><b>(.*?)<', post, re.DOTALL | re.I))
			for link in links:
				url = unquote_plus(link[0]).split('&tr')[0].replace(' ', '.')
				url = source_utils.strip_non_ascii_and_unprintable(url)
				hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
				name = url.split('&dn=')[1]
				name = source_utils.clean_name(name)
				if not source_utils.check_title(title, aliases, name, hdlr, year): continue
				name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
				if source_utils.remove_lang(name_info): continue
				if not episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
					ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
					if any(re.search(item, name.lower()) for item in ep_strings): continue
				try:
					seeders = int(link[2])
					if self.min_seeders > seeders: continue
				except: seeders = 0
				quality, info = source_utils.get_release_quality(name_info, url)
				try:
					dsize, isize = source_utils._size(link[1])
					info.insert(0, isize)
				except: dsize = 0
				info = ' | '.join(info)
				sources.append({'provider': 'torrentgalaxy', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
						'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('TORRENTGALAXY')
	return sources
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link))
	try:
		r = client.request(link, timeout='5')
		if not r: return
		results = jsloads(r)['results']
	except:
		source_utils.scraper_error('SOLIDTORRENTS')
		return
	for item in results:
		try:
			url = unquote_plus(item['magnet']).replace(' ', '.')
			url = re.sub(r'(&tr=.+)&dn=', '&dn=', url) # some links on solidtorrents have &tr= before &dn=
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = item['infohash'].lower()
			if url in str(self.sources): continue
			name = source_utils.clean_name(item['title'])
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(item['swarm']['seeders'])
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				dsize, isize = source_utils.convert_size(item["size"], to='GB')
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'solidtorrents', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
					'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('SOLIDTORRENTS')