def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(link, timeout='5')
		if not r: return
		r = re.sub(r'\n', '', r)
		r = re.sub(r'\t', '', r)
		posts = re.compile(r'<table\s*class\s*=\s*["\']table2["\']\s*cellspacing\s*=\s*["\']\d+["\']>(.*?)</table>', re.I).findall(r)
		posts = client.parseDOM(posts, 'tr')
	except:
		source_utils.scraper_error('TORRENTDOWNLOAD')
		return
	for post in posts:
		try:
			if '<th' in post: continue
			links = re.compile(r'<a\s*href\s*=\s*["\'](.+?)["\']>.*?<td class\s*=\s*["\']tdnormal["\']>((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))</td><td class\s*=\s*["\']tdseed["\']>([0-9]+|[0-9]+,[0-9]+)</td>', re.I).findall(post)
			for items in links:
				link = items[0].split("/")
				hash = link[1].lower()
				name = link[2].replace('+MB+', '')
				name = unquote_plus(name).replace('&amp;', '&')
				name = source_utils.clean_name(name)
				if not self.search_series:
					if not self.bypass_filter:
						if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
					package = 'season'
				elif self.search_series:
					if not self.bypass_filter:
						valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
						if not valid: continue
					else:
						last_season = self.total_seasons
					package = 'show'
				name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
				if source_utils.remove_lang(name_info): continue
				url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
				try:
					seeders = int(items[2].replace(',', ''))
					if self.min_seeders > seeders: continue
				except: seeders = 0
				quality, info = source_utils.get_release_quality(name_info, url)
				try:
					size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', items[1])[0]
					dsize, isize = source_utils._size(size)
					info.insert(0, isize)
				except: dsize = 0
				info = ' | '.join(info)
				item = {'provider': 'torrentdownload', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
				if self.search_series: item.update({'last_season': last_season})
				self.sources.append(item)
		except:
			source_utils.scraper_error('TORRENTDOWNLOAD')
def sources(self, data, hostDict):
	sources = []
	if not data: return sources
	try:
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', '%s %s' % (title, hdlr))
		url = '%s%s' % (self.base_link, self.search_link % quote_plus(query))
		# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
		r = client.request(url, timeout='5')
		if not r: return sources
		if any(value in str(r) for value in ['something went wrong', 'Connection timed out', '521: Web server is down', '503 Service Unavailable']): return sources
		table = client.parseDOM(r, 'tbody')
		rows = client.parseDOM(table, 'tr')
	except:
		source_utils.scraper_error('BITCQ')
		return sources
	for row in rows:
		try:
			if 'magnet' not in row: continue
			url = re.findall(r'href="(magnet:.+?)"', row, re.DOTALL)[0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
			name = source_utils.clean_name(url.split('&dn=')[1])
			if not source_utils.check_title(title, aliases, name, hdlr, year): continue
			name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
			if source_utils.remove_lang(name_info): continue
			if not episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
				ep_strings = [r'(?:\.|\-)s\d{2}e\d{2}(?:\.|\-|$)', r'(?:\.|\-)s\d{2}(?:\.|\-|$)', r'(?:\.|\-)season(?:\.|\-)\d{1,2}(?:\.|\-|$)']
				if any(re.search(item, name.lower()) for item in ep_strings): continue
			try:
				seeders = int(re.search(r'<td>(\d+)<', row).group(1))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row).group(0)
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			sources.append({'provider': 'bitcq', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('BITCQ')
			return sources
	return sources
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(link, timeout='5')
		if not r: return
		if any(value in r for value in ['something went wrong', 'Connection timed out', '521: Web server is down', '503 Service Unavailable']): return
		table = client.parseDOM(r, 'table', attrs={'id': 'table'})
		table_body = client.parseDOM(table, 'tbody')
		rows = client.parseDOM(table_body, 'tr')
	except:
		source_utils.scraper_error('TORRENTZ2')
		return
	for row in rows:
		try:
			if 'magnet:' not in row: continue
			url = re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', row, re.DOTALL | re.I)[0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			name = url.split('&dn=')[1]
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else:
					last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				# seeders = int(client.parseDOM(row, 'td', attrs={'data-title': 'Seeds'})[0])
				seeders = int(client.parseDOM(row, 'td', attrs={'data-title': 'Last Updated'})[0]) # keep an eye on this, looks like they gaffed their columns (seeders and size)
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row.replace(u'\xa0', u' ').replace(u'&nbsp;', u' '))[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'torrentz2', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('TORRENTZ2')
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(link, timeout='5')
		if not r: return
		if any(value in str(r) for value in ['something went wrong', 'Connection timed out', '521: Web server is down', '503 Service Unavailable']): return
		table = client.parseDOM(r, 'tbody')
		rows = client.parseDOM(table, 'tr')
	except:
		source_utils.scraper_error('BITCQ')
		return
	for row in rows:
		try:
			if 'magnet' not in row: continue
			url = re.findall(r'href="(magnet:.+?)"', row, re.DOTALL)[0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
			name = source_utils.clean_name(url.split('&dn=')[1])
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else:
					last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(re.search(r'<td>(\d+)<', row).group(1))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row).group(0)
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'bitcq', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('BITCQ')
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = py_tools.ensure_str(self.scraper.get(link).content, errors='replace')
		if not r: return
		posts = client.parseDOM(r, 'div', attrs={'class': 'tgxtable'})
		if not posts: return
	except:
		source_utils.scraper_error('TORRENTGALAXY')
		return
	for post in posts:
		try:
			links = zip(
				re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', post, re.DOTALL | re.I),
				re.findall(r'<span\s*class\s*=\s*["\']badge\s*badge-secondary["\']\s*style\s*=\s*["\']border-radius:4px;["\']>(.*?)</span>', post, re.DOTALL | re.I),
				re.findall(r'<span\s*title\s*=\s*["\']Seeders/Leechers["\']>\[<font\s*color\s*=\s*["\']green["\']><b>(.*?)<', post, re.DOTALL | re.I))
			for link in links:
				url = unquote_plus(link[0]).split('&tr')[0].replace(' ', '.')
				url = source_utils.strip_non_ascii_and_unprintable(url)
				hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
				name = url.split('&dn=')[1]
				name = source_utils.clean_name(name)
				if not self.search_series:
					if not self.bypass_filter:
						if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
					package = 'season'
				elif self.search_series:
					if not self.bypass_filter:
						valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
						if not valid: continue
					else:
						last_season = self.total_seasons
					package = 'show'
				name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
				if source_utils.remove_lang(name_info): continue
				try:
					seeders = int(link[2])
					if self.min_seeders > seeders: continue
				except: seeders = 0
				quality, info = source_utils.get_release_quality(name_info, url)
				try:
					dsize, isize = source_utils._size(link[1])
					info.insert(0, isize)
				except: dsize = 0
				info = ' | '.join(info)
				item = {'provider': 'torrentgalaxy', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
				if self.search_series: item.update({'last_season': last_season})
				self.sources.append(item)
		except:
			source_utils.scraper_error('TORRENTGALAXY')
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		headers = {'User-Agent': client.agent()}
		r = py_tools.ensure_str(self.scraper.get(link, headers=headers).content, errors='replace')
		if not r or '<table' not in r: return
		table = client.parseDOM(r, 'table', attrs={'class': 'table2'})[0]
		rows = client.parseDOM(table, 'tr')
		if not rows: return
	except:
		source_utils.scraper_error('LIMETORRENTS')
		return
	for row in rows:
		try:
			data = client.parseDOM(row, 'a', ret='href')[0]
			if '/search/' in data: continue
			data = re.sub(r'\s', '', data).strip()
			hash = re.compile(r'/torrent/(.+?).torrent', re.I).findall(data)[0]
			name = re.findall(r'title\s*=\s*(.+?)$', data, re.DOTALL | re.I)[0]
			name = source_utils.clean_name(name)
			url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else:
					last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(client.parseDOM(row, 'td', attrs={'class': 'tdseed'})[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row)[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'limetorrents', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('LIMETORRENTS')
def sources(self, url, hostDict):
	sources = []
	if not url: return sources
	try:
		data = parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
		title = data['title'].replace('&', 'and')
		aliases = data['aliases']
		hdlr = data['year']
		year = data['year']
		imdb = data['imdb']
		url = self.search_link % imdb
		api_url = urljoin(self.base_link, url)
		# log_utils.log('api_url = %s' % api_url, log_utils.LOGDEBUG)
		rjson = client.request(api_url, timeout='5')
		if not rjson: return sources
		files = jsloads(rjson)
		if files.get('status') == 'error' or files.get('data').get('movie_count') == 0: return sources
		title_long = files.get('data').get('movies')[0].get('title_long').replace(' ', '.')
		torrents = files.get('data').get('movies')[0].get('torrents')
	except:
		source_utils.scraper_error('YTSMX')
		return sources
	for torrent in torrents:
		try:
			quality = torrent.get('quality')
			type = torrent.get('type')
			hash = torrent.get('hash')
			name = '%s.[%s].[%s].[YTS.MX]' % (title_long, quality, type)
			url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
			if not source_utils.check_title(title, aliases, name, hdlr, year): continue
			name_info = source_utils.info_from_name(name, title, year, hdlr)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = torrent.get('seeds')
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = torrent.get('size')
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			sources.append({'provider': 'ytsmx', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('YTSMX')
	return sources
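# Illustrative sketch only (not part of the scraper): the minimal response shape the YTS.MX
# parser above assumes. Field names are taken from the keys sources() reads; the real API
# returns additional fields, and all values below are made up.
example_yts_response = {
	'status': 'ok',
	'data': {
		'movie_count': 1,
		'movies': [{
			'title_long': 'Example Movie (2020)',
			'torrents': [{'quality': '1080p', 'type': 'bluray', 'hash': 'EXAMPLEHASH', 'seeds': 42, 'size': '1.8 GB'}]
		}]
	}
}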
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(link, timeout='5')
		if not r: return
		posts = client.parseDOM(r, 'div', attrs={'class': 'media'})
	except:
		source_utils.scraper_error('BTDB')
		return
	for post in posts:
		try:
			if 'magnet:' not in post: continue
			url = re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', post, re.DOTALL | re.I)[0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			if url in str(self.sources): return
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			name = url.split('&dn=')[1]
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else:
					last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(re.findall(r'Seeders.*?["\']>([0-9]+|[0-9]+,[0-9]+)</strong>', post, re.DOTALL | re.I)[0].replace(',', ''))
				if self.min_seeders > seeders: return
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post)[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'btdb', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('BTDB')
def get_items(self, url):
	try:
		headers = {'User-Agent': client.agent()}
		r = client.request(url, headers=headers, timeout='10')
		if not r or '<tbody' not in r: return
		posts = client.parseDOM(r, 'tbody')[0]
		posts = client.parseDOM(posts, 'tr')
	except:
		source_utils.scraper_error('1337X')
		return
	for post in posts:
		try:
			data = client.parseDOM(post, 'a', ret='href')[1]
			link = urljoin(self.base_link, data)
			name = client.parseDOM(post, 'a')[1]
			name = source_utils.clean_name(unquote_plus(name))
			if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year): continue
			name_info = source_utils.info_from_name(name, self.title, self.year, self.hdlr, self.episode_title)
			if source_utils.remove_lang(name_info): continue
			if not self.episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
				ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
				if any(re.search(item, name.lower()) for item in ep_strings): continue
			try:
				seeders = int(client.parseDOM(post, 'td', attrs={'class': 'coll-2 seeds'})[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post)[0]
				dsize, isize = source_utils._size(size)
			except:
				isize = '0'
				dsize = 0
			self.items.append((name, name_info, link, isize, dsize, seeders))
		except:
			source_utils.scraper_error('1337X')
def get_sources(self, url):
	try:
		r = client.request(url, timeout='5')
		if not r: return
		r = re.sub(r'\n', '', r)
		r = re.sub(r'\t', '', r)
		posts = re.compile(r'<table\s*class\s*=\s*["\']table2["\']\s*cellspacing\s*=\s*["\']\d+["\']>(.*?)</table>', re.I).findall(r)
		posts = client.parseDOM(posts, 'tr')
	except:
		source_utils.scraper_error('TORRENTDOWNLOAD')
		return
	for post in posts:
		try:
			if '<th' in post: continue
			links = re.compile(r'<a\s*href\s*=\s*["\'](.+?)["\']>.*?<td class\s*=\s*["\']tdnormal["\']>((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))</td><td class\s*=\s*["\']tdseed["\']>([0-9]+|[0-9]+,[0-9]+)</td>', re.I).findall(post)
			for items in links:
				link = items[0].split("/")
				hash = link[1].lower()
				name = link[2].replace('+MB+', '')
				name = unquote_plus(name).replace('&amp;', '&')
				name = source_utils.clean_name(name)
				if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year): continue
				name_info = source_utils.info_from_name(name, self.title, self.year, self.hdlr, self.episode_title)
				if source_utils.remove_lang(name_info): continue
				url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
				if not self.episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
					ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
					if any(re.search(item, name.lower()) for item in ep_strings): continue
				try:
					seeders = int(items[2].replace(',', ''))
					if self.min_seeders > seeders: continue
				except: seeders = 0
				quality, info = source_utils.get_release_quality(name_info, url)
				try:
					size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', items[1])[0]
					dsize, isize = source_utils._size(size)
					info.insert(0, isize)
				except: dsize = 0
				info = ' | '.join(info)
				self.sources.append({'provider': 'torrentdownload', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('TORRENTDOWNLOAD')
def get_sources(self, link):
	# log_utils.log('link = %s' % link, log_utils.LOGDEBUG)
	try:
		headers = {'User-Agent': client.agent()}
		r = py_tools.ensure_str(self.scraper.get(link, headers=headers).content, errors='replace')
		if not r or '<table' not in r: return
		table = client.parseDOM(r, 'table', attrs={'class': 'table2'})[0]
		rows = client.parseDOM(table, 'tr')
		if not rows: return
	except:
		source_utils.scraper_error('LIMETORRENTS')
		return
	for row in rows:
		try:
			data = client.parseDOM(row, 'a', ret='href')[0]
			if '/search/' in data: continue
			data = re.sub(r'\s', '', data).strip()
			hash = re.compile(r'/torrent/(.+?).torrent', re.I).findall(data)[0]
			name = re.findall(r'title\s*=\s*(.+?)$', data, re.DOTALL | re.I)[0]
			name = source_utils.clean_name(name)
			if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year): continue
			name_info = source_utils.info_from_name(name, self.title, self.year, self.hdlr, self.episode_title)
			if source_utils.remove_lang(name_info): continue
			url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
			if not self.episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
				ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
				if any(re.search(item, name.lower()) for item in ep_strings): continue
			try:
				seeders = int(client.parseDOM(row, 'td', attrs={'class': 'tdseed'})[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row)[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			self.sources.append({'provider': 'limetorrents', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('LIMETORRENTS')
def get_sources(self, link):
	try:
		try: url = link[0].encode('ascii', errors='ignore').decode('ascii', errors='ignore').replace('&nbsp;', ' ')
		except: url = link[0].replace('&nbsp;', ' ')
		if '/torrent/' not in url: return
		try: name = link[1].encode('ascii', errors='ignore').decode('ascii', errors='ignore').replace(' ', '.')
		except: name = link[1].replace(' ', '.')
		if '<span' in name:
			nam = name.split('<span')[0].replace(' ', '.')
			span = client.parseDOM(name, 'span')[0].replace('-', '.')
			name = '%s%s' % (nam, span)
		name = source_utils.clean_name(name)
		if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year): return
		name_info = source_utils.info_from_name(name, self.title, self.year, self.hdlr, self.episode_title)
		if source_utils.remove_lang(name_info): return
		if not self.episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
			ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
			if any(re.search(item, name.lower()) for item in ep_strings): return
		if not url.startswith('http'): link = urljoin(self.base_link, url)
		link = client.request(link, timeout='5')
		if link is None: return
		hash = re.findall(r'Infohash.*?>(?!<)(.+?)</', link, re.DOTALL | re.I)[0]
		url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
		if url in str(self.sources): return
		try:
			seeders = int(re.findall(r'Swarm.*?>(?!<)([0-9]+)</', link, re.DOTALL | re.I)[0].replace(',', ''))
			if self.min_seeders > seeders: return
		except: seeders = 0
		quality, info = source_utils.get_release_quality(name_info, url)
		try:
			size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', link)[0]
			dsize, isize = source_utils._size(size)
			info.insert(0, isize)
		except: dsize = 0
		info = ' | '.join(info)
		self.sources.append({'provider': 'torrentfunk', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
	except:
		source_utils.scraper_error('TORRENTFUNK')
def get_pack_items(self, url):
	# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
	try:
		r = client.request(url, timeout='5')
		if not r: return
		r = client.parseDOM(r, 'table', attrs={'class': 'tmain'})[0]
		links = re.findall(r'<a\s*href\s*=\s*["\'](/torrent/.+?)["\']>(.+?)</a>', r, re.DOTALL | re.I)
	except:
		source_utils.scraper_error('TORRENTFUNK')
		return
	for link in links:
		try:
			try: url = link[0].encode('ascii', errors='ignore').decode('ascii', errors='ignore').replace('&nbsp;', ' ')
			except: url = link[0].replace('&nbsp;', ' ')
			if '/torrent/' not in url: continue
			try: name = link[1].encode('ascii', errors='ignore').decode('ascii', errors='ignore').replace(' ', '.')
			except: name = link[1].replace(' ', '.')
			if '<span' in name:
				nam = name.split('<span')[0].replace(' ', '.')
				span = client.parseDOM(name, 'span')[0].replace('-', '.')
				name = '%s%s' % (nam, span)
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else:
					last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			if not url.startswith('http'): url = urljoin(self.base_link, url)
			if self.search_series: self.items.append((name, name_info, url, package, last_season))
			else: self.items.append((name, name_info, url, package))
		except:
			source_utils.scraper_error('TORRENTFUNK')
def get_sources(self, url):
	try:
		r = client.request(url, timeout='5')
		if not r: return
		posts = client.parseDOM(r, 'div', attrs={'class': 'media'})
	except:
		source_utils.scraper_error('BTDB')
		return
	for post in posts:
		try:
			if 'magnet:' not in post: continue
			url = re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', post, re.DOTALL | re.I)[0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			name = url.split('&dn=')[1]
			name = source_utils.clean_name(name)
			if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year): continue
			name_info = source_utils.info_from_name(name, self.title, self.year, self.hdlr, self.episode_title)
			if source_utils.remove_lang(name_info): continue
			if not self.episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
				ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
				if any(re.search(item, name.lower()) for item in ep_strings): continue
			try:
				seeders = int(re.findall(r'Seeders.*?["\']>([0-9]+|[0-9]+,[0-9]+)</strong>', post, re.DOTALL | re.I)[0].replace(',', ''))
				if self.min_seeders > seeders: return
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post)[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			self.sources.append({'provider': 'btdb', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('BTDB')
def get_sources(self, link): try: url = re.compile(r'href\s*=\s*["\'](.+?)["\']', re.I).findall(link)[0] url = urljoin(self.base_link, url) result = client.request(url, timeout='10') if not result or 'magnet' not in result: return url = re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', result, re.DOTALL | re.I)[0] url = unquote_plus(url).replace('&', '&').replace(' ', '.').split('&xl=')[0] url = source_utils.strip_non_ascii_and_unprintable(url) if url in str(self.sources): return hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0] name = url.split('&dn=')[1] name = source_utils.clean_name(name) if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year): return name_info = source_utils.info_from_name(name, self.title, self.year, self.hdlr, self.episode_title) if source_utils.remove_lang(name_info): return if not self.episode_title: #filter for eps returned in movie query (rare but movie and show exists for Run in 2020) ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?'] if any(re.search(item, name.lower()) for item in ep_strings): return try: seeders = int(re.findall(r'>Seeds:.*?["\']>([0-9]+|[0-9]+,[0-9]+)</', result, re.DOTALL | re.I)[0].replace(',', '')) if self.min_seeders > seeders: return except: seeders = 0 quality, info = source_utils.get_release_quality(name_info, url) try: size = re.findall(r'>Total Size:.*>(\d.*?)<', result, re.I)[0].strip() dsize, isize = source_utils._size(size) info.insert(0, isize) except: dsize = 0 info = ' | '.join(info) self.sources.append({'provider': 'ettv', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize}) except: source_utils.scraper_error('ETTV')
def sources(self, url, hostDict):
	sources = []
	if not url: return sources
	try:
		scraper = cfscrape.create_scraper()
		data = parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		if 'tvshowtitle' in data:
			query = '%s %s' % (title, hdlr)
			query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', query)
			url = self.search_link % quote_plus(query)
		else: url = self.search_link % data['imdb']
		url = urljoin(self.base_link, url)
		# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
		r = py_tools.ensure_str(scraper.get(url).content, errors='replace')
		posts = client.parseDOM(r, 'div', attrs={'class': 'tgxtable'})
		if not posts: return sources
	except:
		source_utils.scraper_error('TORRENTGALAXY')
		return sources
	for post in posts:
		try:
			links = zip(
				re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', post, re.DOTALL | re.I),
				re.findall(r'<span\s*class\s*=\s*["\']badge\s*badge-secondary["\']\s*style\s*=\s*["\']border-radius:4px;["\']>(.*?)</span>', post, re.DOTALL | re.I),
				re.findall(r'<span\s*title\s*=\s*["\']Seeders/Leechers["\']>\[<font\s*color\s*=\s*["\']green["\']><b>(.*?)<', post, re.DOTALL | re.I))
			for link in links:
				url = unquote_plus(link[0]).split('&tr')[0].replace(' ', '.')
				url = source_utils.strip_non_ascii_and_unprintable(url)
				hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
				name = url.split('&dn=')[1]
				name = source_utils.clean_name(name)
				if not source_utils.check_title(title, aliases, name, hdlr, year): continue
				name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
				if source_utils.remove_lang(name_info): continue
				if not episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
					ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
					if any(re.search(item, name.lower()) for item in ep_strings): continue
				try:
					seeders = int(link[2])
					if self.min_seeders > seeders: continue
				except: seeders = 0
				quality, info = source_utils.get_release_quality(name_info, url)
				try:
					dsize, isize = source_utils._size(link[1])
					info.insert(0, isize)
				except: dsize = 0
				info = ' | '.join(info)
				sources.append({'provider': 'torrentgalaxy', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('TORRENTGALAXY')
	return sources
def sources(self, data, hostDict):
	sources = []
	if not data: return sources
	try:
		self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		self.aliases = data['aliases']
		self.episode_title = data['title'] if 'tvshowtitle' in data else None
		self.year = data['year']
		self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else self.year
		query = '%s %s' % (self.title, self.hdlr)
		query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', query)
		if 'tvshowtitle' in data: url = self.tvsearch.format(quote_plus(query))
		else: url = self.moviesearch.format(quote_plus(query))
		url = '%s%s' % (self.base_link, url)
		# log_utils.log('url = %s' % url)
		headers = {'User-Agent': client.agent()}
		result = client.request(url, headers=headers, timeout='5')
		if not result: return sources
		rows = client.parseDOM(result, 'tr', attrs={'class': 't-row'})
		if not rows: return sources
		rows = [i for i in rows if 'racker:' not in i]
	except:
		source_utils.scraper_error('GLODLS')
		return sources
	for row in rows:
		try:
			ref = client.parseDOM(row, 'a', ret='href')
			url = [i for i in ref if 'magnet:' in i][0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1).lower()
			name = unquote_plus(client.parseDOM(row, 'a', ret='title')[0])
			name = source_utils.clean_name(name)
			if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year): continue
			name_info = source_utils.info_from_name(name, self.title, self.year, self.hdlr, self.episode_title)
			if source_utils.remove_lang(name_info): continue
			if not self.episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
				ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
				if any(re.search(item, name.lower()) for item in ep_strings): continue
			try:
				seeders = int(re.search(r'<td.*?<font\s*color\s*=\s*["\'].+?["\']><b>([0-9]+|[0-9]+,[0-9]+)</b>', row).group(1).replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row).group(0)
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			sources.append({'provider': 'glodls', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('GLODLS')
	return sources
def sources(self, url, hostDict):
	sources = []
	if not url: return sources
	try:
		data = parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		query = '%s %s' % (title, hdlr)
		query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', query)
		url = self.search_link % quote_plus(query)
		url = urljoin(self.base_link, url)
		# log_utils.log('url = %s' % url)
		rjson = client.request(url, timeout='5')
		if not rjson or rjson == 'null' or any(value in rjson for value in ['521 Origin Down', 'No results returned', 'Connection Time-out', 'Database maintenance']): return sources
		files = jsloads(rjson)
	except:
		source_utils.scraper_error('TORRENTPARADISE')
		return sources
	for file in files:
		try:
			hash = file['id']
			name = source_utils.clean_name(file['text'])
			if not source_utils.check_title(title, aliases, name, hdlr, year): continue
			name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
			if source_utils.remove_lang(name_info): continue
			url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
			if not episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
				ep_strings = [r'(?:\.|\-)s\d{2}e\d{2}(?:\.|\-|$)', r'(?:\.|\-)s\d{2}(?:\.|\-|$)', r'(?:\.|\-)season(?:\.|\-)\d{1,2}(?:\.|\-|$)']
				if any(re.search(item, name.lower()) for item in ep_strings): continue
			try:
				seeders = int(file['s'])
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				dsize, isize = source_utils.convert_size(float(file["len"]), to='GB')
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			sources.append({'provider': 'torrentparadise', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('TORRENTPARADISE')
	return sources
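# Illustrative sketch only (assumption, not from the source): the per-entry shape the
# torrentparadise parser above expects in the decoded JSON list. It reads only 'id' (infohash),
# 'text' (release name), 's' (seeders) and 'len' (size, apparently in bytes given the
# convert_size(..., to='GB') call). All values below are made up.
example_torrentparadise_entry = {'id': '0123456789abcdef0123456789abcdef01234567', 'text': 'Example.Movie.2020.1080p.WEB-DL.x264', 's': 25, 'len': 2147483648}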
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link))
	try:
		# headers = {'User-Agent': client.agent()}
		headers = {'User-Agent': client.randomagent()}
		result = client.request(link, headers=headers, timeout='5')
		if not result: return
		rows = client.parseDOM(result, 'tr', attrs={'class': 't-row'})
		if not rows: return
		rows = [i for i in rows if 'racker:' not in i]
	except:
		source_utils.scraper_error('GLODLS')
		return
	for row in rows:
		try:
			ref = client.parseDOM(row, 'a', ret='href')
			url = [i for i in ref if 'magnet:' in i][0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1).lower()
			name = unquote_plus(client.parseDOM(row, 'a', ret='title')[0])
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else:
					last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(re.search(r'<td.*?<font\s*color\s*=\s*["\'].+?["\']><b>([0-9]+|[0-9]+,[0-9]+)</b>', row).group(1).replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row).group(0)
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'glodls', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('GLODLS')
def get_sources(self, name, url):
	try:
		# r = self.scraper.get(url, headers=self.headers).content
		r = py_tools.ensure_str(self.scraper.get(url, headers=self.headers).content, errors='replace')
		name = client.replaceHTMLCodes(name)
		if name.startswith('['): name = name.split(']')[1]
		name = name.strip().replace(' ', '.')
		name_info = source_utils.info_from_name(name, self.title, self.year, self.hdlr, self.episode_title)
		if source_utils.remove_lang(name_info): return self.sources
		l = dom_parser.parse_dom(r, 'pre', {'class': 'links'})
		if l == []: return
		s = ''
		for i in l:
			s += i.content
			urls = re.findall(r'''((?:http|ftp|https)://[\w_-]+(?:(?:\.[\w_-]+)+)[\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])''', i.content, flags=re.M | re.S)
			urls = [i for i in urls if not i.endswith(('.rar', '.zip', '.iso', '.idx', '.sub', '.srt'))]
			for link in urls:
				url = py_tools.ensure_text(client.replaceHTMLCodes(str(link)), errors='replace')
				if url in str(self.sources): continue
				valid, host = source_utils.is_host_valid(url, self.hostDict)
				if not valid: continue
				quality, info = source_utils.get_release_quality(name, url)
				try:
					size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', name).group(0)
					dsize, isize = source_utils._size(size)
					info.insert(0, isize)
				except: dsize = 0
				info = ' | '.join(info)
				self.sources.append({'provider': 'rapidmoviez', 'source': host, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
	except:
		source_utils.scraper_error('RAPIDMOVIEZ')
def get_sources_packs(self, link):
	try:
		# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
		rjson = client.request(link, timeout='5')
		if not rjson or rjson == 'null' or any(value in rjson for value in ['521 Origin Down', 'No results returned', 'Connection Time-out', 'Database maintenance']): return
		files = jsloads(rjson)
	except:
		source_utils.scraper_error('TORRENTPARADISE')
		return
	for file in files:
		try:
			hash = file['id']
			name = source_utils.clean_name(file['text'])
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else:
					last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
			try:
				seeders = int(file['s'])
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				dsize, isize = source_utils.convert_size(float(file["len"]), to='GB')
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'torrentparadise', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('TORRENTPARADISE')
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(link, timeout='5')
		if not r or '<table' not in r: return
		table = client.parseDOM(r, 'table', attrs={'class': 'rtable'})
		rows = client.parseDOM(table, 'tr')
	except:
		source_utils.scraper_error('BTSCENE')
		return
	for row in rows:
		try:
			if 'magnet:' not in row: continue
			url = re.search(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', row, re.I).group(1)
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
			name = url.split('&dn=')[1]
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else:
					last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(client.parseDOM(row, 'td', attrs={'class': 'seeds is-hidden-sm-mobile'})[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row).group(0)
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'btscene', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('BTSCENE')
def sources(self, url, hostDict):
	sources = []
	if not url: return sources
	try:
		data = parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		hdlr2 = 'S%d - %d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		query = '%s %s' % (title, hdlr)
		query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
		query2 = '%s %s' % (title, hdlr2)
		query2 = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query2)
		urls = []
		url = self.search_link % quote_plus(query)
		url = urljoin(self.base_link, url)
		urls.append(url)
		url2 = self.search_link % quote_plus(query2)
		url2 = urljoin(self.base_link, url2)
		urls.append(url2)
		# log_utils.log('urls = %s' % urls, log_utils.LOGDEBUG)
	except:
		source_utils.scraper_error('NYAA')
		return sources
	for url in urls:
		try:
			r = client.request(url, timeout='5')
			if not r or 'magnet' not in r: return sources
			r = re.sub(r'\n', '', r)
			r = re.sub(r'\t', '', r)
			tbody = client.parseDOM(r, 'tbody')
			rows = client.parseDOM(tbody, 'tr')
			for row in rows:
				links = zip(
					re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', row, re.DOTALL | re.I),
					re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row, re.DOTALL),
					[re.findall(r'<td class\s*=\s*["\']text-center["\']>([0-9]+)</td>', row, re.DOTALL)])
				for link in links:
					url = unquote_plus(link[0]).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
					url = source_utils.strip_non_ascii_and_unprintable(url)
					hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
					name = url.split('&dn=')[1]
					name = source_utils.clean_name(name)
					if hdlr not in name and hdlr2 not in name: continue
					if source_utils.remove_lang(name): continue
					if hdlr in name:
						t = name.split(hdlr)[0].replace(year, '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
					if hdlr2 in name:
						t = name.split(hdlr2)[0].replace(year, '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
					# if cleantitle.get(t) != cleantitle.get(title): continue # Anime title matching is a bitch!
					try:
						seeders = int(link[2][0])
						if self.min_seeders > seeders: continue
					except: seeders = 0
					quality, info = source_utils.get_release_quality(name, url)
					try:
						size = link[1]
						dsize, isize = source_utils._size(size)
						info.insert(0, isize)
					except: dsize = 0
					info = ' | '.join(info)
					sources.append({'provider': 'nyaa', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('NYAA')
			return sources
	return sources
def sources(self, url, hostDict):
	sources = []
	if not url: return sources
	try:
		data = parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		query = '%s %s' % (title, hdlr)
		query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', query)
		url = urljoin(self.base_link, self.search_link.format(query[0].lower(), cleantitle.geturl(query)))
		# log_utils.log('url = %s' % url, __name__, log_utils.LOGDEBUG)
		r = client.request(url, timeout='5')
		if not r or '<tbody' not in r: return sources
		r = client.parseDOM(r, 'tbody')[0]
		results = client.parseDOM(r, 'tr')
		posts = [i for i in results if 'magnet:' in i]
	except:
		source_utils.scraper_error('MAGNETDL')
		return sources
	try:
		next_page = [i for i in results if 'Next Page' in i]
		if not next_page: raise Exception()
		page = client.parseDOM(next_page, 'a', ret='href', attrs={'title': 'Downloads | Page 2'})[0]
		r2 = client.request(self.base_link + page)
		results2 = client.parseDOM(r2, 'tr')
		posts += [i for i in results2 if 'magnet:' in i]
	except: pass
	for post in posts:
		try:
			post = post.replace('&nbsp;', ' ')
			links = client.parseDOM(post, 'a', ret='href')
			magnet = [i.replace('&amp;', '&') for i in links if 'magnet:' in i][0]
			url = unquote_plus(magnet).split('&tr')[0].replace(' ', '.')
			if url in str(sources): continue
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			name = client.parseDOM(post, 'a', ret='title')[1].replace('–', '-')
			name = source_utils.clean_name(unquote_plus(name))
			if not source_utils.check_title(title, aliases, name, hdlr, year): continue
			name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
			if source_utils.remove_lang(name_info): continue
			elif not episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
				ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
				if any(re.search(item, name.lower()) for item in ep_strings): continue
			try:
				seeders = int(client.parseDOM(post, 'td', attrs={'class': 's'})[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post)[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			sources.append({'provider': 'magnetdl', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('MAGNETDL')
	return sources
def get_sources_packs(self, url):
	# log_utils.log('url = %s' % str(url), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(url, timeout='5')
		if not r or '<tbody' not in r: return
		r = client.parseDOM(r, 'tbody')[0]
		results = client.parseDOM(r, 'tr')
		posts = [i for i in results if 'magnet:' in i]
	except:
		source_utils.scraper_error('MAGNETDL')
		return
	try:
		next_page = [i for i in results if 'Next Page' in i]
		if not next_page: raise Exception()
		page = client.parseDOM(next_page, 'a', ret='href', attrs={'title': 'Downloads | Page 2'})[0]
		r2 = client.request(self.base_link + page)
		results2 = client.parseDOM(r2, 'tr')
		posts += [i for i in results2 if 'magnet:' in i]
	except: pass
	for post in posts:
		try:
			post = post.replace('&nbsp;', ' ')
			links = client.parseDOM(post, 'a', ret='href')
			magnet = [i.replace('&amp;', '&') for i in links if 'magnet:' in i][0]
			url = unquote_plus(magnet).split('&tr')[0].replace(' ', '.')
			if url in str(self.sources): continue
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			name = client.parseDOM(post, 'a', ret='title')[1].replace('–', '-')
			name = source_utils.clean_name(unquote_plus(name))
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else:
					last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(client.parseDOM(post, 'td', attrs={'class': 's'})[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post)[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'magnetdl', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('MAGNETDL')
def sources(self, data, hostDict):
	sources = []
	if not data: return sources
	try:
		self.scraper = cfscrape.create_scraper()
		self.key = cache.get(self._get_token, 0.2) # 800 secs token is valid for
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		query = '%s %s' % (title, hdlr)
		query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', query)
		if 'tvshowtitle' in data: search_link = self.tvshowsearch.format(self.key, data['imdb'], hdlr)
		else: search_link = self.msearch.format(self.key, data['imdb'])
		sleep(2.1)
		rjson = self.scraper.get(search_link).content
		if not rjson or 'torrent_results' not in str(rjson): return sources
		files = jsloads(rjson)['torrent_results']
	except:
		source_utils.scraper_error('TORRENTAPI')
		return sources
	for file in files:
		try:
			url = file["download"].split('&tr')[0]
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
			name = source_utils.clean_name(unquote_plus(file["title"]))
			if not source_utils.check_title(title, aliases, name, hdlr, year): continue
			name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
			if source_utils.remove_lang(name_info): continue
			if not episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
				ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
				if any(re.search(item, name.lower()) for item in ep_strings): continue
			try:
				seeders = int(file["seeders"])
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				dsize, isize = source_utils.convert_size(file["size"], to='GB')
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			sources.append({'provider': 'torrentapi', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('TORRENTAPI')
	return sources
def sources_packs(self, data, hostDict, search_series=False, total_seasons=None, bypass_filter=False):
    sources = []
    if not data: return sources
    if search_series:  # torrentapi does not have showPacks
        return sources
    try:
        self.scraper = cfscrape.create_scraper()
        self.key = cache.get(self._get_token, 0.2)  # 800 secs token is valid for
        self.bypass_filter = bypass_filter
        self.title = data['tvshowtitle'].replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.aliases = data['aliases']
        self.year = data['year']
        self.season_x = data['season']
        self.season_xx = self.season_x.zfill(2)
        search_link = self.tvshowsearch.format(self.key, data['imdb'], 'S%s' % self.season_xx)
        # log_utils.log('search_link = %s' % str(search_link))
        sleep(2.1)
        rjson = self.scraper.get(search_link).content
        if not rjson or 'torrent_results' not in str(rjson): return sources
        files = jsloads(rjson)['torrent_results']
    except:
        source_utils.scraper_error('TORRENTAPI')
        return sources
    for file in files:
        try:
            url = file["download"].split('&tr')[0]
            hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
            name = source_utils.clean_name(unquote_plus(file["title"]))
            if not self.bypass_filter:
                if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
            package = 'season'
            name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
            if source_utils.remove_lang(name_info): continue
            try:
                seeders = int(file["seeders"])
                if self.min_seeders > seeders: continue
            except:
                seeders = 0
            quality, info = source_utils.get_release_quality(name_info, url)
            try:
                dsize, isize = source_utils.convert_size(file["size"], to='GB')
                info.insert(0, isize)
            except:
                dsize = 0
            info = ' | '.join(info)
            sources.append({'provider': 'torrentapi', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info,
                            'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package})
        except:
            source_utils.scraper_error('TORRENTAPI')
    return sources
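# Both torrentapi methods above depend on self._get_token (cached for roughly
# 800 seconds) and pause 2.1 seconds before each search because the API
# rate-limits to about one call every two seconds. A hedged sketch of such a
# token fetch is below; it uses plain requests instead of the add-on's
# cfscrape session, and the app_id value is a stand-in, so treat it as
# illustrative rather than the add-on's actual helper.
from time import sleep
import requests

TOKEN_URL = 'https://torrentapi.org/pubapi_v2.php'  # public torrentapi endpoint

def get_token_sketch(app_id='example_app'):
    """Fetch a short-lived API token, then wait out the rate limit."""
    resp = requests.get(TOKEN_URL, params={'get_token': 'get_token', 'app_id': app_id}, timeout=10)
    sleep(2.1)  # torrentapi allows roughly one request every two seconds
    return resp.json().get('token')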
def sources(self, url, hostDict):
    sources = []
    if not url: return sources
    try:
        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        aliases = data['aliases']
        episode_title = data['title'] if 'tvshowtitle' in data else None
        year = data['year']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else ('(' + year + ')')
        query = title
        query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', query)
        if 'tvshowtitle' in data:
            url = self.show_link % query.replace(' ', '-')
        else:
            url = self.search_link % quote_plus(query)
        url = urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, __name__, log_utils.LOGDEBUG)
        r = client.request(url, timeout='5')
        if not r: return sources
        r = r.replace('\r', '').replace('\n', '').replace('\t', '')
        r = client.parseDOM(r, 'div', attrs={'class': 'card'})
        if not r: return sources
    except:
        source_utils.scraper_error('TOPNOW')
        return sources
    for i in r:
        try:
            if 'magnet:' not in i: continue
            name = client.parseDOM(i, 'img', attrs={'class': 'thumbnails'}, ret='alt')[0].replace(u'\xa0', u' ')
            if not source_utils.check_title(title, aliases, name, hdlr.replace('(', '').replace(')', ''), year): continue
            url = re.search(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', i, re.DOTALL | re.I).group(1)
            try:
                url = unquote_plus(url).decode('utf8').replace('&amp;', '&').replace(' ', '.')
            except:
                url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.')
            url = re.sub(r'(&tr=.+)&dn=', '&dn=', url)  # some links on topnow have &tr= before &dn=
            url = url.split('&tr=')[0].replace(' ', '.')
            url = source_utils.strip_non_ascii_and_unprintable(url)
            hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
            release_name = url.split('&dn=')[1]
            release_name = source_utils.clean_name(release_name)
            name_info = source_utils.info_from_name(release_name, title, year, hdlr, episode_title)
            if source_utils.remove_lang(name_info): continue
            seeders = 0  # seeders not available on topnow
            quality, info = source_utils.get_release_quality(name_info, url)
            try:
                size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', i)[-1]  # file size is no longer available on topnow's new site
                dsize, isize = source_utils._size(size)
                info.insert(0, isize)
            except:
                dsize = 0
            info = ' | '.join(info)
            sources.append({'provider': 'topnow', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': release_name, 'name_info': name_info,
                            'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
        except:
            source_utils.scraper_error('TOPNOW')
    return sources
def sources(self, data, hostDict):
    sources = []
    if not data: return sources
    try:
        api_key = getSetting('filepursuit.api')
        if api_key == '': return sources
        headers = {"x-rapidapi-host": "filepursuit.p.rapidapi.com", "x-rapidapi-key": api_key}
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        aliases = data['aliases']
        episode_title = data['title'] if 'tvshowtitle' in data else None
        year = data['year']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
        query = '%s %s' % (title, hdlr)
        query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = '%s%s' % (self.base_link, self.search_link % quote_plus(query))
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url, headers=headers)
        if not r: return sources
        r = jsloads(r)
        if 'not_found' in r['status']: return sources
        results = r['files_found']
    except:
        source_utils.scraper_error('FILEPURSUIT')
        return sources
    for item in results:
        try:
            url = item['file_link']
            try: size = int(item['file_size_bytes'])
            except: size = 0
            try: name = item['file_name']
            except: name = item['file_link'].split('/')[-1]
            name = source_utils.clean_name(name)
            if not source_utils.check_title(title, aliases, name, hdlr, year): continue
            name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
            if source_utils.remove_lang(name_info): continue
            # link_header = client.request(url, output='headers', timeout='5')  # too slow to check validity of links
            # if not any(value in str(link_header) for value in ['stream', 'video/mkv']): continue
            quality, info = source_utils.get_release_quality(name_info, url)
            try:
                dsize, isize = source_utils.convert_size(size, to='GB')
                if isize: info.insert(0, isize)
            except:
                dsize = 0
            info = ' | '.join(info)
            sources.append({'provider': 'filepursuit', 'source': 'direct', 'quality': quality, 'name': name, 'name_info': name_info, 'language': "en",
                            'url': url, 'info': info, 'direct': True, 'debridonly': False, 'size': dsize})
        except:
            source_utils.scraper_error('FILEPURSUIT')
    return sources
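# filepursuit reports sizes in raw bytes, which the scraper feeds to
# source_utils.convert_size(size, to='GB'). A rough sketch of that kind of
# converter is shown below, assuming the (dsize, isize) return convention
# used throughout these scrapers; the real helper may pick MB/GB units
# dynamically and format differently.
def convert_size_sketch(size_bytes, to='GB'):
    """Byte count -> (float size in GB, human-readable label)."""
    if not size_bytes: return 0, ''
    gb = round(float(size_bytes) / (1024 ** 3), 2)
    return gb, '%.2f GB' % gb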
def sources(self, url, hostDict):
    sources = []
    if not url: return sources
    try:
        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        aliases = data['aliases']
        episode_title = data['title'] if 'tvshowtitle' in data else None
        year = data['year']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
        query = '%s %s' % (title, hdlr)
        query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % quote_plus(query)
        url = urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url)
        if not r: return sources
        posts = client.parseDOM(r, 'h2')
        urls = []
        for item in posts:
            if not item.startswith('<a href'): continue
            try:
                name = client.parseDOM(item, "a")[0]
                if not source_utils.check_title(title, aliases, name, hdlr, year): continue
                name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
                if source_utils.remove_lang(name_info): continue
                quality, info = source_utils.get_release_quality(name_info, item[0])
                try:
                    size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', item)[0]
                    dsize, isize = source_utils._size(size)
                    info.insert(0, isize)
                except:
                    dsize = 0
                info = ' | '.join(info)
                item = client.parseDOM(item, 'a', ret='href')
                url = item
                links = self.links(url)
                if links is None: continue
                urls += [(i, name, name_info, quality, info, dsize) for i in links]
            except:
                source_utils.scraper_error('300MBFILMS')
        for item in urls:
            if 'earn-money' in item[0]: continue
            url = py_tools.ensure_text(client.replaceHTMLCodes(item[0]), errors='replace')
            valid, host = source_utils.is_host_valid(url, hostDict)
            if not valid: continue
            sources.append({'provider': '300mbfilms', 'source': host, 'name': item[1], 'name_info': item[2], 'quality': item[3],
                            'language': 'en', 'url': url, 'info': item[4], 'direct': False, 'debridonly': True, 'size': item[5]})
        return sources
    except:
        source_utils.scraper_error('300MBFILMS')
        return sources