def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(link, timeout='5')
		if not r: return
		r = re.sub(r'\n', '', r)
		r = re.sub(r'\t', '', r)
		posts = re.compile(r'<table\s*class\s*=\s*["\']table2["\']\s*cellspacing\s*=\s*["\']\d+["\']>(.*?)</table>', re.I).findall(r)
		posts = client.parseDOM(posts, 'tr')
	except:
		source_utils.scraper_error('TORRENTDOWNLOAD')
		return
	for post in posts:
		try:
			if '<th' in post: continue
			links = re.compile(r'<a\s*href\s*=\s*["\'](.+?)["\']>.*?<td class\s*=\s*["\']tdnormal["\']>((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))</td><td class\s*=\s*["\']tdseed["\']>([0-9]+|[0-9]+,[0-9]+)</td>', re.I).findall(post)
			for items in links:
				link = items[0].split("/")
				hash = link[1].lower()
				name = link[2].replace('+MB+', '')
				name = unquote_plus(name).replace('&amp;', '&')  # decode HTML-escaped ampersands
				name = source_utils.clean_name(name)
				if not self.search_series:
					if not self.bypass_filter:
						if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
					package = 'season'
				elif self.search_series:
					if not self.bypass_filter:
						valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
						if not valid: continue
					else: last_season = self.total_seasons
					package = 'show'
				name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
				if source_utils.remove_lang(name_info): continue
				url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
				try:
					seeders = int(items[2].replace(',', ''))
					if self.min_seeders > seeders: continue
				except: seeders = 0
				quality, info = source_utils.get_release_quality(name_info, url)
				try:
					size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', items[1])[0]
					dsize, isize = source_utils._size(size)
					info.insert(0, isize)
				except: dsize = 0
				info = ' | '.join(info)
				item = {'provider': 'torrentdownload', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
				if self.search_series: item.update({'last_season': last_season})
				self.sources.append(item)
		except:
			source_utils.scraper_error('TORRENTDOWNLOAD')

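# For reference, the magnet URI assembled above follows the standard BitTorrent
# v1 form; with an illustrative (made-up) info-hash and display name it looks like:
#   magnet:?xt=urn:btih:0123456789abcdef0123456789abcdef01234567&dn=Some.Show.S01.1080p.WEB-DL
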
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(link, timeout='5')
		if not r: return
		if any(value in r for value in ['something went wrong', 'Connection timed out', '521: Web server is down', '503 Service Unavailable']): return
		table = client.parseDOM(r, 'table', attrs={'id': 'table'})
		table_body = client.parseDOM(table, 'tbody')
		rows = client.parseDOM(table_body, 'tr')
	except:
		source_utils.scraper_error('TORRENTZ2')
		return
	for row in rows:
		try:
			if 'magnet:' not in row: continue
			url = re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', row, re.DOTALL | re.I)[0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			name = url.split('&dn=')[1]
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				# seeders = int(client.parseDOM(row, 'td', attrs={'data-title': 'Seeds'})[0])
				seeders = int(client.parseDOM(row, 'td', attrs={'data-title': 'Last Updated'})[0])  # keep an eye on this; it looks like they swapped their columns (seeders and size)
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row.replace(u'\xa0', u' ').replace(u'&nbsp;', u' '))[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'torrentz2', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('TORRENTZ2')

def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(link, timeout='5')
		if not r: return
		if any(value in str(r) for value in ['something went wrong', 'Connection timed out', '521: Web server is down', '503 Service Unavailable']): return
		table = client.parseDOM(r, 'tbody')
		rows = client.parseDOM(table, 'tr')
	except:
		source_utils.scraper_error('BITCQ')
		return
	for row in rows:
		try:
			if 'magnet' not in row: continue
			url = re.findall(r'href="(magnet:.+?)"', row, re.DOTALL)[0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
			name = source_utils.clean_name(url.split('&dn=')[1])
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(re.search(r'<td>(\d+)<', row).group(1))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row).group(0)
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'bitcq', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('BITCQ')

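# The methods above (and most of the scrapers below) repeat the same magnet
# normalisation steps. A minimal refactoring sketch, assuming the same
# module-level imports (re, unquote_plus, source_utils); the helper name
# _parse_magnet is hypothetical and not part of the source:
def _parse_magnet(url):
	# normalise the link, drop the tracker list, then pull out info-hash and display name
	url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
	url = source_utils.strip_non_ascii_and_unprintable(url)
	hash = re.search(r'btih:(.*?)&', url, re.I).group(1).lower()
	name = source_utils.clean_name(url.split('&dn=')[1])  # raises IndexError if '&dn=' is missing, like the originals
	return url, hash, name
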
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		headers = {'User-Agent': client.agent()}
		r = py_tools.ensure_str(self.scraper.get(link, headers=headers).content, errors='replace')
		if not r or '<table' not in r: return
		table = client.parseDOM(r, 'table', attrs={'class': 'table2'})[0]
		rows = client.parseDOM(table, 'tr')
		if not rows: return
	except:
		source_utils.scraper_error('LIMETORRENTS')
		return
	for row in rows:
		try:
			data = client.parseDOM(row, 'a', ret='href')[0]
			if '/search/' in data: continue
			data = re.sub(r'\s', '', data).strip()
			hash = re.compile(r'/torrent/(.+?).torrent', re.I).findall(data)[0]
			name = re.findall(r'title\s*=\s*(.+?)$', data, re.DOTALL | re.I)[0]
			name = source_utils.clean_name(name)
			url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(client.parseDOM(row, 'td', attrs={'class': 'tdseed'})[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row)[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'limetorrents', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('LIMETORRENTS')

def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = py_tools.ensure_str(self.scraper.get(link).content, errors='replace')
		if not r: return
		posts = client.parseDOM(r, 'div', attrs={'class': 'tgxtable'})
		if not posts: return
	except:
		source_utils.scraper_error('TORRENTGALAXY')
		return
	for post in posts:
		try:
			links = zip(
				re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', post, re.DOTALL | re.I),
				re.findall(r'<span\s*class\s*=\s*["\']badge\s*badge-secondary["\']\s*style\s*=\s*["\']border-radius:4px;["\']>(.*?)</span>', post, re.DOTALL | re.I),
				re.findall(r'<span\s*title\s*=\s*["\']Seeders/Leechers["\']>\[<font\s*color\s*=\s*["\']green["\']><b>(.*?)<', post, re.DOTALL | re.I))
			for link in links:
				url = unquote_plus(link[0]).split('&tr')[0].replace(' ', '.')
				url = source_utils.strip_non_ascii_and_unprintable(url)
				hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
				name = url.split('&dn=')[1]
				name = source_utils.clean_name(name)
				if not self.search_series:
					if not self.bypass_filter:
						if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
					package = 'season'
				elif self.search_series:
					if not self.bypass_filter:
						valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
						if not valid: continue
					else: last_season = self.total_seasons
					package = 'show'
				name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
				if source_utils.remove_lang(name_info): continue
				try:
					seeders = int(link[2])
					if self.min_seeders > seeders: continue
				except: seeders = 0
				quality, info = source_utils.get_release_quality(name_info, url)
				try:
					dsize, isize = source_utils._size(link[1])
					info.insert(0, isize)
				except: dsize = 0
				info = ' | '.join(info)
				item = {'provider': 'torrentgalaxy', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
				if self.search_series: item.update({'last_season': last_season})
				self.sources.append(item)
		except:
			source_utils.scraper_error('TORRENTGALAXY')

def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(link, timeout='5')
		if not r: return
		posts = client.parseDOM(r, 'div', attrs={'class': 'media'})
	except:
		source_utils.scraper_error('BTDB')
		return
	for post in posts:
		try:
			if 'magnet:' not in post: continue
			url = re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', post, re.DOTALL | re.I)[0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			if url in str(self.sources): return
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			name = url.split('&dn=')[1]
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(re.findall(r'Seeders.*?["\']>([0-9]+|[0-9]+,[0-9]+)</strong>', post, re.DOTALL | re.I)[0].replace(',', ''))
				if self.min_seeders > seeders: return  # 'return' rather than 'continue' — presumably results are seeder-sorted, so everything after is below threshold
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post)[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'btdb', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('BTDB')

def get_pack_items(self, url):
	# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
	try:
		r = client.request(url, timeout='5')
		if not r: return
		r = client.parseDOM(r, 'table', attrs={'class': 'tmain'})[0]
		links = re.findall(r'<a\s*href\s*=\s*["\'](/torrent/.+?)["\']>(.+?)</a>', r, re.DOTALL | re.I)
	except:
		source_utils.scraper_error('TORRENTFUNK')
		return
	for link in links:
		try:
			try: url = link[0].encode('ascii', errors='ignore').decode('ascii', errors='ignore').replace('&nbsp;', ' ')
			except: url = link[0].replace('&nbsp;', ' ')
			if '/torrent/' not in url: continue
			try: name = link[1].encode('ascii', errors='ignore').decode('ascii', errors='ignore').replace(' ', '.')
			except: name = link[1].replace(' ', '.')
			if '<span' in name:
				nam = name.split('<span')[0].replace(' ', '.')
				span = client.parseDOM(name, 'span')[0].replace('-', '.')
				name = '%s%s' % (nam, span)
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			if not url.startswith('http'): url = urljoin(self.base_link, url)
			if self.search_series: self.items.append((name, name_info, url, package, last_season))
			else: self.items.append((name, name_info, url, package))
		except:
			source_utils.scraper_error('TORRENTFUNK')

def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link))
	try:
		r = client.request(link, timeout='5')
		if not r: return
		results = jsloads(r)['results']
	except:
		source_utils.scraper_error('SOLIDTORRENTS')
		return
	for item in results:
		try:
			url = unquote_plus(item['magnet']).replace(' ', '.')
			url = re.sub(r'(&tr=.+)&dn=', '&dn=', url)  # some links on solidtorrents have &tr= before &dn=
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = item['infohash'].lower()
			if url in str(self.sources): continue
			name = source_utils.clean_name(item['title'])
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(item['swarm']['seeders'])
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				dsize, isize = source_utils.convert_size(item["size"], to='GB')
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'solidtorrents', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('SOLIDTORRENTS')

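# Illustrative shape of one SolidTorrents API result as consumed above; the field
# names come from the code, the values are made-up placeholders, and "size" is
# presumably a byte count (source_utils.convert_size turns it into GB):
# {
#     "title": "Some.Show.S01.1080p.WEB-DL",
#     "magnet": "magnet:?xt=urn:btih:0123456789abcdef0123456789abcdef01234567&dn=Some.Show.S01.1080p.WEB-DL&tr=...",
#     "infohash": "0123456789abcdef0123456789abcdef01234567",
#     "swarm": {"seeders": 42},
#     "size": 3221225472
# }
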
def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link))
	try:
		# headers = {'User-Agent': client.agent()}
		headers = {'User-Agent': client.randomagent()}
		result = client.request(link, headers=headers, timeout='5')
		if not result: return
		rows = client.parseDOM(result, 'tr', attrs={'class': 't-row'})
		if not rows: return
		rows = [i for i in rows if 'racker:' not in i]  # drop tracker rows ('racker:' matches both 'Tracker:' and 'tracker:')
	except:
		source_utils.scraper_error('GLODLS')
		return
	for row in rows:
		try:
			ref = client.parseDOM(row, 'a', ret='href')
			url = [i for i in ref if 'magnet:' in i][0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1).lower()
			name = unquote_plus(client.parseDOM(row, 'a', ret='title')[0])
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(re.search(r'<td.*?<font\s*color\s*=\s*["\'].+?["\']><b>([0-9]+|[0-9]+,[0-9]+)</b>', row).group(1).replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row).group(0)
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'glodls', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('GLODLS')

def get_sources_packs(self, link):
	try:
		# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
		rjson = client.request(link, timeout='5')
		if not rjson or rjson == 'null' or any(value in rjson for value in ['521 Origin Down', 'No results returned', 'Connection Time-out', 'Database maintenance']): return
		files = jsloads(rjson)
	except:
		source_utils.scraper_error('TORRENTPARADISE')
		return
	for file in files:
		try:
			hash = file['id']
			name = source_utils.clean_name(file['text'])
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
			try:
				seeders = int(file['s'])
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				dsize, isize = source_utils.convert_size(float(file["len"]), to='GB')
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'torrentparadise', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('TORRENTPARADISE')

def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(link, timeout='5')
		if not r or '<table' not in r: return
		table = client.parseDOM(r, 'table', attrs={'class': 'rtable'})
		rows = client.parseDOM(table, 'tr')
	except:
		source_utils.scraper_error('BTSCENE')
		return
	for row in rows:
		try:
			if 'magnet:' not in row: continue
			url = re.search(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', row, re.I).group(1)
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
			name = url.split('&dn=')[1]
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(client.parseDOM(row, 'td', attrs={'class': 'seeds is-hidden-sm-mobile'})[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row).group(0)
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'btscene', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('BTSCENE')

def get_pack_sources(self, url):
	try:
		# log_utils.log('url = %s' % str(url), __name__, log_utils.LOGDEBUG)
		result = client.request(url, timeout='5')
		if not result: return
		if '<kbd>' not in result: return
		hash = re.findall(r'<kbd>(.+?)<', result, re.DOTALL | re.I)[0]
		name = re.findall(r'<h3\s*class\s*=\s*["\']card-title["\']>(.+?)<', result, re.DOTALL | re.I)[0].replace('Original Name: ', '')
		name = source_utils.clean_name(unquote_plus(name))
		if not self.search_series:
			if not self.bypass_filter:
				if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): return
			package = 'season'
		elif self.search_series:
			if not self.bypass_filter:
				valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
				if not valid: return
			else: last_season = self.total_seasons
			package = 'show'
		name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
		if source_utils.remove_lang(name_info): return
		url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
		if url in str(self.sources): return
		try:
			seeders = int(re.findall(r'>Seeders:.*?>\s*([0-9]+|[0-9]+,[0-9]+)\s*</', result, re.DOTALL | re.I)[0].replace(',', ''))
			if self.min_seeders > seeders: return
		except: seeders = 0
		quality, info = source_utils.get_release_quality(name_info, url)
		try:
			size = re.findall(r'File size:.*?["\']>(.+?)<', result, re.DOTALL | re.I)[0]
			size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', size)[0]
			dsize, isize = source_utils._size(size)
			info.insert(0, isize)
		except: dsize = 0
		info = ' | '.join(info)
		item = {'provider': 'yourbittorrent', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
		if self.search_series: item.update({'last_season': last_season})
		self.sources.append(item)
	except:
		source_utils.scraper_error('YOURBITTORRENT')

def get_sources_packs(self, url):
	# log_utils.log('url = %s' % str(url), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(url, timeout='5')
		if not r or '<tbody' not in r: return
		r = client.parseDOM(r, 'tbody')[0]
		results = client.parseDOM(r, 'tr')
		posts = [i for i in results if 'magnet:' in i]
	except:
		source_utils.scraper_error('MAGNETDL')
		return
	try:
		next_page = [i for i in results if 'Next Page' in i]
		if not next_page: raise Exception()
		page = client.parseDOM(next_page, 'a', ret='href', attrs={'title': 'Downloads | Page 2'})[0]
		r2 = client.request(self.base_link + page)
		results2 = client.parseDOM(r2, 'tr')
		posts += [i for i in results2 if 'magnet:' in i]
	except: pass
	for post in posts:
		try:
			post = post.replace('&nbsp;', ' ')
			links = client.parseDOM(post, 'a', ret='href')
			magnet = [i.replace('&amp;', '&') for i in links if 'magnet:' in i][0]
			url = unquote_plus(magnet).split('&tr')[0].replace(' ', '.')
			if url in str(self.sources): continue
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			name = client.parseDOM(post, 'a', ret='title')[1].replace('&ndash;', '-')
			name = source_utils.clean_name(unquote_plus(name))
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(client.parseDOM(post, 'td', attrs={'class': 's'})[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post)[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'magnetdl', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('MAGNETDL')

def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	r = client.request(link, timeout='5')
	if not r or 'did not match any documents' in r: return
	r = r.replace('&nbsp;', ' ')
	r = client.parseDOM(r, 'div', attrs={'class': 'col s12'})
	posts = client.parseDOM(r, 'div')[1:]
	posts = [i for i in posts if 'magnet/' in i]
	for post in posts:
		try:
			name = client.parseDOM(post, 'a', ret='title')[0]
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			hash = client.parseDOM(post, 'a', ret='href')[0].split('magnet/')[1]
			url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
			try:
				seeders = int(client.parseDOM(post, 'b', attrs={'id': 'seeders'})[0].replace(',', ''))
				if self.min_seeders > seeders: return
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'<b class\s*=\s*["\']cpill .+?-pill["\']>(.+?)</b>', post, re.I)[0]  # pack size calc will re-evaluate this
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'bt4g', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('BT4G')

def get_sources_packs(self, link, url):
	try:
		self.headers.update({'Referer': link})
		query_data = {
			'query': url,
			'offset': 0,
			'limit': 99,
			'filters[field]': 'seeds',
			'filters[sort]': 'desc',
			'filters[time]': 4,
			'filters[category]': 4,
			'filters[adult]': False,
			'filters[risky]': False}
		api_url = '%s%s' % (self.base_link, self.api_search_link)
		rjson = client.request(api_url, post=query_data, headers=self.headers, timeout='5')
		if not rjson: return
		files = jsloads(rjson)
		error = files.get('error')
		if error: return
	except:
		source_utils.scraper_error('BITLORD')
		return
	for file in files.get('content'):
		try:
			name = source_utils.clean_name(file.get('name'))
			url = unquote_plus(file.get('magnet')).replace('&amp;', '&').replace(' ', '.')
			url = re.sub(r'(&tr=.+)&dn=', '&dn=', url)  # some links on bitlord have &tr= before &dn=
			url = url.split('&tr=')[0].split('&xl=')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = file.get('seeds')
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = file.get('size')
				size = str(size) + ' GB' if len(str(size)) <= 2 else str(size) + ' MB'  # bitlord size is all over the place between MB and GB
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'bitlord', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('BITLORD')

def get_sources_packs(self, link):
	try:
		# log_utils.log('link = %s' % link)
		r = py_tools.ensure_str(self.scraper.get(link).content, errors='replace')
		if not r: return
		posts = client.parseDOM(r, 'tr', attrs={'class': 'tlr'})
		posts += client.parseDOM(r, 'tr', attrs={'class': 'tlz'})
	except:
		source_utils.scraper_error('EXTRATORRENT')
		return
	for post in posts:
		try:
			post = re.sub(r'\n', '', post)
			post = re.sub(r'\t', '', post)
			url = re.search(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', post, re.I).group(1)
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			url = source_utils.strip_non_ascii_and_unprintable(url)
			if url in str(self.sources): continue
			hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
			name = url.split('&dn=')[1]
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(client.parseDOM(post, 'td', attrs={'class': 'sn'})[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post).group(0)
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'extratorrent', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('EXTRATORRENT')

def get_sources_packs(self, link):
	# log_utils.log('link = %s' % link, __name__, log_utils.LOGDEBUG)
	try:
		headers = {'User-Agent': client.agent()}
		r = client.request(link, headers=headers, timeout='5')
		if not r: return
		posts = client.parseDOM(r, 'tr', attrs={'id': 'torrent_latest_torrents'})
	except:
		source_utils.scraper_error('KICKASS2')
		return
	for post in posts:
		try:
			ref = client.parseDOM(post, 'a', attrs={'title': 'Torrent magnet link'}, ret='href')[0]
			link = ref.split('url=')[1]
			url = unquote_plus(link).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			name = unquote_plus(url.split('&dn=')[1])
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(re.findall(r'<td\s*class\s*=\s*["\']green\s*center["\']>([0-9]+|[0-9]+,[0-9]+)</td>', post, re.DOTALL)[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post)[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'kickass2', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('KICKASS2')

def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		r = client.request(link, timeout='5')
		if not r: return
		rows = client.parseDOM(r, 'tr', attrs={'class': 'row'})
		if not rows: return
	except:
		source_utils.scraper_error('IDOPE')
		return
	for row in rows:
		try:
			url = client.parseDOM(row, 'a', attrs={'title': 'Download Torrent Magnet'}, ret='href')[0]
			url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			name = url.split('&dn=')[1]
			name = source_utils.clean_name(name)
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(re.findall(r'<td\s*class\s*=\s*["\']seeds\s*is-hidden-sm-mobile["\']>([0-9]+|[0-9]+,[0-9]+)<', row, re.S | re.I)[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row)[0]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'idope', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('IDOPE')

def get_sources_packs(self, link):
	# log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
	try:
		# For some reason Zooqle returns 404 even though the response has a body.
		# This is probably a bug on Zooqle's server and the error should just be ignored.
		html = client.request(link, ignoreErrors=404, timeout='5')
		if not html: return
		html = html.replace('&nbsp;', ' ')
		try: table = client.parseDOM(html, 'table', attrs={'class': 'table table-condensed table-torrents vmiddle'})[0]
		except: return
		rows = client.parseDOM(table, 'tr')
		if not rows: return
	except:
		source_utils.scraper_error('ZOOQLE')
		return
	for row in rows:
		try:
			try:
				if 'magnet:' not in row: continue
				url = re.findall(r'href\s*=\s*["\'](magnet:[^"\']+)["\']', row, re.DOTALL | re.I)[0]
				url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.').split('&tr')[0]
				url = source_utils.strip_non_ascii_and_unprintable(url)
				if url in str(self.sources): continue
			except: continue
			hash = re.compile(r'btih:(.*?)&', re.I).findall(url)[0]
			try:
				name = re.findall(r'<a class\s*=\s*["\'].+?>(.+?)</a>', row, re.DOTALL | re.I)[0]
				name = client.cleanHTML(name)
				name = unquote_plus(name)
				name = source_utils.clean_name(name)
			except: continue
			# some titles have a foreign title translation in front, so remove it
			if './.' in name: name = name.split('./.', 1)[1]
			if '.com.' in name.lower():
				try: name = re.sub(r'(.*?)\W{2,10}', '', name)
				except: name = name.split('-.', 1)[1].lstrip()
			if not self.search_series:
				if not self.bypass_filter:
					if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): continue
				package = 'season'
			elif self.search_series:
				if not self.bypass_filter:
					valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
					if not valid: continue
				else: last_season = self.total_seasons
				package = 'show'
			name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
			if source_utils.remove_lang(name_info): continue
			try:
				seeders = int(re.findall(r'["\']Seeders:\s*([0-9]+|[0-9]+,[0-9]+)\s*\|', row, re.DOTALL | re.I)[0].replace(',', ''))
				if self.min_seeders > seeders: continue
			except: seeders = 0
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row)[-1]
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except: dsize = 0
			info = ' | '.join(info)
			item = {'provider': 'zooqle', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
			if self.search_series: item.update({'last_season': last_season})
			self.sources.append(item)
		except:
			source_utils.scraper_error('ZOOQLE')

def get_pack_sources(self, url):
	try:
		# log_utils.log('url = %s' % str(url))
		result = client.request(url, timeout='5')
		if not result: return
		hash = re.search(r'<a\s*title\s*=\s*["\']hash:(.+?)\s*torrent', result, re.I).group(1)
		name = re.search(r'<title>(.+?)</title>', result, re.I).group(1)
		name = source_utils.clean_name(unquote_plus(name))
		if not self.search_series:
			if not self.bypass_filter:
				if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name): return
			package = 'season'
		elif self.search_series:
			if not self.bypass_filter:
				valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
				if not valid: return
			else: last_season = self.total_seasons
			package = 'show'
		name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
		if source_utils.remove_lang(name_info): return
		url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
		if url in str(self.sources): return
		try:
			seeders = int(re.search(r'["\']tseeders["\']>\s*([0-9]+|[0-9]+,[0-9]+)\s*<', result, re.I).group(1).replace(',', ''))
			if self.min_seeders > seeders: return
		except: seeders = 0
		quality, info = source_utils.get_release_quality(name_info, url)
		try:
			size = re.search(r'<div id\s*=\s*["\']torrent-size["\']>(.+?)<', result, re.I).group(1)
			dsize, isize = source_utils._size(size)
			info.insert(0, isize)
		except: dsize = 0
		info = ' | '.join(info)
		item = {'provider': 'torrentproject2', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize, 'package': package}
		if self.search_series: item.update({'last_season': last_season})
		self.sources.append(item)
	except:
		source_utils.scraper_error('TORRENTPROJECT2')

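# A hypothetical driver sketch (an assumption, not taken from the source): the
# pack methods above all append into self.sources, so a caller would typically
# fan the search/pack links out over worker threads and read self.sources
# afterwards. The function name collect_pack_sources is illustrative only.
from threading import Thread

def collect_pack_sources(scraper, links):
	# one worker per link; each worker appends its items to scraper.sources
	threads = [Thread(target=scraper.get_sources_packs, args=(link,)) for link in links]
	for t in threads: t.start()
	for t in threads: t.join()
	return scraper.sources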