def get_sources(self, link):
    # Fetch a YourBittorrent detail page, rebuild the magnet URI from the
    # exposed info-hash, and append a source dict on a title/quality match.
    try:
        url = '%s%s' % (self.base_link, link)
        result = client.request(url)
        if result is None:
            return
        # The info-hash is shown inside a <kbd> element on the detail page.
        info_hash = re.findall('<kbd>(.+?)<', result, re.DOTALL)[0]
        url = '%s%s' % ('magnet:?xt=urn:btih:', info_hash)
        name = re.findall('<h3 class="card-title">(.+?)<', result, re.DOTALL)[0]
        name = urllib.unquote_plus(name).replace(' ', '.')
        url = '%s%s%s' % (url, '&dn=', str(name))
        if source_utils.remove_lang(name):
            return
        # Skip duplicates already collected for this title.
        if url in str(self.sources):
            return
        # Strip year/handler noise before comparing cleaned titles.
        t = name.split(self.hdlr)[0].replace(self.year, '').replace(
            '(', '').replace(')', '').replace('&', 'and').replace(
            '.US.', '.').replace('.us.', '.')
        if cleantitle.get(t) != cleantitle.get(self.title):
            return
        if self.hdlr not in name:
            return
        size = re.findall(
            '<div class="col-3">File size:</div><div class="col">(.+?)<',
            result, re.DOTALL)[0]
        quality, info = source_utils.get_release_quality(name, url)
        try:
            size = re.findall(
                '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                size)[0]
            dsize, isize = source_utils._size(size)
            info.insert(0, isize)
        except:
            # Size parse failure: fall back to zero size.
            isize = '0'
            dsize = 0
            pass
        info = ' | '.join(info)
        self.sources.append({
            'source': 'torrent', 'quality': quality, 'language': 'en',
            'url': url, 'info': info, 'direct': False, 'debridonly': True,
            'size': dsize})
    except:
        source_utils.scraper_error('YOURBITTORRENT')
        pass
def _get_items(self, url):
    # Parse a 1337x search-results page and append
    # (name, link, isize, dsize, seeders) tuples to self.items.
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        # No <tbody> means no results table on the page.
        if '<tbody' not in r:
            return self.items
        posts = client.parseDOM(r, 'tbody')[0]
        posts = client.parseDOM(posts, 'tr')
        for post in posts:
            # Second anchor in the row is the torrent detail-page link.
            data = client.parseDOM(post, 'a', ret='href')[1]
            link = urljoin(self.base_link, data)
            try:
                seeders = int(
                    client.parseDOM(post, 'td', attrs={'class': 'coll-2 seeds'
                                                       })[0].replace(',', ''))
                if self.min_seeders > seeders:
                    continue
            except:
                seeders = 0
                pass
            name = client.parseDOM(post, 'a')[1]
            name = unquote_plus(name)
            name = source_utils.clean_name(self.title, name)
            if source_utils.remove_lang(name, self.episode_title):
                continue
            if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year):
                continue
            # filter for episode multi packs (ex. S01E01-E17 is also returned in query)
            if self.episode_title:
                if not source_utils.filter_single_episodes(self.hdlr, name):
                    continue
            try:
                size = re.findall(
                    '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                    post)[0]
                dsize, isize = source_utils._size(size)
            except:
                isize = '0'
                dsize = 0
                pass
            self.items.append((name, link, isize, dsize, seeders))
        return self.items
    except:
        source_utils.scraper_error('1337X')
        return self.items
def _get_items(self, url):
    # Parse a LimeTorrents results table (class "table2") and append
    # (name, link, isize, dsize, seeders) tuples to self.items.
    try:
        headers = {'User-Agent': client.agent()}
        r = self.scraper.get(url, headers=headers).content
        if not r:
            return
        posts = client.parseDOM(r, 'table', attrs={'class': 'table2'})[0]
        posts = client.parseDOM(posts, 'tr')
        for post in posts:
            data = client.parseDOM(post, 'a', ret='href')[1]
            # Skip internal navigation/search links.
            if '/search/' in data:
                continue
            # Drop non-ASCII bytes some links carry.
            try:
                data = data.encode('ascii', 'ignore')
            except:
                pass
            # Some broken links contain stray whitespace.
            data = re.sub('\s', '', data).strip()
            link = urljoin(self.base_link, data)
            name = client.parseDOM(post, 'a')[1]
            name = unquote_plus(name)
            name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
            if source_utils.remove_lang(name):
                continue
            match = source_utils.check_title(self.title, name, self.hdlr, self.year)
            if not match:
                continue
            try:
                seeders = int(
                    client.parseDOM(post, 'td',
                                    attrs={'class': 'tdseed'})[0].replace(',', ''))
                if self.min_seeders > seeders:
                    continue
            except:
                seeders = 0
                pass
            try:
                size = re.findall(
                    '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                    post)[0]
                dsize, isize = source_utils._size(size)
            except:
                isize = '0'
                dsize = 0
                pass
            self.items.append((name, link, isize, dsize, seeders))
        return self.items
    except:
        source_utils.scraper_error('LIMETORRENTS')
        return self.items
def get_sources(self, link):
    # Resolve an ETTV detail-page fragment, extract its magnet link, and
    # append a source dict after title/seeder/size checks.
    try:
        url = re.compile('href="(.+?)"').findall(link)[0]
        url = '%s%s' % (self.base_link, url)
        result = client.request(url)
        if result is None:
            return
        if 'magnet' not in result:
            return
        url = 'magnet:%s' % (re.findall('a href="magnet:(.+?)"', result, re.DOTALL)[0])
        # Trim tracker/size parameters and normalise the display name.
        url = unquote_plus(url).split('&xl=')[0].replace('&', '&').replace(' ', '.')
        url = source_utils.strip_non_ascii_and_unprintable(url)
        if url in str(self.sources):
            return
        hash = re.compile('btih:(.*?)&').findall(url)[0]
        name = url.split('&dn=')[1]
        name = source_utils.clean_name(self.title, name)
        if source_utils.remove_lang(name, self.episode_title):
            return
        if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year):
            return
        # filter for episode multi packs (ex. S01E01-E17 is also returned in query)
        if self.episode_title:
            if not source_utils.filter_single_episodes(self.hdlr, name):
                return
        try:
            seeders = int(re.findall(r'<b>Seeds: </b>.*?>([0-9]+|[0-9]+,[0-9]+)</font>', result, re.DOTALL)[0].replace(',', ''))
            if self.min_seeders > seeders:
                return
        except:
            seeders = 0
            pass
        quality, info = source_utils.get_release_quality(name, url)
        try:
            size = re.findall(r'<b>Total Size:</b></td><td>(.*?)</td>', result, re.DOTALL)[0].strip()
            dsize, isize = source_utils._size(size)
            info.insert(0, isize)
        except:
            dsize = 0
            pass
        info = ' | '.join(info)
        self.sources.append({'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
    except:
        source_utils.scraper_error('ETTV')
        pass
def _get_sources(self, url):
    # Parse an idope results page. The site hides the info-hash and release
    # name in "hideinfohash"/"hidename" divs inside each result row.
    try:
        r = client.request(url)
        div = client.parseDOM(r, 'div', attrs={'id': 'div2child'})
        for row in div:
            # NOTE(review): rows are re-parsed from the full page (r), not from
            # the enclosing div — looks redundant but preserved as written.
            row = client.parseDOM(r, 'div', attrs={'class': 'resultdivbotton'})
            for post in row:
                hash = re.findall('<div id="hideinfohash.+?" class="hideinfohash">(.+?)<', post, re.DOTALL)[0]
                name = re.findall('<div id="hidename.+?" class="hideinfohash">(.+?)<', post, re.DOTALL)[0]
                name = unquote_plus(name)
                name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
                # Strip a leading site-watermark ("www...") prefix if present.
                if name.startswith('www'):
                    try:
                        name = re.sub(r'www(.*?)\W{2,10}', '', name)
                    except:
                        name = name.split('-.', 1)[1].lstrip()
                # Rebuild a magnet URI from the extracted hash and name.
                url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
                if url in str(self.sources):
                    continue
                try:
                    seeders = int(re.findall('<div class="resultdivbottonseed">([0-9]+|[0-9]+,[0-9]+)<', post, re.DOTALL)[0].replace(',', ''))
                    if self.min_seeders > seeders:
                        continue
                except:
                    seeders = 0
                    pass
                if source_utils.remove_lang(name):
                    continue
                match = source_utils.check_title(self.title, name, self.hdlr, self.year)
                if not match:
                    continue
                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall('<div class="resultdivbottonlength">(.+?)<', post)[0]
                    dsize, isize = source_utils._size(size)
                    info.insert(0, isize)
                except:
                    dsize = 0
                    pass
                info = ' | '.join(info)
                self.sources.append({'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
    except:
        source_utils.scraper_error('IDOPE')
        pass
def get_sources(self, url):
    """Scrape a GloDLS results page into (name, magnet, isize, dsize, seeders) tuples.

    Returns the collected list; on any scrape error, logs it and returns
    whatever was gathered so far.
    """
    items = []
    try:
        html = client.request(url, headers={'User-Agent': client.agent()})
        rows = client.parseDOM(html, 'tr', attrs={'class': 't-row'})
        # Drop tracker-listing rows; keep only actual torrent rows.
        rows = [row for row in rows if 'racker:' not in row]
        for row in rows:
            candidates = client.parseDOM(row, 'a', ret='href')
            url = [c for c in candidates if 'magnet:' in c][0]
            name = unquote_plus(client.parseDOM(row, 'a', ret='title')[0])
            name = source_utils.clean_name(self.title, name)
            if source_utils.remove_lang(name, self.episode_title):
                continue
            if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year):
                continue
            # filter for episode multi packs (ex. S01E01-E17 is also returned in query)
            if self.episode_title and not source_utils.filter_single_episodes(self.hdlr, name):
                continue
            try:
                seeders = int(re.findall("<td.*?<font color='green'><b>([0-9]+|[0-9]+,[0-9]+)</b>", row)[0].replace(',', ''))
                if self.min_seeders > seeders:
                    continue
            except:
                seeders = 0
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', row)[0]
                dsize, isize = source_utils._size(size)
            except:
                dsize, isize = 0, '0'
            items.append((name, url, isize, dsize, seeders))
        return items
    except:
        source_utils.scraper_error('GLODLS')
        return items
def get_sources(self, url):
    # log_utils.log('url = %s' % url, __name__, log_utils.LOGDEBUG)
    # Parse a Kickass results page. Magnet links are wrapped in a redirect
    # URL; the real magnet follows the "url=" parameter.
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        if not r:
            return
        posts = client.parseDOM(r, 'tr', attrs={'id': 'torrent_latest_torrents'})
        for post in posts:
            ref = client.parseDOM(post, 'a', attrs={'title': 'Torrent magnet link'}, ret='href')[0]
            link = ref.split('url=')[1]
            url = unquote_plus(link).replace('&', '&').replace(' ', '.')
            url = url.split('&tr')[0]
            hash = re.compile('btih:(.*?)&').findall(url)[0]
            name = unquote_plus(url.split('&dn=')[1])
            name = source_utils.clean_name(self.title, name)
            if source_utils.remove_lang(name, self.episode_title):
                continue
            if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year):
                continue
            # filter for episode multi packs (ex. S01E01-E17 is also returned in query)
            if self.episode_title:
                if not source_utils.filter_single_episodes(self.hdlr, name):
                    continue
            try:
                seeders = int(re.findall('<td class="green center">([0-9]+|[0-9]+,[0-9]+)</td>', post, re.DOTALL)[0].replace(',', ''))
                if self.min_seeders > seeders:
                    continue
            except:
                seeders = 0
                pass
            quality, info = source_utils.get_release_quality(name, url)
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                dsize, isize = source_utils._size(size)
                info.insert(0, isize)
            except:
                dsize = 0
                pass
            info = ' | '.join(info)
            self.sources.append({'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
    except:
        source_utils.scraper_error('KICKASS2')
        pass
def get_sources(self, url):
    # Scrape a btscene results page; a single row may carry several magnet
    # links, each becoming its own source entry.
    try:
        r = client.request(url)
        if not r:
            return
        posts = client.parseDOM(r, 'tr')
        for post in posts:
            link = re.findall('a title="Download Torrent Magnet" href="(magnet:.+?)"', post, re.DOTALL)
            if not link:
                continue
            for url in link:
                url = unquote_plus(url).split('&tr')[0].replace('&', '&').replace(' ', '.')
                url = source_utils.strip_non_ascii_and_unprintable(url)
                hash = re.compile('btih:(.*?)&').findall(url)[0]
                name = url.split('&dn=')[1]
                name = source_utils.clean_name(self.title, name)
                if source_utils.remove_lang(name, self.episode_title):
                    continue
                if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year):
                    continue
                # filter for episode multi packs (ex. S01E01-E17 is also returned in query)
                if self.episode_title:
                    if not source_utils.filter_single_episodes(self.hdlr, name):
                        continue
                try:
                    seeders = int(client.parseDOM(post, 'td', attrs={'class': 'seeds is-hidden-sm-mobile'})[0].replace(',', ''))
                    if self.min_seeders > seeders:
                        continue
                except:
                    seeders = 0
                    pass
                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                    dsize, isize = source_utils._size(size)
                    info.insert(0, isize)
                except:
                    dsize = 0
                    pass
                info = ' | '.join(info)
                self.sources.append({'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
    except:
        source_utils.scraper_error('BTSCENE')
        pass
def _get_items(self, url):
    # Build (name, link, isize, dsize, seeders) tuples from a Kickass
    # results page and append them to self.items.
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'id': 'torrent_latest_torrents'})
        for post in posts:
            ref = client.parseDOM(post, 'a', attrs={'title': 'Torrent magnet link'}, ret='href')[0]
            # Unwrap the interstitial redirect wrappers to recover the magnet.
            link = urllib.unquote(ref).decode('utf8').replace(
                'https://mylink.me.uk/?url=', '').replace('https://mylink.cx/?url=', '')
            name = urllib.unquote_plus(
                re.search('dn=([^&]+)', link).groups()[0])
            name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
            if source_utils.remove_lang(name):
                continue
            match = source_utils.check_title(self.title, name, self.hdlr, self.year)
            if not match:
                continue
            try:
                seeders = int(
                    re.findall(
                        '<td class="green center">([0-9]+|[0-9]+,[0-9]+)</td>',
                        post, re.DOTALL)[0].replace(',', ''))
                if self.min_seeders > seeders:
                    continue
            except:
                seeders = 0
                pass
            try:
                size = re.findall(
                    '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                    post)[0]
                dsize, isize = source_utils._size(size)
            except:
                isize = '0'
                dsize = 0
                pass
            self.items.append((name, link, isize, dsize, seeders))
        return self.items
    except:
        source_utils.scraper_error('KICKASS2')
        return self.items
def _get_items(self, url):
    # Build (name, link, isize, dsize) tuples from a LimeTorrents search
    # page (legacy variant; no seeder column).
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'table', attrs={'class': 'table2'})[0]
        posts = client.parseDOM(posts, 'tr')
        for post in posts:
            data = client.parseDOM(post, 'a', ret='href')[1]
            if '/search/' in data:
                continue
            # Remove non-ASCII characters...freakin limetorrents
            try:
                data = data.encode('ascii', 'ignore')
            except:
                pass
            # some broken links contain whitespace
            data = re.sub('\s', '', data).strip()
            link = urlparse.urljoin(self.base_link, data)
            name = client.parseDOM(post, 'a')[1]
            name = urllib.unquote_plus(name).replace(' ', '.')
            if source_utils.remove_lang(name):
                continue
            # Strip year/handler noise before comparing cleaned titles.
            t = name.split(self.hdlr)[0].replace(self.year, '').replace(
                '(', '').replace(')', '').replace('&', 'and').replace(
                '.US.', '.').replace('.us.', '.')
            if cleantitle.get(t) != cleantitle.get(self.title):
                continue
            if self.hdlr not in name:
                continue
            try:
                size = re.findall(
                    '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                    post)[0]
                dsize, isize = source_utils._size(size)
            except:
                isize = '0'
                dsize = 0
                pass
            self.items.append((name, link, isize, dsize))
        return self.items
    except:
        source_utils.scraper_error('LIMETORRENTS')
        return self.items
def _get_items(self, url):
    # Build (name, link, isize, dsize, seeders) tuples from a 1337x results
    # page (legacy variant using urllib/urlparse helpers).
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        # No <tbody> means no results table on the page.
        if '<tbody' not in r:
            return self.items
        posts = client.parseDOM(r, 'tbody')[0]
        posts = client.parseDOM(posts, 'tr')
        for post in posts:
            data = client.parseDOM(post, 'a', ret='href')[1]
            link = urlparse.urljoin(self.base_link, data)
            try:
                seeders = int(
                    client.parseDOM(post, 'td', attrs={'class': 'coll-2 seeds'
                                                       })[0].replace(',', ''))
                if self.min_seeders > seeders:
                    continue
            except:
                seeders = 0
                pass
            name = client.parseDOM(post, 'a')[1]
            name = urllib.unquote_plus(name)
            name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
            if source_utils.remove_lang(name):
                continue
            match = source_utils.check_title(self.title, name, self.hdlr, self.year)
            if not match:
                continue
            try:
                size = re.findall(
                    '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                    post)[0]
                dsize, isize = source_utils._size(size)
            except:
                isize = '0'
                dsize = 0
                pass
            self.items.append((name, link, isize, dsize, seeders))
        return self.items
    except:
        source_utils.scraper_error('1337X')
        return self.items
def get_sources(self, link):
    # Resolve an ETTV detail-page link (legacy variant) and append its
    # magnet as a source entry after a cleaned-title match.
    try:
        url = re.compile('href="(.+?)"').findall(link)[0]
        url = '%s%s' % (self.base_link, url)
        result = client.request(url)
        if 'magnet' not in result:
            return
        url = 'magnet:%s' % (re.findall('a href="magnet:(.+?)"', result, re.DOTALL)[0])
        url = urllib.unquote(url).decode('utf8').replace('&', '&')
        url = url.split('&xl=')[0]
        if url in str(self.sources):
            return
        size_list = client.parseDOM(result, "td", attrs={"class": "table_col2"})
        name = url.split('&dn=')[1]
        # Double-unquote: names are sometimes percent-encoded twice.
        name = urllib.unquote_plus(urllib.unquote_plus(name)).replace(' ', '.')
        if source_utils.remove_lang(name):
            return
        t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
        if cleantitle.get(t) != cleantitle.get(self.title):
            return
        if self.hdlr not in name:
            return
        quality, info = source_utils.get_release_quality(name, url)
        # The size may appear in any of several table cells; take the first hit.
        # NOTE(review): if size_list is empty, dsize is never assigned and the
        # append below raises NameError, silently caught by the outer except.
        for match in size_list:
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', match)[0]
                dsize, isize = source_utils._size(size)
                info.insert(0, isize)
                if size:
                    break
            except:
                isize = '0'
                dsize = 0
                pass
        info = ' | '.join(info)
        self.sources.append({'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
    except:
        source_utils.scraper_error('ETTV')
        pass
def _get_items(self, url):
    """Collect (name, magnet, isize, dsize, seeders) tuples from a GloDLS search page."""
    items = []
    try:
        html = client.request(url, headers={'User-Agent': client.agent()})
        # Keep only torrent rows; drop tracker-listing rows.
        rows = [i for i in client.parseDOM(html, 'tr', attrs={'class': 't-row'}) if 'racker:' not in i]
        for row in rows:
            hrefs = client.parseDOM(row, 'a', ret='href')
            url = [h for h in hrefs if 'magnet:' in h][0]
            name = urllib.unquote_plus(client.parseDOM(row, 'a', ret='title')[0])
            name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
            if source_utils.remove_lang(name):
                continue
            if not source_utils.check_title(self.title, name, self.hdlr, self.year):
                continue
            try:
                seeders = int(re.findall("<td.*?<font color='green'><b>([0-9]+|[0-9]+,[0-9]+)</b>", row)[0].replace(',', ''))
                if self.min_seeders > seeders:
                    continue
            except:
                seeders = 0
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', row)[0]
                dsize, isize = source_utils._size(size)
            except:
                dsize, isize = 0, '0'
            items.append((name, url, isize, dsize, seeders))
        return items
    except:
        source_utils.scraper_error('GLODLS')
        return items
def get_sources(self, link):
    # Convert one ExtraTorrent result-row fragment into a magnet source entry.
    try:
        url = 'magnet:%s' % (re.findall('a href="magnet:(.+?)"', link, re.DOTALL)[0])
        url = unquote_plus(url).replace('&', '&').replace(' ', '.')
        url = url.split('&tr')[0]
        # Strip any non-ASCII bytes from the magnet URI.
        try:
            url = url.encode('ascii', errors='ignore').decode('ascii', errors='ignore')
        except:
            pass
        hash = re.compile('btih:(.*?)&').findall(url)[0]
        name = url.split('&dn=')[1]
        name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
        if source_utils.remove_lang(name):
            return
        match = source_utils.check_title(self.title, name, self.hdlr, self.year)
        if not match:
            return
        # Skip duplicates already collected for this title.
        if url in str(self.sources):
            return
        try:
            seeders = int(client.parseDOM(link, 'td', attrs={'class': 'sy'})[0].replace(',', ''))
            if self.min_seeders > seeders:
                return
        except:
            seeders = 0
            pass
        quality, info = source_utils.get_release_quality(name, url)
        try:
            size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', link)[0]
            dsize, isize = source_utils._size(size)
            info.insert(0, isize)
        except:
            source_utils.scraper_error('EXTRATORRENT')
            dsize = 0
            pass
        info = ' | '.join(info)
        self.sources.append({'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
    except:
        source_utils.scraper_error('EXTRATORRENT')
        pass
def _get_sources(self, url):
    # Scrape a btscene results page (legacy variant); each row may list
    # several magnet links, each appended as its own source.
    try:
        r = client.request(url)
        posts = client.parseDOM(r, 'tr')
        for post in posts:
            link = re.findall('a title="Download Torrent Magnet" href="(magnet:.+?)"', post, re.DOTALL)
            if link == []:
                continue
            for url in link:
                try:
                    seeders = int(client.parseDOM(post, 'td', attrs={'class': 'seeds is-hidden-sm-mobile'})[0].replace(',', ''))
                    if self.min_seeders > seeders:
                        continue
                except:
                    seeders = 0
                    pass
                url = urllib.unquote_plus(url).replace('&', '&').replace(' ', '.')
                url = url.split('&tr')[0]
                hash = re.compile('btih:(.*?)&').findall(url)[0]
                name = url.split('&dn=')[1]
                name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
                if source_utils.remove_lang(name):
                    continue
                match = source_utils.check_title(self.title, name, self.hdlr, self.year)
                if not match:
                    continue
                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                    dsize, isize = source_utils._size(size)
                    info.insert(0, isize)
                except:
                    dsize = 0
                    pass
                info = ' | '.join(info)
                self.sources.append({'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
    except:
        source_utils.scraper_error('BTSCENE')
        pass
def _get_sources(self, name, url):
    """Scrape a RapidMoviez post for hoster links and append valid sources.

    name: release title (may carry a leading bracketed tag).
    url: post URL, fetched through the cfscrape session.
    """
    try:
        headers = {'User-Agent': client.agent()}
        r = self.scraper.get(url, headers=headers).content
        name = client.replaceHTMLCodes(name)
        # Strip a leading "[tag]" prefix from the release name.
        if name.startswith('['):
            name = name.split(']')[1]
        name = name.strip().replace(' ', '.')
        l = dom_parser.parse_dom(r, 'div', {'class': 'ppu2h'})
        if l == []:
            return
        for i in l:
            urls = re.findall(
                r'''((?:http|ftp|https)://[\w_-]+(?:(?:\.[\w_-]+)+)[\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])''',
                i.content, flags=re.MULTILINE | re.DOTALL)
            # BUGFIX: the original chained the exclusions with "or" between
            # "not in" tests, which is always True (no URL contains every
            # extension), so archive/subtitle links were never filtered out.
            urls = [u for u in urls if not any(ext in u for ext in ('.rar', '.zip', '.iso', '.idx', '.sub'))]
            for url in urls:
                if url in str(self.sources):
                    continue
                valid, host = source_utils.is_host_valid(url, self.hostDict)
                if not valid:
                    continue
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')
                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', name)[0]
                    dsize, isize = source_utils._size(size)
                    info.insert(0, isize)
                except:
                    dsize = 0
                fileType = source_utils.getFileType(name)
                info.append(fileType)
                info = ' | '.join(info) if fileType else info[0]
                self.sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
    except:
        source_utils.scraper_error('RAPIDMOVIEZ')
        pass
def _get_sources(self, url):
    """Scrape a btscene results page and append magnet sources.

    Pulls every "Download Torrent Magnet" link from each table row,
    filters by cleaned title/year, and records quality/size metadata.
    """
    try:
        r = client.request(url)
        posts = client.parseDOM(r, 'tr')
        for post in posts:
            link = re.findall('a title="Download Torrent Magnet" href="(magnet:.+?)"', post, re.DOTALL)
            if link == []:
                continue
            for url in link:
                url = url.split('&tr')[0]
                name = url.split('&dn=')[1]
                name = urllib.unquote_plus(name).replace(' ', '.')
                if source_utils.remove_lang(name):
                    continue
                # Strip year/handler noise before comparing cleaned titles.
                t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
                if cleantitle.get(t) != cleantitle.get(self.title):
                    continue
                if self.hdlr not in url:
                    continue
                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                    dsize, isize = source_utils._size(size)
                    info.insert(0, isize)
                except:
                    # BUGFIX: dsize was left undefined when the size regex
                    # failed, raising NameError at the append below and
                    # silently dropping the source via the outer except.
                    dsize = 0
                info = ' | '.join(info)
                self.sources.append({'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
    except:
        source_utils.scraper_error('BTSCENE')
        pass
def _get_items(self, url):
    """Collect (name, magnet, isize, dsize) tuples from a GloDLS search page."""
    items = []
    try:
        html = client.request(url, headers={'User-Agent': client.agent()})
        # Keep only torrent rows; drop tracker-listing rows.
        rows = [i for i in client.parseDOM(html, 'tr', attrs={'class': 't-row'}) if 'racker:' not in i]
        for row in rows:
            hrefs = client.parseDOM(row, 'a', ret='href')
            url = [h for h in hrefs if 'magnet:' in h][0]
            name = urllib.unquote_plus(client.parseDOM(row, 'a', ret='title')[0]).replace(' ', '.')
            if source_utils.remove_lang(name):
                continue
            # Normalise the release title (minus year/handler noise) before
            # comparing against the wanted title.
            stripped = name.split(self.hdlr)[0].replace(self.year, '')
            for old, new in (('(', ''), (')', ''), ('&', 'and'), ('.US.', '.'), ('.us.', '.')):
                stripped = stripped.replace(old, new)
            if cleantitle.get(stripped) != cleantitle.get(self.title):
                continue
            if self.hdlr not in name:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', row)[0]
                dsize, isize = source_utils._size(size)
            except:
                dsize, isize = 0, '0'
            items.append((name, url, isize, dsize))
        return items
    except:
        source_utils.scraper_error('GLODLS')
        return items
def _get_items(self, url):
    """Collect (name, link, isize, dsize) tuples from a 1337x results page.

    Appends matches to self.items and returns it; on scrape error the
    partial list is returned.
    """
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        # No <tbody> means no results table on the page.
        if '<tbody' not in r:
            return self.items
        posts = client.parseDOM(r, 'tbody')[0]
        posts = client.parseDOM(posts, 'tr')
        for post in posts:
            data = client.parseDOM(post, 'a', ret='href')[1]
            link = urlparse.urljoin(self.base_link, data)
            name = client.parseDOM(post, 'a')[1]
            name = urllib.unquote_plus(name).replace(' ', '.')
            if source_utils.remove_lang(name):
                # BUGFIX: was "return", which aborted the whole scrape at the
                # first foreign-language release instead of skipping that row.
                continue
            # Strip year/handler noise before comparing cleaned titles.
            t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
            if cleantitle.get(t) != cleantitle.get(self.title):
                continue
            if self.hdlr not in name:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                dsize, isize = source_utils._size(size)
            except:
                isize = '0'
                dsize = 0
            self.items.append((name, link, isize, dsize))
        return self.items
    except:
        source_utils.scraper_error('1337X')
        return self.items
def _get_items(self, url):
    # Build (name, link, isize, dsize) tuples from a Kickass search page
    # (legacy variant; no seeder filtering).
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'id': 'torrent_latest_torrents'})
        for post in posts:
            ref = client.parseDOM(post, 'a', attrs={'title': 'Torrent magnet link'}, ret='href')[0]
            # Unwrap the interstitial redirect wrappers to recover the magnet.
            link = urllib.unquote(ref).decode('utf8').replace('https://mylink.me.uk/?url=', '').replace('https://mylink.cx/?url=', '')
            name = urllib.unquote_plus(re.search('dn=([^&]+)', link).groups()[0])
            name = name.replace(' ', '.')
            if source_utils.remove_lang(name):
                continue
            # Strip year/handler noise before comparing cleaned titles.
            t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
            if cleantitle.get(t) != cleantitle.get(self.title):
                continue
            if self.hdlr not in name:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                dsize, isize = source_utils._size(size)
            except:
                isize = '0'
                dsize = 0
                pass
            self.items.append((name, link, isize, dsize))
        return self.items
    except:
        source_utils.scraper_error('KICKASS2')
        return self.items
def get_sources(self, item):
    # item is a (name, url) tuple from the item-collection pass. Fetches the
    # post page, matches release blocks with a site-specific regex, and
    # appends each valid hoster link as a source.
    try:
        name = item[0].replace(' ', '.')
        url = item[1]
        r = self.scraper.get(url, headers=self.headers).content
        # Flatten the markup so the multi-line regexes below can match.
        r = re.sub(r'\n', '', r)
        r = re.sub(r'\t', '', r)
        list = client.parseDOM(r, 'div', attrs={'id': 'content'})
        # log_utils.log('list = %s' % list, log_utils.LOGDEBUG)
        # Episode posts and movie posts use different block layouts.
        if 'tvshowtitle' in self.data:
            # Groups: 1=whole block, 2=release name, 4=audio/codec info, 5=size.
            regex = '(<p><strong>(.*?)</strong><br />([A-Z]*)\s*\|\s*([A-Z,0-9,\s*]*)\|\s*((\d+\.\d+|\d*)\s*(?:GB|GiB|Gb|MB|MiB|Mb))?</p>(?:\s*<p><a href=\".*?\" .*?_blank\">.*?</a></p>)+)'
        else:
            # Groups: 1=whole block, 2=release name, 3=size.
            regex = '(<strong>Release Name:</strong>\s*(.*?)<br />.*<strong>Size:</strong>\s*((\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))?<br />(.*\s)*)'
        # regex = '(<strong>Release Name:</strong>\s*(.*?)<br />.*<strong>Size:</strong>\s*((\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))?<br />.*<strong>Audio:</strong>\s*[A-Z]*[a-z]*\s*\|\s*([A-z]*[0-9]*)(.*\s)*)'
        for match in re.finditer(
                regex, list[0].encode('ascii', errors='ignore').decode(
                    'ascii', errors='ignore').replace(' ', ' ')):
            name = str(match.group(2))
            # Strip year/handler noise before comparing cleaned titles.
            t = name.split(self.hdlr)[0].replace(self.year, '').replace(
                '(', '').replace(')', '').replace('&', 'and')
            if cleantitle.get(t) != cleantitle.get(self.title):
                continue
            if self.hdlr not in name:
                continue
            if source_utils.remove_lang(name):
                continue
            if 'tvshowtitle' in self.data:
                size = str(match.group(5))
            else:
                size = str(match.group(3))
            links = client.parseDOM(match.group(1), 'a', attrs={'class': 'autohyperlink'}, ret='href')
            for url in links:
                try:
                    # Skip archives/ISO images/sample files.
                    if any(x in url for x in ['.rar', '.zip', '.iso', '.sample.']):
                        continue
                    if url in str(self.sources):
                        continue
                    valid, host = source_utils.is_host_valid(
                        url, self.hostDict)
                    if not valid:
                        continue
                    host = client.replaceHTMLCodes(host)
                    host = host.encode('utf-8')
                    quality, info = source_utils.get_release_quality(
                        name, url)
                    if 'tvshowtitle' in self.data:
                        info.append(str(match.group(4)))
                    try:
                        dsize, isize = source_utils._size(size)
                        info.insert(0, isize)
                    except:
                        dsize = 0
                        pass
                    info = ' | '.join(info)
                    self.sources.append({
                        'source': host, 'quality': quality, 'language': 'en',
                        'url': url, 'info': info, 'direct': False,
                        'debridonly': True, 'size': dsize})
                except:
                    source_utils.scraper_error('TVDOWNLOADS')
                    pass
        return self.sources
    except:
        source_utils.scraper_error('TVDOWNLOADS')
        pass
def sources(self, url, hostDict, hostprDict):
    """Search Nyaa with two query variants and return torrent sources.

    url is a urlencoded query string (title/year or show/season/episode).
    Returns a list of source dicts; partial results are returned on error.
    """
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        # Two handler formats: "S01E01" and "S1 - 1" (anime uploads often use the latter).
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        hdlr2 = 'S%d - %d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        query2 = '%s %s' % (title, hdlr2)
        query2 = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query2)
        urls = []
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        urls.append(url)
        url2 = self.search_link % urllib.quote_plus(query2)
        url2 = urlparse.urljoin(self.base_link, url2)
        urls.append(url2)
        for url in urls:
            try:
                r = client.request(url)
                if 'magnet' not in r:
                    return sources
                r = re.sub(r'\n', '', r)
                r = re.sub(r'\t', '', r)
                tbody = client.parseDOM(r, 'tbody')
                rows = client.parseDOM(tbody, 'tr')
                for row in rows:
                    # Zip magnet link, size text, and seeder-count list per row.
                    links = zip(
                        re.findall('href="(magnet:.+?)"', row, re.DOTALL),
                        re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row, re.DOTALL),
                        [re.findall('<td class="text-center">([0-9]+)</td>', row, re.DOTALL)])
                    for link in links:
                        url = urllib.unquote_plus(link[0]).replace('&', '&').replace(' ', '.')
                        url = url.split('&tr')[0]
                        url = url.encode('ascii', errors='ignore').decode('ascii', errors='ignore')
                        hash = re.compile('btih:(.*?)&').findall(url)[0]
                        name = url.split('&dn=')[1]
                        name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
                        if hdlr not in name and hdlr2 not in name:
                            continue
                        if source_utils.remove_lang(name):
                            continue
                        seeders = int(link[2][0])
                        if self.min_seeders > seeders:
                            continue
                        quality, info = source_utils.get_release_quality(name, url)
                        try:
                            size = link[1]
                            dsize, isize = source_utils._size(size)
                            info.insert(0, isize)
                        except:
                            dsize = 0
                        info = ' | '.join(info)
                        sources.append({'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
            except:
                source_utils.scraper_error('NYAA')
                return sources
        return sources
    except:
        # BUGFIX: the provider tag was misspelled 'NYYAA' in the error log,
        # splitting this scraper's errors across two names.
        source_utils.scraper_error('NYAA')
        return sources
def sources(self, url, hostDict, hostprDict):
    """
    Return debrid-only torrent sources scraped from a TorrentGalaxy
    search page (fetched through a cloudflare-aware scraper).
    """
    scraper = cfscrape.create_scraper()
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        # strip characters the site's search chokes on
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', '%s %s' % (title, hdlr))
        url = urljoin(self.base_link, self.search_link % quote_plus(query))
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        html = scraper.get(url).content
        for table in client.parseDOM(html, 'div', attrs={'class': 'tgxtable'}):
            # pair every magnet with its size badge and seeder count
            rows = zip(
                re.findall('a href="(magnet:.+?)"', table, re.DOTALL),
                re.findall(r"<span class='badge badge-secondary' style='border-radius:4px;'>(.*?)</span>", table, re.DOTALL),
                re.findall(r"<span title='Seeders/Leechers'>\[<font color='green'><b>(.*?)<", table, re.DOTALL))
            for magnet, size_text, seed_text in rows:
                url = unquote_plus(magnet).split('&tr')[0].replace(' ', '.')
                info_hash = re.compile('btih:(.*?)&').findall(url)[0]

                name = url.split('&dn=')[1]
                name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
                if source_utils.remove_lang(name):
                    continue
                if not source_utils.check_title(title, name, hdlr, data['year']):
                    continue

                try:
                    seeders = int(seed_text)
                    if self.min_seeders > seeders:
                        continue
                except:
                    seeders = 0

                quality, info = source_utils.get_release_quality(name, url)
                try:
                    dsize, isize = source_utils._size(size_text)
                    info.insert(0, isize)
                except:
                    dsize = 0
                info = ' | '.join(info)

                sources.append({
                    'source': 'torrent', 'seeders': seeders, 'hash': info_hash,
                    'name': name, 'quality': quality, 'language': 'en',
                    'url': url, 'info': info, 'direct': False,
                    'debridonly': True, 'size': dsize})
        return sources
    except:
        source_utils.scraper_error('TORRENTGALAXY')
        return sources
def get_sources(self, link): try: url = '%s%s' % (self.base_link, link) result = client.request(url, timeout='5') if result is None: return if 'magnet' not in result: return url = 'magnet:%s' % (re.findall('a href="magnet:(.+?)"', result, re.DOTALL)[0]) try: url = unquote_plus(url).decode('utf8').replace('&', '&') except: url = unquote_plus(url).replace('&', '&') url = url.split('&tr=')[0].replace(' ', '.') if url in str(self.sources): return hash = re.compile('btih:(.*?)&').findall(url)[0] name = url.split('&dn=')[1] name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.') if name.startswith('www'): try: name = re.sub(r'www(.*?)\W{2,10}', '', name) except: name = name.split('-.', 1)[1].lstrip() if source_utils.remove_lang(name): return match = source_utils.check_title(self.title, name, self.hdlr, self.year) if not match: return try: seeders = int( re.findall('<dt>SWARM</dt><dd>.*?>([0-9]+)</b>', result, re.DOTALL)[0].replace(',', '')) if self.min_seeders > seeders: return except: seeders = 0 pass quality, info = source_utils.get_release_quality(name, url) try: size = re.findall('<dt>SIZE</dt><dd>(.*? [a-zA-Z]{2})', result, re.DOTALL)[0] dsize, isize = source_utils._size(size) info.insert(0, isize) except: dsize = 0 pass info = ' | '.join(info) self.sources.append({ 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize }) except: source_utils.scraper_error('TORLOCK') pass
def get_sources(self, link): try: url = '%s%s' % (self.base_link, link) result = client.request(url, timeout='5') if result is None: return if '<kbd>' not in result: return hash = re.findall('<kbd>(.+?)<', result, re.DOTALL)[0] url = '%s%s' % ('magnet:?xt=urn:btih:', hash) name = re.findall('<h3 class="card-title">(.+?)<', result, re.DOTALL)[0].replace('Original Name: ', '') name = unquote_plus(name) name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.') if source_utils.remove_lang(name): return match = source_utils.check_title(self.title, name, self.hdlr, self.year) if not match: return url = '%s%s%s' % (url, '&dn=', str(name)) if url in str(self.sources): return size = re.findall( '<div class="col-3">File size:</div><div class="col">(.+?)<', result, re.DOTALL)[0] quality, info = source_utils.get_release_quality(name, url) try: seeders = int( re.findall( '<div class="col-3">Seeders:</div><div class="col"><span style="color:green">([0-9]+|[0-9]+,[0-9]+)<', result, re.DOTALL)[0].replace(',', '')) if self.min_seeders > seeders: return except: seeders = 0 pass try: size = re.findall( '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', size)[0] dsize, isize = source_utils._size(size) info.insert(0, isize) except: dsize = 0 pass info = ' | '.join(info) self.sources.append({ 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize }) except: source_utils.scraper_error('YOURBITTORRENT') pass
def get_sources(self, url): try: scraper = cfscrape.create_scraper() r = scraper.get(url).content if not r: return posts = client.parseDOM(r, 'div', attrs={'class': 'media'}) for post in posts: # file_name = client.parseDOM(post, 'span', attrs={'class': 'file-name'}) # file_name and &dn= differ 25% of the time. May add check try: seeders = int( re.findall( r'Seeders\s+:\s+<strong class="text-success">([0-9]+|[0-9]+,[0-9]+)</strong>', post, re.DOTALL)[0].replace(',', '')) if self.min_seeders > seeders: return except: seeders = 0 pass link = re.findall('<a href="(magnet:.+?)"', post, re.DOTALL) for url in link: url = unquote_plus(url).split('&tr')[0].replace( '&', '&').replace(' ', '.') url = source_utils.strip_non_ascii_and_unprintable(url) hash = re.compile('btih:(.*?)&').findall(url)[0] name = url.split('&dn=')[1] name = source_utils.clean_name(self.title, name) if source_utils.remove_lang(name, self.episode_title): continue if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year): continue # filter for episode multi packs (ex. S01E01-E17 is also returned in query) if self.episode_title: if not source_utils.filter_single_episodes( self.hdlr, name): continue quality, info = source_utils.get_release_quality(name, url) try: size = re.findall( '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post)[0] dsize, isize = source_utils._size(size) info.insert(0, isize) except: dsize = 0 pass info = ' | '.join(info) self.sources.append({ 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize }) except: source_utils.scraper_error('BTDB') pass
def sources(self, url, hostDict, hostprDict):
    """
    Scrape 300MBFILMS search results into debrid-only hoster sources.

    Phase 1 walks the <h2> result entries, validates the release name and
    resolves each post into hoster links; phase 2 filters and wraps those
    links into source dicts.

    :return: list of source dicts
    """
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        hostDict = hostprDict + hostDict

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % quote_plus(query)
        url = urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        r = client.request(url)
        posts = client.parseDOM(r, 'h2')
        urls = []
        for item in posts:
            if not item.startswith('<a href'):
                continue
            try:
                name = client.parseDOM(item, "a")[0]
                t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                if cleantitle.get(t) != cleantitle.get(title):
                    continue
                if hdlr not in name:
                    continue
                quality, info = source_utils.get_release_quality(name, item[0])
                try:
                    size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', item)[0]
                    dsize, isize = source_utils._size(size)
                    info.insert(0, isize)
                except:
                    dsize = 0
                    pass
                fileType = source_utils.getFileType(name)
                info.append(fileType)
                info = ' | '.join(info) if fileType else info[0]
                item = client.parseDOM(item, 'a', ret='href')
                url = item
                links = self.links(url)
                if links is None:
                    continue
                # FIX: carry this post's size along with its links. The old
                # code read the bare `dsize` in the second loop, so every
                # hoster link got whatever size the LAST post left behind
                # (and a NameError if no post matched at all).
                urls += [(i, quality, info, dsize) for i in links]
            except:
                source_utils.scraper_error('300MBFILMS')
                pass

        for item in urls:
            if 'earn-money' in item[0]:
                continue
            # skip archives debrid cannot stream
            if any(x in item[0] for x in ['.rar', '.zip', '.iso']):
                continue
            url = client.replaceHTMLCodes(item[0])
            try:
                url = url.encode('utf-8')
            except:
                pass
            valid, host = source_utils.is_host_valid(url, hostDict)
            if not valid:
                continue
            host = client.replaceHTMLCodes(host)
            try:
                host = host.encode('utf-8')
            except:
                pass
            sources.append({
                'source': host, 'quality': item[1], 'language': 'en',
                'url': url, 'info': item[2], 'direct': False,
                'debridonly': True, 'size': item[3]})
        return sources
    except:
        source_utils.scraper_error('300MBFILMS')
        return sources
def get_sources(self, link): try: url = link[0].encode('ascii', errors='ignore').decode( 'ascii', errors='ignore').replace(' ', ' ') if '/torrent/' not in url: return name = link[1].encode('ascii', errors='ignore').decode( 'ascii', errors='ignore').replace(' ', '.').replace(' ', '.') name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.') if source_utils.remove_lang(name): return match = source_utils.check_title(self.title, name, self.hdlr, self.year) if not match: return if not url.startswith('http'): link = urlparse.urljoin(self.base_link, url) link = client.request(link) if link is None: return hash = re.findall('<b>Infohash</b></td><td valign=top>(.+?)</td>', link, re.DOTALL)[0] url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name) if url in str(self.sources): return try: seeders = int( re.findall( '<b>Swarm:</b></td><td valign=top><font color=red>([0-9]+)</font>', link, re.DOTALL)[0].replace(',', '')) if self.min_seeders > seeders: # site does not seem to report seeders return except: seeders = 0 pass quality, info = source_utils.get_release_quality(name, url) try: size = re.findall( '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', link)[0] dsize, isize = source_utils._size(size) info.insert(0, isize) except: dsize = 0 pass info = ' | '.join(info) self.sources.append({ 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize }) except: source_utils.scraper_error('TORRENTFUNK') pass
def sources(self, url, hostDict, hostprDict): try: sources = [] if url is None: return sources if debrid.status() is False: return sources data = urlparse.parse_qs(url) data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data]) title = data['tvshowtitle'] if 'tvshowtitle' in data else data[ 'title'] title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU') hdlr = 'S%02dE%02d' % (int(data['season']), int( data['episode'])) if 'tvshowtitle' in data else data['year'] query = '%s %s' % (title, hdlr) query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query) url = self.search_link % urllib.quote_plus(query) url = urlparse.urljoin(self.base_link, url) # log_utils.log('url = %s' % url, log_utils.LOGDEBUG) r = client.request(url) posts = client.parseDOM(r, 'div', attrs={'class': 'tgxtable'}) for post in posts: link = re.findall('a href="(magnet:.+?)"', post, re.DOTALL) try: size = re.findall( '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0] dsize, isize = source_utils._size(size) except: isize = '0' dsize = 0 for url in link: url = url.split('&tr')[0] name = url.split('&dn=')[1] name = urllib.unquote_plus(name).replace(' ', '.') if source_utils.remove_lang(name): continue t = name.split(hdlr)[0].replace(data['year'], '').replace( '(', '').replace(')', '').replace('&', 'and').replace( '.US.', '.').replace('.us.', '.') if cleantitle.get(t) != cleantitle.get(title): continue if hdlr not in name: continue quality, info = source_utils.get_release_quality(name, url) info.insert(0, isize) info = ' | '.join(info) sources.append({ 'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize }) return sources except: source_utils.scraper_error('TORRENTGALAXY') return sources
def sources(self, url, hostDict, hostprDict): scraper = cfscrape.create_scraper() sources = [] try: if url is None: return sources if debrid.status() is False: return sources data = parse_qs(url) data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data]) q = '%s' % cleantitle.get_gan_url(data['title']) url = self.base_link + self.search_link % q # log_utils.log('url = %s' % url, log_utils.LOGDEBUG) r = scraper.get(url).content v = re.compile('<a href="(.+?)" class="ml-mask jt" title="View(.+?)">\s+<span class=".+?">(.+?)</span>').findall(r) t = '%s (%s)' % (data['title'], data['year']) for url, name, qual in v: if t not in name: continue item = client.request(url) item = client.parseDOM(item, 'div', attrs={'class': 'mvici-left'})[0] details = re.compile('<strong>Movie Source.*\s*.*/Person">(.*)</').findall(item)[0] name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.') if source_utils.remove_lang(name): continue key = url.split('-hd')[1] r = scraper.get('https://soapgate.online/moviedownload.php?q=' + key).content r = re.compile('<a rel=".+?" href="(.+?)" target=".+?">').findall(r) for url in r: if any(x in url for x in ['.rar', '.zip', '.iso']): continue quality, info = source_utils.get_release_quality(qual) try: size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', item)[0] dsize, isize = source_utils._size(size) info.insert(0, isize) except: dsize = 0 pass fileType = source_utils.getFileType(details) info.append(fileType) info = ' | '.join(info) if fileType else info[0] valid, host = source_utils.is_host_valid(url, hostDict) if not valid: continue sources.append({'source': host, 'quality': quality, 'info': info, 'language': 'en', 'url': url, 'direct': False, 'debridonly': True, 'size': dsize}) return sources except: source_utils.scraper_error('GANOOL') return sources