def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')

        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

        url = self.search_link % quote(query)
        url = urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        html = client.request(url)
        html = html.replace('&nbsp;', ' ')
        try:
            results = client.parseDOM(html, 'table', attrs={'id': 'searchResult'})
        except:
            return sources

        # "/1/" is the second page of search results
        url2 = url.replace('/0/', '/1/')
        html2 = client.request(url2)
        html2 = html2.replace('&nbsp;', ' ')
        try:
            results += client.parseDOM(html2, 'table', attrs={'id': 'searchResult'})
        except:
            return sources

        results = ''.join(results)
        rows = re.findall('<tr(.+?)</tr>', results, re.DOTALL)
        if rows is None:
            return sources

        for entry in rows:
            if 'magnet' not in entry:
                continue
            try:
                url = 'magnet:%s' % (re.findall('a href="magnet:(.+?)"', entry, re.DOTALL)[0])
                url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.')
                url = url.split('&tr')[0]
                hash = re.compile('btih:(.*?)&').findall(url)[0]

                name = re.findall('class="detLink" title=".+?">(.+?)</a>', entry, re.DOTALL)[0]
                name = unquote_plus(name)
                name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
                if source_utils.remove_lang(name):
                    continue

                match = source_utils.check_title(title, name, hdlr, data['year'])
                if not match:
                    continue

                try:
                    seeders = int(re.findall('<td align="right">([0-9]+|[0-9]+,[0-9]+)</td>', entry, re.DOTALL)[0].replace(',', ''))
                    if self.min_seeders > seeders:
                        continue
                except:
                    seeders = 0
                    pass

                quality, info = source_utils.get_release_quality(name, url)

                try:
                    size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', entry)[-1]
                    dsize, isize = source_utils._size(size)
                    info.insert(0, isize)
                except:
                    dsize = 0
                    pass
                info = ' | '.join(info)

                sources.append({'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality,
                                    'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
            except:
                source_utils.scraper_error('PIRATEBAY')
                continue
        return sources
    except:
        source_utils.scraper_error('PIRATEBAY')
        return sources
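# --- Illustrative sketch (not the add-on's code): how the query-string `url`
# payload consumed by these sources() methods can be built and unpacked. The
# key names mirror the ones read above; the example values are hypothetical.
from urllib.parse import urlencode, parse_qs

payload = urlencode({'tvshowtitle': 'Law and Order Special Victims Unit',
                     'year': '1999', 'season': '21', 'episode': '5'})
data = parse_qs(payload)
data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
assert data['episode'] == '5'  # parse_qs wraps values in lists; the dict comp unwraps them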
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        hostDict = hostprDict + hostDict

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')

        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url).replace('-', '+')
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        r = client.request(url)
        posts = client.parseDOM(r, "table", attrs={"class": "download"})
        if posts == []:
            return sources

        for post in posts:
            items = zip(client.parseDOM(post, 'a', ret='title'), client.parseDOM(post, 'a', ret='href'))
            for item in items:
                try:
                    name = item[0].replace(' ', '.')
                    t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                    if cleantitle.get(t) != cleantitle.get(title):
                        continue
                    if hdlr not in name:
                        continue
                    if source_utils.remove_lang(name):
                        continue

                    i = str(item[1])
                    i = self.base_link + i
                    r = client.request(i)
                    u = client.parseDOM(r, "div", attrs={"class": "dl-links"})

                    for t in u:
                        r = zip(re.compile("a href=.+? dl\W+'(.+?)'\W+").findall(t), re.findall('>.\((.+?Mb)\)', t))
                        for link in r:
                            url = link[0]
                            if any(x in url for x in ['.rar', '.zip', '.iso', '.sample.']):
                                continue
                            if url in str(sources):
                                continue

                            quality, info = source_utils.get_release_quality(name, url)
                            try:
                                dsize, isize = source_utils._size(link[1])
                                info.insert(0, isize)
                            except:
                                dsize = 0
                                pass
                            info = ' | '.join(info)

                            valid, host = source_utils.is_host_valid(url, hostDict)
                            if not valid:
                                continue
                            host = client.replaceHTMLCodes(host)
                            host = host.encode('utf-8')

                            sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
                                                'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
                except:
                    source_utils.scraper_error('DDLSPOT')
                    pass
        return sources
    except:
        source_utils.scraper_error('DDLSPOT')
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['title'].replace(':', '').lower()
        year = data['year']

        query = '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        url = urlparse.urljoin(self.base_link, self.post_link)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        post = 'do=search&subaction=search&search_start=0&full_search=0&result_from=1&story=%s' % urllib.quote_plus(query)
        r = client.request(url, post=post)
        r = client.parseDOM(r, 'div', attrs={'class': 'box-out margin'})
        r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'news-title'})) for i in r if data['imdb'] in i]
        r = [(dom_parser.parse_dom(i[0], 'a', req='href')) for i in r if i]
        r = [(i[0].attrs['href'], i[0].content) for i in r if i]

        hostDict = hostprDict + hostDict

        for item in r:
            try:
                name = item[0].replace(' ', '.')
                s = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', name)
                s = s[0] if s else '0'

                data = client.request(item[0])
                data = dom_parser.parse_dom(data, 'div', attrs={'id': 'r-content'})
                data = re.findall('\s*<b><a href="(.+?)".+?</a></b>', data[0].content, re.DOTALL)

                for url in data:
                    url = client.replaceHTMLCodes(url)
                    url = url.encode('utf-8')
                    if 'turbobit' not in url:
                        continue

                    valid, host = source_utils.is_host_valid(url, hostDict)
                    if not valid:
                        continue

                    try:
                        qual = client.request(url)
                        quals = re.findall('span class="file-title" id="file-title">(.+?)</span', qual)
                        # renamed loop variable (was shadowing `quals`) and test the
                        # whole string; iterating a string checks single characters,
                        # so codec tags like 'hevc' could never match
                        for q in quals:
                            quality = source_utils.check_sd_url(q)
                            info = []
                            if '3D' in name or '.3D.' in q:
                                info.append('3D')
                                quality = '1080p'
                            if any(i in q.lower() for i in ['hevc', 'h265', 'x265']):
                                info.append('HEVC')
                            info = ' | '.join(info)

                            sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
                                                'info': info, 'direct': True, 'debridonly': False})
                    except:
                        source_utils.scraper_error('ULTRAHDINDIR')
                        pass
            except:
                source_utils.scraper_error('ULTRAHDINDIR')
                pass
        return sources
    except Exception:
        source_utils.scraper_error('ULTRAHDINDIR')
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')

        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

        url = urlparse.urljoin(self.base_link, self.search_link.format(query[0].lower(), cleantitle.geturl(query)))
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        r = client.request(url)
        if '<tbody' not in r:
            return sources

        r = client.parseDOM(r, 'tbody')[0]
        results = client.parseDOM(r, 'tr')
        posts = [i for i in results if 'magnet:' in i]

        try:
            next_page = [i for i in results if 'Next Page' in i]
            if next_page == []:
                raise Exception()
            page = client.parseDOM(next_page, 'a', ret='href', attrs={'title': 'Downloads | Page 2'})[0]
            r2 = client.request(self.base_link + page)
            results2 = client.parseDOM(r2, 'tr')
            posts += [i for i in results2 if 'magnet:' in i]
        except:
            pass

        for post in posts:
            post = post.replace('&nbsp;', ' ')
            links = client.parseDOM(post, 'a', ret='href')
            magnet = [i.replace('&amp;', '&') for i in links if 'magnet:' in i][0]
            url = magnet.split('&tr')[0]

            if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
                continue

            name = client.parseDOM(post, 'a', ret='title')[1]
            name = urllib.unquote_plus(name).replace(' ', '.')
            if source_utils.remove_lang(name):
                continue

            t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
            if cleantitle.get(t) != cleantitle.get(title):
                continue
            if hdlr not in name:
                continue

            quality, info = source_utils.get_release_quality(name, url)

            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
                info.insert(0, size)
            except:
                pass
            info = ' | '.join(info)

            sources.append({'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url,
                                'info': info, 'direct': False, 'debridonly': True})
        return sources
    except:
        source_utils.scraper_error('MAGNETDL')
        return sources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')

        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        hdlr2 = 'S%d - %d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        query2 = '%s %s' % (title, hdlr2)
        query2 = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query2)

        urls = []
        url = self.search_link % quote_plus(query)
        url = urljoin(self.base_link, url)
        urls.append(url)
        url2 = self.search_link % quote_plus(query2)
        url2 = urljoin(self.base_link, url2)
        urls.append(url2)
        # log_utils.log('urls = %s' % urls, log_utils.LOGDEBUG)

        for url in urls:
            try:
                r = client.request(url)
                if 'magnet' not in r:
                    return sources
                r = re.sub(r'\n', '', r)
                r = re.sub(r'\t', '', r)
                tbody = client.parseDOM(r, 'tbody')
                rows = client.parseDOM(tbody, 'tr')

                for row in rows:
                    links = zip(re.findall('href="(magnet:.+?)"', row, re.DOTALL),
                                re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', row, re.DOTALL),
                                [re.findall('<td class="text-center">([0-9]+)</td>', row, re.DOTALL)])

                    for link in links:
                        url = unquote_plus(link[0]).replace('&amp;', '&').replace(' ', '.')
                        url = url.split('&tr')[0]
                        try:
                            url = url.encode('ascii', errors='ignore').decode('ascii', errors='ignore')
                        except:
                            pass

                        hash = re.compile('btih:(.*?)&').findall(url)[0]
                        name = url.split('&dn=')[1]
                        name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
                        # if name.startswith('www'):
                        #     try:
                        #         name = re.sub(r'www(.*?)\W{2,10}', '', name)
                        #     except:
                        #         name = name.split('-.', 1)[1].lstrip()

                        if hdlr not in name and hdlr2 not in name:
                            continue
                        if source_utils.remove_lang(name):
                            continue

                        if hdlr in name:
                            t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
                        if hdlr2 in name:
                            t = name.split(hdlr2)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
                        # if cleantitle.get(t) != cleantitle.get(title):
                        #     continue

                        seeders = int(link[2][0])
                        if self.min_seeders > seeders:
                            continue

                        quality, info = source_utils.get_release_quality(name, url)
                        try:
                            size = link[1]
                            dsize, isize = source_utils._size(size)
                            info.insert(0, isize)
                        except:
                            dsize = 0
                            pass
                        info = ' | '.join(info)

                        sources.append({'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality,
                                            'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
            except:
                source_utils.scraper_error('NYAA')
                return sources
        return sources
    except:
        source_utils.scraper_error('NYAA')
        return sources
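# --- Illustrative sketch (hypothetical helper, not part of the scrapers): the
# magnet-link handling repeated above -- strip tracker parameters, pull the
# btih info hash, and normalise the display name -- factored out for clarity.
import re
from urllib.parse import unquote_plus

def parse_magnet(magnet):
    url = unquote_plus(magnet).replace('&amp;', '&').replace(' ', '.')
    url = url.split('&tr')[0]  # drop tracker parameters
    info_hash = re.findall('btih:(.*?)&', url + '&')[0]  # trailing '&' guards links with no '&dn='
    name = url.split('&dn=')[1] if '&dn=' in url else ''
    name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
    return url, info_hash, name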
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        aliases = data['aliases']
        episode_title = data['title'] if 'tvshowtitle' in data else None

        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s %s' % (title, hdlr)
        query = re.sub('[^A-Za-z0-9\s\.-]+', '', query)

        url = self.search_link % quote_plus(query)
        url = urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        r = client.request(url)
        if '<tbody' not in r:
            return sources

        posts = client.parseDOM(r, 'tbody')[0]
        posts = client.parseDOM(posts, 'tr')
    except:
        source_utils.scraper_error('SKYTORRENTS')
        return sources

    for post in posts:
        try:
            post = re.sub(r'\n', '', post)
            post = re.sub(r'\t', '', post)
            link = re.findall('href="(magnet:.+?)".+<td style="text-align: center;color:green;">([0-9]+|[0-9]+,[0-9]+)</td>', post, re.DOTALL)

            for url, seeders in link:
                url = unquote_plus(url).split('&tr')[0].replace('&amp;', '&').replace(' ', '.')
                url = source_utils.strip_non_ascii_and_unprintable(url)
                # skip duplicates against the local list (was `self.sources` with a bare
                # return, a leftover from a threaded variant of this scraper)
                if url in str(sources):
                    continue

                hash = re.compile('btih:(.*?)&').findall(url)[0]
                name = url.split('&dn=')[1]
                name = source_utils.clean_name(title, name)
                if source_utils.remove_lang(name, episode_title):
                    continue

                if not source_utils.check_title(title, aliases, name, hdlr, data['year']):
                    continue

                # filter for episode multi packs (ex. S01E01-E17 is also returned in query)
                if episode_title:
                    if not source_utils.filter_single_episodes(hdlr, name):
                        continue

                try:
                    seeders = int(seeders)
                    if self.min_seeders > seeders:
                        continue
                except:
                    seeders = 0
                    pass

                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                    dsize, isize = source_utils._size(size)
                    info.insert(0, isize)
                except:
                    dsize = 0
                    pass
                info = ' | '.join(info)

                sources.append({'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality,
                                    'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
        except:
            source_utils.scraper_error('SKYTORRENTS')
            return sources
    return sources
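# --- Illustrative sketch (hypothetical stand-in, not the real
# source_utils.filter_single_episodes): the multi-pack filter used above
# conceptually rejects names spanning episode ranges such as S01E01-E17
# while keeping exact single-episode matches.
import re

def filter_single_episodes(hdlr, name):
    if re.search(r'S\d+E\d+\W?E\d+', name, re.I):  # e.g. S01E01-E17 / S01E01.E17
        return False
    return hdlr.lower() in name.lower()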
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        aliases = data['aliases']
        episode_title = data['title'] if 'tvshowtitle' in data else None

        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else ('(' + data['year'] + ')')

        # query = '%s %s' % (title, hdlr)  # site now fails with year in query
        query = title
        query = re.sub('[^A-Za-z0-9\s\.-]+', '', query)

        if 'tvshowtitle' in data:
            url = self.show_link % query.replace(' ', '-')
        else:
            url = self.search_link % quote_plus(query)
        url = urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, __name__, log_utils.LOGDEBUG)

        r = client.request(url)
        if not r:
            return sources
        if 'No results were found' in r:
            return sources

        r = client.parseDOM(r, 'div', attrs={'class': 'card'})
        for i in r:
            url = re.compile('href="(magnet.+?)\s*?"').findall(i)[0]
            try:
                url = unquote_plus(url).decode('utf8').replace('&amp;', '&').replace(' ', '.')
            except:
                url = unquote_plus(url).replace('&amp;', '&').replace(' ', '.')
            url = url.split('&tr=')[0].replace(' ', '.')

            hash = re.compile('btih:(.*?)&').findall(url)[0]
            name = url.split('&dn=')[1]
            name = source_utils.clean_name(title, name)
            if source_utils.remove_lang(name, episode_title):
                continue

            if not source_utils.check_title(title, aliases, name, hdlr.replace('(', '').replace(')', ''), data['year']):
                continue

            seeders = 0  # seeders not available on topnow

            quality, info = source_utils.get_release_quality(name, url)
            try:
                # file size is no longer available on topnow's new site
                size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', i)[-1]
                dsize, isize = source_utils._size(size)
                info.insert(0, isize)
            except:
                dsize = 0
                pass
            info = ' | '.join(info)

            sources.append({'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality,
                                'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
        return sources
    except:
        source_utils.scraper_error('TOPNOW')
        return sources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if not url:
            return sources
        if debrid.status() is False:
            return sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        aliases = data['aliases']

        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        episode_title = data['title'] if 'tvshowtitle' in data else None

        query = '%s %s' % (title, hdlr)
        query = re.sub('[^A-Za-z0-9\s\.-]+', '', query)

        url = self.search_link % quote(query)
        url = urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        rjson = client.request(url, error=True)
        if not rjson or 'No results returned' in str(rjson) or 'Connection Time-out' in str(rjson):
            return sources

        files = json.loads(rjson)
        for file in files:
            try:
                hash = file['info_hash']
                name = file['name']
                name = source_utils.clean_name(title, name)
                if source_utils.remove_lang(name, episode_title):
                    continue

                url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)

                if not source_utils.check_title(title, aliases, name, hdlr, data['year']):
                    continue

                # filter for episode multi packs (ex. S01E01-E17 is also returned in query)
                if episode_title:
                    if not source_utils.filter_single_episodes(hdlr, name):
                        continue

                try:
                    seeders = file['seeders']
                    if self.min_seeders > seeders:
                        continue
                except:
                    seeders = 0
                    pass

                quality, info = source_utils.get_release_quality(name, url)
                try:
                    dsize, isize = source_utils.convert_size(float(file["size"]), to='GB')
                    info.insert(0, isize)
                except:
                    dsize = 0
                    pass
                info = ' | '.join(info)

                sources.append({'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality,
                                    'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
            except:
                source_utils.scraper_error('PIRATEBAY')
                continue
        return sources
    except:
        source_utils.scraper_error('PIRATEBAY')
        return sources
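# --- Illustrative sketch (hypothetical helper): a minimal stand-in for the
# source_utils.convert_size()/_size() calls above, assuming the API reports
# size in bytes. Returns the numeric GB value plus the display string the
# scrapers insert into `info`.
def bytes_to_gb(size_bytes):
    dsize = float(size_bytes) / (1024 ** 3)  # bytes -> gigabytes
    return dsize, '%.2f GB' % dsize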
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)

        r = self.scraper.get(url).content
        posts = client.parseDOM(r, 'item')

        hostDict = hostprDict + hostDict

        items = []
        for post in posts:
            try:
                t = client.parseDOM(post, 'title')[0]
                c = client.parseDOM(post, 'content.+?')[0]
                u = client.parseDOM(c, 'p')
                u = [client.parseDOM(i, 'a', ret='href') for i in u]
                u = [i[0] for i in u if len(i) == 1]
                if not u:
                    raise Exception()

                if 'tvshowtitle' in data:
                    u = [(re.sub('(720p|1080p)', '', t) + ' ' + [x for x in i.strip('//').split('/')][-1], i) for i in u]
                else:
                    u = [(t, i) for i in u]
                items += u
            except:
                pass

        for item in items:
            try:
                name = item[0]
                name = client.replaceHTMLCodes(name)

                t = re.sub('(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)', '', name)
                if not cleantitle.get(t) == cleantitle.get(title):
                    raise Exception()

                y = re.findall('[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]', name)[-1].upper()
                if not y == hdlr:
                    raise Exception()

                fmt = re.sub('(.+)(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*)(\.|\)|\]|\s)', '', name.upper())
                fmt = re.split('\.|\(|\)|\[|\]|\s|\-', fmt)
                fmt = [i.lower() for i in fmt]

                if any(i.endswith(('subs', 'sub', 'dubbed', 'dub')) for i in fmt):
                    raise Exception()
                if any(i in ['extras'] for i in fmt):
                    raise Exception()

                if '1080p' in fmt:
                    quality = '1080p'
                elif '720p' in fmt:
                    quality = 'HD'
                else:
                    quality = 'SD'
                if any(i in ['dvdscr', 'r5', 'r6'] for i in fmt):
                    quality = 'SCR'
                elif any(i in ['camrip', 'tsrip', 'hdcam', 'hdts', 'dvdcam', 'dvdts', 'cam', 'telesync', 'ts'] for i in fmt):
                    quality = 'CAM'

                info = []
                if '3d' in fmt:
                    info.append('3D')

                try:
                    # items carry no separate size field, so look for a size token
                    # in the release name itself (the original indexed item[2],
                    # which never exists for these 2-tuples)
                    size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+) (?:GB|GiB|MB|MiB))', name)[-1]
                    div = 1 if size.endswith(('GB', 'GiB')) else 1024
                    size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                    size = '%.2f GB' % size
                    info.append(size)
                except:
                    pass

                if any(i in ['hevc', 'h265', 'x265'] for i in fmt):
                    info.append('HEVC')
                info = ' | '.join(info)

                url = item[1]
                if any(x in url for x in ['.rar', '.zip', '.iso']):
                    raise Exception()
                url = client.replaceHTMLCodes(url)
                url = url.encode('utf-8')

                host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(url.strip().lower()).netloc)[0]
                if not host in hostDict:
                    raise Exception()
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')

                sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
                                    'info': info, 'direct': False, 'debridonly': True})
            except:
                pass

        check = [i for i in sources if not i['quality'] == 'CAM']
        if check:
            sources = check
        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')

        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        try:
            r = client.request(url)
            links = zip(client.parseDOM(r, 'a', attrs={'class': 'btn btn-default magnet-button stats-action banner-button'}, ret='href'),
                        client.parseDOM(r, 'td', attrs={'class': 'size'}))

            for link in links:
                url = link[0].replace('&amp;', '&')
                url = re.sub(r'(&tr=.+)&dn=', '&dn=', url)  # some links on bitlord have &tr= before &dn=
                url = url.split('&tr=')[0]
                if 'magnet' not in url:
                    continue

                size = int(link[1])

                name = url.split('&dn=')[1]
                name = urllib.unquote_plus(name).replace(' ', '.')
                if source_utils.remove_lang(name):
                    continue

                t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
                if cleantitle.get(t) != cleantitle.get(title):
                    continue
                if hdlr not in name:
                    continue

                quality, info = source_utils.get_release_quality(name, url)
                try:
                    if size < 5.12:
                        raise Exception()
                    size = float(size) / 1024  # size column is reported in MB
                    size = '%.2f GB' % size
                    info.insert(0, size)
                except:
                    pass
                info = ' | '.join(info)

                sources.append({'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url,
                                    'info': info, 'direct': False, 'debridonly': True})
            return sources
        except:
            source_utils.scraper_error('BITLORD')
            return sources
    except:
        source_utils.scraper_error('BITLORD')
        return sources
def sources(self, url, hostDict, hostprDict):
    scraper = cfscrape.create_scraper()
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        hostDict = hostprDict + hostDict

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')

        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        query = query.replace("&", "and")
        query = re.sub('\s', '-', query)
        # log_utils.log('query = %s' % query, log_utils.LOGDEBUG)

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        url = "http://rlsbb.ru/" + query
        if 'tvshowtitle' not in data:
            url = url + "-1080p"

        r = scraper.get(url).content

        if r is None and 'tvshowtitle' in data:
            # fall back to a season-level page
            season = re.search('S(.*?)E', hdlr)
            season = season.group(1)
            query = title
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
            query = query + "-S" + season
            query = query.replace("&", "and")
            query = query.replace("  ", " ")
            query = query.replace(" ", "-")
            url = "http://rlsbb.ru/" + query
            r = scraper.get(url).content

        posts = client.parseDOM(r, "div", attrs={"class": "content"})
        items = []
        for post in posts:
            try:
                # size = re.findall('>\nSize: (.+?)<', post, re.DOTALL)
                # log_utils.log('size = %s' % size, log_utils.LOGDEBUG)
                u = client.parseDOM(post, 'a', ret='href')
                for i in u:
                    try:
                        name = i.encode('ascii', errors='ignore').decode('ascii', errors='ignore').replace('&nbsp;', ' ')
                        tit = name.rsplit('/', 1)[1]
                        t = tit.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                        if cleantitle.get(t) != cleantitle.get(title):
                            continue
                        if hdlr in name.upper():
                            items.append(name)
                    except:
                        source_utils.scraper_error('RLSBB')
                        pass
            except:
                source_utils.scraper_error('RLSBB')
                pass

        seen_urls = set()
        for item in items:
            try:
                info = []
                url = str(item)
                url = client.replaceHTMLCodes(url)
                url = url.encode('utf-8')
                if url in seen_urls:
                    continue
                seen_urls.add(url)

                host = url.replace("\\", "")
                host2 = host.strip('"')
                if url in str(sources):
                    continue

                host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(host2.strip().lower()).netloc)[0]
                if not host in hostDict:
                    continue
                if any(x in host2 for x in ['.rar', '.zip', '.iso']):
                    continue

                quality, info = source_utils.get_release_quality(host2)

                # this site is an absolute nightmare to parse size. Some comment sections say 16gb but size reflects all links in comment
                try:
                    size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', name)[0]
                    dsize, isize = source_utils._size(size)
                    info.insert(0, isize)
                except:
                    dsize = 0
                    pass
                info = ' | '.join(info)

                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')

                sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': host2,
                                    'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
            except:
                source_utils.scraper_error('RLSBB')
                pass
        return sources
    except:
        source_utils.scraper_error('RLSBB')
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)

        html = client.request(url)
        posts = client.parseDOM(html, 'item')

        hostDict = hostprDict + hostDict

        items = []
        for post in posts:
            try:
                t = client.parseDOM(post, 'title')[0]
                u = client.parseDOM(post, 'a', ret='href')
                s = re.search('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', post)
                s = s.groups()[0] if s else '0'
                items += [(t, i, s) for i in u]
            except:
                pass

        for item in items:
            try:
                url = item[1]
                if any(x in url for x in ['.rar', '.zip', '.iso']):
                    raise Exception()
                url = client.replaceHTMLCodes(url)
                url = url.encode('utf-8')
                if url in str(sources):
                    continue

                valid, host = source_utils.is_host_valid(url, hostDict)
                if not valid:
                    raise Exception()
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')

                name = item[0]
                name = client.replaceHTMLCodes(name)

                t = re.sub('(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)', '', name, flags=re.I)
                if not cleantitle.get(t) == cleantitle.get(title):
                    raise Exception()

                y = re.findall('[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]', name)[-1].upper()
                if not y == hdlr:
                    raise Exception()

                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', item[2])[-1]
                    div = 1 if size.endswith(('GB', 'GiB')) else 1024
                    size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                    size = '%.2f GB' % size
                    info.append(size)
                except:
                    pass
                info = ' | '.join(info)

                sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
                                    'info': info, 'direct': False, 'debridonly': True})
            except:
                pass

        check = [i for i in sources if not i['quality'] == 'CAM']
        if check:
            sources = check
        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        self.scraper = cfscrape.create_scraper()
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        hostDict = hostprDict + hostDict

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')

        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url).replace('%3A+', '+')
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        r = self.scraper.get(url).content
        if r is None and 'tvshowtitle' in data:
            season = re.search('S(.*?)E', hdlr)
            season = season.group(1)
            url = title
            r = self.scraper.get(url).content

        for loopCount in range(0, 2):
            if loopCount == 1 or (r is None and 'tvshowtitle' in data):
                r = self.scraper.get(url).content

            posts = client.parseDOM(r, "h2", attrs={"class": "postTitle"})
            items = []
            for post in posts:
                try:
                    u = client.parseDOM(post, 'a', ret='href')
                    for i in u:
                        link = str(i)
                        name = link.rsplit('/', 1)[0]
                        name = name.rsplit('/', 1)[1].upper()
                        if source_utils.remove_lang(name):
                            continue
                        t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                        if cleantitle.get(t) != cleantitle.get(title):
                            break
                        if hdlr not in name:
                            break
                        # check year for reboot/remake show issues if year is available-crap shoot
                        # if 'tvshowtitle' in data:
                        #     if re.search(r'([1-3][0-9]{3})', name):
                        #         if not any(value in name for value in [data['year'], str(int(data['year'])+1), str(int(data['year'])-1)]):
                        #             break
                        items.append(link)
                except:
                    source_utils.scraper_error('MAXRLS')
                    pass
            if len(items) > 0:
                break

        for item in items:
            try:
                i = str(item)
                r = self.scraper.get(i).content  # fetch the post page itself (was re-fetching the search page)
                u = client.parseDOM(r, "div", attrs={"class": "postContent"})
                for t in u:
                    links = zip(re.findall('Download: (.*?)</strong>', t, re.DOTALL),
                                re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB|gb|mb))', t, re.DOTALL))
                    for link in links:
                        urls = link[0]
                        results = re.compile('href="(.+?)"', re.DOTALL).findall(urls)
                        for url in results:
                            if url in str(sources):
                                continue
                            quality, info = source_utils.get_release_quality(url)
                            try:
                                dsize, isize = source_utils._size(link[1])
                                info.insert(0, isize)
                            except:
                                dsize = 0
                                pass
                            info = ' | '.join(info)

                            valid, host = source_utils.is_host_valid(url, hostDict)
                            if not valid:
                                continue

                            sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
                                                'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
            except:
                source_utils.scraper_error('MAXRLS')
                pass
        return sources
    except:
        source_utils.scraper_error('MAXRLS')
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|<|>|\|)', ' ', query)

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)

        html = client.request(url)
        html = html.replace('&nbsp;', ' ')
        try:
            results = client.parseDOM(html, 'table', attrs={'id': 'searchResult'})[0]
        except Exception:
            return sources

        rows = re.findall('<tr(.+?)</tr>', results, re.DOTALL)
        if rows is None:
            return sources

        for entry in rows:
            try:
                try:
                    name = re.findall('class="detLink" title=".+?">(.+?)</a>', entry, re.DOTALL)[0]
                    name = client.replaceHTMLCodes(name)
                    # t = re.sub('(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)', '', name, flags=re.I)
                    if not cleantitle.get(title) in cleantitle.get(name):
                        continue
                except Exception:
                    continue

                y = re.findall('[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]', name)[-1].upper()
                if not y == hdlr:
                    continue

                try:
                    seeders = int(re.findall('<td align="right">(.+?)</td>', entry, re.DOTALL)[0])
                except Exception:
                    continue
                if self.min_seeders > seeders:
                    continue

                try:
                    link = 'magnet:%s' % (re.findall('a href="magnet:(.+?)"', entry, re.DOTALL)[0])
                    link = str(client.replaceHTMLCodes(link).split('&tr')[0])
                except Exception:
                    continue

                quality, info = source_utils.get_release_quality(name, name)
                try:
                    size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', entry)[-1]
                    div = 1 if size.endswith(('GB', 'GiB')) else 1024
                    size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                    size = '%.2f GB' % size
                    info.append(size)
                except Exception:
                    pass
                info = ' | '.join(info)

                sources.append({'source': 'Torrent', 'quality': quality, 'language': 'en', 'url': link,
                                    'info': info, 'direct': False, 'debridonly': True})
            except Exception:
                continue

        check = [i for i in sources if not i['quality'] == 'CAM']
        if check:
            sources = check
        return sources
    except Exception:
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')

        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        category = '+category%3ATV' if 'tvshowtitle' in data else '+category%3AMovies'

        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url) + str(category)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        html = client.request(url)
        html = html.replace('&nbsp;', ' ')
        try:
            results = client.parseDOM(html, 'table', attrs={'class': 'table table-condensed table-torrents vmiddle'})[0]
        except:
            return sources

        rows = re.findall('<tr(.+?)</tr>', results, re.DOTALL)
        if rows is None:
            return sources

        for entry in rows:
            try:
                try:
                    url = 'magnet:%s' % (re.findall('href="magnet:(.+?)"', entry, re.DOTALL)[0])
                    url = str(client.replaceHTMLCodes(url).split('&tr')[0])
                    if url in str(sources):
                        continue
                except:
                    continue

                if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
                    continue

                try:
                    name = re.findall('<a class=".+?>(.+?)</a>', entry, re.DOTALL)[0]
                    name = client.replaceHTMLCodes(name).replace('<hl>', '').replace('</hl>', '')
                except:
                    continue

                # a lot of movies have a foreign title translation in front, so remove it
                if ' / ' in name:
                    name = name.split(' / ')[1]

                t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                if cleantitle.get(t) != cleantitle.get(title):
                    continue
                if hdlr not in name:
                    continue

                try:
                    seeders = int(re.findall('class="progress prog trans90" title="Seeders: (.+?) \|', entry, re.DOTALL)[0])
                except:
                    continue
                if self.min_seeders > seeders:
                    continue

                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', entry)[-1]
                    div = 1 if size.endswith(('GB', 'GiB')) else 1024
                    size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                    size = '%.2f GB' % size
                    info.append(size)
                except:
                    pass
                info = ' | '.join(info)

                sources.append({'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url,
                                    'info': info, 'direct': False, 'debridonly': True})
            except:
                continue
        return sources
    except:
        source_utils.scraper_error('ZOOGLE')
        return sources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'].replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode']))

        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

        url = self.search_link % (quote_plus(query).replace('+', '-'))
        url = urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        html = client.request(url)
        try:
            results = client.parseDOM(html, 'table', attrs={'class': 'forum_header_border'})
            for result in results:
                if 'magnet:' in result:
                    results = result
                    break
        except:
            return sources

        rows = re.findall('<tr name="hover" class="forum_header_border">(.+?)</tr>', results, re.DOTALL)
        if rows is None:
            return sources

        for entry in rows:
            try:
                try:
                    columns = re.findall('<td\s.+?>(.+?)</td>', entry, re.DOTALL)
                    derka = re.findall('href="magnet:(.+?)" class="magnet" title="(.+?)"', columns[2], re.DOTALL)[0]
                except:
                    continue

                url = 'magnet:%s' % (str(client.replaceHTMLCodes(derka[0]).split('&tr')[0]))
                try:
                    url = unquote(url).decode('utf8')
                except:
                    pass
                hash = re.compile('btih:(.*?)&').findall(url)[0]

                magnet_title = derka[1]
                name = unquote_plus(magnet_title)
                name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
                if source_utils.remove_lang(name):
                    continue

                match = source_utils.check_title(title, name, hdlr, data['year'])
                if not match:
                    continue

                try:
                    seeders = int(re.findall('<font color=".+?">([0-9]+|[0-9]+,[0-9]+)</font>', columns[5], re.DOTALL)[0].replace(',', ''))
                    if self.min_seeders > seeders:
                        continue
                except:
                    seeders = 0
                    pass

                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', magnet_title)[-1]
                    dsize, isize = source_utils._size(size)
                    info.insert(0, isize)
                except:
                    dsize = 0
                    pass
                info = ' | '.join(info)

                sources.append({'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality,
                                    'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
            except:
                source_utils.scraper_error('EZTV')
                continue
        return sources
    except:
        source_utils.scraper_error('EZTV')
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()

        hostDict = hostprDict + hostDict

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')

        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url).replace('-', '+')
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        r = client.request(url)
        if r is None and 'tvshowtitle' in data:
            season = re.search('S(.*?)E', hdlr)
            season = season.group(1)
            url = title
            r = client.request(url)

        for loopCount in range(0, 2):
            if loopCount == 1 or (r is None and 'tvshowtitle' in data):
                r = client.request(url)

            posts = client.parseDOM(r, "table", attrs={"class": "download"})
            items = []
            for post in posts:
                try:
                    u = client.parseDOM(post, 'a', ret='href')
                    for i in u:
                        try:
                            name = str(i)
                            items.append(name)
                        except:
                            pass
                except:
                    source_utils.scraper_error('DDLSPOT')
                    pass
            if len(items) > 0:
                break

        for item in items:
            try:
                info = []
                i = str(item)
                i = self.base_link + i
                r = client.request(i)
                u = client.parseDOM(r, "div", attrs={"class": "dl-links"})
                for t in u:
                    r = re.compile('a href=".+?" rel=".+?">(.+?)<').findall(t)
                    for url in r:
                        if any(x in url for x in ['.rar', '.zip', '.iso']):
                            continue
                        quality, info = source_utils.get_release_quality(url)
                        valid, host = source_utils.is_host_valid(url, hostDict)
                        if not valid:  # guard added; unresolvable hosts were appended unchecked
                            continue
                        sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
                                            'info': info, 'direct': False, 'debridonly': True})
            except:
                source_utils.scraper_error('DDLSPOT')
                pass
        return sources
    except:
        source_utils.scraper_error('DDLSPOT')
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)

        r = client.request(url)
        posts = client.parseDOM(r, 'item')

        hostDict = hostprDict + hostDict

        items = []
        for post in posts:
            try:
                t = client.parseDOM(post, 'title')[0]
                u = client.parseDOM(post, 'link')[0]
                s = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', t)
                s = s[0] if s else '0'
                items += [(t, u, s)]
            except:
                pass

        urls = []
        for item in items:
            try:
                name = item[0]
                name = client.replaceHTMLCodes(name)

                t = re.sub('(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)', '', name)
                if not cleantitle.get(t) == cleantitle.get(title):
                    raise Exception()

                y = re.findall('[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]', name)[-1].upper()
                if not y == hdlr:
                    raise Exception()

                quality, info = source_utils.get_release_quality(name, item[1])
                if any(x in quality for x in ['CAM', 'SD']):
                    continue

                try:
                    size = re.sub('i', '', item[2])
                    div = 1 if size.endswith('GB') else 1024
                    size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                    size = '%.2f GB' % size
                    info.append(size)
                except:
                    pass
                info = ' | '.join(info)

                url = item[1]
                links = self.links(url)
                urls += [(i, quality, info) for i in links]
            except:
                pass

        for item in urls:
            if 'earn-money' in item[0]:
                continue
            if any(x in item[0] for x in ['.rar', '.zip', '.iso']):
                continue

            url = client.replaceHTMLCodes(item[0])
            url = url.encode('utf-8')

            valid, host = source_utils.is_host_valid(url, hostDict)
            if not valid:
                continue
            host = client.replaceHTMLCodes(host)
            host = host.encode('utf-8')

            sources.append({'source': host, 'quality': item[1], 'language': 'en', 'url': url,
                                'info': item[2], 'direct': False, 'debridonly': True})
        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()

        hostDict = hostprDict + hostDict

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 's%02de%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s s%02de%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        items = []
        dupes = []
        try:
            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url)

            r = requests.get(url).content
            posts = client.parseDOM(r, 'h2', attrs={'class': 'title'})

            for post in posts:
                try:
                    # follow each result link and collect the hoster links on the page
                    # (loop variables renamed; the original reused `r` and `u` for both levels)
                    links = client.parseDOM(post, 'a', ret='href')
                    for link in links:
                        page = requests.get(link).content
                        content = client.parseDOM(page, "div", attrs={"id": "content"})
                        hrefs = client.parseDOM(content, 'a', ret='href')
                        items += [(i.strip('/').split('/')[-1], i) for i in hrefs]
                except:
                    pass
        except:
            pass

        for item in items:
            try:
                name = item[0]
                name = client.replaceHTMLCodes(name)

                t = re.sub('(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)', '', name)
                if not cleantitle.get(t) == cleantitle.get(title):
                    continue

                quality, info = source_utils.get_release_quality(name, item[1])

                url = item[1]
                if any(x in url for x in ['.rar', '.zip', '.iso']):
                    raise Exception()
                url = client.replaceHTMLCodes(url)
                url = url.encode('utf-8')
                if url in str(sources):
                    continue

                host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(url.strip().lower()).netloc)[0]
                if not host in hostDict:
                    raise Exception()
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')

                sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
                                    'info': info, 'direct': False, 'debridonly': True})
            except:
                pass
        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s s%02de%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)

        r = client.request(url)
        try:
            posts = client.parseDOM(r, 'h2', attrs={'class': 'entry-title'})
            for post in posts:
                # renamed from `data` to avoid shadowing the query dict above
                links = client.parseDOM(post, 'a', ret='href')
                for u in links:
                    r = client.request(u)
                    r = client.parseDOM(r, 'div', attrs={'class': 'clearfix entry-content'})
                    for t in r:
                        link = re.findall('a class="buttn magnet" href="(.+?)"', t)[0]
                        quality, info = source_utils.get_release_quality(u)
                        try:
                            size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:gb|gib|mb|mib))', str(links))[-1]
                            div = 1 if size.endswith('gb') else 1024
                            size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                            size = '%.2f gb' % size
                            info.append(size)
                        except:
                            pass
                        info = ' | '.join(info)

                        sources.append({'source': 'Torrent', 'quality': quality, 'language': 'en', 'url': link,
                                            'info': info, 'direct': False, 'debridonly': True})
        except:
            return sources
        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')

        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        try:
            r = self.scraper.get(url).content
            posts = client.parseDOM(r, 'li')

            for post in posts:
                link = re.findall('a title="Download using magnet" href="(magnet:.+?)"', post, re.DOTALL)
                for url in link:
                    url = url.split('&tr')[0]
                    name = url.split('&dn=')[1]
                    name = urllib.unquote_plus(name).replace(' ', '.')
                    if source_utils.remove_lang(name):
                        continue

                    if name.startswith('www.'):
                        try:
                            name = name.split(' - ')[1].lstrip()
                        except:
                            name = re.sub(r'\www..+? ', '', name)

                    t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
                    if cleantitle.get(t) != cleantitle.get(title):
                        continue
                    if hdlr not in name:
                        continue

                    quality, info = source_utils.get_release_quality(url)
                    try:
                        size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                        dsize, isize = source_utils._size(size)
                        info.insert(0, isize)
                    except:
                        dsize = 0  # keep dsize defined when no size is found (was a NameError below)
                        pass
                    info = ' | '.join(info)

                    sources.append({'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url,
                                        'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
        except:
            source_utils.scraper_error('BTDB')
            return sources
        return sources
    except:
        source_utils.scraper_error('BTDB')
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        hostDict = hostDict + hostprDict
        sources = []
        query_bases = []
        options = []

        if url is None:
            return sources
        if not debrid.status():
            return sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = (data['tvshowtitle'] if 'tvshowtitle' in data else data['title'])
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        # tvshowtitle
        if 'tvshowtitle' in data:
            query_bases.append('%s ' % (data['tvshowtitle'].replace("-", "")))
            if 'year' in data:
                query_bases.append('%s %s ' % (data['tvshowtitle'], data['year']))
            options.append('S%02dE%02d' % (int(data['season']), int(data['episode'])))
            options.append('S%02d' % (int(data['season'])))
        else:
            query_bases.append('%s %s ' % (data['title'], data['year']))
            query_bases.append('%s ' % (data['title']))
            # for movies the options are a resolution hint and a bare query
            # (these were appended to query_bases in the flattened original,
            # which would have left `options` empty and skipped the loop)
            options.append('2160p')
            options.append('')

        for option in options:
            for query_base in query_bases:
                q = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query_base + option)
                q = q.replace("  ", " ").replace(" ", "+")
                url = self.base_link + self.search_link % q

                html = self.scraper.get(url)
                if html.status_code != 200:
                    continue

                posts = client.parseDOM(html.content, "h3", attrs={"class": "elegantwp-fp09-post-title"})
                for post in posts:
                    url = client.parseDOM(post, "a", ret='href')
                    if len(url) == 0:
                        continue
                    html = self.scraper.get(url[0])
                    if html.status_code != 200:
                        continue

                    quotes = client.parseDOM(html.content, "div", attrs={"class": "entry-content clearfix"})
                    log_utils.log('best-moviez.ws - href: \n' + str(quotes))
                    for quote in quotes:
                        hrefs = client.parseDOM(quote, "a", ret='href')
                        if not hrefs:
                            continue
                        for href in hrefs:
                            if any(x in href for x in ['.rar', '.zip', '.iso']):
                                continue
                            quality = source_utils.check_sd_url(href)
                            href = href.encode('utf-8')
                            valid, host = source_utils.is_host_valid(href, hostDict)
                            if not valid:
                                continue
                            if hdlr in href.upper() and cleantitle.get(title) in cleantitle.get(href):
                                sources.append({'source': host, 'quality': quality, 'language': 'en',
                                                    'url': href, 'direct': False, 'debridonly': False})

                if len(sources) > 0:
                    return sources
        return sources
    except:
        return sources
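# --- Illustrative sketch: the nested loops above expand to the cartesian
# product of `options` x `query_bases`. With hypothetical TV inputs the
# generated search terms look like this:
query_bases = ['Some Show ', 'Some Show 2019 ']
options = ['S01E02', 'S01']
queries = [(b + o).replace('  ', ' ').replace(' ', '+')
           for o in options for b in query_bases]
# -> ['Some+Show+S01E02', 'Some+Show+2019+S01E02', 'Some+Show+S01', 'Some+Show+2019+S01']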
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            return sources

        hostDict = hostprDict + hostDict

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        ref_url = url = data['url']
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']

        _headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/72.0'}

        r = client.request(url, headers=_headers)
        posts = client.parseDOM(r, 'h2', attrs={'class': 'title'})
        posts = zip(client.parseDOM(posts, 'a', ret='title'), client.parseDOM(posts, 'a', ret='href'))
        if posts == []:
            return sources

        for item in posts:
            try:
                name = item[0].replace(' ', '.')
                url = item[1]

                r = client.request(url, headers=_headers)
                list = client.parseDOM(r, 'div', attrs={'id': 'content'})

                if 'tvshowtitle' in data:
                    regex = '(<strong>(.*?)</strong><br />\s?[A-Z,0-9]*?\s\|\s([A-Z,0-9,\s]*)\|\s((\d+\.\d+|\d*)\s?(?:GB|GiB|Gb|MB|MiB|Mb))?</p>(?:\s<p><a href=\".*?\" .*?_blank\">.*?</a></p>)+)'
                else:
                    regex = '(<strong>Release Name:</strong>\s*(.*?)<br />\s?<strong>Size:</strong>\s?((\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+)\s(?:GB|GiB|Gb|MB|MiB|Mb))?<br />(.*\s)*)'

                for match in re.finditer(regex, list[0].encode('ascii', errors='ignore').decode('ascii', errors='ignore').replace('&nbsp;', ' ')):
                    name = str(match.group(2))

                    t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                    if cleantitle.get(t) != cleantitle.get(title):
                        continue
                    if hdlr not in name:
                        continue
                    if source_utils.remove_lang(name):
                        continue

                    # audio = str(match.group(3))
                    if 'tvshowtitle' in data:
                        size = str(match.group(4))
                    else:
                        size = str(match.group(3))

                    links = client.parseDOM(match.group(1), 'a', attrs={'class': 'autohyperlink'}, ret='href')
                    for url in links:
                        try:
                            if any(x in url for x in ['.rar', '.zip', '.iso', '.sample.']):
                                continue
                            if url in str(sources):
                                continue

                            valid, host = source_utils.is_host_valid(url, hostDict)
                            if not valid:
                                continue
                            host = client.replaceHTMLCodes(host)
                            host = host.encode('utf-8')

                            quality, info = source_utils.get_release_quality(name, url)
                            try:
                                div = 1 if size.endswith(('GB', 'GiB', 'Gb')) else 1024
                                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                                size = '%.2f GB' % size
                                info.insert(0, size)
                            except:
                                pass
                            info = ' | '.join(info)

                            sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
                                                'info': info, 'direct': False, 'debridonly': True})
                        except:
                            source_utils.scraper_error('TVDOWNLOADS')
                            pass
            except:
                source_utils.scraper_error('TVDOWNLOADS')
                pass
        return sources
    except:
        source_utils.scraper_error('TVDOWNLOADS')
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        scraper = cfscrape.create_scraper()
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        premDate = ''

        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        query = query.replace("&", "and")
        query = query.replace("  ", " ")
        query = query.replace(" ", "-")

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        url = "http://rlsbb.ru/" + query
        if 'tvshowtitle' not in data:
            url = url + "-1080p"

        r = scraper.get(url).content

        if r is None and 'tvshowtitle' in data:
            # fall back to a season-level page
            season = re.search('S(.*?)E', hdlr)
            season = season.group(1)
            query = title
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
            query = query + "-S" + season
            query = query.replace("&", "and")
            query = query.replace("  ", " ")
            query = query.replace(" ", "-")
            url = "http://rlsbb.ru/" + query
            r = scraper.get(url).content

        for loopCount in range(0, 2):
            if loopCount == 1 or (r is None and 'tvshowtitle' in data):
                # second pass: search by premiere date instead of SxxExx
                premDate = re.sub('[ \.]', '-', data['premiered'])
                query = re.sub('[\\\\:;*?"<>|/\-\']', '', data['tvshowtitle'])
                query = query.replace("&", " and ").replace("  ", " ").replace(" ", "-")
                query = query + "-" + premDate
                url = "http://rlsbb.ru/" + query
                url = url.replace('The-Late-Show-with-Stephen-Colbert', 'Stephen-Colbert')
                r = scraper.get(url).content

            posts = client.parseDOM(r, "div", attrs={"class": "content"})
            hostDict = hostprDict + hostDict
            items = []
            for post in posts:
                try:
                    u = client.parseDOM(post, 'a', ret='href')
                    for i in u:
                        try:
                            name = str(i)
                            if hdlr in name.upper():
                                items.append(name)
                            elif len(premDate) > 0 and premDate in name.replace(".", "-"):
                                items.append(name)
                        except:
                            pass
                except:
                    pass
            if len(items) > 0:
                break

        seen_urls = set()
        for item in items:
            try:
                info = []
                url = str(item)
                url = client.replaceHTMLCodes(url)
                url = url.encode('utf-8')
                if url in seen_urls:
                    continue
                seen_urls.add(url)

                host = url.replace("\\", "")
                host2 = host.strip('"')
                host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(host2.strip().lower()).netloc)[0]
                if not host in hostDict:
                    raise Exception()
                if any(x in host2 for x in ['.rar', '.zip', '.iso']):
                    continue

                if '720p' in host2:
                    quality = 'HD'
                elif '1080p' in host2:
                    quality = '1080p'
                elif '2160p' in host2:
                    quality = '4K'
                else:
                    quality = 'SD'

                info = ' | '.join(info)
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')

                sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': host2,
                                    'info': info, 'direct': False, 'debridonly': False})
            except:
                pass

        check = [i for i in sources if not i['quality'] == 'CAM']
        if check:
            sources = check
        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    scraper = cfscrape.create_scraper()
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        hostDict = hostprDict + hostDict
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        items = []
        dupes = []
        try:
            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url)
            # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
            r = scraper.get(url).content
            posts = client.parseDOM(r, 'div', attrs={'class': 'post'})
            for post in posts:
                try:
                    content = client.parseDOM(post, "div", attrs={"class": "postContent"})
                    size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', content[0])[0]
                    u = client.parseDOM(content, "h2")
                    u = client.parseDOM(u, 'a', ret='href')
                    u = [(i.strip('/').split('/')[-1], i, size) for i in u]
                    items += u
                except:
                    source_utils.scraper_error('SCENERLS')
                    pass
        except:
            source_utils.scraper_error('SCENERLS')
            pass
        for item in items:
            try:
                name = item[0]
                name = client.replaceHTMLCodes(name)
                # skip foreign-language releases; returning here would abort the whole scrape
                if source_utils.remove_lang(name):
                    continue
                t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                if cleantitle.get(t) != cleantitle.get(title):
                    continue
                if hdlr not in name:
                    continue
                # check year for reboot/remake show issues if year is available - crap shoot
                # if 'tvshowtitle' in data:
                #     if re.search(r'([1-3][0-9]{3})', name):
                #         if not any(value in name for value in [data['year'], str(int(data['year'])+1), str(int(data['year'])-1)]):
                #             continue
                quality, info = source_utils.get_release_quality(name, item[1])
                try:
                    dsize, isize = source_utils._size(item[2])
                    info.insert(0, isize)
                except:
                    dsize = 0
                    pass
                info = ' | '.join(info)
                url = item[1]
                if any(x in url for x in ['.rar', '.zip', '.iso', '.sample.']):
                    continue
                url = client.replaceHTMLCodes(url)
                url = url.encode('utf-8')
                if url in str(sources):
                    continue
                host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(url.strip().lower()).netloc)[0]
                if host not in hostDict:
                    continue
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')
                sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
                                'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
            except:
                source_utils.scraper_error('SCENERLS')
                pass
        return sources
    except:
        source_utils.scraper_error('SCENERLS')
        return sources
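# Note: the `url in str(sources)` test above is a substring match against the
# stringified list of result dicts, so partial URLs can collide. A seen-set,
# as the rlsbb-style scraper earlier in this file uses, is the stricter
# dedup. A sketch with a hypothetical helper, not part of the original module:
def make_dupe_check():
    seen = set()
    def is_dupe(link):
        # record the link on first sight; report True on any repeat
        if link in seen:
            return True
        seen.add(link)
        return False
    return is_dupe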
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        try:
            r = client.request(url)
            posts = client.parseDOM(r, 'div', attrs={'class': 'results'})[0]
            posts = client.parseDOM(posts, 'dl')
            for post in posts:
                links = re.findall('<dt><a href=/(.+)</a>', post, re.DOTALL)
                for link in links:
                    # rebuild a magnet URI from the hash and display-name halves
                    magnet = link.split('</a>')[0]
                    hash = 'magnet:?xt=urn:btih:' + magnet.split('>')[0]
                    dn = '&dn=' + magnet.split('>')[1]
                    url = hash + dn
                    if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
                        continue
                    name = url.split('&dn=')[1]
                    t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                    if cleantitle.get(t) != cleantitle.get(title):
                        continue
                    if hdlr not in name:
                        continue
                    quality, info = source_utils.get_release_quality(name, url)
                    try:
                        size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                        div = 1 if size.endswith('GB') else 1024
                        size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                        size = '%.2f GB' % size
                        info.append(size)
                    except:
                        pass
                    info = ' | '.join(info)
                    sources.append({'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url,
                                    'info': info, 'direct': False, 'debridonly': True})
            return sources
        except:
            source_utils.scraper_error('TORRENTZ')
            return sources
    except:
        source_utils.scraper_error('TORRENTZ')
        return sources
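# The magnet URI stitched together above follows the standard BTIH layout.
# A compact, hypothetical builder showing the same shape (quote_plus on the
# display name is an addition for safety; the scraper above keeps dn verbatim):
from urllib import quote_plus  # Python 2; urllib.parse.quote_plus on Python 3

def build_magnet(info_hash, display_name):
    # magnet:?xt=urn:btih:<40-hex-char info hash>&dn=<url-encoded name>
    return 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, quote_plus(display_name))

# build_magnet('c12fe1c06bba254a9dc9f519b335aa7c1367a88a', 'Example.S01E02.720p')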
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        # discover the posts URL from the landing page, falling back to /posts
        s = client.request(self.base_link)
        s = re.findall('\'(http.+?)\'', s) + re.findall('\"(http.+?)\"', s)
        s = [i for i in s if urlparse.urlparse(self.base_link).netloc in i and len(i.strip('/').split('/')) > 3]
        s = s[0] if s else urlparse.urljoin(self.base_link, 'posts')
        s = s.strip('/')
        url = s + self.search_link % urllib.quote_plus(query)
        r = client.request(url)
        r = client.parseDOM(r, 'h2', attrs={'class': 'post-title .+?'})
        l = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
        r = [(i[0], i[1], re.sub('(\.|\(|\[|\s)(\d{4}|3D)(\.|\)|\]|\s|)(.+|)', '', i[1]),
              re.findall('[\.|\(|\[|\s](\d{4}|)([\.|\)|\]|\s|].+)', i[1])) for i in l]
        r = [(i[0], i[1], i[2], i[3][0][0], i[3][0][1]) for i in r if i[3]]
        r = [(i[0], i[1], i[2], i[3], re.split('\.|\(|\)|\[|\]|\s|\-', i[4])) for i in r]
        r = [i for i in r if cleantitle.get(title) == cleantitle.get(i[2]) and data['year'] == i[3]]
        r = [i for i in r if not any(x in i[4] for x in ['HDCAM', 'CAM', 'DVDR', 'DVDRip', 'DVDSCR', 'HDTS', 'TS', '3D'])]
        r = [i for i in r if '1080p' in i[4]][:1] + [i for i in r if '720p' in i[4]][:1]
        # (title, href) pairs; the TV and movie branches built the same list,
        # and the filtered r list above is not consulted below
        posts = [(i[1], i[0]) for i in l]
        hostDict = hostprDict + hostDict
        items = []
        for post in posts:
            try:
                t = post[0]
                u = client.request(post[1])
                # collect both double- and single-quoted absolute links
                u = re.findall('"(http.+?)"', u) + re.findall("'(http.+?)'", u)
                u = [i for i in u if '/embed/' not in i]
                u = [i for i in u if 'youtube' not in i]
                items += [(t, i) for i in u]
            except:
                pass
        for item in items:
            try:
                name = item[0]
                name = client.replaceHTMLCodes(name)
                t = re.sub('(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)', '', name)
                if not cleantitle.get(t) == cleantitle.get(title):
                    raise Exception()
                y = re.findall('[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]', name)[-1].upper()
                if not y == hdlr:
                    raise Exception()
                fmt = re.sub('(.+)(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*)(\.|\)|\]|\s)', '', name.upper())
                fmt = re.split('\.|\(|\)|\[|\]|\s|\-', fmt)
                fmt = [i.lower() for i in fmt]
                # print fmt
                if any(i.endswith(('subs', 'sub', 'dubbed', 'dub')) for i in fmt):
                    raise Exception()
                if any(i in ['extras'] for i in fmt):
                    raise Exception()
                if '1080p' in fmt:
                    quality = '1080p'
                elif '720p' in fmt:
                    quality = '720p'
                else:
                    quality = '720p'
                if any(i in ['dvdscr', 'r5', 'r6'] for i in fmt):
                    quality = 'SCR'
                elif any(i in ['camrip', 'tsrip', 'hdcam', 'hdts', 'dvdcam', 'dvdts', 'cam', 'telesync', 'ts'] for i in fmt):
                    quality = 'CAM'
                info = []
                if '3d' in fmt:
                    info.append('3D')
                try:
                    # item holds only (name, link), so pull a size out of the
                    # post title when one is present
                    size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+) (?:GB|GiB|MB|MiB))', name)[-1]
                    div = 1 if size.endswith(('GB', 'GiB')) else 1024
                    size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                    size = '%.2f GB' % size
                    info.append(size)
                except:
                    pass
                if any(i in ['hevc', 'h265', 'x265'] for i in fmt):
                    info.append('HEVC')
                info = ' | '.join(info)
                url = item[1]
                if any(x in url for x in ['.rar', '.zip', '.iso']):
                    raise Exception()
                url = client.replaceHTMLCodes(url)
                url = url.encode('utf-8')
                host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(url.strip().lower()).netloc)[0]
                if host not in hostDict:
                    raise Exception()
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')
                sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
                                'info': info, 'direct': False, 'debridonly': True})
            except:
                pass
        check = [i for i in sources if not i['quality'] == 'CAM']
        if check:
            sources = check
        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s s%02de%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        try:
            r = client.request(url)
            posts = client.parseDOM(r, 'tr')
            for post in posts:
                links = re.findall('<a href="(/torrent/.+?)">(.+?)<', post, re.DOTALL)
                # the link text doubles as the release name; keep it distinct
                # from the parsed-query dict 'data'
                for link, fname in links:
                    link = urlparse.urljoin(self.base_link, link)
                    link = client.request(link)
                    link = re.findall('a class=".+?" rel=".+?" href="(magnet:.+?)"', link, re.DOTALL)
                    try:
                        size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                        div = 1 if size.endswith('GB') else 1024
                        size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                        size = '%.2f GB' % size
                    except BaseException:
                        size = '0'
                    for url in link:
                        if hdlr not in url:
                            continue
                        url = url.split('&tr')[0]
                        quality, info = source_utils.get_release_quality(fname)
                        if any(x in url for x in ['Tamil', 'FRENCH', 'Ita', 'italian', 'TRUEFRENCH', '-lat-', 'Dublado']):
                            continue
                        info.append(size)
                        info = ' | '.join(info)
                        sources.append({'source': 'Torrent', 'quality': quality, 'language': 'en', 'url': url,
                                        'info': info, 'direct': False, 'debridonly': True})
        except:
            return sources
        return sources
    except:
        return sources
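# Several scrapers above repeat the same inline language blacklist. A
# hypothetical shared helper expressing that check once (tag list is a
# representative subset of the inline lists; short tags like 'Ita' are
# omitted because they false-positive on words such as 'digital'):
FOREIGN_TAGS = ('tamil', 'french', 'truefrench', 'italian', 'spanish',
                'dublado', 'dubbed', '-lat-')

def is_foreign_release(name):
    # case-insensitive substring match, mirroring the inline any(...) checks
    lowered = name.lower()
    return any(tag in lowered for tag in FOREIGN_TAGS)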
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        # this indexer only lists TV episodes, so show metadata is assumed present
        title = data['tvshowtitle']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode']))
        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|<|>|\|)', ' ', query)
        url = self.search_link % (urllib.quote_plus(query).replace('+', '-'))
        url = urlparse.urljoin(self.base_link, url)
        html = client.request(url)
        try:
            results = client.parseDOM(html, 'table', attrs={'class': 'forum_header_border'})
            for result in results:
                if 'magnet:' in result:
                    results = result
                    break
        except Exception:
            return sources
        rows = re.findall('<tr name="hover" class="forum_header_border">(.+?)</tr>', results, re.DOTALL)
        if rows is None:
            return sources
        for entry in rows:
            try:
                try:
                    columns = re.findall('<td\s.+?>(.+?)</td>', entry, re.DOTALL)
                    derka = re.findall('href="magnet:(.+?)" class="magnet" title="(.+?)"', columns[2], re.DOTALL)[0]
                    name = derka[1]
                    link = 'magnet:%s' % (str(client.replaceHTMLCodes(derka[0]).split('&tr')[0]))
                    t = name.split(hdlr)[0]
                    # strip parentheses before the title comparison
                    if not cleantitle.get(re.sub('(\(|\))', '', t)) == cleantitle.get(title):
                        continue
                except Exception:
                    continue
                y = re.findall('[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]', name)[-1].upper()
                if not y == hdlr:
                    continue
                try:
                    seeders = int(re.findall('<font color=".+?">(.+?)</font>', columns[5], re.DOTALL)[0])
                except Exception:
                    continue
                if self.min_seeders > seeders:
                    continue
                quality, info = source_utils.get_release_quality(name, name)
                try:
                    size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', name)[-1]
                    div = 1 if size.endswith(('GB', 'GiB')) else 1024
                    size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                    size = '%.2f GB' % size
                    info.append(size)
                except Exception:
                    pass
                info = ' | '.join(info)
                sources.append({'source': 'Torrent', 'quality': quality, 'language': 'en', 'url': link,
                                'info': info, 'direct': False, 'debridonly': True})
            except Exception:
                continue
        check = [i for i in sources if not i['quality'] == 'CAM']
        if check:
            sources = check
        return sources
    except Exception:
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        hostDict = hostprDict + hostDict
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url).replace('-', '+')
        r = client.request(url)
        if r is None and 'tvshowtitle' in data:
            # fallback retry; note the bare title is requested as-is
            season = re.search('S(.*?)E', hdlr)
            season = season.group(1)
            url = title
            r = client.request(url)
        for loopCount in range(0, 2):
            if loopCount == 1 or (r is None and 'tvshowtitle' in data):
                r = client.request(url)
            posts = client.parseDOM(r, "h2", attrs={"class": "entry-title"})
            items = []
            for post in posts:
                try:
                    tit = client.parseDOM(post, "a")[0]
                    t = tit.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                    if cleantitle.get(t) != cleantitle.get(title):
                        continue
                    if hdlr not in tit:
                        continue
                    u = client.parseDOM(post, 'a', ret='href')
                    for i in u:
                        name = str(i)
                        items.append(name)
                except:
                    source_utils.scraper_error('SCENEDDL')
                    pass
            if len(items) > 0:
                break
        for item in items:
            try:
                i = str(item)
                r = client.request(i)
                u = client.parseDOM(r, "div", attrs={"class": "entry-content"})
                for t in u:
                    r = client.parseDOM(t, 'a', ret='href')
                    for url in r:
                        if '.rar' in url or 'imdb.com' in url:
                            continue
                        quality, info = source_utils.get_release_quality(url)
                        valid, host = source_utils.is_host_valid(url, hostDict)
                        if valid:
                            sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
                                            'info': info, 'direct': False, 'debridonly': True})
            except:
                source_utils.scraper_error('SCENEDDL')
                pass
        return sources
    except:
        source_utils.scraper_error('SCENEDDL')
        return sources
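# How these sources() methods are typically driven (a sketch; the actual
# plugin wiring lives outside this file): the caller urlencodes the title
# metadata into the `url` argument, and hostDict/hostprDict carry the
# resolvable and premium host lists. All names below are illustrative.
import urllib

meta = {'tvshowtitle': 'Example Show', 'title': 'Example Show',
        'year': '2019', 'season': '1', 'episode': '2'}
query_url = urllib.urlencode(meta)  # Python 2; urllib.parse.urlencode on Python 3
# results = SomeScraper().sources(query_url, hostDict=['rapidgator.net'], hostprDict=[])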