def _get_sources(self, url):
    """Scrape an idope search-results page and append magnet sources to self.sources.

    Filters by title/hdlr/year and by self.min_seeders; failures are logged
    via source_utils.scraper_error and swallowed (best-effort scraper).
    """
    try:
        r = client.request(url)
        div = client.parseDOM(r, 'div', attrs={'id': 'div2child'})
        for row in div:
            # NOTE(review): re-parses the whole page `r` instead of `row`;
            # preserved as-is since the result rows are page-global anyway.
            row = client.parseDOM(r, 'div', attrs={'class': 'resultdivbotton'})
            for post in row:
                infohash = re.findall(
                    '<div id="hideinfohash.+?" class="hideinfohash">(.+?)<',
                    post, re.DOTALL)[0]
                name = re.findall(
                    '<div id="hidename.+?" class="hideinfohash">(.+?)<',
                    post, re.DOTALL)[0]
                name = urllib.unquote_plus(name).replace(' ', '.')
                url = 'magnet:?xt=urn:btih:%s&dn=%s' % (infohash, name)
                if url in str(self.sources):
                    continue  # duplicate magnet already collected
                # FIX: seeders was kept as a string and compared against the
                # int self.min_seeders — always False on Python 2 (int < str),
                # so the minimum-seeders filter never triggered. Cast to int.
                try:
                    seeders = int(re.findall(
                        '<div class="resultdivbottonseed">(.+?)<',
                        post, re.DOTALL)[0].replace(',', ''))
                except:
                    seeders = 0
                if self.min_seeders > seeders:
                    continue
                if source_utils.remove_lang(name):
                    continue
                t = name.split(self.hdlr)[0].replace(
                    self.year, '').replace('(', '').replace(')', '').replace(
                    '&', 'and').replace('.US.', '.').replace('.us.', '.')
                if cleantitle.get(t) != cleantitle.get(self.title):
                    continue
                if self.hdlr not in url:
                    continue
                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall(
                        '<div class="resultdivbottonlength">(.+?)<', post)[0]
                    # FIX: divisor was named `div`, shadowing the outer loop's
                    # list of result divs; renamed for clarity.
                    divisor = 1 if size.endswith(('GB', 'GiB', 'Gb')) else 1024
                    size = float(
                        re.sub('[^0-9|/.|/,]', '', size.replace(
                            ',', '.'))) / divisor
                    size = '%.2f GB' % size
                    info.insert(0, size)
                except:
                    size = '0'
                    pass
                info = ' | '.join(info)
                self.sources.append({
                    'source': 'Torrent', 'quality': quality,
                    'language': 'en', 'url': url, 'info': info,
                    'direct': False, 'debridonly': True})
    except:
        source_utils.scraper_error('IDOPE')
        pass
def sources(self, url, hostDict, hostprDict):
    """Resolve French hoster links for a title from fullmoviz.org.

    Matches on the localized title (``localtitle``/``localtvshowtitle``)
    and returns a list of SD, non-debrid source dicts.
    """
    try:
        print '------------------------------- -------------------------------'
        sources = []
        print url
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        print data
        title = data['title']
        # NOTE(review): both branches of this ternary are identical — if
        # 'year' is missing this raises KeyError (caught by the outer except).
        year = data['year'] if 'year' in data else data['year']
        season = data['season'] if 'season' in data else False
        episode = data['episode'] if 'episode' in data else False
        localtitle = data['localtitle'] if 'localtitle' in data else False
        if season and episode:
            # episodes match against the localized show title instead
            localtitle = data[
                'localtvshowtitle'] if 'localtvshowtitle' in data else False
        #r = 'http://www.fullmoviz.org/?s=deadpool'
        #r = client.request(r)
        #r = client.parseDOM(r, 'div', attrs={'class': 'post-thumbnail'})
        #r = client.parseDOM(r, 'a', ret='href')
        #r = client.request(r[0])
        #r = client.parseDOM(r, 'div', attrs={'class': 'tab-me-content-wrapper'})
        #r = client.parseDOM(r, 'iframe', ret='src')
        t = cleantitle.get(title)
        tq = cleantitle.query(localtitle)
        # tq2: squashed lowercase form used for exact-match comparison below
        tq2 = re.sub(' ', '', cleantitle.query(localtitle).lower())
        tq = re.sub(' ', '%20', tq)
        y = [
            '%s' % str(year), '%s' % str(int(year) + 1),
            '%s' % str(int(year) - 1), '0'
        ]
        query = 'http://www.fullmoviz.org'
        r = client.request('http://www.fullmoviz.org/?s=%s' % tq)
        print 'http://www.fullmoviz.org/?s=%s' % tq
        r = client.parseDOM(r, 'div', attrs={'class': 'post-thumbnail'})
        # only the first search hit's href/title are considered
        r0 = client.parseDOM(r, 'a', ret='href')[0]
        r2 = client.parseDOM(r, 'a', ret='title')[0]
        # strip "(YYYY)", the word "streaming" and all whitespace from the title
        r1 = re.sub('(\([0-9]{4}\)|streaming|\s+)', '', r2)
        #r = sorted(set(r))
        r = [(r0, r1) for i in r]
        #r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        #r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        #r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:saison|s)\s+(\d+)', i[1])) for i in r]
        #r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
        #r = [(i[0], re.sub(' \&\#[0-9]{4,6};', '', i[1]), i[2], i[3]) for i in r]
        # keep only hits whose cleaned title equals the squashed local title
        r = [i[0] for i in r if tq2 == cleantitle.get(i[1])][0]
        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        r = client.request('http://www.fullmoviz.org' + url)
        print 'http://www.fullmoviz.org' + url
        r = client.parseDOM(r, 'div', attrs={'class': 'tab-me-content-wrapper'})
        r = client.parseDOM(r, 'iframe', ret='src')
        for i in r:
            url = i
            # extract the registrable domain (e.g. "example.com") from the iframe src
            host = re.findall(
                '([\w]+[.][\w]+)$',
                urlparse.urlparse(url.strip().lower()).netloc)[0]
            if not host in hostDict:
                continue
            host = client.replaceHTMLCodes(host)
            host = host.encode('utf-8')
            sources.append({
                'source': host, 'quality': 'SD', 'language': 'FR',
                'url': url, 'direct': False, 'debridonly': False})
        return sources
    except:
        # best-effort: any scraping failure yields whatever was collected
        return sources
def sources(self, url, hostDict, hostprDict):
    """Scrape The Pirate Bay search results (pages 1 and 2) into debrid-only
    magnet sources, filtered by title, hdlr (SxxEyy or year) and min seeders.
    """
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources  # all results are debrid-only
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
            'title']
        title = title.replace('&', 'and').replace('Special Victims Unit',
                                                  'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(
            data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        html = client.request(url)
        # NOTE(review): first replace() argument looks like a lost non-breaking
        # space / entity — confirm against upstream source.
        html = html.replace(' ', ' ')
        try:
            results = client.parseDOM(html,
                                      'table',
                                      attrs={'id': 'searchResult'})
        except:
            return sources
        # page 2 of the same search
        url2 = url.replace('/1/', '/2/')
        html2 = client.request(url2)
        html2 = html2.replace(' ', ' ')
        try:
            results += client.parseDOM(html2,
                                       'table',
                                       attrs={'id': 'searchResult'})
        except:
            return sources
        results = ''.join(results)
        rows = re.findall('<tr(.+?)</tr>', results, re.DOTALL)
        if rows is None:
            return sources
        for entry in rows:
            try:
                try:
                    url = 'magnet:%s' % (re.findall(
                        'a href="magnet:(.+?)"', entry, re.DOTALL)[0])
                    # drop tracker list from the magnet URI
                    url = str(client.replaceHTMLCodes(url).split('&tr')[0])
                except:
                    continue
                try:
                    name = re.findall(
                        'class="detLink" title=".+?">(.+?)</a>', entry,
                        re.DOTALL)[0]
                    name = client.replaceHTMLCodes(name)
                    name = urllib.unquote_plus(name).replace(' ', '.')
                    if source_utils.remove_lang(name):
                        continue
                    # normalize the release title and compare to the wanted title
                    t = name.split(hdlr)[0].replace(
                        data['year'], '').replace('(', '').replace(')', '').replace(
                        '&', 'and').replace('.US.', '.').replace('.us.', '.')
                    if cleantitle.get(t) != cleantitle.get(title):
                        continue
                except:
                    continue
                if hdlr not in name:
                    continue
                try:
                    seeders = int(
                        re.findall('<td align="right">(.+?)</td>', entry,
                                   re.DOTALL)[0])
                except:
                    continue
                if self.min_seeders > seeders:
                    continue
                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall(
                        '((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))',
                        entry)[-1]
                    # normalize MB -> GB
                    div = 1 if size.endswith(('GB', 'GiB')) else 1024
                    size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                    size = '%.2f GB' % size
                    info.insert(0, size)
                except:
                    pass
                info = ' | '.join(info)
                sources.append({
                    'source': 'torrent', 'quality': quality,
                    'language': 'en', 'url': url, 'info': info,
                    'direct': False, 'debridonly': True})
            except:
                source_utils.scraper_error('PIRATEBAY')
                continue
        return sources
    except:
        source_utils.scraper_error('PIRATEBAY')
        return sources
def _get_sources(self, url):
    """Parse a torrentdownload results table and append magnet sources
    (with seeders/size metadata) to self.sources."""
    try:
        page = client.request(url)
        # flatten the markup so the single-line row regex can match
        page = re.sub(r'\n', '', page)
        page = re.sub(r'\t', '', page)
        tables = re.compile(
            '<table class="table2" cellspacing="0">(.*?)</table>').findall(page)
        rows = client.parseDOM(tables, 'tr')
        # link path, human-readable size, seeder count — compiled once
        row_pattern = re.compile(
            '<a href="(.+?)">.*?<td class="tdnormal">((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))</td><td class="tdseed">([0-9]+|[0-9]+,[0-9]+)</td>')
        for row in rows:
            if '<th' in row:
                continue  # header row carries no torrent data
            for entry in row_pattern.findall(row):
                href_parts = entry[0].split("/")
                hash = href_parts[1].lower()
                name = href_parts[2].replace('+MB+', '')
                name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
                name = name.replace('Worldfree4u.Wiki.', '').replace('Bolly4u.pro.', '')
                if source_utils.remove_lang(name):
                    continue
                if not source_utils.check_title(self.title, name, self.hdlr, self.year):
                    continue
                url = 'magnet:?xt=urn:btih:%s&dn=%s' % (hash, name)
                try:
                    seeders = int(entry[2].replace(',', ''))
                    if self.min_seeders > seeders:
                        continue
                except:
                    seeders = 0
                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall(
                        '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))',
                        entry[1])[0]
                    dsize, isize = source_utils._size(size)
                    info.insert(0, isize)
                except:
                    dsize = 0
                info = ' | '.join(info)
                self.sources.append({
                    'source': 'torrent', 'seeders': seeders, 'hash': hash,
                    'name': name, 'quality': quality, 'language': 'en',
                    'url': url, 'info': info, 'direct': False,
                    'debridonly': True, 'size': dsize})
    except:
        source_utils.scraper_error('TORRENTDOWNLOAD')
        pass
def sources(self, url, hostDict, hostprDict):
    """Collect Greek-subtitled hoster links from an RSS-style search feed.

    Matches the exact title and release year from each <item>'s title,
    then emits SD, subtitle-tagged, non-debrid sources.
    """
    sources = []
    try:
        # NOTE(review): redundant re-initialization (already [] above)
        sources = []
        if url is None:
            return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['title']
        year = data['year']
        query = '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        r = client.request(url)
        posts = client.parseDOM(r, 'item')
        for post in posts:
            try:
                name = client.parseDOM(post, 'title')
                links = client.parseDOM(post, 'a', ret='href')
                # strip "(YYYY) ..." tail before comparing titles
                t = re.sub('(\.|\(|\[|\s|)(\d{4})(\.|\)|\]|\s|)(.+|)', '',
                           name[0])
                if cleantitle.get(t) != cleantitle.get(title):
                    raise Exception()
                y = re.findall('\(\s*(\d{4})\s*\)', name[0])[0]
                if y != year:
                    raise Exception()
                for url in links:
                    # skip known-bad / non-hoster destinations
                    if any(x in url for x in [
                            '.online', 'xrysoi.se', 'filmer', '.bp',
                            '.blogger'
                    ]):
                        continue
                    url = client.replaceHTMLCodes(url)
                    url = url.encode('utf-8')
                    valid, host = source_utils.is_host_valid(url, hostDict)
                    if 'hdvid' in host:
                        valid = True  # hdvid accepted even if not in hostDict
                    if not valid:
                        continue
                    quality = 'SD'
                    info = 'SUB'
                    sources.append({
                        'source': host, 'quality': quality,
                        'language': 'gr', 'url': url, 'info': info,
                        'direct': False, 'debridonly': False})
            except:
                pass
        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    """Scrape TorrentQuest search results into debrid-only magnet sources.

    Filters rows by title, hdlr (SxxEyy or year) and language, and attaches
    quality/size metadata. Returns a list of source dicts (possibly empty).
    """
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources  # all results are debrid-only
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
            'title']
        title = title.replace('&', 'and').replace('Special Victims Unit',
                                                  'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(
            data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        # search link is keyed by first letter + cleaned query
        url = urlparse.urljoin(
            self.base_link,
            self.search_link.format(query[0].lower(),
                                    cleantitle.geturl(query)))
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url)
        if '<tbody' not in r:
            return sources
        r = client.parseDOM(r, 'tbody')[0]
        posts = client.parseDOM(r, 'tr')
        posts = [i for i in posts if 'magnet:' in i]
        for post in posts:
            # NOTE(review): both replaces look like lost entities
            # (&nbsp;/&amp;) — confirm against upstream source.
            post = post.replace(' ', ' ')
            links = client.parseDOM(post, 'a', ret='href')
            magnet = [
                i.replace('&', '&') for i in links if 'magnet:' in i
            ][0]
            url = magnet.split('&tr')[0]  # strip tracker list
            if url in str(sources):
                continue  # duplicate magnet
            name = client.parseDOM(post, 'a', ret='title')[1]
            name = urllib.unquote_plus(name).replace(' ', '.')
            if source_utils.remove_lang(name):
                continue
            t = name.split(hdlr)[0].replace(data['year'], '').replace(
                '(', '').replace(')', '').replace('&', 'and').replace(
                '.US.', '.').replace('.us.', '.')
            if cleantitle.get(t) != cleantitle.get(title):
                continue
            if hdlr not in name:
                continue
            quality, info = source_utils.get_release_quality(name, url)
            try:
                size = re.findall(
                    '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                    post)[0]
                dsize, isize = source_utils._size(size)
                info.insert(0, isize)
            except:
                # FIX: dsize was left unbound when no size matched, which
                # raised NameError in the append below and aborted the whole
                # scrape through the outer except. Default it to 0.
                dsize = 0
            info = ' | '.join(info)
            sources.append({
                'source': 'torrent', 'quality': quality, 'language': 'en',
                'url': url, 'info': info, 'direct': False,
                'debridonly': True, 'size': dsize})
        return sources
    except:
        source_utils.scraper_error('TORRENTQUEST')
        return sources
def sources(self, url, hostDict, hostprDict):
    """Resolve episode/movie stream links from gowatchseries.

    Performs a cookie-primed XHR search, matches the show (title+season) or
    movie (title / "title (year)"), then collects per-episode hoster links
    from the 'anime_muti_link' block (with a -episode-0/-episode-1 fallback
    for movies). vidnode links are expanded via more_sources.more_vidnode.
    """
    try:
        sources = []
        if url is None:
            return sources
        # only query-string style urls are processed here
        if not str(url).startswith('http'):
            data = parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '')
                         for i in data])
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
                'title']
            if 'season' in data:
                season = data['season']
            if 'episode' in data:
                episode = data['episode']
            year = data['year']
            # prime cookies/headers for the XHR search endpoint
            r = client.request(self.base_link,
                               output='extended',
                               timeout='10')
            cookie = r[4]
            headers = r[3]
            result = r[0]
            headers['Cookie'] = cookie
            query = urljoin(
                self.base_link,
                self.search_link % quote_plus(cleantitle.getsearch(title)))
            r = client.request(query, headers=headers, XHR=True)
            r = json.loads(r)['content']
            r = zip(client.parseDOM(r, 'a', ret='href'),
                    client.parseDOM(r, 'a'))
            if 'tvshowtitle' in data:
                # match "titleseasonN" or "titleseason0N"
                cltitle = cleantitle.get(title + 'season' + season)
                cltitle2 = cleantitle.get(title + 'season%02d' % int(season))
                r = [
                    i for i in r if cltitle == cleantitle.get(i[1])
                    or cltitle2 == cleantitle.get(i[1])
                ]
                vurl = '%s%s-episode-%s' % (self.base_link, str(
                    r[0][0]).replace('/info', ''), episode)
                vurl2 = None
            else:
                cltitle = cleantitle.getsearch(title)
                cltitle2 = cleantitle.getsearch('%s (%s)' % (title, year))
                r = [
                    i for i in r if cltitle2 == cleantitle.getsearch(i[1])
                    or cltitle == cleantitle.getsearch(i[1])
                ]
                # movies are published as episode-0 or episode-1
                vurl = '%s%s-episode-0' % (self.base_link, str(
                    r[0][0]).replace('/info', ''))
                vurl2 = '%s%s-episode-1' % (self.base_link, str(
                    r[0][0]).replace('/info', ''))
            r = client.request(vurl, headers=headers)
            headers['Referer'] = vurl
            slinks = client.parseDOM(r,
                                     'div',
                                     attrs={'class': 'anime_muti_link'})
            slinks = client.parseDOM(slinks, 'li', ret='data-video')
            # fall back to the -episode-1 variant when -episode-0 is empty
            if len(slinks) == 0 and vurl2 is not None:
                r = client.request(vurl2, headers=headers)
                headers['Referer'] = vurl2
                slinks = client.parseDOM(
                    r, 'div', attrs={'class': 'anime_muti_link'})
                slinks = client.parseDOM(slinks, 'li', ret='data-video')
            for slink in slinks:
                try:
                    if 'vidnode.net' in slink:
                        for source in more_sources.more_vidnode(
                                slink, hostDict):
                            sources.append(source)
                    else:
                        quality = source_utils.check_url(slink)
                        valid, hoster = source_utils.is_host_valid(
                            slink, hostDict)
                        if valid:
                            sources.append({
                                'source': hoster, 'quality': quality,
                                'info': '', 'language': 'en', 'url': slink,
                                'direct': False, 'debridonly': False})
                except:
                    source_utils.scraper_error('GOWATCHSERIES')
                    pass
        return sources
    except:
        source_utils.scraper_error('GOWATCHSERIES')
        return sources
def sources(self, url, hostDict, hostprDict):
    """Scrape 0day-style release posts into debrid-only hoster sources.

    Searches the site, falls back to a title-only url for shows, collects
    post links from <h2> headers (two attempts), then harvests all hoster
    urls from each post's entry-content that contain the dotted query.
    """
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
            'title']
        title = title.replace('&', 'and').replace('Special Victims Unit',
                                                  'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(
            data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']),
                                   int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url).replace('-', '+')
        r = self.scraper.get(url).content
        if r is None and 'tvshowtitle' in data:
            # fallback: retry with just the title for shows
            season = re.search('S(.*?)E', hdlr)
            season = season.group(1)
            url = title
            r = self.scraper.get(url).content
        # up to two passes over the result page to gather post links
        for loopCount in range(0, 2):
            if loopCount == 1 or (r is None and 'tvshowtitle' in data):
                r = self.scraper.get(url).content
            posts = client.parseDOM(r, "h2")
            hostDict = hostprDict + hostDict
            items = []
            for post in posts:
                try:
                    u = client.parseDOM(post, 'a', ret='href')
                    for i in u:
                        try:
                            name = str(i)
                            items.append(name)
                        except:
                            source_utils.scraper_error('0DAY')
                            pass
                except:
                    source_utils.scraper_error('0DAY')
                    pass
            if len(items) > 0:
                break
        for item in items:
            try:
                info = []
                i = str(item)
                r = self.scraper.get(i).content
                u = client.parseDOM(r,
                                    "div",
                                    attrs={"class": "entry-content"})
                for t in u:
                    r = re.compile('a href="(.+?)">.+?<').findall(t)
                    # dotted form of the query must appear in the hoster url
                    query = query.replace(' ', '.')
                    for url in r:
                        if not query in url:
                            continue
                        if any(x in url for x in ['.rar', '.zip', '.iso']):
                            continue
                        quality, info = source_utils.get_release_quality(
                            url)
                        valid, host = source_utils.is_host_valid(
                            url, hostDict)
                        sources.append({
                            'source': host, 'quality': quality,
                            'language': 'en', 'url': url, 'info': info,
                            'direct': False, 'debridonly': True})
            except:
                source_utils.scraper_error('0DAY')
                pass
        return sources
    except:
        source_utils.scraper_error('0DAY')
        return sources
def get_sources_packs(self, link):
    """Scrape KickAss season/show packs into self.sources.

    Extracts magnet links from 'Torrent magnet link' anchors, filters by
    pack type (season vs whole show) unless self.bypass_filter is set, and
    records seeders/size/package metadata on each item.
    """
    # log_utils.log('link = %s' % link, __name__, log_utils.LOGDEBUG)
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(link, headers=headers)
        if not r:
            return
        posts = client.parseDOM(r, 'tr', attrs={'id': 'torrent_latest_torrents'})
        for post in posts:
            ref = client.parseDOM(post, 'a', attrs={'title': 'Torrent magnet link'}, ret='href')[0]
            # magnet uri is embedded in a redirect's url= parameter
            link = ref.split('url=')[1]
            # NOTE(review): '&'->'&' looks like a lost '&amp;' entity — confirm upstream
            url = unquote_plus(link).replace('&', '&').replace(' ', '.')
            url = url.split('&tr')[0]
            hash = re.compile('btih:(.*?)&').findall(url)[0]
            name = unquote_plus(url.split('&dn=')[1])
            name = source_utils.clean_name(self.title, name)
            if source_utils.remove_lang(name):
                continue
            if not self.search_series:
                # season-pack mode
                if not self.bypass_filter:
                    if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name):
                        continue
                package = 'season'
            elif self.search_series:
                # whole-show pack mode; also determines last season covered
                if not self.bypass_filter:
                    valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
                    if not valid:
                        continue
                else:
                    last_season = self.total_seasons
                package = 'show'
            try:
                seeders = int(re.findall('<td class="green center">([0-9]+|[0-9]+,[0-9]+)</td>', post, re.DOTALL)[0].replace(',', ''))
                if self.min_seeders > seeders:
                    continue
            except:
                seeders = 0
                pass
            quality, info = source_utils.get_release_quality(name, url)
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                dsize, isize = source_utils._size(size)
                info.insert(0, isize)
            except:
                dsize = 0
                pass
            info = ' | '.join(info)
            item = {'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality,
                    'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True,
                    'size': dsize, 'package': package}
            if self.search_series:
                item.update({'last_season': last_season})
            self.sources.append(item)
    except:
        source_utils.scraper_error('KICKASS2')
        pass
def sources(self, url, hostDict, hostprDict):
    """Scrape 'buttn magnet' release posts into debrid-only torrent sources.

    Searches by title+hdlr, follows each entry-title link, and extracts the
    magnet button href from the post body.
    """
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
            'title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(
            data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s s%02de%02d' % (
            data['tvshowtitle'], int(data['season']),
            int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (
                data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        r = client.request(url)
        try:
            posts = client.parseDOM(r, 'h2', attrs={'class': 'entry-title'})
            for post in posts:
                data = client.parseDOM(post, 'a', ret='href')
                for u in data:
                    r = client.request(u)
                    r = client.parseDOM(
                        r, 'div', attrs={'class': 'clearfix entry-content'})
                    for t in r:
                        link = re.findall(
                            'a class="buttn magnet" href="(.+?)"', t)[0]
                        # NOTE(review): quality is derived from the post URL
                        # `u`, not the release name — looks suspect; confirm.
                        quality, info = source_utils.get_release_quality(u)
                        try:
                            size = re.findall(
                                '((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:gb|gib|mb|mib))',
                                str(data))[-1]
                            div = 1 if size.endswith(('gb')) else 1024
                            size = float(re.sub('[^0-9|/.|/,]', '',
                                                size)) / div
                            size = '%.2f gb' % size
                            info.append(size)
                        except:
                            pass
                        info = ' | '.join(info)
                        sources.append({
                            'source': 'Torrent', 'quality': quality,
                            'language': 'en', 'url': link, 'info': info,
                            'direct': False, 'debridonly': True})
        except:
            # NOTE(review): returns None (not sources) on inner failure
            return
        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    """Scrape an RSS <item> feed of hoster links into debrid-only sources.

    Matches each item's title against the wanted title and hdlr
    (SxxEyy or year), then de-prioritizes CAM-quality results.
    """
    try:
        sources = []
        if url == None:
            return sources
        if debrid.status() is False:
            raise Exception()
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
            'title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(
            data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s S%02dE%02d' % (
            data['tvshowtitle'], int(data['season']),
            int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (
                data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        html = client.request(url)
        posts = client.parseDOM(html, 'item')
        hostDict = hostprDict + hostDict
        items = []
        for post in posts:
            try:
                t = client.parseDOM(post, 'title')[0]
                u = client.parseDOM(post, 'a', ret='href')
                s = re.search(
                    '((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', post)
                s = s.groups()[0] if s else '0'
                # one (title, link, size) tuple per hoster link in the item
                items += [(t, i, s) for i in u]
            except:
                pass
        for item in items:
            try:
                url = item[1]
                if any(x in url for x in ['.rar', '.zip', '.iso']):
                    raise Exception()
                url = client.replaceHTMLCodes(url)
                url = url.encode('utf-8')
                valid, host = source_utils.is_host_valid(url, hostDict)
                if not valid:
                    raise Exception()
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')
                name = item[0]
                name = client.replaceHTMLCodes(name)
                # strip year/episode/3D markers before title comparison
                t = re.sub(
                    '(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)',
                    '', name, flags=re.I)
                if not cleantitle.get(t) == cleantitle.get(title):
                    raise Exception()
                y = re.findall(
                    '[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]',
                    name)[-1].upper()
                if not y == hdlr:
                    raise Exception()
                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall(
                        '((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))',
                        item[2])[-1]
                    div = 1 if size.endswith(('GB', 'GiB')) else 1024
                    size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                    size = '%.2f GB' % size
                    info.append(size)
                except:
                    pass
                info = ' | '.join(info)
                sources.append({
                    'source': host, 'quality': quality, 'language': 'en',
                    'url': url, 'info': info, 'direct': False,
                    'debridonly': True})
            except:
                pass
        # drop CAM releases when anything better exists
        check = [i for i in sources if not i['quality'] == 'CAM']
        if check:
            sources = check
        return sources
    except:
        # FIX: previously `return` (None) — callers iterate the result and
        # every sibling scraper returns a list, so return the list instead.
        return sources
def get_sources_packs(self, link):
    """Scrape BTScene-style season/show packs into self.sources.

    Extracts direct magnet anchors, filters by pack type (season vs whole
    show) unless self.bypass_filter is set, and records seeders/size/package
    metadata on each item.
    """
    # log_utils.log('link = %s' % str(link), __name__, log_utils.LOGDEBUG)
    try:
        r = client.request(link)
        if not r:
            return
        posts = client.parseDOM(r, 'tr')
        for post in posts:
            link = re.findall('a title="Download Torrent Magnet" href="(magnet:.+?)"', post, re.DOTALL)
            if not link:
                continue
            for url in link:
                # NOTE(review): '&'->'&' looks like a lost '&amp;' entity — confirm upstream
                url = unquote_plus(url).split('&tr')[0].replace('&', '&').replace(' ', '.')
                url = source_utils.strip_non_ascii_and_unprintable(url)
                hash = re.compile('btih:(.*?)&').findall(url)[0]
                name = url.split('&dn=')[1]
                name = source_utils.clean_name(self.title, name)
                if source_utils.remove_lang(name):
                    continue
                if not self.search_series:
                    # season-pack mode
                    if not self.bypass_filter:
                        if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name):
                            continue
                    package = 'season'
                elif self.search_series:
                    # whole-show pack mode; also determines last season covered
                    if not self.bypass_filter:
                        valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
                        if not valid:
                            continue
                    else:
                        last_season = self.total_seasons
                    package = 'show'
                try:
                    seeders = int(client.parseDOM(post, 'td', attrs={'class': 'seeds is-hidden-sm-mobile'})[0].replace(',', ''))
                    if self.min_seeders > seeders:
                        continue
                except:
                    seeders = 0
                    pass
                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                    dsize, isize = source_utils._size(size)
                    info.insert(0, isize)
                except:
                    dsize = 0
                    pass
                info = ' | '.join(info)
                item = {'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality,
                        'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True,
                        'size': dsize, 'package': package}
                if self.search_series:
                    item.update({'last_season': last_season})
                self.sources.append(item)
    except:
        source_utils.scraper_error('BTSCENE')
        pass
def sources(self, url, hostDict, hostprDict):
    """Resolve French hoster links from dpstreaming.

    POSTs a search with the localized title, matches title/year/season in
    the result list, then pulls per-episode links (Épisode N paragraphs) or
    the movie's 'light' div.
    """
    try:
        print '------------------------------- -------------------------------'
        sources = []
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['title']
        # NOTE(review): both branches identical — KeyError if 'year' missing
        year = data['year'] if 'year' in data else data['year']
        season = data['season'] if 'season' in data else False
        episode = data['episode'] if 'episode' in data else False
        localtitle = data['localtitle'] if 'localtitle' in data else False
        if season and episode:
            localtitle = data[
                'localtvshowtitle'] if 'localtvshowtitle' in data else False
        t = cleantitle.get(title)
        tq = cleantitle.get(localtitle)
        # acceptable years: exact, +/- 1, or unknown ('0')
        y = [
            '%s' % str(year), '%s' % str(int(year) + 1),
            '%s' % str(int(year) - 1), '0'
        ]
        query = self.base_link
        r = client.request(query, post='s=%s' % tq)
        r = client.parseDOM(r, 'div', attrs={'class': 'title'})
        r = [(client.parseDOM(i, 'a', ret='href'),
              re.compile('title=\"Permanent Link to(.+?) \[').findall(i))
             for i in r]
        r = [(i[0][0], i[1][0].lower()) for i in r
             if len(i[0]) > 0 and len(i[1]) > 0]
        # split "title (year)" out of the link text
        r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1],
              i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        # split "title saison N" for shows
        r = [(i[0], i[1], i[2],
              re.findall('(.+?)\s+(?:saison|s)\s+(\d+)', i[1])) for i in r]
        r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2],
              i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
        # strip numeric HTML entities from titles
        r = [(i[0], re.sub(' \&\#[0-9]{4,6};', '', i[1]), i[2], i[3])
             for i in r]
        r = [
            i[0] for i in r if tq == cleantitle.get(i[1]) and i[2] in y
            and int(i[3]) == int(season)
        ][0]
        url = re.findall('(?://.+?|)(/.+)', r)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        # NOTE(review): no scheme on this request url — presumably
        # client.request prepends one; confirm.
        r = client.request('ww1.dpstreaming.cc' + url)
        if season and episode:
            # episode paragraphs are zero-padded below 10
            if int(episode) < 10:
                r = re.compile('<p align="center">Épisode 0%s(.+?)</p>' %
                               episode).findall(r)[0]
            else:
                r = re.compile('<p align="center">Épisode %s(.+?)</p>' %
                               episode).findall(r)[0]
            r = re.compile('<a href=\"(.+?)\"',
                           re.MULTILINE | re.DOTALL).findall(r)
        else:
            r = client.parseDOM(r, 'div', attrs={'id': 'light'})
            r = client.parseDOM(r, 'a', ret='href')
        for url in r:
            host = re.findall(
                '([\w]+[.][\w]+)$',
                urlparse.urlparse(url.strip().lower()).netloc)[0]
            if not host in hostDict:
                continue
            host = client.replaceHTMLCodes(host)
            host = host.encode('utf-8')
            sources.append({
                'source': host, 'quality': 'SD', 'language': 'FR',
                'url': url, 'direct': False, 'debridonly': False})
        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    """Scrape scene-release posts into debrid-only hoster sources.

    Searches by title (+year/episode), walks up to 3 matching posts, and
    harvests hoster links from each post page, re-validating title and hdlr
    against each link's filename.
    """
    try:
        sources = []
        if url == None:
            return sources
        if debrid.status() == False:
            raise Exception()
        hostDict = hostprDict + hostDict
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
            'title']
        # shows use "yearSxxEyy" as the handle, movies just the year
        hdlr = '%sS%02dE%02d' % (
            data['year'], int(data['season']), int(data['episode'])
        ) if 'tvshowtitle' in data else data['year']
        query = '%s %s S%02dE%02d' % (
            data['tvshowtitle'], data['year'], int(data['season']),
            int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (
                data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        items = []
        try:
            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url)
            r = self.scraper.get(url).content
            posts = client.parseDOM(r, 'div', attrs={'class': 'post'})
            dupes = []
            for post in posts:
                try:
                    t = client.parseDOM(post, 'a')[0]
                    t = re.sub('<.+?>|</.+?>', '', t)
                    x = re.sub(
                        '(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)',
                        '', t)
                    if not cleantitle.get(title) in cleantitle.get(x):
                        raise Exception()
                    y = re.findall(
                        '[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]',
                        t)[-1].upper()
                    if not y == hdlr:
                        raise Exception()
                    fmt = re.sub(
                        '(.+)(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*)(\.|\)|\]|\s)',
                        '', t.upper())
                    fmt = re.split('\.|\(|\)|\[|\]|\s|\-', fmt)
                    fmt = [i.lower() for i in fmt]
                    #if not any(i in ['1080p', '720p'] for i in fmt): raise Exception()
                    if len(dupes) > 2:
                        raise Exception()  # cap at 3 matching posts
                    dupes += [x]
                    u = client.parseDOM(post, 'a', ret='href')[0]
                    r = self.scraper.get(u).content
                    u = client.parseDOM(r, 'a', ret='href')
                    # (filename, link) pairs harvested from the post page
                    u = [(i.strip('/').split('/')[-1], i) for i in u]
                    items += u
                except:
                    pass
        except:
            pass
        for item in items:
            try:
                name = item[0]
                name = client.replaceHTMLCodes(name)
                t = re.sub(
                    '(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)',
                    '', name)
                if not cleantitle.get(t) == cleantitle.get(title):
                    raise Exception()
                y = re.findall(
                    '[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]',
                    name)[-1].upper()
                if not y == hdlr:
                    raise Exception()
                quality, info = source_utils.get_release_quality(
                    name, item[1])
                url = item[1]
                if any(x in url for x in ['.rar', '.zip', '.iso']):
                    raise Exception()
                url = client.replaceHTMLCodes(url)
                url = url.encode('utf-8')
                host = re.findall(
                    '([\w]+[.][\w]+)$',
                    urlparse.urlparse(url.strip().lower()).netloc)[0]
                if not host in hostDict:
                    raise Exception()
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')
                sources.append({
                    'source': host, 'quality': quality, 'language': 'en',
                    'url': url, 'info': info, 'direct': False,
                    'debridonly': True})
            except:
                pass
        return sources
    except:
        # FIX: previously `return` (None) — callers iterate the result, so
        # return the (possibly empty) list like every sibling scraper.
        # (items is also initialized before the inner try now, so a search
        # failure no longer leaves it unbound.)
        return sources
def sources(self, url, hostDict, hostprDict):
    """Scrape rlsbb.ru release posts into debrid-only hoster sources.

    Builds slugged urls directly ("http://rlsbb.ru/<query>"), retrying with
    a season-only slug for shows and a premiere-date slug on the second
    pass, then harvests hoster links matching the hdlr or premiere date.
    """
    try:
        sources = []
        # NOTE(review): local `scraper` is created but self.scraper is used below
        scraper = cfscrape.create_scraper()
        if url == None:
            return sources
        if debrid.status() == False:
            raise Exception()
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        premDate = ''
        query = '%s S%02dE%02d' % (
            data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        query = query.replace("&", "and")
        # NOTE(review): no-op replace — likely a collapsed double-space fix; confirm
        query = query.replace("  ", " ")
        query = query.replace(" ", "-")
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # the search_link url above is immediately overwritten by a direct slug
        url = "http://rlsbb.ru/" + query
        if 'tvshowtitle' not in data:
            url = url + "-1080p"
        r = self.scraper.get(url).content
        if r is None and 'tvshowtitle' in data:
            # retry with a "<title>-S<season>" slug
            season = re.search('S(.*?)E', hdlr)
            season = season.group(1)
            query = title
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
            query = query + "-S" + season
            query = query.replace("&", "and")
            query = query.replace("  ", " ")
            query = query.replace(" ", "-")
            url = "http://rlsbb.ru/" + query
            r = self.scraper.get(url).content
        for loopCount in range(0, 2):
            if loopCount == 1 or (r is None and 'tvshowtitle' in data):
                # second pass: slug by show title + premiere date
                premDate = re.sub('[ \.]', '-', data['premiered'])
                query = re.sub('[\\\\:;*?"<>|/\-\']', '', data['tvshowtitle'])
                query = query.replace("&", " and ").replace("  ", " ").replace(" ", "-")
                query = query + "-" + premDate
                url = "http://rlsbb.ru/" + query
                url = url.replace('The-Late-Show-with-Stephen-Colbert', 'Stephen-Colbert')
                r = self.scraper.get(url).content
            posts = client.parseDOM(r, "div", attrs={"class": "content"})
            hostDict = hostprDict + hostDict
            items = []
            for post in posts:
                try:
                    u = client.parseDOM(post, 'a', ret='href')
                    for i in u:
                        try:
                            name = str(i)
                            if hdlr in name.upper():
                                items.append(name)
                            elif len(premDate) > 0 and premDate in name.replace(".", "-"):
                                items.append(name)
                        except:
                            pass
                except:
                    pass
            if len(items) > 0:
                break
        seen_urls = set()
        for item in items:
            try:
                info = []
                url = str(item)
                url = client.replaceHTMLCodes(url)
                url = url.encode('utf-8')
                if url in seen_urls:
                    continue
                seen_urls.add(url)
                host = url.replace("\\", "")
                host2 = host.strip('"')
                host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(host2.strip().lower()).netloc)[0]
                if not host in hostDict:
                    raise Exception()
                if any(x in host2 for x in ['.rar', '.zip', '.iso']):
                    continue
                # crude quality sniffing from the link text
                if '4K' in host2:
                    quality = '4K'
                elif '2160p' in host2:
                    quality = '4K'
                elif '1080p' in host2:
                    quality = '1080p'
                elif '720p' in host2:
                    quality = '720p'
                else:
                    quality = 'SD'
                info = ' | '.join(info)
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')
                sources.append({'source': host, 'quality': quality, 'language': 'en',
                                'url': host2, 'info': info, 'direct': False, 'debridonly': True})
            except:
                pass
        # drop CAM releases when anything better exists
        check = [i for i in sources if not i['quality'] == 'CAM']
        if check:
            sources = check
        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict): try: sources = [] if url is None: return sources if debrid.status() is False: return sources data = urlparse.parse_qs(url) data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data]) title = data['title'].replace('&', 'and') hdlr = data['year'] query = '%s %s' % (title, hdlr) query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query) url = self.search_link % urllib.quote(query) url = urlparse.urljoin(self.base_link, url).replace('%20', '-') # log_utils.log('url = %s' % url, log_utils.LOGDEBUG) html = client.request(url) if html is None: return sources quality_size = client.parseDOM(html, 'p', attrs={'class': 'quality-size'}) tit = client.parseDOM(html, 'title')[0] try: results = client.parseDOM(html, 'div', attrs={'class': 'ava1'}) except: return sources p = 0 for torrent in results: link = re.findall('a data-torrent-id=".+?" href="(magnet:.+?)" class=".+?" title="(.+?)"', torrent, re.DOTALL) for url, ref in link: url = str(client.replaceHTMLCodes(url).split('&tr')[0]) url = url.replace(' ', '') hash = re.compile('btih:(.*?)&').findall(url)[0] name = url.split('&dn=')[1] name = urllib.unquote_plus(name) name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.') if source_utils.remove_lang(name): continue match = source_utils.check_title(title, tit, hdlr, data['year']) if not match: continue seeders = 0 # not available on yts quality, info = source_utils.get_release_quality(ref, url) try: size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', quality_size[p])[-1] dsize, isize = source_utils._size(size) info.insert(0, isize) except: dsize = 0 pass p += 1 info = ' | '.join(info) sources.append({'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize}) return sources except: source_utils.scraper_error('YTSWS') return sources
def sources(self, url, hostDict, hostprDict): try: sources = [] openload_limit = 1 vshare_limit = 1 flashx_limit = 1 thevideobee_limit = 1 entervideo_limit = 1 megamp4_limit = 1 vidtodo_limit = 1 r = self.scraper.get(url).content try: v = re.findall('document.write\(Base64.decode\("(.+?)"\)', r)[0] b64 = base64.b64decode(v) url = client.parseDOM(b64, 'iframe', ret='src')[0] try: host = re.findall( '([\w]+[.][\w]+)$', urlparse.urlparse(url.strip().lower()).netloc)[0] host = client.replaceHTMLCodes(host) host = host.encode('utf-8') valid, host = source_utils.is_host_valid(host, hostDict) if valid: sources.append({ 'source': host, 'quality': 'SD', 'language': 'en', 'url': url.replace('\/', '/'), 'direct': False, 'debridonly': False }) except: pass except: pass r = client.parseDOM(r, 'div', {'class': 'server_line'}) r = [(client.parseDOM(i, 'a', ret='href')[0], client.parseDOM(i, 'p', attrs={'class': 'server_servername'})[0]) for i in r] if r: for i in r: try: host = re.sub('Server|Link\s*\d+', '', i[1]).lower() url = i[0] host = client.replaceHTMLCodes(host) host = host.encode('utf-8') if 'other' in host: continue valid, host = source_utils.is_host_valid( host, hostDict) if 'openload' in host: if openload_limit < 1: continue else: openload_limit -= 1 if 'vshare' in host: if vshare_limit < 1: continue else: vshare_limit -= 1 if 'flashx' in host: if flashx_limit < 1: continue else: flashx_limit -= 1 if 'thevideobee' in host: if thevideobee_limit < 1: continue else: thevideobee_limit -= 1 if 'entervideo' in host: if entervideo_limit < 1: continue else: entervideo_limit -= 1 if 'megamp4' in host: if megamp4_limit < 1: continue else: megamp4_limit -= 1 if 'vidtodo' in host: if vidtodo_limit < 1: continue else: vidtodo_limit -= 1 if valid: sources.append({ 'source': host, 'quality': 'SD', 'language': 'en', 'url': url.replace('\/', '/'), 'direct': False, 'debridonly': False }) except: pass return sources except Exception: return
def sources(self, url, hostDict, hostprDict):
    """Resolve hoster and gvideo streams for a movie/episode page.

    Looks up the media id, fetches its server list, then per entry either
    follows an embed link (server != '6') or decodes a token script to
    build a direct playlist URL (server == '6').
    """
    try:
        sources = []
        if url is None:
            return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        # NOTE(review): eval() on plugin-supplied data; kept as-is but this
        # should be ast.literal_eval — flagging, not changing behavior here.
        aliases = eval(data['aliases'])
        headers = {}
        if 'tvshowtitle' in data:
            episode = int(data['episode'])
            url = self.searchShow(data['tvshowtitle'], data['season'], aliases, headers)
        else:
            episode = 0  # 0 means "movie": accept every entry below.
            url = self.searchMovie(data['title'], data['year'], aliases, headers)
        # Media id is the trailing numeric suffix of the page URL.
        mid = re.findall('-(\d+)', url)[-1]
        try:
            headers = {'Referer': url}
            u = urlparse.urljoin(self.base_link, self.server_link % mid)
            r = self.scraper.get(u).content
            r = json.loads(r)['html']
            r = client.parseDOM(r, 'div', attrs={'class': 'pas-list'})
            ids = client.parseDOM(r, 'li', ret='data-id')
            servers = client.parseDOM(r, 'li', ret='data-server')
            labels = client.parseDOM(r, 'a', ret='title')
            r = zip(ids, servers, labels)
            u = urlparse.urljoin(self.base_link, self.info_link % mid)
            quality = self.scraper.get(u).content
            quality = dom_parser.parse_dom(quality, 'div', attrs={'class': 'jtip-quality'})[0].content
            if quality == "HD":
                quality = "720p"
            for eid in r:
                try:
                    # Episode number parsed from the label; 0 if absent.
                    try:
                        ep = re.findall('episode.*?(\d+).*?', eid[2].lower())[0]
                    except Exception:
                        ep = 0
                    if (episode == 0) or (int(ep) == episode):
                        if eid[1] != '6':
                            # External embed: fetch the iframe src.
                            url = urlparse.urljoin(self.base_link, self.embed_link % eid[0])
                            link = self.scraper.get(url).content
                            link = json.loads(link)['src']
                            valid, host = source_utils.is_host_valid(link, hostDict)
                            sources.append({
                                'source': host,
                                'quality': quality,
                                'language': 'en',
                                'url': link,
                                'info': [],
                                'direct': False,
                                'debridonly': False
                            })
                        else:
                            # Server 6: decode the obfuscated token script
                            # to recover the (x, y) request parameters.
                            url = urlparse.urljoin(self.base_link, self.token_link % (eid[0], mid))
                            script = self.scraper.get(url).content
                            if '$_$' in script:
                                params = self.uncensored1(script)
                            elif script.startswith('[]') and script.endswith('()'):
                                params = self.uncensored2(script)
                            elif '_x=' in script:
                                x = re.search('''_x=['"]([^"']+)''', script).group(1)
                                y = re.search('''_y=['"]([^"']+)''', script).group(1)
                                params = {'x': x, 'y': y}
                            else:
                                raise Exception()
                            u = urlparse.urljoin(self.base_link, self.source_link % (eid[0], params['x'], params['y']))
                            r = self.scraper.get(u).content
                            url = json.loads(r)['playlist'][0]['sources']
                            url = [i['file'] for i in url if 'file' in i]
                            url = [directstream.googletag(i) for i in url]
                            url = [i[0] for i in url if i]
                            for s in url:
                                # Resolve googleusercontent links to their
                                # final redirect target.
                                if 'lh3.googleusercontent.com' in s['url']:
                                    s['url'] = directstream.googleredirect(s['url'])
                                sources.append({
                                    'source': 'gvideo',
                                    'quality': s['quality'],
                                    'language': 'en',
                                    'url': s['url'],
                                    'direct': True,
                                    'debridonly': False
                                })
                except Exception:
                    pass
        except Exception:
            pass
        return sources
    except Exception:
        return sources
def sources(self, url, hostDict, hostprDict): try: sources = [] if url is None: return sources if debrid.status() is False: raise Exception() data = urlparse.parse_qs(url) data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data]) url = self.searchMovie(data['title'], data['year']) if url is None: return sources r = client.request(url) data = client.parseDOM(r, 'div', attrs={'class': 'playex'})[0] frames = client.parseDOM(data, 'iframe', ret='src') frames += re.compile('''<iframe\s*src=['"](.+?)['"]''', re.DOTALL).findall(data) quality = client.parseDOM(r, 'span', attrs={'class': 'qualityx'})[0] for frame in frames: url = frame.split('=')[1] if frame.startswith('<') else frame url = client.replaceHTMLCodes(url) url = url.encode('utf-8') valid, host = source_utils.is_host_valid(url, hostDict) if valid: quality, info = source_utils.get_release_quality( quality, url) info = ' | '.join(info) sources.append({ 'source': host, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': False }) elif url.endswith('mp4'): url += '|User-Agent=%s' % urllib.quote_plus(client.agent()) sources.append({ 'source': 'MP4', 'quality': quality, 'language': 'en', 'url': url, 'direct': True, 'debridonly': False }) elif 'mystream' in url: data = client.request(url) links = dom_parser2.parse_dom(data, 'source', req=['src', 'label']) for link in links: label = link.attrs['label'] url = link.attrs[ 'src'] + '|User-Agent=%s' % urllib.quote_plus( client.agent()) sources.append({ 'source': 'MYSTREAM', 'quality': label, 'language': 'en', 'url': url, 'direct': True, 'debridonly': False }) else: continue return sources except Exception: return sources
def sources(self, url, hostDict, hostprDict):
    """Scrape skytorrents result rows for magnet links with seeder counts.

    Search/parse errors abort early; per-row errors abort the row loop via
    the second handler. Returns whatever was accumulated.
    """
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % quote_plus(query)
        url = urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url)
        if '<tbody' not in r:
            return sources
        posts = client.parseDOM(r, 'tbody')[0]
        posts = client.parseDOM(posts, 'tr')
    except:
        source_utils.scraper_error('SKYTORRENTS')
        return sources
    for post in posts:
        try:
            post = re.sub(r'\n', '', post)
            post = re.sub(r'\t', '', post)
            # Magnet href plus the green seeders cell in one pass.
            link = re.findall('href="(magnet:.+?)".+<td style="text-align: center;color:green;">([0-9]+|[0-9]+,[0-9]+)</td>', post, re.DOTALL)
            for url, seeders, in link:
                # NOTE(review): replace('&', '&') is a no-op as written —
                # presumably an HTML-entity unescape mangled in transit.
                url = unquote_plus(url).replace('&', '&').replace(' ', '.')
                url = url.split('&tr')[0]
                try:
                    url = url.encode('ascii', errors='ignore').decode('ascii', errors='ignore')
                except:
                    pass
                hash = re.compile('btih:(.*?)&').findall(url)[0]
                name = url.split('&dn=')[1]
                name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
                # Strip leading site-branding prefixes from release names.
                if name.startswith('www'):
                    try:
                        name = re.sub(r'www(.*?)\W{2,10}', '', name)
                    except:
                        name = name.split('-.', 1)[1].lstrip()
                if source_utils.remove_lang(name):
                    continue
                match = source_utils.check_title(title, name, hdlr, data['year'])
                if not match:
                    continue
                try:
                    seeders = int(seeders)
                    if self.min_seeders > seeders:
                        continue
                except:
                    seeders = 0
                    pass
                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                    dsize, isize = source_utils._size(size)
                    info.insert(0, isize)
                except:
                    dsize = 0
                    pass
                info = ' | '.join(info)
                sources.append({
                    'source': 'torrent',
                    'seeders': seeders,
                    'hash': hash,
                    'name': name,
                    'quality': quality,
                    'language': 'en',
                    'url': url,
                    'info': info,
                    'direct': False,
                    'debridonly': True,
                    'size': dsize
                })
        except:
            source_utils.scraper_error('SKYTORRENTS')
            return sources
    return sources
def sources(self, url, hostDict, hostprDict): try: sources = [] if url == None: return sources data = urlparse.parse_qs(url) data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data]) aliases = eval(data['aliases']) headers = {} if 'tvshowtitle' in data: ep = data['episode'] url = '%s/film/%s-season-%01d/watching.html?ep=%s' % ( self.base_link, cleantitle.geturl( data['tvshowtitle']), int(data['season']), ep) r = client.request(url, headers=headers, timeout='10', output='geturl') if url == None: url = self.searchShow(data['tvshowtitle'], data['season'], aliases, headers) else: url = self.searchMovie(data['title'], data['year'], aliases, headers) if url == None: url = '%s/film/%s/watching.html?ep=0' % ( self.base_link, cleantitle.geturl(data['title'])) if url == None: raise Exception() r = client.request(url, headers=headers, timeout='10') r = client.parseDOM(r, 'div', attrs={'class': 'les-content'}) if 'tvshowtitle' in data: ep = data['episode'] links = client.parseDOM(r, 'a', attrs={'episode-data': ep}, ret='player-data') else: links = client.parseDOM(r, 'a', ret='player-data') for link in links: if '123movieshd' in link or 'seriesonline' in link: r = client.request(link, headers=headers, timeout='10') r = re.findall('(https:.*?redirector.*?)[\'\"]', r) for i in r: try: sources.append({ 'source': 'gvideo', 'quality': directstream.googletag(i)[0]['quality'], 'language': 'en', 'url': i, 'direct': True, 'debridonly': False }) except: pass else: try: host = re.findall( '([\w]+[.][\w]+)$', urlparse.urlparse(link.strip().lower()).netloc)[0] if not host in hostDict: raise Exception() host = client.replaceHTMLCodes(host) host = host.encode('utf-8') sources.append({ 'source': host, 'quality': 'SD', 'language': 'en', 'url': link, 'direct': False, 'debridonly': False }) except: pass return sources except: return sources
def _get_sources(self, i): try: name = i[0] name = client.replaceHTMLCodes(name) r = client.request(i[1].strip()) r = client.parseDOM(r, 'p') r = [ e for e in r if all(x in e.lower() for x in ['single', 'link']) ] links = client.parseDOM(r[0], 'a', ret='href') for url in links: try: if any(x in url for x in ['.rar.', '.zip.', '.iso.']) or any( url.endswith(x) for x in ['.rar', '.zip', '.iso']): raise Exception() t = re.sub( '(\.|\(|\[|\s)(\d{4}|S\d+E\d+|S\d+|3D)(\.|\)|\]|\s|)(.+|)', '', name, flags=re.I) if not cleantitle.get(t) == cleantitle.get(self.title): raise Exception() try: y = re.findall( '(?:\.|\(|\[|\s*|)(S\d+E\d+|S\d+)(?:\.|\)|\]|\s*|)', name, re.I)[-1].upper() except BaseException: y = re.findall( '(?:\.|\(|\[|\s*|)(\d{4})(?:\.|\)|\]|\s*|)', name, re.I)[0].upper() if not y == self.hdlr: raise Exception() valid, host = source_utils.is_host_valid( url, self.hostDict) if not valid: continue if host in ['1fichier.com', 'uptobox.com']: raise Exception() host = client.replaceHTMLCodes(host) host = host.encode('utf-8') info = [] quality, info = source_utils.get_release_quality(name, url) try: size = re.findall( '((?:\d+\.\d+|\d+\,\d+|\d+) (?:GB|GiB|MB|MiB))', i[2])[0] div = 1 if size.endswith(('GB', 'GiB')) else 1024 size = float(re.sub('[^0-9|/.|/,]', '', size)) / div size = '%.2f GB' % size info.append(size) except BaseException: pass info = ' | '.join(info) self._sources.append({ 'source': host, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True }) except BaseException: pass except BaseException: pass
def sources(self, url, hostDict, hostprDict):
    """Scrape 'postpage_movie_download' listing pages for debrid hoster links.

    Fixes vs. original: removed a stray Python-2 debug `print items` inside
    the scrape loop; `== None` / `== False` comparisons tightened to
    `is None` / `is False`; the quality ladder's redundant branches
    ('1080p' then '1080', '720p' then '720') collapsed — the substring
    checks subsume the explicit '-p' forms, yielding identical results.
    """
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(
            data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s S%02dE%02d' % (
            data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url).replace('-', '+')
        r = client.request(url)
        # Fallback: retry with just the title when the episode search misses.
        if r is None and 'tvshowtitle' in data:
            season = re.search('S(.*?)E', hdlr)
            season = season.group(1)
            url = title
            r = client.request(url)
        for loopCount in range(0, 2):
            if loopCount == 1 or (r is None and 'tvshowtitle' in data):
                r = client.request(url)
            posts = client.parseDOM(
                r, "div", attrs={"class": "postpage_movie_download"})
            hostDict = hostprDict + hostDict
            items = []
            for post in posts:
                try:
                    u = client.parseDOM(post, 'a', ret='href')
                    for i in u:
                        try:
                            items.append(str(i))
                        except:
                            pass
                except:
                    pass
            if len(items) > 0:
                break
        for item in items:
            try:
                info = []
                i = str(item)
                r = client.request(i)
                u = client.parseDOM(r, "div", attrs={"class": "multilink_lnks"})
                for t in u:
                    r = client.parseDOM(t, 'a', ret='href')
                    for url in r:
                        # Quality from URL substrings; 'HD' alone means 720p.
                        if '1080' in url:
                            quality = '1080p'
                        elif '720' in url or 'HD' in url:
                            quality = '720p'
                        else:
                            quality = 'SD'
                        info = ' | '.join(info)
                        valid, host = source_utils.is_host_valid(
                            url, hostDict)
                        sources.append({
                            'source': host,
                            'quality': quality,
                            'language': 'en',
                            'url': url,
                            'info': info,
                            'direct': False,
                            'debridonly': True
                        })
            except:
                pass
        # Prefer non-CAM results when any exist.
        check = [i for i in sources if not i['quality'] == 'CAM']
        if check:
            sources = check
        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict): try: self._sources = [] if url is None: return self._sources if debrid.status() is False: raise Exception() data = urlparse.parse_qs(url) data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data]) self.title = data[ 'tvshowtitle'] if 'tvshowtitle' in data else data['title'] self.hdlr = 'S%02dE%02d' % ( int(data['season']), int(data['episode']) ) if 'tvshowtitle' in data else data['year'] self.hostDict = hostprDict + hostDict items = [] urls = [] posts = [] links = [] if 'tvshowtitle' not in data: url = urlparse.urljoin(self.base_link, self.search_link % data['imdb']) r = client.request(url, headers={'User-Agent': client.agent()}) posts = client.parseDOM(r, 'item') else: url = urlparse.urljoin( self.base_link, self.search_link % (cleantitle.geturl(self.title).replace('-', '+') + '+' + self.hdlr)) r = client.request(url, headers={'User-Agent': client.agent()}) posts = client.parseDOM(r, 'item') if not posts: return self._sources for post in posts: try: t = client.parseDOM(post, 'title')[0] u = client.parseDOM(post, 'link')[0] s = re.search( '((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post) s = s.groups()[0] if s else '0' items += [(t, u, s)] except BaseException: pass items = set(items) threads = [] for i in items: threads.append(workers.Thread(self._get_sources, i)) [i.start() for i in threads] [i.join() for i in threads] return self._sources except BaseException: return self._sources
def _get_sources(self, url): try: r = client.request(url) posts = client.parseDOM(r, 'tr') for post in posts: link = re.findall( 'a title="Download Torrent Magnet" href="(magnet:.+?)"', post, re.DOTALL) if link == []: continue for url in link: url = url.split('&tr')[0] if any(x in url.lower() for x in [ 'french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed' ]): continue name = url.split('&dn=')[1] name = urllib.unquote_plus(name) t = name.split(self.hdlr)[0].replace( self.year, '').replace('(', '').replace(')', '').replace('&', 'and') if cleantitle.get(t) != cleantitle.get(self.title): continue if self.hdlr not in url: continue quality, info = source_utils.get_release_quality(name, url) try: size = re.findall( '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0] div = 1 if size.endswith(('GB', 'GiB')) else 1024 size = float(re.sub('[^0-9|/.|/,]', '', size)) / div size = '%.2f GB' % size info.append(size) except: pass info = ' | '.join(info) self.sources.append({ 'source': 'torrent', 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True }) except: source_utils.scraper_error('BTSCENE') pass
def sources(self, url, hostDict, hostprDict):
    """Harvest hoster URLs from a release post's anchors, <pre> blocks and
    raw body text, pairing each URL with its nearest size string.
    """
    try:
        sources = []
        scraper = cfscrape.create_scraper()
        if url == None:
            return sources
        if debrid.status() == False:
            raise Exception()
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s s%02de%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        # Slugify the query; the same normalization is used to match URLs later.
        query = re.sub('[\\\\:;*?"<>|/ \+\']+', '-', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        r = scraper.get(url).content
        r = client.parseDOM(r, "div", attrs={'class': 'entry-content'})[0]
        # Cut off everything from the share widget onward.
        r = re.sub('shareaholic-canvas.+', '', r, flags=re.DOTALL)
        # Pass 1: explicit anchors (text + href), then strip them from the body.
        a_txt = ''
        a_url = ''
        a_txt = client.parseDOM(r, "a", attrs={'href': '.+?'})
        a_url = client.parseDOM(r, "a", ret="href")
        r = re.sub('<a .+?</a>', '', r, flags=re.DOTALL)
        r = re.sub('<img .+?>', '', r, flags=re.DOTALL)
        # Pass 2: URLs inside <pre> blocks; each block's size string applies
        # to every URL found in it.
        size = ''
        pre_txt = []
        pre_url = []
        pres = client.parseDOM(r, "pre", attrs={'style': '.+?'})
        for pre in pres:
            try:
                size = re.findall('([0-9,\.]+ ?(?:GB|GiB|MB|MiB))', pre)[0]
            except:
                pass
            url0 = re.findall('https?://[^ <"\'\s]+', pre, re.DOTALL)
            txt0 = [size] * len(url0)
            pre_url = pre_url + url0
            pre_txt = pre_txt + txt0
        r = re.sub('<pre .+?</pre>', '', r, flags=re.DOTALL)
        # Pass 3: any remaining raw URLs in the stripped body.
        size = ''
        if not 'tvshowtitle' in data:
            try:
                size = " " + re.findall('([0-9,\.]+ ?(?:GB|GiB|MB|MiB))', r)[0]
            except:
                pass
        raw_url = re.findall('https?://[^ <"\'\s]+', r, re.DOTALL)
        raw_txt = [size] * len(raw_url)
        pairs = zip(a_url + pre_url + raw_url, a_txt + pre_txt + raw_txt)
        for pair in pairs:
            try:
                url = str(pair[0])
                info = re.sub('<.+?>', '', pair[1])
                if any(x in url for x in ['.rar', '.zip', '.iso']):
                    raise Exception()
                # URL+label must contain the slugged query to count as a match.
                if not query.lower() in re.sub('[\\\\:;*?"<>|/ \+\'\.]+', '-',
                                               url + info).lower():
                    raise Exception()
                size0 = info + " " + size
                try:
                    size0 = re.findall('([0-9,\.]+ ?(?:GB|GiB|MB|MiB))', size0)[0]
                    div = 1 if size0.endswith(('GB', 'GiB')) else 1024
                    size0 = float(re.sub('[^0-9\.]', '', size0)) / div
                    size0 = '%.2f GB' % size0
                except:
                    size0 = ''
                    pass
                quality, info = source_utils.get_release_quality(url, info)
                info.append(size0)
                info = ' | '.join(info)
                url = url.encode('utf-8')
                hostDict = hostDict + hostprDict
                valid, host = source_utils.is_host_valid(url, hostDict)
                if not valid:
                    continue
                sources.append({
                    'source': host,
                    'quality': quality,
                    'language': 'en',
                    'url': url,
                    'info': info,
                    'direct': False,
                    'debridonly': True
                })
            except:
                pass
        return sources
    except:
        return sources
def get_sources(self, link): try: url = 'magnet:%s' % (re.findall('a href="magnet:(.+?)"', link, re.DOTALL)[0]) url = unquote_plus(url).split('&tr')[0].replace('&', '&').replace( ' ', '.') url = source_utils.strip_non_ascii_and_unprintable(url) if url in str(self.sources): return hash = re.compile('btih:(.*?)&').findall(url)[0] name = url.split('&dn=')[1] name = source_utils.clean_name(self.title, name) if source_utils.remove_lang(name, self.episode_title): return if not source_utils.check_title(self.title, self.aliases, name, self.hdlr, self.year): return # filter for episode multi packs (ex. S01E01-E17 is also returned in query) if self.episode_title: if not source_utils.filter_single_episodes(self.hdlr, name): return try: seeders = int( client.parseDOM(link, 'td', attrs={'class': 'sy'})[0].replace(',', '')) if self.min_seeders > seeders: return except: seeders = 0 pass quality, info = source_utils.get_release_quality(name, url) try: size = re.findall( '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', link)[0] dsize, isize = source_utils._size(size) info.insert(0, isize) except: source_utils.scraper_error('EXTRATORRENT') dsize = 0 pass info = ' | '.join(info) self.sources.append({ 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize }) except: source_utils.scraper_error('EXTRATORRENT') pass
def sources(self, url, hostDict, hostprDict):
    """Search a DLE site by imdb id and harvest turbobit links, grading
    quality from each file-title span.

    Fixes vs. original: the SCR/CAM/HEVC keyword tests were written as
    `any(i in [...keywords...] for i in quals)`, which iterates the
    CHARACTERS of the title string and so never matched — the membership
    test is now in the intended direction (keyword in title, lowercased,
    since the keyword lists are lowercase). The inner loop variable no
    longer shadows the `quals` list it iterates. `== None` -> `is None`.
    """
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['title'].replace(':', '').lower()
        year = data['year']
        query = '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = urlparse.urljoin(self.base_link, self.post_link)
        post = 'do=search&subaction=search&search_start=0&full_search=0&result_from=1&story=%s' % urllib.quote_plus(
            query)
        r = client.request(url, post=post)
        r = client.parseDOM(r, 'div', attrs={'class': 'box-out margin'})
        # Keep only result boxes mentioning the exact imdb id.
        r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'news-title'}))
             for i in r if data['imdb'] in i]
        r = [(dom_parser.parse_dom(i[0], 'a', req='href')) for i in r if i]
        r = [(i[0].attrs['href'], i[0].content) for i in r if i]
        hostDict = hostprDict + hostDict
        for item in r:
            try:
                name = item[0]
                s = re.findall(
                    '((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))',
                    name)
                s = s[0] if s else '0'
                # NOTE: rebinds `data` (the parsed query dict is done with).
                data = client.request(item[0])
                data = dom_parser.parse_dom(data, 'div', attrs={'id': 'r-content'})
                data = re.findall('\s*<b><a href="(.+?)".+?</a></b>',
                                  data[0].content, re.DOTALL)
                for url in data:
                    try:
                        qual = client.request(url)
                        quals = re.findall(
                            'span class="file-title" id="file-title">(.+?)</span',
                            qual)
                        for qual_title in quals:
                            lowered = qual_title.lower()
                            if '4K' in qual_title:
                                quality = '4K'
                            elif '2160p' in qual_title:
                                quality = '4K'
                            elif '1080p' in qual_title:
                                quality = '1080p'
                            elif '720p' in qual_title:
                                quality = '720p'
                            elif any(i in lowered for i in ['dvdscr', 'r5', 'r6']):
                                quality = 'SCR'
                            elif any(i in lowered for i in [
                                    'camrip', 'tsrip', 'hdcam', 'hdts',
                                    'dvdcam', 'dvdts', 'cam', 'telesync', 'ts'
                            ]):
                                quality = 'CAM'
                            else:
                                quality = '720p'
                            info = []
                            if '3D' in name or '.3D.' in qual_title:
                                info.append('3D')
                                quality = '1080p'
                            if any(i in lowered for i in ['hevc', 'h265', 'x265']):
                                info.append('HEVC')
                            info = ' | '.join(info)
                            url = client.replaceHTMLCodes(url)
                            url = url.encode('utf-8')
                            if any(x in url for x in ['.rar', '.zip', '.iso']):
                                raise Exception()
                            # Only turbobit links are accepted by this scraper.
                            if not 'turbobit' in url:
                                continue
                            if url in str(sources):
                                continue
                            sources.append({
                                'source': 'turbobit',
                                'quality': quality,
                                'language': 'en',
                                'url': url,
                                'info': info,
                                'direct': True,
                                'debridonly': True
                            })
                    except:
                        pass
            except:
                pass
        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    """Scrape a magnet-table index (first-letter bucketed search) for
    torrent sources matching the title and year/episode tag.
    """
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
            'title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(
            data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s s%02de%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        # search_link buckets by the query's first letter, then the slug.
        url = urlparse.urljoin(
            self.base_link,
            self.search_link.format(query[0].lower(), cleantitle.geturl(query)))
        r = client.request(url)
        r = client.parseDOM(r, 'tbody')[0]
        posts = client.parseDOM(r, 'tr')
        posts = [i for i in posts if 'magnet:' in i]
        for post in posts:
            # NOTE(review): space->space replace is a no-op as written —
            # presumably an &nbsp; normalization mangled in transit.
            post = post.replace(' ', ' ')
            name = client.parseDOM(post, 'a', ret='title')[1]
            t = name.split(hdlr)[0]
            # NOTE(review): the '(|)' pattern only matches the empty string;
            # likely mangled HTML-entity stripping — confirm upstream.
            if not cleantitle.get(re.sub('(|)', '', t)) == cleantitle.get(title):
                continue
            # Episode tag first, falling back to a bare year.
            try:
                y = re.findall(
                    '[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]',
                    name, re.I)[-1].upper()
            except BaseException:
                y = re.findall(
                    '[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]',
                    name, re.I)[-1].upper()
            if not y == hdlr:
                continue
            links = client.parseDOM(post, 'a', ret='href')
            magnet = [
                i.replace('&', '&') for i in links if 'magnet:' in i
            ][0]
            url = magnet.split('&tr')[0]
            quality, info = source_utils.get_release_quality(name, name)
            try:
                size = re.findall(
                    '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                    post)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(
                    re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            info.append(size)
            info = ' | '.join(info)
            sources.append({
                'source': 'Torrent',
                'quality': quality,
                'language': 'en',
                'url': url,
                'info': info,
                'direct': False,
                'debridonly': True
            })
        return sources
    except BaseException:
        return sources
def sources(self, url, hostDict, hostprDict):
    """Scrape BTDB search results for magnet links matching title + hdlr.

    Fixes vs. original: the inner exception handler did a bare `return`
    (yielding None) while every other path returns the sources list —
    callers expect a list; and GiB sizes were divided by 1024 because
    only the 'GB' suffix was tested (now endswith(('GB', 'GiB')),
    matching the sibling scrapers).
    """
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(
            data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        try:
            r = self.scraper.get(url).content
            posts = client.parseDOM(r, 'li')
            for post in posts:
                link = re.findall(
                    'a title="Download using magnet" href="(magnet:.+?)"',
                    post, re.DOTALL)
                for url in link:
                    # Drop tracker list; recover the release name.
                    url = url.split('&tr')[0]
                    name = url.split('&dn=')[1]
                    name = urllib.unquote_plus(name).replace(' ', '.')
                    if source_utils.remove_lang(name):
                        continue
                    # Strip site-branding prefixes from release names.
                    if name.startswith('www.'):
                        try:
                            name = name.split(' - ')[1].lstrip()
                        except:
                            name = re.sub(r'\www..+? ', '', name)
                    t = name.split(hdlr)[0].replace(
                        data['year'], '').replace('(', '').replace(')', '').replace(
                        '&', 'and').replace('.US.', '.').replace('.us.', '.')
                    if cleantitle.get(t) != cleantitle.get(title):
                        continue
                    if hdlr not in name:
                        continue
                    # TODO(review): sibling scrapers call
                    # get_release_quality(name, url); confirm whether the
                    # single-argument form here is intentional.
                    quality, info = source_utils.get_release_quality(url)
                    try:
                        size = re.findall(
                            '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                            post)[0]
                        # Bug fix: also treat 'GiB' as gigabytes.
                        div = 1 if size.endswith(('GB', 'GiB')) else 1024
                        size = float(
                            re.sub('[^0-9|/.|/,]', '',
                                   size.replace(',', '.'))) / div
                        size = '%.2f GB' % size
                        info.insert(0, size)
                    except:
                        pass
                    info = ' | '.join(info)
                    sources.append({
                        'source': 'torrent',
                        'quality': quality,
                        'language': 'en',
                        'url': url,
                        'info': info,
                        'direct': False,
                        'debridonly': True
                    })
        except:
            source_utils.scraper_error('BTDB')
            # Bug fix: was a bare `return` (None); callers expect a list.
            return sources
        return sources
    except:
        source_utils.scraper_error('BTDB')
        return sources