def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            raise Exception()
        self.hostDict = hostDict + hostprDict
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']
        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url)
        # TODO: switch to client.parseDOM() to drop the dom_parser import
        posts = dom_parser.parse_dom(r, 'div', {'class': 'eTitle'})
        posts = [dom_parser.parse_dom(i.content, 'a', req='href') for i in posts if i]
        posts = [(i[0].attrs['href'], re.sub('<.+?>', '', i[0].content)) for i in posts if i]
        posts = [[i[0], i[1]] for i in posts]
        threads = []
        for i in posts:
            threads.append(workers.Thread(self._get_sources, i))
        [i.start() for i in threads]
        [i.join() for i in threads]
        # join() already blocks until every worker finishes, so the old
        # is_alive() polling loop after the joins was redundant and is dropped
        return self._sources
    except:
        source_utils.scraper_error('ONLINESERIES')
        return self._sources
def _get_sources(self, name, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = self.scraper.get(url, headers=headers).content
        name = client.replaceHTMLCodes(name)
        l = dom_parser.parse_dom(r, 'div', {'class': 'ppu2h'})
        # collect the text of every link block, then scan it once for URLs
        # (the old code scanned only the last block and never used "s")
        s = ''
        for i in l:
            s += i.content
        urls = re.findall(
            r'''((?:http|ftp|https)://[\w_-]+(?:(?:\.[\w_-]+)+)[\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])''',
            s, flags=re.MULTILINE | re.DOTALL)
        # drop archive/subtitle links; the old "not in i or" chain was always True
        urls = [i for i in urls if not any(x in i for x in ('.rar', '.zip', '.iso', '.idx', '.sub'))]
        for url in urls:
            if url in str(self.sources):
                continue
            valid, host = source_utils.is_host_valid(url, self.hostDict)
            if not valid:
                continue
            host = client.replaceHTMLCodes(host)
            host = host.encode('utf-8')
            quality, info = source_utils.get_release_quality(name, url)
            try:
                size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', name)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                size = '%.2f GB' % size
                info.append(size)
            except:
                pass
            info = ' | '.join(info)
            self.sources.append({'source': host, 'quality': quality, 'language': 'en',
                                 'url': url, 'info': info, 'direct': False, 'debridonly': True})
    except:
        source_utils.scraper_error('RAPIDMOVIEZ')
        pass
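# Note: the size-extraction block above reappears, nearly verbatim, in most of
# the scrapers in this section. A minimal sketch of a shared helper follows;
# the name `_parse_size` is hypothetical and not part of any scraper module
# here. It assumes the same inputs (a release name or page snippet) and
# returns the same '%.2f GB' string the scrapers build inline, or None when
# no size tag is found.
def _parse_size(text):
    try:
        size = re.findall(
            '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
            text)[0]
        # treat GiB/GB as gigabytes, everything else as megabytes
        div = 1 if size.endswith(('GB', 'GiB')) else 1024
        return '%.2f GB' % (float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div)
    except:
        return None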
def sources(self, url, hostDict, hostprDict):
    try:
        self.sources = []
        if url is None:
            return self.sources
        if debrid.status() is False:
            raise Exception()
        self.hostDict = hostDict + hostprDict
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        # title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = data['year']
        hdlr2 = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else ''
        imdb = data['imdb']
        url = self.search(title, hdlr)
        headers = {'User-Agent': client.agent()}
        r = self.scraper.get(url, headers=headers).content
        if hdlr2 == '':
            r = dom_parser.parse_dom(r, 'ul', {'id': 'releases'})[0]
        else:
            r = dom_parser.parse_dom(r, 'ul', {'id': 'episodes'})[0]
        r = dom_parser.parse_dom(r.content, 'a', req=['href'])
        r = [(i.content, urlparse.urljoin(self.base_link, i.attrs['href'])) for i in r if i and i.content != 'Watch']
        if hdlr2 != '':
            r = [(i[0], i[1]) for i in r if hdlr2.lower() in i[0].lower()]
        threads = []
        for i in r:
            threads.append(workers.Thread(self._get_sources, i[0], i[1]))
        [i.start() for i in threads]
        # [i.join() for i in threads]
        # threads are not joined here; poll until every worker finishes
        alive = [x for x in threads if x.is_alive() is True]
        while alive:
            alive = [x for x in threads if x.is_alive() is True]
            time.sleep(0.1)
        return self.sources
    except:
        source_utils.scraper_error('RAPIDMOVIEZ')
        return self.sources
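# The two completion strategies above (join() in ONLINESERIES, an is_alive()
# polling loop here) can both wait forever if a request stalls. A bounded
# wait is one alternative; `_wait_threads` is a hypothetical helper sketch,
# not part of the original modules, and assumes the workers.Thread objects
# expose the standard is_alive() API used above.
def _wait_threads(threads, timeout=30):
    end = time.time() + timeout
    while any(t.is_alive() for t in threads) and time.time() < end:
        time.sleep(0.1)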
def get_sources(self, link):
    try:
        # '&nbsp;' entities were decoded to literal spaces upstream; normalize them
        url = link[0].encode('ascii', errors='ignore').decode('ascii', errors='ignore').replace('&nbsp;', ' ')
        if '/torrent/' not in url:
            return
        name = link[1].encode('ascii', errors='ignore').decode('ascii', errors='ignore').replace('&nbsp;', '.').replace(' ', '.')
        if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
            raise Exception()
        t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
        if cleantitle.get(t) != cleantitle.get(self.title):
            return
        if self.hdlr not in name:
            return
        if not url.startswith('http'):
            url = urlparse.urljoin(self.base_link, url)
        link = client.request(url)
        if link is None:
            return
        infohash = re.findall('<b>Infohash</b></td><td valign=top>(.+?)</td>', link, re.DOTALL)[0]
        url = 'magnet:?xt=urn:btih:%s&dn=%s' % (infohash, name)
        if url in str(self.sources):
            return
        try:
            seeders = int(re.findall('<font color=red>(.*?)</font>.+Seeds', link, re.DOTALL)[0].replace(',', ''))
            if self.min_seeders > seeders:
                return
        except:
            pass
        quality, info = source_utils.get_release_quality(name, url)
        try:
            size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', link)[0]
            div = 1 if size.endswith(('GB', 'GiB')) else 1024
            size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
            size = '%.2f GB' % size
            info.insert(0, size)
        except:
            pass
        info = ' | '.join(info)
        self.sources.append({'source': 'torrent', 'quality': quality, 'language': 'en',
                             'url': url, 'info': info, 'direct': False, 'debridonly': True})
    except:
        source_utils.scraper_error('TORRENTFUNK')
        pass
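# The title/episode filter above (split on hdlr, strip the year and
# punctuation, compare cleantitle forms, require hdlr in the name) repeats
# across most scrapers below. A sketch of a shared check, assuming the same
# cleantitle semantics; `_title_match` is a hypothetical name, not part of
# the original modules.
def _title_match(name, title, hdlr, year):
    t = name.split(hdlr)[0].replace(year, '').replace('(', '').replace(')', '').replace('&', 'and')
    return cleantitle.get(t) == cleantitle.get(title) and hdlr in name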
def sources(self, url, hostDict, hostprDict):
    self.sources = []
    try:
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']
        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        # search the first three result pages
        urls = []
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        urls.append(url)
        urls.append(url.replace('/1/', '/2/'))
        urls.append(url.replace('/1/', '/3/'))
        links = []
        for x in urls:
            r = self.scraper.get(x).content
            # renamed from "list" to avoid shadowing the builtin
            rows = client.parseDOM(r, 'tr', attrs={'class': 'tlz'})
            for item in rows:
                links.append(item)
        threads = []
        for link in links:
            threads.append(workers.Thread(self.get_sources, link))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('EXTRATORRENT')
        return self.sources
def sources(self, url, hostDict, hostprDict):
    self.sources = []
    try:
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']
        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        # the old inner try/except duplicated the outer handler, so the two
        # blocks are collapsed into one
        r = client.request(url)
        links = re.findall('<a href=(/torrent/.+?)>', r, re.DOTALL)
        # log_utils.log('links = %s' % str(links), log_utils.LOGDEBUG)
        threads = []
        for link in links:
            threads.append(workers.Thread(self.get_sources, link))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('ETTV')
        return self.sources
def get_sources(self, link):
    try:
        url = '%s%s' % (self.base_link, link)
        result = client.request(url)
        info_hash = re.findall('<kbd>(.+?)<', result, re.DOTALL)[0]
        name = re.findall('<h3 class="card-title">(.+?)<', result, re.DOTALL)[0]
        url = 'magnet:?xt=urn:btih:%s&dn=%s' % (info_hash, str(name))
        size = re.findall('<div class="col-3">File size:</div><div class="col">(.+?)<', result, re.DOTALL)[0]
        if url in str(self.sources):
            return
        if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
            return
        t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and').replace('+', ' ')
        if cleantitle.get(t) != cleantitle.get(self.title):
            return
        if self.hdlr not in name:
            return
        quality, info = source_utils.get_release_quality(name, url)
        try:
            size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', size)[0]
            div = 1 if size.endswith(('GB', 'GiB')) else 1024
            size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
            size = '%.2f GB' % size
            info.insert(0, size)
        except:
            pass
        info = ' | '.join(info)
        self.sources.append({'source': 'torrent', 'quality': quality, 'language': 'en',
                             'url': url, 'info': info, 'direct': False, 'debridonly': True})
    except:
        source_utils.scraper_error('YOURBITTORRENT')
        pass
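# The foreign-language keyword filter appears verbatim in several scrapers in
# this section (TORRENTFUNK, YOURBITTORRENT, EXTRATORRENT, DOUBLR, BITLORD).
# Hoisting it to a module-level constant would keep the copies in sync; the
# names `FOREIGN_TAGS` and `_is_foreign` below are hypothetical, not part of
# the original modules.
FOREIGN_TAGS = ('french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed')

def _is_foreign(name):
    # case-insensitive match against the shared keyword list
    return any(x in name.lower() for x in FOREIGN_TAGS)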
def movie(self, imdb, title, localtitle, aliases, year):
    try:
        t = cleantitle.geturl(title).replace('-', '+').replace('++', '+')
        self.title = t
        url = self.base_link + self.search_link % (t, year)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url)
        u = client.parseDOM(r, 'div', attrs={'class': 'col-md-2 col-sm-2 mrgb'})
        for i in u:
            link = re.compile('<a href="(.+?)"').findall(i)
            for url in link:
                if cleantitle.get(title) not in cleantitle.get(url):
                    continue
                return url
    except:
        source_utils.scraper_error('HDMTO')
        return
def links(self, url):
    urls = []
    try:
        if url is None:
            return
        # loop variable renamed so it no longer shadows the "url" list parameter
        for post_url in url:
            r = client.request(post_url)
            r = client.parseDOM(r, 'div', attrs={'class': 'entry'})
            r = client.parseDOM(r, 'a', ret='href')
            r1 = [i for i in r if 'money' in i][0]
            r = client.request(r1)
            r = client.parseDOM(r, 'div', attrs={'id': 'post-\d+'})[0]
            if 'enter the password' in r:
                plink = client.parseDOM(r, 'form', ret='action')[0]
                post = {'post_password': '******', 'Submit': 'Submit'}  # password elided upstream
                send_post = client.request(plink, post=post, output='cookie')
                link = client.request(r1, cookie=send_post)
            else:
                link = client.request(r1)
            if '<strong>Single' not in link:
                continue
            link = re.findall('<strong>Single(.+?)</tr', link, re.DOTALL)[0]
            link = client.parseDOM(link, 'a', ret='href')
            link = [(i.split('=')[-1]) for i in link]
            for i in link:
                urls.append(i)
        return urls
    except:
        source_utils.scraper_error('300MBFILMS')
        pass
def _get_sources(self, url):
    try:
        item = client.request(url[0])
        if item is None:
            return
        name = url[1]
        self.title = self.title.replace('!', '')
        # some shows like "Power" have year and hdlr in name
        t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and')
        tc = cleantitle.get(t)
        if tc != cleantitle.get(self.title):
            # fall back to the first alias before giving up
            try:
                if tc != self.aliases[0]:
                    return
            except:
                return
        if self.hdlr not in name:
            return
        links = dom_parser.parse_dom(item, 'a', req='href')
        links = [i.attrs['href'] for i in links]
        info = []
        try:
            size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', item)[0]
            div = 1 if size.endswith(('GB', 'GiB')) else 1024
            size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
            size = '%.2f GB' % size
            info.append(size)
        except:
            pass
        info = ' | '.join(info)
        for url in links:
            if any(x in url.lower() for x in ['.rar.', '.zip.', '.iso.']) or any(url.lower().endswith(x) for x in ['.rar', '.zip', '.iso']):
                continue
            if any(x in url.lower() for x in ['youtube', 'sample', 'trailer']):
                continue
            valid, host = source_utils.is_host_valid(url, self.hostDict)
            if not valid:
                continue
            host = client.replaceHTMLCodes(host)
            host = host.encode('utf-8')
            quality, info2 = source_utils.get_release_quality(name, url)
            if url in str(self._sources):
                continue
            self._sources.append({'source': host, 'quality': quality, 'language': 'en',
                                  'url': url, 'info': info, 'direct': False, 'debridonly': True})
    except:
        source_utils.scraper_error('ONLINESERIES')
        pass
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        hostDict = hostprDict + hostDict
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        premDate = ''
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        query = query.replace('&', 'and')
        query = query.replace('  ', ' ')
        query = query.replace(' ', '-')
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        url = "http://rlsbb.ru/" + query  # hard-coded mirror overrides search_link
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        if 'tvshowtitle' not in data:
            url = url + "-1080p"
        r = self.scraper.get(url).content
        if r is None and 'tvshowtitle' in data:
            # retry with a season-level query ("Title-S01")
            season = re.search('S(.*?)E', hdlr).group(1)
            query = title
            query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
            query = query + '-S' + season
            query = query.replace('&', 'and')
            query = query.replace('  ', ' ')
            query = query.replace(' ', '-')
            url = "http://rlsbb.ru/" + query
            r = self.scraper.get(url).content
        # for loopCount in range(0, 2):
        #     if loopCount == 1 or (r is None and 'tvshowtitle' in data):
        #         premDate = re.sub('[ \.]', '-', data['premiered'])
        #         query = re.sub('[\\\\:;*?"<>|/\-\']', '', data['tvshowtitle'])
        #         query = query.replace("&", " and ").replace("  ", " ").replace(" ", "-")
        #         query = query + "-" + premDate
        #         url = "http://rlsbb.ru/" + query
        #         url = url.replace('The-Late-Show-with-Stephen-Colbert', 'Stephen-Colbert')
        #         r = self.scraper.get(url).content
        posts = client.parseDOM(r, 'div', attrs={'class': 'content'})
        items = []
        for post in posts:
            try:
                u = client.parseDOM(post, 'a', ret='href')
                for i in u:
                    try:
                        name = str(i)
                        tit = name.rsplit('/', 1)[1]
                        t = tit.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                        if cleantitle.get(t) != cleantitle.get(title):
                            continue
                        if hdlr in name.upper():
                            items.append(name)
                        # elif len(premDate) > 0 and premDate in name.replace(".", "-"):
                        #     items.append(name)
                    except:
                        source_utils.scraper_error('RLSBB')
                        pass
            except:
                source_utils.scraper_error('RLSBB')
                pass
        # if len(items) > 0:
        #     break
        seen_urls = set()
        for item in items:
            try:
                info = []
                url = str(item)
                url = client.replaceHTMLCodes(url)
                url = url.encode('utf-8')
                if url in seen_urls:
                    continue
                seen_urls.add(url)
                host = url.replace("\\", "")
                host2 = host.strip('"')
                host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(host2.strip().lower()).netloc)[0]
                if host not in hostDict:
                    continue
                if any(x in host2 for x in ['.rar', '.zip', '.iso']):
                    continue
                quality, info = source_utils.get_release_quality(host2)
                try:
                    # search the link itself for a size tag; the old code read the
                    # stale "name" left over from the post loop
                    size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', host2)[0]
                    div = 1 if size.endswith(('GB', 'GiB')) else 1024
                    size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                    size = '%.2f GB' % size
                    info.append(size)
                except:
                    pass
                info = ' | '.join(info)
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')
                sources.append({'source': host, 'quality': quality, 'language': 'en',
                                'url': host2, 'info': info, 'direct': False, 'debridonly': True})
            except:
                source_utils.scraper_error('RLSBB')
                pass
        return sources
    except:
        source_utils.scraper_error('RLSBB')
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        hostDict = hostprDict + hostDict
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url).replace('-', '+')
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url)
        # r = self.scraper.get(url).content
        if r is None and 'tvshowtitle' in data:
            season = re.search('S(.*?)E', hdlr).group(1)
            url = title
            # r = self.scraper.get(url).content
            r = client.request(url)
        for loopCount in range(0, 2):
            if loopCount == 1 or (r is None and 'tvshowtitle' in data):
                r = self.scraper.get(url).content
                # r = client.request(url)
            posts = client.parseDOM(r, 'div', attrs={'class': 'postpage_movie_download'})
            items = []
            for post in posts:
                try:
                    u = client.parseDOM(post, 'a', ret='href')
                    for i in u:
                        name = str(i)
                        items.append(name)
                except:
                    pass
            if len(items) > 0:
                break
        for item in items:
            try:
                i = str(item)
                # r = self.scraper.get(i).content
                r = client.request(i)
                if r is None:
                    continue
                tit = client.parseDOM(r, 'meta', attrs={'property': 'og:title'}, ret='content')[0]
                t = tit.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                if cleantitle.get(t) != cleantitle.get(title):
                    continue
                if hdlr not in tit:
                    continue
                u = client.parseDOM(r, 'div', attrs={'class': 'multilink_lnks'})
                for t in u:
                    r = client.parseDOM(t, 'a', ret='href')
                    for url in r:
                        if 'www.share-online.biz' in url:
                            continue
                        if url in str(sources):
                            continue
                        quality, info = source_utils.get_release_quality(url, url)
                        valid, host = source_utils.is_host_valid(url, hostDict)
                        if valid:
                            sources.append({'source': host, 'quality': quality, 'language': 'en',
                                            'url': url, 'info': info, 'direct': False, 'debridonly': True})
            except:
                source_utils.scraper_error('2DDL')
                pass
        return sources
    except:
        source_utils.scraper_error('2DDL')
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        # hoisted out of the retry loop so the lists are only merged once
        hostDict = hostprDict + hostDict
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url).replace('%3A+', '+')
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url)
        if r is None and 'tvshowtitle' in data:
            season = re.search('S(.*?)E', hdlr).group(1)
            url = title
            r = client.request(url)
        for loopCount in range(0, 2):
            if loopCount == 1 or (r is None and 'tvshowtitle' in data):
                r = client.request(url)
            posts = client.parseDOM(r, 'h2', attrs={'class': 'postTitle'})
            items = []
            for post in posts:
                try:
                    u = client.parseDOM(post, 'a', ret='href')
                    for i in u:
                        name = str(i)
                        tit = name.rsplit('/', 1)[0]
                        tit = tit.rsplit('/', 1)[1].upper()
                        t = tit.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                        if cleantitle.get(t) != cleantitle.get(title):
                            continue
                        # only keep links that survive the title check; the old
                        # code appended before validating
                        items.append(name)
                except:
                    source_utils.scraper_error('MAXRLS')
                    pass
            if len(items) > 0:
                break
        for item in items:
            try:
                i = str(item)
                r = client.request(i)
                u = client.parseDOM(r, 'div', attrs={'class': 'postContent'})
                for t in u:
                    r = client.parseDOM(t, 'a', ret='href')
                    for url in r:
                        quality, info = source_utils.get_release_quality(url)
                        valid, host = source_utils.is_host_valid(url, hostDict)
                        if not valid:
                            continue
                        sources.append({'source': host, 'quality': quality, 'language': 'en',
                                        'url': url, 'info': info, 'direct': False, 'debridonly': True})
            except:
                source_utils.scraper_error('MAXRLS')
                pass
        return sources
    except:
        source_utils.scraper_error('MAXRLS')
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        # the old inner try/except duplicated the outer handler, so the two
        # blocks are collapsed into one
        # r = client.request(url)
        r = self.scraper.get(url).content
        posts = client.parseDOM(r, 'tr')
        for post in posts:
            links = re.findall('<a href="(/torrent/.+?)">(.+?)<', post, re.DOTALL)
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except:
                size = '0'
            for link, ref in links:
                link = urlparse.urljoin(self.base_link, link)
                # link = client.request(link)
                link = self.scraper.get(link).content
                link = re.findall('a class=".+?" rel=".+?" href="(magnet:.+?)"', link, re.DOTALL)
                for url in link:
                    url = url.split('&tr')[0]
                    if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
                        continue
                    if url in str(sources):
                        continue
                    name = url.split('&dn=')[1]
                    name = urllib.unquote_plus(urllib.unquote_plus(name))
                    if name.startswith('www.'):
                        try:
                            name = name.split(' - ')[1].lstrip()
                        except:
                            name = re.sub(r'\www..+? ', '', name)
                    t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                    if cleantitle.get(t) != cleantitle.get(title):
                        continue
                    if hdlr not in name:
                        continue
                    quality, info = source_utils.get_release_quality(name, url)
                    info.append(size)
                    info = ' | '.join(info)
                    sources.append({'source': 'torrent', 'quality': quality, 'language': 'en',
                                    'url': url, 'info': info, 'direct': False, 'debridonly': True})
        return sources
    except:
        source_utils.scraper_error('DOUBLR')
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        hostDict = hostprDict + hostDict
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        # "items" is hoisted out of the inner try so the loop below still has a
        # list to iterate if the search request fails; the unused "dupes" list
        # is dropped
        items = []
        try:
            url = self.search_link % urllib.quote_plus(query)
            url = urlparse.urljoin(self.base_link, url)
            # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
            r = self.scraper.get(url).content
            posts = client.parseDOM(r, 'div', attrs={'class': 'post'})
            for post in posts:
                try:
                    u = client.parseDOM(post, 'div', attrs={'class': 'postContent'})
                    u = client.parseDOM(u, 'h2')
                    u = client.parseDOM(u, 'a', ret='href')
                    u = [(i.strip('/').split('/')[-1], i) for i in u]
                    items += u
                except:
                    source_utils.scraper_error('SCENERLS')
                    pass
        except:
            source_utils.scraper_error('SCENERLS')
            pass
        for item in items:
            try:
                name = item[0]
                name = client.replaceHTMLCodes(name)
                t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                if cleantitle.get(t) != cleantitle.get(title):
                    continue
                tit = name.replace('.', ' ')
                if hdlr not in tit:
                    continue
                quality, info = source_utils.get_release_quality(name, item[1])
                info = ' | '.join(info)
                url = item[1]
                if any(x in url for x in ['.rar', '.zip', '.iso']):
                    continue
                url = client.replaceHTMLCodes(url)
                url = url.encode('utf-8')
                host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(url.strip().lower()).netloc)[0]
                if host not in hostDict:
                    continue
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')
                sources.append({'source': host, 'quality': quality, 'language': 'en',
                                'url': url, 'info': info, 'direct': False, 'debridonly': True})
            except:
                source_utils.scraper_error('SCENERLS')
                pass
        return sources
    except:
        source_utils.scraper_error('SCENERLS')
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        # season-level match ("S01" rather than "S01E01")
        hdlr = 'S%02d' % (int(data['season'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url)
        posts = client.parseDOM(r, 'figure')
        items = []
        for post in posts:
            try:
                tit = client.parseDOM(post, 'img', ret='title')[0]
                tit = client.replaceHTMLCodes(tit)
                t = tit.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                if cleantitle.get(t) != cleantitle.get(title):
                    continue
                if hdlr not in tit:
                    continue
                url = client.parseDOM(post, 'a', ret='href')[0]
                items.append((url, tit))
            except:
                source_utils.scraper_error('MKVHUB')
                pass
        threads = []
        for i in items:
            threads.append(workers.Thread(self._get_sources, i[0], i[1], hostDict, hostprDict))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except:
        source_utils.scraper_error('MKVHUB')
        return self._sources
def get_sources(self, link):
    try:
        url = 'magnet:%s' % (re.findall('a href="magnet:(.+?)"', link, re.DOTALL)[0])
        url = urllib.unquote_plus(url).split('&tr=')[0].replace(' ', '.')
        url = url.encode('ascii', errors='ignore').decode('ascii', errors='ignore')
        name = url.split('&dn=')[1]
        if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
            raise Exception()
        t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
        if cleantitle.get(t) != cleantitle.get(self.title):
            return
        if self.hdlr not in name:
            return
        if url in str(self.sources):
            return
        try:
            seeders = int(client.parseDOM(link, 'td', attrs={'class': 'sy'})[0].replace(',', ''))
            if self.min_seeders > seeders:
                return
        except:
            pass
        quality, info = source_utils.get_release_quality(name, url)
        try:
            size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', link)[0]
            div = 1 if size.endswith(('GB', 'GiB')) else 1024
            size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
            size = '%.2f GB' % size
            info.insert(0, size)
        except:
            pass
        info = ' | '.join(info)
        self.sources.append({'source': 'torrent', 'quality': quality, 'language': 'en',
                             'url': url, 'info': info, 'direct': False, 'debridonly': True})
    except:
        source_utils.scraper_error('EXTRATORRENT')
        pass
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()
        hostDict = hostprDict + hostDict
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url).replace('-', '+')
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url)
        if r is None and 'tvshowtitle' in data:
            season = re.search('S(.*?)E', hdlr).group(1)
            url = title
            r = client.request(url)
        for loopCount in range(0, 2):
            if loopCount == 1 or (r is None and 'tvshowtitle' in data):
                r = client.request(url)
            posts = client.parseDOM(r, 'table', attrs={'class': 'download'})
            items = []
            for post in posts:
                try:
                    u = client.parseDOM(post, 'a', ret='href')
                    for i in u:
                        try:
                            name = str(i)
                            items.append(name)
                        except:
                            pass
                except:
                    source_utils.scraper_error('DDLSPOT')
                    pass
            if len(items) > 0:
                break
        for item in items:
            try:
                info = []
                i = str(item)
                i = self.base_link + i
                r = client.request(i)
                u = client.parseDOM(r, 'div', attrs={'class': 'dl-links'})
                for t in u:
                    r = re.compile('a href=".+?" rel=".+?">(.+?)<').findall(t)
                    for url in r:
                        if any(x in url for x in ['.rar', '.zip', '.iso']):
                            continue
                        quality, info = source_utils.get_release_quality(url)
                        valid, host = source_utils.is_host_valid(url, hostDict)
                        # skip unrecognized hosts; the old code appended unconditionally
                        if not valid:
                            continue
                        info = ' | '.join(info)
                        sources.append({'source': host, 'quality': quality, 'language': 'en',
                                        'url': url, 'info': info, 'direct': False, 'debridonly': True})
            except:
                source_utils.scraper_error('DDLSPOT')
                pass
        return sources
    except:
        source_utils.scraper_error('DDLSPOT')
        # return the list (the old handler returned None, breaking callers)
        return sources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url)
        links = zip(client.parseDOM(r, 'a', attrs={'class': 'btn btn-default magnet-button stats-action banner-button'}, ret='href'),
                    client.parseDOM(r, 'td', attrs={'class': 'size'}))
        for link in links:
            url = link[0].replace('&amp;', '&')
            url = re.sub(r'(&tr=.+)&dn=', '&dn=', url)  # some links on bitlord have &tr= before &dn=
            url = url.split('&tr=')[0]
            if 'magnet' not in url:
                continue
            size = int(link[1])
            if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
                continue
            name = url.split('&dn=')[1]
            t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
            if cleantitle.get(t) != cleantitle.get(title):
                continue
            if hdlr not in name:
                continue
            quality, info = source_utils.get_release_quality(name, url)
            try:
                # the size column appears to be in MB; convert to GB and skip
                # implausibly small values
                if size < 5.12:
                    raise Exception()
                size = float(size) / 1024
                size = '%.2f GB' % size
                info.append(size)
            except:
                pass
            info = ' | '.join(info)
            sources.append({'source': 'torrent', 'quality': quality, 'language': 'en',
                            'url': url, 'info': info, 'direct': False, 'debridonly': True})
        return sources
    except:
        source_utils.scraper_error('BITLORD')
        return sources
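# BITLORD, EXTRATORRENT, DOUBLR and MKVHUB all normalize magnet links the
# same way: decode, strip tracker parameters, recover the display name. A
# consolidated sketch, assuming the '&dn='/'&tr=' layout seen above; the
# name `_clean_magnet` is hypothetical, not part of the original modules.
def _clean_magnet(url):
    # decode, repair stray entities, and drop everything after the trackers
    url = urllib.unquote_plus(url).replace('&amp;', '&').replace(' ', '.')
    url = url.split('&tr=')[0]
    # pull the display name when present so callers can run title checks
    name = url.split('&dn=')[1] if '&dn=' in url else ''
    return url, name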
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        # hoisted out of the retry loop so the lists are only merged once
        hostDict = hostprDict + hostDict
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url).replace('-', '+')
        r = self.scraper.get(url).content
        if r is None and 'tvshowtitle' in data:
            season = re.search('S(.*?)E', hdlr).group(1)
            url = title
            r = self.scraper.get(url).content
        for loopCount in range(0, 2):
            if loopCount == 1 or (r is None and 'tvshowtitle' in data):
                r = self.scraper.get(url).content
            posts = client.parseDOM(r, 'h2')
            items = []
            for post in posts:
                try:
                    u = client.parseDOM(post, 'a', ret='href')
                    for i in u:
                        try:
                            name = str(i)
                            items.append(name)
                        except:
                            source_utils.scraper_error('0DAY')
                            pass
                except:
                    source_utils.scraper_error('0DAY')
                    pass
            if len(items) > 0:
                break
        query = query.replace(' ', '.')  # release names use dots
        for item in items:
            try:
                info = []
                i = str(item)
                r = self.scraper.get(i).content
                u = client.parseDOM(r, 'div', attrs={'class': 'entry-content'})
                for t in u:
                    r = re.compile('a href="(.+?)">.+?<').findall(t)
                    for url in r:
                        if query not in url:
                            continue
                        if any(x in url for x in ['.rar', '.zip', '.iso']):
                            continue
                        quality, info = source_utils.get_release_quality(url)
                        valid, host = source_utils.is_host_valid(url, hostDict)
                        # skip unrecognized hosts; the old code appended unconditionally
                        if not valid:
                            continue
                        info = ' | '.join(info)
                        sources.append({'source': host, 'quality': quality, 'language': 'en',
                                        'url': url, 'info': info, 'direct': False, 'debridonly': True})
            except:
                source_utils.scraper_error('0DAY')
                pass
        return sources
    except:
        source_utils.scraper_error('0DAY')
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = urlparse.urljoin(self.base_link, self.search_link)
        # url = url % urllib.quote_plus(query)
        url = url % urllib.quote(query)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url)
        r = client.parseDOM(r, 'h2')
        # z = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
        z = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a'))
        # the movie and TV branches built identical pairs, so one list covers both
        posts = [(i[1], i[0]) for i in z]
        hostDict = hostprDict + hostDict
        items = []
        for post in posts:
            try:
                try:
                    t = post[0].encode('utf-8')
                except:
                    t = post[0]
                # keep the raw page around: the size search below needs it, and
                # the old code searched the (href, title) tuple by mistake
                page = client.request(post[1])
                u = re.findall('\'(http.+?)\'', page) + re.findall('\"(http.+?)\"', page)
                u = [i for i in u if '/embed/' not in i]
                u = [i for i in u if 'youtube' not in i]
                try:
                    s = re.search('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|MB|MiB))', page)
                    s = s.groups()[0] if s else '0'
                except:
                    s = '0'
                    pass
                items += [(t, i, s) for i in u]
            except:
                source_utils.scraper_error('MYVIDEOLINK')
                pass
        for item in items:
            try:
                url = item[1]
                url = client.replaceHTMLCodes(url)
                url = url.encode('utf-8')
                void = ('.rar', '.zip', '.iso', '.part', '.png', '.jpg', '.bmp', '.gif')
                if url.endswith(void):
                    continue
                valid, host = source_utils.is_host_valid(url, hostDict)
                if not valid:
                    continue
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')
                name = item[0]
                name = client.replaceHTMLCodes(name)
                t = name.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                if cleantitle.get(t) != cleantitle.get(title):
                    continue
                if hdlr not in name:
                    continue
                quality, info = source_utils.get_release_quality(name, url)
                try:
                    size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+) (?:GB|GiB|MB|MiB))', item[2])[-1]
                    div = 1 if size.endswith(('GB', 'GiB')) else 1024
                    size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                    size = '%.2f GB' % size
                    info.append(size)
                except:
                    pass
                info = ' | '.join(info)
                sources.append({'source': host, 'quality': quality, 'language': 'en',
                                'url': url, 'info': info, 'direct': False, 'debridonly': True})
            except:
                source_utils.scraper_error('MYVIDEOLINK')
                pass
        return sources
    except:
        source_utils.scraper_error('MYVIDEOLINK')
        return sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            return sources
        hostDict = hostprDict + hostDict
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s %s' % (title, hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url)
        posts = client.parseDOM(r, 'h2')
        urls = []
        for item in posts:
            if not item.startswith('<a href'):
                continue
            try:
                tit = client.parseDOM(item, 'a')[0]
                t = tit.split(hdlr)[0].replace(data['year'], '').replace('(', '').replace(')', '').replace('&', 'and')
                if cleantitle.get(t) != cleantitle.get(title):
                    continue
                if hdlr not in tit:
                    continue
                # quality comes from the link title; the old second argument
                # (item[0]) was just the first character of the post HTML
                quality, info = source_utils.get_release_quality(tit)
                try:
                    size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', item)[0]
                    div = 1 if size.endswith(('GB', 'GiB', 'Gb')) else 1024
                    size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                    size = '%.2f GB' % size
                except:
                    size = '0'
                info.append(size)
                fileType = source_utils.getFileType(tit)
                info.append(fileType)
                info = ' | '.join(info) if fileType else info[0]
                item = client.parseDOM(item, 'a', ret='href')
                links = self.links(item)
                if links is None:
                    continue
                urls += [(i, quality, info) for i in links]
            except:
                source_utils.scraper_error('300MBFILMS')
                pass
        for item in urls:
            if 'earn-money' in item[0]:
                continue
            if any(x in item[0] for x in ['.rar', '.zip', '.iso']):
                continue
            url = client.replaceHTMLCodes(item[0])
            url = url.encode('utf-8')
            valid, host = source_utils.is_host_valid(url, hostDict)
            if not valid:
                continue
            host = client.replaceHTMLCodes(host)
            host = host.encode('utf-8')
            sources.append({'source': host, 'quality': item[1], 'language': 'en',
                            'url': url, 'info': item[2], 'direct': False, 'debridonly': True})
        return sources
    except:
        source_utils.scraper_error('300MBFILMS')
        return sources
def _get_sources(self, url, name, hostDict, hostprDict):
    try:
        urls = []
        result = client.request(url)
        urls = [(client.parseDOM(result, 'a', ret='href', attrs={'class': 'dbuttn watch'})[0],
                 client.parseDOM(result, 'a', ret='href', attrs={'class': 'dbuttn blue'})[0],
                 client.parseDOM(result, 'a', ret='href', attrs={'class': 'dbuttn magnet'})[0])]
        # sample markup:
        # '''<a class="dbuttn watch" href="https://www.linkomark.xyz/view/EnWNqSNeLw" target="_blank" rel="nofollow noopener">Watch Online Links</a>
        #    <a class="dbuttn blue" href="https://www.linkomark.xyz/view/3-Gjyz5Q2R" target="_blank" rel="nofollow noopener">Get Download Links</a>
        #    <a class="dbuttn magnet" href="https://torrentbox.site/save/2970fa51e8af52b7e2d1d5fa61a6005777d768ba" target="_blank" rel="nofollow noopener">Magnet Link</a>'''
        quality, info = source_utils.get_release_quality(name, url)
        try:
            size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', result)[0]
            div = 1 if size.endswith(('GB', 'GiB', 'Gb')) else 1024
            size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
            size = '%.2f GB' % size
            info.append(size)
        except:
            pass
        fileType = source_utils.getFileType(name)
        info.append(fileType)
        info = ' | '.join(info) if fileType else info[0]
        # Debrid_info = ' | '.join(info) if fileType else info[0]
        # Torrent_info = ' | '.join(info)
    except:
        source_utils.scraper_error('MKVHUB')
        return
    for url in urls[0]:
        try:
            r = client.request(url)
            if r is None:
                continue
            if 'linkomark' in url:
                # info = Debrid_info
                p_link = client.parseDOM(r, 'link', attrs={'rel': 'canonical'}, ret='href')[0]
                # <input type="hidden" name="_csrf_token_" value=""/>
                input_name = client.parseDOM(r, 'input', ret='name')[0]
                input_value = client.parseDOM(r, 'input', ret='value')[0]
                post = {input_name: input_value}
                p_data = client.request(p_link, post=post)
                links = client.parseDOM(p_data, 'a', ret='href', attrs={'target': '_blank'})
                for i in links:
                    # hosts only resolvable through a premium list are flagged debrid-only
                    valid, host = source_utils.is_host_valid(i, hostDict)
                    if not valid:
                        valid, host = source_utils.is_host_valid(i, hostprDict)
                        if not valid:
                            continue
                        else:
                            rd = True
                    else:
                        rd = False
                    if i in str(self._sources):
                        continue
                    if 'rapidgator' in i:
                        rd = True
                    if rd:
                        self._sources.append({'source': host, 'quality': quality, 'language': 'en',
                                              'url': i, 'info': info, 'direct': False, 'debridonly': True})
                    else:
                        self._sources.append({'source': host, 'quality': quality, 'language': 'en',
                                              'url': i, 'info': info, 'direct': False, 'debridonly': False})
            elif 'torrent' in url:
                # info = Torrent_info
                data = client.parseDOM(r, 'a', ret='href')
                url = [i for i in data if 'magnet:' in i][0]
                url = url.split('&tr')[0]
                self._sources.append({'source': 'torrent', 'quality': quality, 'language': 'en',
                                      'url': url, 'info': info, 'direct': False, 'debridonly': True})
        except:
            source_utils.scraper_error('MKVHUB')
            pass