def __init__(self):
    """Provider metadata and HTTP session for the l23movies scraper.

    Fix: the original assigned the cfscrape session to a *local* variable
    ``scraper`` that was immediately discarded; every sibling provider
    stores it as ``self.scraper``, so do the same here.
    """
    self.priority = 1
    self.language = ['en']
    self.domains = ['l23movies.com']
    self.base_link = 'http://l23movies.com'
    # Path template: movie-title slug is interpolated into %s.
    self.movies_search_path = ('search-movies/%s.html')
    # Cloudflare-capable requests session shared by this provider.
    self.scraper = cfscrape.create_scraper()
def sources(self, url, hostDict, hostprDict):
    """Scrape hoster links from an l23movies title page.

    url      -- page URL previously resolved by movie()/episode()
    hostDict -- known (free) hoster names; only links whose host slug
                appears in it are kept
    Returns a list of source dicts, or None on any unexpected failure.
    """
    try:
        sources = []
        scraper = cfscrape.create_scraper()
        r = scraper.get(url).content
        # Each playable server sits in a <p class="server_play"><a href=...>.
        r = dom_parser2.parse_dom(r, 'p', {'class': 'server_play'})
        r = [dom_parser2.parse_dom(i, 'a', req=['href']) for i in r if i]
        # The host slug is the final path component before ".html".
        r = [(i[0].attrs['href'], re.search('/(\w+).html', i[0].attrs['href'])) for i in r if i]
        r = [(i[0], i[1].groups()[0]) for i in r if i[0] and i[1]]
        for i in r:
            try:
                host = i[1]
                # Substring match against the stringified host list —
                # loose by design (keeps partial/aliased host names).
                if str(host) in str(hostDict):
                    host = client.replaceHTMLCodes(host)
                    host = host.encode('utf-8')  # Python-2 str normalisation
                    sources.append({
                        'source': host,
                        'quality': 'SD',
                        'language': 'en',
                        'url': i[0].replace('\/', '/'),  # unescape JSON-style slashes
                        'direct': False,
                        'debridonly': False
                    })
            except:
                pass
        return sources
    except Exception:
        return
def __init__(self):
    """Provider metadata and a Cloudflare-bypassing session for hdpopcorns."""
    # HTTP session first so it is available to every scrape helper.
    self.scraper = cfscrape.create_scraper()
    self.priority = 1
    self.language = ['en']
    self.domains = ['hdpopcorns.co', 'hdpopcorns.eu']
    self.base_link = 'http://hdpopcorns.co'
    # Site search endpoint; the query string is substituted into %s.
    self.search_link = '/?s=%s'
def __init__(self):
    """Provider metadata and a Cloudflare-bypassing session for wrzcraft."""
    self.scraper = cfscrape.create_scraper()
    self.priority = 1
    self.language = ['en']
    self.domains = ['wrzcraft.net']
    self.base_link = 'http://wrzcraft.net'
    # RSS search feed; the query is substituted into %s.
    self.search_link = '/search/%s/feed/rss2/'
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    """Resolve the direct page URL for one TV episode.

    url -- urlencoded query-string payload built by tvshow(); returns the
    href of the matching episode link, or None on any failure.
    """
    try:
        if url == None: return
        scraper = cfscrape.create_scraper()
        # Unpack the query-string payload into a plain dict of scalars.
        url = urlparse.parse_qs(url)
        url = dict([(i, url[i][0]) if url[i] else (i, '') for i in url])
        url['premiered'], url['season'], url[
            'episode'] = premiered, season, episode
        try:
            # First try to locate the season page via the site search.
            clean_title = cleantitle.geturl(
                url['tvshowtitle']) + '-season-%d' % int(season)
            search_url = urlparse.urljoin(
                self.base_link,
                self.search_link % clean_title.replace('-', '+'))
            r = scraper.get(search_url).content
            r = client.parseDOM(r, 'div', {'id': 'movie-featured'})
            r = [(client.parseDOM(i, 'a', ret='href'),
                  re.findall('<b><i>(.+?)</i>', i)) for i in r]
            # Keep only results whose normalised title matches exactly.
            r = [(i[0][0], i[1][0]) for i in r
                 if cleantitle.get(i[1][0]) == cleantitle.get(clean_title)]
            url = r[0][0]
        except:
            # Search failed — fall through and scrape `url` as-is.
            pass
        data = scraper.get(url).content
        data = client.parseDOM(data, 'div', attrs={'id': 'details'})
        # Pair each link's visible text (episode number) with its href.
        data = zip(client.parseDOM(data, 'a'),
                   client.parseDOM(data, 'a', ret='href'))
        url = [(i[0], i[1]) for i in data if i[0] == str(int(episode))]
        return url[0][1]
    except:
        return
def __init__(self):
    """Provider metadata and a Cloudflare-bypassing session for 123hbo."""
    self.scraper = cfscrape.create_scraper()
    self.priority = 1
    self.language = ['en']
    self.domains = ['123hbo.com']
    self.base_link = 'http://www0.123hbo.com'
    # Title slug is substituted into %s.
    self.search_link = '/search-movies/%s.html'
def __init__(self):
    """Provider metadata and a Cloudflare-bypassing session for seehd."""
    self.scraper = cfscrape.create_scraper()
    self.priority = 1
    self.language = ['en']
    self.domains = ['seehd.pl']
    self.base_link = 'http://www.seehd.pl'
    # Two slots: title slug and year.
    self.search_link = '/%s-%s-watch-online/'
def movie(self, imdb, title, localtitle, aliases, year):
    """Package movie identifiers into the provider's opaque `url` payload.

    No network access is needed at this stage; sources() does the real
    lookup later.  Returns a dict of imdb/title/year, or None on failure.

    Fix: removed an unused cfscrape session that was created (and
    immediately discarded) on every call.
    """
    try:
        url = {'imdb': imdb, 'title': title, 'year': year}
        return url
    except:
        return
def sources(self, url, hostDict, hostprDict):
    """Collect iframe-embedded hoster links from a seehd watch page.

    Returns the accumulated list of source dicts, or None if either the
    page fetch or the iframe scan raises.
    """
    try:
        sources = []
        scraper = cfscrape.create_scraper()
        r = scraper.get(url).content
        try:
            # Split each iframe src into scheme / host / path.
            match = re.compile(
                '<iframe.+?src="(.+?)://(.+?)/(.+?)"').findall(r)
            for http, host, url in match:
                host = host.replace('www.', '')
                url = '%s://%s/%s' % (http, host, url)
                # Skip the site's own player frames; keep external hosters.
                if 'seehd' in host:
                    pass
                else:
                    sources.append({
                        'source': host,
                        'quality': 'HD',
                        'language': 'en',
                        'url': url,
                        'direct': False,
                        'debridonly': False
                    })
        except:
            return
    except Exception:
        return
    return sources
def __init__(self):
    """Provider metadata and a Cloudflare-bypassing session for scene-rls."""
    self.scraper = cfscrape.create_scraper()
    self.priority = 1
    self.language = ['en']
    self.domains = ['scene-rls.com', 'scene-rls.net']
    self.base_link = 'http://scene-rls.net'
    # WordPress-style search endpoint; query goes into %s.
    self.search_link = '/?s=%s&submit=Find'
def __init__(self):
    """Provider metadata and a Cloudflare-bypassing session for dailytvfix."""
    self.scraper = cfscrape.create_scraper()
    self.priority = 1
    self.language = ['en']
    self.domains = ['ondarewatch.com', 'dailytvfix.com']
    self.base_link = 'http://www.dailytvfix.com'
def resolve(self, url):
    """Turn an on-site link into the real hoster URL.

    Links on our own domain hide the hoster inside a Base64-encoded
    ``document.write`` blob containing an iframe; off-site links are
    returned unchanged.
    """
    if self.base_link in url:
        session = cfscrape.create_scraper()
        page = session.get(url).content
        # Pull the Base64 payload out of the inline script...
        payload = re.findall('document.write\(Base64.decode\("(.+?)"\)', page)[0]
        decoded = base64.b64decode(payload)
        # ...and take the iframe target inside it.
        url = client.parseDOM(decoded, 'iframe', ret='src')[0]
    return url
def __init__(self):
    """Provider metadata and a Cloudflare-bypassing session for 123moviesfree."""
    self.scraper = cfscrape.create_scraper()
    self.priority = 1
    self.language = ['en']
    self.domains = ['123moviesfree.ws']
    self.base_link = 'https://www.123moviesfree.ws'
    # Slots: show slug, season number, episode number.
    self.tv_link = '/episode/%s-season-%s-episode-%s'
    # Slot: movie slug.
    self.movie_link = '/%s'
def __init__(self):
    """Provider metadata and a Cloudflare-bypassing session for rlsbb."""
    self.scraper = cfscrape.create_scraper()
    self.priority = 1
    self.language = ['en']
    self.domains = ['rlsbb.ru', 'rlsbb.to', 'rlsbb.com']
    self.base_link = 'http://rlsbb.ru'
    # Search runs on a separate host and needs this cookie set.
    self.search_base_link = 'http://search.rlsbb.ru'
    self.search_cookie = 'serach_mode=rlsbb'
    # Slot: url-encoded search phrase.
    self.search_link = '/lib/search526049.php?phrase=%s&pindex=1&content=true'
def resolve(self, url):
    """Unwrap a site redirect page into the hoster URL it embeds.

    The page hides the target in a Base64 string written via
    ``document.write``; inside the decoded HTML sits a src attribute.
    Returns None if any step fails.
    """
    try:
        session = cfscrape.create_scraper()
        page = session.get(url).content
        # Grab the quoted Base64 payload from the inline script.
        payload = re.findall('document.write.+?"([^"]*)', page)[0]
        decoded = base64.b64decode(payload)
        # The decoded markup carries the real link in a src attribute.
        return re.findall('src="([^"]*)', decoded)[0]
    except Exception:
        return
def sources(self, url, hostDict, hostprDict):
    """Scrape hoster links from a watch page via two paths.

    1) a Base64 ``document.write`` iframe embed (primary player), then
    2) the visible list of alternative servers (div.server_line).
    Returns the list of source dicts, or None on unexpected failure.
    """
    try:
        sources = []
        scraper = cfscrape.create_scraper()
        r = scraper.get(url).content
        try:
            # Path 1: decode the primary player's Base64 iframe blob.
            v = re.findall('document.write\(Base64.decode\("(.+?)"\)', r)[0]
            b64 = base64.b64decode(v)
            url = client.parseDOM(b64, 'iframe', ret='src')[0]
            try:
                # Host label = last "name.tld" chunk of the netloc.
                host = re.findall(
                    '([\w]+[.][\w]+)$',
                    urlparse.urlparse(url.strip().lower()).netloc)[0]
                host = client.replaceHTMLCodes(host)
                host = host.encode('utf-8')  # Python-2 str normalisation
                sources.append({
                    'source': host,
                    'quality': 'SD',
                    'language': 'en',
                    'url': url.replace('\/', '/'),  # unescape JSON-style slashes
                    'direct': False,
                    'debridonly': False
                })
            except:
                pass
        except:
            pass
        # Path 2: alternative server rows.
        r = client.parseDOM(r, 'div', {'class': 'server_line'})
        r = [(client.parseDOM(i, 'a', ret='href')[0],
              client.parseDOM(i, 'p',
                              attrs={'class': 'server_servername'})[0])
             for i in r]
        if r:
            for i in r:
                try:
                    # Strip "Server"/"Link N" boilerplate to get the host name.
                    host = re.sub('Server|Link\s*\d+', '', i[1]).lower()
                    url = i[0]
                    host = client.replaceHTMLCodes(host)
                    host = host.encode('utf-8')
                    if 'other' in host: continue  # skip the generic bucket
                    sources.append({
                        'source': host,
                        'quality': 'SD',
                        'language': 'en',
                        'url': url.replace('\/', '/'),
                        'direct': False,
                        'debridonly': False
                    })
                except:
                    pass
        return sources
    except Exception:
        return
def movie(self, imdb, title, localtitle, aliases, year):
    """Search the site for a movie and return its page URL.

    Matches search results by release year; returns the first match's
    href or None.  NOTE(review): an empty result list makes ``r[0]``
    raise IndexError, which the broad except converts to None — the
    no-result path relies on that exception.
    """
    try:
        scraper = cfscrape.create_scraper()
        clean_title = cleantitle.geturl(title).replace('-', '+')
        url = urlparse.urljoin(self.base_link, (self.search_link % clean_title))
        r = scraper.get(url).content
        r = dom_parser2.parse_dom(r, 'div', {'id': 'movie-featured'})
        r = [dom_parser2.parse_dom(i, 'a', req=['href']) for i in r if i]
        # Pair each result link with the year shown as "Release: YYYY".
        r = [(i[0].attrs['href'], re.search('Release:\s*(\d+)', i[0].content))
             for i in r if i]
        r = [(i[0], i[1].groups()[0]) for i in r if i[0] and i[1]]
        # Keep only results whose release year matches exactly.
        r = [(i[0], i[1]) for i in r if i[1] == year]
        if r[0]:
            url = r[0][0]
            return url
        else:
            return
    except Exception:
        return
def sources(self, url, hostDict, hostprDict):
    """Scrape iframe hoster links plus a page-level quality badge.

    Returns the list of source dicts, or None on failure.

    Fixes vs. original:
      * removed a stray Python-2 ``print qual`` debug statement (it is a
        syntax error under Python 3 and noise under Python 2);
      * ``quality`` now defaults to 'SD' so a page with no quality badge
        no longer raises NameError inside the append loop (which the
        broad except silently turned into a None return).
    """
    try:
        sources = []
        scraper = cfscrape.create_scraper()
        r = scraper.get(url).content
        try:
            quality = 'SD'  # default when no quality badge is present
            qual = re.compile('class="quality">(.+?)<').findall(r)
            # Last badge on the page wins, mirroring the original loop.
            for i in qual:
                if 'HD' in i:
                    quality = '1080p'
                else:
                    quality = 'SD'
            match = re.compile('<iframe src="(.+?)"').findall(r)
            for url in match:
                valid, host = source_utils.is_host_valid(url, hostDict)
                sources.append({
                    'source': host,
                    'quality': quality,
                    'language': 'en',
                    'url': url,
                    'direct': False,
                    'debridonly': False
                })
        except:
            return
    except Exception:
        return
    return sources
def sources(self, url, hostDict, hostprDict):
    """Scrape release-blog post links (debrid-only) for a title.

    Harvests candidate URLs three ways — <a> tags, <pre> blocks, and
    naked URLs in the remaining text — pairs each with whatever size
    text is nearby, filters by the query slug, and emits source dicts.
    Returns the list (possibly empty) on success or partial failure.
    """
    try:
        sources = []
        scraper = cfscrape.create_scraper()
        if url == None: return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
            'title']
        # hdlr = "SxxEyy" for episodes, the year for movies.
        hdlr = 'S%02dE%02d' % (int(data['season']), int(
            data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s s%02de%02d' % (
            data['tvshowtitle'], int(data['season']),
            int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (
                data['title'], data['year'])
        # Collapse punctuation/whitespace into dashes for the site search.
        query = re.sub('[\\\\:;*?"<>|/ \+\']+', '-', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        #log_utils.log('\n\n\n\n\n\nquery, url: %s, %s' % (query,url))
        r = scraper.get(url).content
        # grab the (only?) relevant div and cut off the footer
        r = client.parseDOM(r, "div", attrs={'class': 'entry-content'})[0]
        r = re.sub('shareaholic-canvas.+', '', r, flags=re.DOTALL)
        # gather actual <a> links then clear all <a>/<img> to prep for naked-url scan
        # inner text could be useful if url looks like http://somehost.com/ugly_hash_377cbc738eff
        a_txt = ''
        a_url = ''
        a_txt = client.parseDOM(r, "a", attrs={'href': '.+?'})
        a_url = client.parseDOM(r, "a", ret="href")
        r = re.sub('<a .+?</a>', '', r, flags=re.DOTALL)
        r = re.sub('<img .+?>', '', r, flags=re.DOTALL)
        # check pre blocks for size and gather naked-urls
        size = ''
        pre_txt = []
        pre_url = []
        pres = client.parseDOM(r, "pre", attrs={'style': '.+?'})
        for pre in pres:
            try:
                size = re.findall('([0-9,\.]+ ?(?:GB|GiB|MB|MiB))', pre)[0]
            except:
                pass
            url0 = re.findall(
                'https?://[^ <"\'\s]+', pre,
                re.DOTALL)  # bad form but works with this site
            txt0 = [size] * len(url0)
            pre_url = pre_url + url0
            pre_txt = pre_txt + txt0  # we're just grabbing raw urls so there's no other info
        r = re.sub('<pre .+?</pre>', '', r, flags=re.DOTALL)
        # assume info at page top is true for all movie links, and only movie links
        # (and that otherwise, only <pre>'s have scrapable sizes)
        size = ''
        if not 'tvshowtitle' in data:
            try:
                size = " " + re.findall('([0-9,\.]+ ?(?:GB|GiB|MB|MiB))',
                                        r)[0]
            except:
                pass
        # get naked urls (after exhausting <a>'s and <pre>'s)
        # note: all examples use full titles in links, so we can be careful
        raw_url = re.findall(
            'https?://[^ <"\'\s]+', r,
            re.DOTALL)  # bad form but works with this site
        raw_txt = [size] * len(
            raw_url)  # we're just grabbing raw urls so there's no other info
        # combine the 3 types of scrapes
        pairs = zip(a_url + pre_url + raw_url, a_txt + pre_txt + raw_txt)
        for pair in pairs:
            try:
                url = str(pair[0])
                info = re.sub(
                    '<.+?>', '',
                    pair[1])  #+ size  # usually (??) no <span> inside
                # immediately abandon pairs with undesired traits
                # (if they stop using urls w/ titles, would need to accomodate here)
                if any(x in url for x in ['.rar', '.zip', '.iso']):
                    raise Exception()
                if not query.lower() in re.sub('[\\\\:;*?"<>|/ \+\'\.]+',
                                               '-', url + info).lower():
                    raise Exception()
                # establish size0 for this pair: 'size' is pre-loaded for movies only...
                # ...but prepend 'info' to lead with more-specific sizes (from a <pre>)
                size0 = info + " " + size
                # grab first reasonable data size from size0 string
                try:
                    size0 = re.findall('([0-9,\.]+ ?(?:GB|GiB|MB|MiB))',
                                       size0)[0]
                    div = 1 if size0.endswith(('GB', 'GiB')) else 1024
                    size0 = float(re.sub('[^0-9\.]', '', size0)) / div
                    size0 = '%.2f GB' % size0
                except:
                    size0 = ''
                    pass
                # process through source_tools and hint with size0
                quality, info = source_utils.get_release_quality(url, info)
                info.append(size0)
                info = ' | '.join(info)
                #log_utils.log('** pair: [%s / %s] %s' % (quality,info,url))
                url = url.encode('utf-8')
                hostDict = hostDict + hostprDict
                valid, host = source_utils.is_host_valid(url, hostDict)
                if not valid: continue
                sources.append({
                    'source': host,
                    'quality': quality,
                    'language': 'en',
                    'url': url,
                    'info': info,
                    'direct': False,
                    'debridonly': True
                })
            except:
                pass
        return sources
    except:
        return sources
def sources(self, url, hostDict, hostprDict):
    """Scrape debrid-only release links from wrzcraft search results.

    Walks each <h2> search hit, opens the post, and inspects every link:
    the release name is parsed for quality/SCR/CAM markers, 3D, HEVC and
    file size, and archive links (.rar/.zip/.iso) are skipped.  CAM
    sources are dropped whenever at least one non-CAM source exists.
    Returns the list of source dicts (possibly empty).

    Fix: the size regex was applied to ``name[2]`` — the *third
    character* of the release name — so it could never match and the
    size info was always silently lost; it now scans ``name`` itself.
    """
    try:
        sources = []
        if url == None: return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data[
            'title']
        # hdlr = "SxxEyy" for episodes, the year for movies; links are only
        # accepted when it appears in the release name.
        hdlr = 'S%02dE%02d' % (int(data['season']), int(
            data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']),
                                   int(data['episode'])) if\
            'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        scraper = cfscrape.create_scraper()
        r = scraper.get(url).content
        # Each search hit is an <h2><a ...> with the release name as text.
        items = dom_parser2.parse_dom(r, 'h2')
        items = [
            dom_parser2.parse_dom(
                i.content, 'a',
                req=['href', 'rel', 'title', 'data-wpel-link'])
            for i in items
        ]
        items = [(i[0].content, i[0].attrs['href']) for i in items]
        hostDict = hostprDict + hostDict
        for item in items:
            try:
                name = item[0]
                name = client.replaceHTMLCodes(name)
                scraper = cfscrape.create_scraper()
                r = scraper.get(item[1]).content
                links = dom_parser2.parse_dom(
                    r, 'a', req=['href', 'rel', 'data-wpel-link', 'target'])
                links = [i.attrs['href'] for i in links]
                for url in links:
                    try:
                        if hdlr in name:
                            # Strip everything through the year/episode tag,
                            # then tokenise the remaining release tags.
                            fmt = re.sub(
                                '(.+)(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*)(\.|\)|\]|\s)',
                                '', name.upper())
                            fmt = re.split('\.|\(|\)|\[|\]|\s|\-', fmt)
                            fmt = [i.lower() for i in fmt]
                            # Reject subbed/dubbed releases and extras.
                            if any(
                                    i.endswith(('subs', 'sub', 'dubbed',
                                                'dub')) for i in fmt):
                                raise Exception()
                            if any(i in ['extras'] for i in fmt):
                                raise Exception()
                            if '1080p' in fmt: quality = '1080p'
                            elif '720p' in fmt: quality = '720p'
                            else: quality = 'SD'
                            if any(i in ['dvdscr', 'r5', 'r6'] for i in fmt):
                                quality = 'SCR'
                            elif any(i in [
                                    'camrip', 'tsrip', 'hdcam', 'hdts',
                                    'dvdcam', 'dvdts', 'cam', 'telesync',
                                    'ts'
                            ] for i in fmt):
                                quality = 'CAM'
                            info = []
                            if '3d' in fmt: info.append('3D')
                            try:
                                # (fix) scan the full release name, not name[2]
                                size = re.findall(
                                    '((?:\d+\.\d+|\d+\,\d+|\d+) (?:GB|GiB|MB|MiB))',
                                    name)[-1]
                                div = 1 if size.endswith(
                                    ('GB', 'GiB')) else 1024
                                size = float(
                                    re.sub('[^0-9|/.|/,]', '', size)) / div
                                size = '%.2f GB' % size
                                info.append(size)
                            except:
                                pass
                            if any(i in ['hevc', 'h265', 'x265']
                                   for i in fmt):
                                info.append('HEVC')
                            info = ' | '.join(info)
                            if not any(x in url
                                       for x in ['.rar', '.zip', '.iso']):
                                url = client.replaceHTMLCodes(url)
                                url = url.encode('utf-8')
                                # Host label = last "name.tld" chunk of netloc.
                                host = re.findall(
                                    '([\w]+[.][\w]+)$',
                                    urlparse.urlparse(
                                        url.strip().lower()).netloc)[0]
                                if host in hostDict:
                                    host = client.replaceHTMLCodes(host)
                                    host = host.encode('utf-8')
                                    sources.append({
                                        'source': host,
                                        'quality': quality,
                                        'language': 'en',
                                        'url': url,
                                        'info': info,
                                        'direct': False,
                                        'debridonly': True
                                    })
                    except:
                        pass
            except:
                pass
        # Prefer non-CAM sources when any exist.
        check = [i for i in sources if not i['quality'] == 'CAM']
        if check: sources = check
        return sources
    except:
        return sources
def __init__(self):
    """Provider metadata and a Cloudflare-bypassing session for hdm.to."""
    self.scraper = cfscrape.create_scraper()
    self.priority = 1
    self.language = ['en']
    self.domains = ['hdm.to']
    self.base_link = 'https://hdm.to'
def sources(self, url, hostDict, hostprDict):
    """Resolve playable streams via the site's AJAX server list.

    Looks up the title (show or movie), fetches the server/episode list
    as JSON, and for each matching entry follows either the "embed"
    path (direct iframe src) or the token path (obfuscated x/y params →
    playlist JSON).  Google-hosted streams are marked direct.
    Returns the list of source dicts (possibly empty).

    NOTE(review): ``eval(data['aliases'])`` executes text taken from the
    url payload — safe only if that payload is always produced by our
    own movie()/tvshow() methods; consider ast.literal_eval.
    """
    try:
        sources = []
        if url is None: return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        aliases = eval(data['aliases'])
        mozhdr = {
            'User-Agent':
            'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'
        }
        headers = mozhdr  # NOTE: same dict object — mozhdr gets the AJAX header too
        headers['X-Requested-With'] = 'XMLHttpRequest'
        self.s = cfscrape.create_scraper()
        if 'tvshowtitle' in data:
            episode = int(data['episode'])
            url = self.searchShow(data['tvshowtitle'], data['season'],
                                  aliases, headers)
        else:
            episode = 0  # sentinel: accept every entry for movies
            url = self.searchMovie(data['title'], data['year'], aliases,
                                   headers)
        headers['Referer'] = url
        ref_url = url
        # Numeric media id embedded in the page URL ("...-<id>.<ext>").
        mid = re.findall('-(\d*)\.', url)[0]
        data = {'id': mid}
        r = self.s.post(url, headers=headers)
        try:
            u = urlparse.urljoin(self.base_link, self.server_link % mid)
            r = self.s.get(u, headers=mozhdr).content
            r = json.loads(r)['html']
            rl = client.parseDOM(r, 'div', attrs={'class': 'pas-list'})
            rh = client.parseDOM(r, 'div', attrs={'class': 'pas-header'})
            ids = client.parseDOM(rl, 'li', ret='data-id')
            servers = client.parseDOM(rl, 'li', ret='data-server')
            labels = client.parseDOM(rl, 'a', ret='title')
            r = zip(ids, servers, labels)
            # Map server id -> class string ("embed" servers are special).
            rrr = zip(client.parseDOM(rh, 'li', ret='data-id'),
                      client.parseDOM(rh, 'li', ret='class'))
            types = {}
            for rr in rrr:
                types[rr[0]] = rr[1]
            for eid in r:
                try:
                    try:
                        # Episode number from the label, e.g. "Episode 05".
                        ep = re.findall('episode.*?(\d+).*?',
                                        eid[2].lower())[0]
                    except:
                        ep = 0
                    if (episode == 0) or (int(ep) == episode):
                        t = str(int(time.time() * 1000))  # cache-busting ms timestamp
                        quali = source_utils.get_release_quality(eid[2])[0]
                        if 'embed' in types[eid[1]]:
                            # Embed path: the JSON response carries the iframe src.
                            url = urlparse.urljoin(self.base_link,
                                                   self.embed_link % (eid[0]))
                            xml = self.s.get(url, headers=headers).content
                            url = json.loads(xml)['src']
                            valid, hoster = source_utils.is_host_valid(
                                url, hostDict)
                            if not valid: continue
                            q = source_utils.check_sd_url(url)
                            q = q if q != 'SD' else quali
                            sources.append({
                                'source': hoster,
                                'quality': q,
                                'language': 'en',
                                'url': url,
                                'direct': False,
                                'debridonly': False
                            })
                            continue
                        else:
                            # Token path: fetch an obfuscated script first.
                            url = urlparse.urljoin(
                                self.base_link,
                                self.token_link % (eid[0], mid, t))
                        script = self.s.get(url, headers=headers).content
                        # Three known obfuscation variants for the x/y params.
                        if '$_$' in script:
                            params = self.uncensored1(script)
                        elif script.startswith('[]') and script.endswith(
                                '()'):
                            params = self.uncensored2(script)
                        elif '_x=' in script:
                            x = re.search('''_x=['"]([^"']+)''',
                                          script).group(1)
                            y = re.search('''_y=['"]([^"']+)''',
                                          script).group(1)
                            params = {'x': x, 'y': y}
                        else:
                            raise Exception()
                        u = urlparse.urljoin(
                            self.base_link,
                            self.source_link % (eid[0], params['x'],
                                                params['y']))
                        # The playlist endpoint sometimes returns empty —
                        # retry up to 11 times until a body arrives.
                        length = 0
                        count = 0
                        while length == 0 and count < 11:
                            r = self.s.get(u, headers=headers).text
                            length = len(r)
                            if length == 0: count += 1
                        uri = None
                        uri = json.loads(r)['playlist'][0]['sources']
                        try:
                            uri = [i['file'] for i in uri if 'file' in i]
                        except:
                            try:
                                uri = [uri['file']]
                            except:
                                continue
                        for url in uri:
                            if 'googleapis' in url:
                                q = source_utils.check_sd_url(url)
                                sources.append({
                                    'source': 'gvideo',
                                    'quality': q,
                                    'language': 'en',
                                    'url': url,
                                    'direct': True,
                                    'debridonly': False
                                })
                                continue
                            valid, hoster = source_utils.is_host_valid(
                                url, hostDict)
                            #urls, host, direct = source_utils.check_directstreams(url, hoster)
                            q = quali
                            if valid:
                                #for z in urls:
                                if hoster == 'gvideo':
                                    direct = True
                                    try:
                                        q = directstream.googletag(
                                            url)[0]['quality']
                                    except:
                                        pass
                                    url = directstream.google(url,
                                                              ref=ref_url)
                                else:
                                    direct = False
                                sources.append({
                                    'source': hoster,
                                    'quality': q,
                                    'language': 'en',
                                    'url': url,
                                    'direct': direct,
                                    'debridonly': False
                                })
                            else:
                                # Unknown host: treat as a direct CDN stream.
                                sources.append({
                                    'source': 'CDN',
                                    'quality': q,
                                    'language': 'en',
                                    'url': url,
                                    'direct': True,
                                    'debridonly': False
                                })
                except:
                    pass
        except:
            pass
        return sources
    except:
        return sources