def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources

        hostDict += ['akamaized.net', 'google.com', 'picasa.com', 'blogspot.com']
        result = self.scraper.get(url, timeout=10).content

        dom = dom_parser.parse_dom(result, 'a', req='data-video')
        urls = [i.attrs['data-video'] if i.attrs['data-video'].startswith('https')
                else 'https:' + i.attrs['data-video'] for i in dom]

        for url in urls:
            dom = []
            if 'vidnode.net' in url:
                result = self.scraper.get(url, timeout=10).content
                dom = dom_parser.parse_dom(result, 'source', req=['src', 'label'])
                dom = [(i.attrs['src'] if i.attrs['src'].startswith('https')
                        else 'https:' + i.attrs['src'], i.attrs['label']) for i in dom if i]
            elif 'ocloud.stream' in url:
                result = self.scraper.get(url, timeout=10).content
                base = re.findall('<base href="([^"]+)">', result)[0]
                hostDict += [base]
                dom = dom_parser.parse_dom(result, 'a', req=['href', 'id'])
                dom = [(i.attrs['href'].replace('./embed', base + 'embed'), i.attrs['id'])
                       for i in dom if i]
                dom = [(re.findall(r"var\s*ifleID\s*=\s*'([^']+)", client.request(i[0]))[0], i[1])
                       for i in dom if i]

            if dom:
                try:
                    for r in dom:
                        valid, hoster = source_utils.is_host_valid(r[0], hostDict)
                        if not valid:
                            continue
                        quality = source_utils.label_to_quality(r[1])
                        urls, host, direct = source_utils.check_directstreams(r[0], hoster)
                        for x in urls:
                            # only direct streams expose a file size; attach it as 'info' when available
                            size = source_utils.get_size(x['url']) if direct else None
                            if size:
                                sources.append({'source': host, 'quality': quality, 'language': 'en',
                                                'url': x['url'], 'direct': direct,
                                                'debridonly': False, 'info': size})
                            else:
                                sources.append({'source': host, 'quality': quality, 'language': 'en',
                                                'url': x['url'], 'direct': direct,
                                                'debridonly': False})
                except:
                    pass
            else:
                valid, hoster = source_utils.is_host_valid(url, hostDict)
                if not valid:
                    continue
                try:
                    url.decode('utf-8')  # validity check only; skip links that are not clean UTF-8
                    sources.append({'source': hoster, 'quality': 'SD', 'language': 'en',
                                    'url': url, 'direct': False, 'debridonly': False})
                except:
                    pass

        return sources
    except:
        return sources

def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources

        url = urlparse.urljoin(self.base_link, url)

        # retry the request up to three times
        for i in range(3):
            result = self.scraper.get(url).content
            if result is not None:
                break

        links = re.compile(r'onclick="report\(\'([^\']+)').findall(result)

        for link in links:
            try:
                valid, hoster = source_utils.is_host_valid(link, hostDict)
                if not valid:
                    continue
                urls, host, direct = source_utils.check_directstreams(link, hoster)
                # honour the host-limit setting: skip hosts we already collected
                if source_utils.limit_hosts() is True and host in str(sources):
                    continue
                for x in urls:
                    sources.append({'source': host, 'quality': x['quality'], 'language': 'en',
                                    'url': x['url'], 'direct': direct, 'debridonly': False})
            except:
                pass

        return sources
    except:
        return sources

def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if not url:
            return sources

        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        aliases = eval(data['aliases'])
        headers = {}

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        year = data['year']

        if 'tvshowtitle' in data:
            episode = data['episode']
            season = data['season']
            url = self._search(title, year, aliases, headers)
            # url = url.replace('online-free', 'season-%s-episode-%s-online-free' % (season, episode))
            url = url.rstrip('/') + '-s%se%s' % (season, episode)
        else:
            episode = None
            year = data['year']
            url = self._search(data['title'], data['year'], aliases, headers)

        url = url if 'http' in url else urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, __name__, log_utils.LOGDEBUG)

        result = client.request(url)
        result = client.parseDOM(result, 'li', attrs={'class': 'link-button'})
        links = client.parseDOM(result, 'a', ret='href')

        i = 0
        for l in links:
            # cap the number of resolved links at 15
            if i == 15:
                break
            try:
                l = l.split('=')[1]
                l = urljoin(self.base_link, self.video_link % l)
                result = client.request(l, post={}, headers={'Referer': url})
                u = result if 'http' in result else 'http:' + result
                if ' href' in u:
                    u = u.replace('\r', '').replace('\n', '').replace('\t', '')
                    u = 'http:' + re.compile(r" href='(.+?)'").findall(u)[0]
                if 'google' in u:
                    valid, hoster = source_utils.is_host_valid(u, hostDict)
                    urls, host, direct = source_utils.check_directstreams(u, hoster)
                    for x in urls:
                        sources.append({'source': host, 'quality': x['quality'], 'language': 'en',
                                        'url': x['url'], 'direct': direct, 'debridonly': False})
                else:
                    valid, hoster = source_utils.is_host_valid(u, hostDict)
                    if not valid:
                        continue
                    try:
                        u = u.decode('utf-8')
                    except:
                        u = source_utils.strip_non_ascii_and_unprintable(u)
                    sources.append({'source': hoster, 'quality': '720p', 'language': 'en',
                                    'url': u, 'direct': False, 'debridonly': False})
                i += 1
            except:
                source_utils.scraper_error('5MOVIES')

        return sources
    except:
        source_utils.scraper_error('5MOVIES')
        return sources

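# A hedged sketch, not part of the original scrapers: this method (and the
# similar one further below) runs eval() on the 'aliases' query parameter,
# which executes arbitrary expressions. ast.literal_eval accepts the same
# list/dict literals while refusing anything executable. The helper name
# `safe_aliases` is introduced here purely for illustration.
import ast

def safe_aliases(raw):
    # returns [] on malformed input instead of raising, mirroring the
    # defensive try/except style these scrapers use
    try:
        return ast.literal_eval(raw)
    except (ValueError, SyntaxError):
        return []
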
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources

        url = urlparse.urljoin(self.base_link, url)

        # retry the request up to three times
        for i in range(3):
            result = client.request(url, timeout=10)
            if result is not None:
                break

        dom = dom_parser.parse_dom(result, 'div', attrs={'class': 'links', 'id': 'noSubs'})
        result = dom[0].content

        links = re.compile(r'<tr\s*>\s*<td><i\s+class="fa fa-youtube link-logo"></i>([^<]+).*?href="([^"]+)"\s+class="watch',
                           re.DOTALL).findall(result)

        # only probe the first five links
        for link in links[:5]:
            try:
                url2 = urlparse.urljoin(self.base_link, link[1])
                for i in range(2):
                    result2 = client.request(url2, timeout=3)
                    if result2 is not None:
                        break
                r = re.compile(r'href="([^"]+)"\s+class="action-btn').findall(result2)[0]
                valid, hoster = source_utils.is_host_valid(r, hostDict)
                if not valid:
                    continue
                # log_utils.log('JairoxDebug1: %s - %s' % (url2, r), log_utils.LOGDEBUG)
                urls, host, direct = source_utils.check_directstreams(r, hoster)
                for x in urls:
                    sources.append({'source': host, 'quality': x['quality'], 'language': 'en',
                                    'url': x['url'], 'direct': direct, 'debridonly': False})
            except:
                # traceback.print_exc()
                pass

        # log_utils.log('JairoxDebug2: %s' % (str(sources)), log_utils.LOGDEBUG)
        return sources
    except:
        return sources

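# A hedged sketch, not part of the original scrapers: the "retry until a
# response arrives" loops above recur in several of these sources() methods
# and could be factored out. `fetch` is any zero-argument callable returning
# the page body or None; `request_with_retries` is a hypothetical name
# introduced here for illustration.
def request_with_retries(fetch, attempts=3):
    result = None
    for _ in range(attempts):
        result = fetch()
        if result is not None:
            break
    return result

# usage sketch:
#   result = request_with_retries(lambda: client.request(url, timeout=10))
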
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if not url:
            return sources

        url = urlparse.urljoin(self.base_link, url)
        r = client.request(url)

        # collect the player tabs and keep only the German ('de') ones
        rels = dom_parser.parse_dom(r, 'nav', attrs={'class': 'player'})
        rels = dom_parser.parse_dom(rels, 'ul', attrs={'class': 'idTabs'})
        rels = dom_parser.parse_dom(rels, 'li')
        rels = [(dom_parser.parse_dom(i, 'a', attrs={'class': 'options'}, req='href'),
                 dom_parser.parse_dom(i, 'img', req='src')) for i in rels]
        rels = [(i[0][0].attrs['href'][1:], re.findall(r'/flags/(\w+)\.png$', i[1][0].attrs['src']))
                for i in rels if i[0] and i[1]]
        rels = [i[0] for i in rels if len(i[1]) > 0 and i[1][0].lower() == 'de']

        r = [dom_parser.parse_dom(r, 'div', attrs={'id': i}) for i in rels]
        r = [(re.findall(r'link"?\s*:\s*"(.+?)"', ''.join([x.content for x in i])),
              dom_parser.parse_dom(i, 'iframe', attrs={'class': 'metaframe'}, req='src')) for i in r]
        r = [i[0][0] if i[0] else i[1][0].attrs['src'] for i in r if i[0] or i[1]]

        for i in r:
            try:
                i = re.sub(r'\[.+?\]|\[/.+?\]', '', i)
                i = client.replaceHTMLCodes(i)

                if not i.startswith('http'):
                    i = self.__decode_hash(i)

                if 'play.seriesever' in i:
                    i = client.request(i)
                    i = dom_parser.parse_dom(i, 'iframe', req='src')
                    if len(i) < 1:
                        continue
                    i = i[0].attrs['src']

                valid, host = source_utils.is_host_valid(i, hostDict)
                if not valid:
                    continue

                urls, host, direct = source_utils.check_directstreams(i, host)
                for x in urls:
                    sources.append({'source': host, 'quality': x['quality'], 'language': 'de',
                                    'url': x['url'], 'direct': direct, 'debridonly': False})
            except:
                pass

        return sources
    except:
        return sources

def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        aliases = eval(data['aliases'])
        headers = {}

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        year = data['year']

        if 'tvshowtitle' in data:
            episode = data['episode']
            season = data['season']
            url = self._search(title, data['year'], aliases, headers)
            url = url.replace('online-free', 'season-%s-episode-%s-online-free' % (season, episode))
        else:
            episode = None
            year = data['year']
            url = self._search(data['title'], data['year'], aliases, headers)

        url = url if 'http' in url else urlparse.urljoin(self.base_link, url)

        result = client.request(url)
        result = client.parseDOM(result, 'li', attrs={'class': 'link-button'})
        links = client.parseDOM(result, 'a', ret='href')

        i = 0
        for l in links:
            # cap the number of resolved links at 10
            if i == 10:
                break
            try:
                l = l.split('=')[1]
                l = urlparse.urljoin(self.base_link, self.video_link % l)
                result = client.request(l, post={}, headers={'Referer': url})
                u = result if 'http' in result else 'http:' + result
                if 'google' in u:
                    valid, hoster = source_utils.is_host_valid(u, hostDict)
                    urls, host, direct = source_utils.check_directstreams(u, hoster)
                    for x in urls:
                        sources.append({'source': host, 'quality': x['quality'], 'language': 'en',
                                        'url': x['url'], 'direct': direct, 'debridonly': False})
                else:
                    valid, hoster = source_utils.is_host_valid(u, hostDict)
                    if not valid:
                        continue
                    try:
                        u.decode('utf-8')  # validity check only
                        sources.append({'source': hoster, 'quality': 'SD', 'language': 'en',
                                        'url': u, 'direct': False, 'debridonly': False})
                        # note: the counter only advances for non-google links that decode cleanly
                        i += 1
                    except:
                        pass
            except:
                pass

        return sources
    except:
        return sources

def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources

        url = urlparse.urljoin(self.base_link, url)
        cookie = self.__get_premium_cookie()

        r = client.request(url, mobile=True, cookie=cookie)
        query = urlparse.urljoin(self.base_link, self.part_link)
        id = re.compile(r'var\s*video_id\s*=\s*"(\d+)"').findall(r)[0]

        p = dom_parser.parse_dom(r, 'a', attrs={'class': 'changePart', 'data-part': re.compile(r'\d+p')}, req='data-part')
        for i in p:
            i = i.attrs['data-part']

            # the first request only fetches the part count for this quality label
            p = urllib.urlencode({'video_id': id, 'part_name': i, 'page': '0'})
            p = client.request(query, cookie=cookie, mobile=True, XHR=True, post=p, referer=url)
            p = json.loads(p)
            p = p.get('part_count', 0)

            for part_count in range(0, p):
                try:
                    r = urllib.urlencode({'video_id': id, 'part_name': i, 'page': part_count})
                    r = client.request(query, cookie=cookie, mobile=True, XHR=True, post=r, referer=url)
                    r = json.loads(r)
                    r = r.get('part', {})

                    s = r.get('source', '')
                    url = r.get('code', '')

                    if s == 'url' and 'http' not in url:
                        url = self.__decode_hash(url)
                    elif s == 'other':
                        url = dom_parser.parse_dom(url, 'iframe', req='src')
                        if len(url) < 1:
                            continue
                        url = url[0].attrs['src']
                        if '/old/seframer.php' in url:
                            url = self.__get_old_url(url)

                        if 'keepup' in url:
                            print(url)  # needs to be fixed (keepup.gq)
                        elif self.domains[0] in url:
                            url = re.search('(?<=id=).*$', url).group()
                            url = 'https://drive.google.com/file/d/' + url

                    valid, host = source_utils.is_host_valid(url, hostDict)
                    if not valid:
                        continue

                    # map the site's part label to a quality tag
                    if i in ['720p', 'HD']:
                        quali = 'HD'
                    elif i in ['1080p', '1440p']:
                        quali = i
                    elif i in ['2160p']:
                        quali = '4K'
                    else:
                        quali = 'SD'

                    urls, host, direct = source_utils.check_directstreams(url, host, quali)
                    for x in urls:
                        sources.append({'source': host, 'quality': x['quality'], 'language': 'de',
                                        'url': x['url'], 'direct': direct, 'debridonly': False})
                except:
                    pass

        return sources
    except:
        return sources

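# A hedged sketch, not part of the original scraper: the if/elif quality
# chain above could be table-driven. `_PART_LABEL_QUALITY` and
# `part_label_to_quality` are hypothetical names introduced here for
# illustration only.
_PART_LABEL_QUALITY = {'720p': 'HD', 'HD': 'HD', '1080p': '1080p',
                       '1440p': '1440p', '2160p': '4K'}

def part_label_to_quality(label):
    # anything unrecognised falls back to 'SD', matching the chain above
    return _PART_LABEL_QUALITY.get(label, 'SD')
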
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if not url:
            return sources

        url = urlparse.urljoin(self.base_link, url)

        r = client.request(url, output='extended')
        headers = r[3]
        headers.update({'Cookie': r[2].get('Set-Cookie'), 'Referer': self.base_link})
        r = r[0]

        # collect the player tabs and keep only the German ('de') ones
        rels = dom_parser.parse_dom(r, 'nav', attrs={'class': 'player'})
        rels = dom_parser.parse_dom(rels, 'ul', attrs={'class': 'idTabs'})
        rels = dom_parser.parse_dom(rels, 'li')
        rels = [(dom_parser.parse_dom(i, 'a', attrs={'class': 'options'}, req='href'),
                 dom_parser.parse_dom(i, 'img', req='src')) for i in rels]
        rels = [(i[0][0].attrs['href'][1:], re.findall(r'/flags/(\w+)\.png$', i[1][0].attrs['src']))
                for i in rels if i[0] and i[1]]
        rels = [i[0] for i in rels if len(i[1]) > 0 and i[1][0].lower() == 'de']

        r = [dom_parser.parse_dom(r, 'div', attrs={'id': i}) for i in rels]

        links = re.findall(r'''(?:link|file)["']?\s*:\s*["'](.+?)["']''', ''.join([i[0].content for i in r]))
        links += [l.attrs['src'] for i in r
                  for l in dom_parser.parse_dom(i, 'iframe', attrs={'class': 'metaframe'}, req='src')]
        links += [l.attrs['src'] for i in r for l in dom_parser.parse_dom(i, 'source', req='src')]

        for i in links:
            try:
                i = re.sub(r'\[.+?\]|\[/.+?\]', '', i)
                i = client.replaceHTMLCodes(i)

                if '/play/' in i:
                    i = urlparse.urljoin(self.base_link, i)

                if self.domains[0] in i:
                    i = client.request(i, headers=headers, referer=url)

                    # unpack base64- and packer-obfuscated scripts
                    # (base64.decodestring is the py2 alias of decodebytes)
                    for x in re.findall(r'''\(["']?(.*)["']?\)''', i):
                        try:
                            i += jsunpack.unpack(base64.decodestring(re.sub(r'"\s*\+\s*"', '', x))).replace('\\', '')
                        except:
                            pass
                    for x in re.findall(r'(eval\s*\(function.*?)</script>', i, re.DOTALL):
                        try:
                            i += jsunpack.unpack(x).replace('\\', '')
                        except:
                            pass

                    links = [(match[0], match[1]) for match in re.findall(
                        r'''['"]?file['"]?\s*:\s*['"]([^'"]+)['"][^}]*['"]?label['"]?\s*:\s*['"]([^'"]*)''', i, re.DOTALL)]
                    links = [(x[0].replace('\\/', '/'), source_utils.label_to_quality(x[1]))
                             for x in links if '/no-video.mp4' not in x[0]]

                    doc_links = [directstream.google('https://drive.google.com/file/d/%s/view' % match)
                                 for match in re.findall(r'''file:\s*["'](?:[^"']+youtu.be/([^"']+))''', i, re.DOTALL)]
                    doc_links = [(u['url'], u['quality']) for x in doc_links if x for u in x]
                    links += doc_links

                    for url, quality in links:
                        if self.base_link in url:
                            url = url + '|Referer=' + self.base_link
                        sources.append({'source': 'gvideo', 'quality': quality, 'language': 'de',
                                        'url': url, 'direct': True, 'debridonly': False})
                else:
                    try:
                        # temporary workaround until URLResolver is updated to handle this URL form
                        did = re.findall(r'youtube.googleapis.com.*?docid=(\w+)', i)
                        if did:
                            i = 'https://drive.google.com/file/d/%s/view' % did[0]

                        valid, host = source_utils.is_host_valid(i, hostDict)
                        if not valid:
                            continue

                        urls, host, direct = source_utils.check_directstreams(i, host)
                        for x in urls:
                            sources.append({'source': host, 'quality': x['quality'], 'language': 'de',
                                            'url': x['url'], 'direct': direct, 'debridonly': False})
                    except:
                        pass
            except:
                pass

        return sources
    except:
        return sources

def sources(self, url, hostDict, hostprDict):
    # log_utils.log('\n\n~~~ incoming sources() url')
    # log_utils.log(url)
    try:
        sources = []
        if url is None:
            return sources

        req = urlparse.urljoin(self.base_link, url)

        # up to four attempts to pull up the episode page, then bail
        for i in range(4):
            result = client.request(req, timeout=3)
            if result is not None:
                break

        # get the key div's contents, then get all the links along with the
        # preceding text hinting at the host. Episode pages sort links by
        # hoster, which is bad if the top hosters are unavailable for debrid
        # OR if they're ONLY available for debrid (for non-debrid peeps),
        # so shuffle the list.
        dom = dom_parser.parse_dom(result, 'div', attrs={'class': 'links', 'id': 'noSubs'})
        result = dom[0].content
        links = re.compile(r'<i class="fa fa-youtube link-logo"></i>([^<]+).*?href="([^"]+)"\s+class="watch',
                           re.DOTALL).findall(result)
        random.shuffle(links)

        # Here we stack the deck for debrid users by copying all debrid
        # hosts to the top of the list. This is ugly but it works.
        # Someone else please make it cleaner?
        if debrid.status() is True:
            debrid_links = []
            for pair in links:
                for r in debrid.debrid_resolvers:
                    if r.valid_url('', pair[0].strip()):
                        debrid_links.append(pair)
            links = debrid_links + links

        # master list of hosts ResolveURL and placenta itself can resolve;
        # we check against this list so we don't waste connections on
        # unsupported hosts
        hostDict = hostDict + hostprDict

        conns = 0
        for pair in links:
            # try to be a little polite and limit connections
            # (unless we're not getting sources)
            if conns > self.max_conns and len(sources) > self.min_srcs:
                break

            # the 2 groups from the link search = hoster name, episode page url
            host = pair[0].strip()
            link = pair[1]

            # check for a valid host and jump to the next pair if not valid
            valid, host = source_utils.is_host_valid(host, hostDict)
            # log_utils.log("\n\n** conn #%s: %s (valid:%s) %s" % (conns, host, valid, link))
            if not valid:
                continue

            # two attempts per source link, then bail
            # NB: n sources could potentially cost n*range connections!!!
            link = urlparse.urljoin(self.base_link, link)
            for i in range(2):
                result = client.request(link, timeout=3)
                conns += 1
                if result is not None:
                    break

            # if both attempts failed, using the result will too, so bail to the next pair
            try:
                link = re.compile(r'href="([^"]+)"\s+class="action-btn').findall(result)[0]
            except:
                continue

            # I don't think this scraper EVER has direct links, but...
            # (if nothing else, it sets the quality)
            try:
                u_q, host, direct = source_utils.check_directstreams(link, host)
            except:
                continue

            # check_directstreams strangely returns a list instead of a single 2-tuple
            link, quality = u_q[0]['url'], u_q[0]['quality']
            # log_utils.log(' checked host: %s' % host)
            # log_utils.log(' checked direct: %s' % direct)
            # log_utils.log(' quality, link: %s, %s' % (quality, link))
            # log_utils.log(' # of urls: %s' % len(u_q))

            sources.append({'source': host, 'quality': quality, 'language': 'en',
                            'url': link, 'direct': direct, 'debridonly': False})

        return sources
    except:
        failure = traceback.format_exc()
        log_utils.log('WATCHSERIES - Exception: \n' + str(failure))
        return sources
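
# A hedged sketch, not part of the original scraper: the "stack the deck"
# block above copies debrid-resolvable pairs to the front, which leaves
# duplicates in `links`. A stable sort keyed on debrid support achieves the
# same ordering without duplicates. `resolvers` stands in for
# debrid.debrid_resolvers and is an assumption here.
def prioritize_debrid(links, resolvers):
    def is_debrid(pair):
        host = pair[0].strip()
        return any(r.valid_url('', host) for r in resolvers)
    # sorted() is stable, so the shuffled order is preserved within each
    # group; False sorts before True, hence `not is_debrid(...)` puts
    # debrid-capable hosts first
    return sorted(links, key=lambda pair: not is_debrid(pair))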