def get_source(self, item_url, title, year, start_time, qual):
    # Collect an openload iframe plus any window.open() mirror links from the page.
    try:
        count = 0
        OPEN = client.request(item_url)
        frame = client.parseDOM(OPEN, 'iframe', ret='src')[0]
        if 'openload' in frame:
            count += 1
            self.sources.append({'source': 'openload', 'quality': qual,
                                 'scraper': self.name, 'url': frame, 'direct': False})
        extra_links = re.findall('''window.open\(['"]([^'"]+)['"]\).+?server:([^<]+)''', OPEN, re.DOTALL)
        for link, host in extra_links:
            if not filter_host(host.replace(' ', '')):
                continue
            link = client.replaceHTMLCodes(link).encode('utf-8')
            link = urlparse.urljoin(self.base_link, link)
            count += 1
            self.sources.append({'source': host, 'quality': qual,
                                 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
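# The host-parsing idiom used throughout this module,
# link.split('//')[1].replace('www.', '').split('/')[0], raises IndexError on
# relative URLs. A minimal sketch of a safer equivalent (a hypothetical helper,
# not part of the original add-on) built on the urlparse module already imported:
def host_from_link(link):
    # urlparse returns an empty netloc for relative links instead of raising
    netloc = urlparse.urlparse(link).netloc
    return netloc.replace('www.', '').split(':')[0]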
def get_source(self, item_url, title, year, season, episode, start_time):
    try:
        count = 0
        headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
                   'Referer': self.base_link}
        scraper = cfscrape.create_scraper()
        r = scraper.get(item_url, headers=headers).content
        data = client.parseDOM(r, 'tr')
        for item in data:
            # quality is encoded in the span class as quality_<label>
            qual = client.parseDOM(item, 'span', ret='class')[0]
            qual = qual.replace('quality_', '')
            link = client.parseDOM(item, 'a', ret='data-href')[0]
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0]
            if not filter_host(host):
                continue
            count += 1
            self.sources.append({'source': host, 'quality': qual,
                                 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
        return self.sources
    except Exception:
        return self.sources
def get_sources(self, episode_url, title, year, season, episode, start_time):
    try:
        links = client.request(episode_url)
        links = client.parseDOM(links, 'div', attrs={'class': 'll-item'})
        count = 0
        for link in links:
            data = dom.parse_dom(link, 'a')[0]
            host = data.content
            if not filter_host(host):
                continue
            count += 1
            url = data.attrs['href']
            self.sources.append({'source': host, 'quality': 'DVD',
                                 'scraper': self.name, 'url': url, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
    return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        count = 0
        start_time = time.time()
        search_id = urllib.quote_plus('%s %s' % (clean_search(title), year))
        start_url = '%s/?s=%s' % (self.base_link, search_id)
        html = client.request(start_url, referer=self.base_link)
        match = re.compile('class="thumb".+?title="(.+?)".+?href="(.+?)">', re.DOTALL).findall(html)
        for name, item_url in match:
            if year not in name:
                continue
            if not clean_title(title) == clean_title(name.split(year)[0][:-1]):
                continue
            OPEN = client.request(item_url, referer=self.base_link)
            link = client.parseDOM(OPEN, 'iframe', ret='src')[0]
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0]
            if not filter_host(host):
                continue
            count += 1
            self.sources.append({'source': host, 'quality': 'HD',
                                 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def get_sources(self, episode_url, title, year, season, episode, start_time):
    try:
        headers = {'User-Agent': User_Agent}
        links = requests.get(episode_url, headers=headers, timeout=5).content
        LINK = re.compile('<div class="link-number".+?data-actuallink="(.+?)"', re.DOTALL).findall(links)
        count = 0
        for final_url in LINK:
            host = final_url.split('//')[1].replace('www.', '')
            host = host.split('/')[0].lower()
            if not filter_host(host):
                continue
            host = host.split('.')[0].title()
            count += 1
            if count < 25:  # cap the number of links returned per episode
                self.sources.append({'source': host, 'quality': 'DVD',
                                     'scraper': self.name, 'url': final_url, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
    return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        count = 0
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s/search/?keyword=%s' % (self.base_link, urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, headers=headers).content
        match = re.compile('class="ml-item".+?href="(.+?)".+?<b>(.+?)</b>.+?<b>(.+?)</b>.+?alt="(.+?)"', re.DOTALL).findall(html)
        for item_url1, date, res, name in match:
            item_url = urlparse.urljoin(self.base_link, item_url1)
            if not clean_title(search_id) == clean_title(name):
                continue
            OPEN = scraper.get(item_url, headers=headers).content
            Endlinks = re.compile('class="movie_links"><li(.+?)<h3><b class="icon-share-alt"', re.DOTALL).findall(OPEN)[0]
            links = re.compile('target="_blank" href="(.+?)"', re.DOTALL).findall(Endlinks)
            for link in links:
                if not link.startswith('http'):
                    continue
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0]
                if not filter_host(host):
                    continue
                count += 1
                self.sources.append({'source': host, 'quality': res,
                                     'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def get_source(self, item_url, title, year, season, episode, debrid, start_time):
    try:
        count = 0
        frames = []
        frames += client.parseDOM(item_url, 'iframe', ret='src')
        frames += client.parseDOM(item_url, 'a', ret='href')
        frames += client.parseDOM(item_url, 'source', ret='src')
        frames += client.parseDOM(item_url, 'enclosure', ret='url')
        try:
            q = re.findall('<strong>Quality:</strong>([^<]+)', item_url, re.DOTALL)[0]
            if 'high' in q.lower():
                qual = '720p'
            elif 'cam' in q.lower():
                qual = 'CAM'
            else:
                qual = 'SD'
        except:
            qual = 'SD'
        for link in frames:
            if 'http://24hd.org' in link:
                continue
            if '.pl/link/' in link:
                continue
            if 'seehd.pl/d/' in link:
                r = self.scraper.get(link).content
                link = client.parseDOM(r, 'iframe', ret='src')[0]
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0].lower()
            # with debrid enabled, hosts resolvable by real-debrid are listed as
            # debrid-only sources; hosts passing filter_host are also added below
            if debrid is True:
                rd_domains = get_rd_domains()
                if host not in rd_domains:
                    continue
                count += 1
                self.sources.append({'source': host, 'quality': qual,
                                     'scraper': self.name, 'url': link,
                                     'direct': False, 'debridonly': True})
            if not filter_host(host):
                continue
            count += 1
            self.sources.append({'source': host, 'quality': qual,
                                 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
def get_source(self, link, title, year, season, episode, start_time):
    try:
        html = client.request(link)
        match = re.compile('var link_server.+?"(.+?)"', re.DOTALL).findall(html)
        count = 0
        for link in match:
            if not link.startswith('https:'):
                link = 'http:' + link
            if 'vidnode' in link:
                if 'load.php' not in link:
                    continue
                html = client.request(link)
                grab = re.compile("sources.+?file: '(.+?)',label: '(.+?)'", re.DOTALL).findall(html)
                for end_link, rez in grab:
                    if '1080' in rez:
                        res = '1080p'
                    elif '720' in rez:
                        res = '720p'
                    else:
                        res = 'SD'
                    count += 1
                    self.sources.append({'source': 'Vidnode', 'quality': res,
                                         'scraper': self.name, 'url': end_link, 'direct': False})
            else:
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0]
                if not filter_host(host):
                    continue
                count += 1
                self.sources.append({'source': host, 'quality': 'SD',
                                     'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except:
        pass
def get_sources(self, url, title, year, season, episode, start_time):
    try:
        count = 0
        url = urlparse.urljoin(self.base_link, url) if url.startswith('/') else url
        r = client.request(url)
        data = re.findall(r'\s*(eval.+?)\s*</script', r, re.DOTALL)[1]
        data = jsunpack.unpack(data).replace('\\', '')
        # The unpacked script rebuilds the ajax URL from string fragments, e.g.:
        # https://www.primewire.ink/ajax-78583.php?slug=watch-2809620-Black-Panther&cp=7TYP4N
        # var rtv='aja';var aa='x-7';var ba='85';var ca='83';var da='.ph';var ea='p?sl';
        # var fa='ug=';var ia='&cp=7T';var ja='YP';var ka='4N';var code=ia+ja+ka;
        # var page=rtv+aa+ba+ca+da+ea+fa;function goml(loc){$('#div1').load(domain+page+loc+code)}
        pattern = '''rtv='(.+?)';var aa='(.+?)';var ba='(.+?)';var ca='(.+?)';var da='(.+?)';var ea='(.+?)';var fa='(.+?)';var ia='(.+?)';var ja='(.+?)';var ka='(.+?)';'''
        links_url = re.findall(pattern, data, re.DOTALL)[0]
        slug = 'slug={}'.format(url.split('/')[-1])
        links_url = self.base_link + ''.join(links_url).replace('slug=', slug)
        links = client.request(links_url)
        links = client.parseDOM(links, 'tbody')
        for link in links:
            try:
                data = (client.parseDOM(link, 'a', ret='href')[0],
                        client.parseDOM(link, 'span', attrs={'class': 'version_host'})[0])
                link = urlparse.urljoin(self.base_link, data[0])
                host = data[1]
                if not filter_host(host):
                    continue
                count += 1
                self.sources.append({'source': host, 'quality': 'SD',
                                     'scraper': self.name, 'url': link, 'direct': False})
            except:
                pass
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
    return self.sources
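# A worked example of the fragment reassembly above, using the sample values from
# the comment (fragments vary per page; shown only to make the string math concrete):
#   ''.join(('aja', 'x-7', '85', '83', '.ph', 'p?sl', 'ug=', '&cp=7T', 'YP', '4N'))
#       -> 'ajax-78583.php?slug=&cp=7TYP4N'
#   then .replace('slug=', 'slug=watch-2809620-Black-Panther')
#       -> 'ajax-78583.php?slug=watch-2809620-Black-Panther&cp=7TYP4N'
# which, prefixed with base_link, matches the ajax URL quoted in the comment.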
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        count = 0
        search_id = clean_search(title.lower()).replace(' ', '-')
        sepi = 's%01de%01d' % (int(season), int(episode))
        epi_link = 'https://stream2watch.mrunlock.pw/%s-%s' % (search_id, sepi)
        r = client.request(epi_link)
        if not r:
            return self.sources
        links = client.parseDOM(r, 'iframe', ret='src')
        for link in links:
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0]
            if not filter_host(host):
                continue
            count += 1
            self.sources.append({'source': host, 'quality': 'DVD',
                                 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def get_sources(self, url, title, year, season, episode, start_time):
    try:
        count = 0
        url = urlparse.urljoin(self.base_link, url) if url.startswith('/') else url
        r = client.request(url)
        links = client.parseDOM(r, 'tbody')
        for link in links:
            try:
                data = (client.parseDOM(link, 'a', ret='href')[0],
                        client.parseDOM(link, 'span', attrs={'class': 'version_host'})[0])
                link = urlparse.urljoin(self.base_link, data[0])
                host = data[1]
                if not filter_host(host):
                    continue
                count += 1
                self.sources.append({'source': host, 'quality': 'SD',
                                     'scraper': self.name, 'url': link, 'direct': False})
            except:
                pass
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
    return self.sources
def get_sources(self, episode_url, title, year, season, episode, start_time):
    try:
        r = client.request(episode_url)
        links = client.parseDOM(r, 'div', attrs={'class': 'host-link'})
        try:
            links.append(client.parseDOM(r, 'IFRAME', ret='SRC')[0])
        except IndexError:
            pass
        count = 0
        for link in links:
            host = client.parseDOM(link, 'span')[0]
            host = client.replaceHTMLCodes(host)
            host = host.encode('utf-8')
            if not filter_host(host):
                continue
            # the player token is wrapped in an icon() call, e.g.
            # icon('ciaHR0cDovL3d3dy5zcGVlZHZpZC5uZXQvMGZvcjBqbTYwcDdzd')
            url = re.findall('''icon\(.+?(\w+).+?\)''', link, re.DOTALL)[0]
            url = urlparse.urljoin(self.base_link, '/cale/%s' % url)
            count += 1
            self.sources.append({'source': host, 'quality': 'DVD',
                                 'scraper': self.name, 'url': url, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
    return self.sources
def get_source(self, start_url, title, year, start_time):
    try:
        count = 0
        OPEN = client.request(start_url, timeout=5)
        Endlinks = re.compile('<iframe.+?src="(.+?)"', re.DOTALL).findall(OPEN)
        for link1 in Endlinks:
            # the scraped srcs lack a scheme, so prefix one
            link = 'https:' + link1
            if '1080' in link:
                qual = '1080p'
            elif '720' in link:
                qual = '720p'
            else:
                qual = 'SD'
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0].split('.')[0].title()
            if not filter_host(host):
                continue
            count += 1
            self.sources.append({'source': host, 'quality': qual,
                                 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
    return self.sources
def get_source(self, item_url, title, year, season, episode, start_time):
    try:
        # infer resolution from the page URL itself
        rez = item_url
        if '1080' in rez:
            res = '1080p'
        elif '720' in rez:
            res = '720p'
        else:
            res = 'DVD'
        OPEN = client.request(item_url)
        Regexs = re.compile('<h4 style="(.+?)</h4>', re.DOTALL).findall(OPEN)
        Regex = re.compile('link=(.+?)"', re.DOTALL).findall(str(Regexs))
        stream = re.compile('href="(.+?)"', re.DOTALL).findall(str(Regexs))
        count = 0
        for links in stream:
            if 'video.php' in links:
                # direct googleusercontent stream; pin a User-Agent for playback
                link = 'https://lh3.googleusercontent.com/' + links.split('=')[1].replace('&s', '') \
                       + '=m18|User-Agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:61.0) Gecko/20100101 Firefox/61.0'
                count += 1
                self.sources.append({'source': 'Google', 'quality': res,
                                     'scraper': self.name, 'url': link, 'direct': True})
            elif '/openload.php?url=' in links:
                link = 'https://openload.co/embed/' + links.split('=')[1]
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0].split('.')[0].title()
                if 'Www' not in host:
                    count += 1
                    self.sources.append({'source': host, 'quality': res,
                                         'scraper': self.name, 'url': link, 'direct': False})
        for link in Regex:
            # 'link=' values are base64-encoded target URLs where decodable
            try:
                link = base64.b64decode(link)
            except:
                pass
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0].split('.')[0].title()
            if not filter_host(host):
                continue
            if 'Www' not in host:
                count += 1
                self.sources.append({'source': host, 'quality': res,
                                     'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season, episode)
    except:
        pass

# extramovies().scrape_movie('justice league', '2017', '')
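# Quick illustration of the base64 decode above (made-up value, not from the site):
#   base64.b64decode('aHR0cDovL2V4YW1wbGUuY29t')  ->  'http://example.com'
# Values that are not valid base64 fall through unchanged via the try/except.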
def get_sources(self, url, title, year, season, episode, start_time):
    try:
        count = 0
        if url is None:
            return self.sources
        r = client.request(url)
        frame = client.parseDOM(r, 'table', attrs={'class': 'striped'})[0]
        frame = client.parseDOM(frame, 'a', ret='href')[0]
        frame = urlparse.urljoin(self.base_link, frame) if frame.startswith('/') else frame
        r = client.request(frame)
        # var hash = '9fafa6c0c1771b38a1c72a5bd893c503';
        hash = re.findall('''var\s*hash\s*=\s*['"]([^'"]+)''', r, re.MULTILINE)[0]
        pdata = 'hash=%s&confirm_continue=I+understand%s+I+want+to+continue' % (str(hash), '%2C')
        data = client.request(frame, post=pdata, referer=frame)
        frames = re.compile('''vlink.+?title=['"]([^'"]+).+?href=['"]([^'"]+).+?onclick.+?>(.+?)</a''',
                            re.M | re.DOTALL).findall(data.replace('\n', ''))
        for name, link, host in frames:
            try:
                # normalize macron characters in host names (utf-8 bytes for o, e, a, i)
                host = host.replace('\xc5\x8d', 'o').replace('\xc4\x93', 'e').replace('\xc4\x81', 'a').replace('\xc4\xab', 'i')
                if not filter_host(host):
                    continue
                count += 1
                quality, info = quality_tags.get_release_quality(name, name)
                if quality == '4K':
                    quality = '1080p'
                elif quality == '1080p' and 'openload' not in host:
                    quality = '720p'
                link = urlparse.urljoin(self.base_link, link) if link.startswith('/') else link
                self.sources.append({'source': host, 'quality': quality,
                                     'scraper': self.name, 'url': link, 'direct': False})
            except:
                pass
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
    return self.sources
def get_source(self, m_url, title, year, season, episode, start_time):
    try:
        # episode pages are tagged SxxExx; movie pages carry the year instead
        hdlr = 'S%02dE%02d' % (int(season), int(episode)) if season != '' else year
        r = client.request(m_url)
        if hdlr not in m_url.upper():
            quality = client.parseDOM(r, 'h4')[0]
            regex = '<p>\s*%s\s*</p>(.+?)</ul>' % hdlr
            data = re.search(regex, r, re.DOTALL | re.I).groups()[0]
            frames = client.parseDOM(data, 'a', ret='href')
        else:
            data = client.parseDOM(r, 'div', attrs={'class': 'entry-content'})[0]
            data = re.compile('<h4>(.+?)</h4>(.+?)</ul>', re.DOTALL).findall(data)
            frames = []
            for qual, links in data:
                quality = qual
                frames += client.parseDOM(links, 'a', ret='href')
        for link in frames:
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0]
            if not filter_host(host):
                continue
            if 'filebebo' in link:
                continue
            rez, info = quality_tags.get_release_quality(quality, link)
            if '1080p' in rez and host.lower() not in ['openload', 'oload']:
                rez = '720p'
            elif '720p' in quality and host.lower() not in ['openload', 'oload']:
                rez = 'SD'
            else:
                rez, info = quality_tags.get_release_quality(link, link)
            self.count += 1
            self.sources.append({'source': host, 'quality': rez,
                                 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, self.count, title, year, season=season, episode=episode)
    except:
        pass
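# Illustration of the hdlr tag built above (assuming numeric season/episode
# strings, as the callers in this module pass them):
#   'S%02dE%02d' % (int('1'), int('5'))  ->  'S01E05'
# which is then matched case-insensitively against the page URL and headings.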