def get_sources(self, episode_url, title, year, season, episode, start_time):
    try:
        # NB: the header key must be 'User-Agent'; the original 'User_Agent' was never sent
        headers = {'User-Agent': User_Agent}
        links = requests.get(episode_url, headers=headers, timeout=5).content
        LINK = re.compile('<div class="link-number".+?data-actuallink="(.+?)"', re.DOTALL).findall(links)
        count = 0
        for final_url in LINK:
            host = final_url.split('//')[1].replace('www.', '')
            host = host.split('/')[0].lower()
            if not filter_host(host):
                continue
            host = host.split('.')[0].title()
            count += 1
            if count < 25:   # cap the number of listed links
                self.sources.append({'source': host, 'quality': 'DVD',
                                     'scraper': self.name, 'url': final_url, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
    return self.sources
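# The host-extraction idiom above (split on '//', drop 'www.', keep the first
# path segment) recurs in nearly every scraper below. A small helper like this
# one (hypothetical, not part of the original module) would express it once:
def extract_host(url, titled=True):
    # 'https://www.example.com/embed/abc' -> 'Example' when titled,
    # 'example.com' when not
    host = url.split('//')[1].replace('www.', '')
    host = host.split('/')[0]
    return host.split('.')[0].title() if titled else host.lower()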
def get_source(self, start_url, title, year, season, episode, start_time):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        r = client.request(start_url, headers=headers)
        Endlinks = re.compile(
            'class="nobr center">(.+?)</span></td>.+?title="Torrent magnet link" href="(.+?)".+?class="cellMainLink">(.+?)</a>',
            re.DOTALL).findall(r)
        for size, Magnet, qual in Endlinks:
            # strip the redirect wrapper and percent-decode the magnet link
            Magnet = Magnet.replace('https://mylink.cx/?url=', '')
            Magnet = Magnet.replace('%3A', ':').replace('%3F', '?').replace('%3D', '=').split('%26dn')[0]
            qual = quality_tags.get_release_quality(qual, None)[0]
            count += 1
            self.sources.append({'source': 'Torrent', 'quality': qual + ' ' + size,
                                 'scraper': self.name, 'url': Magnet,
                                 'direct': False, 'debridonly': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        hdlr = 'S%02dE%02d' % (int(season), int(episode))
        query = clean_search(title)
        query = urllib.quote_plus(query + ' ' + hdlr)
        urls = []
        for link in self.search_links:
            try:
                url = urlparse.urljoin(self.base_link, link % query)
                url = urlparse.urljoin(self.base_link, url)
                r = client.request(url)
                posts = client.parseDOM(r, 'tbody')
                posts = client.parseDOM(posts, 'tr')
                urls += [(client.parseDOM(i, 'button', ret='data-clipboard-text')[0]) for i in posts if i]
            except:
                pass
        count = 0
        for url in urls:
            name = url.split('/')[-1].lower()
            name = client.replaceHTMLCodes(name).replace('%20', '')
            if 'movies' in url:
                continue
            if any(x in url for x in ['italian', 'dubbed', 'teaser', 'subs', 'sub', 'dub',
                                      'samples', 'extras', 'french', 'trailer', 'trailers', 'sample']):
                continue
            t = re.sub('(\.|\(|\[|\s)(S\d+E\d+|S\d+)(\.|\)|\]|\s)(.+|)', '', name, flags=re.I)
            if clean_title(t) not in clean_title(title):
                continue
            y = re.findall('[\.|\(|\[|\s](S\d+E\d+|S\d+)[\.|\)|\]|\s]', name, re.I)[-1].upper()
            if not y == hdlr:
                continue
            res, info = quality_tags.get_release_quality(name, url)
            # only keep links on known direct-download mirrors
            if any(x in url for x in ['hastidl', '1tehmovies', '62.210.103.107', '79.127',
                                      '213.32.113.82', 'dl5.downloadha', '89.163.255.42',
                                      '185.56.20.142', 's1.0music', 'dl3.yoozdl',
                                      'dl4.lavinmovie.net', 'dl6.lavinmovie.net',
                                      'dl3.upload08.com', 'dl8.uploadt.com', '163.172.6.218',
                                      'samba.allunix.ru', 'server417']):
                count += 1
                url += '|User-Agent=%s&Referer=%s' % (client.agent(), self.base_link)
                url = urllib.quote(url, '|:?/&+=_-')
                self.sources.append({'source': 'DirectLink', 'quality': res,
                                     'scraper': self.name, 'url': url, 'direct': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
#filepursuit().scrape_movie('Black Panther', '2018', '')
def get_source(self, item_url, title, year, start_time):
    try:
        count = 0
        data = requests.get(item_url).json()
        for item in data:
            # use a local name so the passed-in title stays intact for logging
            post_title = item["title"]["rendered"]
            content = item["content"]["rendered"]
            year2 = item["date"][:4]
            if int(year) != int(year2):
                continue
            Links = client.parseDOM(content, 'iframe', ret='src')
            for link in Links:
                count += 1
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0].split('.')[0].title()
                label = "DVD"
                self.sources.append({'source': host, 'quality': label,
                                     'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        count = 0
        start_time = time.time()
        search_id = urllib.quote_plus('%s %s' % (clean_search(title), year))
        start_url = '%s/?s=%s' % (self.base_link, search_id)
        html = client.request(start_url, referer=self.base_link)
        match = re.compile('class="thumb".+?title="(.+?)".+?href="(.+?)">', re.DOTALL).findall(html)
        for name, item_url in match:
            if not year in name:
                continue
            if not clean_title(title) == clean_title(name.split(year)[0][:-1]):
                continue
            OPEN = client.request(item_url, referer=self.base_link)
            link = client.parseDOM(OPEN, 'iframe', ret='src')[0]
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0]
            if not filter_host(host):
                continue
            count += 1
            self.sources.append({'source': host, 'quality': 'HD',
                                 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def get_source(self, item_url, title, year, start_time, qual):
    try:
        count = 0
        OPEN = client.request(item_url)
        frame = client.parseDOM(OPEN, 'iframe', ret='src')[0]
        if 'openload' in frame:
            count += 1
            self.sources.append({'source': 'openload', 'quality': qual,
                                 'scraper': self.name, 'url': frame, 'direct': False})
        extra_links = re.findall('''window.open\(['"]([^'"]+)['"]\).+?server:([^<]+)''', OPEN, re.DOTALL)
        for link, host in extra_links:
            if not filter_host(host.replace(' ', '')):
                continue
            link = client.replaceHTMLCodes(link).encode('utf-8')
            link = urlparse.urljoin(self.base_link, link)
            count += 1
            self.sources.append({'source': host, 'quality': qual,
                                 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
def get_source(self, item_url, title, year, season, episode, start_time):
    try:
        count = 0
        headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
                   'Referer': self.base_link}
        scraper = cfscrape.create_scraper()
        r = scraper.get(item_url, headers=headers).content
        data = client.parseDOM(r, 'tr')
        for item in data:
            qual = client.parseDOM(item, 'span', ret='class')[0]
            qual = qual.replace('quality_', '')
            link = client.parseDOM(item, 'a', ret='data-href')[0]
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0]
            if not filter_host(host):
                continue
            count += 1
            self.sources.append({'source': host, 'quality': qual,
                                 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season='', episode='')
        return self.sources
    except BaseException:
        return self.sources
def scrape_music(self, title, artist, debrid=False):
    try:
        song_search = clean_title(title.lower()).replace(' ', '+')
        artist_search = clean_title(artist.lower()).replace(' ', '+')
        start_url = '%sresults?search_query=%s+%s' % (self.base_link, artist_search, song_search)
        html = requests.get(start_url, headers=headers, timeout=20).content
        match = re.compile('<h4 class="card-title">.+?</i>(.+?)</h4>.+?id="(.+?)"', re.DOTALL).findall(html)
        count = 0
        for m, link in match:
            # clean up whitespace and HTML entities in the display title
            # (assumes the usual &nbsp;/&quot;/&amp; escapes in the page source)
            match4 = m.replace('\n', '').replace('\t', '').replace('&nbsp;', ' ')
            match5 = re.sub('&#(\d+);', '', match4)
            match5 = re.sub('(&#[0-9]+)([^;^0-9]+)', '\\1;\\2', match5)
            match5 = match5.replace('&quot;', '"').replace('&amp;', '&')
            match5 = re.sub('\\\|/|\(|\)|\[|\]|\{|\}|-|:|;|\*|\?|"|\'|<|>|\_|\.|\?', ' ', match5)
            match5 = ' '.join(match5.split())
            match2 = m.replace('\n', '').replace('\t', '').replace(' ', '')
            if clean_title(title).lower() in clean_title(match2).lower():
                if clean_title(artist).lower() in clean_title(match2).lower():
                    final_link = 'https://www.youtube.com/watch?v=' + link
                    count += 1
                    self.sources.append({'source': match5, 'quality': 'SD',
                                         'scraper': self.name, 'url': final_link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - self.start_time
            send_log(self.name, end_time, count)
        return self.sources
    except Exception, argument:
        return self.sources
def get_source(self, url, title, year, season, episode, start_time):
    try:
        headers = {'User-Agent': client.agent()}
        OPEN = client.request(url, headers=headers)
        holder = client.parseDOM(OPEN, 'div', attrs={'class': 'bwa-content'})[0]
        holder = client.parseDOM(holder, 'a', ret='href')[0]
        links = client.request(holder, headers=headers)
        Regex = client.parseDOM(links, 'iframe', ret='src', attrs={'class': 'metaframe rptss'})
        count = 0
        for link in Regex:
            qual = ''   # default when the page exposes no quality metadata
            if 'player.php' in link:
                link = client.request(link, headers=headers, output='geturl')
                qual = client.request(link, headers=headers)
                qual = client.parseDOM(qual, 'meta', ret='content')[0]
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0].split('.')[0].title()
            if '1080p' in qual:
                rez = '1080p'
            elif '720p' in qual:
                rez = '720p'
            else:
                rez = 'SD'
            count += 1
            self.sources.append({'source': host, 'quality': rez,
                                 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except:
        pass
#movie4u().scrape_movie('Wonder Woman', '2017','')
#movie4u().scrape_episode('Suits','2011','','8','5','','')
def get_source(self, item_url, title, year, start_time, qual):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        OPEN = client.request(item_url, headers=headers)
        Endlinks = re.compile('<iframe src="(.+?)"', re.DOTALL).findall(OPEN)
        for link in Endlinks:
            count += 1
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0].split('.')[0].title()
            self.sources.append({'source': host, 'quality': qual,
                                 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
    return self.sources
#movienolimit().scrape_movie('Upgrade', '2018', '')
def get_source(self, url, title, year, season, episode, start_time):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        Endlinks = re.compile("<tr id=.+?a href='(.+?)'.+?class='quality'>(.+?) BR<", re.DOTALL).findall(r)
        for link1, qual in Endlinks:
            r = client.request(link1, headers=headers)
            Endlinks1 = re.compile('id="link".+?href="(.+?)"', re.DOTALL).findall(r)
            for link in Endlinks1:
                count += 1
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0].split('.')[0].title()
                self.sources.append({'source': host, 'quality': qual,
                                     'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def get_source(self, start_url, title, year, season, episode, start_time):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        r = client.request(start_url, headers=headers)
        Endlinks = re.compile(
            'class="iaconbox center floatright".+?title="Torrent magnet link" href="(.+?)">.+?class="cellMainLink">(.+?)</a>.+?class="nobr center">(.+?)</span></td>',
            re.DOTALL).findall(r)
        for Magnet, quality, size in Endlinks:
            Magnet = Magnet.replace('https://mylink.me.uk/?url=', '')
            qual = quality_tags.check_sd_url(quality)
            count += 1
            self.sources.append({'source': 'Torrent', 'quality': size + ' ' + qual,
                                 'scraper': self.name, 'url': Magnet,
                                 'direct': False, 'debridonly': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def check_for_play(self, link, title, year, season, episode, start_time):
    try:
        frame_page = requests.get(link).content
        links = re.compile('class="playlist".+?src="(.+?)"', re.DOTALL).findall(frame_page)
        count = 0
        for url in links:
            # rewrite known embed endpoints to their direct-player equivalents
            # (see the table-driven sketch after this function)
            url = url.replace('videozoo.me/embed.php', 'videozoo.me/videojs/') \
                     .replace('playbb.me/embed.php', 'playbb.me/new/') \
                     .replace('easyvideo.me/gogo/', 'easyvideo.me/gogo/new/') \
                     .replace('play44.net/embed.php', 'play44.net/new/') \
                     .replace('&file=', '&vid=')
            host = url.split('//')[1].replace('www.', '')
            host = host.split('/')[0].split('.')[0].title()
            url = self.resolve(url)
            count += 1
            self.sources.append({'source': host, 'quality': 'SD',
                                 'scraper': self.name, 'url': url, 'direct': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season, episode)
    except:
        pass
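# The chained .replace() calls in check_for_play can equivalently be kept in a
# table, which makes it easier to add the next embed host. A minimal sketch;
# the pairs below simply restate the rewrites already hard-coded above:
EMBED_REWRITES = [
    ('videozoo.me/embed.php', 'videozoo.me/videojs/'),
    ('playbb.me/embed.php', 'playbb.me/new/'),
    ('easyvideo.me/gogo/', 'easyvideo.me/gogo/new/'),
    ('play44.net/embed.php', 'play44.net/new/'),
    ('&file=', '&vid='),
]

def rewrite_embed_url(url):
    # apply every known embed -> direct-player rewrite in order
    for old, new in EMBED_REWRITES:
        url = url.replace(old, new)
    return url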
def scrape_movie(self, title, year, imdb, debrid=False):
    count = 0
    try:
        start_time = time.time()
        search_id = '%s %s' % (clean_search(title), year)
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        html = requests.get(start_url, headers=headers, timeout=5).content
        posts = client.parseDOM(html, 'item')
        posts = [(client.parseDOM(i, 'title')[0], client.parseDOM(i, 'a', ret='href')) for i in posts if i]
        posts = [i[1] for i in posts if clean_title(i[0]) == clean_title(title)][0]
        for url in posts:
            if 'cmovies' in url:
                continue
            link = 'https:' + url if url.startswith('//') else url
            if '1080' in link:
                qual = '1080p'
            elif '720' in link:
                qual = '720p'
            else:
                qual = 'SD'
            host = url.split('//')[1].replace('www.', '')
            host = host.split('/')[0].split('.')[0].title()
            count += 1
            self.sources.append({'source': host, 'quality': qual,
                                 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    # the site only carries older titles, so skip anything newer than 1980
    if int(year) > 1980:
        return self.sources
    try:
        start_time = time.time()
        query = urllib.quote_plus(clean_search(title.lower()))
        start_url = urlparse.urljoin(self.base_link, self.search_link % query)
        headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
        count = 0
        html = client.request(start_url, headers=headers)
        posts = client.parseDOM(html, 'div', attrs={'class': 'post'})
        posts = [(dom_parser.parse_dom(i, 'a', req='href')[0]) for i in posts if i]
        posts = [(i.attrs['href'], i.content) for i in posts if i]
        post = [(i[0]) for i in posts if clean_title(i[1]) == clean_title(title)][0]
        r = client.request(post, headers=headers)
        y = client.parseDOM(r, 'h1')[0]
        if not year in y:
            return self.sources
        links = client.parseDOM(r, 'source', ret='src')
        link = [i for i in links if i.endswith('mp4')][0]
        link += '|User-Agent=%s&Referer=%s' % (client.agent(), post)
        link = urllib.quote(link, ':/-_|&+=')
        count += 1
        self.sources.append({'source': 'bnw', 'quality': 'SD',
                             'scraper': self.name, 'url': link, 'direct': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def get_source(self, start_url, title, year, season, episode, start_time):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        r = client.request(start_url, headers=headers)
        Endlinks = re.compile(
            'class="text-muted3 smaller pad-l2".+?style="color:green"></i>(.+?)</span>.+?rel="nofollow" href="(.+?)".+?class="progress-bar prog-blue prog-l".+?>(.+?)</div></div>',
            re.DOTALL).findall(r)
        for qual, Magnet, size in Endlinks:
            count += 1
            self.sources.append({'source': 'Torrent', 'quality': size + ' ' + qual,
                                 'scraper': self.name, 'url': Magnet,
                                 'direct': False, 'debridonly': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def get_sources(self, episode_url, title, year, season, episode, start_time):
    try:
        links = client.request(episode_url)
        links = client.parseDOM(links, 'div', attrs={'class': 'll-item'})
        count = 0
        for link in links:
            data = dom.parse_dom(link, 'a')[0]
            host = data.content
            if not filter_host(host):
                continue
            count += 1
            url = data.attrs['href']
            self.sources.append({'source': host, 'quality': 'DVD',
                                 'scraper': self.name, 'url': url, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
    return self.sources
def get_source(self, item_url, title, year, start_time):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        OPEN = client.request(item_url, headers=headers)
        frame = client.parseDOM(OPEN, 'iframe', ret='src')[0]
        data = client.request(frame, headers=headers)
        data = client.parseDOM(data, 'ul', attrs={'class': 'menuPlayer'})[0]
        links = client.parseDOM(data, 'a', ret='href')
        for link in links:
            qual = quality_tags.check_sd_url(link)
            if qual == 'SD' and 'openload' in link:
                # openload URLs carry no quality hint; read it from the embed page instead
                data = client.request(link, headers=headers)
                data = client.parseDOM(data, 'meta', ret='content')[0]
                qual2, info = quality_tags.get_release_quality(data, None)
            else:
                qual2 = qual
            count += 1
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0].split('.')[0].title()
            self.sources.append({'source': host, 'quality': qual2,
                                 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
#hdvix().scrape_movie('Black Panther', '2018', 'tt1825683', False)
def get_source(self, url, title, year, season, episode, start_time):
    sources = []
    try:
        count = 0
        if url is None:
            return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        self.title = data['tvshowtitle']
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode']))
        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode']))
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = self.tvsearch.format(urllib.quote_plus(query).replace('+', '-'))
        items = self._get_items(url)
        for item in items:
            try:
                name = item[0]
                quality, info = quality_tags.get_release_quality(name, name)
                info.append(item[2])
                info = ' | '.join(info)
                url = item[1]
                url = url.split('&tr')[0]   # drop tracker parameters; the info-hash is enough
                count += 1
                qual = '{0} | {1}'.format(quality, info)
                self.sources.append({'source': 'MAGNET', 'quality': qual,
                                     'scraper': self.name, 'url': url,
                                     'direct': False, 'debridonly': True})
            except BaseException:
                pass
        if dev_log == 'true':
            end_time = time.time() - float(start_time)
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def get_source(self, start_url, title, year, season, episode, start_time):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        r = client.request(start_url, headers=headers)
        Endlinks = re.compile(
            'class="imagnet icon16" href="(.+?)">.+?<font color=#004E98>(.+?)</font>.+?><b>(.+?)</b></a',
            re.DOTALL).findall(r)
        for Magnet, size, quality in Endlinks:
            qual = quality_tags.get_release_quality(quality, None)[0]
            count += 1
            self.sources.append({'source': 'Torrent', 'quality': qual + ' ' + size,
                                 'scraper': self.name, 'url': Magnet,
                                 'direct': False, 'debridonly': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def get_source(self, start_url, title, year, season, episode, start_time):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        r = client.request(start_url, headers=headers)
        Endlinks = re.compile(
            'torrent" rel="nofollow".+?img alt="(.+?)".+?href="(.+?)".+?class="is-hidden-touch">(.+?)</td>',
            re.DOTALL).findall(r)
        for qual, Magnet, size in Endlinks:
            # percent-decode the magnet link and drop the display-name parameter
            Magnet = Magnet.replace('%3A', ':').replace('%3F', '?').replace('%3D', '=').split('&dn=')[0]
            qual = quality_tags.get_release_quality(qual, None)[0]
            count += 1
            self.sources.append({'source': 'Torrent', 'quality': qual + ' ' + size,
                                 'scraper': self.name, 'url': Magnet,
                                 'direct': False, 'debridonly': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        count = 0
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s/search/?keyword=%s' % (self.base_link, urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, headers=headers).content
        match = re.compile('class="ml-item".+?href="(.+?)".+?<b>(.+?)</b>.+?<b>(.+?)</b>.+?alt="(.+?)"',
                           re.DOTALL).findall(html)
        for item_url1, date, res, name in match:
            item_url = urlparse.urljoin(self.base_link, item_url1)
            if not clean_title(search_id) == clean_title(name):
                continue
            OPEN = scraper.get(item_url, headers=headers).content
            Endlinks = re.compile('class="movie_links"><li(.+?)<h3><b class="icon-share-alt"',
                                  re.DOTALL).findall(OPEN)[0]
            links = re.compile('target="_blank" href="(.+?)"', re.DOTALL).findall(Endlinks)
            for link in links:
                if not link.startswith('http'):
                    continue
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0]
                if not filter_host(host):
                    continue
                count += 1   # only count links that survive the host filter
                self.sources.append({'source': host, 'quality': res,
                                     'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def get_source(self, item_url, title, year, season, episode, debrid, start_time):
    try:
        count = 0
        frames = []
        frames += client.parseDOM(item_url, 'iframe', ret='src')
        frames += client.parseDOM(item_url, 'a', ret='href')
        frames += client.parseDOM(item_url, 'source', ret='src')
        frames += client.parseDOM(item_url, 'enclosure', ret='url')
        try:
            q = re.findall('<strong>Quality:</strong>([^<]+)', item_url, re.DOTALL)[0]
            if 'high' in q.lower():
                qual = '720p'
            elif 'cam' in q.lower():
                qual = 'CAM'
            else:
                qual = 'SD'
        except:
            qual = 'SD'
        for link in frames:
            if 'http://24hd.org' in link:
                continue
            if '.pl/link/' in link:
                continue
            if 'seehd.pl/d/' in link:
                r = self.scraper.get(link).content
                link = client.parseDOM(r, 'iframe', ret='src')[0]
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0].lower()
            if debrid is True:
                # list the link as debrid-only when the host resolves via Real-Debrid;
                # unsupported hosts are skipped entirely in debrid mode
                rd_domains = get_rd_domains()
                if host not in rd_domains:
                    continue
                count += 1
                self.sources.append({'source': host, 'quality': qual,
                                     'scraper': self.name, 'url': link,
                                     'direct': False, 'debridonly': True})
            # a host can also be listed as a free source if it passes the filter
            if not filter_host(host):
                continue
            count += 1
            self.sources.append({'source': host, 'quality': qual,
                                 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
def get_source(self, url, title, year, season, episode, start_time):
    try:
        self.items = []
        count = 0
        if url is None:
            return self.sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else data['year']
        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        # fetch the first three result pages in parallel
        search = self.tvsearch if 'tvshowtitle' in data else self.moviesearch
        urls = [search.format(urllib.quote(query), page) for page in ('1', '2', '3')]
        threads = [workers.Thread(self._get_items, url) for url in urls]
        [i.start() for i in threads]
        [i.join() for i in threads]
        threads2 = []
        for i in self.items:
            count += 1
            threads2.append(workers.Thread(self._get_sources, i))
        [i.start() for i in threads2]
        [i.join() for i in threads2]
        if dev_log == 'true':
            end_time = time.time() - float(start_time)
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def get_source(self, item_url, title, year, season, episode, start_time):
    count = 0
    try:
        if item_url is None:
            return self.sources
        qual = re.search('Quality\s*:(.+?)<br', item_url, re.DOTALL).groups()[0]
        qual = re.sub('<.+?>', '', qual)
        qual, info = quality_tags.get_release_quality(qual, qual)
        # NB: the header key must be 'User-Agent'; the original 'User_Agent' was never sent
        headers = {'Origin': self.base_link,
                   'Referer': client.parseDOM(item_url, 'link')[0],
                   'X-Requested-With': 'XMLHttpRequest',
                   'User-Agent': client.agent()}
        # hidden form fields required by the download endpoint
        fn = client.parseDOM(item_url, 'input', attrs={'name': 'FName'}, ret='value')[0]
        fs = client.parseDOM(item_url, 'input', attrs={'name': 'FSize'}, ret='value')[0]
        fsid = client.parseDOM(item_url, 'input', attrs={'name': 'FSID'}, ret='value')[0]
        post_url = self.base_link + '/thanks-for-downloading/'
        form_data = {'FName': fn, 'FSize': fs, 'FSID': fsid}
        link = client.request(post_url, post=form_data, headers=headers)
        stream_url = client.parseDOM(link, 'meta', attrs={'http-equiv': 'refresh'}, ret='content')[0]
        stream_url = client.replaceHTMLCodes(stream_url).split('url=')[-1]
        stream_url += '|User-Agent=%s' % urllib.quote(client.agent())
        count += 1
        self.sources.append({'source': 'DirectLink', 'quality': qual,
                             'scraper': self.name, 'url': stream_url, 'direct': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title + ' | ' + stream_url, year,
                     season=season, episode=episode)
    except:
        pass
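# The endpoint above answers with a meta-refresh tag rather than a Location
# header, so the stream URL is recovered by splitting its content attribute on
# 'url='. An illustrative trace (the exact refresh string is an assumption):
#   '3; url=http://host/path/movie.mp4'.split('url=')[-1]
#     -> 'http://host/path/movie.mp4'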
def get_source(self, item_url, title, year, start_time):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        OPEN = client.request(item_url, headers=headers)
        Endlinks = dom_parser.parse_dom(OPEN, 'a', req='player-data')
        Endlinks = [(i.attrs['player-data'], i.content) for i in Endlinks if i]
        if 'Season' in year:
            # episode mode: `year` carries a 'Season x Serie y' tag instead of a year
            Endlinks = [(i[0], 'SD') for i in Endlinks if i[1] in year]
        else:
            Endlinks = [(i[0], i[1]) for i in Endlinks if i]
        for link, quality in Endlinks:
            qual = quality_tags.check_sd_url(quality)
            if 'vidcloud' in link:
                link = 'https:' + link if link.startswith('//') else link
                data = client.request(link, headers=headers)
                link = re.findall('''file\s*:\s*['"](.+?)['"].+?type['"]\s*:\s*['"](.+?)['"]''',
                                  data, re.DOTALL)[0]
                host = link[1]
                link = link[0] + '|User-Agent=%s&Referer=https://vidcloud.icu/' % urllib.quote(client.agent())
                direct = True
            else:
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0].split('.')[0].title()
                direct = False
            count += 1
            self.sources.append({'source': host, 'quality': qual,
                                 'scraper': self.name, 'url': link, 'direct': direct})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
#seriesonline8().scrape_movie('Black Panther', '2018', 'tt1825683', False)
#seriesonline8().scrape_episode('Suits','2011','','8','5','','')
def get_source(self, link, title, year, season, episode, start_time):
    try:
        html = client.request(link)
        match = re.compile('var link_server.+?"(.+?)"', re.DOTALL).findall(html)
        count = 0
        for link in match:
            if not link.startswith('https:'):
                link = 'http:' + link
            if 'vidnode' in link:
                if not 'load.php' in link:
                    continue
                html = client.request(link)
                grab = re.compile("sources.+?file: '(.+?)',label: '(.+?)'", re.DOTALL).findall(html)
                for end_link, rez in grab:
                    if '1080' in rez:
                        res = '1080p'
                    elif '720' in rez:
                        res = '720p'
                    else:
                        res = 'SD'
                    count += 1
                    self.sources.append({'source': 'Vidnode', 'quality': res,
                                         'scraper': self.name, 'url': end_link, 'direct': False})
            else:
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0]
                if not filter_host(host):
                    continue
                count += 1
                self.sources.append({'source': host, 'quality': 'SD',
                                     'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except:
        pass
def get_sources(self, url, title, year, season, episode, start_time):
    try:
        count = 0
        url = urlparse.urljoin(self.base_link, url) if url.startswith('/') else url
        r = client.request(url)
        data = re.findall(r'\s*(eval.+?)\s*</script', r, re.DOTALL)[1]
        data = jsunpack.unpack(data).replace('\\', '')
        # The unpacked script builds its AJAX path from concatenated fragments, e.g.:
        # https://www.primewire.ink/ajax-78583.php?slug=watch-2809620-Black-Panther&cp=7TYP4N
        # var rtv='aja';var aa='x-7';var ba='85';var ca='83';var da='.ph';var ea='p?sl';var fa='ug=';var ia='&cp=7T';var ja='YP';var ka='4N';var code=ia+ja+ka;var page=rtv+aa+ba+ca+da+ea+fa;function goml(loc){$('#div1').load(domain+page+loc+code)}
        patern = '''rtv='(.+?)';var aa='(.+?)';var ba='(.+?)';var ca='(.+?)';var da='(.+?)';var ea='(.+?)';var fa='(.+?)';var ia='(.+?)';var ja='(.+?)';var ka='(.+?)';'''
        links_url = re.findall(patern, data, re.DOTALL)[0]
        slug = 'slug={}'.format(url.split('/')[-1])
        links_url = self.base_link + [''.join(links_url)][0].replace('slug=', slug)
        links = client.request(links_url)
        links = client.parseDOM(links, 'tbody')
        for link in links:
            try:
                data = [(client.parseDOM(link, 'a', ret='href')[0],
                         client.parseDOM(link, 'span', attrs={'class': 'version_host'})[0])][0]
                link = urlparse.urljoin(self.base_link, data[0])
                host = data[1]
                if not filter_host(host):
                    continue
                count += 1   # the original never incremented count, so logging always reported 0
                self.sources.append({'source': host, 'quality': 'SD',
                                     'scraper': self.name, 'url': link, 'direct': False})
            except:
                pass
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
    return self.sources
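# Tracing the deobfuscation above with the sample values from the comment:
#   ''.join(('aja', 'x-7', '85', '83', '.ph', 'p?sl', 'ug=', '&cp=7T', 'YP', '4N'))
#     -> 'ajax-78583.php?slug=&cp=7TYP4N'
#   .replace('slug=', 'slug=watch-2809620-Black-Panther')
#     -> 'ajax-78583.php?slug=watch-2809620-Black-Panther&cp=7TYP4N'
# which, prefixed with base_link, reproduces the sample AJAX URL.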
def get_source(self, media_url, title, year, season, episode, start_time):
    # build an 's01e05'-style tag to match against episode link titles
    season_bollox = "0%s" % season if len(season) < 2 else season
    episode_bollox = "0%s" % episode if len(episode) < 2 else episode
    all_bollox = 's%se%s' % (season_bollox, episode_bollox)
    try:
        headers = {'User-Agent': client.agent()}
        html = client.request(media_url, headers=headers)
        match = re.findall(r'<li><a href="([^"]+)">([^<>]*)<span.+?>', str(html), re.I | re.DOTALL)
        count = 0
        for media_url, media_title in match:
            if all_bollox in media_title.lower():
                link = client.request(media_url, headers=headers)
                frame = client.parseDOM(link, 'iframe', ret='src')
                for frame_link in frame:
                    self.sources.append({'source': 'Openload', 'quality': 'Unknown',
                                         'scraper': self.name, 'url': frame_link, 'direct': False})
                cool_links = re.compile('"dwn-box".+?ref="(.+?)" rel="nofollow">(.+?)<span',
                                        re.DOTALL).findall(link)
                for vid_url, res in cool_links:
                    if '1080' in res:
                        res = '1080p'
                    elif '720' in res:
                        res = '720p'
                    elif 'HD' in res:
                        res = 'HD'
                    else:
                        res = 'SD'
                    count += 1
                    vid_url += '|User-Agent=%s&Referer=%s' % (client.agent(), media_url)
                    vid_url = urllib.quote(vid_url, '|:?/&+=_-')
                    self.sources.append({'source': 'Direct', 'quality': res,
                                         'scraper': self.name, 'url': vid_url, 'direct': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season='', episode='')
    except:
        pass
def get_source(self, item_url, title, year, season, episode, start_time):
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(item_url, headers=headers)
        data = client.parseDOM(r, 'table', attrs={'class': 'source-links'})[0]
        data = client.parseDOM(data, 'tr')
        data = [(client.parseDOM(i, 'a', ret='href')[0], client.parseDOM(i, 'td')[1])
                for i in data if 'version' in i.lower()]   # keep only "Watch Version" rows
        Endlinks = [(i[0], re.sub('<.+?>', '', i[1])) for i in data if i]
        count = 0
        for link, host in Endlinks:
            if 'filebebo' in host:
                continue   # host requires a captcha
            if 'fruitad' in host:
                # fruitad pages hide the real URL in an og:url meta tag
                link = client.request(link)
                link = client.parseDOM(link, 'meta', attrs={'name': 'og:url'}, ret='content')[0]
                if not link:
                    continue
            import resolveurl
            if resolveurl.HostedMediaFile(link):
                from universalscrapers.modules import quality_tags
                quality, info = quality_tags.get_release_quality(link, link)
                if quality == 'SD':
                    quality = 'DVD'
                host = host.split('/')[0].split('.')[0].title()
                count += 1
                self.sources.append({'source': host, 'quality': quality,
                                     'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season='', episode='')
    except:
        pass
def get_source(self, url, title, year, season, episode, start_time):
    sources = []
    try:
        count = 0
        if url is None:
            return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        tit = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else data['year']
        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = urlparse.urljoin(self.base_link, self.search_link % (urllib.quote(query)))
        r = client.request(url)
        r = client.parseDOM(r, 'table', attrs={'id': 'searchResult'})[0]
        posts = client.parseDOM(r, 'td')
        posts = [i for i in posts if 'detName' in i]
        for post in posts:
            # normalise non-breaking-space entities to plain spaces
            post = post.replace('&nbsp;', ' ')
            name = client.parseDOM(post, 'a')[0]
            t = name.split(hdlr)[0]
            if not clean_title(t) == clean_title(tit):
                continue
            try:
                y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
            except BaseException:
                y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper()
            if not y == hdlr:
                continue
            links = client.parseDOM(post, 'a', ret='href')
            magnet = [i for i in links if 'magnet:' in i][0]
            url = magnet.split('&tr')[0]
            count += 1
            quality, info = quality_tags.get_release_quality(name, name)
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            info.append(size)
            info = ' | '.join(info)
            qual = '{0} | {1}'.format(quality, info)
            self.sources.append({'source': 'Torrent', 'quality': qual,
                                 'scraper': self.name, 'url': url,
                                 'direct': False, 'debridonly': True})
        if dev_log == 'true':
            end_time = time.time() - float(start_time)
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
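# Worked example of the size normalisation above: MiB/MB figures are divided
# by 1024, GiB/GB pass through, and everything is reformatted in GB:
#   '700 MiB' -> 700.0 / 1024 -> '0.68 GB'
#   '1,4 GiB' -> 1.4 / 1      -> '1.40 GB'   (comma decimal separator handled)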
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        r = client.request(start_url, headers=headers)
        posts = client.parseDOM(r, 'div', attrs={'id': 'movie-\d+'})
        posts = [(client.parseDOM(i, 'h4')[0]) for i in posts if i]
        posts = [(client.parseDOM(i, 'a', ret='href')[0], client.parseDOM(i, 'a')[0]) for i in posts if i]
        count = 0
        for link, found_title in posts:
            link = urlparse.urljoin(self.base_link, link) if link.startswith('/') else link
            if not clean_title(title) == clean_title(found_title):
                continue
            result = client.request(link, headers=headers)
            y = client.parseDOM(result, 'div', attrs={'class': 'showValue showValueRelease'})[0]
            if not year == y:
                continue
            streams = client.parseDOM(result, 'div', attrs={'class': 'linkTr'})
            for stream in streams:
                quality = client.parseDOM(stream, 'div', attrs={'class': 'linkQualityText'})[0]
                link = client.parseDOM(stream, 'div', attrs={'class': 'linkHidden linkHiddenUrl'})[0]
                if 'vidnode' in link:
                    continue
                if 'HD' in quality:
                    quality = 'HD'
                else:
                    quality = 'SD'
                host = quality_tags._give_host(link)
                count += 1
                self.sources.append({'source': host, 'quality': quality,
                                     'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def get_source(self, media_url, title, year, season, episode, start_time):
    # build an 's01e05'-style tag to match against episode link titles
    season_bollox = "0%s" % season if len(season) < 2 else season
    episode_bollox = "0%s" % episode if len(episode) < 2 else episode
    all_bollox = 's%se%s' % (season_bollox, episode_bollox)
    try:
        headers = {'User-Agent': client.agent()}
        html = client.request(media_url, headers=headers)
        match = re.findall(r'<li><a href="([^"]+)">([^<>]*)<span.+?>', str(html), re.I | re.DOTALL)
        count = 0
        for media_url, media_title in match:
            if all_bollox in media_title.lower():
                link = client.request(media_url, headers=headers)
                frame = client.parseDOM(link, 'iframe', ret='src')
                #print frame
                for frame_link in frame:
                    self.sources.append({'source': 'Openload', 'quality': 'Unknown',
                                         'scraper': self.name, 'url': frame_link, 'direct': False})
                cool_links = re.compile('"dwn-box".+?ref="(.+?)" rel="nofollow">(.+?)<span',
                                        re.DOTALL).findall(link)
                for vid_url, res in cool_links:
                    if '1080' in res:
                        res = '1080p'
                    elif '720' in res:
                        res = '720p'
                    elif 'HD' in res:
                        res = 'HD'
                    else:
                        res = 'SD'
                    count += 1
                    vid_url += '|User-Agent=%s&Referer=%s' % (client.agent(), media_url)
                    vid_url = urllib.quote(vid_url, '|:?/&+=_-')
                    self.sources.append({'source': 'Direct', 'quality': res,
                                         'scraper': self.name, 'url': vid_url, 'direct': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season='', episode='')
    except:
        pass
def get_source(self, item_url, title, year, start_time):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        OPEN = client.request(item_url, headers=headers)
        Endlinks = dom_parser.parse_dom(OPEN, 'a', req='player-data')
        Endlinks = [(i.attrs['player-data'], i.content) for i in Endlinks if i]
        if 'Season' in year:
            # episode mode: `year` carries a 'Season x Serie y' tag instead of a year
            Endlinks = [(i[0], 'SD') for i in Endlinks if i[1] in year]
        else:
            Endlinks = [(i[0], i[1]) for i in Endlinks if i]
        for link, quality in Endlinks:
            qual = quality_tags.check_sd_url(quality)
            if 'vidcloud' in link:
                link = 'https:' + link if link.startswith('//') else link
                data = client.request(link, headers=headers)
                link = re.findall('''file\s*:\s*['"](.+?)['"].+?type['"]\s*:\s*['"](.+?)['"]''',
                                  data, re.DOTALL)[0]
                host = link[1]
                link = link[0] + '|User-Agent=%s&Referer=https://vidcloud.icu/' % client.agent()
                direct = True
            else:
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0].split('.')[0].title()
                direct = False
            count += 1
            self.sources.append({'source': host, 'quality': qual,
                                 'scraper': self.name, 'url': link, 'direct': direct})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
#seriesonline8().scrape_movie('Black Panther', '2018', 'tt1825683', False)
#seriesonline8().scrape_episode('Suits','2011','','8','5','','')
def get_source(self, url, title, year, season, episode, start_time):
    try:
        scraper = cfscrape.create_scraper()
        headers = {'Origin': 'http://hdpopcorns.com', 'Referer': url,
                   'X-Requested-With': 'XMLHttpRequest', 'User-Agent': client.agent()}
        count = 0
        data = scraper.get(url, headers=headers).content
        data = client.parseDOM(data, 'div', attrs={'class': 'thecontent'})[0]
        # hidden form fields required by the quality-selection POST
        FN720p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileName720p'})[0]
        FS720p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileSize720p'})[0]
        FSID720p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FSID720p'})[0]
        FN1080p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileName1080p'})[0]
        FS1080p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileSize1080p'})[0]
        FSID1080p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FSID1080p'})[0]
        post = {'FileName720p': FN720p, 'FileSize720p': FS720p, 'FSID720p': FSID720p,
                'FileName1080p': FN1080p, 'FileSize1080p': FS1080p, 'FSID1080p': FSID1080p,
                'x': 173, 'y': 22}
        data = scraper.post('%s/select-movie-quality.php' % self.base_link, data=post).content
        data = client.parseDOM(data, 'div', attrs={'id': 'btn_\d+p'})
        u = [client.parseDOM(i, 'a', ret='href')[0] for i in data]
        for url in u:
            quality, info = quality_tags.get_release_quality(url, url)
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            count += 1
            self.sources.append({'source': 'DirectLink', 'quality': quality,
                                 'scraper': self.name, 'url': url, 'direct': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except:
        pass
#hdpopcorn().scrape_movie('Blade Runner 2049', '2017', '', False)  # known edge case: title contains two years
#hdpopcorn().scrape_movie('Deadpool 2', '2018', '', False)  # known edge case: title contains a number
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        html = client.request(start_url, headers=headers)
        results = client.parseDOM(html, 'div', attrs={'class': 'video_title'})
        items = []
        for item in results:
            try:
                data = dom_parser.parse_dom(item, 'a', req=['href', 'title'])[0]
                t = data.content
                y = re.findall('\((\d{4})\)', data.attrs['title'])[0]
                qual = data.attrs['title'].split('-')[1]
                link = data.attrs['href']
                if not clean_title(t) == clean_title(title):
                    continue
                if not y == year:
                    continue
                items += [(link, qual)]
            except:
                pass
        count = 0   # moved out of the loop; the original reset it on every item
        for item in items:
            try:
                url = item[0] if item[0].startswith('http') else urlparse.urljoin(self.base_link, item[0])
                r = client.request(url)
                qual = client.parseDOM(r, 'h1')[0]
                res = quality_tags.get_release_quality(item[1], qual)[0]
                url = re.findall('''frame_url\s*=\s*["']([^']+)['"]\;''', r, re.DOTALL)[0]
                url = url if url.startswith('http') else urlparse.urljoin('https://', url)
                if 'vidlink' in url:
                    # vidlink pages need an XHR POST before they reveal the stream list
                    html = client.request(url, headers=headers)
                    action = re.findall("action'\s*:\s*'([^']+)", html)[0]
                    postID = re.findall("postID\s*=\s*'([^']+)", html)[0]
                    url = 'https://vidlink.org' + re.findall("var\s*url\s*=\s*'([^']+)", html)[0]
                    data = {'browserName': 'Firefox', 'platform': 'Win32',
                            'postID': postID, 'action': action}
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Referer'] = url
                    html = client.request(url, post=data, headers=headers)
                    html = jsunpack.unpack(html).replace('\\', '')
                    sources = json.loads(re.findall('window\.srcs\s*=\s*([^;]+)', html, re.DOTALL)[0])
                    for src in sources:
                        # verify the stream responds before listing it
                        r = requests.head(src['url'], headers={'User-Agent': client.agent()})
                        if r.status_code < 400:
                            movie_link = src['url']
                            count += 1
                            self.sources.append({'source': 'Googlelink', 'quality': res,
                                                 'scraper': self.name, 'url': movie_link, 'direct': True})
                        else:
                            continue
            except:
                pass
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        print argument
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
#watch32().scrape_movie('Black Panther', '2018', 'tt1825683', False)
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        r = client.request(start_url, headers=headers)
        posts = client.parseDOM(r, 'div', attrs={'id': 'movie-\d+'})
        posts = [(client.parseDOM(i, 'h4')[0]) for i in posts if i]
        count = 0
        for item in posts:
            name = client.parseDOM(item, 'a')[0]
            link = client.parseDOM(item, 'a', ret='href')[0]
            if not clean_title(title) == clean_title(name):
                continue
            link = urlparse.urljoin(self.base_link, link)
            html = client.request(link)
            # episode blocks look like <div class="season" id="season8">
            sep_id = 'Season %s Serie %s' % (int(season), int(episode))
            seasons = client.parseDOM(html, 'div', attrs={'class': 'season'})
            seasons = [i for i in seasons if 'season %s' % int(season) in i.lower()][0]
            epis = re.findall('<h3>(.+?)</div>\s+</div>\s+</div>\s+</div>', seasons,
                              re.DOTALL | re.MULTILINE)
            epis = [i for i in epis if sep_id in i][0]
            streams = client.parseDOM(epis, 'div', attrs={'class': 'linkTr'})
            for stream in streams:
                quality = client.parseDOM(stream, 'div', attrs={'class': 'linkQualityText'})[0]
                link = client.parseDOM(stream, 'div', attrs={'class': 'linkHidden linkHiddenUrl'})[0]
                if 'vidnode' in link:
                    continue
                if 'HD' in quality:
                    quality = 'HD'
                else:
                    quality = 'SD'
                host = quality_tags._give_host(link)
                count += 1
                self.sources.append({'source': host, 'quality': quality,
                                     'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
#vkflix().scrape_movie('Black Panther', '2018', '', False)
#vkflix().scrape_episode('Suits', '2011','','8','5','','')