def get_source(self, start_url, title, year, season, episode, start_time):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        r = client.request(start_url, headers=headers)
        Endlinks = re.compile(
            'torrent" rel="nofollow".+?img alt="(.+?)".+?href="(.+?)".+?class="is-hidden-touch">(.+?)</td>',
            re.DOTALL).findall(r)
        for qual, Magnet, size in Endlinks:
            # Undo the percent-encoding and drop the &dn= display-name parameter.
            Magnet = Magnet.replace('%3A', ':').replace('%3F', '?').replace(
                '%3D', '=').split('&dn=')[0]
            qual = quality_tags.get_release_quality(qual, None)[0]
            count += 1
            self.sources.append({
                'source': 'Torrent',
                'quality': qual + ' ' + size,
                'scraper': self.name,
                'url': Magnet,
                'direct': False,
                'debridonly': True
            })
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
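# A more general way to undo the percent-encoding above (a sketch, not part of
# the original scraper) is urllib.unquote, which decodes every escape rather
# than just %3A/%3F/%3D; the display-name split stays the same:
#
#   import urllib
#   def normalize_magnet(link):
#       # decode all percent-escapes, then drop the &dn= display name
#       return urllib.unquote(link).split('&dn=')[0]
#
#   # normalize_magnet('magnet%3A%3Fxt%3Durn%3Abtih%3AABCDEF&dn=Some.Movie')
#   # -> 'magnet:?xt=urn:btih:ABCDEF'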
def get_source(self, start_url, title, year, season, episode, start_time):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        #scraper = cfscrape.create_scraper()
        #r = scraper.get(start_url, headers=headers)
        r = client.request(start_url, headers=headers)
        Endlinks = re.compile(
            'class="imagnet icon16" href="(.+?)">.+?<font color=#004E98>(.+?)</font>.+?><b>(.+?)</b></a',
            re.DOTALL).findall(r)
        for Magnet, size, quality in Endlinks:
            #Magnet = Magnet.replace('https://mylink.me.uk/?url=', '')
            qual = quality_tags.get_release_quality(quality, None)[0]
            count += 1
            self.sources.append({
                'source': 'Torrent',
                'quality': qual + ' ' + size,
                'scraper': self.name,
                'url': Magnet,
                'direct': False,
                'debridonly': True
            })
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def get_source(self, url, title, year, season, episode, start_time):
    sources = []
    try:
        count = 0
        if url is None:
            return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        self.title = data['tvshowtitle']
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode']))
        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode']))
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = self.tvsearch.format(urllib.quote_plus(query).replace('+', '-'))
        items = self._get_items(url)
        for item in items:
            try:
                name = item[0]
                quality, info = quality_tags.get_release_quality(name, name)
                info.append(item[2])
                info = ' | '.join(info)
                url = item[1]
                url = url.split('&tr')[0]
                count += 1
                qual = '{0} | {1}'.format(quality, info)
                self.sources.append({'source': 'MAGNET', 'quality': qual,
                                     'scraper': self.name, 'url': url,
                                     'direct': False, 'debridonly': True})
            except BaseException:
                pass
        if dev_log == 'true':
            end_time = time.time() - float(start_time)
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
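# `_get_items` is defined elsewhere in this scraper; from the way the loop
# above indexes each item, it presumably returns (name, magnet, size) tuples.
# A minimal commented sketch under that assumption (illustrative only, not
# the real helper):
#
#   def _get_items(self, url):
#       html = client.request(url)
#       rows = client.parseDOM(html, 'tr')
#       return [(client.parseDOM(i, 'a')[0],                  # release name
#                [x for x in client.parseDOM(i, 'a', ret='href')
#                 if x.startswith('magnet:')][0],             # magnet link
#                client.parseDOM(i, 'td')[-1])                # size column
#               for i in rows if 'magnet:' in i]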
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        hdlr = 'S%02dE%02d' % (int(season), int(episode))
        query = clean_search(title)
        query = urllib.quote_plus(query + ' ' + hdlr)
        urls = []
        for link in self.search_links:
            try:
                url = urlparse.urljoin(self.base_link, link % query)
                url = urlparse.urljoin(self.base_link, url)
                r = client.request(url)
                posts = client.parseDOM(r, 'tbody')
                posts = client.parseDOM(posts, 'tr')
                urls += [(client.parseDOM(i, 'button', ret='data-clipboard-text')[0]) for i in posts if i]
            except:
                pass
        count = 0
        for url in urls:
            name = url.split('/')[-1].lower()
            name = client.replaceHTMLCodes(name).replace('%20', '')
            if 'movies' in url:
                continue
            if any(x in url for x in ['italian', 'dubbed', 'teaser', 'subs', 'sub', 'dub',
                                      'samples', 'extras', 'french', 'trailer', 'trailers', 'sample']):
                continue
            t = re.sub('(\.|\(|\[|\s)(S\d+E\d+|S\d+)(\.|\)|\]|\s)(.+|)', '', name, flags=re.I)
            if clean_title(t) not in clean_title(title):
                continue
            y = re.findall('[\.|\(|\[|\s](S\d+E\d+|S\d+)[\.|\)|\]|\s]', name, re.I)[-1].upper()
            if not y == hdlr:
                continue
            res, info = quality_tags.get_release_quality(name, url)
            if any(x in url for x in ['hastidl', '1tehmovies', '62.210.103.107', '79.127',
                                      '213.32.113.82', 'dl5.downloadha', '89.163.255.42',
                                      '185.56.20.142', 's1.0music', 'dl3.yoozdl',
                                      'dl4.lavinmovie.net', 'dl6.lavinmovie.net',
                                      'dl3.upload08.com', 'dl8.uploadt.com', '163.172.6.218',
                                      'samba.allunix.ru', 'server417']):
                count += 1
                url += '|User-Agent=%s&Referer=%s' % (client.agent(), self.base_link)
                url = urllib.quote(url, '|:?/&+=_-')
                self.sources.append(
                    {'source': 'DirectLink', 'quality': res, 'scraper': self.name,
                     'url': url, 'direct': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources

#filepursuit().scrape_movie('Black Panther', '2018', '')
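# Worked example of the name filter above (values are illustrative):
#   url  = 'http://host/path/Some.Show.S03E05.720p.HDTV.x264.mkv'
#   name = 'some.show.s03e05.720p.hdtv.x264.mkv'
#   t    -> 'some.show'   (the SxxExx token, its separators and everything after are stripped)
#   y    -> 'S03E05'      (last SxxExx token found in the name)
# The candidate survives only if clean_title(t) is contained in clean_title(title)
# and y equals hdlr, i.e. 'S%02dE%02d' % (season, episode).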
def get_source(self, start_url, title, year, season, episode, start_time):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        r = client.request(start_url, headers=headers)
        Endlinks = re.compile(
            'class="nobr center">(.+?)</span></td>.+?title="Torrent magnet link" href="(.+?)".+?class="cellMainLink">(.+?)</a>',
            re.DOTALL).findall(r)
        for size, Magnet, qual in Endlinks:
            # Strip the redirector, undo the percent-encoding, then drop the
            # (still-encoded) %26dn display-name parameter.
            Magnet = Magnet.replace('https://mylink.cx/?url=', '')
            Magnet = Magnet.replace('%3A', ':').replace('%3F', '?').replace(
                '%3D', '=').split('%26dn')[0]
            qual = quality_tags.get_release_quality(qual, None)[0]
            count += 1
            self.sources.append({
                'source': 'Torrent',
                'quality': qual + ' ' + size,
                'scraper': self.name,
                'url': Magnet,
                'direct': False,
                'debridonly': True
            })
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def get_source(self, item_url, title, year, start_time):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        OPEN = client.request(item_url, headers=headers)
        frame = client.parseDOM(OPEN, 'iframe', ret='src')[0]
        data = client.request(frame, headers=headers)
        data = client.parseDOM(data, 'ul', attrs={'class': 'menuPlayer'})[0]
        links = client.parseDOM(data, 'a', ret='href')
        for link in links:
            qual = quality_tags.check_sd_url(link)
            # openload pages carry the release name in a meta tag, which gives
            # a better quality estimate than the bare link.
            if qual == 'SD' and 'openload' in link:
                data = client.request(link, headers=headers)
                data = client.parseDOM(data, 'meta', ret='content')[0]
                qual2, info = quality_tags.get_release_quality(data, None)
            else:
                qual2 = qual
            count += 1
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0].split('.')[0].title()
            self.sources.append({'source': host, 'quality': qual2,
                                 'scraper': self.name, 'url': link,
                                 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)

#hdvix().scrape_movie('Black Panther', '2018', 'tt1825683', False)
def get_source(self, item_url, title, year, season, episode, start_time):
    count = 0
    try:
        if item_url is None:
            return self.sources
        qual = re.search('Quality\s*:(.+?)<br', item_url, re.DOTALL).groups()[0]
        qual = re.sub('<.+?>', '', qual)
        qual, info = quality_tags.get_release_quality(qual, qual)
        headers = {
            'Origin': self.base_link,
            'Referer': client.parseDOM(item_url, 'link')[0],
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': client.agent()
        }
        fn = client.parseDOM(item_url, 'input', attrs={'name': 'FName'}, ret='value')[0]
        fs = client.parseDOM(item_url, 'input', attrs={'name': 'FSize'}, ret='value')[0]
        fsid = client.parseDOM(item_url, 'input', attrs={'name': 'FSID'}, ret='value')[0]
        post_url = self.base_link + '/thanks-for-downloading/'
        form_data = {'FName': fn, 'FSize': fs, 'FSID': fsid}
        link = client.request(post_url, post=form_data, headers=headers)
        stream_url = client.parseDOM(link, 'meta', attrs={'http-equiv': 'refresh'}, ret='content')[0]
        stream_url = client.replaceHTMLCodes(stream_url).split('url=')[-1]
        stream_url += '|User-Agent=%s' % urllib.quote(client.agent())
        count += 1
        self.sources.append({
            'source': 'DirectLink',
            'quality': qual,
            'scraper': self.name,
            'url': stream_url,
            'direct': True
        })
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title + ' | ' + stream_url, year,
                     season=season, episode=episode)
    except:
        pass
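# Example of the meta-refresh hop above (illustrative values): the thank-you
# page responds with something like
#   <meta http-equiv="refresh" content="0;url=http://dl.example.com/file.mkv">
# so parseDOM(..., ret='content') yields '0;url=http://dl.example.com/file.mkv'
# and .split('url=')[-1] leaves just the bare download URL.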
def get_source(self, item_url, title, year, season, episode, start_time):
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(item_url, headers=headers)
        data = client.parseDOM(r, 'table', attrs={'class': 'source-links'})[0]
        data = client.parseDOM(data, 'tr')
        data = [(client.parseDOM(i, 'a', ret='href')[0],
                 client.parseDOM(i, 'td')[1]) for i in data
                if 'version' in i.lower()]  #Watch Version rows only
        Endlinks = [(i[0], re.sub('<.+?>', '', i[1])) for i in data if i]
        count = 0
        for link, host in Endlinks:
            if 'filebebo' in host:
                continue  #host with captcha
            if 'fruitad' in host:
                link = client.request(link)
                link = client.parseDOM(link, 'meta', attrs={'name': 'og:url'},
                                       ret='content')[0]  #returns the real url
                if not link:
                    continue
            import resolveurl
            if resolveurl.HostedMediaFile(link):
                from universalscrapers.modules import quality_tags
                quality, info = quality_tags.get_release_quality(link, link)
                if quality == 'SD':
                    quality = 'DVD'
                host = host.split('/')[0].split('.')[0].title()
                count += 1
                self.sources.append({
                    'source': host,
                    'quality': quality,
                    'scraper': self.name,
                    'url': link,
                    'direct': False
                })
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season='', episode='')
    except:
        pass
def get_source(self, url, title, year, season, episode, start_time):
    sources = []
    try:
        count = 0
        if url is None:
            return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        tit = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = urlparse.urljoin(self.base_link, self.search_link % (urllib.quote(query)))
        r = client.request(url)
        r = client.parseDOM(r, 'table', attrs={'id': 'searchResult'})[0]
        posts = client.parseDOM(r, 'td')
        posts = [i for i in posts if 'detName' in i]
        for post in posts:
            post = post.replace('&nbsp;', ' ')
            name = client.parseDOM(post, 'a')[0]
            t = name.split(hdlr)[0]
            if not clean_title(re.sub('(\(|\))', '', t)) == clean_title(tit):
                continue
            try:
                y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
            except BaseException:
                y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper()
            if not y == hdlr:
                continue
            links = client.parseDOM(post, 'a', ret='href')
            magnet = [i for i in links if 'magnet:' in i][0]
            url = magnet.split('&tr')[0]
            count += 1
            quality, info = quality_tags.get_release_quality(name, name)
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            info.append(size)
            info = ' | '.join(info)
            qual = '{0} | {1}'.format(quality, info)
            self.sources.append({'source': 'Torrent', 'quality': qual,
                                 'scraper': self.name, 'url': url,
                                 'direct': False, 'debridonly': True})
        if dev_log == 'true':
            end_time = time.time() - float(start_time)
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
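# Worked examples of the size normalization above:
#   '700 MiB' -> div = 1024 -> 700 / 1024        -> '0.68 GB'
#   '1,4 GiB' -> comma treated as decimal, div 1 -> '1.40 GB'
#   '2.3 GB'  -> div = 1                         -> '2.30 GB'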
def _get_sources(self, item):
    try:
        name = item[0]
        quality, info = quality_tags.get_release_quality(item[1], name)
        info.append(item[2])
        info = ' | '.join(info)
        qual = '{0} | {1}'.format(quality, info)
        data = client.request(item[1])
        data = client.parseDOM(data, 'a', ret='href')
        url = [i for i in data if 'magnet:' in i][0]
        url = url.split('&tr')[0]
        self.sources.append(
            {'source': 'MAGNET', 'quality': qual, 'scraper': self.name,
             'url': url, 'direct': False, 'debridonly': True})
    except BaseException:
        pass
def get_source(self, url, title, year, season, episode, start_time):
    try:
        scraper = cfscrape.create_scraper()
        headers = {'Origin': 'http://hdpopcorns.com', 'Referer': url,
                   'X-Requested-With': 'XMLHttpRequest',
                   'User-Agent': client.agent()}
        count = 0
        data = scraper.get(url, headers=headers).content
        data = client.parseDOM(data, 'div', attrs={'class': 'thecontent'})[0]
        FN720p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileName720p'})[0]
        FS720p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileSize720p'})[0]
        FSID720p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FSID720p'})[0]
        FN1080p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileName1080p'})[0]
        FS1080p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileSize1080p'})[0]
        FSID1080p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FSID1080p'})[0]
        post = {'FileName720p': FN720p, 'FileSize720p': FS720p, 'FSID720p': FSID720p,
                'FileName1080p': FN1080p, 'FileSize1080p': FS1080p, 'FSID1080p': FSID1080p,
                'x': 173, 'y': 22}
        data = scraper.post('%s/select-movie-quality.php' % self.base_link, data=post).content
        data = client.parseDOM(data, 'div', attrs={'id': 'btn_\d+p'})
        u = [client.parseDOM(i, 'a', ret='href')[0] for i in data]
        for url in u:
            quality, info = quality_tags.get_release_quality(url, url)
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            count += 1
            self.sources.append(
                {'source': 'DirectLink', 'quality': quality, 'scraper': self.name,
                 'url': url, 'direct': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except:
        pass

#hdpopcorn().scrape_movie('Blade Runner 2049', '2017', '', False)  # title contains 2 years
#hdpopcorn().scrape_movie('Deadpool 2', '2018', '', False)  # title contains a number
def get_source(self, start_url, title, year, season, episode, start_time):
    try:
        count = 0
        headers = {'User-Agent': client.agent()}
        r = client.request(start_url, headers=headers)
        Endlinks = re.compile(
            'class="resultdiv".+?<a href="(.+?)".+?class="resultdivtopname" >(.+?)</div></a>.+?class="resultdivbottonlength">(.+?)</div>',
            re.DOTALL).findall(r)
        for nxtpg, info, size in Endlinks:
            nxtpg = self.base_link + nxtpg
            info = info.lstrip()
            qual = quality_tags.get_release_quality(info, None)[0]
            nxtpg = nxtpg.split('torrent/')[1].split('/')[1]
            Magnet = 'magnet:?xt=urn:btih:' + nxtpg
            count += 1
            self.sources.append({
                'source': 'Torrent',
                'quality': qual + ' ' + size,
                'scraper': self.name,
                'url': Magnet,
                'direct': False,
                'debridonly': True
            })
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
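# The two splits above appear to expect result URLs of the form
# '<base>/torrent/<id>/<infohash>/...', so split('torrent/')[1] gives
# '<id>/<infohash>/...' and split('/')[1] pulls out the infohash, which is
# enough to rebuild a bare magnet link:
#   'magnet:?xt=urn:btih:' + '<40-char hex (or 32-char base32) infohash>'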
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        html = client.request(start_url, headers=headers)
        results = client.parseDOM(html, 'div', attrs={'class': 'video_title'})
        items = []
        for item in results:
            try:
                data = dom_parser.parse_dom(item, 'a', req=['href', 'title'])[0]
                t = data.content
                y = re.findall('\((\d{4})\)', data.attrs['title'])[0]
                qual = data.attrs['title'].split('-')[1]
                link = data.attrs['href']
                if not clean_title(t) == clean_title(title):
                    continue
                if not y == year:
                    continue
                items += [(link, qual)]
            except:
                pass
        count = 0
        for item in items:
            try:
                url = item[0] if item[0].startswith('http') else urlparse.urljoin(self.base_link, item[0])
                r = client.request(url)
                qual = client.parseDOM(r, 'h1')[0]
                res = quality_tags.get_release_quality(item[1], qual)[0]
                url = re.findall('''frame_url\s*=\s*["']([^']+)['"]\;''', r, re.DOTALL)[0]
                url = url if url.startswith('http') else urlparse.urljoin('https://', url)
                if 'vidlink' in url:
                    html = client.request(url, headers=headers)
                    action = re.findall("action'\s*:\s*'([^']+)", html)[0]
                    postID = re.findall("postID\s*=\s*'([^']+)", html)[0]
                    url = 'https://vidlink.org' + re.findall("var\s*url\s*=\s*'([^']+)", html)[0]
                    data = {'browserName': 'Firefox', 'platform': 'Win32',
                            'postID': postID, 'action': action}
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Referer'] = url
                    html = client.request(url, post=data, headers=headers)
                    html = jsunpack.unpack(html).replace('\\', '')
                    sources = json.loads(re.findall('window\.srcs\s*=\s*([^;]+)', html, re.DOTALL)[0])
                    for src in sources:
                        r = requests.head(src['url'], headers={'User-Agent': client.agent()})
                        if r.status_code < 400:
                            movie_link = src['url']
                            count += 1
                            self.sources.append({'source': 'Googlelink', 'quality': res,
                                                 'scraper': self.name, 'url': movie_link,
                                                 'direct': True})
                        else:
                            continue
            except:
                pass
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        print argument
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources

#watch32().scrape_movie('Black Panther', '2018', 'tt1825683', False)
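# The requests.head() probe above is a cheap liveness check: HEAD fetches only
# the response headers, and any status below 400 (2xx success, 3xx redirect)
# is taken to mean the google-hosted file is still reachable, e.g.:
#
#   r = requests.head(src['url'], headers={'User-Agent': client.agent()})
#   # r.status_code -> 200 while the link is alive, 403/404 once it expires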
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        start_time = time.time()
        hdlr = 'S%02dE%02d' % (int(season), int(episode))
        query = clean_search(title)
        query = urllib.quote_plus(query + ' ' + hdlr).replace('+', '%20')
        urls = []
        for link in self.search_links:
            try:
                url = urlparse.urljoin(self.base_link, link % query)
                url = urlparse.urljoin(self.base_link, url)
                r = client.request(url)
                posts = client.parseDOM(r, 'tbody')
                posts = client.parseDOM(posts, 'tr')
                urls += [(client.parseDOM(i, 'button', ret='data-clipboard-text')[0]) for i in posts if i]
            except BaseException:
                return
        count = 0
        for url in urls:
            name = url.split('/')[-1].lower()
            name = client.replaceHTMLCodes(name).replace('%20', '').replace('%27', "'")
            if 'movies' in url:
                continue
            if any(x in url for x in ['italian', 'dubbed', 'teaser', 'subs', 'sub', 'dub',
                                      'samples', 'extras', 'french', 'trailer', 'trailers', 'sample']):
                continue
            t = re.sub('(\.|\(|\[|\s)(S\d+E\d+|S\d+)(\.|\)|\]|\s)(.+|)', '', name, flags=re.I)
            if clean_title(t) not in clean_title(title):
                continue
            y = re.findall('[\.|\(|\[|\s](S\d+E\d+|S\d+)[\.|\)|\]|\s]', name, re.I)[-1].upper()
            if not y == hdlr:
                continue
            res, info = quality_tags.get_release_quality(name, url)
            count += 1
            url += '|User-Agent=%s&Referer=%s' % (client.agent(), self.base_link)
            url = urllib.quote(url, '|%:?/&+=_-')
            host = url.split('/')[2]
            self.sources.append({
                'source': host,
                'quality': res,
                'scraper': self.name,
                'url': url,
                'direct': True
            })
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources

#filepursuit().scrape_movie('Black Panther', '2018', '')
def get_source(self, m_url, title, year, season, episode, start_time):
    try:
        hdlr = 'S%02dE%02d' % (int(season), int(episode)) if not season == '' else year
        r = client.request(m_url)
        if not hdlr in m_url.upper():
            quality = client.parseDOM(r, 'h4')[0]
            regex = '<p>\s*%s\s*</p>(.+?)</ul>' % hdlr
            data = re.search(regex, r, re.DOTALL | re.I).groups()[0]
            frames = client.parseDOM(data, 'a', ret='href')
        else:
            data = client.parseDOM(r, 'div', attrs={'class': 'entry-content'})[0]
            data = re.compile('<h4>(.+?)</h4>(.+?)</ul>', re.DOTALL).findall(data)
            frames = []
            for qual, links in data:
                quality = qual
                frames += client.parseDOM(links, 'a', ret='href')
        for link in frames:
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0]
            if not filter_host(host):
                continue
            if 'filebebo' in link:
                continue
            rez, info = quality_tags.get_release_quality(quality, link)
            if '1080p' in rez and not host.lower() in ['openload', 'oload']:
                rez = '720p'
            elif '720p' in quality and not host.lower() in ['openload', 'oload']:
                rez = 'SD'
            else:
                rez, info = quality_tags.get_release_quality(link, link)
            self.count += 1
            self.sources.append({
                'source': host,
                'quality': rez,
                'scraper': self.name,
                'url': link,
                'direct': False
            })
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, self.count, title, year, season=season, episode=episode)
    except:
        pass
def _get_sources(self, link, title, year, _type, season, episode, start_time):
    try:
        squery = self.query.replace('%20', '+')
        self.headers = {'User-Agent': self.ua,
                        'Referer': self.search_referer.format(squery)}
        srch = cache.get(client.request, 8, self.base_link)
        srch = client.parseDOM(srch, 'form', ret='action', attrs={'name': 'frm'})[0]
        srch = srch[1:] if srch.startswith('/') else srch
        link = urlparse.urljoin(self.base_link, link % (srch, self.query))
        r = client.request(link, headers=self.headers)
        posts = client.parseDOM(r, 'tbody')[0]
        posts = client.parseDOM(posts, 'tr')
        urls = [(client.parseDOM(i, 'a', ret='href')[1],
                 client.parseDOM(i, 'a')[1],
                 client.parseDOM(i, 'a', ret='href', attrs={'id': 'refer.+?'})[0])
                for i in posts if i]
        count = 0
        for url, name, host in urls:
            name = client.replaceHTMLCodes(name).replace('%20', ' ').replace('%27', "'")
            if any(x in url.lower() for x in ['italian', 'teaser', 'bonus.disc', 'subs', 'sub',
                                              'samples', 'extras', 'french', 'trailer',
                                              'trailers', 'sample']):
                continue
            if _type == 'movie':
                t = name.split(year)[0]
                if clean_title(t) not in clean_title(title):
                    continue
                y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
                if not year == y:
                    continue
            else:
                hdlr = 'S%02dE%02d' % (int(season), int(episode))
                t = name.split(hdlr)[0]
                if clean_title(t) not in clean_title(title):
                    continue
                y = re.findall('[\.|\(|\[|\s|\_](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_]', name, re.I)[-1].upper()
                if not y == hdlr:
                    continue
            quality, info = quality_tags.get_release_quality(name, url)
            info = ' | '.join(info)
            res = '{0} | {1}'.format(quality, info)
            count += 1
            url = urlparse.urljoin(self.base_link, url) if url.startswith('/') else url
            host = host.split('/')[2]
            self.sources.append({
                'source': host,
                'quality': res,
                'scraper': self.name,
                'url': url,
                'direct': True
            })
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except:
        pass
def get_sources(self, url, title, year, season, episode, start_time):
    try:
        count = 0
        if url is None:
            return self.sources
        r = client.request(url)
        frame = client.parseDOM(r, 'table', attrs={'class': 'striped'})[0]
        frame = client.parseDOM(frame, 'a', ret='href')[0]
        frame = urlparse.urljoin(self.base_link, frame) if frame.startswith('/') else frame
        r = client.request(frame)
        #var hash = '9fafa6c0c1771b38a1c72a5bd893c503';
        hash = re.findall('''var\s*hash\s*=\s*['"]([^'"]+)''', r, re.MULTILINE)[0]
        pdata = 'hash=%s&confirm_continue=I+understand%s+I+want+to+continue' % (str(hash), '%2C')
        data = client.request(frame, post=pdata, referer=frame)
        frames = re.compile(
            '''vlink.+?title=['"]([^'"]+).+?href=['"]([^'"]+).+?onclick.+?>(.+?)</a''',
            re.M | re.DOTALL).findall(data.replace('\n', ''))
        for name, link, host in frames:
            try:
                # strip macron accents from host names (ō→o, ē→e, ā→a, ī→i)
                host = host.replace('\xc5\x8d', 'o').replace('\xc4\x93', 'e').replace(
                    '\xc4\x81', 'a').replace('\xc4\xab', 'i')
                if not filter_host(host):
                    continue
                count += 1
                quality, info = quality_tags.get_release_quality(name, name)
                if quality == '4K':
                    quality = '1080p'
                elif quality == '1080p' and not 'openload' in host:
                    quality = '720p'
                link = urlparse.urljoin(self.base_link, link) if link.startswith('/') else link
                self.sources.append({
                    'source': host,
                    'quality': quality,
                    'scraper': self.name,
                    'url': link,
                    'direct': False
                })
            except:
                pass
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def get_sources(self, url, title, year, season, episode, start_time):
    try:
        if url is None:
            return self.sources
        count = 0
        url, hdlr = url[0], url[1]
        main = []
        try:
            headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
            scraper = cfscrape.create_scraper()
            data = scraper.get(url, headers=headers).content
            main = dom.parse_dom(data, 'div', {'class': 'postContent'})
            main = [i.content for i in main]
            comments = dom.parse_dom(data, 'div', {'class': re.compile('content')})
            main += [i.content for i in comments]
        except:
            pass
        for con in main:
            try:
                frames = client.parseDOM(con, 'a', ret='href')
                for link in frames:
                    if 'youtube' in link:
                        continue
                    if any(x in link for x in ['.rar', '.zip', '.iso']) or any(
                            link.endswith(x) for x in ['.rar', '.zip', '.iso']):
                        continue
                    host = re.findall('([\w]+[.][\w]+)$',
                                      urlparse.urlparse(link.strip().lower()).netloc)[0]
                    host = client.replaceHTMLCodes(host)
                    host = host.encode('utf-8')
                    if not hdlr.lower() in link.lower():
                        continue
                    quality, info = quality_tags.get_release_quality(link, link)
                    if link in str(self.sources):
                        continue
                    rd_domains = get_rd_domains()
                    if host in rd_domains:
                        count += 1
                        self.sources.append({
                            'source': host,
                            'quality': quality,
                            'scraper': self.name,
                            'url': link,
                            'direct': False,
                            'debridonly': True
                        })
            except:
                pass
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year, season=season, episode=episode)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources

#Releasebb().scrape_movie('Black Panther', '2018', '', True)
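# Worked example of the host extraction above (illustrative link):
#   link = 'http://www.Rapidgator.net/file/abc123'
#   urlparse.urlparse(link.strip().lower()).netloc -> 'www.rapidgator.net'
#   re.findall('([\w]+[.][\w]+)$', ...)            -> ['rapidgator.net']
# The resulting domain is then matched against the Real-Debrid host list
# returned by get_rd_domains() before the link is kept as a debrid source.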