def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() search_id = clean_search(title.lower()) start_url = '%s?s=%s' % (self.base_link, search_id.replace( ' ', '+')) print start_url headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() html = scraper.get(start_url, headers=headers, timeout=5).content # print html match = re.compile( '<div class="result-item">.+?href="(.+?)".+?alt="(.+?)".+?class="year">(.+?)</span>', re.DOTALL).findall(html) for item_url, name, yrs in match: #print item_url #print name ##print yrs if clean_title(search_id).lower() == clean_title(name).lower(): if year in yrs: print 'pass me ' + item_url self.get_source(item_url, title, year, start_time) return self.sources except Exception, argument: print argument if dev_log == 'true': error_log(self.name, argument) return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    """Search the site for a movie, then harvest external hoster links.

    Matches search results on the cleaned title, opens the detail page
    and collects every http(s) hoster link that passes filter_host().
    Returns self.sources (list of source dicts) on success or failure.
    """
    try:
        count = 0
        urls = []
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s/search/?keyword=%s' %(self.base_link, urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, headers=headers).content
        match = re.compile('class="ml-item".+?href="(.+?)".+?<b>(.+?)</b>.+?<b>(.+?)</b>.+?alt="(.+?)"',re.DOTALL).findall(html)
        for item_url1, date, res, name in match:
            item_url = urlparse.urljoin(self.base_link, item_url1)
            # Skip results whose cleaned title is not an exact match.
            if not clean_title(search_id) == clean_title(name):
                continue
            OPEN = scraper.get(item_url, headers=headers).content
            # Hoster links sit between the "movie_links" list and the share icon.
            Endlinks = re.compile('class="movie_links"><li(.+?)<h3><b class="icon-share-alt"', re.DOTALL).findall(OPEN)[0]
            links = re.compile('target="_blank" href="(.+?)"', re.DOTALL).findall(Endlinks)
            for link in links:
                if not link.startswith('http'):
                    continue
                # NOTE(review): count is incremented before the filter_host
                # check, so filtered hosts are counted too — confirm intended.
                count += 1
                # Derive the bare host name (drop scheme, www. and path).
                host = link.split('//')[1].replace('www.', '')
                host = host.split('/')[0]
                if not filter_host(host):
                    continue
                self.sources.append({'source': host, 'quality': res, 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() scraper = cfscrape.create_scraper() scrape = urllib.quote_plus(title.lower()) start_url = '%s/search.html?keyword=%s' %(self.base_link,scrape) #print 'SEARCH > '+start_url headers = {'User_Agent':User_Agent} html = scraper.get(start_url, headers=headers,timeout=10).content #print html thumbs = re.compile('<ul class="listing items">(.+?)</ul> ',re.DOTALL).findall(html) thumb = re.compile('href="(.+?)".+?alt="(.+?)"',re.DOTALL).findall(str(thumbs)) for link,link_title in thumb: if clean_title(title).lower() == clean_title(link_title).lower(): #print "<<<<<<<<<<<<<link>>>>>>>>>>"+link page_link = self.base_link+link headers = {'User_Agent':User_Agent} holdpage = scraper.get(page_link, headers=headers,timeout=5).content datecheck = re.compile('<span>Release: </span>(.+?)</li>',re.DOTALL).findall(holdpage)[0] if year in datecheck: movie_link = re.compile('<li class="child_episode".+?href="(.+?)"',re.DOTALL).findall(holdpage)[0] movie_link = self.base_link + movie_link #print 'GW >>>'+movie_link self.get_source(movie_link,title,year,'','',start_time) else:pass return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name,argument) return self.sources
def get_source(self,movie_link, title, year, season, episode, start_time): try: print 'passed show '+movie_link headers = {'User_Agent':User_Agent} scraper = cfscrape.create_scraper() html = scraper.get(movie_link,headers=headers,timeout=5).content # grab_id = re.compile('data-ids="(.+?)"',re.DOTALL).findall(html)[0] # nonce = re.compile('ajax_get_video_info":"(.+?)"',re.DOTALL).findall(html)[0] # print grab_id # print nonce # req_post = '%s/wp-admin/admin-ajax.php' %(self.base_link) # headers = {'User-Agent':User_Agent,'Referer':movie_link} # data = {'action':'ajax_get_video_info','ids':grab_id, # 'server':'1','nonce':nonce} # get_links = scraper.post(req_post,headers=headers,data=data,verify=False).content # print get_links links = re.compile('"file":"(.+?)","label":"(.+?)"',re.DOTALL).findall(html) count = 0 for final_url,res in links: final_url = final_url.replace('\\','') if '1080' in res: rez = '1080p' elif '720' in res: rez = '720p' else: rez = 'SD' count +=1 self.sources.append({'source': 'DirectLink','quality': rez,'scraper': self.name,'url': final_url,'direct': True}) if dev_log=='true': end_time = time.time() - start_time send_log(self.name,end_time,count,title,year, season=season,episode=episode) except: pass
def scrape_movie(self, title, year, imdb, debrid=False):
    """Find a movie via the site's search page using fuzzy title matching.

    Uses difflib.SequenceMatcher (> 80% ratio) to tolerate punctuation
    differences, confirms the release year on the detail page, then
    delegates to self.get_sources. Returns self.sources, or [] on error.
    """
    try:
        start_time = time.time()
        start_url = self.base_link + self.search_link + title.replace(' ', '%20')
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, timeout=10).content
        match = re.compile(
            '<div class="video-thumbimg">.+?href="(.+?)".+?title="(.+?)"',
            re.DOTALL).findall(html)
        for url, name in match:
            # Compare squashed lower-case names; hyphens/spaces ignored.
            season_name_check = title.lower().replace(' ', '')
            name_check = name.replace('-', '').replace(' ', '').lower()
            check = difflib.SequenceMatcher(a=season_name_check, b=name_check)
            d = check.ratio() * 100
            if int(d) > 80:
                html2 = scraper.get(self.base_link + url, timeout=10).content
                final_page_match = re.compile(
                    '<div class="vc_col-sm-8 wpb_column column_container">.+?Released:(.+?)<.+?/series/(.+?)"',
                    re.DOTALL).findall(html2)
                for release_year, fin_url in final_page_match:
                    release_year = release_year.replace(' ', '')
                    fin_url = self.base_link + '/series/' + fin_url
                    # Require an exact year match before scraping sources.
                    if release_year == year:
                        self.get_sources(fin_url, title, year, '', '', start_time)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() if not debrid: return [] search_id = clean_search(title.lower()) start_url = "%s/?s=%s+%s" % (self.base_link, search_id.replace(' ', '+'), year) #print start_url headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() OPEN = scraper.get(start_url, headers=headers, timeout=5).content content = re.compile('<h2><a href="(.+?)"', re.DOTALL).findall(OPEN) for url in content: if 'truehd' in url: continue if not clean_title(title).lower() in clean_title(url).lower(): continue #print 'PASS '+url self.get_source(url, title, year, '', '', start_time) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, 'Check Search') return self.sources
def scrape_episode(self,title, show_year, year, season, episode, imdb, tvdb, debrid = False): try: start_time = time.time() scraper = cfscrape.create_scraper() scrape = urllib.quote_plus(title.lower()) start_url = '%s/search.html?keyword=%s' %(self.base_link,scrape) #print 'SEARCH > '+start_url headers = {'User_Agent':User_Agent} html = scraper.get(start_url, headers=headers,timeout=10).content thumbs = re.compile('<ul class="listing items">(.+?)</ul> ',re.DOTALL).findall(html) thumb = re.compile('href="(.+?)".+?alt="(.+?)"',re.DOTALL).findall(str(thumbs)) for link,link_title in thumb: if clean_title(title).lower() in clean_title(link_title).lower(): season_chk = '-season-%s' %season #print 'season chk% '+season_chk if season_chk in link: page_link = self.base_link + link #print 'page_link:::::::::::::: '+page_link headers = {'User_Agent':User_Agent} holdpage = scraper.get(page_link, headers=headers,timeout=5).content series_links = re.compile('<li class="child_episode".+?href="(.+?)"',re.DOTALL).findall(holdpage) for movie_link in series_links: episode_chk = '-episode-%sBOLLOX' %episode spoof_link = movie_link + 'BOLLOX' if episode_chk in spoof_link: movie_link = self.base_link + movie_link #print 'pass TWS episode check: '+movie_link self.get_source(movie_link,title,year,season,episode,start_time) else:pass return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name,argument) return self.sources
def scrape_movie(self, title, year, imdb, debrid = False): try: start_time = time.time() search_id = clean_search(title.lower()) start_url = '%s?s=%s' %(self.base_link,search_id.replace(' ','+')) print start_url headers = {'User_Agent':User_Agent} scraper = cfscrape.create_scraper() html = scraper.get(start_url,headers=headers, timeout=5).content # print html match = re.compile('<div class="result-item">.+?href="(.+?)".+?alt="(.+?)".+?class="year">(.+?)</span>',re.DOTALL).findall(html) for item_url,name,yrs in match: #print item_url #print name ##print yrs if clean_title(search_id).lower() == clean_title(name).lower(): if year in yrs: print 'pass me '+item_url self.get_source(item_url,title,year,start_time) return self.sources except Exception, argument: print argument if dev_log == 'true': error_log(self.name,argument) return self.sources
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() search_id = clean_search(title.lower()) start_url = '%s/?s=%s' % (self.base_link, search_id.replace(' ', '+')) headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() html = scraper.get(start_url, headers=headers, timeout=5).content Regex = re.compile( 'class="result-item".+?href="(.+?)".+?alt="(.+?)"', re.DOTALL).findall(html) for item_url, name in Regex: if not clean_title(title).lower() == clean_title(name).lower(): continue if not year in name: continue movie_link = item_url print 'Grabbed movie url to pass > ' + movie_link self.get_source(movie_link, title, year, '', '', start_time) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def get_source(self, item_url, title, year, season, episode, start_time):
    """Parse a detail page's table rows into hoster sources.

    Each <tr> carries a quality span (class "quality_<qual>") and an
    <a data-href> hoster link; hosts failing filter_host() are skipped.
    Returns self.sources in every case.
    """
    try:
        count = 0
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
            'Referer': self.base_link}
        scraper = cfscrape.create_scraper()
        r = scraper.get(item_url, headers=headers).content
        data = client.parseDOM(r, 'tr')
        for item in data:
            # Quality is encoded in the span's class name, e.g. "quality_720p".
            qual = client.parseDOM(item, 'span', ret='class')[0]
            qual = qual.replace('quality_', '')
            link = client.parseDOM(item, 'a', ret='data-href')[0]
            # Derive the bare host name (drop scheme, www. and path).
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0]
            if not filter_host(host):
                continue
            count += 1
            self.sources.append({'source': host, 'quality': qual,
                                 'scraper': self.name,
                                 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year,
                     season='', episode='')
        return self.sources
    except BaseException:
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() if not debrid: return [] search_id = clean_search(title.lower()) start_url = "%s/?s=%s+%s" % (self.base_link, search_id.replace(' ','+'),year) #print start_url headers = {'User_Agent':User_Agent} scraper = cfscrape.create_scraper() OPEN = scraper.get(start_url,headers=headers,timeout=5).content content = re.compile('<h2><a href="(.+?)"',re.DOTALL).findall(OPEN) for url in content: if 'truehd' in url: continue if not clean_title(title).lower() in clean_title(url).lower(): continue #print 'PASS '+url self.get_source(url,title,year,'','',start_time) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name,'Check Search') return self.sources
def get_source(self, movie_link, title, year, season, episode, start_time):
    """Collect lazy-loaded embed links from a movie page as hoster sources.

    Reads every data-lazy-src URL, skips YouTube trailers, buckets quality
    from the URL text and appends one source dict per embed.
    """
    try:
        page = cfscrape.create_scraper().get(movie_link).content
        embeds = re.compile('data-lazy-src="(.+?)"', re.DOTALL).findall(page)
        count = 0
        for embed in embeds:
            if 'youtube' in embed:
                continue  # trailer, not a hoster
            if '1080p' in embed:
                qual = '1080p'
            elif '720p' in embed:
                qual = '720p'
            else:
                qual = 'SD'
            # Bare host name: drop scheme, www. prefix, path and TLD.
            hoster = embed.split('//')[1].replace('www.', '')
            hoster = hoster.split('/')[0].split('.')[0].title()
            count += 1
            self.sources.append({'source': hoster, 'quality': qual,
                                 'scraper': self.name, 'url': embed,
                                 'direct': False})
        if dev_log == 'true':
            elapsed = time.time() - start_time
            send_log(self.name, elapsed, count, title, year,
                     season=season, episode=episode)
    except:
        pass
def scrape_episode(self,title, show_year, year, season, episode, imdb, tvdb, debrid = False): try: start_time = time.time() if not debrid: return [] season_url = "0%s"%season if len(season)<2 else season episode_url = "0%s"%episode if len(episode)<2 else episode sea_epi ='s%se%s'%(season_url,episode_url) search_id = clean_search(title.lower()) start_url = "%s/?s=%s+%s" % (self.base_link, search_id.replace(' ','+'),sea_epi) print start_url headers = {'User_Agent':User_Agent} scraper = cfscrape.create_scraper() OPEN = scraper.get(start_url,headers=headers,timeout=5).content content = re.compile('<h2><a href="(.+?)"',re.DOTALL).findall(OPEN) for url in content: if not clean_title(title).lower() in clean_title(url).lower(): continue #print 'PASS '+url self.get_source(url,title,year,season,episode,start_time) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name,'Check Search') return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    """Search with '<title> <year>', parse name/year from result titles.

    Extracts the trailing 4-digit year from each result title (after
    dropping 720p/1080p tokens), matches the cleaned name and year, then
    delegates the matching URL to self.get_source.
    """
    try:
        start_time = time.time()
        search_id = '%s %s' % (clean_search(title.lower()), year)
        start_url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(search_id))
        headers = {'User-Agent': client.agent()}
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, headers=headers).content
        data = client.parseDOM(html, 'div', attrs={'id': 'content_box'})[0]
        data = client.parseDOM(data, 'h2')  # list with all search results
        data = [dom_parser.parse_dom(i, 'a', req=['href', 'title'])[0] for i in data if i]  # scraping url-title
        links = [(i.attrs['href'], i.attrs['title']) for i in data if i]  # (link, title) per result
        for m_url, m_title in links:
            # Last 4-digit group (resolution tokens removed) is the year.
            movie_year = re.findall("(\d{4})", re.sub('\d{3,4}p', '', m_title))[-1]
            movie_name = m_title.split(movie_year)[0]
            if not clean_title(title) == clean_title(movie_name):
                continue
            if not year in movie_year:
                continue
            url = m_url
            self.get_source(url, title, year, '', '', start_time)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() search_id = clean_search(title.lower()) start_url = '%s/?s=%s' % (self.base_link, search_id.replace(' ', '+')) #print 'search>>>'+start_url headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() html = scraper.get(start_url, headers=headers, timeout=5).content Regex = re.compile( 'id="mt-.+?href="(.+?)".+?class="tt">(.+?)</span>', re.DOTALL).findall(html) for item_url, item_name in Regex: if not clean_title(title).lower() == clean_title( item_name).lower(): continue if not year in item_name: continue #print item_url self.get_source(item_url, title, year, '', '', start_time) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    """Search self.search_url, pick the exact-title/year post, delegate on.

    Picks the first result card whose HTML mentions the year and whose
    title attribute cleans to the requested title. A miss raises
    IndexError, which is absorbed by the broad except below.
    """
    try:
        start_time = time.time()
        search_id = urllib.quote_plus(clean_search(title))
        query = self.search_url % search_id
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
            'Referer': self.base_link
        }
        scraper = cfscrape.create_scraper()
        r = scraper.get(query, headers=headers).content
        posts = client.parseDOM(
            r, 'div', attrs={'class': 'col-xs-4 col-sm-4 col-md-3 col-lg-3'})
        # Keep only result cards that mention the year anywhere in their HTML.
        posts = [
            dom.parse_dom(i, 'a', req='href')[0] for i in posts if year in i
        ]
        # First card whose title attribute cleans to the requested title;
        # IndexError when nothing matches (caught below).
        post = [
            i.attrs['href'] for i in posts
            if clean_title(title) == clean_title(i.attrs['title'])
        ][0]
        self.get_source(post, title, year, '', '', start_time)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, 'Check Search')
    return self.sources
def get_source(self, url, title, year, season, episode, start_time):
    """Scrape <singlelink> blocks for Real-Debrid-capable download links.

    Skips .rar/.srt files, buckets quality from the URL text and keeps
    only hosts present in the Real-Debrid domain list.
    """
    try:
        # BUG FIX: header key must be 'User-Agent' (hyphen), not 'User_Agent'.
        headers = {'User-Agent': User_Agent}
        scraper = cfscrape.create_scraper()
        links = scraper.get(url, headers=headers, timeout=3).content
        Regex = re.compile('<singlelink>(.+?)</strong><br', re.DOTALL).findall(links)
        LINK = re.compile('href="([^"]+)"', re.DOTALL).findall(str(Regex))
        count = 0
        # Hoist the RD domain lookup out of the loop; it is loop-invariant.
        rd_domains = get_rd_domains()
        for url in LINK:
            if '.rar' in url or '.srt' in url:
                continue  # skip archives and subtitle files
            if '1080' in url:
                res = '1080p'
            elif '720' in url:
                res = '720p'
            elif 'HDTV' in url:
                res = 'DVD'
            else:
                # BUG FIX: res was previously left unbound here ('else: pass'),
                # raising NameError (silently swallowed by the bare except) on
                # the first link without a quality marker, or reusing the
                # previous link's stale value. Default to SD like the sibling
                # scrapers do.
                res = 'SD'
            host = url.split('//')[1].replace('www.', '')
            host = host.split('/')[0].lower()
            if host in rd_domains:
                count += 1
                self.sources.append({'source': host, 'quality': res,
                                     'scraper': self.name, 'url': url,
                                     'direct': False, 'debridonly': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year,
                     season=season, episode=episode)
    except:
        pass
def get_source(self, item_url, title, year, start_time, res):
    """Decode base64 'TrvideoFirst' embeds and resolve openlinks pages.

    Each embed payload is base64-encoded iframe HTML; the iframe src is
    extracted by string splitting, and 'openlinks' pages are fetched to
    read the final hoster URL from their meta tag.
    """
    try:
        count = 0
        scraper = cfscrape.create_scraper()
        headers = {'User-Agent': random_agent()}
        OPEN = scraper.get(item_url, headers=headers, timeout=5).content
        Endlinks = re.compile('TrvideoFirst\">(.+?)</div>', re.DOTALL).findall(OPEN)
        for link2 in Endlinks:
            # Embed payloads are base64-encoded iframe HTML.
            link1 = base64.b64decode(link2)
            # Pull the iframe src URL out of the decoded markup.
            Endlink = link1.split('src=')[1].split('allowfullscreen')[0].replace('"', '').rstrip()
            if 'openlinks' in Endlink:
                headers = {'User-Agent': random_agent()}
                OPEN = requests.get(Endlink, headers=headers, timeout=5, allow_redirects=True).content
                # Final hoster URL sits in a <meta ...url" content="..."> tag.
                finalurl = re.compile('url\" content="(.+?)">', re.DOTALL).findall(OPEN)
                for link in finalurl:
                    count += 1
                    host = link.split('//')[1].replace('www.', '')
                    host = host.split('/')[0].split('.')[0].title()
                    self.sources.append({'source': host, 'quality': res, 'scraper': self.name, 'url': link, 'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False): try: start_time = time.time() if not debrid: return [] season_url = "0%s" % season if len(season) < 2 else season episode_url = "0%s" % episode if len(episode) < 2 else episode sea_epi = 's%se%s' % (season_url, episode_url) start_url = "%s/?s=%s+%s" % (self.base_link, title.replace( ' ', '+').lower(), sea_epi) headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() OPEN = scraper.get(start_url, headers=headers, timeout=5).content content = re.compile('<h2><a href="([^"]+)"', re.DOTALL).findall(OPEN) for url in content: if not clean_title(title).lower() in clean_title(url).lower(): continue self.get_source(url, title, year, season, episode, start_time) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() search_id = clean_search(title.lower()) start_url = '%s?s=%s' % (self.base_link, search_id.replace( ' ', '+')) #print 'scraperchk - scrape_movie - start_url: ' + start_url headers = {'User-Agent': random_agent()} scraper = cfscrape.create_scraper() html = scraper.get(start_url, headers=headers, timeout=5).content match = re.compile( '<li class="TPostMv".+?class="TPMvCn">.+?<a href="(.+?)"><div class="Title">(.+?)</div></a>.+?class="Date">(.+?)</span><span class="Qlty">(.+?)</span>', re.DOTALL).findall(html) for item_url, name, date, res in match: #print 'scraperchk - scrape_movie - name: '+name+ ' '+date #print 'scraperchk - scrape_movie - item_url: '+item_url+' '+res if year in date: if clean_title(search_id).lower() == clean_title( name).lower(): #print 'scraperchk - scrape_movie - Send this URL: ' + item_url self.get_source(item_url, title, year, start_time, res) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument)
def get_source(self, item_url, title, year, start_time): # print item_url try: count = 0 headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() OPEN = scraper.get(item_url, headers=headers, timeout=10).content # print OPEN # match = re.compile('<iframe.+?src="(.+?)"',re.DOTALL).findall(OPEN) # for link in match: # if 'youtube' not in link: # host = link.split('//')[1].replace('www.','') # host = host.split('/')[0].lower() # host = host.split('.')[0] # count+=1 # self.sources.append({'source': host, 'quality': 'SD', 'scraper': self.name, 'url': link,'direct': False}) match2 = re.compile('href="(https://tvmoviestream.me/links/.+?)"', re.DOTALL).findall(OPEN) for altlink in match2: # print altlink headers = {'User-Agent': User_Agent} r = requests.get(altlink, headers=headers, allow_redirects=False) final_url = r.headers['location'] host = final_url.split('//')[1].replace('www.', '') host = host.split('/')[0].lower() host = host.split('.')[0] if '1080' in final_url: res = '1080p' elif '720' in final_url: res = '720p' else: res = 'SD' count += 1 self.sources.append({ 'source': host, 'quality': res, 'scraper': self.name, 'url': final_url, 'direct': False }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year) except Exception, argument: print argument if dev_log == 'true': error_log(self.name, argument) return self.sources
def get_source(self, url, title, year, season, episode, start_time): try: headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() links = scraper.get(url, headers=headers, timeout=3).content LINK = re.compile('href="([^"]+)" rel="nofollow"', re.DOTALL).findall(links) count = 0 for url in LINK: if '.rar' not in url: if '.srt' not in url: if '1080' in url: res = '1080p' elif '720' in url: res = '720p' elif 'HDTV' in url: res = 'HD' else: res = "SD" host = url.split('//')[1].replace('www.', '') host = host.split('/')[0].lower() # if not filter_host(host): # continue # if debrid == "true": rd_domains = get_rd_domains() if host in rd_domains: count += 1 self.sources.append({ 'source': host, 'quality': res, 'scraper': self.name, 'url': url, 'direct': False, 'debridonly': True }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year, season=season, episode=episode) except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    """Probe guessable /film/watch-<title>-<suffix> URLs for a movie page.

    For each candidate URL, verifies the page's hidden title field, then
    harvests 'metaframe rptss' iframe links — resolving streamango embed
    quality and goo.gl shortener redirects specially.
    """
    try:
        start_time = time.time()
        mock_ID = clean_search(title.lower())
        # The site uses several URL suffixes; try each until one matches.
        loop_url = ['online-free', 'for-free', 'online-free-movies', 'free', '']
        for attempt in loop_url:
            movie_url = '%s/film/watch-%s-%s' % (self.base_link, mock_ID.replace(' ', '-'), attempt)
            if movie_url.endswith('-'):
                # Empty suffix: drop both the trailing dash and 'watch-'.
                movie_url = movie_url.replace('watch-', '')[:-1]
            headers = {'User-Agent': User_Agent}
            scraper = cfscrape.create_scraper()
            html = scraper.get(movie_url, headers=headers, timeout=5).content
            match = re.compile('name="title" value="(.+?)"', re.DOTALL).findall(html)
            for item_title in match:
                if not clean_title(title.lower()) == clean_title(item_title.lower()):
                    continue
                Regex = re.compile('</iframe>.+?class="metaframe rptss" src="(.+?)"', re.DOTALL).findall(html)
                count = 0
                for link in Regex:
                    host = link.split('//')[1].replace('www.', '')
                    host = host.split('/')[0].split('.')[0].title()
                    if 'streamango.com' in link:
                        # Read the real height from the embed page for quality.
                        holder = requests.get(link).content
                        qual = re.compile('type:"video/mp4".+?height:(.+?),', re.DOTALL).findall(holder)[0]
                        count += 1
                        self.sources.append({'source': host, 'quality': qual + 'p', 'scraper': self.name, 'url': link, 'direct': False})
                    elif 'goo.gl' in link:
                        # Shortener: read the redirect target (Waaw hoster).
                        headers = {'User-Agent': User_Agent}
                        r = requests.get(link, headers=headers, allow_redirects=False)
                        link = r.headers['location']
                        count += 1
                        self.sources.append({'source': 'Waaw', 'quality': '720p', 'scraper': self.name, 'url': link, 'direct': False})
                    else:
                        count += 1
                        self.sources.append({'source': host, 'quality': '720p', 'scraper': self.name, 'url': link, 'direct': False})
                if dev_log == 'true':
                    end_time = time.time() - start_time
                    send_log(self.name, end_time, count, title, year)
        return self.sources
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def get_source(self, movie_link, title, year, season, episode, start_time): try: print 'passed show ' + movie_link headers = {'User_Agent': User_Agent} scraper = cfscrape.create_scraper() html = scraper.get(movie_link, headers=headers, timeout=5).content # grab_id = re.compile('data-ids="(.+?)"',re.DOTALL).findall(html)[0] # nonce = re.compile('ajax_get_video_info":"(.+?)"',re.DOTALL).findall(html)[0] # print grab_id # print nonce # req_post = '%s/wp-admin/admin-ajax.php' %(self.base_link) # headers = {'User-Agent':User_Agent,'Referer':movie_link} # data = {'action':'ajax_get_video_info','ids':grab_id, # 'server':'1','nonce':nonce} # get_links = scraper.post(req_post,headers=headers,data=data,verify=False).content # print get_links links = re.compile('"file":"(.+?)","label":"(.+?)"', re.DOTALL).findall(html) count = 0 for final_url, res in links: final_url = final_url.replace('\\', '') if '1080' in res: rez = '1080p' elif '720' in res: rez = '720p' else: rez = 'SD' count += 1 self.sources.append({ 'source': 'DirectLink', 'quality': rez, 'scraper': self.name, 'url': final_url, 'direct': True }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year, season=season, episode=episode) except: pass
def _get_items(self, url):
    """Parse a torrent-index table into (name, link, size) tuples.

    Filters rows to those matching self.title and self.hdlr (SxxExx tag
    or year) and extracts a human-readable size, defaulting to '0' when
    no size is present. Returns self.items in every case.
    """
    try:
        headers = {'User-Agent': client.agent()}
        scraper = cfscrape.create_scraper()
        r = scraper.get(url, headers=headers)
        posts = client.parseDOM(r.content, 'tbody')[0]
        posts = client.parseDOM(posts, 'tr')
        for post in posts:
            data = dom.parse_dom(post, 'a', req='href')[1]
            link = urlparse.urljoin(self.base_link, data.attrs['href'])
            name = data.content
            t = name.split(self.hdlr)[0]
            # BUG FIX: the old pattern '(|)' only matches the empty string,
            # so the sub was a no-op; '[()]' actually strips literal
            # parentheses from the release name before title comparison.
            if not clean_title(re.sub('[()]', '', t)) == clean_title(
                    self.title):
                continue
            try:
                # Prefer an SxxExx / Sxx tag; fall back to a bare year.
                y = re.findall(
                    '[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]',
                    name, re.I)[-1].upper()
            except BaseException:
                y = re.findall(
                    '[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]',
                    name, re.I)[-1].upper()
            if not y == self.hdlr:
                continue
            try:
                size = re.findall(
                    '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))',
                    post)[0]
                # Normalise MiB/MB to GB for a uniform display value.
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(
                    re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            self.items.append((name, link, size))
        return self.items
    except BaseException:
        return self.items
def get_source(self,item_url, title, year, start_time): # print item_url try: count = 0 headers = {'User_Agent':User_Agent} scraper = cfscrape.create_scraper() OPEN = scraper.get(item_url,headers=headers,timeout=10).content # print OPEN # match = re.compile('<iframe.+?src="(.+?)"',re.DOTALL).findall(OPEN) # for link in match: # if 'youtube' not in link: # host = link.split('//')[1].replace('www.','') # host = host.split('/')[0].lower() # host = host.split('.')[0] # count+=1 # self.sources.append({'source': host, 'quality': 'SD', 'scraper': self.name, 'url': link,'direct': False}) match2 = re.compile('href="(https://tvmoviestream.me/links/.+?)"',re.DOTALL).findall(OPEN) for altlink in match2: # print altlink headers = {'User-Agent': User_Agent} r = requests.get(altlink,headers=headers,allow_redirects=False) final_url = r.headers['location'] host = final_url.split('//')[1].replace('www.','') host = host.split('/')[0].lower() host = host.split('.')[0] if '1080' in final_url: res = '1080p' elif '720' in final_url: res = '720p' else: res='SD' count += 1 self.sources.append({'source': host, 'quality': res, 'scraper': self.name, 'url': final_url,'direct': False}) if dev_log=='true': end_time = time.time() - start_time send_log(self.name,end_time,count,title,year) except Exception, argument: print argument if dev_log == 'true': error_log(self.name,argument) return self.sources
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    """Fuzzy-match a show+season on the search page, then find the episode.

    SequenceMatcher (> 80% ratio) matches '<title>season<N>'; the season
    page is scanned for the numbered episode, whose URL is handed to
    self.get_sources. Returns self.sources, or [] on error.
    """
    try:
        start_time = time.time()
        start_url = self.base_link + self.search_link + title.replace(' ', '%20') + '%20season%20' + season
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, timeout=10).content
        match = re.compile(
            '<div class="video-thumbimg">.+?href="(.+?)".+?title="(.+?)"',
            re.DOTALL).findall(html)
        for url, name in match:
            # Compare squashed lower-case names; hyphens/spaces ignored.
            season_name_check = title.lower().replace(' ', '') + 'season' + season
            name_check = name.replace('-', '').replace(' ', '').lower()
            check = difflib.SequenceMatcher(a=season_name_check, b=name_check)
            d = check.ratio() * 100
            if int(d) > 80:
                html2 = scraper.get(self.base_link + url + '/season', timeout=10).content
                episodes = re.findall(
                    '<div class="video_container">.+?<a href="(.+?)" class="view_more"></a></div>.+?class="videoHname"><b>(.+?)</b></a></span>.+?<div class="video_date icon-calendar">.+?, (.+?)</div>',
                    html2, re.DOTALL)
                for url2, ep_no, aired_year in episodes:
                    url2 = self.base_link + url2
                    # Displayed label looks like 'Episode 5:'; reduce to '5'.
                    ep_no = ep_no.replace('Episode ', '').replace(':', '')
                    if ep_no == episode:
                        self.get_sources(url2, title, year, season, episode, start_time)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() if not debrid: return self.sources start_url = "%s/?s=%s" % (self.base_link, title.replace(' ', '+').lower()) headers = {'User-Agent': client.agent()} scraper = cfscrape.create_scraper() OPEN = scraper.get(start_url, headers=headers).content content = re.compile('<h2><a href="(.+?)"', re.DOTALL).findall(OPEN) for url in content: self.get_source(url, title, year, '', '', start_time) except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def get_source(self, item_url, title, year, start_time, res):
    """Decode base64 'TrvideoFirst' embeds and resolve openlinks pages.

    Each embed payload is base64-encoded iframe HTML; the iframe src is
    extracted by string splitting, and 'openlinks' pages are fetched to
    read the final hoster URL from their meta tag.
    """
    try:
        count = 0
        scraper = cfscrape.create_scraper()
        headers = {'User-Agent': random_agent()}
        OPEN = scraper.get(item_url, headers=headers, timeout=5).content
        Endlinks = re.compile('TrvideoFirst\">(.+?)</div>',
                              re.DOTALL).findall(OPEN)
        for link2 in Endlinks:
            # Embed payloads are base64-encoded iframe HTML.
            link1 = base64.b64decode(link2)
            # Pull the iframe src URL out of the decoded markup.
            Endlink = link1.split('src=')[1].split(
                'allowfullscreen')[0].replace('"', '').rstrip()
            if 'openlinks' in Endlink:
                headers = {'User-Agent': random_agent()}
                OPEN = requests.get(Endlink, headers=headers, timeout=5,
                                    allow_redirects=True).content
                # Final hoster URL sits in a <meta ...url" content="..."> tag.
                finalurl = re.compile('url\" content="(.+?)">',
                                      re.DOTALL).findall(OPEN)
                for link in finalurl:
                    count += 1
                    host = link.split('//')[1].replace('www.', '')
                    host = host.split('/')[0].split('.')[0].title()
                    self.sources.append({
                        'source': host,
                        'quality': res,
                        'scraper': self.name,
                        'url': link,
                        'direct': False
                    })
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return []
def scrape_movie(self, title, year, imdb, debrid=False):
    """Debrid-only: query the JSON search endpoint, keep quality-tagged
    posts whose title matches, and scrape each candidate in a worker
    thread; blocks until all workers finish."""
    try:
        start_time = time.time()
        if not debrid:
            return self.sources
        query = '%s %s' % (title, year)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        query = urllib.quote_plus(query).replace('+', '%2B')
        url = urlparse.urljoin(self.search_base_link, self.search_link % query)
        headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
        scraper = cfscrape.create_scraper()
        response = scraper.get(url, headers=headers).content
        results = json.loads(response)['results']
        candidates = [(item['post_title'], item['post_name']) for item in results]
        # Keep results whose title (before the year) matches the request.
        candidates = [(post_title, post_name)
                      for post_title, post_name in candidates
                      if clean_title(post_title.lower().split(year)[0]) == clean_title(title)]
        # Renamed from `filter` to avoid shadowing the builtin.
        wanted_tags = ['uhd', '4K', '2160', '1080', '720', 'hevc', 'bluray', 'web']
        posts = [(urlparse.urljoin(self.base_link, post_name), year)
                 for _, post_name in candidates
                 if any(tag in post_name for tag in wanted_tags)]
        threads = [workers.Thread(self.get_sources, post, title, year, '', '',
                                  str(start_time))
                   for post in posts]
        for worker in threads:
            worker.start()
        # Poll until every worker has finished.
        while any(worker.is_alive() for worker in threads):
            time.sleep(0.1)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    """Build the episode URL directly from the slugged title and collect
    every iframe embed on the page, probing streamango and goo.gl links
    for a quality label (everything else defaults to 720p)."""
    try:
        start_time = time.time()
        movie_id = clean_search(title.lower().replace(' ', '-'))
        show_url = '%s/episodes/%s-%sx%s' % (self.base_link, movie_id, season, episode)
        headers = {'User-Agent': User_Agent}
        scraper = cfscrape.create_scraper()
        html = scraper.get(show_url, headers=headers, timeout=5).content
        count = 0
        for link in re.compile('class="metaframe rptss" src="(.+?)"').findall(html):
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0].split('.')[0].title()
            if 'streamango.com' in link:
                # Streamango exposes the mp4 height, used as the quality.
                holder = requests.get(link).content
                qual = re.compile('type:"video/mp4".+?height:(.+?),',
                                  re.DOTALL).findall(holder)[0]
                count += 1
                self.sources.append({'source': host, 'quality': qual + 'p',
                                     'scraper': self.name, 'url': link,
                                     'direct': False})
            elif 'goo.gl' in link:
                # Shortener: follow one redirect hop manually to the Waaw host.
                r = requests.get(link, headers={'User-Agent': User_Agent},
                                 allow_redirects=False)
                link = r.headers['location']
                count += 1
                self.sources.append({'source': 'Waaw', 'quality': '720p',
                                     'scraper': self.name, 'url': link,
                                     'direct': False})
            else:
                count += 1
                self.sources.append({'source': host, 'quality': '720p',
                                     'scraper': self.name, 'url': link,
                                     'direct': False})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year,
                     season=season, episode=episode)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def scrape_movie(self, title, year, imdb, debrid=False):
    """Search the site for the movie, match on title and year, and pass the
    detail page (with its listed quality) to get_source().

    Fix: the except path had no return, so callers got None on any scrape
    failure; it now returns self.sources like the success path and like
    every other scraper in this file.
    """
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s?s=%s' % (self.base_link, search_id.replace(' ', '+'))
        headers = {'User-Agent': random_agent()}
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, headers=headers, timeout=5).content
        match = re.compile('<li class="TPostMv".+?class="TPMvCn">.+?<a href="(.+?)"><div class="Title">(.+?)</div></a>.+?class="Date">(.+?)</span><span class="Qlty">(.+?)</span>', re.DOTALL).findall(html)
        for item_url, name, date, res in match:
            # Require both the release year and an exact cleaned-title match.
            if year in date:
                if clean_title(search_id).lower() == clean_title(name).lower():
                    self.get_source(item_url, title, year, start_time, res)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        # Fix: always return the (possibly empty) sources list, never None.
        return self.sources
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    """Search for the show, rewrite its /series/ URL into the matching
    /episodes/ URL for the requested season/episode, and hand it to
    get_source().

    Fix: the request header key was 'User_Agent' (underscore), which is not
    a valid HTTP header name and would not be honored as a User-Agent.
    """
    try:
        start_time = time.time()
        search_id = clean_search(title.lower())
        start_url = '%s/?s=%s' % (self.base_link, search_id.replace(' ', '+'))
        # Fix: 'User_Agent' -> 'User-Agent'.
        headers = {'User-Agent': User_Agent}
        scraper = cfscrape.create_scraper()
        html = scraper.get(start_url, headers=headers, timeout=5).content
        results = re.compile('class="result-item".+?href="(.+?)".+?alt="(.+?)"',
                             re.DOTALL).findall(html)
        for item_url, name in results:
            if not clean_title(title).lower() == clean_title(name).lower():
                continue
            if "/series/" in item_url:
                # Rewrite ".../series/<slug>/" to ".../episodes/<slug>-SxE/".
                movie_link = item_url[:-1].replace('/series/', '/episodes/') + '-%sx%s/' % (season, episode)
                self.get_source(movie_link, title, year, season, episode, start_time)
        return self.sources
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        return self.sources
def get_source(self, url, title, year, season, episode, start_time):
    """Read the hidden quality-form fields from a movie page, POST them back
    to select-movie-quality.php and append the resulting direct links."""
    try:
        scraper = cfscrape.create_scraper()
        headers = {'Origin': 'http://hdpopcorns.com', 'Referer': url,
                   'X-Requested-With': 'XMLHttpRequest',
                   'User-Agent': client.agent()}
        count = 0
        page = scraper.get(url, headers=headers).content
        content = client.parseDOM(page, 'div', attrs={'class': 'thecontent'})[0]

        def _field(name):
            # First value of the named hidden <input> inside the content div.
            return client.parseDOM(content, 'input', ret='value',
                                   attrs={'name': name})[0]

        post = {'FileName720p': _field('FileName720p'),
                'FileSize720p': _field('FileSize720p'),
                'FSID720p': _field('FSID720p'),
                'FileName1080p': _field('FileName1080p'),
                'FileSize1080p': _field('FileSize1080p'),
                'FSID1080p': _field('FSID1080p'),
                'x': 173, 'y': 22}
        result = scraper.post('%s/select-movie-quality.php' % self.base_link,
                              data=post).content
        buttons = client.parseDOM(result, 'div', attrs={'id': 'btn_\d+p'})
        for block in buttons:
            link = client.parseDOM(block, 'a', ret='href')[0]
            quality, info = quality_tags.get_release_quality(link, link)
            link = client.replaceHTMLCodes(link)
            link = link.encode('utf-8')
            count += 1
            self.sources.append({'source': 'DirectLink', 'quality': quality,
                                 'scraper': self.name, 'url': link,
                                 'direct': True})
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year,
                     season=season, episode=episode)
    except:
        pass

#hdpopcorn().scrape_movie('Blade Runner 2049', '2017', '', False) title contains 2 years
#hdpopcorn().scrape_movie('Deadpool 2', '2018', '', False) title contains number
def get_source(self, movie_link, title, year, season, episode, start_time):
    """Collect lazy-loaded embed URLs from an episode page, tagging quality
    from the link text (1080p/720p, otherwise SD); youtube links skipped."""
    try:
        scraper = cfscrape.create_scraper()
        html = scraper.get(movie_link).content
        count = 0
        for link in re.compile('data-lazy-src="(.+?)"', re.DOTALL).findall(html):
            if 'youtube' in link:
                continue
            if '1080p' in link:
                qual = '1080p'
            elif '720p' in link:
                qual = '720p'
            else:
                qual = 'SD'
            host = link.split('//')[1].replace('www.', '')
            host = host.split('/')[0].split('.')[0].title()
            count += 1
            self.sources.append({
                'source': host,
                'quality': qual,
                'scraper': self.name,
                'url': link,
                'direct': False
            })
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year,
                     season=season, episode=episode)
    except:
        pass
def __init__(self):
    """Scraper setup for 321movies.cc with a Cloudflare-aware session."""
    self.scraper = cfscrape.create_scraper()
    self.base_link = 'https://321movies.cc'
    if dev_log == 'true':
        self.start_time = time.time()
def get_source(self,movie_link,title,year,season,episode,start_time):
    # Harvest every data-video embed from the page, resolving vidnode,
    # openload and streamango hosts into quality-tagged source entries.
    # Debug trace of the page being scraped.
    print '###'+movie_link
    try:
        scraper = cfscrape.create_scraper()
        html = scraper.get(movie_link).content
        links = re.compile('data-video="(.+?)"',re.DOTALL).findall(html)
        count = 0
        for link in links:
            print '::::::::::::::::::::::final link> ' + link
            if 'vidnode.net' in link:
                # Only the load.php player pages carry the sources list.
                if not 'load.php' in link: continue
                link = 'http:'+link
                page = requests.get(link,timeout=10).content
                try:
                    # Each match is a (file url, quality label) pair; direct links.
                    grab = re.compile("sources.+?file: '(.+?)',label: '(.+?)'",re.DOTALL).findall(page)
                    for end_link,rez in grab:
                        if '1080' in rez:
                            res = '1080p'
                        elif '720' in rez:
                            res= '720p'
                        else:
                            res = 'unknown'
                        count +=1
                        self.sources.append({'source': 'Vidnode','quality': res,'scraper': self.name,'url': end_link,'direct': True})
                except:pass
                # vid_url = re.compile("sources.+?file: '(.+?)'",re.DOTALL).findall(page)[0]
                # vid_url = 'http:'+vid_url
                # #count +=1
                # self.sources.append({'source': 'GoogleLink','quality': '720p','scraper': self.name,'url': vid_url,'direct': True})
            elif 'openload' in link:
                try:
                    # Openload exposes the release name in the meta description;
                    # parse a quality hint from it, falling back to DVD.
                    chk = requests.get(link).content
                    rez = re.compile('"description" content="(.+?)"',re.DOTALL).findall(chk)[0]
                    if '1080' in rez:
                        res='1080p'
                    elif '720' in rez:
                        res='720p'
                    else:
                        res ='DVD'
                except:
                    res = 'DVD'
                count +=1
                self.sources.append({'source': 'Openload', 'quality': res, 'scraper': self.name, 'url': link,'direct': False})
            elif 'streamango.com' in link:
                get_res=requests.get(link).content
                try:
                    # The mp4 stream height doubles as the quality label.
                    res = re.compile('{type:"video/mp4".+?height:(.+?),',re.DOTALL).findall(get_res)[0]
                    count +=1
                    self.sources.append({'source': 'Streamango', 'quality': res, 'scraper': self.name, 'url': link,'direct': False})
                except: pass
            else:
                # Unknown host: derive the source name from the domain.
                host = link.split('//')[1].replace('www.','')
                host = host.split('/')[0].split('.')[0].title()
                count +=1
                self.sources.append({'source': host,'quality': 'DVD','scraper': self.name,'url': link,'direct': False})
        if dev_log=='true':
            end_time = time.time() - start_time
            send_log(self.name,end_time,count,title,year, 
                     season=season,episode=episode)
    except Exception, argument:
        if dev_log == 'true':
            error_log(self.name,argument)
        return self.sources
def __init__(self):
    """Scraper setup for oceanofmovies.de."""
    self.scraper = cfscrape.create_scraper()
    self.base_link = 'http://oceanofmovies.de'
def __init__(self):
    """Scraper setup for 123movieshubz.com, including its watch-URL template."""
    self.scraper = cfscrape.create_scraper()
    self.base_link = 'http://123movieshubz.com'
    self.search_link = '/watch/%s-%s-online-123movies.html'
def scrape_movie(self, x, y, z, debrid=False):
    """Search for movie *x* (year *y*; *z* is the unused imdb id), perform the
    gomostream token exchange and collect putvid/openload/streamango links
    into self.headache (this scraper's sources list).

    Fix: the except path returned self.sources, but this scraper accumulates
    results in self.headache (see the success-path return) — referencing
    self.sources would itself raise inside the handler. It now returns
    self.headache. The stray `print argument` debug leftover was removed.
    Parameter names x/y/z are kept for caller compatibility.
    """
    try:
        count = 0
        start_headache = time.time()
        scraper = cfscrape.create_scraper()
        search_term = clean_search(x.lower())
        # `self.i` is the base link and `c` the module-level headers dict.
        search_url = '%s/?s=%s+%s' % (self.i, search_term.replace(' ', '+'), y)
        search_html = scraper.get(search_url, headers=c, timeout=5).content
        results = re.findall(
            '<div class="thumbnail animation-2">.+?href="(.+?)">.+?alt="(.+?)"',
            search_html)
        for result_url, result_label in results:
            # Listing label looks like "Title (Year)".
            found_title, found_year = re.findall('(.+?)\((.+?)\)', str(result_label))[0]
            if clean_title(found_title) == clean_title(x) and found_year == y:
                detail_html = scraper.get(result_url).content
                embed_url = re.findall(
                    '<iframe class="metaframe rptss" src="(.+?)"', detail_html)[0]
                embed_html = scraper.get(embed_url).content
                token_blocks = re.findall(
                    "var tc = '(.+?)'.+?url: \"(.+?)\".+?\"_token\": \"(.+?)\".+?function.+?\(s\)(.+?)</script>",
                    embed_html, re.DOTALL)
                for token_code, post_url, csrf_token, token_js in token_blocks:
                    # Replay the site's JS token computation server-side.
                    x_token = self.get_x_token(token_code, token_js)
                    headers = {
                        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0',
                        'Host': 'gomostream.com',
                        'Referer': embed_url,
                        'x-token': x_token
                    }
                    payload = {'tokenCode': token_code, '_token': csrf_token}
                    # `b` is the module-level HTTP session used for the POST.
                    host_links = b.post(post_url, headers=headers, data=payload).json()
                    for host_link in host_links:
                        if 'putvid' in host_link:
                            player_html = scraper.get(host_link).content
                            packed = re.findall(
                                "<script type='text/javascript'>(.+?)</script>",
                                player_html, re.DOTALL)[0]
                            # Unpack the p.a.c.k.e.d JS to expose the stream URL.
                            unpacked = unpack(packed)
                            stream = re.findall('sources:\["(.+?)"', str(unpacked))[0]
                            count += 1
                            self.headache.append({
                                'source': 'Putvid',
                                'quality': 'Unknown - Probably good',
                                'scraper': self.name,
                                'url': stream,
                                'direct': False
                            })
                        elif 'openload' in host_link or 'streamango' in host_link:
                            source_name = 'Openload' if 'openload' in host_link else 'Streamango'
                            player_html = scraper.get(host_link).content
                            description = re.findall(
                                '"description" content="(.+?)"', player_html)[0]
                            if '1080p' in description:
                                quality = '1080p'
                            elif '720p' in description:
                                quality = '720p'
                            else:
                                quality = 'SD'
                            count += 1
                            self.headache.append({
                                'source': source_name,
                                'quality': quality,
                                'scraper': self.name,
                                'url': host_link,
                                'direct': False
                            })
        if dev_log == 'true':
            end_it_all = time.time() - start_headache
            send_log(self.name, end_it_all, count, x, y)
        return self.headache
    except Exception as argument:
        if dev_log == 'true':
            error_log(self.name, argument)
        # Fix: was `return self.sources`, which does not exist on this class.
        return self.headache
def __init__(self):
    """Scraper setup for openloadmovie.me; records start time in dev mode."""
    self.scraper = cfscrape.create_scraper()
    self.base_link = 'https://openloadmovie.me'
    if dev_log == 'true':
        self.start_time = time.time()
def __init__(self):
    """Scraper setup for extramovies.cc."""
    self.scraper = cfscrape.create_scraper()
    self.base_link = 'http://extramovies.cc'
def __init__(self):
    """Scraper setup for 2ddl.io with an empty sources accumulator."""
    self.scraper = cfscrape.create_scraper()
    self.sources = []
    self.base_link = 'http://2ddl.io'