def get_source(self, item_url, qual, title, year, start_time): try: print 'hi' count = 0 headers = {'User-Agent': random_agent()} OPEN = requests.get(item_url, headers=headers, timeout=5).content Endlinks = re.compile( 'class="entry-content">.+?<iframe src="(.+?)".+?</iframe>', re.DOTALL).findall(OPEN) for link in Endlinks: headers = { 'User-Agent': random_agent(), 'Host': 'consistent.stream', 'Referer': item_url } consistant = requests.get(link, headers=headers, timeout=5).content print consistant consistant = self.replace_html_entities(consistant) final_links = re.compile('"src":"(.+?)"', re.DOTALL).findall(consistant) print str(final_links) for links in final_links: print links irl = 'irl-' gool = 'googleusercontent' if irl not in links: if gool not in links: links = links.replace('\\', '') host = links.split('//')[1].replace('www.', '') hostname = host.split('/')[0].split('.')[0].title() if 'mentor' in links: links = links + '|' + 'User-Agent=Mozilla/5.0 (Windows NT 6.3; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0&Referer=' + link count += 1 self.sources.append({ 'source': hostname, 'quality': qual, 'scraper': self.name, 'url': links, 'direct': False }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year) except Exception, argument: print argument if dev_log == 'true': error_log(self.name, argument)
def get(url, check, headers=None, data=None):
    # Fetch *url* and return the body only when the marker string *check*
    # appears in it; otherwise retry (twice) through a rotating proxy.
    # Returns None when every attempt fails or the marker never shows up.
    if headers is None:
        headers = {
            'User-Agent': random_agent(),
        }
    # Direct attempt first (passing data makes this a POST, as before).
    try:
        request = urllib2.Request(url, headers=headers, data=data)
        html = urllib2.urlopen(request, timeout=10).read()
        if check in str(html):
            return html
    except:
        pass
    # The original duplicated this proxy block verbatim three times and
    # leaked the response in two of them; dedupe into a bounded retry
    # loop and always close the response.
    for _ in range(2):
        try:
            new_url = get_proxy_url() % urllib.quote_plus(url)
            headers['Referer'] = 'http://%s/' % urlparse.urlparse(new_url).netloc
            request = urllib2.Request(new_url, headers=headers)
            response = urllib2.urlopen(request, timeout=10)
            html = response.read()
            response.close()
            if check in html:
                return html
        except:
            pass
    return
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() get_link = self.base_link + '%s/' % (imdb) headers = {'User-Agent': random_agent(), 'referrer': get_link} data = {'tmp_chk': '1'} html = requests.post(get_link, headers=headers, data=data, verify=False, timeout=5).content #print html try: link = re.compile('<iframe src="(.+?)"', re.DOTALL).findall(html)[0] except: link = '' #print link count = 0 if link != '': try: chk = requests.get(link).content rez = re.compile('"description" content="(.+?)"', re.DOTALL).findall(chk)[0] if '1080' in rez: res = '1080p' elif '720' in rez: res = '720p' else: res = 'DVD' except: res = 'DVD' count += 1 if 'http' not in link: link = 'http:' + link self.sources.append({ 'source': 'Openload', 'quality': res, 'scraper': self.name, 'url': link, 'direct': False }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def get_raw(url, headers=None, data=None):
    """Open *url* through the proxy and return the raw urllib2 response.

    Returns None (implicitly) when the proxied request fails for any
    reason.  *data* is accepted for signature compatibility with get()
    but is not used here.
    """
    if headers is None:
        headers = {
            'User-Agent': random_agent(),
        }
    try:
        proxied = get_proxy_url() % urllib.quote_plus(url)
        headers['Referer'] = 'http://%s/' % urlparse.urlparse(proxied).netloc
        return urllib2.urlopen(urllib2.Request(proxied, headers=headers),
                               timeout=10)
    except:
        pass
def get_source(self,item_url,qual,title,year,start_time): try: #print item_url #print 'hi' count = 0 headers={'User-Agent':random_agent()} OPEN = session.get(item_url,headers=headers,timeout=5).content Endlinks = re.compile('class="entry-content">.+?<iframe src="(.+?)".+?</iframe>',re.DOTALL).findall(OPEN) for link in Endlinks: headers={'User-Agent':user_agent, 'Host':'consistent.stream', 'Referer':item_url} #print link consistant = session.get(link,headers=headers,timeout=5).content video_, Hash = re.findall('video="(.+?)" hash="(.+?)"',consistant)[0] api_url = 'https://consistent.stream/api/getVideo' headers={'User-Agent':user_agent, 'Host':'consistent.stream','Referer': 'https://consistent.stream/titles/'+video_} #print video_ #print Hash data = {'key':Hash,'referrer':item_url,'video':video_} api_html = session.post(api_url,headers=headers,data=data).content #print str(api_html) #### trying to get response from api_url but being a nob #consistant = self.replace_html_entities(consistant) final_links = re.compile('"src":"(.+?)"',re.DOTALL).findall(api_html) #print str(final_links) for links in final_links: irl= 'irl-' gool = 'googleusercontent' if irl not in links: if gool not in links: links = links.replace('\\','') host = links.split('//')[1].replace('www.','') hostname = host.split('/')[0].split('.')[0].title() if 'mentor' in links: links = links +'|'+'User-Agent=Mozilla/5.0 (Windows NT 6.3; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0&Referer='+link count += 1 self.sources.append({'source': hostname, 'quality': qual, 'scraper': self.name, 'url': links,'direct': False}) else: count += 1 self.sources.append({'source': hostname, 'quality': qual, 'scraper': self.name, 'url': links,'direct': False}) if dev_log=='true': end_time = time.time() - start_time send_log(self.name,end_time,count,title,year) except Exception, argument: #print argument if dev_log == 'true': error_log(self.name,argument)
def get_source(self, item_url, title, year, start_time, season, episode): try: headers = {'User-Agent': random_agent()} OPEN = requests.get(item_url, headers=headers, timeout=5).content Endlinks = re.compile('<IFRAME SRC="(.+?)"', re.DOTALL).findall(OPEN) count = 0 for link in Endlinks: print link + '::::::::::::::::::::::::::::' html = requests.get(link, headers=headers, timeout=5).content packed = packed = re.findall( "id='flvplayer'.+?<script type='text/javascript'>(.+?)</script>", html.replace('\\', ''), re.DOTALL) for item in packed: #print item item = unpack(item) print item item = item.split('file:"')[1].split('",')[0] print item + '>>>>>>>>>>>>>..split?' host = item.split('//')[1].replace('www.', '') hostname = host.split('/')[0].split('.')[0].title() count += 1 self.sources.append({ 'source': hostname, 'quality': 'DVD', 'scraper': self.name, 'url': item, 'direct': False }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year, season=season, episode=episode) except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() search_id = clean_search(title.lower()) start_url = '%s/?s=%s' % (self.base_link, search_id.replace(' ', '+')) headers = {'User-Agent': random_agent()} html = requests.get(start_url, headers=headers, timeout=5).content match = re.compile( 'id="mt-.+?href="(.+?)">.+?alt="(.+?)".+?<span class="year">(.+?)</span>.+?class="calidad2">(.+?)</span>', re.DOTALL).findall(html) for item_url, name, release, qual in match: if year == release: if clean_title(search_id).lower() == clean_title( name).lower(): self.get_source(item_url, qual, title, year, start_time) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument)
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() search_id = clean_search(title.lower()) start_url = '%s%s%s' % (self.base_link, self.search, search_id.replace(' ', '%20')) #print start_url headers = {'User-Agent': random_agent()} html = requests.get(start_url, headers=headers, timeout=5).content match = re.compile( 'class="title search title".+?href="(.+?)">(.+?)</a>', re.DOTALL).findall(html) for item_url, name in match: #print item_url +' ><><><><><> '+name if clean_title(search_id).lower() == clean_title(name).lower(): self.get_source(item_url, title, year, start_time, '', '') #print 'passedurl>>>>>>>>>>>>>>>>>>>>>>'+item_url return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument)
# -*- coding: utf-8 -*- # Universal Scrapers import xbmc,xbmcaddon,time import re import requests from universalscrapers.common import clean_title,clean_search,random_agent,send_log,error_log from ..scraper import Scraper dev_log = xbmcaddon.Addon('script.module.universalscrapers').getSetting("dev_log") headers = {"User-Agent": random_agent()} class freemusic(Scraper): domains = ['freemusicdownloads'] name = "Freemusic" sources = [] def __init__(self): self.base_link = 'http://down.freemusicdownloads.world/' self.sources = [] if dev_log=='true': self.start_time = time.time() def scrape_music(self, title, artist, debrid=False): try: song_search = clean_title(title.lower()).replace(' ','+') artist_search = clean_title(artist.lower()).replace(' ','+') start_url = '%sresults?search_query=%s+%s' %(self.base_link,artist_search,song_search) html = requests.get(start_url, headers=headers, timeout=20).content match = re.compile('<h4 class="card-title">.+?</i>(.+?)</h4>.+?id="(.+?)"',re.DOTALL).findall(html)
def get_source(self, url, title, year, season, episode, start_time): try: # print url get_cookies = session.get(url, timeout=5).content cookie_frame = re.findall('<iframe.+?src="(.+?)"', get_cookies) for cookie_page in cookie_frame: # print cookie_page if 'gomostream' in cookie_page: # print cookie_page html2 = session.get(cookie_page, allow_redirects=True).content # print html2 get_cookie_info = re.compile( "var tc = '(.+?)'.+?url: \"(.+?)\".+?\"_token\": \"(.+?)\".+?function _tsd_tsd_ds\(s\)(.+?)</script>", re.DOTALL).findall(html2) for tokencode, url_to_open, _token, xtokenscript in get_cookie_info: # print tokencode,xtokenscript, url_to_open,_token x_token = self.get_x_token(tokencode, xtokenscript) # print x_token headers = { 'User-Agent': random_agent(), 'Host': 'gomostream.com', 'Referer': cookie_page, 'x-token': x_token } data = {'tokenCode': tokencode, '_token': _token} html3 = session.post(url_to_open, headers=headers, data=data, timeout=5).json() count = 0 for playlink in html3: if playlink != '': try: if playlink[0] == ' ': playlink = playlink[1:] except: playlink = playlink try: source = re.findall( '//(.+?)/', str(playlink))[0] except: source = self.name count += 1 self.sources.append({ 'source': source, 'quality': 'HD', 'scraper': self.name, 'url': playlink, 'direct': False }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year, season=season, episode=episode) except Exception, argument: xbmc.log('YESMOVIE : sources ' + argument, xbmc.LOGNOTICE) print argument if dev_log == 'true': error_log(self.name, arguemnt) return self.sources
# -*- coding: utf-8 -*- # Universal Scrapers import xbmc,xbmcaddon,time import re import requests from universalscrapers.common import clean_title,clean_search,random_agent,send_log,error_log from ..scraper import Scraper dev_log = xbmcaddon.Addon('script.module.universalscrapers').getSetting("dev_log") headers = {"User-Agent": random_agent()} class freemusic(Scraper): domains = ['down.getfreemusic.world'] name = "FreeMusic" sources = [] def __init__(self): self.base_link = 'https://down.getfreemusic.world/' self.sources = [] if dev_log=='true': self.start_time = time.time() def scrape_music(self, title, artist, debrid=False): try: song_search = clean_title(title.lower()).replace(' ','+') artist_search = clean_title(artist.lower()).replace(' ','+') start_url = '%sresults?search_query=%s+%s' % (self.base_link,artist_search,song_search) html = requests.get(start_url, headers=headers, timeout=20).content match = re.compile('<h4 class="card-title">.+?</i>(.+?)</h4>.+?id="(.+?)"',re.DOTALL).findall(html)