def get_sources(self, video):
    """Return direct-stream hosters scraped from the page's player iframe."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment:
        html = fragment[0].content

    # The player lives in an iframe; nothing to do if it is missing.
    iframes = dom_parser2.parse_dom(html, 'iframe', req='src')
    if not iframes:
        return hosters
    iframe_url = iframes[0].attrs['src']
    if iframe_url.startswith('/'):
        iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)

    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    obj = dom_parser2.parse_dom(html, 'object', req='data')
    if obj:
        # Google-hosted video: expand into its per-quality stream URLs.
        streams = dict((stream_url, {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True})
                       for stream_url in scraper_utils.parse_google(self, obj[0].attrs['data']))
    else:
        streams = scraper_utils.parse_sources_list(self, html)

    for stream_url, info in streams.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        quality = scraper_utils.gv_get_quality(stream_url) if host == 'gvideo' else info['quality']
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
        hosters.append({'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                        'quality': quality, 'views': None, 'rating': None, 'direct': True})
    return hosters
def __get_links_from_json2(self, url, page_url, video_type):
    """Fetch a JW-style playlist JSON and map each stream URL to quality info."""
    sources = {}
    headers = {'Referer': page_url}
    headers.update(XHR)
    html = self._http_get(url, headers=headers, cache_limit=0)
    js_data = scraper_utils.parse_json(html, url)
    try:
        playlist = js_data.get('playlist', [])
        for entry in playlist[0].get('sources', []):
            stream_url = entry['file']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in entry:
                quality = scraper_utils.height_get_quality(entry['label'])
            else:
                # No label available: infer quality from the file name.
                if video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(stream_url)
                else:
                    meta = scraper_utils.parse_episode_link(stream_url)
                quality = scraper_utils.height_get_quality(meta['height'])
            sources[stream_url] = {'quality': quality, 'direct': True}
            logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        logger.log('Exception during yesmovies extract: %s' % (e), log_utils.LOGDEBUG)
    return sources
def __get_gk_links(self, html):
    """Resolve GK player links embedded in |html| into stream sources.

    Returns a dict of stream_url -> {'quality': ..., 'direct': ...}.

    BUG FIX: gv_get_quality() only makes sense for Google-video ('gvideo')
    streams; the original compared the hostname against 'openload.co',
    which no other extractor in this file pairs with gv_get_quality().
    The check is corrected to 'gvideo' for consistency with the rest of
    the file.
    """
    sources = {}
    match = re.search('{link\s*:\s*"([^"]+)', html)
    if match:
        iframe_url = match.group(1)
        data = {'link': iframe_url}
        headers = {'Referer': iframe_url}
        html = self._http_get(self.gk_url, data=data, headers=headers, cache_limit=.5)
        js_data = scraper_utils.parse_json(html, self.gk_url)
        links = js_data.get('link', [])
        if isinstance(links, basestring):
            # Single-link responses come back as a bare string.
            links = [{'link': links}]
        for link in links:
            stream_url = link['link']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
                direct = True
            elif 'label' in link:
                quality = scraper_utils.height_get_quality(link['label'])
                direct = True
            else:
                quality = QUALITIES.HIGH
                direct = False
            sources[stream_url] = {'quality': quality, 'direct': direct}
    return sources
def __create_source(self, stream_url, height, page_url, subs=False, direct=True):
    """Build a single hoster dict; resolves same-site HEAD redirects for direct links."""
    if direct:
        stream_url = stream_url.replace('\\/', '/')
        if self.get_name().lower() in stream_url:
            headers = {'Referer': page_url}
            redir_url = self._http_get(stream_url, headers=headers, method='HEAD',
                                       allow_redirect=False, cache_limit=.25)
            if redir_url.startswith('http'):
                stream_url = redir_url
                stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            else:
                stream_url += scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url,
                     'Cookie': self._get_stream_cookies()})
        else:
            stream_url += scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
        host = scraper_utils.get_direct_hostname(self, stream_url)
    else:
        host = urlparse.urlparse(stream_url).hostname

    if host == 'gvideo':
        quality = scraper_utils.gv_get_quality(stream_url)
    else:
        quality = scraper_utils.height_get_quality(height)

    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
              'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
    if subs:
        hoster['subs'] = 'Turkish Subtitles'
    return hoster
def __get_sources(self, html, label):
    """Collect <source src=...> streams; gvideo links are rated by URL, others by |label|."""
    sources = {}
    for attrs, _label in dom_parser2.parse_dom(html, 'source', req='src'):
        src = attrs['src']
        if scraper_utils.get_direct_hostname(self, src) == 'gvideo':
            quality = scraper_utils.gv_get_quality(src)
        else:
            quality = Q_MAP.get(label.upper(), QUALITIES.HIGH)
        sources[src] = {'direct': True, 'quality': quality}
    return sources
def get_sources(self, video):
    """Resolve the site's load_player() AJAX chain into hoster entries."""
    hosters = []
    sources = {}
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    match = re.search("load_player\('([^']+)", html)
    if not match:
        return hosters

    headers = {'Referer': page_url, 'Server': 'cloudflare-nginx',
               'Accept': 'text/html, */*; q=0.01', 'Accept-Language': 'en-US,en;q=0.5',
               'Accept-Formating': 'application/json, text/javascript',
               'Accept-Encoding': 'gzip, deflate'}
    headers.update(XHR)
    params = {'id': match.group(1)}
    player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
    html = self._http_get(player_url, params=params, headers=headers, cache_limit=1)
    js_data = scraper_utils.parse_json(html, player_url)
    pl_url = js_data.get('value') or js_data.get('download')
    if not pl_url:
        return hosters

    headers = {'Referer': page_url}
    if pl_url.startswith('//'):
        pl_url = 'https:' + pl_url
    html = self._http_get(pl_url, headers=headers, allow_redirect=False, cache_limit=0)
    if html.startswith('http'):
        # The playlist URL redirected straight to a media URL.
        streams = [(html, '')]
    else:
        js_data = scraper_utils.parse_json(html, pl_url)
        try:
            streams = [(source['file'], source.get('label', ''))
                       for source in js_data['playlist'][0]['sources']]
        except:
            streams = []

    for stream_url, label in streams:
        if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
            sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
        else:
            quality = scraper_utils.height_get_quality(label) if label else QUALITIES.HIGH
            sources[stream_url] = {'quality': quality, 'direct': False}

    for source, value in sources.iteritems():
        direct = value['direct']
        quality = value['quality']
        if direct:
            host = scraper_utils.get_direct_hostname(self, source)
        else:
            host = urlparse.urlparse(source).hostname
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                        'views': None, 'rating': None, 'url': stream_url, 'direct': direct})
    return hosters
def get_sources(self, video):
    """Pair server names with their play links and build hoster entries.

    BUG FIXES vs. the original:
    * ``re.sub('^Server\\s*', '', host, re.I)`` passed ``re.I`` (== 2) as
      the positional ``count`` argument, so the match was never
      case-insensitive and was silently capped at 2 replacements;
      ``flags=re.I`` is now explicit.
    * Inside the per-source loop the outer ``stream_url`` was reassigned,
      so from the second gvideo source onward the hostname/quality were
      computed from the PREVIOUS iteration's URL. Each source is now
      classified by its own ``source['link']``.
    """
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    hosts = [r.content for r in dom_parser2.parse_dom(html, 'p', {'class': 'server_servername'})]
    links = [r.content for r in dom_parser2.parse_dom(html, 'p', {'class': 'server_play'})]
    for host, link_frag in zip(hosts, links):
        stream_url = dom_parser2.parse_dom(link_frag, 'a', req='href')
        if not stream_url:
            continue
        stream_url = stream_url[0].attrs['href']
        host = re.sub('^Server\s*', '', host, flags=re.I)
        host = re.sub('\s*Link\s+\d+', '', host)
        if host.lower() == 'google':
            sources = self.__get_gvideo_links(stream_url)
        else:
            sources = [{'host': host, 'link': stream_url}]
        for source in sources:
            source_link = source['link']
            if scraper_utils.get_direct_hostname(self, source_link) == 'gvideo':
                quality = scraper_utils.gv_get_quality(source_link)
                final_url = source_link + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
                final_host = 'gvideo'
                direct = True
            else:
                final_url = scraper_utils.pathify_url(source_link)
                final_host = HOST_SUB.get(source['host'].lower(), source['host'])
                quality = scraper_utils.get_quality(video, final_host, QUALITIES.HIGH)
                direct = False
            hosters.append({'multi-part': False, 'url': final_url, 'host': final_host,
                            'class': self, 'quality': quality, 'views': None,
                            'rating': None, 'direct': direct})
    return hosters
def __get_gk_links(self, link, iframe_url):
    """POST |link| to the GK endpoint and decode the returned stream list.

    BUG FIXES vs. the original:
    * ``headers = XHR`` aliased the shared module-level XHR dict, so the
      subsequent ``update()`` permanently injected this call's Referer and
      User-Agent into XHR for every later caller; a copy is used instead.
    * The inner loop variable shadowed the ``link`` parameter; renamed to
      ``item`` (no behavior change).
    """
    sources = {}
    data = {'link': link}
    headers = dict(XHR)
    headers.update({'Referer': iframe_url, 'User-Agent': USER_AGENT})
    html = self._http_get(GK_URL, data=data, headers=headers, cache_limit=.25)
    js_data = scraper_utils.parse_json(html, GK_URL)
    if 'link' in js_data:
        if isinstance(js_data['link'], basestring):
            # Single-link responses come back as a bare string.
            stream_url = js_data['link']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                for source in scraper_utils.parse_google(self, stream_url):
                    sources[source] = {'quality': scraper_utils.gv_get_quality(source),
                                       'direct': True}
            else:
                sources[stream_url] = {'quality': QUALITIES.HIGH, 'direct': False}
        else:
            for item in js_data['link']:
                stream_url = item['link']
                if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'label' in item:
                    quality = scraper_utils.height_get_quality(item['label'])
                else:
                    quality = QUALITIES.HIGH
                sources[stream_url] = {'quality': quality, 'direct': True}
    return sources
def get_sources(self, video):
    """Scrape the debrid links table and classify each row's stream."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'table', {'class': 'links-table'})
    if not fragment:
        return hosters
    for _attrs, row in dom_parser2.parse_dom(fragment[0].content, 'tr'):
        match = re.search("playVideo\.bind\(.*?'([^']+)(?:[^>]*>){2}(.*?)</td>", row, re.DOTALL)
        if not match:
            continue
        stream_url, release = match.groups()
        if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
            sources = scraper_utils.parse_google(self, stream_url)
        else:
            sources = [stream_url]
        for source in sources:
            host = scraper_utils.get_direct_hostname(self, source)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source)
                direct = True
            else:
                host = urlparse.urlparse(source).hostname
                # Quality comes from the release-name column, not the URL.
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(release)
                else:
                    meta = scraper_utils.parse_episode_link(release)
                base_quality = scraper_utils.height_get_quality(meta['height'])
                quality = scraper_utils.get_quality(video, host, base_quality)
                direct = False
            hosters.append({'multi-part': False, 'host': host, 'class': self,
                            'quality': quality, 'views': None, 'rating': None,
                            'url': source, 'direct': direct})
    return hosters
def __get_sources(self, html):
    """Merge the page's JS source list with <source>/<iframe> tags found in |html|."""
    sources = scraper_utils.parse_sources_list(self, html)
    tags = (dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req='src')
            + dom_parser2.parse_dom(html, 'iframe', req='src'))
    for tag in tags:
        stream_url = tag.attrs['src']
        if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
            quality, direct = scraper_utils.gv_get_quality(stream_url), True
        else:
            quality, direct = QUALITIES.HD720, False
        sources[stream_url] = {'quality': quality, 'direct': direct}
    return self.__proc_sources(sources)
def __get_gk_links2(self, html):
    """Decrypt a proxy.link payload and expand the resulting Google-docs video.

    BUG FIX: the hostname check compared against 'openload.co' while the
    branch body parses Google-docs streams (``_parse_gdocs``) and rates
    them with ``gv_get_quality()``; every other extractor in this file
    pairs those helpers with the 'gvideo' hostname, so the comparison is
    corrected to 'gvideo'.
    """
    sources = {}
    match = re.search('proxy\.link=([^"&]+)', html)
    if match:
        proxy_link = match.group(1)
        proxy_link = proxy_link.split('*', 1)[-1]
        # Short payloads were encrypted with the first key, long ones
        # with the second (threshold kept as-is from the original).
        if len(proxy_link) <= 224:
            vid_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY1, proxy_link)
        else:
            vid_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY2, proxy_link)
        if scraper_utils.get_direct_hostname(self, vid_url) == 'gvideo':
            for source in self._parse_gdocs(vid_url):
                sources[source] = {'quality': scraper_utils.gv_get_quality(source),
                                   'direct': True}
    return sources
def get_sources(self, video):
    """Pull mp4 <source> tags out of the page's player iframe."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    iframe_url = dom_parser2.parse_dom(html, 'iframe', {'id': 'myiframe'},
                                       req='src', exclude_comments=True)
    if not iframe_url:
        return hosters
    iframe_url = iframe_url[0].attrs['src']
    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    for source in dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'},
                                        req=['src', 'data-res']):
        stream_url = source.attrs['src']
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        else:
            # Non-gvideo streams carry their resolution in data-res.
            quality = scraper_utils.height_get_quality(source.attrs['data-res'])
            stream_url += scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
        hosters.append({'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                        'quality': quality, 'views': None, 'rating': None, 'direct': True})
    return hosters
def get_sources(self, video):
    """Ask the site's video endpoint for per-resolution redirect URLs."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=0)
    match = re.search('var\s*video_id\s*=\s*"([^"]+)', html)
    if not match:
        return hosters
    video_id = match.group(1)
    headers = {'Referer': page_url}
    headers.update(XHR)
    # Response unused; this POST appears required before VIDEO_URL answers
    # (kept exactly as the original did it).
    _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'),
                           headers=headers, method='POST', cache_limit=0)
    vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
    html = self._http_get(vid_url, data={'v': video_id}, headers=headers, cache_limit=0)
    for res, value in scraper_utils.parse_json(html, vid_url).iteritems():
        match = re.search('url=(.*)', value)
        if not match:
            continue
        stream_url = urllib.unquote(match.group(1))
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(res)
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                        'views': None, 'rating': None, 'url': stream_url, 'direct': True})
    return hosters
def get_sources(self, video):
    """Walk each movieplay div's iframe and resolve its stream links."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}):
        iframe_src = dom_parser2.parse_dom(fragment, 'iframe', req='src')
        if not iframe_src:
            continue
        iframe_src = iframe_src[0].attrs['src']
        if re.search('o(pen)?load', iframe_src, re.I):
            meta = scraper_utils.parse_movie_link(iframe_src)
            quality = scraper_utils.height_get_quality(meta['height'])
            links = {iframe_src: {'quality': quality, 'direct': False}}
        else:
            links = self.__get_links(iframe_src, url)
        for link, info in links.iteritems():
            direct = info['direct']
            quality = info['quality']
            if direct:
                host = scraper_utils.get_direct_hostname(self, link)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(link)
                stream_url = link + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua(), 'Referer': url})
            else:
                host = urlparse.urlparse(link).hostname
                stream_url = link
            hosters.append({'multi-part': False, 'url': stream_url, 'host': host,
                            'class': self, 'quality': quality, 'views': None,
                            'rating': None, 'direct': direct})
    return hosters
def get_sources(self, video):
    """Gather data-click / movie sources and resolve the linked ones.

    BUG FIX: the original's ``else: pass`` left ``quality`` unassigned for
    direct streams whose host is not 'gvideo', raising NameError on the
    first such source (or silently reusing the previous iteration's
    value). Those streams now default to QUALITIES.HIGH.
    """
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    if video.video_type == VIDEO_TYPES.EPISODE:
        html = self.__episode_match(video, source_url)
        sources = [r.attrs['data-click'] for r in
                   dom_parser2.parse_dom(html, 'div', req='data-click')
                   + dom_parser2.parse_dom(html, 'li', req='data-click')]
    else:
        sources = self.__get_movie_sources(page_url)
    sources = [source.strip() for source in sources if source]
    headers = {'Referer': page_url}
    for source in sources:
        if source.startswith('http'):
            direct = False
            quality = QUALITIES.HD720
            host = urlparse.urlparse(source).hostname
        else:
            source = self.__get_linked_source(source, headers)
            if source is None:
                continue
            direct = True
            host = scraper_utils.get_direct_hostname(self, source)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source)
            else:
                quality = QUALITIES.HIGH  # was: pass (quality left unbound)
        hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                        'views': None, 'rating': None, 'url': source, 'direct': direct})
    return hosters
def get_sources(self, video):
    """Find this title's play() URL inside the site's movies.js bundle."""
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    js_url = scraper_utils.urljoin(self.base_url, '/javascript/movies.js')
    html = self._http_get(js_url, cache_limit=48)
    if source_url.startswith('/'):
        source_url = source_url[1:]
    pattern = '''getElementById\(\s*"%s".*?play\(\s*'([^']+)''' % (source_url)
    match = re.search(pattern, html, re.I)
    if not match:
        return hosters
    stream_url = match.group(1)
    if 'drive.google' in stream_url or 'docs.google' in stream_url:
        sources = scraper_utils.parse_google(self, stream_url)
    else:
        sources = [stream_url]
    for source in sources:
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        host = scraper_utils.get_direct_hostname(self, source)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(source)
            direct = True
        elif 'youtube' in stream_url:
            quality = QUALITIES.HD720
            direct = False
            host = 'youtube.com'
        else:
            quality = QUALITIES.HIGH
            direct = True
        hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                        'views': None, 'rating': None, 'url': stream_url, 'direct': direct})
    return hosters
def get_sources(self, video):
    """Parse the playex source list, following same-site HEAD redirects."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment:
        html = fragment[0].content
    links = scraper_utils.parse_sources_list(self, html)
    for link in links:
        stream_url = link
        if self.base_url in link:
            # Same-site links may redirect; resolve with a HEAD request.
            redir_url = self._http_get(link, headers={'Referer': url},
                                       allow_redirect=False, method='HEAD')
            if redir_url.startswith('http'):
                stream_url = redir_url
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = links[link]['quality']
        stream_url += scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua(), 'Referer': url})
        hosters.append({'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                        'quality': quality, 'views': None, 'rating': None, 'direct': True})
    return hosters
def __get_ajax(self, html, page_url):
    """Follow the page's $.ajax endpoint and map returned streams to qualities."""
    sources = {}
    match = re.search('\$\.ajax\(\s*"([^"]+)', html)
    if not match:
        return sources
    post_url = match.group(1)
    headers = {'Referer': page_url}
    html = self._http_get(post_url, headers=headers, cache_limit=.5)
    js_result = scraper_utils.parse_json(html, post_url)
    for key, stream_url in js_result.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            # JSON keys are the stream heights.
            quality = scraper_utils.height_get_quality(key)
        sources[stream_url] = quality
    return sources
def __get_posts(self, html):
    """Replay the page's $.post call and map returned streams to qualities."""
    sources = {}
    match = re.search('\$\.post\("([^"]+)"\s*,\s*\{(.*?)\}', html)
    if not match:
        return sources
    post_url, post_data = match.groups()
    data = self.__get_data(post_data)
    html = self._http_get(post_url, data=data, cache_limit=.5)
    js_result = scraper_utils.parse_json(html, post_url)
    for key, stream_url in js_result.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        quality = (scraper_utils.gv_get_quality(stream_url) if host == 'gvideo'
                   else scraper_utils.height_get_quality(key))
        sources[stream_url] = quality
    return sources
def __get_king_links(self, iframe_url):
    """Query the king player's GetVideoSources XHR for direct streams."""
    hosters = []
    match = re.search('v=(.*)', iframe_url)
    if match:
        data = {'ID': match.group(1)}
        headers = {'Referer': iframe_url}
        headers.update(XHR)
        xhr_url = iframe_url.split('?')[0]
        html = self._http_get(xhr_url, params={'p': 'GetVideoSources'}, data=data,
                              headers=headers, cache_limit=.5)
        js_data = scraper_utils.parse_json(html, xhr_url)
        try:
            for source in js_data['VideoSources']:
                stream_url = source['file'] + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
                host = scraper_utils.get_direct_hostname(self, source['file'])
                label = source.get('label', '')
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(source['file'])
                elif re.search('\d+p?', label):
                    # Labels like "720p" encode the height directly.
                    quality = scraper_utils.height_get_quality(label)
                else:
                    quality = QUALITY_MAP.get(label, QUALITIES.HIGH)
                hosters.append({'multi-part': False, 'host': host, 'class': self,
                                'quality': quality, 'views': None, 'rating': None,
                                'url': stream_url, 'direct': True,
                                'subs': 'Turkish Subtitles'})
        except:
            pass
    return hosters
def __get_json_links(self, html, sub):
    """Convert a JW 'sources' JSON blob into hoster dicts carrying |sub| subtitles."""
    hosters = []
    js_data = scraper_utils.parse_json(html)
    if 'sources' in js_data:
        for source in js_data.get('sources', []):
            stream_url = source.get('file')
            if stream_url is None:
                continue
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                quality = QUALITIES.HIGH
            hosters.append({'multi-part': False, 'host': host, 'class': self,
                            'quality': quality, 'views': None, 'rating': None,
                            'url': stream_url, 'direct': True, 'subs': sub})
    return hosters
def get_sources(self, video):
    """Scrape the main page plus any alternate-player pages for streams."""
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)
    pages = self.__get_alt_pages(html, page_url)
    sources = self.__get_sources(html, page_url, pages.get(page_url, True))
    for page in pages:
        if page == page_url:
            continue
        # NOTE(review): scraper_utils.urljoin is called with three args here —
        # verify the helper supports a third parameter.
        page_url = scraper_utils.urljoin(self.base_url, page, pages[page])
        html = self._http_get(page_url, cache_limit=1)
        sources.update(self.__get_sources(html, page, pages[page]))
    for stream_url, values in sources.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
            direct = True
        elif values['direct']:
            quality = values['quality']
            direct = True
        else:
            quality = values['quality']
            direct = False
            host = urlparse.urlparse(stream_url).hostname
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                  'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
        if values['subs']:
            hoster['subs'] = 'Turkish Subtitles'
        hosters.append(hoster)
    return hosters
def __get_ht_links(self, html, page_url):
    """Resolve Htplugins_Make_Player payloads via the LINK_URL2 endpoint."""
    sources = {}
    match = re.search('Htplugins_Make_Player\("([^"]+)', html)
    if match:
        data = {'data': match.group(1)}
        url = scraper_utils.urljoin(self.base_url, LINK_URL2)
        headers = {'Referer': page_url}
        html = self._http_get(url, data=data, headers=headers, cache_limit=.25)
        js_data = scraper_utils.parse_json(html, url)
        if 'l' in js_data:
            for link in js_data['l']:
                if scraper_utils.get_direct_hostname(self, link) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(link)
                else:
                    quality = QUALITIES.HIGH
                sources[link] = quality
    return sources
def __get_linked(self, html):
    """Look up streams through the AJAX 'dizi' endpoint referenced in |html|."""
    sources = {}
    match = re.search('dizi=([^"]+)', html)
    if not match:
        return sources
    html = self._http_get(AJAX_URL, params={'dizi': match.group(1)},
                          headers=XHR, cache_limit=.5)
    js_result = scraper_utils.parse_json(html, AJAX_URL)
    for source in js_result.get('success', []):
        stream_url = source.get('src')
        if stream_url is None:
            continue
        if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        elif 'label' in source:
            quality = scraper_utils.height_get_quality(source['label'])
        else:
            quality = QUALITIES.HIGH
        sources[stream_url] = quality
    return sources
def get_sources(self, video):
    """Collect embedded and linked player sources from the page iframe.

    BUG FIXES vs. the original:
    * ``quality`` was computed from the URL AFTER header parameters had
      already been appended to it; it is now rated on the clean URL first.
    * ``quality`` was only assigned in the gvideo branch, so a non-gvideo
      stream raised NameError (or reused a stale value); non-gvideo
      streams now default to QUALITIES.HIGH.
    """
    source_url = self.get_url(video)
    hosters = []
    sources = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'player'})
    if not fragment:
        return hosters
    iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
    if not iframe_url:
        return hosters
    html = self._http_get(iframe_url[0].attrs['src'], cache_limit=.25)
    sources.append(self.__get_embedded_sources(html))
    sources.append(self.__get_linked_sources(html))
    for source in sources:
        for stream_url in source['sources']:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                # Rate the clean URL before appending header params.
                quality = scraper_utils.gv_get_quality(stream_url)
                stream_url += scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
            else:
                quality = QUALITIES.HIGH
            hosters.append({'multi-part': False, 'host': host, 'class': self,
                            'quality': quality, 'views': None, 'rating': None,
                            'url': stream_url, 'direct': True,
                            'subs': source.get('subs', True)})
    return hosters
def __get_links_from_xml(self, url, video, page_url, cookies):
    """Parse the XML playlist at |url| into stream -> quality mappings."""
    sources = {}
    try:
        headers = {'Referer': page_url}
        xml = self._http_get(url, cookies=cookies, headers=headers, cache_limit=.5)
        root = ET.fromstring(xml)
        for item in root.findall('.//item'):
            title = item.find('title').text
            if title and title.upper() == 'OOPS!':
                continue
            # NOTE(review): this namespace-style tag looks malformed (the
            # '{...' is never closed) — confirm against the feed's real tags.
            for source in item.findall('{https://yesmovies.to/ajax/movie_sources/'):
                stream_url = source.get('file')
                label = source.get('label')
                if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif label:
                    quality = scraper_utils.height_get_quality(label)
                elif title:
                    quality = scraper_utils.blog_get_quality(video, title, '')
                else:
                    quality = scraper_utils.blog_get_quality(video, stream_url, '')
                sources[stream_url] = {'quality': quality, 'direct': True}
                logger.log('Adding stream: %s Quality: %s' % (stream_url, quality),
                           log_utils.LOGDEBUG)
    except Exception as e:
        logger.log('Exception during YesMovies XML Parse: %s' % (e), log_utils.LOGWARNING)
    return sources
def __get_links_from_playlist(self, grab_url, headers):
    """Fetch a grabber playlist and resolve each entry's final stream URL."""
    sources = {}
    grab_url = grab_url.replace('\\', '')
    grab_html = self._http_get(grab_url, headers=headers, cache_limit=.5)
    js_data = scraper_utils.parse_json(grab_html, grab_url)
    try:
        playlist = js_data['playlist'][0]['sources']
    except:
        playlist = []
    for item in playlist:
        stream_url = item.get('file')
        if not stream_url:
            continue
        if stream_url.startswith('/'):
            # Relative streams redirect; chase the Location via HEAD.
            stream_url = scraper_utils.urljoin(self.base_url, stream_url)
            redir_url = self._http_get(stream_url, headers=headers,
                                       allow_redirect=False, method='HEAD')
            if redir_url.startswith('http'):
                stream_url = redir_url
        if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        elif 'label' in item:
            quality = scraper_utils.height_get_quality(item['label'])
        else:
            quality = QUALITIES.HIGH
        logger.log('Adding stream: %s Quality: %s' % (stream_url, quality),
                   log_utils.LOGDEBUG)
        sources[stream_url] = {'quality': quality, 'direct': True}
        if not kodi.get_setting('scraper_url'):
            break
    return sources
def get_sources(self, video):
    """Chase nested iframes until a docs.google.com player is found."""
    source_url = self.get_url(video)
    hosters = []
    sources = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)
    iframes = dom_parser2.parse_dom(html, 'iframe', req='src')
    # The iframe list grows as nested pages are fetched; stop at the
    # first Google-docs hit.
    for attrs, _content in iframes:
        iframe_url = attrs['src']
        if 'docs.google.com' in iframe_url:
            sources = scraper_utils.parse_google(self, iframe_url)
            break
        iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
        html = self._http_get(iframe_url, cache_limit=1)
        iframes += dom_parser2.parse_dom(html, 'iframe', req='src')
    for source in sources:
        host = scraper_utils.get_direct_hostname(self, source)
        hosters.append({'multi-part': False, 'host': host, 'class': self,
                        'quality': scraper_utils.gv_get_quality(source),
                        'views': None, 'rating': None, 'url': source, 'direct': True})
    return hosters
def get_sources(self, video):
    """Scrape per-episode player buttons and their JS-delivered streams."""
    sources = []
    streams = {}
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    match = dom_parser2.parse_dom(html, 'a', {'title': re.compile('click to play', re.I)},
                                  req='href')
    if not match:
        return sources
    page_url = scraper_utils.urljoin(self.base_url, match[0].attrs['href'])
    headers = {'Referer': page_url}
    html = self._http_get(page_url, headers=headers, cache_limit=.02)
    for attrs, _content in dom_parser2.parse_dom(
            html, 'a', {'class': 'mw-episode-btn'},
            req=['data-target-i', 'data-target-e', 'title']):
        try:
            if "wasn't alive" in attrs['title']:
                continue  # dead-link button
            vid_url = scraper_utils.urljoin(self.base_url, VID_URL)
            vid_url = vid_url.format(data_i=attrs['data-target-i'],
                                     data_e=attrs['data-target-e'])
            headers = {'Referer': page_url}
            headers.update(XHR)
            cookies = self.__get_cookies(html, attrs)
            vid_html = self._http_get(vid_url, headers=headers, cookies=cookies,
                                      cache_limit=.02)
            streams.update(self.__get_js_sources(vid_html, vid_url, cookies, page_url))
        except scraper.ScrapeError as e:
            logger.log('IOMovies Error (%s): %s in %s' % (e, vid_url, page_url))
    for stream_url, values in streams.iteritems():
        if values['direct']:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(values['label'])
            stream_url += scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = QUALITIES.HIGH
        sources.append({'multi-part': False, 'url': stream_url, 'host': host,
                        'class': self, 'quality': quality, 'views': None,
                        'rating': None, 'direct': values['direct']})
    return sources
def get_sources(self, video):
    """Request embed iframes via the token-protected AJAX endpoint."""
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    action = 'getMovieEmb' if video.video_type == VIDEO_TYPES.MOVIE else 'getEpisodeEmb'
    match = re.search('elid\s*=\s*"([^"]+)', html)
    if self.__token is None:
        self.__get_token()
    if match and self.__token is not None:
        # elid is a base64'd unix timestamp, URL-quoted for the POST.
        elid = urllib.quote(base64.encodestring(str(int(time.time()))).strip())
        data = {'action': action, 'idEl': match.group(1),
                'token': self.__token, 'elid': elid}
        ajax_url = scraper_utils.urljoin(self.base_url, EMBED_URL)
        headers = {'Authorization': 'Bearer %s' % (self.__get_bearer()),
                   'Referer': page_url}
        headers.update(XHR)
        html = self._http_get(ajax_url, data=data, headers=headers, cache_limit=.5)
        html = html.replace('\\"', '"').replace('\\/', '/')
        pattern = '<IFRAME\s+SRC="([^"]+)'
        for match in re.finditer(pattern, html, re.DOTALL | re.I):
            url = match.group(1)
            host = scraper_utils.get_direct_hostname(self, url)
            if host == 'gvideo':
                direct = True
                quality = scraper_utils.gv_get_quality(url)
            else:
                if 'vk.com' in url and url.endswith('oid='):
                    continue  # skip bad vk.com links
                direct = False
                host = urlparse.urlparse(url).hostname
                quality = scraper_utils.get_quality(video, host, QUALITIES.HD720)
            sources.append({'multi-part': False, 'url': url, 'host': host, 'class': self,
                            'quality': quality, 'views': None, 'rating': None,
                            'direct': direct})
    return sources