def __parse_streams(self, iframe_url, page_url):
    """Fetch the iframe document and return the stream sources parsed from it.

    Sends the embedding page url as the Referer; unpacks any packed
    (p,a,c,k,e,d) javascript before handing the markup to the parser.
    """
    resp = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    if jsunpack.detect(resp):
        resp = jsunpack.unpack(resp)
    return scraper_utils.parse_sources_list(self, resp)
def __get_sources(self, html, page_url):
    """Collect stream sources from packed scripts and any iframe on the page.

    For each packed <script>, parses the unpacked js for sources; gvideo
    hosts are kept directly, everything else is resolved via a HEAD
    request and kept only if it redirects to an absolute url. An iframe,
    if present, is added as an HD720 source.

    :param html: the page markup to scan
    :param page_url: used as the Referer for follow-up requests
    :return: dict mapping stream url -> source attribute dict
    """
    sources = {}
    # pages whose url contains 'ngilizce' (English) carry no subtitles
    subs = 'ngilizce' not in page_url
    for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
        js_data = jsunpack.unpack(match.group(1))
        js_data = js_data.replace('\\', '')
        temp_sources = self._parse_sources_list(js_data)
        for source in temp_sources:
            temp_sources[source]['subs'] = subs
            if self._get_direct_hostname(source) == 'gvideo':
                sources[source] = temp_sources[source]
            else:
                # non-gvideo urls are short-lived redirectors; resolve them
                # with a HEAD request and keep only absolute redirect targets
                headers = {'Referer': page_url}
                redir_url = self._http_get(source, headers=headers, allow_redirect=False, method='HEAD')
                if redir_url.startswith('http'):
                    sources[redir_url] = temp_sources[source]
    iframe_url = dom_parser.parse_dom(html, 'iframe', ret='src')
    if iframe_url:
        iframe_url = iframe_url[0]
        direct = self._get_direct_hostname(iframe_url) == 'gvideo'
        sources[iframe_url] = {'direct': direct, 'subs': subs, 'quality': QUALITIES.HD720}
    return sources
def __parse_streams(self, iframe_url, page_url):
    """Retrieve the iframe page, unpack any packed js, and parse its sources."""
    headers = {"Referer": page_url}
    page = self._http_get(iframe_url, headers=headers, cache_limit=0.5)
    if jsunpack.detect(page):
        page = jsunpack.unpack(page)
    # NOTE(review): logs the full (possibly unpacked) markup — presumably a
    # debugging aid; confirm it is still wanted at this log level
    log_utils.log(page)
    return self._parse_sources_list(page)
def __get_links(self, iframe_src, page_url):
    """Collect stream sources from every packed script in the iframe page.

    Bug fix: the original rebound ``sources`` on each loop iteration, so
    only the last packed <script> block contributed any sources; results
    from all matches are now merged.

    :param iframe_src: url of the iframe to fetch
    :param page_url: used as the Referer header
    :return: dict mapping stream url -> source attribute dict
    """
    sources = {}
    headers = {'Referer': page_url}
    html = self._http_get(iframe_src, headers=headers, cache_limit=1)
    for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
        js_data = jsunpack.unpack(match.group(1))
        js_data = js_data.replace('\\', '')
        # merge instead of replace so earlier scripts' sources survive
        sources.update(scraper_utils.parse_sources_list(self, js_data))
    return sources
def __get_page_links(self, html):
    """Unpack every packed script on the page, then build a hoster dict
    for each parsed source (all treated as direct links)."""
    for packed in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
        # append the unpacked js so the parser sees it alongside the markup
        html += jsunpack.unpack(packed.group(1)).replace('\\', '')
    hosters = []
    parsed = scraper_utils.parse_sources_list(self, html)
    for stream_url in parsed:
        hosters.append({
            'multi-part': False, 'url': stream_url, 'class': self,
            'quality': parsed[stream_url]['quality'],
            'host': scraper_utils.get_direct_hostname(self, stream_url),
            'rating': None, 'views': None, 'direct': True})
    return hosters
def __get_embedded_sources(self, html):
    """Gather embedded <source> urls plus any sources hidden in packed js.

    :param html: the player page markup
    :return: dict with 'sources' (list of urls) and 'subs' (subtitle label,
             empty string when the player provides caption tracks)
    """
    # collect direct mp4 <source> tags first
    sources = [attrs['src'] for attrs, _content in
               dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req='src')]
    # if captions exist, then they aren't hardcoded
    subs = '' if re.search(r'"?kind"?\s*:\s*"?captions"?', html) else 'Turkish subtitles'
    for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
        js_data = jsunpack.unpack(match.group(1))
        js_data = js_data.replace('\\', '')
        html += js_data
    # parse_sources_list returns a dict keyed by source url; only the keys
    # are needed, so extend directly instead of copying via a comprehension
    sources.extend(scraper_utils.parse_sources_list(self, html, var='source'))
    return {'sources': sources, 'subs': subs}
def __get_cookies(self, html):
    """Extract a cookie assignment made by packed javascript on the page.

    Best-effort: returns a single-entry ``{name: value}`` dict, or ``{}``
    when no packed script sets a cookie or the cookie string is malformed.
    (Replaces the original bare ``except:`` that silently swallowed every
    error, including programming mistakes.)
    """
    js_code = ''
    for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
        try:
            js_code += jsunpack.unpack(match.group(1)).replace('\\', '')
        except Exception:
            # best-effort: skip scripts that fail to unpack
            continue
    match = re.search(r"cookie\s*=\s*'([^;']+)", js_code)
    if not match:
        return {}
    # partition (not split) keeps any '=' inside the cookie value intact
    name, sep, value = match.group(1).partition('=')
    return {name: value} if sep else {}
def get_sources(self, video):
    """Build the list of hoster dicts for *video*.

    Fetches the scraper's page for the video, reads an optional page-wide
    quality label from the 'poster-qulabel' div, then walks each
    'tab_box' div's iframe (skipping youtube embeds), unpacking packed
    javascript before parsing out stream sources. Direct sources get
    request headers appended to their url; indirect ones are rated by
    hostname.
    """
    hosters = []
    source_url = self.get_url(video)
    # FORCE_NO_MATCH marks a video this scraper explicitly cannot serve
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    # page-level quality hint; falls back to HIGH when missing or unmapped
    q_str = dom_parser2.parse_dom(html, 'div', {'class': 'poster-qulabel'})
    if q_str:
        q_str = q_str[0].content.replace(' ', '').upper()
        page_quality = Q_MAP.get(q_str, QUALITIES.HIGH)
    else:
        page_quality = QUALITIES.HIGH
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'tab_box'}):
        iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='src')
        if iframe_url:
            iframe_url = iframe_url[0].attrs['src']
            if 'youtube' in iframe_url: continue
            # note: `html` is rebound to the iframe's markup from here on
            html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
            for match in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
                js_data = jsunpack.unpack(match.group(1))
                js_data = js_data.replace('\\', '')
                # append unpacked js so the source parser can see it
                html += js_data
            sources = scraper_utils.parse_sources_list(self, html)
            if not sources:
                # nothing parseable: fall back to the iframe itself as an
                # indirect source at the page-level quality
                sources = {iframe_url: {'quality': page_quality, 'direct': False}}
            for source, values in sources.iteritems():
                direct = values['direct']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, source)
                    if host == 'gvideo':
                        # gvideo encodes its quality in the url itself
                        quality = scraper_utils.gv_get_quality(source)
                    else:
                        quality = values['quality']
                    # direct links need UA/Referer appended for playback
                    source += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                else:
                    host = urlparse.urlparse(source).hostname
                    quality = scraper_utils.get_quality(video, host, values['quality'])
                hoster = {'multi-part': False, 'url': source, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                hosters.append(hoster)
    return hosters
def __get_slice(self, html):
    """Recover the decode slice for this page.

    Extracts the ``alphabet`` constant from the page, concatenates every
    unpacked packed-js block, and dispatches to the matching slice decoder.

    Raises:
        scraper.ScrapeError: when no alphabet constant is present.
    """
    alpha_match = re.search(r"alphabet\s*=\s*'([^']+)", html)
    if alpha_match is None:
        raise scraper.ScrapeError('No Alphabet Found')
    alphabet = alpha_match.group(1)
    js_code = ''.join(
        jsunpack.unpack(m.group(1)).replace('\\', '')
        for m in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL))
    # 'charCodeAt' marks the first obfuscation variant; pick the decoder
    decoder = self.__get_slice1 if 'charCodeAt' in js_code else self.__get_slice2
    return decoder(js_code, alphabet)
def __get_embedded(self, html, page_url):
    """Map each embedded-player stream url to its quality.

    Follows the iframe inside the 'videoreklam' div; returns {} when the
    div or its iframe is missing.
    """
    div = dom_parser2.parse_dom(html, 'div', {'id': 'videoreklam'})
    if not div:
        return {}
    iframes = dom_parser2.parse_dom(div[0].content, 'iframe', req='src')
    if not iframes:
        return {}
    embed_html = self._http_get(iframes[0].attrs['src'], headers={'Referer': page_url}, cache_limit=.5)
    # finditer scans the fetched markup as-is; appending only rebinds the
    # name for the parser call below (same as the original behavior)
    for packed in re.finditer(r'(eval\(function\(.*?)</script>', embed_html, re.DOTALL):
        embed_html += jsunpack.unpack(packed.group(1)).replace('\\', '')
    parsed = scraper_utils.parse_sources_list(self, embed_html, var='source')
    return dict((url, info['quality']) for url, info in parsed.iteritems())
def __get_links_from_js(self, html, page_url):
    """Find sources declared in the site's external 'slug=' scripts.

    Fetches each matching <script src>, unpacks it if packed, then looks
    for a ``sourcesPlaylist`` url (expanded via the playlist helper) or,
    failing that, a ``sourcesEmbed`` url added as an indirect HD720 source.

    Bug fix: the original patterns ``"?sourcesPlaylist?"`` /
    ``"?sourcesEmbed?"`` made the last letter of the key optional while
    *requiring* the closing quote; per the file's own convention (see the
    ``"?kind"?`` pattern elsewhere) both surrounding quotes are optional.
    """
    sources = {}
    for src_url in dom_parser.parse_dom(html, 'script', ret='src'):
        if 'slug=' not in src_url:
            continue
        headers = {'Referer': page_url}
        js_src = self._http_get(src_url, headers=headers, cache_limit=.05)
        if jsunpack.detect(js_src):
            unpacked_data = jsunpack.unpack(js_src)
        else:
            unpacked_data = js_src
        match = re.search(r'"?sourcesPlaylist"?\s*:\s*"([^"]+)', unpacked_data)
        if match:
            sources.update(self.__get_links_from_playlist(match.group(1), headers))
        else:
            match = re.search(r'"?sourcesEmbed"?\s*:\s*"([^"]+)', unpacked_data)
            if match:
                embed_url = match.group(1).replace('\\', '')
                sources[embed_url] = {'quality': QUALITIES.HD720, 'direct': False}
    return sources
def __get_page_links(self, html):
    """Unpack all packed scripts, then emit one hoster per parsed source."""
    unpacked = ''
    for m in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
        unpacked += jsunpack.unpack(m.group(1)).replace('\\', '')
    combined = html + unpacked
    parsed = scraper_utils.parse_sources_list(self, combined)

    def make_hoster(url):
        # every source found this way is treated as a direct link
        return {'multi-part': False, 'url': url, 'class': self,
                'quality': parsed[url]['quality'],
                'host': scraper_utils.get_direct_hostname(self, url),
                'rating': None, 'views': None, 'direct': True}

    return [make_hoster(url) for url in parsed]