def __parse_streams(self, iframe_url, page_url):
    """Fetch the stream iframe (with the page as referer) and parse its sources.

    :param iframe_url: URL of the player iframe to retrieve
    :param page_url: URL of the page embedding the iframe; sent as Referer
    :return: whatever scraper_utils.parse_sources_list extracts from the markup
    """
    response = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    # Some hosts hide the source list inside packed JS; unpack before parsing.
    if jsunpack.detect(response):
        response = jsunpack.unpack(response)
    return scraper_utils.parse_sources_list(self, response)
def __get_cookies(self, html):
    """Extract the anti-bot cookie set by packed JS on the page.

    Unpacks every ``eval(function(...))`` blob found in *html*, then looks
    for a ``cookie = 'name=value...'`` assignment in the unpacked code.

    :param html: raw page markup that may contain packed JS
    :return: a one-entry ``{name: value}`` dict, or ``{}`` when no cookie
        assignment is found or it is malformed
    """
    js_code = ''
    for match in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
        # Best effort: a blob the unpacker chokes on shouldn't kill the
        # whole scrape (the old bare except hid real errors too broadly).
        try:
            js_data = jsunpack.unpack(match.group(1))
        except Exception:
            continue
        js_code += js_data.replace('\\', '')

    # Capture everything up to the first ';' or closing quote.
    match = re.search("cookie\s*=\s*'([^;']+)", js_code)
    if not match:
        return {}
    # partition (not split) keeps any '=' characters inside the value intact.
    name, sep, value = match.group(1).partition('=')
    if not sep:
        return {}  # assignment without '=': nothing usable
    return {name: value}
def __get_slice(self, html):
    """Recover the "slice" token hidden in the page's packed JS.

    :param html: raw page markup containing an alphabet assignment and
        packed ``eval(function(...))`` blobs
    :return: the decoded slice string
    :raises scraper.ScrapeError: when no alphabet assignment is present
    """
    alpha_match = re.search("alphabet\s*=\s*'([^']+)", html)
    if alpha_match is None:
        raise scraper.ScrapeError('No Alphabet Found')
    alphabet = alpha_match.group(1)
    # Concatenate every unpacked JS blob; backslashes are escaping noise.
    pieces = [jsunpack.unpack(m.group(1)).replace('\\', '')
              for m in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL)]
    js_code = ''.join(pieces)
    # Two obfuscation variants exist; charCodeAt marks the first flavor.
    decoder = self.__get_slice1 if 'charCodeAt' in js_code else self.__get_slice2
    return decoder(js_code, alphabet)
def __get_embedded(self, html, page_url):
    """Resolve streams embedded via the 'videoreklam' ad container.

    :param html: markup of the page holding the ad div
    :param page_url: URL of that page; sent as Referer to the iframe
    :return: dict mapping each stream URL to its quality; empty when the
        ad container or its iframe is missing
    """
    ad_divs = dom_parser2.parse_dom(html, 'div', {'id': 'videoreklam'})
    if not ad_divs:
        return {}
    iframes = dom_parser2.parse_dom(ad_divs[0].content, 'iframe', req='src')
    if not iframes:
        return {}
    embed_html = self._http_get(iframes[0].attrs['src'],
                                headers={'Referer': page_url}, cache_limit=.5)
    # finditer scans the string object passed in, so appending (rebinding)
    # embed_html below does not extend the iteration.
    for packed in re.finditer('(eval\(function\(.*?)</script>', embed_html, re.DOTALL):
        embed_html += jsunpack.unpack(packed.group(1)).replace('\\', '')
    source_map = scraper_utils.parse_sources_list(self, embed_html, var='source')
    return {stream_url: info['quality'] for stream_url, info in source_map.iteritems()}
def __get_embedded_sources(self, html):
    """Collect direct stream URLs from an embed page.

    :param html: markup of the embed/player page
    :return: ``{'sources': [url, ...], 'subs': label}`` where the label is
        blank when a captions track is declared (subs selectable, not
        hardcoded) and 'Turkish subtitles' otherwise
    """
    # if captions exist, then they aren't hardcoded
    has_captions = re.search('''"?kind"?\s*:\s*"?captions"?''', html)
    subs = '' if has_captions else 'Turkish subtitles'
    sources = [attrs['src']
               for attrs, _content in dom_parser2.parse_dom(
                   html, 'source', {'type': 'video/mp4'}, req='src')]
    # Unpack packed JS and append it so the list parser can see hidden URLs;
    # finditer keeps scanning the original string despite the rebinding.
    for packed in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
        html += jsunpack.unpack(packed.group(1)).replace('\\', '')
    sources.extend(scraper_utils.parse_sources_list(self, html, var="source"))
    return {'sources': sources, 'subs': subs}
def __get_page_links(self, html):
    """Turn every direct source advertised on the page into a hoster record.

    :param html: page markup, possibly containing packed JS source lists
    :return: list of hoster dicts (all marked ``direct``)
    """
    # Expose URLs hidden inside packed eval(function(...)) blobs; finditer
    # iterates the original string, unaffected by the rebinding below.
    for packed in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
        html += jsunpack.unpack(packed.group(1)).replace('\\', '')
    hosters = []
    for stream_url, info in scraper_utils.parse_sources_list(self, html).iteritems():
        hosters.append({
            'multi-part': False,
            'url': stream_url,
            'class': self,
            'quality': info['quality'],
            'host': scraper_utils.get_direct_hostname(self, stream_url),
            'rating': None,
            'views': None,
            'direct': True})
    return hosters
def get_sources(self, video):
    # Resolve all playable sources for |video|: fetch the detail page, walk
    # each tab's iframe, unpack any packed JS, and emit hoster dicts.
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    # Page-level quality badge; used for iframes that expose no quality of
    # their own. Falls back to HIGH when the badge is missing or unknown.
    q_str = dom_parser2.parse_dom(html, 'div', {'class': 'poster-qulabel'})
    if q_str:
        q_str = q_str[0].content.replace(' ', '').upper()
        page_quality = Q_MAP.get(q_str, QUALITIES.HIGH)
    else:
        page_quality = QUALITIES.HIGH
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'tab_box'}):
        iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='src')
        if iframe_url:
            iframe_url = iframe_url[0].attrs['src']
            # youtube iframes are presumably trailers, not streams — skip
            if 'youtube' in iframe_url: continue
            html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
            # Append unpacked JS so parse_sources_list can see URLs hidden
            # inside packed eval(function(...)) blobs. finditer scans the
            # originally-passed string, so the += rebinding is safe here.
            for match in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
                js_data = jsunpack.unpack(match.group(1))
                js_data = js_data.replace('\\', '')
                html += js_data
            sources = scraper_utils.parse_sources_list(self, html)
            if not sources:
                # No direct streams found: fall back to the iframe itself as
                # a single non-direct hosted source at the page quality.
                sources = {
                    iframe_url: {
                        'quality': page_quality,
                        'direct': False
                    }
                }
            for source, values in sources.iteritems():
                direct = values['direct']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, source)
                    if host == 'gvideo':
                        # gvideo encodes quality in the URL itself
                        quality = scraper_utils.gv_get_quality(source)
                    else:
                        quality = values['quality']
                    # NOTE(review): UA/Referer are appended to the stream URL
                    # — presumably required by the CDN; confirm with player.
                    source += scraper_utils.append_headers({
                        'User-Agent': scraper_utils.get_ua(),
                        'Referer': page_url
                    })
                else:
                    host = urlparse.urlparse(source).hostname
                    quality = scraper_utils.get_quality(
                        video, host, values['quality'])
                hoster = {
                    'multi-part': False,
                    'url': source,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'direct': direct
                }
                hosters.append(hoster)
    return hosters