def __get_framed_streams(self, vid_url, cookies, html, page_url):
    """Follow the iframe -> getScript -> responseJson chain to collect framed stream sources.

    :param vid_url: url the final media POST is sent to
    :param cookies: cookies passed along with that POST
    :param html: page html expected to contain an <iframe>
    :param page_url: originating page url, used as the Referer header
    :returns: dict of stream sources (possibly empty)
    :raises scraper.ScrapeError: when the iframe, script url, or JSON payload is missing
    """
    streams = {}
    iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
    if not iframe_url:
        raise scraper.ScrapeError('No Iframe in: %s' % (vid_url))

    iframe_url = iframe_url[0].attrs['src']
    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.02)
    # raw strings so the regex escapes (\( etc.) are not treated as (invalid) string escapes
    match = re.search(r'getScript\("([^"]+)', html)
    if not match:
        raise scraper.ScrapeError('No Script in: %s' % (iframe_url))

    script_url = match.group(1)
    html = self._http_get(script_url, headers={'Referer': iframe_url}, cache_limit=.02)
    match = re.search(r"responseJson\s*=\s*'([^']+)", html)
    if not match:
        raise scraper.ScrapeError('No JSON in: %s' % (script_url))

    js_data = scraper_utils.parse_json(match.group(1), script_url)
    media = js_data.get('medias', {})
    if media:
        # POST the discovered media list back to the video url as an XHR request
        headers = {'Referer': page_url}
        headers.update(XHR)
        data = {'data': json.dumps({'medias': media, 'original': ''})}
        vid_html = self._http_get(vid_url, data=data, headers=headers, cookies=cookies, cache_limit=.02)
        # allow_framed=False prevents infinite recursion back into this method
        streams.update(self.__get_js_sources(vid_html, vid_url, cookies, page_url, allow_framed=False))
    return streams
def __get_slice2(self, js_code, alphabet): s = '' alpha_len = str(len(alphabet)) for match in re.finditer('slice\(([^,]+),([^)]+)\)', js_code): start, end = match.groups() start = start.replace('input.length', alpha_len) end = end.replace('input.length', alpha_len) try: start = eval(start) end = eval(end) except Exception as e: raise scraper.ScrapeError('Eval Failed (%s): |%s|%s|' % (e, start, end)) s += alphabet[start: end] if not s: raise scraper.ScrapeError('No Slice from: %s' % (js_code)) return s
def __get_slice1(self, js_code, alphabet): values = {} for var in re.finditer("var\s+([^=]+)='([^']+)'\.charCodeAt\((\d+)\)", js_code): values[var.group(1)] = ord(var.group(2)[int(var.group(3))]) if not values: raise scraper.ScrapeError('No Vars in js_data') match = re.search('slice\(([^,]+),([^)]+)\)', js_code) if not match: raise scraper.ScrapeError('No Slice in js_data') start, end = match.groups() for key, value in values.iteritems(): start = start.replace(key, str(value)) end = end.replace(key, str(value)) try: start = eval(start) end = eval(end) except Exception as e: raise scraper.ScrapeError('Eval Failed (%s): |%s|%s|' % (e, start, end)) return alphabet[start: end]
def __get_slice(self, html):
    """Extract the token substring ('slice') hidden in the page's packed javascript.

    Finds the ``alphabet = '...'`` string, unpacks every
    ``eval(function(...)`` packed script on the page, then delegates to the
    appropriate slice decoder depending on whether the script uses
    charCodeAt-based variables.

    :param html: page html containing the alphabet and packed scripts
    :returns: the decoded slice string
    :raises scraper.ScrapeError: if no alphabet is present (decoders may
        also raise when the slice cannot be resolved).
    """
    # raw strings so the regex escapes are not treated as (invalid) string escapes
    alphabet = re.search(r"alphabet\s*=\s*'([^']+)", html)
    if not alphabet:
        raise scraper.ScrapeError('No Alphabet Found')

    alphabet = alphabet.group(1)
    js_code = ''
    for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
        js_data = jsunpack.unpack(match.group(1))
        # strip backslashes left over from the packed/escaped script
        js_data = js_data.replace('\\', '')
        js_code += js_data

    # charCodeAt variables mark the variant __get_slice1 understands;
    # otherwise the bounds are plain arithmetic handled by __get_slice2
    if 'charCodeAt' in js_code:
        s = self.__get_slice1(js_code, alphabet)
    else:
        s = self.__get_slice2(js_code, alphabet)
    return s