Example #1
0
    def __get_framed_streams(self, vid_url, cookies, html, page_url):
        """Follow the iframe -> script -> JSON chain on a video page and
        return a dict of streams harvested from the framed player.

        :param vid_url: url of the video endpoint that is POSTed to for sources
        :param cookies: cookies carried through to the final sources request
        :param html: html of the page expected to contain the player iframe
        :param page_url: url of the originating page (sent as Referer)
        :returns: dict of streams (as produced by __get_js_sources)
        :raises scraper.ScrapeError: when any link of the chain is missing
        """
        streams = {}
        iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
        if not iframe_url: raise scraper.ScrapeError('No Iframe in: %s' % (vid_url))

        iframe_url = iframe_url[0].attrs['src']
        html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.02)
        # raw string: avoids the invalid "\(" escape warning in the pattern literal
        match = re.search(r'getScript\("([^"]+)', html)
        if not match: raise scraper.ScrapeError('No Script in: %s' % (iframe_url))
            
        script_url = match.group(1)
        html = self._http_get(script_url, headers={'Referer': iframe_url}, cache_limit=.02)
        match = re.search(r"responseJson\s*=\s*'([^']+)", html)
        if not match: raise scraper.ScrapeError('No JSON in: %s' % (script_url))

        js_data = scraper_utils.parse_json(match.group(1), script_url)
        media = js_data.get('medias', {})
        if media:
            # replay the discovered media list back to the video endpoint as an XHR POST
            headers = {'Referer': page_url}
            headers.update(XHR)
            data = {'data': json.dumps({'medias': media, 'original': ''})}
            vid_html = self._http_get(vid_url, data=data, headers=headers, cookies=cookies, cache_limit=.02)
            streams.update(self.__get_js_sources(vid_html, vid_url, cookies, page_url, allow_framed=False))
            
        return streams
Example #2
0
    def __get_slice2(self, js_code, alphabet):
        """Evaluate every slice(start, end) expression found in *js_code*
        against *alphabet* and return the concatenated slices.

        :param js_code: unpacked javascript containing slice(...) calls
        :param alphabet: string the javascript slices characters from
        :raises scraper.ScrapeError: if an expression fails to evaluate or
            no slice() produced any characters
        """
        s = ''
        alpha_len = str(len(alphabet))
        # raw string: avoids the invalid "\(" escape warning in the pattern literal
        for match in re.finditer(r'slice\(([^,]+),([^)]+)\)', js_code):
            start, end = match.groups()
            # the js indexes relative to input.length; substitute the literal length
            start = start.replace('input.length', alpha_len)
            end = end.replace('input.length', alpha_len)
            try:
                # SECURITY: eval() of arithmetic scraped from a remote page --
                # untrusted input; consider a restricted expression parser instead
                start = eval(start)
                end = eval(end)
            except Exception as e:
                raise scraper.ScrapeError('Eval Failed (%s): |%s|%s|' % (e, start, end))

            s += alphabet[start: end]
        
        if not s: raise scraper.ScrapeError('No Slice from: %s' % (js_code))
        return s
Example #3
0
    def __get_slice1(self, js_code, alphabet):
        """Resolve the charCodeAt-based variable scheme in *js_code* and
        return the single slice of *alphabet* it selects.

        :param js_code: unpacked javascript declaring charCodeAt vars and one slice()
        :param alphabet: string the javascript slices characters from
        :raises scraper.ScrapeError: if no vars, no slice, or evaluation fails
        """
        values = {}
        # raw strings: avoid invalid "\(" / "\s" escape warnings in the patterns
        for var in re.finditer(r"var\s+([^=]+)='([^']+)'\.charCodeAt\((\d+)\)", js_code):
            # var name -> character code it was assigned in the js
            values[var.group(1)] = ord(var.group(2)[int(var.group(3))])
        if not values: raise scraper.ScrapeError('No Vars in js_data')

        match = re.search(r'slice\(([^,]+),([^)]+)\)', js_code)
        if not match: raise scraper.ScrapeError('No Slice in js_data')
        
        start, end = match.groups()
        # .items() instead of the py2-only .iteritems() so this runs on py2 and py3
        for key, value in values.items():
            start = start.replace(key, str(value))
            end = end.replace(key, str(value))
        
        try:
            # SECURITY: eval() of arithmetic scraped from a remote page --
            # untrusted input; consider a restricted expression parser instead
            start = eval(start)
            end = eval(end)
        except Exception as e:
            raise scraper.ScrapeError('Eval Failed (%s): |%s|%s|' % (e, start, end))

        return alphabet[start: end]
Example #4
0
    def __get_slice(self, html):
        """Extract the obfuscation alphabet from *html*, unpack all packed
        <script> blocks, and return the alphabet slice the scripts select.

        Dispatches to __get_slice1 for the charCodeAt variable scheme and to
        __get_slice2 otherwise.

        :param html: page html containing the alphabet and packed scripts
        :raises scraper.ScrapeError: if no alphabet is found (and whatever the
            slice helpers raise on failure)
        """
        # raw strings: avoid invalid "\s" / "\(" escape warnings in the patterns
        alphabet = re.search(r"alphabet\s*=\s*'([^']+)", html)
        if not alphabet: raise scraper.ScrapeError('No Alphabet Found')
        
        alphabet = alphabet.group(1)
        js_code = ''
        for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
            js_data = jsunpack.unpack(match.group(1))
            # strip escaping introduced by the packer before pattern matching
            js_data = js_data.replace('\\', '')
            js_code += js_data
        
        # charCodeAt marks the variable-based scheme handled by __get_slice1
        if 'charCodeAt' in js_code:
            s = self.__get_slice1(js_code, alphabet)
        else:
            s = self.__get_slice2(js_code, alphabet)

        return s