コード例 #1
0
 def __parse_streams(self, iframe_url, page_url):
     """Fetch the player iframe and extract its stream sources.

     :param iframe_url: URL of the embedded player iframe
     :param page_url: URL of the hosting page, sent as the Referer
     :return: result of scraper_utils.parse_sources_list() on the iframe HTML
     """
     html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
     # Packed ("eval(function(p,a,c,k,e,d)...") JS hides the sources; unpack it first.
     if jsunpack.detect(html):
         html = jsunpack.unpack(html)
     return scraper_utils.parse_sources_list(self, html)
コード例 #2
0
    def __parse_streams(self, iframe_url, page_url):
        """Retrieve the iframe HTML (unpacking packed JS when present) and
        parse the stream source list from it."""
        referer_headers = {'Referer': page_url}
        iframe_html = self._http_get(iframe_url, headers=referer_headers, cache_limit=.5)
        # Unwrap "eval(function(p,a,c,k,e,d)...)" obfuscation if detected.
        iframe_html = jsunpack.unpack(iframe_html) if jsunpack.detect(iframe_html) else iframe_html
        return scraper_utils.parse_sources_list(self, iframe_html)
コード例 #3
0
 def __get_sources(self, html, page_url):
     """Collect stream sources from packed JS blocks and any embedded iframe.

     :param html: page HTML to scan
     :param page_url: originating page URL; used as Referer and to detect
         English-language pages
     :return: dict mapping stream URL -> source-info dict
     """
     sources = {}
     # 'ngilizce' (presumably from Turkish 'İngilizce' = English — TODO confirm)
     # marks an English page, i.e. no hardcoded subtitles.
     subs = 'ngilizce' not in page_url
     # Raw string: avoids invalid-escape warnings for \( on modern Python.
     for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
         js_data = jsunpack.unpack(match.group(1))
         js_data = js_data.replace('\\', '')
         temp_sources = self._parse_sources_list(js_data)
         for source in temp_sources:
             temp_sources[source]['subs'] = subs
             if self._get_direct_hostname(source) == 'gvideo':
                 sources[source] = temp_sources[source]
             else:
                 # Non-gvideo URLs are redirectors: resolve one hop via HEAD
                 # and keep the target URL instead of the redirector.
                 headers = {'Referer': page_url}
                 redir_url = self._http_get(source, headers=headers, allow_redirect=False, method='HEAD')
                 if redir_url.startswith('http'):
                     sources[redir_url] = temp_sources[source]

     iframe_url = dom_parser.parse_dom(html, 'iframe', ret='src')
     if iframe_url:
         iframe_url = iframe_url[0]
         # Direct only when the iframe host is a Google-video host.
         direct = self._get_direct_hostname(iframe_url) == 'gvideo'
         sources[iframe_url] = {'direct': direct, 'subs': subs, 'quality': QUALITIES.HD720}

     return sources
コード例 #4
0
    def __parse_streams(self, iframe_url, page_url):
        """Fetch the player iframe (sending the page as Referer), unpack any
        packed JS, log the resulting HTML, and parse its stream sources."""
        referer = {"Referer": page_url}
        iframe_html = self._http_get(iframe_url, headers=referer, cache_limit=0.5)
        if jsunpack.detect(iframe_html):
            iframe_html = jsunpack.unpack(iframe_html)
        log_utils.log(iframe_html)

        return self._parse_sources_list(iframe_html)
コード例 #5
0
 def __get_links(self, iframe_src, page_url):
     """Fetch the iframe, unpack every packed-JS block, and return all
     stream sources found.

     Bug fix: previously each packed block's parse result *overwrote*
     ``sources``, so only the last block's sources were returned; results
     are now merged across all blocks.

     :return: dict mapping stream URL -> source-info dict
     """
     sources = {}
     headers = {'Referer': page_url}
     html = self._http_get(iframe_src, headers=headers, cache_limit=1)
     # Raw string: avoids invalid-escape warnings for \( on modern Python.
     for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
         js_data = jsunpack.unpack(match.group(1))
         js_data = js_data.replace('\\', '')
         # Merge instead of overwrite so every block's sources are kept.
         sources.update(scraper_utils.parse_sources_list(self, js_data))
     return sources
コード例 #6
0
    def __get_page_links(self, html):
        """Unpack packed JS in *html* and build a hoster dict for every
        direct source found.

        :return: list of hoster dicts
        """
        hosters = []
        # Raw string: avoids invalid-escape warnings for \( on modern Python.
        for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
            js_data = jsunpack.unpack(match.group(1))
            js_data = js_data.replace('\\', '')
            # Append the unpacked JS so parse_sources_list can see it too.
            html += js_data

        sources = scraper_utils.parse_sources_list(self, html)
        for source in sources:
            quality = sources[source]['quality']
            hoster = {'multi-part': False, 'url': source, 'class': self, 'quality': quality, 'host': scraper_utils.get_direct_hostname(self, source), 'rating': None, 'views': None, 'direct': True}
            hosters.append(hoster)
        return hosters
コード例 #7
0
    def __get_embedded_sources(self, html):
        """Collect <source type="video/mp4"> URLs plus any sources hidden in
        packed JS, and report whether soft subtitles are available.

        :return: {'sources': [url, ...], 'subs': subtitle label or ''}
        """
        sources = []
        # If a captions track is declared, the subtitles aren't hardcoded.
        # Raw strings avoid invalid-escape warnings for \s on modern Python.
        subs = '' if re.search(r'''"?kind"?\s*:\s*"?captions"?''', html) else 'Turkish subtitles'
        for attrs, _content in dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req='src'):
            sources.append(attrs['src'])

        for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
            js_data = jsunpack.unpack(match.group(1))
            js_data = js_data.replace('\\', '')
            # Append unpacked JS so parse_sources_list can see its sources.
            html += js_data

        # parse_sources_list returns a dict; extending adds its keys (URLs).
        sources.extend(scraper_utils.parse_sources_list(self, html, var="source"))
        return {'sources': sources, 'subs': subs}
コード例 #8
0
 def __get_cookies(self, html):
     """Best-effort extraction of a single cookie set by packed JS.

     :param html: page HTML possibly containing packed "eval(function(..."
         script blocks
     :return: {name: value} for the first JS cookie assignment found, or
         {} when nothing parseable exists
     """
     try:
         js_code = ''
         # Raw string: avoids invalid-escape warnings for \( on modern Python.
         for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
             js_data = jsunpack.unpack(match.group(1))
             js_data = js_data.replace('\\', '')
             js_code += js_data

         match = re.search(r"cookie\s*=\s*'([^;']+)", js_code)
         parts = match.group(1).split('=')
         cookies = {parts[0]: parts[1]}
     except Exception:
         # Deliberate best-effort: any parse failure (no match, malformed
         # cookie) yields no cookies — but no longer swallows
         # SystemExit/KeyboardInterrupt like the old bare except.
         cookies = {}

     return cookies
コード例 #9
0
ファイル: ol_scraper.py プロジェクト: CYBERxNUKE/xbmc-addon
 def __get_cookies(self, html):
     """Best-effort extraction of a single cookie set by packed JS.

     :return: {name: value} for the first JS cookie assignment found in the
         unpacked script blocks, or {} when nothing parseable exists
     """
     try:
         js_code = ''
         # Raw string: avoids invalid-escape warnings for \( on modern Python.
         for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
             js_data = jsunpack.unpack(match.group(1))
             js_data = js_data.replace('\\', '')
             js_code += js_data

         match = re.search(r"cookie\s*=\s*'([^;']+)", js_code)
         parts = match.group(1).split('=')
         cookies = {parts[0]: parts[1]}
     except Exception:
         # Deliberate best-effort: return no cookies on any parse failure,
         # without the old bare except that also caught SystemExit.
         cookies = {}

     return cookies
コード例 #10
0
    def get_sources(self, video):
        """Return a list of hoster dicts for *video*.

        Scans each 'tab_box' for an embedded (non-YouTube) iframe, unpacks
        any packed JS inside it, and parses the stream sources; falls back
        to treating the iframe itself as a single non-direct hoster.

        :param video: project video object passed through to URL/quality helpers
        :return: list of hoster dicts
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        # Page-level quality badge (e.g. "HD 720"), normalized for Q_MAP lookup.
        q_str = dom_parser2.parse_dom(html, 'div', {'class': 'poster-qulabel'})
        if q_str:
            q_str = q_str[0].content.replace(' ', '').upper()
            page_quality = Q_MAP.get(q_str, QUALITIES.HIGH)
        else:
            page_quality = QUALITIES.HIGH

        for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'tab_box'}):
            iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='src')
            if iframe_url:
                iframe_url = iframe_url[0].attrs['src']
                if 'youtube' in iframe_url: continue

                html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
                # Raw string: avoids invalid-escape warnings for \( on modern Python.
                for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
                    js_data = jsunpack.unpack(match.group(1))
                    js_data = js_data.replace('\\', '')
                    # Append unpacked JS so parse_sources_list can see it.
                    html += js_data

                sources = scraper_utils.parse_sources_list(self, html)
                if not sources:
                    # Nothing parseable: offer the iframe itself as a hoster.
                    sources = {iframe_url: {'quality': page_quality, 'direct': False}}

                # .items() works on both Python 2 and 3; iteritems() is 2-only.
                for source, values in sources.items():
                    direct = values['direct']
                    if direct:
                        host = scraper_utils.get_direct_hostname(self, source)
                        if host == 'gvideo':
                            quality = scraper_utils.gv_get_quality(source)
                        else:
                            quality = values['quality']
                        source += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                    else:
                        host = urlparse.urlparse(source).hostname
                        quality = scraper_utils.get_quality(video, host, values['quality'])

                    hoster = {'multi-part': False, 'url': source, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                    hosters.append(hoster)

        return hosters
コード例 #11
0
    def __get_slice(self, html):
        """Decode the page's obfuscated slice using its alphabet and unpacked JS.

        :raises scraper.ScrapeError: if no alphabet definition is found
        :return: the decoded slice string from __get_slice1/__get_slice2
        """
        # Raw strings avoid invalid-escape warnings for \s/\( on modern Python.
        alphabet = re.search(r"alphabet\s*=\s*'([^']+)", html)
        if not alphabet: raise scraper.ScrapeError('No Alphabet Found')

        alphabet = alphabet.group(1)
        js_code = ''
        for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
            js_data = jsunpack.unpack(match.group(1))
            js_data = js_data.replace('\\', '')
            js_code += js_data

        # Two obfuscation variants; 'charCodeAt' marks the first scheme.
        if 'charCodeAt' in js_code:
            s = self.__get_slice1(js_code, alphabet)
        else:
            s = self.__get_slice2(js_code, alphabet)

        return s
コード例 #12
0
    def __get_slice(self, html):
        """Decode the page's obfuscated slice using its alphabet and unpacked JS.

        :raises scraper.ScrapeError: if no alphabet definition is found
        :return: the decoded slice string from __get_slice1/__get_slice2
        """
        # Raw strings avoid invalid-escape warnings for \s/\( on modern Python.
        alphabet = re.search(r"alphabet\s*=\s*'([^']+)", html)
        if not alphabet: raise scraper.ScrapeError('No Alphabet Found')

        alphabet = alphabet.group(1)
        js_code = ''
        for match in re.finditer(r'(eval\(function\(.*?)</script>', html,
                                 re.DOTALL):
            js_data = jsunpack.unpack(match.group(1))
            js_data = js_data.replace('\\', '')
            js_code += js_data

        # Two obfuscation variants; 'charCodeAt' marks the first scheme.
        if 'charCodeAt' in js_code:
            s = self.__get_slice1(js_code, alphabet)
        else:
            s = self.__get_slice2(js_code, alphabet)

        return s
コード例 #13
0
 def __get_embedded(self, html, page_url):
     """Resolve the embed iframe inside div#videoreklam and map each stream
     URL found in its unpacked JS to its quality.

     :return: dict mapping stream URL -> quality (empty when no iframe)
     """
     sources = {}
     match = dom_parser2.parse_dom(html, 'div', {'id': 'videoreklam'})
     if not match: return sources
     match = dom_parser2.parse_dom(match[0].content, 'iframe', req='src')
     if not match: return sources
     headers = {'Referer': page_url}
     html = self._http_get(match[0].attrs['src'],
                           headers=headers,
                           cache_limit=.5)
     # Raw string: avoids invalid-escape warnings for \( on modern Python.
     for match in re.finditer(r'(eval\(function\(.*?)</script>', html,
                              re.DOTALL):
         js_data = jsunpack.unpack(match.group(1))
         js_data = js_data.replace('\\', '')
         # Append unpacked JS so parse_sources_list can see its sources.
         html += js_data
     # .items() works on both Python 2 and 3; iteritems() is 2-only.
     return dict((key, value['quality'])
                 for key, value in scraper_utils.parse_sources_list(
                     self, html, var='source').items())
コード例 #14
0
 def __get_links_from_js(self, html, page_url):
     """Follow 'slug=' script tags, unpack them, and extract playlist or
     embed stream URLs.

     :return: dict mapping stream URL -> {'quality': ..., 'direct': ...}
     """
     sources = {}
     for src_url in dom_parser.parse_dom(html, 'script', ret='src'):
         if 'slug=' in src_url:
             headers = {'Referer': page_url}
             js_src = self._http_get(src_url, headers=headers, cache_limit=.05)
             if jsunpack.detect(js_src):
                 unpacked_data = jsunpack.unpack(js_src)
             else:
                 unpacked_data = js_src

             # Raw strings avoid invalid-escape warnings for \s on modern Python.
             match = re.search(r'"?sourcesPlaylist?"\s*:\s*"([^"]+)', unpacked_data)
             if match:
                 # Playlist variant: expand into individual links.
                 sources.update(self.__get_links_from_playlist(match.group(1), headers))
             else:
                 match = re.search(r'"?sourcesEmbed?"\s*:\s*"([^"]+)', unpacked_data)
                 if match:
                     embed_url = match.group(1).replace('\\', '')
                     sources[embed_url] = {'quality': QUALITIES.HD720, 'direct': False}

     return sources
コード例 #15
0
    def __get_page_links(self, html):
        """Unpack packed JS in *html* and build a hoster dict for every
        direct source found.

        :return: list of hoster dicts
        """
        hosters = []
        # Raw string: avoids invalid-escape warnings for \( on modern Python.
        for match in re.finditer(r'(eval\(function\(.*?)</script>', html,
                                 re.DOTALL):
            js_data = jsunpack.unpack(match.group(1))
            js_data = js_data.replace('\\', '')
            # Append the unpacked JS so parse_sources_list can see it too.
            html += js_data

        sources = scraper_utils.parse_sources_list(self, html)
        for source in sources:
            quality = sources[source]['quality']
            hoster = {
                'multi-part': False,
                'url': source,
                'class': self,
                'quality': quality,
                'host': scraper_utils.get_direct_hostname(self, source),
                'rating': None,
                'views': None,
                'direct': True
            }
            hosters.append(hoster)
        return hosters