Ejemplo n.º 1
0
    def __parse_streams(self, iframe_url, page_url):
        """Fetch an embedded player page and parse its stream sources.

        Sends the hosting page as Referer, unpacks any p.a.c.k.e.r-packed
        javascript, and hands the resulting html to the shared source parser.
        """
        player_html = self._http_get(iframe_url,
                                     headers={'Referer': page_url},
                                     cache_limit=.5)
        if jsunpack.detect(player_html):
            player_html = jsunpack.unpack(player_html)
        return scraper_utils.parse_sources_list(self, player_html)
Ejemplo n.º 2
0
 def __get_links(self, iframe_src, page_url):
     """Collect stream sources from an embedded player page.

     Fetches ``iframe_src`` (with the hosting page as Referer), unpacks every
     p.a.c.k.e.r-packed <script> block, and parses sources out of each one.

     Returns a dict mapping stream url -> source info.
     """
     sources = {}
     headers = {'Referer': page_url}
     html = self._http_get(iframe_src, headers=headers, cache_limit=1)
     for match in re.finditer(r'(eval\(function\(.*?)</script>', html,
                              re.DOTALL):
         js_data = jsunpack.unpack(match.group(1))
         js_data = js_data.replace('\\', '')
         # Accumulate results from every packed script; the previous code
         # reassigned |sources| each iteration, keeping only the last block.
         sources.update(scraper_utils.parse_sources_list(self, js_data))
     return sources
Ejemplo n.º 3
0
    def __get_embedded_sources(self, html):
        """Extract mp4 stream urls and a subtitle hint from a player page.

        Returns a dict ``{'sources': [url, ...], 'subs': label}`` where
        ``subs`` is 'Turkish subtitles' when the player declares no caption
        track (i.e. subtitles appear hardcoded into the video).
        """
        sources = []
        # If caption tracks exist, the subtitles aren't hardcoded.
        subs = '' if re.search(r'"?kind"?\s*:\s*"?captions"?', html) else 'Turkish subtitles'
        for attrs, _content in dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req='src'):
            sources.append(attrs['src'])

        # Unpack packed javascript and append it so parse_sources_list can
        # see urls that are only present in the obfuscated code.
        for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
            js_data = jsunpack.unpack(match.group(1))
            js_data = js_data.replace('\\', '')
            html += js_data

        # parse_sources_list returns a dict; iterating it yields the urls,
        # so extend() replaces the previous no-op copying comprehension.
        sources.extend(scraper_utils.parse_sources_list(self, html, var="source"))
        return {'sources': sources, 'subs': subs}
Ejemplo n.º 4
0
 def __get_cookies(self, html):
     """Recover the cookie set by packed javascript on the page.

     Unpacks every p.a.c.k.e.r-packed <script>, then looks for a
     ``document.cookie = 'name=value...'`` style assignment.

     Returns a one-entry ``{name: value}`` dict, or ``{}`` when no cookie
     can be extracted (best effort — never raises).
     """
     try:
         js_code = ''
         for match in re.finditer(r'(eval\(function\(.*?)</script>', html, re.DOTALL):
             js_data = jsunpack.unpack(match.group(1))
             js_data = js_data.replace('\\', '')
             js_code += js_data

         match = re.search(r"cookie\s*=\s*'([^;']+)", js_code)
         parts = match.group(1).split('=')
         cookies = {parts[0]: parts[1]}
     except Exception:
         # Narrowed from a bare except: still best-effort on any parse
         # failure, but no longer swallows SystemExit/KeyboardInterrupt.
         cookies = {}

     return cookies
Ejemplo n.º 5
0
 def __get_embedded(self, html, page_url):
     """Resolve the 'videoreklam' wrapper iframe and map stream urls to
     their quality.

     Returns ``{stream_url: quality}``; empty when the wrapper or its
     iframe is missing.
     """
     empty = {}
     wrapper = dom_parser2.parse_dom(html, 'div', {'id': 'videoreklam'})
     if not wrapper:
         return empty

     iframe = dom_parser2.parse_dom(wrapper[0].content, 'iframe', req='src')
     if not iframe:
         return empty

     html = self._http_get(iframe[0].attrs['src'],
                           headers={'Referer': page_url},
                           cache_limit=.5)
     # Append unpacked javascript so the source parser can see urls hidden
     # inside packed scripts.
     for packed in re.finditer('(eval\(function\(.*?)</script>', html,
                               re.DOTALL):
         unpacked = jsunpack.unpack(packed.group(1))
         html += unpacked.replace('\\', '')

     parsed = scraper_utils.parse_sources_list(self, html, var='source')
     return dict((url, info['quality'])
                 for url, info in parsed.iteritems())
    def __get_slice(self, html):
        """Derive the decryption slice from the page's packed javascript.

        Finds the custom ``alphabet`` string, unpacks all packed scripts,
        and dispatches to the matching variant decoder.

        Raises:
            scraper.ScrapeError: when no alphabet definition is found.
        """
        found = re.search("alphabet\s*=\s*'([^']+)", html)
        if not found:
            raise scraper.ScrapeError('No Alphabet Found')
        alphabet = found.group(1)

        js_code = ''
        for packed in re.finditer('(eval\(function\(.*?)</script>', html,
                                  re.DOTALL):
            unpacked = jsunpack.unpack(packed.group(1))
            js_code += unpacked.replace('\\', '')

        # Two obfuscation variants exist; 'charCodeAt' marks the first one.
        if 'charCodeAt' in js_code:
            return self.__get_slice1(js_code, alphabet)
        return self.__get_slice2(js_code, alphabet)
Ejemplo n.º 7
0
    def __get_page_links(self, html):
        """Turn the page's packed-javascript stream list into hoster dicts.

        Every source found is treated as a direct stream.
        """
        # Expose urls hidden in packed scripts by appending the unpacked
        # javascript to the html before parsing.
        for packed in re.finditer('(eval\(function\(.*?)</script>', html,
                                  re.DOTALL):
            unpacked = jsunpack.unpack(packed.group(1))
            html += unpacked.replace('\\', '')

        hosters = []
        sources = scraper_utils.parse_sources_list(self, html)
        for stream_url in sources:
            hosters.append({
                'multi-part': False,
                'url': stream_url,
                'class': self,
                'quality': sources[stream_url]['quality'],
                'host': scraper_utils.get_direct_hostname(self, stream_url),
                'rating': None,
                'views': None,
                'direct': True
            })
        return hosters
    def get_sources(self, video):
        """Return a list of hoster dicts for *video*.

        Fetches the matched page, derives a fallback quality from the
        'poster-qulabel' element, then walks every 'tab_box' iframe:
        each embedded player is fetched, packed javascript is unpacked so
        direct stream urls become parseable, and any iframe that yields no
        direct sources is kept as a single non-direct hoster.
        """
        hosters = []
        source_url = self.get_url(video)
        # FORCE_NO_MATCH is a sentinel meaning there is no usable url.
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        # Page-level quality label; used for sources that don't carry their
        # own quality. Unknown/missing labels default to HIGH.
        q_str = dom_parser2.parse_dom(html, 'div', {'class': 'poster-qulabel'})
        if q_str:
            q_str = q_str[0].content.replace(' ', '').upper()
            page_quality = Q_MAP.get(q_str, QUALITIES.HIGH)
        else:
            page_quality = QUALITIES.HIGH

        for _attrs, fragment in dom_parser2.parse_dom(html, 'div',
                                                      {'class': 'tab_box'}):
            iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='src')
            if iframe_url:
                iframe_url = iframe_url[0].attrs['src']
                # Skip youtube embeds (presumably trailers, not streams).
                if 'youtube' in iframe_url: continue

                html = self._http_get(iframe_url,
                                      headers={'Referer': page_url},
                                      cache_limit=.5)
                # Unpack each packed <script> and append the result so the
                # source-list parser can see the real stream urls.
                for match in re.finditer('(eval\(function\(.*?)</script>',
                                         html, re.DOTALL):
                    js_data = jsunpack.unpack(match.group(1))
                    js_data = js_data.replace('\\', '')
                    html += js_data

                sources = scraper_utils.parse_sources_list(self, html)
                if not sources:
                    # No direct streams parsed: fall back to offering the
                    # iframe itself as a non-direct source at page quality.
                    sources = {
                        iframe_url: {
                            'quality': page_quality,
                            'direct': False
                        }
                    }

                for source, values in sources.iteritems():
                    direct = values['direct']
                    if direct:
                        host = scraper_utils.get_direct_hostname(self, source)
                        if host == 'gvideo':
                            # gvideo urls encode their own quality.
                            quality = scraper_utils.gv_get_quality(source)
                        else:
                            quality = values['quality']
                        # Direct streams get UA/Referer appended so playback
                        # requests carry the headers the host expects.
                        source += scraper_utils.append_headers({
                            'User-Agent':
                            scraper_utils.get_ua(),
                            'Referer':
                            page_url
                        })
                    else:
                        host = urlparse.urlparse(source).hostname
                        quality = scraper_utils.get_quality(
                            video, host, values['quality'])

                    hoster = {
                        'multi-part': False,
                        'url': source,
                        'host': host,
                        'class': self,
                        'quality': quality,
                        'views': None,
                        'rating': None,
                        'direct': direct
                    }
                    hosters.append(hoster)

        return hosters