def get_sources(self, video):
    """Collect hosters for |video| via the site's AJAX player endpoint.

    Extracts a ``view_id`` from the show page, queries the ajax endpoint once
    per language variant ('or' presumably = original audio, 'tr' = Turkish --
    TODO confirm against the site), then harvests both embedded iframe hosts
    and directly parsed stream URLs.

    Fixes vs. original: raw-string regex literals, and the iframe URL inside
    the inner loop no longer shadows the ``src`` dom-parse result list.

    :param video: video object accepted by self.get_url()
    :return: list of hoster dicts (possibly empty)
    """
    hosters = []
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    match = re.search(r'var\s+view_id\s*=\s*"([^"]+)', html)
    if not match:
        return hosters
    view_id = match.group(1)

    for lang in ['or', 'tr']:
        subs = lang == 'tr'  # only the Turkish variant carries subtitles
        view_data = {'id': view_id, 'tip': 'view', 'dil': lang}
        html = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25)
        html = html.strip()
        html = re.sub(r'\\n|\\t', '', html)  # strip literal escape sequences from the JS payload
        match = re.search(r'var\s+sources\s*=\s*(\[.*?\])', html)
        if match:
            raw_data = match.group(1).replace('\\', '')
        else:
            raw_data = html

        js_data = scraper_utils.parse_json(raw_data, self.ajax_url)
        if 'data' not in js_data: continue

        player = dom_parser2.parse_dom(js_data['data'], 'iframe', req='src')
        if not player: continue

        html = self._http_get(player[0].attrs['src'], cache_limit=.25)
        for attrs, _content in dom_parser2.parse_dom(html, 'iframe', req='src'):
            iframe_src = attrs['src']
            if not iframe_src.startswith('http'): continue
            # embedded (non-direct) hosts: assume 720p since no label is exposed
            sources.append({'label': '720p', 'file': iframe_src, 'direct': False, 'subs': subs})

        sources += [{'file': url, 'subs': subs} for url in scraper_utils.parse_sources_list(self, html).iterkeys()]
        if sources: break  # first language variant with any streams wins

    for source in sources:
        direct = source.get('direct', True)
        stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                continue  # direct stream without any quality hint: skip it
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.height_get_quality(source['label'])

        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                  'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
        if source.get('subs'): hoster['subs'] = 'Turkish Subtitles'
        hosters.append(hoster)

    return hosters
def get_sources(self, video):
    """Return direct hosters for |video| from the 'film-container' iframe player."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'film-container'})
    if not fragment:
        return hosters

    iframes = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
    if not iframes:
        return hosters

    iframe_url = scraper_utils.urljoin(self.base_url, iframes[0].attrs['src'])
    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    sources = scraper_utils.parse_sources_list(self, html)
    for stream in sources:
        quality = sources[stream]['quality']
        host = scraper_utils.get_direct_hostname(self, stream)
        stream_url = stream + scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url})
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                  'views': None, 'rating': None, 'url': stream_url, 'direct': True}
        match = re.search('(\d+[a-z]bps)', stream)
        if match:
            hoster['extra'] = match.group(1)  # bitrate tag, e.g. "1000kbps"
        hosters.append(hoster)

    # sort so the highest bitrate tag comes first (lexicographic on the tag)
    hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
    return hosters
def __parse_streams(self, iframe_url, page_url):
    """Fetch |iframe_url| (sending |page_url| as referer), unpack any packed
    JS in the response, and return the parsed stream-source mapping."""
    referer_headers = {'Referer': page_url}
    raw_html = self._http_get(iframe_url, headers=referer_headers, cache_limit=.5)
    html = jsunpack.unpack(raw_html) if jsunpack.detect(raw_html) else raw_html
    return scraper_utils.parse_sources_list(self, html)
def __get_embed_links(self, html):
    """Build hoster dicts (with Turkish subtitles) for every stream in |html|.

    Bug fix: ``parse_sources_list`` returns a mapping keyed by stream URL, so
    the quality must be read from the mapping value. The original indexed the
    string key itself (``source['quality']``), which raises ``TypeError``;
    sibling scrapers in this file use ``sources[source]['quality']``.
    """
    hosters = []
    sources = scraper_utils.parse_sources_list(self, html)
    for source in sources:
        quality = sources[source]['quality']  # was: source['quality'] (TypeError on str key)
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, source),
                  'class': self, 'quality': quality, 'views': None, 'rating': None,
                  'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Gather hosters from the page's tab contents, embedded iframes, and
    mirror links; streams without an explicit quality inherit the page-wide
    best quality."""
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for _attrs, tab in dom_parser2.parse_dom(html, 'div', {'class': 'tabcontent'}):
        for attrs, _content in dom_parser2.parse_dom(tab, 'source', req='src'):
            stream = attrs['src'] + scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
            sources[stream] = {'quality': None, 'direct': True}

        iframe = dom_parser2.parse_dom(tab, 'iframe', req='src')
        if not iframe:
            continue
        iframe_url = iframe[0].attrs['src']
        if 'songs2dl' in iframe_url:
            # this host serves a direct sources list; fetch and parse it
            iframe_html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=1)
            sources.update(scraper_utils.parse_sources_list(self, iframe_html))
        else:
            sources[iframe_url] = {'quality': None, 'direct': False}

    sources.update(self.__get_mirror_links(html, video))
    page_quality = self.__get_best_quality(sources)
    for stream, values in sources.iteritems():
        direct = values['direct']
        host = (scraper_utils.get_direct_hostname(self, stream) if direct
                else urlparse.urlparse(stream).hostname)
        quality = page_quality if values['quality'] is None else values['quality']
        hosters.append({'multi-part': False, 'host': host, 'class': self, 'views': None,
                        'url': stream, 'rating': None, 'quality': quality, 'direct': direct})
    return hosters
def __get_embedded_links(self, html, sub):
    """Parse stream sources out of escaped |html| and tag each hoster with the
    |sub| subtitle label."""
    unescaped = html.replace('\\"', '"').replace('\\/', '/')
    sources = scraper_utils.parse_sources_list(self, unescaped)
    hosters = []
    for stream in sources:
        info = sources[stream]
        hosters.append({'multi-part': False,
                        'host': scraper_utils.get_direct_hostname(self, stream),
                        'class': self, 'quality': info['quality'], 'views': None,
                        'rating': None, 'url': stream, 'direct': info['direct'],
                        'subs': sub})
    return hosters
def __get_embedded(self, html, page_url):
    """Resolve the 'videoreklam' iframe and return {stream_url: quality}."""
    sources = {}
    wrapper = dom_parser2.parse_dom(html, 'div', {'id': 'videoreklam'})
    if not wrapper:
        return sources
    iframe = dom_parser2.parse_dom(wrapper[0].content, 'iframe', req='src')
    if not iframe:
        return sources

    html = self._http_get(iframe[0].attrs['src'], headers={'Referer': page_url}, cache_limit=.5)
    # append each unpacked JS payload so parse_sources_list can see the URLs;
    # finditer keeps scanning the original string, so appending is safe here
    for packed in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
        html += jsunpack.unpack(packed.group(1)).replace('\\', '')
    return dict((key, value['quality'])
                for key, value in scraper_utils.parse_sources_list(self, html, var='source').iteritems())
def get_sources(self, video):
    """Return direct hosters parsed from the page's 'playex' player block."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment:
        html = fragment[0].content  # narrow parsing to the player container

    links = scraper_utils.parse_sources_list(self, html)
    for link in links:
        stream_url = link
        if self.base_url in link:
            # same-site links redirect to the real stream; resolve with a HEAD request
            redir_url = self._http_get(link, headers={'Referer': url},
                                       allow_redirect=False, method='HEAD')
            if redir_url.startswith('http'):
                stream_url = redir_url

        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = links[link]['quality']

        stream_url += scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua(), 'Referer': url})
        hosters.append({'multi-part': False, 'url': stream_url, 'host': host,
                        'class': self, 'quality': quality, 'views': None,
                        'rating': None, 'direct': True})
    return hosters