def __get_direct_links(self, iframe_url, page_url):
    # Fetch the embed iframe and collect direct stream links from its JS
    # source lists; falls back to a raw iframe embed when none are found.
    sources = []
    headers = {'Referer': page_url}
    html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
    # if captions exist, then they aren't hardcoded
    subs = '' if re.search('kind\s*:\s*"captions"', html) else 'Turkish subtitles'
    streams = scraper_utils.parse_sources_list(self, html, key='VideoSources')
    streams.update(scraper_utils.parse_sources_list(self, html, var='video'))
    for stream_url in streams:
        quality = streams[stream_url]['quality']
        if 'v.asp' in stream_url:
            # local redirector: resolve the real stream URL with a HEAD request
            stream_url = scraper_utils.urljoin(self.base_url, stream_url)
            stream_redirect = self._http_get(stream_url, allow_redirect=False, method='HEAD', cache_limit=0)
            if stream_redirect.startswith('http'): stream_url = stream_redirect
        sources.append({'stream_url': stream_url, 'subs': subs, 'quality': quality, 'direct': True})
    if sources: return sources
    # no direct streams found: surface the first iframe as a hosted source
    iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
    if not iframe_url: return sources
    sources.append({'stream_url': iframe_url[0].attrs['src'], 'subs': subs, 'quality': QUALITIES.HD720, 'direct': False})
    return sources
def get_sources(self, video):
    # Resolve the video's page, parse its JS source list, and return direct
    # hosters; same-site redirector links are resolved via HEAD first.
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment: html = fragment[0].content
    links = scraper_utils.parse_sources_list(self, html)
    for link in links:
        stream_url = link
        if self.base_url in link:
            # link points back at this site: follow the redirect manually
            redir_url = self._http_get(link, headers={'Referer': url}, allow_redirect=False, method='HEAD')
            if redir_url.startswith('http'):
                stream_url = redir_url
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = links[link]['quality']
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(source)
    return hosters
def __parse_streams(self, iframe_url, page_url):
    """Fetch an embed iframe, unpack any packed JS, and return its source list."""
    page = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    if jsunpack.detect(page):
        page = jsunpack.unpack(page)
    return scraper_utils.parse_sources_list(self, page)
def get_sources(self, video):
    # Query the AJAX view endpoint once per language ('or' first, then 'tr' —
    # presumably original vs. Turkish-subtitled; confirm against the site),
    # then harvest iframe embeds and direct source lists from the response.
    hosters = []
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    match = re.search('var\s+view_id\s*=\s*"([^"]+)', html)
    if not match: return hosters
    view_id = match.group(1)
    for lang in ['or', 'tr']:
        subs = True if lang == 'tr' else False
        view_data = {'id': view_id, 'tip': 'view', 'dil': lang}
        html = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25)
        html = html.strip()
        # strip literal \n / \t escape sequences before JSON extraction
        html = re.sub(r'\\n|\\t', '', html)
        match = re.search('var\s+sources\s*=\s*(\[.*?\])', html)
        if match:
            raw_data = match.group(1)
            raw_data = raw_data.replace('\\', '')
        else:
            raw_data = html
        js_data = scraper_utils.parse_json(raw_data, self.ajax_url)
        if 'data' not in js_data: continue
        src = dom_parser2.parse_dom(js_data['data'], 'iframe', req='src')
        if not src: continue
        html = self._http_get(src[0].attrs['src'], cache_limit=.25)
        for attrs, _content in dom_parser2.parse_dom(html, 'iframe', req='src'):
            src = attrs['src']
            if not src.startswith('http'): continue
            sources.append({'label': '720p', 'file': src, 'direct': False, 'subs': subs})
        sources += [{'file': url, 'subs': subs} for url in scraper_utils.parse_sources_list(self, html).iterkeys()]
        # stop at the first language that yields anything
        if sources: break
    for source in sources:
        direct = source.get('direct', True)
        stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                # direct source with no label and no gvideo quality: skip it
                continue
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.height_get_quality(source['label'])
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
        if source.get('subs'): hoster['subs'] = 'Turkish Subtitles'
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    # Gather <source> tags and iframe embeds from each tab plus mirror links;
    # sources without an explicit quality inherit the page-level best quality.
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for _attrs, div in dom_parser2.parse_dom(html, 'div', {'class': 'tabcontent'}):
        for attrs, _content in dom_parser2.parse_dom(div, 'source', req='src'):
            source = attrs['src'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
            sources[source] = {'quality': None, 'direct': True}
        iframe_url = dom_parser2.parse_dom(div, 'iframe', req='src')
        if iframe_url:
            iframe_url = iframe_url[0].attrs['src']
            if 'songs2dl' in iframe_url:
                # songs2dl embeds carry their own JS source list; fetch and parse
                headers = {'Referer': page_url}
                iframe_html = self._http_get(iframe_url, headers=headers, cache_limit=1)
                sources.update(scraper_utils.parse_sources_list(self, iframe_html))
            else:
                sources[iframe_url] = {'quality': None, 'direct': False}
    sources.update(self.__get_mirror_links(html, video))
    page_quality = self.__get_best_quality(sources)
    for source, values in sources.iteritems():
        direct = values['direct']
        if direct:
            host = scraper_utils.get_direct_hostname(self, source)
        else:
            host = urlparse.urlparse(source).hostname
        if values['quality'] is None:
            values['quality'] = page_quality
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': values['quality'], 'direct': direct}
        hosters.append(hoster)
    return hosters
def __get_links(self, iframe_src, page_url):
    """Unpack every packed JS block in the iframe page and collect all source lists.

    Bug fix: the original reassigned ``sources`` on every regex match, so only
    the links from the *last* packed script survived; accumulate with
    ``update()`` instead so no script's links are dropped.
    """
    sources = {}
    headers = {'Referer': page_url}
    html = self._http_get(iframe_src, headers=headers, cache_limit=1)
    for match in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
        js_data = jsunpack.unpack(match.group(1))
        js_data = js_data.replace('\\', '')
        sources.update(scraper_utils.parse_sources_list(self, js_data))
    return sources
def __get_sources(self, html, page_url):
    # Walk iframes under the video-content div. Same-site iframes are fetched
    # and parsed for direct links; off-site iframes become non-direct hosters.
    sources = []
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'video-content'})
    if fragment:
        referer = page_url
        iframes = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
        # NOTE: the list is deliberately extended while iterating so nested
        # iframes discovered inside a same-site embed are processed too
        for attrs, _content in iframes:
            iframe_url = attrs['src']
            if self.base_url in iframe_url:
                headers = {'Referer': referer}
                html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
                referer = iframe_url
                links = scraper_utils.parse_sources_list(self, html)
                if links:
                    for link, values in links.iteritems():
                        host = scraper_utils.get_direct_hostname(self, link)
                        if host == 'gvideo':
                            quality = scraper_utils.gv_get_quality(link)
                        else:
                            quality = values['quality']
                        source = {'multi-part': False, 'url': link, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
                        sources.append(source)
                else:
                    # no direct links yet: queue this page's iframes for the walk
                    iframes += dom_parser2.parse_dom(html, 'iframe', req='src')
            else:
                host = urlparse.urlparse(iframe_url).hostname
                source = {'multi-part': False, 'url': iframe_url, 'host': host, 'class': self, 'quality': QUALITIES.HIGH, 'views': None, 'rating': None, 'direct': False}
                sources.append(source)
    return sources
def get_sources(self, video):
    # Scan player-data links on the page; embed players are fetched and parsed
    # for direct links, external players become non-direct hosters whose
    # quality is mapped from the link label.
    hosters = []
    sources = {}
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for match in re.finditer('player-data="([^"]+)[^>]+episode-data="([^"]+)[^>]*>(.*?)</a>', html, re.DOTALL):
        player_url, ep_id, label = match.groups()
        # for episodes, only keep players matching the requested episode
        if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match(video, ep_id): continue
        label = label.strip()
        headers = {'Referer': page_url}
        if re.match('https?://embed', player_url):
            src_html = self._http_get(player_url, headers=headers, cache_limit=.5)
            sources.update(scraper_utils.parse_sources_list(self, src_html))
            sources.update(self.__get_sources(src_html, label))
        else:
            sources[player_url] = {'direct': False, 'quality': Q_MAP.get(label.upper(), QUALITIES.HIGH)}
    for source, value in sources.iteritems():
        direct = value['direct']
        quality = value['quality']
        if direct:
            host = scraper_utils.get_direct_hostname(self, source)
            stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        else:
            host = urlparse.urlparse(source).hostname
            stream_url = source
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
        hosters.append(hoster)
    return hosters
def __get_links(self, html):
    """Decode the obfuscated source list and return direct hosters for it."""
    hosters = []
    encoded = re.search('tlas\("([^"]+)', html)
    if not encoded:
        return hosters
    # a 13-shift Caesar is applied before and after the helper to recover
    # the plaintext source list
    plaintext = self.__caesar(self.__get_f(self.__caesar(encoded.group(1), 13)), 13)
    sources = scraper_utils.parse_sources_list(self, plaintext)
    for source, values in sources.iteritems():
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Cookie': self._get_stream_cookies()})
        hoster = {
            'multi-part': False,
            'url': stream_url,
            'host': scraper_utils.get_direct_hostname(self, stream_url),
            'class': self,
            'quality': values['quality'],
            'rating': None,
            'views': None,
            'direct': True,
        }
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    # Follow the playex iframe; a Google <object> embed is expanded via
    # parse_google, otherwise the iframe's JS source list is used directly.
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment: html = fragment[0].content
    iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
    if not iframe_url: return hosters
    iframe_url = iframe_url[0].attrs['src']
    if iframe_url.startswith('/'):
        iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    obj = dom_parser2.parse_dom(html, 'object', req='data')
    if obj:
        streams = dict((stream_url, {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}) for stream_url in scraper_utils.parse_google(self, obj[0].attrs['data']))
    else:
        streams = scraper_utils.parse_sources_list(self, html)
    for stream_url, values in streams.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = values['quality']
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(source)
    return hosters
def __get_sources(self, html):
    """Collect JS source-list links plus <source>/<iframe> embeds, then post-process."""
    sources = scraper_utils.parse_sources_list(self, html)
    tags = dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req='src')
    tags += dom_parser2.parse_dom(html, 'iframe', req='src')
    for tag in tags:
        link = tag.attrs['src']
        # gvideo links are playable directly; everything else is a hosted embed
        if scraper_utils.get_direct_hostname(self, link) == 'gvideo':
            sources[link] = {'quality': scraper_utils.gv_get_quality(link), 'direct': True}
        else:
            sources[link] = {'quality': QUALITIES.HD720, 'direct': False}
    return self.__proc_sources(sources)
def __get_embedded_sources(self, html):
    """Return mp4 <source> URLs plus links hidden in packed JS blocks."""
    sources = []
    # if captions exist, then they aren't hardcoded
    subs = '' if re.search('''"?kind"?\s*:\s*"?captions"?''', html) else 'Turkish subtitles'
    for attrs, _content in dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req='src'):
        sources.append(attrs['src'])
    # append unpacked JS so the source-list parser sees its links too
    for match in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
        unpacked = jsunpack.unpack(match.group(1)).replace('\\', '')
        html += unpacked
    sources.extend(scraper_utils.parse_sources_list(self, html, var="source"))
    return {'sources': sources, 'subs': subs}
def get_sources(self, video):
    # Parse the film-container iframe for direct links; sources carrying a
    # bitrate tag (e.g. "3mbps") are sorted highest-first via 'extra'.
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'film-container'})
    if fragment:
        iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
        if iframe_url:
            iframe_url = scraper_utils.urljoin(self.base_url, iframe_url[0].attrs['src'])
            headers = {'Referer': page_url}
            html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
            sources = scraper_utils.parse_sources_list(self, html)
            for source in sources:
                quality = sources[source]['quality']
                host = scraper_utils.get_direct_hostname(self, source)
                stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url})
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                # keep the bitrate tag so equal-quality sources can be ranked
                match = re.search('(\d+[a-z]bps)', source)
                if match: hoster['extra'] = match.group(1)
                hosters.append(hoster)
    hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
    return hosters
def __get_embedded(self, html, page_url):
    """Resolve the videoreklam iframe and map each stream URL to its quality."""
    sources = {}
    wrapper = dom_parser2.parse_dom(html, 'div', {'id': 'videoreklam'})
    if not wrapper:
        return sources
    frame = dom_parser2.parse_dom(wrapper[0].content, 'iframe', req='src')
    if not frame:
        return sources
    html = self._http_get(frame[0].attrs['src'], headers={'Referer': page_url}, cache_limit=.5)
    # append unpacked JS so its source list is visible to the parser
    for match in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
        html += jsunpack.unpack(match.group(1)).replace('\\', '')
    parsed = scraper_utils.parse_sources_list(self, html, var='source')
    return dict((url, info['quality']) for url, info in parsed.iteritems())
def __get_embed_links(self, html):
    """Parse the embed page's source list into direct hosters.

    Bug fix: the loop iterates the dict's *keys* (URL strings), so the
    original ``source['quality']`` subscripted a string and raised TypeError
    as soon as any source was found; read ``sources[source]['quality']``.
    """
    hosters = []
    sources = scraper_utils.parse_sources_list(self, html)
    for source in sources:
        quality = sources[source]['quality']
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, source), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    # Movies expose player pages via orange buttons, episodes via numbered
    # links; each player page is either a GK-style {link: ...} payload or a
    # plain JS source list.
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    views = None
    fragment = dom_parser2.parse_dom(html, 'span', {'class': 'post-views'})
    if fragment:
        views = re.sub('[^\d]', '', fragment[0].content)
    iframe_urls = []
    if video.video_type == VIDEO_TYPES.MOVIE:
        iframe_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(html, 'a', {'class': ['orange', 'abutton']}, req='href')]
    else:
        for label, link in self.__get_episode_links(html):
            if int(label) == int(video.episode):
                iframe_urls.append(link)
    for iframe_url in iframe_urls:
        headers = {'Referer': url}
        html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
        match = re.search('{link\s*:\s*"([^"]+)', html)
        if match:
            sources = self.__get_gk_links(match.group(1), iframe_url)
        else:
            sources = scraper_utils.parse_sources_list(self, html)
        for source in sources:
            stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            direct = sources[source]['direct']
            quality = sources[source]['quality']
            if sources[source]['direct']:
                host = scraper_utils.get_direct_hostname(self, source)
            else:
                host = urlparse.urlparse(source).hostname
            hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality, 'host': host, 'rating': None, 'views': views, 'direct': direct}
            hosters.append(hoster)
    return hosters
def __get_embedded_links(self, html, sub):
    """Unescape the embedded markup and convert its source list to hosters."""
    hosters = []
    cleaned = html.replace('\\"', '"').replace('\\/', '/')
    for link, info in scraper_utils.parse_sources_list(self, cleaned).iteritems():
        hoster = {
            'multi-part': False,
            'host': scraper_utils.get_direct_hostname(self, link),
            'class': self,
            'quality': info['quality'],
            'views': None,
            'rating': None,
            'url': link,
            'direct': info['direct'],
            'subs': sub,
        }
        hosters.append(hoster)
    return hosters
def __get_page_links(self, html):
    """Unpack packed JS on the page and turn every listed stream into a hoster."""
    hosters = []
    # append unpacked JS so the source-list parser can see its links
    for packed in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
        html += jsunpack.unpack(packed.group(1)).replace('\\', '')
    for link, info in scraper_utils.parse_sources_list(self, html).iteritems():
        hosters.append({
            'multi-part': False,
            'url': link,
            'class': self,
            'quality': info['quality'],
            'host': scraper_utils.get_direct_hostname(self, link),
            'rating': None,
            'views': None,
            'direct': True,
        })
    return hosters
def get_sources(self, video):
    # Collect hosted links from the 'enlaces' list and the player divs;
    # '//player' iframes are fetched for direct links, all other links keep a
    # quality derived from the link text itself.
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    sources = {}
    for _attrs, fragment in dom_parser2.parse_dom(html, 'ul', {'class': 'enlaces'}):
        for attrs, _content in dom_parser2.parse_dom(fragment, 'a', req='href'):
            stream_url = attrs['href']
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(stream_url)
            else:
                meta = scraper_utils.parse_episode_link(stream_url)
            sources.update({stream_url: {'quality': scraper_utils.height_get_quality(meta['height']), 'direct': False}})
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}) + dom_parser2.parse_dom(html, 'div', {'id': re.compile('player\d+')}):
        for attrs, _content in dom_parser2.parse_dom(fragment, 'iframe', req='src') + dom_parser2.parse_dom(fragment, 'iframe', req='data-lazy-src'):
            # lazy-loaded iframes keep the real URL in data-lazy-src
            iframe_url = attrs.get('src', '')
            if not iframe_url.startswith('http'):
                iframe_url = attrs.get('data-lazy-src', '')
                if not iframe_url.startswith('http'): continue
            if '//player' in iframe_url:
                html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
                sources.update(scraper_utils.parse_sources_list(self, html))
            else:
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(iframe_url)
                else:
                    meta = scraper_utils.parse_episode_link(iframe_url)
                sources.update({iframe_url: {'quality': scraper_utils.height_get_quality(meta['height']), 'direct': False}})
    for stream_url, values in sources.iteritems():
        direct = values['direct']
        quality = values['quality']
        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        else:
            stream_url = stream_url
            host = urlparse.urlparse(stream_url).hostname
        hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality, 'host': host, 'rating': None, 'views': None, 'direct': direct}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    # The page-level quality label ('poster-qulabel') is the fallback quality
    # for any tab iframe whose embed page yields no parseable source list.
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    q_str = dom_parser2.parse_dom(html, 'div', {'class': 'poster-qulabel'})
    if q_str:
        q_str = q_str[0].content.replace(' ', '').upper()
        page_quality = Q_MAP.get(q_str, QUALITIES.HIGH)
    else:
        page_quality = QUALITIES.HIGH
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'tab_box'}):
        iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='src')
        if iframe_url:
            iframe_url = iframe_url[0].attrs['src']
            # youtube iframes are trailers, not streams
            if 'youtube' in iframe_url: continue
            html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
            # append unpacked JS so the source-list parser can see its links
            for match in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
                js_data = jsunpack.unpack(match.group(1))
                js_data = js_data.replace('\\', '')
                html += js_data
            sources = scraper_utils.parse_sources_list(self, html)
            if not sources:
                sources = {iframe_url: {'quality': page_quality, 'direct': False}}
            for source, values in sources.iteritems():
                direct = values['direct']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, source)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(source)
                    else:
                        quality = values['quality']
                    source += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                else:
                    host = urlparse.urlparse(source).hostname
                    quality = scraper_utils.get_quality(video, host, values['quality'])
                hoster = {'multi-part': False, 'url': source, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                hosters.append(hoster)
    return hosters
def __get_sources(self, html, page_url, subs):
    # Prefer the dzst-player JSON payload (a height -> stream URL map under
    # 'tr'); otherwise drill into the video-player fragment, recursing into
    # same-site (dizist) iframes and parsing other embeds for source lists.
    sources = {}
    player_div = dom_parser2.parse_dom(html, 'div', {'class': 'dzst-player'}, req='data-dzst-player')
    if player_div:
        # NOTE(review): the replace below looks like a no-op ('=' -> '=');
        # presumably it originally decoded an entity/escaped '=' — confirm
        # against the upstream scraper before changing.
        js_html = scraper_utils.cleanse_title(player_div[0].attrs['data-dzst-player'].replace('=', '='))
        js_data = scraper_utils.parse_json(js_html, page_url)
        links = js_data.get('tr', {})
        for height in links:
            stream_url = links[height]
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(height)
            sources[stream_url] = {'direct': True, 'subs': subs, 'quality': quality}
    else:
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'video-player'})
        if fragment:
            fragment = fragment[0].content
            # strip ad players so their iframes aren't picked up below
            for _attrs, div in dom_parser2.parse_dom(fragment, 'div', {'class': 'ad-player'}):
                fragment = fragment.replace(div, '')
            iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='src')
            if iframe_url:
                iframe_url = iframe_url[0].attrs['src']
                if 'dizist' in iframe_url:
                    # same-site embed: fetch it and recurse
                    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=1)
                    return self.__get_sources(html, page_url, subs)
                else:
                    parts = urlparse.urlparse(iframe_url)
                    if not parts.hostname:
                        # relative iframe URL: resolve and parse its source list
                        iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
                        html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=1)
                        sources = scraper_utils.parse_sources_list(self, html, var='sources')
                        for value in sources.itervalues():
                            value['subs'] = subs
                    else:
                        if scraper_utils.get_direct_hostname(self, iframe_url) == 'gvideo':
                            direct = True
                        else:
                            direct = False
                        sources[iframe_url] = {'direct': direct, 'subs': subs, 'quality': QUALITIES.HD720}
            else:
                sources = scraper_utils.parse_sources_list(self, fragment, var='sources')
                for value in sources.itervalues():
                    value['subs'] = subs
    return sources
def get_sources(self, video):
    # miradetodo iframes are fully resolved (nav link, GK links, amazon links,
    # and JS source lists); any other iframe becomes a non-direct hoster.
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}):
        for attrs, _content in dom_parser2.parse_dom(fragment, 'iframe', req='src') + dom_parser2.parse_dom(fragment, 'iframe', req='data-lazy-src'):
            # lazy-loaded iframes keep the real URL in data-lazy-src
            iframe_url = attrs.get('src', '')
            if not iframe_url.startswith('http'):
                iframe_url = attrs.get('data-lazy-src', '')
                if not iframe_url.startswith('http'): continue
            if 'miradetodo' in iframe_url:
                html = self._http_get(iframe_url, cache_limit=.5)
                fragment = dom_parser2.parse_dom(html, 'nav', {'class': 'nav'})
                if fragment:
                    stream_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
                    if stream_url:
                        html = self._http_get(stream_url[0].attrs['href'], cache_limit=.5)
                        sources.update(self.__get_gk_links(html))
                        sources.update(self.__get_gk_links2(html))
                        sources.update(self.__get_amazon_links(html))
                        sources.update(scraper_utils.parse_sources_list(self, html))
            else:
                host = urlparse.urlparse(iframe_url).hostname
                source = {'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH), 'direct': False}
                sources.update({iframe_url: source})
    for source in sources:
        stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
        direct = sources[source]['direct']
        quality = sources[source]['quality']
        host = scraper_utils.get_direct_hostname(self, source) if direct else urlparse.urlparse(source).hostname
        hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality, 'host': host, 'rating': None, 'views': None, 'direct': direct}
        hosters.append(hoster)
    return hosters
def __get_direct_links(self, stream_url):
    """Fetch stream_url and parse the JS source list out of the response."""
    html = self._http_get(stream_url, cache_limit=1)
    return scraper_utils.parse_sources_list(self, html)
def get_sources(self, video):
    """Collect hosters from the numbered embeds on the page.

    play-en.php embeds are GK-decrypted into picasa/gvideo streams; any other
    iframe is fetched and its JS source list parsed, with download.php links
    resolved to their redirect target first.

    Bug fix: the original deleted/inserted entries in ``temp_sources`` while
    iterating the dict, which raises ``RuntimeError: dictionary changed size
    during iteration``. Replacement pairs are now collected first and applied
    after the loop.
    """
    source_url = self.get_url(video)
    sources = {}
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=0)
    for match in re.finditer("embeds\[(\d+)\]\s*=\s*'([^']+)", html):
        match = re.search('src="([^"]+)', match.group(2))
        if not match: continue
        iframe_url = match.group(1)
        if 'play-en.php' in iframe_url:
            match = re.search('id=([^"&]+)', iframe_url)
            if match:
                proxy_link = match.group(1)
                proxy_link = proxy_link.split('*', 1)[-1]
                picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                for stream_url in scraper_utils.parse_google(self, picasa_url):
                    sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
        else:
            html = self._http_get(iframe_url, cache_limit=0)
            temp_sources = scraper_utils.parse_sources_list(self, html)
            # resolve download.php redirectors; collect replacements first so
            # the dict is not mutated while being iterated
            replacements = {}
            for source in temp_sources:
                if 'download.php' in source:
                    redir_html = self._http_get(source, allow_redirect=False, method='HEAD', cache_limit=0)
                    if redir_html.startswith('http'):
                        replacements[source] = redir_html
            for old_url, new_url in replacements.iteritems():
                temp_sources[new_url] = temp_sources.pop(old_url)
            sources.update(temp_sources)
            for source in dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req='src'):
                sources[source.attrs['src']] = {'quality': QUALITIES.HD720, 'direct': True, 'referer': iframe_url}
    for source, values in sources.iteritems():
        host = scraper_utils.get_direct_hostname(self, source)
        headers = {'User-Agent': scraper_utils.get_ua()}
        if 'referer' in values:
            headers['Referer'] = values['referer']
        stream_url = source + scraper_utils.append_headers(headers)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(source)
        else:
            quality = values['quality']
        # map any non-standard quality label onto the known quality order
        if quality not in Q_ORDER:
            quality = QUALITY_MAP.get(values['quality'], QUALITIES.HIGH)
        hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(hoster)
    return hosters
def __get_embedded(self, html):
    """Parse the page's JS source list and run it through __proc_sources."""
    parsed = scraper_utils.parse_sources_list(self, html)
    return self.__proc_sources(parsed)