Example #1
def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        sources = {}
        if source_url and source_url != FORCE_NO_MATCH:
            page_url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(page_url, cache_limit=.5)
            sources.update(self.__get_gk_links(html, page_url))
            sources.update(self.__get_iframe_links(html, page_url))

            for source in sources:
                host = self._get_direct_hostname(source)
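                # Kodi convention: header key/value pairs appended after '|' are
                # sent by the player when it fetches the stream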
                stream_url = source + '|User-Agent=%s' % (
                    scraper_utils.get_ua())
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': sources[source],
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': True
                }
                hosters.append(hoster)

        return hosters
Example #2
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
            stream_url = attrs['href']
            if MOVIE_URL in stream_url:
                meta = scraper_utils.parse_movie_link(stream_url)
                stream_url = scraper_utils.pathify_url(
                    stream_url) + scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                quality = scraper_utils.height_get_quality(meta['height'])
                hoster = {
                    'multi-part': False,
                    'host': scraper_utils.get_direct_hostname(self, stream_url),
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': True
                }
                if 'format' in meta: hoster['format'] = meta['format']
                hosters.append(hoster)

        return hosters
Example #3
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        sources = self.__get_posts(html)
        sources.update(self.__get_ajax(html, url))
        sources.update(self.__get_embedded(html, url))
        for source in sources:
            stream_url = source + scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
            host = scraper_utils.get_direct_hostname(self, source)
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': sources[source],
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True,
                'subs': 'Turkish subtitles'
            }
            hosters.append(hoster)

        return hosters
Example #4
 def resolve_link(self, link):
     url = scraper_utils.urljoin(self.base_url, link)
     request = urllib2.Request(url)
     request.add_header('User-Agent', scraper_utils.get_ua())
     request.add_unredirected_header('Host', request.get_host())
     request.add_unredirected_header('Referer', url)
     response = urllib2.urlopen(request)
     return response.geturl()
Example #5
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        sources = {}
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        for _attrs, div in dom_parser2.parse_dom(html, 'div',
                                                 {'class': 'tabcontent'}):
            for attrs, _content in dom_parser2.parse_dom(div,
                                                         'source',
                                                         req='src'):
                source = attrs['src'] + scraper_utils.append_headers(
                    {
                        'User-Agent': scraper_utils.get_ua(),
                        'Referer': page_url
                    })
                sources[source] = {'quality': None, 'direct': True}

            iframe_url = dom_parser2.parse_dom(div, 'iframe', req='src')
            if iframe_url:
                iframe_url = iframe_url[0].attrs['src']
                if 'songs2dl' in iframe_url:
                    headers = {'Referer': page_url}
                    iframe_html = self._http_get(iframe_url,
                                                 headers=headers,
                                                 cache_limit=1)
                    sources.update(
                        scraper_utils.parse_sources_list(self, iframe_html))
                else:
                    sources[iframe_url] = {'quality': None, 'direct': False}

        sources.update(self.__get_mirror_links(html, video))
        page_quality = self.__get_best_quality(sources)
        for source, values in sources.iteritems():
            direct = values['direct']
            if direct:
                host = scraper_utils.get_direct_hostname(self, source)
            else:
                host = urlparse.urlparse(source).hostname

            if values['quality'] is None:
                values['quality'] = page_quality

            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'views': None,
                'url': source,
                'rating': None,
                'quality': values['quality'],
                'direct': direct
            }
            hosters.append(hoster)

        return hosters
Example #6
 def __get_embed_links(self, html):
     hosters = []
     sources = scraper_utils.parse_sources_list(self, html)
     for source in sources:
         quality = source['quality']
         stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
         hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, source), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
         hosters.append(hoster)
     return hosters
Example #7
 def __get_links(self, html):
     hosters = []
     r = re.search('salt\("([^"]+)', html)
     if r:
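         # the payload is obfuscated; reverse the caesar shifts to recover the plaintext sources list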
         plaintext = self.__caesar(self.__get_f(self.__caesar(r.group(1), 13)), 13)
         sources = self._parse_sources_list(plaintext)
         for source in sources:
             stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Cookie': self._get_stream_cookies()})
             host = self._get_direct_hostname(stream_url)
             hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': sources[source]['quality'], 'rating': None, 'views': None, 'direct': True}
             hosters.append(hoster)
     return hosters
Example #8
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        hosts = [
            r.content for r in dom_parser2.parse_dom(
                html, 'p', {'class': 'server_servername'})
        ]
        links = [
            r.content
            for r in dom_parser2.parse_dom(html, 'p', {'class': 'server_play'})
        ]
        for host, link_frag in zip(hosts, links):
            stream_url = dom_parser2.parse_dom(link_frag, 'a', req='href')
            if not stream_url: continue

            stream_url = stream_url[0].attrs['href']
            host = re.sub('^Server\s*', '', host, flags=re.I)
            host = re.sub('\s*Link\s+\d+', '', host)
            if host.lower() == 'google':
                sources = self.__get_gvideo_links(stream_url)
            else:
                sources = [{'host': host, 'link': stream_url}]

            for source in sources:
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                    stream_url = source['link'] + scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                    direct = True
                else:
                    stream_url = scraper_utils.pathify_url(source['link'])
                    host = HOST_SUB.get(source['host'].lower(), source['host'])
                    quality = scraper_utils.get_quality(
                        video, host, QUALITIES.HIGH)
                    direct = False

                hoster = {
                    'multi-part': False,
                    'url': stream_url,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters
Example #9
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        page_quality = QUALITIES.HD720 if video.video_type == VIDEO_TYPES.MOVIE else QUALITIES.HIGH
        for _attrs, fragment in dom_parser2.parse_dom(
                html, 'div', {'class': 'embed-responsive'}):
            sources = {}
            iframe_url = dom_parser2.parse_dom(fragment,
                                               'iframe',
                                               req='data-src')
            if iframe_url:
                iframe_url = iframe_url[0].attrs['data-src']
                iframe_host = urlparse.urlparse(iframe_url).hostname
                if iframe_host in DIRECT_HOSTS:
                    sources = self.__parse_streams(iframe_url, url)
                else:
                    sources = {
                        iframe_url: {
                            'quality': scraper_utils.get_quality(
                                video, iframe_host, page_quality),
                            'direct': False
                        }
                    }

            for source in sources:
                quality = sources[source]['quality']
                direct = sources[source]['direct']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, source)
                    stream_url = source + scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                else:
                    host = urlparse.urlparse(source).hostname
                    stream_url = source

                hoster = {
                    'multi-part': False,
                    'url': stream_url,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters
Example #10
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        page_quality = dom_parser2.parse_dom(html, 'dd', {'class': 'status'})
        if page_quality:
            page_quality = QUALITY_MAP.get(page_quality[0].content,
                                           QUALITIES.HIGH)
        else:
            page_quality = QUALITIES.HIGH

        if video.video_type == VIDEO_TYPES.EPISODE:
            fragment = dom_parser2.parse_dom(html, 'div',
                                             {'id': 'servers-list'})
            gk_html = fragment[0].content if fragment else ''
        else:
            gk_html = html

        link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
        player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
        for stream_url, quality in scraper_utils.get_gk_links(
                self, gk_html, page_url, page_quality, link_url,
                player_url).iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                direct = True
            else:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, quality)
                direct = False

            if host is not None:
                stream_url += scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters
Example #11
 def resolve_link(self, link):
     url = scraper_utils.urljoin(self.base_url, link)
     html = self._http_get(url, cache_limit=.5)
     match = re.search('"file"\s*:\s*"([^"]+)', html)
     if match:
         file_link = match.group(1)
         stream_url = scraper_utils.urljoin(self.base_url, file_link)
         cj = self._set_cookies(self.base_url, {})
         request = urllib2.Request(stream_url)
         request.add_header('User-Agent', scraper_utils.get_ua())
         request.add_unredirected_header('Host', request.get_host())
         request.add_unredirected_header('Referer', url)
         cj.add_cookie_header(request)
         response = urllib2.urlopen(request)
         return response.geturl()
Example #12
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        if video.video_type == VIDEO_TYPES.EPISODE:
            gk_html = ''.join(
                match.group(0) for match in re.finditer(
                    '<a[^>]*>(%s|Server \d+)</a>' %
                    (video.episode), html, re.I))
        else:
            gk_html = html
        link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
        player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
        sources = scraper_utils.get_gk_links(self, gk_html, page_url,
                                             QUALITIES.HIGH, link_url,
                                             player_url)
        sources.update(self.__get_ht_links(html, page_url))

        for stream_url, quality in sources.iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                direct = True
            else:
                host = urlparse.urlparse(stream_url).hostname
                direct = False

            if host is None: continue
            stream_url += scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': direct
            }
            hosters.append(hoster)

        return hosters
Example #13
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=0)
        match = re.search('var\s*video_id\s*=\s*"([^"]+)', html)
        if not match: return hosters

        video_id = match.group(1)
        headers = {'Referer': page_url}
        headers.update(XHR)
        # _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)

        vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
        html = self._http_get(vid_url,
                              data={'v': video_id},
                              headers=headers,
                              cache_limit=0)
        for source, value in scraper_utils.parse_json(html,
                                                      vid_url).iteritems():
            match = re.search('url=(.*)', value)
            if not match: continue
            stream_url = urllib.unquote(match.group(1))

            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(source)
            stream_url += scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True
            }
            hosters.append(hoster)
        return hosters
Example #14
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
        if fragment: html = fragment[0].content
        links = scraper_utils.parse_sources_list(self, html)
        for link in links:
            stream_url = link
            if self.base_url in link:
                redir_url = self._http_get(link,
                                           headers={'Referer': url},
                                           allow_redirect=False,
                                           method='HEAD')
                if redir_url.startswith('http'):
                    stream_url = redir_url

            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = links[link]['quality']
                stream_url += scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua(), 'Referer': url})

            source = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': True
            }
            hosters.append(source)

        return hosters
Example #15
 def __get_direct(self, html, page_url):
     sources = []
     best_quality = QUALITIES.HIGH
     match = re.search('''['"]?sources["']?\s*:\s*\[(.*?)\}\s*,?\s*\]''', html, re.DOTALL)
     if match:
         files = re.findall('''['"]?file['"]?\s*:\s*(.*?)['"]([^'"]+)''', match.group(1), re.DOTALL)
         labels = re.findall('''['"]?label['"]?\s*:\s*['"]([^'"]*)''', match.group(1), re.DOTALL)
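         # Python 2 idiom: map(None, a, b) pairs items up like itertools.izip_longest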
         for stream, label in map(None, files, labels):
             func, stream_url = stream
             if 'atob' in func:
                 stream_url = base64.b64decode(stream_url)
             stream_url = stream_url.replace(' ', '%20')
             host = scraper_utils.get_direct_hostname(self, stream_url)
             label = re.sub(re.compile('\s*HD', re.I), '', label)
             quality = scraper_utils.height_get_quality(label)
             if Q_ORDER[quality] > Q_ORDER[best_quality]: best_quality = quality
             stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
             source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
             sources.append(source)
     return best_quality, sources
Example #16
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        sources = {}
        if source_url and source_url != FORCE_NO_MATCH:
            url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(url, cache_limit=.5)
            sources = self.__get_gk_links(html, url)
            if not sources:
                sources = self.__get_gk_links2(html)

            for source in sources:
                host = self._get_direct_hostname(source)
                if host == 'gvideo':
                    direct = True
                    quality = sources[source]
                    stream_url = source + scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                else:
                    direct = False
                    stream_url = source
                    if self.base_url in source:
                        host = sources[source]
                        quality = scraper_utils.get_quality(
                            video, host, QUALITIES.HIGH)
                    else:
                        host = urlparse.urlparse(source).hostname
                        quality = sources[source]
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters
Example #17
 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if source_url and source_url != FORCE_NO_MATCH:
         page_url = urlparse.urljoin(self.base_url, source_url)
         html = self._http_get(page_url, cache_limit=.5)
         match = re.search('var\s*video_id="([^"]+)', html)
         if match:
             video_id = match.group(1)
             data = {'v': video_id}
             headers = {'Referer': page_url}
             headers.update(XHR)
             html = self._http_get(self.info_url,
                                   data=data,
                                   headers=headers,
                                   cache_limit=0)
             sources = scraper_utils.parse_json(html, self.info_url)
             for source in sources:
                 match = re.search('url=(.*)', sources[source])
                 if match:
                     stream_url = urllib.unquote(match.group(1))
                     host = self._get_direct_hostname(stream_url)
                     if host == 'gvideo':
                         quality = scraper_utils.gv_get_quality(stream_url)
                     else:
                         quality = scraper_utils.height_get_quality(source)
                     stream_url += scraper_utils.append_headers(
                         {'User-Agent': scraper_utils.get_ua()})
                     hoster = {
                         'multi-part': False,
                         'host': host,
                         'class': self,
                         'quality': quality,
                         'views': None,
                         'rating': None,
                         'url': stream_url,
                         'direct': True
                     }
                     hosters.append(hoster)
     return hosters
Example #18
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if source_url and source_url != FORCE_NO_MATCH:
            html = self._http_get(source_url, cache_limit=.5)
            fragment = dom_parser.parse_dom(html, 'div',
                                            {'class': '[^"]*screen[^"]*'})
            if fragment:
                js_src = dom_parser.parse_dom(fragment[0], 'script', ret='src')
                if js_src:
                    js_url = urlparse.urljoin(self.base_url, js_src[0])
                    html = self._http_get(js_url, cache_limit=.5)
                else:
                    html = fragment[0]

                for match in re.finditer('<source[^>]+src="([^"]+)', html):
                    stream_url = match.group(1)
                    host = self._get_direct_hostname(stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    else:
                        _, _, height, _ = scraper_utils.parse_movie_link(
                            stream_url)
                        quality = scraper_utils.height_get_quality(height)
                        stream_url += '|User-Agent=%s' % (
                            scraper_utils.get_ua())

                    hoster = {
                        'multi-part': False,
                        'host': host,
                        'class': self,
                        'quality': quality,
                        'views': None,
                        'rating': None,
                        'url': stream_url,
                        'direct': True
                    }
                    hosters.append(hoster)
        return hosters
Example #19
    def get_sources(self, video):
        source_url = self.get_url(video)
        sources = []
        if source_url and source_url != FORCE_NO_MATCH:
            url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(url, cache_limit=.5)
            for fragment in dom_parser.parse_dom(html, 'div',
                                                 {'class': 'player_wraper'}):
                iframe_url = dom_parser.parse_dom(fragment,
                                                  'iframe',
                                                  ret='src')
                if iframe_url:
                    url = urlparse.urljoin(self.base_url, iframe_url[0])
                    html = self._http_get(url, cache_limit=.5)
                    for match in re.finditer(
                            '"(?:url|src)"\s*:\s*"([^"]+)[^}]+"res"\s*:\s*([^,]+)',
                            html):
                        stream_url, height = match.groups()
                        host = self._get_direct_hostname(stream_url)
                        if host == 'gvideo':
                            quality = scraper_utils.gv_get_quality(stream_url)
                        else:
                            quality = scraper_utils.height_get_quality(height)
                        stream_url += '|User-Agent=%s' % (
                            scraper_utils.get_ua())
                        source = {
                            'multi-part': False,
                            'url': stream_url,
                            'host': host,
                            'class': self,
                            'quality': quality,
                            'views': None,
                            'rating': None,
                            'direct': True
                        }
                        sources.append(source)

        return sources
Example #20
    def get_sources(self, video):
        hosters = []
        sources = {}
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        movie_id, watching_url, html = self.__get_source_page(
            video.video_type, page_url)

        links = []
        for match in dom_parser2.parse_dom(html,
                                           'li', {'class': 'ep-item'},
                                           req=['data-id', 'data-server']):
            label = dom_parser2.parse_dom(match.content, 'a', req='title')
            if not label: continue
            if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match(
                    video, label[0].attrs['title']):
                continue
            links.append((match.attrs['data-server'], match.attrs['data-id']))

        for link_type, link_id in links:
            if link_type in ['12', '13', '14', '15']:
                url = scraper_utils.urljoin(
                    self.base_url, PLAYLIST_URL1.format(ep_id=link_id))
                sources.update(self.__get_link_from_json(url))
            elif kodi.get_setting('scraper_url'):
                url = scraper_utils.urljoin(
                    self.base_url, PLAYLIST_URL2.format(ep_id=link_id))
                params = self.__get_params(movie_id, link_id, watching_url)
                if params is not None:
                    url += '?' + urllib.urlencode(params)
                sources.update(
                    self.__get_links_from_json2(url, page_url,
                                                video.video_type))

        for source in sources:
            if not source.lower().startswith('http'): continue
            if sources[source]['direct']:
                host = scraper_utils.get_direct_hostname(self, source)
                if host != 'gvideo':
                    stream_url = source + scraper_utils.append_headers(
                        {
                            'User-Agent': scraper_utils.get_ua(),
                            'Referer': page_url
                        })
                else:
                    stream_url = source
            else:
                host = urlparse.urlparse(source).hostname
                stream_url = source
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': sources[source]['quality'],
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': sources[source]['direct']
            }
            hosters.append(hoster)

        return hosters
Example #21
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        headers = {'Referer': page_url}
        html = self._http_get(page_url, headers=headers, cache_limit=.5)
        if video.video_type == VIDEO_TYPES.MOVIE:
            fragment = dom_parser2.parse_dom(html, 'div', {'class': 'poster'})
            if fragment:
                movie_url = dom_parser2.parse_dom(fragment[0].content,
                                                  'a',
                                                  req='href')
                if movie_url:
                    page_url = scraper_utils.urljoin(
                        self.base_url, movie_url[0].attrs['href'])
                    html = self._http_get(page_url, cache_limit=.5)
                    episodes = self.__get_episodes(html)
                    page_url = self.__get_best_page(episodes)
                    if not page_url:
                        return hosters
                    else:
                        page_url = scraper_utils.urljoin(
                            self.base_url, page_url)
                        html = self._http_get(page_url, cache_limit=.5)

        streams = dom_parser2.parse_dom(html, 'iframe', req='src')
        if streams:
            streams = [(attrs['src'], 480) for attrs, _content in streams]
            direct = False
        else:
            streams = [(attrs['src'], attrs.get('data-res', 480))
                       for attrs, _content in dom_parser2.parse_dom(
                           html, 'source', req=['src'])]
            direct = True

        headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url}
        for stream_url, height in streams:
            if 'video.php' in stream_url or 'moviexk.php' in stream_url:
                if 'title=' in stream_url:
                    title = stream_url.split('title=')[-1]
                    stream_url = stream_url.replace(title, urllib.quote(title))
                redir_url = self._http_get(stream_url,
                                           headers=headers,
                                           allow_redirect=False,
                                           method='HEAD',
                                           cache_limit=0)
                if redir_url.startswith('http'):
                    redir_url = redir_url.replace(' ', '').split(';codec')[0]
                    stream_url = redir_url
                else:
                    continue

            if direct:
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    quality = scraper_utils.height_get_quality(height)
                stream_url += scraper_utils.append_headers(headers)
            else:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.height_get_quality(height)

            source = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': direct
            }
            hosters.append(source)

        return hosters
Example #22
 def __get_cloud_links(self, html, page_url, sub):
     hosters = []
     html = html.replace('\\"', '"').replace('\\/', '/')
     match = re.search("dizi_kapak_getir\('([^']+)", html)
     if match:
         ep_id = match.group(1)
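         # the external player script embeds the link endpoint (kapak_url) and an
         # access token; pair them with the episode id to request the stream variants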
         for attrs, _content in dom_parser2.parse_dom(html, 'script', {'data-cfasync': 'false'}, req='src'):
             script_url = attrs['src']
             html = self._http_get(script_url, cache_limit=24)
             match1 = re.search("var\s+kapak_url\s*=\s*'([^']+)", html)
             match2 = re.search("var\s+aCtkp\s*=\s*'([^']+)", html)
             if match1 and match2:
                 link_url = '%s?fileid=%s&access_token=%s' % (match1.group(1), ep_id, match2.group(1))
                 headers = {'Referer': page_url}
                 html = self._http_get(link_url, headers=headers, cache_limit=.5)
                 js_data = scraper_utils.parse_json(html, link_url)
                 for variant in js_data.get('variants', {}):
                     stream_host = random.choice(variant.get('hosts', []))
                     if stream_host:
                         stream_url = stream_host + variant['path'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                         if not stream_url.startswith('http'):
                             stream_url = 'http://' + stream_url
                         host = scraper_utils.get_direct_hostname(self, stream_url)
                         if 'width' in variant:
                             quality = scraper_utils.width_get_quality(variant['width'])
                         elif 'height' in variant:
                             quality = scraper_utils.height_get_quality(variant['height'])
                         else:
                             quality = QUALITIES.HIGH
                         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                         hoster['subs'] = sub
                         hosters.append(hoster)
     return hosters
Example #23
 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.5)
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'film-container'})
     if fragment:
         iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
         if iframe_url:
             iframe_url = scraper_utils.urljoin(self.base_url, iframe_url[0].attrs['src'])
             headers = {'Referer': page_url}
             html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
             sources = scraper_utils.parse_sources_list(self, html)
             for source in sources:
                 quality = sources[source]['quality']
                 host = scraper_utils.get_direct_hostname(self, source)
                 stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url})
                 hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                 match = re.search('(\d+[a-z]bps)', source)
                 if match:
                     hoster['extra'] = match.group(1)
                 hosters.append(hoster)
                     
     hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
     return hosters
Example #24
 def __get_king_links(self, iframe_url):
     hosters = []
     match = re.search('v=(.*)', iframe_url)
     if match:
         data = {'ID': match.group(1)}
         headers = {'Referer': iframe_url}
         headers.update(XHR)
         xhr_url = iframe_url.split('?')[0]
         html = self._http_get(xhr_url, params={'p': 'GetVideoSources'}, data=data, headers=headers, cache_limit=.5)
         js_data = scraper_utils.parse_json(html, xhr_url)
         try:
             for source in js_data['VideoSources']:
                 stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                 host = scraper_utils.get_direct_hostname(self, source['file'])
                 label = source.get('label', '')
                 if host == 'gvideo':
                     quality = scraper_utils.gv_get_quality(source['file'])
                 elif re.search('\d+p?', label):
                     quality = scraper_utils.height_get_quality(label)
                 else:
                     quality = QUALITY_MAP.get(label, QUALITIES.HIGH)
                 hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
                 hosters.append(hoster)
         except:
             pass
         
     return hosters
Example #25
    def get_sources(self, video):
        source_url = self.get_url(video)
        sources = []
        if not source_url or source_url == FORCE_NO_MATCH: return sources
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(
                html, 'img', req=['data-id', 'data-name']):
            film_id, data_name = attrs['data-id'], attrs['data-name']
            data = {'id': film_id, 'n': data_name}
            server_url = scraper_utils.urljoin(self.base_url, SERVER_URL)
            server_url = server_url % (film_id)
            headers = {'Referer': page_url}
            headers.update(XHR)
            html = self._http_get(server_url,
                                  data=data,
                                  headers=headers,
                                  cache_limit=.5)
            for attrs, _content in dom_parser2.parse_dom(html,
                                                         'a',
                                                         req='data-id'):
                data = {'epid': attrs['data-id']}
                ep_url = scraper_utils.urljoin(self.base_url, EP_URL)
                ep_url = ep_url % (attrs['data-id'])
                headers = {'Referer': page_url}
                headers.update(XHR)
                html = self._http_get(ep_url,
                                      data=data,
                                      headers=headers,
                                      cache_limit=.5)
                js_data = scraper_utils.parse_json(html, ep_url)
                try:
                    links = [
                        r.attrs['src'] for r in dom_parser2.parse_dom(
                            js_data['link']['embed'], 'iframe', req='src')
                    ]
                except:
                    try:
                        links = js_data['link']['l']
                    except:
                        links = []
                try:
                    heights = js_data['link']['q']
                except:
                    heights = []
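                # map(None, ...) zips links with heights, padding the shorter list with None (Python 2)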
                for stream_url, height in map(None, links, heights):
                    match = re.search('movie_url=(.*)', stream_url)
                    if match:
                        stream_url = match.group(1)

                    host = scraper_utils.get_direct_hostname(self, stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                        stream_url += scraper_utils.append_headers(
                            {'User-Agent': scraper_utils.get_ua(),
                             'Referer': page_url})
                        direct = True
                    else:
                        host = urlparse.urlparse(stream_url).hostname
                        if height:
                            quality = scraper_utils.height_get_quality(height)
                        else:
                            quality = QUALITIES.HD720
                        direct = False
                    source = {
                        'multi-part': False,
                        'url': stream_url,
                        'host': host,
                        'class': self,
                        'quality': quality,
                        'views': None,
                        'rating': None,
                        'direct': direct
                    }
                    sources.append(source)

        return sources
Example #26
    def get_sources(self, video):
        hosters = []
        sources = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        match = re.search('var\s+view_id\s*=\s*"([^"]+)', html)
        if not match: return hosters
        view_id = match.group(1)
        
        for lang in ['or', 'tr']:
            subs = True if lang == 'tr' else False
            view_data = {'id': view_id, 'tip': 'view', 'dil': lang}
            html = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25)
            html = html.strip()
            html = re.sub(r'\\n|\\t', '', html)
            match = re.search('var\s+sources\s*=\s*(\[.*?\])', html)
            if match:
                raw_data = match.group(1)
                raw_data = raw_data.replace('\\', '')
            else:
                raw_data = html
             
            js_data = scraper_utils.parse_json(raw_data, self.ajax_url)
            if 'data' not in js_data: continue
            
            src = dom_parser2.parse_dom(js_data['data'], 'iframe', req='src')
            if not src: continue
            
            html = self._http_get(src[0].attrs['src'], cache_limit=.25)
            for attrs, _content in dom_parser2.parse_dom(html, 'iframe', req='src'):
                src = attrs['src']
                if not src.startswith('http'): continue
                sources.append({'label': '720p', 'file': src, 'direct': False, 'subs': subs})
            
            sources += [{'file': url, 'subs': subs} for url in scraper_utils.parse_sources_list(self, html).iterkeys()]
            
            if sources: break

        for source in sources:
            direct = source.get('direct', True)
            stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            if direct:
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'label' in source:
                    quality = scraper_utils.height_get_quality(source['label'])
                else:
                    continue
            else:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.height_get_quality(source['label'])
        
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            if source.get('subs'): hoster['subs'] = 'Turkish Subtitles'
            hosters.append(hoster)
    
        return hosters
Example #27
    def _cached_http_get(self,
                         url,
                         base_url,
                         timeout,
                         params=None,
                         data=None,
                         multipart_data=None,
                         headers=None,
                         cookies=None,
                         allow_redirect=True,
                         method=None,
                         require_debrid=False,
                         read_error=False,
                         cache_limit=8):
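        # Fetch url with local response caching; returns the page HTML, a
        # redirect/refresh target when allow_redirect is False, or '' on most errors.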
        if require_debrid:
            if Scraper.debrid_resolvers is None:
                Scraper.debrid_resolvers = [
                    resolver for resolver in resolveurl.relevant_resolvers()
                    if resolver.isUniversal()
                ]
            if not Scraper.debrid_resolvers:
                logger.log(
                    '%s requires debrid: %s' %
                    (self.__module__, Scraper.debrid_resolvers),
                    log_utils.LOGDEBUG)
                return ''

        if cookies is None: cookies = {}
        if timeout == 0: timeout = None
        if headers is None: headers = {}
        if url.startswith('//'): url = 'http:' + url
        referer = headers['Referer'] if 'Referer' in headers else base_url
        if params:
            if url == base_url and not url.endswith('/'):
                url += '/'

            parts = urlparse.urlparse(url)
            if parts.query:
                params.update(scraper_utils.parse_query(url))
                url = urlparse.urlunparse(
                    (parts.scheme, parts.netloc, parts.path, parts.params, '',
                     parts.fragment))

            url += '?' + urllib.urlencode(params)
        logger.log(
            'Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|' %
            (url, cookies, data, headers), log_utils.LOGDEBUG)
        if data is not None and not isinstance(data, basestring):
            data = urllib.urlencode(data, True)

        if multipart_data is not None:
            headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
            data = multipart_data

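        # return a cached copy when one is still fresh within cache_limit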
        _created, _res_header, html = self.db_connection().get_cached_url(
            url, data, cache_limit)
        if html:
            logger.log('Returning cached result for: %s' % (url),
                       log_utils.LOGDEBUG)
            return html

        try:
            self.cj = self._set_cookies(base_url, cookies)
            if isinstance(url, unicode): url = url.encode('utf-8')
            request = urllib2.Request(url, data=data)
            headers = headers.copy()
            request.add_header('User-Agent', scraper_utils.get_ua())
            request.add_header('Accept', '*/*')
            request.add_header('Accept-Encoding', 'gzip')
            request.add_unredirected_header('Host', request.get_host())
            if referer: request.add_unredirected_header('Referer', referer)
            if 'Referer' in headers: del headers['Referer']
            if 'Host' in headers: del headers['Host']
            for key, value in headers.iteritems():
                request.add_header(key, value)
            self.cj.add_cookie_header(request)
            if not allow_redirect:
                opener = urllib2.build_opener(NoRedirection)
                urllib2.install_opener(opener)
            else:
                opener = urllib2.build_opener(urllib2.HTTPRedirectHandler)
                urllib2.install_opener(opener)
                opener2 = urllib2.build_opener(
                    urllib2.HTTPCookieProcessor(self.cj))
                urllib2.install_opener(opener2)

            if method is not None: request.get_method = lambda: method.upper()
            response = urllib2.urlopen(request, timeout=timeout)
            self.cj.extract_cookies(response, request)
            if kodi.get_setting('cookie_debug') == 'true':
                logger.log(
                    'Response Cookies: %s - %s' %
                    (url, scraper_utils.cookies_as_str(self.cj)),
                    log_utils.LOGDEBUG)
            self.cj._cookies = scraper_utils.fix_bad_cookies(self.cj._cookies)
            self.cj.save(ignore_discard=True)
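            # with redirects disabled, hand back the redirect/refresh target instead of following it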
            if not allow_redirect and (
                    response.getcode() in [301, 302, 303, 307]
                    or response.info().getheader('Refresh')):
                if response.info().getheader('Refresh') is not None:
                    refresh = response.info().getheader('Refresh')
                    return refresh.split(';')[-1].split('url=')[-1]
                else:
                    redir_url = response.info().getheader('Location')
                    if redir_url.startswith('='):
                        redir_url = redir_url[1:]
                    return redir_url

            content_length = response.info().getheader('Content-Length', 0)
            if int(content_length) > MAX_RESPONSE:
                logger.log(
                    'Response exceeded allowed size. %s => %s / %s' %
                    (url, content_length, MAX_RESPONSE), log_utils.LOGWARNING)

            if method == 'HEAD':
                return ''
            else:
                if response.info().get('Content-Encoding') == 'gzip':
                    html = ungz(response.read(MAX_RESPONSE))
                else:
                    html = response.read(MAX_RESPONSE)
        except urllib2.HTTPError as e:
            if e.info().get('Content-Encoding') == 'gzip':
                html = ungz(e.read(MAX_RESPONSE))
            else:
                html = e.read(MAX_RESPONSE)

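            # Cloudflare protection surfaces as 403 (captcha) or 503 (browser check) pages;
            # try the dedicated solvers before giving up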
            if CF_CAPCHA_ENABLED and e.code == 403 and 'cf-captcha-bookmark' in html:
                html = cf_captcha.solve(url, self.cj, scraper_utils.get_ua(),
                                        self.get_name())
                if not html:
                    return ''
            elif e.code == 503 and 'cf-browser-verification' in html:
                html = cloudflare.solve(url,
                                        self.cj,
                                        scraper_utils.get_ua(),
                                        extra_headers=headers)
                if not html:
                    return ''
            else:
                logger.log(
                    'Error (%s) during scraper http get: %s' % (str(e), url),
                    log_utils.LOGWARNING)
                if not read_error:
                    return ''
        except Exception as e:
            logger.log(
                'Error (%s) during scraper http get: %s' % (str(e), url),
                log_utils.LOGWARNING)
            return ''

        self.db_connection().cache_url(url, html, data)
        return html