コード例 #1
0
    def get_sources(self, video):
        """Collect the "Version N" hoster links from the video's page.

        Returns a list of hoster dicts; each carries an extra 'version'
        key with the link's label.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        for link, version in re.findall(
                '<a[^>]+href="([^"]+)[^>]+>(Version \d+)<', html):
            # the embed. prefix is stripped so quality lookup sees the real host
            host = urlparse.urlsplit(link).hostname.replace('embed.', '')
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(video, host,
                                                     QUALITIES.HIGH),
                'views': None,
                'rating': None,
                'url': link,
                'direct': False,
                'version': version,
            })

        return hosters
コード例 #2
0
    def get_sources(self, video):
        """Scrape hoster links out of the video page's link table rows."""
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        link_pattern = '<td>\s*<a\s+href="([^"]+)(?:[^>]+>){2}\s*(?:&nbsp;)*\s*([^<]+)'
        for stream_url, host in re.findall(link_pattern, html):
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(video, host,
                                                     QUALITIES.HIGH),
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': False,
            })

        return hosters
コード例 #3
0
 def get_sources(self, video):
     """Pull hoster links out of the page's 'tdhost' table cells."""
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.5)
     for _attrs, cell in dom_parser2.parse_dom(html, 'td',
                                               {'class': 'tdhost'}):
         anchors = dom_parser2.parse_dom(cell, 'a', req='href')
         if not anchors:
             continue
         stream_url = anchors[0].attrs['href']
         host = urlparse.urlparse(stream_url).hostname
         hosters.append({
             'multi-part': False,
             'host': host,
             'class': self,
             'url': stream_url,
             'quality': scraper_utils.get_quality(video, host,
                                                  QUALITIES.HIGH),
             'views': None,
             'rating': None,
             'direct': False,
         })
     return hosters
コード例 #4
0
ファイル: tvshow_scraper.py プロジェクト: uguer30/Project
    def get_sources(self, video):
        """Scrape hoster links for |video| from base64-encoded "gtfo" params.

        Multi-part links ("Part 1", "Part 2", ...) are unsupported and are
        skipped; when a "Part 2" link is seen, the previously appended
        "Part 1" entry is removed as well.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        # the real link is base-64 encoded in the gtfo query parameter
        pattern = 'href="[^"]+gtfo=([^&"]+)[^>]+>([^<]+)'
        for match in re.finditer(pattern, html, re.DOTALL | re.I):
            url, link_name = match.groups()
            url = url.decode('base-64')
            host = urlparse.urlsplit(url).hostname
            match = re.search('Part\s+(\d+)', link_name)
            if match:
                # fix: guard against an empty list so a stray "Part 2" link
                # appearing first can't raise IndexError
                if match.group(1) == '2' and hosters:
                    del hosters[-1]  # remove Part 1 previous link added
                continue

            source = {
                'multi-part': False,
                'url': url,
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(video, host,
                                                     QUALITIES.HIGH),
                'views': None,
                'rating': None,
                'direct': False
            }
            hosters.append(source)

        return hosters
コード例 #5
0
ファイル: solarmovie.py プロジェクト: uguer30/Project
    def get_sources(self, video):
        """Parse the link table rows ("link_..." <tr>s) into hoster dicts."""
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        link_pattern = 'href="([^"]+)">\s*([^<]+).*?class="text">\s*([^<%]+).*?class="qualityCell[^>]*>\s*([^<]+)'
        for row in re.finditer('<tr\s+id="link_(.*?)</tr>', html, re.DOTALL):
            details = re.search(link_pattern, row.group(1), re.DOTALL)
            if not details:
                continue

            url, host, rating, quality = details.groups()
            host = host.strip()
            # the site shows a watch page; rewrite it to the player url
            hosters.append({
                'multi-part': False,
                'url': url.replace('/show/', '/play/'),
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(
                    video, host,
                    QUALITY_MAP.get(quality.upper().strip(),
                                    QUALITIES.MEDIUM)),
                'views': None,
                'rating': None if rating == 'n/a' else rating,
                'direct': False,
            })

        return hosters
コード例 #6
0
ファイル: merb.py プロジェクト: uguer30/Project
    def get_sources(self, video):
        """Scrape hosters out of the page's "movie_version" tables.

        Host and url arrive base-64 encoded; view counts drive a 0-100
        rating relative to the most-viewed source, and extra part links
        are collected into a 'parts' list for multi-part versions.
        (Python 2 only: relies on str.decode('base-64') and xrange.)
        """
        source_url = self.get_url(video)
        hosters = []
        if source_url and source_url != FORCE_NO_MATCH:
            url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(url, cache_limit=.5)

            container_pattern = r'<table[^>]+class="movie_version[ "][^>]*>(.*?)</table>'
            item_pattern = (
                r'quality_(?!sponsored|unknown|play)([^>]*)></span>.*?'
                r'url=([^&]+)&(?:amp;)?domain=([^&]+)&(?:amp;)?(.*?)'
                r'"version_veiws"> ([\d]+) views</')
            # track the most-viewed item so ratings can be normalized
            max_index = 0
            max_views = -1
            for container in re.finditer(container_pattern, html,
                                         re.DOTALL | re.IGNORECASE):
                for i, source in enumerate(
                        re.finditer(item_pattern, container.group(1),
                                    re.DOTALL)):
                    qual, url, host, parts, views = source.groups()

                    # 'ZnJhbWVndGZv' is the base-64 marker for a promo host
                    if host == 'ZnJhbWVndGZv':
                        continue  # filter out promo hosts

                    item = {
                        'host': host.decode('base-64'),
                        'url': url.decode('base-64'),
                        'class': self,
                        'direct': False
                    }
                    # a star icon in the row marks the link as verified
                    item['verified'] = source.group(0).find('star.gif') > -1
                    item['quality'] = scraper_utils.get_quality(
                        video, item['host'], QUALITY_MAP.get(qual.upper()))
                    item['views'] = int(views)
                    if item['views'] > max_views:
                        max_index = i
                        max_views = item['views']

                    # rating uses the *running* maximum; earlier entries are
                    # re-normalized after the loop once max_views is final
                    if max_views > 0:
                        item['rating'] = item['views'] * 100 / max_views
                    else:
                        item['rating'] = None
                    pattern = r'<a href=".*?url=(.*?)&(?:amp;)?.*?".*?>(part \d*)</a>'
                    other_parts = re.findall(pattern, parts, re.DOTALL | re.I)
                    if other_parts:
                        item['multi-part'] = True
                        item['parts'] = [
                            part[0].decode('base-64') for part in other_parts
                        ]
                    else:
                        item['multi-part'] = False
                    hosters.append(item)

            # NOTE(review): max_index is a per-container enumerate index but
            # slices the flat hosters list; with skipped promo rows or several
            # containers the indices may not line up - confirm intended
            if max_views > 0:
                for i in xrange(0, max_index):
                    hosters[i][
                        'rating'] = hosters[i]['views'] * 100 / max_views

        return hosters
コード例 #7
0
    def get_sources(self, video):
        """Gather stream links from the page's data-vid anchors and the
        "additional-links" table.

        A page-wide best quality is derived from any /movies-quality/ link
        in the 'entry' div; per-host quality may still adjust it.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)

        best_quality = QUALITIES.HIGH
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
        if fragment:
            # keep the highest quality advertised anywhere on the page
            for match in re.finditer(
                    'href="[^"]*/movies-quality/[^"]*[^>]*>([^<]+)',
                    fragment[0].content, re.I):
                quality = Q_MAP.get(match.group(1).upper(), QUALITIES.HIGH)
                if Q_ORDER[quality] > Q_ORDER[best_quality]:
                    best_quality = quality

        # each data-vid attribute holds escaped iframe markup; extract the
        # iframe node itself (its src is read below)
        sources = []
        for attrs, _content in dom_parser2.parse_dom(html, 'a',
                                                     req='data-vid'):
            try:
                vid_url = dom_parser2.parse_dom(scraper_utils.cleanse_title(
                    attrs['data-vid']),
                                                'iframe',
                                                req='src')
                sources.append(vid_url[0])
            except:
                # best effort: malformed or missing iframes are skipped
                pass

        fragment = dom_parser2.parse_dom(html, 'table',
                                         {'class': 'additional-links'})
        if fragment:
            sources += dom_parser2.parse_dom(fragment[0].content,
                                             'a',
                                             req='href')

        for stream_url in sources:
            # entries are dom nodes: anchors carry href, iframes carry src
            stream_url = stream_url.attrs.get('href') or stream_url.attrs.get(
                'src')
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(video, host, best_quality)
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': False
            }
            hosters.append(hoster)
        return hosters
コード例 #8
0
    def get_sources(self, video):
        """Build hoster dicts from the paired server-name / server-play
        fragments on the video page.

        Google-hosted streams are expanded via __get_gvideo_links() into
        direct links; everything else becomes a normal non-direct hoster.
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=10)
        hosts = [
            r.content for r in dom_parser2.parse_dom(
                html, 'p', {'class': 'server_servername'})
        ]
        links = [
            r.content
            for r in dom_parser2.parse_dom(html, 'p', {'class': 'server_play'})
        ]
        for host, link_frag in zip(hosts, links):
            stream_url = dom_parser2.parse_dom(link_frag, 'a', req='href')
            if not stream_url: continue

            stream_url = stream_url[0].attrs['href']
            # fix: re.sub()'s 4th positional argument is |count|, not |flags|
            # (re.I == 2 silently meant count=2); use an inline flag instead
            host = re.sub('(?i)^Server\s*', '', host)
            host = re.sub('\s*Link\s+\d+', '', host)
            if host.lower() == 'google':
                sources = self.__get_gvideo_links(stream_url)
            else:
                sources = [{'host': host, 'link': stream_url}]

            for source in sources:
                # fix: classify each source by its own link; the old code
                # tested the outer stream_url, which was also mutated by the
                # first iteration and so corrupted later ones
                host = scraper_utils.get_direct_hostname(self, source['link'])
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(source['link'])
                    stream_url = source['link'] + scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                    direct = True
                else:
                    stream_url = scraper_utils.pathify_url(source['link'])
                    host = HOST_SUB.get(source['host'].lower(), source['host'])
                    quality = scraper_utils.get_quality(
                        video, host, QUALITIES.HIGH)
                    direct = False

                hoster = {
                    'multi-part': False,
                    'url': stream_url,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters
コード例 #9
0
    def get_sources(self, video):
        """Build hoster dicts from each embed-responsive iframe on the page.

        Direct-host iframes (in DIRECT_HOSTS) are expanded through
        __parse_streams(); any other iframe becomes a single non-direct
        source at the page-level quality.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        # movies are assumed 720p; episodes default to generic "high"
        page_quality = QUALITIES.HD720 if video.video_type == VIDEO_TYPES.MOVIE else QUALITIES.HIGH
        for _attrs, fragment in dom_parser2.parse_dom(
                html, 'div', {'class': 'embed-responsive'}):
            iframe_url = dom_parser2.parse_dom(fragment,
                                               'iframe',
                                               req='data-src')
            # fix: skip fragments without an iframe; the old code fell
            # through and iterated a stale |sources| from an earlier pass
            # (or hit NameError on the very first iteration)
            if not iframe_url: continue

            iframe_url = iframe_url[0].attrs['data-src']
            iframe_host = urlparse.urlparse(iframe_url).hostname
            if iframe_host in DIRECT_HOSTS:
                sources = self.__parse_streams(iframe_url, url)
            else:
                sources = {
                    iframe_url: {
                        'quality': scraper_utils.get_quality(
                            video, iframe_host, page_quality),
                        'direct': False
                    }
                }

            for source in sources:
                quality = sources[source]['quality']
                direct = sources[source]['direct']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, source)
                    stream_url = source + scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                else:
                    host = urlparse.urlparse(source).hostname
                    stream_url = source

                hoster = {
                    'multi-part': False,
                    'url': stream_url,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters
コード例 #10
0
ファイル: uflix.py プロジェクト: uguer30/Project
    def get_sources(self, video):
        """Scrape voted hoster links; quality comes from page icons or,
        failing that, the quality label text."""
        sources = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return sources

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        # first matching quality icon wins...
        quality = None
        for key in QUALITY_ICONS:
            if key in html:
                quality = QUALITY_ICONS[key]
                break

        # ...otherwise fall back to the (sometimes misspelled) quality label
        if quality is None:
            match = re.search(
                '(?:qaulity|quality):\s*<span[^>]*>(.*?)</span>', html,
                re.DOTALL | re.I)
            if match:
                quality = QUALITY_MAP.get(match.group(1).upper())

        pattern = '''href="[^"]+url=([^&]+)&domain=([^"&]+).*?fa-thumbs-o-up">\s*([^<]+).*?vote_bad_embedid_\d+'>([^<]+)'''
        for match in re.finditer(pattern, html, re.I | re.DOTALL):
            stream_url, host, up, down = match.groups()
            # vote counts may carry stray markup; keep digits only
            up = ''.join(c for c in up if c in string.digits)
            down = ''.join(c for c in down if c in string.digits)
            stream_url = stream_url.decode('base-64')
            host = host.decode('base-64')

            if host.upper() == 'HDSTREAM':
                continue  # skip ad match

            up, down = int(up), int(down)
            source = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(video, host, quality),
                'up': up,
                'down': down,
                'direct': False,
                'rating': up * 100 / (up + down) if up > 0 or down > 0 else None,
                'views': up + down,
            }
            sources.append(source)

        return sources
コード例 #11
0
ファイル: movytvy_scraper.py プロジェクト: uguer30/Project
    def get_sources(self, video):
        """Scrape debrid stream links from the page's links-table rows.

        Each row embeds the stream url inside a playVideo.bind(...) call,
        followed by a release name that is parsed for a quality estimate.
        Requires a debrid-enabled fetch (require_debrid=True).
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
        fragment = dom_parser2.parse_dom(html, 'table',
                                         {'class': 'links-table'})
        if not fragment: return hosters
        for _attrs, row in dom_parser2.parse_dom(fragment[0].content, 'tr'):
            match = re.search(
                "playVideo\.bind\(.*?'([^']+)(?:[^>]*>){2}(.*?)</td>", row,
                re.DOTALL)
            if not match: continue

            stream_url, release = match.groups()
            # a gvideo link may expand into several quality-specific streams
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                sources = scraper_utils.parse_google(self, stream_url)
            else:
                sources = [stream_url]

            for source in sources:
                host = scraper_utils.get_direct_hostname(self, source)
                if host == 'gvideo':
                    # direct google stream: quality is encoded in the url
                    quality = scraper_utils.gv_get_quality(source)
                    direct = True
                else:
                    # otherwise estimate quality from the release name
                    host = urlparse.urlparse(source).hostname
                    if video.video_type == VIDEO_TYPES.MOVIE:
                        meta = scraper_utils.parse_movie_link(release)
                    else:
                        meta = scraper_utils.parse_episode_link(release)
                    base_quality = scraper_utils.height_get_quality(
                        meta['height'])
                    quality = scraper_utils.get_quality(
                        video, host, base_quality)
                    direct = False
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': source,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters
コード例 #12
0
    def get_sources(self, video):
        """Resolve GK-player links for the video and wrap them as hosters."""
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)

        # default page quality comes from the "status" field when present
        status = dom_parser2.parse_dom(html, 'dd', {'class': 'status'})
        if status:
            page_quality = QUALITY_MAP.get(status[0].content, QUALITIES.HIGH)
        else:
            page_quality = QUALITIES.HIGH

        # episode pages keep their links inside the server-list fragment
        gk_html = html
        if video.video_type == VIDEO_TYPES.EPISODE:
            fragment = dom_parser2.parse_dom(html, 'div',
                                             {'id': 'servers-list'})
            gk_html = fragment[0].content if fragment else ''

        link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
        player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
        gk_links = scraper_utils.get_gk_links(self, gk_html, page_url,
                                              page_quality, link_url,
                                              player_url)
        for stream_url, quality in gk_links.iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                direct = True
            else:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, quality)
                direct = False
            if host is None:
                continue

            stream_url += scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': direct,
            })

        return hosters
コード例 #13
0
 def __get_post_links(self, html, video):
     """Map stream urls found in the post's <h2> headings to qualities."""
     sources = {}
     post = dom_parser.parse_dom(html, 'article', {'id': 'post-\d+'})
     if not post:
         return sources

     for fragment in dom_parser.parse_dom(post[0], 'h2'):
         for match in re.finditer('href="([^"]+)', fragment):
             stream_url = match.group(1)
             # the link text is a release name; its height sets the quality
             meta = scraper_utils.parse_episode_link(stream_url)
             release_quality = scraper_utils.height_get_quality(
                 meta['height'])
             host = urlparse.urlparse(stream_url).hostname
             sources[stream_url] = scraper_utils.get_quality(
                 video, host, release_quality)
     return sources
コード例 #14
0
 def __add_sources(self, sources, video, quality=QUALITIES.HIGH):
     """Convert raw source urls into hoster dicts; gvideo urls are direct."""
     hosters = []
     for source in sources:
         if self._get_direct_hostname(source) == 'gvideo':
             # direct google link: embed a UA header and read its quality
             host = self._get_direct_hostname(source)
             quality = scraper_utils.gv_get_quality(source)
             stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
             direct = True
         else:
             host = urlparse.urlparse(source).hostname
             quality = scraper_utils.get_quality(video, host, quality)
             stream_url = source
             direct = False

         hosters.append({
             'multi-part': False,
             'host': host,
             'class': self,
             'quality': quality,
             'views': None,
             'rating': None,
             'url': stream_url,
             'direct': direct,
         })
     return hosters
コード例 #15
0
    def get_sources(self, video):
        """Scrape 'ldr-item' entries, including view counts and ratings."""
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'ldr-item'}):
            link = dom_parser2.parse_dom(item, 'a', req='data-actuallink')

            # view count is best-effort; anything unparsable becomes None
            try:
                watched = dom_parser2.parse_dom(item, 'div',
                                                {'class': 'click-count'})
                views = re.search(' (\d+) ', watched[0].content).group(1)
            except:
                views = None

            # a score of 0 (or an unparsable one) means "no rating"
            try:
                score = int(
                    dom_parser2.parse_dom(item, 'div',
                                          {'class': 'point'})[0].content)
                rating = score * 10 if score else None
            except:
                rating = None

            if not link:
                continue
            stream_url = link[0].attrs['data-actuallink'].strip()
            host = urlparse.urlparse(stream_url).hostname
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(video, host,
                                                     QUALITIES.HIGH),
                'views': views,
                'rating': rating,
                'url': stream_url,
                'direct': False,
            })

        return hosters
コード例 #16
0
    def get_sources(self, video):
        """Fetch the site's play_online XHR for the movie and scrape its
        iframes into non-direct hosters.

        The page's movie_id input drives a POST to /movies/play_online;
        each iframe src in the response becomes one hoster.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        movie_id = dom_parser2.parse_dom(html,
                                         'input', {'id': 'movie_id'},
                                         req='value')
        if not movie_id: return hosters

        # form fields mirror what the site's own player sends
        data = {
            'movie': movie_id[0].attrs['value'],
            'starttime': 'undefined',
            'pageevent': 0,
            'aspectration': ''
        }
        xhr_url = scraper_utils.urljoin(self.base_url, '/movies/play_online')
        headers = {'Referer': page_url}
        headers.update(XHR)
        html = self._http_get(xhr_url,
                              data=data,
                              headers=headers,
                              cache_limit=.5)
        # NOTE(review): the direct sources returned here are discarded; only
        # the detected best quality feeds the iframe hosters below - confirm
        best_quality, _sources = self.__get_direct(html, page_url)
        for attrs, _content in dom_parser2.parse_dom(html, 'iframe',
                                                     req='src'):
            stream_url = attrs['src']
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(video, host, best_quality)
            hoster = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': False
            }
            hosters.append(hoster)

        return hosters
コード例 #17
0
    def __get_links(self, url, video):
        """Query the search API once per SEARCH_TYPES entry and collect
        matching hosters.

        Results are de-duplicated by stream url; multi-url results and rar
        archives are skipped, and titles must pass release_check().
        """
        hosters = []
        seen_urls = set()
        for search_type in SEARCH_TYPES:
            search_url, params = self.__translate_search(url, search_type)
            if not search_url: continue
            html = self._http_get(search_url, params=params, cache_limit=.5)
            js_result = scraper_utils.parse_json(html, search_url)
            # NOTE(review): 'sucsess' is presumably the API's own misspelled
            # status value - confirm against a live response before "fixing"
            if js_result.get('status') != 'sucsess':
                continue

            for result in js_result['post']:
                stream_url = result['hosterurls'][0]['url']
                if len(result['hosterurls']) > 1: continue
                if result['extension'] == 'rar': continue
                if stream_url in seen_urls: continue

                if scraper_utils.release_check(video, result['title']):
                    host = urlparse.urlsplit(stream_url).hostname
                    quality = scraper_utils.get_quality(
                        video, host, self._get_title_quality(result['title']))
                    hoster = {
                        'multi-part': False,
                        'class': self,
                        'views': None,
                        'url': stream_url,
                        'rating': None,
                        'host': host,
                        'quality': quality,
                        'direct': False
                    }
                    # keep the cleaned release title for display/format info
                    hoster['extra'] = scraper_utils.cleanse_title(
                        result['title'])
                    if video.video_type == VIDEO_TYPES.MOVIE:
                        meta = scraper_utils.parse_movie_link(hoster['extra'])
                    else:
                        meta = scraper_utils.parse_episode_link(
                            hoster['extra'])
                    if 'format' in meta: hoster['format'] = meta['format']

                    hosters.append(hoster)
                    seen_urls.add(stream_url)

        return hosters
コード例 #18
0
    def get_sources(self, video):
        """Follow "Watch it here" redirects to the final post, then scrape
        iframes out of its postTabs_divs sections.

        Falls back to BASE_URL2 when the primary base url returns nothing.
        NOTE(review): the while loop trusts the redirect chain to terminate;
        a cyclic chain of "Watch it here" links would loop forever - confirm.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        entry = ''
        while True:
            html = self._http_get(url, cache_limit=.5)
            if not html:
                # primary site returned nothing; retry against the mirror
                url = scraper_utils.urljoin(BASE_URL2, source_url)
                html = self._http_get(url, cache_limit=.5)

            entry = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
            if entry:
                entry = entry[0].content
                match = re.search('Watch it here\s*:.*?href="([^"]+)', entry,
                                  re.I)
                if not match: break
                url = match.group(1)
            else:
                entry = ''
                break

        for _attribs, tab in dom_parser2.parse_dom(entry, 'div',
                                                   {'class': 'postTabs_divs'}):
            match = dom_parser2.parse_dom(tab, 'iframe', req='src')
            if not match: continue
            link = match[0].attrs['src']
            host = urlparse.urlparse(link).hostname
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(video, host,
                                                     QUALITIES.HIGH),
                'views': None,
                'rating': None,
                'url': link,
                'direct': False
            }
            hosters.append(hoster)

        return hosters
コード例 #19
0
    def get_sources(self, video):
        """Collect English-language hoster links, tagging subtitled streams
        with a 'subs' key."""
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, headers={'Referer': self.base_url},
                              cache_limit=.5)
        for _attrs, fragment in dom_parser2.parse_dom(html, 'div',
                                                      {'class': 'lang'}):
            section_label = dom_parser2.parse_dom(
                fragment, 'div',
                {'title': re.compile('Language Flag\s+[^"]*')})
            lang, subs = self.__get_section_label(section_label)
            if lang.lower() != 'english':
                continue

            for attrs, host in dom_parser2.parse_dom(fragment, 'a',
                                                     {'class': 'p1'},
                                                     req='href'):
                hoster = {
                    'multi-part': False,
                    'url': attrs['href'],
                    'class': self,
                    'quality': scraper_utils.get_quality(video, host,
                                                         QUALITIES.HIGH),
                    'host': host,
                    'rating': None,
                    'views': None,
                    'direct': False,
                }
                if subs:
                    hoster['subs'] = subs
                hosters.append(hoster)

        return hosters
コード例 #20
0
ファイル: tvrelease_scraper.py プロジェクト: uguer30/Project
    def get_sources(self, video):
        """Scrape download links from the release page's td_cols cells.

        Quality comes from the Category field when it maps cleanly;
        otherwise the Release string is kept and fed per-host to
        blog_get_quality().
        """
        # (removed: an unused cfscrape scraper was instantiated here and
        # never referenced)
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        q_str = ''
        quality = None
        match = re.search('>Category.*?td_col">([^<]+)', html)
        if match:
            quality = QUALITY_MAP.get(match.group(1).upper(), None)
        else:
            match = re.search('>Release.*?td_col">([^<]+)', html)
            if match:
                q_str = match.group(1).upper()

        # fix: the old pattern ended with a bare non-greedy (.+?) and no
        # terminator, so it captured exactly one character of every url;
        # capture everything up to the closing quote instead
        pattern = "td_cols.+?href='([^']+)"
        for match in re.finditer(pattern, html):
            url = match.group(1)
            if re.search('\.rar(\.|$)', url):
                continue  # skip rar archives

            hoster = {
                'multi-part': False,
                'class': self,
                'views': None,
                'url': url,
                'rating': None,
                'direct': False
            }
            hoster['host'] = urlparse.urlsplit(url).hostname
            if quality is None:
                hoster['quality'] = scraper_utils.blog_get_quality(
                    video, q_str, hoster['host'])
            else:
                hoster['quality'] = scraper_utils.get_quality(
                    video, hoster['host'], quality)
            hosters.append(hoster)

        return hosters
コード例 #21
0
    def get_sources(self, video):
        """Build hoster dicts from GK-style embedded links for |video|.

        Tries the primary GK extractor first and falls back to the secondary
        one when it yields nothing.  Google-video links are flagged direct
        and get a User-Agent header appended to their URL.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        links = self.__get_gk_links(html, page_url)
        if not links:
            links = self.__get_gk_links2(html)

        for link in links:
            host = self._get_direct_hostname(link)
            if host == 'gvideo':
                # Direct google-video stream; append a UA header hint.
                direct = True
                quality = links[link]
                stream_url = link + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
            else:
                direct = False
                stream_url = link
                if self.base_url in link:
                    # Internal link: the mapped value is the host name here.
                    host = links[link]
                    quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                else:
                    host = urlparse.urlparse(link).hostname
                    quality = links[link]

            hosters.append({
                'multi-part': False, 'host': host, 'class': self,
                'quality': quality, 'views': None, 'rating': None,
                'url': stream_url, 'direct': direct})

        return hosters
コード例 #22
0
ファイル: moviehut_scraper.py プロジェクト: uguer30/Project
    def get_sources(self, video):
        """Extract 'Watch Link' entries (and multi-part sub-links) for |video|.

        Each matched table row yields a primary hoster dict; rows that list
        extra parts additionally yield one copy per part with its own URL
        and part label.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        link_pattern = 'href="([^"]+)">Watch (Link \d+)(.*?)</td>\s*<td[^>]*>(.*?)</td>.*?<td[^>]*id="lv_\d+"[^>]*>([^<]+)'
        for link_match in re.finditer(link_pattern, html, re.DOTALL):
            stream_url, label, part_str, q_str, views = link_match.groups()
            q_str = q_str.strip().upper()
            parts = re.findall('href="([^"]+)">(Part\s+\d+)<', part_str,
                               re.DOTALL)
            multipart = bool(parts)
            host = urlparse.urlparse(stream_url).hostname
            if host is None:
                continue

            quality = scraper_utils.get_quality(
                video, host, QUALITY_MAP.get(q_str, QUALITIES.HIGH))
            hoster = {
                'multi-part': multipart, 'host': host, 'class': self,
                'quality': quality, 'views': views, 'rating': None,
                'url': stream_url, 'direct': False, 'extra': label}
            hosters.append(hoster)
            # Every additional part inherits the parent entry's fields but
            # overrides its URL and carries a part label.
            for part_url, part_label in parts:
                hosters.append(dict(hoster, part_label=part_label, url=part_url))

        return hosters
コード例 #23
0
ファイル: iwatch_scraper.py プロジェクト: uguer30/Project
    def get_sources(self, video):
        """Scrape hoster rows for |video| and score each link by age.

        Each matching table row yields a hoster dict; after collection, a
        0-100 rating is assigned in proportion to each link's age within
        the observed [min_age, max_age] range (skipped when the spread is
        too small for a positive unit).
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        max_age = 0
        now = min_age = int(time.time())
        for _attrs, row in dom_parser2.parse_dom(html, 'tr', {'id': re.compile('pt\d+')}):
            stream_url = dom_parser2.parse_dom(row, 'a', {'class': 'spf-link'}, req='href')
            host = dom_parser2.parse_dom(row, 'img', {'alt': ''}, req='src')
            data = dom_parser2.parse_dom(row, 'td')
            if stream_url and host:
                stream_url = stream_url[0].attrs['href']
                # The host name is encoded in the flag image filename (<host>.gif).
                match = re.search('.*/(.*?)\.gif', host[0].attrs['src'])
                host = match.group(1) if match else ''

                # Age lives either in a dedicated span or in the third cell;
                # fall back to 0 when neither parses.
                # FIX: the original bare "except:" clauses also swallowed
                # SystemExit/KeyboardInterrupt; narrowed to Exception.
                age = dom_parser2.parse_dom(row, 'span', {'class': 'linkdate'})
                try: age = age[0].content
                except Exception:
                    try: age = data[2].content
                    except Exception: age = 0

                try: quality = data[3].content
                except Exception: quality = 'HDTV'

                age = self.__get_age(now, age)
                if age > max_age: max_age = age
                if age < min_age: min_age = age

                hoster = {'multi-part': False, 'class': self, 'url': scraper_utils.pathify_url(stream_url), 'host': host, 'age': age, 'views': None, 'rating': None, 'direct': False}
                quality = QUALITY_MAP.get(quality.upper(), QUALITIES.HIGH)
                hoster['quality'] = scraper_utils.get_quality(video, host, quality)
                hosters.append(hoster)

        # Normalize ages into a 0-100 rating; unit <= 0 means no spread.
        unit = (max_age - min_age) / 100
        if unit > 0:
            for hoster in hosters:
                hoster['rating'] = (hoster['age'] - min_age) / unit
        return hosters
コード例 #24
0
ファイル: putlocker_scraper.py プロジェクト: uguer30/Project
    def get_sources(self, video):
        """Collect alternative-column hoster links for |video|."""
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'alternativesc'})
        if not fragment:
            return hosters

        for column in dom_parser.parse_dom(fragment[0], 'div', {'class': 'altercolumn'}):
            links = dom_parser.parse_dom(column, 'a', {'class': 'altercolumnlink'}, ret='href')
            spans = dom_parser.parse_dom(column, 'span')
            if not links or not spans:
                continue

            link = links[0]
            if not link.startswith('http'):
                # NOTE(review): relative links are joined onto source_url
                # rather than base_url — presumably intentional; verify.
                link = source_url + link
            host = spans[0]
            hosters.append({
                'multi-part': False, 'host': host, 'class': self,
                'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                'views': None, 'rating': None, 'url': link, 'direct': False})

        return hosters
コード例 #25
0
    def get_sources(self, video):
        """Fetch embed iframes for |video| via the site's AJAX endpoint.

        Requires the page's 'elid' element id, a site token (fetched lazily)
        and a bearer auth header; posts a base64-encoded timestamp as 'elid'
        and parses IFRAME sources out of the JSON-escaped response.
        """
        sources = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return sources

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        action = 'getMovieEmb' if video.video_type == VIDEO_TYPES.MOVIE else 'getEpisodeEmb'
        match = re.search('elid\s*=\s*"([^"]+)', html)
        if self.__token is None:
            self.__get_token()

        if not match or self.__token is None:
            return sources

        elid = urllib.quote(base64.encodestring(str(int(time.time()))).strip())
        data = {'action': action, 'idEl': match.group(1), 'token': self.__token, 'elid': elid}
        ajax_url = scraper_utils.urljoin(self.base_url, EMBED_URL)
        headers = {'Authorization': 'Bearer %s' % (self.__get_bearer()), 'Referer': page_url}
        headers.update(XHR)
        html = self._http_get(ajax_url, data=data, headers=headers, cache_limit=.5)
        # The response is JSON-escaped HTML; unescape quotes and slashes.
        html = html.replace('\\"', '"').replace('\\/', '/')

        for iframe in re.finditer('<IFRAME\s+SRC="([^"]+)', html, re.DOTALL | re.I):
            url = iframe.group(1)
            host = scraper_utils.get_direct_hostname(self, url)
            if host == 'gvideo':
                direct = True
                quality = scraper_utils.gv_get_quality(url)
            else:
                if 'vk.com' in url and url.endswith('oid='): continue  # skip bad vk.com links
                direct = False
                host = urlparse.urlparse(url).hostname
                quality = scraper_utils.get_quality(video, host, QUALITIES.HD720)

            sources.append({
                'multi-part': False, 'url': url, 'host': host, 'class': self,
                'quality': quality, 'views': None, 'rating': None, 'direct': direct})

        return sources
コード例 #26
0
ファイル: miradetodo_scraper.py プロジェクト: uguer30/Project
 def get_sources(self, video):
     """Collect hoster links for |video| from 'movieplay' iframes.

     Builds a url -> {'quality', 'direct'} map first (following internal
     miradetodo iframes through their nav redirect and several GK/amazon
     extractors), then flattens that map into hoster dicts with a
     User-Agent hint appended to each URL.
     """
     source_url = self.get_url(video)
     hosters = []
     sources = {}
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, cache_limit=.5)
     for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}):
         # Iframes may use either a plain src or a lazy-loading attribute.
         for attrs, _content in dom_parser2.parse_dom(fragment, 'iframe', req='src') + dom_parser2.parse_dom(fragment, 'iframe', req='data-lazy-src'):
             iframe_url = attrs.get('src', '')
             if not iframe_url.startswith('http'):
                 # Fall back to the lazy-load attribute; skip if still relative.
                 iframe_url = attrs.get('data-lazy-src', '')
                 if not iframe_url.startswith('http'): continue

             if 'miradetodo' in iframe_url:
                 # Internal embed: follow it (and an optional nav redirect),
                 # then run every known link extractor over the result.
                 # NOTE: `html` and `fragment` are deliberately rebound here.
                 html = self._http_get(iframe_url, cache_limit=.5)
                 fragment = dom_parser2.parse_dom(html, 'nav', {'class': 'nav'})
                 if fragment:
                     stream_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
                     if stream_url:
                         html = self._http_get(stream_url[0].attrs['href'], cache_limit=.5)

                 sources.update(self.__get_gk_links(html))
                 sources.update(self.__get_gk_links2(html))
                 sources.update(self.__get_amazon_links(html))
                 sources.update(scraper_utils.parse_sources_list(self, html))
             else:
                 # External embed: record it as a non-direct HIGH-quality link.
                 host = urlparse.urlparse(iframe_url).hostname
                 source = {'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH), 'direct': False}
                 sources.update({iframe_url: source})

     for source in sources:
         # Append a User-Agent hint for the player.
         stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
         direct = sources[source]['direct']
         quality = sources[source]['quality']
         host = scraper_utils.get_direct_hostname(self, source) if direct else urlparse.urlparse(source).hostname
         hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality, 'host': host, 'rating': None, 'views': None, 'direct': direct}
         hosters.append(hoster)

     return hosters
コード例 #27
0
ファイル: myddl_scraper.py プロジェクト: uguer30/Project
    def get_sources(self, video):
        """Pull download links for |video| from the post body (debrid only).

        Quality is inferred from the filename embedded in each link via the
        movie/episode link parsers.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'post-cont'})
        if not fragment:
            return hosters

        # Links of interest sit between the first <p><strong> and the
        # trailing <script> tag of the post content.
        match = re.search('<p>\s*<strong>(.*?)<script', fragment[0].content,
                          re.DOTALL)
        if not match:
            return hosters

        for attrs, _content in dom_parser2.parse_dom(match.group(1), 'a', req='href'):
            stream_url = attrs['href']
            if scraper_utils.excluded_link(stream_url):
                continue

            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(stream_url)
            else:
                meta = scraper_utils.parse_episode_link(stream_url)

            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(
                video, host, scraper_utils.height_get_quality(meta['height']))
            hosters.append({
                'multi-part': False, 'host': host, 'class': self,
                'views': None, 'url': stream_url, 'rating': None,
                'quality': quality, 'direct': False})

        return hosters
コード例 #28
0
    def get_sources(self, video):
        """Harvest stream links from 'linktr' table rows for |video|.

        A row's URL comes from a direct absolute <a href> when present,
        otherwise from a redirect span's id.  The host is parsed from the
        URL, or read from an <h9> cell for non-http entries.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        for _attrs, row in dom_parser2.parse_dom(html, 'tr', {'id': 'linktr'}):
            redirect = dom_parser2.parse_dom(row, 'span', req='id')
            link = dom_parser2.parse_dom(row, 'a', req='href')
            stream_url = ''
            if link and link[0].attrs['href'].startswith('http'):
                stream_url = link[0].attrs['href']
            elif redirect:
                stream_url = redirect[0].attrs['id']

            if stream_url.startswith('http'):
                host = urlparse.urlparse(stream_url).hostname
            else:
                host_el = dom_parser2.parse_dom(row, 'h9')
                host = host_el[0].content if host_el else ''

            if not stream_url or not host:
                continue

            hosters.append({
                'multi-part': False, 'host': host, 'class': self,
                'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                'views': None, 'rating': None, 'url': stream_url,
                'direct': False})

        return hosters
コード例 #29
0
ファイル: movietube_scraper.py プロジェクト: uguer30/Project
    def get_sources(self, video):
        """Scrape iframe streams for |video|, tagging a page-wide quality.

        The quality label is read from the first <li> whose 'type' span
        mentions 'quality' (empty string when none matches); every iframe
        inside the fstory-video container then becomes one hoster.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)

        q_str = ''
        for _attrs, item in dom_parser2.parse_dom(html, 'li'):
            label = dom_parser2.parse_dom(item, 'span', {'class': 'type'})
            value = dom_parser2.parse_dom(item, 'p', {'class': 'text'})
            if label and value and 'quality' in label[0].content.lower():
                q_str = value[0].content.upper()
                break

        fragment = dom_parser2.parse_dom(html, 'div', {'id': 'fstory-video'})
        if not fragment:
            return hosters

        for iframe in re.finditer('<iframe[^>]*src="([^"]+)',
                                  fragment[0].content, re.I):
            stream_url = iframe.group(1)
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(
                video, host, QUALITY_MAP.get(q_str, QUALITIES.HIGH))
            hosters.append({
                'multi-part': False, 'host': host, 'class': self,
                'quality': quality, 'views': None, 'rating': None,
                'url': stream_url, 'direct': False})

        return hosters
コード例 #30
0
 def __get_post_links(self, html, video):
     """Map download links found in the post body to quality values.

     Splits the postContent div into <strong>-delimited release sections;
     the <strong> text names the release (its height fixes the section's
     quality), and every <a href> in that section is recorded as
     sources[link] = quality.  Returns the (possibly empty) dict.
     """
     sources = {}
     post = dom_parser2.parse_dom(html, 'div', {'class': 'postContent'})
     if post:
         post = post[0].content
         # Each fragment runs from one <strong> to the next (or to the end).
         for fragment in re.finditer('(<strong>.*?)(?=<strong>|$)', post,
                                     re.DOTALL):
             fragment = fragment.group(1)
             release = dom_parser2.parse_dom(fragment, 'strong')
             if release:
                 release = release[0].content
                 # Release name encodes the video height -> section quality.
                 meta = scraper_utils.parse_episode_link(release)
                 release_quality = scraper_utils.height_get_quality(
                     meta['height'])
                 for attrs, _content in dom_parser2.parse_dom(fragment,
                                                              'a',
                                                              req='href'):
                     link = attrs['href']
                     host = urlparse.urlparse(link).hostname
                     quality = scraper_utils.get_quality(
                         video, host, release_quality)
                     sources[link] = quality
     return sources