# Excerpted scraper-class methods from a Kodi add-on (Python 2). They rely on
# their modules' usual imports (re, urlparse, datetime, xml.etree.ElementTree
# as ET, dom_parser/dom_parser2, kodi, log_utils/logger, scraper_utils) and on
# constants defined elsewhere in the repo (FORCE_NO_MATCH, VIDEO_TYPES,
# QUALITIES, QUALITY_MAP, Q_ORDER, EXCLUDE_LINKS).

def __get_post_links(self, html, video):
    sources = {}
    post = dom_parser2.parse_dom(html, 'div', {'class': 'postContent'})
    if post:
        post = post[0].content
        results = re.findall('<p\s+style="text-align:\s*center;">(?:\s*<strong>)*(.*?)<br(.*?)</p>', post, re.DOTALL)
        if not results:
            # fallback layout: explicit "Release Name:" / "Download:" labels
            match = re.search('>Release Name\s*:(.*?)<br', post, re.I)
            release = match.group(1) if match else ''
            match = re.search('>Download\s*:(.*?)</p>', post, re.DOTALL | re.I)
            links = match.group(1) if match else ''
            results = [(release, links)]

        for release, links in results:
            release = re.sub('</?[^>]*>', '', release)  # strip markup from the release name
            for attrs, link_text in dom_parser2.parse_dom(links, 'a', req='href'):
                stream_url = attrs['href']
                if link_text.upper() in ['TORRENT SEARCH', 'VIP FILE']:
                    continue
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.blog_get_quality(video, release, host)
                sources[stream_url] = quality
    return sources
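# scraper_utils.blog_get_quality() maps a release string (plus host) to a
# quality constant; its real implementation is not shown here. A minimal
# stand-alone sketch of the kind of keyword heuristic such a mapper applies
# (the keyword table and string labels are illustrative only, not the actual
# scraper_utils logic):
def guess_quality_from_release(release):
    release = release.upper()
    for marker, quality in [('1080P', 'HD1080'), ('720P', 'HD720'),
                            ('DVDRIP', 'HIGH'), ('HDTV', 'HIGH'), ('CAM', 'LOW')]:
        if marker in release:
            return quality
    return 'HIGH'  # default when nothing in the release name matches

# guess_quality_from_release('Show.S01E01.720p.WEB-DL') -> 'HD720'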
def __get_episode_sources(self, source_url, video):
    hosters = []
    links = self.__find_episode(source_url, video)
    if not links: return hosters

    # batch-check the cache status of all hashes in one request
    hash_data = self.__get_hash_data([link[0] for link in links])
    for link in links:
        try:
            status = hash_data['hashes'][link[0]]['status']
        except KeyError:
            status = ''
        if status.lower() != 'finished': continue  # only cached ("finished") items are playable

        stream_url = 'hash_id=%s' % (link[0])
        host = scraper_utils.get_direct_hostname(self, stream_url)
        quality = scraper_utils.blog_get_quality(video, link[1], '')
        hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None,
                  'host': host, 'quality': quality, 'direct': True}
        hoster['extra'] = link[1]
        hosters.append(hoster)
    return hosters
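# Since only 'status' is read from hash_data, the try/except KeyError above can
# also be expressed as a chain of dict.get() calls; a minimal equivalent using
# the same {'hashes': {hash_id: {'status': ...}}} layout the code relies on:
def hash_status(hash_data, hash_id):
    return hash_data.get('hashes', {}).get(hash_id, {}).get('status', '')

# hash_status({'hashes': {'abc': {'status': 'finished'}}}, 'abc') -> 'finished'
# hash_status({}, 'abc') -> ''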
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    # unlike the variant above, this scraper's __get_post_links() takes only the
    # html and returns {url: {'release': ...}}
    for source, values in self.__get_post_links(html).iteritems():
        if scraper_utils.excluded_link(source): continue

        host = urlparse.urlparse(source).hostname
        release = values['release']
        quality = scraper_utils.blog_get_quality(video, release, host)
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source,
                  'rating': None, 'quality': quality, 'direct': False}
        if 'X265' in release or 'HEVC' in release:
            hoster['format'] = 'x265'  # flag HEVC releases so the codec can be surfaced downstream
        hosters.append(hoster)
    return hosters
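# Every get_sources() in these scrapers builds the same hoster dict by hand. A
# sketch of a shared constructor that would centralize the shape (make_hoster
# is hypothetical, not an existing scraper_utils helper):
def make_hoster(cls, host, url, quality, direct=False, **extra):
    hoster = {'multi-part': False, 'class': cls, 'views': None, 'rating': None,
              'host': host, 'url': url, 'quality': quality, 'direct': direct}
    hoster.update(extra)  # e.g. format='x265' or extra=<release name>
    return hoster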
def __get_links_from_xml(self, xml, video):
    sources = {}
    try:
        root = ET.fromstring(xml)
        for item in root.findall('.//item'):
            title = item.find('title').text
            # <source> elements live in the JWPlayer RSS namespace
            for source in item.findall('{http://rss.jwpcdn.com/}source'):
                stream_url = source.get('file')
                label = source.get('label')
                if self._get_direct_hostname(stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif label:
                    quality = scraper_utils.height_get_quality(label)
                else:
                    quality = scraper_utils.blog_get_quality(video, title, '')
                sources[stream_url] = {'quality': quality, 'direct': True}
                log_utils.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        log_utils.log('Exception during 123Movies XML Parse: %s' % (e), log_utils.LOGWARNING)
    return sources
def __get_comment_links(self, comment, video):
    sources = {}
    for attrs, _content in dom_parser2.parse_dom(comment, 'a', req='href'):
        stream_url = attrs['href']
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.blog_get_quality(video, stream_url, host)
        sources[stream_url] = quality
    return sources
def __get_links(self, video, views, html, q_str):
    pattern = 'li>\s*<a\s+href="(http[^"]+)'
    hosters = []
    for match in re.finditer(pattern, html, re.DOTALL):
        url = match.group(1)
        hoster = {'multi-part': False, 'class': self, 'views': views, 'url': url, 'rating': None,
                  'quality': None, 'direct': False}
        hoster['host'] = urlparse.urlsplit(url).hostname
        hoster['quality'] = scraper_utils.blog_get_quality(video, q_str, hoster['host'])
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return sources

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=8)
    # embedded players: one iframe per stb-container-<n> div
    for div in dom_parser2.parse_dom(html, 'div', {'id': re.compile('stb-container-\d+')}):
        stream_url = dom_parser2.parse_dom(div.content, 'iframe', req='src')
        if not stream_url: continue

        stream_url = stream_url[0].attrs['src']
        host = urlparse.urlparse(stream_url).hostname
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                  'quality': QUALITIES.HIGH, 'views': None, 'rating': None, 'direct': False}
        sources.append(source)

    # download box: pair each quality label with its "Download Now" link
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'stb-download-body_box'})
    if not fragment: return sources

    labels = dom_parser2.parse_dom(fragment[0].content, 'a', {'href': '#'})
    stream_urls = [result for result in dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
                   if result.content.lower() == 'download now']
    for label, stream_url in zip(labels, stream_urls):
        stream_url = stream_url.attrs['href']
        label = re.sub('</?[^>]*>', '', label.content)  # strip markup from the label text
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.blog_get_quality(video, label, host)
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                  'quality': quality, 'views': None, 'rating': None, 'direct': False}
        sources.append(source)
    return sources
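# zip() above silently stops at the shorter of labels/stream_urls, so a label
# without a matching "Download Now" anchor (or the reverse) drops entries and
# can misalign later pairs. A cheap self-contained guard (sketch; the warn
# callback stands in for whatever logger this module uses):
def paired_download_links(labels, stream_urls, warn):
    if len(labels) != len(stream_urls):
        warn('label/link mismatch: %d labels vs %d links' % (len(labels), len(stream_urls)))
    return zip(labels, stream_urls)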
def __get_links_from_xml(self, url, video, page_url, cookies):
    sources = {}
    try:
        headers = {'Referer': page_url}
        xml = self._http_get(url, cookies=cookies, headers=headers, cache_limit=.5)
        root = ET.fromstring(xml)
        for item in root.findall('.//item'):
            title = item.find('title').text
            if title and title.upper() == 'OOPS!': continue  # the site's "not found" placeholder item

            # ElementTree requires Clark notation, '{namespace}tag'; the tag name
            # was truncated in the original and 'source' is assumed here, matching
            # the JWPlayer-style feed handled above.
            for source in item.findall('{https://yesmovies.to/ajax/movie_sources/}source'):
                stream_url = source.get('file')
                label = source.get('label')
                if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif label:
                    quality = scraper_utils.height_get_quality(label)
                elif title:
                    quality = scraper_utils.blog_get_quality(video, title, '')
                else:
                    quality = scraper_utils.blog_get_quality(video, stream_url, '')
                sources[stream_url] = {'quality': quality, 'direct': True}
                logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        logger.log('Exception during YesMovies XML Parse: %s' % (e), log_utils.LOGWARNING)
    return sources
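# A minimal, self-contained illustration of the ElementTree namespace handling
# both XML parsers above depend on (the sample feed is made up):
import xml.etree.ElementTree as ET

SAMPLE = ('<rss xmlns:jw="http://rss.jwpcdn.com/"><item>'
          '<jw:source file="http://a/v.mp4" label="720"/></item></rss>')
root = ET.fromstring(SAMPLE)
for src in root.findall('.//{http://rss.jwpcdn.com/}source'):
    file_url, label = src.get('file'), src.get('label')  # -> 'http://a/v.mp4', '720'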
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        q_str = ''
        match = re.search('>quality(.*?)<br\s*/>', html, re.I)
        if match:
            q_str = match.group(1)
            q_str = q_str.decode('utf-8').encode('ascii', 'ignore')
            # flags must be passed by keyword: re.sub's 4th positional argument is count
            q_str = re.sub('(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)

        fragment = dom_parser.parse_dom(html, 'div', {'class': 'video-embed'})
        if fragment:
            for match in re.finditer('<iframe[^>]+src="([^"]+)', fragment[0], re.I):
                stream_url = match.group(1)
                host = urlparse.urlparse(stream_url).hostname
                hoster = {'multi-part': False, 'host': host, 'class': self,
                          'quality': scraper_utils.blog_get_quality(video, q_str, host),
                          'views': None, 'rating': None, 'url': stream_url, 'direct': False}
                meta_match = re.search('class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
                if meta_match:
                    hoster['views'] = int(meta_match.group(1))
                    hoster['rating'] = meta_match.group(2)
                hosters.append(hoster)
    return hosters
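# Why flags= matters above: re.sub's signature is
# re.sub(pattern, repl, string, count=0, flags=0), so passing re.I | re.U
# positionally sets count to 34 and applies no flags at all; the substitution
# then silently stops after 34 replacements and stays case-sensitive.
import re

assert re.sub('a', '', 'A' * 5, re.I) == 'AAAAA'    # re.I consumed as count; no case-folding
assert re.sub('a', '', 'A' * 5, flags=re.I) == ''   # correct: flags actually applied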
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        q_str = ''
        quality = None
        match = re.search('>Category.*?td_col">([^<]+)', html)
        if match:
            quality = QUALITY_MAP.get(match.group(1).upper(), None)
        else:
            match = re.search('>Release.*?td_col">([^<]+)', html)
            if match:
                q_str = match.group(1).upper()

        # the original pattern was truncated after the capture group; matching up
        # to the closing quote is assumed here
        pattern = "td_cols.+?href='([^']+)"
        for match in re.finditer(pattern, html):
            url = match.group(1)
            if re.search('\.rar(\.|$)', url): continue  # skip archives; they are not streamable

            hoster = {'multi-part': False, 'class': self, 'views': None, 'url': url, 'rating': None,
                      'direct': False}
            hoster['host'] = urlparse.urlsplit(url).hostname
            if quality is None:
                hoster['quality'] = scraper_utils.blog_get_quality(video, q_str, hoster['host'])
            else:
                hoster['quality'] = scraper_utils.get_quality(video, hoster['host'], quality)
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters

    html = self._http_get(source_url, require_debrid=False, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.MOVIE:
        pattern = '<singlelink>(.*?)(?=<hr\s*/>|download>|thanks_button_div)'
    else:
        pattern = '<hr\s*/>\s*<strong>(.*?)</strong>.*?<singlelink>(.*?)(?=<hr\s*/>|download>|thanks_button_div)'

    for match in re.finditer(pattern, html, re.DOTALL):
        if video.video_type == VIDEO_TYPES.MOVIE:
            links = match.group(1)
            # movie pages carry the release title in the page header instead
            title_match = re.search('<h2>\s*<a[^>]+>(.*?)</a>', html)
            title = title_match.group(1) if title_match else ''
        else:
            title, links = match.groups()

        for link_match in re.finditer('href="([^"]+)', links):
            stream_url = link_match.group(1).lower()
            if any(link in stream_url for link in EXCLUDE_LINKS): continue

            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.blog_get_quality(video, title, host)
            hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': stream_url,
                      'rating': None, 'quality': quality, 'direct': False}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=False, cache_limit=.5)
    q_str = ''
    match = re.search('class="entry-title">([^<]+)', html)
    if match: q_str = match.group(1)

    pattern = 'href="?([^" ]+)(?:[^>]+>){2}\s+\|'
    for match in re.finditer(pattern, html, re.DOTALL):
        url = match.group(1)
        if 'adf.ly' in url: continue

        hoster = {'multi-part': False, 'class': self, 'views': None, 'url': url, 'rating': None,
                  'quality': None, 'direct': False}
        hoster['host'] = urlparse.urlsplit(url).hostname
        hoster['quality'] = scraper_utils.blog_get_quality(video, q_str, hoster['host'])
        hosters.append(hoster)
    return hosters
def _blog_get_url(self, video, delim='.'):
    url = None
    result = self.db_connection().get_related_url(video.video_type, video.title, video.year,
                                                  self.get_name(), video.season, video.episode)
    if result:
        url = result[0][0]
        logger.log('Got local related url: |%s|%s|%s|%s|%s|' % (video.video_type, video.title, video.year,
                                                                self.get_name(), url), log_utils.LOGDEBUG)
    else:
        try:
            select = int(kodi.get_setting('%s-select' % (self.get_name())))
        except:
            select = 0  # missing or non-numeric setting

        if video.video_type == VIDEO_TYPES.EPISODE:
            temp_title = re.sub('[^A-Za-z0-9 ]', '', video.title)
            if not scraper_utils.force_title(video):
                search_title = '%s S%02dE%02d' % (temp_title, int(video.season), int(video.episode))
                if isinstance(video.ep_airdate, datetime.date):
                    # fall back to an airdate-based search, e.g. "Title 2015.04.21"
                    fallback_search = '%s %s' % (temp_title, video.ep_airdate.strftime('%Y{0}%m{0}%d'.format(delim)))
                else:
                    fallback_search = ''
            else:
                if not video.ep_title: return None
                search_title = '%s %s' % (temp_title, video.ep_title)
                fallback_search = ''
        else:
            search_title = video.title
            fallback_search = ''

        results = self.search(video.video_type, search_title, video.year)
        if not results and fallback_search:
            results = self.search(video.video_type, fallback_search, video.year)

        if results:
            # TODO: First result isn't always the most recent...
            best_result = results[0]
            if select != 0:
                # user opted to pick the highest-quality result instead of the first
                best_qorder = 0
                for result in results:
                    if 'quality' in result:
                        quality = result['quality']
                    else:
                        # derive quality from the result title: "(720p)" first, then "[...]" suffix
                        match = re.search('\((\d+p)\)', result['title'])
                        if match:
                            quality = scraper_utils.height_get_quality(match.group(1))
                        else:
                            match = re.search('\[(.*)\]$', result['title'])
                            q_str = match.group(1) if match else ''
                            quality = scraper_utils.blog_get_quality(video, q_str, '')

                    logger.log('result: |%s|%s|%s|' % (result, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                    if Q_ORDER[quality] > best_qorder:
                        logger.log('Setting best as: |%s|%s|%s|' % (result, quality, Q_ORDER[quality]),
                                   log_utils.LOGDEBUG)
                        best_result = result
                        best_qorder = Q_ORDER[quality]

            url = best_result['url']
            self.db_connection().set_related_url(video.video_type, video.title, video.year,
                                                 self.get_name(), url, video.season, video.episode)
    return url
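# A stand-alone sketch of the episode search-string construction above, handy
# for testing outside the class (function name and defaults are illustrative,
# not scraper_utils API):
import datetime
import re

def episode_search_titles(title, season, episode, ep_airdate=None, delim='.'):
    temp_title = re.sub('[^A-Za-z0-9 ]', '', title)
    search_title = '%s S%02dE%02d' % (temp_title, int(season), int(episode))
    fallback_search = ''
    if isinstance(ep_airdate, datetime.date):
        fallback_search = '%s %s' % (temp_title, ep_airdate.strftime('%Y{0}%m{0}%d'.format(delim)))
    return search_title, fallback_search

# episode_search_titles("It's a Show", 5, 3, datetime.date(2015, 4, 21))
# -> ('Its a Show S05E03', 'Its a Show 2015.04.21')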