def get_sources(self, video):
        """Build hoster entries for every link in the page's entry-content section.

        A page-wide quality is derived from the ``<title>`` tag.  Each ``<p>``
        inside the first ``section.entry-content`` may supply its own quality,
        parsed from the text that precedes its first tag; the one with the
        smaller ``Q_ORDER`` value is attached to the links of that paragraph.

        :param video: video object handed to ``self.get_url``.
        :return: list of hoster dicts; empty when there is no usable source
            URL or the page has no ``entry-content`` section.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, require_debrid=True, cache_limit=.5)

        titles = dom_parser2.parse_dom(html, 'title')
        if not titles:
            page_quality = QUALITIES.HIGH
        else:
            # Strip the leading "[ST] – " marker before parsing the title.
            page_title = re.sub('^\[ST\]\s*–\s*', '', titles[0].content)
            page_meta = scraper_utils.parse_episode_link(page_title)
            page_quality = scraper_utils.height_get_quality(
                page_meta['height'])

        sections = dom_parser2.parse_dom(html, 'section',
                                         {'class': 'entry-content'})
        if not sections:
            return hosters

        for _attrs, paragraph in dom_parser2.parse_dom(sections[0].content,
                                                       'p'):
            # Text up to the first "<"; the pattern can match an empty string,
            # so the search never returns None.
            leading_text = re.search('([^<]*)', paragraph).group(1)
            meta = scraper_utils.parse_episode_link(leading_text)
            if meta['episode'] != '-1' or meta['airdate']:
                section_quality = scraper_utils.height_get_quality(
                    meta['height'])
            else:
                section_quality = page_quality

            # Keep whichever of the two has the smaller Q_ORDER value.
            quality = page_quality
            if Q_ORDER[section_quality] < Q_ORDER[page_quality]:
                quality = section_quality

            for link_attrs, _content in dom_parser2.parse_dom(paragraph,
                                                              'a',
                                                              req='href'):
                stream_url = link_attrs['href']
                hosters.append({
                    'multi-part': False,
                    'host': urlparse.urlparse(stream_url).hostname,
                    'class': self,
                    'views': None,
                    'url': stream_url,
                    'rating': None,
                    'quality': quality,
                    'direct': False
                })

        return hosters
Example #2
0
 def __get_links_from_json2(self, url, page_url, video_type):
     """Fetch a JSON playlist and map each stream URL to its quality.

     Only the first playlist entry is read.  Quality is taken from the
     gvideo URL when the stream is hosted there, otherwise from the source's
     ``label``, otherwise from the height parsed out of the stream URL.  Any
     exception raised while walking the playlist is logged at debug level
     and the streams gathered so far are returned.

     :param url: address of the JSON resource.
     :param page_url: sent as the ``Referer`` header.
     :param video_type: selects the movie or episode link parser for the
         fallback quality lookup.
     :return: dict of ``stream_url -> {'quality': ..., 'direct': True}``.
     """
     request_headers = {'Referer': page_url}
     request_headers.update(XHR)
     html = self._http_get(url, headers=request_headers, cache_limit=0)
     js_data = scraper_utils.parse_json(html, url)

     sources = {}
     try:
         # An empty playlist raises IndexError here and is handled below.
         first_entry = js_data.get('playlist', [])[0]
         for source in first_entry.get('sources', []):
             stream_url = source['file']
             direct_host = scraper_utils.get_direct_hostname(self, stream_url)
             if direct_host == 'gvideo':
                 quality = scraper_utils.gv_get_quality(stream_url)
             elif 'label' in source:
                 quality = scraper_utils.height_get_quality(source['label'])
             else:
                 if video_type == VIDEO_TYPES.MOVIE:
                     parse_link = scraper_utils.parse_movie_link
                 else:
                     parse_link = scraper_utils.parse_episode_link
                 quality = scraper_utils.height_get_quality(
                     parse_link(stream_url)['height'])

             sources[stream_url] = {'quality': quality, 'direct': True}
             logger.log(
                 'Adding stream: %s Quality: %s' % (stream_url, quality),
                 log_utils.LOGDEBUG)
     except Exception as e:
         logger.log('Exception during yesmovies extract: %s' % (e),
                    log_utils.LOGDEBUG)
     return sources
 def get_sources(self, video):
     """Return a hoster dict for each absolute http(s) URL in the post body.

     URLs are taken from ``href="..."`` attributes and from text that
     directly follows a ``>``, inside the first ``div.entry-content``
     element.  Links rejected by ``scraper_utils.excluded_link`` and links
     containing ``imdb.com`` are skipped.

     :param video: video object handed to ``self.get_url``; its
         ``video_type`` selects the movie or episode link parser.
     :return: list of hoster dicts; empty when there is no usable source URL
         or the page has no ``entry-content`` div.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, require_debrid=True, cache_limit=.5)
     post = dom_parser2.parse_dom(html, 'div', {'class': 'entry-content'})
     if not post:
         return hosters

     # Capture the URL up to its delimiter (closing quote, angle bracket or
     # whitespace).  A lazy ``.+?`` at the very end of a pattern matches only
     # one character, which truncated every link to "http://x".
     link_pattern = r'(?:href="|>)(https?://[^\s"<>]+)'
     for match in re.finditer(link_pattern, post[0].content):
         stream_url = match.group(1)
         if scraper_utils.excluded_link(
                 stream_url) or 'imdb.com' in stream_url:
             continue

         host = urlparse.urlparse(stream_url).hostname
         if video.video_type == VIDEO_TYPES.MOVIE:
             meta = scraper_utils.parse_movie_link(stream_url)
         else:
             meta = scraper_utils.parse_episode_link(stream_url)
         quality = scraper_utils.height_get_quality(meta['height'])
         hosters.append({
             'multi-part': False,
             'host': host,
             'class': self,
             'views': None,
             'url': stream_url,
             'rating': None,
             'quality': quality,
             'direct': False
         })
     return hosters
Example #4
0
    def __get_links(self, url, video):
        """Query the search API once per search type and collect hosters.

        A result is kept only when it has exactly one hoster URL, is not a
        ``rar`` archive, has not been seen in an earlier result and passes
        ``scraper_utils.release_check``.

        :param url: source URL handed to ``self.__translate_search``.
        :param video: video being looked up; its ``video_type`` selects the
            movie or episode parser used to detect the format.
        :return: list of hoster dicts, each carrying an ``extra`` title and,
            when the parser reports one, a ``format``.
        """
        hosters = []
        seen_urls = set()
        for search_type in SEARCH_TYPES:
            search_url, params = self.__translate_search(url, search_type)
            if not search_url:
                continue
            html = self._http_get(search_url, params=params, cache_limit=.5)
            js_result = scraper_utils.parse_json(html, search_url)
            if js_result.get('status') != 'success':
                logger.log('Pron API Error: |%s|%s|: %s' % (search_url, params, js_result.get('message', 'Unknown Error')), log_utils.LOGWARNING)
                continue

            # A successful response without a result list yields nothing.
            for result in js_result.get('result') or []:
                hoster_urls = result['hosterurls']
                # Check the count before indexing: multi-file releases are
                # skipped and an empty list no longer raises IndexError.
                if len(hoster_urls) != 1:
                    continue
                if result['extension'] == 'rar':
                    continue
                stream_url = hoster_urls[0]['url']
                if stream_url in seen_urls:
                    continue
                if not scraper_utils.release_check(video, result['title']):
                    continue

                host = urlparse.urlsplit(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, self._get_title_quality(result['title']))
                hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': False}
                hoster['extra'] = scraper_utils.cleanse_title(result['title'])
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(hoster['extra'])
                else:
                    meta = scraper_utils.parse_episode_link(hoster['extra'])
                if 'format' in meta:
                    hoster['format'] = meta['format']

                hosters.append(hoster)
                seen_urls.add(stream_url)

        return hosters
Example #5
0
    def get_sources(self, video):
        """Collect hoster entries from the ``info2`` blocks of the page.

        Each block runs from a ``<span class='info2'`` up to the next
        ``<span class='info`` or ``<hr/>``; every ``href`` found inside one
        becomes a hoster whose quality is parsed from the link itself.

        :param video: video object handed to ``self.get_url``.
        :return: list of hoster dicts; empty when there is no usable source
            URL.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url,
                              require_debrid=True,
                              headers={'User-Agent': LOCAL_UA},
                              cache_limit=.5)

        block_pattern = "<span\s+class='info2'(.*?)(<span\s+class='info|<hr\s*/>)"
        for block in re.finditer(block_pattern, html, re.DOTALL):
            for stream_url in re.findall('href="([^"]+)', block.group(1)):
                meta = scraper_utils.parse_episode_link(stream_url)
                quality = scraper_utils.height_get_quality(meta['height'])
                hosters.append({
                    'multi-part': False,
                    'host': urlparse.urlparse(stream_url).hostname,
                    'class': self,
                    'views': None,
                    'url': stream_url,
                    'rating': None,
                    'quality': quality,
                    'direct': False
                })

        return hosters
 def get_sources(self, video):
     """Turn the links returned by ``self.__get_post_links`` into hosters.

     The quality of each link is parsed from its ``release`` value, using
     the movie or episode parser according to ``video.video_type``.  Links
     rejected by ``scraper_utils.excluded_link`` are dropped.

     :param video: video object handed to ``self.get_url``.
     :return: list of hoster dicts (with a ``format`` key when the parser
         reports one); empty when there is no usable source URL.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
     for link, info in self.__get_post_links(html).iteritems():
         if scraper_utils.excluded_link(link):
             continue

         host = urlparse.urlparse(link).hostname
         if video.video_type == VIDEO_TYPES.MOVIE:
             parse_link = scraper_utils.parse_movie_link
         else:
             parse_link = scraper_utils.parse_episode_link
         meta = parse_link(info['release'])

         hoster = {
             'multi-part': False,
             'host': host,
             'class': self,
             'views': None,
             'url': link,
             'rating': None,
             'quality': scraper_utils.height_get_quality(meta['height']),
             'direct': False
         }
         if 'format' in meta:
             hoster['format'] = meta['format']
         hosters.append(hoster)
     return hosters
Example #7
0
    def __get_release(self, html, video):
        """Pick the release page to scrape from the release/episode list.

        The ``<name>-select`` setting chooses the strategy: ``0`` takes the
        first acceptable entry, any other value takes the entry whose quality
        has the highest ``Q_ORDER`` value.  Iteration stops at the first
        entry that ``self.__too_old`` rejects.

        :param html: page markup containing a ``ul`` with id ``releases``
            (movies) or ``episodes`` (anything else).
        :param video: video being looked up; non-movie entries must pass
            ``scraper_utils.release_check``.
        :return: ``href`` of the chosen entry, or ``None`` when the list is
            missing or no entry qualifies.
        """
        try:
            select = int(kodi.get_setting('%s-select' % (self.get_name())))
        except (TypeError, ValueError):
            # Setting is unset or not numeric: use the "first match" mode.
            select = 0

        ul_id = 'releases' if video.video_type == VIDEO_TYPES.MOVIE else 'episodes'
        fragment = dom_parser2.parse_dom(html, 'ul', {'id': ul_id})
        if not fragment:
            return None

        best_qorder = 0
        best_page = None
        for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
            # The link data sits on a <span> or, failing that, on an <a>.
            link = dom_parser2.parse_dom(item, 'span', req=['href', 'title'])
            if not link:
                link = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
                if not link:
                    continue

            page_url = link[0].attrs['href']
            release = link[0].attrs['title']

            age = dom_parser2.parse_dom(item, 'span', {'class': 'time'})
            if age and self.__too_old(age[0].content):
                break

            # Drop a leading "[...]" tag from the release name.
            release = re.sub(r'^\[[^\]]*\]\s*', '', release)
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(release)
            else:
                if not scraper_utils.release_check(
                        video, release, require_title=False):
                    continue
                meta = scraper_utils.parse_episode_link(release)

            if select == 0:
                # First acceptable entry wins.
                best_page = page_url
                break

            quality = scraper_utils.height_get_quality(meta['height'])
            logger.log(
                'result: |%s|%s|%s|' % (page_url, quality, Q_ORDER[quality]),
                log_utils.LOGDEBUG)
            if Q_ORDER[quality] > best_qorder:
                logger.log(
                    'Setting best as: |%s|%s|%s|' %
                    (page_url, quality, Q_ORDER[quality]),
                    log_utils.LOGDEBUG)
                best_page = page_url
                best_qorder = Q_ORDER[quality]

        return best_page
 def __get_quality(self, item, video):
     """Derive a quality constant for *item*.

     A truthy ``width`` takes precedence, then a truthy ``height``, then the
     height parsed from ``name``.  ``QUALITIES.HIGH`` is returned when none
     of the three is available.

     :param item: mapping that may carry ``width``, ``height`` and ``name``.
     :param video: its ``video_type`` selects the movie or episode parser
         used on ``name``.
     :return: quality value produced by the ``scraper_utils`` helpers, or
         ``QUALITIES.HIGH``.
     """
     if item.get('width'):
         return scraper_utils.width_get_quality(item['width'])
     if item.get('height'):
         return scraper_utils.height_get_quality(item['height'])
     if 'name' not in item:
         return QUALITIES.HIGH

     if video.video_type == VIDEO_TYPES.MOVIE:
         parse_link = scraper_utils.parse_movie_link
     else:
         parse_link = scraper_utils.parse_episode_link
     return scraper_utils.height_get_quality(parse_link(item['name'])['height'])
 def __get_post_links(self, html, video):
     """Map every link in the post's ``<h2>`` headings to a quality.

     Only the first ``<article>`` whose id matches ``post-<digits>`` is
     inspected.  The quality parsed from each link is passed through
     ``scraper_utils.get_quality`` together with the link's hostname.

     :param html: page markup.
     :param video: forwarded to ``scraper_utils.get_quality``.
     :return: dict of ``stream_url -> quality``; empty when no post is found.
     """
     sources = {}
     post = dom_parser.parse_dom(html, 'article', {'id': 'post-\d+'})
     if not post:
         return sources

     for heading in dom_parser.parse_dom(post[0], 'h2'):
         for stream_url in re.findall('href="([^"]+)', heading):
             meta = scraper_utils.parse_episode_link(stream_url)
             release_quality = scraper_utils.height_get_quality(meta['height'])
             host = urlparse.urlparse(stream_url).hostname
             sources[stream_url] = scraper_utils.get_quality(video, host, release_quality)
     return sources
Example #10
0
 def __get_sources(self, video, html):
     """Map the links of every matching release block to a quality.

     A block is a ``<center><b>release</b>`` header followed by the first
     ``<tr>...</tr>`` after it.  Tags are stripped from the release name,
     which must pass ``scraper_utils.release_check``; every ``href`` in the
     row then receives the quality parsed from that release name.

     :param video: forwarded to ``scraper_utils.release_check``.
     :param html: page markup.
     :return: dict of ``stream_url -> quality``.
     """
     sources = {}
     block_pattern = '<center>\s*<b>\s*(.*?)\s*</b>.*?<tr>(.*?)</tr>'
     for release, links in re.findall(block_pattern, html, re.DOTALL):
         release = re.sub('</?[^>]*>', '', release)
         if not scraper_utils.release_check(video, release):
             continue

         meta = scraper_utils.parse_episode_link(release)
         for stream_url in re.findall('href="([^"]+)', links):
             sources[stream_url] = scraper_utils.height_get_quality(
                 meta['height'])
     return sources
Example #11
0
 def get_sources(self, video):
     """Collect hosters from the links in the post's paragraphs.

     For every ``<a>`` inside a ``<p>`` of the first ``div.entry-content``,
     the link text is used as the release name.  Multi-part archives, rar
     files, samples and ``.nfo`` files are skipped.

     :param video: video object handed to ``self.get_url``; its
         ``video_type`` selects the movie or episode link parser.
     :return: list of hoster dicts (with a ``format`` key when the parser
         reports one); empty when there is no usable source URL or post.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(url, require_debrid=True, cache_limit=.5)
     post = dom_parser.parse_dom(html, 'div', {'class': 'entry-content'})
     if not post:
         return hosters

     for paragraph in dom_parser.parse_dom(post[0], 'p'):
         for stream_url, q_str in re.findall('href="([^"]+)[^>]+>([^<]+)', paragraph):
             unwanted = (re.search('\.part\.?\d+', q_str, re.I)
                         or '.rar' in q_str
                         or 'sample' in q_str
                         or q_str.endswith('.nfo'))
             if unwanted:
                 continue

             host = urlparse.urlparse(stream_url).hostname
             if video.video_type == VIDEO_TYPES.MOVIE:
                 parse_link = scraper_utils.parse_movie_link
             else:
                 parse_link = scraper_utils.parse_episode_link
             meta = parse_link(q_str)
             quality = scraper_utils.height_get_quality(meta['height'])
             hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'quality': quality, 'direct': False}
             if 'format' in meta:
                 hoster['format'] = meta['format']
             hosters.append(hoster)
     return hosters
Example #12
0
    def get_sources(self, video):
        """Collect hosters from the link block of the ``post-cont`` div.

        The link block is the text between the first ``<p><strong>`` and the
        following ``<script`` inside the first ``div.post-cont``.  Links
        rejected by ``scraper_utils.excluded_link`` are skipped.

        :param video: video object handed to ``self.get_url``; its
            ``video_type`` selects the movie or episode link parser.
        :return: list of hoster dicts; empty when the source URL, the
            container or the link block is missing.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
        containers = dom_parser2.parse_dom(html, 'div', {'class': 'post-cont'})
        if not containers:
            return hosters

        links_block = re.search('<p>\s*<strong>(.*?)<script',
                                containers[0].content, re.DOTALL)
        if not links_block:
            return hosters

        for link_attrs, _content in dom_parser2.parse_dom(links_block.group(1),
                                                          'a',
                                                          req='href'):
            stream_url = link_attrs['href']
            if scraper_utils.excluded_link(stream_url):
                continue

            if video.video_type == VIDEO_TYPES.MOVIE:
                parse_link = scraper_utils.parse_movie_link
            else:
                parse_link = scraper_utils.parse_episode_link
            meta = parse_link(stream_url)

            host = urlparse.urlparse(stream_url).hostname
            base_quality = scraper_utils.height_get_quality(meta['height'])
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'views': None,
                'url': stream_url,
                'rating': None,
                'quality': scraper_utils.get_quality(video, host,
                                                     base_quality),
                'direct': False
            })

        return hosters
 def __get_mirror_links(self, html, video):
     """Map the links that follow each ``mirrors.png`` image to a quality.

     For every ``<img>`` whose ``src`` ends with ``/mirrors.png``, the first
     ``<p>...</p>`` after the first occurrence of that ``src`` in *html* is
     scanned for ``<a href>`` links.

     :param html: page markup.
     :param video: forwarded to ``scraper_utils.get_quality``.
     :return: dict of ``stream_url -> {'quality': ..., 'direct': False}``.
     """
     sources = {}
     for img_attrs, _content in dom_parser2.parse_dom(html, 'img', req='src'):
         image = img_attrs['src']
         if not image.endswith('/mirrors.png'):
             continue

         # The image URL must be matched literally.  Without escaping, regex
         # metacharacters in it (".", "?", "+", ...) change or break the
         # pattern.
         match = re.search('%s.*?<p>(.*?)</p>' % (re.escape(image)), html,
                           re.DOTALL)
         if not match:
             continue

         for link_attrs, _content in dom_parser2.parse_dom(
                 match.group(1), 'a', req='href'):
             stream_url = link_attrs['href']
             host = urlparse.urlparse(stream_url).hostname
             meta = scraper_utils.parse_episode_link(stream_url)
             base_quality = scraper_utils.height_get_quality(meta['height'])
             sources[stream_url] = {
                 'quality': scraper_utils.get_quality(video, host,
                                                      base_quality),
                 'direct': False
             }
     return sources
Example #14
0
    def get_sources(self, video):
        """Collect hosters from anchors whose text ends in a file size.

        Only anchors whose (span-stripped) text ends in ``MB`` or ``GB`` are
        considered, and only when the season and episode parsed from that
        text equal those of *video*.

        :param video: video object handed to ``self.get_url``; supplies
            ``season`` and ``episode``.
        :return: list of hoster dicts (with a ``format`` key when one of
            ``FORMATS`` occurs in the parsed extra text); empty when there is
            no usable source URL.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        anchor_pattern = '<a[^>]+href="([^"]+)[^>]+>(.*?)</a>'
        for stream_url, title in re.findall(anchor_pattern, html):
            title = re.sub('<span[^>]*>|</span>', '', title).strip()
            if title[-2:].upper() not in ('MB', 'GB'):
                continue

            # NOTE(review): parse_episode_link is unpacked as a 5-tuple here;
            # confirm this matches the helper's return type in this module.
            _title, season, episode, height, extra = scraper_utils.parse_episode_link(
                title)
            if int(season) != int(video.season) or int(episode) != int(
                    video.episode):
                continue

            host = urlparse.urlparse(stream_url).hostname
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.height_get_quality(height),
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': False
            }
            # The first known format mentioned in the extra text wins.
            for vid_format in FORMATS:
                if vid_format in extra.lower():
                    hoster['format'] = vid_format
                    break
            hosters.append(hoster)

        return hosters
Example #15
0
    def __get_links(self, url, video):
        """Search for files matching *url* and turn usable ones into hosters.

        A file is excluded when any of six checks trips: it is not a video,
        is not ready, has an antivirus result of ``warning``/``infected``,
        has no ``video_info``, has ``video_info`` lacking a first or second
        stream that is an eng/und/unlabelled audio track, or fails
        ``scraper_utils.release_check``.  Files without a ``url_pls`` entry
        or larger than ``self.max_bytes`` are skipped too.

        :param url: source URL handed to ``self.__translate_search``.
        :param video: video being looked up.
        :return: list of direct hoster dicts carrying ``size`` and ``extra``.
        """
        hosters = []
        search_url = scraper_utils.urljoin(self.base_url, SEARCH_URL)
        query = self.__translate_search(url)
        result = self._http_get(search_url,
                                data=query,
                                allow_redirect=False,
                                cache_limit=.5)
        for item in result.get('files', []):
            # All six checks are always evaluated so that the log line shows
            # exactly which ones tripped.
            checks = [
                item.get('type', '').upper() != 'VIDEO',
                item.get('is_ready') != '1',
                item.get('av_result') in ['warning', 'infected'],
                'video_info' not in item,
                bool(
                    item.get('video_info') and not re.search(
                        '#0:(0|1)(\((eng|und)\))?:\s*Audio:',
                        item['video_info'], re.I)),
                not scraper_utils.release_check(video, item['name']),
            ]
            if any(checks):
                logger.log(
                    'Furk.net result excluded: %s - |%s|' %
                    (checks, item['name']), log_utils.LOGDEBUG)
                continue

            # Prefer the "<width> x <height>" resolution in video_info; fall
            # back to the height parsed from the file name.
            resolution = re.search('(\d{3,})\s*x\s*(\d{3,})',
                                   item['video_info'])
            if resolution:
                width, _height = resolution.groups()
                quality = scraper_utils.width_get_quality(width)
            else:
                if video.video_type == VIDEO_TYPES.MOVIE:
                    parse_link = scraper_utils.parse_movie_link
                else:
                    parse_link = scraper_utils.parse_episode_link
                quality = scraper_utils.height_get_quality(
                    parse_link(item['name'])['height'])

            if 'url_pls' not in item:
                logger.log(
                    'Furk.net result skipped - no playlist: |%s|' %
                    (json.dumps(item)), log_utils.LOGDEBUG)
                continue

            size_gb = scraper_utils.format_size(int(item['size']), 'B')
            if self.max_bytes and int(item['size']) > self.max_bytes:
                logger.log(
                    'Result skipped, Too big: |%s| - %s (%s) > %s (%sGB)' %
                    (item['name'], item['size'], size_gb, self.max_bytes,
                     self.max_gb))
                continue

            stream_url = item['url_pls']
            host = scraper_utils.get_direct_hostname(self, stream_url)
            hosters.append({
                'multi-part': False,
                'class': self,
                'views': None,
                'url': stream_url,
                'rating': None,
                'host': host,
                'quality': quality,
                'direct': True,
                'size': size_gb,
                'extra': item['name']
            })

        return hosters
Example #16
0
    def __get_links(self, url, video):
        """Search for posts matching *url* and turn usable ones into hosters.

        A post is excluded when any of six checks trips: it fails
        ``scraper_utils.release_check``, lists audio languages without
        ``eng``, has a duration of only seconds or of 0-5 minutes, is
        password protected, is flagged as a virus, or is not a video.  Posts
        larger than ``self.max_bytes`` are skipped as well.

        Quality comes from the post's ``width`` when it is a non-zero
        integer, otherwise from the height parsed out of the post title.

        :param url: source URL handed to ``self.__translate_search``.
        :param video: video being looked up; its ``video_type`` selects the
            movie or episode parser for the fallback quality.
        :return: list of direct hoster dicts; ``format``, ``size`` and
            ``extra`` are added when available.
        """
        hosters = []
        search_url, params = self.__translate_search(url)
        html = self._http_get(search_url, params=params, cache_limit=.5)
        js_result = scraper_utils.parse_json(html, search_url)
        down_url = js_result.get('downURL')
        dl_farm = js_result.get('dlFarm')
        dl_port = js_result.get('dlPort')
        for item in js_result.get('data', []):
            post_hash, size, post_title, ext, duration = (
                item['0'], item['4'], item['10'], item['11'], item['14'])

            checks = [False] * 6
            if not scraper_utils.release_check(video, post_title):
                checks[0] = True
            if 'alangs' in item and item['alangs'] and 'eng' not in item[
                    'alangs']:
                checks[1] = True
            if re.match(r'^\d+s', duration) or re.match(r'^[0-5]m', duration):
                checks[2] = True
            if 'passwd' in item and item['passwd']:
                checks[3] = True
            if 'virus' in item and item['virus']:
                checks[4] = True
            if 'type' in item and item['type'].upper() != 'VIDEO':
                checks[5] = True
            if any(checks):
                logger.log(
                    'EasyNews Post excluded: %s - |%s|' % (checks, item),
                    log_utils.LOGDEBUG)
                continue

            stream_url = down_url + urllib.quote(
                '/%s/%s/%s%s/%s%s' %
                (dl_farm, dl_port, post_hash, ext, post_title, ext))
            stream_url = stream_url + '|Authorization=%s' % (urllib.quote(
                self.auth))
            host = scraper_utils.get_direct_hostname(self, stream_url)

            quality = None
            if 'width' in item:
                try:
                    width = int(item['width'])
                except (TypeError, ValueError):
                    # Missing or non-numeric width: use the title instead.
                    width = 0
                if width:
                    quality = scraper_utils.width_get_quality(width)

            if quality is None:
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(post_title)
                else:
                    meta = scraper_utils.parse_episode_link(post_title)
                quality = scraper_utils.height_get_quality(meta['height'])

            if self.max_bytes:
                # size is expected to look like "<number> <unit>".
                match = re.search(r'([\d.]+)\s+(.*)', size)
                if match:
                    size_bytes = scraper_utils.to_bytes(*match.groups())
                    if size_bytes > self.max_bytes:
                        logger.log(
                            'Result skipped, Too big: |%s| - %s (%s) > %s (%s GB)'
                            % (post_title, size_bytes, size, self.max_bytes,
                               self.max_gb))
                        continue

            hoster = {
                'multi-part': False,
                'class': self,
                'views': None,
                'url': stream_url,
                'rating': None,
                'host': host,
                'quality': quality,
                'direct': True
            }
            title_upper = post_title.upper()
            if any(codec in title_upper for codec in ('X265', 'HEVC')):
                hoster['format'] = 'x265'
            if size:
                hoster['size'] = size
            if post_title:
                hoster['extra'] = post_title
            hosters.append(hoster)
        return hosters
Example #17
0
    def _blog_proc_results(self, html, post_pattern, date_format, video_type,
                           title, year):
        """Extract search results from blog-style markup.

        Every match of *post_pattern* in *html* is treated as one post.  A
        post is kept when its parsed title, year, season/episode tag and air
        date agree with what was searched for, and (when the
        ``<name>-filter`` setting is a non-zero number of days) when it is
        not older than that.

        :param html: markup to scan.
        :param post_pattern: regex with named groups; ``post_title`` is
            always read, ``url`` is read for accepted posts, and ``quality``
            and ``date`` are used when the pattern defines them.
        :param date_format: format passed to ``scraper_utils.to_datetime``
            for the post date; a falsy value disables the age filter.
        :param video_type: ``VIDEO_TYPES`` member selecting movie or episode
            handling.
        :param title: searched title; for episodes it may end in an
            ``SxxExx`` tag or a ``YYYY MM DD`` style date.
        :param year: searched year; a falsy value matches any year.
        :return: list of dicts with ``url``, ``title``, ``year`` and
            ``quality`` keys.
        """
        results = []
        search_date = ''
        search_sxe = ''
        # For episodes, split the search title into the show title plus
        # either an SxxExx tag or an air date.
        if video_type == VIDEO_TYPES.EPISODE:
            match = re.search('(.*?)\s*(S\d+E\d+)\s*', title)
            if match:
                show_title, search_sxe = match.groups()
            else:
                match = re.search(
                    '(.*?)\s*(\d{4})[._ -]?(\d{2})[._ -]?(\d{2})\s*', title)
                if match:
                    show_title, search_year, search_month, search_day = match.groups(
                    )
                    search_date = '%s-%s-%s' % (search_year, search_month,
                                                search_day)
                    search_date = scraper_utils.to_datetime(
                        search_date, "%Y-%m-%d").date()
                else:
                    show_title = title
        else:
            show_title = title

        today = datetime.date.today()
        for match in re.finditer(post_pattern, html, re.DOTALL):
            post_data = match.groupdict()
            post_title = post_data['post_title']
            # Strip any markup from the post title.
            post_title = re.sub('<[^>]*>', '', post_title)
            if 'quality' in post_data:
                post_title += '- [%s]' % (post_data['quality'])

            # The setting is re-read for every post; filter_days is rebound
            # to a timedelta below, so it has to start as an int each time.
            try:
                filter_days = int(
                    kodi.get_setting('%s-filter' % (self.get_name())))
            except ValueError:
                filter_days = 0
            if filter_days and date_format and 'date' in post_data:
                post_data['date'] = post_data['date'].strip()
                filter_days = datetime.timedelta(days=filter_days)
                # NOTE(review): .date() is called on to_datetime's result
                # before the falsy check below, so the fallback only triggers
                # if .date() itself returns something falsy -- confirm what
                # to_datetime yields on a parse failure.
                post_date = scraper_utils.to_datetime(post_data['date'],
                                                      date_format).date()
                if not post_date:
                    logger.log(
                        'Failed date Check in %s: |%s|%s|%s|' %
                        (self.get_name(), post_data['date'], date_format),
                        log_utils.LOGWARNING)
                    post_date = today

                # Skip posts older than the configured number of days.
                if today - post_date > filter_days:
                    continue

            match_year = ''
            match_date = ''
            match_sxe = ''
            # match_title and full_title are both overwritten unconditionally
            # after the parse below.
            match_title = full_title = post_title
            if video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(post_title)
                match_year = meta['year']
            else:
                meta = scraper_utils.parse_episode_link(post_title)
                match_sxe = 'S%02dE%02d' % (int(
                    meta['season']), int(meta['episode']))
                match_date = meta['airdate']

            match_title = meta['title']
            full_title = '%s (%sp) [%s]' % (meta['title'], meta['height'],
                                            meta['extra'])
            norm_title = scraper_utils.normalize_title(show_title)
            match_norm_title = scraper_utils.normalize_title(match_title)
            # Titles match when either normalized title contains the other.
            title_match = norm_title and (match_norm_title in norm_title
                                          or norm_title in match_norm_title)
            # An empty value on either side counts as a match.
            year_match = not year or not match_year or year == match_year
            sxe_match = not search_sxe or (search_sxe == match_sxe)
            # NOTE(review): search_date is a datetime.date when set; confirm
            # that meta['airdate'] has a comparable type.
            date_match = not search_date or (search_date == match_date)
            logger.log(
                'Blog Results: |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| (%s)'
                % (match_norm_title, norm_title, title_match, year, match_year,
                   year_match, search_date, match_date, date_match, search_sxe,
                   match_sxe, sxe_match, self.get_name()), log_utils.LOGDEBUG)
            if title_match and year_match and date_match and sxe_match:
                quality = scraper_utils.height_get_quality(meta['height'])
                result = {
                    'url': scraper_utils.pathify_url(post_data['url']),
                    'title': scraper_utils.cleanse_title(full_title),
                    'year': match_year,
                    'quality': quality
                }
                results.append(result)
        return results