コード例 #1
0
    def get_sources(self, video):
        """Collect hoster links for *video* from its detail page.

        Scrapes every 'movieplay' div for an embedded iframe; openload-style
        embeds become a single non-direct link whose quality is parsed from
        the URL, anything else is resolved through self.__get_links().

        :param video: video object resolvable via self.get_url()
        :returns: list of source dicts (url/host/quality/direct/...)
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}):
            iframe_src = dom_parser2.parse_dom(fragment, 'iframe', req='src')
            if iframe_src:
                iframe_src = iframe_src[0].attrs['src']
                # matches "openload"/"oload" mirrors case-insensitively
                if re.search('o(pen)?load', iframe_src, re.I):
                    meta = scraper_utils.parse_movie_link(iframe_src)
                    quality = scraper_utils.height_get_quality(meta['height'])
                    links = {iframe_src: {'quality': quality, 'direct': False}}
                else:
                    links = self.__get_links(iframe_src, url)

                for link in links:
                    direct = links[link]['direct']
                    quality = links[link]['quality']
                    if direct:
                        host = scraper_utils.get_direct_hostname(self, link)
                        if host == 'gvideo':
                            quality = scraper_utils.gv_get_quality(link)
                        # direct streams need UA/Referer appended for playback
                        stream_url = link + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
                    else:
                        host = urlparse.urlparse(link).hostname
                        stream_url = link

                    source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                    hosters.append(source)

        return hosters
コード例 #2
0
 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     """Locate a show or movie on the site.

     TV shows are probed via a predictable slug URL; movies go through the
     site search form.  Returns a list of dicts with 'url', 'title', 'year'.
     """
     results = []
     if video_type == VIDEO_TYPES.TVSHOW and title:
         # Any post div on the slugged show page means the show exists.
         show_page = scraper_utils.urljoin(self.base_url, '/tv-show/%s/' % (scraper_utils.to_slug(title)))
         html = self._http_get(show_page, require_debrid=True, cache_limit=24)
         if dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
             results.append({'url': scraper_utils.pathify_url(show_page), 'title': scraper_utils.cleanse_title(title), 'year': ''})
     elif video_type == VIDEO_TYPES.MOVIE:
         search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
         html = self._http_get(self.base_url, params={'s': search_title}, require_debrid=True, cache_limit=1)
         norm_title = scraper_utils.normalize_title(title)
         for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
             heading = re.search('<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
             if not heading: continue
             post_url, raw_title = heading.groups()
             # Skip TV posts and posts older than the configured cutoff.
             if '/tv-show/' in post or self.__too_old(post): continue
             raw_title = re.sub('<[^>]*>', '', raw_title)
             meta = scraper_utils.parse_movie_link(raw_title)
             match_year = meta['year']
             match_norm_title = scraper_utils.normalize_title(meta['title'])
             titles_overlap = match_norm_title in norm_title or norm_title in match_norm_title
             year_ok = not year or not match_year or year == match_year
             if titles_overlap and year_ok:
                 full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                 results.append({'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year})

     return results
コード例 #3
0
ファイル: 2ddl_scraper.py プロジェクト: CYBERxNUKE/xbmc-addon
 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     """Search /search/<title> and return matching show or movie dicts."""
     results = []
     query_url = '/search/' + urllib.quote_plus(title)
     html = self._http_get(query_url, require_debrid=True, cache_limit=1)
     if video_type == VIDEO_TYPES.TVSHOW:
         seen_urls = {}
         for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
             if CATEGORIES[video_type] not in post: continue
             tags = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
             if not tags: continue
             show_url, show_title = tags.groups()
             # de-dupe shows that appear in multiple posts
             if show_url in seen_urls: continue
             entry = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(show_title), 'year': ''}
             seen_urls[show_url] = entry
             results.append(entry)
     elif video_type == VIDEO_TYPES.MOVIE:
         norm_title = scraper_utils.normalize_title(title)
         headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
         posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
         # headings and post bodies are parallel lists on this site
         for (post_url, post_title), post in zip(headings, posts):
             if CATEGORIES[video_type] not in post or self.__too_old(post): continue
             meta = scraper_utils.parse_movie_link(post_title)
             match_year = meta['year']
             match_norm_title = scraper_utils.normalize_title(meta['title'])
             if not (match_norm_title in norm_title or norm_title in match_norm_title): continue
             if year and match_year and year != match_year: continue
             full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
             results.append({'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year})

     return results
コード例 #4
0
 def get_sources(self, video):
     """Pull non-direct hoster links out of the post body for *video*."""
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'post-cont'})
     if not fragment: return hosters

     # Links of interest sit between the first <p><strong> and a <script> tag.
     match = re.search('<p>\s*<strong>(.*?)<script', fragment[0].content, re.DOTALL)
     if not match: return hosters

     is_movie = video.video_type == VIDEO_TYPES.MOVIE
     for attrs, _content in dom_parser2.parse_dom(match.group(1), 'a', req='href'):
         stream_url = attrs['href']
         if scraper_utils.excluded_link(stream_url): continue
         if is_movie:
             meta = scraper_utils.parse_movie_link(stream_url)
         else:
             meta = scraper_utils.parse_episode_link(stream_url)

         host = urlparse.urlparse(stream_url).hostname
         quality = scraper_utils.get_quality(video, host, scraper_utils.height_get_quality(meta['height']))
         hosters.append({'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'quality': quality, 'direct': False})

     return hosters
コード例 #5
0
 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     """Search the /Film/ (movies, by year directory) or /Serial/ listing.

     Fix: accepts ``season`` (unused) so the signature matches the sibling
     scrapers' search() methods; framework calls that pass a season keyword
     no longer raise TypeError.

     :param video_type: VIDEO_TYPES value selecting the movie/show branch
     :param title: title to match (normalized, substring match)
     :param year: movie year; movies are only searched when a year is given
     :param season: accepted for interface compatibility, ignored
     :returns: list of dicts with 'url', 'title' and 'year' keys
     """
     results = []
     norm_title = scraper_utils.normalize_title(title)
     if video_type == VIDEO_TYPES.MOVIE:
         if year:
             base_url = urlparse.urljoin(self.base_url, '/Film/')
             html = self._http_get(base_url, cache_limit=48)
             for link in self.__parse_directory(html):
                 if year == link['title']:
                     url = urlparse.urljoin(base_url, link['link'])
                     for movie in self.__get_files(url, cache_limit=24):
                         match_title, match_year, _height, _extra = scraper_utils.parse_movie_link(movie['link'])
                         if not movie['directory'] and norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                             result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': year}
                             results.append(result)
     else:
         base_url = urlparse.urljoin(self.base_url, '/Serial/')
         html = self._http_get(base_url, cache_limit=48)
         for link in self.__parse_directory(html):
             if link['directory'] and norm_title in scraper_utils.normalize_title(link['title']):
                 url = urlparse.urljoin(base_url, link['link'])
                 result = {'url': scraper_utils.pathify_url(url), 'title': link['title'], 'year': ''}
                 results.append(result)

     return results
コード例 #6
0
ファイル: farda_scraper.py プロジェクト: henry73/salts
    def search(self, video_type, title, year, season=""):
        """Browse the site's /Film/ (by year) or /Serial/ directory for matches."""
        results = []
        norm_title = scraper_utils.normalize_title(title)
        if video_type != VIDEO_TYPES.MOVIE:
            serial_base = urlparse.urljoin(self.base_url, "/Serial/")
            html = self._http_get(serial_base, cache_limit=48)
            for entry in self.__parse_directory(html):
                if entry["directory"] and norm_title in scraper_utils.normalize_title(entry["title"]):
                    url = urlparse.urljoin(serial_base, entry["link"])
                    results.append({"url": scraper_utils.pathify_url(url), "title": entry["title"], "year": ""})
        elif year:
            # Movies are grouped in per-year directories under /Film/.
            film_base = urlparse.urljoin(self.base_url, "/Film/")
            html = self._http_get(film_base, cache_limit=48)
            for entry in self.__parse_directory(html):
                if year != entry["title"]: continue
                url = urlparse.urljoin(film_base, entry["link"])
                for movie in self.__get_files(url, cache_limit=24):
                    match_title, match_year, _height, _extra = scraper_utils.parse_movie_link(movie["link"])
                    year_ok = not year or not match_year or year == match_year
                    if not movie["directory"] and norm_title in scraper_utils.normalize_title(match_title) and year_ok:
                        results.append({"url": scraper_utils.pathify_url(url), "title": match_title, "year": year})

        return results
コード例 #7
0
 def get_sources(self, video):
     """Extract direct video sources from the page's player fragment.

     The player markup may live in an external JS file (<script src=...>)
     or inline; either way <source src="..."> URLs are harvested and turned
     into direct source dicts.
     """
     source_url = self.get_url(video)
     hosters = []
     if source_url and source_url != FORCE_NO_MATCH:
         url = urlparse.urljoin(self.base_url, source_url)
         html = self._http_get(url, cache_limit=.5)
         fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*screen[^"]*'})
         if fragment:
             js_src = dom_parser.parse_dom(fragment[0], 'script', ret='src')
             if js_src:
                 # player definition is in a separate JS file; fetch that instead
                 js_url = urlparse.urljoin(self.base_url, js_src[0])
                 html = self._http_get(js_url, cache_limit=.5)
             else:
                 html = fragment[0]

             for match in re.finditer('<source[^>]+src="([^"]+)', html):
                 stream_url = match.group(1)
                 host = self._get_direct_hostname(stream_url)
                 if host == 'gvideo':
                     quality = scraper_utils.gv_get_quality(stream_url)
                 else:
                     # NOTE: tuple-returning parse_movie_link API (older scraper_utils)
                     _, _, height, _ = scraper_utils.parse_movie_link(stream_url)
                     quality = scraper_utils.height_get_quality(height)
                     stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())

                 hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                 hosters.append(hoster)
     return hosters
コード例 #8
0
 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     """Run a site search and return matching show/movie result dicts.

     Note: unlike the debrid variant of this scraper, this one fetches the
     search page with require_debrid=False.
     """
     results = []
     html = self._http_get('/search/' + urllib.quote_plus(title), require_debrid=False, cache_limit=1)
     if video_type == VIDEO_TYPES.TVSHOW:
         seen_urls = {}
         for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
             if CATEGORIES[video_type] not in post: continue
             tag_link = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
             if not tag_link: continue
             show_url, show_title = tag_link.groups()
             if show_url in seen_urls: continue
             entry = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(show_title), 'year': ''}
             seen_urls[show_url] = entry
             results.append(entry)
     elif video_type == VIDEO_TYPES.MOVIE:
         norm_title = scraper_utils.normalize_title(title)
         headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
         posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
         for (post_url, post_title), post in zip(headings, posts):
             if CATEGORIES[video_type] not in post or self.__too_old(post): continue
             meta = scraper_utils.parse_movie_link(post_title)
             match_year = meta['year']
             match_norm_title = scraper_utils.normalize_title(meta['title'])
             if not (match_norm_title in norm_title or norm_title in match_norm_title): continue
             if year and match_year and year != match_year: continue
             full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
             results.append({'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year})

     return results
コード例 #9
0
    def __get_links(self, url, video):
        """Query the Alluc API for each search type and build hoster dicts.

        Fix: guard against results whose ``hosterurls`` list is empty --
        the original indexed [0] before checking the length, so an empty
        list raised IndexError and aborted the whole scrape.

        :param url: source page url used to build the API queries
        :param video: video object used for release/quality checks
        :returns: list of non-direct hoster dicts, de-duplicated by url
        """
        hosters = []
        seen_urls = set()
        for search_type in SEARCH_TYPES:
            search_url, params = self.__translate_search(url, search_type)
            if not search_url: continue
            html = self._http_get(search_url, params=params, cache_limit=.5)
            js_result = scraper_utils.parse_json(html, search_url)
            if js_result.get('status') != 'success':
                logger.log('Alluc API Error: |%s|%s|: %s' % (search_url, params, js_result.get('message', 'Unknown Error')), log_utils.LOGWARNING)
                continue
            
            for result in js_result['result']:
                hoster_urls = result['hosterurls']
                # exactly one hoster url per result; empty lists previously crashed
                if len(hoster_urls) != 1: continue
                if result['extension'] == 'rar': continue
                stream_url = hoster_urls[0]['url']
                if stream_url in seen_urls: continue

                if scraper_utils.release_check(video, result['title']):
                    host = urlparse.urlsplit(stream_url).hostname
                    quality = scraper_utils.get_quality(video, host, self._get_title_quality(result['title']))
                    hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': False}
                    hoster['extra'] = scraper_utils.cleanse_title(result['title'])
                    if video.video_type == VIDEO_TYPES.MOVIE:
                        meta = scraper_utils.parse_movie_link(hoster['extra'])
                    else:
                        meta = scraper_utils.parse_episode_link(hoster['extra'])
                    if 'format' in meta: hoster['format'] = meta['format']
                    
                    hosters.append(hoster)
                    seen_urls.add(stream_url)

        return hosters
コード例 #10
0
 def get_sources(self, video):
     """Match *video* against the base_url2 file listing and build direct sources.

     Movies match by normalized title substring; episodes by exact
     season/episode numbers parsed from the file name.
     """
     source_url = self.get_url(video)
     hosters = []
     norm_title = scraper_utils.normalize_title(video.title)
     if source_url and source_url != FORCE_NO_MATCH:
         source_url = urlparse.urljoin(self.base_url2, source_url)
         for line in self._get_files(source_url, cache_limit=24):
             if not line['directory']:
                 match = {}
                 if video.video_type == VIDEO_TYPES.MOVIE:
                     match_title, _match_year, height, extra = scraper_utils.parse_movie_link(line['link'])
                     if norm_title in scraper_utils.normalize_title(match_title):
                         match = line
                 else:
                     _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(line['link'])
                     if int(video.season) == int(season) and int(video.episode) == int(episode):
                         match = line
                     
                 # NOTE(review): this dubbed check runs before 'if match', so it
                 # skips every dubbed file whether it matched or not -- harmless
                 # but worth confirming it is intentional
                 if 'dubbed' in extra.lower(): continue
                 if match:
                     # direct stream: append UA header for the player
                     stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                     hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': scraper_utils.height_get_quality(height), 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                     if 'x265' in extra: hoster['format'] = 'x265'
                     if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                     hosters.append(hoster)
         
     return hosters
コード例 #11
0
 def __get_links_from_json2(self, url, page_url, video_type):
     """Fetch a JSON playlist via XHR and map stream URLs to quality info.

     :param url: JSON endpoint to fetch
     :param page_url: originating page, sent as the Referer header
     :param video_type: VIDEO_TYPES value selecting the fallback link parser
     :returns: dict of stream_url -> {'quality': ..., 'direct': True}
     """
     sources = {}
     headers = {'Referer': page_url}
     headers.update(XHR)
     html = self._http_get(url, headers=headers, cache_limit=0)
     js_data = scraper_utils.parse_json(html, url)
     try:
         playlist = js_data.get('playlist', [])
         # only the first playlist entry is used; an empty playlist raises
         # IndexError, which is swallowed/logged by the except below
         for source in playlist[0].get('sources', []):
             stream_url = source['file']
             if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                 quality = scraper_utils.gv_get_quality(stream_url)
             elif 'label' in source:
                 quality = scraper_utils.height_get_quality(source['label'])
             else:
                 # no label: fall back to parsing quality out of the URL itself
                 if video_type == VIDEO_TYPES.MOVIE:
                     meta = scraper_utils.parse_movie_link(stream_url)
                 else:
                     meta = scraper_utils.parse_episode_link(stream_url)
                 quality = scraper_utils.height_get_quality(meta['height'])
             sources[stream_url] = {'quality': quality, 'direct': True}
             logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
     except Exception as e:
         logger.log('Exception during yesmovies extract: %s' % (e), log_utils.LOGDEBUG)
     return sources
コード例 #12
0
    def __get_links(self, url, video):
        """Run every Alluc search type for *url* and collect hoster entries.

        Fix: results whose ``hosterurls`` list is empty no longer raise
        IndexError -- the length is checked before indexing element [0]
        (the original indexed first, then checked ``len(...) > 1``).

        :param url: source page url fed to __translate_search()
        :param video: video object for release checks and quality rating
        :returns: list of non-direct hoster dicts, de-duplicated by url
        """
        hosters = []
        seen_urls = set()
        for search_type in SEARCH_TYPES:
            search_url, params = self.__translate_search(url, search_type)
            if not search_url: continue
            html = self._http_get(search_url, params=params, cache_limit=.5)
            js_result = scraper_utils.parse_json(html, search_url)
            if js_result.get('status') != 'success':
                logger.log('Alluc API Error: |%s|%s|: %s' % (search_url, params, js_result.get('message', 'Unknown Error')), log_utils.LOGWARNING)
                continue
            
            for result in js_result['result']:
                hoster_urls = result['hosterurls']
                # only single-url, non-rar results are usable; an empty url
                # list used to crash here with IndexError
                if len(hoster_urls) != 1: continue
                if result['extension'] == 'rar': continue
                stream_url = hoster_urls[0]['url']
                if stream_url in seen_urls: continue

                if scraper_utils.release_check(video, result['title']):
                    host = urlparse.urlsplit(stream_url).hostname
                    quality = scraper_utils.get_quality(video, host, self._get_title_quality(result['title']))
                    hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': False}
                    hoster['extra'] = scraper_utils.cleanse_title(result['title'])
                    if video.video_type == VIDEO_TYPES.MOVIE:
                        meta = scraper_utils.parse_movie_link(hoster['extra'])
                    else:
                        meta = scraper_utils.parse_episode_link(hoster['extra'])
                    if 'format' in meta: hoster['format'] = meta['format']
                    
                    hosters.append(hoster)
                    seen_urls.add(stream_url)

        return hosters
コード例 #13
0
 def get_sources(self, video):
     """Build non-direct hoster entries from the post's release links."""
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
     for source, value in self.__get_post_links(html).iteritems():
         if scraper_utils.excluded_link(source): continue
         # quality comes from the release name attached to the link
         if video.video_type == VIDEO_TYPES.MOVIE:
             meta = scraper_utils.parse_movie_link(value['release'])
         else:
             meta = scraper_utils.parse_episode_link(value['release'])
         hoster = {'multi-part': False,
                   'host': urlparse.urlparse(source).hostname,
                   'class': self,
                   'views': None,
                   'url': source,
                   'rating': None,
                   'quality': scraper_utils.height_get_quality(meta['height']),
                   'direct': False}
         if 'format' in meta: hoster['format'] = meta['format']
         hosters.append(hoster)
     return hosters
コード例 #14
0
ファイル: farda_scraper.py プロジェクト: henry73/salts
 def search(self, video_type, title, year, season=''):
     """Scan the /Film/<year>/ or /Serial/ directory listings for matches."""
     results = []
     norm_title = scraper_utils.normalize_title(title)
     if video_type != VIDEO_TYPES.MOVIE:
         serial_base = urlparse.urljoin(self.base_url, '/Serial/')
         for entry in self.__parse_directory(self._http_get(serial_base, cache_limit=48)):
             if entry['directory'] and norm_title in scraper_utils.normalize_title(entry['title']):
                 show_url = urlparse.urljoin(serial_base, entry['link'])
                 results.append({'url': scraper_utils.pathify_url(show_url), 'title': entry['title'], 'year': ''})
     elif year:
         # Movies are grouped under per-year directories.
         film_base = urlparse.urljoin(self.base_url, '/Film/')
         for entry in self.__parse_directory(self._http_get(film_base, cache_limit=48)):
             if year != entry['title']: continue
             year_url = urlparse.urljoin(film_base, entry['link'])
             for movie in self.__get_files(year_url, cache_limit=24):
                 match_title, match_year, _height, _extra = scraper_utils.parse_movie_link(movie['link'])
                 if movie['directory']: continue
                 if norm_title not in scraper_utils.normalize_title(match_title): continue
                 if year and match_year and year != match_year: continue
                 results.append({'url': scraper_utils.pathify_url(year_url), 'title': match_title, 'year': year})
     return results
コード例 #15
0
 def get_sources(self, video):
     """Extract hoster links from the 'post-cont' fragment of the page."""
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, require_debrid=True, cache_limit=.5)
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'post-cont'})
     if not fragment: return hosters

     # Useful links sit between the opening <p><strong> and a <script> tag.
     match = re.search('<p>\s*<strong>(.*?)<script', fragment[0].content, re.DOTALL)
     if not match: return hosters

     for attrs, _content in dom_parser2.parse_dom(match.group(1), 'a', req='href'):
         stream_url = attrs['href']
         if scraper_utils.excluded_link(stream_url): continue
         parser = scraper_utils.parse_movie_link if video.video_type == VIDEO_TYPES.MOVIE else scraper_utils.parse_episode_link
         meta = parser(stream_url)
         host = urlparse.urlparse(stream_url).hostname
         base_quality = scraper_utils.height_get_quality(meta['height'])
         hosters.append({'multi-part': False, 'host': host, 'class': self, 'views': None,
                         'url': stream_url, 'rating': None,
                         'quality': scraper_utils.get_quality(video, host, base_quality), 'direct': False})

     return hosters
コード例 #16
0
 def __get_videos(self, contents, video):
     """Recursively walk *contents* collecting playable video entries.

     Bug fix: the original rebound the ``video`` parameter to a result
     dict inside the loop, so from the second file onward
     ``video.video_type`` was evaluated on a plain dict (AttributeError)
     and the recursion received the dict as well.  Result dicts now use a
     separate local name.

     :param contents: mapping of key -> item dicts with 'type', 'ext',
                      'size', 'name', 'url', 'stream' (and 'children' for dirs)
     :param video: the video object being matched (provides .video_type)
     :returns: list of dicts with 'name', 'url', 'quality' (and 'size')
     """
     videos = []
     for key in contents:
         item = contents[key]
         if item['type'].lower() == 'dir':
             videos += self.__get_videos(item['children'], video)
         else:
             # only real video files larger than 100MB are considered
             if item['ext'].upper() in VIDEO_EXT and int(item['size']) > (100 * 1024 * 1024):
                 if video.video_type == VIDEO_TYPES.MOVIE:
                     _, _, height, _ = scraper_utils.parse_movie_link(item['name'])
                 else:
                     _, _, _, height, _ = scraper_utils.parse_episode_link(item['name'])
                 entry = {
                     'name': item['name'],
                     'size': item['size'],
                     'url': item['url'],
                     'quality': scraper_utils.height_get_quality(height)
                 }
                 videos.append(entry)
                 if item['stream'] is not None:
                     # transcoded streams are capped at 720p
                     if int(height) > 720: height = 720
                     entry = {
                         'name': '(Transcode) %s' % (item['name']),
                         'url': item['stream'],
                         'quality': scraper_utils.height_get_quality(height)
                     }
                     videos.append(entry)
     return videos
コード例 #17
0
    def get_sources(self, video):
        """Turn direct movie-file links on the page into source entries."""
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
            href = attrs['href']
            if MOVIE_URL not in href: continue
            meta = scraper_utils.parse_movie_link(href)
            # direct link: strip to path and append a UA header for playback
            stream_url = scraper_utils.pathify_url(href) + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hoster = {'multi-part': False,
                      'host': scraper_utils.get_direct_hostname(self, stream_url),
                      'class': self,
                      'quality': scraper_utils.height_get_quality(meta['height']),
                      'views': None,
                      'rating': None,
                      'url': stream_url,
                      'direct': True}
            if 'format' in meta: hoster['format'] = meta['format']
            hosters.append(hoster)

        return hosters
コード例 #18
0
 def get_sources(self, video):
     """Find a matching file in the site listing and emit a direct source.

     Movies match by normalized title substring; episodes via
     self.__episode_match().  Dubbed releases are skipped.
     """
     source_url = self.get_url(video)
     hosters = []
     norm_title = scraper_utils.normalize_title(video.title)
     if source_url and source_url != FORCE_NO_MATCH:
         source_url = urlparse.urljoin(self.base_url, source_url)
         for line in self._get_files(source_url, cache_limit=24):
             if not line['directory']:
                 match = {}
                 if video.video_type == VIDEO_TYPES.MOVIE:
                     meta = scraper_utils.parse_movie_link(line['link'])
                     if norm_title in scraper_utils.normalize_title(meta['title']):
                         match = line
                 elif self.__episode_match(line, video):
                     match = line
                     meta = scraper_utils.parse_episode_link(line['link'])
                     
                 # meta is only read when match was set, so it is always bound here
                 if match:
                     if meta['dubbed']: continue
                     # make the URL site-relative and append a UA header
                     stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                     stream_url = stream_url.replace(self.base_url, '')
                     quality = scraper_utils.height_get_quality(meta['height'])
                     hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                     if 'format' in meta: hoster['format'] = meta['format']
                     if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                     hosters.append(hoster)
         
     return hosters
コード例 #19
0
ファイル: farda_scraper.py プロジェクト: henry73/salts
 def get_sources(self, video):
     """Scan the file listing for an entry matching *video* and wrap it as a direct source."""
     source_url = self.get_url(video)
     hosters = []
     norm_title = scraper_utils.normalize_title(video.title)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters
     listing_url = urlparse.urljoin(self.base_url, source_url)
     for entry in self.__get_files(listing_url, cache_limit=24):
         if entry['directory']: continue
         match = {}
         if video.video_type == VIDEO_TYPES.MOVIE:
             match_title, _match_year, height, extra = scraper_utils.parse_movie_link(entry['link'])
             if norm_title in scraper_utils.normalize_title(match_title):
                 match = entry
         else:
             _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(entry['link'])
             if int(video.season) == int(season) and int(video.episode) == int(episode):
                 match = entry

         # dubbed files are skipped whether they matched or not (as in the original)
         if 'dubbed' in extra.lower(): continue
         if not match: continue
         stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
         hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': scraper_utils.height_get_quality(height), 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
         if 'x265' in extra: hoster['format'] = 'x265'
         if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
         hosters.append(hoster)
     return hosters
コード例 #20
0
 def get_sources(self, video):
     """Collect download links from the page's 'download_message' paragraph.

     Quality is derived from the og:title metadata; when that cannot be
     parsed, 480p is assumed.
     """
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, require_debrid=False, cache_limit=.5)
     title = dom_parser2.parse_dom(html,
                                   'meta', {'property': 'og:title'},
                                   req='content')
     # one shared meta dict for every link on the page
     meta = scraper_utils.parse_movie_link(
         title[0].attrs['content']) if title else {}
     fragment = dom_parser2.parse_dom(html, 'p',
                                      {'class': 'download_message'})
     if fragment:
         for attrs, _content in dom_parser2.parse_dom(fragment[0].content,
                                                      'a',
                                                      req='href'):
             source = attrs['href']
             if scraper_utils.excluded_link(source): continue
             host = urlparse.urlparse(source).hostname
             quality = scraper_utils.height_get_quality(
                 meta.get('height', 480))
             # NOTE(review): 'direct': True even though these look like
             # external hoster links -- sibling scrapers mark such links
             # direct=False; confirm this is intended
             hoster = {
                 'multi-part': False,
                 'host': host,
                 'class': self,
                 'views': None,
                 'url': source,
                 'rating': None,
                 'quality': quality,
                 'direct': True
             }
             if 'format' in meta: hoster['format'] = meta['format']
             hosters.append(hoster)
     return hosters
コード例 #21
0
 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     """Search the site for a TV show or movie.

     TV shows are probed by slugged URL (no real show search); movies go
     through the site's ?s= search and are matched by normalized title
     and, when both sides supply one, by year.
     """
     results = []
     if video_type == VIDEO_TYPES.TVSHOW and title:
         # Probe the canonical show URL; any 'post-NNN' div on the page is
         # taken as evidence the show exists.
         test_url = '/tv-show/%s/' % (scraper_utils.to_slug(title))
         test_url = scraper_utils.urljoin(self.base_url, test_url)
         html = self._http_get(test_url, require_debrid=True, cache_limit=24)
         posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
         if posts:
             result = {'url': scraper_utils.pathify_url(test_url), 'title': scraper_utils.cleanse_title(title), 'year': ''}
             results.append(result)
     elif video_type == VIDEO_TYPES.MOVIE:
         # The site search chokes on punctuation; strip to alphanumerics.
         search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
         html = self._http_get(self.base_url, params={'s': search_title}, require_debrid=True, cache_limit=1)
         norm_title = scraper_utils.normalize_title(title)
         for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
             # First heading link in the post holds the URL and release title.
             match = re.search('<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
             if match:
                 post_url, post_title = match.groups()
                 if '/tv-show/' in post or self.__too_old(post): continue
                 post_title = re.sub('<[^>]*>', '', post_title)
                 meta = scraper_utils.parse_movie_link(post_title)
                 full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                 match_year = meta['year']

                 # Substring match in either direction; year must agree only
                 # when both query and post provide one.
                 match_norm_title = scraper_utils.normalize_title(meta['title'])
                 if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                     result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                     results.append(result)

     return results
コード例 #22
0
 def __get_links_from_json2(self, url, page_url, video_type):
     """Fetch a JWPlayer-style JSON playlist from *url* and map each stream
     URL to a {'quality': ..., 'direct': True} dict.

     Quality is derived from gvideo URL inspection first, then the source's
     'label' field, and finally by parsing the stream's file name.
     """
     sources = {}
     # The playlist endpoint expects an AJAX request with the page as referer.
     headers = {'Referer': page_url}
     headers.update(XHR)
     html = self._http_get(url, headers=headers, cache_limit=0)
     js_data = scraper_utils.parse_json(html, url)
     try:
         playlist = js_data.get('playlist', [])
         for source in playlist[0].get('sources', []):
             stream_url = source['file']
             if scraper_utils.get_direct_hostname(self,
                                                  stream_url) == 'gvideo':
                 quality = scraper_utils.gv_get_quality(stream_url)
             elif 'label' in source:
                 quality = scraper_utils.height_get_quality(source['label'])
             else:
                 if video_type == VIDEO_TYPES.MOVIE:
                     meta = scraper_utils.parse_movie_link(stream_url)
                 else:
                     meta = scraper_utils.parse_episode_link(stream_url)
                 quality = scraper_utils.height_get_quality(meta['height'])
             sources[stream_url] = {'quality': quality, 'direct': True}
             logger.log(
                 'Adding stream: %s Quality: %s' % (stream_url, quality),
                 log_utils.LOGDEBUG)
     except Exception as e:
         # Empty/malformed playlists are common; this is deliberately
         # best-effort -- log and return whatever was collected so far.
         logger.log('Exception during yesmovies extract: %s' % (e),
                    log_utils.LOGDEBUG)
     return sources
コード例 #23
0
 def get_sources(self, video):
     """Return direct hosters scraped from the page's <source> tags.

     The player lives in a 'screen' div; its markup is either inline or
     loaded from an external <script src=...>, which is fetched first.
     """
     source_url = self.get_url(video)
     hosters = []
     if source_url and source_url != FORCE_NO_MATCH:
         url = urlparse.urljoin(self.base_url, source_url)
         html = self._http_get(url, cache_limit=.5)
         fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*screen[^"]*'})
         if fragment:
             # The <source> tags may live in an external player script.
             js_src = dom_parser.parse_dom(fragment[0], 'script', ret='src')
             if js_src:
                 js_url = urlparse.urljoin(self.base_url, js_src[0])
                 html = self._http_get(js_url, cache_limit=.5)
             else:
                 html = fragment[0]
                 
             for match in re.finditer('<source[^>]+src="([^"]+)', html):
                 stream_url = match.group(1)
                 host = self._get_direct_hostname(stream_url)
                 if host == 'gvideo':
                     quality = scraper_utils.gv_get_quality(stream_url)
                 elif 'blogspot' in stream_url:
                     # Blogspot streams are gvideo-backed; same quality probe.
                     quality = scraper_utils.gv_get_quality(stream_url)
                 else:
                     # This scraper uses the tuple-returning parse_movie_link
                     # API: (title, year, height, extra).
                     _, _, height, _ = scraper_utils.parse_movie_link(stream_url)
                     quality = scraper_utils.height_get_quality(height)
                     # Non-gvideo streams need a UA header for playback.
                     stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                     
                 hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                 hosters.append(hoster)
     return hosters
コード例 #24
0
    def __get_links(self, url, video):
        """Query the Furk search API and convert matching files to hosters.

        Files are dropped when any of six sanity checks fires (non-video,
        not ready, AV-flagged, missing/bad audio track info, or -- for
        episodes -- neither SxxExx nor the airdate appears in the name).
        """
        hosters = []
        search_url = urlparse.urljoin(self.base_url, SEARCH_URL)
        query = self.__translate_search(url)
        result = self._http_get(search_url, data=query, allow_redirect=False, cache_limit=.5)
        if 'files' in result:
            for item in result['files']:
                # checks[i] == True means exclusion reason i fired; the full
                # vector is logged below for debugging.
                checks = [False] * 6
                if 'type' not in item or item['type'].upper() != 'VIDEO': checks[0] = True
                if 'is_ready' in item and item['is_ready'] != '1': checks[1] = True
                if 'av_result' in item and item['av_result'] in ['warning', 'infected']: checks[2] = True
                if 'video_info' not in item: checks[3] = True
                if 'video_info' in item and item['video_info'] and not re.search('#0:(?:0|1)(?:\(eng\)|\(und\))?:\s*Audio:', item['video_info']): checks[4] = True
                if video.video_type == VIDEO_TYPES.EPISODE:
                    # Match either SxxExx or the episode airdate in the name.
                    sxe = '[. ][Ss]%02d[Ee]%02d[. ]' % (int(video.season), int(video.episode))
                    if not re.search(sxe, item['name']):
                        if video.ep_airdate:
                            airdate_pattern = '[. ]%s[. ]%02d[. ]%02d[. ]' % (video.ep_airdate.year, video.ep_airdate.month, video.ep_airdate.day)
                            if not re.search(airdate_pattern, item['name']): checks[5] = True
                    
                if any(checks):
                    log_utils.log('Furk.net result excluded: %s - |%s|' % (checks, item['name']), log_utils.LOGDEBUG)
                    continue
                
                # Prefer the real resolution from video_info; otherwise parse
                # the release name, falling back to HIGH.
                match = re.search('(\d{3,})\s?x\s?(\d{3,})', item['video_info'])
                if match:
                    width, _ = match.groups()
                    quality = scraper_utils.width_get_quality(width)
                else:
                    if video.video_type == VIDEO_TYPES.MOVIE:
                        _, _, height, _ = scraper_utils.parse_movie_link(item['name'])
                        quality = scraper_utils.height_get_quality(height)
                    elif video.video_type == VIDEO_TYPES.EPISODE:
                        _, _, _, height, _ = scraper_utils.parse_episode_link(item['name'])
                        if int(height) > -1:
                            quality = scraper_utils.height_get_quality(height)
                        else:
                            quality = QUALITIES.HIGH
                    else:
                        quality = QUALITIES.HIGH
                    
                if 'url_pls' in item:
                    size_gb = scraper_utils.format_size(int(item['size']), 'B')
                    # Honour the user's maximum-size setting.
                    if self.max_bytes and int(item['size']) > self.max_bytes:
                        log_utils.log('Result skipped, Too big: |%s| - %s (%s) > %s (%sGB)' % (item['name'], item['size'], size_gb, self.max_bytes, self.max_gb))
                        continue

                    stream_url = item['url_pls']
                    host = self._get_direct_hostname(stream_url)
                    hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': True}
                    hoster['size'] = size_gb
                    hoster['extra'] = item['name']
                    hosters.append(hoster)
                else:
                    log_utils.log('Furk.net result skipped - no playlist: |%s|' % (json.dumps(item)), log_utils.LOGDEBUG)
                    
        return hosters
コード例 #25
0
    def get_sources(self, video):
        """Scrape the download table of a debrid release page into hosters.

        Release and size strings come from the page's info table.  Any host
        that contributes more than one link is dropped entirely at the end
        -- presumably to skip multi-part/split uploads (confirm intent).
        """
        source_url = self.get_url(video)
        hosters = []
        host_count = {}
        if source_url and source_url != FORCE_NO_MATCH:
            url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(url, require_debrid=True, cache_limit=.5)

            # Release-name string; used below to derive quality.
            q_str = ''
            match = re.search('>Release.*?td_col">([^<]+)', html)
            if match:
                q_str = match.group(1).upper()

            size = ''
            match = re.search('>Size.*?td_col">([^<]+)', html)
            if match:
                size = match.group(1).upper()

            fragment = dom_parser.parse_dom(html, 'table',
                                            {'id': 'download_table'})
            if fragment:
                for match in re.finditer('''href=['"]([^'"]+)''', fragment[0]):
                    stream_url = match.group(1)
                    # Rar archives are not streamable.
                    if re.search('\.rar(\.|$)', stream_url):
                        continue

                    host = urlparse.urlsplit(stream_url).hostname
                    if q_str:
                        if video.video_type == VIDEO_TYPES.EPISODE:
                            _title, _season, _episode, height, _extra = scraper_utils.parse_episode_link(
                                q_str)
                        else:
                            _title, _year, height, _extra = scraper_utils.parse_movie_link(
                                q_str)
                        quality = scraper_utils.height_get_quality(height)
                    else:
                        # NOTE(review): this keys QUALITY_MAP by the full link
                        # URL, which seems unlikely ever to match -- verify it
                        # wasn't meant to use a quality token from the page.
                        quality = QUALITY_MAP.get(
                            match.group(1).upper(), QUALITIES.HIGH)
                    quality = scraper_utils.get_quality(video, host, quality)
                    host_count[host] = host_count.get(host, 0) + 1
                    hoster = {
                        'multi-part': False,
                        'class': self,
                        'host': host,
                        'quality': quality,
                        'views': None,
                        'url': stream_url,
                        'rating': None,
                        'direct': False
                    }
                    if size: hoster['size'] = size
                    hosters.append(hoster)

        # Drop every link from any host that appeared more than once.
        new_hosters = [
            hoster for hoster in hosters if host_count[hoster['host']] <= 1
        ]
        return new_hosters
コード例 #26
0
 def __get_quality(self, item, video):
     """Quality for *item*: an explicit 'width' wins, then 'height';
     otherwise the height is parsed out of the item's release name."""
     if 'width' in item:
         return scraper_utils.width_get_quality(item['width'])
     if 'height' in item:
         return scraper_utils.height_get_quality(item['height'])
     # Neither dimension supplied -- derive height from the release name.
     if video.video_type == VIDEO_TYPES.MOVIE:
         _title, _year, height, _extra = scraper_utils.parse_movie_link(item['name'])
     else:
         _title, _season, _episode, height, _extra = scraper_utils.parse_episode_link(item['name'])
     return scraper_utils.height_get_quality(height)
コード例 #27
0
    def __get_links(self, url, video):
        """Search EasyNews and build direct hosters from the JSON results.

        Each result row is a dict keyed by stringified column index.  Posts
        failing any of six checks (title mismatch, non-English audio, too
        short, password/virus flagged, non-video) are skipped.
        """
        hosters = []
        search_url = self.__translate_search(url)
        html = self._http_get(search_url, cache_limit=.5)
        js_result = scraper_utils.parse_json(html, search_url)
        down_url = js_result.get('downURL')
        dl_farm = js_result.get('dlFarm')
        dl_port = js_result.get('dlPort')
        for item in js_result.get('data', []):
            # Positional fields: hash, size, title, extension, duration.
            post_hash, size, post_title, ext, duration = item['0'], item['4'], item['10'], item['11'], item['14']
            checks = [False] * 6
            if not scraper_utils.release_check(video, post_title): checks[0] = True
            if 'alangs' in item and item['alangs'] and 'eng' not in item['alangs']: checks[1] = True
            # Anything under six minutes is assumed to be a sample/junk clip.
            if re.match('^\d+s', duration) or re.match('^[0-5]m', duration): checks[2] = True
            if 'passwd' in item and item['passwd']: checks[3] = True
            if 'virus' in item and item['virus']: checks[4] = True
            if 'type' in item and item['type'].upper() != 'VIDEO': checks[5] = True
            if any(checks):
                log_utils.log('EasyNews Post excluded: %s - |%s|' % (checks, item), log_utils.LOGDEBUG)
                continue
            
            stream_url = down_url + urllib.quote('/%s/%s/%s%s/%s%s' % (dl_farm, dl_port, post_hash, ext, post_title, ext))
            # Auth rides to the player as a header suffix on the URL.
            stream_url = stream_url + '|Authorization=%s' % (urllib.quote(self.auth))
            host = self._get_direct_hostname(stream_url)
            # Prefer the explicit width field; fall back to the release name.
            quality = None
            if 'width' in item:
                try: width = int(item['width'])
                except: width = 0
                if width:
                    quality = scraper_utils.width_get_quality(width)
            
            if quality is None:
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(post_title)
                else:
                    meta = scraper_utils.parse_episode_link(post_title)
                quality = scraper_utils.height_get_quality(meta['height'])
                
            # Honour the user's maximum-size setting when one is configured.
            if self.max_bytes:
                match = re.search('([\d.]+)\s+(.*)', size)
                if match:
                    size_bytes = scraper_utils.to_bytes(*match.groups())
                    if size_bytes > self.max_bytes:
                        log_utils.log('Result skipped, Too big: |%s| - %s (%s) > %s (%s GB)' % (post_title, size_bytes, size, self.max_bytes, self.max_gb))
                        continue

            hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': True}
            if any(i for i in ['X265', 'HEVC'] if i in post_title.upper()): hoster['format'] = 'x265'
            if size: hoster['size'] = size
            if post_title: hoster['extra'] = post_title
            hosters.append(hoster)
        return hosters
コード例 #28
0
 def __get_quality(self, item, video):
     """Pick the best quality hint on *item*, falling back to HIGH when no
     width, height, or release name is available."""
     width = item.get('width')
     if width:
         return scraper_utils.width_get_quality(width)
     height = item.get('height')
     if height:
         return scraper_utils.height_get_quality(height)
     if 'name' not in item:
         return QUALITIES.HIGH
     # Parse the height out of the release name.
     if video.video_type == VIDEO_TYPES.MOVIE:
         meta = scraper_utils.parse_movie_link(item['name'])
     else:
         meta = scraper_utils.parse_episode_link(item['name'])
     return scraper_utils.height_get_quality(meta['height'])
コード例 #29
0
 def __get_quality(self, item, video):
     """Return a quality constant for *item*.

     Prefers an explicit 'width' entry, then 'height'; otherwise parses
     the height out of the release name.  Falls back to QUALITIES.HIGH
     when no usable hint exists (previously a missing 'name' key raised
     KeyError here; the sibling variants of this helper in this file
     already guard against that).
     """
     if 'width' in item:
         return scraper_utils.width_get_quality(item['width'])
     elif 'height' in item:
         return scraper_utils.height_get_quality(item['height'])
     elif 'name' in item:
         if video.video_type == VIDEO_TYPES.MOVIE:
             _title, _year, height, _extra = scraper_utils.parse_movie_link(
                 item['name'])
         else:
             _title, _season, _episode, height, _extra = scraper_utils.parse_episode_link(
                 item['name'])
         return scraper_utils.height_get_quality(height)
     else:
         return QUALITIES.HIGH
コード例 #30
0
 def __get_quality(self, item, video):
     """Best-effort quality for *item*: truthy width, then truthy height,
     then the height parsed from the release name, else HIGH."""
     if item.get('width'):
         return scraper_utils.width_get_quality(item['width'])
     if item.get('height'):
         return scraper_utils.height_get_quality(item['height'])
     if 'name' in item:
         if video.video_type == VIDEO_TYPES.MOVIE:
             meta = scraper_utils.parse_movie_link(item['name'])
         else:
             meta = scraper_utils.parse_episode_link(item['name'])
         return scraper_utils.height_get_quality(meta['height'])
     return QUALITIES.HIGH
コード例 #31
0
 def __movie_search(self, title, year):
     """Scan the site's directory listing for non-dubbed movies whose
     normalized title contains *title* and whose year matches *year*
     (when both are known)."""
     results = []
     wanted = scraper_utils.normalize_title(title)
     html = self._http_get(self.base_url, cache_limit=48)
     for entry in self._parse_directory(html):
         if entry['directory']:
             continue  # only file entries can be movie links
         match_title, match_year, height, extra = scraper_utils.parse_movie_link(entry['title'])
         if 'dubbed' in extra.lower():
             continue
         if wanted not in scraper_utils.normalize_title(match_title):
             continue
         if year and match_year and year != match_year:
             continue
         label = '%s [%sp.%s]' % (match_title.replace('.', ' '), height, extra)
         results.append({'url': scraper_utils.pathify_url(entry['link']),
                         'title': scraper_utils.cleanse_title(label),
                         'year': match_year})
     return results
コード例 #32
0
 def __movie_search(self, title, year):
     """Search the directory index for non-dubbed movies matching *title*
     (normalized substring) and *year* (when both sides supply one)."""
     results = []
     wanted = scraper_utils.normalize_title(title)
     html = self._http_get(self.base_url, cache_limit=48)
     for entry in self._parse_directory(html):
         if entry['directory']:
             continue  # skip sub-directories; only files are movie links
         meta = scraper_utils.parse_movie_link(entry['title'])
         if meta['dubbed']:
             continue
         title_ok = wanted in scraper_utils.normalize_title(meta['title'])
         year_ok = not year or not meta['year'] or year == meta['year']
         if title_ok and year_ok:
             label = '%s [%sp.%s]' % (meta['title'].replace('.', ' '), meta['height'], meta['extra'])
             results.append({'url': scraper_utils.pathify_url(entry['link']),
                             'title': scraper_utils.cleanse_title(label),
                             'year': meta['year']})
     return results
コード例 #33
0
 def __movie_search(self, title, year):
     """Find non-dubbed movie entries in the site directory that match
     *title* (substring of normalized titles) and *year* (when known)."""
     results = []
     wanted = scraper_utils.normalize_title(title)
     html = self._http_get(self.base_url, cache_limit=48)
     for entry in scraper_utils.parse_directory(self, html):
         if entry['directory']:
             continue  # only file entries can be movie links
         meta = scraper_utils.parse_movie_link(entry['title'])
         if meta['dubbed']:
             continue
         if wanted not in scraper_utils.normalize_title(meta['title']):
             continue
         if year and meta['year'] and year != meta['year']:
             continue
         label = '%s [%sp.%s]' % (meta['title'].replace('.', ' '), meta['height'], meta['extra'])
         results.append({'url': scraper_utils.pathify_url(entry['link']),
                         'title': scraper_utils.cleanse_title(label),
                         'year': meta['year']})
     return results
コード例 #34
0
    def get_sources(self, video):
        """Parse the links-table rows of the video page into hosters.

        Each row binds a stream URL plus a release string.  gvideo links
        are expanded via parse_google and treated as direct; everything
        else is an external hoster whose quality comes from the release
        name.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
        fragment = dom_parser2.parse_dom(html, 'table',
                                         {'class': 'links-table'})
        if not fragment: return hosters
        for _attrs, row in dom_parser2.parse_dom(fragment[0].content, 'tr'):
            # playVideo.bind('<url>'...) followed by the release-name cell.
            match = re.search(
                "playVideo\.bind\(.*?'([^']+)(?:[^>]*>){2}(.*?)</td>", row,
                re.DOTALL)
            if not match: continue

            stream_url, release = match.groups()
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                sources = scraper_utils.parse_google(self, stream_url)
            else:
                sources = [stream_url]

            for source in sources:
                host = scraper_utils.get_direct_hostname(self, source)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(source)
                    direct = True
                else:
                    host = urlparse.urlparse(source).hostname
                    if video.video_type == VIDEO_TYPES.MOVIE:
                        meta = scraper_utils.parse_movie_link(release)
                    else:
                        meta = scraper_utils.parse_episode_link(release)
                    base_quality = scraper_utils.height_get_quality(
                        meta['height'])
                    # Let per-host quality overrides adjust the base value.
                    quality = scraper_utils.get_quality(
                        video, host, base_quality)
                    direct = False
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': source,
                    'direct': direct
                }
                hosters.append(hoster)

        return hosters
コード例 #35
0
    def get_sources(self, video):
        """Build direct hosters for *video*.

        Movies: the source URL itself is the stream; release metadata is
        parsed from its name.  Episodes: each entry matched by
        __match_episode yields a stream, with the base_url prefix
        stripped back off the final URL.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        # Playback headers ride along on the URL after a '|' separator.
        headers = {
            'User-Agent': scraper_utils.get_ua(),
            'Referer': self.base_url + source_url
        }
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(source_url)
            stream_url = source_url + scraper_utils.append_headers(headers)
            quality = scraper_utils.height_get_quality(meta['height'])
            hoster = {
                'multi-part': False,
                'host': scraper_utils.get_direct_hostname(self, stream_url),
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True
            }
            if 'format' in meta: hoster['format'] = meta['format']
            hosters.append(hoster)
        else:
            for episode in self.__match_episode(source_url, video):
                meta = scraper_utils.parse_episode_link(episode['title'])
                stream_url = episode['url'] + scraper_utils.append_headers(
                    headers)
                # Store the URL relative to base_url.
                stream_url = stream_url.replace(self.base_url, '')
                quality = scraper_utils.height_get_quality(meta['height'])
                hoster = {
                    'multi-part': False,
                    'host':
                    scraper_utils.get_direct_hostname(self, stream_url),
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': True
                }
                if 'format' in meta: hoster['format'] = meta['format']
                if 'size' in episode:
                    hoster['size'] = scraper_utils.format_size(
                        int(episode['size']))
                hosters.append(hoster)

        return hosters
コード例 #36
0
 def get_sources(self, video):
     """Collect direct movie links from every anchor on the page whose
     href contains MOVIE_URL; quality/format are parsed from the link
     name."""
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=8)
     for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
         stream_url = attrs['href']
         if MOVIE_URL in stream_url:
             meta = scraper_utils.parse_movie_link(stream_url)
             # Keep only the path and append a UA header for playback.
             stream_url = scraper_utils.pathify_url(stream_url) + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
             quality = scraper_utils.height_get_quality(meta['height'])
             hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
             if 'format' in meta: hoster['format'] = meta['format']
             hosters.append(hoster)
             
     return hosters
コード例 #37
0
 def get_sources(self, video):
     """Build direct hosters for *video* (tuple-API variant).

     Movies: the source URL itself is the stream.  Episodes: each entry
     matched by __match_episode yields a stream.  Both get a UA header
     appended for playback.
     """
     hosters = []
     source_url = self.get_url(video)
     if source_url and source_url != FORCE_NO_MATCH:
         if video.video_type == VIDEO_TYPES.MOVIE:
             # parse_movie_link here returns a (title, year, height, extra) tuple.
             _title, _year, height, extra = scraper_utils.parse_movie_link(
                 source_url)
             stream_url = self.base_url + source_url + '|User-Agent=%s' % (
                 scraper_utils.get_ua())
             hoster = {
                 'multi-part': False,
                 'host': self._get_direct_hostname(stream_url),
                 'class': self,
                 'quality': scraper_utils.height_get_quality(height),
                 'views': None,
                 'rating': None,
                 'url': stream_url,
                 'direct': True
             }
             if 'x265' in extra: hoster['format'] = 'x265'
             hosters.append(hoster)
         else:
             for episode in self.__match_episode(source_url, video):
                 _show_title, _season, _episode, height, extra = scraper_utils.parse_episode_link(
                     episode['title'])
                 stream_url = urlparse.urljoin(
                     self.base_url, episode['url']) + '|User-Agent=%s' % (
                         scraper_utils.get_ua())
                 hoster = {
                     'multi-part': False,
                     'host': self._get_direct_hostname(stream_url),
                     'class': self,
                     'quality': scraper_utils.height_get_quality(height),
                     'views': None,
                     'rating': None,
                     'url': stream_url,
                     'direct': True
                 }
                 if 'x265' in extra: hoster['format'] = 'x265'
                 if 'size' in episode:
                     hoster['size'] = scraper_utils.format_size(
                         int(episode['size']))
                 hosters.append(hoster)
     return hosters
コード例 #38
0
ファイル: furk_scraper.py プロジェクト: enursha101/xbmc-addon
    def __get_links(self, url, video):
        """Query the Furk search API (newer variant) and convert matching
        files to direct hosters.

        Files are dropped when any of six checks fires: non-video, not
        ready, AV-flagged, missing/bad audio info, or release-name
        mismatch against *video*.
        """
        hosters = []
        search_url = scraper_utils.urljoin(self.base_url, SEARCH_URL)
        query = self.__translate_search(url)
        result = self._http_get(search_url, data=query, allow_redirect=False, cache_limit=.5)
        for item in result.get('files', []):
            # checks[i] == True means exclusion reason i fired; logged below.
            checks = [False] * 6
            if item.get('type', '').upper() != 'VIDEO': checks[0] = True
            if item.get('is_ready') != '1': checks[1] = True
            if item.get('av_result') in ['warning', 'infected']: checks[2] = True
            if 'video_info' not in item: checks[3] = True
            if item.get('video_info') and not re.search('#0:(0|1)(\((eng|und)\))?:\s*Audio:', item['video_info'], re.I): checks[4] = True
            if not scraper_utils.release_check(video, item['name']): checks[5] = True
            if any(checks):
                logger.log('Furk.net result excluded: %s - |%s|' % (checks, item['name']), log_utils.LOGDEBUG)
                continue
            
            # Prefer the real resolution from video_info; otherwise parse the
            # release name.
            match = re.search('(\d{3,})\s*x\s*(\d{3,})', item['video_info'])
            if match:
                width, _height = match.groups()
                quality = scraper_utils.width_get_quality(width)
            else:
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(item['name'])
                else:
                    meta = scraper_utils.parse_episode_link(item['name'])
                quality = scraper_utils.height_get_quality(meta['height'])
                
            if 'url_pls' in item:
                size_gb = scraper_utils.format_size(int(item['size']), 'B')
                # Honour the user's maximum-size setting.
                if self.max_bytes and int(item['size']) > self.max_bytes:
                    logger.log('Result skipped, Too big: |%s| - %s (%s) > %s (%sGB)' % (item['name'], item['size'], size_gb, self.max_bytes, self.max_gb))
                    continue

                stream_url = item['url_pls']
                host = scraper_utils.get_direct_hostname(self, stream_url)
                hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': True}
                hoster['size'] = size_gb
                hoster['extra'] = item['name']
                hosters.append(hoster)
            else:
                logger.log('Furk.net result skipped - no playlist: |%s|' % (json.dumps(item)), log_utils.LOGDEBUG)
                    
        return hosters
コード例 #39
0
 def __get_links(self, url, video):
     """Search EasyNews (older, cookie-auth variant) and build direct
     hosters from the JSON results; see the six exclusion checks below.
     """
     hosters = []
     search_url = self.__translate_search(url)
     html = self._http_get(search_url, cache_limit=.5)
     js_result = scraper_utils.parse_json(html, search_url)
     if 'data' in js_result:
         for item in js_result['data']:
             # Positional fields: hash, size, title, extension, duration.
             post_hash, size, post_title, ext, duration = item['0'], item['4'], item['10'], item['11'], item['14']
             checks = [False] * 6
             if not scraper_utils.title_check(video, post_title): checks[0] = True
             if 'alangs' in item and item['alangs'] and 'eng' not in item['alangs']: checks[1] = True
             # Anything under six minutes is assumed to be a sample/junk clip.
             if re.match('^\d+s', duration) or re.match('^[0-5]m', duration): checks[2] = True
             if 'passwd' in item and item['passwd']: checks[3] = True
             if 'virus' in item and item['virus']: checks[4] = True
             if 'type' in item and item['type'].upper() != 'VIDEO': checks[5] = True
             if any(checks):
                 log_utils.log('EasyNews Post excluded: %s - |%s|' % (checks, item), log_utils.LOGDEBUG)
                 continue
             
             stream_url = urllib.quote('%s%s/%s%s' % (post_hash, ext, post_title, ext))
             stream_url = 'http://members.easynews.com/dl/%s' % (stream_url)
             # Session cookies ride to the player as a header suffix.
             stream_url = stream_url + '|Cookie=%s' % (self._get_stream_cookies())
             host = self._get_direct_hostname(stream_url)
             # Prefer the explicit width field; fall back to the release name.
             quality = None
             if 'width' in item:
                 try: width = int(item['width'])
                 except: width = 0
                 if width:
                     quality = scraper_utils.width_get_quality(width)
             
             if quality is None:
                 if video.video_type == VIDEO_TYPES.MOVIE:
                     _title, _year, height, _extra = scraper_utils.parse_movie_link(post_title)
                 else:
                     _title, _season, _episode, height, _extra = scraper_utils.parse_episode_link(post_title)
                 quality = scraper_utils.height_get_quality(height)
                 
             hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': True}
             if any(i for i in ['X265', 'HEVC'] if i in post_title.upper()): hoster['format'] = 'x265'
             if size: hoster['size'] = size
             if post_title: hoster['extra'] = post_title
             hosters.append(hoster)
     return hosters
コード例 #40
0
 def get_sources(self, video):
     """Turn each post link found by __get_post_links into an (indirect)
     hoster; quality/format are parsed from the link's release name."""
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, require_debrid=True, cache_limit=.5)
     sources = self.__get_post_links(html)
     # Python 2 dict iteration; values carry the release-name string.
     for source, value in sources.iteritems():
         if scraper_utils.excluded_link(source): continue
         host = urlparse.urlparse(source).hostname
         if video.video_type == VIDEO_TYPES.MOVIE:
             meta = scraper_utils.parse_movie_link(value['release'])
         else:
             meta = scraper_utils.parse_episode_link(value['release'])
         quality = scraper_utils.height_get_quality(meta['height'])
         hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': quality, 'direct': False}
         if 'format' in meta: hoster['format'] = meta['format']
         hosters.append(hoster)
     return hosters
コード例 #41
0
 def get_sources(self, video):
     """Return hoster dicts for the anchors in the page's download message."""
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, require_debrid=True, cache_limit=.5)
     # Quality/format metadata comes from the og:title tag when present.
     title = dom_parser2.parse_dom(html, 'meta', {'property': 'og:title'}, req='content')
     meta = scraper_utils.parse_movie_link(title[0].attrs['content']) if title else {}
     fragment = dom_parser2.parse_dom(html, 'p', {'class': 'download_message'})
     if not fragment:
         return hosters

     # meta is constant for the page, so the quality is too (480p fallback).
     quality = scraper_utils.height_get_quality(meta.get('height', 480))
     for attrs, _content in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
         source = attrs['href']
         if scraper_utils.excluded_link(source):
             continue
         hoster = {'multi-part': False,
                   'host': urlparse.urlparse(source).hostname,
                   'class': self,
                   'views': None,
                   'url': source,
                   'rating': None,
                   'quality': quality,
                   'direct': False}
         if 'format' in meta:
             hoster['format'] = meta['format']
         hosters.append(hoster)
     return hosters
コード例 #42
0
 def get_sources(self, video):
     """Build direct-stream hoster dicts for a movie link or matched episodes."""
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     ua_suffix = '|User-Agent=%s' % (scraper_utils.get_ua())

     def make_hoster(stream_url, height, extra):
         # Every link from this scraper is a direct stream.
         entry = {'multi-part': False, 'host': self._get_direct_hostname(stream_url),
                  'class': self, 'quality': scraper_utils.height_get_quality(height),
                  'views': None, 'rating': None, 'url': stream_url, 'direct': True}
         if 'x265' in extra:
             entry['format'] = 'x265'
         return entry

     if video.video_type == VIDEO_TYPES.MOVIE:
         _title, _year, height, extra = scraper_utils.parse_movie_link(source_url)
         hosters.append(make_hoster(self.base_url + source_url + ua_suffix, height, extra))
     else:
         for episode in self.__match_episode(source_url, video):
             _show_title, _season, _episode, height, extra = scraper_utils.parse_episode_link(episode['title'])
             stream_url = urlparse.urljoin(self.base_url, episode['url']) + ua_suffix
             hoster = make_hoster(stream_url, height, extra)
             if 'size' in episode:
                 hoster['size'] = scraper_utils.format_size(int(episode['size']))
             hosters.append(hoster)
     return hosters
コード例 #43
0
 def __get_videos(self, contents, video):
     """Recursively collect playable entries from a file listing.

     contents: dict of items; directories carry a 'children' dict, files
         carry 'ext', 'size', 'name', 'url' and 'stream' keys.
     video: the video being resolved; only video.video_type is read.
     Returns a list of dicts with 'name', 'url', 'quality' (and 'size' for
     the original file entry).
     """
     videos = []
     for key in contents:
         item = contents[key]
         if item['type'].lower() == 'dir':
             videos += self.__get_videos(item['children'], video)
         else:
             # Skip non-video extensions and files under 100MB.
             if item['ext'].upper() in VIDEO_EXT and int(item['size']) > (100 * 1024 * 1024):
                 if video.video_type == VIDEO_TYPES.MOVIE:
                     _, _, height, _ = scraper_utils.parse_movie_link(item['name'])
                 else:
                     _, _, _, height, _ = scraper_utils.parse_episode_link(item['name'])
                 # BUG FIX: the original rebound the ``video`` parameter to
                 # this result dict, so the next loop iteration (and the
                 # recursive call) crashed on ``video.video_type``.
                 entry = {'name': item['name'], 'size': item['size'], 'url': item['url'], 'quality': scraper_utils.height_get_quality(height)}
                 videos.append(entry)
                 if item['stream'] is not None:
                     # Transcoded streams are capped at 720p.
                     if int(height) > 720: height = 720
                     entry = {'name': '(Transcode) %s' % (item['name']), 'url': item['stream'], 'quality': scraper_utils.height_get_quality(height)}
                     videos.append(entry)
     return videos
コード例 #44
0
ファイル: tvrelease_scraper.py プロジェクト: ScriptUp/salts
    def get_sources(self, video):
        """Scrape download-table links and tag them with release quality/size.

        Hosts that appear more than once in the table are dropped at the end.
        """
        source_url = self.get_url(video)
        hosters = []
        host_count = {}  # number of links seen per host
        if source_url and source_url != FORCE_NO_MATCH:
            url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(url, cache_limit=.5)

            # Release name from the details table; drives quality parsing.
            q_str = ''
            match = re.search('>Release.*?td_col">([^<]+)', html)
            if match:
                q_str = match.group(1).upper()
                
            size = ''
            match = re.search('>Size.*?td_col">([^<]+)', html)
            if match:
                size = match.group(1).upper()
            
            fragment = dom_parser.parse_dom(html, 'table', {'id': 'download_table'})
            if fragment:
                for match in re.finditer('''href=['"]([^'"]+)''', fragment[0]):
                    stream_url = match.group(1)
                    # .rar archives are not streamable; skip them.
                    if re.search('\.rar(\.|$)', stream_url):
                        continue
    
                    host = urlparse.urlsplit(stream_url).hostname
                    if q_str:
                        if video.video_type == VIDEO_TYPES.EPISODE:
                            _title, _season, _episode, height, _extra = scraper_utils.parse_episode_link(q_str)
                        else:
                            _title, _year, height, _extra = scraper_utils.parse_movie_link(q_str)
                        quality = scraper_utils.height_get_quality(height)
                    else:
                        # NOTE(review): match.group(1) is the stream URL here,
                        # not a quality string, so this lookup almost always
                        # falls back to QUALITIES.HIGH -- confirm intent.
                        quality = QUALITY_MAP.get(match.group(1).upper(), QUALITIES.HIGH)
                    quality = scraper_utils.get_quality(video, host, quality)
                    host_count[host] = host_count.get(host, 0) + 1
                    hoster = {'multi-part': False, 'class': self, 'host': host, 'quality': quality, 'views': None, 'url': stream_url, 'rating': None, 'direct': False}
                    if size: hoster['size'] = size
                    hosters.append(hoster)

        # Keep only links whose host appeared exactly once.
        new_hosters = [hoster for hoster in hosters if host_count[hoster['host']] <= 1]
        return new_hosters
コード例 #45
0
 def get_sources(self, video):
     """Harvest http(s) links from the post body and wrap them as hosters."""
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, require_debrid=True, cache_limit=.5)
     post = dom_parser2.parse_dom(html, 'div', {'class': 'entry-content'})
     if not post:
         return hosters

     # Choose the link parser once based on the video type.
     if video.video_type == VIDEO_TYPES.MOVIE:
         parse_link = scraper_utils.parse_movie_link
     else:
         parse_link = scraper_utils.parse_episode_link

     for match in re.finditer('(?:href="|>)(https?://[^"<]+)', post[0].content):
         stream_url = match.group(1)
         if scraper_utils.excluded_link(stream_url) or 'imdb.com' in stream_url:
             continue
         meta = parse_link(stream_url)
         hosters.append({'multi-part': False,
                         'host': urlparse.urlparse(stream_url).hostname,
                         'class': self,
                         'views': None,
                         'url': stream_url,
                         'rating': None,
                         'quality': scraper_utils.height_get_quality(meta['height']),
                         'direct': False})
     return hosters
コード例 #46
0
 def get_sources(self, video):
     """Collect labelled links from each paragraph of the post body."""
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(url, require_debrid=True, cache_limit=.5)
     post = dom_parser.parse_dom(html, 'div', {'class': 'entry-content'})
     if not post:
         return hosters

     for p in dom_parser.parse_dom(post[0], 'p'):
         for match in re.finditer('href="([^"]+)[^>]+>([^<]+)', p):
             stream_url, q_str = match.groups()
             # Drop multi-part archives, samples and nfo files.
             bad_label = (re.search('\.part\.?\d+', q_str, re.I)
                          or '.rar' in q_str
                          or 'sample' in q_str
                          or q_str.endswith('.nfo'))
             if bad_label:
                 continue
             if video.video_type == VIDEO_TYPES.MOVIE:
                 meta = scraper_utils.parse_movie_link(q_str)
             else:
                 meta = scraper_utils.parse_episode_link(q_str)
             hoster = {'multi-part': False,
                       'host': urlparse.urlparse(stream_url).hostname,
                       'class': self,
                       'views': None,
                       'url': stream_url,
                       'rating': None,
                       'quality': scraper_utils.height_get_quality(meta['height']),
                       'direct': False}
             if 'format' in meta:
                 hoster['format'] = meta['format']
             hosters.append(hoster)
     return hosters
コード例 #47
0
ファイル: farda_scraper.py プロジェクト: henry73/salts
    def get_sources(self, video):
        """Match *video* against a remote file listing and return direct hosters."""
        hosters = []
        source_url = self.get_url(video)
        norm_title = scraper_utils.normalize_title(video.title)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        listing_url = urlparse.urljoin(self.base_url, source_url)
        for entry in self.__get_files(listing_url, cache_limit=24):
            if entry["directory"]:
                continue

            match = {}
            if video.video_type == VIDEO_TYPES.MOVIE:
                link_title, _match_year, height, extra = scraper_utils.parse_movie_link(entry["link"])
                if norm_title in scraper_utils.normalize_title(link_title):
                    match = entry
            else:
                _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(entry["link"])
                if int(video.season) == int(season) and int(video.episode) == int(episode):
                    match = entry

            # Dubbed releases are skipped regardless of the match result.
            if "dubbed" in extra.lower():
                continue
            if not match:
                continue

            stream_url = match["url"] + "|User-Agent=%s" % (scraper_utils.get_ua())
            hoster = {
                "multi-part": False,
                "host": self._get_direct_hostname(stream_url),
                "class": self,
                "quality": scraper_utils.height_get_quality(height),
                "views": None,
                "rating": None,
                "url": stream_url,
                "direct": True,
            }
            if "x265" in extra:
                hoster["format"] = "x265"
            if "size" in match:
                hoster["size"] = scraper_utils.format_size(int(match["size"]))
            hosters.append(hoster)

        return hosters
コード例 #48
0
 def get_sources(self, video):
     """Return direct hosters for a movie URL or for matched episode entries."""
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     ua_suffix = '|User-Agent=%s' % (scraper_utils.get_ua())

     def build(stream_url, meta):
         entry = {'multi-part': False, 'host': self._get_direct_hostname(stream_url),
                  'class': self, 'quality': scraper_utils.height_get_quality(meta['height']),
                  'views': None, 'rating': None, 'url': stream_url, 'direct': True}
         if 'format' in meta:
             entry['format'] = meta['format']
         return entry

     if video.video_type == VIDEO_TYPES.MOVIE:
         meta = scraper_utils.parse_movie_link(source_url)
         hosters.append(build(source_url + ua_suffix, meta))
     else:
         for episode in self.__match_episode(source_url, video):
             meta = scraper_utils.parse_episode_link(episode['title'])
             # Episode URLs are stored relative to the base url.
             stream_url = (episode['url'] + ua_suffix).replace(self.base_url, '')
             hoster = build(stream_url, meta)
             if 'size' in episode:
                 hoster['size'] = scraper_utils.format_size(int(episode['size']))
             hosters.append(hoster)
     return hosters
コード例 #49
0
    def __get_release(self, html, video):
        """Pick a release page URL from the releases/episodes list.

        With the 'select' setting at 0 the first acceptable release wins;
        otherwise the highest-quality one (by Q_ORDER) is chosen.
        """
        try:
            select = int(kodi.get_setting('%s-select' % (self.get_name())))
        except:
            select = 0
        ul_id = 'releases' if video.video_type == VIDEO_TYPES.MOVIE else 'episodes'
        fragment = dom_parser2.parse_dom(html, 'ul', {'id': ul_id})
        if not fragment:
            return None

        best_page, best_qorder = None, 0
        for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
            link = dom_parser2.parse_dom(item, 'span', req=['href', 'title'])
            if not link:
                link = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
                if not link:
                    continue

            page_url, release = link[0].attrs['href'], link[0].attrs['title']
            # Stop scanning entirely once a too-old entry is reached.
            timestamp = dom_parser2.parse_dom(item, 'span', {'class': 'time'})
            if timestamp and self.__too_old(timestamp[0].content):
                break

            release = re.sub('^\[[^\]]*\]\s*', '', release)
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(release)
            else:
                if not scraper_utils.release_check(video, release, require_title=False):
                    continue
                meta = scraper_utils.parse_episode_link(release)

            if select == 0:
                # First acceptable release wins.
                return page_url

            quality = scraper_utils.height_get_quality(meta['height'])
            logger.log('result: |%s|%s|%s|' % (page_url, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
            if Q_ORDER[quality] > best_qorder:
                logger.log('Setting best as: |%s|%s|%s|' % (page_url, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                best_page, best_qorder = page_url, Q_ORDER[quality]

        return best_page
コード例 #50
0
ファイル: rmz_scraper.py プロジェクト: CYBERxNUKE/xbmc-addon
    def __get_release(self, html, video):
        """Return the page URL of the chosen release, or None.

        select == 0 picks the first valid entry; any other value keeps the
        best quality seen (compared via Q_ORDER).
        """
        try:
            select = int(kodi.get_setting('%s-select' % (self.get_name())))
        except:
            select = 0
        list_id = 'releases' if video.video_type == VIDEO_TYPES.MOVIE else 'episodes'
        ul_list = dom_parser2.parse_dom(html, 'ul', {'id': list_id})
        if not ul_list:
            return None

        best = (0, None)  # (quality order, page url)
        for _attrs, li in dom_parser2.parse_dom(ul_list[0].content, 'li'):
            anchor = (dom_parser2.parse_dom(li, 'span', req=['href', 'title'])
                      or dom_parser2.parse_dom(li, 'a', req=['href', 'title']))
            if not anchor:
                continue
            page_url = anchor[0].attrs['href']
            release = anchor[0].attrs['title']

            # A too-old entry aborts the whole scan, not just this item.
            posted = dom_parser2.parse_dom(li, 'span', {'class': 'time'})
            if posted and self.__too_old(posted[0].content):
                break

            release = re.sub('^\[[^\]]*\]\s*', '', release)
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(release)
            else:
                if not scraper_utils.release_check(video, release, require_title=False):
                    continue
                meta = scraper_utils.parse_episode_link(release)

            if select == 0:
                return page_url

            quality = scraper_utils.height_get_quality(meta['height'])
            logger.log('result: |%s|%s|%s|' % (page_url, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
            if Q_ORDER[quality] > best[0]:
                logger.log('Setting best as: |%s|%s|%s|' % (page_url, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                best = (Q_ORDER[quality], page_url)

        return best[1]
コード例 #51
0
 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     """Search for a TV show (direct slug probe) or a movie (search page)."""
     results = []
     if video_type == VIDEO_TYPES.TVSHOW and title:
         probe_url = scraper_utils.urljoin(self.base_url, '/show/%s/' % (scraper_utils.to_slug(title)))
         html = self._http_get(probe_url, require_debrid=True, cache_limit=24)
         posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
         if posts and CATEGORIES[video_type] in posts[0].content:
             match = re.search('<div[^>]*>\s*show\s+name:.*?<a\s+href="([^"]+)[^>]+>(?!Season\s+\d+)([^<]+)', posts[0].content, re.I)
             if match:
                 show_url, match_title = match.groups()
                 results.append({'url': scraper_utils.pathify_url(show_url),
                                 'title': scraper_utils.cleanse_title(match_title),
                                 'year': ''})
     elif video_type == VIDEO_TYPES.MOVIE:
         search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
         search_url = scraper_utils.urljoin(self.base_url, '/search/%s/') % (urllib.quote_plus(search_title))
         html = self._http_get(search_url, headers={'User-Agent': LOCAL_UA}, require_debrid=True, cache_limit=1)
         headings = re.findall('<h2>\s*<a\s+href="([^"]+).*?">(.*?)</a>', html)
         posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
         norm_title = scraper_utils.normalize_title(title)
         for (post_url, post_title), post in zip(headings, posts):
             # Skip episode-style headings and posts older than the age filter.
             if re.search('[._ -]S\d+E\d+[._ -]', post_title, re.I) or self.__too_old(post):
                 continue
             meta = scraper_utils.parse_movie_link(re.sub('<[^>]*>', '', post_title))
             match_year = meta['year']
             match_norm_title = scraper_utils.normalize_title(meta['title'])
             title_ok = match_norm_title in norm_title or norm_title in match_norm_title
             year_ok = not year or not match_year or year == match_year
             if title_ok and year_ok:
                 full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                 results.append({'url': scraper_utils.pathify_url(post_url),
                                 'title': scraper_utils.cleanse_title(full_title),
                                 'year': match_year})
     
     return results
コード例 #52
0
    def __get_links(self, url, video):
        """Query the Furk search API and convert acceptable files to hosters.

        Each file passes through six exclusion checks; any failing check is
        logged and the file skipped. Quality comes from the WxH in
        'video_info' when present, otherwise from the file name.
        """
        hosters = []
        search_url = urlparse.urljoin(self.base_url, SEARCH_URL)
        query = self.__translate_search(url)
        result = self._http_get(search_url,
                                data=query,
                                allow_redirect=False,
                                cache_limit=.5)
        if 'files' in result:
            for item in result['files']:
                # checks[i] == True marks a reason to exclude this file.
                checks = [False] * 6
                # 0: not a video file
                if 'type' not in item or item['type'].upper() != 'VIDEO':
                    checks[0] = True
                # 1: file not ready for download
                if 'is_ready' in item and item['is_ready'] != '1':
                    checks[1] = True
                # 2: flagged by the antivirus scan
                if 'av_result' in item and item['av_result'] in [
                        'warning', 'infected'
                ]:
                    checks[2] = True
                # 3: no stream metadata at all
                if 'video_info' not in item: checks[3] = True
                # 4: no eng/und audio track in the first two streams
                if 'video_info' in item and item[
                        'video_info'] and not re.search(
                            '#0:(?:0|1)(?:\(eng\)|\(und\))?:\s*Audio:',
                            item['video_info']):
                    checks[4] = True
                # 5: episode name matches neither SxxEyy nor the airdate
                if video.video_type == VIDEO_TYPES.EPISODE:
                    sxe = '[. ][Ss]%02d[Ee]%02d[. ]' % (int(
                        video.season), int(video.episode))
                    if not re.search(sxe, item['name']):
                        if video.ep_airdate:
                            airdate_pattern = '[. ]%s[. ]%02d[. ]%02d[. ]' % (
                                video.ep_airdate.year, video.ep_airdate.month,
                                video.ep_airdate.day)
                            if not re.search(airdate_pattern, item['name']):
                                checks[5] = True

                if any(checks):
                    log_utils.log(
                        'Furk.net result excluded: %s - |%s|' %
                        (checks, item['name']), log_utils.LOGDEBUG)
                    continue

                # Prefer the actual resolution from video_info (e.g. 1920x1080).
                match = re.search('(\d{3,})\s?x\s?(\d{3,})',
                                  item['video_info'])
                if match:
                    width, _ = match.groups()
                    quality = scraper_utils.width_get_quality(width)
                else:
                    # Fall back to parsing the height out of the file name.
                    if video.video_type == VIDEO_TYPES.MOVIE:
                        _, _, height, _ = scraper_utils.parse_movie_link(
                            item['name'])
                        quality = scraper_utils.height_get_quality(height)
                    elif video.video_type == VIDEO_TYPES.EPISODE:
                        _, _, _, height, _ = scraper_utils.parse_episode_link(
                            item['name'])
                        if int(height) > -1:
                            quality = scraper_utils.height_get_quality(height)
                        else:
                            quality = QUALITIES.HIGH
                    else:
                        quality = QUALITIES.HIGH

                stream_url = item['url_pls']
                host = self._get_direct_hostname(stream_url)
                hoster = {
                    'multi-part': False,
                    'class': self,
                    'views': None,
                    'url': stream_url,
                    'rating': None,
                    'host': host,
                    'quality': quality,
                    'direct': True
                }
                hoster['size'] = scraper_utils.format_size(
                    int(item['size']), 'B')
                hoster['extra'] = item['name']
                hosters.append(hoster)
        return hosters
コード例 #53
0
    def __get_links(self, url, video):
        """Query the EasyNews search API and convert posts into direct hosters.

        Posts go through six exclusion checks and an optional size cap
        (self.max_bytes); survivors get an authorized download URL built
        from the farm/port/hash fields of the API response.
        """
        hosters = []
        search_url, params = self.__translate_search(url)
        html = self._http_get(search_url, params=params, cache_limit=.5)
        js_result = scraper_utils.parse_json(html, search_url)
        down_url = js_result.get('downURL')
        dl_farm = js_result.get('dlFarm')
        dl_port = js_result.get('dlPort')
        for item in js_result.get('data', []):
            # Numeric keys in the API payload: 0=hash, 4=size, 10=title,
            # 11=extension, 14=duration.
            post_hash, size, post_title, ext, duration = item['0'], item[
                '4'], item['10'], item['11'], item['14']
            # checks[i] == True marks a reason to exclude this post.
            checks = [False] * 6
            # 0: release name doesn't match the requested video
            if not scraper_utils.release_check(video, post_title):
                checks[0] = True
            # 1: audio languages listed but English missing
            if 'alangs' in item and item['alangs'] and 'eng' not in item[
                    'alangs']:
                checks[1] = True
            # 2: too short (seconds, or under six minutes)
            if re.match('^\d+s', duration) or re.match('^[0-5]m', duration):
                checks[2] = True
            # 3: password protected; 4: flagged by virus scan; 5: not video
            if 'passwd' in item and item['passwd']: checks[3] = True
            if 'virus' in item and item['virus']: checks[4] = True
            if 'type' in item and item['type'].upper() != 'VIDEO':
                checks[5] = True
            if any(checks):
                logger.log(
                    'EasyNews Post excluded: %s - |%s|' % (checks, item),
                    log_utils.LOGDEBUG)
                continue

            stream_url = down_url + urllib.quote(
                '/%s/%s/%s%s/%s%s' %
                (dl_farm, dl_port, post_hash, ext, post_title, ext))
            stream_url = stream_url + '|Authorization=%s' % (urllib.quote(
                self.auth))
            host = scraper_utils.get_direct_hostname(self, stream_url)
            # Quality: prefer the reported width, fall back to the release
            # name's height.
            quality = None
            if 'width' in item:
                try:
                    width = int(item['width'])
                except:
                    width = 0
                if width:
                    quality = scraper_utils.width_get_quality(width)

            if quality is None:
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(post_title)
                else:
                    meta = scraper_utils.parse_episode_link(post_title)
                quality = scraper_utils.height_get_quality(meta['height'])

            # Enforce the user's maximum download size, if configured.
            if self.max_bytes:
                match = re.search('([\d.]+)\s+(.*)', size)
                if match:
                    size_bytes = scraper_utils.to_bytes(*match.groups())
                    if size_bytes > self.max_bytes:
                        logger.log(
                            'Result skipped, Too big: |%s| - %s (%s) > %s (%s GB)'
                            % (post_title, size_bytes, size, self.max_bytes,
                               self.max_gb))
                        continue

            hoster = {
                'multi-part': False,
                'class': self,
                'views': None,
                'url': stream_url,
                'rating': None,
                'host': host,
                'quality': quality,
                'direct': True
            }
            if any(i for i in ['X265', 'HEVC'] if i in post_title.upper()):
                hoster['format'] = 'x265'
            if size: hoster['size'] = size
            if post_title: hoster['extra'] = post_title
            hosters.append(hoster)
        return hosters
コード例 #54
0
    def get_sources(self, video):
        """Collect hosters from anchor-list links and embedded player iframes.

        Builds a ``url -> {'quality', 'direct'}`` map from the page's
        'enlaces' lists and its player iframes, then converts that map into
        hoster dicts.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)

        sources = {}
        # 1) Plain anchor links inside <ul class="enlaces"> lists.
        for _attrs, fragment in dom_parser2.parse_dom(html, 'ul', {'class': 'enlaces'}):
            for attrs, _content in dom_parser2.parse_dom(fragment, 'a', req='href'):
                stream_url = attrs['href']
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(stream_url)
                else:
                    meta = scraper_utils.parse_episode_link(stream_url)
                sources[stream_url] = {'quality': scraper_utils.height_get_quality(meta['height']),
                                       'direct': False}

        # 2) Player iframes; src may be lazily loaded via data-lazy-src.
        for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}) \
                + dom_parser2.parse_dom(html, 'div', {'id': re.compile('player\d+')}):
            for attrs, _content in dom_parser2.parse_dom(fragment, 'iframe', req='src') \
                    + dom_parser2.parse_dom(fragment, 'iframe', req='data-lazy-src'):
                iframe_url = attrs.get('src', '')
                if not iframe_url.startswith('http'):
                    iframe_url = attrs.get('data-lazy-src', '')
                    if not iframe_url.startswith('http'): continue

                if '//player' in iframe_url:
                    # Embedded player pages list their own sources. Use a
                    # separate variable so the page html is not clobbered
                    # (the original overwrote ``html`` here).
                    iframe_html = self._http_get(iframe_url,
                                                 headers={'Referer': page_url},
                                                 cache_limit=.5)
                    sources.update(scraper_utils.parse_sources_list(self, iframe_html))
                else:
                    if video.video_type == VIDEO_TYPES.MOVIE:
                        meta = scraper_utils.parse_movie_link(iframe_url)
                    else:
                        meta = scraper_utils.parse_episode_link(iframe_url)
                    sources[iframe_url] = {'quality': scraper_utils.height_get_quality(meta['height']),
                                           'direct': False}

        for stream_url, values in sources.iteritems():
            direct = values['direct']
            quality = values['quality']
            if direct:
                host = scraper_utils.get_direct_hostname(self, stream_url)
                stream_url += scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
            else:
                # Non-direct links are used as-is (removed the original
                # no-op ``stream_url = stream_url`` self-assignment).
                host = urlparse.urlparse(stream_url).hostname

            hoster = {
                'multi-part': False,
                'url': stream_url,
                'class': self,
                'quality': quality,
                'host': host,
                'rating': None,
                'views': None,
                'direct': direct
            }
            hosters.append(hoster)

        return hosters
コード例 #55
0
    def _blog_proc_results(self, html, post_pattern, date_format, video_type,
                           title, year):
        results = []
        search_date = ''
        search_sxe = ''
        if video_type == VIDEO_TYPES.EPISODE:
            match = re.search('(.*?)\s*(S\d+E\d+)\s*', title)
            if match:
                show_title, search_sxe = match.groups()
            else:
                match = re.search(
                    '(.*?)\s*(\d{4})[._ -]?(\d{2})[._ -]?(\d{2})\s*', title)
                if match:
                    show_title, search_year, search_month, search_day = match.groups(
                    )
                    search_date = '%s-%s-%s' % (search_year, search_month,
                                                search_day)
                    search_date = scraper_utils.to_datetime(
                        search_date, "%Y-%m-%d").date()
                else:
                    show_title = title
        else:
            show_title = title

        today = datetime.date.today()
        for match in re.finditer(post_pattern, html, re.DOTALL):
            post_data = match.groupdict()
            post_title = post_data['post_title']
            post_title = re.sub('<[^>]*>', '', post_title)
            if 'quality' in post_data:
                post_title += '- [%s]' % (post_data['quality'])

            try:
                filter_days = int(
                    kodi.get_setting('%s-filter' % (self.get_name())))
            except ValueError:
                filter_days = 0
            if filter_days and date_format and 'date' in post_data:
                post_data['date'] = post_data['date'].strip()
                filter_days = datetime.timedelta(days=filter_days)
                post_date = scraper_utils.to_datetime(post_data['date'],
                                                      date_format).date()
                if not post_date:
                    logger.log(
                        'Failed date Check in %s: |%s|%s|%s|' %
                        (self.get_name(), post_data['date'], date_format),
                        log_utils.LOGWARNING)
                    post_date = today

                if today - post_date > filter_days:
                    continue

            match_year = ''
            match_date = ''
            match_sxe = ''
            match_title = full_title = post_title
            if video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(post_title)
                match_year = meta['year']
            else:
                meta = scraper_utils.parse_episode_link(post_title)
                match_sxe = 'S%02dE%02d' % (int(
                    meta['season']), int(meta['episode']))
                match_date = meta['airdate']

            match_title = meta['title']
            full_title = '%s (%sp) [%s]' % (meta['title'], meta['height'],
                                            meta['extra'])
            norm_title = scraper_utils.normalize_title(show_title)
            match_norm_title = scraper_utils.normalize_title(match_title)
            title_match = norm_title and (match_norm_title in norm_title
                                          or norm_title in match_norm_title)
            year_match = not year or not match_year or year == match_year
            sxe_match = not search_sxe or (search_sxe == match_sxe)
            date_match = not search_date or (search_date == match_date)
            logger.log(
                'Blog Results: |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| (%s)'
                % (match_norm_title, norm_title, title_match, year, match_year,
                   year_match, search_date, match_date, date_match, search_sxe,
                   match_sxe, sxe_match, self.get_name()), log_utils.LOGDEBUG)
            if title_match and year_match and date_match and sxe_match:
                quality = scraper_utils.height_get_quality(meta['height'])
                result = {
                    'url': scraper_utils.pathify_url(post_data['url']),
                    'title': scraper_utils.cleanse_title(full_title),
                    'year': match_year,
                    'quality': quality
                }
                results.append(result)
        return results