def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}):
        iframe_src = dom_parser2.parse_dom(fragment, 'iframe', req='src')
        if iframe_src:
            iframe_src = iframe_src[0].attrs['src']
            if re.search('o(pen)?load', iframe_src, re.I):
                meta = scraper_utils.parse_movie_link(iframe_src)
                quality = scraper_utils.height_get_quality(meta['height'])
                links = {iframe_src: {'quality': quality, 'direct': False}}
            else:
                links = self.__get_links(iframe_src, url)
            for link in links:
                direct = links[link]['direct']
                quality = links[link]['quality']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, link)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(link)
                    stream_url = link + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
                else:
                    host = urlparse.urlparse(link).hostname
                    stream_url = link
                source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                hosters.append(source)
    return hosters
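# Editor's note (illustrative sketch, not part of the original scrapers): every
# get_sources() variant in this file builds source/hoster dicts with the same
# required keys ('multi-part', 'class', 'host', 'quality', 'views', 'rating',
# 'url', 'direct') plus optional 'format'/'size'/'extra'. A hypothetical helper
# like the one below could centralize that shape; the key list is taken from
# the dicts in this file, but the helper itself is an assumption.
def make_hoster(scraper, host, quality, url, direct, **optional):
    hoster = {'multi-part': False, 'class': scraper, 'host': host, 'quality': quality,
              'views': None, 'rating': None, 'url': url, 'direct': direct}
    hoster.update(optional)  # optional keys seen below: format='x265', size='1.4 GB', extra='release name'
    return hoster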
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    if video_type == VIDEO_TYPES.TVSHOW and title:
        test_url = '/tv-show/%s/' % (scraper_utils.to_slug(title))
        test_url = scraper_utils.urljoin(self.base_url, test_url)
        html = self._http_get(test_url, require_debrid=True, cache_limit=24)
        posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
        if posts:
            result = {'url': scraper_utils.pathify_url(test_url), 'title': scraper_utils.cleanse_title(title), 'year': ''}
            results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
        html = self._http_get(self.base_url, params={'s': search_title}, require_debrid=True, cache_limit=1)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            match = re.search('<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
            if match:
                post_url, post_title = match.groups()
                if '/tv-show/' in post or self.__too_old(post): continue
                post_title = re.sub('<[^>]*>', '', post_title)
                meta = scraper_utils.parse_movie_link(post_title)
                full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                match_year = meta['year']
                match_norm_title = scraper_utils.normalize_title(meta['title'])
                if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    html = self._http_get(search_url, require_debrid=True, cache_limit=1)
    if video_type == VIDEO_TYPES.TVSHOW:
        seen_urls = {}
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if CATEGORIES[video_type] not in post: continue
            match = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if match:
                show_url, match_title = match.groups()
                if show_url in seen_urls: continue
                result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                seen_urls[show_url] = result
                results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [result.content for result in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        for heading, post in zip(headings, posts):
            if CATEGORIES[video_type] not in post or self.__too_old(post): continue
            post_url, post_title = heading
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                results.append(result)
    return results
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'post-cont'})
    if not fragment: return hosters
    match = re.search('<p>\s*<strong>(.*?)<script', fragment[0].content, re.DOTALL)
    if not match: return hosters
    for attrs, _content in dom_parser2.parse_dom(match.group(1), 'a', req='href'):
        stream_url = attrs['href']
        if scraper_utils.excluded_link(stream_url): continue
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(stream_url)
        else:
            meta = scraper_utils.parse_episode_link(stream_url)
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.get_quality(video, host, scraper_utils.height_get_quality(meta['height']))
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'quality': quality, 'direct': False}
        hosters.append(hoster)
    return hosters
def search(self, video_type, title, year):
    results = []
    norm_title = scraper_utils.normalize_title(title)
    if video_type == VIDEO_TYPES.MOVIE:
        if year:
            base_url = urlparse.urljoin(self.base_url, '/Film/')
            html = self._http_get(base_url, cache_limit=48)
            for link in self.__parse_directory(html):
                if year == link['title']:
                    url = urlparse.urljoin(base_url, link['link'])
                    for movie in self.__get_files(url, cache_limit=24):
                        match_title, match_year, _height, _extra = scraper_utils.parse_movie_link(movie['link'])
                        if not movie['directory'] and norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                            result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': year}
                            results.append(result)
    else:
        base_url = urlparse.urljoin(self.base_url, '/Serial/')
        html = self._http_get(base_url, cache_limit=48)
        for link in self.__parse_directory(html):
            if link['directory'] and norm_title in scraper_utils.normalize_title(link['title']):
                url = urlparse.urljoin(base_url, link['link'])
                result = {'url': scraper_utils.pathify_url(url), 'title': link['title'], 'year': ''}
                results.append(result)
    return results
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*screen[^"]*'})
        if fragment:
            js_src = dom_parser.parse_dom(fragment[0], 'script', ret='src')
            if js_src:
                js_url = urlparse.urljoin(self.base_url, js_src[0])
                html = self._http_get(js_url, cache_limit=.5)
            else:
                html = fragment[0]
            for match in re.finditer('<source[^>]+src="([^"]+)', html):
                stream_url = match.group(1)
                host = self._get_direct_hostname(stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    _, _, height, _ = scraper_utils.parse_movie_link(stream_url)
                    quality = scraper_utils.height_get_quality(height)
                stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    html = self._http_get(search_url, require_debrid=False, cache_limit=1)
    if video_type == VIDEO_TYPES.TVSHOW:
        seen_urls = {}
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if CATEGORIES[video_type] not in post: continue
            match = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if match:
                show_url, match_title = match.groups()
                if show_url in seen_urls: continue
                result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                seen_urls[show_url] = result
                results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [result.content for result in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        for heading, post in zip(headings, posts):
            if CATEGORIES[video_type] not in post or self.__too_old(post): continue
            post_url, post_title = heading
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                results.append(result)
    return results
def __get_links(self, url, video):
    hosters = []
    seen_urls = set()
    for search_type in SEARCH_TYPES:
        search_url, params = self.__translate_search(url, search_type)
        if not search_url: continue
        html = self._http_get(search_url, params=params, cache_limit=.5)
        js_result = scraper_utils.parse_json(html, search_url)
        if js_result.get('status') != 'success':
            logger.log('Alluc API Error: |%s|%s|: %s' % (search_url, params, js_result.get('message', 'Unknown Error')), log_utils.LOGWARNING)
            continue
        for result in js_result['result']:
            stream_url = result['hosterurls'][0]['url']
            if len(result['hosterurls']) > 1: continue
            if result['extension'] == 'rar': continue
            if stream_url in seen_urls: continue
            if scraper_utils.release_check(video, result['title']):
                host = urlparse.urlsplit(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, self._get_title_quality(result['title']))
                hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': False}
                hoster['extra'] = scraper_utils.cleanse_title(result['title'])
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(hoster['extra'])
                else:
                    meta = scraper_utils.parse_episode_link(hoster['extra'])
                if 'format' in meta: hoster['format'] = meta['format']
                hosters.append(hoster)
                seen_urls.add(stream_url)
    return hosters
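# For reference, the shape of the Alluc API response that __get_links() above
# consumes, reconstructed only from the fields the code reads ('status',
# 'message', 'result', 'hosterurls', 'extension', 'title'); the sample values
# themselves are invented for illustration.
ALLUC_SAMPLE = {
    'status': 'success',
    'result': [{
        'title': 'Some.Movie.2016.720p.BluRay.x264',
        'extension': 'mkv',
        'hosterurls': [{'url': 'http://somehost.example/abc123'}],
    }],
}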
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    norm_title = scraper_utils.normalize_title(video.title)
    if source_url and source_url != FORCE_NO_MATCH:
        source_url = urlparse.urljoin(self.base_url2, source_url)
        for line in self._get_files(source_url, cache_limit=24):
            if not line['directory']:
                match = {}
                if video.video_type == VIDEO_TYPES.MOVIE:
                    match_title, _match_year, height, extra = scraper_utils.parse_movie_link(line['link'])
                    if norm_title in scraper_utils.normalize_title(match_title):
                        match = line
                else:
                    _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(line['link'])
                    if int(video.season) == int(season) and int(video.episode) == int(episode):
                        match = line
                if 'dubbed' in extra.lower(): continue
                if match:
                    stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': scraper_utils.height_get_quality(height), 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    if 'x265' in extra: hoster['format'] = 'x265'
                    if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                    hosters.append(hoster)
    return hosters
def __get_links_from_json2(self, url, page_url, video_type):
    sources = {}
    headers = {'Referer': page_url}
    headers.update(XHR)
    html = self._http_get(url, headers=headers, cache_limit=0)
    js_data = scraper_utils.parse_json(html, url)
    try:
        playlist = js_data.get('playlist', [])
        for source in playlist[0].get('sources', []):
            stream_url = source['file']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                if video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(stream_url)
                else:
                    meta = scraper_utils.parse_episode_link(stream_url)
                quality = scraper_utils.height_get_quality(meta['height'])
            sources[stream_url] = {'quality': quality, 'direct': True}
            logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        logger.log('Exception during yesmovies extract: %s' % (e), log_utils.LOGDEBUG)
    return sources
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    sources = self.__get_post_links(html)
    for source, value in sources.iteritems():
        if scraper_utils.excluded_link(source): continue
        host = urlparse.urlparse(source).hostname
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(value['release'])
        else:
            meta = scraper_utils.parse_episode_link(value['release'])
        quality = scraper_utils.height_get_quality(meta['height'])
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': quality, 'direct': False}
        if 'format' in meta: hoster['format'] = meta['format']
        hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):
    results = []
    norm_title = scraper_utils.normalize_title(title)
    if video_type == VIDEO_TYPES.MOVIE:
        if year:
            base_url = urlparse.urljoin(self.base_url, '/Film/')
            html = self._http_get(base_url, cache_limit=48)
            for link in self.__parse_directory(html):
                if year == link['title']:
                    url = urlparse.urljoin(base_url, link['link'])
                    for movie in self.__get_files(url, cache_limit=24):
                        match_title, match_year, _height, _extra = scraper_utils.parse_movie_link(movie['link'])
                        if not movie['directory'] and norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                            result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': year}
                            results.append(result)
    else:
        base_url = urlparse.urljoin(self.base_url, '/Serial/')
        html = self._http_get(base_url, cache_limit=48)
        for link in self.__parse_directory(html):
            if link['directory'] and norm_title in scraper_utils.normalize_title(link['title']):
                url = urlparse.urljoin(base_url, link['link'])
                result = {'url': scraper_utils.pathify_url(url), 'title': link['title'], 'year': ''}
                results.append(result)
    return results
def __get_videos(self, contents, video):
    videos = []
    for key in contents:
        item = contents[key]
        if item['type'].lower() == 'dir':
            videos += self.__get_videos(item['children'], video)
        elif item['ext'].upper() in VIDEO_EXT and int(item['size']) > (100 * 1024 * 1024):
            if video.video_type == VIDEO_TYPES.MOVIE:
                _, _, height, _ = scraper_utils.parse_movie_link(item['name'])
            else:
                _, _, _, height, _ = scraper_utils.parse_episode_link(item['name'])
            # use a local name here; the original re-bound |video|, which shadowed
            # the parameter that later recursive calls still need
            found = {'name': item['name'], 'size': item['size'], 'url': item['url'], 'quality': scraper_utils.height_get_quality(height)}
            videos.append(found)
            if item['stream'] is not None:
                if int(height) > 720: height = 720  # cap transcoded streams at 720p
                found = {'name': '(Transcode) %s' % (item['name']), 'url': item['stream'], 'quality': scraper_utils.height_get_quality(height)}
                videos.append(found)
    return videos
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
        stream_url = attrs['href']
        if MOVIE_URL in stream_url:
            meta = scraper_utils.parse_movie_link(stream_url)
            stream_url = scraper_utils.pathify_url(stream_url) + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            quality = scraper_utils.height_get_quality(meta['height'])
            hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            if 'format' in meta: hoster['format'] = meta['format']
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    norm_title = scraper_utils.normalize_title(video.title)
    if source_url and source_url != FORCE_NO_MATCH:
        source_url = urlparse.urljoin(self.base_url, source_url)
        for line in self._get_files(source_url, cache_limit=24):
            if not line['directory']:
                match = {}
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(line['link'])
                    if norm_title in scraper_utils.normalize_title(meta['title']):
                        match = line
                elif self.__episode_match(line, video):
                    match = line
                    meta = scraper_utils.parse_episode_link(line['link'])
                if match:
                    if meta['dubbed']: continue
                    stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                    stream_url = stream_url.replace(self.base_url, '')
                    quality = scraper_utils.height_get_quality(meta['height'])
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    if 'format' in meta: hoster['format'] = meta['format']
                    if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    norm_title = scraper_utils.normalize_title(video.title)
    if source_url and source_url != FORCE_NO_MATCH:
        source_url = urlparse.urljoin(self.base_url, source_url)
        for line in self.__get_files(source_url, cache_limit=24):
            if not line['directory']:
                match = {}
                if video.video_type == VIDEO_TYPES.MOVIE:
                    match_title, _match_year, height, extra = scraper_utils.parse_movie_link(line['link'])
                    if norm_title in scraper_utils.normalize_title(match_title):
                        match = line
                else:
                    _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(line['link'])
                    if int(video.season) == int(season) and int(video.episode) == int(episode):
                        match = line
                if 'dubbed' in extra.lower(): continue
                if match:
                    stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': scraper_utils.height_get_quality(height), 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    if 'x265' in extra: hoster['format'] = 'x265'
                    if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=False, cache_limit=.5)
    title = dom_parser2.parse_dom(html, 'meta', {'property': 'og:title'}, req='content')
    meta = scraper_utils.parse_movie_link(title[0].attrs['content']) if title else {}
    fragment = dom_parser2.parse_dom(html, 'p', {'class': 'download_message'})
    if fragment:
        for attrs, _content in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
            source = attrs['href']
            if scraper_utils.excluded_link(source): continue
            host = urlparse.urlparse(source).hostname
            quality = scraper_utils.height_get_quality(meta.get('height', 480))
            # these are links to external hosters, not direct streams (cf. the
            # sibling variant of this scraper below, which sets direct=False)
            hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': quality, 'direct': False}
            if 'format' in meta: hoster['format'] = meta['format']
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*screen[^"]*'})
        if fragment:
            js_src = dom_parser.parse_dom(fragment[0], 'script', ret='src')
            if js_src:
                js_url = urlparse.urljoin(self.base_url, js_src[0])
                html = self._http_get(js_url, cache_limit=.5)
            else:
                html = fragment[0]
            for match in re.finditer('<source[^>]+src="([^"]+)', html):
                stream_url = match.group(1)
                host = self._get_direct_hostname(stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'blogspot' in stream_url:
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    _, _, height, _ = scraper_utils.parse_movie_link(stream_url)
                    quality = scraper_utils.height_get_quality(height)
                stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                hosters.append(hoster)
    return hosters
def __get_links(self, url, video):
    hosters = []
    search_url = urlparse.urljoin(self.base_url, SEARCH_URL)
    query = self.__translate_search(url)
    result = self._http_get(search_url, data=query, allow_redirect=False, cache_limit=.5)
    if 'files' in result:
        for item in result['files']:
            checks = [False] * 6
            if 'type' not in item or item['type'].upper() != 'VIDEO': checks[0] = True
            if 'is_ready' in item and item['is_ready'] != '1': checks[1] = True
            if 'av_result' in item and item['av_result'] in ['warning', 'infected']: checks[2] = True
            if 'video_info' not in item: checks[3] = True
            if 'video_info' in item and item['video_info'] and not re.search('#0:(?:0|1)(?:\(eng\)|\(und\))?:\s*Audio:', item['video_info']): checks[4] = True
            if video.video_type == VIDEO_TYPES.EPISODE:
                sxe = '[. ][Ss]%02d[Ee]%02d[. ]' % (int(video.season), int(video.episode))
                if not re.search(sxe, item['name']):
                    if video.ep_airdate:
                        airdate_pattern = '[. ]%s[. ]%02d[. ]%02d[. ]' % (video.ep_airdate.year, video.ep_airdate.month, video.ep_airdate.day)
                        if not re.search(airdate_pattern, item['name']): checks[5] = True
            if any(checks):
                log_utils.log('Furk.net result excluded: %s - |%s|' % (checks, item['name']), log_utils.LOGDEBUG)
                continue
            match = re.search('(\d{3,})\s?x\s?(\d{3,})', item['video_info'])
            if match:
                width, _ = match.groups()
                quality = scraper_utils.width_get_quality(width)
            else:
                if video.video_type == VIDEO_TYPES.MOVIE:
                    _, _, height, _ = scraper_utils.parse_movie_link(item['name'])
                    quality = scraper_utils.height_get_quality(height)
                elif video.video_type == VIDEO_TYPES.EPISODE:
                    _, _, _, height, _ = scraper_utils.parse_episode_link(item['name'])
                    if int(height) > -1:
                        quality = scraper_utils.height_get_quality(height)
                    else:
                        quality = QUALITIES.HIGH
                else:
                    quality = QUALITIES.HIGH
            if 'url_pls' in item:
                size_gb = scraper_utils.format_size(int(item['size']), 'B')
                if self.max_bytes and int(item['size']) > self.max_bytes:
                    log_utils.log('Result skipped, Too big: |%s| - %s (%s) > %s (%sGB)' % (item['name'], item['size'], size_gb, self.max_bytes, self.max_gb))
                    continue
                stream_url = item['url_pls']
                host = self._get_direct_hostname(stream_url)
                hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': True}
                hoster['size'] = size_gb
                hoster['extra'] = item['name']
                hosters.append(hoster)
            else:
                log_utils.log('Furk.net result skipped - no playlist: |%s|' % (json.dumps(item)), log_utils.LOGDEBUG)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    host_count = {}
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, require_debrid=True, cache_limit=.5)
        q_str = ''
        match = re.search('>Release.*?td_col">([^<]+)', html)
        if match: q_str = match.group(1).upper()
        size = ''
        match = re.search('>Size.*?td_col">([^<]+)', html)
        if match: size = match.group(1).upper()
        fragment = dom_parser.parse_dom(html, 'table', {'id': 'download_table'})
        if fragment:
            for match in re.finditer('''href=['"]([^'"]+)''', fragment[0]):
                stream_url = match.group(1)
                if re.search('\.rar(\.|$)', stream_url): continue
                host = urlparse.urlsplit(stream_url).hostname
                if q_str:
                    if video.video_type == VIDEO_TYPES.EPISODE:
                        _title, _season, _episode, height, _extra = scraper_utils.parse_episode_link(q_str)
                    else:
                        _title, _year, height, _extra = scraper_utils.parse_movie_link(q_str)
                    quality = scraper_utils.height_get_quality(height)
                else:
                    quality = QUALITY_MAP.get(match.group(1).upper(), QUALITIES.HIGH)
                quality = scraper_utils.get_quality(video, host, quality)
                host_count[host] = host_count.get(host, 0) + 1
                hoster = {'multi-part': False, 'class': self, 'host': host, 'quality': quality, 'views': None, 'url': stream_url, 'rating': None, 'direct': False}
                if size: hoster['size'] = size
                hosters.append(hoster)
    new_hosters = [hoster for hoster in hosters if host_count[hoster['host']] <= 1]
    return new_hosters
def __get_quality(self, item, video):
    if 'width' in item:
        return scraper_utils.width_get_quality(item['width'])
    elif 'height' in item:
        return scraper_utils.height_get_quality(item['height'])
    else:
        if video.video_type == VIDEO_TYPES.MOVIE:
            _title, _year, height, _extra = scraper_utils.parse_movie_link(item['name'])
        else:
            _title, _season, _episode, height, _extra = scraper_utils.parse_episode_link(item['name'])
        return scraper_utils.height_get_quality(height)
def __get_links(self, url, video):
    hosters = []
    search_url = self.__translate_search(url)
    html = self._http_get(search_url, cache_limit=.5)
    js_result = scraper_utils.parse_json(html, search_url)
    down_url = js_result.get('downURL')
    dl_farm = js_result.get('dlFarm')
    dl_port = js_result.get('dlPort')
    for item in js_result.get('data', []):
        post_hash, size, post_title, ext, duration = item['0'], item['4'], item['10'], item['11'], item['14']
        checks = [False] * 6
        if not scraper_utils.release_check(video, post_title): checks[0] = True
        if 'alangs' in item and item['alangs'] and 'eng' not in item['alangs']: checks[1] = True
        if re.match('^\d+s', duration) or re.match('^[0-5]m', duration): checks[2] = True
        if 'passwd' in item and item['passwd']: checks[3] = True
        if 'virus' in item and item['virus']: checks[4] = True
        if 'type' in item and item['type'].upper() != 'VIDEO': checks[5] = True
        if any(checks):
            log_utils.log('EasyNews Post excluded: %s - |%s|' % (checks, item), log_utils.LOGDEBUG)
            continue
        stream_url = down_url + urllib.quote('/%s/%s/%s%s/%s%s' % (dl_farm, dl_port, post_hash, ext, post_title, ext))
        stream_url = stream_url + '|Authorization=%s' % (urllib.quote(self.auth))
        host = self._get_direct_hostname(stream_url)
        quality = None
        if 'width' in item:
            try: width = int(item['width'])
            except: width = 0
            if width: quality = scraper_utils.width_get_quality(width)
        if quality is None:
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(post_title)
            else:
                meta = scraper_utils.parse_episode_link(post_title)
            quality = scraper_utils.height_get_quality(meta['height'])
        if self.max_bytes:
            match = re.search('([\d.]+)\s+(.*)', size)
            if match:
                size_bytes = scraper_utils.to_bytes(*match.groups())
                if size_bytes > self.max_bytes:
                    log_utils.log('Result skipped, Too big: |%s| - %s (%s) > %s (%s GB)' % (post_title, size_bytes, size, self.max_bytes, self.max_gb))
                    continue
        hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': True}
        if any(i for i in ['X265', 'HEVC'] if i in post_title.upper()): hoster['format'] = 'x265'
        if size: hoster['size'] = size
        if post_title: hoster['extra'] = post_title
        hosters.append(hoster)
    return hosters
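# A minimal, self-contained sketch of the max_bytes size gate used above,
# assuming scraper_utils.to_bytes(value, unit) turns a '1.4 GB'-style pair into
# a byte count. to_bytes() is not shown in this file, so this reimplementation
# is an assumption for illustration only.
import re

def to_bytes_sketch(value, unit):
    multipliers = {'B': 1, 'KB': 1024, 'MB': 1024 ** 2, 'GB': 1024 ** 3, 'TB': 1024 ** 4}
    return int(float(value) * multipliers.get(unit.upper(), 1))

match = re.search('([\d.]+)\s+(.*)', '1.4 GB')
if match:
    size_bytes = to_bytes_sketch(*match.groups())  # -> 1503238553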
def __get_quality(self, item, video):
    if item.get('width'):
        return scraper_utils.width_get_quality(item['width'])
    elif item.get('height'):
        return scraper_utils.height_get_quality(item['height'])
    elif 'name' in item:
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(item['name'])
        else:
            meta = scraper_utils.parse_episode_link(item['name'])
        return scraper_utils.height_get_quality(meta['height'])
    else:
        return QUALITIES.HIGH
def __get_quality(self, item, video):
    if 'width' in item and item['width']:
        return scraper_utils.width_get_quality(item['width'])
    elif 'height' in item and item['height']:
        return scraper_utils.height_get_quality(item['height'])
    elif 'name' in item:
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(item['name'])
        else:
            meta = scraper_utils.parse_episode_link(item['name'])
        return scraper_utils.height_get_quality(meta['height'])
    else:
        return QUALITIES.HIGH
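# Two generations of the parse_movie_link()/parse_episode_link() API appear in
# this file: older call sites unpack a (title, year, height, extra) tuple, newer
# ones read a meta dict keyed by 'title'/'year'/'height'/'extra'. A hypothetical
# shim like this would let tuple-era callers consume the dict form; it is an
# illustration, not part of scraper_utils.
def meta_to_tuple(meta):
    return meta.get('title', ''), meta.get('year', ''), meta.get('height', -1), meta.get('extra', '')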
def __movie_search(self, title, year):
    results = []
    norm_title = scraper_utils.normalize_title(title)
    html = self._http_get(self.base_url, cache_limit=48)
    for item in self._parse_directory(html):
        if not item['directory']:
            match_title, match_year, height, extra = scraper_utils.parse_movie_link(item['title'])
            if 'dubbed' in extra.lower(): continue
            if (norm_title in scraper_utils.normalize_title(match_title)) and (not year or not match_year or year == match_year):
                match_title = match_title.replace('.', ' ')
                match_title += ' [%sp.%s]' % (height, extra)
                result = {'url': scraper_utils.pathify_url(item['link']), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def __movie_search(self, title, year):
    results = []
    norm_title = scraper_utils.normalize_title(title)
    html = self._http_get(self.base_url, cache_limit=48)
    for item in self._parse_directory(html):
        if not item['directory']:
            meta = scraper_utils.parse_movie_link(item['title'])
            if meta['dubbed']: continue
            if (norm_title in scraper_utils.normalize_title(meta['title'])) and (not year or not meta['year'] or year == meta['year']):
                match_title = meta['title'].replace('.', ' ')
                match_title += ' [%sp.%s]' % (meta['height'], meta['extra'])
                result = {'url': scraper_utils.pathify_url(item['link']), 'title': scraper_utils.cleanse_title(match_title), 'year': meta['year']}
                results.append(result)
    return results
def __movie_search(self, title, year):
    results = []
    norm_title = scraper_utils.normalize_title(title)
    html = self._http_get(self.base_url, cache_limit=48)
    for item in scraper_utils.parse_directory(self, html):
        if not item['directory']:
            meta = scraper_utils.parse_movie_link(item['title'])
            if meta['dubbed']: continue
            if (norm_title in scraper_utils.normalize_title(meta['title'])) and (not year or not meta['year'] or year == meta['year']):
                match_title = meta['title'].replace('.', ' ')
                match_title += ' [%sp.%s]' % (meta['height'], meta['extra'])
                result = {'url': scraper_utils.pathify_url(item['link']), 'title': scraper_utils.cleanse_title(match_title), 'year': meta['year']}
                results.append(result)
    return results
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'table', {'class': 'links-table'})
    if not fragment: return hosters
    for _attrs, row in dom_parser2.parse_dom(fragment[0].content, 'tr'):
        match = re.search("playVideo\.bind\(.*?'([^']+)(?:[^>]*>){2}(.*?)</td>", row, re.DOTALL)
        if not match: continue
        stream_url, release = match.groups()
        if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
            sources = scraper_utils.parse_google(self, stream_url)
        else:
            sources = [stream_url]
        for source in sources:
            host = scraper_utils.get_direct_hostname(self, source)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source)
                direct = True
            else:
                host = urlparse.urlparse(source).hostname
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(release)
                else:
                    meta = scraper_utils.parse_episode_link(release)
                base_quality = scraper_utils.height_get_quality(meta['height'])
                quality = scraper_utils.get_quality(video, host, base_quality)
                direct = False
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': source, 'direct': direct}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': self.base_url + source_url}
    if video.video_type == VIDEO_TYPES.MOVIE:
        meta = scraper_utils.parse_movie_link(source_url)
        stream_url = source_url + scraper_utils.append_headers(headers)
        quality = scraper_utils.height_get_quality(meta['height'])
        hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
        if 'format' in meta: hoster['format'] = meta['format']
        hosters.append(hoster)
    else:
        for episode in self.__match_episode(source_url, video):
            meta = scraper_utils.parse_episode_link(episode['title'])
            stream_url = episode['url'] + scraper_utils.append_headers(headers)
            stream_url = stream_url.replace(self.base_url, '')
            quality = scraper_utils.height_get_quality(meta['height'])
            hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            if 'format' in meta: hoster['format'] = meta['format']
            if 'size' in episode: hoster['size'] = scraper_utils.format_size(int(episode['size']))
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if source_url and source_url != FORCE_NO_MATCH:
        if video.video_type == VIDEO_TYPES.MOVIE:
            _title, _year, height, extra = scraper_utils.parse_movie_link(source_url)
            stream_url = self.base_url + source_url + '|User-Agent=%s' % (scraper_utils.get_ua())
            hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': scraper_utils.height_get_quality(height), 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            if 'x265' in extra: hoster['format'] = 'x265'
            hosters.append(hoster)
        else:
            for episode in self.__match_episode(source_url, video):
                _show_title, _season, _episode, height, extra = scraper_utils.parse_episode_link(episode['title'])
                stream_url = urlparse.urljoin(self.base_url, episode['url']) + '|User-Agent=%s' % (scraper_utils.get_ua())
                hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': scraper_utils.height_get_quality(height), 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                if 'x265' in extra: hoster['format'] = 'x265'
                if 'size' in episode: hoster['size'] = scraper_utils.format_size(int(episode['size']))
                hosters.append(hoster)
    return hosters
def __get_links(self, url, video):
    hosters = []
    search_url = scraper_utils.urljoin(self.base_url, SEARCH_URL)
    query = self.__translate_search(url)
    result = self._http_get(search_url, data=query, allow_redirect=False, cache_limit=.5)
    for item in result.get('files', []):
        checks = [False] * 6
        if item.get('type', '').upper() != 'VIDEO': checks[0] = True
        if item.get('is_ready') != '1': checks[1] = True
        if item.get('av_result') in ['warning', 'infected']: checks[2] = True
        if 'video_info' not in item: checks[3] = True
        if item.get('video_info') and not re.search('#0:(0|1)(\((eng|und)\))?:\s*Audio:', item['video_info'], re.I): checks[4] = True
        if not scraper_utils.release_check(video, item['name']): checks[5] = True
        if any(checks):
            logger.log('Furk.net result excluded: %s - |%s|' % (checks, item['name']), log_utils.LOGDEBUG)
            continue
        match = re.search('(\d{3,})\s*x\s*(\d{3,})', item['video_info'])
        if match:
            width, _height = match.groups()
            quality = scraper_utils.width_get_quality(width)
        else:
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(item['name'])
            else:
                meta = scraper_utils.parse_episode_link(item['name'])
            quality = scraper_utils.height_get_quality(meta['height'])
        if 'url_pls' in item:
            size_gb = scraper_utils.format_size(int(item['size']), 'B')
            if self.max_bytes and int(item['size']) > self.max_bytes:
                logger.log('Result skipped, Too big: |%s| - %s (%s) > %s (%sGB)' % (item['name'], item['size'], size_gb, self.max_bytes, self.max_gb))
                continue
            stream_url = item['url_pls']
            host = scraper_utils.get_direct_hostname(self, stream_url)
            hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': True}
            hoster['size'] = size_gb
            hoster['extra'] = item['name']
            hosters.append(hoster)
        else:
            logger.log('Furk.net result skipped - no playlist: |%s|' % (json.dumps(item)), log_utils.LOGDEBUG)
    return hosters
def __get_links(self, url, video):
    hosters = []
    search_url = self.__translate_search(url)
    html = self._http_get(search_url, cache_limit=.5)
    js_result = scraper_utils.parse_json(html, search_url)
    if 'data' in js_result:
        for item in js_result['data']:
            post_hash, size, post_title, ext, duration = item['0'], item['4'], item['10'], item['11'], item['14']
            checks = [False] * 6
            if not scraper_utils.title_check(video, post_title): checks[0] = True
            if 'alangs' in item and item['alangs'] and 'eng' not in item['alangs']: checks[1] = True
            if re.match('^\d+s', duration) or re.match('^[0-5]m', duration): checks[2] = True
            if 'passwd' in item and item['passwd']: checks[3] = True
            if 'virus' in item and item['virus']: checks[4] = True
            if 'type' in item and item['type'].upper() != 'VIDEO': checks[5] = True
            if any(checks):
                log_utils.log('EasyNews Post excluded: %s - |%s|' % (checks, item), log_utils.LOGDEBUG)
                continue
            stream_url = urllib.quote('%s%s/%s%s' % (post_hash, ext, post_title, ext))
            stream_url = 'http://members.easynews.com/dl/%s' % (stream_url)
            stream_url = stream_url + '|Cookie=%s' % (self._get_stream_cookies())
            host = self._get_direct_hostname(stream_url)
            quality = None
            if 'width' in item:
                try: width = int(item['width'])
                except: width = 0
                if width: quality = scraper_utils.width_get_quality(width)
            if quality is None:
                if video.video_type == VIDEO_TYPES.MOVIE:
                    _title, _year, height, _extra = scraper_utils.parse_movie_link(post_title)
                else:
                    _title, _season, _episode, height, _extra = scraper_utils.parse_episode_link(post_title)
                quality = scraper_utils.height_get_quality(height)
            hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': True}
            if any(i for i in ['X265', 'HEVC'] if i in post_title.upper()): hoster['format'] = 'x265'
            if size: hoster['size'] = size
            if post_title: hoster['extra'] = post_title
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    title = dom_parser2.parse_dom(html, 'meta', {'property': 'og:title'}, req='content')
    meta = scraper_utils.parse_movie_link(title[0].attrs['content']) if title else {}
    fragment = dom_parser2.parse_dom(html, 'p', {'class': 'download_message'})
    if fragment:
        for attrs, _content in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
            source = attrs['href']
            if scraper_utils.excluded_link(source): continue
            host = urlparse.urlparse(source).hostname
            quality = scraper_utils.height_get_quality(meta.get('height', 480))
            hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': quality, 'direct': False}
            if 'format' in meta: hoster['format'] = meta['format']
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    host_count = {}
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        q_str = ''
        match = re.search('>Release.*?td_col">([^<]+)', html)
        if match: q_str = match.group(1).upper()
        size = ''
        match = re.search('>Size.*?td_col">([^<]+)', html)
        if match: size = match.group(1).upper()
        fragment = dom_parser.parse_dom(html, 'table', {'id': 'download_table'})
        if fragment:
            for match in re.finditer('''href=['"]([^'"]+)''', fragment[0]):
                stream_url = match.group(1)
                if re.search('\.rar(\.|$)', stream_url): continue
                host = urlparse.urlsplit(stream_url).hostname
                if q_str:
                    if video.video_type == VIDEO_TYPES.EPISODE:
                        _title, _season, _episode, height, _extra = scraper_utils.parse_episode_link(q_str)
                    else:
                        _title, _year, height, _extra = scraper_utils.parse_movie_link(q_str)
                    quality = scraper_utils.height_get_quality(height)
                else:
                    quality = QUALITY_MAP.get(match.group(1).upper(), QUALITIES.HIGH)
                quality = scraper_utils.get_quality(video, host, quality)
                host_count[host] = host_count.get(host, 0) + 1
                hoster = {'multi-part': False, 'class': self, 'host': host, 'quality': quality, 'views': None, 'url': stream_url, 'rating': None, 'direct': False}
                if size: hoster['size'] = size
                hosters.append(hoster)
    new_hosters = [hoster for hoster in hosters if host_count[hoster['host']] <= 1]
    return new_hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    post = dom_parser2.parse_dom(html, 'div', {'class': 'entry-content'})
    if not post: return hosters
    for match in re.finditer('(?:href="|>)(https?://[^"<]+)', post[0].content):
        stream_url = match.group(1)
        if scraper_utils.excluded_link(stream_url) or 'imdb.com' in stream_url: continue
        host = urlparse.urlparse(stream_url).hostname
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(stream_url)
        else:
            meta = scraper_utils.parse_episode_link(stream_url)
        quality = scraper_utils.height_get_quality(meta['height'])
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'quality': quality, 'direct': False}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, require_debrid=True, cache_limit=.5)
        post = dom_parser.parse_dom(html, 'div', {'class': 'entry-content'})
        if post:
            for p in dom_parser.parse_dom(post[0], 'p'):
                for match in re.finditer('href="([^"]+)[^>]+>([^<]+)', p):
                    stream_url, q_str = match.groups()
                    if re.search('\.part\.?\d+', q_str, re.I) or '.rar' in q_str or 'sample' in q_str or q_str.endswith('.nfo'): continue
                    host = urlparse.urlparse(stream_url).hostname
                    if video.video_type == VIDEO_TYPES.MOVIE:
                        meta = scraper_utils.parse_movie_link(q_str)
                    else:
                        meta = scraper_utils.parse_episode_link(q_str)
                    quality = scraper_utils.height_get_quality(meta['height'])
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'quality': quality, 'direct': False}
                    if 'format' in meta: hoster['format'] = meta['format']
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if source_url and source_url != FORCE_NO_MATCH:
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(source_url)
            stream_url = source_url + '|User-Agent=%s' % (scraper_utils.get_ua())
            quality = scraper_utils.height_get_quality(meta['height'])
            hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            if 'format' in meta: hoster['format'] = meta['format']
            hosters.append(hoster)
        else:
            for episode in self.__match_episode(source_url, video):
                meta = scraper_utils.parse_episode_link(episode['title'])
                stream_url = episode['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                stream_url = stream_url.replace(self.base_url, '')
                quality = scraper_utils.height_get_quality(meta['height'])
                hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                if 'format' in meta: hoster['format'] = meta['format']
                if 'size' in episode: hoster['size'] = scraper_utils.format_size(int(episode['size']))
                hosters.append(hoster)
    return hosters
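# The '|User-Agent=...' suffixes used above follow Kodi's convention of
# appending HTTP headers to a playable URL as 'url|Header1=value1&Header2=value2'
# with URL-encoded values, which is what scraper_utils.append_headers() produces
# in the other scrapers in this file. A minimal sketch using only the Python 2
# standard library:
import urllib

def append_headers_sketch(url, headers):
    return url + '|' + '&'.join('%s=%s' % (key, urllib.quote(str(value))) for key, value in headers.items())

# append_headers_sketch('http://host/video.mp4', {'User-Agent': 'Mozilla/5.0'})
# -> 'http://host/video.mp4|User-Agent=Mozilla/5.0'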
def __get_release(self, html, video):
    try:
        select = int(kodi.get_setting('%s-select' % (self.get_name())))
    except:
        select = 0
    best_page = None
    ul_id = 'releases' if video.video_type == VIDEO_TYPES.MOVIE else 'episodes'
    fragment = dom_parser2.parse_dom(html, 'ul', {'id': ul_id})
    if fragment:
        best_qorder = 0
        for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
            match = dom_parser2.parse_dom(item, 'span', req=['href', 'title'])
            if not match:
                match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
            if not match: continue
            page_url, release = match[0].attrs['href'], match[0].attrs['title']
            match = dom_parser2.parse_dom(item, 'span', {'class': 'time'})
            if match and self.__too_old(match[0].content): break
            release = re.sub('^\[[^\]]*\]\s*', '', release)
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(release)
            else:
                if not scraper_utils.release_check(video, release, require_title=False): continue
                meta = scraper_utils.parse_episode_link(release)
            if select == 0:
                best_page = page_url
                break
            else:
                quality = scraper_utils.height_get_quality(meta['height'])
                logger.log('result: |%s|%s|%s|' % (page_url, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                if Q_ORDER[quality] > best_qorder:
                    logger.log('Setting best as: |%s|%s|%s|' % (page_url, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                    best_page = page_url
                    best_qorder = Q_ORDER[quality]
    return best_page
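# __get_release() above keeps the page whose release name parses to the highest
# Q_ORDER rank. The same selection can be phrased with max(); Q_ORDER_SKETCH is
# a stand-in mapping, since the real Q_ORDER table is defined elsewhere in the
# addon and is not shown in this file.
Q_ORDER_SKETCH = {'LOW': 1, 'MEDIUM': 2, 'HIGH': 3, 'HD720': 4, 'HD1080': 5}
candidates = [('/release/1', 'HIGH'), ('/release/2', 'HD1080'), ('/release/3', 'HD720')]
best_page, _quality = max(candidates, key=lambda pair: Q_ORDER_SKETCH[pair[1]])  # -> '/release/2'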
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    if video_type == VIDEO_TYPES.TVSHOW and title:
        test_url = '/show/%s/' % (scraper_utils.to_slug(title))
        test_url = scraper_utils.urljoin(self.base_url, test_url)
        html = self._http_get(test_url, require_debrid=True, cache_limit=24)
        posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
        if posts and CATEGORIES[video_type] in posts[0].content:
            match = re.search('<div[^>]*>\s*show\s+name:.*?<a\s+href="([^"]+)[^>]+>(?!Season\s+\d+)([^<]+)', posts[0].content, re.I)
            if match:
                show_url, match_title = match.groups()
                result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        search_url = scraper_utils.urljoin(self.base_url, '/search/%s/')
        search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
        search_url = search_url % (urllib.quote_plus(search_title))
        headers = {'User-Agent': LOCAL_UA}
        html = self._http_get(search_url, headers=headers, require_debrid=True, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+).*?">(.*?)</a>', html)
        posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        norm_title = scraper_utils.normalize_title(title)
        for heading, post in zip(headings, posts):
            if not re.search('[._ -]S\d+E\d+[._ -]', heading[1], re.I) and not self.__too_old(post):
                post_url, post_title = heading
                post_title = re.sub('<[^>]*>', '', post_title)
                meta = scraper_utils.parse_movie_link(post_title)
                full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                match_year = meta['year']
                match_norm_title = scraper_utils.normalize_title(meta['title'])
                if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                    results.append(result)
    return results
def __get_links(self, url, video):
    hosters = []
    search_url = urlparse.urljoin(self.base_url, SEARCH_URL)
    query = self.__translate_search(url)
    result = self._http_get(search_url, data=query, allow_redirect=False, cache_limit=.5)
    if 'files' in result:
        for item in result['files']:
            checks = [False] * 6
            if 'type' not in item or item['type'].upper() != 'VIDEO': checks[0] = True
            if 'is_ready' in item and item['is_ready'] != '1': checks[1] = True
            if 'av_result' in item and item['av_result'] in ['warning', 'infected']: checks[2] = True
            if 'video_info' not in item: checks[3] = True
            if 'video_info' in item and item['video_info'] and not re.search('#0:(?:0|1)(?:\(eng\)|\(und\))?:\s*Audio:', item['video_info']): checks[4] = True
            if video.video_type == VIDEO_TYPES.EPISODE:
                sxe = '[. ][Ss]%02d[Ee]%02d[. ]' % (int(video.season), int(video.episode))
                if not re.search(sxe, item['name']):
                    if video.ep_airdate:
                        airdate_pattern = '[. ]%s[. ]%02d[. ]%02d[. ]' % (video.ep_airdate.year, video.ep_airdate.month, video.ep_airdate.day)
                        if not re.search(airdate_pattern, item['name']): checks[5] = True
            if any(checks):
                log_utils.log('Furk.net result excluded: %s - |%s|' % (checks, item['name']), log_utils.LOGDEBUG)
                continue
            match = re.search('(\d{3,})\s?x\s?(\d{3,})', item['video_info'])
            if match:
                width, _ = match.groups()
                quality = scraper_utils.width_get_quality(width)
            else:
                if video.video_type == VIDEO_TYPES.MOVIE:
                    _, _, height, _ = scraper_utils.parse_movie_link(item['name'])
                    quality = scraper_utils.height_get_quality(height)
                elif video.video_type == VIDEO_TYPES.EPISODE:
                    _, _, _, height, _ = scraper_utils.parse_episode_link(item['name'])
                    if int(height) > -1:
                        quality = scraper_utils.height_get_quality(height)
                    else:
                        quality = QUALITIES.HIGH
                else:
                    quality = QUALITIES.HIGH
            if 'url_pls' not in item: continue  # no playlist url; avoids a KeyError (cf. the guarded variant above)
            stream_url = item['url_pls']
            host = self._get_direct_hostname(stream_url)
            hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': True}
            hoster['size'] = scraper_utils.format_size(int(item['size']), 'B')
            hoster['extra'] = item['name']
            hosters.append(hoster)
    return hosters
def __get_links(self, url, video):
    hosters = []
    search_url, params = self.__translate_search(url)
    html = self._http_get(search_url, params=params, cache_limit=.5)
    js_result = scraper_utils.parse_json(html, search_url)
    down_url = js_result.get('downURL')
    dl_farm = js_result.get('dlFarm')
    dl_port = js_result.get('dlPort')
    for item in js_result.get('data', []):
        post_hash, size, post_title, ext, duration = item['0'], item['4'], item['10'], item['11'], item['14']
        # exclusion checks: wrong release, non-English audio, too short,
        # password-protected, flagged by the virus scan, or not a video
        checks = [False] * 6
        if not scraper_utils.release_check(video, post_title): checks[0] = True
        if 'alangs' in item and item['alangs'] and 'eng' not in item['alangs']: checks[1] = True
        if re.match('^\d+s', duration) or re.match('^[0-5]m', duration): checks[2] = True
        if 'passwd' in item and item['passwd']: checks[3] = True
        if 'virus' in item and item['virus']: checks[4] = True
        if 'type' in item and item['type'].upper() != 'VIDEO': checks[5] = True
        if any(checks):
            logger.log('EasyNews Post excluded: %s - |%s|' % (checks, item), log_utils.LOGDEBUG)
            continue

        stream_url = down_url + urllib.quote('/%s/%s/%s%s/%s%s' % (dl_farm, dl_port, post_hash, ext, post_title, ext))
        stream_url = stream_url + '|Authorization=%s' % (urllib.quote(self.auth))
        host = scraper_utils.get_direct_hostname(self, stream_url)

        # prefer the reported width; fall back to parsing the post title
        quality = None
        if 'width' in item:
            try:
                width = int(item['width'])
            except ValueError:
                width = 0
            if width:
                quality = scraper_utils.width_get_quality(width)
        if quality is None:
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(post_title)
            else:
                meta = scraper_utils.parse_episode_link(post_title)
            quality = scraper_utils.height_get_quality(meta['height'])

        # honor the user's maximum size setting
        if self.max_bytes:
            match = re.search('([\d.]+)\s+(.*)', size)
            if match:
                size_bytes = scraper_utils.to_bytes(*match.groups())
                if size_bytes > self.max_bytes:
                    logger.log('Result skipped, Too big: |%s| - %s (%s) > %s (%s GB)' % (post_title, size_bytes, size, self.max_bytes, self.max_gb))
                    continue

        hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': True}
        if any(i in post_title.upper() for i in ('X265', 'HEVC')): hoster['format'] = 'x265'
        if size: hoster['size'] = size
        if post_title: hoster['extra'] = post_title
        hosters.append(hoster)
    return hosters
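# The '|Authorization=...' suffix built above relies on Kodi's convention of
# passing HTTP headers to the player as 'url|Header1=val1&Header2=val2'.
# A small standalone helper, for illustration only, showing how such a URL
# splits back apart (the sample values below are made up):
def _split_kodi_header_url(stream_url):
    url, _, header_blob = stream_url.partition('|')
    headers = dict(part.split('=', 1) for part in header_blob.split('&')) if header_blob else {}
    return url, headers

# _split_kodi_header_url('http://example.com/file.mkv|Authorization=Basic%20abc123')
# -> ('http://example.com/file.mkv', {'Authorization': 'Basic%20abc123'})
# (header values stay URL-quoted until the player unquotes them)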
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    sources = {}

    # hoster links listed under the "enlaces" block
    for _attrs, fragment in dom_parser2.parse_dom(html, 'ul', {'class': 'enlaces'}):
        for attrs, _content in dom_parser2.parse_dom(fragment, 'a', req='href'):
            stream_url = attrs['href']
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(stream_url)
            else:
                meta = scraper_utils.parse_episode_link(stream_url)
            sources[stream_url] = {'quality': scraper_utils.height_get_quality(meta['height']), 'direct': False}

    # embedded players: check both src and data-lazy-src iframes
    player_divs = dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}) + dom_parser2.parse_dom(html, 'div', {'id': re.compile('player\d+')})
    for _attrs, fragment in player_divs:
        iframes = dom_parser2.parse_dom(fragment, 'iframe', req='src') + dom_parser2.parse_dom(fragment, 'iframe', req='data-lazy-src')
        for attrs, _content in iframes:
            iframe_url = attrs.get('src', '')
            if not iframe_url.startswith('http'):
                iframe_url = attrs.get('data-lazy-src', '')
                if not iframe_url.startswith('http'): continue
            if '//player' in iframe_url:
                iframe_html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
                sources.update(scraper_utils.parse_sources_list(self, iframe_html))
            else:
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(iframe_url)
                else:
                    meta = scraper_utils.parse_episode_link(iframe_url)
                sources[iframe_url] = {'quality': scraper_utils.height_get_quality(meta['height']), 'direct': False}

    for stream_url, values in sources.iteritems():
        direct = values['direct']
        quality = values['quality']
        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        else:
            host = urlparse.urlparse(stream_url).hostname
        hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality, 'host': host, 'rating': None, 'views': None, 'direct': direct}
        hosters.append(hoster)
    return hosters
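# scraper_utils.append_headers() is used above but lives in the shared
# scraper_utils module, not in this excerpt. Given the 'url|Key=value'
# convention shown earlier, a plausible sketch (an assumption, not the
# library's actual code) would be:
def append_headers(headers):
    # produce the '|Key=value&Key2=value2' suffix Kodi expects, URL-quoting values
    return '|%s' % ('&'.join(['%s=%s' % (key, urllib.quote_plus(str(headers[key]))) for key in headers]))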
def _blog_proc_results(self, html, post_pattern, date_format, video_type, title, year):
    results = []
    search_date = ''
    search_sxe = ''
    if video_type == VIDEO_TYPES.EPISODE:
        # pull either an SxxExx tag or an airdate out of the search title
        match = re.search('(.*?)\s*(S\d+E\d+)\s*', title)
        if match:
            show_title, search_sxe = match.groups()
        else:
            match = re.search('(.*?)\s*(\d{4})[._ -]?(\d{2})[._ -]?(\d{2})\s*', title)
            if match:
                show_title, search_year, search_month, search_day = match.groups()
                search_date = '%s-%s-%s' % (search_year, search_month, search_day)
                search_date = scraper_utils.to_datetime(search_date, '%Y-%m-%d').date()
            else:
                show_title = title
    else:
        show_title = title

    today = datetime.date.today()
    for match in re.finditer(post_pattern, html, re.DOTALL):
        post_data = match.groupdict()
        post_title = post_data['post_title']
        post_title = re.sub('<[^>]*>', '', post_title)
        if 'quality' in post_data:
            post_title += ' - [%s]' % (post_data['quality'])

        # drop posts older than the user's filter setting, when a date is available
        try:
            filter_days = int(kodi.get_setting('%s-filter' % (self.get_name())))
        except ValueError:
            filter_days = 0
        if filter_days and date_format and 'date' in post_data:
            post_data['date'] = post_data['date'].strip()
            filter_days = datetime.timedelta(days=filter_days)
            post_date = scraper_utils.to_datetime(post_data['date'], date_format).date()
            if not post_date:
                logger.log('Failed date Check in %s: |%s|%s|' % (self.get_name(), post_data['date'], date_format), log_utils.LOGWARNING)
                post_date = today
            if today - post_date > filter_days:
                continue

        match_year = ''
        match_date = ''
        match_sxe = ''
        match_title = full_title = post_title
        if video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(post_title)
            match_year = meta['year']
        else:
            meta = scraper_utils.parse_episode_link(post_title)
            match_sxe = 'S%02dE%02d' % (int(meta['season']), int(meta['episode']))
            match_date = meta['airdate']
            match_title = meta['title']
            full_title = '%s (%sp) [%s]' % (meta['title'], meta['height'], meta['extra'])

        norm_title = scraper_utils.normalize_title(show_title)
        match_norm_title = scraper_utils.normalize_title(match_title)
        title_match = norm_title and (match_norm_title in norm_title or norm_title in match_norm_title)
        year_match = not year or not match_year or year == match_year
        sxe_match = not search_sxe or (search_sxe == match_sxe)
        date_match = not search_date or (search_date == match_date)
        logger.log('Blog Results: |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| (%s)' % (
            match_norm_title, norm_title, title_match, year, match_year, year_match,
            search_date, match_date, date_match, search_sxe, match_sxe, sxe_match,
            self.get_name()), log_utils.LOGDEBUG)
        if title_match and year_match and date_match and sxe_match:
            quality = scraper_utils.height_get_quality(meta['height'])
            result = {'url': scraper_utils.pathify_url(post_data['url']), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year, 'quality': quality}
            results.append(result)
    return results
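# _blog_proc_results() relies on post_pattern exposing named regex groups:
# 'url' and 'post_title' are required, while 'date' and 'quality' are
# optional. A hypothetical example, called from inside a scraper subclass,
# for a blog whose posts render roughly as
# <a href="/x/" title="Movie.2016.720p">...</a> ... <span class="date">2016-08-01</span>
# (the pattern, date format, and search terms below are illustrative, not a
# real site's markup):
post_pattern = '<a\s+href="(?P<url>[^"]+)"\s+title="(?P<post_title>[^"]+)".*?class="date">(?P<date>\d{4}-\d{2}-\d{2})<'
results = self._blog_proc_results(html, post_pattern, '%Y-%m-%d', VIDEO_TYPES.MOVIE, 'Movie Title', '2016')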