def __get_links_from_json2(self, url, page_url, video_type):
    """Fetch a jwplayer-style JSON playlist from |url| and map each stream
    to a quality.

    Returns a dict of {stream_url: {'quality': ..., 'direct': True}}.
    """
    sources = {}
    headers = {'Referer': page_url}
    headers.update(XHR)  # mark the request as an AJAX call
    html = self._http_get(url, headers=headers, cache_limit=0)
    js_data = scraper_utils.parse_json(html, url)
    try:
        playlist = js_data.get('playlist', [])
        for source in playlist[0].get('sources', []):
            stream_url = source['file']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                # Google video: the quality is encoded in the URL itself
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                # No label: fall back to parsing a height out of the file name
                if video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(stream_url)
                else:
                    meta = scraper_utils.parse_episode_link(stream_url)
                quality = scraper_utils.height_get_quality(meta['height'])
            sources[stream_url] = {'quality': quality, 'direct': True}
            logger.log(
                'Adding stream: %s Quality: %s' % (stream_url, quality),
                log_utils.LOGDEBUG)
    except Exception as e:
        # Best effort: a malformed/empty playlist (e.g. playlist[0] raising
        # IndexError) just yields no sources rather than aborting the scrape
        logger.log('Exception during yesmovies extract: %s' % (e),
                   log_utils.LOGDEBUG)
    return sources
def __get_quality(self, item, video):
    """Derive a quality for |item|: prefer an explicit width, then an
    explicit height, then a height parsed from the item name; default HIGH."""
    width = item.get('width')
    if width:
        return scraper_utils.width_get_quality(item['width'])

    height = item.get('height')
    if height:
        return scraper_utils.height_get_quality(item['height'])

    if 'name' not in item:
        return QUALITIES.HIGH

    if video.video_type == VIDEO_TYPES.MOVIE:
        parsed = scraper_utils.parse_movie_link(item['name'])
    else:
        parsed = scraper_utils.parse_episode_link(item['name'])
    return scraper_utils.height_get_quality(parsed['height'])
def get_sources(self, video):
    """Build hoster dicts for |video|: a single direct stream for movies,
    or one stream per matched episode for TV shows.

    Request headers are appended to each stream URL for the player.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    headers = {
        'User-Agent': scraper_utils.get_ua(),
        'Referer': self.base_url + source_url
    }
    if video.video_type == VIDEO_TYPES.MOVIE:
        # Movies: the source URL itself is the stream; quality comes from
        # the height parsed out of the link text
        meta = scraper_utils.parse_movie_link(source_url)
        stream_url = source_url + scraper_utils.append_headers(headers)
        quality = scraper_utils.height_get_quality(meta['height'])
        hoster = {
            'multi-part': False,
            'host': scraper_utils.get_direct_hostname(self, stream_url),
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': True
        }
        if 'format' in meta: hoster['format'] = meta['format']
        hosters.append(hoster)
    else:
        # TV: one hoster per episode link matched for this video
        for episode in self.__match_episode(source_url, video):
            meta = scraper_utils.parse_episode_link(episode['title'])
            stream_url = episode['url'] + scraper_utils.append_headers(
                headers)
            # Stream URLs are stored relative to the scraper's base URL
            stream_url = stream_url.replace(self.base_url, '')
            quality = scraper_utils.height_get_quality(meta['height'])
            hoster = {
                'multi-part': False,
                'host': scraper_utils.get_direct_hostname(self, stream_url),
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True
            }
            if 'format' in meta: hoster['format'] = meta['format']
            if 'size' in episode:
                hoster['size'] = scraper_utils.format_size(
                    int(episode['size']))
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scan the video page for anchors pointing at MOVIE_URL and turn each
    into a direct hoster with quality parsed from the link."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
        stream_url = attrs['href']
        # Only links on the known movie host are considered streams
        if MOVIE_URL in stream_url:
            meta = scraper_utils.parse_movie_link(stream_url)
            stream_url = scraper_utils.pathify_url(
                stream_url) + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
            quality = scraper_utils.height_get_quality(meta['height'])
            hoster = {
                'multi-part': False,
                'host': scraper_utils.get_direct_hostname(self, stream_url),
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True
            }
            if 'format' in meta: hoster['format'] = meta['format']
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape non-direct hoster links from the "info2" spans of a
    debrid-only listing page."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    headers = {'User-Agent': LOCAL_UA}
    html = self._http_get(url,
                          require_debrid=True,
                          headers=headers,
                          cache_limit=.5)
    # Each "info2" span runs until the next "info" span or an <hr/>
    for match in re.finditer(
            "<span\s+class='info2'(.*?)(<span\s+class='info|<hr\s*/>)",
            html, re.DOTALL):
        for match2 in re.finditer('href="([^"]+)', match.group(1)):
            stream_url = match2.group(1)
            meta = scraper_utils.parse_episode_link(stream_url)
            quality = scraper_utils.height_get_quality(meta['height'])
            host = urlparse.urlparse(stream_url).hostname
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'views': None,
                'url': stream_url,
                'rating': None,
                'quality': quality,
                'direct': False
            }
            hosters.append(hoster)
    return hosters
def __get_cloud_links(self, html, page_url, sub):
    """Resolve "cloud" streams for an episode.

    Extracts the episode id from the page, pulls the access token from the
    page's cfasync scripts, then queries the link API and builds one direct
    hoster per variant, attaching |sub| as the subtitle label.

    FIX: the original called random.choice() on variant['hosts'] without
    checking it was non-empty; an empty host list raised IndexError and
    aborted every remaining variant and script. Empty host lists are now
    skipped.
    """
    hosters = []
    # The embedded JSON is escaped inside a JS string literal
    html = html.replace('\\"', '"').replace('\\/', '/')
    match = re.search("dizi_kapak_getir\('([^']+)", html)
    if not match:
        return hosters
    ep_id = match.group(1)
    for attrs, _content in dom_parser2.parse_dom(
            html, 'script', {'data-cfasync': 'false'}, req='src'):
        script_url = attrs['src']
        html = self._http_get(script_url, cache_limit=24)
        match1 = re.search("var\s+kapak_url\s*=\s*'([^']+)", html)
        match2 = re.search("var\s+aCtkp\s*=\s*'([^']+)", html)
        if not (match1 and match2):
            continue
        link_url = '%s?fileid=%s&access_token=%s' % (
            match1.group(1), ep_id, match2.group(1))
        headers = {'Referer': page_url}
        html = self._http_get(link_url, headers=headers, cache_limit=.5)
        js_data = scraper_utils.parse_json(html, link_url)
        for variant in js_data.get('variants', {}):
            hosts = variant.get('hosts', [])
            if not hosts:
                continue  # guard: random.choice([]) raises IndexError
            stream_host = random.choice(hosts)
            if not stream_host:
                continue
            stream_url = stream_host + variant['path'] + \
                scraper_utils.append_headers({
                    'User-Agent': scraper_utils.get_ua(),
                    'Referer': page_url})
            if not stream_url.startswith('http'):
                stream_url = 'http://' + stream_url
            host = scraper_utils.get_direct_hostname(self, stream_url)
            # Prefer explicit dimensions; fall back to HIGH
            if 'width' in variant:
                quality = scraper_utils.width_get_quality(variant['width'])
            elif 'height' in variant:
                quality = scraper_utils.height_get_quality(variant['height'])
            else:
                quality = QUALITIES.HIGH
            hoster = {'multi-part': False, 'host': host, 'class': self,
                      'quality': quality, 'views': None, 'rating': None,
                      'url': stream_url, 'direct': True}
            hoster['subs'] = sub
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Collect non-direct hoster links out of a post's entry-content div
    on a debrid-only site, skipping excluded hosts and IMDB links."""
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    post = dom_parser2.parse_dom(html, 'div', {'class': 'entry-content'})
    if not post: return hosters
    # Catch both attribute links (href="...") and bare URLs in text (>http...)
    for match in re.finditer('(?:href="|>)(https?://[^"<]+)',
                             post[0].content):
        stream_url = match.group(1)
        if scraper_utils.excluded_link(
                stream_url) or 'imdb.com' in stream_url:
            continue
        host = urlparse.urlparse(stream_url).hostname
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(stream_url)
        else:
            meta = scraper_utils.parse_episode_link(stream_url)
        quality = scraper_utils.height_get_quality(meta['height'])
        hoster = {
            'multi-part': False,
            'host': host,
            'class': self,
            'views': None,
            'url': stream_url,
            'rating': None,
            'quality': quality,
            'direct': False
        }
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Collect non-direct hoster links from the first <p><strong> section of
    a post's "post-cont" div, rating quality per host."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'post-cont'})
    if not fragment: return hosters
    # Links of interest sit between the first <p><strong> and the next <script>
    match = re.search('<p>\s*<strong>(.*?)<script', fragment[0].content,
                      re.DOTALL)
    if not match: return hosters
    for attrs, _content in dom_parser2.parse_dom(match.group(1), 'a',
                                                 req='href'):
        stream_url = attrs['href']
        if scraper_utils.excluded_link(stream_url): continue
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(stream_url)
        else:
            meta = scraper_utils.parse_episode_link(stream_url)
        host = urlparse.urlparse(stream_url).hostname
        # Blend the release height with the host's quality reputation
        quality = scraper_utils.get_quality(
            video, host, scraper_utils.height_get_quality(meta['height']))
        hoster = {'multi-part': False, 'host': host, 'class': self,
                  'views': None, 'url': stream_url, 'rating': None,
                  'quality': quality, 'direct': False}
        hosters.append(hoster)
    return hosters
def __get_gk_links(self, html):
    """Resolve a "{link: ...}" player blob via the gk endpoint.

    Returns {stream_url: {'quality': ..., 'direct': bool}}.
    """
    sources = {}
    match = re.search('{link\s*:\s*"([^"]+)', html)
    if match:
        iframe_url = match.group(1)
        data = {'link': iframe_url}
        headers = {'Referer': iframe_url}
        html = self._http_get(self.gk_url, data=data, headers=headers,
                              cache_limit=.5)
        js_data = scraper_utils.parse_json(html, self.gk_url)
        links = js_data.get('link', [])
        # The API returns either a bare URL string or a list of dicts;
        # normalize to the list-of-dicts shape
        if isinstance(links, basestring):
            links = [{'link': links}]
        for link in links:
            stream_url = link['link']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'openload.co':
                # NOTE(review): gv_get_quality() parses quality out of
                # Google-video URLs, and every sibling scraper guards it
                # with == 'gvideo'. Checking 'openload.co' here looks like
                # a copy/paste slip — confirm intent before changing.
                quality = scraper_utils.gv_get_quality(stream_url)
                direct = True
            elif 'label' in link:
                quality = scraper_utils.height_get_quality(link['label'])
                direct = True
            else:
                quality = QUALITIES.HIGH
                direct = False
            sources[stream_url] = {'quality': quality, 'direct': direct}
    return sources
def __create_source(self, stream_url, height, page_url, subs=False,
                    direct=True):
    """Build a single hoster dict for |stream_url|.

    For direct streams, self-hosted URLs are resolved via a HEAD request
    (following one redirect manually) and appropriate playback headers are
    appended. |height| supplies the quality unless the host is gvideo.
    When |subs| is truthy a fixed 'Turkish Subtitles' label is attached.
    """
    if direct:
        stream_url = stream_url.replace('\\/', '/')
        if self.get_name().lower() in stream_url:
            # Self-hosted link: resolve the redirect ourselves so the final
            # URL (with its own auth tokens) is handed to the player
            headers = {'Referer': page_url}
            redir_url = self._http_get(stream_url,
                                       headers=headers,
                                       method='HEAD',
                                       allow_redirect=False,
                                       cache_limit=.25)
            if redir_url.startswith('http'):
                stream_url = redir_url
                stream_url += scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
            else:
                # No redirect: keep original URL but carry the cookies
                stream_url += scraper_utils.append_headers({
                    'User-Agent': scraper_utils.get_ua(),
                    'Referer': page_url,
                    'Cookie': self._get_stream_cookies()
                })
        else:
            stream_url += scraper_utils.append_headers({
                'User-Agent': scraper_utils.get_ua(),
                'Referer': page_url
            })
        host = scraper_utils.get_direct_hostname(self, stream_url)
    else:
        host = urlparse.urlparse(stream_url).hostname
    if host == 'gvideo':
        quality = scraper_utils.gv_get_quality(stream_url)
    else:
        quality = scraper_utils.height_get_quality(height)
    hoster = {
        'multi-part': False,
        'host': host,
        'class': self,
        'quality': quality,
        'views': None,
        'rating': None,
        'url': stream_url,
        'direct': direct
    }
    if subs: hoster['subs'] = 'Turkish Subtitles'
    return hoster
def get_sources(self, video):
    """Scrape hoster links from a debrid release page.

    For movies, first picks the best release page via __get_release().
    Quality is read from the first "releaselabel" span carrying WxH
    dimensions; an x265/HEVC marker sets the format on every hoster.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.MOVIE:
        page_url = self.__get_release(html, video)
        if page_url is None: return hosters
        page_url = scraper_utils.urljoin(self.base_url, page_url)
        html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
    hevc = False
    for _attrs, content in dom_parser2.parse_dom(
            html, 'span', {'class': 'releaselabel'}):
        if re.search('(hevc|x265)', content, re.I):
            hevc = 'x265'
        match = re.search('(\d+)x(\d+)', content)
        if match:
            _width, height = match.groups()
            quality = scraper_utils.height_get_quality(height)
            break
    else:
        # No span carried WxH dimensions: for/else fallback
        quality = QUALITIES.HIGH
    streams = [
        attrs['href'] for attrs, _content in dom_parser2.parse_dom(
            html, 'a', {'class': 'links'}, req='href')
    ]
    streams += [
        content for _attrs, content in dom_parser2.parse_dom(
            html, 'pre', {'class': 'links'})
    ]
    for stream_url in streams:
        if scraper_utils.excluded_link(stream_url): continue
        host = urlparse.urlparse(stream_url).hostname
        hoster = {
            'multi-part': False,
            'host': host,
            'class': self,
            'views': None,
            'url': stream_url,
            'rating': None,
            'quality': quality,
            'direct': False
        }
        if hevc: hoster['format'] = hevc
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Resolve the page's load_player() id through the player API and the
    returned playlist URL into hoster dicts.

    gvideo streams are direct; everything else is non-direct with quality
    taken from the source label.
    """
    hosters = []
    sources = {}
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    match = re.search("load_player\('([^']+)", html)
    if not match: return hosters
    # The player endpoint is picky about these headers
    headers = {'Referer': page_url, 'Server': 'cloudflare-nginx',
               'Accept': 'text/html, */*; q=0.01',
               'Accept-Language': 'en-US,en;q=0.5',
               'Accept-Formating': 'application/json, text/javascript',
               'Accept-Encoding': 'gzip, deflate'}
    headers.update(XHR)
    params = {'id': match.group(1)}
    player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
    html = self._http_get(player_url, params=params, headers=headers,
                          cache_limit=1)
    js_data = scraper_utils.parse_json(html, player_url)
    pl_url = js_data.get('value') or js_data.get('download')
    if not pl_url: return hosters
    headers = {'Referer': page_url}
    if pl_url.startswith('//'): pl_url = 'https:' + pl_url
    html = self._http_get(pl_url, headers=headers, allow_redirect=False,
                          cache_limit=0)
    if html.startswith('http'):
        # The playlist URL redirected straight to a stream
        streams = [(html, '')]
    else:
        js_data = scraper_utils.parse_json(html, pl_url)
        try:
            streams = [(source['file'], source.get('label', ''))
                       for source in js_data['playlist'][0]['sources']]
        except:
            # Malformed playlist: treat as no streams
            streams = []
    for stream in streams:
        stream_url, label = stream
        if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
            sources[stream_url] = {
                'quality': scraper_utils.gv_get_quality(stream_url),
                'direct': True}
        else:
            if label:
                quality = scraper_utils.height_get_quality(label)
            else:
                quality = QUALITIES.HIGH
            sources[stream_url] = {'quality': quality, 'direct': False}
    for source, value in sources.iteritems():
        direct = value['direct']
        quality = value['quality']
        if direct:
            host = scraper_utils.get_direct_hostname(self, source)
        else:
            host = urlparse.urlparse(source).hostname
        stream_url = source + scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': host, 'class': self,
                  'quality': quality, 'views': None, 'rating': None,
                  'url': stream_url, 'direct': direct}
        hosters.append(hoster)
    return hosters
def __get_release(self, html, video):
    """Pick the release page URL for |video| from the releases/episodes list.

    With the '<name>-select' setting at 0 the first acceptable release wins;
    otherwise the highest-quality release is chosen. Scanning stops at the
    first release deemed too old. Returns the page URL or None.
    """
    try:
        select = int(kodi.get_setting('%s-select' % (self.get_name())))
    except:
        # Missing/non-numeric setting: default to "first match"
        select = 0
    ul_id = 'releases' if video.video_type == VIDEO_TYPES.MOVIE else 'episodes'
    fragment = dom_parser2.parse_dom(html, 'ul', {'id': ul_id})
    if fragment:
        best_qorder = 0
        best_page = None
        for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
            match = dom_parser2.parse_dom(item, 'span',
                                          req=['href', 'title'])
            if not match:
                # Some layouts use anchors instead of spans
                match = dom_parser2.parse_dom(item, 'a',
                                              req=['href', 'title'])
                if not match: continue
            page_url, release = match[0].attrs['href'], match[0].attrs[
                'title']
            match = dom_parser2.parse_dom(item, 'span', {'class': 'time'})
            # Items are newest-first, so the first too-old entry ends the scan
            if match and self.__too_old(match[0].content): break
            # Strip a leading "[group]" tag from the release name
            release = re.sub('^\[[^\]]*\]\s*', '', release)
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(release)
            else:
                if not scraper_utils.release_check(
                        video, release, require_title=False):
                    continue
                meta = scraper_utils.parse_episode_link(release)
            if select == 0:
                best_page = page_url
                break
            else:
                quality = scraper_utils.height_get_quality(meta['height'])
                logger.log(
                    'result: |%s|%s|%s|' %
                    (page_url, quality, Q_ORDER[quality]),
                    log_utils.LOGDEBUG)
                if Q_ORDER[quality] > best_qorder:
                    logger.log(
                        'Setting best as: |%s|%s|%s|' %
                        (page_url, quality, Q_ORDER[quality]),
                        log_utils.LOGDEBUG)
                    best_page = page_url
                    best_qorder = Q_ORDER[quality]
        return best_page
def __get_links(self, url, video):
    """Query the EasyNews search API and build direct hosters from the
    results.

    Each post is screened by a battery of checks (release match, audio
    language, duration, password, virus flag, type) and optionally by a
    max-size cap before being turned into an authorized stream URL.
    """
    hosters = []
    search_url, params = self.__translate_search(url)
    html = self._http_get(search_url, params=params, cache_limit=.5)
    js_result = scraper_utils.parse_json(html, search_url)
    down_url = js_result.get('downURL')
    dl_farm = js_result.get('dlFarm')
    dl_port = js_result.get('dlPort')
    for item in js_result.get('data', []):
        # Numeric-string keys index fixed columns of the API response
        post_hash, size, post_title, ext, duration = \
            item['0'], item['4'], item['10'], item['11'], item['14']
        checks = [False] * 6
        if not scraper_utils.release_check(video, post_title):
            checks[0] = True
        if 'alangs' in item and item['alangs'] and 'eng' not in item['alangs']:
            checks[1] = True
        # Reject clips: seconds-long or under six minutes
        if re.match('^\d+s', duration) or re.match('^[0-5]m', duration):
            checks[2] = True
        if 'passwd' in item and item['passwd']: checks[3] = True
        if 'virus' in item and item['virus']: checks[4] = True
        if 'type' in item and item['type'].upper() != 'VIDEO':
            checks[5] = True
        if any(checks):
            logger.log(
                'EasyNews Post excluded: %s - |%s|' % (checks, item),
                log_utils.LOGDEBUG)
            continue
        stream_url = down_url + urllib.quote(
            '/%s/%s/%s%s/%s%s' %
            (dl_farm, dl_port, post_hash, ext, post_title, ext))
        # Auth is carried in the URL so the player can fetch directly
        stream_url = stream_url + '|Authorization=%s' % (
            urllib.quote(self.auth))
        host = scraper_utils.get_direct_hostname(self, stream_url)
        quality = None
        if 'width' in item:
            try:
                width = int(item['width'])
            except:
                width = 0
            if width:
                quality = scraper_utils.width_get_quality(width)
        if quality is None:
            # No usable width: parse quality from the post title instead
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(post_title)
            else:
                meta = scraper_utils.parse_episode_link(post_title)
            quality = scraper_utils.height_get_quality(meta['height'])
        if self.max_bytes:
            match = re.search('([\d.]+)\s+(.*)', size)
            if match:
                size_bytes = scraper_utils.to_bytes(*match.groups())
                if size_bytes > self.max_bytes:
                    logger.log(
                        'Result skipped, Too big: |%s| - %s (%s) > %s (%s GB)'
                        % (post_title, size_bytes, size, self.max_bytes,
                           self.max_gb))
                    continue
        hoster = {'multi-part': False, 'class': self, 'views': None,
                  'url': stream_url, 'rating': None, 'host': host,
                  'quality': quality, 'direct': True}
        if any(i for i in ['X265', 'HEVC'] if i in post_title.upper()):
            hoster['format'] = 'x265'
        if size: hoster['size'] = size
        if post_title: hoster['extra'] = post_title
        hosters.append(hoster)
    return hosters
def __get_mirror_links(self, html, video):
    """Collect hoster links from the <p> paragraph that follows each
    mirrors.png image; returns {url: {'quality': ..., 'direct': False}}."""
    mirrors = {}
    for img_attrs, _ in dom_parser2.parse_dom(html, 'img', req='src'):
        image = img_attrs['src']
        if not image.endswith('/mirrors.png'):
            continue
        para = re.search('%s.*?<p>(.*?)</p>' % (image), html, re.DOTALL)
        if para is None:
            continue
        for link_attrs, _ in dom_parser2.parse_dom(para.group(1), 'a',
                                                   req='href'):
            link = link_attrs['href']
            hostname = urlparse.urlparse(link).hostname
            parsed = scraper_utils.parse_episode_link(link)
            release_quality = scraper_utils.height_get_quality(
                parsed['height'])
            quality = scraper_utils.get_quality(video, hostname,
                                                release_quality)
            mirrors[link] = {'quality': quality, 'direct': False}
    return mirrors
def get_sources(self, video):
    """Walk the links table, expanding gvideo streams into their individual
    qualities and rating other hosts against the release name."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'table',
                                     {'class': 'links-table'})
    if not fragment: return hosters
    for _attrs, row in dom_parser2.parse_dom(fragment[0].content, 'tr'):
        # First group: the playVideo URL; second: the release name cell
        match = re.search(
            "playVideo\.bind\(.*?'([^']+)(?:[^>]*>){2}(.*?)</td>", row,
            re.DOTALL)
        if not match: continue
        stream_url, release = match.groups()
        if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
            # One gvideo link fans out into several per-quality streams
            sources = scraper_utils.parse_google(self, stream_url)
        else:
            sources = [stream_url]
        for source in sources:
            host = scraper_utils.get_direct_hostname(self, source)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source)
                direct = True
            else:
                host = urlparse.urlparse(source).hostname
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(release)
                else:
                    meta = scraper_utils.parse_episode_link(release)
                base_quality = scraper_utils.height_get_quality(
                    meta['height'])
                quality = scraper_utils.get_quality(
                    video, host, base_quality)
                direct = False
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': source,
                'direct': direct
            }
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Pull mp4 <source> tags out of the page's "myiframe" iframe and turn
    each into a direct hoster (gvideo quality from the URL, otherwise from
    the tag's data-res attribute)."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    iframe_url = dom_parser2.parse_dom(html, 'iframe', {'id': 'myiframe'},
                                       req='src', exclude_comments=True)
    if not iframe_url: return hosters
    iframe_url = iframe_url[0].attrs['src']
    html = self._http_get(iframe_url, headers={'Referer': page_url},
                          cache_limit=.5)
    for source in dom_parser2.parse_dom(html, 'source',
                                        {'type': 'video/mp4'},
                                        req=['src', 'data-res']):
        stream_url = source.attrs['src']
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
            stream_url += scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
        else:
            quality = scraper_utils.height_get_quality(
                source.attrs['data-res'])
            # Non-gvideo hosts also need the Referer to serve the stream
            stream_url += scraper_utils.append_headers({
                'User-Agent': scraper_utils.get_ua(),
                'Referer': page_url
            })
        source = {
            'multi-part': False,
            'url': stream_url,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'direct': True
        }
        hosters.append(source)
    return hosters
def get_sources(self, video):
    """Resolve each "movieplay" iframe into streams: openload iframes are
    kept as non-direct links, everything else goes through __get_links()."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'movieplay'}):
        iframe_src = dom_parser2.parse_dom(fragment, 'iframe', req='src')
        if iframe_src:
            iframe_src = iframe_src[0].attrs['src']
            # Matches "oload"/"openload" hosts
            if re.search('o(pen)?load', iframe_src, re.I):
                meta = scraper_utils.parse_movie_link(iframe_src)
                quality = scraper_utils.height_get_quality(meta['height'])
                links = {iframe_src: {'quality': quality, 'direct': False}}
            else:
                links = self.__get_links(iframe_src, url)
            for link in links:
                direct = links[link]['direct']
                quality = links[link]['quality']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, link)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(link)
                    stream_url = link + scraper_utils.append_headers(
                        {
                            'User-Agent': scraper_utils.get_ua(),
                            'Referer': url
                        })
                else:
                    host = urlparse.urlparse(link).hostname
                    stream_url = link
                source = {
                    'multi-part': False,
                    'url': stream_url,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'direct': direct
                }
                hosters.append(source)
    return hosters
def get_sources(self, video):
    """Fetch the page's video_id, prime the session via the 'av' endpoint,
    then query VIDEO_URL and build one direct hoster per returned stream.

    The JSON maps a height key to a value containing 'url=<escaped url>'.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=0)
    match = re.search('var\s*video_id\s*=\s*"([^"]+)', html)
    if not match: return hosters
    video_id = match.group(1)
    headers = {'Referer': page_url}
    headers.update(XHR)
    # Priming POST: response unused, but required before the video call
    _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'),
                           headers=headers,
                           method='POST',
                           cache_limit=0)
    vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
    html = self._http_get(vid_url, data={'v': video_id}, headers=headers,
                          cache_limit=0)
    for source, value in scraper_utils.parse_json(html,
                                                  vid_url).iteritems():
        match = re.search('url=(.*)', value)
        if not match: continue
        stream_url = urllib.unquote(match.group(1))
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            # The JSON key is the stream height
            quality = scraper_utils.height_get_quality(source)
        stream_url += scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua()})
        hoster = {
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': True
        }
        hosters.append(hoster)
    return hosters
def __get_pk_links(self, html):
    """Resolve the page's "parametros" player data through the PK endpoint
    and return one direct hoster per stream item."""
    hosters = []
    match = re.search('var\s+parametros\s*=\s*"([^"]+)', html)
    if not match:
        return hosters

    params = scraper_utils.parse_query(match.group(1))
    if 'pic' not in params:
        return hosters

    data = {'sou': 'pic', 'fv': '25', 'url': params['pic']}
    html = self._http_get(PK_URL, headers=XHR, data=data, cache_limit=0)
    for item in scraper_utils.parse_json(html, PK_URL):
        if not ('url' in item and item['url']):
            continue
        # Prefer explicit dimensions; default to 720p
        if 'width' in item and item['width']:
            quality = scraper_utils.width_get_quality(item['width'])
        elif 'height' in item and item['height']:
            quality = scraper_utils.height_get_quality(item['height'])
        else:
            quality = QUALITIES.HD720
        stream_url = item['url'] + scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua()})
        hosters.append({
            'multi-part': False,
            'url': stream_url,
            'class': self,
            'quality': quality,
            'host': scraper_utils.get_direct_hostname(self, item['url']),
            'rating': None,
            'views': None,
            'direct': True})
    return hosters
def get_sources(self, video):
    """Build non-direct hosters from the post links found on a debrid page,
    taking quality from each link's release name."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    for source, value in self.__get_post_links(html).iteritems():
        if scraper_utils.excluded_link(source):
            continue
        host = urlparse.urlparse(source).hostname
        release = value['release']
        if video.video_type == VIDEO_TYPES.MOVIE:
            parsed = scraper_utils.parse_movie_link(release)
        else:
            parsed = scraper_utils.parse_episode_link(release)
        hoster = {'multi-part': False, 'host': host, 'class': self,
                  'views': None, 'url': source, 'rating': None,
                  'quality': scraper_utils.height_get_quality(
                      parsed['height']),
                  'direct': False}
        if 'format' in parsed:
            hoster['format'] = parsed['format']
        hosters.append(hoster)
    return hosters
def __get_post_links(self, html, video):
    """Map each link under the post's <h2> headings to a quality rated
    against its host and the height parsed from the URL."""
    sources = {}
    post = dom_parser2.parse_dom(html, 'article',
                                 {'id': re.compile('post-\d+')})
    if not post:
        return sources
    for _attrs, heading in dom_parser2.parse_dom(post[0].content, 'h2'):
        for link, _content in dom_parser2.parse_dom(heading, 'a',
                                                    req='href'):
            stream_url = link['href']
            parsed = scraper_utils.parse_episode_link(stream_url)
            release_quality = scraper_utils.height_get_quality(
                parsed['height'])
            hostname = urlparse.urlparse(stream_url).hostname
            sources[stream_url] = scraper_utils.get_quality(
                video, hostname, release_quality)
    return sources
def __get_ajax(self, html, page_url):
    """Follow the page's $.ajax("...") endpoint and map each returned
    stream URL to a quality: gvideo URLs carry their own quality, others
    use the JSON key (a height)."""
    sources = {}
    endpoint = re.search('\$\.ajax\(\s*"([^"]+)', html)
    if endpoint is None:
        return sources

    post_url = endpoint.group(1)
    reply = self._http_get(post_url, headers={'Referer': page_url},
                           cache_limit=.5)
    js_result = scraper_utils.parse_json(reply, post_url)
    for res_key in js_result:
        stream_url = js_result[res_key]
        if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
            sources[stream_url] = scraper_utils.gv_get_quality(stream_url)
        else:
            sources[stream_url] = scraper_utils.height_get_quality(res_key)
    return sources
def __get_king_links(self, iframe_url):
    """Query the iframe's GetVideoSources XHR endpoint and build direct
    hosters (always tagged with Turkish subtitles) from 'VideoSources'."""
    hosters = []
    match = re.search('v=(.*)', iframe_url)
    if match:
        data = {'ID': match.group(1)}
        headers = {'Referer': iframe_url}
        headers.update(XHR)
        # Same path as the iframe, different query: ?p=GetVideoSources
        xhr_url = iframe_url.split('?')[0]
        html = self._http_get(xhr_url,
                              params={'p': 'GetVideoSources'},
                              data=data,
                              headers=headers,
                              cache_limit=.5)
        js_data = scraper_utils.parse_json(html, xhr_url)
        try:
            for source in js_data['VideoSources']:
                stream_url = source['file'] + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
                host = scraper_utils.get_direct_hostname(
                    self, source['file'])
                label = source.get('label', '')
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(source['file'])
                elif re.search('\d+p?', label):
                    # Numeric label, e.g. "720p": treat as a height
                    quality = scraper_utils.height_get_quality(label)
                else:
                    # Named label: translate via QUALITY_MAP
                    quality = QUALITY_MAP.get(label, QUALITIES.HIGH)
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': True,
                    'subs': 'Turkish Subtitles'
                }
                hosters.append(hoster)
        except:
            # Missing/odd 'VideoSources': return whatever was collected
            pass
    return hosters
def __get_posts(self, html):
    """Replay the page's $.post(url, {...}) call and map each stream in
    the JSON reply to a quality (gvideo URLs carry their own quality;
    other entries use the JSON key as a height)."""
    sources = {}
    match = re.search('\$\.post\("([^"]+)"\s*,\s*\{(.*?)\}', html)
    if match is None:
        return sources

    post_url, post_data = match.groups()
    reply = self._http_get(post_url, data=self.__get_data(post_data),
                           cache_limit=.5)
    js_result = scraper_utils.parse_json(reply, post_url)
    for res_key in js_result:
        stream_url = js_result[res_key]
        if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
            sources[stream_url] = scraper_utils.gv_get_quality(stream_url)
        else:
            sources[stream_url] = scraper_utils.height_get_quality(res_key)
    return sources
def __get_json_links(self, html, sub):
    """Build direct hosters from a jwplayer JSON blob's 'sources' list,
    attaching |sub| as the subtitle label on every result."""
    hosters = []
    js_data = scraper_utils.parse_json(html)
    if 'sources' not in js_data:
        return hosters
    for source in js_data.get('sources', []):
        stream_url = source.get('file')
        if stream_url is None:
            continue
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        elif 'label' in source:
            quality = scraper_utils.height_get_quality(source['label'])
        else:
            quality = QUALITIES.HIGH
        hosters.append({
            'multi-part': False, 'host': host, 'class': self,
            'quality': quality, 'views': None, 'rating': None,
            'url': stream_url, 'direct': True, 'subs': sub})
    return hosters
def __get_linked(self, html):
    """Look up the page's 'dizi' id via the AJAX endpoint and map each
    stream in the 'success' list to a quality."""
    sources = {}
    dizi = re.search('dizi=([^"]+)', html)
    if dizi is None:
        return sources

    reply = self._http_get(AJAX_URL, params={'dizi': dizi.group(1)},
                           headers=XHR, cache_limit=.5)
    js_result = scraper_utils.parse_json(reply, AJAX_URL)
    for entry in js_result.get('success', []):
        stream_url = entry.get('src')
        if stream_url is None:
            continue
        if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        elif 'label' in entry:
            quality = scraper_utils.height_get_quality(entry['label'])
        else:
            quality = QUALITIES.HIGH
        sources[stream_url] = quality
    return sources
def __get_direct(self, html, page_url):
    """Scrape a jwplayer "sources: [...]" array straight out of the page
    markup.

    Returns (best_quality, sources) where best_quality is the highest
    quality seen across the list.
    """
    sources = []
    best_quality = QUALITIES.HIGH
    match = re.search('''['"]?sources["']?\s*:\s*\[(.*?)\}\s*,?\s*\]''',
                      html, re.DOTALL)
    if match:
        # files captures (wrapping-function-prefix, url); labels the quality text
        files = re.findall('''['"]?file['"]?\s*:\s*(.*?)['"]([^'"]+)''',
                           match.group(1), re.DOTALL)
        labels = re.findall('''['"]?label['"]?\s*:\s*['"]([^'"]*)''',
                            match.group(1), re.DOTALL)
        # Python 2 idiom: map(None, a, b) zips with None-padding.
        # NOTE(review): if files and labels differ in length, the padded
        # None will crash the unpack/re.sub below — assumed equal lengths;
        # TODO confirm against real pages.
        for stream, label in map(None, files, labels):
            func, stream_url = stream
            if 'atob' in func:
                # URL was wrapped in atob(...): base64-decode it
                stream_url = base64.b64decode(stream_url)
            stream_url = stream_url.replace(' ', '%20')
            host = scraper_utils.get_direct_hostname(self, stream_url)
            # Strip a trailing " HD" marker so only the height remains
            label = re.sub(re.compile('\s*HD', re.I), '', label)
            quality = scraper_utils.height_get_quality(label)
            if Q_ORDER[quality] > Q_ORDER[best_quality]:
                best_quality = quality
            stream_url += scraper_utils.append_headers({
                'User-Agent': scraper_utils.get_ua(),
                'Referer': page_url
            })
            source = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': True
            }
            sources.append(source)
    return best_quality, sources
def __get_gk_links(self, link, iframe_url):
    """Resolve |link| through the GK endpoint.

    The reply's 'link' is either a single URL string (gvideo gets expanded
    into per-quality streams; others are non-direct HIGH) or a list of
    dicts, each treated as a direct stream.
    """
    sources = {}
    data = {'link': link}
    headers = XHR
    headers.update({'Referer': iframe_url, 'User-Agent': USER_AGENT})
    html = self._http_get(GK_URL, data=data, headers=headers,
                          cache_limit=.25)
    js_data = scraper_utils.parse_json(html, GK_URL)
    if 'link' in js_data:
        if isinstance(js_data['link'], basestring):
            stream_url = js_data['link']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                # One gvideo URL expands into several per-quality streams
                for source in scraper_utils.parse_google(self, stream_url):
                    sources[source] = {
                        'quality': scraper_utils.gv_get_quality(source),
                        'direct': True
                    }
            else:
                sources[stream_url] = {
                    'quality': QUALITIES.HIGH,
                    'direct': False
                }
        else:
            for link in js_data['link']:
                stream_url = link['link']
                if scraper_utils.get_direct_hostname(
                        self, stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'label' in link:
                    quality = scraper_utils.height_get_quality(
                        link['label'])
                else:
                    quality = QUALITIES.HIGH
                sources[stream_url] = {'quality': quality, 'direct': True}
    return sources
def __get_post_links(self, html, video):
    """Map each hoster link in the post content to a quality.

    The post is split into <strong>-delimited fragments; the <strong> text
    is the release name whose parsed height rates every link in that
    fragment (blended with the host via get_quality()).
    """
    sources = {}
    post = dom_parser2.parse_dom(html, 'div', {'class': 'postContent'})
    if post:
        post = post[0].content
        # Each fragment runs from one <strong> to the next (or end of post)
        for fragment in re.finditer('(<strong>.*?)(?=<strong>|$)', post,
                                    re.DOTALL):
            fragment = fragment.group(1)
            release = dom_parser2.parse_dom(fragment, 'strong')
            if release:
                release = release[0].content
                meta = scraper_utils.parse_episode_link(release)
                release_quality = scraper_utils.height_get_quality(
                    meta['height'])
                for attrs, _content in dom_parser2.parse_dom(fragment, 'a',
                                                             req='href'):
                    link = attrs['href']
                    host = urlparse.urlparse(link).hostname
                    quality = scraper_utils.get_quality(
                        video, host, release_quality)
                    sources[link] = quality
    return sources