def __get_videos(self, contents, video):
    videos = []
    for key in contents:
        item = contents[key]
        if item['type'].lower() == 'dir':
            videos += self.__get_videos(item['children'], video)
        else:
            if item['ext'].upper() in VIDEO_EXT and int(item['size']) > (100 * 1024 * 1024):
                if video.video_type == VIDEO_TYPES.MOVIE:
                    _, _, height, _ = scraper_utils.parse_movie_link(item['name'])
                else:
                    _, _, _, height, _ = scraper_utils.parse_episode_link(item['name'])

                # use a separate dict so the |video| parameter isn't shadowed on later iterations
                video_item = {'name': item['name'], 'size': item['size'], 'url': item['url'],
                              'quality': scraper_utils.height_get_quality(height)}
                videos.append(video_item)
                if item['stream'] is not None:
                    if int(height) > 720: height = 720
                    transcode_item = {'name': '(Transcode) %s' % (item['name']), 'url': item['stream'],
                                      'quality': scraper_utils.height_get_quality(height)}
                    videos.append(transcode_item)
    return videos
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, require_debrid=True, cache_limit=.5)
        title = dom_parser.parse_dom(html, 'title')
        if title:
            title = re.sub('^\[ST\]\s*–\s*', '', title[0])
            meta = scraper_utils.parse_episode_link(title)
            page_quality = scraper_utils.height_get_quality(meta['height'])
        else:
            page_quality = QUALITIES.HIGH

        fragment = dom_parser.parse_dom(html, 'section', {'class': '[^"]*entry-content[^"]*'})
        if fragment:
            for section in dom_parser.parse_dom(fragment[0], 'p'):
                match = re.search('([^<]*)', section)
                meta = scraper_utils.parse_episode_link(match.group(1))
                if meta['episode'] != '-1' or meta['airdate']:
                    section_quality = scraper_utils.height_get_quality(meta['height'])
                else:
                    section_quality = page_quality

                if Q_ORDER[section_quality] < Q_ORDER[page_quality]:
                    quality = section_quality
                else:
                    quality = page_quality

                for stream_url in dom_parser.parse_dom(section, 'a', ret='href'):
                    host = urlparse.urlparse(stream_url).hostname
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': stream_url,
                              'rating': None, 'quality': quality, 'direct': False}
                    hosters.append(hoster)
    return hosters
def __get_links_from_json2(self, url, page_url, video_type):
    sources = {}
    headers = {'Referer': page_url}
    headers.update(XHR)
    html = self._http_get(url, headers=headers, cache_limit=0)
    js_data = scraper_utils.parse_json(html, url)
    try:
        playlist = js_data.get('playlist', [])
        for source in playlist[0].get('sources', []):
            stream_url = source['file']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                if video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(stream_url)
                else:
                    meta = scraper_utils.parse_episode_link(stream_url)
                quality = scraper_utils.height_get_quality(meta['height'])
            sources[stream_url] = {'quality': quality, 'direct': True}
            logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        logger.log('Exception during yesmovies extract: %s' % (e), log_utils.LOGDEBUG)
    return sources
def get_sources(self, video):
    hosters = []
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    match = re.search('var\s+view_id\s*=\s*"([^"]+)', html)
    if not match: return hosters
    view_id = match.group(1)
    for lang in ['or', 'tr']:
        subs = True if lang == 'tr' else False
        view_data = {'id': view_id, 'tip': 'view', 'dil': lang}
        html = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25)
        html = html.strip()
        html = re.sub(r'\\n|\\t', '', html)
        match = re.search('var\s+sources\s*=\s*(\[.*?\])', html)
        if match:
            raw_data = match.group(1)
            raw_data = raw_data.replace('\\', '')
        else:
            raw_data = html

        js_data = scraper_utils.parse_json(raw_data, self.ajax_url)
        if 'data' not in js_data: continue

        src = dom_parser2.parse_dom(js_data['data'], 'iframe', req='src')
        if not src: continue

        html = self._http_get(src[0].attrs['src'], cache_limit=.25)
        for attrs, _content in dom_parser2.parse_dom(html, 'iframe', req='src'):
            src = attrs['src']
            if not src.startswith('http'): continue
            sources.append({'label': '720p', 'file': src, 'direct': False, 'subs': subs})

        sources += [{'file': url, 'subs': subs} for url in scraper_utils.parse_sources_list(self, html).iterkeys()]
        if sources: break

    for source in sources:
        direct = source.get('direct', True)
        stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                continue
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.height_get_quality(source['label'])

        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None,
                  'rating': None, 'url': stream_url, 'direct': direct}
        if source.get('subs'): hoster['subs'] = 'Turkish Subtitles'
        hosters.append(hoster)
    return hosters
def resolve_link(self, link):
    # the link may carry pipe-delimited headers (url|key=value&key=value)
    try:
        headers = dict([item.split('=') for item in (link.split('|')[1]).split('&')])
        for key in headers: headers[key] = urllib.unquote(headers[key])
        link = link.split('|')[0]
    except:
        headers = {}

    if not link.startswith('http'):
        link = urlparse.urljoin(self.base_url, link)
    html = self._http_get(link, headers=headers, cache_limit=0)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'player'})
    if fragment:
        iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
        if iframe_url:
            iframe_url = iframe_url[0]
            headers = {'Referer': link}
            html = self._http_get(iframe_url, headers=headers, cache_limit=0)
            # solve the recaptcha, if present, before looking for streams
            sitekey = dom_parser.parse_dom(html, 'div', {'class': 'g-recaptcha'}, ret='data-sitekey')
            if sitekey:
                token = recaptcha_v2.UnCaptchaReCaptcha().processCaptcha(sitekey[0], lang='en')
                if token:
                    data = {'g-recaptcha-response': token}
                    html = self._http_get(iframe_url, data=data, cache_limit=0)
                    log_utils.log(html)

            match = re.search("\.replace\(\s*'([^']+)'\s*,\s*'([^']*)'\s*\)", html, re.I)
            if match:
                html = html.replace(match.group(1), match.group(2))

            # the page base64-encodes the player markup one or more times
            match = re.search("window\.atob[\([]+'([^']+)", html)
            if match:
                func_count = len(re.findall('window\.atob', html))
                html = match.group(1)
                for _i in xrange(func_count):
                    html = base64.decodestring(html)

            streams = []
            for match in re.finditer('''<source[^>]+src=["']([^'"]+)[^>]+label=['"]([^'"]+)''', html):
                streams.append(match.groups())

            if len(streams) > 1:
                if not self.auto_pick:
                    result = xbmcgui.Dialog().select(i18n('choose_stream'), [e[1] for e in streams])
                    if result > -1:
                        return streams[result][0] + '|User-Agent=%s' % (scraper_utils.get_ua())
                else:
                    best_stream = ''
                    best_q = 0
                    for stream in streams:
                        stream_url, label = stream
                        if Q_ORDER[scraper_utils.height_get_quality(label)] > best_q:
                            best_q = Q_ORDER[scraper_utils.height_get_quality(label)]
                            best_stream = stream_url

                    if best_stream:
                        return best_stream + '|User-Agent=%s' % (scraper_utils.get_ua())
            elif streams:
                return streams[0][0] + '|User-Agent=%s' % (scraper_utils.get_ua())
def __get_links_from_json2(self, url, page_url, video_type):
    sources = {}
    headers = {'Referer': page_url}
    headers.update(XHR)
    html = self._http_get(url, headers=headers, cache_limit=0)
    js_data = scraper_utils.parse_json(html, url)
    try:
        playlist = js_data.get('playlist', [])
        for source in playlist[0].get('sources', []):
            stream_url = source['file']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                if video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(stream_url)
                else:
                    meta = scraper_utils.parse_episode_link(stream_url)
                quality = scraper_utils.height_get_quality(meta['height'])
            sources[stream_url] = {'quality': quality, 'direct': True}
            logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        logger.log('Exception during yesmovies extract: %s' % (e), log_utils.LOGDEBUG)
    return sources
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        match = re.search('var\s+view_id\s*=\s*"([^"]+)', html)
        if match:
            view_data = {'id': match.group(1), 'tip': 'view', 'dil': 'or'}
            html = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25)
            html = html.strip()
            html = re.sub(r'\\n|\\t', '', html)
            match = re.search('var\s+sources\s*=\s*(\[.*?\])', html)
            if match:
                raw_data = match.group(1)
                raw_data = raw_data.replace('\\', '')
            else:
                raw_data = html

            js_data = scraper_utils.parse_json(raw_data, self.ajax_url)
            if 'data' in js_data:
                src = dom_parser.parse_dom(js_data['data'], 'iframe', ret='src')
                if src:
                    html = self._http_get(src[0], cache_limit=.25)
                    match = re.search('url=([^"]+)', html)
                    if match:
                        stream_url = match.group(1).replace('>', '')
                        sources.append({'label': '720p', 'file': stream_url})
                        direct = False
                    else:
                        src = dom_parser.parse_dom(html, 'iframe', ret='src')
                        if src:
                            sources.append({'label': '720p', 'file': src[0]})
                            direct = False
                        else:
                            for match in re.finditer('"file"\s*:\s*"([^"]+)"\s*,\s*"label"\s*:\s*"([^"]+)', html):
                                sources.append({'label': match.group(2), 'file': match.group(1)})
                            direct = True
            else:
                sources = js_data
                direct = True

        for source in sources:
            stream_url = source['file'] + '|User-Agent=%s' % (scraper_utils.get_ua())
            if direct:
                host = self._get_direct_hostname(stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    quality = scraper_utils.height_get_quality(source['label'])
            else:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.height_get_quality(source['label'])

            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None,
                      'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    headers = {'Referer': page_url}
    html = self._http_get(page_url, headers=headers, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.MOVIE:
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'poster'})
        if fragment:
            movie_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
            if movie_url:
                page_url = scraper_utils.urljoin(self.base_url, movie_url[0].attrs['href'])
                html = self._http_get(page_url, cache_limit=.5)
                episodes = self.__get_episodes(html)
                page_url = self.__get_best_page(episodes)
                if not page_url:
                    return hosters
                else:
                    page_url = scraper_utils.urljoin(self.base_url, page_url)
                    html = self._http_get(page_url, cache_limit=.5)

    streams = dom_parser2.parse_dom(html, 'iframe', req='src')
    if streams:
        streams = [(attrs['src'], 480) for attrs, _content in streams]
        direct = False
    else:
        streams = [(attrs['src'], attrs.get('data-res', 480)) for attrs, _content in dom_parser2.parse_dom(html, 'source', req=['src'])]
        direct = True

    headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url}
    for stream_url, height in streams:
        if 'video.php' in stream_url or 'moviexk.php' in stream_url:
            if 'title=' in stream_url:
                title = stream_url.split('title=')[-1]
                stream_url = stream_url.replace(title, urllib.quote(title))
            redir_url = self._http_get(stream_url, headers=headers, allow_redirect=False, method='HEAD', cache_limit=0)
            if redir_url.startswith('http'):
                redir_url = redir_url.replace(' ', '').split(';codec')[0]
                stream_url = redir_url
            else:
                continue

        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(height)
            stream_url += scraper_utils.append_headers(headers)
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.height_get_quality(height)

        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality,
                  'views': None, 'rating': None, 'direct': direct}
        hosters.append(source)

    return hosters
def __get_links(self, url, video):
    hosters = []
    search_url = urlparse.urljoin(self.base_url, SEARCH_URL)
    query = self.__translate_search(url)
    result = self._http_get(search_url, data=query, allow_redirect=False, cache_limit=.5)
    if 'files' in result:
        for item in result['files']:
            # each check flags a reason to exclude this result
            checks = [False] * 6
            if 'type' not in item or item['type'].upper() != 'VIDEO': checks[0] = True
            if 'is_ready' in item and item['is_ready'] != '1': checks[1] = True
            if 'av_result' in item and item['av_result'] in ['warning', 'infected']: checks[2] = True
            if 'video_info' not in item: checks[3] = True
            if 'video_info' in item and item['video_info'] and not re.search('#0:(?:0|1)(?:\(eng\)|\(und\))?:\s*Audio:', item['video_info']): checks[4] = True
            if video.video_type == VIDEO_TYPES.EPISODE:
                sxe = '[. ][Ss]%02d[Ee]%02d[. ]' % (int(video.season), int(video.episode))
                if not re.search(sxe, item['name']):
                    if video.ep_airdate:
                        airdate_pattern = '[. ]%s[. ]%02d[. ]%02d[. ]' % (video.ep_airdate.year, video.ep_airdate.month, video.ep_airdate.day)
                        if not re.search(airdate_pattern, item['name']): checks[5] = True

            if any(checks):
                log_utils.log('Furk.net result excluded: %s - |%s|' % (checks, item['name']), log_utils.LOGDEBUG)
                continue

            match = re.search('(\d{3,})\s?x\s?(\d{3,})', item['video_info'])
            if match:
                width, _ = match.groups()
                quality = scraper_utils.width_get_quality(width)
            else:
                if video.video_type == VIDEO_TYPES.MOVIE:
                    _, _, height, _ = scraper_utils.parse_movie_link(item['name'])
                    quality = scraper_utils.height_get_quality(height)
                elif video.video_type == VIDEO_TYPES.EPISODE:
                    _, _, _, height, _ = scraper_utils.parse_episode_link(item['name'])
                    if int(height) > -1:
                        quality = scraper_utils.height_get_quality(height)
                    else:
                        quality = QUALITIES.HIGH
                else:
                    quality = QUALITIES.HIGH

            if 'url_pls' in item:
                size_gb = scraper_utils.format_size(int(item['size']), 'B')
                if self.max_bytes and int(item['size']) > self.max_bytes:
                    log_utils.log('Result skipped, Too big: |%s| - %s (%s) > %s (%sGB)' % (item['name'], item['size'], size_gb, self.max_bytes, self.max_gb))
                    continue

                stream_url = item['url_pls']
                host = self._get_direct_hostname(stream_url)
                hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None,
                          'host': host, 'quality': quality, 'direct': True}
                hoster['size'] = size_gb
                hoster['extra'] = item['name']
                hosters.append(hoster)
            else:
                log_utils.log('Furk.net result skipped - no playlist: |%s|' % (json.dumps(item)), log_utils.LOGDEBUG)
    return hosters
def __get_gk_links(self, html, page_url, page_quality):
    sources = {}
    fragment = dom_parser.parse_dom(html, 'div', {'id': 'load_server'})
    if fragment:
        for link in dom_parser.parse_dom(fragment[0], 'li'):
            film_id = dom_parser.parse_dom(link, 'a', ret='data-film')
            name_id = dom_parser.parse_dom(link, 'a', ret='data-name')
            server_id = dom_parser.parse_dom(link, 'a', ret='data-server')
            if film_id and name_id and server_id:
                data = {'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0]}
                headers = XHR
                headers['Referer'] = page_url
                url = urlparse.urljoin(self.base_url, LINK_URL)
                html = self._http_get(url, data=data, headers=headers, cache_limit=.25)
                js_data = scraper_utils.parse_json(html, url)
                if 's' in js_data:
                    if isinstance(js_data['s'], basestring):
                        stream_urls = self.__get_real_url(js_data['s'])
                        if stream_urls is not None:
                            if isinstance(stream_urls, basestring):
                                sources[stream_urls] = page_quality
                            else:
                                for item in stream_urls:
                                    stream_url = item['files']
                                    if self._get_direct_hostname(stream_url) == 'gvideo':
                                        quality = scraper_utils.gv_get_quality(stream_url)
                                    elif 'quality' in item:
                                        quality = scraper_utils.height_get_quality(item['quality'])
                                    else:
                                        quality = page_quality
                                    sources[stream_url] = quality
                    else:
                        for link in js_data['s']:
                            stream_url = self.__get_real_url(link['file'])
                            if stream_url is not None:
                                if self._get_direct_hostname(stream_url) == 'gvideo':
                                    quality = scraper_utils.gv_get_quality(stream_url)
                                elif 'label' in link:
                                    quality = scraper_utils.height_get_quality(link['label'])
                                else:
                                    quality = page_quality
                                sources[stream_url] = quality
    return sources
def __get_quality(self, item, video):
    if 'width' in item:
        return scraper_utils.width_get_quality(item['width'])
    elif 'height' in item:
        return scraper_utils.height_get_quality(item['height'])
    else:
        if video.video_type == VIDEO_TYPES.MOVIE:
            _title, _year, height, _extra = scraper_utils.parse_movie_link(item['name'])
        else:
            _title, _season, _episode, height, _extra = scraper_utils.parse_episode_link(item['name'])
        return scraper_utils.height_get_quality(height)
def resolve_link(self, link):
    try:
        headers = dict([item.split('=') for item in (link.split('|')[1]).split('&')])
        for key in headers: headers[key] = urllib.unquote(headers[key])
        link = link.split('|')[0]
    except:
        headers = {}

    html = self._http_get(link, headers=headers, cache_limit=.5)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'player'})
    if fragment:
        iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
        if iframe_url:
            headers = {'Referer': link}
            html = self._http_get(iframe_url[0], headers=headers, cache_limit=.5)
            match = re.search("window\.atob\('([^']+)", html)
            if match:
                func_count = len(re.findall('window\.atob', html))
                html = match.group(1)
                for _i in xrange(func_count):
                    html = base64.decodestring(html)

            streams = []
            for match in re.finditer('''<source[^>]+src=["']([^'"]+)[^>]+label=['"]([^'"]+)''', html):
                streams.append(match.groups())

            if len(streams) > 1:
                if not self.auto_pick:
                    result = xbmcgui.Dialog().select(i18n('choose_stream'), [e[1] for e in streams])
                    if result > -1:
                        return streams[result][0]
                else:
                    best_stream = ''
                    best_q = 0
                    for stream in streams:
                        stream_url, label = stream
                        if Q_ORDER[scraper_utils.height_get_quality(label)] > best_q:
                            best_q = Q_ORDER[scraper_utils.height_get_quality(label)]
                            best_stream = stream_url

                    if best_stream:
                        return best_stream
            elif streams:
                return streams[0][0]
def __get_quality(self, item, video):
    if 'width' in item:
        return scraper_utils.width_get_quality(item['width'])
    elif 'height' in item:
        return scraper_utils.height_get_quality(item['height'])
    else:
        if video.video_type == VIDEO_TYPES.MOVIE:
            _title, _year, height, _extra = scraper_utils.parse_movie_link(item['name'])
        else:
            _title, _season, _episode, height, _extra = scraper_utils.parse_episode_link(item['name'])
        return scraper_utils.height_get_quality(height)
def __get_quality(self, item, video):
    if 'width' in item and item['width']:
        return scraper_utils.width_get_quality(item['width'])
    elif 'height' in item and item['height']:
        return scraper_utils.height_get_quality(item['height'])
    elif 'name' in item:
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(item['name'])
        else:
            meta = scraper_utils.parse_episode_link(item['name'])
        return scraper_utils.height_get_quality(meta['height'])
    else:
        return QUALITIES.HIGH
def __get_quality(self, item, video):
    if item.get('width'):
        return scraper_utils.width_get_quality(item['width'])
    elif item.get('height'):
        return scraper_utils.height_get_quality(item['height'])
    elif 'name' in item:
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(item['name'])
        else:
            meta = scraper_utils.parse_episode_link(item['name'])
        return scraper_utils.height_get_quality(meta['height'])
    else:
        return QUALITIES.HIGH
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    title = dom_parser2.parse_dom(html, 'title')
    if title:
        title = title[0].content
        title = re.sub('^\[ST\]\s*–\s*', '', title)
        meta = scraper_utils.parse_episode_link(title)
        page_quality = scraper_utils.height_get_quality(meta['height'])
    else:
        page_quality = QUALITIES.HIGH

    fragment = dom_parser2.parse_dom(html, 'section', {'class': 'entry-content'})
    if fragment:
        for _attrs, section in dom_parser2.parse_dom(fragment[0].content, 'p'):
            match = re.search('([^<]*)', section)
            meta = scraper_utils.parse_episode_link(match.group(1))
            if meta['episode'] != '-1' or meta['airdate']:
                section_quality = scraper_utils.height_get_quality(meta['height'])
            else:
                section_quality = page_quality

            if Q_ORDER[section_quality] < Q_ORDER[page_quality]:
                quality = section_quality
            else:
                quality = page_quality

            for attrs, _content in dom_parser2.parse_dom(section, 'a', req='href'):
                stream_url = attrs['href']
                host = urlparse.urlparse(stream_url).hostname
                hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': stream_url,
                          'rating': None, 'quality': quality, 'direct': False}
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': self.base_url + source_url}
    if video.video_type == VIDEO_TYPES.MOVIE:
        meta = scraper_utils.parse_movie_link(source_url)
        stream_url = source_url + scraper_utils.append_headers(headers)
        quality = scraper_utils.height_get_quality(meta['height'])
        hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, stream_url), 'class': self,
                  'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
        if 'format' in meta: hoster['format'] = meta['format']
        hosters.append(hoster)
    else:
        for episode in self.__match_episode(source_url, video):
            meta = scraper_utils.parse_episode_link(episode['title'])
            stream_url = episode['url'] + scraper_utils.append_headers(headers)
            stream_url = stream_url.replace(self.base_url, '')
            quality = scraper_utils.height_get_quality(meta['height'])
            hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, stream_url), 'class': self,
                      'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            if 'format' in meta: hoster['format'] = meta['format']
            if 'size' in episode: hoster['size'] = scraper_utils.format_size(int(episode['size']))
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    match = re.search('var\s*video_id="([^"]+)', html)
    if not match: return hosters

    video_id = match.group(1)
    data = {'v': video_id}
    headers = {'Referer': page_url}
    headers.update(XHR)
    html = self._http_get(INFO_URL, data=data, headers=headers, cache_limit=.5)
    sources = scraper_utils.parse_json(html, INFO_URL)
    for source in sources:
        match = re.search('url=(.*)', sources[source])
        if not match: continue

        stream_url = urllib.unquote(match.group(1))
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(source)
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None,
                  'rating': None, 'url': stream_url, 'direct': True}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    norm_title = scraper_utils.normalize_title(video.title)
    if source_url and source_url != FORCE_NO_MATCH:
        source_url = urlparse.urljoin(self.base_url, source_url)
        for line in self._get_files(source_url, cache_limit=24):
            if not line['directory']:
                match = {}
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(line['link'])
                    if norm_title in scraper_utils.normalize_title(meta['title']):
                        match = line
                elif self.__episode_match(line, video):
                    match = line
                    meta = scraper_utils.parse_episode_link(line['link'])

                if match:
                    if meta['dubbed']: continue
                    stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                    stream_url = stream_url.replace(self.base_url, '')
                    quality = scraper_utils.height_get_quality(meta['height'])
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self,
                              'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    if 'format' in meta: hoster['format'] = meta['format']
                    if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                    hosters.append(hoster)
    return hosters
def __get_gk_links(self, html, page_url, video_type, episode):
    sources = {}
    phimid = dom_parser.parse_dom(html, 'input', {'name': 'phimid'}, ret='value')
    if phimid and video_type == VIDEO_TYPES.EPISODE:
        url = urlparse.urljoin(self.tv_base_url, '/ajax.php')
        data = {'ipos_server': 1, 'phimid': phimid[0], 'keyurl': episode}
        headers = XHR
        headers['Referer'] = page_url
        html = self._http_get(url, data=data, headers=headers, cache_limit=.5)

    for link in dom_parser.parse_dom(html, 'div', {'class': '[^"]*server_line[^"]*'}):
        film_id = dom_parser.parse_dom(link, 'a', ret='data-film')
        name_id = dom_parser.parse_dom(link, 'a', ret='data-name')
        server_id = dom_parser.parse_dom(link, 'a', ret='data-server')
        if film_id and name_id and server_id:
            data = {'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0]}
            headers = XHR
            headers['Referer'] = page_url
            url = urlparse.urljoin(self.__get_base_url(video_type), LINK_URL)
            html = self._http_get(url, data=data, headers=headers, cache_limit=.25)
            js_data = scraper_utils.parse_json(html, url)
            if 's' in js_data:
                if isinstance(js_data['s'], basestring):
                    sources[js_data['s']] = QUALITIES.HIGH
                else:
                    for link in js_data['s']:
                        stream_url = link['file']
                        if self._get_direct_hostname(stream_url) == 'gvideo':
                            quality = scraper_utils.gv_get_quality(stream_url)
                        elif 'label' in link:
                            quality = scraper_utils.height_get_quality(link['label'])
                        else:
                            quality = QUALITIES.HIGH
                        sources[stream_url] = quality
    return sources
def __get_cloud_links(self, html, page_url, sub):
    hosters = []
    html = html.replace('\\"', '"').replace('\\/', '/')
    match = re.search("dizi_kapak_getir\('([^']+)", html)
    if match:
        ep_id = match.group(1)
        for script_url in dom_parser.parse_dom(html, 'script', {'data-cfasync': 'false'}, ret='src'):
            html = self._http_get(script_url, cache_limit=24)
            match1 = re.search("var\s+kapak_url\s*=\s*'([^']+)", html)
            match2 = re.search("var\s+aCtkp\s*=\s*'([^']+)", html)
            if match1 and match2:
                link_url = '%s?fileid=%s&access_token=%s' % (match1.group(1), ep_id, match2.group(1))
                headers = {'Referer': page_url}
                html = self._http_get(link_url, headers=headers, cache_limit=.5)
                js_data = scraper_utils.parse_json(html, link_url)
                for variant in js_data.get('variants', {}):
                    stream_host = random.choice(variant.get('hosts', []))
                    if stream_host:
                        stream_url = STREAM_URL % (stream_host, variant['path'], scraper_utils.get_ua(), urllib.quote(page_url))
                        if not stream_url.startswith('http'):
                            stream_url = 'http://' + stream_url
                        host = self._get_direct_hostname(stream_url)
                        if 'width' in variant:
                            quality = scraper_utils.width_get_quality(variant['width'])
                        elif 'height' in variant:
                            quality = scraper_utils.height_get_quality(variant['height'])
                        else:
                            quality = QUALITIES.HIGH
                        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None,
                                  'rating': None, 'url': stream_url, 'direct': True}
                        hoster['subs'] = sub
                        hosters.append(hoster)
    return hosters
def __get_gk_links(self, html, page_url): sources = {} match = re.search('{link\s*:\s*"([^"]+)', html) if match: data = {'link': match.group(1)} url = urlparse.urljoin(self.base_url, LINK_URL) headers = {'Referer': page_url} html = self._http_get(url, data=data, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 'link' in js_data: for link in js_data['link']: if 'type' in link and link[ 'type'] == 'mp4' and 'link' in link: if self._get_direct_hostname(link['link']) == 'gvideo': quality = scraper_utils.gv_get_quality( link['link']) elif 'label' in link: quality = scraper_utils.height_get_quality( link['label']) else: quality = QUALITIES.HIGH sources[link['link']] = quality return sources
def __grab_links(self, grab_url, query, referer):
    try:
        sources = {}
        query['mobile'] = '0'
        query.update(self.__get_token(query))
        grab_url = grab_url + '?' + urllib.urlencode(query)
        headers = XHR
        headers['Referer'] = referer
        html = self._http_get(grab_url, headers=headers, cache_limit=.5)
        js_data = scraper_utils.parse_json(html, grab_url)
        if 'data' in js_data:
            for link in js_data['data']:
                stream_url = link['file']
                if self._get_direct_hostname(stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'label' in link:
                    quality = scraper_utils.height_get_quality(link['label'])
                else:
                    quality = QUALITIES.HIGH
                sources[stream_url] = {'direct': True, 'quality': quality}
    except Exception as e:
        log_utils.log('9Movies Link Parse Error: %s' % (e), log_utils.LOGWARNING)
    return sources
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        pattern = '\$\.post\("([^"]+)"\s*,\s*\{(.*?)\}'
        match = re.search(pattern, html)
        if match:
            post_url, post_data = match.groups()
            data = self.__get_data(post_data)
            html = self._http_get(post_url, data=data, cache_limit=.5)
            js_result = scraper_utils.parse_json(html, post_url)
            for key in js_result:
                stream_url = js_result[key]
                host = self._get_direct_hostname(stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    quality = scraper_utils.height_get_quality(key)
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None,
                          'rating': None, 'url': stream_url, 'direct': True}
                hosters.append(hoster)
    return hosters
def __get_streams_from_m3u8(self, playlist, st_url, vid_id, stream_id): sources = {} quality = QUALITIES.HIGH audio_group = '' audio_stream = '' stream_name = 'Unknown' bandwidth = 0 for line in playlist: if line.startswith('#EXT-X-MEDIA'): match = re.search('GROUP-ID="([^"]+).*?URI="([^"]+)', line) if match: audio_group, audio_stream = match.groups() if line.startswith('#EXT-X-STREAM-INF'): match = re.search('BANDWIDTH=(\d+).*?NAME="(\d+p)', line) if match: bandwidth, stream_name = match.groups() quality = scraper_utils.height_get_quality(stream_name) elif line.endswith('m3u8'): stream_url = urlparse.urljoin(st_url, line) query = {'audio_group': audio_group, 'audio_stream': audio_stream, 'stream_name': stream_name, 'bandwidth': bandwidth, 'video_stream': stream_url, 'vid_id': vid_id, 'stream_id': stream_id} stream_url = urllib.urlencode(query) sources[stream_url] = quality return sources
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        if re.search('<span[^>]+>\s*Low Quality\s*</span>', html):
            quality = QUALITIES.LOW
        else:
            quality = QUALITIES.HIGH

        for match in re.finditer('gkpluginsphp.*?link\s*:\s*"([^"]+)', html):
            data = {'link': match.group(1)}
            headers = XHR
            headers['Referer'] = url
            gk_url = urlparse.urljoin(self.base_url, GK_URL)
            html = self._http_get(gk_url, data=data, headers=headers, cache_limit=.25)
            js_result = scraper_utils.parse_json(html, gk_url)
            if 'link' in js_result and 'func' not in js_result:
                if isinstance(js_result['link'], list):
                    sources = dict((link['link'], scraper_utils.height_get_quality(link['label'])) for link in js_result['link'])
                else:
                    sources = {js_result['link']: quality}

                for source in sources:
                    host = self._get_direct_hostname(source)
                    hoster = {'multi-part': False, 'url': source, 'class': self, 'quality': sources[source],
                              'host': host, 'rating': None, 'views': None, 'direct': True}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=0)
    match = re.search('var\s*video_id\s*=\s*"([^"]+)', html)
    if not match: return hosters

    video_id = match.group(1)
    headers = {'Referer': page_url}
    headers.update(XHR)
    _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)
    vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
    html = self._http_get(vid_url, data={'v': video_id}, headers=headers, cache_limit=0)
    for source, value in scraper_utils.parse_json(html, vid_url).iteritems():
        match = re.search('url=(.*)', value)
        if not match: continue

        stream_url = urllib.unquote(match.group(1))
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(source)
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None,
                  'rating': None, 'url': stream_url, 'direct': True}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'post-cont'})
    if not fragment: return hosters

    match = re.search('<p>\s*<strong>(.*?)<script', fragment[0].content, re.DOTALL)
    if not match: return hosters

    for attrs, _content in dom_parser2.parse_dom(match.group(1), 'a', req='href'):
        stream_url = attrs['href']
        if scraper_utils.excluded_link(stream_url): continue

        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(stream_url)
        else:
            meta = scraper_utils.parse_episode_link(stream_url)
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.get_quality(video, host, scraper_utils.height_get_quality(meta['height']))
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': stream_url,
                  'rating': None, 'quality': quality, 'direct': False}
        hosters.append(hoster)
    return hosters
def __get_gk_links(self, link, iframe_url):
    sources = {}
    data = {'link': link}
    headers = XHR
    headers.update({'Referer': iframe_url, 'User-Agent': USER_AGENT})
    html = self._http_get(GK_URL, data=data, headers=headers, cache_limit=.25)
    js_data = scraper_utils.parse_json(html, GK_URL)
    if 'link' in js_data:
        if isinstance(js_data['link'], basestring):
            stream_url = js_data['link']
            if self._get_direct_hostname(stream_url) == 'gvideo':
                temp = self._parse_google(stream_url)
                for source in temp:
                    sources[source] = {'quality': scraper_utils.gv_get_quality(source), 'direct': True}
            else:
                sources[stream_url] = {'quality': QUALITIES.HIGH, 'direct': False}
        else:
            for link in js_data['link']:
                stream_url = link['link']
                if self._get_direct_hostname(stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'label' in link:
                    quality = scraper_utils.height_get_quality(link['label'])
                else:
                    quality = QUALITIES.HIGH
                sources[stream_url] = {'quality': quality, 'direct': True}
    return sources
def __get_links_from_playlist(self, grab_url, headers):
    sources = {}
    grab_url = grab_url.replace('\\', '')
    grab_html = self._http_get(grab_url, headers=headers, cache_limit=.5)
    js_data = scraper_utils.parse_json(grab_html, grab_url)
    try:
        playlist = js_data['playlist'][0]['sources']
    except:
        playlist = []

    for item in playlist:
        stream_url = item.get('file')
        if stream_url:
            if stream_url.startswith('/'):
                stream_url = scraper_utils.urljoin(self.base_url, stream_url)
                redir_url = self._http_get(stream_url, headers=headers, allow_redirect=False, method='HEAD')
                if redir_url.startswith('http'):
                    stream_url = redir_url

            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in item:
                quality = scraper_utils.height_get_quality(item['label'])
            else:
                quality = QUALITIES.HIGH

            logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
            sources[stream_url] = {'quality': quality, 'direct': True}
            if not kodi.get_setting('scraper_url'): break
    return sources
def __get_gk_links(self, html): sources = {} match = re.search('{link\s*:\s*"([^"]+)', html) if match: iframe_url = match.group(1) data = {'link': iframe_url} headers = {'Referer': iframe_url} html = self._http_get(self.gk_url, data=data, headers=headers, cache_limit=.5) js_data = scraper_utils.parse_json(html, self.gk_url) links = js_data.get('link', []) if isinstance(links, basestring): links = [{'link': links}] for link in links: stream_url = link['link'] if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) direct = True elif 'label' in link: quality = scraper_utils.height_get_quality(link['label']) direct = True else: quality = QUALITIES.HIGH direct = False sources[stream_url] = {'quality': quality, 'direct': direct} return sources
def get_sources(self, video):
    source_url = self.get_url(video)
    sources = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        is_3d = False
        page_quality = QUALITIES.HD720
        title = dom_parser.parse_dom(html, 'title')
        if title:
            title = title[0]
            match = re.search('(\d{3,})p', title)
            if match:
                page_quality = scraper_utils.height_get_quality(match.group(1))

            is_3d = True if re.search('\s+3D\s+', title) else False

        fragments = dom_parser.parse_dom(html, 'div', {'class': 'txt-block'}) + dom_parser.parse_dom(html, 'li', {'class': 'elemento'})
        for fragment in fragments:
            for match in re.finditer('href="([^"]+)', fragment):
                stream_url = match.group(1)
                host = urlparse.urlparse(stream_url).hostname
                q_str = dom_parser.parse_dom(fragment, 'span', {'class': 'd'})
                q_str = q_str[0].upper() if q_str else ''
                base_quality = QUALITY_MAP.get(q_str, page_quality)
                quality = scraper_utils.get_quality(video, host, base_quality)
                source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality,
                          'views': None, 'rating': None, 'direct': False}
                source['format'] = 'x265'
                source['3D'] = is_3d
                sources.append(source)
    return sources
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}):
        iframe_src = dom_parser2.parse_dom(fragment, 'iframe', req='src')
        if iframe_src:
            iframe_src = iframe_src[0].attrs['src']
            if re.search('o(pen)?load', iframe_src, re.I):
                meta = scraper_utils.parse_movie_link(iframe_src)
                quality = scraper_utils.height_get_quality(meta['height'])
                links = {iframe_src: {'quality': quality, 'direct': False}}
            else:
                links = self.__get_links(iframe_src, url)

            for link in links:
                direct = links[link]['direct']
                quality = links[link]['quality']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, link)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(link)
                    stream_url = link + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
                else:
                    host = urlparse.urlparse(link).hostname
                    stream_url = link

                source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality,
                          'views': None, 'rating': None, 'direct': direct}
                hosters.append(source)
    return hosters
def __get_gk_links(self, html, page_url, video_type, episode): sources = {} for link in dom_parser.parse_dom(html, 'div', {'class': '[^"]*server_line[^"]*'}): film_id = dom_parser.parse_dom(link, 'a', ret='data-film') name_id = dom_parser.parse_dom(link, 'a', ret='data-name') server_id = dom_parser.parse_dom(link, 'a', ret='data-server') if film_id and name_id and server_id: data = {'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0]} headers = {'Referer': page_url} headers.update(XHR) url = urlparse.urljoin(self.base_url, LINK_URL) html = self._http_get(url, data=data, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 's' in js_data and isinstance(js_data['s'], basestring): url = urlparse.urljoin(self.base_url, LINK_URL3) params = {'u': js_data['s'], 'w': '100%', 'h': 450, 's': js_data['v']} html = self._http_get(url, params=params, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 'data' in js_data and js_data['data']: if isinstance(js_data['data'], basestring): sources[js_data['data']] = QUALITIES.HIGH else: for link in js_data['data']: stream_url = link['files'] if self._get_direct_hostname(stream_url) == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) elif 'quality' in link: quality = scraper_utils.height_get_quality(link['quality']) else: quality = QUALITIES.HIGH sources[stream_url] = quality return sources
def __get_links_from_xml(self, xml, video):
    sources = {}
    try:
        root = ET.fromstring(xml)
        for item in root.findall('.//item'):
            title = item.find('title').text
            for source in item.findall('{http://rss.jwpcdn.com/}source'):
                stream_url = source.get('file')
                label = source.get('label')
                if self._get_direct_hostname(stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif label:
                    quality = scraper_utils.height_get_quality(label)
                else:
                    quality = scraper_utils.blog_get_quality(video, title, '')
                sources[stream_url] = {'quality': quality, 'direct': True}
                log_utils.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        log_utils.log('Exception during 123Movies XML Parse: %s' % (e), log_utils.LOGWARNING)
    return sources
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) pattern = '\$\.post\("([^"]+)"\s*,\s*\{(.*?)\}' match = re.search(pattern, html) if match: post_url, post_data = match.groups() data = self.__get_data(post_data) html = self._http_get(post_url, data=data, cache_limit=.5) js_result = scraper_utils.parse_json(html, post_url) for key in js_result: stream_url = js_result[key] host = self._get_direct_hostname(stream_url) if host == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) else: quality = scraper_utils.height_get_quality(key) hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True} hosters.append(hoster) return hosters
def __get_gk_links(self, html, page_url): sources = {} for link in dom_parser.parse_dom(html, 'div', {'class': '[^"]*server_line[^"]*'}): film_id = dom_parser.parse_dom(link, 'a', ret='data-film') name_id = dom_parser.parse_dom(link, 'a', ret='data-name') server_id = dom_parser.parse_dom(link, 'a', ret='data-server') if film_id and name_id and server_id: data = {'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0]} headers = XHR headers['Referer'] = page_url url = urlparse.urljoin(self.base_url, LINK_URL) html = self._http_get(url, data=data, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 's' in js_data: if isinstance(js_data['s'], basestring): sources[js_data['s']] = QUALITIES.HIGH else: for link in js_data['s']: stream_url = link['file'] if self._get_direct_hostname(stream_url) == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) elif 'label' in link: quality = scraper_utils.height_get_quality(link['label']) else: quality = QUALITIES.HIGH sources[stream_url] = quality return sources
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    sources = self.__get_post_links(html)
    for source, value in sources.iteritems():
        if scraper_utils.excluded_link(source): continue

        host = urlparse.urlparse(source).hostname
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(value['release'])
        else:
            meta = scraper_utils.parse_episode_link(value['release'])
        quality = scraper_utils.height_get_quality(meta['height'])
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source,
                  'rating': None, 'quality': quality, 'direct': False}
        if 'format' in meta: hoster['format'] = meta['format']
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        match = re.search('var\s*video_id="([^"]+)', html)
        if match:
            video_id = match.group(1)
            url = urlparse.urljoin(self.base_url, VIDEO_URL)
            data = {'v': video_id}
            headers = XHR
            headers['Referer'] = page_url
            html = self._http_get(url, data=data, headers=headers, cache_limit=.5)
            sources = scraper_utils.parse_json(html, url)
            for source in sources:
                match = re.search('url=(.*)', sources[source])
                if match:
                    stream_url = urllib.unquote(match.group(1))
                    host = self._get_direct_hostname(stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    else:
                        quality = scraper_utils.height_get_quality(source)
                    stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None,
                              'rating': None, 'url': stream_url, 'direct': True}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*screen[^"]*'}) if fragment: js_src = dom_parser.parse_dom(fragment[0], 'script', ret='src') if js_src: js_url = urlparse.urljoin(self.base_url, js_src[0]) html = self._http_get(js_url, cache_limit=.5) else: html = fragment[0] for match in re.finditer('<source[^>]+src="([^"]+)', html): stream_url = match.group(1) host = self._get_direct_hostname(stream_url) if host == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) else: _, _, height, _ = scraper_utils.parse_movie_link(stream_url) quality = scraper_utils.height_get_quality(height) stream_url += '|User-Agent=%s' % (scraper_utils.get_ua()) hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True} hosters.append(hoster) return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) match = re.search('<iframe[^>]+src="([^"]+watch=([^"]+))', html) if match: iframe_url, link_id = match.groups() data = {'link': link_id} headers = {'Referer': iframe_url} headers['User-Agent'] = LOCAL_USER_AGENT gk_url = urlparse.urljoin(self.base_url, GK_URL) html = self._http_get(gk_url, data=data, headers=headers, cache_limit=.5) js_data = scraper_utils.parse_json(html, gk_url) if 'link' in js_data: if isinstance(js_data['link'], list): sources = dict((link['link'], scraper_utils.height_get_quality(link['label'])) for link in js_data['link']) direct = True else: sources = {js_data['link']: QUALITIES.HIGH} direct = False for source in sources: source = source.replace('\\/', '/') if direct: host = self._get_direct_hostname(source) else: host = urlparse.urlparse(source).hostname hoster = {'multi-part': False, 'url': source, 'class': self, 'quality': sources[source], 'host': host, 'rating': None, 'views': None, 'direct': direct} hosters.append(hoster) return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) match = re.search('<iframe[^>]+src="([^"]+watch=([^"]+))', html) if match: iframe_url, link_id = match.groups() data = {'link': link_id} headers = {'Referer': iframe_url} gk_url = urlparse.urljoin(self.base_url, GK_URL) html = self._http_get(gk_url, data=data, headers=headers, cache_limit=.5) js_data = scraper_utils.parse_json(html, gk_url) if 'link' in js_data: for link in js_data['link']: stream_url = link['link'] host = self._get_direct_hostname(stream_url) if host == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) else: quality = scraper_utils.height_get_quality(link['label']) hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality, 'host': host, 'rating': None, 'views': None, 'direct': True} hosters.append(hoster) return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) q_str = dom_parser.parse_dom(html, 'span', {'class': 'calidad\d*'}) if q_str: if q_str[0].upper() == 'COMING SOON': return hosters try: quality = scraper_utils.height_get_quality(q_str[0]) except: quality = QUALITIES.HIGH else: quality = QUALITIES.HIGH fragment = dom_parser.parse_dom(html, 'div', {'id': 'player\d+'}) if fragment: for match in re.finditer('<iframe[^>]+src="([^"]+)', fragment[0], re.I): stream_url = match.group(1) host = urlparse.urlparse(stream_url).hostname hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': False} hosters.append(hoster) return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        html = self.__get_embedded_page(source_url)
        fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*download[^"]*'})
        if fragment:
            page_url = urlparse.urljoin(self.base_url, source_url)
            for match in re.finditer('href="([^"]+)[^>]+>([^<]+)', fragment[0]):
                stream_url, label = match.groups()
                quality = scraper_utils.height_get_quality(label)
                stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(page_url))
                hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self,
                          'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        # exit early if only a trailer is available
        if re.search('Şu an fragman*', html, re.I):
            return hosters

        match = re.search('''url\s*:\s*"([^"]+)"\s*,\s*data:\s*["'](id=\d+)''', html)
        if match:
            url, data = match.groups()
            url = urlparse.urljoin(self.base_url, url)
            result = self._http_get(url, data=data, headers=XHR, cache_limit=.5)
            for match in re.finditer('"videolink\d*"\s*:\s*"([^"]+)","videokalite\d*"\s*:\s*"?(\d+)p?', result):
                stream_url, height = match.groups()
                stream_url = stream_url.replace('\\/', '/')
                host = self._get_direct_hostname(stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    quality = scraper_utils.height_get_quality(height)
                stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(page_url))
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None,
                          'rating': None, 'url': stream_url, 'direct': True}
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        match = re.search('<b>Views:.*?([\d,]+)', html)
        if match:
            views = int(match.group(1).replace(',', ''))
        else:
            views = None

        html = self.__get_watch_now(html)
        for match in re.finditer('<span class="svname">\s*(.*?)\s*:?\s*</span>(.*?)(?=<span class="svname">|</div>)', html):
            title, fragment = match.groups()
            for match in re.finditer('<a[^>]+id="ep_\d+"[^>]+href="([^"]+)[^>]+>\s*([^<]+)', fragment):
                stream_url, name = match.groups()
                match = re.search('(\d+)', name)
                if video.video_type == VIDEO_TYPES.MOVIE:
                    if match:
                        quality = scraper_utils.height_get_quality(match.group(1))
                    else:
                        quality = QUALITIES.HIGH
                else:
                    if not match or int(name) != int(video.episode):
                        continue
                    quality = QUALITIES.HIGH

                stream_url += '|User-Agent=%s&Referer=%s&Cookie=%s' % (scraper_utils.get_ua(), url, self._get_stream_cookies())
                hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self,
                          'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': True}
                hoster['title'] = title
                hosters.append(hoster)
    return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*screen[^"]*'}) if fragment: js_src = dom_parser.parse_dom(fragment[0], 'script', ret='src') if js_src: js_url = urlparse.urljoin(self.base_url, js_src[0]) html = self._http_get(js_url, cache_limit=.5) else: html = fragment[0] for match in re.finditer('<source[^>]+src="([^"]+)', html): stream_url = match.group(1) host = self._get_direct_hostname(stream_url) if host == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) elif 'blogspot' in stream_url: quality = scraper_utils.gv_get_quality(stream_url) else: _, _, height, _ = scraper_utils.parse_movie_link(stream_url) quality = scraper_utils.height_get_quality(height) stream_url += '|User-Agent=%s' % (scraper_utils.get_ua()) hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True} hosters.append(hoster) return hosters
def __get_streams_from_m3u8(self, playlist, st_url, vid_id, stream_id): sources = {} quality = QUALITIES.HIGH audio_group = '' audio_stream = '' stream_name = 'Unknown' bandwidth = 0 for line in playlist: if line.startswith('#EXT-X-MEDIA'): match = re.search('GROUP-ID="([^"]+).*?URI="([^"]+)', line) if match: audio_group, audio_stream = match.groups() if line.startswith('#EXT-X-STREAM-INF'): match = re.search('BANDWIDTH=(\d+).*?NAME="(\d+p)', line) if match: bandwidth, stream_name = match.groups() quality = scraper_utils.height_get_quality(stream_name) elif line.endswith('m3u8'): stream_url = urlparse.urljoin(st_url, line) query = { 'audio_group': audio_group, 'audio_stream': audio_stream, 'stream_name': stream_name, 'bandwidth': bandwidth, 'video_stream': stream_url, 'vid_id': vid_id, 'stream_id': stream_id } stream_url = urllib.urlencode(query) sources[stream_url] = quality return sources
def __get_json_links(self, html, sub):
    hosters = []
    js_data = scraper_utils.parse_json(html)
    if 'sources' in js_data:
        for source in js_data.get('sources', []):
            stream_url = source.get('file')
            if stream_url is None: continue

            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                quality = QUALITIES.HIGH
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None,
                      'rating': None, 'url': stream_url, 'direct': True}
            hoster['subs'] = sub
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    headers = {'User-Agent': LOCAL_UA}
    html = self._http_get(url, require_debrid=False, headers=headers, cache_limit=.5)
    for match in re.finditer("<span\s+class='info2'(.*?)(<span\s+class='info|<hr\s*/>)", html, re.DOTALL):
        for match2 in re.finditer('href="([^"]+)', match.group(1)):
            stream_url = match2.group(1)
            meta = scraper_utils.parse_episode_link(stream_url)
            quality = scraper_utils.height_get_quality(meta['height'])
            host = urlparse.urlparse(stream_url).hostname
            hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': stream_url,
                      'rating': None, 'quality': quality, 'direct': False}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        match = re.search('''<option[^>]+value\s*=\s*["']([^"']+)[^>]*>(?:Altyaz.{1,3}s.{1,3}z)<''', html)
        if match:
            option_url = urlparse.urljoin(self.base_url, match.group(1))
            html = self._http_get(option_url, cache_limit=.25)
            fragment = dom_parser.parse_dom(html, 'span', {'class': 'object-wrapper'})
            if fragment:
                iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
                if iframe_url:
                    html = self._http_get(iframe_url[0], cache_limit=.25)

            seen_urls = {}
            for match in re.finditer('"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?[^"]*"', html):
                stream_url, height = match.groups()
                if stream_url not in seen_urls:
                    seen_urls[stream_url] = True
                    stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                    host = self._get_direct_hostname(stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    else:
                        quality = scraper_utils.height_get_quality(height)
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None,
                              'rating': None, 'url': stream_url, 'direct': True}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) q_str = dom_parser.parse_dom(html, 'span', {'class': 'calidad\d*'}) if q_str: if q_str[0].upper() == 'COMING SOON': return hosters try: quality = scraper_utils.height_get_quality(q_str[0]) except: quality = QUALITIES.HIGH else: quality = QUALITIES.HIGH fragment = dom_parser.parse_dom(html, 'div', {'id': 'player\d+'}) if fragment: for match in re.finditer('<iframe[^>]+src="([^"]+)', fragment[0], re.I): stream_url = match.group(1) host = urlparse.urlparse(stream_url).hostname hoster = { 'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': False } hosters.append(hoster) return hosters