def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'videos'})
        if fragment:
            for match in re.finditer('href="([^"]+)[^>]*>([^<]+)', fragment[0]):
                page_url, page_label = match.groups()
                page_label = page_label.lower()
                if page_label not in ALLOWED_LABELS: continue
                sources = self.__get_sources(page_url, ALLOWED_LABELS[page_label])
                for source in sources:
                    host = self._get_direct_hostname(source)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(source)
                        direct = True
                        stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
                    elif sources[source]['direct']:
                        quality = sources[source]['quality']
                        direct = True
                        stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
                    else:
                        quality = sources[source]['quality']
                        direct = False
                        host = urlparse.urlparse(source).hostname
                        stream_url = source
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
                    if sources[source]['subs']: hoster['subs'] = 'Turkish Subtitles'
                    hosters.append(hoster)
    return hosters
def resolve_link(self, link):
    try:
        headers = dict([item.split('=') for item in (link.split('|')[1]).split('&')])
        for key in headers: headers[key] = urllib.unquote(headers[key])
        link = link.split('|')[0]
    except:
        headers = {}

    if not link.startswith('http'):
        link = urlparse.urljoin(self.base_url, link)
    html = self._http_get(link, headers=headers, cache_limit=0)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'player'})
    if fragment:
        iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
        if iframe_url:
            iframe_url = iframe_url[0]
            headers = {'Referer': link}
            html = self._http_get(iframe_url, headers=headers, cache_limit=0)
            sitekey = dom_parser.parse_dom(html, 'div', {'class': 'g-recaptcha'}, ret='data-sitekey')
            if sitekey:
                token = recaptcha_v2.UnCaptchaReCaptcha().processCaptcha(sitekey[0], lang='en')
                if token:
                    data = {'g-recaptcha-response': token}
                    html = self._http_get(iframe_url, data=data, cache_limit=0)
                    log_utils.log(html)

            match = re.search("\.replace\(\s*'([^']+)'\s*,\s*'([^']*)'\s*\)", html, re.I)
            if match:
                html = html.replace(match.group(1), match.group(2))

            match = re.search("window\.atob[\([]+'([^']+)", html)
            if match:
                func_count = len(re.findall('window\.atob', html))
                html = match.group(1)
                for _i in xrange(func_count):
                    html = base64.decodestring(html)

            streams = []
            for match in re.finditer('''<source[^>]+src=["']([^'"]+)[^>]+label=['"]([^'"]+)''', html):
                streams.append(match.groups())

            if len(streams) > 1:
                if not self.auto_pick:
                    result = xbmcgui.Dialog().select(i18n('choose_stream'), [e[1] for e in streams])
                    if result > -1:
                        return streams[result][0] + '|User-Agent=%s' % (scraper_utils.get_ua())
                else:
                    best_stream = ''
                    best_q = 0
                    for stream in streams:
                        stream_url, label = stream
                        if Q_ORDER[scraper_utils.height_get_quality(label)] > best_q:
                            best_q = Q_ORDER[scraper_utils.height_get_quality(label)]
                            best_stream = stream_url
                    if best_stream:
                        return best_stream + '|User-Agent=%s' % (scraper_utils.get_ua())
            elif streams:
                return streams[0][0] + '|User-Agent=%s' % (scraper_utils.get_ua())
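# For reference: the '<url>|Header1=Val1&Header2=Val2' suffix built above is the
# Kodi convention for attaching HTTP headers to a playable URL, and it is what the
# try-block at the top of resolve_link() unpacks. A round-trip sketch with
# hypothetical values:
#
#   link = 'http://example.com/v.mp4|User-Agent=Mozilla%2F5.0&Referer=http%3A%2F%2Fexample.com%2F'
#   url, _, header_str = link.partition('|')
#   headers = dict(item.split('=') for item in header_str.split('&'))
#   headers = dict((k, urllib.unquote(v)) for k, v in headers.items())
#   # headers -> {'User-Agent': 'Mozilla/5.0', 'Referer': 'http://example.com/'}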
def __create_source(self, stream_url, height, page_url, subs=False, direct=True):
    if direct:
        stream_url = stream_url.replace('\\/', '/')
        if self.get_name().lower() in stream_url:
            headers = {'Referer': page_url}
            redir_url = self._http_get(stream_url, headers=headers, method='HEAD', allow_redirect=False, cache_limit=.25)
            if redir_url.startswith('http'):
                stream_url = redir_url
                stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            else:
                stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url, 'Cookie': self._get_stream_cookies()})
        else:
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
        host = scraper_utils.get_direct_hostname(self, stream_url)
    else:
        host = urlparse.urlparse(stream_url).hostname

    if host == 'gvideo':
        quality = scraper_utils.gv_get_quality(stream_url)
    else:
        quality = scraper_utils.height_get_quality(height)

    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
    if subs: hoster['subs'] = 'Turkish Subtitles'
    return hoster
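# Note: scraper_utils.append_headers() is assumed here to serialize a header dict
# into the same pipe-delimited suffix that the older scrapers build by hand with
# '|User-Agent=%s' string formatting. A minimal sketch under that assumption, not
# the canonical implementation:
#
#   def append_headers(headers):
#       return '|%s' % '&'.join(['%s=%s' % (key, urllib.quote_plus(str(headers[key]))) for key in headers])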
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    iframe_url = dom_parser2.parse_dom(html, 'iframe', {'id': 'myiframe'}, req='src', exclude_comments=True)
    if not iframe_url: return hosters
    iframe_url = iframe_url[0].attrs['src']
    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    for source in dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req=['src', 'data-res']):
        stream_url = source.attrs['src']
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        else:
            quality = scraper_utils.height_get_quality(source.attrs['data-res'])
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(source)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        fragment = dom_parser.parse_dom(html, 'ul', {'class': 'dropdown-menu'})
        if fragment:
            match = re.search('''href=['"]([^'"]+)[^>]*>(?:Altyaz.{1,3}s.{1,3}z)<''', fragment[0])
            if match:
                option_url = urlparse.urljoin(self.base_url, match.group(1))
                html = self._http_get(option_url, cache_limit=2)
                fragment = dom_parser.parse_dom(html, 'div', {'class': 'video-player'})
                if fragment:
                    iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
                    if iframe_url:
                        html = self._http_get(iframe_url[0], cache_limit=.25)
                        iframe_url = dom_parser.parse_dom(html, 'iframe', {'id': 'ifr'}, ret='src')
                        if iframe_url:
                            html = self._http_get(iframe_url[0], allow_redirect=False, cache_limit=.25)
                            if html.startswith('http'):
                                stream_url = html
                                host = urlparse.urlparse(stream_url).hostname
                                stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                                quality = QUALITIES.HIGH
                                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': False}
                                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = {}  # __get_sources() returns a url -> info dict, so start with a dict, not a list
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        views = None
        fragment = dom_parser.parse_dom(html, 'img', {'src': '[^"]*view_icon.png'})
        if fragment:
            match = re.search('(\d+)', fragment[0])
            if match:
                views = match.group(1)

        match = re.search('href="([^"]+-full-movie-[^"]+)', html)
        if match:
            url = match.group(1)
            html = self._http_get(url, cache_limit=.5)
            sources = self.__get_sources(html, url)

        match = re.search('href="([^"]+)[^>]*>\s*<button', html)
        if match:
            html = self._http_get(match.group(1), cache_limit=.5)
            sources.update(self.__get_sources(html, url))

        for source in sources:
            host = self._get_direct_hostname(source)
            stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            quality = sources[source]['quality']
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': True}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        hosts = dom_parser.parse_dom(html, 'p', {'class': 'server_servername'})
        links = dom_parser.parse_dom(html, 'p', {'class': 'server_play'})
        for host, link_frag in zip(hosts, links):
            stream_url = dom_parser.parse_dom(link_frag, 'a', ret='href')
            if stream_url:
                stream_url = stream_url[0]
                # re.sub()'s 4th positional argument is the match count, so flags
                # must be passed by keyword
                host = re.sub('^Server\s*', '', host, flags=re.I)
                host = re.sub('\s*Link\s+\d+', '', host)
                if host.lower() == 'google':
                    sources = self.__get_gvideo_links(stream_url)
                else:
                    sources = [{'host': host, 'link': stream_url}]

                for source in sources:
                    stream_url = source['link']
                    host = self._get_direct_hostname(stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                        stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                        direct = True
                    else:
                        host = HOST_SUB.get(source['host'].lower(), source['host'])
                        quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                        direct = False
                    hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                    hosters.append(hoster)
    return hosters
def __get_king_links(self, iframe_url):
    hosters = []
    match = re.search('v=(.*)', iframe_url)
    if match:
        data = {'ID': match.group(1)}
        headers = {'Referer': iframe_url}
        headers.update(XHR)
        xhr_url = iframe_url.split('?')[0] + '?p=GetVideoSources'
        html = self._http_get(xhr_url, data=data, headers=headers, cache_limit=.5)
        js_data = scraper_utils.parse_json(html, xhr_url)
        try:
            for source in js_data['VideoSources']:
                stream_url = source['file'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                host = self._get_direct_hostname(source['file'])
                label = source.get('label', '')
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(source['file'])
                elif label.isdigit():
                    quality = scraper_utils.height_get_quality(label)
                else:
                    quality = QUALITY_MAP.get(label, QUALITIES.HIGH)
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
                hosters.append(hoster)
        except:
            pass
    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment: html = fragment[0].content
    links = scraper_utils.parse_sources_list(self, html)
    for link in links:
        stream_url = link
        if self.base_url in link:
            redir_url = self._http_get(link, headers={'Referer': url}, allow_redirect=False, method='HEAD')
            if redir_url.startswith('http'):
                stream_url = redir_url

        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = links[link]['quality']
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(source)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        match = re.search('var\s*video_id="([^"]+)', html)
        if match:
            video_id = match.group(1)
            url = urlparse.urljoin(self.base_url, VIDEO_URL)
            data = {'v': video_id}
            headers = dict(XHR)  # copy so the shared XHR header dict isn't mutated
            headers['Referer'] = page_url
            html = self._http_get(url, data=data, headers=headers, cache_limit=.5)
            sources = scraper_utils.parse_json(html, url)
            for source in sources:
                match = re.search('url=(.*)', sources[source])
                if match:
                    stream_url = urllib.unquote(match.group(1))
                    host = self._get_direct_hostname(stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    else:
                        quality = scraper_utils.height_get_quality(source)
                    stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=1) match = re.search('"file"\s*:\s*"([^"]+)', html) if match: if video.video_type == VIDEO_TYPES.MOVIE: quality = QUALITIES.HD720 else: quality = QUALITIES.HIGH stream_url = match.group(1) + '|User-Agent=%s&Referer=%s' % ( scraper_utils.get_ua(), urllib.quote(url)) hoster = { 'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'url': stream_url, 'quality': quality, 'views': None, 'rating': None, 'direct': True } hosters.append(hoster) return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, headers=XHR, cache_limit=.5)
        pattern = "source src='([^']+)'\s+type='video/([^']+)"
        quality = QUALITIES.HD720
        if video.video_type == VIDEO_TYPES.MOVIE:
            match = re.search('<a\s+data-href="([^"]+)', html)
            if match:
                source_url = match.group(1)
                url = urlparse.urljoin(self.base_url, source_url)
                html = self._http_get(url, headers=XHR, cache_limit=.5)

        for match in re.finditer(pattern, html):
            stream_url = match.group(1)
            stream_url = stream_url.replace('&amp;', '&')  # decode HTML-escaped ampersands
            stream_url = stream_url + '|User-Agent=%s' % (scraper_utils.get_ua())
            hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'url': stream_url, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        match = re.search('''<option[^>]+value\s*=\s*["']([^"']+)[^>]*>(?:Altyaz.{1,3}s.{1,3}z)<''', html)
        if match:
            option_url = urlparse.urljoin(self.base_url, match.group(1))
            html = self._http_get(option_url, cache_limit=.25)
            fragment = dom_parser.parse_dom(html, 'span', {'class': 'object-wrapper'})
            if fragment:
                iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
                if iframe_url:
                    html = self._http_get(iframe_url[0], cache_limit=.25)
                    seen_urls = {}
                    for match in re.finditer('"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?[^"]*"', html):
                        stream_url, height = match.groups()
                        if stream_url not in seen_urls:
                            seen_urls[stream_url] = True
                            stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                            host = self._get_direct_hostname(stream_url)
                            if host == 'gvideo':
                                quality = scraper_utils.gv_get_quality(stream_url)
                            else:
                                quality = scraper_utils.height_get_quality(height)
                            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
        stream_url = attrs['href']
        if MOVIE_URL in stream_url:
            meta = scraper_utils.parse_movie_link(stream_url)
            stream_url = scraper_utils.pathify_url(stream_url) + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            quality = scraper_utils.height_get_quality(meta['height'])
            hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            if 'format' in meta: hoster['format'] = meta['format']
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    norm_title = scraper_utils.normalize_title(video.title)
    if source_url and source_url != FORCE_NO_MATCH:
        source_url = urlparse.urljoin(self.base_url, source_url)
        for line in self._get_files(source_url, cache_limit=24):
            if not line['directory']:
                match = {}
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(line['link'])
                    if norm_title in scraper_utils.normalize_title(meta['title']):
                        match = line
                elif self.__episode_match(line, video):
                    match = line
                    meta = scraper_utils.parse_episode_link(line['link'])

                if match:
                    if meta['dubbed']: continue
                    stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                    stream_url = stream_url.replace(self.base_url, '')
                    quality = scraper_utils.height_get_quality(meta['height'])
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    if 'format' in meta: hoster['format'] = meta['format']
                    if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                    hosters.append(hoster)
    return hosters
def __get_links(self, url):
    sources = []
    match = re.search('src="([^"]+)', url)
    if match:
        url = match.group(1).replace('\\/', '/')
        html = self._http_get(url, cache_limit=0)
        match = re.search('<script\s+src="([^\']+)\'\+(\d+)\+\'([^\']+)', html)
        if match:
            page_url = ''.join(match.groups())
            page_url += str(random.random())
            html = self._http_get(page_url, cache_limit=0)

        for match in re.finditer('"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?"', html):
            stream_url, height = match.groups()
            stream_url = stream_url.replace('\\&', '&').replace('\\/', '/')
            if 'v.asp' in stream_url and 'ok.ru' not in url:
                stream_redirect = self._http_get(stream_url, allow_redirect=False, cache_limit=0)
                if stream_redirect: stream_url = stream_redirect

            if self._get_direct_hostname(stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(height)
            host = self._get_direct_hostname(stream_url)
            stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(url))
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            sources.append(hoster)
    return sources
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    sources = self.__get_posts(html)
    sources.update(self.__get_ajax(html, url))
    sources.update(self.__get_embedded(html, url))
    for source in sources:
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        host = scraper_utils.get_direct_hostname(self, source)
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': sources[source], 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish subtitles'}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    norm_title = scraper_utils.normalize_title(video.title)
    if source_url and source_url != FORCE_NO_MATCH:
        source_url = urlparse.urljoin(self.base_url, source_url)
        for line in self.__get_files(source_url, cache_limit=24):
            if not line['directory']:
                match = {}
                if video.video_type == VIDEO_TYPES.MOVIE:
                    match_title, _match_year, height, extra = scraper_utils.parse_movie_link(line['link'])
                    if norm_title in scraper_utils.normalize_title(match_title):
                        match = line
                else:
                    _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(line['link'])
                    if int(video.season) == int(season) and int(video.episode) == int(episode):
                        match = line

                if 'dubbed' in extra.lower(): continue
                if match:
                    stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': scraper_utils.height_get_quality(height), 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    if 'x265' in extra: hoster['format'] = 'x265'
                    if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                    hosters.append(hoster)
    return hosters
def __get_cloud_links(self, html, page_url, sub):
    hosters = []
    html = html.replace('\\"', '"').replace('\\/', '/')
    match = re.search("dizi_kapak_getir\('([^']+)", html)
    if match:
        ep_id = match.group(1)
        for script_url in dom_parser.parse_dom(html, 'script', {'data-cfasync': 'false'}, ret='src'):
            html = self._http_get(script_url, cache_limit=24)
            match1 = re.search("var\s+kapak_url\s*=\s*'([^']+)", html)
            match2 = re.search("var\s+aCtkp\s*=\s*'([^']+)", html)
            if match1 and match2:
                link_url = '%s?fileid=%s&access_token=%s' % (match1.group(1), ep_id, match2.group(1))
                headers = {'Referer': page_url}
                html = self._http_get(link_url, headers=headers, cache_limit=.5)
                js_data = scraper_utils.parse_json(html, link_url)
                for variant in js_data.get('variants', {}):
                    stream_host = random.choice(variant.get('hosts', []))
                    if stream_host:
                        stream_url = STREAM_URL % (stream_host, variant['path'], scraper_utils.get_ua(), urllib.quote(page_url))
                        if not stream_url.startswith('http'):
                            stream_url = 'http://' + stream_url
                        host = self._get_direct_hostname(stream_url)
                        if 'width' in variant:
                            quality = scraper_utils.width_get_quality(variant['width'])
                        elif 'height' in variant:
                            quality = scraper_utils.height_get_quality(variant['height'])
                        else:
                            quality = QUALITIES.HIGH
                        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                        hoster['subs'] = sub
                        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        html = self.__get_source_page(source_url)
        page_url = urlparse.urljoin(self.base_url, source_url)
        for match in re.finditer('''loadEpisode\(\s*(\d+)\s*,\s*(\d+)\s*,\s*'([^']+)'\s*\).*?class="btn-eps[^>]*>([^<]+)''', html, re.DOTALL):
            link_type, link_id, hash_id, q_str = match.groups()
            pattern = 'Episode\s+%s(:|$| )' % (video.episode)
            if video.video_type == VIDEO_TYPES.EPISODE and not re.search(pattern, q_str):
                continue

            if link_type in ['12', '13', '14']:
                url = urlparse.urljoin(self.base_url, PLAYLIST_URL1 % (link_id))
                sources.update(self.__get_link_from_json(url, q_str))
            else:
                url = urlparse.urljoin(self.base_url, PLAYLIST_URL2 % (link_id, hash_id))
                sources.update(self.__get_links_from_xml(url, video, page_url))

        for source in sources:
            if not source.lower().startswith('http'): continue
            if sources[source]['direct']:
                host = self._get_direct_hostname(source)
                if host != 'gvideo':
                    stream_url = source + '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), page_url)
                else:
                    stream_url = source
            else:
                host = urlparse.urlparse(source).hostname
                stream_url = source
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': sources[source]['quality'], 'views': None, 'rating': None, 'url': stream_url, 'direct': sources[source]['direct']}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        page_quality = dom_parser.parse_dom(html, 'dd', {'class': 'status'})
        if page_quality:
            page_quality = QUALITY_MAP.get(page_quality[0], QUALITIES.HIGH)
        else:
            page_quality = QUALITIES.HIGH

        sources = self.__get_gk_links(html, url, page_quality)
        for source in sources:
            host = self._get_direct_hostname(source)
            if host == 'gvideo':
                direct = True
                quality = sources[source]
            else:
                host = urlparse.urlparse(source).hostname
                quality = scraper_utils.get_quality(video, host, sources[source])
                direct = False

            if host is not None:
                stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.EPISODE:
        gk_html = ''.join(match.group(0) for match in re.finditer('<a[^>]*>(%s|Server \d+)</a>' % (video.episode), html, re.I))
    else:
        gk_html = html
    link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
    player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
    sources = scraper_utils.get_gk_links(self, gk_html, page_url, QUALITIES.HIGH, link_url, player_url)
    sources.update(self.__get_ht_links(html, page_url))

    for stream_url, quality in sources.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            direct = True
        else:
            host = urlparse.urlparse(stream_url).hostname
            direct = False
        if host is None: continue
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    match = re.search('var\s*video_id="([^"]+)', html)
    if not match: return hosters

    video_id = match.group(1)
    data = {'v': video_id}
    headers = {'Referer': page_url}
    headers.update(XHR)
    html = self._http_get(INFO_URL, data=data, headers=headers, cache_limit=.5)
    sources = scraper_utils.parse_json(html, INFO_URL)
    for source in sources:
        match = re.search('url=(.*)', sources[source])
        if not match: continue
        stream_url = urllib.unquote(match.group(1))
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(source)
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment: html = fragment[0].content
    iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
    if not iframe_url: return hosters
    iframe_url = iframe_url[0].attrs['src']
    if iframe_url.startswith('/'):
        iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    obj = dom_parser2.parse_dom(html, 'object', req='data')
    if obj:
        streams = dict((stream_url, {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}) for stream_url in scraper_utils.parse_google(self, obj[0].attrs['data']))
    else:
        streams = scraper_utils.parse_sources_list(self, html)

    for stream_url, values in streams.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = values['quality']
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(source)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=1)
        match = re.search('''["']sources['"]\s*:\s*\[(.*?)\]''', html, re.DOTALL)
        if match:
            for match in re.finditer('''['"]*file['"]*\s*:\s*([^\(]+)''', match.group(1), re.DOTALL):
                stream_url = self.__decode(match.group(1), html)
                if stream_url:
                    if video.video_type == VIDEO_TYPES.MOVIE:
                        quality = QUALITIES.HD720
                    else:
                        quality = QUALITIES.HIGH
                    stream_url = stream_url + '|User-Agent=%s&Referer=%s&Cookie=%s' % (scraper_utils.get_ua(), urllib.quote(url), self._get_stream_cookies())
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'url': stream_url, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
                    hosters.append(hoster)
    return hosters
def __get_direct_links(self, html, page_url):
    hosters = []
    match = re.search("&u=([^']+)", html)
    if match:
        u = match.group(1)
        fragment = dom_parser.parse_dom(html, 'ul', {'class': '[^"]*rektab[^"]*'})
        if fragment:
            page = dom_parser.parse_dom(fragment[0], 'a', ret='id')
            if page:
                page = page[0]
                ss = dom_parser.parse_dom(fragment[0], 'a', {'id': page}, ret='class')
                for s in ss:
                    yt_url = YT_URL % (page, s, u)
                    url = urlparse.urljoin(self.base_url, yt_url)
                    headers = {'Referer': page_url}
                    html = self._http_get(url, headers=headers, cache_limit=.5)
                    sources = self._parse_sources_list(html)
                    for source in sources:
                        host = self._get_direct_hostname(source)
                        if sources[source]['quality']:
                            quality = sources[source]['quality']
                        else:
                            quality = QUALITIES.HIGH
                        stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
                        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                        hosters.append(hoster)
    return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*screen[^"]*'}) if fragment: js_src = dom_parser.parse_dom(fragment[0], 'script', ret='src') if js_src: js_url = urlparse.urljoin(self.base_url, js_src[0]) html = self._http_get(js_url, cache_limit=.5) else: html = fragment[0] for match in re.finditer('<source[^>]+src="([^"]+)', html): stream_url = match.group(1) host = self._get_direct_hostname(stream_url) if host == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) elif 'blogspot' in stream_url: quality = scraper_utils.gv_get_quality(stream_url) else: _, _, height, _ = scraper_utils.parse_movie_link(stream_url) quality = scraper_utils.height_get_quality(height) stream_url += '|User-Agent=%s' % (scraper_utils.get_ua()) hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True} hosters.append(hoster) return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        # exit early if the page only has a trailer ("Şu an fragman")
        if re.search('Şu an fragman', html, re.I):
            return hosters

        match = re.search('''url\s*:\s*"([^"]+)"\s*,\s*data:\s*["'](id=\d+)''', html)
        if match:
            url, data = match.groups()
            url = urlparse.urljoin(self.base_url, url)
            result = self._http_get(url, data=data, headers=XHR, cache_limit=.5)
            for match in re.finditer('"videolink\d*"\s*:\s*"([^"]+)","videokalite\d*"\s*:\s*"?(\d+)p?', result):
                stream_url, height = match.groups()
                stream_url = stream_url.replace('\\/', '/')
                host = self._get_direct_hostname(stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    quality = scraper_utils.height_get_quality(height)
                stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(page_url))
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                hosters.append(hoster)
    return hosters
def __get_links(self, html): hosters = [] r = re.search('tlas\("([^"]+)', html) if r: plaintext = self.__caesar( self.__get_f(self.__caesar(r.group(1), 13)), 13) sources = scraper_utils.parse_sources_list(self, plaintext) for source in sources: stream_url = source + scraper_utils.append_headers( { 'User-Agent': scraper_utils.get_ua(), 'Cookie': self._get_stream_cookies() }) host = scraper_utils.get_direct_hostname(self, stream_url) hoster = { 'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': sources[source]['quality'], 'rating': None, 'views': None, 'direct': True } hosters.append(hoster) return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        sources = self.__get_gk_links(html, url)
        if not sources:
            sources = self.__get_gk_links2(html)
        sources.update(self.__get_iframe_links(html))

        for source in sources:
            host = self._get_direct_hostname(source)
            if host == 'gvideo':
                direct = True
                quality = sources[source]
                stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            else:
                direct = False
                stream_url = source
                if self.base_url in source:
                    host = sources[source]
                    quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                else:
                    host = urlparse.urlparse(source).hostname
                    quality = sources[source]
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        fragment = dom_parser.parse_dom(html, 'ul', {'class': 'dropdown-menu'})
        if fragment:
            match = re.search('''href=['"]([^'"]+)[^>]*>(?:Altyaz.{1,3}s.{1,3}z)<''', fragment[0])
            if match:
                option_url = urlparse.urljoin(self.base_url, match.group(1))
                html = self._http_get(option_url, cache_limit=2)
                fragment = dom_parser.parse_dom(html, 'div', {'class': 'video-player'})
                if fragment:
                    iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
                    if iframe_url:
                        html = self._http_get(iframe_url[0], cache_limit=.25)
                        iframe_url = dom_parser.parse_dom(html, 'iframe', {'id': 'ifr'}, ret='src')
                        if iframe_url:
                            html = self._http_get(iframe_url[0], allow_redirect=False, method='HEAD', cache_limit=.25)
                            if html.startswith('http'):
                                stream_url = html
                                host = urlparse.urlparse(stream_url).hostname
                                stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                                quality = QUALITIES.HIGH
                                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': False}
                                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    sources = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        q_str = dom_parser.parse_dom(html, 'div', {'class': 'poster-qulabel'})
        if q_str:
            q_str = q_str[0].replace(' ', '').upper()
            page_quality = Q_MAP.get(q_str, QUALITIES.HIGH)
        else:
            page_quality = QUALITIES.HIGH

        for fragment in dom_parser.parse_dom(html, 'div', {'class': 'tab_box'}):
            match = re.search('file\s*:\s*"([^"]+)', fragment)
            if match:
                stream_url = match.group(1)
            else:
                stream_url = self.__get_ajax_sources(fragment, page_url)

            if stream_url:
                host = self._get_direct_hostname(stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    quality = page_quality
                stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(page_url))
                source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
                sources.append(source)
    return sources
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'ul', {'class': 'css_server_new'})
        if fragment:
            for match in re.finditer('href="([^"]+)[^>]*>(.*?)(?:-\d+)?</a>', fragment[0]):
                url, host = match.groups()
                host = host.lower()
                host = re.sub('<img.*?/>', '', host)
                host = HOSTS.get(host, host)
                log_utils.log('%s - %s' % (url, host))
                if host in GVIDEO_NAMES:
                    sources = self.__get_links(urlparse.urljoin(self.base_url, url))
                    direct = True
                else:
                    sources = {url: host}
                    direct = False

                for source in sources:
                    stream_url = source
                    if self._get_direct_hostname(source) == 'gvideo':
                        quality = scraper_utils.gv_get_quality(source)
                        # decorate a copy of the url; re-binding the loop variable here
                        # would make the sources[source] lookup below raise a KeyError
                        stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
                    else:
                        quality = scraper_utils.get_quality(video, source, QUALITIES.HIGH)
                    hoster = {'multi-part': False, 'host': sources[source], 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'player'})
    if not fragment: return hosters
    iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
    if not iframe_url: return hosters
    html = self._http_get(iframe_url[0].attrs['src'], cache_limit=.25)
    sources.append(self.__get_embedded_sources(html))
    sources.append(self.__get_linked_sources(html))

    for source in sources:
        for stream_url in source['sources']:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                quality = scraper_utils.gv_get_quality(stream_url)
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                hoster['subs'] = source.get('subs', True)
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    sources = []
    source_url = self.get_url(video)
    if source_url and source_url != FORCE_NO_MATCH:
        if video.video_type == VIDEO_TYPES.MOVIE:
            streams = self.__get_movie_sources(source_url)
        else:
            streams = self.__get_episode_sources(source_url)

        for stream_url in streams:
            host = self._get_direct_hostname(stream_url)
            stream_url2 = stream_url + '|User-Agent=%s' % (scraper_utils.get_ua())
            source = {'multi-part': False, 'url': stream_url2, 'host': host, 'class': self, 'quality': streams[stream_url], 'views': None, 'rating': None, 'direct': True}
            sources.append(source)
    return sources
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        html = self.__get_embedded_page(source_url)
        fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*download[^"]*'})
        if fragment:
            page_url = urlparse.urljoin(self.base_url, source_url)
            for match in re.finditer('href="([^"]+)[^>]+>([^<]+)', fragment[0]):
                stream_url, label = match.groups()
                quality = scraper_utils.height_get_quality(label)
                stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(page_url))
                hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=0)
    match = re.search('var\s*video_id\s*=\s*"([^"]+)', html)
    if not match: return hosters

    video_id = match.group(1)
    headers = {'Referer': page_url}
    headers.update(XHR)
    _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)
    vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
    html = self._http_get(vid_url, data={'v': video_id}, headers=headers, cache_limit=0)
    for source, value in scraper_utils.parse_json(html, vid_url).iteritems():
        match = re.search('url=(.*)', value)
        if not match: continue
        stream_url = urllib.unquote(match.group(1))
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(source)
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    norm_title = scraper_utils.normalize_title(video.title)
    if source_url and source_url != FORCE_NO_MATCH:
        source_url = urlparse.urljoin(self.base_url2, source_url)
        for line in self._get_files(source_url, cache_limit=24):
            if not line['directory']:
                match = {}
                if video.video_type == VIDEO_TYPES.MOVIE:
                    match_title, _match_year, height, extra = scraper_utils.parse_movie_link(line['link'])
                    if norm_title in scraper_utils.normalize_title(match_title):
                        match = line
                else:
                    _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(line['link'])
                    if int(video.season) == int(season) and int(video.episode) == int(episode):
                        match = line

                if 'dubbed' in extra.lower(): continue
                if match:
                    stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': scraper_utils.height_get_quality(height), 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    if 'x265' in extra: hoster['format'] = 'x265'
                    if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        sources.update(self.__get_gk_links(html, page_url))
        sources.update(self.__get_iframe_links(html, page_url))
        for source in sources:
            host = self._get_direct_hostname(source)
            stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': sources[source], 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'meta-media'})
        if fragment:
            iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
            if iframe_url:
                iframe_url = urlparse.urljoin(self.base_url, iframe_url[0])
                html = self._http_get(iframe_url, cache_limit=.5)
                for match in re.finditer('window.location.href\s*=\s*"([^"]+)', html):
                    stream_url = match.group(1)
                    host = self._get_direct_hostname(stream_url)
                    if host == 'gvideo':
                        sources[stream_url] = scraper_utils.gv_get_quality(stream_url)
                    else:
                        # key on the stream url itself; keying on source_url lost the link
                        sources[stream_url] = QUALITIES.HIGH

        for source in sources:
            host = self._get_direct_hostname(source)
            stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': sources[source], 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            hosters.append(hoster)
    return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*screen[^"]*'}) if fragment: js_src = dom_parser.parse_dom(fragment[0], 'script', ret='src') if js_src: js_url = urlparse.urljoin(self.base_url, js_src[0]) html = self._http_get(js_url, cache_limit=.5) else: html = fragment[0] for match in re.finditer('<source[^>]+src="([^"]+)', html): stream_url = match.group(1) host = self._get_direct_hostname(stream_url) if host == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) else: _, _, height, _ = scraper_utils.parse_movie_link(stream_url) quality = scraper_utils.height_get_quality(height) stream_url += '|User-Agent=%s' % (scraper_utils.get_ua()) hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True} hosters.append(hoster) return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        views = None
        match = re.search('<li>\s*Views\s*:\s*(.*?)</li>', html)
        if match:
            views = re.sub('[^0-9]', '', match.group(1))

        hosts = dom_parser.parse_dom(html, 'p', {'class': 'server_servername'})
        links = dom_parser.parse_dom(html, 'p', {'class': 'server_play'})
        for host, link_text in zip(hosts, links):
            host = host.lower().replace('server', '').strip()
            match = re.search('href="([^"]+)', link_text)
            if match:
                link = match.group(1)
                if 'google' in host:
                    sources = self.__get_google_links(link)
                    for source in sources:
                        source += '|User-Agent=%s' % (scraper_utils.get_ua())
                        hoster = {'multi-part': False, 'url': source, 'class': self, 'quality': scraper_utils.gv_get_quality(source), 'host': self._get_direct_hostname(source), 'rating': None, 'views': views, 'direct': True}
                        hosters.append(hoster)
                else:
                    hoster = {'multi-part': False, 'url': link, 'class': self, 'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH), 'host': host, 'rating': None, 'views': views, 'direct': False}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    js_url = scraper_utils.urljoin(self.base_url, '/javascript/movies.js')
    html = self._http_get(js_url, cache_limit=48)
    if source_url.startswith('/'):
        source_url = source_url[1:]
    pattern = '''getElementById\(\s*"%s".*?play\(\s*'([^']+)''' % (source_url)
    match = re.search(pattern, html, re.I)
    if match:
        stream_url = match.group(1)
        if 'drive.google' in stream_url or 'docs.google' in stream_url:
            sources = scraper_utils.parse_google(self, stream_url)
        else:
            sources = [stream_url]

        for source in sources:
            stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            host = scraper_utils.get_direct_hostname(self, source)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source)
                direct = True
            elif 'youtube' in stream_url:
                quality = QUALITIES.HD720
                direct = False
                host = 'youtube.com'
            else:
                quality = QUALITIES.HIGH
                direct = True
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)
    return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) match = re.search('<b>Views:.*?([\d,]+)', html) if match: views = int(match.group(1).replace(',', '')) else: views = None button = dom_parser.parse_dom(html, 'a', {'class': '[^"]*btn_watch_detail[^"]*'}, ret='href') if button: html = self._http_get(button[0], cache_limit=.5) for match in re.finditer('<span class="svname">\s*(.*?)\s*:?\s*</span>(.*?)(?=<span class="svname">|</div>)', html): title, fragment = match.groups() for match in re.finditer('<a[^>]+id="ep_\d+"[^>]+href="([^"]+)[^>]+>\s*([^<]+)', fragment): stream_url, name = match.groups() match = re.search('(\d+)', name) if match: quality = scraper_utils.height_get_quality(match.group(1)) else: quality = QUALITIES.HIGH stream_url += '|User-Agent=%s&Referer=%s&Cookie=%s' % (scraper_utils.get_ua(), url, self._get_stream_cookies()) hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': True} hoster['title'] = title hosters.append(hoster) return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        match = re.search('<b>Views:.*?([\d,]+)', html)
        if match:
            views = int(match.group(1).replace(',', ''))
        else:
            views = None

        html = self.__get_watch_now(html)
        for match in re.finditer('<span class="svname">\s*(.*?)\s*:?\s*</span>(.*?)(?=<span class="svname">|</div>)', html):
            title, fragment = match.groups()
            for match in re.finditer('<a[^>]+id="ep_\d+"[^>]+href="([^"]+)[^>]+>\s*([^<]+)', fragment):
                stream_url, name = match.groups()
                match = re.search('(\d+)', name)
                if video.video_type == VIDEO_TYPES.MOVIE:
                    if match:
                        quality = scraper_utils.height_get_quality(match.group(1))
                    else:
                        quality = QUALITIES.HIGH
                else:
                    # compare the extracted digits; int(name) raises when the label
                    # contains anything besides digits
                    if not match or int(match.group(1)) != int(video.episode):
                        continue
                    quality = QUALITIES.HIGH
                stream_url += '|User-Agent=%s&Referer=%s&Cookie=%s' % (scraper_utils.get_ua(), url, self._get_stream_cookies())
                hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': True}
                hoster['title'] = title
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'film-container'})
        if fragment:
            iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
            if iframe_url:
                iframe_url = urlparse.urljoin(self.base_url, iframe_url[0])
                headers = {'Referer': page_url}
                html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
                sources = self._parse_sources_list(html)
                for source in sources:
                    quality = sources[source]['quality']
                    host = self._get_direct_hostname(source)
                    stream_url = source + '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(iframe_url))
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    match = re.search('(\d+[a-z]bps)', source)
                    if match:
                        hoster['extra'] = match.group(1)
                    hosters.append(hoster)

    hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        match = re.search('var\s+view_id\s*=\s*"([^"]+)', html)
        if match:
            view_data = {'id': match.group(1), 'tip': 'view', 'dil': 'or'}
            html = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25)
            html = html.strip()
            html = re.sub(r'\\n|\\t', '', html)
            match = re.search('var\s+sources\s*=\s*(\[.*?\])', html)
            if match:
                raw_data = match.group(1)
                raw_data = raw_data.replace('\\', '')
            else:
                raw_data = html

            js_data = scraper_utils.parse_json(raw_data, self.ajax_url)
            direct = True  # default so no branch below can leave it unset
            if 'data' in js_data:
                src = dom_parser.parse_dom(js_data['data'], 'iframe', ret='src')
                if src:
                    html = self._http_get(src[0], cache_limit=.25)
                    match = re.search('url=([^"]+)', html)
                    if match:
                        stream_url = match.group(1).replace('>', '')
                        sources.append({'label': '720p', 'file': stream_url})
                        direct = False
                    else:
                        src = dom_parser.parse_dom(html, 'iframe', ret='src')
                        if src:
                            sources.append({'label': '720p', 'file': src[0]})
                            direct = False
                        else:
                            for match in re.finditer('"file"\s*:\s*"([^"]+)"\s*,\s*"label"\s*:\s*"([^"]+)', html):
                                sources.append({'label': match.group(2), 'file': match.group(1)})
                            direct = True
            else:
                sources = js_data
                direct = True

            for source in sources:
                stream_url = source['file'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                if direct:
                    host = self._get_direct_hostname(stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    else:
                        quality = scraper_utils.height_get_quality(source['label'])
                else:
                    host = urlparse.urlparse(stream_url).hostname
                    quality = scraper_utils.height_get_quality(source['label'])
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
                hosters.append(hoster)
    return hosters
def get_sources(self, video): source_url = self.get_url(video) sources = {} hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) for match in re.finditer("embeds\[(\d+)\]\s*=\s*'([^']+)", html): match = re.search('src="([^"]+)', match.group(2)) if match: iframe_url = match.group(1) if 'play-en.php' in iframe_url: match = re.search('id=([^"&]+)', iframe_url) if match: proxy_link = match.group(1) proxy_link = proxy_link.split('*', 1)[-1] picasa_url = scraper_utils.gk_decrypt( self.get_name(), GK_KEY, proxy_link) for stream_url in self._parse_google(picasa_url): sources[stream_url] = { 'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True } else: html = self._http_get(iframe_url, cache_limit=0) temp_sources = self._parse_sources_list(html) for source in temp_sources: if 'download.php' in source: redir_html = self._http_get( source, allow_redirect=False, method='HEAD', cache_limit=0) if redir_html.startswith('http'): temp_sources[redir_html] = temp_sources[ source] del temp_sources[source] sources.update(temp_sources) for source in sources: host = self._get_direct_hostname(source) stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua()) quality = QUALITY_MAP.get(sources[source]['quality'], QUALITIES.HIGH) hoster = { 'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True } hosters.append(hoster) return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    sources = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        if video.video_type == VIDEO_TYPES.MOVIE:
            fragment = dom_parser.parse_dom(html, 'div', {'class': 'poster'})
            if fragment:
                movie_url = dom_parser.parse_dom(fragment[0], 'a', ret='href')
                if movie_url:
                    url = urlparse.urljoin(self.base_url, movie_url[0])
                    html = self._http_get(url, cache_limit=.5)
                    episodes = self.__get_episodes(html)
                    url = self.__get_best_page(episodes)
                    if not url:
                        return sources
                    url = urlparse.urljoin(self.base_url, url)
                    html = self._http_get(url, cache_limit=.5)

        for match in re.finditer('''<source[^>]+src=['"]([^'"]+)([^>]+)''', html):
            stream_url, extra = match.groups()
            if 'video.php' in stream_url:
                redir_url = self._http_get(stream_url, allow_redirect=False, method='HEAD', cache_limit=.25)
                if redir_url.startswith('http'):
                    stream_url = redir_url

            host = self._get_direct_hostname(stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                match = re.search('''data-res\s*=\s*["']([^"']+)''', extra)
                if match:
                    height = re.sub('(hd|px)', '', match.group(1))
                    quality = scraper_utils.height_get_quality(height)
                else:
                    quality = QUALITIES.HIGH
            stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            sources.append(source)
    return sources
def _get_links(self, html):
    hosters = []
    for match in re.finditer('file\s*:\s*"([^"]+).*?label\s*:\s*"([^"]+)', html):
        url, resolution = match.groups()
        url += '|User-Agent=%s&Cookie=%s' % (scraper_utils.get_ua(), self._get_stream_cookies())
        hoster = {'multi-part': False, 'url': url, 'host': self._get_direct_hostname(url), 'class': self, 'quality': scraper_utils.height_get_quality(resolution), 'rating': None, 'views': None, 'direct': True}
        hosters.append(hoster)
    return hosters
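# A small illustration of the jwplayer-style markup the regex in _get_links()
# targets (hypothetical snippet; real pages vary):
#
#   html = 'sources: [{file: "http://cdn.example.com/v.mp4", label: "720p"}]'
#   re.findall('file\s*:\s*"([^"]+).*?label\s*:\s*"([^"]+)', html)
#   # -> [('http://cdn.example.com/v.mp4', '720p')]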
def resolve_link(self, link):
    url = urlparse.urljoin(self.base_url, link)
    request = urllib2.Request(url)
    request.add_header('User-Agent', scraper_utils.get_ua())
    request.add_unredirected_header('Host', request.get_host())
    request.add_unredirected_header('Referer', url)
    response = urllib2.urlopen(request)
    return response.geturl()
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    headers = {'Referer': page_url}
    html = self._http_get(page_url, headers=headers, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.MOVIE:
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'poster'})
        if fragment:
            movie_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
            if movie_url:
                page_url = scraper_utils.urljoin(self.base_url, movie_url[0].attrs['href'])
                html = self._http_get(page_url, cache_limit=.5)
                episodes = self.__get_episodes(html)
                page_url = self.__get_best_page(episodes)
                if not page_url:
                    return hosters
                page_url = scraper_utils.urljoin(self.base_url, page_url)
                html = self._http_get(page_url, cache_limit=.5)

    streams = dom_parser2.parse_dom(html, 'iframe', req='src')
    if streams:
        streams = [(attrs['src'], 480) for attrs, _content in streams]
        direct = False
    else:
        streams = [(attrs['src'], attrs.get('data-res', 480)) for attrs, _content in dom_parser2.parse_dom(html, 'source', req=['src'])]
        direct = True

    headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url}
    for stream_url, height in streams:
        if 'video.php' in stream_url or 'moviexk.php' in stream_url:
            if 'title=' in stream_url:
                title = stream_url.split('title=')[-1]
                stream_url = stream_url.replace(title, urllib.quote(title))
            redir_url = self._http_get(stream_url, headers=headers, allow_redirect=False, method='HEAD', cache_limit=0)
            if redir_url.startswith('http'):
                redir_url = redir_url.replace(' ', '').split(';codec')[0]
                stream_url = redir_url
            else:
                continue

        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(height)
            stream_url += scraper_utils.append_headers(headers)
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.height_get_quality(height)

        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
        hosters.append(source)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=2)
        fragment = dom_parser.parse_dom(html, 'div', {'id': 'embed'})
        if fragment:
            iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
            if iframe_url:
                html = self._http_get(iframe_url[0], cache_limit=.25)
                seen_urls = {}
                # if captions exist, then they aren't hardcoded
                if re.search('kind\s*:\s*"captions"', html):
                    subs = False
                else:
                    subs = True

                for match in re.finditer('"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?[^"]*"', html):
                    stream_url, height = match.groups()
                    if stream_url not in seen_urls:
                        seen_urls[stream_url] = True
                        if 'v.asp' in stream_url:
                            stream_redirect = self._http_get(stream_url, allow_redirect=False, method='HEAD', cache_limit=0)
                            if stream_redirect: stream_url = stream_redirect

                        stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                        host = self._get_direct_hostname(stream_url)
                        if host == 'gvideo':
                            quality = scraper_utils.gv_get_quality(stream_url)
                        else:
                            quality = scraper_utils.height_get_quality(height)
                        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': subs}
                        hosters.append(hoster)
    return hosters