def __create_source(self, stream_url, height, page_url, subs=False, direct=True):
    """Build a hoster dict for a single stream.

    stream_url: raw stream link; height: pixel height used for quality lookup;
    page_url: page the stream was found on (used as Referer);
    subs: if True, tag the hoster with a Turkish-subtitles marker;
    direct: True when the link is directly playable (not a hosted embed).
    """
    if direct:
        # Un-escape JSON-style escaped slashes in direct links.
        stream_url = stream_url.replace('\\/', '/')
        if self.get_name().lower() in stream_url:
            # Link points back at this scraper's own host: it is a redirector,
            # so resolve it with a non-following HEAD request first.
            headers = {'Referer': page_url}
            redir_url = self._http_get(stream_url, headers=headers, method='HEAD',
                                       allow_redirect=False, cache_limit=.25)
            if redir_url.startswith('http'):
                stream_url = redir_url
                stream_url += scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
            else:
                # Redirect resolution failed: keep the original URL but attach
                # full headers including the session cookies.
                stream_url += scraper_utils.append_headers({
                    'User-Agent': scraper_utils.get_ua(),
                    'Referer': page_url,
                    'Cookie': self._get_stream_cookies()
                })
        else:
            stream_url += scraper_utils.append_headers({
                'User-Agent': scraper_utils.get_ua(),
                'Referer': page_url
            })
        host = scraper_utils.get_direct_hostname(self, stream_url)
    else:
        host = urlparse.urlparse(stream_url).hostname
    if host == 'gvideo':
        # Google video links encode their own quality in the URL.
        quality = scraper_utils.gv_get_quality(stream_url)
    else:
        quality = scraper_utils.height_get_quality(height)
    hoster = {
        'multi-part': False, 'host': host, 'class': self, 'quality': quality,
        'views': None, 'rating': None, 'url': stream_url, 'direct': direct
    }
    if subs:
        hoster['subs'] = 'Turkish Subtitles'
    return hoster
def get_sources(self, video):
    """Return direct hosters for *video*.

    Movies are served straight from the source link; episodes are expanded
    via __match_episode and may carry a file size.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    headers = {
        'User-Agent': scraper_utils.get_ua(),
        'Referer': self.base_url + source_url
    }
    if video.video_type == VIDEO_TYPES.MOVIE:
        # Quality is derived from the height encoded in the link itself.
        meta = scraper_utils.parse_movie_link(source_url)
        stream_url = source_url + scraper_utils.append_headers(headers)
        quality = scraper_utils.height_get_quality(meta['height'])
        hoster = {
            'multi-part': False,
            'host': scraper_utils.get_direct_hostname(self, stream_url),
            'class': self, 'quality': quality, 'views': None, 'rating': None,
            'url': stream_url, 'direct': True
        }
        if 'format' in meta:
            hoster['format'] = meta['format']
        hosters.append(hoster)
    else:
        for episode in self.__match_episode(source_url, video):
            meta = scraper_utils.parse_episode_link(episode['title'])
            stream_url = episode['url'] + scraper_utils.append_headers(headers)
            # Episode links are stored relative to the site base.
            stream_url = stream_url.replace(self.base_url, '')
            quality = scraper_utils.height_get_quality(meta['height'])
            hoster = {
                'multi-part': False,
                'host': scraper_utils.get_direct_hostname(self, stream_url),
                'class': self, 'quality': quality, 'views': None, 'rating': None,
                'url': stream_url, 'direct': True
            }
            if 'format' in meta:
                hoster['format'] = meta['format']
            if 'size' in episode:
                hoster['size'] = scraper_utils.format_size(int(episode['size']))
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Return direct hosters scraped from the page's 'myiframe' player."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    frames = dom_parser2.parse_dom(html, 'iframe', {'id': 'myiframe'},
                                   req='src', exclude_comments=True)
    if not frames:
        return hosters
    iframe_url = frames[0].attrs['src']
    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    for tag in dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'},
                                     req=['src', 'data-res']):
        stream_url = tag.attrs['src']
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            # gvideo links carry their quality; no Referer needed.
            quality = scraper_utils.gv_get_quality(stream_url)
            stream_url += scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
        else:
            quality = scraper_utils.height_get_quality(tag.attrs['data-res'])
            stream_url += scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
        hosters.append({'multi-part': False, 'url': stream_url, 'host': host,
                        'class': self, 'quality': quality, 'views': None,
                        'rating': None, 'direct': True})
    return hosters
def get_sources(self, video):
    """Return direct hosters resolved through the site's AJAX video endpoint."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=0)
    match = re.search('var\s*video_id\s*=\s*"([^"]+)', html)
    if not match:
        return hosters
    video_id = match.group(1)
    headers = {'Referer': page_url}
    headers.update(XHR)
    # Priming POST the site expects before the real video lookup.
    _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'),
                           headers=headers, method='POST', cache_limit=0)
    vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
    html = self._http_get(vid_url, data={'v': video_id}, headers=headers, cache_limit=0)
    for res, value in scraper_utils.parse_json(html, vid_url).iteritems():
        redirect = re.search('url=(.*)', value)
        if not redirect:
            continue
        stream_url = urllib.unquote(redirect.group(1))
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            # JSON keys are the resolution labels.
            quality = scraper_utils.height_get_quality(res)
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hosters.append({'multi-part': False, 'host': host, 'class': self,
                        'quality': quality, 'views': None, 'rating': None,
                        'url': stream_url, 'direct': True})
    return hosters
def get_sources(self, video):
    """Collect direct and hosted sources from player-data anchors."""
    hosters = []
    sources = {}
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    pattern = 'player-data="([^"]+)[^>]+episode-data="([^"]+)[^>]*>(.*?)</a>'
    for match in re.finditer(pattern, html, re.DOTALL):
        player_url, ep_id, label = match.groups()
        if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match(video, ep_id):
            continue
        label = label.strip()
        headers = {'Referer': page_url}
        if re.match('https?://embed', player_url):
            # Embed pages are fetched and mined for direct sources.
            src_html = self._http_get(player_url, headers=headers, cache_limit=.5)
            sources.update(scraper_utils.parse_sources_list(self, src_html))
            sources.update(self.__get_sources(src_html, label))
        else:
            sources[player_url] = {'direct': False,
                                   'quality': Q_MAP.get(label.upper(), QUALITIES.HIGH)}
    for source, value in sources.iteritems():
        direct = value['direct']
        if direct:
            host = scraper_utils.get_direct_hostname(self, source)
            stream_url = source + scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
        else:
            host = urlparse.urlparse(source).hostname
            stream_url = source
        hosters.append({'multi-part': False, 'host': host, 'class': self,
                        'quality': value['quality'], 'views': None, 'rating': None,
                        'url': stream_url, 'direct': direct})
    return hosters
def get_sources(self, video):
    """Return direct hosters from the 'playex' player fragment."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment:
        html = fragment[0].content
    links = scraper_utils.parse_sources_list(self, html)
    for link in links:
        stream_url = link
        if self.base_url in link:
            # Same-site links are redirectors; resolve with a HEAD request.
            redir_url = self._http_get(link, headers={'Referer': url},
                                       allow_redirect=False, method='HEAD')
            if redir_url.startswith('http'):
                stream_url = redir_url
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = links[link]['quality']
        stream_url += scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua(), 'Referer': url})
        hosters.append({'multi-part': False, 'url': stream_url, 'host': host,
                        'class': self, 'quality': quality, 'views': None,
                        'rating': None, 'direct': True})
    return hosters
def get_sources(self, video):
    """Return hosters from each 'movieplay' iframe on the page."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}):
        frames = dom_parser2.parse_dom(fragment, 'iframe', req='src')
        if not frames:
            continue
        iframe_src = frames[0].attrs['src']
        if re.search('o(pen)?load', iframe_src, re.I):
            # openload embeds are hosted; quality comes from the URL itself.
            meta = scraper_utils.parse_movie_link(iframe_src)
            quality = scraper_utils.height_get_quality(meta['height'])
            links = {iframe_src: {'quality': quality, 'direct': False}}
        else:
            links = self.__get_links(iframe_src, url)
        for link in links:
            direct = links[link]['direct']
            quality = links[link]['quality']
            if direct:
                host = scraper_utils.get_direct_hostname(self, link)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(link)
                stream_url = link + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua(), 'Referer': url})
            else:
                host = urlparse.urlparse(link).hostname
                stream_url = link
            hosters.append({'multi-part': False, 'url': stream_url, 'host': host,
                            'class': self, 'quality': quality, 'views': None,
                            'rating': None, 'direct': direct})
    return hosters
def get_sources(self, video):
    """Return direct hosters via the site's INFO_URL ajax endpoint."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    match = re.search('var\s*video_id="([^"]+)', html)
    if not match:
        return hosters
    video_id = match.group(1)
    data = {'v': video_id}
    headers = {'Referer': page_url}
    headers.update(XHR)
    html = self._http_get(INFO_URL, data=data, headers=headers, cache_limit=.5)
    sources = scraper_utils.parse_json(html, INFO_URL)
    for res in sources:
        redirect = re.search('url=(.*)', sources[res])
        if not redirect:
            continue
        stream_url = urllib.unquote(redirect.group(1))
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            # The JSON keys double as resolution labels.
            quality = scraper_utils.height_get_quality(res)
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hosters.append({'multi-part': False, 'host': host, 'class': self,
                        'quality': quality, 'views': None, 'rating': None,
                        'url': stream_url, 'direct': True})
    return hosters
def get_sources(self, video):
    """Return direct hosters from the page's player iframe.

    The iframe either wraps an <object> (treated as a Google-video playlist)
    or a plain HTML5 source list.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment:
        html = fragment[0].content
    iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
    if not iframe_url:
        return hosters
    iframe_url = iframe_url[0].attrs['src']
    if iframe_url.startswith('/'):
        # Relative iframe URLs are resolved against the site base.
        iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    obj = dom_parser2.parse_dom(html, 'object', req='data')
    if obj:
        # <object data=...> points at a gvideo playlist; expand every entry.
        streams = dict((stream_url,
                        {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True})
                       for stream_url in scraper_utils.parse_google(self, obj[0].attrs['data']))
    else:
        streams = scraper_utils.parse_sources_list(self, html)
    for stream_url, values in streams.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = values['quality']
        stream_url += scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
    
        source = {'multi-part': False, 'url': stream_url, 'host': host,
                  'class': self, 'quality': quality, 'views': None,
                  'rating': None, 'direct': True}
        hosters.append(source)
    return hosters
def __get_links(self, html):
    """Decode the caesar-obfuscated source list and build direct hosters."""
    hosters = []
    match = re.search('tlas\("([^"]+)', html)
    if not match:
        return hosters
    # Double ROT-13 with a transform in between recovers the plaintext list.
    plaintext = self.__caesar(self.__get_f(self.__caesar(match.group(1), 13)), 13)
    sources = scraper_utils.parse_sources_list(self, plaintext)
    for source in sources:
        stream_url = source + scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua(),
             'Cookie': self._get_stream_cookies()})
        hosters.append({'multi-part': False, 'url': stream_url,
                        'host': scraper_utils.get_direct_hostname(self, stream_url),
                        'class': self, 'quality': sources[source]['quality'],
                        'rating': None, 'views': None, 'direct': True})
    return hosters
def get_sources(self, video):
    """Return direct hosters from the film-container iframe, sorted by an
    embedded bitrate tag (e.g. '500kbps') when present.

    Fix: the original fell through to the source loop even when the
    'film-container' fragment or its iframe was missing, leaving
    `iframe_url` undefined for the Referer header (NameError). Guard
    clauses now bail out early in those cases.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'film-container'})
    if not fragment:
        return hosters
    iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
    if not iframe_url:
        return hosters
    iframe_url = scraper_utils.urljoin(self.base_url, iframe_url[0].attrs['src'])
    headers = {'Referer': page_url}
    html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
    sources = scraper_utils.parse_sources_list(self, html)
    for source in sources:
        quality = sources[source]['quality']
        host = scraper_utils.get_direct_hostname(self, source)
        stream_url = source + scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url})
        hoster = {'multi-part': False, 'host': host, 'class': self,
                  'quality': quality, 'views': None, 'rating': None,
                  'url': stream_url, 'direct': True}
        # Surface a bitrate tag from the URL so better streams sort first.
        match = re.search('(\d+[a-z]bps)', source)
        if match:
            hoster['extra'] = match.group(1)
        hosters.append(hoster)
    hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
    return hosters
def get_sources(self, video):
    """Return direct movie-link hosters from anchors on the page."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
        stream_url = attrs['href']
        if MOVIE_URL not in stream_url:
            continue
        meta = scraper_utils.parse_movie_link(stream_url)
        stream_url = scraper_utils.pathify_url(stream_url) + \
            scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False,
                  'host': scraper_utils.get_direct_hostname(self, stream_url),
                  'class': self,
                  'quality': scraper_utils.height_get_quality(meta['height']),
                  'views': None, 'rating': None, 'url': stream_url,
                  'direct': True}
        if 'format' in meta:
            hoster['format'] = meta['format']
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Return hosters (direct or hosted) from embed-responsive iframes."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    # Movies default to 720p, episodes to HIGH, when the host doesn't say.
    page_quality = QUALITIES.HD720 if video.video_type == VIDEO_TYPES.MOVIE else QUALITIES.HIGH
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'embed-responsive'}):
        frames = dom_parser2.parse_dom(fragment, 'iframe', req='data-src')
        if not frames:
            continue
        iframe_url = frames[0].attrs['data-src']
        iframe_host = urlparse.urlparse(iframe_url).hostname
        if iframe_host in DIRECT_HOSTS:
            sources = self.__parse_streams(iframe_url, url)
        else:
            sources = {iframe_url:
                       {'quality': scraper_utils.get_quality(video, iframe_host, page_quality),
                        'direct': False}}
        for source in sources:
            quality = sources[source]['quality']
            direct = sources[source]['direct']
            if direct:
                host = scraper_utils.get_direct_hostname(self, source)
                stream_url = source + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
            else:
                host = urlparse.urlparse(source).hostname
                stream_url = source
            hosters.append({'multi-part': False, 'url': stream_url, 'host': host,
                            'class': self, 'quality': quality, 'views': None,
                            'rating': None, 'direct': direct})
    return hosters
def get_sources(self, video):
    """Return direct hosters gathered from posts, ajax and embedded players."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    # Merge the three discovery mechanisms into one url -> quality map.
    sources = self.__get_posts(html)
    sources.update(self.__get_ajax(html, url))
    sources.update(self.__get_embedded(html, url))
    for source in sources:
        stream_url = source + scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua()})
        hosters.append({'multi-part': False,
                        'host': scraper_utils.get_direct_hostname(self, source),
                        'class': self, 'quality': sources[source],
                        'views': None, 'rating': None, 'url': stream_url,
                        'direct': True, 'subs': 'Turkish subtitles'})
    return hosters
def get_sources(self, video):
    """Return hosters via the ajax 'view' endpoint.

    Tries language 'or' (original audio) first, then 'tr' (Turkish dub);
    the 'tr' pass tags its results with Turkish subtitles. Stops at the
    first language that yields any sources.
    """
    hosters = []
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    match = re.search('var\s+view_id\s*=\s*"([^"]+)', html)
    if not match:
        return hosters
    view_id = match.group(1)
    for lang in ['or', 'tr']:
        subs = True if lang == 'tr' else False
        view_data = {'id': view_id, 'tip': 'view', 'dil': lang}
        html = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25)
        html = html.strip()
        # The response embeds escaped whitespace; strip it before parsing.
        html = re.sub(r'\\n|\\t', '', html)
        match = re.search('var\s+sources\s*=\s*(\[.*?\])', html)
        if match:
            raw_data = match.group(1)
            raw_data = raw_data.replace('\\', '')
        else:
            raw_data = html
        js_data = scraper_utils.parse_json(raw_data, self.ajax_url)
        if 'data' not in js_data:
            continue
        src = dom_parser2.parse_dom(js_data['data'], 'iframe', req='src')
        if not src:
            continue
        html = self._http_get(src[0].attrs['src'], cache_limit=.25)
        for attrs, _content in dom_parser2.parse_dom(html, 'iframe', req='src'):
            src = attrs['src']
            if not src.startswith('http'):
                continue
            # Nested iframes are treated as hosted 720p links.
            sources.append({'label': '720p', 'file': src, 'direct': False, 'subs': subs})
        sources += [{'file': url, 'subs': subs}
                    for url in scraper_utils.parse_sources_list(self, html).iterkeys()]
        if sources:
            break
    for source in sources:
        # parse_sources_list entries have no 'direct' key -> default True.
        direct = source.get('direct', True)
        stream_url = source['file'] + scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua()})
        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                # Direct link with no quality hint: skip it.
                continue
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.height_get_quality(source['label'])
        hoster = {'multi-part': False, 'host': host, 'class': self,
                  'quality': quality, 'views': None, 'rating': None,
                  'url': stream_url, 'direct': direct}
        if source.get('subs'):
            hoster['subs'] = 'Turkish Subtitles'
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Return hosters by looking the title up in the site's movies.js script."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    js_url = scraper_utils.urljoin(self.base_url, '/javascript/movies.js')
    html = self._http_get(js_url, cache_limit=48)
    if source_url.startswith('/'):
        source_url = source_url[1:]
    pattern = '''getElementById\(\s*"%s".*?play\(\s*'([^']+)''' % (source_url)
    match = re.search(pattern, html, re.I)
    if not match:
        return hosters
    stream_url = match.group(1)
    if 'drive.google' in stream_url or 'docs.google' in stream_url:
        # Google Drive links expand into one entry per resolution.
        sources = scraper_utils.parse_google(self, stream_url)
    else:
        sources = [stream_url]
    for source in sources:
        stream_url = source + scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua()})
        host = scraper_utils.get_direct_hostname(self, source)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(source)
            direct = True
        elif 'youtube' in stream_url:
            quality = QUALITIES.HD720
            direct = False
            host = 'youtube.com'
        else:
            quality = QUALITIES.HIGH
            direct = True
        hosters.append({'multi-part': False, 'host': host, 'class': self,
                        'quality': quality, 'views': None, 'rating': None,
                        'url': stream_url, 'direct': direct})
    return hosters
def get_sources(self, video):
    """Return hosters via the GK player API plus hidden-text links."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.EPISODE:
        # Keep only anchors for this episode (or generic "Server N" tabs).
        gk_html = ''.join(match.group(0) for match in
                          re.finditer('<a[^>]*>(%s|Server \d+)</a>' % (video.episode), html, re.I))
    else:
        gk_html = html
    link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
    player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
    sources = scraper_utils.get_gk_links(self, gk_html, page_url,
                                         QUALITIES.HIGH, link_url, player_url)
    sources.update(self.__get_ht_links(html, page_url))
    for stream_url, quality in sources.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            direct = True
        else:
            host = urlparse.urlparse(stream_url).hostname
            direct = False
        if host is None:
            continue
        stream_url += scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua()})
        hosters.append({'multi-part': False, 'host': host, 'class': self,
                        'quality': quality, 'views': None, 'rating': None,
                        'url': stream_url, 'direct': direct})
    return hosters
def __get_king_links(self, iframe_url):
    """Return direct hosters from the King player's GetVideoSources XHR.

    Best-effort: a malformed payload yields whatever was collected so far.
    Fix: the original wrapped the whole loop in a bare `except: pass`,
    which also swallowed KeyboardInterrupt/SystemExit and genuine bugs;
    the handlers are now narrowed to payload-shape errors.
    """
    hosters = []
    match = re.search('v=(.*)', iframe_url)
    if not match:
        return hosters
    data = {'ID': match.group(1)}
    headers = {'Referer': iframe_url}
    headers.update(XHR)
    xhr_url = iframe_url.split('?')[0]
    html = self._http_get(xhr_url, params={'p': 'GetVideoSources'}, data=data,
                          headers=headers, cache_limit=.5)
    js_data = scraper_utils.parse_json(html, xhr_url)
    try:
        video_sources = js_data['VideoSources']
    except (KeyError, TypeError):
        return hosters
    for source in video_sources:
        try:
            stream_url = source['file'] + scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
            host = scraper_utils.get_direct_hostname(self, source['file'])
            label = source.get('label', '')
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source['file'])
            elif re.search('\d+p?', label):
                quality = scraper_utils.height_get_quality(label)
            else:
                quality = QUALITY_MAP.get(label, QUALITIES.HIGH)
            hosters.append({'multi-part': False, 'host': host, 'class': self,
                            'quality': quality, 'views': None, 'rating': None,
                            'url': stream_url, 'direct': True,
                            'subs': 'Turkish Subtitles'})
        except (KeyError, TypeError, AttributeError):
            # Skip a malformed entry; keep the rest.
            continue
    return hosters
def get_sources(self, video):
    """Return direct hosters from the player iframe's embedded/linked sources.

    Fix: `quality` was only assigned inside the `host == 'gvideo'` branch;
    any other host raised NameError on the first stream (or silently reused
    a stale value). Non-gvideo streams now default to QUALITIES.HIGH.
    """
    hosters = []
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'player'})
    if not fragment:
        return hosters
    iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
    if not iframe_url:
        return hosters
    html = self._http_get(iframe_url[0].attrs['src'], cache_limit=.25)
    sources.append(self.__get_embedded_sources(html))
    sources.append(self.__get_linked_sources(html))
    for source in sources:
        for stream_url in source['sources']:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                stream_url += scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                # No quality metadata available for non-gvideo hosts.
                quality = QUALITIES.HIGH
            hoster = {'multi-part': False, 'host': host, 'class': self,
                      'quality': quality, 'views': None, 'rating': None,
                      'url': stream_url, 'direct': True}
            hoster['subs'] = source.get('subs', True)
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Return hosters for a movie or episode page.

    For movies, follows the poster link to the detail page, then picks the
    best episode/part page. Streams come either from <iframe> embeds
    (hosted, assumed 480p) or HTML5 <source> tags (direct, height taken
    from data-res).
    """
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    headers = {'Referer': page_url}
    html = self._http_get(page_url, headers=headers, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.MOVIE:
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'poster'})
        if fragment:
            movie_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
            if movie_url:
                page_url = scraper_utils.urljoin(self.base_url, movie_url[0].attrs['href'])
                html = self._http_get(page_url, cache_limit=.5)
                episodes = self.__get_episodes(html)
                page_url = self.__get_best_page(episodes)
                if not page_url:
                    return hosters
                else:
                    page_url = scraper_utils.urljoin(self.base_url, page_url)
                    html = self._http_get(page_url, cache_limit=.5)
    streams = dom_parser2.parse_dom(html, 'iframe', req='src')
    if streams:
        # Hosted iframes carry no height info; assume 480p.
        streams = [(attrs['src'], 480) for attrs, _content in streams]
        direct = False
    else:
        streams = [(attrs['src'], attrs.get('data-res', 480))
                   for attrs, _content in dom_parser2.parse_dom(html, 'source', req=['src'])]
        direct = True
    headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url}
    for stream_url, height in streams:
        if 'video.php' in stream_url or 'moviexk.php' in stream_url:
            # Redirector scripts: URL-quote the title parameter, then resolve
            # the real stream with a non-following HEAD request.
            if 'title=' in stream_url:
                title = stream_url.split('title=')[-1]
                stream_url = stream_url.replace(title, urllib.quote(title))
            redir_url = self._http_get(stream_url, headers=headers,
                                       allow_redirect=False, method='HEAD', cache_limit=0)
            if redir_url.startswith('http'):
                # Strip whitespace and any ';codec...' suffix from the target.
                redir_url = redir_url.replace(' ', '').split(';codec')[0]
                stream_url = redir_url
            else:
                continue
        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(height)
            stream_url += scraper_utils.append_headers(headers)
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.height_get_quality(height)
        source = {'multi-part': False, 'url': stream_url, 'host': host,
                  'class': self, 'quality': quality, 'views': None,
                  'rating': None, 'direct': direct}
        hosters.append(source)
    return hosters
def __get_embed_links(self, html):
    """Return direct hosters parsed from an embed page's source list.

    Fix: iterating `sources` yields URL *keys* (strings); the original read
    `source['quality']`, which indexes the string and raises TypeError.
    The quality lives in the value dict: `sources[source]['quality']`.
    """
    hosters = []
    sources = scraper_utils.parse_sources_list(self, html)
    for source in sources:
        quality = sources[source]['quality']
        stream_url = source + scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False,
                  'host': scraper_utils.get_direct_hostname(self, source),
                  'class': self, 'quality': quality, 'views': None,
                  'rating': None, 'url': stream_url, 'direct': True,
                  'subs': 'Turkish Subtitles'}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Return hosters scraped from every episode/part page of this video."""
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    headers = {'Accept-Language': 'en-US,en;q=0.5'}
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, headers=headers, cache_limit=2)
    if video.video_type == VIDEO_TYPES.MOVIE:
        sources.update(self.__scrape_sources(html, page_url))
        # Visit every part page except the one already showing (active).
        pages = set([r.attrs['href'] for r in dom_parser2.parse_dom(
            html, 'a', {'class': 'btn-eps'}, req='href')])
        active = set([r.attrs['href'] for r in dom_parser2.parse_dom(
            html, 'a', {'class': 'active'}, req='href')])
        for page in list(pages - active):
            page_url = scraper_utils.urljoin(self.base_url, page)
            html = self._http_get(page_url, headers=headers, cache_limit=2)
            sources.update(self.__scrape_sources(html, page_url))
    else:
        for page in self.__match_episode(video, html):
            page_url = scraper_utils.urljoin(self.base_url, page)
            html = self._http_get(page_url, headers=headers, cache_limit=2)
            sources.update(self.__scrape_sources(html, page_url))
    for source, values in sources.iteritems():
        if not source.lower().startswith('http'):
            continue
        if values['direct']:
            host = scraper_utils.get_direct_hostname(self, source)
            if host != 'gvideo':
                # NOTE(review): Referer is the *last* fetched page_url, not
                # necessarily the page this source came from — confirm intended.
                stream_url = source + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
            else:
                stream_url = source
        else:
            host = urlparse.urlparse(source).hostname
            stream_url = source
        hoster = {'multi-part': False, 'host': host, 'class': self,
                  'quality': values['quality'], 'views': None, 'rating': None,
                  'url': stream_url, 'direct': values['direct']}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Return hosters from the page's player iframes, carrying a view count."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    views = None
    fragment = dom_parser2.parse_dom(html, 'span', {'class': 'post-views'})
    if fragment:
        # Keep only the digits of the view counter.
        views = re.sub('[^\d]', '', fragment[0].content)
    iframe_urls = []
    if video.video_type == VIDEO_TYPES.MOVIE:
        iframe_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(
            html, 'a', {'class': ['orange', 'abutton']}, req='href')]
    else:
        # Pick only the links whose label matches this episode number.
        for label, link in self.__get_episode_links(html):
            if int(label) == int(video.episode):
                iframe_urls.append(link)
    for iframe_url in iframe_urls:
        headers = {'Referer': url}
        html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
        match = re.search('{link\s*:\s*"([^"]+)', html)
        if match:
            # GK-style player: resolve via its link API.
            sources = self.__get_gk_links(match.group(1), iframe_url)
        else:
            sources = scraper_utils.parse_sources_list(self, html)
        for source in sources:
            stream_url = source + scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
            direct = sources[source]['direct']
            quality = sources[source]['quality']
            if sources[source]['direct']:
                host = scraper_utils.get_direct_hostname(self, source)
            else:
                host = urlparse.urlparse(source).hostname
            hoster = {'multi-part': False, 'url': stream_url, 'class': self,
                      'quality': quality, 'host': host, 'rating': None,
                      'views': views, 'direct': direct}
            hosters.append(hoster)
    return hosters
def __get_cloud_links(self, html, page_url, sub):
    """Return direct hosters from the site's cloud player.

    Extracts the episode id, pulls the base URL and access token out of the
    player's external script, then requests the variant list and picks a
    random mirror host per variant. `sub` is the subtitle label attached to
    every hoster.

    Fix: `random.choice()` raises IndexError on an empty sequence; guard the
    (possibly missing/empty) 'hosts' list before choosing.
    """
    hosters = []
    html = html.replace('\\"', '"').replace('\\/', '/')
    match = re.search("dizi_kapak_getir\('([^']+)", html)
    if not match:
        return hosters
    ep_id = match.group(1)
    for attrs, _content in dom_parser2.parse_dom(
            html, 'script', {'data-cfasync': 'false'}, req='src'):
        script_url = attrs['src']
        html = self._http_get(script_url, cache_limit=24)
        match1 = re.search("var\s+kapak_url\s*=\s*'([^']+)", html)
        match2 = re.search("var\s+aCtkp\s*=\s*'([^']+)", html)
        if not (match1 and match2):
            continue
        link_url = '%s?fileid=%s&access_token=%s' % (
            match1.group(1), ep_id, match2.group(1))
        headers = {'Referer': page_url}
        html = self._http_get(link_url, headers=headers, cache_limit=.5)
        js_data = scraper_utils.parse_json(html, link_url)
        for variant in js_data.get('variants', {}):
            # Each variant lists mirror hosts; pick one at random.
            hosts = variant.get('hosts', [])
            stream_host = random.choice(hosts) if hosts else None
            if not stream_host:
                continue
            stream_url = stream_host + variant['path'] + scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
            if not stream_url.startswith('http'):
                stream_url = 'http://' + stream_url
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if 'width' in variant:
                quality = scraper_utils.width_get_quality(variant['width'])
            elif 'height' in variant:
                quality = scraper_utils.height_get_quality(variant['height'])
            else:
                quality = QUALITIES.HIGH
            hoster = {'multi-part': False, 'host': host, 'class': self,
                      'quality': quality, 'views': None, 'rating': None,
                      'url': stream_url, 'direct': True}
            hoster['subs'] = sub
            hosters.append(hoster)
    return hosters
def __get_links(self, html):
    """Deobfuscate the embedded source list and return direct hosters."""
    hosters = []
    encoded = re.search('tlas\("([^"]+)', html)
    if encoded:
        decoded = self.__caesar(self.__get_f(self.__caesar(encoded.group(1), 13)), 13)
        parsed = scraper_utils.parse_sources_list(self, decoded)
        for source in parsed:
            stream_url = source + scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua(),
                 'Cookie': self._get_stream_cookies()})
            hoster = {'multi-part': False, 'url': stream_url,
                      'host': scraper_utils.get_direct_hostname(self, stream_url),
                      'class': self, 'quality': parsed[source]['quality'],
                      'rating': None, 'views': None, 'direct': True}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Return hosters by pairing server-name and server-play columns.

    Fixes:
    - `re.sub('^Server\s*', '', host, re.I)` passed re.I (== 2) as the
      positional `count` argument rather than flags, so the case-insensitive
      strip never applied; flags are now passed by keyword.
    - The gvideo branch inspected the raw page href (`stream_url`) instead
      of the resolved link `source['link']` returned by __get_gvideo_links.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    hosts = [r.content for r in dom_parser2.parse_dom(
        html, 'p', {'class': 'server_servername'})]
    links = [r.content for r in dom_parser2.parse_dom(
        html, 'p', {'class': 'server_play'})]
    for host, link_frag in zip(hosts, links):
        stream_url = dom_parser2.parse_dom(link_frag, 'a', req='href')
        if not stream_url:
            continue
        stream_url = stream_url[0].attrs['href']
        host = re.sub('^Server\s*', '', host, flags=re.I)
        host = re.sub('\s*Link\s+\d+', '', host)
        if host.lower() == 'google':
            sources = self.__get_gvideo_links(stream_url)
        else:
            sources = [{'host': host, 'link': stream_url}]
        for source in sources:
            host = scraper_utils.get_direct_hostname(self, source['link'])
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source['link'])
                stream_url = source['link'] + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
                direct = True
            else:
                stream_url = scraper_utils.pathify_url(source['link'])
                host = HOST_SUB.get(source['host'].lower(), source['host'])
                quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                direct = False
            hosters.append({'multi-part': False, 'url': stream_url, 'host': host,
                            'class': self, 'quality': quality, 'views': None,
                            'rating': None, 'direct': direct})
    return hosters
def get_sources(self, video):
    """Scrape player-data/episode-data attributes from the video page.

    Embed players are fetched and parsed for direct sources; any other
    player URL is kept as an indirect hoster with quality from its label.
    """
    hosters = []
    sources = {}
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for match in re.finditer('player-data="([^"]+)[^>]+episode-data="([^"]+)[^>]*>(.*?)</a>', html, re.DOTALL):
        player_url, ep_id, label = match.groups()
        # Skip players that belong to a different episode than the one requested.
        if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match(video, ep_id): continue
        label = label.strip()
        headers = {'Referer': page_url}
        if re.match('https?://embed', player_url):
            # Embedded player: pull direct sources out of the embed page.
            src_html = self._http_get(player_url, headers=headers, cache_limit=.5)
            sources.update(scraper_utils.parse_sources_list(self, src_html))
            sources.update(self.__get_sources(src_html, label))
        else:
            # Non-embed player: indirect link; map the label to a quality.
            sources[player_url] = {'direct': False, 'quality': Q_MAP.get(label.upper(), QUALITIES.HIGH)}
    for source, value in sources.iteritems():
        direct = value['direct']
        quality = value['quality']
        if direct:
            host = scraper_utils.get_direct_hostname(self, source)
            stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        else:
            host = urlparse.urlparse(source).hostname
            stream_url = source
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scan embed-responsive iframes on the page.

    Iframes on known direct hosts are parsed for streams; anything else is
    emitted as an indirect hoster at the page-default quality.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    # Movies default to 720p, episodes to HIGH.
    page_quality = QUALITIES.HD720 if video.video_type == VIDEO_TYPES.MOVIE else QUALITIES.HIGH
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'embed-responsive'}):
        iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='data-src')
        if iframe_url:
            iframe_url = iframe_url[0].attrs['data-src']
            iframe_host = urlparse.urlparse(iframe_url).hostname
            if iframe_host in DIRECT_HOSTS:
                sources = self.__parse_streams(iframe_url, url)
            else:
                sources = {iframe_url: {'quality': scraper_utils.get_quality(video, iframe_host, page_quality), 'direct': False}}
            for source in sources:
                quality = sources[source]['quality']
                direct = sources[source]['direct']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, source)
                    stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                else:
                    host = urlparse.urlparse(source).hostname
                    stream_url = source
                hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Parse the pic/emb/vb/dir parameter blob and emit one hoster per entry.

    Each '&'-separated entry is dispatched on its prefix:
    dir= redirect lookup, vb= vidbux embed, pic= picasa proxy, emb(2)= generic embed.
    """
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    match = re.search('((?:pic|emb|vb|dir|emb2)=[^<]+)', html)
    if match:
        embeds = match.group(1)
        for stream_url in embeds.split('&'):
            if stream_url.startswith('dir='):
                # 'dir' entries redirect through DIR_URL to the real stream URL.
                headers = {'Referer': url}
                html = self._http_get(DIR_URL, params={'v': stream_url[3:]}, headers=headers, auth=False, allow_redirect=False, cache_limit=.5)
                if html.startswith('http'):
                    stream_url = html + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
                    host = scraper_utils.get_direct_hostname(self, stream_url)
                    direct = True
                    quality = QUALITIES.HD720
                else:
                    continue
            elif stream_url.startswith('vb='):
                # 'vb' entries are vidbux embeds (indirect).
                stream_url = 'http://www.vidbux.com/%s' % (stream_url[3:])
                host = 'vidbux.com'
                direct = False
                quality = scraper_utils.get_quality(video, host, QUALITIES.HD1080)
            elif stream_url.startswith('pic='):
                # 'pic' entries resolve through the PHP picasa proxy; take the
                # first item flagged as a video, else skip the entry (for/else).
                data = {'url': stream_url[4:]}
                html = self._http_get(PHP_URL, data=data, auth=False, cache_limit=1)
                js_data = scraper_utils.parse_json(html, PHP_URL)
                host = scraper_utils.get_direct_hostname(self, stream_url)
                direct = True
                for item in js_data:
                    if item.get('medium') == 'video':
                        stream_url = item['url']
                        quality = scraper_utils.width_get_quality(item['width'])
                        break
                else:
                    continue
            elif stream_url.startswith(('emb=', 'emb2=')):
                # Generic third-party embeds (indirect).
                stream_url = re.sub('emb\d*=', '', stream_url)
                host = urlparse.urlparse(stream_url).hostname
                direct = False
                quality = scraper_utils.get_quality(video, host, QUALITIES.HD720)
            else:
                continue
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Walk film entries, query the server and episode AJAX endpoints, and
    build sources from the returned embed links and heights."""
    source_url = self.get_url(video)
    sources = []
    if not source_url or source_url == FORCE_NO_MATCH: return sources
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    for attrs, _content in dom_parser2.parse_dom(html, 'img', req=['data-id', 'data-name']):
        film_id, data_name = attrs['data-id'], attrs['data-name']
        data = {'id': film_id, 'n': data_name}
        server_url = scraper_utils.urljoin(self.base_url, SERVER_URL)
        server_url = server_url % (film_id)
        headers = {'Referer': page_url}
        headers.update(XHR)
        html = self._http_get(server_url, data=data, headers=headers, cache_limit=.5)
        for attrs, _content in dom_parser2.parse_dom(html, 'a', req='data-id'):
            data = {'epid': attrs['data-id']}
            ep_url = scraper_utils.urljoin(self.base_url, EP_URL)
            ep_url = ep_url % (attrs['data-id'])
            headers = {'Referer': page_url}
            headers.update(XHR)
            html = self._http_get(ep_url, data=data, headers=headers, cache_limit=.5)
            js_data = scraper_utils.parse_json(html, ep_url)
            # The JSON either embeds iframes, or supplies parallel lists of
            # links ('l') and heights ('q'); fall back through both shapes.
            try:
                links = [r.attrs['src'] for r in dom_parser2.parse_dom(js_data['link']['embed'], 'iframe', req='src')]
            except:
                try:
                    links = js_data['link']['l']
                except:
                    links = []
            try:
                heights = js_data['link']['q']
            except:
                heights = []
            # Py2 map(None, ...) behaves like zip_longest: pads the shorter
            # list with None so every link pairs with a (possibly None) height.
            for stream_url, height in map(None, links, heights):
                match = re.search('movie_url=(.*)', stream_url)
                if match:
                    stream_url = match.group(1)
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                    stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                    direct = True
                else:
                    host = urlparse.urlparse(stream_url).hostname
                    if height:
                        quality = scraper_utils.height_get_quality(height)
                    else:
                        quality = QUALITIES.HD720
                    direct = False
                source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                sources.append(source)
    return sources
def get_sources(self, video):
    """Collect hosters via the shared gk-links resolver.

    Page quality comes from the 'status' <dd>; for episodes only the
    'servers-list' fragment is scanned for links.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    page_quality = dom_parser2.parse_dom(html, 'dd', {'class': 'status'})
    if page_quality:
        page_quality = QUALITY_MAP.get(page_quality[0].content, QUALITIES.HIGH)
    else:
        page_quality = QUALITIES.HIGH

    if video.video_type == VIDEO_TYPES.EPISODE:
        fragment = dom_parser2.parse_dom(html, 'div', {'id': 'servers-list'})
        gk_html = fragment[0].content if fragment else ''
    else:
        gk_html = html

    link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
    player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
    for stream_url, quality in scraper_utils.get_gk_links(self, gk_html, page_url, page_quality, link_url, player_url).iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            # Direct gvideo stream: keep the quality get_gk_links reported.
            # (Removed the original's redundant no-op `quality = quality`.)
            direct = True
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(video, host, quality)
            direct = False

        if host is not None:
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                      'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)

    return hosters
def get_sources(self, video):
    """Build direct hosters straight from the link path.

    Movies use the page URL itself as the stream; episodes come from
    __match_episode() and are stored relative to base_url.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': self.base_url + source_url}
    if video.video_type == VIDEO_TYPES.MOVIE:
        meta = scraper_utils.parse_movie_link(source_url)
        stream_url = source_url + scraper_utils.append_headers(headers)
        quality = scraper_utils.height_get_quality(meta['height'])
        hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
        if 'format' in meta: hoster['format'] = meta['format']
        hosters.append(hoster)
    else:
        for episode in self.__match_episode(source_url, video):
            meta = scraper_utils.parse_episode_link(episode['title'])
            stream_url = episode['url'] + scraper_utils.append_headers(headers)
            # Episode URLs are kept relative to base_url.
            stream_url = stream_url.replace(self.base_url, '')
            quality = scraper_utils.height_get_quality(meta['height'])
            hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            if 'format' in meta: hoster['format'] = meta['format']
            if 'size' in episode: hoster['size'] = scraper_utils.format_size(int(episode['size']))
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape embeds[] entries from the page.

    picasa 'play-en.php' links are GK-decrypted and expanded via
    parse_google; other iframes are fetched and parsed for direct sources,
    with download.php links HEAD-resolved to their redirect targets.
    """
    source_url = self.get_url(video)
    sources = {}
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=0)
    for match in re.finditer("embeds\[(\d+)\]\s*=\s*'([^']+)", html):
        match = re.search('src="([^"]+)', match.group(2))
        if not match: continue

        iframe_url = match.group(1)
        if 'play-en.php' in iframe_url:
            # The proxy link is GK-encrypted, after an optional '*' prefix.
            match = re.search('id=([^"&]+)', iframe_url)
            if match:
                proxy_link = match.group(1)
                proxy_link = proxy_link.split('*', 1)[-1]
                picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                for stream_url in scraper_utils.parse_google(self, picasa_url):
                    sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
        else:
            html = self._http_get(iframe_url, cache_limit=0)
            temp_sources = scraper_utils.parse_sources_list(self, html)
            # BUG FIX: iterate over a snapshot of the keys. The original
            # iterated temp_sources while inserting/deleting entries, which
            # raises RuntimeError ("dictionary changed size during iteration")
            # as soon as a download.php source is present.
            for source in list(temp_sources):
                if 'download.php' in source:
                    redir_html = self._http_get(source, allow_redirect=False, method='HEAD', cache_limit=0)
                    if redir_html.startswith('http'):
                        temp_sources[redir_html] = temp_sources[source]
                        del temp_sources[source]
            sources.update(temp_sources)
            for source in dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req='src'):
                sources[source.attrs['src']] = {'quality': QUALITIES.HD720, 'direct': True, 'referer': iframe_url}

    for source, values in sources.iteritems():
        host = scraper_utils.get_direct_hostname(self, source)
        headers = {'User-Agent': scraper_utils.get_ua()}
        if 'referer' in values: headers['Referer'] = values['referer']
        stream_url = source + scraper_utils.append_headers(headers)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(source)
        else:
            quality = values['quality']
            if quality not in Q_ORDER: quality = QUALITY_MAP.get(values['quality'], QUALITIES.HIGH)
        hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                  'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape direct movie links from the anchors on the video page."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
        stream_url = attrs['href']
        if MOVIE_URL not in stream_url:
            continue

        meta = scraper_utils.parse_movie_link(stream_url)
        stream_url = scraper_utils.pathify_url(stream_url) + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {
            'multi-part': False,
            'host': scraper_utils.get_direct_hostname(self, stream_url),
            'class': self,
            'quality': scraper_utils.height_get_quality(meta['height']),
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': True,
        }
        if 'format' in meta:
            hoster['format'] = meta['format']
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Read the poster quality label, then fetch each tab's iframe, unpack
    any packed JS, and parse the resulting sources list."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    q_str = dom_parser2.parse_dom(html, 'div', {'class': 'poster-qulabel'})
    if q_str:
        q_str = q_str[0].content.replace(' ', '').upper()
        page_quality = Q_MAP.get(q_str, QUALITIES.HIGH)
    else:
        page_quality = QUALITIES.HIGH
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'tab_box'}):
        iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='src')
        if iframe_url:
            iframe_url = iframe_url[0].attrs['src']
            # Youtube iframes are trailers, not streams.
            if 'youtube' in iframe_url: continue
            html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
            # Unpack eval(function(...)) packed JS and append it to the html
            # so the sources parser can see the deobfuscated links too.
            for match in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
                js_data = jsunpack.unpack(match.group(1))
                js_data = js_data.replace('\\', '')
                html += js_data
            sources = scraper_utils.parse_sources_list(self, html)
            if not sources:
                # No direct sources found: fall back to the iframe as an
                # indirect hoster at the page's labeled quality.
                sources = {iframe_url: {'quality': page_quality, 'direct': False}}
            for source, values in sources.iteritems():
                direct = values['direct']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, source)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(source)
                    else:
                        quality = values['quality']
                    source += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                else:
                    host = urlparse.urlparse(source).hostname
                    quality = scraper_utils.get_quality(video, host, values['quality'])
                hoster = {'multi-part': False, 'url': source, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Fetch the film-container iframe, parse its sources, and return direct
    hosters sorted so entries with a bitrate tag ('extra') sort first."""
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'film-container'})
    if fragment:
        iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
        if iframe_url:
            iframe_url = scraper_utils.urljoin(self.base_url, iframe_url[0].attrs['src'])
            headers = {'Referer': page_url}
            html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
            sources = scraper_utils.parse_sources_list(self, html)
            for source in sources:
                quality = sources[source]['quality']
                host = scraper_utils.get_direct_hostname(self, source)
                stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url})
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                # Keep a bitrate marker (e.g. '2500kbps') when the URL has one.
                match = re.search('(\d+[a-z]bps)', source)
                if match: hoster['extra'] = match.group(1)
                hosters.append(hoster)
    # Highest bitrate tag first; untagged entries ('') sort last.
    hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
    return hosters
def __get_pk_links(self, html):
    """Resolve the embedded picasa 'parametros' query into direct hosters."""
    hosters = []
    param_match = re.search('var\s+parametros\s*=\s*"([^"]+)', html)
    if not param_match:
        return hosters

    params = scraper_utils.parse_query(param_match.group(1))
    if 'pic' not in params:
        return hosters

    payload = {'sou': 'pic', 'fv': '25', 'url': params['pic']}
    html = self._http_get(PK_URL, headers=XHR, data=payload, cache_limit=0)
    for item in scraper_utils.parse_json(html, PK_URL):
        if not ('url' in item and item['url']):
            continue

        # Quality preference: width, then height, else assume 720p.
        if 'width' in item and item['width']:
            quality = scraper_utils.width_get_quality(item['width'])
        elif 'height' in item and item['height']:
            quality = scraper_utils.height_get_quality(item['height'])
        else:
            quality = QUALITIES.HD720

        stream_url = item['url'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hosters.append({
            'multi-part': False,
            'url': stream_url,
            'class': self,
            'quality': quality,
            'host': scraper_utils.get_direct_hostname(self, item['url']),
            'rating': None,
            'views': None,
            'direct': True,
        })
    return hosters
def get_sources(self, video):
    """Look up the play() URL for this title inside /javascript/movies.js and
    expand google-drive links into per-quality streams."""
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    js_url = scraper_utils.urljoin(self.base_url, '/javascript/movies.js')
    html = self._http_get(js_url, cache_limit=48)
    if source_url.startswith('/'): source_url = source_url[1:]
    pattern = '''getElementById\(\s*"%s".*?play\(\s*'([^']+)''' % (source_url)
    match = re.search(pattern, html, re.I)
    if match:
        stream_url = match.group(1)
        if 'drive.google' in stream_url or 'docs.google' in stream_url:
            # Google drive/docs links expand into one stream per quality.
            sources = scraper_utils.parse_google(self, stream_url)
        else:
            sources = [stream_url]
        for source in sources:
            stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            host = scraper_utils.get_direct_hostname(self, source)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source)
                direct = True
            elif 'youtube' in stream_url:
                # Youtube links are handed off as an indirect hoster.
                quality = QUALITIES.HD720
                direct = False
                host = 'youtube.com'
            else:
                quality = QUALITIES.HIGH
                direct = True
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Follow the 'click to play' link, request each episode button's video
    endpoint, and convert the collected streams into source dicts."""
    sources = []
    streams = {}
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return sources
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    match = dom_parser2.parse_dom(html, 'a', {'title': re.compile('click to play', re.I)}, req='href')
    if not match: return sources
    page_url = scraper_utils.urljoin(self.base_url, match[0].attrs['href'])
    headers = {'Referer': page_url}
    html = self._http_get(page_url, headers=headers, cache_limit=.02)
    for attrs, _content in dom_parser2.parse_dom(html, 'a', {'class': 'mw-episode-btn'}, req=['data-target-i', 'data-target-e', 'title']):
        try:
            # Buttons for dead streams carry a "wasn't alive" title.
            if "wasn't alive" in attrs['title']: continue
            vid_url = scraper_utils.urljoin(self.base_url, VID_URL)
            vid_url = vid_url.format(data_i=attrs['data-target-i'], data_e=attrs['data-target-e'])
            headers = {'Referer': page_url}
            headers.update(XHR)
            cookies = self.__get_cookies(html, attrs)
            vid_html = self._http_get(vid_url, headers=headers, cookies=cookies, cache_limit=.02)
            streams.update(self.__get_js_sources(vid_html, vid_url, cookies, page_url))
        except scraper.ScrapeError as e:
            # Best-effort: log and move on to the next episode button.
            logger.log('IOMovies Error (%s): %s in %s' % (e, vid_url, page_url))
    for stream_url, values in streams.iteritems():
        if values['direct']:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(values['label'])
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = QUALITIES.HIGH
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': values['direct']}
        sources.append(source)
    return sources
def __get_king_links(self, iframe_url):
    """Query the player's GetVideoSources XHR and return direct hosters
    (tagged with Turkish subtitles)."""
    hosters = []
    match = re.search('v=(.*)', iframe_url)
    if match:
        data = {'ID': match.group(1)}
        headers = {'Referer': iframe_url}
        headers.update(XHR)
        xhr_url = iframe_url.split('?')[0]
        html = self._http_get(xhr_url, params={'p': 'GetVideoSources'}, data=data, headers=headers, cache_limit=.5)
        js_data = scraper_utils.parse_json(html, xhr_url)
        try:
            for source in js_data['VideoSources']:
                stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                host = scraper_utils.get_direct_hostname(self, source['file'])
                label = source.get('label', '')
                # Quality precedence: gvideo > height-style label (e.g. '720p')
                # > mapped label name.
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(source['file'])
                elif re.search('\d+p?', label):
                    quality = scraper_utils.height_get_quality(label)
                else:
                    quality = QUALITY_MAP.get(label, QUALITIES.HIGH)
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
                hosters.append(hoster)
        except:
            # Malformed/missing VideoSources: return whatever was collected.
            pass
    return hosters
def get_sources(self, video):
    """Resolve the page's video_id through INFO_URL into direct hosters."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    vid_match = re.search('var\s*video_id="([^"]+)', html)
    if not vid_match:
        return hosters

    headers = {'Referer': page_url}
    headers.update(XHR)
    html = self._http_get(INFO_URL, data={'v': vid_match.group(1)}, headers=headers, cache_limit=.5)
    sources = scraper_utils.parse_json(html, INFO_URL)
    for source in sources:
        url_match = re.search('url=(.*)', sources[source])
        if not url_match:
            continue

        stream_url = urllib.unquote(url_match.group(1))
        host = scraper_utils.get_direct_hostname(self, stream_url)
        # gvideo URLs carry their own quality; otherwise the JSON key is a height.
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(source)
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': True,
        })
    return hosters
def get_sources(self, video):
    """Gather per-server episode links and resolve them through the playlist
    JSON endpoints into hosters."""
    hosters = []
    sources = {}
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    movie_id, watching_url, html = self.__get_source_page(video.video_type, page_url)
    links = []
    for match in dom_parser2.parse_dom(html, 'li', {'class': 'ep-item'}, req=['data-id', 'data-server']):
        label = dom_parser2.parse_dom(match.content, 'a', req='title')
        if not label: continue
        if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match(video, label[0].attrs['title']): continue
        links.append((match.attrs['data-server'], match.attrs['data-id']))
    for link_type, link_id in links:
        # Server types 12-15 use the simple playlist endpoint; other types
        # need the token-protected endpoint (only usable when a scraper_url
        # is configured in settings).
        if link_type in ['12', '13', '14', '15']:
            url = scraper_utils.urljoin(self.base_url, PLAYLIST_URL1.format(ep_id=link_id))
            sources.update(self.__get_link_from_json(url))
        elif kodi.get_setting('scraper_url'):
            url = scraper_utils.urljoin(self.base_url, PLAYLIST_URL2.format(ep_id=link_id))
            params = self.__get_params(movie_id, link_id, watching_url)
            if params is not None: url += '?' + urllib.urlencode(params)
            sources.update(self.__get_links_from_json2(url, page_url, video.video_type))
    for source in sources:
        if not source.lower().startswith('http'): continue
        if sources[source]['direct']:
            host = scraper_utils.get_direct_hostname(self, source)
            # NOTE(review): gvideo links are deliberately left without the
            # appended header suffix — presumably they reject extra query
            # params; confirm before changing.
            if host != 'gvideo':
                stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
            else:
                stream_url = source
        else:
            host = urlparse.urlparse(source).hostname
            stream_url = source
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': sources[source]['quality'], 'views': None, 'rating': None, 'url': stream_url, 'direct': sources[source]['direct']}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Decode the inline JWPlayer-style "sources" array into direct hosters."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=1)
    sources_match = re.search('''["']sources['"]\s*:\s*\[(.*?)\]''', html, re.DOTALL)
    if not sources_match:
        return hosters

    for file_match in re.finditer('''['"]*file['"]*\s*:\s*([^\(]+)''', sources_match.group(1), re.DOTALL):
        stream_url = self.__decode(file_match.group(1), html)
        if not stream_url:
            continue

        # Movies are assumed 720p; episodes default to HIGH.
        if video.video_type == VIDEO_TYPES.MOVIE:
            quality = QUALITIES.HD720
        else:
            quality = QUALITIES.HIGH
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url, 'Cookie': self._get_stream_cookies()})
        hosters.append({
            'multi-part': False,
            'host': scraper_utils.get_direct_hostname(self, stream_url),
            'class': self,
            'url': stream_url,
            'quality': quality,
            'views': None,
            'rating': None,
            'direct': True,
        })
    return hosters
def __get_embed_links(self, html):
    """Build direct hosters from the sources list embedded in |html|.

    Returns a list of hoster dicts tagged with Turkish subtitles.
    """
    hosters = []
    sources = scraper_utils.parse_sources_list(self, html)
    for source in sources:
        # BUG FIX: |source| is a URL key of the sources dict; the quality
        # lives in the dict *value*. The original indexed the key string
        # with ['quality'], which raises TypeError. This matches the
        # sources[source]['quality'] usage elsewhere in this file.
        quality = sources[source]['quality']
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False,
                  'host': scraper_utils.get_direct_hostname(self, source),
                  'class': self,
                  'quality': quality,
                  'views': None,
                  'rating': None,
                  'url': stream_url,
                  'direct': True,
                  'subs': 'Turkish Subtitles'}
        hosters.append(hoster)
    return hosters
def __get_direct(self, html, page_url):
    """Parse a JWPlayer-style sources array into direct source dicts.

    Returns a (best_quality, sources) tuple; best_quality is the highest
    quality encountered (defaults to HIGH).
    """
    sources = []
    best_quality = QUALITIES.HIGH
    match = re.search('''['"]?sources["']?\s*:\s*\[(.*?)\}\s*,?\s*\]''', html, re.DOTALL)
    if match:
        files = re.findall('''['"]?file['"]?\s*:\s*(.*?)['"]([^'"]+)''', match.group(1), re.DOTALL)
        labels = re.findall('''['"]?label['"]?\s*:\s*['"]([^'"]*)''', match.group(1), re.DOTALL)
        # Py2 map(None, ...) acts like zip_longest: the shorter list is
        # padded with None so every file pairs with a (possibly None) label.
        for stream, label in map(None, files, labels):
            # Each |stream| is (prefix-before-quote, url); an atob( prefix
            # means the URL is base64-encoded.
            func, stream_url = stream
            if 'atob' in func: stream_url = base64.b64decode(stream_url)
            stream_url = stream_url.replace(' ', '%20')
            host = scraper_utils.get_direct_hostname(self, stream_url)
            # Strip an 'HD' suffix so the label parses as a height.
            label = re.sub(re.compile('\s*HD', re.I), '', label)
            quality = scraper_utils.height_get_quality(label)
            # Track the best quality seen for the caller's fallback use.
            if Q_ORDER[quality] > Q_ORDER[best_quality]: best_quality = quality
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            sources.append(source)
    return best_quality, sources
def get_sources(self, video):
    """Collect iframe links (movie buttons, or links matching the requested
    episode) and parse each iframe for gk-player or inline sources."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    views = None
    fragment = dom_parser2.parse_dom(html, 'span', {'class': 'post-views'})
    if fragment: views = re.sub('[^\d]', '', fragment[0].content)
    iframe_urls = []
    if video.video_type == VIDEO_TYPES.MOVIE:
        iframe_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(html, 'a', {'class': ['orange', 'abutton']}, req='href')]
    else:
        for label, link in self.__get_episode_links(html):
            if int(label) == int(video.episode):
                iframe_urls.append(link)
    for iframe_url in iframe_urls:
        headers = {'Referer': url}
        html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
        # A {link: "..."} blob marks a gk-style player; otherwise parse the
        # page's inline sources list.
        match = re.search('{link\s*:\s*"([^"]+)', html)
        if match:
            sources = self.__get_gk_links(match.group(1), iframe_url)
        else:
            sources = scraper_utils.parse_sources_list(self, html)
        for source in sources:
            stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            direct = sources[source]['direct']
            quality = sources[source]['quality']
            if sources[source]['direct']:
                host = scraper_utils.get_direct_hostname(self, source)
            else:
                host = urlparse.urlparse(source).hostname
            hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality, 'host': host, 'rating': None, 'views': views, 'direct': direct}
            hosters.append(hoster)
    return hosters
def __get_pk_links(self, html):
    """Resolve the embedded picasa 'parametros' query into direct hosters."""
    hosters = []
    match = re.search('var\s+parametros\s*=\s*"([^"]+)', html)
    if match:
        params = scraper_utils.parse_query(match.group(1))
        if 'pic' in params:
            data = {'sou': 'pic', 'fv': '25', 'url': params['pic']}
            html = self._http_get(PK_URL, headers=XHR, data=data, cache_limit=0)
            js_data = scraper_utils.parse_json(html, PK_URL)
            for item in js_data:
                if 'url' in item and item['url']:
                    # Quality preference: width, then height, else assume 720p.
                    if 'width' in item and item['width']:
                        quality = scraper_utils.width_get_quality(item['width'])
                    elif 'height' in item and item['height']:
                        quality = scraper_utils.height_get_quality(item['height'])
                    else:
                        quality = QUALITIES.HD720
                    stream_url = item['url'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                    hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality, 'host': scraper_utils.get_direct_hostname(self, item['url']), 'rating': None, 'views': None, 'direct': True}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape the view count, follow a dedicated full-movie page when linked,
    and resolve both embedded players and per-episode AJAX links."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    views = None
    fragment = dom_parser2.parse_dom(html, 'img', {'src': re.compile('[^"]*view_icon.png')})
    if fragment:
        match = re.search('(\d+)', fragment[0].content)
        if match: views = match.group(1)
    # Prefer the dedicated full-movie page when one is linked.
    match = re.search('href="([^"]+-full-movie-[^"]+)', html)
    if match:
        url = match.group(1)
        html = self._http_get(url, cache_limit=.5)
    sources = self.__get_embedded(html)
    # Each episode/server button is resolved through the AJAX endpoint.
    for link in dom_parser2.parse_dom(html, 'span', {'class': 'btn-eps'}, req='link'):
        link = link.attrs['link']
        ajax_url = scraper_utils.urljoin(self.base_url, AJAX_URL)
        headers = {'Referer': url}
        headers.update(XHR)
        html = self._http_get(ajax_url, params={'v': link}, headers=headers, cache_limit=.5)
        sources.update(self.__get_sources(html))
    for source in sources:
        if sources[source]['direct']:
            host = scraper_utils.get_direct_hostname(self, source)
        else:
            host = urlparse.urlparse(source).hostname
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        direct = sources[source]['direct']
        quality = sources[source]['quality']
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': direct}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Collect <source> tags and iframes from each tab plus mirror links;
    sources without an explicit quality inherit the page's best quality."""
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for _attrs, div in dom_parser2.parse_dom(html, 'div', {'class': 'tabcontent'}):
        for attrs, _content in dom_parser2.parse_dom(div, 'source', req='src'):
            source = attrs['src'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
            # Quality unknown here; filled in from page_quality below.
            sources[source] = {'quality': None, 'direct': True}
        iframe_url = dom_parser2.parse_dom(div, 'iframe', req='src')
        if iframe_url:
            iframe_url = iframe_url[0].attrs['src']
            if 'songs2dl' in iframe_url:
                # songs2dl iframes host the streams directly; parse them.
                headers = {'Referer': page_url}
                iframe_html = self._http_get(iframe_url, headers=headers, cache_limit=1)
                sources.update(scraper_utils.parse_sources_list(self, iframe_html))
            else:
                sources[iframe_url] = {'quality': None, 'direct': False}
    sources.update(self.__get_mirror_links(html, video))
    page_quality = self.__get_best_quality(sources)
    for source, values in sources.iteritems():
        direct = values['direct']
        if direct:
            host = scraper_utils.get_direct_hostname(self, source)
        else:
            host = urlparse.urlparse(source).hostname
        if values['quality'] is None: values['quality'] = page_quality
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': values['quality'], 'direct': direct}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape sources from the main page and every alternate episode/server
    page, skipping pages already marked active."""
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    headers = {'Accept-Language': 'en-US,en;q=0.5'}
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, headers=headers, cache_limit=2)
    if video.video_type == VIDEO_TYPES.MOVIE:
        sources.update(self.__scrape_sources(html, page_url))
        # Visit every non-active server/episode page exactly once.
        pages = set([r.attrs['href'] for r in dom_parser2.parse_dom(html, 'a', {'class': 'btn-eps'}, req='href')])
        active = set([r.attrs['href'] for r in dom_parser2.parse_dom(html, 'a', {'class': 'active'}, req='href')])
        for page in list(pages - active):
            page_url = scraper_utils.urljoin(self.base_url, page)
            html = self._http_get(page_url, headers=headers, cache_limit=2)
            sources.update(self.__scrape_sources(html, page_url))
    else:
        for page in self.__match_episode(video, html):
            page_url = scraper_utils.urljoin(self.base_url, page)
            html = self._http_get(page_url, headers=headers, cache_limit=2)
            sources.update(self.__scrape_sources(html, page_url))
    for source, values in sources.iteritems():
        if not source.lower().startswith('http'): continue
        if values['direct']:
            host = scraper_utils.get_direct_hostname(self, source)
            # NOTE(review): gvideo links are deliberately left without the
            # header suffix — presumably they reject extra query params;
            # confirm before changing.
            if host != 'gvideo':
                stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
            else:
                stream_url = source
        else:
            host = urlparse.urlparse(source).hostname
            stream_url = source
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': values['quality'], 'views': None, 'rating': None, 'url': stream_url, 'direct': values['direct']}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Collect hosters from the page's server-name/server-play lists.

    Google entries are expanded into gvideo streams; everything else becomes
    an indirect hoster keyed by the (normalized) server name.
    Returns a list of hoster dicts; empty when the page can't be resolved.
    """
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    hosts = [r.content for r in dom_parser2.parse_dom(html, 'p', {'class': 'server_servername'})]
    links = [r.content for r in dom_parser2.parse_dom(html, 'p', {'class': 'server_play'})]
    for host, link_frag in zip(hosts, links):
        stream_url = dom_parser2.parse_dom(link_frag, 'a', req='href')
        if not stream_url: continue

        stream_url = stream_url[0].attrs['href']
        # BUG FIX: re.sub()'s 4th positional argument is |count|, not |flags|;
        # the original passed re.I there, silently limiting the substitution
        # count to 2 instead of making the match case-insensitive.
        host = re.sub('^Server\s*', '', host, flags=re.I)
        host = re.sub('\s*Link\s+\d+', '', host)
        if host.lower() == 'google':
            sources = self.__get_gvideo_links(stream_url)
        else:
            sources = [{'host': host, 'link': stream_url}]

        for source in sources:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
                stream_url = source['link'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                direct = True
            else:
                stream_url = scraper_utils.pathify_url(source['link'])
                host = HOST_SUB.get(source['host'].lower(), source['host'])
                quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                direct = False

            hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                      'quality': quality, 'views': None, 'rating': None, 'direct': direct}
            hosters.append(hoster)

    return hosters