def get_sources(self, video):
    """Return direct hoster dicts scraped from the video's page.

    Locates the page's "screen" fragment, follows its external player
    script when present, then harvests <source> tags for stream URLs.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*screen[^"]*'})
        if fragment:
            js_src = dom_parser.parse_dom(fragment[0], 'script', ret='src')
            if js_src:
                # player markup is delivered by an external script; fetch it
                js_url = urlparse.urljoin(self.base_url, js_src[0])
                html = self._http_get(js_url, cache_limit=.5)
            else:
                html = fragment[0]
            for match in re.finditer('<source[^>]+src="([^"]+)', html):
                stream_url = match.group(1)
                host = self._get_direct_hostname(stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'blogspot' in stream_url:
                    # blogspot streams are google-hosted, so gvideo quality
                    # detection applies even though the hostname differs
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    _, _, height, _ = scraper_utils.parse_movie_link(stream_url)
                    quality = scraper_utils.height_get_quality(height)
                # playback headers are passed inline on the stream url
                stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                hosters.append(hoster)
    return hosters
def __get_gk_links(self, link, iframe_url):
    """Resolve a GK player link token into {stream_url: {'quality', 'direct'}}.

    POSTs the token to GK_URL and handles both JSON response shapes: a
    single string link, or a list of per-quality link dicts.
    """
    sources = {}
    data = {'link': link}
    # copy XHR: updating it directly would mutate the shared module-level
    # header dict and leak Referer/User-Agent into unrelated requests
    headers = dict(XHR)
    headers.update({'Referer': iframe_url, 'User-Agent': USER_AGENT})
    html = self._http_get(GK_URL, data=data, headers=headers, cache_limit=.25)
    js_data = scraper_utils.parse_json(html, GK_URL)
    if 'link' in js_data:
        if isinstance(js_data['link'], basestring):
            stream_url = js_data['link']
            if self._get_direct_hostname(stream_url) == 'gvideo':
                # a google video link expands into multiple quality variants
                for source in self._parse_google(stream_url):
                    sources[source] = {'quality': scraper_utils.gv_get_quality(source), 'direct': True}
            else:
                sources[stream_url] = {'quality': QUALITIES.HIGH, 'direct': False}
        else:
            # renamed from |link| to avoid shadowing the method parameter
            for item in js_data['link']:
                stream_url = item['link']
                if self._get_direct_hostname(stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'label' in item:
                    quality = scraper_utils.height_get_quality(item['label'])
                else:
                    quality = QUALITIES.HIGH
                sources[stream_url] = {'quality': quality, 'direct': True}
    return sources
def get_sources(self, video):
    """Scrape direct hosters from the page's 'playex' player fragment.

    Follows the player iframe, preferring a google <object data=...> embed
    when present, otherwise falling back to a generic sources list.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment:
        html = fragment[0].content
    iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
    if not iframe_url:
        return hosters
    iframe_url = iframe_url[0].attrs['src']
    if iframe_url.startswith('/'):
        iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    obj = dom_parser2.parse_dom(html, 'object', req='data')
    if obj:
        # google object embed: expand into per-quality gvideo streams
        streams = dict((stream_url, {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True})
                       for stream_url in scraper_utils.parse_google(self, obj[0].attrs['data']))
    else:
        streams = scraper_utils.parse_sources_list(self, html)
    for stream_url, values in streams.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = values['quality']
        # playback headers are passed inline on the stream url
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(source)
    return hosters
def get_sources(self, video):
    """Scrape embeds[] entries for iframe sources and return hoster dicts.

    play-en.php iframes carry an encrypted picasa id; other iframes are
    fetched and parsed for a sources list plus raw <source> tags.
    """
    source_url = self.get_url(video)
    sources = {}
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=0.5)
        for match in re.finditer("embeds\[(\d+)\]\s*=\s*'([^']+)", html):
            match = re.search('src="([^"]+)', match.group(2))
            if match:
                iframe_url = match.group(1)
                if "play-en.php" in iframe_url:
                    match = re.search('id=([^"&]+)', iframe_url)
                    if match:
                        proxy_link = match.group(1)
                        proxy_link = proxy_link.split("*", 1)[-1]
                        picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                        for stream_url in self._parse_google(picasa_url):
                            sources[stream_url] = {"quality": scraper_utils.gv_get_quality(stream_url), "direct": True}
                else:
                    html = self._http_get(iframe_url, cache_limit=0.25)
                    temp_sources = self._parse_sources_list(html)
                    # iterate a snapshot of the keys: the loop rewrites
                    # download.php entries, and mutating a dict while
                    # iterating it raises RuntimeError
                    for source in list(temp_sources):
                        if "download.php" in source:
                            redir_html = self._http_get(source, allow_redirect=False, method="HEAD", cache_limit=0)
                            if redir_html.startswith("http"):
                                temp_sources[redir_html] = temp_sources[source]
                                del temp_sources[source]
                    sources.update(temp_sources)
                    for source in dom_parser.parse_dom(html, "source", {"type": "video/mp4"}, ret="src"):
                        sources[source] = {"quality": QUALITIES.HD720, "direct": True}
        for source in sources:
            host = self._get_direct_hostname(source)
            stream_url = source + "|User-Agent=%s" % (scraper_utils.get_ua())
            if host == "gvideo":
                quality = scraper_utils.gv_get_quality(source)
            else:
                quality = sources[source]["quality"]
                # normalize site-specific quality strings to known values
                if quality not in Q_ORDER:
                    quality = QUALITY_MAP.get(sources[source]["quality"], QUALITIES.HIGH)
            hoster = {"multi-part": False, "url": stream_url, "host": host, "class": self, "quality": quality, "views": None, "rating": None, "direct": True}
            hosters.append(hoster)
    return hosters
def __get_gk_links(self, html, page_url, page_quality):
    """Resolve GK 'load_server' entries into {stream_url: quality}.

    Each <li> carries data-film/name/server attributes POSTed to LINK_URL;
    the JSON 's' payload is either a string (single redirected source,
    possibly a playlist of file dicts) or a list of file dicts.
    """
    sources = {}
    fragment = dom_parser.parse_dom(html, 'div', {'id': 'load_server'})
    if fragment:
        for link in dom_parser.parse_dom(fragment[0], 'li'):
            film_id = dom_parser.parse_dom(link, 'a', ret='data-film')
            name_id = dom_parser.parse_dom(link, 'a', ret='data-name')
            server_id = dom_parser.parse_dom(link, 'a', ret='data-server')
            if film_id and name_id and server_id:
                data = {'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0]}
                # copy XHR: setting Referer on it directly mutated the
                # shared module-level header dict
                headers = dict(XHR)
                headers['Referer'] = page_url
                url = urlparse.urljoin(self.base_url, LINK_URL)
                html = self._http_get(url, data=data, headers=headers, cache_limit=.25)
                js_data = scraper_utils.parse_json(html, url)
                if 's' in js_data:
                    if isinstance(js_data['s'], basestring):
                        stream_urls = self.__get_real_url(js_data['s'])
                        if stream_urls is not None:
                            if isinstance(stream_urls, basestring):
                                sources[stream_urls] = page_quality
                            else:
                                for item in stream_urls:
                                    stream_url = item['files']
                                    if self._get_direct_hostname(stream_url) == 'gvideo':
                                        quality = scraper_utils.gv_get_quality(stream_url)
                                    elif 'quality' in item:
                                        quality = scraper_utils.height_get_quality(item['quality'])
                                    else:
                                        quality = page_quality
                                    sources[stream_url] = quality
                    else:
                        for link in js_data['s']:
                            stream_url = self.__get_real_url(link['file'])
                            if stream_url is not None:
                                if self._get_direct_hostname(stream_url) == 'gvideo':
                                    quality = scraper_utils.gv_get_quality(stream_url)
                                elif 'label' in link:
                                    quality = scraper_utils.height_get_quality(link['label'])
                                else:
                                    quality = page_quality
                                sources[stream_url] = quality
    return sources
def get_sources(self, video):
    """Scrape embeds[] iframes into hoster dicts.

    play-en.php iframes carry an encrypted picasa id; other iframes are
    fetched and parsed for a sources list plus raw <source> tags (which
    get the iframe as Referer).
    """
    source_url = self.get_url(video)
    sources = {}
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=0)
    for match in re.finditer("embeds\[(\d+)\]\s*=\s*'([^']+)", html):
        match = re.search('src="([^"]+)', match.group(2))
        if match:
            iframe_url = match.group(1)
            if 'play-en.php' in iframe_url:
                match = re.search('id=([^"&]+)', iframe_url)
                if match:
                    proxy_link = match.group(1)
                    proxy_link = proxy_link.split('*', 1)[-1]
                    picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                    for stream_url in scraper_utils.parse_google(self, picasa_url):
                        sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
            else:
                html = self._http_get(iframe_url, cache_limit=0)
                temp_sources = scraper_utils.parse_sources_list(self, html)
                # iterate a snapshot of the keys: rewriting download.php
                # entries while iterating the dict raises RuntimeError
                for source in list(temp_sources):
                    if 'download.php' in source:
                        redir_html = self._http_get(source, allow_redirect=False, method='HEAD', cache_limit=0)
                        if redir_html.startswith('http'):
                            temp_sources[redir_html] = temp_sources[source]
                            del temp_sources[source]
                sources.update(temp_sources)
                for source in dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req='src'):
                    sources[source.attrs['src']] = {'quality': QUALITIES.HD720, 'direct': True, 'referer': iframe_url}
    for source, values in sources.iteritems():
        host = scraper_utils.get_direct_hostname(self, source)
        headers = {'User-Agent': scraper_utils.get_ua()}
        if 'referer' in values:
            headers['Referer'] = values['referer']
        stream_url = source + scraper_utils.append_headers(headers)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(source)
        else:
            quality = values['quality']
            # normalize site-specific quality strings to known values
            if quality not in Q_ORDER:
                quality = QUALITY_MAP.get(values['quality'], QUALITIES.HIGH)
        hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Walk the 'videos' link list, resolve each allowed label through
    __get_sources, and emit hosters (subbed entries get a subs flag)."""
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'videos'})
        if fragment:
            for match in re.finditer('href="([^"]+)[^>]*>([^<]+)', fragment[0]):
                page_url, page_label = match.groups()
                page_label = page_label.lower()
                # only labels the site maps to a known quality are used
                if page_label not in ALLOWED_LABELS:
                    continue
                sources = self.__get_sources(page_url, ALLOWED_LABELS[page_label])
                for source in sources:
                    host = self._get_direct_hostname(source)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(source)
                        direct = True
                        stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
                    elif sources[source]['direct']:
                        quality = sources[source]['quality']
                        direct = True
                        stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
                    else:
                        # indirect link: hand off the bare url to the resolver
                        quality = sources[source]['quality']
                        direct = False
                        host = urlparse.urlparse(source).hostname
                        stream_url = source
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
                    if sources[source]['subs']:
                        hoster['subs'] = 'Turkish Subtitles'
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Collect player streams (cookie-advertised and <source> tags) as
    direct hosters, flagging whether subtitles are hardcoded.

    Fix: |quality| was previously only assigned for gvideo sources, so a
    non-gvideo source raised NameError (or reused a stale value); it now
    falls back to QUALITIES.HIGH.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=1)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'player'})
        if fragment:
            iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
            if iframe_url:
                html = self._http_get(iframe_url[0], cache_limit=.5)
                # if captions exist, then they aren't hardcoded
                if re.search('kind\s*:\s*"captions"', html):
                    subs = False
                else:
                    subs = True
                sources = []
                # stream urls are advertised via source_NNNp cookies
                for name, stream_url in self.__get_stream_cookies2().items():
                    if re.match('source_\d+p?', name):
                        sources.append(urllib.unquote(stream_url))
                for stream_url in dom_parser.parse_dom(html, 'source', {'type': 'video/mp4'}, ret='src'):
                    sources.append(stream_url)
                for source in sources:
                    host = self._get_direct_hostname(source)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(source)
                    else:
                        quality = QUALITIES.HIGH
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': source, 'direct': True, 'subs': subs}
                    hosters.append(hoster)
    return hosters
def __get_sources(self, html, page_url):
    """Extract sources from the 'video-content' iframe.

    Same-site iframes are followed (a nested iframe hop) and parsed for
    direct links; a foreign iframe is returned as one indirect hoster.
    """
    sources = []
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'video-content'})
    if fragment:
        iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
        if iframe_url:
            iframe_url = iframe_url[0]
            if self.base_url in iframe_url:
                headers = {'Referer': page_url}
                html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
                referer = iframe_url
                # NOTE(review): this re-parses the original fragment rather
                # than the freshly fetched html — confirm that's intentional
                iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
                if iframe_url:
                    iframe_url = iframe_url[0]
                    headers = {'Referer': referer}
                    html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
                    links = self._parse_sources_list(html)
                    for link in links:
                        host = self._get_direct_hostname(link)
                        if host == 'gvideo':
                            quality = scraper_utils.gv_get_quality(link)
                        else:
                            quality = links[link]['quality']
                        source = {'multi-part': False, 'url': link, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
                        sources.append(source)
            else:
                # off-site iframe: hand it to the resolvers as indirect
                host = urlparse.urlparse(iframe_url).hostname
                source = {'multi-part': False, 'url': iframe_url, 'host': host, 'class': self, 'quality': QUALITIES.HD720, 'views': None, 'rating': None, 'direct': False}
                sources.append(source)
    return sources
def get_sources(self, video):
    """Crawl the page (and nested movieframe iframes) for <source> urls.

    Note: |iframes| and |sources| intentionally grow while being iterated —
    Python's list iteration picks up appended items, giving a simple
    breadth-first crawl of nested frames.
    """
    hosters = []
    source_url = self.get_url(video)
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        sources = dom_parser.parse_dom(html, 'source', ret='src')
        iframes = dom_parser.parse_dom(html, 'iframe', {'class': 'movieframe'}, ret='src')
        headers = {'Referer': url}
        for iframe_url in iframes:
            # HEAD first: a redirect target is itself the playable source
            html = self._http_get(iframe_url, headers=headers, allow_redirect=False, method='HEAD', cache_limit=0)
            if html.startswith('http'):
                sources.append(html)
            else:
                iframe_url = urlparse.urljoin(self.base_url, iframe_url)
                html = self._http_get(iframe_url, headers=headers, cache_limit=1)
                sources += dom_parser.parse_dom(html, 'source', ret='src')
                iframes += dom_parser.parse_dom(html, 'iframe', ret='src')
        for source in sources:
            host = self._get_direct_hostname(source)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source)
                direct = True
            else:
                quality = QUALITIES.HIGH
                direct = False
                host = urlparse.urlparse(source).hostname
            source = {'multi-part': False, 'url': source, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
            hosters.append(source)
    return hosters
def get_sources(self, video):
    """Fetch embed iframes via the site's token-protected AJAX endpoint.

    Fix: the Authorization header was set on the shared module-level XHR
    dict; the headers are now built on a copy.
    """
    source_url = self.get_url(video)
    sources = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        if video.video_type == VIDEO_TYPES.MOVIE:
            action = 'getMovieEmb'
        else:
            action = 'getEpisodeEmb'
        match = re.search('elid="([^"]+)', html)
        if self.__token is None:
            self.__get_token()
        if match and self.__token is not None:
            # elid is the base64'd current epoch time
            elid = urllib.quote(base64.encodestring(str(int(time.time()))).strip())
            data = {'action': action, 'idEl': match.group(1), 'token': self.__token, 'elid': elid}
            ajax_url = urlparse.urljoin(self.base_url, EMBED_URL)
            headers = dict(XHR)
            headers['Authorization'] = 'Bearer %s' % (self.__get_bearer())
            html = self._http_get(ajax_url, data=data, headers=headers, cache_limit=0)
            html = html.replace('\\"', '"').replace('\\/', '/')
            pattern = '<IFRAME\s+SRC="([^"]+)'
            for match in re.finditer(pattern, html, re.DOTALL | re.I):
                url = match.group(1)
                host = self._get_direct_hostname(url)
                if host == 'gvideo':
                    direct = True
                    quality = scraper_utils.gv_get_quality(url)
                else:
                    if 'vk.com' in url and url.endswith('oid='):
                        continue  # skip bad vk.com links
                    direct = False
                    host = urlparse.urlparse(url).hostname
                    quality = scraper_utils.get_quality(video, host, QUALITIES.HD720)
                source = {'multi-part': False, 'url': url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                sources.append(source)
    return sources
def get_sources(self, video):
    """Resolve streams via the site's two-step AJAX flow: ping the /av
    endpoint, then POST the page's video_id and parse the JSON url= values."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=0)
    match = re.search('var\s*video_id\s*=\s*"([^"]+)', html)
    if not match:
        return hosters
    video_id = match.group(1)
    headers = {'Referer': page_url}
    headers.update(XHR)
    # priming request: the response is unused — presumably it sets
    # server-side state/cookies needed by VIDEO_URL; verify against the site
    _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)
    vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
    html = self._http_get(vid_url, data={'v': video_id}, headers=headers, cache_limit=0)
    for source, value in scraper_utils.parse_json(html, vid_url).iteritems():
        match = re.search('url=(.*)', value)
        if not match:
            continue
        stream_url = urllib.unquote(match.group(1))
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            # the JSON key is treated as a height label
            quality = scraper_utils.height_get_quality(source)
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Collect direct hosters by following each 'videoPlayer' iframe and
    pulling the downloadUrl variable out of the embedded player page."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    landing_html = self._http_get(page_url, cache_limit=.5)
    for _attrs, player_html in dom_parser2.parse_dom(landing_html, 'div', {'class': 'videoPlayer'}):
        for frame_attrs, _content in dom_parser2.parse_dom(player_html, 'iframe', req='src'):
            frame_html = self._http_get(frame_attrs['src'], headers={'Referer': page_url}, cache_limit=.5)
            dl_match = re.search('downloadUrl\s*=\s*"([^"]+)', frame_html)
            if not dl_match:
                continue
            stream_url = dl_match.group(1)
            host = scraper_utils.get_direct_hostname(self, stream_url)
            quality = scraper_utils.gv_get_quality(stream_url) if host == 'gvideo' else QUALITIES.HIGH
            hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                            'views': None, 'rating': None, 'url': stream_url, 'direct': True})
    return hosters
def get_sources(self, video):
    """For each 'movieplay' fragment, follow its iframe: openload-style links
    are labeled by filename-derived quality; other links go through
    __get_links before being emitted as hosters."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}):
        iframe_src = dom_parser2.parse_dom(fragment, 'iframe', req='src')
        if iframe_src:
            iframe_src = iframe_src[0].attrs['src']
            if re.search('o(pen)?load', iframe_src, re.I):
                # openload-style host: quality comes from the link itself
                meta = scraper_utils.parse_movie_link(iframe_src)
                quality = scraper_utils.height_get_quality(meta['height'])
                links = {iframe_src: {'quality': quality, 'direct': False}}
            else:
                links = self.__get_links(iframe_src, url)
            for link in links:
                direct = links[link]['direct']
                quality = links[link]['quality']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, link)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(link)
                    stream_url = link + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
                else:
                    host = urlparse.urlparse(link).hostname
                    stream_url = link
                source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                hosters.append(source)
    return hosters
def __get_links_from_xml(self, xml, video):
    """Build {stream_url: {'quality', 'direct'}} from a JWPlayer RSS feed.

    Quality comes from the gvideo url, the source's label, or as a last
    resort the item title; parse failures are logged and yield no sources.
    """
    sources = {}
    try:
        rss_root = ET.fromstring(xml)
        for entry in rss_root.findall('.//item'):
            entry_title = entry.find('title').text
            for src_el in entry.findall('{http://rss.jwpcdn.com/}source'):
                stream_url = src_el.get('file')
                label = src_el.get('label')
                if self._get_direct_hostname(stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif label:
                    quality = scraper_utils.height_get_quality(label)
                else:
                    quality = scraper_utils.blog_get_quality(video, entry_title, '')
                sources[stream_url] = {'quality': quality, 'direct': True}
                log_utils.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        log_utils.log('Exception during 123Movies XML Parse: %s' % (e), log_utils.LOGWARNING)
    return sources
def get_sources(self, video):
    """Crawl forum-page iframes for a Google Docs embed and return its
    streams as direct gvideo hosters.

    |iframes| intentionally grows while being iterated so nested pages'
    frames get crawled too; the crawl stops at the first gdocs embed.
    """
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        source_url = '/forum' + source_url
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        iframes = dom_parser.parse_dom(html, 'iframe', ret='src')
        for iframe_url in iframes:
            if 'docs.google.com' in iframe_url:
                sources = self._parse_gdocs(iframe_url)
                break
            elif 'banner' in iframe_url or not iframe_url.startswith('http'):
                # skip ads and relative/non-http frames
                pass
            else:
                html = self._http_get(iframe_url, cache_limit=.25)
                iframes += dom_parser.parse_dom(html, 'iframe', ret='src')
        for source in sources:
            host = self._get_direct_hostname(source)
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': scraper_utils.gv_get_quality(source), 'views': None, 'rating': None, 'url': source, 'direct': True}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Follow the 'meta-media' iframe and resolve each <source> redirect.

    Redirected urls are treated as gvideo streams; unredirected ones fall
    back to HIGH quality.
    """
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'meta-media'})
        if fragment:
            iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
            if iframe_url:
                iframe_url = urlparse.urljoin(self.base_url, iframe_url[0])
                html = self._http_get(iframe_url, cache_limit=.5)
                for source in dom_parser.parse_dom(html, 'source', ret='src'):
                    source_url = urlparse.urljoin(self.base_url, source)
                    # sources are site-relative redirectors; follow manually
                    redir_url = self._http_get(source_url, allow_redirect=False, cache_limit=.5)
                    if redir_url.startswith('http'):
                        sources[redir_url] = scraper_utils.gv_get_quality(redir_url)
                    else:
                        sources[source_url] = QUALITIES.HIGH
        for source in sources:
            hoster = {'multi-part': False, 'host': self._get_direct_hostname(source), 'class': self, 'quality': sources[source], 'views': None, 'rating': None, 'url': source, 'direct': True}
            hosters.append(hoster)
    return hosters
def __get_links(self, url):
    """Resolve an embed snippet into direct hoster dicts.

    Extracts the iframe src, follows the player's split script url (with a
    random cache-buster, mimicking the site's own JS), then pulls
    file/label pairs from the player config.
    """
    sources = []
    match = re.search('src="([^"]+)', url)
    if match:
        url = match.group(1).replace('\\/', '/')
        html = self._http_get(url, cache_limit=0)
        # the script url is split into 3 quoted pieces joined around a number
        match = re.search('<script\s+src="([^\']+)\'\+(\d+)\+\'([^\']+)', html)
        if match:
            page_url = ''.join(match.groups())
            page_url += str(random.random())  # cache-buster, as the site's JS does
            html = self._http_get(page_url, cache_limit=0)
        for match in re.finditer('"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?"', html):
            stream_url, height = match.groups()
            stream_url = stream_url.replace('\\&', '&').replace('\\/', '/')
            if 'v.asp' in stream_url and 'ok.ru' not in url:
                # v.asp is a redirector; resolve it to the final stream
                stream_redirect = self._http_get(stream_url, allow_redirect=False, cache_limit=0)
                if stream_redirect:
                    stream_url = stream_redirect
            if self._get_direct_hostname(stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(height)
            host = self._get_direct_hostname(stream_url)
            stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(url))
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            sources.append(hoster)
    return sources
def get_sources(self, video):
    """Resolve episode streams via the site's time-salted hash endpoint and
    its remote 'grabber' JSON service."""
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        for server_list in dom_parser.parse_dom(html, 'ul', {'class': 'episodes'}):
            labels = dom_parser.parse_dom(server_list, 'a')
            hash_ids = dom_parser.parse_dom(server_list, 'a', ret='data-id')
            for label, hash_id in zip(labels, hash_ids):
                if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match(label, video.episode):
                    continue
                now = time.localtime()
                # HASH_URL is salted with the current hour+minute
                url = urlparse.urljoin(self.base_url, HASH_URL)
                url = url % (hash_id, now.tm_hour + now.tm_min)
                html = self._http_get(url, headers=XHR, cache_limit=.5)
                js_result = scraper_utils.parse_json(html, url)
                if 'videoUrlHash' in js_result and 'grabber' in js_result:
                    query = {'flash': 1, 'json': 1, 's': now.tm_min, 'link': js_result['videoUrlHash'], '_': int(time.time())}
                    query['link'] = query['link'].replace('\/', '/')
                    grab_url = js_result['grabber'].replace('\/', '/')
                    grab_url += '?' + urllib.urlencode(query)
                    html = self._http_get(grab_url, headers=XHR, cache_limit=.5)
                    js_result = scraper_utils.parse_json(html, grab_url)
                    for result in js_result:
                        if 'label' in result:
                            quality = scraper_utils.height_get_quality(result['label'])
                        else:
                            quality = scraper_utils.gv_get_quality(result['file'])
                        sources[result['file']] = quality
        for source in sources:
            hoster = {'multi-part': False, 'host': self._get_direct_hostname(source), 'class': self, 'quality': sources[source], 'views': None, 'rating': None, 'url': source, 'direct': True}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Follow the 'player_container' iframe and resolve its GK link token
    into direct gvideo hosters."""
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'player_container'})
        if fragment:
            iframe_url = dom_parser.parse_dom(fragment[0], 'IFRAME', ret='SRC')
            if iframe_url:
                html = self._http_get(iframe_url[0], cache_limit=.5)
                # the GK token appears as {link: "..."} in the player JS
                match = re.search('{link\s*:\s*"([^"]+)', html)
                if match:
                    sources = self.__get_gk_links(match.group(1))
                else:
                    sources = {}
                for source in sources:
                    hoster = {'multi-part': False, 'url': source, 'class': self, 'quality': scraper_utils.gv_get_quality(source), 'host': self._get_direct_hostname(source), 'rating': None, 'views': None, 'direct': True}
                    hosters.append(hoster)
    return hosters
def __get_gk_links(self, html, page_url):
    """Resolve 'server_line' entries via LINK_URL into {stream_url: quality}.

    Fix: the Referer header was set on the shared module-level XHR dict;
    the headers are now built on a copy. The outer loop variable was also
    renamed so it no longer collides with the inner JSON loop variable.
    """
    sources = {}
    for line in dom_parser.parse_dom(html, 'div', {'class': '[^"]*server_line[^"]*'}):
        film_id = dom_parser.parse_dom(line, 'a', ret='data-film')
        name_id = dom_parser.parse_dom(line, 'a', ret='data-name')
        server_id = dom_parser.parse_dom(line, 'a', ret='data-server')
        if film_id and name_id and server_id:
            data = {'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0]}
            headers = dict(XHR)
            headers['Referer'] = page_url
            url = urlparse.urljoin(self.base_url, LINK_URL)
            html = self._http_get(url, data=data, headers=headers, cache_limit=.25)
            js_data = scraper_utils.parse_json(html, url)
            if 's' in js_data:
                if isinstance(js_data['s'], basestring):
                    sources[js_data['s']] = QUALITIES.HIGH
                else:
                    for link in js_data['s']:
                        stream_url = link['file']
                        if self._get_direct_hostname(stream_url) == 'gvideo':
                            quality = scraper_utils.gv_get_quality(stream_url)
                        elif 'label' in link:
                            quality = scraper_utils.height_get_quality(link['label'])
                        else:
                            quality = QUALITIES.HIGH
                        sources[stream_url] = quality
    return sources
def __grab_links(self, grab_url, query, referer):
    """Fetch the grabber JSON and map each file to {'direct', 'quality'}.

    Errors are logged and swallowed so a broken grabber yields no sources
    rather than aborting the whole scrape. Fix: the Referer header was set
    on the shared module-level XHR dict; headers are now built on a copy.
    """
    sources = {}
    try:
        query['mobile'] = '0'
        query.update(self.__get_token(query))
        grab_url = grab_url + '?' + urllib.urlencode(query)
        headers = dict(XHR)
        headers['Referer'] = referer
        html = self._http_get(grab_url, headers=headers, cache_limit=.5)
        js_data = scraper_utils.parse_json(html, grab_url)
        if 'data' in js_data:
            for link in js_data['data']:
                stream_url = link['file']
                if self._get_direct_hostname(stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'label' in link:
                    quality = scraper_utils.height_get_quality(link['label'])
                else:
                    quality = QUALITIES.HIGH
                sources[stream_url] = {'direct': True, 'quality': quality}
    except Exception as e:
        log_utils.log('9Movies Link Parse Error: %s' % (e), log_utils.LOGWARNING)
    return sources
def get_sources(self, video):
    """Harvest <source> tags from the page's screen fragment (or its external
    player script) and return them as direct hosters."""
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    screens = dom_parser.parse_dom(html, 'div', {'class': '[^"]*screen[^"]*'})
    if not screens:
        return hosters
    script_srcs = dom_parser.parse_dom(screens[0], 'script', ret='src')
    if script_srcs:
        # player markup is delivered by an external script
        html = self._http_get(urlparse.urljoin(self.base_url, script_srcs[0]), cache_limit=.5)
    else:
        html = screens[0]
    for src_match in re.finditer('<source[^>]+src="([^"]+)', html):
        stream_url = src_match.group(1)
        host = self._get_direct_hostname(stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            _, _, height, _ = scraper_utils.parse_movie_link(stream_url)
            quality = scraper_utils.height_get_quality(height)
        stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
        hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                        'views': None, 'rating': None, 'url': stream_url, 'direct': True})
    return hosters
def __get_links_from_playlist(self, grab_url, headers):
    """Convert the JWPlayer playlist JSON at |grab_url| into
    {stream_url: {'quality', 'direct'}}."""
    sources = {}
    grab_url = grab_url.replace('\\', '')
    grab_html = self._http_get(grab_url, headers=headers, cache_limit=.5)
    js_data = scraper_utils.parse_json(grab_html, grab_url)
    try:
        playlist = js_data['playlist'][0]['sources']
    except:
        playlist = []
    for item in playlist:
        stream_url = item.get('file')
        if stream_url:
            if stream_url.startswith('/'):
                # site-relative file: resolve it and follow one redirect hop
                stream_url = scraper_utils.urljoin(self.base_url, stream_url)
                redir_url = self._http_get(stream_url, headers=headers, allow_redirect=False, method='HEAD')
                if redir_url.startswith('http'):
                    stream_url = redir_url
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in item:
                quality = scraper_utils.height_get_quality(item['label'])
            else:
                quality = QUALITIES.HIGH
            logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
            sources[stream_url] = {'quality': quality, 'direct': True}
            # honor the addon setting that limits scraping to one stream
            if not kodi.get_setting('scraper_url'):
                break
    return sources
def get_sources(self, video):
    """Gather embedded and linked sources from the player iframe.

    Fix: |quality| was previously only assigned for gvideo streams, so a
    non-gvideo stream raised NameError (or reused a stale value); it now
    falls back to QUALITIES.HIGH.
    """
    source_url = self.get_url(video)
    hosters = []
    sources = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'player'})
    if not fragment:
        return hosters
    iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
    if not iframe_url:
        return hosters
    html = self._http_get(iframe_url[0].attrs['src'], cache_limit=.25)
    # each helper returns {'sources': [...], 'subs': bool}
    sources.append(self.__get_embedded_sources(html))
    sources.append(self.__get_linked_sources(html))
    for source in sources:
        for stream_url in source['sources']:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = QUALITIES.HIGH
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            hoster['subs'] = source.get('subs', True)
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Decrypt the page's proxy.link token (key chosen by token length) and,
    when it yields a gvideo/picasa url, expand it into direct hosters."""
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        match = re.search('proxy\.link=([^"&]+)', html)
        if match:
            proxy_link = match.group(1)
            proxy_link = proxy_link.split('*', 1)[-1]
            # short tokens are encrypted with GK_KEY1, longer ones GK_KEY2
            if len(proxy_link) <= 224:
                picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY1, proxy_link)
            else:
                picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY2, proxy_link)
            if self._get_direct_hostname(picasa_url) == 'gvideo':
                sources = self._parse_google(picasa_url)
                for source in sources:
                    hoster = {'multi-part': False, 'url': source, 'class': self, 'quality': scraper_utils.gv_get_quality(source), 'host': self._get_direct_hostname(source), 'rating': None, 'views': None, 'direct': True}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Resolve the watch-iframe's link id through GK_URL into hosters.

    Fix: non-gvideo entries without a 'label' key previously raised
    KeyError; the label lookup is now guarded (matching the sibling GK
    resolvers) with a QUALITIES.HIGH fallback.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        match = re.search('<iframe[^>]+src="([^"]+watch=([^"]+))', html)
        if match:
            iframe_url, link_id = match.groups()
            data = {'link': link_id}
            headers = {'Referer': iframe_url}
            gk_url = urlparse.urljoin(self.base_url, GK_URL)
            html = self._http_get(gk_url, data=data, headers=headers, cache_limit=.5)
            js_data = scraper_utils.parse_json(html, gk_url)
            if 'link' in js_data:
                for link in js_data['link']:
                    stream_url = link['link']
                    host = self._get_direct_hostname(stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    elif 'label' in link:
                        quality = scraper_utils.height_get_quality(link['label'])
                    else:
                        quality = QUALITIES.HIGH
                    hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality, 'host': host, 'rating': None, 'views': None, 'direct': True}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Return hosters scraped from the page's server-name/server-play lists."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)

    # View count is optional; keep digits only (e.g. "1,234 views" -> "1234").
    views = None
    view_match = re.search('<li>\s*Views\s*:\s*(.*?)</li>', html)
    if view_match:
        views = re.sub('[^0-9]', '', view_match.group(1))

    hosts = dom_parser.parse_dom(html, 'p', {'class': 'server_servername'})
    links = dom_parser.parse_dom(html, 'p', {'class': 'server_play'})
    for host, link_text in zip(hosts, links):
        host = host.replace('server', '').strip()
        link_match = re.search('href="([^"]+)', link_text)
        if not link_match:
            continue
        link = link_match.group(1)
        if 'google' in host:
            # Google servers resolve to one or more direct gvideo streams.
            for stream_url in self.__get_google_links(link):
                hosters.append({
                    'multi-part': False, 'url': stream_url, 'class': self,
                    'quality': scraper_utils.gv_get_quality(stream_url),
                    'host': self._get_direct_hostname(stream_url),
                    'rating': None, 'views': views, 'direct': True})
        else:
            hosters.append({
                'multi-part': False, 'url': link, 'class': self,
                'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                'host': host, 'rating': None, 'views': views, 'direct': False})
    return hosters
def get_sources(self, video):
    """Return direct hosters from the site's AJAX videolink/videokalite response."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    # Trailer-only pages ("Şu an fragman...") carry no real streams.
    if re.search('Şu an fragman*', html, re.I):
        return hosters

    match = re.search('''url\s*:\s*"([^"]+)"\s*,\s*data:\s*["'](id=\d+)''', html)
    if not match:
        return hosters

    ajax_url, data = match.groups()
    ajax_url = urlparse.urljoin(self.base_url, ajax_url)
    result = self._http_get(ajax_url, data=data, headers=XHR, cache_limit=.5)
    for stream in re.finditer('"videolink\d*"\s*:\s*"([^"]+)","videokalite\d*"\s*:\s*"?(\d+)p?', result):
        stream_url, height = stream.groups()
        stream_url = stream_url.replace('\\/', '/')
        host = self._get_direct_hostname(stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(height)
        stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(page_url))
        hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                        'views': None, 'rating': None, 'url': stream_url, 'direct': True})
    return hosters
def get_sources(self, video):
    """Return hosters from data-click entries (episodes) or movie source links."""
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    if video.video_type == VIDEO_TYPES.EPISODE:
        html = self.__episode_match(video, source_url)
        sources = [r.attrs['data-click'] for r in
                   dom_parser2.parse_dom(html, 'div', req='data-click') +
                   dom_parser2.parse_dom(html, 'li', req='data-click')]
    else:
        sources = self.__get_movie_sources(page_url)
    sources = [source.strip() for source in sources if source]

    headers = {'Referer': page_url}
    for source in sources:
        if source.startswith('http'):
            # Plain http entries are external hosted links.
            direct = False
            quality = QUALITIES.HD720
            host = urlparse.urlparse(source).hostname
        else:
            source = self.__get_linked_source(source, headers)
            if source is None:
                continue
            direct = True
            host = scraper_utils.get_direct_hostname(self, source)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source)
            else:
                # BUG FIX: this branch was `pass`, leaving |quality| unbound and
                # raising NameError (or reusing a stale value) for direct
                # non-gvideo streams. Default to HIGH like sibling scrapers.
                quality = QUALITIES.HIGH
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                  'views': None, 'rating': None, 'url': source, 'direct': direct}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """POST the page's scraped $.post payload and read back a {height: url} map."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    match = re.search('\$\.post\("([^"]+)"\s*,\s*\{(.*?)\}', html)
    if not match:
        return hosters

    post_url, post_data = match.groups()
    data = self.__get_data(post_data)
    html = self._http_get(post_url, data=data, cache_limit=.5)
    js_result = scraper_utils.parse_json(html, post_url)
    # Keys are heights ("720" etc.), values are stream urls.
    for key in js_result:
        stream_url = js_result[key]
        host = self._get_direct_hostname(stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(key)
        hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                        'views': None, 'rating': None, 'url': stream_url, 'direct': True})
    return hosters
def __get_json_links(self, html, sub):
    """Parse a JSON 'sources' blob into hoster dicts, tagging each with |sub|."""
    hosters = []
    js_data = scraper_utils.parse_json(html)
    for source in js_data.get('sources', []):
        stream_url = source.get('file')
        if stream_url is None:
            continue
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        elif 'label' in source:
            quality = scraper_utils.height_get_quality(source['label'])
        else:
            quality = QUALITIES.HIGH
        hoster = {'multi-part': False, 'host': host, 'class': self,
                  'quality': quality, 'views': None, 'rating': None,
                  'url': stream_url, 'direct': True, 'subs': sub}
        hosters.append(hoster)
    return hosters
def __get_links_from_json2(self, url, page_url, video_type):
    """Fetch a playlist JSON from |url| and map stream urls to quality info."""
    sources = {}
    headers = {'Referer': page_url}
    headers.update(XHR)
    html = self._http_get(url, headers=headers, cache_limit=0)
    js_data = scraper_utils.parse_json(html, url)
    try:
        playlist = js_data.get('playlist', [])
        for source in playlist[0].get('sources', []):
            stream_url = source['file']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                # No label: infer the height from the stream's file name.
                if video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(stream_url)
                else:
                    meta = scraper_utils.parse_episode_link(stream_url)
                quality = scraper_utils.height_get_quality(meta['height'])
            sources[stream_url] = {'quality': quality, 'direct': True}
            logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        # Malformed/empty playlists are expected occasionally; log and move on.
        logger.log('Exception during yesmovies extract: %s' % (e), log_utils.LOGDEBUG)
    return sources
def get_sources(self, video):
    """Locate this title's play() url inside the sitewide movies.js bundle."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    js_url = scraper_utils.urljoin(self.base_url, '/javascript/movies.js')
    html = self._http_get(js_url, cache_limit=48)
    # The js lookup key has no leading slash.
    source_url = source_url[1:] if source_url.startswith('/') else source_url
    pattern = '''getElementById\(\s*"%s".*?play\(\s*'([^']+)''' % (source_url)
    match = re.search(pattern, html, re.I)
    if not match:
        return hosters

    stream_url = match.group(1)
    if 'drive.google' in stream_url or 'docs.google' in stream_url:
        sources = scraper_utils.parse_google(self, stream_url)
    else:
        sources = [stream_url]

    for source in sources:
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        host = scraper_utils.get_direct_hostname(self, source)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(source)
            direct = True
        elif 'youtube' in stream_url:
            quality = QUALITIES.HD720
            direct = False
            host = 'youtube.com'
        else:
            quality = QUALITIES.HIGH
            direct = True
        hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                        'views': None, 'rating': None, 'url': stream_url, 'direct': direct})
    return hosters
def __get_gk_links(self, html, page_url, video_type, episode):
    """Resolve GK player server lines into a {stream_url: quality} map.

    For episodes, first swaps |html| for the episode-specific server list
    fetched via ajax.php.
    """
    sources = {}
    phimid = dom_parser.parse_dom(html, 'input', {'name': 'phimid'}, ret='value')
    if phimid and video_type == VIDEO_TYPES.EPISODE:
        url = urlparse.urljoin(self.tv_base_url, '/ajax.php')
        data = {'ipos_server': 1, 'phimid': phimid[0], 'keyurl': episode}
        # BUG FIX: copy XHR before adding Referer; `headers = XHR` aliased the
        # shared module-level dict, so the Referer leaked into every later
        # request using XHR.
        headers = dict(XHR)
        headers['Referer'] = page_url
        html = self._http_get(url, data=data, headers=headers, cache_limit=.5)

    for link in dom_parser.parse_dom(html, 'div', {'class': '[^"]*server_line[^"]*'}):
        film_id = dom_parser.parse_dom(link, 'a', ret='data-film')
        name_id = dom_parser.parse_dom(link, 'a', ret='data-name')
        server_id = dom_parser.parse_dom(link, 'a', ret='data-server')
        if not (film_id and name_id and server_id):
            continue
        data = {'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0]}
        headers = dict(XHR)  # BUG FIX: same aliasing issue as above
        headers['Referer'] = page_url
        url = urlparse.urljoin(self.__get_base_url(video_type), LINK_URL)
        html = self._http_get(url, data=data, headers=headers, cache_limit=.25)
        js_data = scraper_utils.parse_json(html, url)
        if 's' not in js_data:
            continue
        if isinstance(js_data['s'], basestring):
            # A bare string is a single unlabeled stream.
            sources[js_data['s']] = QUALITIES.HIGH
        else:
            for link in js_data['s']:
                stream_url = link['file']
                if self._get_direct_hostname(stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'label' in link:
                    quality = scraper_utils.height_get_quality(link['label'])
                else:
                    quality = QUALITIES.HIGH
                sources[stream_url] = quality
    return sources
def get_sources(self, video):
    """Scrape window.location.href redirect targets out of the meta-media iframe."""
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'meta-media'})
        if fragment:
            iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
            if iframe_url:
                iframe_url = urlparse.urljoin(self.base_url, iframe_url[0])
                html = self._http_get(iframe_url, cache_limit=.5)
                for match in re.finditer('window.location.href\s*=\s*"([^"]+)', html):
                    stream_url = match.group(1)
                    if self._get_direct_hostname(stream_url) == 'gvideo':
                        sources[stream_url] = scraper_utils.gv_get_quality(stream_url)
                    else:
                        # BUG FIX: was `sources[source_url] = ...`, which stored
                        # the page path instead of the scraped stream url.
                        sources[stream_url] = QUALITIES.HIGH
        for stream_url in sources:
            # BUG FIX: host and the User-Agent suffix were previously computed
            # from a stale loop variable and the suffix was never attached to
            # the emitted url.
            host = self._get_direct_hostname(stream_url)
            final_url = stream_url + '|User-Agent=%s' % (scraper_utils.get_ua())
            hoster = {'multi-part': False, 'host': host, 'class': self,
                      'quality': sources[stream_url], 'views': None, 'rating': None,
                      'url': final_url, 'direct': True}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Resolve the page's video_id via INFO_URL into direct streams."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    match = re.search('var\s*video_id="([^"]+)', html)
    if not match:
        return hosters

    headers = {'Referer': page_url}
    headers.update(XHR)
    html = self._http_get(INFO_URL, data={'v': match.group(1)}, headers=headers, cache_limit=.5)
    sources = scraper_utils.parse_json(html, INFO_URL)
    # Keys are heights; values embed the stream after a "url=" marker.
    for source in sources:
        url_match = re.search('url=(.*)', sources[source])
        if not url_match:
            continue
        stream_url = urllib.unquote(url_match.group(1))
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(source)
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                        'views': None, 'rating': None, 'url': stream_url, 'direct': True})
    return hosters
def __get_gk_links(self, html, page_url):
    """Resolve the embedded GK {link: ...} blob into {stream_url: quality}."""
    sources = {}
    match = re.search('{link\s*:\s*"([^"]+)', html)
    if not match:
        return sources

    url = urlparse.urljoin(self.base_url, LINK_URL)
    html = self._http_get(url, data={'link': match.group(1)},
                          headers={'Referer': page_url}, cache_limit=.25)
    js_data = scraper_utils.parse_json(html, url)
    for link in js_data.get('link', []):
        # Only mp4 entries carry a playable link.
        if link.get('type') != 'mp4' or 'link' not in link:
            continue
        stream_url = link['link']
        if self._get_direct_hostname(stream_url) == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        elif 'label' in link:
            quality = scraper_utils.height_get_quality(link['label'])
        else:
            quality = QUALITIES.HIGH
        sources[stream_url] = quality
    return sources
def __get_gk_links(self, html):
    """Resolve a GK iframe link into {stream_url: {'quality', 'direct'}}."""
    sources = {}
    match = re.search('{link\s*:\s*"([^"]+)', html)
    if not match:
        return sources

    iframe_url = match.group(1)
    html = self._http_get(self.gk_url, data={'link': iframe_url},
                          headers={'Referer': iframe_url}, cache_limit=.5)
    js_data = scraper_utils.parse_json(html, self.gk_url)
    links = js_data.get('link', [])
    # A bare string means a single unlabeled link.
    if isinstance(links, basestring):
        links = [{'link': links}]
    for link in links:
        stream_url = link['link']
        if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
            quality, direct = scraper_utils.gv_get_quality(stream_url), True
        elif 'label' in link:
            quality, direct = scraper_utils.height_get_quality(link['label']), True
        else:
            quality, direct = QUALITIES.HIGH, False
        sources[stream_url] = {'quality': quality, 'direct': direct}
    return sources
def get_sources(self, video):
    """Enumerate the css_server_new list; resolve gvideo hosts to direct links."""
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'ul', {'class': 'css_server_new'})
        if fragment:
            for match in re.finditer('href="([^"]+)[^>]*>(.*?)(?:-\d+)?</a>', fragment[0]):
                url, host = match.groups()
                host = host.lower()
                host = re.sub('<img.*?/>', '', host)
                host = HOSTS.get(host, host)
                log_utils.log('%s - %s' % (url, host))
                if host in GVIDEO_NAMES:
                    sources = self.__get_links(urlparse.urljoin(self.base_url, url))
                    direct = True
                else:
                    sources = {url: host}
                    direct = False
                for source in sources:
                    # BUG FIX: read the host value BEFORE the gvideo branch
                    # rebinds |source| with a '|User-Agent=' suffix; the old
                    # code did sources[source] afterwards, which raised
                    # KeyError for every gvideo stream.
                    source_host = sources[source]
                    if self._get_direct_hostname(source) == 'gvideo':
                        quality = scraper_utils.gv_get_quality(source)
                        source = source + '|User-Agent=%s' % (scraper_utils.get_ua())
                    else:
                        quality = scraper_utils.get_quality(video, source, QUALITIES.HIGH)
                    hoster = {'multi-part': False, 'host': source_host, 'class': self,
                              'quality': quality, 'views': None, 'rating': None,
                              'url': source, 'direct': direct}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Follow the subtitle-free option to its player iframe and scrape file/label pairs."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    # "Altyazısız" option == the version without subtitles.
    match = re.search('''<option[^>]+value\s*=\s*["']([^"']+)[^>]*>(?:Altyaz.{1,3}s.{1,3}z)<''', html)
    if not match:
        return hosters

    option_url = urlparse.urljoin(self.base_url, match.group(1))
    html = self._http_get(option_url, cache_limit=.25)
    fragment = dom_parser.parse_dom(html, 'span', {'class': 'object-wrapper'})
    if fragment:
        iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
        if iframe_url:
            html = self._http_get(iframe_url[0], cache_limit=.25)

    seen_urls = {}
    for match in re.finditer('"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?[^"]*"', html):
        stream_url, height = match.groups()
        if stream_url in seen_urls:
            continue
        seen_urls[stream_url] = True
        stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
        host = self._get_direct_hostname(stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(height)
        hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                        'views': None, 'rating': None, 'url': stream_url, 'direct': True})
    return hosters
def __get_links(self, url):
    """Fetch the GK 'files' endpoint for |url| and map streams to quality info."""
    sources = {}
    html = self._http_get(url, cache_limit=.5)
    match = re.search("files\s*:\s*'([^']+)", html)
    if match:
        gk_url = GK_URL % (match.group(1))
        gk_url = urlparse.urljoin(self.base_url, gk_url)
        # BUG FIX: copy XHR instead of aliasing it; `headers = XHR` mutated the
        # shared module-level header dict with this request's Referer.
        headers = dict(XHR)
        headers['Referer'] = url
        html = self._http_get(gk_url, headers=headers, cache_limit=.5)
        # Strip a UTF-8 BOM if present so the JSON parses.
        try:
            html = html.decode('utf-8-sig')
        except:
            pass
        js_data = scraper_utils.parse_json(html, gk_url)
        if 'data' in js_data:
            if isinstance(js_data['data'], list):
                stream_list = [item['files'] for item in js_data['data']]
            else:
                stream_list = [js_data['data']]
            for stream_url in stream_list:
                host = self._get_direct_hostname(stream_url)
                # BUG FIX: accumulate into |sources| instead of rebinding it to
                # a fresh dict each iteration, which kept only the last stream.
                if host == 'gvideo':
                    sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url),
                                           'direct': True, 'host': host}
                else:
                    host = urlparse.urlparse(stream_url).hostname
                    sources[stream_url] = {'quality': QUALITIES.HIGH, 'direct': False, 'host': host}
    return sources
def get_sources(self, video):
    """Resolve the page's video_id via VIDEO_URL into direct streams."""
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        match = re.search('var\s*video_id="([^"]+)', html)
        if match:
            video_id = match.group(1)
            url = urlparse.urljoin(self.base_url, VIDEO_URL)
            data = {'v': video_id}
            # BUG FIX: copy XHR rather than aliasing; `headers = XHR` mutated
            # the shared module-level dict, leaking this Referer into every
            # later request that reuses XHR.
            headers = dict(XHR)
            headers['Referer'] = page_url
            html = self._http_get(url, data=data, headers=headers, cache_limit=.5)
            sources = scraper_utils.parse_json(html, url)
            # Keys are heights; values embed the stream after a "url=" marker.
            for source in sources:
                match = re.search('url=(.*)', sources[source])
                if match:
                    stream_url = urllib.unquote(match.group(1))
                    host = self._get_direct_hostname(stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    else:
                        quality = scraper_utils.height_get_quality(source)
                    stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                    hoster = {'multi-part': False, 'host': host, 'class': self,
                              'quality': quality, 'views': None, 'rating': None,
                              'url': stream_url, 'direct': True}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape each tab_box for an inline or ajax-resolved stream url."""
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources

    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    # The poster label ("HD", "CAM", ...) sets the default page quality.
    q_str = dom_parser.parse_dom(html, 'div', {'class': 'poster-qulabel'})
    if q_str:
        page_quality = Q_MAP.get(q_str[0].replace(' ', '').upper(), QUALITIES.HIGH)
    else:
        page_quality = QUALITIES.HIGH

    for fragment in dom_parser.parse_dom(html, 'div', {'class': 'tab_box'}):
        match = re.search('file\s*:\s*"([^"]+)', fragment)
        if match:
            stream_url = match.group(1)
        else:
            stream_url = self.__get_ajax_sources(fragment, page_url)
        if not stream_url:
            continue
        host = self._get_direct_hostname(stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = page_quality
        stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(page_url))
        sources.append({'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                        'quality': quality, 'views': None, 'rating': None, 'direct': True})
    return sources
def get_sources(self, video):
    """Replay the page's $.post call and turn the JSON reply into hosters."""
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        post_match = re.search('\$\.post\("([^"]+)"\s*,\s*\{(.*?)\}', html)
        if post_match:
            post_url, post_data = post_match.groups()
            html = self._http_get(post_url, data=self.__get_data(post_data), cache_limit=.5)
            js_result = scraper_utils.parse_json(html, post_url)
            # The reply maps heights to stream urls.
            for key, stream_url in js_result.items():
                host = self._get_direct_hostname(stream_url)
                quality = (scraper_utils.gv_get_quality(stream_url) if host == 'gvideo'
                           else scraper_utils.height_get_quality(key))
                hosters.append({'multi-part': False, 'host': host, 'class': self,
                                'quality': quality, 'views': None, 'rating': None,
                                'url': stream_url, 'direct': True})
    return hosters
def __get_gk_links(self, html, page_url, video_type, episode):
    """Resolve ipplugins server lines into a {stream_url: quality} map."""
    sources = {}
    for line in dom_parser.parse_dom(html, 'div', {'class': '[^"]*server_line[^"]*'}):
        film_id = dom_parser.parse_dom(line, 'a', ret='data-film')
        name_id = dom_parser.parse_dom(line, 'a', ret='data-name')
        server_id = dom_parser.parse_dom(line, 'a', ret='data-server')
        if not (film_id and name_id and server_id):
            continue
        data = {'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0]}
        headers = {'Referer': page_url}
        headers.update(XHR)
        url = urlparse.urljoin(self.base_url, LINK_URL)
        resp = self._http_get(url, data=data, headers=headers, cache_limit=.25)
        js_data = scraper_utils.parse_json(resp, url)
        if 's' in js_data and isinstance(js_data['s'], basestring):
            # Second hop: exchange the 's' token for the actual file list.
            url = urlparse.urljoin(self.base_url, LINK_URL3)
            params = {'u': js_data['s'], 'w': '100%', 'h': 450, 's': js_data['v']}
            resp = self._http_get(url, params=params, headers=headers, cache_limit=.25)
            js_data = scraper_utils.parse_json(resp, url)
            if 'data' in js_data and js_data['data']:
                if isinstance(js_data['data'], basestring):
                    sources[js_data['data']] = QUALITIES.HIGH
                else:
                    for link in js_data['data']:
                        stream_url = link['files']
                        if self._get_direct_hostname(stream_url) == 'gvideo':
                            quality = scraper_utils.gv_get_quality(stream_url)
                        elif 'quality' in link:
                            quality = scraper_utils.height_get_quality(link['quality'])
                        else:
                            quality = QUALITIES.HIGH
                        sources[stream_url] = quality
    return sources
def get_sources(self, video):
    """Collect <source> and <iframe> urls out of the video-embed fragment."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'video-embed'})
    if not fragment:
        return hosters

    streams = dom_parser.parse_dom(fragment[0], 'source', ret='src')
    streams += dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
    for stream in streams:
        host = self._get_direct_hostname(stream)
        if host == 'gvideo':
            direct = True
            quality = scraper_utils.gv_get_quality(stream)
        else:
            direct = False
            host = urlparse.urlparse(stream).hostname
            quality = scraper_utils.get_quality(video, host, QUALITIES.HD720)
        hosters.append({'multi-part': False, 'url': stream, 'host': host, 'class': self,
                        'quality': quality, 'views': None, 'rating': None, 'direct': direct})
    return hosters
def get_sources(self, video):
    """Parse the playex fragment's sources; resolve site-internal redirectors."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment:
        html = fragment[0].content

    links = scraper_utils.parse_sources_list(self, html)
    for link in links:
        stream_url = link
        if self.base_url in link:
            # Site-hosted urls are redirectors; resolve with a HEAD request.
            redir_url = self._http_get(link, headers={'Referer': url},
                                       allow_redirect=False, method='HEAD')
            if redir_url.startswith('http'):
                stream_url = redir_url
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = links[link]['quality']
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
        hosters.append({'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                        'quality': quality, 'views': None, 'rating': None, 'direct': True})
    return hosters
def get_sources(self, video):
    """Crawl nested iframes until a docs.google embed is found, then return its streams."""
    hosters = []
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)
    iframes = dom_parser2.parse_dom(html, 'iframe', req='src')
    # |iframes| is deliberately extended while being iterated so that newly
    # discovered nested iframes get visited too.
    for attrs, _content in iframes:
        iframe_url = attrs['src']
        if 'docs.google.com' in iframe_url:
            sources = scraper_utils.parse_google(self, iframe_url)
            break
        iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
        html = self._http_get(iframe_url, cache_limit=1)
        iframes += dom_parser2.parse_dom(html, 'iframe', req='src')

    for source in sources:
        hosters.append({'multi-part': False,
                        'host': scraper_utils.get_direct_hostname(self, source),
                        'class': self,
                        'quality': scraper_utils.gv_get_quality(source),
                        'views': None, 'rating': None, 'url': source, 'direct': True})
    return hosters
def get_sources(self, video):
    """Return hosters for |video| via the site's two-language ajax view flow.

    Tries the original-language ('or') view first, then the Turkish ('tr')
    one, stopping at the first language that yields any sources. Each found
    source is either a nested iframe (indirect) or a direct stream from the
    page's sources list.
    """
    hosters = []
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    match = re.search('var\s+view_id\s*=\s*"([^"]+)', html)
    if not match: return hosters
    view_id = match.group(1)
    # 'or' = original audio, 'tr' = Turkish-subtitled version.
    for lang in ['or', 'tr']:
        subs = True if lang == 'tr' else False
        view_data = {'id': view_id, 'tip': 'view', 'dil': lang}
        html = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25)
        html = html.strip()
        # Remove literal backslash-escapes so the embedded JSON is parseable.
        html = re.sub(r'\\n|\\t', '', html)
        match = re.search('var\s+sources\s*=\s*(\[.*?\])', html)
        if match:
            raw_data = match.group(1)
            raw_data = raw_data.replace('\\', '')
        else:
            raw_data = html
        js_data = scraper_utils.parse_json(raw_data, self.ajax_url)
        if 'data' not in js_data: continue
        # The ajax payload wraps the player in an iframe inside 'data'.
        src = dom_parser2.parse_dom(js_data['data'], 'iframe', req='src')
        if not src: continue
        html = self._http_get(src[0].attrs['src'], cache_limit=.25)
        # Nested iframes are treated as indirect 720p hosted links.
        for attrs, _content in dom_parser2.parse_dom(html, 'iframe', req='src'):
            src = attrs['src']
            if not src.startswith('http'): continue
            sources.append({'label': '720p', 'file': src, 'direct': False, 'subs': subs})
        # Any direct streams found on the player page are also collected.
        sources += [{'file': url, 'subs': subs} for url in scraper_utils.parse_sources_list(self, html).iterkeys()]
        # Stop at the first language that produced anything.
        if sources: break
    for source in sources:
        direct = source.get('direct', True)
        stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                # Direct stream with no usable quality hint: skip it.
                continue
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.height_get_quality(source['label'])
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
        if source.get('subs'): hoster['subs'] = 'Turkish Subtitles'
        hosters.append(hoster)
    return hosters