def get_sources(self, video):
    """Scrape direct gvideo streams for |video| via the site's proxy.link token."""
    hosters = []
    page_link = self.get_url(video)
    if not page_link or page_link == FORCE_NO_MATCH:
        return hosters

    page_url = urlparse.urljoin(self.base_url, page_link)
    html = self._http_get(page_url, cache_limit=.5)
    token_match = re.search('proxy\.link=([^"&]+)', html)
    if token_match is None:
        return hosters

    # Drop the "<name>*" prefix; short payloads use key 1, long ones key 2.
    payload = token_match.group(1).split('*', 1)[-1]
    gk_key = GK_KEY1 if len(payload) <= 224 else GK_KEY2
    picasa_url = scraper_utils.gk_decrypt(self.get_name(), gk_key, payload)
    if self._get_direct_hostname(picasa_url) == 'gvideo':
        for stream in self._parse_google(picasa_url):
            hosters.append({'multi-part': False, 'url': stream, 'class': self,
                            'quality': scraper_utils.gv_get_quality(stream),
                            'host': self._get_direct_hostname(stream),
                            'rating': None, 'views': None, 'direct': True})
    return hosters
def __get_gk_links2(self, html):
    """Decrypt the page's proxy.link token and map gvideo links to quality info."""
    found = {}
    token_match = re.search('proxy\.link=([^"&]+)', html)
    if token_match is None:
        return found

    # Strip the "<name>*" prefix; the decryption key depends on payload length.
    payload = token_match.group(1).split('*', 1)[-1]
    gk_key = GK_KEY1 if len(payload) <= 224 else GK_KEY2
    vid_url = scraper_utils.gk_decrypt(self.get_name(), gk_key, payload)
    if scraper_utils.get_direct_hostname(self, vid_url) == 'gvideo':
        found = dict((link, {'quality': scraper_utils.gv_get_quality(link), 'direct': True})
                     for link in self._parse_gdocs(vid_url))
    return found
def __get_gk_links2(self, html):
    """Decrypt the page's proxy.link token; return {gvideo link: quality}."""
    found = {}
    token_match = re.search('proxy\.link=([^"&]+)', html)
    if token_match is None:
        return found

    # Strip the "<name>*" prefix; the decryption key depends on payload length.
    payload = token_match.group(1).split('*', 1)[-1]
    gk_key = GK_KEY1 if len(payload) <= 224 else GK_KEY2
    vid_url = scraper_utils.gk_decrypt(self.get_name(), gk_key, payload)
    if self._get_direct_hostname(vid_url) == 'gvideo':
        found = dict((link, scraper_utils.gv_get_quality(link))
                     for link in self._parse_gdocs(vid_url))
    return found
def get_sources(self, video):
    """Return hosters for |video| by resolving the page's embedded iframes.

    Two iframe flavors exist: 'play-en.php' frames carry a GK-encrypted picasa
    id; any other frame is fetched and scanned for a jwplayer sources list.

    Fixes over the previous revision:
    - iterate a snapshot of temp_sources (keys were added/deleted while
      iterating the live dict, which breaks dict iteration)
    - gvideo streams keep their real quality instead of being collapsed to
      HIGH by QUALITY_MAP (which is keyed by site labels, not QUALITIES
      values) — consistent with the sibling scrapers
    """
    source_url = self.get_url(video)
    sources = {}
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    for match in re.finditer("embeds\[(\d+)\]\s*=\s*'([^']+)", html):
        match = re.search('src="([^"]+)', match.group(2))
        if not match:
            continue
        iframe_url = match.group(1)
        if 'play-en.php' in iframe_url:
            match = re.search('id=([^"&]+)', iframe_url)
            if match:
                # id is "<name>*<payload>"; only the payload is encrypted
                proxy_link = match.group(1).split('*', 1)[-1]
                picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                for stream_url in self._parse_google(picasa_url):
                    sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
        else:
            html = self._http_get(iframe_url, cache_limit=0)
            temp_sources = self._parse_sources_list(html)
            # Iterate over a snapshot: entries are re-keyed below and mutating
            # a dict during iteration is an error.
            for source in list(temp_sources):
                if 'download.php' in source:
                    # download.php redirects to the real file; keep the target url
                    redir_html = self._http_get(source, allow_redirect=False, method='HEAD', cache_limit=0)
                    if redir_html.startswith('http'):
                        temp_sources[redir_html] = temp_sources[source]
                        del temp_sources[source]
            sources.update(temp_sources)

    for source in sources:
        host = self._get_direct_hostname(source)
        stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(source)
        else:
            quality = QUALITY_MAP.get(sources[source]['quality'], QUALITIES.HIGH)
        hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                  'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Return hosters for |video| by resolving the page's embedded iframes.

    'play-en.php' frames carry a GK-encrypted picasa id; other frames are
    fetched and scanned for a jwplayer sources list plus bare <source
    type="video/mp4"> tags.

    Fix over the previous revision: iterate a snapshot of temp_sources —
    keys were added and deleted while iterating the live dict, which breaks
    dict iteration.
    """
    source_url = self.get_url(video)
    sources = {}
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=0.5)
    for match in re.finditer("embeds\[(\d+)\]\s*=\s*'([^']+)", html):
        match = re.search('src="([^"]+)', match.group(2))
        if not match:
            continue
        iframe_url = match.group(1)
        if "play-en.php" in iframe_url:
            match = re.search('id=([^"&]+)', iframe_url)
            if match:
                # id is "<name>*<payload>"; only the payload is encrypted
                proxy_link = match.group(1).split("*", 1)[-1]
                picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                for stream_url in self._parse_google(picasa_url):
                    sources[stream_url] = {
                        "quality": scraper_utils.gv_get_quality(stream_url),
                        "direct": True,
                    }
        else:
            html = self._http_get(iframe_url, cache_limit=0.25)
            temp_sources = self._parse_sources_list(html)
            # Iterate over a snapshot: entries are re-keyed below and mutating
            # a dict during iteration is an error.
            for source in list(temp_sources):
                if "download.php" in source:
                    # download.php redirects to the real file; keep the target url
                    redir_html = self._http_get(source, allow_redirect=False, method="HEAD", cache_limit=0)
                    if redir_html.startswith("http"):
                        temp_sources[redir_html] = temp_sources[source]
                        del temp_sources[source]
            sources.update(temp_sources)
            # Bare HTML5 <source> tags get a default HD720 quality.
            for source in dom_parser.parse_dom(html, "source", {"type": "video/mp4"}, ret="src"):
                sources[source] = {"quality": QUALITIES.HD720, "direct": True}

    for source in sources:
        host = self._get_direct_hostname(source)
        stream_url = source + "|User-Agent=%s" % (scraper_utils.get_ua())
        if host == "gvideo":
            quality = scraper_utils.gv_get_quality(source)
        else:
            quality = sources[source]["quality"]
            # Stored value may be a site label rather than a QUALITIES member;
            # translate through QUALITY_MAP in that case.
            if quality not in Q_ORDER:
                quality = QUALITY_MAP.get(sources[source]["quality"], QUALITIES.HIGH)
        hoster = {
            "multi-part": False,
            "url": stream_url,
            "host": host,
            "class": self,
            "quality": quality,
            "views": None,
            "rating": None,
            "direct": True,
        }
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Return source dicts for |video| from embedded iframes.

    'play-en.php' frames carry a GK-encrypted picasa id; other frames are
    scanned for a jwplayer "sources: [...]" block of file/label pairs.

    Fixes over the previous revision:
    - the redirect probe tested stream_url (always true for http links)
      instead of the probe's result, so non-URL responses clobbered the link
    - label was read outside the regex loop, so every stream got whatever
      label the last match left behind (or a NameError when none matched);
      labels are now tracked per stream url, and gvideo streams derive
      quality from the url itself
    """
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources

    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    # stream url -> site quality label (None when the site supplied none);
    # dict keys also give us the de-duplication the old set() provided
    stream_labels = {}
    for match in re.finditer("embeds\[(\d+)\]\s*=\s*'([^']+)", html):
        match = re.search('src="([^"]+)', match.group(2))
        if not match:
            continue
        iframe_url = match.group(1)
        if 'play-en.php' in iframe_url:
            match = re.search('id=([^"&]+)', iframe_url)
            if match:
                # id is "<name>*<payload>"; only the payload is encrypted
                proxy_link = match.group(1).split('*', 1)[-1]
                picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                for stream_url in self._parse_google(picasa_url):
                    stream_labels.setdefault(stream_url, None)
        else:
            html = self._http_get(iframe_url, cache_limit=0)
            match = re.search('sources\s*:\s*\[(.*?)\]', html, re.DOTALL)
            if match:
                for match in re.finditer('''['"]*file['"]*\s*:\s*['"]*([^'"]+).*?['"]*label['"]*\s*:\s*['"]*([^'"]+)''', match.group(1), re.DOTALL):
                    stream_url, label = match.groups()
                    if 'download.php' in stream_url:
                        # download.php redirects to the real file; keep the target
                        redir_html = self._http_get(stream_url, allow_redirect=False, method='HEAD', cache_limit=0)
                        if redir_html.startswith('http'):
                            stream_url = redir_html
                    stream_labels[stream_url] = label

    for stream_url, label in stream_labels.items():
        host = self._get_direct_hostname(stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = QUALITY_MAP.get(label, QUALITIES.HIGH)
        stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                  'quality': quality, 'views': None, 'rating': None, 'direct': True}
        sources.append(source)
    return sources
def __get_google_links(self, link):
    """Follow |link|, unwrap its base64 tunemovie token, return google streams."""
    html = self._http_get(link, cache_limit=.5)
    b64_match = re.search('base64\.decode\("([^"]+)', html, re.I)
    if not b64_match:
        return {}
    token_match = re.search('proxy\.link=tunemovie\*([^&]+)', base64.b64decode(b64_match.group(1)))
    if not token_match:
        return {}
    picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, token_match.group(1))
    return self._parse_google(picasa_url)
def get_sources(self, video):
    """Scrape direct gvideo streams for |video| via the site's proxy.link token."""
    hosters = []
    source_url = self.get_url(video)
    if source_url and source_url != FORCE_NO_MATCH:
        html = self._http_get(urlparse.urljoin(self.base_url, source_url), cache_limit=.5)
        match = re.search('proxy\.link=([^"&]+)', html)
        if match:
            # Drop the "<name>*" prefix; short payloads use key 1, long ones key 2.
            payload = match.group(1).split('*', 1)[-1]
            gk_key = GK_KEY1 if len(payload) <= 224 else GK_KEY2
            picasa_url = scraper_utils.gk_decrypt(self.get_name(), gk_key, payload)
            if self._get_direct_hostname(picasa_url) == 'gvideo':
                hosters = [{'multi-part': False, 'url': source, 'class': self,
                            'quality': scraper_utils.gv_get_quality(source),
                            'host': self._get_direct_hostname(source),
                            'rating': None, 'views': None, 'direct': True}
                           for source in self._parse_google(picasa_url)]
    return hosters
def __get_google_links(self, link):
    """Follow |link|, unwrap its base64 tunemovie token, return google streams."""
    html = self._http_get(link, cache_limit=.5)
    b64_match = re.search('base64\.decode\("([^"]+)', html, re.I)
    if not b64_match:
        return {}
    # Log the raw encoded blob to help debug site layout changes.
    log_utils.log(b64_match.group(1))
    token_match = re.search('proxy\.link=tunemovie\*([^&]+)', base64.b64decode(b64_match.group(1)))
    if not token_match:
        return {}
    picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, token_match.group(1))
    return self._parse_google(picasa_url)
def __get_gk_links2(self, html):
    """Unwrap the page's base64 tunemovie token; return {gvideo link: quality}."""
    b64_match = re.search('base64\.decode\("([^"]+)', html, re.I)
    if not b64_match:
        return {}
    token_match = re.search('proxy\.link=tunemovie\*([^&]+)', base64.b64decode(b64_match.group(1)))
    if not token_match:
        return {}
    picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, token_match.group(1))
    return dict((link, scraper_utils.gv_get_quality(link))
                for link in self._parse_google(picasa_url))
def get_sources(self, video):
    """Return hosters for |video| by resolving the page's embedded iframes.

    'play-en.php' frames carry a GK-encrypted picasa id; other frames are
    fetched and scanned for a jwplayer sources list plus bare <source
    type="video/mp4"> tags (which need a Referer header to play).

    Fix over the previous revision: iterate a snapshot of temp_sources —
    keys were added and deleted while iterating the live dict, which breaks
    dict iteration.
    """
    source_url = self.get_url(video)
    sources = {}
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=0)
    for match in re.finditer("embeds\[(\d+)\]\s*=\s*'([^']+)", html):
        match = re.search('src="([^"]+)', match.group(2))
        if not match:
            continue
        iframe_url = match.group(1)
        if 'play-en.php' in iframe_url:
            match = re.search('id=([^"&]+)', iframe_url)
            if match:
                # id is "<name>*<payload>"; only the payload is encrypted
                proxy_link = match.group(1).split('*', 1)[-1]
                picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                for stream_url in scraper_utils.parse_google(self, picasa_url):
                    sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
        else:
            # NOTE: rebinding html here is safe — the outer finditer iterator
            # keeps its own reference to the original page string.
            html = self._http_get(iframe_url, cache_limit=0)
            temp_sources = scraper_utils.parse_sources_list(self, html)
            # Iterate over a snapshot: entries are re-keyed below and mutating
            # a dict during iteration is an error.
            for source in list(temp_sources):
                if 'download.php' in source:
                    # download.php redirects to the real file; keep the target url
                    redir_html = self._http_get(source, allow_redirect=False, method='HEAD', cache_limit=0)
                    if redir_html.startswith('http'):
                        temp_sources[redir_html] = temp_sources[source]
                        del temp_sources[source]
            sources.update(temp_sources)
            for source in dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req='src'):
                sources[source.attrs['src']] = {'quality': QUALITIES.HD720, 'direct': True, 'referer': iframe_url}

    for source, values in sources.iteritems():
        host = scraper_utils.get_direct_hostname(self, source)
        headers = {'User-Agent': scraper_utils.get_ua()}
        if 'referer' in values:
            headers['Referer'] = values['referer']
        stream_url = source + scraper_utils.append_headers(headers)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(source)
        else:
            quality = values['quality']
            # Stored value may be a site label rather than a QUALITIES member;
            # translate through QUALITY_MAP in that case.
            if quality not in Q_ORDER:
                quality = QUALITY_MAP.get(values['quality'], QUALITIES.HIGH)
        hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                  'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Collect hosters for |video| from three page features: an HTML5 <video>
    tag, hosted <iframe> embeds, and a GK-encrypted proxy.link token that may
    resolve to vk.com, picasaweb, or docs.google streams."""
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        # 1) Direct HTML5 <video><source src=...> stream, assumed HD720
        #    unless it is a gvideo link whose url encodes the real quality.
        ele = dom_parser.parse_dom(html, 'video')
        if ele:
            stream_url = dom_parser.parse_dom(ele, 'source', ret='src')
            if stream_url:
                hoster = {'multi-part': False, 'url': stream_url[0], 'class': self, 'quality': QUALITIES.HD720, 'host': self._get_direct_hostname(stream_url[0]), 'rating': None, 'views': None, 'direct': True}
                if hoster['host'] == 'gvideo':
                    hoster['quality'] = scraper_utils.gv_get_quality(hoster['url'])
                hosters.append(hoster)
        # 2) Embedded iframes become non-direct hosters (facebook skipped).
        sources = dom_parser.parse_dom(html, 'iframe', ret='src')
        for src in sources:
            if 'facebook' in src:
                continue
            host = urlparse.urlparse(src).hostname
            hoster = {'multi-part': False, 'url': src, 'class': self, 'quality': QUALITIES.HIGH, 'host': host, 'rating': None, 'views': None, 'direct': False}
            hosters.append(hoster)
        # 3) proxy.link token: "<name>*<payload>" — only the payload is
        #    encrypted; the decrypted url is dispatched on its hostname.
        match = re.search('proxy\.link=([^"&]+)', html)
        if match:
            proxy_link = match.group(1)
            proxy_link = proxy_link.split('*', 1)[-1]
            stream_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
            if 'vk.com' in stream_url.lower():
                hoster = {'multi-part': False, 'host': 'vk.com', 'class': self, 'url': stream_url, 'quality': QUALITIES.HD720, 'views': None, 'rating': None, 'direct': False}
                hosters.append(hoster)
            if 'picasaweb' in stream_url.lower():
                for source in self._parse_google(stream_url):
                    quality = scraper_utils.gv_get_quality(source)
                    hoster = {'multi-part': False, 'url': source, 'class': self, 'quality': quality, 'host': self._get_direct_hostname(source), 'rating': None, 'views': None, 'direct': True}
                    hosters.append(hoster)
            if 'docs.google' in stream_url.lower():
                # docs.google urls go through the gdocs parser (picasaweb
                # above uses the google/picasa parser).
                for source in self._parse_gdocs(stream_url):
                    quality = scraper_utils.gv_get_quality(source)
                    hoster = {'multi-part': False, 'url': source, 'class': self, 'quality': quality, 'host': self._get_direct_hostname(source), 'rating': None, 'views': None, 'direct': True}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Collect hosters for |video| from three page features: an HTML5 <video>
    tag, hosted <iframe> embeds, and a GK-encrypted proxy.link token that may
    resolve to vk.com, picasaweb, or docs.google streams."""
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        # 1) Direct HTML5 <video><source src=...> stream, assumed HD720
        #    unless it is a gvideo link whose url encodes the real quality.
        ele = dom_parser.parse_dom(html, 'video')
        if ele:
            stream_url = dom_parser.parse_dom(ele, 'source', ret='src')
            if stream_url:
                hoster = {'multi-part': False, 'url': stream_url[0], 'class': self, 'quality': QUALITIES.HD720, 'host': self._get_direct_hostname(stream_url[0]), 'rating': None, 'views': None, 'direct': True}
                if hoster['host'] == 'gvideo':
                    hoster['quality'] = scraper_utils.gv_get_quality(hoster['url'])
                hosters.append(hoster)
        # 2) Embedded iframes become non-direct hosters (facebook skipped).
        sources = dom_parser.parse_dom(html, 'iframe', ret='src')
        for src in sources:
            if 'facebook' in src:
                continue
            host = urlparse.urlparse(src).hostname
            hoster = {'multi-part': False, 'url': src, 'class': self, 'quality': QUALITIES.HIGH, 'host': host, 'rating': None, 'views': None, 'direct': False}
            hosters.append(hoster)
        # 3) proxy.link token: "<name>*<payload>" — only the payload is
        #    encrypted; the decrypted url is dispatched on its hostname.
        match = re.search('proxy\.link=([^"&]+)', html)
        if match:
            proxy_link = match.group(1)
            proxy_link = proxy_link.split('*', 1)[-1]
            stream_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
            if 'vk.com' in stream_url.lower():
                hoster = {'multi-part': False, 'host': 'vk.com', 'class': self, 'url': stream_url, 'quality': QUALITIES.HD720, 'views': None, 'rating': None, 'direct': False}
                hosters.append(hoster)
            if 'picasaweb' in stream_url.lower():
                for source in self._parse_google(stream_url):
                    quality = scraper_utils.gv_get_quality(source)
                    hoster = {'multi-part': False, 'url': source, 'class': self, 'quality': quality, 'host': self._get_direct_hostname(source), 'rating': None, 'views': None, 'direct': True}
                    hosters.append(hoster)
            if 'docs.google' in stream_url.lower():
                # NOTE(review): docs.google urls are parsed with _parse_google
                # here, while a sibling scraper uses _parse_gdocs for the same
                # branch — confirm which parser this site actually needs.
                for source in self._parse_google(stream_url):
                    quality = scraper_utils.gv_get_quality(source)
                    hoster = {'multi-part': False, 'url': source, 'class': self, 'quality': quality, 'host': self._get_direct_hostname(source), 'rating': None, 'views': None, 'direct': True}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Return hosters for |video| by resolving the page's embedded iframes.

    'play-en.php' frames carry a GK-encrypted picasa id; other frames are
    fetched and scanned for a jwplayer sources list.

    Fix over the previous revision: iterate a snapshot of temp_sources —
    keys were added and deleted while iterating the live dict, which breaks
    dict iteration.
    """
    source_url = self.get_url(video)
    sources = {}
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    for match in re.finditer("embeds\[(\d+)\]\s*=\s*'([^']+)", html):
        match = re.search('src="([^"]+)', match.group(2))
        if not match:
            continue
        iframe_url = match.group(1)
        if 'play-en.php' in iframe_url:
            match = re.search('id=([^"&]+)', iframe_url)
            if match:
                # id is "<name>*<payload>"; only the payload is encrypted
                proxy_link = match.group(1).split('*', 1)[-1]
                picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                for stream_url in self._parse_google(picasa_url):
                    sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
        else:
            html = self._http_get(iframe_url, cache_limit=.25)
            temp_sources = self._parse_sources_list(html)
            # Iterate over a snapshot: entries are re-keyed below and mutating
            # a dict during iteration is an error.
            for source in list(temp_sources):
                if 'download.php' in source:
                    # download.php redirects to the real file; keep the target url
                    redir_html = self._http_get(source, allow_redirect=False, method='HEAD', cache_limit=0)
                    if redir_html.startswith('http'):
                        temp_sources[redir_html] = temp_sources[source]
                        del temp_sources[source]
            sources.update(temp_sources)

    for source in sources:
        host = self._get_direct_hostname(source)
        stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(source)
        else:
            quality = QUALITY_MAP.get(sources[source]['quality'], QUALITIES.HIGH)
        hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                  'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Return source dicts for |video| from embedded iframes.

    'play-en.php' frames carry a GK-encrypted picasa id; other frames are
    scanned for a jwplayer "sources: [...]" block of file/label pairs.

    Fixes over the previous revision:
    - the redirect probe tested stream_url (always true for http links)
      instead of the probe's result, so non-URL responses clobbered the link
    - the probe now uses a HEAD request, matching the sibling implementation,
      since only the redirect target is wanted
    - label was read outside the regex loop, so every stream got whatever
      label the last match left behind (or a NameError when none matched);
      labels are now tracked per stream url, and gvideo streams derive
      quality from the url itself
    """
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources

    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    # stream url -> site quality label (None when the site supplied none);
    # dict keys also give us the de-duplication the old set() provided
    stream_labels = {}
    for match in re.finditer("embeds\[(\d+)\]\s*=\s*'([^']+)", html):
        match = re.search('src="([^"]+)', match.group(2))
        if not match:
            continue
        iframe_url = match.group(1)
        if 'play-en.php' in iframe_url:
            match = re.search('id=([^"&]+)', iframe_url)
            if match:
                # id is "<name>*<payload>"; only the payload is encrypted
                proxy_link = match.group(1).split('*', 1)[-1]
                picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                for stream_url in self._parse_google(picasa_url):
                    stream_labels.setdefault(stream_url, None)
        else:
            html = self._http_get(iframe_url, cache_limit=0)
            match = re.search('sources\s*:\s*\[(.*?)\]', html, re.DOTALL)
            if match:
                for match in re.finditer('''['"]*file['"]*\s*:\s*['"]*([^'"]+).*?['"]*label['"]*\s*:\s*['"]*([^'"]+)''', match.group(1), re.DOTALL):
                    stream_url, label = match.groups()
                    if 'download.php' in stream_url:
                        # download.php redirects to the real file; keep the target
                        redir_html = self._http_get(stream_url, allow_redirect=False, method='HEAD', cache_limit=0)
                        if redir_html.startswith('http'):
                            stream_url = redir_html
                    stream_labels[stream_url] = label

    for stream_url, label in stream_labels.items():
        host = self._get_direct_hostname(stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = QUALITY_MAP.get(label, QUALITIES.HIGH)
        stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                  'quality': quality, 'views': None, 'rating': None, 'direct': True}
        sources.append(source)
    return sources