def __get_direct_links(self, iframe_url, page_url):
    # Fetch the embed iframe and collect direct stream links from its JS
    # source lists; falls back to a raw iframe embed when none are found.
    sources = []
    headers = {'Referer': page_url}
    html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
    # if captions exist, then they aren't hardcoded
    subs = '' if re.search('kind\s*:\s*"captions"', html) else 'Turkish subtitles'
    streams = scraper_utils.parse_sources_list(self, html, key='VideoSources')
    streams.update(scraper_utils.parse_sources_list(self, html, var='video'))
    for stream_url in streams:
        quality = streams[stream_url]['quality']
        if 'v.asp' in stream_url:
            # local redirector: resolve the real stream URL with a HEAD request
            stream_url = scraper_utils.urljoin(self.base_url, stream_url)
            stream_redirect = self._http_get(stream_url, allow_redirect=False, method='HEAD', cache_limit=0)
            if stream_redirect.startswith('http'): stream_url = stream_redirect
        sources.append({'stream_url': stream_url, 'subs': subs, 'quality': quality, 'direct': True})
    if sources: return sources
    # no direct streams found: surface the first iframe as a hosted source
    iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
    if not iframe_url: return sources
    sources.append({'stream_url': iframe_url[0].attrs['src'], 'subs': subs, 'quality': QUALITIES.HD720, 'direct': False})
    return sources
def get_sources(self, video):
    # Resolve the video's page, parse its JS source list, and return direct
    # hosters; same-site redirector links are resolved via HEAD first.
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment: html = fragment[0].content
    links = scraper_utils.parse_sources_list(self, html)
    for link in links:
        stream_url = link
        if self.base_url in link:
            # link points back at this site: follow the redirect manually
            redir_url = self._http_get(link, headers={'Referer': url}, allow_redirect=False, method='HEAD')
            if redir_url.startswith('http'):
                stream_url = redir_url
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = links[link]['quality']
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(source)
    return hosters
def __parse_streams(self, iframe_url, page_url):
    """Fetch an embed iframe, unpack any packed JS, and return its source list."""
    page = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    if jsunpack.detect(page):
        page = jsunpack.unpack(page)
    return scraper_utils.parse_sources_list(self, page)
def get_sources(self, video):
    # Query the AJAX view endpoint once per language ('or' first, then 'tr' —
    # presumably original vs. Turkish-subtitled; confirm against the site),
    # then harvest iframe embeds and direct source lists from the response.
    hosters = []
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    match = re.search('var\s+view_id\s*=\s*"([^"]+)', html)
    if not match: return hosters
    view_id = match.group(1)
    for lang in ['or', 'tr']:
        subs = True if lang == 'tr' else False
        view_data = {'id': view_id, 'tip': 'view', 'dil': lang}
        html = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25)
        html = html.strip()
        # strip literal \n / \t escape sequences before JSON extraction
        html = re.sub(r'\\n|\\t', '', html)
        match = re.search('var\s+sources\s*=\s*(\[.*?\])', html)
        if match:
            raw_data = match.group(1)
            raw_data = raw_data.replace('\\', '')
        else:
            raw_data = html
        js_data = scraper_utils.parse_json(raw_data, self.ajax_url)
        if 'data' not in js_data: continue
        src = dom_parser2.parse_dom(js_data['data'], 'iframe', req='src')
        if not src: continue
        html = self._http_get(src[0].attrs['src'], cache_limit=.25)
        for attrs, _content in dom_parser2.parse_dom(html, 'iframe', req='src'):
            src = attrs['src']
            if not src.startswith('http'): continue
            sources.append({'label': '720p', 'file': src, 'direct': False, 'subs': subs})
        sources += [{'file': url, 'subs': subs} for url in scraper_utils.parse_sources_list(self, html).iterkeys()]
        # stop at the first language that yields anything
        if sources: break
    for source in sources:
        direct = source.get('direct', True)
        stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                # direct source with no label and no gvideo quality: skip it
                continue
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.height_get_quality(source['label'])
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
        if source.get('subs'): hoster['subs'] = 'Turkish Subtitles'
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    # Gather <source> tags and iframe embeds from each tab plus mirror links;
    # sources without an explicit quality inherit the page-level best quality.
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for _attrs, div in dom_parser2.parse_dom(html, 'div', {'class': 'tabcontent'}):
        for attrs, _content in dom_parser2.parse_dom(div, 'source', req='src'):
            source = attrs['src'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
            sources[source] = {'quality': None, 'direct': True}
        iframe_url = dom_parser2.parse_dom(div, 'iframe', req='src')
        if iframe_url:
            iframe_url = iframe_url[0].attrs['src']
            if 'songs2dl' in iframe_url:
                # songs2dl embeds carry their own JS source list; fetch and parse
                headers = {'Referer': page_url}
                iframe_html = self._http_get(iframe_url, headers=headers, cache_limit=1)
                sources.update(scraper_utils.parse_sources_list(self, iframe_html))
            else:
                sources[iframe_url] = {'quality': None, 'direct': False}
    sources.update(self.__get_mirror_links(html, video))
    page_quality = self.__get_best_quality(sources)
    for source, values in sources.iteritems():
        direct = values['direct']
        if direct:
            host = scraper_utils.get_direct_hostname(self, source)
        else:
            host = urlparse.urlparse(source).hostname
        if values['quality'] is None:
            values['quality'] = page_quality
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': values['quality'], 'direct': direct}
        hosters.append(hoster)
    return hosters
def __get_links(self, iframe_src, page_url):
    """Unpack every packed JS block in the iframe page and collect all source lists.

    Bug fix: the original reassigned ``sources`` on every regex match, so only
    the links from the *last* packed script survived; accumulate with
    ``update()`` instead so no script's links are dropped.
    """
    sources = {}
    headers = {'Referer': page_url}
    html = self._http_get(iframe_src, headers=headers, cache_limit=1)
    for match in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
        js_data = jsunpack.unpack(match.group(1))
        js_data = js_data.replace('\\', '')
        sources.update(scraper_utils.parse_sources_list(self, js_data))
    return sources
def __get_sources(self, html, page_url):
    # Walk iframes under the video-content div. Same-site iframes are fetched
    # and parsed for direct links; off-site iframes become non-direct hosters.
    sources = []
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'video-content'})
    if fragment:
        referer = page_url
        iframes = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
        # NOTE: the list is deliberately extended while iterating so nested
        # iframes discovered inside a same-site embed are processed too
        for attrs, _content in iframes:
            iframe_url = attrs['src']
            if self.base_url in iframe_url:
                headers = {'Referer': referer}
                html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
                referer = iframe_url
                links = scraper_utils.parse_sources_list(self, html)
                if links:
                    for link, values in links.iteritems():
                        host = scraper_utils.get_direct_hostname(self, link)
                        if host == 'gvideo':
                            quality = scraper_utils.gv_get_quality(link)
                        else:
                            quality = values['quality']
                        source = {'multi-part': False, 'url': link, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
                        sources.append(source)
                else:
                    # no direct links yet: queue this page's iframes for the walk
                    iframes += dom_parser2.parse_dom(html, 'iframe', req='src')
            else:
                host = urlparse.urlparse(iframe_url).hostname
                source = {'multi-part': False, 'url': iframe_url, 'host': host, 'class': self, 'quality': QUALITIES.HIGH, 'views': None, 'rating': None, 'direct': False}
                sources.append(source)
    return sources
def get_sources(self, video):
    # Scan player-data links on the page; embed players are fetched and parsed
    # for direct links, external players become non-direct hosters whose
    # quality is mapped from the link label.
    hosters = []
    sources = {}
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for match in re.finditer('player-data="([^"]+)[^>]+episode-data="([^"]+)[^>]*>(.*?)</a>', html, re.DOTALL):
        player_url, ep_id, label = match.groups()
        # for episodes, only keep players matching the requested episode
        if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match(video, ep_id): continue
        label = label.strip()
        headers = {'Referer': page_url}
        if re.match('https?://embed', player_url):
            src_html = self._http_get(player_url, headers=headers, cache_limit=.5)
            sources.update(scraper_utils.parse_sources_list(self, src_html))
            sources.update(self.__get_sources(src_html, label))
        else:
            sources[player_url] = {'direct': False, 'quality': Q_MAP.get(label.upper(), QUALITIES.HIGH)}
    for source, value in sources.iteritems():
        direct = value['direct']
        quality = value['quality']
        if direct:
            host = scraper_utils.get_direct_hostname(self, source)
            stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        else:
            host = urlparse.urlparse(source).hostname
            stream_url = source
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
        hosters.append(hoster)
    return hosters
def __get_links(self, html):
    """Decode the obfuscated source list and return direct hosters for it."""
    hosters = []
    encoded = re.search('tlas\("([^"]+)', html)
    if not encoded:
        return hosters
    # a 13-shift Caesar is applied before and after the helper to recover
    # the plaintext source list
    plaintext = self.__caesar(self.__get_f(self.__caesar(encoded.group(1), 13)), 13)
    sources = scraper_utils.parse_sources_list(self, plaintext)
    for source, values in sources.iteritems():
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Cookie': self._get_stream_cookies()})
        hoster = {
            'multi-part': False,
            'url': stream_url,
            'host': scraper_utils.get_direct_hostname(self, stream_url),
            'class': self,
            'quality': values['quality'],
            'rating': None,
            'views': None,
            'direct': True,
        }
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    # Follow the playex iframe; a Google <object> embed is expanded via
    # parse_google, otherwise the iframe's JS source list is used directly.
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment: html = fragment[0].content
    iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
    if not iframe_url: return hosters
    iframe_url = iframe_url[0].attrs['src']
    if iframe_url.startswith('/'):
        iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    obj = dom_parser2.parse_dom(html, 'object', req='data')
    if obj:
        streams = dict((stream_url, {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}) for stream_url in scraper_utils.parse_google(self, obj[0].attrs['data']))
    else:
        streams = scraper_utils.parse_sources_list(self, html)
    for stream_url, values in streams.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = values['quality']
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(source)
    return hosters
def __get_sources(self, html):
    """Collect JS source-list links plus <source>/<iframe> embeds, then post-process."""
    sources = scraper_utils.parse_sources_list(self, html)
    tags = dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req='src')
    tags += dom_parser2.parse_dom(html, 'iframe', req='src')
    for tag in tags:
        link = tag.attrs['src']
        # gvideo links are playable directly; everything else is a hosted embed
        if scraper_utils.get_direct_hostname(self, link) == 'gvideo':
            sources[link] = {'quality': scraper_utils.gv_get_quality(link), 'direct': True}
        else:
            sources[link] = {'quality': QUALITIES.HD720, 'direct': False}
    return self.__proc_sources(sources)
def __get_embedded_sources(self, html):
    """Return mp4 <source> URLs plus links hidden in packed JS blocks."""
    sources = []
    # if captions exist, then they aren't hardcoded
    subs = '' if re.search('''"?kind"?\s*:\s*"?captions"?''', html) else 'Turkish subtitles'
    for attrs, _content in dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req='src'):
        sources.append(attrs['src'])
    # append unpacked JS so the source-list parser sees its links too
    for match in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
        unpacked = jsunpack.unpack(match.group(1)).replace('\\', '')
        html += unpacked
    sources.extend(scraper_utils.parse_sources_list(self, html, var="source"))
    return {'sources': sources, 'subs': subs}
def get_sources(self, video):
    # Parse the film-container iframe for direct links; sources carrying a
    # bitrate tag (e.g. "3mbps") are sorted highest-first via 'extra'.
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'film-container'})
    if fragment:
        iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
        if iframe_url:
            iframe_url = scraper_utils.urljoin(self.base_url, iframe_url[0].attrs['src'])
            headers = {'Referer': page_url}
            html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
            sources = scraper_utils.parse_sources_list(self, html)
            for source in sources:
                quality = sources[source]['quality']
                host = scraper_utils.get_direct_hostname(self, source)
                stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url})
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                # keep the bitrate tag so equal-quality sources can be ranked
                match = re.search('(\d+[a-z]bps)', source)
                if match: hoster['extra'] = match.group(1)
                hosters.append(hoster)
    hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
    return hosters
def __get_embedded(self, html, page_url):
    """Resolve the videoreklam iframe and map each stream URL to its quality."""
    sources = {}
    wrapper = dom_parser2.parse_dom(html, 'div', {'id': 'videoreklam'})
    if not wrapper:
        return sources
    frame = dom_parser2.parse_dom(wrapper[0].content, 'iframe', req='src')
    if not frame:
        return sources
    html = self._http_get(frame[0].attrs['src'], headers={'Referer': page_url}, cache_limit=.5)
    # append unpacked JS so its source list is visible to the parser
    for match in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
        html += jsunpack.unpack(match.group(1)).replace('\\', '')
    parsed = scraper_utils.parse_sources_list(self, html, var='source')
    return dict((url, info['quality']) for url, info in parsed.iteritems())
def __get_embed_links(self, html):
    """Parse the embed page's source list into direct hosters.

    Bug fix: the loop iterates the dict's *keys* (URL strings), so the
    original ``source['quality']`` subscripted a string and raised TypeError
    as soon as any source was found; read ``sources[source]['quality']``.
    """
    hosters = []
    sources = scraper_utils.parse_sources_list(self, html)
    for source in sources:
        quality = sources[source]['quality']
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, source), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    # Movies expose player pages via orange buttons, episodes via numbered
    # links; each player page is either a GK-style {link: ...} payload or a
    # plain JS source list.
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    views = None
    fragment = dom_parser2.parse_dom(html, 'span', {'class': 'post-views'})
    if fragment:
        views = re.sub('[^\d]', '', fragment[0].content)
    iframe_urls = []
    if video.video_type == VIDEO_TYPES.MOVIE:
        iframe_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(html, 'a', {'class': ['orange', 'abutton']}, req='href')]
    else:
        for label, link in self.__get_episode_links(html):
            if int(label) == int(video.episode):
                iframe_urls.append(link)
    for iframe_url in iframe_urls:
        headers = {'Referer': url}
        html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
        match = re.search('{link\s*:\s*"([^"]+)', html)
        if match:
            sources = self.__get_gk_links(match.group(1), iframe_url)
        else:
            sources = scraper_utils.parse_sources_list(self, html)
        for source in sources:
            stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            direct = sources[source]['direct']
            quality = sources[source]['quality']
            if sources[source]['direct']:
                host = scraper_utils.get_direct_hostname(self, source)
            else:
                host = urlparse.urlparse(source).hostname
            hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality, 'host': host, 'rating': None, 'views': views, 'direct': direct}
            hosters.append(hoster)
    return hosters
def __get_embedded_links(self, html, sub):
    """Unescape the embedded markup and convert its source list to hosters."""
    hosters = []
    cleaned = html.replace('\\"', '"').replace('\\/', '/')
    for link, info in scraper_utils.parse_sources_list(self, cleaned).iteritems():
        hoster = {
            'multi-part': False,
            'host': scraper_utils.get_direct_hostname(self, link),
            'class': self,
            'quality': info['quality'],
            'views': None,
            'rating': None,
            'url': link,
            'direct': info['direct'],
            'subs': sub,
        }
        hosters.append(hoster)
    return hosters
def __get_page_links(self, html):
    """Unpack packed JS on the page and turn every listed stream into a hoster."""
    hosters = []
    # append unpacked JS so the source-list parser can see its links
    for packed in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
        html += jsunpack.unpack(packed.group(1)).replace('\\', '')
    for link, info in scraper_utils.parse_sources_list(self, html).iteritems():
        hosters.append({
            'multi-part': False,
            'url': link,
            'class': self,
            'quality': info['quality'],
            'host': scraper_utils.get_direct_hostname(self, link),
            'rating': None,
            'views': None,
            'direct': True,
        })
    return hosters
def get_sources(self, video):
    # Collect hosted links from the 'enlaces' list and the player divs;
    # '//player' iframes are fetched for direct links, all other links keep a
    # quality derived from the link text itself.
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    sources = {}
    for _attrs, fragment in dom_parser2.parse_dom(html, 'ul', {'class': 'enlaces'}):
        for attrs, _content in dom_parser2.parse_dom(fragment, 'a', req='href'):
            stream_url = attrs['href']
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(stream_url)
            else:
                meta = scraper_utils.parse_episode_link(stream_url)
            sources.update({stream_url: {'quality': scraper_utils.height_get_quality(meta['height']), 'direct': False}})
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}) + dom_parser2.parse_dom(html, 'div', {'id': re.compile('player\d+')}):
        for attrs, _content in dom_parser2.parse_dom(fragment, 'iframe', req='src') + dom_parser2.parse_dom(fragment, 'iframe', req='data-lazy-src'):
            # lazy-loaded iframes keep the real URL in data-lazy-src
            iframe_url = attrs.get('src', '')
            if not iframe_url.startswith('http'):
                iframe_url = attrs.get('data-lazy-src', '')
                if not iframe_url.startswith('http'): continue
            if '//player' in iframe_url:
                html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
                sources.update(scraper_utils.parse_sources_list(self, html))
            else:
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(iframe_url)
                else:
                    meta = scraper_utils.parse_episode_link(iframe_url)
                sources.update({iframe_url: {'quality': scraper_utils.height_get_quality(meta['height']), 'direct': False}})
    for stream_url, values in sources.iteritems():
        direct = values['direct']
        quality = values['quality']
        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        else:
            stream_url = stream_url
            host = urlparse.urlparse(stream_url).hostname
        hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality, 'host': host, 'rating': None, 'views': None, 'direct': direct}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    # The page-level quality label ('poster-qulabel') is the fallback quality
    # for any tab iframe whose embed page yields no parseable source list.
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    q_str = dom_parser2.parse_dom(html, 'div', {'class': 'poster-qulabel'})
    if q_str:
        q_str = q_str[0].content.replace(' ', '').upper()
        page_quality = Q_MAP.get(q_str, QUALITIES.HIGH)
    else:
        page_quality = QUALITIES.HIGH
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'tab_box'}):
        iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='src')
        if iframe_url:
            iframe_url = iframe_url[0].attrs['src']
            # youtube iframes are trailers, not streams
            if 'youtube' in iframe_url: continue
            html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
            # append unpacked JS so the source-list parser can see its links
            for match in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
                js_data = jsunpack.unpack(match.group(1))
                js_data = js_data.replace('\\', '')
                html += js_data
            sources = scraper_utils.parse_sources_list(self, html)
            if not sources:
                sources = {iframe_url: {'quality': page_quality, 'direct': False}}
            for source, values in sources.iteritems():
                direct = values['direct']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, source)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(source)
                    else:
                        quality = values['quality']
                    source += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                else:
                    host = urlparse.urlparse(source).hostname
                    quality = scraper_utils.get_quality(video, host, values['quality'])
                hoster = {'multi-part': False, 'url': source, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                hosters.append(hoster)
    return hosters
def __get_sources(self, html, page_url, subs):
    # Prefer the dzst-player JSON payload (a height -> stream URL map under
    # 'tr'); otherwise drill into the video-player fragment, recursing into
    # same-site (dizist) iframes and parsing other embeds for source lists.
    sources = {}
    player_div = dom_parser2.parse_dom(html, 'div', {'class': 'dzst-player'}, req='data-dzst-player')
    if player_div:
        # NOTE(review): the replace below looks like a no-op ('=' -> '=');
        # presumably it originally decoded an entity/escaped '=' — confirm
        # against the upstream scraper before changing.
        js_html = scraper_utils.cleanse_title(player_div[0].attrs['data-dzst-player'].replace('=', '='))
        js_data = scraper_utils.parse_json(js_html, page_url)
        links = js_data.get('tr', {})
        for height in links:
            stream_url = links[height]
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(height)
            sources[stream_url] = {'direct': True, 'subs': subs, 'quality': quality}
    else:
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'video-player'})
        if fragment:
            fragment = fragment[0].content
            # strip ad players so their iframes aren't picked up below
            for _attrs, div in dom_parser2.parse_dom(fragment, 'div', {'class': 'ad-player'}):
                fragment = fragment.replace(div, '')
            iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='src')
            if iframe_url:
                iframe_url = iframe_url[0].attrs['src']
                if 'dizist' in iframe_url:
                    # same-site embed: fetch it and recurse
                    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=1)
                    return self.__get_sources(html, page_url, subs)
                else:
                    parts = urlparse.urlparse(iframe_url)
                    if not parts.hostname:
                        # relative iframe URL: resolve and parse its source list
                        iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
                        html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=1)
                        sources = scraper_utils.parse_sources_list(self, html, var='sources')
                        for value in sources.itervalues():
                            value['subs'] = subs
                    else:
                        if scraper_utils.get_direct_hostname(self, iframe_url) == 'gvideo':
                            direct = True
                        else:
                            direct = False
                        sources[iframe_url] = {'direct': direct, 'subs': subs, 'quality': QUALITIES.HD720}
            else:
                sources = scraper_utils.parse_sources_list(self, fragment, var='sources')
                for value in sources.itervalues():
                    value['subs'] = subs
    return sources
def get_sources(self, video):
    # miradetodo iframes are fully resolved (nav link, GK links, amazon links,
    # and JS source lists); any other iframe becomes a non-direct hoster.
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}):
        for attrs, _content in dom_parser2.parse_dom(fragment, 'iframe', req='src') + dom_parser2.parse_dom(fragment, 'iframe', req='data-lazy-src'):
            # lazy-loaded iframes keep the real URL in data-lazy-src
            iframe_url = attrs.get('src', '')
            if not iframe_url.startswith('http'):
                iframe_url = attrs.get('data-lazy-src', '')
                if not iframe_url.startswith('http'): continue
            if 'miradetodo' in iframe_url:
                html = self._http_get(iframe_url, cache_limit=.5)
                fragment = dom_parser2.parse_dom(html, 'nav', {'class': 'nav'})
                if fragment:
                    stream_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
                    if stream_url:
                        html = self._http_get(stream_url[0].attrs['href'], cache_limit=.5)
                        sources.update(self.__get_gk_links(html))
                        sources.update(self.__get_gk_links2(html))
                        sources.update(self.__get_amazon_links(html))
                        sources.update(scraper_utils.parse_sources_list(self, html))
            else:
                host = urlparse.urlparse(iframe_url).hostname
                source = {'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH), 'direct': False}
                sources.update({iframe_url: source})
    for source in sources:
        stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
        direct = sources[source]['direct']
        quality = sources[source]['quality']
        host = scraper_utils.get_direct_hostname(self, source) if direct else urlparse.urlparse(source).hostname
        hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality, 'host': host, 'rating': None, 'views': None, 'direct': direct}
        hosters.append(hoster)
    return hosters
def __get_direct_links(self, stream_url):
    """Fetch stream_url and parse the JS source list out of the response."""
    html = self._http_get(stream_url, cache_limit=1)
    return scraper_utils.parse_sources_list(self, html)
def get_sources(self, video):
    """Collect hosters from the numbered embeds on the page.

    play-en.php embeds are GK-decrypted into picasa/gvideo streams; any other
    iframe is fetched and its JS source list parsed, with download.php links
    resolved to their redirect target first.

    Bug fix: the original deleted/inserted entries in ``temp_sources`` while
    iterating the dict, which raises ``RuntimeError: dictionary changed size
    during iteration``. Replacement pairs are now collected first and applied
    after the loop.
    """
    source_url = self.get_url(video)
    sources = {}
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=0)
    for match in re.finditer("embeds\[(\d+)\]\s*=\s*'([^']+)", html):
        match = re.search('src="([^"]+)', match.group(2))
        if not match: continue
        iframe_url = match.group(1)
        if 'play-en.php' in iframe_url:
            match = re.search('id=([^"&]+)', iframe_url)
            if match:
                proxy_link = match.group(1)
                proxy_link = proxy_link.split('*', 1)[-1]
                picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                for stream_url in scraper_utils.parse_google(self, picasa_url):
                    sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
        else:
            html = self._http_get(iframe_url, cache_limit=0)
            temp_sources = scraper_utils.parse_sources_list(self, html)
            # resolve download.php redirectors; collect replacements first so
            # the dict is not mutated while being iterated
            replacements = {}
            for source in temp_sources:
                if 'download.php' in source:
                    redir_html = self._http_get(source, allow_redirect=False, method='HEAD', cache_limit=0)
                    if redir_html.startswith('http'):
                        replacements[source] = redir_html
            for old_url, new_url in replacements.iteritems():
                temp_sources[new_url] = temp_sources.pop(old_url)
            sources.update(temp_sources)
            for source in dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req='src'):
                sources[source.attrs['src']] = {'quality': QUALITIES.HD720, 'direct': True, 'referer': iframe_url}
    for source, values in sources.iteritems():
        host = scraper_utils.get_direct_hostname(self, source)
        headers = {'User-Agent': scraper_utils.get_ua()}
        if 'referer' in values:
            headers['Referer'] = values['referer']
        stream_url = source + scraper_utils.append_headers(headers)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(source)
        else:
            quality = values['quality']
        # map any non-standard quality label onto the known quality order
        if quality not in Q_ORDER:
            quality = QUALITY_MAP.get(values['quality'], QUALITIES.HIGH)
        hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(hoster)
    return hosters
def __get_embedded(self, html):
    """Parse the page's JS source list and run it through __proc_sources."""
    parsed = scraper_utils.parse_sources_list(self, html)
    return self.__proc_sources(parsed)