def get_sources(self, video):
    """Collect hosters for |video| via the site's AJAX player endpoint.

    Extracts a ``view_id`` from the show page, queries the ajax endpoint once
    per language variant ('or' presumably = original audio, 'tr' = Turkish --
    TODO confirm against the site), then harvests both embedded iframe hosts
    and directly parsed stream URLs.

    Fixes vs. original: raw-string regex literals, and the iframe URL inside
    the inner loop no longer shadows the ``src`` dom-parse result list.

    :param video: video object accepted by self.get_url()
    :return: list of hoster dicts (possibly empty)
    """
    hosters = []
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    match = re.search(r'var\s+view_id\s*=\s*"([^"]+)', html)
    if not match:
        return hosters
    view_id = match.group(1)

    for lang in ['or', 'tr']:
        subs = lang == 'tr'  # only the Turkish variant carries subtitles
        view_data = {'id': view_id, 'tip': 'view', 'dil': lang}
        html = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25)
        html = html.strip()
        html = re.sub(r'\\n|\\t', '', html)  # strip literal escape sequences from the JS payload
        match = re.search(r'var\s+sources\s*=\s*(\[.*?\])', html)
        if match:
            raw_data = match.group(1).replace('\\', '')
        else:
            raw_data = html

        js_data = scraper_utils.parse_json(raw_data, self.ajax_url)
        if 'data' not in js_data: continue

        player = dom_parser2.parse_dom(js_data['data'], 'iframe', req='src')
        if not player: continue

        html = self._http_get(player[0].attrs['src'], cache_limit=.25)
        for attrs, _content in dom_parser2.parse_dom(html, 'iframe', req='src'):
            iframe_src = attrs['src']
            if not iframe_src.startswith('http'): continue
            # embedded (non-direct) hosts: assume 720p since no label is exposed
            sources.append({'label': '720p', 'file': iframe_src, 'direct': False, 'subs': subs})

        sources += [{'file': url, 'subs': subs} for url in scraper_utils.parse_sources_list(self, html).iterkeys()]
        if sources: break  # first language variant with any streams wins

    for source in sources:
        direct = source.get('direct', True)
        stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                continue  # direct stream without any quality hint: skip it
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.height_get_quality(source['label'])

        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                  'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
        if source.get('subs'): hoster['subs'] = 'Turkish Subtitles'
        hosters.append(hoster)

    return hosters
def get_sources(self, video):
    """Return direct hosters for |video| from the 'film-container' iframe player."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'film-container'})
    if not fragment:
        return hosters

    iframes = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
    if not iframes:
        return hosters

    iframe_url = scraper_utils.urljoin(self.base_url, iframes[0].attrs['src'])
    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    sources = scraper_utils.parse_sources_list(self, html)
    for stream in sources:
        quality = sources[stream]['quality']
        host = scraper_utils.get_direct_hostname(self, stream)
        stream_url = stream + scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url})
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                  'views': None, 'rating': None, 'url': stream_url, 'direct': True}
        match = re.search('(\d+[a-z]bps)', stream)
        if match:
            hoster['extra'] = match.group(1)  # bitrate tag, e.g. "1000kbps"
        hosters.append(hoster)

    # sort so the highest bitrate tag comes first (lexicographic on the tag)
    hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
    return hosters
def __parse_streams(self, iframe_url, page_url):
    """Fetch |iframe_url| (sending |page_url| as referer), unpack any packed
    JS in the response, and return the parsed stream-source mapping."""
    referer_headers = {'Referer': page_url}
    raw_html = self._http_get(iframe_url, headers=referer_headers, cache_limit=.5)
    html = jsunpack.unpack(raw_html) if jsunpack.detect(raw_html) else raw_html
    return scraper_utils.parse_sources_list(self, html)
def __get_embed_links(self, html):
    """Build hoster dicts (with Turkish subtitles) for every stream in |html|.

    Bug fix: ``parse_sources_list`` returns a mapping keyed by stream URL, so
    the quality must be read from the mapping value. The original indexed the
    string key itself (``source['quality']``), which raises ``TypeError``;
    sibling scrapers in this file use ``sources[source]['quality']``.
    """
    hosters = []
    sources = scraper_utils.parse_sources_list(self, html)
    for source in sources:
        quality = sources[source]['quality']  # was: source['quality'] (TypeError on str key)
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, source),
                  'class': self, 'quality': quality, 'views': None, 'rating': None,
                  'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Gather hosters from the page's tab contents, embedded iframes, and
    mirror links; streams without an explicit quality inherit the page-wide
    best quality."""
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for _attrs, tab in dom_parser2.parse_dom(html, 'div', {'class': 'tabcontent'}):
        for attrs, _content in dom_parser2.parse_dom(tab, 'source', req='src'):
            stream = attrs['src'] + scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
            sources[stream] = {'quality': None, 'direct': True}

        iframe = dom_parser2.parse_dom(tab, 'iframe', req='src')
        if not iframe:
            continue
        iframe_url = iframe[0].attrs['src']
        if 'songs2dl' in iframe_url:
            # this host serves a direct sources list; fetch and parse it
            iframe_html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=1)
            sources.update(scraper_utils.parse_sources_list(self, iframe_html))
        else:
            sources[iframe_url] = {'quality': None, 'direct': False}

    sources.update(self.__get_mirror_links(html, video))
    page_quality = self.__get_best_quality(sources)
    for stream, values in sources.iteritems():
        direct = values['direct']
        host = (scraper_utils.get_direct_hostname(self, stream) if direct
                else urlparse.urlparse(stream).hostname)
        quality = page_quality if values['quality'] is None else values['quality']
        hosters.append({'multi-part': False, 'host': host, 'class': self, 'views': None,
                        'url': stream, 'rating': None, 'quality': quality, 'direct': direct})
    return hosters
def __get_embedded_links(self, html, sub):
    """Parse stream sources out of escaped |html| and tag each hoster with the
    |sub| subtitle label."""
    unescaped = html.replace('\\"', '"').replace('\\/', '/')
    sources = scraper_utils.parse_sources_list(self, unescaped)
    hosters = []
    for stream in sources:
        info = sources[stream]
        hosters.append({'multi-part': False,
                        'host': scraper_utils.get_direct_hostname(self, stream),
                        'class': self, 'quality': info['quality'], 'views': None,
                        'rating': None, 'url': stream, 'direct': info['direct'],
                        'subs': sub})
    return hosters
def __get_embedded(self, html, page_url):
    """Resolve the 'videoreklam' iframe and return {stream_url: quality}."""
    sources = {}
    wrapper = dom_parser2.parse_dom(html, 'div', {'id': 'videoreklam'})
    if not wrapper:
        return sources
    iframe = dom_parser2.parse_dom(wrapper[0].content, 'iframe', req='src')
    if not iframe:
        return sources

    html = self._http_get(iframe[0].attrs['src'], headers={'Referer': page_url}, cache_limit=.5)
    # append each unpacked JS payload so parse_sources_list can see the URLs;
    # finditer keeps scanning the original string, so appending is safe here
    for packed in re.finditer('(eval\(function\(.*?)</script>', html, re.DOTALL):
        html += jsunpack.unpack(packed.group(1)).replace('\\', '')
    return dict((key, value['quality'])
                for key, value in scraper_utils.parse_sources_list(self, html, var='source').iteritems())
def get_sources(self, video):
    """Return direct hosters parsed from the page's 'playex' player block."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment:
        html = fragment[0].content  # narrow parsing to the player container

    links = scraper_utils.parse_sources_list(self, html)
    for link in links:
        stream_url = link
        if self.base_url in link:
            # same-site links redirect to the real stream; resolve with a HEAD request
            redir_url = self._http_get(link, headers={'Referer': url},
                                       allow_redirect=False, method='HEAD')
            if redir_url.startswith('http'):
                stream_url = redir_url

        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = links[link]['quality']

        stream_url += scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua(), 'Referer': url})
        hosters.append({'multi-part': False, 'url': stream_url, 'host': host,
                        'class': self, 'quality': quality, 'views': None,
                        'rating': None, 'direct': True})
    return hosters