def get_sources(self, video):
    """Collect hoster links from the ``link_<n>`` table rows of the page.

    Each row contributes at most one hoster, taken from the first
    ``a.buttonlink`` anchor that carries both ``href`` and ``title``.
    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=2)
    row_id = re.compile(r'link_\d+')
    server_prefix = re.compile(r'Server\s+', re.I)
    for _attrs, row in dom_parser2.parse_dom(html, 'tr', {'id': row_id}):
        buttons = dom_parser2.parse_dom(row, 'a', {'class': 'buttonlink'},
                                        req=['href', 'title'])
        if not buttons:
            continue

        anchor = buttons[0].attrs
        stream_url = anchor['href']
        # The title reads like "Server <name>"; keep only the name part.
        host = server_prefix.sub('', anchor['title'])
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Collect hosters from embed pages and from "play video" links.

    The page-level base quality is LOW when the page carries the
    "This movie is of poor quality" notice and HIGH otherwise.  Embed
    pages are fetched and handed to ``self.__get_links``; links followed
    by a ``play_video.gif`` image are added directly.

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    if re.search('This movie is of poor quality', html, re.I):
        page_quality = QUALITIES.LOW
    else:
        page_quality = QUALITIES.HIGH

    for embed in re.finditer(r'href="([^"]+/embed\d*/[^"]+)', html):
        embed_html = self._http_get(embed.group(1), cache_limit=.5)
        hosters += self.__get_links(embed_html)

    pattern = r'href="([^"]+)[^>]*>\s*<[^>]+play_video.gif'
    for link in re.finditer(pattern, html, re.I):
        stream_url = link.group(1)
        host = urlparse.urlparse(stream_url).hostname
        # Always start from the page-level quality.  Previously the result
        # for one host overwrote the base used for the next host, so one
        # host's quality leaked into every link that followed it.
        quality = scraper_utils.get_quality(video, host, page_quality)
        hosters.append({
            'multi-part': False,
            'url': stream_url,
            'host': host,
            'class': self,
            'quality': quality,
            'rating': None,
            'views': None,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Build hosters from the ``div.linkTr`` rows of the page.

    For episodes the page is first narrowed with
    ``self.__get_episode_fragment``.  A row is used only when it has both
    a ``linkHiddenUrl`` and a ``linkQualityText`` element.
    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.EPISODE:
        html = self.__get_episode_fragment(html, video)

    for _attrs, row in dom_parser2.parse_dom(html, 'div', {'class': 'linkTr'}):
        url_nodes = dom_parser2.parse_dom(row, 'div', {'class': 'linkHiddenUrl'})
        quality_nodes = dom_parser2.parse_dom(row, 'div', {'class': 'linkQualityText'})
        if not (url_nodes and quality_nodes):
            continue

        stream_url = url_nodes[0].content
        q_str = quality_nodes[0].content
        host = urlparse.urlparse(stream_url).hostname
        # Unknown quality labels fall back to HIGH.
        base_quality = QUALITY_MAP.get(q_str, QUALITIES.HIGH)
        hosters.append({
            'multi-part': False,
            'url': stream_url,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, base_quality),
            'views': None,
            'rating': None,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Build hosters from the ``div.ldr-item`` entries of the page.

    The link comes from the ``data-actuallink`` attribute of the entry's
    anchor.  Views (from ``div.click-count``) and rating (ten times the
    integer in ``div.point``) are best-effort: any parsing problem leaves
    the field as ``None``.

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    for _attrs, link in dom_parser2.parse_dom(html, 'div', {'class': 'ldr-item'}):
        anchors = dom_parser2.parse_dom(link, 'a', req='data-actuallink')
        if not anchors:
            # No link in this entry; don't bother parsing its metadata.
            continue

        # Best-effort metadata.  ``except Exception`` (not a bare except)
        # keeps KeyboardInterrupt/SystemExit propagating.
        try:
            watched = dom_parser2.parse_dom(link, 'div', {'class': 'click-count'})
            views = re.search(r' (\d+) ', watched[0].content).group(1)
        except Exception:
            views = None

        try:
            score = dom_parser2.parse_dom(link, 'div', {'class': 'point'})
            score = int(score[0].content)
            rating = score * 10 if score else None
        except Exception:
            rating = None

        stream_url = anchors[0].attrs['data-actuallink'].strip()
        host = urlparse.urlparse(stream_url).hostname
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': views,
            'rating': rating,
            'url': stream_url,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Build hosters from links carrying a base64 ``gtfo=`` parameter.

    Links whose text contains "Part <n>" are never added.  When a
    "Part 2" link is seen, the most recently added hoster is removed as
    well (per the original comment, that is the first part of the same
    multi-part upload).

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    pattern = r'href="[^"]+gtfo=([^&"]+)[^>]+>([^<]+)'
    for link in re.finditer(pattern, html, re.DOTALL | re.I):
        encoded_url, link_name = link.groups()
        stream_url = encoded_url.decode('base-64')
        host = urlparse.urlsplit(stream_url).hostname

        part = re.search(r'Part\s+(\d+)', link_name)
        if part:
            # Guard against a "Part 2" arriving before anything was
            # collected; an unconditional ``del hosters[-1]`` would raise
            # IndexError on the empty list.
            if part.group(1) == '2' and hosters:
                hosters.pop()  # remove Part 1 previous link added
            continue

        hosters.append({
            'multi-part': False,
            'url': stream_url,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Build one hoster per ``td.tdhost`` cell that contains a link.

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for _attrs, cell in dom_parser2.parse_dom(html, 'td', {'class': 'tdhost'}):
        anchors = dom_parser2.parse_dom(cell, 'a', req='href')
        if not anchors:
            continue

        # Only the first anchor of the cell is used.
        stream_url = anchors[0].attrs['href']
        host = urlparse.urlparse(stream_url).hostname
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'url': stream_url,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Build hosters from the ``a.full-torrent1`` entries of the page.

    The link is taken from a span's ``onclick`` attribute (reduced to its
    ``/redirect/...`` path when one is present) and the host from the
    ``div.small_server`` text.  "Views:" and "Size:" figures are picked up
    when present; the size figure is multiplied by 1024 * 1024 and stored
    via ``scraper_utils.format_size``.

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)
    for _attrs, entry in dom_parser2.parse_dom(html, 'a', {'class': 'full-torrent1'}):
        click_spans = dom_parser2.parse_dom(entry, 'span', req='onclick')
        server_divs = dom_parser2.parse_dom(entry, 'div', {'class': 'small_server'})

        views_match = re.search(r'Views:\s*(?:</[^>]*>)?\s*(\d+)', entry, re.I)
        if views_match:
            views = views_match.group(1)
        else:
            views = None

        size_match = re.search(r'Size:\s*(?:</[^>]*>)?\s*(\d+)', entry, re.I)
        if size_match:
            size = int(size_match.group(1)) * 1024 * 1024
        else:
            size = None

        if not (click_spans and server_divs):
            continue

        stream_url = click_spans[0].attrs['onclick']
        host = server_divs[0].content.lower().replace('stream server: ', '')
        # Prefer the quoted /redirect/ path if the handler contains one;
        # otherwise the raw onclick text is kept as the URL.
        redirect = re.search("'(/redirect/[^']+)", stream_url)
        if redirect:
            stream_url = redirect.group(1)

        hoster = {
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': views,
            'rating': None,
            'url': stream_url,
            'direct': False,
        }
        if size is not None:
            hoster['size'] = scraper_utils.format_size(size, 'B')
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Build hosters from the anchors inside the page's first ``tbody``.

    An anchor is used only when it wraps an ``img`` element; the stripped
    content of that element is used as the host name.
    Returns a list of hoster dicts (empty if the video has no usable URL
    or the page has no ``tbody``).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    tables = dom_parser2.parse_dom(html, 'tbody')
    if not tables:
        return hosters

    table_html = tables[0].content
    for attrs, content in dom_parser2.parse_dom(table_html, 'a', req='href'):
        images = dom_parser2.parse_dom(content, 'img')
        if not images:
            continue

        stream_url = attrs['href']
        host = images[0].content.strip()
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Build one hoster per link found in ``div.commentmetadata`` blocks.

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    comments = dom_parser2.parse_dom(html, 'div', {'class': 'commentmetadata'})
    for _attrs, comment in comments:
        for attrs, _content in dom_parser2.parse_dom(comment, 'a', req='href'):
            stream_url = attrs['href']
            host = urlparse.urlparse(stream_url).hostname
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': False,
            })
    return hosters
def get_sources(self, video):
    """Build hosters from the iframes inside ``div#fstory-video``.

    The base quality comes from the first ``li`` whose ``span.type`` label
    mentions "quality"; its ``p.text`` value (upper-cased) is looked up in
    ``QUALITY_MAP``, defaulting to HIGH.
    Returns a list of hoster dicts (empty if the video has no usable URL
    or the video container is missing).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)

    # Stays empty when no list item declares a quality.
    q_str = ''
    for _attrs, item in dom_parser2.parse_dom(html, 'li'):
        label = dom_parser2.parse_dom(item, 'span', {'class': 'type'})
        value = dom_parser2.parse_dom(item, 'p', {'class': 'text'})
        if label and value and 'quality' in label[0].content.lower():
            q_str = value[0].content.upper()
            break

    player = dom_parser2.parse_dom(html, 'div', {'id': 'fstory-video'})
    if not player:
        return hosters

    base_quality = QUALITY_MAP.get(q_str, QUALITIES.HIGH)
    for frame in re.finditer('<iframe[^>]*src="([^"]+)', player[0].content, re.I):
        stream_url = frame.group(1)
        host = urlparse.urlparse(stream_url).hostname
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, base_quality),
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Build hosters from ``<td><a href=...>`` cells matched by regex.

    The regex captures the link target and the text that follows two
    further tags; that text is used as the host name.
    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    pattern = r'<td>\s*<a\s+href="([^"]+)(?:[^>]+>){2}\s*(?: )*\s*([^<]+)'
    for cell in re.finditer(pattern, html):
        stream_url = cell.group(1)
        host = cell.group(2)
        quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Build hosters from the ``tr#linktr`` rows of the page.

    An absolute (``http...``) anchor href is preferred as the link;
    otherwise the ``id`` of the row's span is used.  The host is the
    link's hostname for absolute links and the row's ``h9`` text
    otherwise.  Rows lacking either a link or a host are skipped.

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for _attrs, row in dom_parser2.parse_dom(html, 'tr', {'id': 'linktr'}):
        spans = dom_parser2.parse_dom(row, 'span', req='id')
        anchors = dom_parser2.parse_dom(row, 'a', req='href')

        stream_url = ''
        if anchors and anchors[0].attrs['href'].startswith('http'):
            stream_url = anchors[0].attrs['href']
        elif spans:
            stream_url = spans[0].attrs['id']

        if stream_url.startswith('http'):
            host = urlparse.urlparse(stream_url).hostname
        else:
            labels = dom_parser2.parse_dom(row, 'h9')
            host = labels[0].content if labels else ''

        if not (stream_url and host):
            continue

        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Build hosters from iframes that follow a ``postTabs_titles`` marker.

    The page URL is resolved against ``self.__get_base_ep_url(video)``
    rather than ``self.base_url``.  No base quality is supplied to
    ``scraper_utils.get_quality`` (``None`` is passed).

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    base_ep_url = self.__get_base_ep_url(video)
    page_url = scraper_utils.urljoin(base_ep_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    pattern = 'postTabs_titles.*?iframe.*?src="([^"]+)'
    for tab in re.finditer(pattern, html, re.I | re.DOTALL):
        stream_url = tab.group(1)
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.get_quality(video, host, None)
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'url': stream_url,
            'quality': quality,
            'views': None,
            'rating': None,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Build hosters from ``/rd.html?url=...`` redirect links.

    The first "View(s): <n>" figure on the page, if any, is attached to
    every hoster as its view count.
    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)

    views_match = re.search(r'Views?\s*:\s*(\d+)', html, re.I)
    views = views_match.group(1) if views_match else None

    for redirect in re.finditer(r'href="[^"]+/rd\.html\?url=([^"]+)', html):
        stream_url = redirect.group(1)
        host = urlparse.urlsplit(stream_url).hostname
        hosters.append({
            'multi-part': False,
            'host': host,
            'url': stream_url,
            'class': self,
            'rating': None,
            'views': views,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Build hosters from the ``tr#pt<n>`` rows and rate them by age.

    Per row: the link is the ``a.spf-link`` href, the host is the file
    name (without ``.gif``) of the row's image, the age text comes from
    ``span.linkdate`` or else the third cell, and the quality label from
    the fourth cell (default 'HDTV').  Ages are converted with
    ``self.__get_age``.  After all rows are read, each hoster's rating is
    set to its age offset from the minimum, scaled by 1/100 of the
    observed age range (ratings stay ``None`` when that range is too
    small to give a positive unit).

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)

    max_age = 0
    now = min_age = int(time.time())
    for _attrs, row in dom_parser2.parse_dom(html, 'tr', {'id': re.compile(r'pt\d+')}):
        stream_url = dom_parser2.parse_dom(row, 'a', {'class': 'spf-link'}, req='href')
        host = dom_parser2.parse_dom(row, 'img', {'alt': ''}, req='src')
        data = dom_parser2.parse_dom(row, 'td')
        if not (stream_url and host):
            continue

        stream_url = stream_url[0].attrs['href']
        icon = re.search(r'.*/(.*?)\.gif', host[0].attrs['src'])
        host = icon.group(1) if icon else ''

        # Best-effort lookups.  ``except Exception`` replaces the former
        # bare ``except:`` so KeyboardInterrupt/SystemExit still propagate.
        age = dom_parser2.parse_dom(row, 'span', {'class': 'linkdate'})
        try:
            age = age[0].content
        except Exception:
            try:
                age = data[2].content
            except Exception:
                age = 0

        try:
            quality = data[3].content
        except Exception:
            quality = 'HDTV'

        age = self.__get_age(now, age)
        if age > max_age:
            max_age = age
        if age < min_age:
            min_age = age

        hoster = {
            'multi-part': False,
            'class': self,
            'url': scraper_utils.pathify_url(stream_url),
            'host': host,
            'age': age,
            'views': None,
            'rating': None,
            'direct': False,
        }
        quality = QUALITY_MAP.get(quality.upper(), QUALITIES.HIGH)
        hoster['quality'] = scraper_utils.get_quality(video, host, quality)
        hosters.append(hoster)

    unit = (max_age - min_age) / 100
    if unit > 0:
        for hoster in hosters:
            hoster['rating'] = (hoster['age'] - min_age) / unit
    return hosters
def get_sources(self, video):
    """Build hosters from the ``movie_version`` tables of the page.

    Every table row matched by the item regex yields one hoster with a
    base64-decoded host and URL, a ``verified`` flag (row contains
    ``star.gif``), a view count, and any extra "part N" links.  Rows whose
    encoded host is the known promo value are skipped.

    Ratings are each hoster's view count as an integer percentage of the
    highest view count seen; they stay ``None`` when no hoster has a
    positive view count.

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)

    container_pattern = r'<table[^>]+class="movie_version[ "][^>]*>(.*?)</table>'
    item_pattern = (r'quality_(?!sponsored|unknown)([^>]*)></span>.*?'
                    r'url=([^&]+)&(?:amp;)?domain=([^&]+)&(?:amp;)?(.*?)'
                    r'"version_veiws"> ([\d]+) views</')
    part_pattern = r'<a href=".*?url=(.*?)&(?:amp;)?.*?".*?>(part \d*)</a>'

    max_views = -1
    for container in re.finditer(container_pattern, html, re.DOTALL | re.IGNORECASE):
        for source in re.finditer(item_pattern, container.group(1), re.DOTALL):
            qual, url, host, parts, views = source.groups()
            if host == 'ZnJhbWVndGZv':
                continue  # filter out promo hosts

            item = {
                'host': host.decode('base-64'),
                'url': url.decode('base-64'),
            }
            item['verified'] = 'star.gif' in source.group(0)
            item['quality'] = scraper_utils.get_quality(
                video, item['host'], QUALITY_MAP.get(qual.upper()))
            item['views'] = int(views)
            max_views = max(max_views, item['views'])
            # Filled in below, once the overall maximum is known.
            item['rating'] = None

            other_parts = re.findall(part_pattern, parts, re.DOTALL | re.I)
            if other_parts:
                item['multi-part'] = True
                item['parts'] = [part[0].decode('base-64') for part in other_parts]
            else:
                item['multi-part'] = False

            item['class'] = self
            item['direct'] = False
            hosters.append(item)

    # Rate every hoster against the final maximum.  The earlier code
    # re-rated ``hosters[0:max_index]`` where ``max_index`` was a per-table
    # enumerate index; with skipped promo rows or several tables that index
    # did not line up with ``hosters`` and could go out of range.
    if max_views > 0:
        for item in hosters:
            # Floor division keeps the integer result on any interpreter.
            item['rating'] = item['views'] * 100 // max_views
    return hosters
def get_sources(self, video):
    """Build hosters from ``data-vid`` embeds and the additional-links table.

    The base quality is the best one (by ``Q_ORDER``) among the
    ``/movies-quality/`` links in ``div.entry``, starting from HIGH.
    Sources are the iframes embedded in ``data-vid`` attributes plus the
    anchors of ``table.additional-links``.

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)

    best_quality = QUALITIES.HIGH
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
    if fragment:
        pattern = 'href="[^"]*/movies-quality/[^"]*[^>]*>([^<]+)'
        for match in re.finditer(pattern, fragment[0].content, re.I):
            quality = Q_MAP.get(match.group(1).upper(), QUALITIES.HIGH)
            if Q_ORDER[quality] > Q_ORDER[best_quality]:
                best_quality = quality

    sources = []
    for attrs, _content in dom_parser2.parse_dom(html, 'a', req='data-vid'):
        # Best-effort: an embed that can't be parsed is skipped.
        # ``except Exception`` replaces the former bare ``except:`` so
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            embed_html = scraper_utils.cleanse_title(attrs['data-vid'])
            vid_url = dom_parser2.parse_dom(embed_html, 'iframe', req='src')
            sources.append(vid_url[0])
        except Exception:
            pass

    fragment = dom_parser2.parse_dom(html, 'table', {'class': 'additional-links'})
    if fragment:
        sources += dom_parser2.parse_dom(fragment[0].content, 'a', req='href')

    for element in sources:
        # Anchors carry ``href``; iframes carry ``src``.
        stream_url = element.attrs.get('href') or element.attrs.get('src')
        host = urlparse.urlparse(stream_url).hostname
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, best_quality),
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Build hosters from paired ``server_servername``/``server_play`` items.

    Host labels have a leading "Server" and any "Link <n>" suffix removed.
    A host labelled "google" is expanded through
    ``self.__get_gvideo_links``; every other host yields a single source.
    Sources that resolve to ``gvideo`` are marked direct and get a
    User-Agent header appended; all others have their URL pathified.

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    hosts = [r.content for r in dom_parser2.parse_dom(html, 'p', {'class': 'server_servername'})]
    links = [r.content for r in dom_parser2.parse_dom(html, 'p', {'class': 'server_play'})]
    for host, link_frag in zip(hosts, links):
        anchors = dom_parser2.parse_dom(link_frag, 'a', req='href')
        if not anchors:
            continue

        page_link = anchors[0].attrs['href']
        # ``flags=`` must be a keyword: the fourth positional argument of
        # re.sub is ``count``, so the old call silently lost IGNORECASE.
        host = re.sub(r'^Server\s*', '', host, flags=re.I)
        host = re.sub(r'\s*Link\s+\d+', '', host)
        if host.lower() == 'google':
            sources = self.__get_gvideo_links(page_link)
        else:
            sources = [{'host': host, 'link': page_link}]

        for source in sources:
            # Classify each source by its own link.  The earlier code
            # inspected (and then overwrote) the shared page-level URL, so
            # every source after the first was judged by the previous one.
            link = source['link']
            src_host = scraper_utils.get_direct_hostname(self, link)
            if src_host == 'gvideo':
                quality = scraper_utils.gv_get_quality(link)
                stream_url = link + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
                direct = True
            else:
                stream_url = scraper_utils.pathify_url(link)
                src_host = HOST_SUB.get(source['host'].lower(), source['host'])
                quality = scraper_utils.get_quality(video, src_host, QUALITIES.HIGH)
                direct = False

            hosters.append({
                'multi-part': False,
                'url': stream_url,
                'host': src_host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': direct,
            })
    return hosters
def get_sources(self, video):
    """Build hosters from iframes inside ``div.embed-responsive`` blocks.

    Iframes hosted on one of ``DIRECT_HOSTS`` are expanded through
    ``self.__parse_streams``; any other iframe becomes a single indirect
    source.  The page-level quality is HD720 for movies and HIGH otherwise.

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.MOVIE:
        page_quality = QUALITIES.HD720
    else:
        page_quality = QUALITIES.HIGH

    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'embed-responsive'}):
        frames = dom_parser2.parse_dom(fragment, 'iframe', req='data-src')
        if not frames:
            continue

        iframe_url = frames[0].attrs['data-src']
        iframe_host = urlparse.urlparse(iframe_url).hostname
        if iframe_host in DIRECT_HOSTS:
            sources = self.__parse_streams(iframe_url, url)
        else:
            embed_quality = scraper_utils.get_quality(video, iframe_host, page_quality)
            sources = {iframe_url: {'quality': embed_quality, 'direct': False}}

        for source in sources:
            info = sources[source]
            quality = info['quality']
            direct = info['direct']
            if direct:
                host = scraper_utils.get_direct_hostname(self, source)
                # Direct streams carry the User-Agent header with the URL.
                stream_url = source + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
            else:
                host = urlparse.urlparse(source).hostname
                stream_url = source

            hosters.append({
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': direct,
            })
    return hosters
def __get_post_links(self, html, video):
    """Map each link in the post's ``h2`` headings to a quality.

    Only the first ``article#post-<n>`` element is inspected.  The release
    quality is derived from the height that
    ``scraper_utils.parse_episode_link`` reports for the link.
    Returns a dict of ``{stream_url: quality}`` (empty if no post found).
    """
    sources = {}
    posts = dom_parser2.parse_dom(html, 'article', {'id': re.compile(r'post-\d+')})
    if not posts:
        return sources

    for _attrs, heading in dom_parser2.parse_dom(posts[0].content, 'h2'):
        for attrs, _content in dom_parser2.parse_dom(heading, 'a', req='href'):
            stream_url = attrs['href']
            meta = scraper_utils.parse_episode_link(stream_url)
            release_quality = scraper_utils.height_get_quality(meta['height'])
            host = urlparse.urlparse(stream_url).hostname
            sources[stream_url] = scraper_utils.get_quality(video, host, release_quality)
    return sources
def get_sources(self, video):
    """Build hosters from the links returned by ``scraper_utils.get_gk_links``.

    The page quality comes from the ``dd.status`` text via ``QUALITY_MAP``
    (default HIGH).  For episodes only the ``div#servers-list`` fragment is
    scanned; for everything else the whole page.  ``gvideo`` links are
    direct and keep the quality reported for them; other links are rated
    by hostname.  Links with no host are dropped.

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)

    status = dom_parser2.parse_dom(html, 'dd', {'class': 'status'})
    if status:
        page_quality = QUALITY_MAP.get(status[0].content, QUALITIES.HIGH)
    else:
        page_quality = QUALITIES.HIGH

    if video.video_type == VIDEO_TYPES.EPISODE:
        servers = dom_parser2.parse_dom(html, 'div', {'id': 'servers-list'})
        gk_html = servers[0].content if servers else ''
    else:
        gk_html = html

    link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
    player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
    gk_links = scraper_utils.get_gk_links(self, gk_html, page_url, page_quality,
                                          link_url, player_url)
    for stream_url, quality in gk_links.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        direct = host == 'gvideo'
        if not direct:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(video, host, quality)

        if host is None:
            continue

        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': direct,
        })
    return hosters
def __get_links(self, url, video):
    """Query the search API once per search type and collect hosters.

    A result is used only when it has a single hoster URL, is not a
    ``rar`` file, has not been seen before, and passes
    ``scraper_utils.release_check`` for its title.  API responses whose
    status is not ``success`` are logged and skipped.

    Returns a list of hoster dicts.
    """
    hosters = []
    seen_urls = set()
    for search_type in SEARCH_TYPES:
        search_url, params = self.__translate_search(url, search_type)
        if not search_url:
            continue

        html = self._http_get(search_url, params=params, cache_limit=.5)
        js_result = scraper_utils.parse_json(html, search_url)
        if js_result.get('status') != 'success':
            error = js_result.get('message', 'Unknown Error')
            logger.log('Alluc API Error: |%s|%s|: %s' % (search_url, params, error),
                       log_utils.LOGWARNING)
            continue

        for result in js_result['result']:
            stream_url = result['hosterurls'][0]['url']
            if (len(result['hosterurls']) > 1
                    or result['extension'] == 'rar'
                    or stream_url in seen_urls):
                continue
            if not scraper_utils.release_check(video, result['title']):
                continue

            host = urlparse.urlsplit(stream_url).hostname
            title_quality = self._get_title_quality(result['title'])
            hoster = {
                'multi-part': False,
                'class': self,
                'views': None,
                'url': stream_url,
                'rating': None,
                'host': host,
                'quality': scraper_utils.get_quality(video, host, title_quality),
                'direct': False,
                'extra': scraper_utils.cleanse_title(result['title']),
            }

            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(hoster['extra'])
            else:
                meta = scraper_utils.parse_episode_link(hoster['extra'])
            if 'format' in meta:
                hoster['format'] = meta['format']

            hosters.append(hoster)
            seen_urls.add(stream_url)
    return hosters
def get_sources(self, video):
    """Build hosters by pairing links and host names in ``div.embeds``.

    Links and ``div.searchTVname`` labels are paired positionally with
    ``map(None, ...)``, so the shorter list is padded with ``None``.
    Returns a list of hoster dicts (empty if the video has no usable URL
    or the embeds container is missing).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    embeds = dom_parser2.parse_dom(html, 'div', {'class': 'embeds'})
    if not embeds:
        return hosters

    embeds_html = embeds[0].content
    links = [r.attrs['href'] for r in dom_parser2.parse_dom(embeds_html, 'a', req='href')]
    hosts = [r.content
             for r in dom_parser2.parse_dom(embeds_html, 'div', {'class': 'searchTVname'})]
    for stream_url, host in map(None, links, hosts):
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Build hosters from the iframes of the ``play_online`` XHR response.

    The movie id is read from ``input#movie_id`` on the page and posted to
    ``/movies/play_online``.  The best quality reported by
    ``self.__get_direct`` for that response is used as the base quality
    for every iframe found in it.

    Returns a list of hoster dicts (empty if the video has no usable URL
    or the page has no movie id).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    id_inputs = dom_parser2.parse_dom(html, 'input', {'id': 'movie_id'}, req='value')
    if not id_inputs:
        return hosters

    data = {
        'movie': id_inputs[0].attrs['value'],
        'starttime': 'undefined',
        'pageevent': 0,
        'aspectration': '',
    }
    xhr_url = scraper_utils.urljoin(self.base_url, '/movies/play_online')
    headers = {'Referer': page_url}
    headers.update(XHR)
    html = self._http_get(xhr_url, data=data, headers=headers, cache_limit=.5)

    # Only the quality is used here; the direct sources are discarded.
    best_quality, _sources = self.__get_direct(html, page_url)
    for attrs, _content in dom_parser2.parse_dom(html, 'iframe', req='src'):
        stream_url = attrs['src']
        host = urlparse.urlparse(stream_url).hostname
        hosters.append({
            'multi-part': False,
            'url': stream_url,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, best_quality),
            'views': None,
            'rating': None,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Build hosters from the English ``div.lang`` sections of the page.

    Each section's language and subtitle info come from
    ``self.__get_section_label``; only sections whose language is
    "english" (case-insensitive) contribute ``a.p1`` links.  The anchor's
    content is used as the host, and ``subs`` is attached when truthy.

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, headers={'Referer': self.base_url}, cache_limit=.5)
    flag_title = re.compile(r'Language Flag\s+[^"]*')
    for _attrs, section in dom_parser2.parse_dom(html, 'div', {'class': 'lang'}):
        section_label = dom_parser2.parse_dom(section, 'div', {'title': flag_title})
        lang, subs = self.__get_section_label(section_label)
        if lang.lower() != 'english':
            continue

        anchors = dom_parser2.parse_dom(section, 'a', {'class': 'p1'}, req='href')
        for attrs, host in anchors:
            hoster = {
                'multi-part': False,
                'url': attrs['href'],
                'class': self,
                'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                'host': host,
                'rating': None,
                'views': None,
                'direct': False,
            }
            if subs:
                hoster['subs'] = subs
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Build hosters from "Watch Link <n>" rows, including extra parts.

    Each row yields a main hoster labelled with its "Link <n>" text.  When
    the row lists "Part <n>" links, the main hoster is flagged multi-part
    and one copy per part is appended with that part's URL and label.
    Rows whose link has no hostname are skipped.

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    pattern = r'href="([^"]+)">Watch (Link \d+)(.*?)</td>\s*<td[^>]*>(.*?)</td>.*?<td[^>]*id="lv_\d+"[^>]*>([^<]+)'
    for row in re.finditer(pattern, html, re.DOTALL):
        stream_url, label, part_str, q_str, views = row.groups()
        q_str = q_str.strip().upper()
        parts = re.findall(r'href="([^"]+)">(Part\s+\d+)<', part_str, re.DOTALL)

        host = urlparse.urlparse(stream_url).hostname
        if host is None:
            continue

        base_quality = QUALITY_MAP.get(q_str, QUALITIES.HIGH)
        hoster = {
            'multi-part': bool(parts),
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, base_quality),
            'views': views,
            'rating': None,
            'url': stream_url,
            'direct': False,
            'extra': label,
        }
        hosters.append(hoster)

        # Every extra part shares the main hoster's fields except its URL.
        for part_url, part_label in parts:
            part_hoster = hoster.copy()
            part_hoster['part_label'] = part_label
            part_hoster['url'] = part_url
            hosters.append(part_hoster)
    return hosters
def get_sources(self, video):
    """Build hosters from the ``li.playing_button`` entries of the page.

    Each button's href holds a base64 payload after ``php?``; the decoded
    text is searched for an ``http(s)://`` URL, which becomes the stream
    URL.  Any failure while handling a button is logged at debug level and
    that button is skipped.

    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    buttons = dom_parser2.parse_dom(html, 'li', {'class': 'playing_button'})
    for _attrs, button in buttons:
        try:
            anchors = dom_parser2.parse_dom(button, 'a', req='href')
            payload = re.search(r'php\?.*?=?([^"]+)', anchors[0].attrs['href'])
            decoded = base64.b64decode(payload.group(1))
            stream_url = re.search('(https?://.*)', decoded).group(1)
            host = urlparse.urlparse(stream_url).hostname
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': False,
            })
        except Exception as e:
            logger.log('Exception during tvonline source: %s - |%s|' % (e, button),
                       log_utils.LOGDEBUG)
    return hosters
def get_sources(self, video):
    """Build hosters from tab iframes and ``window.open('...')`` targets.

    Iframe sources inside ``div.tab-content`` come first, followed by
    every URL passed to ``window.open`` anywhere on the page.
    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)

    stream_urls = []
    for _attrs, tab in dom_parser2.parse_dom(html, 'div', {'class': 'tab-content'}):
        frames = dom_parser2.parse_dom(tab, 'iframe', req='src')
        stream_urls.extend(attrs['src'] for attrs, _content in frames)
    for popup in re.finditer(r"window\.open\('([^']+)", html):
        stream_urls.append(popup.group(1))

    for stream_url in stream_urls:
        host = urlparse.urlparse(stream_url).hostname
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Build sources from the iframes returned by the embed AJAX endpoint.

    The element id (``elid = "..."``) is read from the page and posted,
    together with the scraper token and a base64-encoded timestamp, to
    ``EMBED_URL``.  ``gvideo`` iframes are direct; other iframes are rated
    by hostname with an HD720 base.  vk.com links ending in ``oid=`` are
    skipped.

    Returns a list of source dicts (empty if the video has no usable URL,
    the element id is missing, or no token is available).
    """
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    action = 'getMovieEmb' if video.video_type == VIDEO_TYPES.MOVIE else 'getEpisodeEmb'

    id_match = re.search(r'elid\s*=\s*"([^"]+)', html)
    if self.__token is None:
        self.__get_token()
    if not id_match or self.__token is None:
        return sources

    timestamp = str(int(time.time()))
    elid = urllib.quote(base64.encodestring(timestamp).strip())
    data = {
        'action': action,
        'idEl': id_match.group(1),
        'token': self.__token,
        'elid': elid,
    }
    ajax_url = scraper_utils.urljoin(self.base_url, EMBED_URL)
    headers = {
        'Authorization': 'Bearer %s' % (self.__get_bearer()),
        'Referer': page_url,
    }
    headers.update(XHR)
    html = self._http_get(ajax_url, data=data, headers=headers, cache_limit=.5)
    # Undo the escaping of quotes and slashes in the response.
    html = html.replace('\\"', '"').replace('\\/', '/')

    for frame in re.finditer(r'<IFRAME\s+SRC="([^"]+)', html, re.DOTALL | re.I):
        url = frame.group(1)
        host = scraper_utils.get_direct_hostname(self, url)
        if host == 'gvideo':
            direct = True
            quality = scraper_utils.gv_get_quality(url)
        else:
            if 'vk.com' in url and url.endswith('oid='):
                continue  # skip bad vk.com links
            direct = False
            host = urlparse.urlparse(url).hostname
            quality = scraper_utils.get_quality(video, host, QUALITIES.HD720)

        sources.append({
            'multi-part': False,
            'url': url,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'direct': direct,
        })
    return sources
def get_sources(self, video):
    """Build hosters from base64 ``/w.php?q=`` links on the page.

    The first link seen for a given host is treated as HD720 and any
    later link for that host as HIGH (the page lists HD links first).
    Returns a list of hoster dicts (empty if the video has no usable URL).
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    seen_hosts = set()
    for link in re.finditer(r"/w\.php\?q=([^']+)", html, re.DOTALL):
        stream_url = link.group(1).decode('base-64')
        host = urlparse.urlsplit(stream_url).hostname

        # top list is HD, bottom list is SD
        if host in seen_hosts:
            base_quality = QUALITIES.HIGH
        else:
            base_quality = QUALITIES.HD720
            seen_hosts.add(host)

        hosters.append({
            'multi-part': False,
            'url': stream_url,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, base_quality),
            'views': None,
            'rating': None,
            'direct': False,
            'host': host,
        })
    return hosters