def get_sources(self, video):
    """Scrape hoster links for *video* from its detail page.

    The page embeds a '&'-separated parameter string (pic=/emb=/vb=) whose
    entries are resolved into hoster dicts.  Returns a list of hoster dicts.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        # grab the whole pic=/emb=/vb= parameter blob up to the next tag
        match = re.search('((?:pic|emb|vb)=[^<]+)', html)
        if match:
            embeds = match.group(1)
            for stream_url in embeds.split('&'):
                if stream_url.startswith('vb='):
                    # vb= entries are vidbux ids; build the full embed URL
                    stream_url = 'http://www.vidbux.com/%s' % (
                        stream_url[3:])
                    host = 'vidbux.com'
                    direct = False
                    quality = scraper_utils.get_quality(
                        video, host, QUALITIES.HD1080)
                elif stream_url.startswith('pic='):
                    # pic= entries are resolved through PHP_URL into JSON
                    # describing direct (usually gvideo) streams
                    data = {'url': stream_url[4:]}
                    html = self._http_get(PHP_URL, data=data, auth=False,
                                          cache_limit=1)
                    js_data = scraper_utils.parse_json(html, PHP_URL)
                    host = self._get_direct_hostname(stream_url)
                    direct = True
                    for item in js_data:
                        if 'medium' in item and item['medium'] == 'video':
                            stream_url = item['url']
                            quality = scraper_utils.width_get_quality(
                                item['width'])
                            break
                    else:
                        # no video medium found: skip this embed entirely
                        continue
                elif stream_url.startswith('emb='):
                    # emb= entries are plain hosted embed URLs
                    stream_url = stream_url.replace('emb=', '')
                    host = urlparse.urlparse(stream_url).hostname
                    direct = False
                    quality = scraper_utils.get_quality(
                        video, host, QUALITIES.HD720)
                else:
                    continue
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': direct
                }
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape hoster links for *video* from its detail page.

    Newer variant of the pic=/emb=/vb= parser that also understands
    dir= (direct, header-augmented) and emb2= entries.
    Returns a list of hoster dicts.
    """
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    # grab the whole '&'-separated parameter blob up to the next tag
    match = re.search('((?:pic|emb|vb|dir|emb2)=[^<]+)', html)
    if match:
        embeds = match.group(1)
        for stream_url in embeds.split('&'):
            if stream_url.startswith('dir='):
                # dir= entries resolve through DIR_URL to a direct stream;
                # the resolver answers with a bare URL (no redirect follow)
                # NOTE(review): stream_url[3:] keeps the '=' ('dir=' is 4
                # chars) -- confirm DIR_URL expects the leading '='
                headers = {'Referer': url}
                html = self._http_get(DIR_URL, params={'v': stream_url[3:]}, headers=headers, auth=False, allow_redirect=False, cache_limit=.5)
                if html.startswith('http'):
                    stream_url = html + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
                    host = scraper_utils.get_direct_hostname(self, stream_url)
                    direct = True
                    quality = QUALITIES.HD720
                else:
                    continue
            elif stream_url.startswith('vb='):
                # vb= entries are vidbux ids; build the full embed URL
                stream_url = 'http://www.vidbux.com/%s' % (stream_url[3:])
                host = 'vidbux.com'
                direct = False
                quality = scraper_utils.get_quality(video, host, QUALITIES.HD1080)
            elif stream_url.startswith('pic='):
                # pic= entries resolve through PHP_URL into JSON describing
                # direct streams; pick the first 'video' medium
                data = {'url': stream_url[4:]}
                html = self._http_get(PHP_URL, data=data, auth=False, cache_limit=1)
                js_data = scraper_utils.parse_json(html, PHP_URL)
                host = scraper_utils.get_direct_hostname(self, stream_url)
                direct = True
                for item in js_data:
                    if item.get('medium') == 'video':
                        stream_url = item['url']
                        quality = scraper_utils.width_get_quality(item['width'])
                        break
                else:
                    # no video medium found: skip this embed entirely
                    continue
            elif stream_url.startswith(('emb=', 'emb2=')):
                # emb=/emb2= entries are plain hosted embed URLs
                stream_url = re.sub('emb\d*=', '', stream_url)
                host = urlparse.urlparse(stream_url).hostname
                direct = False
                quality = scraper_utils.get_quality(video, host, QUALITIES.HD720)
            else:
                continue
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Collect hoster links (plus view counts and ratings) from the page."""
    results = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return results
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'ldr-item'}):
        anchor = dom_parser2.parse_dom(item, 'a', req='data-actuallink')
        # best-effort watch counter
        try:
            counter = dom_parser2.parse_dom(item, 'div', {'class': 'click-count'})
            views = re.search(' (\d+) ', counter[0].content).group(1)
        except:
            views = None
        # best-effort score: 0-10 scaled to a 0-100 rating
        try:
            point = dom_parser2.parse_dom(item, 'div', {'class': 'point'})
            point = int(point[0].content)
            rating = point * 10 if point else None
        except:
            rating = None
        if not anchor:
            continue
        stream_url = anchor[0].attrs['data-actuallink'].strip()
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
        results.append({'multi-part': False, 'host': host, 'class': self,
                        'quality': quality, 'views': views, 'rating': rating,
                        'url': stream_url, 'direct': False})
    return results
def __get_links(self, url, video):
    """Query the Alluc search API (per SEARCH_TYPES) and build hoster dicts.

    Skips multi-part results, rar archives, duplicate URLs, and titles
    that fail the release check.  Returns a list of hoster dicts.
    """
    hosters = []
    seen_urls = set()
    for search_type in SEARCH_TYPES:
        search_url, params = self.__translate_search(url, search_type)
        if not search_url:
            continue
        html = self._http_get(search_url, params=params, cache_limit=.5)
        js_result = scraper_utils.parse_json(html, search_url)
        if js_result.get('status') != 'success':
            logger.log('Alluc API Error: |%s|%s|: %s' % (search_url, params, js_result.get('message', 'Unknown Error')), log_utils.LOGWARNING)
            continue
        for result in js_result['result']:
            stream_url = result['hosterurls'][0]['url']
            # more than one hosterurl means a multi-part stream: skip
            if len(result['hosterurls']) > 1:
                continue
            if result['extension'] == 'rar':
                continue
            if stream_url in seen_urls:
                continue
            if scraper_utils.release_check(video, result['title']):
                host = urlparse.urlsplit(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, self._get_title_quality(result['title']))
                hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': False}
                hoster['extra'] = scraper_utils.cleanse_title(result['title'])
                # derive the release format (x264/x265/...) from the title
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(hoster['extra'])
                else:
                    meta = scraper_utils.parse_episode_link(hoster['extra'])
                if 'format' in meta:
                    hoster['format'] = meta['format']
                hosters.append(hoster)
                seen_urls.add(stream_url)
    return hosters
def get_sources(self, video):
    """Fetch embed sources via the site's AJAX endpoint.

    Fix: the original did ``headers = XHR`` and then mutated it, which
    permanently added an ``Authorization`` header to the shared
    module-level ``XHR`` dict for every later caller.  A copy is taken
    instead so the shared dict is never modified.
    Returns a list of source dicts.
    """
    source_url = self.get_url(video)
    sources = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        if video.video_type == VIDEO_TYPES.MOVIE:
            action = 'getMovieEmb'
        else:
            action = 'getEpisodeEmb'
        match = re.search('elid="([^"]+)', html)
        if self.__token is None:
            self.__get_token()
        if match and self.__token is not None:
            # elid is the current epoch time, base64'd then url-quoted
            elid = urllib.quote(
                base64.encodestring(str(int(time.time()))).strip())
            data = {
                'action': action,
                'idEl': match.group(1),
                'token': self.__token,
                'elid': elid
            }
            ajax_url = urlparse.urljoin(self.base_url, EMBED_URL)
            # copy XHR so the shared module-level header dict is not mutated
            headers = dict(XHR)
            headers['Authorization'] = 'Bearer %s' % (self.__get_bearer())
            html = self._http_get(ajax_url, data=data, headers=headers,
                                  cache_limit=0)
            # response is JSON-escaped HTML; unescape before matching
            html = html.replace('\\"', '"').replace('\\/', '/')
            pattern = '<IFRAME\s+SRC="([^"]+)'
            for match in re.finditer(pattern, html, re.DOTALL | re.I):
                url = match.group(1)
                host = self._get_direct_hostname(url)
                if host == 'gvideo':
                    direct = True
                    quality = scraper_utils.gv_get_quality(url)
                else:
                    if 'vk.com' in url and url.endswith('oid='):
                        continue  # skip bad vk.com links
                    direct = False
                    host = urlparse.urlparse(url).hostname
                    quality = scraper_utils.get_quality(
                        video, host, QUALITIES.HD720)
                source = {
                    'multi-part': False,
                    'url': url,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'direct': direct
                }
                sources.append(source)
    return sources
def get_sources(self, video):
    """Scrape hoster rows out of the page's 'W' table.

    Fix: the request header key was misspelled ``'Refer'``; the intended
    HTTP header is ``'Referer'``, so the site never saw a referrer.
    Returns a list of hoster dicts.
    """
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    headers = {'Referer': self.base_url}  # was 'Refer' (typo)
    html = self._http_get(page_url, headers=headers, cache_limit=.5)
    for _attrs, table in dom_parser2.parse_dom(html, 'table', {'class': 'W'}):
        for _attrs, row in dom_parser2.parse_dom(table, 'tr'):
            td = dom_parser2.parse_dom(row, 'td')
            stream_url = dom_parser2.parse_dom(row, 'a', req='href')
            if not td or not stream_url:
                continue
            # host cell may contain comments and nested markup: strip both
            host = td[0].content
            host = re.sub('<!--.*?-->', '', host)
            host = re.sub('<([^\s]+)[^>]*>.*?</\\1>', '', host)
            host = host.strip()
            stream_url = stream_url[0].attrs['href']
            quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': False
            }
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Read the page-level quality header and emit one hoster per link row."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    # page-wide quality comes from the "Links - Quality ..." heading
    page_quality = None
    heading = re.search('Links\s+-\s+Quality\s*([^<]*)</h1>', html,
                        re.DOTALL | re.I)
    if heading:
        page_quality = QUALITY_MAP.get(heading.group(1).strip().upper())
    link_re = 'id="link_name">\s*([^<]+).*?href="([^"]+)'
    for link in re.finditer(link_re, html, re.DOTALL):
        host, stream_url = link.groups()
        hosters.append({'multi-part': False, 'host': host, 'class': self,
                        'url': stream_url,
                        'quality': scraper_utils.get_quality(video, host, page_quality),
                        'rating': None, 'views': None, 'direct': False})
    return hosters
def get_sources(self, video):
    """Pull debrid download links out of the post body's link list."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
    post = dom_parser2.parse_dom(html, 'div', {'class': 'post-cont'})
    if not post:
        return hosters
    # links live between the first <p><strong> and the following <script>
    section = re.search('<p>\s*<strong>(.*?)<script', post[0].content, re.DOTALL)
    if not section:
        return hosters
    for attrs, _content in dom_parser2.parse_dom(section.group(1), 'a', req='href'):
        stream_url = attrs['href']
        if scraper_utils.excluded_link(stream_url):
            continue
        # the link text encodes the release; parse it for the height
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(stream_url)
        else:
            meta = scraper_utils.parse_episode_link(stream_url)
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.get_quality(video, host, scraper_utils.height_get_quality(meta['height']))
        hosters.append({'multi-part': False, 'host': host, 'class': self,
                        'views': None, 'url': stream_url, 'rating': None,
                        'quality': quality, 'direct': False})
    return hosters
def get_sources(self, video):
    """Build one source per linkTr row using its hidden URL and quality text."""
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources
    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=0.5)
    # episodes share a page: narrow to the fragment for this episode
    if video.video_type == VIDEO_TYPES.EPISODE:
        html = self.__get_episode_fragment(html, video)
    for row in dom_parser.parse_dom(html, 'div', {'class': 'linkTr'}):
        hidden = dom_parser.parse_dom(row, 'div', {'class': '[^"]*linkHiddenUrl[^"]*'})
        q_text = dom_parser.parse_dom(row, 'div', {'class': '[^"]*linkQualityText[^"]*'})
        if not hidden or not q_text:
            continue
        stream_url = hidden[0]
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.get_quality(
            video, host, QUALITY_MAP.get(q_text[0], QUALITIES.HIGH))
        sources.append({'multi-part': False, 'url': stream_url, 'host': host,
                        'class': self, 'quality': quality, 'views': None,
                        'rating': None, 'direct': False})
    return sources
def get_sources(self, video):
    """Extract a link (direct href or redirect span id) from each linktr row."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for _attrs, row in dom_parser2.parse_dom(html, 'tr', {'id': 'linktr'}):
        spans = dom_parser2.parse_dom(row, 'span', req='id')
        anchors = dom_parser2.parse_dom(row, 'a', req='href')
        # prefer an absolute anchor; fall back to the redirect span's id
        if anchors and anchors[0].attrs['href'].startswith('http'):
            stream_url = anchors[0].attrs['href']
        elif spans:
            stream_url = spans[0].attrs['id']
        else:
            stream_url = ''
        if stream_url.startswith('http'):
            host = urlparse.urlparse(stream_url).hostname
        else:
            label = dom_parser2.parse_dom(row, 'h9')
            host = label[0].content if label else ''
        if not stream_url or not host:
            continue
        quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
        hosters.append({'multi-part': False, 'host': host, 'class': self,
                        'quality': quality, 'views': None, 'rating': None,
                        'url': stream_url, 'direct': False})
    return hosters
def get_sources(self, video):
    """Pair server-name cells with play links; resolve gvideo directly.

    Fix: ``re.sub('^Server\s*', '', host, re.I)`` passed ``re.I`` as the
    positional *count* argument, not as flags, so the match was never
    case-insensitive.  (It only worked by luck because the anchored
    pattern matches at most once.)  Now passed as ``flags=``.
    Returns a list of hoster dicts.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        hosts = dom_parser.parse_dom(html, 'p', {'class': 'server_servername'})
        links = dom_parser.parse_dom(html, 'p', {'class': 'server_play'})
        for host, link_frag in zip(hosts, links):
            stream_url = dom_parser.parse_dom(link_frag, 'a', ret='href')
            if stream_url:
                stream_url = stream_url[0]
                # strip the "Server"/"Link N" decorations from the label
                host = re.sub('^Server\s*', '', host, flags=re.I)
                host = re.sub('\s*Link\s+\d+', '', host)
                if host.lower() == 'google':
                    # google entries expand into several direct gvideo links
                    sources = self.__get_gvideo_links(stream_url)
                else:
                    sources = [{'host': host, 'link': stream_url}]
                for source in sources:
                    stream_url = source['link']
                    host = self._get_direct_hostname(stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                        stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                        direct = True
                    else:
                        host = HOST_SUB.get(source['host'].lower(), source['host'])
                        quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                        direct = False
                    hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Map the page's quality heading onto every listed hoster link."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=0.5)
    # one quality for the whole page, taken from the <h1> heading
    quality = None
    header = re.search("Links\s+-\s+Quality\s*([^<]*)</h1>", html,
                       re.DOTALL | re.I)
    if header:
        quality = QUALITY_MAP.get(header.group(1).strip().upper())
    for entry in re.finditer('id="link_name">\s*([^<]+).*?href="([^"]+)',
                             html, re.DOTALL):
        host, link = entry.groups()
        hoster = {
            "multi-part": False,
            "host": host,
            "class": self,
            "url": link,
            "quality": scraper_utils.get_quality(video, host, quality),
            "rating": None,
            "views": None,
            "direct": False,
        }
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Gather embed-page links plus direct play_video links.

    Fix: the loop reassigned ``quality`` with the *mapped* result
    (``quality = scraper_utils.get_quality(video, host, quality)``), so
    every iteration after the first fed an already-mapped quality back in
    as the base page quality.  The page-level quality is now kept in its
    own variable.  Returns a list of hoster dicts.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        # page-wide base quality: LOW if the site flags the copy as poor
        if re.search('This movie is of poor quality', html, re.I):
            page_quality = QUALITIES.LOW
        else:
            page_quality = QUALITIES.HIGH
        # follow each embed page and harvest its links
        for match in re.finditer('href="([^"]+/embed\d*/[^"]+)', html):
            url = match.group(1)
            embed_html = self._http_get(url, cache_limit=.5)
            hosters += self.__get_links(embed_html)
        # links whose anchor contains the play_video.gif image
        pattern = 'href="([^"]+)[^>]*>\s*<[^>]+play_video.gif'
        for match in re.finditer(pattern, html, re.I):
            stream_url = match.group(1)
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(video, host, page_quality)
            hoster = {'multi-part': False, 'url': stream_url, 'host': host,
                      'class': self, 'quality': quality, 'rating': None,
                      'views': None, 'direct': False}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape the streamlinks table and derive a recency-based rating.

    Each row yields url/host/age/quality; after collection, link age is
    normalized over the observed [min_age, max_age] span into a 0-100
    rating (newer links rate higher).  Returns a list of hoster dicts.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'table', {'id': 'streamlinks'})
        if fragment:
            max_age = 0
            now = min_age = int(time.time())
            for row in dom_parser.parse_dom(fragment[0], 'tr', {'id': 'pt\d+'}):
                # movies and episodes lay the cells out differently
                if video.video_type == VIDEO_TYPES.MOVIE:
                    pattern = 'href="([^"]+).*?/>([^<]+).*?(?:<td>.*?</td>\s*){1}<td>(.*?)</td>\s*<td>(.*?)</td>'
                else:
                    pattern = 'href="([^"]+).*?/>([^<]+).*?(<span class="linkdate">.*?)</td>\s*<td>(.*?)</td>'
                match = re.search(pattern, row, re.DOTALL)
                if match:
                    url, host, age, quality = match.groups()
                    # convert the human-readable age text to a timestamp
                    age = self.__get_age(now, age)
                    quality = quality.upper()
                    # track the age extremes for the rating normalization
                    if age > max_age:
                        max_age = age
                    if age < min_age:
                        min_age = age
                    host = host.strip()
                    hoster = {'multi-part': False, 'class': self, 'url': scraper_utils.pathify_url(url), 'host': host, 'age': age, 'views': None, 'rating': None, 'direct': False}
                    hoster['quality'] = scraper_utils.get_quality(video, host, QUALITY_MAP.get(quality, QUALITIES.HIGH))
                    hosters.append(hoster)
            # rating = age position within [min_age, max_age] scaled to 0-100
            unit = (max_age - min_age) / 100
            if unit > 0:
                for hoster in hosters:
                    hoster['rating'] = (hoster['age'] - min_age) / unit
    return hosters
def get_sources(self, video):
    """Turn each link_N row's button link into a hoster entry."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=2)
    rows = dom_parser2.parse_dom(html, 'tr', {'id': re.compile('link_\d+')})
    for _attrs, row in rows:
        button = dom_parser2.parse_dom(row, 'a', {'class': 'buttonlink'},
                                       req=['href', 'title'])
        if not button:
            continue
        stream_url = button[0].attrs['href']
        # the title doubles as the host name, prefixed with "Server"
        host = re.sub(re.compile('Server\s+', re.I), '',
                      button[0].attrs['title'])
        hosters.append({'multi-part': False, 'host': host, 'class': self,
                        'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                        'views': None, 'rating': None, 'url': stream_url,
                        'direct': False})
    return hosters
def get_sources(self, video):
    """Follow "Watch it here" redirects to the final post, then scrape tabs.

    The loop keeps following the 'Watch it here' link inside the entry div
    (falling back to BASE_URL2 when the primary host returns nothing) until
    a post without that link is reached; that post's tab iframes become the
    hosters.  Returns a list of hoster dicts.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    entry = ''
    while True:
        html = self._http_get(url, cache_limit=.5)
        if not html:
            # primary domain failed: retry the same path on the mirror
            url = scraper_utils.urljoin(BASE_URL2, source_url)
            html = self._http_get(url, cache_limit=.5)
        entry = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
        if entry:
            entry = entry[0].content
            match = re.search('Watch it here\s*:.*?href="([^"]+)', entry, re.I)
            if not match:
                # no further redirect: this entry holds the sources
                break
            url = match.group(1)
        else:
            entry = ''
            break
    for _attribs, tab in dom_parser2.parse_dom(entry, 'div', {'class': 'postTabs_divs'}):
        match = dom_parser2.parse_dom(tab, 'iframe', req='src')
        if not match:
            continue
        link = match[0].attrs['src']
        host = urlparse.urlparse(link).hostname
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH), 'views': None, 'rating': None, 'url': link, 'direct': False}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Read hidden URLs and quality labels out of the linkTr rows."""
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources
    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.EPISODE:
        # restrict the markup to this episode's section
        html = self.__get_episode_fragment(html, video)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'linkTr'}):
        url_divs = dom_parser.parse_dom(
            item, 'div', {'class': '[^"]*linkHiddenUrl[^"]*'})
        quality_divs = dom_parser.parse_dom(
            item, 'div', {'class': '[^"]*linkQualityText[^"]*'})
        if url_divs and quality_divs:
            stream_url = url_divs[0]
            host = urlparse.urlparse(stream_url).hostname
            base_quality = QUALITY_MAP.get(quality_divs[0], QUALITIES.HIGH)
            sources.append({
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(video, host, base_quality),
                'views': None,
                'rating': None,
                'direct': False
            })
    return sources
def get_sources(self, video):
    """Parse link rows (url, host, rating, quality) out of the link table."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    row_re = '<tr\s+id="link_(.*?)</tr>'
    cell_re = 'href="([^"]+)">\s*([^<]+).*?class="text">\s*([^<%]+).*?class="qualityCell[^>]*>\s*([^<]+)'
    for row_match in re.finditer(row_re, html, re.DOTALL):
        cells = re.search(cell_re, row_match.group(1), re.DOTALL)
        if not cells:
            continue
        stream_url, host, rating, q_str = cells.groups()
        host = host.strip()
        q_str = q_str.upper().strip()
        if rating == 'n/a':
            rating = None
        # the listing links to /show/ pages; the playable URL is /play/
        stream_url = stream_url.replace('/show/', '/play/')
        base_quality = QUALITY_MAP.get(q_str, QUALITIES.MEDIUM)
        hosters.append({'multi-part': False, 'url': stream_url, 'host': host,
                        'class': self,
                        'quality': scraper_utils.get_quality(video, host, base_quality),
                        'views': None, 'rating': rating, 'direct': False})
    return hosters
def get_sources(self, video):
    """Collect hosters from the English language section only."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, headers={'Referer': self.base_url},
                          cache_limit=.5)
    for _attrs, section in dom_parser2.parse_dom(html, 'div', {'class': 'lang'}):
        flag = dom_parser2.parse_dom(
            section, 'div', {'title': re.compile('Language Flag\s+[^"]*')})
        lang, subs = self.__get_section_label(flag)
        if lang.lower() != 'english':
            continue
        for attrs, host in dom_parser2.parse_dom(section, 'a', {'class': 'p1'}, req='href'):
            entry = {
                'multi-part': False,
                'url': attrs['href'],
                'class': self,
                'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                'host': host,
                'rating': None,
                'views': None,
                'direct': False
            }
            # carry the subtitle flag through when the section declares one
            if subs:
                entry['subs'] = subs
            hosters.append(entry)
    return hosters
def get_sources(self, video):
    """Derive quality from the Category/Release cells, then list td_cols links."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    # Category gives a mapped quality; otherwise keep the raw Release text
    q_str = ''
    quality = None
    category = re.search('>Category.*?td_col">([^<]+)', html)
    if category:
        quality = QUALITY_MAP.get(category.group(1).upper(), None)
    else:
        release = re.search('>Release.*?td_col">([^<]+)', html)
        if release:
            q_str = release.group(1).upper()
    for link in re.finditer("td_cols.*?href='([^']+)", html):
        stream_url = link.group(1)
        # archives are unplayable
        if re.search('\.rar(\.|$)', stream_url):
            continue
        host = urlparse.urlsplit(stream_url).hostname
        if quality is None:
            link_quality = scraper_utils.blog_get_quality(video, q_str, host)
        else:
            link_quality = scraper_utils.get_quality(video, host, quality)
        hosters.append({'multi-part': False, 'class': self, 'views': None,
                        'url': stream_url, 'rating': None, 'direct': False,
                        'host': host, 'quality': link_quality})
    return hosters
def get_sources(self, video):
    """Parse torrent-style entries with views, size, and a redirect URL."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)
    for _attrs, entry in dom_parser2.parse_dom(html, 'a', {'class': 'full-torrent1'}):
        onclick = dom_parser2.parse_dom(entry, 'span', req='onclick')
        server = dom_parser2.parse_dom(entry, 'div', {'class': 'small_server'})
        views_match = re.search('Views:\s*(?:</[^>]*>)?\s*(\d+)', entry, re.I)
        views = views_match.group(1) if views_match else None
        size_match = re.search('Size:\s*(?:</[^>]*>)?\s*(\d+)', entry, re.I)
        # size is listed in MB; convert to bytes
        size = int(size_match.group(1)) * 1024 * 1024 if size_match else None
        if not onclick or not server:
            continue
        stream_url = onclick[0].attrs['onclick']
        host = server[0].content.lower().replace('stream server: ', '')
        redirect = re.search("'(/redirect/[^']+)", stream_url)
        if redirect:
            stream_url = redirect.group(1)
        hoster = {'multi-part': False, 'host': host, 'class': self,
                  'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                  'views': views, 'rating': None, 'url': stream_url,
                  'direct': False}
        if size is not None:
            hoster['size'] = scraper_utils.format_size(size, 'B')
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Emit one hoster per rel="nofollow" link on the page."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for link in re.finditer('href="([^"]+)" rel="nofollow"', html):
        stream_url = link.group(1)
        # normalize embed.<host> subdomains down to the bare host name
        host = urlparse.urlsplit(stream_url).hostname.replace('embed.', '')
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False
        })
    return hosters
def get_sources(self, video):
    """Emit one source per playDiv3 link, storing a pathified URL."""
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources
    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=24)
    link_re = 'class="playDiv3".*?href="([^"]+).*?>(.*?)</a>'
    for link in re.finditer(link_re, html, re.DOTALL | re.I):
        stream_url, host = link.groups()
        sources.append({
            'multi-part': False,
            'url': scraper_utils.pathify_url(stream_url),
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'rating': None,
            'views': None,
            'direct': False
        })
    return sources
def get_sources(self, video):
    """Collect <source>/<iframe> streams from the video-embed container."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    embed = dom_parser.parse_dom(html, 'div', {'class': 'video-embed'})
    if not embed:
        return hosters
    stream_urls = dom_parser.parse_dom(embed[0], 'source', ret='src')
    stream_urls += dom_parser.parse_dom(embed[0], 'iframe', ret='src')
    for stream_url in stream_urls:
        host = self._get_direct_hostname(stream_url)
        if host == 'gvideo':
            # gvideo links play directly; quality comes from the URL itself
            direct = True
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            direct = False
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(video, host, QUALITIES.HD720)
        hosters.append({'multi-part': False, 'url': stream_url, 'host': host,
                        'class': self, 'quality': quality, 'views': None,
                        'rating': None, 'direct': direct})
    return hosters
def get_sources(self, video):
    """Aggregate data-vid iframes and additional-links anchors into hosters.

    The best page quality is taken from the movies-quality badges in the
    entry div (highest wins) and applied to every collected link.
    Returns a list of hoster dicts.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    best_quality = QUALITIES.HIGH
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
    if fragment:
        # pick the highest advertised quality badge on the page
        for match in re.finditer('href="[^"]*/movies-quality/[^"]*[^>]*>([^<]+)', fragment[0].content, re.I):
            quality = Q_MAP.get(match.group(1).upper(), QUALITIES.HIGH)
            if Q_ORDER[quality] > Q_ORDER[best_quality]:
                best_quality = quality
    sources = []
    # primary sources: iframes embedded inside each anchor's data-vid attr
    for attrs, _content in dom_parser2.parse_dom(html, 'a', req='data-vid'):
        try:
            vid_url = dom_parser2.parse_dom(scraper_utils.cleanse_title(attrs['data-vid']), 'iframe', req='src')
            sources.append(vid_url[0])
        except:
            # malformed data-vid payload: ignore this anchor
            pass
    # secondary sources: plain anchors in the additional-links table
    fragment = dom_parser2.parse_dom(html, 'table', {'class': 'additional-links'})
    if fragment:
        sources += dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
    for stream_url in sources:
        # entries are dom nodes carrying either an href or a src attribute
        stream_url = stream_url.attrs.get('href') or stream_url.attrs.get('src')
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.get_quality(video, host, best_quality)
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': False}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape x265 release links, inferring quality and 3D from the title.

    The page <title> supplies a default quality (from 'NNNp') and the 3D
    flag; per-link <span class="d"> text can override the quality.
    Returns a list of source dicts (all tagged format x265).
    """
    source_url = self.get_url(video)
    sources = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        is_3d = False
        page_quality = QUALITIES.HD720
        title = dom_parser.parse_dom(html, 'title')
        if title:
            title = title[0]
            # e.g. "... 1080p ..." in the page title sets the default quality
            match = re.search('(\d{3,})p', title)
            if match:
                page_quality = scraper_utils.height_get_quality(match.group(1))
            is_3d = True if re.search('\s+3D\s+', title) else False
        # links live in the txt-block divs and the elemento list items
        fragments = dom_parser.parse_dom(html, 'div', {'class': 'txt-block'}) + dom_parser.parse_dom(html, 'li', {'class': 'elemento'})
        for fragment in fragments:
            for match in re.finditer('href="([^"]+)', fragment):
                stream_url = match.group(1)
                host = urlparse.urlparse(stream_url).hostname
                # per-link quality label overrides the page default
                q_str = dom_parser.parse_dom(fragment, 'span', {'class': 'd'})
                q_str = q_str[0].upper() if q_str else ''
                base_quality = QUALITY_MAP.get(q_str, page_quality)
                quality = scraper_utils.get_quality(video, host, base_quality)
                source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': False}
                source['format'] = 'x265'
                source['3D'] = is_3d
                sources.append(source)
    return sources
def get_sources(self, video):
    """Resolve embed-responsive iframes into direct or hosted sources.

    Iframes on known DIRECT_HOSTS are expanded via __parse_streams into a
    {url: {'quality':..., 'direct':...}} map; other iframes become a
    single non-direct entry at the page default quality.
    Returns a list of hoster dicts.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    # movies default higher than episodes on this site
    page_quality = QUALITIES.HD720 if video.video_type == VIDEO_TYPES.MOVIE else QUALITIES.HIGH
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'embed-responsive'}):
        iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='data-src')
        if iframe_url:
            iframe_url = iframe_url[0].attrs['data-src']
            iframe_host = urlparse.urlparse(iframe_url).hostname
            if iframe_host in DIRECT_HOSTS:
                sources = self.__parse_streams(iframe_url, url)
            else:
                sources = {iframe_url: {'quality': scraper_utils.get_quality(video, iframe_host, page_quality), 'direct': False}}
            for source in sources:
                quality = sources[source]['quality']
                direct = sources[source]['direct']
                if direct:
                    # direct streams need a UA header appended for playback
                    host = scraper_utils.get_direct_hostname(self, source)
                    stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                else:
                    host = urlparse.urlparse(source).hostname
                    stream_url = source
                hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Read host names from the link images inside the table body."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    tbody = dom_parser2.parse_dom(html, 'tbody')
    if not tbody:
        return hosters
    for attrs, content in dom_parser2.parse_dom(tbody[0].content, 'a', req='href'):
        img = dom_parser2.parse_dom(content, 'img')
        if not img:
            continue
        # the <img> content carries the host label
        host = img[0].content.strip()
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'url': attrs['href'],
            'direct': False
        })
    return hosters
def get_sources(self, video):
    """Merge gk-link, gk-link2, and iframe sources into hoster dicts.

    ``sources`` maps url -> either a quality (direct/gvideo and plain
    hosted links) or a host name (site-internal links), which is why the
    value is interpreted differently per branch below.
    Returns a list of hoster dicts.
    """
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        sources = self.__get_gk_links(html, url)
        if not sources:
            # fall back to the alternate gk-link markup
            sources = self.__get_gk_links2(html)
        sources.update(self.__get_iframe_links(html))
        for source in sources:
            host = self._get_direct_hostname(source)
            if host == 'gvideo':
                direct = True
                quality = sources[source]
                # gvideo needs a UA header appended for playback
                stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            else:
                direct = False
                stream_url = source
                if self.base_url in source:
                    # site-internal link: the map value is the host name
                    host = sources[source]
                    quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                else:
                    # external link: the map value is the quality
                    host = urlparse.urlparse(source).hostname
                    quality = sources[source]
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Emit one hoster per tdhost table cell link."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    cell_re = 'class="[^"]*tdhost".*?href="([^"]+)">([^<]+)'
    for cell in re.finditer(cell_re, html, re.DOTALL):
        stream_url, host = cell.groups()
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'url': stream_url,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'direct': False
        })
    return hosters
def get_sources(self, video):
    """Scrape watch links with optional multi-part sub-links.

    Each row yields a labeled primary link plus any "Part N" links, which
    are emitted as copies of the primary hoster with their own URL and a
    'part_label'.  Returns a list of hoster dicts.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        pattern = 'href="([^"]+)">Watch (Link \d+)(.*?)</td>\s*<td[^>]*>(.*?)</td>.*?<td[^>]*id="lv_\d+"[^>]*>([^<]+)'
        for match in re.finditer(pattern, html, re.DOTALL):
            stream_url, label, part_str, q_str, views = match.groups()
            q_str = q_str.strip().upper()
            # extra "Part N" anchors mark a multi-part stream
            parts = re.findall('href="([^"]+)">(Part\s+\d+)<', part_str, re.DOTALL)
            if parts:
                multipart = True
            else:
                multipart = False
            host = urlparse.urlparse(stream_url).hostname
            if host is not None:
                quality = scraper_utils.get_quality(video, host, QUALITY_MAP.get(q_str, QUALITIES.HIGH))
                hoster = {'multi-part': multipart, 'host': host, 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': False}
                hoster['label'] = label
                hosters.append(hoster)
                # each part becomes a copy of the primary hoster with its own URL
                for part in parts:
                    stream_url, part_label = part
                    part_hoster = hoster.copy()
                    part_hoster['part_label'] = part_label
                    part_hoster['url'] = stream_url
                    hosters.append(part_hoster)
    return hosters
def get_sources(self, video):
    """Parse source/quality table rows: host, view count, quality string, link."""
    hosters = []
    source_url = self.get_url(video)
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        pattern = 'class="source_td">\s*<img[^>]+>\s*(.*?)\s*-\s*\((\d+) views\).*?class="quality_td">\s*(.*?)\s*<.*?href="([^"]+)'
        for row in re.finditer(pattern, html, re.DOTALL):
            host, views, quality_str, stream_url = row.groups()
            # The quality cell text keys into QUALITY_MAP.
            quality = scraper_utils.get_quality(video, host, QUALITY_MAP.get(quality_str.upper()))
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'url': stream_url,
                'quality': quality,
                'views': views,
                'rating': None,
                'direct': False,
            })
    return hosters
def get_sources(self, video):
    # The page embeds base64-encoded hoster URLs in /watch.php?q= links.
    # The page lists HD links first, then SD: the first time a host appears
    # it is treated as the HD copy, any repeat as the SD copy.
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        pattern = "/watch\.php\?q=([^']+)"
        seen_hosts = {}
        for match in re.finditer(pattern, html, re.DOTALL):
            url = match.group(1)
            # Python 2 only: str.decode('base-64') decodes the payload URL.
            hoster = {'multi-part': False, 'url': url.decode('base-64'), 'class': self, 'quality': None, 'views': None, 'rating': None, 'direct': False}
            hoster['host'] = urlparse.urlsplit(hoster['url']).hostname
            # top list is HD, bottom list is SD
            if hoster['host'] in seen_hosts:
                quality = QUALITIES.HIGH
            else:
                quality = QUALITIES.HD720
            seen_hosts[hoster['host']] = True
            hoster['quality'] = scraper_utils.get_quality(video, hoster['host'], quality)
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    # Each "playing_button" list item wraps an anchor whose php?... query
    # argument is a base64-encoded payload containing the real stream URL.
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    for _attrs, button in dom_parser2.parse_dom(html, 'li', {'class': 'playing_button'}):
        try:
            link = dom_parser2.parse_dom(button, 'a', req='href')
            match = re.search('php\?.*?=?([^"]+)', link[0].attrs['href'])
            stream_url = base64.b64decode(match.group(1))
            # Keep only the http(s) URL embedded in the decoded payload.
            match = re.search('(https?://.*)', stream_url)
            stream_url = match.group(1)
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': False}
            hosters.append(hoster)
        except Exception as e:
            # Malformed buttons are common; log and keep scraping the rest.
            logger.log('Exception during tvonline source: %s - |%s|' % (e, button), log_utils.LOGDEBUG)

    return hosters
def get_sources(self, video):
    """Scrape 'ldr-item' entries; each carries a link, a view counter and a score."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    for item in dom_parser.parse_dom(html, 'div', {'class': '[^"]*ldr-item[^"]*'}):
        stream_url = dom_parser.parse_dom(item, 'a', ret='data-actuallink')

        # View count, when the click counter element is present.
        views = None
        watched = dom_parser.parse_dom(item, 'div', {'class': 'click-count'})
        if watched:
            count_match = re.search(' (\d+) ', watched[0])
            if count_match:
                views = count_match.group(1)

        # A 0-10 score becomes a 0-100 rating; zero or missing means no rating.
        score = dom_parser.parse_dom(item, 'div', {'class': '\s*point\s*'})
        if score:
            score = int(score[0])
        rating = score * 10 if score else None

        if stream_url:
            stream_url = stream_url[0].strip()
            host = urlparse.urlparse(stream_url).hostname
            hosters.append({'multi-part': False, 'host': host, 'class': self,
                            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                            'views': views, 'rating': rating,
                            'url': stream_url, 'direct': False})
    return hosters
def get_sources(self, video):
    # Collect GK links, defaulting every one to the page-wide quality taken
    # from the "status" element (e.g. an "HD"/"CAM" tag), if present.
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        page_quality = dom_parser.parse_dom(html, 'dd', {'class': 'status'})
        if page_quality:
            page_quality = QUALITY_MAP.get(page_quality[0], QUALITIES.HIGH)
        else:
            page_quality = QUALITIES.HIGH

        sources = self.__get_gk_links(html, url, page_quality)
        for source in sources:
            host = self._get_direct_hostname(source)
            if host == 'gvideo':
                # Direct Google video link; the dict value is already a quality.
                direct = True
                quality = sources[source]
            else:
                host = urlparse.urlparse(source).hostname
                quality = scraper_utils.get_quality(video, host, sources[source])
                direct = False
            if host is not None:
                # Append the UA header hint the player needs for these links.
                stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
                hosters.append(hoster)
    return hosters
def __get_links(self, url, video):
    # Query the Alluc search API once per supported search type and turn each
    # JSON result into a hoster record; duplicate stream URLs are skipped.
    hosters = []
    seen_urls = set()
    for search_type in SEARCH_TYPES:
        search_url = self.__translate_search(url, search_type)
        if search_url:
            html = self._http_get(search_url, cache_limit=.5)
            js_result = scraper_utils.parse_json(html, search_url)
            if 'status' in js_result and js_result['status'] == 'success':
                for result in js_result['result']:
                    # Multi-host results and rar archives are not playable.
                    if len(result['hosterurls']) > 1: continue
                    if result['extension'] == 'rar': continue

                    stream_url = result['hosterurls'][0]['url']
                    if stream_url not in seen_urls:
                        if scraper_utils.title_check(video, result['title']):
                            host = urlparse.urlsplit(stream_url).hostname
                            quality = scraper_utils.get_quality(video, host, self._get_title_quality(result['title']))
                            hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': False}
                            hoster['extra'] = result['title']
                            hosters.append(hoster)
                            seen_urls.add(stream_url)
            else:
                # NOTE(review): assumes an unsuccessful response always carries
                # a 'message' key -- would KeyError otherwise; confirm with API docs.
                log_utils.log('Alluc API Error: %s: %s' % (search_url, js_result['message']), log_utils.LOGWARNING)
    return hosters
def get_sources(self, video):
    # Parse the stream table; each row has an anchor plus "Views:" and
    # "Size:" annotations. /redirect/ links hide the target URL in base64.
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        for item in dom_parser.parse_dom(html, 'div', {'class': 'stream-table__row'}):
            stream_url = dom_parser.parse_dom(item, 'a', ret='href')
            match = re.search('Views:\s*(?:</span>)?\s*(\d+)', item, re.I)
            if match:
                views = match.group(1)
            else:
                views = None
            match = re.search('Size:\s*(?:</span>)?\s*(\d+)', item, re.I)
            if match:
                # Size is listed in MB; convert to bytes.
                size = int(match.group(1)) * 1024 * 1024
            else:
                size = None
            if stream_url:
                stream_url = stream_url[0]
                match = re.search('/redirect/(.*)', stream_url)
                if match:
                    # Python 2 only: base64.decodestring is the py2 decode alias.
                    stream_url = base64.decodestring(urllib.unquote(match.group(1)))
                host = urlparse.urlparse(stream_url).hostname
                if host:
                    quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': False}
                    if size is not None: hoster['size'] = scraper_utils.format_size(size, 'B')
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    # Walk the "css_server_new" host list. GVideo hosts are resolved into
    # direct links via __get_links; everything else is a plain hoster entry.
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'ul', {'class': 'css_server_new'})
        if fragment:
            for match in re.finditer('href="([^"]+)[^>]*>(.*?)(?:-\d+)?</a>', fragment[0]):
                url, host = match.groups()
                host = host.lower()
                host = re.sub('<img.*?/>', '', host)
                host = HOSTS.get(host, host)
                log_utils.log('%s - %s' % (url, host))
                if host in GVIDEO_NAMES:
                    # GVideo entries need a second fetch to resolve the real links.
                    sources = self.__get_links(urlparse.urljoin(self.base_url, url))
                    direct = True
                else:
                    sources = {url: host}
                    direct = False
                for source in sources:
                    # BUG FIX: look up the dict value BEFORE |source| is mutated
                    # below; the original did sources[source] after appending the
                    # User-Agent suffix, which raised KeyError for every direct
                    # gvideo link.
                    source_host = sources[source]
                    if self._get_direct_hostname(source) == 'gvideo':
                        quality = scraper_utils.gv_get_quality(source)
                        source = source + '|User-Agent=%s' % (scraper_utils.get_ua())
                    else:
                        quality = scraper_utils.get_quality(video, source, QUALITIES.HIGH)
                    hoster = {'multi-part': False, 'host': source_host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': source, 'direct': direct}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape 'ldr-item' rows into hoster records with views and a score-based rating."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    for item in dom_parser.parse_dom(html, 'div', {'class': '[^"]*ldr-item[^"]*'}):
        stream_url = dom_parser.parse_dom(item, 'a', ret='data-actuallink')

        # Pull the view count out of the click counter, if any.
        views = None
        watched = dom_parser.parse_dom(item, 'div', {'class': 'click-count'})
        if watched:
            count_match = re.search(' (\d+) ', watched[0])
            if count_match:
                views = count_match.group(1)

        # A 0-10 score becomes a 0-100 rating; zero or missing means no rating.
        score = dom_parser.parse_dom(item, 'div', {'class': '\s*point\s*'})
        if score:
            score = int(score[0])
        rating = score * 10 if score else None

        if stream_url:
            stream_url = stream_url[0]
            host = urlparse.urlparse(stream_url).hostname
            hosters.append({'multi-part': False, 'host': host, 'class': self,
                            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                            'views': views, 'rating': rating,
                            'url': stream_url, 'direct': False})
    return hosters
def get_sources(self, video):
    """Pull hoster links out of the 'domain' table cells."""
    hosters = []
    source_url = self.get_url(video)
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        for table_cell in dom_parser.parse_dom(html, 'td', {'class': 'domain'}):
            cell_match = re.search('href="([^"]+)(?:[^>]+>){2}\s*([^<]+)', table_cell)
            if not cell_match:
                continue
            link, host = cell_match.groups()
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                'views': None,
                'rating': None,
                'url': link,
                'direct': False,
            })
    return hosters
def get_sources(self, video):
    # Parse the "streamlinks" table. Movie and episode pages use slightly
    # different row layouts. Link age is converted to a 0-100 relative
    # rating (oldest = 0) after all rows are collected.
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        match = re.search('<table[^>]+id="streamlinks">(.*?)</table>', html, re.DOTALL)
        if match:
            fragment = match.group(1)
            if video.video_type == VIDEO_TYPES.MOVIE:
                pattern = 'href="([^"]+/play/[^"]+).*?/>\s+\.?([^\s]+)\s+.*?(?:<td>.*?</td>\s*){2}<td>(.*?)</td>\s*<td>(.*?)</td>'
            else:
                pattern = 'href="([^"]+/play/[^"]+).*?/>\s+\.?([^\s]+)\s+.*?(<span class="linkdate">.*?)</td>\s*<td>(.*?)</td>'
            max_age = 0
            now = min_age = int(time.time())
            for match in re.finditer(pattern, fragment, re.DOTALL):
                url, host, age, quality = match.groups()
                age = self.__get_age(now, age)
                quality = quality.upper()
                # Track the age range so ratings can be scaled afterwards.
                if age > max_age: max_age = age
                if age < min_age: min_age = age
                hoster = {'multi-part': False, 'class': self, 'url': scraper_utils.pathify_url(url), 'host': host, 'age': age, 'views': None, 'rating': None, 'direct': False}
                hoster['quality'] = scraper_utils.get_quality(video, host, QUALITY_MAP.get(quality, QUALITIES.HIGH))
                hosters.append(hoster)

            # Scale each link's age into a 0-100 rating.
            # NOTE: relies on Python 2 integer division.
            unit = (max_age - min_age) / 100
            if unit > 0:
                for hoster in hosters: hoster['rating'] = (hoster['age'] - min_age) / unit
            # print '%s, %s' % (hoster['rating'], hoster['age'])
    return hosters
def get_sources(self, video):
    """Scrape 'elemento' list items; an optional <span class="d"> carries the quality string."""
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources

    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for item in dom_parser.parse_dom(html, 'li', {'class': 'elemento'}):
        href = re.search('href="([^"]+)', item)
        if not href:
            continue
        stream_url = href.group(1)
        # Quality label defaults to HIGH when the span is absent or unknown.
        q_str = dom_parser.parse_dom(item, 'span', {'class': 'd'})
        q_str = q_str[0].upper() if q_str else ''
        base_quality = QUALITY_MAP.get(q_str, QUALITIES.HIGH)
        host = urlparse.urlparse(stream_url).hostname
        sources.append({
            'multi-part': False,
            'url': stream_url,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, base_quality),
            'views': None,
            'rating': None,
            'direct': False,
        })
    return sources
def get_sources(self, video):
    # Collect links from embed sub-pages plus any play_video.gif anchors on
    # the main page. A "poor quality" banner downgrades the page baseline.
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    match = re.search('This movie is of poor quality', html, re.I)
    if match:
        page_quality = QUALITIES.LOW
    else:
        page_quality = QUALITIES.HIGH

    # Follow each embed page and harvest its links.
    for match in re.finditer('href="([^"]+/embed\d*/[^"]+)', html):
        embed_url = match.group(1)
        embed_html = self._http_get(embed_url, cache_limit=.5)
        hosters += self.__get_links(embed_html)

    pattern = 'href="([^"]+)[^>]*>\s*<[^>]+play_video.gif'
    for match in re.finditer(pattern, html, re.I):
        stream_url = match.group(1)
        host = urlparse.urlparse(stream_url).hostname
        # BUG FIX: the original reassigned |quality| here, so every iteration
        # after the first used the previous hoster's derived quality as the
        # baseline instead of the page-level LOW/HIGH value.
        quality = scraper_utils.get_quality(video, host, page_quality)
        hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'rating': None, 'views': None, 'direct': False}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    # Walk the "css_server_new" host list. GVideo hosts are expanded into
    # direct links via __get_links; other hosts become single indirect
    # entries. Every dict value carries {'quality', 'direct', 'host'}.
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'ul', {'class': 'css_server_new'})
        if fragment:
            for match in re.finditer('href="([^"]+).*?/>(.*?)(?:-\d+)?</a>', fragment[0]):
                url, host = match.groups()
                host = host.lower()
                host = HOSTS.get(host, host)
                if host in GVIDEO_NAMES:
                    sources = self.__get_links(urlparse.urljoin(self.base_url, url))
                else:
                    sources = {url: {'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH), 'direct': False, 'host': host}}
                for source in sources:
                    direct = sources[source]['direct']
                    quality = sources[source]['quality']
                    host = sources[source]['host']
                    # Direct links need the UA hint appended for playback.
                    stream_url = source if not direct else source + '|User-Agent=%s' % (scraper_utils.get_ua())
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    # Pair each anchor in the "embeds" block with the matching
    # "searchTVname" label, padding the shorter list with None.
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'embeds'})
    if fragment:
        fragment = fragment[0].content
        links = [r.attrs['href'] for r in dom_parser2.parse_dom(fragment, 'a', req='href')]
        hosts = [r.content for r in dom_parser2.parse_dom(fragment, 'div', {'class': 'searchTVname'})]
        # Python 2 only: map(None, a, b) zips with None padding
        # (equivalent to itertools.izip_longest).
        for stream_url, host in map(None, links, hosts):
            quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': False}
            hosters.append(hoster)

    return hosters
def get_sources(self, video):
    """Parse the 'Links:' section; each movie_link anchor becomes one hoster."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    section = re.search('Links:(.*?)Show All Links', html, re.DOTALL)
    if section:
        link_pattern = 'class="movie_link.*?href="([^"]+)[^>]+>([^<]+)'
        for link in re.finditer(link_pattern, section.group(1), re.DOTALL):
            media_url, host = link.groups()
            hosters.append({
                'multi-part': False,
                'url': media_url,
                'class': self,
                'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                'host': host,
                'rating': None,
                'views': None,
                'direct': False,
            })
    return hosters
def get_sources(self, video):
    """Pair each tbody anchor's href with its (span-stripped) text as the host."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    fragment = dom_parser.parse_dom(html, 'tbody')
    if fragment:
        links = dom_parser.parse_dom(fragment[0], 'a', ret='href')
        domains = dom_parser.parse_dom(fragment[0], 'a')
        for link, host in zip(links, domains):
            # The anchor text may wrap the host name in <span> tags.
            host = re.sub('</?span[^>]*>', '', host)
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                'views': None,
                'rating': None,
                'url': link,
                'direct': False,
            })
    return hosters
def get_sources(self, video):
    # The page packs its streams into a query-string-like blob
    # ("pic=...&emb=...&vb=..."); each key selects a different handler.
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        match = re.search('((?:pic|emb|vb)=[^<]+)', html)
        if match:
            embeds = match.group(1)
            for stream_url in embeds.split('&'):
                if stream_url.startswith('vb='):
                    # vb= carries a vidbux video id.
                    stream_url = 'http://www.vidbux.com/%s' % (stream_url[3:])
                    host = 'vidbux.com'
                    direct = False
                    quality = scraper_utils.get_quality(video, host, QUALITIES.HD1080)
                elif stream_url.startswith('pic='):
                    # pic= must be resolved through the PHP helper; the JSON
                    # response lists media items with explicit widths.
                    data = {'url': stream_url[4:]}
                    html = self._http_get(PHP_URL, data=data, auth=False, cache_limit=1)
                    js_data = scraper_utils.parse_json(html, PHP_URL)
                    host = self._get_direct_hostname(stream_url)
                    direct = True
                    for item in js_data:
                        if 'medium' in item and item['medium'] == 'video':
                            stream_url = item['url']
                            quality = scraper_utils.width_get_quality(item['width'])
                            break
                    else:
                        # No playable video item found; skip this entry.
                        continue
                elif stream_url.startswith('emb='):
                    stream_url = stream_url.replace('emb=', '')
                    host = urlparse.urlparse(stream_url).hostname
                    direct = False
                    quality = scraper_utils.get_quality(video, host, QUALITIES.HD720)
                else:
                    continue

                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
                hosters.append(hoster)
    return hosters
def __get_post_links(self, html, video):
    """Collect release links from the post's <h2> headings; returns {url: quality}."""
    sources = {}
    post = dom_parser2.parse_dom(html, 'article', {'id': re.compile('post-\d+')})
    if not post:
        return sources

    for _attrs, heading in dom_parser2.parse_dom(post[0].content, 'h2'):
        for attrs, _content in dom_parser2.parse_dom(heading, 'a', req='href'):
            stream_url = attrs['href']
            # Quality comes from the release name embedded in the link.
            meta = scraper_utils.parse_episode_link(stream_url)
            release_quality = scraper_utils.height_get_quality(meta['height'])
            host = urlparse.urlparse(stream_url).hostname
            sources[stream_url] = scraper_utils.get_quality(video, host, release_quality)
    return sources
def __get_mirror_links(self, html, video):
    # Find the mirrors.png marker image and scrape the <p> block that follows
    # it for hoster links; returns {url: {'quality': ..., 'direct': False}}.
    sources = {}
    for attrs, _content in dom_parser2.parse_dom(html, 'img', req='src'):
        image = attrs['src']
        if image.endswith('/mirrors.png'):
            # BUG FIX: escape the image URL before embedding it in the regex;
            # unescaped metacharacters (e.g. '?', '+') corrupted the pattern.
            match = re.search('%s.*?<p>(.*?)</p>' % (re.escape(image)), html, re.DOTALL)
            if match:
                for attrs, _content in dom_parser2.parse_dom(match.group(1), 'a', req='href'):
                    stream_url = attrs['href']
                    host = urlparse.urlparse(stream_url).hostname
                    # Quality comes from the release name embedded in the link.
                    meta = scraper_utils.parse_episode_link(stream_url)
                    base_quality = scraper_utils.height_get_quality(meta['height'])
                    sources[stream_url] = {'quality': scraper_utils.get_quality(video, host, base_quality), 'direct': False}
    return sources
def get_sources(self, video):
    """Scrape 'tablemoviesindex2' rows into hoster records (urls made root-relative)."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    pattern = '''tablemoviesindex2.*?href\s*=\s*['"]([^'"]+).*? ([^<]+)'''
    for row in re.finditer(pattern, html):
        url, host = row.groups()
        # Normalize to a root-relative path.
        if not url.startswith('/'):
            url = '/' + url
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'url': url,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Extract stream URLs from the inline go_to(...) onclick handlers."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    for onclick in re.finditer('''onclick\s*=\s*"go_to\(\s*\d+\s*,\s*'([^']+)''', html):
        stream_url = onclick.group(1)
        host = urlparse.urlparse(stream_url).hostname
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
        })
    return hosters