# Excerpted scraper-class methods from a Kodi add-on (Python 2). They rely on
# their modules' usual imports (re, urlparse, datetime, xml.etree.ElementTree
# as ET, dom_parser/dom_parser2, kodi, log_utils/logger, scraper_utils) and on
# constants defined elsewhere in the repo (FORCE_NO_MATCH, VIDEO_TYPES,
# QUALITIES, QUALITY_MAP, Q_ORDER, EXCLUDE_LINKS).

def __get_post_links(self, html, video):
    sources = {}
    post = dom_parser2.parse_dom(html, 'div', {'class': 'postContent'})
    if post:
        post = post[0].content
        results = re.findall('<p\s+style="text-align:\s*center;">(?:\s*<strong>)*(.*?)<br(.*?)</p>', post, re.DOTALL)
        if not results:
            # fallback layout: explicit "Release Name:" / "Download:" labels
            match = re.search('>Release Name\s*:(.*?)<br', post, re.I)
            release = match.group(1) if match else ''
            match = re.search('>Download\s*:(.*?)</p>', post, re.DOTALL | re.I)
            links = match.group(1) if match else ''
            results = [(release, links)]

        for release, links in results:
            release = re.sub('</?[^>]*>', '', release)  # strip markup from the release name
            for attrs, link_text in dom_parser2.parse_dom(links, 'a', req='href'):
                stream_url = attrs['href']
                if link_text.upper() in ['TORRENT SEARCH', 'VIP FILE']:
                    continue
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.blog_get_quality(video, release, host)
                sources[stream_url] = quality
    return sources
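# scraper_utils.blog_get_quality() maps a release string (plus host) to a
# quality constant; its real implementation is not shown here. A minimal
# stand-alone sketch of the kind of keyword heuristic such a mapper applies
# (the keyword table and string labels are illustrative only, not the actual
# scraper_utils logic):
def guess_quality_from_release(release):
    release = release.upper()
    for marker, quality in [('1080P', 'HD1080'), ('720P', 'HD720'),
                            ('DVDRIP', 'HIGH'), ('HDTV', 'HIGH'), ('CAM', 'LOW')]:
        if marker in release:
            return quality
    return 'HIGH'  # default when nothing in the release name matches

# guess_quality_from_release('Show.S01E01.720p.WEB-DL') -> 'HD720'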
def __get_episode_sources(self, source_url, video):
    hosters = []
    links = self.__find_episode(source_url, video)
    if not links: return hosters

    # batch-check the cache status of all hashes in one request
    hash_data = self.__get_hash_data([link[0] for link in links])
    for link in links:
        try:
            status = hash_data['hashes'][link[0]]['status']
        except KeyError:
            status = ''
        if status.lower() != 'finished': continue  # only cached ("finished") items are playable

        stream_url = 'hash_id=%s' % (link[0])
        host = scraper_utils.get_direct_hostname(self, stream_url)
        quality = scraper_utils.blog_get_quality(video, link[1], '')
        hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None,
                  'host': host, 'quality': quality, 'direct': True}
        hoster['extra'] = link[1]
        hosters.append(hoster)
    return hosters
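# Since only 'status' is read from hash_data, the try/except KeyError above can
# also be expressed as a chain of dict.get() calls; a minimal equivalent using
# the same {'hashes': {hash_id: {'status': ...}}} layout the code relies on:
def hash_status(hash_data, hash_id):
    return hash_data.get('hashes', {}).get(hash_id, {}).get('status', '')

# hash_status({'hashes': {'abc': {'status': 'finished'}}}, 'abc') -> 'finished'
# hash_status({}, 'abc') -> ''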
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    # unlike the variant above, this scraper's __get_post_links() takes only the
    # html and returns {url: {'release': ...}}
    for source, values in self.__get_post_links(html).iteritems():
        if scraper_utils.excluded_link(source): continue

        host = urlparse.urlparse(source).hostname
        release = values['release']
        quality = scraper_utils.blog_get_quality(video, release, host)
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source,
                  'rating': None, 'quality': quality, 'direct': False}
        if 'X265' in release or 'HEVC' in release:
            hoster['format'] = 'x265'  # flag HEVC releases so the codec can be surfaced downstream
        hosters.append(hoster)
    return hosters
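# Every get_sources() in these scrapers builds the same hoster dict by hand. A
# sketch of a shared constructor that would centralize the shape (make_hoster
# is hypothetical, not an existing scraper_utils helper):
def make_hoster(cls, host, url, quality, direct=False, **extra):
    hoster = {'multi-part': False, 'class': cls, 'views': None, 'rating': None,
              'host': host, 'url': url, 'quality': quality, 'direct': direct}
    hoster.update(extra)  # e.g. format='x265' or extra=<release name>
    return hoster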
def __get_links_from_xml(self, xml, video):
    sources = {}
    try:
        root = ET.fromstring(xml)
        for item in root.findall('.//item'):
            title = item.find('title').text
            # <source> elements live in the JWPlayer RSS namespace
            for source in item.findall('{http://rss.jwpcdn.com/}source'):
                stream_url = source.get('file')
                label = source.get('label')
                if self._get_direct_hostname(stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif label:
                    quality = scraper_utils.height_get_quality(label)
                else:
                    quality = scraper_utils.blog_get_quality(video, title, '')
                sources[stream_url] = {'quality': quality, 'direct': True}
                log_utils.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        log_utils.log('Exception during 123Movies XML Parse: %s' % (e), log_utils.LOGWARNING)
    return sources
def __get_comment_links(self, comment, video):
    sources = {}
    for attrs, _content in dom_parser2.parse_dom(comment, 'a', req='href'):
        stream_url = attrs['href']
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.blog_get_quality(video, stream_url, host)
        sources[stream_url] = quality
    return sources
def __get_links(self, video, views, html, q_str):
    pattern = 'li>\s*<a\s+href="(http[^"]+)'
    hosters = []
    for match in re.finditer(pattern, html, re.DOTALL):
        url = match.group(1)
        hoster = {'multi-part': False, 'class': self, 'views': views, 'url': url, 'rating': None,
                  'quality': None, 'direct': False}
        hoster['host'] = urlparse.urlsplit(url).hostname
        hoster['quality'] = scraper_utils.blog_get_quality(video, q_str, hoster['host'])
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return sources

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=8)
    # embedded players: one iframe per stb-container-<n> div
    for div in dom_parser2.parse_dom(html, 'div', {'id': re.compile('stb-container-\d+')}):
        stream_url = dom_parser2.parse_dom(div.content, 'iframe', req='src')
        if not stream_url: continue

        stream_url = stream_url[0].attrs['src']
        host = urlparse.urlparse(stream_url).hostname
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                  'quality': QUALITIES.HIGH, 'views': None, 'rating': None, 'direct': False}
        sources.append(source)

    # download box: pair each quality label with its "Download Now" link
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'stb-download-body_box'})
    if not fragment: return sources

    labels = dom_parser2.parse_dom(fragment[0].content, 'a', {'href': '#'})
    stream_urls = [result for result in dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
                   if result.content.lower() == 'download now']
    for label, stream_url in zip(labels, stream_urls):
        stream_url = stream_url.attrs['href']
        label = re.sub('</?[^>]*>', '', label.content)  # strip markup from the label text
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.blog_get_quality(video, label, host)
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                  'quality': quality, 'views': None, 'rating': None, 'direct': False}
        sources.append(source)
    return sources
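# zip() above silently stops at the shorter of labels/stream_urls, so a label
# without a matching "Download Now" anchor (or the reverse) drops entries and
# can misalign later pairs. A cheap self-contained guard (sketch; the warn
# callback stands in for whatever logger this module uses):
def paired_download_links(labels, stream_urls, warn):
    if len(labels) != len(stream_urls):
        warn('label/link mismatch: %d labels vs %d links' % (len(labels), len(stream_urls)))
    return zip(labels, stream_urls)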
def __get_links_from_xml(self, url, video, page_url, cookies):
    sources = {}
    try:
        headers = {'Referer': page_url}
        xml = self._http_get(url, cookies=cookies, headers=headers, cache_limit=.5)
        root = ET.fromstring(xml)
        for item in root.findall('.//item'):
            title = item.find('title').text
            if title and title.upper() == 'OOPS!': continue  # the site's "not found" placeholder item

            # ElementTree requires Clark notation, '{namespace}tag'; the tag name
            # was truncated in the original and 'source' is assumed here, matching
            # the JWPlayer-style feed handled above.
            for source in item.findall('{https://yesmovies.to/ajax/movie_sources/}source'):
                stream_url = source.get('file')
                label = source.get('label')
                if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif label:
                    quality = scraper_utils.height_get_quality(label)
                elif title:
                    quality = scraper_utils.blog_get_quality(video, title, '')
                else:
                    quality = scraper_utils.blog_get_quality(video, stream_url, '')
                sources[stream_url] = {'quality': quality, 'direct': True}
                logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        logger.log('Exception during YesMovies XML Parse: %s' % (e), log_utils.LOGWARNING)
    return sources
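# A minimal, self-contained illustration of the ElementTree namespace handling
# both XML parsers above depend on (the sample feed is made up):
import xml.etree.ElementTree as ET

SAMPLE = ('<rss xmlns:jw="http://rss.jwpcdn.com/"><item>'
          '<jw:source file="http://a/v.mp4" label="720"/></item></rss>')
root = ET.fromstring(SAMPLE)
for src in root.findall('.//{http://rss.jwpcdn.com/}source'):
    file_url, label = src.get('file'), src.get('label')  # -> 'http://a/v.mp4', '720'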
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        q_str = ''
        match = re.search('>quality(.*?)<br\s*/>', html, re.I)
        if match:
            q_str = match.group(1)
            q_str = q_str.decode('utf-8').encode('ascii', 'ignore')
            # flags must be passed by keyword: re.sub's 4th positional argument is count
            q_str = re.sub('(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)

        fragment = dom_parser.parse_dom(html, 'div', {'class': 'video-embed'})
        if fragment:
            for match in re.finditer('<iframe[^>]+src="([^"]+)', fragment[0], re.I):
                stream_url = match.group(1)
                host = urlparse.urlparse(stream_url).hostname
                hoster = {'multi-part': False, 'host': host, 'class': self,
                          'quality': scraper_utils.blog_get_quality(video, q_str, host),
                          'views': None, 'rating': None, 'url': stream_url, 'direct': False}
                meta_match = re.search('class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
                if meta_match:
                    hoster['views'] = int(meta_match.group(1))
                    hoster['rating'] = meta_match.group(2)
                hosters.append(hoster)
    return hosters
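# Why flags= matters above: re.sub's signature is
# re.sub(pattern, repl, string, count=0, flags=0), so passing re.I | re.U
# positionally sets count to 34 and applies no flags at all; the substitution
# then silently stops after 34 replacements and stays case-sensitive.
import re

assert re.sub('a', '', 'A' * 5, re.I) == 'AAAAA'    # re.I consumed as count; no case-folding
assert re.sub('a', '', 'A' * 5, flags=re.I) == ''   # correct: flags actually applied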
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        q_str = ''
        quality = None
        match = re.search('>Category.*?td_col">([^<]+)', html)
        if match:
            quality = QUALITY_MAP.get(match.group(1).upper(), None)
        else:
            match = re.search('>Release.*?td_col">([^<]+)', html)
            if match:
                q_str = match.group(1).upper()

        # the original pattern was truncated after the capture group; matching up
        # to the closing quote is assumed here
        pattern = "td_cols.+?href='([^']+)"
        for match in re.finditer(pattern, html):
            url = match.group(1)
            if re.search('\.rar(\.|$)', url): continue  # skip archives; they are not streamable

            hoster = {'multi-part': False, 'class': self, 'views': None, 'url': url, 'rating': None,
                      'direct': False}
            hoster['host'] = urlparse.urlsplit(url).hostname
            if quality is None:
                hoster['quality'] = scraper_utils.blog_get_quality(video, q_str, hoster['host'])
            else:
                hoster['quality'] = scraper_utils.get_quality(video, hoster['host'], quality)
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters

    html = self._http_get(source_url, require_debrid=False, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.MOVIE:
        pattern = '<singlelink>(.*?)(?=<hr\s*/>|download>|thanks_button_div)'
    else:
        pattern = '<hr\s*/>\s*<strong>(.*?)</strong>.*?<singlelink>(.*?)(?=<hr\s*/>|download>|thanks_button_div)'

    for match in re.finditer(pattern, html, re.DOTALL):
        if video.video_type == VIDEO_TYPES.MOVIE:
            links = match.group(1)
            # movie pages carry the release title in the page header instead
            title_match = re.search('<h2>\s*<a[^>]+>(.*?)</a>', html)
            title = title_match.group(1) if title_match else ''
        else:
            title, links = match.groups()

        for link_match in re.finditer('href="([^"]+)', links):
            stream_url = link_match.group(1).lower()
            if any(link in stream_url for link in EXCLUDE_LINKS): continue

            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.blog_get_quality(video, title, host)
            hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': stream_url,
                      'rating': None, 'quality': quality, 'direct': False}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=False, cache_limit=.5)
    q_str = ''
    match = re.search('class="entry-title">([^<]+)', html)
    if match: q_str = match.group(1)

    pattern = 'href="?([^" ]+)(?:[^>]+>){2}\s+\|'
    for match in re.finditer(pattern, html, re.DOTALL):
        url = match.group(1)
        if 'adf.ly' in url: continue

        hoster = {'multi-part': False, 'class': self, 'views': None, 'url': url, 'rating': None,
                  'quality': None, 'direct': False}
        hoster['host'] = urlparse.urlsplit(url).hostname
        hoster['quality'] = scraper_utils.blog_get_quality(video, q_str, hoster['host'])
        hosters.append(hoster)
    return hosters
def _blog_get_url(self, video, delim='.'):
    url = None
    result = self.db_connection().get_related_url(video.video_type, video.title, video.year,
                                                  self.get_name(), video.season, video.episode)
    if result:
        url = result[0][0]
        logger.log('Got local related url: |%s|%s|%s|%s|%s|' % (video.video_type, video.title, video.year,
                                                                self.get_name(), url), log_utils.LOGDEBUG)
    else:
        try:
            select = int(kodi.get_setting('%s-select' % (self.get_name())))
        except:
            select = 0  # missing or non-numeric setting

        if video.video_type == VIDEO_TYPES.EPISODE:
            temp_title = re.sub('[^A-Za-z0-9 ]', '', video.title)
            if not scraper_utils.force_title(video):
                search_title = '%s S%02dE%02d' % (temp_title, int(video.season), int(video.episode))
                if isinstance(video.ep_airdate, datetime.date):
                    # fall back to an airdate-based search, e.g. "Title 2015.04.21"
                    fallback_search = '%s %s' % (temp_title, video.ep_airdate.strftime('%Y{0}%m{0}%d'.format(delim)))
                else:
                    fallback_search = ''
            else:
                if not video.ep_title: return None
                search_title = '%s %s' % (temp_title, video.ep_title)
                fallback_search = ''
        else:
            search_title = video.title
            fallback_search = ''

        results = self.search(video.video_type, search_title, video.year)
        if not results and fallback_search:
            results = self.search(video.video_type, fallback_search, video.year)

        if results:
            # TODO: First result isn't always the most recent...
            best_result = results[0]
            if select != 0:
                # user opted to pick the highest-quality result instead of the first
                best_qorder = 0
                for result in results:
                    if 'quality' in result:
                        quality = result['quality']
                    else:
                        # derive quality from the result title: "(720p)" first, then "[...]" suffix
                        match = re.search('\((\d+p)\)', result['title'])
                        if match:
                            quality = scraper_utils.height_get_quality(match.group(1))
                        else:
                            match = re.search('\[(.*)\]$', result['title'])
                            q_str = match.group(1) if match else ''
                            quality = scraper_utils.blog_get_quality(video, q_str, '')

                    logger.log('result: |%s|%s|%s|' % (result, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                    if Q_ORDER[quality] > best_qorder:
                        logger.log('Setting best as: |%s|%s|%s|' % (result, quality, Q_ORDER[quality]),
                                   log_utils.LOGDEBUG)
                        best_result = result
                        best_qorder = Q_ORDER[quality]

            url = best_result['url']
            self.db_connection().set_related_url(video.video_type, video.title, video.year,
                                                 self.get_name(), url, video.season, video.episode)
    return url
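# A stand-alone sketch of the episode search-string construction above, handy
# for testing outside the class (function name and defaults are illustrative,
# not scraper_utils API):
import datetime
import re

def episode_search_titles(title, season, episode, ep_airdate=None, delim='.'):
    temp_title = re.sub('[^A-Za-z0-9 ]', '', title)
    search_title = '%s S%02dE%02d' % (temp_title, int(season), int(episode))
    fallback_search = ''
    if isinstance(ep_airdate, datetime.date):
        fallback_search = '%s %s' % (temp_title, ep_airdate.strftime('%Y{0}%m{0}%d'.format(delim)))
    return search_title, fallback_search

# episode_search_titles("It's a Show", 5, 3, datetime.date(2015, 4, 21))
# -> ('Its a Show S05E03', 'Its a Show 2015.04.21')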