def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=2)
        urls = dom_parser.parse_dom(html, "iframe", ret="src")
        for iframe_url in urls:
            if "/ads/" in iframe_url:
                continue
            elif "/watch/" in iframe_url:
                url = urlparse.urljoin(self.base_url, iframe_url)
                html = self._http_get(url, cache_limit=2)
                urls += dom_parser.parse_dom(html, "iframe", ret="src")
                match = re.search("""location.href=['"]([^'"]+)""", html)
                if match:
                    urls.append(match.group(1))
            else:
                stream_url = iframe_url
                host = urlparse.urlparse(stream_url).hostname
                hoster = {"multi-part": False, "host": host, "class": self, "url": stream_url, "quality": QUALITIES.HIGH, "views": None, "rating": None, "direct": False}
                hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    headers = {'Referer': self.base_url}
    params = {'search': title}
    html = self._http_get(self.base_url, params=params, headers=headers, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'listCard'}):
        match_title = dom_parser.parse_dom(item, 'p', {'class': 'extraTitle'})
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_year = dom_parser.parse_dom(item, 'p', {'class': 'cardYear'})
        if match_url and match_title:
            match_url = match_url[0]
            match_title = match_title[0]
            match_year = match_year[0] if match_year else ''
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, SEARCH_URL)
    if video_type == VIDEO_TYPES.MOVIE:
        search = 'search'
    else:
        search = 'searchshow'
    search_url = search_url % (search, urllib.quote(title))
    html = self._http_get(search_url, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'movie'}):
        match_url = dom_parser.parse_dom(item, 'a', {'class': 'poster'}, ret='href')
        match_title = dom_parser.parse_dom(item, 'div', {'class': 'title'})
        match_year = dom_parser.parse_dom(item, 'div', {'class': 'year'})
        if match_url and match_title:
            match_url = match_url[0]
            match_title = match_title[0]
            match_year = match_year[0] if match_year else ''
            if match_title and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def get_sources(self, video):
    source_url = self.get_url(video)
    sources = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        q_str = dom_parser.parse_dom(html, 'div', {'class': 'poster-qulabel'})
        if q_str:
            q_str = q_str[0].replace(' ', '').upper()
            page_quality = Q_MAP.get(q_str, QUALITIES.HIGH)
        else:
            page_quality = QUALITIES.HIGH
        for fragment in dom_parser.parse_dom(html, 'div', {'class': 'tab_box'}):
            match = re.search('file\s*:\s*"([^"]+)', fragment)
            if match:
                stream_url = match.group(1)
            else:
                stream_url = self.__get_ajax_sources(fragment, page_url)
            if stream_url:
                host = self._get_direct_hostname(stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    quality = page_quality
                stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(page_url))
                source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
                sources.append(source)
    return sources
def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/movies/search?s=%s' % urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': '[^"]*c-content-product-2[^"]*'}):
        match_title_year = dom_parser.parse_dom(item, 'p', {'class': '[^"]*c-title[^"]*'})
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        is_season = dom_parser.parse_dom(item, 'div', {'class': '[^"]*c-bg-red[^"]*'})
        if match_title_year and match_url:
            match_title_year = match_title_year[0]
            match_url = match_url[0]
            if (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season):
                match_year = ''
                if video_type == VIDEO_TYPES.SEASON:
                    match_title = match_title_year
                    if season and not re.search('Season\s+(%s)\s+' % (season), match_title_year, re.I):
                        continue
                else:
                    match = re.search('(.*?)\s+(\d{4})$', match_title_year)
                    if match:
                        match_title, match_year = match.groups()
                    else:
                        match_title = match_title_year
                        match_year = ''
                match_url = urlparse.urljoin(match_url, 'watching.html')
                if not year or not match_year or year == match_year:
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results
def __get_sources(self, html, page_url): sources = [] fragment = dom_parser.parse_dom(html, 'div', {'class': 'video-content'}) if fragment: iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src') if iframe_url: iframe_url = iframe_url[0] if self.base_url in iframe_url: headers = {'Referer': page_url} html = self._http_get(iframe_url, headers=headers, cache_limit=.5) referer = iframe_url iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src') if iframe_url: iframe_url = iframe_url[0] headers = {'Referer': referer} html = self._http_get(iframe_url, headers=headers, cache_limit=.5) links = self._parse_sources_list(html) for link in links: host = self._get_direct_hostname(link) if host == 'gvideo': quality = scraper_utils.gv_get_quality(link) else: quality = links[link]['quality'] source = {'multi-part': False, 'url': link, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True} sources.append(source) else: host = urlparse.urlparse(iframe_url).hostname source = {'multi-part': False, 'url': iframe_url, 'host': host, 'class': self, 'quality': QUALITIES.HD720, 'views': None, 'rating': None, 'direct': False} sources.append(source) return sources
def get_sources(self, video): hosters = [] source_url = self.get_url(video) if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=8) sources = dom_parser.parse_dom(html, 'source', ret='src') iframes = dom_parser.parse_dom(html, 'iframe', {'class': 'movieframe'}, ret='src') headers = {'Referer': url} for iframe_url in iframes: html = self._http_get(iframe_url, headers=headers, allow_redirect=False, method='HEAD', cache_limit=0) if html.startswith('http'): sources.append(html) else: iframe_url = urlparse.urljoin(self.base_url, iframe_url) html = self._http_get(iframe_url, headers=headers, cache_limit=1) sources += dom_parser.parse_dom(html, 'source', ret='src') iframes += dom_parser.parse_dom(html, 'iframe', ret='src') for source in sources: host = self._get_direct_hostname(source) if host == 'gvideo': quality = scraper_utils.gv_get_quality(source) direct = True else: quality = QUALITIES.HIGH direct = False host = urlparse.urlparse(source).hostname source = {'multi-part': False, 'url': source, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct} hosters.append(source) return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: page_url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(page_url, cache_limit=.5) hosters += self.__add_sources( dom_parser.parse_dom(html, 'a', {'rel': 'nofollow'}, ret='href'), video) sources = [] for match in re.finditer('''\$\.get\('([^']+)'\s*,\s*(\{.*?\})''', html): ajax_url, params = match.groups() ajax_url = ajax_url + '?' + urllib.urlencode( scraper_utils.parse_params(params)) ajax_url = urlparse.urljoin(self.base_url, ajax_url) headers = {'Referer': page_url} headers.update(XHR) html = self._http_get(ajax_url, headers=headers, auth=False, cache_limit=.5) sources += dom_parser.parse_dom(html, 'source', {'type': '''video[^'"]*'''}, ret='src') sources += dom_parser.parse_dom(html, 'iframe', ret='src') hosters += self.__add_sources(sources, video, QUALITIES.HD720) return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        for button in dom_parser.parse_dom(html, 'li', {'class': 'playing_button'}):
            try:
                link = dom_parser.parse_dom(button, 'a', ret='href')
                match = re.search('php\?.*?=?([^"]+)', link[0])
                stream_url = base64.b64decode(match.group(1))
                match = re.search('(http://.*)', stream_url)
                stream_url = match.group(1)
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': False}
                hosters.append(hoster)
            except Exception as e:
                log_utils.log('Exception during tvonline source: %s - |%s|' % (e, button), log_utils.LOGDEBUG)
    return hosters
def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/%s.html' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser.parse_dom(html, 'ul', {'class': 'cfv'})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'li'):
            is_season = dom_parser.parse_dom(item, 'div', {'class': 'status'})
            if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                match_url = dom_parser.parse_dom(item, 'a', ret='href')
                match_title = dom_parser.parse_dom(item, 'a', ret='title')
                if match_url and match_title:
                    match_title = match_title[0]
                    match_url = match_url[0]
                    match_year = ''
                    if video_type == VIDEO_TYPES.SEASON:
                        if season and not re.search('Season\s+%s$' % (season), match_title, re.I):
                            continue
                    else:
                        match = re.search('-(\d{4})\.html', match_url)
                        if match:
                            match_year = match.group(1)
                    if not year or not match_year or year == match_year:
                        result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                        results.append(result)
    return results
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) fragment = dom_parser.parse_dom(html, 'table', {'id': 'streamlinks'}) if fragment: max_age = 0 now = min_age = int(time.time()) for row in dom_parser.parse_dom(fragment[0], 'tr', {'id': 'pt\d+'}): if video.video_type == VIDEO_TYPES.MOVIE: pattern = 'href="([^"]+).*?/>([^<]+).*?(?:<td>.*?</td>\s*){1}<td>(.*?)</td>\s*<td>(.*?)</td>' else: pattern = 'href="([^"]+).*?/>([^<]+).*?(<span class="linkdate">.*?)</td>\s*<td>(.*?)</td>' match = re.search(pattern, row, re.DOTALL) if match: url, host, age, quality = match.groups() age = self.__get_age(now, age) quality = quality.upper() if age > max_age: max_age = age if age < min_age: min_age = age host = host.strip() hoster = {'multi-part': False, 'class': self, 'url': scraper_utils.pathify_url(url), 'host': host, 'age': age, 'views': None, 'rating': None, 'direct': False} hoster['quality'] = scraper_utils.get_quality(video, host, QUALITY_MAP.get(quality, QUALITIES.HIGH)) hosters.append(hoster) unit = (max_age - min_age) / 100 if unit > 0: for hoster in hosters: hoster['rating'] = (hoster['age'] - min_age) / unit return hosters
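# Worked example for the age-based rating computed above (a sketch; 'age' is in whatever
# units __get_age returns): with min_age = 1000 and max_age = 11000, unit = (11000 - 1000) / 100 = 100,
# so a link whose age is 6000 gets rating = (6000 - 1000) / 100 = 50, i.e. a value on a 0-100 scale.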
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'film-container'})
        if fragment:
            iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
            if iframe_url:
                iframe_url = urlparse.urljoin(self.base_url, iframe_url[0])
                headers = {'Referer': page_url}
                html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
                sources = self._parse_sources_list(html)
                for source in sources:
                    quality = sources[source]['quality']
                    host = self._get_direct_hostname(source)
                    stream_url = source + '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(iframe_url))
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    match = re.search('(\d+[a-z]bps)', source)
                    if match:
                        hoster['extra'] = match.group(1)
                    hosters.append(hoster)
    hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
    return hosters
def search(self, video_type, title, year, season=""):
    results = []
    search_url = urlparse.urljoin(self.base_url, "/search?query=%s")
    search_url = search_url % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=8)
    for item in dom_parser.parse_dom(html, "div", {"class": "one_movie-item"}):
        match_url = dom_parser.parse_dom(item, "a", ret="href")
        match_title = dom_parser.parse_dom(item, "img", ret="alt")
        media_type = dom_parser.parse_dom(item, "div", {"class": "movie-series"})
        if not media_type:
            media_type = VIDEO_TYPES.MOVIE
        elif media_type[0] == "TV SERIE":
            media_type = VIDEO_TYPES.TVSHOW
        if match_url and match_title and video_type == media_type:
            match_url = match_url[0]
            match_title = match_title[0]
            match_year = re.search("-(\d{4})-", match_url)
            if match_year:
                match_year = match_year.group(1)
            else:
                match_year = ""
            if not year or not match_year or year == match_year:
                result = {"url": scraper_utils.pathify_url(match_url), "title": scraper_utils.cleanse_title(match_title), "year": match_year}
                results.append(result)
    return results
def __tv_search(self, title, year):
    results = []
    if title:
        norm_title = scraper_utils.normalize_title(title)
        url = '/series/letra/%s/' % (title[0])
        url = urlparse.urljoin(self.base_url, url)
        html = self._http_get(url, cache_limit=48)
        for item in dom_parser.parse_dom(html, 'li', {'class': '[^"]*bpM12[^"]*'}):
            title_frag = dom_parser.parse_dom(item, 'h2')
            year_frag = dom_parser.parse_dom(item, 'div', {'class': '[^"]*sectionDetail[^"]*'})
            match_url = dom_parser.parse_dom(item, 'a', ret='href')
            if title_frag and match_url:
                match_url = match_url[0]
                match = re.search('(.*?)<br>', title_frag[0])
                if match:
                    match_title = match.group(1)
                else:
                    match_title = title_frag[0]
                match_year = ''
                if year_frag:
                    match = re.search('(\d{4})', year_frag[0])
                    if match:
                        match_year = match.group(1)
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/index.php?menu=search&query=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    sections = {VIDEO_TYPES.MOVIE: 'movies', VIDEO_TYPES.TVSHOW: 'series'}
    fragment = dom_parser.parse_dom(html, 'div', {'id': sections[video_type]})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'figcaption'):
            match = re.search('title="([^"]+)[^>]+href="([^"]+)', item)
            if match:
                match_title_year, url = match.groups()
                match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''
                if match_title.startswith('Watch '):
                    match_title = match_title.replace('Watch ', '')
                if match_title.endswith(' Online'):
                    match_title = match_title.replace(' Online', '')
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'url': scraper_utils.pathify_url(url), 'year': match_year}
                    results.append(result)
    return results
def __movie_search(self, title, year):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search1.php?keywords=%s&ser=506')
    search_url = search_url % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=8)
    fragment = dom_parser.parse_dom(html, 'ul', {'class': 'cbp-rfgrid'})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'li'):
            match_url = dom_parser.parse_dom(item, 'a', ret='href')
            match_title_year = ''
            link_frag = dom_parser.parse_dom(item, 'a')
            if link_frag:
                match_title_year = dom_parser.parse_dom(link_frag[0], 'div')
            if match_url and match_title_year:
                match_url = match_url[0]
                match_title_year = match_title_year[0]
                match = re.search('(.*?)\s+\((\d{4})\)', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''
                if not year or not match_year or year == match_year:
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results
def _get_episode_url(self, show_url, video): url = urlparse.urljoin(self.base_url, show_url) html = self._http_get(url, cache_limit=8) pattern = "<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (video.season) match = re.search(pattern, html, re.DOTALL) if match: fragment = match.group(1) ep_ids = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'}, ret='class') episodes = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'}) airdates = dom_parser.parse_dom(fragment, 'span', {'class': 'airdate'}) ep_airdate = video.ep_airdate.strftime('%Y-%m-%d') if isinstance(video.ep_airdate, datetime.date) else '' norm_title = scraper_utils.normalize_title(video.ep_title) num_id, airdate_id, title_id = '', '', '' for episode, airdate, ep_id in zip(episodes, airdates, ep_ids): if ep_airdate and ep_airdate == airdate: airdate_id = ep_id match = re.search('(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode) if match: ep_num, ep_title = match.groups() if int(ep_num) == int(video.episode): num_id = ep_id if norm_title and norm_title in scraper_utils.normalize_title(ep_title): title_id = ep_id best_id = '' if not scraper_utils.force_title(video): if num_id: best_id = num_id if kodi.get_setting('airdate-fallback') == 'true' and airdate_id: best_id = airdate_id if kodi.get_setting('title-fallback') == 'true' and title_id: best_id = title_id else: if title_id: best_id = title_id if best_id: return EP_URL % (best_id)
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'tab_container'})
        if fragment:
            q_str = 'HDRIP'
            match = re.search('>Quality(.*?)<br\s*/?>', html, re.I)
            if match:
                q_str = match.group(1)
                q_str = re.sub('(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)
            for source in dom_parser.parse_dom(fragment[0], 'iframe', ret='src'):
                host = urlparse.urlparse(source).hostname
                quality = scraper_utils.blog_get_quality(video, q_str, host)
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': source, 'direct': False}
                match = re.search('class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
                if match:
                    hoster['views'] = int(match.group(1))
                    hoster['rating'] = match.group(2)
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    sources = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=0.5)
        if video.video_type == VIDEO_TYPES.EPISODE:
            html = self.__get_episode_fragment(html, video)
        for item in dom_parser.parse_dom(html, "div", {"class": "linkTr"}):
            stream_url = dom_parser.parse_dom(item, "div", {"class": '[^"]*linkHiddenUrl[^"]*'})
            q_str = dom_parser.parse_dom(item, "div", {"class": '[^"]*linkQualityText[^"]*'})
            if stream_url and q_str:
                stream_url = stream_url[0]
                q_str = q_str[0]
                host = urlparse.urlparse(stream_url).hostname
                base_quality = QUALITY_MAP.get(q_str, QUALITIES.HIGH)
                quality = scraper_utils.get_quality(video, host, base_quality)
                source = {"multi-part": False, "url": stream_url, "host": host, "class": self, "quality": quality, "views": None, "rating": None, "direct": False}
                sources.append(source)
    return sources
def search(self, video_type, title, year, season=''): results = [] if title and title[0].isalpha(): page_url = ['/list/?char=%s' % (title[0])] while page_url: page_url = urlparse.urljoin(self.base_url, page_url[0]) html = self._http_get(page_url, cache_limit=48) fragment = dom_parser.parse_dom(html, 'ul', {'class': 'list-film-char'}) if fragment: norm_title = scraper_utils.normalize_title(title) for match in re.finditer('href="([^"]+)[^>]+>(.*?)</a>', fragment[0]): match_url, match_title = match.groups() match_title = re.sub('</?strong>', '', match_title) match = re.search('Season\s+(\d+)', match_title, re.I) if match: if season and int(season) != int(match.group(1)): continue if norm_title in scraper_utils.normalize_title(match_title): result = {'title': scraper_utils.cleanse_title(match_title), 'year': '', 'url': scraper_utils.pathify_url(match_url)} results.append(result) if results: break page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href') return results
def search(self, video_type, title, year, season=''): results = [] search_url = urlparse.urljoin(self.base_url, '/search/') search_url += urllib.quote_plus(title) html = self._http_get(search_url, cache_limit=1) for fragment in dom_parser.parse_dom(html, 'div', {'class': 'inner'}): name = dom_parser.parse_dom(fragment, 'div', {'class': 'name'}) if name: match = re.search('href="([^"]+)[^>]+>(.*?)</a>', name[0]) if match: match_url, match_title_year = match.groups() if 'tv-series' in match_url and video_type == VIDEO_TYPES.MOVIE: continue match_title_year = re.sub('</?[^>]*>', '', match_title_year) match_title_year = re.sub('[Ww]atch\s+[Mm]ovie\s*', '', match_title_year) match_title_year = match_title_year.replace('’', "'") match_title, match_year = scraper_utils.extra_year(match_title_year) if not match_year: year_span = dom_parser.parse_dom(fragment, 'span', {'class': 'year'}) if year_span: year_text = dom_parser.parse_dom(year_span[0], 'a') if year_text: match_year = year_text[0].strip() if not year or not match_year or year == match_year: result = {'title': scraper_utils.cleanse_title(match_title), 'url': scraper_utils.pathify_url(match_url), 'year': match_year} results.append(result) return results
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) quality = None match = re.search('Links\s+-\s+Quality\s*([^<]*)</h1>', html, re.DOTALL | re.I) if match: quality = QUALITY_MAP.get(match.group(1).strip().upper()) seen_links = {} for item in dom_parser.parse_dom(html, 'li', {'id': 'playing_button'}): stream_url = dom_parser.parse_dom(item, 'a', ret='href') if stream_url: stream_url = stream_url[0] match = re.search('url=([^&"]+)', stream_url) if match: stream_url = base64.b64decode(match.group(1)) else: match = re.search('stream\.php\?([^"]+)', stream_url) if match: stream_url = base64.b64decode(match.group(1)) i = stream_url.rfind('&&') if i > -1: stream_url = stream_url[i + 2:] if stream_url in seen_links: continue seen_links[stream_url] = True host = urlparse.urlparse(stream_url).hostname hoster = {'multi-part': False, 'host': host, 'class': self, 'url': stream_url, 'quality': scraper_utils.get_quality(video, host, quality), 'rating': None, 'views': None, 'direct': False} hosters.append(hoster) return hosters
def search(self, video_type, title, year, season=''):
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
        match = re.search('href="([^"]+)', item)
        match_title = dom_parser.parse_dom(item, 'span', {'class': 'tt'})
        year_frag = dom_parser.parse_dom(item, 'span', {'class': 'year'})
        if match and match_title:
            url = match.group(1)
            match_title = match_title[0]
            if re.search('\d+\s*x\s*\d+', match_title):
                continue  # exclude episodes
            match_title, match_year = scraper_utils.extra_year(match_title)
            if not match_year and year_frag:
                match_year = year_frag[0]
            match = re.search('(.*?)\s+\d{3,}p', match_title)
            if match:
                match_title = match.group(1)
            extra = dom_parser.parse_dom(item, 'span', {'class': 'calidad2'})
            if extra:
                match_title += ' [%s]' % (extra[0])
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/movie/search/')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    sections = {VIDEO_TYPES.MOVIE: 'movies', VIDEO_TYPES.TVSHOW: 'series'}
    fragment = dom_parser.parse_dom(html, 'div', {'id': sections[video_type]})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'figcaption'):
            match = re.search('title="([^"]+)[^>]+href="([^"]+)', item)
            if match:
                match_title_year, url = match.groups()
                match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''
                url = urlparse.urljoin(url, 'watching.html')
                if match_title.startswith('Watch '):
                    match_title = match_title.replace('Watch ', '')
                if match_title.endswith(' Online'):
                    match_title = match_title.replace(' Online', '')
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
                    results.append(result)
    return results
def _get_episode_url(self, show_url, video): episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode) result = self._default_get_episode_url(show_url, video, episode_pattern) if result: return result url = urlparse.urljoin(self.base_url, show_url) html = self._http_get(url, cache_limit=2) fragment = dom_parser.parse_dom(html, "ul", {"class": "episode_list"}) if fragment: ep_urls = dom_parser.parse_dom(fragment[0], "a", ret="href") ep_dates = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_air_d"}) ep_titles = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_name"}) force_title = scraper_utils.force_title(video) if not force_title and kodi.get_setting("airdate-fallback") == "true" and video.ep_airdate: for ep_url, ep_date in zip(ep_urls, ep_dates): log_utils.log( "Quikr Ep Airdate Matching: %s - %s - %s" % (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG, ) if video.ep_airdate == scraper_utils.to_datetime(ep_date, "%Y-%m-%d").date(): return scraper_utils.pathify_url(ep_url) if force_title or kodi.get_setting("title-fallback") == "true": norm_title = scraper_utils.normalize_title(video.ep_title) for ep_url, ep_title in zip(ep_urls, ep_titles): ep_title = re.sub("<span>.*?</span>\s*", "", ep_title) log_utils.log( "Quikr Ep Title Matching: %s - %s - %s" % (ep_url, norm_title, video.ep_title), log_utils.LOGDEBUG, ) if norm_title == scraper_utils.normalize_title(ep_title): return scraper_utils.pathify_url(ep_url)
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
        match = re.search('href="([^"]+).*?alt="([^"]+)', item, re.DOTALL)
        if match:
            url, match_title_year = match.groups()
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not match_year:
                year_fragment = dom_parser.parse_dom(item, 'span', {'class': 'year'})
                if year_fragment:
                    match_year = year_fragment[0]
                else:
                    match_year = ''
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        q_str = dom_parser.parse_dom(html, 'span', {'class': 'calidad\d*'})
        if q_str:
            if q_str[0].upper() == 'COMING SOON':
                return hosters
            try:
                quality = scraper_utils.height_get_quality(q_str[0])
            except:
                quality = QUALITIES.HIGH
        else:
            quality = QUALITIES.HIGH
        fragment = dom_parser.parse_dom(html, 'div', {'id': 'player\d+'})
        if fragment:
            for match in re.finditer('<iframe[^>]+src="([^"]+)', fragment[0], re.I):
                stream_url = match.group(1)
                host = urlparse.urlparse(stream_url).hostname
                hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': False}
                hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''): results = [] search_url = urlparse.urljoin(self.base_url, '/?s=%s' % (urllib.quote_plus(title))) html = self._http_get(search_url, cache_limit=8) for movie in dom_parser.parse_dom(html, 'div', {'class': 'movie'}): match = re.search('href="([^"]+)', movie) if match: match_url = match.group(1) match_title_year = dom_parser.parse_dom(movie, 'img', ret='alt') if match_title_year: match_title_year = match_title_year[0] match = re.search('(.*?)\s+\((\d{4})\)', match_title_year) if match: match_title, match_year = match.groups() else: match_title = match_title_year match_year = dom_parser.parse_dom(movie, 'div', {'class': 'year'}) try: match_year = match_year[0] except: match_year = '' if not year or not match_year or year == match_year: result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year} results.append(result) return results
def search(self, video_type, title, year, season=''): search_url = urlparse.urljoin(self.base_url, '/search/%s.html') search_url = search_url % (urllib.quote_plus(title)) html = self._http_get(search_url, cache_limit=8) results = [] for thumb in dom_parser.parse_dom(html, 'div', {'class': 'thumb'}): match_title = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='title') url = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='href') if match_title and url: match_title, url = match_title[0], url[0] is_season = re.search('Season\s+(\d+)$', match_title, re.I) if (not is_season and video_type == VIDEO_TYPES.MOVIE) or ( is_season and video_type == VIDEO_TYPES.SEASON): match_year = '' if video_type == VIDEO_TYPES.MOVIE: match_year = dom_parser.parse_dom( thumb, 'div', {'class': '[^"]*status-year[^"]*'}) if match_year: match_year = match_year[0] else: if season and int(is_season.group(1)) != int(season): continue if not year or not match_year or year == match_year: result = { 'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year } results.append(result) return results
def search(self, video_type, title, year, season=''): search_url = urlparse.urljoin(self.base_url, '/search/%s.html') search_url = search_url % (urllib.quote_plus(title)) html = self._http_get(search_url, cache_limit=1) results = [] fragment = dom_parser.parse_dom(html, 'div', {'class': 'movie'}) if fragment: for item in dom_parser.parse_dom(fragment[0], 'li'): match_url = dom_parser.parse_dom(item, 'a', ret='href') match_title = dom_parser.parse_dom(item, 'span', {'class': 'text'}) match_year = dom_parser.parse_dom(item, 'span', {'class': 'year'}) if match_url and match_title: match_url = match_url[0] match_title = re.sub('</?strong>', '', match_title[0]) is_season = re.search('Season\s+(\d+)$', match_title, re.I) if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON): if video_type == VIDEO_TYPES.MOVIE: if match_year: match_year = match_year[0] else: match_year = '' else: if season and int(is_season.group(1)) != int(season): continue match_year = '' if not year or not match_year or year == match_year: result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)} results.append(result) return results
def __get_gk_links(self, html, page_url): sources = {} for link in dom_parser.parse_dom(html, 'div', {'class': '[^"]*server_line[^"]*'}): film_id = dom_parser.parse_dom(link, 'a', ret='data-film') name_id = dom_parser.parse_dom(link, 'a', ret='data-name') server_id = dom_parser.parse_dom(link, 'a', ret='data-server') if film_id and name_id and server_id: data = {'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0]} headers = XHR headers['Referer'] = page_url url = urlparse.urljoin(self.base_url, LINK_URL) html = self._http_get(url, data=data, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 's' in js_data: url = urlparse.urljoin(self.base_url, LINK_URL2) params = {'u': js_data['s'], 'w': '100%', 'h': 420} html = self._http_get(url, params=params, data=data, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 'data' in js_data and js_data['data']: if isinstance(js_data['data'], basestring): sources[js_data['data']] = QUALITIES.HIGH else: for link in js_data['data']: stream_url = link['files'] if self._get_direct_hostname(stream_url) == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) elif 'quality' in link: quality = scraper_utils.height_get_quality(link['quality']) else: quality = QUALITIES.HIGH sources[stream_url] = quality return sources
def get_sources(self, video): source_url = self.get_url(video) sources = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) is_3d = False page_quality = QUALITIES.HD720 title = dom_parser.parse_dom(html, 'title') if title: title = title[0] match = re.search('(\d{3,})p', title) if match: page_quality = scraper_utils.height_get_quality(match.group(1)) is_3d = True if re.search('\s+3D\s+', title) else False fragments = dom_parser.parse_dom(html, 'div', {'class': 'txt-block'}) + dom_parser.parse_dom(html, 'li', {'class': 'elemento'}) for fragment in fragments: for match in re.finditer('href="([^"]+)', fragment): stream_url = match.group(1) host = urlparse.urlparse(stream_url).hostname q_str = dom_parser.parse_dom(fragment, 'span', {'class': 'd'}) q_str = q_str[0].upper() if q_str else '' base_quality = QUALITY_MAP.get(q_str, page_quality) quality = scraper_utils.get_quality(video, host, base_quality) source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': False} source['format'] = 'x265' source['3D'] = is_3d sources.append(source) return sources
def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/?s=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    results = []
    for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
        match = re.search('href="([^"]+)', item)
        match_title = dom_parser.parse_dom(item, 'span', {'class': 'tt'})
        year_frag = dom_parser.parse_dom(item, 'span', {'class': 'year'})
        if match and match_title:
            url = match.group(1)
            match_title = match_title[0]
            if re.search('\d+\s*x\s*\d+', match_title):
                continue  # exclude episodes
            match = re.search('(.*?)\s+\((\d{4})\)', match_title)
            if match:
                match_title, match_year = match.groups()
            else:
                match_year = ''
                if year_frag:
                    match_year = year_frag[0]
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
                results.append(result)
    return results
def get_sources(self, video):
    source_url = self.get_url(video)
    sources = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for fragment in dom_parser.parse_dom(html, 'div', {'class': 'video'}):
            for source in dom_parser.parse_dom(fragment, 'iframe', ret='src') + dom_parser.parse_dom(fragment, 'script', ret='src'):
                if 'validateemb' in source:
                    continue
                host = urlparse.urlparse(source).hostname
                source = {'multi-part': False, 'url': source, 'host': host, 'class': self, 'quality': QUALITIES.HD720, 'views': None, 'rating': None, 'direct': False}
                sources.append(source)
    return sources
def __get_direct_links(self, html, page_url): hosters = [] match = re.search("&u=([^']+)", html) if match: u = match.group(1) fragment = dom_parser.parse_dom(html, 'ul', {'class': '[^"]*rektab[^"]*'}) if fragment: page = dom_parser.parse_dom(fragment[0], 'a', ret='id') if page: page = page[0] ss = dom_parser.parse_dom(fragment[0], 'a', {'id': page}, ret='class') for s in ss: yt_url = YT_URL % (page, s, u) url = urlparse.urljoin(self.base_url, yt_url) headers = {'Referer': page_url} html = self._http_get(url, headers=headers, cache_limit=.5) sources = self._parse_sources_list(html) for source in sources: host = self._get_direct_hostname(source) if sources[source]['quality']: quality = sources[source]['quality'] else: quality = QUALITIES.HIGH stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua()) hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True} hosters.append(hoster) return hosters
def search(self, video_type, title, year, season=''): search_url = urlparse.urljoin(self.base_url, '/search/%s.html') search_url = search_url % (urllib.quote_plus(title)) html = self._http_get(search_url, cache_limit=8) results = [] for thumb in dom_parser.parse_dom(html, 'div', {'class': 'thumb'}): match_title = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='title') url = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='href') if match_title and url: match_title, url = match_title[0], url[0] is_season = re.search('Season\s+(\d+)$', match_title, re.I) if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON): match_year = '' if video_type == VIDEO_TYPES.MOVIE: match_year = dom_parser.parse_dom(thumb, 'div', {'class': '[^"]*status-year[^"]*'}) if match_year: match_year = match_year[0] else: if season and int(is_season.group(1)) != int(season): continue if not year or not match_year or year == match_year: result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year} results.append(result) return results
def search(self, video_type, title, year, season=''):
    results = []
    page_url = urlparse.urljoin(self.base_url, '/tvseries/index.php?&page=1')
    while page_url:
        html = self._http_get(page_url, cache_limit=48)
        html = re.sub('<!--.*?-->', '', html)
        norm_title = scraper_utils.normalize_title(title)
        for td in dom_parser.parse_dom(html, 'td', {'class': 'topic_content'}):
            match_url = re.search('href="([^"]+)', td)
            match_title_year = dom_parser.parse_dom(td, 'img', ret='alt')
            if match_url and match_title_year:
                match_url = match_url.group(1)
                if not match_url.startswith('/'):
                    match_url = '/tvseries/' + match_url
                match_title, match_year = scraper_utils.extra_year(match_title_year[0])
                if norm_title in scraper_utils.normalize_title(match_title):
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
        match = re.search('href="([^"]+)[^>]*>>', html)
        if match:
            page_url = urlparse.urljoin(self.base_url, match.group(1))
        else:
            page_url = ''
    return results
def _get_episode_url(self, show_url, video): force_title = scraper_utils.force_title(video) title_fallback = kodi.get_setting('title-fallback') == 'true' norm_title = scraper_utils.normalize_title(video.ep_title) page_url = [show_url] too_old = False while page_url and not too_old: url = urlparse.urljoin(self.base_url, page_url[0]) html = self._http_get(url, require_debrid=True, cache_limit=1) posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'}) for post in posts: if self.__too_old(post): too_old = True break if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post: match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post) if match: url, title = match.groups() if not force_title: if scraper_utils.release_check(video, title, require_title=False): return scraper_utils.pathify_url(url) else: if title_fallback and norm_title: match = re.search('</strong>(.*?)</p>', post) if match and norm_title == scraper_utils.normalize_title(match.group(1)): return scraper_utils.pathify_url(url) page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) fragment = dom_parser.parse_dom(html, 'div', {'class': 'repro'}) if fragment: iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src') if iframe_url: html = self._http_get(iframe_url[0], cache_limit=.5) fragment = dom_parser.parse_dom(html, 'div', {'id': 'botones'}) if fragment: for media_url in dom_parser.parse_dom(fragment[0], 'a', ret='href'): media_url = media_url.replace(' ', '') if self.base_url in media_url or 'pelispedia.biz' in media_url: headers = {'Referer': iframe_url[0]} html = self._http_get(media_url, headers=headers, cache_limit=.5) hosters += self.__get_page_links(html) hosters += self.__get_pk_links(html) hosters += self.__get_gk_links(html, url) else: host = urlparse.urlparse(media_url).hostname hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': QUALITIES.HD720, 'views': None, 'rating': None, 'url': media_url, 'direct': False} hosters.append(hoster) return hosters
def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/search?keyword=%s' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    results = []
    match_year = ''
    fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*movie-list[^"]*'})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'div', {'class': 'item'}):
            links = dom_parser.parse_dom(item, 'a', {'class': 'name'}, ret='href')
            titles = dom_parser.parse_dom(item, 'a', {'class': 'name'})
            is_season = dom_parser.parse_dom(item, 'div', {'class': 'status'})
            for match_url, match_title in zip(links, titles):
                if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                    if video_type == VIDEO_TYPES.SEASON:
                        if season and not re.search('\s+%s$' % (season), match_title):
                            continue
                    if not year or not match_year or year == match_year:
                        result = {'title': scraper_utils.cleanse_title(match_title), 'year': '', 'url': scraper_utils.pathify_url(match_url)}
                        results.append(result)
    return results
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, require_debrid=True, cache_limit=.5) title = dom_parser.parse_dom(html, 'title') if title: title = re.sub('^\[ST\]\s*–\s*', '', title[0]) meta = scraper_utils.parse_episode_link(title) page_quality = scraper_utils.height_get_quality(meta['height']) else: page_quality = QUALITIES.HIGH fragment = dom_parser.parse_dom(html, 'section', {'class': '[^"]*entry-content[^"]*'}) if fragment: for section in dom_parser.parse_dom(fragment[0], 'p'): match = re.search('([^<]*)', section) meta = scraper_utils.parse_episode_link(match.group(1)) if meta['episode'] != '-1' or meta['airdate']: section_quality = scraper_utils.height_get_quality(meta['height']) else: section_quality = page_quality if Q_ORDER[section_quality] < Q_ORDER[page_quality]: quality = section_quality else: quality = page_quality for stream_url in dom_parser.parse_dom(section, 'a', ret='href'): host = urlparse.urlparse(stream_url).hostname hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'quality': quality, 'direct': False} hosters.append(hoster) return hosters
def search(self, video_type, title, year, season=''):
    results = []
    html = self._http_get(self.base_url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    for series in dom_parser.parse_dom(html, 'div', {'class': 'series-item'}):
        match_url = dom_parser.parse_dom(series, 'a', ret='href')
        match_title = dom_parser.parse_dom(series, 'h3')
        match_year = dom_parser.parse_dom(series, 'p')
        if match_url and match_title:
            match_url = match_url[0]
            match_title = match_title[0]
            if match_year:
                match = re.search('\s*(\d{4})\s+', match_year[0])
                if match:
                    match_year = match.group(1)
                else:
                    match_year = ''
            else:
                match_year = ''
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) for server_list in dom_parser.parse_dom(html, 'ul', {'class': 'episodes'}): labels = dom_parser.parse_dom(server_list, 'a') hash_ids = dom_parser.parse_dom(server_list, 'a', ret='data-id') for label, hash_id in zip(labels, hash_ids): if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match( label, video.episode): continue hash_url = urlparse.urljoin(self.base_url, HASH_URL) query = {'id': hash_id, 'update': '0'} query.update(self.__get_token(query)) hash_url = hash_url + '?' + urllib.urlencode(query) headers = XHR headers['Referer'] = url html = self._http_get(hash_url, headers=headers, cache_limit=.5) js_data = scraper_utils.parse_json(html, hash_url) sources = {} link_type = js_data.get('type') target = js_data.get('target') grabber = js_data.get('grabber') params = js_data.get('params') if link_type == 'iframe' and target: sources[target] = { 'direct': False, 'quality': QUALITIES.HD720 } elif grabber and params: sources = self.__grab_links(grabber, params, url) for source in sources: direct = sources[source]['direct'] quality = sources[source]['quality'] if direct: host = self._get_direct_hostname(source) else: host = urlparse.urlparse(source).hostname hoster = { 'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': source, 'direct': direct } hosters.append(hoster) return hosters
def _get_episode_url(self, season_url, video):
    url = urlparse.urljoin(self.base_url, season_url)
    html = self._http_get(url, cache_limit=8)
    fragment = dom_parser.parse_dom(html, 'ul', {'class': 'episodes'})
    if fragment:
        for link in dom_parser.parse_dom(fragment[0], 'a'):
            if self.__episode_match(link, video.episode):
                return season_url
def __get_pages(self, url):
    pages = []
    url = urlparse.urljoin(self.base_url, url)
    html = self._http_get(url, cache_limit=2)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'pagination'})
    if fragment:
        pages = dom_parser.parse_dom(fragment[0], 'a', ret='href')
    return pages
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = urlparse.urljoin(self.base_url, '/movies/')
    html = self._http_get(search_url, params={'q': title}, cache_limit=4)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'movie_about'}):
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title_year = dom_parser.parse_dom(item, 'a')
        if match_url and match_title_year:
            match_url = match_url[0]
            match_title, match_year = scraper_utils.extra_year(match_title_year[0])
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search')
    data = {'search': title, 'type': 'title'}
    html = self._http_get(search_url, data=data, headers=XHR, cache_limit=1)
    for item in dom_parser.parse_dom(html, 'li'):
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title_year = dom_parser.parse_dom(item, 'a')
        if match_url and match_title_year:
            match_url = match_url[0]
            match_title, match_year = scraper_utils.extra_year(match_title_year[0])
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def __get_source_page(self, source_url):
    html = ''
    url = urlparse.urljoin(self.base_url, source_url)
    page_html = self._http_get(url, cache_limit=8)
    movie_id = dom_parser.parse_dom(page_html, 'div', {'id': 'media-player'}, 'movie-id')
    token = dom_parser.parse_dom(page_html, 'div', {'id': 'media-player'}, 'player-token')
    if movie_id and token:
        server_url = SL_URL % (movie_id[0], token[0])
        headers = XHR
        headers['Referer'] = url
        url = urlparse.urljoin(self.base_url, server_url)
        html = self._http_get(url, headers=headers, cache_limit=8)
    return html
def __get_gk_links(self, html, page_url): sources = {} for link in dom_parser.parse_dom(html, 'div', {'class': '[^"]*server_line[^"]*'}): film_id = dom_parser.parse_dom(link, 'a', ret='data-film') name_id = dom_parser.parse_dom(link, 'a', ret='data-name') server_id = dom_parser.parse_dom(link, 'a', ret='data-server') if film_id and name_id and server_id: data = { 'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0] } headers = XHR headers['Referer'] = page_url url = urlparse.urljoin(self.base_url, LINK_URL) html = self._http_get(url, data=data, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 's' in js_data: url = urlparse.urljoin(self.base_url, LINK_URL2) params = {'u': js_data['s'], 'w': '100%', 'h': 420} html = self._http_get(url, params=params, data=data, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 'data' in js_data and js_data['data']: if isinstance(js_data['data'], basestring): sources[js_data['data']] = QUALITIES.HIGH else: for link in js_data['data']: stream_url = link['files'] if self._get_direct_hostname( stream_url) == 'gvideo': quality = scraper_utils.gv_get_quality( stream_url) elif 'quality' in link: quality = scraper_utils.height_get_quality( link['quality']) else: quality = QUALITIES.HIGH sources[stream_url] = quality return sources
def search(self, video_type, title, year, season=''): search_url = urlparse.urljoin( self.base_url, '/results?q=%s' % urllib.quote_plus(title)) html = self._http_get(search_url, cache_limit=.25) results = [] for result in dom_parser.parse_dom(html, 'div', {'class': 'cell'}): match = re.search( 'class="video_title".*?href="([^"]+)"[^>]*>\s*([^<]+)', result, re.DOTALL) if match: url, match_title_year = match.groups() match = re.search('(.*?)\s+\((\d{4})\)', match_title_year) if match: match_title, match_year = match.groups() else: match_title = match_title_year match = re.search( 'class="video_quality".*?Year\s*(?:</b>)?\s*:\s*(\d{4})', result, re.DOTALL) if match: match_year = match.group(1) else: match_year = '' if not year or not match_year or year == match_year: result = { 'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year } results.append(result) return results
def search(self, video_type, title, year):
    search_url = urlparse.urljoin(self.base_url, '/?s=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=.25)
    elements = dom_parser.parse_dom(html, 'li', {'class': '[^"]*%s[^"]*' % (CATEGORIES[video_type])})
    results = []
    for element in elements:
        match = re.search('href="([^"]+)[^>]+>\s*([^<]+)', element, re.DOTALL)
        if match:
            url, match_title_year = match.groups()
            match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                match_title = match_title_year
                match_year = ''
            if not year or not match_year or year == match_year:
                result = {'title': match_title, 'year': match_year, 'url': url.replace('https', 'http').replace(self.base_url, '')}
                results.append(result)
    return results
def __get_post_links(self, html, video):
    sources = {}
    post = dom_parser.parse_dom(html, 'div', {'class': 'postContent'})
    if post:
        post = post[0].content
        results = re.findall('<p\s+style="text-align:\s*center;">(?:\s*<strong>)*(.*?)<br(.*?)</p>', post, re.DOTALL)
        if not results:
            match = re.search('>Release Name\s*:(.*?)<br', post, re.I)
            release = match.group(1) if match else ''
            match = re.search('>Download\s*:(.*?)</p>', post, re.DOTALL | re.I)
            links = match.group(1) if match else ''
            results = [(release, links)]
        for result in results:
            release, links = result
            release = re.sub('</?[^>]*>', '', release)
            for match in re.finditer('href="([^"]+)">([^<]+)', links):
                stream_url, hostname = match.groups()
                if hostname.upper() in ['TORRENT SEARCH', 'VIP FILE']:
                    continue
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.blog_get_quality(video, release, host)
                sources[stream_url] = quality
    return sources
def _http_get(self, url, data=None, headers=None, auth=True, method=None, cache_limit=8):
    # return an uncached blank page if no username or password is configured
    if not self.username or not self.password:
        return ''
    html = super(self.__class__, self)._http_get(url, data=data, headers=headers, method=method, cache_limit=cache_limit)
    if auth and not dom_parser.parse_dom(html, 'a', {'title': 'My Account'}, ret='href'):
        log_utils.log('Logging in for url (%s)' % (url), log_utils.LOGDEBUG)
        self.__login()
        html = super(self.__class__, self)._http_get(url, data=data, headers=headers, method=method, cache_limit=0)
    return html
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    if not html:
        url = scraper_utils.urljoin(self.old_base_url, source_url)
        html = self._http_get(url, require_debrid=True, cache_limit=.5)
    sources.update(self.__get_post_links(html, video))
    if kodi.get_setting('%s-include_comments' % (self.get_name())) == 'true':
        for _attrs, comment in dom_parser.parse_dom(html, 'div', {'id': re.compile('commentbody-\d+')}):
            sources.update(self.__get_comment_links(comment, video))
    for source in sources:
        if scraper_utils.excluded_link(source):
            continue
        host = urlparse.urlparse(source).hostname
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': sources[source], 'direct': False}
        hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    if title:
        first_letter = title[:1].lower()
        if first_letter.isdigit():
            first_letter = '0-9'
        search_url = '/search.php/%s/' % (first_letter)
        search_url = urlparse.urljoin(self.base_url, search_url)
        html = self._http_get(search_url, cache_limit=24)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'home'})
        if fragment:
            norm_title = scraper_utils.normalize_title(title)
            for match in re.finditer('''href=["']([^'"]+)[^>]+>([^<]+)''', fragment[0]):
                url, match_title_year = match.groups()
                match_title, match_year = scraper_utils.extra_year(match_title_year)
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''): search_url = self.base_url if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]: search_url += '/?tv' search_url += '/index.php?advanced_search=' search_url += urllib.quote_plus(title) search_url += '&year=' + urllib.quote_plus(str(year)) search_url += '&advanced_search=Search' html = self._http_get(search_url, cache_limit=.25) results = [] for element in dom_parser.parse_dom(html, 'div', {'class': 'list_box_title'}): match = re.search('href="([^"]+)"\s+title="(?:Watch )?([^"]+)', element) if match: url, match_title_year = match.groups() match = re.search('(.*?)(?:\s+\(?\s*(\d{4})\s*\)?)', match_title_year) if match: match_title, match_year = match.groups() else: match_title = match_title_year match_year = '' if not year or not match_year or year == match_year: result = { 'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year } results.append(result) return results
def __alt_search(self, video_type, title, year, season=''): results = [] params = title.lower() if year: params += ' %s' % (year) if video_type == VIDEO_TYPES.SEASON and season: params += ' Season %s' % (season) params = {'key': params} search_url = urlparse.urljoin(self.base_url, '/search') html = self._http_get(search_url, params=params, cache_limit=1) norm_title = scraper_utils.normalize_title(title) for item in dom_parser.parse_dom(html, 'div', {'class': 'caption'}): match = re.search('href="([^"]+)[^>]+>(.*?)<span[^>]*>', item) if match: match_url, match_title = match.groups() is_season = re.search('-season-\d+', match_url) if (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season): if video_type == VIDEO_TYPES.SEASON: if season and not re.search('season-0*%s$' % (season), match_url): continue match_title = re.sub('</?[^>]*>', '', match_title) match_title = re.sub('\s+Full\s+Movie', '', match_title) match = re.search('-(\d{4})(?:$|-)', match_url) if match: match_year = match.group(1) else: match_year = '' if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year): result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)} results.append(result) return results
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        q_str = ''
        match = re.search('>quality(.*?)<br\s*/>', html, re.I)
        if match:
            q_str = match.group(1)
            q_str = q_str.decode('utf-8').encode('ascii', 'ignore')
            q_str = re.sub('(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'video-embed'})
        if fragment:
            for match in re.finditer('<iframe[^>]+src="([^"]+)', fragment[0], re.I):
                stream_url = match.group(1)
                host = urlparse.urlparse(stream_url).hostname
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': scraper_utils.blog_get_quality(video, q_str, host), 'views': None, 'rating': None, 'url': stream_url, 'direct': False}
                match = re.search('class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
                if match:
                    hoster['views'] = int(match.group(1))
                    hoster['rating'] = match.group(2)
                hosters.append(hoster)
    return hosters
def parseDOM(html, name='', attrs=None, ret=False):
    if attrs:
        attrs = dict((key, re.compile(value + ('$' if value else ''))) for key, value in attrs.iteritems())
    results = dom_parser.parse_dom(html, name, attrs, ret)
    if ret:
        results = [result.attrs[ret.lower()] for result in results]
    else:
        results = [result.content for result in results]
    return results
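# Usage sketch for the compatibility wrapper above (hypothetical HTML snippet; assumes dom_parser
# is the dom_parser2-style module whose results expose .attrs and .content, as the wrapper relies on):
#   sample = '<a class="name" href="/watch/123">Example Title</a>'
#   parseDOM(sample, 'a', attrs={'class': 'name'}, ret='href')  # attribute values, e.g. ['/watch/123']
#   parseDOM(sample, 'a', attrs={'class': 'name'})              # inner content, e.g. ['Example Title']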
def resolve_link(self, link):
    if not link.startswith('http'):
        stream_url = urlparse.urljoin(self.base_url, link)
        html = self._http_get(stream_url, cache_limit=0)
        iframe_url = dom_parser.parse_dom(html, 'iframe', ret='src')
        if iframe_url:
            return iframe_url[0]
    else:
        return link