def get_sources(self, video):
    """Collect the hoster links listed in the entry section of a video page.

    A page-wide quality is read from an ``NNNp`` token in the page <title>
    (``QUALITIES.HD720`` when absent) together with a 3D flag. Every link
    found is tagged with format ``x265``.

    :param video: video object passed to ``self.get_url``.
    :return: list of source dicts; empty when no page url is known.
    """
    found = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return found

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)

    page_quality = QUALITIES.HD720
    is_3d = False
    title_tags = dom_parser2.parse_dom(html, 'title')
    if title_tags:
        page_title = title_tags[0].content
        height = re.search('(\d{3,})p', page_title)
        if height:
            page_quality = scraper_utils.height_get_quality(height.group(1))
        is_3d = bool(re.search('\s+3D\s+', page_title))

    entry = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
    if not entry:
        return found

    # Links live inside <h3> headings of the entry block.
    for _attrs, heading in dom_parser2.parse_dom(entry[0].content, 'h3'):
        for link_attrs, _content in dom_parser2.parse_dom(heading, 'a', req='href'):
            stream_url = link_attrs['href']
            host = urlparse.urlparse(stream_url).hostname
            found.append({
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': page_quality,
                'views': None,
                'rating': None,
                'direct': False,
                'format': 'x265',
                '3D': is_3d,
            })
    return found
def search(self, video_type, title, year, season=''):
    """Search for movies or seasons whose listing matches *title*.

    The title is reduced to letters, digits and spaces, whitespace runs are
    turned into dashes, and the result is appended to ``/movie/search/``.

    :param video_type: ``VIDEO_TYPES.MOVIE`` or ``VIDEO_TYPES.SEASON``.
    :param title: title to look for.
    :param year: wanted year; this listing exposes no year, so it never filters.
    :param season: optional season number used to filter season results.
    :return: list of dicts with ``title``, ``year`` and ``url`` keys.
    """
    results = []
    slug = re.sub('[^A-Za-z0-9 ]', '', title)
    slug = re.sub('\s+', '-', slug)
    search_url = scraper_utils.urljoin(self.base_url, '/movie/search/') + slug
    html = self._http_get(search_url, cache_limit=8)

    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'ml-item'}):
        info = dom_parser2.parse_dom(item, 'span', {'class': 'mli-info'})
        links = dom_parser2.parse_dom(item, 'a', req='href')
        match_year = ''
        if not info or not links:
            continue

        match_url = links[0].attrs['href']
        match_title = info[0].content
        is_season = re.search('season\s+(\d+)', match_title, re.I)
        wanted_movie = video_type == VIDEO_TYPES.MOVIE and not is_season
        wanted_season = video_type == VIDEO_TYPES.SEASON and is_season
        if not (wanted_movie or wanted_season):
            continue

        match_title = re.sub('</?h\d+>', '', match_title)
        if video_type == VIDEO_TYPES.SEASON:
            if season and int(is_season.group(1)) != int(season):
                continue

        # NOTE(review): the suffix is applied to movies and seasons alike here;
        # the flattened original does not show whether it was season-only.
        match_url += '/watching.html'
        if not year or not match_year or year == match_year:
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url),
            })
    return results
def get_sources(self, video):
    """Return the direct streams exposed by the player iframe of a video page.

    The iframe inside the ``film-container`` div is fetched with the page as
    referer, its source list is parsed, and each stream becomes a direct
    hoster. A bitrate token in the stream url is stored under ``extra`` and
    used to sort the result in descending order.

    :param video: video object passed to ``self.get_url``.
    :return: list of hoster dicts; empty when nothing is found.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    page_html = self._http_get(page_url, cache_limit=.5)
    container = dom_parser2.parse_dom(page_html, 'div', {'class': 'film-container'})
    if not container:
        return hosters
    frames = dom_parser2.parse_dom(container[0].content, 'iframe', req='src')
    if not frames:
        return hosters

    iframe_url = scraper_utils.urljoin(self.base_url, frames[0].attrs['src'])
    iframe_html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    streams = scraper_utils.parse_sources_list(self, iframe_html)
    for stream in streams:
        quality = streams[stream]['quality']
        host = scraper_utils.get_direct_hostname(self, stream)
        play_headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url}
        stream_url = stream + scraper_utils.append_headers(play_headers)
        hoster = {
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': True,
        }
        bitrate = re.search('(\d+[a-z]bps)', stream)
        if bitrate:
            hoster['extra'] = bitrate.group(1)
        hosters.append(hoster)

    hosters.sort(key=lambda entry: entry.get('extra', ''), reverse=True)
    return hosters
def _get_episode_url(self, show_url, video):
    """Locate the url of one episode on a show page.

    Matching is tried in this order: an ``-SxxEyy`` link (skipped when the
    title is forced), the air date when the ``airdate-fallback`` setting is
    on, and finally the normalized episode title.

    :param show_url: site-relative url of the show page.
    :param video: video object carrying season, episode, air date and title.
    :return: pathified episode url, or ``None`` when nothing matches.
    """
    page_url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(page_url, cache_limit=2)
    if not html:
        return None

    force_title = scraper_utils.force_title(video)
    episodes = dom_parser2.parse_dom(html, 'div', {'class': 'el-item'})

    if not force_title:
        episode_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (int(video.season), int(video.episode))
        by_number = re.search(episode_pattern, html)
        if by_number:
            return scraper_utils.pathify_url(by_number.group(1))

        if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
            wanted_date = '%02d-%02d-%d' % (video.ep_airdate.day, video.ep_airdate.month, video.ep_airdate.year)
            for episode in episodes:
                body = episode.content
                links = dom_parser2.parse_dom(body, 'a', req='href')
                dates = dom_parser2.parse_dom(body, 'div', {'class': 'date'})
                if links and dates and wanted_date == dates[0].content.strip():
                    return scraper_utils.pathify_url(links[0].attrs['href'])

    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for episode in episodes:
            body = episode.content
            links = dom_parser2.parse_dom(body, 'a', req='href')
            names = dom_parser2.parse_dom(body, 'div', {'class': 'e-name'})
            if links and names and norm_title == scraper_utils.normalize_title(names[0].content):
                return scraper_utils.pathify_url(links[0].attrs['href'])

    return None
def _get_episode_url(self, show_url, video):
    """Walk the paginated post listing of a show looking for one episode.

    Pages are followed through the ``nextpostslink`` anchor until a post is
    reported as too old by ``self.__too_old``. Only posts that contain the
    TV show category marker and the show url are considered.

    :param show_url: site-relative url of the show listing.
    :param video: video object describing the wanted episode.
    :return: pathified post url, or ``None`` when no post matches.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)

    next_page = [show_url]
    too_old = False
    while next_page and not too_old:
        listing_url = scraper_utils.urljoin(self.base_url, next_page[0])
        html = self._http_get(listing_url, require_debrid=True, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]

        # Headings and post bodies are paired positionally.
        for (post_url, post_title), post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break

            if CATEGORIES[VIDEO_TYPES.TVSHOW] not in post or show_url not in post:
                continue

            if not force_title:
                if scraper_utils.release_check(video, post_title, require_title=False):
                    return scraper_utils.pathify_url(post_url)
            elif title_fallback and norm_title:
                strong = re.search('<strong>(.*?)</strong>', post)
                if strong and norm_title == scraper_utils.normalize_title(strong.group(1)):
                    return scraper_utils.pathify_url(post_url)

        next_page = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if next_page:
            next_page = [next_page[0].attrs['href']]
def __get_source_page(self, video_type, page_url):
    """Fetch the server-list html that belongs to a movie/series page.

    The slug and numeric id are taken from *page_url*, the quick-preview
    endpoint is queried for the "View all episodes" link, that page and its
    hidden images are requested, and finally the server-list endpoint is
    read as JSON.

    :param video_type: ``VIDEO_TYPES`` value; anything but MOVIE is a series.
    :param page_url: site-relative url of the form ``/movie/<slug>-<id>.html``.
    :return: ``(movie_id, watching_url, html)``; three empty strings when the
        url does not match or no watching link is found.
    """
    match = re.search('/movie/(.*?)-(\d+)\.html', page_url)
    if not match:
        return '', '', ''

    slug, movie_id = match.groups()
    vid_type = 'movie' if video_type == VIDEO_TYPES.MOVIE else 'series'
    qp_url = QP_URL.format(slug=slug, movie_id=movie_id, vid_type=vid_type)
    qp_url = scraper_utils.urljoin(self.base_url, qp_url)
    headers = {'Referer': scraper_utils.urljoin(self.base_url, page_url)}
    headers.update(XHR)
    html = self._http_get(qp_url, headers=headers, cache_limit=8)
    watching_url = dom_parser2.parse_dom(html, 'a', {'title': re.compile('View all episodes')}, req='href')
    if not watching_url:
        return '', '', ''

    watching_url = watching_url[0].attrs['href']
    page_html = self._http_get(watching_url, headers={'Referer': scraper_utils.urljoin(self.base_url, page_url)}, cache_limit=8)
    # The hidden images are requested only for their side effect; the
    # response body is discarded.
    for attrs, _content in dom_parser2.parse_dom(page_html, 'img', {'class': 'hidden'}, req='src'):
        _img = self._http_get(attrs['src'], headers={'Referer': watching_url}, cache_limit=8)

    sl_url = SL_URL.format(movie_id=movie_id)
    sl_url = scraper_utils.urljoin(self.base_url, sl_url)
    html = self._http_get(sl_url, headers=headers, cache_limit=8)
    js_data = scraper_utils.parse_json(html, sl_url)
    # Only a missing key or a non-mapping payload is an expected failure;
    # a bare except would also hide KeyboardInterrupt/SystemExit.
    try:
        html = js_data['html']
    except (KeyError, TypeError):
        html = ''

    return movie_id, watching_url, html
def search(self, video_type, title, year, season=''):
    """Search ``/search/<title>.html`` for movies or seasons.

    Items carrying an ``mli-eps`` span are treated as seasons, all others
    as movies. The year comes from the ``jt-info`` element when present.

    :param video_type: ``VIDEO_TYPES.MOVIE`` or ``VIDEO_TYPES.SEASON``.
    :param title: title to look for (non-alphanumerics are stripped).
    :param year: wanted year, or empty to accept any.
    :param season: optional season number the title must end with.
    :return: list of dicts with ``title``, ``year`` and ``url`` keys.
    """
    results = []
    query = re.sub('[^A-Za-z0-9 ]', '', title)
    search_url = scraper_utils.urljoin(self.base_url, '/search/')
    search_url += '%s.html' % (urllib.quote_plus(query))
    html = self._http_get(search_url, cache_limit=8)

    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'ml-item'}):
        info = dom_parser2.parse_dom(item, 'span', {'class': 'mli-info'})
        links = dom_parser2.parse_dom(item, 'a', req='href')
        year_match = re.search('class="jt-info">(\d{4})<', item)
        is_episodes = dom_parser2.parse_dom(item, 'span', {'class': 'mli-eps'})

        wanted_movie = video_type == VIDEO_TYPES.MOVIE and not is_episodes
        wanted_season = video_type == VIDEO_TYPES.SEASON and is_episodes
        if not (wanted_movie or wanted_season):
            continue
        if not info or not links:
            continue

        match_url = links[0].attrs['href']
        match_title = re.sub('</?h2>', '', info[0].content)
        match_title = re.sub('\s+\d{4}$', '', match_title)

        if video_type == VIDEO_TYPES.SEASON:
            if season and not re.search('Season\s+0*%s$' % (season), match_title):
                continue

        match_year = year_match.group(1) if year_match else ''
        if not year or not match_year or year == match_year:
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url),
            })
    return results
def get_sources(self, video):
    """Gather hosters from the button list inside a video page's iframe.

    Links whose url contains this scraper's own name are fetched and handed
    to ``__get_page_links`` / ``__get_pk_links``; every other link is
    reported as an external HD720 hoster.

    :param video: video object passed to ``self.get_url``.
    :return: list of hoster dicts; empty when the page has no usable iframe.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'repro'})
    if not fragment:
        return hosters

    iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
    if not iframe_url:
        return hosters

    iframe_url = iframe_url[0].attrs['src']
    iframe_html = self._http_get(iframe_url, cache_limit=.5)
    own_name = self.get_name().lower()
    for _attrs, buttons in dom_parser2.parse_dom(iframe_html, 'div', {'id': 'botones'}):
        for attrs, _content in dom_parser2.parse_dom(buttons, 'a', req='href'):
            media_url = attrs['href'].replace(' ', '')
            if own_name in media_url:
                # iframe_url is already a plain string here; indexing it
                # (as the old code did) sent only its first character.
                headers = {'Referer': iframe_url}
                media_html = self._http_get(media_url, headers=headers, cache_limit=.5)
                hosters += self.__get_page_links(media_html)
                hosters += self.__get_pk_links(media_html)
            else:
                host = urlparse.urlparse(media_url).hostname
                hosters.append({
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': QUALITIES.HD720,
                    'views': None,
                    'rating': None,
                    'url': media_url,
                    'direct': False,
                })

    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query ``/search`` for movies or series matching *title*.

    An item without a ``movie-series`` div counts as a movie; one labelled
    ``TV SERIE`` counts as a TV show. Any other label never matches. The
    year is taken from a ``-YYYY-`` token in the result url.

    :param video_type: wanted ``VIDEO_TYPES`` value.
    :param title: title to look for (sent lower-cased).
    :param year: wanted year, or empty to accept any.
    :param season: unused.
    :return: list of dicts with ``url``, ``title`` and ``year`` keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search')
    search_type = 'movies' if video_type == VIDEO_TYPES.MOVIE else 'series'
    query = {'query': title.lower(), 'type': search_type}
    html = self._http_get(search_url, params=query, cache_limit=8)

    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'one_movie-item'}):
        links = dom_parser2.parse_dom(item, 'a', req='href')
        images = dom_parser2.parse_dom(item, 'img', req='alt')
        media_type = dom_parser2.parse_dom(item, 'div', {'class': 'movie-series'})
        if not media_type:
            media_type = VIDEO_TYPES.MOVIE
        elif media_type[0].content == 'TV SERIE':
            media_type = VIDEO_TYPES.TVSHOW
        # Otherwise media_type stays the raw parse result and the comparison
        # with video_type below fails, which skips the item.

        if not (links and images and video_type == media_type):
            continue

        match_url = links[0].attrs['href']
        match_title = images[0].attrs['alt']
        year_match = re.search('-(\d{4})-', match_url)
        match_year = year_match.group(1) if year_match else ''

        if not year or not match_year or year == match_year:
            results.append({
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
            })
    return results
def get_sources(self, video):
    """Return the gvideo streams exposed by a page's player iframe.

    The iframe html is handed to ``__get_embedded_sources`` and
    ``__get_linked_sources``; each returns a dict with a ``sources``
    iterable and an optional ``subs`` entry.

    :param video: video object passed to ``self.get_url``.
    :return: list of direct hoster dicts; empty when nothing is found.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)
    player = dom_parser2.parse_dom(html, 'div', {'class': 'player'})
    if not player:
        return hosters

    frames = dom_parser2.parse_dom(player[0].content, 'iframe', req='src')
    if not frames:
        return hosters

    html = self._http_get(frames[0].attrs['src'], cache_limit=.25)
    sources = [self.__get_embedded_sources(html), self.__get_linked_sources(html)]

    for source in sources:
        for stream_url in source['sources']:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            # NOTE(review): only gvideo streams are reported. The flattened
            # original defines no quality for other hosts, so the whole
            # hoster construction is assumed to sit inside the gvideo test.
            if host != 'gvideo':
                continue

            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            quality = scraper_utils.gv_get_quality(stream_url)
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True,
                'subs': source.get('subs', True),
            })

    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search ``/search/<title>`` and return the matching entries.

    TV-series urls are dropped when a movie is wanted. The year is split
    off the link text; when that fails it is read from the ``year`` span.

    :param video_type: wanted ``VIDEO_TYPES`` value.
    :param title: title to look for.
    :param year: wanted year, or empty to accept any.
    :param season: unused.
    :return: list of dicts with ``title``, ``url`` and ``year`` keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/') + urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)

    for _attrs, block in dom_parser2.parse_dom(html, 'div', {'class': 'inner'}):
        name = dom_parser2.parse_dom(block, 'div', {'class': 'name'})
        if not name:
            continue

        anchors = dom_parser2.parse_dom(name[0].content, 'a', req='href')
        if not anchors:
            continue

        match_url = anchors[0].attrs['href']
        label = anchors[0].content
        if 'tv-series' in match_url and video_type == VIDEO_TYPES.MOVIE:
            continue

        # Strip markup and the "Watch Movie" prefix before splitting the year.
        label = re.sub('</?[^>]*>', '', label)
        label = re.sub('[Ww]atch\s+[Mm]ovie\s*', '', label)
        label = label.replace('’', "'")
        match_title, match_year = scraper_utils.extra_year(label)

        if not match_year:
            year_span = dom_parser2.parse_dom(block, 'span', {'class': 'year'})
            if year_span:
                year_text = dom_parser2.parse_dom(year_span[0].content, 'a')
                if year_text:
                    match_year = year_text[0].content.strip()

        if not year or not match_year or year == match_year:
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'url': scraper_utils.pathify_url(match_url),
                'year': match_year,
            })

    return results
def get_sources(self, video):
    """Collect embedded and additional hoster links from a video page.

    The best quality label found among the ``movies-quality`` links of the
    entry block (``QUALITIES.HIGH`` by default) is passed to
    ``scraper_utils.get_quality`` for every hoster.

    :param video: video object passed to ``self.get_url``.
    :return: list of hoster dicts; empty when no page url is known.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)

    best_quality = QUALITIES.HIGH
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
    if fragment:
        for match in re.finditer('href="[^"]*/movies-quality/[^"]*[^>]*>([^<]+)', fragment[0].content, re.I):
            quality = Q_MAP.get(match.group(1).upper(), QUALITIES.HIGH)
            if Q_ORDER[quality] > Q_ORDER[best_quality]:
                best_quality = quality

    sources = []
    for attrs, _content in dom_parser2.parse_dom(html, 'a', req='data-vid'):
        # data-vid holds escaped iframe markup. Extraction stays best-effort,
        # but no longer swallows KeyboardInterrupt/SystemExit the way the
        # previous bare except did.
        try:
            embedded = dom_parser2.parse_dom(scraper_utils.cleanse_title(attrs['data-vid']), 'iframe', req='src')
        except Exception:
            continue
        if embedded:
            sources.append(embedded[0])

    fragment = dom_parser2.parse_dom(html, 'table', {'class': 'additional-links'})
    if fragment:
        sources += dom_parser2.parse_dom(fragment[0].content, 'a', req='href')

    for element in sources:
        # Anchors carry href, embedded iframes carry src.
        stream_url = element.attrs.get('href') or element.attrs.get('src')
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.get_quality(video, host, best_quality)
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
        })
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search ``/results`` for *title* and return the matching entries.

    A POST to ``av`` is issued first and the search is sent with the
    ``begin_referer``/``prounder`` cookies. When the response references
    ``jquery.js`` in a script tag, the search is repeated uncached.

    :param video_type: unused by this search.
    :param title: title to look for.
    :param year: wanted year, or empty to accept any.
    :param season: unused.
    :return: list of dicts with ``url``, ``title`` and ``year`` keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/results')
    params = {'q': title}
    referer = search_url + '?' + urllib.urlencode(params)
    headers = {'Referer': referer}
    headers.update(XHR)
    _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)

    cookies = {'begin_referer': referer, 'prounder': 1}
    html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=8)
    if any('jquery.js' in match.attrs['src'] for match in dom_parser2.parse_dom(html, 'script', req='src')):
        html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=0)

    for _attrs, cell in dom_parser2.parse_dom(html, 'div', {'class': 'cell'}):
        title_frag = dom_parser2.parse_dom(cell, 'div', {'class': 'video_title'})
        year_frag = dom_parser2.parse_dom(cell, 'div', {'class': 'video_quality'})
        if not title_frag:
            continue

        match = dom_parser2.parse_dom(title_frag[0].content, 'a', req='href')
        if not match:
            continue

        match_url = match[0].attrs['href']
        match_title = match[0].content
        # A missing fragment (IndexError) or a fragment without a year
        # (AttributeError on None) simply means "year unknown".
        try:
            match_year = re.search('\s+(\d{4})\s+', year_frag[0].content).group(1)
        except (IndexError, AttributeError, TypeError):
            match_year = ''

        if not year or not match_year or year == match_year:
            results.append({
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
            })
    return results
def _get_episode_url(self, show_url, video):
    """Scan the paginated posts of a show for the wanted episode.

    Paging follows the ``nextpostslink`` anchor and stops once
    ``self.__too_old`` flags a post. The first anchor of a qualifying post
    supplies both the url and the release title.

    :param show_url: url of the first listing page (passed to
        ``_http_get`` as is).
    :param video: video object describing the wanted episode.
    :return: pathified post url, or ``None`` when no post matches.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)

    next_page = [show_url]
    too_old = False
    while next_page and not too_old:
        html = self._http_get(next_page[0], require_debrid=True, cache_limit=1)
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if self.__too_old(post):
                too_old = True
                break

            if CATEGORIES[VIDEO_TYPES.TVSHOW] not in post or show_url not in post:
                continue

            anchors = dom_parser2.parse_dom(post, 'a', req='href')
            if not anchors:
                continue

            post_url, post_title = anchors[0].attrs['href'], anchors[0].content
            if not force_title:
                if scraper_utils.release_check(video, post_title, require_title=False):
                    return scraper_utils.pathify_url(post_url)
            elif title_fallback and norm_title:
                trailing = re.search('</strong>(.*?)</p>', post)
                if trailing and norm_title == scraper_utils.normalize_title(trailing.group(1)):
                    return scraper_utils.pathify_url(post_url)

        next_page = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if next_page:
            next_page = [next_page[0].attrs['href']]
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search ``/movies.php`` (English cookies set) for *title*.

    Rows whose link text contains ``(tvshow)`` are shows, all others are
    movies. The year is the last div in the row that starts with four
    digits.

    :param video_type: ``VIDEO_TYPES.MOVIE`` or ``VIDEO_TYPES.TVSHOW``.
    :param title: title to look for.
    :param year: wanted year, or empty to accept any.
    :param season: unused.
    :return: list of dicts with ``url``, ``title`` and ``year`` keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/movies.php')
    cookies = {'onlylanguage': 'en', 'lang': 'en'}
    params = {'list': 'search', 'search': title}
    html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=8)

    for _attrs, row in dom_parser2.parse_dom(html, 'TR', {'id': re.compile('coverPreview\d+')}):
        anchors = dom_parser2.parse_dom(row, 'a', req='href')
        if not anchors:
            continue

        match_url = anchors[0].attrs['href']
        match_title = anchors[0].content
        is_show = re.search('\(tvshow\)', match_title, re.I)
        unwanted_show = video_type == VIDEO_TYPES.MOVIE and is_show
        unwanted_movie = video_type == VIDEO_TYPES.TVSHOW and not is_show
        if unwanted_show or unwanted_movie:
            continue

        match_title = match_title.replace('(TVshow)', '').strip()

        match_year = ''
        for _div_attrs, div in dom_parser2.parse_dom(row, 'div'):
            year_match = re.match('\s*(\d{4})\s*', div)
            if year_match:
                match_year = year_match.group(1)

        if not year or not match_year or year == match_year:
            results.append({
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
            })
    return results
def _get_episode_url(self, show_url, video):
    """Find the url of one episode inside the show's ``episode_list`` blocks.

    ``_default_get_episode_url`` is tried first with an ``-sXeY`` pattern.
    If it yields nothing, the air date (``airdate-fallback`` setting, not
    used when the title is forced) and then the normalized episode title
    (``title-fallback`` setting or forced title) are compared against the
    parsed episode list.

    :param show_url: site-relative url of the show page.
    :param video: video object carrying season, episode, air date and title.
    :return: pathified episode url, or ``None`` when nothing matches.
    """
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
    parts = dom_parser2.parse_dom(html, 'ul', {'class': 'episode_list'})
    fragment = '\n'.join(part.content for part in parts)
    result = self._default_get_episode_url(fragment, video, episode_pattern)
    if result:
        return result

    # The three lists are paired positionally below.
    ep_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(fragment, 'a', req='href')]
    ep_dates = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_air_d'})]
    ep_titles = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_name'})]

    force_title = scraper_utils.force_title(video)
    if not force_title and kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
        for ep_url, ep_date in zip(ep_urls, ep_dates):
            logger.log('Quikr Ep Airdate Matching: %s - %s - %s' % (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG)
            if video.ep_airdate == scraper_utils.to_datetime(ep_date, '%Y-%m-%d').date():
                return scraper_utils.pathify_url(ep_url)

    # Without an episode title there is nothing to compare; skipping keeps an
    # empty normalized title from matching an episode whose name also
    # normalizes to nothing.
    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for ep_url, ep_title in zip(ep_urls, ep_titles):
            ep_title = re.sub('<span>.*?</span>\s*', '', ep_title)
            logger.log('Quikr Ep Title Matching: %s - %s - %s' % (ep_url.encode('utf-8'), ep_title.encode('utf-8'), video.ep_title), log_utils.LOGDEBUG)
            if norm_title == scraper_utils.normalize_title(ep_title):
                return scraper_utils.pathify_url(ep_url)
def get_sources(self, video):
    """Return the hosters listed in the ``ldr-item`` rows of a video page.

    Each row may also provide a click count (``views``) and a point score
    (reported as ``rating`` = score * 10); both fall back to ``None`` when
    they are missing or malformed.

    :param video: video object passed to ``self.get_url``.
    :return: list of hoster dicts; empty when no page url is known.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    for _attrs, link in dom_parser2.parse_dom(html, 'div', {'class': 'ldr-item'}):
        stream_url = dom_parser2.parse_dom(link, 'a', req='data-actuallink')

        # Missing element -> IndexError, no number in it -> AttributeError.
        try:
            watched = dom_parser2.parse_dom(link, 'div', {'class': 'click-count'})
            views = re.search(' (\d+) ', watched[0].content).group(1)
        except (IndexError, AttributeError, TypeError):
            views = None

        # Non-numeric score text additionally raises ValueError.
        try:
            score = dom_parser2.parse_dom(link, 'div', {'class': 'point'})
            score = int(score[0].content)
            rating = score * 10 if score else None
        except (IndexError, AttributeError, TypeError, ValueError):
            rating = None

        if stream_url:
            stream_url = stream_url[0].attrs['data-actuallink'].strip()
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': views,
                'rating': rating,
                'url': stream_url,
                'direct': False,
            })
    return hosters
def get_sources(self, video):
    """Resolve the ``movieplay`` iframes of a video page into sources.

    Iframes whose url looks like openload/oload are reported as they are,
    with a quality derived from the url. Every other iframe is expanded
    through ``__get_links``, which yields urls with their own quality and
    direct flag.

    :param video: video object passed to ``self.get_url``.
    :return: list of source dicts; empty when no page url is known.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    for _attrs, player in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}):
        frames = dom_parser2.parse_dom(player, 'iframe', req='src')
        if not frames:
            continue

        iframe_src = frames[0].attrs['src']
        if re.search('o(pen)?load', iframe_src, re.I):
            meta = scraper_utils.parse_movie_link(iframe_src)
            quality = scraper_utils.height_get_quality(meta['height'])
            links = {iframe_src: {'quality': quality, 'direct': False}}
        else:
            links = self.__get_links(iframe_src, url)

        for link in links:
            direct = links[link]['direct']
            quality = links[link]['quality']
            if direct:
                host = scraper_utils.get_direct_hostname(self, link)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(link)
                play_headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': url}
                stream_url = link + scraper_utils.append_headers(play_headers)
            else:
                host = urlparse.urlparse(link).hostname
                stream_url = link

            hosters.append({
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': direct,
            })

    return hosters
def _get_episode_url(self, show_url, video):
    """Pick the loader id of one episode from a season's dropdown section.

    Every episode link is checked for a number, air date and title match.
    When the title is not forced, later checks override earlier ones:
    number, then air date (``airdate-fallback`` setting), then title
    (``title-fallback`` setting). With a forced title only the title match
    counts.

    :param show_url: site-relative url of the show page.
    :param video: video object carrying season, episode, air date and title.
    :return: ``EP_URL`` formatted with the chosen id, or ``None``.
    """
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=8)
    pattern = "<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (video.season)
    match = re.search(pattern, html, re.DOTALL)
    if not match:
        return

    fragment = match.group(1)
    episodes = dom_parser2.parse_dom(fragment, 'a', {'id': 'epiloader'}, req='class')
    airdates = dom_parser2.parse_dom(fragment, 'span', {'class': 'airdate'})
    ep_airdate = video.ep_airdate.strftime('%Y-%m-%d') if isinstance(video.ep_airdate, datetime.date) else ''
    norm_title = scraper_utils.normalize_title(video.ep_title)

    num_id, airdate_id, title_id = '', '', ''
    for episode, airdate in zip(episodes, airdates):
        ep_id = episode.attrs['class']
        episode = episode.content
        # parse_dom yields match objects; the date text is in .content.
        # Comparing the object itself to a string could never succeed.
        if ep_airdate and ep_airdate == airdate.content.strip():
            airdate_id = ep_id

        match = re.search('(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode)
        if match:
            ep_num, ep_title = match.groups()
            if int(ep_num) == int(video.episode):
                num_id = ep_id
            if norm_title and norm_title in scraper_utils.normalize_title(ep_title):
                title_id = ep_id

    best_id = ''
    if not scraper_utils.force_title(video):
        if num_id:
            best_id = num_id
        if kodi.get_setting('airdate-fallback') == 'true' and airdate_id:
            best_id = airdate_id
        if kodi.get_setting('title-fallback') == 'true' and title_id:
            best_id = title_id
    else:
        if title_id:
            best_id = title_id

    if best_id:
        return EP_URL % (best_id)
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search ``/search/<title>`` and build results from the returned posts.

    For TV shows the first TAGS link of each post in the right category
    identifies the show; duplicates are reported once. For movies each
    heading is parsed with ``parse_movie_link`` and kept when its title
    and year are compatible and the post is not too old.

    :param video_type: ``VIDEO_TYPES.TVSHOW`` or ``VIDEO_TYPES.MOVIE``.
    :param title: title to look for.
    :param year: wanted year (movies only), or empty to accept any.
    :param season: unused.
    :return: list of dicts with ``url``, ``title`` and ``year`` keys.
    """
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    html = self._http_get(search_url, require_debrid=True, cache_limit=1)
    post_matches = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})

    if video_type == VIDEO_TYPES.TVSHOW:
        seen_urls = set()
        for _attr, post in post_matches:
            if CATEGORIES[video_type] not in post:
                continue

            tag = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if not tag:
                continue

            show_url, match_title = tag.groups()
            if show_url in seen_urls:
                continue

            seen_urls.add(show_url)
            results.append({
                'url': scraper_utils.pathify_url(show_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': '',
            })

    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [entry.content for entry in post_matches]
        # Headings and post bodies are paired positionally.
        for (post_url, post_title), post in zip(headings, posts):
            if CATEGORIES[video_type] not in post or self.__too_old(post):
                continue

            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            title_ok = match_norm_title in norm_title or norm_title in match_norm_title
            year_ok = not year or not match_year or year == match_year
            if title_ok and year_ok:
                results.append({
                    'url': scraper_utils.pathify_url(post_url),
                    'title': scraper_utils.cleanse_title(full_title),
                    'year': match_year,
                })

    return results
def search(self, video_type, title, year, season=''):
    """Search ``/search/<title>.html`` for movies or seasons.

    List items carrying a ``status`` div are seasons, all others movies.
    Movie years come from a ``-YYYY`` token in the url; seasons have no
    year and are filtered by a trailing ``Season N`` in the title.

    :param video_type: ``VIDEO_TYPES.MOVIE`` or ``VIDEO_TYPES.SEASON``.
    :param title: title to look for.
    :param year: wanted year, or empty to accept any.
    :param season: optional season number the title must end with.
    :return: list of dicts with ``title``, ``year`` and ``url`` keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    listing = dom_parser2.parse_dom(html, 'ul', {'class': 'cfv'})
    if not listing:
        return results

    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(listing[0].content, 'li'):
        is_season = dom_parser2.parse_dom(item, 'div', {'class': 'status'})
        wanted_movie = not is_season and video_type == VIDEO_TYPES.MOVIE
        wanted_season = is_season and video_type == VIDEO_TYPES.SEASON
        if not (wanted_movie or wanted_season):
            continue

        anchors = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
        if not anchors:
            continue

        match_title = anchors[0].attrs['title']
        match_url = anchors[0].attrs['href']
        match_year = ''
        if video_type == VIDEO_TYPES.SEASON:
            if season and not re.search('Season\s+%s$' % (season), match_title, re.I):
                continue
        else:
            year_match = re.search('-(\d{4})[-.]', match_url)
            if year_match:
                match_year = year_match.group(1)

        match_norm_title = scraper_utils.normalize_title(match_title)
        title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
        if title_match and (not year or not match_year or year == match_year):
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url),
            })

    return results
def __proc_results(self, url, title, year):
    """Extract matching entries from one listing page.

    Each ``bpM12`` list item supplies a title (the <h2> text up to the
    first ``<br>``, or the whole <h2> content when there is none), an
    optional four-digit year from its ``sectionDetail`` div, and a link.

    :param url: site-relative url of the listing page.
    :param title: title that must be contained in the entry title.
    :param year: wanted year, or empty to accept any.
    :return: list of dicts with ``url``, ``title`` and ``year`` keys.
    """
    results = []
    url = scraper_utils.urljoin(self.base_url, url)
    html = self._http_get(url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'bpM12'}):
        title_frag = dom_parser2.parse_dom(item, 'h2')
        year_frag = dom_parser2.parse_dom(item, 'div', {'class': 'sectionDetail'})
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        if not (title_frag and match_url):
            continue

        match_url = match_url[0].attrs['href']
        heading = title_frag[0].content
        match = re.search('(.*?)<br>', heading)
        # Fall back to the heading text itself. The previous code assigned
        # the parse_dom match object here instead of its content.
        match_title = match.group(1) if match else heading

        match_year = ''
        if year_frag:
            match = re.search('(\d{4})', year_frag[0].content)
            if match:
                match_year = match.group(1)

        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            results.append({
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
            })

    return results
def get_sources(self, video):
    """Return the hosters listed in the ``linktr`` table rows of a page.

    A row's stream url is its anchor href when that is absolute, otherwise
    the id of its span. The host is the url's hostname for absolute urls
    and the row's <h9> text otherwise. Rows lacking either are skipped.

    :param video: video object passed to ``self.get_url``.
    :return: list of hoster dicts; empty when no page url is known.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for _attrs, row in dom_parser2.parse_dom(html, 'tr', {'id': 'linktr'}):
        spans = dom_parser2.parse_dom(row, 'span', req='id')
        anchors = dom_parser2.parse_dom(row, 'a', req='href')

        if anchors and anchors[0].attrs['href'].startswith('http'):
            stream_url = anchors[0].attrs['href']
        elif spans:
            stream_url = spans[0].attrs['id']
        else:
            stream_url = ''

        if stream_url.startswith('http'):
            host = urlparse.urlparse(stream_url).hostname
        else:
            labels = dom_parser2.parse_dom(row, 'h9')
            host = labels[0].content if labels else ''

        if not (stream_url and host):
            continue

        quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
        })

    return hosters
def get_sources(self, video):
    """Return the single embedded hoster of a video page.

    The quality label is read from the text after ``Quality`` on the page
    (``HDRIP`` when absent), reduced to ASCII and stripped of <strong>
    tags, colons and whitespace. Views and rating are filled in when the
    page exposes them.

    :param video: video object passed to ``self.get_url``.
    :return: list with at most one hoster dict.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'video-embed'})
    if not fragment:
        return hosters

    iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
    if not iframe_url:
        return hosters

    stream_url = iframe_url[0].attrs['src']
    host = urlparse.urlparse(stream_url).hostname

    q_str = 'HDRIP'
    match = re.search('>Quality(.*?)<br\s*/>', html, re.I)
    if match:
        q_str = match.group(1)
        q_str = q_str.decode('utf-8').encode('ascii', 'ignore')
        # The flags must be passed by keyword: the fourth positional
        # argument of re.sub is the replacement count, not the flags.
        q_str = re.sub('(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)

    hoster = {
        'multi-part': False,
        'host': host,
        'class': self,
        'quality': scraper_utils.blog_get_quality(video, q_str, host),
        'views': None,
        'rating': None,
        'url': stream_url,
        'direct': False,
    }

    match = re.search('class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
    if match:
        hoster['views'] = int(match.group(1))
        hoster['rating'] = match.group(2)

    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Return the embedded and download sources of a video page.

    Iframes inside ``stb-container-N`` divs become ``QUALITIES.HIGH``
    sources. In the download box, ``#`` anchors act as quality labels and
    are paired positionally with the "download now" links.

    :param video: video object passed to ``self.get_url``.
    :return: list of source dicts; empty when no page url is known.
    """
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=8)

    for container in dom_parser2.parse_dom(html, 'div', {'id': re.compile('stb-container-\d+')}):
        frames = dom_parser2.parse_dom(container.content, 'iframe', req='src')
        if not frames:
            continue

        embed_url = frames[0].attrs['src']
        sources.append({
            'multi-part': False,
            'url': embed_url,
            'host': urlparse.urlparse(embed_url).hostname,
            'class': self,
            'quality': QUALITIES.HIGH,
            'views': None,
            'rating': None,
            'direct': False,
        })

    box = dom_parser2.parse_dom(html, 'div', {'class': "stb-download-body_box"})
    if not box:
        return sources

    box_html = box[0].content
    labels = dom_parser2.parse_dom(box_html, 'a', {'href': '#'})
    downloads = [anchor for anchor in dom_parser2.parse_dom(box_html, 'a', req='href')
                 if anchor.content.lower() == 'download now']
    for label, download in zip(labels, downloads):
        stream_url = download.attrs['href']
        label_text = re.sub('</?[^>]*>', '', label.content)
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.blog_get_quality(video, label_text, host)
        sources.append({
            'multi-part': False,
            'url': stream_url,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'direct': False,
        })

    return sources
def get_sources(self, video):
    """Collect hosters from each ``div.embed-responsive`` iframe on the page.

    An iframe whose host is listed in ``DIRECT_HOSTS`` is expanded through
    ``__parse_streams``; any other iframe becomes a single indirect source.
    Direct streams get a User-Agent header appended to their url.

    :param video: video passed to ``self.get_url`` to locate the page.
    :return: list of hoster dicts.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.MOVIE:
        page_quality = QUALITIES.HD720
    else:
        page_quality = QUALITIES.HIGH

    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'embed-responsive'}):
        iframes = dom_parser2.parse_dom(fragment, 'iframe', req='data-src')
        if not iframes:
            continue

        iframe_url = iframes[0].attrs['data-src']
        iframe_host = urlparse.urlparse(iframe_url).hostname
        if iframe_host in DIRECT_HOSTS:
            sources = self.__parse_streams(iframe_url, url)
        else:
            fallback = scraper_utils.get_quality(video, iframe_host, page_quality)
            sources = {iframe_url: {'quality': fallback, 'direct': False}}

        for source in sources:
            info = sources[source]
            quality = info['quality']
            direct = info['direct']
            if direct:
                host = scraper_utils.get_direct_hostname(self, source)
                stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            else:
                host = urlparse.urlparse(source).hostname
                stream_url = source

            hosters.append({'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                            'quality': quality, 'views': None, 'rating': None, 'direct': direct})

    return hosters
def get_sources(self, video):
    """List the hosters found in the page's ``div#links`` section.

    The page heading ("Links - Quality X") is mapped through ``QUALITY_MAP``
    to a base quality.  Each ``ul`` supplies its first link as the stream
    url and its last ``li#download`` text as the host name.

    :param video: video passed to ``self.get_url`` to locate the page.
    :return: list of hoster dicts.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)

    heading = re.search(r'Links\s+-\s+Quality\s*([^<]*)</h1>', html, re.DOTALL | re.I)
    page_quality = QUALITY_MAP.get(heading.group(1).strip().upper()) if heading else None

    links_div = dom_parser2.parse_dom(html, 'div', {'id': 'links'})
    if not links_div:
        return hosters

    for _attrs, row in dom_parser2.parse_dom(links_div[0].content, 'ul'):
        anchors = dom_parser2.parse_dom(row, 'a', req='href')
        host_cells = dom_parser2.parse_dom(row, 'li', {'id': 'download'})
        if not (anchors and host_cells):
            continue

        link = anchors[0].attrs['href']
        host_name = host_cells[-1].content
        hosters.append({
            'multi-part': False,
            'host': host_name,
            'class': self,
            'url': link,
            'quality': scraper_utils.get_quality(video, host_name, page_quality),
            'rating': None,
            'views': None,
            'direct': False,
        })

    return hosters
def get_sources(self, video):
    """Build hosters from the page's ``a.full-torrent1`` entries.

    Each entry supplies an ``onclick`` span (reduced to its ``/redirect/...``
    path when one is present), a ``div.small_server`` host label, and
    optional "Views:" / "Size:" figures.  The size figure is multiplied by
    1024 * 1024 before being formatted.

    :param video: video passed to ``self.get_url`` to locate the page.
    :return: list of hoster dicts.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)

    for _attrs, entry in dom_parser2.parse_dom(html, 'a', {'class': 'full-torrent1'}):
        click_spans = dom_parser2.parse_dom(entry, 'span', req='onclick')
        server_divs = dom_parser2.parse_dom(entry, 'div', {'class': 'small_server'})
        if not click_spans or not server_divs:
            continue

        views_match = re.search(r'Views:\s*(?:</[^>]*>)?\s*(\d+)', entry, re.I)
        views = views_match.group(1) if views_match else None

        size_match = re.search(r'Size:\s*(?:</[^>]*>)?\s*(\d+)', entry, re.I)
        size = int(size_match.group(1)) * 1024 * 1024 if size_match else None

        stream_url = click_spans[0].attrs['onclick']
        redirect = re.search("'(/redirect/[^']+)", stream_url)
        if redirect:
            stream_url = redirect.group(1)

        host = server_divs[0].content.lower().replace('stream server: ', '')
        hoster = {
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': views,
            'rating': None,
            'url': stream_url,
            'direct': False,
        }
        if size is not None:
            hoster['size'] = scraper_utils.format_size(size, 'B')
        hosters.append(hoster)

    return hosters
def get_sources(self, video):
    """Return one hoster per iframe found inside the ``div.tab_content`` blocks.

    The quality string comes from the page's ``Quality ... <br>`` label
    (``HDRIP`` when absent).  Views and rating are page-level statistics and
    are attached to every hoster when the page carries them; a page without
    them still yields its hosters.

    :param video: video passed to ``self.get_url`` to locate the page.
    :return: list of hoster dicts.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)

    q_str = 'HDRIP'
    match = re.search(r'>Quality(.*?)<br\s*/?>', html, re.I)
    if match:
        q_str = match.group(1)
        # The 4th positional argument of re.sub() is ``count``; the flags
        # have to be passed by keyword or they are silently ignored.
        q_str = re.sub(r'(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)

    # The statistics are searched on the whole page, so look them up once
    # instead of once per iframe.
    views = rating = None
    stats = re.search(r'class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
    if stats:
        views = int(stats.group(1))
        rating = stats.group(2)

    for _attr, content in dom_parser2.parse_dom(html, 'div', {'class': 'tab_content'}):
        for attrs, _content in dom_parser2.parse_dom(content, 'iframe', req='src'):
            source = attrs['src']
            host = urlparse.urlparse(source).hostname
            quality = scraper_utils.blog_get_quality(video, q_str, host)
            # Missing statistics must not drop the hoster: leave them None.
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': views,
                'rating': rating,
                'url': source,
                'direct': False,
            })

    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search ``/search-movies/<title>.html`` and return matching results.

    Title and year are read from the ``onmouseover`` tooltip of each
    ``li.item``.  Items mentioning "season N" count as seasons; movies fall
    back to the tooltip's "Release:" year when the title carries none.

    :return: list of dicts with ``title``, ``year`` and ``url`` keys.
    """
    results = []
    search_path = '/search-movies/%s.html' % (urllib.quote_plus(title))
    search_url = scraper_utils.urljoin(self.base_url, search_path)
    html = self._http_get(search_url, cache_limit=8)

    wants_movie = video_type == VIDEO_TYPES.MOVIE
    wants_season = video_type == VIDEO_TYPES.SEASON

    for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'item'}):
        anchors = dom_parser2.parse_dom(item, 'a', req='href')
        tooltip = re.search('onmouseover="([^"]+)', item)
        if not (anchors and tooltip):
            continue

        match_url = anchors[0].attrs['href']
        tooltip_html = tooltip.group(1)
        heading = re.search(r'<b>(?:<i>)?\s*(.*?)\s*(?:</i>)?</b>', tooltip_html)
        if not heading:
            continue

        match_title, match_year = scraper_utils.extra_year(heading.group(1))
        is_season = re.search(r'season\s+(\d+)', tooltip_html, re.I)
        if (is_season and wants_movie) or (not is_season and wants_season):
            continue

        if wants_movie:
            if not match_year:
                release = re.search(r'>Release:\s*(\d{4})', tooltip_html)
                match_year = release.group(1) if release else ''
        elif season and int(season) != int(is_season.group(1)):
            continue

        if not year or not match_year or year == match_year:
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url),
            })

    return results
def get_sources(self, video):
    """Return hosters for every tab iframe and ``window.open('...')`` link.

    :param video: video passed to ``self.get_url`` to locate the page.
    :return: list of hoster dicts, iframe sources first.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)

    links = []
    for _attrs, tab in dom_parser2.parse_dom(html, 'div', {'class': 'tab-content'}):
        links.extend(attrs['src'] for attrs, _content in dom_parser2.parse_dom(tab, 'iframe', req='src'))
    links.extend(popup.group(1) for popup in re.finditer(r"window\.open\('([^']+)", html))

    for link in links:
        host = urlparse.urlparse(link).hostname
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'url': link,
            'direct': False,
        })

    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the keywords page for *title* and return matching results.

    The year is parsed from the anchor's content while the reported title
    comes from the anchor's ``title`` attribute.

    :return: list of dicts with ``url``, ``title`` and ``year`` keys.
    """
    results = []
    # NOTE(review): the title is interpolated without url-quoting - confirm
    # the site accepts raw titles.
    keyword_url = 'http://www.movie25.me/keywords/%s/' % (title)
    search_url = scraper_utils.urljoin(self.base_url, keyword_url)
    html = self._http_get(search_url, cache_limit=4)

    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'movie_about'}):
        anchors = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
        if not anchors:
            continue

        anchor = anchors[0]
        match_url = anchor.attrs['href']
        match_title = anchor.attrs['title']
        _match_title, match_year = scraper_utils.extra_year(anchor.content)
        if year and match_year and year != match_year:
            continue

        results.append({
            'url': scraper_utils.pathify_url(match_url),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
        })

    return results
def _http_get(self, url, params=None, data=None, headers=None, auth=True, method=None, cache_limit=8):
    """Fetch *url* through the parent class, logging in first when needed.

    Returns an empty string when no username or password is configured.
    When *auth* is true and the fetched page shows no ``span.user-name``,
    ``__login`` is called and the page is fetched again with the cache
    bypassed.
    """
    # return all uncached blank pages if no user or pass
    if not (self.username and self.password):
        return ''

    # NOTE(review): super(self.__class__, self) recurses forever if this
    # class is ever subclassed; left unchanged because the class name is not
    # visible from here.
    html = super(self.__class__, self)._http_get(
        url, params=params, data=data, headers=headers, method=method, cache_limit=cache_limit)

    logged_in = dom_parser2.parse_dom(html, 'span', {'class': 'user-name'}) if auth else True
    if not logged_in:
        logger.log('Logging in for url (%s)' % (url), log_utils.LOGDEBUG)
        self.__login()
        html = super(self.__class__, self)._http_get(
            url, params=params, data=data, headers=headers, method=method, cache_limit=0)

    return html
def get_sources(self, video):
    """Resolve direct streams from the iframes inside ``div.videoPlayer``.

    Each iframe page is fetched with the video page as referer and scanned
    for a ``downloadUrl = "..."`` assignment.  Streams hosted on gvideo get
    their quality from the url; anything else is ``QUALITIES.HIGH``.

    :param video: video passed to ``self.get_url`` to locate the page.
    :return: list of direct hoster dicts.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    page_html = self._http_get(page_url, cache_limit=.5)

    for _attrs, player in dom_parser2.parse_dom(page_html, 'div', {'class': 'videoPlayer'}):
        for attrs, _content in dom_parser2.parse_dom(player, 'iframe', req='src'):
            frame_html = self._http_get(attrs['src'], headers={'Referer': page_url}, cache_limit=.5)
            found = re.search(r'downloadUrl\s*=\s*"([^"]+)', frame_html)
            if not found:
                continue

            stream_url = found.group(1)
            host = scraper_utils.get_direct_hostname(self, stream_url)
            quality = scraper_utils.gv_get_quality(stream_url) if host == 'gvideo' else QUALITIES.HIGH
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True,
            })

    return hosters
def search(self, video_type, title, year, season=''):
    """Query the ajax-search-pro endpoint and return matching results.

    Results naming "Season N" are treated as seasons (and filtered by
    *season* when given); everything else is treated as a movie whose year
    is split off the title.  A result is kept only when its normalized title
    contains, or is contained in, the normalized search title.

    :return: list of dicts with ``url``, ``title`` and ``year`` keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/wp-content/plugins/ajax-search-pro/ajax_search.php')
    data = {
        'action': 'ajaxsearchpro_search',
        'aspp': title,
        'asid': '1',
        'asp_inst_id': '1_1',
        'options': 'current_page_id=7&qtranslate_lang=0&set_intitle=None&customset%5B%5D=post',
    }
    html = self._http_get(search_url, data=data, headers=XHR, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)

    # req='href' skips anchors without a link instead of raising KeyError
    # on attrs['href'] below.
    for attrs, match_title_year in dom_parser2.parse_dom(html, 'a', {'class': 'asp_res_url'}, req='href'):
        match_url = attrs['href']
        match_title_year = re.sub('</?[^>]*>', '', match_title_year)
        is_season = re.search(r'Season\s+(\d+)\s*', match_title_year, re.I)

        is_movie_hit = not is_season and video_type == VIDEO_TYPES.MOVIE
        is_season_hit = is_season and video_type == VIDEO_TYPES.SEASON
        if not (is_movie_hit or is_season_hit):
            continue

        match_year = ''
        if video_type == VIDEO_TYPES.SEASON:
            if season and int(is_season.group(1)) != int(season):
                continue
            # Season titles keep their text but lose any 4-digit year.
            match_title = re.sub(r'\s*\d{4}', '', match_title_year)
        else:
            match_title, match_year = scraper_utils.extra_year(match_title_year)

        match_norm_title = scraper_utils.normalize_title(match_title)
        title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
        if title_match and (not year or not match_year or year == match_year):
            results.append({
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
            })

    return results
def _get_episode_url(self, show_url, video):
    """Find the episode url on the show's ``/season/<n>`` page.

    The season page is requested with the show page as referer, and its
    ``div#episodes`` fragment is handed to ``_default_get_episode_url``
    together with the episode and title patterns.
    """
    season, episode = video.season, video.episode
    episode_pattern = 'href="([^"]+/season/%s/episode/%s/?)"' % (season, episode)
    title_pattern = r'href="(?P<url>[^"]+)"[^>]+title="(?:S\d+\s*E\d+:\s*)?(?P<title>[^"]+)'

    referer = scraper_utils.urljoin(self.base_url, show_url)
    season_path = scraper_utils.urljoin(show_url, '/season/%s' % (season))
    season_url = scraper_utils.urljoin(self.base_url, season_path)

    html = self._http_get(season_url, headers={'Referer': referer}, cache_limit=2)
    episodes = dom_parser2.parse_dom(html, 'div', {'id': 'episodes'})
    return self._default_get_episode_url(episodes, video, episode_pattern, title_pattern)
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Match *title* against the links listed in the home page's ``div.dizis``.

    A link matches when the normalized search title occurs in the normalized
    link content; nested ``<div>`` markup is stripped from the reported
    title.  No year information is available, so ``year`` is always ''.

    :return: list of dicts with ``url``, ``title`` and ``year`` keys.
    """
    results = []
    html = self._http_get(self.base_url, cache_limit=8)
    listing = dom_parser2.parse_dom(html, 'div', {'class': 'dizis'})
    if not listing:
        return results

    wanted = scraper_utils.normalize_title(title)
    for attrs, link_content in dom_parser2.parse_dom(listing[0].content, 'a', req='href'):
        if wanted not in scraper_utils.normalize_title(link_content):
            continue

        clean_title = re.sub('<div[^>]*>.*?</div>', '', link_content)
        results.append({
            'url': scraper_utils.pathify_url(attrs['href']),
            'title': scraper_utils.cleanse_title(clean_title),
            'year': '',
        })

    return results
def resolve_link(self, link):
    """Resolve *link* to the url it ultimately points at.

    Preference order: the first iframe ``src`` on the page, then the href of
    a "Click Here To Play" anchor, and finally *link* itself.
    """
    html = self._http_get(link, cache_limit=.5)

    iframes = dom_parser2.parse_dom(html, 'iframe', req='src')
    if iframes:
        return iframes[0].attrs['src']

    play_link = re.search('href="([^"]+)[^>]*>Click Here To Play<', html, re.I)
    if play_link:
        return play_link.group(1)

    return link
def _get_episode_url(self, show_url, video):
    """Find the episode url on the show page.

    Searches the ``table.alternate_color`` fragment (or the whole page when
    that table is missing) by ``season-<n>-episode-<m>/`` link, with an
    airdate pattern passed along to ``_default_get_episode_url``.
    """
    episode_pattern = 'href="([^"]+season-%s-episode-%s/)' % (video.season, video.episode)
    airdate_pattern = r'{day} {short_month} {year}\s*<a\s+href="([^"]+)'

    page_url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(page_url, cache_limit=2)
    table = dom_parser2.parse_dom(html, 'table', {'class': 'alternate_color'})
    haystack = table if table else html
    return self._default_get_episode_url(haystack, video, episode_pattern, airdate_pattern=airdate_pattern)
def get_sources(self, video):
    """Decode the hosters hidden behind the ``li.playing_button`` links.

    Each button's href carries a base64 payload after ``php?``; the first
    ``http(s)://`` url in the decoded text is the stream url.  A button that
    cannot be decoded is logged at debug level and skipped.

    :param video: video passed to ``self.get_url`` to locate the page.
    :return: list of hoster dicts.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)

    for _attrs, button in dom_parser2.parse_dom(html, 'li', {'class': 'playing_button'}):
        try:
            anchors = dom_parser2.parse_dom(button, 'a', req='href')
            payload = re.search(r'php\?.*?=?([^"]+)', anchors[0].attrs['href']).group(1)
            decoded = base64.b64decode(payload)
            stream_url = re.search('(https?://.*)', decoded).group(1)
            host = urlparse.urlparse(stream_url).hostname
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': False,
            })
        except Exception as e:
            # Best effort: one malformed button must not lose the others.
            logger.log('Exception during tvonline source: %s - |%s|' % (e, button), log_utils.LOGDEBUG)

    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Match *title* against every titled link in the ``/watch-series/`` list.

    A link matches when the normalized search title occurs in its normalized
    ``title`` attribute.  No year information is available.

    :return: list of dicts with ``url``, ``title`` and ``year`` keys.
    """
    results = []
    wanted = scraper_utils.normalize_title(title)
    listing_url = scraper_utils.urljoin(self.base_url, '/watch-series/')
    html = self._http_get(listing_url, headers={'Referer': self.base_url}, cache_limit=8)

    for _attrs, entry in dom_parser2.parse_dom(html, 'li'):
        for attrs, _content in dom_parser2.parse_dom(entry, 'a', req=['title', 'href']):
            link_title = attrs['title']
            link_url = attrs['href']
            if wanted not in scraper_utils.normalize_title(link_title):
                continue
            results.append({
                'url': scraper_utils.pathify_url(link_url),
                'title': scraper_utils.cleanse_title(link_title),
                'year': '',
            })

    return results
def __get_iframe_links(self, html, sub):
    """Return the hoster for the ``iframe#episode_player`` in *html*, if any.

    Escaped quotes and slashes in *html* are unescaped before parsing.

    :param html: markup to scan.
    :param sub: value stored under the hoster's ``subs`` key.
    :return: list with zero or one hoster dicts (quality ``QUALITIES.HD720``).
    """
    unescaped = html.replace('\\"', '"').replace('\\/', '/')
    players = dom_parser2.parse_dom(unescaped, 'iframe', {'id': 'episode_player'}, req='src')
    if not players:
        return []

    stream_url = players[0].attrs['src']
    return [{
        'multi-part': False,
        'host': urlparse.urlparse(stream_url).hostname,
        'class': self,
        'quality': QUALITIES.HD720,
        'views': None,
        'rating': None,
        'url': stream_url,
        'direct': False,
        'subs': sub,
    }]
def _get_episode_url(self, show_url, video):
    """Find the episode url, switching to the season's own page when needed.

    For any season other than 1 the show page's ``div.page-numbers`` block is
    searched for a ``-<season>-sezon`` link, which then replaces the show url
    as the page to scan.  The episode is located through a
    ``-<season>-sezon-<episode>-bolum`` link in the ``ul.posts-list``
    fragment, or in the whole page when that list is missing.
    """
    season_url = show_url
    if video.season != 1:
        show_url = scraper_utils.urljoin(self.base_url, show_url)
        html = self._http_get(show_url, cache_limit=24)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'page-numbers'})
        if fragment:
            match = re.search('href="([^"]+-%s-sezon[^"]*)' % (video.season), fragment[0].content)
            if match:
                season_url = match.group(1)

    # The pattern takes exactly two values (season, episode).  The previous
    # format string had a third specifier and raised
    # "TypeError: not enough arguments for format string".
    episode_pattern = '''href=['"]([^'"]+-%s-sezon-%s-bolum[^'"]*)''' % (video.season, video.episode)
    season_url = scraper_utils.urljoin(self.base_url, season_url)
    html = self._http_get(season_url, cache_limit=2)
    fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'posts-list'})
    return self._default_get_episode_url(fragment or html, video, episode_pattern)
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Run the site search (``?s=<title>``) and return matching results.

    Title and year are split from the first link of each
    ``article.item-list``.

    :return: list of dicts with ``title``, ``year`` and ``url`` keys.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)

    for _attrs, article in dom_parser2.parse_dom(html, 'article', {'class': 'item-list'}):
        anchors = dom_parser2.parse_dom(article, 'a', req='href')
        if not anchors:
            continue

        first = anchors[0]
        match_url = first.attrs['href']
        match_title, match_year = scraper_utils.extra_year(first.content)
        if year and match_year and year != match_year:
            continue

        results.append({
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
            'url': scraper_utils.pathify_url(match_url),
        })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search blog posts for *title* and hand the headings to the blog parser.

    Posts rejected by ``__too_old`` are skipped; the ``h1.entry-title``
    contents of the remaining posts are joined with newlines and processed
    by ``_blog_proc_results``.
    """
    query = {'s': title, 'submit': 'Search'}
    html = self._http_get(self.base_url, params=query, require_debrid=True, cache_limit=2)

    headings = []
    for _attr, post in dom_parser2.parse_dom(html, 'article', {'id': re.compile(r'post-\d+')}):
        if self.__too_old(post):
            continue
        for heading in dom_parser2.parse_dom(post, 'h1', {'class': 'entry-title'}):
            headings.append(heading.content)

    post_pattern = 'href="(?P<url>[^"]+)[^>]+>(?P<post_title>.*?)</a>'
    return self._blog_proc_results('\n'.join(headings), post_pattern, '', video_type, title, year)
def __too_old(self, post):
    """Tell whether *post* is older than the configured ``<name>-filter`` days.

    :param post: markup of a single post.
    :return: True when the post's ``time.entry-date`` timestamp lies more
        than the configured number of days in the past; False when the
        filter is 0, the post has no timestamp, or it cannot be parsed.
    """
    filter_days = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
    post_date = dom_parser2.parse_dom(post, 'time', {'class': 'entry-date'}, req='datetime')
    if not filter_days or not post_date:
        return False

    try:
        # The timestamp lives in the ``datetime`` attribute (the one
        # required above), not in the element's text content.
        posted = datetime.date.fromtimestamp(utils.iso_2_utc(post_date[0].attrs['datetime']))
    except ValueError:
        return False

    return datetime.date.today() - posted > filter_days
def menu():
    """Build the category directory from the links on ``categories.php``.

    Links are kept when their text is longer than two characters and has no
    space, and their href contains 'pages' but not '0-9'.  A scraping or
    fetch failure is logged, notified to the user, and ends the script.
    """
    try:
        url = urlparse.urljoin(base_domain, 'categories.php')
        c = client.request(url)
        categories = []
        for anchor in dom_parser2.parse_dom(c, 'a', req='href'):
            href = anchor.attrs['href']
            text = anchor.content
            if len(text) > 2 and 'pages' in href and '0-9' not in href and ' ' not in text:
                categories.append((urlparse.urljoin(base_domain, href), text))
        if not categories:
            log_utils.log('Scraping Error in %s:: Content of request: %s' % (base_name.title(), str(c)),
                          log_utils.LOGERROR)
            kodi.notify(msg='Scraping Error: Info Added To Log File', duration=6000, sound=True)
            quit()
    except Exception as e:
        log_utils.log('Fatal Error in %s:: Error: %s' % (base_name.title(), str(e)), log_utils.LOGERROR)
        kodi.notify(msg='Fatal Error', duration=4000, sound=True)
        quit()

    art_root = 'special://home/addons/script.wankbank.artwork'
    dirlst = []
    for category_url, category_name in categories:
        try:
            name = kodi.sortX(category_name.encode('utf-8'))
            icon = xbmc.translatePath(os.path.join(art_root, 'resources/art/%s/icon.png' % filename))
            fanarts = xbmc.translatePath(os.path.join(art_root, 'resources/art/%s/fanart.jpg' % filename))
            dirlst.append({
                'name': name,
                'url': category_url,
                'mode': content_mode,
                'icon': icon,
                'fanart': fanarts,
                'folder': True,
            })
        except Exception as e:
            log_utils.log('Error adding menu item %s in %s:: Error: %s'
                          % (category_name.title(), base_name.title(), str(e)), log_utils.LOGERROR)

    if dirlst:
        buildDirectory(dirlst)
    else:
        kodi.notify(msg='No Menu Items Found')
        quit()
def get_gk_links(scraper, html, page_url, page_quality, link_url, player_url):
    """Resolve the page's ``data-film`` anchors into a ``{url: quality}`` map.

    Each anchor is posted to *link_url*; the ``s``/``v`` values of the JSON
    reply are then sent to *player_url*, whose ``data`` (or ``data['files']``)
    is either one url string or a list of dicts with a ``files`` url and an
    optional ``quality``.  gvideo urls are rated from the url itself;
    otherwise the item's ``quality`` or *page_quality* is used.
    """
    def fetch_player_data(params):
        reply = scraper._http_get(player_url, params=params, headers=XHR, cache_limit=.25)
        payload = parse_json(reply, player_url).get('data', {})
        if payload is not None and 'files' in payload:
            return payload['files']
        return payload

    sources = {}
    for attrs, _content in dom_parser2.parse_dom(html, 'a', req=['data-film', 'data-name', 'data-server']):
        form = {
            'ipplugins': 1,
            'ip_film': attrs['data-film'],
            'ip_server': attrs['data-server'],
            'ip_name': attrs['data-name'],
        }
        headers = {'Referer': page_url}
        headers.update(XHR)
        link_html = scraper._http_get(link_url, data=form, headers=headers, cache_limit=.25)
        js_data = parse_json(link_html, link_url)

        stream_urls = fetch_player_data({
            'u': js_data.get('s'),
            'w': '100%',
            'h': 420,
            's': js_data.get('v'),
            'n': 0,
        })
        if stream_urls is None:
            continue

        if isinstance(stream_urls, basestring):
            sources[stream_urls] = page_quality
            continue

        for item in stream_urls:
            stream_url = item['files']
            if get_direct_hostname(scraper, stream_url) == 'gvideo':
                sources[stream_url] = gv_get_quality(stream_url)
            elif 'quality' in item:
                sources[stream_url] = height_get_quality(item['quality'])
            else:
                sources[stream_url] = page_quality

    return sources
def get_sources(self, video):
    """Collect embedded and ajax-loaded sources for the video.

    When the page links to a ``-full-movie-`` page, that page is used
    instead.  Embedded sources come from ``__get_embedded``; every
    ``span.btn-eps`` link is then resolved through the ajax endpoint and
    merged in.  Every stream url gets a User-Agent header appended.

    :param video: video passed to ``self.get_url`` to locate the page.
    :return: list of hoster dicts.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)

    views = None
    view_icon = dom_parser2.parse_dom(html, 'img', {'src': re.compile('[^"]*view_icon.png')})
    if view_icon:
        digits = re.search(r'(\d+)', view_icon[0].content)
        if digits:
            views = digits.group(1)

    full_movie = re.search('href="([^"]+-full-movie-[^"]+)', html)
    if full_movie:
        url = full_movie.group(1)
        html = self._http_get(url, cache_limit=.5)

    sources = self.__get_embedded(html)
    for button in dom_parser2.parse_dom(html, 'span', {'class': 'btn-eps'}, req='link'):
        ajax_url = scraper_utils.urljoin(self.base_url, AJAX_URL)
        headers = {'Referer': url}
        headers.update(XHR)
        ajax_html = self._http_get(ajax_url, params={'v': button.attrs['link']}, headers=headers, cache_limit=.5)
        sources.update(self.__get_sources(ajax_html))

    for source in sources:
        info = sources[source]
        if info['direct']:
            host = scraper_utils.get_direct_hostname(self, source)
        else:
            host = urlparse.urlparse(source).hostname
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': info['quality'],
            'views': views,
            'rating': None,
            'url': stream_url,
            'direct': info['direct'],
        })

    return hosters
def get_sources(self, video):
    """Collect sources from the iframes inside the ``div.movieplay`` blocks.

    The iframe url is its ``src``, or ``data-lazy-src`` when ``src`` is not
    an http url.  Iframes pointing at 'miradetodo' are fetched (following the
    first ``nav.nav`` link when present) and mined with the gk, amazon and
    generic source parsers; any other iframe is kept as an indirect source.

    :param video: video passed to ``self.get_url`` to locate the page.
    :return: list of hoster dicts, each url suffixed with ``|User-Agent=``.
    """
    hosters = []
    sources = {}
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    page_html = self._http_get(url, cache_limit=.5)

    for _attrs, player in dom_parser2.parse_dom(page_html, 'div', {'class': 'movieplay'}):
        iframes = dom_parser2.parse_dom(player, 'iframe', req='src')
        iframes = iframes + dom_parser2.parse_dom(player, 'iframe', req='data-lazy-src')
        for attrs, _content in iframes:
            iframe_url = attrs.get('src', '')
            if not iframe_url.startswith('http'):
                iframe_url = attrs.get('data-lazy-src', '')
                if not iframe_url.startswith('http'):
                    continue

            if 'miradetodo' not in iframe_url:
                host = urlparse.urlparse(iframe_url).hostname
                quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                sources.update({iframe_url: {'quality': quality, 'direct': False}})
                continue

            html = self._http_get(iframe_url, cache_limit=.5)
            nav = dom_parser2.parse_dom(html, 'nav', {'class': 'nav'})
            if nav:
                nav_links = dom_parser2.parse_dom(nav[0].content, 'a', req='href')
                if nav_links:
                    html = self._http_get(nav_links[0].attrs['href'], cache_limit=.5)

            sources.update(self.__get_gk_links(html))
            sources.update(self.__get_gk_links2(html))
            sources.update(self.__get_amazon_links(html))
            sources.update(scraper_utils.parse_sources_list(self, html))

    for source in sources:
        info = sources[source]
        stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
        direct = info['direct']
        quality = info['quality']
        if direct:
            host = scraper_utils.get_direct_hostname(self, source)
        else:
            host = urlparse.urlparse(source).hostname
        hosters.append({'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality,
                        'host': host, 'rating': None, 'views': None, 'direct': direct})

    return hosters
def menu():
    """Build the channel directory from the two sidebar listings.

    Channel links (href containing 'channels') are read from the 'video'
    page, tagged "Professional", and from the 'amateur/videos/' page, tagged
    "Amateur", then sorted by display name.  A scraping or fetch failure is
    logged, notified to the user, and ends the script.
    """
    lover.checkupdates()

    try:
        url = urlparse.urljoin(base_domain, 'video')
        c = client.request(url)
        # req='href' drops anchors without a link; reading attrs['href'] on
        # such an anchor would raise KeyError and abort the whole menu.
        r = dom_parser2.parse_dom(c, 'a', {'class': 'sidebar_section_item'}, req='href')
        r = [i for i in r if 'channels' in i.attrs['href']]
        r = [(urlparse.urljoin(base_domain, i.attrs['href']), i.content + ' - [ Professional ]') for i in r]

        url = urlparse.urljoin(base_domain, 'amateur/videos/')
        c = client.request(url)
        e = dom_parser2.parse_dom(c, 'a', {'class': 'sidebar_section_item'}, req='href')
        e = [i for i in e if 'channels' in i.attrs['href']]
        r += [(urlparse.urljoin(base_domain, i.attrs['href']), i.content + ' - [ Amateur ]') for i in e]

        r = sorted(r, key=lambda x: x[1])
        if not r:
            log_utils.log('Scraping Error in %s:: Content of request: %s' % (base_name.title(), str(c)),
                          log_utils.LOGERROR)
            kodi.notify(msg='Scraping Error: Info Added To Log File', duration=6000, sound=True)
            quit()
    except Exception as e:
        log_utils.log('Fatal Error in %s:: Error: %s' % (base_name.title(), str(e)), log_utils.LOGERROR)
        kodi.notify(msg='Fatal Error', duration=4000, sound=True)
        quit()

    dirlst = []
    for i in r:
        try:
            name = i[1]
            icon = xbmc.translatePath(os.path.join('special://home/addons/script.xxxodus.artwork',
                                                   'resources/art/%s/icon.png' % filename))
            fanarts = xbmc.translatePath(os.path.join('special://home/addons/script.xxxodus.artwork',
                                                      'resources/art/%s/fanart.jpg' % filename))
            dirlst.append({'name': name, 'url': i[0], 'mode': content_mode, 'icon': icon,
                           'fanart': fanarts, 'folder': True})
        except Exception as e:
            log_utils.log('Error adding menu item %s in %s:: Error: %s'
                          % (i[1].title(), base_name.title(), str(e)), log_utils.LOGERROR)

    if dirlst:
        buildDirectory(dirlst)
    else:
        kodi.notify(msg='No Menu Items Found')
        quit()
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query ``/search`` and return the items matching *video_type*.

    An item without a ``div.movie-series`` badge counts as a movie; a badge
    reading 'TV SERIE' counts as a TV show.  The year is taken from a
    ``-YYYY-`` segment of the item's url when present.

    :return: list of dicts with ``url``, ``title`` and ``year`` keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search')
    search_type = 'movies' if video_type == VIDEO_TYPES.MOVIE else 'series'
    query = {'query': title.lower(), 'type': search_type}
    html = self._http_get(search_url, params=query, cache_limit=8)

    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'one_movie-item'}):
        anchors = dom_parser2.parse_dom(item, 'a', req='href')
        images = dom_parser2.parse_dom(item, 'img', req='alt')
        media_type = dom_parser2.parse_dom(item, 'div', {'class': 'movie-series'})
        if not media_type:
            media_type = VIDEO_TYPES.MOVIE
        elif media_type[0].content == 'TV SERIE':
            media_type = VIDEO_TYPES.TVSHOW

        if not (anchors and images and video_type == media_type):
            continue

        match_url = anchors[0].attrs['href']
        match_title = images[0].attrs['alt']
        year_match = re.search(r'-(\d{4})-', match_url)
        match_year = year_match.group(1) if year_match else ''

        if not year or not match_year or year == match_year:
            results.append({
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
            })

    return results
def __get_source_page(self, video_type, page_url):
    """Walk from a ``/movie/<slug>-<id>.html`` url to its server-list html.

    Steps: fetch the quick-preview page, follow its "View all episodes" link
    to the watching page, request that page's hidden images, then fetch the
    server list and pull ``html`` out of its JSON reply.

    :return: ``(movie_id, watching_url, html)``; three empty strings when the
        url does not match or no watching link is found.  ``html`` is '' when
        the JSON reply carries no usable ``html`` entry.
    """
    match = re.search(r'/movie/(.*?)-(\d+)\.html', page_url)
    if not match:
        return '', '', ''

    slug, movie_id = match.groups()
    vid_type = 'movie' if video_type == VIDEO_TYPES.MOVIE else 'series'
    qp_url = QP_URL.format(slug=slug, movie_id=movie_id, vid_type=vid_type)
    qp_url = scraper_utils.urljoin(self.base_url, qp_url)
    headers = {'Referer': scraper_utils.urljoin(self.base_url, page_url)}
    headers.update(XHR)
    html = self._http_get(qp_url, headers=headers, cache_limit=8)
    watching_url = dom_parser2.parse_dom(html, 'a', {'title': re.compile('View all episodes')}, req='href')
    if not watching_url:
        return '', '', ''

    watching_url = watching_url[0].attrs['href']
    page_html = self._http_get(watching_url,
                               headers={'Referer': scraper_utils.urljoin(self.base_url, page_url)},
                               cache_limit=8)
    # The hidden images are requested only for their side effect; the
    # responses are discarded.
    for attrs, _content in dom_parser2.parse_dom(page_html, 'img', {'class': 'hidden'}, req='src'):
        _img = self._http_get(attrs['src'], headers={'Referer': watching_url}, cache_limit=8)

    sl_url = SL_URL.format(movie_id=movie_id)
    sl_url = scraper_utils.urljoin(self.base_url, sl_url)
    html = self._http_get(sl_url, headers=headers, cache_limit=8)
    js_data = scraper_utils.parse_json(html, sl_url)
    try:
        html = js_data['html']
    except (KeyError, TypeError):
        # Missing key or non-mapping reply; a bare ``except`` would also
        # swallow KeyboardInterrupt and SystemExit.
        html = ''

    return movie_id, watching_url, html
def menu():
    """Build the category directory from the ``a.url16`` links on the listing.

    A scraping or fetch failure is logged, notified to the user, and ends
    the script.
    """
    lover.checkupdates()

    try:
        url = urlparse.urljoin(base_domain, 'extreme-videos/')
        c = client.request(url)
        # Both attributes are read below; requiring them skips incomplete
        # anchors instead of aborting the whole menu with a KeyError.
        r = dom_parser2.parse_dom(c, 'a', {'class': 'url16'}, req=['href', 'title'])
        r = [(i.attrs['href'], i.attrs['title']) for i in r if i]
        if not r:
            log_utils.log('Scraping Error in %s:: Content of request: %s' % (base_name.title(), str(c)),
                          log_utils.LOGERROR)
            kodi.notify(msg='Scraping Error: Info Added To Log File', duration=6000, sound=True)
            quit()
    except Exception as e:
        log_utils.log('Fatal Error in %s:: Error: %s' % (base_name.title(), str(e)), log_utils.LOGERROR)
        kodi.notify(msg='Fatal Error', duration=4000, sound=True)
        quit()

    dirlst = []
    for i in r:
        try:
            name = kodi.sortX(i[1].encode('utf-8'))
            icon = xbmc.translatePath(os.path.join('special://home/addons/script.xxxodus.artwork',
                                                   'resources/art/%s/icon.png' % filename))
            fanarts = xbmc.translatePath(os.path.join('special://home/addons/script.xxxodus.artwork',
                                                      'resources/art/%s/fanart.jpg' % filename))
            dirlst.append({
                'name': name,
                'url': i[0],
                'mode': content_mode,
                'icon': icon,
                'fanart': fanarts,
                'folder': True,
            })
        except Exception as e:
            log_utils.log('Error adding menu item %s in %s:: Error: %s'
                          % (i[1].title(), base_name.title(), str(e)), log_utils.LOGERROR)

    if dirlst:
        buildDirectory(dirlst)
    else:
        kodi.notify(msg='No Menu Items Found')
        quit()
def get_sources(self, video):
    """Return the gvideo streams exposed by the page's player iframe.

    The iframe inside ``div.player`` is fetched and passed to both the
    embedded and the linked source parsers.  Only streams whose direct host
    is 'gvideo' are reported; they get a User-Agent header appended.

    :param video: video passed to ``self.get_url`` to locate the page.
    :return: list of direct hoster dicts.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)
    player = dom_parser2.parse_dom(html, 'div', {'class': 'player'})
    if not player:
        return hosters

    iframes = dom_parser2.parse_dom(player[0].content, 'iframe', req='src')
    if not iframes:
        return hosters

    frame_html = self._http_get(iframes[0].attrs['src'], cache_limit=.25)
    source_sets = [self.__get_embedded_sources(frame_html), self.__get_linked_sources(frame_html)]

    for source in source_sets:
        for stream_url in source['sources']:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host != 'gvideo':
                continue

            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.gv_get_quality(stream_url),
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True,
                'subs': source.get('subs', True),
            })

    return hosters
def __get_iframe_sources(self, iframe_url, page_url):
    """Build hosters from the ``data-dzst-player`` JSON of the iframe page.

    The iframe is fetched with *page_url* as referer; every entry of the
    JSON's ``tr`` mapping becomes one source created with ``subs=True``.
    """
    hosters = []
    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    players = dom_parser2.parse_dom(html, 'div', {'class': 'dzst-player'}, req='data-dzst-player')
    if not players:
        return hosters

    # NOTE(review): replacing '=' with '=' is a no-op as written; the first
    # argument was presumably an encoded form of '=' - verify upstream.
    raw = players[0].attrs['data-dzst-player'].replace('=', '=')
    cleaned = scraper_utils.cleanse_title(raw)
    js_data = scraper_utils.parse_json(scraper_utils.cleanse_title(cleaned), iframe_url)
    tracks = js_data.get('tr', {})
    for key in tracks:
        hosters.append(self.__create_source(tracks[key], key, page_url, subs=True))

    return hosters
def search_movie_name(self, name):
    """Look up movie metadata by name.

    The search page's first ``div.movie_box`` link is split on '/' and its
    fifth piece is passed to ``search_movie_id`` as the movie id.

    :param name: movie name; '!' characters are removed before searching.
    :return: the metadata from ``search_movie_id``, or an empty dict when the
        search yields no usable link.
    """
    meta = {}
    name = name.replace('!', '')
    url = self.SEARCH_MOVIES % (quote_plus(name))
    html = client.request(url)

    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'movie_box'})
    if fragment:
        movie_url = dom_parser2.parse_dom(fragment, 'a', req='href')
        # Guard both lookups: a box without a link, or an href with fewer
        # than five '/'-separated pieces, used to raise IndexError.
        parts = movie_url[0].attrs['href'].split('/', 5) if movie_url else []
        if len(parts) > 4:
            meta = self.search_movie_id(parts[4])

    xbmc.log('DATA URL: %s' % (str(url)), xbmc.LOGNOTICE)
    xbmc.log('MOVIE NAME: %s' % (str(name)), xbmc.LOGNOTICE)
    return meta
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query ``/search`` (``q=<title>&s=t``) and return matching results.

    Title and year are split from the ``title`` attribute of the first link
    in each ``span.title_list``.

    :return: list of dicts with ``url``, ``title`` and ``year`` keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search')
    html = self._http_get(search_url, params={'q': title, 's': 't'}, cache_limit=1)

    for _attrs, span in dom_parser2.parse_dom(html, 'span', {'class': 'title_list'}):
        anchors = dom_parser2.parse_dom(span, 'a', req=['href', 'title'])
        if not anchors:
            continue

        link = anchors[0].attrs
        match_url = link['href']
        match_title, match_year = scraper_utils.extra_year(link['title'])
        if year and match_year and year != match_year:
            continue

        results.append({
            'url': scraper_utils.pathify_url(match_url),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
        })

    return results
def search(self, video_type, title, year, season=''):
    """Search '/search-movies/<title>.html' for movies or seasons.

    The first link of every ``div.movie_about`` element is examined. Links
    whose text contains "Season <n>" (case-insensitive) are only returned
    for ``VIDEO_TYPES.SEASON`` searches; links without it are only returned
    for ``VIDEO_TYPES.MOVIE`` searches.

    Args:
        video_type: VIDEO_TYPES.MOVIE or VIDEO_TYPES.SEASON; any other value
            matches nothing.
        title: Title to search for; URL-quoted into the search path.
        year: Year to filter on. It never rejects season results, because
            their year is blanked.
        season: When non-empty, season results must carry this season number.

    Returns:
        A list of dicts with the keys 'url', 'title' and 'year'.
    """
    query = urllib.quote_plus(title)
    search_url = scraper_utils.urljoin(self.base_url, '/search-movies/%s.html' % query)
    html = self._http_get(search_url, cache_limit=8)

    results = []
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'movie_about'}):
        links = dom_parser2.parse_dom(item, 'a', req='href')
        if not links:
            continue

        match_url = links[0].attrs['href']
        match_title_year = links[0].content
        is_season = re.search(r'Season\s+(\d+)\s*', match_title_year, re.I)

        # A season link only satisfies a SEASON search; any other link only
        # satisfies a MOVIE search.
        if is_season:
            type_matches = video_type == VIDEO_TYPES.SEASON
        else:
            type_matches = video_type == VIDEO_TYPES.MOVIE
        if not type_matches:
            continue

        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if video_type == VIDEO_TYPES.SEASON:
            # Season entries carry no usable year, so the year filter is disabled.
            match_year = ''
            if season and int(season) != int(is_season.group(1)):
                continue

        if year and match_year and year != match_year:
            continue

        results.append({
            'url': scraper_utils.pathify_url(match_url),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
        })
    return results
def __search(self, video_type, title, year, page):
    """Scan one listing page for entries whose title overlaps ``title``.

    Args:
        video_type: VIDEO_TYPES.TVSHOW or VIDEO_TYPES.MOVIE. Entries that
            contain a ``div.movieTV`` element are treated as TV shows, all
            others as movies; only entries of the requested type are kept.
        title: Title to look for. Compared after normalisation, and a match
            in either direction (substring) is accepted.
        year: Requested year. The candidate year is always empty here, so
            this never filters anything out.
        page: Mapping with a 'url' key and an optional 'params' key
            describing the request to make.

    Returns:
        A list of dicts with the keys 'title', 'url' and 'year' ('year' is
        always the empty string).
    """
    url = scraper_utils.urljoin(self.base_url, page['url'])
    html = self._http_get(url, params=page.get('params'), cache_limit=24)
    wanted_title = scraper_utils.normalize_title(title)
    match_year = ''  # never reassigned: this listing yields no year
    entry_id = re.compile(r'movie-+\d+')

    results = []
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'id': entry_id}):
        is_tvshow = dom_parser2.parse_dom(item, 'div', {'class': 'movieTV'})
        if is_tvshow:
            type_matches = video_type == VIDEO_TYPES.TVSHOW
        else:
            type_matches = video_type == VIDEO_TYPES.MOVIE
        if not type_matches:
            continue

        heading = dom_parser2.parse_dom(item, 'h4', {'class': 'showRowName'})
        if not heading:
            continue
        links = dom_parser2.parse_dom(heading[0].content, 'a', req='href')
        if not links:
            continue

        match_url, match_title = links[0].attrs['href'], links[0].content
        # Skip URLs that contain a path segment of 7+ digits (optionally negative).
        if re.search(r'/-?\d{7,}/', match_url):
            continue

        candidate_title = scraper_utils.normalize_title(match_title)
        titles_overlap = candidate_title in wanted_title or wanted_title in candidate_title
        year_ok = not year or not match_year or year == match_year
        if titles_overlap and year_ok:
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'url': scraper_utils.pathify_url(match_url),
                'year': match_year,
            })
    return results