def get_sources(self, video):
    # Collect direct streaming sources for |video| via the site's AJAX video API.
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=0)
    # The watch page embeds the id needed for the API call.
    match = re.search('var\s*video_id\s*=\s*"([^"]+)', html)
    if not match: return hosters
    video_id = match.group(1)
    headers = {'Referer': page_url}
    headers.update(XHR)
    # Priming POST to /av — presumably sets a session/anti-bot cookie; response unused.
    _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)
    vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
    html = self._http_get(vid_url, data={'v': video_id}, headers=headers, cache_limit=0)
    # Response is JSON of {label: value}; each value embeds a url= query param.
    for source, value in scraper_utils.parse_json(html, vid_url).iteritems():
        match = re.search('url=(.*)', value)
        if not match: continue
        stream_url = urllib.unquote(match.group(1))
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            # Google Video links encode their own quality.
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            # Otherwise the JSON key carries the height/label.
            quality = scraper_utils.height_get_quality(source)
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    # Gather sources from the main page plus any alternate-server pages.
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)
    pages = self.__get_alt_pages(html, page_url)
    sources = self.__get_sources(html, page_url, pages.get(page_url, True))
    for page in pages:
        if page == page_url: continue  # main page already scraped above
        # NOTE(review): urljoin is called with three args here while other call
        # sites pass two — confirm the helper accepts a third path component.
        page_url = scraper_utils.urljoin(self.base_url, page, pages[page])
        html = self._http_get(page_url, cache_limit=1)
        sources.update(self.__get_sources(html, page, pages[page]))
    for stream_url, values in sources.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
            direct = True
        elif values['direct']:
            quality = values['quality']
            direct = True
        else:
            # Hosted elsewhere: report the hosting site as the host.
            quality = values['quality']
            direct = False
            host = urlparse.urlparse(stream_url).hostname
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
        if values['subs']: hoster['subs'] = 'Turkish Subtitles'
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    # Pull direct sources out of the film-container iframe's player page.
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'film-container'})
    if fragment:
        iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
        if iframe_url:
            iframe_url = scraper_utils.urljoin(self.base_url, iframe_url[0].attrs['src'])
            headers = {'Referer': page_url}
            html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
            sources = scraper_utils.parse_sources_list(self, html)
            for source in sources:
                quality = sources[source]['quality']
                host = scraper_utils.get_direct_hostname(self, source)
                stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url})
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                # A bitrate tag in the URL (e.g. "3500kbps") is surfaced as 'extra'.
                match = re.search('(\d+[a-z]bps)', source)
                if match: hoster['extra'] = match.group(1)
                hosters.append(hoster)
    # Highest bitrate first; sources without a bitrate string sort last.
    hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
    return hosters
def get_sources(self, video):
    # Find hosters by scanning daily release pages near the episode airdate,
    # falling back to the site's index page.
    hosters = []
    sources = {}
    today = datetime.datetime.today().date()
    # Oldest airdate still worth scanning, per the user-configured filter.
    max_age = today - datetime.timedelta(days=self.filter)
    if video.ep_airdate and max_age < video.ep_airdate:
        # Releases typically appear the day after airing, so try that page first.
        day_after = video.ep_airdate + datetime.timedelta(days=1)
        for day in [day_after, video.ep_airdate]:
            if day < today:
                page_url = EP_PAGE % (day.strftime('%Y.%m.%d'))
                page_url = scraper_utils.urljoin(self.base_url, page_url)
                html = self._http_get(page_url, require_debrid=True, cache_limit=30 * 24)
                sources.update(self.__get_sources(video, html))
            if sources: break
    if not sources and kodi.get_setting('scraper_url'):
        page_url = scraper_utils.urljoin(self.base_url, '/index.html')
        html = self._http_get(page_url, require_debrid=True, cache_limit=2)
        sources.update(self.__get_sources(video, html))
    for source in sources:
        host = urlparse.urlparse(source).hostname
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': sources[source], 'direct': False}
        hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    # Search the site; needs a priming POST to /av plus referer cookies.
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/results')
    params = {'q': title}
    referer = search_url + '?' + urllib.urlencode(params)
    headers = {'Referer': referer}
    headers.update(XHR)
    # Priming POST — presumably sets anti-bot cookies; response unused.
    _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)
    cookies = {'begin_referer': referer, 'prounder': 1}
    html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=8)
    # A page still referencing jquery.js is the anti-bot interstitial;
    # retry once uncached now that the cookies are set.
    if any('jquery.js' in match.attrs['src'] for match in dom_parser2.parse_dom(html, 'script', req='src')):
        html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=0)
    for _attrs, result in dom_parser2.parse_dom(html, 'div', {'class': 'cell'}):
        title_frag = dom_parser2.parse_dom(result, 'div', {'class': 'video_title'})
        year_frag = dom_parser2.parse_dom(result, 'div', {'class': 'video_quality'})
        if not title_frag: continue
        match = dom_parser2.parse_dom(title_frag[0].content, 'a', req='href')
        if not match: continue
        match_url = match[0].attrs['href']
        match_title = match[0].content
        # year_frag may be empty or not contain a year; best-effort.
        try:
            match = re.search('\s+(\d{4})\s+', year_frag[0].content)
            match_year = match.group(1)
        except:
            match_year = ''
        if not year or not match_year or year == match_year:
            result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results
def __get_source_page(self, video_type, page_url):
    # Resolve the 'watching' page and fetch the server-list HTML for a title.
    # Returns (movie_id, watching_url, html); empty strings on any failure.
    match = re.search('/movie/(.*?)-(\d+)\.html', page_url)
    if not match: return '', '', ''
    slug, movie_id = match.groups()
    vid_type = 'movie' if video_type == VIDEO_TYPES.MOVIE else 'series'
    qp_url = QP_URL.format(slug=slug, movie_id=movie_id, vid_type=vid_type)
    qp_url = scraper_utils.urljoin(self.base_url, qp_url)
    headers = {'Referer': scraper_utils.urljoin(self.base_url, page_url)}
    headers.update(XHR)
    html = self._http_get(qp_url, headers=headers, cache_limit=8)
    watching_url = dom_parser2.parse_dom(html, 'a', {'title': re.compile('View all episodes')}, req='href')
    if not watching_url: return '', '', ''
    watching_url = watching_url[0].attrs['href']
    page_html = self._http_get(watching_url, headers={'Referer': scraper_utils.urljoin(self.base_url, page_url)}, cache_limit=8)
    # Fetch the hidden tracker images — presumably required to mark the session
    # as a real viewer before the server list is served; responses unused.
    for attrs, _content in dom_parser2.parse_dom(page_html, 'img', {'class': 'hidden'}, req='src'):
        _img = self._http_get(attrs['src'], headers={'Referer': watching_url}, cache_limit=8)
    sl_url = SL_URL.format(movie_id=movie_id)
    sl_url = scraper_utils.urljoin(self.base_url, sl_url)
    html = self._http_get(sl_url, headers=headers, cache_limit=8)
    js_data = scraper_utils.parse_json(html, sl_url)
    # Best-effort: the JSON wraps the server-list markup under 'html'.
    try:
        html = js_data['html']
    except:
        html = ''
    return movie_id, watching_url, html
def get_sources(self, video):
    # Combine GK-player links and the site's own hosted links into hosters.
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.EPISODE:
        # Keep only anchors for this episode (or generic "Server N" links).
        gk_html = ''.join(match.group(0) for match in re.finditer('<a[^>]*>(%s|Server \d+)</a>' % (video.episode), html, re.I))
    else:
        gk_html = html
    link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
    player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
    sources = scraper_utils.get_gk_links(self, gk_html, page_url, QUALITIES.HIGH, link_url, player_url)
    sources.update(self.__get_ht_links(html, page_url))
    for stream_url, quality in sources.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            direct = True
        else:
            host = urlparse.urlparse(stream_url).hostname
            direct = False
        if host is None: continue  # unparsable URL
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    # Resolve the watch page (movies go via the poster link and a best-episode
    # pick), then collect either direct <source> streams or hosted iframes.
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    headers = {'Referer': page_url}
    html = self._http_get(page_url, headers=headers, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.MOVIE:
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'poster'})
        if fragment:
            movie_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
            if movie_url:
                page_url = scraper_utils.urljoin(self.base_url, movie_url[0].attrs['href'])
                html = self._http_get(page_url, cache_limit=.5)
                episodes = self.__get_episodes(html)
                page_url = self.__get_best_page(episodes)
                if not page_url:
                    return hosters
                else:
                    page_url = scraper_utils.urljoin(self.base_url, page_url)
                    html = self._http_get(page_url, cache_limit=.5)
    # iframes mean hosted (indirect) streams; <source> tags are direct files.
    streams = dom_parser2.parse_dom(html, 'iframe', req='src')
    if streams:
        streams = [(attrs['src'], 480) for attrs, _content in streams]
        direct = False
    else:
        streams = [(attrs['src'], attrs.get('data-res', 480)) for attrs, _content in dom_parser2.parse_dom(html, 'source', req=['src'])]
        direct = True
    headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url}
    for stream_url, height in streams:
        if 'video.php' in stream_url or 'moviexk.php' in stream_url:
            # These are redirectors; re-quote the title param and resolve the
            # final location with a HEAD request.
            if 'title=' in stream_url:
                title = stream_url.split('title=')[-1]
                stream_url = stream_url.replace(title, urllib.quote(title))
            redir_url = self._http_get(stream_url, headers=headers, allow_redirect=False, method='HEAD', cache_limit=0)
            if redir_url.startswith('http'):
                redir_url = redir_url.replace(' ', '').split(';codec')[0]
                stream_url = redir_url
            else:
                continue  # redirect failed; skip this stream
        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(height)
            stream_url += scraper_utils.append_headers(headers)
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.height_get_quality(height)
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
        hosters.append(source)
    return hosters
def __login(self):
    # POST JSON credentials to the v2 login API; True when 'status' == 'success'.
    url = scraper_utils.urljoin(self.base_url, '/apis/v2/user/login.json')
    data = {'email': self.username, 'password': self.password, 'rememberMe': True}
    referer = scraper_utils.urljoin(self.base_url, '/login')
    headers = {'Content-Type': 'application/json', 'Referer': referer}
    headers.update(XHR)
    # NOTE(review): super(self.__class__, self) recurses infinitely if this
    # class is ever subclassed; prefer naming the class explicitly.
    html = super(self.__class__, self)._http_get(url, data=json.dumps(data), headers=headers, cache_limit=0)
    js_data = scraper_utils.parse_json(html, url)
    return js_data.get('status') == 'success'
def _get_episode_url(self, show_url, video):
    """Locate the episode link on the show's season page."""
    ep_pattern = 'href="([^"]+/season/%s/episode/%s/?)"' % (video.season, video.episode)
    t_pattern = 'href="(?P<url>[^"]+)"[^>]+title="(?:S\d+\s*E\d+:\s*)?(?P<title>[^"]+)'
    referer = {'Referer': scraper_utils.urljoin(self.base_url, show_url)}
    season_url = scraper_utils.urljoin(
        self.base_url, scraper_utils.urljoin(show_url, '/season/%s' % (video.season)))
    page = self._http_get(season_url, headers=referer, cache_limit=2)
    fragment = dom_parser2.parse_dom(page, 'div', {'id': 'episodes'})
    return self._default_get_episode_url(fragment, video, ep_pattern, t_pattern)
def _get_episode_url(self, show_url, video):
    """Query the season AJAX endpoint using the show id embedded in the page."""
    page = self._http_get(scraper_utils.urljoin(self.base_url, show_url), cache_limit=24)
    show_id = dom_parser2.parse_dom(page, 'div', {'id': 'icerikid'}, req='value')
    if not show_id:
        return
    ep_pattern = 'href="([^"]*-%s-sezon-%s-bolum[^"]*)"' % (video.season, video.episode)
    t_pattern = 'href="(?P<url>[^"]+)[^>]*class="realcuf".*?class="realcuf">(?P<title>[^<]*)'
    season_url = scraper_utils.urljoin(self.base_url, SEASON_URL)
    payload = {'sezon_id': video.season, 'dizi_id': show_id[0].attrs['value'], 'tip': 'dizi', 'bolumid': ''}
    season_html = self._http_get(season_url, data=payload, headers=XHR, cache_limit=2)
    return self._default_get_episode_url(season_html, video, ep_pattern, t_pattern)
def _get_episode_url(self, show_url, video):
    """Follow the poster link to the real show page, then match the episode."""
    html = self._http_get(scraper_utils.urljoin(self.base_url, show_url), cache_limit=24)
    poster = dom_parser2.parse_dom(html, 'div', {'class': 'poster'})
    if not poster:
        return
    anchors = dom_parser2.parse_dom(poster[0].content, 'a', req='href')
    if not anchors:
        return
    real_url = scraper_utils.urljoin(self.base_url, anchors[0].attrs['href'])
    html = self._http_get(real_url, cache_limit=2)
    servers = dom_parser2.parse_dom(html, 'div', {'id': 'servers'})
    ep_pattern = 'href="([^"]+)[^>]+>[Ee][Pp]\s*(?:[Ss]0*%s-)?E?p?0*%s(?!\d)' % (video.season, video.episode)
    return self._default_get_episode_url(servers or html, video, ep_pattern)
def _http_get(self, url, params=None, data=None, multipart_data=None, headers=None, cookies=None, allow_redirect=True, method=None, require_debrid=False, read_error=False, cache_limit=8):
    # Wrapper around the base scraper fetch that transparently retries once
    # when the response shows the site has moved to a new base url.
    real_url = scraper_utils.urljoin(self.base_url, url)
    # NOTE(review): super(self.__class__, self) recurses infinitely if this
    # class is subclassed; prefer naming the class explicitly.
    html = super(self.__class__, self)._http_get(real_url, params=params, data=data, multipart_data=multipart_data, headers=headers, cookies=cookies, allow_redirect=allow_redirect, method=method, require_debrid=require_debrid, read_error=read_error, cache_limit=cache_limit)
    if self.__update_base_url(html):
        # base_url changed: rebuild the absolute url and fetch again.
        real_url = scraper_utils.urljoin(self.base_url, url)
        html = super(self.__class__, self)._http_get(real_url, params=params, data=data, multipart_data=multipart_data, headers=headers, cookies=cookies, allow_redirect=allow_redirect, method=method, require_debrid=require_debrid, read_error=read_error, cache_limit=cache_limit)
    return html
def _get_episode_url(self, show_url, video):
    """Find the matching season page, then the episode link in its detail divs."""
    show_url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(show_url, headers={'Referer': self.base_url}, cache_limit=24 * 7)
    season_match = re.search('href="([^"]*season=0*%s(?!\d))[^"]*' % (video.season), html)
    if not season_match:
        return
    ep_pattern = 'href="([^"]*/0*%s-0*%s/[^"]*)' % (video.season, video.episode)
    t_pattern = 'href="(?P<url>[^"]+)[^>]*>\s*(?P<title>.*?)\s*</a>'
    season_url = scraper_utils.urljoin(show_url, season_match.group(1))
    season_html = self._http_get(season_url, headers={'Referer': show_url}, cache_limit=2)
    details = dom_parser2.parse_dom(season_html, 'div', {'class': 'episodeDetail'})
    fragment = '\n'.join(d.content for d in details)
    return self._default_get_episode_url(fragment, video, ep_pattern, t_pattern)
def get_sources(self, video):
    # Walk server list -> episode list -> per-episode JSON to collect streams.
    source_url = self.get_url(video)
    sources = []
    if not source_url or source_url == FORCE_NO_MATCH: return sources
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    for attrs, _content in dom_parser2.parse_dom(html, 'img', req=['data-id', 'data-name']):
        film_id, data_name = attrs['data-id'], attrs['data-name']
        data = {'id': film_id, 'n': data_name}
        server_url = scraper_utils.urljoin(self.base_url, SERVER_URL)
        server_url = server_url % (film_id)
        headers = {'Referer': page_url}
        headers.update(XHR)
        html = self._http_get(server_url, data=data, headers=headers, cache_limit=.5)
        for attrs, _content in dom_parser2.parse_dom(html, 'a', req='data-id'):
            data = {'epid': attrs['data-id']}
            ep_url = scraper_utils.urljoin(self.base_url, EP_URL)
            ep_url = ep_url % (attrs['data-id'])
            headers = {'Referer': page_url}
            headers.update(XHR)
            html = self._http_get(ep_url, data=data, headers=headers, cache_limit=.5)
            js_data = scraper_utils.parse_json(html, ep_url)
            # The JSON either embeds iframes or carries parallel lists of
            # links ('l') and qualities ('q'); best-effort on each shape.
            try:
                links = [r.attrs['src'] for r in dom_parser2.parse_dom(js_data['link']['embed'], 'iframe', req='src')]
            except:
                try:
                    links = js_data['link']['l']
                except:
                    links = []
            try:
                heights = js_data['link']['q']
            except:
                heights = []
            # map(None, ...) is Py2 zip_longest: pads the shorter list with None.
            for stream_url, height in map(None, links, heights):
                match = re.search('movie_url=(.*)', stream_url)
                if match:
                    stream_url = match.group(1)
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                    stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                    direct = True
                else:
                    host = urlparse.urlparse(stream_url).hostname
                    if height:
                        quality = scraper_utils.height_get_quality(height)
                    else:
                        quality = QUALITIES.HD720  # no quality info; assume 720p
                    direct = False
                source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                sources.append(source)
    return sources
def __add_torrent(self, hash_id):
    """Queue the magnet for |hash_id| with the service, skipping duplicates.

    Returns True when the hash is already present in the transfer list or
    the add request reports success; False otherwise.
    """
    list_url = scraper_utils.urljoin(self.base_url, LIST_URL)
    js_data = self._json_get(list_url, cache_limit=0)
    # Normalize case on BOTH sides: the original only lowered the service's
    # hash, so an upper-case hash_id from the caller never matched and the
    # torrent was re-added every time.
    hash_id = hash_id.lower()
    for transfer in js_data.get('transfers', []):
        if transfer['hash'].lower() == hash_id:
            return True  # already queued/downloaded
    add_url = scraper_utils.urljoin(self.base_url, ADD_URL)
    data = {'src': MAGNET_LINK % hash_id}
    js_data = self._json_get(add_url, data=data, cache_limit=0)
    return js_data.get('status') == 'success'
def resolve_link(self, link):
    """Resolve |link| to its final stream URL by following the site redirect.

    Returns the redirected URL, or None when no "file" entry is found.
    """
    url = scraper_utils.urljoin(self.base_url, link)
    html = self._http_get(url, cache_limit=.5)
    match = re.search('"file"\s*:\s*"([^"]+)', html)
    if not match:
        return  # no playable file in the page
    stream_url = scraper_utils.urljoin(self.base_url, match.group(1))
    cj = self._set_cookies(self.base_url, {})
    request = urllib2.Request(stream_url)
    request.add_header('User-Agent', scraper_utils.get_ua())
    # Host/Referer must not survive redirects, hence add_unredirected_header.
    request.add_unredirected_header('Host', request.get_host())
    request.add_unredirected_header('Referer', url)
    cj.add_cookie_header(request)
    # Fix: the original never closed the response, leaking the connection.
    response = urllib2.urlopen(request)
    try:
        return response.geturl()
    finally:
        response.close()
def get_sources(self, video):
    """Scrape sources from the main page and any linked 'parts-middle' pages."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    hosters += self.__get_sources(html, url)
    # Only the first parts-middle block is consulted, matching the original.
    for fragment in dom_parser2.parse_dom(html, 'div', {'class': 'parts-middle'})[:1]:
        for attrs, _content in dom_parser2.parse_dom(fragment.content, 'a', req='href'):
            part_url = scraper_utils.urljoin(self.base_url, attrs['href'])
            part_html = self._http_get(part_url, cache_limit=8)
            hosters += self.__get_sources(part_html, part_url)
    return hosters
def get_sources(self, video):
    """Parse hoster links from the post body; quality comes from the title."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=False, cache_limit=.5)
    title_match = re.search('class="entry-title">([^<]+)', html)
    q_str = title_match.group(1) if title_match else ''
    for link_match in re.finditer('href="?([^" ]+)(?:[^>]+>){2}\s+\|', html, re.DOTALL):
        stream_url = link_match.group(1)
        if 'adf.ly' in stream_url:
            continue  # skip ad-shortened links
        host = urlparse.urlsplit(stream_url).hostname
        hosters.append({'multi-part': False, 'class': self, 'views': None,
                        'url': stream_url, 'rating': None, 'host': host,
                        'quality': scraper_utils.blog_get_quality(video, q_str, host),
                        'direct': False})
    return hosters
def get_sources(self, video):
    """Collect redirect-wrapped hoster links; views parsed from the page."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    views_match = re.search('Views?\s*:\s*(\d+)', html, re.I)
    views = views_match.group(1) if views_match else None
    for link in re.finditer('href="[^"]+/rd\.html\?url=([^"]+)', html):
        stream_url = link.group(1)
        host = urlparse.urlsplit(stream_url).hostname
        hosters.append({'multi-part': False, 'host': host, 'url': stream_url,
                        'class': self, 'rating': None, 'views': views,
                        'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                        'direct': False})
    return hosters
def get_sources(self, video):
    """Build hosters from the post's links, skipping excluded hosts."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, headers=self.headers, require_debrid=False, cache_limit=.5)
    sources = self.__get_post_links(html, video)
    for source in sources:
        if scraper_utils.excluded_link(source):
            continue
        hosters.append({'multi-part': False, 'host': urlparse.urlparse(source).hostname,
                        'class': self, 'views': None, 'url': source, 'rating': None,
                        'quality': sources[source], 'direct': False})
    return hosters
def get_sources(self, video):
    """Derive quality from the release name embedded with each post link."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    for source, value in self.__get_post_links(html).iteritems():
        if scraper_utils.excluded_link(source):
            continue
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(value['release'])
        else:
            meta = scraper_utils.parse_episode_link(value['release'])
        hoster = {'multi-part': False, 'host': urlparse.urlparse(source).hostname,
                  'class': self, 'views': None, 'url': source, 'rating': None,
                  'quality': scraper_utils.height_get_quality(meta['height']),
                  'direct': False}
        if 'format' in meta:
            hoster['format'] = meta['format']
        hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """POST a title search and harvest result links from the results table."""
    results = []
    search_in = 'm' if video_type == VIDEO_TYPES.MOVIE else 't'
    search_url = scraper_utils.urljoin(self.base_url, '/search')
    html = self._http_get(search_url, data={'searchquery': title, 'searchin': search_in}, cache_limit=8)
    page = dom_parser2.parse_dom(html, 'div', {'class': 'search-page'})
    if not page:
        return results
    table = dom_parser2.parse_dom(page[0].content, 'table')
    if not table:
        return results
    for attrs, match_title_year in dom_parser2.parse_dom(table[0].content, 'a', req='href'):
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not year or not match_year or year == match_year:
            results.append({'url': scraper_utils.pathify_url(attrs['href']),
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year})
    return results
def get_sources(self, video):
    """Quality comes from the release string; x265/HEVC releases are tagged."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=False, cache_limit=.5)
    for source, values in self.__get_post_links(html).iteritems():
        if scraper_utils.excluded_link(source):
            continue
        host = urlparse.urlparse(source).hostname
        release = values['release']
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None,
                  'url': source, 'rating': None, 'direct': False,
                  'quality': scraper_utils.blog_get_quality(video, release, host)}
        if 'X265' in release or 'HEVC' in release:
            hoster['format'] = 'x265'
        hosters.append(hoster)
    return hosters
def __find_episode(self, show_url, video):
    # Return (info_hash, title) pairs for magnets matching SxxExx, falling
    # back to an airdate match when no episode-number match exists.
    url = scraper_utils.urljoin(self.tv_base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    hashes = []
    for attrs, _magnet in dom_parser2.parse_dom(html, 'a', {'class': 'magnet'}, req=['href', 'title']):
        magnet_link, magnet_title = attrs['href'], attrs['title']
        match = re.search('urn:btih:(.*?)(?:&|$)', magnet_link, re.I)
        if match:
            # Drop the boilerplate " magnet link" suffix from the title.
            magnet_title = re.sub(re.compile('\s+magnet\s+link', re.I), '', magnet_title)
            hashes.append((match.group(1), magnet_title))
    episode_pattern = 'S%02d\s*E%02d' % (int(video.season), int(video.episode))
    if video.ep_airdate:
        # Two-stage fill: .format substitutes the {delim} separators, then the
        # %-operator fills in the year/month/day fields.
        airdate_pattern = '%d{delim}%02d{delim}%02d'.format(delim=DELIM)
        airdate_pattern = airdate_pattern % (video.ep_airdate.year, video.ep_airdate.month, video.ep_airdate.day)
    else:
        airdate_pattern = ''
    matches = [link for link in hashes if re.search(episode_pattern, link[1], re.I)]
    if not matches and airdate_pattern:
        matches = [link for link in hashes if re.search(airdate_pattern, link[1])]
    return matches
def get_sources(self, video):
    """Fetch the stream URL from the site's JSON API using the id query param."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    query = scraper_utils.parse_query(source_url)
    if 'id' not in query:
        return hosters
    vid_type = 'movies' if video.video_type == VIDEO_TYPES.MOVIE else 'episodes'
    url = scraper_utils.urljoin(self.base_url, '/api/v2/%s/%s' % (vid_type, query['id']))
    js_data = self._http_get(url, cache_limit=.5)
    if 'url' in js_data:
        stream_url = js_data['url']
        hosters.append({'multi-part': False,
                        'host': scraper_utils.get_direct_hostname(self, stream_url),
                        'class': self, 'url': stream_url, 'quality': QUALITIES.HD720,
                        'views': None, 'rating': None, 'direct': True})
    return hosters
def _get_episode_url(self, show_url, video):
    """Match sXXeYY style episode links inside the show page's entry div."""
    ep_pattern = 'href="([^"]+s0*%s-?e0*%s[^"]+)' % (video.season, video.episode)
    t_pattern = 'href="(?P<url>[^"]+-s\d+-?e\d+-(?P<title>[^/"]*)[^"]*)'
    page = self._http_get(scraper_utils.urljoin(self.base_url, show_url), cache_limit=2)
    entry = dom_parser2.parse_dom(page, 'div', {'class': 'entry'})
    return self._default_get_episode_url(entry or page, video, ep_pattern, t_pattern)
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Match |title| against the full movie/show listing from the JSON API."""
    results = []
    if video_type == VIDEO_TYPES.MOVIE:
        url, key = '/api/v2/movies', 'movies'
    else:
        url, key = '/api/v2/shows', 'shows'
    js_data = self._http_get(scraper_utils.urljoin(self.base_url, url), cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    if key in js_data:
        for item in js_data[key]:
            match_title = item['name']
            match_year = item.get('year', '')
            if norm_title in scraper_utils.normalize_title(match_title):
                if not year or not match_year or year == match_year:
                    results.append({'url': scraper_utils.pathify_url('?id=%s' % (item['id'])),
                                    'title': scraper_utils.cleanse_title(match_title),
                                    'year': match_year})
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Run the DLE CMS search and scrape results from short-film blocks."""
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/index.php')
    data = {'subaction': 'search', 'story': title, 'do': 'search'}
    html = self._http_get(search_url, params={'do': 'search'}, data=data,
                          headers={'Referer': search_url}, cache_limit=1)
    content = dom_parser2.parse_dom(html, 'div', {'id': 'dle-content'})
    if not content:
        return results
    for _attrs, item in dom_parser2.parse_dom(content[0].content, 'div', {'class': 'short-film'}):
        match = re.search('<h5><a\s+href="([^"]+)[^>]+title="([^"]+)', item)
        if match:
            url, match_title = match.groups('')
            results.append({'url': scraper_utils.pathify_url(url),
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': ''})
    return results
def get_sources(self, video):
    """Collect links from entry-content table cells; quality from link height."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    sources = {}
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'entry-content'})
    if fragment:
        for _attrs, td in dom_parser2.parse_dom(fragment[0].content, 'td'):
            for attrs, _content in dom_parser2.parse_dom(td, 'a', req='href'):
                meta = scraper_utils.parse_episode_link(attrs['href'])
                sources[attrs['href']] = scraper_utils.height_get_quality(meta['height'])
    for source, quality in sources.iteritems():
        if scraper_utils.excluded_link(source):
            continue
        hosters.append({'multi-part': False, 'host': urlparse.urlparse(source).hostname,
                        'class': self, 'views': None, 'url': source, 'rating': None,
                        'quality': quality, 'direct': False})
    return hosters
def get_sources(self, video):
    """Decode base64-wrapped links; first link per host is HD, repeats are SD."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    seen_hosts = set()
    for match in re.finditer("/w\.php\?q=([^']+)", html, re.DOTALL):
        stream_url = match.group(1).decode('base-64')
        host = urlparse.urlsplit(stream_url).hostname
        # The page lists HD links first and SD links second, so a repeated
        # host means we've reached the SD list.
        base_quality = QUALITIES.HIGH if host in seen_hosts else QUALITIES.HD720
        seen_hosts.add(host)
        hosters.append({'multi-part': False, 'url': stream_url, 'class': self,
                        'host': host, 'views': None, 'rating': None, 'direct': False,
                        'quality': scraper_utils.get_quality(video, host, base_quality)})
    return hosters
def get_sources(self, video):
    """The page-level quality header applies to every listed hoster link."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    quality = None
    q_match = re.search('Links\s+-\s+Quality\s*([^<]*)</h1>', html, re.DOTALL | re.I)
    if q_match:
        quality = QUALITY_MAP.get(q_match.group(1).strip().upper())
    links_div = dom_parser2.parse_dom(html, 'div', {'id': 'links'})
    if not links_div:
        return hosters
    for _attrs, item in dom_parser2.parse_dom(links_div[0].content, 'ul'):
        anchors = dom_parser2.parse_dom(item, 'a', req='href')
        host_items = dom_parser2.parse_dom(item, 'li', {'id': 'download'})
        if not anchors or not host_items:
            continue
        host = host_items[-1].content
        hosters.append({'multi-part': False, 'host': host, 'class': self,
                        'url': anchors[0].attrs['href'],
                        'quality': scraper_utils.get_quality(video, host, quality),
                        'rating': None, 'views': None, 'direct': False})
    return hosters
def get_sources(self, video):
    """Follow each player iframe and extract its downloadUrl as a direct link."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'videoPlayer'}):
        for attrs, _content in dom_parser2.parse_dom(fragment, 'iframe', req='src'):
            iframe_html = self._http_get(attrs['src'], headers={'Referer': page_url}, cache_limit=.5)
            match = re.search('downloadUrl\s*=\s*"([^"]+)', iframe_html)
            if not match:
                continue
            stream_url = match.group(1)
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = QUALITIES.HIGH
            hosters.append({'multi-part': False, 'host': host, 'class': self,
                            'quality': quality, 'views': None, 'rating': None,
                            'url': stream_url, 'direct': True})
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the DLE index and parse the short_content result blocks."""
    results = []
    payload = {'hash': 'indexert', 'do': 'search', 'subaction': 'search',
               'search_start': 0, 'full_search': 0, 'result_from': 1, 'story': title}
    search_url = scraper_utils.urljoin(self.base_url, 'index.php')
    html = self._http_get(search_url, params={'do': 'search'}, data=payload, cache_limit=8)
    if not dom_parser2.parse_dom(html, 'div', {'class': 'sresult'}):
        return results
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'short_content'}):
        link = dom_parser2.parse_dom(item, 'a', req='href')
        header = dom_parser2.parse_dom(item, 'div', {'class': 'short_header'})
        if not link or not header:
            continue
        match_title, match_year = scraper_utils.extra_year(header[0].content)
        if not year or not match_year or year == match_year:
            results.append({'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': scraper_utils.pathify_url(link[0].attrs['href'])})
    return results
def get_sources(self, video):
    """Collect hoster links from the page's <tbody> link table."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'tbody')
    # NOTE(review): when no <tbody> is found, the empty parse_dom result list
    # is passed straight to the next parse_dom call below — confirm intended.
    if fragment:
        fragment = fragment[0].content
    for attrs, content in dom_parser2.parse_dom(fragment, 'a', req='href'):
        stream_url = attrs['href']
        # The host name is rendered as an <img> inside the anchor.
        match = dom_parser2.parse_dom(content, 'img')
        if not match: continue
        host = match[0].content.strip()
        quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
        hoster = {
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False
        }
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scan all anchors for direct movie links; quality comes from the filename."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
        stream_url = attrs['href']
        if MOVIE_URL in stream_url:
            # The link's filename encodes metadata (height, format).
            meta = scraper_utils.parse_movie_link(stream_url)
            stream_url = scraper_utils.pathify_url(
                stream_url) + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua()})
            quality = scraper_utils.height_get_quality(meta['height'])
            hoster = {
                'multi-part': False,
                'host': scraper_utils.get_direct_hostname(self, stream_url),
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True
            }
            if 'format' in meta: hoster['format'] = meta['format']
            hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Match the title against the site's category dropdown (one category per show)."""
    results = []
    html = self._http_get(self.base_url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    fragment = dom_parser2.parse_dom(html, 'select', {'id': 'cat'})
    if fragment:
        for attrs, label in dom_parser2.parse_dom(fragment[0].content, 'option',
                                                  {'class': 'level-0'}, req='value'):
            label = scraper_utils.cleanse_title(label)
            # Strip the trailing post count, e.g. "Show Name (12)".
            label = re.sub('\s+\(\d+\)$', '', label)
            if norm_title in scraper_utils.normalize_title(label):
                cat_url = scraper_utils.urljoin(
                    self.base_url, '/?cat=%s' % (attrs['value']))
                # Category pages may redirect; the helper returns the redirect
                # target as a bare URL string when allow_redirect is off.
                html = self._http_get(cat_url, allow_redirect=False, cache_limit=8)
                if html.startswith('http'):
                    cat_url = html
                result = {
                    'url': scraper_utils.pathify_url(cat_url),
                    'title': label,
                    'year': ''
                }
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the flat results page, split into Movies / TV Series sections."""
    # Non-movie results require a paid account; skip unless paid links are enabled.
    if not self.include_paid and video_type != VIDEO_TYPES.MOVIE:
        return []
    search_url = scraper_utils.urljoin(self.base_url, '/search.php')
    html = self._http_get(search_url, params={'q': title}, cache_limit=.25)
    results = []
    # Pick the section of the page that matches the requested video type.
    if video_type == VIDEO_TYPES.MOVIE:
        pattern = '<i>\s*Movies\s*</i>(.*)'
    else:
        pattern = '<i>\s*TV Series\s*</i>(.*)'
    match = re.search(pattern, html)
    if not match: return results
    container = match.group(1)
    # Anchors look like: href='...'>Title </a> (2014)
    pattern = "href='([^']+)'>([^<]+)\s*</a>\s*(?:\((\d{4})\))?"
    for match in re.finditer(pattern, container):
        url, match_title, match_year = match.groups('')
        if not year or not match_year or year == match_year:
            result = {
                'url': scraper_utils.pathify_url(url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)
    return results
def __login(self):
    """Authenticate against the site's JSON login API; raise on failure."""
    login_url = scraper_utils.urljoin(self.base_url, '/api/v1/user/login')
    payload = json.dumps({'user': self.username, 'password': self.password})
    response = self._http_get(login_url, data=payload,
                              headers={'Content-Type': 'application/json'},
                              cache_limit=0)
    result = scraper_utils.parse_json(response, login_url)
    if 'user' not in result:
        raise Exception('sit2play login failed')
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search by URL path; the page exposes one match inside the 'who-likes' div."""
    results = []
    search_url = scraper_utils.urljoin(
        self.base_url, '/search/%s' % (urllib.quote(title)))
    html = self._http_get(search_url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'id': 'who-likes'})
    if not fragment: return results
    fragment = fragment[0].content
    match_url = dom_parser2.parse_dom(fragment, 'a', req='href')
    # Title and year come from the poster image's alt text.
    match_title_year = dom_parser2.parse_dom(fragment, 'img', req='alt')
    if match_url and match_title_year:
        match_url = match_url[0].attrs['href']
        match_title_year = match_title_year[0].attrs['alt']
        match_title, match_year = scraper_utils.extra_year(
            match_title_year)
        if not year or not match_year or year == match_year:
            result = {
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            }
            results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    """Locate the episode link inside the show page's season wrapper."""
    by_number = 'href="([^"]*-[Ss]0*%s[Ee]0*%s-[^"]+)' % (video.season, video.episode)
    by_title = 'class="head".*?</span>(?P<title>.*?)</a>.*?href="(?P<url>[^"]+)'
    page_url = scraper_utils.urljoin(self.base_url, show_url)
    listing = self._http_get(page_url, cache_limit=2)
    season_fragment = dom_parser2.parse_dom(listing, 'div', {'id': 'season-wrapper'})
    return self._default_get_episode_url(season_fragment, video, by_number, by_title)
def __get_direct_links(self, iframe_url, page_url):
    """Resolve the player iframe into direct stream sources.

    Falls back to a single non-direct nested-iframe source when no stream
    list can be parsed from the player HTML.
    """
    sources = []
    headers = {'Referer': page_url}
    html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
    # if captions exist, then they aren't hardcoded
    subs = '' if re.search('kind\s*:\s*"captions"', html) else 'Turkish subtitles'
    streams = scraper_utils.parse_sources_list(self, html, key='VideoSources')
    streams.update(scraper_utils.parse_sources_list(self, html, var='video'))
    for stream_url in streams:
        quality = streams[stream_url]['quality']
        if 'v.asp' in stream_url:
            # v.asp links are same-site redirectors; resolve with a HEAD request.
            stream_url = scraper_utils.urljoin(self.base_url, stream_url)
            stream_redirect = self._http_get(stream_url, allow_redirect=False, method='HEAD', cache_limit=0)
            if stream_redirect.startswith('http'):
                stream_url = stream_redirect
        sources.append({'stream_url': stream_url, 'subs': subs, 'quality': quality, 'direct': True})
    if sources: return sources
    # No parsed streams: fall back to a nested iframe as a non-direct 720p source.
    iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
    if not iframe_url: return sources
    sources.append({'stream_url': iframe_url[0].attrs['src'], 'subs': subs, 'quality': QUALITIES.HD720, 'direct': False})
    return sources
def get_sources(self, video):
    """Extract the play() URL for this title from the site's movies.js blob."""
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    js_url = scraper_utils.urljoin(self.base_url, '/javascript/movies.js')
    html = self._http_get(js_url, cache_limit=48)
    if source_url.startswith('/'):
        source_url = source_url[1:]
    # movies.js maps a DOM element id to its play() target URL.
    pattern = '''getElementById\(\s*"%s".*?play\(\s*'([^']+)''' % (source_url)
    match = re.search(pattern, html, re.I)
    if match:
        stream_url = match.group(1)
        if 'drive.google' in stream_url or 'docs.google' in stream_url:
            # Google links expand into multiple per-quality streams.
            sources = scraper_utils.parse_google(self, stream_url)
        else:
            sources = [stream_url]
        for source in sources:
            stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            host = scraper_utils.get_direct_hostname(self, source)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source)
                direct = True
            elif 'youtube' in stream_url:
                # YouTube links are handed off to a resolver rather than played directly.
                quality = QUALITIES.HD720
                direct = False
                host = 'youtube.com'
            else:
                quality = QUALITIES.HIGH
                direct = True
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """List external hoster links; quality/3D are derived from the page <title>."""
    source_url = self.get_url(video)
    sources = []
    if not source_url or source_url == FORCE_NO_MATCH: return sources
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    is_3d = False
    page_quality = QUALITIES.HD720
    title = dom_parser2.parse_dom(html, 'title')
    if title:
        title = title[0].content
        # e.g. "1080p" in the <title> sets the page-wide quality.
        match = re.search('(\d{3,})p', title)
        if match:
            page_quality = scraper_utils.height_get_quality(match.group(1))
        is_3d = True if re.search('\s+3D\s+', title) else False
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
    if fragment:
        for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'h3'):
            for attrs, _content in dom_parser2.parse_dom(item, 'a', req='href'):
                stream_url = attrs['href']
                host = urlparse.urlparse(stream_url).hostname
                source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': page_quality, 'views': None, 'rating': None, 'direct': False}
                # Every link is tagged x265 (hard-coded for this site).
                source['format'] = 'x265'
                source['3D'] = is_3d
                sources.append(source)
    return sources
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search movies.php (English cookies forced) and filter by type and year."""
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/movies.php')
    cookies = {'onlylanguage': 'en', 'lang': 'en'}
    params = {'list': 'search', 'search': title}
    html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=8)
    for _attrs, content in dom_parser2.parse_dom(html, 'TR', {'id': re.compile('coverPreview\d+')}):
        match = dom_parser2.parse_dom(content, 'a', req='href')
        if not match: continue
        match_url, match_title = match[0].attrs['href'], match[0].content
        # Shows are flagged inline in the title text, e.g. "Name (TVshow)".
        is_show = re.search('\(tvshow\)', match_title, re.I)
        if (video_type == VIDEO_TYPES.MOVIE and is_show) or (video_type == VIDEO_TYPES.TVSHOW and not is_show):
            continue
        match_title = match_title.replace('(TVshow)', '')
        match_title = match_title.strip()
        match_year = ''
        # The year sits in one of the sibling <div>s as a bare 4-digit value.
        for _attrs, div in dom_parser2.parse_dom(content, 'div'):
            match = re.match('\s*(\d{4})\s*', div)
            if match:
                match_year = match.group(1)
        if not year or not match_year or year == match_year:
            result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results
def get_sources(self, video):
    """Read hoster rows ('linkTr' divs) with hidden-URL and quality-text cells."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.EPISODE:
        # Narrow the page down to this episode's fragment first.
        html = self.__get_episode_fragment(html, video)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'linkTr'}):
        stream_url = dom_parser2.parse_dom(item, 'div', {'class': 'linkHiddenUrl'})
        q_str = dom_parser2.parse_dom(item, 'div', {'class': 'linkQualityText'})
        if stream_url and q_str:
            stream_url = stream_url[0].content
            q_str = q_str[0].content
            host = urlparse.urlparse(stream_url).hostname
            # Map the site's quality label to a base quality, then adjust per host.
            base_quality = QUALITY_MAP.get(q_str, QUALITIES.HIGH)
            quality = scraper_utils.get_quality(video, host, base_quality)
            source = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': False
            }
            hosters.append(source)
    return hosters
def _get_episode_url(self, show_url, video):
    """Find an episode via the JSON API: by S/E number, then airdate, then title.

    Returns a pathified '?id=...' URL, or None (implicitly) when nothing matches.
    """
    query = scraper_utils.parse_query(show_url)
    if 'id' in query:
        url = scraper_utils.urljoin(self.base_url, '/api/v2/shows/%s' % (query['id']))
        js_data = self._http_get(url, cache_limit=.5)
        if 'episodes' in js_data:
            force_title = scraper_utils.force_title(video)
            if not force_title:
                # Primary match: exact season/episode numbers.
                for episode in js_data['episodes']:
                    if int(video.season) == int(episode['season']) and int(video.episode) == int(episode['number']):
                        return scraper_utils.pathify_url('?id=%s' % (episode['id']))
                if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                    # NOTE(review): matches when the API airdate is one day after
                    # trakt's — presumably a timezone offset; confirm.
                    for episode in js_data['episodes']:
                        if 'airdate' in episode:
                            ep_airdate = scraper_utils.to_datetime(episode['airdate'], "%Y-%m-%d").date()
                            if video.ep_airdate == (ep_airdate - datetime.timedelta(days=1)):
                                return scraper_utils.pathify_url('?id=%s' % (episode['id']))
            else:
                logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
            if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
                # Final fallback: substring match on normalized episode names.
                norm_title = scraper_utils.normalize_title(video.ep_title)
                for episode in js_data['episodes']:
                    if 'name' in episode and norm_title in scraper_utils.normalize_title(episode['name']):
                        return scraper_utils.pathify_url('?id=%s' % (episode['id']))
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Scan the foreign-series index page for titles containing the query."""
    results = []
    listing_url = scraper_utils.urljoin(self.base_url, '/yabanci-diziler/')
    html = self._http_get(listing_url, cache_limit=48)
    wanted = scraper_utils.normalize_title(title)
    seen = set()
    for _attrs, post in dom_parser2.parse_dom(html, 'div', {'class': 'category-post'}):
        anchors = dom_parser2.parse_dom(post, 'a', req='href')
        headings = dom_parser2.parse_dom(post, 'h3')
        if not anchors or not headings:
            continue
        result_url = scraper_utils.pathify_url(anchors[0].attrs['href'])
        if result_url in seen:
            continue
        seen.add(result_url)
        heading = headings[0].content
        if wanted in scraper_utils.normalize_title(heading):
            results.append({
                'url': result_url,
                'title': scraper_utils.cleanse_title(heading),
                'year': ''
            })
    return results
def _get_episode_url(self, show_url, video):
    """Resolve the episode page from the show's accordion listing."""
    by_number = 'href="([^"]+[sS]%s-?[eE]%s(?!\d)[^"]*)"' % (video.season, video.episode)
    by_title = 'href="(?P<url>[^"]+)(?:[^>]*>){3}\s*S\d+\s+Episode\s+\d+\s*:\s*(?P<title>[^<]+)'
    full_url = scraper_utils.urljoin(self.base_url, show_url)
    listing = self._http_get(full_url, cache_limit=2)
    accordion = dom_parser2.parse_dom(listing, 'div', {'id': 'accordion'})
    return self._default_get_episode_url(accordion, video, by_number, by_title)
def get_sources(self, video):
    """Parse the 'playex' player block for direct streams, resolving site redirects."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment: html = fragment[0].content
    links = scraper_utils.parse_sources_list(self, html)
    for link in links:
        stream_url = link
        if self.base_url in link:
            # Same-site links are redirectors; resolve via HEAD without following.
            redir_url = self._http_get(link, headers={'Referer': url}, allow_redirect=False, method='HEAD')
            if redir_url.startswith('http'):
                stream_url = redir_url
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = links[link]['quality']
        # The player requires UA/Referer headers; append them for the resolver.
        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
        hosters.append(source)
    return hosters
def get_sources(self, video):
    """Collect direct streams from the player iframe's embedded/linked sources.

    Fixes a bug where |quality| was referenced before assignment for any
    non-gvideo stream (NameError on the first one, stale value on later ones).
    """
    source_url = self.get_url(video)
    hosters = []
    sources = []
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'player'})
    if not fragment: return hosters
    iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
    if not iframe_url: return hosters
    html = self._http_get(iframe_url[0].attrs['src'], cache_limit=.25)
    # Each helper returns {'sources': [...], 'subs': ...}.
    sources.append(self.__get_embedded_sources(html))
    sources.append(self.__get_linked_sources(html))
    for source in sources:
        for stream_url in source['sources']:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                # FIX: previously undefined/stale here; default non-gvideo
                # direct streams to HIGH.
                quality = QUALITIES.HIGH
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            hoster['subs'] = source.get('subs', True)
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Gather embed-player links plus play_video anchors from the title page.

    Fixes a bug where the page-level base quality was overwritten inside the
    anchor loop, so each link compounded the previous host adjustment.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    # The site flags low-grade rips with an explicit warning on the page.
    if re.search('This movie is of poor quality', html, re.I):
        page_quality = QUALITIES.LOW
    else:
        page_quality = QUALITIES.HIGH
    # Embedded players are resolved by a helper that extracts their links.
    for match in re.finditer('href="([^"]+/embed\d*/[^"]+)', html):
        embed_url = match.group(1)
        embed_html = self._http_get(embed_url, cache_limit=.5)
        hosters += self.__get_links(embed_html)
    pattern = 'href="([^"]+)[^>]*>\s*<[^>]+play_video.gif'
    for match in re.finditer(pattern, html, re.I):
        stream_url = match.group(1)
        host = urlparse.urlparse(stream_url).hostname
        # FIX: always adjust from the page-level base quality; the old code
        # reused |quality| as both base and result across iterations.
        quality = scraper_utils.get_quality(video, host, page_quality)
        hoster = {
            'multi-part': False,
            'url': stream_url,
            'host': host,
            'class': self,
            'quality': quality,
            'rating': None,
            'views': None,
            'direct': False
        }
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Build mirror URLs for every quality offered by the title's JSON record."""
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH: return sources
    object_id = self.__extract_id(source_url)
    if object_id is None: return sources
    source_url = TITLE_URL.format(id=object_id)
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._authed_http_get(page_url, cache_limit=.5)
    js_data = scraper_utils.parse_json(html, page_url)
    if video.video_type == VIDEO_TYPES.MOVIE:
        links = js_data.get('links', {})
    else:
        # For episodes, locate the matching episode's link record first.
        links = self.__episode_match(js_data, video)
    prefix = js_data.get('domain', {}).get('prefix')
    suffix = js_data.get('domain', {}).get('suffix')
    # Cross every quality key with every (deduplicated, sorted) mirror number.
    for key, path in links.get('links', {}).iteritems():
        for mirror in sorted(list(set(links.get('mirrors', [])))):
            stream_url = TEMPLATE.format(prefix=prefix, mirror=mirror, suffix=suffix, path=path)
            host = scraper_utils.get_direct_hostname(self, stream_url)
            quality = Q_MAP.get(key, QUALITIES.HIGH)
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            source['version'] = '(Mirror %d)' % (mirror)
            sources.append(source)
    return sources
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search list items; title/year live in each item's onmouseover tooltip."""
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search-movies/%s.html' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'item'}):
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        match_title_year = re.search('onmouseover="([^"]+)', item)
        if match_url and match_title_year:
            match_url = match_url[0].attrs['href']
            match_title_year = match_title_year.group(1)
            # The bolded part of the tooltip holds "Title (Year)".
            match = re.search('<b>(?:<i>)?\s*(.*?)\s*(?:</i>)?</b>', match_title_year)
            if not match: continue
            match_title, match_year = scraper_utils.extra_year(match.group(1))
            # "Season N" entries are season listings, not movies.
            is_season = re.search('season\s+(\d+)', match_title_year, re.I)
            if (is_season and video_type == VIDEO_TYPES.MOVIE) or (not is_season and video_type == VIDEO_TYPES.SEASON):
                continue
            if video_type == VIDEO_TYPES.MOVIE:
                if not match_year:
                    # Fall back to the "Release:" line in the tooltip.
                    match_year = re.search('>Release:\s*(\d{4})', match_title_year)
                    match_year = match_year.group(1) if match_year else ''
            else:
                # For seasons, require the requested season number to match.
                if season and int(season) != int(is_season.group(1)):
                    continue
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def get_sources(self, video):
    """Return the single YouTube-hosted source exposed by the site's JSON API."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    response = self._http_get(page_url, headers=XHR, cache_limit=8)
    js_data = scraper_utils.parse_json(response, page_url)
    definition = js_data.get('Key', {}).get('MovieDefinition')
    quality = Q_MAP.get(definition, QUALITIES.HIGH)
    payload = js_data.get('Value', {})
    stream_url = payload.get('VideoLink')
    # Only YouTube-provided links are usable from this endpoint.
    if not stream_url or payload.get('ProviderSource', '').lower() != 'youtube':
        return hosters
    hosters.append({
        'multi-part': False,
        'url': stream_url,
        'host': 'youtube.com',
        'class': self,
        'quality': quality,
        'views': None,
        'rating': None,
        'direct': False
    })
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search by slugified title; keep results whose year matches (or is absent)."""
    results = []
    slug = re.sub('[^A-Za-z0-9-]', '', title.replace(' ', '-')).lower()
    search_url = scraper_utils.urljoin(self.base_url, '/bestmatch-fund-movies-%s.html') % (slug)
    html = self._http_get(search_url, cache_limit=1)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'thumbsTitle'}):
        links = dom_parser2.parse_dom(item, 'a', req='href')
        if not links:
            continue
        result_url = links[0].attrs['href']
        result_title, result_year = scraper_utils.extra_year(links[0].content)
        if not year or not result_year or year == result_year:
            results.append({
                'url': scraper_utils.pathify_url(result_url),
                'title': scraper_utils.cleanse_title(result_title),
                'year': result_year
            })
    return results
def _get_episode_url(self, show_url, video):
    """Find the season page (for seasons > 1) and then the episode link on it."""
    season_url = show_url
    # NOTE(review): video.season is compared to the int 1 here but string-formatted
    # below; if it arrives as a string this branch never fires — confirm upstream type.
    if video.season != 1:
        show_url = scraper_utils.urljoin(self.base_url, show_url)
        html = self._http_get(show_url, cache_limit=24)
        # Season links live in the page-numbers pager, named "...-N-sezon...".
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'page-numbers'})
        if fragment:
            match = re.search('href="([^"]+-%s-sezon[^"]*)' % (video.season), fragment[0].content)
            if match:
                season_url = match.group(1)
    episode_pattern = '''href=['"]([^'"]+-%s-sezon-%s-bolum[^'"]*)''' % (video.season, video.episode)
    season_url = scraper_utils.urljoin(self.base_url, season_url)
    html = self._http_get(season_url, cache_limit=2)
    fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'posts-list'})
    # Fall back to the whole page when the posts list isn't found.
    return self._default_get_episode_url(fragment or html, video, episode_pattern)
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query the suggestion endpoint and convert hits into EntityDetails URLs."""
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/searchBoxSuggestion')
    html = self._http_get(search_url, params={
        'top': 8,
        'query': title
    }, cache_limit=8)
    js_data = scraper_utils.parse_json(html, search_url)
    for item in js_data:
        entityName = match_title_year = item.get('Value', '')
        if entityName:
            match_title, match_year2 = scraper_utils.extra_year(
                match_title_year)
            # Prefer the explicit ReleaseYear field; fall back to a year
            # parsed out of the display title.
            match_year = str(item.get('ReleaseYear', ''))
            if not match_year:
                match_year = match_year2
            match_url = '/ontology/EntityDetails?' + urllib.urlencode(
                {
                    'entityName': entityName,
                    'ignoreMediaLinkError': 'false'
                })
            if not year or not match_year or year == match_year:
                result = {
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                }
                results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    """Walk paginated release posts looking for this show's episode post.

    Stops when posts pass the age cutoff; returns None (implicitly) if not found.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = scraper_utils.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        # Headings and post bodies are parallel lists zipped together below.
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    # Match on the release name unless title matching is forced.
                    if scraper_utils.release_check(video, title, require_title=False):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        # The post body carries the episode title in a <strong> tag.
                        match = re.search('<strong>(.*?)</strong>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        # Follow the "next page" link, if any.
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url: page_url = [page_url[0].attrs['href']]
def get_sources(self, video):
    """Collect non-direct hoster links from each 'info2' release section."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, require_debrid=False,
                          headers={'User-Agent': LOCAL_UA}, cache_limit=.5)
    section_re = re.compile(
        "<span\s+class='info2'(.*?)(<span\s+class='info|<hr\s*/>)", re.DOTALL)
    link_re = re.compile('href="([^"]+)')
    for section in section_re.finditer(html):
        for link in link_re.finditer(section.group(1)):
            stream_url = link.group(1)
            # The link's filename encodes the video height for quality mapping.
            meta = scraper_utils.parse_episode_link(stream_url)
            hosters.append({
                'multi-part': False,
                'host': urlparse.urlparse(stream_url).hostname,
                'class': self,
                'views': None,
                'url': stream_url,
                'rating': None,
                'quality': scraper_utils.height_get_quality(meta['height']),
                'direct': False
            })
    return hosters