def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'film-container'})
    if fragment:
        iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
        if iframe_url:
            iframe_url = scraper_utils.urljoin(self.base_url, iframe_url[0].attrs['src'])
            headers = {'Referer': page_url}
            html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
            sources = scraper_utils.parse_sources_list(self, html)
            for source in sources:
                quality = sources[source]['quality']
                host = scraper_utils.get_direct_hostname(self, source)
                stream_url = source + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url})
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                          'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                match = re.search('(\d+[a-z]bps)', source)
                if match:
                    hoster['extra'] = match.group(1)
                hosters.append(hoster)

    # sort sources carrying a bitrate label (e.g. "3mbps") ahead of unlabeled ones
    hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
    return hosters

def _get_episode_url(self, show_url, video):
    show_url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(show_url, cache_limit=8)
    pattern = '''href=['"]([^'"]+)[^>]+>\s*%s\.\s*Sezon<''' % (video.season)
    match = re.search(pattern, html)
    if match:
        episode_pattern = '''href=['"]([^'"]+-%s-sezon-%s-bolum[^'"]*)''' % (video.season, video.episode)
        season_url = scraper_utils.urljoin(self.base_url, match.group(1))
        html = self._http_get(season_url, cache_limit=2)
        ep_url = self._default_get_episode_url(html, video, episode_pattern)
        if ep_url:
            return ep_url

    # front page fallback: the last non-empty path segment is the show slug
    html = self._http_get(self.base_url, cache_limit=2)
    for slug in reversed(show_url.split('/')):
        if slug: break

    ep_url_frag = 'href="([^"]+/{slug}-{season}-sezon-{episode}-bolum[^"]*)'.format(
        slug=slug, season=video.season, episode=video.episode)
    match = re.search(ep_url_frag, html)
    if match:
        return scraper_utils.pathify_url(match.group(1))

def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    entry = ''
    while True:
        html = self._http_get(url, cache_limit=.5)
        if not html:
            # retry on the alternate domain if the first request comes back empty
            url = scraper_utils.urljoin(BASE_URL2, source_url)
            html = self._http_get(url, cache_limit=.5)

        entry = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
        if entry:
            entry = entry[0].content
            # follow "Watch it here" redirect posts until the real post is reached
            match = re.search('Watch it here\s*:.*?href="([^"]+)', entry, re.I)
            if not match: break
            url = match.group(1)
        else:
            entry = ''
            break

    for _attribs, tab in dom_parser2.parse_dom(entry, 'div', {'class': 'postTabs_divs'}):
        match = dom_parser2.parse_dom(tab, 'iframe', req='src')
        if not match: continue
        link = match[0].attrs['src']
        host = urlparse.urlparse(link).hostname
        hoster = {'multi-part': False, 'host': host, 'class': self,
                  'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                  'views': None, 'rating': None, 'url': link, 'direct': False}
        hosters.append(hoster)
    return hosters

def _get_episode_url(self, show_url, video):
    show_url = scraper_utils.urljoin(self.base_url, show_url)
    headers = {'Referer': self.base_url}
    html = self._http_get(show_url, headers=headers, cache_limit=.25)
    data = dom_parser2.parse_dom(html, 'div', {'id': 'dizidetay'}, req=['data-dizi', 'data-id'])
    if not data: return

    episode_pattern = '''href=['"]([^'"]*/%s-sezon-%s-[^'"]*bolum[^'"]*)''' % (video.season, video.episode)
    title_pattern = '''href=['"](?P<url>[^'"]+)[^>]*>(?P<title>[^<]+)'''
    airdate_pattern = '''href=['"]([^"']+)[^>]*>[^<]*</a>\s*</td>\s*<td class="right aligned">{p_day}\.{p_month}\.{year}'''
    season_url = scraper_utils.urljoin(self.base_url, SEASON_URL)
    queries = {'sekme': 'bolumler', 'id': data[0].attrs['data-id'], 'dizi': data[0].attrs['data-dizi']}
    headers = {'Referer': show_url, 'Content-Length': 0}
    headers.update(XHR)
    html = self._http_get(season_url, params=queries, headers=headers, method='POST', cache_limit=2)
    result = self._default_get_episode_url(html, video, episode_pattern, title_pattern, airdate_pattern)
    # the site uses "javascript:;" placeholder links for episodes that aren't up yet
    if result and 'javascript:;' not in result:
        return result

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/')
    search_url = scraper_utils.urljoin(search_url, urllib.quote_plus(title))
    html = self._http_get(search_url, require_debrid=True, cache_limit=8)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'list'}):
        if not dom_parser2.parse_dom(fragment, 'div', {'class': 'lists_titles'}): continue
        for attrs, match_title_year in dom_parser2.parse_dom(fragment, 'a', {'class': 'title'}, req='href'):
            match_url = attrs['href']
            match_title_year = re.sub('</?[^>]*>', '', match_title_year)
            # shows are listed with an open-ended year range, e.g. "Title (2014-)"
            is_show = re.search('\(\d{4}-\)', match_title_year)
            if (is_show and video_type == VIDEO_TYPES.MOVIE) or (not is_show and video_type == VIDEO_TYPES.TVSHOW):
                continue

            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url),
                          'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results

def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    if not html:
        # fall back to the old domain if the current one returns nothing
        url = scraper_utils.urljoin(self.old_base_url, source_url)
        html = self._http_get(url, require_debrid=True, cache_limit=.5)

    sources.update(self.__get_post_links(html, video))

    if kodi.get_setting('%s-include_comments' % (self.get_name())) == 'true':
        for _attrs, comment in dom_parser2.parse_dom(html, 'div', {'id': re.compile('commentbody-\d+')}):
            sources.update(self.__get_comment_links(comment, video))

    for source in sources:
        if scraper_utils.excluded_link(source): continue
        host = urlparse.urlparse(source).hostname
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None,
                  'url': source, 'rating': None, 'quality': sources[source], 'direct': False}
        hosters.append(hoster)
    return hosters

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    if video_type == VIDEO_TYPES.MOVIE:
        url = scraper_utils.urljoin(self.base_url, '/movies/a-z/')
    else:
        url = scraper_utils.urljoin(self.base_url, '/tv/a-z/')

    # strip a leading article before picking the index letter
    if title.upper().startswith('THE '):
        search_title = title[4:]
    elif title.upper().startswith('A '):
        search_title = title[2:]
    else:
        search_title = title

    # titles starting with a digit are grouped under "1" in the a-z index
    if search_title[:1] in string.digits:
        first_letter = '1'
    else:
        first_letter = search_title[:1]
    url = url + first_letter.upper()

    html = self._http_get(url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    pattern = 'class=star.*?href=([^>]+)>(.*?)</a>'
    for match in re.finditer(pattern, html, re.DOTALL):
        match_url, match_title_year = match.groups()
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result = {'url': match_url, 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/movies/search')
    html = self._http_get(search_url, params={'s': title}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'item_movie'}):
        match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
        if not match: continue

        match_title_year = match[0].attrs['title']
        match_url = match[0].attrs['href']
        is_season = re.search('S(?:eason\s+)?(\d+)', match_title_year, re.I)
        match_vt = (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season)
        match_year = ''
        if video_type == VIDEO_TYPES.SEASON:
            if not season and not match_vt: continue
            if match_vt:
                if season and int(is_season.group(1)) != int(season): continue
            else:
                # a movie result can only satisfy a season search as season 1
                if season and int(season) != 1: continue
                site_title, site_year = scraper_utils.extra_year(match_title_year)
                if scraper_utils.normalize_title(site_title) not in scraper_utils.normalize_title(title) or year != site_year:
                    continue
            match_title = match_title_year
        else:
            if not match_vt: continue
            match_title, match_year = scraper_utils.extra_year(match_title_year)

        match_url = scraper_utils.urljoin(match_url, 'watching.html')
        if not year or not match_year or year == match_year:
            result = {'url': scraper_utils.pathify_url(match_url),
                      'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results

def __login(self):
    url = scraper_utils.urljoin(self.base_url, '/apis/v2/user/login.json')
    data = {'email': self.username, 'password': self.password, 'rememberMe': True}
    referer = scraper_utils.urljoin(self.base_url, '/login')
    headers = {'Content-Type': 'application/json', 'Referer': referer}
    headers.update(XHR)
    # call the base class's _http_get directly so the login POST bypasses any
    # wrapping this class does around requests
    html = super(self.__class__, self)._http_get(url, data=json.dumps(data), headers=headers, cache_limit=0)
    js_data = scraper_utils.parse_json(html, url)
    return js_data.get('status') == 'success'

def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)

    views = None
    fragment = dom_parser2.parse_dom(html, 'img', {'src': re.compile('[^"]*view_icon.png')})
    if fragment:
        match = re.search('(\d+)', fragment[0].content)
        if match:
            views = match.group(1)

    match = re.search('href="([^"]+-full-movie-[^"]+)', html)
    if match:
        url = match.group(1)
        html = self._http_get(url, cache_limit=.5)

    sources = self.__get_embedded(html)

    # each episode button triggers an AJAX call that returns more sources
    for link in dom_parser2.parse_dom(html, 'span', {'class': 'btn-eps'}, req='link'):
        link = link.attrs['link']
        ajax_url = scraper_utils.urljoin(self.base_url, AJAX_URL)
        headers = {'Referer': url}
        headers.update(XHR)
        html = self._http_get(ajax_url, params={'v': link}, headers=headers, cache_limit=.5)
        sources.update(self.__get_sources(html))

    for source in sources:
        if sources[source]['direct']:
            host = scraper_utils.get_direct_hostname(self, source)
        else:
            host = urlparse.urlparse(source).hostname
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        direct = sources[source]['direct']
        quality = sources[source]['quality']
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                  'views': views, 'rating': None, 'url': stream_url, 'direct': direct}
        hosters.append(hoster)
    return hosters

def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    headers = {'Accept-Language': 'en-US,en;q=0.5'}
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, headers=headers, cache_limit=2)
    if video.video_type == VIDEO_TYPES.MOVIE:
        sources.update(self.__scrape_sources(html, page_url))
        pages = set([r.attrs['href'] for r in dom_parser2.parse_dom(html, 'a', {'class': 'btn-eps'}, req='href')])
        active = set([r.attrs['href'] for r in dom_parser2.parse_dom(html, 'a', {'class': 'active'}, req='href')])
        # skip the active tab; it's the page already scraped above
        for page in list(pages - active):
            page_url = scraper_utils.urljoin(self.base_url, page)
            html = self._http_get(page_url, headers=headers, cache_limit=2)
            sources.update(self.__scrape_sources(html, page_url))
    else:
        for page in self.__match_episode(video, html):
            page_url = scraper_utils.urljoin(self.base_url, page)
            html = self._http_get(page_url, headers=headers, cache_limit=2)
            sources.update(self.__scrape_sources(html, page_url))

    for source, values in sources.iteritems():
        if not source.lower().startswith('http'): continue
        if values['direct']:
            host = scraper_utils.get_direct_hostname(self, source)
            if host != 'gvideo':
                stream_url = source + scraper_utils.append_headers(
                    {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
            else:
                stream_url = source
        else:
            host = urlparse.urlparse(source).hostname
            stream_url = source

        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': values['quality'],
                  'views': None, 'rating': None, 'url': stream_url, 'direct': values['direct']}
        hosters.append(hoster)
    return hosters

def search(self, video_type, title, year, season=''):
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/')
    headers = {'Accept-Language': 'en-US,en;q=0.5'}
    html = self._http_get(search_url, params={'q': title}, headers=headers, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'ml-item'}):
        match_title = dom_parser2.parse_dom(item, 'span', {'class': 'mli-info'})
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        year_frag = dom_parser2.parse_dom(item, 'img', req='alt')
        is_episodes = dom_parser2.parse_dom(item, 'span', {'class': 'mli-eps'})
        if (video_type == VIDEO_TYPES.MOVIE and not is_episodes) or (video_type == VIDEO_TYPES.SEASON and is_episodes):
            if match_title and match_url:
                match_url = match_url[0].attrs['href']
                match_title = match_title[0].content
                match_title = re.sub('</?h2>', '', match_title)
                match_title = re.sub('\s+\d{4}$', '', match_title)
                if video_type == VIDEO_TYPES.SEASON:
                    if season and not re.search('Season\s+0*%s$' % (season), match_title):
                        continue

                if not match_url.endswith('/'): match_url += '/'
                match_url = scraper_utils.urljoin(match_url, 'watch/')
                match_year = ''
                if video_type == VIDEO_TYPES.MOVIE and year_frag:
                    match = re.search('\s*-\s*(\d{4})$', year_frag[0].attrs['alt'])
                    if match:
                        match_year = match.group(1)

                match_norm_title = scraper_utils.normalize_title(match_title)
                title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
                if title_match and (not year or not match_year or year == match_year):
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year,
                              'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)
    return results

def get_sources(self, video):
    hosters = []
    sources = {}
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    match = re.search("load_player\('([^']+)", html)
    if not match: return hosters

    headers = {'Referer': page_url, 'Server': 'cloudflare-nginx', 'Accept': 'text/html, */*; q=0.01',
               'Accept-Language': 'en-US,en;q=0.5', 'Accept-Formating': 'application/json, text/javascript',
               'Accept-Encoding': 'gzip, deflate'}
    headers.update(XHR)
    params = {'id': match.group(1)}
    player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
    html = self._http_get(player_url, params=params, headers=headers, cache_limit=1)
    js_data = scraper_utils.parse_json(html, player_url)
    pl_url = js_data.get('value') or js_data.get('download')
    if not pl_url: return hosters

    headers = {'Referer': page_url}
    if pl_url.startswith('//'): pl_url = 'https:' + pl_url
    html = self._http_get(pl_url, headers=headers, allow_redirect=False, cache_limit=0)
    if html.startswith('http'):
        # a redirect response points straight at a single stream
        streams = [(html, '')]
    else:
        js_data = scraper_utils.parse_json(html, pl_url)
        try:
            streams = [(source['file'], source.get('label', '')) for source in js_data['playlist'][0]['sources']]
        except:
            streams = []

    for stream in streams:
        stream_url, label = stream
        if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
            sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
        else:
            if label:
                quality = scraper_utils.height_get_quality(label)
            else:
                quality = QUALITIES.HIGH
            sources[stream_url] = {'quality': quality, 'direct': False}

    for source, value in sources.iteritems():
        direct = value['direct']
        quality = value['quality']
        if direct:
            host = scraper_utils.get_direct_hostname(self, source)
        else:
            host = urlparse.urlparse(source).hostname
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None,
                  'rating': None, 'url': stream_url, 'direct': direct}
        hosters.append(hoster)
    return hosters

def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.MOVIE:
        page_url = self.__get_release(html, video)
        if page_url is None: return hosters
        page_url = scraper_utils.urljoin(self.base_url, page_url)
        html = self._http_get(page_url, require_debrid=True, cache_limit=.5)

    hevc = False
    for _attrs, content in dom_parser2.parse_dom(html, 'span', {'class': 'releaselabel'}):
        if re.search('(hevc|x265)', content, re.I):
            hevc = 'x265'

        match = re.search('(\d+)x(\d+)', content)
        if match:
            _width, height = match.groups()
            quality = scraper_utils.height_get_quality(height)
            break
    else:
        # for-else: no release label carried a resolution
        quality = QUALITIES.HIGH

    streams = [attrs['href'] for attrs, _content in dom_parser2.parse_dom(html, 'a', {'class': 'links'}, req='href')]
    streams += [content for _attrs, content in dom_parser2.parse_dom(html, 'pre', {'class': 'links'})]
    for stream_url in streams:
        if scraper_utils.excluded_link(stream_url): continue
        host = urlparse.urlparse(stream_url).hostname
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': stream_url,
                  'rating': None, 'quality': quality, 'direct': False}
        if hevc: hoster['format'] = hevc
        hosters.append(hoster)
    return hosters

def _get_episode_url(self, show_url, video):
    episode_pattern = 'href="([^"]+/season/%s/episode/%s/?)"' % (video.season, video.episode)
    title_pattern = 'href="(?P<url>[^"]+)"[^>]+title="(?:S\d+\s*E\d+:\s*)?(?P<title>[^"]+)'
    headers = {'Referer': scraper_utils.urljoin(self.base_url, show_url)}
    season_url = scraper_utils.urljoin(show_url, '/season/%s' % (video.season))
    season_url = scraper_utils.urljoin(self.base_url, season_url)
    html = self._http_get(season_url, headers=headers, cache_limit=2)
    fragment = dom_parser2.parse_dom(html, 'div', {'id': 'episodes'})
    return self._default_get_episode_url(fragment, video, episode_pattern, title_pattern)

def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if fragment: html = fragment[0].content
    iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
    if not iframe_url: return hosters
    iframe_url = iframe_url[0].attrs['src']
    if iframe_url.startswith('/'):
        iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)

    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
    obj = dom_parser2.parse_dom(html, 'object', req='data')
    if obj:
        streams = dict((stream_url, {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True})
                       for stream_url in scraper_utils.parse_google(self, obj[0].attrs['data']))
    else:
        streams = scraper_utils.parse_sources_list(self, html)

    for stream_url, values in streams.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = values['quality']
        stream_url += scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality,
                  'views': None, 'rating': None, 'direct': True}
        hosters.append(source)
    return hosters

def get_sources(self, video):
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources

    try:
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=2)
        pattern = '<iframe id="videoframe" src="([^"]+)'
        match = re.search(pattern, html)
        url = scraper_utils.urljoin(self.base_url, match.group(1))
        html = self._http_get(url, cache_limit=0)

        # pull the tokens the player page embeds for its ajax source lookups
        match = re.search('lastChild\.value="([^"]+)"(?:\s*\+\s*"([^"]+))?', html)
        secret = ''.join(match.groups(''))
        match = re.search('"&t=([^"]+)', html)
        t = match.group(1)
        match = re.search('(?:\s+|,)s\s*=(\d+)', html)
        s_start = int(match.group(1))
        match = re.search('(?:\s+|,)m\s*=(\d+)', html)
        m_start = int(match.group(1))

        for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'ripdiv'}):
            match = re.match('<b>(.*?)</b>', fragment)
            if match:
                q_str = match.group(1).replace(' ', '').upper()
                quality = QUALITY_MAP.get(q_str, QUALITIES.HIGH)
            else:
                quality = QUALITIES.HIGH

            for attrs, label in dom_parser2.parse_dom(fragment, 'a', {'rel': re.compile('\d+')}, req='onclick'):
                link_id = re.sub('[^\d]', '', attrs['onclick'])
                match = re.match('([^:]+:)\s*(.*)', label)
                if not match: continue

                version, host_fragment = match.groups()
                source = {'multi-part': False, 'quality': quality, 'class': self, 'version': version,
                          'rating': None, 'views': None, 'direct': False}
                source['host'] = re.sub('(</?[^>]*>)', '', host_fragment)
                # s and m appear to be page counters; bump them randomly so the
                # ajax request looks like it came from a live page
                s = s_start + random.randint(3, 100)
                m = m_start + random.randint(21, 100)
                url = AJAX_URL.format(link_id=link_id, s=s, m=m, secret=secret, t=t)
                source['url'] = url
                sources.append(source)
    except Exception as e:
        logger.log('Failure (%s) during icefilms get sources: |%s|' % (str(e), video), log_utils.LOGWARNING)

    return sources

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/results')
    params = {'q': title}
    referer = search_url + '?' + urllib.urlencode(params)
    headers = {'Referer': referer}
    headers.update(XHR)
    _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)

    cookies = {'begin_referer': referer, 'prounder': 1}
    html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=8)
    # a page still loading jquery.js is likely the interstitial page; retry uncached
    if any('jquery.js' in match.attrs['src'] for match in dom_parser2.parse_dom(html, 'script', req='src')):
        html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=0)

    for _attrs, result in dom_parser2.parse_dom(html, 'div', {'class': 'cell'}):
        title_frag = dom_parser2.parse_dom(result, 'div', {'class': 'video_title'})
        year_frag = dom_parser2.parse_dom(result, 'div', {'class': 'video_quality'})
        if not title_frag: continue
        match = dom_parser2.parse_dom(title_frag[0].content, 'a', req='href')
        if not match: continue
        match_url = match[0].attrs['href']
        match_title = match[0].content

        try:
            match = re.search('\s+(\d{4})\s+', year_frag[0].content)
            match_year = match.group(1)
        except:
            match_year = ''

        if not year or not match_year or year == match_year:
            result = {'url': scraper_utils.pathify_url(match_url),
                      'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results

def __add_torrent(self, hash_id):
    # no-op if the torrent is already in the transfer list
    list_url = scraper_utils.urljoin(self.base_url, LIST_URL)
    js_data = self._json_get(list_url, cache_limit=0)
    for transfer in js_data.get('transfers', []):
        if transfer['hash'].lower() == hash_id:
            return True

    add_url = scraper_utils.urljoin(self.base_url, ADD_URL)
    data = {'src': MAGNET_LINK % hash_id}
    js_data = self._json_get(add_url, data=data, cache_limit=0)
    return js_data.get('status') == 'success'

def _get_episode_url(self, show_url, video):
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=24)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'poster'})
    if not fragment: return
    show_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
    if not show_url: return
    show_url = scraper_utils.urljoin(self.base_url, show_url[0].attrs['href'])
    html = self._http_get(show_url, cache_limit=2)
    fragment = dom_parser2.parse_dom(html, 'div', {'id': 'servers'})
    episode_pattern = 'href="([^"]+)[^>]+>[Ee][Pp]\s*(?:[Ss]0*%s-)?E?p?0*%s(?!\d)' % (video.season, video.episode)
    return self._default_get_episode_url(fragment or html, video, episode_pattern)

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/advanced-search/')
    headers = {'Referer': self.base_url}
    params = {'search_query': title, 'orderby': '', 'order': '', 'wpas': 1}
    html = self._http_get(search_url, params=params, headers=headers, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'datos'}):
        match = dom_parser2.parse_dom(item, 'a', req='href')
        if not match: continue

        match_url = match[0].attrs['href']
        is_tvshow = '/tvshows/' in match_url
        if (is_tvshow and video_type == VIDEO_TYPES.MOVIE) or (not is_tvshow and video_type == VIDEO_TYPES.TVSHOW):
            continue

        match_title = match[0].content
        match_title, match_year = scraper_utils.extra_year(match_title)
        if scraper_utils.normalize_title(match_title) in norm_title and (not year or not match_year or year == match_year):
            result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year,
                      'url': scraper_utils.pathify_url(match_url)}
            results.append(result)
    return results

def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)

    best_quality = QUALITIES.HIGH
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
    if fragment:
        for match in re.finditer('href="[^"]*/movies-quality/[^"]*[^>]*>([^<]+)', fragment[0].content, re.I):
            quality = Q_MAP.get(match.group(1).upper(), QUALITIES.HIGH)
            if Q_ORDER[quality] > Q_ORDER[best_quality]:
                best_quality = quality

    sources = []
    for attrs, _content in dom_parser2.parse_dom(html, 'a', req='data-vid'):
        try:
            vid_url = dom_parser2.parse_dom(scraper_utils.cleanse_title(attrs['data-vid']), 'iframe', req='src')
            sources.append(vid_url[0])
        except:
            pass

    fragment = dom_parser2.parse_dom(html, 'table', {'class': 'additional-links'})
    if fragment:
        sources += dom_parser2.parse_dom(fragment[0].content, 'a', req='href')

    for stream_url in sources:
        stream_url = stream_url.attrs.get('href') or stream_url.attrs.get('src')
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.get_quality(video, host, best_quality)
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None,
                  'rating': None, 'url': stream_url, 'direct': False}
        hosters.append(hoster)
    return hosters

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search')
    html = self._http_get(search_url, params={'q': title}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'movie-item'}):
        match = dom_parser2.parse_dom(item, 'a', {'itemprop': 'url'}, req='href')
        if not match: continue

        match_url, match_title_year = match[0].attrs['href'], match[0].content
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not match_year:
            try:
                match_year = dom_parser2.parse_dom(item, 'div', {'class': 'overlay-year'})[0].content
            except:
                match_year = ''

        if not year or not match_year or year == match_year:
            result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year,
                      'url': scraper_utils.pathify_url(match_url)}
            results.append(result)
    return results

def _get_episode_url(self, show_url, video):
    episode_pattern = 'href=([^>]+)>0*%sx0*%s\s+' % (video.season, video.episode)
    title_pattern = 'href=(?P<url>[^>]+)>(?:\d+x\d+\s+)+(?P<title>[^<]+)'
    show_url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(show_url, cache_limit=2)
    fragment = dom_parser2.parse_dom(html, 'span', {'class': 'list'})
    return self._default_get_episode_url(fragment, video, episode_pattern, title_pattern)

def _get_episode_url(self, show_url, video):
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = scraper_utils.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if self.__too_old(post):
                too_old = True
                break
            if show_url not in post: continue
            match = dom_parser2.parse_dom(post, 'a', req='href')
            if match:
                url, title = match[0].attrs['href'], match[0].content
                if not force_title:
                    if scraper_utils.release_check(video, title, require_title=False):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('</strong>(.*?)</p>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)

        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url: page_url = [page_url[0].attrs['href']]

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    seen_urls = set()
    norm_title = scraper_utils.normalize_title(title)
    for page in ['/latest-added/', '/popular-today/', '/most-popular/']:
        url = scraper_utils.urljoin(self.base_url, page)
        html = self._http_get(url, cache_limit=24)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'home'})
        if not fragment: continue

        for attrs, match_title_year in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
            match_url = attrs['href']
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                match_url = scraper_utils.pathify_url(match_url)
                if match_url in seen_urls: continue
                seen_urls.add(match_url)
                result = {'url': match_url, 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results

def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    for _attrs, button in dom_parser2.parse_dom(html, 'li', {'class': 'playing_button'}):
        try:
            link = dom_parser2.parse_dom(button, 'a', req='href')
            # the href carries the real stream URL base64-encoded in its query string
            match = re.search('php\?.*?=?([^"]+)', link[0].attrs['href'])
            stream_url = base64.b64decode(match.group(1))
            match = re.search('(https?://.*)', stream_url)
            stream_url = match.group(1)
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None,
                      'rating': None, 'url': stream_url, 'direct': False}
            hosters.append(hoster)
        except Exception as e:
            logger.log('Exception during tvonline source: %s - |%s|' % (e, button), log_utils.LOGDEBUG)

    return hosters

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_in = 'm' if video_type == VIDEO_TYPES.MOVIE else 't'
    search_url = scraper_utils.urljoin(self.base_url, '/search')
    html = self._http_get(search_url, data={'searchquery': title, 'searchin': search_in}, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'search-page'})
    if not fragment: return results
    fragment = dom_parser2.parse_dom(fragment[0].content, 'table')
    if not fragment: return results

    for attrs, match_title_year in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
        match_url = attrs['href']
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not year or not match_year or year == match_year:
            result = {'url': scraper_utils.pathify_url(match_url),
                      'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results

def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    match = re.search('This movie is of poor quality', html, re.I)
    if match:
        quality = QUALITIES.LOW
    else:
        quality = QUALITIES.HIGH

    for match in re.finditer('href="([^"]+/embed\d*/[^"]+)', html):
        url = match.group(1)
        embed_html = self._http_get(url, cache_limit=.5)
        hosters += self.__get_links(embed_html)

    pattern = 'href="([^"]+)[^>]*>\s*<[^>]+play_video.gif'
    for match in re.finditer(pattern, html, re.I):
        stream_url = match.group(1)
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.get_quality(video, host, quality)
        hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality,
                  'rating': None, 'views': None, 'direct': False}
        hosters.append(hoster)
    return hosters

def __get_ajax_sources(self, html, page_url):
    hosters = []
    match = re.search('''url\s*:\s*"([^"]+)"\s*,\s*data:'id=''', html)
    if match:
        ajax_url = match.group(1)
        for data_id in re.findall("kaynakdegis\('([^']+)", html):
            url = scraper_utils.urljoin(self.base_url, ajax_url)
            data = {'id': data_id}
            headers = {'Referer': page_url}
            headers.update(XHR)
            result = self._http_get(url, data=data, headers=headers, cache_limit=.5)
            js_data = scraper_utils.parse_json(result, url)
            if 'iframe' in js_data:
                if self.base_url in js_data['iframe']:
                    # same-site iframes need to be scraped for their actual sources
                    hosters += self.__get_iframe_sources(js_data['iframe'], page_url)
                else:
                    hosters.append(self.__create_source(js_data['iframe'], 720, page_url, direct=False))
            else:
                hosters += self.__get_js_sources(js_data, page_url)

    return hosters