def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site by title slug and return a list of result dicts."""
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html')
    search_url = search_url % (scraper_utils.to_slug(title))
    html = self._http_get(search_url, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'slideposter'}):
        links = dom_parser2.parse_dom(item, 'a', req='href')
        posters = dom_parser2.parse_dom(item, 'img', req='alt')
        if not links or not posters:
            continue  # need both the link and the poster alt text
        match_url = links[0].attrs['href']
        match_title, match_year = scraper_utils.extra_year(posters[0].attrs['alt'])
        # keep the hit unless both years are known and disagree
        if not year or not match_year or year == match_year:
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)})
    return results
def __movie_search(self, title, year):
    """Query the movie JSON API (sorted by seeders) and return result dicts."""
    results = []
    params = {'query_term': title, 'sort_by': 'seeders', 'order_by': 'desc'}
    search_url = scraper_utils.urljoin(self.movie_base_url, MOVIE_SEARCH_URL)
    js_data = self._json_get(search_url, params=params, cache_limit=1)
    movies = js_data.get('data', {}).get('movies', [])
    for movie in movies:
        match_url = '%s?movie_id=%s' % (movie['url'], movie['id'])
        match_title = movie.get('title_english') or movie.get('title')
        match_year = str(movie['year'])
        if year and match_year and year != match_year:
            continue  # both years known and different -> not a match
        results.append({
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
            'url': scraper_utils.pathify_url(match_url)})
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Hit the site's ajax search endpoint and return series result dicts."""
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/ajax_search')
    html = self._http_get(search_url, params={'q': title}, headers=XHR, cache_limit=1)
    js_result = scraper_utils.parse_json(html, search_url)
    match_year = ''  # the ajax payload carries no year information
    for series in js_result.get('series', []):
        match_url = series.get('seo')
        match_title = series.get('label')
        if not match_url or not match_title:
            continue
        if year and match_year and year != match_year:
            continue  # never true while match_year is ''; kept for parity
        results.append({
            'url': scraper_utils.pathify_url('/' + match_url),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year})
    return results
def get_sources(self, video):
    """Scrape hoster links for |video| from its source page.

    Returns a list of hoster dicts; empty when no page URL is known.
    """
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    iframes = []
    # collect iframes from the "boxed" sections first ...
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'boxed'}):
        iframes += dom_parser2.parse_dom(fragment, 'iframe', req='src')
    # ... then walk every anchor in each "contenu" section plus those iframes.
    # NOTE(review): the same |iframes| list is re-scanned for every 'contenu'
    # block, so duplicate hosters are possible when several blocks exist.
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'contenu'}):
        for attrs, _content in dom_parser2.parse_dom(fragment, 'a', req='href') + iframes:
            stream_url = attrs.get('href') or attrs.get('src')
            if '/go/' not in stream_url:
                continue  # only the site's /go/ redirect links are of interest
            stream_url = stream_url.split('/')[-1]
            if stream_url.startswith('aHR0c'):  # base64 prefix of 'http'
                stream_url = base64.b64decode(stream_url)
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
            hoster = {
                'multi-part': False, 'url': stream_url, 'class': self,
                'quality': quality, 'host': host, 'rating': None,
                'views': None, 'direct': False}
            hosters.append(hoster)
    return hosters
def _get_episode_url(self, show_url, video):
    """Walk the show's paginated post list looking for |video|'s episode.

    Returns a pathified URL on a match; implicitly returns None when no
    match is found, the pages run out, or only too-old posts remain.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    # follow "next" links until a match, the last page, or a too-old post
    while page_url and not too_old:
        url = scraper_utils.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [
            r.content for r in dom_parser2.parse_dom(
                html, 'div', {'id': re.compile('post-\d+')})
        ]
        # headings and post bodies are parallel lists; pair them up
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                # posts appear newest-first, so everything after is older too
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    if scraper_utils.release_check(video, title, require_title=False):
                        return scraper_utils.pathify_url(url)
                else:
                    # title-match mode: compare normalized episode titles
                    if title_fallback and norm_title:
                        match = re.search('<strong>(.*?)</strong>', post)
                        if match and norm_title == scraper_utils.normalize_title(
                                match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url:
            page_url = [page_url[0].attrs['href']]
def get_sources(self, video):
    """Scrape redirect-style hosters (with views/size metadata) for |video|."""
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)
    for _attrs, item in dom_parser2.parse_dom(html, 'a', {'class': 'full-torrent1'}):
        stream_url = dom_parser2.parse_dom(item, 'span', req='onclick')
        host = dom_parser2.parse_dom(item, 'div', {'class': 'small_server'})
        match = re.search('Views:\s*(?:</[^>]*>)?\s*(\d+)', item, re.I)
        views = match.group(1) if match else None
        match = re.search('Size:\s*(?:</[^>]*>)?\s*(\d+)', item, re.I)
        # page reports MB; convert to bytes for format_size below
        size = int(match.group(1)) * 1024 * 1024 if match else None
        if not stream_url or not host:
            continue
        stream_url = stream_url[0].attrs['onclick']
        host = host[0].content.lower()
        host = host.replace('stream server: ', '')
        # the onclick handler embeds the actual /redirect/ URL
        match = re.search("'(/redirect/[^']+)", stream_url)
        if match:
            stream_url = match.group(1)
        quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
        hoster = {
            'multi-part': False, 'host': host, 'class': self,
            'quality': quality, 'views': views, 'rating': None,
            'url': stream_url, 'direct': False}
        if size is not None:
            hoster['size'] = scraper_utils.format_size(size, 'B')
        hosters.append(hoster)
    return hosters
def resolve_link(self, link):
    """Resolve a site link to the embedded player URL.

    Falls back to returning |link| unchanged when no player is found.
    """
    if not link.startswith('http'):
        link = scraper_utils.urljoin(self.base_url, link)
    html = self._http_get(link, cache_limit=.25)
    fragment = dom_parser2.parse_dom(html, 'div', {'id': 'media-player'})
    if not fragment:
        # some pages use a plain 'player' container instead
        fragment = dom_parser2.parse_dom(html, 'div', {'id': 'player'})
    if fragment:
        fragment = self.__decode_link(fragment[0].content)
        # prefer an iframe source; otherwise fall back to a _blank anchor
        iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='src')
        if iframe_url:
            return iframe_url[0].attrs['src']
        href = dom_parser2.parse_dom(fragment, 'a', {'target': '_blank'}, req='href')
        if href:
            return href[0].attrs['href']
    return link
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site; requires a hidden 'key' scraped from the home page."""
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/index.php')
    params = {'search_keywords': title, 'year': year}
    # section 2 = TV shows, 1 = movies
    params['search_section'] = 2 if video_type == VIDEO_TYPES.TVSHOW else 1
    html = self._http_get(self.base_url, cache_limit=8)
    match = re.search('input type="hidden" name="key" value="([0-9a-f]*)"', html)
    if match:
        params['key'] = match.group(1)
        html = self._http_get(search_url, params=params, cache_limit=1)
        pattern = r'class="index_item.+?href="(.+?)" title="Watch (.+?)"?\(?([0-9]{4})?\)?"?>'
        for match in re.finditer(pattern, html):
            # groups('') turns missing title/year groups into ''
            url, title, year = match.groups('')
            result = {
                'url': scraper_utils.pathify_url(url),
                'title': scraper_utils.cleanse_title(title),
                'year': year}
            results.append(result)
    else:
        # without the key the search form cannot be submitted at all
        logger.log('Unable to locate PW search key', log_utils.LOGWARNING)
    return results
def get_sources(self, video):
    """Collect hoster links from the post body for |video|."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, require_debrid=False, cache_limit=.5)
    # the entry title doubles as the quality hint string
    title_match = re.search('class="entry-title">([^<]+)', html)
    q_str = title_match.group(1) if title_match else ''
    link_pattern = 'href="?([^" ]+)(?:[^>]+>){2}\s+\|'
    for link_match in re.finditer(link_pattern, html, re.DOTALL):
        stream_url = link_match.group(1)
        if 'adf.ly' in stream_url:
            continue  # skip ad-shortener links
        host = urlparse.urlsplit(stream_url).hostname
        hosters.append({
            'multi-part': False, 'class': self, 'views': None,
            'url': stream_url, 'rating': None, 'direct': False,
            'host': host,
            'quality': scraper_utils.blog_get_quality(video, q_str, host)})
    return hosters
def get_sources(self, video):
    """Gather iframe and window.open() sources from the tab content."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    sources = []
    for _attrs, div in dom_parser2.parse_dom(html, 'div', {'class': 'tab-content'}):
        sources.extend(attrs['src'] for attrs, _content
                       in dom_parser2.parse_dom(div, 'iframe', req='src'))
    # popup players are wired up via window.open('...') in inline JS
    sources.extend(m.group(1) for m in re.finditer("window\.open\('([^']+)", html))
    for stream_url in sources:
        host = urlparse.urlparse(stream_url).hostname
        hosters.append({
            'multi-part': False, 'host': host, 'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None, 'rating': None, 'url': stream_url, 'direct': False})
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site, handling both result pages and JS redirects."""
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/index.php')
    params = {'search': title, 'image.x': 0, 'image.y': 0}
    html = self._http_get(search_url, params=params, cache_limit=1)
    # Are we on a results page?
    if not re.search('window\.location', html):
        pattern = '<td[^>]+class="movieText"[^>]*>(.*?)</p>.*?href="(/watch/[^"]+)'
        for match in re.finditer(pattern, html, re.DOTALL):
            match_title_year, match_url = match.groups('')
            # skip p**n
            if '-XXX-' in match_url.upper() or ' XXX:' in match_title_year:
                continue
            # strip any markup left inside the title cell
            match_title_year = re.sub('</?.*?>', '', match_title_year)
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not year or not match_year or year == match_year:
                result = {
                    'url': match_url,
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year}
                results.append(result)
    else:
        # single-hit searches redirect straight to the watch page via JS
        match = re.search('window\.location\s+=\s+"([^"]+)', html)
        if not match:
            return results
        url = match.group(1)
        # a redirect back to movies.php means "no results"
        if url != 'movies.php':
            result = {
                'url': scraper_utils.pathify_url(url),
                'title': scraper_utils.cleanse_title(title),
                'year': year}
            results.append(result)
    return results
def get_sources(self, video):
    """Scrape hoster links for |video|.

    For episodes only the table section matching the season/episode is
    searched; movies use the whole page. Returns a list of hoster dicts.

    Fixes: removed the dead no-op statement ``url = url.replace('&', '&')``
    (its result was never used), and folded the immediate overwrite of the
    hoster's quality into a single computation.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.EPISODE:
        fragment = ''
        pattern = 'Season\s+%s\s+Serie\s+%s<(.*?)</table>' % (
            video.season, video.episode)
        match = re.search(pattern, html, re.DOTALL)
        if match:
            fragment = match.group(1)
    else:
        fragment = html
    if not fragment:
        return hosters
    for attrs, stream_url in dom_parser2.parse_dom(
            fragment, 'td', {'class': 'linkHiddenUrl'}, req='data-width'):
        host = urlparse.urlsplit(stream_url).hostname.replace('embed.', '')
        # base the quality on the player width, then let the scraper adjust it
        quality = scraper_utils.get_quality(
            video, host, scraper_utils.width_get_quality(attrs['data-width']))
        hoster = {
            'multi-part': False, 'host': host, 'class': self,
            'quality': quality, 'views': None, 'rating': None,
            'url': stream_url, 'direct': False}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Pull the base64-encoded iframe hoster from the page."""
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    match = re.search('Quality\s*:\s*([^<]+)', html)
    if match:
        page_quality = QUALITY_MAP.get(match.group(1), QUALITIES.HIGH)
    else:
        page_quality = QUALITIES.HD720  # no quality label -> assume 720p
    match = re.search("onClick=\"javascript:replaceb64Text.*?,\s*'([^']+)", html)
    if match:
        # NOTE(review): .replace('"', '"') is a no-op as written -- it looks
        # like it was mangled from replace('&quot;', '"'); confirm against
        # the live page markup before changing it.
        html = match.group(1).decode('base-64').replace('"', '"')
        iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
        if iframe_url:
            iframe_url = iframe_url[0].attrs['src']
            host = urlparse.urlsplit(iframe_url).hostname
            hoster = {
                'multi-part': False, 'host': host, 'url': iframe_url,
                'class': self, 'rating': None, 'views': None,
                'quality': scraper_utils.get_quality(video, host, page_quality),
                'direct': False}
            hosters.append(hoster)
    return hosters
def resolve_link(self, link):
    """Resolve |link| to the target behind the click-link image, if any."""
    page_url = scraper_utils.urljoin(self.base_url, link)
    html = self._http_get(page_url, cache_limit=0)
    match = re.search('href="([^"]+).*?src="/img/click_link.jpg"', html)
    if match is None:
        return None  # no resolvable link on the page
    return match.group(1)
def get_sources(self, video):
    """Collect direct and hosted sources via the site's playlist APIs."""
    hosters = []
    sources = {}  # stream url -> {'quality': ..., 'direct': ...}
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    movie_id, watching_url, html = self.__get_source_page(
        video.video_type, page_url)
    links = []
    for match in dom_parser2.parse_dom(html, 'li', {'class': 'ep-item'},
                                       req=['data-id', 'data-server']):
        label = dom_parser2.parse_dom(match.content, 'a', req='title')
        if not label:
            continue
        # for episodes, keep only entries matching this episode's title
        if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match(
                video, label[0].attrs['title']):
            continue
        links.append((match.attrs['data-server'], match.attrs['data-id']))
    for link_type, link_id in links:
        # server types 12-15 use the first playlist endpoint
        if link_type in ['12', '13', '14', '15']:
            url = scraper_utils.urljoin(
                self.base_url, PLAYLIST_URL1.format(ep_id=link_id))
            sources.update(self.__get_link_from_json(url))
        elif kodi.get_setting('scraper_url'):
            # the second endpoint needs signed parameters
            url = scraper_utils.urljoin(
                self.base_url, PLAYLIST_URL2.format(ep_id=link_id))
            params = self.__get_params(movie_id, link_id, watching_url)
            if params is not None:
                url += '?' + urllib.urlencode(params)
            sources.update(
                self.__get_links_from_json2(url, page_url, video.video_type))
    for source in sources:
        if not source.lower().startswith('http'):
            continue
        if sources[source]['direct']:
            host = scraper_utils.get_direct_hostname(self, source)
            if host != 'gvideo':
                # non-gvideo direct links need UA/Referer appended for playback
                stream_url = source + scraper_utils.append_headers(
                    {
                        'User-Agent': scraper_utils.get_ua(),
                        'Referer': page_url
                    })
            else:
                stream_url = source
        else:
            host = urlparse.urlparse(source).hostname
            stream_url = source
        hoster = {
            'multi-part': False, 'host': host, 'class': self,
            'quality': sources[source]['quality'], 'views': None,
            'rating': None, 'url': stream_url,
            'direct': sources[source]['direct']}
        hosters.append(hoster)
    return hosters
def _get_episode_url(self, show_url, video):
    """Find the SxxExx-style episode link on the show page."""
    pattern = 'href="([^"]*[Ss]0*%s[Ee]0*%s(?!\d)[^"]*)"' % (
        video.season, video.episode)
    page_url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(page_url, cache_limit=2)
    return self._default_get_episode_url(html, video, pattern)
def _get_episode_url(self, show_url, video):
    """Find the s<season>_e<episode> link inside the seasons grid."""
    pattern = 'href="([^"]*s0*%s_e0*%s(?!\d)[^"]*)' % (
        video.season, video.episode)
    page_url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(page_url, cache_limit=2)
    grid = dom_parser2.parse_dom(html, 'div', {'class': 'seasons-grid'})
    return self._default_get_episode_url(grid, video, pattern)
def __translate_search(self, url):
    """Build the search URL and query params for |url|'s query term.

    Returns a ``(search_url, params)`` tuple.

    Fix: the original did ``params = SEARCH_PARAMS`` which aliased the
    module-level dict, so each call permanently mutated the shared defaults
    ('pby', 'gps', 'sbj' leaked into every later search). Copy it instead.
    """
    params = dict(SEARCH_PARAMS)
    params['pby'] = self.max_results
    params['gps'] = params['sbj'] = scraper_utils.parse_query(url)['query']
    search_url = scraper_utils.urljoin(self.base_url, SEARCH_URL)
    return search_url, params
def get_sources(self, video):
    """Collect stream sources, following the site's redirect players.

    For movies the poster link is followed and the best 'episode' page is
    chosen; sources are either iframes (hosted) or <source> tags (direct).
    """
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    headers = {'Referer': page_url}
    html = self._http_get(page_url, headers=headers, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.MOVIE:
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'poster'})
        if fragment:
            movie_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
            if movie_url:
                page_url = scraper_utils.urljoin(
                    self.base_url, movie_url[0].attrs['href'])
                html = self._http_get(page_url, cache_limit=.5)
                # movie pages are split into 'episode' parts; pick the best one
                episodes = self.__get_episodes(html)
                page_url = self.__get_best_page(episodes)
                if not page_url:
                    return hosters
                else:
                    page_url = scraper_utils.urljoin(
                        self.base_url, page_url)
                    html = self._http_get(page_url, cache_limit=.5)
    streams = dom_parser2.parse_dom(html, 'iframe', req='src')
    if streams:
        # hosted iframes: no height info, assume 480
        streams = [(attrs['src'], 480) for attrs, _content in streams]
        direct = False
    else:
        # direct <source> tags carry an optional data-res height
        streams = [(attrs['src'], attrs.get('data-res', 480))
                   for attrs, _content in dom_parser2.parse_dom(
                       html, 'source', req=['src'])]
        direct = True
    headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url}
    for stream_url, height in streams:
        if 'video.php' in stream_url or 'moviexk.php' in stream_url:
            if 'title=' in stream_url:
                # re-quote the title query arg so the HEAD request is valid
                title = stream_url.split('title=')[-1]
                stream_url = stream_url.replace(title, urllib.quote(title))
            # resolve the redirect without downloading the stream body
            redir_url = self._http_get(stream_url, headers=headers,
                                       allow_redirect=False, method='HEAD',
                                       cache_limit=0)
            if redir_url.startswith('http'):
                redir_url = redir_url.replace(' ', '').split(';codec')[0]
                stream_url = redir_url
            else:
                continue  # redirect did not resolve; skip this stream
        if direct:
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(height)
            stream_url += scraper_utils.append_headers(headers)
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.height_get_quality(height)
        source = {
            'multi-part': False, 'url': stream_url, 'host': host,
            'class': self, 'quality': quality, 'views': None,
            'rating': None, 'direct': direct}
        hosters.append(source)
    return hosters
def __get_links(self, url, video):
    """Search Furk.net and turn acceptable video files into hoster dicts."""
    hosters = []
    search_url = scraper_utils.urljoin(self.base_url, SEARCH_URL)
    query = self.__translate_search(url)
    result = self._http_get(search_url, data=query, allow_redirect=False, cache_limit=.5)
    for item in result.get('files', []):
        # each flag marks one independent reason to reject the file
        checks = [False] * 6
        if item.get('type', '').upper() != 'VIDEO': checks[0] = True
        if item.get('is_ready') != '1': checks[1] = True
        if item.get('av_result') in ['warning', 'infected']: checks[2] = True
        if 'video_info' not in item: checks[3] = True
        # require an English or undefined-language audio track
        if item.get('video_info') and not re.search(
                '#0:(0|1)(\((eng|und)\))?:\s*Audio:', item['video_info'], re.I):
            checks[4] = True
        if not scraper_utils.release_check(video, item['name']): checks[5] = True
        if any(checks):
            logger.log(
                'Furk.net result excluded: %s - |%s|' % (checks, item['name']),
                log_utils.LOGDEBUG)
            continue
        # prefer the real resolution from video_info ...
        match = re.search('(\d{3,})\s*x\s*(\d{3,})', item['video_info'])
        if match:
            width, _height = match.groups()
            quality = scraper_utils.width_get_quality(width)
        else:
            # ... otherwise fall back to parsing the release name
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(item['name'])
            else:
                meta = scraper_utils.parse_episode_link(item['name'])
            quality = scraper_utils.height_get_quality(meta['height'])
        if 'url_pls' in item:
            size_gb = scraper_utils.format_size(int(item['size']), 'B')
            # honor the user's maximum-size setting
            if self.max_bytes and int(item['size']) > self.max_bytes:
                logger.log(
                    'Result skipped, Too big: |%s| - %s (%s) > %s (%sGB)' %
                    (item['name'], item['size'], size_gb, self.max_bytes,
                     self.max_gb))
                continue
            stream_url = item['url_pls']
            host = scraper_utils.get_direct_hostname(self, stream_url)
            hoster = {
                'multi-part': False, 'class': self, 'views': None,
                'url': stream_url, 'rating': None, 'host': host,
                'quality': quality, 'direct': True}
            hoster['size'] = size_gb
            hoster['extra'] = item['name']
            hosters.append(hoster)
        else:
            logger.log(
                'Furk.net result skipped - no playlist: |%s|' %
                (json.dumps(item)), log_utils.LOGDEBUG)
    return hosters
def get_sources(self, video):
    """Walk film -> server -> episode ajax endpoints and collect sources."""
    source_url = self.get_url(video)
    sources = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    for attrs, _content in dom_parser2.parse_dom(
            html, 'img', req=['data-id', 'data-name']):
        film_id, data_name = attrs['data-id'], attrs['data-name']
        data = {'id': film_id, 'n': data_name}
        server_url = scraper_utils.urljoin(self.base_url, SERVER_URL)
        server_url = server_url % (film_id)
        headers = {'Referer': page_url}
        headers.update(XHR)
        html = self._http_get(server_url, data=data, headers=headers, cache_limit=.5)
        for attrs, _content in dom_parser2.parse_dom(html, 'a', req='data-id'):
            data = {'epid': attrs['data-id']}
            ep_url = scraper_utils.urljoin(self.base_url, EP_URL)
            ep_url = ep_url % (attrs['data-id'])
            headers = {'Referer': page_url}
            headers.update(XHR)
            html = self._http_get(ep_url, data=data, headers=headers, cache_limit=.5)
            js_data = scraper_utils.parse_json(html, ep_url)
            # the payload either embeds iframes or carries parallel lists
            # of links ('l') and heights ('q')
            try:
                links = [
                    r.attrs['src'] for r in dom_parser2.parse_dom(
                        js_data['link']['embed'], 'iframe', req='src')
                ]
            except:
                try:
                    links = js_data['link']['l']
                except:
                    links = []
            try:
                heights = js_data['link']['q']
            except:
                heights = []
            # map(None, ...) is the Python 2 zip-longest idiom; height is
            # None whenever there are more links than heights
            for stream_url, height in map(None, links, heights):
                match = re.search('movie_url=(.*)', stream_url)
                if match:
                    stream_url = match.group(1)
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                    # direct gvideo links need UA/Referer appended
                    stream_url += scraper_utils.append_headers({
                        'User-Agent': scraper_utils.get_ua(),
                        'Referer': page_url
                    })
                    direct = True
                else:
                    host = urlparse.urlparse(stream_url).hostname
                    if height:
                        quality = scraper_utils.height_get_quality(height)
                    else:
                        quality = QUALITIES.HD720
                    direct = False
                source = {
                    'multi-part': False, 'url': stream_url, 'host': host,
                    'class': self, 'quality': quality, 'views': None,
                    'rating': None, 'direct': direct}
                sources.append(source)
    return sources
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search: TV shows via a slug-URL probe, movies via the search page."""
    results = []
    if video_type == VIDEO_TYPES.TVSHOW and title:
        # probe the predictable /show/<slug>/ URL instead of searching
        test_url = '/show/%s/' % (scraper_utils.to_slug(title))
        test_url = scraper_utils.urljoin(self.base_url, test_url)
        html = self._http_get(test_url, require_debrid=True, cache_limit=24)
        posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
        if posts and CATEGORIES[video_type] in posts[0].content:
            # pull the canonical show link (skipping "Season N" links)
            match = re.search(
                '<div[^>]*>\s*show\s+name:.*?<a\s+href="([^"]+)[^>]+>(?!Season\s+\d+)([^<]+)',
                posts[0].content, re.I)
            if match:
                show_url, match_title = match.groups()
                result = {
                    'url': scraper_utils.pathify_url(show_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': ''}
                results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        search_url = scraper_utils.urljoin(self.base_url, '/search/%s/')
        search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
        search_url = search_url % (urllib.quote_plus(search_title))
        headers = {'User-Agent': LOCAL_UA}
        html = self._http_get(search_url, headers=headers,
                              require_debrid=True, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+).*?">(.*?)</a>', html)
        posts = [
            r.content for r in dom_parser2.parse_dom(
                html, 'div', {'id': re.compile('post-\d+')})
        ]
        norm_title = scraper_utils.normalize_title(title)
        # headings and post bodies are parallel lists; pair them up
        for heading, post in zip(headings, posts):
            # skip episode posts (SxxExx) and anything past the age cutoff
            if not re.search('[._ -]S\d+E\d+[._ -]', heading[1],
                             re.I) and not self.__too_old(post):
                post_url, post_title = heading
                post_title = re.sub('<[^>]*>', '', post_title)
                meta = scraper_utils.parse_movie_link(post_title)
                full_title = '%s [%s] (%sp)' % (
                    meta['title'], meta['extra'], meta['height'])
                match_year = meta['year']
                match_norm_title = scraper_utils.normalize_title(
                    meta['title'])
                # substring match in either direction + optional year check
                if (match_norm_title in norm_title or norm_title in
                        match_norm_title) and (not year or not match_year or
                                               year == match_year):
                    result = {
                        'url': scraper_utils.pathify_url(post_url),
                        'title': scraper_utils.cleanse_title(full_title),
                        'year': match_year}
                    results.append(result)
    return results
def get_sources(self, video):
    """Follow each player-option link and harvest its stream URLs."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'dropdown-menu'})
    if not fragment:
        return hosters
    # options are subtitle variants ("Altyaz...") or named hosts
    for match in re.finditer(
            '''href=['"]([^'"]+)[^>]*>(Altyaz.{1,3}s.{1,3}z|ok\.ru|openload)<''',
            fragment[0].content, re.I):
        sources = []
        subs = True if not match.group(2).startswith('Altyaz') else False
        option_url = scraper_utils.urljoin(self.base_url, match.group(1))
        html = self._http_get(option_url, cache_limit=2)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'video-player'})
        if not fragment:
            continue
        iframes = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
        # NOTE: |iframes| is extended while being iterated so nested
        # iframes discovered below are followed too
        for attrs, _content in iframes:
            iframe_url = attrs['src']
            if attrs.get('id') == 'ifr':
                # this iframe redirects straight to the stream; grab the
                # redirect target via a HEAD request
                html = self._http_get(iframe_url, allow_redirect=False,
                                      method='HEAD', cache_limit=.25)
                if html.startswith('http'):
                    sources.append({'stream_url': html, 'subs': subs})
            else:
                html = self._http_get(iframe_url, cache_limit=.25)
                # stream URLs are hidden as runs of \xNN escapes in inline JS
                for match in re.finditer('"((?:\\\\x[A-Fa-f0-9]+)+)"', html):
                    s = match.group(1).replace('\\x', '').decode('hex')
                    if s.startswith('http'):
                        s = urllib.unquote(s)
                        # normalize ok.ru player-metadata URLs to video pages
                        match = re.search('videoPlayerMetadata&mid=(\d+)', s)
                        if match:
                            s = 'http://ok.ru/video/%s' % (match.group(1))
                        sources.append({'stream_url': s, 'subs': subs})
                iframes += dom_parser2.parse_dom(html, 'iframe', req='src')
        for source in sources:
            stream_url = source['stream_url']
            host = urlparse.urlparse(stream_url).hostname
            quality = QUALITIES.HIGH
            hoster = {
                'multi-part': False, 'host': host, 'class': self,
                'quality': quality, 'views': None, 'rating': None,
                'url': stream_url, 'direct': False}
            if source['subs']: hoster['subs'] = 'Turkish Subtitles'
            hosters.append(hoster)
    return hosters
def _get_episode_url(self, season_url, video):
    """Locate the link whose anchor text is the bare episode number."""
    pattern = 'href="([^"]+)[^>]*>\s*%s\s*<' % (video.episode)
    page_url = scraper_utils.urljoin(self.base_url, season_url)
    html = self._http_get(page_url, cache_limit=2)
    listing = dom_parser2.parse_dom(html, 'div', {'id': 'episode_show'})
    return self._default_get_episode_url(listing, video, pattern)
def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
    """Initialize base/ajax URLs from the addon settings."""
    self.timeout = timeout
    setting_key = '%s-base_url' % (self.get_name())
    self.base_url = kodi.get_setting(setting_key)
    self.ajax_url = scraper_utils.urljoin(self.base_url, AJAX_URL)
    # NOTE(review): presumably used as a search-engine referer elsewhere
    self.goog = 'https://www.google.co.uk'
def _get_episode_url(self, show_url, video):
    """Fetch the show page (debrid required) and delegate to __get_release."""
    page_url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
    return self.__get_release(html, video)
def _get_episode_url(self, season_url, video):
    """Match the episode via data-title / data-permalink attributes."""
    episode_pattern = ('data-title\s*=\s*"Season\s+0*%s\s+Episode\s+0*%s'
                       '[^>]*data-permalink\s*=\s*"([^"]+)') % (video.season, video.episode)
    # fallback pattern captures the episode title and URL by name
    title_pattern = ('data-title\s*=\s*"Season\s+\d+\s+Episode\s+\d+\s*(?P<title>[^"]+)'
                     '[^>]+data-permalink\s*=\s*"(?P<url>[^"]+)')
    page_url = scraper_utils.urljoin(self.base_url, season_url)
    html = self._http_get(page_url, cache_limit=2)
    return self._default_get_episode_url(html, video, episode_pattern, title_pattern)