def __get_source_page(self, video_type, page_url):
    """Locate the watch page and the server-list markup for a title.

    Args:
        video_type: compared against VIDEO_TYPES.MOVIE to choose the
            'movie' or 'series' form of QP_URL.
        page_url: site URL expected to contain '/movie/<slug>-<id>.html'.

    Returns:
        A (movie_id, watching_url, html) tuple. All three members are ''
        when page_url does not match the expected pattern or the preview
        markup has no "View all episodes" link. html alone is '' when the
        server-list reply has no usable 'html' member.
    """
    not_found = ('', '', '')
    match = re.search(r'/movie/(.*?)-(\d+)\.html', page_url)
    if not match:
        return not_found

    slug, movie_id = match.groups()
    vid_type = 'movie' if video_type == VIDEO_TYPES.MOVIE else 'series'
    referer = scraper_utils.urljoin(self.base_url, page_url)
    headers = {'Referer': referer}
    headers.update(XHR)

    qp_url = QP_URL.format(slug=slug, movie_id=movie_id, vid_type=vid_type)
    qp_url = scraper_utils.urljoin(self.base_url, qp_url)
    html = self._http_get(qp_url, headers=headers, cache_limit=8)
    links = dom_parser2.parse_dom(
        html, 'a', {'title': re.compile('View all episodes')}, req='href')
    if not links:
        return not_found

    watching_url = links[0].attrs['href']
    page_html = self._http_get(
        watching_url, headers={'Referer': referer}, cache_limit=8)

    # The responses are discarded: the hidden images are requested only for
    # the side effect of the request itself.
    # NOTE(review): presumably the site expects these hits before serving
    # the server list -- confirm.
    hidden_images = dom_parser2.parse_dom(
        page_html, 'img', {'class': 'hidden'}, req='src')
    for attrs, _content in hidden_images:
        self._http_get(
            attrs['src'], headers={'Referer': watching_url}, cache_limit=8)

    sl_url = SL_URL.format(movie_id=movie_id)
    sl_url = scraper_utils.urljoin(self.base_url, sl_url)
    html = self._http_get(sl_url, headers=headers, cache_limit=8)
    js_data = scraper_utils.parse_json(html, sl_url)
    try:
        html = js_data['html']
    except (KeyError, TypeError, IndexError):
        # Only a missing/unusable 'html' member is a best-effort miss; a
        # bare except here would also hide unrelated failures.
        html = ''
    return movie_id, watching_url, html
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query SEARCH_URL for a title and return the matching entries.

    Args:
        video_type: key into SEARCH_TYPES, sent as the 'type' parameter.
        title: text sent as 'searchTerm'.
        year: if non-empty, entries with a different non-empty year are
            dropped.
        season: unused.

    Returns:
        A list of dicts with the keys 'title', 'year' and 'url'.
    """
    search_url = scraper_utils.urljoin(self.base_url, SEARCH_URL)
    referer = scraper_utils.urljoin(self.base_url, '/search/?q=%s') % (urllib.quote_plus(title))
    headers = {'Referer': referer}
    headers.update(XHR)
    params = {'searchTerm': title, 'type': SEARCH_TYPES[video_type], 'limit': 500}
    html = self._http_get(search_url, params=params, headers=headers,
                          auth=False, cache_limit=2)
    js_data = scraper_utils.parse_json(html, search_url)

    results = []
    if 'results' not in js_data:
        return results

    for entry in js_data['results']:
        match_year = str(entry.get('year', ''))
        if year and match_year and year != match_year:
            continue
        results.append({
            'title': scraper_utils.cleanse_title(entry.get('title', '')),
            'year': match_year,
            'url': scraper_utils.pathify_url(entry.get('permalink', '')),
        })
    return results
def get_sources(self, video):
    """Collect hoster links from a post and, optionally, its comments.

    The page is requested from base_url first; if that yields nothing the
    same path is requested from old_base_url. Comment links are included
    only when the '<name>-include_comments' setting is 'true'.

    Args:
        video: the video being resolved; passed to get_url and to the
            link extractors.

    Returns:
        A list of hoster dicts (non-direct), one per non-excluded link.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    html = ''
    for base in (self.base_url, self.old_base_url):
        url = scraper_utils.urljoin(base, source_url)
        html = self._http_get(url, require_debrid=True, cache_limit=.5)
        if html:
            break

    sources = {}
    sources.update(self.__get_post_links(html, video))
    if kodi.get_setting('%s-include_comments' % (self.get_name())) == 'true':
        comments = dom_parser2.parse_dom(html, 'div', {'id': re.compile('commentbody-\d+')})
        for _attrs, comment in comments:
            sources.update(self.__get_comment_links(comment, video))

    for source, quality in sources.items():
        if scraper_utils.excluded_link(source):
            continue
        hosters.append({
            'multi-part': False,
            'host': urlparse.urlparse(source).hostname,
            'class': self,
            'views': None,
            'url': source,
            'rating': None,
            'quality': quality,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Find hoster links on the dated release pages for an episode.

    When the episode aired within the last self.filter days, the page for
    the day after the air date and then the air date itself are scanned
    (only days strictly before today), stopping at the first page that
    yields links. If nothing was found and the 'scraper_url' setting is
    set, '/index.html' is scanned as well.

    Args:
        video: the episode being resolved; ep_airdate is read from it.

    Returns:
        A list of hoster dicts (non-direct), one per link found.
    """
    sources = {}
    today = datetime.datetime.today().date()
    oldest_allowed = today - datetime.timedelta(days=self.filter)
    airdate = video.ep_airdate

    if airdate and oldest_allowed < airdate:
        candidates = [airdate + datetime.timedelta(days=1), airdate]
        for day in candidates:
            if not day < today:
                continue
            page_url = EP_PAGE % (day.strftime('%Y.%m.%d'))
            page_url = scraper_utils.urljoin(self.base_url, page_url)
            html = self._http_get(page_url, require_debrid=True, cache_limit=30 * 24)
            sources.update(self.__get_sources(video, html))
            if sources:
                break

    if not sources and kodi.get_setting('scraper_url'):
        page_url = scraper_utils.urljoin(self.base_url, '/index.html')
        html = self._http_get(page_url, require_debrid=True, cache_limit=2)
        sources.update(self.__get_sources(video, html))

    hosters = []
    for source, quality in sources.items():
        hosters.append({
            'multi-part': False,
            'host': urlparse.urlparse(source).hostname,
            'class': self,
            'views': None,
            'url': source,
            'rating': None,
            'quality': quality,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Follow nested iframes until a Google Docs embed is found.

    Iframes are visited breadth-first, starting with those on the video
    page. The first iframe whose src contains 'docs.google.com' is handed
    to scraper_utils.parse_google and the walk stops. Every other iframe
    is fetched and its own iframes are queued. Each URL is fetched at most
    once, so pages whose iframes point back at each other cannot loop
    forever.

    Args:
        video: the video being resolved; passed to get_url.

    Returns:
        A list of direct hoster dicts, one per stream parse_google returned.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=1)

    sources = []
    seen = set([page_url])
    queue = list(dom_parser2.parse_dom(html, 'iframe', req='src'))
    while queue:
        attrs, _content = queue.pop(0)
        iframe_url = attrs['src']
        if 'docs.google.com' in iframe_url:
            sources = scraper_utils.parse_google(self, iframe_url)
            break

        iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
        if iframe_url in seen:
            # Already fetched: its iframes are queued (or were visited).
            continue
        seen.add(iframe_url)
        html = self._http_get(iframe_url, cache_limit=1)
        queue.extend(dom_parser2.parse_dom(html, 'iframe', req='src'))

    for source in sources:
        hosters.append({
            'multi-part': False,
            'host': scraper_utils.get_direct_hostname(self, source),
            'class': self,
            'quality': scraper_utils.gv_get_quality(source),
            'views': None,
            'rating': None,
            'url': source,
            'direct': True,
        })
    return hosters
def get_sources(self, video):
    """Load each player listed on the page and gather its links.

    Video ids are taken from the loadVideo(...) onclick handlers and
    paired positionally with the result of __get_subs; map(None, ...)
    pads the shorter list with None. Each id is posted to AJAX_URL and
    the reply is run through the four link extractors. Only the first
    player is processed when the 'scraper_url' setting is empty.

    Args:
        video: the video being resolved; passed to get_url.

    Returns:
        The concatenated list of hosters from all extractors.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    page_html = self._http_get(page_url, cache_limit=.5)
    video_ids = re.findall('''onclick\s*=\s*"loadVideo\('([^']+)''', page_html)
    sub_icons = self.__get_subs(page_html)

    for v_id, icon in map(None, video_ids, sub_icons):
        ajax_url = scraper_utils.urljoin(self.base_url, AJAX_URL)
        headers = {'Referer': page_url}
        headers.update(XHR)
        payload = {'vid': v_id, 'tip': 1, 'type': 'loadVideo'}
        reply = self._http_get(ajax_url, data=payload, headers=headers, cache_limit=.5)
        sub = ICONS.get(icon, DEFAULT_SUB)
        hosters += self.__get_cloud_links(reply, page_url, sub)
        hosters += self.__get_embedded_links(reply, sub)
        hosters += self.__get_iframe_links(reply, sub)
        hosters += self.__get_json_links(reply, sub)
        if not kodi.get_setting('scraper_url'):
            break
    return hosters
def _get_episode_url(self, show_url, video):
    """Find an episode URL via its season page, else via the front page.

    Args:
        show_url: site-relative URL of the show.
        video: supplies season and episode for the link patterns.

    Returns:
        The episode URL, or None when neither lookup matches.
    """
    show_url = scraper_utils.urljoin(self.base_url, show_url)
    show_html = self._http_get(show_url, cache_limit=8)
    season_pattern = '''href=['"]([^'"]+)[^>]+>\s*%s\.\s*Sezon<''' % (video.season)
    season_link = re.search(season_pattern, show_html)
    if season_link:
        episode_pattern = '''href=['"]([^'"]+-%s-sezon-%s-bolum[^'"]*)''' % (video.season, video.episode)
        season_url = scraper_utils.urljoin(self.base_url, season_link.group(1))
        season_html = self._http_get(season_url, cache_limit=2)
        ep_url = self._default_get_episode_url(season_html, video, episode_pattern)
        if ep_url:
            return ep_url

    # front page fallback
    front_html = self._http_get(self.base_url, cache_limit=2)
    # Last non-empty path segment of the show URL ('' if there is none).
    slug = next((part for part in reversed(show_url.split('/')) if part), '')
    ep_url_frag = 'href="([^"]+/{slug}-{season}-sezon-{episode}-bolum[^"]*)'.format(
        slug=slug, season=video.season, episode=video.episode)
    front_link = re.search(ep_url_frag, front_html)
    if front_link:
        return scraper_utils.pathify_url(front_link.group(1))
def _get_episode_url(self, show_url, video):
    """Look up an episode link inside the season page's 'episodes' div.

    Args:
        show_url: site-relative URL of the show; also sent as the Referer.
        video: supplies season and episode for the URL and the patterns.

    Returns:
        Whatever _default_get_episode_url returns for the parsed fragment.
    """
    referer = scraper_utils.urljoin(self.base_url, show_url)
    season_path = scraper_utils.urljoin(show_url, '/season/%s' % (video.season))
    season_url = scraper_utils.urljoin(self.base_url, season_path)
    html = self._http_get(season_url, headers={'Referer': referer}, cache_limit=2)
    fragment = dom_parser2.parse_dom(html, 'div', {'id': 'episodes'})

    episode_pattern = 'href="([^"]+/season/%s/episode/%s/?)"' % (video.season, video.episode)
    title_pattern = 'href="(?P<url>[^"]+)"[^>]+title="(?:S\d+\s*E\d+:\s*)?(?P<title>[^"]+)'
    return self._default_get_episode_url(fragment, video, episode_pattern, title_pattern)
def get_sources(self, video):
    """Scrape streams from a title page and its additional pages.

    For movies the landing page is scraped, followed by every 'btn-eps'
    link that is not also marked 'active'. For other video types only the
    pages returned by __match_episode are scraped.

    Args:
        video: the video being resolved; video_type selects the branch.

    Returns:
        A list of hoster dicts. Direct, non-gvideo URLs get User-Agent and
        Referer headers appended; the Referer is the last page fetched.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    headers = {'Accept-Language': 'en-US,en;q=0.5'}
    sources = {}
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, headers=headers, cache_limit=2)

    if video.video_type == VIDEO_TYPES.MOVIE:
        sources.update(self.__scrape_sources(html, page_url))
        all_pages = set([
            r.attrs['href'] for r in dom_parser2.parse_dom(
                html, 'a', {'class': 'btn-eps'}, req='href')
        ])
        current = set([
            r.attrs['href'] for r in dom_parser2.parse_dom(
                html, 'a', {'class': 'active'}, req='href')
        ])
        extra_pages = list(all_pages - current)
    else:
        extra_pages = self.__match_episode(video, html)

    for page in extra_pages:
        page_url = scraper_utils.urljoin(self.base_url, page)
        html = self._http_get(page_url, headers=headers, cache_limit=2)
        sources.update(self.__scrape_sources(html, page_url))

    for source, values in sources.iteritems():
        if not source.lower().startswith('http'):
            continue

        stream_url = source
        if values['direct']:
            host = scraper_utils.get_direct_hostname(self, source)
            if host != 'gvideo':
                stream_url = source + scraper_utils.append_headers({
                    'User-Agent': scraper_utils.get_ua(),
                    'Referer': page_url
                })
        else:
            host = urlparse.urlparse(source).hostname

        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': values['quality'],
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': values['direct'],
        })
    return hosters
def search(self, video_type, title, year, season=''):
    """Search '/search/' and return items matching type, title and year.

    Items carrying an 'mli-eps' span are treated as seasons, the others as
    movies. Every accepted URL gets 'watch/' appended. The year is read
    from the image alt text and only for movies.

    Args:
        video_type: VIDEO_TYPES.MOVIE or VIDEO_TYPES.SEASON; other values
            match nothing.
        title: search text; also compared (normalized) against each hit.
        year: if non-empty, hits with a different non-empty year are
            dropped.
        season: if non-empty, season hits must end in 'Season <n>'.

    Returns:
        A list of dicts with the keys 'title', 'year' and 'url'.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/')
    headers = {'Accept-Language': 'en-US,en;q=0.5'}
    html = self._http_get(search_url, params={'q': title}, headers=headers, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    want_movie = video_type == VIDEO_TYPES.MOVIE
    want_season = video_type == VIDEO_TYPES.SEASON

    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'ml-item'}):
        match_title = dom_parser2.parse_dom(item, 'span', {'class': 'mli-info'})
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        year_frag = dom_parser2.parse_dom(item, 'img', req='alt')
        is_episodes = dom_parser2.parse_dom(item, 'span', {'class': 'mli-eps'})

        type_ok = (want_movie and not is_episodes) or (want_season and is_episodes)
        if not type_ok:
            continue
        if not (match_title and match_url):
            continue

        match_url = match_url[0].attrs['href']
        match_title = match_title[0].content
        match_title = re.sub('</?h2>', '', match_title)
        match_title = re.sub('\s+\d{4}$', '', match_title)
        if want_season:
            if season and not re.search('Season\s+0*%s$' % (season), match_title):
                continue

        if not match_url.endswith('/'):
            match_url += '/'
        match_url = scraper_utils.urljoin(match_url, 'watch/')

        match_year = ''
        if want_movie and year_frag:
            year_match = re.search('\s*-\s*(\d{4})$', year_frag[0].attrs['alt'])
            if year_match:
                match_year = year_match.group(1)

        match_norm_title = scraper_utils.normalize_title(match_title)
        title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
        year_ok = not year or not match_year or year == match_year
        if title_match and year_ok:
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url),
            })
    return results
def _get_episode_url(self, show_url, video):
    """Post the show id to SEASON_URL and search the reply for the episode.

    Args:
        show_url: site-relative URL of the show page.
        video: supplies season and episode for the request and patterns.

    Returns:
        The result of _default_get_episode_url, or None when the show page
        has no 'icerikid' div with a value attribute.
    """
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=24)
    show_id = dom_parser2.parse_dom(html, 'div', {'id': 'icerikid'}, req='value')
    if not show_id:
        return

    episode_pattern = 'href="([^"]*-%s-sezon-%s-bolum[^"]*)"' % (video.season, video.episode)
    title_pattern = 'href="(?P<url>[^"]+)[^>]*class="realcuf".*?class="realcuf">(?P<title>[^<]*)'
    season_url = scraper_utils.urljoin(self.base_url, SEASON_URL)
    data = {
        'sezon_id': video.season,
        'dizi_id': show_id[0].attrs['value'],
        'tip': 'dizi',
        'bolumid': '',
    }
    html = self._http_get(season_url, data=data, headers=XHR, cache_limit=2)
    return self._default_get_episode_url(html, video, episode_pattern, title_pattern)
def get_sources(self, video):
    """Resolve the player iframe of a page into direct stream links.

    The first iframe is taken from the 'playex' div when present, else
    from the whole page. Inside it, an <object data=...> is parsed as a
    Google link; without one the markup goes to parse_sources_list.

    Args:
        video: the video being resolved; passed to get_url.

    Returns:
        A list of direct hoster dicts; every URL has User-Agent and
        Referer headers appended.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    player = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
    if player:
        html = player[0].content

    iframes = dom_parser2.parse_dom(html, 'iframe', req='src')
    if not iframes:
        return hosters

    iframe_url = iframes[0].attrs['src']
    if iframe_url.startswith('/'):
        iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)

    obj = dom_parser2.parse_dom(html, 'object', req='data')
    if obj:
        streams = {}
        for link in scraper_utils.parse_google(self, obj[0].attrs['data']):
            streams[link] = {
                'quality': scraper_utils.gv_get_quality(link),
                'direct': True
            }
    else:
        streams = scraper_utils.parse_sources_list(self, html)

    for stream_url, values in streams.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = values['quality']
        stream_url += scraper_utils.append_headers({
            'User-Agent': scraper_utils.get_ua(),
            'Referer': page_url
        })
        hosters.append({
            'multi-part': False,
            'url': stream_url,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'direct': True,
        })
    return hosters
def _get_episode_url(self, show_url, video):
    """Find the season link on the show page, then the episode within it.

    Args:
        show_url: site-relative URL of the show page.
        video: supplies season and episode for the link patterns.

    Returns:
        The result of _default_get_episode_url run over the joined
        'episodeDetail' divs, or None when no season link matches.
    """
    show_url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(show_url, headers={'Referer': self.base_url}, cache_limit=24 * 7)
    season_link = re.search('href="([^"]*season=0*%s(?!\d))[^"]*' % (video.season), html)
    if season_link is None:
        return

    season_url = scraper_utils.urljoin(show_url, season_link.group(1))
    html = self._http_get(season_url, headers={'Referer': show_url}, cache_limit=2)
    details = dom_parser2.parse_dom(html, 'div', {'class': 'episodeDetail'})
    fragment = '\n'.join([detail.content for detail in details])

    episode_pattern = 'href="([^"]*/0*%s-0*%s/[^"]*)' % (video.season, video.episode)
    title_pattern = 'href="(?P<url>[^"]+)[^>]*>\s*(?P<title>.*?)\s*</a>'
    return self._default_get_episode_url(fragment, video, episode_pattern, title_pattern)
def get_sources(self, video):
    """Resolve the links returned by get_gk_links into hosters.

    The page quality comes from the 'status' <dd> via QUALITY_MAP and
    defaults to QUALITIES.HIGH. For episodes only the 'servers-list' div
    is searched, otherwise the whole page. gvideo links are direct and
    keep the reported quality. Other links are non-direct and re-rated by
    host. Links with no resolvable hostname are skipped.

    Args:
        video: the video being resolved.

    Returns:
        A list of hoster dicts; every URL has a User-Agent header appended.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)

    status = dom_parser2.parse_dom(html, 'dd', {'class': 'status'})
    if status:
        page_quality = QUALITY_MAP.get(status[0].content, QUALITIES.HIGH)
    else:
        page_quality = QUALITIES.HIGH

    if video.video_type == VIDEO_TYPES.EPISODE:
        servers = dom_parser2.parse_dom(html, 'div', {'id': 'servers-list'})
        gk_html = servers[0].content if servers else ''
    else:
        gk_html = html

    link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
    player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
    links = scraper_utils.get_gk_links(
        self, gk_html, page_url, page_quality, link_url, player_url)

    for stream_url, quality in links.iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        direct = host == 'gvideo'
        if not direct:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(video, host, quality)

        if host is None:
            continue

        stream_url += scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua()})
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': direct,
        })
    return hosters
def __add_torrent(self, hash_id):
    """Make sure a torrent is in the transfer list, adding it if needed.

    The current transfers are fetched from LIST_URL. If one of them
    already has this hash nothing is submitted. Otherwise a magnet link
    built from MAGNET_LINK is posted to ADD_URL.

    Args:
        hash_id: torrent info-hash; compared case-insensitively.

    Returns:
        True if the torrent is already listed or the add request reports
        status 'success', otherwise False.
    """
    list_url = scraper_utils.urljoin(self.base_url, LIST_URL)
    js_data = self._json_get(list_url, cache_limit=0)

    # Lower-case both sides: the listed hash was already lower-cased, but an
    # upper-case hash_id would never have matched and would be re-added.
    wanted = hash_id.lower()
    for transfer in js_data.get('transfers', []):
        # A transfer without a hash simply cannot match.
        if (transfer.get('hash') or '').lower() == wanted:
            return True

    add_url = scraper_utils.urljoin(self.base_url, ADD_URL)
    data = {'src': MAGNET_LINK % hash_id}
    js_data = self._json_get(add_url, data=data, cache_limit=0)
    return js_data.get('status') == 'success'
def _get_episode_url(self, show_url, video):
    """Follow the poster link of a show page and locate the episode.

    Args:
        show_url: site-relative URL of the show page.
        video: supplies season and episode for the link pattern.

    Returns:
        The result of _default_get_episode_url run over the 'servers' div
        (or the whole page when that div is absent), or None when the
        poster div or its link is missing.
    """
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=24)

    poster = dom_parser2.parse_dom(html, 'div', {'class': 'poster'})
    if not poster:
        return

    poster_links = dom_parser2.parse_dom(poster[0].content, 'a', req='href')
    if not poster_links:
        return

    show_url = scraper_utils.urljoin(self.base_url, poster_links[0].attrs['href'])
    html = self._http_get(show_url, cache_limit=2)
    servers = dom_parser2.parse_dom(html, 'div', {'id': 'servers'})
    episode_pattern = 'href="([^"]+)[^>]+>[Ee][Pp]\s*(?:[Ss]0*%s-)?E?p?0*%s(?!\d)' % (
        video.season, video.episode)
    search_in = servers or html
    return self._default_get_episode_url(search_in, video, episode_pattern)
def __get_movie_sources(self, source_url):
    """Build hosters for the finished torrents of a movie.

    The movie id comes from the 'movie_id' query parameter of source_url,
    or from __get_movie_id when that is absent. The torrents listed in the
    movie details are checked through __get_hash_data, and only those
    whose status is 'finished' (case-insensitive) become hosters.

    Torrent entries without a 'hash' are skipped. Previously the
    KeyError guard only protected the hash list, and the loop that
    followed re-read torrent['hash'] unguarded and raised.

    Args:
        source_url: URL whose query string may carry 'movie_id'.

    Returns:
        A list of direct hoster dicts with url 'hash_id=<hash>'. 'size' is
        added when the torrent has 'size_bytes', and '3D' is set for
        torrents whose quality is '3D'.
    """
    hosters = []
    query = kodi.parse_query(urlparse.urlparse(source_url).query)
    movie_id = query.get('movie_id') or self.__get_movie_id(source_url)
    if not movie_id:
        return hosters

    details_url = scraper_utils.urljoin(self.movie_base_url, MOVIE_DETAILS_URL)
    detail_data = self._json_get(details_url, params={'movie_id': movie_id}, cache_limit=24)
    try:
        torrents = detail_data['data']['movie']['torrents']
    except (KeyError, TypeError):
        # TypeError: an intermediate level was not a dict.
        torrents = []

    # Without a hash a torrent can be neither looked up nor streamed.
    torrents = [torrent for torrent in (torrents or []) if torrent.get('hash')]
    hashes = [torrent['hash'].lower() for torrent in torrents]
    hash_data = self.__get_hash_data(hashes)

    for torrent in torrents:
        hash_id = torrent['hash'].lower()
        try:
            status = hash_data['hashes'][hash_id]['status']
        except (KeyError, TypeError):
            status = ''
        if (status or '').lower() != 'finished':
            continue

        stream_url = 'hash_id=%s' % (hash_id)
        host = scraper_utils.get_direct_hostname(self, stream_url)
        torrent_quality = torrent.get('quality')
        hoster = {
            'multi-part': False,
            'class': self,
            'views': None,
            'url': stream_url,
            'rating': None,
            'host': host,
            'quality': QUALITY_MAP.get(torrent_quality, QUALITIES.HD720),
            'direct': True,
        }
        if 'size_bytes' in torrent:
            hoster['size'] = scraper_utils.format_size(torrent['size_bytes'], 'B')
        if torrent_quality == '3D':
            hoster['3D'] = True
        hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Post a query to '/search' and return links from the results table.

    Args:
        video_type: VIDEO_TYPES.MOVIE searches in 'm', anything else 't'.
        title: text sent as 'searchquery'.
        year: if non-empty, hits with a different non-empty year are
            dropped.
        season: unused.

    Returns:
        A list of dicts with the keys 'url', 'title' and 'year'. The list
        is empty when the 'search-page' div or its table is missing.
    """
    results = []
    search_in = 'm' if video_type == VIDEO_TYPES.MOVIE else 't'
    search_url = scraper_utils.urljoin(self.base_url, '/search')
    payload = {'searchquery': title, 'searchin': search_in}
    html = self._http_get(search_url, data=payload, cache_limit=8)

    page = dom_parser2.parse_dom(html, 'div', {'class': 'search-page'})
    if not page:
        return results

    table = dom_parser2.parse_dom(page[0].content, 'table')
    if not table:
        return results

    for attrs, match_title_year in dom_parser2.parse_dom(table[0].content, 'a', req='href'):
        match_url = attrs['href']
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if year and match_year and year != match_year:
            continue
        results.append({
            'url': scraper_utils.pathify_url(match_url),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
        })
    return results
def search(self, video_type, title, year, season=''):
    """Fetch '/search/<title>.html' and return items matching type and year.

    Characters other than ASCII letters, digits and spaces are removed
    from the title before it is put in the URL. Items carrying an
    'mli-eps' span are treated as seasons, the others as movies.

    Args:
        video_type: VIDEO_TYPES.MOVIE or VIDEO_TYPES.SEASON; other values
            match nothing.
        title: search text.
        year: if non-empty, hits with a different non-empty year are
            dropped.
        season: if non-empty, season hits must end in 'Season <n>'.

    Returns:
        A list of dicts with the keys 'title', 'year' and 'url'.
    """
    results = []
    title = re.sub('[^A-Za-z0-9 ]', '', title)
    search_url = scraper_utils.urljoin(self.base_url, '/search/')
    search_url += '%s.html' % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=8)
    want_movie = video_type == VIDEO_TYPES.MOVIE
    want_season = video_type == VIDEO_TYPES.SEASON

    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'ml-item'}):
        match_title = dom_parser2.parse_dom(item, 'span', {'class': 'mli-info'})
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        year_match = re.search('class="jt-info">(\d{4})<', item)
        is_episodes = dom_parser2.parse_dom(item, 'span', {'class': 'mli-eps'})

        type_ok = (want_movie and not is_episodes) or (want_season and is_episodes)
        if not type_ok:
            continue
        if not match_title or not match_url:
            continue

        match_url = match_url[0].attrs['href']
        match_title = match_title[0].content
        match_title = re.sub('</?h2>', '', match_title)
        match_title = re.sub('\s+\d{4}$', '', match_title)
        if want_season:
            if season and not re.search('Season\s+0*%s$' % (season), match_title):
                continue

        match_year = year_match.group(1) if year_match else ''
        if year and match_year and year != match_year:
            continue
        results.append({
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
            'url': scraper_utils.pathify_url(match_url),
        })
    return results
def get_sources(self, video):
    """Collect embedded and download links from a post.

    Embedded links are the first iframe src of each 'stb-container-<n>'
    div and are rated QUALITIES.HIGH. Download links come from the
    'stb-download-body_box' div: anchors with href '#' act as labels and
    are paired positionally with the anchors whose text is 'download now'.

    Args:
        video: the video being resolved.

    Returns:
        A list of non-direct source dicts, embedded links first.
    """
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources

    def _entry(link, host, quality):
        # Shared shape of every source this scraper returns.
        return {'multi-part': False, 'url': link, 'host': host, 'class': self,
                'quality': quality, 'views': None, 'rating': None, 'direct': False}

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=8)

    for div in dom_parser2.parse_dom(html, 'div', {'id': re.compile('stb-container-\d+')}):
        iframes = dom_parser2.parse_dom(div.content, 'iframe', req='src')
        if not iframes:
            continue
        stream_url = iframes[0].attrs['src']
        host = urlparse.urlparse(stream_url).hostname
        sources.append(_entry(stream_url, host, QUALITIES.HIGH))

    box = dom_parser2.parse_dom(html, 'div', {'class': "stb-download-body_box"})
    if not box:
        return sources

    box_html = box[0].content
    labels = dom_parser2.parse_dom(box_html, 'a', {'href': '#'})
    downloads = []
    for anchor in dom_parser2.parse_dom(box_html, 'a', req='href'):
        if anchor.content.lower() == 'download now':
            downloads.append(anchor)

    for label, download in zip(labels, downloads):
        stream_url = download.attrs['href']
        label_text = re.sub('</?[^>]*>', '', label.content)
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.blog_get_quality(video, label_text, host)
        sources.append(_entry(stream_url, host, quality))
    return sources
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the '/series.xml' catalogue for shows matching the title.

    A <dizi> element matches when the normalized title is contained in
    its normalized <adi> text and it has a <url> child. XML parse errors
    are logged as warnings and end the scan.

    Args:
        video_type: unused.
        title: show title to look for.
        year: checked against match_year, which stays '' here.
        season: unused.

    Returns:
        A list of dicts with the keys 'url', 'title' and 'year' (always '').
    """
    results = []
    xml_url = scraper_utils.urljoin(self.base_url, '/series.xml')
    xml = self._http_get(xml_url, cache_limit=24)
    if not xml:
        return results

    try:
        norm_title = scraper_utils.normalize_title(title)
        # Never reassigned, so the year clause below always passes.
        match_year = ''
        for element in ET.fromstring(xml).findall('.//dizi'):
            name = element.find('adi')
            if name is None:
                continue
            if norm_title not in scraper_utils.normalize_title(name.text):
                continue

            url = element.find('url')
            if url is None:
                continue
            if year and match_year and year != match_year:
                continue
            results.append({
                'url': scraper_utils.pathify_url(url.text),
                'title': scraper_utils.cleanse_title(name.text),
                'year': '',
            })
    except (ParseError, ExpatError) as e:
        logger.log('Dizilab Search Parse Error: %s' % (e), log_utils.LOGWARNING)

    return results
def _get_episode_url(self, show_url, video):
    """Pick the episode id from a season dropdown and build its URL.

    Each 'epiloader' link in the season's dropdown is paired positionally
    with an 'airdate' span. A link can match by episode number, by air
    date or by title. When titles are not forced, the number match is
    taken first and may be replaced by the air-date match and then the
    title match if the 'airdate-fallback' / 'title-fallback' settings are
    'true'. When titles are forced only the title match counts.

    The air date is compared with the span's text content. It used to be
    compared with the parse result object itself, which can never equal
    a string, so the air-date match never fired.

    Args:
        show_url: site-relative URL of the show page.
        video: supplies season, episode, ep_title and ep_airdate.

    Returns:
        EP_URL formatted with the chosen id, or None when the season
        block is not found or nothing matches.
    """
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=8)
    pattern = r"<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (video.season)
    match = re.search(pattern, html, re.DOTALL)
    if not match:
        return

    fragment = match.group(1)
    episodes = dom_parser2.parse_dom(fragment, 'a', {'id': 'epiloader'}, req='class')
    airdates = dom_parser2.parse_dom(fragment, 'span', {'class': 'airdate'})
    if isinstance(video.ep_airdate, datetime.date):
        ep_airdate = video.ep_airdate.strftime('%Y-%m-%d')
    else:
        ep_airdate = ''
    norm_title = scraper_utils.normalize_title(video.ep_title)

    num_id, airdate_id, title_id = '', '', ''
    for episode, airdate in zip(episodes, airdates):
        # The id used in the final URL is carried in the link's class attribute.
        ep_id = episode.attrs['class']
        if ep_airdate and ep_airdate == airdate.content.strip():
            airdate_id = ep_id

        match = re.search(r'(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode.content)
        if match:
            ep_num, ep_title = match.groups()
            if int(ep_num) == int(video.episode):
                num_id = ep_id
            if norm_title and norm_title in scraper_utils.normalize_title(ep_title):
                title_id = ep_id

    best_id = ''
    if not scraper_utils.force_title(video):
        # NOTE(review): an enabled fallback overrides an existing number
        # match here -- kept as in the original; confirm this is intended.
        if num_id:
            best_id = num_id
        if kodi.get_setting('airdate-fallback') == 'true' and airdate_id:
            best_id = airdate_id
        if kodi.get_setting('title-fallback') == 'true' and title_id:
            best_id = title_id
    else:
        if title_id:
            best_id = title_id

    if best_id:
        return EP_URL % (best_id)
def get_sources(self, video):
    """Return the download links listed in a post's 'download_message'.

    Release details are parsed from the page's og:title meta tag. Every
    link gets the quality derived from the parsed height (480 when no
    height was parsed).

    Args:
        video: the video being resolved; passed to get_url.

    Returns:
        A list of non-direct hoster dicts; 'format' is added when the
        parsed title provides one.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)

    title = dom_parser2.parse_dom(html, 'meta', {'property': 'og:title'}, req='content')
    if title:
        meta = scraper_utils.parse_movie_link(title[0].attrs['content'])
    else:
        meta = {}

    message = dom_parser2.parse_dom(html, 'p', {'class': 'download_message'})
    if not message:
        return hosters

    for attrs, _content in dom_parser2.parse_dom(message[0].content, 'a', req='href'):
        source = attrs['href']
        if scraper_utils.excluded_link(source):
            continue

        host = urlparse.urlparse(source).hostname
        quality = scraper_utils.height_get_quality(meta.get('height', 480))
        hoster = {
            'multi-part': False,
            'host': host,
            'class': self,
            'views': None,
            'url': source,
            'rating': None,
            'quality': quality,
            'direct': False,
        }
        if 'format' in meta:
            hoster['format'] = meta['format']
        hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Scan the '/yabanci-diziler/' listing for shows matching the title.

    Each 'category-post' div contributes its first link and <h3> text.
    A URL is considered only the first time it appears, whether or not
    that first occurrence matched the title.

    Args:
        video_type: unused.
        title: show title; matched as a normalized substring.
        year: unused.
        season: unused.

    Returns:
        A list of dicts with the keys 'url', 'title' and 'year' (always '').
    """
    results = []
    seen_urls = set()
    search_url = scraper_utils.urljoin(self.base_url, '/yabanci-diziler/')
    html = self._http_get(search_url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)

    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'category-post'}):
        links = dom_parser2.parse_dom(item, 'a', req='href')
        headings = dom_parser2.parse_dom(item, 'h3')
        if not (links and headings):
            continue

        match_url = scraper_utils.pathify_url(links[0].attrs['href'])
        match_title = headings[0].content
        if match_url in seen_urls:
            continue
        seen_urls.add(match_url)

        if norm_title not in scraper_utils.normalize_title(match_title):
            continue
        results.append({
            'url': match_url,
            'title': scraper_utils.cleanse_title(match_title),
            'year': '',
        })
    return results
def get_sources(self, video):
    """Build hosters from the 'linktr' rows of a page.

    A row's URL is its first link when that starts with 'http', otherwise
    the id of its first <span> that has one. The host is the URL's
    hostname for http URLs, otherwise the text of the row's <h9> element.
    Rows lacking either a URL or a host are skipped.

    Args:
        video: the video being resolved.

    Returns:
        A list of non-direct hoster dicts rated from QUALITIES.HIGH by host.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)

    for _attrs, row in dom_parser2.parse_dom(html, 'tr', {'id': 'linktr'}):
        redirect = dom_parser2.parse_dom(row, 'span', req='id')
        link = dom_parser2.parse_dom(row, 'a', req='href')

        stream_url = ''
        if link and link[0].attrs['href'].startswith('http'):
            stream_url = link[0].attrs['href']
        elif redirect:
            stream_url = redirect[0].attrs['id']

        if stream_url.startswith('http'):
            host = urlparse.urlparse(stream_url).hostname
        else:
            label = dom_parser2.parse_dom(row, 'h9')
            host = label[0].content if label else ''

        if not (stream_url and host):
            continue

        quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
        })
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Fetch the 'bestmatch' page for a title and return its thumbnails.

    The title is turned into a slug: spaces become '-', every character
    other than ASCII letters, digits and '-' is removed, and the result
    is lower-cased.

    Args:
        video_type: unused.
        title: search text.
        year: if non-empty, hits with a different non-empty year are
            dropped.
        season: unused.

    Returns:
        A list of dicts with the keys 'url', 'title' and 'year'.
    """
    results = []
    slug = re.sub('[^A-Za-z0-9-]', '', title.replace(' ', '-')).lower()
    url_template = scraper_utils.urljoin(self.base_url, '/bestmatch-fund-movies-%s.html')
    search_url = url_template % (slug)
    html = self._http_get(search_url, cache_limit=1)

    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'thumbsTitle'}):
        anchors = dom_parser2.parse_dom(item, 'a', req='href')
        if not anchors:
            continue

        anchor = anchors[0]
        match_url = anchor.attrs['href']
        match_title, match_year = scraper_utils.extra_year(anchor.content)
        if year and match_year and year != match_year:
            continue
        results.append({
            'url': scraper_utils.pathify_url(match_url),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
        })
    return results
def get_sources(self, video):
    """Merge post, ajax and embedded links into direct hosters.

    The three extractors fill one mapping of stream URL to quality, with
    later extractors overriding earlier ones on the same URL.

    Args:
        video: the video being resolved; passed to get_url.

    Returns:
        A list of direct hoster dicts. Every URL has a User-Agent header
        appended and every entry carries 'subs': 'Turkish subtitles'.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    sources = self.__get_posts(html)
    sources.update(self.__get_ajax(html, url))
    sources.update(self.__get_embedded(html, url))

    for source in sources:
        ua_header = scraper_utils.append_headers(
            {'User-Agent': scraper_utils.get_ua()})
        stream_url = source + ua_header
        host = scraper_utils.get_direct_hostname(self, source)
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': sources[source],
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': True,
            'subs': 'Turkish subtitles',
        })
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query the search API and return entries of the requested media type.

    A token is obtained first, and nothing is searched when it is still
    None afterwards. TVSHOW and EPISODE searches keep entries whose
    'meta' starts with 'TV SHOW', everything else keeps 'MOVIE' entries.
    '/show/' in a permalink is rewritten to '/tv-show/'.

    Args:
        video_type: one of VIDEO_TYPES.
        title: text sent as 'q'.
        year: if non-empty, hits with a different non-empty year are
            dropped.
        season: unused.

    Returns:
        A list of dicts with the keys 'title', 'url' and 'year'.
    """
    results = []
    self.__get_token()
    if self.__token is None:
        return results

    search_url, u = self.__get_search_url()
    search_url = scraper_utils.urljoin(API_BASE_URL, search_url)
    timestamp = int(time.time() * 1000)
    s = self.__get_s()
    query = {
        'q': title,
        'limit': '100',
        'timestamp': timestamp,
        'verifiedCheck': self.__token,
        'set': s,
        'rt': self.__get_rt(self.__token + s),
        'sl': self.__get_sl(u),
    }
    html = self._http_get(search_url, data=query,
                          headers={'Referer': self.base_url}, cache_limit=1)

    is_tv = video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]
    media_type = 'TV SHOW' if is_tv else 'MOVIE'

    for item in scraper_utils.parse_json(html, search_url):
        if not item['meta'].upper().startswith(media_type):
            continue

        if 'year' in item and item['year']:
            match_year = str(item['year'])
        else:
            match_year = ''
        if year and match_year and year != match_year:
            continue

        permalink = item['permalink'].replace('/show/', '/tv-show/')
        results.append({
            'title': scraper_utils.cleanse_title(item['title']),
            'url': scraper_utils.pathify_url(permalink),
            'year': match_year,
        })
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Scan the '/arsiv' archive for shows whose name contains the title.

    Inside the 'ts-list-content' div, each 'ts-list-name' <h1> is paired
    positionally with a <ul>. The year is read from a <span>YYYY</span>
    in that <ul> and is reported but not used for filtering.

    Args:
        video_type: unused.
        title: show title; matched as a normalized substring.
        year: unused.
        season: unused.

    Returns:
        A list of dicts with the keys 'url', 'title' and 'year'.
    """
    results = []
    url = scraper_utils.urljoin(self.base_url, '/arsiv')
    html = self._http_get(url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)

    listing = dom_parser2.parse_dom(html, 'div', {'class': 'ts-list-content'})
    if not listing:
        return results

    listing_html = listing[0].content
    names = dom_parser2.parse_dom(listing_html, 'h1', {'class': 'ts-list-name'})
    details = dom_parser2.parse_dom(listing_html, 'ul')

    for name, detail in zip(names, details):
        anchors = dom_parser2.parse_dom(name.content, 'a', req='href')
        year_match = re.search('<span>(\d{4})</span>', detail.content)
        if not anchors:
            continue

        match_url = anchors[0].attrs['href']
        match_title = anchors[0].content
        match_year = year_match.group(1) if year_match else ''
        if norm_title not in scraper_utils.normalize_title(match_title):
            continue
        results.append({
            'url': scraper_utils.pathify_url(match_url),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
        })
    return results
def get_sources(self, video):
    """Turn the links of a post into hosters rated by release name.

    Each link's 'release' value is parsed as a movie link for movies and
    as an episode link otherwise. The parsed height determines quality.

    Args:
        video: the video being resolved; video_type selects the parser.

    Returns:
        A list of non-direct hoster dicts; 'format' is added when the
        parsed release provides one.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    is_movie = video.video_type == VIDEO_TYPES.MOVIE

    for source, value in self.__get_post_links(html).iteritems():
        if scraper_utils.excluded_link(source):
            continue

        host = urlparse.urlparse(source).hostname
        if is_movie:
            meta = scraper_utils.parse_movie_link(value['release'])
        else:
            meta = scraper_utils.parse_episode_link(value['release'])

        hoster = {
            'multi-part': False,
            'host': host,
            'class': self,
            'views': None,
            'url': source,
            'rating': None,
            'quality': scraper_utils.height_get_quality(meta['height']),
            'direct': False,
        }
        if 'format' in meta:
            hoster['format'] = meta['format']
        hosters.append(hoster)
    return hosters