def search(self, video_type, title, year, season=''):
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/movies/search')
    html = self._http_get(search_url, params={'s': title}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'item_movie'}):
        match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
        if not match: continue

        match_title_year = match[0].attrs['title']
        match_url = match[0].attrs['href']
        is_season = re.search('S(?:eason\s+)?(\d+)', match_title_year, re.I)
        # Parenthesize each side of the "or"; comparing video_type to the whole
        # boolean expression would never match correctly.
        match_vt = (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season)
        match_year = ''
        if video_type == VIDEO_TYPES.SEASON:
            if not season and not match_vt: continue

            if match_vt:
                if season and int(is_season.group(1)) != int(season): continue
            else:
                if season and int(season) != 1: continue
                site_title, site_year = scraper_utils.extra_year(match_title_year)
                if scraper_utils.normalize_title(site_title) not in scraper_utils.normalize_title(title) or year != site_year: continue

            match_title = match_title_year
        else:
            if not match_vt: continue
            match_title, match_year = scraper_utils.extra_year(match_title_year)

        match_url = scraper_utils.urljoin(match_url, 'watching.html')
        if not year or not match_year or year == match_year:
            result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results

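# The searches in this section lean on scraper_utils.extra_year() to split a
# combined "Title (Year)" string. A minimal sketch of the assumed behavior
# (the real helper lives in scraper_utils and may differ in edge cases):
def extra_year_sketch(title_year):
    match = re.search('(.*?)\(?(\d{4})\)?$', title_year.strip())
    if match:
        return match.group(1).strip(' ('), match.group(2)
    return title_year, ''
# extra_year_sketch('The Martian (2015)') -> ('The Martian', '2015')
# extra_year_sketch('The Martian') -> ('The Martian', '')
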
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    url = scraper_utils.urljoin(self.base_url, '/searchtest.php')
    data = {'searchapi': title}
    headers = {'Referer': self.base_url}
    html = self._http_get(url, data=data, headers=headers, cache_limit=2)
    if video_type == VIDEO_TYPES.MOVIE:
        query_type = 'watch-movie-'
    else:
        query_type = 'watch-tvshow-'
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'a', {'href': '#'}):
        match = re.search('href="(%s[^"]+)' % (query_type), item)
        if match:
            link = match.group(1)
            match_title = self.__make_title(link, query_type)
            match_year = ''
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or int(year) == int(match_year)):
                result = {'url': scraper_utils.pathify_url(link), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results

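# __make_title() is not shown above; a hypothetical sketch of what it likely
# does, given that the matched links are slugs such as
# 'watch-movie-some-title-online-free': strip the query_type prefix and
# de-slug the remainder. The '-online-free' suffix is an assumption.
def __make_title(self, link, query_type):
    title = link.rstrip('/').split('/')[-1]
    if title.startswith(query_type):
        title = title[len(query_type):]
    title = re.sub('-online-free.*$', '', title)  # assumed suffix
    return title.replace('-', ' ').title()
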
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    seen_urls = set()
    for page in ['/latest-added/', '/popular-today/', '/most-popular/']:
        url = scraper_utils.urljoin(self.base_url, page)
        html = self._http_get(url, cache_limit=24)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'home'})
        if fragment:
            norm_title = scraper_utils.normalize_title(title)
            for attrs, match_title_year in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
                match_url = attrs['href']
                match_title, match_year = scraper_utils.extra_year(match_title_year)
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    match_url = scraper_utils.pathify_url(match_url)
                    if match_url in seen_urls: continue
                    seen_urls.add(match_url)
                    result = {'url': match_url, 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    if video_type == VIDEO_TYPES.TVSHOW and title:
        test_url = '/tv-show/%s/' % (scraper_utils.to_slug(title))
        test_url = scraper_utils.urljoin(self.base_url, test_url)
        html = self._http_get(test_url, require_debrid=True, cache_limit=24)
        posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
        if posts:
            result = {'url': scraper_utils.pathify_url(test_url), 'title': scraper_utils.cleanse_title(title), 'year': ''}
            results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
        html = self._http_get(self.base_url, params={'s': search_title}, require_debrid=True, cache_limit=1)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            match = re.search('<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
            if match:
                post_url, post_title = match.groups()
                if '/tv-show/' in post or self.__too_old(post): continue
                post_title = re.sub('<[^>]*>', '', post_title)
                meta = scraper_utils.parse_movie_link(post_title)
                full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                match_year = meta['year']
                match_norm_title = scraper_utils.normalize_title(meta['title'])
                if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                    results.append(result)
    return results

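# __too_old() is referenced here and in later scrapers but not shown. A
# hedged sketch of the usual pattern: parse a post date out of the markup and
# drop anything older than a cutoff. The 'class="meta_date"' markup, the date
# format, and the self.filter_days attribute are all assumptions.
def __too_old(self, post):
    if not getattr(self, 'filter_days', 0):
        return False
    match = re.search('class="meta_date">([^<]+)', post)
    if not match:
        return False
    try:
        post_date = datetime.datetime.strptime(match.group(1).strip(), '%B %d, %Y')
    except ValueError:
        return False
    return (datetime.datetime.now() - post_date).days > self.filter_days
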
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    xml_url = scraper_utils.urljoin(self.base_url, '/series.xml')
    xml = self._http_get(xml_url, cache_limit=24)
    if not xml: return results

    try:
        norm_title = scraper_utils.normalize_title(title)
        match_year = ''
        for element in ET.fromstring(xml).findall('.//dizi'):
            name = element.find('adi')
            if name is not None and norm_title in scraper_utils.normalize_title(name.text):
                url = element.find('url')
                if url is not None and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(url.text), 'title': scraper_utils.cleanse_title(name.text), 'year': ''}
                    results.append(result)
    except (ParseError, ExpatError) as e:
        logger.log('Dizilab Search Parse Error: %s' % (e), log_utils.LOGWARNING)
    return results

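# The element names the parser above expects imply series.xml looks roughly
# like the sample below (Turkish: 'dizi' = series, 'adi' = its name); the
# root tag is a guess.
SAMPLE_XML = '''<seriler>
  <dizi><adi>Example Show</adi><url>/dizi/example-show/</url></dizi>
</seriler>'''
for element in ET.fromstring(SAMPLE_XML).findall('.//dizi'):
    print(element.find('adi').text, element.find('url').text)  # Example Show /dizi/example-show/
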
def _get_episode_url(self, show_url, video):
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    if html:
        force_title = scraper_utils.force_title(video)
        episodes = dom_parser2.parse_dom(html, 'div', {'class': 'el-item'})
        if not force_title:
            episode_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (int(video.season), int(video.episode))
            match = re.search(episode_pattern, html)
            if match:
                return scraper_utils.pathify_url(match.group(1))

            if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                airdate_pattern = '%02d-%02d-%d' % (video.ep_airdate.day, video.ep_airdate.month, video.ep_airdate.year)
                for episode in episodes:
                    episode = episode.content
                    ep_url = dom_parser2.parse_dom(episode, 'a', req='href')
                    ep_airdate = dom_parser2.parse_dom(episode, 'div', {'class': 'date'})
                    if ep_url and ep_airdate:
                        ep_airdate = ep_airdate[0].content.strip()
                        if airdate_pattern == ep_airdate:
                            return scraper_utils.pathify_url(ep_url[0].attrs['href'])

        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for episode in episodes:
                episode = episode.content
                ep_url = dom_parser2.parse_dom(episode, 'a', req='href')
                ep_title = dom_parser2.parse_dom(episode, 'div', {'class': 'e-name'})
                if ep_url and ep_title and norm_title == scraper_utils.normalize_title(ep_title[0].content):
                    return scraper_utils.pathify_url(ep_url[0].attrs['href'])

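# A worked expansion of the S/E pattern above for season 1, episode 5; the
# (?!\d) lookahead keeps E05 from also matching E050 (markup is illustrative).
_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (1, 5)
print(re.search(_pattern, '<a href="/show/example-S01E05-hd.html">').group(1))
# -> /show/example-S01E05-hd.html
print(re.search(_pattern, '<a href="/show/example-S01E050.html">'))  # -> None
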
def _get_episode_url(self, show_url, video):
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = scraper_utils.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if self.__too_old(post):
                too_old = True
                break
            if show_url not in post: continue
            match = dom_parser2.parse_dom(post, 'a', req='href')
            if match:
                url, title = match[0].attrs['href'], match[0].content
                if not force_title:
                    if scraper_utils.release_check(video, title, require_title=False):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('</strong>(.*?)</p>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url: page_url = [page_url[0].attrs['href']]

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    if video_type == VIDEO_TYPES.MOVIE:
        url = '/api/v2/movies'
        key = 'movies'
    else:
        url = '/api/v2/shows'
        key = 'shows'
    url = scraper_utils.urljoin(self.base_url, url)
    js_data = self._http_get(url, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    if key in js_data:
        for item in js_data[key]:
            match_title = item['name']
            match_year = item.get('year', '')
            match_url = '?id=%s' % (item['id'])
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    try:
        season = int(season)
    except (ValueError, TypeError):
        season = 0
    # Try the cached listing first; fall back to a live search. (The original
    # reset results to [] immediately after __list(), discarding its output.)
    results = self.__list(title)
    if not results:
        results = self.__search(title, season)

    filtered_results = []
    norm_title = scraper_utils.normalize_title(title)
    for result in results:
        if norm_title in scraper_utils.normalize_title(result['title']) and (not season or season == int(result['season'])):
            result['title'] = '%s - Season %s [%s]' % (result['title'], result['season'], result['q_str'])
            if Q_ORDER[result['quality']] <= self.max_qorder:
                filtered_results.append(result)

    filtered_results.sort(key=lambda x: Q_ORDER[x['quality']], reverse=True)
    return filtered_results

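# Q_ORDER and self.max_qorder are defined elsewhere in the addon; the assumed
# shape is a ranking of quality constants, so the comparison above keeps
# anything at or below the user's quality cap. Values here are illustrative.
_Q_ORDER_SKETCH = {'LOW': 1, 'MEDIUM': 2, 'HIGH': 3, 'HD720': 4, 'HD1080': 5}
# With a cap of 4: _Q_ORDER_SKETCH['HD720'] <= 4 passes, while
# _Q_ORDER_SKETCH['HD1080'] <= 4 fails, so 1080p results are dropped.
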
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    if video_type == VIDEO_TYPES.MOVIE:
        url = scraper_utils.urljoin(self.base_url, '/movies/a-z/')
    else:
        url = scraper_utils.urljoin(self.base_url, '/tv/a-z/')
    # Strip a leading article so the title is filed under its first
    # significant letter (the original sliced [4:5]/[2:3], keeping only one
    # character of the remaining title).
    if title.upper().startswith('THE '):
        search_title = title[4:]
    elif title.upper().startswith('A '):
        search_title = title[2:]
    else:
        search_title = title
    if title[:1] in string.digits:
        first_letter = '1'
    else:
        first_letter = search_title[:1]
    url = url + first_letter.upper()

    html = self._http_get(url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    pattern = 'class=star.*?href=([^>]+)>(.*?)</a>'
    for match in re.finditer(pattern, html, re.DOTALL):
        match_url, match_title_year = match.groups()
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result = {'url': match_url, 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    url = scraper_utils.urljoin(self.base_url, '/arsiv')
    html = self._http_get(url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'ts-list-content'})
    if not fragment: return results

    items = dom_parser2.parse_dom(fragment[0].content, 'h1', {'class': 'ts-list-name'})
    details = dom_parser2.parse_dom(fragment[0].content, 'ul')
    for item, detail in zip(items, details):
        match = dom_parser2.parse_dom(item.content, 'a', req='href')
        match_year = re.search('<span>(\d{4})</span>', detail.content)
        if not match: continue

        match_url = match[0].attrs['href']
        match_title = match[0].content
        match_year = match_year.group(1) if match_year else ''
        if norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    seen_urls = set()
    search_url = scraper_utils.urljoin(self.base_url, '/ajax_submit.php')
    html = self._http_get(search_url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'category-post'}):
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        match_title = dom_parser2.parse_dom(item, 'h3')
        if match_url and match_title:
            match_url = scraper_utils.pathify_url(match_url[0].attrs['href'])
            match_title = match_title[0].content
            if match_url in seen_urls: continue
            seen_urls.add(match_url)
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': match_url, 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/advanced-search/')
    headers = {'Referer': self.base_url}
    params = {'search_query': title, 'orderby': '', 'order': '', 'wpas': 1}
    html = self._http_get(search_url, params=params, headers=headers, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'datos'}):
        match = dom_parser2.parse_dom(item, 'a', req='href')
        if not match: continue

        match_url = match[0].attrs['href']
        is_tvshow = '/tvshows/' in match_url
        if (is_tvshow and video_type == VIDEO_TYPES.MOVIE) or (not is_tvshow and video_type == VIDEO_TYPES.TVSHOW):
            continue

        match_title = match[0].content
        match_title, match_year = scraper_utils.extra_year(match_title)
        if scraper_utils.normalize_title(match_title) in norm_title and (not year or not match_year or year == match_year):
            result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
            results.append(result)
    return results

def _default_get_episode_url(self, html, video, episode_pattern, title_pattern='', airdate_pattern=''):
    logger.log('Default Episode Url: |%s|%s|' % (self.get_name(), video), log_utils.LOGDEBUG)
    if not html: return

    try: html = html[0].content
    except AttributeError: pass

    force_title = scraper_utils.force_title(video)
    if not force_title:
        if episode_pattern:
            match = re.search(episode_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))

        if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
            airdate_pattern = airdate_pattern.replace('{year}', str(video.ep_airdate.year))
            airdate_pattern = airdate_pattern.replace('{month}', str(video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{day}', str(video.ep_airdate.day))
            airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (video.ep_airdate.day))
            logger.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)
            match = re.search(airdate_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
    else:
        logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)

    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
            episode = match.groupdict()
            if norm_title == scraper_utils.normalize_title(episode['title']):
                return scraper_utils.pathify_url(episode['url'])

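# A worked example of the placeholder expansion above for an airdate of
# 2016-03-07 (the template itself is illustrative; MONTHS/SHORT_MONS are the
# month-name lookup tables assumed above):
_airdate = datetime.date(2016, 3, 7)
_template = 'href="([^"]+{year}-{p_month}-{p_day}[^"]*)'
_template = _template.replace('{year}', str(_airdate.year))
_template = _template.replace('{p_month}', '%02d' % (_airdate.month))
_template = _template.replace('{p_day}', '%02d' % (_airdate.day))
print(_template)  # href="([^"]+2016-03-07[^"]*)
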
def search(self, video_type, title, year, season=''):
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/')
    headers = {'Accept-Language': 'en-US,en;q=0.5'}
    html = self._http_get(search_url, params={'q': title}, headers=headers, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'ml-item'}):
        match_title = dom_parser2.parse_dom(item, 'span', {'class': 'mli-info'})
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        year_frag = dom_parser2.parse_dom(item, 'img', req='alt')
        is_episodes = dom_parser2.parse_dom(item, 'span', {'class': 'mli-eps'})
        if (video_type == VIDEO_TYPES.MOVIE and not is_episodes) or (video_type == VIDEO_TYPES.SEASON and is_episodes):
            if match_title and match_url:
                match_url = match_url[0].attrs['href']
                match_title = match_title[0].content
                match_title = re.sub('</?h2>', '', match_title)
                match_title = re.sub('\s+\d{4}$', '', match_title)
                if video_type == VIDEO_TYPES.SEASON:
                    if season and not re.search('Season\s+0*%s$' % (season), match_title):
                        continue

                if not match_url.endswith('/'): match_url += '/'
                match_url = scraper_utils.urljoin(match_url, 'watch/')
                match_year = ''
                if video_type == VIDEO_TYPES.MOVIE and year_frag:
                    match = re.search('\s*-\s*(\d{4})$', year_frag[0].attrs['alt'])
                    if match:
                        match_year = match.group(1)

                match_norm_title = scraper_utils.normalize_title(match_title)
                title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
                if title_match and (not year or not match_year or year == match_year):
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)
    return results

def search(self, video_type, title, year, season=''):
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/wp-content/plugins/ajax-search-pro/ajax_search.php')
    data = {'action': 'ajaxsearchpro_search', 'aspp': title, 'asid': '1', 'asp_inst_id': '1_1',
            'options': 'current_page_id=7&qtranslate_lang=0&set_intitle=None&customset%5B%5D=post'}
    html = self._http_get(search_url, data=data, headers=XHR, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    for attrs, match_title_year in dom_parser2.parse_dom(html, 'a', {'class': 'asp_res_url'}):
        match_url = attrs['href']
        match_title_year = re.sub('</?[^>]*>', '', match_title_year)
        is_season = re.search('Season\s+(\d+)\s*', match_title_year, re.I)
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                if season and int(is_season.group(1)) != int(season):
                    continue

                match_title = match_title_year
                match_title = re.sub('\s*\d{4}', '', match_title)
            else:
                match_title, match_year = scraper_utils.extra_year(match_title_year)

            match_norm_title = scraper_utils.normalize_title(match_title)
            title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            if title_match and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results

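# The two-way containment test above deliberately accepts partial matches in
# either direction, e.g. a site listing 'Avengers' still matches a search for
# 'The Avengers' (assuming normalize_title lower-cases and strips spacing and
# punctuation):
#   norm_title = 'theavengers'; match_norm_title = 'avengers'
#   -> 'avengers' in 'theavengers' -> title_match is True
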
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    if video_type == VIDEO_TYPES.MOVIE:
        results = self.__movie_search(title, year)
    else:
        norm_title = scraper_utils.normalize_title(title)
        html = self._http_get(self.base_url, cache_limit=48)
        for item in scraper_utils.parse_directory(self, html):
            if norm_title in scraper_utils.normalize_title(item['title']) and item['directory']:
                result = {'url': scraper_utils.pathify_url(item['link']), 'title': scraper_utils.cleanse_title(item['title']), 'year': ''}
                results.append(result)
    return results

def __episode_match(self, video, label):
    episode_pattern = 'Episode\s+0*%s(?!\d)' % (video.episode)
    if re.search(episode_pattern, label, re.I):
        return True

    if video.ep_title:
        match = re.search('Episode\s+\d+: (.*)', label)
        if match:
            label = match.group(1)
        if scraper_utils.normalize_title(video.ep_title) in scraper_utils.normalize_title(label):
            return True
    return False

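# How the fallbacks above behave on sample labels (data is illustrative):
print(bool(re.search('Episode\s+0*%s(?!\d)' % (5), 'Episode 05', re.I)))  # True
print(bool(re.search('Episode\s+0*%s(?!\d)' % (5), 'Episode 50', re.I)))  # False
print(re.search('Episode\s+\d+: (.*)', 'Episode 5: Red Door').group(1))  # Red Door
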
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    html = self._http_get(self.base_url, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    pattern = 'li><a\s+href="([^"]+)">([^<]+)'
    for match in re.finditer(pattern, html):
        url, match_title = match.groups()
        if norm_title in scraper_utils.normalize_title(match_title):
            match_title = match_title.replace(' – ', '')
            result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
            results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    # Join against base_url like the other scrapers; the original passed a
    # bare relative path to _http_get.
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, require_debrid=True, cache_limit=1)
    if video_type == VIDEO_TYPES.TVSHOW:
        seen_urls = {}
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if CATEGORIES[video_type] not in post: continue

            match = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if match:
                show_url, match_title = match.groups()
                if show_url in seen_urls: continue

                result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                seen_urls[show_url] = result
                results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [result.content for result in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        for heading, post in zip(headings, posts):
            if CATEGORIES[video_type] not in post or self.__too_old(post): continue

            post_url, post_title = heading
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                results.append(result)
    return results

def __movie_search(self, title, year):
    results = []
    norm_title = scraper_utils.normalize_title(title)
    html = self._http_get(self.base_url, cache_limit=48)
    for item in scraper_utils.parse_directory(self, html):
        if not item['directory']:
            meta = scraper_utils.parse_movie_link(item['title'])
            if meta['dubbed']: continue

            if (norm_title in scraper_utils.normalize_title(meta['title'])) and (not year or not meta['year'] or year == meta['year']):
                match_title = meta['title'].replace('.', ' ')
                match_title += ' [%sp.%s]' % (meta['height'], meta['extra'])
                result = {'url': scraper_utils.pathify_url(item['link']), 'title': scraper_utils.cleanse_title(match_title), 'year': meta['year']}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    norm_title = scraper_utils.normalize_title(title)
    for movie in self.__get_movies():
        match_year = movie['year']
        if norm_title in scraper_utils.normalize_title(movie['title']) and (not year or not match_year or year == match_year):
            result = {'title': scraper_utils.cleanse_title(movie['title']), 'year': match_year, 'url': scraper_utils.pathify_url(movie['url'])}
            results.append(result)
    return results

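# __get_movies() is not shown; a hypothetical sketch of the catalogue
# iterator the loop above consumes. The '/movies/' listing page and its
# markup are assumptions.
def __get_movies(self):
    html = self._http_get(scraper_utils.urljoin(self.base_url, '/movies/'), cache_limit=24)
    for attrs, label in dom_parser2.parse_dom(html, 'a', req='href'):
        title, year = scraper_utils.extra_year(label)
        yield {'title': title, 'year': year, 'url': attrs['href']}
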
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    url = scraper_utils.urljoin(self.base_url, AJAX_URL)
    data = {'type': 'getDizi'}
    headers = {'Referer': scraper_utils.urljoin(self.base_url, '/arsiv')}
    headers.update(XHR)
    html = self._http_get(url, data=data, headers=headers, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    match_year = ''
    js_data = scraper_utils.parse_json(html, url)
    for item in js_data.get('data', []):
        match_title = item.get('adi', '')
        if 'url' in item and norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(item['url']), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    html = self._http_get(self.base_url, cache_limit=48)
    fragment = dom_parser2.parse_dom(html, 'div', {'id': 'fil'})
    if not fragment: return results

    norm_title = scraper_utils.normalize_title(title)
    for match in re.finditer('href="([^"]+)"\s+title="([^"]+)', fragment[0].content):
        url, match_title = match.groups()
        if norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
            results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    url = scraper_utils.urljoin(self.base_url, '/index')
    html = self._http_get(url, cache_limit=24)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'ddmcc'}):
        for attrs, match_title in dom_parser2.parse_dom(fragment, 'a', req='href'):
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': scraper_utils.pathify_url(attrs['href']), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    show_list_url = scraper_utils.urljoin(self.base_url, '/tv-lists/')
    html = self._http_get(show_list_url, cache_limit=8)
    results = []
    seen_urls = set()
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'li'):
        match = dom_parser2.parse_dom(item, 'a', req='href')
        if match:
            match_url = scraper_utils.pathify_url(match[0].attrs['href'])
            match_title = match[0].content
            if match_url in seen_urls: continue
            seen_urls.add(match_url)
            match_title = re.sub('</?strong[^>]*>', '', match_title)
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': match_url, 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                results.append(result)
    return results

def _get_episode_url(self, show_url, video):
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=8)
    pattern = "<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (video.season)
    match = re.search(pattern, html, re.DOTALL)
    if not match: return

    fragment = match.group(1)
    episodes = dom_parser2.parse_dom(fragment, 'a', {'id': 'epiloader'}, req='class')
    airdates = dom_parser2.parse_dom(fragment, 'span', {'class': 'airdate'})
    ep_airdate = video.ep_airdate.strftime('%Y-%m-%d') if isinstance(video.ep_airdate, datetime.date) else ''
    norm_title = scraper_utils.normalize_title(video.ep_title)
    num_id, airdate_id, title_id = '', '', ''
    for episode, airdate in zip(episodes, airdates):
        ep_id = episode.attrs['class']
        episode = episode.content
        # Compare against the parsed element's text; comparing the string to
        # the dom match object itself would never be equal.
        if ep_airdate and ep_airdate == airdate.content.strip(): airdate_id = ep_id
        match = re.search('(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode)
        if match:
            ep_num, ep_title = match.groups()
            if int(ep_num) == int(video.episode): num_id = ep_id
            if norm_title and norm_title in scraper_utils.normalize_title(ep_title): title_id = ep_id

    best_id = ''
    if not scraper_utils.force_title(video):
        if num_id: best_id = num_id
        if kodi.get_setting('airdate-fallback') == 'true' and airdate_id: best_id = airdate_id
        if kodi.get_setting('title-fallback') == 'true' and title_id: best_id = title_id
    else:
        if title_id: best_id = title_id

    if best_id:
        return EP_URL % (best_id)

def search(self, video_type, title, year, season=''):
    results = []
    norm_title = scraper_utils.normalize_title(title)
    for item in self.__get_torrents():
        if title or year or season:
            is_season = re.search('(.*?{delim}season{delim}+(\d+)){delim}?(.*)'.format(delim=DELIM), item['name'], re.I)
            if (not is_season and video_type == VIDEO_TYPES.SEASON) or (is_season and video_type == VIDEO_TYPES.MOVIE):
                continue
            if re.search('{delim}S\d+E\d+{delim}'.format(delim=DELIM), item['name'], re.I):
                continue  # skip episodes

            if video_type == VIDEO_TYPES.SEASON:
                match_title, match_season, extra = is_season.groups()
                if season and int(match_season) != int(season):
                    continue
                match_year = ''
                match_title = re.sub(DELIM, ' ', match_title)
            else:
                match = re.search('(.*?)\(?(\d{4})\)?(.*)', item['name'])
                if match:
                    match_title, match_year, extra = match.groups()
                else:
                    match_title, match_year, extra = item['name'], '', ''
        else:
            match_title, match_year, extra = item['name'], '', ''

        match_title = match_title.strip()
        extra = extra.strip()
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result_title = match_title
            if extra: result_title += ' [%s]' % (extra)
            result = {'title': result_title, 'year': match_year, 'url': 'hash=%s' % (item['hash'])}
            results.append(result)
    return results

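# A worked example of the season regex above, assuming DELIM is a separator
# character class such as '[._ -]':
_is_season = re.search('(.*?{delim}season{delim}+(\d+)){delim}?(.*)'.format(delim='[._ -]'),
                       'Some.Show.Season.2.1080p.WEB', re.I)
print(_is_season.groups())  # ('Some.Show.Season.2', '2', '1080p.WEB')
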
def search(self, video_type, title, year, season=''):
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'cfv'})
    if not fragment: return results

    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
        is_season = dom_parser2.parse_dom(item, 'div', {'class': 'status'})
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
            if not match: continue

            match_title = match[0].attrs['title']
            match_url = match[0].attrs['href']
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                if season and not re.search('Season\s+%s$' % (season), match_title, re.I):
                    continue
            else:
                match = re.search('-(\d{4})[-.]', match_url)
                if match:
                    match_year = match.group(1)

            match_norm_title = scraper_utils.normalize_title(match_title)
            title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            if title_match and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results

def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    page_url = scraper_utils.urljoin(self.base_url, '/movies/search.php')
    html = self._http_get(page_url, params={'dayq': title}, cache_limit=48)
    html = re.sub('<!--.*?-->', '', html)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, td in dom_parser2.parse_dom(html, 'td', {'class': 'topic_content'}):
        match_url = dom_parser2.parse_dom(td, 'a', req='href')
        match_title_year = dom_parser2.parse_dom(td, 'img', req='alt')
        if not match_url or not match_title_year: continue

        match_url = match_url[0].attrs['href']
        match_title_year = match_title_year[0].attrs['alt']
        if not match_url.startswith('/'): match_url = '/movies/' + match_url
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if (norm_title in scraper_utils.normalize_title(match_title)) and (not year or not match_year or year == match_year):
            result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results