def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    if title:
        first_letter = title[:1].lower()
        if first_letter.isdigit():
            first_letter = '0-9'
        search_url = '/search.php/%s/' % (first_letter)
        search_url = urlparse.urljoin(self.base_url, search_url)
        html = self._http_get(search_url, cache_limit=24)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'home'})
        if fragment:
            norm_title = scraper_utils.normalize_title(title)
            for match in re.finditer('''href=["']([^'"]+)[^>]+>([^<]+)''', fragment[0]):
                url, match_title_year = match.groups()
                match_title, match_year = scraper_utils.extra_year(match_title_year)
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results
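# A minimal sketch of the loose title matching these search methods share:
# normalize both sides, then test substring containment. _norm_example is a
# hypothetical stand-in for scraper_utils.normalize_title, whose real rules
# may differ (it likely also strips articles and punctuation).
import re

def _norm_example(s):
    return re.sub('[^a-z0-9]', '', (s or '').lower())

assert _norm_example('The Office') in _norm_example('Watch The Office (US) Online')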
def search(self, video_type, title, year, season=''):
    results = []
    norm_title = scraper_utils.normalize_title(title)
    for item in self.__get_torrents():
        if title or year or season:
            is_season = re.search('(.*?{delim}season{delim}+(\d+)){delim}?(.*)'.format(delim=DELIM), item['name'], re.I)
            if (not is_season and video_type == VIDEO_TYPES.SEASON) or (is_season and video_type == VIDEO_TYPES.MOVIE):
                continue
            if re.search('{delim}S\d+E\d+{delim}'.format(delim=DELIM), item['name'], re.I):
                continue  # skip episodes

            if video_type == VIDEO_TYPES.SEASON:
                match_title, match_season, extra = is_season.groups()
                if season and int(match_season) != int(season):
                    continue
                match_year = ''
                match_title = re.sub(DELIM, ' ', match_title)
            else:
                match = re.search('(.*?)\(?(\d{4})\)?(.*)', item['name'])
                if match:
                    match_title, match_year, extra = match.groups()
                else:
                    match_title, match_year, extra = item['name'], '', ''
        else:
            match_title, match_year, extra = item['name'], '', ''

        match_title = match_title.strip()
        extra = extra.strip()
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result_title = match_title
            if extra:
                result_title += ' [%s]' % (extra)
            result = {'title': result_title, 'year': match_year, 'url': 'hash=%s' % (item['hash'])}
            results.append(result)
    return results
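# Hedged sketch of the season regex above. DELIM is assumed to be a separator
# character class like '[._ -]'; the real constant is defined elsewhere.
import re

_DELIM_EXAMPLE = '[._ -]'
_pattern = '(.*?{delim}season{delim}+(\d+)){delim}?(.*)'.format(delim=_DELIM_EXAMPLE)
_m = re.search(_pattern, 'Some.Show.Season.3.720p.x264', re.I)
assert _m and _m.group(2) == '3' and _m.group(3) == '720p.x264'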
def _get_episode_url(self, show_url, video):
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=8)
    pattern = "<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (video.season)
    match = re.search(pattern, html, re.DOTALL)
    if not match:
        return

    fragment = match.group(1)
    episodes = dom_parser2.parse_dom(fragment, 'a', {'id': 'epiloader'}, req='class')
    airdates = dom_parser2.parse_dom(fragment, 'span', {'class': 'airdate'})
    ep_airdate = video.ep_airdate.strftime('%Y-%m-%d') if isinstance(video.ep_airdate, datetime.date) else ''
    norm_title = scraper_utils.normalize_title(video.ep_title)
    num_id, airdate_id, title_id = '', '', ''
    for episode, airdate in zip(episodes, airdates):
        ep_id = episode.attrs['class']
        episode = episode.content
        airdate = airdate.content.strip()  # compare the date text, not the dom node
        if ep_airdate and ep_airdate == airdate:
            airdate_id = ep_id
        match = re.search('(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode)
        if match:
            ep_num, ep_title = match.groups()
            if int(ep_num) == int(video.episode):
                num_id = ep_id
            if norm_title and norm_title in scraper_utils.normalize_title(ep_title):
                title_id = ep_id

    # start from the episode-number match; enabled airdate/title fallbacks override it
    best_id = ''
    if not scraper_utils.force_title(video):
        if num_id:
            best_id = num_id
        if kodi.get_setting('airdate-fallback') == 'true' and airdate_id:
            best_id = airdate_id
        if kodi.get_setting('title-fallback') == 'true' and title_id:
            best_id = title_id
    else:
        if title_id:
            best_id = title_id

    if best_id:
        return EP_URL % (best_id)
def _get_episode_url(self, show_url, video):
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    if html:
        force_title = scraper_utils.force_title(video)
        episodes = dom_parser2.parse_dom(html, 'div', {'class': 'el-item'})
        if not force_title:
            episode_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (int(video.season), int(video.episode))
            match = re.search(episode_pattern, html)
            if match:
                return scraper_utils.pathify_url(match.group(1))

            if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                airdate_pattern = '%02d-%02d-%d' % (video.ep_airdate.day, video.ep_airdate.month, video.ep_airdate.year)
                for episode in episodes:
                    episode = episode.content
                    ep_url = dom_parser2.parse_dom(episode, 'a', req='href')
                    ep_airdate = dom_parser2.parse_dom(episode, 'div', {'class': 'date'})
                    if ep_url and ep_airdate:
                        ep_airdate = ep_airdate[0].content.strip()
                        if airdate_pattern == ep_airdate:
                            return scraper_utils.pathify_url(ep_url[0].attrs['href'])

        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for episode in episodes:
                episode = episode.content
                ep_url = dom_parser2.parse_dom(episode, 'a', req='href')
                ep_title = dom_parser2.parse_dom(episode, 'div', {'class': 'e-name'})
                if ep_url and ep_title and norm_title == scraper_utils.normalize_title(ep_title[0].content):
                    return scraper_utils.pathify_url(ep_url[0].attrs['href'])
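# The airdate fallback above compares a day-month-year string (not ISO order)
# against the page's date text; a quick check of that formatting:
import datetime

_d = datetime.date(2017, 3, 5)
assert '%02d-%02d-%d' % (_d.day, _d.month, _d.year) == '05-03-2017'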
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    xml_url = scraper_utils.urljoin(self.base_url, '/series.xml')
    xml = self._http_get(xml_url, cache_limit=24)
    if not xml:
        return results

    try:
        norm_title = scraper_utils.normalize_title(title)
        match_year = ''
        for element in ET.fromstring(xml).findall('.//dizi'):
            name = element.find('adi')
            if name is not None and norm_title in scraper_utils.normalize_title(name.text):
                url = element.find('url')
                if url is not None and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(url.text), 'title': scraper_utils.cleanse_title(name.text), 'year': ''}
                    results.append(result)
    except (ParseError, ExpatError) as e:
        logger.log('Dizilab Search Parse Error: %s' % (e), log_utils.LOGWARNING)

    return results
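# Assumed shape of the series.xml feed the loop above consumes (element names
# taken from the code; the sample document itself is illustrative only):
import xml.etree.ElementTree as ET

_xml = '<dizis><dizi><adi>Example Show</adi><url>/dizi/example</url></dizi></dizis>'
for _dizi in ET.fromstring(_xml).findall('.//dizi'):
    assert _dizi.find('adi').text == 'Example Show'
    assert _dizi.find('url').text == '/dizi/example'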
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    seen_urls = set()
    search_url = scraper_utils.urljoin(self.base_url, '/search/')
    html = self._http_get(search_url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'category-post'}):
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        match_title = dom_parser2.parse_dom(item, 'h3')
        if match_url and match_title:
            match_url = scraper_utils.pathify_url(match_url[0].attrs['href'])
            match_title = match_title[0].content
            if match_url in seen_urls:
                continue
            seen_urls.add(match_url)
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': match_url, 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_title = re.sub('[^A-Za-z0-9. ]', '', title)
    url = '/search/%s/' % (urllib.quote(search_title))
    url = scraper_utils.urljoin(self.base_url, url)
    html = self._http_get(url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'article', {'class': 'movie-details'}):
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        match_title = dom_parser2.parse_dom(item, 'h2', {'class': 'movie-title'})
        match_year = dom_parser2.parse_dom(item, 'div', {'class': 'movie-year'})
        if match_url and match_title:
            match_url = match_url[0].attrs['href']
            match_title = match_title[0].content
            match_year = match_year[0].content if match_year else ''
            if norm_title in scraper_utils.normalize_title(match_title) and (not match_year or not year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = scraper_utils.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if self.__too_old(post):
                too_old = True
                break
            if show_url not in post:
                continue
            match = dom_parser2.parse_dom(post, 'a', req='href')
            if match:
                url, title = match[0].attrs['href'], match[0].content
                if not force_title:
                    if scraper_utils.release_check(video, title, require_title=False):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('</strong>(.*?)</p>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)

        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url:
            page_url = [page_url[0].attrs['href']]
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    if re.search('Sorry, but nothing matched', html, re.I):
        return results

    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'box-shadow'}):
        for attrs, _content in dom_parser2.parse_dom(item, 'a', req=['href', 'title']):
            match_url, match_title_year = attrs['href'], attrs['title']
            if re.search('S\d{2}E\d{2}', match_title_year):
                continue  # skip individual episode links
            if re.search('TV\s*SERIES', match_title_year, re.I):
                continue  # skip show-level links
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if (not year or not match_year or year == match_year) and norm_title in scraper_utils.normalize_title(match_title):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def _default_get_episode_url(self, html, video, episode_pattern, title_pattern='', airdate_pattern=''):
    logger.log('Default Episode Url: |%s|%s|' % (self.get_name(), video), log_utils.LOGDEBUG)
    if not html:
        return

    try:
        html = html[0].content
    except AttributeError:
        pass

    force_title = scraper_utils.force_title(video)
    if not force_title:
        if episode_pattern:
            match = re.search(episode_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))

        if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
            airdate_pattern = airdate_pattern.replace('{year}', str(video.ep_airdate.year))
            airdate_pattern = airdate_pattern.replace('{month}', str(video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{day}', str(video.ep_airdate.day))
            airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (video.ep_airdate.day))
            logger.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)
            match = re.search(airdate_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
    else:
        logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)

    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
            episode = match.groupdict()
            if norm_title == scraper_utils.normalize_title(episode['title']):
                return scraper_utils.pathify_url(episode['url'])
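# Sketch of the {placeholder} expansion above on a fixed date. The month-name
# lists stand in for the module-level MONTHS / SHORT_MONS constants.
import datetime

_MONTHS = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
           'August', 'September', 'October', 'November', 'December']
_SHORT_MONS = [m[:3] for m in _MONTHS]

def _expand_airdate(pattern, d):
    for token, value in [('{year}', str(d.year)), ('{month_name}', _MONTHS[d.month - 1]),
                         ('{short_month}', _SHORT_MONS[d.month - 1]), ('{p_month}', '%02d' % d.month),
                         ('{month}', str(d.month)), ('{p_day}', '%02d' % d.day), ('{day}', str(d.day))]:
        pattern = pattern.replace(token, value)
    return pattern

assert _expand_airdate('href="([^"]+{year}/{p_month}/{p_day}[^"]*)"', datetime.date(2017, 3, 5)) == 'href="([^"]+2017/03/05[^"]*)"'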
def search(self, video_type, title, year, season=''):
    html = self._http_get(self.base_url, cache_limit=8)
    results = []
    norm_title = scraper_utils.normalize_title(title)
    pattern = 'class="[^"]*cat-item.*?href="([^"]+)[^>]+>([^<]+)'
    for match in re.finditer(pattern, html):
        url, match_title = match.groups()
        if norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    url = scraper_utils.urljoin(self.base_url, '/index')
    html = self._http_get(url, cache_limit=24)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'ddmcc'}):
        for attrs, match_title in dom_parser2.parse_dom(fragment, 'a', req='href'):
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': scraper_utils.pathify_url(attrs['href']), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    if video_type == VIDEO_TYPES.TVSHOW and title:
        test_url = '/category/tv-shows/%s/' % (scraper_utils.to_slug(title))
        test_url = scraper_utils.urljoin(self.base_url, test_url)
        html = self._http_get(test_url, require_debrid=True, cache_limit=24)
        posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
        if posts:
            result = {'url': scraper_utils.pathify_url(test_url), 'title': scraper_utils.cleanse_title(title), 'year': ''}
            results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())  # strip punctuation before searching
        html = self._http_get(self.base_url, params={'s': search_title}, require_debrid=True, cache_limit=1)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            match = re.search('<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
            if match:
                post_url, post_title = match.groups()
                if '/tv-show/' in post or self.__too_old(post):
                    continue
                post_title = re.sub('<[^>]*>', '', post_title)
                meta = scraper_utils.parse_movie_link(post_title)
                full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                match_year = meta['year']
                match_norm_title = scraper_utils.normalize_title(meta['title'])
                if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                    results.append(result)
    return results
def __episode_match(self, video, label):
    episode_pattern = 'Episode\s+0*%s(?!\d)' % (video.episode)
    if re.search(episode_pattern, label, re.I):
        return True

    if video.ep_title:
        match = re.search('Episode\s+\d+: (.*)', label)
        if match:
            label = match.group(1)
        if scraper_utils.normalize_title(video.ep_title) in scraper_utils.normalize_title(label):
            return True

    return False
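# Quick check of the zero-padded episode pattern used by __episode_match: the
# trailing (?!\d) keeps episode 7 from matching 70, 71, etc.
import re

def _ep_match(label, episode):
    return bool(re.search('Episode\s+0*%s(?!\d)' % (episode), label, re.I))

assert _ep_match('Episode 07: Pilot', 7)
assert not _ep_match('Episode 70: Finale', 7)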
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    search_url = scraper_utils.urljoin(self.base_url, search_url)
    html = self._http_get(search_url, require_debrid=False, cache_limit=1)
    if video_type == VIDEO_TYPES.TVSHOW:
        seen_urls = {}
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if CATEGORIES[video_type] not in post:
                continue
            match = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if match:
                show_url, match_title = match.groups()
                if show_url in seen_urls:
                    continue
                result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                seen_urls[show_url] = result
                results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [result.content for result in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        for heading, post in zip(headings, posts):
            if CATEGORIES[video_type] not in post or self.__too_old(post):
                continue

            post_url, post_title = heading
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    norm_title = scraper_utils.normalize_title(title)
    url = scraper_utils.urljoin(self.base_url, '/search/')
    headers = {'Referer': self.base_url}
    html = self._http_get(url, headers=headers, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'li'):
        for attrs, _content in dom_parser2.parse_dom(item, 'a', req=['title', 'href']):
            match_title, match_url = attrs['title'], attrs['href']
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    html = self._http_get(self.base_url, cache_limit=48)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'dizis'})
    if not fragment:
        return results

    norm_title = scraper_utils.normalize_title(title)
    for attrs, match_title in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
        match_url = attrs['href']
        match_title = re.sub('<div[^>]*>.*?</div>', '', match_title)
        if norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    url = scraper_utils.urljoin(self.base_url, AJAX_URL)
    data = {'type': 'getDizi'}
    headers = {'Referer': scraper_utils.urljoin(self.base_url, '/arsiv')}
    headers.update(XHR)
    html = self._http_get(url, data=data, headers=headers, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    js_data = scraper_utils.parse_json(html, url)
    for item in js_data.get('data', []):
        match_title = item.get('adi', '')
        if 'url' in item and norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(item['url']), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
            results.append(result)
    return results
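# Assumed shape of the getDizi AJAX response consumed above ('adi' is the show
# name, per the code); the sample payload is illustrative only:
import json

_js_data = json.loads('{"data": [{"adi": "Example Show", "url": "/dizi/example"}]}')
assert [item['adi'] for item in _js_data.get('data', [])] == ['Example Show']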
def search(self, video_type, title, year, season=''):
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html')
    search_url = search_url % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'movie'})
    if not fragment:
        return results

    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        match_title = dom_parser2.parse_dom(item, 'span', {'class': 'text'})
        match_year = dom_parser2.parse_dom(item, 'span', {'class': 'year'})
        if not match_url or not match_title:
            continue

        match_url = match_url[0].attrs['href']
        match_title = re.sub('</?strong>', '', match_title[0].content)
        is_season = re.search('Season\s+(\d+)$', match_title, re.I)
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            if video_type == VIDEO_TYPES.MOVIE:
                match_year = match_year[0].content if match_year else ''
            else:
                if season and int(is_season.group(1)) != int(season):
                    continue
                match_year = ''

            match_norm_title = scraper_utils.normalize_title(match_title)
            title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            if title_match and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
    parts = dom_parser2.parse_dom(html, 'ul', {'class': 'episode_list'})
    fragment = '\n'.join(part.content for part in parts)
    result = self._default_get_episode_url(fragment, video, episode_pattern)
    if result:
        return result

    ep_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(fragment, 'a', req='href')]
    ep_dates = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_air_d'})]
    ep_titles = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_name'})]
    force_title = scraper_utils.force_title(video)
    if not force_title and kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
        for ep_url, ep_date in zip(ep_urls, ep_dates):
            logger.log('Quikr Ep Airdate Matching: %s - %s - %s' % (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG)
            if video.ep_airdate == scraper_utils.to_datetime(ep_date, '%Y-%m-%d').date():
                return scraper_utils.pathify_url(ep_url)

    if force_title or kodi.get_setting('title-fallback') == 'true':
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for ep_url, ep_title in zip(ep_urls, ep_titles):
            ep_title = re.sub('<span>.*?</span>\s*', '', ep_title)
            logger.log('Quikr Ep Title Matching: %s - %s - %s' % (ep_url.encode('utf-8'), ep_title.encode('utf-8'), video.ep_title), log_utils.LOGDEBUG)
            if norm_title == scraper_utils.normalize_title(ep_title):
                return scraper_utils.pathify_url(ep_url)
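# The airdate fallback above parses the page's 'YYYY-MM-DD' text and compares
# dates, not strings; strptime stands in for scraper_utils.to_datetime here.
import datetime

assert datetime.datetime.strptime('2016-09-25', '%Y-%m-%d').date() == datetime.date(2016, 9, 25)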
def __search(self, video_type, title, year, season=''):
    results = []
    search_url = SEARCH_URL % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=1)
    js_data = scraper_utils.parse_json(html)
    norm_title = scraper_utils.normalize_title(title)
    for item in js_data.get('results', []):
        if '/watch/' not in item['url'].lower():
            continue

        is_season = re.search('Season\s+(\d+)', item['titleNoFormatting'], re.IGNORECASE)
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match_title_year = item['titleNoFormatting']
            match_title_year = re.sub('^Watch\s+', '', match_title_year)
            match_url = item['url']
            match_year = ''
            if video_type == VIDEO_TYPES.MOVIE:
                match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
            else:
                if season and int(is_season.group(1)) != int(season):
                    continue
                match = re.search('(.*?)\s+\(\d{4}\)', match_title_year)
                if match:
                    match_title = match.group(1)
                else:
                    match_title = match_title_year

            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    try:
        season = int(season)
    except (TypeError, ValueError):
        season = 0

    # try the cached listing first; fall back to a live search
    results = self.__list(title)
    if not results:
        results = self.__search(title, season)

    filtered_results = []
    norm_title = scraper_utils.normalize_title(title)
    for result in results:
        if norm_title in scraper_utils.normalize_title(result['title']) and (not season or season == int(result['season'])):
            result['title'] = '%s - Season %s [%s]' % (result['title'], result['season'], result['q_str'])
            if Q_ORDER[result['quality']] <= self.max_qorder:
                filtered_results.append(result)

    filtered_results.sort(key=lambda x: Q_ORDER[x['quality']], reverse=True)
    return filtered_results
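# Hedged sketch of the Q_ORDER gate above: drop results above the configured
# quality ceiling, then sort best-first. The mapping is an assumed placeholder.
_Q_ORDER_EXAMPLE = {'SD': 1, 'HD720': 2, 'HD1080': 3}
_max_qorder = 2
_hits = [{'title': 'A', 'quality': 'HD1080'}, {'title': 'B', 'quality': 'SD'}, {'title': 'C', 'quality': 'HD720'}]
_kept = [h for h in _hits if _Q_ORDER_EXAMPLE[h['quality']] <= _max_qorder]
_kept.sort(key=lambda h: _Q_ORDER_EXAMPLE[h['quality']], reverse=True)
assert [h['title'] for h in _kept] == ['C', 'B']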
def __alt_search(self, video_type, title, year, season=''):
    results = []
    params = title.lower()
    if year:
        params += ' %s' % (year)
    if video_type == VIDEO_TYPES.SEASON and season:
        params += ' Season %s' % (season)
    params = {'key': params}
    search_url = urlparse.urljoin(self.base_url, '/search')
    html = self._http_get(search_url, params=params, cache_limit=1)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'caption'}):
        match = re.search('href="([^"]+)[^>]+>(.*?)<span[^>]*>', item)
        if match:
            match_url, match_title = match.groups()
            is_season = re.search('-season-\d+', match_url)
            if (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season):
                if video_type == VIDEO_TYPES.SEASON:
                    if season and not re.search('season-0*%s$' % (season), match_url):
                        continue

                match_title = re.sub('</?[^>]*>', '', match_title)
                match_title = re.sub('\s+Full\s+Movie', '', match_title)
                match = re.search('-(\d{4})(?:$|-)', match_url)
                match_year = match.group(1) if match else ''

                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'cfv'})
    if not fragment:
        return results

    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
        is_season = dom_parser2.parse_dom(item, 'div', {'class': 'status'})
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
            if not match:
                continue

            match_title = match[0].attrs['title']
            match_url = match[0].attrs['href']
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                if season and not re.search('Season\s+%s$' % (season), match_title, re.I):
                    continue
            else:
                match = re.search('-(\d{4})[-.]', match_url)
                if match:
                    match_year = match.group(1)

            match_norm_title = scraper_utils.normalize_title(match_title)
            title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            if title_match and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    page_url = scraper_utils.urljoin(self.base_url, '/search.php')
    html = self._http_get(page_url, params={'dayq': title}, cache_limit=48)
    html = re.sub('<!--.*?-->', '', html)  # strip HTML comments before parsing
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, td in dom_parser2.parse_dom(html, 'td', {'class': 'topic_content'}):
        match_url = dom_parser2.parse_dom(td, 'a', req='href')
        match_title_year = dom_parser2.parse_dom(td, 'img', req='alt')
        if not match_url or not match_title_year:
            continue

        match_url = match_url[0].attrs['href']
        match_title_year = match_title_year[0].attrs['alt']
        if not match_url.startswith('/'):
            match_url = '/tvseries/' + match_url
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if (norm_title in scraper_utils.normalize_title(match_title)) and (not year or not match_year or year == match_year):
            result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results
def __tv_search(self, title, year):
    results = []
    search_url = scraper_utils.urljoin(self.tv_base_url, '/showlist/')
    html = self._http_get(search_url, cache_limit=48)
    match_year = ''
    norm_title = scraper_utils.normalize_title(title)
    for attrs, match_title in dom_parser2.parse_dom(html, 'a', {'class': 'thread_link'}, req='href'):
        match_url = attrs['href']
        if match_title.upper().endswith(', THE'):
            match_title = 'The ' + match_title[:-5]  # move a trailing article back to the front
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
            results.append(result)
    return results
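# The ', THE' rewrite above handles listing-style titles; for example:
_t = 'Office, The'
if _t.upper().endswith(', THE'):
    _t = 'The ' + _t[:-5]
assert _t == 'The Office'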
def search(self, video_type, title, year, season=''):
    results = []
    if video_type == VIDEO_TYPES.MOVIE:
        search_url = urlparse.urljoin(self.base_url, '/?s=')
        search_url += urllib.quote_plus('%s' % (title))
        html = self._http_get(search_url, cache_limit=1)
        links = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'href')
        titles = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'title')
        matches = zip(links, titles)
    else:
        html = self._http_get(self.base_url, cache_limit=8)
        matches = re.findall('<li\s+class="cat-item[^>]+>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', html)

    norm_title = scraper_utils.normalize_title(title)
    for item in matches:
        url, match_title_year = item  # both branches yield (url, title) pairs
        match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
        if match:
            match_title, match_year = match.groups()
        else:
            match_title = match_title_year
            match_year = ''

        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            log_utils.log('Rainierland - search - Match Found: ' + str(norm_title))
            result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search')
    html = self._http_get(search_url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'li'):
        match = re.search('''href=["']([^"']+)[^>]+>([^<]+)''', item)
        if match:
            url, match_title = match.groups()
            match = re.search('(.*?)\s*\(Season\s+\d+', match_title)
            if match:
                match_title = match.group(1)
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                results.append(result)
    return results
def __search(self, video_type, title, year, page):
    results = []
    url = scraper_utils.urljoin(self.base_url, page['url'])
    params = page.get('params')
    html = self._http_get(url, params=params, cache_limit=24)
    norm_title = scraper_utils.normalize_title(title)
    match_year = ''
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'id': re.compile('movie-+\d+')}):
        is_tvshow = dom_parser2.parse_dom(item, 'div', {'class': 'movieTV'})
        if (is_tvshow and video_type == VIDEO_TYPES.TVSHOW) or (not is_tvshow and video_type == VIDEO_TYPES.MOVIE):
            fragment = dom_parser2.parse_dom(item, 'h4', {'class': 'showRowName'})
            if fragment:
                match = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
                if match:
                    match_url, match_title = match[0].attrs['href'], match[0].content
                    if re.search('/-?\d{7,}/', match_url):
                        continue
                    match_norm_title = scraper_utils.normalize_title(match_title)
                    if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                        result = {'title': scraper_utils.cleanse_title(match_title), 'url': scraper_utils.pathify_url(match_url), 'year': match_year}
                        results.append(result)
    return results
def __search(self, video_type, title, year):
    url = urlparse.urljoin(self.base_url, '/advanced-search/menu-id-111.html?view=buscador')
    html = self._http_get(url, cache_limit=48)
    results = []
    norm_title = scraper_utils.normalize_title(title)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'tagindex'})
    if fragment:
        for match in re.finditer('href="([^"]+)[^>]+>(.*?)</a>', fragment[0]):
            url, match_title = match.groups()
            match_title = re.sub('\s+\(\d+\)$', '', match_title)  # drop trailing item counts, e.g. "Title (12)"
            match_title = match_title.replace('&amp;', '&')  # unescape HTML-encoded ampersands
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': ''}
                results.append(result)
    return results