def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site's /search/ endpoint for shows or movies.

    Returns a list of {'url', 'title', 'year'} dicts. TV shows are matched
    via the TAGS link inside each post; movies via <h2> headings paired with
    their post bodies.
    """
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    # NOTE(review): require_debrid=False here, but the otherwise-identical
    # search() later in this file passes require_debrid=True — confirm which
    # is intended for this scraper.
    html = self._http_get(search_url, require_debrid=False, cache_limit=1)
    if video_type == VIDEO_TYPES.TVSHOW:
        seen_urls = {}  # de-dupes shows that appear in multiple posts
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            # skip posts from the wrong category (movie post in a TV search, etc.)
            if CATEGORIES[video_type] not in post: continue
            # the show link lives in the "TAGS:" span of the post footer
            match = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if match:
                show_url, match_title = match.groups()
                if show_url in seen_urls: continue
                result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                seen_urls[show_url] = result
                results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        # headings and post bodies appear in the same document order, so zip()
        # pairs each <h2> link with its post content
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [result.content for result in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        for heading, post in zip(headings, posts):
            if CATEGORIES[video_type] not in post or self.__too_old(post): continue
            post_url, post_title = heading
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            # substring match in either direction; year must agree when both known
            if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    """Resolve |show_url| to the episode page for |video|.

    Order of attempts: SxxEyy href pattern (via the default matcher), then
    airdate fallback, then episode-title fallback. Returns a pathified URL,
    or None implicitly when nothing matches.
    """
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    # e.g. matches "...-s01e02" but not "...-s01e023" thanks to (?!\d)
    episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
    parts = dom_parser2.parse_dom(html, 'ul', {'class': 'episode_list'})
    fragment = '\n'.join(part.content for part in parts)
    result = self._default_get_episode_url(fragment, video, episode_pattern)
    if result: return result
    # fallback data: parallel lists of hrefs, air dates, and episode names
    ep_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(fragment, 'a', req='href')]
    ep_dates = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_air_d'})]
    ep_titles = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_name'})]
    force_title = scraper_utils.force_title(video)
    if not force_title and kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
        for ep_url, ep_date in zip(ep_urls, ep_dates):
            logger.log('Quikr Ep Airdate Matching: %s - %s - %s' % (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG)
            if video.ep_airdate == scraper_utils.to_datetime(ep_date, '%Y-%m-%d').date():
                return scraper_utils.pathify_url(ep_url)
    if force_title or kodi.get_setting('title-fallback') == 'true':
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for ep_url, ep_title in zip(ep_urls, ep_titles):
            # strip the leading "<span>...</span>" episode-number prefix
            ep_title = re.sub('<span>.*?</span>\s*', '', ep_title)
            logger.log('Quikr Ep Title Matching: %s - %s - %s' % (ep_url.encode('utf-8'), ep_title.encode('utf-8'), video.ep_title), log_utils.LOGDEBUG)
            if norm_title == scraper_utils.normalize_title(ep_title):
                return scraper_utils.pathify_url(ep_url)
def _get_episode_url(self, show_url, video):
    """Find the episode URL on an 'el-item' style show page.

    Attempts SxxEyy href match, then airdate match (DD-MM-YYYY), then
    episode-title match. Returns a pathified URL or None implicitly.
    """
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    if html:
        force_title = scraper_utils.force_title(video)
        episodes = dom_parser.parse_dom(html, 'div', {'class': '\s*el-item\s*'})
        if not force_title:
            # zero-padded SxxEyy; (?!\d) stops E02 matching E023
            episode_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (int(video.season), int(video.episode))
            match = re.search(episode_pattern, html)
            if match:
                return scraper_utils.pathify_url(match.group(1))
            if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                # the site renders air dates as DD-MM-YYYY
                airdate_pattern = '%02d-%02d-%d' % (video.ep_airdate.day, video.ep_airdate.month, video.ep_airdate.year)
                for episode in episodes:
                    ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                    ep_airdate = dom_parser.parse_dom(episode, 'div', {'class': 'date'})
                    if ep_url and ep_airdate:
                        ep_airdate = ep_airdate[0].strip()
                        if airdate_pattern == ep_airdate:
                            return scraper_utils.pathify_url(ep_url[0])
        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for episode in episodes:
                ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                ep_title = dom_parser.parse_dom(episode, 'div', {'class': 'e-name'})
                if ep_url and ep_title and norm_title == scraper_utils.normalize_title(ep_title[0]):
                    return scraper_utils.pathify_url(ep_url[0])
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Scan the site's listing pages for entries matching |title| and |year|.

    The site has no search endpoint, so three browse pages are scraped and
    de-duplicated by pathified URL.
    """
    results = []
    seen_urls = set()
    for page in ('/latest-added/', '/popular-today/', '/most-popular/'):
        page_url = scraper_utils.urljoin(self.base_url, page)
        html = self._http_get(page_url, cache_limit=24)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'home'})
        if not fragment:
            continue
        norm_title = scraper_utils.normalize_title(title)
        for attrs, match_title_year in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if norm_title not in scraper_utils.normalize_title(match_title):
                continue
            if year and match_year and year != match_year:
                continue
            match_url = scraper_utils.pathify_url(attrs['href'])
            if match_url in seen_urls:
                continue
            seen_urls.add(match_url)
            results.append({'url': match_url,
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year})
    return results
def search(self, video_type, title, year, season=''):
    """Search via /search?q=<title> and scrape the 'video_item' results.

    Returns a list of {'url', 'title', 'year'} dicts. The site exposes no
    year metadata here, so 'year' is always ''.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search?q=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'video_item'}):
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title = dom_parser.parse_dom(item, 'img', ret='alt')
        # no year is available from this markup
        match_year = ''
        if match_url and match_title:
            match_url = match_url[0]
            match_title = match_title[0]
            # BUG FIX: was `VIDEO_TYPES == VIDEO_TYPES.TVSHOW`, comparing the
            # enum container to one of its members (always False), so movie
            # results leaked into TV-show searches. Compare the requested type.
            if video_type == VIDEO_TYPES.TVSHOW and '/tv-series/' not in match_url:
                continue
            # (removed a dead `if match_year:` branch — match_year is always '')
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url),
                          'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year}
                results.append(result)
    return results
def __alt_search(self, video_type, title, year, season=''):
    """Alternate search using the site's /search endpoint with a 'key' param.

    Season searches append "Season N" to the query and require the result URL
    to end in that season number. Returns {'url', 'title', 'year'} dicts.
    """
    results = []
    params = title.lower()
    if year: params += ' %s' % (year)
    if video_type == VIDEO_TYPES.SEASON and season: params += ' Season %s' % (season)
    params = {'key': params}
    search_url = urlparse.urljoin(self.base_url, '/search')
    html = self._http_get(search_url, params=params, cache_limit=1)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'caption'}):
        match = re.search('href="([^"]+)[^>]+>(.*?)<span[^>]*>', item)
        if match:
            match_url, match_title = match.groups()
            # "-season-N" in the URL distinguishes season pages from movie pages
            is_season = re.search('-season-\d+', match_url)
            if (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season):
                if video_type == VIDEO_TYPES.SEASON:
                    # URL must end with the exact requested season number
                    if season and not re.search('season-0*%s$' % (season), match_url): continue
                match_title = re.sub('</?[^>]*>', '', match_title)  # strip inline tags
                match_title = re.sub('\s+Full\s+Movie', '', match_title)
                # the year, when present, is embedded in the URL slug
                match = re.search('-(\d{4})(?:$|-)', match_url)
                if match:
                    match_year = match.group(1)
                else:
                    match_year = ''
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search a directory-listing style site (/Film/<year>/ and /Serial/).

    Movies: requires |year|; walks the year directory and matches file names.
    Shows: matches directory names under /Serial/. Returns result dicts.
    """
    results = []
    norm_title = scraper_utils.normalize_title(title)
    if video_type == VIDEO_TYPES.MOVIE:
        # movies are organized in per-year directories, so a year is required
        if year:
            base_url = urlparse.urljoin(self.base_url, '/Film/')
            html = self._http_get(base_url, cache_limit=48)
            for link in self.__parse_directory(html):
                if year == link['title']:
                    url = urlparse.urljoin(base_url, link['link'])
                    for movie in self.__get_files(url, cache_limit=24):
                        match_title, match_year, _height, _extra = scraper_utils.parse_movie_link(movie['link'])
                        # only files (not subdirectories) whose name matches
                        if not movie['directory'] and norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                            result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': year}
                            results.append(result)
    else:
        base_url = urlparse.urljoin(self.base_url, '/Serial/')
        html = self._http_get(base_url, cache_limit=48)
        for link in self.__parse_directory(html):
            # shows are directories named after the show
            if link['directory'] and norm_title in scraper_utils.normalize_title(link['title']):
                url = urlparse.urljoin(base_url, link['link'])
                result = {'url': scraper_utils.pathify_url(url), 'title': link['title'], 'year': ''}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Match |title| against the debrid service's transfer list.

    Movies and seasons are distinguished by a "season N" marker in the
    transfer name; episodes (SxxEyy) are always skipped. Result 'url' is a
    'hash=<hash>' pseudo-URL consumed elsewhere in this scraper.
    """
    url = urlparse.urljoin(self.base_url, LIST_URL)
    js_data = self._http_get(url, cache_limit=0)
    norm_title = scraper_utils.normalize_title(title)
    results = []
    if 'transfers' in js_data:
        for item in js_data['transfers']:
            is_season = re.search('(.*?[._ ]season[._ ]+(\d+))[._ ](.*)', item['name'], re.I)
            # BUG FIX: the original test ended with `... or is_season and
            # VIDEO_TYPES.SEASON` — the `video_type ==` was missing, and since
            # the enum member is truthy, every season item was accepted even
            # for MOVIE searches. Parenthesized and compared correctly here.
            if (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season):
                if re.search('[._ ]S\d+E\d+[._ ]', item['name']): continue  # skip episodes
                if video_type == VIDEO_TYPES.SEASON:
                    match_title, match_season, extra = is_season.groups()
                    if season and int(match_season) != int(season): continue
                    match_year = ''
                    match_title = re.sub('[._]', ' ', match_title)
                else:
                    # movie names usually carry a (YYYY) year
                    match = re.search('(.*?)\(?(\d{4})\)?(.*)', item['name'])
                    if match:
                        match_title, match_year, extra = match.groups()
                    else:
                        match_title, match_year, extra = item['name'], '', ''
                match_title = match_title.strip()
                extra = extra.strip()
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result_title = match_title
                    if extra: result_title += ' [%s]' % (extra)
                    result = {'title': result_title, 'year': match_year, 'url': 'hash=%s' % (item['hash'])}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site's /search/ endpoint for shows or movies (debrid required).

    Returns a list of {'url', 'title', 'year'} dicts. TV shows are matched via
    the TAGS link in each post; movies via <h2> headings paired with posts.
    """
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    html = self._http_get(search_url, require_debrid=True, cache_limit=1)
    if video_type == VIDEO_TYPES.TVSHOW:
        seen_urls = {}  # de-dupes shows appearing in multiple posts
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            # skip posts from the wrong category
            if CATEGORIES[video_type] not in post: continue
            # the show link lives in the "TAGS:" span of the post footer
            match = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if match:
                show_url, match_title = match.groups()
                if show_url in seen_urls: continue
                result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                seen_urls[show_url] = result
                results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        # headings and post bodies appear in document order, so zip() pairs them
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [result.content for result in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        for heading, post in zip(headings, posts):
            if CATEGORIES[video_type] not in post or self.__too_old(post): continue
            post_url, post_title = heading
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            # substring match in either direction; year must agree when both known
            if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    """Walk the show's paginated post list looking for |video|'s episode.

    Stops when a post is older than the configured cutoff (__too_old).
    Matching: release-name check first, then episode-title fallback.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]  # list so the pagination loop below can reassign it
    too_old = False
    while page_url and not too_old:
        # NOTE(review): page_url[0] is passed without joining to base_url,
        # unlike the sibling implementation — presumably _http_get resolves
        # relative paths; confirm.
        html = self._http_get(page_url[0], require_debrid=True, cache_limit=1)
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = dom_parser2.parse_dom(post, 'a', req='href')
                if match:
                    url, title = match[0].attrs['href'], match[0].content
                    if not force_title:
                        if scraper_utils.release_check(video, title, require_title=False):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            # episode title appears after the closing </strong>
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        # advance to the next page, if any
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url: page_url = [page_url[0].attrs['href']]
def _get_episode_url(self, show_url, video):
    """Resolve |show_url| to an episode URL (older dom_parser variant).

    Tries the default SxxEyy matcher against the show page, then airdate and
    episode-title fallbacks scraped from the 'episode_list' fragment.
    """
    # e.g. matches "...-s1e2" with optional zero padding; (?!\d) avoids e23 for e2
    episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
    result = self._default_get_episode_url(show_url, video, episode_pattern)
    if result: return result
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    fragment = dom_parser.parse_dom(html, "ul", {"class": "episode_list"})
    if fragment:
        # parallel lists: hrefs, air dates, and episode names in document order
        ep_urls = dom_parser.parse_dom(fragment[0], "a", ret="href")
        ep_dates = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_air_d"})
        ep_titles = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_name"})
        force_title = scraper_utils.force_title(video)
        if not force_title and kodi.get_setting("airdate-fallback") == "true" and video.ep_airdate:
            for ep_url, ep_date in zip(ep_urls, ep_dates):
                log_utils.log(
                    "Quikr Ep Airdate Matching: %s - %s - %s" % (ep_url, ep_date, video.ep_airdate),
                    log_utils.LOGDEBUG,
                )
                if video.ep_airdate == scraper_utils.to_datetime(ep_date, "%Y-%m-%d").date():
                    return scraper_utils.pathify_url(ep_url)
        if force_title or kodi.get_setting("title-fallback") == "true":
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for ep_url, ep_title in zip(ep_urls, ep_titles):
                # strip the "<span>...</span>" episode-number prefix
                ep_title = re.sub("<span>.*?</span>\s*", "", ep_title)
                log_utils.log(
                    "Quikr Ep Title Matching: %s - %s - %s" % (ep_url, norm_title, video.ep_title),
                    log_utils.LOGDEBUG,
                )
                if norm_title == scraper_utils.normalize_title(ep_title):
                    return scraper_utils.pathify_url(ep_url)
def search(self, video_type, title, year, season=''):
    """Browse the alphabetical /tv-listings/<letter>/ index for |title|.

    Year is taken from a trailing "(YYYY)" in the link text when present.
    """
    results = []
    if not title:
        return results
    search_url = urlparse.urljoin(self.base_url, '/tv-listings/%s/' % (title[:1].lower()))
    html = self._http_get(search_url, cache_limit=8)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'home'})
    if not fragment:
        return results
    norm_title = scraper_utils.normalize_title(title)
    for link in re.finditer('''href=["']([^'"]+)[^>]+>([^<]+)''', fragment[0]):
        url, match_title_year = link.groups()
        year_match = re.search('(.*?)\s+\((\d{4})\)', match_title_year)
        if year_match:
            match_title, match_year = year_match.groups()
        else:
            match_title, match_year = match_title_year, ''
        title_ok = norm_title in scraper_utils.normalize_title(match_title)
        year_ok = not year or not match_year or year == match_year
        if title_ok and year_ok:
            results.append({'url': scraper_utils.pathify_url(url),
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year})
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via the site's WordPress ?s= parameter and scrape result tiles.

    Episode links (SxxEyy) and TV-series links are skipped; only movie
    entries are returned.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    if re.search('Sorry, but nothing matched', html, re.I):
        return results
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'box-shadow'}):
        for attrs, _content in dom_parser2.parse_dom(item, 'a', req=['href', 'title']):
            match_url = attrs['href']
            match_title_year = attrs['title']
            if re.search('S\d{2}E\d{2}', match_title_year):
                continue  # skip episodes
            if re.search('TV\s*SERIES', match_title_year, re.I):
                continue  # skip shows
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if year and match_year and year != match_year:
                continue
            if norm_title in scraper_utils.normalize_title(match_title):
                results.append({'title': scraper_utils.cleanse_title(match_title),
                                'year': match_year,
                                'url': scraper_utils.pathify_url(match_url)})
    return results
def search(self, video_type, title, year):
    """Search a directory-listing site (no season parameter in this variant).

    Movies: requires |year|; walks /Film/<year>/ and matches file names.
    Shows: matches directory names under /Serial/.
    """
    results = []
    norm_title = scraper_utils.normalize_title(title)
    if video_type == VIDEO_TYPES.MOVIE:
        # movies are organized in per-year directories, so a year is required
        if year:
            base_url = urlparse.urljoin(self.base_url, '/Film/')
            html = self._http_get(base_url, cache_limit=48)
            for link in self.__parse_directory(html):
                if year == link['title']:
                    url = urlparse.urljoin(base_url, link['link'])
                    for movie in self.__get_files(url, cache_limit=24):
                        match_title, match_year, _height, _extra = scraper_utils.parse_movie_link(movie['link'])
                        # only files (not subdirectories) whose name matches
                        if not movie['directory'] and norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                            result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': year}
                            results.append(result)
    else:
        base_url = urlparse.urljoin(self.base_url, '/Serial/')
        html = self._http_get(base_url, cache_limit=48)
        for link in self.__parse_directory(html):
            # shows are directories named after the show
            if link['directory'] and norm_title in scraper_utils.normalize_title(link['title']):
                url = urlparse.urljoin(base_url, link['link'])
                result = {'url': scraper_utils.pathify_url(url), 'title': link['title'], 'year': ''}
                results.append(result)
    return results
def get_sources(self, video):
    """Build direct-stream hoster dicts from a file-listing page.

    Movies match by normalized title; episodes by season/episode number.
    Dubbed releases are skipped. Stream URLs carry a User-Agent header hint.
    """
    source_url = self.get_url(video)
    hosters = []
    norm_title = scraper_utils.normalize_title(video.title)
    if source_url and source_url != FORCE_NO_MATCH:
        source_url = urlparse.urljoin(self.base_url, source_url)
        for line in self.__get_files(source_url, cache_limit=24):
            if not line['directory']:
                match = {}
                if video.video_type == VIDEO_TYPES.MOVIE:
                    match_title, _match_year, height, extra = scraper_utils.parse_movie_link(line['link'])
                    if norm_title in scraper_utils.normalize_title(match_title):
                        match = line
                else:
                    _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(line['link'])
                    if int(video.season) == int(season) and int(video.episode) == int(episode):
                        match = line
                # skip dubbed releases regardless of match
                if 'dubbed' in extra.lower(): continue
                if match:
                    # direct link; player needs the UA appended after the pipe
                    stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': scraper_utils.height_get_quality(height), 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    if 'x265' in extra: hoster['format'] = 'x265'
                    if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                    hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):
    """Search the site's A-Z index pages.

    Leading articles ("The ", "A ") are skipped when picking the index
    letter; digits all live under '1'. The page is windows-1252 encoded and
    HTML-entity escaped (Python 2 unicode/HTMLParser handling).
    """
    if video_type == VIDEO_TYPES.MOVIE:
        url = urlparse.urljoin(self.base_url, '/movies/a-z/')
    else:
        url = urlparse.urljoin(self.base_url, '/tv/a-z/')
    # index letter: skip leading article, bucket digits under '1'
    if title.upper().startswith('THE '):
        first_letter = title[4:5]
    elif title.upper().startswith('A '):
        first_letter = title[2:3]
    elif title[:1] in string.digits:
        first_letter = '1'
    else:
        first_letter = title[:1]
    url = url + first_letter.upper()
    html = self._http_get(url, cache_limit=.25)
    h = HTMLParser.HTMLParser()
    html = unicode(html, 'windows-1252')  # site serves cp1252, not utf-8
    html = h.unescape(html)
    norm_title = scraper_utils.normalize_title(title)
    # link text is "Title (year)" with the year optional
    pattern = 'class=star.*?href=([^>]+)>(.*?)(?:\s*\((\d+)\))?</a>'
    results = []
    for match in re.finditer(pattern, html, re.DOTALL):
        # groups('') defaults the optional year group to ''
        url, match_title, match_year = match.groups('')
        if norm_title in scraper_utils.normalize_title(match_title) and (
                not year or not match_year or year == match_year):
            result = {'url': url, 'title': match_title, 'year': match_year}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Page through the /list/?char=<letter> index until matches are found.

    Entries labelled "Season N" are filtered against |season| when given.
    Pagination stops as soon as any results are collected.
    """
    results = []
    # index is alphabetic only; non-letter titles can't be searched here
    if title and title[0].isalpha():
        page_url = ['/list/?char=%s' % (title[0])]  # list so the loop can reassign
        while page_url:
            page_url = urlparse.urljoin(self.base_url, page_url[0])
            html = self._http_get(page_url, cache_limit=48)
            fragment = dom_parser.parse_dom(html, 'ul', {'class': 'list-film-char'})
            if fragment:
                norm_title = scraper_utils.normalize_title(title)
                for match in re.finditer('href="([^"]+)[^>]+>(.*?)</a>', fragment[0]):
                    match_url, match_title = match.groups()
                    match_title = re.sub('</?strong>', '', match_title)
                    match = re.search('Season\s+(\d+)', match_title, re.I)
                    if match:
                        # wrong season for a season search: skip
                        if season and int(season) != int(match.group(1)):
                            continue
                    if norm_title in scraper_utils.normalize_title(match_title):
                        result = {'title': scraper_utils.cleanse_title(match_title), 'year': '', 'url': scraper_utils.pathify_url(match_url)}
                        results.append(result)
            if results:
                break  # found something on this page; no need to paginate further
            page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
    return results
def _get_episode_url(self, show_url, video):
    """Walk the show's paginated posts matching SxxEyy or the air date.

    Release names use '.', '_' or ' ' as separators, hence the (\.|_| )
    delimiters. Stops when a post is older than the cutoff (__too_old).
    """
    sxe = '(\.|_| )S%02dE%02d(\.|_| )' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    try:
        airdate_pattern = video.ep_airdate.strftime('(\.|_| )%Y(\.|_| )%m(\.|_| )%d(\.|_| )')
    except:
        # no/invalid airdate on the video object; disable airdate matching
        airdate_pattern = ''
    page_url = [show_url]  # list so the pagination loop can reassign it
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for post in posts:
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                if match:
                    url, title = match.groups()
                    if not force_title:
                        if re.search(sxe, title) or (airdate_pattern and re.search(airdate_pattern, title)):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            # episode title appears after the closing </strong>
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def get_sources(self, video):
    """Build direct-stream hoster dicts from a file listing (meta-dict variant).

    Movies match by normalized title; episodes via __episode_match. Dubbed
    releases are skipped. The base_url is stripped from stream URLs.
    """
    source_url = self.get_url(video)
    hosters = []
    norm_title = scraper_utils.normalize_title(video.title)
    if source_url and source_url != FORCE_NO_MATCH:
        source_url = urlparse.urljoin(self.base_url, source_url)
        for line in self._get_files(source_url, cache_limit=24):
            if not line['directory']:
                match = {}
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(line['link'])
                    if norm_title in scraper_utils.normalize_title(meta['title']):
                        match = line
                elif self.__episode_match(line, video):
                    match = line
                    meta = scraper_utils.parse_episode_link(line['link'])
                if match:
                    if meta['dubbed']: continue  # skip dubbed releases
                    # direct link; player needs the UA appended after the pipe
                    stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                    stream_url = stream_url.replace(self.base_url, '')  # keep path-relative
                    quality = scraper_utils.height_get_quality(meta['height'])
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    if 'format' in meta: hoster['format'] = meta['format']
                    if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                    hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):
    """Search for shows via the full series index, or movies via /search.

    Movie searches honor the site's rate-limit message ("you can search
    again in N seconds") by sleeping (capped at self.timeout) and retrying
    once with caching disabled.
    """
    results = []
    if video_type == VIDEO_TYPES.TVSHOW:
        # the full A-Z series list is small enough to scrape directly
        url = urlparse.urljoin(self.base_url, '/series/all/')
        html = self._http_get(url, cache_limit=8)
        links = dom_parser.parse_dom(html, 'a', {'class': 'underilne'}, 'href')  # sic: site's class is misspelled
        titles = dom_parser.parse_dom(html, 'a', {'class': 'underilne'})
        items = zip(links, titles)
    else:
        url = urlparse.urljoin(self.base_url, '/search?=%s' % urllib.quote_plus(title))
        data = {'q': title, 'go': 'Search'}
        html = self._http_get(url, data=data, cache_limit=8)
        # the site rate-limits searches; wait out the penalty and retry once
        match = re.search('you can search again in (\d+) seconds', html, re.I)
        if match:
            wait = int(match.group(1))
            if wait > self.timeout: wait = self.timeout
            time.sleep(wait)
            html = self._http_get(url, data=data, cache_limit=0)
        pattern = 'class="movie_box.*?href="([^"]+).*?<h1>([^<]+)'
        items = re.findall(pattern, html, re.DOTALL)
    norm_title = scraper_utils.normalize_title(title)
    for item in items:
        url, match_title = item
        if norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Browse the /alphabet/<letter>/ index for |title|.

    Titles starting with a digit are grouped under '0-9'. Year comes from a
    trailing "(YYYY)" in the link text when present.
    """
    results = []
    if not title:
        return results
    first_letter = title[:1].lower()
    if first_letter.isdigit():
        first_letter = '0-9'
    search_url = urlparse.urljoin(self.base_url, '/alphabet/%s/' % (first_letter))
    html = self._http_get(search_url, cache_limit=24)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'home'})
    if not fragment:
        return results
    norm_title = scraper_utils.normalize_title(title)
    for link in re.finditer('''href=["']([^'"]+)[^>]+>([^<]+)''', fragment[0]):
        url, match_title_year = link.groups()
        year_match = re.search('(.*?)\s+\((\d{4})\)', match_title_year)
        if year_match:
            match_title, match_year = year_match.groups()
        else:
            match_title, match_year = match_title_year, ''
        title_ok = norm_title in scraper_utils.normalize_title(match_title)
        year_ok = not year or not match_year or year == match_year
        if title_ok and year_ok:
            results.append({'url': scraper_utils.pathify_url(url),
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year})
    return results
def search(self, video_type, title, year):
    """Search the site's A-Z index pages (no season parameter in this variant).

    Leading articles ("The ", "A ") are skipped when picking the index
    letter; digits all live under '1'. The page is windows-1252 encoded and
    HTML-entity escaped (Python 2 unicode/HTMLParser handling).
    """
    if video_type == VIDEO_TYPES.MOVIE:
        url = urlparse.urljoin(self.base_url, '/movies/a-z/')
    else:
        url = urlparse.urljoin(self.base_url, '/tv/a-z/')
    # index letter: skip leading article, bucket digits under '1'
    if title.upper().startswith('THE '):
        first_letter = title[4:5]
    elif title.upper().startswith('A '):
        first_letter = title[2:3]
    elif title[:1] in string.digits:
        first_letter = '1'
    else:
        first_letter = title[:1]
    url = url + first_letter.upper()
    html = self._http_get(url, cache_limit=.25)
    h = HTMLParser.HTMLParser()
    html = unicode(html, 'windows-1252')  # site serves cp1252, not utf-8
    html = h.unescape(html)
    norm_title = scraper_utils.normalize_title(title)
    # link text is "Title (year)" with the year optional
    pattern = 'class=star.*?href=([^>]+)>(.*?)(?:\s*\((\d+)\))?</a>'
    results = []
    for match in re.finditer(pattern, html, re.DOTALL):
        # groups('') defaults the optional year group to ''
        url, match_title, match_year = match.groups('')
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result = {'url': url, 'title': match_title, 'year': match_year}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search via the site's ajax-search-pro WordPress plugin endpoint.

    Season pages carry "Season N" in the title; movies get a year extracted.
    Title matching is substring in either direction.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/wp-content/plugins/ajax-search-pro/ajax_search.php')
    # POST body mimics the plugin's own AJAX request
    data = {'action': 'ajaxsearchpro_search', 'aspp': title, 'asid': '1', 'asp_inst_id': '1_1', 'options': 'current_page_id=7&qtranslate_lang=0&set_intitle=None&customset%5B%5D=post'}
    html = self._http_get(search_url, data=data, headers=XHR, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    for attrs, match_title_year in dom_parser2.parse_dom(html, 'a', {'class': 'asp_res_url'}):
        match_url = attrs['href']
        match_title_year = re.sub('</?[^>]*>', '', match_title_year)  # strip inline tags
        is_season = re.search('Season\s+(\d+)\s*', match_title_year, re.I)
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                if season and int(is_season.group(1)) != int(season): continue
                match_title = match_title_year
                match_title = re.sub('\s*\d{4}', '', match_title)  # drop stray year from season titles
            else:
                match_title, match_year = scraper_utils.extra_year(match_title_year)
            match_norm_title = scraper_utils.normalize_title(match_title)
            # substring match in either direction
            title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            if title_match and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Match |title|/|year| against the debrid service's /torrent/list JSON.

    Episode torrents (SxxEyy) are skipped. Result 'url' is a 'hash=<hash>'
    pseudo-URL consumed elsewhere in this scraper.
    """
    results = []
    list_url = urlparse.urljoin(self.base_url, '/torrent/list')
    js_data = self._http_get(list_url, cache_limit=0)
    norm_title = scraper_utils.normalize_title(title)
    if 'torrents' in js_data:
        for item in js_data['torrents']:
            name = item['name']
            if re.search('[._ ]S\d+E\d+[._ ]', name):
                continue  # skip episodes for movies
            year_match = re.search('(.*?)\(?(\d{4})\)?(.*)', name)
            if year_match:
                match_title, match_year, extra = year_match.groups()
            else:
                match_title, match_year, extra = name, '', ''
            match_title = match_title.strip()
            extra = extra.strip()
            title_ok = norm_title in scraper_utils.normalize_title(match_title)
            year_ok = not year or not match_year or year == match_year
            if title_ok and year_ok:
                result_title = match_title
                if extra:
                    result_title += ' [%s]' % (extra)
                results.append({'title': result_title,
                                'year': match_year,
                                'url': 'hash=%s' % (item['hash'])})
    return results
def __alt_search(self, video_type, title, year, season=''):
    """Alternate search via an external JSON search API.

    SEARCH_URL is stored base64-encoded (presumably to obscure the third-party
    endpoint) and takes the quoted title as a format argument. Titles come
    back with a leading "Watch " that is stripped.
    """
    search_url = base64.decodestring(SEARCH_URL) % (
        urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=1)
    results = []
    js_data = scraper_utils.parse_json(html)
    if 'results' in js_data:
        norm_title = scraper_utils.normalize_title(title)
        for item in js_data['results']:
            match_title_year = item['titleNoFormatting']
            match_title_year = re.sub('^Watch\s+', '', match_title_year)
            match_url = item['url']
            # optional trailing "(YYYY)" year
            match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                match_title = match_title_year
                match_year = ''
            if norm_title in scraper_utils.normalize_title(
                    match_title) and (not year or not match_year or year == match_year):
                result = {
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                }
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site's /tvseries/search.php endpoint (dayq parameter).

    The title/year are read from the thumbnail's alt text; relative result
    URLs are rooted under /tvseries/.
    """
    results = []
    page_url = scraper_utils.urljoin(self.base_url, '/tvseries/search.php')
    html = self._http_get(page_url, params={'dayq': title}, cache_limit=48)
    html = re.sub('<!--.*?-->', '', html)  # strip HTML comments that confuse parsing
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, td in dom_parser2.parse_dom(html, 'td', {'class': 'topic_content'}):
        match_url = dom_parser2.parse_dom(td, 'a', req='href')
        match_title_year = dom_parser2.parse_dom(td, 'img', req='alt')
        if not match_url or not match_title_year: continue
        match_url = match_url[0].attrs['href']
        match_title_year = match_title_year[0].attrs['alt']
        # site emits links relative to /tvseries/
        if not match_url.startswith('/'): match_url = '/tvseries/' + match_url
        match_title, match_year = scraper_utils.extra_year(
            match_title_year)
        if (norm_title in scraper_utils.normalize_title(match_title)) and (
                not year or not match_year or year == match_year):
            result = {
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the /search/<title>/ pages built from 'movie-details' articles.

    The query title is stripped to alphanumerics/space/dot before quoting,
    presumably to match the site's URL scheme.
    """
    results = []
    search_title = re.sub('[^A-Za-z0-9. ]', '', title)
    url = '/search/%s/' % (urllib.quote(search_title))
    url = scraper_utils.urljoin(self.base_url, url)
    html = self._http_get(url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'article', {'class': 'movie-details'}):
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        match_title = dom_parser2.parse_dom(item, 'h2', {'class': 'movie-title'})
        match_year = dom_parser2.parse_dom(item, 'div', {'class': 'movie-year'})
        if match_url and match_title:
            match_url = match_url[0].attrs['href']
            match_title = match_title[0].content
            # the year div is optional
            match_year = match_year[0].content if match_year else ''
            if norm_title in scraper_utils.normalize_title(
                    match_title) and (not match_year or not year or year == match_year):
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search by scanning the site's category <select id="cat"> dropdown.

    Each category option is a show; matching options are probed for a
    redirect so the stored URL points at the final location.
    """
    results = []
    html = self._http_get(self.base_url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    fragment = dom_parser2.parse_dom(html, 'select', {'id': 'cat'})
    if fragment:
        for attrs, label in dom_parser2.parse_dom(fragment[0].content, 'option', {'class': 'level-0'}, req='value'):
            label = scraper_utils.cleanse_title(label)
            # Drop the trailing post-count, e.g. "Show Name (12)".
            label = re.sub('\s+\(\d+\)$', '', label)
            if norm_title in scraper_utils.normalize_title(label):
                cat_url = scraper_utils.urljoin(self.base_url, '/?cat=%s' % (attrs['value']))
                # With allow_redirect=False, _http_get returns the redirect
                # target (a URL string) instead of page HTML when one occurs.
                html = self._http_get(cat_url, allow_redirect=False, cache_limit=8)
                if html.startswith('http'):
                    cat_url = html
                result = {
                    'url': scraper_utils.pathify_url(cat_url),
                    'title': label,
                    'year': ''
                }
                results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    """Walk the show's paginated post list looking for the requested episode.

    Stops when a post is older than the cutoff (__too_old) or when pages run
    out. Returns a pathified episode URL, or None implicitly if not found.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for post in posts:
            # Posts are newest-first; once one is too old, stop paging entirely.
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                if match:
                    url, title = match.groups()
                    if not force_title:
                        # Season/episode (or airdate) release-name check.
                        if scraper_utils.release_check(video, title, require_title=False):
                            return scraper_utils.pathify_url(url)
                    else:
                        # Title-only matching when forced and enabled.
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        # Follow the "next page" link, if any.
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def _get_episode_url(self, show_url, video):
    """Resolve an episode via the site's JSON API (/api/v2/shows/<id>).

    Matching priority: exact season/episode number, then airdate fallback,
    then episode-title fallback. Returns a '?id=<episode id>' query URL.
    """
    query = scraper_utils.parse_query(show_url)
    if 'id' in query:
        url = scraper_utils.urljoin(self.base_url, '/api/v2/shows/%s' % (query['id']))
        js_data = self._http_get(url, cache_limit=.5)
        if 'episodes' in js_data:
            force_title = scraper_utils.force_title(video)
            if not force_title:
                for episode in js_data['episodes']:
                    if int(video.season) == int(episode['season']) and int(video.episode) == int(episode['number']):
                        return scraper_utils.pathify_url('?id=%s' % (episode['id']))
                if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                    for episode in js_data['episodes']:
                        if 'airdate' in episode:
                            ep_airdate = scraper_utils.to_datetime(episode['airdate'], "%Y-%m-%d").date()
                            # NOTE(review): deliberate one-day offset — the API's
                            # airdate appears to be one day ahead of the tracked
                            # airdate; confirm against the site before changing.
                            if video.ep_airdate == (ep_airdate - datetime.timedelta(days=1)):
                                return scraper_utils.pathify_url('?id=%s' % (episode['id']))
            else:
                logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
            if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
                norm_title = scraper_utils.normalize_title(video.ep_title)
                for episode in js_data['episodes']:
                    if 'name' in episode and norm_title in scraper_utils.normalize_title(episode['name']):
                        return scraper_utils.pathify_url('?id=%s' % (episode['id']))
def search(self, video_type, title, year, season=''):
    """Scan the homepage's series listing and return entries matching title.

    Each result is a dict with 'url', 'title' and 'year' keys; the year is
    pulled from the item's <p> tag when a 4-digit value is present.
    """
    matches = []
    page = self._http_get(self.base_url, cache_limit=48)
    wanted = scraper_utils.normalize_title(title)
    for entry in dom_parser.parse_dom(page, 'div', {'class': 'series-item'}):
        links = dom_parser.parse_dom(entry, 'a', ret='href')
        names = dom_parser.parse_dom(entry, 'h3')
        paras = dom_parser.parse_dom(entry, 'p')
        # Skip malformed items lacking a link or a name.
        if not links or not names:
            continue
        link, name = links[0], names[0]
        found_year = ''
        if paras:
            # A 4-digit year may appear inside the first <p> tag.
            year_match = re.search('\s*(\d{4})\s+', paras[0])
            if year_match:
                found_year = year_match.group(1)
        if wanted in scraper_utils.normalize_title(name):
            matches.append({
                'url': scraper_utils.pathify_url(link),
                'title': scraper_utils.cleanse_title(name),
                'year': found_year,
            })
    return matches
def search(self, video_type, title, year, season=''):
    """Search the site: WordPress ?s= search for movies, category list for shows.

    Returns a list of dicts with 'title' (cleansed), 'year' and 'url' keys.
    """
    results = []
    if video_type == VIDEO_TYPES.MOVIE:
        search_url = urlparse.urljoin(self.base_url, '/?s=')
        search_url += urllib.quote_plus('%s' % (title))
        html = self._http_get(search_url, cache_limit=1)
        links = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'href')
        titles = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'title')
        matches = zip(links, titles)
    else:
        # TV shows are listed as WP category items on the front page.
        html = self._http_get(self.base_url, cache_limit=8)
        matches = re.findall('<li\s+class="cat-item[^>]+>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', html)
    norm_title = scraper_utils.normalize_title(title)
    for item in matches:
        url, match_title_year = item
        # Split "Title (2015)" / "Title 2015" into title + year when possible.
        match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
        if match:
            match_title, match_year = match.groups()
        else:
            match_title = match_title_year
            match_year = ''
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site's /arsiv (archive) listing for matching shows.

    Titles come from h1.ts-list-name entries; years from the paired <ul>
    detail blocks. Returns a list of result dicts.
    """
    results = []
    url = scraper_utils.urljoin(self.base_url, '/arsiv')
    html = self._http_get(url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'ts-list-content'})
    if not fragment: return results
    items = dom_parser2.parse_dom(fragment[0].content, 'h1', {'class': 'ts-list-name'})
    details = dom_parser2.parse_dom(fragment[0].content, 'ul')
    # Name headers and detail lists are parallel sequences in the markup.
    for item, detail in zip(items, details):
        match = dom_parser2.parse_dom(item.content, 'a', req='href')
        match_year = re.search('<span>(\d{4})</span>', detail.content)
        if not match: continue
        match_url = match[0].attrs['href']
        match_title = match[0].content
        match_year = match_year.group(1) if match_year else ''
        if norm_title in scraper_utils.normalize_title(match_title):
            result = {
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)
    return results
def get_sources(self, video):
    """Build direct-stream hosters from a directory-style file listing.

    Movies match on normalized title; episodes match on season/episode
    numbers parsed from the file name. Dubbed releases are skipped.
    """
    source_url = self.get_url(video)
    hosters = []
    norm_title = scraper_utils.normalize_title(video.title)
    if source_url and source_url != FORCE_NO_MATCH:
        source_url = urlparse.urljoin(self.base_url2, source_url)
        for line in self._get_files(source_url, cache_limit=24):
            if not line['directory']:
                match = {}
                if video.video_type == VIDEO_TYPES.MOVIE:
                    match_title, _match_year, height, extra = scraper_utils.parse_movie_link(line['link'])
                    if norm_title in scraper_utils.normalize_title(match_title):
                        match = line
                else:
                    _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(line['link'])
                    if int(video.season) == int(season) and int(video.episode) == int(episode):
                        match = line
                # 'extra' carries release tags (x265, dubbed, ...) from either branch.
                if 'dubbed' in extra.lower(): continue
                if match:
                    # Append the UA so the player sends the same header we scraped with.
                    stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': scraper_utils.height_get_quality(height), 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    if 'x265' in extra: hoster['format'] = 'x265'
                    if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                    hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search: probe a slugged /tv-show/ URL for shows; WP search for movies.

    For TV the existence of any post on the slug page counts as a hit.
    Movie results carry a synthetic title: "Title [extra] (720p)".
    """
    results = []
    if video_type == VIDEO_TYPES.TVSHOW and title:
        # Shows live at predictable slug URLs; a page with posts means it exists.
        test_url = '/tv-show/%s/' % (scraper_utils.to_slug(title))
        test_url = scraper_utils.urljoin(self.base_url, test_url)
        html = self._http_get(test_url, require_debrid=True, cache_limit=24)
        posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
        if posts:
            result = {'url': scraper_utils.pathify_url(test_url), 'title': scraper_utils.cleanse_title(title), 'year': ''}
            results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
        html = self._http_get(self.base_url, params={'s': search_title}, require_debrid=True, cache_limit=1)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            match = re.search('<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
            if match:
                post_url, post_title = match.groups()
                # Skip TV posts and posts past the age cutoff.
                if '/tv-show/' in post or self.__too_old(post): continue
                post_title = re.sub('<[^>]*>', '', post_title)
                meta = scraper_utils.parse_movie_link(post_title)
                full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                match_year = meta['year']
                match_norm_title = scraper_utils.normalize_title(meta['title'])
                # Substring match in either direction tolerates release-name noise.
                if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                    results.append(result)
    return results
def _default_get_episode_url(self, html, video, episode_pattern, title_pattern='', airdate_pattern=''):
    """Shared episode-URL matcher used by concrete scrapers.

    Tries, in order: the season/episode regex, an airdate regex built from
    {year}/{month}/{day}-style placeholders, then an episode-title regex
    (whose groups must be named 'title' and 'url'). Returns a pathified
    URL or None implicitly.
    """
    logger.log('Default Episode Url: |%s|%s|' % (self.get_name(), video), log_utils.LOGDEBUG)
    if not html: return
    try:
        # Accept either raw HTML or a dom_parser2 result list.
        html = html[0].content
    except AttributeError:
        pass
    force_title = scraper_utils.force_title(video)
    if not force_title:
        if episode_pattern:
            match = re.search(episode_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
        if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
            # Substitute placeholders; {p_*} variants are zero-padded.
            airdate_pattern = airdate_pattern.replace('{year}', str(video.ep_airdate.year))
            airdate_pattern = airdate_pattern.replace('{month}', str(video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{day}', str(video.ep_airdate.day))
            airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (video.ep_airdate.day))
            logger.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)
            match = re.search(airdate_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
    else:
        logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
            episode = match.groupdict()
            if norm_title == scraper_utils.normalize_title(episode['title']):
                return scraper_utils.pathify_url(episode['url'])
def search(self, video_type, title, year, season=''):
    """Search the site: WordPress ?s= search for movies, category list for shows.

    Returns a list of dicts with 'title' (cleansed), 'year' and 'url' keys.
    """
    results = []
    if video_type == VIDEO_TYPES.MOVIE:
        search_url = urlparse.urljoin(self.base_url, '/?s=')
        search_url += urllib.quote_plus('%s' % (title))
        html = self._http_get(search_url, cache_limit=1)
        links = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'href')
        titles = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'title')
        matches = zip(links, titles)
    else:
        # TV shows are listed as WP category items on the front page.
        html = self._http_get(self.base_url, cache_limit=8)
        matches = re.findall('<li\s+class="cat-item[^>]+>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', html)
    norm_title = scraper_utils.normalize_title(title)
    for item in matches:
        url, match_title_year = item
        # Split "Title (2015)" / "Title 2015" into title + year when possible.
        match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
        if match:
            match_title, match_year = match.groups()
        else:
            match_title = match_title_year
            match_year = ''
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            # Cleanse the scraped title (HTML entities etc.) like the other
            # search() implementations in this file do.
            result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
            results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    """Page through show posts matching on SxxExx or airdate in the heading.

    Falls back to an episode-title comparison (text after </strong>) when
    title matching is forced or enabled. Stops at the age cutoff.
    """
    # Separators in release names can be '.', '_' or ' '.
    sxe = '(\.|_| )S%02dE%02d(\.|_| )' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    try:
        airdate_pattern = video.ep_airdate.strftime('(\.|_| )%Y(\.|_| )%m(\.|_| )%d(\.|_| )')
    except:
        # No airdate available on the video object.
        airdate_pattern = ''
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        # Headings and post divs are parallel sequences on the page.
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    if re.search(sxe, title) or (airdate_pattern and re.search(airdate_pattern, title)):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('</strong>(.*?)</p>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def _get_episode_url(self, show_url, video):
    """Pick an episode id from the show page's season dropdown.

    Collects candidate ids by episode number, airdate, and episode title,
    then chooses by priority: number < airdate < title when the respective
    fallbacks are enabled (later assignments win); title only when forced.
    """
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=8)
    pattern = "<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (video.season)
    match = re.search(pattern, html, re.DOTALL)
    if match:
        fragment = match.group(1)
        # The episode id is stashed in the anchor's class attribute.
        ep_ids = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'}, ret='class')
        episodes = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'})
        airdates = dom_parser.parse_dom(fragment, 'span', {'class': 'airdate'})
        ep_airdate = video.ep_airdate.strftime('%Y-%m-%d') if isinstance(video.ep_airdate, datetime.date) else ''
        norm_title = scraper_utils.normalize_title(video.ep_title)
        num_id, airdate_id, title_id = '', '', ''
        for episode, airdate, ep_id in zip(episodes, airdates, ep_ids):
            if ep_airdate and ep_airdate == airdate: airdate_id = ep_id
            # Entries look like "<span>...</span>3. Episode Title".
            match = re.search('(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode)
            if match:
                ep_num, ep_title = match.groups()
                if int(ep_num) == int(video.episode): num_id = ep_id
                if norm_title and norm_title in scraper_utils.normalize_title(ep_title): title_id = ep_id
        best_id = ''
        if not scraper_utils.force_title(video):
            if num_id: best_id = num_id
            if kodi.get_setting('airdate-fallback') == 'true' and airdate_id: best_id = airdate_id
            if kodi.get_setting('title-fallback') == 'true' and title_id: best_id = title_id
        else:
            if title_id: best_id = title_id
        if best_id:
            return EP_URL % (best_id)
def search(self, video_type, title, year, season=''):
    """Crawl the paginated /tvseries/ index, collecting shows matching title.

    Follows the '>>' next-page link until no more pages remain.
    """
    results = []
    page_url = urlparse.urljoin(self.base_url, '/tvseries/index.php?&page=1')
    while page_url:
        html = self._http_get(page_url, cache_limit=48)
        # Strip HTML comments so commented-out markup can't produce false matches.
        html = re.sub('<!--.*?-->', '', html)
        norm_title = scraper_utils.normalize_title(title)
        for td in dom_parser.parse_dom(html, 'td', {'class': 'topic_content'}):
            match_url = re.search('href="([^"]+)', td)
            match_title_year = dom_parser.parse_dom(td, 'img', ret='alt')
            if match_url and match_title_year:
                match_url = match_url.group(1)
                # Relative links are relative to the /tvseries/ section.
                if not match_url.startswith('/'): match_url = '/tvseries/' + match_url
                match_title, match_year = scraper_utils.extra_year(match_title_year[0])
                if norm_title in scraper_utils.normalize_title(match_title):
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
        # The next-page link is the anchor whose text is '>>'.
        match = re.search('href="([^"]+)[^>]*>>', html)
        if match:
            page_url = urlparse.urljoin(self.base_url, match.group(1))
        else:
            page_url = ''
    return results
def __tv_search(self, title, year):
    """Search the site's per-letter series index for shows matching title.

    Uses the first character of the title to pick the index page, then
    filters entries by normalized-title containment and optional year.
    """
    results = []
    if title:
        norm_title = scraper_utils.normalize_title(title)
        # Index is organized alphabetically by first letter of the title.
        url = '/series/letra/%s/' % (title[0])
        url = urlparse.urljoin(self.base_url, url)
        html = self._http_get(url, cache_limit=48)
        for item in dom_parser.parse_dom(html, 'li', {'class': '[^"]*bpM12[^"]*'}):
            title_frag = dom_parser.parse_dom(item, 'h2')
            year_frag = dom_parser.parse_dom(item, 'div', {'class': '[^"]*sectionDetail[^"]*'})
            match_url = dom_parser.parse_dom(item, 'a', ret='href')
            if title_frag and match_url:
                match_url = match_url[0]
                # The show name precedes a <br>; fall back to the whole fragment.
                match = re.search('(.*?)<br>', title_frag[0])
                if match:
                    match_title = match.group(1)
                else:
                    match_title = title_frag[0]
                match_year = ''
                if year_frag:
                    match = re.search('(\d{4})', year_frag[0])
                    if match:
                        match_year = match.group(1)
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search the site's listing page (movies or shows) for matching titles.

    Premium-only entries are skipped unless the include_premium setting is
    enabled. Returns a list of result dicts ('url', 'title', 'year').
    """
    # Join a *relative* path so a user-configured base_url is honored.
    # (Previously the absolute 'http://ororo.tv/en' was passed to urljoin,
    # which made it ignore self.base_url entirely.)
    url = urlparse.urljoin(self.base_url, '/en')
    if video_type == VIDEO_TYPES.MOVIE:
        url += '/movies'
    html = self._http_get(url, cache_limit=.25)
    results = []
    norm_title = scraper_utils.normalize_title(title)
    include_paid = kodi.get_setting('%s-include_premium' % (self.get_name())) == 'true'
    for match in re.finditer('''<span class='value'>(\d{4})(.*?)href="([^"]+)[^>]+>([^<]+)''', html, re.DOTALL):
        match_year, middle, url, match_title = match.groups()
        # 'middle' holds the markup between year and link; a 'paid accounts'
        # note there marks premium-only titles.
        if not include_paid and video_type == VIDEO_TYPES.MOVIE and 'paid accounts' in middle:
            continue
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result = {
                'url': scraper_utils.pathify_url(url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)
    return results
def __search(self, video_type, title, year, season=''):
    """Query a (base64-obfuscated) search API and filter JSON results.

    Only '/watch/' URLs are considered. 'Season N' in the title marks a
    season result; otherwise the entry is treated as a movie.
    """
    results = []
    # SEARCH_URL is stored base64-encoded; decode then fill in the query.
    search_url = base64.decodestring(SEARCH_URL) % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=1)
    js_data = scraper_utils.parse_json(html)
    norm_title = scraper_utils.normalize_title(title)
    for item in js_data.get('results', []):
        if '/watch/' not in item['url'].lower(): continue
        is_season = re.search('Season\s+(\d+)', item['titleNoFormatting'], re.IGNORECASE)
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match_title_year = item['titleNoFormatting']
            # Titles are prefixed with "Watch " by the site.
            match_title_year = re.sub('^Watch\s+', '', match_title_year)
            match_url = item['url']
            match_year = ''
            if video_type == VIDEO_TYPES.MOVIE:
                match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
            else:
                # Season results: optionally filter to the requested season.
                if season and int(is_season.group(1)) != int(season):
                    continue
                match = re.search('(.*?)\s+\(\d{4}\)', match_title_year)
                if match:
                    match_title = match.group(1)
                else:
                    match_title = match_title_year
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """POST the site's search form and filter links by a type prefix.

    Links are 'watch-movie-...' or 'watch-tvshow-...' slugs; the display
    title is reconstructed from the slug via __make_title.
    """
    results = []
    url = urlparse.urljoin(self.base_url, '/search.html')
    data = {'search': title}
    headers = {'Referer': self.base_url}
    html = self._http_get(url, data=data, headers=headers, cache_limit=2)
    if video_type == VIDEO_TYPES.MOVIE:
        query_type = 'watch-movie-'
    else:
        query_type = 'watch-tvshow-'
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'a', {'href': '#'}):
        match = re.search('href="(%s[^"]+)' % (query_type), item)
        if match:
            link = match.group(1)
            match_title = self.__make_title(link, query_type)
            # NOTE(review): match_year is never populated here, so the year
            # comparison below can never run (the 'not match_year' arm always
            # short-circuits) — presumably the site exposes no year.
            match_year = ''
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or int(year) == int(match_year)):
                result = {
                    'url': scraper_utils.pathify_url(link),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)
    return results
def __movie_search(self, title, year):
    """Search the site's /search?q= page for movies matching title.

    The listing exposes no year, so match_year is always empty and the
    year comparison is effectively a no-op kept for symmetry with the
    other search helpers. Returns a list of result dicts.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search?q=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'video_item'}):
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title = dom_parser.parse_dom(item, 'img', ret='alt')
        # No year is available in this listing.
        # (Removed a dead `if match_year:` branch — match_year was initialized
        # to '' and never populated, so the branch could never execute, and
        # would have mis-indexed a string if it ever had.)
        match_year = ''
        if match_url and match_title:
            match_url = match_url[0]
            match_title = match_title[0]
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search /search/<title>.html; a status div marks season results.

    Movie years are pulled from the URL slug ('...-2015.' / '...-2015-').
    Title matching is substring containment in either direction.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'cfv'})
    if not fragment: return results
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
        # Presence of a status div distinguishes shows/seasons from movies.
        is_season = dom_parser2.parse_dom(item, 'div', {'class': 'status'})
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
            if not match: continue
            match_title = match[0].attrs['title']
            match_url = match[0].attrs['href']
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                # Optionally restrict to the requested season number.
                if season and not re.search('Season\s+%s$' % (season), match_title, re.I):
                    continue
            else:
                match = re.search('-(\d{4})[-.]', match_url)
                if match:
                    match_year = match.group(1)
            match_norm_title = scraper_utils.normalize_title(match_title)
            title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            if title_match and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def _get_episode_url(self, video):
    """Scan the torrent-list API for a torrent matching this episode.

    A torrent matches when its name carries the SxxExx (or, with the
    airdate fallback enabled, the airdate) and the leading text matches
    the show title. Returns a 'hash=<infohash>' query string.
    """
    url = urlparse.urljoin(self.base_url, '/torrent/list')
    # cache_limit=0: the torrent list changes constantly, never cache.
    js_data = self._http_get(url, cache_limit=0)
    norm_title = scraper_utils.normalize_title(video.title)
    if 'torrents' in js_data:
        airdate_fallback = kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate
        show_title = ''
        if not scraper_utils.force_title(video):
            for item in js_data['torrents']:
                # Capture the show-name prefix before the SxxExx token.
                sxe_pattern = '(.*?)[. ][Ss]%02d[Ee]%02d[. ]' % (int(video.season), int(video.episode))
                match = re.search(sxe_pattern, item['name'])
                if match:
                    show_title = match.group(1)
                elif airdate_fallback:
                    airdate_pattern = '(.*?)[. ]%s[. ]%02d[. ]%02d[. ]' % (video.ep_airdate.year, video.ep_airdate.month, video.ep_airdate.day)
                    match = re.search(airdate_pattern, item['name'])
                    if match:
                        show_title = match.group(1)
                if show_title and norm_title in scraper_utils.normalize_title(show_title):
                    return 'hash=%s' % (item['hash'])
def search(self, video_type, title, year, season=''):
    """Search the site's series.xml feed for shows matching title.

    Parses <dizi> elements ('adi' = name, 'url' = link). Parse failures
    are logged and swallowed so a bad feed yields an empty result set.
    """
    results = []
    xml_url = urlparse.urljoin(self.base_url, '/series.xml')
    xml = self._http_get(xml_url, cache_limit=24)
    if xml:
        norm_title = scraper_utils.normalize_title(title)
        # NOTE(review): match_year is never assigned after this, so the year
        # comparison below always passes — the feed exposes no year.
        match_year = ''
        try:
            for element in ET.fromstring(xml).findall('.//dizi'):
                name = element.find('adi')
                if name is not None and norm_title in scraper_utils.normalize_title(name.text):
                    url = element.find('url')
                    if url is not None and (not year or not match_year or year == match_year):
                        result = {
                            'url': scraper_utils.pathify_url(url.text),
                            'title': name.text,
                            'year': ''
                        }
                        results.append(result)
        except (ParseError, ExpatError) as e:
            log_utils.log('Dizilab Search Parse Error: %s' % (e), log_utils.LOGWARNING)
    return results
def _get_episode_url(self, show_url, video):
    """Page through show posts matching '.SxxExx.' or '.YYYY.MM.DD.' in headings.

    Falls back to an episode-title comparison (text inside <strong>) when
    title matching is forced and enabled. Stops at the age cutoff.
    """
    sxe = '.S%02dE%02d.' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    try:
        ep_airdate = video.ep_airdate.strftime('.%Y.%m.%d.')
    except:
        # No airdate available on the video object.
        ep_airdate = ''
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        # Headings and post divs are parallel sequences on the page.
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    # Plain substring tests; the dots double as separators.
                    if (sxe in title) or (ep_airdate and ep_airdate in title):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('<strong>(.*?)</strong>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query the site's /advanced-search/ page and filter by media type.

    '/tvshows/' in the link distinguishes shows from movies. Note the
    reversed containment test: the scraped title must be contained in
    the query title (opposite of most other scrapers in this file).
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/advanced-search/')
    headers = {'Referer': self.base_url}
    params = {'search_query': title, 'orderby': '', 'order': '', 'wpas': 1}
    html = self._http_get(search_url, params=params, headers=headers, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'datos'}):
        match = dom_parser2.parse_dom(item, 'a', req='href')
        if not match: continue
        match_url = match[0].attrs['href']
        is_tvshow = '/tvshows/' in match_url
        # Skip results whose type doesn't match the requested video_type.
        if is_tvshow and video_type == VIDEO_TYPES.MOVIE or not is_tvshow and video_type == VIDEO_TYPES.TVSHOW:
            continue
        match_title = match[0].content
        match_title, match_year = scraper_utils.extra_year(match_title)
        if scraper_utils.normalize_title(match_title) in norm_title and (not year or not match_year or year == match_year):
            result = {
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            }
            results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    """Page through show posts, release-checking each heading for the episode.

    Falls back to an episode-title comparison (text inside <strong>) when
    title matching is forced and enabled. Stops at the age cutoff.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = scraper_utils.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        # Headings and post divs are parallel sequences on the page.
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    if scraper_utils.release_check(video, title, require_title=False):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('<strong>(.*?)</strong>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url: page_url = [page_url[0].attrs['href']]
def search(self, video_type, title, year, season=''):
    """Scan the site's A-Z series list for entries matching title.

    Entries look like "Title (2015)"; the year is optional. Returns a
    list of dicts with 'url', 'title' (cleansed) and 'year' keys.
    """
    listing_url = urlparse.urljoin(self.base_url, '/tv-series-a-z-list')
    page = self._http_get(listing_url, cache_limit=8)
    found = []
    wanted = scraper_utils.normalize_title(title)
    link_pattern = '<li>\s*<a.*?href="([^"]+)[^>]*>([^<]+)'
    for hit in re.finditer(link_pattern, page, re.DOTALL):
        link, raw_title = hit.groups()
        # Peel a trailing "(YYYY)" off the display title when present.
        split = re.search('(.*?)\s+\((\d{4})\)', raw_title)
        name, found_year = split.groups() if split else (raw_title, '')
        if wanted not in scraper_utils.normalize_title(name):
            continue
        if year and found_year and year != found_year:
            continue
        found.append({
            'url': scraper_utils.pathify_url(link),
            'title': scraper_utils.cleanse_title(name),
            'year': found_year,
        })
    return found
def search(self, video_type, title, year):
    """WordPress ?s= search for movies, skipping episode and TV-series hits.

    The query includes the year to narrow results; a "nothing matched"
    page short-circuits to an empty list.
    """
    search_url = urlparse.urljoin(self.base_url, '/?s=')
    search_url += urllib.quote_plus('%s %s' % (title, year))
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    if not re.search('Sorry, but nothing matched', html):
        norm_title = scraper_utils.normalize_title(title)
        for item in dom_parser.parse_dom(html, 'li', {'class': '[^"]*box-shadow[^"]*'}):
            match = re.search('href="([^"]+)"\s+title="([^"]+)', item)
            if match:
                url, match_title_year = match.groups()
                if re.search('S\d{2}E\d{2}', match_title_year): continue  # skip episodes
                if re.search('TV\s*SERIES', match_title_year, re.I): continue  # skip shows
                # Split "Title (2015)" / "Title 2015" into title + year.
                match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''
                if (not year or not match_year or year == match_year) and norm_title in scraper_utils.normalize_title(match_title):
                    result = {
                        'title': match_title,
                        'year': match_year,
                        'url': scraper_utils.pathify_url(url)
                    }
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Match cached torrents against a movie or season search.

    Season torrents are recognized by 'season N' in the name; episode
    torrents (SxxExx) are always skipped. Returns result dicts whose
    'url' is a 'hash=<infohash>' query string.
    """
    results = []
    norm_title = scraper_utils.normalize_title(title)
    for item in self.__get_torrents():
        is_season = re.search('(.*?[._ ]season[._ ]+(\d+))[._ ](.*)', item['name'], re.I)
        # BUGFIX: the second clause previously read `is_season and
        # VIDEO_TYPES.SEASON` (missing `video_type ==`); since the constant
        # is truthy, every season pack leaked into non-season searches and
        # was then mis-handled by the movie branch below.
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            if re.search('[._ ]S\d+E\d+[._ ]', item['name']): continue  # skip episodes
            if video_type == VIDEO_TYPES.SEASON:
                match_title, match_season, extra = is_season.groups()
                # Optionally restrict to the requested season number.
                if season and int(match_season) != int(season): continue
                match_year = ''
                match_title = re.sub('[._]', ' ', match_title)
            else:
                match = re.search('(.*?)\(?(\d{4})\)?(.*)', item['name'])
                if match:
                    match_title, match_year, extra = match.groups()
                else:
                    match_title, match_year, extra = item['name'], '', ''
            match_title = match_title.strip()
            extra = extra.strip()
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result_title = match_title
                if extra: result_title += ' [%s]' % (extra)
                result = {'title': result_title, 'year': match_year, 'url': 'hash=%s' % (item['hash'])}
                results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    """Page through show posts, release-checking each post's first link.

    Falls back to an episode-title comparison (text after </strong>) when
    title matching is forced and enabled. Stops at the age cutoff.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        html = self._http_get(page_url[0], require_debrid=False, cache_limit=1)
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            # Posts are newest-first; once one is too old, stop paging entirely.
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = dom_parser2.parse_dom(post, 'a', req='href')
                if match:
                    url, title = match[0].attrs['href'], match[0].content
                    if not force_title:
                        if scraper_utils.release_check(video, title, require_title=False):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url: page_url = [page_url[0].attrs['href']]
def search(self, video_type, title, year, season=''):
    """Search: scrape /series/all/ for shows; POST the search form for movies.

    Honors the site's rate limit by sleeping (capped at self.timeout) and
    retrying once when a "search again in N seconds" message appears.
    """
    results = []
    if video_type == VIDEO_TYPES.TVSHOW:
        url = urlparse.urljoin(self.base_url, '/series/all/')
        html = self._http_get(url, cache_limit=8)
        links = dom_parser.parse_dom(html, 'a', {'class': 'underilne'}, 'href')
        titles = dom_parser.parse_dom(html, 'a', {'class': 'underilne'})
        items = zip(links, titles)
    else:
        url = urlparse.urljoin(self.base_url, '/search?=%s' % urllib.quote_plus(title))
        data = {'q': title, 'go': 'Search'}
        html = self._http_get(url, data=data, cache_limit=8)
        match = re.search('you can search again in (\d+) seconds', html, re.I)
        if match:
            # Rate-limited: wait out the (capped) delay and retry uncached.
            wait = int(match.group(1))
            if wait > self.timeout: wait = self.timeout
            time.sleep(wait)
            html = self._http_get(url, data=data, cache_limit=0)
        pattern = 'class="movie_box.*?href="([^"]+).*?<h1>([^<]+)'
        items = re.findall(pattern, html, re.DOTALL)
    norm_title = scraper_utils.normalize_title(title)
    for item in items:
        url, match_title = item
        if norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': ''}
            results.append(result)
    return results