def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query the site's AJAX archive listing ('getDizi') and return shows
    whose normalized title contains the normalized search title.

    The listing carries no year information, so 'year' is always ''.
    """
    hits = []
    ajax_url = scraper_utils.urljoin(self.base_url, AJAX_URL)
    headers = {'Referer': scraper_utils.urljoin(self.base_url, '/arsiv')}
    headers.update(XHR)
    html = self._http_get(ajax_url, data={'type': 'getDizi'}, headers=headers, cache_limit=48)
    wanted = scraper_utils.normalize_title(title)
    match_year = ''
    js_data = scraper_utils.parse_json(html, ajax_url)
    for show in js_data.get('data', []):
        show_title = show.get('adi', '')
        if 'url' not in show:
            continue
        if wanted not in scraper_utils.normalize_title(show_title):
            continue
        hits.append({'url': scraper_utils.pathify_url(show['url']),
                     'title': scraper_utils.cleanse_title(show_title),
                     'year': match_year})
    return hits
def search(self, video_type, title, year, season=''):
    """Scrape the /search/<title>.html results page and return matching
    movies or seasons; a trailing 'Season N' in the title marks seasons."""
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html') % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'movie'})
    if not fragment:
        return results
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
        anchors = dom_parser2.parse_dom(item, 'a', req='href')
        labels = dom_parser2.parse_dom(item, 'span', {'class': 'text'})
        years = dom_parser2.parse_dom(item, 'span', {'class': 'year'})
        if not anchors or not labels:
            continue
        match_url = anchors[0].attrs['href']
        match_title = re.sub('</?strong>', '', labels[0].content)
        is_season = re.search('Season\s+(\d+)$', match_title, re.I)
        # only keep entries whose kind matches the requested video_type
        if not ((not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON)):
            continue
        if video_type == VIDEO_TYPES.MOVIE:
            match_year = years[0].content if years else ''
        else:
            if season and int(is_season.group(1)) != int(season):
                continue
            match_year = ''
        match_norm_title = scraper_utils.normalize_title(match_title)
        title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
        if title_match and (not year or not match_year or year == match_year):
            results.append({'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': scraper_utils.pathify_url(match_url)})
    return results
def __list(self, title):
    """Return series-list entries filed under the first letter of title.

    Season packs are skipped; quality is mapped from the parsed q_str.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, 'index.php')
    params = {'do': 'charmap', 'name': 'series-list', 'args': '/' + title[0]}
    html = self._http_get(search_url, params=params, require_debrid=True, cache_limit=48)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'downpara-list'})
    if not fragment:
        return results
    for anchor in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
        match_title, match_season, q_str, is_pack = self.__get_title_parts(anchor.content)
        if is_pack:
            continue  # whole-season packs are handled elsewhere
        results.append({'url': scraper_utils.pathify_url(anchor.attrs['href']),
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': '',
                        'quality': QUALITY_MAP.get(q_str, QUALITIES.HIGH),
                        'season': match_season,
                        'q_str': q_str})
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via the site's ajax_search endpoint.

    Fix: removed a leftover Google-scrape block (building scrape/start_url
    and calling client.request) whose result was immediately overwritten
    and never used — it only cost a wasted network request.

    The endpoint returns no year info, so 'year' is always ''.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/ajax_search')
    html = self._http_get(search_url, params={'q': title}, headers=XHR, cache_limit=1)
    js_result = scraper_utils.parse_json(html, search_url)
    match_year = ''
    for series in js_result.get('series', []):
        match_url = series.get('seo')
        match_title = series.get('label')
        if match_url and match_title and (not year or not match_year or year == match_year):
            result = {'url': scraper_utils.pathify_url('/' + match_url),
                      'title': scraper_utils.cleanse_title(match_title),
                      'year': match_year}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Scrape the 'list-film' block of SEARCH_BASE_URL for title/year hits."""
    results = []
    search_url = scraper_utils.urljoin(SEARCH_BASE_URL, '/search/') + urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=0)
    list_match = re.search('ul class="list-film"(.*?)</ul>', html, re.DOTALL)
    if not list_match:
        return results
    pattern = 'class="name">\s*<a\s+href="([^"]+)"\s+title="Watch\s+(.*?)\s+\((\d{4})\)'
    for hit in re.finditer(pattern, list_match.group(1), re.DOTALL):
        match_url, match_title, match_year = hit.groups('')
        if not year or not match_year or year == match_year:
            results.append({'url': scraper_utils.pathify_url(match_url),
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year})
    return results
def search(self, video_type, title, year, season=''):
    """Probe a slugified title URL directly; a page hit is the only result."""
    results = []
    slug = title.replace("'", '')
    slug = re.sub(r'[^a-zA-Z0-9\s]+', ' ', slug).lower().strip()
    slug = re.sub('\s+', ' ', slug).replace(' ', '-')
    if year:
        slug += '-%s' % (year)
    test_url = urlparse.urljoin(self.base_url, slug)
    # any non-empty response counts as a match for this slug
    if self._http_get(test_url, cache_limit=1):
        results.append({'title': scraper_utils.cleanse_title(title),
                        'year': year,
                        'url': scraper_utils.pathify_url(test_url)})
    return results
def __search(self, video_type, title, year, season=''):
    """Parse CSE-style JSON results, keeping /watch/ links whose parsed
    kind (movie vs season) matches video_type."""
    results = []
    search_url = (SEARCH_URL) % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=1)
    js_data = scraper_utils.parse_json(html)
    norm_title = scraper_utils.normalize_title(title)
    for item in js_data.get('results', []):
        if '/watch/' not in item['url'].lower():
            continue
        label = item['titleNoFormatting']
        is_season = re.search('Season\s+(\d+)', label, re.IGNORECASE)
        if not ((not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON)):
            continue
        match_title_year = re.sub('^Watch\s+', '', label)
        match_url = item['url']
        match_year = ''
        if video_type == VIDEO_TYPES.MOVIE:
            match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                match_title = match_title_year
        else:
            if season and int(is_season.group(1)) != int(season):
                continue
            match = re.search('(.*?)\s+\(\d{4}\)', match_title_year)
            match_title = match.group(1) if match else match_title_year
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            results.append({'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': scraper_utils.pathify_url(match_url)})
    return results
def search(self, video_type, title, year, season=''):
    """Scrape the 'cfv' result list; a 'status' div marks TV entries,
    and movie years are pulled from the URL slug."""
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'cfv'})
    if not fragment:
        return results
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
        is_season = dom_parser2.parse_dom(item, 'div', {'class': 'status'})
        if not ((not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON)):
            continue
        link = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
        if not link:
            continue
        match_title = link[0].attrs['title']
        match_url = link[0].attrs['href']
        match_year = ''
        if video_type == VIDEO_TYPES.SEASON:
            if season and not re.search('Season\s+%s$' % (season), match_title, re.I):
                continue
        else:
            year_match = re.search('-(\d{4})[-.]', match_url)
            if year_match:
                match_year = year_match.group(1)
        match_norm_title = scraper_utils.normalize_title(match_title)
        title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
        if title_match and (not year or not match_year or year == match_year):
            results.append({'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': scraper_utils.pathify_url(match_url)})
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Scrape the 'inner' result cards; the year may come either from the
    title text or from a separate 'year' span."""
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/') + urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'inner'}):
        name = dom_parser2.parse_dom(fragment, 'div', {'class': 'name'})
        if not name:
            continue
        link = dom_parser2.parse_dom(name[0].content, 'a', req='href')
        if not link:
            continue
        match_url = link[0].attrs['href']
        if 'tv-series' in match_url and video_type == VIDEO_TYPES.MOVIE:
            continue
        match_title_year = re.sub('</?[^>]*>', '', link[0].content)
        match_title_year = re.sub('[Ww]atch\s+[Mm]ovie\s*', '', match_title_year)
        match_title_year = match_title_year.replace('’', "'")
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not match_year:
            # fall back to the dedicated year span
            year_span = dom_parser2.parse_dom(fragment, 'span', {'class': 'year'})
            if year_span:
                year_text = dom_parser2.parse_dom(year_span[0].content, 'a')
                if year_text:
                    match_year = year_text[0].content.strip()
        if not year or not match_year or year == match_year:
            results.append({'title': scraper_utils.cleanse_title(match_title),
                            'url': scraper_utils.pathify_url(match_url),
                            'year': match_year})
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Scrape /search-movies results; metadata lives in each item's
    onmouseover tooltip blob."""
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search-movies/%s.html' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'item'}):
        link = dom_parser2.parse_dom(item, 'a', req='href')
        hover = re.search('onmouseover="([^"]+)', item)
        if not link or not hover:
            continue
        match_url = link[0].attrs['href']
        hover_text = hover.group(1)
        title_match = re.search('<b>(?:<i>)?\s*(.*?)\s*(?:</i>)?</b>', hover_text)
        if not title_match:
            continue
        match_title, match_year = scraper_utils.extra_year(title_match.group(1))
        is_season = re.search('season\s+(\d+)', hover_text, re.I)
        if (is_season and video_type == VIDEO_TYPES.MOVIE) or (not is_season and video_type == VIDEO_TYPES.SEASON):
            continue
        if video_type == VIDEO_TYPES.MOVIE:
            if not match_year:
                # tooltip sometimes carries an explicit release year
                release = re.search('>Release:\s*(\d{4})', hover_text)
                match_year = release.group(1) if release else ''
        else:
            if season and int(season) != int(is_season.group(1)):
                continue
        if not year or not match_year or year == match_year:
            results.append({'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': scraper_utils.pathify_url(match_url)})
    return results
def __alt_search(self, video_type, title, year, season=''):
    """Alternate keyword search; year and season are folded into the query
    string and re-verified against the result URLs."""
    results = []
    query = title.lower()
    if year:
        query += ' %s' % (year)
    if video_type == VIDEO_TYPES.SEASON and season:
        query += ' Season %s' % (season)
    search_url = urlparse.urljoin(self.base_url, '/search')
    html = self._http_get(search_url, params={'key': query}, cache_limit=1)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'caption'}):
        match = re.search('href="([^"]+)[^>]+>(.*?)<span[^>]*>', item)
        if not match:
            continue
        match_url, match_title = match.groups()
        is_season = re.search('-season-\d+', match_url)
        if not ((video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season)):
            continue
        if video_type == VIDEO_TYPES.SEASON:
            if season and not re.search('season-0*%s$' % (season), match_url):
                continue
        match_title = re.sub('</?[^>]*>', '', match_title)
        match_title = re.sub('\s+Full\s+Movie', '', match_title)
        year_match = re.search('-(\d{4})(?:$|-)', match_url)
        match_year = year_match.group(1) if year_match else ''
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            results.append({'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': scraper_utils.pathify_url(match_url)})
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Scrape search.php topic cells; titles come from each poster's alt
    text, and bare URLs are rooted under /tvseries/."""
    results = []
    page_url = scraper_utils.urljoin(self.base_url, '/search.php')
    html = self._http_get(page_url, params={'dayq': title}, cache_limit=48)
    html = re.sub('<!--.*?-->', '', html)  # drop commented-out markup before parsing
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, cell in dom_parser2.parse_dom(html, 'td', {'class': 'topic_content'}):
        link = dom_parser2.parse_dom(cell, 'a', req='href')
        poster = dom_parser2.parse_dom(cell, 'img', req='alt')
        if not link or not poster:
            continue
        match_url = link[0].attrs['href']
        if not match_url.startswith('/'):
            match_url = '/tvseries/' + match_url
        match_title, match_year = scraper_utils.extra_year(poster[0].attrs['alt'])
        if (norm_title in scraper_utils.normalize_title(match_title)) and (not year or not match_year or year == match_year):
            results.append({'url': scraper_utils.pathify_url(match_url),
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year})
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Scrape 'movie_about' blocks from the search.php results."""
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search.php')
    html = self._http_get(search_url, params={'q': title}, cache_limit=4)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'movie_about'}):
        links = dom_parser.parse_dom(item, 'a', ret='href')
        labels = dom_parser.parse_dom(item, 'a')
        if not links or not labels:
            continue
        match_title, match_year = scraper_utils.extra_year(labels[0])
        if not year or not match_year or year == match_year:
            results.append({'url': scraper_utils.pathify_url(links[0]),
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year})
    return results
def __tv_search(self, title, year):
    """Scan the full show list for normalized-title substring matches.

    The list exposes no year, so match_year is always '' (and the year
    test therefore always passes).
    """
    results = []
    search_url = scraper_utils.urljoin(self.tv_base_url, '/showlist/')
    html = self._http_get(search_url, cache_limit=48)
    match_year = ''
    norm_title = scraper_utils.normalize_title(title)
    for attrs, match_title in dom_parser2.parse_dom(html, 'a', {'class': 'thread_link'}, req='href'):
        if match_title.upper().endswith(', THE'):
            match_title = 'The ' + match_title[:-5]  # undo the ", The" filing form
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            results.append({'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': scraper_utils.pathify_url(attrs['href'])})
    return results
def _get_episode_url(self, show_url, video):
    """Resolve the URL for video's season/episode.

    First follows the show page's '<N>. Sezon' link and scans that season
    page; if that misses, falls back to scanning the front page for a
    '<slug>-<season>-sezon-<episode>-bolum' link. Returns None implicitly
    when nothing matches.
    """
    show_url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(show_url, cache_limit=8)
    pattern = '''href=['"]([^'"]+)[^>]+>\s*%s\.\s*Sezon<''' % (video.season)
    match = re.search(pattern, html)
    if match:
        episode_pattern = '''href=['"]([^'"]+-%s-sezon-%s-bolum[^'"]*)''' % (video.season, video.episode)
        season_url = scraper_utils.urljoin(self.base_url, match.group(1))
        html = self._http_get(season_url, cache_limit=2)
        ep_url = self._default_get_episode_url(html, video, episode_pattern)
        if ep_url:
            return ep_url
    # front page fallback
    html = self._http_get(self.base_url, cache_limit=2)
    for slug in reversed(show_url.split('/')):
        if slug:
            break  # last non-empty path segment is the show slug
    ep_url_frag = 'href="([^"]+/{slug}-{season}-sezon-{episode}-bolum[^"]*)'.format(slug=slug, season=video.season, episode=video.episode)
    match = re.search(ep_url_frag, html)
    if match:
        return scraper_utils.pathify_url(match.group(1))
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query suggest.php for TV shows matching title.

    Fixes:
    - the Referer/XHR headers were built but never passed to _http_get;
    - the '(YYYY)' year suffix was searched in the URL, where it can never
      appear — it belongs on the (tag-stripped) title text.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/suggest.php')
    headers = {'Referer': self.base_url}
    headers.update(XHR)
    params = {'ajax': 1, 's': title, 'type': 'TVShows'}
    html = self._http_get(search_url, params=params, headers=headers, cache_limit=8)
    for attrs, match_title in dom_parser2.parse_dom(html, 'a', req='href'):
        match_url = attrs['href']
        match_title = re.sub('</?[^>]*>', '', match_title)
        match = re.search('\((\d{4})\)$', match_title)
        match_year = match.group(1) if match else ''
        if not year or not match_year or year == match_year:
            result = {'url': scraper_utils.pathify_url(match_url),
                      'title': scraper_utils.cleanse_title(match_title),
                      'year': match_year}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search /search with an explicit movies/series type filter.

    Fix: when a 'movie-series' badge was present but its text was not
    'TV SERIE', media_type previously remained the raw parse-result list,
    so the item could never equal video_type and was silently dropped.
    Such items are now classified as movies.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search')
    search_type = 'movies' if video_type == VIDEO_TYPES.MOVIE else 'series'
    html = self._http_get(search_url, params={'query': title.lower(), 'type': search_type}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'one_movie-item'}):
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        match_title = dom_parser2.parse_dom(item, 'img', req='alt')
        badge = dom_parser2.parse_dom(item, 'div', {'class': 'movie-series'})
        if badge and badge[0].content == 'TV SERIE':
            media_type = VIDEO_TYPES.TVSHOW
        else:
            media_type = VIDEO_TYPES.MOVIE
        if match_url and match_title and video_type == media_type:
            match_url = match_url[0].attrs['href']
            match_title = match_title[0].attrs['alt']
            match = re.search('-(\d{4})-', match_url)
            match_year = match.group(1) if match else ''
            if not year or not match_year or year == match_year:
                results.append({'url': scraper_utils.pathify_url(match_url),
                                'title': scraper_utils.cleanse_title(match_title),
                                'year': match_year})
    return results
def search(self, video_type, title, year, season=''):
    """Search movies via ?s= or match TV categories from the front page.

    Fix: both branches yield (url, title) STRING pairs — dom_parser.parse_dom
    with a ret argument returns attribute strings, and re.findall returns
    tuples of strings — but the loop treated items as DOM nodes
    (item[0].attrs['href']), raising AttributeError on every result.
    The pairs are now unpacked directly.
    """
    results = []
    if video_type == VIDEO_TYPES.MOVIE:
        search_url = urlparse.urljoin(self.base_url, '/?s=')
        search_url += urllib.quote_plus('%s' % (title))
        html = self._http_get(search_url, cache_limit=1)
        links = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'href')
        titles = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'title')
        matches = zip(links, titles)
    else:
        html = self._http_get(self.base_url, cache_limit=8)
        matches = re.findall('<li\s+class="cat-item[^>]+>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', html)
    norm_title = scraper_utils.normalize_title(title)
    for match_url, match_title_year in matches:
        match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
        if match:
            match_title, match_year = match.groups()
        else:
            match_title, match_year = match_title_year, ''
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            log_utils.log('Rainierland - search - Match Found: ' + str(norm_title))
            results.append({'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': scraper_utils.pathify_url(match_url)})
    return results
def search(self, video_type, title, year, season=''):
    """Scan the /search listing's <li> anchors for title matches (no year)."""
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search')
    html = self._http_get(search_url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'li'):
        match = re.search('''href=["']([^"']+)[^>]+>([^<]+)''', item)
        if not match:
            continue
        match_url, match_title = match.groups()
        season_match = re.search('(.*?)\s*\(Season\s+\d+', match_title)
        if season_match:
            match_title = season_match.group(1)  # strip trailing "(Season N..." text
        if norm_title in scraper_utils.normalize_title(match_title):
            results.append({'url': scraper_utils.pathify_url(match_url),
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': ''})
    return results
def __search(self, video_type, title, year):
    """Scan the advanced-search tag index for normalized-title matches.

    Fix: the entity decode was a no-op — replace('&', '&') — and now
    decodes '&amp;' to '&' as clearly intended.
    """
    url = urlparse.urljoin(self.base_url, '/advanced-search/menu-id-111.html?view=buscador')
    html = self._http_get(url, cache_limit=48)
    results = []
    norm_title = scraper_utils.normalize_title(title)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'tagindex'})
    if fragment:
        for match in re.finditer('href="([^"]+)[^>]+>(.*?)</a>', fragment[0]):
            url, match_title = match.groups()
            match_title = re.sub('\s+\(\d+\)$', '', match_title)  # drop trailing "(count)"
            match_title = match_title.replace('&amp;', '&')
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': ''}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Scrape 'title_list' spans from the /search results page."""
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search')
    html = self._http_get(search_url, params={'q': title, 's': 't'}, cache_limit=1)
    for _attrs, content in dom_parser2.parse_dom(html, 'span', {'class': 'title_list'}):
        links = dom_parser2.parse_dom(content, 'a', req=['href', 'title'])
        if not links:
            continue
        attrs = links[0].attrs
        match_title, match_year = scraper_utils.extra_year(attrs['title'])
        if not year or not match_year or year == match_year:
            results.append({'url': scraper_utils.pathify_url(attrs['href']),
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year})
    return results
def search(self, video_type, title, year, season=''):
    """Scrape 'ml-item' cards; an episode-count badge (mli-eps) marks
    season entries, and the year comes from a 'jt-info' span."""
    results = []
    clean_title = re.sub('[^A-Za-z0-9 ]', '', title)
    search_url = scraper_utils.urljoin(self.base_url, '/search/') + '%s.html' % (urllib.quote_plus(clean_title))
    html = self._http_get(search_url, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'ml-item'}):
        info = dom_parser2.parse_dom(item, 'span', {'class': 'mli-info'})
        link = dom_parser2.parse_dom(item, 'a', req='href')
        year_match = re.search('class="jt-info">(\d{4})<', item)
        is_episodes = dom_parser2.parse_dom(item, 'span', {'class': 'mli-eps'})
        if not ((video_type == VIDEO_TYPES.MOVIE and not is_episodes) or (video_type == VIDEO_TYPES.SEASON and is_episodes)):
            continue
        if not info or not link:
            continue
        match_url = link[0].attrs['href']
        match_title = re.sub('</?h2>', '', info[0].content)
        match_title = re.sub('\s+\d{4}$', '', match_title)
        if video_type == VIDEO_TYPES.SEASON:
            if season and not re.search('Season\s+0*%s$' % (season), match_title):
                continue
        match_year = year_match.group(1) if year_match else ''
        if not year or not match_year or year == match_year:
            results.append({'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': scraper_utils.pathify_url(match_url)})
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the debrid blog via its AJAX search endpoint, filtering by
    category and age before delegating to _blog_proc_results.

    NOTE(review): posts come from dom_parser.parse_dom yet are accessed
    with .get()/['post_name'] as if they were mappings — confirm the
    parser in use actually yields dict-like objects here.
    """
    results = []
    referer = urlparse.urljoin(SEARCH_BASE_URL, '/search/')
    headers = {'Referer': referer + urllib.quote_plus(title)}
    headers.update(XHR)
    search_url = urlparse.urljoin(SEARCH_BASE_URL, '/lib/search526049.php')
    params = {'phrase': title, 'pindex': 1}
    html = self._http_get(search_url, params=params, headers=headers, require_debrid=True, cache_limit=1)
    for post in dom_parser.parse_dom(html, 'div', {'class': 'entry post'}):
        if not CATEGORIES[video_type] in post:
            continue
        if self.__too_old(post):
            continue  # skip stale posts
        result = self._blog_proc_results(post.get('post_title', ''), '(?P<post_title>.+)(?P<url>.*?)', '', video_type, title, year)
        if result:
            result[0]['url'] = scraper_utils.pathify_url(post['post_name'])
            results.append(result[0])
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via the site's front-page form (?a=<title>&submit=Search)."""
    results = []
    params = {'a': title, 'submit': 'Search'}
    html = self._http_get(self.base_url, params=params, cache_limit=8)
    fragment = dom_parser.parse_dom(html, 'div', {'id': 'single-post'})
    if not fragment:
        return results
    for item in dom_parser.parse_dom(fragment[0], 'div', {'class': 'box-bg'}):
        match = re.search('href="([^"]+)[^>]+>([^<]+)', item)
        if not match:
            continue
        match_url, match_title_year = match.groups()
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not year or not match_year or year == match_year:
            results.append({'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': scraper_utils.pathify_url(match_url)})
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query the Direct API for releases matching title.

    Fix: the line building `url` had been commented out, leaving an
    unconditional NameError on the first result; it is restored from the
    commented code (search_url plus a &quality= suffix).
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search?query=')
    search_url += title.replace("'", "")
    html = self._http_get(search_url, cache_limit=.25)
    js_result = scraper_utils.parse_json(html, search_url)
    if 'error' in js_result:
        logger.log('Direct API error: "%s" @ %s' % (js_result['error'], search_url), log_utils.LOGWARNING)
        return results
    for match in js_result:
        url = search_url + '&quality=%s' % (match['quality'])
        result = {'url': scraper_utils.pathify_url(url),
                  'title': scraper_utils.cleanse_title(match['release']),
                  'quality': match['quality'],
                  'year': ''}
        results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Scrape /movie/search 'ml-item' cards; an mli-eps badge marks season
    entries, and result URLs point at the item's watching.html page."""
    results = []
    clean_title = re.sub('[^A-Za-z0-9 ]', '', title)
    search_url = urlparse.urljoin(self.base_url, '/movie/search/') + urllib.quote_plus(clean_title)
    html = self._http_get(search_url, cache_limit=1)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'ml-item'}):
        info = dom_parser.parse_dom(item, 'span', {'class': 'mli-info'})
        link = re.search('href="([^"]+)', item, re.DOTALL)
        year_match = re.search('class="jt-info">(\d{4})<', item)
        is_episodes = dom_parser.parse_dom(item, 'span', {'class': 'mli-eps'})
        if not ((video_type == VIDEO_TYPES.MOVIE and not is_episodes) or (video_type == VIDEO_TYPES.SEASON and is_episodes)):
            continue
        if not info or not link:
            continue
        match_title = re.sub('</?h2>', '', info[0])
        match_title = re.sub('\s+\d{4}$', '', match_title)
        if video_type == VIDEO_TYPES.SEASON:
            if season and not re.search('Season\s+%s$' % (season), match_title):
                continue
        url = urlparse.urljoin(link.group(1), 'watching.html')  # resolve relative to the match link
        match_year = year_match.group(1) if year_match else ''
        if not year or not match_year or year == match_year:
            results.append({'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': scraper_utils.pathify_url(url)})
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query the site's hosted Algolia index and filter hits by media type
    (permalink path) and optional year."""
    results = []
    search_url = scraper_utils.urljoin(SEARCH_BASE, '/1/indexes/al_titles_index/query')
    params = {'x-algolia-agent': 'Algolia for vanilla JavaScript (lite) 3.22.1',
              'x-algolia-application-id': 'XXDAZCOUL3',
              'x-algolia-api-key': 'c5c1279f5ad09819ecf2af9d6b5ee06a'}
    data = {'params': urllib.urlencode({'query': title, 'facets': '*', 'hitsPerPage': 30})}
    headers = {'Origin': self.base_url}
    html = self._http_get(search_url, params=params, data=json.dumps(data), headers=headers, cache_limit=8)
    js_data = scraper_utils.parse_json(html, search_url)
    media_type = '/movies/' if video_type == VIDEO_TYPES.MOVIE else '/tv/'
    for hit in js_data.get('hits', []):
        if 'permalink' not in hit or 'title' not in hit:
            continue
        if media_type not in hit['permalink']:
            continue
        match_year = str(hit.get('yr', ''))
        if not year or not match_year or year == match_year:
            results.append({'title': scraper_utils.cleanse_title(hit['title']),
                            'url': scraper_utils.pathify_url(hit['permalink']),
                            'year': match_year})
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search movies or series via search.php.

    Fix: the result grid was iterated inconsistently with every other
    dom_parser2 call site in this file — parse_dom was handed the raw
    fragment list and the resulting DomMatch tuples were passed back into
    parse_dom. It now parses fragment[0].content and unpacks the
    (attrs, content) pairs, matching the established pattern.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search.php')
    if video_type == VIDEO_TYPES.MOVIE:
        params = {'all': 'all', 'searchin': 'mov', 'subtitles': '', 'imdbfrom': '', 'yearrange': '', 'keywords': title}
    else:
        params = {'all': 'all', 'vselect': 'ser', 'keywords': title}
    html = self._http_get(search_url, params=params, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'cbp-rfgrid'})
    if not fragment:
        return results
    for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
        match = dom_parser2.parse_dom(item, 'a', req=['title', 'href'])
        if not match:
            continue
        match_url = match[0].attrs['href']
        match_title, match_year = scraper_utils.extra_year(match[0].attrs['title'])
        if not year or not match_year or year == match_year:
            result = {'url': scraper_utils.pathify_url(match_url),
                      'title': scraper_utils.cleanse_title(match_title),
                      'year': match_year}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Scan the full /tv-lists/ page for title matches, de-duplicating by
    pathified URL (the list exposes no year)."""
    results = []
    show_list_url = scraper_utils.urljoin(self.base_url, '/tv-lists/')
    html = self._http_get(show_list_url, cache_limit=8)
    seen_urls = set()
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'li'):
        link = dom_parser2.parse_dom(item, 'a', req='href')
        if not link:
            continue
        match_url = scraper_utils.pathify_url(link[0].attrs['href'])
        if match_url in seen_urls:
            continue
        seen_urls.add(match_url)
        match_title = re.sub('</?strong[^>]*>', '', link[0].content)
        if norm_title in scraper_utils.normalize_title(match_title):
            results.append({'url': match_url,
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': ''})
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Scrape 'slideposter' cards from the slug-based search page; titles
    and years come from each poster image's alt text."""
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html') % (scraper_utils.to_slug(title))
    html = self._http_get(search_url, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'slideposter'}):
        link = dom_parser2.parse_dom(item, 'a', req='href')
        poster = dom_parser2.parse_dom(item, 'img', req='alt')
        if not link or not poster:
            continue
        match_title, match_year = scraper_utils.extra_year(poster[0].attrs['alt'])
        if not year or not match_year or year == match_year:
            results.append({'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': scraper_utils.pathify_url(link[0].attrs['href'])})
    return results