def search(self, video_type, title, year, season=''):
    """Crawl the site's paginated /tvseries index and return entries whose
    normalized title contains the normalized search title.

    Returns a list of {'url', 'title', 'year'} result dicts.
    """
    results = []
    page_url = urlparse.urljoin(self.base_url, '/tvseries/index.php?&page=1')
    while page_url:
        html = self._http_get(page_url, cache_limit=48)
        # strip HTML comments so commented-out markup can't produce matches below
        html = re.sub('<!--.*?-->', '', html)
        norm_title = scraper_utils.normalize_title(title)
        for td in dom_parser.parse_dom(html, 'td', {'class': 'topic_content'}):
            match_url = re.search('href="([^"]+)', td)
            # the show title/year lives in the poster image's alt attribute
            match_title_year = dom_parser.parse_dom(td, 'img', ret='alt')
            if match_url and match_title_year:
                match_url = match_url.group(1)
                # listing links are relative to the /tvseries/ section
                if not match_url.startswith('/'):
                    match_url = '/tvseries/' + match_url
                match_title, match_year = scraper_utils.extra_year(match_title_year[0])
                if norm_title in scraper_utils.normalize_title(match_title):
                    result = {'url': scraper_utils.pathify_url(match_url),
                              'title': scraper_utils.cleanse_title(match_title),
                              'year': match_year}
                    results.append(result)
        # follow the ">>" next-page link; stop when there isn't one
        match = re.search('href="([^"]+)[^>]*>>', html)
        if match:
            page_url = urlparse.urljoin(self.base_url, match.group(1))
        else:
            page_url = ''
    return results
def search(self, video_type, title, year, season=''):
    """Query the site's /search/ page and return {'title', 'url', 'year'} dicts.

    Skips TV-series links when a movie is requested; filters on *year* only
    when both the requested and scraped years are known.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    for fragment in dom_parser.parse_dom(html, 'div', {'class': 'inner'}):
        name = dom_parser.parse_dom(fragment, 'div', {'class': 'name'})
        if name:
            match = re.search('href="([^"]+)[^>]+>(.*?)</a>', name[0])
            if match:
                match_url, match_title_year = match.groups()
                if 'tv-series' in match_url and video_type == VIDEO_TYPES.MOVIE:
                    continue
                # drop any markup and the "Watch Movie" boilerplate from the label
                match_title_year = re.sub('</?[^>]*>', '', match_title_year)
                match_title_year = re.sub('[Ww]atch\s+[Mm]ovie\s*', '', match_title_year)
                # repair UTF-8-read-as-Latin-1 mojibake for a right single quote
                match_title_year = match_title_year.replace('&#146;', "'")
                match_title, match_year = scraper_utils.extra_year(match_title_year)
                if not match_year:
                    # fall back to the separate year span when the label has no year
                    year_span = dom_parser.parse_dom(fragment, 'span', {'class': 'year'})
                    if year_span:
                        year_text = dom_parser.parse_dom(year_span[0], 'a')
                        if year_text:
                            match_year = year_text[0].strip()
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title),
                              'url': scraper_utils.pathify_url(match_url),
                              'year': match_year}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search via the site's Ajax Search Pro endpoint (XHR POST).

    Distinguishes season results (label contains "Season N") from movies;
    title matching is bidirectional substring on normalized titles.
    Returns a list of {'url', 'title', 'year'} dicts.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/wp-content/plugins/ajax-search-pro/ajax_search.php')
    # fixed plugin parameters observed from the site's own search widget
    data = {'action': 'ajaxsearchpro_search', 'aspp': title, 'asid': '1', 'asp_inst_id': '1_1',
            'options': 'current_page_id=7&qtranslate_lang=0&set_intitle=None&customset%5B%5D=post'}
    html = self._http_get(search_url, data=data, headers=XHR, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    for attrs, match_title_year in dom_parser2.parse_dom(html, 'a', {'class': 'asp_res_url'}):
        match_url = attrs['href']
        match_title_year = re.sub('</?[^>]*>', '', match_title_year)
        is_season = re.search('Season\s+(\d+)\s*', match_title_year, re.I)
        # only keep entries whose kind matches the requested video type
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                if season and int(is_season.group(1)) != int(season):
                    continue
                match_title = match_title_year
                # strip a stray 4-digit year from season labels
                match_title = re.sub('\s*\d{4}', '', match_title)
            else:
                match_title, match_year = scraper_utils.extra_year(match_title_year)
            match_norm_title = scraper_utils.normalize_title(match_title)
            # accept either direction of substring containment
            title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            if title_match and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url),
                          'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search the site's /search/<title> listing of recent items.

    Season entries are detected by "Season N" in the heading; movies get a
    title/year split via extra_year. Returns {'url', 'title', 'year'} dicts.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/%s' % (urllib.quote(title)))
    headers = {'Referer': self.base_url}
    html = self._http_get(search_url, headers=headers, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'recent-item'}):
        # the title link lives inside an <h1>..<h6> heading
        fragment = dom_parser.parse_dom(item, 'h\d+')
        if not fragment: continue
        match_title_year = dom_parser.parse_dom(fragment[0], 'a', {'rel': 'bookmark'})
        match_url = dom_parser.parse_dom(fragment[0], 'a', {'rel': 'bookmark'}, ret='href')
        if match_title_year and match_url:
            match_title_year = match_title_year[0]
            match_url = match_url[0]
            match_title_year = re.sub('</?span[^>]*>', '', match_title_year)
            is_season = re.search('Season\s+(\d+)\s*', match_title_year, re.I)
            # only keep entries whose kind matches the requested video type
            if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                match_year = ''
                if video_type == VIDEO_TYPES.SEASON:
                    match_title = match_title_year
                    if season and int(is_season.group(1)) != int(season):
                        continue
                else:
                    match_title, match_year = scraper_utils.extra_year(match_title_year)
                if not year or not match_year or year == match_year:
                    result = {'url': scraper_utils.pathify_url(match_url),
                              'title': scraper_utils.cleanse_title(match_title),
                              'year': match_year}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search via the site's ?s= query and scrape 'item' divs.

    Episode entries ("1x05"-style labels) are skipped; a trailing resolution
    suffix (e.g. "720p") is trimmed and a quality tag ('calidad2' span) is
    appended in brackets. Returns {'title', 'year', 'url'} dicts.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
        match = re.search('href="([^"]+)', item)
        match_title = dom_parser.parse_dom(item, 'span', {'class': 'tt'})
        year_frag = dom_parser.parse_dom(item, 'span', {'class': 'year'})
        if match and match_title:
            url = match.group(1)
            match_title = match_title[0]
            if re.search('\d+\s*x\s*\d+', match_title):
                continue  # exclude episodes
            match_title, match_year = scraper_utils.extra_year(match_title)
            if not match_year and year_frag:
                match_year = year_frag[0]
            # trim a trailing resolution marker (e.g. " 720p") from the title
            match = re.search('(.*?)\s+\d{3,}p', match_title)
            if match:
                match_title = match.group(1)
            extra = dom_parser.parse_dom(item, 'span', {'class': 'calidad2'})
            if extra:
                match_title += ' [%s]' % (extra[0])
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year,
                          'url': scraper_utils.pathify_url(url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search via /index.php; the site either renders a results page or
    redirects with a window.location script on an exact match.

    Adult entries are skipped. Returns {'url', 'title', 'year'} dicts.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/index.php')
    params = {'search': title, 'image.x': 0, 'image.y': 0}
    html = self._http_get(search_url, params=params, cache_limit=1)
    # Are we on a results page?
    if not re.search('window\.location', html):
        pattern = '<td[^>]+class="movieText"[^>]*>(.*?)</p>.*?href="(/watch/[^"]+)'
        for match in re.finditer(pattern, html, re.DOTALL):
            match_title_year, match_url = match.groups('')
            # skip p**n
            if '-XXX-' in match_url.upper() or ' XXX:' in match_title_year:
                continue
            match_title_year = re.sub('</?.*?>', '', match_title_year)
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not year or not match_year or year == match_year:
                result = {'url': match_url,
                          'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year}
                results.append(result)
    else:
        # exact match: the page redirects straight to the video
        match = re.search('window\.location\s+=\s+"([^"]+)', html)
        if match:
            url = match.group(1)
            # 'movies.php' is the generic no-result redirect, not a video page
            if url != 'movies.php':
                result = {'url': scraper_utils.pathify_url(url),
                          'title': scraper_utils.cleanse_title(title),
                          'year': year}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search /search-movies/<title>.html; the real title/year is embedded
    in each item's onmouseover tooltip markup.

    Filters movie vs season entries; for movies the year can fall back to a
    "Release:" line in the tooltip. Returns {'title', 'year', 'url'} dicts.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search-movies/%s.html' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'item'}):
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        match_title_year = re.search('onmouseover="([^"]+)', item)
        if match_url and match_title_year:
            match_url = match_url[0].attrs['href']
            match_title_year = match_title_year.group(1)
            # the bolded (possibly italicized) text inside the tooltip is the title
            match = re.search('<b>(?:<i>)?\s*(.*?)\s*(?:</i>)?</b>', match_title_year)
            if not match: continue
            match_title, match_year = scraper_utils.extra_year(match.group(1))
            is_season = re.search('season\s+(\d+)', match_title_year, re.I)
            # drop entries whose kind doesn't match the requested video type
            if (is_season and video_type == VIDEO_TYPES.MOVIE) or (not is_season and video_type == VIDEO_TYPES.SEASON):
                continue
            if video_type == VIDEO_TYPES.MOVIE:
                if not match_year:
                    # fall back to the "Release: YYYY" line in the tooltip
                    match_year = re.search('>Release:\s*(\d{4})', match_title_year)
                    match_year = match_year.group(1) if match_year else ''
            else:
                if season and int(season) != int(is_season.group(1)):
                    continue
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site's article listing for *title*.

    Returns a list of {'title', 'year', 'url'} dicts; a hit is dropped only
    when both the requested and scraped years are known and disagree.
    """
    hits = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for _attrs, article in dom_parser2.parse_dom(html, 'article', {'class': 'item-list'}):
        anchors = dom_parser2.parse_dom(article, 'a', req='href')
        if not anchors:
            continue
        link = anchors[0].attrs['href']
        label = anchors[0].content
        found_title, found_year = scraper_utils.extra_year(label)
        if year and found_year and year != found_year:
            continue
        hits.append({'title': scraper_utils.cleanse_title(found_title),
                     'year': found_year,
                     'url': scraper_utils.pathify_url(link)})
    return hits
def search(self, video_type, title, year, season=''):
    """Search via ?s=&search= and scrape movie_poster divs.

    Returns a list of {'url', 'title', 'year'} dicts filtered by *year*
    only when both years are known.
    """
    found = []
    html = self._http_get(self.base_url, params={'s': title, 'search': ''}, cache_limit=8)
    for poster in dom_parser.parse_dom(html, 'div', {'class': '[^"]*movie_poster[^"]*'}):
        link = re.search('href="([^"]+)[^>]+title="([^"]+)', poster)
        if not link:
            continue
        page_url, label = link.groups()
        item_title, item_year = scraper_utils.extra_year(label)
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(page_url),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via ?s=&search= and scrape movie_poster divs (dom_parser2).

    Returns a list of {'url', 'title', 'year'} dicts filtered by *year*
    only when both years are known.
    """
    found = []
    html = self._http_get(self.base_url, params={'s': title, 'search': ''}, cache_limit=8)
    for _attrs, poster in dom_parser2.parse_dom(html, 'div', {'class': 'movie_poster'}):
        anchors = dom_parser2.parse_dom(poster, 'a', req=['href', 'title'])
        if not anchors:
            continue
        link = anchors[0].attrs
        item_title, item_year = scraper_utils.extra_year(link['title'])
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(link['href']),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):
    """Search via ?s= and scrape box-shadow list items.

    Returns a list of {'title', 'year', 'url'} dicts; the site's "nothing
    matched" message short-circuits the scrape.
    """
    found = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    if re.search('nothing matched your search criteria', html, re.I):
        return found
    for entry in dom_parser.parse_dom(html, 'li', {'class': '[^"]*box-shadow[^"]*'}):
        link = re.search('href="([^"]+)[^>]*title="([^"]+)', entry)
        if not link:
            continue
        page_url, label = link.groups()
        item_title, item_year = scraper_utils.extra_year(label)
        if year and item_year and year != item_year:
            continue
        found.append({'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year,
                      'url': scraper_utils.pathify_url(page_url)})
    return found
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the /keywords/<title>/ listing.

    The anchor's title attribute supplies the result title; its text content
    supplies the year (via extra_year). Returns {'url', 'title', 'year'} dicts.
    """
    found = []
    search_url = scraper_utils.urljoin(self.base_url, '/keywords/%s/' % (title))
    html = self._http_get(search_url, cache_limit=4)
    for _attrs, about in dom_parser2.parse_dom(html, 'div', {'class': 'movie_about'}):
        anchors = dom_parser2.parse_dom(about, 'a', req=['href', 'title'])
        if not anchors:
            continue
        anchor = anchors[0]
        # year comes from the anchor text, title from the title attribute
        _ignored, item_year = scraper_utils.extra_year(anchor.content)
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(anchor.attrs['href']),
                      'title': scraper_utils.cleanse_title(anchor.attrs['title']),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):
    """Query /search.php?q=<title> and scrape movie_about divs.

    Returns a list of {'url', 'title', 'year'} dicts filtered by *year*
    only when both years are known.
    """
    found = []
    endpoint = urlparse.urljoin(self.base_url, '/search.php')
    html = self._http_get(endpoint, params={'q': title}, cache_limit=4)
    for about in dom_parser.parse_dom(html, 'div', {'class': 'movie_about'}):
        hrefs = dom_parser.parse_dom(about, 'a', ret='href')
        labels = dom_parser.parse_dom(about, 'a')
        if not (hrefs and labels):
            continue
        item_title, item_year = scraper_utils.extra_year(labels[0])
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(hrefs[0]),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):
    """Search via ?s= and scrape the listing-videos <ul> fragment.

    Pairs each anchor's href with its (tag-stripped) text label.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    found = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    listing = dom_parser.parse_dom(html, 'ul', {'class': '[^"]*listing-videos[^"]*'})
    if not listing:
        return found
    hrefs = dom_parser.parse_dom(listing[0], 'a', ret='href')
    labels = dom_parser.parse_dom(listing[0], 'a')
    for page_url, label in zip(hrefs, labels):
        clean_label = re.sub('</?[^>]*>', '', label)
        item_title, item_year = scraper_utils.extra_year(clean_label)
        if year and item_year and year != item_year:
            continue
        found.append({'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year,
                      'url': scraper_utils.pathify_url(page_url)})
    return found
def search(self, video_type, title, year, season=''):
    """POST the theme's ajax-search endpoint and scrape the returned <li> list.

    Returns a list of {'url', 'title', 'year'} dicts filtered by *year*
    only when both years are known.
    """
    found = []
    endpoint = urlparse.urljoin(self.base_url, '/wp-content/themes/afdah/ajax-search.php')
    payload = {'search': title, 'type': 'title'}
    html = self._http_get(endpoint, data=payload, headers=XHR, cache_limit=1)
    for entry in dom_parser.parse_dom(html, 'li'):
        hrefs = dom_parser.parse_dom(entry, 'a', ret='href')
        labels = dom_parser.parse_dom(entry, 'a')
        if not (hrefs and labels):
            continue
        item_title, item_year = scraper_utils.extra_year(labels[0])
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(hrefs[0]),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """POST the theme's ajax-search endpoint (test1/test2 parameter variant).

    Scrapes the returned <li> anchors; returns {'url', 'title', 'year'} dicts
    filtered by *year* only when both years are known.
    """
    found = []
    endpoint = scraper_utils.urljoin(self.base_url, '/wp-content/themes/afdah/ajax-search.php')
    payload = {'test1': title, 'test2': 'title'}
    html = self._http_get(endpoint, data=payload, headers=XHR, cache_limit=1)
    for _attrs, entry in dom_parser2.parse_dom(html, 'li'):
        anchors = dom_parser2.parse_dom(entry, 'a', req='href')
        if not anchors:
            continue
        item_title, item_year = scraper_utils.extra_year(anchors[0].content)
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(anchors[0].attrs['href']),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):
    """Search via ?s=&submit= (with Referer header) and scrape aaa_item divs.

    The anchor's title attribute carries the title/year label.
    Returns a list of {'url', 'title', 'year'} dicts.
    """
    found = []
    req_headers = {'Referer': self.base_url}
    query = {'s': title, 'submit': 'Search Now!'}
    html = self._http_get(self.base_url, params=query, headers=req_headers, cache_limit=8)
    for entry in dom_parser.parse_dom(html, 'div', {'class': 'aaa_item'}):
        labels = dom_parser.parse_dom(entry, 'a', ret='title')
        hrefs = dom_parser.parse_dom(entry, 'a', ret='href')
        if not (labels and hrefs):
            continue
        item_title, item_year = scraper_utils.extra_year(labels[0])
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(hrefs[0]),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):
    """Search the site; TV shows are delegated to the dedicated __tv_search
    helper, everything else uses the generic ?s= query.

    Skips "Season ... Episode ..." entries so only whole titles are returned.
    Returns a list of {'url', 'title', 'year'} dicts.
    """
    if video_type == VIDEO_TYPES.TVSHOW:
        return self.__tv_search(title, year)
    else:
        results = []
        html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
        titles = dom_parser.parse_dom(html, 'a', {'class': 'coverImage'}, ret='title')
        links = dom_parser.parse_dom(html, 'a', {'class': 'coverImage'}, ret='href')
        for match_title_year, match_url in zip(titles, links):
            # individual episodes are not valid search results here
            if 'Season' in match_title_year and 'Episode' in match_title_year:
                continue
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url),
                          'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year}
                results.append(result)
        return results
def search(self, video_type, title, year, season=''):
    """POST a DLE-style site search (subaction=search) and scrape results.

    Only scrapes when an 'sresult' marker div is present on the page.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    results = []
    data = {'subaction': 'search', 'do': 'search', 'story': urllib.quote(title)}
    html = self._http_get(self.base_url, data=data, cache_limit=8)
    # the 'sresult' div only appears when the search actually ran
    if dom_parser.parse_dom(html, 'div', {'class': 'sresult'}):
        for item in dom_parser.parse_dom(html, 'div', {'class': 'short_content'}):
            match = re.search('href="([^"]+)', item)
            match_title_year = dom_parser.parse_dom(item, 'div', {'class': 'short_header'})
            if match and match_title_year:
                url = match.group(1)
                match_title, match_year = scraper_utils.extra_year(match_title_year[0])
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title),
                              'year': match_year,
                              'url': scraper_utils.pathify_url(url)}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """POST a DLE-style site search against index.php and scrape results.

    Only scrapes when an 'sresult' marker div is present on the page.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    results = []
    # full DLE search form payload; 'story' carries the query text
    data = {'hash': 'indexert', 'do': 'search', 'subaction': 'search', 'search_start': 0,
            'full_search': 0, 'result_from': 1, 'story': title}
    search_url = scraper_utils.urljoin(self.base_url, 'index.php')
    html = self._http_get(search_url, params={'do': 'search'}, data=data, cache_limit=8)
    # the 'sresult' div only appears when the search actually ran
    if dom_parser2.parse_dom(html, 'div', {'class': 'sresult'}):
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'short_content'}):
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            match_title_year = dom_parser2.parse_dom(item, 'div', {'class': 'short_header'})
            if match_url and match_title_year:
                match_url = match_url[0].attrs['href']
                match_title, match_year = scraper_utils.extra_year(match_title_year[0].content)
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title),
                              'year': match_year,
                              'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search the site's /bestmatch-search-<slug>.html page.

    The title is slugified (spaces to dashes, non-alphanumerics stripped,
    lowercased) before substitution into the URL template.
    Returns a list of {'url', 'title', 'year'} dicts.
    """
    found = []
    template = urlparse.urljoin(self.base_url, '/bestmatch-search-%s.html')
    slug = re.sub('[^A-Za-z0-9-]', '', title.replace(' ', '-')).lower()
    html = self._http_get(template % (slug), cache_limit=1)
    for thumb in dom_parser.parse_dom(html, 'div', {'class': 'thumbsTitle'}):
        link = re.search('href="([^"]+)[^>]*>(.*?)</a>', thumb)
        if not link:
            continue
        page_url, label = link.groups('')
        item_title, item_year = scraper_utils.extra_year(label)
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(page_url),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):
    """Search the site's /search/<slug>.html page and scrape slideposter divs.

    The title/year label comes from the poster image's alt attribute.
    Returns a list of {'title', 'year', 'url'} dicts.

    BUG FIX: the original unwrapped the alt list with match_title_year[0]
    and then indexed it AGAIN when calling extra_year, so extra_year only
    ever saw the first character of the title; pass the whole string.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/%s.html')
    search_url = search_url % (self.__to_slug(title))
    html = self._http_get(search_url, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': '[^"]*slideposter[^"]*'}):
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title_year = dom_parser.parse_dom(item, 'img', ret='alt')
        if match_url and match_title_year:
            match_url = match_url[0]
            match_title_year = match_title_year[0]
            # pass the full alt text, not its first character
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site's /tag/<slug> page for matching top-item anchors.

    TV-show links ('-tvshow-' in the URL) are skipped; a leading "Watch"
    and any markup are stripped from labels before the title/year split.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    found = []
    # slugify: strip punctuation, collapse whitespace runs to dashes
    title = re.sub('[^A-Za-z0-9 ]', '', title)
    title = re.sub('\s+', '-', title)
    tag_url = scraper_utils.urljoin(self.base_url, '/tag/%s' % (title))
    html = self._http_get(tag_url, cache_limit=1)
    for attrs, label in dom_parser2.parse_dom(html, 'a', {'class': 'top-item'}, req='href'):
        page_url = attrs['href']
        if '-tvshow-' in page_url:
            continue
        label = re.sub('</?[^>]*>', '', label)
        label = re.sub('^Watch\s*', '', label)
        item_title, item_year = scraper_utils.extra_year(label)
        if year and item_year and year != item_year:
            continue
        found.append({'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year,
                      'url': scraper_utils.pathify_url(page_url)})
    return found
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via ?s= and scrape anchors inside the listing-videos <ul>.

    Returns early on the site's "Sorry, but nothing matched" message or
    when the listing fragment is absent. Returns {'title', 'year', 'url'}.
    """
    found = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    if re.search('Sorry, but nothing matched', html, re.I):
        return found
    listing = dom_parser2.parse_dom(html, 'ul', {'class': 'listing-videos'})
    if not listing:
        return found
    for attrs, label in dom_parser2.parse_dom(listing[0].content, 'a', req='href'):
        clean_label = re.sub('</?[^>]*>', '', label)
        item_title, item_year = scraper_utils.extra_year(clean_label)
        if year and item_year and year != item_year:
            continue
        found.append({'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year,
                      'url': scraper_utils.pathify_url(attrs['href'])})
    return found
def search(self, video_type, title, year, season=''):
    """Search via ?s= and scrape box-shadow list items.

    Skips episode entries (SxxEyy labels) and TV-series entries, and only
    keeps hits whose normalized title contains the normalized query.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    results = []
    if not re.search('Sorry, but nothing matched', html):
        norm_title = scraper_utils.normalize_title(title)
        for item in dom_parser.parse_dom(html, 'li', {'class': '[^"]*box-shadow[^"]*'}):
            match = re.search('href="([^"]+)"\s+title="([^"]+)', item)
            if match:
                url, match_title_year = match.groups()
                if re.search('S\d{2}E\d{2}', match_title_year):
                    continue  # skip episodes
                if re.search('TV\s*SERIES', match_title_year, re.I):
                    continue  # skip shows
                match_title, match_year = scraper_utils.extra_year(match_title_year)
                if (not year or not match_year or year == match_year) and norm_title in scraper_utils.normalize_title(match_title):
                    result = {'title': scraper_utils.cleanse_title(match_title),
                              'year': match_year,
                              'url': scraper_utils.pathify_url(url)}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via ?s= and scrape anchors inside box-shadow list items.

    Skips episode entries (SxxEyy labels) and TV-series entries, and only
    keeps hits whose normalized title contains the normalized query.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    if re.search('Sorry, but nothing matched', html, re.I):
        return results
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'box-shadow'}):
        # each list item may contain several anchors; examine them all
        for attrs, _content in dom_parser2.parse_dom(item, 'a', req=['href', 'title']):
            match_url, match_title_year = attrs['href'], attrs['title']
            if re.search('S\d{2}E\d{2}', match_title_year):
                continue  # skip episodes
            if re.search('TV\s*SERIES', match_title_year, re.I):
                continue  # skip shows
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if (not year or not match_year or year == match_year) and norm_title in scraper_utils.normalize_title(match_title):
                result = {'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """POST the site's /search form, scoped to movies ('m') or TV ('t'),
    and scrape anchors from the results table.

    Returns a list of {'url', 'title', 'year'} dicts.
    """
    found = []
    scope = 'm' if video_type == VIDEO_TYPES.MOVIE else 't'
    endpoint = scraper_utils.urljoin(self.base_url, '/search')
    html = self._http_get(endpoint, data={'searchquery': title, 'searchin': scope}, cache_limit=8)
    page = dom_parser2.parse_dom(html, 'div', {'class': 'search-page'})
    if not page:
        return found
    table = dom_parser2.parse_dom(page[0].content, 'table')
    if not table:
        return found
    for attrs, label in dom_parser2.parse_dom(table[0].content, 'a', req='href'):
        item_title, item_year = scraper_utils.extra_year(label)
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(attrs['href']),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via ?s= and scrape browse-movie-bottom items.

    Falls back to the browse-movie-year div when the label has no year, and
    appends '?watching' to every result URL (the site's direct-watch link).
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'browse-movie-bottom'}):
        match = dom_parser2.parse_dom(item, 'a', req='href')
        if match:
            match_url, match_title_year = match[0].attrs['href'], match[0].content
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not match_year:
                # fall back to the dedicated year div next to the link
                div = dom_parser2.parse_dom(item, 'div', {'class': 'browse-movie-year'})
                if div:
                    match_year = div[0].content.strip()
            # always request the watch variant of the page
            match_url += '?watching'
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query the site's JSON suggestion endpoint and build result dicts.

    Each suggestion's 'Value' is both the display label and the entityName
    used to build the details URL; 'ReleaseYear' is preferred over any year
    parsed out of the label. Returns {'title', 'year', 'url'} dicts.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/searchBoxSuggestion')
    html = self._http_get(search_url, params={'top': 8, 'query': title}, cache_limit=8)
    js_data = scraper_utils.parse_json(html, search_url)
    for item in js_data:
        entityName = match_title_year = item.get('Value', '')
        if entityName:
            match_title, match_year2 = scraper_utils.extra_year(match_title_year)
            # prefer the explicit ReleaseYear field; fall back to the parsed year
            match_year = str(item.get('ReleaseYear', ''))
            if not match_year:
                match_year = match_year2
            match_url = '/ontology/EntityDetails?' + urllib.urlencode({'entityName': entityName, 'ignoreMediaLinkError': 'false'})
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Query the WordPress dwls_search AJAX action and parse its JSON reply.

    A Referer mimicking the normal search page is sent along with the XHR
    headers. Returns a list of {'title', 'year', 'url'} dicts.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/wp-admin/admin-ajax.php')
    # mimic the browser: Referer set to the equivalent on-site search page
    referer = self.base_url + '/?' + urllib.urlencode({'s': title, 'submit': 'Search'})
    headers = {'Referer': referer}
    headers.update(XHR)
    params = {'s': title, 'action': 'dwls_search'}
    html = self._http_get(search_url, params=params, headers=headers, cache_limit=8)
    js_data = scraper_utils.parse_json(html, search_url)
    for match in js_data.get('results', []):
        match_title_year = match.get('post_title')
        match_url = match.get('permalink')
        if match_url and match_title_year:
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """POST the theme's ajax-search endpoint (test1/test2 parameter variant)
    and scrape the returned <li> anchors.

    Returns a list of {'url', 'title', 'year'} dicts filtered by *year*
    only when both years are known.
    """
    found = []
    endpoint = scraper_utils.urljoin(self.base_url, '/wp-content/themes/afdah/ajax-search.php')
    payload = {'test1': title, 'test2': 'title'}
    html = self._http_get(endpoint, data=payload, headers=XHR, cache_limit=1)
    for _attrs, entry in dom_parser2.parse_dom(html, 'li'):
        anchors = dom_parser2.parse_dom(entry, 'a', req='href')
        if not anchors:
            continue
        item_title, item_year = scraper_utils.extra_year(anchors[0].content)
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(anchors[0].attrs['href']),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via /index.php; the site either renders a results page or
    redirects with a window.location script on an exact match.

    Adult entries are skipped. Returns {'url', 'title', 'year'} dicts.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/index.php')
    params = {'search': title, 'image.x': 0, 'image.y': 0}
    html = self._http_get(search_url, params=params, cache_limit=1)
    # Are we on a results page?
    if not re.search('window\.location', html):
        pattern = '<td[^>]+class="movieText"[^>]*>(.*?)</p>.*?href="(/watch/[^"]+)'
        for match in re.finditer(pattern, html, re.DOTALL):
            match_title_year, match_url = match.groups('')
            # skip p**n
            if '-XXX-' in match_url.upper() or ' XXX:' in match_title_year:
                continue
            match_title_year = re.sub('</?.*?>', '', match_title_year)
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not year or not match_year or year == match_year:
                result = {'url': match_url,
                          'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year}
                results.append(result)
    else:
        # exact match: the page redirects straight to the video
        match = re.search('window\.location\s+=\s+"([^"]+)', html)
        if not match:
            return results
        url = match.group(1)
        # 'movies.php' is the generic no-result redirect, not a video page
        if url != 'movies.php':
            result = {'url': scraper_utils.pathify_url(url),
                      'title': scraper_utils.cleanse_title(title),
                      'year': year}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via ?s= and scrape result-item divs.

    The 'movies'/'tvshows' badge spans are used to drop entries of the wrong
    kind; a dedicated year span provides a fallback when the title lacks one.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'result-item'}):
        match = dom_parser2.parse_dom(item, 'div', {'class': 'title'})
        is_movie = dom_parser2.parse_dom(item, 'span', {'class': 'movies'})
        is_show = dom_parser2.parse_dom(item, 'span', {'class': 'tvshows'})
        # drop entries whose badge contradicts the requested type (or with no title div)
        if (video_type == VIDEO_TYPES.TVSHOW and is_movie) or (video_type == VIDEO_TYPES.MOVIE and is_show) or not match:
            continue
        match = dom_parser2.parse_dom(match[0].content, 'a', req='href')
        if not match:
            continue
        match_url, match_title_year = match[0].attrs['href'], match[0].content
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not match_year:
            # fall back to the dedicated year span
            match_year = dom_parser2.parse_dom(item, 'span', {'class': 'year'})
            match_year = match_year[0].content if match_year else ''
        if not year or not match_year or year == match_year:
            result = {'title': scraper_utils.cleanse_title(match_title),
                      'year': match_year,
                      'url': scraper_utils.pathify_url(match_url)}
            results.append(result)
    return results