def __get_ok(self, embed, flashvars):
    """Build the hoster list for an embed whose flashvars carry a metadataUrl.

    The ``value`` attribute of the first ``flashvars`` node is searched for a
    ``metadataUrl=`` parameter. That URL is fetched with the embed's ``data``
    attribute as Referer, the response is parsed as JSON, and if it holds
    ``movie.url`` a single hoster dict for that stream is returned.

    Returns an empty list when either the metadata URL or the stream URL is
    missing.
    """
    sources = []
    flash_value = flashvars[0].attrs['value']
    meta_match = re.search('metadataUrl=([^"]+)', flash_value)
    if not meta_match:
        return sources

    referer = scraper_utils.cleanse_title(urllib.unquote(embed[0].attrs['data']))
    ok_url = scraper_utils.cleanse_title(urllib.unquote(meta_match.group(1)))
    html = self._http_get(ok_url, data='ok', headers={'Referer': referer}, cache_limit=.25)
    js_data = scraper_utils.parse_json(html, ok_url)
    stream_url = js_data.get('movie', {}).get('url')
    if stream_url is None:
        return sources

    sources.append({
        'multi-part': False,
        'host': urlparse.urlparse(stream_url).hostname,
        'class': self,
        'quality': QUALITIES.HD720,
        'views': None,
        'rating': None,
        'url': stream_url,
        'direct': False,
        'subs': 'Turkish Subtitles'
    })
    return sources
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search for a TV show page or for movie posts.

    TV shows: the show URL is built from the slugified title and accepted only
    if the fetched page contains at least one ``post-N`` div.
    Movies: the site search is queried and every post heading is parsed as a
    release name, then filtered by normalized title and by year.

    video_type -- one of the VIDEO_TYPES constants
    title      -- title to look for
    year       -- year to match; a falsy value disables the year filter
    season     -- unused here

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    if video_type == VIDEO_TYPES.TVSHOW and title:
        # The format string needs a placeholder for the slug; without it the
        # '%' operator raises TypeError ("not all arguments converted").
        test_url = '/tvshow/%s' % (scraper_utils.to_slug(title))
        test_url = scraper_utils.urljoin(self.base_url, test_url)
        html = self._http_get(test_url, require_debrid=True, cache_limit=24)
        posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
        if posts:
            result = {
                'url': scraper_utils.pathify_url(test_url),
                'title': scraper_utils.cleanse_title(title),
                'year': ''
            }
            results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
        html = self._http_get(self.base_url, params={'s': search_title}, require_debrid=True, cache_limit=1)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            match = re.search('<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
            if not match:
                continue

            post_url, post_title = match.groups()
            # Skip TV-show posts and posts the scraper considers too old.
            if '/tv-show/' in post or self.__too_old(post):
                continue

            post_title = re.sub('<[^>]*>', '', post_title)
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            # Either title may contain the other; year is only enforced when
            # both sides supply one.
            title_ok = match_norm_title in norm_title or norm_title in match_norm_title
            year_ok = not year or not match_year or year == match_year
            if title_ok and year_ok:
                result = {
                    'url': scraper_utils.pathify_url(post_url),
                    'title': scraper_utils.cleanse_title(full_title),
                    'year': match_year
                }
                results.append(result)

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query the site's /search/ page and extract shows or movies from it.

    TV shows: each ``post-N`` div in the show category contributes the link
    that follows its "TAGS:" label, de-duplicated by URL.
    Movies: ``<h2>`` headings are paired positionally with the ``post-N`` divs
    and each heading is parsed as a release name, then filtered by normalized
    title and by year.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    html = self._http_get(search_url, require_debrid=True, cache_limit=1)

    if video_type == VIDEO_TYPES.TVSHOW:
        visited = set()
        tag_pattern = '<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)'
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if CATEGORIES[video_type] not in post:
                continue

            tag_match = re.search(tag_pattern, post, re.I)
            if not tag_match:
                continue

            show_url, match_title = tag_match.groups()
            if show_url in visited:
                continue

            visited.add(show_url)
            results.append({
                'url': scraper_utils.pathify_url(show_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': ''
            })
    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        post_nodes = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
        posts = [node.content for node in post_nodes]
        for (post_url, post_title), post in zip(headings, posts):
            if CATEGORIES[video_type] not in post or self.__too_old(post):
                continue

            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            titles_overlap = match_norm_title in norm_title or norm_title in match_norm_title
            if not titles_overlap:
                continue
            if year and match_year and year != match_year:
                continue

            results.append({
                'url': scraper_utils.pathify_url(post_url),
                'title': scraper_utils.cleanse_title(full_title),
                'year': match_year
            })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Look the title up in the site's /series.xml listing.

    Every ``dizi`` element whose ``adi`` text contains the normalized title
    and which has a ``url`` child is returned. XML parse errors are logged as
    warnings; whatever was collected up to that point is still returned.

    Returns a list of dicts with 'url', 'title' and 'year' keys ('year' is
    always empty).
    """
    results = []
    xml_url = scraper_utils.urljoin(self.base_url, '/series.xml')
    xml = self._http_get(xml_url, cache_limit=24)
    if not xml:
        return results

    try:
        norm_title = scraper_utils.normalize_title(title)
        match_year = ''  # the listing carries no year, so the year test below always passes
        for element in ET.fromstring(xml).findall('.//dizi'):
            name = element.find('adi')
            if name is None or norm_title not in scraper_utils.normalize_title(name.text):
                continue

            url = element.find('url')
            if url is None:
                continue
            if year and match_year and year != match_year:
                continue

            results.append({
                'url': scraper_utils.pathify_url(url.text),
                'title': scraper_utils.cleanse_title(name.text),
                'year': ''
            })
    except (ParseError, ExpatError) as e:
        logger.log('Dizilab Search Parse Error: %s' % (e), log_utils.LOGWARNING)

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via the site's ``?s=`` query and collect ``div.movie`` entries.

    Each entry needs a link (``href``) and an image (``alt``). The ``alt``
    text supplies the title and, when present, the year; otherwise the year
    is read from the entry's ``div.year``. Links that look like episode pages
    are skipped.

    video_type -- unused here
    title      -- search term
    year       -- year to match; a falsy value disables the year filter
    season     -- unused here

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=4)
    for _attrs, movie in dom_parser2.parse_dom(html, 'div', {'class': 'movie'}):
        match_url = dom_parser2.parse_dom(movie, 'a', req='href')
        match_title_year = dom_parser2.parse_dom(movie, 'img', req='alt')
        if not match_url or not match_title_year:
            continue

        match_url = match_url[0].attrs['href']
        # The separator before the episode number is optional so that both
        # "...-episode12" and "...-episode-12" links are recognised.
        # NOTE(review): hyphenated form is assumed from common slug style - confirm against the site.
        if re.search('season-\d+-episode-?\d+', match_url):
            continue

        match_title_year = match_title_year[0].attrs['alt']
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not match_year:
            match_year = dom_parser2.parse_dom(movie, 'div', {'class': 'year'})
            # Best effort: fall back to '' when there is no usable year node,
            # without hiding unrelated errors behind a bare except.
            try:
                match_year = match_year[0].content
            except (IndexError, AttributeError, TypeError):
                match_year = ''

        if not year or not match_year or year == match_year:
            result = {
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)

    return results
def search(self, video_type, title, year, season=''):
    """Search /movies/search and return matching movie or season entries.

    An item counts as a season when its title contains "S<n>" or
    "Season <n>". Movies must not look like a season; season searches accept
    either a season-labelled item with the requested number or, for season 1
    only, an unlabelled item whose title and year match exactly.

    video_type -- VIDEO_TYPES.MOVIE or VIDEO_TYPES.SEASON
    title      -- title to look for
    year       -- year to match; a falsy value disables the final year filter
    season     -- season number to match (optional)

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/movies/search')
    html = self._http_get(search_url, params={'s': title}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'item_movie'}):
        match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
        if not match:
            continue

        match_title_year = match[0].attrs['title']
        match_url = match[0].attrs['href']
        is_season = re.search('S(?:eason\s+)?(\d+)', match_title_year, re.I)
        # Each alternative must be parenthesised as a whole. Previously the
        # first one read ``video_type == (MOVIE and not is_season)``, which
        # compared video_type to a boolean and never matched movies.
        match_vt = bool((video_type == VIDEO_TYPES.MOVIE and not is_season) or
                        (video_type == VIDEO_TYPES.SEASON and is_season))
        match_year = ''
        if video_type == VIDEO_TYPES.SEASON:
            if not season and not match_vt:
                continue

            if match_vt:
                # Season-labelled item: the label must carry the requested number.
                if season and int(is_season.group(1)) != int(season):
                    continue
            else:
                # Unlabelled item: only acceptable as season 1 of an exact
                # title/year match.
                if season and int(season) != 1:
                    continue
                site_title, site_year = scraper_utils.extra_year(match_title_year)
                if scraper_utils.normalize_title(site_title) not in scraper_utils.normalize_title(title) or year != site_year:
                    continue

            match_title = match_title_year
        else:
            if not match_vt:
                continue
            match_title, match_year = scraper_utils.extra_year(match_title_year)

        match_url = scraper_utils.urljoin(match_url, 'watching.html')
        if not year or not match_year or year == match_year:
            result = {
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via the site's ``bestmatch-fund-movies-<slug>.html`` page.

    The slug is the title with spaces turned into hyphens, every character
    outside ``[A-Za-z0-9-]`` removed, and lower-cased. Links inside
    ``div.thumbsTitle`` are returned, filtered by year when both sides have
    one.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    url_template = scraper_utils.urljoin(self.base_url, '/bestmatch-fund-movies-%s.html')
    slug = re.sub('[^A-Za-z0-9-]', '', title.replace(' ', '-')).lower()
    search_url = url_template % (slug)
    html = self._http_get(search_url, cache_limit=1)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'thumbsTitle'}):
        links = dom_parser2.parse_dom(item, 'a', req='href')
        if not links:
            continue

        link = links[0]
        match_url = link.attrs['href']
        match_title, match_year = scraper_utils.extra_year(link.content)
        if year and match_year and year != match_year:
            continue

        results.append({
            'url': scraper_utils.pathify_url(match_url),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year
        })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query the JSON search endpoint (SEARCH_URL) and map its 'results'.

    The request is sent with XHR headers and a Referer pointing at the
    site's ``/search/?q=`` page, with ``auth=False``. Each entry's
    'permalink', 'title' and 'year' are read, and entries are filtered by
    year when both sides have one.

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, SEARCH_URL)
    referer = scraper_utils.urljoin(self.base_url, '/search/?q=%s') % (urllib.quote_plus(title))
    headers = {'Referer': referer}
    headers.update(XHR)
    params = {'searchTerm': title, 'type': SEARCH_TYPES[video_type], 'limit': 500}
    html = self._http_get(search_url, params=params, headers=headers, auth=False, cache_limit=2)
    js_data = scraper_utils.parse_json(html, search_url)
    if 'results' not in js_data:
        return results

    for entry in js_data['results']:
        match_year = str(entry.get('year', ''))
        match_url = entry.get('permalink', '')
        match_title = entry.get('title', '')
        if year and match_year and year != match_year:
            continue

        results.append({
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
            'url': scraper_utils.pathify_url(match_url)
        })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search through the token-protected API endpoint.

    A token is obtained first; without one an empty list is returned. The
    query is POSTed with the token and the values produced by the private
    ``__get_s`` / ``__get_rt`` / ``__get_sl`` helpers. Items are kept when
    their 'meta' starts with "TV SHOW" (TV show / episode searches) or
    "MOVIE" (everything else) and the year filter passes.

    Returns a list of dicts with 'title', 'url' and 'year' keys.
    """
    results = []
    self.__get_token()
    if self.__token is None:
        return results

    search_url, u = self.__get_search_url()
    search_url = scraper_utils.urljoin(API_BASE_URL, search_url)
    timestamp = int(time.time() * 1000)
    s = self.__get_s()
    query = {
        'q': title,
        'limit': '100',
        'timestamp': timestamp,
        'verifiedCheck': self.__token,
        'set': s,
        'rt': self.__get_rt(self.__token + s),
        'sl': self.__get_sl(u)
    }
    html = self._http_get(search_url, data=query, headers={'Referer': self.base_url}, cache_limit=1)

    wants_tv = video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]
    media_type = 'TV SHOW' if wants_tv else 'MOVIE'

    for item in scraper_utils.parse_json(html, search_url):
        if not item['meta'].upper().startswith(media_type):
            continue

        if 'year' in item and item['year']:
            match_year = str(item['year'])
        else:
            match_year = ''

        if year and match_year and year != match_year:
            continue

        permalink = item['permalink'].replace('/show/', '/tv-show/')
        results.append({
            'title': scraper_utils.cleanse_title(item['title']),
            'url': scraper_utils.pathify_url(permalink),
            'year': match_year
        })

    return results
def search(self, video_type, title, year, season=''):
    """Search ``/search/<title>.html`` and return movie or season items.

    The title is stripped of everything except letters, digits and spaces
    before being put in the URL. An ``ml-item`` is treated as a season when
    it has a ``span.mli-eps`` and as a movie otherwise. For season searches
    the item title must end in "Season <n>" (leading zeros allowed) when a
    season is given.

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/')
    title = re.sub('[^A-Za-z0-9 ]', '', title)
    search_url += '%s.html' % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'ml-item'}):
        title_nodes = dom_parser2.parse_dom(item, 'span', {'class': 'mli-info'})
        link_nodes = dom_parser2.parse_dom(item, 'a', req='href')
        year_match = re.search('class="jt-info">(\d{4})<', item)
        is_episodes = dom_parser2.parse_dom(item, 'span', {'class': 'mli-eps'})

        wanted_movie = video_type == VIDEO_TYPES.MOVIE and not is_episodes
        wanted_season = video_type == VIDEO_TYPES.SEASON and is_episodes
        if not (wanted_movie or wanted_season):
            continue
        if not title_nodes or not link_nodes:
            continue

        match_url = link_nodes[0].attrs['href']
        match_title = re.sub('</?h2>', '', title_nodes[0].content)
        match_title = re.sub('\s+\d{4}$', '', match_title)
        if video_type == VIDEO_TYPES.SEASON:
            if season and not re.search('Season\s+0*%s$' % (season), match_title):
                continue

        match_year = year_match.group(1) if year_match else ''
        if year and match_year and year != match_year:
            continue

        results.append({
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
            'url': scraper_utils.pathify_url(match_url)
        })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the Films-and-TV category listing for the title.

    Each ``div.searchResult`` supplies a URL (``a[itemprop=url]``), a name
    (``span[itemprop=name]``) and optionally a ``copyrightYear``. Results
    whose lower-cased URL lacks ``FRAGMENTS[video_type]`` are dropped.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    search_path = '/Category-FilmsAndTV/Genre-Any/Letter-Any/ByPopularity/1/Search-%s.htm' % (urllib.quote(title))
    search_url = scraper_utils.urljoin(self.base_url, search_path)
    html = self._http_get(search_url, cache_limit=10)
    results = []
    for _attrs, entry in dom_parser2.parse_dom(html, 'div', {'class': 'searchResult'}):
        url_nodes = dom_parser2.parse_dom(entry, 'a', {'itemprop': 'url'}, req='href')
        name_nodes = dom_parser2.parse_dom(entry, 'span', {'itemprop': 'name'})
        year_nodes = dom_parser2.parse_dom(entry, 'span', {'itemprop': 'copyrightYear'})
        match_year = year_nodes[0].content if year_nodes else ''

        if not url_nodes or not name_nodes:
            continue
        if year and match_year and year != match_year:
            continue

        match_url = url_nodes[0].attrs['href']
        match_title = name_nodes[0].content
        # Looked up per result so the table is only consulted when there is
        # something to filter.
        if FRAGMENTS[video_type] not in match_url.lower():
            continue

        results.append({
            'url': scraper_utils.pathify_url(match_url),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year
        })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """POST a search to /index.php and read results from ``#dle-content``.

    Each ``div.short-film`` inside the content fragment contributes the
    ``href`` and ``title`` of its ``<h5><a>`` heading. No year is extracted
    and no year filtering is applied.

    Returns a list of dicts with 'url', 'title' and 'year' keys ('year' is
    always empty).
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/index.php')
    form = {'subaction': 'search', 'story': title, 'do': 'search'}
    html = self._http_get(search_url, params={'do': 'search'}, data=form,
                          headers={'Referer': search_url}, cache_limit=1)
    content = dom_parser2.parse_dom(html, 'div', {'id': 'dle-content'})
    if not content:
        return results

    for _attrs, item in dom_parser2.parse_dom(content[0].content, 'div', {'class': 'short-film'}):
        heading = re.search('<h5><a\s+href="([^"]+)[^>]+title="([^"]+)', item)
        if heading:
            url, match_title = heading.groups('')
            results.append({
                'url': scraper_utils.pathify_url(url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': ''
            })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search ``/search/<title>`` and return at most one result.

    Only the first link and the first image inside ``div#who-likes`` are
    considered; the image's ``alt`` text provides the title and year.

    Returns a list (empty or single-element) of dicts with 'title', 'year'
    and 'url' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s' % (urllib.quote(title)))
    html = self._http_get(search_url, cache_limit=8)
    container = dom_parser2.parse_dom(html, 'div', {'id': 'who-likes'})
    if not container:
        return results

    inner = container[0].content
    links = dom_parser2.parse_dom(inner, 'a', req='href')
    images = dom_parser2.parse_dom(inner, 'img', req='alt')
    if not links or not images:
        return results

    match_url = links[0].attrs['href']
    match_title, match_year = scraper_utils.extra_year(images[0].attrs['alt'])
    if not year or not match_year or year == match_year:
        results.append({
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
            'url': scraper_utils.pathify_url(match_url)
        })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via ``?s=`` and read links from ``ul.listing-videos``.

    Returns early with no results when the page reports that nothing matched
    or the listing is absent. Each link's inner markup, with tags stripped,
    is split into title and year.

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    if re.search('Sorry, but nothing matched', html, re.I):
        return results

    listing = dom_parser2.parse_dom(html, 'ul', {'class': 'listing-videos'})
    if not listing:
        return results

    for attrs, link_body in dom_parser2.parse_dom(listing[0].content, 'a', req='href'):
        match_url = attrs['href']
        plain_text = re.sub('</?[^>]*>', '', link_body)
        match_title, match_year = scraper_utils.extra_year(plain_text)
        if year and match_year and year != match_year:
            continue

        results.append({
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
            'url': scraper_utils.pathify_url(match_url)
        })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Filter the site's /search/ listing by normalized title.

    The page is fetched without any query; every ``div.category-post`` that
    has a link and an ``<h3>`` is considered once per distinct URL and kept
    when the normalized search title occurs in its normalized heading.

    Returns a list of dicts with 'url', 'title' and 'year' keys ('year' is
    always empty).
    """
    results = []
    visited = set()
    search_url = scraper_utils.urljoin(self.base_url, '/search/')
    html = self._http_get(search_url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'category-post'}):
        links = dom_parser2.parse_dom(item, 'a', req='href')
        headings = dom_parser2.parse_dom(item, 'h3')
        if not (links and headings):
            continue

        match_url = scraper_utils.pathify_url(links[0].attrs['href'])
        match_title = headings[0].content
        # A URL is marked as seen even when its title does not match.
        if match_url in visited:
            continue
        visited.add(match_url)

        if norm_title not in scraper_utils.normalize_title(match_title):
            continue

        results.append({
            'url': match_url,
            'title': scraper_utils.cleanse_title(match_title),
            'year': ''
        })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the alphabetical index page for the title's first character.

    Titles starting with a digit use the ``0-9`` page. Every link found in
    the first ``div.home`` fragment is kept when the normalized search title
    occurs in its normalized text and the year filter passes. An empty title
    yields no results.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    if not title:
        return results

    index_key = title[:1].lower()
    if index_key.isdigit():
        index_key = '0-9'
    search_url = urlparse.urljoin(self.base_url, '/alphabet/%s/' % (index_key))
    html = self._http_get(search_url, cache_limit=24)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'home'})
    if not fragment:
        return results

    norm_title = scraper_utils.normalize_title(title)
    for link in re.finditer('''href=["']([^'"]+)[^>]+>([^<]+)''', fragment[0]):
        url, match_title_year = link.groups()
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if norm_title not in scraper_utils.normalize_title(match_title):
            continue
        if year and match_year and year != match_year:
            continue

        results.append({
            'url': scraper_utils.pathify_url(url),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year
        })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via ``?s=`` and return the movie entries among ``div.result-item``.

    An item is used only if it has a ``span.movies`` marker and a
    ``div.title`` containing a link. The link text supplies the title and,
    when present, the year; otherwise the year comes from ``span.year``.

    video_type -- unused here
    title      -- search term
    year       -- year to match; a falsy value disables the year filter
    season     -- unused here

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'result-item'}):
        match = dom_parser2.parse_dom(item, 'div', {'class': 'title'})
        is_movie = dom_parser2.parse_dom(item, 'span', {'class': 'movies'})
        # Skip just this item. Returning here would drop every later result
        # as soon as one non-movie or malformed item appears in the list.
        if not is_movie or not match:
            continue

        match = dom_parser2.parse_dom(match[0].content, 'a', req='href')
        if not match:
            continue

        match_url, match_title_year = match[0].attrs['href'], match[0].content
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not match_year:
            match_year = dom_parser2.parse_dom(item, 'span', {'class': 'year'})
            match_year = match_year[0].content if match_year else ''

        if not year or not match_year or year == match_year:
            result = {
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            }
            results.append(result)

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search through the site's search-box suggestion endpoint.

    Each suggestion with a non-empty 'Value' becomes a result whose URL is
    an ``/ontology/EntityDetails`` link for that value. The year is taken
    from 'ReleaseYear' when it stringifies to something non-empty, else from
    the year embedded in the value text.

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/searchBoxSuggestion')
    html = self._http_get(search_url, params={'top': 8, 'query': title}, cache_limit=8)
    for item in scraper_utils.parse_json(html, search_url):
        entityName = item.get('Value', '')
        if not entityName:
            continue

        match_title, text_year = scraper_utils.extra_year(entityName)
        match_year = str(item.get('ReleaseYear', ''))
        if not match_year:
            match_year = text_year

        query_string = urllib.urlencode({
            'entityName': entityName,
            'ignoreMediaLinkError': 'false'
        })
        match_url = '/ontology/EntityDetails?' + query_string
        if year and match_year and year != match_year:
            continue

        results.append({
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
            'url': scraper_utils.pathify_url(match_url)
        })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via ``?s=`` and return movie links from ``li.box-shadow`` items.

    Links whose ``title`` attribute looks like an episode (``SxxExx``) or is
    labelled "TV SERIES" are skipped. The rest are kept when the year filter
    passes and the normalized search title occurs in the normalized link
    title.

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    if re.search('Sorry, but nothing matched', html, re.I):
        return results

    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'box-shadow'}):
        for attrs, _content in dom_parser2.parse_dom(item, 'a', req=['href', 'title']):
            match_url = attrs['href']
            match_title_year = attrs['title']
            if re.search('S\d{2}E\d{2}', match_title_year):
                continue  # skip episodes
            if re.search('TV\s*SERIES', match_title_year, re.I):
                continue  # skip shows

            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if year and match_year and year != match_year:
                continue
            if norm_title not in scraper_utils.normalize_title(match_title):
                continue

            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            })

    return results
def search(self, video_type, title, year, season=''):
    """Search ``/search?keyword=`` and return movie or season items.

    Items live inside the element whose class contains ``movie-list``. An
    item with a ``div.status`` is treated as a season, otherwise as a movie.
    For season searches the item title must end in the requested season
    number when one is given. No year is extracted from the page, so the
    year filter always passes and 'year' is always empty.

    video_type -- VIDEO_TYPES.MOVIE or VIDEO_TYPES.SEASON
    title      -- search term
    year       -- accepted for interface compatibility; has no effect here
    season     -- season number to match (optional)

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    search_url = urlparse.urljoin(self.base_url, '/search?keyword=%s' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    results = []
    match_year = ''
    # The container is looked up by tag name; 'href' is an attribute, not a
    # tag, so the previous lookup could never find the movie list.
    fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*movie-list[^"]*'})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'div', {'class': 'item'}):
            links = dom_parser.parse_dom(item, 'a', {'class': 'name'}, ret='href')
            titles = dom_parser.parse_dom(item, 'a', {'class': 'name'})
            is_season = dom_parser.parse_dom(item, 'div', {'class': 'status'})
            for match_url, match_title in zip(links, titles):
                wanted_movie = not is_season and video_type == VIDEO_TYPES.MOVIE
                wanted_season = is_season and video_type == VIDEO_TYPES.SEASON
                if not (wanted_movie or wanted_season):
                    continue

                if video_type == VIDEO_TYPES.SEASON:
                    if season and not re.search('\s+%s$' % (season), match_title):
                        continue

                if not year or not match_year or year == match_year:
                    result = {
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': '',
                        'url': scraper_utils.pathify_url(match_url)
                    }
                    results.append(result)

    return results
def search(self, video_type, title, year, season=''):
    """Run the site's advanced search and parse ``div.list_box_title`` entries.

    The URL is assembled by plain string concatenation onto ``base_url``;
    TV show and episode searches add a ``/?tv`` segment first. Each entry's
    link ``title`` attribute (minus an optional "Watch " prefix) is split
    into a title and a trailing four-digit year when one is present.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    url_parts = [self.base_url]
    if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
        url_parts.append('/?tv')
    url_parts.append('/index.php?advanced_search=')
    url_parts.append(urllib.quote_plus(title))
    url_parts.append('&year=' + urllib.quote_plus(str(year)))
    url_parts.append('&advanced_search=Search')
    search_url = ''.join(url_parts)

    html = self._http_get(search_url, cache_limit=.25)
    results = []
    for element in dom_parser.parse_dom(html, 'div', {'class': 'list_box_title'}):
        link = re.search('href="([^"]+)"\s+title="(?:Watch )?([^"]+)', element)
        if not link:
            continue

        url, match_title_year = link.groups()
        split = re.search('(.*?)(?:\s+\(?\s*(\d{4})\s*\)?)', match_title_year)
        if split:
            match_title, match_year = split.groups()
        else:
            match_title, match_year = match_title_year, ''

        if year and match_year and year != match_year:
            continue

        results.append({
            'url': scraper_utils.pathify_url(url),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year
        })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via ``?s=`` and parse every ``div.item`` on the result page.

    The first ``href`` and the following ``alt`` text in an item give the
    URL and the "title (year)" string. When the text carries no year, the
    item's ``span.year`` is used instead, or '' if there is none.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
        found = re.search('href="([^"]+).*?alt="([^"]+)', item, re.DOTALL)
        if not found:
            continue

        url, match_title_year = found.groups()
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not match_year:
            year_fragment = dom_parser.parse_dom(item, 'span', {'class': 'year'})
            match_year = year_fragment[0] if year_fragment else ''

        if year and match_year and year != match_year:
            continue

        results.append({
            'url': scraper_utils.pathify_url(url),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year
        })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search /search.php and read results from ``ul.cbp-rfgrid``.

    Movie searches send the movie-specific parameter set; every other video
    type sends the series set. Each ``<li>`` with a link carrying both
    ``title`` and ``href`` becomes a candidate, filtered by year when both
    sides have one.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search.php')
    if video_type == VIDEO_TYPES.MOVIE:
        params = {
            'all': 'all',
            'searchin': 'mov',
            'subtitles': '',
            'imdbfrom': '',
            'yearrange': '',
            'keywords': title
        }
    else:
        params = {'all': 'all', 'vselect': 'ser', 'keywords': title}

    html = self._http_get(search_url, params=params, cache_limit=8)
    fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'cbp-rfgrid'})
    if not fragment:
        return results

    # The parse result list itself is handed to the parser here, and each
    # yielded item is passed on as-is.
    for item in dom_parser2.parse_dom(fragment, 'li'):
        anchors = dom_parser2.parse_dom(item, 'a', req=['title', 'href'])
        if not anchors:
            continue

        anchor_attrs = anchors[0].attrs
        match_url = anchor_attrs['href']
        match_title, match_year = scraper_utils.extra_year(anchor_attrs['title'])
        if year and match_year and year != match_year:
            continue

        results.append({
            'url': scraper_utils.pathify_url(match_url),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year
        })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via ``?s=`` and parse ``div.browse-movie-top`` entries.

    The first link in each entry gives the URL and the "title (year)" text.
    When the text has no year, the stripped content of
    ``div.browse-movie-year`` is used if present. ``?watching`` is appended
    to every result URL.

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'browse-movie-top'}):
        anchors = dom_parser2.parse_dom(item, 'a', req='href')
        if not anchors:
            continue

        anchor = anchors[0]
        match_url = anchor.attrs['href']
        match_title, match_year = scraper_utils.extra_year(anchor.content)
        if not match_year:
            year_div = dom_parser2.parse_dom(item, 'div', {'class': 'browse-movie-year'})
            if year_div:
                match_year = year_div[0].content.strip()

        match_url = match_url + '?watching'
        if year and match_year and year != match_year:
            continue

        results.append({
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
            'url': scraper_utils.pathify_url(match_url)
        })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site's AJAX TV-show search and return every link found.

    The request goes to ``/search/<title>`` under ``base_url`` with XHR
    headers and a Referer of ``base_url``. Each ``<a href>`` in the response
    becomes a result: its text (tags stripped) is the title and a trailing
    ``(YYYY)`` in the URL, if any, is the year.

    video_type -- unused here
    title      -- search term
    year       -- year to match; a falsy value disables the year filter
    season     -- unused here

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    # Imported locally because this block did not use urllib before.
    import urllib

    results = []
    # Fill in the '%s' placeholder (it used to be sent literally) and keep
    # the path relative so the configured base_url is honoured instead of a
    # hard-coded host.
    # NOTE(review): assumes the search endpoint lives under base_url - confirm.
    search_path = '/search/%s' % (urllib.quote_plus(title))
    search_url = scraper_utils.urljoin(self.base_url, search_path)
    headers = {'Referer': self.base_url}
    headers.update(XHR)
    params = {'ajax': 1, 's': title, 'type': 'TVShows'}
    # The headers were previously built but never passed to the request.
    html = self._http_get(search_url, params=params, headers=headers, cache_limit=8)
    for attrs, match_title in dom_parser2.parse_dom(html, 'a', req='href'):
        match_url = attrs['href']
        match_title = re.sub('</?[^>]*>', '', match_title)
        match = re.search('\((\d{4})\)$', match_url)
        match_year = match.group(1) if match else ''

        if not year or not match_year or year == match_year:
            result = {
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Filter the site's /arsiv/ archive listing by normalized title.

    Inside ``div.ts-list-content`` the ``h1.ts-list-name`` headings are
    paired positionally with the ``<ul>`` detail blocks; the heading gives
    the link and title, the detail block a ``<span>YYYY</span>`` year when
    present. Only the title is used for filtering.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    url = scraper_utils.urljoin(self.base_url, '/arsiv/')
    html = self._http_get(url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'ts-list-content'})
    if not fragment:
        return results

    listing = fragment[0].content
    headings = dom_parser2.parse_dom(listing, 'h1', {'class': 'ts-list-name'})
    detail_blocks = dom_parser2.parse_dom(listing, 'ul')
    for heading, detail in zip(headings, detail_blocks):
        anchors = dom_parser2.parse_dom(heading.content, 'a', req='href')
        year_match = re.search('<span>(\d{4})</span>', detail.content)
        if not anchors:
            continue

        match_url = anchors[0].attrs['href']
        match_title = anchors[0].content
        match_year = year_match.group(1) if year_match else ''
        if norm_title not in scraper_utils.normalize_title(match_title):
            continue

        results.append({
            'url': scraper_utils.pathify_url(match_url),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year
        })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search through /ajax/search.php and keep items of the wanted kind.

    The query is POSTed with XHR headers. Items are kept when their 'meta'
    starts (case-insensitively) with "TV SHOW" for TV show / episode
    searches, or "MOVIE" otherwise. No year is extracted or filtered.

    Returns a list of dicts with 'title', 'url' and 'year' keys ('year' is
    always empty).
    """
    search_url = scraper_utils.urljoin(self.base_url, '/ajax/search.php')
    query = {
        'q': title,
        'limit': 100,
        'timestamp': int(time.time() * 1000),
        'verifiedCheck': ''
    }
    html = self._http_get(search_url, data=query, headers=XHR, cache_limit=1)

    wants_tv = video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]
    media_type = 'TV SHOW' if wants_tv else 'MOVIE'

    results = []
    for item in scraper_utils.parse_json(html, search_url):
        if item['meta'].upper().startswith(media_type):
            results.append({
                'title': scraper_utils.cleanse_title(item['title']),
                'url': scraper_utils.pathify_url(item['permalink']),
                'year': ''
            })

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search /videos/search/ and parse ``article.movie_item`` entries.

    The request is sent with an empty Referer header. Each article's first
    link carrying both ``href`` and ``data-title`` becomes a result. No year
    is available, so the year filter always passes.

    Returns a list of dicts with 'title', 'year' and 'url' keys ('year' is
    always empty).
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/videos/search/')
    html = self._http_get(search_url, params={'search': title, 've': 1},
                          headers={'Referer': ''}, cache_limit=8)
    for _attrs, article in dom_parser2.parse_dom(html, 'article', {'class': 'movie_item'}):
        anchors = dom_parser2.parse_dom(article, 'a', req=['href', 'data-title'])
        if not anchors:
            continue

        anchor_attrs = anchors[0].attrs
        match_url = anchor_attrs['href']
        match_title = anchor_attrs['data-title']
        match_year = ''
        if year and match_year and year != match_year:
            continue

        results.append({
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
            'url': scraper_utils.pathify_url(match_url)
        })

    return results
def search(self, video_type, title, year, season=''):
    """Search ``/search/<title>.html`` and return movie or season thumbs.

    A thumb whose link title ends in "Season <n>" is a season, anything else
    a movie. Movies take their year from the thumb's ``status-year`` div
    when present; seasons must carry the requested season number when one is
    given.

    video_type -- VIDEO_TYPES.MOVIE or VIDEO_TYPES.SEASON
    title      -- search term
    year       -- year to match; a falsy value disables the year filter
    season     -- season number to match (optional)

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    search_url = urlparse.urljoin(self.base_url, '/search/%s.html')
    search_url = search_url % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=8)
    results = []
    for thumb in dom_parser.parse_dom(html, 'div', {'class': 'thumb'}):
        match_title = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='title')
        url = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='href')
        if not match_title or not url:
            continue

        match_title, url = match_title[0], url[0]
        is_season = re.search('Season\s+(\d+)$', match_title, re.I)
        wanted_movie = not is_season and video_type == VIDEO_TYPES.MOVIE
        wanted_season = is_season and video_type == VIDEO_TYPES.SEASON
        if not (wanted_movie or wanted_season):
            continue

        match_year = ''
        if video_type == VIDEO_TYPES.MOVIE:
            year_nodes = dom_parser.parse_dom(thumb, 'div', {'class': '[^"]*status-year[^"]*'})
            # Only overwrite the '' default when a year was found; otherwise
            # the empty parse result (a list) would end up as the 'year'.
            if year_nodes:
                match_year = year_nodes[0]
        else:
            if season and int(is_season.group(1)) != int(season):
                continue

        if not year or not match_year or year == match_year:
            result = {
                'url': scraper_utils.pathify_url(url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)

    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search /search.php and parse ``td.topic_content`` cells.

    HTML comments are removed from the page first. Each cell needs a link
    and an image; the image ``alt`` text gives the title and year. A cell is
    kept when the normalized search title occurs in its normalized title and
    the year filter passes.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    page_url = scraper_utils.urljoin(self.base_url, '/search.php')
    html = self._http_get(page_url, params={'dayq': title}, cache_limit=60)
    html = re.sub('<!--.*?-->', '', html)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, td in dom_parser2.parse_dom(html, 'td', {'class': 'topic_content'}):
        links = dom_parser2.parse_dom(td, 'a', req='href')
        images = dom_parser2.parse_dom(td, 'img', req='alt')
        if not (links and images):
            continue

        match_url = links[0].attrs['href']
        # Prefixing relative URLs with '/tvseries/' is disabled here (that
        # statement is commented out), so the href is used as found.
        match_title, match_year = scraper_utils.extra_year(images[0].attrs['alt'])
        if norm_title not in scraper_utils.normalize_title(match_title):
            continue
        if year and match_year and year != match_year:
            continue

        results.append({
            'url': scraper_utils.pathify_url(match_url),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year
        })

    return results