def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/index.php?search_keywords=')
    search_url += urllib.quote_plus(title)
    search_url += '&year=' + urllib.quote_plus(str(year))
    if video_type == 'shows':
        search_url += '&search_section=2'
    else:
        search_url += '&search_section=1'

    results = []
    # the search endpoint only accepts queries carrying the hidden key from the landing page
    html = self._http_get(self.base_url, cache_limit=0)
    # kodi.log("HTML is : " + html)
    match = re.search('input type="hidden" name="key" value="([0-9a-f]*)"', html)
    if match:
        key = match.group(1)
        search_url += '&key=' + key
        html = self._http_get(search_url, cache_limit=.25)
        pattern = r'class="index_item.+?href="(.+?)" title="Watch (.+?)"?\(?([0-9]{4})?\)?"?>'
        for match in re.finditer(pattern, html):
            match_url, match_title, match_year = match.groups('')
            result = {
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)
    else:
        log_utils.log('Unable to locate PW search key', log_utils.LOGWARNING)
    return results
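# A minimal sketch of the hidden-key handshake above: the landing page embeds a
# hex "key" input that must be appended to the search URL before a query is
# accepted. The sample HTML here is hypothetical:
#
#     >>> import re
#     >>> html = '<input type="hidden" name="key" value="0a1b2c3d4e5f">'
#     >>> re.search('name="key" value="([0-9a-f]*)"', html).group(1)
#     '0a1b2c3d4e5f'
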
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/' + urllib.quote_plus(title))
    # search_url = urlparse.urljoin(self.base_url, '/suggest.php')
    headers = {'Referer': self.base_url}
    headers.update(XHR)
    params = {'ajax': 1, 's': title, 'type': 'TVShows'}
    html = self._http_get(search_url, params=params, headers=headers, cache_limit=8)
    # kodi.log(html)
    for match in re.finditer('href="([^"]+)[^>]*>(.*?)</a>', html):
        match_url, match_title = match.groups()
        match_title = re.sub('</?span[^>]*>', '', match_title)
        match = re.search(r'\((\d{4})\)$', match_url)
        if match:
            match_year = match.group(1)
        else:
            match_year = ''

        if not year or not match_year or year == match_year:
            result = {
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    headers = {'Referer': self.base_url}
    params = {'search': title}
    html = self._http_get(self.base_url, params=params, headers=headers, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'listCard'}):
        match_title = dom_parser.parse_dom(item, 'p', {'class': 'extraTitle'})
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_year = dom_parser.parse_dom(item, 'p', {'class': 'cardYear'})
        if match_url and match_title:
            match_url = match_url[0]
            match_title = match_title[0]
            match_year = match_year[0] if match_year else ''

            if not year or not match_year or year == match_year:
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)
    return results
def __movie_search(self, title, year):
    results = []
    norm_title = scraper_utils.normalize_title(title)
    html = self._http_get(self.base_url, cache_limit=48)
    for item in self._parse_directory(html):
        if not item['directory']:
            meta = scraper_utils.parse_movie_link(item['title'])
            if meta['dubbed']:
                continue
            if (norm_title in scraper_utils.normalize_title(meta['title'])) and (not year or not meta['year'] or year == meta['year']):
                match_title = meta['title'].replace('.', ' ')
                match_title += ' [%sp.%s]' % (meta['height'], meta['extra'])
                result = {
                    'url': scraper_utils.pathify_url(item['link']),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': meta['year']
                }
                results.append(result)
    return results
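# The meta dict used above comes from scraper_utils.parse_movie_link(); the real
# parser lives elsewhere, but the callers in this file rely on (at least) the
# keys 'title', 'year', 'height', 'extra', and 'dubbed'. A rough sketch of the
# expected contract, with a hypothetical file name:
#
#     >>> parse_movie_link('Some.Movie.2015.1080p.BluRay.x264')
#     {'title': 'Some.Movie', 'year': '2015', 'height': '1080',
#      'extra': 'BluRay.x264', 'dubbed': False}
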
def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/?s=%s&submit=Search+Now!' % (urllib.quote_plus(title)))
    headers = {'Referer': search_url}
    html = self._http_get(search_url, headers=headers, cache_limit=8)
    index = 0 if video_type == 'shows' else 1
    fragments = re.findall('<h2.*?(?=<h2|$)', html, re.DOTALL)
    if len(fragments) > index:
        for item in dom_parser.parse_dom(fragments[index], 'div', {'class': 'aaa_item'}):
            match_title_year = dom_parser.parse_dom(item, 'a', ret='title')
            match_url = dom_parser.parse_dom(item, 'a', ret='href')
            if match_title_year and match_url:
                match_url = match_url[0]
                match_title_year = match_title_year[0]
                match = re.search(r'(.*?)\s+\((\d{4})\)', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''

                if not year or not match_year or year == match_year:
                    result = {
                        'url': scraper_utils.pathify_url(match_url),
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year
                    }
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    results = []
    if video_type == 'movies':
        results = self.__movie_search(title, year)
    else:
        norm_title = scraper_utils.normalize_title(title)
        html = self._http_get(self.base_url, cache_limit=48)
        for item in self._parse_directory(html):
            if norm_title in scraper_utils.normalize_title(item['title']) and item['directory']:
                result = {
                    'url': scraper_utils.pathify_url(item['link']),
                    'title': scraper_utils.cleanse_title(item['title']),
                    'year': ''
                }
                results.append(result)
    return results
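# Both directory-walking searches above consume self._parse_directory(), whose
# yielded items must expose at least 'title', 'link', and 'directory' (True for
# sub-folders, False for files). A hypothetical item, inferred from usage:
#
#     {'title': 'Some.Movie.2015.1080p.BluRay.x264', 'link': '/files/abc/',
#      'directory': False}
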
def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/wp-content/themes/afdah/ajax-search.php')
    # kodi.log(search_url)
    data = {'search': title, 'type': 'title'}
    html = self._http_get(search_url, data=data, cache_limit=1)
    # kodi.log(html)
    pattern = r'<li>.*?href="([^"]+)">([^<]+)\s+\((\d{4})\)'
    results = []
    for match in re.finditer(pattern, html, re.DOTALL | re.I):
        match_url, match_title, match_year = match.groups('')
        if not year or not match_year or year == match_year:
            result = {
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year  # year parsed from the result, not the search year
            }
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    folders = ['/addons/real-movies/base.xml']
    norm_title = scraper_utils.normalize_title(title)
    # appending inside the loop queues newly discovered <folder> files for crawling
    for page_url in folders:
        # kodi.log(page_url)
        xml_file = os.path.basename(page_url)
        page_url = urlparse.urljoin(self.base_url, page_url)
        xml = self._http_get(page_url, require_debrid=True, cache_limit=48)
        new_folders = re.findall('<folder>(.*?)</folder>', xml, re.I)
        if new_folders:
            folders += [folder for folder in new_folders if folder]

        for match in re.finditer('<item>(.*?)</item>', xml, re.I | re.DOTALL):
            item = match.group(1)
            match_title_year = re.search('<title>(.*?)</title>', item, re.I)
            match_url = re.search('<link>(.*?)</link>', item, re.I)
            if match_title_year and match_url:
                match_title_year = match_title_year.group(1)
                match_url = match_url.group(1)
                if match_title_year and match_url:
                    match_title, match_year = scraper_utils.extra_year(match_title_year)
                    xml_file = xml_file.replace(' ', '').lower()
                    match_url = 'xml_file=%s&link=%s' % (xml_file, match_url)
                    if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                        if 'format' in XML_META.get(xml_file, {}):
                            match_title += ' (%s)' % (XML_META[xml_file]['format'])
                        result = {
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': match_url
                        }
                        results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    results = []
    if video_type == 'movies':
        url = urlparse.urljoin(self.base_url, '/movies/a-z/')
    else:
        url = urlparse.urljoin(self.base_url, '/tv/a-z/')

    # strip a leading article, then pick the site's A-Z index page for the title
    if title.upper().startswith('THE '):
        search_title = title[4:]
    elif title.upper().startswith('A '):
        search_title = title[2:]
    else:
        search_title = title

    if search_title[:1] in string.digits:
        first_letter = '1'  # all numeric titles share the '1' page
    else:
        first_letter = search_title[:1]
    url = url + first_letter.upper()

    html = self._http_get(url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    pattern = 'class=star.*?href=([^>]+)>(.*?)</a>'
    for match in re.finditer(pattern, html, re.DOTALL):
        match_url, match_title_year = match.groups()
        match = re.search(r'(.*?)\s+\((\d{4})\)', match_title_year)
        if match:
            match_title, match_year = match.groups()
        else:
            match_title = match_title_year
            match_year = ''

        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result = {
                'url': match_url,
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)
    return results
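# The article-stripping logic above maps a title onto the site's A-Z index page;
# hypothetical examples of the bucketing it produces:
#
#     'The Matrix' -> 'M'    (leading 'The ' is skipped)
#     'A Team'     -> 'T'    (leading 'A ' is skipped)
#     '2012'       -> '1'    (numeric titles are grouped under '1')
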
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/ajax_search')
    html = self._http_get(search_url, params={'q': title}, headers=XHR, cache_limit=1)
    js_result = scraper_utils.parse_json(html, search_url)
    match_year = ''  # the ajax results carry no year, so the year filter below always passes
    if 'series' in js_result:
        for series in js_result['series']:
            if 'seo' in series and 'label' in series:
                if not year or not match_year or year == match_year:
                    result = {
                        'url': scraper_utils.pathify_url('/' + series['seo']),
                        'title': scraper_utils.cleanse_title(series['label']),
                        'year': match_year
                    }
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search')
    if video_type == 'movies':
        data = {'searchin': 'm'}
    else:
        data = {'searchin': 't'}
    data.update({'searchquery': title})
    html = self._http_get(search_url, data=data, cache_limit=8)
    pattern = r'href="([^"]+)">(.*?)\s+\((\d{4})\)'
    for match in re.finditer(pattern, html):
        match_url, match_title, match_year = match.groups('')
        if not year or not match_year or year == match_year:
            # fix wrong url returned from search results
            match_url = match_url.replace('/episode/', '/tv-shows/')
            result = {
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = urlparse.urljoin(self.base_url, '/wp-content/themes/afdah/ajax-search.php')
    data = {'search': title, 'type': 'title'}
    html = self._http_get(search_url, data=data, headers=XHR, cache_limit=1)
    for item in dom_parser.parse_dom(html, 'li'):
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title_year = dom_parser.parse_dom(item, 'a')
        if match_url and match_title_year:
            match_url = match_url[0]
            match_title, match_year = scraper_utils.extra_year(match_title_year[0])
            if not year or not match_year or year == match_year:
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)
    return results
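# scraper_utils.extra_year() (also used by the XML scraper above) splits a
# "Title (YYYY)" string into a (title, year) pair. A minimal sketch of that
# contract, assuming it behaves like the inline regexes elsewhere in this file:
#
#     >>> extra_year('Heat (1995)')
#     ('Heat', '1995')
#     >>> extra_year('Heat')
#     ('Heat', '')
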
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, require_debrid=True, cache_limit=1)
    if video_type == 'shows':
        seen_urls = {}
        for post in dom_parser.parse_dom(html, 'div', {'id': r'post-\d+'}):
            if 'shows' in post:
                match = re.search(r'<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
                if match:
                    show_url, match_title = match.groups()
                    if show_url not in seen_urls:
                        result = {
                            'url': scraper_utils.pathify_url(show_url),
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': ''
                        }
                        seen_urls[show_url] = result
                        results.append(result)
    elif video_type == 'movies':
        headings = re.findall(r'<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = dom_parser.parse_dom(html, 'div', {'id': r'post-\d+'})
        norm_title = scraper_utils.normalize_title(title)
        for heading, post in zip(headings, posts):
            if 'movies' in post and not self.__too_old(post):
                post_url, post_title = heading
                match = re.search(r'(.*?)\s*[.\[(]?(\d{4})[.)\]]?\s*(.*)', post_title)
                if match:
                    match_title, match_year, extra_title = match.groups()
                    full_title = '%s [%s]' % (match_title, extra_title)
                else:
                    full_title = match_title = post_title
                    match_year = ''

                match_norm_title = scraper_utils.normalize_title(match_title)
                if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                    result = {
                        'url': scraper_utils.pathify_url(post_url),
                        'title': scraper_utils.cleanse_title(full_title),
                        'year': match_year
                    }
                    results.append(result)
    return results
def _blog_proc_results(self, html, post_pattern, date_format, video_type, title, year):
    results = []
    search_date = ''
    search_sxe = ''
    if video_type == VIDEO_TYPES.EPISODE:
        match = re.search(r'(.*?)\s*(S\d+E\d+)\s*', title)
        if match:
            show_title, search_sxe = match.groups()
        else:
            match = re.search(r'(.*?)\s*(\d{4})[._ -]?(\d{2})[._ -]?(\d{2})\s*', title)
            if match:
                show_title, search_year, search_month, search_day = match.groups()
                search_date = '%s-%s-%s' % (search_year, search_month, search_day)
                search_date = scraper_utils.to_datetime(search_date, '%Y-%m-%d').date()
            else:
                show_title = title
    else:
        show_title = title

    # the filter setting is constant per call, so parse it once outside the loop
    try:
        filter_days = int(kodi.get_setting('%s-filter' % (self.get_name())))
    except ValueError:
        filter_days = 0
    filter_days = datetime.timedelta(days=filter_days)

    today = datetime.date.today()
    for match in re.finditer(post_pattern, html, re.DOTALL):
        post_data = match.groupdict()
        post_title = post_data['post_title']
        post_title = re.sub('<[^>]*>', '', post_title)
        if 'quality' in post_data:
            post_title += ' - [%s]' % (post_data['quality'])

        if filter_days and date_format and 'date' in post_data:
            post_data['date'] = post_data['date'].strip()
            post_date = scraper_utils.to_datetime(post_data['date'], date_format).date()
            if not post_date:
                log_utils.log('Failed date check in %s: |%s|%s|' % (self.get_name(), post_data['date'], date_format), log_utils.LOGWARNING)
                post_date = today
            if today - post_date > filter_days:
                continue

        match_year = ''
        match_date = ''
        match_sxe = ''
        if video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(post_title)
            match_year = meta['year']
        else:
            meta = scraper_utils.parse_episode_link(post_title)
            match_sxe = 'S%02dE%02d' % (int(meta['season']), int(meta['episode']))
            match_date = meta['airdate']
        match_title = meta['title']
        full_title = '%s (%sp) [%s]' % (meta['title'], meta['height'], meta['extra'])

        norm_title = scraper_utils.normalize_title(show_title)
        match_norm_title = scraper_utils.normalize_title(match_title)
        title_match = norm_title and (match_norm_title in norm_title or norm_title in match_norm_title)
        year_match = not year or not match_year or year == match_year
        sxe_match = not search_sxe or (search_sxe == match_sxe)
        date_match = not search_date or (search_date == match_date)
        log_utils.log('Blog Results: |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| (%s)' % (
            match_norm_title, norm_title, title_match, year, match_year, year_match,
            search_date, match_date, date_match, search_sxe, match_sxe, sxe_match,
            self.get_name()), log_utils.LOGDEBUG)
        if title_match and year_match and date_match and sxe_match:
            result = {
                'url': scraper_utils.pathify_url(post_data['url']),
                'title': scraper_utils.cleanse_title(full_title),
                'year': match_year
            }
            results.append(result)
    return results
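# _blog_proc_results() reads its matches via match.groupdict(), so any
# post_pattern passed in must define named groups: 'url' and 'post_title' are
# required, 'date' and 'quality' are optional. A hypothetical caller:
#
#     post_pattern = r'<h2>\s*<a\s+href="(?P<url>[^"]+)"[^>]*>(?P<post_title>[^<]+)</a>' \
#                    r'.*?<span class="date">(?P<date>[^<]+)</span>'
#     results = self._blog_proc_results(html, post_pattern, '%Y-%m-%d',
#                                       VIDEO_TYPES.MOVIE, title, year)
#
# The markup in the pattern is illustrative only; each scraper supplies a
# pattern matching its own site's HTML.
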
def processCaptcha(self, key, lang, name=None, referer=None):
    if referer is None:
        referer = 'https://www.google.com/recaptcha/api2/demo'
    headers = {'Referer': referer, 'Accept-Language': lang}
    html = get_url('http://www.google.com/recaptcha/api/fallback?k=%s' % (key), headers=headers)
    token = ''
    iteration = 0
    while True:
        payload = dom_parser2.parse_dom(html, 'img', {'class': 'fbc-imageselect-payload'}, req='src')
        iteration += 1
        message = dom_parser2.parse_dom(html, 'label', {'class': 'fbc-imageselect-message-text'})
        if not message:
            message = dom_parser2.parse_dom(html, 'div', {'class': 'fbc-imageselect-message-error'})

        if message and payload:
            message = message[0].content
            payload = payload[0].attrs['src']
        else:
            # no challenge offered: either a verification token is present or we failed
            token = dom_parser2.parse_dom(html, 'div', {'class': 'fbc-verification-token'})
            if token:
                token = dom_parser2.parse_dom(token[0].content, 'textarea')[0].content
                logger.log('Captcha Success: %s' % (token), log_utils.LOGDEBUG)
            else:
                logger.log('Captcha Failed', log_utils.LOGDEBUG)
            break

        cval = dom_parser2.parse_dom(html, 'input', {'name': 'c'}, req='value')
        if not cval:
            break
        cval = cval[0].attrs['value']

        captcha_imgurl = scraper_utils.urljoin('https://www.google.com', scraper_utils.cleanse_title(payload))
        message = message.replace('<strong>', '[B]').replace('</strong>', '[/B]')
        message = re.sub(re.compile('</?(div|strong)[^>]*>', re.I), '', message)
        if any(c in message for c in ('<', '>')):
            logger.log('Suspicious Captcha Prompt: %s' % (message), log_utils.LOGWARNING)

        oSolver = cInputWindow(captcha=captcha_imgurl, msg=message, iteration=iteration, name=name)
        captcha_response = oSolver.get()
        if not captcha_response:
            break

        data = {'c': cval, 'response': captcha_response}
        html = get_url('http://www.google.com/recaptcha/api/fallback?k=%s' % (key), data=data, headers=headers)
    return token
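# A typical (hypothetical) call site for the fallback-captcha solver above: the
# site key and language come from the protected page, and the returned token is
# submitted back with the original form (the standard reCAPTCHA field name is
# 'g-recaptcha-response'):
#
#     token = self.processCaptcha(site_key, 'en', referer=page_url)
#     if token:
#         form_data['g-recaptcha-response'] = token
#
# 'site_key', 'page_url', and 'form_data' are placeholders scraped from the page.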