def _get_episode_url(self, show_url, video):
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    episode_pattern = r'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
    parts = dom_parser2.parse_dom(html, 'ul', {'class': 'episode_list'})
    fragment = '\n'.join(part.content for part in parts)

    # First try the standard SxxExx match against the episode list
    result = self._default_get_episode_url(fragment, video, episode_pattern)
    if result:
        return result

    ep_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(fragment, 'a', req='href')]
    ep_dates = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_air_d'})]
    ep_titles = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_name'})]
    force_title = scraper_utils.force_title(video)

    # Airdate fallback, unless a title-only search is forced for this video
    if not force_title and kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
        for ep_url, ep_date in zip(ep_urls, ep_dates):
            logger.log('Quikr Ep Airdate Matching: %s - %s - %s' % (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG)
            if video.ep_airdate == scraper_utils.to_datetime(ep_date, '%Y-%m-%d').date():
                return scraper_utils.pathify_url(ep_url)

    # Title fallback: compare normalized episode titles
    if force_title or kodi.get_setting('title-fallback') == 'true':
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for ep_url, ep_title in zip(ep_urls, ep_titles):
            ep_title = re.sub(r'<span>.*?</span>\s*', '', ep_title)
            logger.log('Quikr Ep Title Matching: %s - %s - %s' % (ep_url.encode('utf-8'), ep_title.encode('utf-8'), video.ep_title), log_utils.LOGDEBUG)
            if norm_title == scraper_utils.normalize_title(ep_title):
                return scraper_utils.pathify_url(ep_url)
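# Illustrative only: for season 1, episode 2 the pattern above expands to
#   r'href="([^"]+-s0*1e0*2(?!\d)[^"]*)'
# which matches href="/some-show-s01e02-title" (URL made up for the example),
# while the (?!\d) lookahead keeps it from also matching s01e020, s01e021, etc.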
def _get_episode_url(self, show_url, video):
    query = scraper_utils.parse_query(show_url)
    if 'id' in query:
        url = scraper_utils.urljoin(self.base_url, '/api/v2/shows/%s' % (query['id']))
        js_data = self._http_get(url, cache_limit=.5)
        if 'episodes' in js_data:
            force_title = scraper_utils.force_title(video)
            if not force_title:
                # Primary match: season and episode numbers
                for episode in js_data['episodes']:
                    if int(video.season) == int(episode['season']) and int(video.episode) == int(episode['number']):
                        return scraper_utils.pathify_url('?id=%s' % (episode['id']))

                # Airdate fallback; the site's airdates appear to run a day
                # ahead, so compare against the API date shifted back one day
                if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                    for episode in js_data['episodes']:
                        if 'airdate' in episode:
                            ep_airdate = scraper_utils.to_datetime(episode['airdate'], '%Y-%m-%d').date()
                            if video.ep_airdate == (ep_airdate - datetime.timedelta(days=1)):
                                return scraper_utils.pathify_url('?id=%s' % (episode['id']))
            else:
                logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)

            # Title fallback: substring match on normalized episode names
            if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
                norm_title = scraper_utils.normalize_title(video.ep_title)
                for episode in js_data['episodes']:
                    if 'name' in episode and norm_title in scraper_utils.normalize_title(episode['name']):
                        return scraper_utils.pathify_url('?id=%s' % (episode['id']))
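# Shape of the /api/v2/shows/<id> payload this method relies on (field names are
# taken from the lookups above; the values are made up for illustration):
#   {'episodes': [{'id': 1421, 'season': 1, 'number': 2,
#                  'airdate': '2016-03-01', 'name': 'Some Episode'}, ...]}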
def __too_old(self, post):
    filter_days = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
    post_date = post.get('post_date', '')
    if filter_days and post_date:
        today = datetime.date.today()
        try:
            post_date = scraper_utils.to_datetime(post_date, '%Y-%m-%d %H:%M:%S').date()
            if today - post_date > filter_days:
                return True
        except ValueError:
            return False
    return False
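# Illustrative usage: with the '<scraper>-filter' setting at 30, a post dict of
#   {'post_date': '2017-01-01 00:00:00'}
# makes __too_old() return True once more than 30 days have passed since that date.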
def __too_old(self, post):
    try:
        filter_days = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
        if filter_days:
            # Pull the year, short month name, and day out of the post's date markup
            post_date = re.search(r'class="postMonth"\s+title="(\d+)[^>]*>([^<]+).*?class="postDay"[^>]*>(\d+)', post, re.DOTALL)
            year, mon_name, day = post_date.groups()
            post_date = '%s-%s-%s' % (year, SHORT_MONS.index(mon_name) + 1, day)
            post_date = scraper_utils.to_datetime(post_date, '%Y-%m-%d').date()
            if datetime.date.today() - post_date > filter_days:
                return True
    except (AttributeError, ValueError, TypeError):
        # No date markup found (.groups() on None), an unknown month name, or a
        # malformed setting/date string: treat the post as not too old
        return False
    return False
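# The regex above expects post-date markup along these lines (illustrative):
#   <span class="postMonth" title="2017">Mar</span> ... <span class="postDay">07</span>
# and assumes SHORT_MONS is the list of three-letter month names
# ['Jan', 'Feb', ..., 'Dec'], so SHORT_MONS.index('Mar') + 1 == 3.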
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = base64.decodestring(SEARCH_URL) % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=2)
    if html:
        js_data = scraper_utils.parse_json(html)
        search_meta = scraper_utils.parse_episode_link(title)
        for item in js_data.get('results', []):
            metatags = item.get('richSnippet', {}).get('metatags', {})

            # Skip posts older than the user's filter window
            post_date = metatags.get('articlePublishedTime')
            if post_date:
                post_date = re.sub(r'[+-]\d+:\d+$', '', post_date)  # strip the UTC offset
                post_date = scraper_utils.to_datetime(post_date, '%Y-%m-%dT%H:%M:%S').date()
                if self.__too_old(post_date):
                    continue

            match_title = metatags.get('ogTitle', '')
            if not match_title:
                match_title = item['titleNoFormatting']
            match_title = re.sub(re.compile(r'\s*-\s*Release\s*Scene$', re.I), '', match_title)
            match_url = item['url']
            match_year = ''
            item_meta = scraper_utils.parse_episode_link(match_title)
            if scraper_utils.meta_release_check(video_type, search_meta, item_meta):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)

    # Fall back to scraping the site's own search page
    if not results:
        results = self.__site_search(video_type, title, year)
    return results
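# The search endpoint is read as Google custom-search-style JSON (field names
# come from the lookups above; the values are made up for illustration):
#   {'results': [{'url': 'http://example.com/some-release/',
#                 'titleNoFormatting': 'Some.Release.2017.720p - Release Scene',
#                 'richSnippet': {'metatags': {
#                     'ogTitle': 'Some.Release.2017.720p',
#                     'articlePublishedTime': '2017-03-07T12:00:00+00:00'}}}]}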
def _blog_proc_results(self, html, post_pattern, date_format, video_type, title, year):
    results = []
    search_date = ''
    search_sxe = ''
    if video_type == VIDEO_TYPES.EPISODE:
        match = re.search(r'(.*?)\s*(S\d+E\d+)\s*', title)
        if match:
            show_title, search_sxe = match.groups()
        else:
            match = re.search(r'(.*?)\s*(\d{4})[._ -]?(\d{2})[._ -]?(\d{2})\s*', title)
            if match:
                show_title, search_year, search_month, search_day = match.groups()
                search_date = '%s-%s-%s' % (search_year, search_month, search_day)
                search_date = scraper_utils.to_datetime(search_date, '%Y-%m-%d').date()
            else:
                show_title = title
    else:
        show_title = title

    today = datetime.date.today()
    for match in re.finditer(post_pattern, html, re.DOTALL):
        post_data = match.groupdict()
        post_title = post_data['post_title']
        post_title = re.sub('<[^>]*>', '', post_title)  # strip any markup from the title
        if 'quality' in post_data:
            post_title += '- [%s]' % (post_data['quality'])

        try:
            filter_days = int(kodi.get_setting('%s-filter' % (self.get_name())))
        except ValueError:
            filter_days = 0

        # Date filter: skip posts older than the configured number of days
        if filter_days and date_format and 'date' in post_data:
            post_data['date'] = post_data['date'].strip()
            filter_days = datetime.timedelta(days=filter_days)
            post_date = scraper_utils.to_datetime(post_data['date'], date_format)
            if post_date:
                post_date = post_date.date()
            else:
                # Guard before calling .date() so this failure branch is reachable
                logger.log('Failed date Check in %s: |%s|%s|' % (self.get_name(), post_data['date'], date_format), log_utils.LOGWARNING)
                post_date = today

            if today - post_date > filter_days:
                continue

        match_year = ''
        match_date = ''
        match_sxe = ''
        match_title = full_title = post_title
        if video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(post_title)
            match_year = meta['year']
        else:
            meta = scraper_utils.parse_episode_link(post_title)
            match_sxe = 'S%02dE%02d' % (int(meta['season']), int(meta['episode']))
            match_date = meta['airdate']

        match_title = meta['title']
        full_title = '%s (%sp) [%s]' % (meta['title'], meta['height'], meta['extra'])
        norm_title = scraper_utils.normalize_title(show_title)
        match_norm_title = scraper_utils.normalize_title(match_title)
        title_match = norm_title and (match_norm_title in norm_title or norm_title in match_norm_title)
        year_match = not year or not match_year or year == match_year
        sxe_match = not search_sxe or (search_sxe == match_sxe)
        date_match = not search_date or (search_date == match_date)
        logger.log('Blog Results: |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| (%s)' %
                   (match_norm_title, norm_title, title_match, year, match_year, year_match,
                    search_date, match_date, date_match, search_sxe, match_sxe, sxe_match,
                    self.get_name()), log_utils.LOGDEBUG)
        if title_match and year_match and date_match and sxe_match:
            quality = scraper_utils.height_get_quality(meta['height'])
            result = {'url': scraper_utils.pathify_url(post_data['url']),
                      'title': scraper_utils.cleanse_title(full_title),
                      'year': match_year,
                      'quality': quality}
            results.append(result)
    return results
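# post_pattern must supply named groups matching the groupdict() lookups above:
# 'post_title' and 'url' are required; 'date' and 'quality' are optional.
# An illustrative (made-up) pattern for a typical blog index page:
#   r'<div\s+class="post"[^>]*>\s*<a\s+href="(?P<url>[^"]+)"[^>]*>(?P<post_title>[^<]+)</a>\s*<span\s+class="date">(?P<date>[^<]+)</span>'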