def _get_episode_url(self, show_url, video):
    """Find the post for |video| by walking the show's paginated post list.

    Matches on '.SxxEyy.' or '.YYYY.MM.DD.' in the post heading title, or
    (when title search is forced) on the episode title inside the post body.
    Follows the 'nextpostslink' anchor until a match is found or posts are
    older than the configured window. Returns a pathified url or None.
    """
    sxe = '.S%02dE%02d.' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    try:
        ep_airdate = video.ep_airdate.strftime('.%Y.%m.%d.')
    except AttributeError:
        # ep_airdate unset / not a date: disable airdate matching.
        # (was a bare except:, which also hid real errors)
        ep_airdate = ''
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    if (sxe in title) or (ep_airdate and ep_airdate in title):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('<strong>(.*?)</strong>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def _get_episode_url(self, show_url, video):
    """Find the post for |video| by walking the show's paginated post list.

    Pairs each <h2> heading (url, title) with its post-NNN div, matches via
    scraper_utils.release_check() (or the episode title inside the post when
    title search is forced), and follows 'nextpostslink' pagination until a
    match is found or __too_old() stops the scan. Returns a pathified url
    or None.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = scraper_utils.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    # release_check matches SxxEyy / airdate forms; the
                    # episode title is not required here
                    if scraper_utils.release_check(video, title, require_title=False):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('<strong>(.*?)</strong>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url:
            page_url = [page_url[0].attrs['href']]
def _get_episode_url(self, video):
    """Scan the debrid torrent list for an item matching |video|.

    Extracts the show-title prefix from each torrent name via an SxxEyy
    pattern (or an airdate pattern when enabled) and returns 'hash=<hash>'
    for the first item whose prefix contains the normalized show title.
    """
    url = urlparse.urljoin(self.base_url, '/torrent/list')
    js_data = self._http_get(url, cache_limit=0)
    norm_title = scraper_utils.normalize_title(video.title)
    if 'torrents' in js_data:
        airdate_fallback = kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate
        show_title = ''
        if not scraper_utils.force_title(video):
            for item in js_data['torrents']:
                sxe_pattern = '(.*?)[. ][Ss]%02d[Ee]%02d[. ]' % (int(video.season), int(video.episode))
                match = re.search(sxe_pattern, item['name'])
                if match:
                    show_title = match.group(1)
                elif airdate_fallback:
                    airdate_pattern = '(.*?)[. ]%s[. ]%02d[. ]%02d[. ]' % (video.ep_airdate.year, video.ep_airdate.month, video.ep_airdate.day)
                    match = re.search(airdate_pattern, item['name'])
                    if match:
                        show_title = match.group(1)
                # NOTE(review): show_title is not reset between iterations, so a
                # non-matching item can be returned using the previous item's
                # title -- looks unintended; confirm before relying on it
                if show_title and norm_title in scraper_utils.normalize_title(show_title):
                    return 'hash=%s' % (item['hash'])
def _get_episode_url(self, show_url, video):
    """Find the post for |video| by walking the show's paginated post list.

    Pulls the first anchor out of each post-NNN div for the (url, title)
    pair, matches via scraper_utils.release_check() (or the episode title
    after </strong> when title search is forced), and follows
    'nextpostslink' pagination. Returns a pathified url or None.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        # NOTE: require_debrid=False here, unlike sibling variants of this
        # method which pass True
        html = self._http_get(page_url[0], require_debrid=False, cache_limit=1)
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = dom_parser2.parse_dom(post, 'a', req='href')
                if match:
                    url, title = match[0].attrs['href'], match[0].content
                    if not force_title:
                        if scraper_utils.release_check(video, title, require_title=False):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url:
            page_url = [page_url[0].attrs['href']]
def _get_episode_url(self, show_url, video):
    """Find the episode url on the show page.

    Tries, in order: an SxxEyy href pattern, the 'dd-mm-yyyy' airdate in
    each el-item div (if airdate fallback is enabled), then the normalized
    episode title (if title fallback is enabled or title search is forced).
    Returns a pathified url or None.
    """
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    if html:
        force_title = scraper_utils.force_title(video)
        episodes = dom_parser.parse_dom(html, 'div', {'class': '\s*el-item\s*'})
        if not force_title:
            episode_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (int(video.season), int(video.episode))
            match = re.search(episode_pattern, html)
            if match:
                return scraper_utils.pathify_url(match.group(1))
            if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                # site renders airdates as day-month-year
                airdate_pattern = '%02d-%02d-%d' % (video.ep_airdate.day, video.ep_airdate.month, video.ep_airdate.year)
                for episode in episodes:
                    ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                    ep_airdate = dom_parser.parse_dom(episode, 'div', {'class': 'date'})
                    if ep_url and ep_airdate:
                        ep_airdate = ep_airdate[0].strip()
                        if airdate_pattern == ep_airdate:
                            return scraper_utils.pathify_url(ep_url[0])
        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for episode in episodes:
                ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                ep_title = dom_parser.parse_dom(episode, 'div', {'class': 'e-name'})
                if ep_url and ep_title and norm_title == scraper_utils.normalize_title(ep_title[0]):
                    return scraper_utils.pathify_url(ep_url[0])
def __find_episodes(self, video):
    """Return the match url for the first cached torrent matching |video|.

    Skipped entirely when title-only search is forced, since torrent names
    are matched on S/E or airdate patterns. Returns None when no torrent
    matches.
    """
    if scraper_utils.force_title(video):
        return
    norm_title = scraper_utils.normalize_title(video.title)
    for torrent in self.__get_torrents():
        result = self.__match_episode(video, norm_title, torrent['name'], torrent['hash'])
        if result is not None:
            return result
def _get_episode_url(self, show_url, video):
    """Resolve |video| to a local Kodi library episode via JSON-RPC.

    show_url is a query string carrying the library tvshowid. First filters
    by episode number, then (when forced or as fallback) by episode title.
    Skips .strm library entries and returns a 'video_type=...&id=...' url
    for the first real file, or None.
    """
    params = urlparse.parse_qs(show_url)
    # VideoLibrary.GetEpisodes template; the filter field/value are filled in
    # per lookup (episode number vs. title)
    cmd = '{"jsonrpc": "2.0", "method": "VideoLibrary.GetEpisodes", "params": {"tvshowid": %s, "season": %s, "filter": {"field": "%s", "operator": "is", "value": "%s"}, "limits": { "start" : 0, "end": 25 }, "properties" : ["title", "season", "episode", "file", "streamdetails"], "sort": { "order": "ascending", "method": "label", "ignorearticle": true }}, "id": "libTvShows"}'
    base_url = 'video_type=%s&id=%s'
    episodes = []
    force_title = scraper_utils.force_title(video)
    if not force_title:
        run = cmd % (params['id'][0], video.season, 'episode', video.episode)
        meta = xbmc.executeJSONRPC(run)
        meta = scraper_utils.parse_json(meta)
        log_utils.log('Episode Meta: %s' % (meta), log_utils.LOGDEBUG)
        if 'result' in meta and 'episodes' in meta['result']:
            episodes = meta['result']['episodes']
    else:
        log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and not episodes:
        run = cmd % (params['id'][0], video.season, 'title', video.ep_title)
        meta = xbmc.executeJSONRPC(run)
        meta = scraper_utils.parse_json(meta)
        log_utils.log('Episode Title Meta: %s' % (meta), log_utils.LOGDEBUG)
        if 'result' in meta and 'episodes' in meta['result']:
            episodes = meta['result']['episodes']
    for episode in episodes:
        # .strm entries are just pointers to other addons; skip them
        if episode['file'].endswith('.strm'):
            continue
        return base_url % (video.video_type, episode['episodeid'])
def _get_episode_url(self, show_url, video):
    """Pick the best episode id from the show page's season dropdown.

    Collects (episode, airdate, id) triples for the requested season, then
    records ids matched by episode number, airdate, and title. Preference
    order depends on whether title search is forced and which fallback
    settings are enabled; returns EP_URL % id or None.
    """
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=8)
    pattern = "<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (video.season)
    match = re.search(pattern, html, re.DOTALL)
    if match:
        fragment = match.group(1)
        # the episode id is stashed in the anchor's class attribute
        ep_ids = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'}, ret='class')
        episodes = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'})
        airdates = dom_parser.parse_dom(fragment, 'span', {'class': 'airdate'})
        ep_airdate = video.ep_airdate.strftime('%Y-%m-%d') if isinstance(video.ep_airdate, datetime.date) else ''
        norm_title = scraper_utils.normalize_title(video.ep_title)
        num_id, airdate_id, title_id = '', '', ''
        for episode, airdate, ep_id in zip(episodes, airdates, ep_ids):
            if ep_airdate and ep_airdate == airdate: airdate_id = ep_id
            match = re.search('(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode)
            if match:
                ep_num, ep_title = match.groups()
                if int(ep_num) == int(video.episode): num_id = ep_id
                if norm_title and norm_title in scraper_utils.normalize_title(ep_title): title_id = ep_id
        best_id = ''
        if not scraper_utils.force_title(video):
            if num_id: best_id = num_id
            # later assignments deliberately override earlier ones:
            # airdate beats number, title beats airdate, when enabled
            if kodi.get_setting('airdate-fallback') == 'true' and airdate_id: best_id = airdate_id
            if kodi.get_setting('title-fallback') == 'true' and title_id: best_id = title_id
        else:
            if title_id: best_id = title_id
        if best_id:
            return EP_URL % (best_id)
def _get_episode_url(self, show_url, video):
    """Find the post for |video| by walking the show's paginated post list.

    Matches the first anchor of each post-NNN div against an SxxEyy or
    airdate regex (or the episode title after </strong> when title search
    is forced). Follows 'nextpostslink' pagination until a match is found
    or __too_old() stops the scan. Returns a pathified url or None.
    """
    sxe = '(\.|_| )S%02dE%02d(\.|_| )' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    try:
        airdate_pattern = video.ep_airdate.strftime('(\.|_| )%Y(\.|_| )%m(\.|_| )%d(\.|_| )')
    except AttributeError:
        # ep_airdate unset / not a date: disable airdate matching.
        # (was a bare except:, which also hid real errors)
        airdate_pattern = ''
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for post in posts:
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                if match:
                    url, title = match.groups()
                    if not force_title:
                        if re.search(sxe, title) or (airdate_pattern and re.search(airdate_pattern, title)):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def _default_get_episode_url(self, html, video, episode_pattern, title_pattern='', airdate_pattern=''):
    """Shared episode matcher used by concrete scrapers.

    html            -- page text, or a dom_parser2 result list (unwrapped below)
    episode_pattern -- regex whose group(1) is the episode url
    title_pattern   -- regex with named groups 'title' and 'url'
    airdate_pattern -- regex template with {year}/{month}/{p_month}/
                       {month_name}/{short_month}/{day}/{p_day} placeholders

    Tries episode_pattern, then the airdate pattern (if enabled), then the
    title pattern (if enabled or forced). Returns a pathified url or None.
    """
    logger.log('Default Episode Url: |%s|%s|' % (self.get_name(), video), log_utils.LOGDEBUG)
    if not html: return
    try:
        # accept a dom_parser2 element list as well as a raw string
        html = html[0].content
    except AttributeError:
        pass
    force_title = scraper_utils.force_title(video)
    if not force_title:
        if episode_pattern:
            match = re.search(episode_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
        if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
            airdate_pattern = airdate_pattern.replace('{year}', str(video.ep_airdate.year))
            airdate_pattern = airdate_pattern.replace('{month}', str(video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{day}', str(video.ep_airdate.day))
            airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (video.ep_airdate.day))
            logger.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)
            match = re.search(airdate_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
    else:
        logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
            episode = match.groupdict()
            if norm_title == scraper_utils.normalize_title(episode['title']):
                return scraper_utils.pathify_url(episode['url'])
def _get_episode_url(self, show_url, video):
    """Find the episode url inside the show page's episode_list fragment.

    Tries the default SxxEyy matcher first, then falls back to matching the
    'episode_air_d' span against the airdate and the 'episode_name' span
    against the normalized title. Returns a pathified url or None.
    """
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
    parts = dom_parser2.parse_dom(html, 'ul', {'class': 'episode_list'})
    fragment = '\n'.join(part.content for part in parts)
    result = self._default_get_episode_url(fragment, video, episode_pattern)
    if result: return result
    ep_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(fragment, 'a', req='href')]
    ep_dates = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_air_d'})]
    ep_titles = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_name'})]
    force_title = scraper_utils.force_title(video)
    if not force_title and kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
        for ep_url, ep_date in zip(ep_urls, ep_dates):
            logger.log('Quikr Ep Airdate Matching: %s - %s - %s' % (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG)
            if video.ep_airdate == scraper_utils.to_datetime(ep_date, '%Y-%m-%d').date():
                return scraper_utils.pathify_url(ep_url)
    if force_title or kodi.get_setting('title-fallback') == 'true':
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for ep_url, ep_title in zip(ep_urls, ep_titles):
            # strip the leading episode-number span before comparing titles
            ep_title = re.sub('<span>.*?</span>\s*', '', ep_title)
            logger.log('Quikr Ep Title Matching: %s - %s - %s' % (ep_url.encode('utf-8'), ep_title.encode('utf-8'), video.ep_title), log_utils.LOGDEBUG)
            if norm_title == scraper_utils.normalize_title(ep_title):
                return scraper_utils.pathify_url(ep_url)
def _get_episode_url(self, show_url, video):
    """Find the episode url inside the show page's episode_list fragment.

    Tries the default SxxEyy matcher first, then falls back to airdate and
    title matching against the episode_list spans. Returns a pathified url
    or None.
    """
    episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
    # NOTE(review): show_url (a path), not fetched html, is handed to
    # _default_get_episode_url here; sibling versions pass the page fragment.
    # Verify which signature this file's helper expects.
    result = self._default_get_episode_url(show_url, video, episode_pattern)
    if result: return result
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    fragment = dom_parser.parse_dom(html, "ul", {"class": "episode_list"})
    if fragment:
        ep_urls = dom_parser.parse_dom(fragment[0], "a", ret="href")
        ep_dates = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_air_d"})
        ep_titles = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_name"})
        force_title = scraper_utils.force_title(video)
        if not force_title and kodi.get_setting("airdate-fallback") == "true" and video.ep_airdate:
            for ep_url, ep_date in zip(ep_urls, ep_dates):
                log_utils.log("Quikr Ep Airdate Matching: %s - %s - %s" % (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG)
                if video.ep_airdate == scraper_utils.to_datetime(ep_date, "%Y-%m-%d").date():
                    return scraper_utils.pathify_url(ep_url)
        if force_title or kodi.get_setting("title-fallback") == "true":
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for ep_url, ep_title in zip(ep_urls, ep_titles):
                # strip the leading episode-number span before comparing titles
                ep_title = re.sub("<span>.*?</span>\s*", "", ep_title)
                log_utils.log("Quikr Ep Title Matching: %s - %s - %s" % (ep_url, norm_title, video.ep_title), log_utils.LOGDEBUG)
                if norm_title == scraper_utils.normalize_title(ep_title):
                    return scraper_utils.pathify_url(ep_url)
def _get_episode_url(self, show_url, video):
    """Find the post for |video| by walking the show's paginated post list.

    Pulls the first anchor out of each post-NNN div for the (url, title)
    pair, matches via scraper_utils.release_check() (or the episode title
    after </strong> when title search is forced), and follows
    'nextpostslink' pagination. Returns a pathified url or None.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        html = self._http_get(page_url[0], require_debrid=True, cache_limit=1)
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = dom_parser2.parse_dom(post, 'a', req='href')
                if match:
                    url, title = match[0].attrs['href'], match[0].content
                    if not force_title:
                        if scraper_utils.release_check(video, title, require_title=False):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url:
            page_url = [page_url[0].attrs['href']]
def _get_episode_url(self, show_url, video):
    """Resolve |video| against the site's /api/v2/shows/<id> episode list.

    Matches on season/episode number first, then airdate (if enabled), then
    episode name (if enabled or title search is forced). Returns a pathified
    '?id=<episode id>' url or None.
    """
    query = scraper_utils.parse_query(show_url)
    if 'id' in query:
        url = scraper_utils.urljoin(self.base_url, '/api/v2/shows/%s' % (query['id']))
        js_data = self._http_get(url, cache_limit=.5)
        if 'episodes' in js_data:
            force_title = scraper_utils.force_title(video)
            if not force_title:
                for episode in js_data['episodes']:
                    if int(video.season) == int(episode['season']) and int(video.episode) == int(episode['number']):
                        return scraper_utils.pathify_url('?id=%s' % (episode['id']))
                if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                    for episode in js_data['episodes']:
                        if 'airdate' in episode:
                            ep_airdate = scraper_utils.to_datetime(episode['airdate'], "%Y-%m-%d").date()
                            # NOTE(review): compares against the site date minus one
                            # day -- presumably a timezone offset correction; confirm
                            if video.ep_airdate == (ep_airdate - datetime.timedelta(days=1)):
                                return scraper_utils.pathify_url('?id=%s' % (episode['id']))
            else:
                logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
            if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
                norm_title = scraper_utils.normalize_title(video.ep_title)
                for episode in js_data['episodes']:
                    if 'name' in episode and norm_title in scraper_utils.normalize_title(episode['name']):
                        return scraper_utils.pathify_url('?id=%s' % (episode['id']))
def _get_episode_url(self, show_url, video):
    """Find the post for |video| by walking the show's paginated post list.

    Pairs each <h2> heading (url, title) with its post-NNN div, matches the
    title against an SxxEyy or airdate regex (or the episode title after
    </strong> when title search is forced), and follows 'nextpostslink'
    pagination. Returns a pathified url or None.
    """
    sxe = '(\.|_| )S%02dE%02d(\.|_| )' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    try:
        airdate_pattern = video.ep_airdate.strftime('(\.|_| )%Y(\.|_| )%m(\.|_| )%d(\.|_| )')
    except AttributeError:
        # ep_airdate unset / not a date: disable airdate matching.
        # (was a bare except:, which also hid real errors)
        airdate_pattern = ''
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    if re.search(sxe, title) or (airdate_pattern and re.search(airdate_pattern, title)):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('</strong>(.*?)</p>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def _get_episode_url(self, show_url, video):
    """Find the post for |video| by walking the show's paginated post list.

    Matches the first anchor of each post-NNN div via
    scraper_utils.release_check() (or the episode title after </strong>
    when title search is forced) and follows 'nextpostslink' pagination.
    Returns a pathified url or None.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for post in posts:
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                if match:
                    url, title = match.groups()
                    if not force_title:
                        if scraper_utils.release_check(video, title, require_title=False):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def _default_get_episode_url(self, html, video, episode_pattern, title_pattern='', airdate_pattern=''):
    """Shared episode matcher used by concrete scrapers.

    html            -- page text, or a dom_parser2 result list (unwrapped below)
    episode_pattern -- regex whose group(1) is the episode url
    title_pattern   -- regex with named groups 'title' and 'url'
    airdate_pattern -- regex template with {year}/{month}/{p_month}/
                       {month_name}/{short_month}/{day}/{p_day} placeholders

    Tries episode_pattern, then the airdate pattern (if enabled), then the
    title pattern (if enabled or forced). Returns a pathified url or None.
    """
    logger.log('Default Episode Url: |%s|%s|' % (self.get_name(), video), log_utils.LOGDEBUG)
    if not html: return
    try:
        # accept a dom_parser2 element list as well as a raw string
        html = html[0].content
    except AttributeError:
        pass
    force_title = scraper_utils.force_title(video)
    if not force_title:
        if episode_pattern:
            match = re.search(episode_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
        if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
            airdate_pattern = airdate_pattern.replace('{year}', str(video.ep_airdate.year))
            airdate_pattern = airdate_pattern.replace('{month}', str(video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{day}', str(video.ep_airdate.day))
            airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (video.ep_airdate.day))
            logger.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)
            match = re.search(airdate_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
    else:
        logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
            episode = match.groupdict()
            if norm_title == scraper_utils.normalize_title(episode['title']):
                return scraper_utils.pathify_url(episode['url'])
def _blog_get_url(self, video, delim="."):
    """Resolve |video| to a url via the local related-url cache or a search.

    Returns the cached url when present; otherwise builds a search title
    (SxxEyy form, or episode title when title search is forced; movies use
    'title year'), searches, optionally picks the highest-quality result
    when the scraper's 'select' setting is non-zero, caches and returns the
    url. delim separates airdate parts in the fallback search.
    """
    url = None
    self.create_db_connection()
    result = self.db_connection.get_related_url(
        video.video_type, video.title, video.year, self.get_name(), video.season, video.episode
    )
    if result:
        url = result[0][0]
        log_utils.log(
            "Got local related url: |%s|%s|%s|%s|%s|"
            % (video.video_type, video.title, video.year, self.get_name(), url)
        )
    else:
        select = int(kodi.get_setting("%s-select" % (self.get_name())))
        if video.video_type == VIDEO_TYPES.EPISODE:
            temp_title = re.sub("[^A-Za-z0-9 ]", "", video.title)
            if not scraper_utils.force_title(video):
                search_title = "%s S%02dE%02d" % (temp_title, int(video.season), int(video.episode))
                if isinstance(video.ep_airdate, datetime.date):
                    # e.g. 'Show 2016.05.01' with delim='.'
                    fallback_search = "%s %s" % (
                        temp_title,
                        video.ep_airdate.strftime("%Y{0}%m{0}%d".format(delim)),
                    )
                else:
                    fallback_search = ""
            else:
                if not video.ep_title:
                    return None
                search_title = "%s %s" % (temp_title, video.ep_title)
                fallback_search = ""
        else:
            search_title = "%s %s" % (video.title, video.year)
            fallback_search = ""
        results = self.search(video.video_type, search_title, video.year)
        if not results and fallback_search:
            results = self.search(video.video_type, fallback_search, video.year)
        if results:
            # TODO: First result isn't always the most recent...
            best_result = results[0]
            if select != 0:
                best_qorder = 0
                for result in results:
                    # quality tag is expected at the end of the title, e.g. '[720p]'
                    match = re.search("\[(.*)\]$", result["title"])
                    if match:
                        q_str = match.group(1)
                        quality = scraper_utils.blog_get_quality(video, q_str, "")
                        if Q_ORDER[quality] > best_qorder:
                            best_result = result
                            best_qorder = Q_ORDER[quality]
            url = best_result["url"]
            self.db_connection.set_related_url(video.video_type, video.title, video.year, self.get_name(), url)
    return url
def __find_episodes(self, video):
    """Search the debrid transfer list for an item matching |video|.

    Skipped when title-only search is forced, since transfer names are
    matched on S/E or airdate patterns. Returns the match url from
    __match_episode(), or None.
    """
    url = urlparse.urljoin(self.base_url, LIST_URL)
    js_data = self._http_get(url, cache_limit=0)
    if 'transfers' not in js_data:
        return
    if scraper_utils.force_title(video):
        return
    norm_title = scraper_utils.normalize_title(video.title)
    for transfer in js_data['transfers']:
        result = self.__match_episode(video, norm_title, transfer['name'], transfer['hash'])
        if result is not None:
            return result
def _get_episode_url(self, show_url, video):
    """Resolve |video| against the show's JSON episode map.

    The JSON maps season numbers to episode collections (dict or list).
    Matches on season/episode number first, then airdate (if enabled), then
    episode name (if enabled or title search is forced). Returns a
    pathified url (with '/json' stripped) or None.
    """
    log_utils.log('WS Episode Url: |%s|%s|' % (show_url, str(video).decode('utf-8', 'replace')), log_utils.LOGDEBUG)
    html = self._http_get(show_url, cache_limit=2)
    js_result = scraper_utils.parse_json(html, show_url)
    if 'results' in js_result and '0' in js_result['results'] and 'episodes' in js_result['results']['0']:
        seasons = js_result['results']['0']['episodes']
        force_title = scraper_utils.force_title(video)
        if not force_title:
            if str(video.season) in seasons:
                season = seasons[str(video.season)]
                if isinstance(season, list):
                    # normalize list form to {episode_number: episode}
                    season = dict((ep['episode'], ep) for ep in season)
                if str(video.episode) in season:
                    url = season[str(video.episode)]['url']
                    return scraper_utils.pathify_url(url.replace('/json', ''))
            if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                # BUG FIX: was strftime('%d/%M/%Y') -- %M is minutes (always
                # '00' for a date), so airdates could never match the site's
                # dd/mm/yyyy 'release' field; use %m (month)
                airdate_pattern = video.ep_airdate.strftime('%d/%m/%Y')
                for season in seasons:
                    if season.lower() == 'epcount': continue
                    episodes = seasons[season]
                    if isinstance(episodes, dict):
                        episodes = [episodes[key] for key in episodes]
                    for episode in episodes:
                        if airdate_pattern == episode['release']:
                            url = episode['url']
                            return scraper_utils.pathify_url(url.replace('/json', ''))
        else:
            log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for season in seasons:
                if season.lower() == 'epcount': continue
                episodes = seasons[season]
                if isinstance(episodes, dict):
                    episodes = [episodes[key] for key in episodes]
                for episode in episodes:
                    if episode['name'] is not None and norm_title == scraper_utils.normalize_title(episode['name']):
                        url = episode['url']
                        return scraper_utils.pathify_url(url.replace('/json', ''))
def _blog_get_url(self, video, delim='.'):
    """Resolve |video| to a url via the local related-url cache or a search.

    Returns the cached url when present; otherwise builds a search title
    (SxxEyy form, or episode title when title search is forced), searches,
    optionally picks the highest-quality result when the scraper's 'select'
    setting is non-zero, caches and returns the url. delim separates
    airdate parts in the fallback search.
    """
    url = None
    result = self.db_connection().get_related_url(video.video_type, video.title, video.year, self.get_name(), video.season, video.episode)
    if result:
        url = result[0][0]
        logger.log('Got local related url: |%s|%s|%s|%s|%s|' % (video.video_type, video.title, video.year, self.get_name(), url), log_utils.LOGDEBUG)
    else:
        try:
            select = int(kodi.get_setting('%s-select' % (self.get_name())))
        except:
            # setting missing or non-numeric: default to "first result"
            select = 0
        if video.video_type == VIDEO_TYPES.EPISODE:
            temp_title = re.sub('[^A-Za-z0-9 ]', '', video.title)
            if not scraper_utils.force_title(video):
                search_title = '%s S%02dE%02d' % (temp_title, int(video.season), int(video.episode))
                if isinstance(video.ep_airdate, datetime.date):
                    # e.g. 'Show 2016.05.01' with delim='.'
                    fallback_search = '%s %s' % (temp_title, video.ep_airdate.strftime('%Y{0}%m{0}%d'.format(delim)))
                else:
                    fallback_search = ''
            else:
                if not video.ep_title: return None
                search_title = '%s %s' % (temp_title, video.ep_title)
                fallback_search = ''
        else:
            search_title = video.title
            fallback_search = ''
        results = self.search(video.video_type, search_title, video.year)
        if not results and fallback_search:
            results = self.search(video.video_type, fallback_search, video.year)
        if results:
            # TODO: First result isn't always the most recent...
            best_result = results[0]
            if select != 0:
                best_qorder = 0
                for result in results:
                    if 'quality' in result:
                        quality = result['quality']
                    else:
                        # fall back to parsing '(720p)' or a trailing '[...]' tag
                        match = re.search('\((\d+p)\)', result['title'])
                        if match:
                            quality = scraper_utils.height_get_quality(match.group(1))
                        else:
                            match = re.search('\[(.*)\]$', result['title'])
                            q_str = match.group(1) if match else ''
                            quality = scraper_utils.blog_get_quality(video, q_str, '')
                    logger.log('result: |%s|%s|%s|' % (result, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                    if Q_ORDER[quality] > best_qorder:
                        logger.log('Setting best as: |%s|%s|%s|' % (result, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                        best_result = result
                        best_qorder = Q_ORDER[quality]
            url = best_result['url']
            self.db_connection().set_related_url(video.video_type, video.title, video.year, self.get_name(), url, video.season, video.episode)
    return url
def _get_episode_url(self, show_url, video):
    """Find the episode url in the season's 'episodedivN' select options.

    Only S/E matching is supported by this site, so nothing is returned
    when title search is forced. Returns a pathified url or None.
    """
    if scraper_utils.force_title(video):
        return
    page_url = urlparse.urljoin(self.base_url, show_url)
    page = self._http_get(page_url, cache_limit=2)
    fragment = dom_parser.parse_dom(page, 'div', {'id': 'episodediv%s' % (video.season)})
    if not fragment:
        return
    # option value carries the episode url
    ep_match = re.search('value="([^"]+)[^>]*>Episode %s\s*<' % (video.episode), fragment[0], re.I)
    if ep_match:
        return scraper_utils.pathify_url(ep_match.group(1))
def get_url(self, video): url = super(self.__class__, self).get_url(video) # check each torrent to see if it's an episode if there is no season url if url is None and video.video_type == VIDEO_TYPES.EPISODE: if not scraper_utils.force_title(video): for item in self.__get_torrents(): if scraper_utils.release_check(video, item['name']): return 'hash=%s' % (item['hash']) return url
def _get_episode_url(self, show_url, video):
    """Find the episode url in the season's 'episodedivN' fragment.

    Only S/E matching is supported by this site, so nothing is returned
    when title search is forced. Returns a pathified url or None.
    """
    if not scraper_utils.force_title(video):
        url = urlparse.urljoin(self.base_url, show_url)
        html = self._http_get(url, cache_limit=.25)
        match = re.search('<div id="episodediv%s"(.*?)</div>' % (video.season), html, re.DOTALL)
        if match:
            fragment = match.group(1)
            # option value carries the episode url
            pattern = 'value="([^"]+)">Episode %s<' % (video.episode)
            match = re.search(pattern, fragment)
            if match:
                return scraper_utils.pathify_url(match.group(1))
def _get_episode_url(self, show_url, video):
    """Find the season directory containing |video| on a file-listing site.

    Locates the 'Sxx/' link on the show page, then scans that directory for
    a file whose title matches SxxExx. Returns the pathified season
    directory url (not the file url) or None.
    """
    force_title = scraper_utils.force_title(video)
    if not force_title:
        show_url = urlparse.urljoin(self.base_url, show_url)
        html = self._http_get(show_url, cache_limit=24)
        match = re.search('href="(S%02d/)"' % (int(video.season)), html)
        if match:
            season_url = urlparse.urljoin(show_url, match.group(1))
            for item in self.__get_files(season_url, cache_limit=1):
                match = re.search('(\.|_| )S%02d(\.|_| )?E%02d(\.|_| )' % (int(video.season), int(video.episode)), item['title'], re.I)
                if match:
                    return scraper_utils.pathify_url(season_url)
def _get_episode_url(self, show_url, video):
    """Find the episode url in the season's 'episodedivN' select options.

    Only S/E matching is supported by this site, so nothing is returned
    when title search is forced. Returns a pathified url or None.
    """
    if not scraper_utils.force_title(video):
        url = scraper_utils.urljoin(self.base_url, show_url)
        html = self._http_get(url, cache_limit=2)
        season_div = 'episodediv%s' % (video.season)
        fragment = dom_parser2.parse_dom(html, 'div', {'id': season_div})
        if not fragment: return
        # option value carries the episode url
        pattern = 'value="([^"]+)[^>]*>Episode %s\s*<' % (video.episode)
        match = re.search(pattern, fragment[0].content, re.I)
        if not match: return
        return scraper_utils.pathify_url(match.group(1))
def _get_episode_url(self, show_url, video):
    """Find the episode file in the show's 'S<season>/' directory listing.

    Skips 720p links and returns the pathified url of the first file whose
    title matches SxxExx, or None. Title-forced searches are unsupported.
    """
    force_title = scraper_utils.force_title(video)
    if not force_title:
        show_url = urlparse.urljoin(self.base_url, show_url)
        html = self._http_get(show_url, cache_limit=24)
        match = re.search('href="(S0*%s/)"' % (int(video.season)), html, re.I)
        if match:
            season_url = urlparse.urljoin(show_url, match.group(1))
            for item in self.__get_files(season_url, cache_limit=1):
                # 720p variants are deliberately excluded on this source
                if '720p' in item['link']: continue
                match = re.search('[._ -]S%02d[._ -]?E%02d[^\d]' % (int(video.season), int(video.episode)), item['title'], re.I)
                if match:
                    return scraper_utils.pathify_url(item['url'])
def _get_episode_url(self, show_url, video):
    """Find the episode file in the show's 'S<season>/' directory listing.

    Skips 720p links and returns the pathified url of the first file whose
    title matches SxxExx, or None. Title-forced searches are unsupported.
    """
    force_title = scraper_utils.force_title(video)
    if not force_title:
        show_url = urlparse.urljoin(self.base_url, show_url)
        html = self._http_get(show_url, cache_limit=24)
        match = re.search('href="(S0*%s/)"' % (int(video.season)), html, re.I)
        if match:
            season_url = urlparse.urljoin(show_url, match.group(1))
            for item in self._get_files(season_url, cache_limit=1):
                # 720p variants are deliberately excluded on this source
                if '720p' in item['link']: continue
                match = re.search('[._ -]S%02d[._ -]?E%02d[^\d]' % (int(video.season), int(video.episode)), item['title'], re.I)
                if match:
                    return scraper_utils.pathify_url(item['url'])
def _get_episode_url(self, show_url, video):
    """Find the episode url in the season's 'episodedivN' fragment.

    Only S/E matching is supported by this site, so nothing is returned
    when title search is forced. Returns a pathified url or None.
    """
    if not scraper_utils.force_title(video):
        url = urlparse.urljoin(self.base_url, show_url)
        html = self._http_get(url, cache_limit=.25)
        match = re.search('<div id="episodediv%s"(.*?)</div>' % (video.season), html, re.DOTALL)
        if match:
            fragment = match.group(1)
            # option value carries the episode url
            pattern = 'value="([^"]+)">Episode %s<' % (video.episode)
            match = re.search(pattern, fragment)
            if match:
                return scraper_utils.pathify_url(match.group(1))
def _get_episode_url(self, show_url, video):
    """Return the pathified season-directory URL that contains the episode.

    Fix: joins ``base_url`` and ``show_url`` with ``urlparse.urljoin`` instead
    of raw string concatenation, which produced a double slash (or a missing
    one) depending on whether base_url carried a trailing slash. This also
    matches the convention used by the sibling scrapers in this file.
    """
    force_title = scraper_utils.force_title(video)
    if not force_title:
        show_url = urlparse.urljoin(self.base_url, show_url)
        html = self._http_get(show_url, cache_limit=48)
        match = re.search('href="(S%02d/)"' % (int(video.season)), html)
        if match:
            season_url = urlparse.urljoin(show_url, match.group(1))
        else:
            # no per-season directory: fall back to the show root listing
            season_url = show_url
        for item in self._get_files(season_url, cache_limit=8):
            if self.__episode_match(item, video):
                # the season directory itself is the usable URL here
                return scraper_utils.pathify_url(season_url)
def _blog_get_url(self, video, delim='.'):
    """Find a blog-style post URL for |video|, caching results in the local DB.

    Checks the related-url cache first; on a miss, builds a search query
    (SxxEyy, air-date fallback, or episode title when title search is forced),
    searches, optionally picks the best-quality result, and caches the hit.
    ``delim`` separates Y/M/D in the air-date fallback query. Returns the
    URL string or None.
    """
    url = None
    self.create_db_connection()
    # 1) cache lookup keyed on type/title/year/scraper/season/episode
    result = self.db_connection.get_related_url(video.video_type, video.title, video.year, self.get_name(), video.season, video.episode)
    if result:
        url = result[0][0]
        log_utils.log('Got local related url: |%s|%s|%s|%s|%s|' % (video.video_type, video.title, video.year, self.get_name(), url))
    else:
        # per-scraper setting: 0 = take first result, otherwise pick best quality
        select = int(kodi.get_setting('%s-select' % (self.get_name())))
        if video.video_type == VIDEO_TYPES.EPISODE:
            # strip punctuation that tends to break blog search engines
            temp_title = re.sub('[^A-Za-z0-9 ]', '', video.title)
            if not scraper_utils.force_title(video):
                search_title = '%s S%02dE%02d' % (temp_title, int(video.season), int(video.episode))
                if isinstance(video.ep_airdate, datetime.date):
                    # e.g. "Show 2016.01.02" with delim='.'
                    fallback_search = '%s %s' % (temp_title, video.ep_airdate.strftime('%Y{0}%m{0}%d'.format(delim)))
                else:
                    fallback_search = ''
            else:
                # title search is forced: an episode title is required
                if not video.ep_title: return None
                search_title = '%s %s' % (temp_title, video.ep_title)
                fallback_search = ''
        else:
            search_title = '%s %s' % (video.title, video.year)
            fallback_search = ''
        results = self.search(video.video_type, search_title, video.year)
        if not results and fallback_search:
            results = self.search(video.video_type, fallback_search, video.year)
        if results:
            # TODO: First result isn't always the most recent...
            best_result = results[0]
            if select != 0:
                # scan all results and keep the highest quality per Q_ORDER
                best_qorder = 0
                for result in results:
                    # quality string is the trailing "[...]" in the result title
                    match = re.search('\[(.*)\]$', result['title'])
                    if match:
                        q_str = match.group(1)
                        quality = scraper_utils.blog_get_quality(video, q_str, '')
                        log_utils.log('result: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                        if Q_ORDER[quality] > best_qorder:
                            log_utils.log('Setting best as: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                            best_result = result
                            best_qorder = Q_ORDER[quality]
            url = best_result['url']
            # 2) store the hit so the next lookup is a cache read
            self.db_connection.set_related_url(video.video_type, video.title, video.year, self.get_name(), url)
    return url
def _default_get_episode_url(
    self, show_url, video, episode_pattern, title_pattern="", airdate_pattern="", data=None, headers=None
):
    """Shared episode-url resolver: try episode_pattern, then air-date, then title.

    ``airdate_pattern`` may contain {year}/{month}/{p_month}/{month_name}/
    {short_month}/{day}/{p_day} placeholders substituted from video.ep_airdate.
    Returns the pathified URL of the first match; falls through (None) otherwise.
    """
    log_utils.log(
        "Default Episode Url: |%s|%s|%s|%s|" % (self.base_url, show_url, str(video).decode("utf-8", "replace"), data),
        log_utils.LOGDEBUG,
    )
    # show_url may already be absolute
    if not show_url.startswith("http"):
        url = urlparse.urljoin(self.base_url, show_url)
    else:
        url = show_url
    html = self._http_get(url, data=data, headers=headers, cache_limit=2)
    if html:
        force_title = scraper_utils.force_title(video)
        if not force_title:
            # 1) direct season/episode regex match
            if episode_pattern:
                match = re.search(episode_pattern, html, re.DOTALL)
                if match:
                    return scraper_utils.pathify_url(match.group(1))
            # 2) air-date match (opt-in via addon setting)
            if kodi.get_setting("airdate-fallback") == "true" and airdate_pattern and video.ep_airdate:
                airdate_pattern = airdate_pattern.replace("{year}", str(video.ep_airdate.year))
                airdate_pattern = airdate_pattern.replace("{month}", str(video.ep_airdate.month))
                airdate_pattern = airdate_pattern.replace("{p_month}", "%02d" % (video.ep_airdate.month))
                airdate_pattern = airdate_pattern.replace("{month_name}", MONTHS[video.ep_airdate.month - 1])
                airdate_pattern = airdate_pattern.replace("{short_month}", SHORT_MONS[video.ep_airdate.month - 1])
                airdate_pattern = airdate_pattern.replace("{day}", str(video.ep_airdate.day))
                airdate_pattern = airdate_pattern.replace("{p_day}", "%02d" % (video.ep_airdate.day))
                log_utils.log("Air Date Pattern: %s" % (airdate_pattern), log_utils.LOGDEBUG)
                match = re.search(airdate_pattern, html, re.DOTALL | re.I)
                if match:
                    return scraper_utils.pathify_url(match.group(1))
        else:
            log_utils.log(
                "Skipping S&E matching as title search is forced on: %s" % (video.trakt_id), log_utils.LOGDEBUG
            )
        # 3) episode-title match: used when forced, or as opt-in fallback
        if (force_title or kodi.get_setting("title-fallback") == "true") and video.ep_title and title_pattern:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            # title_pattern must define named groups "title" and "url"
            for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
                episode = match.groupdict()
                if norm_title == scraper_utils.normalize_title(episode["title"]):
                    return scraper_utils.pathify_url(episode["url"])
def __episode_match(self, video, show_url):
    # Locate the episode's <li> fragment on the show page; falls through (None) on no match.
    show_url = scraper_utils.urljoin(self.base_url, show_url)
    headers = {'Referer': ''}  # site rejects requests carrying a referer
    html = self._http_get(show_url, headers=headers, cache_limit=2)
    force_title = scraper_utils.force_title(video)
    if not force_title:
        # episode list items carry ids like "season1-2"
        match = dom_parser2.parse_dom(html, 'li', {'id': 'season%s-%s' % (video.season, video.episode)})
        if match:
            return match[0].content
    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for episode in re.finditer('''<li\s+id=['"]?season\d+-\d+['"]?>.*?</ul>''', html, re.DOTALL):
            ep_title = dom_parser2.parse_dom(episode.group(0), 'h2')
            if ep_title and norm_title == scraper_utils.normalize_title(ep_title[0].content):
                # NOTE(review): this branch returns the re.Match object while the branch
                # above returns an HTML string -- looks inconsistent; confirm what callers
                # expect before changing.
                return episode
def _get_episode_url(self, show_url, video):
    # Resolve an episode from the show page's episode_list; tries the shared
    # pattern resolver first, then air-date, then episode-title matching.
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    # match links like "...-s1e2" without a trailing digit (avoids e2 matching e20)
    episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (
        video.season, video.episode)
    parts = dom_parser2.parse_dom(html, 'ul', {'class': 'episode_list'})
    fragment = '\n'.join(part.content for part in parts)
    # reuse the generic resolver against the extracted fragment
    result = self._default_get_episode_url(fragment, video, episode_pattern)
    if result: return result
    # parallel lists: link, air date, and title per episode entry
    ep_urls = [
        r.attrs['href']
        for r in dom_parser2.parse_dom(fragment, 'a', req='href')
    ]
    ep_dates = [
        r.content for r in dom_parser2.parse_dom(
            fragment, 'span', {'class': 'episode_air_d'})
    ]
    ep_titles = [
        r.content
        for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_name'})
    ]
    force_title = scraper_utils.force_title(video)
    if not force_title and kodi.get_setting(
            'airdate-fallback') == 'true' and video.ep_airdate:
        # air-date fallback: site dates are formatted YYYY-MM-DD
        for ep_url, ep_date in zip(ep_urls, ep_dates):
            logger.log(
                'Quikr Ep Airdate Matching: %s - %s - %s' %
                (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG)
            if video.ep_airdate == scraper_utils.to_datetime(
                    ep_date, '%Y-%m-%d').date():
                return scraper_utils.pathify_url(ep_url)
    if force_title or kodi.get_setting('title-fallback') == 'true':
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for ep_url, ep_title in zip(ep_urls, ep_titles):
            # drop any leading <span>...</span> decoration from the title
            ep_title = re.sub('<span>.*?</span>\s*', '', ep_title)
            logger.log(
                'Quikr Ep Title Matching: %s - %s - %s' %
                (ep_url.encode('utf-8'), ep_title.encode('utf-8'),
                 video.ep_title), log_utils.LOGDEBUG)
            if norm_title == scraper_utils.normalize_title(ep_title):
                return scraper_utils.pathify_url(ep_url)
def _get_episode_url(self, show_url, video):
    # Pick the episode id out of the season dropdown, preferring (in order of
    # increasing precedence) episode number, air date, then episode title.
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=8)
    # capture the dropdown section for this season only
    pattern = "<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (
        video.season)
    match = re.search(pattern, html, re.DOTALL)
    if not match: return
    fragment = match.group(1)
    # the usable episode id is stored in the anchor's class attribute
    episodes = dom_parser2.parse_dom(fragment, 'a', {'id': 'epiloader'}, req='class')
    airdates = dom_parser2.parse_dom(fragment, 'span', {'class': 'airdate'})
    ep_airdate = video.ep_airdate.strftime('%Y-%m-%d') if isinstance(
        video.ep_airdate, datetime.date) else ''
    norm_title = scraper_utils.normalize_title(video.ep_title)
    num_id, airdate_id, title_id = '', '', ''
    for episode, airdate in zip(episodes, airdates):
        ep_id = episode.attrs['class']
        episode = episode.content
        # NOTE(review): compares the date string against a dom_parser2 result
        # object -- presumably its __eq__ compares content; confirm.
        if ep_airdate and ep_airdate == airdate: airdate_id = ep_id
        # entries look like "<span ...>N. Title"
        match = re.search('(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode)
        if match:
            ep_num, ep_title = match.groups()
            if int(ep_num) == int(video.episode): num_id = ep_id
            if norm_title and norm_title in scraper_utils.normalize_title(
                    ep_title):
                title_id = ep_id
    best_id = ''
    if not scraper_utils.force_title(video):
        # later assignments deliberately override earlier ones (title wins)
        if num_id: best_id = num_id
        if kodi.get_setting('airdate-fallback') == 'true' and airdate_id:
            best_id = airdate_id
        if kodi.get_setting('title-fallback') == 'true' and title_id:
            best_id = title_id
    else:
        if title_id: best_id = title_id
    if best_id:
        return EP_URL % (best_id)
def _get_episode_url(self, show_url, video):
    """Resolve an episode URL from the site's JSON show listing.

    Tries, in order: exact season/episode lookup, air-date match (opt-in via
    setting), then episode-title match (when title search is forced, or as an
    opt-in fallback). Returns the pathified URL with the '/json' suffix
    stripped, or falls through (None).

    Fix: the air-date pattern used ``strftime('%d/%M/%Y')`` -- ``%M`` is
    *minutes* (always '00' on a date object), not month, so the air-date
    fallback could never match the site's day/month/year 'release' field.
    Changed to ``%m``.
    """
    log_utils.log('WS Episode Url: |%s|%s|' % (show_url, str(video).decode('utf-8', 'replace')), log_utils.LOGDEBUG)
    html = self._http_get(show_url, cache_limit=2)
    js_result = scraper_utils.parse_json(html, show_url)
    if 'results' in js_result and '0' in js_result['results'] and 'episodes' in js_result['results']['0']:
        seasons = js_result['results']['0']['episodes']
        force_title = scraper_utils.force_title(video)
        if not force_title:
            if str(video.season) in seasons:
                season = seasons[str(video.season)]
                # a season may be a list of episode dicts or a dict keyed by episode number
                if isinstance(season, list):
                    season = dict((ep['episode'], ep) for ep in season)
                if str(video.episode) in season:
                    url = season[str(video.episode)]['url']
                    return scraper_utils.pathify_url(url.replace('/json', ''))
            if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                # %m (month), not %M (minutes) -- see docstring
                airdate_pattern = video.ep_airdate.strftime('%d/%m/%Y')
                for season in seasons:
                    if season.lower() == 'epcount': continue  # bookkeeping key, not a season
                    episodes = seasons[season]
                    if isinstance(episodes, dict):
                        episodes = [episodes[key] for key in episodes]
                    for episode in episodes:
                        if airdate_pattern == episode['release']:
                            url = episode['url']
                            return scraper_utils.pathify_url(url.replace('/json', ''))
        else:
            log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for season in seasons:
                if season.lower() == 'epcount': continue
                episodes = seasons[season]
                if isinstance(episodes, dict):
                    episodes = [episodes[key] for key in episodes]
                for episode in episodes:
                    if episode['name'] is not None and norm_title == scraper_utils.normalize_title(episode['name']):
                        url = episode['url']
                        return scraper_utils.pathify_url(url.replace('/json', ''))
def _get_episode_url(self, show_url, video):
    # Look up the show's numeric id from the show page, then query the season
    # JSON endpoint and match by episode number (or title as a fallback).
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=1)
    match = re.search("var\s+id\s*=\s*'?(\d+)'?", html)
    if match:
        show_id = match.group(1)
        # SEASON_URL expects a millisecond-style timestamp and a request token
        season_url = SEASON_URL % (show_id, video.season, str(int(time.time()) * 1000), self.__get_token())
        season_url = urlparse.urljoin(self.base_url, season_url)
        html = self._http_get(season_url, cache_limit=1)
        js_data = scraper_utils.parse_json(html, season_url)
        force_title = scraper_utils.force_title(video)
        if not force_title:
            for episode in js_data:
                if int(episode['episode_number']) == int(video.episode):
                    return LINK_URL % (show_id, video.season, episode['episode_number'], show_url)
        # title match: used when forced, or as an opt-in fallback
        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for episode in js_data:
                if norm_title == scraper_utils.normalize_title(episode['title']):
                    return LINK_URL % (show_id, video.season, episode['episode_number'], show_url)
def _get_episode_url(self, video):
    """Scan the torrent listing for the episode; return a 'hash=<info_hash>' query."""
    listing_url = urlparse.urljoin(self.base_url, '/torrent/list')
    js_data = self._http_get(listing_url, cache_limit=0)
    norm_title = scraper_utils.normalize_title(video.title)
    if 'torrents' not in js_data:
        return None
    use_airdate = kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate
    if scraper_utils.force_title(video):
        return None  # torrent names carry no usable episode titles

    show_title = ''
    sxe_pattern = '(.*?)[. ][Ss]%02d[Ee]%02d[. ]' % (int(video.season), int(video.episode))
    for torrent in js_data['torrents']:
        # prefer an SxxEyy match; otherwise try the air date when enabled
        sxe_match = re.search(sxe_pattern, torrent['name'])
        if sxe_match:
            show_title = sxe_match.group(1)
        elif use_airdate:
            airdate_pattern = '(.*?)[. ]%s[. ]%02d[. ]%02d[. ]' % (
                video.ep_airdate.year, video.ep_airdate.month, video.ep_airdate.day)
            date_match = re.search(airdate_pattern, torrent['name'])
            if date_match:
                show_title = date_match.group(1)
        # confirm the captured prefix really is this show before returning
        if show_title and norm_title in scraper_utils.normalize_title(show_title):
            return 'hash=%s' % (torrent['hash'])
def _get_episode_url(self, show_url, video):
    # show_url is a query string (e.g. '?id=123'); resolve the episode through
    # the site's /api/v2 JSON endpoint.
    query = scraper_utils.parse_query(show_url)
    if 'id' in query:
        url = scraper_utils.urljoin(self.base_url, '/api/v2/shows/%s' % (query['id']))
        js_data = self._http_get(url, cache_limit=.5)
        if 'episodes' in js_data:
            force_title = scraper_utils.force_title(video)
            if not force_title:
                for episode in js_data['episodes']:
                    if int(video.season) == int(episode['season']) and int(
                            video.episode) == int(episode['number']):
                        return scraper_utils.pathify_url('?id=%s' % (episode['id']))
                if kodi.get_setting(
                        'airdate-fallback') == 'true' and video.ep_airdate:
                    for episode in js_data['episodes']:
                        if 'airdate' in episode:
                            ep_airdate = scraper_utils.to_datetime(
                                episode['airdate'], "%Y-%m-%d").date()
                            # NOTE(review): matches one day *before* the listed
                            # airdate -- presumably compensates for a timezone
                            # offset in the API's dates; confirm before changing.
                            if video.ep_airdate == (
                                    ep_airdate - datetime.timedelta(days=1)):
                                return scraper_utils.pathify_url(
                                    '?id=%s' % (episode['id']))
            else:
                logger.log(
                    'Skipping S&E matching as title search is forced on: %s' %
                    (video.trakt_id), log_utils.LOGDEBUG)
            # title match: used when forced, or as an opt-in fallback;
            # note this is a substring match, not equality
            if (force_title or kodi.get_setting('title-fallback')
                    == 'true') and video.ep_title:
                norm_title = scraper_utils.normalize_title(video.ep_title)
                for episode in js_data['episodes']:
                    if 'name' in episode and norm_title in scraper_utils.normalize_title(
                            episode['name']):
                        return scraper_utils.pathify_url('?id=%s' % (episode['id']))
def _default_get_episode_url(self, show_url, video, episode_pattern, title_pattern='', airdate_pattern='', data=None, headers=None, method=None):
    """Shared episode-url resolver: try episode_pattern, then air-date, then title.

    ``airdate_pattern`` may contain {year}/{month}/{p_month}/{month_name}/
    {short_month}/{day}/{p_day} placeholders substituted from video.ep_airdate.
    Returns the pathified URL of the first match; falls through (None) otherwise.
    """
    # py2: normalize unicode show urls to utf-8 bytes before logging/joining
    if isinstance(show_url, unicode): show_url = show_url.encode('utf-8')
    log_utils.log('Default Episode Url: |%s|%s|%s|%s|' % (self.base_url, show_url, str(video), data), log_utils.LOGDEBUG)
    # show_url may already be absolute
    if not show_url.startswith('http'):
        url = urlparse.urljoin(self.base_url, show_url)
    else:
        url = show_url
    html = self._http_get(url, data=data, headers=headers, method=method, cache_limit=2)
    if html:
        force_title = scraper_utils.force_title(video)
        if not force_title:
            # 1) direct season/episode regex match
            if episode_pattern:
                match = re.search(episode_pattern, html, re.DOTALL)
                if match:
                    return scraper_utils.pathify_url(match.group(1))
            # 2) air-date match (opt-in via addon setting)
            if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
                airdate_pattern = airdate_pattern.replace('{year}', str(video.ep_airdate.year))
                airdate_pattern = airdate_pattern.replace('{month}', str(video.ep_airdate.month))
                airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (video.ep_airdate.month))
                airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[video.ep_airdate.month - 1])
                airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[video.ep_airdate.month - 1])
                airdate_pattern = airdate_pattern.replace('{day}', str(video.ep_airdate.day))
                airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (video.ep_airdate.day))
                log_utils.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)
                match = re.search(airdate_pattern, html, re.DOTALL | re.I)
                if match:
                    return scraper_utils.pathify_url(match.group(1))
        else:
            log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
        # 3) episode-title match: used when forced, or as opt-in fallback;
        # title_pattern must define named groups "title" and "url"
        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
                episode = match.groupdict()
                if norm_title == scraper_utils.normalize_title(episode['title']):
                    return scraper_utils.pathify_url(episode['url'])
def _default_get_episode_url(self, show_url, video, episode_pattern, title_pattern='', airdate_pattern='', data=None, headers=None, method=None):
    """Shared episode-url resolver: try episode_pattern, then air-date, then title.

    ``airdate_pattern`` may contain {year}/{month}/{p_month}/{month_name}/
    {short_month}/{day}/{p_day} placeholders substituted from video.ep_airdate.
    Returns the pathified URL of the first match; falls through (None) otherwise.
    """
    log_utils.log('Default Episode Url: |%s|%s|%s|%s|' % (self.base_url, show_url, str(video), data), log_utils.LOGDEBUG)
    # show_url may already be absolute
    if not show_url.startswith('http'):
        url = urlparse.urljoin(self.base_url, show_url)
    else:
        url = show_url
    html = self._http_get(url, data=data, headers=headers, method=method, cache_limit=2)
    if html:
        force_title = scraper_utils.force_title(video)
        if not force_title:
            # 1) direct season/episode regex match
            if episode_pattern:
                match = re.search(episode_pattern, html, re.DOTALL)
                if match:
                    return scraper_utils.pathify_url(match.group(1))
            # 2) air-date match (opt-in via addon setting)
            if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
                airdate_pattern = airdate_pattern.replace('{year}', str(video.ep_airdate.year))
                airdate_pattern = airdate_pattern.replace('{month}', str(video.ep_airdate.month))
                airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (video.ep_airdate.month))
                airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[video.ep_airdate.month - 1])
                airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[video.ep_airdate.month - 1])
                airdate_pattern = airdate_pattern.replace('{day}', str(video.ep_airdate.day))
                airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (video.ep_airdate.day))
                log_utils.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)
                match = re.search(airdate_pattern, html, re.DOTALL | re.I)
                if match:
                    return scraper_utils.pathify_url(match.group(1))
        else:
            log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
        # 3) episode-title match: used when forced, or as opt-in fallback;
        # title_pattern must define named groups "title" and "url"
        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
                episode = match.groupdict()
                if norm_title == scraper_utils.normalize_title(episode['title']):
                    return scraper_utils.pathify_url(episode['url'])
def __episode_match(self, video, show_url):
    # Locate the episode's <li> fragment on the show page; falls through (None) on no match.
    show_url = scraper_utils.urljoin(self.base_url, show_url)
    headers = {'Referer': ''}  # site rejects requests carrying a referer
    html = self._http_get(show_url, headers=headers, cache_limit=2)
    force_title = scraper_utils.force_title(video)
    if not force_title:
        # episode list items carry ids like "season1-2"
        match = dom_parser2.parse_dom(
            html, 'li', {'id': 'season%s-%s' % (video.season, video.episode)})
        if match:
            return match[0].content
    if (force_title or kodi.get_setting('title-fallback')
            == 'true') and video.ep_title:
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for episode in re.finditer(
                '''<li\s+id=['"]?season\d+-\d+['"]?>.*?</ul>''', html,
                re.DOTALL):
            ep_title = dom_parser2.parse_dom(episode.group(0), 'h2')
            if ep_title and norm_title == scraper_utils.normalize_title(
                    ep_title[0].content):
                # NOTE(review): this branch returns the re.Match object while the
                # branch above returns an HTML string -- looks inconsistent; confirm
                # what callers expect before changing.
                return episode
def _get_episode_url(self, show_url, video):
    """Return the show page path when the episode is present there and
    title-only search is not forced; falls through (None) otherwise."""
    if scraper_utils.force_title(video):
        return None
    if self.__match_episode(show_url, video):
        return scraper_utils.pathify_url(show_url)