def _get_episode_url(self, show_url, video): params = urlparse.parse_qs(show_url) cmd = '{"jsonrpc": "2.0", "method": "VideoLibrary.GetEpisodes", "params": {"tvshowid": %s, "season": %s, "filter": {"field": "%s", "operator": "is", "value": "%s"}, \ "limits": { "start" : 0, "end": 25 }, "properties" : ["title", "season", "episode", "file", "streamdetails"], "sort": { "order": "ascending", "method": "label", "ignorearticle": true }}, "id": "libTvShows"}' base_url = 'video_type=%s&id=%s' episodes = [] force_title = self._force_title(video) if not force_title: run = cmd % (params['id'][0], video.season, 'episode', video.episode) meta = xbmc.executeJSONRPC(run) meta = json.loads(meta) log_utils.log('Episode Meta: %s' % (meta), log_utils.LOGDEBUG) if 'result' in meta and 'episodes' in meta['result']: episodes = meta['result']['episodes'] else: log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG) if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and not episodes: run = cmd % (params['id'][0], video.season, 'title', video.ep_title) meta = xbmc.executeJSONRPC(run) meta = json.loads(meta) log_utils.log('Episode Title Meta: %s' % (meta), log_utils.LOGDEBUG) if 'result' in meta and 'episodes' in meta['result']: episodes = meta['result']['episodes'] for episode in episodes: if episode['file'].endswith('.strm'): continue return base_url % (video.video_type, episode['episodeid'])
def _get_episode_url(self, show_url, video):
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    if html:
        force_title = scraper_utils.force_title(video)
        episodes = dom_parser.parse_dom(html, 'div', {'class': '\s*el-item\s*'})
        if not force_title:
            episode_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (int(video.season), int(video.episode))
            match = re.search(episode_pattern, html)
            if match:
                return scraper_utils.pathify_url(match.group(1))

            if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                airdate_pattern = '%02d-%02d-%d' % (video.ep_airdate.day, video.ep_airdate.month, video.ep_airdate.year)
                for episode in episodes:
                    ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                    ep_airdate = dom_parser.parse_dom(episode, 'div', {'class': 'date'})
                    if ep_url and ep_airdate:
                        ep_airdate = ep_airdate[0].strip()
                        if airdate_pattern == ep_airdate:
                            return scraper_utils.pathify_url(ep_url[0])

        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for episode in episodes:
                ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                ep_title = dom_parser.parse_dom(episode, 'div', {'class': 'e-name'})
                if ep_url and ep_title and norm_title == scraper_utils.normalize_title(ep_title[0]):
                    return scraper_utils.pathify_url(ep_url[0])
def _get_episode_url(self, show_url, video):
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=8)
    pattern = "<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (video.season)
    match = re.search(pattern, html, re.DOTALL)
    if match:
        fragment = match.group(1)
        ep_ids = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'}, ret='class')
        episodes = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'})
        airdates = dom_parser.parse_dom(fragment, 'span', {'class': 'airdate'})
        ep_airdate = video.ep_airdate.strftime('%Y-%m-%d') if isinstance(video.ep_airdate, datetime.date) else ''
        norm_title = scraper_utils.normalize_title(video.ep_title)
        num_id, airdate_id, title_id = '', '', ''
        for episode, airdate, ep_id in zip(episodes, airdates, ep_ids):
            if ep_airdate and ep_airdate == airdate:
                airdate_id = ep_id
            match = re.search('(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode)
            if match:
                ep_num, ep_title = match.groups()
                if int(ep_num) == int(video.episode):
                    num_id = ep_id
                if norm_title and norm_title in scraper_utils.normalize_title(ep_title):
                    title_id = ep_id

        best_id = ''
        if not scraper_utils.force_title(video):
            if num_id:
                best_id = num_id
            if kodi.get_setting('airdate-fallback') == 'true' and airdate_id:
                best_id = airdate_id
            if kodi.get_setting('title-fallback') == 'true' and title_id:
                best_id = title_id
        else:
            if title_id:
                best_id = title_id

        if best_id:
            return EP_URL % (best_id)
def __update_scraper_py(self):
    try:
        py_path = os.path.join(kodi.get_path(), 'scrapers', 'iflix_scraper.py')
        self.exists = os.path.exists(py_path)
        scraper_url = kodi.get_setting('%s-scraper_url' % (self.get_name()))
        scraper_password = kodi.get_setting('%s-scraper_password' % (self.get_name()))
        if scraper_url and scraper_password and (not self.exists or os.path.getmtime(py_path) < time.time() - (24 * 60 * 60)):
            try:
                req = urllib2.urlopen(scraper_url)
                cipher_text = req.read()
            except Exception as e:
                log_utils.log('Failure during %s scraper get: %s' % (self.get_name(), e), log_utils.LOGWARNING)
                return

            if cipher_text:
                scraper_key = hashlib.sha256(scraper_password).digest()
                decrypter = pyaes.Decrypter(pyaes.AESModeOfOperationCBC(scraper_key, IV))
                new_py = decrypter.feed(cipher_text)
                new_py += decrypter.feed()

                old_py = ''
                if os.path.exists(py_path):
                    with open(py_path, 'r') as f:
                        old_py = f.read()

                log_utils.log('%s path: %s, new_py: %s, match: %s' % (self.get_name(), py_path, bool(new_py), new_py == old_py), log_utils.LOGDEBUG)
                if old_py != new_py:
                    with open(py_path, 'w') as f:
                        f.write(new_py)
    except Exception as e:
        log_utils.log('Failure during %s scraper update: %s' % (self.get_name(), e), log_utils.LOGWARNING)
    finally:
        self.exists = os.path.exists(py_path)
def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
    self.timeout = timeout
    self.base_url = kodi.get_setting('%s-base_url' % (self.get_name()))
    self.username = kodi.get_setting('%s-username' % (self.get_name()))
    self.password = kodi.get_setting('%s-password' % (self.get_name()))
    self.max_results = int(kodi.get_setting('%s-result_limit' % (self.get_name())))
def __update_scraper_py(self):
    try:
        py_path = os.path.join(kodi.get_path(), 'scrapers', 'shush_scraper.py')
        exists = os.path.exists(py_path)
        scraper_url = kodi.get_setting('%s-scraper_url' % (self.get_name()))
        scraper_password = kodi.get_setting('%s-scraper_password' % (self.get_name()))
        if scraper_url and scraper_password and (not exists or os.path.getmtime(py_path) < time.time() - (4 * 60 * 60)):
            try:
                req = urllib2.urlopen(scraper_url)
                cipher_text = req.read()
            except Exception as e:
                log_utils.log('Failure during %s scraper get: %s' % (self.get_name(), e), log_utils.LOGWARNING)
                return

            if cipher_text:
                scraper_key = hashlib.sha256(scraper_password).digest()
                decrypter = pyaes.Decrypter(pyaes.AESModeOfOperationCBC(scraper_key, IV))
                new_py = decrypter.feed(cipher_text)
                new_py += decrypter.feed()

                old_py = ''
                if os.path.exists(py_path):
                    with open(py_path, 'r') as f:
                        old_py = f.read()

                log_utils.log('%s path: %s, new_py: %s, match: %s' % (self.get_name(), py_path, bool(new_py), new_py == old_py), log_utils.LOGDEBUG)
                if old_py != new_py:
                    with open(py_path, 'w') as f:
                        f.write(new_py)
    except Exception as e:
        log_utils.log('Failure during %s scraper update: %s' % (self.get_name(), e), log_utils.LOGWARNING)
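# Illustrative sketch (not part of the add-on): round-trips the pyaes CBC scheme the
# __update_scraper_py() functions above rely on -- sha256(password) as the AES key, a shared
# 16-byte IV, and Encrypter/Decrypter handling the block padding. The password, IV value and
# payload below are invented for the example.
import hashlib
import pyaes

IV = '\0' * 16  # assumption: a fixed 16-byte IV agreed between packer and add-on
password = 'example-password'
key = hashlib.sha256(password).digest()  # 32 bytes -> AES-256

plain_py = "print 'hello from an updated scraper'\n"
encrypter = pyaes.Encrypter(pyaes.AESModeOfOperationCBC(key, IV))
cipher_text = encrypter.feed(plain_py)
cipher_text += encrypter.feed()  # flush and pad the final block

decrypter = pyaes.Decrypter(pyaes.AESModeOfOperationCBC(key, IV))
new_py = decrypter.feed(cipher_text)
new_py += decrypter.feed()  # strip the padding
assert new_py == plain_py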
def update_all_scrapers():
    try:
        last_check = int(kodi.get_setting('last_list_check'))
    except:
        last_check = 0

    now = int(time.time())
    list_url = kodi.get_setting('scraper_url')
    scraper_password = kodi.get_setting('scraper_password')
    list_path = os.path.join(kodi.translate_path(kodi.get_profile()), 'scraper_list.txt')
    exists = os.path.exists(list_path)
    if list_url and scraper_password and (not exists or last_check < (now - (24 * 60 * 60))):
        scraper_list = utils2.get_and_decrypt(list_url, scraper_password)
        if scraper_list:
            try:
                with open(list_path, 'w') as f:
                    f.write(scraper_list)

                kodi.set_setting('last_list_check', str(now))
                kodi.set_setting('scraper_last_update', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(now)))
                for line in scraper_list.split('\n'):
                    line = line.replace(' ', '')
                    if line:
                        scraper_url, filename = line.split(',')
                        if scraper_url.startswith('http'):
                            update_scraper(filename, scraper_url)
            except Exception as e:
                log_utils.log('Exception during scraper update: %s' % (e), log_utils.LOGWARNING)
def _default_get_episode_url(self, show_url, video, episode_pattern, title_pattern='', airdate_pattern='', data=None, headers=None):
    log_utils.log('Default Episode Url: |%s|%s|%s|%s|' % (self.base_url, show_url, str(video).decode('utf-8', 'replace'), data), log_utils.LOGDEBUG)
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, data=data, headers=headers, cache_limit=2)
    if html:
        force_title = self._force_title(video)
        if not force_title:
            match = re.search(episode_pattern, html, re.DOTALL)
            if match:
                return self._pathify_url(match.group(1))

            if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
                airdate_pattern = airdate_pattern.replace('{year}', str(video.ep_airdate.year))
                airdate_pattern = airdate_pattern.replace('{month}', str(video.ep_airdate.month))
                airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (video.ep_airdate.month))
                airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[video.ep_airdate.month - 1])
                airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[video.ep_airdate.month - 1])
                airdate_pattern = airdate_pattern.replace('{day}', str(video.ep_airdate.day))
                airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (video.ep_airdate.day))
                log_utils.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)
                match = re.search(airdate_pattern, html, re.DOTALL | re.I)
                if match:
                    return self._pathify_url(match.group(1))
        else:
            log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)

        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
            norm_title = self._normalize_title(video.ep_title)
            for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
                episode = match.groupdict()
                if norm_title == self._normalize_title(episode['title']):
                    return self._pathify_url(episode['url'])
def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
    self.timeout = timeout
    self.base_url = kodi.get_setting('%s-base_url' % (self.get_name()))
    self.username = kodi.get_setting('%s-username' % (self.get_name()))
    self.password = kodi.get_setting('%s-password' % (self.get_name()))
    self.include_paid = kodi.get_setting('%s-include_premium' % (self.get_name())) == 'true'
def _default_get_episode_url(self, show_url, video, episode_pattern, title_pattern='', airdate_pattern='', data=None, headers=None):
    log_utils.log('Default Episode Url: |%s|%s|%s|%s|' % (self.base_url, show_url, str(video).decode('utf-8', 'replace'), data), log_utils.LOGDEBUG)
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, data=data, headers=headers, cache_limit=2)
    if html:
        force_title = self._force_title(video)
        if not force_title:
            match = re.search(episode_pattern, html, re.DOTALL)
            if match:
                return self._pathify_url(match.group(1))

            if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
                airdate_pattern = airdate_pattern.replace('{year}', str(video.ep_airdate.year))
                airdate_pattern = airdate_pattern.replace('{month}', str(video.ep_airdate.month))
                airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (video.ep_airdate.month))
                airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[video.ep_airdate.month - 1])
                airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[video.ep_airdate.month - 1])
                airdate_pattern = airdate_pattern.replace('{day}', str(video.ep_airdate.day))
                airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (video.ep_airdate.day))
                log_utils.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)
                match = re.search(airdate_pattern, html, re.DOTALL | re.I)
                if match:
                    return self._pathify_url(match.group(1))
        else:
            log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)

        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
            norm_title = self._normalize_title(video.ep_title)
            for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
                url, title = match.groups()
                if norm_title == self._normalize_title(title):
                    return self._pathify_url(url)
def _set_cookies(self, base_url, cookies): cookie_file = os.path.join(COOKIEPATH, "%s_cookies.lwp" % (self.get_name())) cj = cookielib.LWPCookieJar(cookie_file) try: cj.load(ignore_discard=True) except: pass if kodi.get_setting("cookie_debug") == "true": log_utils.log("Before Cookies: %s - %s" % (self, scraper_utils.cookies_as_str(cj)), log_utils.LOGDEBUG) domain = urlparse.urlsplit(base_url).hostname for key in cookies: c = cookielib.Cookie( 0, key, str(cookies[key]), port=None, port_specified=False, domain=domain, domain_specified=True, domain_initial_dot=False, path="/", path_specified=True, secure=False, expires=None, discard=False, comment=None, comment_url=None, rest={}, ) cj.set_cookie(c) cj.save(ignore_discard=True) if kodi.get_setting("cookie_debug") == "true": log_utils.log("After Cookies: %s - %s" % (self, scraper_utils.cookies_as_str(cj)), log_utils.LOGDEBUG) return cj
def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
    self.timeout = timeout
    self.base_url = kodi.get_setting('%s-base_url' % (self.get_name()))
    self.username = kodi.get_setting('%s-username' % (self.get_name()))
    self.password = kodi.get_setting('%s-password' % (self.get_name()))
    self.max_results = int(kodi.get_setting('%s-result_limit' % (self.get_name())))
    self.max_gb = kodi.get_setting('%s-size_limit' % (self.get_name()))
    self.max_bytes = int(self.max_gb) * 1024 * 1024 * 1024
def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
    self.timeout = timeout
    self.base_url = kodi.get_setting('%s-base_url' % (self.get_name()))
    self.username = kodi.get_setting('%s-username' % (self.get_name()))
    self.password = kodi.get_setting('%s-password' % (self.get_name()))
    self.cookie = {'chickenlicker': '%s%%3A%s' % (self.username, self.password)}
def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
    self.timeout = timeout
    self.base_url = kodi.get_setting('%s-base_url' % (self.get_name()))
    self.last_call = 0
    device_id = kodi.get_setting('%s-device_id' % (self.get_name()))
    if device_id not in ['', '0']:
        self.device_id = device_id
    else:
        self.device_id = None
def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
    self.timeout = timeout
    self.base_url = kodi.get_setting('%s-base_url' % (self.get_name()))
    qual_filter = 5 - int(kodi.get_setting('%s_quality' % VIDEO_TYPES.EPISODE))
    self.q_order = [dd_qual for dd_qual in DD_QUALITIES if Q_ORDER[QUALITY_MAP[dd_qual]] <= qual_filter]
def _get_episode_url(self, show_url, video):
    log_utils.log('WS Episode Url: |%s|%s|' % (show_url, str(video).decode('utf-8', 'replace')), log_utils.LOGDEBUG)
    html = self._http_get(show_url, cache_limit=2)
    js_result = scraper_utils.parse_json(html, show_url)
    if 'results' in js_result and '0' in js_result['results'] and 'episodes' in js_result['results']['0']:
        seasons = js_result['results']['0']['episodes']
        force_title = scraper_utils.force_title(video)
        if not force_title:
            if str(video.season) in seasons:
                season = seasons[str(video.season)]
                if isinstance(season, list):
                    season = dict((ep['episode'], ep) for ep in season)
                if str(video.episode) in season:
                    url = season[str(video.episode)]['url']
                    return scraper_utils.pathify_url(url.replace('/json', ''))

            if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                airdate_pattern = video.ep_airdate.strftime('%d/%m/%Y')
                for season in seasons:
                    if season.lower() == 'epcount':
                        continue
                    episodes = seasons[season]
                    if isinstance(episodes, dict):
                        episodes = [episodes[key] for key in episodes]
                    for episode in episodes:
                        if airdate_pattern == episode['release']:
                            url = episode['url']
                            return scraper_utils.pathify_url(url.replace('/json', ''))
        else:
            log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)

        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for season in seasons:
                if season.lower() == 'epcount':
                    continue
                episodes = seasons[season]
                if isinstance(episodes, dict):
                    episodes = [episodes[key] for key in episodes]
                for episode in episodes:
                    if episode['name'] is not None and norm_title == scraper_utils.normalize_title(episode['name']):
                        url = episode['url']
                        return scraper_utils.pathify_url(url.replace('/json', ''))
def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
    self.timeout = timeout
    if kodi.get_setting('%s-use_https' % (self.__class__.base_name)) == 'true':
        scheme = 'https'
        prefix = 'www'
    else:
        scheme = 'http'
        prefix = 'http'
    base_url = kodi.get_setting('%s-base_url' % (self.__class__.base_name))
    self.base_url = scheme + '://' + prefix + '.' + base_url
    self.username = kodi.get_setting('%s-username' % (self.__class__.base_name))
    self.password = kodi.get_setting('%s-password' % (self.__class__.base_name))
def get_ua():
    try:
        last_gen = int(kodi.get_setting('last_ua_create'))
    except:
        last_gen = 0

    if not kodi.get_setting('current_ua') or last_gen < (time.time() - (7 * 24 * 60 * 60)):
        index = random.randrange(len(RAND_UAS))
        user_agent = RAND_UAS[index].format(win_ver=random.choice(WIN_VERS), feature=random.choice(FEATURES), br_ver=random.choice(BR_VERS[index]))
        log_utils.log('Creating New User Agent: %s' % (user_agent), log_utils.LOGDEBUG)
        kodi.set_setting('current_ua', user_agent)
        kodi.set_setting('last_ua_create', str(int(time.time())))
    else:
        user_agent = kodi.get_setting('current_ua')
    return user_agent
def _get_episode_url(self, show_url, video): params = urlparse.parse_qs(show_url) cmd = '{"jsonrpc": "2.0", "method": "VideoLibrary.GetEpisodes", "params": {"tvshowid": %s, "season": %s, "filter": {"field": "%s", "operator": "is", "value": "%s"}, \ "limits": { "start" : 0, "end": 25 }, "properties" : ["title", "season", "episode", "file", "streamdetails"], "sort": { "order": "ascending", "method": "label", "ignorearticle": true }}, "id": "libTvShows"}' base_url = 'video_type=%s&id=%s' episodes = [] force_title = scraper_utils.force_title(video) if not force_title: run = cmd % (params['id'][0], video.season, 'episode', video.episode) meta = xbmc.executeJSONRPC(run) meta = scraper_utils.parse_json(meta) log_utils.log('Episode Meta: %s' % (meta), log_utils.LOGDEBUG) if 'result' in meta and 'episodes' in meta['result']: episodes = meta['result']['episodes'] else: log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG) if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and not episodes: run = cmd % (params['id'][0], video.season, 'title', video.ep_title) meta = xbmc.executeJSONRPC(run) meta = scraper_utils.parse_json(meta) log_utils.log('Episode Title Meta: %s' % (meta), log_utils.LOGDEBUG) if 'result' in meta and 'episodes' in meta['result']: episodes = meta['result']['episodes'] for episode in episodes: if episode['file'].endswith('.strm'): continue return base_url % (video.video_type, episode['episodeid'])
def __match_episode(self, video, norm_title, title, hash_id=None):
    sxe_patterns = [
        '(.*?)[._ -]s([0-9]+)[._ -]*e([0-9]+)',
        '(.*?)[._ -]([0-9]+)x([0-9]+)',
        '(.*?)[._ -]([0-9]+)([0-9][0-9])',
        '(.*?)[._ -]?season[._ -]*([0-9]+)[._ -]*-?[._ -]*episode[._ -]*([0-9]+)',
        '(.*?)[._ -]\[s([0-9]+)\][._ -]*\[e([0-9]+)\]',
        '(.*?)[._ -]s([0-9]+)[._ -]*ep([0-9]+)']

    show_title = ''
    for pattern in sxe_patterns:
        match = re.search(pattern, title, re.I)
        if match:
            temp_title, season, episode = match.groups()
            if int(season) == int(video.season) and int(episode) == int(video.episode):
                show_title = temp_title
                break
    else:
        airdate_fallback = kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate
        if video.ep_airdate and airdate_fallback:
            airdate_pattern = '(.*?)[. _]%s[. _]%02d[. _]%02d[. _]' % (video.ep_airdate.year, video.ep_airdate.month, video.ep_airdate.day)
            match = re.search(airdate_pattern, title)
            if match:
                show_title = match.group(1)

    if show_title and norm_title in scraper_utils.normalize_title(show_title):
        return 'hash=%s' % (hash_id)
def _get_episode_url(self, video):
    url = urlparse.urljoin(self.base_url, '/torrent/list')
    js_data = self._http_get(url, cache_limit=0)
    norm_title = scraper_utils.normalize_title(video.title)
    if 'torrents' in js_data:
        airdate_fallback = kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate
        show_title = ''
        if not scraper_utils.force_title(video):
            for item in js_data['torrents']:
                sxe_pattern = '(.*?)[. ][Ss]%02d[Ee]%02d[. ]' % (int(video.season), int(video.episode))
                match = re.search(sxe_pattern, item['name'])
                if match:
                    show_title = match.group(1)
                elif airdate_fallback:
                    airdate_pattern = '(.*?)[. ]%s[. ]%02d[. ]%02d[. ]' % (video.ep_airdate.year, video.ep_airdate.month, video.ep_airdate.day)
                    match = re.search(airdate_pattern, item['name'])
                    if match:
                        show_title = match.group(1)

                if show_title and norm_title in scraper_utils.normalize_title(show_title):
                    return 'hash=%s' % (item['hash'])
def search(self, video_type, title, year, season=''):
    url = urlparse.urljoin(self.base_url, 'http://ororo.tv/en')
    if video_type == VIDEO_TYPES.MOVIE:
        url += '/movies'
    html = self._http_get(url, cache_limit=.25)
    results = []
    norm_title = scraper_utils.normalize_title(title)
    include_paid = kodi.get_setting('%s-include_premium' % (self.get_name())) == 'true'
    for match in re.finditer('''<span class='value'>(\d{4})(.*?)href="([^"]+)[^>]+>([^<]+)''', html, re.DOTALL):
        match_year, middle, url, match_title = match.groups()
        if not include_paid and video_type == VIDEO_TYPES.MOVIE and 'paid accounts' in middle:
            continue

        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)

    return results
def _get_episode_url(self, show_url, video): url = urlparse.urljoin(self.base_url, show_url) html = self._http_get(url, cache_limit=1) match = re.search("var\s+id\s*=\s*'?(\d+)'?", html) if match: show_id = match.group(1) season_url = SEASON_URL % (show_id, video.season, str(int(time.time()) * 1000), self.__get_token()) season_url = urlparse.urljoin(self.base_url, season_url) html = self._http_get(season_url, cache_limit=1) try: js_data = json.loads(html) except ValueError: log_utils.log('Invalid JSON returned: %s: %s' % (url, html), log_utils.LOGWARNING) else: force_title = self._force_title(video) if not force_title: for episode in js_data: if int(episode['episode_number']) == int(video.episode): return LINK_URL % (show_id, video.season, episode['episode_number']) if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title: norm_title = self._normalize_title(video.ep_title) for episode in js_data: if norm_title == self._normalize_title(episode['title']): return LINK_URL % (show_id, video.season, episode['episode_number'])
def __check_config(self, now):
    last_config_call = now - int(kodi.get_setting('%s-last-config' % (self.get_name())))
    if last_config_call > 8 * 60 * 60:
        url = urlparse.urljoin(self.base_url, CONFIG_URL)
        url += self.__get_extra(now)
        _html = super(GVCenter_Scraper, self)._cached_http_get(url, self.base_url, self.timeout, headers=HEADERS, cache_limit=8)
        kodi.set_setting('%s-last-config' % (self.get_name()), str(int(now)))
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        page_urls = [page_url]
        if kodi.get_setting('scraper_url'):
            page_urls += self.__get_page_urls(html)

        for page_url in page_urls:
            html = self._http_get(page_url, cache_limit=.5)
            subs = 'Turkish Subtitles'
            fragment = dom_parser.parse_dom(html, 'li', {'class': 'active'})
            if fragment:
                frag_class = dom_parser.parse_dom(fragment[0], 'span', ret='class')
                if frag_class:
                    if frag_class[0] == 'icon-en':
                        subs = 'English Subtitles'
                    elif frag_class[0] == 'icon-orj':
                        subs = ''

            hosters += self.__get_cloud_links(html, page_url, subs)
            hosters += self.__get_embedded_links(html, subs)
            hosters += self.__get_iframe_links(html, subs)

    return hosters
def _get_episode_url(self, show_url, video):
    params = urlparse.parse_qs(show_url)
    catalog_id = params['catalog_id'][0]
    sid = hashlib.md5('content%scthd' % (catalog_id)).hexdigest()
    source_url = CONTENT_URL % (catalog_id, sid)
    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    try:
        js_data = json.loads(html)
    except ValueError:
        log_utils.log('Invalid JSON returned for: %s' % (url), xbmc.LOGWARNING)
    else:
        force_title = self._force_title(video)
        if not force_title:
            for episode in js_data['listvideos']:
                if ' S%02dE%02d ' % (int(video.season), int(video.episode)) in episode['film_name']:
                    return EPISODE_URL % (video.video_type, params['catalog_id'][0], video.season, video.episode)

        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            norm_title = self._normalize_title(video.ep_title)
            for episode in js_data['listvideos']:
                match = re.search('-\s*S(\d+)E(\d+)\s*-\s*(.*)', episode['film_name'])
                if match:
                    season, episode, title = match.groups()
                    if title and norm_title == self._normalize_title(title):
                        return EPISODE_URL % (video.video_type, params['catalog_id'][0], int(season), int(episode))
def update_scraper(filename, scraper_url):
    try:
        if not filename:
            return
        py_path = os.path.join(kodi.get_path(), 'scrapers', filename)
        exists = os.path.exists(py_path)
        scraper_password = kodi.get_setting('scraper_password')
        if scraper_url and scraper_password:
            new_py = utils2.get_and_decrypt(scraper_url, scraper_password)
            if new_py:
                if exists:
                    with open(py_path, 'r') as f:
                        old_py = f.read()
                else:
                    old_py = ''

                log_utils.log('%s path: %s, new_py: %s, match: %s' % (filename, py_path, bool(new_py), new_py == old_py), log_utils.LOGDEBUG)
                if old_py != new_py:
                    with open(py_path, 'w') as f:
                        f.write(new_py)
    except Exception as e:
        log_utils.log('Failure during %s scraper update: %s' % (filename, e), log_utils.LOGWARNING)
def _blog_get_url(self, video, delim='.'):
    url = None
    self.create_db_connection()
    result = self.db_connection.get_related_url(video.video_type, video.title, video.year, self.get_name(), video.season, video.episode)
    if result:
        url = result[0][0]
        log_utils.log('Got local related url: |%s|%s|%s|%s|%s|' % (video.video_type, video.title, video.year, self.get_name(), url))
    else:
        select = int(kodi.get_setting('%s-select' % (self.get_name())))
        if video.video_type == VIDEO_TYPES.EPISODE:
            temp_title = re.sub('[^A-Za-z0-9 ]', '', video.title)
            if not self._force_title(video):
                search_title = '%s S%02dE%02d' % (temp_title, int(video.season), int(video.episode))
                if isinstance(video.ep_airdate, datetime.date):
                    fallback_search = '%s %s' % (temp_title, video.ep_airdate.strftime('%Y{0}%m{0}%d'.format(delim)))
                else:
                    fallback_search = ''
            else:
                if not video.ep_title:
                    return None
                search_title = '%s %s' % (temp_title, video.ep_title)
                fallback_search = ''
        else:
            search_title = '%s %s' % (video.title, video.year)
            fallback_search = ''

        results = self.search(video.video_type, search_title, video.year)
        if not results and fallback_search:
            results = self.search(video.video_type, fallback_search, video.year)
        if results:
            # TODO: First result isn't always the most recent...
            best_result = results[0]
            if select != 0:
                best_qorder = 0
                for result in results:
                    match = re.search('\[(.*)\]$', result['title'])
                    if match:
                        q_str = match.group(1)
                        quality = self._blog_get_quality(video, q_str, '')
                        # print 'result: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality])
                        if Q_ORDER[quality] > best_qorder:
                            # print 'Setting best as: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality])
                            best_result = result
                            best_qorder = Q_ORDER[quality]

            url = best_result['url']
            self.db_connection.set_related_url(video.video_type, video.title, video.year, self.get_name(), url)

    return url
def _get_episode_url(self, show_url, video):
    sxe = '.S%02dE%02d.' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    try:
        ep_airdate = video.ep_airdate.strftime('.%Y.%m.%d.')
    except:
        ep_airdate = ''

    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    if (sxe in title) or (ep_airdate and ep_airdate in title):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('<strong>(.*?)</strong>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)

        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def _get_episode_url(self, show_url, video): url = urlparse.urljoin(self.base_url, show_url) html = self._http_get(url, cache_limit=1) match = re.search("var\s+id\s*=\s*'?(\d+)'?", html) if match: show_id = match.group(1) season_url = SEASON_URL % (show_id, video.season, str(int(time.time()) * 1000)) season_url = urlparse.urljoin(self.base_url, season_url) html = self._http_get(season_url, cache_limit=1) try: js_data = json.loads(html) except ValueError: log_utils.log("Invalid JSON returned: %s: %s" % (url, html), log_utils.LOGWARNING) else: force_title = self._force_title(video) if not force_title: for episode in js_data: if int(episode["episode_number"]) == int(video.episode): return LINK_URL % (show_id, video.season, episode["episode_number"]) if (force_title or kodi.get_setting("title-fallback") == "true") and video.ep_title: norm_title = self._normalize_title(video.ep_title) for episode in js_data: if norm_title == self._normalize_title(episode["title"]): return LINK_URL % (show_id, video.season, episode["episode_number"])
def _get_episode_url(self, show_url, video):
    sxe = '(\.|_| )S%02dE%02d(\.|_| )' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    try:
        airdate_pattern = video.ep_airdate.strftime('(\.|_| )%Y(\.|_| )%m(\.|_| )%d(\.|_| )')
    except:
        airdate_pattern = ''

    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    if re.search(sxe, title) or (airdate_pattern and re.search(airdate_pattern, title)):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('</strong>(.*?)</p>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)

        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def _get_episode_url(self, show_url, video):
    sxe = '(\.|_| )S%02dE%02d(\.|_| )' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    try:
        airdate_pattern = video.ep_airdate.strftime('(\.|_| )%Y(\.|_| )%m(\.|_| )%d(\.|_| )')
    except:
        airdate_pattern = ''

    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for post in posts:
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                if match:
                    url, title = match.groups()
                    if not force_title:
                        if re.search(sxe, title) or (airdate_pattern and re.search(airdate_pattern, title)):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)

        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        sources.update(self.__get_post_links(html, video))
        if kodi.get_setting('%s-include_comments' % (self.get_name())) == 'true':
            for comment in dom_parser.parse_dom(html, 'div', {'id': 'commentbody-\d+'}):
                sources.update(self.__get_comment_links(comment, video))

        for source in sources:
            if re.search('\.part\.?\d+', source) or '.rar' in source or 'sample' in source or source.endswith('.nfo'):
                continue
            host = urlparse.urlparse(source).hostname
            hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': sources[source], 'direct': False}
            hosters.append(hoster)

    return hosters
def _get_episode_url(self, show_url, video): url = urlparse.urljoin(self.base_url, show_url) html = self._http_get(url, cache_limit=1) match = re.search("var\s+id\s*=\s*'?(\d+)'?", html) if match: show_id = match.group(1) season_url = SEASON_URL % (show_id, video.season, str(int(time.time()) * 1000), self.__get_token()) season_url = urlparse.urljoin(self.base_url, season_url) html = self._http_get(season_url, cache_limit=1) js_data = self._parse_json(html, season_url) force_title = self._force_title(video) if not force_title: for episode in js_data: if int(episode['episode_number']) == int(video.episode): return LINK_URL % (show_id, video.season, episode['episode_number']) if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title: norm_title = self._normalize_title(video.ep_title) for episode in js_data: if norm_title == self._normalize_title(episode['title']): return LINK_URL % (show_id, video.season, episode['episode_number'])
def _get_episode_url(self, show_url, video):
    log_utils.log('WS Episode Url: |%s|%s|' % (show_url, str(video).decode('utf-8', 'replace')), log_utils.LOGDEBUG)
    html = self._http_get(show_url, cache_limit=2)
    if html:
        try:
            js_result = json.loads(html)
        except ValueError:
            log_utils.log('Invalid JSON returned: %s: %s' % (show_url, html), log_utils.LOGWARNING)
        else:
            if 'results' in js_result and '0' in js_result['results'] and 'episodes' in js_result['results']['0']:
                seasons = js_result['results']['0']['episodes']
                force_title = self._force_title(video)
                if not force_title:
                    if str(video.season) in seasons:
                        season = seasons[str(video.season)]
                        if isinstance(season, list):
                            season = dict((ep['episode'], ep) for ep in season)
                        if str(video.episode) in season:
                            url = season[str(video.episode)]['url']
                            return self._pathify_url(url.replace('/json', ''))

                    if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                        airdate_pattern = video.ep_airdate.strftime('%d/%m/%Y')
                        for season in seasons:
                            if season.lower() == 'epcount':
                                continue
                            episodes = seasons[season]
                            if isinstance(episodes, dict):
                                episodes = [episodes[key] for key in episodes]
                            for episode in episodes:
                                if airdate_pattern == episode['release']:
                                    url = episode['url']
                                    return self._pathify_url(url.replace('/json', ''))
                else:
                    log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)

                if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
                    norm_title = self._normalize_title(video.ep_title)
                    for season in seasons:
                        if season.lower() == 'epcount':
                            continue
                        episodes = seasons[season]
                        if isinstance(episodes, dict):
                            episodes = [episodes[key] for key in episodes]
                        for episode in episodes:
                            if episode['name'] is not None and norm_title == self._normalize_title(episode['name']):
                                url = episode['url']
                                return self._pathify_url(url.replace('/json', ''))
def _default_get_episode_url( self, show_url, video, episode_pattern, title_pattern="", airdate_pattern="", data=None, headers=None ): log_utils.log( "Default Episode Url: |%s|%s|%s|%s|" % (self.base_url, show_url, str(video).decode("utf-8", "replace"), data), log_utils.LOGDEBUG, ) if not show_url.startswith("http"): url = urlparse.urljoin(self.base_url, show_url) else: url = show_url html = self._http_get(url, data=data, headers=headers, cache_limit=2) if html: force_title = scraper_utils.force_title(video) if not force_title: if episode_pattern: match = re.search(episode_pattern, html, re.DOTALL) if match: return scraper_utils.pathify_url(match.group(1)) if kodi.get_setting("airdate-fallback") == "true" and airdate_pattern and video.ep_airdate: airdate_pattern = airdate_pattern.replace("{year}", str(video.ep_airdate.year)) airdate_pattern = airdate_pattern.replace("{month}", str(video.ep_airdate.month)) airdate_pattern = airdate_pattern.replace("{p_month}", "%02d" % (video.ep_airdate.month)) airdate_pattern = airdate_pattern.replace("{month_name}", MONTHS[video.ep_airdate.month - 1]) airdate_pattern = airdate_pattern.replace("{short_month}", SHORT_MONS[video.ep_airdate.month - 1]) airdate_pattern = airdate_pattern.replace("{day}", str(video.ep_airdate.day)) airdate_pattern = airdate_pattern.replace("{p_day}", "%02d" % (video.ep_airdate.day)) log_utils.log("Air Date Pattern: %s" % (airdate_pattern), log_utils.LOGDEBUG) match = re.search(airdate_pattern, html, re.DOTALL | re.I) if match: return scraper_utils.pathify_url(match.group(1)) else: log_utils.log( "Skipping S&E matching as title search is forced on: %s" % (video.trakt_id), log_utils.LOGDEBUG ) if (force_title or kodi.get_setting("title-fallback") == "true") and video.ep_title and title_pattern: norm_title = scraper_utils.normalize_title(video.ep_title) for match in re.finditer(title_pattern, html, re.DOTALL | re.I): episode = match.groupdict() if norm_title == scraper_utils.normalize_title(episode["title"]): return scraper_utils.pathify_url(episode["url"])
def _get_episode_url(self, show_url, video): log_utils.log("WS Episode Url: |%s|%s|" % (show_url, str(video).decode("utf-8", "replace")), log_utils.LOGDEBUG) html = self._http_get(show_url, cache_limit=2) js_result = self._parse_json(html, show_url) if "results" in js_result and "0" in js_result["results"] and "episodes" in js_result["results"]["0"]: seasons = js_result["results"]["0"]["episodes"] force_title = self._force_title(video) if not force_title: if str(video.season) in seasons: season = seasons[str(video.season)] if isinstance(season, list): season = dict((ep["episode"], ep) for ep in season) if str(video.episode) in season: url = season[str(video.episode)]["url"] return self._pathify_url(url.replace("/json", "")) if kodi.get_setting("airdate-fallback") == "true" and video.ep_airdate: airdate_pattern = video.ep_airdate.strftime("%d/%M/%Y") for season in seasons: if season.lower() == "epcount": continue episodes = seasons[season] if isinstance(episodes, dict): episodes = [episodes[key] for key in episodes] for episode in episodes: if airdate_pattern == episode["release"]: url = episode["url"] return self._pathify_url(url.replace("/json", "")) else: log_utils.log( "Skipping S&E matching as title search is forced on: %s" % (video.trakt_id), log_utils.LOGDEBUG ) if (force_title or kodi.get_setting("title-fallback") == "true") and video.ep_title: norm_title = self._normalize_title(video.ep_title) for season in seasons: if season.lower() == "epcount": continue episodes = seasons[season] if isinstance(episodes, dict): episodes = [episodes[key] for key in episodes] for episode in episodes: if episode["name"] is not None and norm_title == self._normalize_title(episode["name"]): url = episode["url"] return self._pathify_url(url.replace("/json", ""))
def __check_config(self, now):
    last_config_call = now - int(kodi.get_setting('%s-last-config' % (self.get_name())))
    if self.device_id is None or last_config_call > 8 * 60 * 60:
        self.device_id = ''.join(random.choice(string.digits) for _ in xrange(15))
        kodi.set_setting('%s-device_id' % (self.get_name()), self.device_id)
        url = urlparse.urljoin(self.base_url, CONFIG_URL)
        url += self.__get_extra(now)
        _html = super(GVCenter_Scraper, self)._cached_http_get(url, self.base_url, self.timeout, headers=HEADERS, cache_limit=8)
        kodi.set_setting('%s-last-config' % (self.get_name()), str(int(now)))
def _blog_get_url(self, video, delim="."): url = None self.create_db_connection() result = self.db_connection.get_related_url( video.video_type, video.title, video.year, self.get_name(), video.season, video.episode ) if result: url = result[0][0] log_utils.log( "Got local related url: |%s|%s|%s|%s|%s|" % (video.video_type, video.title, video.year, self.get_name(), url) ) else: select = int(kodi.get_setting("%s-select" % (self.get_name()))) if video.video_type == VIDEO_TYPES.EPISODE: temp_title = re.sub("[^A-Za-z0-9 ]", "", video.title) if not scraper_utils.force_title(video): search_title = "%s S%02dE%02d" % (temp_title, int(video.season), int(video.episode)) if isinstance(video.ep_airdate, datetime.date): fallback_search = "%s %s" % ( temp_title, video.ep_airdate.strftime("%Y{0}%m{0}%d".format(delim)), ) else: fallback_search = "" else: if not video.ep_title: return None search_title = "%s %s" % (temp_title, video.ep_title) fallback_search = "" else: search_title = "%s %s" % (video.title, video.year) fallback_search = "" results = self.search(video.video_type, search_title, video.year) if not results and fallback_search: results = self.search(video.video_type, fallback_search, video.year) if results: # TODO: First result isn't always the most recent... best_result = results[0] if select != 0: best_qorder = 0 for result in results: match = re.search("\[(.*)\]$", result["title"]) if match: q_str = match.group(1) quality = scraper_utils.blog_get_quality(video, q_str, "") # print 'result: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality]) if Q_ORDER[quality] > best_qorder: # print 'Setting best as: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality]) best_result = result best_qorder = Q_ORDER[quality] url = best_result["url"] self.db_connection.set_related_url(video.video_type, video.title, video.year, self.get_name(), url) return url
def _blog_proc_results(self, html, post_pattern, date_format, video_type, title, year):
    results = []
    match = re.search('(.*?)\s*S\d+E\d+\s*', title)
    if match:
        show_title = match.group(1)
    else:
        match = re.search('(.*?)\s*\d{4}\.\d{2}\.\d{2}\s*', title)
        if match:
            show_title = match.group(1)
        else:
            show_title = title
    norm_title = self._normalize_title(show_title)

    filter_days = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
    today = datetime.date.today()
    for match in re.finditer(post_pattern, html, re.DOTALL):
        post_data = match.groupdict()
        post_title = post_data['post_title']
        if 'quality' in post_data:
            post_title += '- [%s]' % (post_data['quality'])

        if filter_days:
            try:
                post_date = datetime.datetime.strptime(post_data['date'], date_format).date()
            except TypeError:
                post_date = datetime.datetime(*(time.strptime(post_data['date'], date_format)[0:6])).date()
            if today - post_date > filter_days:
                continue

        match_year = ''
        match_title = ''
        post_title = post_title.replace('–', '-')
        post_title = post_title.replace('’', "'")
        full_title = post_title
        if video_type == VIDEO_TYPES.MOVIE:
            match = re.search('(.*?)\s*[\[(]?(\d{4})[)\]]?\s*(.*)', post_title)
            if match:
                match_title, match_year, extra_title = match.groups()
                full_title = '%s [%s]' % (match_title, extra_title)
        else:
            match = re.search('(.*?)\s*S\d+E\d+\s*(.*)', post_title)
            if match:
                match_title, extra_title = match.groups()
                full_title = '%s [%s]' % (match_title, extra_title)
            else:
                match = re.search('(.*?)\s*\d{4}[ .]?\d{2}[ .]?\d{2}\s*(.*)', post_title)
                if match:
                    match_title, extra_title = match.groups()
                    full_title = '%s [%s]' % (match_title, extra_title)

        match_norm_title = self._normalize_title(match_title)
        if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
            result = {'url': post_data['url'].replace(self.base_url, ''), 'title': full_title, 'year': match_year}
            results.append(result)

    return results
def __too_old(self, post): filter_days = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name())))) if filter_days: today = datetime.date.today() match = re.search('class="postMonth"\s+title="([^"]+)">([^<]+).*?class="postDay"[^>]*>([^<]+)', post) if match: try: post_year, mon_name, post_day = match.groups() post_month = SHORT_MONS.index(mon_name) + 1 post_date = datetime.date(int(post_year), post_month, int(post_day)) if today - post_date > filter_days: return True except ValueError: return False return False
def __match_episode(self, video, norm_title, title, hash_id):
    sxe_pattern = '(.*?)[. _]S%02dE%02d[. _]' % (int(video.season), int(video.episode))
    airdate_fallback = kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate
    show_title = ''
    match = re.search(sxe_pattern, title, re.I)
    if match:
        show_title = match.group(1)
    elif video.ep_airdate and airdate_fallback:
        airdate_pattern = '(.*?)[. _]%s[. _]%02d[. _]%02d[. _]' % (video.ep_airdate.year, video.ep_airdate.month, video.ep_airdate.day)
        match = re.search(airdate_pattern, title)
        if match:
            show_title = match.group(1)

    if show_title and norm_title in scraper_utils.normalize_title(show_title):
        return 'hash=%s' % (hash_id)
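# Illustrative sketch (not part of the add-on): shows how the SxE and airdate patterns used
# by the __match_episode() variants above behave against typical release names. The helper
# name and the sample titles are invented for the example.
import re

def match_release(title, season, episode, airdate=None):
    sxe_pattern = '(.*?)[. _]S%02dE%02d[. _]' % (season, episode)
    match = re.search(sxe_pattern, title, re.I)
    if match:
        return match.group(1)  # the show-title prefix before the SxE tag
    if airdate is not None:
        year, month, day = airdate
        airdate_pattern = '(.*?)[. _]%s[. _]%02d[. _]%02d[. _]' % (year, month, day)
        match = re.search(airdate_pattern, title)
        if match:
            return match.group(1)  # the show-title prefix before the airdate
    return ''

print match_release('Some.Show.S01E02.720p.HDTV.x264', 1, 2)                     # -> 'Some.Show'
print match_release('Nightly.Show.2015.08.14.Guest.HDTV', 1, 2, (2015, 8, 14))   # -> 'Nightly.Show'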
def __too_old(self, post): filter_days = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name())))) if filter_days: today = datetime.date.today() match = re.search('<span\s+class="date">(.*?)\s+(\d+)[^<]+(\d{4})<', post) if match: try: mon_name, post_day, post_year = match.groups() post_month = SHORT_MONS.index(mon_name) + 1 post_date = datetime.date(int(post_year), post_month, int(post_day)) if today - post_date > filter_days: return True except ValueError: return False return False
def _blog_get_url(self, video, delim='.'):
    url = None
    self.create_db_connection()
    result = self.db_connection.get_related_url(video.video_type, video.title, video.year, self.get_name(), video.season, video.episode)
    if result:
        url = result[0][0]
        log_utils.log('Got local related url: |%s|%s|%s|%s|%s|' % (video.video_type, video.title, video.year, self.get_name(), url), log_utils.LOGDEBUG)
    else:
        select = int(kodi.get_setting('%s-select' % (self.get_name())))
        if video.video_type == VIDEO_TYPES.EPISODE:
            temp_title = re.sub('[^A-Za-z0-9 ]', '', video.title)
            if not scraper_utils.force_title(video):
                search_title = '%s S%02dE%02d' % (temp_title, int(video.season), int(video.episode))
                if isinstance(video.ep_airdate, datetime.date):
                    fallback_search = '%s %s' % (temp_title, video.ep_airdate.strftime('%Y{0}%m{0}%d'.format(delim)))
                else:
                    fallback_search = ''
            else:
                if not video.ep_title:
                    return None
                search_title = '%s %s' % (temp_title, video.ep_title)
                fallback_search = ''
        else:
            search_title = '%s %s' % (video.title, video.year)
            fallback_search = ''

        results = self.search(video.video_type, search_title, video.year)
        if not results and fallback_search:
            results = self.search(video.video_type, fallback_search, video.year)
        if results:
            # TODO: First result isn't always the most recent...
            best_result = results[0]
            if select != 0:
                best_qorder = 0
                for result in results:
                    match = re.search('\[(.*)\]$', result['title'])
                    if match:
                        q_str = match.group(1)
                        quality = scraper_utils.blog_get_quality(video, q_str, '')
                        log_utils.log('result: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                        if Q_ORDER[quality] > best_qorder:
                            log_utils.log('Setting best as: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                            best_result = result
                            best_qorder = Q_ORDER[quality]

            url = best_result['url']
            self.db_connection.set_related_url(video.video_type, video.title, video.year, self.get_name(), url, video.season, video.episode)

    return url
def __too_old(self, post): filter_days = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name())))) if filter_days: today = datetime.date.today() match = re.search('<a[^>]+title="posting time[^"]*">(.*?)\s+(\d+)\s*(\d{2,4})<', post) if match: try: mon_name, post_day, post_year = match.groups() post_year = int(post_year) if post_year < 2000: post_year += 2000 post_month = SHORT_MONS.index(mon_name) + 1 post_date = datetime.date(post_year, post_month, int(post_day)) if today - post_date > filter_days: return True except ValueError: return False return False
def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
    self.timeout = timeout
    self.base_url = kodi.get_setting('%s-base_url' % (self.get_name()))
def _cached_http_get(self, url, base_url, timeout, cookies=None, data=None, multipart_data=None, headers=None, allow_redirect=True, method=None, cache_limit=8):
    if cookies is None:
        cookies = {}
    if timeout == 0:
        timeout = None
    if headers is None:
        headers = {}
    referer = headers['Referer'] if 'Referer' in headers else url
    log_utils.log('Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|' % (url, cookies, data, headers))
    if data is not None:
        if isinstance(data, basestring):
            data = data
        else:
            data = urllib.urlencode(data, True)

    if multipart_data is not None:
        headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
        data = multipart_data

    self.create_db_connection()
    _created, _res_header, html = self.db_connection.get_cached_url(url, data, cache_limit)
    if html:
        log_utils.log('Returning cached result for: %s' % (url), log_utils.LOGDEBUG)
        return html

    try:
        self.cj = self._set_cookies(base_url, cookies)
        request = urllib2.Request(url, data=data)
        request.add_header('User-Agent', scraper_utils.get_ua())
        request.add_header('Accept', '*/*')
        request.add_unredirected_header('Host', request.get_host())
        request.add_unredirected_header('Referer', referer)
        for key in headers:
            request.add_header(key, headers[key])
        self.cj.add_cookie_header(request)
        if not allow_redirect:
            opener = urllib2.build_opener(NoRedirection)
            urllib2.install_opener(opener)
        else:
            opener = urllib2.build_opener(urllib2.HTTPRedirectHandler)
            urllib2.install_opener(opener)
            opener2 = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
            urllib2.install_opener(opener2)

        if method is not None:
            request.get_method = lambda: method.upper()
        response = urllib2.urlopen(request, timeout=timeout)
        self.cj.extract_cookies(response, request)
        if kodi.get_setting('cookie_debug') == 'true':
            log_utils.log('Response Cookies: %s - %s' % (url, scraper_utils.cookies_as_str(self.cj)), log_utils.LOGDEBUG)
        self.cj._cookies = scraper_utils.fix_bad_cookies(self.cj._cookies)
        self.cj.save(ignore_discard=True)
        if not allow_redirect and (response.getcode() in [301, 302, 303, 307] or response.info().getheader('Refresh')):
            if response.info().getheader('Refresh') is not None:
                refresh = response.info().getheader('Refresh')
                return refresh.split(';')[-1].split('url=')[-1]
            else:
                return response.info().getheader('Location')

        content_length = response.info().getheader('Content-Length', 0)
        if int(content_length) > MAX_RESPONSE:
            log_utils.log('Response exceeded allowed size. %s => %s / %s' % (url, content_length, MAX_RESPONSE), log_utils.LOGWARNING)

        if response.info().get('Content-Encoding') == 'gzip':
            buf = StringIO(response.read(MAX_RESPONSE))
            f = gzip.GzipFile(fileobj=buf)
            html = f.read()
        else:
            html = response.read(MAX_RESPONSE)
    except urllib2.HTTPError as e:
        if e.code == 503 and 'cf-browser-verification' in e.read():
            html = cloudflare.solve(url, self.cj, scraper_utils.get_ua())
            if not html:
                return ''
        else:
            log_utils.log('Error (%s) during scraper http get: %s' % (str(e), url), log_utils.LOGWARNING)
            return ''
    except Exception as e:
        log_utils.log('Error (%s) during scraper http get: %s' % (str(e), url), log_utils.LOGWARNING)
        return ''

    self.db_connection.cache_url(url, html, data)
    return html
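# Illustrative sketch (not part of the add-on): the gzip branch of _cached_http_get() above.
# When a response carries Content-Encoding: gzip, the raw bytes are wrapped in a StringIO
# buffer and decompressed with gzip.GzipFile. The payload below is made up.
import gzip
from StringIO import StringIO

def gzip_payload(text):
    buf = StringIO()
    f = gzip.GzipFile(fileobj=buf, mode='wb')
    f.write(text)
    f.close()  # flush the gzip trailer
    return buf.getvalue()

raw = gzip_payload('<html>compressed body</html>')
html = gzip.GzipFile(fileobj=StringIO(raw)).read()
print html  # -> '<html>compressed body</html>'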
def _blog_proc_results(self, html, post_pattern, date_format, video_type, title, year):
    results = []
    search_date = ''
    search_sxe = ''
    match = re.search('(.*?)\s*(S\d+E\d+)\s*', title)
    if match:
        show_title, search_sxe = match.groups()
    else:
        match = re.search('(.*?)\s*(\d{4})[ .]?(\d{2})[ .]?(\d{2})\s*', title)
        if match:
            show_title, search_year, search_month, search_day = match.groups()
            search_date = '%s%s%s' % (search_year, search_month, search_day)
        else:
            show_title = title
    norm_title = scraper_utils.normalize_title(show_title)

    filter_days = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
    today = datetime.date.today()
    for match in re.finditer(post_pattern, html, re.DOTALL):
        post_data = match.groupdict()
        post_title = post_data['post_title']
        if 'quality' in post_data:
            post_title += '- [%s]' % (post_data['quality'])

        if filter_days and date_format and 'date' in post_data:
            try:
                post_date = datetime.datetime.strptime(post_data['date'], date_format).date()
            except TypeError:
                post_date = datetime.datetime(*(time.strptime(post_data['date'], date_format)[0:6])).date()
            if today - post_date > filter_days:
                continue

        match_year = ''
        match_title = ''
        match_date = ''
        match_sxe = ''
        post_title = post_title.replace('–', '-')
        post_title = post_title.replace('’', "'")
        full_title = post_title
        if video_type == VIDEO_TYPES.MOVIE:
            match = re.search('(.*?)\s*[\[(]?(\d{4})[)\]]?\s*(.*)', post_title)
            if match:
                match_title, match_year, extra_title = match.groups()
                full_title = '%s [%s]' % (match_title, extra_title)
        else:
            match = re.search('(.*?)\s*(S\d+E\d+)\s*(.*)', post_title)
            if match:
                match_title, match_sxe, extra_title = match.groups()
                full_title = '%s [%s]' % (match_title, extra_title)
            else:
                match = re.search('(.*?)\s*(\d{4})[ .]?(\d{2})[ .]?(\d{2})\s*(.*)', post_title)
                if match:
                    match_title, match_year2, match_month, match_day, extra_title = match.groups()
                    match_date = '%s%s%s' % (match_year2, match_month, match_day)
                    full_title = '%s [%s]' % (match_title, extra_title)

        match_norm_title = scraper_utils.normalize_title(match_title)
        log_utils.log('Blog Results: |%s|%s| - |%s|%s| - |%s|%s| - |%s|%s|' % (match_norm_title, norm_title, year, match_year, search_date, match_date, search_sxe, match_sxe), log_utils.LOGDEBUG)
        if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year) \
                and (not search_date or (search_date == match_date)) and (not search_sxe or (search_sxe == match_sxe)):
            result = {'url': scraper_utils.pathify_url(post_data['url']), 'title': full_title, 'year': match_year}
            results.append(result)

    return results
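# Illustrative sketch (not part of the add-on): the strptime fallback used by the
# _blog_proc_results() variants above. Under some embedded Python builds (e.g. inside Kodi),
# datetime.datetime.strptime can raise TypeError, so the code falls back to time.strptime.
# The date string and format below are just examples.
import datetime
import time

def parse_post_date(date_str, date_format):
    try:
        return datetime.datetime.strptime(date_str, date_format).date()
    except TypeError:
        return datetime.datetime(*(time.strptime(date_str, date_format)[0:6])).date()

print parse_post_date('2015-08-14', '%Y-%m-%d')  # -> 2015-08-14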