Code Example #1
File: local_scraper.py Project: matt2005/salts
    def _get_episode_url(self, show_url, video):
        params = urlparse.parse_qs(show_url)
        cmd = '{"jsonrpc": "2.0", "method": "VideoLibrary.GetEpisodes", "params": {"tvshowid": %s, "season": %s, "filter": {"field": "%s", "operator": "is", "value": "%s"}, \
        "limits": { "start" : 0, "end": 25 }, "properties" : ["title", "season", "episode", "file", "streamdetails"], "sort": { "order": "ascending", "method": "label", "ignorearticle": true }}, "id": "libTvShows"}'
        base_url = 'video_type=%s&id=%s'
        episodes = []
        force_title = self._force_title(video)
        if not force_title:
            run = cmd % (params['id'][0], video.season, 'episode', video.episode)
            meta = xbmc.executeJSONRPC(run)
            meta = json.loads(meta)
            log_utils.log('Episode Meta: %s' % (meta), log_utils.LOGDEBUG)
            if 'result' in meta and 'episodes' in meta['result']:
                episodes = meta['result']['episodes']
        else:
            log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)

        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and not episodes:
            run = cmd % (params['id'][0], video.season, 'title', video.ep_title)
            meta = xbmc.executeJSONRPC(run)
            meta = json.loads(meta)
            log_utils.log('Episode Title Meta: %s' % (meta), log_utils.LOGDEBUG)
            if 'result' in meta and 'episodes' in meta['result']:
                episodes = meta['result']['episodes']

        for episode in episodes:
            if episode['file'].endswith('.strm'):
                continue
            
            return base_url % (video.video_type, episode['episodeid'])
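
The lookup above goes through Kodi's JSON-RPC API rather than scraping a site. A minimal standalone sketch of the same VideoLibrary.GetEpisodes call (assuming it runs inside Kodi where the xbmc module is importable; the tvshowid/season/episode values are made up):

import json
import xbmc

cmd = ('{"jsonrpc": "2.0", "method": "VideoLibrary.GetEpisodes", '
       '"params": {"tvshowid": %s, "season": %s, '
       '"filter": {"field": "episode", "operator": "is", "value": "%s"}, '
       '"properties": ["title", "season", "episode", "file"]}, "id": "libTvShows"}')
meta = json.loads(xbmc.executeJSONRPC(cmd % (10, 2, 3)))
episodes = meta.get('result', {}).get('episodes', [])  # empty list when nothing matched
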
Code Example #2
    def _get_episode_url(self, show_url, video):
        url = urlparse.urljoin(self.base_url, show_url)
        html = self._http_get(url, cache_limit=2)
        if html:
            force_title = scraper_utils.force_title(video)
            episodes = dom_parser.parse_dom(html, 'div', {'class': '\s*el-item\s*'})
            if not force_title:
                episode_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (int(video.season), int(video.episode))
                match = re.search(episode_pattern, html)
                if match:
                    return scraper_utils.pathify_url(match.group(1))
                
                if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                    airdate_pattern = '%02d-%02d-%d' % (video.ep_airdate.day, video.ep_airdate.month, video.ep_airdate.year)
                    for episode in episodes:
                        ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                        ep_airdate = dom_parser.parse_dom(episode, 'div', {'class': 'date'})
                        if ep_url and ep_airdate:
                            ep_airdate = ep_airdate[0].strip()
                            if airdate_pattern == ep_airdate:
                                return scraper_utils.pathify_url(ep_url[0])

            if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
                norm_title = scraper_utils.normalize_title(video.ep_title)
                for episode in episodes:
                    ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                    ep_title = dom_parser.parse_dom(episode, 'div', {'class': 'e-name'})
                    if ep_url and ep_title and norm_title == scraper_utils.normalize_title(ep_title[0]):
                        return scraper_utils.pathify_url(ep_url[0])
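
For reference, the episode_pattern above expands the season/episode numbers into an href regex before it is applied to the page html. A small sketch with made-up values and a made-up anchor tag:

import re

episode_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (1, 2)
html = '<a href="/episode/some-show-s01e02-watch">'
match = re.search(episode_pattern, html)
if match:
    print match.group(1)  # -> /episode/some-show-s01e02-watch
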
Code Example #3
    def _get_episode_url(self, show_url, video):
        url = urlparse.urljoin(self.base_url, show_url)
        html = self._http_get(url, cache_limit=8)
        pattern = "<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (video.season)
        match = re.search(pattern, html, re.DOTALL)
        if match:
            fragment = match.group(1)
            ep_ids = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'}, ret='class')
            episodes = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'})
            airdates = dom_parser.parse_dom(fragment, 'span', {'class': 'airdate'})
            ep_airdate = video.ep_airdate.strftime('%Y-%m-%d') if isinstance(video.ep_airdate, datetime.date) else ''
            norm_title = scraper_utils.normalize_title(video.ep_title)
            num_id, airdate_id, title_id = '', '', ''
            for episode, airdate, ep_id in zip(episodes, airdates, ep_ids):
                if ep_airdate and ep_airdate == airdate: airdate_id = ep_id
                match = re.search('(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode)
                if match:
                    ep_num, ep_title = match.groups()
                    if int(ep_num) == int(video.episode): num_id = ep_id
                    if norm_title and norm_title in scraper_utils.normalize_title(ep_title): title_id = ep_id

            best_id = ''
            if not scraper_utils.force_title(video):
                if num_id: best_id = num_id
                if kodi.get_setting('airdate-fallback') == 'true' and airdate_id: best_id = airdate_id
                if kodi.get_setting('title-fallback') == 'true' and title_id: best_id = title_id
            else:
                if title_id: best_id = title_id
            
            if best_id:
                return EP_URL % (best_id)
Code Example #4
File: iflix_proxy.py Project: SQL-MisterMagoo/salts
 def __update_scraper_py(self):
     try:
         py_path = os.path.join(kodi.get_path(), 'scrapers', 'iflix_scraper.py')
         self.exists = os.path.exists(py_path)
         scraper_url = kodi.get_setting('%s-scraper_url' % (self.get_name()))
         scraper_password = kodi.get_setting('%s-scraper_password' % (self.get_name()))
         if scraper_url and scraper_password and (not self.exists or os.path.getmtime(py_path) < time.time() - (24 * 60 * 60)):
             try:
                 req = urllib2.urlopen(scraper_url)
                 cipher_text = req.read()
             except Exception as e:
                 log_utils.log('Failure during %s scraper get: %s' % (self.get_name(), e), log_utils.LOGWARNING)
                 return
             
             if cipher_text:
                 scraper_key = hashlib.sha256(scraper_password).digest()
                 decrypter = pyaes.Decrypter(pyaes.AESModeOfOperationCBC(scraper_key, IV))
                 new_py = decrypter.feed(cipher_text)
                 new_py += decrypter.feed()
                 
                 old_py = ''
                 if os.path.exists(py_path):
                     with open(py_path, 'r') as f:
                         old_py = f.read()
                 
                 log_utils.log('%s path: %s, new_py: %s, match: %s' % (self.get_name(), py_path, bool(new_py), new_py == old_py), log_utils.LOGDEBUG)
                 if old_py != new_py:
                     with open(py_path, 'w') as f:
                         f.write(new_py)
     except Exception as e:
         log_utils.log('Failure during %s scraper update: %s' % (self.get_name(), e), log_utils.LOGWARNING)
     finally:
         self.exists = os.path.exists(py_path)
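
The downloaded file is decrypted with pyaes in CBC mode, keyed by the SHA-256 digest of the scraper password. A minimal round-trip sketch of just that step (the password is made up, and the 16-byte IV below is only a placeholder for the module-level IV constant the proxy actually uses):

import hashlib
import pyaes

IV = '\x00' * 16                                   # placeholder 16-byte IV for AES-CBC
key = hashlib.sha256('example-password').digest()  # 32 bytes -> AES-256

encrypter = pyaes.Encrypter(pyaes.AESModeOfOperationCBC(key, IV))
cipher_text = encrypter.feed('print "hello"\n') + encrypter.feed()

decrypter = pyaes.Decrypter(pyaes.AESModeOfOperationCBC(key, IV))
new_py = decrypter.feed(cipher_text) + decrypter.feed()  # original plaintext restored
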
Code Example #5
 def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
     self.timeout = timeout
     self.base_url = kodi.get_setting('%s-base_url' % (self.get_name()))
     self.username = kodi.get_setting('%s-username' % (self.get_name()))
     self.password = kodi.get_setting('%s-password' % (self.get_name()))
     self.max_results = int(
         kodi.get_setting('%s-result_limit' % (self.get_name())))
Code Example #6
 def __update_scraper_py(self):
     try:
         py_path = os.path.join(kodi.get_path(), 'scrapers', 'shush_scraper.py')
         exists = os.path.exists(py_path)
         scraper_url = kodi.get_setting('%s-scraper_url' % (self.get_name()))
         scraper_password = kodi.get_setting('%s-scraper_password' % (self.get_name()))
         if scraper_url and scraper_password and (not exists or os.path.getmtime(py_path) < time.time() - (4 * 60 * 60)):
             try:
                 req = urllib2.urlopen(scraper_url)
                 cipher_text = req.read()
             except Exception as e:
                 log_utils.log('Failure during %s scraper get: %s' % (self.get_name(), e), log_utils.LOGWARNING)
                 return
             
             if cipher_text:
                 scraper_key = hashlib.sha256(scraper_password).digest()
                 decrypter = pyaes.Decrypter(pyaes.AESModeOfOperationCBC(scraper_key, IV))
                 new_py = decrypter.feed(cipher_text)
                 new_py += decrypter.feed()
                 
                 old_py = ''
                 if os.path.exists(py_path):
                     with open(py_path, 'r') as f:
                         old_py = f.read()
                 
                 log_utils.log('%s path: %s, new_py: %s, match: %s' % (self.get_name(), py_path, bool(new_py), new_py == old_py), log_utils.LOGDEBUG)
                 if old_py != new_py:
                     with open(py_path, 'w') as f:
                         f.write(new_py)
     except Exception as e:
         log_utils.log('Failure during %s scraper update: %s' % (self.get_name(), e), log_utils.LOGWARNING)
Code Example #7
def update_all_scrapers():
    try:
        last_check = int(kodi.get_setting('last_list_check'))
    except:
        last_check = 0
    now = int(time.time())
    list_url = kodi.get_setting('scraper_url')
    scraper_password = kodi.get_setting('scraper_password')
    list_path = os.path.join(kodi.translate_path(kodi.get_profile()),
                             'scraper_list.txt')
    exists = os.path.exists(list_path)
    if list_url and scraper_password and (not exists or last_check <
                                          (now - (24 * 60 * 60))):
        scraper_list = utils2.get_and_decrypt(list_url, scraper_password)
        if scraper_list:
            try:
                with open(list_path, 'w') as f:
                    f.write(scraper_list)

                kodi.set_setting('last_list_check', str(now))
                kodi.set_setting(
                    'scraper_last_update',
                    time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(now)))
                for line in scraper_list.split('\n'):
                    line = line.replace(' ', '')
                    if line:
                        scraper_url, filename = line.split(',')
                        if scraper_url.startswith('http'):
                            update_scraper(filename, scraper_url)
            except Exception as e:
                log_utils.log('Exception during scraper update: %s' % (e),
                              log_utils.LOGWARNING)
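
The decrypted scraper_list is expected to be plain text with one "url,filename" pair per line; the loop above strips spaces and splits on the comma. A tiny sketch with made-up entries:

scraper_list = ('http://example.com/scrapers/foo_scraper.enc, foo_scraper.py\n'
                'http://example.com/scrapers/bar_scraper.enc, bar_scraper.py')
for line in scraper_list.split('\n'):
    line = line.replace(' ', '')
    if line:
        scraper_url, filename = line.split(',')
        print filename, scraper_url
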
Code Example #8
File: scraper.py Project: sfennell/salts
    def _default_get_episode_url(self, show_url, video, episode_pattern, title_pattern='', airdate_pattern='', data=None, headers=None):
        log_utils.log('Default Episode Url: |%s|%s|%s|%s|' % (self.base_url, show_url, str(video).decode('utf-8', 'replace'), data), log_utils.LOGDEBUG)
        url = urlparse.urljoin(self.base_url, show_url)
        html = self._http_get(url, data=data, headers=headers, cache_limit=2)
        if html:
            force_title = self._force_title(video)

            if not force_title:
                match = re.search(episode_pattern, html, re.DOTALL)
                if match:
                    return self._pathify_url(match.group(1))

                if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
                    airdate_pattern = airdate_pattern.replace('{year}', str(video.ep_airdate.year))
                    airdate_pattern = airdate_pattern.replace('{month}', str(video.ep_airdate.month))
                    airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (video.ep_airdate.month))
                    airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[video.ep_airdate.month - 1])
                    airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[video.ep_airdate.month - 1])
                    airdate_pattern = airdate_pattern.replace('{day}', str(video.ep_airdate.day))
                    airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (video.ep_airdate.day))
                    log_utils.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)

                    match = re.search(airdate_pattern, html, re.DOTALL | re.I)
                    if match:
                        return self._pathify_url(match.group(1))
            else:
                log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)

            if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
                norm_title = self._normalize_title(video.ep_title)
                for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
                    episode = match.groupdict()
                    if norm_title == self._normalize_title(episode['title']):
                        return self._pathify_url(episode['url'])
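
The airdate_pattern argument is a template: the {year}, {month}, {p_month}, {month_name}, {short_month}, {day} and {p_day} placeholders are filled in from the episode's air date before the regex is run. A short sketch of that expansion with a made-up template and date:

import datetime

ep_airdate = datetime.date(2015, 3, 7)
airdate_pattern = 'href="([^"]+/{year}/{p_month}/{p_day}/[^"]*)"'  # hypothetical template
airdate_pattern = airdate_pattern.replace('{year}', str(ep_airdate.year))
airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (ep_airdate.month))
airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (ep_airdate.day))
# -> 'href="([^"]+/2015/03/07/[^"]*)"', which is then searched for in the show page html
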
Code Example #9
File: noobroom_scraper.py Project: henry73/salts
 def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
     self.timeout = timeout
     self.base_url = kodi.get_setting('%s-base_url' % (self.get_name()))
     self.username = kodi.get_setting('%s-username' % (self.get_name()))
     self.password = kodi.get_setting('%s-password' % (self.get_name()))
     self.include_paid = kodi.get_setting('%s-include_premium' %
                                          (self.get_name())) == 'true'
Code Example #10
    def _default_get_episode_url(self, show_url, video, episode_pattern, title_pattern='', airdate_pattern='', data=None, headers=None):
        log_utils.log('Default Episode Url: |%s|%s|%s|%s|' % (self.base_url, show_url, str(video).decode('utf-8', 'replace'), data), log_utils.LOGDEBUG)
        url = urlparse.urljoin(self.base_url, show_url)
        html = self._http_get(url, data=data, headers=headers, cache_limit=2)
        if html:
            force_title = self._force_title(video)

            if not force_title:
                match = re.search(episode_pattern, html, re.DOTALL)
                if match:
                    return self._pathify_url(match.group(1))

                if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
                    airdate_pattern = airdate_pattern.replace('{year}', str(video.ep_airdate.year))
                    airdate_pattern = airdate_pattern.replace('{month}', str(video.ep_airdate.month))
                    airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (video.ep_airdate.month))
                    airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[video.ep_airdate.month - 1])
                    airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[video.ep_airdate.month - 1])
                    airdate_pattern = airdate_pattern.replace('{day}', str(video.ep_airdate.day))
                    airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (video.ep_airdate.day))
                    log_utils.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)

                    match = re.search(airdate_pattern, html, re.DOTALL | re.I)
                    if match:
                        return self._pathify_url(match.group(1))
            else:
                log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)

            if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
                norm_title = self._normalize_title(video.ep_title)
                for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
                    url, title = match.groups()
                    if norm_title == self._normalize_title(title):
                        return self._pathify_url(url)
Code Example #11
 def _set_cookies(self, base_url, cookies):
     cookie_file = os.path.join(COOKIEPATH, "%s_cookies.lwp" % (self.get_name()))
     cj = cookielib.LWPCookieJar(cookie_file)
     try:
         cj.load(ignore_discard=True)
     except:
         pass
     if kodi.get_setting("cookie_debug") == "true":
         log_utils.log("Before Cookies: %s - %s" % (self, scraper_utils.cookies_as_str(cj)), log_utils.LOGDEBUG)
     domain = urlparse.urlsplit(base_url).hostname
     for key in cookies:
         c = cookielib.Cookie(
             0,
             key,
             str(cookies[key]),
             port=None,
             port_specified=False,
             domain=domain,
             domain_specified=True,
             domain_initial_dot=False,
             path="/",
             path_specified=True,
             secure=False,
             expires=None,
             discard=False,
             comment=None,
             comment_url=None,
             rest={},
         )
         cj.set_cookie(c)
     cj.save(ignore_discard=True)
     if kodi.get_setting("cookie_debug") == "true":
         log_utils.log("After Cookies: %s - %s" % (self, scraper_utils.cookies_as_str(cj)), log_utils.LOGDEBUG)
     return cj
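
Under the hood this is plain cookielib: each key/value pair becomes a Cookie pinned to the site's hostname, and the jar is persisted as an LWP-format file. A standalone sketch of that round trip (path, domain and cookie values are made up):

import cookielib

cj = cookielib.LWPCookieJar('/tmp/example_cookies.lwp')
c = cookielib.Cookie(0, 'PHPSESSID', 'abc123', None, False, 'example.com', True, False,
                     '/', True, False, None, False, None, None, {})
cj.set_cookie(c)
cj.save(ignore_discard=True)  # written in LWP format; cj.load() restores it on the next run
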
Code Example #12
def update_all_scrapers():
        try: last_check = int(kodi.get_setting('last_list_check'))
        except: last_check = 0
        now = int(time.time())
        list_url = kodi.get_setting('scraper_url')
        scraper_password = kodi.get_setting('scraper_password')
        list_path = os.path.join(kodi.translate_path(kodi.get_profile()), 'scraper_list.txt')
        exists = os.path.exists(list_path)
        if list_url and scraper_password and (not exists or last_check < (now - (24 * 60 * 60))):
            scraper_list = utils2.get_and_decrypt(list_url, scraper_password)
            if scraper_list:
                try:
                    with open(list_path, 'w') as f:
                        f.write(scraper_list)
    
                    kodi.set_setting('last_list_check', str(now))
                    kodi.set_setting('scraper_last_update', time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(now)))
                    for line in scraper_list.split('\n'):
                        line = line.replace(' ', '')
                        if line:
                            scraper_url, filename = line.split(',')
                            if scraper_url.startswith('http'):
                                update_scraper(filename, scraper_url)
                except Exception as e:
                    log_utils.log('Exception during scraper update: %s' % (e), log_utils.LOGWARNING)
Code Example #13
    def _get_episode_url(self, show_url, video):
        url = urlparse.urljoin(self.base_url, show_url)
        html = self._http_get(url, cache_limit=2)
        if html:
            force_title = scraper_utils.force_title(video)
            episodes = dom_parser.parse_dom(html, 'div', {'class': '\s*el-item\s*'})
            if not force_title:
                episode_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (int(video.season), int(video.episode))
                match = re.search(episode_pattern, html)
                if match:
                    return scraper_utils.pathify_url(match.group(1))
                
                if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                    airdate_pattern = '%02d-%02d-%d' % (video.ep_airdate.day, video.ep_airdate.month, video.ep_airdate.year)
                    for episode in episodes:
                        ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                        ep_airdate = dom_parser.parse_dom(episode, 'div', {'class': 'date'})
                        if ep_url and ep_airdate:
                            ep_airdate = ep_airdate[0].strip()
                            if airdate_pattern == ep_airdate:
                                return scraper_utils.pathify_url(ep_url[0])

            if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
                norm_title = scraper_utils.normalize_title(video.ep_title)
                for episode in episodes:
                    ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                    ep_title = dom_parser.parse_dom(episode, 'div', {'class': 'e-name'})
                    if ep_url and ep_title and norm_title == scraper_utils.normalize_title(ep_title[0]):
                        return scraper_utils.pathify_url(ep_url[0])
Code Example #14
 def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
     self.timeout = timeout
     self.base_url = kodi.get_setting('%s-base_url' % (self.get_name()))
     self.username = kodi.get_setting('%s-username' % (self.get_name()))
     self.password = kodi.get_setting('%s-password' % (self.get_name()))
     self.max_results = int(kodi.get_setting('%s-result_limit' % (self.get_name())))
     self.max_gb = kodi.get_setting('%s-size_limit' % (self.get_name()))
     self.max_bytes = int(self.max_gb) * 1024 * 1024 * 1024
Code Example #15
 def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
     self.timeout = timeout
     self.base_url = kodi.get_setting('%s-base_url' % (self.get_name()))
     self.username = kodi.get_setting('%s-username' % (self.get_name()))
     self.password = kodi.get_setting('%s-password' % (self.get_name()))
     self.cookie = {
         'chickenlicker': '%s%%3A%s' % (self.username, self.password)
     }
Code Example #16
File: gvcenter_scraper.py Project: s7eele/salts
 def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
     self.timeout = timeout
     self.base_url = kodi.get_setting('%s-base_url' % (self.get_name()))
     self.last_call = 0
     device_id = kodi.get_setting('%s-device_id' % (self.get_name()))
     if device_id not in ['', '0']:
         self.device_id = device_id
     else:
         self.device_id = None
Code Example #17
File: directdl_scraper.py Project: bialagary/mw
 def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
     self.timeout = timeout
     self.base_url = kodi.get_setting('%s-base_url' % (self.get_name()))
     qual_filter = 5 - int(
         kodi.get_setting('%s_quality' % VIDEO_TYPES.EPISODE))
     self.q_order = [
         dd_qual for dd_qual in DD_QUALITIES
         if Q_ORDER[QUALITY_MAP[dd_qual]] <= qual_filter
     ]
Code Example #18
    def _get_episode_url(self, show_url, video):
        log_utils.log('WS Episode Url: |%s|%s|' % (show_url, str(video).decode('utf-8', 'replace')), log_utils.LOGDEBUG)
        html = self._http_get(show_url, cache_limit=2)
        js_result = scraper_utils.parse_json(html, show_url)
        if 'results' in js_result and '0' in js_result['results'] and 'episodes' in js_result['results']['0']:
            seasons = js_result['results']['0']['episodes']
            force_title = scraper_utils.force_title(video)
            if not force_title:
                if str(video.season) in seasons:
                    season = seasons[str(video.season)]
                    if isinstance(season, list):
                        season = dict((ep['episode'], ep) for ep in season)

                    if str(video.episode) in season:
                        url = season[str(video.episode)]['url']
                        return scraper_utils.pathify_url(url.replace('/json', ''))

                if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                    airdate_pattern = video.ep_airdate.strftime('%d/%m/%Y')  # dd/mm/yyyy to match the 'release' field
                    for season in seasons:
                        if season.lower() == 'epcount': continue
                        episodes = seasons[season]
                        if isinstance(episodes, dict):
                            episodes = [episodes[key] for key in episodes]
                        for episode in episodes:
                            if airdate_pattern == episode['release']:
                                url = episode['url']
                                return scraper_utils.pathify_url(url.replace('/json', ''))
            else:
                log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)

            if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
                norm_title = scraper_utils.normalize_title(video.ep_title)
                for season in seasons:
                    if season.lower() == 'epcount': continue
                    episodes = seasons[season]
                    if isinstance(episodes, dict):
                        episodes = [episodes[key] for key in episodes]
                    for episode in episodes:
                        if episode['name'] is not None and norm_title == scraper_utils.normalize_title(episode['name']):
                            url = episode['url']
                            return scraper_utils.pathify_url(url.replace('/json', ''))
Code Example #19
 def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
     self.timeout = timeout
     if kodi.get_setting('%s-use_https' % (self.__class__.base_name)) == 'true':
         scheme = 'https'
         prefix = 'www'
     else:
         scheme = 'http'
         prefix = 'http'
     base_url = kodi.get_setting('%s-base_url' % (self.__class__.base_name))
     self.base_url = scheme + '://' + prefix + '.' + base_url
     self.username = kodi.get_setting('%s-username' % (self.__class__.base_name))
     self.password = kodi.get_setting('%s-password' % (self.__class__.base_name))
Code Example #20
def get_ua():
    try: last_gen = int(kodi.get_setting('last_ua_create'))
    except: last_gen = 0
    if not kodi.get_setting('current_ua') or last_gen < (time.time() - (7 * 24 * 60 * 60)):
        index = random.randrange(len(RAND_UAS))
        user_agent = RAND_UAS[index].format(win_ver=random.choice(WIN_VERS), feature=random.choice(FEATURES), br_ver=random.choice(BR_VERS[index]))
        log_utils.log('Creating New User Agent: %s' % (user_agent), log_utils.LOGDEBUG)
        kodi.set_setting('current_ua', user_agent)
        kodi.set_setting('last_ua_create', str(int(time.time())))
    else:
        user_agent = kodi.get_setting('current_ua')
    return user_agent
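
A typical call site simply stamps the cached agent onto outgoing requests; a hypothetical example (the URL is made up, and urllib2 matches the HTTP client used elsewhere in these snippets):

import urllib2

request = urllib2.Request('http://example.com/', headers={'User-Agent': get_ua()})
html = urllib2.urlopen(request).read()
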
Code Example #21
 def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
     self.timeout = timeout
     if kodi.get_setting('%s-use_https' %
                         (self.__class__.base_name)) == 'true':
         scheme = 'https'
         prefix = 'www'
     else:
         scheme = 'http'
         prefix = 'http'
     base_url = kodi.get_setting('%s-base_url' % (self.__class__.base_name))
     self.base_url = scheme + '://' + prefix + '.' + base_url
     self.username = kodi.get_setting('%s-username' %
                                      (self.__class__.base_name))
     self.password = kodi.get_setting('%s-password' %
                                      (self.__class__.base_name))
Code Example #22
    def _get_episode_url(self, show_url, video):
        params = urlparse.parse_qs(show_url)
        cmd = '{"jsonrpc": "2.0", "method": "VideoLibrary.GetEpisodes", "params": {"tvshowid": %s, "season": %s, "filter": {"field": "%s", "operator": "is", "value": "%s"}, \
        "limits": { "start" : 0, "end": 25 }, "properties" : ["title", "season", "episode", "file", "streamdetails"], "sort": { "order": "ascending", "method": "label", "ignorearticle": true }}, "id": "libTvShows"}'
        base_url = 'video_type=%s&id=%s'
        episodes = []
        force_title = scraper_utils.force_title(video)
        if not force_title:
            run = cmd % (params['id'][0], video.season, 'episode', video.episode)
            meta = xbmc.executeJSONRPC(run)
            meta = scraper_utils.parse_json(meta)
            log_utils.log('Episode Meta: %s' % (meta), log_utils.LOGDEBUG)
            if 'result' in meta and 'episodes' in meta['result']:
                episodes = meta['result']['episodes']
        else:
            log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)

        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and not episodes:
            run = cmd % (params['id'][0], video.season, 'title', video.ep_title)
            meta = xbmc.executeJSONRPC(run)
            meta = scraper_utils.parse_json(meta)
            log_utils.log('Episode Title Meta: %s' % (meta), log_utils.LOGDEBUG)
            if 'result' in meta and 'episodes' in meta['result']:
                episodes = meta['result']['episodes']

        for episode in episodes:
            if episode['file'].endswith('.strm'):
                continue
            
            return base_url % (video.video_type, episode['episodeid'])
Code Example #23
    def __match_episode(self, video, norm_title, title, hash_id=None):
        sxe_patterns = [
            '(.*?)[._ -]s([0-9]+)[._ -]*e([0-9]+)',
            '(.*?)[._ -]([0-9]+)x([0-9]+)', '(.*?)[._ -]([0-9]+)([0-9][0-9])',
            '(.*?)[._ -]?season[._ -]*([0-9]+)[._ -]*-?[._ -]*episode[._ -]*([0-9]+)',
            '(.*?)[._ -]\[s([0-9]+)\][._ -]*\[e([0-9]+)\]',
            '(.*?)[._ -]s([0-9]+)[._ -]*ep([0-9]+)'
        ]

        show_title = ''
        for pattern in sxe_patterns:
            match = re.search(pattern, title, re.I)
            if match:
                temp_title, season, episode = match.groups()
                if int(season) == int(video.season) and int(episode) == int(
                        video.episode):
                    show_title = temp_title
                    break
        else:
            airdate_fallback = kodi.get_setting(
                'airdate-fallback') == 'true' and video.ep_airdate
            if video.ep_airdate and airdate_fallback:
                airdate_pattern = '(.*?)[. _]%s[. _]%02d[. _]%02d[. _]' % (
                    video.ep_airdate.year, video.ep_airdate.month,
                    video.ep_airdate.day)
                match = re.search(airdate_pattern, title)
                if match:
                    show_title = match.group(1)

        if show_title and norm_title in scraper_utils.normalize_title(
                show_title):
            return 'hash=%s' % (hash_id)
Code Example #24
    def _get_episode_url(self, video):
        url = urlparse.urljoin(self.base_url, '/torrent/list')
        js_data = self._http_get(url, cache_limit=0)
        norm_title = scraper_utils.normalize_title(video.title)
        if 'torrents' in js_data:
            airdate_fallback = kodi.get_setting(
                'airdate-fallback') == 'true' and video.ep_airdate
            show_title = ''
            if not scraper_utils.force_title(video):
                for item in js_data['torrents']:
                    sxe_pattern = '(.*?)[. ][Ss]%02d[Ee]%02d[. ]' % (int(
                        video.season), int(video.episode))
                    match = re.search(sxe_pattern, item['name'])
                    if match:
                        show_title = match.group(1)
                    elif airdate_fallback:
                        airdate_pattern = '(.*?)[. ]%s[. ]%02d[. ]%02d[. ]' % (
                            video.ep_airdate.year, video.ep_airdate.month,
                            video.ep_airdate.day)
                        match = re.search(airdate_pattern, item['name'])
                        if match:
                            show_title = match.group(1)

                    if show_title and norm_title in scraper_utils.normalize_title(
                            show_title):
                        return 'hash=%s' % (item['hash'])
Code Example #25
File: ororotv_scraper.py Project: kevintone/tdbaddon
    def search(self, video_type, title, year, season=''):
        url = urlparse.urljoin(self.base_url, 'http://ororo.tv/en')
        if video_type == VIDEO_TYPES.MOVIE:
            url += '/movies'
        html = self._http_get(url, cache_limit=.25)
        results = []
        norm_title = scraper_utils.normalize_title(title)
        include_paid = kodi.get_setting('%s-include_premium' %
                                        (self.get_name())) == 'true'
        for match in re.finditer(
                '''<span class='value'>(\d{4})(.*?)href="([^"]+)[^>]+>([^<]+)''',
                html, re.DOTALL):
            match_year, middle, url, match_title = match.groups()
            if not include_paid and video_type == VIDEO_TYPES.MOVIE and 'paid accounts' in middle:
                continue

            if norm_title in scraper_utils.normalize_title(match_title) and (
                    not year or not match_year or year == match_year):
                result = {
                    'url': scraper_utils.pathify_url(url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)

        return results
Code Example #26
 def __match_episode(self, video, norm_title, title, hash_id=None):
     sxe_patterns = [
         '(.*?)[._ -]s([0-9]+)[._ -]*e([0-9]+)',
         '(.*?)[._ -]([0-9]+)x([0-9]+)',
         '(.*?)[._ -]([0-9]+)([0-9][0-9])',
         '(.*?)[._ -]?season[._ -]*([0-9]+)[._ -]*-?[._ -]*episode[._ -]*([0-9]+)',
         '(.*?)[._ -]\[s([0-9]+)\][._ -]*\[e([0-9]+)\]',
         '(.*?)[._ -]s([0-9]+)[._ -]*ep([0-9]+)']
     
     show_title = ''
     for pattern in sxe_patterns:
         match = re.search(pattern, title, re.I)
         if match:
             temp_title, season, episode = match.groups()
             if int(season) == int(video.season) and int(episode) == int(video.episode):
                 show_title = temp_title
                 break
     else:
         airdate_fallback = kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate
         if video.ep_airdate and airdate_fallback:
             airdate_pattern = '(.*?)[. _]%s[. _]%02d[. _]%02d[. _]' % (video.ep_airdate.year, video.ep_airdate.month, video.ep_airdate.day)
             match = re.search(airdate_pattern, title)
             if match:
                 show_title = match.group(1)
     
     if show_title and norm_title in scraper_utils.normalize_title(show_title):
         return 'hash=%s' % (hash_id)
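
The sxe_patterns are tried in order against raw release names; the first capture group is the show title and the next two are the season and episode numbers. A quick sketch of the first pattern against a made-up name:

import re

title = 'Some.Show.S03E07.720p.HDTV.x264'
match = re.search('(.*?)[._ -]s([0-9]+)[._ -]*e([0-9]+)', title, re.I)
if match:
    show_title, season, episode = match.groups()  # ('Some.Show', '03', '07')
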
Code Example #27
File: movietv_scraper.py Project: bialagary/mw
 def _get_episode_url(self, show_url, video):
     url = urlparse.urljoin(self.base_url, show_url)
     html = self._http_get(url, cache_limit=1)
     match = re.search("var\s+id\s*=\s*'?(\d+)'?", html)
     if match:
         show_id = match.group(1)
         season_url = SEASON_URL % (show_id, video.season, str(int(time.time()) * 1000), self.__get_token())
         season_url = urlparse.urljoin(self.base_url, season_url)
         html = self._http_get(season_url, cache_limit=1)
         try:
             js_data = json.loads(html)
         except ValueError:
             log_utils.log('Invalid JSON returned: %s: %s' % (url, html), log_utils.LOGWARNING)
         else:
             force_title = self._force_title(video)
             if not force_title:
                  for episode in js_data:
                      if int(episode['episode_number']) == int(video.episode):
                          return LINK_URL % (show_id, video.season, episode['episode_number'])
             
             if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
                 norm_title = self._normalize_title(video.ep_title)
                 for episode in js_data:
                     if norm_title == self._normalize_title(episode['title']):
                         return LINK_URL % (show_id, video.season, episode['episode_number'])
Code Example #28
 def __check_config(self, now):
     last_config_call = now - int(kodi.get_setting('%s-last-config' % (self.get_name())))
     if last_config_call > 8 * 60 * 60:
         url = urlparse.urljoin(self.base_url, CONFIG_URL)
         url += self.__get_extra(now)
         _html = super(GVCenter_Scraper, self)._cached_http_get(url, self.base_url, self.timeout, headers=HEADERS, cache_limit=8)
         kodi.set_setting('%s-last-config' % (self.get_name()), str(int(now)))
Code Example #29
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if source_url and source_url != FORCE_NO_MATCH:
            page_url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(page_url, cache_limit=.5)
            page_urls = [page_url]
            if kodi.get_setting('scraper_url'):
                page_urls += self.__get_page_urls(html)
            
            for page_url in page_urls:
                html = self._http_get(page_url, cache_limit=.5)
                subs = 'Turkish Subtitles'
                fragment = dom_parser.parse_dom(html, 'li', {'class': 'active'})
                if fragment:
                    frag_class = dom_parser.parse_dom(fragment[0], 'span', ret='class')
                    if frag_class:
                        if frag_class[0] == 'icon-en':
                            subs = 'English Subtitles'
                        elif frag_class[0] == 'icon-orj':
                            subs = ''
                            
                hosters += self.__get_cloud_links(html, page_url, subs)
                hosters += self.__get_embedded_links(html, subs)
                hosters += self.__get_iframe_links(html, subs)

        return hosters
Code Example #30
 def _get_episode_url(self, show_url, video):
     params = urlparse.parse_qs(show_url)
     catalog_id = params['catalog_id'][0]
     sid = hashlib.md5('content%scthd' % (catalog_id)).hexdigest()
     source_url = CONTENT_URL % (catalog_id, sid)
     url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(url, cache_limit=.5)
     try:
         js_data = json.loads(html)
     except ValueError:
         log_utils.log('Invalid JSON returned for: %s' % (url), xbmc.LOGWARNING)
     else:
         force_title = self._force_title(video)
         if not force_title:
             for episode in js_data['listvideos']:
                 if ' S%02dE%02d ' % (int(video.season), int(video.episode)) in episode['film_name']:
                     return EPISODE_URL % (video.video_type, params['catalog_id'][0], video.season, video.episode)
         
         if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
             norm_title = self._normalize_title(video.ep_title)
             for episode in js_data['listvideos']:
                 match = re.search('-\s*S(\d+)E(\d+)\s*-\s*(.*)', episode['film_name'])
                 if match:
                     season, episode, title = match.groups()
                     if title and norm_title == self._normalize_title(title):
                         return EPISODE_URL % (video.video_type, params['catalog_id'][0], int(season), int(episode))
Code Example #31
def update_scraper(filename, scraper_url):
    try:
        if not filename: return
        py_path = os.path.join(kodi.get_path(), 'scrapers', filename)
        exists = os.path.exists(py_path)
        scraper_password = kodi.get_setting('scraper_password')
        if scraper_url and scraper_password:
            new_py = utils2.get_and_decrypt(scraper_url, scraper_password)
            if new_py:
                if exists:
                    with open(py_path, 'r') as f:
                        old_py = f.read()
                else:
                    old_py = ''

                log_utils.log(
                    '%s path: %s, new_py: %s, match: %s' %
                    (filename, py_path, bool(new_py), new_py == old_py),
                    log_utils.LOGDEBUG)
                if old_py != new_py:
                    with open(py_path, 'w') as f:
                        f.write(new_py)

    except Exception as e:
        log_utils.log('Failure during %s scraper update: %s' % (filename, e),
                      log_utils.LOGWARNING)
Code Example #32
File: scraper.py Project: bialagary/mw
    def _blog_get_url(self, video, delim='.'):
        url = None
        self.create_db_connection()
        result = self.db_connection.get_related_url(video.video_type,
                                                    video.title, video.year,
                                                    self.get_name(),
                                                    video.season,
                                                    video.episode)
        if result:
            url = result[0][0]
            log_utils.log('Got local related url: |%s|%s|%s|%s|%s|' %
                          (video.video_type, video.title, video.year,
                           self.get_name(), url))
        else:
            select = int(kodi.get_setting('%s-select' % (self.get_name())))
            if video.video_type == VIDEO_TYPES.EPISODE:
                temp_title = re.sub('[^A-Za-z0-9 ]', '', video.title)
                if not self._force_title(video):
                    search_title = '%s S%02dE%02d' % (
                        temp_title, int(video.season), int(video.episode))
                    if isinstance(video.ep_airdate, datetime.date):
                        fallback_search = '%s %s' % (
                            temp_title,
                            video.ep_airdate.strftime(
                                '%Y{0}%m{0}%d'.format(delim)))
                    else:
                        fallback_search = ''
                else:
                    if not video.ep_title: return None
                    search_title = '%s %s' % (temp_title, video.ep_title)
                    fallback_search = ''
            else:
                search_title = '%s %s' % (video.title, video.year)
                fallback_search = ''

            results = self.search(video.video_type, search_title, video.year)
            if not results and fallback_search:
                results = self.search(video.video_type, fallback_search,
                                      video.year)
            if results:
                # TODO: First result isn't always the most recent...
                best_result = results[0]
                if select != 0:
                    best_qorder = 0
                    for result in results:
                        match = re.search('\[(.*)\]$', result['title'])
                        if match:
                            q_str = match.group(1)
                            quality = self._blog_get_quality(video, q_str, '')
                            # print 'result: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality])
                            if Q_ORDER[quality] > best_qorder:
                                # print 'Setting best as: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality])
                                best_result = result
                                best_qorder = Q_ORDER[quality]

                url = best_result['url']
                self.db_connection.set_related_url(video.video_type,
                                                   video.title, video.year,
                                                   self.get_name(), url)
        return url
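
When a quality preference is configured, result titles are expected to end in a bracketed quality tag, which the '\[(.*)\]$' search pulls out before ranking. A tiny sketch with a made-up blog title:

import re

title = 'Some Show S01E02 720p WEB-DL [HD]'
match = re.search('\[(.*)\]$', title)
if match:
    q_str = match.group(1)  # -> 'HD', later mapped to a quality and ranked via Q_ORDER
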
Code Example #33
 def _get_episode_url(self, show_url, video):
     sxe = '.S%02dE%02d.' % (int(video.season), int(video.episode))
     force_title = scraper_utils.force_title(video)
     title_fallback = kodi.get_setting('title-fallback') == 'true'
     norm_title = scraper_utils.normalize_title(video.ep_title)
     try: ep_airdate = video.ep_airdate.strftime('.%Y.%m.%d.')
     except: ep_airdate = ''
     
     page_url = [show_url]
     too_old = False
     while page_url and not too_old:
         url = urlparse.urljoin(self.base_url, page_url[0])
         html = self._http_get(url, require_debrid=True, cache_limit=1)
         headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
         posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
         for heading, post in zip(headings, posts):
             if self.__too_old(post):
                 too_old = True
                 break
             if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                 url, title = heading
                 if not force_title:
                     if (sxe in title) or (ep_airdate and ep_airdate in title):
                         return scraper_utils.pathify_url(url)
                 else:
                     if title_fallback and norm_title:
                         match = re.search('<strong>(.*?)</strong>', post)
                         if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                             return scraper_utils.pathify_url(url)
             
         page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
Code Example #34
File: movietv_scraper.py Project: lidormalicb/salts
    def _get_episode_url(self, show_url, video):
        url = urlparse.urljoin(self.base_url, show_url)
        html = self._http_get(url, cache_limit=1)
        match = re.search("var\s+id\s*=\s*'?(\d+)'?", html)
        if match:
            show_id = match.group(1)
            season_url = SEASON_URL % (show_id, video.season, str(int(time.time()) * 1000))
            season_url = urlparse.urljoin(self.base_url, season_url)
            html = self._http_get(season_url, cache_limit=1)
            try:
                js_data = json.loads(html)
            except ValueError:
                log_utils.log("Invalid JSON returned: %s: %s" % (url, html), log_utils.LOGWARNING)
            else:
                force_title = self._force_title(video)
                if not force_title:
                    for episode in js_data:
                        if int(episode["episode_number"]) == int(video.episode):
                            return LINK_URL % (show_id, video.season, episode["episode_number"])

                if (force_title or kodi.get_setting("title-fallback") == "true") and video.ep_title:
                    norm_title = self._normalize_title(video.ep_title)
                    for episode in js_data:
                        if norm_title == self._normalize_title(episode["title"]):
                            return LINK_URL % (show_id, video.season, episode["episode_number"])
Code Example #35
 def _get_episode_url(self, show_url, video):
     sxe = '(\.|_| )S%02dE%02d(\.|_| )' % (int(video.season), int(video.episode))
     force_title = scraper_utils.force_title(video)
     title_fallback = kodi.get_setting('title-fallback') == 'true'
     norm_title = scraper_utils.normalize_title(video.ep_title)
     try: airdate_pattern = video.ep_airdate.strftime('(\.|_| )%Y(\.|_| )%m(\.|_| )%d(\.|_| )')
     except: airdate_pattern = ''
     
     page_url = [show_url]
     too_old = False
     while page_url and not too_old:
         url = urlparse.urljoin(self.base_url, page_url[0])
         html = self._http_get(url, cache_limit=1)
         headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
         posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
         for heading, post in zip(headings, posts):
             if self.__too_old(post):
                 too_old = True
                 break
             if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                 url, title = heading
                 if not force_title:
                     if re.search(sxe, title) or (airdate_pattern and re.search(airdate_pattern, title)):
                         return scraper_utils.pathify_url(url)
                 else:
                     if title_fallback and norm_title:
                         match = re.search('</strong>(.*?)</p>', post)
                         if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                             return scraper_utils.pathify_url(url)
             
         page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
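
Note the trick on the airdate line above: strftime() substitutes only the %Y/%m/%d fields and copies the regex alternations through untouched, so the result is itself a pattern. A short sketch with a made-up date:

import datetime

ep_airdate = datetime.date(2015, 3, 7)
airdate_pattern = ep_airdate.strftime('(\.|_| )%Y(\.|_| )%m(\.|_| )%d(\.|_| )')
# -> '(\.|_| )2015(\.|_| )03(\.|_| )07(\.|_| )', later used with re.search() against post titles
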
Code Example #36
 def _get_episode_url(self, show_url, video):
     sxe = '(\.|_| )S%02dE%02d(\.|_| )' % (int(video.season), int(video.episode))
     force_title = scraper_utils.force_title(video)
     title_fallback = kodi.get_setting('title-fallback') == 'true'
     norm_title = scraper_utils.normalize_title(video.ep_title)
     try: airdate_pattern = video.ep_airdate.strftime('(\.|_| )%Y(\.|_| )%m(\.|_| )%d(\.|_| )')
     except: airdate_pattern = ''
     
     page_url = [show_url]
     too_old = False
     while page_url and not too_old:
         url = urlparse.urljoin(self.base_url, page_url[0])
         html = self._http_get(url, require_debrid=True, cache_limit=1)
         posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
         for post in posts:
             if self.__too_old(post):
                 too_old = True
                 break
             if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                 match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                 if match:
                     url, title = match.groups()
                     if not force_title:
                         if re.search(sxe, title) or (airdate_pattern and re.search(airdate_pattern, title)):
                             return scraper_utils.pathify_url(url)
                     else:
                         if title_fallback and norm_title:
                             match = re.search('</strong>(.*?)</p>', post)
                             if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                 return scraper_utils.pathify_url(url)
             
         page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
Code Example #37
File: rlsbb_scraper.py Project: henry73/salts
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        sources = {}
        if source_url and source_url != FORCE_NO_MATCH:
            url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(url, cache_limit=.5)
            sources.update(self.__get_post_links(html, video))

            if kodi.get_setting('%s-include_comments' %
                                (self.get_name())) == 'true':
                for comment in dom_parser.parse_dom(html, 'div',
                                                    {'id': 'commentbody-\d+'}):
                    sources.update(self.__get_comment_links(comment, video))

        for source in sources:
            if re.search('\.part\.?\d+', source) or '.rar' in source or 'sample' in source or source.endswith('.nfo'):
                continue
            host = urlparse.urlparse(source).hostname
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'views': None,
                'url': source,
                'rating': None,
                'quality': sources[source],
                'direct': False
            }
            hosters.append(hoster)
        return hosters
Code Example #38
    def _get_episode_url(self, show_url, video):
        url = urlparse.urljoin(self.base_url, show_url)
        html = self._http_get(url, cache_limit=1)
        match = re.search("var\s+id\s*=\s*'?(\d+)'?", html)
        if match:
            show_id = match.group(1)
            season_url = SEASON_URL % (show_id, video.season,
                                       str(int(time.time()) * 1000),
                                       self.__get_token())
            season_url = urlparse.urljoin(self.base_url, season_url)
            html = self._http_get(season_url, cache_limit=1)
            js_data = self._parse_json(html, season_url)
            force_title = self._force_title(video)
            if not force_title:
                for episode in js_data:
                    if int(episode['episode_number']) == int(video.episode):
                        return LINK_URL % (show_id, video.season,
                                           episode['episode_number'])

            if (force_title or kodi.get_setting('title-fallback')
                    == 'true') and video.ep_title:
                norm_title = self._normalize_title(video.ep_title)
                for episode in js_data:
                    if norm_title == self._normalize_title(episode['title']):
                        return LINK_URL % (show_id, video.season,
                                           episode['episode_number'])
Code Example #39
    def _get_episode_url(self, show_url, video):
        log_utils.log('WS Episode Url: |%s|%s|' % (show_url, str(video).decode('utf-8', 'replace')), log_utils.LOGDEBUG)
        html = self._http_get(show_url, cache_limit=2)
        if html:
            try:
                js_result = json.loads(html)
            except ValueError:
                log_utils.log('Invalid JSON returned: %s: %s' % (show_url, html), log_utils.LOGWARNING)
            else:
                if 'results' in js_result and '0' in js_result['results'] and 'episodes' in js_result['results']['0']:
                    seasons = js_result['results']['0']['episodes']
                    force_title = self._force_title(video)
                    if not force_title:
                        if str(video.season) in seasons:
                            season = seasons[str(video.season)]
                            if isinstance(season, list):
                                season = dict((ep['episode'], ep) for ep in season)

                            if str(video.episode) in season:
                                url = season[str(video.episode)]['url']
                                return self._pathify_url(url.replace('/json', ''))
    
                        if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                            airdate_pattern = video.ep_airdate.strftime('%d/%m/%Y')  # dd/mm/yyyy to match the 'release' field
                            for season in seasons:
                                if season.lower() == 'epcount': continue
                                episodes = seasons[season]
                                if isinstance(episodes, dict):
                                    episodes = [episodes[key] for key in episodes]
                                for episode in episodes:
                                    if airdate_pattern == episode['release']:
                                        url = episode['url']
                                        return self._pathify_url(url.replace('/json', ''))
                    else:
                        log_utils.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
     
                    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
                        norm_title = self._normalize_title(video.ep_title)
                        for season in seasons:
                            if season.lower() == 'epcount': continue
                            episodes = seasons[season]
                            if isinstance(episodes, dict):
                                episodes = [episodes[key] for key in episodes]
                            for episode in episodes:
                                if episode['name'] is not None and norm_title == self._normalize_title(episode['name']):
                                    url = episode['url']
                                    return self._pathify_url(url.replace('/json', ''))
Code Example #40
    def _default_get_episode_url(
        self, show_url, video, episode_pattern, title_pattern="", airdate_pattern="", data=None, headers=None
    ):
        log_utils.log(
            "Default Episode Url: |%s|%s|%s|%s|"
            % (self.base_url, show_url, str(video).decode("utf-8", "replace"), data),
            log_utils.LOGDEBUG,
        )
        if not show_url.startswith("http"):
            url = urlparse.urljoin(self.base_url, show_url)
        else:
            url = show_url
        html = self._http_get(url, data=data, headers=headers, cache_limit=2)
        if html:
            force_title = scraper_utils.force_title(video)

            if not force_title:
                if episode_pattern:
                    match = re.search(episode_pattern, html, re.DOTALL)
                    if match:
                        return scraper_utils.pathify_url(match.group(1))

                if kodi.get_setting("airdate-fallback") == "true" and airdate_pattern and video.ep_airdate:
                    airdate_pattern = airdate_pattern.replace("{year}", str(video.ep_airdate.year))
                    airdate_pattern = airdate_pattern.replace("{month}", str(video.ep_airdate.month))
                    airdate_pattern = airdate_pattern.replace("{p_month}", "%02d" % (video.ep_airdate.month))
                    airdate_pattern = airdate_pattern.replace("{month_name}", MONTHS[video.ep_airdate.month - 1])
                    airdate_pattern = airdate_pattern.replace("{short_month}", SHORT_MONS[video.ep_airdate.month - 1])
                    airdate_pattern = airdate_pattern.replace("{day}", str(video.ep_airdate.day))
                    airdate_pattern = airdate_pattern.replace("{p_day}", "%02d" % (video.ep_airdate.day))
                    log_utils.log("Air Date Pattern: %s" % (airdate_pattern), log_utils.LOGDEBUG)

                    match = re.search(airdate_pattern, html, re.DOTALL | re.I)
                    if match:
                        return scraper_utils.pathify_url(match.group(1))
            else:
                log_utils.log(
                    "Skipping S&E matching as title search is forced on: %s" % (video.trakt_id), log_utils.LOGDEBUG
                )

            if (force_title or kodi.get_setting("title-fallback") == "true") and video.ep_title and title_pattern:
                norm_title = scraper_utils.normalize_title(video.ep_title)
                for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
                    episode = match.groupdict()
                    if norm_title == scraper_utils.normalize_title(episode["title"]):
                        return scraper_utils.pathify_url(episode["url"])
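A minimal sketch of how the {placeholder} tokens in an airdate_pattern are expanded before the regex search in the method above. The pattern and the HTML below are hypothetical, and the month tables stand in for the MONTHS/SHORT_MONS constants the scraper defines elsewhere:

import datetime
import re

# Stand-ins for the MONTHS/SHORT_MONS constants used above.
MONTHS = ['January', 'February', 'March', 'April', 'May', 'June',
          'July', 'August', 'September', 'October', 'November', 'December']
SHORT_MONS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

ep_airdate = datetime.date(2015, 3, 9)                               # hypothetical air date
airdate_pattern = 'href="([^"]+)"[^>]*>{month_name} {day}, {year}<'  # hypothetical site pattern

# The same substitutions _default_get_episode_url performs on the template.
airdate_pattern = airdate_pattern.replace('{year}', str(ep_airdate.year))
airdate_pattern = airdate_pattern.replace('{month}', str(ep_airdate.month))
airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % ep_airdate.month)
airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[ep_airdate.month - 1])
airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[ep_airdate.month - 1])
airdate_pattern = airdate_pattern.replace('{day}', str(ep_airdate.day))
airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % ep_airdate.day)

html = '<a href="/show/s01e05">March 9, 2015</a>'                    # hypothetical page snippet
match = re.search(airdate_pattern, html, re.DOTALL | re.I)
if match:
    print(match.group(1))  # -> /show/s01e05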
Code example #41
File: scraper.py Project: AMOboxTV/AMOBox.LegoBuild
 def _set_cookies(self, base_url, cookies):
     cookie_file = os.path.join(COOKIEPATH, '%s_cookies.lwp' % (self.get_name()))
     cj = cookielib.LWPCookieJar(cookie_file)
     try: cj.load(ignore_discard=True)
     except: pass
     if kodi.get_setting('cookie_debug') == 'true':
         log_utils.log('Before Cookies: %s - %s' % (self, scraper_utils.cookies_as_str(cj)), log_utils.LOGDEBUG)
     domain = urlparse.urlsplit(base_url).hostname
     for key in cookies:
         c = cookielib.Cookie(0, key, str(cookies[key]), port=None, port_specified=False, domain=domain, domain_specified=True,
                              domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=False, comment=None,
                              comment_url=None, rest={})
         cj.set_cookie(c)
     cj.save(ignore_discard=True)
     if kodi.get_setting('cookie_debug') == 'true':
         log_utils.log('After Cookies: %s - %s' % (self, scraper_utils.cookies_as_str(cj)), log_utils.LOGDEBUG)
     return cj
Code example #42
File: watchseries_scraper.py Project: edwtjo/salts
    def _get_episode_url(self, show_url, video):
        log_utils.log("WS Episode Url: |%s|%s|" % (show_url, str(video).decode("utf-8", "replace")), log_utils.LOGDEBUG)
        html = self._http_get(show_url, cache_limit=2)
        js_result = self._parse_json(html, show_url)
        if "results" in js_result and "0" in js_result["results"] and "episodes" in js_result["results"]["0"]:
            seasons = js_result["results"]["0"]["episodes"]
            force_title = self._force_title(video)
            if not force_title:
                if str(video.season) in seasons:
                    season = seasons[str(video.season)]
                    if isinstance(season, list):
                        season = dict((ep["episode"], ep) for ep in season)

                    if str(video.episode) in season:
                        url = season[str(video.episode)]["url"]
                        return self._pathify_url(url.replace("/json", ""))

                if kodi.get_setting("airdate-fallback") == "true" and video.ep_airdate:
                    airdate_pattern = video.ep_airdate.strftime("%d/%M/%Y")
                    for season in seasons:
                        if season.lower() == "epcount":
                            continue
                        episodes = seasons[season]
                        if isinstance(episodes, dict):
                            episodes = [episodes[key] for key in episodes]
                        for episode in episodes:
                            if airdate_pattern == episode["release"]:
                                url = episode["url"]
                                return self._pathify_url(url.replace("/json", ""))
            else:
                log_utils.log(
                    "Skipping S&E matching as title search is forced on: %s" % (video.trakt_id), log_utils.LOGDEBUG
                )

            if (force_title or kodi.get_setting("title-fallback") == "true") and video.ep_title:
                norm_title = self._normalize_title(video.ep_title)
                for season in seasons:
                    if season.lower() == "epcount":
                        continue
                    episodes = seasons[season]
                    if isinstance(episodes, dict):
                        episodes = [episodes[key] for key in episodes]
                    for episode in episodes:
                        if episode["name"] is not None and norm_title == self._normalize_title(episode["name"]):
                            url = episode["url"]
                            return self._pathify_url(url.replace("/json", ""))
Code example #43
File: gvcenter_scraper.py Project: s7eele/salts
 def __check_config(self, now):
     last_config_call = now - int(kodi.get_setting('%s-last-config' % (self.get_name())))
     if self.device_id is None or last_config_call > 8 * 60 * 60:
         self.device_id = ''.join(random.choice(string.digits) for _ in xrange(15))
         kodi.set_setting('%s-device_id' % (self.get_name()), self.device_id)
         url = urlparse.urljoin(self.base_url, CONFIG_URL)
         url += self.__get_extra(now)
         _html = super(GVCenter_Scraper, self)._cached_http_get(url, self.base_url, self.timeout, headers=HEADERS, cache_limit=8)
         kodi.set_setting('%s-last-config' % (self.get_name()), str(int(now)))
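In case the refresh window above is unclear, here is a small sketch of the same check, assuming a 15-digit device id and the eight-hour interval used in the method (settings access replaced with plain arguments):

import random
import string
import time

CONFIG_TTL = 8 * 60 * 60  # re-fetch the config at most once every eight hours

def needs_new_config(device_id, last_config_call, now=None):
    # Mirrors the condition in __check_config above.
    now = time.time() if now is None else now
    return device_id is None or (now - last_config_call) > CONFIG_TTL

if needs_new_config(None, 0):
    device_id = ''.join(random.choice(string.digits) for _ in range(15))
    print(device_id)  # e.g. '482019375610284'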
Code example #44
    def _blog_get_url(self, video, delim="."):
        url = None
        self.create_db_connection()
        result = self.db_connection.get_related_url(
            video.video_type, video.title, video.year, self.get_name(), video.season, video.episode
        )
        if result:
            url = result[0][0]
            log_utils.log(
                "Got local related url: |%s|%s|%s|%s|%s|"
                % (video.video_type, video.title, video.year, self.get_name(), url)
            )
        else:
            select = int(kodi.get_setting("%s-select" % (self.get_name())))
            if video.video_type == VIDEO_TYPES.EPISODE:
                temp_title = re.sub("[^A-Za-z0-9 ]", "", video.title)
                if not scraper_utils.force_title(video):
                    search_title = "%s S%02dE%02d" % (temp_title, int(video.season), int(video.episode))
                    if isinstance(video.ep_airdate, datetime.date):
                        fallback_search = "%s %s" % (
                            temp_title,
                            video.ep_airdate.strftime("%Y{0}%m{0}%d".format(delim)),
                        )
                    else:
                        fallback_search = ""
                else:
                    if not video.ep_title:
                        return None
                    search_title = "%s %s" % (temp_title, video.ep_title)
                    fallback_search = ""
            else:
                search_title = "%s %s" % (video.title, video.year)
                fallback_search = ""

            results = self.search(video.video_type, search_title, video.year)
            if not results and fallback_search:
                results = self.search(video.video_type, fallback_search, video.year)
            if results:
                # TODO: First result isn't always the most recent...
                best_result = results[0]
                if select != 0:
                    best_qorder = 0
                    for result in results:
                        match = re.search("\[(.*)\]$", result["title"])
                        if match:
                            q_str = match.group(1)
                            quality = scraper_utils.blog_get_quality(video, q_str, "")
                            # print 'result: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality])
                            if Q_ORDER[quality] > best_qorder:
                                # print 'Setting best as: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality])
                                best_result = result
                                best_qorder = Q_ORDER[quality]

                url = best_result["url"]
                self.db_connection.set_related_url(video.video_type, video.title, video.year, self.get_name(), url)
        return url
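The episode fallback search string above interpolates the delim argument into the strftime template; a quick illustration with a hypothetical title and air date:

import datetime

ep_airdate = datetime.date(2015, 3, 9)   # hypothetical episode air date
temp_title, delim = 'Some Show', '.'
# '%Y{0}%m{0}%d'.format('.') -> '%Y.%m.%d', so the fallback search becomes:
print('%s %s' % (temp_title, ep_airdate.strftime('%Y{0}%m{0}%d'.format(delim))))
# -> 'Some Show 2015.03.09'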
Code example #45
def get_ua():
    try:
        last_gen = int(kodi.get_setting('last_ua_create'))
    except:
        last_gen = 0
    if not kodi.get_setting('current_ua') or last_gen < (time.time() -
                                                         (7 * 24 * 60 * 60)):
        index = random.randrange(len(RAND_UAS))
        user_agent = RAND_UAS[index].format(win_ver=random.choice(WIN_VERS),
                                            feature=random.choice(FEATURES),
                                            br_ver=random.choice(
                                                BR_VERS[index]))
        log_utils.log('Creating New User Agent: %s' % (user_agent),
                      log_utils.LOGDEBUG)
        kodi.set_setting('current_ua', user_agent)
        kodi.set_setting('last_ua_create', str(int(time.time())))
    else:
        user_agent = kodi.get_setting('current_ua')
    return user_agent
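The regeneration condition in get_ua boils down to a weekly TTL on the cached user agent; a minimal restatement, with the settings access replaced by plain arguments:

import time

UA_TTL = 7 * 24 * 60 * 60  # regenerate the cached user agent weekly

def ua_is_stale(current_ua, last_gen, now=None):
    # Same condition as above: no cached UA, or one created more than a week ago.
    now = time.time() if now is None else now
    return not current_ua or last_gen < now - UA_TTL

print(ua_is_stale('', 0))                            # True: nothing cached yet
print(ua_is_stale('Mozilla/5.0 ...', time.time()))   # False: freshly generated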
Code example #46
    def _blog_proc_results(self, html, post_pattern, date_format, video_type, title, year):
        results = []
        match = re.search('(.*?)\s*S\d+E\d+\s*', title)
        if match:
            show_title = match.group(1)
        else:
            match = re.search('(.*?)\s*\d{4}\.\d{2}\.\d{2}\s*', title)
            if match:
                show_title = match.group(1)
            else:
                show_title = title
        norm_title = self._normalize_title(show_title)

        filter_days = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
        today = datetime.date.today()
        for match in re.finditer(post_pattern, html, re.DOTALL):
            post_data = match.groupdict()
            post_title = post_data['post_title']
            if 'quality' in post_data:
                post_title += '- [%s]' % (post_data['quality'])

            if filter_days:
                try: post_date = datetime.datetime.strptime(post_data['date'], date_format).date()
                except TypeError: post_date = datetime.datetime(*(time.strptime(post_data['date'], date_format)[0:6])).date()
                if today - post_date > filter_days:
                    continue

            match_year = ''
            match_title = ''
            post_title = post_title.replace('&#8211;', '-')
            post_title = post_title.replace('&#8217;', "'")
            full_title = post_title
            if video_type == VIDEO_TYPES.MOVIE:
                match = re.search('(.*?)\s*[\[(]?(\d{4})[)\]]?\s*(.*)', post_title)
                if match:
                    match_title, match_year, extra_title = match.groups()
                    full_title = '%s [%s]' % (match_title, extra_title)
            else:
                match = re.search('(.*?)\s*S\d+E\d+\s*(.*)', post_title)
                if match:
                    match_title, extra_title = match.groups()
                    full_title = '%s [%s]' % (match_title, extra_title)
                else:
                    match = re.search('(.*?)\s*\d{4}[ .]?\d{2}[ .]?\d{2}\s*(.*)', post_title)
                    if match:
                        match_title, extra_title = match.groups()
                        full_title = '%s [%s]' % (match_title, extra_title)

            match_norm_title = self._normalize_title(match_title)
            if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                result = {'url': post_data['url'].replace(self.base_url, ''), 'title': full_title, 'year': match_year}
                results.append(result)
        return results
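The show title is carved out of the incoming search title with the two regexes at the top of _blog_proc_results; isolated here with hypothetical titles:

import re

def show_title_of(title):
    # SxxExx form first, then yyyy.mm.dd, otherwise the title as-is.
    match = re.search(r'(.*?)\s*S\d+E\d+\s*', title)
    if match:
        return match.group(1)
    match = re.search(r'(.*?)\s*\d{4}\.\d{2}\.\d{2}\s*', title)
    return match.group(1) if match else title

print(show_title_of('Some Show S01E05'))             # -> 'Some Show'
print(show_title_of('Some Daily Show 2015.03.09'))   # -> 'Some Daily Show'
print(show_title_of('Some Movie 1999'))              # -> 'Some Movie 1999'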
Code example #47
 def __too_old(self, post):
     filter_days = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
     if filter_days:
         today = datetime.date.today()
         match = re.search('class="postMonth"\s+title="([^"]+)">([^<]+).*?class="postDay"[^>]*>([^<]+)', post)
         if match:
             try:
                 post_year, mon_name, post_day = match.groups()
                 post_month = SHORT_MONS.index(mon_name) + 1
                 post_date = datetime.date(int(post_year), post_month, int(post_day))
                 if today - post_date > filter_days:
                     return True
             except ValueError:
                 return False
     return False
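The __too_old helpers here and in the later examples all reconstruct a date from a month name via SHORT_MONS and compare it against the filter window; a condensed sketch, with the month table standing in for the add-on's constant:

import datetime

SHORT_MONS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

def too_old(post_year, mon_name, post_day, filter_days):
    # Same date reconstruction and comparison as the __too_old helpers.
    post_date = datetime.date(int(post_year), SHORT_MONS.index(mon_name) + 1, int(post_day))
    return datetime.date.today() - post_date > datetime.timedelta(days=filter_days)

print(too_old('2015', 'Mar', '9', 30))   # True for posts more than 30 days old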
Code example #48
 def __match_episode(self, video, norm_title, title, hash_id):
     sxe_pattern = '(.*?)[. _]S%02dE%02d[. _]' % (int(video.season), int(video.episode))
     airdate_fallback = kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate
     show_title = ''
     match = re.search(sxe_pattern, title, re.I)
     if match:
         show_title = match.group(1)
     elif video.ep_airdate and airdate_fallback:
         airdate_pattern = '(.*?)[. _]%s[. _]%02d[. _]%02d[. _]' % (video.ep_airdate.year, video.ep_airdate.month, video.ep_airdate.day)
         match = re.search(airdate_pattern, title)
         if match:
             show_title = match.group(1)
     
     if show_title and norm_title in scraper_utils.normalize_title(show_title):
         return 'hash=%s' % (hash_id)
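For context, this is how the SxxExx pattern in __match_episode carves the show title out of a release name (the release string below is hypothetical):

import re

season, episode = 1, 5
title = 'Some.Show.S01E05.720p.HDTV.x264-GRP'   # hypothetical release name

sxe_pattern = r'(.*?)[. _]S%02dE%02d[. _]' % (season, episode)
match = re.search(sxe_pattern, title, re.I)
if match:
    print(match.group(1))  # -> 'Some.Show'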
Code example #49
 def __too_old(self, post):
     filter_days = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
     if filter_days:
         today = datetime.date.today()
         match = re.search('<span\s+class="date">(.*?)\s+(\d+)[^<]+(\d{4})<', post)
         if match:
             try:
                 mon_name, post_day, post_year = match.groups()
                 post_month = SHORT_MONS.index(mon_name) + 1
                 post_date = datetime.date(int(post_year), post_month, int(post_day))
                 if today - post_date > filter_days:
                     return True
             except ValueError:
                 return False
     
     return False
Code example #51
    def _blog_get_url(self, video, delim='.'):
        url = None
        self.create_db_connection()
        result = self.db_connection.get_related_url(video.video_type, video.title, video.year, self.get_name(), video.season, video.episode)
        if result:
            url = result[0][0]
            log_utils.log('Got local related url: |%s|%s|%s|%s|%s|' % (video.video_type, video.title, video.year, self.get_name(), url), log_utils.LOGDEBUG)
        else:
            select = int(kodi.get_setting('%s-select' % (self.get_name())))
            if video.video_type == VIDEO_TYPES.EPISODE:
                temp_title = re.sub('[^A-Za-z0-9 ]', '', video.title)
                if not scraper_utils.force_title(video):
                    search_title = '%s S%02dE%02d' % (temp_title, int(video.season), int(video.episode))
                    if isinstance(video.ep_airdate, datetime.date):
                        fallback_search = '%s %s' % (temp_title, video.ep_airdate.strftime('%Y{0}%m{0}%d'.format(delim)))
                    else:
                        fallback_search = ''
                else:
                    if not video.ep_title: return None
                    search_title = '%s %s' % (temp_title, video.ep_title)
                    fallback_search = ''
            else:
                search_title = '%s %s' % (video.title, video.year)
                fallback_search = ''

            results = self.search(video.video_type, search_title, video.year)
            if not results and fallback_search:
                results = self.search(video.video_type, fallback_search, video.year)
            if results:
                # TODO: First result isn't always the most recent...
                best_result = results[0]
                if select != 0:
                    best_qorder = 0
                    for result in results:
                        match = re.search('\[(.*)\]$', result['title'])
                        if match:
                            q_str = match.group(1)
                            quality = scraper_utils.blog_get_quality(video, q_str, '')
                            log_utils.log('result: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                            if Q_ORDER[quality] > best_qorder:
                                log_utils.log('Setting best as: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                                best_result = result
                                best_qorder = Q_ORDER[quality]

                url = best_result['url']
                self.db_connection.set_related_url(video.video_type, video.title, video.year, self.get_name(), url, video.season, video.episode)
        return url
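When the scraper's "select" setting is non-zero, the loop above prefers the result whose quality ranks highest in Q_ORDER instead of simply taking the first hit. A reduced sketch with a hypothetical Q_ORDER table and pre-parsed qualities (the real code derives them from the bracketed title suffix via scraper_utils.blog_get_quality):

# Hypothetical quality ranking; the add-on defines its own Q_ORDER constant.
Q_ORDER = {'LOW': 1, 'SD': 2, 'HD720': 3, 'HD1080': 4}

results = [
    {'url': '/post/1', 'title': 'Some Movie [x264 SD]', 'quality': 'SD'},
    {'url': '/post/2', 'title': 'Some Movie [1080p BluRay]', 'quality': 'HD1080'},
]

best_result, best_qorder = results[0], 0
for result in results:
    if Q_ORDER[result['quality']] > best_qorder:
        best_result, best_qorder = result, Q_ORDER[result['quality']]
print(best_result['url'])  # -> /post/2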
Code example #52
    def search(self, video_type, title, year, season=''):
        url = urlparse.urljoin(self.base_url, 'http://ororo.tv/en')
        if video_type == VIDEO_TYPES.MOVIE:
            url += '/movies'
        html = self._http_get(url, cache_limit=.25)
        results = []
        norm_title = scraper_utils.normalize_title(title)
        include_paid = kodi.get_setting('%s-include_premium' % (self.get_name())) == 'true'
        for match in re.finditer('''<span class='value'>(\d{4})(.*?)href="([^"]+)[^>]+>([^<]+)''', html, re.DOTALL):
            match_year, middle, url, match_title = match.groups()
            if not include_paid and video_type == VIDEO_TYPES.MOVIE and 'paid accounts' in middle:
                continue

            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)

        return results
Code example #53
File: rlsbb_scraper.py Project: henry73/salts
 def __too_old(self, post):
     filter_days = datetime.timedelta(
         days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
     if filter_days:
         today = datetime.date.today()
         match = re.search(
             'class="postMonth"\s+title="([^"]+)">([^<]+).*?class="postDay"[^>]*>([^<]+)',
             post)
         if match:
             try:
                 post_year, mon_name, post_day = match.groups()
                 post_month = SHORT_MONS.index(mon_name) + 1
                 post_date = datetime.date(int(post_year), post_month,
                                           int(post_day))
                 if today - post_date > filter_days:
                     return True
             except ValueError:
                 return False
     return False
Code example #54
 def __too_old(self, post):
     filter_days = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
     if filter_days:
         today = datetime.date.today()
         match = re.search('<a[^>]+title="posting time[^"]*">(.*?)\s+(\d+)\s*(\d{2,4})<', post)
         if match:
             try:
                 mon_name, post_day, post_year = match.groups()
                 post_year = int(post_year)
                 if post_year < 2000:
                     post_year += 2000
                 post_month = SHORT_MONS.index(mon_name) + 1
                 post_date = datetime.date(post_year, post_month, int(post_day))
                 if today - post_date > filter_days:
                     return True
             except ValueError:
                 return False
     
     return False
Code example #55
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        sources = {}
        if source_url and source_url != FORCE_NO_MATCH:
            url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(url, cache_limit=.5)
            sources.update(self.__get_post_links(html, video))
            
            if kodi.get_setting('%s-include_comments' % (self.get_name())) == 'true':
                for comment in dom_parser.parse_dom(html, 'div', {'id': 'commentbody-\d+'}):
                    sources.update(self.__get_comment_links(comment, video))

        for source in sources:
            if re.search('\.part\.?\d+', source) or '.rar' in source or 'sample' in source or source.endswith('.nfo'): continue
            host = urlparse.urlparse(source).hostname
            hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': sources[source], 'direct': False}
            hosters.append(hoster)
        return hosters
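The skip filter in get_sources drops multi-part archives, RARs, samples and NFOs before building hosters; the same conditions isolated for illustration:

import re

def is_unwanted(source):
    # Same conditions as the continue in get_sources above.
    return bool(re.search(r'\.part\.?\d+', source)) or '.rar' in source \
        or 'sample' in source or source.endswith('.nfo')

print(is_unwanted('http://host/file.part01.rar'))   # True
print(is_unwanted('http://host/movie.sample.mkv'))  # True
print(is_unwanted('http://host/movie.mkv'))         # False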
Code example #56
 def __init__(self, timeout=scraper.DEFAULT_TIMEOUT):
     self.timeout = timeout
     self.base_url = kodi.get_setting('%s-base_url' % (self.get_name()))
Code example #57
    def _cached_http_get(self,
                         url,
                         base_url,
                         timeout,
                         cookies=None,
                         data=None,
                         multipart_data=None,
                         headers=None,
                         allow_redirect=True,
                         method=None,
                         cache_limit=8):
        if cookies is None: cookies = {}
        if timeout == 0: timeout = None
        if headers is None: headers = {}
        referer = headers['Referer'] if 'Referer' in headers else url
        log_utils.log(
            'Getting Url: %s cookie=|%s| data=|%s| extra headers=|%s|' %
            (url, cookies, data, headers))
        if data is not None:
            if isinstance(data, basestring):
                data = data
            else:
                data = urllib.urlencode(data, True)

        if multipart_data is not None:
            headers['Content-Type'] = 'multipart/form-data; boundary=X-X-X'
            data = multipart_data

        self.create_db_connection()
        _created, _res_header, html = self.db_connection.get_cached_url(
            url, data, cache_limit)
        if html:
            log_utils.log('Returning cached result for: %s' % (url),
                          log_utils.LOGDEBUG)
            return html

        try:
            self.cj = self._set_cookies(base_url, cookies)
            request = urllib2.Request(url, data=data)
            request.add_header('User-Agent', scraper_utils.get_ua())
            request.add_header('Accept', '*/*')
            request.add_unredirected_header('Host', request.get_host())
            request.add_unredirected_header('Referer', referer)
            for key in headers:
                request.add_header(key, headers[key])
            self.cj.add_cookie_header(request)
            if not allow_redirect:
                opener = urllib2.build_opener(NoRedirection)
                urllib2.install_opener(opener)
            else:
                opener = urllib2.build_opener(urllib2.HTTPRedirectHandler)
                urllib2.install_opener(opener)
                opener2 = urllib2.build_opener(
                    urllib2.HTTPCookieProcessor(self.cj))
                urllib2.install_opener(opener2)

            if method is not None: request.get_method = lambda: method.upper()
            response = urllib2.urlopen(request, timeout=timeout)
            self.cj.extract_cookies(response, request)
            if kodi.get_setting('cookie_debug') == 'true':
                log_utils.log(
                    'Response Cookies: %s - %s' %
                    (url, scraper_utils.cookies_as_str(self.cj)),
                    log_utils.LOGDEBUG)
            self.cj._cookies = scraper_utils.fix_bad_cookies(self.cj._cookies)
            self.cj.save(ignore_discard=True)
            if not allow_redirect and (
                    response.getcode() in [301, 302, 303, 307]
                    or response.info().getheader('Refresh')):
                if response.info().getheader('Refresh') is not None:
                    refresh = response.info().getheader('Refresh')
                    return refresh.split(';')[-1].split('url=')[-1]
                else:
                    return response.info().getheader('Location')

            content_length = response.info().getheader('Content-Length', 0)
            if int(content_length) > MAX_RESPONSE:
                log_utils.log(
                    'Response exceeded allowed size. %s => %s / %s' %
                    (url, content_length, MAX_RESPONSE), log_utils.LOGWARNING)

            if response.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(response.read(MAX_RESPONSE))
                f = gzip.GzipFile(fileobj=buf)
                html = f.read()
            else:
                html = response.read(MAX_RESPONSE)
        except urllib2.HTTPError as e:
            if e.code == 503 and 'cf-browser-verification' in e.read():
                html = cloudflare.solve(url, self.cj, scraper_utils.get_ua())
                if not html:
                    return ''
            else:
                log_utils.log(
                    'Error (%s) during scraper http get: %s' % (str(e), url),
                    log_utils.LOGWARNING)
                return ''
        except Exception as e:
            log_utils.log(
                'Error (%s) during scraper http get: %s' % (str(e), url),
                log_utils.LOGWARNING)
            return ''

        self.db_connection.cache_url(url, html, data)
        return html
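The response handling near the end of _cached_http_get transparently inflates gzip bodies; a minimal Python 3 sketch of that branch (the original is Python 2 and uses StringIO):

import gzip
from io import BytesIO

def decode_body(raw, content_encoding):
    # Mirrors the Content-Encoding check above.
    if content_encoding == 'gzip':
        return gzip.GzipFile(fileobj=BytesIO(raw)).read()
    return raw

compressed = gzip.compress(b'<html>hello</html>')
print(decode_body(compressed, 'gzip'))  # -> b'<html>hello</html>'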
Code example #58
    def _blog_proc_results(self, html, post_pattern, date_format, video_type,
                           title, year):
        results = []
        search_date = ''
        search_sxe = ''
        match = re.search('(.*?)\s*(S\d+E\d+)\s*', title)
        if match:
            show_title, search_sxe = match.groups()
        else:
            match = re.search('(.*?)\s*(\d{4})[ .]?(\d{2})[ .]?(\d{2})\s*',
                              title)
            if match:
                show_title, search_year, search_month, search_day = match.groups()
                search_date = '%s%s%s' % (search_year, search_month,
                                          search_day)
            else:
                show_title = title
        norm_title = scraper_utils.normalize_title(show_title)

        filter_days = datetime.timedelta(
            days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
        today = datetime.date.today()
        for match in re.finditer(post_pattern, html, re.DOTALL):
            post_data = match.groupdict()
            post_title = post_data['post_title']
            if 'quality' in post_data:
                post_title += '- [%s]' % (post_data['quality'])

            if filter_days and date_format and 'date' in post_data:
                try:
                    post_date = datetime.datetime.strptime(
                        post_data['date'], date_format).date()
                except TypeError:
                    post_date = datetime.datetime(
                        *(time.strptime(post_data['date'], date_format)[0:6]
                          )).date()
                if today - post_date > filter_days:
                    continue

            match_year = ''
            match_title = ''
            match_date = ''
            match_sxe = ''
            post_title = post_title.replace('&#8211;', '-')
            post_title = post_title.replace('&#8217;', "'")
            full_title = post_title
            if video_type == VIDEO_TYPES.MOVIE:
                match = re.search('(.*?)\s*[\[(]?(\d{4})[)\]]?\s*(.*)',
                                  post_title)
                if match:
                    match_title, match_year, extra_title = match.groups()
                    full_title = '%s [%s]' % (match_title, extra_title)
            else:
                match = re.search('(.*?)\s*(S\d+E\d+)\s*(.*)', post_title)
                if match:
                    match_title, match_sxe, extra_title = match.groups()
                    full_title = '%s [%s]' % (match_title, extra_title)
                else:
                    match = re.search(
                        '(.*?)\s*(\d{4})[ .]?(\d{2})[ .]?(\d{2})\s*(.*)',
                        post_title)
                    if match:
                        match_title, match_year2, match_month, match_day, extra_title = match.groups()
                        match_date = '%s%s%s' % (match_year2, match_month,
                                                 match_day)
                        full_title = '%s [%s]' % (match_title, extra_title)

            match_norm_title = scraper_utils.normalize_title(match_title)
            log_utils.log(
                'Blog Results: |%s|%s| - |%s|%s| - |%s|%s| - |%s|%s|' %
                (match_norm_title, norm_title, year, match_year, search_date,
                 match_date, search_sxe, match_sxe), log_utils.LOGDEBUG)
            if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year) \
                    and (not search_date or (search_date == match_date)) and (not search_sxe or (search_sxe == match_sxe)):
                result = {
                    'url': scraper_utils.pathify_url(post_data['url']),
                    'title': full_title,
                    'year': match_year
                }
                results.append(result)
        return results
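The final filter above accepts a post only when the normalized titles contain each other and the year, air date and SxxExx tags (when both sides have them) agree; a compact restatement with a hypothetical normalize_title standing in for scraper_utils.normalize_title:

import re

def normalize_title(title):
    # Hypothetical stand-in for scraper_utils.normalize_title.
    return re.sub(r'[^a-z0-9]', '', title.lower())

def is_match(norm_title, match_title, year='', match_year='',
             search_date='', match_date='', search_sxe='', match_sxe=''):
    match_norm = normalize_title(match_title)
    return ((match_norm in norm_title or norm_title in match_norm)
            and (not year or not match_year or year == match_year)
            and (not search_date or search_date == match_date)
            and (not search_sxe or search_sxe == match_sxe))

print(is_match(normalize_title('Some Show'), 'Some Show',
               search_sxe='S01E05', match_sxe='S01E05'))  # True
print(is_match(normalize_title('Some Show'), 'Some Show',
               search_sxe='S01E05', match_sxe='S01E06'))  # False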