def number_gen(first_part, start_num, end_num, semester, dept, subjects):
    """Scrape Pondicherry University results for a range of register numbers.

    Submits each register number (``first_part`` + zero-padded serial) on the
    result portal, writes the plain text of every result page to
    ``results/<first_part>/<number>.txt`` and finally runs :func:`analyze`.

    :param first_part: common register-number prefix (string).
    :param start_num: first serial number, inclusive.
    :param end_num: last serial number, inclusive.
    :param semester: visible text of the semester <select> option.
    :param dept: visible text of the degree/department <select> option.
    :param subjects: forwarded verbatim to analyze().
    :return: whatever analyze(subjects) returns.
    """
    driver = webdriver.Firefox()  # Firefox used for testing. Change it to PhantomJS
    driver.implicitly_wait(30)
    base_url = "http://result.pondiuni.edu.in/candidate.asp"
    driver.get(base_url)
    # os.mkdir(str(first_part))
    os.chdir("results")
    os.chdir(str(first_part))
    cache = FileSystemCache('.cachedir', threshold=100000)
    try:
        for number in range(start_num, end_num + 1):
            numb = first_part + "%04d" % number
            driver.find_element_by_id("txtregno").clear()
            driver.find_element_by_id("txtregno").send_keys(numb)
            Select(driver.find_element_by_id("cmbdegree")).select_by_visible_text(dept)
            Select(driver.find_element_by_id("cmbexamno")).select_by_visible_text(semester)
            driver.find_element_by_id("button1").click()
            # BUG FIX: the old code read the cache into page_source and then
            # immediately overwrote it with driver.page_source (dead read),
            # and cached every page under the constant base url so each
            # iteration clobbered the same entry.  Key on the register
            # number instead so every result page is retained.
            page_source = driver.page_source
            cache.set(numb, page_source, timeout=60 * 60 * 24 * 7)  # week in seconds
            root = html.document_fromstring(page_source)
            Cleaner(kill_tags=['noscript'], style=True)(root)  # lxml >= 2.3.1
            # Dump the visible text of the result page to <numb>.txt; the
            # context manager guarantees the handle is closed (the old code
            # leaked it on an exception).
            with open(str(numb) + ".txt", 'w') as fp:
                fp.write((root.text_content()).encode('utf-8'))
            driver.back()
    finally:
        # Release the browser even when a lookup blows up mid-loop.
        driver.close()
    return analyze(subjects)
def get_configuration():
    """Load key/value configuration rows from a fixed Google Sheet.

    Reads the 'configuration' range via the Sheets v4 API (read-only scope),
    builds a ``{row[0]: row[1]}`` dict and, when the filesystem cache looks
    cold, primes it with the same pairs.

    :return: the configuration dict, or -1 when the sheet returned no rows.
    """
    sheet_scope = ['https://www.googleapis.com/auth/spreadsheets.readonly']
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        SERVICE_KEY, sheet_scope)
    http_auth = credentials.authorize(Http())
    sheets_service = build('sheets', 'v4', http=http_auth)
    # Fixed spreadsheet holding the configuration tab.
    fileId = '1I8un3pP8aE3b2ixeQTN9mVPwMzTsE_TEkaFzAHEWZ-A'
    data_range = 'configuration'
    data_result = sheets_service.spreadsheets().values().get(
        spreadsheetId=fileId, range=data_range).execute()
    return_data = data_result.get('values', [])
    if not return_data:
        return -1
    configuration = {}
    for row in return_data:
        # Column A is the key, column B the value.
        configuration[row[0]] = row[1]
    cc = FileSystemCache('/var/www/html/gs-cookies/cache/config',
                         default_timeout=300)
    # 'first_monday' doubles as a cold-cache sentinel: when absent, every
    # pair is (re)written.  (iteritems => this module targets Python 2.)
    if cc.get('first_monday') is None:
        for key, value in configuration.iteritems():
            cc.set(key, value)
    return configuration
class FileSystemSessionInterface(SessionInterface):
    """Uses the :class:`werkzeug.contrib.cache.FileSystemCache` as a session
    backend.

    :param cache_dir: the directory where session files are stored.
    :param threshold: the maximum number of items the session stores before it
                      starts deleting some.
    :param mode: the file mode wanted for the session files, default 0600
    :param key_prefix: A prefix that is added to FileSystemCache store keys.
    """

    # Session objects handed back to Flask are of this class.
    session_class = FileSystemSession

    def __init__(self, cache_dir, threshold, mode, key_prefix):
        from werkzeug.contrib.cache import FileSystemCache
        self.cache = FileSystemCache(cache_dir, threshold=threshold, mode=mode)
        self.key_prefix = key_prefix

    def _generate_sid(self):
        # Random UUID4 string used as the session-id cookie value.
        return str(uuid4())

    def open_session(self, app, request):
        # Restore the session matching the sid cookie; mint a fresh empty
        # session when the cookie is missing or nothing is stored for it.
        sid = request.cookies.get(app.session_cookie_name)
        if not sid:
            sid = self._generate_sid()
            return self.session_class(sid=sid)
        data = self.cache.get(self.key_prefix + sid)
        if data is not None:
            return self.session_class(data, sid=sid)
        return self.session_class(sid=sid)

    def save_session(self, app, session, response):
        domain = self.get_cookie_domain(app)
        path = self.get_cookie_path(app)
        if not session:
            # Session was emptied: drop the server copy and the cookie.
            if session.modified:
                self.cache.delete(self.key_prefix + session.sid)
                response.delete_cookie(app.session_cookie_name,
                                       domain=domain, path=path)
            return
        # Modification case.  There are upsides and downsides to
        # emitting a set-cookie header each request.  The behavior
        # is controlled by the :meth:`should_set_cookie` method
        # which performs a quick check to figure out if the cookie
        # should be set or not.  This is controlled by the
        # SESSION_REFRESH_EACH_REQUEST config flag as well as
        # the permanent flag on the session itself.
        # if not self.should_set_cookie(app, session):
        #     return
        httponly = self.get_cookie_httponly(app)
        secure = self.get_cookie_secure(app)
        expires = self.get_expiration_time(app, session)
        data = dict(session)
        self.cache.set(self.key_prefix + session.sid, data,
                       int(app.permanent_session_lifetime.total_seconds()))
        response.set_cookie(app.session_cookie_name, session.sid,
                            expires=expires, httponly=httponly,
                            domain=domain, path=path, secure=secure)
def get_last_digests():
    """Fetch the last five digests from the chain, falling back to cache.

    Calls the contract's ``getLast`` function through the node API.  On a
    transport error OR a non-200 response the previously cached value is
    returned instead.  A successful response refreshes the cache (which is
    created with ``default_timeout=0``).

    :return: the decoded digest payload, or None when nothing was ever cached.
    """
    cache = FileSystemCache(Config.CACHE_DIR, default_timeout=0)
    cache_key = 'last_digests'
    api = Api(host=Config.HOST)
    contract = {
        'args': '["5"]',
        'function': 'getLast',
    }
    try:
        response = api.call(
            from_addr=Config.ADDRESS,
            to_addr=Config.ADDRESS,
            value='0',
            nonce=0,
            gasprice=Config.GASPRICE,
            gaslimit=Config.GASLIMIT,
            contract=contract)
    except RequestException:
        result = cache.get(cache_key)
    else:
        if response.status_code == 200:
            result = response.json().get('result', {}).get('result', '{}')
            result = json.loads(result)
            # Some nodes double-encode the payload; unwrap a second time.
            if isinstance(result, str):
                result = json.loads(result)
            cache.set(cache_key, result)
        else:
            # BUG FIX: 'result' was left unbound on a non-200 status, which
            # raised UnboundLocalError at the return.  Serve the cached
            # value instead, mirroring the transport-error path.
            result = cache.get(cache_key)
    return result
def cache_get_or_set(cache_name, function, timeout=3600):
    """Return the cached value for *cache_name*, computing it on a miss.

    On a miss, *function* is invoked with no arguments, its result is stored
    under *cache_name* for *timeout* seconds, and then returned.
    """
    store = FileSystemCache(cache_dir=abspath('tmp'))
    hit = store.get(cache_name)
    if hit is not None:
        return hit
    fresh = function()
    store.set(cache_name, fresh, timeout=timeout)
    return fresh
class FileSystemSessionInterface(SessionInterface):
    """Uses the :class:`werkzeug.contrib.cache.FileSystemCache` as a session
    backend.

    :param cache_dir: the directory where session files are stored.
    :param threshold: the maximum number of items the session stores before it
                      starts deleting some.
    :param mode: the file mode wanted for the session files, default 0600
    :param key_prefix: A prefix that is added to FileSystemCache store keys.
    """

    # Class used for the session objects returned to Flask.
    session_class = FileSystemSession

    def __init__(self, cache_dir, threshold, mode, key_prefix):
        from werkzeug.contrib.cache import FileSystemCache
        self.cache = FileSystemCache(cache_dir, threshold=threshold, mode=mode)
        self.key_prefix = key_prefix

    def _generate_sid(self):
        # Fresh random session id (UUID4 as a string).
        return str(uuid4())

    def open_session(self, app, request):
        # Look up the stored session for the sid cookie; otherwise hand
        # back a brand-new empty session.
        sid = request.cookies.get(app.session_cookie_name)
        if not sid:
            sid = self._generate_sid()
            return self.session_class(sid=sid)
        data = self.cache.get(self.key_prefix + sid)
        if data is not None:
            return self.session_class(data, sid=sid)
        return self.session_class(sid=sid)

    def save_session(self, app, session, response):
        domain = self.get_cookie_domain(app)
        path = self.get_cookie_path(app)
        if not session:
            # Emptied session: remove the stored copy and the cookie.
            if session.modified:
                self.cache.delete(self.key_prefix + session.sid)
                response.delete_cookie(app.session_cookie_name,
                                       domain=domain, path=path)
            return
        # Modification case.  There are upsides and downsides to
        # emitting a set-cookie header each request.  The behavior
        # is controlled by the :meth:`should_set_cookie` method
        # which performs a quick check to figure out if the cookie
        # should be set or not.  This is controlled by the
        # SESSION_REFRESH_EACH_REQUEST config flag as well as
        # the permanent flag on the session itself.
        # if not self.should_set_cookie(app, session):
        #     return
        httponly = self.get_cookie_httponly(app)
        secure = self.get_cookie_secure(app)
        expires = self.get_expiration_time(app, session)
        data = dict(session)
        self.cache.set(self.key_prefix + session.sid, data,
                       int(app.permanent_session_lifetime.total_seconds()))
        response.set_cookie(app.session_cookie_name, session.sid,
                            expires=expires, httponly=httponly,
                            domain=domain, path=path, secure=secure)
class Message:
    """PushBear push helper configured from ../conf.ini (Python 2)."""

    def __init__(self):
        # Directory containing this file.
        cur_path = os.path.split(os.path.realpath(__file__))[0]
        # Parent directory.
        parent_path = os.path.dirname(cur_path)
        # Path of the configuration file.
        ini_file = "%s/conf.ini" % parent_path
        # Path of the cache directory.
        cache_path = "%s/cache/" % parent_path
        cf = ConfigParser.ConfigParser()
        cf.read(ini_file)
        self.sendkey = cf.get('pushbear', 'sendkey')
        self.text = cf.get('pushbear', 'sendname')
        self.push_url = 'https://pushbear.ftqq.com/sub'
        # File-system backed cache.
        self.cache = FileSystemCache(cache_path)
        # Cache key counting today's pushes.
        self.cache_push_times_key = 'today_push_times'

    def _shengyu(self):
        # Number of seconds remaining until midnight (24:00) today.
        h = int(time.strftime('%H', time.localtime(time.time())))
        m = int(time.strftime('%M', time.localtime(time.time())))
        s = int(time.strftime('%S', time.localtime(time.time())))
        shengyu = int(24 * 60 - (h * 60 + m + 1)) * 60
        return shengyu + (60 - s)

    def _send(self, desp):
        # Deliver the message through the PushBear HTTP endpoint.
        push_data = {
            'sendkey': self.sendkey,
            'text': self.text,
            'desp': desp
        }
        r = requests.post(self.push_url, data=push_data)
        # print r.text

    def push(self, desp, max_remind_times=3):
        # Count how many reminders already went out today; beyond
        # max_remind_times no further reminder is sent until tomorrow.
        push_times = self.cache.get(self.cache_push_times_key)
        if push_times is None:
            # Nothing sent yet today.
            push_times = 1
        else:
            # Something was already sent today.
            if push_times < max_remind_times:
                push_times = push_times + 1
            else:
                print "[今日已提醒超过%s次,不再提醒]" % (max_remind_times)
                return False
        # Counter expires at midnight so the limit resets daily.
        self.cache.set(self.cache_push_times_key, push_times,
                       timeout=self._shengyu())
        desp = ("%s [今日已提醒%s次]") % (desp, push_times)
        self._send(desp)
def test_filesystemcache_prune():
    """FileSystemCache._prune must keep the entry count at or below the
    configured threshold."""
    limit = 13
    workdir = tempfile.mkdtemp()
    fs_cache = FileSystemCache(cache_dir=workdir, threshold=limit)
    # Write twice as many entries as the threshold allows.
    for value in range(2 * limit):
        fs_cache.set(str(value), value)
    entries = os.listdir(workdir)
    shutil.rmtree(workdir)
    assert len(entries) <= limit
def test_filesystemcache_clear():
    """FileSystemCache.clear must remove every stored entry from disk."""
    workdir = tempfile.mkdtemp()
    fs_cache = FileSystemCache(cache_dir=workdir)
    fs_cache.set('foo', 'bar')
    assert len(os.listdir(workdir)) == 1
    fs_cache.clear()
    assert len(os.listdir(workdir)) == 0
    shutil.rmtree(workdir)
def test_filesystemcache_clear():
    """Verify that clearing the cache empties its backing directory."""
    directory = tempfile.mkdtemp()
    store = FileSystemCache(cache_dir=directory)
    store.set("foo", "bar")
    before = os.listdir(directory)
    assert len(before) == 1
    store.clear()
    after = os.listdir(directory)
    assert len(after) == 0
    shutil.rmtree(directory)
def test_filesystemcache_set_get():
    """Round-trip a few values through FileSystemCache.set/get."""
    directory = tempfile.mkdtemp()
    try:
        store = FileSystemCache(cache_dir=directory)
        for n in range(3):
            store.set(str(n), n * n)
        assert all(store.get(str(n)) == n * n for n in range(3))
    finally:
        shutil.rmtree(directory)
def get_content(url, params, cache):
    """Return the body of *url* (GET with *params*), memoised on disk.

    *cache* is a dict carrying 'threshold' and 'default_timeout' for the
    backing FileSystemCache.  A cached body is returned verbatim; otherwise
    the page is fetched with a random Chrome user-agent, cached and returned.
    """
    store = FileSystemCache('cache',
                            threshold=cache['threshold'],
                            default_timeout=cache['default_timeout'])
    key = url + str(params)
    cached = store.get(key)
    if cached is not None:
        return cached
    resp = requests.get(url, params=params,
                        headers={'user-agent': UserAgent().chrome})
    resp.raise_for_status()
    body = resp.text
    store.set(key, body)
    return body
class BaseProvider(object):
    """Abstract base for url providers backed by a filesystem cache.

    Concrete subclasses supply a name, a base url and a way to resolve a tag
    into a list of urls; this base adds on-disk caching and API-key lookup.
    """

    __metaclass__ = ABCMeta  # Python 2 style ABC declaration

    def __init__(self, cache_dir=None, default_timeout=60 * 60 * 24,
                 api_key=None):
        # store it in cache for 1 day. using file system cache because
        # memcached is too mainstream. :)
        self.cache = FileSystemCache(cache_dir=cache_dir or '/tmp/__arcoiro__',
                                     default_timeout=default_timeout)
        self._api_key = api_key

    @abstractproperty
    def name(self):
        # Short machine name of the provider (also used in config keys).
        pass

    @abstractproperty
    def url(self):
        # Base url of the provider.
        pass

    @abstractmethod
    def get_urls_from_tag(self, tag):
        # Resolve *tag* to a list of urls (None on failure).
        pass

    @property
    def display_name(self):
        # Human-facing name; defaults to the machine name.
        return self.name

    @property
    def api_key(self):
        """API key from the constructor, else <NAME>_API_KEY in app config.

        :raises RuntimeError: when neither source provides a key.
        """
        if self._api_key is not None:
            return self._api_key
        config_key = '%s_API_KEY' % self.name.upper()
        key = current_app.config.get(config_key)
        if key is None:
            raise RuntimeError('%s not defined!' % config_key)
        return key

    def get_cached_urls_from_tag(self, tag):
        """Cached wrapper around get_urls_from_tag.

        Failed resolutions (None) are deliberately NOT cached so they are
        retried on the next call.
        """
        cache_key = '%s:%s' % (self.name, tag)
        urls = self.cache.get(cache_key)
        if urls is not None:
            return urls
        urls = self.get_urls_from_tag(tag)
        if urls is None:
            return None
        self.cache.set(cache_key, urls)
        return urls
class Cache:
    """JSON-serialising wrapper around a werkzeug FileSystemCache."""

    def __init__(self):
        # Entries live for 10 minutes by default.
        self._cache = FileSystemCache(Cache.cache_dir(),
                                      default_timeout=10 * 60)

    @staticmethod
    def cache_dir():
        """Return the on-disk cache directory (relative to the CWD)."""
        return os.path.join(os.getcwd(), 'forum', 'src', 'storage', 'cache')

    def set(self, key, value, timeout=None):
        """Store *value* (JSON-encoded) under *key*.

        Failures are logged to stdout and swallowed (returns None then).
        """
        try:
            return self._cache.set(key, json.dumps(value), timeout=timeout)
        except Exception as exception:
            print(exception)

    def get(self, key):
        """Return the JSON-decoded value for *key*, or None on a miss or
        decode/IO failure (failures are printed)."""
        try:
            data = self._cache.get(key)
            if data:
                return json.loads(data)
        except Exception as exception:
            print(exception)

    def has(self, key):
        """Return True when *key* currently exists in the cache.

        BUG FIX: the old body returned ``self._cache.has(key) is not None``;
        since has() yields a boolean, that expression was always True.
        """
        return bool(self._cache.has(key))
def weather_data(latitude, longitude):
    """Return the current Forecast.io conditions for a coordinate pair.

    Results are cached on disk for five minutes, keyed by
    '<latitude>.<longitude>'.
    """
    store = FileSystemCache('./cache')
    key = '{}.{}'.format(latitude, longitude)
    cached = store.get(key)
    if cached is not None:
        return cached
    apikey = '3a4c7bb6f812d00b3bb9d19ceace7222'
    fio = ForecastIO.ForecastIO(apikey,
                                units=ForecastIO.ForecastIO.UNITS_SI,
                                lang=ForecastIO.ForecastIO.LANG_ENGLISH,
                                latitude=latitude, longitude=longitude)
    data = fio.currently
    store.set(key, data, timeout=300)  # 5 minutes
    return data
class Message:
    """PushBear push helper with a daily reminder limit (Python 2)."""

    def __init__(self):
        self.sendkey = '1562-20254d9020f4f5883c56d0836c8bf5cb'
        self.text = 'Lazy'
        self.push_url = 'https://pushbear.ftqq.com/sub'
        # File-system backed cache.
        self.cache = FileSystemCache('/tmp/jjz_cache')
        # Cache key counting today's pushes.
        self.cache_push_times_key = 'today_push_times'

    def _shengyu(self):
        # Number of seconds remaining until midnight (24:00) today.
        h = int(time.strftime('%H', time.localtime(time.time())))
        m = int(time.strftime('%M', time.localtime(time.time())))
        s = int(time.strftime('%S', time.localtime(time.time())))
        shengyu = int(24 * 60 - (h * 60 + m + 1)) * 60
        return shengyu + (60 - s)

    def _send(self, desp):
        # Deliver the message through the PushBear HTTP endpoint
        # (certificate verification deliberately disabled).
        push_data = {
            'sendkey': self.sendkey,
            'text': self.text,
            'desp': desp
        }
        r = requests.post(self.push_url, data=push_data, verify=False)
        # print r.text

    def push(self, desp, max_remind_times=3):
        # Count how many reminders already went out today; beyond
        # max_remind_times no further reminder is sent until tomorrow.
        push_times = self.cache.get(self.cache_push_times_key)
        if push_times is None:
            # Nothing sent yet today.
            push_times = 1
        else:
            # Something was already sent today.
            if push_times < max_remind_times:
                push_times = push_times + 1
            else:
                print "[今日已提醒超过%s次,不再提醒]" % (max_remind_times)
                return False
        # Counter expires at midnight so the limit resets daily.
        self.cache.set(self.cache_push_times_key, push_times,
                       timeout=self._shengyu())
        desp = ("%s [今日已提醒%s次]") % (desp, push_times)
        self._send(desp)
def parse(url):
    """Return the visible text of *url*, rendering javascript via Firefox.

    Page sources are memoised on disk for one week so repeated calls do not
    relaunch the browser.
    """
    store = FileSystemCache('.cachedir', threshold=100000)
    source = store.get(url)
    if source is None:
        # A real browser is needed for javascript-generated content.
        with closing(Firefox()) as browser:
            browser.get(url)
            source = browser.page_source
        store.set(url, source, timeout=60 * 60 * 24 * 7)  # week in seconds
    document = html.document_fromstring(source)
    # Strip flash, images, <script>, <style>, etc.
    Cleaner(kill_tags=['noscript'], style=True)(document)  # lxml >= 2.3.1
    return document.text_content()
def parse(url):
    """Fetch *url* in Firefox (so javascript runs) and return its plain text.

    Rendered page sources are cached for a week in '.cachedir', keyed by url.
    """
    page_cache = FileSystemCache('.cachedir', threshold=100000)
    page = page_cache.get(url)
    if page is None:
        # Cache miss: drive a real browser to render the page.
        with closing(Firefox()) as browser:
            browser.get(url)
            page = browser.page_source
        page_cache.set(url, page, timeout=7 * 24 * 60 * 60)  # week in seconds
    tree = html.document_fromstring(page)
    # Drop scripts, styles, noscript fallbacks and similar noise.
    Cleaner(kill_tags=['noscript'], style=True)(tree)  # lxml >= 2.3.1
    return tree.text_content()
def getSpaceInfo(spaceId):
    """Return the parsed info.json for a DuraCloud space, with caching.

    A module-level FileSystemCache (24h timeout) is created lazily on first
    use.  On a cache miss the item is fetched from the DuraCloud duradmin
    download endpoint using basic auth.

    :param spaceId: id of the DuraCloud space.
    :return: the decoded info.json value.
    :raises RequestException: when the HTTP fetch fails (after printing it).
    """
    global spaceCache
    if spaceCache is None:
        mkdir_p(DURACLOUD_SPACE_CACHE_DIR)
        # mode=384 == 0o600: cache files readable only by the owner.
        spaceCache = FileSystemCache(DURACLOUD_SPACE_CACHE_DIR, threshold=50,
                                     default_timeout=(24*3600), mode=384)
    # check spaceCache, otherwise fetch info from DuraCloud
    result = spaceCache.get(spaceId)
    if result is None:
        url = DURACLOUD_URL+ "/duradmin/download/contentItem"
        auth = HTTPBasicAuth(DURACLOUD_USERNAME, DURACLOUD_PASSWORD)
        payload = {'spaceId': spaceId, 'contentId': 'info.json'}
        try:
            response = requests.get(url, params=payload, auth=auth)
            result = response.json()
            spaceCache.set(spaceId, result)
        except RequestException as e:
            # Python 2 print statement; surface the error, then re-raise.
            print e
            raise
    return result
def number_gen(first_part, start_num, end_num, semester, dept, subjects):
    """Scrape Pondicherry University results for a range of register numbers.

    For each register number (``first_part`` + zero-padded serial) the result
    portal form is submitted and the page text saved to
    ``results/<first_part>/<number>.txt``; finally :func:`analyze` runs.

    :param first_part: common register-number prefix (string).
    :param start_num: first serial number, inclusive.
    :param end_num: last serial number, inclusive.
    :param semester: visible text of the semester <select> option.
    :param dept: visible text of the degree/department <select> option.
    :param subjects: forwarded verbatim to analyze().
    :return: whatever analyze(subjects) returns.
    """
    driver = webdriver.Firefox(
    )  # Firefox used for testing. Change it to PhantomJS
    driver.implicitly_wait(30)
    base_url = "http://result.pondiuni.edu.in/candidate.asp"
    driver.get(base_url)
    # os.mkdir(str(first_part))
    os.chdir("results")
    os.chdir(str(first_part))
    cache = FileSystemCache('.cachedir', threshold=100000)
    try:
        for number in range(start_num, end_num + 1):
            numb = first_part + "%04d" % number
            driver.find_element_by_id("txtregno").clear()
            driver.find_element_by_id("txtregno").send_keys(numb)
            Select(driver.find_element_by_id(
                "cmbdegree")).select_by_visible_text(dept)
            Select(driver.find_element_by_id(
                "cmbexamno")).select_by_visible_text(semester)
            driver.find_element_by_id("button1").click()
            # BUG FIX: previously the cache was read into page_source and
            # instantly overwritten (dead read), and all pages were stored
            # under the constant base url, each iteration clobbering the
            # previous.  Cache per register number instead.
            page_source = driver.page_source
            cache.set(numb, page_source,
                      timeout=60 * 60 * 24 * 7)  # week in seconds
            root = html.document_fromstring(page_source)
            Cleaner(kill_tags=['noscript'], style=True)(root)  # lxml >= 2.3.1
            # Write the extracted text; 'with' closes the file even on error
            # (the original leaked the handle on an exception).
            with open(str(numb) + ".txt", 'w') as fp:
                fp.write((root.text_content()).encode('utf-8'))
            driver.back()
    finally:
        # Always shut the browser down, even on a mid-loop failure.
        driver.close()
    return analyze(subjects)
def getSpaceInfo(spaceId):
    """Return the parsed info.json for a DuraCloud space, with caching.

    Lazily creates a module-level FileSystemCache (24h timeout) on first
    call; misses are fetched from the DuraCloud duradmin download endpoint
    with basic auth.

    :param spaceId: id of the DuraCloud space.
    :return: the decoded info.json value.
    :raises RequestException: when the HTTP fetch fails (after printing it).
    """
    global spaceCache
    if spaceCache is None:
        mkdir_p(DURACLOUD_SPACE_CACHE_DIR)
        # mode=384 == 0o600: cache files readable only by the owner.
        spaceCache = FileSystemCache(DURACLOUD_SPACE_CACHE_DIR,
                                     threshold=50,
                                     default_timeout=(24 * 3600),
                                     mode=384)
    # check spaceCache, otherwise fetch info from DuraCloud
    result = spaceCache.get(spaceId)
    if result is None:
        url = DURACLOUD_URL + "/duradmin/download/contentItem"
        auth = HTTPBasicAuth(DURACLOUD_USERNAME, DURACLOUD_PASSWORD)
        payload = {'spaceId': spaceId, 'contentId': 'info.json'}
        try:
            response = requests.get(url, params=payload, auth=auth)
            result = response.json()
            spaceCache.set(spaceId, result)
        except RequestException as e:
            # Python 2 print statement; surface the error, then re-raise.
            print e
            raise
    return result
async def get_content(url, payload, cache):
    """Fetch content from url or get it from cache.

    :param url: target url.
    :param payload: query parameters; also part of the cache key.
    :param cache: dict carrying 'threshold' and 'default_timeout' settings
        for the backing FileSystemCache.
    :return: the response body text.
    """
    # FIX: the FileSystemCache instance used to shadow the *cache* settings
    # dict parameter (same name, different type) — keep them apart.
    store = FileSystemCache(
        'cache',
        threshold=cache['threshold'],
        default_timeout=cache['default_timeout'],
    )
    cache_id = url + str(payload)
    cache_content = store.get(cache_id)
    if cache_content is not None:
        return cache_content
    headers = {'user-agent': UserAgent().chrome}
    resp = await asks.get(
        url,
        params=payload,
        headers=headers,
        timeout=TIMEOUT,
        retries=LIMITS,
    )
    resp.raise_for_status()
    content = resp.text
    store.set(cache_id, content)
    return content
class WechatCache(WechatSogouBase):
    """File-based cache."""

    def __init__(self, cache_dir='cache', default_timeout=300):
        """Initialise the cache.

        cache_dir is the cache directory.
        """
        self.cache = FileSystemCache(cache_dir,
                                     default_timeout=default_timeout)

    def clear(self):
        """Empty the cache."""
        return self.cache.clear()

    def get(self, key):
        """Return the cached value stored under *key*.

        Returns None when there is no matching entry.
        """
        return self.cache.get(key)

    def add(self, key, value, timeout=None):
        """Add a cache entry.

        When no entry exists for *key*, store *value* under it with expiry
        *timeout* (default 300 seconds); otherwise return False — an
        existing entry is never overwritten by add().
        """
        return self.cache.add(key, value, timeout)

    def set(self, key, value, timeout=None):
        """Set the entry for *key* to *value* (default expiry 300 seconds)."""
        return self.cache.set(key, value, timeout)

    def delete(self, key):
        """Delete the cache entry stored under *key*."""
        return self.cache.delete(key)
class WechatCache(object):
    """File-based cache."""

    def __init__(self, cache_dir='cache', default_timeout=300):
        """Initialise the cache.

        cache_dir is the cache directory.
        """
        self.cache = FileSystemCache(cache_dir,
                                     default_timeout=default_timeout)

    def clear(self):
        """Empty the cache."""
        return self.cache.clear()

    def get(self, key):
        """Return the cached value stored under *key*.

        Returns None when there is no matching entry.
        """
        return self.cache.get(key)

    def add(self, key, value, timeout=None):
        """Add a cache entry.

        When no entry exists for *key*, store *value* under it with expiry
        *timeout* (default 300 seconds); otherwise return False — an
        existing entry is never overwritten by add().
        """
        return self.cache.add(key, value, timeout)

    def set(self, key, value, timeout=None):
        """Set the entry for *key* to *value* (default expiry 300 seconds)."""
        return self.cache.set(key, value, timeout)

    def delete(self, key):
        """Delete the cache entry stored under *key*."""
        return self.cache.delete(key)
class WechatCache:
    """Thin wrapper around werkzeug's file-backed FileSystemCache."""

    def __init__(self, cache_dir='cache', default_timeout=300):
        self.cache = FileSystemCache(cache_dir=cache_dir,
                                     default_timeout=default_timeout)

    def clear(self):
        """Drop every stored entry."""
        return self.cache.clear()

    def get(self, key):
        """Fetch the value stored under *key* (None on a miss)."""
        return self.cache.get(key)

    def add(self, key, value, timeout=None):
        """Store *value* only when *key* is not already present."""
        return self.cache.add(key, value, timeout)

    def set(self, key, value, timeout=None):
        """Store *value* under *key*, overwriting any previous entry."""
        return self.cache.set(key, value, timeout)

    def delete(self, key):
        """Remove the entry stored under *key*."""
        return self.cache.delete(key)
class FileSystemSessionInterface(SessionInterface):
    """Uses the :class:`werkzeug.contrib.cache.FileSystemCache` as a session
    backend.

    .. versionadded:: 0.2
        The `use_signer` parameter was added.

    :param cache_dir: the directory where session files are stored.
    :param threshold: the maximum number of items the session stores before it
                      starts deleting some.
    :param mode: the file mode wanted for the session files, default 0600
    :param key_prefix: A prefix that is added to FileSystemCache store keys.
    :param use_signer: Whether to sign the session id cookie or not.
    :param permanent: Whether to use permanent session or not.
    """

    # Class used for the session objects returned to Flask.
    session_class = FileSystemSession

    def __init__(self, cache_dir, threshold, mode, key_prefix,
                 use_signer=False, permanent=True):
        from werkzeug.contrib.cache import FileSystemCache
        self.cache = FileSystemCache(cache_dir, threshold=threshold, mode=mode)
        self.key_prefix = key_prefix
        self.use_signer = use_signer
        self.permanent = permanent

    def open_session(self, app, request):
        # Restore the session matching the sid cookie; mint a fresh empty
        # session when the cookie is missing or its signature is invalid.
        sid = request.cookies.get(app.session_cookie_name)
        if not sid:
            sid = self._generate_sid()
            return self.session_class(sid=sid, permanent=self.permanent)
        if self.use_signer:
            signer = self._get_signer(app)
            if signer is None:
                return None
            try:
                sid_as_bytes = signer.unsign(sid)
                sid = sid_as_bytes.decode()
            except BadSignature:
                sid = self._generate_sid()
                return self.session_class(sid=sid, permanent=self.permanent)
        data = self.cache.get(self.key_prefix + sid)
        if data is not None:
            return self.session_class(data, sid=sid)
        return self.session_class(sid=sid, permanent=self.permanent)

    def save_session(self, app, session, response):
        domain = self.get_cookie_domain(app)
        path = self.get_cookie_path(app)
        if not session:
            # Session was emptied: drop the server copy and the cookie.
            if session.modified:
                self.cache.delete(self.key_prefix + session.sid)
                response.delete_cookie(app.session_cookie_name,
                                       domain=domain, path=path)
            return
        httponly = self.get_cookie_httponly(app)
        secure = self.get_cookie_secure(app)
        expires = self.get_expiration_time(app, session)
        samesite = self.get_cookie_samesite(app)
        data = dict(session)
        self.cache.set(self.key_prefix + session.sid, data,
                       total_seconds(app.permanent_session_lifetime))
        if self.use_signer:
            session_id = self._get_signer(app).sign(want_bytes(session.sid))
        else:
            session_id = session.sid
        response.set_cookie(app.session_cookie_name, session_id,
                            expires=expires, httponly=httponly,
                            domain=domain, path=path, secure=secure,
                            samesite=samesite)
class FileSystemSessionInterface(SessionInterface):
    """Uses the :class:`werkzeug.contrib.cache.FileSystemCache` as a session
    backend.

    .. versionadded:: 0.2
        The `use_signer` parameter was added.

    :param cache_dir: the directory where session files are stored.
    :param threshold: the maximum number of items the session stores before it
                      starts deleting some.
    :param mode: the file mode wanted for the session files, default 0600
    :param key_prefix: A prefix that is added to FileSystemCache store keys.
    :param use_signer: Whether to sign the session id cookie or not.
    :param permanent: Whether to use permanent session or not.
    """

    # Class used for the session objects returned to Flask.
    session_class = FileSystemSession

    def __init__(self, cache_dir, threshold, mode, key_prefix,
                 use_signer=False, permanent=True):
        from werkzeug.contrib.cache import FileSystemCache
        self.cache = FileSystemCache(cache_dir, threshold=threshold, mode=mode)
        self.key_prefix = key_prefix
        self.use_signer = use_signer
        self.permanent = permanent

    def open_session(self, app, request):
        # Restore the session for the sid cookie; mint a fresh session when
        # the cookie is absent or fails signature verification.
        sid = request.cookies.get(app.session_cookie_name)
        if not sid:
            sid = self._generate_sid()
            return self.session_class(sid=sid, permanent=self.permanent)
        if self.use_signer:
            signer = self._get_signer(app)
            if signer is None:
                return None
            try:
                sid = signer.unsign(sid)
            except BadSignature:
                sid = self._generate_sid()
                return self.session_class(sid=sid, permanent=self.permanent)
        data = self.cache.get(self.key_prefix + sid)
        if data is not None:
            return self.session_class(data, sid=sid)
        return self.session_class(sid=sid, permanent=self.permanent)

    def save_session(self, app, session, response):
        domain = self.get_cookie_domain(app)
        path = self.get_cookie_path(app)
        if not session:
            # Session was emptied: drop the server copy and the cookie.
            if session.modified:
                self.cache.delete(self.key_prefix + session.sid)
                response.delete_cookie(app.session_cookie_name,
                                       domain=domain, path=path)
            return
        httponly = self.get_cookie_httponly(app)
        secure = self.get_cookie_secure(app)
        expires = self.get_expiration_time(app, session)
        data = dict(session)
        self.cache.set(self.key_prefix + session.sid, data,
                       total_seconds(app.permanent_session_lifetime))
        if self.use_signer:
            session_id = self._get_signer(app).sign(session.sid)
        else:
            session_id = session.sid
        response.set_cookie(app.session_cookie_name, session_id,
                            expires=expires, httponly=httponly,
                            domain=domain, path=path, secure=secure)
from selenium.webdriver import Firefox # pip install selenium from werkzeug.contrib.cache import FileSystemCache # pip install werkzeug cache = FileSystemCache('.cachedir', threshold=100000) url = sys.argv[1] if len(sys.argv) > 1 else "http://www.schibsted.cl/testqa/" # get page page_source = cache.get(url) if page_source is None: # use firefox to get page with javascript generated content with closing(Firefox()) as browser: browser.get(url) page_source = browser.page_source cache.set(url, page_source, timeout=60*60*24*7) # week in seconds # extract text root = html.document_fromstring(page_source) # remove flash, images, <script>,<style>, etc Cleaner(kill_tags=['noscript'], style=True)(root) # lxml >= 2.3.1 webtext = root.text_content() # extract text f = open("C:/schibsted/data/Test.txt", "w"); print f value = (webtext) myString = str(value) f.write(myString) f.close()
Feedliste ist eine Datei mit einem URL zu einem ATOM-Feed pro Zeile. Grenzwert ist die maximale Entfernung von Partyteilnehmern in Sekunden unter der Annahme, dass die Lichtgeschwindigkeit 1 m/s beträgt. """) exit(1) current_events = {} with open(filename, 'r') as feeds: progress = ProgressBar(maxval=len(feeds.readlines())) with open(filename, 'r') as feeds: for line in feeds: feed_url = line.strip() cached_content = feed_cache.get(feed_url) if not cached_content: request = get(feed_url) events = getEvents(StringIO(request.text.encode('utf-8'))) feed_cache.set(feed_url, request.text.encode('utf-8')) else: events = getEvents(StringIO(cached_content)) current_events = updateEvents(current_events, events) progress.update(progress.currval+1) clustering = HierarchicalClustering(current_events.values(), timelikeInterval) clusters = clustering.getlevel(threshold) for cluster in clusters: if len(cluster) > 2: partyPrint(cluster, threshold)
class weather():
    """Scrapes Beijing weather + US-embassy AQI, with a 1h filesystem cache
    (Python 2)."""

    def __init__(self):
        # File-system backed cache shared by both lookups.
        self.cache = FileSystemCache('/tmp/leju_weather_cache_dir')
        self.weather_cache_key = 'dongcheng_weather'
        self.aqi_cache_key = 'aqicn_num'
        # Cache entries live for one hour.
        self.cache_time = 3600

    def get_weather(self):
        """Return [weather_string, aqi_string], served from cache when warm."""
        weather = self.__cache_get(self.weather_cache_key)
        aqi = self.__cache_get(self.aqi_cache_key)
        return [weather, aqi]

    def __get_weather(self):
        # Scrape the Dongcheng district page of beijing.tianqi.com.
        url = 'http://beijing.tianqi.com/dongchengqu/'
        headers = {
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36',
            'Host': 'beijing.tianqi.com',
            'Cookie': 'bdshare_firstime=1451003806108; cs_prov=01; cs_city=0101; ccity=101011501; a8205_pages=175; a8205_times=1; Hm_lvt_ab6a683aa97a52202eab5b3a9042a8d2=1451003806; Hm_lpvt_ab6a683aa97a52202eab5b3a9042a8d2=1451004424'
        }
        #content = BeautifulSoup(requests.get(url, headers=headers, timeout=(10, 3600)).content)
        try:
            content = BeautifulSoup(
                requests.get(url, headers=headers, timeout=10).content,
                "html.parser")
            return content.find(class_='fuzhitxt')['value']
        except requests.exceptions.ConnectTimeout as e:
            # On timeout the error string ends up cached for 3600 seconds.
            #self.__get_weather()
            return '获取室外数据超时'

    def __get_usa_aqi(self):
        # Scrape the US-embassy AQI widget value from aqicn.org.
        url = 'http://aqicn.org/city/beijing/'
        headers = {
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36',
            'Host': 'aqicn.org',
            'Cookie': 'waqi-m-history=[{%22url%22:%22http://aqicn.org/city/beijing/chaoyangaotizhongxin/cn/m/%22%2C%22id%22:%22@450%22%2C%22name%22:%22%E6%9C%9D%E9%98%B3%E5%A5%A5%E4%BD%93%E4%B8%AD%E5%BF%83%22%2C%22time%22:%222016-09-21T02:59:23.642Z%22}%2C{%22url%22:%22http://aqicn.org/city/beijing/us-embassy/cn/m/%22%2C%22id%22:%22@3303%22%2C%22name%22:%22%E5%8C%97%E4%BA%AC%E7%BE%8E%E5%9B%BD%E5%A4%A7%E4%BD%BF%E9%A6%86%22%2C%22time%22:%222016-09-21T02:59:04.177Z%22}]; __uvt=; __atuvc=2%7C42; waqi-w-station={%22url%22:%22http://aqicn.org/city/beijing/%22%2C%22name%22:%22Beijing%22%2C%22idx%22:1451%2C%22time%22:%222016-11-16T02:07:56.247Z%22}; waqi-w-history=[{%22url%22:%22http://aqicn.org/city/beijing/%22%2C%22name%22:%22Beijing%22%2C%22idx%22:1451%2C%22time%22:%222016-11-16T02:07:56.247Z%22}%2C{%22url%22:%22http://aqicn.org/city/beijing/us-embassy/%22%2C%22name%22:%22Beijing%20US%20Embassy%22%2C%22idx%22:3303%2C%22time%22:%222016-11-14T23:39:31.677Z%22}]; __utma=42180789.1319042313.1474426745.1479166782.1479261892.7; __utmb=42180789.2.10.1479261892; __utmc=42180789; __utmz=42180789.1479166782.6.4.utmcsr=google|utmccn=(organic)|utmcmd=organic|utmctr=(not%20provided); uvts=5AqenfALEYF052Ho'
        }
        #content = BeautifulSoup(requests.get(url, headers=headers, timeout=(10, 3600)).content)
        try:
            content = BeautifulSoup(
                requests.get(url, headers=headers, timeout=10).content,
                "html.parser")
            return "大使馆AQI: %s" % content.select(
                '#aqiwgtvalue')[0].get_text().encode('utf-8')
        except requests.exceptions.ConnectTimeout as e:
            # On timeout the error string ends up cached for 3600 seconds.
            #self.__get_weather()
            return '获取大使馆AQI数据超时'

    def __cache_get(self, cache_key):
        # Serve from cache; on a miss, scrape, cache for cache_time seconds
        # and return the fresh value.
        cache_val = self.cache.get(cache_key)
        if cache_val is None:
            if cache_key == 'dongcheng_weather':
                string = self.__get_weather()
            else:
                string = self.__get_usa_aqi()
            self.cache.set(cache_key, string, self.cache_time)
            return string
        else:
            return cache_val
class Cache(object):
    """Base class for TimeGate caches."""

    def __init__(self, path, tolerance, expiration, max_values,
                 run_tests=True, max_file_size=0):
        """Constructor method.

        :param path: The path of the cache database file.
        :param tolerance: The tolerance, in seconds to which a TimeMap is
            considered young enough to be used as is.
        :param expiration: How long, in seconds, the cache entries are stored
            every get will be a CACHE MISS.
        :param max_values: The maximum number of TimeMaps stored in cache
            before some are deleted
        :param run_tests: (Optional) Tests the cache at initialization.
        :param max_file_size: (Optional) The maximum size (in Bytes) for a
            TimeMap cache value. When max_file_size=0, there is no limit to
            a cache value. When max_file_size=X > 0, the cache will not
            store TimeMap that require more than X Bytes on disk.
        """
        # Parameters Check
        if tolerance <= 0 or expiration <= 0 or max_values <= 0:
            raise CacheError("Cannot create cache: all parameters must be > 0")
        self.tolerance = relativedelta(seconds=tolerance)
        self.path = path.rstrip('/')
        self.max_file_size = max(max_file_size, 0)
        # CHECK_SIZE is True only when a positive size cap was requested.
        self.CHECK_SIZE = self.max_file_size > 0
        self.max_values = max_values
        self.backend = FileSystemCache(path,
                                       threshold=self.max_values,
                                       default_timeout=expiration)
        # Testing cache
        if run_tests:
            try:
                key = '1'
                val = 1
                self.backend.set(key, val)
                assert (not self.CHECK_SIZE) or self._check_size(key) > 0
                assert self.backend.get(key) == val
                # werkzeug names cache files by the md5 of the key.
                os.remove(self.path + '/' + md5(key).hexdigest())
            except Exception as e:
                raise CacheError("Error testing cache: %s" % e)
        logging.debug(
            "Cache created. max_files = %d. Expiration = %d. "
            "max_file_size = %d" % (
                self.max_values, expiration, self.max_file_size))

    def get_until(self, uri_r, date):
        """Returns the TimeMap (memento,datetime)-list for the requested
        Memento. The TimeMap is guaranteed to span at least until the 'date'
        parameter, within the tolerance.

        :param uri_r: The URI-R of the resource as a string.
        :param date: The target date. It is the accept-datetime for TimeGate
            requests, and the current date. The cache will return all
            Mementos prior to this date (within cache.tolerance parameter)
        :return: [(memento_uri_string, datetime_obj),...] list if it is in
            cache and if it is within the cache tolerance for *date*,
            None otherwise.
        """
        # Query the backend for stored cache values to that memento
        key = uri_r
        try:
            val = self.backend.get(key)
        except Exception as e:
            logging.error("Exception loading cache content: %s" % e)
            return None
        if val:
            # There is a value in the cache
            timestamp, timemap = val
            logging.info("Cached value exists for %s" % uri_r)
            if date > timestamp + self.tolerance:
                logging.info("Cache MISS: value outdated for %s" % uri_r)
                timemap = None
            else:
                logging.info("Cache HIT: found value for %s" % uri_r)
        else:
            # Cache MISS: No value
            logging.info("Cache MISS: No cached value for %s" % uri_r)
            timemap = None
        return timemap

    def get_all(self, uri_r):
        """Request the whole TimeMap for that uri.

        :param uri_r: the URI-R of the resource.
        :return: [(memento_uri_string, datetime_obj),...] list if it is in
            cache and if it is within the cache tolerance, None otherwise.
        """
        return self.get_until(uri_r, timegate_utils.now())

    def refresh(self, uri_r, getter, *args, **kwargs):
        """Refreshes the cached TimeMap for a specific resource and returns
        it.

        :param uri_r: The original resource URI to refresh the TimeMap
        :param getter: The function to call to get a fresh TimeMap
        :param args: *getter* arguments
        :param kwargs: *getter* keywords arguments
        :return: The fresh TimeMap
        """
        timemap = parsed_request(getter, *args, **kwargs)
        # timemap,new_uri = parsed_request(getter, *args, **kwargs)
        # if new_uri:
        #     uri_r = new_uri
        # Creates or refreshes the new timemap for that URI-R
        self._set(uri_r, timemap)
        return timemap

    def _set(self, uri_r, timemap):
        """Sets / refreshes the cached TimeMap for that URI-R. And appends it
        with a timestamp of when it is stored.

        :param uri_r: The URI-R of the original resource.
        :param timemap: The value to cache.
        :return: The backend setter method return value.
        """
        logging.info("Updating cache for %s" % uri_r)
        timestamp = timegate_utils.now()
        val = (timestamp, timemap)
        key = uri_r
        try:
            self.backend.set(key, val)
            if self.CHECK_SIZE:
                self._check_size(uri_r)
        except Exception as e:
            logging.error("Error setting cache value: %s" % e)

    def _check_size(self, key, delete=True):
        """Check the size that a specific TimeMap value is using on disk. It
        deletes if it is more than the maximum size.

        :param key: The TimeMap original resource.
        :param delete: (Optional) When true, the value is deleted. Else only
            a warning is raised.
        :return: The size of the value on disk (0 if it was deleted).
        """
        try:
            fname = md5(key).hexdigest()  # werkzeug key
            fpath = self.path + '/' + fname
            size = os.path.getsize(fpath)
            if size > self.max_file_size and delete:
                message = ("Cache value too big (%dB, max %dB) "
                           "for the TimeMap of %s")
                if delete:
                    message += ". Deleting cached value."
                    os.remove(fpath)
                    size = 0
                logging.warning(message % (size, self.max_file_size, key))
            return size
        except Exception as e:
            logging.error(
                "Exception checking cache value size for TimeMap of %s "
                "Exception: %s" % (key, e))
            return 0
class Cache(object):
    """Cache module based on werkzeug.contrib.cache.

    This is a mixed version of NullCache, SimpleCache, FileSystemCache,
    MemcachedCache, and RedisCache.

    :param app: Flask app instance.
    :param config_prefix: Define a prefix for Flask app config.
    :param kwargs: Extra parameters.

    You need to configure a type of the cache, and its related
    configurations. The default ``config_prefix`` is ``AUTHLIB``, so it
    requires a config of::

        AUTHLIB_CACHE_TYPE = 'simple'

    If ``config_prefix`` is something else, like ``EXAMPLE``, it would be::

        EXAMPLE_CACHE_TYPE = 'simple'

    The available cache types are:

    * null: It will not cache anything. No configuration.
    * simple: It caches things in memory.
      The only configuration is ``threshold``::

          AUTHLIB_CACHE_THRESHOLD = 500

    * memcache: It caches things in Memcache. Available configurations::

          AUTHLIB_CACHE_MEMCACHED_SERVERS = []
          AUTHLIB_CACHE_KEY_PREFIX = None

    * redis: It caches things in Redis. Available configurations::

          AUTHLIB_CACHE_REDIS_HOST = 'localhost'
          AUTHLIB_CACHE_REDIS_PORT = 6379
          AUTHLIB_CACHE_REDIS_PASSWORD = None
          AUTHLIB_CACHE_REDIS_DB = 0
          AUTHLIB_CACHE_KEY_PREFIX = None

    * filesystem: It caches things in local filesystem. Available
      configurations::

          AUTHLIB_CACHE_DIR = ''  # required
          AUTHLIB_CACHE_THRESHOLD = 500
    """

    def __init__(self, app, config_prefix='AUTHLIB', **kwargs):
        deprecate(DEPRECATE_MESSAGE, 0.7)
        self.config_prefix = config_prefix
        self.config = app.config

        cache_type = self._config('type')
        kwargs.update(default_timeout=self._config('DEFAULT_TIMEOUT', 100))

        if cache_type == 'null':
            self.cache = NullCache()
        elif cache_type == 'simple':
            kwargs.update(threshold=self._config('threshold', 500))
            self.cache = SimpleCache(**kwargs)
        elif cache_type == 'memcache':
            kwargs.update(
                servers=self._config('MEMCACHED_SERVERS'),
                key_prefix=self._config('KEY_PREFIX', None),
            )
            self.cache = MemcachedCache(**kwargs)
        elif cache_type == 'redis':
            kwargs.update(
                host=self._config('REDIS_HOST', 'localhost'),
                port=self._config('REDIS_PORT', 6379),
                password=self._config('REDIS_PASSWORD', None),
                db=self._config('REDIS_DB', 0),
                key_prefix=self._config('KEY_PREFIX', None),
            )
            self.cache = RedisCache(**kwargs)
        elif cache_type == 'filesystem':
            kwargs.update(threshold=self._config('threshold', 500))
            self.cache = FileSystemCache(self._config('DIR'), **kwargs)
        else:
            raise RuntimeError('`%s` is not a valid cache type!' % cache_type)
        # Expose the backend on the app so other extensions can reuse it.
        app.extensions[config_prefix.lower() + '_cache'] = self.cache

    def _config(self, key, default=_missing):
        """Resolve a config value: prefixed key first (e.g. AUTHLIB_CACHE_X),
        then the generic CACHE_X fallback, then *default*. Raises
        ``RuntimeError`` when the key is absent and no default was given."""
        key = key.upper()
        prior = '%s_CACHE_%s' % (self.config_prefix, key)
        if prior in self.config:
            return self.config[prior]
        fallback = 'CACHE_%s' % key
        if fallback in self.config:
            return self.config[fallback]
        if default is _missing:
            raise RuntimeError('%s is missing.' % prior)
        return default

    def get(self, key):
        """Look up key in the cache and return the value for it.

        :param key: the key to be looked up.
        :returns: The value if it exists and is readable, else ``None``.
        """
        return self.cache.get(key)

    def delete(self, key):
        """Delete `key` from the cache.

        :param key: the key to delete.
        :returns: Whether the key existed and has been deleted.
        """
        return self.cache.delete(key)

    def get_many(self, *keys):
        """Returns a list of values for the given keys.

        For each key a item in the list is created::

            foo, bar = cache.get_many("foo", "bar")

        Has the same error handling as :meth:`get`.

        :param keys: The function accepts multiple keys as positional
                     arguments.
        """
        # Delegate to the backend (consistent with get_dict): batching
        # backends such as Redis/Memcached fetch all keys in one round trip
        # instead of issuing one get() per key.
        return self.cache.get_many(*keys)

    def get_dict(self, *keys):
        """Like :meth:`get_many` but return a dict::

            d = cache.get_dict("foo", "bar")
            foo = d["foo"]
            bar = d["bar"]

        :param keys: The function accepts multiple keys as positional
                     arguments.
        """
        return self.cache.get_dict(*keys)

    def set(self, key, value, timeout=None):
        """Add a new key/value to the cache (overwrites value,
        if key already exists in the cache).

        :param key: the key to set
        :param value: the value for the key
        :param timeout: the cache timeout for the key in seconds
                        (if not specified, it uses the default timeout).
                        A timeout of 0 indicates that the cache never expires.
        :returns: ``True`` if key has been updated, ``False`` for backend
                  errors. Pickling errors, however, will raise a subclass of
                  ``pickle.PickleError``.
        """
        return self.cache.set(key, value, timeout)

    def add(self, key, value, timeout=None):
        """Works like :meth:`set` but does not overwrite the values of
        already existing keys.

        :param key: the key to set
        :param value: the value for the key
        :param timeout: the cache timeout for the key in seconds
                        (if not specified, it uses the default timeout).
                        A timeout of 0 indicates that the cache never expires.
        :returns: Same as :meth:`set`, but also ``False`` for already
                  existing keys.
        """
        return self.cache.add(key, value, timeout)

    def set_many(self, mapping, timeout=None):
        """Sets multiple keys and values from a mapping.

        :param mapping: a mapping with the keys/values to set.
        :param timeout: the cache timeout for the key in seconds
                        (if not specified, it uses the default timeout).
                        A timeout of 0 indicates that the cache never expires.
        :returns: Whether all given keys have been set.
        """
        return self.cache.set_many(mapping, timeout)

    def delete_many(self, *keys):
        """Deletes multiple keys at once.

        :param keys: The function accepts multiple keys as positional
                     arguments.
        :returns: Whether all given keys have been deleted.
        :rtype: boolean
        """
        return self.cache.delete_many(*keys)

    def has(self, key):
        """Checks if a key exists in the cache without returning it. This is
        a cheap operation that bypasses loading the actual data on the
        backend.

        This method is optional and may not be implemented on all caches.

        :param key: the key to check
        """
        return self.cache.has(key)

    def clear(self):
        """Clears the cache.

        Keep in mind that not all caches support completely clearing the
        cache.

        :returns: Whether the cache has been cleared.
        """
        return self.cache.clear()

    def inc(self, key, delta=1):
        """Increments the value of a key by `delta`. If the key does not yet
        exist it is initialized with `delta`.

        For supporting caches this is an atomic operation.

        :param key: the key to increment.
        :param delta: the delta to add.
        :returns: The new value or ``None`` for backend errors.
        """
        return self.cache.inc(key, delta=delta)

    def dec(self, key, delta=1):
        """Decrements the value of a key by `delta`. If the key does not yet
        exist it is initialized with `-delta`.

        For supporting caches this is an atomic operation.

        :param key: the key to increment.
        :param delta: the delta to subtract.
        :returns: The new value or `None` for backend errors.
        """
        return self.cache.dec(key, delta=delta)
class FileSystemSessionInterface(SessionInterface):
    """Uses the :class:`werkzeug.contrib.cache.FileSystemCache` as a session
    backend.

    .. versionadded:: 0.2
        The `use_signer` parameter was added.

    :param cache_dir: the directory where session files are stored.
    :param threshold: the maximum number of items the session stores before it
                      starts deleting some.
    :param mode: the file mode wanted for the session files, default 0600
    :param key_prefix: A prefix that is added to FileSystemCache store keys.
    :param use_signer: Whether to sign the session id cookie or not.
    """

    session_class = FileSystemSession

    def __init__(self, cache_dir, threshold, mode, key_prefix,
                 use_signer=False):
        from werkzeug.contrib.cache import FileSystemCache
        # *threshold* caps the number of session records FileSystemCache
        # keeps before it starts evicting.
        self.cache = FileSystemCache(cache_dir, threshold=threshold, mode=mode)
        self.key_prefix = key_prefix
        self.use_signer = use_signer

    def open_session(self, app, request):
        """Load the session matching the request's session cookie, or start
        a fresh empty session when there is no (valid) cookie."""
        sid = request.cookies.get(app.session_cookie_name)
        if not sid:
            # No cookie at all: issue a brand-new session id.
            sid = self._generate_sid()
            return self.session_class(sid=sid)
        if self.use_signer:
            signer = self._get_signer(app)
            if signer is None:
                return None
            try:
                sid = signer.unsign(sid)
            except BadSignature:
                # Tampered or invalid signature. The previous code set
                # ``sid = None`` here, which then crashed below on
                # ``self.key_prefix + sid``; issue a fresh session instead.
                sid = self._generate_sid()
                return self.session_class(sid=sid)
        # Fetch the stored session payload for this session id.
        data = self.cache.get(self.key_prefix + sid)
        if data is not None:
            return self.session_class(data, sid=sid)
        return self.session_class(sid=sid)

    def save_session(self, app, session, response):
        """Set (or delete) the session cookie on *response*.

        Note: the session payload itself is intentionally NOT written here;
        see :meth:`save_session_without_response`.
        """
        domain = self.get_cookie_domain(app)
        path = self.get_cookie_path(app)
        if not session:
            # Session dict is empty: drop the stored record and the cookie.
            if session.modified:
                self.cache.delete(self.key_prefix + session.sid)
                response.delete_cookie(app.session_cookie_name,
                                       domain=domain, path=path)
            return

        # Modification case. There are upsides and downsides to emitting a
        # set-cookie header each request. The behavior is controlled by the
        # :meth:`should_set_cookie` method which performs a quick check to
        # figure out if the cookie should be set or not. This is controlled
        # by the SESSION_REFRESH_EACH_REQUEST config flag as well as the
        # permanent flag on the session itself.
        # if not self.should_set_cookie(app, session):
        #     return

        httponly = self.get_cookie_httponly(app)
        secure = self.get_cookie_secure(app)
        expires = self.get_expiration_time(app, session)
        # Persisting the data is left to save_session_without_response():
        # data = dict(session)
        # self.cache.set(self.key_prefix + session.sid, data,
        #                int(app.permanent_session_lifetime.total_seconds()))
        if self.use_signer:
            # Sign the session id before placing it in the cookie.
            session_id = self._get_signer(app).sign(session.sid)
        else:
            session_id = session.sid
        response.set_cookie(app.session_cookie_name, session_id,
                            expires=expires, httponly=httponly,
                            domain=domain, path=path, secure=secure)

    def save_session_without_response(self, app, session):
        """Persist the session payload to the cache without needing a
        response object (cookie handling is done in :meth:`save_session`)."""
        # httponly = self.get_cookie_httponly(app)
        # secure = self.get_cookie_secure(app)
        # expires = self.get_expiration_time(app, session)
        data = dict(session)
        self.cache.set(self.key_prefix + session.sid, data,
                       int(app.permanent_session_lifetime.total_seconds()))

    def judge_attack(self, app, request):
        """Return True when two requests from the same session arrive less
        than three seconds apart (simple flood heuristic), else False."""
        sid = request.cookies.get(app.session_cookie_name)
        if not sid:
            return False
        current_time = time.time()
        try:
            # cache.get() returns None for unknown sids, so subscripting
            # raises TypeError; a record without 'time' raises KeyError.
            # (Previously a bare ``except:`` swallowed everything.)
            last_time = self.cache.get(self.key_prefix + sid)['time']
        except (TypeError, KeyError):
            return False
        if current_time - last_time < 3:
            return True
        return False
class Cache(object):
    """
    This class is used to control the cache objects. If TESTING is True it
    will use NullCache.
    """

    def __init__(self, app=None):
        self.cache = None
        if app is not None:
            self.init_app(app)
        else:
            self.app = None

    def init_app(self, app):
        "This is used to initialize cache with your app object"
        app.config.setdefault('CACHE_DEFAULT_TIMEOUT', 300)
        app.config.setdefault('CACHE_THRESHOLD', 500)
        app.config.setdefault('CACHE_KEY_PREFIX', None)
        app.config.setdefault('CACHE_MEMCACHED_SERVERS', None)
        app.config.setdefault('CACHE_DIR', None)
        app.config.setdefault('CACHE_TYPE', 'NullCache')
        self.app = app
        self._set_cache()

    def _set_cache(self):
        # Build the concrete backend from the app config; TESTING forces a
        # no-op cache regardless of CACHE_TYPE.
        if self.app.config['TESTING']:
            self.cache = NullCache()
        else:
            if self.app.config['CACHE_TYPE'] == 'Null':
                self.cache = NullCache()
            elif self.app.config['CACHE_TYPE'] == 'Simple':
                self.cache = SimpleCache(
                    threshold=self.app.config['CACHE_THRESHOLD'],
                    default_timeout=self.app.config['CACHE_DEFAULT_TIMEOUT'])
            elif self.app.config['CACHE_TYPE'] == 'Memcached':
                self.cache = MemcachedCache(
                    self.app.config['CACHE_MEMCACHED_SERVERS'],
                    default_timeout=self.app.config['CACHE_DEFAULT_TIMEOUT'],
                    key_prefix=self.app.config['CACHE_KEY_PREFIX'])
            elif self.app.config['CACHE_TYPE'] == 'GAE':
                self.cache = GAEMemcachedCache(
                    default_timeout=self.app.config['CACHE_DEFAULT_TIMEOUT'],
                    key_prefix=self.app.config['CACHE_KEY_PREFIX'])
            elif self.app.config['CACHE_TYPE'] == 'FileSystem':
                self.cache = FileSystemCache(
                    self.app.config['CACHE_DIR'],
                    threshold=self.app.config['CACHE_THRESHOLD'],
                    default_timeout=self.app.config['CACHE_DEFAULT_TIMEOUT'])

    def get(self, *args, **kwargs):
        "Proxy function for internal cache object."
        return self.cache.get(*args, **kwargs)

    def set(self, *args, **kwargs):
        "Proxy function for internal cache object."
        self.cache.set(*args, **kwargs)

    def add(self, *args, **kwargs):
        "Proxy function for internal cache object."
        self.cache.add(*args, **kwargs)

    def delete(self, *args, **kwargs):
        "Proxy function for internal cache object."
        self.cache.delete(*args, **kwargs)

    def cached(self, timeout=None, key_prefix='view/%s', unless=None):
        """
        Decorator. Use this to cache a function. By default the cache key
        is `view/request.path`. You are able to use this decorator with any
        function by changing the `key_prefix`. If the token `%s` is located
        within the `key_prefix` then it will replace that with
        `request.path`

        Example::

            # An example view function
            @cache.cached(timeout=50)
            def big_foo():
                return big_bar_calc()

            # An example misc function to cache.
            @cache.cached(key_prefix='MyCachedList')
            def get_list():
                return [random.randrange(0, 1) for i in range(50000)]

        .. code-block:: pycon

            >>> my_list = get_list()

        :param timeout: Default None. If set to an integer, will cache
                        for that amount of time.
        :param key_prefix: Default 'view/%(request.path)s'. Beginning key
                           to use for the cache key.
        :param unless: Default None. Cache will *always* execute the caching
                       facilities unless this callable is true.
                       This will bypass the caching entirely.
        """
        def decorator(f):
            @wraps(f)
            def decorated_function(*args, **kwargs):
                #: Bypass the cache entirely.
                if callable(unless) and unless() is True:
                    return f(*args, **kwargs)

                if '%s' in key_prefix:
                    cache_key = key_prefix % request.path
                else:
                    cache_key = key_prefix

                rv = self.cache.get(cache_key)
                # Compare against None (not truthiness) so cached falsy
                # results such as 0, '' or [] are not recomputed every call.
                if rv is None or current_app.debug:
                    rv = f(*args, **kwargs)
                    self.cache.set(cache_key, rv, timeout=timeout)
                return rv
            return decorated_function
        return decorator

    def memoize(self, timeout=None):
        """
        Use this to cache the result of a function, taking its arguments
        into account in the cache key.

        Information on
        `Memoization <http://en.wikipedia.org/wiki/Memoization>`_.

        Example::

            @cache.memoize(timeout=50)
            def big_foo(a, b):
                return a + b + random.randrange(0, 1000)

        .. code-block:: pycon

            >>> big_foo(5, 2)
            753
            >>> big_foo(5, 3)
            234
            >>> big_foo(5, 2)
            753

        :param timeout: Default None. If set to an integer, will cache for
                        that amount of time.
        """
        def memoize(f):
            @wraps(f)
            def decorated_function(*args, **kwargs):
                cache_key = (f.__name__, id(f), args, str(kwargs))

                rv = self.cache.get(cache_key)
                if rv is None:
                    rv = f(*args, **kwargs)
                    # Forward *timeout* to the backend (it was previously
                    # accepted but silently ignored).
                    self.cache.set(cache_key, rv, timeout=timeout)
                return rv
            return decorated_function
        return memoize
class Cache(object):
    """Base class for TimeGate caches."""

    def __init__(self, path, tolerance, expiration, max_values,
                 run_tests=True, max_file_size=0):
        """Constructor method.

        :param path: The path of the cache database file.
        :param tolerance: The tolerance, in seconds to which a TimeMap is
            considered young enough to be used as is.
        :param expiration: How long, in seconds, the cache entries are stored
            every get will be a CACHE MISS.
        :param max_values: The maximum number of TimeMaps stored in cache
            before some are deleted
        :param run_tests: (Optional) Tests the cache at initialization.
        :param max_file_size: (Optional) The maximum size (in Bytes) for a
            TimeMap cache value. When max_file_size=0, there is no limit to
            a cache value. When max_file_size=X > 0, the cache will not
            store TimeMap that require more than X Bytes on disk.
        """
        # Parameters Check
        if tolerance <= 0 or expiration <= 0 or max_values <= 0:
            raise CacheError('Cannot create cache: all parameters must be > 0')

        self.tolerance = relativedelta(seconds=tolerance)
        self.path = path.rstrip('/')
        self.max_file_size = max(max_file_size, 0)
        self.CHECK_SIZE = self.max_file_size > 0
        self.max_values = max_values
        self.backend = FileSystemCache(path,
                                       threshold=self.max_values,
                                       default_timeout=expiration)

        # Smoke-test the backend with a set/get round trip (and the on-disk
        # size probe when enabled), then remove the probe entry.
        if run_tests:
            try:
                key = b'1'
                val = 1
                self.backend.set(key, val)
                assert (not self.CHECK_SIZE) or self._check_size(key) > 0
                assert self.backend.get(key) == val
                os.remove(os.path.join(self.path, md5(key).hexdigest()))
            except Exception as e:
                raise CacheError('Error testing cache: %s' % e)

        logging.debug('Cache created. max_files = %d. Expiration = %d. '
                      'max_file_size = %d' % (self.max_values, expiration,
                                              self.max_file_size))

    def get_until(self, uri_r, date):
        """Returns the TimeMap (memento,datetime)-list for the requested
        Memento. The TimeMap is guaranteed to span at least until the 'date'
        parameter, within the tolerance.

        :param uri_r: The URI-R of the resource as a string.
        :param date: The target date. It is the accept-datetime for TimeGate
            requests, and the current date. The cache will return all
            Mementos prior to this date (within cache.tolerance parameter)
        :return: [(memento_uri_string, datetime_obj),...] list if it is in
            cache and if it is within the cache tolerance for *date*,
            None otherwise.
        """
        # Query the backend for stored cache values to that memento
        key = uri_r
        try:
            val = self.backend.get(key)
        except Exception as e:
            # Treat a backend read failure as a cache MISS, not a crash.
            logging.error('Exception loading cache content: %s' % e)
            return None
        if val:
            # There is a value in the cache: a (timestamp, timemap) pair
            # written by set() below.
            timestamp, timemap = val
            logging.info('Cached value exists for %s' % uri_r)
            if date > timestamp + self.tolerance:
                logging.info('Cache MISS: value outdated for %s' % uri_r)
                timemap = None
            else:
                logging.info('Cache HIT: found value for %s' % uri_r)
        else:
            # Cache MISS: No value
            logging.info('Cache MISS: No cached value for %s' % uri_r)
            timemap = None
        return timemap

    def get_all(self, uri_r):
        """Request the whole TimeMap for that uri.

        :param uri_r: the URI-R of the resource.
        :return: [(memento_uri_string, datetime_obj),...] list if it is in
            cache and if it is within the cache tolerance, None otherwise.
        """
        # "Until now" spans the entire recorded history of the resource.
        until = datetime.utcnow().replace(tzinfo=tzutc())
        return self.get_until(uri_r, until)

    def set(self, uri_r, timemap):
        """Set the cached TimeMap for that URI-R.

        It appends it with a timestamp of when it is stored.

        :param uri_r: The URI-R of the original resource.
        :param timemap: The value to cache.
        :return: The backend setter method return value.
        """
        logging.info('Updating cache for %s' % uri_r)
        # Store the write time alongside the TimeMap so get_until() can
        # judge freshness against self.tolerance.
        timestamp = datetime.utcnow().replace(tzinfo=tzutc())
        val = (timestamp, timemap)
        key = uri_r
        try:
            self.backend.set(key, val)
            if self.CHECK_SIZE:
                self._check_size(uri_r)
        except Exception as e:
            logging.error('Error setting cache value: %s' % e)

    def _check_size(self, key, delete=True):
        """Check the size that a specific TimeMap value is using on disk.

        It deletes if it is more than the maximum size.

        :param key: The TimeMap original resource (str or bytes).
        :param delete: (Optional) When true, the value is deleted.
            Else only a warning is raised.
        :return: The size of the value on disk (0 if it was deleted).
        """
        try:
            # werkzeug's FileSystemCache hashes the UTF-8 encoded key to
            # build the cache filename; mirror that so str keys (uri_r)
            # resolve to the same file and don't TypeError under Python 3.
            raw_key = key.encode('utf-8') if isinstance(key, str) else key
            fname = md5(raw_key).hexdigest()  # werkzeug key
            fpath = os.path.join(self.path, fname)
            size = os.path.getsize(fpath)
            if size > self.max_file_size:
                message = ('Cache value too big (%dB, max %dB) '
                           'for the TimeMap of %s')
                if delete:
                    message += '. Deleting cached value.'
                    os.remove(fpath)
                # Log BEFORE zeroing the return value, so the warning shows
                # the offending size instead of always reporting 0B. This
                # also warns (as documented) even when delete=False.
                logging.warning(message % (size, self.max_file_size, key))
                if delete:
                    size = 0
            return size
        except Exception as e:
            logging.error(
                'Exception checking cache value size for TimeMap of %s '
                'Exception: %s' % (key, e))
            return 0