def rotate(self):
    '''
    Create and rotate file streams.

    The ``path`` format string determines the filename. For example,
    ``tweets.{:%Y-%m-%d}.jsonl`` creates a filename based on the current date,
    e.g. ``tweets.2016-12-31.jsonl``. When rotating, if the new filename is the
    same as the old, the file continues. If it's a different file, the old file
    is closed and the new file is created.

    The rotation frequency is based on the crontab entries in the config, i.e.
    based on ``hours``, ``days``, ``weeks``, etc. It defaults to every minute.
    '''
    # First, flush the stream to ensure that data is not lost.
    # Then set up a new stream (if required, based on the filename)
    self.flush()
    path = self.path.format(datetime.datetime.utcnow())
    if path != self.stream_path:
        if self.stream is not None:
            self.stream.close()
        self.stream_path = path
        folder = os.path.dirname(os.path.abspath(path))
        if not os.path.exists(folder):
            os.makedirs(folder)
        self.stream = open(path, 'ab')
        app_log.debug('StreamWriter writing to %s', path)
    # Schedule the next call after a minute
    IOLoop.current().call_later(60, self.rotate)
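# Illustration (not part of the handler): ``path`` is expanded with str.format()
# and the current UTC time, so any strftime-style placeholder works. Using the
# docstring's example pattern with a fixed date:
import datetime

path = 'tweets.{:%Y-%m-%d}.jsonl'.format(datetime.datetime(2016, 12, 31))
print(path)  # tweets.2016-12-31.jsonl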
def close(self):
    try:
        self.store.close()
    # h5py.h5f.get_obj_ids often raises a ValueError: Not a file id.
    # This is presumably if the file handle has been closed. Log & ignore.
    except ValueError:
        app_log.debug('HDF5Store("%s").close() error ignored', self.path)
def load_datasets(data, each):
    '''
    Modify ``data`` by loading the datasets and filtering them by the condition.
    Modify ``each`` to apply the ``each:`` argument, else append ``(0, None)``.
    '''
    for key, val in datasets.items():
        # Allow raw data in lists as-is. Treat dicts as {url: ...}
        data[key] = val if isinstance(val, list) else gramex.data.filter(**val)
    result = condition(**data)
    # Avoiding isinstance(result, pd.DataFrame) to avoid importing pandas
    if type(result).__name__ == 'DataFrame':
        data['data'] = result
    elif isinstance(result, dict):
        data.update(result)
    elif not result:
        app_log.debug('alert: %s stopped. condition = %s', name, result)
        return
    if 'each' in alert:
        each_data = data[alert['each']]
        if isinstance(each_data, dict):
            each += list(each_data.items())
        elif isinstance(each_data, list):
            each += list(enumerate(each_data))
        elif hasattr(each_data, 'iterrows'):
            each += list(each_data.iterrows())
        else:
            raise ValueError(
                'alert: %s: each: data.%s must be dict/list/DF, not %s' % (
                    name, alert['each'], type(each_data)))
    else:
        each.append((0, None))
def override_user(self):
    '''
    Use the ``X-Gramex-User`` HTTP header to override the current user for the session.
    Use the ``X-Gramex-OTP`` HTTP header to set the user based on an OTP.
    ``?gramex-otp=`` is a synonym for ``X-Gramex-OTP``.
    '''
    headers = self.request.headers
    cipher = headers.get('X-Gramex-User')
    if cipher:
        import json
        try:
            user = json.loads(decode_signed_value(
                conf.app.settings['cookie_secret'], 'user', cipher,
                max_age_days=self._session_expiry))
        except Exception:
            reason = '%s: invalid X-Gramex-User: %s' % (self.name, cipher)
            raise HTTPError(BAD_REQUEST, reason=reason)
        else:
            app_log.debug('%s: Overriding user to %r', self.name, user)
            self.session['user'] = user
            return
    otp = headers.get('X-Gramex-OTP') or self.get_argument('gramex-otp', None)
    if otp:
        otp_data = self._session_store.load('otp:' + otp, None)
        if not isinstance(otp_data, dict) or '_t' not in otp_data or 'user' not in otp_data:
            reason = '%s: invalid X-Gramex-OTP: %s' % (self.name, otp)
            raise HTTPError(BAD_REQUEST, reason=reason)
        elif otp_data['_t'] < time.time():
            reason = '%s: expired X-Gramex-OTP: %s' % (self.name, otp)
            raise HTTPError(BAD_REQUEST, reason=reason)
        self._session_store.dump('otp:' + otp, None)
        self.session['user'] = otp_data['user']
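# Illustration (a hedged sketch, not part of the handler): the ``X-Gramex-User``
# value is a Tornado signed value of a JSON user object. A trusted caller could
# mint one as below -- run_alert() further down uses the same approach. The
# secret and user id here are placeholders; the secret must equal the server's
# settings['cookie_secret'].
import json
import tornado.web

cookie_secret = 'top-secret'  # placeholder
user = json.dumps({'id': 'alice@example.org'}, ensure_ascii=True, separators=(',', ':'))
signed = tornado.web.create_signed_value(cookie_secret, 'user', user)
# Send the request with the header:  X-Gramex-User: <signed>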
def call_later(self):
    '''Schedule next run automatically. Do NOT call twice: creates two callbacks'''
    delay = self.cron.next(default_utc=self.utc) if hasattr(self, 'cron') else None
    self._call_later(delay)
    if delay is not None:
        app_log.debug('Scheduling %s after %.0fs', self.name, delay)
    else:
        app_log.debug('No further schedule for %s', self.name)
def call_later(self):
    '''Schedule next run automatically. Clears any previous scheduled runs'''
    delay = self.cron.next(default_utc=self.utc) if hasattr(self, 'cron') else None
    self._call_later(delay)
    if delay is not None:
        app_log.debug('Scheduling %s after %.0fs', self.name, delay)
    else:
        app_log.debug('No further schedule for %s', self.name)
def call_later(self):
    '''Schedule next run. Do NOT call twice: creates two callbacks'''
    delay = self.cron.next(default_utc=False) if hasattr(self, 'cron') else None
    if delay is not None:
        app_log.debug('Scheduling %s after %.0fs', self.name, delay)
        self.callback = self.ioloop.call_later(delay, self.run)
    else:
        app_log.debug('No further schedule for %s', self.name)
def flush(self):
    super(JSONStore, self).flush()
    if self.changed:
        app_log.debug('Flushing %s', self.path)
        store = self._read_json()
        store.update(self.update)
        self._write_json(store)
        self.store = store
        self.update = {}
        self.changed = False
def query(sql, engine, state=None, **kwargs):
    '''
    Read a SQL query or database table into a DataFrame. Caches results unless
    the state has changed. The query is always re-run unless a state is specified.

    The state can be specified in 4 ways:

    1. A string. This must be a lightweight SQL query. If its result changes,
       the original SQL query is re-run.
    2. A function. This is called to determine the state of the database.
    3. A list of tables. This list of ``["db.table"]`` names specifies which
       tables to watch for. This is currently experimental.
    4. ``None``: the default. The query is always re-run and not cached.
    '''
    # Pass _reload_status=True for testing purposes. This returns a tuple:
    # (result, reloaded) instead of just the result.
    _reload_status = kwargs.pop('_reload_status', False)
    reloaded = False
    _cache = kwargs.pop('_cache', _QUERY_CACHE)
    store_cache = True

    key = (str(sql), json.dumps(kwargs.get('params', {}), sort_keys=True), engine.url)
    cached = _cache.get(key, {})
    current_status = cached.get('status', None) if cached else None
    if isinstance(state, (list, tuple)):
        status = _table_status(engine, tuple(state))
    elif isinstance(state, six.string_types):
        status = pd.read_sql(state, engine).to_dict(orient='list')
    elif callable(state):
        status = state()
    elif state is None:
        # Create a new status every time, so that the query is always re-run
        status = object()
        store_cache = False
    else:
        raise TypeError(
            'gramex.cache.query(state=) must be a table list, query or fn, not %s' %
            repr(state))

    if status == current_status:
        result = _cache[key]['data']
    else:
        app_log.debug('gramex.cache.query: %s. engine: %s. state: %s. kwargs: %s',
                      sql, engine, state, kwargs)
        result = pd.read_sql(sql, engine, **kwargs)
        if store_cache:
            _cache[key] = {
                'data': result,
                'status': status,
            }
        reloaded = True

    return (result, reloaded) if _reload_status else result
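# Illustration (a hedged usage sketch of the ``state`` options described in the
# docstring above). The engine URL, table and column names are hypothetical.
import sqlalchemy
import gramex.cache

engine = sqlalchemy.create_engine('sqlite:///data.db')

# String state: re-run the main query only when this lightweight query's result changes
df = gramex.cache.query('SELECT * FROM sales', engine,
                        state='SELECT MAX(updated_at) FROM sales')

# Table-list state (experimental): watch specific tables for changes
df = gramex.cache.query('SELECT * FROM sales', engine, state=['db.sales'])

# No state (default): always re-run, never cache
df = gramex.cache.query('SELECT * FROM sales', engine)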
def purge(self):
    '''Load all keys into self.store. Delete what's required. Save.'''
    self.flush()
    changed = False
    for key in self.purge_keys(self.store):
        del self.store[key]
        changed = True
    if changed:
        app_log.debug('Purging %s', self.path)
        self._write_json(self.store)
def get(self, *path_args, **path_kwargs):
    if '_params' in self.args:
        params = {
            'opts': self.config_store.load('transform'),
            'params': self.config_store.load('model'),
        }
        self.write(json.dumps(params, indent=2))
    elif '_cache' in self.args:
        self.write(self.load_data().to_json(orient='records'))
    else:
        self._check_model_path()
        if '_download' in self.args:
            self.set_header('Content-Type', 'application/octet-stream')
            self.set_header(
                'Content-Disposition',
                f'attachment; filename={op.basename(self.model_path)}')
            with open(self.model_path, 'rb') as fin:
                self.write(fin.read())
        elif '_model' in self.args:
            self.write(json.dumps(self.get_opt('params'), indent=2))
        else:
            try:
                data_args = {k: v for k, v in self.args.items() if not k.startswith('_')}
                data_args = {k: [v] if not isinstance(v, list) else v
                             for k, v in data_args.items()}
                data = pd.DataFrame.from_dict(data_args)
            except Exception as err:
                app_log.debug(err)
                data = []
            if len(data) > 0:
                self.set_header('Content-Type', 'application/json')
                data = data.drop([self.get_opt('target_col')], axis=1, errors='ignore')
                prediction = yield gramex.service.threadpool.submit(self._predict, data)
                self.write(json.dumps(prediction, indent=2, cls=CustomJSONEncoder))
            else:
                self.set_header('Content-Type', 'text/html')
                self.render(self.template, handler=self, data=self.load_data())
    super(MLHandler, self).get(*path_args, **path_kwargs)
def allowed(self, path):
    '''
    A path is allowed if it matches any ``allow:``, or matches no ``ignore:``.
    Override this method for a custom implementation.
    '''
    for ignore in self.ignore:
        if _match(path, ignore):
            # Check allow: only if an ignore: is matched.
            # If any allow: is matched, allow it
            for allow in self.allow:
                if _match(path, allow):
                    return True
            app_log.debug('%s: Disallow "%s". It matches "%s"', self.name, path, ignore)
            return False
    return True
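# Illustration (a toy stand-in, not the real _match(), which is assumed here to
# behave like a glob matcher): an ignore: match blocks a path unless some
# allow: pattern also matches it. The patterns below are purely illustrative.
from fnmatch import fnmatch

ignore, allow = ['.*'], ['.well-known']

def is_allowed(path):
    for pattern in ignore:
        if fnmatch(path, pattern):
            # ignored, unless an allow: pattern also matches
            return any(fnmatch(path, a) for a in allow)
    return True

print(is_allowed('.secret'))      # False: matches ignore, no allow
print(is_allowed('.well-known'))  # True: matches ignore, but also an allow
print(is_allowed('data.csv'))     # True: matches no ignore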
def purge(self):
    '''Load all keys into self.store. Delete what's required. Save.'''
    self.flush()
    changed = False
    items = {
        key: json.loads(val[()], object_pairs_hook=AttrDict, cls=CustomJSONDecoder)
        for key, val in self.store.items()
    }
    for key in self.purge_keys(items):
        del self.store[key]
        changed = True
    if changed:
        app_log.debug('Purging %s', self.path)
        self.store.flush()
def gramex_update(url):
    '''If a newer version of Gramex is available, logs a warning'''
    import time
    import requests
    import platform
    from . import services

    if not services.info.eventlog:
        return app_log.error('eventlog: service is not running. So Gramex update is disabled')

    query = services.info.eventlog.query
    update = query('SELECT * FROM events WHERE event="update" ORDER BY time DESC LIMIT 1')
    delay = 24 * 60 * 60        # Wait for one day before updates
    if update and time.time() < update[0]['time'] + delay:
        return app_log.debug('Gramex update ran recently. Deferring check.')

    meta = {
        'dir': variables.get('GRAMEXDATA'),
        'uname': platform.uname(),
    }
    if update:
        events = query('SELECT * FROM events WHERE time > ? ORDER BY time',
                       (update[0]['time'], ))
    else:
        events = query('SELECT * FROM events')
    logs = [dict(log, **meta) for log in events]

    r = requests.post(url, data=json.dumps(logs))
    r.raise_for_status()
    update = r.json()
    version = update['version']
    if version > __version__:
        app_log.error('Gramex %s is available. See https://learn.gramener.com/guide/', version)
    elif version < __version__:
        app_log.warning('Gramex update: your version %s is ahead of the stable %s',
                        __version__, version)
    else:
        app_log.debug('Gramex version %s is up to date', __version__)
    services.info.eventlog.add('update', update)
    return {'logs': logs, 'response': update}
def ensure_single_session(handler):
    '''
    Ensure that the user in this session is logged out of all other sessions.
    '''
    user_id = handler.session.get('user', {}).get('id')
    if user_id is not None:
        for key in handler._session_store.keys():
            # Ignore the current session and OTP sessions
            if key == handler.session.get('id'):
                continue
            if isinstance(key, six.text_type) and key.startswith('otp:'):
                continue
            if isinstance(key, six.binary_type) and key.startswith(b'otp:'):
                continue
            # Remove this user from all other sessions
            other_session = handler._session_store.load(key)
            if other_session is not None:
                other_user = other_session.get('user')
                if other_user is not None and other_user.get('id') == user_id:
                    other_session.pop('user')
                    handler._session_store.dump(key, other_session)
                    app_log.debug('dropped user %s from session %s',
                                  user_id, other_session['id'])
def _get(self, *args, **kwargs):
    self.include_body = kwargs.pop('include_body', True)
    path = urljoin('/', args[0] if len(args) else '').lstrip('/')
    if isinstance(self.root, list):
        # Concatenate multiple files and serve them one after another
        for path_item in self.root:
            yield self._get_path(path_item, multipart=True)
    elif isinstance(self.root, dict):
        # Render the path for the first matching regex
        for pattern, filestr in self.root.items():
            match = pattern.match(path)
            if match:
                q = defaultdict(text_type, **self.default)
                q.update({k: v[0] for k, v in self.args.items() if len(v) > 0})
                q.update(match.groupdict())
                p = Path(filestr.format(*match.groups(), **q)).absolute()
                app_log.debug('%s: %s renders %s', self.name, self.request.path, p)
                yield self._get_path(p)
                break
        else:
            raise HTTPError(NOT_FOUND, '%s matches no path key', self.request.path)
    elif not args:
        # No group has been specified in the pattern. So just serve root
        yield self._get_path(self.root)
    else:
        # Eliminate parent directory references like `../` in the URL
        path = urljoin('/', path)[1:]
        if self.pattern:
            yield self._get_path(Path(self.pattern.replace('*', path)).absolute())
        else:
            yield self._get_path(self.root / path if self.root.is_dir() else self.root)
def run_alert(callback=None):
    '''
    Runs the configured alert. If a callback is specified, calls the callback
    with all email arguments. Else sends the email.
    '''
    app_log.info('alert: %s running', name)
    data = {'config': alert}
    for key, dataset in datasets.items():
        # Allow raw data in lists as-is. Treat dicts as {url: ...}
        data[key] = dataset if isinstance(dataset, list) else gramex.data.filter(**dataset)

    result = condition(**data)
    # Avoiding isinstance(result, pd.DataFrame) to avoid importing pandas
    if type(result).__name__ == 'DataFrame':
        data['data'] = result
    elif isinstance(result, dict):
        data.update(result)
    elif not result:
        app_log.debug('alert: %s stopped. condition = %s', name, result)
        return

    each = [(None, None)]
    if 'each' in alert:
        each_data = data[alert['each']]
        if isinstance(each_data, dict):
            each = list(each_data.items())
        elif isinstance(each_data, list):
            each = list(enumerate(each_data))
        elif hasattr(each_data, 'iterrows'):
            each = list(each_data.iterrows())
        else:
            app_log.error('alert: %s: each: requires data.%s to be a dict/list/DataFrame',
                          name, alert['each'])
            return

    kwargslist = []
    for index, row in each:
        data['index'], data['row'], data['config'] = index, row, alert
        # Generate email content
        kwargs = {}
        kwargslist.append(kwargs)
        for key in ['bodyfile', 'htmlfile', 'markdownfile']:
            target = key.replace('file', '')
            if key in templates and target not in templates:
                path = templates[key].generate(**data).decode('utf-8')
                tmpl = gramex.cache.open(path, 'template')
                kwargs[target] = tmpl.generate(**data).decode('utf-8')
        try:
            for key in ['to', 'cc', 'bcc', 'from', 'subject', 'body', 'html', 'markdown']:
                if key in templates:
                    tmpl = templates[key]
                    if isinstance(tmpl, list):
                        kwargs[key] = []
                        for subtmpl in tmpl:
                            kwargs[key].append(subtmpl.generate(**data).decode('utf-8'))
                    else:
                        kwargs[key] = tmpl.generate(**data).decode('utf-8')
        except Exception:
            # If any template raises an exception, log it and continue with next email
            app_log.exception('alert: %s(#%s).%s: Template exception', name, index, key)
            continue

        headers = {}
        # user: {id: ...} creates an X-Gramex-User header to mimic the user
        if 'user' in alert:
            user = json.dumps(alert['user'], ensure_ascii=True, separators=(',', ':'))
            headers['X-Gramex-User'] = tornado.web.create_signed_value(
                info.app.settings['cookie_secret'], 'user', user)

        if 'markdown' in kwargs:
            kwargs['html'] = _markdown_convert(kwargs.pop('markdown'))

        if 'images' in templates:
            kwargs['images'] = {}
            for cid, val in templates['images'].items():
                urlpath = val.generate(**data).decode('utf-8')
                urldata = urlfetch(urlpath, info=True, headers=headers)
                if urldata['content_type'].startswith('image/'):
                    kwargs['images'][cid] = urldata['name']
                else:
                    with io.open(urldata['name'], 'rb') as temp_file:
                        bytestoread = 80
                        first_line = temp_file.read(bytestoread)
                        app_log.error('alert: %s: %s: %d (%s) not an image: %s\n%r',
                                      name, cid, urldata['r'].status_code,
                                      urldata['content_type'], urlpath, first_line)

        if 'attachments' in templates:
            kwargs['attachments'] = [
                urlfetch(attachment.generate(**data).decode('utf-8'), headers=headers)
                for attachment in templates['attachments']
            ]

        if callable(callback):
            return callback(**kwargs)

        # Email recipient. TODO: run this in a queue. (Anand)
        mailer.mail(**kwargs)

        # Log the event
        event = {
            'alert': name, 'service': service, 'from': mailer.email or '',
            'to': '', 'cc': '', 'bcc': '', 'subject': '',
            'datetime': datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ"),
        }
        event.update({k: v for k, v in kwargs.items() if k in event})
        event['attachments'] = ', '.join(kwargs.get('attachments', []))
        alert_logger.info(event)

    return kwargslist
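# Illustration (assuming run_alert is in scope as the closure created for an
# alert): passing a callback previews the composed email instead of sending it.
# Since the callback's return value short-circuits the loop, only the first
# ``each`` entry is rendered.
preview = run_alert(callback=lambda **kwargs: kwargs)
if preview:
    print(preview.get('to'), preview.get('subject'))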
def url(conf):
    '''Set up the Tornado web app URL handlers'''
    handlers = []
    # Sort the handlers in descending order of priority
    specs = sorted(conf.items(), key=_sort_url_patterns, reverse=True)
    for name, spec in specs:
        _key = cache_key('url', spec)
        if _key in _cache:
            handlers.append(_cache[_key])
            continue
        if 'handler' not in spec:
            app_log.error('url: %s: no handler specified', name)
            continue
        app_log.debug('url: %s (%s) %s', name, spec.handler, spec.get('priority', ''))
        urlspec = AttrDict(spec)
        handler = locate(spec.handler, modules=['gramex.handlers'])
        if handler is None:
            app_log.error('url: %s: ignoring missing handler %s', name, spec.handler)
            continue

        # Create a subclass of the handler with additional attributes.
        class_vars = {'name': name, 'conf': spec}
        # If there's a cache section, get the cache method for use by BaseHandler
        if 'cache' in urlspec:
            class_vars['cache'] = _cache_generator(urlspec['cache'], name=name)
        else:
            class_vars['cache'] = None
        # PY27 type() requires the class name to be a string, not unicode
        urlspec.handler = type(str(spec.handler), (handler, ), class_vars)

        # If there's a setup method, call it to initialize the class
        kwargs = urlspec.get('kwargs', {})
        if hasattr(handler, 'setup'):
            try:
                urlspec.handler.setup_default_kwargs()
                urlspec.handler.setup(**kwargs)
            except Exception:
                app_log.exception('url: %s: setup exception in handler %s', name, spec.handler)
                # Since we can't set up the handler, all requests must report the error instead
                class_vars['exc_info'] = sys.exc_info()
                error_handler = locate('SetupFailedHandler', modules=['gramex.handlers'])
                urlspec.handler = type(str(spec.handler), (error_handler, ), class_vars)
                urlspec.handler.setup(**kwargs)

        try:
            handler_entry = tornado.web.URLSpec(
                name=name,
                pattern=_url_normalize(urlspec.pattern),
                handler=urlspec.handler,
                kwargs=kwargs,
            )
        except re.error:
            app_log.error('url: %s: pattern: %s is invalid', name, urlspec.pattern)
            continue
        except Exception:
            app_log.exception('url: %s: invalid', name)
            continue
        _cache[_key] = handler_entry
        handlers.append(handler_entry)

    info.app.clear_handlers()
    info.app.add_handlers('.*$', handlers)
def init(force_reload=False, **kwargs):
    '''
    Update Gramex configurations and start / restart the instance.

    ``gramex.init()`` can be called at any time to refresh configuration files.
    ``gramex.init(key=val)`` adds ``val`` as a configuration layer named ``key``.
    If ``val`` is a Path, it is converted into a PathConfig. (If it is a
    directory Path, its ``gramex.yaml`` is used.)

    Services are re-initialised if their configurations have changed. Service
    callbacks are always re-run (even if the configuration hasn't changed.)
    '''
    try:
        setup_secrets(paths['base'] / '.secrets.yaml')
    except Exception as e:
        app_log.exception(e)

    # Reset variables
    variables.clear()
    variables.update(setup_variables())

    # Initialise configuration layers with the provided configurations.
    # AttrDicts are updated as-is. Paths are converted to PathConfig
    paths.update(kwargs)
    for key, val in paths.items():
        if isinstance(val, Path):
            if val.is_dir():
                val = val / 'gramex.yaml'
            val = PathConfig(val)
        config_layers[key] = val

    # Locate all config files
    config_files = set()
    for path_config in config_layers.values():
        if hasattr(path_config, '__info__'):
            for pathinfo in path_config.__info__.imports:
                config_files.add(pathinfo.path)
    config_files = list(config_files)

    # Add config file folders to sys.path
    sys.path[:] = _sys_path + [str(path.absolute().parent) for path in config_files]

    from . import services
    globals()['service'] = services.info    # gramex.service = gramex.services.info

    # Override final configurations
    appconfig.clear()
    appconfig.update(+config_layers)
    # --settings.debug => log.root.level = True
    if appconfig.app.get('settings', {}).get('debug', False):
        appconfig.log.root.level = logging.DEBUG

    # Set up a watch on config files (including imported files)
    if appconfig.app.get('watch', True):
        from .services import watcher
        watcher.watch('gramex-reconfig', paths=config_files, on_modified=lambda event: init())

    # Run all valid services. (The "+" before config_layers merges the chain.)
    # Services may return callbacks to be run at the end
    for key, val in appconfig.items():
        if key not in conf or conf[key] != val or force_reload:
            if hasattr(services, key):
                app_log.debug('Loading service: %s', key)
                conf[key] = prune_keys(val, {'comment'})
                callback = getattr(services, key)(conf[key])
                if callable(callback):
                    callbacks[key] = callback
            else:
                app_log.error('No service named %s', key)

    # Run the callbacks. Specifically, the app service starts the Tornado ioloop
    for key in (+config_layers).keys():
        if key in callbacks:
            app_log.debug('Running callback: %s', key)
            callbacks[key]()
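# Illustration (a minimal sketch of the layering described in the docstring):
# 'myapp' is just an illustrative layer name and the path is hypothetical.
# A directory Path makes init() read the gramex.yaml inside it.
from pathlib import Path
import gramex

gramex.init(myapp=Path('/path/to/project'))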
def stop(self):
    '''Suspend the task, clearing any pending callbacks'''
    if self.callback is not None:
        app_log.debug('Stopping %s', self.name)
        self.ioloop.remove_timeout(self.callback)
        self.delay, self.callback = None, None
def get(self, *path_args, **path_kwargs):
    if '_download' in self.args:
        self.set_header('Content-Type', 'application/octet-stream')
        self.set_header(
            'Content-Disposition',
            f'attachment; filename={op.basename(self.model_path)}')
        with open(self.model_path, 'rb') as fin:
            self.write(fin.read())
    elif '_model' in self.args:
        self._check_model_path()
        if isinstance(self.model, Pipeline):
            for k, v in self.model.named_steps.items():
                if k != 'transform':
                    break
            params = v.get_params()
        elif isinstance(self.model, BaseEstimator):
            params = self.model.get_params()
        elif self.model is None:
            params = self.get_opt('params')
        self.write(json.dumps(params, indent=4))
    elif '_cache' in self.args:
        if '_opts' in self.args:
            self.write(json.dumps(self.config_store.load('transform')))
            self.finish()
        elif '_params' in self.args:
            self.write(json.dumps(self.config_store.load('model')))
            self.finish()
        else:
            data = self.load_data()
            if len(data):
                self.write(data.to_json(orient='records'))
            else:
                self.write(json.dumps([]))
    else:
        self._check_model_path()
        self.set_header('Content-Type', 'application/json')
        action = self.args.pop('_action', [''])[0]
        try:
            data = pd.DataFrame.from_dict(
                {k: v for k, v in self.args.items() if not k.startswith('_')})
            if len(data) > 0 and not action:
                action = 'predict'
        except Exception as err:
            app_log.debug(err)
            data = self.load_data()
        if len(data) == 0:
            data = self.load_data()
        target_col = self.get_opt('target_col')
        if target_col in data:
            target = data.pop(target_col)
        else:
            target = None
        if action in ('predict', 'score'):
            prediction = yield gramex.service.threadpool.submit(
                # self._predict, data, transform=False)
                self._predict, data)
            if action == 'predict':
                self.write(_serialize_prediction(prediction))
            elif action == 'score':
                score = accuracy_score(target.astype(prediction.dtype), prediction)
                self.write(json.dumps({'score': score}, indent=4))
        else:
            if isinstance(self.template, str) and op.isfile(self.template):
                self.set_header('Content-Type', 'text/html')
                # return Template(self.template)
                self.render(self.template, handler=self, data=self.load_data())
            elif self.template:
                self.set_header('Content-Type', 'text/html')
                self.render(DEFAULT_TEMPLATE, handler=self, data=self.load_data())
            else:
                self.set_header('Content-Type', 'application/json')
                self.write(json.dumps([]))
    super(MLHandler, self).get(*path_args, **path_kwargs)
def get(self, *path_args):
    ws = self.request.headers.get('Upgrade', '') == 'websocket'
    if ws:
        return WebSocketHandler.get(self)

    # Construct HTTP headers
    headers = HTTPHeaders(self.request.headers if self.request_headers.get('*', None) else {})
    for key, val in self.request_headers.items():
        if key == '*':
            continue
        if val is True:
            if key in self.request.headers:
                headers[key] = self.request.headers[key]
        else:
            headers[key] = str(val).format(handler=self)

    # Update query parameters
    # TODO: use a named capture for path_args? This is not the right method
    parts = urlsplit(self.url.format(*path_args))
    params = {
        key: ([str(v).format(handler=self) for v in val] if isinstance(val, list)
              else str(val).format(handler=self))
        for key, val in self.default.items()
    }
    params.update(parse_qs(parts.query))
    params.update(self.args)
    query = urlencode(params, doseq=True)
    url = urlunsplit((parts.scheme, parts.netloc, parts.path, query, parts.fragment))

    request = HTTPRequest(
        url=url,
        method=self.request.method,
        headers=headers,
        body=None if self.request.method == 'GET' else self.request.body,
        connect_timeout=self.connect_timeout,
        request_timeout=self.request_timeout,
    )

    if 'prepare' in self.info:
        self.info['prepare'](handler=self, request=request, response=None)

    app_log.debug('%s: proxying %s', self.name, url)
    response = yield self.browser.fetch(request, raise_error=False)

    if response.code in (MOVED_PERMANENTLY, FOUND):
        location = response.headers.get('Location', '')
        # TODO: check if the Location: header MATCHES the url, not startswith.
        # url: example.org/?x should match Location: example.org/?a=1&x
        # even though location does not start with url.
        if location.startswith(url):
            response.headers['Location'] = location.replace(url, self.conf.pattern)

    if 'modify' in self.info:
        self.info['modify'](handler=self, request=request, response=response)

    # Pass on the headers as-is, but override with the handler HTTP headers
    self.set_header('X-Proxy-Url', response.effective_url)
    for header_name, header_value in response.headers.items():
        if header_name not in {'Connection', 'Transfer-Encoding', 'Content-Length'}:
            self.set_header(header_name, header_value)
    # Proxies may send the wrong Content-Length. Correct it, else Tornado raises an error
    if response.body is not None:
        self.set_header('Content-Length', len(response.body))
    for header_name, header_value in self.headers.items():
        self.set_header(header_name, header_value)

    # Pass on the HTTP status code and response body as-is
    self.set_status(response.code, reason=response.reason)
    if response.body is not None:
        self.write(response.body)
def flush(self):
    super(HDF5Store, self).flush()
    if self.changed:
        app_log.debug('Flushing %s', self.path)
        self.store.flush()
        self.changed = False
def purge(self):
    app_log.debug('Purging %s', self.path)
    super(SQLiteStore, self).purge()
def purge(self):
    app_log.debug('Purging %s', self.store)
    # TODO: optimize item retrieval
    items = {key: self.load(key, None) for key in self.store.keys()}
    for key in self.purge_keys(items):
        self.store.delete(key)