def __init__(
        self,
        user_lookup: Set[UserLookup],
        cache_timer: Callable[[], int] = None,
        cache_max_size: int = 10000,
        cache_user_expiration: int = 300,
        cache_is_valid_expiration: int = 3600) -> None:
    """
    Build the handler set together with its two caches.

    The cache_* arguments exist mostly so tests can control caching.

    :param user_lookup: the user lookup instances consulted when resolving a token to a
        user or when checking that a user name is valid.
    :param cache_timer: clock used for cache expiration; time.time is used when omitted.
    :param cache_max_size: maximum number of entries in each of the token -> user and
        username -> validity caches.
    :param cache_user_expiration: default lifetime, in seconds, of token -> user cache
        entries. A user handler may override it per token.
    :param cache_is_valid_expiration: default lifetime of username -> validity cache
        entries. A user handler may override it per user.
    """
    no_Nones_in_iterable(user_lookup, 'user_lookup')

    # Index every handler by the authsource it reports serving.
    handlers = {}
    for handler in user_lookup:
        handlers[handler.get_authsource_id()] = handler
    self._lookup = handlers

    # Any falsy timer (None included) falls back to the wall clock.
    self._cache_timer = cache_timer or time.time

    # Both caches share the clock and the size limit; only the default TTL differs.
    shared_options = {'timer': self._cache_timer, 'maxsize': cache_max_size}
    self._user_cache = LRUCache(ttl=cache_user_expiration, **shared_options)
    self._valid_cache = LRUCache(ttl=cache_is_valid_expiration, **shared_options)
class RealCache:
    """
    Cache simulator built on the 'cacheout' library that counts hits and misses.
    """

    # Eviction policies this simulator knows how to build.
    _POLICIES = ('LFU', 'LRU')

    def __init__(self, cacheSize, policy='LFU'):
        """
        Create the simulated cache.

        :param cacheSize: value passed to the cacheout cache as ``maxsize``.
        :param policy: eviction policy name, 'LFU' (default) or 'LRU'.
        :raises ValueError: if ``policy`` is not a supported policy name.
        """
        # Reject unknown policies up front; otherwise ``self.cache`` would never be
        # created and the first request would fail with an AttributeError.
        if policy not in self._POLICIES:
            raise ValueError(
                'unknown cache policy {!r}; expected one of {}'.format(
                    policy, ', '.join(self._POLICIES)))
        cache_cls = LFUCache if policy == 'LFU' else LRUCache
        # cacheout documents ttl=0 as "entries do not expire".
        self.cache = cache_cls(maxsize=cacheSize, ttl=0, timer=time.time, default=None)
        self.hit, self.miss = 0, 0

    def handle_request(self, req):
        """
        Record one request: count a hit if ``req`` is cached, else insert it and
        count a miss.

        :param req: the cache key being requested.
        """
        # NOTE(review): this calls cacheout's underscore-prefixed _has/_add directly,
        # presumably to skip the public methods' locking - confirm that is intended.
        if self.cache._has(req):
            self.hit += 1
        else:
            self.cache._add(req, 1)
            self.miss += 1

    def hit_rate(self, trace):
        """
        Replay ``trace`` through the cache and return the cumulative hit rate.

        The counters are not reset, so the result covers every request this
        instance has handled so far, not only those in ``trace``.

        :param trace: iterable of request keys.
        :returns: hits / (hits + misses), or 0.0 if no request has been handled.
        """
        for req in trace:
            self.handle_request(req)
        total = self.hit + self.miss
        if not total:
            return 0.0
        return self.hit / total
def __init__(
        self,
        auth_url: str,
        auth_token: str,
        full_admin_roles: List[str] = None,
        read_admin_roles: List[str] = None,
        cache_max_size: int = 10000,
        cache_admin_expiration: int = 300,
        cache_valid_expiration: int = 3600):
    '''
    Create the client and verify the service token with one request.

    :param auth_url: The root url of the authentication service.
    :param auth_token: A valid token for the authentication service.
    :param full_admin_roles: role names stored as the full-administration role set.
    :param read_admin_roles: role names stored as the read-only administration role set.
    :param cache_max_size: the maximum size of the token -> admin and
        username -> validity caches.
    :param cache_admin_expiration: the default expiration time for the token -> admin
        cache in seconds. This time can be overridden by a user handler on a per token
        basis.
    :param cache_valid_expiration: the default expiration time for the
        username -> validity cache. This time can be overridden by a user handler on a
        per user basis.
    :raises InvalidTokenError: if the token is invalid
    '''
    root = _not_falsy(auth_url, 'auth_url')
    # Normalise to a trailing slash so endpoint paths can simply be appended.
    self._url = root if root.endswith('/') else root + '/'
    self._user_url = self._url + 'api/V2/users?list='
    self._me_url = self._url + 'api/V2/me'
    self._token = _not_falsy(auth_token, 'auth_token')

    # A missing or empty role list becomes an empty set.
    self._full_roles = set(full_admin_roles or ())
    self._read_roles = set(read_admin_roles or ())

    self._cache_timer = time.time
    cache_options = {'timer': self._cache_timer, 'maxsize': cache_max_size}
    self._admin_cache = LRUCache(ttl=cache_admin_expiration, **cache_options)
    self._valid_cache = LRUCache(ttl=cache_valid_expiration, **cache_options)

    # Disabled service identity check - Auth 0.4.1 needs to be deployed before this
    # will work:
    # r = requests.get(self._url, headers={'Accept': 'application/json'})
    # self._check_error(r)
    # if r.json().get('servicename') != 'Authentication Service':
    #     raise IOError(f'The service at {self._url} does not appear to be the KBase ' +
    #                   'Authentication Service')
    # The server time could be used to adjust for clock skew; probably not worth the
    # trouble.

    # Token check: query the user endpoint with an empty list and let _check_error
    # raise on any non-200 response.
    token_check_headers = {
        'Accept': 'application/json',
        'authorization': self._token
    }
    self._check_error(requests.get(self._user_url, headers=token_check_headers))
def prepare_bow(self, overwrite=False):
    """
    Build the stemmed bag-of-words term-document matrix for the corpus.

    Each document in self.data.corpus is lower-cased, split into runs of two or more
    ASCII letters, stripped of English stop words, expanded into n-grams according to
    self.parameters['ngram_range'], and Porter-stemmed word by word. The resulting
    term-document matrix is stored in self.vector_representations['bow'].

    :param overwrite: if a BoW was already prepared, should we overwrite?
    :return: nothing
    """
    self.logger.info('Preparing the BoW')
    if 'bow' in self.vector_representations and overwrite is False:
        self.logger.info('BoW already there, no overwrite')
        return

    # Stemming is memoized because the same words recur throughout the corpus.
    stemmer = PorterStemmer()
    cache = LRUCache(maxsize=2**16)

    @cache.memoize()
    def stem_word(word):
        return stemmer.stem(word)

    # scikit-learn ignores stop_words, lowercase, token_pattern and ngram_range on a
    # vectorizer whose analyzer is a callable. These options are therefore configured
    # on the helper vectorizer that builds the base analyzer, not on the final one,
    # where they would silently do nothing.
    base_analyzer = CountVectorizer(
        analyzer='word',
        token_pattern='[A-Za-z]{2,}',  # groups of two or more alphabetic characters
        stop_words='english',
        lowercase=True,
        ngram_range=self.parameters['ngram_range']).build_analyzer()

    def stemming_analyzer(doc):
        # The base analyzer joins the words of an n-gram with single spaces and the
        # token pattern never matches a space, so splitting on ' ' recovers the words
        # to stem individually.
        return (' '.join(stem_word(w) for w in gram.split(' '))
                for gram in base_analyzer(doc))

    bow = CountVectorizer(analyzer=stemming_analyzer,
                          min_df=self.parameters['min_df'],
                          max_df=self.parameters['max_df'],
                          max_features=self.parameters['max_features'])
    vecs = bow.fit_transform(raw_documents=self.data.corpus)
    self.logger.info('BoW with {} documents, {} terms'.format(
        vecs.shape[0], vecs.shape[1]))
    self.vector_representations['bow'] = vecs
class Task(Object): _running_cache: LRUCache = LRUCache( maxsize=constants.RUNNING_CACHE_MAXSIZE, ttl=constants.RUNNING_CACHE_TTL) encode_attr_value: Optional[Callable[..., ByteString]] = \ cast(Callable[..., ByteString], staticmethod(cloudpickle.dumps)) decode_attr_value: Optional[Callable[..., Any]] = \ cast(Callable[..., Any], staticmethod(cloudpickle.loads)) def __init__(self, path: Optional[str] = None, /, *, asyncable: Optional[Object] = None, args: Optional[Tuple[Any, ...]] = None, kwargs: Optional[Dict[str, Any]] = None, metadata: Optional[Dict[str, Any]] = None, site_uuid: Optional[str] = None, create: bool = True, bind: bool = True):
def __init__(self, cacheSize, policy='LFU'):
    """
    Create the simulated cache and zero the hit/miss counters.

    :param cacheSize: value passed to the cacheout cache as ``maxsize``.
    :param policy: eviction policy name, 'LFU' (default) or 'LRU'.
    :raises ValueError: if ``policy`` is neither 'LFU' nor 'LRU'.
    """
    # Reject unknown policies up front; otherwise ``self.cache`` would never be
    # created and the failure would only surface later as an AttributeError.
    if policy not in ('LFU', 'LRU'):
        raise ValueError(
            "unknown cache policy {!r}; expected 'LFU' or 'LRU'".format(policy))
    cache_cls = LFUCache if policy == 'LFU' else LRUCache
    # cacheout documents ttl=0 as "entries do not expire".
    self.cache = cache_cls(maxsize=cacheSize, ttl=0, timer=time.time, default=None)
    self.hit, self.miss = 0, 0
from typing_extensions import TypedDict import pint from pint import UnitRegistry as _UnitRegistry from pint import DimensionalityError as _DimensionalityError from pint import UndefinedUnitError as _UndefinedUnitError from pint import DefinitionSyntaxError as _DefinitionSyntaxError from SampleService.core.core_types import PrimitiveType from installed_clients.OntologyAPIClient import OntologyAPI import time from cacheout.lru import LRUCache # type: ignore _CACHE_MAX_SIZE = 10000 _CACHE_EXPIRATION = 3600 _TOKEN_SEP = '::' _ontology_terms_cache = LRUCache(timer=time.time, maxsize=_CACHE_MAX_SIZE, ttl=_CACHE_EXPIRATION) _ontology_ancestors_cache = LRUCache(timer=time.time, maxsize=_CACHE_MAX_SIZE, ttl=_CACHE_EXPIRATION) srv_wizard_url = None if 'KB_DEPLOYMENT_CONFIG' in os.environ: with open(os.environ['KB_DEPLOYMENT_CONFIG']) as f: for line in f: if line.startswith('srv-wiz-url'): srv_wizard_url = line.split('=')[1].strip() def _check_unknown_keys(d, expected): if type(d) != dict:
def __init__(self):
    """
    Set up ``self.caches``, a mapping that creates a fresh LRU cache the first time
    a key is accessed.
    """
    def _new_cache():
        # Every key gets its own cache, built with the same fixed settings.
        return LRUCache(maxsize=512, ttl=0, default=False)

    self.caches = defaultdict(_new_cache)
class UserLookupSet:
    """
    A container for a number of user handlers that provides caching for said handlers.
    """

    def __init__(
            self,
            user_lookup: Set[UserLookup],
            cache_timer: Callable[[], int] = None,
            cache_max_size: int = 10000,
            cache_user_expiration: int = 300,
            cache_is_valid_expiration: int = 3600) -> None:
        """
        Create the handler set.

        The cache_* parameters are mainly provided for testing purposes.

        :param user_lookup: the set of user lookup instances to query when looking up
            user names from tokens or checking that a provided user name is valid.
        :param cache_timer: the timer used for cache expiration. Defaults to time.time.
        :param cache_max_size: the maximum size of the token -> user and
            username -> validity caches.
        :param cache_user_expiration: the default expiration time for the token -> user
            cache in seconds. This time can be overridden by a user handler on a per
            token basis.
        :param cache_is_valid_expiration: the default expiration time for the
            username -> validity cache. This time can be overridden by a user handler
            on a per user basis.
        """
        no_Nones_in_iterable(user_lookup, 'user_lookup')
        # Index the handlers by the authsource each one reports serving.
        self._lookup = {lookup.get_authsource_id(): lookup for lookup in user_lookup}
        self._cache_timer = time.time if not cache_timer else cache_timer
        self._user_cache = LRUCache(timer=self._cache_timer, maxsize=cache_max_size,
                                    ttl=cache_user_expiration)
        self._valid_cache = LRUCache(timer=self._cache_timer, maxsize=cache_max_size,
                                     ttl=cache_is_valid_expiration)

    def _check_authsource_id(self, authsource_id: AuthsourceID) -> None:
        """
        Ensure a handler is registered for the authsource.

        :raises NoSuchAuthsourceError: if there's no handler for the provided authsource.
        """
        not_none(authsource_id, 'authsource_id')
        if authsource_id not in self._lookup:
            raise NoSuchAuthsourceError(authsource_id.id)

    def _calc_ttl(self, epoch, rel):
        """
        Combine a handler's absolute and relative expiration hints into one TTL.

        :param epoch: absolute expiration time on the cache timer's clock, or a falsy
            value if the handler gave none.
        :param rel: relative lifetime, or a falsy value if the handler gave none.
        :returns: None when neither hint is given (the cache default then applies),
            otherwise the shorter of the time remaining until ``epoch`` and ``rel``.
            The result is zero or negative when ``epoch`` has already passed.
        """
        if not rel and not epoch:
            return None
        if not rel:
            return epoch - self._cache_timer()
        if not epoch:
            return rel
        return min(epoch - self._cache_timer(), rel)

    @staticmethod
    def _is_cacheable(ttl) -> bool:
        """
        Tell whether an entry with the given TTL should be stored in a cache.

        cacheout documents a TTL of 0 as "entries do not expire", and its
        implementation appears to treat negative values the same way. A non-positive
        TTL here means the entry is already expired, so storing it would keep stale
        data until eviction rather than dropping it.

        :param ttl: a value returned by _calc_ttl.
        :returns: True for None (use the cache default) or a positive TTL.
        """
        return ttl is None or ttl > 0

    def get_user(self, authsource_id: AuthsourceID, token: Token) -> Tuple[User, bool]:
        """
        Get a user given the user's token.

        :param authsource_id: the authsource where the user resides.
        :param token: the users's token.
        :raises TypeError: if any of the arguments are None.
        :raises NoSuchAuthsourceError: if there's no handler for the provided authsource.
        :raises InvalidTokenError: if the token is invalid.
        :returns: a tuple of the user and a boolean indicating whether the authsource
            claims the user is a mapping service system admin.
        """
        not_none(token, 'token')
        self._check_authsource_id(authsource_id)
        # None default causes a key error
        cacheres = self._user_cache.get((authsource_id, token), default=False)
        if cacheres:
            return cacheres
        user, admin, epoch, rel = self._lookup[authsource_id].get_user(token)
        ttl = self._calc_ttl(epoch, rel)
        # Skip caching results that are already expired (see _is_cacheable).
        if self._is_cacheable(ttl):
            self._user_cache.set((authsource_id, token), (user, admin), ttl=ttl)
        return (user, admin)

    def is_valid_user(self, user: User) -> bool:
        """
        Check whether a given user exists.

        Only positive results are cached; a user that does not exist is looked up
        again on every call.

        :param user: the user to check.
        :raises NoSuchAuthsourceError: if there's no handler for the user's authsource.
        """
        not_none(user, 'user')
        self._check_authsource_id(user.authsource_id)
        # None default causes a key error
        exists = self._valid_cache.get(user, default=False)
        if not exists:
            exists, epoch, rel = self._lookup[
                user.authsource_id].is_valid_user(user.username)
            if exists:
                ttl = self._calc_ttl(epoch, rel)
                # Skip caching results that are already expired (see _is_cacheable).
                if self._is_cacheable(ttl):
                    self._valid_cache.set(user, True, ttl=ttl)
        return exists
class KBaseUserLookup:
    '''
    A client for contacting the KBase authentication server to verify user names.
    '''

    def __init__(
            self,
            auth_url: str,
            auth_token: str,
            full_admin_roles: List[str] = None,
            read_admin_roles: List[str] = None,
            cache_max_size: int = 10000,
            cache_admin_expiration: int = 300,
            cache_valid_expiration: int = 3600):
        '''
        Create the client and verify the service token with one request.

        :param auth_url: The root url of the authentication service.
        :param auth_token: A valid token for the authentication service.
        :param full_admin_roles: role names that map to AdminPermission.FULL.
        :param read_admin_roles: role names that map to AdminPermission.READ.
        :param cache_max_size: the maximum size of the token -> admin and
            username -> validity caches.
        :param cache_admin_expiration: the default expiration time for the
            token -> admin cache in seconds. This time can be overridden by a user
            handler on a per token basis.
        :param cache_valid_expiration: the default expiration time for the
            username -> validity cache. This time can be overridden by a user handler
            on a per user basis.
        :raises InvalidTokenError: if the token is invalid
        '''
        root = _not_falsy(auth_url, 'auth_url')
        # Normalise to a trailing slash so endpoint paths can simply be appended.
        self._url = root if root.endswith('/') else root + '/'
        self._user_url = self._url + 'api/V2/users?list='
        self._me_url = self._url + 'api/V2/me'
        self._token = _not_falsy(auth_token, 'auth_token')

        # A missing or empty role list becomes an empty set.
        self._full_roles = set(full_admin_roles or ())
        self._read_roles = set(read_admin_roles or ())

        self._cache_timer = time.time
        cache_options = {'timer': self._cache_timer, 'maxsize': cache_max_size}
        self._admin_cache = LRUCache(ttl=cache_admin_expiration, **cache_options)
        self._valid_cache = LRUCache(ttl=cache_valid_expiration, **cache_options)

        # Disabled service identity check - Auth 0.4.1 needs to be deployed before this
        # will work:
        # r = requests.get(self._url, headers={'Accept': 'application/json'})
        # self._check_error(r)
        # if r.json().get('servicename') != 'Authentication Service':
        #     raise IOError(f'The service at {self._url} does not appear to be the KBase ' +
        #                   'Authentication Service')
        # The server time could be used to adjust for clock skew; probably not worth
        # the trouble.

        # Token check: query the user endpoint with an empty list and let _check_error
        # raise on any non-200 response.
        token_check_headers = {
            'Accept': 'application/json',
            'authorization': self._token
        }
        self._check_error(requests.get(self._user_url, headers=token_check_headers))
        # Verifying the response body still needs a mock-based test. YAGNI for now.
        # if r.json() != {}:
        #     raise ValueError(f'Invalid auth url, expected empty map, got {r.text}')

    def _check_error(self, r):
        '''
        Raise an appropriate exception unless the response has status code 200.

        :param r: the response object returned by requests.
        :raises IOError: if the body is not JSON or carries an unrecognised error code.
        :raises InvalidTokenError: if the server reports application code 10020.
        :raises InvalidUserError: if the server reports application code 30010.
        '''
        if r.status_code == 200:
            return
        try:
            payload = r.json()
        except Exception:
            err = (
                'Non-JSON response from KBase auth server, status code: '
                + str(r.status_code))
            logging.getLogger(__name__).info('%s, response:\n%s', err, r.text)
            raise IOError(err)
        # A JSON body is taken as evidence that this is the auth server, so the error
        # structure below is relied upon without further checks.
        error = payload['error']
        appcode = error.get('appcode')
        if appcode == 10020:  # Invalid token
            raise InvalidTokenError(
                'KBase auth server reported token is invalid.')
        if appcode == 30010:  # Invalid username
            raise InvalidUserError(
                'The KBase auth server is being very assertive about '
                'one of the usernames being illegal: ' + error['message'])
        # No other error codes are handled specially for now (disabled accounts might
        # deserve it later).
        raise IOError('Error from KBase auth server: ' + error['message'])

    def invalid_users(self, usernames: Sequence[UserID]) -> List[UserID]:
        '''
        Check whether users exist in the authentication service.

        Users already recorded as valid in the cache are not sent to the service, and
        users the service confirms are added to the cache.

        :param usernames: the users to check.
        :returns: A list of users that have legal usernames but do not exist in the
            authentication service.
        :raises ValueError: if usernames is None.
        :raises InvalidTokenError: if the token has expired
        :raises InvalidUserError: if any of the user names are illegal user names.
        '''
        if usernames is None:
            raise ValueError('usernames cannot be None')
        if not usernames:
            return []
        _no_falsy_in_iterable(usernames, 'usernames')

        # Only users without a cached positive result need to be sent to the service.
        unverified = []
        for user in usernames:
            if not self._valid_cache.get(user.id, default=False):
                unverified.append(user)
        if not unverified:
            return []

        query = ','.join(user.id for user in unverified)
        response = requests.get(self._user_url + query,
                                headers={'Authorization': self._token})
        self._check_error(response)
        known = response.json()

        # Remember the users the service knows about; collect the rest for the caller.
        missing = []
        for user in unverified:
            if user.id in known:
                self._valid_cache.set(user.id, True)
            else:
                missing.append(user)
        return missing

    def is_admin(self, token: str) -> Tuple[AdminPermission, str]:
        '''
        Check whether a user is a service administrator.

        :param token: The user's token.
        :returns: A tuple consisting of an enum indicating the user's administration
            permissions, if any, and the username.
        '''
        # TODO CODE should regex the token to check for \n etc., but the SDK has
        # already checked it
        _not_falsy(token, 'token')
        cached = self._admin_cache.get(token, default=False)
        if cached:
            return cached
        response = requests.get(self._me_url, headers={'Authorization': token})
        self._check_error(response)
        profile = response.json()
        result = (self._get_role(profile['customroles']), profile['user'])
        self._admin_cache.set(token, result)
        return result

    def _get_role(self, roles):
        '''
        Map a collection of role names to the strongest matching admin permission.

        :param roles: the role names held by the user.
        :returns: FULL if any role is a full-admin role, otherwise READ if any role is
            a read-admin role, otherwise NONE.
        '''
        held = set(roles)
        # Ordered strongest first so that a full-admin role wins over a read-admin one.
        permission_levels = (
            (self._full_roles, AdminPermission.FULL),
            (self._read_roles, AdminPermission.READ),
        )
        for granting_roles, permission in permission_levels:
            if held & granting_roles:
                return permission
        return AdminPermission.NONE