def __init__(self, maxsize, ttl=MAXTTL, timer=time.time, getsizeof=None):
    Cache.__init__(self, maxsize, getsizeof)
    self.__root = root = _Link()
    root.prev = root.next = root
    self.__links = collections.OrderedDict()
    self.__timer = _Timer(timer)
    self.__ttl = ttl
    # CHANGE: .set() is the same as setitem
    self.set = self.__setitem__

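# --- Usage sketch (not from the original source): assuming the __init__ above
# belongs to a cachetools-style TTLCache subclass, the `.set` alias it adds is
# just another spelling of item assignment. The stock cachetools.TTLCache shows
# the expiry behaviour the class builds on.
from cachetools import TTLCache

_demo = TTLCache(maxsize=128, ttl=60)   # entries expire 60 seconds after insertion
_demo["token"] = "abc123"               # plain __setitem__; with the alias above: _demo.set("token", "abc123")
assert _demo.get("token") == "abc123"   # still fresh, so the value is returned
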
def __init__(self, maxsize, ttl, expiry_callback=None, timer=default_timer, getsizeof=None):
    Cache.__init__(self, maxsize, getsizeof)
    self.__root = root = _Link()
    root.prev = root.next = root
    self.__links = collections.OrderedDict()
    self.__timer = _Timer(timer)
    self.__ttl = ttl
    self.__expiry_callback = expiry_callback

def evaluate_agent(agent, game_args, cache_size=144):
    # Try and prevent agents from using more than 1 thread
    torch.set_num_threads(1)

    game = Game(**game_args, view_size=agent.view)
    cache = Cache(cache_size)
    idle_detector = IdleDetector(False)

    while game.game_over == False:
        player_view = game.get_player_view()
        view_hashable = player_view.tobytes()

        # Check cache
        if view_hashable in cache:
            keys = cache[view_hashable]
        else:
            keys = agent.evaluate(player_view)
            if cache_size > 0:
                cache[view_hashable] = keys

        game.update(keys)

        if idle_detector.update(game.player.tile) is True:
            game.game_over = True
            game.game_over_type = Game.PLAYER_TIMEOUT

    return (game.player.fitness, game.game_over_type)

def __init__(self):
    # Web3 instance connecting to node
    self.w3 = Web3(Web3.HTTPProvider(NODE_URL))
    # Cache with 3 categories: fast, medium, slow
    self.priceCache = priceCache = Cache(maxsize=3)
    # Caching parameters
    self.cacheInterval = CACHE_INTERVAL
    self.blocksToCache = 150
    # Construct cache middleware with an LRU cache of 150 items
    self.block_hash_cache_middleware = construct_simple_cache_middleware(
        cache_class=partial(LRUCache, self.blocksToCache),
        rpc_whitelist='eth_getBlockByHash'
    )
    # Add caching to the middleware stack
    self.w3.middleware_stack.add(self.block_hash_cache_middleware)
    # Faucet account initialization with private key
    self.faucetAccount = Account.privateKeyToAccount(ETH_PRIVATE_KEY)
    # Transaction parameters of the faucet
    self.txGas = 314150
    self.txGasPrice = 20000000000
    self.chainId = 3
    self.txData = '53656e742066726f6d20676173466175636574202a2e2a'

class CredentialProvider:
    credentials = None
    cache = Cache(maxsize=10)

    CREDENTIAL_PROVIDERS = [
        FromCodeCredentialProvider,
        FromEnvironmentVariablesCredentialProvider,
        FromSecretsCredentialProvider,
        FromConfigFileCredentialProvider
    ]

    def __init__(self, account='default', credentials=None):
        self.account = account
        for cp in self.CREDENTIAL_PROVIDERS:
            try:
                self.credentials = cp(account=account, credentials=credentials)()
                break
            except MissingCredentials:
                continue
        if self.credentials:
            self.credentials = self.Config(**self.credentials)
        else:
            raise MissingCredentials(
                f'Credentials are missing: {", ".join(required_credentials)}')

    class Config:
        def __init__(self, **kwargs):
            self.refresh_token = kwargs.get('refresh_token')
            self.lwa_app_id = kwargs.get('lwa_app_id')
            self.lwa_client_secret = kwargs.get('lwa_client_secret')
            self.aws_access_key = kwargs.get('aws_access_key')
            self.aws_secret_key = kwargs.get('aws_secret_key')
            self.role_arn = kwargs.get('role_arn')

def fast_match(self, cluster_ids: list, tokens):
    match_cluster = None

    max_sim = -1
    max_param_count = -1
    max_cluster = None

    for cluster_id in cluster_ids:
        # Try to retrieve cluster from cache with bypassing eviction
        # algorithm as we are only testing candidates for a match.
        cluster = Cache.get(self.id_to_cluster, cluster_id)
        if cluster is None:
            continue
        cur_sim, param_count = self.get_seq_distance(
            cluster.log_template_tokens, tokens)
        if cur_sim > max_sim or (cur_sim == max_sim and param_count > max_param_count):
            max_sim = cur_sim
            max_param_count = param_count
            max_cluster = cluster

    if max_sim >= self.sim_th:
        match_cluster = max_cluster

    return match_cluster

def __init__(self, maxsize, client=None, host=None, port=None, password=None,
             db=None, ttl=15 * 60, clear_on_exit=False, key_prefix='RedisCache'):
    Cache.__init__(self, maxsize, None)
    self.client_ = client
    self.host = host or redis_config.HOST
    self.port = port or redis_config.PORT
    self.password = password or redis_config.PASSWORD
    self.db = db or redis_config.DB
    self.ttl = ttl
    self.key_prefix = key_prefix
    if clear_on_exit:
        # Register the bound method itself; calling self.clear() here would
        # wipe the cache immediately and register its None return value.
        atexit.register(self.clear)

def main(img_file_path='images/1.jpeg', final_img_file_path='images/result_images/mosaic.jpeg',
         tile_directory='images/image_set', tile_size=2000, tile_to_user_img_pixel_ratio=100,
         user_img_len=1000, user_img_breadth=1000, cache_size=10):
    """
    :param img_file_path: The image that needs to be made into a mosaic
    :param final_img_file_path: The final mosaic image will be stored at this path
    :param tile_directory: The directory from which images are picked up to become part of the mosaic image
    :param tile_size: Size in pixels of each tile in the final mosaic image
    :param tile_to_user_img_pixel_ratio: This decides the number of sub images inside the mosaic
    :param user_img_len: The source image will be resized to this length
    :param user_img_breadth: The source image will be resized to this breadth
    :param cache_size: Number of images kept in memory; tune this to the RAM of the machine

    The function creates a mosaic image at final_img_file_path.

    The final image will contain the following number of sub images:
    (user_img_len / tile_to_user_img_pixel_ratio) * (user_img_breadth / tile_to_user_img_pixel_ratio)

    tile_size decides the number of pixels each sub image will contain, so it determines the clarity
    of each sub image. The higher the number, the higher the quality, with the downside of increased file size.
    """
    cache = Cache(cache_size)
    tiles_data = []
    tile_file_names = []
    for root, subFolders, files in os.walk(tile_directory):
        for tile_name in files:
            if tile_name.endswith(".jpeg") or tile_name.endswith(".JPG"):
                tile_file_names.append(os.path.join(root, tile_name))

    user_img = Image.open(img_file_path)
    user_img = user_img.resize((user_img_len, user_img_breadth))
    user_img_data = np.array(user_img)
    column_list = []
    img_index_dict = {}
    for r in range(user_img_breadth):
        row_list = []
        for c in range(user_img_len):
            index = ((r % tile_to_user_img_pixel_ratio), (c % tile_to_user_img_pixel_ratio))
            img_index = (int(r / tile_to_user_img_pixel_ratio), int(c / tile_to_user_img_pixel_ratio))
            if img_index not in img_index_dict:
                img_index_dict[img_index] = random.randrange(len(tile_file_names))
            tile_split_img = get_tile_split_img_dict(cache, img_index_dict[img_index], tile_file_names,
                                                     tile_size, tile_to_user_img_pixel_ratio)[index]
            tint_img = Image.new('RGB', tile_split_img.size, tuple(user_img_data[r, c]))
            new_data = np.array(Image.blend(tile_split_img, tint_img, 0.8))
            print(r, c, user_img_breadth, user_img_len, tile_size, user_img_data[r, c])
            row_list.append(new_data)
        column_list.append(np.concatenate(row_list, axis=1))
        del row_list

    new_img_data = np.concatenate(column_list, axis=0)
    del column_list
    new_img = Image.fromarray(new_img_data).convert('RGB')
    new_img.save(final_img_file_path)

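# --- Worked example of the tile-count arithmetic from the docstring above, using
# the function's default arguments (illustrative only):
tiles_per_row = 1000 // 100      # user_img_len // tile_to_user_img_pixel_ratio
tiles_per_col = 1000 // 100      # user_img_breadth // tile_to_user_img_pixel_ratio
total_sub_images = tiles_per_row * tiles_per_col   # 10 * 10 = 100 sub images,
                                                   # each rendered at tile_size = 2000 px
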
def __init__(self, *args, **kwargs):
    super(ALGO, self).__init__(*args, **kwargs)

    self.base_url = 'http://al.go.leg.br'

    self.institution, _ = Institution.objects.get_or_create(
        siglum='ALGO', name=u'Assembléia Legislativa do Estado de Goiás'
    )

    self.legislature, _ = Legislature.objects.get_or_create(
        institution=self.institution,
        date_start=datetime(2015, 1, 1),
        date_end=datetime(2018, 12, 31)
    )

    self.list_of_legislators_cache = Cache(1024)
    self.expenses_nature_cached = {}

def __getitem__(self, key):
    self._key_check(key)
    try:
        value = Cache.__getitem__(self, key)
    except KeyError:
        try:
            value = self._second_gen[key]
        except KeyError:
            try:
                value = self._get_item(key)
            except KeyError as ke3:
                raise ke3
            else:
                self.__setitem__(key, value)
        else:
            self.__setitem__(key, value)
            if key in self._second_gen:
                # the second gen clean up could be triggered during set in first gen
                del self._second_gen[key]
    else:
        self._update_order(key)
    return value

def __init__(self, delay=1.5, timeout=15, init_cache=False,
             executable_path="geckodriver", log_path="geckodriver.log",
             cache=Cache(24), logger=create_logger("milanuncios"),
             debug=False, firefox_binary="/usr/bin/firefox", display=False):
    self.main_url = "https://www.milanuncios.com"

    self.timeout = timeout
    self.delay = delay

    self.debug = debug
    self.init_cache = init_cache
    self.logger = logger
    if self.debug:
        self.logger.setLevel(logging.DEBUG)
    self.cache = cache

    self._executable_path = executable_path
    self._log_path = log_path
    self._firefox_binary = firefox_binary

    # Attributes defined on __enter__
    self.session = None
    self.firefox_user_processes = None
    self.browser = None
    self.browser_pid = None
    self.display = display

    # Account methods
    self.logged = False
    self._logged_soup = None

async def exists_cached(
    self,
    cache: Cache,
    memory: bool = True,
    **filters: Any,
) -> bool:
    id = filters.pop(self.repository.id_name, None)

    if id is None:
        raise RequiredKeyAttributeError(
            type(self.repository).__name__,
            self.repository.id_name,
            self.repository.key_attrs,
        )

    cache_key = self.cache_key(id, self.cache_key_suffix(**filters))
    entity_exists = cache.get(cache_key)

    if entity_exists is None:
        filters[self.repository.id_name] = id
        if memory:
            entity_exists = await self.exists_circuit(
                self.repository.query(**filters)
            )
        else:
            entity_exists = await self.exists_fallback_circuit(
                self.repository.query(memory=False, **filters)
            )

        if not entity_exists:
            cache[cache_key] = CACHE_ALREADY_NOT_FOUND
            return False
        else:
            cache[cache_key] = True

    elif entity_exists is CACHE_ALREADY_NOT_FOUND:
        return False

    return True

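# --- Minimal sketch of the negative-caching pattern used by exists_cached above,
# with a plain cachetools Cache and a hypothetical `lookup` callable: a sentinel
# object distinguishes "known to be absent" from "not cached yet", so repeated
# misses do not hit the backing store again until the entry is evicted.
from cachetools import Cache

NOT_FOUND = object()                 # stands in for CACHE_ALREADY_NOT_FOUND
_exists_cache = Cache(maxsize=1024)

def exists(key, lookup):
    hit = _exists_cache.get(key)
    if hit is None:                  # nothing cached yet: ask the backing store
        found = bool(lookup(key))
        _exists_cache[key] = True if found else NOT_FOUND
        return found
    return hit is not NOT_FOUND      # either True or the NOT_FOUND sentinel
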
class PartitionedDataSet(AbstractDataSet):
    # pylint: disable=too-many-instance-attributes,protected-access
    """``PartitionedDataSet`` loads and saves partitioned file-like data using
    the underlying dataset definition. For filesystem level operations it uses
    `fsspec`: https://github.com/intake/filesystem_spec.

    Example:
    ::

        >>> import pandas as pd
        >>> from kedro.io import PartitionedDataSet
        >>>
        >>> # these credentials will be passed to both 'fsspec.filesystem()' call
        >>> # and the dataset initializer
        >>> credentials = {"key1": "secret1", "key2": "secret2"}
        >>>
        >>> data_set = PartitionedDataSet(
        >>>     path="s3://bucket-name/path/to/folder",
        >>>     dataset="CSVDataSet",
        >>>     credentials=credentials
        >>> )
        >>> loaded = data_set.load()
        >>> # assert isinstance(loaded, dict)
        >>>
        >>> combine_all = pd.DataFrame()
        >>>
        >>> for partition_id, partition_load_func in loaded.items():
        >>>     partition_data = partition_load_func()
        >>>     combine_all = pd.concat(
        >>>         [combine_all, partition_data], ignore_index=True, sort=True
        >>>     )
        >>>
        >>> new_data = pd.DataFrame({"new": [1, 2]})
        >>> # creates "s3://bucket-name/path/to/folder/new/partition.csv"
        >>> data_set.save({"new/partition.csv": new_data})
        >>>
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        path: str,
        dataset: Union[str, Type[AbstractDataSet], Dict[str, Any]],
        filepath_arg: str = "filepath",
        filename_suffix: str = "",
        credentials: Dict[str, Any] = None,
        load_args: Dict[str, Any] = None,
        fs_args: Dict[str, Any] = None,
    ):
        """Creates a new instance of ``PartitionedDataSet``.

        Args:
            path: Path to the folder containing partitioned data.
                If path starts with the protocol (e.g., ``s3://``) then the
                corresponding ``fsspec`` concrete filesystem implementation will
                be used. If protocol is not specified,
                ``fsspec.implementations.local.LocalFileSystem`` will be used.
                **Note:** Some concrete implementations are bundled with
                ``fsspec``, while others (like ``s3`` or ``gcs``) must be
                installed separately prior to usage of the ``PartitionedDataSet``.
            dataset: Underlying dataset definition. This is used to instantiate
                the dataset for each file located inside the ``path``.
                Accepted formats are:
                a) object of a class that inherits from ``AbstractDataSet``
                b) a string representing a fully qualified class name to such class
                c) a dictionary with ``type`` key pointing to a string from b),
                other keys are passed to the Dataset initializer.
                Credentials for the dataset can be explicitly specified in
                this configuration.
            filepath_arg: Underlying dataset initializer argument that will
                contain a path to each corresponding partition file.
                If unspecified, defaults to "filepath".
            filename_suffix: If specified, only partitions that end with this
                string will be processed.
            credentials: Protocol-specific options that will be passed to
                ``fsspec.filesystem``
                https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.filesystem
                and the dataset initializer. If the dataset config contains
                explicit credentials spec, then such spec will take precedence.
                All possible credentials management scenarios are documented here:
                https://kedro.readthedocs.io/en/stable/04_user_guide/08_advanced_io.html#partitioned-dataset-credentials
            load_args: Keyword arguments to be passed into ``find()`` method of
                the filesystem implementation.
            fs_args: Extra arguments to pass into underlying filesystem class constructor
                (e.g. `{"project": "my-project"}` for ``GCSFileSystem``)

        Raises:
            DataSetError: If versioning is enabled for the underlying dataset.
        """
        # pylint: disable=import-outside-toplevel
        from fsspec.utils import infer_storage_options  # for performance reasons

        super().__init__()

        self._path = path
        self._filename_suffix = filename_suffix
        self._protocol = infer_storage_options(self._path)["protocol"]
        self._partition_cache = Cache(maxsize=1)

        dataset = dataset if isinstance(dataset, dict) else {"type": dataset}
        self._dataset_type, self._dataset_config = parse_dataset_definition(dataset)
        if VERSION_KEY in self._dataset_config:
            raise DataSetError(
                "`{}` does not support versioning of the underlying dataset. "
                "Please remove `{}` flag from the dataset definition.".format(
                    self.__class__.__name__, VERSIONED_FLAG_KEY))

        if credentials:
            if CREDENTIALS_KEY in self._dataset_config:
                self._logger.warning(
                    KEY_PROPAGATION_WARNING,
                    {"keys": CREDENTIALS_KEY, "target": "underlying dataset"},
                )
            else:
                self._dataset_config[CREDENTIALS_KEY] = deepcopy(credentials)

        self._credentials = deepcopy(credentials) or {}

        self._fs_args = deepcopy(fs_args) or {}
        if self._fs_args:
            if "fs_args" in self._dataset_config:
                self._logger.warning(
                    KEY_PROPAGATION_WARNING,
                    {"keys": "filesystem arguments", "target": "underlying dataset"},
                )
            else:
                self._dataset_config["fs_args"] = deepcopy(self._fs_args)

        self._filepath_arg = filepath_arg
        if self._filepath_arg in self._dataset_config:
            warn(
                "`{}` key must not be specified in the dataset definition as it "
                "will be overwritten by partition path".format(self._filepath_arg))

        self._load_args = deepcopy(load_args) or {}
        self._sep = self._filesystem.sep
        # since some filesystem implementations may implement a global cache
        self._invalidate_caches()

    @property
    def _filesystem(self):
        # for performance reasons
        import fsspec  # pylint: disable=import-outside-toplevel

        protocol = "s3" if self._protocol in S3_PROTOCOLS else self._protocol
        return fsspec.filesystem(protocol, **self._credentials, **self._fs_args)

    @property
    def _normalized_path(self) -> str:
        if self._protocol in S3_PROTOCOLS:
            return urlparse(self._path)._replace(scheme="s3").geturl()
        return self._path

    @cachedmethod(cache=operator.attrgetter("_partition_cache"))
    def _list_partitions(self) -> List[str]:
        return [
            path
            for path in self._filesystem.find(self._normalized_path, **self._load_args)
            if path.endswith(self._filename_suffix)
        ]

    def _join_protocol(self, path: str) -> str:
        if self._path.startswith(self._protocol) and not path.startswith(self._protocol):
            return f"{self._protocol}://{path}"
        return path

    def _partition_to_path(self, path: str):
        dir_path = self._path.rstrip(self._sep)
        path = path.lstrip(self._sep)
        full_path = self._sep.join([dir_path, path]) + self._filename_suffix
        return full_path

    def _path_to_partition(self, path: str) -> str:
        dir_path = self._filesystem._strip_protocol(self._normalized_path)
        path = path.split(dir_path, 1).pop().lstrip(self._sep)
        if self._filename_suffix and path.endswith(self._filename_suffix):
            path = path[:-len(self._filename_suffix)]
        return path

    def _load(self) -> Dict[str, Callable[[], Any]]:
        partitions = {}

        for partition in self._list_partitions():
            kwargs = deepcopy(self._dataset_config)
            # join the protocol back since PySpark may rely on it
            kwargs[self._filepath_arg] = self._join_protocol(partition)
            dataset = self._dataset_type(**kwargs)  # type: ignore
            partition_id = self._path_to_partition(partition)
            partitions[partition_id] = dataset.load

        if not partitions:
            raise DataSetError(f"No partitions found in `{self._path}`")

        return partitions

    def _save(self, data: Dict[str, Any]) -> None:
        for partition_id, partition_data in sorted(data.items()):
            kwargs = deepcopy(self._dataset_config)
            partition = self._partition_to_path(partition_id)
            # join the protocol back since tools like PySpark may rely on it
            kwargs[self._filepath_arg] = self._join_protocol(partition)
            dataset = self._dataset_type(**kwargs)  # type: ignore
            dataset.save(partition_data)
        self._invalidate_caches()

    def _describe(self) -> Dict[str, Any]:
        clean_dataset_config = (
            {k: v for k, v in self._dataset_config.items() if k != CREDENTIALS_KEY}
            if isinstance(self._dataset_config, dict)
            else self._dataset_config
        )
        return dict(
            path=self._path,
            dataset_type=self._dataset_type.__name__,
            dataset_config=clean_dataset_config,
        )

    def _invalidate_caches(self):
        self._partition_cache.clear()
        self._filesystem.invalidate_cache(self._normalized_path)

    def _exists(self) -> bool:
        return bool(self._list_partitions())

    def _release(self) -> None:
        super()._release()
        self._invalidate_caches()

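# --- Minimal sketch of the per-instance caching pattern used by _list_partitions
# above (illustrative class, not part of kedro): cachedmethod resolves the cache
# through operator.attrgetter at call time, so every instance owns its own
# Cache(maxsize=1) and can clear it independently, which is what _invalidate_caches()
# relies on.
import operator
from cachetools import Cache, cachedmethod

class Listing:
    def __init__(self):
        self._partition_cache = Cache(maxsize=1)

    @cachedmethod(cache=operator.attrgetter("_partition_cache"))
    def list_partitions(self):
        print("expensive filesystem walk...")   # runs only on a cache miss
        return ["part-1", "part-2"]

    def invalidate(self):
        self._partition_cache.clear()
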
def __init__(self, X, Y, R,
             target_sparsity=0.01, gamma0_v=1.0,
             lambda_params=(1e-6, 1e-6), nu_params=(1e-6, 1e-6),
             xi=0.999999, xi_prior_shape=(1, 1),
             check_finite=True, min_eigenval=0, jitter=1e-6):
    """The Probit model used for modeling Sparse Regression using a Gaussian field. :cite:`Engelhardt2014`.

    .. math::
        y | X, \\beta, \\beta_0, \\nu \\propto \\mathcal{N}(\\beta_0 1_n + X \\beta, \\nu^{-1} I_n)

    Parameters
    ----------
    X : ndarray
        The predictor matrix of real numbers, n x p in size, where n is the no. of samples
        (genotypes) and p is the no. of features (SNPs).
    Y : ndarray
        The response vector of real numbers, n x 1 in size, with each value representing
        the phenotype value for the sample.
    R : ndarray
        The covariance matrix for the SNPs, p x p in size. The matrix may not be
        positive-definite, but is converted to one internally.
    target_sparsity : float
        The proportion of included predictors. For example, a value of 0.01 indicates that
        around 1% of total SNPs are expected to be included in our model. This value affects
        the probit threshold gamma_0 of the model.
    gamma0_v : float
        Variance of the probit threshold gamma_0.
    lambda_params : tuple
        Shape parameter and inverse-scale parameter of the gamma prior placed on the model
        parameter lambda, where lambda is the inverse squared global scale parameter
        for the regression weights.
    nu_params : tuple
        Shape parameter and inverse-scale parameter of the gamma prior placed on the model
        parameter nu, where nu is the residual precision.
    xi : float
        The shrinkage constant in the interval [0,1] to regularize the covariance matrix
        towards the identity matrix. This ensures that the covariance matrix is positive
        definite. A larger xi value biases our estimate towards the supplied R matrix,
        a lower value biases it towards the identity matrix. If None, then xi is sampled
        from a beta distribution with shape parameters specified by the tuple xi_prior_shape.
    xi_prior_shape : tuple
        Shape parameters of the beta prior placed on the model parameter xi, specified as
        a 2-tuple of real values. This argument is ignored and xi is not sampled,
        if it is specified explicitly using the xi parameter.
    check_finite : bool
        Whether to check that the input matrices contain only finite numbers. Disabling may
        give a performance gain, but may result in problems (crashes, non-termination)
        if the inputs do contain infinities or NaNs. This parameter is passed on to several
        linear algebra functions in scipy internally.
    min_eigenval : float
        Minimum eigenvalue we can accept in the covariance matrix. Any eigenvalues
        encountered below this threshold are set to zero, and the resulting covariance
        matrix is normalized to give ones on the diagonal.
    jitter : float
        A small value to add to the diagonals of the covariance matrix to avoid
        conditioning issues.
    """
    self.X = X
    self.Y = Y
    self.R = Mvn(cov=R, min_eigenval=min_eigenval, jitter=jitter)

    self.N, self.P = self.X.shape
    self.nu_a, self.nu_b = nu_params
    self.check_finite = check_finite

    if xi is None:
        self.sample_xi = True
        self._xi_distribution = beta(*xi_prior_shape)
        self.xi = self._xi_distribution.mean()
    else:
        self.sample_xi = False
        self.xi = xi

    # Initialize scalar model distributions and the parameter values to their prior means.
    self._gamma0_distribution = norm(loc=norm.ppf(1.0 - target_sparsity), scale=gamma0_v)
    self.gamma0 = self._gamma0_distribution.mean()
    self._lambda_distribution = gamma(lambda_params[0], scale=1. / lambda_params[1])
    self.lamb = self._lambda_distribution.mean()
    self._nu_distribution = gamma(self.nu_a, scale=1. / self.nu_b)
    self.nu = self._nu_distribution.mean()

    # Cache for holding probit prior distributions (multivariate normal distributions with 0 mean and known
    # covariance, possibly adjusted by a shrinkage factor xi expressing our confidence in the covariance).
    # A single iteration of MCMC calls on many computations on this distribution, so caching improves
    # performance significantly. A small cache size works just as well as a large one,
    # because the most recently used distribution tends to be used repeatedly in a single MCMC step.
    self._probit_cache = Cache(maxsize=4)

    # A cache used to hold the marginal PPI (Posterior Probability of Inclusion) distributions
    # p(y | X, gamma, gamma_0, nu, lambda) ~ Normal(..)
    # A small cache size works just as well as a large one, because the most recently used distribution tends
    # to be used repeatedly in a single MCMC step.
    self._ppi_cache = Cache(maxsize=8)

    # Initialize the sparsity function by generating a random variate from the model's probit distribution
    self.gamma = self.probit_distribution(self.xi).rvs()

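# --- Sketch of the memoisation the comments above describe (hypothetical helper,
# not the model's actual probit_distribution implementation): the shrunk covariance
# xi * R + (1 - xi) * I is assumed here as the form of regularisation towards the
# identity, and the frozen distribution is cached per xi value because a single
# MCMC step reuses it many times.
import numpy as np
from cachetools import Cache, cachedmethod
from scipy.stats import multivariate_normal

class ProbitPriorCache:
    def __init__(self, R):
        self.R = np.asarray(R)
        self._probit_cache = Cache(maxsize=4)

    @cachedmethod(lambda self: self._probit_cache)
    def probit_distribution(self, xi):
        p = self.R.shape[0]
        cov = xi * self.R + (1.0 - xi) * np.eye(p)   # shrinkage towards identity (assumed form)
        return multivariate_normal(mean=np.zeros(p), cov=cov)
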
def __init__(self, config, hass_api=None):
    # Defaults to UTC now() - every interval
    self.plant_update = {}
    self.hass_api = hass_api
    self.logger = logger
    self.config = config
    self.cache = Cache(maxsize=10)
    self.start_total_energy = {}
    self.pasttime = datetime.combine(date.today(), datetime.min.time()).timestamp()
    self.dsmr_access = threading.Condition(threading.Lock())
    if self.config.get('default', 'debug', fallback=False):
        logger.setLevel(logging.DEBUG)
    if not self._init_data_fields():
        sys.exit(1)
    # init energy cache
    # total_energy_cache.json
    self.persistant_cache_file = self.config.get(
        'default', 'persistant_cache_file', fallback='./persistant_cache.json')
    self._load_persistant_cache()
    # read data_fields
    try:
        with open(self.data_config_file) as json_file_config:
            self.config.data_field_config = json.load(json_file_config)
    except Exception as e:
        hybridlogger.ha_log(
            self.logger, self.hass_api, "ERROR",
            f"Error reading configuration file (data_fields.json). Exiting! Error: {e.args}"
        )
        raise e
        sys.exit(1)  # NOTE: unreachable after the raise above
    # read attributes
    if not self._read_attributes():
        sys.exit(1)

    self.every = self._get_interval()
    self.interval_aggregated = self._get_interval_aggregated()
    # Make sure we check for a recent update first
    self.last_update_time = datetime.now(timezone.utc) - timedelta(seconds=self.interval_aggregated)

    # Initialize plugin paths
    sys.path.append(self.__expand_path('plugin_output'))
    sys.path.append(self.__expand_path('plugin_client'))
    sys.path.append(self.__expand_path('plugin_localproxy'))

    self.city = self.config.get('default', 'city', fallback='Amsterdam')
    try:
        self.dl = daylight(self.city)
    except Exception as e:
        hybridlogger.ha_log(
            self.logger, self.hass_api, "ERROR",
            f"City '{self.city}' not recognized. Error: {e}")
        sys.exit(1)

    # Initialize client
    self._init_client()
    # Initialize output plugins
    self._init_output_plugins()

    self.omnik_api_level = 0

    # Init dsmr
    self._init_dsmr()

# stock data provider
from cachetools import Cache, keys, cached
import pandas as pd

__local_cache = Cache(maxsize=42)


def __hash_key_for_2(data1, *args):
    return keys.hashkey(id(data1), *args)


@cached(cache=__local_cache, key=__hash_key_for_2)
def create_dataframe(all_data, name='close'):
    trading_data = {}
    for data in all_data:
        trading_data[data.stock_id] = data.data_frame[name]

    panel = pd.DataFrame(data=trading_data)
    return panel.fillna(method='pad')


def filter_dataframe(data, start_date=None, end_date=None):
    if start_date:
        start_date = pd.to_datetime(start_date)
    if end_date:
        end_date = pd.to_datetime(end_date)
    if not start_date and not end_date:
        return data

# coding: utf-8
import re
from pkgutil import get_data

import sqlparse
from cachetools import cached, Cache

from synchromoodle.dbutils import Database

__statements_cache = Cache(maxsize=100)


def init(db: Database):
    run_script('data/ddl.sql', db)


def reset(db: Database):
    run_script('data/ddl.sql', db)


@cached(__statements_cache)
def _get_statements(path: str):
    script_data = str(get_data('test', path), 'utf8')
    cleaned_script_data = re.sub(r'/\*.+?\*/;\n', "", script_data, flags=re.MULTILINE)
    statements = sqlparse.split(cleaned_script_data)
    return statements


def run_script(script: str, db: Database, connect=True):
    if connect:

def close_issue(self, issue):
    headers = {
        'Authorization': f'token {self.token}',
        'Accept': 'application/vnd.github.v3+json'
    }
    params = {'state': 'closed'}
    response = requests.patch(
        f'https://api.github.com/repos/{self.user}/{self.repo}/issues/{issue}',
        headers=headers, json=params)
    response.raise_for_status()
    data = response.json()
    logging.info('response from github: %r', data)


@cached(Cache(maxsize=1))
def github_app_key():
    with open(os.getenv('GITHUB_APP_KEY'), 'rb') as keyfile:
        return load_pem_private_key(keyfile.read(), password=None)


@cached(TTLCache(maxsize=1, ttl=600))
def github_jwt() -> bytes:
    now = datetime.datetime.now(datetime.timezone.utc)
    delta_before = datetime.timedelta(0, 0, 0, 0, -1, 0, 0)  # 1 minute
    delta_after = datetime.timedelta(0, 0, 0, 0, 10, 0, 0)   # 10 minutes
    key = github_app_key()
    payload = {
        'exp': int((now + delta_after).timestamp()),
        'iat': int((now + delta_before).timestamp()),
        'iss': os.getenv('GITHUB_APP_ID')

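# --- Sketch of why TTLCache(maxsize=1, ttl=600) fits the function above: the JWT is
# issued with a ~10 minute expiry, so the cache entry is told to expire on the same
# schedule and the next call mints a fresh token. `expensive_token` is a stand-in,
# not the repository's real signing code.
from cachetools import TTLCache, cached

@cached(TTLCache(maxsize=1, ttl=600))
def expensive_token():
    return "signed-jwt"   # recomputed at most once every 10 minutes
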
class AbstractVersionedDataSet(AbstractDataSet, abc.ABC):
    """
    ``AbstractVersionedDataSet`` is the base class for all versioned data set
    implementations. All data sets that implement versioning should extend this
    abstract class and implement the methods marked as abstract.

    Example:
    ::

        >>> from pathlib import Path, PurePosixPath
        >>> import pandas as pd
        >>> from kedro.io import AbstractVersionedDataSet
        >>>
        >>>
        >>> class MyOwnDataSet(AbstractVersionedDataSet):
        >>>     def __init__(self, filepath, version, param1, param2=True):
        >>>         super().__init__(PurePosixPath(filepath), version)
        >>>         self._param1 = param1
        >>>         self._param2 = param2
        >>>
        >>>     def _load(self) -> pd.DataFrame:
        >>>         load_path = self._get_load_path()
        >>>         return pd.read_csv(load_path)
        >>>
        >>>     def _save(self, df: pd.DataFrame) -> None:
        >>>         save_path = self._get_save_path()
        >>>         df.to_csv(str(save_path))
        >>>
        >>>     def _exists(self) -> bool:
        >>>         path = self._get_load_path()
        >>>         return Path(path.as_posix()).exists()
        >>>
        >>>     def _describe(self):
        >>>         return dict(version=self._version, param1=self._param1, param2=self._param2)

    Example catalog.yml specification:
    ::

        my_dataset:
            type: <path-to-my-own-dataset>.MyOwnDataSet
            filepath: data/01_raw/my_data.csv
            versioned: true
            param1: <param1-value>  # param1 is a required argument
            # param2 will be True by default
    """

    def __init__(
        self,
        filepath: PurePosixPath,
        version: Optional[Version],
        exists_function: Callable[[str], bool] = None,
        glob_function: Callable[[str], List[str]] = None,
    ):
        """Creates a new instance of ``AbstractVersionedDataSet``.

        Args:
            filepath: Filepath in POSIX format to a file.
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.
            exists_function: Function that is used for determining whether
                a path exists in a filesystem.
            glob_function: Function that is used for finding all paths
                in a filesystem, which match a given pattern.
        """
        self._filepath = filepath
        self._version = version
        self._exists_function = exists_function or _local_exists
        self._glob_function = glob_function or iglob
        # 1 entry for load version, 1 for save version
        self._version_cache = Cache(maxsize=2)

    # 'key' is set to prevent cache key overlapping for load and save:
    # https://cachetools.readthedocs.io/en/stable/#cachetools.cachedmethod
    @cachedmethod(cache=attrgetter("_version_cache"), key=partial(hashkey, "load"))
    def _fetch_latest_load_version(self) -> str:
        # When load version is unpinned, fetch the most recent existing
        # version from the given path.
        pattern = str(self._get_versioned_path("*"))
        version_paths = sorted(self._glob_function(pattern), reverse=True)
        most_recent = next(
            (path for path in version_paths if self._exists_function(path)), None
        )

        if not most_recent:
            raise VersionNotFoundError(f"Did not find any versions for {self}")

        return PurePath(most_recent).parent.name

    # 'key' is set to prevent cache key overlapping for load and save:
    # https://cachetools.readthedocs.io/en/stable/#cachetools.cachedmethod
    @cachedmethod(cache=attrgetter("_version_cache"), key=partial(hashkey, "save"))
    def _fetch_latest_save_version(self) -> str:  # pylint: disable=no-self-use
        """Generate and cache the current save version"""
        return generate_timestamp()

    def resolve_load_version(self) -> Optional[str]:
        """Compute the version the dataset should be loaded with."""
        if not self._version:
            return None
        if self._version.load:
            return self._version.load
        return self._fetch_latest_load_version()

    def _get_load_path(self) -> PurePosixPath:
        if not self._version:
            # When versioning is disabled, load from original filepath
            return self._filepath
        load_version = self.resolve_load_version()
        return self._get_versioned_path(load_version)  # type: ignore

    def resolve_save_version(self) -> Optional[str]:
        """Compute the version the dataset should be saved with."""
        if not self._version:
            return None
        if self._version.save:
            return self._version.save
        return self._fetch_latest_save_version()

    def _get_save_path(self) -> PurePosixPath:
        if not self._version:
            # When versioning is disabled, return original filepath
            return self._filepath

        save_version = self.resolve_save_version()
        versioned_path = self._get_versioned_path(save_version)  # type: ignore

        if self._exists_function(str(versioned_path)):
            raise DataSetError(
                f"Save path `{versioned_path}` for {str(self)} must not exist if "
                f"versioning is enabled."
            )

        return versioned_path

    def _get_versioned_path(self, version: str) -> PurePosixPath:
        return self._filepath / version / self._filepath.name

    def load(self) -> Any:
        self.resolve_load_version()  # Make sure last load version is set
        return super().load()

    def save(self, data: Any) -> None:
        self._version_cache.clear()
        save_version = self.resolve_save_version()  # Make sure last save version is set

        try:
            super().save(data)
        except (FileNotFoundError, NotADirectoryError) as err:
            # FileNotFoundError raised in Win, NotADirectoryError raised in Unix
            _default_version = "YYYY-MM-DDThh.mm.ss.sssZ"
            raise DataSetError(
                f"Cannot save versioned dataset `{self._filepath.name}` to "
                f"`{self._filepath.parent.as_posix()}` because a file with the same "
                f"name already exists in the directory. This is likely because "
                f"versioning was enabled on a dataset already saved previously. Either "
                f"remove `{self._filepath.name}` from the directory or manually "
                f"convert it into a versioned dataset by placing it in a versioned "
                f"directory (e.g. with default versioning format "
                f"`{self._filepath.as_posix()}/{_default_version}/{self._filepath.name}"
                f"`)."
            ) from err

        load_version = self.resolve_load_version()
        if load_version != save_version:
            warnings.warn(
                _CONSISTENCY_WARNING.format(save_version, load_version, str(self))
            )

    def exists(self) -> bool:
        """Checks whether a data set's output already exists by calling
        the provided _exists() method.

        Returns:
            Flag indicating whether the output already exists.

        Raises:
            DataSetError: when underlying exists method raises error.
        """
        self._logger.debug("Checking whether target of %s exists", str(self))
        try:
            return self._exists()
        except VersionNotFoundError:
            return False
        except Exception as exc:  # SKIP_IF_NO_SPARK
            message = (
                f"Failed during exists check for data set {str(self)}.\n{str(exc)}"
            )
            raise DataSetError(message) from exc

    def _release(self) -> None:
        super()._release()
        self._version_cache.clear()

def cache(self, maxsize, missing=None, getsizeof=None):
    return Cache(maxsize, missing=missing, getsizeof=getsizeof)

class CredentialProvider:
    credentials = None
    cache = Cache(maxsize=10)

    def __init__(self, account='default', credentials=None):
        self.account = account
        self.read_credentials = [
            self.from_env,
            self.from_secrets,
            self.read_config
        ]
        if credentials:
            self.credentials = self.Config(**credentials)
            missing = self.credentials.check_config()
            if len(missing):
                raise MissingCredentials(
                    f'The following configuration parameters are missing: {missing}'
                )
        else:
            self.load_credentials()

    def load_credentials(self):
        for read_method in self.read_credentials:
            if read_method():
                return True

    def from_secrets(self):
        if not os.environ.get('SP_API_AWS_SECRET_ID', None):
            return
        try:
            client = boto3.client('secretsmanager')
            response = client.get_secret_value(
                SecretId=os.environ.get('SP_API_AWS_SECRET_ID'))
            secret = json.loads(response.get('SecretString'))
            account_data = dict(
                refresh_token=secret.get('SP_API_REFRESH_TOKEN'),
                lwa_app_id=secret.get('LWA_APP_ID'),
                lwa_client_secret=secret.get('LWA_CLIENT_SECRET'),
                aws_secret_key=secret.get('SP_API_SECRET_KEY'),
                aws_access_key=secret.get('SP_API_ACCESS_KEY'),
                role_arn=secret.get('SP_API_ROLE_ARN'))
            self.cache['account_data'] = json.dumps(account_data)
        except ClientError as client_error:
            return
        else:
            self.credentials = self.Config(**account_data)
            return len(self.credentials.check_config()) == 0

    def from_env(self):
        try:
            account_data = json.loads(self.cache['account_data'])
        except KeyError:
            account_data = dict(
                refresh_token=self._get_env('SP_API_REFRESH_TOKEN'),
                lwa_app_id=self._get_env('LWA_APP_ID'),
                lwa_client_secret=self._get_env('LWA_CLIENT_SECRET'),
                aws_secret_key=self._get_env('SP_API_SECRET_KEY'),
                aws_access_key=self._get_env('SP_API_ACCESS_KEY'),
                role_arn=self._get_env('SP_API_ROLE_ARN'))
        self.credentials = self.Config(**account_data)
        return len(self.credentials.check_config()) == 0

    def _get_env(self, key):
        return os.environ.get(f'{key}_{self.account}', os.environ.get(key))

    def read_config(self):
        try:
            config = confuse.Configuration('python-sp-api')
            config_filename = os.path.join(config.config_dir(), 'credentials.yml')
            config.set_file(config_filename)
            account_data = config[self.account].get()
            self.credentials = self.Config(**account_data)
            missing = self.credentials.check_config()
            if len(missing):
                raise MissingCredentials(
                    f'The following configuration parameters are missing: {missing}'
                )
        except confuse.exceptions.NotFoundError:
            raise MissingCredentials(
                f'The account {self.account} was not setup in your configuration file.'
            )
        except confuse.exceptions.ConfigReadError:
            raise MissingCredentials(
                f'Neither environment variables nor a config file were found. '
                f'Please set the correct variables, or use a config file (credentials.yml). '
                f'See https://confuse.readthedocs.io/en/latest/usage.html#search-paths for search paths.'
            )
        else:
            return True

    class Config:
        def __init__(self,
                     refresh_token,
                     lwa_app_id,
                     lwa_client_secret,
                     aws_access_key,
                     aws_secret_key,
                     role_arn,
                     use_instance_profile=None):
            self.refresh_token = refresh_token
            self.lwa_app_id = lwa_app_id
            self.lwa_client_secret = lwa_client_secret
            self.aws_access_key = aws_access_key
            self.aws_secret_key = aws_secret_key
            self.role_arn = role_arn

        def check_config(self):
            errors = []
            for k, v in self.__dict__.items():
                if not v and k != 'refresh_token':
                    errors.append(k)
            return errors

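# --- Hypothetical usage sketch for the provider above (placeholder values only):
# explicit credentials win and are validated immediately; otherwise the
# from_env -> from_secrets -> read_config chain is walked in order, with the
# class-level Cache(maxsize=10) letting from_env reuse what from_secrets stored.
provider = CredentialProvider(
    account='default',
    credentials=dict(
        refresh_token='<refresh-token>',
        lwa_app_id='<lwa-app-id>',
        lwa_client_secret='<lwa-client-secret>',
        aws_access_key='<aws-access-key>',
        aws_secret_key='<aws-secret-key>',
        role_arn='<role-arn>',
    ),
)
config = provider.credentials   # a CredentialProvider.Config instance
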
import os
from flask import Flask, redirect, render_template, request
import urllib
import datetime
import json
import ibm_db
import geocoder
import geopy.distance
from config import *
import time
from cachetools import cached, Cache
import pandas as pd

app = Flask(__name__)
cache = Cache(maxsize=1000000)


@cached(cache)
def load_csv(fname, ftype):
    df = pd.read_csv('./static/{}.{}'.format(fname, ftype))
    return df


@cached(cache)
def save_file(fname):
    f = open('./static/{}.txt'.format(fname), 'w')
    return f


cache_sp = load_csv('sp', 'csv')
cache_pc = load_csv('pc', 'csv')

def __init__(self, maxsize, getsizeof=None):
    Cache.__init__(self, maxsize, getsizeof=getsizeof)
    self.__order = collections.OrderedDict()

from ..meta import (
    MetaSymbol,
    MetaSymbolType,
    MetaOp,
    MetaVariable,
    MetaReificationError,
    meta_reify_iter,
    _metatize,
    metatize,
)
from .. import meta
from ..utils import HashableNDArray

tf_metatize_cache = Cache(50)


class MetaOpDefLibrary(object):
    """A singleton-like object that holds correspondences between TF Python API
    functions and the `OpDef`s they construct.

    It provides a map of `OpDef` names (lower-cased) to the Python API functions
    in `tensorflow.raw_ops`, as well as `inspect.Signature` objects for said
    functions so that default values and lists of arguments (keywords included)
    can be more easily used.
    """

    lower_op_name_to_raw = {
        op_name.lower(): op_name
        for op_name in dir(tf.raw_ops)

import spacy
import textacy
from textacy.compat import PY2, bytes_type

logger = logging.getLogger(__name__)

DEFAULT_DATA_DIR = textacy.__resources_dir__

_CACHE = {}
"""dict: key-value store used to cache datasets and such in memory"""


# TODO: maybe don't actually cache this -- it takes up a lot of RAM
# but is indeed a pain to load
@cached(Cache(1), key=partial(hashkey, 'spacy'))
def load_spacy(name, **kwargs):
    """
    Load a language-specific spaCy pipeline (collection of data, models, and
    resources) for tokenizing, tagging, parsing, etc. text; the most recent
    package loaded is cached.

    Args:
        name (str): standard 2-letter language abbreviation for a language;
            currently, spaCy supports English ('en') and German ('de')
        **kwargs: keyword arguments passed to :func:`spacy.load`; see the
            `spaCy docs <https://spacy.io/docs#english>`_ for details

            * via (str): non-default directory from which to load package data
            * vocab
            * tokenizer

from cachetools import cached, Cache

from ssd_detector.trainer import create_session, detection_model, InputValData
from ssd_detector.toolbox.coco_metrics_eval import calc_coco_metrics
from ssd_detector.toolbox.summary import group_ssd_heads, write_histogram_2d
from tfutils.helpers import draw_bboxes, load_module


def parse_args():
    parser = argparse.ArgumentParser(
        description='Perform evaluation of a detection model')
    parser.add_argument('path_to_config', help='Path to a config.py')
    return parser.parse_args()


@cached(Cache(100))
def load_coco(path):
    from pycocotools.coco import COCO
    return COCO(path)


# pylint: disable=too-many-locals,too-many-arguments
def eval_dataset(annotations, config, eval_name, checkpoint_path, session_config,
                 sample_images=None, dump_priors_info=True):
    log_dir = os.path.join(config.MODEL_DIR, 'eval_' + eval_name)
    run_config = tf.estimator.RunConfig(session_config=session_config)

def setdefault(self, *args, **kwargs):
    with self.__timer:
        return Cache.setdefault(self, *args, **kwargs)

def get(self, key):
    """
    Returns the value of the item with the specified key without updating
    the cache eviction algorithm.
    """
    return Cache.__getitem__(self, key)

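# --- Usage sketch (illustrative, assuming the method above lives on an LRUCache
# subclass): reading through the unbound Cache.__getitem__ returns the value
# without promoting the key, so it does not disturb which entry gets evicted next.
from cachetools import Cache, LRUCache

lru = LRUCache(maxsize=2)
lru["a"] = 1
lru["b"] = 2
Cache.__getitem__(lru, "a")   # peek at "a" without touching LRU order
lru["c"] = 3                  # "a" is still least recently used, so it is evicted
assert "a" not in lru and "b" in lru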