def recreate_cache():
    shutil.rmtree(diskcache_location, ignore_errors=True)
    os.mkdir(diskcache_location)
    diskcache_cache = Cache(diskcache_location, disk_pickle_protocol=pickle_protocol)
    diskcache_cache.close()
    return diskcache_cache
class CacheProxy:
    def __init__(self, script):
        self.config = get_configs()
        collectors_dir = self.config.get('base', 'collectors_dir')
        self.cache = Cache(
            os.path.join(collectors_dir, 'cache/script/', script))

    def get(self, key):
        return self.cache.get(key)

    def set(self, key, value):
        self.cache.set(key, value)

    def delete(self, key):
        self.cache.delete(key)

    def close(self):
        self.cache.close()

    def counter_to_gauge(self, key, value):
        last_value = self.get(key)
        self.set(key, value)
        if last_value is None:
            return None
        gauge = value - last_value
        # Discard negative deltas and deltas larger than the previous reading;
        # both indicate a counter reset or a bogus sample.
        if gauge < 0 or gauge > last_value:
            return None
        return gauge
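# A brief usage sketch for the counter_to_gauge() helper above, assuming the
# surrounding module's get_configs() configuration and a collector script named
# 'netstat' (both illustrative). The first sample has no baseline and yields
# None; later samples yield the delta since the previous reading.
proxy = CacheProxy('netstat')
print(proxy.counter_to_gauge('rx_bytes', 1000))   # None (no previous value)
print(proxy.counter_to_gauge('rx_bytes', 1600))   # 600
proxy.close()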
class BaseCacheAnalyzer(BaseAnalyzer):
    def __init__(self, cache_location=None, force=False):
        super().__init__()
        self.cache_location = cache_location
        self.cache = None
        self.force = force

    def initialize(self):
        from diskcache import Cache
        self.cache = Cache(self.cache_location or self.uid + "_cache")

    def filter(self, simulation):
        return self.force or not self.is_in_cache(simulation.id)

    def to_cache(self, key, data):
        self.cache.set(key, data)

    def from_cache(self, key):
        return self.cache.get(key)

    def is_in_cache(self, key):
        return key in self.cache

    def __del__(self):
        if self.cache:
            self.cache.close()

    @property
    def keys(self):
        return list(self.cache.iterkeys()) if self.cache else None
def worker(queue, eviction_policy, processes, threads):
    timings = co.defaultdict(list)
    cache = Cache('tmp', eviction_policy=eviction_policy)

    for index, (action, key, value) in enumerate(iter(queue.get, None)):
        start = time.time()

        try:
            if action == 'set':
                cache.set(key, value, expire=EXPIRE)
            elif action == 'get':
                result = cache.get(key)
            else:
                assert action == 'delete'
                cache.delete(key)
        except Timeout:
            miss = True
        else:
            miss = False

        stop = time.time()

        if (action == 'get' and processes == 1
                and threads == 1 and EXPIRE is None):
            assert result == value

        if index > WARMUP:
            delta = stop - start
            timings[action].append(delta)
            if miss:
                timings[action + '-miss'].append(delta)

    queue.put(timings)
    cache.close()
def worker(queue, eviction_policy):
    timings = {'get': [], 'set': [], 'delete': []}
    cache = Cache('tmp', eviction_policy=eviction_policy)

    for index, (action, key, value) in enumerate(iter(queue.get, None)):
        start = time.time()

        if action == 'set':
            cache.set(key, value, expire=EXPIRE)
        elif action == 'get':
            result = cache.get(key)
        else:
            assert action == 'delete'
            cache.delete(key)

        stop = time.time()

        if action == 'get' and PROCESSES == 1 and THREADS == 1 and EXPIRE is None:
            assert result == value

        if index > WARMUP:
            timings[action].append(stop - start)

    queue.put(timings)
    cache.close()
class FileCache(BaseCache):
    """
    BaseCache implementation using files to store the data.

    This implementation uses diskcache.Cache; see
    http://www.grantjenks.com/docs/diskcache/api.html#cache
    for more information.

    This cache requires you to install diskcache using
    `pip install diskcache`.
    """

    def __init__(self, path, **settings):
        """
        Constructor

        Arguments:
            path {String} -- The path on the disk to save the data
            settings {dict} -- The settings values for diskcache
        """
        from diskcache import Cache
        self._cache = Cache(path, **settings)

    def __del__(self):
        """
        Close the connection as the cache instance is deleted.
        Safe to use as there are no circular refs.
        """
        self._cache.close()

    def set(self, key, value, timeout=300):
        expire_time = None if timeout == 0 else timeout
        self._cache.set(_hash(key), value, expire=expire_time)

    def get(self, key, default=None):
        return self._cache.get(_hash(key), default)

    def invalidate(self, key):
        self._cache.delete(_hash(key))
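# A hypothetical usage sketch of FileCache above (BaseCache and the _hash
# helper come from the surrounding project). It illustrates the timeout
# convention: timeout=0 stores the entry without an expiry.
cache = FileCache('/tmp/filecache-demo')
cache.set('greeting', 'hello', timeout=60)     # expires after 60 seconds
cache.set('pinned', 'hello forever', timeout=0)  # no expiry
print(cache.get('greeting', default='miss'))   # 'hello'
cache.invalidate('greeting')
print(cache.get('greeting', default='miss'))   # 'miss'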
def worker(queue, eviction_policy, processes, threads):
    timings = {'get': [], 'set': [], 'delete': []}
    cache = Cache('tmp', eviction_policy=eviction_policy)

    for index, (action, key, value) in enumerate(iter(queue.get, None)):
        start = time.time()

        if action == 'set':
            cache.set(key, value, expire=EXPIRE)
        elif action == 'get':
            result = cache.get(key)
        else:
            assert action == 'delete'
            cache.delete(key)

        stop = time.time()

        if action == 'get' and processes == 1 and threads == 1 and EXPIRE is None:
            assert result == value

        if index > WARMUP:
            timings[action].append(stop - start)

    queue.put(timings)
    cache.close()
class CacheInteraction:
    def __init__(self, dimension=DimensionType.DIM_2D):
        self._cache = Cache(settings.CACHE_ROOT)
        self._dimension = dimension

    def __del__(self):
        self._cache.close()

    def get_buff_mime(self, chunk_number, quality, db_data):
        chunk, tag = self._cache.get('{}_{}_{}'.format(db_data.id, chunk_number, quality), tag=True)

        if not chunk:
            chunk, tag = self.prepare_chunk_buff(db_data, quality, chunk_number)
            self.save_chunk(db_data.id, chunk_number, quality, chunk, tag)
        return chunk, tag

    def prepare_chunk_buff(self, db_data, quality, chunk_number):
        from cvat.apps.engine.frame_provider import FrameProvider  # TODO: remove circular dependency
        writer_classes = {
            FrameProvider.Quality.COMPRESSED: Mpeg4CompressedChunkWriter
                if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter,
            FrameProvider.Quality.ORIGINAL: Mpeg4ChunkWriter
                if db_data.original_chunk_type == DataChoice.VIDEO else ZipChunkWriter,
        }

        image_quality = 100 if writer_classes[quality] in [Mpeg4ChunkWriter, ZipChunkWriter] else db_data.image_quality
        mime_type = 'video/mp4' if writer_classes[quality] in [Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter] else 'application/zip'

        kwargs = {}
        if self._dimension == DimensionType.DIM_3D:
            kwargs["dimension"] = DimensionType.DIM_3D
        writer = writer_classes[quality](image_quality, **kwargs)

        images = []
        buff = BytesIO()
        upload_dir = {
            StorageChoice.LOCAL: db_data.get_upload_dirname(),
            StorageChoice.SHARE: settings.SHARE_ROOT
        }[db_data.storage]
        if hasattr(db_data, 'video'):
            source_path = os.path.join(upload_dir, db_data.video.path)
            reader = VideoDatasetManifestReader(
                manifest_path=db_data.get_manifest_path(),
                source_path=source_path,
                chunk_number=chunk_number,
                chunk_size=db_data.chunk_size,
                start=db_data.start_frame,
                stop=db_data.stop_frame,
                step=db_data.get_frame_step())
            for frame in reader:
                images.append((frame, source_path, None))
        else:
            reader = ImageDatasetManifestReader(
                manifest_path=db_data.get_manifest_path(),
                chunk_number=chunk_number,
                chunk_size=db_data.chunk_size,
                start=db_data.start_frame,
                stop=db_data.stop_frame,
                step=db_data.get_frame_step())
            for item in reader:
                source_path = os.path.join(upload_dir, f"{item['name']}{item['extension']}")
                images.append((source_path, source_path, None))
        writer.save_as_chunk(images, buff)
        buff.seek(0)

        return buff, mime_type

    def save_chunk(self, db_data_id, chunk_number, quality, buff, mime_type):
        self._cache.set('{}_{}_{}'.format(db_data_id, chunk_number, quality), buff, tag=mime_type)
def main():
    # Parse command-line parameters
    args = parse_args()
    filters = [{'Name': 'tag:{}'.format(list(item.keys())[0]),
                'Values': list(item.values())} for item in args.tags]
    if args.verbosity:
        print("filters: {}".format(filters))

    # Create a session and connection to ec2
    session = boto3.Session(profile_name=args.profile)
    conn = session.client('ec2')

    # Create region list
    if args.region[0] == 'all':
        userRegion = [region['RegionName']
                      for region in conn.describe_regions()['Regions']]
    else:
        userRegion = args.region
    if args.verbosity:
        print("userRegion: {}".format(userRegion))

    # Cache results to disk
    cache = Cache(os.path.expanduser('~') + '/.awstools')

    # Create a list of {region: EC2Resources}
    regions = []
    for region in userRegion:
        k = "{}_{}_{}".format(
            args.profile, region,
            '_'.join(["{}_{}".format(x, y) for f in filters for x, y in f.items()]))
        if args.verbosity > 2:
            print(k)
        try:
            if not args.ignore_cache:
                regions.append({k: pickle.loads(cache[k])})
                if args.verbosity > 1:
                    print("{} from cache".format(k))
            else:
                raise KeyError
        except KeyError:
            regions.append({k: EC2Resources(session, filters, region)})
            if args.verbosity > 1:
                print("{} skipped cache".format(k))
    if args.verbosity > 1:
        print("regions: {}".format(regions))

    # Iterate through the list of {region: EC2Resources}, print, and update cache
    for rdict in regions:
        for region, ec2Instance in rdict.items():
            # Convert to JSON -> Python data structure -> JSON for proper formatting
            jsonContent = json.dumps(ec2Instance.instances, cls=DateTimeEncoder)
            from_json = json.loads(jsonContent)  # Shrugs
            print(json.dumps(from_json, indent=4))
            if args.verbosity > 2:
                print(ec2Instance.__dict__)
            if ec2Instance.session:
                cache.set(region, pickle.dumps(ec2Instance), expire=3600)

    cache.close()
class CacheInteraction:
    def __init__(self, dimension=DimensionType.DIM_2D):
        self._cache = Cache(settings.CACHE_ROOT)
        self._dimension = dimension

    def __del__(self):
        self._cache.close()

    def get_buff_mime(self, chunk_number, quality, db_data):
        chunk, tag = self._cache.get('{}_{}_{}'.format(db_data.id, chunk_number, quality), tag=True)

        if not chunk:
            chunk, tag = self.prepare_chunk_buff(db_data, quality, chunk_number)
            self.save_chunk(db_data.id, chunk_number, quality, chunk, tag)
        return chunk, tag

    def prepare_chunk_buff(self, db_data, quality, chunk_number):
        from cvat.apps.engine.frame_provider import FrameProvider  # TODO: remove circular dependency
        writer_classes = {
            FrameProvider.Quality.COMPRESSED: Mpeg4CompressedChunkWriter
                if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter,
            FrameProvider.Quality.ORIGINAL: Mpeg4ChunkWriter
                if db_data.original_chunk_type == DataChoice.VIDEO else ZipChunkWriter,
        }

        image_quality = 100 if writer_classes[quality] in [Mpeg4ChunkWriter, ZipChunkWriter] else db_data.image_quality
        mime_type = 'video/mp4' if writer_classes[quality] in [Mpeg4ChunkWriter, Mpeg4CompressedChunkWriter] else 'application/zip'

        kwargs = {}
        if self._dimension == DimensionType.DIM_3D:
            kwargs["dimension"] = DimensionType.DIM_3D
        writer = writer_classes[quality](image_quality, **kwargs)

        images = []
        buff = BytesIO()
        upload_dir = {
            StorageChoice.LOCAL: db_data.get_upload_dirname(),
            StorageChoice.SHARE: settings.SHARE_ROOT
        }[db_data.storage]
        if os.path.exists(db_data.get_meta_path()):
            source_path = os.path.join(upload_dir, db_data.video.path)
            meta = PrepareInfo(source_path=source_path, meta_path=db_data.get_meta_path())
            for frame in meta.decode_needed_frames(chunk_number, db_data):
                images.append(frame)
            writer.save_as_chunk([(image, source_path, None) for image in images], buff)
        else:
            with open(db_data.get_dummy_chunk_path(chunk_number), 'r') as dummy_file:
                images = [os.path.join(upload_dir, line.strip()) for line in dummy_file]
            writer.save_as_chunk([(image, image, None) for image in images], buff)
        buff.seek(0)

        return buff, mime_type

    def save_chunk(self, db_data_id, chunk_number, quality, buff, mime_type):
        self._cache.set('{}_{}_{}'.format(db_data_id, chunk_number, quality), buff, tag=mime_type)
def recreate_diskcache():
    if cache_options["CACHE_BACKEND"] != "redis":
        try:
            diskcache_cache = Cache(diskcache_location, disk_pickle_protocol=pickle_protocol)
        except DatabaseError:
            shutil.rmtree(diskcache_location, ignore_errors=True)
            os.mkdir(diskcache_location)
            diskcache_cache = Cache(diskcache_location, disk_pickle_protocol=pickle_protocol)
        diskcache_cache.clear()
        diskcache_cache.close()
class localCache(object):
    def __init__(self, config):
        self.cache_file = config.ad_cache_file

    def __enter__(self):
        self.cache = Cache(self.cache_file)
        self.cache.expire()
        return self

    def __exit__(self, exctype, exception, traceback):
        self.cache.close()

    def correct_ldap_group_list(self, group_list):
        # DELETE just-deleted groups from the list
        deleted_groups = list()
        if len(self.cache) > 0:
            for group in group_list:
                if group.get("name") in self.cache and self.cache.get(
                        group.get("name")).get("cache_state") == "deleted":
                    log.info('Group {0} in state "deleted" found in cache'.format(
                        group.get("name")))
                    deleted_groups.append(group)
        corrected_group_list = [x for x in group_list if x not in deleted_groups]

        # ADD just-created groups to the list
        created_groups = list()
        groups_name_list = [group.get("name") for group in group_list]
        if len(self.cache) > 0:
            cached = self.cache._sql('SELECT key FROM Cache').fetchall()
            for group in cached:
                if self.cache.get(group[0]).get("name") not in groups_name_list and \
                        self.cache.get(group[0]).get("cache_state") == "created":
                    log.info('Group {0} in state "created" found in cache'.format(group[0]))
                    created_groups.append(self.cache.get(group[0]))
        corrected_group_list.extend([x for x in created_groups if x not in groups_name_list])

        return corrected_group_list
class Cache(object):
    def __init__(self):
        try:
            self.cache = DC('./tmp')
        except Exception as ex:
            print('Got an exception while opening diskcache: {}'.format(ex))
            self.cache = None

    def __del__(self):
        try:
            self.cache.close()
        except Exception as ex:
            print('Got an exception while closing diskcache: {}'.format(ex))

    def set(self, key, value):
        if self.cache is not None:
            self.cache.set(key, BytesIO(value), read=True, tag=u'data')

    def get(self, key):
        if self.cache is not None:
            value = self.cache.get(key, default=b'', read=True, tag=True)
            if value is not None and value != b'':
                return value
        return None

    def pop(self, key):
        if self.cache is not None:
            value = self.cache.pop(key, default=b'', read=True, tag=True)
            if value is not None and value != b'':
                return value
        return None

    def delete(self, key):
        if self.cache is not None:
            self.cache.delete(key)

    def create_index(self):
        if self.cache is not None:
            self.cache.create_tag_index()
            return self.cache.tag_index
        return None

    def clear_all(self):
        if self.cache is not None:
            self.cache.clear()
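# An illustrative round trip through the wrapper above (DC is assumed to be an
# alias for diskcache.Cache in the surrounding module). Because set() stores
# the bytes behind a file handle (read=True) and get() passes read=True and
# tag=True, a hit comes back as a (file-like object, tag) pair.
c = Cache()
c.set('blob', b'payload')
hit = c.get('blob')
if hit is not None:
    reader, tag = hit
    print(reader.read(), tag)   # b'payload' data
c.clear_all()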
def save_to_cache(cache: Cache, data: dict):
    """Save dogs listing in persistent cache.

    Saves a diskcache Cache instance on disk. The available dogs dictionary
    is saved ('data' key) together with a timestamp ('time' key).

    Parameters
    ----------
    cache : Cache
        Cache object containing the dogs listing info.
    data : dict
        Dogs listing dictionary to cache for future use.
    """
    # Check for None before calling len(), otherwise data=None would raise
    # a TypeError.
    if data is None or len(data) == 0:
        print('Nothing to save in cache.')
    else:
        cache['data'] = data
        cache['time'] = dt.strftime(dt.now(), '%Y-%m-%d %H:%M:%S')
    cache.close()
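# A minimal sketch of the save path above. The directory name is illustrative;
# save_to_cache() closes the cache, so it is reopened to read the 'data' and
# 'time' keys back.
from diskcache import Cache

cache = Cache('dogs-cache-demo')
save_to_cache(cache, {'rex': {'breed': 'beagle'}})

cache = Cache('dogs-cache-demo')
print(cache['data'], cache['time'])
cache.close()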
class BaseCacheAnalyzer(BaseAnalyzer):
    def __init__(self, cache_location=None, force=False, delete_cache_when_done=False, **kwargs):
        super().__init__(**kwargs)
        self.cache_location = cache_location
        self.cache = None
        self.force = force
        self.delete_cache_when_done = delete_cache_when_done

    def initialize(self):
        from diskcache import Cache
        self.cache = Cache(self.cache_location or self.uid + "_cache")

    def filter(self, simulation):
        return self.force or not self.is_in_cache(simulation.id)

    def to_cache(self, key, data):
        self.cache.set(key, data)

    def from_cache(self, key):
        return self.cache.get(key)

    def is_in_cache(self, key):
        return key in self.cache

    def destroy(self):
        if self.cache:
            self.cache.close()
        if self.cache and self.delete_cache_when_done and os.path.exists(self.cache.directory):
            cache_directory = self.cache.directory
            del self.cache
            shutil.rmtree(cache_directory)

    @property
    def keys(self):
        return list(self.cache.iterkeys()) if self.cache else None
class Spider(object):
    def __init__(self, directory=cache_dir.abspath, expire=24 * 3600):
        self.cache = Cache(directory)
        self.expire = expire

    def close(self):
        self.cache.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def get_content(self, url):
        if url in self.cache:
            return self.cache[url]
        else:
            content = requests.get(url).content
            self.cache[url] = content
            return content
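# A short usage sketch for Spider above; the context-manager protocol ensures
# cache.close() runs even if a request raises. The directory and URL are
# illustrative (the default directory comes from the surrounding module's
# cache_dir). Note that `expire` is stored but not applied in get_content(),
# so cached pages persist until removed explicitly.
with Spider(directory='/tmp/spider-cache-demo') as spider:
    first = spider.get_content('https://example.com')    # network fetch
    second = spider.get_content('https://example.com')   # served from the cache
    assert first == second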
class CacheEnabled:
    def __init__(self):
        self.cache_directory = None
        self.cache = None
        self.queue = False

    def initialize_cache(self, shards=None, timeout=1, queue=False):
        self.reset_cache()

        # Create a temporary directory for the cache
        self.cache_directory = mkdtemp()

        # Create a queue?
        if queue:
            self.cache = Deque(directory=self.cache_directory)
            self.queue = True
        elif shards:
            self.cache = FanoutCache(self.cache_directory, shards=shards, timeout=timeout)
            self.queue = False
        else:
            self.cache = Cache(self.cache_directory, timeout=timeout)
            self.queue = False
        return self.cache

    def reset_cache(self):
        # If already initialized, destroy and recreate
        if self.cache and not self.queue:
            self.cache.close()
        else:
            del self.cache
        if self.cache_directory:
            shutil.rmtree(self.cache_directory)

    def __del__(self):
        self.reset_cache()
class KeyValueDB(Generic[NativeType, StorageType], abc.ABC):
    """Interface for concrete DB backend."""

    _native_type: NativeType
    _storage_type: StorageType

    def __init__(self, database_dir: Path):
        if not database_dir.exists():
            database_dir.mkdir(mode=0o750, parents=True)
        self._cache = Cache(str(database_dir))

    def __contains__(self, key: Any) -> bool:
        return self._cache.__contains__(key)

    def __delitem__(self, key: Any) -> bool:
        return self._cache.__delitem__(key)

    def __getitem__(self, key: Any) -> NativeType:
        return self._storage_to_native_type(self._cache.__getitem__(key))

    def __setitem__(self, key: Any, value: NativeType) -> None:
        return self._cache.__setitem__(key, self._native_to_storage_type(value))

    def _native_to_storage_type(self, value: NativeType) -> StorageType:
        if self._native_type is self._storage_type or self._storage_type is None:
            return cast(StorageType, value)
        else:
            return self._storage_type(value)

    def _storage_to_native_type(self, value: StorageType) -> NativeType:
        if self._native_type is self._storage_type or self._native_type is None:
            return cast(NativeType, value)
        else:
            return self._native_type(value)

    def close(self, *args, **kwargs) -> None:
        return self._cache.close()

    def get(self, key: Any, default: Any = None, *args, **kwargs) -> Union[Any, NativeType]:
        value = self._cache.get(key, default, *args, **kwargs)
        if value is default:
            return default
        else:
            return self._storage_to_native_type(value)

    def set(self, key: Any, value: NativeType, *args, **kwargs) -> bool:
        return self._cache.set(key, self._native_to_storage_type(value), *args, **kwargs)

    def touch(self, *args, **kwargs) -> bool:
        return self._cache.touch(*args, **kwargs)
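# A hypothetical concrete backend built on KeyValueDB above. With identical
# native and storage types, values pass straight through the conversion hooks.
from pathlib import Path

class IntDB(KeyValueDB[int, int]):
    _native_type = int
    _storage_type = int

db = IntDB(Path('/tmp/intdb-demo'))
db['answer'] = 42
print(db['answer'], 'answer' in db)   # 42 True
db.close()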
class cache:
    def __init__(self, latest=False):
        self.feedCache = Cache(".feedcache")
        self.latest = latest

    def __preprocess_title(self, feed):
        for entry in feed.entries:
            entry["feed_src"] = feed["feed"]["title"]
        return feed

    def __manage_cache(self, url):
        data = None  # ensure a defined return value if fetching fails below
        try:
            if url in self.feedCache:
                data = self.feedCache.get(url)
            else:
                parsed_feed = feedparser.parse(url)
                data = self.__preprocess_title(parsed_feed)
                # cache expires in 30 mins
                self.feedCache.add(url, data, expire=1800)
            self.feedCache.close()
        except ValueError:
            pass
        except Exception:
            pass
        return data

    def get_feed(self, url):
        if self.latest:
            latestFeed = self.__manage_cache(url).entries
            if len(latestFeed) > 0:
                return latestFeed[0]
        return self.__manage_cache(url).entries
class CacheManager(object):
    def __init__(self, cache_path=CACHE_PATH):
        self.cache = Cache(cache_path)

    def items(self):
        return self.cache.iterkeys()

    def has_key(self, key):
        return key in self.cache

    def set(self, key, value, ttl=TTL):
        return self.cache.set(key=key, value=value, expire=ttl)

    def get(self, key):
        return self.cache.get(key=key)

    def clear_cache(self):
        for key in self.cache:
            if key != 'censys_credentials':
                del self.cache[key]
        return True

    def close(self):
        self.cache.close()
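# A minimal sketch of CacheManager above. CACHE_PATH and TTL come from the
# surrounding module; an explicit path and ttl are passed here instead. Note
# that clear_cache() deliberately keeps the 'censys_credentials' key.
manager = CacheManager(cache_path='/tmp/censys-cache-demo')
manager.set('censys_credentials', ('api_id', 'api_secret'), ttl=None)  # no expiry
manager.set('scan:example.com', {'ports': [80, 443]})
manager.clear_cache()                     # drops everything except credentials
print(manager.get('censys_credentials'))  # ('api_id', 'api_secret')
manager.close()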
def check(self, dir, args):
    warnings = []
    log.info("Running URL checks (CheckURLs)")
    assert 'URLs' in __main__.remoteCheckList
    if 'URLs' in args.disableChecksRemote:
        return warnings

    cache_dir = 'data-check-cache/URLs'
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    global cache
    cache = Cache(cache_dir)
    if 'URLs' in args.purgeCaches:
        cache.clear()

    log.info("Testing biobank URLs")
    for biobank in dir.getBiobanks():
        if not 'url' in biobank or re.search(r'^\s*$', biobank['url']):
            warnings.append(
                DataCheckWarning(self.__class__.__name__, "",
                                 dir.getBiobankNN(biobank['id']),
                                 DataCheckWarningLevel.WARNING, biobank['id'],
                                 DataCheckEntityType.BIOBANK, "Missing URL"))
        else:
            URLwarnings = testURL(
                biobank['url'],
                DataCheckWarning(self.__class__.__name__, "",
                                 dir.getBiobankNN(biobank['id']),
                                 DataCheckWarningLevel.ERROR, biobank['id'],
                                 DataCheckEntityType.BIOBANK, "Biobank URL"))
            warnings += URLwarnings

    log.info("Testing collection URLs")
    for collection in dir.getCollections():
        # non-existence of access URIs is tested in the access policy checks -
        # here we only check validity of the URL if it exists
        if 'data_access_uri' in collection and not re.search(r'^\s*$', collection['data_access_uri']):
            URLwarnings = testURL(
                collection['data_access_uri'],
                DataCheckWarning(self.__class__.__name__, "",
                                 dir.getCollectionNN(collection['id']),
                                 DataCheckWarningLevel.ERROR, collection['id'],
                                 DataCheckEntityType.COLLECTION,
                                 "Data access URL for collection"))
            warnings += URLwarnings
        if 'sample_access_uri' in collection and not re.search(r'^\s*$', collection['sample_access_uri']):
            URLwarnings = testURL(
                collection['sample_access_uri'],
                DataCheckWarning(self.__class__.__name__, "",
                                 dir.getCollectionNN(collection['id']),
                                 DataCheckWarningLevel.ERROR, collection['id'],
                                 DataCheckEntityType.COLLECTION,
                                 "Sample access URL for collection"))
            warnings += URLwarnings
        if 'image_access_uri' in collection and not re.search(r'^\s*$', collection['image_access_uri']):
            URLwarnings = testURL(
                collection['image_access_uri'],
                DataCheckWarning(self.__class__.__name__, "",
                                 dir.getCollectionNN(collection['id']),
                                 DataCheckWarningLevel.ERROR, collection['id'],
                                 DataCheckEntityType.COLLECTION,
                                 "Image access URL for collection"))
            warnings += URLwarnings

    cache.close()
    return warnings
class State(StateBase):  # pylint: disable=too-many-instance-attributes
    def __init__(self, root_dir=None, tmp_dir=None):
        from diskcache import Cache

        super().__init__()

        self.tmp_dir = tmp_dir
        self.root_dir = root_dir
        self.fs = LocalFileSystem(None, {"url": self.root_dir})

        if not tmp_dir:
            return

        config = {"eviction_policy": "least-recently-used"}
        self.links = Cache(directory=os.path.join(tmp_dir, "links"), **config)
        self.md5s = Cache(directory=os.path.join(tmp_dir, "md5s"), **config)

    def close(self):
        self.md5s.close()
        self.links.close()

    def save(self, path_info, fs, hash_info):
        """Save hash for the specified path info.

        Args:
            path_info (dict): path_info to save hash for.
            hash_info (HashInfo): hash to save.
        """
        if not isinstance(fs, LocalFileSystem):
            return

        assert isinstance(path_info, str) or path_info.scheme == "local"
        assert hash_info
        assert isinstance(hash_info, HashInfo)
        assert os.path.exists(path_info)

        mtime, size = get_mtime_and_size(path_info, self.fs)
        inode = get_inode(path_info)

        logger.debug("state save (%s, %s, %s) %s", inode, mtime, size, hash_info.value)

        self.md5s[inode] = (mtime, size, hash_info.value)

    def get(self, path_info, fs):
        """Gets the hash for the specified path info. Hash will be
        retrieved from the state database if available.

        Args:
            path_info (dict): path info to get the hash for.

        Returns:
            HashInfo or None: hash for the specified path info or None if it
            doesn't exist in the state database.
        """
        if not isinstance(fs, LocalFileSystem):
            return None

        assert isinstance(path_info, str) or path_info.scheme == "local"
        path = os.fspath(path_info)

        # NOTE: use os.path.exists instead of LocalFileSystem.exists
        # because it uses lexists() and will return True for broken
        # symlinks that we cannot stat() in get_mtime_and_size
        if not os.path.exists(path):
            return None

        mtime, size = get_mtime_and_size(path, self.fs)
        inode = get_inode(path)

        value = self.md5s.get(inode)

        if not value or value[0] != mtime or value[1] != size:
            return None

        return HashInfo("md5", value[2], size=int(size))

    def save_link(self, path_info, fs):
        """Adds the specified path to the list of links created by dvc. This
        list is later used on `dvc checkout` to cleanup old links.

        Args:
            path_info (dict): path info to add to the list of links.
        """
        if not isinstance(fs, LocalFileSystem):
            return

        assert isinstance(path_info, str) or path_info.scheme == "local"

        if not self.fs.exists(path_info):
            return

        mtime, _ = get_mtime_and_size(path_info, self.fs)
        inode = get_inode(path_info)
        relative_path = relpath(path_info, self.root_dir)

        with self.links as ref:
            ref[relative_path] = (inode, mtime)

    def get_unused_links(self, used, fs):
        """Removes all saved links except the ones that are used.

        Args:
            used (list): list of used links that should not be removed.
        """
        if not isinstance(fs, LocalFileSystem):
            return

        unused = []

        with self.links as ref:
            for relative_path in ref:
                path = os.path.join(self.root_dir, relative_path)

                if path in used or not self.fs.exists(path):
                    continue

                inode = get_inode(path)
                mtime, _ = get_mtime_and_size(path, self.fs)

                if ref[relative_path] == (inode, mtime):
                    logger.debug("Removing '%s' as unused link.", path)
                    unused.append(relative_path)

        return unused

    def remove_links(self, unused, fs):
        if not isinstance(fs, LocalFileSystem):
            return

        for path in unused:
            remove(os.path.join(self.root_dir, path))

        with self.links as ref:
            for path in unused:
                del ref[path]
class CAgent:
    def __init__(self, name, oDir: CDirectoryConfig, oConfigByYaml: CConfigByYaml,
                 connectKnowlegeServer=False):
        self.name = name
        self.crawlerManager: CCrawlerManager = None
        self.storageManager: CStorage = None
        self.knowledgeManagerClient: CKnowledgeClient = None
        self.oDir: CDirectoryConfig = oDir
        self.oConf = oConfigByYaml
        self.oLog = CLog(oDir['Log'], self.name + '_log')
        self.dbWeb = ''
        self.cacheAgent = Cache(oDir['cacheAgentFolder'])
        self.cacheCrawler = Cache(oDir['cacheCrawlerFolder'])
        self.flagConnectKnowlegeServer = connectKnowlegeServer
        fKeyboardInterruptRegistrar(self._callbackKeyboardInterrupt)
        self.flagUserClose = False
        # fKeyboardInterruptRegistrar._register['test'] = self._callbackKeyboardInterrupt

    def _configStorage(self, mode='mongoDB'):
        oSubConfig = self.oConf['Storage']
        self.dbWeb = oSubConfig['dbWeb']
        if oSubConfig.get('mode') is not None:
            mode = oSubConfig['mode']
        path = self.dbWeb
        if mode == 'mongoDB':
            self.storageManager = CStorageMongoDB(self.name, path)

    def _configCrawler(self):
        self.crawlerManager = CCrawlerManager(self.name, self.oDir['crawlerCWD'],
                                              self.oLog,
                                              self.oDir['cacheCrawlerFolder'],
                                              self.oDir['cacheAgentFolder'])

    def _configKnowledgeManager(self):
        oSubConfig = self.oConf['KnowledgeManager']
        addressTuple = (oSubConfig['address'], oSubConfig['port'])
        key = oSubConfig['password']
        key = bytes(key, 'utf-8')
        print(key)
        self.knowledgeManagerClient = CKnowledgeClient(addressTuple, key, self.oLog)
        if self.flagConnectKnowlegeServer:
            err = self.knowledgeManagerClient.connect()
            if err == False:
                raise ValueError("KnowledgeManager connection failed")

    def configAll(self):
        self._configCrawler()
        self.oLog.safeRecordTime('CrawlerManager conf finished')
        self._configKnowledgeManager()
        self.oLog.safeRecordTime('KnowledgeManager conf finished')
        self._configStorage()
        self.oLog.safeRecordTime('StorageManager conf finished')

    def startCrawling(self, jobsList: list):
        return self.crawlerManager.engineStart(jobsList)

    def fetchResult(self, handler, subProcHandle, timeWaitStep=1, maxWaitTimes=5):
        # total continuous wait time will be (timeWaitStep * maxWaitTimes)
        result = ''
        cnt = 0
        global WRITE_TO_STORAGE_FLAG
        WRITE_TO_STORAGE_FLAG = True
        while True:
            _, result = self.cacheAgent.pull()
            if result is not None:
                result = json.loads(result)
                ans = handler(result['type'], result['content'])
                # print(ans)
                for temp in ans:
                    self.storageManager.storeData(temp[0], temp[1], temp[2])
                # break
                cnt = 0  # clear counter
            elif timeWaitStep * maxWaitTimes > 0:
                if cnt >= maxWaitTimes:  # if continuous wait time equals maxWaitTimes
                    WRITE_TO_STORAGE_FLAG = False
                    return False
                elif subProcHandle.poll() is not None:  # if the subprocess is finished
                    WRITE_TO_STORAGE_FLAG = False
                    return subProcHandle.poll()
                else:
                    time.sleep(timeWaitStep)
                    cnt += 1  # counter add one
            else:
                WRITE_TO_STORAGE_FLAG = False
                raise ValueError("timeWaitStep * maxWaitTimes should be bigger than 0")

    def clearCache(self):
        self.cacheAgent.clear()
        self.cacheCrawler.clear()

    def closeCache(self):
        self.cacheAgent.close()
        self.cacheCrawler.close()
        self.crawlerManager.closeCache()

    def _callbackKeyboardInterrupt(self, *args, **kwargs):
        global WRITE_TO_STORAGE_FLAG
        self.flagUserClose = True
        if WRITE_TO_STORAGE_FLAG is True:
            numRemainedMsg = len(self.cacheAgent)
            MSG = "Agent is fetching the result to the Storage," + \
                  " number of remained items: " + str(numRemainedMsg) + \
                  ", will close later."
            return False, MSG
        else:
            return True, ''

    def test(self):
        # code for testing keyboard interruption handling
        global WRITE_TO_STORAGE_FLAG
        WRITE_TO_STORAGE_FLAG = True
        for i in range(1000):
            time.sleep(0.01)
        WRITE_TO_STORAGE_FLAG = False
        # print('Press Ctrl+C')
        # for x in range(1, 100):
        #     time.sleep(0.2)
        #     print(x)

    def close(self):
        self.knowledgeManagerClient.close()
        self.closeCache()
class FinancialCache():
    """
    A disk-based database containing an offline version of financial data,
    used as a cache
    """

    def __init__(self, path, **kwargs):
        '''
        Initializes the cache

        Parameters
        ----------
        path : str
            The path where the cache will be located
        max_cache_size_bytes : int (kwargs)
            (optional) the maximum size of the cache in bytes

        Raises
        ------
        ValidationError : in case an invalid cache size is supplied
        FileSystemError : in case the cache directory cannot be created
        '''
        try:
            max_cache_size_bytes = kwargs['max_cache_size_bytes']
        except KeyError:
            # default max cache is 4GB
            max_cache_size_bytes = 4e9

        util.create_dir(path)

        try:
            self.cache = Cache(path, size_limit=int(max_cache_size_bytes))
        except Exception as e:
            raise ValidationError('invalid max cache size', e)

        log.debug("Cache was initialized: %s" % path)

    def write(self, key: str, value: object):
        """
        Writes an object to the cache

        Parameters
        ----------
        key : str
            The cache key
        value : object
            The cache value

        Returns
        ----------
        None
        """
        if (key == "" or key is None) or (value == "" or value is None):
            return
        self.cache[key] = value

    def read(self, key):
        """
        Reads an object from the cache and returns None if it cannot be found

        Parameters
        ----------
        key : str
            The cache key

        Returns
        ----------
        The object in question, or None if the key is not present
        """
        try:
            return self.cache[key]
        except KeyError:
            log.debug("%s not found inside cache" % key)
            return None

    def close(self):
        self.cache.close()
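# An illustrative use of FinancialCache above with a 1 MB size limit (the
# surrounding module's util.create_dir and log helpers are assumed). read()
# returns None on a miss rather than raising; write() silently ignores empty
# keys and values.
fc = FinancialCache('/tmp/fincache-demo', max_cache_size_bytes=1e6)
fc.write('AAPL:FY2021', {'revenue': 365.8e9})
print(fc.read('AAPL:FY2021'))   # {'revenue': 365800000000.0}
print(fc.read('missing'))       # None
fc.close()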
from kolibri.utils.conf import KOLIBRI_HOME
from kolibri.utils.conf import OPTIONS

cache_options = OPTIONS["Cache"]

pickle_protocol = OPTIONS["Python"]["PICKLE_PROTOCOL"]

diskcache_location = os.path.join(KOLIBRI_HOME, "process_cache")

try:
    diskcache_cache = Cache(diskcache_location, disk_pickle_protocol=pickle_protocol)
except DatabaseError:
    shutil.rmtree(diskcache_location, ignore_errors=True)
    os.mkdir(diskcache_location)
    diskcache_cache = Cache(diskcache_location, disk_pickle_protocol=pickle_protocol)

diskcache_cache.close()

# Default to LocMemCache, as it has the simplest configuration
default_cache = {
    "BACKEND": "django.core.cache.backends.locmem.LocMemCache",
    # Default time out of each cache key
    "TIMEOUT": cache_options["CACHE_TIMEOUT"],
    "OPTIONS": {"MAX_ENTRIES": cache_options["CACHE_MAX_ENTRIES"]},
}

built_files_prefix = "built_files"

built_files_cache = {
    "BACKEND": "django.core.cache.backends.locmem.LocMemCache",
    # Default time out of each cache key
    "TIMEOUT": cache_options["CACHE_TIMEOUT"],
    "OPTIONS": {"MAX_ENTRIES": cache_options["CACHE_MAX_ENTRIES"]},
}
def check(self, dir, args):
    warnings = []
    log.info("Running geographical location checks (BiobankGeo)")
    # This is to be enabled for real runs.
    assert 'geocoding' in __main__.remoteCheckList
    if 'geocoding' in args.disableChecksRemote:
        geoCodingEnabled = False
    else:
        geoCodingEnabled = True

    cache_dir = 'data-check-cache/geolocator'
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    cache = Cache(cache_dir)
    if 'geocoding' in args.purgeCaches:
        cache.clear()

    geocoords_pattern = r'^-?\d+\.\d+$'
    geolocator = Nominatim(
        user_agent='Mozilla/5.0 (X11; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0',
        timeout=15)

    for biobank in dir.getBiobanks():
        if ('latitude' in biobank and not re.search(r'^\s*$', biobank['latitude'])
                and 'longitude' in biobank and not re.search(r'^\s*$', biobank['longitude'])):
            # we check before doing any convenience substitutions
            if not re.search(geocoords_pattern, biobank['latitude']):
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "", dir.getBiobankNN(biobank['id']),
                        DataCheckWarningLevel.ERROR, biobank['id'],
                        DataCheckEntityType.BIOBANK,
                        "Invalid biobank latitude (should be a decimal number with period without any spaces or stray characters around - the surrounding quotes are added in this report): offending value '"
                        + biobank['latitude'] + "'"))
            if not re.search(geocoords_pattern, biobank['longitude']):
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "", dir.getBiobankNN(biobank['id']),
                        DataCheckWarningLevel.ERROR, biobank['id'],
                        DataCheckEntityType.BIOBANK,
                        "Invalid biobank longitude (should be a decimal number with period without any spaces or stray characters around - the surrounding quotes are added in this report): offending value '"
                        + biobank['longitude'] + "'"))
            # this is for convenience - if there are commas used instead of
            # periods, we should still do the remaining checks
            biobank['latitude'] = re.sub(r',', r'.', biobank['latitude'])
            biobank['longitude'] = re.sub(r',', r'.', biobank['longitude'])

            if (re.search(geocoords_pattern, biobank['latitude'])
                    and re.search(geocoords_pattern, biobank['longitude'])):
                if geoCodingEnabled:
                    logMessage = "Checking reverse geocoding for " + biobank['latitude'] + ", " + biobank['longitude']
                    try:
                        loc_string = biobank['latitude'] + ", " + biobank['longitude']
                        if loc_string in cache and cache[loc_string] != "":
                            country_code = cache[loc_string]
                        else:
                            location = geolocator.reverse(loc_string, language='en')
                            country_code = location.raw['address']['country_code']
                            cache[loc_string] = country_code
                        logMessage += " -> OK"
                        if ((biobank['country']['id'] != "IARC" and biobank['country']['id'] != "EU")
                                and country_code.upper() != biobank['country']['id']
                                and not (country_code.upper() == "GB" and biobank['country']['id'] == "UK")):
                            warnings.append(
                                DataCheckWarning(
                                    self.__class__.__name__, "", dir.getBiobankNN(biobank['id']),
                                    DataCheckWarningLevel.WARNING, biobank['id'],
                                    DataCheckEntityType.BIOBANK,
                                    "Geolocation of the biobank is likely outside of its country "
                                    + biobank['country']['id'] + "; biobank seems to be in "
                                    + country_code.upper()
                                    + f" based on geographical coordinates 'latitude'={biobank['latitude']} 'longitude'={biobank['longitude']}"))
                    except Exception as e:
                        logMessage += " -> failed (" + str(e) + ")"
                        warnings.append(
                            DataCheckWarning(
                                self.__class__.__name__, "", dir.getBiobankNN(biobank['id']),
                                DataCheckWarningLevel.WARNING, biobank['id'],
                                DataCheckEntityType.BIOBANK,
                                "Reverse geocoding of the biobank location failed (" + str(e) + ")"))
                    log.info(logMessage)
        else:
            warnings.append(
                DataCheckWarning(
                    self.__class__.__name__, "", dir.getBiobankNN(biobank['id']),
                    DataCheckWarningLevel.INFO, biobank['id'],
                    DataCheckEntityType.BIOBANK,
                    "Missing geographical coordinates ('latitude' and/or 'longitude' attributes are empty)"))

    for collection in dir.getCollections():
        if ('latitude' in collection and not re.search(r'^\s*$', collection['latitude'])
                and 'longitude' in collection and not re.search(r'^\s*$', collection['longitude'])):
            # we check before doing any convenience substitutions
            if not re.search(geocoords_pattern, collection['latitude']):
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "", dir.getCollectionNN(collection['id']),
                        DataCheckWarningLevel.ERROR, collection['id'],
                        DataCheckEntityType.COLLECTION,
                        "Invalid collection latitude (should be a decimal number with period without any spaces or stray characters around - the surrounding quotes are added in this report): offending value '"
                        + collection['latitude'] + "'"))
            if not re.search(geocoords_pattern, collection['longitude']):
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "", dir.getCollectionNN(collection['id']),
                        DataCheckWarningLevel.ERROR, collection['id'],
                        DataCheckEntityType.COLLECTION,
                        "Invalid collection longitude (should be a decimal number with period without any spaces or stray characters around - the surrounding quotes are added in this report): offending value '"
                        + collection['longitude'] + "'"))
            # this is for convenience - if there are commas used instead of
            # periods, we should still do the remaining checks
            collection['latitude'] = re.sub(r',', r'.', collection['latitude'])
            collection['longitude'] = re.sub(r',', r'.', collection['longitude'])

            if (re.search(geocoords_pattern, collection['latitude'])
                    and re.search(geocoords_pattern, collection['longitude'])):
                if geoCodingEnabled:
                    logMessage = "Checking reverse geocoding for " + collection['latitude'] + ", " + collection['longitude']
                    try:
                        loc_string = collection['latitude'] + ", " + collection['longitude']
                        if loc_string in cache and cache[loc_string] != "":
                            country_code = cache[loc_string]
                        else:
                            location = geolocator.reverse(loc_string, language='en')
                            country_code = location.raw['address']['country_code']
                            cache[loc_string] = country_code
                        logMessage += " -> OK"
                        biobankId = dir.getCollectionBiobankId(collection['id'])
                        biobank = dir.getBiobankById(biobankId)
                        if ((biobank['country']['id'] != "IARC" and biobank['country']['id'] != "EU")
                                and country_code.upper() != biobank['country']['id']
                                and not (country_code.upper() == "GB" and biobank['country']['id'] == "UK")):
                            warnings.append(
                                DataCheckWarning(
                                    self.__class__.__name__, "", dir.getCollectionNN(collection['id']),
                                    DataCheckWarningLevel.WARNING, collection['id'],
                                    DataCheckEntityType.COLLECTION,
                                    "Geolocation of the collection is likely outside of its country "
                                    + collection['country']['id'] + "; collection seems to be in "
                                    + country_code.upper()
                                    + f" based on geographical coordinates 'latitude'={collection['latitude']} 'longitude'={collection['longitude']}"))
                    except Exception as e:
                        logMessage += " -> failed (" + str(e) + ")"
                        warnings.append(
                            DataCheckWarning(
                                self.__class__.__name__, "", dir.getCollectionNN(collection['id']),
                                DataCheckWarningLevel.WARNING, collection['id'],
                                DataCheckEntityType.COLLECTION,
                                "Reverse geocoding of the collection location failed (" + str(e) + ")"))
                    log.info(logMessage)

    cache.close()
    return warnings
print("Could not resolve source %s or target %s for graph" % (link["source"], link["target"])) brokenlinks.append(link) graphlinks = [link for link in graphlinks if link not in brokenlinks] graphnodes = [node for _, node in graphnodes.items()] graphnodes = sorted(graphnodes, key=lambda x: x["seq"]) graph = { "batadv": { "directed": False, "graph": [], "links": graphlinks, "multigraph": False, "nodes": graphnodes }, "version": 1 } print(graph) with open("graph.json", "w") as outfile: json.dump(graph, outfile) # finalize nodes.json nodes = {"nodes": nodes, "timestamp": timestamp, "version": 2} print(nodes) with open("nodes.json", "w") as outfile: json.dump(nodes, outfile) print("Wrote %d nodes." % len(nodes["nodes"])) cache.close()
class DiskPubSubCache(object):
    """A DiskCache-backed cache used for PubSub channels

    Attributes:
        cache (Cache): The cache which backs this pubsub cache
        _subscribers (dict{str: DiskSubscription}): The subscriptions tracked by this cache
        _threads_registered (set(str)): The names of the threads which have registered
            triggers on the database
        _push_partial (func): The function called when an insert or update happens on the cache

    Args:
        directory (str): The path to the directory used by this cache
        timeout (float, optional): The number of seconds to wait before an operation
            times out. Defaults to 0.01 seconds
    """
    _insert_func_name = 'push_on_insert'
    _update_func_name = 'push_on_update'

    def __init__(self, directory, timeout=0.01):
        self.cache = Cache(directory, timeout=timeout)
        # Would be nice to use a weakref to a set so that keys with no subscribers
        # are removed
        self._subscribers = {}
        self._threads_registered = set()
        self._insert_triggers()
        self._push_partial = partial(self.__push_to_subscribers)

    def publish(self, channel, data):
        """Publish data to a channel

        Args:
            channel (str): Channel to publish the data to
            data: The data to publish. The data will arrive in the same format as it was set

        Returns:
            (int): The number of subscribers which received the published data
        """
        self.cache.set(channel, data)
        return len(self._subscribers.get(channel, []))

    def register_callbacks(self):
        """Registers the trigger functions for the current thread.

        A thread must have trigger functions registered before it can publish data.
        """
        if threading.current_thread().name not in self._threads_registered:
            con = self._con
            for func_name in (self._insert_func_name, self._update_func_name):
                con.create_function(func_name, 2, self._push_partial)
            self._threads_registered.add(threading.current_thread().name)

    def _insert_triggers(self):
        """Inserts the original triggers into the cache, but does not create the
        functions which receive the triggers
        """
        con = self._con
        for func_name, operation in [(self._insert_func_name, 'INSERT'),
                                     (self._update_func_name, 'UPDATE')]:
            con.execute('CREATE TRIGGER IF NOT EXISTS {0} AFTER {1} ON Cache BEGIN '
                        'SELECT {0}(NEW.key, NEW.value); END;'.format(func_name, operation))

    def subscribe(self, channel):
        """Subscribe to a channel

        Args:
            channel (str): The name of the channel to subscribe to

        Returns:
            (DiskSubscription): The subscription to this channel
        """
        subscription = DiskSubscription(channel)
        if channel not in self._subscribers:
            self._subscribers[channel] = WeakSet([subscription])
        else:
            self._subscribers[channel].add(subscription)
        return subscription

    def __push_to_subscribers(self, channel, value):
        try:
            value = self.__get_value(value)
            for subscriber in self._subscribers.get(str(channel), []):
                subscriber.push(value)
        except:
            import traceback
            traceback.print_exc()
            raise

    @staticmethod
    def __get_value(value):
        if (value == unsubscribe_message or isinstance(value, string_types)
                or isinstance(value, int) or isinstance(value, float)):
            return value
        if isinstance(value, binary_type):
            return value.decode('utf-8')
        try:
            return pickle.load(BytesIO(value))
        except (KeyError, TypeError, IndexError):
            return str(value)

    @property
    def _con(self):
        con = getattr(self.cache._local, 'con', None)

        if con is None:
            con = self.cache._local.con = sqlite3.connect(
                os.path.join(self.cache._directory, DBNAME),
                timeout=self.cache._timeout,
                isolation_level=None,
            )

            # Some SQLite pragmas work on a per-connection basis so query the
            # Settings table and reset the pragmas. The Settings table may not
            # exist so catch and ignore the OperationalError that may occur.
            try:
                select = 'SELECT key, value FROM Settings'
                settings = con.execute(select).fetchall()
            except sqlite3.OperationalError:
                pass
            else:
                for key, value in settings:
                    if key.startswith('sqlite_'):
                        self.cache.reset(key, value, update=False)

        return con

    def shutdown(self):
        """Shuts down the connection to the cache"""
        self.cache.close()
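# A hypothetical end-to-end sketch for DiskPubSubCache above. DiskSubscription,
# unsubscribe_message and DBNAME come from the same module and are assumed
# here; the publishing thread must call register_callbacks() first so the
# SQLite triggers can reach __push_to_subscribers.
pubsub = DiskPubSubCache('/tmp/pubsub-demo')
subscription = pubsub.subscribe('alerts')
pubsub.register_callbacks()
notified = pubsub.publish('alerts', 'disk nearly full')
print(notified)   # 1 -- one live subscription on the 'alerts' channel
pubsub.shutdown()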
class State(StateBase):  # pylint: disable=too-many-instance-attributes
    def __init__(self, root_dir=None, tmp_dir=None, dvcignore=None):
        from diskcache import Cache

        super().__init__()

        self.tmp_dir = tmp_dir
        self.root_dir = root_dir
        self.dvcignore = dvcignore

        if not tmp_dir:
            return

        config = {
            "eviction_policy": "least-recently-used",
            "disk_pickle_protocol": 4,
        }
        self.links = Cache(directory=os.path.join(tmp_dir, "links"), **config)
        self.md5s = Cache(directory=os.path.join(tmp_dir, "md5s"), **config)

    def close(self):
        self.md5s.close()
        self.links.close()

    def save(self, path_info, fs, hash_info):
        """Save hash for the specified path info.

        Args:
            path_info (dict): path_info to save hash for.
            hash_info (HashInfo): hash to save.
        """
        if not isinstance(fs, LocalFileSystem):
            return

        mtime, size = get_mtime_and_size(path_info, fs, self.dvcignore)
        inode = get_inode(path_info)

        logger.debug(
            "state save (%s, %s, %s) %s",
            inode,
            mtime,
            str(size),
            hash_info.value,
        )

        self.md5s[inode] = (mtime, str(size), hash_info.value)

    def get(self, path_info, fs):
        """Gets the hash for the specified path info. Hash will be
        retrieved from the state database if available.

        Args:
            path_info (dict): path info to get the hash for.

        Returns:
            HashInfo or None: hash for the specified path info or None if it
            doesn't exist in the state database.
        """
        from .objects.meta import Meta

        if not isinstance(fs, LocalFileSystem):
            return None, None

        try:
            mtime, size = get_mtime_and_size(path_info, fs, self.dvcignore)
        except FileNotFoundError:
            return None, None

        inode = get_inode(path_info)

        value = self.md5s.get(inode)

        if not value or value[0] != mtime or value[1] != str(size):
            return None, None

        return Meta(size=size), HashInfo("md5", value[2])

    def save_link(self, path_info, fs):
        """Adds the specified path to the list of links created by dvc. This
        list is later used on `dvc checkout` to cleanup old links.

        Args:
            path_info (dict): path info to add to the list of links.
        """
        if not isinstance(fs, LocalFileSystem):
            return

        try:
            mtime, _ = get_mtime_and_size(path_info, fs, self.dvcignore)
        except FileNotFoundError:
            return

        inode = get_inode(path_info)
        relative_path = relpath(path_info, self.root_dir)

        with self.links as ref:
            ref[relative_path] = (inode, mtime)

    def get_unused_links(self, used, fs):
        """Removes all saved links except the ones that are used.

        Args:
            used (list): list of used links that should not be removed.
        """
        if not isinstance(fs, LocalFileSystem):
            return

        unused = []

        with self.links as ref:
            for relative_path in ref:
                path = os.path.join(self.root_dir, relative_path)

                if path in used or not fs.exists(path):
                    continue

                inode = get_inode(path)
                mtime, _ = get_mtime_and_size(path, fs, self.dvcignore)

                if ref[relative_path] == (inode, mtime):
                    logger.debug("Removing '%s' as unused link.", path)
                    unused.append(relative_path)

        return unused

    def remove_links(self, unused, fs):
        if not isinstance(fs, LocalFileSystem):
            return

        for path in unused:
            remove(os.path.join(self.root_dir, path))

        with self.links as ref:
            for path in unused:
                del ref[path]
                break
            log('Skipped post: "{title}"'.format(title=post["data"]["title"]))

        background = requests.get(url)
        CACHE.set(uuid4(), background, expire=cache_seconds)
    except requests.exceptions.ConnectionError:
        log("No internet, using background from cache")
        background = CACHE.get(choice(list(CACHE)))

    with open(os.path.join(CACHE_DIR, "current"), mode="w+b") as out:
        out.write(background.content)
    set_wallpaper(os.path.join(CACHE_DIR, "current"), get_desktop_environment())


if __name__ == "__main__":
    main()

# get quote as text
# res = requests.get("https://www.brainyquote.com/quotes_of_the_day.html")
# soup = BeautifulSoup(res.text, "lxml")
# quote = soup.find("img", {"class": "p-qotd"})
# print(quote["alt"])

# TODO
# DPI/scaling issues?
# dependency check

CACHE.close()
class ReadCacheDataBackend(DataBackend):
    def __init__(self, config):
        read_cache_directory = config.get('dataBackend.readCache.directory', None, types=str)
        read_cache_maximum_size = config.get('dataBackend.readCache.maximumSize', None, types=int)

        if (read_cache_directory and not read_cache_maximum_size) or \
                (not read_cache_directory and read_cache_maximum_size):
            raise ConfigurationError(
                'Both dataBackend.readCache.directory and dataBackend.readCache.maximumSize '
                'need to be set to enable disk based caching.')

        if read_cache_directory and read_cache_maximum_size:
            os.makedirs(read_cache_directory, exist_ok=True)
            try:
                self._read_cache = Cache(
                    read_cache_directory,
                    size_limit=read_cache_maximum_size,
                    eviction_policy='least-frequently-used',
                    statistics=1,
                )
            except Exception:
                logger.warning('Unable to enable disk based read caching. Continuing without it.')
                self._read_cache = None
            else:
                logger.debug('Disk based read caching instantiated (cache size {}).'
                             .format(read_cache_maximum_size))
        else:
            self._read_cache = None
        self._use_read_cache = True

        # Start reader and writer threads after the disk cache is created,
        # so that they see it.
        super().__init__(config)

    def _read(self, block, metadata_only):
        key = self._block_uid_to_key(block.uid)
        metadata_key = key + self._META_SUFFIX
        if self._read_cache is not None and self._use_read_cache:
            metadata = self._read_cache.get(metadata_key)
            if metadata and metadata_only:
                return block, None, metadata
            elif metadata:
                data = self._read_cache.get(key)
                if data:
                    return block, data, metadata

        block, data, metadata = super()._read(block, metadata_only)

        # We always put blocks into the cache even when self._use_read_cache is False
        if self._read_cache is not None:
            self._read_cache.set(metadata_key, metadata)
            if not metadata_only:
                self._read_cache.set(key, data)

        return block, data, metadata

    def use_read_cache(self, enable):
        old_value = self._use_read_cache
        self._use_read_cache = enable
        return old_value

    def close(self):
        super().close()
        if self._read_cache is not None:
            (cache_hits, cache_misses) = self._read_cache.stats()
            logger.debug('Disk based cache statistics (since cache creation): '
                         '{} hits, {} misses.'.format(cache_hits, cache_misses))
            self._read_cache.close()
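# A sketch of the use_read_cache() toggle above: while disabled, reads bypass
# the cache, but _read() still populates it, so re-enabling benefits from the
# warmed entries. DataBackend and the config object come from the surrounding
# project and are assumed here.
backend = ReadCacheDataBackend(config)
previous = backend.use_read_cache(False)   # returns the old setting (True)
# ... reads here go to the underlying backend but still warm the cache ...
backend.use_read_cache(previous)
backend.close()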