from diskcache import Cache


class ElementCache:

    def __init__(self, path):
        # disk-backed mapping with a generous (4 TiB) size limit
        self.map = Cache(path, size_limit=2**42)

    def add(self, key, element):
        self.map[key] = element

    def get(self, key):
        if key not in self.map:
            return None
        return self.map[key]

    def contains(self, key):
        return key in self.map

    def count_elements(self):
        return len(self.map)

    def update(self, key, element):
        self.map[key] = element

    def index_keys(self, index):
        # re-key every entry through the index; entries whose key does not
        # map to a positive index are dropped
        keys = list(self.map.iterkeys())
        for key in keys:
            value = self.map[key]
            key_index = index.to_index(key)
            if key_index > 0:
                self.map[key_index] = value
            del self.map[key]
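# A minimal usage sketch (hypothetical path and values); index_keys()
# additionally needs an object exposing to_index(key) -> int.
cache = ElementCache('/tmp/element_cache')
cache.add('alpha', {'value': 1})
cache.update('alpha', {'value': 2})
print(cache.get('alpha'))      # {'value': 2}
print(cache.count_elements())  # 1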
class BaseCacheAnalyzer(BaseAnalyzer):

    def __init__(self, cache_location=None, force=False):
        super().__init__()
        self.cache_location = cache_location
        self.cache = None
        self.force = force

    def initialize(self):
        from diskcache import Cache
        self.cache = Cache(self.cache_location or self.uid + "_cache")

    def filter(self, simulation):
        return self.force or not self.is_in_cache(simulation.id)

    def to_cache(self, key, data):
        self.cache.set(key, data)

    def from_cache(self, key):
        return self.cache.get(key)

    def is_in_cache(self, key):
        return key in self.cache

    def __del__(self):
        if self.cache:
            self.cache.close()

    @property
    def keys(self):
        return list(self.cache.iterkeys()) if self.cache else None
from typing import Callable

import numpy as np
from diskcache import Cache


def get_best_from_cache(cache: Cache, objective: Callable) -> tuple:
    """
    Helper function for locating the best candidate according to some
    objective callable in a cache.

    :param cache: diskcache Cache, as in the driver cache
    :param objective: callable accepting a simulation result and returning
        a scalar float objective
    :return: tuple containing the best candidate and result
    """
    best_result = np.inf
    best_candidate = None
    # for each candidate
    for candidate in cache.iterkeys():
        if candidate == 'meta':
            continue
        # try to compute the objective
        try:
            # user-defined objective value
            result = objective(cache[candidate])
            # update best candidate and result
            if result < best_result:
                best_result = result
                best_candidate = candidate
        except Exception:
            # on error, go to the next candidate
            continue
    return best_candidate, best_result
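# A hedged usage sketch with a throwaway cache of candidate -> result,
# minimizing a toy objective; the 'meta' key is skipped by the helper.
demo = Cache('/tmp/objective_demo')
demo['meta'] = {'note': 'ignored by the search'}
for i, x in enumerate([3.0, -1.5, 0.25]):
    demo[f'candidate_{i}'] = x
best_key, best_val = get_best_from_cache(demo, objective=lambda r: r ** 2)
print(best_key, best_val)  # candidate_2 0.0625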
import os

import numpy as np
from diskcache import Cache


class FactBaseStats:
    """Accumulates per-(dataset, table) match counts in a disk cache."""

    def __init__(self, generator):
        self._cache = Cache(os.path.join(os.path.dirname(__file__), 'stats',
                                         self.__class__.__name__, generator),
                            size_limit=int(2e9))
        self._dataset = None
        self._table = None

    def init(self, dataset, table):
        self._dataset = dataset
        self._table = table
        self._cache.set((self._dataset, self._table),
                        {'exact': 0, 'strict': 0, 'loose': 0, 'none': 0})

    def _incr(self, field):
        entry = self._cache.get((self._dataset, self._table))
        entry[field] += 1
        self._cache.set((self._dataset, self._table), entry)

    def incr_exact(self):
        self._incr('exact')

    def incr_strict(self):
        self._incr('strict')

    def incr_loose(self):
        self._incr('loose')

    def incr_empty(self):
        self._incr('none')

    def get_dataset_stats(self, dataset):
        stats_keys = ['exact', 'strict', 'loose', 'none']
        stats = np.zeros(4, dtype=int)
        tables = 0
        for key in self._cache.iterkeys():
            if key[0] == dataset:
                tables += 1
                entry = self._cache.get(key)
                stats[0] += entry['exact']
                stats[1] += entry['strict']
                stats[2] += entry['loose']
                stats[3] += entry['none']
        cells = sum(stats)
        print('Dataset:', dataset)
        print('Tables:', tables)
        print('Cells:', cells)
        print('Stats:', dict(zip(stats_keys,
                                 zip(stats, np.round(stats / cells, 4)))))
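# Hedged usage sketch: the cache directory is derived from __file__, so
# this is expected to run inside a module, not a REPL; names are made up.
stats = FactBaseStats('demo_generator')
stats.init('demo_dataset', 'table_1')
stats.incr_exact()
stats.incr_loose()
stats.get_dataset_stats('demo_dataset')  # prints counts and ratios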
import os
import shutil


class BaseCacheAnalyzer(BaseAnalyzer):

    def __init__(self, cache_location=None, force=False,
                 delete_cache_when_done=False, **kwargs):
        super().__init__(**kwargs)
        self.cache_location = cache_location
        self.cache = None
        self.force = force
        self.delete_cache_when_done = delete_cache_when_done

    def initialize(self):
        from diskcache import Cache
        self.cache = Cache(self.cache_location or self.uid + "_cache")

    def filter(self, simulation):
        # skip simulations whose results are already cached, unless forced
        return self.force or not self.is_in_cache(simulation.id)

    def to_cache(self, key, data):
        self.cache.set(key, data)

    def from_cache(self, key):
        return self.cache.get(key)

    def is_in_cache(self, key):
        return key in self.cache

    def destroy(self):
        if self.cache:
            self.cache.close()
        if self.cache and self.delete_cache_when_done and os.path.exists(
                self.cache.directory):
            # release the Cache object before removing its directory
            cache_directory = self.cache.directory
            del self.cache
            shutil.rmtree(cache_directory)

    @property
    def keys(self):
        return list(self.cache.iterkeys()) if self.cache else None
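# A minimal subclass sketch, assuming BaseAnalyzer takes no required
# constructor arguments and provides `uid`; names here are hypothetical.
class MyAnalyzer(BaseCacheAnalyzer):
    uid = 'my_analyzer'

analyzer = MyAnalyzer(cache_location='/tmp/analyzer_cache',
                      delete_cache_when_done=True)
analyzer.initialize()
analyzer.to_cache('sim-001', {'score': 0.9})
print(analyzer.from_cache('sim-001'), analyzer.keys)
analyzer.destroy()  # closes and removes the cache directory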
from diskcache import Cache


class CacheManager(object):

    def __init__(self, cache_path=CACHE_PATH):
        self.cache = Cache(cache_path)

    def items(self):
        return self.cache.iterkeys()

    def has_key(self, key):
        return key in self.cache

    def set(self, key, value, ttl=TTL):
        return self.cache.set(key=key, value=value, expire=ttl)

    def get(self, key):
        return self.cache.get(key=key)

    def clear_cache(self):
        # materialize the key list first; deleting entries while iterating
        # the cache directly is unsafe
        for key in list(self.cache):
            if key != 'censys_credentials':
                del self.cache[key]
        return True

    def close(self):
        self.cache.close()
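# Usage sketch: CACHE_PATH and TTL are module-level constants assumed by
# the class above. clear_cache() preserves the 'censys_credentials' entry.
manager = CacheManager(cache_path='/tmp/manager_cache')
manager.set('censys_credentials', {'api_id': '...'}, ttl=None)  # never expires
manager.set('scan:example.com', {'ports': [443]})
manager.clear_cache()
print(manager.has_key('censys_credentials'))  # True
manager.close()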
import logging
from collections.abc import MutableMapping
from pathlib import Path

logger = logging.getLogger(__name__)


class FileDirCache(MutableMapping):
    def __init__(
        self,
        use_listings_cache=True,
        listings_expiry_time=None,
        listings_cache_location=None,
        **kwargs,
    ):
        """
        Parameters
        ----------
        use_listings_cache: bool
            If False, this cache never returns items, but always reports
            KeyError, and setting items has no effect
        listings_expiry_time: int or float (optional)
            Time in seconds that a listing is considered valid. If None,
            listings do not expire.
        listings_cache_location: str (optional)
            Directory path at which the listings cache file is stored. If
            None, an autogenerated path at the user folder is created.
        """
        import appdirs
        from diskcache import Cache

        listings_expiry_time = listings_expiry_time and float(listings_expiry_time)

        if listings_cache_location:
            listings_cache_location = Path(listings_cache_location) / str(
                listings_expiry_time
            )
            listings_cache_location.mkdir(exist_ok=True, parents=True)
        else:
            listings_cache_location = Path(
                appdirs.user_cache_dir(appname="fsspec_dircache")
            ) / str(listings_expiry_time)

        try:
            listings_cache_location.mkdir(exist_ok=True, parents=True)
        except Exception:
            logger.error(f"folder for dircache could not be created at {listings_cache_location}")

        self.cache_location = listings_cache_location
        self._cache = Cache(directory=listings_cache_location)
        self.use_listings_cache = use_listings_cache
        self.listings_expiry_time = listings_expiry_time

    def __getitem__(self, item):
        """Draw item as fileobject from cache, retry if timeout occurs"""
        return self._cache.get(key=item, read=True, retry=True)

    def clear(self):
        self._cache.clear()

    def __len__(self):
        return len(list(self._cache.iterkeys()))

    def __contains__(self, item):
        value = self._cache.get(item, retry=True)  # None, if expired
        if value:
            return True
        return False

    def __setitem__(self, key, value):
        if not self.use_listings_cache:
            return
        self._cache.set(key=key, value=value, expire=self.listings_expiry_time, retry=True)

    def __delitem__(self, key):
        del self._cache[key]

    def __iter__(self):
        return (k for k in self._cache.iterkeys() if k in self)

    def __reduce__(self):
        return (
            FileDirCache,
            (self.use_listings_cache, self.listings_expiry_time, self.cache_location),
        )
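# Usage sketch (hypothetical path): listings expire after 2 seconds, and
# expired entries read back as missing because __contains__ checks the
# live value.
dircache = FileDirCache(listings_expiry_time=2,
                        listings_cache_location='/tmp/dircache_demo')
dircache['/data'] = [{'name': '/data/file.csv', 'size': 10}]
print('/data' in dircache)  # True until the listing expires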
import datetime
import os
import time
import traceback

import numpy as np
from diskcache import Cache


class LStockLoader():

    def __init__(self, save_root, cache_path='tmp/cache', delay=.0,
                 start_year=2007, mode='a', is_detail=True):
        self.save_root = save_root
        if not os.path.exists(self.save_root):
            os.mkdir(self.save_root)
        self.cache = Cache(cache_path)
        self.delay = delay
        self.start_year = start_year
        self.mode = mode
        self.is_detail = is_detail

    def fetch_codes(self):
        # codes = get_codes(self.delay)
        codes = get_codes_sina(self.delay)
        for code in codes:
            self.cache[code] = None
            logger.info('Append Code: %s' % code)
        logger.info('Total Append %s Codes' % len(codes))

    def fetch_code(self, code):
        lstockData = LStockData(delay=self.delay, cache=self.cache)
        lstockData.search_to_h5(code,
                                os.path.join(self.save_root, '%s.h5' % code),
                                self.start_year, self.mode, self.is_detail)
        lstockData.search_to_h5_k_line(
            code, os.path.join(self.save_root, '%s.h5' % code),
            self.start_year, self.mode)

    def fetch_all(self):
        lstockData = LStockData(delay=self.delay, cache=self.cache)
        for code in self.cache.iterkeys():
            lstockData.search_to_h5(
                code, os.path.join(self.save_root, '%s.h5' % code),
                self.start_year, self.mode, self.is_detail)
            lstockData.search_to_h5_k_line(
                code, os.path.join(self.save_root, '%s.h5' % code),
                self.start_year, self.mode)

    def fetch_all_future(self, max_workers=10):
        with LThreadPoolExecutor(max_workers=max_workers) as future:
            try:
                if len(self.cache) == 0:
                    logger.error('Stock Codes Empty...')
                for code in self.cache.iterkeys():
                    h5path = os.path.join(self.save_root, '%s.h5' % code)
                    if os.path.exists(h5path):
                        now = datetime.datetime.now()
                        modify_time = datetime.datetime.fromtimestamp(
                            os.path.getmtime(h5path))
                        start_time = datetime.datetime(now.year, now.month,
                                                       now.day, 15, 0)
                        # skip codes already refreshed after today's close
                        if modify_time > start_time:
                            logger.info('Today pass %s' % code)
                            continue
                    future.submit(self.fetch_code, code)
                logger.info('Today Over...')
            except KeyboardInterrupt:
                raise
            except Exception:
                logger.error(traceback.format_exc())

    def fetch_all_future_loop(self, max_workers=10):
        with LThreadPoolExecutor(max_workers=max_workers) as future:
            while True:
                try:
                    is_over_today = True
                    if len(self.cache) == 0:
                        logger.error('Stock Codes Empty...')
                    for code in self.cache.iterkeys():
                        h5path = os.path.join(self.save_root, '%s.h5' % code)
                        if os.path.exists(h5path):
                            now = datetime.datetime.now()
                            modify_time = datetime.datetime.fromtimestamp(
                                os.path.getmtime(h5path))
                            start_time = datetime.datetime(
                                now.year, now.month, now.day, 9, 36)
                            end_time = datetime.datetime(
                                now.year, now.month, now.day, 15, 15)
                            end_yestoday = end_time - datetime.timedelta(days=1)
                            # refresh while the market is open on a business day
                            if (start_time < now < end_time
                                    and np.is_busday(now.strftime('%Y-%m-%d'))):
                                is_over_today = False
                                future.submit(self.fetch_code, code)
                            # or when the file was last written mid-session
                            elif (start_time < modify_time < end_time
                                  and np.is_busday(now.strftime('%Y-%m-%d'))):
                                is_over_today = False
                                future.submit(self.fetch_code, code)
                            # or when it predates yesterday's close
                            elif (modify_time < end_yestoday and np.is_busday(
                                    end_yestoday.strftime('%Y-%m-%d'))):
                                is_over_today = False
                                future.submit(self.fetch_code, code)
                            else:
                                logger.info('Today data spider: %s' % code)
                        else:
                            is_over_today = False
                            future.submit(self.fetch_code, code)
                    if is_over_today:
                        now = datetime.datetime.now()
                        sleep_time = 0
                        # sleep until 09:37 today, or 09:37 on the next business day
                        if (now < datetime.datetime(now.year, now.month,
                                                    now.day, 9, 37)
                                and np.is_busday(now.strftime('%Y-%m-%d'))):
                            sleep_time = (datetime.datetime(
                                now.year, now.month, now.day, 9, 37) -
                                now).total_seconds()
                        else:
                            if np.is_busday(now.strftime('%Y-%m-%d')):
                                next_work_day = np.busday_offset(
                                    now.strftime('%Y-%m-%d'), 1,
                                    roll='forward').astype(datetime.datetime)
                            else:
                                next_work_day = np.busday_offset(
                                    now.strftime('%Y-%m-%d'), 0,
                                    roll='forward').astype(datetime.datetime)
                            sleep_time = (datetime.datetime(
                                next_work_day.year, next_work_day.month,
                                next_work_day.day, 9, 37) -
                                now).total_seconds()
                        logger.info('Today all data spider... Sleep %ss' %
                                    sleep_time)
                        time.sleep(sleep_time)
                        logger.info('Start Next...')
                except KeyboardInterrupt:
                    raise
                except Exception:
                    logger.error(traceback.format_exc())
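# Hypothetical driver: LStockData, LThreadPoolExecutor, get_codes_sina,
# and logger are project-local helpers assumed importable here.
if __name__ == '__main__':
    loader = LStockLoader('data', cache_path='tmp/cache', delay=0.5)
    loader.fetch_codes()                     # seed the code cache once
    loader.fetch_all_future(max_workers=10)  # single pass over all codes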