Example #1
from diskcache import Cache


class ElementCache:

    map = None

    def __init__(self, path):
        self.map = Cache(path, size_limit=2**42)

    def add(self, key, element):
        self.map[key] = element

    def get(self, key):
        if key not in self.map:
            return None

        return self.map[key]

    def contains(self, key):
        return key in self.map

    def count_elements(self):
        return len(self.map)

    def update(self, key, element):
        self.map[key] = element

    def index_keys(self, index):
        keys = list(self.map.iterkeys())
        for key in keys:
            value = self.map[key]
            key_index = index.to_index(key)

            if key_index > 0:
                self.map[key_index] = value

            del self.map[key]
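
A minimal usage sketch, assuming the ElementCache class above is defined in the same module; the cache directory and the PositionIndex helper are illustrative inventions, not part of the original project.

cache = ElementCache("/tmp/element_cache")
cache.add("alpha", {"value": 1})

print(cache.contains("alpha"))   # True
print(cache.get("alpha"))        # {'value': 1}
print(cache.count_elements())    # 1


class PositionIndex:
    """Hypothetical index object: maps each known key to a positive integer."""

    def __init__(self, keys):
        self._positions = {k: i + 1 for i, k in enumerate(sorted(keys))}

    def to_index(self, key):
        return self._positions.get(key, -1)


# index_keys() re-keys every entry via index.to_index() and drops the old key;
# entries whose index is not positive are removed without being re-added.
cache.index_keys(PositionIndex(["alpha"]))
print(cache.contains(1))         # True
print(cache.contains("alpha"))   # False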
Example #2
class BaseCacheAnalyzer(BaseAnalyzer):
    def __init__(self, cache_location=None, force=False):
        super().__init__()
        self.cache_location = cache_location
        self.cache = None
        self.force = force

    def initialize(self):
        from diskcache import Cache
        self.cache = Cache(self.cache_location or self.uid + "_cache")

    def filter(self, simulation):
        return self.force or not self.is_in_cache(simulation.id)

    def to_cache(self, key, data):
        self.cache.set(key, data)

    def from_cache(self, key):
        return self.cache.get(key)

    def is_in_cache(self, key):
        return key in self.cache

    def __del__(self):
        if self.cache:
            self.cache.close()

    @property
    def keys(self):
        return list(self.cache.iterkeys()) if self.cache else None
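
A standalone sketch of the same skip-if-cached pattern, written against diskcache directly since BaseAnalyzer and the simulation objects are not shown above; the directory name, simulation id, and result dict are illustrative.

from diskcache import Cache

cache = Cache("analyzer_cache")          # illustrative directory
force = False

def should_process(simulation_id):
    # Mirrors BaseCacheAnalyzer.filter(): process when forced or not yet cached.
    return force or simulation_id not in cache

sim_id = "sim-001"
if should_process(sim_id):
    result = {"mean": 0.42}              # stand-in for real analysis output
    cache.set(sim_id, result)            # to_cache()

print(cache.get(sim_id))                 # from_cache()
print(list(cache.iterkeys()))            # the keys property
cache.close()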
Example #3
from typing import Callable

import numpy as np
from diskcache import Cache


def get_best_from_cache(cache: Cache, objective: Callable) -> tuple:
    """
    Helper function for locating the best candidate in a cache according to an objective callable.

    :param cache: diskcache Cache, as in the driver cache
    :param objective: callable accepting a simulation result and returning a scalar float objective
    :return: tuple containing the best candidate and result
    """
    # init
    best_result = np.inf
    best_candidate = None

    # for each candidate
    for candidate in cache.iterkeys():
        if candidate == 'meta':
            continue

        # try to compute the objective
        try:
            # user defined objective value
            result = objective(cache[candidate])

            # update best candidate and results
            if result < best_result:
                best_result = result
                best_candidate = candidate

        # on error, go to next result
        except Exception:
            continue

    return best_candidate, best_result
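
A minimal usage sketch, assuming the function above and its imports are available; the cache directory, candidate keys, and stored results are illustrative.

from diskcache import Cache

with Cache("driver_cache") as cache:
    cache["meta"] = {"note": "bookkeeping entry, skipped by the loop"}
    cache["candidate_a"] = {"loss": 0.7}
    cache["candidate_b"] = {"loss": 0.3}

    # The objective pulls a scalar out of each stored simulation result.
    best_candidate, best_result = get_best_from_cache(
        cache, objective=lambda result: result["loss"]
    )
    print(best_candidate, best_result)    # candidate_b 0.3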
Example #4
import os

import numpy as np
from diskcache import Cache


class FactBaseStats:
    def __init__(self, generator):
        self._cache = Cache(os.path.join(os.path.dirname(__file__), 'stats',
                                         self.__class__.__name__, generator),
                            size_limit=int(2e9))
        self._dataset = None
        self._table = None

    def init(self, dataset, table):
        self._dataset = dataset
        self._table = table
        self._cache.set((self._dataset, self._table), {
            'exact': 0,
            'strict': 0,
            'loose': 0,
            'none': 0
        })

    def _incr(self, field):
        entry = self._cache.get((self._dataset, self._table))
        entry[field] += 1
        self._cache.set((self._dataset, self._table), entry)

    def incr_exact(self):
        self._incr('exact')

    def incr_strict(self):
        self._incr('strict')

    def incr_loose(self):
        self._incr('loose')

    def incr_empty(self):
        self._incr('none')

    def get_dataset_stats(self, dataset):
        stats_keys = ['exact', 'strict', 'loose', 'none']
        stats = np.zeros(4, dtype=int)
        tables = 0
        for key in self._cache.iterkeys():
            if key[0] == dataset:
                tables += 1
                entry = self._cache.get(key)
                stats[0] += entry['exact']
                stats[1] += entry['strict']
                stats[2] += entry['loose']
                stats[3] += entry['none']
        cells = sum(stats)
        print('Dataset:', dataset)
        print('Tables:', tables)
        print('Cells:', cells)
        print('Stats:',
              dict(zip(stats_keys, zip(stats, np.round(stats / cells, 4)))))
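
A standalone sketch of the (dataset, table) tuple-key scheme and the per-dataset aggregation that get_dataset_stats() performs, written against diskcache directly so it runs without the surrounding project; the directory, datasets, and counts are illustrative.

from diskcache import Cache

stats_cache = Cache("fact_base_stats")    # illustrative location
stats_cache.set(("wiki", "t1"), {"exact": 2, "strict": 1, "loose": 0, "none": 0})
stats_cache.set(("wiki", "t2"), {"exact": 1, "strict": 0, "loose": 3, "none": 1})
stats_cache.set(("web", "t1"), {"exact": 5, "strict": 0, "loose": 0, "none": 0})

# Aggregate one dataset the same way get_dataset_stats() does.
totals = {"exact": 0, "strict": 0, "loose": 0, "none": 0}
for key in stats_cache.iterkeys():
    if key[0] == "wiki":
        entry = stats_cache.get(key)
        for field in totals:
            totals[field] += entry[field]

print(totals)    # {'exact': 3, 'strict': 1, 'loose': 3, 'none': 1}
stats_cache.close()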
Example #5
class BaseCacheAnalyzer(BaseAnalyzer):
    def __init__(self,
                 cache_location=None,
                 force=False,
                 delete_cache_when_done=False,
                 **kwargs):
        super().__init__(**kwargs)
        self.cache_location = cache_location
        self.cache = None
        self.force = force
        self.delete_cache_when_done = delete_cache_when_done

    def initialize(self):
        from diskcache import Cache
        self.cache = Cache(self.cache_location or self.uid + "_cache")

    def filter(self, simulation):
        return self.force or not self.is_in_cache(simulation.id)

    def to_cache(self, key, data):
        self.cache.set(key, data)

    def from_cache(self, key):
        return self.cache.get(key)

    def is_in_cache(self, key):
        return key in self.cache

    def destroy(self):
        if self.cache:
            self.cache.close()

        if self.cache and self.delete_cache_when_done and os.path.exists(
                self.cache.directory):
            cache_directory = self.cache.directory
            del self.cache
            shutil.rmtree(cache_directory)

    @property
    def keys(self):
        return list(self.cache.iterkeys()) if self.cache else None
Example #6
class CacheManager(object):
    def __init__(self, cache_path=CACHE_PATH):
        self.cache = Cache(cache_path)

    def items(self):
        return self.cache.iterkeys()

    def has_key(self, key):
        return key in self.cache

    def set(self, key, value, ttl=TTL):
        return self.cache.set(key=key, value=value, expire=ttl)

    def get(self, key):
        return self.cache.get(key=key)

    def clear_cache(self):
        for key in self.cache:
            if key != 'censys_credentials':
                del self.cache[key]
        return True

    def close(self):
        self.cache.close()
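
A minimal usage sketch of CacheManager; CACHE_PATH and TTL are module-level constants that are not shown above, so the path and expiry values here are illustrative and the TTL default is bypassed by passing ttl explicitly.

manager = CacheManager(cache_path="/tmp/censys_cache")
manager.set("censys_credentials", {"api_id": "example-id", "api_secret": "example-secret"}, ttl=None)
manager.set("scan:example.com", {"ports": [80, 443]}, ttl=3600)

print(manager.has_key("scan:example.com"))   # True until the one-hour TTL expires
print(list(manager.items()))                 # all cached keys

manager.clear_cache()                        # removes everything except the credentials
print(list(manager.items()))                 # only 'censys_credentials' should remain
manager.close()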
Example #7
class FileDirCache(MutableMapping):
    def __init__(
        self,
        use_listings_cache=True,
        listings_expiry_time=None,
        listings_cache_location=None,
        **kwargs,
    ):
        """

        Parameters
        ----------
        use_listings_cache: bool
            If False, this cache never returns items, but always reports KeyError,
            and setting items has no effect
        listings_expiry_time: int or float (optional)
            Time in seconds that a listing is considered valid. If None,
            listings do not expire.
        listings_cache_location: str (optional)
            Directory path at which the listings cache file is stored. If None,
            an autogenerated path at the user folder is created.

        """
        import appdirs
        from diskcache import Cache

        listings_expiry_time = listings_expiry_time and float(listings_expiry_time)

        if listings_cache_location:
            listings_cache_location = Path(listings_cache_location) / str(listings_expiry_time)
            listings_cache_location.mkdir(exist_ok=True, parents=True)
        else:
            listings_cache_location = Path(appdirs.user_cache_dir(appname="fsspec_dircache")) / str(
                listings_expiry_time
            )

        try:
            listings_cache_location.mkdir(exist_ok=True, parents=True)
        except Exception:
            logger.error(f"folder for dircache could not be created at {listings_cache_location}")

        self.cache_location = listings_cache_location

        self._cache = Cache(directory=listings_cache_location)
        self.use_listings_cache = use_listings_cache
        self.listings_expiry_time = listings_expiry_time

    def __getitem__(self, item):
        """Draw item as fileobject from cache, retry if timeout occurs"""
        return self._cache.get(key=item, read=True, retry=True)

    def clear(self):
        self._cache.clear()

    def __len__(self):
        return len(list(self._cache.iterkeys()))

    def __contains__(self, item):
        value = self._cache.get(item, retry=True)  # None, if expired
        if value:
            return True
        return False

    def __setitem__(self, key, value):
        if not self.use_listings_cache:
            return
        self._cache.set(key=key, value=value, expire=self.listings_expiry_time, retry=True)

    def __delitem__(self, key):
        del self._cache[key]

    def __iter__(self):
        return (k for k in self._cache.iterkeys() if k in self)

    def __reduce__(self):
        return (
            FileDirCache,
            (self.use_listings_cache, self.listings_expiry_time, self.cache_location),
        )
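
A minimal usage sketch, assuming the module-level dependencies the class relies on (MutableMapping, Path, logger, and the appdirs package) are importable; the cache directory, key, and listing entries are illustrative.

listings = FileDirCache(
    use_listings_cache=True,
    listings_expiry_time=60,                      # listings stay valid for 60 seconds
    listings_cache_location="/tmp/dircache",
)

listings["s3://bucket/prefix"] = [{"name": "a.csv"}, {"name": "b.csv"}]

print("s3://bucket/prefix" in listings)           # True while the entry is unexpired
print(len(listings))                              # number of cached listings
print(listings["s3://bucket/prefix"])             # the cached directory listing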
Example #8
class LStockLoader():
    def __init__(self,
                 save_root,
                 cache_path='tmp/cache',
                 delay=.0,
                 start_year=2007,
                 mode='a',
                 is_detail=True):
        self.save_root = save_root
        if not os.path.exists(self.save_root):
            os.mkdir(self.save_root)

        self.cache = Cache(cache_path)

        self.delay = delay
        self.start_year = start_year
        self.mode = mode
        self.is_detail = is_detail

    def fetch_codes(self):
        # codes = get_codes(self.delay)
        codes = get_codes_sina(self.delay)
        for code in codes:
            # if code not in self.cache:
            self.cache[code] = None
            logger.info('Append Code: %s' % (code))
        logger.info('Total Append %s Codes' % (len(codes)))

    def fetch_code(self, code):
        lstockData = LStockData(delay=self.delay, cache=self.cache)
        lstockData.search_to_h5(code,
                                os.path.join(self.save_root, '%s.h5' % code),
                                self.start_year, self.mode, self.is_detail)
        lstockData.search_to_h5_k_line(
            code, os.path.join(self.save_root, '%s.h5' % code),
            self.start_year, self.mode)

    def fetch_all(self):
        lstockData = LStockData(delay=self.delay, cache=self.cache)
        for code in self.cache.iterkeys():
            lstockData.search_to_h5(
                code, os.path.join(self.save_root, '%s.h5' % code),
                self.start_year, self.mode, self.is_detail)
            lstockData.search_to_h5_k_line(
                code, os.path.join(self.save_root, '%s.h5' % code),
                self.start_year, self.mode)

    def fetch_all_future(self, max_workers=10):
        with LThreadPoolExecutor(max_workers=max_workers) as future:
            try:
                if len(self.cache) == 0:
                    logger.error('Stock Codes Empty...')

                for code in self.cache.iterkeys():
                    h5path = os.path.join(self.save_root, '%s.h5' % code)
                    if os.path.exists(h5path):
                        now = datetime.datetime.now()
                        modify_time = datetime.datetime.fromtimestamp(
                            os.path.getmtime(h5path))

                        start_time = datetime.datetime(now.year, now.month,
                                                       now.day, 15, 0)
                        if modify_time > start_time:
                            logger.info('Today pass %s' % code)
                            continue

                    future.submit(self.fetch_code, code)

                logger.info('Today Over...')
            except KeyboardInterrupt:
                raise
            except Exception:
                logger.error(traceback.format_exc())

    def fetch_all_future_loop(self, max_workers=10):
        with LThreadPoolExecutor(max_workers=max_workers) as future:
            while True:
                try:
                    is_over_today = True

                    if len(self.cache) == 0:
                        logger.error('Stock Codes Empty...')

                    for code in self.cache.iterkeys():
                        h5path = os.path.join(self.save_root, '%s.h5' % code)

                        if os.path.exists(h5path):

                            now = datetime.datetime.now()
                            modify_time = datetime.datetime.fromtimestamp(
                                os.path.getmtime(h5path))

                            start_time = datetime.datetime(
                                now.year, now.month, now.day, 9, 36)
                            end_time = datetime.datetime(
                                now.year, now.month, now.day, 15, 15)
                            # start_yestoday = start_time - datetime.timedelta(days=1)
                            end_yestoday = end_time - datetime.timedelta(
                                days=1)
                            today_zero = datetime.datetime(year=now.year,
                                                           month=now.month,
                                                           day=now.day)

                            if now > start_time and now < end_time and np.is_busday(
                                    now.strftime('%Y-%m-%d')
                            ):  # now.weekday() not in (6, 7):
                                is_over_today = False
                                future.submit(self.fetch_code, code)
                            elif modify_time < end_time and modify_time > start_time and np.is_busday(
                                    now.strftime('%Y-%m-%d')):
                                is_over_today = False
                                future.submit(self.fetch_code, code)
                            elif modify_time < end_yestoday and np.is_busday(
                                    end_yestoday.strftime('%Y-%m-%d')
                            ):  # end_yestoday.weekday() not in (1, 7):
                                is_over_today = False
                                future.submit(self.fetch_code, code)
                            else:
                                logger.info('Today data spider: %s' % code)

                        else:
                            is_over_today = False
                            future.submit(self.fetch_code, code)

                    if is_over_today:
                        now = datetime.datetime.now()

                        sleep_time = 0
                        if now.hour >= 0 and now < datetime.datetime(
                                now.year, now.month, now.day,
                                9, 37) and np.is_busday(
                                    now.strftime('%Y-%m-%d')
                                ):  # now.weekday() not in (6, 7):
                            sleep_time = (datetime.datetime(
                                now.year, now.month, now.day, 9, 37) -
                                          now).total_seconds()
                        else:
                            # next_work_day = np.busday_offset(now.strftime('%Y-%m-%d'), 1).astype('M8[ms]').astype('O')
                            if np.is_busday(now.strftime('%Y-%m-%d')):
                                next_work_day = np.busday_offset(
                                    now.strftime('%Y-%m-%d'),
                                    1,
                                    roll='forward').astype(datetime.datetime)
                            else:
                                next_work_day = np.busday_offset(
                                    now.strftime('%Y-%m-%d'),
                                    0,
                                    roll='forward').astype(datetime.datetime)
                            sleep_time = (datetime.datetime(
                                next_work_day.year, next_work_day.month,
                                next_work_day.day, 9, 37) -
                                          now).total_seconds()

                        logger.info('Today all data spider... Sleep %ss' %
                                    sleep_time)
                        time.sleep(sleep_time)

                    logger.info('Start Next...')
                except KeyboardInterrupt:
                    raise
                except Exception:
                    logger.error(traceback.format_exc())
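
A standalone sketch of the file-freshness check that fetch_all_future() performs before submitting a code to the thread pool: a stock code is skipped when its HDF5 file was already refreshed after today's 15:00 cutoff (the path and cutoff are illustrative).

import datetime
import os

def needs_refresh(h5path, cutoff_hour=15, cutoff_minute=0):
    if not os.path.exists(h5path):
        return True                                # never fetched before
    now = datetime.datetime.now()
    cutoff = datetime.datetime(now.year, now.month, now.day,
                               cutoff_hour, cutoff_minute)
    modify_time = datetime.datetime.fromtimestamp(os.path.getmtime(h5path))
    return modify_time <= cutoff                   # stale unless written after the cutoff

print(needs_refresh("/tmp/600000.h5"))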