예제 #1
0
 def data(self) -> List[CalVT]:
     """Return the calendar, resampled when the file frequency differs from ``self.freq``.

     The raw calendar is produced by ``self._read_calendar()``. When
     ``self.enable_read_cache`` is truthy it is memoised in ``H["c"]`` under
     a key derived from ``self.uri``.

     Returns:
         The raw calendar when ``Freq(self._freq_file)`` equals
         ``Freq(self.freq)``; otherwise the output of ``resam_calendar``.
     """
     self.check()

     if not self.enable_read_cache:
         raw_calendar = self._read_calendar()
     else:
         # Keyed by the file location, so the file is only re-read when its
         # entry is missing from the cache.
         cache_key = f"orig_file{self.uri!s}"
         if cache_key not in H["c"]:
             H["c"][cache_key] = self._read_calendar()
         raw_calendar = H["c"][cache_key]

     if Freq(self._freq_file) != Freq(self.freq):
         # The stored frequency is not the requested one: convert every entry
         # to a timestamp and resample.
         timestamps = np.array([pd.Timestamp(entry) for entry in raw_calendar])
         return resam_calendar(timestamps, self._freq_file, self.freq, self.region)
     return raw_calendar
예제 #2
0
    def _freq_file(self) -> str:
        """Return the frequency whose file is actually read.

        The result is computed once and stored on ``self._freq_file_cache``.
        If ``Freq(self.freq)`` is not in ``self.support_freq``, the value
        returned by ``Freq.get_recent_freq`` is used instead, so that data can
        be read at that frequency and resampled.

        NOTE(review): the annotation says ``str`` but the stored value is a
        ``Freq`` instance -- confirm which one callers expect.

        Raises:
            ValueError: if ``Freq.get_recent_freq`` returns ``None``.
        """
        if hasattr(self, "_freq_file_cache"):
            return self._freq_file_cache

        file_freq = Freq(self.freq)
        if file_freq not in self.support_freq:
            # NOTE: uri
            #   1. If `uri` does not exist
            #       - Get the `min_uri` of the closest `freq` under the same "directory" as the `uri`
            #       - Read data from `min_uri` and resample to `freq`
            file_freq = Freq.get_recent_freq(file_freq, self.support_freq)
            if file_freq is None:
                raise ValueError(f"can't find a freq from {self.support_freq} that can resample to {self.freq}!")

        self._freq_file_cache = file_freq
        return self._freq_file_cache
예제 #3
0
    def data(self) -> List[CalVT]:
        """Return the calendar for ``self.freq``, resampling from a stored frequency if needed.

        The calendar is first read directly (``self.check()`` followed by
        ``self._read_calendar()``). If that raises ``ValueError``, the closest
        frequency reported by ``Freq.get_recent_freq`` among
        ``self._get_storage_freq()`` is used instead: ``self.file_name`` is
        re-pointed at that frequency's file, the file content is cached in
        ``H["c"]`` keyed by ``self.uri``, and the result is resampled to
        ``self.freq`` with ``resam_calendar``.

        Side effects:
            On the fallback path ``self.file_name`` is overwritten and an
            entry may be added to ``H["c"]``.

        Raises:
            ValueError: if no stored frequency can be resampled to ``self.freq``.
        """
        # NOTE: uri
        #   1. If `uri` does not exist
        #       - Get the `min_uri` of the closest `freq` under the same "directory" as the `uri`
        #       - Read data from `min_uri` and resample to `freq`
        try:
            self.check()
            _calendar = self._read_calendar()
        except ValueError:
            freq_list = self._get_storage_freq()
            _freq = Freq.get_recent_freq(self.freq, freq_list)
            if _freq is None:
                raise ValueError(
                    f"can't find a freq from {freq_list} that can resample to {self.freq}!"
                )
            # Lower-case the complete file name. Previously `.lower()` bound
            # only to the non-future operand of the conditional expression,
            # so the two file-name variants were normalised differently.
            suffix = "_future.txt" if self.future else ".txt"
            self.file_name = f"{_freq}{suffix}".lower()
            # The cache is useful for the following cases
            # - multiple frequencies are sampled from the same calendar
            cache_key = self.uri
            if cache_key not in H["c"]:
                H["c"][cache_key] = self._read_calendar()
            _calendar = H["c"][cache_key]
            _calendar = resam_calendar(
                np.array([pd.Timestamp(entry) for entry in _calendar]), _freq, self.freq)

        return _calendar
예제 #4
0
 def support_freq(self) -> List[str]:
     """Return the list of supported frequencies, computing it on first use.

     The result is stored on ``self._support_freq`` and returned as-is on
     later calls. When ``self.provider_uri`` has a single entry keyed by
     ``C.DEFAULT_FREQ``, the frequencies are taken from the stems of the
     ``*.txt`` files in the ``calendars`` directory, skipping stems that end
     with ``_future``; otherwise the keys of ``self.provider_uri`` are used.

     NOTE(review): the annotation says ``List[str]`` but newly computed
     entries are ``Freq`` instances.
     """
     if hasattr(self, "_support_freq"):
         return self._support_freq

     if len(self.provider_uri) == 1 and C.DEFAULT_FREQ in self.provider_uri:
         calendar_dir = self.dpm.get_data_uri(C.DEFAULT_FREQ).joinpath("calendars")
         stems = (txt_file.stem for txt_file in calendar_dir.glob("*.txt"))
         names = (stem for stem in stems if not stem.endswith("_future"))
     else:
         names = self.provider_uri.keys()

     supported = [Freq(name) for name in names]
     self._support_freq = supported
     return supported
예제 #5
0
    def set_params(self, tdx_files, data_dir, freq):
        """Configure the input files, frequency and output directories.

        Args:
            tdx_files: Sequence of input files. Only the first
                ``int(self.limit_nums)`` entries are kept when
                ``self.limit_nums`` is not ``None``.
            data_dir: Base output directory; must provide ``joinpath``.
            freq: Frequency specification accepted by ``Freq``.

        Side effects:
            Sets ``csv_files``, ``freq``, ``calendar_format``,
            ``_calendars_list`` and the three ``_*_dir`` attributes on ``self``.
        """
        if self.limit_nums is None:
            self.csv_files = tdx_files
        else:
            self.csv_files = tdx_files[: int(self.limit_nums)]

        self.freq = str(Freq(freq))
        # Choose the format from the normalised frequency as well as the raw
        # argument: comparing only the raw argument picked the high-frequency
        # format for any other spelling that ``Freq`` normalises to "day".
        is_daily = freq == "day" or self.freq == "day"
        self.calendar_format = self.DAILY_FORMAT if is_daily else self.HIGH_FREQ_FORMAT

        self._calendars_list = []

        self._calendars_dir = data_dir.joinpath(self.CALENDARS_DIR_NAME)
        self._features_dir = data_dir.joinpath(self.FEATURES_DIR_NAME)
        self._instruments_dir = data_dir.joinpath(self.INSTRUMENTS_DIR_NAME)
예제 #6
0
    def uri(self) -> Path:
        """Build the path of the storage file to read.

        If ``self.freq`` is not in ``self.support_freq``, the frequency
        returned by ``Freq.get_recent_freq`` is recorded on
        ``self.resample_freq``. The path is then
        ``<data uri for self.use_freq>/<storage_name>s/<file_name>``.

        Raises:
            ValueError: if ``Freq.get_recent_freq`` returns ``None``.
        """
        requested = self.freq
        if requested not in self.support_freq:
            # NOTE: uri
            #   1. If `uri` does not exist
            #       - Get the `min_uri` of the closest `freq` under the same "directory" as the `uri`
            #       - Read data from `min_uri` and resample to `freq`
            fallback = Freq.get_recent_freq(requested, self.support_freq)
            if fallback is None:
                raise ValueError(
                    f"can't find a freq from {self.support_freq} that can resample to {self.freq}!"
                )
            self.resample_freq = fallback

        base_dir = self.dpm.get_data_uri(self.use_freq)
        storage_dir = f"{self.storage_name}s"
        return base_dir.joinpath(storage_dir, self.file_name)