def _prepare_calender_cache(self):
    """Preload the 1-minute calendars so that they are already cached.

    The calls are made only for their side effect; nothing is returned.
    """
    # Loading the calendars up front relies on the copy-on-write behaviour of
    # Linux, so that subprocesses do not each calculate the calendar again.
    # This may speed things up, but may be of no use on Windows and macOS.
    minute_freq = "1min"
    Cal.calendar(freq=minute_freq)
    get_calendar_day(freq=minute_freq)
def get_data_cal_range_limit(self, rtype: str = "full", raise_error: bool = False) -> Tuple[int, int]:
    """
    Return the range limit of this decision, measured in data-calendar steps.

    NOTE: this is the **total** range limit, not the limit of a single step.

    Assumptions
    1) The frequency of the exchange in common_infra equals that of the data calendar
    2) Callers want indices counted from the start of the **day** (i.e. 240 min)

    Parameters
    ----------
    rtype: str
        - "full": the full limitation of the decision within the day
        - "step": the limitation of the current step only
    raise_error: bool
        Only consulted when no trade_range is set.
        True: raise NotImplementedError to signal that no range limit is available.
        False: fall back to the whole trading day. Useful when the caller wants
        to follow an order-specific trading time range because a decision-level
        trade range is not available.

    Returns
    -------
    Tuple[int, int]:
        (start, end) offsets relative to the first calendar index of the day

    Raises
    ------
    NotImplementedError:
        trade_range is None and raise_error is True
    ValueError:
        trade_range is set and rtype is neither "full" nor "step"
    """
    # potential performance issue
    day_begin = pd.Timestamp(self.start_time.date())
    day_finish = epsilon_change(day_begin + pd.Timedelta(days=1))
    freq = self.strategy.trade_exchange.freq
    _, _, day_first_idx, day_last_idx = Cal.locate_index(day_begin, day_finish, freq=freq)

    # Without a trade range there is nothing to clip against.
    if self.trade_range is None:
        if raise_error:
            raise NotImplementedError(f"There is no trade_range in this case")
        return 0, day_last_idx - day_first_idx

    # Choose the window that the trade range is clipped to.
    if rtype == "full":
        window = (day_begin, day_finish)
    elif rtype == "step":
        window = (self.start_time, self.end_time)
    else:
        raise ValueError(f"This type of input {rtype} is not supported")

    clipped_start, clipped_end = self.trade_range.clip_time_range(*window)
    _, _, first_idx, last_idx = Cal.locate_index(clipped_start, clipped_end, freq=freq)
    # Both bounds are reported relative to the first index of the day.
    return first_idx - day_first_idx, last_idx - day_first_idx
def get_calendar_day(freq="1min", future=False):
    """
    Return the date of every calendar entry, using the in-memory cache.

    !!!NOTE: Loading the calendar is quite slow, so loading it before
    multiprocessing starts will make things faster.

    Parameters
    ----------
    freq : str
        frequency of the calendar file to read.
    future : bool
        whether to include future trading days.

    Returns
    -------
    _calendar:
        array of date.
    """
    cache_key = f"{freq}_future_{future}_day"
    # Cache hit: hand back the stored array.
    if cache_key in H["c"]:
        return H["c"][cache_key]
    # Cache miss: reduce each calendar entry to its date and remember the result.
    day_array = np.array([stamp.date() for stamp in Cal.load_calendar(freq, future)])
    H["c"][cache_key] = day_array
    return day_array
def get_calendar_day(freq="day", future=False):
    """Return an array holding the date of each calendar entry, cached in ``H["c"]``.

    Parameters
    ----------
    freq : str
        frequency passed to ``Cal.load_calendar``.
    future : bool
        ``future`` argument passed to ``Cal.load_calendar``.

    Returns
    -------
    array of date.
    """
    key = f"{freq}_future_{future}_day"
    if key not in H["c"]:
        # First request for this (freq, future) pair: build the array and store it.
        dates = np.array([entry.date() for entry in Cal.load_calendar(freq, future)])
        H["c"][key] = dates
        return dates
    return H["c"][key]
def get_calendar_minute(freq="day", future=False):
    """Load High-Freq Calendar Minute Using Memcache.

    Each calendar entry is reduced to ``minute // 30``, i.e. 0 for minutes
    0-29 of the hour and 1 for minutes 30-59.

    Parameters
    ----------
    freq : str
        frequency of read calendar file.
    future : bool
        whether including future trading day.

    Returns
    -------
    _calendar:
        array of ``minute // 30`` values, one per calendar entry.
    """
    # The key needs its own "_minute" suffix. With the former "_day" suffix it
    # was identical to the key built by get_calendar_day for the same
    # (freq, future), so whichever function ran first made the other one
    # return the wrong cached array.
    flag = f"{freq}_future_{future}_minute"
    if flag in H["c"]:
        _calendar = H["c"][flag]
    else:
        _calendar = np.array([x.minute // 30 for x in Cal.load_calendar(freq, future)])
        H["c"][flag] = _calendar
    return _calendar
def setUpClass(cls, enable_1d_type="simple", enable_1min=False) -> None:
    """Prepare the class-level fixtures shared by the tests.

    After the parent set-up has run, this stores on ``cls``: the instrument
    mapping of csi300 filtered by the name rule ``SH600110``, the daily
    calendar for 2005, its first and last entries, and the first
    instrument together with its spans.
    """
    # use default data
    super().setUpClass(enable_1d_type, enable_1min)

    name_filter = NameDFilter(name_rule_re="SH600110")
    universe = D.instruments("csi300", filter_pipe=[name_filter])
    range_start, range_end, bar_freq = "2005-01-04", "2005-12-31", "day"

    inst_map = DatasetD.get_instruments_d(universe, bar_freq)
    cls.instruments_d = inst_map

    calendar = Cal.calendar(range_start, range_end, bar_freq)
    cls.cal = calendar
    # The effective range is taken from the calendar, not from the raw strings.
    cls.start_time = calendar[0]
    cls.end_time = calendar[-1]

    # Only the first instrument and its spans are kept.
    cls.inst = list(inst_map.keys())[0]
    cls.spans = list(inst_map.values())[0]
def get_calendar_day(freq="day", future=False):
    """Fetch the date of each high-freq calendar entry through the memory cache.

    Parameters
    ----------
    freq : str
        frequency of the calendar file to read.
    future : bool
        whether future trading days are included.

    Returns
    -------
    _calendar:
        array of date.
    """
    flag = f"{freq}_future_{future}_day"
    calendar_cache = H["c"]
    if flag in calendar_cache:
        return calendar_cache[flag]
    # Not cached yet: load the calendar, keep only the date part, then store it.
    loaded = Cal.load_calendar(freq, future)
    _calendar = np.array([moment.date() for moment in loaded])
    calendar_cache[flag] = _calendar
    return _calendar
}, }, }
# NOTE(review): the three closers above end a nested literal that begins before this excerpt.

"""initialize qlib"""
# use yahoo_cn_1min data
# Later entries win in a dict merge: keys in SPEC_CONF override the same keys in HIGH_FREQ_CONFIG.
QLIB_INIT_CONFIG = {**HIGH_FREQ_CONFIG, **SPEC_CONF}
print(QLIB_INIT_CONFIG)
provider_uri = QLIB_INIT_CONFIG.get("provider_uri")
# Fetch the 1min data only when exists_qlib_data reports nothing at provider_uri.
# NOTE(review): nesting was reconstructed from a collapsed line - confirm that the download call belongs under this `if`.
if not exists_qlib_data(provider_uri):
    print(f"Qlib data is not found in {provider_uri}")
    GetData().qlib_data(target_dir=provider_uri, interval="1min", region=REG_CN)
qlib.init(**QLIB_INIT_CONFIG)

# Load the 1min calendar and its per-day variant right after initialization.
# NOTE(review): presumably done to warm the calendar caches before the work below - confirm.
Cal.calendar(freq="1min")
get_calendar_day(freq="1min")

# get data
# `task` is defined outside this excerpt; its "dataset" entry configures the object built here.
dataset = init_instance_by_config(task["dataset"])
xtrain, xtest = dataset.prepare(["train", "test"])
print(xtrain, xtest)
# Written with a relative path, so the file lands in the current working directory.
xtrain.to_csv("xtrain.csv")

dataset_backtest = init_instance_by_config(task["dataset_backtest"])
backtest_train, backtest_test = dataset_backtest.prepare(["train", "test"])
print(backtest_train, backtest_test)

# model initialization
model = init_instance_by_config(task["model"])
# NOTE(review): this builds a second instance from task["dataset"] and rebinds `dataset` - confirm it is intentional.
dataset = init_instance_by_config(task["dataset"])