def __init__(
    self,
    strategies: Union[OnlineStrategy, List[OnlineStrategy]],
    trainer: Trainer = None,
    begin_time: Union[str, pd.Timestamp] = None,
    freq="day",
):
    """
    Init OnlineManager.
    One OnlineManager must have at least one OnlineStrategy.

    Args:
        strategies (Union[OnlineStrategy, List[OnlineStrategy]]): a single OnlineStrategy or a list of them.
            A single instance is wrapped into a one-element list.
        trainer (Trainer): the trainer to train task. None for using TrainerR.
        begin_time (Union[str,pd.Timestamp], optional): the OnlineManager will begin at this time.
            Defaults to None, which uses the maximum of ``D.calendar(freq=freq)``.
        freq (str, optional): data frequency. Defaults to "day".
    """
    self.logger = get_module_logger(self.__class__.__name__)

    # Normalise the strategies argument so the rest of the class always sees a list.
    self.strategies = strategies if isinstance(strategies, list) else [strategies]
    self.freq = freq

    # Fall back to the latest calendar date when no explicit start is given.
    start = D.calendar(freq=self.freq).max() if begin_time is None else begin_time
    self.begin_time = pd.Timestamp(start)
    self.cur_time = self.begin_time

    # OnlineManager records the history of online models here,
    # as a dict like {pd.Timestamp: {strategy: [online_models]}}.
    self.history = {}

    self.trainer = TrainerR() if trainer is None else trainer
    self.signals = None
    self.status = self.STATUS_NORMAL
def delay_prepare(self, model_kwargs=None, signal_kwargs=None):
    """
    Prepare all models and signals if something is waiting for preparation.

    Iterates over ``self.history`` in its stored order. For every recorded time, each strategy
    whose set of online models differs from the set seen at the previous recorded time has those
    models passed to ``self.trainer.end_train`` and its online tag reset. If any strategy changed,
    signals are prepared for that time.

    Args:
        model_kwargs (dict, optional): the params for `end_train`. None means no extra params.
        signal_kwargs (dict, optional): the params for `prepare_signals`. None means no extra params.
    """
    # Use None as the default instead of a shared mutable dict.
    model_kwargs = {} if model_kwargs is None else model_kwargs
    signal_kwargs = {} if signal_kwargs is None else signal_kwargs

    last_models = {}
    # Start from the first calendar entry so the first comparison below never triggers the warning.
    signals_time = D.calendar()[0]
    need_prepare = False
    for cur_time, strategy_models in self.history.items():
        self.cur_time = cur_time

        for strategy, models in strategy_models.items():
            # only new online models need to prepare
            if last_models.setdefault(strategy, set()) != set(models):
                models = self.trainer.end_train(models, experiment_name=strategy.name_id, **model_kwargs)
                strategy.tool.reset_online_tag(models)
                need_prepare = True
            last_models[strategy] = set(models)

        if need_prepare:
            # NOTE: Assumption: the predictions of online models need less than next cur_time, or this method will work in a wrong way.
            self.prepare_signals(**signal_kwargs)
            if signals_time > cur_time:
                # `Logger.warn` is deprecated; `warning` is the supported spelling.
                self.logger.warning(
                    f"The signals have already parpred to {signals_time} by last preparation, but current time is only {cur_time}. This may be because the online models predict more than they should, which can cause signals to be contaminated by the offline models."
                )
            need_prepare = False
            # Remember how far the prepared signals reach for the check at the next recorded time.
            signals_time = self.signals.index.get_level_values("datetime").max()
def prepare_tasks(self, cur_time) -> List[dict]:
    """
    Prepare new tasks based on cur_time (None for the latest).

    You can find the last online models by OnlineToolR.online_models.

    Args:
        cur_time: the time to roll to. None uses the last entry of ``D.calendar(end_time=None)``.

    Returns:
        List[dict]: a list of new tasks. Empty when there are no latest online recorders, or when
        the interval between the current time and the last rolling test begin time is smaller
        than the rolling step.
    """
    latest_records, max_test = self._list_latest(self.tool.online_models())
    if max_test is None:
        # `Logger.warn` is deprecated; `warning` is the supported spelling.
        self.logger.warning("No latest online recorders, no new tasks.")
        return []

    calendar_latest = D.calendar(end_time=cur_time)[-1] if cur_time is None else cur_time
    # Compute the interval once; it is used both for logging and for the rolling decision.
    interval = self.ta.cal_interval(calendar_latest, max_test[0])
    self.logger.info(
        f"The interval between current time {calendar_latest} and last rolling test begin time {max_test[0]} is {interval}, the rolling step is {self.rg.step}"
    )
    if interval < self.rg.step:
        return []

    old_tasks = []
    tasks_tmp = []
    for rec in latest_records:
        task = rec.load_object("task")
        # Keep an untouched copy so tasks identical to the existing ones can be filtered out below.
        old_tasks.append(deepcopy(task))
        test_begin = task["dataset"]["kwargs"]["segments"]["test"][0]
        # modify the test segment to generate new tasks
        task["dataset"]["kwargs"]["segments"]["test"] = (test_begin, calendar_latest)
        tasks_tmp.append(task)

    new_tasks_tmp = task_generator(tasks_tmp, self.rg)
    return [task for task in new_tasks_tmp if task not in old_tasks]
def simulate(
    self, end_time, frequency="day", task_kwargs=None, model_kwargs=None, signal_kwargs=None
) -> Union[pd.Series, pd.DataFrame]:
    """
    Starting from the current time, this method will simulate every routine in OnlineManager until the end time.

    Considering the parallel training, the models and signals can be prepared after all routine simulating.

    The delay training way can be ``DelayTrainer`` and the delay preparing signals way can be ``delay_prepare``.

    The manager status is set to ``STATUS_SIMULATING`` for the duration of the call and is always
    set back to ``STATUS_NORMAL`` afterwards, even if a routine raises. Likewise, once the global
    logger level has been switched to the simulation level it is always reset to ``logging.DEBUG``.

    Args:
        end_time: the time the simulation will end
        frequency: the calendar frequency
        task_kwargs (dict, optional): the params for `prepare_tasks`. None means no extra params.
        model_kwargs (dict, optional): the params for `prepare_online_models`. None means no extra params.
        signal_kwargs (dict, optional): the params for `prepare_signals`. None means no extra params.

    Returns:
        Union[pd.Series, pd.DataFrame]: pd.Series for only one signals every datetime.
        pd.DataFrame for multiple signals, for example, buy and sell operations use different trading signals.
    """
    # Use None as the default instead of shared mutable dicts.
    task_kwargs = {} if task_kwargs is None else task_kwargs
    model_kwargs = {} if model_kwargs is None else model_kwargs
    signal_kwargs = {} if signal_kwargs is None else signal_kwargs

    self.status = self.STATUS_SIMULATING
    try:
        cal = D.calendar(start_time=self.cur_time, end_time=end_time, freq=frequency)
        self.first_train()

        simulate_level = self.SIM_LOG_LEVEL
        set_global_logger_level(simulate_level)
        try:
            logging.addLevelName(simulate_level, self.SIM_LOG_NAME)

            for cur_time in cal:
                self.logger.log(level=simulate_level, msg=f"Simulating at {str(cur_time)}......")
                self.routine(
                    cur_time,
                    task_kwargs=task_kwargs,
                    model_kwargs=model_kwargs,
                    signal_kwargs=signal_kwargs,
                )
            # delay prepare the models and signals
            if self.trainer.is_delay():
                self.delay_prepare(model_kwargs=model_kwargs, signal_kwargs=signal_kwargs)
        finally:
            # Reset the global level even when a routine fails, so a crash does not leave it changed.
            # FIXME: get logging level firstly and restore it here
            set_global_logger_level(logging.DEBUG)
        self.logger.info("Finished preparing signals")
    finally:
        # Never leave the manager stuck in the simulating state after an error.
        self.status = self.STATUS_NORMAL
    return self.get_signals()
def routine(
    self,
    cur_time: Union[str, pd.Timestamp] = None,
    task_kwargs: dict = None,
    model_kwargs: dict = None,
    signal_kwargs: dict = None,
):
    """
    Typical update process for every strategy and record the online history.

    The typical update process after a routine, such as day by day or month by month.
    The process is: Prepare tasks -> Train -> Prepare online models -> Update predictions
    (normal status only) -> End train and prepare signals.

    When the manager is simulating and the trainer reports ``is_delay()``, the final
    ``end_train`` / ``prepare_signals`` step is skipped here.

    Args:
        cur_time (Union[str,pd.Timestamp], optional): run routine method in this time.
            Defaults to None, which uses the maximum of ``D.calendar(freq=self.freq)``.
        task_kwargs (dict, optional): the params for `prepare_tasks`. None means no extra params.
        model_kwargs (dict, optional): the params for `prepare_online_models`. None means no extra params.
        signal_kwargs (dict, optional): the params for `prepare_signals`. None means no extra params.
    """
    # Use None as the default instead of shared mutable dicts.
    task_kwargs = {} if task_kwargs is None else task_kwargs
    model_kwargs = {} if model_kwargs is None else model_kwargs
    signal_kwargs = {} if signal_kwargs is None else signal_kwargs

    if cur_time is None:
        cur_time = D.calendar(freq=self.freq).max()
    self.cur_time = pd.Timestamp(cur_time)  # None for latest date

    models_list = []
    for strategy in self.strategies:
        self.logger.info(f"Strategy `{strategy.name_id}` begins routine...")

        tasks = strategy.prepare_tasks(self.cur_time, **task_kwargs)
        models = self.trainer.train(tasks, experiment_name=strategy.name_id)
        models_list.append(models)
        self.logger.info(f"Finished training {len(models)} models.")

        online_models = strategy.prepare_online_models(models, **model_kwargs)
        self.history.setdefault(self.cur_time, {})[strategy] = online_models

        # The online model may changes in the above processes
        # So updating the predictions of online models should be the last step
        if self.status == self.STATUS_NORMAL:
            strategy.tool.update_online_pred()

    # Finish training and prepare signals now, unless this is a simulation with a delayed trainer.
    if self.status != self.STATUS_SIMULATING or not self.trainer.is_delay():
        for strategy, models in zip(self.strategies, models_list):
            self.trainer.end_train(models, experiment_name=strategy.name_id)
        self.prepare_signals(**signal_kwargs)