Example #1
0
    def __init__(
        self,
        strategies: Union[OnlineStrategy, List[OnlineStrategy]],
        trainer: Trainer = None,
        begin_time: Union[str, pd.Timestamp] = None,
        freq="day",
    ):
        """
        Build an OnlineManager around one or more online strategies.

        The manager is expected to hold at least one OnlineStrategy; this is not validated here.

        Args:
            strategies (Union[OnlineStrategy, List[OnlineStrategy]]): a single OnlineStrategy or a list of them.
                Anything that is not a list is wrapped into a one-element list.
            trainer (Trainer, optional): the trainer used to train tasks. Defaults to None, in which case a
                TrainerR is created.
            begin_time (Union[str,pd.Timestamp], optional): the time the manager starts from. Defaults to None,
                in which case the maximum of ``D.calendar(freq=freq)`` is used.
            freq (str, optional): data frequency. Defaults to "day".
        """
        self.logger = get_module_logger(self.__class__.__name__)
        self.strategies = strategies if isinstance(strategies, list) else [strategies]
        self.freq = freq

        # Fall back to the latest calendar entry only when no explicit start was given.
        start = D.calendar(freq=self.freq).max() if begin_time is None else begin_time
        self.begin_time = pd.Timestamp(start)
        self.cur_time = self.begin_time

        # History of online models, keyed by time: {pd.Timestamp: {strategy: [online_models]}}.
        self.history = {}
        self.trainer = TrainerR() if trainer is None else trainer
        self.signals = None
        self.status = self.STATUS_NORMAL
Example #2
0
    def delay_prepare(self, model_kwargs=None, signal_kwargs=None):
        """
        Prepare all models and signals if something is waiting for preparation.

        Walks ``self.history`` in its stored order. For every recorded time it sets ``self.cur_time`` and,
        for each strategy whose set of online models differs from the one seen at the previous recorded time,
        calls ``self.trainer.end_train`` and resets the online tag through ``strategy.tool``. Whenever at least
        one strategy changed, the signals are prepared again for that time.

        Args:
            model_kwargs (dict, optional): the params for `end_train`. Defaults to None (no extra params).
            signal_kwargs (dict, optional): the params for `prepare_signals`. Defaults to None (no extra params).
        """
        # None instead of a shared mutable `{}` default; normalise to an empty dict for `**` unpacking.
        model_kwargs = {} if model_kwargs is None else model_kwargs
        signal_kwargs = {} if signal_kwargs is None else signal_kwargs

        last_models = {}
        # Start from the first calendar entry so the "signals ahead of cur_time" check below
        # cannot fire before any signals have been prepared by this method.
        signals_time = D.calendar()[0]
        need_prepare = False
        for cur_time, strategy_models in self.history.items():
            self.cur_time = cur_time

            for strategy, models in strategy_models.items():
                # only new online models need to prepare
                if last_models.get(strategy, set()) != set(models):
                    models = self.trainer.end_train(models, experiment_name=strategy.name_id, **model_kwargs)
                    strategy.tool.reset_online_tag(models)
                    need_prepare = True
                # Remember the (possibly re-bound) models so the next time point is compared against them.
                last_models[strategy] = set(models)

            if need_prepare:
                # NOTE: Assumption: the predictions of online models need less than next cur_time, or this method will work in a wrong way.
                self.prepare_signals(**signal_kwargs)
                # `signals_time` still holds the value from the previous preparation at this point.
                if signals_time > cur_time:
                    self.logger.warning(
                        f"The signals have already prepared to {signals_time} by last preparation, but current time is only {cur_time}. This may be because the online models predict more than they should, which can cause signals to be contaminated by the offline models."
                    )
                need_prepare = False
                signals_time = self.signals.index.get_level_values("datetime").max()
Example #3
0
    def prepare_tasks(self, cur_time) -> List[dict]:
        """
        Prepare new tasks based on cur_time (None for the latest).

        The latest online records are taken from ``self.tool.online_models()``. New tasks are generated only
        when the interval between the current time and the begin time of the latest test segment is at least
        ``self.rg.step``.

        Args:
            cur_time: the time to roll up to. When None, the last entry of ``D.calendar(end_time=None)`` is used.

        Returns:
            List[dict]: a list of new tasks; empty when there are no latest online records or the interval
            is shorter than the rolling step.
        """
        latest_records, max_test = self._list_latest(self.tool.online_models())
        if max_test is None:
            self.logger.warning("No latest online recorders, no new tasks.")
            return []
        # Only hit the calendar when no explicit time was given.
        calendar_latest = D.calendar(end_time=cur_time)[-1] if cur_time is None else cur_time
        # Computed once and reused for both the log message and the rolling decision.
        interval = self.ta.cal_interval(calendar_latest, max_test[0])
        self.logger.info(
            f"The interval between current time {calendar_latest} and last rolling test begin time {max_test[0]} is {interval}, the rolling step is {self.rg.step}"
        )
        if interval >= self.rg.step:
            old_tasks = []
            tasks_tmp = []
            for rec in latest_records:
                task = rec.load_object("task")
                # Keep an untouched copy so already-existing tasks can be filtered out afterwards.
                old_tasks.append(deepcopy(task))
                test_begin = task["dataset"]["kwargs"]["segments"]["test"][0]
                # modify the test segment to generate new tasks
                task["dataset"]["kwargs"]["segments"]["test"] = (test_begin, calendar_latest)
                tasks_tmp.append(task)
            new_tasks_tmp = task_generator(tasks_tmp, self.rg)
            return [task for task in new_tasks_tmp if task not in old_tasks]
        return []
Example #4
0
    def simulate(self,
                 end_time,
                 frequency="day",
                 task_kwargs=None,
                 model_kwargs=None,
                 signal_kwargs=None) -> Union[pd.Series, pd.DataFrame]:
        """
        Starting from the current time, this method will simulate every routine in OnlineManager until the end time.

        Considering the parallel training, the models and signals can be prepared after all routine simulating.

        The delay training way can be ``DelayTrainer`` and the delay preparing signals way can be ``delay_prepare``.

        The manager status and the global logger level are restored even when a routine raises, so a failed
        simulation does not leave the manager stuck in the simulating state.

        Args:
            end_time: the time the simulation will end
            frequency: the calendar frequency
            task_kwargs (dict, optional): forwarded to `routine` as ``task_kwargs``. Defaults to None (no extra params).
            model_kwargs (dict, optional): forwarded to `routine` and, when the trainer is delayed, to
                `delay_prepare` as ``model_kwargs``. Defaults to None (no extra params).
            signal_kwargs (dict, optional): forwarded to `routine` and `delay_prepare` as ``signal_kwargs``.
                Defaults to None (no extra params).

        Returns:
            Union[pd.Series, pd.DataFrame]: the result of ``self.get_signals()``.
        """
        # None instead of shared mutable `{}` defaults.
        task_kwargs = {} if task_kwargs is None else task_kwargs
        model_kwargs = {} if model_kwargs is None else model_kwargs
        signal_kwargs = {} if signal_kwargs is None else signal_kwargs

        self.status = self.STATUS_SIMULATING
        try:
            cal = D.calendar(start_time=self.cur_time, end_time=end_time, freq=frequency)
            self.first_train()

            simulate_level = self.SIM_LOG_LEVEL
            set_global_logger_level(simulate_level)
            try:
                logging.addLevelName(simulate_level, self.SIM_LOG_NAME)

                for cur_time in cal:
                    self.logger.log(level=simulate_level, msg=f"Simulating at {str(cur_time)}......")
                    self.routine(
                        cur_time,
                        task_kwargs=task_kwargs,
                        model_kwargs=model_kwargs,
                        signal_kwargs=signal_kwargs,
                    )
                # delay prepare the models and signals
                if self.trainer.is_delay():
                    self.delay_prepare(model_kwargs=model_kwargs, signal_kwargs=signal_kwargs)
            finally:
                # FIXME: get logging level firstly and restore it here
                set_global_logger_level(logging.DEBUG)
            self.logger.info("Finished preparing signals")
        finally:
            # Always leave simulating mode, even if a routine failed.
            self.status = self.STATUS_NORMAL
        return self.get_signals()
Example #5
0
    def routine(
        self,
        cur_time: Union[str, pd.Timestamp] = None,
        task_kwargs: dict = None,
        model_kwargs: dict = None,
        signal_kwargs: dict = None,
    ):
        """
        Typical update process for every strategy and record the online history.

        The typical update process after a routine, such as day by day or month by month.
        For each strategy the process is: Prepare tasks -> Train -> Prepare online models -> record them in
        ``self.history`` -> (only in normal status) Update the online predictions.
        Afterwards, unless the manager is simulating with a delayed trainer, training is finished with
        ``end_train`` for every strategy and the signals are prepared.

        If using DelayTrainer, it can finish training all together after every strategy's prepare_tasks.

        Args:
            cur_time (Union[str,pd.Timestamp], optional): run routine method in this time. Defaults to None,
                in which case the maximum of ``D.calendar(freq=self.freq)`` is used.
            task_kwargs (dict, optional): the params for `prepare_tasks`. Defaults to None (no extra params).
            model_kwargs (dict, optional): the params for `prepare_online_models`. Defaults to None (no extra params).
            signal_kwargs (dict, optional): the params for `prepare_signals`. Defaults to None (no extra params).
        """
        # None instead of shared mutable `{}` defaults; normalise for `**` unpacking.
        task_kwargs = {} if task_kwargs is None else task_kwargs
        model_kwargs = {} if model_kwargs is None else model_kwargs
        signal_kwargs = {} if signal_kwargs is None else signal_kwargs

        if cur_time is None:
            cur_time = D.calendar(freq=self.freq).max()
        self.cur_time = pd.Timestamp(cur_time)

        models_list = []
        for strategy in self.strategies:
            self.logger.info(f"Strategy `{strategy.name_id}` begins routine...")

            tasks = strategy.prepare_tasks(self.cur_time, **task_kwargs)
            models = self.trainer.train(tasks, experiment_name=strategy.name_id)
            models_list.append(models)
            self.logger.info(f"Finished training {len(models)} models.")
            online_models = strategy.prepare_online_models(models, **model_kwargs)
            self.history.setdefault(self.cur_time, {})[strategy] = online_models

            # The online model may changes in the above processes
            # So updating the predictions of online models should be the last step
            if self.status == self.STATUS_NORMAL:
                strategy.tool.update_online_pred()

        # While simulating with a delayed trainer, finishing training and preparing signals is postponed.
        if self.status != self.STATUS_SIMULATING or not self.trainer.is_delay():
            for strategy, models in zip(self.strategies, models_list):
                self.trainer.end_train(models, experiment_name=strategy.name_id)
            self.prepare_signals(**signal_kwargs)