コード例 #1
0
ファイル: rolling_benchmark.py プロジェクト: yutiansut/qlib
 def update_rolling_rec(self):
     """
     Generate the analysis records for the combined rolling results.

     For every recorder listed under the experiment ``self.COMB_EXP``, a
     ``SigAnaRecord`` and then a ``PortAnaRecord`` are built on top of it
     (with ``skip_existing=True``) and their ``generate()`` is invoked.
     Finally a hint telling where the results are stored is printed.
     """
     comb_recorders = R.list_recorders(experiment_name=self.COMB_EXP)
     for _, recorder in comb_recorders.items():
         # Order matters: signal analysis first, portfolio analysis second.
         for record_cls in (SigAnaRecord, PortAnaRecord):
             record_cls(recorder=recorder, skip_existing=True).generate()
     print(
         f"Your evaluation results can be found in the experiment named `{self.COMB_EXP}`."
     )
コード例 #2
0
    def setup(self, trainer=TrainerR, trainer_kwargs=None):
        """
        Train (or reuse) the proxy models and build ``self.data_ic_df``.

        After running this function `self.data_ic_df` will be set.
        Each col represents a data.
        Each row represents the Timestamp of performance of that data.
        For example,

        .. code-block:: python

                       2021-06-21 2021-06-04 2021-05-21 2021-05-07 2021-04-20 2021-04-06 2021-03-22 2021-03-08  ...
                       2021-07-02 2021-06-18 2021-06-03 2021-05-20 2021-05-06 2021-04-19 2021-04-02 2021-03-19  ...
            datetime                                                                                            ...
            2018-01-02   0.079782   0.115975   0.070866   0.028849  -0.081170   0.140380   0.063864   0.110987  ...
            2018-01-03   0.123386   0.107789   0.071037   0.045278  -0.060782   0.167446   0.089779   0.124476  ...
            2018-01-04   0.140775   0.097206   0.063702   0.042415  -0.078164   0.173218   0.098914   0.114389  ...
            2018-01-05   0.030320  -0.037209  -0.044536  -0.047267  -0.081888   0.045648   0.059947   0.047652  ...
            2018-01-08   0.107201   0.009219  -0.015995  -0.036594  -0.086633   0.108965   0.122164   0.108508  ...
            ...               ...        ...        ...        ...        ...        ...        ...        ...  ...

        Parameters
        ----------
        trainer :
            The trainer factory. It is wrapped by ``auto_filter_kwargs`` and
            called with ``experiment_name=self.exp_name`` plus
            ``trainer_kwargs``; the result must provide ``has_worker()`` and
            ``train(tasks)``. Defaults to ``TrainerR``.
        trainer_kwargs : dict, optional
            Extra keyword arguments forwarded to ``trainer``. ``None`` (the
            default) means no extra arguments.

        Raises
        ------
        AssertionError
            If the experiment ``self.exp_name`` already holds recorders but
            their number differs from the number of generated rolling tasks.
        """
        # A `None` sentinel avoids sharing one mutable dict between calls.
        if trainer_kwargs is None:
            trainer_kwargs = {}

        # 1) prepare the prediction of proxy models
        perf_task_tpl = deepcopy(
            self.task_tpl
        )  # this task is supposed to contain no complicated objects

        trainer = auto_filter_kwargs(trainer)(experiment_name=self.exp_name,
                                              **trainer_kwargs)
        # NOTE:
        # The handler is initialized only once.
        # Without a worker, the handler is set up through `init_task_handler`
        # on the task template; otherwise only a local instance is created
        # from the handler config and the template is left untouched.
        if not trainer.has_worker():
            self.dh = init_task_handler(perf_task_tpl)
        else:
            self.dh = init_instance_by_config(
                perf_task_tpl["dataset"]["kwargs"]["handler"])

        seg = perf_task_tpl["dataset"]["kwargs"]["segments"]

        # We want to split the training time period into small segments.
        perf_task_tpl["dataset"]["kwargs"]["segments"] = {
            "train": (DatasetH.get_min_time(seg), DatasetH.get_max_time(seg)),
            "test": (None, None),
        }

        # NOTE:
        # we play a trick here
        # treat the training segments as test to create the rolling tasks
        rg = RollingGen(step=self.step,
                        test_key="train",
                        train_key=None,
                        task_copy_func=deepcopy_basic_type)
        gen_task = task_generator(perf_task_tpl, [rg])

        recorders = R.list_recorders(experiment_name=self.exp_name)
        if len(gen_task) == len(recorders):
            # One recorder per generated task: treat the experiment as done.
            get_module_logger("Internal Data").info(
                "the data has been initialized")
        else:
            # train new models
            assert 0 == len(
                recorders
            ), "An empty experiment is required for setup `InternalData``"
            trainer.train(gen_task)

        # 2) extract the similarity matrix
        label_df = self.dh.fetch(col_set="label")
        # Re-list the recorders: training above may have created new ones.
        recorders = R.list_recorders(experiment_name=self.exp_name)

        key_l = []
        ic_l = []
        for _, rec in tqdm(recorders.items(), desc="calc"):
            pred = rec.load_object("pred.pkl")
            task = rec.load_object("task")
            # The "train" segment of the rolling task identifies the data piece.
            data_key = task["dataset"]["kwargs"]["segments"]["train"]
            key_l.append(data_key)
            # Only the first column of the prediction and of the label is used.
            ic_l.append(
                delayed(self._calc_perf)(pred.iloc[:, 0], label_df.iloc[:, 0]))

        # Evaluate all the delayed performance calculations in parallel.
        ic_l = Parallel(n_jobs=-1)(ic_l)
        self.data_ic_df = pd.DataFrame(dict(zip(key_l, ic_l)))
        self.data_ic_df = self.data_ic_df.sort_index().sort_index(axis=1)

        del self.dh  # handler is not useful now