def update_rolling_rec(self):
    """Evaluate the combined rolling results.

    Walks every recorder stored under the combined experiment
    (``self.COMB_EXP``) and runs both the signal-analysis and the
    portfolio-analysis record generators on it. ``skip_existing=True``
    makes the call idempotent: artifacts already produced are not
    regenerated.
    """
    recorders = R.list_recorders(experiment_name=self.COMB_EXP)
    for _rid, recorder in recorders.items():
        # Generate both kinds of evaluation records for this recorder.
        for record_cls in (SigAnaRecord, PortAnaRecord):
            record_cls(recorder=recorder, skip_existing=True).generate()
    print(
        f"Your evaluation results can be found in the experiment named `{self.COMB_EXP}`."
    )
def setup(self, trainer=TrainerR, trainer_kwargs=None):
    """Train the proxy models on rolling segments and build ``self.data_ic_df``.

    After running this function ``self.data_ic_df`` will become set.
    Each col represents a data. Each row represents the Timestamp of
    performance of that data. For example,

    .. code-block:: python

                       2021-06-21 2021-06-04 2021-05-21 2021-05-07 2021-04-20 2021-04-06 2021-03-22 2021-03-08  ...
                       2021-07-02 2021-06-18 2021-06-03 2021-05-20 2021-05-06 2021-04-19 2021-04-02 2021-03-19  ...
        datetime                                                                                                ...
        2018-01-02       0.079782   0.115975   0.070866   0.028849  -0.081170   0.140380   0.063864   0.110987  ...
        2018-01-03       0.123386   0.107789   0.071037   0.045278  -0.060782   0.167446   0.089779   0.124476  ...
        2018-01-04       0.140775   0.097206   0.063702   0.042415  -0.078164   0.173218   0.098914   0.114389  ...
        2018-01-05       0.030320  -0.037209  -0.044536  -0.047267  -0.081888   0.045648   0.059947   0.047652  ...
        2018-01-08       0.107201   0.009219  -0.015995  -0.036594  -0.086633   0.108965   0.122164   0.108508  ...
        ...                   ...        ...        ...        ...        ...        ...        ...        ...  ...

    Parameters
    ----------
    trainer : type
        the trainer class used to fit the rolling tasks (default ``TrainerR``).
    trainer_kwargs : dict, optional
        extra keyword arguments forwarded to the trainer constructor.
        Defaults to an empty dict (was a mutable default argument, which
        is shared across calls; fixed by using ``None`` as the sentinel).
    """
    if trainer_kwargs is None:
        trainer_kwargs = {}

    # 1) prepare the prediction of proxy models
    perf_task_tpl = deepcopy(
        self.task_tpl
    )  # this task is supposed to contain no complicated objects

    trainer = auto_filter_kwargs(trainer)(experiment_name=self.exp_name, **trainer_kwargs)
    # NOTE:
    # The handler is initialized for only once.
    if not trainer.has_worker():
        self.dh = init_task_handler(perf_task_tpl)
    else:
        self.dh = init_instance_by_config(perf_task_tpl["dataset"]["kwargs"]["handler"])

    seg = perf_task_tpl["dataset"]["kwargs"]["segments"]

    # We want to split the training time period into small segments.
    perf_task_tpl["dataset"]["kwargs"]["segments"] = {
        "train": (DatasetH.get_min_time(seg), DatasetH.get_max_time(seg)),
        "test": (None, None),
    }

    # NOTE:
    # we play a trick here
    # treat the training segments as test to create the rolling tasks
    rg = RollingGen(step=self.step, test_key="train", train_key=None, task_copy_func=deepcopy_basic_type)
    gen_task = task_generator(perf_task_tpl, [rg])

    recorders = R.list_recorders(experiment_name=self.exp_name)
    if len(gen_task) == len(recorders):
        get_module_logger("Internal Data").info("the data has been initialized")
    else:
        # train new models
        assert 0 == len(recorders), "An empty experiment is required for setup `InternalData`"
        trainer.train(gen_task)

    # 2) extract the similarity matrix
    label_df = self.dh.fetch(col_set="label")
    recorders = R.list_recorders(experiment_name=self.exp_name)

    key_l = []
    ic_l = []
    for _, rec in tqdm(recorders.items(), desc="calc"):
        pred = rec.load_object("pred.pkl")
        task = rec.load_object("task")
        data_key = task["dataset"]["kwargs"]["segments"]["train"]
        key_l.append(data_key)
        # Defer the IC computation so all segments run in parallel below.
        ic_l.append(delayed(self._calc_perf)(pred.iloc[:, 0], label_df.iloc[:, 0]))

    ic_l = Parallel(n_jobs=-1)(ic_l)
    self.data_ic_df = pd.DataFrame(dict(zip(key_l, ic_l)))
    # Sort both axes so rows (timestamps) and columns (segments) are ordered.
    self.data_ic_df = self.data_ic_df.sort_index().sort_index(axis=1)

    del self.dh  # handler is not useful now