def backtest(self):
    self._init_qlib()
    model = init_instance_by_config(self.task["model"])
    dataset = init_instance_by_config(self.task["dataset"])
    self._train_model(model, dataset)
    strategy_config = {
        "class": "TopkDropoutStrategy",
        "module_path": "qlib.contrib.strategy.signal_strategy",
        "kwargs": {
            "signal": (model, dataset),
            "topk": 50,
            "n_drop": 5,
        },
    }
    self.port_analysis_config["strategy"] = strategy_config
    self.port_analysis_config["backtest"]["benchmark"] = self.benchmark

    with R.start(experiment_name="backtest"):
        recorder = R.get_recorder()
        par = PortAnaRecord(
            recorder,
            self.port_analysis_config,
            risk_analysis_freq=["day", "30min", "5min"],
            indicator_analysis_freq=["day", "30min", "5min"],
            indicator_analysis_method="value_weighted",
        )
        par.generate()
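# The method above assumes `self.port_analysis_config` already carries the
# executor/backtest settings. A minimal daily-frequency sketch of such a config
# follows, modeled on qlib's benchmark yamls; the dates, account size, and cost
# values are illustrative assumptions (the multi-frequency run above would use
# a nested executor configuration instead of this simple one).
port_analysis_config = {
    "executor": {
        "class": "SimulatorExecutor",
        "module_path": "qlib.backtest.executor",
        "kwargs": {"time_per_step": "day", "generate_portfolio_metrics": True},
    },
    "backtest": {
        "start_time": "2017-01-01",
        "end_time": "2020-08-01",
        "account": 100000000,
        "benchmark": "SH000300",
        "exchange_kwargs": {
            "freq": "day",
            "limit_threshold": 0.095,
            "deal_price": "close",
            "open_cost": 0.0005,
            "close_cost": 0.0015,
            "min_cost": 5,
        },
    },
}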
def train_with_sigana():
    """train model followed by SigAnaRecord

    Returns
    -------
    pred_score: pandas.DataFrame
        predict scores
    performance: dict
        model performance
    uri_path: str
        the URI of the experiment
    """
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])
    # start exp
    with R.start(experiment_name="workflow_with_sigana"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)

        # predict and calculate ic and ric
        recorder = R.get_recorder()
        sar = SigAnaRecord(recorder, model=model, dataset=dataset)
        sar.generate()
        ic = sar.load(sar.get_path("ic.pkl"))
        ric = sar.load(sar.get_path("ric.pkl"))
        pred_score = sar.load("pred.pkl")

        smr = SignalMseRecord(recorder)
        smr.generate()
        uri_path = R.get_uri()
    return pred_score, {"ic": ic, "ric": ric}, uri_path
def _exe_task(task_config: dict):
    rec = R.get_recorder()
    # model & dataset initiation
    model: Model = init_instance_by_config(task_config["model"])
    dataset: Dataset = init_instance_by_config(task_config["dataset"])
    # FIXME: resume reweighter after merging data selection
    # reweighter: Reweighter = task_config.get("reweighter", None)
    # model training
    # auto_filter_kwargs(model.fit)(dataset, reweighter=reweighter)
    model.fit(dataset)
    R.save_objects(**{"params.pkl": model})
    # this dataset is saved for online inference, so the concrete data should not be dumped
    dataset.config(dump_all=False, recursive=True)
    R.save_objects(**{"dataset": dataset})
    # fill placeholders
    placeholder_value = {"<MODEL>": model, "<DATASET>": dataset}
    task_config = fill_placeholder(task_config, placeholder_value)
    # generate records: prediction, backtest, and analysis
    records = task_config.get("record", [])
    if isinstance(records, dict):  # a single record may be given as a bare dict
        records = [records]
    for record in records:
        # Some recorders require the parameters `model` and `dataset`.
        # Try to pass them to the initialization function automatically
        # to make defining the task easier.
        r = init_instance_by_config(
            record,
            recorder=rec,
            default_module="qlib.workflow.record_temp",
            try_kwargs={"model": model, "dataset": dataset},
        )
        r.generate()
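# A minimal sketch of the task_config that `_exe_task` consumes, modeled on
# qlib's standard benchmark tasks. The handler, segment dates, and LGBModel
# choice are illustrative assumptions, not the only valid configuration; the
# "<MODEL>"/"<DATASET>" strings are the placeholders that `fill_placeholder`
# resolves in the function above.
example_task_config = {
    "model": {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": {"loss": "mse"},
    },
    "dataset": {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": {
                "class": "Alpha158",
                "module_path": "qlib.contrib.data.handler",
                "kwargs": {"instruments": "csi300"},
            },
            "segments": {
                "train": ("2008-01-01", "2014-12-31"),
                "valid": ("2015-01-01", "2016-12-31"),
                "test": ("2017-01-01", "2020-08-01"),
            },
        },
    },
    "record": [
        {
            "class": "SignalRecord",
            "module_path": "qlib.workflow.record_temp",
            "kwargs": {"model": "<MODEL>", "dataset": "<DATASET>"},
        },
        {"class": "SigAnaRecord", "module_path": "qlib.workflow.record_temp"},
    ],
}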
def train_with_sigana(uri_path: str = None):
    """train model followed by SigAnaRecord

    Returns
    -------
    pred_score: pandas.DataFrame
        predict scores
    performance: dict
        model performance
    uri_path: str
        the URI of the experiment
    """
    model = init_instance_by_config(CSI300_GBDT_TASK["model"])
    dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"])
    # start exp
    with R.start(experiment_name="workflow_with_sigana", uri=uri_path):
        R.log_params(**flatten_dict(CSI300_GBDT_TASK))
        model.fit(dataset)
        recorder = R.get_recorder()

        sr = SignalRecord(model, dataset, recorder)
        sr.generate()
        pred_score = sr.load("pred.pkl")

        # predict and calculate ic and ric
        sar = SigAnaRecord(recorder)
        sar.generate()
        ic = sar.load("ic.pkl")
        ric = sar.load("ric.pkl")

        uri_path = R.get_uri()
    return pred_score, {"ic": ic, "ric": ric}, uri_path
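# A minimal usage sketch for the trainer above. The unpacking mirrors its
# return signature, and the dict keys come from the dict it returns.
pred_score, performance, uri = train_with_sigana()
print(pred_score.head())  # per-instrument prediction scores
print("IC:", performance["ic"], "Rank IC:", performance["ric"])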
def fit(
    self,
    dataset: DatasetH,
    num_boost_round=None,
    early_stopping_rounds=None,
    verbose_eval=20,
    evals_result=None,
    reweighter=None,
    **kwargs,
):
    if evals_result is None:
        evals_result = {}  # avoid the mutable-default-argument pitfall
    ds_l = self._prepare_data(dataset, reweighter)
    ds, names = list(zip(*ds_l))
    # NOTE: `early_stopping_rounds` and `verbose_eval` are the pre-4.0 LightGBM
    # keyword API; newer LightGBM versions expect the callback-based form instead.
    self.model = lgb.train(
        self.params,
        ds[0],  # training dataset
        num_boost_round=self.num_boost_round if num_boost_round is None else num_boost_round,
        valid_sets=ds,
        valid_names=names,
        early_stopping_rounds=(
            self.early_stopping_rounds if early_stopping_rounds is None else early_stopping_rounds
        ),
        verbose_eval=verbose_eval,
        evals_result=evals_result,
        **kwargs,
    )
    # log the per-epoch evaluation metrics of every validation set
    for k in names:
        for key, val in evals_result[k].items():
            name = f"{key}.{k}"
            for epoch, m in enumerate(val):
                R.log_metrics(**{name.replace("@", "_"): m}, step=epoch)
def get_all_results(folders) -> dict:
    results = dict()
    for fn in folders:
        exp = R.get_exp(experiment_name=fn, create=False)
        recorders = exp.list_recorders()
        result = dict()
        result["annualized_return_with_cost"] = list()
        result["information_ratio_with_cost"] = list()
        result["max_drawdown_with_cost"] = list()
        result["ic"] = list()
        result["icir"] = list()
        result["rank_ic"] = list()
        result["rank_icir"] = list()
        for recorder_id in recorders:
            if recorders[recorder_id].status == "FINISHED":
                recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=fn)
                metrics = recorder.list_metrics()
                result["annualized_return_with_cost"].append(metrics["excess_return_with_cost.annualized_return"])
                result["information_ratio_with_cost"].append(metrics["excess_return_with_cost.information_ratio"])
                result["max_drawdown_with_cost"].append(metrics["excess_return_with_cost.max_drawdown"])
                result["ic"].append(metrics["IC"])
                result["icir"].append(metrics["ICIR"])
                result["rank_ic"].append(metrics["Rank IC"])
                result["rank_icir"].append(metrics["Rank ICIR"])
        results[fn] = result
    return results
def main(xargs):
    R.reset_default_uri(xargs.save_dir)
    experiments = R.list_experiments()

    key_map = {
        "IC": "IC",
        "ICIR": "ICIR",
        "Rank IC": "Rank_IC",
        "Rank ICIR": "Rank_ICIR",
        "excess_return_with_cost.annualized_return": "Annualized_Return",
        "excess_return_with_cost.information_ratio": "Information_Ratio",
        "excess_return_with_cost.max_drawdown": "Max_Drawdown",
    }
    all_keys = list(key_map.values())

    print("There are {:} experiments.".format(len(experiments)))
    head_strs, value_strs, names = [], [], []
    for idx, (key, experiment) in enumerate(experiments.items()):
        if experiment.id == "0":
            continue
        recorders = experiment.list_recorders()
        recorders, not_finished = filter_finished(recorders)
        print(
            "====>>>> {:02d}/{:02d}-th experiment {:9s} has {:02d}/{:02d} finished recorders.".format(
                idx, len(experiments), experiment.name, len(recorders), len(recorders) + not_finished
            )
        )
        result = QResult()
        for recorder_id, recorder in recorders.items():
            result.update(recorder.list_metrics(), key_map)
        head_str, value_str = result.info(all_keys, show=False)
        head_strs.append(head_str)
        value_strs.append(value_str)
        names.append(experiment.name)
    compare_results(head_strs, value_strs, names, space=10)
def train_meta_model(self):
    """
    train a meta model based on a simplified linear proxy model
    """
    # 1) leverage the simplified proxy forecasting model to train the meta model.
    #    Only the dataset part is important in the current version of the meta model.
    rb = RollingBenchmark(model_type=self.sim_task_model)
    sim_task = rb.basic_task()

    proxy_forecast_model_task = {
        # "model": "qlib.contrib.model.linear.LinearModel",
        "dataset": {
            "class": "qlib.data.dataset.DatasetH",
            "kwargs": {
                "handler": f"file://{(DIRNAME / 'handler_proxy.pkl').absolute()}",
                "segments": {
                    "train": ("2008-01-01", "2010-12-31"),
                    "test": ("2011-01-01", sim_task["dataset"]["kwargs"]["segments"]["test"][1]),
                },
            },
        },
        # "record": ["qlib.workflow.record_temp.SignalRecord"]
    }
    # proxy_forecast_model_task will be used to create meta tasks.
    # The test date of the first task will be 2011-01-01. Each test segment will be about 20 days.
    # The tasks include all training tasks and test tasks.

    # 2) prepare the meta dataset
    kwargs = dict(
        task_tpl=proxy_forecast_model_task,
        step=self.step,
        segments=0.62,  # keep the test period consistent with the dataset yaml
        trunc_days=1 + self.horizon,
        hist_step_n=30,
        fill_method="max",
        rolling_ext_days=0,
    )
    # NOTE:
    # the input of the meta model (internal data) is shared between the proxy model
    # and the final forecasting model, but their task test segments are not aligned.
    # Previous experiments showed that this misalignment does not affect the
    # effectiveness of the method.
    with self._internal_data_path.open("rb") as f:
        internal_data = pickle.load(f)
    md = MetaDatasetDS(exp_name=internal_data, **kwargs)

    # 3) train and log the meta model
    with R.start(experiment_name=self.meta_exp_name):
        R.log_params(**kwargs)
        mm = MetaModelDS(step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=200, seed=43)
        mm.fit(md)
        R.save_objects(model=mm)
def query_info(save_dir, verbose, name_filter):
    R.set_uri(save_dir)
    experiments = R.list_experiments()

    key_map = {
        # "RMSE": "RMSE",
        "IC": "IC",
        "ICIR": "ICIR",
        "Rank IC": "Rank_IC",
        "Rank ICIR": "Rank_ICIR",
        # "excess_return_with_cost.annualized_return": "Annualized_Return",
        # "excess_return_with_cost.information_ratio": "Information_Ratio",
        # "excess_return_with_cost.max_drawdown": "Max_Drawdown",
    }
    all_keys = list(key_map.values())

    if verbose:
        print("There are {:} experiments.".format(len(experiments)))
    head_strs, value_strs, names = [], [], []
    for idx, (key, experiment) in enumerate(experiments.items()):
        if experiment.id == "0":
            continue
        if name_filter is not None and re.match(name_filter, experiment.name) is None:
            continue
        recorders = experiment.list_recorders()
        recorders, not_finished = filter_finished(recorders)
        if verbose:
            print(
                "====>>>> {:02d}/{:02d}-th experiment {:9s} has {:02d}/{:02d} finished recorders.".format(
                    idx + 1, len(experiments), experiment.name, len(recorders), len(recorders) + not_finished
                )
            )
        result = QResult()
        for recorder_id, recorder in recorders.items():
            result.update(recorder.list_metrics(), key_map)
        if not len(result):
            print("There are no valid recorders for {:}".format(experiment))
            continue
        else:
            print("There are {:} valid recorders for {:}".format(len(recorders), experiment.name))
        head_str, value_str = result.info(all_keys, verbose=verbose)
        head_strs.append(head_str)
        value_strs.append(value_str)
        names.append(experiment.name)
    info_str_dict = compare_results(head_strs, value_strs, names, space=10, verbose=verbose)
    info_value_dict = dict(heads=head_strs, values=value_strs, names=names)
    return info_str_dict, info_value_dict
def query_info(save_dir, verbose, name_filter, key_map):
    if isinstance(save_dir, list):
        results = []
        for x in save_dir:
            x = query_info(x, verbose, name_filter, key_map)
            results.extend(x)
        return results
    # Here, save_dir must be a string
    R.set_uri(str(save_dir))
    experiments = R.list_experiments()

    if verbose:
        print("There are {:} experiments.".format(len(experiments)))
    qresults = []
    for idx, (key, experiment) in enumerate(experiments.items()):
        if experiment.id == "0":
            continue
        if name_filter is not None and re.fullmatch(name_filter, experiment.name) is None:
            continue
        recorders = experiment.list_recorders()
        recorders, not_finished = filter_finished(recorders)
        if verbose:
            print(
                "====>>>> {:02d}/{:02d}-th experiment {:9s} has {:02d}/{:02d} finished recorders.".format(
                    idx + 1, len(experiments), experiment.name, len(recorders), len(recorders) + not_finished
                )
            )
        result = QResult(experiment.name)
        for recorder_id, recorder in recorders.items():
            file_names = ["results-train.pkl", "results-valid.pkl", "results-test.pkl"]
            date2IC = OrderedDict()
            for file_name in file_names:
                xtemp = recorder.load_object(file_name)["all-IC"]
                timestamps, values = xtemp.index.tolist(), xtemp.tolist()
                for timestamp, value in zip(timestamps, values):
                    add_to_dict(date2IC, timestamp, value)
            result.update(recorder.list_metrics(), key_map)
            result.append_path(os.path.join(recorder.uri, recorder.experiment_id, recorder.id))
            result.append_date2ICs(date2IC)
        if not len(result):
            print("There are no valid recorders for {:}".format(experiment))
            continue
        else:
            if verbose:
                print("There are {:} valid recorders for {:}".format(len(recorders), experiment.name))
        qresults.append(result)
    return qresults
def train_multiseg():
    model = init_instance_by_config(CSI300_GBDT_TASK["model"])
    dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"])
    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(CSI300_GBDT_TASK))
        model.fit(dataset)
        recorder = R.get_recorder()
        sr = MultiSegRecord(model, dataset, recorder)
        sr.generate(dict(valid="valid", test="test"), True)
        uri = R.get_uri()
    return uri
def train_mse():
    model = init_instance_by_config(CSI300_GBDT_TASK["model"])
    dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"])
    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(CSI300_GBDT_TASK))
        model.fit(dataset)
        recorder = R.get_recorder()
        sr = SignalMseRecord(recorder, model=model, dataset=dataset)
        sr.generate()
        uri = R.get_uri()
    return uri
def __init__(self, recorder_id, experiment_id, provider_uri=r"E:\TDX\cjzq_tdx\vipdoc", region=REG_CN):
    self.record_id = recorder_id
    self.experiment_id = experiment_id
    qlib.init(provider_uri=provider_uri, region=region)
    R.set_uri('file:D:\\Code\\my_qlib\\examples\\mlruns')
    self.recorder = R.get_recorder(recorder_id=recorder_id, experiment_id=experiment_id)
    # strip the leading "file:" scheme (5 characters) to get a plain filesystem path
    self.expr_dir = Path(self.recorder.uri[5:]).joinpath(experiment_id).joinpath(recorder_id)
    self.artifacts_dir = self.expr_dir.joinpath('artifacts')
    self.portfolio_dir = self.artifacts_dir.joinpath('portfolio_analysis')
    self.sig_dir = self.artifacts_dir.joinpath('sig_analysis')
def end_task_train(rec: Recorder, experiment_name: str) -> Recorder:
    """
    Finish task training with real model fitting and saving.

    Args:
        rec (Recorder): the recorder to be resumed
        experiment_name (str): the name of the experiment

    Returns:
        Recorder: the model recorder
    """
    with R.start(experiment_name=experiment_name, recorder_id=rec.info["id"], resume=True):
        task_config = R.load_object("task")
        _exe_task(task_config)
    return rec
def begin_task_train(task_config: dict, experiment_name: str, recorder_name: str = None) -> Recorder:
    """
    Begin task training: start a recorder and save the task config.

    Args:
        task_config (dict): the config of a task
        experiment_name (str): the name of the experiment
        recorder_name (str): the given name will be the recorder name. None for using the recorder id.

    Returns:
        Recorder: the model recorder
    """
    with R.start(experiment_name=experiment_name, recorder_name=recorder_name):
        _log_task_info(task_config)
        return R.get_recorder()
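# A sketch of the two-step flow that begin_task_train/end_task_train enable,
# e.g. when the two halves run in different processes. `my_task_config` is a
# hypothetical stand-in for a real task dict such as the example_task_config
# shown earlier; "two_step_demo" is an arbitrary experiment name.
rec = begin_task_train(my_task_config, experiment_name="two_step_demo")
# ... the recorder (and its saved "task" object) can now be handed to a worker ...
rec = end_task_train(rec, experiment_name="two_step_demo")
print(rec.info["id"], rec.status)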
def __init__(
    self,
    experiment,
    process_list=[],
    rec_key_func=None,
    rec_filter_func=None,
    artifacts_path={"pred": "pred.pkl"},
    artifacts_key=None,
):
    """
    Init RecorderCollector.

    Args:
        experiment (Experiment or str): an instance of an Experiment or the name of an Experiment
        process_list (list or Callable): the list of processors, or the instance of a processor, to process the dict.
        rec_key_func (Callable): a function to get the key of a recorder. If None, use the recorder id.
        rec_filter_func (Callable, optional): filter the recorder by returning True or False. Defaults to None.
        artifacts_path (dict, optional): the artifact names and their paths in the Recorder.
            Defaults to {"pred": "pred.pkl"}.
        artifacts_key (str or List, optional): the artifact keys you want to get. If None, get all artifacts.
    """
    super().__init__(process_list=process_list)
    if isinstance(experiment, str):
        experiment = R.get_exp(experiment_name=experiment)
    self.experiment = experiment
    self.artifacts_path = artifacts_path
    if rec_key_func is None:
        rec_key_func = lambda rec: rec.info["id"]
    if artifacts_key is None:
        artifacts_key = list(self.artifacts_path.keys())
    self.rec_key_func = rec_key_func
    self.artifacts_key = artifacts_key
    self.rec_filter_func = rec_filter_func
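# A usage sketch for RecorderCollector, mirroring the ens_rolling pattern shown
# further below: collect the saved predictions of all finished recorders in an
# experiment. "my_rolling_exp" is a hypothetical experiment name.
rc = RecorderCollector(
    experiment="my_rolling_exp",
    artifacts_path={"pred": "pred.pkl"},
    rec_filter_func=lambda rec: rec.status == "FINISHED",
)
collected = rc()  # a dict keyed by artifact name, e.g. collected["pred"]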
def reset(self):
    print("========== reset ==========")
    if isinstance(self.trainer, TrainerRM):
        TaskManager(task_pool=self.task_pool).remove()
    exp = R.get_exp(experiment_name=self.experiment_name)
    for rid in exp.list_recorders():
        exp.delete_recorder(rid)
def ens_rolling(self):
    rc = RecorderCollector(
        experiment=self.rolling_exp,
        artifacts_key=["pred", "label"],
        process_list=[RollingEnsemble()],
        # rec_key_func=lambda rec: (self.COMB_EXP, rec.info["id"]),
        artifacts_path={"pred": "pred.pkl", "label": "label.pkl"},
    )
    res = rc()
    with R.start(experiment_name=self.COMB_EXP):
        R.log_params(exp_name=self.rolling_exp)
        R.save_objects(**{"pred.pkl": res["pred"], "label.pkl": res["label"]})
def reset(self):
    for task in self.tasks + self.add_tasks:
        name_id = task["model"]["class"]
        exp = R.get_exp(experiment_name=name_id)
        for rid in exp.list_recorders():
            exp.delete_recorder(rid)
    if os.path.exists(self._ROLLING_MANAGER_PATH):
        os.remove(self._ROLLING_MANAGER_PATH)
def train():
    """train model

    Returns
    -------
    pred_score: pandas.DataFrame
        predict scores
    performance: dict
        model performance
    rid: str
        the id of the recorder
    """
    # model initiation
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])
    # To test __repr__
    print(dataset)
    print(R)

    # start exp
    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)

        # prediction
        recorder = R.get_recorder()
        # To test __repr__
        print(recorder)
        # To test get_local_dir
        print(recorder.get_local_dir())

        rid = recorder.id
        sr = SignalRecord(model, dataset, recorder)
        sr.generate()
        pred_score = sr.load()

        # calculate ic and ric
        sar = SigAnaRecord(recorder)
        sar.generate()
        ic = sar.load(sar.get_path("ic.pkl"))
        ric = sar.load(sar.get_path("ric.pkl"))

    return pred_score, {"ic": ic, "ric": ric}, rid
def task_train(task_config: dict, experiment_name: str, recorder_name: str = None) -> Recorder:
    """
    Task-based training, which can also be divided into two steps
    (``begin_task_train`` and ``end_task_train``).

    Parameters
    ----------
    task_config : dict
        The config of a task.
    experiment_name: str
        The name of the experiment
    recorder_name: str
        The name of the recorder

    Returns
    -------
    Recorder: The instance of the recorder
    """
    with R.start(experiment_name=experiment_name, recorder_name=recorder_name):
        _log_task_info(task_config)
        _exe_task(task_config)
        return R.get_recorder()
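# The single-call counterpart of the two-step sketch above: train, record, and
# return the finished recorder in one go. `my_task_config` is again a
# hypothetical task dict.
recorder = task_train(my_task_config, experiment_name="one_step_demo")
print(recorder.list_metrics())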
def update_rolling_rec(self):
    """
    Evaluate the combined rolling results
    """
    for rid, rec in R.list_recorders(experiment_name=self.COMB_EXP).items():
        for rt_cls in (SigAnaRecord, PortAnaRecord):
            rt = rt_cls(recorder=rec, skip_existing=True)
            rt.generate()
    print(f"Your evaluation results can be found in the experiment named `{self.COMB_EXP}`.")
def end_task_train(rec: Recorder, experiment_name: str) -> Recorder:
    """
    Finish task training with real model fitting and saving.

    Args:
        rec (Recorder): the recorder to be resumed
        experiment_name (str): the name of the experiment

    Returns:
        Recorder: the model recorder
    """
    with R.start(experiment_name=experiment_name, recorder_id=rec.info["id"], resume=True):
        task_config = R.load_object("task")
        # model & dataset initiation
        model: Model = init_instance_by_config(task_config["model"])
        dataset: Dataset = init_instance_by_config(task_config["dataset"])
        # model training
        model.fit(dataset)
        R.save_objects(**{"params.pkl": model})
        # this dataset is saved for online inference, so the concrete data should not be dumped
        dataset.config(dump_all=False, recursive=True)
        R.save_objects(**{"dataset": dataset})
        # fill placeholders
        placeholder_value = {"<MODEL>": model, "<DATASET>": dataset}
        task_config = fill_placeholder(task_config, placeholder_value)
        # generate records: prediction, backtest, and analysis
        records = task_config.get("record", [])
        if isinstance(records, dict):  # a single record may be given as a bare dict
            records = [records]
        for record in records:
            r = init_instance_by_config(record, recorder=rec)
            r.generate()
    return rec
def query_info(save_dir, verbose, name_filter, key_map):
    R.set_uri(save_dir)
    experiments = R.list_experiments()

    if verbose:
        print("There are {:} experiments.".format(len(experiments)))
    qresults = []
    for idx, (key, experiment) in enumerate(experiments.items()):
        if experiment.id == "0":
            continue
        if name_filter is not None and re.fullmatch(name_filter, experiment.name) is None:
            continue
        recorders = experiment.list_recorders()
        recorders, not_finished = filter_finished(recorders)
        if verbose:
            print(
                "====>>>> {:02d}/{:02d}-th experiment {:9s} has {:02d}/{:02d} finished recorders.".format(
                    idx + 1, len(experiments), experiment.name, len(recorders), len(recorders) + not_finished
                )
            )
        result = QResult(experiment.name)
        for recorder_id, recorder in recorders.items():
            result.update(recorder.list_metrics(), key_map)
            result.append_path(os.path.join(recorder.uri, recorder.experiment_id, recorder.id))
        if not len(result):
            print("There are no valid recorders for {:}".format(experiment))
            continue
        else:
            print("There are {:} valid recorders for {:}".format(len(recorders), experiment.name))
        qresults.append(result)
    return qresults
def run_exp(task_config, dataset, experiment_name, recorder_name, uri):
    model = init_instance_by_config(task_config["model"])
    model_fit_kwargs = dict(dataset=dataset)

    # Let's start the experiment.
    with R.start(
        experiment_name=experiment_name,
        recorder_name=recorder_name,
        uri=uri,
        resume=True,
    ):
        # Set up logging
        recorder_root_dir = R.get_recorder().get_local_dir()
        log_file = os.path.join(recorder_root_dir, "{:}.log".format(experiment_name))
        set_log_basic_config(log_file)
        logger = get_module_logger("q.run_exp")
        logger.info("task_config::\n{:}".format(pprint.pformat(task_config, indent=2)))
        logger.info("[{:}] - [{:}]: {:}".format(experiment_name, recorder_name, uri))
        logger.info("dataset={:}".format(dataset))

        # Train the model, passing a checkpoint location if the model supports it
        R.log_params(**flatten_dict(task_config))
        if "save_path" in inspect.getfullargspec(model.fit).args:
            model_fit_kwargs["save_path"] = os.path.join(recorder_root_dir, "model.ckp")
        elif "save_dir" in inspect.getfullargspec(model.fit).args:
            model_fit_kwargs["save_dir"] = os.path.join(recorder_root_dir, "model-ckps")
        model.fit(**model_fit_kwargs)

        # Get the recorder
        recorder = R.get_recorder()
        R.save_objects(**{"model.pkl": model})

        # Generate records: prediction, backtest, and analysis
        for record in task_config["record"]:
            record = record.copy()
            if record["class"] == "SignalRecord":
                srconf = {"model": model, "dataset": dataset, "recorder": recorder}
                record["kwargs"].update(srconf)
                sr = init_instance_by_config(record)
                sr.generate()
            else:
                rconf = {"recorder": recorder}
                record["kwargs"].update(rconf)
                ar = init_instance_by_config(record)
                ar.generate()
def fit(
    self,
    dataset: DatasetH,
    num_boost_round=None,
    early_stopping_rounds=None,
    verbose_eval=20,
    evals_result=None,
    reweighter=None,
    **kwargs,
):
    if evals_result is None:
        evals_result = {}  # avoid the mutable-default-argument pitfall
    ds_l = self._prepare_data(dataset, reweighter)
    ds, names = list(zip(*ds_l))
    early_stopping_callback = lgb.early_stopping(
        self.early_stopping_rounds if early_stopping_rounds is None else early_stopping_rounds
    )
    # NOTE: if you encounter an error here, please upgrade your lightgbm
    verbose_eval_callback = lgb.log_evaluation(period=verbose_eval)
    evals_result_callback = lgb.record_evaluation(evals_result)
    self.model = lgb.train(
        self.params,
        ds[0],  # training dataset
        num_boost_round=self.num_boost_round if num_boost_round is None else num_boost_round,
        valid_sets=ds,
        valid_names=names,
        callbacks=[early_stopping_callback, verbose_eval_callback, evals_result_callback],
        **kwargs,
    )
    # log the per-epoch evaluation metrics of every validation set
    for k in names:
        for key, val in evals_result[k].items():
            name = f"{key}.{k}"
            for epoch, m in enumerate(val):
                R.log_metrics(**{name.replace("@", "_"): m}, step=epoch)
def get_all_results(folders) -> dict:
    results = dict()
    for fn in folders:
        try:
            exp = R.get_exp(experiment_name=fn, create=False)
        except ValueError:
            # No experiment results
            continue
        recorders = exp.list_recorders()
        result = dict()
        result["annualized_return_with_cost"] = list()
        result["information_ratio_with_cost"] = list()
        result["max_drawdown_with_cost"] = list()
        result["ic"] = list()
        result["icir"] = list()
        result["rank_ic"] = list()
        result["rank_icir"] = list()
        for recorder_id in recorders:
            if recorders[recorder_id].status == "FINISHED":
                recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=fn)
                metrics = recorder.list_metrics()
                if "1day.excess_return_with_cost.annualized_return" not in metrics:
                    print(f"{recorder_id} is skipped due to incomplete result")
                    continue
                result["annualized_return_with_cost"].append(metrics["1day.excess_return_with_cost.annualized_return"])
                result["information_ratio_with_cost"].append(metrics["1day.excess_return_with_cost.information_ratio"])
                result["max_drawdown_with_cost"].append(metrics["1day.excess_return_with_cost.max_drawdown"])
                result["ic"].append(metrics["IC"])
                result["icir"].append(metrics["ICIR"])
                result["rank_ic"].append(metrics["Rank IC"])
                result["rank_icir"].append(metrics["Rank ICIR"])
        results[fn] = result
    return results
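# A usage sketch for get_all_results: aggregate the metrics of hypothetical
# experiment folders and summarize one of them with the standard library.
import statistics

all_results = get_all_results(["workflow", "workflow_with_sigana"])
for exp_name, metrics in all_results.items():
    if metrics["ic"]:
        print(exp_name, "mean IC:", statistics.mean(metrics["ic"]))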
def task_train(task_config: dict, experiment_name):
    """
    task based training

    Parameters
    ----------
    task_config : dict
        A dict that describes a task setting.
    """
    # model initiation
    model = init_instance_by_config(task_config["model"])
    dataset = init_instance_by_config(task_config["dataset"])

    # start exp
    with R.start(experiment_name=experiment_name):
        # train model
        R.log_params(**flatten_dict(task_config))
        model.fit(dataset)
        recorder = R.get_recorder()

        # generate records: prediction, backtest, and analysis
        for record in task_config["record"]:
            if record["class"] == SignalRecord.__name__:
                srconf = {"model": model, "dataset": dataset, "recorder": recorder}
                record["kwargs"].update(srconf)
                sr = init_instance_by_config(record)
                sr.generate()
            else:
                rconf = {"recorder": recorder}
                record["kwargs"].update(rconf)
                ar = init_instance_by_config(record)
                ar.generate()
def check_diff_freq(self):
    self._init_qlib()
    exp = R.get_exp(experiment_name="backtest")
    rec = next(iter(exp.list_recorders().values()))  # assuming this will get the latest recorder
    for check_key in ("account", "total_turnover", "total_cost"):
        acc_dict = {}
        for freq in ["30minute", "5minute", "1day"]:
            acc_dict[freq] = rec.load_object(f"portfolio_analysis/report_normal_{freq}.pkl")[check_key]
        acc_df = pd.DataFrame(acc_dict)
        acc_resam = acc_df.resample("1d").last().dropna()
        assert (acc_resam["30minute"] == acc_resam["1day"]).all()
def _train_model(self, model, dataset):
    with R.start(experiment_name="train"):
        R.log_params(**flatten_dict(self.task))
        model.fit(dataset)
        R.save_objects(**{"params.pkl": model})

        # prediction
        recorder = R.get_recorder()
        sr = SignalRecord(model, dataset, recorder)
        sr.generate()