コード例 #1
0
    def backtest(self):
        """Train the task's model and run a multi-frequency portfolio backtest.

        The model and dataset are built from ``self.task``, trained through
        ``self._train_model`` and then handed to a ``TopkDropoutStrategy``
        (``topk=50``, ``n_drop=5``).  The strategy and ``self.benchmark`` are
        written into ``self.port_analysis_config`` before a ``PortAnaRecord``
        is generated inside the ``"backtest"`` experiment.
        """
        self._init_qlib()
        model = init_instance_by_config(self.task["model"])
        dataset = init_instance_by_config(self.task["dataset"])
        self._train_model(model, dataset)

        # Frequencies at which both risk and indicator analysis are produced.
        analysis_freqs = ("day", "30min", "5min")

        self.port_analysis_config["strategy"] = {
            "class": "TopkDropoutStrategy",
            "module_path": "qlib.contrib.strategy.signal_strategy",
            "kwargs": {"signal": (model, dataset), "topk": 50, "n_drop": 5},
        }
        self.port_analysis_config["backtest"]["benchmark"] = self.benchmark

        with R.start(experiment_name="backtest"):
            # Each keyword gets its own list so the record cannot alias them.
            PortAnaRecord(
                R.get_recorder(),
                self.port_analysis_config,
                risk_analysis_freq=list(analysis_freqs),
                indicator_analysis_freq=list(analysis_freqs),
                indicator_analysis_method="value_weighted",
            ).generate()
コード例 #2
0
ファイル: test_all_pipeline.py プロジェクト: yyll008/qlib
def train_with_sigana():
    """Train the task model, then generate SigAnaRecord and SignalMseRecord.

    Returns
    -------
        pred_score:
            object loaded from ``pred.pkl`` (the predicted scores)
        performance: dict
            ``{"ic": ..., "ric": ...}`` loaded from the signal-analysis record
        uri_path:
            value of ``R.get_uri()`` while the experiment is active
    """
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])

    with R.start(experiment_name="workflow_with_sigana"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)

        recorder = R.get_recorder()

        # Signal analysis: produces ic / ric artifacts and the prediction.
        sig_analysis = SigAnaRecord(recorder, model=model, dataset=dataset)
        sig_analysis.generate()
        performance = {
            "ic": sig_analysis.load(sig_analysis.get_path("ic.pkl")),
            "ric": sig_analysis.load(sig_analysis.get_path("ric.pkl")),
        }
        pred_score = sig_analysis.load("pred.pkl")

        # MSE record is generated for its side effects only.
        SignalMseRecord(recorder).generate()
        uri_path = R.get_uri()

    return pred_score, performance, uri_path
コード例 #3
0
ファイル: trainer.py プロジェクト: you-n-g/qlib
def _exe_task(task_config: dict):
    """Fit the task's model, persist it, and generate the configured records.

    Args:
        task_config (dict): task description with ``"model"`` and
            ``"dataset"`` entries and an optional ``"record"`` entry
            (a single dict or a list of dicts).
    """
    recorder = R.get_recorder()

    # Instantiate model and dataset from their configs.
    model: Model = init_instance_by_config(task_config["model"])
    dataset: Dataset = init_instance_by_config(task_config["dataset"])

    # FIXME: resume reweighter after merging data selection
    # reweighter: Reweighter = task_config.get("reweighter", None)
    # auto_filter_kwargs(model.fit)(dataset, reweighter=reweighter)
    model.fit(dataset)
    R.save_objects(**{"params.pkl": model})

    # The dataset is stored for online inference, so the concrete data
    # should not be dumped with it.
    dataset.config(dump_all=False, recursive=True)
    R.save_objects(**{"dataset": dataset})

    # Substitute the placeholders with the live objects.
    task_config = fill_placeholder(
        task_config, {"<MODEL>": model, "<DATASET>": dataset}
    )

    # Records cover prediction, backtest and analysis.
    record_configs = task_config.get("record", [])
    if isinstance(record_configs, dict):
        # A single record may be given as a bare dict.
        record_configs = [record_configs]

    for record_config in record_configs:
        # Some record templates need `model` and `dataset`; offering them via
        # `try_kwargs` keeps task definitions short.
        init_instance_by_config(
            record_config,
            recorder=recorder,
            default_module="qlib.workflow.record_temp",
            try_kwargs={"model": model, "dataset": dataset},
        ).generate()
コード例 #4
0
ファイル: test_all_pipeline.py プロジェクト: lwwang1995/qlib
def train_with_sigana(uri_path: str = None):
    """Train the CSI300 GBDT task, then generate SignalRecord and SigAnaRecord.

    Parameters
    ----------
        uri_path: str
            forwarded to ``R.start`` as ``uri``

    Returns
    -------
        pred_score:
            object loaded from ``pred.pkl`` (the predicted scores)
        performance: dict
            ``{"ic": ..., "ric": ...}`` loaded from the signal-analysis record
        uri_path:
            value of ``R.get_uri()`` while the experiment is active
    """
    task = CSI300_GBDT_TASK
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])

    with R.start(experiment_name="workflow_with_sigana", uri=uri_path):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)
        recorder = R.get_recorder()

        # Prediction must exist before the signal analysis can run on it.
        signal_record = SignalRecord(model, dataset, recorder)
        signal_record.generate()
        pred_score = signal_record.load("pred.pkl")

        sig_analysis = SigAnaRecord(recorder)
        sig_analysis.generate()
        performance = {
            "ic": sig_analysis.load("ic.pkl"),
            "ric": sig_analysis.load("ric.pkl"),
        }

        uri_path = R.get_uri()

    return pred_score, performance, uri_path
コード例 #5
0
 def fit(
     self,
     dataset: DatasetH,
     num_boost_round=None,
     early_stopping_rounds=None,
     verbose_eval=20,
     evals_result=None,
     reweighter=None,
     **kwargs,
 ):
     """Train the LightGBM model and log every evaluation metric to ``R``.

     Early stopping, periodic logging and evaluation recording are passed to
     ``lgb.train`` as callbacks; the equivalent keyword arguments
     (``early_stopping_rounds``, ``verbose_eval``, ``evals_result``) are no
     longer accepted by recent LightGBM releases.

     Args:
         dataset: dataset handed to ``self._prepare_data``.
         num_boost_round: overrides ``self.num_boost_round`` when given.
         early_stopping_rounds: overrides ``self.early_stopping_rounds`` when
             given; early stopping is disabled if both are ``None``.
         verbose_eval: logging period in iterations; a falsy value disables
             periodic logging.
         evals_result: dict filled with the evaluation history.
         reweighter: forwarded to ``self._prepare_data``.
         **kwargs: forwarded to ``lgb.train``; a ``callbacks`` entry is
             appended to the callbacks built here.
     """
     if evals_result is None:
         evals_result = {}  # in case of unsafety of Python default values
     ds_l = self._prepare_data(dataset, reweighter)
     ds, names = list(zip(*ds_l))

     if num_boost_round is None:
         num_boost_round = self.num_boost_round
     if early_stopping_rounds is None:
         early_stopping_rounds = self.early_stopping_rounds

     # NOTE: the callback helpers require a reasonably recent lightgbm.
     callbacks = []
     if early_stopping_rounds is not None:
         callbacks.append(lgb.early_stopping(early_stopping_rounds))
     if verbose_eval:
         callbacks.append(lgb.log_evaluation(period=int(verbose_eval)))
     callbacks.append(lgb.record_evaluation(evals_result))
     # Keep caller-supplied callbacks instead of clashing on the keyword.
     callbacks.extend(kwargs.pop("callbacks", None) or [])

     self.model = lgb.train(
         self.params,
         ds[0],  # training dataset
         num_boost_round=num_boost_round,
         valid_sets=ds,
         valid_names=names,
         callbacks=callbacks,
         **kwargs,
     )
     for k in names:
         for key, val in evals_result[k].items():
             # "@" is replaced so the metric name is accepted by the logger.
             name = f"{key}.{k}".replace("@", "_")
             for epoch, m in enumerate(val):
                 R.log_metrics(**{name: m}, step=epoch)
コード例 #6
0
def get_all_results(folders) -> dict:
    """Collect backtest and signal metrics of finished recorders per experiment.

    Args:
        folders: iterable of experiment names.

    Returns:
        dict: ``{experiment_name: {result_name: [value, ...]}}`` with one
        value per finished recorder that carries every required metric.

    A finished recorder that lacks any required metric is skipped as a whole
    (with a message), so the per-metric lists always stay aligned instead of
    being left half-filled by a ``KeyError``.
    """
    # result name -> metric name as reported by the recorder
    metric_keys = {
        "annualized_return_with_cost": "excess_return_with_cost.annualized_return",
        "information_ratio_with_cost": "excess_return_with_cost.information_ratio",
        "max_drawdown_with_cost": "excess_return_with_cost.max_drawdown",
        "ic": "IC",
        "icir": "ICIR",
        "rank_ic": "Rank IC",
        "rank_icir": "Rank ICIR",
    }
    results = dict()
    for fn in folders:
        exp = R.get_exp(experiment_name=fn, create=False)
        recorders = exp.list_recorders()
        result = {name: list() for name in metric_keys}
        for recorder_id in recorders:
            if recorders[recorder_id].status != "FINISHED":
                continue
            recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=fn)
            metrics = recorder.list_metrics()
            if any(key not in metrics for key in metric_keys.values()):
                print(f"{recorder_id} is skipped due to incomplete result")
                continue
            for name, key in metric_keys.items():
                result[name].append(metrics[key])
        results[fn] = result
    return results
コード例 #7
0
def main(xargs):
    """Summarise the metrics of every experiment stored under ``xargs.save_dir``.

    For each experiment (the one with id ``"0"`` is skipped) the metrics of
    its finished recorders are merged into a ``QResult``; the per-experiment
    summaries are then passed to ``compare_results``.

    Args:
        xargs: object exposing a ``save_dir`` attribute used as the default
            tracking URI.
    """
    R.reset_default_uri(xargs.save_dir)
    experiments = R.list_experiments()

    # recorder metric name -> column name used in the report
    key_map = {
        "IC": "IC",
        "ICIR": "ICIR",
        "Rank IC": "Rank_IC",
        "Rank ICIR": "Rank_ICIR",
        "excess_return_with_cost.annualized_return": "Annualized_Return",
        "excess_return_with_cost.information_ratio": "Information_Ratio",
        "excess_return_with_cost.max_drawdown": "Max_Drawdown",
    }
    all_keys = list(key_map.values())

    print("There are {:} experiments.".format(len(experiments)))
    head_strs, value_strs, names = [], [], []
    for idx, experiment in enumerate(experiments.values()):
        if experiment.id == "0":
            continue
        recorders = experiment.list_recorders()
        recorders, not_finished = filter_finished(recorders)
        # The position is shown 1-based so the last experiment reads "N/N".
        print(
            "====>>>> {:02d}/{:02d}-th experiment {:9s} has {:02d}/{:02d} finished recorders."
            .format(idx + 1, len(experiments), experiment.name,
                    len(recorders),
                    len(recorders) + not_finished))
        result = QResult()
        for recorder in recorders.values():
            result.update(recorder.list_metrics(), key_map)
        head_str, value_str = result.info(all_keys, show=False)
        head_strs.append(head_str)
        value_strs.append(value_str)
        names.append(experiment.name)
    compare_results(head_strs, value_strs, names, space=10)
コード例 #8
0
ファイル: workflow.py プロジェクト: microsoft/qlib
    def train_meta_model(self):
        """
        Train a meta model on top of a simplified linear proxy model and save
        it in the ``self.meta_exp_name`` experiment.
        """

        # 1) Use the simplified proxy forecasting task to train the meta model.
        #    Only the dataset part of the proxy task is filled in here.
        rb = RollingBenchmark(model_type=self.sim_task_model)
        sim_task = rb.basic_task()

        handler_uri = f"file://{(DIRNAME / 'handler_proxy.pkl').absolute()}"
        # The proxy test segment ends where the simulated task's test ends.
        test_end = sim_task["dataset"]["kwargs"]["segments"]["test"][1]
        proxy_segments = {
            "train": ("2008-01-01", "2010-12-31"),
            "test": ("2011-01-01", test_end),
        }
        proxy_forecast_model_task = {
            # "model": "qlib.contrib.model.linear.LinearModel",
            "dataset": {
                "class": "qlib.data.dataset.DatasetH",
                "kwargs": {"handler": handler_uri, "segments": proxy_segments},
            },
            # "record": ["qlib.workflow.record_temp.SignalRecord"]
        }
        # proxy_forecast_model_task is the template for the meta tasks.
        # The first task's test date will be 2011-01-01 and each test segment
        # will be about 20 days; the tasks include training and test tasks.

        # 2) Prepare the meta dataset.
        kwargs = dict(
            task_tpl=proxy_forecast_model_task,
            step=self.step,
            segments=0.62,  # keep test period consistent with the dataset yaml
            trunc_days=1 + self.horizon,
            hist_step_n=30,
            fill_method="max",
            rolling_ext_days=0,
        )
        # NOTE:
        # The meta model's input (internal data) is shared between the proxy
        # model and the final forecasting model although their test segments
        # are not aligned.  Per the original author this worked in earlier
        # experiments, so the misalignment is not expected to hurt the method.
        # NOTE(review): pickle.load is only safe on trusted files.
        with self._internal_data_path.open("rb") as f:
            internal_data = pickle.load(f)
        md = MetaDatasetDS(exp_name=internal_data, **kwargs)

        # 3) Train the meta model and log it.
        with R.start(experiment_name=self.meta_exp_name):
            R.log_params(**kwargs)
            mm = MetaModelDS(
                step=self.step,
                hist_step_n=kwargs["hist_step_n"],
                lr=0.001,
                max_epoch=200,
                seed=43,
            )
            mm.fit(md)
            R.save_objects(model=mm)
コード例 #9
0
def query_info(save_dir, verbose, name_filter):
    """Summarise IC-style metrics of the experiments stored under ``save_dir``.

    Args:
        save_dir: tracking URI passed to ``R.set_uri``.
        verbose: when truthy, progress information is printed.
        name_filter: optional regular expression; experiments whose name does
            not ``re.match`` it are skipped.

    Returns:
        tuple: ``(info_str_dict, info_value_dict)`` where the first item is
        the result of ``compare_results`` and the second holds the collected
        ``heads``, ``values`` and ``names`` lists.
    """
    R.set_uri(save_dir)
    experiments = R.list_experiments()

    # recorder metric name -> column name used in the report
    key_map = {
        # "RMSE": "RMSE",
        "IC": "IC",
        "ICIR": "ICIR",
        "Rank IC": "Rank_IC",
        "Rank ICIR": "Rank_ICIR",
        # "excess_return_with_cost.annualized_return": "Annualized_Return",
        # "excess_return_with_cost.information_ratio": "Information_Ratio",
        # "excess_return_with_cost.max_drawdown": "Max_Drawdown",
    }
    all_keys = list(key_map.values())

    if verbose:
        print("There are {:} experiments.".format(len(experiments)))

    head_strs, value_strs, names = [], [], []
    for position, experiment in enumerate(experiments.values(), start=1):
        if experiment.id == "0":
            continue
        if name_filter is not None and re.match(name_filter, experiment.name) is None:
            continue

        finished, not_finished = filter_finished(experiment.list_recorders())
        if verbose:
            print(
                "====>>>> {:02d}/{:02d}-th experiment {:9s} has {:02d}/{:02d} finished recorders."
                .format(
                    position,
                    len(experiments),
                    experiment.name,
                    len(finished),
                    len(finished) + not_finished,
                ))

        result = QResult()
        for recorder in finished.values():
            result.update(recorder.list_metrics(), key_map)

        if not len(result):
            print("There are no valid recorders for {:}".format(experiment))
            continue
        print("There are {:} valid recorders for {:}".format(
            len(finished), experiment.name))

        head_str, value_str = result.info(all_keys, verbose=verbose)
        head_strs.append(head_str)
        value_strs.append(value_str)
        names.append(experiment.name)

    info_str_dict = compare_results(
        head_strs, value_strs, names, space=10, verbose=verbose
    )
    info_value_dict = dict(heads=head_strs, values=value_strs, names=names)
    return info_str_dict, info_value_dict
コード例 #10
0
def query_info(save_dir, verbose, name_filter, key_map):
    """Collect one ``QResult`` per matching experiment under ``save_dir``.

    Args:
        save_dir: a tracking directory, or a list of them (handled by
            recursing over the entries and concatenating the results).
        verbose: when truthy, progress information is printed.
        name_filter: optional regular expression; experiments whose name does
            not ``re.fullmatch`` it are skipped.
        key_map: metric-name mapping forwarded to ``QResult.update``.

    Returns:
        list: the ``QResult`` objects of all experiments with valid recorders.
    """
    if isinstance(save_dir, list):
        merged = []
        for directory in save_dir:
            merged.extend(query_info(directory, verbose, name_filter, key_map))
        return merged

    # From here on save_dir is a single location.
    R.set_uri(str(save_dir))
    experiments = R.list_experiments()

    if verbose:
        print("There are {:} experiments.".format(len(experiments)))

    # Artifacts that hold the per-date IC values of each split.
    file_names = ["results-train.pkl", "results-valid.pkl", "results-test.pkl"]

    qresults = []
    for position, experiment in enumerate(experiments.values(), start=1):
        if experiment.id == "0":
            continue
        if name_filter is not None and re.fullmatch(name_filter, experiment.name) is None:
            continue

        finished, not_finished = filter_finished(experiment.list_recorders())
        if verbose:
            print(
                "====>>>> {:02d}/{:02d}-th experiment {:9s} has {:02d}/{:02d} finished recorders."
                .format(
                    position,
                    len(experiments),
                    experiment.name,
                    len(finished),
                    len(finished) + not_finished,
                ))

        result = QResult(experiment.name)
        for recorder in finished.values():
            date2IC = OrderedDict()
            for file_name in file_names:
                all_ic = recorder.load_object(file_name)["all-IC"]
                for timestamp, value in zip(all_ic.index.tolist(), all_ic.tolist()):
                    add_to_dict(date2IC, timestamp, value)
            result.update(recorder.list_metrics(), key_map)
            result.append_path(
                os.path.join(recorder.uri, recorder.experiment_id, recorder.id)
            )
            result.append_date2ICs(date2IC)

        if not len(result):
            print("There are no valid recorders for {:}".format(experiment))
            continue
        if verbose:
            print("There are {:} valid recorders for {:}".format(
                len(finished), experiment.name))
        qresults.append(result)
    return qresults
コード例 #11
0
def train_multiseg():
    """Train the CSI300 GBDT task and generate a ``MultiSegRecord``.

    Returns the value of ``R.get_uri()`` taken while the ``"workflow"``
    experiment is active.
    """
    task = CSI300_GBDT_TASK
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])

    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)
        multi_seg = MultiSegRecord(model, dataset, R.get_recorder())
        # Evaluate the "valid" and "test" segments under their own names.
        multi_seg.generate({"valid": "valid", "test": "test"}, True)
        uri = R.get_uri()
    return uri
コード例 #12
0
def train_mse():
    """Train the CSI300 GBDT task and generate a ``SignalMseRecord``.

    Returns the value of ``R.get_uri()`` taken while the ``"workflow"``
    experiment is active.
    """
    task = CSI300_GBDT_TASK
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])

    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)
        mse_record = SignalMseRecord(R.get_recorder(), model=model, dataset=dataset)
        mse_record.generate()
        uri = R.get_uri()
    return uri
コード例 #13
0
    def __init__(
        self,
        recorder_id,
        experiment_id,
        provider_uri=r"E:\TDX\cjzq_tdx\vipdoc",
        region=REG_CN,
        exp_uri='file:D:\\Code\\my_qlib\\examples\\mlruns',
    ):
        """Initialise qlib and locate the artifact directories of one recorder.

        Args:
            recorder_id: id of the recorder to load.
            experiment_id: id of the experiment owning the recorder.
            provider_uri: data location passed to ``qlib.init``.
            region: region passed to ``qlib.init``.
            exp_uri: tracking URI passed to ``R.set_uri``; the default keeps
                the location that used to be hard-coded.
        """
        self.record_id = recorder_id
        self.experiment_id = experiment_id

        qlib.init(provider_uri=provider_uri, region=region)

        R.set_uri(exp_uri)
        self.recorder = R.get_recorder(recorder_id=recorder_id, experiment_id=experiment_id)

        # Strip the "file:" scheme only when it is really there, instead of
        # blindly cutting the first five characters.
        scheme = "file:"
        recorder_uri = self.recorder.uri
        if recorder_uri.startswith(scheme):
            recorder_uri = recorder_uri[len(scheme):]

        self.expr_dir = Path(recorder_uri).joinpath(experiment_id).joinpath(recorder_id)
        self.artifacts_dir = self.expr_dir.joinpath('artifacts')
        self.portfolio_dir = self.artifacts_dir.joinpath('portfolio_analysis')
        self.sig_dir = self.artifacts_dir.joinpath('sig_analysis')
コード例 #14
0
ファイル: trainer.py プロジェクト: newlyedward/qlib
def end_task_train(rec: Recorder, experiment_name: str) -> Recorder:
    """
    Resume an existing recorder and run the real training for its stored task.

    Args:
        rec (Recorder): the recorder to resume
        experiment_name (str): the name of the experiment

    Returns:
        Recorder: the same recorder that was passed in
    """
    recorder_id = rec.info["id"]
    with R.start(experiment_name=experiment_name, recorder_id=recorder_id, resume=True):
        # The task config was saved under "task" when the recorder was created.
        _exe_task(R.load_object("task"))
    return rec
コード例 #15
0
ファイル: trainer.py プロジェクト: newlyedward/qlib
def begin_task_train(task_config: dict, experiment_name: str, recorder_name: str = None) -> Recorder:
    """
    Start a recorder and log the task information, without training anything.

    Args:
        task_config (dict): the config of a task
        experiment_name (str): the name of the experiment
        recorder_name (str): name for the recorder, forwarded to ``R.start``

    Returns:
        Recorder: the recorder that was active inside the experiment
    """
    with R.start(experiment_name=experiment_name, recorder_name=recorder_name):
        _log_task_info(task_config)
        # Fetch the recorder while the experiment is still active.
        recorder = R.get_recorder()
    return recorder
コード例 #16
0
ファイル: collect.py プロジェクト: lipengyuan1994/qlib
    def __init__(
        self,
        experiment,
        process_list=None,
        rec_key_func=None,
        rec_filter_func=None,
        artifacts_path=None,
        artifacts_key=None,
    ):
        """
        Init RecorderCollector.

        Args:
            experiment (Experiment or str): an instance of an Experiment or the name of an Experiment
            process_list (list or Callable, optional): the list of processors or the instance of a processor to process dict. Defaults to an empty list.
            rec_key_func (Callable): a function to get the key of a recorder. If None, use recorder id.
            rec_filter_func (Callable, optional): filter the recorder by return True or False. Defaults to None.
            artifacts_path (dict, optional): The artifacts name and its path in Recorder. Defaults to {"pred": "pred.pkl"}.
            artifacts_key (str or List, optional): the artifacts key you want to get. If None, get all artifacts.
        """
        # Fresh containers per instance: mutable default arguments would be
        # shared between every collector created without these arguments.
        if process_list is None:
            process_list = []
        if artifacts_path is None:
            artifacts_path = {"pred": "pred.pkl"}

        super().__init__(process_list=process_list)
        if isinstance(experiment, str):
            experiment = R.get_exp(experiment_name=experiment)
        self.experiment = experiment
        self.artifacts_path = artifacts_path
        if rec_key_func is None:
            rec_key_func = lambda rec: rec.info["id"]
        if artifacts_key is None:
            artifacts_key = list(self.artifacts_path.keys())
        self.rec_key_func = rec_key_func
        self.artifacts_key = artifacts_key
        self.rec_filter_func = rec_filter_func
コード例 #17
0
 def reset(self):
     """Remove the task pool (for a ``TrainerRM`` trainer) and every recorder
     of ``self.experiment_name``."""
     print("========== reset ==========")

     uses_task_manager = isinstance(self.trainer, TrainerRM)
     if uses_task_manager:
         TaskManager(task_pool=self.task_pool).remove()

     experiment = R.get_exp(experiment_name=self.experiment_name)
     for recorder_id in experiment.list_recorders():
         experiment.delete_recorder(recorder_id)
コード例 #18
0
ファイル: rolling_benchmark.py プロジェクト: yutiansut/qlib
 def ens_rolling(self):
     """Ensemble the rolling predictions and labels and store the result in
     the ``self.COMB_EXP`` experiment."""
     collector = RecorderCollector(
         experiment=self.rolling_exp,
         artifacts_key=["pred", "label"],
         process_list=[RollingEnsemble()],
         # rec_key_func=lambda rec: (self.COMB_EXP, rec.info["id"]),
         artifacts_path={"pred": "pred.pkl", "label": "label.pkl"},
     )
     ensembled = collector()

     with R.start(experiment_name=self.COMB_EXP):
         R.log_params(exp_name=self.rolling_exp)
         # Saved under the same file names the rolling recorders use.
         combined = {"pred.pkl": ensembled["pred"], "label.pkl": ensembled["label"]}
         R.save_objects(**combined)
コード例 #19
0
    def reset(self):
        """Delete the recorders of every task's experiment and remove the
        rolling-manager file if it exists."""
        all_tasks = self.tasks + self.add_tasks
        for task in all_tasks:
            # Each experiment is named after the task's model class.
            experiment = R.get_exp(experiment_name=task["model"]["class"])
            for recorder_id in experiment.list_recorders():
                experiment.delete_recorder(recorder_id)

        manager_path = self._ROLLING_MANAGER_PATH
        if os.path.exists(manager_path):
            os.remove(manager_path)
コード例 #20
0
ファイル: test_all_pipeline.py プロジェクト: yyll008/qlib
def train():
    """Train the task model and generate SignalRecord and SigAnaRecord.

    Returns
    -------
        pred_score:
            object returned by ``SignalRecord.load()`` (the predicted scores)
        performance: dict
            ``{"ic": ..., "ric": ...}`` loaded from the signal-analysis record
        rid:
            id of the recorder used for the run
    """

    # Model and dataset initiation.
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])
    # Printing exercises __repr__ of both objects.
    print(dataset)
    print(R)

    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)

        recorder = R.get_recorder()
        # Exercises __repr__ and get_local_dir of the recorder.
        print(recorder)
        print(recorder.get_local_dir())
        rid = recorder.id

        # Prediction.
        signal_record = SignalRecord(model, dataset, recorder)
        signal_record.generate()
        pred_score = signal_record.load()

        # IC and rank IC.
        sig_analysis = SigAnaRecord(recorder)
        sig_analysis.generate()
        performance = {
            "ic": sig_analysis.load(sig_analysis.get_path("ic.pkl")),
            "ric": sig_analysis.load(sig_analysis.get_path("ric.pkl")),
        }

    return pred_score, performance, rid
コード例 #21
0
ファイル: trainer.py プロジェクト: newlyedward/qlib
def task_train(task_config: dict, experiment_name: str, recorder_name: str = None) -> Recorder:
    """
    Run a whole task (info logging, then execution) inside one recorder.

    Parameters
    ----------
    task_config : dict
        The config of a task.
    experiment_name: str
        The name of the experiment
    recorder_name: str
        The name of the recorder

    Returns
    ----------
    Recorder: the recorder that was active during the run
    """
    with R.start(experiment_name=experiment_name, recorder_name=recorder_name):
        _log_task_info(task_config)
        _exe_task(task_config)
        # Fetch the recorder while the experiment is still active.
        recorder = R.get_recorder()
    return recorder
コード例 #22
0
ファイル: rolling_benchmark.py プロジェクト: yutiansut/qlib
 def update_rolling_rec(self):
     """
     Generate signal and portfolio analysis for every recorder of the
     combined rolling experiment.
     """
     record_classes = (SigAnaRecord, PortAnaRecord)
     combined = R.list_recorders(experiment_name=self.COMB_EXP)
     for rec in combined.values():
         for record_cls in record_classes:
             # skip_existing=True is forwarded to each record template.
             record_cls(recorder=rec, skip_existing=True).generate()
     print(
         f"Your evaluation results can be found in the experiment named `{self.COMB_EXP}`."
     )
コード例 #23
0
ファイル: trainer.py プロジェクト: ycl010203/qlib
def end_task_train(rec: Recorder, experiment_name: str) -> Recorder:
    """
    Resume a recorder, fit and save the model of its stored task, and
    generate the configured records.

    Args:
        rec (Recorder): the recorder to resume
        experiment_name (str): the name of the experiment

    Returns:
        Recorder: the same recorder that was passed in
    """
    recorder_id = rec.info["id"]
    with R.start(experiment_name=experiment_name, recorder_id=recorder_id, resume=True):
        task_config = R.load_object("task")

        # Instantiate model and dataset, then train.
        model: Model = init_instance_by_config(task_config["model"])
        dataset: Dataset = init_instance_by_config(task_config["dataset"])
        model.fit(dataset)
        R.save_objects(**{"params.pkl": model})

        # The dataset is stored for online inference, so the concrete data
        # should not be dumped with it.
        dataset.config(dump_all=False, recursive=True)
        R.save_objects(**{"dataset": dataset})

        # Substitute the placeholders with the live objects.
        task_config = fill_placeholder(
            task_config, {"<MODEL>": model, "<DATASET>": dataset}
        )

        # Records cover prediction, backtest and analysis.
        record_configs = task_config.get("record", [])
        if isinstance(record_configs, dict):
            # A single record may be given as a bare dict.
            record_configs = [record_configs]
        for record_config in record_configs:
            init_instance_by_config(record_config, recorder=rec).generate()
    return rec
コード例 #24
0
def query_info(save_dir, verbose, name_filter, key_map):
    """Collect one ``QResult`` per matching experiment under ``save_dir``.

    Args:
        save_dir: tracking URI passed to ``R.set_uri``.
        verbose: when truthy, progress information is printed.
        name_filter: optional regular expression; experiments whose name does
            not ``re.fullmatch`` it are skipped.
        key_map: metric-name mapping forwarded to ``QResult.update``.

    Returns:
        list: the ``QResult`` objects of all experiments with valid recorders.
    """
    R.set_uri(save_dir)
    experiments = R.list_experiments()

    if verbose:
        print("There are {:} experiments.".format(len(experiments)))

    qresults = []
    for position, experiment in enumerate(experiments.values(), start=1):
        if experiment.id == "0":
            continue
        if name_filter is not None and re.fullmatch(name_filter, experiment.name) is None:
            continue

        finished, not_finished = filter_finished(experiment.list_recorders())
        if verbose:
            print(
                "====>>>> {:02d}/{:02d}-th experiment {:9s} has {:02d}/{:02d} finished recorders."
                .format(
                    position,
                    len(experiments),
                    experiment.name,
                    len(finished),
                    len(finished) + not_finished,
                ))

        result = QResult(experiment.name)
        for recorder in finished.values():
            result.update(recorder.list_metrics(), key_map)
            result.append_path(
                os.path.join(recorder.uri, recorder.experiment_id, recorder.id)
            )

        if not len(result):
            print("There are no valid recorders for {:}".format(experiment))
            continue
        print("There are {:} valid recorders for {:}".format(
            len(finished), experiment.name))
        qresults.append(result)
    return qresults
コード例 #25
0
ファイル: q_exps.py プロジェクト: KarsonL/AutoDL-Projects
def run_exp(task_config, dataset, experiment_name, recorder_name, uri):
    """Train the task's model inside a recorder and generate its records.

    Args:
        task_config: task description with a ``"model"`` entry and a
            ``"record"`` list; it is left unmodified.
        dataset: dataset passed to ``model.fit``.
        experiment_name: experiment name forwarded to ``R.start``.
        recorder_name: recorder name forwarded to ``R.start``.
        uri: tracking URI forwarded to ``R.start``.
    """
    model = init_instance_by_config(task_config["model"])
    model_fit_kwargs = dict(dataset=dataset)

    # Let's start the experiment.
    with R.start(
            experiment_name=experiment_name,
            recorder_name=recorder_name,
            uri=uri,
            resume=True,
    ):
        recorder = R.get_recorder()

        # Setup log
        recorder_root_dir = recorder.get_local_dir()
        log_file = os.path.join(recorder_root_dir,
                                "{:}.log".format(experiment_name))
        set_log_basic_config(log_file)
        logger = get_module_logger("q.run_exp")
        logger.info("task_config::\n{:}".format(
            pprint.pformat(task_config, indent=2)))
        logger.info("[{:}] - [{:}]: {:}".format(experiment_name, recorder_name,
                                                uri))
        logger.info("dataset={:}".format(dataset))

        # Train model; a checkpoint location is passed only when `fit`
        # declares a matching parameter.
        R.log_params(**flatten_dict(task_config))
        fit_arg_names = inspect.getfullargspec(model.fit).args
        if "save_path" in fit_arg_names:
            model_fit_kwargs["save_path"] = os.path.join(
                recorder_root_dir, "model.ckp")
        elif "save_dir" in fit_arg_names:
            model_fit_kwargs["save_dir"] = os.path.join(
                recorder_root_dir, "model-ckps")
        model.fit(**model_fit_kwargs)
        R.save_objects(**{"model.pkl": model})

        # Generate records: prediction, backtest, and analysis
        for record in task_config["record"]:
            injected = {"recorder": recorder}
            if record["class"] == "SignalRecord":
                injected.update(model=model, dataset=dataset)
            # Build a new kwargs dict: a shallow copy of `record` would still
            # share (and therefore mutate) the caller's nested kwargs.
            record_config = dict(record)
            record_config["kwargs"] = {**(record.get("kwargs") or {}), **injected}
            init_instance_by_config(record_config).generate()
コード例 #26
0
ファイル: gbdt.py プロジェクト: yutiansut/qlib
 def fit(
     self,
     dataset: DatasetH,
     num_boost_round=None,
     early_stopping_rounds=None,
     verbose_eval=20,
     evals_result=None,
     reweighter=None,
     **kwargs,
 ):
     """Train the LightGBM model with callbacks and log every metric to ``R``.

     ``num_boost_round`` and ``early_stopping_rounds`` fall back to the
     attributes of the same name on ``self`` when they are ``None``;
     ``verbose_eval`` is the logging period and ``evals_result`` the dict
     that receives the evaluation history.  ``reweighter`` is forwarded to
     ``self._prepare_data`` and ``**kwargs`` to ``lgb.train``.
     """
     if evals_result is None:
         # A fresh dict per call avoids the shared mutable default pitfall.
         evals_result = {}

     prepared = self._prepare_data(dataset, reweighter)
     ds, names = list(zip(*prepared))

     stopping_rounds = early_stopping_rounds
     if stopping_rounds is None:
         stopping_rounds = self.early_stopping_rounds
     boost_rounds = num_boost_round
     if boost_rounds is None:
         boost_rounds = self.num_boost_round

     # NOTE: if you encounter error here. Please upgrade your lightgbm
     callbacks = [
         lgb.early_stopping(stopping_rounds),
         lgb.log_evaluation(period=verbose_eval),
         lgb.record_evaluation(evals_result),
     ]
     self.model = lgb.train(
         self.params,
         ds[0],  # training dataset
         num_boost_round=boost_rounds,
         valid_sets=ds,
         valid_names=names,
         callbacks=callbacks,
         **kwargs,
     )

     for set_name in names:
         for metric, history in evals_result[set_name].items():
             # "@" is replaced before the name is handed to the logger.
             metric_name = f"{metric}.{set_name}".replace("@", "_")
             for epoch, value in enumerate(history):
                 R.log_metrics(**{metric_name: value}, step=epoch)
コード例 #27
0
def get_all_results(folders) -> dict:
    """Collect 1-day backtest and signal metrics of finished recorders.

    Experiments that cannot be fetched (``ValueError``) are left out of the
    result; finished recorders lacking the annualized-return metric are
    skipped with a message.

    Returns a dict ``{experiment_name: {result_name: [value, ...]}}``.
    """
    # result name -> metric name as reported by the recorder (append order)
    metric_keys = (
        ("annualized_return_with_cost", "1day.excess_return_with_cost.annualized_return"),
        ("information_ratio_with_cost", "1day.excess_return_with_cost.information_ratio"),
        ("max_drawdown_with_cost", "1day.excess_return_with_cost.max_drawdown"),
        ("ic", "IC"),
        ("icir", "ICIR"),
        ("rank_ic", "Rank IC"),
        ("rank_icir", "Rank ICIR"),
    )

    results = {}
    for fn in folders:
        try:
            exp = R.get_exp(experiment_name=fn, create=False)
        except ValueError:
            # No experiment results
            continue

        recorders = exp.list_recorders()
        collected = {name: [] for name, _ in metric_keys}
        for recorder_id in recorders:
            if recorders[recorder_id].status != "FINISHED":
                continue
            recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=fn)
            metrics = recorder.list_metrics()
            if "1day.excess_return_with_cost.annualized_return" not in metrics:
                print(f"{recorder_id} is skipped due to incomplete result")
                continue
            for name, metric_key in metric_keys:
                collected[name].append(metrics[metric_key])
        results[fn] = collected
    return results
コード例 #28
0
ファイル: trainer.py プロジェクト: NTUT-SELab/qlib
def task_train(task_config: dict, experiment_name):
    """
    Task based training: fit the model and generate the configured records.

    Parameters
    ----------
    task_config : dict
        A dict describing a task setting; it must provide ``"model"``,
        ``"dataset"`` and ``"record"`` entries and is left unmodified.
    experiment_name :
        Name of the experiment the run is recorded under.
    """

    # model initiation
    model = init_instance_by_config(task_config["model"])
    dataset = init_instance_by_config(task_config["dataset"])

    # start exp
    with R.start(experiment_name=experiment_name):
        # train model
        R.log_params(**flatten_dict(task_config))
        model.fit(dataset)
        recorder = R.get_recorder()

        # generate records: prediction, backtest, and analysis
        for record in task_config["record"]:
            injected = {"recorder": recorder}
            if record["class"] == SignalRecord.__name__:
                injected.update(model=model, dataset=dataset)
            # Work on a copy so the live model/dataset/recorder objects are
            # not written back into the caller's task_config.
            record_config = dict(record)
            record_config["kwargs"] = {**(record.get("kwargs") or {}), **injected}
            init_instance_by_config(record_config).generate()
コード例 #29
0
    def check_diff_freq(self):
        """Check that 30-minute and daily backtest reports agree once the
        intraday report is resampled to one value per day.

        Every key in the loop is checked.  Previously the loop variable was
        overwritten with ``"total_cost"`` on each iteration, so the other
        keys were never compared.
        """
        self._init_qlib()
        exp = R.get_exp(experiment_name="backtest")
        rec = next(iter(exp.list_recorders().values())
                   )  # assuming this will get the latest recorder

        # Load each report once instead of once per checked key.
        freqs = ["30minute", "5minute", "1day"]
        reports = {
            freq: rec.load_object(
                f"portfolio_analysis/report_normal_{freq}.pkl")
            for freq in freqs
        }

        for check_key in "account", "total_turnover", "total_cost":
            acc_df = pd.DataFrame(
                {freq: reports[freq][check_key] for freq in freqs})
            # Keep the last observation of each day; days with a missing
            # value in any frequency are dropped.
            acc_resam = acc_df.resample("1d").last().dropna()
            assert (acc_resam["30minute"] == acc_resam["1day"]).all(), (
                f"30minute and 1day reports disagree on {check_key}")
コード例 #30
0
    def _train_model(self, model, dataset):
        """Fit ``model`` on ``dataset`` inside the ``"train"`` experiment,
        save it as ``params.pkl`` and generate a ``SignalRecord``."""
        with R.start(experiment_name="train"):
            R.log_params(**flatten_dict(self.task))
            model.fit(dataset)
            R.save_objects(**{"params.pkl": model})

            # Prediction is generated on the recorder of this run.
            signal_record = SignalRecord(model, dataset, R.get_recorder())
            signal_record.generate()