Esempio n. 1
0
def run_exp(task_config, dataset, experiment_name, recorder_name, uri):
    """Train the configured model and generate its records in one recorder.

    Parameters
    ----------
    task_config : dict
        Task description. ``task_config["model"]`` is instantiated with
        ``init_instance_by_config`` and every entry of
        ``task_config["record"]`` is turned into a record generator.
        The config is not modified by this function.
    dataset
        Dataset passed to ``model.fit`` and to ``SignalRecord`` entries.
    experiment_name : str
        Experiment name given to ``R.start``; also the stem of the log file.
    recorder_name : str
        Recorder name given to ``R.start``.
    uri : str
        Tracking URI given to ``R.start``.
    """
    model = init_instance_by_config(task_config["model"])
    model_fit_kwargs = dict(dataset=dataset)

    # Let's start the experiment.
    with R.start(
            experiment_name=experiment_name,
            recorder_name=recorder_name,
            uri=uri,
            resume=True,
    ):
        # Setup log: the log file lives next to the recorder's local files.
        recorder_root_dir = R.get_recorder().get_local_dir()
        log_file = os.path.join(recorder_root_dir,
                                "{:}.log".format(experiment_name))
        set_log_basic_config(log_file)
        logger = get_module_logger("q.run_exp")
        logger.info("task_config::\n{:}".format(
            pprint.pformat(task_config, indent=2)))
        logger.info("[{:}] - [{:}]: {:}".format(experiment_name, recorder_name,
                                                uri))
        logger.info("dataset={:}".format(dataset))

        # Train model
        R.log_params(**flatten_dict(task_config))
        # Only hand a checkpoint location to models whose ``fit`` accepts one;
        # the signature is inspected once and reused for both checks.
        fit_arg_names = inspect.getfullargspec(model.fit).args
        if "save_path" in fit_arg_names:
            model_fit_kwargs["save_path"] = os.path.join(
                recorder_root_dir, "model.ckp")
        elif "save_dir" in fit_arg_names:
            model_fit_kwargs["save_dir"] = os.path.join(
                recorder_root_dir, "model-ckps")
        model.fit(**model_fit_kwargs)
        # Get the recorder
        recorder = R.get_recorder()
        R.save_objects(**{"model.pkl": model})

        # Generate records: prediction, backtest, and analysis
        for record in task_config["record"]:
            injected = {"recorder": recorder}
            if record["class"] == "SignalRecord":
                # SignalRecord additionally needs the model and the dataset.
                injected.update(model=model, dataset=dataset)
            # Build a fresh config with a fresh ``kwargs`` dict. A shallow
            # ``record.copy()`` would share ``kwargs`` with ``task_config``
            # and leak the live objects into the caller's configuration.
            record = dict(record)
            record["kwargs"] = {**(record.get("kwargs") or {}), **injected}
            init_instance_by_config(record).generate()
Esempio n. 2
0
def get_all_results(folders) -> dict:
    """Gather the metrics of all finished recorders, grouped by experiment.

    Parameters
    ----------
    folders
        Iterable of experiment names; each one is looked up with
        ``R.get_exp(..., create=False)``.

    Returns
    -------
    dict
        ``{experiment_name: {result_key: [value, ...]}}`` holding one value
        per recorder whose status is ``"FINISHED"``.
    """
    # (key in the returned dict, key in the recorder's metrics), in the order
    # in which the values are read.
    metric_sources = (
        ("annualized_return_with_cost", "excess_return_with_cost.annualized_return"),
        ("information_ratio_with_cost", "excess_return_with_cost.information_ratio"),
        ("max_drawdown_with_cost", "excess_return_with_cost.max_drawdown"),
        ("ic", "IC"),
        ("icir", "ICIR"),
        ("rank_ic", "Rank IC"),
        ("rank_icir", "Rank ICIR"),
    )

    results = {}
    for fn in folders:
        recorders = R.get_exp(experiment_name=fn, create=False).list_recorders()
        collected = {result_key: [] for result_key, _ in metric_sources}
        for recorder_id in recorders:
            if recorders[recorder_id].status != "FINISHED":
                continue
            metrics = R.get_recorder(recorder_id=recorder_id, experiment_name=fn).list_metrics()
            for result_key, metric_name in metric_sources:
                collected[result_key].append(metrics[metric_name])
        results[fn] = collected
    return results
Esempio n. 3
0
def train_with_sigana():
    """Fit the model described by ``task`` and analyse it with SigAnaRecord.

    Returns
    -------
        pred_score: pandas.DataFrame
            predict scores
        performance: dict
            model performance, stored under the keys "ic" and "ric"
        uri_path:
            the value of ``R.get_uri()`` read inside the experiment
    """
    fitted_model = init_instance_by_config(task["model"])
    data = init_instance_by_config(task["dataset"])

    # Everything below is recorded under the "workflow_with_sigana" experiment.
    with R.start(experiment_name="workflow_with_sigana"):
        R.log_params(**flatten_dict(task))
        fitted_model.fit(data)

        # Prediction plus IC / rank-IC analysis.
        active_recorder = R.get_recorder()
        signal_analysis = SigAnaRecord(active_recorder, model=fitted_model, dataset=data)
        signal_analysis.generate()
        performance = {
            "ic": signal_analysis.load(signal_analysis.get_path("ic.pkl")),
            "ric": signal_analysis.load(signal_analysis.get_path("ric.pkl")),
        }
        pred_score = signal_analysis.load("pred.pkl")

        SignalMseRecord(active_recorder).generate()
        uri_path = R.get_uri()
    return pred_score, performance, uri_path
Esempio n. 4
0
    def backtest(self):
        """Train the task's model, then run a portfolio-analysis backtest.

        The strategy and benchmark entries are written into
        ``self.port_analysis_config`` itself before it is handed to
        ``PortAnaRecord``.
        """
        self._init_qlib()
        model = init_instance_by_config(self.task["model"])
        dataset = init_instance_by_config(self.task["dataset"])
        self._train_model(model, dataset)

        # Same object as the attribute: the updates below are in place.
        pa_conf = self.port_analysis_config
        pa_conf["strategy"] = {
            "class": "TopkDropoutStrategy",
            "module_path": "qlib.contrib.strategy.signal_strategy",
            "kwargs": {"signal": (model, dataset), "topk": 50, "n_drop": 5},
        }
        pa_conf["backtest"]["benchmark"] = self.benchmark

        analysis_freqs = ("day", "30min", "5min")
        with R.start(experiment_name="backtest"):
            port_record = PortAnaRecord(
                R.get_recorder(),
                pa_conf,
                # Each argument gets its own list, as in separate literals.
                risk_analysis_freq=list(analysis_freqs),
                indicator_analysis_freq=list(analysis_freqs),
                indicator_analysis_method="value_weighted",
            )
            port_record.generate()
Esempio n. 5
0
def train_with_sigana(uri_path: str = None):
    """Fit the CSI300 GBDT task, then run SignalRecord and SigAnaRecord.

    Parameters
    ----------
        uri_path: str
            forwarded to ``R.start`` as ``uri``

    Returns
    -------
        pred_score: pandas.DataFrame
            predict scores
        performance: dict
            model performance, stored under the keys "ic" and "ric"
        uri_path:
            the value of ``R.get_uri()`` read inside the experiment
    """
    gbdt_model = init_instance_by_config(CSI300_GBDT_TASK["model"])
    csi300_data = init_instance_by_config(CSI300_GBDT_TASK["dataset"])

    with R.start(experiment_name="workflow_with_sigana", uri=uri_path):
        R.log_params(**flatten_dict(CSI300_GBDT_TASK))
        gbdt_model.fit(csi300_data)
        active_recorder = R.get_recorder()

        # Prediction.
        signal_record = SignalRecord(gbdt_model, csi300_data, active_recorder)
        signal_record.generate()
        pred_score = signal_record.load("pred.pkl")

        # IC and rank-IC of the prediction.
        signal_analysis = SigAnaRecord(active_recorder)
        signal_analysis.generate()
        performance = {
            "ic": signal_analysis.load("ic.pkl"),
            "ric": signal_analysis.load("ric.pkl"),
        }

        uri_path = R.get_uri()
    return pred_score, performance, uri_path
Esempio n. 6
0
def _exe_task(task_config: dict):
    """Run one task inside the currently active recorder.

    The model and dataset named in ``task_config`` are instantiated, the
    model is fitted and both objects are saved, then every record config
    under ``task_config["record"]`` is instantiated and generated.

    Parameters
    ----------
    task_config : dict
        The config of a task; reads the keys "model", "dataset" and the
        optional "record" (a single dict or a list of dicts).
    """
    rec = R.get_recorder()

    # Model & dataset initiation.
    model: Model = init_instance_by_config(task_config["model"])
    dataset: Dataset = init_instance_by_config(task_config["dataset"])

    # FIXME: resume reweighter after merging data selection
    # reweighter: Reweighter = task_config.get("reweighter", None)
    # auto_filter_kwargs(model.fit)(dataset, reweighter=reweighter)
    model.fit(dataset)
    R.save_objects(**{"params.pkl": model})

    # The dataset is saved for online inference, so the concrete data should
    # not be dumped with it.
    dataset.config(dump_all=False, recursive=True)
    R.save_objects(**{"dataset": dataset})

    # Replace the placeholders in the config with the live objects.
    placeholder_values = {"<MODEL>": model, "<DATASET>": dataset}
    task_config = fill_placeholder(task_config, placeholder_values)

    # Generate records: prediction, backtest, and analysis.
    records = task_config.get("record", [])
    # A lone dict is treated as a one-element list.
    record_configs = [records] if isinstance(records, dict) else records
    for record_config in record_configs:
        # Some records require `model` and `dataset`; offering them through
        # `try_kwargs` keeps task definitions short.
        init_instance_by_config(
            record_config,
            recorder=rec,
            default_module="qlib.workflow.record_temp",
            try_kwargs={"model": model, "dataset": dataset},
        ).generate()
Esempio n. 7
0
    def _train_model(self, model, dataset):
        """Fit ``model`` on ``dataset`` under the "train" experiment.

        Logs the flattened ``self.task`` as parameters, saves the fitted model
        as "params.pkl" and generates a ``SignalRecord`` for it.
        """
        with R.start(experiment_name="train"):
            R.log_params(**flatten_dict(self.task))
            model.fit(dataset)
            R.save_objects(**{"params.pkl": model})

            # Prediction, written through the recorder of this run.
            SignalRecord(model, dataset, R.get_recorder()).generate()
Esempio n. 8
0
def train_mse():
    """Fit the CSI300 GBDT task and generate a SignalMseRecord for it.

    Returns
    -------
    The value of ``R.get_uri()`` read inside the "workflow" experiment.
    """
    gbdt_model = init_instance_by_config(CSI300_GBDT_TASK["model"])
    csi300_data = init_instance_by_config(CSI300_GBDT_TASK["dataset"])
    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(CSI300_GBDT_TASK))
        gbdt_model.fit(csi300_data)
        mse_record = SignalMseRecord(R.get_recorder(), model=gbdt_model, dataset=csi300_data)
        mse_record.generate()
        return R.get_uri()
Esempio n. 9
0
def train_multiseg():
    """Fit the CSI300 GBDT task and generate a MultiSegRecord for it.

    The record is generated for the "valid" and "test" segments, with
    ``True`` passed as the second positional argument of ``generate``.

    Returns
    -------
    The value of ``R.get_uri()`` read inside the "workflow" experiment.
    """
    gbdt_model = init_instance_by_config(CSI300_GBDT_TASK["model"])
    csi300_data = init_instance_by_config(CSI300_GBDT_TASK["dataset"])
    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(CSI300_GBDT_TASK))
        gbdt_model.fit(csi300_data)
        multiseg_record = MultiSegRecord(gbdt_model, csi300_data, R.get_recorder())
        multiseg_record.generate({"valid": "valid", "test": "test"}, True)
        return R.get_uri()
Esempio n. 10
0
def run_exp(task_config, dataset, experiment_name, recorder_name, uri):
    """Train the configured model and generate its records in one recorder.

    Parameters
    ----------
    task_config : dict
        Task description. ``task_config["model"]`` is instantiated with
        ``init_instance_by_config`` and every entry of
        ``task_config["record"]`` is turned into a record generator.
        The config is not modified by this function.
    dataset
        Dataset passed to ``model.fit`` and to ``SignalRecord`` entries.
    experiment_name : str
        Experiment name given to ``R.start``; also the stem of the log file.
    recorder_name : str
        Recorder name given to ``R.start``.
    uri : str
        Tracking URI given to ``R.start``.
    """
    # model initiation
    print("")
    print("[{:}] - [{:}]: {:}".format(experiment_name, recorder_name, uri))
    print("dataset={:}".format(dataset))

    model = init_instance_by_config(task_config["model"])

    # start exp
    with R.start(experiment_name=experiment_name,
                 recorder_name=recorder_name,
                 uri=uri):

        # NOTE(review): the ``/`` join assumes ``root_uri`` is path-like;
        # confirm against the recorder implementation in use.
        log_file = R.get_recorder().root_uri / "{:}.log".format(
            experiment_name)
        set_log_basic_config(log_file)

        # train model
        R.log_params(**flatten_dict(task_config))
        model.fit(dataset)
        recorder = R.get_recorder()
        R.save_objects(**{"model.pkl": model})

        # generate records: prediction, backtest, and analysis
        for record in task_config["record"]:
            injected = {"recorder": recorder}
            if record["class"] == "SignalRecord":
                # SignalRecord additionally needs the model and the dataset.
                injected.update(model=model, dataset=dataset)
            # Build a fresh config with a fresh ``kwargs`` dict. A shallow
            # ``record.copy()`` would share ``kwargs`` with ``task_config``
            # and leak the live objects into the caller's configuration.
            record = dict(record)
            record["kwargs"] = {**(record.get("kwargs") or {}), **injected}
            init_instance_by_config(record).generate()
Esempio n. 11
0
    def __init__(self, recorder_id, experiment_id, provider_uri=r"E:\TDX\cjzq_tdx\vipdoc", region=REG_CN,
                 tracking_uri='file:D:\\Code\\my_qlib\\examples\\mlruns'):
        """Initialise qlib and locate the artifact folders of one recorder.

        Parameters
        ----------
        recorder_id : str
            Id of the recorder to load; also a path component of its folder.
        experiment_id : str
            Id of the experiment owning the recorder; also a path component.
        provider_uri : str
            Data location passed to ``qlib.init``.
        region
            Region passed to ``qlib.init``.
        tracking_uri : str
            URI passed to ``R.set_uri``. The default is the location that was
            previously hard-coded, so existing callers behave as before.
        """
        self.record_id = recorder_id
        self.experiment_id = experiment_id

        qlib.init(provider_uri=provider_uri, region=region)

        R.set_uri(tracking_uri)
        self.recorder = R.get_recorder(recorder_id=recorder_id, experiment_id=experiment_id)

        # Turn the recorder URI into a filesystem path by dropping the
        # "file:" scheme. Checking for the prefix (instead of always slicing
        # off five characters) keeps a URI without it intact.
        file_scheme = "file:"
        store_root = self.recorder.uri
        if store_root.startswith(file_scheme):
            store_root = store_root[len(file_scheme):]

        self.expr_dir = Path(store_root).joinpath(experiment_id).joinpath(recorder_id)
        self.artifacts_dir = self.expr_dir.joinpath('artifacts')
        self.portfolio_dir = self.artifacts_dir.joinpath('portfolio_analysis')
        self.sig_dir = self.artifacts_dir.joinpath('sig_analysis')
Esempio n. 12
0
def begin_task_train(task_config: dict, experiment_name: str, recorder_name: str = None) -> Recorder:
    """
    Open a recorder for a task and log the task information into it.

    Args:
        task_config (dict): the config of a task
        experiment_name (str): the name of experiment
        recorder_name (str): the given name will be the recorder name. None for using rid.

    Returns:
        Recorder: the model recorder
    """
    with R.start(experiment_name=experiment_name, recorder_name=recorder_name):
        _log_task_info(task_config)
        # Fetched while the run is still active; handed back once it closes.
        started_recorder = R.get_recorder()
    return started_recorder
Esempio n. 13
0
def train(uri_path: str = None):
    """Fit the CSI300 GBDT task and record its prediction and IC analysis.

    Parameters
    ----------
        uri_path: str
            forwarded to ``R.start`` as ``uri``

    Returns
    -------
        pred_score: pandas.DataFrame
            predict scores
        performance: dict
            model performance, stored under the keys "ic" and "ric"
        rid:
            the ``id`` of the recorder used for the run
    """
    # Model initiation.
    gbdt_model = init_instance_by_config(CSI300_GBDT_TASK["model"])
    csi300_data = init_instance_by_config(CSI300_GBDT_TASK["dataset"])
    # Printing exercises __repr__ of both objects.
    print(csi300_data)
    print(R)

    with R.start(experiment_name="workflow", uri=uri_path):
        R.log_params(**flatten_dict(CSI300_GBDT_TASK))
        gbdt_model.fit(csi300_data)
        R.save_objects(**{"trained_model": gbdt_model})

        active_recorder = R.get_recorder()
        # Exercises __repr__ and get_local_dir of the recorder.
        print(active_recorder)
        print(active_recorder.get_local_dir())
        rid = active_recorder.id

        # Prediction.
        signal_record = SignalRecord(gbdt_model, csi300_data, active_recorder)
        signal_record.generate()
        pred_score = signal_record.load("pred.pkl")

        # IC and rank-IC of the prediction.
        signal_analysis = SigAnaRecord(active_recorder)
        signal_analysis.generate()
        performance = {
            "ic": signal_analysis.load("ic.pkl"),
            "ric": signal_analysis.load("ric.pkl"),
        }

    return pred_score, performance, rid
Esempio n. 14
0
def task_train(task_config: dict, experiment_name: str, recorder_name: str = None) -> Recorder:
    """
    Task based training: log the task information, then execute the task,
    both inside one freshly started recorder.

    Parameters
    ----------
    task_config : dict
        The config of a task.
    experiment_name: str
        The name of experiment
    recorder_name: str
        The name of recorder

    Returns
    ----------
    Recorder: The instance of the recorder
    """
    with R.start(experiment_name=experiment_name, recorder_name=recorder_name):
        for step in (_log_task_info, _exe_task):
            step(task_config)
        # Fetched while the run is still active; handed back once it closes.
        finished_recorder = R.get_recorder()
    return finished_recorder
Esempio n. 15
0
def begin_task_train(task_config: dict,
                     experiment_name: str,
                     recorder_name: str = None) -> Recorder:
    """
    Open a recorder for a task and store the task config in it.

    The flattened config is logged as parameters, the config itself is saved
    as the object "task", and the host name is attached as a tag.

    Args:
        task_config (dict): the config of a task
        experiment_name (str): the name of experiment
        recorder_name (str): the given name will be the recorder name. None for using rid.

    Returns:
        Recorder: the model recorder
    """
    with R.start(experiment_name=experiment_name, recorder_name=recorder_name):
        R.log_params(**flatten_dict(task_config))
        # Saved as an object to keep the original format and datatype.
        R.save_objects(task=task_config)
        R.set_tags(hostname=socket.gethostname())
        return R.get_recorder()
Esempio n. 16
0
def train():
    """Fit the model described by ``task`` and record prediction and IC analysis.

    Returns
    -------
        pred_score: pandas.DataFrame
            predict scores
        performance: dict
            model performance, stored under the keys "ic" and "ric"
        rid:
            the ``id`` of the recorder used for the run
    """
    # Model initiation.
    fitted_model = init_instance_by_config(task["model"])
    data = init_instance_by_config(task["dataset"])
    # Printing exercises __repr__ of both objects.
    print(data)
    print(R)

    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(task))
        fitted_model.fit(data)

        active_recorder = R.get_recorder()
        # Exercises __repr__ of the recorder.
        print(active_recorder)
        rid = active_recorder.id

        # Prediction.
        signal_record = SignalRecord(fitted_model, data, active_recorder)
        signal_record.generate()
        pred_score = signal_record.load()

        # IC and rank-IC of the prediction.
        signal_analysis = SigAnaRecord(active_recorder)
        signal_analysis.generate()
        performance = {
            "ic": signal_analysis.load(signal_analysis.get_path("ic.pkl")),
            "ric": signal_analysis.load(signal_analysis.get_path("ric.pkl")),
        }

    return pred_score, performance, rid
Esempio n. 17
0
def task_train(task_config: dict, experiment_name):
    """
    task based training

    The model and dataset named in the config are instantiated, the model is
    fitted and saved as "params.pkl", and every entry of
    ``task_config["record"]`` is instantiated and generated.
    ``task_config`` itself is left unmodified.

    Parameters
    ----------
    task_config : dict
        A dict describes a task setting.
    experiment_name
        Experiment name passed to ``R.start``.
    """

    # model initiation
    model = init_instance_by_config(task_config["model"])
    dataset = init_instance_by_config(task_config["dataset"])

    # start exp
    with R.start(experiment_name=experiment_name):
        # train model
        R.log_params(**flatten_dict(task_config))
        model.fit(dataset)
        recorder = R.get_recorder()
        R.save_objects(**{"params.pkl": model})

        # generate records: prediction, backtest, and analysis
        for record in task_config["record"]:
            injected = {"recorder": recorder}
            if record["class"] == SignalRecord.__name__:
                # SignalRecord additionally needs the model and the dataset.
                injected.update(model=model, dataset=dataset)
            # Work on a fresh config with a fresh ``kwargs`` dict. Updating
            # ``record["kwargs"]`` directly would write the live objects into
            # the caller's ``task_config``.
            record = dict(record)
            record["kwargs"] = {**(record.get("kwargs") or {}), **injected}
            init_instance_by_config(record).generate()
Esempio n. 18
0
def backtest_analysis(pred, rid):
    """Run the portfolio-analysis backtest on an existing recorder.

    Parameters
    ----------
    pred : pandas.DataFrame
        predict scores (not read by this function)
    rid : str
        the id of the recorder to be used in this function

    Returns
    -------
    analysis : pandas.DataFrame
        the analysis result

    """
    # Backtest against the recorder of the earlier "workflow" run.
    port_record = PortAnaRecord(
        R.get_recorder(experiment_name="workflow", recorder_id=rid),
        port_analysis_config,
    )
    port_record.generate()

    analysis_df = port_record.load(port_record.get_path("port_analysis.pkl"))
    print(analysis_df)
    return analysis_df
Esempio n. 19
0
def get_all_results(folders) -> dict:
    """Gather the metrics of all finished recorders, grouped by experiment.

    Experiments that cannot be fetched (``R.get_exp`` raises ``ValueError``)
    are left out of the result. A finished recorder is skipped, with a
    printed notice, unless it provides every metric read here, so the lists
    of one experiment always have the same length.

    Parameters
    ----------
    folders
        Iterable of experiment names.

    Returns
    -------
    dict
        ``{experiment_name: {result_key: [value, ...]}}`` holding one value
        per complete recorder whose status is ``"FINISHED"``.
    """
    # Key in the returned dict -> key in the recorder's metrics.
    metric_names = {
        "annualized_return_with_cost": "1day.excess_return_with_cost.annualized_return",
        "information_ratio_with_cost": "1day.excess_return_with_cost.information_ratio",
        "max_drawdown_with_cost": "1day.excess_return_with_cost.max_drawdown",
        "ic": "IC",
        "icir": "ICIR",
        "rank_ic": "Rank IC",
        "rank_icir": "Rank ICIR",
    }

    results = dict()
    for fn in folders:
        try:
            exp = R.get_exp(experiment_name=fn, create=False)
        except ValueError:
            # No experiment results
            continue
        recorders = exp.list_recorders()
        result = {result_key: list() for result_key in metric_names}
        for recorder_id in recorders:
            if recorders[recorder_id].status != "FINISHED":
                continue
            recorder = R.get_recorder(recorder_id=recorder_id,
                                      experiment_name=fn)
            metrics = recorder.list_metrics()
            # Check every metric up front: testing only one of them would let
            # a recorder lacking e.g. "IC" abort the whole aggregation with a
            # KeyError instead of being skipped.
            if any(name not in metrics for name in metric_names.values()):
                print(f"{recorder_id} is skipped due to incomplete result")
                continue
            for result_key, name in metric_names.items():
                result[result_key].append(metrics[name])
        results[fn] = result
    return results
Esempio n. 20
0
def main(xargs):
    """Train a QuantTransformer on Alpha360 features and backtest it.

    Parameters
    ----------
    xargs
        Parsed arguments; only ``xargs.market`` is read, as the handler's
        "instruments".
    """
    # Processors applied by the data handler.
    infer_processors = [
        {
            "class": "RobustZScoreNorm",
            "kwargs": {"fields_group": "feature", "clip_outlier": True},
        },
        {"class": "Fillna", "kwargs": {"fields_group": "feature"}},
    ]
    learn_processors = [
        {"class": "DropnaLabel"},
        {"class": "CSRankNorm", "kwargs": {"fields_group": "label"}},
    ]

    handler_config = {
        "class": "Alpha360",
        "module_path": "qlib.contrib.data.handler",
        "kwargs": {
            "start_time": "2008-01-01",
            "end_time": "2020-08-01",
            "fit_start_time": "2008-01-01",
            "fit_end_time": "2014-12-31",
            "instruments": xargs.market,
            "infer_processors": infer_processors,
            "learn_processors": learn_processors,
            "label": ["Ref($close, -2) / Ref($close, -1) - 1"],
        },
    }
    segments = {
        "train": ("2008-01-01", "2014-12-31"),
        "valid": ("2015-01-01", "2016-12-31"),
        "test": ("2017-01-01", "2020-08-01"),
    }
    dataset_config = {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {"handler": handler_config, "segments": segments},
    }

    model_config = {
        "class": "QuantTransformer",
        "module_path": "trade_models",
        "kwargs": {"loss": "mse", "GPU": "0", "metric": "loss"},
    }

    task = {"model": model_config, "dataset": dataset_config}

    model = init_instance_by_config(model_config)
    dataset = init_instance_by_config(dataset_config)

    # Train, predict and backtest inside a single experiment run.
    with R.start(experiment_name="train_tt_model"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)
        R.save_objects(**{"trained_model": model})

        # Prediction.
        recorder = R.get_recorder()
        print(recorder)
        SignalRecord(model, dataset, recorder).generate()

        # Backtest. If users want to use backtest based on their own prediction,
        # please refer to https://qlib.readthedocs.io/en/latest/component/recorder.html#record-template.
        PortAnaRecord(recorder, port_analysis_config).generate()
Esempio n. 21
0
def backtest_analysis(pred, rid, uri_path: str = None):
    """Backtest the saved "trained_model" of a recorder and analyse the result.

    Parameters
    ----------
    pred
        not read by this function
    rid : str
        the id of the recorder to be used in this function
    uri_path: str
        mlflow uri path

    Returns
    -------
    analysis : pandas.DataFrame
        the analysis result

    """
    # Only the recorder lookup happens under the given URI context.
    with R.uri_context(uri=uri_path):
        recorder = R.get_recorder(experiment_name="workflow", recorder_id=rid)

    dataset = init_instance_by_config(CSI300_GBDT_TASK["dataset"])
    model = recorder.load_object("trained_model")

    executor_config = {
        "class": "SimulatorExecutor",
        "module_path": "qlib.backtest.executor",
        "kwargs": {
            "time_per_step": "day",
            "generate_portfolio_metrics": True,
        },
    }
    strategy_config = {
        "class": "TopkDropoutStrategy",
        "module_path": "qlib.contrib.strategy.signal_strategy",
        "kwargs": {
            "signal": (model, dataset),
            "topk": 50,
            "n_drop": 5,
        },
    }
    exchange_kwargs = {
        "freq": "day",
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.0015,
        "min_cost": 5,
    }
    backtest_config = {
        "start_time": "2017-01-01",
        "end_time": "2020-08-01",
        "account": 100000000,
        "benchmark": CSI300_BENCH,
        "exchange_kwargs": exchange_kwargs,
    }
    port_analysis_config = {
        "executor": executor_config,
        "strategy": strategy_config,
        "backtest": backtest_config,
    }

    # Backtest.
    port_record = PortAnaRecord(recorder,
                                port_analysis_config,
                                risk_analysis_freq="day")
    port_record.generate()
    analysis_df = port_record.load("port_analysis_1day.pkl")
    print(analysis_df)
    return analysis_df
Esempio n. 22
0
            "close_cost": 0.0015,
            "min_cost": 5,
            "return_order": True,
        },
    }

    # model initialization
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])

    # NOTE: This line is optional
    # It demonstrates that the dataset can be used standalone.
    example_df = dataset.prepare("train")
    print(example_df.head())

    # start exp
    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)
        R.save_objects(**{"params.pkl": model})

        # prediction
        recorder = R.get_recorder()
        sr = SignalRecord(model, dataset, recorder)
        sr.generate()

        # backtest. If users want to use backtest based on their own prediction,
        # please refer to https://qlib.readthedocs.io/en/latest/component/recorder.html#record-template.
        par = PortAnaRecord(recorder, port_analysis_config)
        par.generate()
Esempio n. 23
0
    def backtest_only_daily(self):
        """
        Backtest with a single daily executor, for comparison between nested
        execution and single layer execution.
        Due to the low quality daily-level and minute-level data, they are hardly comparable.
        So it is used for detecting serious bugs which make the results different greatly.

        Sample output of a run:

        .. code-block:: shell

            [1724971:MainThread](2021-12-07 16:24:31,156) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_1day.pkl'
            has been saved as the artifact of the Experiment 2
            'The following are analysis results of benchmark return(1day).'
                                   risk
            mean               0.000651
            std                0.012472
            annualized_return  0.154967
            information_ratio  0.805422
            max_drawdown      -0.160445
            'The following are analysis results of the excess return without cost(1day).'
                                   risk
            mean               0.001375
            std                0.006103
            annualized_return  0.327204
            information_ratio  3.475016
            max_drawdown      -0.024927
            'The following are analysis results of the excess return with cost(1day).'
                                   risk
            mean               0.001184
            std                0.006091
            annualized_return  0.281801
            information_ratio  2.998749
            max_drawdown      -0.029568
            [1724971:MainThread](2021-12-07 16:24:31,170) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_1day.
            pkl' has been saved as the artifact of the Experiment 2
            'The following are analysis results of indicators(1day).'
                 value
            ffr    1.0
            pa     0.0
            pos    0.0
            [1724971:MainThread](2021-12-07 16:24:31,188) INFO - qlib.timer - [log.py:113] - Time cost: 0.007s | waiting `async_log` Done

        """
        self._init_qlib()
        model = init_instance_by_config(self.task["model"])
        dataset = init_instance_by_config(self.task["dataset"])
        self._train_model(model, dataset)

        topk_strategy = {
            "class": "TopkDropoutStrategy",
            "module_path": "qlib.contrib.strategy.signal_strategy",
            "kwargs": {"signal": (model, dataset), "topk": 50, "n_drop": 5},
        }
        daily_executor = {
            "class": "SimulatorExecutor",
            "module_path": "qlib.backtest.executor",
            "kwargs": {
                "time_per_step": "day",
                "generate_portfolio_metrics": True,
                "verbose": True,
            },
        }

        # Work on a private copy so the shared config keeps its own settings.
        pa_conf = deepcopy(self.port_analysis_config)
        pa_conf.update(strategy=topk_strategy, executor=daily_executor)
        pa_conf["backtest"]["benchmark"] = self.benchmark

        with R.start(experiment_name="backtest"):
            PortAnaRecord(R.get_recorder(), pa_conf).generate()
Esempio n. 24
0
def run_exp(
    task_config,
    dataset,
    experiment_name,
    recorder_name,
    uri,
    model_obj_name="model.pkl",
):
    """Load or train the configured model, then generate its records.

    The run is started with ``resume=True``. If an object named
    ``model_obj_name`` can be loaded from the recorder it is reused;
    when loading raises ``OSError`` the model is trained and saved under
    that name instead.

    Parameters
    ----------
    task_config : dict
        Task description; reads the keys "model" and "record". The config is
        not modified (each record entry is deep-copied before use).
    dataset
        Dataset passed to ``model.fit`` and to the signal records.
    experiment_name : str
        Experiment name given to ``R.start``; also the stem of the log file.
    recorder_name : str
        Recorder name given to ``R.start``.
    uri : str
        Tracking URI given to ``R.start``.
    model_obj_name : str
        Name under which the model object is loaded / saved.

    Raises
    ------
    ValueError
        If loading the saved model fails with anything other than
        ``OSError``; the original exception is attached as the cause.
    """
    model = init_instance_by_config(task_config["model"])
    model_fit_kwargs = dict(dataset=dataset)

    # Let's start the experiment.
    with R.start(
            experiment_name=experiment_name,
            recorder_name=recorder_name,
            uri=uri,
            resume=True,
    ):
        # Setup log: the log file lives next to the recorder's local files.
        recorder_root_dir = R.get_recorder().get_local_dir()
        log_file = os.path.join(recorder_root_dir,
                                "{:}.log".format(experiment_name))

        set_log_basic_config(log_file)
        logger = get_module_logger("q.run_exp")
        logger.info("task_config::\n{:}".format(
            pprint.pformat(task_config, indent=2)))
        logger.info("[{:}] - [{:}]: {:}".format(experiment_name, recorder_name,
                                                uri))
        logger.info("dataset={:}".format(dataset))

        # Train model
        try:
            if hasattr(model, "to"):  # Recoverable model
                # Remember the device of the fresh model so that the loaded
                # one can be moved onto it.
                ori_device = model.device
                model = R.load_object(model_obj_name)
                model.to(ori_device)
            else:
                model = R.load_object(model_obj_name)
            logger.info(
                "[Find existing object from {:}]".format(model_obj_name))
        except OSError:
            # Loading failed with OSError: train from scratch.
            R.log_params(**flatten_dict(update_gpu(task_config, None)))
            # Only hand a checkpoint location to models whose ``fit`` accepts
            # one; the signature is inspected once for both checks.
            fit_arg_names = inspect.getfullargspec(model.fit).args
            if "save_path" in fit_arg_names:
                model_fit_kwargs["save_path"] = os.path.join(
                    recorder_root_dir, "model.ckp")
            elif "save_dir" in fit_arg_names:
                model_fit_kwargs["save_dir"] = os.path.join(
                    recorder_root_dir, "model-ckps")
            model.fit(**model_fit_kwargs)
            # remove model to CPU for saving
            if hasattr(model, "to"):
                old_device = model.device
                model.to("cpu")
                R.save_objects(**{model_obj_name: model})
                model.to(old_device)
            else:
                R.save_objects(**{model_obj_name: model})
        except Exception as e:
            # Chain explicitly so the traceback marks ``e`` as the direct
            # cause of the ValueError.
            raise ValueError("Something wrong: {:}".format(e)) from e
        # Get the recorder
        recorder = R.get_recorder()

        # Generate records: prediction, backtest, and analysis
        for record in task_config["record"]:
            record = deepcopy(record)
            if record["class"] == "MultiSegRecord":
                # The configured kwargs are replaced entirely for this class.
                record["kwargs"] = dict(model=model,
                                        dataset=dataset,
                                        recorder=recorder)
                sr = init_instance_by_config(record)
                sr.generate(**record["generate_kwargs"])
            elif record["class"] == "SignalRecord":
                srconf = {
                    "model": model,
                    "dataset": dataset,
                    "recorder": recorder
                }
                # ``setdefault`` tolerates a record without "kwargs".
                record.setdefault("kwargs", {}).update(srconf)
                sr = init_instance_by_config(record)
                sr.generate()
            else:
                rconf = {"recorder": recorder}
                record.setdefault("kwargs", {}).update(rconf)
                ar = init_instance_by_config(record)
                ar.generate()