예제 #1
0
파일: ensemble.py 프로젝트: yutiansut/qlib
    def __call__(self, ensemble_dict: dict) -> pd.DataFrame:
        """Average the per-"datetime" standardized members of ``ensemble_dict``.

        Usage sample:
        from qlib.model.ens.ensemble import AverageEnsemble
        pred_res['new_key_name'] = AverageEnsemble()(predict_dict)

        Parameters
        ----------
        ensemble_dict : dict
            Dictionary (possibly nested) whose values are the pandas objects
            you want to ensemble. Nested keys are flattened first.

        Returns
        -------
        pd.DataFrame
            The ensembling result, sorted by index.
            NOTE: ``mean(axis=1)`` collapses the columns, so the object that
            is actually returned is a pd.Series.
        """
        # need to flatten the nested dict
        ensemble_dict = flatten_dict(ensemble_dict, sep=FLATTEN_TUPLE)
        values = list(ensemble_dict.values())
        # NOTE: this may change the style underlying data!!!!
        # from pd.DataFrame to pd.Series
        results = pd.concat(values, axis=1)
        # Standardize every column inside each "datetime" group.
        # group_keys=False keeps the original index: without it, newer pandas
        # versions prepend the group key as an additional index level to the
        # like-indexed result of ``apply``.
        results = results.groupby("datetime", group_keys=False).apply(
            lambda df: (df - df.mean()) / df.std())
        results = results.mean(axis=1)
        results = results.sort_index()
        return results
예제 #2
0
def train_with_sigana(uri_path: str = None):
    """Fit the CSI300 GBDT task, then generate SignalRecord and SigAnaRecord.

    Parameters
    ----------
        uri_path: str
            forwarded to ``R.start`` as ``uri``

    Returns
    -------
        pred_score: pandas.DataFrame
            predict scores, loaded from "pred.pkl"
        performance: dict
            model performance, ``{"ic": ..., "ric": ...}``
        uri_path:
            value of ``R.get_uri()`` read inside the experiment
    """
    task_conf = CSI300_GBDT_TASK
    model = init_instance_by_config(task_conf["model"])
    dataset = init_instance_by_config(task_conf["dataset"])

    # everything below runs inside the experiment context
    with R.start(experiment_name="workflow_with_sigana", uri=uri_path):
        flat_params = flatten_dict(task_conf)
        R.log_params(**flat_params)
        model.fit(dataset)
        recorder = R.get_recorder()

        # prediction record
        signal_rec = SignalRecord(model, dataset, recorder)
        signal_rec.generate()
        pred_score = signal_rec.load("pred.pkl")

        # signal analysis record: ic first, then ric
        analysis_rec = SigAnaRecord(recorder)
        analysis_rec.generate()
        performance = {}
        for key, file_name in (("ic", "ic.pkl"), ("ric", "ric.pkl")):
            performance[key] = analysis_rec.load(file_name)

        uri_path = R.get_uri()
    return pred_score, performance, uri_path
예제 #3
0
def train_with_sigana():
    """Fit the model of ``task``, then generate SigAnaRecord and SignalMseRecord.

    Returns
    -------
        pred_score: pandas.DataFrame
            predict scores, loaded from "pred.pkl"
        performance: dict
            model performance, ``{"ic": ..., "ric": ...}``
        uri_path:
            value of ``R.get_uri()`` read inside the experiment
    """
    task_conf = task
    model = init_instance_by_config(task_conf["model"])
    dataset = init_instance_by_config(task_conf["dataset"])

    # everything below runs inside the experiment context
    with R.start(experiment_name="workflow_with_sigana"):
        flat_params = flatten_dict(task_conf)
        R.log_params(**flat_params)
        model.fit(dataset)

        # signal analysis record: ic first, then ric, then the prediction
        recorder = R.get_recorder()
        analysis_rec = SigAnaRecord(recorder, model=model, dataset=dataset)
        analysis_rec.generate()
        performance = {}
        for key, file_name in (("ic", "ic.pkl"), ("ric", "ric.pkl")):
            performance[key] = analysis_rec.load(
                analysis_rec.get_path(file_name))
        pred_score = analysis_rec.load("pred.pkl")

        mse_rec = SignalMseRecord(recorder)
        mse_rec.generate()
        uri_path = R.get_uri()
    return pred_score, performance, uri_path
예제 #4
0
 def __call__(self, ensemble_dict: dict) -> pd.DataFrame:
     """Average the per-"datetime" standardized members of ``ensemble_dict``.

     Parameters
     ----------
     ensemble_dict : dict
         Dictionary (possibly nested) whose values are the pandas objects
         to ensemble. Nested keys are flattened first.

     Returns
     -------
     pd.DataFrame
         The ensembling result, sorted by index.
         NOTE: ``mean(axis=1)`` collapses the columns, so the object that
         is actually returned is a pd.Series.
     """
     # need to flatten the nested dict
     ensemble_dict = flatten_dict(ensemble_dict, sep=FLATTEN_TUPLE)
     values = list(ensemble_dict.values())
     results = pd.concat(values, axis=1)
     # Standardize every column inside each "datetime" group.
     # group_keys=False keeps the original index: without it, newer pandas
     # versions prepend the group key as an additional index level to the
     # like-indexed result of ``apply``.
     results = results.groupby("datetime", group_keys=False).apply(
         lambda df: (df - df.mean()) / df.std())
     results = results.mean(axis=1)
     results = results.sort_index()
     return results
예제 #5
0
    def _train_model(self, model, dataset):
        """Fit ``model`` on ``dataset`` in the "train" experiment and generate a SignalRecord.

        The flattened ``self.task`` is logged as parameters and the fitted
        model is saved under the name "params.pkl".
        """
        with R.start(experiment_name="train"):
            flat_params = flatten_dict(self.task)
            R.log_params(**flat_params)
            model.fit(dataset)
            R.save_objects(**{"params.pkl": model})

            # prediction
            signal_rec = SignalRecord(model, dataset, R.get_recorder())
            signal_rec.generate()
예제 #6
0
def train_mse():
    """Fit the CSI300 GBDT task and generate a SignalMseRecord.

    Returns
    -------
        uri:
            value of ``R.get_uri()`` read inside the "workflow" experiment
    """
    task_conf = CSI300_GBDT_TASK
    model = init_instance_by_config(task_conf["model"])
    dataset = init_instance_by_config(task_conf["dataset"])

    with R.start(experiment_name="workflow"):
        flat_params = flatten_dict(task_conf)
        R.log_params(**flat_params)
        model.fit(dataset)
        mse_rec = SignalMseRecord(R.get_recorder(),
                                  model=model,
                                  dataset=dataset)
        mse_rec.generate()
        uri = R.get_uri()
    return uri
예제 #7
0
def train_multiseg():
    """Fit the CSI300 GBDT task and generate a MultiSegRecord for "valid" and "test".

    Returns
    -------
        uri:
            value of ``R.get_uri()`` read inside the "workflow" experiment
    """
    task_conf = CSI300_GBDT_TASK
    model = init_instance_by_config(task_conf["model"])
    dataset = init_instance_by_config(task_conf["dataset"])

    with R.start(experiment_name="workflow"):
        flat_params = flatten_dict(task_conf)
        R.log_params(**flat_params)
        model.fit(dataset)
        multiseg_rec = MultiSegRecord(model, dataset, R.get_recorder())
        segments = {"valid": "valid", "test": "test"}
        multiseg_rec.generate(segments, True)
        uri = R.get_uri()
    return uri
예제 #8
0
def run_exp(task_config, dataset, experiment_name, recorder_name, uri):
    """Train the model of ``task_config`` inside a recorder and generate its records.

    Parameters
    ----------
    task_config : dict
        Task configuration. ``task_config["model"]`` is instantiated and
        fitted; every entry of ``task_config["record"]`` is instantiated and
        its ``generate()`` is called. The configuration is not modified.
    dataset :
        Dataset passed to ``model.fit`` and to "SignalRecord" entries.
    experiment_name :
        Forwarded to ``R.start``; also used as the log file stem.
    recorder_name :
        Forwarded to ``R.start``.
    uri :
        Forwarded to ``R.start``.
    """
    model = init_instance_by_config(task_config["model"])
    model_fit_kwargs = dict(dataset=dataset)

    # Let's start the experiment.
    with R.start(
            experiment_name=experiment_name,
            recorder_name=recorder_name,
            uri=uri,
            resume=True,
    ):
        # Setup log
        recorder_root_dir = R.get_recorder().get_local_dir()
        log_file = os.path.join(recorder_root_dir,
                                "{:}.log".format(experiment_name))
        set_log_basic_config(log_file)
        logger = get_module_logger("q.run_exp")
        logger.info("task_config::\n{:}".format(
            pprint.pformat(task_config, indent=2)))
        logger.info("[{:}] - [{:}]: {:}".format(experiment_name, recorder_name,
                                                uri))
        logger.info("dataset={:}".format(dataset))

        # Train model
        R.log_params(**flatten_dict(task_config))
        # Only hand a checkpoint location to models whose ``fit`` accepts one.
        fit_arg_names = inspect.getfullargspec(model.fit).args
        if "save_path" in fit_arg_names:
            model_fit_kwargs["save_path"] = os.path.join(
                recorder_root_dir, "model.ckp")
        elif "save_dir" in fit_arg_names:
            model_fit_kwargs["save_dir"] = os.path.join(
                recorder_root_dir, "model-ckps")
        model.fit(**model_fit_kwargs)
        # Get the recorder
        recorder = R.get_recorder()
        R.save_objects(**{"model.pkl": model})

        # Generate records: prediction, backtest, and analysis
        for record in task_config["record"]:
            if record["class"] == "SignalRecord":
                injected = {
                    "model": model,
                    "dataset": dataset,
                    "recorder": recorder
                }
            else:
                injected = {"recorder": recorder}
            # Build a fresh config with a fresh "kwargs" dict. A shallow copy
            # followed by ``record["kwargs"].update(...)`` would write the
            # model/dataset/recorder objects into the caller's task_config.
            record = dict(record,
                          kwargs={**record.get("kwargs", {}), **injected})
            init_instance_by_config(record).generate()
예제 #9
0
def fake_experiment():
    """Run a fake experiment workflow to check how ``R.get_uri()`` behaves.

    Returns
    -------
        pass_or_not_for_default_uri: bool
            whether the uri read before the experiment equals the one read after it
        pass_or_not_for_current_uri: bool
            whether the uri given to ``R.start`` equals the one read inside it
        temporary_exp_dir: str
            the uri given to ``R.start``
    """
    uri_before = R.get_uri()
    exp_uri = "file:./temp-test-exp-mag"

    # start exp
    with R.start(experiment_name="fake_workflow_for_expm", uri=exp_uri):
        flat_params = flatten_dict(CSI300_GBDT_TASK)
        R.log_params(**flat_params)
        uri_inside = R.get_uri()
    uri_after = R.get_uri()

    default_uri_ok = uri_before == uri_after
    current_uri_ok = exp_uri == uri_inside
    return default_uri_ok, current_uri_ok, exp_uri
예제 #10
0
def train(uri_path: str = None):
    """Train the CSI300 GBDT task and collect prediction and signal-analysis results.

    Parameters
    ----------
        uri_path: str
            forwarded to ``R.start`` as ``uri``

    Returns
    -------
        pred_score: pandas.DataFrame
            predict scores, loaded from "pred.pkl"
        performance: dict
            model performance, ``{"ic": ..., "ric": ...}``
        rid:
            ``id`` attribute of the recorder
    """
    task_conf = CSI300_GBDT_TASK

    # build the model and the dataset from the task config
    model = init_instance_by_config(task_conf["model"])
    dataset = init_instance_by_config(task_conf["dataset"])
    # To test __repr__
    for shown in (dataset, R):
        print(shown)

    # everything below runs inside the experiment context
    with R.start(experiment_name="workflow", uri=uri_path):
        flat_params = flatten_dict(task_conf)
        R.log_params(**flat_params)
        model.fit(dataset)
        R.save_objects(trained_model=model)

        recorder = R.get_recorder()
        # To test __repr__
        print(recorder)
        # To test get_local_dir
        print(recorder.get_local_dir())
        rid = recorder.id

        # prediction
        signal_rec = SignalRecord(model, dataset, recorder)
        signal_rec.generate()
        pred_score = signal_rec.load("pred.pkl")

        # signal analysis: ic first, then ric
        analysis_rec = SigAnaRecord(recorder)
        analysis_rec.generate()
        performance = {}
        for key, file_name in (("ic", "ic.pkl"), ("ric", "ric.pkl")):
            performance[key] = analysis_rec.load(file_name)

    return pred_score, performance, rid
예제 #11
0
def run_exp(task_config, dataset, experiment_name, recorder_name, uri):
    """Train the model of ``task_config`` inside a recorder and generate its records.

    Parameters
    ----------
    task_config : dict
        Task configuration. ``task_config["model"]`` is instantiated and
        fitted; every entry of ``task_config["record"]`` is instantiated and
        its ``generate()`` is called. The configuration is not modified.
    dataset :
        Dataset passed to ``model.fit`` and to "SignalRecord" entries.
    experiment_name :
        Forwarded to ``R.start``; also used as the log file stem.
    recorder_name :
        Forwarded to ``R.start``.
    uri :
        Forwarded to ``R.start``.
    """

    # model initiation
    print("")
    print("[{:}] - [{:}]: {:}".format(experiment_name, recorder_name, uri))
    print("dataset={:}".format(dataset))

    model = init_instance_by_config(task_config["model"])

    # start exp
    with R.start(experiment_name=experiment_name,
                 recorder_name=recorder_name,
                 uri=uri):

        # the log file is placed under the recorder's ``root_uri``
        log_file = R.get_recorder().root_uri / "{:}.log".format(
            experiment_name)
        set_log_basic_config(log_file)

        # train model
        R.log_params(**flatten_dict(task_config))
        model.fit(dataset)
        recorder = R.get_recorder()
        R.save_objects(**{"model.pkl": model})

        # generate records: prediction, backtest, and analysis
        for record in task_config["record"]:
            if record["class"] == "SignalRecord":
                injected = {
                    "model": model,
                    "dataset": dataset,
                    "recorder": recorder
                }
            else:
                injected = {"recorder": recorder}
            # Build a fresh config with a fresh "kwargs" dict. A shallow copy
            # followed by ``record["kwargs"].update(...)`` would write the
            # model/dataset/recorder objects into the caller's task_config.
            record = dict(record,
                          kwargs={**record.get("kwargs", {}), **injected})
            init_instance_by_config(record).generate()
예제 #12
0
파일: trainer.py 프로젝트: ycl010203/qlib
def begin_task_train(task_config: dict,
                     experiment_name: str,
                     recorder_name: str = None) -> Recorder:
    """
    Start a recorder for a task and store the task config in it.

    The flattened config is logged as parameters, the unflattened config is
    saved as the object "task", and the host name is set as the tag "hostname".

    Args:
        task_config (dict): the config of a task
        experiment_name (str): the name of experiment
        recorder_name (str): the given name will be the recorder name. None for using rid.

    Returns:
        Recorder: the model recorder
    """
    with R.start(experiment_name=experiment_name, recorder_name=recorder_name):
        flat_params = flatten_dict(task_config)
        R.log_params(**flat_params)
        # keep the original format and datatype
        R.save_objects(task=task_config)
        R.set_tags(hostname=socket.gethostname())
        started_recorder = R.get_recorder()
    return started_recorder
예제 #13
0
def train():
    """Train the model of ``task`` and collect prediction and signal-analysis results.

    Returns
    -------
        pred_score: pandas.DataFrame
            predict scores, loaded through the SignalRecord
        performance: dict
            model performance, ``{"ic": ..., "ric": ...}``
        rid:
            ``id`` attribute of the recorder
    """
    task_conf = task

    # build the model and the dataset from the task config
    model = init_instance_by_config(task_conf["model"])
    dataset = init_instance_by_config(task_conf["dataset"])
    # To test __repr__
    for shown in (dataset, R):
        print(shown)

    # everything below runs inside the experiment context
    with R.start(experiment_name="workflow"):
        flat_params = flatten_dict(task_conf)
        R.log_params(**flat_params)
        model.fit(dataset)

        recorder = R.get_recorder()
        # To test __repr__
        print(recorder)
        rid = recorder.id

        # prediction
        signal_rec = SignalRecord(model, dataset, recorder)
        signal_rec.generate()
        pred_score = signal_rec.load()

        # signal analysis: ic first, then ric
        analysis_rec = SigAnaRecord(recorder)
        analysis_rec.generate()
        performance = {}
        for key, file_name in (("ic", "ic.pkl"), ("ric", "ric.pkl")):
            performance[key] = analysis_rec.load(
                analysis_rec.get_path(file_name))

    return pred_score, performance, rid
예제 #14
0
파일: trainer.py 프로젝트: yzh119/qlib
def task_train(task_config: dict, experiment_name):
    """
    task based training

    The model and dataset described by ``task_config`` are instantiated, the
    model is fitted and saved as "params.pkl", and every entry of
    ``task_config["record"]`` is instantiated and generated.

    Parameters
    ----------
    task_config : dict
        A dict describes a task setting. It is not modified.
    experiment_name :
        Forwarded to ``R.start``.
    """

    # model initiation
    model = init_instance_by_config(task_config["model"])
    dataset = init_instance_by_config(task_config["dataset"])

    # start exp
    with R.start(experiment_name=experiment_name):
        # train model
        R.log_params(**flatten_dict(task_config))
        model.fit(dataset)
        recorder = R.get_recorder()
        R.save_objects(**{"params.pkl": model})

        # generate records: prediction, backtest, and analysis
        for record in task_config["record"]:
            if record["class"] == SignalRecord.__name__:
                injected = {
                    "model": model,
                    "dataset": dataset,
                    "recorder": recorder
                }
            else:
                injected = {"recorder": recorder}
            # Build a fresh config with a fresh "kwargs" dict instead of
            # updating ``record["kwargs"]`` in place, which would write the
            # model/dataset/recorder objects into the caller's task_config.
            record_config = dict(
                record, kwargs={**record.get("kwargs", {}), **injected})
            init_instance_by_config(record_config).generate()
예제 #15
0
def main(xargs):
    """Train a "QuantTransformer" on an "Alpha360" dataset and generate its records.

    Parameters
    ----------
    xargs :
        Argument object; only ``xargs.market`` is read, and it is used as the
        handler's "instruments".

    Notes
    -----
    ``port_analysis_config`` is not defined in this function; it has to be
    provided by an enclosing scope.
    """
    # processors listed under "infer_processors" of the handler
    infer_processors = [
        {
            "class": "RobustZScoreNorm",
            "kwargs": {"fields_group": "feature", "clip_outlier": True},
        },
        {
            "class": "Fillna",
            "kwargs": {"fields_group": "feature"},
        },
    ]
    # processors listed under "learn_processors" of the handler
    learn_processors = [
        {"class": "DropnaLabel"},
        {
            "class": "CSRankNorm",
            "kwargs": {"fields_group": "label"},
        },
    ]
    handler_config = {
        "class": "Alpha360",
        "module_path": "qlib.contrib.data.handler",
        "kwargs": {
            "start_time": "2008-01-01",
            "end_time": "2020-08-01",
            "fit_start_time": "2008-01-01",
            "fit_end_time": "2014-12-31",
            "instruments": xargs.market,
            "infer_processors": infer_processors,
            "learn_processors": learn_processors,
            "label": ["Ref($close, -2) / Ref($close, -1) - 1"],
        },
    }
    segments = {
        "train": ("2008-01-01", "2014-12-31"),
        "valid": ("2015-01-01", "2016-12-31"),
        "test": ("2017-01-01", "2020-08-01"),
    }
    dataset_config = {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {"handler": handler_config, "segments": segments},
    }

    model_config = {
        "class": "QuantTransformer",
        "module_path": "trade_models",
        "kwargs": {"loss": "mse", "GPU": "0", "metric": "loss"},
    }

    task = {"model": model_config, "dataset": dataset_config}

    model = init_instance_by_config(model_config)
    dataset = init_instance_by_config(dataset_config)

    # start exp to train model
    with R.start(experiment_name="train_tt_model"):
        flat_params = flatten_dict(task)
        R.log_params(**flat_params)
        model.fit(dataset)
        R.save_objects(trained_model=model)

        # prediction
        recorder = R.get_recorder()
        print(recorder)
        signal_rec = SignalRecord(model, dataset, recorder)
        signal_rec.generate()

        # backtest. If users want to use backtest based on their own prediction,
        # please refer to https://qlib.readthedocs.io/en/latest/component/recorder.html#record-template.
        portfolio_rec = PortAnaRecord(recorder, port_analysis_config)
        portfolio_rec.generate()
예제 #16
0
            "close_cost": 0.0015,
            "min_cost": 5,
            "return_order": True,
        },
    }

    # model initialization
    model = init_instance_by_config(task["model"])
    dataset = init_instance_by_config(task["dataset"])

    # NOTE: This line is optional
    # It demonstrates that the dataset can be used standalone.
    example_df = dataset.prepare("train")
    print(example_df.head())

    # start exp
    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(task))
        model.fit(dataset)
        R.save_objects(**{"params.pkl": model})

        # prediction
        recorder = R.get_recorder()
        sr = SignalRecord(model, dataset, recorder)
        sr.generate()

        # backtest. If users want to use backtest based on their own prediction,
        # please refer to https://qlib.readthedocs.io/en/latest/component/recorder.html#record-template.
        par = PortAnaRecord(recorder, port_analysis_config)
        par.generate()
예제 #17
0
                "deal_price": "close",
                "open_cost": 0.0005,
                "close_cost": 0.0015,
                "min_cost": 5,
            },
        },
    }

    # NOTE: This line is optional
    # It demonstrates that the dataset can be used standalone.
    example_df = dataset.prepare("train")
    print(example_df.head())

    # start exp
    with R.start(experiment_name="workflow"):
        R.log_params(**flatten_dict(CSI300_GBDT_TASK))
        model.fit(dataset)
        R.save_objects(**{"params.pkl": model})

        # prediction
        recorder = R.get_recorder()
        sr = SignalRecord(model, dataset, recorder)
        sr.generate()

        # Signal Analysis
        sar = SigAnaRecord(recorder)
        sar.generate()

        # backtest. If users want to use backtest based on their own prediction,
        # please refer to https://qlib.readthedocs.io/en/latest/component/recorder.html#record-template.
        par = PortAnaRecord(recorder, port_analysis_config, "day")
예제 #18
0
def run_exp(
    task_config,
    dataset,
    experiment_name,
    recorder_name,
    uri,
    model_obj_name="model.pkl",
):
    """Train (or reload) the model of ``task_config`` in a recorder and generate its records.

    The experiment is started with ``resume=True``. If an object named
    ``model_obj_name`` can be loaded from the recorder it is used as the
    model; if loading raises ``OSError`` the model is fitted and then saved
    under that name.

    Parameters
    ----------
    task_config : dict
        Task configuration. ``task_config["model"]`` is instantiated; every
        entry of ``task_config["record"]`` is deep-copied, instantiated and
        generated.
    dataset :
        Dataset passed to ``model.fit`` and to the record objects.
    experiment_name :
        Forwarded to ``R.start``; also used as the log file stem.
    recorder_name :
        Forwarded to ``R.start``.
    uri :
        Forwarded to ``R.start``.
    model_obj_name : str
        Name under which the model is loaded from / saved to the recorder.

    Raises
    ------
    ValueError
        If loading the existing model fails with anything other than
        ``OSError``; the original exception is attached as the cause.
    """

    model = init_instance_by_config(task_config["model"])
    model_fit_kwargs = dict(dataset=dataset)

    # Let's start the experiment.
    with R.start(
            experiment_name=experiment_name,
            recorder_name=recorder_name,
            uri=uri,
            resume=True,
    ):
        # Setup log
        recorder_root_dir = R.get_recorder().get_local_dir()
        log_file = os.path.join(recorder_root_dir,
                                "{:}.log".format(experiment_name))

        set_log_basic_config(log_file)
        logger = get_module_logger("q.run_exp")
        logger.info("task_config::\n{:}".format(
            pprint.pformat(task_config, indent=2)))
        logger.info("[{:}] - [{:}]: {:}".format(experiment_name, recorder_name,
                                                uri))
        logger.info("dataset={:}".format(dataset))

        # Train model
        try:
            if hasattr(model, "to"):  # Recoverable model
                # move the loaded model to the device of the fresh instance
                ori_device = model.device
                model = R.load_object(model_obj_name)
                model.to(ori_device)
            else:
                model = R.load_object(model_obj_name)
            logger.info(
                "[Find existing object from {:}]".format(model_obj_name))
        except OSError:
            # loading failed with OSError: train from scratch
            R.log_params(**flatten_dict(update_gpu(task_config, None)))
            # Only hand a checkpoint location to models whose ``fit`` accepts one.
            fit_arg_names = inspect.getfullargspec(model.fit).args
            if "save_path" in fit_arg_names:
                model_fit_kwargs["save_path"] = os.path.join(
                    recorder_root_dir, "model.ckp")
            elif "save_dir" in fit_arg_names:
                model_fit_kwargs["save_dir"] = os.path.join(
                    recorder_root_dir, "model-ckps")
            model.fit(**model_fit_kwargs)
            # remove model to CPU for saving
            if hasattr(model, "to"):
                old_device = model.device
                model.to("cpu")
                R.save_objects(**{model_obj_name: model})
                model.to(old_device)
            else:
                R.save_objects(**{model_obj_name: model})
        except Exception as e:
            # chain explicitly so the original error is reported as the cause
            raise ValueError("Something wrong: {:}".format(e)) from e
        # Get the recorder
        recorder = R.get_recorder()

        # Generate records: prediction, backtest, and analysis
        for record in task_config["record"]:
            # deep copy so the updates below never reach task_config
            record = deepcopy(record)
            if record["class"] == "MultiSegRecord":
                record["kwargs"] = dict(model=model,
                                        dataset=dataset,
                                        recorder=recorder)
                sr = init_instance_by_config(record)
                sr.generate(**record["generate_kwargs"])
            elif record["class"] == "SignalRecord":
                srconf = {
                    "model": model,
                    "dataset": dataset,
                    "recorder": recorder
                }
                record["kwargs"].update(srconf)
                sr = init_instance_by_config(record)
                sr.generate()
            else:
                rconf = {"recorder": recorder}
                record["kwargs"].update(rconf)
                ar = init_instance_by_config(record)
                ar.generate()
예제 #19
0
파일: trainer.py 프로젝트: you-n-g/qlib
def _log_task_info(task_config: dict):
    """Record a task config on ``R``.

    The flattened config is logged as parameters, the unflattened config is
    saved as the object "task", and the host name is set as the tag "hostname".
    """
    flat_params = flatten_dict(task_config)
    R.log_params(**flat_params)
    # keep the original format and datatype
    R.save_objects(task=task_config)
    R.set_tags(hostname=socket.gethostname())