Ejemplo n.º 1
0
def end_task_train(rec: Recorder, experiment_name: str) -> Recorder:
    """
    Resume a recorder, fit the task's model and persist the results.

    Inside the resumed recorder this loads the stored "task" config, builds
    the model and the dataset from it, fits the model, saves both objects and
    finally generates every record listed in the config.

    Args:
        rec (Recorder): the recorder to resume
        experiment_name (str): the name of the experiment

    Returns:
        Recorder: the same ``rec`` that was passed in
    """
    with R.start(experiment_name=experiment_name, recorder_id=rec.info["id"], resume=True):
        task_config = R.load_object("task")

        # Build model and dataset from the task config, then train and save the model.
        model: Model = init_instance_by_config(task_config["model"])
        dataset: Dataset = init_instance_by_config(task_config["dataset"])
        model.fit(dataset)
        R.save_objects(**{"params.pkl": model})

        # The dataset is saved for online inference, so the concrete data
        # should not be dumped along with it.
        dataset.config(dump_all=False, recursive=True)
        R.save_objects(dataset=dataset)

        # Fill the "<MODEL>" / "<DATASET>" placeholders of the config.
        placeholder_value = {"<MODEL>": model, "<DATASET>": dataset}
        task_config = fill_placeholder(task_config, placeholder_value)

        # Generate records: prediction, backtest, and analysis.
        record_configs = task_config.get("record", [])
        if isinstance(record_configs, dict):
            # A lone dict describes a single record; wrap it so the loop works.
            record_configs = [record_configs]
        for record_config in record_configs:
            init_instance_by_config(record_config, recorder=rec).generate()
    return rec
Ejemplo n.º 2
0
def end_task_train(rec: Recorder, experiment_name: str) -> Recorder:
    """
    Resume the given recorder and execute the task stored in it.

    The "task" object is loaded from the resumed recorder and handed to
    ``_exe_task``, which carries out the actual work.

    Args:
        rec (Recorder): the recorder to resume
        experiment_name (str): the name of the experiment

    Returns:
        Recorder: the same ``rec`` that was passed in
    """
    with R.start(
        experiment_name=experiment_name,
        recorder_id=rec.info["id"],
        resume=True,
    ):
        # The task config was saved under the name "task" in this recorder.
        _exe_task(R.load_object("task"))
    return rec
Ejemplo n.º 3
0
def end_task_train(rec: Recorder, experiment_name: str) -> Recorder:
    """
    Resume a recorder, fit the task's model and generate its records.

    Inside the resumed recorder this loads the stored "task" config, builds
    the model and the dataset from it, fits the model, saves both objects and
    then instantiates and runs every record listed in the config.

    Args:
        rec (Recorder): the recorder to resume
        experiment_name (str): the name of the experiment

    Returns:
        Recorder: the same ``rec`` that was passed in
    """
    with R.start(experiment_name=experiment_name, recorder_id=rec.info["id"], resume=True):
        task_config = R.load_object("task")

        # Build model and dataset from the task config, then train and save the model.
        model: Model = init_instance_by_config(task_config["model"])
        dataset: Dataset = init_instance_by_config(task_config["dataset"])
        model.fit(dataset)
        R.save_objects(**{"params.pkl": model})

        # The dataset is saved for online inference, so the concrete data
        # should not be dumped along with it.
        dataset.config(dump_all=False, recursive=True)
        R.save_objects(dataset=dataset)

        # Generate records: prediction, backtest, and analysis.
        record_configs = task_config.get("record", [])
        if isinstance(record_configs, dict):
            # A lone dict describes a single record; wrap it so the loop works.
            record_configs = [record_configs]
        for record_config in record_configs:
            record_cls, record_kwargs = get_cls_kwargs(
                record_config, default_module="qlib.workflow.record_temp"
            )
            # Every record receives the recorder; only SignalRecord itself
            # additionally receives the trained model and the dataset.
            extra_kwargs = {"recorder": rec}
            if record_cls is SignalRecord:
                extra_kwargs = {"model": model, "dataset": dataset, "recorder": rec}
            record_cls(**record_kwargs, **extra_kwargs).generate()

    return rec
Ejemplo n.º 4
0
def run_exp(
    task_config,
    dataset,
    experiment_name,
    recorder_name,
    uri,
    model_obj_name="model.pkl",
):
    """Run (or resume) one experiment: obtain a trained model, then generate records.

    A model is built from ``task_config["model"]``. Inside the resumed
    recorder, an object previously saved under ``model_obj_name`` is loaded
    if available; when loading raises ``OSError`` the fresh model is fitted on
    ``dataset`` and saved under that name instead. Afterwards every entry of
    ``task_config["record"]`` is instantiated and its ``generate`` method is
    called.

    Args:
        task_config (dict): task description; the keys ``"model"`` and
            ``"record"`` are read here. ``"record"`` may be a list of record
            configs or a single record config dict.
        dataset: dataset handed to ``model.fit`` and to the record objects.
        experiment_name: experiment name passed to ``R.start``; also used as
            the base name of the log file inside the recorder directory.
        recorder_name: recorder name passed to ``R.start``.
        uri: URI passed to ``R.start``.
        model_obj_name (str): name under which the model object is loaded
            from / saved to the recorder.

    Raises:
        ValueError: if looking up the existing model fails with anything
            other than ``OSError``; the original exception is attached as
            ``__cause__``. Errors raised while fitting or saving a new model
            propagate unchanged.
    """
    model = init_instance_by_config(task_config["model"])
    model_fit_kwargs = dict(dataset=dataset)

    # Let's start the experiment.
    with R.start(
        experiment_name=experiment_name,
        recorder_name=recorder_name,
        uri=uri,
        resume=True,
    ):
        # Setup log
        recorder_root_dir = R.get_recorder().get_local_dir()
        log_file = os.path.join(recorder_root_dir, f"{experiment_name}.log")

        set_log_basic_config(log_file)
        logger = get_module_logger("q.run_exp")
        logger.info(f"task_config::\n{pprint.pformat(task_config, indent=2)}")
        logger.info(f"[{experiment_name}] - [{recorder_name}]: {uri}")
        logger.info(f"dataset={dataset}")

        # Train model: reuse a saved one when the recorder already has it.
        try:
            # A model exposing ``to`` is moved back to the device the freshly
            # built model was configured with after it has been loaded.
            recoverable = hasattr(model, "to")
            ori_device = model.device if recoverable else None
            model = R.load_object(model_obj_name)
            if recoverable:
                model.to(ori_device)
            logger.info(f"[Find existing object from {model_obj_name}]")
        except OSError:
            # No saved model could be loaded: train from scratch.
            R.log_params(**flatten_dict(update_gpu(task_config, None)))
            # Only pass a checkpoint location if ``fit`` accepts one.
            fit_args = inspect.getfullargspec(model.fit).args
            if "save_path" in fit_args:
                model_fit_kwargs["save_path"] = os.path.join(recorder_root_dir, "model.ckp")
            elif "save_dir" in fit_args:
                model_fit_kwargs["save_dir"] = os.path.join(recorder_root_dir, "model-ckps")
            model.fit(**model_fit_kwargs)
            # Move the model to CPU for saving, then restore its device.
            if hasattr(model, "to"):
                old_device = model.device
                model.to("cpu")
                R.save_objects(**{model_obj_name: model})
                model.to(old_device)
            else:
                R.save_objects(**{model_obj_name: model})
        except Exception as e:
            # Chain explicitly so the traceback shows the real cause.
            raise ValueError(f"Something wrong: {e}") from e

        # Get the recorder
        recorder = R.get_recorder()

        # Generate records: prediction, backtest, and analysis
        record_configs = task_config["record"]
        if isinstance(record_configs, dict):
            # A lone dict describes a single record; wrap it so the loop works.
            record_configs = [record_configs]
        for original_config in record_configs:
            # Work on a copy so the caller's task_config is never mutated.
            record_config = deepcopy(original_config)
            record_class = record_config["class"]
            if record_class == "MultiSegRecord":
                # Any configured kwargs are replaced entirely for this class.
                record_config["kwargs"] = dict(
                    model=model, dataset=dataset, recorder=recorder
                )
                init_instance_by_config(record_config).generate(
                    **record_config["generate_kwargs"]
                )
            elif record_class == "SignalRecord":
                # ``setdefault`` tolerates record configs without "kwargs".
                record_config.setdefault("kwargs", {}).update(
                    model=model, dataset=dataset, recorder=recorder
                )
                init_instance_by_config(record_config).generate()
            else:
                record_config.setdefault("kwargs", {}).update(recorder=recorder)
                init_instance_by_config(record_config).generate()