Example #1
 def __init__(
     self,
     provider_uri="~/.qlib/qlib_data/cn_data",
     region=REG_CN,
     task_url="mongodb://10.0.0.4:27017/",
     task_db_name="rolling_db",
     experiment_name="rolling_exp",
     task_pool="rolling_task",
     task_config=None,
     rolling_step=550,
     rolling_type=RollingGen.ROLL_SD,
 ):
     # TaskManager config
     if task_config is None:
         task_config = [
             CSI100_RECORD_XGBOOST_TASK_CONFIG,
             CSI100_RECORD_LGB_TASK_CONFIG
         ]
     mongo_conf = {
         "task_url": task_url,
         "task_db_name": task_db_name,
     }
     qlib.init(provider_uri=provider_uri, region=region, mongo=mongo_conf)
     self.experiment_name = experiment_name
     self.task_pool = task_pool
     self.task_config = task_config
     self.rolling_gen = RollingGen(step=rolling_step, rtype=rolling_type)
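
Example #1 only shows the constructor; the rolling tasks still have to be generated, stored in the MongoDB task pool, and trained. A minimal sketch of those steps under the same attributes, using qlib's task_generator, TaskManager and run_task utilities (the method names task_generating / task_storing / task_training are illustrative, not taken from the original):

from qlib.model.trainer import task_train
from qlib.workflow.task.gen import task_generator
from qlib.workflow.task.manage import TaskManager, run_task

def task_generating(self):
    # Expand every task template into one concrete task per rolling window.
    return task_generator(tasks=self.task_config, generators=self.rolling_gen)

def task_storing(self, tasks):
    # Push the generated tasks into the MongoDB collection configured in __init__.
    TaskManager(task_pool=self.task_pool).create_task(tasks)

def task_training(self):
    # Fetch waiting tasks from the pool and train each of them as an experiment run.
    run_task(task_train, self.task_pool, experiment_name=self.experiment_name)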
Example #2
    def __init__(
        self,
        provider_uri="~/.qlib/qlib_data/cn_data",
        region="cn",
        task_url="mongodb://10.0.0.4:27017/",
        task_db_name="rolling_db",
        rolling_step=550,
        tasks=[task_xgboost_config],
        add_tasks=[task_lgb_config],
    ):
        mongo_conf = {
            "task_url": task_url,  # your MongoDB url
            "task_db_name": task_db_name,  # database name
        }
        qlib.init(provider_uri=provider_uri, region=region, mongo=mongo_conf)
        self.tasks = tasks
        self.add_tasks = add_tasks
        self.rolling_step = rolling_step
        strategies = []
        for task in tasks:
            name_id = task["model"][
                "class"]  # NOTE: Assumption: The model class can specify only one strategy
            strategies.append(
                RollingStrategy(
                    name_id,
                    task,
                    RollingGen(step=rolling_step, rtype=RollingGen.ROLL_SD),
                ))

        self.rolling_online_manager = OnlineManager(strategies)
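
The constructor above only assembles the OnlineManager. A hedged sketch of how it might then be driven through its first_train / routine / get_collector methods (the wrapper method names are illustrative):

def first_run(self):
    # Train the first batch of rolling models for every registered strategy.
    self.rolling_online_manager.first_train()

def routine(self):
    # Advance one routine step: prepare new rolling tasks, train them,
    # and refresh the online signals.
    self.rolling_online_manager.routine()

def collect(self):
    # Collect the recorded results of all strategies.
    print(self.rolling_online_manager.get_collector()())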
Example #3
 def __init__(
     self,
     provider_uri="~/.qlib/qlib_data/cn_data",
     region="cn",
     trainer=DelayTrainerRM(),  # you can choose from TrainerR, TrainerRM, DelayTrainerR, DelayTrainerRM
     task_url="mongodb://10.0.0.4:27017/",  # not necessary when using TrainerR or DelayTrainerR
     task_db_name="rolling_db",  # not necessary when using TrainerR or DelayTrainerR
     rolling_step=550,
     tasks=None,
     add_tasks=None,
 ):
     if add_tasks is None:
         add_tasks = [CSI100_RECORD_LGB_TASK_CONFIG_ROLLING]
     if tasks is None:
         tasks = [CSI100_RECORD_XGBOOST_TASK_CONFIG_ROLLING]
     mongo_conf = {
         "task_url": task_url,  # your MongoDB url
         "task_db_name": task_db_name,  # database name
     }
     qlib.init(provider_uri=provider_uri, region=region, mongo=mongo_conf)
     self.tasks = tasks
     self.add_tasks = add_tasks
     self.rolling_step = rolling_step
     strategies = []
     for task in tasks:
         name_id = task["model"]["class"]  # NOTE: Assumption: The model class can specify only one strategy
         strategies.append(
             RollingStrategy(
                 name_id,
                 task,
                 RollingGen(step=rolling_step, rtype=RollingGen.ROLL_SD),
             )
         )
     self.trainer = trainer
     self.rolling_online_manager = OnlineManager(strategies, trainer=self.trainer)
Example #4
    def __init__(
        self,
        provider_uri="~/.qlib/qlib_data/cn_data",
        region="cn",
        exp_name="rolling_exp",
        task_url="mongodb://10.0.0.4:27017/",
        task_db_name="rolling_db",
        task_pool="rolling_task",
        rolling_step=80,
        start_time="2018-09-10",
        end_time="2018-10-31",
        tasks=[task_xgboost_config, task_lgb_config],
    ):
        """
        Init OnlineManagerExample.

        Args:
            provider_uri (str, optional): the provider uri. Defaults to "~/.qlib/qlib_data/cn_data".
            region (str, optional): the stock region. Defaults to "cn".
            exp_name (str, optional): the experiment name. Defaults to "rolling_exp".
            task_url (str, optional): your MongoDB url. Defaults to "mongodb://10.0.0.4:27017/".
            task_db_name (str, optional): database name. Defaults to "rolling_db".
            task_pool (str, optional): the task pool name (a task pool is a collection in MongoDB). Defaults to "rolling_task".
            rolling_step (int, optional): the step for rolling. Defaults to 80.
            start_time (str, optional): the start time of simulating. Defaults to "2018-09-10".
            end_time (str, optional): the end time of simulating. Defaults to "2018-10-31".
            tasks (dict or list[dict]): the task config(s) waiting to be rolled and trained
        """
        self.exp_name = exp_name
        self.task_pool = task_pool
        self.start_time = start_time
        self.end_time = end_time
        mongo_conf = {
            "task_url": task_url,
            "task_db_name": task_db_name,
        }
        qlib.init(provider_uri=provider_uri, region=region, mongo=mongo_conf)
        self.rolling_gen = RollingGen(
            step=rolling_step,
            rtype=RollingGen.ROLL_SD,
            ds_extra_mod_func=None
        )  # The rolling task generator; ds_extra_mod_func is None because we only simulate up to 2018-10-31 and do not need to change the handler's end time.
        self.trainer = DelayTrainerRM(
            self.exp_name,
            self.task_pool)  # Can also be TrainerR, TrainerRM or DelayTrainerR
        self.rolling_online_manager = OnlineManager(
            RollingStrategy(exp_name,
                            task_template=tasks,
                            rolling_gen=self.rolling_gen),
            trainer=self.trainer,
            begin_time=self.start_time,
        )
        self.tasks = tasks
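
Because this example fixes start_time and end_time, the whole online workflow can be replayed in one call. A minimal usage sketch assuming OnlineManager's simulate, get_collector and get_signals methods (the main method name is illustrative):

def main(self):
    print("========== simulate ==========")
    # Replay the first training plus every routine step from start_time to end_time.
    self.rolling_online_manager.simulate(end_time=self.end_time)
    print("========== collect results ==========")
    print(self.rolling_online_manager.get_collector()())
    print("========== signals ==========")
    print(self.rolling_online_manager.get_signals())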
Example #5
 def add_strategy(self):
     print("========== load ==========")
     self.rolling_online_manager = OnlineManager.load(self._ROLLING_MANAGER_PATH)
     print("========== add strategy ==========")
     strategies = []
     for task in self.add_tasks:
         name_id = task["model"]["class"]  # NOTE: Assumption: The model class can specify only one strategy
         strategies.append(
             RollingStrategy(
                 name_id,
                 task,
                 RollingGen(step=self.rolling_step, rtype=RollingGen.ROLL_SD),
             )
         )
     self.rolling_online_manager.add_strategy(strategies=strategies)
     print("========== dump ==========")
     self.rolling_online_manager.to_pickle(self._ROLLING_MANAGER_PATH)
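
add_strategy expects the OnlineManager to have been serialized to self._ROLLING_MANAGER_PATH earlier; that path and the earlier dump are not shown. A minimal sketch of the step that could produce the pickle, assuming the manager was built in __init__ as in Examples #2 and #3:

def first_run(self):
    print("========== first train ==========")
    # Train the initial strategies, then persist the manager so that
    # add_strategy can later load it from the same path.
    self.rolling_online_manager.first_train()
    print("========== dump ==========")
    self.rolling_online_manager.to_pickle(self._ROLLING_MANAGER_PATH)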
Example #6
 def __init__(
     self,
     provider_uri="~/.qlib/qlib_data/cn_data",
     region=REG_CN,
     task_url="mongodb://10.0.0.4:27017/",
     task_db_name="rolling_db",
     experiment_name="rolling_exp",
     task_pool="rolling_task",
     task_config=[task_xgboost_config, task_lgb_config],
     rolling_step=550,
     rolling_type=RollingGen.ROLL_SD,
 ):
     # TaskManager config
     mongo_conf = {
         "task_url": task_url,
         "task_db_name": task_db_name,
     }
     qlib.init(provider_uri=provider_uri, region=region, mongo=mongo_conf)
     self.experiment_name = experiment_name
     self.task_pool = task_pool
     self.task_config = task_config
     self.rolling_gen = RollingGen(step=rolling_step, rtype=rolling_type)
Example #7
    def setup(self, trainer=TrainerR, trainer_kwargs={}):
        """
        After running this function, `self.data_ic_df` will be set.
        Each column represents a piece of data (identified by its training period).
        Each row represents the timestamp at which the performance of that data is measured.
        For example,

        .. code-block:: python

                       2021-06-21 2021-06-04 2021-05-21 2021-05-07 2021-04-20 2021-04-06 2021-03-22 2021-03-08  ...
                       2021-07-02 2021-06-18 2021-06-03 2021-05-20 2021-05-06 2021-04-19 2021-04-02 2021-03-19  ...
            datetime                                                                                            ...
            2018-01-02   0.079782   0.115975   0.070866   0.028849  -0.081170   0.140380   0.063864   0.110987  ...
            2018-01-03   0.123386   0.107789   0.071037   0.045278  -0.060782   0.167446   0.089779   0.124476  ...
            2018-01-04   0.140775   0.097206   0.063702   0.042415  -0.078164   0.173218   0.098914   0.114389  ...
            2018-01-05   0.030320  -0.037209  -0.044536  -0.047267  -0.081888   0.045648   0.059947   0.047652  ...
            2018-01-08   0.107201   0.009219  -0.015995  -0.036594  -0.086633   0.108965   0.122164   0.108508  ...
            ...               ...        ...        ...        ...        ...        ...        ...        ...  ...

        """

        # 1) prepare the prediction of proxy models
        perf_task_tpl = deepcopy(
            self.task_tpl
        )  # this task is supposed to contain no complicated objects

        trainer = auto_filter_kwargs(trainer)(experiment_name=self.exp_name,
                                              **trainer_kwargs)
        # NOTE:
        # The handler is initialized only once.
        if not trainer.has_worker():
            self.dh = init_task_handler(perf_task_tpl)
        else:
            self.dh = init_instance_by_config(
                perf_task_tpl["dataset"]["kwargs"]["handler"])

        seg = perf_task_tpl["dataset"]["kwargs"]["segments"]

        # We want to split the training time period into small segments.
        perf_task_tpl["dataset"]["kwargs"]["segments"] = {
            "train": (DatasetH.get_min_time(seg), DatasetH.get_max_time(seg)),
            "test": (None, None),
        }

        # NOTE:
        # we play a trick here:
        # treat the training segments as the test segments to create the rolling tasks
        rg = RollingGen(step=self.step,
                        test_key="train",
                        train_key=None,
                        task_copy_func=deepcopy_basic_type)
        gen_task = task_generator(perf_task_tpl, [rg])

        recorders = R.list_recorders(experiment_name=self.exp_name)
        if len(gen_task) == len(recorders):
            get_module_logger("Internal Data").info(
                "the data has been initialized")
        else:
            # train new models
            assert 0 == len(
                recorders
            ), "An empty experiment is required to set up `InternalData`"
            trainer.train(gen_task)

        # 2) extract the similarity matrix
        label_df = self.dh.fetch(col_set="label")
        recorders = R.list_recorders(experiment_name=self.exp_name)

        key_l = []
        ic_l = []
        for _, rec in tqdm(recorders.items(), desc="calc"):
            pred = rec.load_object("pred.pkl")
            task = rec.load_object("task")
            data_key = task["dataset"]["kwargs"]["segments"]["train"]
            key_l.append(data_key)
            ic_l.append(
                delayed(self._calc_perf)(pred.iloc[:, 0], label_df.iloc[:, 0]))

        ic_l = Parallel(n_jobs=-1)(ic_l)
        self.data_ic_df = pd.DataFrame(dict(zip(key_l, ic_l)))
        self.data_ic_df = self.data_ic_df.sort_index().sort_index(axis=1)

        del self.dh  # handler is not useful now
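
setup() relies on self._calc_perf, which is not shown above. A minimal sketch, under the assumption that it computes the per-day rank IC (Spearman correlation) between a prediction series and a label series that share a (datetime, instrument) MultiIndex:

import pandas as pd

def _calc_perf(self, pred: pd.Series, label: pd.Series) -> pd.Series:
    # Align predictions and labels, then compute the Spearman correlation
    # within each trading day; the result is a series indexed by datetime.
    df = pd.DataFrame({"pred": pred, "label": label}).dropna()
    return df.groupby("datetime").apply(
        lambda day: day["pred"].corr(day["label"], method="spearman")
    )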
Example #8
    def __init__(
        self,
        *,
        task_tpl: Union[dict, list],
        step: int,
        trunc_days: int = None,
        rolling_ext_days: int = 0,
        exp_name: Union[str, InternalData],
        segments: Union[Dict[Text, Tuple], float],
        hist_step_n: int = 10,
        task_mode: str = MetaTask.PROC_MODE_FULL,
        fill_method: str = "max",
    ):
        """
        A dataset for meta model.

        Parameters
        ----------
        task_tpl : Union[dict, list]
            Decide what tasks are used.
            - dict : the task template; the prepared tasks are generated with `step`, `trunc_days` and `RollingGen`
            - list : when a list is given, the tasks in the list are used directly;
                     the list is supposed to be sorted according to the timeline
        step : int
            the rolling step
        trunc_days: int
            days to be truncated based on the test start
        rolling_ext_days: int
            Sometimes users want to train meta models for a longer test period but with smaller rolling steps to get more task samples.
            The total length of the test period will be `step + rolling_ext_days`.

        exp_name : Union[str, InternalData]
            Decide what meta_info is used for prediction.
            - str: the name of the experiment to store the performance of data
            - InternalData: a prepared internal data
        segments: Union[Dict[Text, Tuple], float]
            the segments used to divide the data
            both the left and right endpoints are included
            if segments is a float:
                the float represents the percentage of data used for training
        hist_step_n: int
            length of historical steps for the meta information
        task_mode : str
            Please refer to the docs of MetaTask
        """
        super().__init__(segments=segments)
        if isinstance(exp_name, InternalData):
            self.internal_data = exp_name
        else:
            self.internal_data = InternalData(task_tpl,
                                              step=step,
                                              exp_name=exp_name)
            self.internal_data.setup()
        self.task_tpl = deepcopy(
            task_tpl
        )  # FIXME: if the handler is shared, how can we avoid the explosion of memory?
        self.trunc_days = trunc_days
        self.hist_step_n = hist_step_n
        self.step = step

        if isinstance(task_tpl, dict):
            rg = RollingGen(step=step,
                            trunc_days=trunc_days,
                            task_copy_func=deepcopy_basic_type
                            )  # NOTE: trunc_days is very important !!!!
            task_iter = rg(task_tpl)
            if rolling_ext_days > 0:
                self.ta = TimeAdjuster(future=True)
                for t in task_iter:
                    t["dataset"]["kwargs"]["segments"]["test"] = self.ta.shift(
                        t["dataset"]["kwargs"]["segments"]["test"],
                        step=rolling_ext_days,
                        rtype=RollingGen.ROLL_EX)
            if task_mode == MetaTask.PROC_MODE_FULL:
                # Only pre-initialize the task when the full task is required:
                # initialize the handler and share it.
                init_task_handler(task_tpl)
        else:
            assert isinstance(task_tpl, list)
            task_iter = task_tpl

        self.task_list = []
        self.meta_task_l = []
        logger = get_module_logger("MetaDatasetDS")
        logger.info(f"Example task for training meta model: {task_iter[0]}")
        for t in tqdm(task_iter, desc="creating meta tasks"):
            try:
                self.meta_task_l.append(
                    MetaTaskDS(t,
                               meta_info=self._prepare_meta_ipt(t),
                               mode=task_mode,
                               fill_method=fill_method))
                self.task_list.append(t)
            except ValueError as e:
                logger.warning(f"ValueError: {e}")
        assert len(
            self.meta_task_l
        ) > 0, "No meta tasks found. Please check the data and settings."
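
For context, a hedged instantiation sketch of MetaDatasetDS; the concrete numbers and the workflow_config.yaml file are illustrative assumptions, not values taken from the original:

import yaml
from qlib.contrib.meta.data_selection.dataset import MetaDatasetDS

# Hypothetical workflow config holding a standard Qlib "task" section (model + dataset).
with open("workflow_config.yaml") as f:
    task_tpl = yaml.safe_load(f)["task"]

meta_dataset = MetaDatasetDS(
    task_tpl=task_tpl,
    step=20,                   # rolling step (illustrative)
    trunc_days=2,              # truncate days near the test start to limit leakage
    exp_name="internal_data",  # experiment holding the InternalData performance
    segments=0.62,             # fraction of meta tasks used for training
    hist_step_n=30,            # number of historical steps used as meta information
)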
Example #9
    def __init__(
        self,
        provider_uri="~/.qlib/qlib_data/cn_data",
        region="cn",
        exp_name="rolling_exp",
        task_url="mongodb://10.0.0.4:27017/",  # not necessary when using TrainerR or DelayTrainerR
        task_db_name="rolling_db",  # not necessary when using TrainerR or DelayTrainerR
        task_pool="rolling_task",
        rolling_step=80,
        start_time="2018-09-10",
        end_time="2018-10-31",
        tasks=None,
        trainer="TrainerR",
    ):
        """
        Init OnlineManagerExample.

        Args:
            provider_uri (str, optional): the provider uri. Defaults to "~/.qlib/qlib_data/cn_data".
            region (str, optional): the stock region. Defaults to "cn".
            exp_name (str, optional): the experiment name. Defaults to "rolling_exp".
            task_url (str, optional): your MongoDB url. Defaults to "mongodb://10.0.0.4:27017/".
            task_db_name (str, optional): database name. Defaults to "rolling_db".
            task_pool (str, optional): the task pool name (a task pool is a collection in MongoDB). Defaults to "rolling_task".
            rolling_step (int, optional): the step for rolling. Defaults to 80.
            start_time (str, optional): the start time of simulating. Defaults to "2018-09-10".
            end_time (str, optional): the end time of simulating. Defaults to "2018-10-31".
            tasks (dict or list[dict]): the task config(s) waiting to be rolled and trained
        """
        if tasks is None:
            tasks = [
                CSI100_RECORD_XGBOOST_TASK_CONFIG_ONLINE,
                CSI100_RECORD_LGB_TASK_CONFIG_ONLINE
            ]
        self.exp_name = exp_name
        self.task_pool = task_pool
        self.start_time = start_time
        self.end_time = end_time
        mongo_conf = {
            "task_url": task_url,
            "task_db_name": task_db_name,
        }
        qlib.init(provider_uri=provider_uri, region=region, mongo=mongo_conf)
        self.rolling_gen = RollingGen(
            step=rolling_step,
            rtype=RollingGen.ROLL_SD,
            ds_extra_mod_func=None
        )  # The rolling task generator; ds_extra_mod_func is None because we only simulate up to 2018-10-31 and do not need to change the handler's end time.
        if trainer == "TrainerRM":
            self.trainer = TrainerRM(self.exp_name, self.task_pool)
        elif trainer == "TrainerR":
            self.trainer = TrainerR(self.exp_name)
        else:
            # TODO: support all the trainers: TrainerR, TrainerRM, DelayTrainerR
            raise NotImplementedError(f"Trainer type '{trainer}' is not supported")
        self.rolling_online_manager = OnlineManager(
            RollingStrategy(exp_name,
                            task_template=tasks,
                            rolling_gen=self.rolling_gen),
            trainer=self.trainer,
            begin_time=self.start_time,
        )
        self.tasks = tasks
Example #10
 def create_rolling_tasks(self):
     task = self.basic_task()
     task_l = task_generator(
         task, RollingGen(step=self.step, trunc_days=self.horizon + 1)
     )  # the last `horizon + 1` days are truncated to avoid information leakage
     return task_l
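
create_rolling_tasks only returns the task list; training is a separate step. A minimal sketch that feeds the list to a TrainerR (the method name and experiment name are illustrative):

from qlib.model.trainer import TrainerR

def train_rolling_tasks(self, experiment_name="rolling_models"):
    # Train one model per rolling window; each run is recorded under
    # the given experiment and can be collected afterwards.
    tasks = self.create_rolling_tasks()
    return TrainerR(experiment_name=experiment_name).train(tasks)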