Example #1
def pred_autocorr(pred: pd.Series,
                  lag=1,
                  inst_col="instrument",
                  date_col="datetime"):
    """pred_autocorr.

    Limitation:
    - If the datetime index is not densely sequential, the correlation is calculated between adjacent dates (some users may expect NaN instead).

    :param pred: pd.Series with the following format
                instrument  datetime
                SH600000    2016-01-04   -0.000403
                            2016-01-05   -0.000753
                            2016-01-06   -0.021801
                            2016-01-07   -0.065230
                            2016-01-08   -0.062465
    :type pred: pd.Series
    :param lag: the number of periods to shift when computing the autocorrelation. Defaults to 1.
    """
    if isinstance(pred, pd.DataFrame):
        # Log before slicing: after `pred.iloc[:, 0]` the result is a Series and no longer has `.columns`.
        get_module_logger("pred_autocorr").warning(
            f"Only the first column in {pred.columns} of `pred` is kept")
        pred = pred.iloc[:, 0]
    pred_ustk = pred.sort_index().unstack(inst_col)
    corr_s = {}
    for (idx, cur), (_, prev) in zip(pred_ustk.iterrows(),
                                     pred_ustk.shift(lag).iterrows()):
        corr_s[idx] = cur.corr(prev)
    corr_s = pd.Series(corr_s).sort_index()
    return corr_s
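Usage sketch (not from the source): build a prediction Series in the (instrument, datetime) MultiIndex layout described in the docstring and compute its lag-1 autocorrelation.

import numpy as np
import pandas as pd

idx = pd.MultiIndex.from_product(
    [["SH600000", "SH600004", "SH600009"],
     pd.date_range("2016-01-04", periods=5)],
    names=["instrument", "datetime"],
)
pred = pd.Series(np.random.randn(len(idx)), index=idx)

# One correlation value per date; the first `lag` dates are NaN because
# there is no earlier cross-section to compare against.
ac = pred_autocorr(pred, lag=1)
print(ac)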
Example #2
    def __init__(
        self,
        strategies: Union[OnlineStrategy, List[OnlineStrategy]],
        trainer: Trainer = None,
        begin_time: Union[str, pd.Timestamp] = None,
        freq="day",
    ):
        """
        Init OnlineManager.
        One OnlineManager must have at least one OnlineStrategy.

        Args:
            strategies (Union[OnlineStrategy, List[OnlineStrategy]]): an instance of OnlineStrategy or a list of OnlineStrategy
            begin_time (Union[str, pd.Timestamp], optional): the OnlineManager will begin at this time. Defaults to None, which means using the latest calendar date.
            trainer (Trainer): the trainer used to train tasks. Defaults to None, which means using TrainerR.
            freq (str, optional): data frequency. Defaults to "day".
        """
        self.logger = get_module_logger(self.__class__.__name__)
        if not isinstance(strategies, list):
            strategies = [strategies]
        self.strategies = strategies
        self.freq = freq
        if begin_time is None:
            begin_time = D.calendar(freq=self.freq).max()
        self.begin_time = pd.Timestamp(begin_time)
        self.cur_time = self.begin_time
        # OnlineManager will record the history of online models, which is a dict like {pd.Timestamp: {strategy: [online_models]}}.
        self.history = {}
        if trainer is None:
            trainer = TrainerR()
        self.trainer = trainer
        self.signals = None
        self.status = self.STATUS_NORMAL
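Construction sketch (illustrative; `my_strategy` stands for any OnlineStrategy instance and is not defined here):

manager = OnlineManager(
    strategies=my_strategy,   # a single OnlineStrategy is wrapped into a list internally
    trainer=None,             # falls back to TrainerR()
    begin_time="2021-01-01",  # parsed into a pd.Timestamp
    freq="day",
)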
Example #3
    def __init__(self, task_pool: str):
        """
        Init Task Manager; remember to configure the MongoDB URL and database name first.
        A TaskManager instance serves a specific task pool.
        The static methods of this module operate on the whole MongoDB.

        Parameters
        ----------
        task_pool: str
            the name of the Collection in MongoDB
        """
        self.task_pool = getattr(get_mongodb(), task_pool)
        self.logger = get_module_logger(self.__class__.__name__)
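A minimal setup sketch, assuming the MongoDB connection is supplied through qlib's global config; the key names below follow qlib's task-management examples and may differ between versions.

from qlib.config import C

C["mongo"] = {
    "task_url": "mongodb://localhost:27017/",  # assumed local MongoDB instance
    "task_db_name": "rolling_db",              # assumed database name
}

tm = TaskManager(task_pool="my_task_pool")  # binds the "my_task_pool" collection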
Example #4
    def __init__(self, task_pool: str = None):
        """
        Init Task Manager; remember to configure the MongoDB URL and database name first.

        Parameters
        ----------
        task_pool: str, optional
            the name of the Collection in MongoDB. If None, no collection is bound at construction time.
        """
        self.mdb = get_mongodb()
        if task_pool is not None:
            self.task_pool = getattr(self.mdb, task_pool)
        self.logger = get_module_logger(self.__class__.__name__)
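In this variant the pool binding is optional (illustrative pool name):

tm_db = TaskManager()                  # holds only the database handle; no collection bound
tm_pool = TaskManager("my_task_pool")  # bound to the "my_task_pool" collection, as in Example #3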
Example #5
def run_task(
    task_func: Callable,
    task_pool: str,
    query: dict = {},
    force_release: bool = False,
    before_status: str = TaskManager.STATUS_WAITING,
    after_status: str = TaskManager.STATUS_DONE,
    **kwargs,
):
    """
    While the task pool still contains tasks in before_status (matching `query`), fetch and run them with `task_func`.

    After running this method, here are 4 situations (before_status -> after_status):

        STATUS_WAITING -> STATUS_DONE: use task["def"] as `task_func` param

        STATUS_WAITING -> STATUS_PART_DONE: use task["def"] as `task_func` param

        STATUS_PART_DONE -> STATUS_PART_DONE: use task["res"] as `task_func` param

        STATUS_PART_DONE -> STATUS_DONE: use task["res"] as `task_func` param

    Parameters
    ----------
    task_func : Callable
        def (task_def, **kwargs) -> <res which will be committed>
            the function to run the task
    task_pool : str
        the name of the task pool (Collection in MongoDB)
    query: dict
        will use this dict to query task_pool when fetching task
    force_release : bool
        whether to force resources to be released by running each task in a separate process
    before_status : str
        tasks in this status will be fetched and trained. Can be STATUS_WAITING or STATUS_PART_DONE.
    after_status : str
        tasks are set to this status after training. Can be STATUS_DONE or STATUS_PART_DONE.
    kwargs
        the params for `task_func`
    """
    tm = TaskManager(task_pool)

    ever_run = False

    while True:
        with tm.safe_fetch_task(status=before_status, query=query) as task:
            if task is None:
                break
            get_module_logger("run_task").info(task["def"])
            # when fetching `WAITING` task, use task["def"] to train
            if before_status == TaskManager.STATUS_WAITING:
                param = task["def"]
            # when fetching `PART_DONE` task, use task["res"] to train because the middle result has been saved to task["res"]
            elif before_status == TaskManager.STATUS_PART_DONE:
                param = task["res"]
            else:
                raise ValueError(
                    "The fetched task must be `STATUS_WAITING` or `STATUS_PART_DONE`!"
                )
            if force_release:
                with concurrent.futures.ProcessPoolExecutor(
                        max_workers=1) as executor:
                    res = executor.submit(task_func, param, **kwargs).result()
            else:
                res = task_func(param, **kwargs)
            tm.commit_task_res(task, res, status=after_status)
            ever_run = True

    return ever_run
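Usage sketch (illustrative; the pool name, task fields, and training function are placeholders, not from the source):

def my_task_func(task_def: dict, experiment_name: str):
    # Train according to task_def and return something serializable;
    # the return value is committed back as task["res"].
    return {"trained_on": task_def.get("dataset"), "experiment": experiment_name}

ran_any = run_task(
    my_task_func,
    task_pool="example_tasks",
    force_release=True,          # run each task in its own subprocess
    experiment_name="demo_exp",  # forwarded to my_task_func via **kwargs
)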
Example #6
    def __init__(self, record: Recorder, *args, **kwargs):
        # Keep a reference to the recorder and set up a class-specific logger.
        self.record = record
        self.logger = get_module_logger(self.__class__.__name__)