コード例 #1
0
ファイル: cycles.py プロジェクト: scikit-mine/scikit-mine
def get_table_dyn(S: pd.Index, n_tot: int, max_length=100):
    """
    Build the dynamic-programming tables of segment scores and cut points.

    Every contiguous segment of ``S`` covering between 3 and
    ``min(len(S), max_length)`` occurrences is scored with ``cycle_length``.
    That score is then compared against the alternatives: for segments of
    3 occurrences, three single residuals; for longer segments, every split
    into a left and a right part. Parts covering fewer than 3 occurrences
    are scored as ``score_one`` per occurrence, where ``score_one`` is
    ``residual_length(1, n_tot, dS)``.

    Parameters
    ----------
    S: pd.Index or np.ndarray
        a Series of occurrences
    n_tot: int
        total number of occurrences in the original events
    max_length: int, default=100
        maximum number of occurrences for a cycle to cover.
        It is capped at ``len(S)``.

    Returns
    -------
    scores: dict
        maps ``(start, end)`` positions in ``S`` (both inclusive) to the
        lowest score found for that segment
    cut_points: dict
        same keys as ``scores``. The value is ``None`` when the segment is
        best kept whole, ``-1`` when a segment of 3 occurrences is replaced
        by three single residuals, and otherwise the position ``im`` such
        that the segment is split into ``(start, im)`` and ``(im + 1, end)``.
    """
    diffs = np.diff(S)
    triples = sliding_window_view(S, 3)
    diff_pairs = sliding_window_view(diffs, 2)
    dS = S.max() - S.min()

    # NOTE(review): the original author questioned whether 1 is the right
    # first argument here ("1 really ?") -- left unchanged.
    score_one = residual_length(1, n_tot, dS)

    # Base case: segments covering exactly 3 occurrences.
    triple_scores = sum(cycle_length(triples, diff_pairs, len(S), dS))
    change = triple_scores > 3 * score_one
    # Where three single residuals score lower, use that score instead
    # (in-place replacement) and flag the segment with -1.
    triple_scores[change] = 3 * score_one
    triple_cuts = np.array([-1] * len(triple_scores), dtype=object)
    triple_cuts[~change] = None

    scores = {(i, i + 2): score for i, score in enumerate(triple_scores)}
    cut_points = {(i, i + 2): cut for i, cut in enumerate(triple_cuts)}

    max_length = min(len(S), max_length)
    for k in range(4, max_length + 1):
        windows = sliding_window_view(S, k)
        window_diffs = sliding_window_view(diffs, k - 1)
        whole_scores = sum(cycle_length(windows, window_diffs, len(S), dS))

        for ia, best_score in enumerate(whole_scores):
            cut_point = None
            iz = ia + k - 1  # inclusive end position of the window
            for im in range(ia, iz):
                left_len = im - ia + 1
                right_len = iz - im

                # Parts with fewer than 3 occurrences have no table entry;
                # they are scored as individual residuals.
                if left_len < 3:
                    score_left = score_one * left_len
                else:
                    score_left = scores[(ia, im)]
                if right_len < 3:
                    score_right = score_one * right_len
                else:
                    score_right = scores[(im + 1, iz)]

                split_score = score_left + score_right
                if split_score < best_score:
                    best_score = split_score
                    cut_point = im
            scores[(ia, iz)] = best_score
            cut_points[(ia, iz)] = cut_point

    return scores, cut_points
コード例 #2
0
ファイル: _split.py プロジェクト: juanitorduz/sktime
def _check_cutoffs_fh_y(
    cutoffs: VALID_CUTOFF_TYPES, fh: FORECASTING_HORIZON_TYPES, y: pd.Index
) -> None:
    """Validate that the largest cutoff plus the largest `fh` fits inside `y`.

    Two type combinations are accepted, as judged by the `is_int`,
    `is_datetime` and `is_timedelta` helpers:

    * maximum cutoff and maximum `fh` are both integers: their sum is
      compared against ``y.shape[0]``;
    * maximum cutoff is a datetime and maximum `fh` is a timedelta: their
      sum is compared against ``y.max()``.

    Parameters
    ----------
    cutoffs : np.array or pd.Index
        Cutoff points; the maximum is taken with ``np.max``.
        Type should match the type of `fh` input.
    fh : forecasting horizon
        Type should match the type of `cutoffs` input.
        NOTE(review): the code calls ``fh.max()``, so `fh` must expose a
        ``max`` method -- confirm against callers which types are passed.
    y : pd.Index
        Index of time series

    Raises
    ------
    ValueError
        if max cutoff plus max `fh` exceeds the bound derived from `y`
    TypeError
        if `cutoffs` and `fh` type combination is not supported
    """
    latest_cutoff = np.max(cutoffs)
    furthest_fh = fh.max()

    if is_int(x=latest_cutoff) and is_int(x=furthest_fh):
        # Integer case: the bound is the number of entries in `y`.
        out_of_range = latest_cutoff + furthest_fh > y.shape[0]
    elif is_datetime(x=latest_cutoff) and is_timedelta(x=furthest_fh):
        # Time-like case: the bound is the largest value in `y`.
        out_of_range = latest_cutoff + furthest_fh > y.max()
    else:
        raise TypeError("Unsupported type of `cutoffs` and `fh`")

    if out_of_range:
        raise ValueError("`fh` is incompatible with given `cutoffs` and `y`.")