예제 #1
0
def test_compute_P_ABBA(T_A, T_B):
    """Compare `_compute_P_ABBA` with a reference built from `naive.stump`.

    The reference buffer holds column 0 of `naive.stump(T_A, m, T_B)` in its
    first `n_A - m + 1` slots and column 0 of `naive.stump(T_B, m, T_A)` in
    the remaining `n_B - m + 1` slots.
    """
    m = 3
    n_sub_A = T_A.shape[0] - m + 1
    n_sub_B = T_B.shape[0] - m + 1
    total = n_sub_A + n_sub_B

    expected = np.empty(total, dtype=np.float64)
    actual = np.empty(total, dtype=np.float64)

    expected[:n_sub_A] = naive.stump(T_A, m, T_B)[:, 0]
    expected[n_sub_A:] = naive.stump(T_B, m, T_A)[:, 0]

    # `_compute_P_ABBA` is expected to write its result into `actual`.
    _compute_P_ABBA(T_A, T_B, m, actual)

    npt.assert_almost_equal(expected, actual)
예제 #2
0
def test_motifs_two_motifs():
    """Check the distances `motifs` reports for two planted motifs.

    Two patterns are written into Gaussian noise: a step function with three
    occurrences (two of them slightly perturbed) and a linear slope with two
    exact occurrences. Only the returned distances are compared, against
    values computed with `naive.distance` on z-normalized subsequences.
    """
    # Fix seed, because in some cases motifs can be off by an index, which makes
    # the test fail since one of the motifs is not repeated perfectly in T.
    np.random.seed(1234)

    try:
        # The time series is random noise with two motifs for m=20:
        # * (almost) identical step functions at indices 10, 110 and 210
        # * identical linear slopes at indices 70 and 170
        T = np.random.normal(size=300)
        m = 20

        T[10:30] = 1
        T[12:28] = 2

        T[110:130] = 3
        T[112:128] = 6
        T[120] = 6.6

        T[210:230] = 1
        T[212:228] = 2
        T[220] = 1.9
        # naive.distance(naive.z_norm(T[10:30]), naive.z_norm(T[110:130])) = 0.47
        # naive.distance(naive.z_norm(T[10:30]), naive.z_norm(T[210:230])) = 0.24
        # naive.distance(naive.z_norm(T[110:130]), naive.z_norm(T[210:230])) = 0.72
        # Hence T[10:30] is the motif representative for this motif

        T[70:90] = np.arange(m) * 0.1
        T[170:190] = np.arange(m) * 0.1
        # naive.distance(naive.z_norm(T[70:90]), naive.z_norm(T[170:190])) = 0.0

        max_motifs = 2

        mp = naive.stump(T, m)

        # left_indices = [[70, 170, -1], [10, 210, 110]]
        left_profile_values = [
            [0.0, 0.0, np.nan],
            [
                0.0,
                naive.distance(core.z_norm(T[10:30]), core.z_norm(T[210:230])),
                naive.distance(core.z_norm(T[10:30]), core.z_norm(T[110:130])),
            ],
        ]

        # The returned indices are deliberately discarded (see below).
        right_distance_values, _ = motifs(
            T,
            mp[:, 0],
            max_motifs=max_motifs,
            max_distance=0.5,
            cutoff=np.inf,
        )

        # We ignore indices because of sorting ambiguities for equal distances.
        # As long as the distances are correct, the indices will be too.
        npt.assert_almost_equal(left_profile_values,
                                right_distance_values,
                                decimal=6)
    finally:
        # Reset the seed even when the test fails, so that later tests in the
        # same process do not keep running on the fixed seed.
        np.random.seed(None)
예제 #3
0
def naive_consensus_search(Ts, m):
    """
    Brute-force search for the consensus motif, following Table 1 of
    <https://www.cs.ucr.edu/~eamonn/consensus_Motif_ICDM_Long_version.pdf>

    For every series `Ts[j]`, each subsequence gets a radius equal to the
    largest value found for it in column 0 of `naive.stump(Ts[j], m, Ts[i])`
    over all other series `Ts[i]`. The subsequence with the smallest radius
    across all series is then handed to `naive_get_central_motif`.

    The pseudocode in the paper has a bug at line 8, where `i` should be `j`;
    this implementation uses the corrected index.
    """
    best_radius = np.inf
    best_Ts_idx = 0
    best_subseq_idx = 0

    for j, T_j in enumerate(Ts):
        radii = np.zeros(len(T_j) - m + 1)
        for i, T_i in enumerate(Ts):
            if i == j:
                continue  # a series is never compared with itself
            radii = np.maximum(radii, naive.stump(T_j, m, T_i)[:, 0])

        candidate_idx = np.argmin(radii)
        candidate_radius = radii[candidate_idx]
        # Strict comparison: on ties the earliest candidate is kept.
        if candidate_radius < best_radius:
            best_radius = candidate_radius
            best_Ts_idx = j
            best_subseq_idx = candidate_idx

    return naive_get_central_motif(
        Ts, best_radius, best_Ts_idx, best_subseq_idx, m
    )
예제 #4
0
def test_motifs_max_matches():
    """Check that `max_matches` caps the number of matches per motif.

    The time series contains motif A at four locations and motif B at two.
    With `max_matches=2` the expected result holds only the top two matches
    of motif A and the top two matches of motif B, as two separate motifs.
    """
    T = np.array([
        0.0,  # motif A
        1.0,
        0.0,
        2.3,
        -1.0,  # motif B
        -1.0,
        -2.0,
        0.0,  # motif A
        1.0,
        0.0,
        -2.0,
        -1.0,  # motif B
        -1.03,
        -2.0,
        -0.5,
        2.0,  # motif A
        3.0,
        2.04,
        2.3,
        2.0,  # motif A
        3.0,
        2.02,
    ])
    m = 3
    max_motifs = 3

    # Expected match locations: [[0, 7], [4, 11]]; only the motif B pair is
    # needed to derive a non-zero expected distance.
    b_first, b_second = 4, 11
    motif_B_distance = naive.distance(
        core.z_norm(T[b_first:b_first + m]),
        core.z_norm(T[b_second:b_second + m]),
    )
    expected_distances = [
        [0.0, 0.0],
        [0.0, motif_B_distance],
    ]

    mp = naive.stump(T, m)
    actual_distances, _ = motifs(
        T,
        mp[:, 0],
        max_motifs=max_motifs,
        max_distance=0.1,
        cutoff=np.inf,
        max_matches=2,
    )

    # Indices are ignored because equal distances make their order ambiguous.
    # As long as the distances are correct, the indices will be too.
    npt.assert_almost_equal(expected_distances, actual_distances, decimal=4)
예제 #5
0
def test_stumped_A_B_join(T_A, T_B, dask_cluster):
    """Compare `stumped` on `T_A` vs `T_B` with `naive.stump` for `m = 3`."""
    m = 3
    with Client(dask_cluster) as client:
        expected = naive.stump(T_A, m, T_B=T_B)
        actual = stumped(client, T_A, m, T_B, ignore_trivial=False)

        # Apply the same infinity replacement to both results before comparing.
        for profile in (expected, actual):
            naive.replace_inf(profile)

        npt.assert_almost_equal(expected, actual)
예제 #6
0
def test_stumped_A_B_join(T_A, T_B, dask_cluster):
    """Check the distributed `stumped` result against the naive reference.

    Both are computed for `T_A` against `T_B` with a window size of 3 and
    `ignore_trivial=False`.
    """
    window = 3
    with Client(dask_cluster) as dask_client:
        naive_mp = naive.stump(T_A, window, T_B=T_B)
        dask_mp = stumped(dask_client, T_A, window, T_B, ignore_trivial=False)
        # Both arrays get the same infinity replacement before the comparison.
        naive.replace_inf(naive_mp)
        naive.replace_inf(dask_mp)
        npt.assert_almost_equal(naive_mp, dask_mp)
예제 #7
0
def test_stumped_self_join(T_A, T_B, dask_cluster):
    """Compare a `stumped` self-join of `T_B` with `naive.stump`.

    The reference uses an exclusion zone of `ceil(m / 4)`.
    """
    m = 3
    with Client(dask_cluster) as client:
        expected = naive.stump(T_B, m, exclusion_zone=int(np.ceil(m / 4)))
        actual = stumped(client, T_B, m, ignore_trivial=True)

        for profile in (expected, actual):
            naive.replace_inf(profile)

        npt.assert_almost_equal(expected, actual)
예제 #8
0
def test_stumped_self_join_df(T_A, T_B, dask_cluster):
    """Compare a `stumped` self-join on a `pd.Series` with `naive.stump`.

    `T_B` is wrapped in a `pd.Series` for `stumped`; the naive reference gets
    the raw `T_B` and an exclusion zone of `ceil(m / 4)`.
    """
    m = 3
    with Client(dask_cluster) as client:
        expected = naive.stump(T_B, m, exclusion_zone=int(np.ceil(m / 4)))
        series = pd.Series(T_B)
        actual = stumped(client, series, m, ignore_trivial=True)

        naive.replace_inf(expected)
        naive.replace_inf(actual)

        npt.assert_almost_equal(expected, actual)
예제 #9
0
def test_mpdist_vect_k(T_A, T_B, k):
    """Compare `_mpdist_vect` for an explicit `k` with a naive reference.

    For every length-`n_A` window of `T_B`, column 0 of the two
    `naive.stump` calls (window vs `T_A`, both directions) is stacked into
    `P_ABBA` and sorted, and the `k`-th smallest value is the reference.
    """
    m = 3
    n_A = T_A.shape[0]
    n_B = T_B.shape[0]
    j = n_A - m + 1  # `k` is reserved for `P_ABBA` selection
    P_ABBA = np.empty(2 * j, dtype=np.float64)
    n_windows = n_B - n_A + 1
    expected = np.empty(n_windows)

    # Clamp `k` once so it is always a valid index into `P_ABBA`.
    k = min(int(k), P_ABBA.shape[0] - 1)

    for start in range(n_windows):
        window = T_B[start:start + n_A]
        P_ABBA[:j] = naive.stump(T_A, m, window)[:, 0]
        P_ABBA[j:] = naive.stump(window, m, T_A)[:, 0]
        P_ABBA.sort()
        expected[start] = P_ABBA[k]

    actual = _mpdist_vect(T_A, T_B, m, k=k)

    npt.assert_almost_equal(expected, actual)
예제 #10
0
def test_stump_self_join_larger_window(T_A, T_B, m, dask_cluster):
    """Compare a `stumped` self-join of `T_B` with `naive.stump` for window `m`.

    Nothing is checked when `T_B` is not longer than `m`.
    """
    with Client(dask_cluster) as client:
        if len(T_B) <= m:
            return

        expected = naive.stump(T_B, m, exclusion_zone=int(np.ceil(m / 4)))
        actual = stumped(client, T_B, m, ignore_trivial=True)

        for profile in (expected, actual):
            naive.replace_inf(profile)

        npt.assert_almost_equal(expected, actual)
예제 #11
0
def test_stump_A_B_join(T_A, T_B):
    """Compare `stump` on `T_A` vs `T_B` with `naive.stump`.

    The comparison is run twice: with the raw inputs and with both inputs
    wrapped in `pd.Series`.
    """
    m = 3
    expected = naive.stump(T_A, m, T_B=T_B)
    naive.replace_inf(expected)

    # First pass: inputs as given by the fixtures.
    actual = stump(T_A, m, T_B, ignore_trivial=False)
    naive.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)

    # Second pass: the same data as pandas Series.
    actual = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False)
    naive.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
예제 #12
0
def test_stump_A_B_join(T_A, T_B):
    """Check `stump` against the naive reference for `T_A` vs `T_B`.

    Raw inputs and `pd.Series` inputs must both match the naive result.
    """
    window = 3
    naive_mp = naive.stump(T_A, window, T_B=T_B)
    array_mp = stump(T_A, window, T_B, ignore_trivial=False)
    naive.replace_inf(naive_mp)
    naive.replace_inf(array_mp)
    npt.assert_almost_equal(naive_mp, array_mp)

    series_A = pd.Series(T_A)
    series_B = pd.Series(T_B)
    series_mp = stump(series_A, window, series_B, ignore_trivial=False)
    naive.replace_inf(series_mp)
    npt.assert_almost_equal(naive_mp, series_mp)
예제 #13
0
def test_stumped_one_constant_subsequence_A_B_join_swap(dask_cluster):
    """Compare `stumped` and `naive.stump` when `T_B` has constant stretches.

    `T_A` is 20 random values; `T_B` is 20 zeros followed by 5 ones. Only
    column 0 of the results is compared.
    """
    m = 3
    with Client(dask_cluster) as client:
        T_A = np.random.rand(20)
        zeros = np.zeros(20, dtype=np.float64)
        ones = np.ones(5, dtype=np.float64)
        T_B = np.concatenate((zeros, ones))

        expected = naive.stump(T_A, m, T_B=T_B)
        actual = stumped(client, T_A, m, T_B, ignore_trivial=False)
        for profile in (expected, actual):
            naive.replace_inf(profile)

        # Column 0 only: the indices are ignored.
        npt.assert_almost_equal(expected[:, 0], actual[:, 0])
def test_stumped_one_constant_subsequence_self_join(dask_cluster):
    """Compare a `stumped` self-join with `naive.stump` on a step series.

    The series is 20 zeros followed by 5 ones. Only column 0 of the results
    is compared.
    """
    m = 3
    with Client(dask_cluster) as client:
        zeros = np.zeros(20, dtype=np.float64)
        ones = np.ones(5, dtype=np.float64)
        T_A = np.concatenate((zeros, ones))

        expected = naive.stump(T_A, m, exclusion_zone=int(np.ceil(m / 4)))
        actual = stumped(client, T_A, m, ignore_trivial=True)
        for profile in (expected, actual):
            naive.replace_inf(profile)

        # Column 0 only: the indices are ignored.
        npt.assert_almost_equal(expected[:, 0], actual[:, 0])
예제 #15
0
def test_stump_self_join_larger_window_df(T_A, T_B, dask_cluster):
    """Compare `stumped` on a `pd.Series` with `naive.stump` for several `m`.

    Window sizes 8, 16 and 32 are tried; a size is skipped when `T_B` is not
    longer than it.
    """
    with Client(dask_cluster) as client:
        for m in [8, 16, 32]:
            if len(T_B) <= m:
                continue

            expected = naive.stump(T_B, m, exclusion_zone=int(np.ceil(m / 4)))
            actual = stumped(client, pd.Series(T_B), m, ignore_trivial=True)
            naive.replace_inf(expected)
            naive.replace_inf(actual)

            npt.assert_almost_equal(expected, actual)
예제 #16
0
def test_stumped_one_constant_subsequence_self_join_df(dask_cluster):
    """Compare a `stumped` self-join on a `pd.Series` step series with naive.

    The series is 20 zeros followed by 5 ones. `stumped` receives it as a
    `pd.Series`; only column 0 of the results is compared.
    """
    m = 3
    with Client(dask_cluster) as client:
        T_A = np.concatenate(
            (np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64))
        )
        exclusion = int(np.ceil(m / 4))

        expected = naive.stump(T_A, m, exclusion_zone=exclusion)
        actual = stumped(client, pd.Series(T_A), m, ignore_trivial=True)
        naive.replace_inf(expected)
        naive.replace_inf(actual)

        # Column 0 only: the indices are ignored.
        npt.assert_almost_equal(expected[:, 0], actual[:, 0])
예제 #17
0
def test_mpdist_vect(T_A, T_B):
    """Compare `_mpdist_vect` (called without `k`) with a naive reference.

    The reference picks the `k`-th smallest value of the stacked, sorted
    `P_ABBA` for each length-`n_A` window of `T_B`, where `k` is
    `ceil(0.05 * 2 * n_A)` clamped to the last valid index.
    """
    m = 3
    n_A = T_A.shape[0]
    n_B = T_B.shape[0]
    j = n_A - m + 1  # `k` is reserved for `P_ABBA` selection
    P_ABBA = np.empty(2 * j, dtype=np.float64)
    n_windows = n_B - n_A + 1
    expected = np.empty(n_windows)

    percentage = 0.05
    # `P_ABBA` has `2 * j` entries, so `2 * j - 1` is its last valid index.
    k = int(min(math.ceil(percentage * (2 * n_A)), 2 * j - 1))

    for start in range(n_windows):
        window = T_B[start:start + n_A]
        P_ABBA[:j] = naive.stump(T_A, m, window)[:, 0]
        P_ABBA[j:] = naive.stump(window, m, T_A)[:, 0]
        P_ABBA.sort()
        expected[start] = P_ABBA[k]

    actual = _mpdist_vect(T_A, T_B, m)

    npt.assert_almost_equal(expected, actual)
예제 #18
0
def test_stump_self_join(T_A, T_B):
    """Check a `stump` self-join of `T_B` against the naive reference.

    Raw input and `pd.Series` input must both match the naive result, which
    uses an exclusion zone of `ceil(m / 4)`.
    """
    window = 3
    naive_mp = naive.stump(T_B, window, exclusion_zone=int(np.ceil(window / 4)))
    array_mp = stump(T_B, window, ignore_trivial=True)
    naive.replace_inf(naive_mp)
    naive.replace_inf(array_mp)
    npt.assert_almost_equal(naive_mp, array_mp)

    series_mp = stump(pd.Series(T_B), window, ignore_trivial=True)
    naive.replace_inf(series_mp)
    npt.assert_almost_equal(naive_mp, series_mp)
예제 #19
0
def test_stump_self_join(T_A, T_B):
    """Compare a `stump` self-join of `T_B` with `naive.stump`.

    The comparison is run twice: with the raw `T_B` and with `T_B` wrapped in
    a `pd.Series`.
    """
    m = 3
    expected = naive.stump(T_B, m, exclusion_zone=int(np.ceil(m / 4)))
    naive.replace_inf(expected)

    # First pass: input as given by the fixture.
    actual = stump(T_B, m, ignore_trivial=True)
    naive.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)

    # Second pass: the same data as a pandas Series.
    actual = stump(pd.Series(T_B), m, ignore_trivial=True)
    naive.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
예제 #20
0
def test_stumped_nan_zero_mean_self_join(dask_cluster):
    """Compare a `stumped` self-join with `naive.stump` on a symmetric series.

    The series is `[-1, 0, 1, inf, 1, 0, -1]`.
    NOTE(review): the test name says `nan`, but the series contains `np.inf`
    and no NaN -- confirm which non-finite value is intended.
    """
    m = 3
    with Client(dask_cluster) as client:
        T = np.array([-1, 0, 1, np.inf, 1, 0, -1])
        exclusion = int(np.ceil(m / 4))

        expected = naive.stump(T, m, exclusion_zone=exclusion)
        actual = stumped(client, T, m, ignore_trivial=True)

        for profile in (expected, actual):
            naive.replace_inf(profile)

        npt.assert_almost_equal(expected, actual)
def test_stumped_one_subsequence_inf_A_B_join(T_A, T_B,
                                              substitution_location_B,
                                              dask_cluster):
    """Compare `stumped` with `naive.stump` when `T_B` holds one `inf`.

    A copy of `T_B` gets `np.inf` written at `substitution_location_B`; the
    fixture array itself is left unmodified.
    """
    m = 3
    with Client(dask_cluster) as client:
        T_B_inf = T_B.copy()
        T_B_inf[substitution_location_B] = np.inf

        naive_mp = naive.stump(T_A, m, T_B=T_B_inf)
        dask_mp = stumped(client, T_A, m, T_B_inf, ignore_trivial=False)
        for profile in (naive_mp, dask_mp):
            naive.replace_inf(profile)

        npt.assert_almost_equal(naive_mp, dask_mp)
예제 #22
0
def test_stumped_one_subsequence_nan_A_B_join(T_A, T_B,
                                              substitution_location_B,
                                              dask_cluster):
    """Compare `stumped` with `naive.stump` when `T_B` holds one NaN.

    A copy of `T_B` gets `np.nan` written at `substitution_location_B`; the
    fixture array itself is left unmodified.
    """
    m = 3
    with Client(dask_cluster) as client:
        T_B_nan = T_B.copy()
        T_B_nan[substitution_location_B] = np.nan

        expected = naive.stump(T_A, m, T_B=T_B_nan)
        actual = stumped(client, T_A, m, T_B_nan, ignore_trivial=False)
        naive.replace_inf(expected)
        naive.replace_inf(actual)

        npt.assert_almost_equal(expected, actual)
예제 #23
0
def test_stump_constant_subsequence_self_join():
    """Check a `stump` self-join on a step series against the naive reference.

    The series is 20 zeros followed by 5 ones. Raw input and `pd.Series`
    input are both checked; only column 0 of the results is compared.
    """
    zeros = np.zeros(20, dtype=np.float64)
    ones = np.ones(5, dtype=np.float64)
    T_A = np.concatenate((zeros, ones))
    m = 3

    naive_mp = naive.stump(T_A, m, exclusion_zone=int(np.ceil(m / 4)))
    array_mp = stump(T_A, m, ignore_trivial=True)
    naive.replace_inf(naive_mp)
    naive.replace_inf(array_mp)
    # Column 0 only: the indices are ignored.
    npt.assert_almost_equal(naive_mp[:, 0], array_mp[:, 0])

    series_mp = stump(pd.Series(T_A), m, ignore_trivial=True)
    naive.replace_inf(series_mp)
    npt.assert_almost_equal(naive_mp[:, 0], series_mp[:, 0])
def test_stumped_one_subsequence_inf_self_join(T_A, T_B,
                                               substitution_location_B,
                                               dask_cluster):
    """Compare a `stumped` self-join with naive when the series has one `inf`.

    A copy of `T_B` gets `np.inf` written at `substitution_location_B`; the
    naive reference uses an exclusion zone of `ceil(m / 4)`.
    """
    m = 3
    with Client(dask_cluster) as client:
        T_B_inf = T_B.copy()
        T_B_inf[substitution_location_B] = np.inf

        exclusion = int(np.ceil(m / 4))
        naive_mp = naive.stump(T_B_inf, m, exclusion_zone=exclusion)
        dask_mp = stumped(client, T_B_inf, m, ignore_trivial=True)
        for profile in (naive_mp, dask_mp):
            naive.replace_inf(profile)

        npt.assert_almost_equal(naive_mp, dask_mp)
def test_stumped_one_constant_subsequence_A_B_join_df(dask_cluster):
    """Compare `stumped` on `pd.Series` inputs with `naive.stump`.

    `T_A` is 20 random values; `T_B` is 20 zeros followed by 5 ones. Both are
    wrapped in `pd.Series` for `stumped`; only column 0 is compared.
    """
    m = 3
    with Client(dask_cluster) as client:
        T_A = np.random.rand(20)
        T_B = np.concatenate(
            (np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64))
        )

        expected = naive.stump(T_A, m, T_B=T_B)
        series_A = pd.Series(T_A)
        series_B = pd.Series(T_B)
        actual = stumped(client, series_A, m, series_B, ignore_trivial=False)

        naive.replace_inf(expected)
        naive.replace_inf(actual)
        # Column 0 only: the indices are ignored.
        npt.assert_almost_equal(expected[:, 0], actual[:, 0])
예제 #26
0
def test_stumped_one_subsequence_nan_self_join(T_A, T_B,
                                               substitution_location_B,
                                               dask_cluster):
    """Compare a `stumped` self-join with naive when the series has one NaN.

    A copy of `T_B` gets `np.nan` written at `substitution_location_B`; the
    naive reference uses an exclusion zone of `ceil(m / 4)`.
    """
    m = 3
    with Client(dask_cluster) as client:
        T_B_nan = T_B.copy()
        T_B_nan[substitution_location_B] = np.nan

        expected = naive.stump(
            T_B_nan, m, exclusion_zone=int(np.ceil(m / 4))
        )
        actual = stumped(client, T_B_nan, m, ignore_trivial=True)
        naive.replace_inf(expected)
        naive.replace_inf(actual)

        npt.assert_almost_equal(expected, actual)
예제 #27
0
def test_stumped_identical_subsequence_A_B_join(dask_cluster):
    """Compare `stumped` with naive when both series share an identical run.

    The same 8 random values are written into `T_A` at index 1 and into `T_B`
    at index 11. Only column 0 is compared, to
    `config.STUMPY_TEST_PRECISION` decimals.
    """
    m = 3
    with Client(dask_cluster) as client:
        shared = np.random.rand(8)
        T_A = np.random.rand(20)
        T_B = np.random.rand(20)
        width = shared.shape[0]
        T_A[1:1 + width] = shared
        T_B[11:11 + width] = shared

        expected = naive.stump(T_A, m, T_B=T_B)
        actual = stumped(client, T_A, m, T_B, ignore_trivial=False)
        for profile in (expected, actual):
            naive.replace_inf(profile)

        # Column 0 only: the indices are ignored.
        npt.assert_almost_equal(
            expected[:, 0],
            actual[:, 0],
            decimal=config.STUMPY_TEST_PRECISION,
        )
예제 #28
0
def test_stumped_identical_subsequence_self_join(dask_cluster):
    """Compare a `stumped` self-join with naive on a series with a repeated run.

    The same 8 random values are written into `T_A` at index 1 and at
    index 11. Only column 0 is compared, to `config.STUMPY_TEST_PRECISION`
    decimals.
    """
    m = 3
    with Client(dask_cluster) as client:
        shared = np.random.rand(8)
        T_A = np.random.rand(20)
        width = shared.shape[0]
        for start in (1, 11):
            T_A[start:start + width] = shared

        expected = naive.stump(T_A, m, exclusion_zone=int(np.ceil(m / 4)))
        actual = stumped(client, T_A, m, ignore_trivial=True)
        naive.replace_inf(expected)
        naive.replace_inf(actual)

        # Column 0 only: the indices are ignored.
        npt.assert_almost_equal(
            expected[:, 0],
            actual[:, 0],
            decimal=config.STUMPY_TEST_PRECISION,
        )
예제 #29
0
def test_stimp_100_percent(T):
    """Check `stimp` at `percentage=1.0` against naive references.

    After `n` calls to `update()`, both the raw `_PAN` array and the
    transformed `PAN_` array are compared with references built from
    `naive.stump` and `naive.transform_pan` respectively.
    """
    threshold = 0.2
    min_m = 3
    n_updates = T.shape[0] - min_m + 1

    pan = stimp(
        T,
        min_m=min_m,
        max_m=None,
        step=1,
        percentage=1.0,
        pre_scrump=True,
    )

    for _ in range(n_updates):
        pan.update()

    # Raw pan: one row per window size, filled with `inf` where no value exists.
    expected_raw = np.full((pan.M_.shape[0], T.shape[0]), fill_value=np.inf)
    for idx, m in enumerate(pan.M_[:n_updates]):
        naive_mp = naive.stump(
            T, m, T_B=None, exclusion_zone=int(np.ceil(m / 4))
        )
        row = pan._bfs_indices[idx]
        expected_raw[row, :naive_mp.shape[0]] = naive_mp[:, 0]

    actual_raw = pan._PAN
    naive.replace_inf(expected_raw)
    naive.replace_inf(actual_raw)
    npt.assert_almost_equal(expected_raw, actual_raw)

    # Transformed pan: read `PAN_` first, then build the naive transform from
    # the object's own internal state.
    actual_transformed = pan.PAN_
    expected_transformed = naive.transform_pan(
        pan._PAN, pan._M, threshold, pan._bfs_indices, pan._n_processed
    )
    naive.replace_inf(expected_transformed)
    naive.replace_inf(actual_transformed)
    npt.assert_almost_equal(expected_transformed, actual_transformed)
예제 #30
0
def test_stimp_100_percent(T):
    """Check `stimp` distances and indices at `percentage=1.0` against naive.

    A random seed is drawn once and applied both before constructing `stimp`
    and before building the naive reference. `P_` and `I_` are then compared
    with columns 0 and 1 of `naive.stump` for every processed window size.
    """
    min_m = 3
    n_updates = T.shape[0] - min_m + 1

    seed = np.random.randint(100000)

    np.random.seed(seed)
    pmp = stimp(
        T,
        min_m=min_m,
        max_m=None,
        step=1,
        percentage=1.0,
        pre_scrump=True,
        normalize=True,
    )

    for _ in range(n_updates):
        pmp.update()

    shape = (pmp.M_.shape[0], T.shape[0])
    expected_P = np.full(shape, fill_value=np.inf)
    expected_I = np.full(shape, -1, dtype=np.int64)

    # Re-apply the same seed before the reference computation.
    np.random.seed(seed)
    for idx, m in enumerate(pmp.M_[:n_updates]):
        naive_mp = naive.stump(
            T, m, T_B=None, exclusion_zone=int(np.ceil(m / 4))
        )
        row = pmp.bfs_indices_[idx]
        width = naive_mp.shape[0]
        expected_P[row, :width] = naive_mp[:, 0]
        expected_I[row, :width] = naive_mp[:, 1]

    actual_P = pmp.P_
    actual_I = pmp.I_

    naive.replace_inf(expected_P)
    naive.replace_inf(expected_I)
    naive.replace_inf(actual_P)
    naive.replace_inf(actual_I)

    npt.assert_almost_equal(expected_P, actual_P)
    npt.assert_almost_equal(expected_I, actual_I)