Example #1
0
def test_mass_nan(Q, T):
    """Check `core.mass` against a direct computation when `T` holds a NaN.

    The reference profile is the row-wise norm of the difference between the
    `core.z_norm`-ed windows of `T` and the `core.z_norm`-ed query; any NaN
    entry in it is expected to surface as `inf` in the output of `core.mass`.
    """
    # Work on a private copy so the injected NaN never leaks into the array
    # object the caller (e.g. a shared parametrized input) still holds.
    T = T.copy()
    T[1] = np.nan
    m = Q.shape[0]

    left = np.linalg.norm(
        core.z_norm(core.rolling_window(T, m), 1) - core.z_norm(Q), axis=1
    )
    left[np.isnan(left)] = np.inf

    right = core.mass(Q, T)
    npt.assert_almost_equal(left, right)
Example #2
0
def test_calculate_distance_profile(Q, T):
    """Compare `core.calculate_distance_profile` with a direct norm computation."""
    m = Q.shape[0]

    # Reference: norm of (normalized windows of T) minus (normalized Q), per row
    normalized_windows = core.z_norm(core.rolling_window(T, m), 1)
    expected = np.linalg.norm(normalized_windows - core.z_norm(Q), axis=1)

    # Gather the precomputed inputs the function under test consumes
    QT = core.sliding_dot_product(Q, T)
    μ_Q, σ_Q = core.compute_mean_std(Q, m)
    M_T, Σ_T = core.compute_mean_std(T, m)
    actual = core.calculate_distance_profile(
        m, QT, μ_Q.item(0), σ_Q.item(0), M_T, Σ_T
    )

    npt.assert_almost_equal(expected, actual)
Example #3
0
def test_mass_absolute_T_inf(Q, T):
    """Check `core.mass_absolute` when one entry of `T` is `inf`.

    The reference is the plain (non-normalized) row-wise norm between each
    window of `T` and `Q`, with NaN results mapped to `inf`.
    """
    query = Q.copy()
    series = T.copy()
    series[1] = np.inf
    m = query.shape[0]

    window_diffs = core.rolling_window(series, m) - query
    expected = np.linalg.norm(window_diffs, axis=1)
    expected[np.isnan(expected)] = np.inf

    actual = core.mass_absolute(query, series)
    npt.assert_almost_equal(expected, actual)
Example #4
0
def test_calculate_squared_distance_profile(Q, T):
    """Compare `core._calculate_squared_distance_profile` with a squared norm."""
    m = Q.shape[0]

    # Reference: squared row-wise norm between normalized windows and query
    normalized_windows = core.z_norm(core.rolling_window(T, m), 1)
    distances = np.linalg.norm(normalized_windows - core.z_norm(Q), axis=1)
    expected = distances ** 2

    # Gather the precomputed inputs the function under test consumes
    QT = core.sliding_dot_product(Q, T)
    μ_Q, σ_Q = core.compute_mean_std(Q, m)
    M_T, Σ_T = core.compute_mean_std(T, m)
    actual = core._calculate_squared_distance_profile(
        m, QT, μ_Q.item(0), σ_Q.item(0), M_T, Σ_T
    )

    npt.assert_almost_equal(expected, actual)
Example #5
0
def test_welford_nanstd():
    """Check `core.welford_nanstd` against `np.nanstd`, globally and per window."""
    series = np.random.rand(64)
    window = 10

    # Whole-series standard deviation
    npt.assert_almost_equal(np.nanstd(series), core.welford_nanstd(series))

    # One standard deviation per length-`window` rolling window
    expected_rolling = np.nanstd(core.rolling_window(series, window), axis=1)
    actual_rolling = core.welford_nanstd(series, window)
    npt.assert_almost_equal(expected_rolling, actual_rolling)
Example #6
0
def aamp(T_A, m, T_B=None):
    """Brute-force nearest-neighbor table between length-`m` windows.

    Every window of `T_B` is compared against every window of `T_A` with
    `cdist`. When `T_B` is omitted, `T_A` is compared with itself and a band
    around the diagonal is excluded so that a window cannot match itself or
    its closest neighbors.

    Parameters
    ----------
    T_A : array_like
        Series whose windows are the match candidates. A copy is used, so the
        input is not modified.
    m : int
        Window length.
    T_B : array_like, optional
        Series whose windows are the queries. Defaults to a copy of `T_A`,
        which also switches on the diagonal exclusion band.

    Returns
    -------
    numpy.ndarray
        Object array of shape `(T_B.shape[0] - m + 1, 2)`. Column 0 holds the
        smallest distance found for each window of `T_B`; column 1 holds the
        index of the `T_A` window that achieved it, or -1 when that smallest
        distance is infinite.
    """
    T_A = np.asarray(T_A).copy()

    if T_B is None:
        T_B = T_A.copy()
        ignore_trivial = True
    else:
        T_B = np.asarray(T_B).copy()
        ignore_trivial = False

    # Infinite samples are turned into NaN; every NaN distance they cause is
    # mapped to inf further down.
    T_A[np.isinf(T_A)] = np.nan
    T_B[np.isinf(T_B)] = np.nan

    rolling_T_A = core.rolling_window(T_A, m)
    rolling_T_B = core.rolling_window(T_B, m)

    l = T_B.shape[0] - m + 1
    out = np.empty((l, 2), dtype=object)

    D = cdist(rolling_T_B, rolling_T_A)

    if ignore_trivial:
        excl_zone = int(np.ceil(m / 4))
        n_windows = D.shape[0]
        # Band |i - j| <= excl_zone: everything on/below the `excl_zone`-th
        # superdiagonal minus everything on/below the `-(excl_zone + 1)`-th.
        # The builtin `bool` is used because the `np.bool` alias was removed
        # from NumPy.
        excl_zone_mask = np.tri(
            n_windows, n_windows, excl_zone, dtype=bool
        ) & ~np.tri(n_windows, n_windows, -(excl_zone + 1), dtype=bool)
        D[excl_zone_mask] = np.inf

    D[np.isnan(D)] = np.inf

    I = D.argmin(axis=1)
    P = D[np.arange(D.shape[0]), I]

    # No finite neighbor exists for these rows
    I[np.isinf(P)] = -1

    out[:, 0] = P
    out[:, 1] = I

    return out
Example #7
0
def test_one_constant_subsequence_A_B_join(dask_client):
    """AB-join check for `stumped` when `T_B` contains constant stretches.

    Only the distance column is compared; indices are ignored.
    """
    T_A = np.random.rand(20)
    # 20 zeros followed by 5 ones
    T_B = np.concatenate(
        [np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)]
    )
    m = 3

    rows = [utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B, m)]
    expected = np.array(rows, dtype=object)
    actual = stumped(dask_client, T_A, m, T_B, ignore_trivial=False)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # ignore indices
Example #8
0
def test_rolling_isfinite():
    """Check `core.rolling_isfinite` against an all-finite test per window."""
    values = np.arange(12).astype(np.float64)
    width = 3

    # Scatter a few NaNs through the series
    values[[1, 5, 9]] = np.nan

    finite_windows = core.rolling_window(np.isfinite(values), width)
    expected = np.all(finite_windows, axis=1)
    actual = core.rolling_isfinite(values, width)

    npt.assert_almost_equal(expected, actual)
Example #9
0
def test_stump_self_join(T_A, T_B):
    """Self-join check: `stump(T_B, m)` must match per-window `naive_mass`."""
    m = 3
    zone = int(np.ceil(m / 4))

    rows = []
    for i, Q in enumerate(core.rolling_window(T_B, m)):
        rows.append(naive_mass(Q, T_B, m, i, zone, True))
    expected = np.array(rows, dtype=object)

    actual = stump(T_B, m, ignore_trivial=True)

    replace_inf(expected)
    replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
Example #10
0
def test_prescrump(T):
    """Run `prescrump` with `s=1` next to a per-window `naive_mass` reference.

    NOTE(review): the reference (`left`) and the result (`right`) are computed
    but never compared, so this test can only fail if one of the calls raises.
    Confirm the return layout of `prescrump` and add the missing assertion.
    """
    m = 3
    zone = int(np.ceil(m / 4))

    reference_rows = []
    for i, Q in enumerate(core.rolling_window(T, m)):
        reference_rows.append(utils.naive_mass(Q, T, m, i, zone, True))
    left = np.array(reference_rows, dtype=object)

    μ, σ = core.compute_mean_std(T, m)
    # Note that the below code only works for `s=1`
    right = prescrump(T, m, μ, σ, s=1)
Example #11
0
def test_stump_A_B_join(T_A, T_B):
    """AB-join check for `stump` with both array and `pd.Series` inputs."""
    m = 3

    rows = [utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B, m)]
    expected = np.array(rows, dtype=object)
    utils.replace_inf(expected)

    # Inputs passed through unchanged
    actual = stump(T_A, m, T_B, ignore_trivial=False)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)

    # Same inputs wrapped in pandas Series
    actual = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
Example #12
0
def test_stumped_A_B_join_df(T_A, T_B, dask_client):
    """AB-join check for `stumped` when both series are `pd.Series`."""
    m = 3

    rows = [naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B, m)]
    expected = np.array(rows, dtype=object)

    series_A = pd.Series(T_A)
    series_B = pd.Series(T_B)
    actual = stumped(dask_client, series_A, m, series_B, ignore_trivial=False)

    replace_inf(expected)
    replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
Example #13
0
def test_mass_T_nan(Q, T):
    """Check `core.mass` when one entry of (a copy of) `T` is NaN.

    NaN entries of the directly computed reference profile are expected to
    come back from `core.mass` as `inf`.
    """
    query = Q.copy()
    series = T.copy()
    series[1] = np.nan
    m = query.shape[0]

    normalized_windows = core.z_norm(core.rolling_window(series, m), 1)
    expected = np.linalg.norm(normalized_windows - core.z_norm(query), axis=1)
    expected[np.isnan(expected)] = np.inf

    actual = core.mass(query, series)
    npt.assert_almost_equal(expected, actual)
Example #14
0
def aamp_distance_profile(Q, T, m):
    """Return the non-normalized distance from `Q` to every window of `T`.

    Infinite entries of either input are first replaced by NaN (on a copy, so
    the caller's arrays are left untouched); the result is the row-wise norm
    of `core.rolling_window(T, m) - Q`. NaN distances are returned as-is.
    """

    def _inf_to_nan(arr):
        # Copy only when there is something to replace
        inf_mask = np.isinf(arr)
        if np.any(inf_mask):
            arr = arr.copy()
            arr[inf_mask] = np.nan
        return arr

    T = _inf_to_nan(T)
    Q = _inf_to_nan(Q)

    return np.linalg.norm(core.rolling_window(T, m) - Q, axis=1)
Example #15
0
def naive_prescrump(T_A, m, T_B, s, exclusion_zone=None):
    """Reference PRESCRUMP-style approximation built from a full distance matrix.

    Parameters
    ----------
    T_A : numpy.ndarray
        Series whose length-`m` windows index the rows of the distance matrix.
    m : int
        Window length.
    T_B : numpy.ndarray
        Series each window of `T_A` is profiled against.
    s : int
        Sampling stride; every `s`-th index is visited, in random order.
    exclusion_zone : int, optional
        Half-width of the band around index `i` that is masked with `inf`
        before searching for the nearest neighbor. When None, nothing is
        masked.

    Returns
    -------
    P : numpy.ndarray
        Best distance found for each index (`inf` where none was found).
    I : numpy.ndarray
        `int64` index of the neighbor achieving `P` (-1 where none was found).
    """
    distance_matrix = np.array([
        utils.naive_distance_profile(Q, T_B, m)
        for Q in core.rolling_window(T_A, m)
    ])

    n_B = T_B.shape[0]
    l = n_B - m + 1

    P = np.empty(l)
    I = np.empty(l, dtype=np.int64)
    P[:] = np.inf
    I[:] = -1

    for i in np.random.permutation(range(0, l, s)):
        # `distance_profile` is a view: masking it also masks the matrix row
        distance_profile = distance_matrix[i]
        if exclusion_zone is not None:
            # Previously the default `None` crashed here on `i - None`
            zone_start = max(0, i - exclusion_zone)
            zone_stop = min(l, i + exclusion_zone)
            distance_profile[zone_start:zone_stop + 1] = np.inf

        # Nearest neighbor of the sampled index itself
        I[i] = np.argmin(distance_profile)
        P[i] = distance_profile[I[i]]
        if P[i] == np.inf:
            I[i] = -1

        # The same profile may also improve every other index
        for j in range(l):
            if distance_profile[j] < P[j]:
                P[j] = distance_profile[j]
                I[j] = i

        # Walk forward along the diagonal starting at (i, j)
        j = I[i]
        for k in range(1, min(s, l - max(i, j))):
            d = distance_matrix[i + k, j + k]
            if d < P[i + k]:
                P[i + k] = d
                I[i + k] = j + k
            if d < P[j + k]:
                P[j + k] = d
                I[j + k] = i + k

        # Walk backward along the same diagonal
        for k in range(1, min(s, i + 1, j + 1)):
            d = distance_matrix[i - k, j - k]
            if d < P[i - k]:
                P[i - k] = d
                I[i - k] = j - k
            if d < P[j - k]:
                P[j - k] = d
                I[j - k] = i - k

    return P, I
Example #16
0
def test_stumped_self_join_df(T_A, T_B, dask_client):
    """Self-join check for `stumped` when the series is a `pd.Series`."""
    m = 3
    zone = int(np.ceil(m / 4))

    rows = []
    for i, Q in enumerate(core.rolling_window(T_B, m)):
        rows.append(utils.naive_mass(Q, T_B, m, i, zone, True))
    expected = np.array(rows, dtype=object)

    actual = stumped(dask_client, pd.Series(T_B), m, ignore_trivial=True)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
Example #17
0
def naive_right_mp(data, m):
    """Rebuild column 0 of `stump(data, m)` from its column-3 indices.

    For every window, the window starting at the index stored in column 3 is
    gathered and the distance between the two `core.z_norm`-ed windows is
    written to column 0. Rows whose column-3 index is negative get a distance
    of `inf` and have column 3 replaced by their own row index.
    """
    mp = stump(data, m)
    n_rows = mp.shape[0]

    # Gather the window that starts at each stored index
    gather_idx = np.array([np.arange(IR, IR + m) for IR in mp[:, 3].tolist()])
    right_nn = np.zeros((n_rows, m))
    right_nn[:] = data[gather_idx]

    own_windows = core.z_norm(core.rolling_window(data, m), 1)
    mp[:, 0] = np.linalg.norm(own_windows - core.z_norm(right_nn, 1), axis=1)

    # Negative stored index: no neighbor was recorded for that row
    missing = np.flatnonzero(mp[:, 3] < 0)
    mp[missing, 0] = np.inf
    mp[missing, 3] = missing

    return mp
Example #18
0
def test_mass_Q_inf(Q, T):
    """Check `core.mass` when one entry of (a copy of) `Q` is `inf`.

    NaN entries of the directly computed reference profile are expected to
    come back from `core.mass` as `inf`.
    """
    Q = Q.copy()
    Q[1] = np.inf
    T = T.copy()
    m = Q.shape[0]

    left = np.linalg.norm(
        core.z_norm(core.rolling_window(T, m), 1) - core.z_norm(Q), axis=1
    )
    left[np.isnan(left)] = np.inf

    right = core.mass(Q, T)
    npt.assert_almost_equal(left, right)
    # A trailing `T[1] = 1e10` used to follow the assertion; it wrote to the
    # local copy after the last read and had no effect, so it was dropped.
Example #19
0
def naive_mass(Q, T, m, trivial_idx=None, excl_zone=0):
    """Return the smallest normalized distance from `Q` to a window of `T`.

    Parameters
    ----------
    Q : numpy.ndarray
        Query window.
    T : numpy.ndarray
        Series whose length-`m` windows are compared with `Q`.
    m : int
        Window length.
    trivial_idx : int, optional
        When given, distances in `[trivial_idx - excl_zone,
        trivial_idx + excl_zone)` (clipped to the valid range) are set to
        `inf` before the minimum is taken.
    excl_zone : int, optional
        Half-width of that masked range.

    Returns
    -------
    tuple
        `(P, I)`: the minimum distance and its index, with `I == -1` when the
        minimum is `inf`.
    """
    normalized_windows = utils.z_norm(core.rolling_window(T, m), 1)
    profile = np.linalg.norm(normalized_windows - utils.z_norm(Q), axis=1)

    if trivial_idx is not None:
        lo = max(0, trivial_idx - excl_zone)
        hi = min(T.shape[0] - Q.shape[0] + 1, trivial_idx + excl_zone)
        # NOTE(review): this slice is half-open, so index `trivial_idx +
        # excl_zone` is NOT masked and the zone is asymmetric — confirm this
        # matches the implementation the result is compared against.
        profile[lo:hi] = np.inf

    best_idx = np.argmin(profile)
    best = profile[best_idx]

    return (best, -1) if best == np.inf else (best, best_idx)
Example #20
0
def test_welford_nanvar_nan():
    """Check `core.welford_nanvar` against `np.nanvar` on data containing NaNs."""
    series = np.random.rand(64)
    window = 10

    # Two isolated NaNs plus a run of five consecutive ones
    series[[1, 10]] = np.nan
    series[13:18] = np.nan

    # Whole-series variance
    npt.assert_almost_equal(np.nanvar(series), core.welford_nanvar(series))

    # One variance per length-`window` rolling window
    expected_rolling = np.nanvar(core.rolling_window(series, window), axis=1)
    actual_rolling = core.welford_nanvar(series, window)
    npt.assert_almost_equal(expected_rolling, actual_rolling)
def test_stumped_one_subsequence_inf_A_B_join(
    T_A, T_B, substitution_location_B, dask_client
):
    """AB-join check for `stumped` with one `inf` substituted into `T_B`."""
    m = 3

    # Inject the inf into a copy so the caller's `T_B` stays intact
    T_B_sub = T_B.copy()
    T_B_sub[substitution_location_B] = np.inf

    rows = [utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B_sub, m)]
    expected = np.array(rows, dtype=object)
    actual = stumped(dask_client, T_A, m, T_B_sub, ignore_trivial=False)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
Example #22
0
def test_stump_self_join_larger_window(T_A, T_B, dask_client):
    """Self-join check for `stumped` with window lengths 8, 16 and 32.

    Window lengths that are not strictly shorter than `T_B` are skipped.
    """
    for m in [8, 16, 32]:
        if len(T_B) <= m:
            continue

        zone = int(np.ceil(m / 4))
        rows = []
        for i, Q in enumerate(core.rolling_window(T_B, m)):
            rows.append(utils.naive_mass(Q, T_B, m, i, zone, True))
        expected = np.array(rows, dtype=object)

        actual = stumped(dask_client, T_B, m, ignore_trivial=True)

        utils.replace_inf(expected)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected, actual)
Example #23
0
def test_get_multi_QT(T, m):
    """Check `_get_multi_QT` against per-row naive rolling dot products.

    For each row of `T`, both returned arrays are compared with the dot
    product of that row's window starting at `start` (here 0) and of its
    first `m` values against the full row.
    """
    start = 0
    Q = core.rolling_window(T, m)
    out_shape = (Q.shape[0], Q.shape[1])
    expected_QT = np.empty(out_shape, dtype="float64")
    expected_QT_first = np.empty(out_shape, dtype="float64")

    for dim, row in enumerate(T):
        expected_QT[dim] = naive_rolling_window_dot_product(
            row[start:start + m], row
        )
        expected_QT_first[dim] = naive_rolling_window_dot_product(row[:m], row)

    actual_QT, actual_QT_first = _get_multi_QT(start, T, m)

    npt.assert_almost_equal(expected_QT, actual_QT)
    npt.assert_almost_equal(expected_QT_first, actual_QT_first)
Example #24
0
def test_one_constant_subsequence_self_join_df(dask_client):
    """Self-join check for `stumped` on a `pd.Series` with constant stretches.

    Only the distance column is compared; indices are ignored.
    """
    # 20 zeros followed by 5 ones
    T_A = np.concatenate(
        [np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)]
    )
    m = 3
    zone = int(np.ceil(m / 4))

    rows = []
    for i, Q in enumerate(core.rolling_window(T_A, m)):
        rows.append(utils.naive_mass(Q, T_A, m, i, zone, True))
    expected = np.array(rows, dtype=object)

    actual = stumped(dask_client, pd.Series(T_A), m, ignore_trivial=True)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # ignore indices
Example #25
0
def naive_mass(Q, T, m, trivial_idx=None, excl_zone=0, ignore_trivial=False):
    """Return the nearest-neighbor distance/index of `Q` among windows of `T`.

    Infinite values in copies of `Q` and `T` are replaced by NaN, and NaN
    distances are treated as `inf`.

    Parameters
    ----------
    Q : numpy.ndarray
        Query window.
    T : numpy.ndarray
        Series whose length-`m` windows are compared with `Q`.
    m : int
        Window length.
    trivial_idx : int, optional
        Position of `Q` inside `T`; read only when `ignore_trivial` is true.
    excl_zone : int, optional
        Half-width of the range around `trivial_idx` that is masked with
        `inf` (both ends included) when `ignore_trivial` is true.
    ignore_trivial : bool, optional
        Enables the exclusion range and the left/right neighbor search.

    Returns
    -------
    tuple
        `(P, I, IL, IR)`: minimum distance, its index, and the indices of the
        closest window strictly before / strictly after `trivial_idx`. Each
        index is -1 when no finite candidate exists; `IL` and `IR` are always
        -1 when `ignore_trivial` is false.
    """
    T = T.copy()
    Q = Q.copy()
    T[np.isinf(T)] = np.nan
    Q[np.isinf(Q)] = np.nan

    D = np.linalg.norm(z_norm(core.rolling_window(T, m), 1) - z_norm(Q), axis=1)
    if ignore_trivial:
        start = max(0, trivial_idx - excl_zone)
        stop = min(T.shape[0] - Q.shape[0] + 1, trivial_idx + excl_zone)
        D[start:stop + 1] = np.inf
    D[np.isnan(D)] = np.inf

    I = np.argmin(D)
    P = D[I]
    if P == np.inf:
        I = -1

    def _closest(candidates):
        # First index with the strictly smallest finite distance, or -1
        best_idx, best = -1, np.inf
        for i in candidates:
            if D[i] < best:
                best_idx, best = i, D[i]
        # Anything landing inside the exclusion range is discarded
        if start <= best_idx < stop:
            best_idx = -1
        return best_idx

    # Get left and right matrix profiles for self-joins
    IL = -1
    IR = -1
    if ignore_trivial:
        if trivial_idx > 0:
            IL = _closest(range(trivial_idx))
        if trivial_idx + 1 < D.shape[0]:
            IR = _closest(range(trivial_idx + 1, D.shape[0]))

    return P, I, IL, IR
Example #26
0
def test_stamp_nan_zero_mean_self_join():
    """Self-join check for `stamp.stamp` on a short series containing `inf`.

    The exclusion zone here is `ceil(m / 2)`, and only the first two columns
    of the reference are compared with the result.
    """
    T = np.array([-1, 0, 1, np.inf, 1, 0, -1])
    m = 3
    zone = int(np.ceil(m / 2))

    rows = []
    for i, Q in enumerate(core.rolling_window(T, m)):
        rows.append(utils.naive_mass(Q, T, m, i, zone, True))
    expected = np.array(rows, dtype=object)

    actual = stamp.stamp(T, T, m, ignore_trivial=True)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, :2], actual)
Example #27
0
def test_scrump_nan_zero_mean_self_join():
    """Self-join check for `scrump` on a short series containing `inf`.

    Only the distance column of the last value yielded by `scrump` is
    compared with the reference.
    """
    T = np.array([-1, 0, 1, np.inf, 1, 0, -1])
    m = 3
    zone = int(np.ceil(m / 4))

    rows = []
    for i, Q in enumerate(core.rolling_window(T, m)):
        rows.append(utils.naive_mass(Q, T, m, i, zone, True))
    left = np.array(rows, dtype=object)

    # Exhaust the iterator; `right` ends up bound to the final yielded value
    for right in scrump(T, m):
        pass

    utils.replace_inf(left)
    utils.replace_inf(right)
    npt.assert_almost_equal(left[:, 0], right[:, 0])
Example #28
0
def naive_multi_mass(Q, T, m):
    """Return running means of the sorted per-row distance profiles.

    `T` is unpacked as `d` rows of length `n`. For each row `i`, the distance
    between `z_norm`-ed `Q[i]` and every `z_norm`-ed window of `T[i]` is
    computed; the `d` profiles are then sorted along the row axis at each
    window position. Row `k` of the `(d, n - m + 1)` result is the mean of
    the `k + 1` smallest distances at each position.
    """
    d, n = T.shape
    n_windows = n - m + 1

    profiles = np.empty((d, n_windows))
    for row in range(d):
        normalized_windows = z_norm(core.rolling_window(T[row], m), 1)
        profiles[row] = np.linalg.norm(
            normalized_windows - z_norm(Q[row]), axis=1
        )

    # Ascending across rows, independently at each window position
    profiles = np.sort(profiles, axis=0)

    running_total = np.zeros(n_windows)
    running_mean = np.zeros((d, n_windows))
    for row in range(d):
        running_total += profiles[row]
        running_mean[row, :] = running_total / (row + 1)

    return running_mean
def test_stumped_nan_zero_mean_self_join(dask_cluster):
    """Self-join check for `stumped` on a short series containing `inf`.

    A `Client` is opened on `dask_cluster` for the duration of the test.
    """
    with Client(dask_cluster) as dask_client:
        T = np.array([-1, 0, 1, np.inf, 1, 0, -1])
        m = 3
        zone = int(np.ceil(m / 4))

        rows = []
        for i, Q in enumerate(core.rolling_window(T, m)):
            rows.append(utils.naive_mass(Q, T, m, i, zone, True))
        expected = np.array(rows, dtype=object)

        actual = stumped(dask_client, T, m, ignore_trivial=True)

        utils.replace_inf(expected)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected, actual)
Example #30
0
def test_scrimp_self_join(T):
    """Self-join check for `scrimp` at `percentage=1.0`, array and Series input.

    Only the distance column is compared with the per-window reference.
    """
    m = 3
    zone = int(np.ceil(m / 4))

    rows = []
    for i, Q in enumerate(core.rolling_window(T, m)):
        rows.append(utils.naive_mass(Q, T, m, i, zone, True))
    expected = np.array(rows, dtype=object)

    # Input passed through unchanged
    actual = scrimp(T, m, percentage=1.0)
    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])

    # Same input wrapped in a pandas Series
    actual = scrimp(pd.Series(T), m, percentage=1.0)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])