def test_mass_nan(Q, T):
    """Check `core.mass` against a brute-force profile when `T` holds a NaN.

    The reference is the Euclidean norm between every z-normalized
    length-`m` window of `T` and the z-normalized `Q`; any NaN entries in
    the reference are replaced with `np.inf` before the comparison.

    Parameters
    ----------
    Q : query array; its length is used as the window size `m`.
    T : time series array; a private copy is modified, never the argument.
    """
    # Work on a copy so the NaN is not written into the caller's array
    # (which may be shared with other tests).
    T = T.copy()
    T[1] = np.nan
    m = Q.shape[0]

    left = np.linalg.norm(
        core.z_norm(core.rolling_window(T, m), 1) - core.z_norm(Q), axis=1
    )
    left[np.isnan(left)] = np.inf

    right = core.mass(Q, T)
    npt.assert_almost_equal(left, right)
def test_calculate_distance_profile(Q, T):
    """Compare `core.calculate_distance_profile` with a brute-force reference.

    The reference is the Euclidean norm between each z-normalized length-`m`
    window of `T` and the z-normalized query `Q`, where `m` is `Q.shape[0]`.
    """
    m = Q.shape[0]

    # Brute-force reference.
    normalized_windows = core.z_norm(core.rolling_window(T, m), 1)
    expected = np.linalg.norm(normalized_windows - core.z_norm(Q), axis=1)

    # Inputs required by the function under test.
    dot_products = core.sliding_dot_product(Q, T)
    Q_mean, Q_std = core.compute_mean_std(Q, m)
    T_means, T_stds = core.compute_mean_std(T, m)
    actual = core.calculate_distance_profile(
        m, dot_products, Q_mean.item(0), Q_std.item(0), T_means, T_stds
    )

    npt.assert_almost_equal(expected, actual)
def test_mass_absolute_T_inf(Q, T):
    """Check `core.mass_absolute` when `T` contains an infinite value.

    The reference is the plain (non-normalized) Euclidean norm between each
    length-`m` window of `T` and `Q`; NaN entries in the reference are
    replaced with `np.inf` before the comparison.
    """
    # Private copies: only the copy of `T` receives the inf.
    Q = Q.copy()
    T = T.copy()
    T[1] = np.inf
    m = Q.shape[0]

    windows = core.rolling_window(T, m)
    expected = np.linalg.norm(windows - Q, axis=1)
    nan_mask = np.isnan(expected)
    expected[nan_mask] = np.inf

    actual = core.mass_absolute(Q, T)
    npt.assert_almost_equal(expected, actual)
def test_calculate_squared_distance_profile(Q, T):
    """Compare `core._calculate_squared_distance_profile` with brute force.

    The reference is the square of the Euclidean norm between each
    z-normalized length-`m` window of `T` and the z-normalized `Q`.
    """
    m = Q.shape[0]

    # Brute-force reference: distance first, then squared.
    normalized_windows = core.z_norm(core.rolling_window(T, m), 1)
    distances = np.linalg.norm(normalized_windows - core.z_norm(Q), axis=1)
    expected = distances ** 2

    dot_products = core.sliding_dot_product(Q, T)
    Q_mean, Q_std = core.compute_mean_std(Q, m)
    T_means, T_stds = core.compute_mean_std(T, m)
    actual = core._calculate_squared_distance_profile(
        m, dot_products, Q_mean.item(0), Q_std.item(0), T_means, T_stds
    )

    npt.assert_almost_equal(expected, actual)
def test_welford_nanstd():
    """Compare `core.welford_nanstd` with `np.nanstd` on random data.

    Two cases are checked: the whole series, and every length-`m` rolling
    window of the series.
    """
    series = np.random.rand(64)
    window = 10

    # Whole-series case.
    expected = np.nanstd(series)
    actual = core.welford_nanstd(series)
    npt.assert_almost_equal(expected, actual)

    # Rolling-window case.
    expected = np.nanstd(core.rolling_window(series, window), axis=1)
    actual = core.welford_nanstd(series, window)
    npt.assert_almost_equal(expected, actual)
def aamp(T_A, m, T_B=None):
    """Brute-force non-normalized matrix profile built from `cdist`.

    Parameters
    ----------
    T_A : array-like time series; converted with `np.asarray` and copied.
    m : window size.
    T_B : optional second series. When omitted, a copy of `T_A` is used
        and a band around the diagonal of the distance matrix is excluded.

    Returns
    -------
    out : object ndarray with one row per length-`m` window of `T_B`.
        Column 0 holds the smallest distance found for that window and
        column 1 the index of the `T_A` window achieving it, or -1 when
        that smallest distance is infinite.
    """
    T_A = np.asarray(T_A).copy()
    if T_B is None:
        T_B = T_A.copy()
        ignore_trivial = True
    else:
        T_B = np.asarray(T_B).copy()
        ignore_trivial = False

    # Infinite samples are turned into NaN so that every window touching
    # them produces a NaN distance, which is mapped to inf further down.
    T_A[np.isinf(T_A)] = np.nan
    T_B[np.isinf(T_B)] = np.nan

    rolling_T_A = core.rolling_window(T_A, m)
    rolling_T_B = core.rolling_window(T_B, m)

    n_windows = T_B.shape[0] - m + 1
    out = np.empty((n_windows, 2), dtype=object)

    D = cdist(rolling_T_B, rolling_T_A)

    if ignore_trivial:
        excl_zone = int(np.ceil(m / 4))
        size = D.shape[0]
        # Band of half-width `excl_zone` around the main diagonal.
        # The builtin `bool` is used because the `np.bool` alias was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        upper = np.tri(size, size, excl_zone, dtype=bool)
        lower = np.tri(size, size, -(excl_zone + 1), dtype=bool)
        D[upper & ~lower] = np.inf

    D[np.isnan(D)] = np.inf

    I = D.argmin(axis=1)
    P = D[np.arange(D.shape[0]), I]
    I[np.isinf(P)] = -1  # no finite neighbour for this window

    out[:, 0] = P
    out[:, 1] = I
    return out
def test_one_constant_subsequence_A_B_join(dask_client):
    """AB-join of `stumped` where `T_B` is 20 zeros followed by 5 ones.

    Only the first column of the result is compared against the values
    produced by `utils.naive_mass`; the remaining columns are ignored.
    """
    T_A = np.random.rand(20)
    zeros = np.zeros(20, dtype=np.float64)
    ones = np.ones(5, dtype=np.float64)
    T_B = np.concatenate((zeros, ones))
    m = 3

    queries = core.rolling_window(T_B, m)
    expected = np.array(
        [utils.naive_mass(query, T_A, m) for query in queries], dtype=object
    )
    actual = stumped(dask_client, T_A, m, T_B, ignore_trivial=False)

    utils.replace_inf(expected)
    utils.replace_inf(actual)

    npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # ignore indices
def test_rolling_isfinite():
    """Compare `core.rolling_isfinite` with an `np.all` over rolling windows.

    The input is 0..11 as float64 with NaN written at positions 1, 5 and 9;
    a window counts as finite only if every element in it is finite.
    """
    values = np.arange(12).astype(np.float64)
    window = 3
    for position in (1, 5, 9):
        values[position] = np.nan

    finite_flags = np.isfinite(values)
    expected = np.all(core.rolling_window(finite_flags, window), axis=1)
    actual = core.rolling_isfinite(values, window)

    npt.assert_almost_equal(expected, actual)
def test_stump_self_join(T_A, T_B):
    """Self-join of `stump` on `T_B` compared with `naive_mass` per window.

    `T_A` is accepted but not used. The exclusion zone passed to
    `naive_mass` is `ceil(m / 4)`.
    """
    m = 3
    zone = int(np.ceil(m / 4))

    expected_rows = []
    for idx, query in enumerate(core.rolling_window(T_B, m)):
        expected_rows.append(naive_mass(query, T_B, m, idx, zone, True))
    expected = np.array(expected_rows, dtype=object)

    actual = stump(T_B, m, ignore_trivial=True)

    replace_inf(expected)
    replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
def test_prescrump(T):
    """Run `prescrump` with `s=1` alongside a `utils.naive_mass` reference.

    NOTE(review): as written this test only builds the reference and calls
    `prescrump`; nothing compares the two, so it passes whenever neither
    call raises. Confirm what `prescrump` returns and add the missing
    assertion.
    """
    m = 3
    zone = int(np.ceil(m / 4))

    subsequences = core.rolling_window(T, m)
    left = np.array(
        [
            utils.naive_mass(query, T, m, idx, zone, True)
            for idx, query in enumerate(subsequences)
        ],
        dtype=object,
    )

    means, stds = core.compute_mean_std(T, m)
    # Note that the below code only works for `s=1`
    right = prescrump(T, m, means, stds, s=1)
def test_stump_A_B_join(T_A, T_B):
    """AB-join of `stump` compared with `utils.naive_mass` per `T_B` window.

    The comparison is made twice: once with the inputs as given and once
    with both inputs wrapped in `pd.Series`.
    """
    m = 3

    queries = core.rolling_window(T_B, m)
    expected = np.array(
        [utils.naive_mass(query, T_A, m) for query in queries], dtype=object
    )
    utils_replace = utils.replace_inf

    # Inputs as given.
    actual = stump(T_A, m, T_B, ignore_trivial=False)
    utils_replace(expected)
    utils_replace(actual)
    npt.assert_almost_equal(expected, actual)

    # pandas Series inputs.
    actual = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False)
    utils_replace(actual)
    npt.assert_almost_equal(expected, actual)
def test_stumped_A_B_join_df(T_A, T_B, dask_client):
    """AB-join of `stumped` with `pd.Series` inputs vs. `naive_mass`."""
    m = 3

    expected_rows = [
        naive_mass(query, T_A, m) for query in core.rolling_window(T_B, m)
    ]
    expected = np.array(expected_rows, dtype=object)

    series_A = pd.Series(T_A)
    series_B = pd.Series(T_B)
    actual = stumped(dask_client, series_A, m, series_B, ignore_trivial=False)

    replace_inf(expected)
    replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
def test_mass_T_nan(Q, T):
    """Check `core.mass` against brute force when `T` contains a NaN.

    Copies of both inputs are used; the NaN is written at index 1 of the
    copy of `T`. NaN entries of the brute-force reference are replaced with
    `np.inf` before the comparison.
    """
    Q = Q.copy()
    T = T.copy()
    T[1] = np.nan
    m = Q.shape[0]

    normalized_windows = core.z_norm(core.rolling_window(T, m), 1)
    expected = np.linalg.norm(normalized_windows - core.z_norm(Q), axis=1)
    nan_mask = np.isnan(expected)
    expected[nan_mask] = np.inf

    actual = core.mass(Q, T)
    npt.assert_almost_equal(expected, actual)
def aamp_distance_profile(Q, T, m):
    """Return the non-normalized Euclidean distance from `Q` to each window.

    Infinite values in `T` or `Q` are replaced with NaN first. An input is
    copied only when it actually contains an infinity, so the caller's
    arrays are never modified.
    """
    inf_in_T = np.isinf(T)
    if np.any(inf_in_T):
        T = T.copy()
        T[inf_in_T] = np.nan

    inf_in_Q = np.isinf(Q)
    if np.any(inf_in_Q):
        Q = Q.copy()
        Q[inf_in_Q] = np.nan

    windows = core.rolling_window(T, m)
    return np.linalg.norm(windows - Q, axis=1)
def naive_prescrump(T_A, m, T_B, s, exclusion_zone=None):
    """Reference PreSCRIMP-style approximation built on a full distance matrix.

    Parameters
    ----------
    T_A : series whose length-`m` windows form the rows of the matrix.
    m : window size.
    T_B : series passed to `utils.naive_distance_profile` for every row.
    s : sampling stride; start indices are drawn from `range(0, l, s)` in a
        random order given by `np.random.permutation`.
    exclusion_zone : optional half-width of the band around index `i` that
        is set to `np.inf` in row `i`. With `None` (the default) no band is
        applied; the previous code raised `TypeError` in that case.

    Returns
    -------
    P, I : float array of best distances found and int64 array of the
        matching indices (-1 where nothing was found).

    NOTE(review): `l` is derived from `T_B` while the matrix rows come from
    `T_A`, so this appears to assume both have the same length — confirm.
    """
    distance_matrix = np.array(
        [
            utils.naive_distance_profile(Q, T_B, m)
            for Q in core.rolling_window(T_A, m)
        ]
    )

    n_B = T_B.shape[0]
    l = n_B - m + 1

    P = np.full(l, np.inf)
    I = np.full(l, -1, dtype=np.int64)

    for i in np.random.permutation(range(0, l, s)):
        # A view: the exclusion band is written into `distance_matrix` too,
        # and later diagonal reads see it.
        distance_profile = distance_matrix[i]
        if exclusion_zone is not None:
            zone_start = max(0, i - exclusion_zone)
            zone_stop = min(l, i + exclusion_zone)
            distance_profile[zone_start:zone_stop + 1] = np.inf

        I[i] = np.argmin(distance_profile)
        P[i] = distance_profile[I[i]]
        if P[i] == np.inf:
            I[i] = -1

        # Every entry of the sampled row may improve another window's best.
        for j in range(l):
            if distance_profile[j] < P[j]:
                P[j] = distance_profile[j]
                I[j] = i

        j = I[i]

        # Walk forward along the diagonal through (i, j).
        for k in range(1, min(s, l - max(i, j))):
            d = distance_matrix[i + k, j + k]
            if d < P[i + k]:
                P[i + k] = d
                I[i + k] = j + k
            if d < P[j + k]:
                P[j + k] = d
                I[j + k] = i + k

        # Walk backward along the same diagonal.
        for k in range(1, min(s, i + 1, j + 1)):
            d = distance_matrix[i - k, j - k]
            if d < P[i - k]:
                P[i - k] = d
                I[i - k] = j - k
            if d < P[j - k]:
                P[j - k] = d
                I[j - k] = i - k

    return P, I
def test_stumped_self_join_df(T_A, T_B, dask_client):
    """Self-join of `stumped` on `pd.Series(T_B)` vs. `utils.naive_mass`.

    `T_A` is accepted but not used. The exclusion zone passed to
    `utils.naive_mass` is `ceil(m / 4)`.
    """
    m = 3
    zone = int(np.ceil(m / 4))

    subsequences = core.rolling_window(T_B, m)
    expected = np.array(
        [
            utils.naive_mass(query, T_B, m, idx, zone, True)
            for idx, query in enumerate(subsequences)
        ],
        dtype=object,
    )

    actual = stumped(dask_client, pd.Series(T_B), m, ignore_trivial=True)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
def naive_right_mp(data, m):
    """Rewrite column 0 of `stump(data, m)` using the indices in column 3.

    For every row, the length-`m` window of `data` starting at the index
    stored in column 3 is gathered, and column 0 is replaced by the
    z-normalized distance between the row's own window and that gathered
    window. Rows whose column-3 index is negative get `np.inf` in column 0
    and their own row number in column 3.

    Returns the modified array produced by `stump`.
    """
    mp = stump(data, m)
    n_rows = mp.shape[0]

    # Gather, for each row, the window starting at its column-3 index.
    starts = mp[:, 3].tolist()
    gather_idx = np.array([np.arange(start, start + m) for start in starts])
    neighbours = np.zeros((n_rows, m))
    neighbours[:] = data[gather_idx]

    own_windows = core.z_norm(core.rolling_window(data, m), 1)
    mp[:, 0] = np.linalg.norm(own_windows - core.z_norm(neighbours, 1), axis=1)

    # Rows with a negative column-3 index: infinite distance, self index.
    missing = np.argwhere(mp[:, 3] < 0).flatten()
    mp[missing, 0] = np.inf
    mp[missing, 3] = missing

    return mp
def test_mass_Q_inf(Q, T):
    """Check `core.mass` against brute force when `Q` contains an infinity.

    Copies of both inputs are used; the infinity is written at index 1 of
    the copy of `Q`. NaN entries of the brute-force reference are replaced
    with `np.inf` before the comparison.
    """
    Q = Q.copy()
    Q[1] = np.inf
    T = T.copy()
    m = Q.shape[0]

    left = np.linalg.norm(
        core.z_norm(core.rolling_window(T, m), 1) - core.z_norm(Q), axis=1
    )
    left[np.isnan(left)] = np.inf

    right = core.mass(Q, T)
    npt.assert_almost_equal(left, right)
    # The former trailing `T[1] = 1e10` was removed: it wrote to the local
    # copy after the final assertion and was never read.
def naive_mass(Q, T, m, trivial_idx=None, excl_zone=0):
    """Brute-force z-normalized nearest neighbour of `Q` within `T`.

    Parameters
    ----------
    Q : query window.
    T : time series searched with length-`m` rolling windows.
    m : window size.
    trivial_idx : when given, distances in
        `[trivial_idx - excl_zone, trivial_idx + excl_zone)` (clamped to the
        profile) are set to `np.inf` before the minimum is taken.
    excl_zone : half-width used with `trivial_idx`.

    Returns
    -------
    (P, I) : smallest distance and its index; the index is -1 when the
        smallest distance is infinite.

    NOTE(review): the slice end is exclusive, so position
    `trivial_idx + excl_zone` itself stays unmasked (and nothing is masked
    when `excl_zone` is 0) — confirm this matches the code under test.
    """
    normalized_windows = utils.z_norm(core.rolling_window(T, m), 1)
    profile = np.linalg.norm(normalized_windows - utils.z_norm(Q), axis=1)

    if trivial_idx is not None:
        lo = max(0, trivial_idx - excl_zone)
        hi = min(T.shape[0] - Q.shape[0] + 1, trivial_idx + excl_zone)
        profile[lo:hi] = np.inf

    best_idx = np.argmin(profile)
    best_dist = profile[best_idx]
    if best_dist == np.inf:
        best_idx = -1

    return best_dist, best_idx
def test_welford_nanvar_nan():
    """Compare `core.welford_nanvar` with `np.nanvar` on data holding NaNs.

    NaNs are placed at index 1, index 10 and indices 13..17. Both the
    whole-series and the length-`m` rolling-window variants are checked.
    """
    series = np.random.rand(64)
    window = 10

    series[1] = np.nan
    series[10] = np.nan
    series[13:18] = np.nan

    # Whole-series case.
    expected = np.nanvar(series)
    actual = core.welford_nanvar(series)
    npt.assert_almost_equal(expected, actual)

    # Rolling-window case.
    expected = np.nanvar(core.rolling_window(series, window), axis=1)
    actual = core.welford_nanvar(series, window)
    npt.assert_almost_equal(expected, actual)
def test_stumped_one_subsequence_inf_A_B_join(
    T_A, T_B, substitution_location_B, dask_client
):
    """AB-join of `stumped` after writing one `np.inf` into a copy of `T_B`.

    The infinity is placed at `substitution_location_B`; the result is
    compared with `utils.naive_mass` evaluated per window of the modified
    series.
    """
    m = 3

    T_B_sub = T_B.copy()
    T_B_sub[substitution_location_B] = np.inf

    queries = core.rolling_window(T_B_sub, m)
    expected = np.array(
        [utils.naive_mass(query, T_A, m) for query in queries],
        dtype=object,
    )
    actual = stumped(dask_client, T_A, m, T_B_sub, ignore_trivial=False)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
def test_stump_self_join_larger_window(T_A, T_B, dask_client):
    """Self-join of `stumped` on `T_B` for window sizes 8, 16 and 32.

    A window size is skipped unless `T_B` is strictly longer than it.
    `T_A` is accepted but not used.
    """
    for m in (8, 16, 32):
        if not len(T_B) > m:
            continue

        zone = int(np.ceil(m / 4))
        subsequences = core.rolling_window(T_B, m)
        expected = np.array(
            [
                utils.naive_mass(query, T_B, m, idx, zone, True)
                for idx, query in enumerate(subsequences)
            ],
            dtype=object,
        )
        actual = stumped(dask_client, T_B, m, ignore_trivial=True)

        utils.replace_inf(expected)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected, actual)
def test_get_multi_QT(T, m):
    """Compare `_get_multi_QT` with per-row naive sliding dot products.

    For each row of `T`, the reference dot products are computed with
    `naive_rolling_window_dot_product` for the window starting at `start`
    (0 here) and for the first window of the row.
    """
    start = 0
    Q = core.rolling_window(T, m)
    out_shape = (Q.shape[0], Q.shape[1])

    expected_QT = np.empty(out_shape, dtype="float64")
    expected_QT_first = np.empty(out_shape, dtype="float64")

    for dim in range(T.shape[0]):
        row = T[dim]
        expected_QT[dim] = naive_rolling_window_dot_product(
            T[dim, start:start + m], row
        )
        expected_QT_first[dim] = naive_rolling_window_dot_product(
            T[dim, :m], row
        )

    actual_QT, actual_QT_first = _get_multi_QT(start, T, m)

    npt.assert_almost_equal(expected_QT, actual_QT)
    npt.assert_almost_equal(expected_QT_first, actual_QT_first)
def test_one_constant_subsequence_self_join_df(dask_client):
    """Self-join of `stumped` on a `pd.Series` of 20 zeros then 5 ones.

    Only the first column of the result is compared against the values
    produced by `utils.naive_mass`; the remaining columns are ignored.
    """
    zeros = np.zeros(20, dtype=np.float64)
    ones = np.ones(5, dtype=np.float64)
    T_A = np.concatenate((zeros, ones))
    m = 3
    zone = int(np.ceil(m / 4))

    subsequences = core.rolling_window(T_A, m)
    expected = np.array(
        [
            utils.naive_mass(query, T_A, m, idx, zone, True)
            for idx, query in enumerate(subsequences)
        ],
        dtype=object,
    )
    actual = stumped(dask_client, pd.Series(T_A), m, ignore_trivial=True)

    utils.replace_inf(expected)
    utils.replace_inf(actual)

    npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # ignore indices
def naive_mass(Q, T, m, trivial_idx=None, excl_zone=0, ignore_trivial=False):
    """Brute-force z-normalized nearest neighbour with left/right indices.

    Parameters
    ----------
    Q : query window (copied; infinities in the copy become NaN).
    T : time series (copied; infinities in the copy become NaN).
    m : window size.
    trivial_idx : position of `Q` within `T`; used only when
        `ignore_trivial` is true.
    excl_zone : half-width of the band around `trivial_idx` set to
        `np.inf` when `ignore_trivial` is true.
    ignore_trivial : enables the exclusion band and the left/right search.

    Returns
    -------
    (P, I, IL, IR) : smallest distance, its index (-1 if that distance is
        infinite), and the indices of the smallest finite distance strictly
        before / strictly after `trivial_idx` (-1 when not computed or not
        found).
    """
    T = T.copy()
    Q = Q.copy()
    T[np.isinf(T)] = np.nan
    Q[np.isinf(Q)] = np.nan

    normalized_windows = z_norm(core.rolling_window(T, m), 1)
    D = np.linalg.norm(normalized_windows - z_norm(Q), axis=1)

    if ignore_trivial:
        start = max(0, trivial_idx - excl_zone)
        stop = min(T.shape[0] - Q.shape[0] + 1, trivial_idx + excl_zone)
        D[start:stop + 1] = np.inf
    D[np.isnan(D)] = np.inf

    I = np.argmin(D)
    P = D[I]
    if P == np.inf:
        I = -1

    # Get left and right matrix profiles for self-joins
    IL = -1
    if ignore_trivial and trivial_idx > 0:
        best_left = np.inf
        for pos in range(trivial_idx):
            if D[pos] < best_left:
                IL = pos
                best_left = D[pos]
        if start <= IL < stop:
            IL = -1

    IR = -1
    if ignore_trivial and trivial_idx + 1 < D.shape[0]:
        best_right = np.inf
        for pos in range(trivial_idx + 1, D.shape[0]):
            if D[pos] < best_right:
                IR = pos
                best_right = D[pos]
        if start <= IR < stop:
            IR = -1

    return P, I, IL, IR
def test_stamp_nan_zero_mean_self_join():
    """Self-join of `stamp.stamp` on a short series containing `np.inf`.

    The exclusion zone passed to `utils.naive_mass` is `ceil(m / 2)`, and
    only the first two columns of the reference are compared.
    """
    T = np.array([-1, 0, 1, np.inf, 1, 0, -1])
    m = 3
    zone = int(np.ceil(m / 2))

    subsequences = core.rolling_window(T, m)
    expected = np.array(
        [
            utils.naive_mass(query, T, m, idx, zone, True)
            for idx, query in enumerate(subsequences)
        ],
        dtype=object,
    )
    actual = stamp.stamp(T, T, m, ignore_trivial=True)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, :2], actual)
def test_scrump_nan_zero_mean_self_join():
    """Self-join of `scrump` on a short series containing `np.inf`.

    `scrump(T, m)` is iterated to exhaustion and the last item it produces
    is compared, first column only, with the `utils.naive_mass` reference.
    """
    T = np.array([-1, 0, 1, np.inf, 1, 0, -1])
    m = 3
    zone = int(np.ceil(m / 4))

    subsequences = core.rolling_window(T, m)
    expected = np.array(
        [
            utils.naive_mass(query, T, m, idx, zone, True)
            for idx, query in enumerate(subsequences)
        ],
        dtype=object,
    )

    # Drain the iterator; `actual` ends up bound to its final item.
    for actual in scrump(T, m):
        pass

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])
def naive_multi_mass(Q, T, m):
    """Brute-force multi-dimensional distance profile.

    For each of the `d` rows of `T`, the z-normalized distance between
    `Q[i]` and every length-`m` window of `T[i]` is computed. The per-row
    profiles are then sorted column-wise (ascending), and row `i` of the
    result is the mean of the `i + 1` smallest distances at each position.

    Returns an array of shape `(d, n - m + 1)`, where `(d, n)` is `T.shape`.
    """
    d, n = T.shape
    n_windows = n - m + 1

    per_dim = np.empty((d, n_windows))
    for dim in range(d):
        normalized_windows = z_norm(core.rolling_window(T[dim], m), 1)
        per_dim[dim] = np.linalg.norm(
            normalized_windows - z_norm(Q[dim]), axis=1
        )

    per_dim = np.sort(per_dim, axis=0)

    running_sum = np.zeros(n_windows)
    averaged = np.zeros((d, n_windows))
    for dim in range(d):
        running_sum += per_dim[dim]
        averaged[dim, :] = running_sum / (dim + 1)

    return averaged
def test_stumped_nan_zero_mean_self_join(dask_cluster):
    """Self-join of `stumped` on a short series containing `np.inf`.

    A `Client` is opened on `dask_cluster` for the duration of the test and
    the full result is compared with the `utils.naive_mass` reference.
    """
    with Client(dask_cluster) as dask_client:
        T = np.array([-1, 0, 1, np.inf, 1, 0, -1])
        m = 3
        zone = int(np.ceil(m / 4))

        subsequences = core.rolling_window(T, m)
        expected = np.array(
            [
                utils.naive_mass(query, T, m, idx, zone, True)
                for idx, query in enumerate(subsequences)
            ],
            dtype=object,
        )
        actual = stumped(dask_client, T, m, ignore_trivial=True)

        utils.replace_inf(expected)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected, actual)
def test_scrimp_self_join(T):
    """Self-join of `scrimp` with `percentage=1.0` vs. `utils.naive_mass`.

    Only the first column is compared. The check is made twice: once with
    `T` as given and once with `T` wrapped in `pd.Series`.
    """
    m = 3
    zone = int(np.ceil(m / 4))

    subsequences = core.rolling_window(T, m)
    expected = np.array(
        [
            utils.naive_mass(query, T, m, idx, zone, True)
            for idx, query in enumerate(subsequences)
        ],
        dtype=object,
    )

    # Input as given.
    actual = scrimp(T, m, percentage=1.0)
    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])

    # pandas Series input.
    actual = scrimp(pd.Series(T), m, percentage=1.0)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])