def test_aampi_profile_index_match():
    """Check that the streaming profiles agree with their own neighbor indices.

    After every `update`, the distances reported in `P_` / `left_P_` must equal
    the distances recomputed directly from `I_` / `left_I_` against the
    subsequences of the full series.
    """
    m = 3
    warm_start = 8
    T_full = np.random.rand(64)
    T_full_subseq = core.rolling_window(T_full, m)

    # Seed the stream with the first `warm_start` points only; the remaining
    # points are fed in one at a time below.
    stream = aampi(T_full[:warm_start].copy(), m, egress=True)

    expected_P = np.full(stream.P_.shape, np.inf)
    expected_left_P = np.full(stream.left_P_.shape, np.inf)

    for n, t in enumerate(T_full[warm_start:]):
        stream.update(t)

        for expected, nn_idx in (
            (expected_P, stream.I_),
            (expected_left_P, stream.left_I_),
        ):
            expected.fill(np.inf)
            # Only positions with a non-negative neighbor index get a distance.
            valid = np.argwhere(nn_idx >= 0).flatten()
            # Window position k is compared against full-series subsequence
            # k + n + 1 (n + 1 updates have been applied so far), while the
            # stored neighbor indices address the full series directly.
            expected[valid] = naive.distance(
                T_full_subseq[valid + n + 1],
                T_full_subseq[nn_idx[valid]],
                axis=1,
            )

        npt.assert_almost_equal(stream.P_, expected_P)
        npt.assert_almost_equal(stream.left_P_, expected_left_P)
def test_motifs_two_motifs():
    """Check the distances `motifs` reports for a series with two planted motifs.

    The global NumPy seed is pinned for the duration of the test and is
    re-randomized in a ``finally`` clause, so a failing assertion cannot leak
    the fixed seed into tests that run afterwards.
    """
    # Fix seed, because in some cases motifs can be off by an index, resulting in
    # test failures; this happens because one of the motifs is not repeated
    # perfectly in T.
    np.random.seed(1234)
    try:
        # The time series is random noise with two motifs for m=20:
        # * (almost) identical step functions at indices 10, 110 and 210
        # * identical linear slopes at indices 70 and 170
        T = np.random.normal(size=300)
        m = 20

        T[10:30] = 1
        T[12:28] = 2

        T[110:130] = 3
        T[112:128] = 6
        T[120] = 6.6

        T[210:230] = 1
        T[212:228] = 2
        T[220] = 1.9
        # naive.distance(naive.z_norm(T[10:30]), naive.z_norm(T[110:130])) = 0.47
        # naive.distance(naive.z_norm(T[10:30]), naive.z_norm(T[210:230])) = 0.24
        # naive.distance(naive.z_norm(T[110:130]), naive.z_norm(T[210:230])) = 0.72
        # Hence T[10:30] is the motif representative for this motif

        T[70:90] = np.arange(m) * 0.1
        T[170:190] = np.arange(m) * 0.1
        # naive.distance(naive.z_norm(T[70:90]), naive.z_norm(T[170:190])) = 0.0

        max_motifs = 2

        mp = naive.stump(T, m)

        # left_indices = [[70, 170, -1], [10, 210, 110]]
        # The slope motif has only two occurrences, so its row is padded with NaN.
        left_profile_values = [
            [0.0, 0.0, np.nan],
            [
                0.0,
                naive.distance(core.z_norm(T[10:30]), core.z_norm(T[210:230])),
                naive.distance(core.z_norm(T[10:30]), core.z_norm(T[110:130])),
            ],
        ]

        # Only the first column of the naive.stump result is passed on.
        right_distance_values, _ = motifs(
            T,
            mp[:, 0],
            max_motifs=max_motifs,
            max_distance=0.5,
            cutoff=np.inf,
        )

        # We ignore indices because of sorting ambiguities for equal distances.
        # As long as the distances are correct, the indices will be too.
        npt.assert_almost_equal(
            left_profile_values, right_distance_values, decimal=6
        )
    finally:
        # Reset seed, even if the assertion above failed.
        np.random.seed(None)
def test_naive_match_exclusion_zone():
    """Check `naive_match` against hand-computed matches with an exclusion zone.

    Q occurs (after z-normalization) as a perfect match at index 1 of T and as
    very close matches at indices 0, 5 and 9. The expected result lists only
    indices 1, 5 and 9: index 0 lies inside the exclusion zone around the
    perfect match and is therefore expected to be dropped.
    """
    Q = np.array([0.0, 1.0, 2.0])
    T = np.array(
        [0.1, 1.0, 2.0, 3.0, -1.0, 0.1, 1.0, 2.0, -0.5, 0.2, 2.0, 4.0]
    )
    m = Q.shape[0]
    excl_zone = int(np.ceil(m / 4))

    def near_match(start):
        # Expected [distance, index] row for the subsequence starting at `start`.
        window = T[start:start + m]
        return [naive.distance(core.z_norm(Q), core.z_norm(window)), start]

    left = [[0, 1], near_match(5), near_match(9)]

    # Order the returned matches by index (ties broken by distance) so that the
    # comparison does not depend on the order in which they were produced.
    right = sorted(
        naive_match(Q, T, excl_zone=excl_zone, max_distance=0.1),
        key=lambda match: (match[1], match[0]),
    )

    npt.assert_almost_equal(left, right)
def test_motifs_max_matches():
    """Check that `motifs` reports at most `max_matches` matches per motif.

    The series contains motif A at four locations and motif B at two. With
    `max_matches=2` the expected result holds only the top two matches of
    motif A and the top two matches of motif B, as two separate motifs.
    """
    m = 3
    max_motifs = 3

    T = np.array(
        [
            0.0, 1.0, 0.0,      # motif A (index 0)
            2.3,
            -1.0, -1.0, -2.0,   # motif B (index 4)
            0.0, 1.0, 0.0,      # motif A (index 7)
            -2.0,
            -1.0, -1.03, -2.0,  # motif B (index 11)
            -0.5,
            2.0, 3.0, 2.04,     # motif A (index 15)
            2.3,
            2.0, 3.0, 2.02,     # motif A (index 19)
        ]
    )

    left_indices = [[0, 7], [4, 11]]

    # Motif A's two best occurrences are exact copies; motif B's second
    # occurrence is slightly perturbed, so its distance is computed explicitly.
    b_first, b_second = left_indices[1]
    motif_b_distance = naive.distance(
        core.z_norm(T[b_first:b_first + m]),
        core.z_norm(T[b_second:b_second + m]),
    )
    left_profile_values = [
        [0.0, 0.0],
        [0.0, motif_b_distance],
    ]

    mp = naive.stump(T, m)
    right_distance_values, right_indices = motifs(
        T,
        mp[:, 0],
        max_motifs=max_motifs,
        max_distance=0.1,
        cutoff=np.inf,
        max_matches=2,
    )

    # We ignore indices because of sorting ambiguities for equal distances.
    # As long as the distances are correct, the indices will be too.
    npt.assert_almost_equal(
        left_profile_values, right_distance_values, decimal=4
    )
def test_aamp_naive_match_exclusion_zone():
    """Check `naive_aamp_match` against hand-computed matches with a wide exclusion zone.

    Q occurs exactly at index 3 of T; the windows starting at indices 0 and 7
    differ from Q only by 0.1 in their first element. The expected result
    lists indices 3 and 7 only: with `excl_zone = m`, index 0 falls inside the
    exclusion zone of the exact match and is expected to be dropped.
    """
    Q = np.array([0.0, 1.0, 2.0])
    T = np.array(
        [
            0.1, 1.0, 2.0,
            0.0, 1.0, 2.0,
            -1.0,
            0.1, 1.0, 2.0,
            -0.5,
            0.2, 2.0, 4.0,
        ]
    )
    m = Q.shape[0]

    # Extra large exclusion zone to exclude the first almost perfect match
    excl_zone = m

    close_idx = 7
    left = [
        [0, 3],
        [naive.distance(Q, T[close_idx:close_idx + m]), close_idx],
    ]

    # Order the returned matches by distance (ties broken by index) so that the
    # comparison does not depend on the order in which they were produced.
    right = sorted(
        naive_aamp_match(Q, T, excl_zone=excl_zone, max_distance=0.2),
        key=lambda match: (match[0], match[1]),
    )

    npt.assert_almost_equal(left, right)
def naive_idx_to_mp(I, T, m, normalize=True):
    """Recompute profile distances from nearest-neighbor indices.

    Parameters
    ----------
    I : numpy.ndarray
        Nearest-neighbor index for each length-`m` subsequence of `T`.
        Negative entries yield a distance of `np.inf`.
    T : numpy.ndarray
        Time series (not modified; a copy is used internally).
    m : int
        Subsequence (window) length.
    normalize : bool, default True
        When True, subsequences are z-normalized before the distance is taken.

    Returns
    -------
    numpy.ndarray
        Distance between each subsequence and its indexed neighbor, with
        `np.inf` wherever the subsequence contains a non-finite value or the
        index is negative.
    """
    nn_idx = I.astype(np.int64)
    series = T.copy()

    finite_mask = np.isfinite(series)
    subseq_is_finite = np.all(core.rolling_window(finite_mask, m), axis=1)

    # Zero out non-finite samples in the copy; every subsequence touching one
    # is forced to inf below.
    series[~finite_mask] = 0.0

    subseqs = core.rolling_window(series, m)
    neighbors = subseqs[nn_idx]

    if normalize:
        lhs = naive.z_norm(subseqs, axis=1)
        rhs = naive.z_norm(neighbors, axis=1)
    else:
        lhs, rhs = subseqs, neighbors
    P = naive.distance(lhs, rhs, axis=1)

    P[~subseq_is_finite] = np.inf
    # Negative indices wrapped around when indexing above; discard those rows.
    P[nn_idx < 0] = np.inf

    return P