def test_compute_P_ABBA(T_A, T_B):
    """Compare `_compute_P_ABBA` with the two naive profiles laid end to end."""
    m = 3
    n_sub_A = T_A.shape[0] - m + 1
    n_sub_B = T_B.shape[0] - m + 1
    total = n_sub_A + n_sub_B

    # Reference: first column of naive.stump(T_A, m, T_B), followed by the first
    # column of naive.stump(T_B, m, T_A).
    expected = np.empty(total, dtype=np.float64)
    expected[:n_sub_A] = naive.stump(T_A, m, T_B)[:, 0]
    expected[n_sub_A:] = naive.stump(T_B, m, T_A)[:, 0]

    # The function under test receives a pre-allocated buffer; its return value
    # is not used, only the buffer contents are checked.
    actual = np.empty(total, dtype=np.float64)
    _compute_P_ABBA(T_A, T_B, m, actual)

    npt.assert_almost_equal(expected, actual)
def test_motifs_two_motifs():
    """
    Check the match distances that `motifs` reports for two planted motifs.

    The global NumPy seed is fixed while the test runs and is re-randomized in a
    `finally` clause, so that a failing assertion cannot leave the fixed seed
    active for code that runs afterwards.
    """
    # Fix seed, because in some cases motifs can be off by an index, resulting in
    # test failures; this happens since one of the motifs is not repeated
    # perfectly in T.
    np.random.seed(1234)
    try:
        # The time series is random noise with two motifs for m=20:
        # * (almost) identical step functions at indices 10, 110 and 210
        # * identical linear slopes at indices 70 and 170
        T = np.random.normal(size=300)
        m = 20

        T[10:30] = 1
        T[12:28] = 2

        T[110:130] = 3
        T[112:128] = 6
        T[120] = 6.6

        T[210:230] = 1
        T[212:228] = 2
        T[220] = 1.9
        # naive.distance(naive.z_norm(T[10:30]), naive.z_norm(T[110:130])) = 0.47
        # naive.distance(naive.z_norm(T[10:30]), naive.z_norm(T[210:230])) = 0.24
        # naive.distance(naive.z_norm(T[110:130]), naive.z_norm(T[210:230])) = 0.72
        # Hence T[10:30] is the motif representative for this motif

        T[70:90] = np.arange(m) * 0.1
        T[170:190] = np.arange(m) * 0.1
        # naive.distance(naive.z_norm(T[70:90]), naive.z_norm(T[170:190])) = 0.0

        max_motifs = 2

        mp = naive.stump(T, m)

        # left_indices = [[70, 170, -1], [10, 210, 110]]
        left_profile_values = [
            [0.0, 0.0, np.nan],
            [
                0.0,
                naive.distance(core.z_norm(T[10:30]), core.z_norm(T[210:230])),
                naive.distance(core.z_norm(T[10:30]), core.z_norm(T[110:130])),
            ],
        ]

        right_distance_values, _ = motifs(
            T,
            mp[:, 0],
            max_motifs=max_motifs,
            max_distance=0.5,
            cutoff=np.inf,
        )

        # We ignore indices because of sorting ambiguities for equal distances.
        # As long as the distances are correct, the indices will be too.
        npt.assert_almost_equal(
            left_profile_values, right_distance_values, decimal=6
        )
    finally:
        # Reset seed, also when the assertion above fails
        np.random.seed(None)
def naive_consensus_search(Ts, m):
    """
    Brute force consensus motif search, following Table 1 of
    <https://www.cs.ucr.edu/~eamonn/consensus_Motif_ICDM_Long_version.pdf>

    The pseudocode in the paper has a bug at line 8, where `i` should be `j`;
    that is corrected here.

    For every time series in `Ts`, each subsequence's radius is the largest of its
    `naive.stump` distances (column 0) to all the other time series. The
    subsequence with the smallest radius over all time series is handed to
    `naive_get_central_motif`, whose result is returned.
    """
    n_series = len(Ts)
    best_radius = np.inf
    best_Ts_idx = 0
    best_subseq_idx = 0

    for j, T_j in enumerate(Ts):
        # Element-wise maximum of the distances to every other time series
        radii = np.zeros(len(T_j) - m + 1)
        for i in range(n_series):
            if i == j:
                continue
            radii = np.maximum(radii, naive.stump(T_j, m, Ts[i])[:, 0])

        candidate_idx = np.argmin(radii)
        candidate_radius = radii[candidate_idx]
        # Strict comparison: on ties the earliest candidate is kept
        if candidate_radius < best_radius:
            best_radius = candidate_radius
            best_Ts_idx = j
            best_subseq_idx = candidate_idx

    return naive_get_central_motif(Ts, best_radius, best_Ts_idx, best_subseq_idx, m)
def test_motifs_max_matches():
    """
    Check that `max_matches=2` limits each motif to its top two matches.

    The time series contains motif A at four locations and motif B at two. The
    expected result holds only the top two matches of motif A and the top two
    matches of motif B, as two separate motifs.
    """
    # NOTE(review): the original comment described this scenario for
    # `max_motifs=2`, while the call below passes `max_motifs=3` - confirm intent.
    T = np.array(
        [
            0.0,  # motif A
            1.0,
            0.0,
            2.3,
            -1.0,  # motif B
            -1.0,
            -2.0,
            0.0,  # motif A
            1.0,
            0.0,
            -2.0,
            -1.0,  # motif B
            -1.03,
            -2.0,
            -0.5,
            2.0,  # motif A
            3.0,
            2.04,
            2.3,
            2.0,  # motif A
            3.0,
            2.02,
        ]
    )
    m = 3
    max_motifs = 3

    left_indices = [[0, 7], [4, 11]]
    b_first, b_second = left_indices[1]
    motif_b_distance = naive.distance(
        core.z_norm(T[b_first : b_first + m]),
        core.z_norm(T[b_second : b_second + m]),
    )
    left_profile_values = [[0.0, 0.0], [0.0, motif_b_distance]]

    mp = naive.stump(T, m)
    right_distance_values, right_indices = motifs(
        T,
        mp[:, 0],
        max_motifs=max_motifs,
        max_distance=0.1,
        cutoff=np.inf,
        max_matches=2,
    )

    # We ignore indices because of sorting ambiguities for equal distances.
    # As long as the distances are correct, the indices will be too.
    npt.assert_almost_equal(left_profile_values, right_distance_values, decimal=4)
def test_stumped_A_B_join(T_A, T_B, dask_cluster):
    """Compare `stumped` with `naive.stump` when `T_A` is joined against `T_B`."""
    m = 3
    with Client(dask_cluster) as dask_client:
        expected = naive.stump(T_A, m, T_B=T_B)
        actual = stumped(dask_client, T_A, m, T_B, ignore_trivial=False)

        # Apply the same inf handling to both results before comparing them
        for profile in (expected, actual):
            naive.replace_inf(profile)

        npt.assert_almost_equal(expected, actual)
def test_stumped_A_B_join(T_A, T_B, dask_cluster):
    """`stumped` with `ignore_trivial=False` must agree with `naive.stump`."""
    window = 3
    with Client(dask_cluster) as dask_client:
        reference = naive.stump(T_A, window, T_B=T_B)
        computed = stumped(dask_client, T_A, window, T_B, ignore_trivial=False)

        # Both arrays get the same inf replacement so they can be compared
        naive.replace_inf(reference)
        naive.replace_inf(computed)

        npt.assert_almost_equal(reference, computed)
def test_stumped_self_join(T_A, T_B, dask_cluster):
    """Compare `stumped` with `naive.stump` for a self-join of `T_B`."""
    m = 3
    exclusion_zone = int(np.ceil(m / 4))
    with Client(dask_cluster) as dask_client:
        reference = naive.stump(T_B, m, exclusion_zone=exclusion_zone)
        computed = stumped(dask_client, T_B, m, ignore_trivial=True)

        # Apply the same inf handling to both results before comparing them
        for profile in (reference, computed):
            naive.replace_inf(profile)

        npt.assert_almost_equal(reference, computed)
def test_stumped_self_join_df(T_A, T_B, dask_cluster):
    """Self-join of `T_B` where `stumped` receives the data as a `pd.Series`."""
    m = 3
    exclusion_zone = int(np.ceil(m / 4))
    with Client(dask_cluster) as dask_client:
        expected = naive.stump(T_B, m, exclusion_zone=exclusion_zone)
        actual = stumped(dask_client, pd.Series(T_B), m, ignore_trivial=True)

        # Both arrays get the same inf replacement so they can be compared
        naive.replace_inf(expected)
        naive.replace_inf(actual)

        npt.assert_almost_equal(expected, actual)
def test_mpdist_vect_k(T_A, T_B, k):
    """
    Compare `_mpdist_vect` called with an explicit `k` against a naive reference.

    For every window of `T_B` that has the length of `T_A`, the reference value is
    the `k`-th smallest entry of the two concatenated naive profiles.
    """
    m = 3
    n_A = T_A.shape[0]
    n_B = T_B.shape[0]
    n_sub = n_A - m + 1  # called `j` originally, since `k` is the selection index

    P_ABBA = np.empty(2 * n_sub, dtype=np.float64)
    expected = np.empty(n_B - n_A + 1)

    # Clamp once; `P_ABBA` keeps its size, so no further clamping is needed below
    k = min(int(k), P_ABBA.shape[0] - 1)

    for start in range(n_B - n_A + 1):
        window = T_B[start : start + n_A]
        P_ABBA[:n_sub] = naive.stump(T_A, m, window)[:, 0]
        P_ABBA[n_sub:] = naive.stump(window, m, T_A)[:, 0]
        P_ABBA.sort()
        expected[start] = P_ABBA[k]

    actual = _mpdist_vect(T_A, T_B, m, k=k)

    npt.assert_almost_equal(expected, actual)
def test_stump_self_join_larger_window(T_A, T_B, m, dask_cluster):
    """Self-join comparison of `stumped` and `naive.stump` for the given `m`."""
    with Client(dask_cluster) as dask_client:
        # Nothing is checked unless `T_B` is longer than the window
        if len(T_B) <= m:
            return

        exclusion_zone = int(np.ceil(m / 4))
        expected = naive.stump(T_B, m, exclusion_zone=exclusion_zone)
        actual = stumped(dask_client, T_B, m, ignore_trivial=True)

        for profile in (expected, actual):
            naive.replace_inf(profile)

        npt.assert_almost_equal(expected, actual)
def test_stump_A_B_join(T_A, T_B):
    """Compare `stump` with `naive.stump`, for array input and `pd.Series` input."""
    m = 3
    expected = naive.stump(T_A, m, T_B=T_B)
    naive.replace_inf(expected)

    # Same check twice: first with the inputs as given, then wrapped in pd.Series
    for series_A, series_B in ((T_A, T_B), (pd.Series(T_A), pd.Series(T_B))):
        actual = stump(series_A, m, series_B, ignore_trivial=False)
        naive.replace_inf(actual)
        npt.assert_almost_equal(expected, actual)
def test_stump_A_B_join(T_A, T_B):
    """`stump` with `ignore_trivial=False` must agree with `naive.stump`."""
    window = 3
    reference = naive.stump(T_A, window, T_B=T_B)
    naive.replace_inf(reference)

    def check(first, second):
        # Run `stump` on one pair of inputs and compare with the reference
        computed = stump(first, window, second, ignore_trivial=False)
        naive.replace_inf(computed)
        npt.assert_almost_equal(reference, computed)

    check(T_A, T_B)
    check(pd.Series(T_A), pd.Series(T_B))
def test_stumped_one_constant_subsequence_A_B_join_swap(dask_cluster):
    """Join random `T_A` against a `T_B` made of 20 zeros followed by 5 ones."""
    m = 3
    with Client(dask_cluster) as dask_client:
        T_A = np.random.rand(20)
        zeros = np.zeros(20, dtype=np.float64)
        ones = np.ones(5, dtype=np.float64)
        T_B = np.concatenate((zeros, ones))

        expected = naive.stump(T_A, m, T_B=T_B)
        actual = stumped(dask_client, T_A, m, T_B, ignore_trivial=False)

        for profile in (expected, actual):
            naive.replace_inf(profile)

        # Only the distance column is compared; indices are ignored
        npt.assert_almost_equal(expected[:, 0], actual[:, 0])
def test_stumped_one_constant_subsequence_self_join(dask_cluster):
    """Self-join of a series made of 20 zeros followed by 5 ones."""
    window = 3
    exclusion_zone = int(np.ceil(window / 4))
    with Client(dask_cluster) as dask_client:
        zeros = np.zeros(20, dtype=np.float64)
        ones = np.ones(5, dtype=np.float64)
        T_A = np.concatenate((zeros, ones))

        reference = naive.stump(T_A, window, exclusion_zone=exclusion_zone)
        computed = stumped(dask_client, T_A, window, ignore_trivial=True)

        naive.replace_inf(reference)
        naive.replace_inf(computed)

        # Only the distance column is compared; indices are ignored
        npt.assert_almost_equal(reference[:, 0], computed[:, 0])
def test_stump_self_join_larger_window_df(T_A, T_B, dask_cluster):
    """Self-join with `pd.Series` input for window sizes 8, 16 and 32."""
    with Client(dask_cluster) as dask_client:
        for m in [8, 16, 32]:
            # Window sizes that do not fit into `T_B` are skipped
            if len(T_B) <= m:
                continue

            exclusion_zone = int(np.ceil(m / 4))
            reference = naive.stump(T_B, m, exclusion_zone=exclusion_zone)
            computed = stumped(dask_client, pd.Series(T_B), m, ignore_trivial=True)

            for profile in (reference, computed):
                naive.replace_inf(profile)

            npt.assert_almost_equal(reference, computed)
def test_stumped_one_constant_subsequence_self_join_df(dask_cluster):
    """Self-join of 20 zeros followed by 5 ones, passed in as a `pd.Series`."""
    m = 3
    exclusion_zone = int(np.ceil(m / 4))
    with Client(dask_cluster) as dask_client:
        T_A = np.concatenate(
            (np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64))
        )

        expected = naive.stump(T_A, m, exclusion_zone=exclusion_zone)
        actual = stumped(dask_client, pd.Series(T_A), m, ignore_trivial=True)

        for profile in (expected, actual):
            naive.replace_inf(profile)

        # Only the distance column is compared; indices are ignored
        npt.assert_almost_equal(expected[:, 0], actual[:, 0])
def test_mpdist_vect(T_A, T_B):
    """
    Compare `_mpdist_vect`, called without `k`, against a naive reference.

    The reference derives `k` from a percentage of 0.05 and, for every window of
    `T_B` that has the length of `T_A`, picks the `k`-th smallest entry of the two
    concatenated naive profiles.
    """
    m = 3
    n_A = T_A.shape[0]
    n_B = T_B.shape[0]
    n_sub = n_A - m + 1  # called `j` originally, since `k` is the selection index

    P_ABBA = np.empty(2 * n_sub, dtype=np.float64)
    expected = np.empty(n_B - n_A + 1)

    percentage = 0.05
    # `P_ABBA` has 2 * n_sub entries, so a single clamp to its last index suffices
    k = min(int(math.ceil(percentage * (2 * n_A))), P_ABBA.shape[0] - 1)

    for start in range(n_B - n_A + 1):
        window = T_B[start : start + n_A]
        P_ABBA[:n_sub] = naive.stump(T_A, m, window)[:, 0]
        P_ABBA[n_sub:] = naive.stump(window, m, T_A)[:, 0]
        P_ABBA.sort()
        expected[start] = P_ABBA[k]

    actual = _mpdist_vect(T_A, T_B, m)

    npt.assert_almost_equal(expected, actual)
def test_stump_self_join(T_A, T_B):
    """Self-join of `T_B` with `stump`, for array input and `pd.Series` input."""
    window = 3
    exclusion_zone = int(np.ceil(window / 4))
    reference = naive.stump(T_B, window, exclusion_zone=exclusion_zone)
    naive.replace_inf(reference)

    # Same check twice: first with the input as given, then wrapped in pd.Series
    for data in (T_B, pd.Series(T_B)):
        computed = stump(data, window, ignore_trivial=True)
        naive.replace_inf(computed)
        npt.assert_almost_equal(reference, computed)
def test_stump_self_join(T_A, T_B):
    """`stump` with `ignore_trivial=True` must agree with `naive.stump` on `T_B`."""
    m = 3
    expected = naive.stump(T_B, m, exclusion_zone=int(np.ceil(m / 4)))
    naive.replace_inf(expected)

    def check(data):
        # Run `stump` on one form of the input and compare with the reference
        actual = stump(data, m, ignore_trivial=True)
        naive.replace_inf(actual)
        npt.assert_almost_equal(expected, actual)

    check(T_B)
    check(pd.Series(T_B))
def test_stumped_nan_zero_mean_self_join(dask_cluster):
    """Self-join of a short symmetric series with one non-finite value in it."""
    # NOTE(review): the test name says "nan" but the series contains `np.inf` -
    # confirm which value is intended.
    m = 3
    exclusion_zone = int(np.ceil(m / 4))
    with Client(dask_cluster) as dask_client:
        T = np.array([-1, 0, 1, np.inf, 1, 0, -1])

        expected = naive.stump(T, m, exclusion_zone=exclusion_zone)
        actual = stumped(dask_client, T, m, ignore_trivial=True)

        for profile in (expected, actual):
            naive.replace_inf(profile)

        npt.assert_almost_equal(expected, actual)
def test_stumped_one_subsequence_inf_A_B_join(
    T_A, T_B, substitution_location_B, dask_cluster
):
    """Join `T_A` against a copy of `T_B` that has `np.inf` written into it."""
    window = 3
    with Client(dask_cluster) as dask_client:
        # Work on a copy so that the `T_B` fixture itself is left untouched
        T_B_inf = T_B.copy()
        T_B_inf[substitution_location_B] = np.inf

        reference = naive.stump(T_A, window, T_B=T_B_inf)
        computed = stumped(
            dask_client, T_A, window, T_B_inf, ignore_trivial=False
        )

        for profile in (reference, computed):
            naive.replace_inf(profile)

        npt.assert_almost_equal(reference, computed)
def test_stumped_one_subsequence_nan_A_B_join(
    T_A, T_B, substitution_location_B, dask_cluster
):
    """Join `T_A` against a copy of `T_B` that has `np.nan` written into it."""
    m = 3
    with Client(dask_cluster) as dask_client:
        # Work on a copy so that the `T_B` fixture itself is left untouched
        T_B_nan = T_B.copy()
        T_B_nan[substitution_location_B] = np.nan

        expected = naive.stump(T_A, m, T_B=T_B_nan)
        actual = stumped(dask_client, T_A, m, T_B_nan, ignore_trivial=False)

        naive.replace_inf(expected)
        naive.replace_inf(actual)

        npt.assert_almost_equal(expected, actual)
def test_stump_constant_subsequence_self_join():
    """Self-join of 20 zeros followed by 5 ones, as an array and as a `pd.Series`."""
    window = 3
    exclusion_zone = int(np.ceil(window / 4))
    zeros = np.zeros(20, dtype=np.float64)
    ones = np.ones(5, dtype=np.float64)
    T_A = np.concatenate((zeros, ones))

    reference = naive.stump(T_A, window, exclusion_zone=exclusion_zone)
    naive.replace_inf(reference)

    for data in (T_A, pd.Series(T_A)):
        computed = stump(data, window, ignore_trivial=True)
        naive.replace_inf(computed)
        # Only the distance column is compared; indices are ignored
        npt.assert_almost_equal(reference[:, 0], computed[:, 0])
def test_stumped_one_subsequence_inf_self_join(
    T_A, T_B, substitution_location_B, dask_cluster
):
    """Self-join of a copy of `T_B` that has `np.inf` written into it."""
    window = 3
    exclusion_zone = int(np.ceil(window / 4))
    with Client(dask_cluster) as dask_client:
        # Work on a copy so that the `T_B` fixture itself is left untouched
        T_B_inf = T_B.copy()
        T_B_inf[substitution_location_B] = np.inf

        reference = naive.stump(T_B_inf, window, exclusion_zone=exclusion_zone)
        computed = stumped(dask_client, T_B_inf, window, ignore_trivial=True)

        for profile in (reference, computed):
            naive.replace_inf(profile)

        npt.assert_almost_equal(reference, computed)
def test_stumped_one_constant_subsequence_A_B_join_df(dask_cluster):
    """Join random `T_A` against 20 zeros plus 5 ones, both as `pd.Series`."""
    window = 3
    with Client(dask_cluster) as dask_client:
        T_A = np.random.rand(20)
        zeros = np.zeros(20, dtype=np.float64)
        ones = np.ones(5, dtype=np.float64)
        T_B = np.concatenate((zeros, ones))

        reference = naive.stump(T_A, window, T_B=T_B)
        computed = stumped(
            dask_client,
            pd.Series(T_A),
            window,
            pd.Series(T_B),
            ignore_trivial=False,
        )

        for profile in (reference, computed):
            naive.replace_inf(profile)

        # Only the distance column is compared; indices are ignored
        npt.assert_almost_equal(reference[:, 0], computed[:, 0])
def test_stumped_one_subsequence_nan_self_join(
    T_A, T_B, substitution_location_B, dask_cluster
):
    """Self-join of a copy of `T_B` that has `np.nan` written into it."""
    m = 3
    exclusion_zone = int(np.ceil(m / 4))
    with Client(dask_cluster) as dask_client:
        # Work on a copy so that the `T_B` fixture itself is left untouched
        T_B_nan = T_B.copy()
        T_B_nan[substitution_location_B] = np.nan

        expected = naive.stump(T_B_nan, m, exclusion_zone=exclusion_zone)
        actual = stumped(dask_client, T_B_nan, m, ignore_trivial=True)

        naive.replace_inf(expected)
        naive.replace_inf(actual)

        npt.assert_almost_equal(expected, actual)
def test_stumped_identical_subsequence_A_B_join(dask_cluster):
    """Join two random series that both contain the same 8-point stretch."""
    m = 3
    with Client(dask_cluster) as dask_client:
        shared = np.random.rand(8)
        T_A = np.random.rand(20)
        T_B = np.random.rand(20)

        # Plant the shared stretch at index 1 of T_A and at index 11 of T_B
        shared_len = shared.shape[0]
        T_A[1 : 1 + shared_len] = shared
        T_B[11 : 11 + shared_len] = shared

        expected = naive.stump(T_A, m, T_B=T_B)
        actual = stumped(dask_client, T_A, m, T_B, ignore_trivial=False)

        for profile in (expected, actual):
            naive.replace_inf(profile)

        # Only the distance column is compared; indices are ignored
        npt.assert_almost_equal(
            expected[:, 0], actual[:, 0], decimal=config.STUMPY_TEST_PRECISION
        )
def test_stumped_identical_subsequence_self_join(dask_cluster):
    """Self-join of a random series that contains the same 8-point stretch twice."""
    m = 3
    exclusion_zone = int(np.ceil(m / 4))
    with Client(dask_cluster) as dask_client:
        shared = np.random.rand(8)
        T_A = np.random.rand(20)

        # Plant the shared stretch at indices 1 and 11 of the same series
        shared_len = shared.shape[0]
        for start in (1, 11):
            T_A[start : start + shared_len] = shared

        expected = naive.stump(T_A, m, exclusion_zone=exclusion_zone)
        actual = stumped(dask_client, T_A, m, ignore_trivial=True)

        for profile in (expected, actual):
            naive.replace_inf(profile)

        # Only the distance column is compared; indices are ignored
        npt.assert_almost_equal(
            expected[:, 0], actual[:, 0], decimal=config.STUMPY_TEST_PRECISION
        )
def test_stimp_100_percent(T):
    """
    Run `stimp` with `percentage=1.0` and check both its raw and transformed PAN.

    The raw `_PAN` is compared with rows assembled from `naive.stump`; the
    transformed `PAN_` is compared with `naive.transform_pan`.
    """
    threshold = 0.2
    percentage = 1.0
    min_m = 3
    n_updates = T.shape[0] - min_m + 1

    pan = stimp(
        T,
        min_m=min_m,
        max_m=None,
        step=1,
        percentage=percentage,
        pre_scrump=True,
        # normalize=True,
    )

    for _ in range(n_updates):
        pan.update()

    # Reference rows default to inf; each processed window size fills the row
    # given by `_bfs_indices` with the naive distances.
    n_rows = pan.M_.shape[0]
    expected_raw = np.full((n_rows, T.shape[0]), fill_value=np.inf)
    for idx, m in enumerate(pan.M_[:n_updates]):
        exclusion_zone = int(np.ceil(m / 4))
        naive_mp = naive.stump(T, m, T_B=None, exclusion_zone=exclusion_zone)
        expected_raw[pan._bfs_indices[idx], : naive_mp.shape[0]] = naive_mp[:, 0]

    # Compare raw pan
    actual_raw = pan._PAN
    naive.replace_inf(expected_raw)
    naive.replace_inf(actual_raw)
    npt.assert_almost_equal(expected_raw, actual_raw)

    # Compare transformed pan; `PAN_` is read before the naive transform is run,
    # as in the original statement order
    actual_transformed = pan.PAN_
    expected_transformed = naive.transform_pan(
        pan._PAN, pan._M, threshold, pan._bfs_indices, pan._n_processed
    )
    naive.replace_inf(expected_transformed)
    naive.replace_inf(actual_transformed)
    npt.assert_almost_equal(expected_transformed, actual_transformed)
def test_stimp_100_percent(T):
    """
    Run `stimp` with `percentage=1.0` and check `P_` and `I_` against `naive.stump`.

    A randomly drawn seed is applied before `stimp` is created and again before
    the naive reference is computed.
    """
    percentage = 1.0
    min_m = 3
    n_updates = T.shape[0] - min_m + 1

    seed = np.random.randint(100000)

    np.random.seed(seed)
    pmp = stimp(
        T,
        min_m=min_m,
        max_m=None,
        step=1,
        percentage=percentage,
        pre_scrump=True,
        normalize=True,
    )

    for _ in range(n_updates):
        pmp.update()

    # Reference rows default to inf (distances) and -1 (indices)
    shape = (pmp.M_.shape[0], T.shape[0])
    expected_P = np.full(shape, fill_value=np.inf)
    expected_I = np.ones(shape, dtype=np.int64) * -1

    np.random.seed(seed)
    for idx, m in enumerate(pmp.M_[:n_updates]):
        exclusion_zone = int(np.ceil(m / 4))
        naive_mp = naive.stump(T, m, T_B=None, exclusion_zone=exclusion_zone)
        row = pmp.bfs_indices_[idx]
        width = naive_mp.shape[0]
        expected_P[row, :width] = naive_mp[:, 0]
        expected_I[row, :width] = naive_mp[:, 1]

    actual_P = pmp.P_
    actual_I = pmp.I_

    # Same call order as before: references first, then the computed arrays
    for values in (expected_P, expected_I, actual_P, actual_I):
        naive.replace_inf(values)

    npt.assert_almost_equal(expected_P, actual_P)
    npt.assert_almost_equal(expected_I, actual_I)