def test_two_constant_subsequences_A_B_join():
    """Check ``stump`` A-B join values for two series made of constant runs.

    The reference is assembled from ``utils.naive_mass``. Only column 0 is
    compared (indices are ignored). Each ordering of the two series is checked
    with plain arrays first and ``pd.Series`` inputs second.
    """
    T_A = np.concatenate(
        (np.zeros(10, dtype=np.float64), np.ones(10, dtype=np.float64))
    )
    T_B = np.concatenate(
        (np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64))
    )
    m = 3

    # Second pass swaps the inputs.
    for series, query_series in ((T_A, T_B), (T_B, T_A)):
        expected = np.array(
            [
                utils.naive_mass(Q, series, m)
                for Q in core.rolling_window(query_series, m)
            ],
            dtype=object,
        )

        actual = stump(series, m, query_series, ignore_trivial=False)
        utils.replace_inf(expected)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # ignore indices

        actual = stump(
            pd.Series(series), m, pd.Series(query_series), ignore_trivial=False
        )
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # ignore indices
def test_stamp_nan_inf_A_B_join(
    T_A, T_B, substitute_A, substitute_B, substitution_locations
):
    """Check ``stamp.stamp`` A-B joins after substituting one value per series.

    For every pair of substitution locations, working copies of ``T_A`` and
    ``T_B`` are reset from the originals, one element of each is overwritten
    with its substitute, and the first two columns of the ``utils.naive_mass``
    reference are compared with the ``stamp.stamp`` output.
    """
    m = 3
    work_A = T_A.copy()
    work_B = T_B.copy()

    for loc_B in substitution_locations:
        for loc_A in substitution_locations:
            # Restore the pristine data before applying this pair of substitutions.
            work_A[:] = T_A[:]
            work_B[:] = T_B[:]
            work_A[loc_A] = substitute_A
            work_B[loc_B] = substitute_B

            expected = np.array(
                [
                    utils.naive_mass(Q, work_A, m)
                    for Q in core.rolling_window(work_B, m)
                ],
                dtype=object,
            )
            actual = stamp.stamp(work_A, work_B, m)

            utils.replace_inf(expected)
            utils.replace_inf(actual)
            npt.assert_almost_equal(expected[:, :2], actual)
def test_naive_mstump():
    """Check ``utils.naive_mstump`` against ``utils.naive_mass`` on one random row.

    Columns 0 and 1 of the per-window ``naive_mass`` results are turned into
    column vectors and compared with the two arrays returned by
    ``naive_mstump``.
    """
    T = np.random.uniform(-1000, 1000, [1, 1000]).astype(np.float64)
    m = 20
    excl_zone = int(np.ceil(m / 4))

    per_window = [
        utils.naive_mass(
            Q,
            T[0],
            m,
            trivial_idx=idx,
            ignore_trivial=True,
            excl_zone=excl_zone,
        )
        for idx, Q in enumerate(core.rolling_window(T[0], m))
    ]
    expected = np.array(per_window, dtype=object)

    # Add a leading axis and transpose so each column becomes an (n, 1) array.
    expected_P = expected[np.newaxis, :, 0].T
    expected_I = expected[np.newaxis, :, 1].T

    actual_P, actual_I = utils.naive_mstump(T, m, excl_zone)

    npt.assert_almost_equal(expected_P, actual_P)
    npt.assert_almost_equal(expected_I, actual_I)
def test_parallel_gpu_stump_A_B_join(T_A, T_B):
    """Check ``gpu_stump`` A-B joins when every listed CUDA device id is passed.

    Nothing is checked unless ``T_B`` has more than 10 elements. The
    ``utils.naive_mass`` reference is compared in full with the ``gpu_stump``
    output for array inputs and then for ``pd.Series`` inputs.
    """
    device_ids = [device.id for device in cuda.list_devices()]
    if len(T_B) <= 10:
        return

    m = 3
    expected = np.array(
        [utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B, m)],
        dtype=object,
    )
    gpu_kwargs = {
        "ignore_trivial": False,
        "threads_per_block": THREADS_PER_BLOCK,
        "device_id": device_ids,
    }

    actual = gpu_stump(T_A, m, T_B, **gpu_kwargs)
    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)

    actual = gpu_stump(pd.Series(T_A), m, pd.Series(T_B), **gpu_kwargs)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
def test_scrump_nan_inf_self_join(T, substitute, substitution_locations):
    """Check ``scrump`` self-joins after substituting one value of ``T``.

    For each substitution location, a working copy is reset from ``T`` and one
    element is overwritten. Column 0 of the ``utils.naive_mass`` reference is
    compared with column 0 of the last value produced by iterating ``scrump``,
    for array input and then for ``pd.Series`` input.
    """
    m = 3
    zone = int(np.ceil(m / 4))
    work = T.copy()

    for loc in substitution_locations:
        work[:] = T[:]
        work[loc] = substitute

        expected = np.array(
            [
                utils.naive_mass(Q, work, m, idx, zone, True)
                for idx, Q in enumerate(core.rolling_window(work, m))
            ],
            dtype=object,
        )

        # Exhaust the iterator; `actual` keeps the last value it produced.
        for actual in scrump(work, m):
            pass
        utils.replace_inf(expected)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected[:, 0], actual[:, 0])

        for actual in scrump(pd.Series(work), m):
            pass
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected[:, 0], actual[:, 0])
def test_gpu_stump_self_join_larger_window(T_A, T_B):
    """Check ``gpu_stump`` self-joins of ``T_B`` for window sizes 8, 16 and 32.

    Window sizes that are not smaller than ``len(T_B)`` are skipped. The full
    ``utils.naive_mass`` reference is compared with the ``gpu_stump`` output for
    array input and then for ``pd.Series`` input.
    """
    for m in [8, 16, 32]:
        if len(T_B) <= m:
            continue

        zone = int(np.ceil(m / 4))
        expected = np.array(
            [
                utils.naive_mass(Q, T_B, m, idx, zone, True)
                for idx, Q in enumerate(core.rolling_window(T_B, m))
            ],
            dtype=object,
        )
        gpu_kwargs = {
            "ignore_trivial": True,
            "threads_per_block": THREADS_PER_BLOCK,
        }

        actual = gpu_stump(T_B, m, **gpu_kwargs)
        utils.replace_inf(expected)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected, actual)

        actual = gpu_stump(pd.Series(T_B), m, **gpu_kwargs)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected, actual)
def test_stump_nan_inf_self_join(T_A, T_B, substitute_B, substitution_locations):
    """Check ``stump`` self-joins of ``T_B`` after substituting one value.

    For each substitution location, a working copy is reset from ``T_B`` and
    one element is overwritten with ``substitute_B``. The full
    ``utils.naive_mass`` reference is compared with the ``stump`` output for
    array input and then for ``pd.Series`` input.
    """
    m = 3
    work_B = T_B.copy()

    for loc_B in substitution_locations:
        work_B[:] = T_B[:]
        work_B[loc_B] = substitute_B

        zone = int(np.ceil(m / 4))
        expected = np.array(
            [
                utils.naive_mass(Q, work_B, m, idx, zone, True)
                for idx, Q in enumerate(core.rolling_window(work_B, m))
            ],
            dtype=object,
        )

        actual = stump(work_B, m, ignore_trivial=True)
        utils.replace_inf(expected)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected, actual)

        actual = stump(pd.Series(work_B), m, ignore_trivial=True)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected, actual)
def test_parallel_gpu_stump_self_join(T_A, T_B):
    """Check ``gpu_stump`` self-joins when every listed CUDA device id is passed.

    Nothing is checked unless ``T_B`` has more than 10 elements. The full
    ``utils.naive_mass`` reference is compared with the ``gpu_stump`` output for
    array input and then for ``pd.Series`` input.
    """
    device_ids = [device.id for device in cuda.list_devices()]
    if len(T_B) <= 10:
        return

    m = 3
    zone = int(np.ceil(m / 4))
    expected = np.array(
        [
            utils.naive_mass(Q, T_B, m, idx, zone, True)
            for idx, Q in enumerate(core.rolling_window(T_B, m))
        ],
        dtype=object,
    )
    gpu_kwargs = {
        "ignore_trivial": True,
        "threads_per_block": THREADS_PER_BLOCK,
        "device_id": device_ids,
    }

    actual = gpu_stump(T_B, m, **gpu_kwargs)
    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)

    actual = gpu_stump(pd.Series(T_B), m, **gpu_kwargs)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
def test_stamp_A_B_join(T_A, T_B):
    """Check ``stamp.stamp`` A-B joins against a ``utils.naive_mass`` reference.

    Only the first two columns of the reference are compared with the
    ``stamp.stamp`` output.
    """
    m = 3
    per_window = [utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B, m)]
    expected = np.array(per_window, dtype=object)

    actual = stamp.stamp(T_A, T_B, m)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, :2], actual)
def test_stumped_A_B_join(T_A, T_B, dask_client):
    """Check ``stumped`` A-B joins against a ``utils.naive_mass`` reference.

    The whole reference array is compared with the ``stumped`` output.
    """
    m = 3
    per_window = [utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B, m)]
    expected = np.array(per_window, dtype=object)

    actual = stumped(dask_client, T_A, m, T_B, ignore_trivial=False)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
def test_one_constant_subsequence_A_B_join(dask_client):
    """Check ``stumped`` A-B join values when ``T_B`` is made of constant runs.

    ``T_A`` is random and ``T_B`` is 20 zeros followed by 5 ones. Only column 0
    is compared with the ``utils.naive_mass`` reference (indices are ignored).
    """
    T_A = np.random.rand(20)
    zeros_part = np.zeros(20, dtype=np.float64)
    ones_part = np.ones(5, dtype=np.float64)
    T_B = np.concatenate((zeros_part, ones_part))
    m = 3

    expected = np.array(
        [utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B, m)],
        dtype=object,
    )
    actual = stumped(dask_client, T_A, m, T_B, ignore_trivial=False)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # ignore indices
def test_stumped_A_B_join_df(T_A, T_B, dask_cluster):
    """Check ``stumped`` A-B joins with ``pd.Series`` inputs.

    A ``Client`` is opened on ``dask_cluster`` for the duration of the check.
    The whole ``utils.naive_mass`` reference is compared with the ``stumped``
    output.
    """
    with Client(dask_cluster) as dask_client:
        m = 3
        per_window = [
            utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B, m)
        ]
        expected = np.array(per_window, dtype=object)

        series_A = pd.Series(T_A)
        series_B = pd.Series(T_B)
        actual = stumped(dask_client, series_A, m, series_B, ignore_trivial=False)

        utils.replace_inf(expected)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected, actual)
def test_prescrump(T):
    """Run ``prescrump`` with ``s=1`` next to a ``utils.naive_mass`` reference.

    NOTE(review): neither result is asserted on, so this currently only checks
    that both computations run without raising. Confirm whether a comparison
    was intended.
    """
    m = 3
    zone = int(np.ceil(m / 4))

    per_window = [
        utils.naive_mass(Q, T, m, idx, zone, True)
        for idx, Q in enumerate(core.rolling_window(T, m))
    ]
    left = np.array(per_window, dtype=object)

    mean, std = core.compute_mean_std(T, m)
    # Note that the below code only works for `s=1`
    right = prescrump(T, m, mean, std, s=1)
def test_stamp_self_join(T_A, T_B):
    """Check ``stamp.stamp`` self-joins of ``T_B`` with trivial matches ignored.

    The reference uses a zone of ``ceil(m / 2)``. Only its first two columns
    are compared with the ``stamp.stamp`` output.
    """
    m = 3
    zone = int(np.ceil(m / 2))

    per_window = [
        utils.naive_mass(Q, T_B, m, idx, zone, ignore_trivial=True)
        for idx, Q in enumerate(core.rolling_window(T_B, m))
    ]
    expected = np.array(per_window, dtype=object)

    actual = stamp.stamp(T_B, T_B, m, ignore_trivial=True)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, :2], actual)
def test_stumped_self_join_df(T_A, T_B, dask_client):
    """Check ``stumped`` self-joins of ``T_B`` passed as a ``pd.Series``.

    The whole ``utils.naive_mass`` reference is compared with the ``stumped``
    output.
    """
    m = 3
    zone = int(np.ceil(m / 4))

    per_window = [
        utils.naive_mass(Q, T_B, m, idx, zone, True)
        for idx, Q in enumerate(core.rolling_window(T_B, m))
    ]
    expected = np.array(per_window, dtype=object)

    actual = stumped(dask_client, pd.Series(T_B), m, ignore_trivial=True)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
def test_stumped_one_subsequence_inf_A_B_join(
    T_A, T_B, substitution_location_B, dask_client
):
    """Check ``stumped`` A-B joins when one element of ``T_B`` is ``np.inf``.

    A copy of ``T_B`` gets ``np.inf`` written at ``substitution_location_B``.
    The whole ``utils.naive_mass`` reference is compared with the ``stumped``
    output.
    """
    m = 3
    work_B = T_B.copy()
    work_B[substitution_location_B] = np.inf

    per_window = [
        utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(work_B, m)
    ]
    expected = np.array(per_window, dtype=object)

    actual = stumped(dask_client, T_A, m, work_B, ignore_trivial=False)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
def test_stump_self_join_larger_window(T_A, T_B, dask_client):
    """Check ``stumped`` self-joins of ``T_B`` for window sizes 8, 16 and 32.

    Window sizes that are not smaller than ``len(T_B)`` are skipped. The whole
    ``utils.naive_mass`` reference is compared with the ``stumped`` output.
    """
    for m in [8, 16, 32]:
        if len(T_B) <= m:
            continue

        zone = int(np.ceil(m / 4))
        per_window = [
            utils.naive_mass(Q, T_B, m, idx, zone, True)
            for idx, Q in enumerate(core.rolling_window(T_B, m))
        ]
        expected = np.array(per_window, dtype=object)

        actual = stumped(dask_client, T_B, m, ignore_trivial=True)

        utils.replace_inf(expected)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected, actual)
def test_one_constant_subsequence_self_join_df(dask_client):
    """Check ``stumped`` self-join values for a constant-run ``pd.Series``.

    The series is 20 zeros followed by 5 ones. Only column 0 is compared with
    the ``utils.naive_mass`` reference (indices are ignored).
    """
    zeros_part = np.zeros(20, dtype=np.float64)
    ones_part = np.ones(5, dtype=np.float64)
    T_A = np.concatenate((zeros_part, ones_part))
    m = 3
    zone = int(np.ceil(m / 4))

    per_window = [
        utils.naive_mass(Q, T_A, m, idx, zone, True)
        for idx, Q in enumerate(core.rolling_window(T_A, m))
    ]
    expected = np.array(per_window, dtype=object)

    actual = stumped(dask_client, pd.Series(T_A), m, ignore_trivial=True)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # ignore indices
def test_stamp_nan_zero_mean_self_join():
    """Check ``stamp.stamp`` self-joins on a short series containing ``np.inf``.

    The reference uses a zone of ``ceil(m / 2)``. Only its first two columns
    are compared with the ``stamp.stamp`` output.
    """
    T = np.array([-1, 0, 1, np.inf, 1, 0, -1])
    m = 3
    zone = int(np.ceil(m / 2))

    per_window = [
        utils.naive_mass(Q, T, m, idx, zone, True)
        for idx, Q in enumerate(core.rolling_window(T, m))
    ]
    expected = np.array(per_window, dtype=object)

    actual = stamp.stamp(T, T, m, ignore_trivial=True)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, :2], actual)
def test_scrump_nan_zero_mean_self_join():
    """Check ``scrump`` self-join values on a short series containing ``np.inf``.

    Column 0 of the ``utils.naive_mass`` reference is compared with column 0 of
    the last value produced by iterating ``scrump``.
    """
    T = np.array([-1, 0, 1, np.inf, 1, 0, -1])
    m = 3
    zone = int(np.ceil(m / 4))

    per_window = [
        utils.naive_mass(Q, T, m, idx, zone, True)
        for idx, Q in enumerate(core.rolling_window(T, m))
    ]
    expected = np.array(per_window, dtype=object)

    # Exhaust the iterator; `actual` keeps the last value it produced.
    for actual in scrump(T, m):
        pass

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])
def test_stumped_nan_zero_mean_self_join(dask_cluster):
    """Check ``stumped`` self-joins on a short series containing ``np.inf``.

    A ``Client`` is opened on ``dask_cluster`` for the duration of the check.
    The whole ``utils.naive_mass`` reference is compared with the ``stumped``
    output.
    """
    with Client(dask_cluster) as dask_client:
        T = np.array([-1, 0, 1, np.inf, 1, 0, -1])
        m = 3
        zone = int(np.ceil(m / 4))

        per_window = [
            utils.naive_mass(Q, T, m, idx, zone, True)
            for idx, Q in enumerate(core.rolling_window(T, m))
        ]
        expected = np.array(per_window, dtype=object)

        actual = stumped(dask_client, T, m, ignore_trivial=True)

        utils.replace_inf(expected)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected, actual)
def test_scrimp_self_join(T):
    """Check ``scrimp`` self-join values with ``percentage=1.0``.

    Column 0 of the ``utils.naive_mass`` reference is compared with column 0 of
    the ``scrimp`` output for array input and then for ``pd.Series`` input.
    """
    m = 3
    zone = int(np.ceil(m / 4))

    per_window = [
        utils.naive_mass(Q, T, m, idx, zone, True)
        for idx, Q in enumerate(core.rolling_window(T, m))
    ]
    expected = np.array(per_window, dtype=object)

    actual = scrimp(T, m, percentage=1.0)
    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])

    actual = scrimp(pd.Series(T), m, percentage=1.0)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])
def test_scrimp_self_join_larger_window(T):
    """Check ``scrimp`` self-join values for window sizes 8, 16 and 32.

    Window sizes that are not smaller than ``len(T)`` are skipped. Column 0 of
    the ``utils.naive_mass`` reference is compared with column 0 of the
    ``scrimp`` output for array input and then for ``pd.Series`` input.
    """
    for m in [8, 16, 32]:
        if len(T) <= m:
            continue

        zone = int(np.ceil(m / 4))
        per_window = [
            utils.naive_mass(Q, T, m, idx, zone, True)
            for idx, Q in enumerate(core.rolling_window(T, m))
        ]
        expected = np.array(per_window, dtype=object)

        actual = scrimp(T, m)
        utils.replace_inf(expected)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected[:, 0], actual[:, 0])

        actual = scrimp(pd.Series(T), m)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected[:, 0], actual[:, 0])
def test_constant_subsequence_A_B_join_df_swap(dask_cluster):
    """Check ``stumped`` A-B join values with ``T_B`` passed first, as Series.

    Both series are made of constant runs. A ``Client`` is opened on
    ``dask_cluster`` for the duration of the check. Only column 0 is compared
    with the ``utils.naive_mass`` reference (indices are ignored).
    """
    with Client(dask_cluster) as dask_client:
        T_A = np.concatenate(
            (np.zeros(10, dtype=np.float64), np.ones(10, dtype=np.float64))
        )
        T_B = np.concatenate(
            (np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64))
        )
        m = 3

        per_window = [
            utils.naive_mass(Q, T_B, m) for Q in core.rolling_window(T_A, m)
        ]
        expected = np.array(per_window, dtype=object)

        actual = stumped(
            dask_client,
            pd.Series(T_B),
            m,
            pd.Series(T_A),
            ignore_trivial=False,
        )

        utils.replace_inf(expected)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # ignore indices
def test_stumped_one_subsequence_nan_self_join(
    T_A, T_B, substitution_location_B, dask_client
):
    """Check ``stumped`` self-joins when one element of ``T_B`` is ``np.nan``.

    A copy of ``T_B`` gets ``np.nan`` written at ``substitution_location_B``.
    The whole ``utils.naive_mass`` reference is compared with the ``stumped``
    output.
    """
    m = 3
    work_B = T_B.copy()
    work_B[substitution_location_B] = np.nan
    zone = int(np.ceil(m / 4))

    per_window = [
        utils.naive_mass(Q, work_B, m, idx, zone, True)
        for idx, Q in enumerate(core.rolling_window(work_B, m))
    ]
    expected = np.array(per_window, dtype=object)

    actual = stumped(dask_client, work_B, m, ignore_trivial=True)

    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)
def test_scrump_plus_plus_self_join(T):
    """Check ``scrump`` self-join values with ``pre_scrump=True``.

    Column 0 of the ``utils.naive_mass`` reference is compared with column 0 of
    the last value produced by iterating ``scrump``, for array input and then
    for ``pd.Series`` input.
    """
    m = 3
    zone = int(np.ceil(m / 4))

    per_window = [
        utils.naive_mass(Q, T, m, idx, zone, True)
        for idx, Q in enumerate(core.rolling_window(T, m))
    ]
    expected = np.array(per_window, dtype=object)

    # Exhaust the iterator; `actual` keeps the last value it produced.
    for actual in scrump(T, m, pre_scrump=True):
        pass
    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])

    for actual in scrump(pd.Series(T), m, pre_scrump=True):
        pass
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])
def test_constant_subsequence_self_join():
    """Check ``scrimp`` self-join values for a series made of constant runs.

    The series is 20 zeros followed by 5 ones and ``scrimp`` is called with
    ``percentage=1.0``. Only column 0 is compared with the ``utils.naive_mass``
    reference (indices are ignored), for array input and then for ``pd.Series``
    input.
    """
    zeros_part = np.zeros(20, dtype=np.float64)
    ones_part = np.ones(5, dtype=np.float64)
    T = np.concatenate((zeros_part, ones_part))
    m = 3
    zone = int(np.ceil(m / 4))

    per_window = [
        utils.naive_mass(Q, T, m, idx, zone, True)
        for idx, Q in enumerate(core.rolling_window(T, m))
    ]
    expected = np.array(per_window, dtype=object)

    actual = scrimp(T, m, percentage=1.0)
    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # ignore indices

    actual = scrimp(pd.Series(T), m, percentage=1.0)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # ignore indices
def test_gpu_stump_A_B_join(T_A, T_B):
    """Check ``gpu_stump`` A-B joins against a ``utils.naive_mass`` reference.

    The whole reference array is compared with the ``gpu_stump`` output for
    array inputs and then for ``pd.Series`` inputs.
    """
    m = 3
    per_window = [utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B, m)]
    expected = np.array(per_window, dtype=object)

    actual = gpu_stump(
        T_A,
        m,
        T_B,
        ignore_trivial=False,
        threads_per_block=THREADS_PER_BLOCK,
    )
    utils.replace_inf(expected)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)

    actual = gpu_stump(
        pd.Series(T_A),
        m,
        pd.Series(T_B),
        ignore_trivial=False,
        threads_per_block=THREADS_PER_BLOCK,
    )
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected, actual)