def test_stump_two_constant_subsequences_A_B_join():
    """Compare `stump` with `naive.stamp` on two piecewise-constant series.

    Both join directions are exercised, each with NumPy arrays and with
    pandas Series as input. Only column 0 of the results is compared; the
    remaining (index) columns are deliberately ignored.
    """
    T_A = np.concatenate(
        (np.zeros(10, dtype=np.float64), np.ones(10, dtype=np.float64))
    )
    T_B = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)))
    m = 3

    # The second pair repeats the join with the inputs swapped
    for first, second in ((T_A, T_B), (T_B, T_A)):
        ref_mp = naive.stamp(first, m, T_B=second)
        naive.replace_inf(ref_mp)

        # Same check for raw arrays and for pandas Series
        for as_input in (lambda ts: ts, pd.Series):
            comp_mp = stump(as_input(first), m, as_input(second), ignore_trivial=False)
            naive.replace_inf(comp_mp)
            npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
def test_stump_nan_inf_A_B_join(
    T_A, T_B, substitute_A, substitute_B, substitution_locations
):
    """Check the AB-join when single values of both series are substituted.

    For every pair of locations in `substitution_locations`, one element of
    `T_A` is replaced by `substitute_A` and one element of `T_B` by
    `substitute_B`. The `stump` output (array and pandas Series inputs) is
    then compared against a reference built from `utils.naive_mass`.
    """
    m = 3
    T_A_sub = T_A.copy()
    T_B_sub = T_B.copy()

    for loc_B in substitution_locations:
        for loc_A in substitution_locations:
            # Start from the unmodified data before injecting the substitutes
            T_A_sub[:] = T_A[:]
            T_B_sub[:] = T_B[:]
            T_A_sub[loc_A] = substitute_A
            T_B_sub[loc_B] = substitute_B

            ref_mp = np.array(
                [
                    utils.naive_mass(Q, T_A_sub, m)
                    for Q in core.rolling_window(T_B_sub, m)
                ],
                dtype=object,
            )
            utils.replace_inf(ref_mp)

            # Same check for raw arrays and for pandas Series
            for as_input in (lambda ts: ts, pd.Series):
                comp_mp = stump(
                    as_input(T_A_sub), m, as_input(T_B_sub), ignore_trivial=False
                )
                utils.replace_inf(comp_mp)
                npt.assert_almost_equal(ref_mp, comp_mp)
def test_stump_identical_subsequence_A_B_join():
    """Check the AB-join when both series share an identical subsequence.

    The same 8 random values are written into `T_A` (from position 1) and
    `T_B` (from position 11). Column 0 of the `stump` result is compared
    with `naive.stamp` to `config.STUMPY_TEST_PRECISION` decimals; the index
    columns are ignored.
    """
    identical = np.random.rand(8)
    T_A = np.random.rand(20)
    T_B = np.random.rand(20)
    width = identical.shape[0]
    T_A[1 : 1 + width] = identical
    T_B[11 : 11 + width] = identical
    m = 3

    ref_mp = naive.stamp(T_A, m, T_B=T_B)
    naive.replace_inf(ref_mp)

    # Same check for raw arrays and for pandas Series
    for as_input in (lambda ts: ts, pd.Series):
        comp_mp = stump(as_input(T_A), m, as_input(T_B), ignore_trivial=False)
        naive.replace_inf(comp_mp)
        npt.assert_almost_equal(
            ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION
        )  # ignore indices

    # Swap inputs (array input only)
    ref_mp = naive.stamp(T_B, m, T_B=T_A)
    naive.replace_inf(ref_mp)
    comp_mp = stump(T_B, m, T_A, ignore_trivial=False)
    naive.replace_inf(comp_mp)
    npt.assert_almost_equal(
        ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION
    )  # ignore indices
def test_two_constant_subsequences_A_B_join():
    """Compare `stump` with a `utils.naive_mass` reference on constant data.

    Both series are piecewise constant. Both join directions are exercised,
    each with NumPy arrays and with pandas Series as input. Only column 0 of
    the results is compared; the index columns are ignored.
    """
    T_A = np.concatenate(
        (np.zeros(10, dtype=np.float64), np.ones(10, dtype=np.float64))
    )
    T_B = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)))
    m = 3

    # The second pair repeats the join with the inputs swapped
    for first, second in ((T_A, T_B), (T_B, T_A)):
        ref_mp = np.array(
            [utils.naive_mass(Q, first, m) for Q in core.rolling_window(second, m)],
            dtype=object,
        )
        utils.replace_inf(ref_mp)

        # Same check for raw arrays and for pandas Series
        for as_input in (lambda ts: ts, pd.Series):
            comp_mp = stump(as_input(first), m, as_input(second), ignore_trivial=False)
            utils.replace_inf(comp_mp)
            npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
def test_stump_nan_inf_A_B_join(
    T_A, T_B, substitute_A, substitute_B, substitution_locations
):
    """Check the AB-join when single values of both series are substituted.

    For every pair of locations in `substitution_locations`, one element of
    `T_A` is replaced by `substitute_A` and one element of `T_B` by
    `substitute_B`. The `stump` output (array and pandas Series inputs) is
    then compared against `naive.stamp`.
    """
    m = 3
    T_A_sub = T_A.copy()
    T_B_sub = T_B.copy()

    for loc_B in substitution_locations:
        for loc_A in substitution_locations:
            # Start from the unmodified data before injecting the substitutes
            T_A_sub[:] = T_A[:]
            T_B_sub[:] = T_B[:]
            T_A_sub[loc_A] = substitute_A
            T_B_sub[loc_B] = substitute_B

            ref_mp = naive.stamp(T_A_sub, m, T_B=T_B_sub)
            naive.replace_inf(ref_mp)

            # Same check for raw arrays and for pandas Series
            for as_input in (lambda ts: ts, pd.Series):
                comp_mp = stump(
                    as_input(T_A_sub), m, as_input(T_B_sub), ignore_trivial=False
                )
                naive.replace_inf(comp_mp)
                npt.assert_almost_equal(ref_mp, comp_mp)
def test_stump_nan_inf_self_join(T_A, T_B, substitute_B, substitution_locations):
    """Check the self-join when a single value of the series is substituted.

    For every location in `substitution_locations`, one element of `T_B` is
    replaced by `substitute_B` and the `stump` self-join (array and pandas
    Series inputs) is compared against a reference built from
    `utils.naive_mass`. `T_A` is accepted but not used.
    """
    m = 3
    T_B_sub = T_B.copy()
    zone = int(np.ceil(m / 4))  # depends only on m, so computed once

    for loc_B in substitution_locations:
        # Start from the unmodified data before injecting the substitute
        T_B_sub[:] = T_B[:]
        T_B_sub[loc_B] = substitute_B

        ref_mp = np.array(
            [
                utils.naive_mass(Q, T_B_sub, m, i, zone, True)
                for i, Q in enumerate(core.rolling_window(T_B_sub, m))
            ],
            dtype=object,
        )
        utils.replace_inf(ref_mp)

        # Same check for raw arrays and for pandas Series
        for as_input in (lambda ts: ts, pd.Series):
            comp_mp = stump(as_input(T_B_sub), m, ignore_trivial=True)
            utils.replace_inf(comp_mp)
            npt.assert_almost_equal(ref_mp, comp_mp)
def test_stump_identical_subsequence_A_B_join():
    """Check the AB-join when both series share an identical subsequence.

    The same 8 random values are written into `T_A` (from position 1) and
    `T_B` (from position 11). Column 0 of the `stump` result is compared
    with `naive.stamp` to `naive.PRECISION` decimals; the index columns are
    ignored.
    """
    identical = np.random.rand(8)
    T_A = np.random.rand(20)
    T_B = np.random.rand(20)
    width = identical.shape[0]
    T_A[1 : 1 + width] = identical
    T_B[11 : 11 + width] = identical
    m = 3

    ref_mp = naive.stamp(T_A, m, T_B=T_B)
    naive.replace_inf(ref_mp)

    # Same check for raw arrays and for pandas Series
    for as_input in (lambda ts: ts, pd.Series):
        comp_mp = stump(as_input(T_A), m, as_input(T_B), ignore_trivial=False)
        naive.replace_inf(comp_mp)
        npt.assert_almost_equal(
            ref_mp[:, 0], comp_mp[:, 0], decimal=naive.PRECISION
        )  # ignore indices

    # Swap inputs (array input only)
    ref_mp = naive.stamp(T_B, m, T_B=T_A)
    naive.replace_inf(ref_mp)
    comp_mp = stump(T_B, m, T_A, ignore_trivial=False)
    naive.replace_inf(comp_mp)
    npt.assert_almost_equal(
        ref_mp[:, 0], comp_mp[:, 0], decimal=naive.PRECISION
    )  # ignore indices
def test_stump_A_B_join(T_A, T_B):
    """Compare the `stump` AB-join with `naive.stamp`.

    The full result arrays are compared, once with the inputs passed as
    given and once with both wrapped in pandas Series.
    """
    m = 3

    ref_mp = naive.stamp(T_A, m, T_B=T_B)
    naive.replace_inf(ref_mp)

    for as_input in (lambda ts: ts, pd.Series):
        comp_mp = stump(as_input(T_A), m, as_input(T_B), ignore_trivial=False)
        naive.replace_inf(comp_mp)
        npt.assert_almost_equal(ref_mp, comp_mp)
def test_stump_A_B_join(T_A, T_B):
    """Compare the `stump` AB-join with the `naive.stump` reference.

    The full result arrays are compared, once with the inputs passed as
    given and once with both wrapped in pandas Series.
    """
    m = 3

    ref_mp = naive.stump(T_A, m, T_B=T_B)
    naive.replace_inf(ref_mp)

    for as_input in (lambda ts: ts, pd.Series):
        comp_mp = stump(as_input(T_A), m, as_input(T_B), ignore_trivial=False)
        naive.replace_inf(comp_mp)
        npt.assert_almost_equal(ref_mp, comp_mp)
def test_stump_self_join(T_A, T_B):
    """Compare the `stump` self-join of `T_B` with `naive.stamp`.

    The reference uses an exclusion zone of ``ceil(m / 4)``. The check runs
    for `T_B` as given and wrapped in a pandas Series. `T_A` is accepted but
    not used.
    """
    m = 3

    ref_mp = naive.stamp(T_B, m, exclusion_zone=int(np.ceil(m / 4)))
    naive.replace_inf(ref_mp)

    for as_input in (lambda ts: ts, pd.Series):
        comp_mp = stump(as_input(T_B), m, ignore_trivial=True)
        naive.replace_inf(comp_mp)
        npt.assert_almost_equal(ref_mp, comp_mp)
def test_stump_self_join(T_A, T_B):
    """Compare the `stump` self-join of `T_B` with the `naive.stump` reference.

    The reference uses an exclusion zone of ``ceil(m / 4)``. The check runs
    for `T_B` as given and wrapped in a pandas Series. `T_A` is accepted but
    not used.
    """
    m = 3

    ref_mp = naive.stump(T_B, m, exclusion_zone=int(np.ceil(m / 4)))
    naive.replace_inf(ref_mp)

    for as_input in (lambda ts: ts, pd.Series):
        comp_mp = stump(as_input(T_B), m, ignore_trivial=True)
        naive.replace_inf(comp_mp)
        npt.assert_almost_equal(ref_mp, comp_mp)
def test_stump_A_B_join(T_A, T_B):
    """Compare the `stump` AB-join with a `utils.naive_mass` reference.

    The reference stacks one `utils.naive_mass` result per length-`m`
    window of `T_B`. The full arrays are compared, once with the inputs
    passed as given and once with both wrapped in pandas Series.
    """
    m = 3

    ref_mp = np.array(
        [utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B, m)],
        dtype=object,
    )
    utils.replace_inf(ref_mp)

    for as_input in (lambda ts: ts, pd.Series):
        comp_mp = stump(as_input(T_A), m, as_input(T_B), ignore_trivial=False)
        utils.replace_inf(comp_mp)
        npt.assert_almost_equal(ref_mp, comp_mp)
def test_stump_constant_subsequence_self_join():
    """Check the `stump` self-join on a piecewise-constant series.

    The series is 20 zeros followed by 5 ones. Column 0 of the result is
    compared with `naive.stamp` for array and pandas Series input; the index
    columns are ignored.
    """
    T_A = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)))
    m = 3

    ref_mp = naive.stamp(T_A, m, exclusion_zone=int(np.ceil(m / 4)))
    naive.replace_inf(ref_mp)

    for as_input in (lambda ts: ts, pd.Series):
        comp_mp = stump(as_input(T_A), m, ignore_trivial=True)
        naive.replace_inf(comp_mp)
        npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
def test_scrump_self_join_full(T_A, T_B):
    """Check `scrump` at ``percentage=1.0`` against `naive.stamp` and `stump`.

    After one `update()` call, the four outputs `P_`, `I_`, `left_I_` and
    `right_I_` are compared with columns 0-3 of the `naive.stamp` self-join
    and then with the same columns of the `stump` self-join. `T_A` is
    accepted but not used.
    """
    m = 3
    zone = int(np.ceil(m / 4))

    ref_mp = naive.stamp(T_B, m, exclusion_zone=zone)
    ref_P, ref_I, ref_left_I, ref_right_I = (ref_mp[:, col] for col in range(4))

    approx = scrump(T_B, m, ignore_trivial=True, percentage=1.0, pre_scrump=False)
    approx.update()
    # Each attribute is read exactly once and the same objects are reused below
    comp_P = approx.P_
    comp_I = approx.I_
    comp_left_I = approx.left_I_
    comp_right_I = approx.right_I_
    comp_columns = (comp_P, comp_I, comp_left_I, comp_right_I)

    naive.replace_inf(ref_P)
    naive.replace_inf(comp_P)

    for ref_col, comp_col in zip((ref_P, ref_I, ref_left_I, ref_right_I), comp_columns):
        npt.assert_almost_equal(ref_col, comp_col)

    # Second reference: the exact `stump` self-join (no `replace_inf` applied)
    ref_mp = stump(T_B, m, ignore_trivial=True)
    for col, comp_col in enumerate(comp_columns):
        npt.assert_almost_equal(ref_mp[:, col], comp_col)
def calculate_matrix_profile(column, seq_length):
    """Compute the stumpy matrix profile of one column.

    Parameters
    ----------
    column : array-like
        Values convertible to a float NumPy array.
    seq_length : int
        Subsequence length, passed to stumpy as ``m``.

    Returns
    -------
    mp
        The array returned by ``stumpy.gpu_stump`` when ``cuda.is_available()``
        is true, otherwise by ``stumpy.stump``.

    Raises
    ------
    Exception
        If the column cannot be converted to float, or if the resulting
        matrix profile contains null values.
    TypeError, ValueError
        Re-raised unchanged from stumpy after printing a short hint.
    """
    try:
        # stumpy needs a float array. Use the concrete float64 dtype: passing
        # the abstract np.floating as a dtype is deprecated in NumPy.
        old_data = np.array(column, dtype=np.float64)
    except ValueError as e:
        raise Exception("Can't convert column to float") from e

    # Imported lazily, and only once the input is known to be usable.
    import stumpy

    # NOTE(review): ignore_trivial=False is used although no second series is
    # passed — confirm this is intended for a self-join.
    try:
        if cuda.is_available():
            gpu_device_ids = [device.id for device in cuda.list_devices()]
            mp = stumpy.gpu_stump(
                old_data,
                m=seq_length,
                ignore_trivial=False,
                device_id=gpu_device_ids,
            )
        else:
            mp = stumpy.stump(old_data, m=seq_length, ignore_trivial=False)
    except TypeError:
        print('Type issue in stumpy:')
        raise
    except ValueError:
        print('Seq_length issue in stumpy')
        raise

    if pd.isnull(mp).any():
        raise Exception(
            'Matrix profile for the column contains NaN values. Try to increase the dataset size'
        )
    return mp
def test_scrump_self_join_full(T_A, T_B):
    """Check `scrump` at ``percentage=1.0`` against `naive.stamp` and `stump`.

    After one `update()` call, the four outputs `P_`, `I_`, `left_I_` and
    `right_I_` are compared with columns 0-3 of the `naive.stamp` self-join
    and then with the same columns of the `stump` self-join. `T_A` is
    accepted but not used.
    """
    m = 3
    zone = int(np.ceil(m / 4))

    naive_mp = naive.stamp(T_B, m, exclusion_zone=zone)
    naive_P, naive_I, naive_left_I, naive_right_I = (
        naive_mp[:, col] for col in range(4)
    )

    approx = scrump(T_B, m, ignore_trivial=True, percentage=1.0, pre_scrump=False)
    approx.update()
    # Each attribute is read exactly once and the same objects are reused below
    comp_P = approx.P_
    comp_I = approx.I_
    comp_left_I = approx.left_I_
    comp_right_I = approx.right_I_
    comp_columns = (comp_P, comp_I, comp_left_I, comp_right_I)

    naive.replace_inf(naive_P)
    naive.replace_inf(comp_P)

    naive_columns = (naive_P, naive_I, naive_left_I, naive_right_I)
    for ref_col, comp_col in zip(naive_columns, comp_columns):
        npt.assert_almost_equal(ref_col, comp_col)

    # Second reference: the exact `stump` self-join (no `replace_inf` applied)
    exact_mp = stump(T_B, m, ignore_trivial=True)
    for col, comp_col in enumerate(comp_columns):
        npt.assert_almost_equal(exact_mp[:, col], comp_col)
def _transform(self, X, y=None):
    """Return the matrix profile of the time series in X.

    Private method holding the core logic; it is called from ``transform``.

    Parameters
    ----------
    X : 2D np.ndarray
        Data to be transformed. It is flattened before being passed to
        ``stumpy.stump``.
    y : ignored argument for interface compatibility
        Additional data, e.g., labels for transformation

    Returns
    -------
    Xt : 1D np.ndarray
        Column 0 of the ``stumpy.stump`` result cast to float, i.e. the
        matrix profile of the time series, with length
        (n_timepoints - window_length + 1).
    """
    import stumpy

    series = X.flatten()
    profile = stumpy.stump(series, self.window_length)
    return profile[:, 0].astype("float")
def test_stump(T, m):
    """Check that `stumpy.stump(..., normalize=False)` matches `stumpy.aamp`.

    When `T` has more than one dimension, only the first row of a copy of
    `T` is used.
    """
    series = T.copy()[0] if T.ndim > 1 else T

    expected = stumpy.aamp(series, m)
    actual = stumpy.stump(series, m, normalize=False)

    npt.assert_almost_equal(expected, actual)
def test_floss():
    """Check streaming `floss` updates against a step-by-step reference.

    A `floss` stream is seeded with the first 30 of 64 random points. For
    each further point, the reference array `mp` is shifted and updated by
    hand, and the stream's `cac_1d_`, `P_`, `I_` and `T_` are compared with
    the reference values.
    """
    data = np.random.uniform(-1000, 1000, [64])
    m = 5
    old_data = data[:30]  # points used to seed the stream
    n = old_data.shape[0]
    add_data = data[30:]  # NOTE(review): not referenced again in this test

    mp = naive_right_mp(old_data, m)
    comp_mp = stump(old_data, m)
    k = mp.shape[0]

    # Length-n windows over data[1:]; each one is compared with stream.T_
    # after the corresponding update
    rolling_Ts = core.rolling_window(data[1:], n)
    L = 5
    excl_factor = 1
    custom_iac = _iac(k, bidirectional=False)
    stream = floss(comp_mp, old_data, m, L, excl_factor, custom_iac=custom_iac)
    last_idx = n - m + 1
    excl_zone = int(np.ceil(m / 4))
    zone_start = max(0, k - excl_zone)
    for i, ref_T in enumerate(rolling_Ts):
        # Columns 1 and 2 are not part of the comparison; reset them
        mp[:, 1] = -1
        mp[:, 2] = -1
        # Shift every row up by one and re-initialise the last row
        mp[:] = np.roll(mp, -1, axis=0)
        mp[-1, 0] = np.inf
        mp[-1, 3] = last_idx + i

        # Distances from the last m points of the window to the whole window,
        # with the entries from zone_start onward excluded
        D = naive_distance_profile(ref_T[-m:], ref_T, m)
        D[zone_start:] = np.inf

        # Wherever the new distance is smaller, record it in column 0 and
        # store last_idx + i in column 3
        update_idx = np.argwhere(D < mp[:, 0]).flatten()
        mp[update_idx, 0] = D[update_idx]
        mp[update_idx, 3] = last_idx + i

        ref_cac_1d = _cac(
            mp[:, 3] - i - 1,
            L,
            bidirectional=False,
            excl_factor=excl_factor,
            custom_iac=custom_iac,
        )

        # Work on a copy so that replace_inf below does not alter mp
        ref_mp = mp.copy()
        ref_P = ref_mp[:, 0]
        ref_I = ref_mp[:, 3]

        # Feed the newest point of the window to the stream
        stream.update(ref_T[-1])
        comp_cac_1d = stream.cac_1d_
        comp_P = stream.P_
        comp_I = stream.I_
        comp_T = stream.T_

        naive.replace_inf(ref_P)
        naive.replace_inf(comp_P)

        npt.assert_almost_equal(ref_cac_1d, comp_cac_1d)
        npt.assert_almost_equal(ref_P, comp_P)
        npt.assert_almost_equal(ref_I, comp_I)
        npt.assert_almost_equal(ref_T, comp_T)
def test_stump_self_join(T_A, T_B):
    """Compare the `stump` self-join of `T_B` with a `utils.naive_mass` reference.

    The reference stacks one `utils.naive_mass` result per length-`m`
    window of `T_B`, using an exclusion zone of ``ceil(m / 4)``. The check
    runs for `T_B` as given and wrapped in a pandas Series. `T_A` is
    accepted but not used.
    """
    m = 3
    zone = int(np.ceil(m / 4))

    ref_mp = np.array(
        [
            utils.naive_mass(Q, T_B, m, i, zone, True)
            for i, Q in enumerate(core.rolling_window(T_B, m))
        ],
        dtype=object,
    )
    utils.replace_inf(ref_mp)

    for as_input in (lambda ts: ts, pd.Series):
        comp_mp = stump(as_input(T_B), m, ignore_trivial=True)
        utils.replace_inf(comp_mp)
        npt.assert_almost_equal(ref_mp, comp_mp)
def _transform(self, X, y=None):
    """Compute the matrix profile of every time series in ``X``.

    Parameters
    ----------
    X : array of shape (n_ts, sz, d)
        Time series dataset. Only ``d == 1`` is supported.
    y : ignored
        Not used by this method.

    Returns
    -------
    numpy.ndarray of shape (n_ts, sz - subsequence_length + 1, 1)
        One matrix profile per input time series.

    Raises
    ------
    NotImplementedError
        If ``X`` has more than one dimension per timestamp (``d > 1``).
    ImportError
        If ``implementation`` is "stump" or "gpu_stump" and stumpy is not
        installed.
    ValueError
        If ``implementation`` is not one of "numpy", "stump", "gpu_stump".
    """
    n_ts, sz, d = X.shape

    if d > 1:
        raise NotImplementedError("We currently don't support using "
                                  "multi-dimensional matrix profiles "
                                  "from the stumpy library.")

    output_size = sz - self.subsequence_length + 1
    X_transformed = np.empty((n_ts, output_size, 1))

    if self.implementation == "stump":
        if not STUMPY_INSTALLED:
            raise ImportError(stumpy_msg)

        for i_ts in range(n_ts):
            result = stumpy.stump(T_A=X[i_ts, :, 0].ravel(),
                                  m=self.subsequence_length)
            # Builtin ``float`` replaces the ``np.float`` alias, which was
            # removed from NumPy.
            X_transformed[i_ts, :, 0] = result[:, 0].astype(float)

    elif self.implementation == "gpu_stump":
        if not STUMPY_INSTALLED:
            raise ImportError(stumpy_msg)

        for i_ts in range(n_ts):
            result = stumpy.gpu_stump(T_A=X[i_ts, :, 0].ravel(),
                                      m=self.subsequence_length)
            X_transformed[i_ts, :, 0] = result[:, 0].astype(float)

    elif self.implementation == "numpy":
        scaler = TimeSeriesScalerMeanVariance()
        band_width = int(np.ceil(self.subsequence_length / 4))

        for i_ts in range(n_ts):
            segments = _series_to_segments(X[i_ts], self.subsequence_length)
            if self.scale:
                segments = scaler.fit_transform(segments)
            n_segments = segments.shape[0]
            segments_2d = segments.reshape(
                (-1, self.subsequence_length * d))
            dists = squareform(pdist(segments_2d, "euclidean"))
            # Mask the entries within ``band_width`` of the main diagonal so
            # they cannot be picked as the minimum. Builtin ``bool`` replaces
            # the ``np.bool`` alias, which was removed from NumPy.
            band = (np.tri(
                n_segments, n_segments, band_width, dtype=bool
            ) & ~np.tri(
                n_segments, n_segments, -(band_width + 1), dtype=bool))
            dists[band] = np.inf
            X_transformed[i_ts] = dists.min(axis=1, keepdims=True)

    else:
        available_implementations = ["numpy", "stump", "gpu_stump"]
        raise ValueError(
            'This "{}" matrix profile implementation is not'
            ' recognized. Available implementations are {}.'.format(
                self.implementation, available_implementations))

    return X_transformed
def test_stump_nan_inf_self_join(T_A, T_B, substitute_B, substitution_locations):
    """Check the self-join when a single value of the series is substituted.

    For every location in `substitution_locations`, one element of `T_B` is
    replaced by `substitute_B` and the `stump` self-join (array and pandas
    Series inputs) is compared against `naive.stamp`. `T_A` is accepted but
    not used.
    """
    m = 3
    T_B_sub = T_B.copy()
    zone = int(np.ceil(m / 4))  # depends only on m, so computed once

    for loc_B in substitution_locations:
        # Start from the unmodified data before injecting the substitute
        T_B_sub[:] = T_B[:]
        T_B_sub[loc_B] = substitute_B

        ref_mp = naive.stamp(T_B_sub, m, exclusion_zone=zone)
        naive.replace_inf(ref_mp)

        # Same check for raw arrays and for pandas Series
        for as_input in (lambda ts: ts, pd.Series):
            comp_mp = stump(as_input(T_B_sub), m, ignore_trivial=True)
            naive.replace_inf(comp_mp)
            npt.assert_almost_equal(ref_mp, comp_mp)
def apply(model, df, param):
    """Append the matrix profile of the target column to ``df``.

    Parameters
    ----------
    model
        Not used by this function.
    df : pandas.DataFrame
        Input data; must contain the target column.
    param : dict
        ``param['target_variables'][0]`` names the target column. The
        window size is read from ``param['options']['params']['m']`` when
        present and defaults to 24.

    Returns
    -------
    pandas.DataFrame
        ``df`` with an extra ``matrix_profile`` column holding column 0 of
        the ``stumpy.stump`` result.
    """
    m = 24
    if (
        'options' in param
        and 'params' in param['options']
        and 'm' in param['options']['params']
    ):
        m = int(param['options']['params']['m'])

    target = param['target_variables'][0]
    mp = stumpy.stump(df[target], m)

    # Reuse the index labels of the first len(mp) rows of ``df``. With a
    # fresh RangeIndex, ``concat(axis=1)`` would align on labels and misplace
    # the profile whenever ``df`` is not indexed 0..n-1.
    result = pd.DataFrame(
        mp[:, 0], columns=['matrix_profile'], index=df.index[:len(mp)]
    )
    return pd.concat([df, result], axis=1)
def test_constant_subsequence_self_join():
    """Check the `stump` self-join on a piecewise-constant series.

    The series is 20 zeros followed by 5 ones. Column 0 of the result is
    compared with a reference built from `utils.naive_mass`, for array and
    pandas Series input; the index columns are ignored.
    """
    T_A = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)))
    m = 3
    zone = int(np.ceil(m / 4))

    ref_mp = np.array(
        [
            utils.naive_mass(Q, T_A, m, i, zone, True)
            for i, Q in enumerate(core.rolling_window(T_A, m))
        ],
        dtype=object,
    )
    utils.replace_inf(ref_mp)

    for as_input in (lambda ts: ts, pd.Series):
        comp_mp = stump(as_input(T_A), m, ignore_trivial=True)
        utils.replace_inf(comp_mp)
        npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
def test_stump_identical_subsequence_self_join():
    """Check the self-join when the series contains a repeated subsequence.

    The same 8 random values are written into `T_A` at positions 1 and 11.
    Column 0 of the `stump` result is compared with `naive.stamp` to
    `config.STUMPY_TEST_PRECISION` decimals, for array and pandas Series
    input; the index columns are ignored.
    """
    identical = np.random.rand(8)
    T_A = np.random.rand(20)
    width = identical.shape[0]
    T_A[1 : 1 + width] = identical
    T_A[11 : 11 + width] = identical
    m = 3

    ref_mp = naive.stamp(T_A, m, exclusion_zone=int(np.ceil(m / 4)))
    naive.replace_inf(ref_mp)

    for as_input in (lambda ts: ts, pd.Series):
        comp_mp = stump(as_input(T_A), m, ignore_trivial=True)
        naive.replace_inf(comp_mp)
        npt.assert_almost_equal(
            ref_mp[:, 0], comp_mp[:, 0], decimal=config.STUMPY_TEST_PRECISION
        )  # ignore indices
def test_stump_nan_zero_mean_self_join():
    """Check the self-join on a short series with an `np.inf` in the middle.

    The full `stump` result is compared with `naive.stamp`, using an
    exclusion zone of ``ceil(m / 4)`` for the reference.
    """
    m = 3
    T = np.array([-1, 0, 1, np.inf, 1, 0, -1])

    ref_mp = naive.stamp(T, m, exclusion_zone=int(np.ceil(m / 4)))
    naive.replace_inf(ref_mp)

    comp_mp = stump(T, m, ignore_trivial=True)
    naive.replace_inf(comp_mp)

    npt.assert_almost_equal(ref_mp, comp_mp)
def test_stump_one_constant_subsequence_A_B_join():
    """Check the AB-join when one of the two series is piecewise constant.

    `T_A` is random and `T_B` is 20 zeros followed by 5 ones. Column 0 of
    the `stump` result is compared with `naive.stamp`; the index columns are
    ignored. The swapped join is checked with array input only.
    """
    T_A = np.random.rand(20)
    T_B = np.concatenate((np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)))
    m = 3

    ref_mp = naive.stamp(T_A, m, T_B=T_B)
    naive.replace_inf(ref_mp)

    # Same check for raw arrays and for pandas Series
    for as_input in (lambda ts: ts, pd.Series):
        comp_mp = stump(as_input(T_A), m, as_input(T_B), ignore_trivial=False)
        naive.replace_inf(comp_mp)
        npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices

    # Swap inputs
    ref_mp = naive.stamp(T_B, m, T_B=T_A)
    naive.replace_inf(ref_mp)
    comp_mp = stump(T_B, m, T_A, ignore_trivial=False)
    naive.replace_inf(comp_mp)
    npt.assert_almost_equal(ref_mp[:, 0], comp_mp[:, 0])  # ignore indices
def makeProxMat(self):
    """Combine the per-snapshot profiles into one scaled result.

    `stu.stump` (presumably `stumpy.stump` — confirm against the file's
    imports) is run on every entry of `self.snapshots` with window
    `self.mpWindow`. Column 0 of the k-th result (counting from 1) is
    multiplied by k in place, each result is passed to `self.proxMat`
    together with `self.snap`, the outputs are summed, and the sum is passed
    through `self.scale(..., 0, 1)`.
    """
    window_size = self.mpWindow
    profiles = [stu.stump(snapshot, window_size) for snapshot in self.snapshots]

    # Weight column 0 of each profile by its 1-based position
    for weight, profile in enumerate(profiles, start=1):
        profile[:, 0] *= weight

    to_prox = self.proxMat
    reference = self.snap
    combined = sum(to_prox(profile, reference) for profile in profiles)

    return self.scale(combined, 0, 1)
def test_stump_identical_subsequence_self_join():
    """Check the self-join when the series contains a repeated subsequence.

    The same 8 random values are written into `T_A` at positions 1 and 11.
    Column 0 of the `stump` result is compared with `naive.stamp` to
    `naive.PRECISION` decimals, for array and pandas Series input; the index
    columns are ignored.
    """
    identical = np.random.rand(8)
    T_A = np.random.rand(20)
    width = identical.shape[0]
    T_A[1 : 1 + width] = identical
    T_A[11 : 11 + width] = identical
    m = 3

    ref_mp = naive.stamp(T_A, m, exclusion_zone=int(np.ceil(m / 4)))
    naive.replace_inf(ref_mp)

    for as_input in (lambda ts: ts, pd.Series):
        comp_mp = stump(as_input(T_A), m, ignore_trivial=True)
        naive.replace_inf(comp_mp)
        npt.assert_almost_equal(
            ref_mp[:, 0], comp_mp[:, 0], decimal=naive.PRECISION
        )  # ignore indices
def test_stump_nan_zero_mean_self_join():
    """Check the self-join on a short series with an `np.inf` in the middle.

    The full `stump` result is compared with `naive.stamp`, using an
    exclusion zone of ``ceil(m / 4)`` for the reference.
    """
    m = 3
    T = np.array([-1, 0, 1, np.inf, 1, 0, -1])

    ref_mp = naive.stamp(T, m, exclusion_zone=int(np.ceil(m / 4)))
    naive.replace_inf(ref_mp)

    comp_mp = stump(T, m, ignore_trivial=True)
    naive.replace_inf(comp_mp)

    npt.assert_almost_equal(ref_mp, comp_mp)