def _compute_matrix_profile(self):
    """
    Compute the matrix profile using STOMP.

    The first distance profile is built from the dot products of the first
    window of ``ts2`` against ``ts1``. Every further profile reuses the
    previous dot products through an incremental update instead of
    recomputing them. Results are stored in ``self._matrix_profile`` and
    ``self._index_profile``; nothing is returned.
    """
    m = self.window_size
    self_join = self._same_ts
    # Number of positions the window can be shifted by inside ts1.
    n_shifts = len(self.ts1) - m

    ts_mean, ts_std = utils.rolling_avg_sd(self.ts1, m)
    dot_products = utils.sliding_dot_product(self.ts2[:m], self.ts1)

    if self_join:
        # Self-join: the query statistics and the first-column dot
        # products are the same as those of the series itself.
        query_mean, query_std = ts_mean, ts_std
        first_column = np.copy(dot_products)
    else:
        query_mean, query_std = utils.rolling_avg_sd(self.ts2, m)
        first_column = utils.sliding_dot_product(self.ts1[:m], self.ts2)

    distances = utils.calculate_distance_profile(
        dot_products, m, query_mean[0], query_std[0], ts_mean, ts_std
    )

    if self_join:
        # Mask the exclusion zone at the start of the first profile.
        zone_end = min(len(self.ts2), self.exclusion_zone) + 1
        distances[:zone_end] = np.inf

    self._matrix_profile = np.copy(distances)
    self._index_profile = np.zeros(n_shifts + 1)

    for idx in self._iterator:
        # Slide the query by one sample: remove the contribution of the
        # sample that left the window and add the one that entered it.
        leaving = self.ts1[:n_shifts] * self.ts2[idx - 1]
        entering = self.ts1[m:] * self.ts2[idx + m - 1]
        dot_products[1:] = dot_products[:n_shifts] - leaving + entering
        # The first entry cannot be derived from the previous profile.
        dot_products[0] = first_column[idx]

        distances = utils.calculate_distance_profile(
            dot_products, m, query_mean[idx], query_std[idx], ts_mean, ts_std
        )
        # NOTE(review): presumably folds this profile into the running
        # matrix/index profiles -- confirm against _elementwise_min.
        self._elementwise_min(distances, idx)
def test_sliding_dot_product_sanity1(self):
    """An all-zero query must produce an all-zero sliding dot product."""
    query = np.zeros(200)
    series = np.random.rand(1000)

    result = utils.sliding_dot_product(query, series)
    expected_len = len(series) - len(query) + 1

    assert len(result) == expected_len, "sliding_dot_product_sanity1: result should have correct length"
    assert np.array_equal(result, np.zeros(expected_len)), \
        "sliding_dot_product_sanity1: dot product of zero vector should be zero"
def test_sliding_dot_product_data1(self):
    """Compare the sliding dot product against a stored reference result."""
    series = np.loadtxt("./data/random_walk_data.csv")
    # The query is the first 1000 samples of the series itself.
    query = series[:1000]

    result = utils.sliding_dot_product(query, series)
    reference = np.loadtxt("./data/random_walk_data_sdp.csv")
    expected_len = len(series) - len(query) + 1

    assert len(result) == expected_len, "sliding_dot_product_data1: result should have correct length"
    assert np.allclose(result, reference), \
        "sliding_dot_product_data1: sliding dot product should be computed correctly"
def test_sliding_dot_product_random_data(self):
    """Check the sliding dot product against the naive helper on random input."""
    # Random sizes: series length in [100, 1000), query length in [10, n).
    series_len = np.random.randint(100, 1000)
    query_len = np.random.randint(10, series_len)
    query = np.random.rand(query_len)
    series = np.random.rand(series_len)

    result = utils.sliding_dot_product(query, series)
    reference = helpers.naive_sliding_dot_product(query, series)

    assert len(result) == series_len - query_len + 1, \
        "sliding_dot_product_random_data: sliding dot product should have correct length"
    assert np.allclose(result, reference), \
        "sliding_dot_product_random_data: sliding dot product should be computed correctly"
def test_calculate_distance_profile_constant_sequence_and_query(self):
    """A constant query against a constant sequence must give distance zero everywhere."""
    n = 100
    m = np.random.randint(10, n // 2)
    # Both series are constant; their values are drawn independently.
    t = np.full(n, np.random.rand())
    q = np.full(m, np.random.rand())

    qt = utils.sliding_dot_product(q, t)
    rolling_mean, rolling_std = utils.rolling_avg_sd(t, m)
    dp = utils.calculate_distance_profile(qt, m, np.mean(q), np.std(q), rolling_mean, rolling_std)

    # Fixed the typo "ero" -> "zero" in the failure message.
    assert np.allclose(dp, np.full(n - m + 1, 0)), "calculate_distance_profile_constant_sequence_and_query: " \
        "distance of constant query to constant sequence is zero by definition."
def test_calculate_distance_profile_constant_query(self):
    """A constant query against a random sequence must give distance sqrt(m) everywhere."""
    series_len = 100
    window = np.random.randint(10, series_len // 2)
    series = np.random.rand(series_len)
    query = np.full(window, np.random.rand())

    dot_products = utils.sliding_dot_product(query, series)
    series_mean, series_std = utils.rolling_avg_sd(series, window)
    # The query is constant, so its mean is its first value and its std is 0.
    profile = utils.calculate_distance_profile(
        dot_products, window, query[0], 0, series_mean, series_std
    )

    expected = np.full(series_len - window + 1, np.sqrt(window))
    assert np.allclose(profile, expected), "calculate_distance_profile_constant_query: " \
        "distance of nonconstant sequence to constant query is sqrt(m) by definition."
def test_sliding_dot_product_sanity2(self):
    """Sliding the one-element query [1] over a series must reproduce the series."""
    unit_query = np.array([1])
    series = np.random.rand(1000)

    result = utils.sliding_dot_product(unit_query, series)
    expected_len = len(series) - len(unit_query) + 1

    assert len(result) == expected_len, "sliding_dot_product_sanity2: result should have correct length"
    assert np.allclose(result, series), \
        "sliding_dot_product_sanity2: dot product of a vector with [1] should contain itself"
def test_sliding_dot_product_query_too_long(self):
    """A query longer than the series must make sliding_dot_product raise ValueError."""
    q = np.random.rand(1000)
    t = np.random.rand(200)

    # Only the call under test sits inside the context manager, so a
    # ValueError raised while building the fixtures cannot pass the test.
    with pytest.raises(ValueError):
        utils.sliding_dot_product(q, t)