def _compute_matrix_profile(self): """ Compute the matrix profile using STOMP. """ mu_T, sigma_T = utils.rolling_avg_sd(self.ts1, self.window_size) QT = utils.sliding_dot_product(self.ts2[:self.window_size], self.ts1) if self._same_ts: mu_Q, sigma_Q = mu_T, sigma_T TQ = np.copy(QT) else: mu_Q, sigma_Q = utils.rolling_avg_sd(self.ts2, self.window_size) TQ = utils.sliding_dot_product(self.ts1[:self.window_size], self.ts2) D = utils.calculate_distance_profile(QT, self.window_size, mu_Q[0], sigma_Q[0], mu_T, sigma_T) if self._same_ts: lower_ez_bound = 0 upper_ez_bound = min(len(self.ts2), self.exclusion_zone) + 1 D[lower_ez_bound:upper_ez_bound] = np.inf self._matrix_profile = np.copy(D) self._index_profile = np.zeros((len(self.ts1) - self.window_size + 1,)) for idx in self._iterator: QT[1:] = QT[:len(self.ts1)-self.window_size] - self.ts1[:len(self.ts1)-self.window_size] * self.ts2[idx-1] \ + self.ts1[self.window_size:] * self.ts2[idx + self.window_size - 1] QT[0] = TQ[idx] D = utils.calculate_distance_profile(QT, self.window_size, mu_Q[idx], sigma_Q[idx], mu_T, sigma_T) self._elementwise_min(D, idx)
def _compute_matrix_profile(self): """ Compute the matrix profile using PreSCRIMP. """ try: mu_T, sigma_T = utils.rolling_avg_sd(self.ts1, self.window_size) if self._same_ts: mu_Q, sigma_Q = mu_T, sigma_T else: mu_Q, sigma_Q = utils.rolling_avg_sd(self.ts2, self.window_size) for n_iter, idx in enumerate(self._iterator): D = utils.mass(self.ts2[idx: idx+self.window_size], self.ts1) self._elementwise_min(D, idx) jdx = np.argmin(D) # the index of closest profile to the current idx # compute diagonals until the next sampled point q1 = self.ts2[idx:idx + self.sample_interval + self.window_size - 1] q2 = self.ts1[jdx:jdx + self.sample_interval + self.window_size - 1] lq = min(len(q1), len(q2)) q = q1[:lq] * q2[:lq] q = utils.rolling_sum(q, self.window_size) D = utils.calculate_distance_profile(q, self.window_size, mu_Q[idx:idx + len(q)], sigma_Q[idx:idx + len(q)], mu_T[jdx:jdx + len(q)], sigma_T[jdx:jdx + len(q)]) self._index_profile[jdx: jdx + len(q)] = np.where(D < self._matrix_profile[jdx:jdx + len(q)], np.arange(idx, idx + len(q)), self._index_profile[jdx:jdx + len(q)]) self._matrix_profile[jdx:jdx + len(q)] = np.minimum(D, self._matrix_profile[jdx:jdx + len(q)]) if self._same_ts: self._index_profile[idx:idx + len(q)] = np.where(D < self._matrix_profile[idx:idx + len(q)], np.arange(jdx, jdx + len(q)), self._index_profile[idx:idx + len(q)]) self._matrix_profile[idx:idx + len(q)] = np.minimum(D, self._matrix_profile[idx:idx + len(q)]) # compute diagonals until the previous sampled point if idx != 0 and jdx != 0: q1 = self.ts2[max(0, idx - self.sample_interval):(idx + self.window_size - 1)] q2 = self.ts1[max(0, jdx - self.sample_interval):(jdx + self.window_size - 1)] lq = min(len(q1), len(q2)) q = q1[-lq:] * q2[-lq:] q = utils.rolling_sum(q, self.window_size) D = utils.calculate_distance_profile(q, self.window_size, mu_Q[idx - len(q):idx], sigma_Q[idx - len(q):idx], mu_T[jdx - len(q):jdx], sigma_T[jdx - len(q):jdx]) self._index_profile[jdx - len(q): jdx] = np.where(D < self._matrix_profile[jdx - len(q):jdx], np.arange(idx - len(q), idx), self._index_profile[jdx - len(q):jdx]) self._matrix_profile[jdx - len(q):jdx] = np.minimum(D, self._matrix_profile[jdx - len(q):jdx]) if self._same_ts: self._index_profile[idx - len(q):idx] = np.where(D < self._matrix_profile[idx - len(q):idx], np.arange(jdx - len(q), jdx), self._index_profile[idx - len(q):idx]) self._matrix_profile[idx - len(q):idx] = np.minimum(D, self._matrix_profile[idx - len(q):idx]) except KeyboardInterrupt: if self.verbose: tqdm.write("Calculation interrupted at iteration {}. Approximate result returned.".format(n_iter))
def test_rolling_avg_sd_data1(self): t = np.loadtxt("./data/random_walk_data.csv") ra, rsd = utils.rolling_avg_sd(t, 1000) ra_ans = np.loadtxt("./data/random_walk_data_rolling_mean.csv") rsd_ans = np.loadtxt("./data/random_walk_data_rolling_std.csv") assert np.allclose(ra, ra_ans), "rolling_avg_sd_random_data: rolling sum should be computed correctly" assert np.allclose(rsd, rsd_ans), "rolling_avg_sd_random_data: rolling sd should be computed correctly"
def test_rolling_avg_sd_random_data(self): t = np.random.rand(1000) m = np.random.randint(10, 1000) ra, rsd = utils.rolling_avg_sd(t, m) naive_ra = np.array([np.mean(t[i:i+m]) for i in range(1000 - m + 1)]) naive_rsd = np.array([np.std(t[i:i+m]) for i in range(1000 - m + 1)]) assert np.allclose(ra, naive_ra), "rolling_avg_sd_random_data: rolling sum should be computed correctly" assert np.allclose(rsd, naive_rsd), "rolling_avg_sd_random_data: rolling sd should be computed correctly"
def test_calculate_distance_profile_constant_sequence_and_query(self): n = 100 m = np.random.randint(10, n // 2) t = np.full(n, np.random.rand()) q = np.full(m, np.random.rand()) qt = utils.sliding_dot_product(q, t) rolling_mean, rolling_std = utils.rolling_avg_sd(t, m) dp = utils.calculate_distance_profile(qt, m, np.mean(q), np.std(q), rolling_mean, rolling_std) assert np.allclose(dp, np.full(n - m + 1, 0)), "calculate_distance_profile_constant_sequence_and_query: " \ "distance of constant query to constant sequence is ero by definition."
def test_calculate_distance_profile_constant_query(self): n = 100 m = np.random.randint(10, n // 2) t = np.random.rand(n) q = np.full(m, np.random.rand()) qt = utils.sliding_dot_product(q, t) rolling_mean, rolling_std = utils.rolling_avg_sd(t, m) dp = utils.calculate_distance_profile(qt, m, q[0], 0, rolling_mean, rolling_std) assert np.allclose(dp, np.full(n - m + 1, np.sqrt(m))), "calculate_distance_profile_constant_query: " \ "distance of nonconstant sequence to constant query is sqrt(m) by definition."
def _compute_matrix_profile(self): """ Compute the matrix profile using SCRIMP. """ if self.verbose and self.pre_scrimp > 0: tqdm.write("SCRIMP:") try: n1 = len(self.ts1) n2 = len(self.ts2) mu_T, sigma_T = utils.rolling_avg_sd(self.ts1, self.window_size) if self._same_ts: mu_Q, sigma_Q = mu_T, sigma_T else: mu_Q, sigma_Q = utils.rolling_avg_sd(self.ts2, self.window_size) for n_iter, k in enumerate(self._iterator): if k >= 0: # compute diagonals starting from a slot in first column q = self.ts2[k:k+n1] * self.ts1[:n2-k] q = utils.rolling_sum(q, self.window_size) D = utils.calculate_distance_profile(q, self.window_size, mu_Q[k:k+len(q)], sigma_Q[k:k+len(q)], mu_T[:len(q)], sigma_T[:len(q)]) self._index_profile[:len(q)] = np.where(D < self._matrix_profile[:len(q)], np.arange(k, k + len(q)), self._index_profile[:len(q)]) self._matrix_profile[:len(q)] = np.minimum(D, self._matrix_profile[:len(q)]) if self._same_ts: self._index_profile[k:k+len(q)] = np.where(D < self._matrix_profile[k:k+len(q)], np.arange(len(q)), self._index_profile[k:k+len(q)]) self._matrix_profile[k:k+len(q)] = np.minimum(D, self._matrix_profile[k:k+len(q)]) else: # compute diagonals starting from a slot in first row k = -k q = self.ts2[:n1-k] * self.ts1[k:k+n2] q = utils.rolling_sum(q, self.window_size) D = utils.calculate_distance_profile(q, self.window_size, mu_Q[:len(q)], sigma_Q[:len(q)], mu_T[k:k+len(q)], sigma_T[k:k+len(q)]) self._index_profile[k:k+len(q)] = np.where(D < self._matrix_profile[k:k+len(q)], np.arange(len(q)), self._index_profile[k:k+len(q)]) self._matrix_profile[k:k+len(q)] = np.minimum(D, self._matrix_profile[k:k+len(q)]) except KeyboardInterrupt: if self.verbose: tqdm.write("Calculation interrupted at iteration {}. Approximate result returned.".format(n_iter))
def test_rolling_avg_sd_sanity2(self): t = np.random.rand(1000) ra, rsd = utils.rolling_avg_sd(t, 1000) assert np.allclose([np.mean(t)], ra), "rolling_avg_sd_sanity2: rolling sum of full window size should equal the series mean" assert np.allclose([np.std(t)], rsd), \ "rolling_avg_sd_sanity2: rolling sd of full window size should equal the series sd"
def test_rolling_avg_sd_sanity1(self): t = np.random.rand(1000) ra, rsd = utils.rolling_avg_sd(t, 1) assert np.allclose(t, ra), "rolling_avg_sd_sanity1: rolling sum of window size 1 should equal the series itself" assert np.max(rsd) < 1e-5, \ "rolling_avg_sd_sanity1: rolling sd of window size 1 should be all zero"
def test_rolling_avg_sd_window_too_large(self): with pytest.raises(ValueError): t = np.random.rand(500) ans = utils.rolling_avg_sd(t, 1000)