예제 #1
0
 def _compute_matrix_profile(self):
     """
     Compute the matrix profile using STOMP.
     """
     mu_T, sigma_T = utils.rolling_avg_sd(self.ts1, self.window_size)
     QT = utils.sliding_dot_product(self.ts2[:self.window_size], self.ts1)
     if self._same_ts:
         mu_Q, sigma_Q = mu_T, sigma_T
         TQ = np.copy(QT)
     else:
         mu_Q, sigma_Q = utils.rolling_avg_sd(self.ts2, self.window_size)
         TQ = utils.sliding_dot_product(self.ts1[:self.window_size], self.ts2)
     D = utils.calculate_distance_profile(QT, self.window_size, mu_Q[0], sigma_Q[0], mu_T, sigma_T)
     if self._same_ts:
         lower_ez_bound = 0
         upper_ez_bound = min(len(self.ts2), self.exclusion_zone) + 1
         D[lower_ez_bound:upper_ez_bound] = np.inf
     self._matrix_profile = np.copy(D)
     self._index_profile = np.zeros((len(self.ts1) - self.window_size + 1,))
     for idx in self._iterator:
         QT[1:] = QT[:len(self.ts1)-self.window_size] - self.ts1[:len(self.ts1)-self.window_size] * self.ts2[idx-1] \
                  + self.ts1[self.window_size:] * self.ts2[idx + self.window_size - 1]
         QT[0] = TQ[idx]
         D = utils.calculate_distance_profile(QT, self.window_size, mu_Q[idx], sigma_Q[idx], mu_T, sigma_T)
         self._elementwise_min(D, idx)
예제 #2
0
    def _compute_matrix_profile(self):
        """
        Compute the matrix profile using PreSCRIMP.
        """
        try:
            mu_T, sigma_T = utils.rolling_avg_sd(self.ts1, self.window_size)
            if self._same_ts:
                mu_Q, sigma_Q = mu_T, sigma_T
            else:
                mu_Q, sigma_Q = utils.rolling_avg_sd(self.ts2, self.window_size)
            for n_iter, idx in enumerate(self._iterator):
                D = utils.mass(self.ts2[idx: idx+self.window_size], self.ts1)
                self._elementwise_min(D, idx)
                jdx = np.argmin(D)  # the index of closest profile to the current idx

                # compute diagonals until the next sampled point
                q1 = self.ts2[idx:idx + self.sample_interval + self.window_size - 1]
                q2 = self.ts1[jdx:jdx + self.sample_interval + self.window_size - 1]
                lq = min(len(q1), len(q2))
                q = q1[:lq] * q2[:lq]
                q = utils.rolling_sum(q, self.window_size)
                D = utils.calculate_distance_profile(q, self.window_size, mu_Q[idx:idx + len(q)], sigma_Q[idx:idx + len(q)],
                                                     mu_T[jdx:jdx + len(q)], sigma_T[jdx:jdx + len(q)])
                self._index_profile[jdx: jdx + len(q)] = np.where(D < self._matrix_profile[jdx:jdx + len(q)],
                                                        np.arange(idx, idx + len(q)), self._index_profile[jdx:jdx + len(q)])
                self._matrix_profile[jdx:jdx + len(q)] = np.minimum(D, self._matrix_profile[jdx:jdx + len(q)])
                if self._same_ts:
                    self._index_profile[idx:idx + len(q)] = np.where(D < self._matrix_profile[idx:idx + len(q)],
                                                                 np.arange(jdx, jdx + len(q)), self._index_profile[idx:idx + len(q)])
                    self._matrix_profile[idx:idx + len(q)] = np.minimum(D, self._matrix_profile[idx:idx + len(q)])

                # compute diagonals until the previous sampled point
                if idx != 0 and jdx != 0:
                    q1 = self.ts2[max(0, idx - self.sample_interval):(idx + self.window_size - 1)]
                    q2 = self.ts1[max(0, jdx - self.sample_interval):(jdx + self.window_size - 1)]
                    lq = min(len(q1), len(q2))
                    q = q1[-lq:] * q2[-lq:]
                    q = utils.rolling_sum(q, self.window_size)
                    D = utils.calculate_distance_profile(q, self.window_size, mu_Q[idx - len(q):idx],
                                                         sigma_Q[idx - len(q):idx],
                                                         mu_T[jdx - len(q):jdx], sigma_T[jdx - len(q):jdx])
                    self._index_profile[jdx - len(q): jdx] = np.where(D < self._matrix_profile[jdx - len(q):jdx],
                                                                      np.arange(idx - len(q), idx),
                                                                      self._index_profile[jdx - len(q):jdx])
                    self._matrix_profile[jdx - len(q):jdx] = np.minimum(D, self._matrix_profile[jdx - len(q):jdx])
                    if self._same_ts:
                        self._index_profile[idx - len(q):idx] = np.where(D < self._matrix_profile[idx - len(q):idx],
                                                                         np.arange(jdx - len(q), jdx),
                                                                         self._index_profile[idx - len(q):idx])
                        self._matrix_profile[idx - len(q):idx] = np.minimum(D, self._matrix_profile[idx - len(q):idx])
        except KeyboardInterrupt:
            if self.verbose:
                tqdm.write("Calculation interrupted at iteration {}. Approximate result returned.".format(n_iter))
예제 #3
0
 def test_rolling_avg_sd_data1(self):
     t = np.loadtxt("./data/random_walk_data.csv")
     ra, rsd = utils.rolling_avg_sd(t, 1000)
     ra_ans = np.loadtxt("./data/random_walk_data_rolling_mean.csv")
     rsd_ans = np.loadtxt("./data/random_walk_data_rolling_std.csv")
     assert np.allclose(ra, ra_ans), "rolling_avg_sd_random_data: rolling sum should be computed correctly"
     assert np.allclose(rsd, rsd_ans), "rolling_avg_sd_random_data: rolling sd should be computed correctly"
예제 #4
0
 def test_rolling_avg_sd_random_data(self):
     t = np.random.rand(1000)
     m = np.random.randint(10, 1000)
     ra, rsd = utils.rolling_avg_sd(t, m)
     naive_ra = np.array([np.mean(t[i:i+m]) for i in range(1000 - m + 1)])
     naive_rsd = np.array([np.std(t[i:i+m]) for i in range(1000 - m + 1)])
     assert np.allclose(ra, naive_ra), "rolling_avg_sd_random_data: rolling sum should be computed correctly"
     assert np.allclose(rsd, naive_rsd), "rolling_avg_sd_random_data: rolling sd should be computed correctly"
예제 #5
0
 def test_calculate_distance_profile_constant_sequence_and_query(self):
     n = 100
     m = np.random.randint(10, n // 2)
     t = np.full(n, np.random.rand())
     q = np.full(m, np.random.rand())
     qt = utils.sliding_dot_product(q, t)
     rolling_mean, rolling_std = utils.rolling_avg_sd(t, m)
     dp = utils.calculate_distance_profile(qt, m, np.mean(q), np.std(q), rolling_mean, rolling_std)
     assert np.allclose(dp, np.full(n - m + 1, 0)), "calculate_distance_profile_constant_sequence_and_query: " \
                                     "distance of constant query to constant sequence is ero by definition."
예제 #6
0
 def test_calculate_distance_profile_constant_query(self):
     n = 100
     m = np.random.randint(10, n // 2)
     t = np.random.rand(n)
     q = np.full(m, np.random.rand())
     qt = utils.sliding_dot_product(q, t)
     rolling_mean, rolling_std = utils.rolling_avg_sd(t, m)
     dp = utils.calculate_distance_profile(qt, m, q[0], 0, rolling_mean, rolling_std)
     assert np.allclose(dp, np.full(n - m + 1, np.sqrt(m))), "calculate_distance_profile_constant_query: " \
                                     "distance of nonconstant sequence to constant query is sqrt(m) by definition."
예제 #7
0
 def _compute_matrix_profile(self):
     """
     Compute the matrix profile using SCRIMP.
     """
     if self.verbose and self.pre_scrimp > 0:
         tqdm.write("SCRIMP:")
     try:
         n1 = len(self.ts1)
         n2 = len(self.ts2)
         mu_T, sigma_T = utils.rolling_avg_sd(self.ts1, self.window_size)
         if self._same_ts:
             mu_Q, sigma_Q = mu_T, sigma_T
         else:
             mu_Q, sigma_Q = utils.rolling_avg_sd(self.ts2, self.window_size)
         for n_iter, k in enumerate(self._iterator):
             if k >= 0:
                 # compute diagonals starting from a slot in first column
                 q = self.ts2[k:k+n1] * self.ts1[:n2-k]
                 q = utils.rolling_sum(q, self.window_size)
                 D = utils.calculate_distance_profile(q, self.window_size, mu_Q[k:k+len(q)], sigma_Q[k:k+len(q)],
                                                      mu_T[:len(q)], sigma_T[:len(q)])
                 self._index_profile[:len(q)] = np.where(D < self._matrix_profile[:len(q)],
                                                 np.arange(k, k + len(q)), self._index_profile[:len(q)])
                 self._matrix_profile[:len(q)] = np.minimum(D, self._matrix_profile[:len(q)])
                 if self._same_ts:
                     self._index_profile[k:k+len(q)] = np.where(D < self._matrix_profile[k:k+len(q)],
                                                 np.arange(len(q)), self._index_profile[k:k+len(q)])
                     self._matrix_profile[k:k+len(q)] = np.minimum(D, self._matrix_profile[k:k+len(q)])
             else:
                 # compute diagonals starting from a slot in first row
                 k = -k
                 q = self.ts2[:n1-k] * self.ts1[k:k+n2]
                 q = utils.rolling_sum(q, self.window_size)
                 D = utils.calculate_distance_profile(q, self.window_size, mu_Q[:len(q)], sigma_Q[:len(q)],
                                                      mu_T[k:k+len(q)], sigma_T[k:k+len(q)])
                 self._index_profile[k:k+len(q)] = np.where(D < self._matrix_profile[k:k+len(q)],
                                                         np.arange(len(q)), self._index_profile[k:k+len(q)])
                 self._matrix_profile[k:k+len(q)] = np.minimum(D, self._matrix_profile[k:k+len(q)])
     except KeyboardInterrupt:
         if self.verbose:
             tqdm.write("Calculation interrupted at iteration {}. Approximate result returned.".format(n_iter))
예제 #8
0
 def test_rolling_avg_sd_sanity2(self):
     t = np.random.rand(1000)
     ra, rsd = utils.rolling_avg_sd(t, 1000)
     assert np.allclose([np.mean(t)], ra), "rolling_avg_sd_sanity2: rolling sum of full window size should equal the series mean"
     assert np.allclose([np.std(t)], rsd), \
         "rolling_avg_sd_sanity2: rolling sd of full window size should equal the series sd"
예제 #9
0
 def test_rolling_avg_sd_sanity1(self):
     t = np.random.rand(1000)
     ra, rsd = utils.rolling_avg_sd(t, 1)
     assert np.allclose(t, ra), "rolling_avg_sd_sanity1: rolling sum of window size 1 should equal the series itself"
     assert np.max(rsd) < 1e-5, \
         "rolling_avg_sd_sanity1: rolling sd of window size 1 should be all zero"
예제 #10
0
 def test_rolling_avg_sd_window_too_large(self):
     with pytest.raises(ValueError):
         t = np.random.rand(500)
         ans = utils.rolling_avg_sd(t, 1000)