def test_pearson_to_euclidean_2d():
    """Check core.pearson_to_euclidean on a 2D input with one window per row.

    Both input rows hold the same Pearson values and both use a window size
    of 4, so the two rows of the expected result are identical.
    """
    a = np.array([
        [0.23, 0.5, 0.34, 0.67, 0.88],
        [0.23, 0.5, 0.34, 0.67, 0.88],
    ])
    w = [4, 4]
    desired = np.array([
        [2.48193473, 2, 2.29782506, 1.62480768, 0.9797959],
        [2.48193473, 2, 2.29782506, 1.62480768, 0.9797959],
    ])

    actual = core.pearson_to_euclidean(a, w)

    # numpy's signature is assert_almost_equal(actual, desired). Passing the
    # arguments in that order keeps the labels of a failure report correct.
    np.testing.assert_almost_equal(actual, desired)
def resize(mp, pi, n):
    """Pad the matrix profile and profile index so both have length ``n``.

    The Pearson matrix profile is converted to Euclidean distance first. The
    profile is padded with ``inf`` and the index with ``nan``.

    NOTE(review): the ``mp`` and ``pi`` arguments are never read. The values
    are taken from ``profile`` and ``window_size``, which are not parameters
    and must be supplied by the surrounding scope.

    Returns
    -------
    tuple : (mp, pi)
        The padded Euclidean matrix profile and the padded profile index.
    """
    euclidean_mp = core.pearson_to_euclidean(profile['mp'], window_size)

    # both arrays are extended by the same number of trailing elements
    pad_len = n - euclidean_mp.shape[0]
    inf_pad = np.full(pad_len, np.inf)
    nan_pad = np.full(pad_len, np.nan)

    padded_mp = np.append(euclidean_mp, inf_pad)
    padded_pi = np.append(profile['pi'], nan_pad)

    return (padded_mp, padded_pi)
def resize(mp, pi, n):
    """Pad the matrix profile and profile index so both have length ``n``.

    Unless the time series has a unicode string dtype, the Pearson matrix
    profile stored in ``profile`` is converted to Euclidean distance first;
    for string data the ``mp`` argument is padded as given. The profile is
    padded with ``inf`` and the index with ``nan``.

    NOTE(review): ``ts``, ``profile`` and ``window_size`` are not parameters
    and must be supplied by the surrounding scope. The ``pi`` argument is
    never read; the index always comes from ``profile['pi']``.

    Returns
    -------
    tuple : (mp, pi)
        The padded matrix profile and the padded profile index.
    """
    is_string_series = np.issubdtype(ts.dtype, 'U')

    # Only convert pearson to euclidean if not string data type
    if not is_string_series:
        mp = core.pearson_to_euclidean(profile['mp'], window_size)

    # both arrays are extended by the same number of trailing elements
    pad_len = n - mp.shape[0]
    inf_pad = np.full(pad_len, np.inf)
    nan_pad = np.full(pad_len, np.nan)

    padded_mp = np.append(mp, inf_pad)
    padded_pi = np.append(profile['pi'], nan_pad)

    return (padded_mp, padded_pi)
def pmp_top_k_discords(profile, exclusion_zone=None, k=3):
    """
    Computes the top K discords for the given Pan-MatrixProfile. The result
    is a list of (row, col) indices.

    Notes
    -----
    This algorithm is written to work with Euclidean distance. If you submit
    a PMP of Pearson metrics, then it is first converted to Euclidean.

    Parameters
    ----------
    profile : dict
        Data structure from a PMP algorithm.
    exclusion_zone : int, Default window / 2
        The zone to exclude around the found discords to reduce trivial
        findings. By default we use the row-wise window / 2, i.e. the zone
        is derived from the window of the row each discord was found in.
    k : int
        Maximum number of discords to find.

    Returns
    -------
    dict : profile
        The object passed in, with a 'discords' key added. The value is an
        array of indices where the first column corresponds to the row index
        and the second column corresponds to the column index of the
        submitted PMP.

    Raises
    ------
    ValueError
        If profile is not a PMP data structure.
    """
    if not core.is_pmp_obj(profile):
        raise ValueError('Expecting PMP data structure!')

    # this function requires euclidean distance
    # convert if the metric is pearson
    metric = profile.get('metric', None)
    pmp = profile.get('pmp', None)
    windows = profile.get('windows', None)

    if metric == 'pearson':
        tmp = core.pearson_to_euclidean(pmp, windows)
    else:
        tmp = np.copy(pmp).astype('d')

    # replace nan and infs with -infinity so that np.argmax can never
    # select them as the maximum
    tmp[core.nan_inf_indices(tmp)] = -np.inf

    # iterate finding the max value k times or until negative
    # infinity is obtained
    found = []
    for _ in range(k):
        max_idx = np.unravel_index(np.argmax(tmp), tmp.shape)

        # nothing but excluded values remain
        if tmp[max_idx] == -np.inf:
            break

        found.append(max_idx)
        row, col = max_idx

        # The default exclusion zone is 1/2 of the window size used to
        # compute this specific row. It is resolved into a local on every
        # iteration so that the default chosen for one row does not leak
        # into discords found later in rows with a different window.
        if exclusion_zone is None:
            ez = int(np.floor(windows[row] / 2))
        else:
            ez = exclusion_zone

        n = tmp[row].shape[0]
        ez_start = np.max([0, col - ez])
        ez_stop = np.min([n, col + ez])
        tmp[row][ez_start:ez_stop] = -np.inf

        # A zero-width zone leaves the slice above empty; always mask the
        # discord itself so the same cell cannot be reported twice.
        tmp[row][col] = -np.inf

    profile['discords'] = np.array(found)

    return profile
def pmp_top_k_motifs(profile, exclusion_zone=None, k=3, max_neighbors=10,
                     radius=3):
    """
    Find the top K number of motifs (patterns) given a pan matrix profile. By
    default the algorithm will find up to 3 motifs (k) and up to 10 of their
    neighbors with a radius of 3 * min_dist.

    Parameters
    ----------
    profile : dict
        The output from one of the pan matrix profile algorithms.
    exclusion_zone : int, Default to algorithm ez
        Desired number of values to exclude on both sides of the motif. This
        avoids trivial matches. When it is None or 0, half of the window size
        of the row the motif was found in is used.
    k : int, Default = 3
        Desired number of motifs to find.
    max_neighbors : int, Default = 10
        The maximum number of neighbors to include for a given motif.
    radius : int, Default = 3
        The radius is used to associate a neighbor by checking if the
        neighbor's distance is less than or equal to dist * radius

    Returns
    -------
    The original input obj with the addition of the "motifs" key. The motifs
    key consists of the following structure.

    A list of dicts containing motif indices and their corresponding
    neighbor indices. Note that each index is a (row, col) index
    corresponding to the pan matrix profile.

    [
        {
            'motifs': [first_index, second_index],
            'neighbors': [index, index, index ...max_neighbors]
        }
    ]

    Raises
    ------
    ValueError
        If profile is not a PMP data structure.
    """
    if not core.is_pmp_obj(profile):
        raise ValueError('Expecting PMP data structure!')

    data = profile.get('data', None)
    ts = data.get('ts', None)
    data_len = len(ts)

    pmp = profile.get('pmp', None)
    profile_len = pmp.shape[1]
    pmpi = profile.get('pmpi', None)
    windows = profile.get('windows', None)

    # make sure we are working with Euclidean distances
    if core.is_pearson_array(pmp):
        tmp = core.pearson_to_euclidean(pmp, windows)
    else:
        tmp = np.copy(pmp).astype('d')

    # replace nan and infs with infinity
    tmp[core.nan_inf_indices(tmp)] = np.inf

    motifs = []
    for _ in range(k):
        min_idx = np.unravel_index(np.argmin(tmp), tmp.shape)
        min_dist = tmp[min_idx]

        # nothing else to find...
        if core.is_nan_inf(min_dist):
            break

        # create the motif pair
        min_row_idx = min_idx[0]
        min_col_idx = min_idx[1]

        # motif pairs are respective to the column of the matching row
        first_idx = np.min([min_col_idx, pmpi[min_row_idx][min_col_idx]])
        second_idx = np.max([min_col_idx, pmpi[min_row_idx][min_col_idx]])

        # compute distance profile for first appearance
        window_size = windows[min_row_idx]
        query = ts[first_idx:first_idx + window_size]
        distance_profile = mass2(ts, query)

        # extend the distance profile to be as long as the original
        infs = np.full(profile_len - len(distance_profile), np.inf)
        distance_profile = np.append(distance_profile, infs)

        # exclude already picked motifs and neighbors
        # NOTE(review): the mask is built from the original pmp row, while
        # the exclusion zones below are written to the working copy tmp -
        # confirm whether tmp[min_row_idx] was intended here.
        mask = core.nan_inf_indices(pmp[min_row_idx])
        distance_profile[mask] = np.inf

        # Determine the exclusion zone if not set. It is resolved into a
        # local on every iteration so that the default derived from one
        # row's window does not leak into motifs found later in rows with
        # a different window.
        if not exclusion_zone:
            ez = int(np.floor(window_size / 2))
        else:
            ez = exclusion_zone

        # apply exclusion zone for motif pair
        for j in (first_idx, second_idx):
            distance_profile = core.apply_exclusion_zone(
                ez,
                False,
                window_size,
                data_len,
                j,
                distance_profile
            )
            tmp2 = core.apply_exclusion_zone(
                ez,
                False,
                window_size,
                data_len,
                j,
                tmp[min_row_idx]
            )
            tmp[min_row_idx] = tmp2

        # find up to max_neighbors
        neighbors = []
        for _ in range(max_neighbors):
            neighbor_idx = np.argmin(distance_profile)
            neighbor_dist = np.real(distance_profile[neighbor_idx])
            not_in_radius = not ((radius * min_dist) >= neighbor_dist)

            # no more neighbors exist based on radius
            if core.is_nan_inf(neighbor_dist) or not_in_radius:
                break

            # add neighbor and apply exclusion zone
            neighbors.append((min_row_idx, neighbor_idx))
            distance_profile = core.apply_exclusion_zone(
                ez,
                False,
                window_size,
                data_len,
                neighbor_idx,
                distance_profile
            )
            tmp2 = core.apply_exclusion_zone(
                ez,
                False,
                window_size,
                data_len,
                neighbor_idx,
                tmp[min_row_idx]
            )
            tmp[min_row_idx] = tmp2

        # add the motifs and neighbors
        # note that they are (row, col) indices
        motifs.append({
            'motifs': [(min_row_idx, first_idx), (min_row_idx, second_idx)],
            'neighbors': neighbors
        })

    profile['motifs'] = motifs

    return profile