def find_best_n_motifs(mt, m, parameters, col, dataset):
    """
    Execute the operation 'find best n motifs' of Khiva.

    :param mt: The matrix profile/index pair previously computed by stomp
        (a mapping with "profile" and "index" entries).
    :param m: Subsequence length used to calculate the matrix profile.
    :param parameters: The parameters of the function; "n" is the number of
        motifs to extract (prompted interactively when falsy).
    :param col: The column which has been used in stomp (optional).
    :param dataset: The (first) dataset which has been used in stomp.
    :return: DataFrame with one column per discovered motif subsequence
        ("col0", "col1", ...), each of length m.
    """
    profile_arr = kv.Array(mt.get("profile").to_list(), khiva_type=kv.dtype.f32)
    index_arr = kv.Array(mt.get("index").to_list(), khiva_type=kv.dtype.u32)

    n = parameters["n"]
    if not n:
        print('How many motifs?')
        while not n:
            n = click.prompt('', type=int)
    n = int(n)

    # Only the subsequence indices are needed to slice the motifs out.
    _distance, _index, subsequence = kv.find_best_n_motifs(
        profile_arr, index_arr, m, n)
    starts = subsequence.to_list()

    series = dataset[col] if col else dataset
    motifs = pd.DataFrame(index=range(m))

    if n == 1:
        # For a single motif the index list collapses to a scalar start.
        window = series[starts:starts + m]
        window.index = range(m)
        motifs["col0"] = window
    else:
        for k in range(n):
            window = series[starts[k]:starts[k] + m]
            window.index = range(m)
            motifs[f"col{k}"] = window
    return motifs
def use_find(self):
    """
    Discover the best motif matching the user-selected subsequence and plot it.

    Normalizes and resamples the selected pattern, runs stomp against the
    loaded time series, finds the single best motif, and renders a matplotlib
    figure (selected pattern, surrounding time series, discovered motif) onto
    the Tk canvas ``self.c``. Side effects only; returns nothing.
    """
    plt.close('all')
    # BUG FIX: Tk Entry.get() returns a string; np.linspace requires an
    # integer sample count. The max/min entries just below already apply
    # the same int(...) cast.
    new_length = int(self.points_entry.get())
    y = kv.max_min_norm(kv.Array(self.subsequence),
                        int(self.max.get()),
                        int(self.min.get())).to_numpy()
    x = np.array(self.subsequence_x)
    y = np.array(y)
    # Resample the selected pattern to the requested number of points.
    new_x = np.linspace(x.min(), x.max(), new_length)
    new_y = sp.interpolate.interp1d(x, y)(new_x)
    y = new_y
    b = kv.Array(y, khiva_type=kv.dtype.f32)
    distance, profile = kv.stomp(self.kv_time_series, b, len(y))
    # NOTE(review): elsewhere in this module kv.find_best_n_motifs is called
    # as (profile, index, m, n) with four arguments; confirm this 3-argument
    # form matches the installed khiva version.
    distance_motif, index_motif, index_subsequence_motif = kv.find_best_n_motifs(
        distance, profile, 1)
    index_motif = index_motif.to_numpy()
    a = self.kv_time_series.to_numpy()
    # Context window drawn around the motif: twice the pattern length.
    window = int(2 * len(y))
    fig = plt.figure(1)
    plt.plot(range(index_motif, index_motif + (len(y))), y,
             label="Selected pattern", color="blue")
    if index_motif == 0:
        # Motif at the very start of the series: no left context to draw.
        plt.plot(range(index_motif, index_motif + window),
                 a[index_motif:index_motif + window],
                 label="Time series", color="orange")
        plt.plot(range(index_motif, (index_motif + len(y))),
                 a[index_motif:(index_motif + len(y))],
                 label="Discovered motif", color="red")
        plt.xticks(
            np.arange(min(range(index_motif, index_motif + window)),
                      max(range(index_motif, index_motif + window)),
                      100.0))
    else:
        plt.plot(range(index_motif - window, index_motif + window),
                 a[index_motif - window:index_motif + window],
                 label="Time series", color="orange")
        plt.plot(range(index_motif, (index_motif + len(y))),
                 a[index_motif:(index_motif + len(y))],
                 label="Discovered motif", color="red")
        plt.xticks(
            np.arange(min(range(index_motif - window, index_motif + window)),
                      max(range(index_motif - window, index_motif + window)),
                      100.0))
    plt.legend()
    plt.title("Motif discovery")
    plt.tight_layout()
    fig_photo = self.draw_figure(self.c, fig, loc=(1010, 250))
    # Keep a reference on self so Tk does not garbage-collect the image.
    self.photo = fig_photo
def stomp(tt1, tt2, parameters):
    """
    Execute the 'stomp' of Khiva for two time series.

    :param tt1: First time series.
    :param tt2: Second time series.
    :param parameters: The parameters of the function; "m" is the subsequence
        length (prompted interactively when falsy).
    :return: Tuple with a dataframe holding the "profile" and "index"
        columns, and the subsequence length (m).
    """
    tts1 = kv.Array(tt1)
    tts2 = kv.Array(tt2)
    sub_len = parameters["m"]
    if not sub_len:
        print('What is the length of the subsequence?')
        while not sub_len:
            sub_len = click.prompt('', type=int)
    sub_len = int(sub_len)
    data = kv.stomp(tts1, tts2, sub_len)
    stm = data[0].to_pandas()
    # BUG FIX: DataFrame.set_axis returns a new frame (it is not in-place by
    # default in modern pandas); the original discarded the result, so the
    # "profile" column name was never applied and downstream consumers that
    # read mt.get("profile") would fail to find it.
    stm = stm.set_axis(["profile"], axis='columns')
    stm["index"] = data[1].to_pandas()
    return stm, sub_len
def paa(dataset, parameters):
    """
    Executes the function paa (Piecewise Aggregate Approximation) of khiva.

    :param dataset: The dataset which is computed.
    :param parameters: The parameters of the function (number of points).
    :return: The time series reduced to the requested number of points.
    """
    point_count = al.get_int_number(parameters)
    # With several columns, ask the user which single column to reduce.
    if dataset.columns.size > 1:
        dataset = dataset[al.obtain_column(dataset)]
    reduced = kv.paa(kv.Array(dataset), point_count)
    return reduced.to_pandas()
def kshape(tt, parameters):
    """
    Execute the kshape clustering of Khiva.

    :param tt: The dataset which is computed.
    :param parameters: The parameters of the function; "number" is the number
        of clusters (prompted interactively when falsy).
    :return: Tuple with the centroids and the labels, both as pandas frames.
    """
    series = kv.Array(tt)
    cluster_count = parameters["number"]
    if not cluster_count:
        print('How many clusters?')
        while not cluster_count:
            cluster_count = click.prompt('', type=int)
    centroids, labels = kv.k_shape(series, int(cluster_count))
    return centroids.to_pandas(), labels.to_pandas()
def ramer_douglas_peucker(dataset, parameters):
    """
    Executes the Ramer-Douglas-Peucker down-sampling of khiva.

    :param dataset: The dataset which is computed.
    :param parameters: The parameters of the function (epsilon).
    :return: The time series with the reduced points, indexed by the
        original (integer) positions that were kept.
    """
    epsilon = al.get_float_number(parameters)
    # With several columns, ask the user which single column to reduce.
    if dataset.columns.size > 1:
        dataset = dataset[al.obtain_column(dataset)]
    # khiva expects an (x, y) pair of rows: positions and values.
    points = kv.Array([range(dataset.size), dataset.to_numpy().flatten()])
    reduced = kv.ramer_douglas_peucker(points, epsilon).transpose().to_pandas()
    # Column 0 holds the surviving x positions; promote it to an int index.
    reduced.set_index(0, inplace=True)
    reduced.set_index(reduced.index.astype('int32'), inplace=True)
    return reduced
def pip(dataset, parameters):
    """
    Executes the PIP (Perceptually Important Points) function of khiva.

    :param dataset: The dataset which is computed.
    :param parameters: The parameters of the function (number of pip).
    :return: The time series with the reduced points, indexed by the
        original (integer) positions that were kept.
    """
    pip_count = al.get_int_number(parameters)
    # With several columns, ask the user which single column to reduce.
    if dataset.columns.size > 1:
        dataset = dataset[al.obtain_column(dataset)]
    # khiva expects an (x, y) pair of rows: positions and values.
    points = kv.Array([range(dataset.size), dataset.to_numpy().flatten()])
    reduced = kv.pip(points, pip_count).transpose().to_pandas()
    # Column 0 holds the surviving x positions; promote it to an int index.
    reduced.set_index(0, inplace=True)
    reduced.set_index(reduced.index.astype('int32'), inplace=True)
    return reduced
def features(dataset):
    """
    Execute a fixed battery of khiva feature extractors over the dataset.

    :param dataset: The dataset which is computed (transposed so that each
        column becomes one time series for khiva).
    :return: A pandas DataFrame with one row per extracted feature.
    """
    ts = kv.Array(dataset.transpose())
    # One callable per feature; order defines the output row order.
    extractors = (
        kv.abs_energy,
        kv.absolute_sum_of_changes,
        kv.count_above_mean,
        kv.count_below_mean,
        kv.first_location_of_maximum,
        kv.first_location_of_minimum,
        kv.has_duplicates,
        kv.has_duplicate_max,
        kv.kurtosis,
        kv.last_location_of_maximum,
        kv.last_location_of_minimum,
        kv.has_duplicate_min,
        kv.longest_strike_above_mean,
        kv.longest_strike_below_mean,
        kv.maximum,
        kv.mean_absolute_change,
        kv.minimum,
        lambda arr: kv.number_crossing_m(arr, 0),  # crossings of level 0
        kv.mean,
        kv.median,
        kv.mean_change,
        kv.ratio_value_number_to_time_series_length,
        kv.skewness,
        kv.standard_deviation,
        kv.sum_of_reoccurring_values,
        kv.sum_values,
        kv.variance,
        kv.variance_larger_than_standard_deviation,
    )
    rows = np.stack([extract(ts).to_numpy() for extract in extractors])
    return pd.DataFrame(rows)
def import_csv_data(self):
    """
    Load a CSV file chosen via a file dialog and keep a normalized khiva copy.

    Stores the raw values in ``self.data`` and the max-min-normalized khiva
    array in ``self.kv_time_series``. Side effects only; returns nothing.
    """
    csv_path = askopenfilename()
    self.data = np.genfromtxt(csv_path, delimiter=',')
    # Normalize the series — presumably into the [-200, 200] range given the
    # (max, min) arguments; confirm against kv.max_min_norm's signature.
    self.kv_time_series = kv.max_min_norm(kv.Array(self.data), 200, -200)