Esempio n. 1
0
def find_best_n_motifs(mt, m, parameters, col, dataset):
    """
    Execute the operation 'find best n motifs' of Khiva.

    The matrix profile and its index are handed to Khiva; for every subsequence
    index that comes back, the ``m`` values of the dataset starting at that
    position are copied into their own column of the result.

    :param mt: The matrix profile and the index calculated previously, read
        through ``mt.get("profile")`` and ``mt.get("index")``.
    :param m: Subsequence length value used to calculate the matrix profile.
    :param parameters: The parameters of the function. ``parameters["n"]`` is the
        number of motifs; when it is falsy the user is prompted for it.
    :param col: The column which has been used in stomp. When falsy, the dataset
        is sliced as it is.
    :param dataset: The (first) dataset which has been used in stomp.
    :return: A dataframe indexed ``0..m-1`` with one column per reported motif
        (``col0``, ``col1``, ...), each holding the values of that subsequence.
    """
    prof = kv.Array(mt.get("profile").to_list(), khiva_type=kv.dtype.f32)
    ind = kv.Array(mt.get("index").to_list(), khiva_type=kv.dtype.u32)
    n = parameters["n"]
    if not n:
        print('How many motifs?')
        while not n:
            n = click.prompt('', type=int)
    n = int(n)
    # Only the subsequence indices are used; the distances and motif indices are dropped.
    _, _, subsequence = kv.find_best_n_motifs(prof, ind, m, n)
    starts = subsequence.to_list()
    if not isinstance(starts, list):
        # A single motif may come back as a bare scalar; wrap it so that one
        # loop handles every value of n.
        starts = [starts]
    if col:
        dataset = dataset[col]
    stm = pd.DataFrame(index=range(m))
    for it, start in enumerate(starts):
        aux = dataset[start:start + m]
        # Re-base the slice on 0..m-1 so it lines up with the index of stm.
        aux.index = range(m)
        stm["col" + str(it)] = aux
    return stm
    def use_find(self):
        """
        Find the best match of the selected pattern in the time series and plot it.

        The pattern in ``self.subsequence`` is max-min normalised with the bounds
        read from ``self.max`` and ``self.min``, resampled by linear interpolation
        to the number of points read from ``self.points_entry``, and compared with
        ``self.kv_time_series`` through Khiva's stomp. The best motif is then drawn
        together with the surrounding part of the time series, and the rendered
        figure is stored in ``self.photo``.
        """
        plt.close('all')
        # np.linspace needs an integer sample count; the other entry values
        # (self.max, self.min) are converted with int() as well.
        new_length = int(self.points_entry.get())

        y = np.array(
            kv.max_min_norm(kv.Array(self.subsequence), int(self.max.get()),
                            int(self.min.get())).to_numpy())
        x = np.array(self.subsequence_x)

        # Resample the pattern to new_length evenly spaced points.
        new_x = np.linspace(x.min(), x.max(), new_length)
        y = sp.interpolate.interp1d(x, y)(new_x)
        pattern_length = len(y)

        b = kv.Array(y, khiva_type=kv.dtype.f32)
        distance, profile = kv.stomp(self.kv_time_series, b, pattern_length)
        _, index_motif, _ = kv.find_best_n_motifs(distance, profile, 1)
        # Reduce the single reported index to a plain int so that it can be used
        # in range() and in the bounds arithmetic below.
        index_motif = int(np.asarray(index_motif.to_numpy()).ravel()[0])
        motif_end = index_motif + pattern_length

        a = self.kv_time_series.to_numpy()
        window = 2 * pattern_length
        # Clamp the context window to the series. A negative start would wrap
        # around in the slice and an end past the data would be cut short; either
        # way the x range and the plotted values would differ in length.
        # NOTE(review): assumes the first axis of `a` is the time axis - confirm.
        start = max(index_motif - window, 0)
        stop = min(index_motif + window, len(a))

        fig = plt.figure(1)
        plt.plot(range(index_motif, motif_end),
                 y,
                 label="Selected pattern",
                 color="blue")
        plt.plot(range(start, stop),
                 a[start:stop],
                 label="Time series",
                 color="orange")
        plt.plot(range(index_motif, motif_end),
                 a[index_motif:motif_end],
                 label="Discovered motif",
                 color="red")
        # One tick every 100 samples, from the first to the last plotted position.
        plt.xticks(np.arange(start, stop - 1, 100.0))

        plt.legend()
        plt.title("Motif discovery")
        plt.tight_layout()
        self.photo = self.draw_figure(self.c, fig, loc=(1010, 250))
Esempio n. 3
0
def stomp(tt1, tt2, parameters):
    """
    Execute the 'stomp' of Khiva for two time series.

    :param tt1: First time series.
    :param tt2: Second time series.
    :param parameters: The parameters of the function. ``parameters["m"]`` is the
        subsequence length; when it is falsy the user is prompted for it.
    :return: Tuple with a dataframe holding the columns ``profile`` and ``index``,
        and the subsequence length (m).
    """
    tts1 = kv.Array(tt1)
    tts2 = kv.Array(tt2)
    sub_len = parameters["m"]
    if not sub_len:
        print('What is the length of the subsequence?')
        while not sub_len:
            sub_len = click.prompt('', type=int)
    sub_len = int(sub_len)
    data = kv.stomp(tts1, tts2, sub_len)
    stm = data[0].to_pandas()
    # Rename by assignment: DataFrame.set_axis returns a new frame instead of
    # renaming in place on current pandas, so calling it and discarding the
    # result leaves the column without the "profile" label.
    stm.columns = ["profile"]
    stm["index"] = data[1].to_pandas()

    return stm, sub_len
Esempio n. 4
0
def paa(dataset, parameters):
    """
    Run Khiva's paa on one column of the dataset.

    The number of points is obtained from ``al.get_int_number``. When the
    dataset has more than one column, the column to use is obtained from
    ``al.obtain_column``.

    :param dataset: The dataset which is computed.
    :param parameters: The parameters of the function (number of points).
    :return: The Khiva result converted to pandas (the time series with the
        reduced points).
    """
    target_points = al.get_int_number(parameters)
    series = dataset
    if series.columns.size > 1:
        # Several columns available: narrow the data down to a single one.
        series = series[al.obtain_column(series)]
    reduced = kv.paa(kv.Array(series), target_points)
    return reduced.to_pandas()
Esempio n. 5
0
def kshape(tt, parameters):
    """
    Run Khiva's k-shape clustering on the dataset.

    :param tt: The dataset which is computed.
    :param parameters: The parameters of the function. ``parameters["number"]``
        is the number of clusters; when it is falsy the user is prompted until a
        truthy value is entered.
    :return: Tuple with the first two Khiva results converted to pandas
        (centroids and labels).
    """
    series = kv.Array(tt)
    cluster_count = parameters["number"]
    if not cluster_count:
        print('How many clusters?')
        cluster_count = click.prompt('', type=int)
        # Keep asking while the answer is still falsy (e.g. 0).
        while not cluster_count:
            cluster_count = click.prompt('', type=int)
    outcome = kv.k_shape(series, int(cluster_count))
    centroids = outcome[0].to_pandas()
    labels = outcome[1].to_pandas()
    return centroids, labels
Esempio n. 6
0
def ramer_douglas_peucker(dataset, parameters):
    """
    Run Khiva's Ramer-Douglas-Peucker reduction on one column of the dataset.

    Epsilon is obtained from ``al.get_float_number``. When the dataset has more
    than one column, the column to use is obtained from ``al.obtain_column``.
    Khiva receives two rows: the positions ``0..size-1`` and the flattened
    values.

    :param dataset: The dataset which is computed.
    :param parameters: The parameters of the function (epsilon).
    :return: The reduced time series as a pandas object whose index is column 0
        of the transposed result, cast to int32.
    """
    tolerance = al.get_float_number(parameters)
    series = dataset
    if series.columns.size > 1:
        series = series[al.obtain_column(series)]
    positions = range(series.size)
    values = series.to_numpy().flatten()
    reduced = kv.ramer_douglas_peucker(kv.Array([positions, values]), tolerance)
    frame = reduced.transpose().to_pandas()
    # Column 0 carries the kept positions: promote it to the index, then make
    # that index integral.
    frame = frame.set_index(0)
    return frame.set_index(frame.index.astype('int32'))
Esempio n. 7
0
def pip(dataset, parameters):
    """
    Run Khiva's pip reduction on one column of the dataset.

    The number of points is obtained from ``al.get_int_number``. When the
    dataset has more than one column, the column to use is obtained from
    ``al.obtain_column``. Khiva receives two rows: the positions ``0..size-1``
    and the flattened values.

    :param dataset: The dataset which is computed.
    :param parameters: The parameters of the function (number of pip).
    :return: The reduced time series as a pandas object whose index is column 0
        of the transposed result, cast to int32.
    """
    point_count = al.get_int_number(parameters)
    series = dataset
    if series.columns.size > 1:
        series = series[al.obtain_column(series)]
    positions = range(series.size)
    values = series.to_numpy().flatten()
    reduced = kv.pip(kv.Array([positions, values]), point_count)
    frame = reduced.transpose().to_pandas()
    # Column 0 carries the kept positions: promote it to the index, then make
    # that index integral.
    frame = frame.set_index(0)
    return frame.set_index(frame.index.astype('int32'))
Esempio n. 8
0
def features(dataset):
    """
    Compute a fixed set of Khiva features for the dataset.

    The dataset is transposed and wrapped in a Khiva array, every extractor
    listed below is applied to that array, and the numpy results are stacked
    with ``np.stack`` in the listed order.

    :param dataset: The dataset which is computed.
    :return: A pandas dataframe built from the stacked results; position ``i``
        along the first axis belongs to the ``i``-th extractor.
    """
    khiva_input = kv.Array(dataset.transpose())
    # Order matters: it fixes the position of every feature in the result.
    extractors = (
        kv.abs_energy,
        kv.absolute_sum_of_changes,
        kv.count_above_mean,
        kv.count_below_mean,
        kv.first_location_of_maximum,
        kv.first_location_of_minimum,
        kv.has_duplicates,
        kv.has_duplicate_max,
        kv.kurtosis,
        kv.last_location_of_maximum,
        kv.last_location_of_minimum,
        kv.has_duplicate_min,
        kv.longest_strike_above_mean,
        kv.longest_strike_below_mean,
        kv.maximum,
        kv.mean_absolute_change,
        kv.minimum,
        # The only extractor with an extra argument: crossings of the level 0.
        lambda arr: kv.number_crossing_m(arr, 0),
        kv.mean,
        kv.median,
        kv.mean_change,
        kv.ratio_value_number_to_time_series_length,
        kv.skewness,
        kv.standard_deviation,
        kv.sum_of_reoccurring_values,
        kv.sum_values,
        kv.variance,
        kv.variance_larger_than_standard_deviation,
    )
    stacked = np.stack(
        [extract(khiva_input).to_numpy() for extract in extractors])

    return pd.DataFrame(stacked)
 def import_csv_data(self):
     """
     Ask for a CSV file, load it and keep a max-min normalised Khiva copy.

     The raw values are stored in ``self.data`` and the Khiva array, normalised
     with the bounds 200 and -200, in ``self.kv_time_series``. When no path is
     returned by the file dialog, nothing is loaded and both attributes are
     left as they were.
     """
     csv_file_path = askopenfilename()
     if not csv_file_path:
         # No file chosen (presumably the dialog was cancelled): an empty path
         # cannot be read, so stop here instead of failing in genfromtxt.
         return
     self.data = np.genfromtxt(csv_file_path, delimiter=',')
     self.kv_time_series = kv.max_min_norm(kv.Array(self.data), 200, -200)