Example #1
0
def get_subdataset_columns(parameters):
    """
    Obtains a subset of the dataset by its columns and saves it in the workspace.

    :param parameters: The parameters of the function. Expects:
        - 'Dataset': name of the dataset stored in the workspace.
        - 'cols': list of column names, or a falsy value to prompt the user
          interactively for columns one at a time.
    """
    workspace = al.Workspace()
    data_name = parameters['Dataset']
    dataset = workspace.get_dataset(data_name)

    if parameters['cols']:
        cols = parameters['cols']
    else:
        # No columns given up front: gather them interactively until the
        # user answers 'no'.
        cols = []
        stop = False
        while not stop:
            cols.append(al.obtain_column(dataset))
            print('Do you want to continue? yes or no?')

            response = al.query_input()
            if response == 'no':
                stop = True

    dataset = dataset[cols]
    num = workspace.get_counter('sub')
    name = 'subcol' + str(num) + data_name
    workspace.save_dataset(name, dataset)
    # Bug fix: this function subsets by columns, not rows — the saved-as
    # message previously said "rows".
    txt = 'The sub-dataset by the columns is saved as ' + name
    print(txt)
Example #2
0
def paa(dataset, parameters):
    """
    Executes the function paa of khiva.
    :param dataset: The dataset which is computed.
    :param parameters: The parameters of the function (number of points).
    :return: The timeserie with the reduced points.
    """
    points = al.get_int_number(parameters)

    # With more than one column, ask the user which single column to reduce.
    if dataset.columns.size > 1:
        selected = al.obtain_column(dataset)
        dataset = dataset[selected]

    reduced = kv.paa(kv.Array(dataset), points)
    return reduced.to_pandas()
Example #3
0
def ramer_douglas_peucker(dataset, parameters):
    """
    Executes the function Ramer-Douglas-Peucker of khiva.
    :param dataset: The dataset which is computed.
    :param parameters: The parameters of the function (epsilon).
    :return: The timeserie with the reduced points.
    """
    eps = al.get_float_number(parameters)

    # With more than one column, ask the user which single column to reduce.
    if dataset.columns.size > 1:
        dataset = dataset[al.obtain_column(dataset)]

    # khiva expects [positions, values]; flatten the column into a 1-D array.
    values = dataset.to_numpy().flatten()
    arr = kv.Array([range(dataset.size), values])
    reduced = kv.ramer_douglas_peucker(arr, eps).transpose().to_pandas()

    # The first column holds the kept positions — use it as an integer index.
    reduced.set_index(0, inplace=True)
    reduced.set_index(reduced.index.astype('int32'), inplace=True)
    return reduced
Example #4
0
def pip(dataset, parameters):
    """
    Executes the function pip of khiva.
    :param dataset: The dataset which is computed.
    :param parameters: The parameters of the function (number of pip).
    :return: The timeserie with the reduced points.
    """
    n_pip = al.get_int_number(parameters)

    # With more than one column, ask the user which single column to reduce.
    if dataset.columns.size > 1:
        dataset = dataset[al.obtain_column(dataset)]

    # khiva expects [positions, values]; flatten the column into a 1-D array.
    values = dataset.to_numpy().flatten()
    arr = kv.Array([range(dataset.size), values])
    reduced = kv.pip(arr, n_pip).transpose().to_pandas()

    # The first column holds the kept positions — use it as an integer index.
    reduced.set_index(0, inplace=True)
    reduced.set_index(reduced.index.astype('int32'), inplace=True)
    return reduced
Example #5
0
def _show_figure(series, title):
    """Render one non-blocking figure for *series* titled *title*."""
    plt.figure()
    plt.plot(series)
    plt.title(title)
    plt.show(block=False)


def plot_dataset(dataset, parameters):
    """
    Plots graphically one or more columns of the dataset.

    :param dataset: The current dataset (assumed to be a pandas DataFrame —
        it exposes ``iloc``, ``index`` and ``columns``).
    :param parameters: The parameters for the graphic. Expects:
        - 'from'/'to': optional row bounds of the slice to plot.
        - 'columns': optional list of column names to plot.
        - 'Dataset': name used as the title when there is a single column.
    :raises Exception: If 'from' is greater than 'to'.
    """
    a = int(parameters['from'] if parameters['from'] else 0)
    b = int(parameters['to'] if parameters['to'] else dataset.index.size)
    if b < a:
        print(
            'This operation cannot be done.\nThe starting row number is greater than the last row number.'
        )
        raise Exception()
    # Slice end first, then start, so positions refer to the original frame.
    if b:
        dataset = dataset.iloc[:b]
    if a:
        dataset = dataset.iloc[a:]

    if dataset.columns.size > 1:
        if not parameters["columns"]:
            # No explicit selection: ask the user for a single column.
            column_name = al.obtain_column(dataset)
            _show_figure(dataset[column_name], column_name)
        else:
            for column_name in parameters["columns"]:
                _show_figure(dataset[column_name], column_name)
    else:
        # Single-column dataset: plot it whole, titled with the dataset name.
        _show_figure(dataset, parameters["Dataset"])
Example #6
0
def do_matrix(parameters):
    """
    Performs a matrix-profile operation on datasets stored in the workspace.

    Supported operations: 'stomp' (two datasets), 'stomp_self_join' (one
    dataset), 'find_best_n_discords' and 'find_best_n_motifs' (both operate
    on a previously stored stomp result).

    :param parameters: The parameters of the function: 'operation' (name of
        the operation), 'Dataset' (and optionally 'Dataset2' for stomp), and
        operation-specific values such as 'n' (number of discords/motifs).
    """
    op = parameters.pop("operation")
    workspace = al.Workspace()

    # A plain stomp with no second dataset degrades to a self join.
    if op == "stomp" and not parameters.get('Dataset2'):
        op = 'stomp_self_join'

    if op == "stomp":
        data_name = parameters["Dataset"]
        data_name2 = parameters["Dataset2"]
        dataset1 = workspace.get_dataset(data_name)
        dataset2 = workspace.get_dataset(data_name2)

        # The second dataset could not be retrieved: tell the user (spoken
        # and printed) and bail out without computing anything.
        if dataset2 is None:
            if not data_name2 == "":
                print("The object " + data_name2 + " does not exist.")
                al.voice("The object " + data_name2 + " does not exist.")
            print("Please, provide the two datasets that should be stomped.")
            al.voice("Please, provide the two datasets that should be stomped.")
            return

        # Reduce each multi-column dataset to one user-chosen column; the
        # chosen column name of dataset1 is kept for the saved payload.
        col = ''
        if dataset1.columns.size > 1:
            col = al.obtain_column(dataset1)
            dataset1 = dataset1[col]
        if dataset2.columns.size > 1:
            dataset2 = dataset2[al.obtain_column(dataset2)]

        # al.stomp returns the matrix profile and the subsequence length m.
        (stomp, m) = al.stomp(dataset1.values, dataset2.values, parameters)
        number = workspace.get_counter('matrix_stomp')
        # Payload: (profile as JSON, m, column name, source series as JSON) —
        # later consumed by the discords/motifs branches below.
        workspace.save_dataset('stomp' + str(number), (stomp.to_json(), m, col, dataset1.to_json()))
        print("The stomp is stored as stomp" + str(number))

    elif op == "stomp_self_join":
        data_name = parameters["Dataset"]
        dataset1 = workspace.get_dataset(data_name)

        # Reduce a multi-column dataset to one user-chosen column.
        col = ''
        if dataset1.columns.size > 1:
            col = al.obtain_column(dataset1)
            dataset1 = dataset1[col]

        (stomp, m) = al.stomp_self_join(dataset1.values, parameters)
        number = workspace.get_counter('matrix_stomp')
        # Same payload shape as the two-dataset stomp branch.
        workspace.save_dataset('stomp' + str(number), (stomp.to_json(), m, col, dataset1.to_json()))
        print("The stomp is stored as stomp" + str(number))

    elif op == "find_best_n_discords":
        stomp_name = parameters['Dataset']
        stomp = workspace.get_dataset(stomp_name)
        # Unpack the stored payload tail: (m, column name, source JSON).
        m, col, dataset = workspace.get_value(stomp_name)[1:]

        discords = al.find_best_n_discords(stomp, m, parameters, col, pd.read_json(dataset).sort_index())
        number = workspace.get_counter('matrix_best_d')
        workspace.save_dataset('discords' + str(number), discords)
        print('The best ' + str(int(parameters['n'])) + ' discord segments are stored as discords' + str(number))

    elif op == "find_best_n_motifs":
        stomp_name = parameters['Dataset']
        stomp = workspace.get_dataset(stomp_name)
        # Unpack the stored payload tail: (m, column name, source JSON).
        m, col, dataset = workspace.get_value(stomp_name)[1:]

        motifs = al.find_best_n_motifs(stomp, m, parameters, col, pd.read_json(dataset).sort_index())
        number = workspace.get_counter('matrix_best_m')
        workspace.save_dataset('motifs' + str(number), motifs)
        print('The best ' + str(int(parameters['n'])) + ' motifs segments are stored as motifs' + str(number))