Exemple #1
0
def create_dataset(parameters):
    """
    Creates a random dataset and saves it in the workspace.
    :param parameters: The parameters for the creation (number of rows, numbers of columns,...).
    """
    workspace = al.Workspace()

    num_rows, num_col, values = rand_param(parameters)

    print('Creating the random dataset')
    al.voice('Creating the random dataset')

    # Build each column from uniform random values in [values[0], values[1]).
    tt = pd.DataFrame(index=range(num_rows))
    for idx in range(num_col):
        column = np.random.uniform(values[0], values[1], size=num_rows)
        tt['col' + str(idx)] = pd.DataFrame(column, dtype='float32')

    rand = workspace.get_counter('rand')
    workspace.save_dataset('random' + str(rand), tt)
    # 'current' always points at the most recently produced dataset.
    workspace.save_dataset('current', tt)
    message = (
        'Created and saved as random{} which has {} columns, {} rows and values '
        'between {} and {}'.format(str(rand), num_col, num_rows, values[0],
                                   values[1]))
    print(message)
    al.voice(message)
Exemple #2
0
def join_by_rows(parameters):
    """
    Join two dataset with the same number of columns.
    :param parameters: The parameters of the function (dataset names).
    """
    workspace = al.Workspace()
    first_name = parameters['Dataset']
    second_name = parameters['Dataset2']
    first = workspace.get_dataset(first_name)
    second = workspace.get_dataset(second_name)

    # Abort when the second dataset is missing; only name it when the user
    # actually supplied a name.
    if second is None:
        if second_name != "":
            print("The object " + second_name + " does not exist.")
            al.voice("The object " + second_name + " does not exist.")
        print("Please, provide the two datasets that should be joined.")
        al.voice("Please, provide the two datasets that should be joined.")
        return

    if first.columns.size != second.columns.size:
        print(
            'Not able to execute.\nThe datasets have different number of columns'
        )
        return

    joined = pd.concat([first, second], ignore_index=True)
    counter = workspace.get_counter('join')
    result_name = 'join' + str(counter)
    workspace.save_dataset(result_name, joined)
    print('The resulting dataset between ' + first_name + ' and ' +
          second_name + ' is saved as ' + result_name)
Exemple #3
0
def split_by_rows(parameters):
    """
    Split a dataset into n datasets of m rows.
    :param parameters: The parameters of the function (dataset name, size of the split dataset for the rows).
    """
    workspace = al.Workspace()
    name_data = parameters['Dataset']
    dataset = workspace.get_dataset(name_data)

    if parameters['split']:
        chunk_size = int(parameters['split'])
    else:
        print('How many rows will each dataset have?')
        al.voice('How many rows will each dataset have?')
        answer = al.query_input()
        while not al.isnumber(answer):
            print(
                'Incorrect input.\nIt is not a number.\nPlease introduce one:')
            al.voice(
                'Incorrect input.\nIt is not a number.\nPlease introduce one.')
            answer = al.query_input()
        chunk_size = int(answer)

    saved_names = []
    position = 0
    # Slice the dataset into consecutive chunks of chunk_size rows.
    while position < dataset.index.size:
        piece = dataset.iloc[position:position + chunk_size]
        counter = workspace.get_counter('split')
        piece_name = name_data + 'r' + str(counter)
        saved_names.append(piece_name)
        workspace.save_dataset(piece_name, piece)
        position += chunk_size

    print('The splits of ' + name_data + ' are saved as: ' + str(saved_names)[1:-1])
Exemple #4
0
def get_subdataset_columns(parameters):
    """
    Obtains a subset of the dataset by its columns.
    :param parameters: The parameter of the function(dataset name,...).
    """
    workspace = al.Workspace()
    data_name = parameters['Dataset']
    dataset = workspace.get_dataset(data_name)
    cols = []

    if parameters["cols"]:
        cols = parameters['cols']
    else:
        # Keep asking for one column at a time until the user answers 'no'.
        stop = False
        while not stop:
            cols.append(al.obtain_column(dataset))
            print('Do you want to continue? yes or no?')
            al.voice('Do you want to continue? yes or no?')
            response = al.query_input()
            if response == 'no':
                stop = True

    dataset = dataset[cols]
    num = workspace.get_counter('sub')
    name = 'subcol' + str(num) + data_name
    workspace.save_dataset(name, dataset)
    # Bug fix: this function selects by columns, but the message said "rows".
    txt = 'The sub-dataset by the columns is saved as ' + name
    print(txt)
    al.voice(txt)
Exemple #5
0
def ask_for_dataset_extension(files):
    """
    Ask for the dataset extension file.
    :param files: All files with the same name.
    :return: The extension file.
    """
    question = 'What is the file extension (txt, csv)?'
    print(question)
    al.voice(question)
    # Show the candidate files so the user can pick the right extension.
    print('All the files that has been found with the same name: ' +
          str(files))
    return al.query_input()
Exemple #6
0
def ask_for_dataset_path():
    """
    Ask for the dataset path.
    :return: The path introduced.
    """
    question = 'Where is it located?'
    print(question)
    al.voice(question)
    print('Current path is ' + os.getcwd())
    answer = al.query_input()
    # 'here' is a shortcut for the current working directory.
    return os.getcwd() if answer == 'here' else answer
Exemple #7
0
def do_normalization(parameters):
    """
    Do an operation of normalization.
    :param parameters: The parameters of the function (name of the operation, dataset, ...).
    """
    op = parameters.pop("operation")
    workspace = al.Workspace()
    data_name = parameters["Dataset"]
    dataset = workspace.get_dataset(data_name)
    name = ''
    if not re.search("_in_place$", op):
        # Map each operation to (normalizer, saved-name prefix). Ops that need
        # the parameter dict are wrapped so every entry is a 1-arg callable.
        operations = {
            'decimal_scaling_norm': (lambda d: al.decimal_scaling_norm(d), 'dec_sca_norm'),
            'max_min_norm': (lambda d: al.max_min_norm(d, parameters), 'max_min_norm'),
            'mean_norm': (lambda d: al.mean_norm(d), 'mean_norm'),
            'znorm': (lambda d: al.znorm(d, parameters), 'znorm'),
        }
        if op in operations:
            normalizer, prefix = operations[op]
            norm = normalizer(dataset)
            name = prefix + str(workspace.get_counter('norm'))
            workspace.save_dataset(name, norm)

        # Announced even for unknown ops (name stays ''), as before.
        print('The normalization is stored as ' + name)
        al.voice('The normalization is stored as ' + name)
    else:
        # In-place variants are recognised but not implemented yet.
        if op == 'decimal_scaling_norm_in_place':
            pass
        elif op == 'max_min_norm_in_place':
            pass
        elif op == 'mean_norm_in_place':
            pass
        elif op == 'znorm_in_place':
            pass
Exemple #8
0
def do_features(parameters):
    """
    Execute the feature operations.
    :param parameters: The parameters for this function (name_dataset).
    """
    workspace = al.Workspace()
    source_name = parameters['Dataset']
    source = workspace.get_dataset(source_name)
    extracted = al.features(source)
    counter = str(workspace.get_counter('feat'))
    result_name = source_name + 'Features' + counter
    workspace.save_dataset(result_name, extracted)

    message = 'The features are stored as ' + result_name
    print(message)
    al.voice(message)
def get_int_number(parameters):
    """
    Obtains or asks for a integer number.
    :param parameters: the parameters of the function(number,...).
    :return: An integer number.
    """
    # If the caller already supplied the number, no dialog is needed.
    if parameters["number"]:
        return int(parameters["number"])
    print('How many point do you want to reduce it?')
    al.voice('How many point do you want to reduce it?')
    answer = al.query_input()
    while not answer.isnumeric():
        print('Incorrect input.\nIt is not a number.\nPlease introduce one.')
        al.voice('Incorrect input.\nIt is not a number.\nPlease introduce one.')
        answer = al.query_input()
    return int(answer)
def get_epsilon(parameters):
    """
    Obtains or asks for the epsilon.
    :param parameters: the parameters of the function(number,...).
    :return: The epsilon value as a float.
    """
    # Although we are asking for the epsilon, the value it is saved in the key number
    if parameters["number"]:
        # Bug fix: coerce to float so callers always get a number, matching
        # the interactive branch below (the value may arrive as a string).
        num_points = float(parameters["number"])
    else:
        print('What is the value of epsilon?')
        al.voice('What is the value of epsilon?')
        query = al.query_input()
        # Bug fix: use al.isnumber (as the other float prompts in this file do)
        # so decimal answers such as "0.5" are accepted; str.isnumeric rejects
        # any string containing '.' or '-'.
        while not al.isnumber(query):
            print('Incorrect input.\nIt is not a number.\nPlease introduce one.')
            al.voice('Incorrect input.\nIt is not a number.\nPlease introduce one.')
            query = al.query_input()
        num_points = float(query)
    return num_points
Exemple #11
0
def exiting_yes(response):
    """
    Exit the program saving the workspace.
    :param response: The response from Dialogflow.
    """
    # Each status line is both printed and spoken.
    for status in ('Saving the workspace', 'Saved workspace'):
        print(status)
        al.voice(status)
    print('DEBUG: Fulfillment text: {}'.format(response))
    al.voice(response)
    print('Closing program')
    al.voice('Closing program')
    exit()
Exemple #12
0
def exiting_no(response):
    """
    Exit the program deleting the workspace.
    :param response: The response from Dialogflow.
    """
    def _announce(text):
        # Every status line is both printed and spoken.
        print(text)
        al.voice(text)

    _announce('Deleting workspace')
    # Discard everything stored during this session.
    al.Workspace().clean_workspace()
    _announce('Deleted workspace')
    print('DEBUG: Fulfillment text: {}'.format(response))
    al.voice(response)
    _announce('Closing program')
    exit()
Exemple #13
0
def _ask_row_number(prompt):
    """
    Ask the user for a row number until a numeric answer is given.
    :param prompt: The question to print and speak.
    :return: The answer as an int.
    """
    print(prompt)
    al.voice(prompt)
    query = al.query_input()
    while not al.isnumber(query):
        print(
            'Incorrect input.\nIt is not a number.\nPlease introduce one:')
        al.voice(
            'Incorrect input.\nIt is not a number.\nPlease introduce one.')
        query = al.query_input()
    return int(query)


def get_subdataset_rows(parameters):
    """
    Obtains a subset of the dataset by its rows.
    :param parameters: The parameter of the function(dataset name,...).
    :raises Exception: If the starting row number is greater than the last one.
    """
    workspace = al.Workspace()
    data_name = parameters['Dataset']
    dataset = workspace.get_dataset(data_name)

    # The duplicated prompt loops are factored into _ask_row_number.
    if parameters["from"]:
        index_a = int(parameters["from"])
    else:
        index_a = _ask_row_number('From what row number?')
    if parameters["to"]:
        index_b = int(parameters['to'])
    else:
        index_b = _ask_row_number('To what row number?')

    if index_b < index_a:
        msg = ('This operation cannot be done.\n'
               'The starting row number is greater than the last row number.')
        print(msg)
        # Carry the explanation in the exception instead of raising a bare Exception().
        raise Exception(msg)

    dataset = dataset.iloc[index_a:index_b]
    num = workspace.get_counter('sub')
    name = 'subrow' + str(num) + data_name
    workspace.save_dataset(name, dataset)
    txt = 'The sub-dataset by the rows is saved as ' + name
    print(txt)
    # Consistency fix: speak the confirmation too, as get_subdataset_columns does.
    al.voice(txt)
Exemple #14
0
def do_matrix(parameters):
    """
    Do a operation of matrix.

    Dispatches on the "operation" parameter: "stomp" (two datasets),
    "stomp_self_join" (one dataset against itself), "find_best_n_discords"
    or "find_best_n_motifs" (the last two run on a stored stomp result).
    :param parameters: The parameters of the function (name of the operation, number of clusters, ...).
    """
    op = parameters.pop("operation")
    workspace = al.Workspace()

    # A "stomp" without a second dataset is treated as a self join.
    if op == "stomp" and not parameters.get('Dataset2'):
        op = 'stomp_self_join'

    if op == "stomp":
        data_name = parameters["Dataset"]
        data_name2 = parameters["Dataset2"]
        dataset1 = workspace.get_dataset(data_name)
        dataset2 = workspace.get_dataset(data_name2)

        # Abort when the second dataset is missing; only name it in the
        # message when the user actually supplied a name.
        if dataset2 is None:
            if not data_name2 == "":
                print("The object " + data_name2 + " does not exist.")
                al.voice("The object " + data_name2 + " does not exist.")
            print("Please, provide the two datasets that should be stomped.")
            al.voice("Please, provide the two datasets that should be stomped.")
            return

        # Multi-column datasets are reduced to one user-chosen column.
        col = ''
        if dataset1.columns.size > 1:
            col = al.obtain_column(dataset1)
            dataset1 = dataset1[col]
        if dataset2.columns.size > 1:
            dataset2 = dataset2[al.obtain_column(dataset2)]

        (stomp, m) = al.stomp(dataset1.values, dataset2.values, parameters)
        number = workspace.get_counter('matrix_stomp')
        # Stored as a tuple: (stomp result as JSON, m, chosen column, series as JSON);
        # the find_best_n_* branches below rely on this exact layout.
        workspace.save_dataset('stomp' + str(number), (stomp.to_json(), m, col, dataset1.to_json()))
        print("The stomp is stored as stomp" + str(number))

    elif op == "stomp_self_join":
        data_name = parameters["Dataset"]
        dataset1 = workspace.get_dataset(data_name)

        col = ''
        if dataset1.columns.size > 1:
            col = al.obtain_column(dataset1)
            dataset1 = dataset1[col]

        (stomp, m) = al.stomp_self_join(dataset1.values, parameters)
        number = workspace.get_counter('matrix_stomp')
        # Same tuple layout as the two-dataset "stomp" branch above.
        workspace.save_dataset('stomp' + str(number), (stomp.to_json(), m, col, dataset1.to_json()))
        print("The stomp is stored as stomp" + str(number))

    elif op == "find_best_n_discords":
        stomp_name = parameters['Dataset']
        stomp = workspace.get_dataset(stomp_name)
        # Recover (m, col, series JSON) saved next to the stomp profile.
        m, col, dataset = workspace.get_value(stomp_name)[1:]

        discords = al.find_best_n_discords(stomp, m, parameters, col, pd.read_json(dataset).sort_index())
        number = workspace.get_counter('matrix_best_d')
        workspace.save_dataset('discords' + str(number), discords)
        print('The best ' + str(int(parameters['n'])) + ' discord segments are stored as discords' + str(number))

    elif op == "find_best_n_motifs":
        stomp_name = parameters['Dataset']
        stomp = workspace.get_dataset(stomp_name)
        m, col, dataset = workspace.get_value(stomp_name)[1:]

        motifs = al.find_best_n_motifs(stomp, m, parameters, col, pd.read_json(dataset).sort_index())
        number = workspace.get_counter('matrix_best_m')
        workspace.save_dataset('motifs' + str(number), motifs)
        print('The best ' + str(int(parameters['n'])) + ' motifs segments are stored as motifs' + str(number))
Exemple #15
0
def detect_intent_text(project_id, session_id, text, language_code):
    """
    Detects the intent of the text and execute some instruction

    Using the same `session_id` between requests allows continuation of the conversation.

    :param project_id: ID of the project
    :param session_id: ID of the session
    :param text: The text input for analyse
    :param language_code: Code of the language
    """

    session_client = dialogflow.SessionsClient()

    session = session_client.session_path(project_id, session_id)
    print('Session path: {}\n'.format(session))

    text_input = dialogflow.types.TextInput(text=text, language_code=language_code)

    query_input = dialogflow.types.QueryInput(text=text_input)

    # Send the user's text to Dialogflow and receive the detected intent.
    response = session_client.detect_intent(session=session, query_input=query_input)

    """Conversion of Protocol Buffer to JSON"""
    response_json = pbjson.MessageToJson(response)
    data = json.loads(response_json)
    # Slot values Dialogflow filled for the detected intent.
    parameters = data['queryResult']['parameters']
    print(parameters)

    print('=' * 20)
    print('DEBUG: Query text: {}'.format(response.query_result.query_text))
    print('DEBUG: Detected intent: {} (confidence: {})\n'.format(
        response.query_result.intent.display_name,
        response.query_result.intent_detection_confidence))
    try:
        # Intents that do not operate on a loaded dataset are dispatched first.
        if response.query_result.intent.display_name == 'RandomDataset':
            al.create_dataset(parameters)

        elif response.query_result.intent.display_name == 'LoadDataset':
            al.load_dataset(parameters)

        elif response.query_result.intent.display_name == 'ShowWorkspace':
            workspace = al.Workspace()
            print(list(workspace.get_all_dataset()))

        elif response.query_result.intent.display_name == 'GetBackend':
            al.get_library_backend(parameters['library'])

        elif response.query_result.intent.display_name == 'SetBackend':
            al.set_library_backend(parameters)

        elif response.query_result.intent.display_name == 'Exit - yes':
            al.exiting_yes(response.query_result.fulfillment_text)

        elif response.query_result.intent.display_name == 'Exit - no':
            al.exiting_no(response.query_result.fulfillment_text)

        # Every remaining intent (except Default*/Exit* fallbacks) needs a dataset.
        elif not re.search("^Default|Exit", response.query_result.intent.display_name):

            # With no dataset named, fall back to the one saved as 'current'.
            if not parameters.get("Dataset"):
                parameters['Dataset'] = 'current'

            if al.check_dataset(parameters):

                if response.query_result.intent.display_name == 'ChangeName':
                    al.change_name(parameters)

                elif response.query_result.intent.display_name == 'ShowResult':
                    al.execute_plot(parameters)

                elif response.query_result.intent.display_name == 'PrintResult':
                    al.execute_print(parameters)

                elif response.query_result.intent.display_name == 'SubDatasetRow':
                    al.get_subdataset_rows(parameters)

                elif response.query_result.intent.display_name == 'SubDatasetCols':
                    al.get_subdataset_columns(parameters)

                elif response.query_result.intent.display_name == 'JoinByCols':
                    al.join_by_cols(parameters)

                elif response.query_result.intent.display_name == 'JoinByRows':
                    al.join_by_rows(parameters)

                elif response.query_result.intent.display_name == 'SplitByCols':
                    al.split_by_cols(parameters)

                elif response.query_result.intent.display_name == 'SplitByRows':
                    al.split_by_rows(parameters)

                elif response.query_result.intent.display_name == 'DoDimensionality':
                    al.do_dimensionality(parameters)

                elif response.query_result.intent.display_name == 'DoClustering':
                    al.do_clustering(parameters)

                elif response.query_result.intent.display_name == 'DoMatrix_Stomp':
                    al.do_matrix(parameters)

                elif response.query_result.intent.display_name == 'DoMatrix_Best':
                    al.do_matrix(parameters)

                elif response.query_result.intent.display_name == 'DoNormalization':
                    al.do_normalization(parameters)

                elif response.query_result.intent.display_name == 'DoFeatures':
                    al.do_features(parameters)

            else:
                # The requested dataset could not be found in the workspace.
                if parameters["Dataset"] != 'current':
                    print("The object " + parameters["Dataset"] + " does not exist.")
                    al.voice("The object " + parameters["Dataset"] + " does not exist.")
                else:
                    print("There is no loaded dataset.")
                    al.voice("There is no loaded dataset.")
                print("Please, load a dataset or use a previously stored one before using any function.")
                al.voice("Please, load a dataset or use a previously stored one before using any function.")
                return

        print('DEBUG: Fulfillment text: {}\n'.format(response.query_result.fulfillment_text))
        if response.query_result.fulfillment_text:
            al.voice(response.query_result.fulfillment_text)
    except Exception as e:
        # A failing handler is reported but does not end the conversation loop.
        print('An error in the execution has been raised.')
        print(e)
        return
Exemple #16
0
def rand_param(parameters):
    """
    Obtains the parameters for the random dataset generator.
    :param parameters: The parameters for the creation (number of rows, numbers of columns,...).
    :return: A tuple of the parameters.
    """
    def _ask(question, is_valid, retry_message):
        # Print and speak the question, then loop until the answer validates.
        print(question)
        al.voice(question)
        answer = al.query_input()
        while not is_valid(answer):
            print(retry_message)
            al.voice(
                'Incorrect input.\nIt is not a number.\nPlease introduce one.')
            answer = al.query_input()
        return answer

    # The integer prompts end their printed retry with '.', the float ones with ':'.
    int_retry = 'Incorrect input.\nIt is not a number.\nPlease introduce one.'
    float_retry = 'Incorrect input.\nIt is not a number.\nPlease introduce one:'

    if parameters["columns"]:
        num_col = int(parameters["columns"])
    else:
        num_col = int(_ask('How many columns?', str.isnumeric, int_retry))

    if parameters["rows"]:
        num_rows = int(parameters["rows"])
    else:
        num_rows = int(_ask('How many rows?', str.isnumeric, int_retry))

    if parameters["values"]:
        values = parameters["values"]
    else:
        values = []
        values.append(float(_ask('What is the minimum value?', al.isnumber, float_retry)))
        values.append(float(_ask('And the maximum?', al.isnumber, float_retry)))

    return num_rows, num_col, values