Example No. 1
    def test_split_cols(self):
        order = "split the dataset random0 by 2 columns"

        data = response(self, order)

        self.assertEqual(data['queryResult']['intent']['displayName'],
                         'SplitByCols')
        self.assertGreater(data['queryResult']['intentDetectionConfidence'],
                           0.8)
        self.assertEqual(data['queryResult']['parameters']['Dataset'],
                         'random0')
        self.assertEqual(data['queryResult']['parameters']['split'], 2)

        al.create_dataset({'columns': 10, 'rows': 200, 'values': [0, 1]})

        al.split_by_cols(data['queryResult']['parameters'])

        datasets = re.findall('random0c[0-9]*',
                              str(al.Workspace().get_all_dataset()))
        num_datasets = len(datasets)
        self.assertEqual(num_datasets, 5)
        workspace = al.Workspace()
        for num in range(num_datasets):
            dataset = workspace.get_dataset(datasets[num])
            ncol = dataset.columns.size
            nrow = dataset.index.size
            self.assertEqual(ncol, 2)
            self.assertEqual(nrow, 200)
Example No. 2
    def test_join_rows(self):
        order = "join by rows the datasets random0 and random1"

        data = response(self, order)

        self.assertEqual(data['queryResult']['intent']['displayName'],
                         'JoinByRows')
        self.assertGreater(data['queryResult']['intentDetectionConfidence'],
                           0.8)
        self.assertEqual(data['queryResult']['parameters']['Dataset'],
                         'random0')
        self.assertEqual(data['queryResult']['parameters']['Dataset2'],
                         'random1')

        al.create_dataset({'columns': 10, 'rows': 200, 'values': [0, 1]})
        al.create_dataset({'columns': 10, 'rows': 200, 'values': [0, 1]})

        al.join_by_rows(data['queryResult']['parameters'])

        dataset = al.Workspace().get_dataset('join0')

        ncol = dataset.columns.size
        nrow = dataset.index.size

        self.assertEqual(ncol, 10)
        self.assertEqual(nrow, 400)
Example No. 3
    def test_subdataset_cols(self):
        order = "obtain a subset from random0 by columns at col0, col2, and col7"

        data = response(self, order)

        self.assertEqual(data['queryResult']['intent']['displayName'],
                         'SubDatasetCols')
        self.assertGreater(data['queryResult']['intentDetectionConfidence'],
                           0.8)
        self.assertEqual(data['queryResult']['parameters']['Dataset'],
                         'random0')
        self.assertEqual(data['queryResult']['parameters']['cols'],
                         ['col0', 'col2', 'col7'])

        al.create_dataset({'columns': 10, 'rows': 200, 'values': [0, 1]})

        al.get_subdataset_columns(data['queryResult']['parameters'])

        dataset = al.Workspace().get_dataset('subcol0random0')

        ncol = dataset.columns.size
        cols = dataset.columns.to_list()
        expected = ['col0', 'col2', 'col7']

        self.assertEqual(ncol, 3)
        for n in range(3):
            self.assertEqual(cols[n], expected[n])
Example No. 4
    def test_find_best_n_motifs(self):
        order = "Execute stomp on energy and consumption with a subsequence length of 3"

        data = response(self, order)

        tt1 = pd.DataFrame([10, 10, 10, 10, 10, 10, 9, 10, 10, 10, 10, 10, 11, 10, 9])
        tt2 = pd.DataFrame([10, 11, 10, 9])
        self.workspace.save_dataset('energy', tt1)
        self.workspace.save_dataset('consumption', tt2)

        al.do_matrix(data['queryResult']['parameters'])

        order = "Find best 1 motif from stomp0"

        data = response(self, order)
        self.assertEqual(data['queryResult']['intent']['displayName'], 'DoMatrix_Best')
        self.assertGreater(data['queryResult']['intentDetectionConfidence'], 0.8)
        self.assertEqual(data['queryResult']['parameters']['operation'], 'find_best_n_motifs')
        self.assertEqual(data['queryResult']['parameters']['n'], 1)

        al.do_matrix(data['queryResult']['parameters'])

        find_best_n_motifs_result = al.Workspace().get_dataset('motifs0')

        for i in range(3):
            self.assertEqual(find_best_n_motifs_result['col0'][i], 10)
Example No. 5
def create_dataset(parameters):
    """
    Creates a random dataset and saves it.
    :param parameters: The parameters for the creation (number of rows, number of columns, ...).
    """
    workspace = al.Workspace()

    num_rows, num_col, values = rand_param(parameters)

    print('Creating the random dataset')

    tt = pd.DataFrame(index=range(num_rows))
    for n in range(num_col):
        tt['col' + str(n)] = pd.DataFrame(np.random.uniform(values[0],
                                                            values[1],
                                                            size=num_rows),
                                          dtype='float32')

    rand = workspace.get_counter('rand')
    workspace.save_dataset('random' + str(rand), tt)
    workspace.save_dataset('current', tt)
    print(
        'Created and saved as random{} which has {} columns, {} rows and values '
        'between {} and {}'.format(str(rand), num_col, num_rows, values[0],
                                   values[1]))
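
For reference, the test cases in this collection drive this helper with a plain parameter dict; a minimal usage sketch (assuming al is the module alias used throughout these examples):

# Mirrors the calls in the tests above: 200 rows, 10 columns,
# uniform values between 0 and 1.
al.create_dataset({'columns': 10, 'rows': 200, 'values': [0, 1]})
# The first call saves the result as 'random0' (and as 'current').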
Example No. 6
    def test_find_best_n_discords(self):
        order = "Execute stomp on energy and consumption with a subsequence length of 3"

        data = response(self, order)

        tt1 = pd.DataFrame([11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11, 10, 11])
        tt2 = pd.DataFrame([9, 10.1, 10.2, 10.1, 10.2, 10.1, 10.2, 10.1, 10.2, 10.1, 10.2, 10.1, 9])
        self.workspace.save_dataset('energy', tt1)
        self.workspace.save_dataset('consumption', tt2)

        al.do_matrix(data['queryResult']['parameters'])

        order = "Find best 2 discords from stomp0"

        data = response(self, order)
        self.assertEqual(data['queryResult']['intent']['displayName'], 'DoMatrix_Best')
        self.assertGreater(data['queryResult']['intentDetectionConfidence'], 0.8)
        self.assertEqual(data['queryResult']['parameters']['operation'], 'find_best_n_discords')
        self.assertEqual(data['queryResult']['parameters']['n'], 2)

        al.do_matrix(data['queryResult']['parameters'])

        find_best_n_discord_result = al.Workspace().get_dataset('discords0')

        expected_result = [11, 10, 11]

        for i in range(1):
            col = find_best_n_discord_result['col' + str(i)].tolist()
            for j in range(2):
                self.assertEqual(col[j], expected_result[j])
Example No. 7
    def test_decimal_scaling_norm(self):
        order = 'Execute the decimal scaling normalization on timeserie'

        data = response(self, order)
        self.assertEqual(data['queryResult']['intent']['displayName'],
                         'DoNormalization')
        self.assertGreater(data['queryResult']['intentDetectionConfidence'],
                           0.9)
        self.assertEqual(data['queryResult']['parameters']['operation'],
                         'decimal_scaling_norm')
        self.assertEqual(data['queryResult']['parameters']['Dataset'],
                         'timeserie')

        tt = pd.DataFrame([[0, 1, -2, 3], [40, 50, 60, -70]])
        self.workspace.save_dataset('timeserie', tt)
        al.do_normalization(data['queryResult']['parameters'])

        dec_sca_norm_result = al.Workspace().get_dataset(
            "dec_sca_norm0").to_numpy().flatten()
        expected = [[0.0, 0.1, -0.2, 0.3], [0.4, 0.5, 0.6, -0.7]]

        for i in range(len(expected[0])):
            self.assertAlmostEqual(dec_sca_norm_result[i],
                                   expected[0][i],
                                   delta=self.DELTA)
            self.assertAlmostEqual(dec_sca_norm_result[i + 4],
                                   expected[1][i],
                                   delta=self.DELTA)
Example No. 8
def do_dimensionality(parameters):
    """
    Perform a dimensionality-reduction operation.
    :param parameters: The parameters of the function (name of the operation, ...).
    """

    op = parameters.pop("operation")
    workspace = al.Workspace()
    data_name = parameters["Dataset"]
    dataset = workspace.get_dataset(data_name)

    if op == "paa":
        # Calling khiva
        data = al.paa(dataset, parameters)
        num = workspace.get_counter('redux')
        workspace.save_dataset("paa" + str(num), data)
        print('The reduction of points is stored as paa' + str(num))

    elif op == 'pip':
        data = al.pip(dataset, parameters)
        num = workspace.get_counter('redux')
        workspace.save_dataset("pip" + str(num), data)
        print('The reduction of points is stored as pip' + str(num))

    elif op == 'ramer_douglas_peucker':
        data = al.ramer_douglas_peucker(dataset, parameters)
        num = workspace.get_counter('redux')
        workspace.save_dataset("RDP" + str(num), data)
        print('The reduction of points is stored as RDP' + str(num))

    elif op == 'visvalingam':
        data = al.visvalingam(dataset, parameters)
        num = workspace.get_counter('redux')
        workspace.save_dataset("visvalingam" + str(num), data)
        print('The reduction of points is stored as visvalingam' + str(num))
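
The four branches above differ only in the khiva wrapper called and the name prefix, so they could collapse into a dispatch table; a sketch of that refactor (illustrative, not the project's actual code):

# Hypothetical dispatch table: operation name -> (wrapper, name prefix).
DIMENSIONALITY_OPS = {
    'paa': (al.paa, 'paa'),
    'pip': (al.pip, 'pip'),
    'ramer_douglas_peucker': (al.ramer_douglas_peucker, 'RDP'),
    'visvalingam': (al.visvalingam, 'visvalingam'),
}

def do_dimensionality_sketch(parameters):
    op = parameters.pop('operation')
    workspace = al.Workspace()
    dataset = workspace.get_dataset(parameters['Dataset'])
    func, prefix = DIMENSIONALITY_OPS[op]  # KeyError on unknown operations
    data = func(dataset, parameters)
    num = workspace.get_counter('redux')
    workspace.save_dataset(prefix + str(num), data)
    print('The reduction of points is stored as ' + prefix + str(num))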
Example No. 9
def do_clustering(parameters):
    """
    Perform a clustering operation.
    :param parameters: The parameters of the function (name of the operation, number of clusters, ...).
    """
    op = parameters.pop("operation")
    workspace = al.Workspace()
    data_name = parameters["Dataset"]
    dataset = workspace.get_dataset(data_name)

    if op == "kmeans":
        (centroids, labels) = al.kmean(dataset, parameters)
        number = workspace.get_counter('clustering')
        workspace.save_dataset('centroids' + str(number), centroids)
        workspace.save_dataset('labels' + str(number), labels)

    elif op == "kshape":
        (centroids, labels) = al.kshape(dataset, parameters)
        number = workspace.get_counter('clustering')
        workspace.save_dataset('centroids' + str(number), centroids)
        workspace.save_dataset('labels' + str(number), labels)
    else:
        return

    print("The centroids are stored in centroids" + str(number))
    print("The labels are stored in labels" + str(number))
Example No. 10
    def test_mean_norm(self):
        order = 'Execute the mean norm on timeserie'

        data = response(self, order)
        self.assertEqual(data['queryResult']['intent']['displayName'],
                         'DoNormalization')
        self.assertGreater(data['queryResult']['intentDetectionConfidence'],
                           0.9)
        self.assertEqual(data['queryResult']['parameters']['operation'],
                         'mean_norm')
        self.assertEqual(data['queryResult']['parameters']['Dataset'],
                         'timeserie')

        tt = pd.DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]])
        self.workspace.save_dataset('timeserie', tt)
        al.do_normalization(data['queryResult']['parameters'])

        mean_norm_result = al.Workspace().get_dataset(
            "mean_norm0").to_numpy().flatten()
        expected = [-0.5, -0.166666667, 0.166666667, 0.5]

        for i in range(len(expected)):
            self.assertAlmostEqual(mean_norm_result[i],
                                   expected[i],
                                   delta=self.DELTA)
            self.assertAlmostEqual(mean_norm_result[i + 4],
                                   expected[i],
                                   delta=self.DELTA)
Example No. 11
def get_subdataset_columns(parameters):
    """
    Obtains a subset of the dataset by its columns.
    :param parameters: The parameters of the function (dataset name, ...).
    """
    workspace = al.Workspace()
    data_name = parameters['Dataset']
    dataset = workspace.get_dataset(data_name)
    cols = []

    if parameters["cols"]:
        cols = parameters['cols']
    else:
        stop = False
        while not stop:
            cols.append(al.obtain_column(dataset))
            print('Do you want to continue? yes or no?')

            response = al.query_input()
            if response == 'no':
                stop = True

    dataset = dataset[cols]
    num = workspace.get_counter('sub')
    name = 'subcol' + str(num) + data_name
    workspace.save_dataset(name, dataset)
    txt = 'The sub-dataset by the columns is saved as ' + name
    print(txt)
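
A hypothetical direct call, matching test_subdataset_cols earlier in this collection:

al.get_subdataset_columns({'Dataset': 'random0',
                           'cols': ['col0', 'col2', 'col7']})
# The first call saves the three-column projection as 'subcol0random0'.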
Example No. 12
def join_by_rows(parameters):
    """
    Join two datasets with the same number of columns.
    :param parameters: The parameters of the function (dataset names).
    """
    workspace = al.Workspace()
    name_data1 = parameters['Dataset']
    name_data2 = parameters['Dataset2']
    dataset1 = workspace.get_dataset(name_data1)
    dataset2 = workspace.get_dataset(name_data2)

    if dataset2 is None:
        if not name_data2 == "":
            print("The object " + name_data2 + " does not exist.")

        print("Please, provide the two datasets that should be joined.")

        return

    if dataset1.columns.size != dataset2.columns.size:
        print(
            'Not able to execute.\nThe datasets have a different number of columns.'
        )
        return

    dataset = pd.concat([dataset1, dataset2], ignore_index=True)
    num = workspace.get_counter('join')
    name = 'join' + str(num)
    workspace.save_dataset(name, dataset)
    print('The resulting dataset between ' + name_data1 + ' and ' +
          name_data2 + ' is saved as ' + name)
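
A hypothetical direct call, matching test_join_rows earlier in this collection:

al.join_by_rows({'Dataset': 'random0', 'Dataset2': 'random1'})
# Two 200-row datasets with equal column counts yield a 400-row
# result saved as 'join0'.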
Example No. 13
    def test_znorm(self):
        order = 'Execute znorm on timeserie'

        data = response(self, order)
        self.assertEqual(data['queryResult']['intent']['displayName'],
                         'DoNormalization')
        self.assertGreater(data['queryResult']['intentDetectionConfidence'],
                           0.9)
        self.assertEqual(data['queryResult']['parameters']['operation'],
                         'znorm')
        self.assertEqual(data['queryResult']['parameters']['Dataset'],
                         'timeserie')

        tt = pd.DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]])
        self.workspace.save_dataset('timeserie', tt)
        al.do_normalization(data['queryResult']['parameters'])
        znorm_result = al.Workspace().get_dataset(
            "znorm0").to_numpy().flatten()

        expected = [
            -1.341640786499870, -0.447213595499958, 0.447213595499958,
            1.341640786499870
        ]
        for i in range(len(expected)):
            self.assertAlmostEqual(znorm_result[i],
                                   expected[i],
                                   delta=self.DELTA)
            self.assertAlmostEqual(znorm_result[i + 4],
                                   expected[i],
                                   delta=self.DELTA)
Example No. 14
    def test_subdataset_rows(self):
        order = "obtain a subset by rows from random0 from 10 to 60"

        data = response(self, order)

        self.assertEqual(data['queryResult']['intent']['displayName'],
                         'SubDatasetRow')
        self.assertGreater(data['queryResult']['intentDetectionConfidence'],
                           0.8)
        self.assertEqual(data['queryResult']['parameters']['Dataset'],
                         'random0')
        self.assertEqual(data['queryResult']['parameters']['from'], 10)
        self.assertEqual(data['queryResult']['parameters']['to'], 60)

        al.create_dataset({'columns': 10, 'rows': 200, 'values': [0, 1]})

        al.get_subdataset_rows(data['queryResult']['parameters'])

        dataset = al.Workspace().get_dataset('subrow0random0')

        index = dataset.index
        nrow = dataset.index.size

        self.assertEqual(index.min(),
                         data['queryResult']['parameters']['from'])
        self.assertEqual(index.max(),
                         data['queryResult']['parameters']['to'] - 1)
        self.assertEqual(nrow, 50)
Example No. 15
def split_by_rows(parameters):
    """
    Split a dataset into n datasets of m rows.
    :param parameters: The parameters of the function (dataset name, number of rows per split).
    """
    workspace = al.Workspace()
    name_data = parameters['Dataset']
    dataset = workspace.get_dataset(name_data)

    if parameters['split']:
        div = int(parameters['split'])
    else:
        print('How many rows will each dataset have?')

        query = al.query_input()
        while not al.isnumber(query):
            print(
                'Incorrect input.\nIt is not a number.\nPlease introduce one:')

            query = al.query_input()
        div = int(query)

    it = 0
    names = []
    while it < dataset.index.size:
        div_dataset = dataset.iloc[it:it + div]
        num = workspace.get_counter('split')
        name = name_data + 'r' + str(num)
        names.append(name)
        workspace.save_dataset(name, div_dataset)
        it = it + div

    print('The splits of ' + name_data + ' are saved as: ' + str(names)[1:-1])
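
A hypothetical direct call (the chunk size of 50 is an assumption for illustration; the names assume the 'split' counter starts at zero):

# Split a 200-row 'random0' into chunks of 50 rows each.
al.split_by_rows({'Dataset': 'random0', 'split': 50})
# Saved as 'random0r0' through 'random0r3', one dataset per chunk.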
Example No. 16
    def test_max_min_norm_with_param(self):
        order = 'Execute maximal minimal normalization on timeserie with max value of 2 and min value of 1'

        data = response(self, order)
        self.assertEqual(data['queryResult']['intent']['displayName'],
                         'DoNormalization')
        self.assertGreater(data['queryResult']['intentDetectionConfidence'],
                           0.9)
        self.assertEqual(data['queryResult']['parameters']['operation'],
                         'max_min_norm')
        self.assertEqual(data['queryResult']['parameters']['Dataset'],
                         'timeserie')
        self.assertEqual(data['queryResult']['parameters']['max'], 2)
        self.assertEqual(data['queryResult']['parameters']['min'], 1)

        tt = pd.DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]])
        self.workspace.save_dataset('timeserie', tt)
        al.do_normalization(data['queryResult']['parameters'])

        max_min_norm_result = al.Workspace().get_dataset(
            "max_min_norm0").to_numpy().flatten()
        expected = [1.0, 1.3333333333333, 1.66666667, 2.0]

        for i in range(len(expected)):
            self.assertAlmostEqual(max_min_norm_result[i],
                                   expected[i],
                                   delta=self.DELTA)
            self.assertAlmostEqual(max_min_norm_result[i + 4],
                                   expected[i],
                                   delta=self.DELTA)
Example No. 17
def execute_print(parameters):
    """
    Execute the print function.
    :param parameters: The parameters for the print (dataset name, ...).
    """
    workspace = al.Workspace()
    data_name = parameters["Dataset"]
    dataset = workspace.get_dataset(data_name)
    print(dataset)
Example No. 18
def execute_plot(parameters):
    """
    Execute the plot function.
    :param parameters: The parameters for the graphic (dataset name, intervals,...).
    """
    workspace = al.Workspace()
    data_name = parameters["Dataset"]
    dataset = workspace.get_dataset(data_name)
    plot_dataset(dataset, parameters)
Example No. 19
    def setUp(self):
        set_backend(KHIVABackend.KHIVA_BACKEND_CPU)
        self.project_id = "aljuaritmo"
        self.language_code = "en"
        self.session_client = dialogflow.SessionsClient()
        self.session = self.session_client.session_path(self.project_id, self.session_id)

        self.workspace = al.Workspace()
        self.workspace.init_current()
Example No. 20
    def test_shape_dataset(self):
        order = "Do the kshape of 3 clusters for energy"

        data = response(self, order)
        self.assertEqual(data['queryResult']['intent']['displayName'],
                         'DoClustering')
        self.assertGreater(data['queryResult']['intentDetectionConfidence'],
                           0.8)
        self.assertEqual(data['queryResult']['parameters']['operation'],
                         'kshape')
        self.assertEqual(data['queryResult']['parameters']['number'], 3)
        self.assertEqual(data['queryResult']['parameters']['Dataset'],
                         'energy')

        tts = pd.DataFrame([[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                            [0.0, 10.0, 4.0, 5.0, 7.0, -3.0, 0.0],
                            [-1.0, 15.0, -12.0, 8.0, 9.0, 4.0, 5.0],
                            [2.0, 8.0, 7.0, -6.0, -1.0, 2.0, 9.0],
                            [-5.0, -5.0, -6.0, 7.0, 9.0, 9.0, 0.0]])

        self.workspace.save_dataset('energy', tts)

        expected_c = pd.DataFrame(
            [[-0.5234, 0.1560, -0.3627, -1.2764, -0.7781, 0.9135, 1.8711],
             [-0.7825, 1.5990, 0.1701, 0.4082, 0.8845, -1.4969, -0.7825],
             [-0.6278, 1.3812, -2.0090, 0.5022, 0.6278, 0.0000, 0.1256]])

        al.do_clustering(data['queryResult']['parameters'])

        (centroid, labels) = (al.Workspace().get_dataset('centroids0'),
                              al.Workspace().get_dataset('labels0'))

        for i in range(len(expected_c)):
            self.assertAlmostEqual(centroid[0][i],
                                   expected_c[0][i],
                                   delta=self.DELTA)
            self.assertAlmostEqual(centroid[1][i],
                                   expected_c[1][i],
                                   delta=self.DELTA)
            self.assertAlmostEqual(centroid[2][i],
                                   expected_c[2][i],
                                   delta=self.DELTA)
Example No. 21
def main(*args, **kwargs):
    try:
        al.Workspace().init_current()
        print("Welcome, I'm Aljuarismo, what can I do for you?")
        while True:
            query = click.prompt('')
            click.echo('DEBUG: %s' % query)
            detect_intent_text(project_id, session_id, query, language_code)
    except click.exceptions.Abort:
        print('Closing the program')
        sys.exit()
Example No. 22
    def setUp(self):

        self.project_id = "aljuaritmo"
        self.language_code = "en"
        self.session_client = dialogflow.SessionsClient()
        self.session = self.session_client.session_path(
            self.project_id, self.session_id)

        self.workspace = al.Workspace()
        self.workspace.save_dataset_path('titanic', 'datasets')
        self.workspace.init_current()
Example No. 23
    def setUp(self):
        set_backend(KHIVABackend.KHIVA_BACKEND_CPU)
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/home/franco.gonzalez/Desktop/Credentials/" \
                                                       "Aljuaritmo-3ac32e58ff41.json"
        self.project_id = "aljuaritmo"
        self.language_code = "en"
        self.session_client = dialogflow.SessionsClient()
        self.session = self.session_client.session_path(
            self.project_id, self.session_id)

        self.workspace = al.Workspace()
        self.workspace.init_current()
Example No. 24
    def test_change_name(self):
        order = "rename random0 to r_a_n_d_o_m_0"

        data = response(self, order)

        self.assertEqual(data['queryResult']['intent']['displayName'],
                         'ChangeName')
        self.assertGreater(data['queryResult']['intentDetectionConfidence'],
                           0.95)
        self.assertEqual(data['queryResult']['parameters']['Dataset'],
                         'random0')
        self.assertEqual(data['queryResult']['parameters']['NameInto'],
                         'r_a_n_d_o_m_0')

        al.create_dataset({'columns': 10, 'rows': 200, 'values': [0, 1]})

        self.assertIsNotNone(al.Workspace().get_dataset('random0'))
        self.assertIsNone(al.Workspace().get_dataset('r_a_n_d_o_m_0'))
        data1 = al.Workspace().get_dataset('random0')

        al.change_name(data['queryResult']['parameters'])

        self.assertIsNone(al.Workspace().get_dataset('random0'))
        self.assertIsNotNone(al.Workspace().get_dataset('r_a_n_d_o_m_0'))
        data2 = al.Workspace().get_dataset('r_a_n_d_o_m_0')

        self.assertTrue(data1.equals(data2))
Example No. 25
    def test_kmeans_dataset(self):
        order = "Do the kmeans of 3 clusters for energy"

        data = response(self, order)
        self.assertEqual(data['queryResult']['intent']['displayName'],
                         'DoClustering')
        self.assertGreater(data['queryResult']['intentDetectionConfidence'],
                           0.8)
        self.assertEqual(data['queryResult']['parameters']['operation'],
                         'kmeans')
        self.assertEqual(data['queryResult']['parameters']['number'], 3)
        self.assertEqual(data['queryResult']['parameters']['Dataset'],
                         'energy')

        tts = pd.DataFrame([[0.0, 1.0, 2.0, 3.0], [6.0, 7.0, 8.0, 9.0],
                            [2.0, -2.0, 4.0, -4.0], [8.0, 5.0, 3.0, 1.0],
                            [15.0, 10.0, 5.0, 0.0], [7.0, -7.0, 1.0, -1.0]])

        self.workspace.save_dataset('energy', tts)

        expected_c = pd.DataFrame([[0.0, 0.1667, 0.3333, 0.5],
                                   [1.5, -1.5, 0.8333, -0.8333],
                                   [4.8333, 3.6667, 2.6667, 1.6667]])

        al.do_clustering(data['queryResult']['parameters'])

        (centroid, labels) = (al.Workspace().get_dataset('centroids0'),
                              al.Workspace().get_dataset('labels0'))

        for i in range(len(expected_c)):
            self.assertAlmostEqual(centroid[0][i],
                                   expected_c[0][i],
                                   delta=self.DELTA)
            self.assertAlmostEqual(centroid[1][i],
                                   expected_c[1][i],
                                   delta=self.DELTA)
            self.assertAlmostEqual(centroid[2][i],
                                   expected_c[2][i],
                                   delta=self.DELTA)
Example No. 26
def do_features(parameters):
    """
    Execute the feature operations.
    :param parameters: The parameters for this function (name_dataset).
    """
    workspace = al.Workspace()
    data_name = parameters['Dataset']
    dataset = workspace.get_dataset(data_name)
    features = al.features(dataset)
    num_feat = str(workspace.get_counter('feat'))
    name = data_name + 'Features' + num_feat
    workspace.save_dataset(name, features)

    print('The features are stored as ' + name)
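
A hypothetical direct call (the dataset name 'random0' and a zero-based 'feat' counter are assumptions for illustration):

al.do_features({'Dataset': 'random0'})
# The first call stores the extracted features as 'random0Features0'.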
Example No. 27
def do_normalization(parameters):
    """
    Perform a normalization operation.
    :param parameters: The parameters of the function (name of the operation, dataset, ...).
    """
    op = parameters.pop("operation")
    workspace = al.Workspace()
    data_name = parameters["Dataset"]
    dataset = workspace.get_dataset(data_name)
    name = ''
    if not re.search("_in_place$", op):
        if op == 'decimal_scaling_norm':
            norm = al.decimal_scaling_norm(dataset)
            num_norm = str(workspace.get_counter('norm'))
            name = 'dec_sca_norm' + num_norm
            workspace.save_dataset(name, norm)

        elif op == 'max_min_norm':
            norm = al.max_min_norm(dataset, parameters)
            num_norm = str(workspace.get_counter('norm'))
            name = 'max_min_norm' + num_norm
            workspace.save_dataset(name, norm)

        elif op == 'mean_norm':
            norm = al.mean_norm(dataset)
            num_norm = str(workspace.get_counter('norm'))
            name = 'mean_norm' + num_norm
            workspace.save_dataset(name, norm)

        elif op == 'znorm':
            norm = al.znorm(dataset, parameters)
            num_norm = str(workspace.get_counter('norm'))
            name = 'znorm' + num_norm
            workspace.save_dataset(name, norm)

        print('The normalization is stored as ' + name)
        al.voice('The normalization is stored as ' + name)
    else:
        # The in-place variants are recognized but not implemented yet.
        if op == 'decimal_scaling_norm_in_place':
            pass

        elif op == 'max_min_norm_in_place':
            pass

        elif op == 'mean_norm_in_place':
            pass

        elif op == 'znorm_in_place':
            pass
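
The normalization tests in this collection pin down what each operation computes per row; a minimal pandas sketch reproducing the mean-norm and znorm expectations (an illustration of the formulas, not the khiva-backed implementation):

import pandas as pd

tt = pd.DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]])

# mean norm: (x - mean) / (max - min), applied per row
mean_norm = tt.sub(tt.mean(axis=1), axis=0).div(
    tt.max(axis=1) - tt.min(axis=1), axis=0)
# each row -> [-0.5, -0.1667, 0.1667, 0.5], as in test_mean_norm

# znorm: (x - mean) / population standard deviation, applied per row
znorm = tt.sub(tt.mean(axis=1), axis=0).div(tt.std(axis=1, ddof=0), axis=0)
# each row -> [-1.3416, -0.4472, 0.4472, 1.3416], as in test_znorm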
Example No. 28
def exiting_no(response):
    """
    Exit the program, deleting the workspace.
    :param response: The response from Dialogflow.
    """
    print('Deleting workspace')

    al.Workspace().clean_workspace()
    print('Deleted workspace')

    print('DEBUG: Fulfillment text: {}'.format(response))

    print('Closing program')

    exit()
Example No. 29
def get_subdataset_rows(parameters):
    """
    Obtains a subset of the dataset by its rows.
    :param parameters: The parameters of the function (dataset name, ...).
    """
    workspace = al.Workspace()
    data_name = parameters['Dataset']
    dataset = workspace.get_dataset(data_name)

    if parameters["from"]:
        index_a = int(parameters["from"])
    else:
        print('From what row number?')
        al.voice('From what row number?')
        query = al.query_input()
        while not al.isnumber(query):
            print(
                'Incorrect input.\nIt is not a number.\nPlease introduce one:')
            al.voice(
                'Incorrect input.\nIt is not a number.\nPlease introduce one.')
            query = al.query_input()
        index_a = int(query)
    if parameters["to"]:
        index_b = int(parameters['to'])
    else:
        print('To what row number?')
        al.voice('To what row number?')
        query = al.query_input()
        while not al.isnumber(query):
            print(
                'Incorrect input.\nIt is not a number.\nPlease introduce one:')
            al.voice(
                'Incorrect input.\nIt is not a number.\nPlease introduce one.')
            query = al.query_input()
        index_b = int(query)

    if index_b < index_a:
        print(
            'This operation cannot be done.\nThe starting row number is greater than the last row number.'
        )
        raise ValueError('The starting row number is greater than the last row number.')

    dataset = dataset.iloc[index_a:index_b]
    num = workspace.get_counter('sub')
    name = 'subrow' + str(num) + data_name
    workspace.save_dataset(name, dataset)
    txt = 'The sub-dataset by the rows is saved as ' + name
    print(txt)
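
A hypothetical direct call, matching test_subdataset_rows earlier in this collection:

al.get_subdataset_rows({'Dataset': 'random0', 'from': 10, 'to': 60})
# Keeps rows 10..59 (the 'to' bound is exclusive) and saves the
# result as 'subrow0random0'.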
Example No. 30
    def test_load_dataset(self):
        order = "load dataset titanic.csv"

        data = response(self, order)

        self.assertEqual(data['queryResult']['intent']['displayName'],
                         'LoadDataset')
        self.assertEqual(data['queryResult']['intentDetectionConfidence'], 1.0)
        self.assertEqual(data['queryResult']['parameters']['Dataset'],
                         'titanic')

        al.load_dataset(data['queryResult']['parameters'])
        self.workspace = al.Workspace()
        dataset = self.workspace.get_dataset('current')
        titanic = pd.read_csv("datasets/titanic.csv")
        self.assertEqual(dataset.to_json(), titanic.to_json())