Ejemplo n.º 1
0
    def test_flatten_columns_with_strings_and_vectors_with_default_delimiter(self):
        """Flatten two string columns and a vector column without naming a delimiter."""
        rows = [
            [1, "1,2", [1, 2], "a,b"],
            [2, "3,4", [3, 4], "c,d"],
            [3, "5,6", [5, 6], "e,f"],
            [4, "7,8", [7, 8], "g,h"],
        ]
        schema = [('a', ta.int32), ('b', str), ('c', ta.vector(2)), ('d', str)]
        test_frame = ta.Frame(ta.UploadRows(rows, schema))

        # Only 'b' and 'd' are string columns, so supplying three delimiters
        # is expected to be rejected.
        with self.assertRaises(Exception):
            test_frame.flatten_columns(['b', 'c', 'd'], [','] * 3)

        # No delimiter argument is passed for the comma-separated strings.
        test_frame.flatten_columns(['b', 'c', 'd'])

        # Each source row is expected to expand into two flattened rows.
        flattened = [
            [1, "1", 1.0, "a"], [1, "2", 2.0, "b"],
            [2, "3", 3.0, "c"], [2, "4", 4.0, "d"],
            [3, "5", 5.0, "e"], [3, "6", 6.0, "f"],
            [4, "7", 7.0, "g"], [4, "8", 8.0, "h"],
        ]

        self.assertEqual(test_frame.row_count, 8)
        self.assertEqual(test_frame.take(test_frame.row_count), flattened)
Ejemplo n.º 2
0
    def test_adf_column_types(self):
        """
        Tests the Augmented Dickey-Fuller test with different column types.

        A string column must be rejected with a "was not numerical" error,
        while the int and float columns must each produce a non-None result.
        """
        data = [[1, "a", 1.5], [2, "b", 18.5], [4, "c", 22.1], [5, "d", 19.0],
                [7, "e", 25.6], [8, "f", 36.75]]
        schema = [("int_column", ta.int32), ("str_column", str),
                  ("float_column", ta.float32)]
        frame = ta.Frame(ta.UploadRows(data, schema))

        # string column should have an error; assertRaises fails the test
        # with a clear message if the call unexpectedly succeeds.
        with self.assertRaises(Exception) as context:
            frame.timeseries_augmented_dickey_fuller_test("str_column", 0)
        self.assertIn("Column str_column was not numerical",
                      context.exception.message)

        # Numerical columns should not have an error
        self.assertNotEqual(
            frame.timeseries_augmented_dickey_fuller_test("int_column", 0),
            None)
        self.assertNotEqual(
            frame.timeseries_augmented_dickey_fuller_test("float_column", 0),
            None)
Ejemplo n.º 3
0
    def test_flatten_column_with_differing_size_vectors(self):
        """Flatten a 3-element and a 2-element vector column together."""
        rows = [
            [1, [1, 2, 3], [8, 7]],
            [2, [4, 5, 6], [6, 5]],
            [3, [7, 8, 9], [4, 3]],
            [4, [10, 11, 12], [2, 1]],
        ]
        schema = [('a', ta.int32), ('b', ta.vector(3)), ('c', ta.vector(2))]
        test_frame = ta.Frame(ta.UploadRows(rows, schema))

        test_frame.flatten_columns(['b', 'c'])

        # Every source row is expected to yield three rows (the longer vector's
        # length); the shorter vector column is expected to show 0.0 in the
        # third row.
        flattened = [
            [1, 1.0, 8.0], [1, 2.0, 7.0], [1, 3.0, 0.0],
            [2, 4.0, 6.0], [2, 5.0, 5.0], [2, 6.0, 0.0],
            [3, 7.0, 4.0], [3, 8.0, 3.0], [3, 9.0, 0.0],
            [4, 10.0, 2.0], [4, 11.0, 1.0], [4, 12.0, 0.0],
        ]

        self.assertEqual(test_frame.row_count, 12)
        self.assertEqual(test_frame.take(test_frame.row_count), flattened)
Ejemplo n.º 4
0
    def test_bpt_invalid_column(self):
        """
        Tests the Breusch-Pagan test with non-numerical data, and expects an error.

        The string column is tried first as the y column and then as one of
        the x columns; both calls must fail with a "was not numerical" error.
        A call that uses only numerical columns must return a non-None result.
        """
        data = [[1, "a", 1.5], [2, "b", 18.5], [4, "c", 22.1], [5, "d", 19.0],
                [7, "e", 25.6], [8, "f", 36.75]]
        schema = [("int_column", ta.int32), ("str_column", str),
                  ("float_column", ta.float32)]
        frame = ta.Frame(ta.UploadRows(data, schema))

        # Expected error since the y column specified has strings.
        # assertRaises fails the test explicitly if no error is raised.
        with self.assertRaises(Exception) as context:
            frame.timeseries_breusch_pagan_test("str_column",
                                                ["int_column", "float_column"])
        self.assertIn("Column str_column was not numerical",
                      context.exception.message)

        # Expected error since one of the x columns specified has strings.
        with self.assertRaises(Exception) as context:
            frame.timeseries_breusch_pagan_test("float_column",
                                                ["int_column", "str_column"])
        self.assertIn("Column str_column was not numerical",
                      context.exception.message)

        # numerical data should not have an error
        self.assertNotEqual(
            frame.timeseries_breusch_pagan_test("float_column",
                                                ["int_column"]), None)
Ejemplo n.º 5
0
 def test_frame_upload_raw_list_data(self):
     """Round trip: list data --> upload to frame --> 'take' back to a list --> compare cell by cell."""
     uploaded = [
         [1, 'one', [1.0, 1.1]],
         [2, 'two', [2.0, 2.2]],
         [3, 'three', [3.0, 3.3]],
     ]
     schema = [('n', int), ('s', str), ('v', ta.vector(2))]
     frame = ta.Frame(ta.UploadRows(uploaded, schema))

     # Ask for more rows than were uploaded; the count check below expects
     # only the uploaded rows to come back.
     taken = frame.take(5)
     self.assertEqual(len(uploaded), len(taken))

     # The row counts are known to match here, so pairing rows is safe.
     for expected_row, actual_row in zip(uploaded, taken):
         self.assertEqual(len(expected_row), len(actual_row))
         for expected_cell, actual_cell in zip(expected_row, actual_row):
             self.assertEqual(expected_cell, actual_cell)
Ejemplo n.º 6
0
    def test_kmeans_train_publish(self):
        """Train a k-means model on a one-column frame, check the output, then publish it."""
        frame = ta.Frame(
            ta.UploadRows(
                [[2, "ab"], [1, "cd"], [7, "ef"], [1, "gh"], [9, "ij"],
                 [2, "kl"], [0, "mn"], [6, "op"], [5, "qr"]],
                [("data", ta.float64), ("name", str)]))
        model = ta.KMeansModel()
        train_output = model.train(frame, ["data"], [1], 3)
        # dict.has_key() is deprecated (and gone in Python 3); a membership
        # test is equivalent and assertIn reports the missing key on failure.
        self.assertIn('within_set_sum_of_squared_error', train_output)
        model.publish()
Ejemplo n.º 7
0
    def test_flatten_columns_with_single_vector(self):
        """Flatten one vector column, passing its name as a plain string."""
        rows = [
            [1, [1, 2]],
            [2, [3, 4]],
            [3, [5, 6]],
            [4, [7, 8]],
        ]
        schema = [('a', ta.int32), ('b', ta.vector(2))]
        test_frame = ta.Frame(ta.UploadRows(rows, schema))

        test_frame.flatten_columns('b')

        # Each two-element vector is expected to become two rows of floats.
        flattened = [
            [1, 1.0], [1, 2.0],
            [2, 3.0], [2, 4.0],
            [3, 5.0], [3, 6.0],
            [4, 7.0], [4, 8.0],
        ]

        self.assertEqual(test_frame.row_count, 8)
        self.assertEqual(test_frame.take(test_frame.row_count), flattened)
Ejemplo n.º 8
0
    def test_missing_values_drop_rows(self):
        """Rows whose 'a' value is missing (None) can be dropped from a frame."""
        # Create frame with missing values using upload rows
        schema = [('a', ta.int32)]
        data = [[1], [4], [None], [None], [10], [None]]
        frame = ta.Frame(ta.UploadRows(data, schema))

        # Check that frame was correctly created
        self.assertEqual(6, frame.row_count)
        self.assertEqual(data, frame.take(frame.row_count))

        # Check that we can drop rows with missing values.
        # Identity comparison is the idiomatic (PEP 8) test for None.
        frame.drop_rows(lambda row: row['a'] is None)
        expected = [[1], [4], [10]]
        self.assertEqual(expected, frame.take(frame.row_count, columns='a'))
Ejemplo n.º 9
0
    def test_flatten_columns_with_multiple_vectors(self):
        """Flatten two equally sized vector columns in a single call."""
        rows = [
            [1, [1, 2], [8, 7]],
            [2, [3, 4], [6, 5]],
            [3, [5, 6], [4, 3]],
            [4, [7, 8], [2, 1]],
        ]
        schema = [('a', ta.int32), ('b', ta.vector(2)), ('c', ta.vector(2))]
        test_frame = ta.Frame(ta.UploadRows(rows, schema))

        test_frame.flatten_columns(['b', 'c'])

        # Both vectors hold two elements, so each source row is expected to
        # produce two flattened rows.
        flattened = [
            [1, 1.0, 8.0], [1, 2.0, 7.0],
            [2, 3.0, 6.0], [2, 4.0, 5.0],
            [3, 5.0, 4.0], [3, 6.0, 3.0],
            [4, 7.0, 2.0], [4, 8.0, 1.0],
        ]

        self.assertEqual(test_frame.row_count, 8)
        self.assertEqual(test_frame.take(test_frame.row_count), flattened)
Ejemplo n.º 10
0
def upload_rows(frame_name, names, rows):
    """Append time-series readings to the frame called ``frame_name``.

    ``rows[i]`` is a sequence of readings, each a mapping with ``'DateTime'``
    and ``'Value'`` keys, and ``names[i]`` is the label stored alongside every
    reading of that sequence.  One ``[DateTime, name, Value]`` record is
    uploaded per reading.

    When the module is not connected (``mode`` is ``None`` or ``'local'``) a
    warning is printed and nothing is uploaded.

    Raises:
        IndexError: if ``names`` has fewer entries than ``rows`` has
            non-empty sequences.
    """
    if mode is None or mode == 'local':
        print('Warning: Not connected to ATK')
        return

    # Build one record per reading.  The previous implementation wrote every
    # reading of a series into the same pre-allocated slot (keeping only the
    # last one), and then uploaded the unfiltered buffer including its None
    # placeholders.
    # NOTE(review): records are emitted as flat [DateTime, name, Value] lists
    # (previously wrapped in an extra list) -- confirm against the three-column
    # layout expected by the module-level `schema`.
    records = [
        [reading['DateTime'], names[index], reading['Value']]
        for index, series in enumerate(rows)
        for reading in series
    ]

    target = get_frame(frame_name)
    target.append(tap.UploadRows(records, schema))
Ejemplo n.º 11
0
    def test_flatten_columns_with_strings_and_vectors_with_one_delimiter(self):
        """Flatten string and vector columns, passing a single ':' delimiter."""
        rows = [
            [1, "1:2", [1, 2], "a:b"],
            [2, "3:4", [3, 4], "c:d"],
            [3, "5:6", [5, 6], "e:f"],
            [4, "7:8", [7, 8], "g:h"],
        ]
        schema = [('a', ta.int32), ('b', str), ('c', ta.vector(2)), ('d', str)]
        test_frame = ta.Frame(ta.UploadRows(rows, schema))

        # A single delimiter string is given for all of the listed columns.
        test_frame.flatten_columns(['b', 'c', 'd'], ':')

        # Each source row is expected to expand into two flattened rows.
        flattened = [
            [1, "1", 1.0, "a"], [1, "2", 2.0, "b"],
            [2, "3", 3.0, "c"], [2, "4", 4.0, "d"],
            [3, "5", 5.0, "e"], [3, "6", 6.0, "f"],
            [4, "7", 7.0, "g"], [4, "8", 8.0, "h"],
        ]

        self.assertEqual(test_frame.row_count, 8)
        self.assertEqual(test_frame.take(test_frame.row_count), flattened)
Ejemplo n.º 12
0
    def test_flatten_columns_with_mismatch_delimiter_count(self):
        """A delimiter list whose length differs from the column count is rejected."""
        # The test needs a frame with more than three columns.
        rows = [
            [1, "solo,mono,single", "a,b,c", "1+2+3"],
            [2, "duo,double", "d,e", "4+5"],
        ]
        schema = [('a', ta.int32), ('b', str), ('c', str), ('d', str)]
        test_frame = ta.Frame(ta.UploadRows(rows, schema))

        # With more than one delimiter, the count must match the column count.
        # Two delimiters for three columns: too few.
        with self.assertRaises(Exception):
            test_frame.flatten_columns(['b', 'c', 'd'], [',', ','])

        # Four delimiters for three columns: too many.
        with self.assertRaises(Exception):
            test_frame.flatten_columns(['b', 'c', 'd'], [',', ',', '+', '|'])

        # A lone delimiter applies to every listed column; the first row has
        # three items per column and the second has two, hence five rows.
        test_frame.flatten_columns(['b', 'c'], ',')
        self.assertEqual(test_frame.row_count, 5)
Ejemplo n.º 13
0
def get_frame(name):
    """Return the frame to work with, creating an empty one if necessary.

    Lookup order:
      1. Not connected (``mode`` is ``None`` or ``'local'``): print a warning
         and return ``None``.
      2. A frame is already stored in the module-level ``frame``: return it,
         regardless of ``name``.
      3. ``name`` is among the names reported by ``tap``: fetch and return
         that frame (it is not stored in the module-level ``frame``).
      4. Otherwise build an empty frame from the module-level ``schema``,
         store it in ``frame``, assign it ``name`` and return it.
    """
    global frame

    not_connected = mode is None or mode == 'local'
    if not_connected:
        print('Warning: Not connected to ATK')
        return

    if frame is not None:
        return frame

    if name in tap.get_frame_names():
        return tap.get_frame(name)

    # Nothing cached and nothing with this name exists yet: start an empty frame.
    frame = tap.Frame(tap.UploadRows([], schema))
    frame.name = name
    return frame
Ejemplo n.º 14
0
    def test_adf_params(self):
        """
        Test the Augmented Dickey-Fuller test with invalid parameters.

        Calls with and without a valid regression parameter must return a
        non-None result; an unrecognized regression value must raise an error
        naming the accepted values.
        """
        data = [[12.88969427], [13.54964408], [13.8432745], [12.13843611],
                [12.81156092], [14.2499628], [15.12102595]]
        frame = ta.Frame(ta.UploadRows(data, [("data", ta.float32)]))

        # Test calling ADF test with and without regression parameter
        self.assertNotEqual(
            frame.timeseries_augmented_dickey_fuller_test("data", 0), None)
        self.assertNotEqual(
            frame.timeseries_augmented_dickey_fuller_test("data", 0, "c"),
            None)

        # Previously the test passed silently if "bogus" was accepted;
        # assertRaises makes the missing error an explicit failure.
        with self.assertRaises(Exception) as context:
            frame.timeseries_augmented_dickey_fuller_test("data", 0, "bogus")
        self.assertIn("bogus is not c, ct, or ctt", context.exception.message)
Ejemplo n.º 15
0
    def test_dwtest_column_types(self):
        """
        Tests that the Durbin-Watson test only works with numerical columns.

        A string column must be rejected with a "was not numerical" error,
        while the int and float columns must each produce a non-None result.
        """
        data = [[1, "a", 1.5], [2, "b", 18.5], [4, "c", 22.1], [5, "d", 19.0],
                [7, "e", 25.6], [8, "f", 36.75]]
        schema = [("int_column", ta.int32), ("str_column", str),
                  ("float_column", ta.float32)]
        frame = ta.Frame(ta.UploadRows(data, schema))

        # calling durbin-watson with a string column should fail;
        # assertRaises fails the test explicitly if no error is raised.
        with self.assertRaises(Exception) as context:
            frame.timeseries_durbin_watson_test("str_column")
        self.assertIn("Column str_column was not numerical",
                      context.exception.message)

        # int and float columns should not give any error
        self.assertNotEqual(frame.timeseries_durbin_watson_test("int_column"),
                            None)
        self.assertNotEqual(
            frame.timeseries_durbin_watson_test("float_column"), None)
Ejemplo n.º 16
0
    def test_missing_values_add_column(self):
        """A derived column can substitute 0 for the missing values of column 'a'."""
        # Upload a single int column in which half of the values are missing.
        column_schema = [('a', ta.int32)]
        rows = [[1], [4], [None], [None], [10], [None]]
        frame = ta.Frame(ta.UploadRows(rows, column_schema))

        # The frame must hold exactly what was uploaded, missing values included.
        self.assertEqual(6, frame.row_count)
        self.assertEqual(rows, frame.take(frame.row_count))

        def zero_if_missing(value):
            """Return 0 for a missing value, otherwise the value itself."""
            return 0 if value is None else value

        # Add 'a_corrected', computed from 'a' with missing values replaced by 0.
        frame.add_columns(lambda row: zero_if_missing(row['a']),
                          ('a_corrected', ta.int32),
                          columns_accessed='a')
        corrected = [[1], [4], [0], [0], [10], [0]]
        self.assertEqual(corrected,
                         frame.take(frame.row_count, columns='a_corrected'))