def test_flatten_column_with_differing_size_vectors(self):
        """Flatten a 3-element and a 2-element vector column in one call.

        The expected rows have three output rows per input row; the third
        value for the shorter column 'c' is expected to be 0.0.
        """
        schema = [('a', ta.int32), ('b', ta.vector(3)), ('c', ta.vector(2))]
        rows = [
            [1, [1, 2, 3], [8, 7]],
            [2, [4, 5, 6], [6, 5]],
            [3, [7, 8, 9], [4, 3]],
            [4, [10, 11, 12], [2, 1]],
        ]
        frame = ta.Frame(ta.UploadRows(rows, schema))

        frame.flatten_columns(['b', 'c'])

        # rows the frame should hold once both vector columns are flattened
        expected_rows = [
            [1, 1.0, 8.0], [1, 2.0, 7.0], [1, 3.0, 0.0],
            [2, 4.0, 6.0], [2, 5.0, 5.0], [2, 6.0, 0.0],
            [3, 7.0, 4.0], [3, 8.0, 3.0], [3, 9.0, 0.0],
            [4, 10.0, 2.0], [4, 11.0, 1.0], [4, 12.0, 0.0],
        ]

        self.assertEqual(frame.row_count, 12)
        self.assertEqual(frame.take(frame.row_count), expected_rows)
Exemple #2
0
    def test_flatten_column_with_differing_size_vectors(self):
        """Flattening vector(3) and vector(2) columns together yields 12 rows.

        The expected data lists 0.0 as the third value of column 'c', whose
        vectors only have two elements.
        """
        source_rows = [
            [1, [1, 2, 3], [8, 7]],
            [2, [4, 5, 6], [6, 5]],
            [3, [7, 8, 9], [4, 3]],
            [4, [10, 11, 12], [2, 1]],
        ]
        column_schema = [
            ('a', ta.int32),
            ('b', ta.vector(3)),
            ('c', ta.vector(2)),
        ]
        uploaded = ta.Frame(ta.UploadRows(source_rows, column_schema))

        vector_columns = ['b', 'c']
        uploaded.flatten_columns(vector_columns)

        # one output row per element of the longer vector
        flattened = [
            [1, 1.0, 8.0],
            [1, 2.0, 7.0],
            [1, 3.0, 0.0],
            [2, 4.0, 6.0],
            [2, 5.0, 5.0],
            [2, 6.0, 0.0],
            [3, 7.0, 4.0],
            [3, 8.0, 3.0],
            [3, 9.0, 0.0],
            [4, 10.0, 2.0],
            [4, 11.0, 1.0],
            [4, 12.0, 0.0],
        ]

        self.assertEqual(uploaded.row_count, 12)
        self.assertEqual(uploaded.take(uploaded.row_count), flattened)
Exemple #3
0
    def test_flatten_columns_with_multiple_vectors(self):
        """Flatten two equally sized vector columns and compare all rows."""
        schema = [('a', ta.int32), ('b', ta.vector(2)), ('c', ta.vector(2))]
        rows = [
            [1, [1, 2], [8, 7]],
            [2, [3, 4], [6, 5]],
            [3, [5, 6], [4, 3]],
            [4, [7, 8], [2, 1]],
        ]
        frame = ta.Frame(ta.UploadRows(rows, schema))

        frame.flatten_columns(['b', 'c'])

        # two output rows per input row, values converted to floats
        expected_rows = [
            [1, 1.0, 8.0],
            [1, 2.0, 7.0],
            [2, 3.0, 6.0],
            [2, 4.0, 5.0],
            [3, 5.0, 4.0],
            [3, 6.0, 3.0],
            [4, 7.0, 2.0],
            [4, 8.0, 1.0],
        ]

        self.assertEqual(frame.row_count, 8)
        self.assertEqual(frame.take(frame.row_count), expected_rows)
Exemple #4
0
 def test_inspect_nones(self):
     """Render an inspection whose second row is None in both columns.

     Nothing is asserted about the rendered text; it is only printed, so
     this test fails only if building or rendering the inspection raises.
     """
     schema = [('s', str), ('v', ta.vector(2))]
     rows = [['super', [1.0095, 2.034]],
             [None, None]]
     settings = ui.InspectSettings(wrap=2, round=2, truncate=4)
     result = repr(ui.RowsInspection(rows, schema, offset=0, format_settings=settings))
     # drop trailing whitespace from every rendered line
     result = '\n'.join([line.rstrip() for line in result.splitlines()])
     # parenthesized form prints the same text on Python 2 and Python 3
     print(result)
    def test_flatten_columns_with_strings_and_vectors_with_one_delimiter(self):
        """Flatten two string columns and a vector column with one shared ':' delimiter."""
        schema = [("a", ta.int32), ("b", str), ("c", ta.vector(2)), ("d", str)]
        rows = [[1, "1:2", [1, 2], "a:b"], [2, "3:4", [3, 4], "c:d"],
                [3, "5:6", [5, 6], "e:f"], [4, "7:8", [7, 8], "g:h"]]
        frame = ta.Frame(ta.UploadRows(rows, schema))

        # a single delimiter string is passed for both string columns
        frame.flatten_columns(["b", "c", "d"], ":")

        expected_rows = [[1, "1", 1.0, "a"], [1, "2", 2.0, "b"],
                         [2, "3", 3.0, "c"], [2, "4", 4.0, "d"],
                         [3, "5", 5.0, "e"], [3, "6", 6.0, "f"],
                         [4, "7", 7.0, "g"], [4, "8", 8.0, "h"]]

        self.assertEqual(frame.row_count, 8)
        self.assertEqual(frame.take(frame.row_count), expected_rows)
    def test_flatten_columns_with_strings_and_vectors_with_default_delimiter(self):
        """Flatten string and vector columns without passing a delimiter.

        Also checks that passing three delimiters for two string columns
        raises before the real flatten is performed.
        """
        schema = [('a', ta.int32), ('b', str), ('c', ta.vector(2)), ('d', str)]
        rows = [
            [1, "1,2", [1, 2], "a,b"],
            [2, "3,4", [3, 4], "c,d"],
            [3, "5,6", [5, 6], "e,f"],
            [4, "7,8", [7, 8], "g,h"],
        ]
        frame = ta.Frame(ta.UploadRows(rows, schema))

        # only 'b' and 'd' are string columns, so three delimiters is one too many
        with self.assertRaises(Exception):
            frame.flatten_columns(['b', 'c', 'd'], [',', ',', ','])

        frame.flatten_columns(['b', 'c', 'd'])

        expected_rows = [
            [1, "1", 1.0, "a"], [1, "2", 2.0, "b"],
            [2, "3", 3.0, "c"], [2, "4", 4.0, "d"],
            [3, "5", 5.0, "e"], [3, "6", 6.0, "f"],
            [4, "7", 7.0, "g"], [4, "8", 8.0, "h"],
        ]

        self.assertEqual(frame.row_count, 8)
        self.assertEqual(frame.take(frame.row_count), expected_rows)
Exemple #7
0
    def test_flatten_columns_with_strings_and_vectors_with_default_delimiter(self):
        """Comma-separated strings and vectors flatten with no delimiter argument.

        A call with more delimiters than string columns is expected to raise.
        """
        source_rows = [
            [1, "1,2", [1, 2], "a,b"],
            [2, "3,4", [3, 4], "c,d"],
            [3, "5,6", [5, 6], "e,f"],
            [4, "7,8", [7, 8], "g,h"],
        ]
        column_schema = [
            ('a', ta.int32),
            ('b', str),
            ('c', ta.vector(2)),
            ('d', str),
        ]
        uploaded = ta.Frame(ta.UploadRows(source_rows, column_schema))

        # two string columns but three delimiters: must be rejected
        too_many_delimiters = [',', ',', ',']
        with self.assertRaises(Exception):
            uploaded.flatten_columns(['b', 'c', 'd'], too_many_delimiters)

        uploaded.flatten_columns(['b', 'c', 'd'])

        flattened = [
            [1, "1", 1.0, "a"],
            [1, "2", 2.0, "b"],
            [2, "3", 3.0, "c"],
            [2, "4", 4.0, "d"],
            [3, "5", 5.0, "e"],
            [3, "6", 6.0, "f"],
            [4, "7", 7.0, "g"],
            [4, "8", 8.0, "h"],
        ]

        self.assertEqual(uploaded.row_count, 8)
        self.assertEqual(uploaded.take(uploaded.row_count), flattened)
Exemple #8
0
    def test_add_columns_and_copy_where(self):
        """
        Tests UDFs for add_columns and copy(where), and uses the vector type

        Changes the 2 population strings to a vector, and then uses the vector
        to compute the change, and then copy out all the incorrect ones
        """
        # NOTE(review): `csv` is not defined in this method; presumably a
        # module-level data source with 20 rows -- confirm against the file header.
        frame = ta.Frame(csv)
        self.assertEqual(frame.row_count, 20, "frame should have 20 rows")
        # strip the thousands separators, then pack both populations in a vector
        frame.add_columns(
            lambda row: [
                float(row['pop_2010'].translate({ord(','): None})),
                float(row['population_2013'].translate({ord(','): None}))
            ], ("vpops", ta.vector(2)))
        self.assertEqual(frame.row_count, 20, "frame should have 20 rows")
        self.assertEqual(frame.column_names, [
            'rank', 'city', 'population_2013', 'pop_2010', 'change', 'county',
            'vpops'
        ])
        # relative change from the 2010 value (index 0) to the 2013 value (index 1)
        frame.add_columns(
            lambda row: (row.vpops[1] - row.vpops[0]) / row.vpops[0],
            ("comp_change", ta.float64))
        # keep only rows whose recorded 'change' text differs from the computed one
        bad_cities = frame.copy(columns=['city', 'change', 'comp_change'],
                                where=lambda row: row.change != "%.2f%%" %
                                round(100 * row.comp_change, 2))
        self.assertEqual(bad_cities.column_names,
                         ['city', 'change', 'comp_change'])
        self.assertEqual(bad_cities.row_count, 1)
        row = bad_cities.take(1)[0]
        # round so the float comparison below is stable
        row[2] = round(row[2], 5)
        self.assertEqual(row, [u'Tualatin', u'4.17%', 0.03167
                               ])  # should just be one bad one, Tualatin
Exemple #9
0
    def test_inspect_round(self):
        schema = [('f32', ta.float32), ('f64', ta.float64),
                  ('v', ta.vector(2))]
        rows = [[0.1234, 9.87654321, [1.0095, 2.034]],
                [1234.5, 9876.54321, [99.999, 33.33]]]
        result = repr(
            ui.RowsInspection(rows,
                              schema,
                              offset=0,
                              format_settings=ui.InspectSettings(wrap=2,
                                                                 round=2)))
        result = '\n'.join([line.rstrip() for line in result.splitlines()])
        expected = '''[#]  f32      f64      v
======================================
[0]     0.12     9.88  [1.01, 2.03]
[1]  1234.50  9876.54  [100.00, 33.33]'''
        self.assertEqual(expected, result)

        result = repr(
            ui.RowsInspection(rows,
                              schema,
                              offset=0,
                              format_settings=ui.InspectSettings(
                                  wrap='stripes', round=3)))
        result = '\n'.join([line.rstrip() for line in result.splitlines()])
        expected = '''[0]-
f32=0.123
f64=9.877
v  =[1.010, 2.034]
[1]-
f32=1234.500
f64=9876.543
v  =[99.999, 33.330]'''
        self.assertEqual(expected, result)
Exemple #10
0
 def test_frame_upload_raw_list_data(self):
     """Round trip: list data --> upload to frame --> 'take' back to list and compare."""
     schema = [('n', int), ('s', str), ('v', ta.vector(2))]
     data = [
         [1, 'one', [1.0, 1.1]],
         [2, 'two', [2.0, 2.2]],
         [3, 'three', [3.0, 3.3]],
     ]
     frame = ta.Frame(ta.UploadRows(data, schema))
     taken = frame.take(5)
     self.assertEqual(len(data), len(taken))
     # lengths are asserted equal first, so pairing rows and cells is exact
     for source_row, taken_row in zip(data, taken):
         self.assertEqual(len(source_row), len(taken_row))
         for source_cell, taken_cell in zip(source_row, taken_row):
             self.assertEqual(source_cell, taken_cell)
Exemple #11
0
 def test_inspect_nones(self):
     """Render an inspection containing a row of None values and print it.

     The rendered text is not asserted against anything; the test only
     verifies that building and rendering the inspection does not raise.
     """
     schema = [('s', str), ('v', ta.vector(2))]
     rows = [['super', [1.0095, 2.034]], [None, None]]
     result = repr(
         ui.RowsInspection(rows,
                           schema,
                           offset=0,
                           format_settings=ui.InspectSettings(wrap=2,
                                                              round=2,
                                                              truncate=4)))
     # strip trailing whitespace from each rendered line
     result = '\n'.join([line.rstrip() for line in result.splitlines()])
     # print(...) with one argument behaves identically on Python 2 and 3,
     # whereas the bare print statement is a SyntaxError on Python 3
     print(result)
Exemple #12
0
 def test_frame_upload_raw_list_data(self):
     """Upload raw list rows, 'take' them back and compare every cell with the source."""
     source_rows = [[1, 'one', [1.0, 1.1]],
                    [2, 'two', [2.0, 2.2]],
                    [3, 'three', [3.0, 3.3]]]
     column_schema = [('n', int), ('s', str), ('v', ta.vector(2))]
     uploaded = ta.Frame(ta.UploadRows(source_rows, column_schema))
     # ask for more rows than were uploaded; the count is checked next
     fetched_rows = uploaded.take(5)
     self.assertEqual(len(source_rows), len(fetched_rows))
     for row_index, fetched_row in enumerate(fetched_rows):
         source_row = source_rows[row_index]
         self.assertEqual(len(source_row), len(fetched_row))
         for cell_index, fetched_cell in enumerate(fetched_row):
             self.assertEqual(source_row[cell_index], fetched_cell)
    def test_flatten_columns_with_multiple_vectors(self):
        """Two vector(2) columns flattened together give two rows per input row."""
        source_rows = [
            [1, [1, 2], [8, 7]],
            [2, [3, 4], [6, 5]],
            [3, [5, 6], [4, 3]],
            [4, [7, 8], [2, 1]],
        ]
        column_schema = [('a', ta.int32), ('b', ta.vector(2)), ('c', ta.vector(2))]
        uploaded = ta.Frame(ta.UploadRows(source_rows, column_schema))

        vector_columns = ['b', 'c']
        uploaded.flatten_columns(vector_columns)

        # element i of 'b' is paired with element i of 'c'
        flattened = [[1, 1.0, 8.0], [1, 2.0, 7.0], [2, 3.0, 6.0],
                     [2, 4.0, 5.0], [3, 5.0, 4.0], [3, 6.0, 3.0],
                     [4, 7.0, 2.0], [4, 8.0, 1.0]]

        self.assertEqual(uploaded.row_count, 8)
        self.assertEqual(uploaded.take(uploaded.row_count), flattened)
    def test_flatten_columns_with_single_vector(self):
        """Flatten one vector column, passing its name as a plain string."""
        schema = [("a", ta.int32), ("b", ta.vector(2))]
        rows = [
            [1, [1, 2]],
            [2, [3, 4]],
            [3, [5, 6]],
            [4, [7, 8]],
        ]
        frame = ta.Frame(ta.UploadRows(rows, schema))

        frame.flatten_columns("b")

        # each vector element becomes its own row next to the original 'a' value
        expected_rows = [
            [1, 1.0],
            [1, 2.0],
            [2, 3.0],
            [2, 4.0],
            [3, 5.0],
            [3, 6.0],
            [4, 7.0],
            [4, 8.0],
        ]

        self.assertEqual(frame.row_count, 8)
        self.assertEqual(frame.take(frame.row_count), expected_rows)
Exemple #15
0
    def test_flatten_columns_with_single_vector(self):
        """A single vector(2) column flattens into two rows per input row."""
        column_schema = [('a', ta.int32), ('b', ta.vector(2))]
        source_rows = [[1, [1, 2]],
                       [2, [3, 4]],
                       [3, [5, 6]],
                       [4, [7, 8]]]
        uploaded = ta.Frame(ta.UploadRows(source_rows, column_schema))

        # the column is named by a bare string rather than a list
        uploaded.flatten_columns('b')

        flattened = [[1, 1.0], [1, 2.0],
                     [2, 3.0], [2, 4.0],
                     [3, 5.0], [3, 6.0],
                     [4, 7.0], [4, 8.0]]

        self.assertEqual(uploaded.row_count, 8)
        self.assertEqual(uploaded.take(uploaded.row_count), flattened)
Exemple #16
0
    def test_flatten_columns_with_strings_and_vectors_with_one_delimiter(self):
        """One ':' delimiter is given for both string columns in a mixed flatten."""
        source_rows = [
            [1, "1:2", [1, 2], "a:b"],
            [2, "3:4", [3, 4], "c:d"],
            [3, "5:6", [5, 6], "e:f"],
            [4, "7:8", [7, 8], "g:h"],
        ]
        column_schema = [
            ('a', ta.int32),
            ('b', str),
            ('c', ta.vector(2)),
            ('d', str),
        ]
        uploaded = ta.Frame(ta.UploadRows(source_rows, column_schema))

        uploaded.flatten_columns(['b', 'c', 'd'], ':')

        # string parts and vector elements line up position by position
        flattened = [
            [1, "1", 1.0, "a"],
            [1, "2", 2.0, "b"],
            [2, "3", 3.0, "c"],
            [2, "4", 4.0, "d"],
            [3, "5", 5.0, "e"],
            [3, "6", 6.0, "f"],
            [4, "7", 7.0, "g"],
            [4, "8", 8.0, "h"],
        ]

        self.assertEqual(uploaded.row_count, 8)
        self.assertEqual(uploaded.take(uploaded.row_count), flattened)
Exemple #17
0
 def test_frame_upload_pandas(self):
     """does round trip pandas DF --> upload to frame --> download back to pandas and compare"""
     import pandas as pd
     import numpy as np
     try:
         from pandas.testing import assert_frame_equal
     except ImportError:  # older pandas only provides pandas.util.testing
         from pandas.util.testing import assert_frame_equal
     data = [[1, 'one', [1.0, 1.1]], [2, 'two', [2.0, 2.2]], [3, 'three', [3.0, 3.3]]]
     schema = [('n', ta.int64), ('s', str), ('v', ta.vector(2))]  # 'n' is int64, pandas default
     # column names are taken without indexing into zip(), which is an
     # iterator (not subscriptable) on Python 3
     column_names = [name for name, _ in schema]
     # zip(*data) transposes the rows into one tuple of values per column
     source = dict(zip(column_names, zip(*data)))
     df0 = pd.DataFrame(source)
     self.assertEqual(np.int64, df0['n'].dtype)
     # np.object_ is used because the np.object alias was removed from NumPy
     self.assertEqual(np.object_, df0['s'].dtype)
     self.assertEqual(np.object_, df0['v'].dtype)
     p = ta.Pandas(df0, schema)
     frame = ta.Frame(p)
     df1 = frame.download()
     assert_frame_equal(df0, df1)
Exemple #18
0
 def test_frame_upload_pandas(self):
     """does round trip pandas DF --> upload to frame --> download back to pandas and compare"""
     import numpy as np
     import pandas as pd
     try:
         # public location of the testing helpers in newer pandas releases
         from pandas.testing import assert_frame_equal
     except ImportError:
         # fallback for pandas versions that predate pandas.testing
         from pandas.util.testing import assert_frame_equal
     data = [[1, 'one', [1.0, 1.1]], [2, 'two', [2.0, 2.2]],
             [3, 'three', [3.0, 3.3]]]
     schema = [('n', ta.int64), ('s', str),
               ('v', ta.vector(2))]  # 'n' is int64, pandas default
     # build {column name: tuple of column values}; avoids zip(...)[0],
     # which only works where zip() returns a list (Python 2)
     names = [column_name for column_name, _ in schema]
     source = dict(zip(names, zip(*data)))
     df0 = pd.DataFrame(source)
     self.assertEqual(np.int64, df0['n'].dtype)
     # np.object was removed from NumPy; np.object_ compares equal to the
     # object dtype on old and new releases alike
     self.assertEqual(np.object_, df0['s'].dtype)
     self.assertEqual(np.object_, df0['v'].dtype)
     p = ta.Pandas(df0, schema)
     frame = ta.Frame(p)
     df1 = frame.download()
     assert_frame_equal(df0, df1)
    def test_flatten_columns_with_strings_and_vectors(self):
        """Flatten two string columns and a vector, one delimiter per string column."""
        schema = [('a', ta.int32), ('b', str), ('c', ta.vector(2)), ('d', str)]
        rows = [
            [1, "1:2", [1, 2], "a|b"],
            [2, "3:4", [3, 4], "c|d"],
            [3, "5:6", [5, 6], "e|f"],
            [4, "7:8", [7, 8], "g|h"],
        ]
        frame = ta.Frame(ta.UploadRows(rows, schema))

        # two delimiters are supplied for the two string columns 'b' and 'd'
        frame.flatten_columns(['b', 'c', 'd'], [':', '|'])

        expected_rows = [
            [1, "1", 1.0, "a"], [1, "2", 2.0, "b"],
            [2, "3", 3.0, "c"], [2, "4", 4.0, "d"],
            [3, "5", 5.0, "e"], [3, "6", 6.0, "f"],
            [4, "7", 7.0, "g"], [4, "8", 8.0, "h"],
        ]

        self.assertEqual(frame.row_count, 8)
        self.assertEqual(frame.take(frame.row_count), expected_rows)
Exemple #20
0
    def test_add_columns_and_copy_where(self):
        """
        Tests UDFs for add_columns and copy(where), and uses the vector type

        Changes the 2 population strings to a vector, and then uses the vector
        to compute the change, and then copy out all the incorrect ones
        """
        # NOTE(review): `csv` is referenced but not defined in this method;
        # presumably a module-level source of 20 rows -- confirm.
        frame = ta.Frame(csv)
        self.assertEqual(frame.row_count, 20, "frame should have 20 rows")
        # remove the ',' thousands separators before converting to float
        frame.add_columns(lambda row: [float(row['pop_2010'].translate({ord(','): None})),
                                       float(row['population_2013'].translate({ord(','): None}))],
                          ("vpops", ta.vector(2)))
        self.assertEqual(frame.row_count, 20, "frame should have 20 rows")
        self.assertEqual(frame.column_names, ['rank', 'city', 'population_2013', 'pop_2010', 'change', 'county', 'vpops'])
        # vpops[0] is the 2010 value and vpops[1] the 2013 value
        frame.add_columns(lambda row: (row.vpops[1] - row.vpops[0])/row.vpops[0], ("comp_change", ta.float64))
        # select rows whose stored 'change' text disagrees with the computed percentage
        bad_cities = frame.copy(columns=['city', 'change', 'comp_change'], where=lambda row: row.change != "%.2f%%" % round(100*row.comp_change, 2))
        self.assertEqual(bad_cities.column_names, ['city', 'change', 'comp_change'])
        self.assertEqual(bad_cities.row_count, 1)
        row = bad_cities.take(1)[0]
        # round the float so the list comparison is exact
        row[2] = round(row[2], 5)
        self.assertEqual(row, [u'Tualatin', u'4.17%', 0.03167])  # should just be one bad one, Tualatin
Exemple #21
0
    def test_inspect_round(self):
        schema = [('f32', ta.float32), ('f64', ta.float64), ('v', ta.vector(2))]
        rows = [[0.1234, 9.87654321, [1.0095, 2.034]],
                [1234.5, 9876.54321, [99.999, 33.33]]]
        result = repr(ui.RowsInspection(rows, schema, offset=0, format_settings=ui.InspectSettings(wrap=2, round=2)))
        result = '\n'.join([line.rstrip() for line in result.splitlines()])
        expected = '''[#]  f32      f64      v
======================================
[0]     0.12     9.88  [1.01, 2.03]
[1]  1234.50  9876.54  [100.00, 33.33]'''
        self.assertEqual(expected, result)

        result = repr(ui.RowsInspection(rows, schema, offset=0, format_settings=ui.InspectSettings(wrap='stripes', round=3)))
        result = '\n'.join([line.rstrip() for line in result.splitlines()])
        expected = '''[0]-
f32=0.123
f64=9.877
v  =[1.010, 2.034]
[1]-
f32=1234.500
f64=9876.543
v  =[99.999, 33.330]'''
        self.assertEqual(expected, result)