Example #1
0
    def test_build_numpy_value(self):
        """Check ``ListDataType.build_numpy_value`` on flat and nested lists.

        Flat lists are compared against structured ``np.array`` records whose
        field names are the element positions ('0', '1', ...); the nested
        case is checked field by field.
        """
        # A single float element.
        float_only = ListDataType(element_data_types=[FloatDataType()])
        built = float_only.build_numpy_value([1])
        expected = np.array((1, ), [('0', '<f8')])
        self.assertTrue((built == expected).all())

        # A float followed by a string.
        float_and_string = ListDataType(
            element_data_types=[FloatDataType(), StringDataType()])
        built = float_and_string.build_numpy_value([1, "tra"])
        expected = np.array((1, "tra"), [('0', '<f8'), ('1', '<U128')])
        self.assertTrue((built == expected).all())

        # A float, a string and an inner list (float array + string).
        element_types = [FloatDataType(), StringDataType()]
        element_types.append(
            ListDataType(element_data_types=[
                ArrayDataType(element_data_type=FloatDataType()),
                StringDataType()
            ]))
        nested = ListDataType(element_data_types=element_types)

        source = [12.3, "first_string", [[1, 2, 3, 4], "second_string"]]
        record = nested.build_numpy_value(source)[0]

        self.assertEqual(record['0'], source[0])
        self.assertEqual(record['1'], source[1])

        inner_record = record['2'][0]
        self.assertTrue((inner_record['0'] == source[2][0]).all())
        self.assertEqual(inner_record['1'], source[2][1])
Example #2
0
    def test_build_python_value(self):
        """Check ``ListDataType.build_python_value`` on flat and nested lists.

        Comparisons go through ``assertEqual`` so that a failing check
        reports both operands.
        """
        # A single float element: the integer input is expected as a float.
        dtp = ListDataType(element_data_types=[FloatDataType()])
        self.assertEqual(dtp.build_python_value([1]), [float(1)])

        # A float followed by a string.
        dtp = ListDataType(
            element_data_types=[FloatDataType(),
                                StringDataType()])
        self.assertEqual(dtp.build_python_value([1, "tra"]),
                         [float(1), "tra"])

        # A float, a string and an inner list (float array + string).
        dtp = ListDataType(element_data_types=[
            FloatDataType(),
            StringDataType(),
            ListDataType(element_data_types=[
                ArrayDataType(element_data_type=FloatDataType()),
                StringDataType()
            ])
        ])

        input_value = [12.3, "first_string", [[1, 2, 3, 4], "second_string"]]
        output_value = dtp.build_python_value(input_value)

        self.assertEqual(output_value[0], input_value[0])
        self.assertEqual(output_value[1], input_value[1])
        self.assertEqual(output_value[2][0], input_value[2][0])
        self.assertEqual(output_value[2][1], input_value[2][1])
Example #3
0
 def test_is_nullable(self):
     """``is_nullable`` reflects the ``nullable`` constructor argument."""
     checks = ((False, self.assertFalse), (True, self.assertTrue))
     for flag, check in checks:
         list_type = ListDataType(element_data_types=[StringDataType()],
                                  nullable=flag)
         check(list_type.is_nullable())
Example #4
0
    def sample_dict_for_test_schema_v1():
        """Return a sample nested dict and the ``TreeSchema`` expected for it.

        Returns:
            tuple: ``(input_dict, expected_output)``.
        """
        sample = {
            'a': 23,
            'b': {'c': "sa", 'd': [{"s": 1}, 12.3], 'e': ["a", "b", "c"]},
        }

        # Nodes are built one by one, in the order they appear in the tree.
        node_a = ChildNode(name="a", data_type=FloatDataType())
        node_c = ChildNode(name="c", data_type=StringDataType())

        # 'd' is a list whose first element is itself a small tree.
        d_0_fork = ForkNode(
            name="d_0",
            children=[ChildNode(name="s", data_type=FloatDataType())],
            level=4)
        d_types = [TreeDataType(base_fork=d_0_fork), FloatDataType()]
        node_d = ChildNode(
            name="d",
            data_type=ListDataType(element_data_types=d_types, level=3))

        node_e = ChildNode(
            name="e",
            data_type=ArrayDataType(element_data_type=StringDataType()))

        fork_b = ForkNode(name="b", children=[node_c, node_d, node_e], level=2)
        base_fork = ForkNode(name="base", children=[node_a, fork_b], level=1)

        return sample, TreeSchema(base_fork_node=base_fork)
Example #5
0
    def test_transform_tree(self):
        """Check ``TreeRow.transform_tree`` on a valid tree and on mismatches.

        A matching input/schema pair must be converted to the expected
        'numpy' values. An input whose structure disagrees with the schema
        (a dict where a leaf is declared, or a scalar where a fork is
        declared) must raise ``RuntimeError``.
        """
        input_data_1 = {
            "l1-f": "120.9",
            "l1-s": 34,
            "l1-d": "2018-01-04",
            "f": {
                "l2-f": "-120.9",
                "l2-s": 'YES',
                "l2-a": ["2018-01-04"]
            }
        }
        # 'l2-missing' is declared in the schema but absent from the input;
        # the expected output holds 'nan' for it.
        output_data_1_exp = {
            "l1-f": 120.9,
            "l1-s": "34",
            "l1-d": np.datetime64("2018-01-04"),
            "f": {
                "l2-f": -120.9,
                "l2-s": 'YES',
                "l2-a": [np.datetime64("2018-01-04")],
                'l2-missing': 'nan'
            }
        }
        fork_1 = ForkNode('base', [
            ChildNode('l1-f', FloatDataType()),
            ChildNode('l1-s', StringDataType()),
            ChildNode('l1-d',
                      DateDataType(resolution='D', format_string="%Y-%m-%d")),
            ForkNode('f', [
                ChildNode('l2-f', FloatDataType()),
                ChildNode('l2-s', StringDataType()),
                ChildNode(
                    'l2-a',
                    ArrayDataType(
                        DateDataType(resolution='D',
                                     format_string="%Y-%m-%d"))),
                ChildNode('l2-missing', StringDataType())
            ])
        ])

        tr = TreeRow(input_data_1)
        self.assertEqual(tr.transform_tree(input_data_1, fork_1, 'numpy'),
                         output_data_1_exp)

        # The data holds a dict under 'f' while the schema declares a leaf.
        input_data_2 = {'f': {'float': 20}}
        fork_2 = ForkNode('base', [ChildNode('f', FloatDataType())])

        # Build the row outside the context manager so that only
        # transform_tree itself can satisfy the expected exception.
        tr = TreeRow(input_data_2)
        with self.assertRaises(RuntimeError):
            tr.transform_tree(input_data_2, fork_2, 'numpy')

        # The data holds a scalar under 'f' while the schema declares a fork.
        input_data_3 = {'f': 20}
        fork_3 = ForkNode(
            'base', [ForkNode('f', [ChildNode('float', FloatDataType())])])

        tr = TreeRow(input_data_3)
        with self.assertRaises(RuntimeError):
            tr.transform_tree(input_data_3, fork_3, 'numpy')
Example #6
0
 def test_eq(self):
     """Array types are equal for the same element type, unequal otherwise."""
     self.assertEqual(ArrayDataType(element_data_type=FloatDataType()),
                      ArrayDataType(element_data_type=FloatDataType()))
     self.assertNotEqual(ArrayDataType(element_data_type=StringDataType()),
                         ArrayDataType(element_data_type=FloatDataType()))
Example #7
0
 def test_get_numpy_type(self):
     """``get_numpy_type`` is ``np.ndarray`` for each tested element type."""
     for element_cls in (FloatDataType, StringDataType, DateDataType):
         list_type = ListDataType(element_data_types=[element_cls()])
         self.assertEqual(list_type.get_numpy_type(), np.ndarray)
Example #8
0
 def test_get_python_type(self):
     """``get_python_type`` is ``list`` for each tested element type."""
     for element_cls in (FloatDataType, StringDataType, DateDataType):
         list_type = ListDataType(element_data_types=[element_cls()])
         self.assertEqual(list_type.get_python_type(), list)
Example #9
0
 def test_get_python_type(self):
     """``get_python_type`` is ``list`` for each tested element type."""
     for element_cls in (FloatDataType, StringDataType, DateDataType):
         array_type = ArrayDataType(element_data_type=element_cls())
         self.assertEqual(array_type.get_python_type(), list)
Example #10
0
 def test_get_numpy_type(self):
     """``get_numpy_type`` is ``np.ndarray`` for each tested element type."""
     for element_cls in (FloatDataType, StringDataType, DateDataType):
         array_type = ArrayDataType(element_data_type=element_cls())
         self.assertEqual(array_type.get_numpy_type(), np.ndarray)
Example #11
0
 def test__get_numpy_dtypes(self):
     """``_get_numpy_dtypes`` yields one ``(position, dtype)`` pair per element."""
     float_field = ('0', '<f8')
     array_field = ('1', np.ndarray)

     # One float element.
     one_element = ListDataType(element_data_types=[FloatDataType()])
     self.assertEqual(one_element._get_numpy_dtypes(), [float_field])

     # A float and a string array.
     two_elements = ListDataType(element_data_types=[
         FloatDataType(),
         ArrayDataType(element_data_type=StringDataType())
     ])
     self.assertEqual(two_elements._get_numpy_dtypes(),
                      [float_field, array_field])

     # A float, a string array and a month-resolution date.
     three_elements = ListDataType(element_data_types=[
         FloatDataType(),
         ArrayDataType(element_data_type=StringDataType()),
         DateDataType(resolution='M')
     ])
     self.assertEqual(three_elements._get_numpy_dtypes(),
                      [float_field, array_field, ('2', '<M8[M]')])
Example #12
0
    def test__transform_child_value(self):
        """Check ``TreeRow._transform_child_value`` on single leaves.

        Covers float, string and date leaves with the 'numpy' and 'python'
        methods, an unknown method name (``ValueError`` expected) and
        ``None`` as the input value.
        """
        transform = TreeRow._transform_child_value

        # Case 1: numeric text on a float leaf.
        float_text = '120.28'
        float_leaf = ChildNode('case1', FloatDataType())

        for method in ('numpy', 'python'):
            self.assertEqual(float(float_text),
                             transform(float_text, float_leaf, method))
        with self.assertRaises(ValueError):
            transform(float_text, float_leaf, 'no')

        # Case 2: an integer on a string leaf.
        number = 40
        string_leaf = ChildNode('case2', StringDataType())

        for method in ('numpy', 'python'):
            self.assertEqual(str(number),
                             transform(number, string_leaf, method))
        with self.assertRaises(ValueError):
            transform(number, string_leaf, 'no')

        # Case 3: date text on a day-resolution date leaf.
        date_text = '2018-01-04'
        date_leaf = ChildNode(
            'case3', DateDataType(resolution='D', format_string="%Y-%m-%d"))

        self.assertEqual(np.datetime64(date_text),
                         transform(date_text, date_leaf, 'numpy'))
        self.assertEqual(datetime.strptime(date_text, "%Y-%m-%d"),
                         transform(date_text, date_leaf, 'python'))
        with self.assertRaises(ValueError):
            transform(date_text, date_leaf, 'no')

        # Case 4: None as input, for each leaf type and method.
        missing = None

        self.assertTrue(np.isnan(transform(missing, float_leaf, 'numpy')))
        self.assertTrue(transform(missing, float_leaf, 'python') is None)
        self.assertEqual(transform(missing, string_leaf, 'numpy'), 'nan')
        self.assertEqual(transform(missing, string_leaf, 'python'), 'None')
        self.assertTrue(np.isnat(transform(missing, date_leaf, 'numpy')))
        self.assertEqual(transform(missing, date_leaf, 'python'), '')
Example #13
0
 def test_build_python_value(self):
     """Compare ``ArrayDataType.build_python_value`` output with numpy arrays.

     NOTE(review): the expectations are ``np.array`` objects although the
     method under test is the python builder - confirm this is intended.
     """
     # Float elements.
     float_array = ArrayDataType(element_data_type=FloatDataType())
     built = float_array.build_python_value([1, 2, 3])
     self.assertTrue((built == np.array([1, 2, 3], '<f8')).all())

     # String elements built from integer input.
     string_array = ArrayDataType(element_data_type=StringDataType())
     built = string_array.build_python_value([1, 2, 3])
     self.assertTrue((built == np.array([1, 2, 3], '<U200')).all())

     # An array of string arrays; each row is checked on a fresh build.
     nested_array = ArrayDataType(element_data_type=ArrayDataType(
         element_data_type=StringDataType()))

     first_row = nested_array.build_python_value(
         [["tra", "check"], ["what"]])[0]
     self.assertTrue(
         (first_row == np.array(["tra", "check"], '<U200')).all())

     second_row = nested_array.build_python_value(
         [["tra", "check"], ["what"]])[1]
     self.assertTrue((second_row == np.array(["what"], "<U200")).all())
Example #14
0
 def test_eq(self):
     """List types built from identical element types compare equal."""
     # Each factory is called twice so that two independent but
     # identically configured instances are compared.
     factories = (
         lambda: ListDataType(element_data_types=[FloatDataType()]),
         lambda: ListDataType(
             element_data_types=[FloatDataType(), StringDataType()]),
         lambda: ListDataType(element_data_types=[
             FloatDataType(),
             StringDataType(),
             ListDataType(element_data_types=[
                 ArrayDataType(element_data_type=FloatDataType()),
                 StringDataType()
             ])
         ]),
     )
     for build in factories:
         self.assertEqual(build(), build())
Example #15
0
    def get_data_types():
        """Return one instance of each data type configuration.

        Returns:
            tuple: in order - a generic ``DataType`` ('<i8' / ``int``), a
            string type, a float type, a date type with resolution 'D', a
            date type with resolution 's', a float array, a string array,
            a (float, string, date) list and a (string, string, date) list.
        """
        generic = DataType(numpy_dtype='<i8',
                           python_dtype=int,
                           numpy_na_value=np.nan,
                           python_na_value=None)
        string_type = StringDataType()
        float_type = FloatDataType()
        date_days = DateDataType(resolution='D')
        date_seconds = DateDataType(resolution='s')
        float_array = ArrayDataType(element_data_type=FloatDataType())
        string_array = ArrayDataType(element_data_type=StringDataType())
        float_string_date = ListDataType(
            element_data_types=[FloatDataType(),
                                StringDataType(),
                                DateDataType()])
        string_string_date = ListDataType(
            element_data_types=[StringDataType(),
                                StringDataType(),
                                DateDataType()])

        return (generic, string_type, float_type, date_days, date_seconds,
                float_array, string_array, float_string_date,
                string_string_date)
Example #16
0
    def test__assert_transformation_possible(self):
        """Check which key lists ``_assert_transformation_possible`` accepts.

        Against the fork built below, the key lists in ``rejected`` must
        raise ``RuntimeError`` and those in ``accepted`` must pass silently.
        """
        schema_fork = ForkNode('base', [
            ChildNode('c1', StringDataType()),
            ChildNode('c2', FloatDataType()),
            ForkNode('f1', [ChildNode('c2', DateDataType())])
        ])
        check = TreeRow._assert_transformation_possible

        rejected = (['c2'], ['c1', 'c2'], ['f1', 'c1', 'c2'])
        accepted = (['c1'], ['c1', 'f1'])

        for keys in rejected:
            with self.assertRaises(RuntimeError):
                check(keys, schema_fork)

        for keys in accepted:
            check(keys, schema_fork)
Example #17
0
 def test_get_numpy_type(self):
     """``StringDataType.get_numpy_type`` equals ``np.dtype('<U128')``."""
     self.assertEqual(StringDataType().get_numpy_type(), np.dtype('<U128'))
Example #18
0
 def test_get_python_type(self):
     """``StringDataType.get_python_type`` is ``str``."""
     self.assertEqual(StringDataType().get_python_type(), str)
Example #19
0
 def test_eq(self):
     """Two default-constructed ``StringDataType`` instances compare equal."""
     self.assertEqual(StringDataType(), StringDataType())
Example #20
0
 def test_is_nullable(self):
     """``is_nullable`` reflects the ``nullable`` constructor argument."""
     checks = ((False, self.assertFalse), (True, self.assertTrue))
     for flag, check in checks:
         check(StringDataType(nullable=flag).is_nullable())
Example #21
0
 def test_build_numpy_value(self):
     """``build_numpy_value`` returns each sample string unchanged."""
     string_type = StringDataType()
     for text in ("1234567890123", "123", "tra2"):
         self.assertEqual(string_type.build_numpy_value(text), text)
Example #22
0
 def base_dict_json_same_schema_types():
     """Return a nested dict mapping key names to data type instances.

     The top level and "level1-fork" hold string, float, array and list
     types; "level1-fork2" nests a further fork whose array and list
     entries wrap ``TreeDataType`` instances.
     """
     def float_string_list(level):
         # Five references to one float type followed by five references
         # to one string type, wrapped in a list type at the given level.
         return ListDataType(
             [FloatDataType()] * 5 + [StringDataType()] * 5, level=level)

     def tree_type(fork_name, first_leaf, second_leaf):
         # Wrap a two-leaf fork (level 5) in a TreeDataType. Each leaf is
         # a (name, data type class) pair instantiated here, in order.
         leaves = [
             ChildNode(name=leaf_name, data_type=leaf_cls())
             for leaf_name, leaf_cls in (first_leaf, second_leaf)
         ]
         return TreeDataType(
             base_fork=ForkNode(name=fork_name, children=leaves, level=5))

     types = {
         "level1-string": StringDataType(),
         "level1-float": FloatDataType(),
         "level1-date": StringDataType(),
         "level1-array_float": ArrayDataType(FloatDataType()),
         "level1-array_string": ArrayDataType(StringDataType()),
         "level1-list_float_string": float_string_list(2),
     }

     types["level1-fork"] = {
         "level2-string": StringDataType(),
         "level2-float": FloatDataType(),
         "level2-date": StringDataType(),
         "level2-array_float": ArrayDataType(FloatDataType()),
         "level2-array_string": ArrayDataType(StringDataType()),
         "level2-list_float_string": float_string_list(3),
     }

     types["level1-fork2"] = {
         "level2-float": FloatDataType(),
         "level2-fork": {
             "level3-float": FloatDataType(),
             "level3-array_tree": ArrayDataType(
                 tree_type("level3-array_tree",
                           ("level3-array-float", FloatDataType),
                           ("level3-array-string", StringDataType))),
             # Trees 0-4 hold a float and a string leaf; trees 5-9 hold
             # a "date" leaf (typed as string) and a string leaf.
             "level3-list_tree": ListDataType(
                 [
                     tree_type("level3-list_tree_{}".format(x),
                               ("level3-list-float", FloatDataType),
                               ("level3-list-string", StringDataType))
                     for x in range(0, 5)
                 ] + [
                     tree_type("level3-list_tree_{}".format(x),
                               ("level3-list-date", StringDataType),
                               ("level3-list-string", StringDataType))
                     for x in range(5, 10)
                 ],
                 level=4),
         },
     }
     return types
Example #23
0
 def test_is_nullable(self):
     """``is_nullable`` reflects the ``nullable`` constructor argument."""
     checks = ((False, self.assertFalse), (True, self.assertTrue))
     for flag, check in checks:
         array_type = ArrayDataType(element_data_type=StringDataType(),
                                    nullable=flag)
         check(array_type.is_nullable())
Example #24
0
    def test_apply_schema(self):
        """Check ``TreeRow.apply_schema`` after ``build_row``/``set_schema``.

        Case 1 applies a matching schema and compares the resulting row.
        Cases 2 and 3 set a schema whose structure disagrees with the data
        and expect ``RuntimeError`` from ``apply_schema``.
        """
        # Case 1
        input_data_1 = {
            "l1-f": "120.9",
            "l1-s": 34,
            "l1-d": "2018-01-04",
            "f": {
                "l2-f": "-120.9",
                "l2-s": 'YES',
                "l2-a": ["2018-01-04"]
            }
        }
        # NOTE(review): "l1-s" is expected as "34.0" (not "34") - presumably
        # build_row turns 34 into a float before the schema casts it to a
        # string; confirm.
        output_data_1_exp = {
            "l1-f": 120.9,
            "l1-s": "34.0",
            "l1-d": np.datetime64("2018-01-04"),
            "f": {
                "l2-f": -120.9,
                "l2-s": 'YES',
                "l2-a": [np.datetime64("2018-01-04")],
                'l2-missing': 'nan'
            }
        }
        fork_1 = ForkNode('base', [
            ChildNode('l1-f', FloatDataType()),
            ChildNode('l1-s', StringDataType()),
            ChildNode('l1-d',
                      DateDataType(resolution='D', format_string="%Y-%m-%d")),
            ForkNode('f', [
                ChildNode('l2-f', FloatDataType()),
                ChildNode('l2-s', StringDataType()),
                ChildNode(
                    'l2-a',
                    ArrayDataType(
                        DateDataType(resolution='D',
                                     format_string="%Y-%m-%d"))),
                ChildNode('l2-missing', StringDataType())
            ])
        ])

        tr_1 = TreeRow(input_data_1)
        schema_1 = TreeSchema(base_fork_node=fork_1)

        # unittest assertions are used instead of bare asserts so the checks
        # still run under ``python -O`` and report through the test runner.
        self.assertIsNone(tr_1.row)
        tr_1 = tr_1.build_row(input_data_1, 'numpy')

        # Before the schema is set, neither the row nor the schema match.
        self.assertNotEqual(tr_1.row, output_data_1_exp)
        self.assertNotEqual(tr_1.get_schema(), schema_1)
        tr_1 = tr_1.set_schema(schema_1)
        tr_1 = tr_1.apply_schema('numpy')
        self.assertEqual(tr_1.row, output_data_1_exp)

        # Case 2: the data holds a dict where the schema declares a leaf.
        input_data_2 = {'f': {'float': 20}}
        fork_2 = ForkNode('base', [ChildNode('f', FloatDataType())])

        tr_2 = TreeRow(input_data_2)
        schema_2 = TreeSchema(base_fork_node=fork_2)

        self.assertIsNone(tr_2.row)
        tr_2 = tr_2.build_row(input_data_2, 'numpy')

        self.assertNotEqual(tr_2.get_schema(), schema_2)

        tr_2 = tr_2.set_schema(schema_2)
        with self.assertRaises(RuntimeError):
            tr_2.apply_schema('numpy')

        # Case 3: the data holds a scalar where the schema declares a fork.
        input_data_3 = {'f': 20}
        fork_3 = ForkNode(
            'base', [ForkNode('f', [ChildNode('float', FloatDataType())])])

        tr_3 = TreeRow(input_data_3)
        schema_3 = TreeSchema(base_fork_node=fork_3)

        self.assertIsNone(tr_3.row)
        tr_3 = tr_3.build_row(input_data_3, 'numpy')

        self.assertNotEqual(tr_3.get_schema(), schema_3)

        tr_3 = tr_3.set_schema(schema_3)
        with self.assertRaises(RuntimeError):
            tr_3.apply_schema('numpy')
0
 def test_build_python_value(self):
     """Check ``StringDataType.build_python_value`` on int and str input."""
     dtp = StringDataType()
     # A non-string input is expected back in its string form.
     self.assertEqual(dtp.build_python_value(10), "10")
     # A string input is expected back unchanged.
     self.assertEqual(dtp.build_python_value("tra2"), "tra2")