예제 #1
0
    def sample_dict_for_test_schema_v1():
        """Build a nested sample dictionary together with its expected schema.

        Returns:
            A ``(input_dict, expected_output)`` tuple: the raw nested
            dictionary and the ``TreeSchema`` paired with it as the expected
            output.
        """
        raw_row = {
            'a': 23,
            'b': {
                'c': "sa",
                'd': [{"s": 1}, 12.3],
                'e': ["a", "b", "c"]
            }
        }

        # Nodes are created children-first, in the same order a single nested
        # constructor expression would evaluate them.
        node_a = ChildNode(name="a", data_type=FloatDataType())
        node_c = ChildNode(name="c", data_type=StringDataType())

        # 'd' holds a heterogeneous list: a nested tree followed by a float.
        d_first_element = TreeDataType(
            base_fork=ForkNode(
                name="d_0",
                children=[ChildNode(name="s", data_type=FloatDataType())],
                level=4))
        d_elements = [d_first_element, FloatDataType()]
        node_d = ChildNode(
            name="d",
            data_type=ListDataType(element_data_types=d_elements, level=3))

        # 'e' holds a homogeneous array of strings.
        node_e = ChildNode(
            name="e",
            data_type=ArrayDataType(element_data_type=StringDataType()))

        fork_b = ForkNode(name="b", children=[node_c, node_d, node_e], level=2)
        base_fork = ForkNode(name="base", children=[node_a, fork_b], level=1)

        return raw_row, TreeSchema(base_fork_node=base_fork)
예제 #2
0
    def test__transform_child_value(self):
        """Check ``TreeRow._transform_child_value`` for each leaf data type.

        Float, string and date leaves are converted in both the ``'numpy'``
        and ``'python'`` modes; an unknown mode (``'no'``) must raise
        ``ValueError``.  The last case feeds ``None`` to every leaf and checks
        the per-type, per-mode missing-value representation.
        """
        # Case 1: numeric string converted by a float leaf.
        value1 = '120.28'
        leaf1 = ChildNode('case1', FloatDataType())

        self.assertEqual(
            float(value1),
            TreeRow._transform_child_value(value1, leaf1, 'numpy'))
        self.assertEqual(
            float(value1),
            TreeRow._transform_child_value(value1, leaf1, 'python'))
        with self.assertRaises(ValueError):
            TreeRow._transform_child_value(value1, leaf1, 'no')

        # Case 2: integer converted by a string leaf.
        value2 = 40
        leaf2 = ChildNode('case2', StringDataType())

        self.assertEqual(
            str(value2),
            TreeRow._transform_child_value(value2, leaf2, 'numpy'))
        self.assertEqual(
            str(value2),
            TreeRow._transform_child_value(value2, leaf2, 'python'))
        with self.assertRaises(ValueError):
            TreeRow._transform_child_value(value2, leaf2, 'no')

        # Case 3: date string converted by a day-resolution date leaf.
        value3 = '2018-01-04'
        leaf3 = ChildNode(
            'case3', DateDataType(resolution='D', format_string="%Y-%m-%d"))

        self.assertEqual(
            np.datetime64(value3),
            TreeRow._transform_child_value(value3, leaf3, 'numpy'))
        self.assertEqual(
            datetime.strptime(value3, "%Y-%m-%d"),
            TreeRow._transform_child_value(value3, leaf3, 'python'))
        with self.assertRaises(ValueError):
            TreeRow._transform_child_value(value3, leaf3, 'no')

        # Case 4: ``None`` input; each leaf type has its own missing value
        # in each mode.
        value4 = None

        self.assertTrue(
            np.isnan(TreeRow._transform_child_value(value4, leaf1, 'numpy')))
        # assertIsNone reports the offending value on failure, unlike
        # assertTrue(... is None).
        self.assertIsNone(
            TreeRow._transform_child_value(value4, leaf1, 'python'))
        self.assertEqual(
            TreeRow._transform_child_value(value4, leaf2, 'numpy'), 'nan')
        self.assertEqual(
            TreeRow._transform_child_value(value4, leaf2, 'python'), 'None')
        self.assertTrue(
            np.isnat(TreeRow._transform_child_value(value4, leaf3, 'numpy')))
        self.assertEqual(
            TreeRow._transform_child_value(value4, leaf3, 'python'), '')
예제 #3
0
    def test_transform_tree(self):
        """Check ``TreeRow.transform_tree`` against matching and mismatching schemas.

        The first scenario transforms a nested input with a fork describing
        the same structure and compares the result with the expected dict.
        The other two scenarios pair an input with a fork whose structure
        disagrees with it and expect ``RuntimeError``.
        """
        input_data_1 = {
            "l1-f": "120.9",
            "l1-s": 34,
            "l1-d": "2018-01-04",
            "f": {
                "l2-f": "-120.9",
                "l2-s": 'YES',
                "l2-a": ["2018-01-04"]
            }
        }
        # 'l2-missing' is declared in the fork but absent from the input; the
        # expected output carries it as the string 'nan'.
        output_data_1_exp = {
            "l1-f": 120.9,
            "l1-s": "34",
            "l1-d": np.datetime64("2018-01-04"),
            "f": {
                "l2-f": -120.9,
                "l2-s": 'YES',
                "l2-a": [np.datetime64("2018-01-04")],
                'l2-missing': 'nan'
            }
        }
        fork_1 = ForkNode('base', [
            ChildNode('l1-f', FloatDataType()),
            ChildNode('l1-s', StringDataType()),
            ChildNode('l1-d',
                      DateDataType(resolution='D', format_string="%Y-%m-%d")),
            ForkNode('f', [
                ChildNode('l2-f', FloatDataType()),
                ChildNode('l2-s', StringDataType()),
                ChildNode(
                    'l2-a',
                    ArrayDataType(
                        DateDataType(resolution='D',
                                     format_string="%Y-%m-%d"))),
                ChildNode('l2-missing', StringDataType())
            ])
        ])

        tr = TreeRow(input_data_1)
        self.assertEqual(tr.transform_tree(input_data_1, fork_1, 'numpy'),
                         output_data_1_exp)

        # The input holds a nested dict under 'f' while the fork declares 'f'
        # as a float leaf.
        input_data_2 = {'f': {'float': 20}}
        fork_2 = ForkNode('base', [ChildNode('f', FloatDataType())])

        # Build the row outside the context manager so that only
        # transform_tree itself can satisfy the expected RuntimeError.
        tr = TreeRow(input_data_2)
        with self.assertRaises(RuntimeError):
            tr.transform_tree(input_data_2, fork_2, 'numpy')

        # The input holds a scalar under 'f' while the fork declares 'f' as a
        # nested fork.
        input_data_3 = {'f': 20}
        fork_3 = ForkNode(
            'base', [ForkNode('f', [ChildNode('float', FloatDataType())])])

        tr = TreeRow(input_data_3)
        with self.assertRaises(RuntimeError):
            tr.transform_tree(input_data_3, fork_3, 'numpy')
예제 #4
0
    def test__assert_transformation_possible(self):
        """Check which name lists ``_assert_transformation_possible`` rejects.

        Every list containing ``'c2'`` is expected to raise ``RuntimeError``;
        the lists without it are expected to pass silently.
        NOTE(review): presumably 'c2' is rejected because its type differs
        from the one inferred for raw input - confirm against TreeRow.
        """
        # Children are created in declaration order: two leaves, then a fork
        # that reuses the leaf name 'c2' one level down.
        children = [
            ChildNode('c1', StringDataType()),
            ChildNode('c2', FloatDataType()),
        ]
        children.append(ForkNode('f1', [ChildNode('c2', DateDataType())]))
        schema_fork = ForkNode('base', children)

        rejected_name_lists = (['c2'], ['c1', 'c2'], ['f1', 'c1', 'c2'])
        for names in rejected_name_lists:
            with self.assertRaises(RuntimeError):
                TreeRow._assert_transformation_possible(names, schema_fork)

        # These calls must simply not raise.
        accepted_name_lists = (['c1'], ['c1', 'f1'])
        for names in accepted_name_lists:
            TreeRow._assert_transformation_possible(names, schema_fork)
예제 #5
0
    def _get_schema_from_dict(self, d, key, level):
        """Recursively turn a nested dict of data types into a ``TreeSchema``.

        Args:
            d: Mapping from child name to either a data type (becomes a
                ``ChildNode``) or a nested dict (becomes a ``ForkNode``).
            key: Name given to the fork built for ``d``.
            level: Level assigned to that fork; nested dicts get ``level + 1``.

        Returns:
            A ``TreeSchema`` whose base fork holds the children of ``d`` in
            sorted-name order.
        """
        def build_node(child_name):
            entry = d[child_name]
            if not isinstance(entry, dict):
                return ChildNode(name=child_name, data_type=entry)
            # Nested dicts recurse; only the fork of the sub-schema is kept.
            sub_schema = self._get_schema_from_dict(entry, child_name,
                                                    level + 1)
            return sub_schema.base_fork_node

        ordered_names = sorted(d.keys())
        fork = ForkNode(name=key,
                        children=[build_node(n) for n in ordered_names],
                        level=level)
        return TreeSchema(base_fork_node=fork)
예제 #6
0
 def test_set_schema(self):
     """Check that ``set_schema`` replaces the schema held by a ``TreeRow``.

     A row built from a date-like string first reports ``StringDataType``
     for ``'foo'``; after ``set_schema`` with a date schema it must report
     ``DateDataType``.
     """
     tr = TreeRow({'foo': "2018-01-01"})
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)).
     self.assertIsInstance(
         tr.schema.base_fork_node.find_child('foo').get_data_type(),
         StringDataType)
     new_schema = TreeSchema(base_fork_node=ForkNode(
         name='base',
         children=[
             ChildNode(name='foo',
                       data_type=DateDataType(resolution='D',
                                              format_string="%Y-%m-%d"))
         ]))
     tr.set_schema(new_schema)
     self.assertIsInstance(
         tr.schema.base_fork_node.find_child('foo').get_data_type(),
         DateDataType)
예제 #7
0
    def test_get_schema(self):
        """Check that ``get_schema`` returns the current schema of a ``TreeRow``.

        The schema available right after construction must contain ``'foo'``;
        after ``set_schema`` the getter must return the newly assigned schema,
        which contains ``'foo-new'`` and no longer ``'foo'``.
        """
        tr = TreeRow({'foo': "2018-01-01"})
        # Dedicated assertions give informative failure messages and match
        # the assertIn/assertNotIn checks further down.
        self.assertIsInstance(tr.get_schema(), TreeSchema)
        self.assertIn("foo",
                      tr.get_schema().base_fork_node.get_children_names())

        new_schema = TreeSchema(base_fork_node=ForkNode(
            name='base',
            children=[
                ChildNode(name='foo-new',
                          data_type=DateDataType(resolution='D',
                                                 format_string="%Y-%m-%d"))
            ]))
        tr.set_schema(new_schema)
        self.assertIsInstance(tr.get_schema(), TreeSchema)
        self.assertNotIn("foo",
                         tr.get_schema().base_fork_node.get_children_names())
        self.assertIn("foo-new",
                      tr.get_schema().base_fork_node.get_children_names())
        self.assertEqual(tr.get_schema(), new_schema)
예제 #8
0
    def test_apply_schema(self):
        """Check ``TreeRow.apply_schema`` for compatible and incompatible schemas.

        Each case builds a row from raw input, replaces its schema with
        ``set_schema`` and then calls ``apply_schema('numpy')``.  Case 1
        expects the row to equal the expected dict afterwards; cases 2 and 3
        use a schema whose structure disagrees with the input and expect
        ``RuntimeError``.
        """
        # Case 1
        input_data_1 = {
            "l1-f": "120.9",
            "l1-s": 34,
            "l1-d": "2018-01-04",
            "f": {
                "l2-f": "-120.9",
                "l2-s": 'YES',
                "l2-a": ["2018-01-04"]
            }
        }
        # NOTE(review): 'l1-s' is expected as "34.0" rather than "34",
        # presumably because build_row has already converted 34 to a float
        # before the string schema is applied - confirm.
        output_data_1_exp = {
            "l1-f": 120.9,
            "l1-s": "34.0",
            "l1-d": np.datetime64("2018-01-04"),
            "f": {
                "l2-f": -120.9,
                "l2-s": 'YES',
                "l2-a": [np.datetime64("2018-01-04")],
                'l2-missing': 'nan'
            }
        }
        fork_1 = ForkNode('base', [
            ChildNode('l1-f', FloatDataType()),
            ChildNode('l1-s', StringDataType()),
            ChildNode('l1-d',
                      DateDataType(resolution='D', format_string="%Y-%m-%d")),
            ForkNode('f', [
                ChildNode('l2-f', FloatDataType()),
                ChildNode('l2-s', StringDataType()),
                ChildNode(
                    'l2-a',
                    ArrayDataType(
                        DateDataType(resolution='D',
                                     format_string="%Y-%m-%d"))),
                ChildNode('l2-missing', StringDataType())
            ])
        ])

        tr_1 = TreeRow(input_data_1)
        schema_1 = TreeSchema(base_fork_node=fork_1)

        # unittest assertions are used instead of bare ``assert`` so the
        # precondition is still checked when Python runs with -O.
        self.assertIsNone(tr_1.row)
        tr_1 = tr_1.build_row(input_data_1, 'numpy')

        self.assertNotEqual(tr_1.row, output_data_1_exp)
        self.assertNotEqual(tr_1.get_schema(), schema_1)
        tr_1 = tr_1.set_schema(schema_1)
        tr_1 = tr_1.apply_schema('numpy')
        self.assertEqual(tr_1.row, output_data_1_exp)

        # Case 2: input has a nested dict under 'f', schema declares a leaf.
        input_data_2 = {'f': {'float': 20}}
        fork_2 = ForkNode('base', [ChildNode('f', FloatDataType())])

        tr_2 = TreeRow(input_data_2)
        schema_2 = TreeSchema(base_fork_node=fork_2)

        self.assertIsNone(tr_2.row)
        tr_2 = tr_2.build_row(input_data_2, 'numpy')

        self.assertNotEqual(tr_2.get_schema(), schema_2)

        tr_2 = tr_2.set_schema(schema_2)
        with self.assertRaises(RuntimeError):
            tr_2.apply_schema('numpy')

        # Case 3: input has a scalar under 'f', schema declares a fork.
        input_data_3 = {'f': 20}
        fork_3 = ForkNode(
            'base', [ForkNode('f', [ChildNode('float', FloatDataType())])])

        tr_3 = TreeRow(input_data_3)
        schema_3 = TreeSchema(base_fork_node=fork_3)

        self.assertIsNone(tr_3.row)
        tr_3 = tr_3.build_row(input_data_3, 'numpy')

        self.assertNotEqual(tr_3.get_schema(), schema_3)

        tr_3 = tr_3.set_schema(schema_3)
        with self.assertRaises(RuntimeError):
            tr_3.apply_schema('numpy')
예제 #9
0
 def base_dict_json_same_schema_types():
     """Return a nested dict mapping field names to data type instances.

     The dict has three levels of nesting and covers string, float, array,
     list and tree data types.  Fields whose names end in ``-date`` are
     declared as ``StringDataType`` here.
     """
     def float_string_list(level):
         # Five float slots followed by five string slots; each half repeats
         # one shared instance.
         return ListDataType([FloatDataType()] * 5 + [StringDataType()] * 5, level=level)

     def list_tree(index, lead_child_name, lead_child_type):
         # One tree element of "level3-list_tree"; only the first child
         # varies between the two halves of the list.
         fork = ForkNode(
             name="level3-list_tree_{}".format(index),
             children=[
                 ChildNode(name=lead_child_name, data_type=lead_child_type()),
                 ChildNode(name="level3-list-string", data_type=StringDataType())
             ],
             level=5
         )
         return TreeDataType(base_fork=fork)

     # Entries are added in the same order as the keys appear in the result.
     schema_types = {
         "level1-string": StringDataType(),
         "level1-float": FloatDataType(),
         "level1-date": StringDataType(),
         "level1-array_float": ArrayDataType(FloatDataType()),
         "level1-array_string": ArrayDataType(StringDataType()),
         "level1-list_float_string": float_string_list(2),
     }
     schema_types["level1-fork"] = {
         "level2-string": StringDataType(),
         "level2-float": FloatDataType(),
         "level2-date": StringDataType(),
         "level2-array_float": ArrayDataType(FloatDataType()),
         "level2-array_string": ArrayDataType(StringDataType()),
         "level2-list_float_string": float_string_list(3),
     }

     second_fork = {"level2-float": FloatDataType()}
     deepest_fork = {"level3-float": FloatDataType()}

     array_tree_fork = ForkNode(
         name="level3-array_tree",
         children=[
             ChildNode(name="level3-array-float", data_type=FloatDataType()),
             ChildNode(name="level3-array-string", data_type=StringDataType())
         ],
         level=5
     )
     deepest_fork["level3-array_tree"] = ArrayDataType(
         TreeDataType(base_fork=array_tree_fork))

     # Elements 0-4 lead with a float child, elements 5-9 with a date child
     # typed as string.
     leading_trees = [list_tree(x, "level3-list-float", FloatDataType)
                      for x in range(0, 5)]
     trailing_trees = [list_tree(x, "level3-list-date", StringDataType)
                       for x in range(5, 10)]
     deepest_fork["level3-list_tree"] = ListDataType(
         leading_trees + trailing_trees, level=4)

     second_fork["level2-fork"] = deepest_fork
     schema_types["level1-fork2"] = second_fork
     return schema_types