Ejemplo n.º 1
0
 def test_set_schema(self):
     """Check that ``set_schema`` changes the data type reported for 'foo'.

     A row built from a date-like string is first expected to expose 'foo'
     as a ``StringDataType``. After a schema declaring 'foo' as a daily
     ``DateDataType`` is set, the row's schema must report that type.
     """
     tr = TreeRow({'foo': "2018-01-01"})
     # Before any schema is set explicitly, 'foo' is expected to be a string.
     self.assertIsInstance(
         tr.schema.base_fork_node.find_child('foo').get_data_type(),
         StringDataType)

     new_schema = TreeSchema(base_fork_node=ForkNode(
         name='base',
         children=[
             ChildNode(name='foo',
                       data_type=DateDataType(resolution='D',
                                              format_string="%Y-%m-%d"))
         ]))
     tr.set_schema(new_schema)

     # The explicitly set schema must now drive the type of 'foo'.
     self.assertIsInstance(
         tr.schema.base_fork_node.find_child('foo').get_data_type(),
         DateDataType)
Ejemplo n.º 2
0
    def test_get_schema(self):
        """Check that ``get_schema`` reflects the schema currently on the row.

        The schema of a freshly built row must be a ``TreeSchema`` listing
        the input key 'foo'. After ``set_schema`` is called with a schema
        whose only child is 'foo-new', ``get_schema`` must return a schema
        equal to the new one, listing 'foo-new' and no longer 'foo'.
        """
        tr = TreeRow({'foo': "2018-01-01"})
        self.assertIsInstance(tr.get_schema(), TreeSchema)
        self.assertIn("foo",
                      tr.get_schema().base_fork_node.get_children_names())

        new_schema = TreeSchema(base_fork_node=ForkNode(
            name='base',
            children=[
                ChildNode(name='foo-new',
                          data_type=DateDataType(resolution='D',
                                                 format_string="%Y-%m-%d"))
            ]))
        tr.set_schema(new_schema)

        self.assertIsInstance(tr.get_schema(), TreeSchema)
        self.assertNotIn("foo",
                         tr.get_schema().base_fork_node.get_children_names())
        self.assertIn("foo-new",
                      tr.get_schema().base_fork_node.get_children_names())
        self.assertEqual(tr.get_schema(), new_schema)
Ejemplo n.º 3
0
    def test_apply_schema(self):
        """Check ``apply_schema`` on a matching schema and on two mismatches.

        Case 1: a nested row is built, a schema with float, string, date and
        array-of-date leaves is set, and the row after ``apply_schema`` must
        equal the expected converted values.
        Case 2: the data holds a nested mapping under 'f' while the schema
        declares 'f' as a leaf; ``apply_schema`` must raise ``RuntimeError``.
        Case 3: the data holds a scalar under 'f' while the schema declares
        'f' as a fork; ``apply_schema`` must raise ``RuntimeError``.
        """
        # Case 1
        input_data_1 = {
            "l1-f": "120.9",
            "l1-s": 34,
            "l1-d": "2018-01-04",
            "f": {
                "l2-f": "-120.9",
                "l2-s": 'YES',
                "l2-a": ["2018-01-04"]
            }
        }
        # Expected row once the schema is applied. 'l2-missing' is absent
        # from the input but declared in the schema, and is expected to be
        # filled with the string 'nan'.
        output_data_1_exp = {
            "l1-f": 120.9,
            "l1-s": "34.0",
            "l1-d": np.datetime64("2018-01-04"),
            "f": {
                "l2-f": -120.9,
                "l2-s": 'YES',
                "l2-a": [np.datetime64("2018-01-04")],
                'l2-missing': 'nan'
            }
        }
        fork_1 = ForkNode('base', [
            ChildNode('l1-f', FloatDataType()),
            ChildNode('l1-s', StringDataType()),
            ChildNode('l1-d',
                      DateDataType(resolution='D', format_string="%Y-%m-%d")),
            ForkNode('f', [
                ChildNode('l2-f', FloatDataType()),
                ChildNode('l2-s', StringDataType()),
                ChildNode(
                    'l2-a',
                    ArrayDataType(
                        DateDataType(resolution='D',
                                     format_string="%Y-%m-%d"))),
                ChildNode('l2-missing', StringDataType())
            ])
        ])

        tr_1 = TreeRow(input_data_1)
        schema_1 = TreeSchema(base_fork_node=fork_1)

        # The row is expected to stay unset until build_row is called.
        self.assertIsNone(tr_1.row)
        tr_1 = tr_1.build_row(input_data_1, 'numpy')

        # Before the schema is set and applied, neither the row values nor
        # the row's schema match the targets.
        self.assertNotEqual(tr_1.row, output_data_1_exp)
        self.assertNotEqual(tr_1.get_schema(), schema_1)
        tr_1 = tr_1.set_schema(schema_1)
        tr_1 = tr_1.apply_schema('numpy')
        self.assertEqual(tr_1.row, output_data_1_exp)

        # Case 2
        input_data_2 = {'f': {'float': 20}}
        fork_2 = ForkNode('base', [ChildNode('f', FloatDataType())])

        tr_2 = TreeRow(input_data_2)
        schema_2 = TreeSchema(base_fork_node=fork_2)

        self.assertIsNone(tr_2.row)
        tr_2 = tr_2.build_row(input_data_2, 'numpy')

        self.assertNotEqual(tr_2.get_schema(), schema_2)

        tr_2 = tr_2.set_schema(schema_2)
        # Schema says 'f' is a leaf, data has a nested mapping there.
        with self.assertRaises(RuntimeError):
            tr_2.apply_schema('numpy')

        # Case 3
        input_data_3 = {'f': 20}
        fork_3 = ForkNode(
            'base', [ForkNode('f', [ChildNode('float', FloatDataType())])])

        tr_3 = TreeRow(input_data_3)
        schema_3 = TreeSchema(base_fork_node=fork_3)

        self.assertIsNone(tr_3.row)
        tr_3 = tr_3.build_row(input_data_3, 'numpy')

        self.assertNotEqual(tr_3.get_schema(), schema_3)

        tr_3 = tr_3.set_schema(schema_3)
        # Schema says 'f' is a fork, data has a scalar there.
        with self.assertRaises(RuntimeError):
            tr_3.apply_schema('numpy')