Exemple #1
0
    def test_get_schema(self):
        tr = TreeRow({'foo': "2018-01-01"})
        self.assertTrue(isinstance(tr.get_schema(), TreeSchema))
        self.assertTrue(
            "foo" in tr.get_schema().base_fork_node.get_children_names())

        new_schema = TreeSchema(base_fork_node=ForkNode(
            name='base',
            children=[
                ChildNode(name='foo-new',
                          data_type=DateDataType(resolution='D',
                                                 format_string="%Y-%m-%d"))
            ]))
        tr.set_schema(new_schema)
        self.assertTrue(isinstance(tr.get_schema(), TreeSchema))
        self.assertNotIn("foo",
                         tr.get_schema().base_fork_node.get_children_names())
        self.assertIn("foo-new",
                      tr.get_schema().base_fork_node.get_children_names())
        self.assertEqual(tr.get_schema(), new_schema)
Exemple #2
0
    def test__get_tree_row(self):
        data = self.get_json_data_same_schema()[0]

        # Case 1: Dictionary + no schema
        expected_schema = self.get_schema_for_json_data_same_schema()
        tr = TreeDataSet._get_tree_row(input_row=data,
                                       schema=None,
                                       method='numpy')
        self.assertTrue(isinstance(tr, TreeRow))
        self.assertEqual(expected_schema, tr.schema)
        self._assert_equal_dictionaries(data, tr.row)

        # Case 2: Dictionary + single schema
        expected_schema = self.get_schema_for_json_data_same_schema()
        expected_schema = expected_schema.set_data_type(
            'level1-date',
            DateDataType(resolution='D', format_string='%Y-%m-%d'))
        expected_schema = expected_schema.set_data_type(
            'level1-fork/level2-date',
            DateDataType(resolution='D', format_string='%Y-%m-%d'))
        schema = tr.get_schema()
        schema = schema.set_data_type(
            'level1-date',
            DateDataType(resolution='D', format_string='%Y-%m-%d'))
        schema = schema.set_data_type(
            'level1-fork/level2-date',
            DateDataType(resolution='D', format_string='%Y-%m-%d'))

        tr = TreeDataSet._get_tree_row(input_row=data,
                                       schema=schema,
                                       method='numpy')

        self.assertTrue(isinstance(tr, TreeRow))
        self.assertEqual(expected_schema, tr.schema)
        self._assert_equal_dictionaries(data, tr.row)

        # Case 3: TreeRow + no schema
        tr = TreeRow(input_row=data).build_row(input_row=data, method='numpy')
        expected_schema = self.get_schema_for_json_data_same_schema()
        tr = TreeDataSet._get_tree_row(input_row=tr,
                                       schema=None,
                                       method='numpy')
        self.assertTrue(isinstance(tr, TreeRow))
        self.assertEqual(expected_schema, tr.schema)
        self._assert_equal_dictionaries(data, tr.row)

        # Case 4: TreeRow + schema
        tr = TreeRow(input_row=data).build_row(input_row=data, method='numpy')

        expected_schema = self.get_schema_for_json_data_same_schema()
        expected_schema = expected_schema.set_data_type(
            'level1-date',
            DateDataType(resolution='D', format_string='%Y-%m-%d'))
        expected_schema = expected_schema.set_data_type(
            'level1-fork/level2-date',
            DateDataType(resolution='D', format_string='%Y-%m-%d'))
        schema = tr.get_schema()
        schema = schema.set_data_type(
            'level1-date',
            DateDataType(resolution='D', format_string='%Y-%m-%d'))
        schema = schema.set_data_type(
            'level1-fork/level2-date',
            DateDataType(resolution='D', format_string='%Y-%m-%d'))

        tr = TreeDataSet._get_tree_row(input_row=tr,
                                       schema=schema,
                                       method='numpy')

        self.assertTrue(isinstance(tr, TreeRow))
        self.assertEqual(expected_schema, tr.schema)
        self._assert_equal_dictionaries(data, tr.row)
Exemple #3
0
    def test_apply_schema(self):
        # Case 1
        input_data_1 = {
            "l1-f": "120.9",
            "l1-s": 34,
            "l1-d": "2018-01-04",
            "f": {
                "l2-f": "-120.9",
                "l2-s": 'YES',
                "l2-a": ["2018-01-04"]
            }
        }
        output_data_1_exp = {
            "l1-f": 120.9,
            "l1-s": "34.0",
            "l1-d": np.datetime64("2018-01-04"),
            "f": {
                "l2-f": -120.9,
                "l2-s": 'YES',
                "l2-a": [np.datetime64("2018-01-04")],
                'l2-missing': 'nan'
            }
        }
        fork_1 = ForkNode('base', [
            ChildNode('l1-f', FloatDataType()),
            ChildNode('l1-s', StringDataType()),
            ChildNode('l1-d',
                      DateDataType(resolution='D', format_string="%Y-%m-%d")),
            ForkNode('f', [
                ChildNode('l2-f', FloatDataType()),
                ChildNode('l2-s', StringDataType()),
                ChildNode(
                    'l2-a',
                    ArrayDataType(
                        DateDataType(resolution='D',
                                     format_string="%Y-%m-%d"))),
                ChildNode('l2-missing', StringDataType())
            ])
        ])

        tr_1 = TreeRow(input_data_1)
        schema_1 = TreeSchema(base_fork_node=fork_1)

        assert tr_1.row is None
        tr_1 = tr_1.build_row(input_data_1, 'numpy')

        self.assertNotEqual(tr_1.row, output_data_1_exp)
        self.assertNotEqual(tr_1.get_schema(), schema_1)
        tr_1 = tr_1.set_schema(schema_1)
        tr_1 = tr_1.apply_schema('numpy')
        self.assertEqual(tr_1.row, output_data_1_exp)

        # Case 2
        input_data_2 = {'f': {'float': 20}}
        fork_2 = ForkNode('base', [ChildNode('f', FloatDataType())])

        tr_2 = TreeRow(input_data_2)
        schema_2 = TreeSchema(base_fork_node=fork_2)

        assert tr_2.row is None
        tr_2 = tr_2.build_row(input_data_2, 'numpy')

        self.assertNotEqual(tr_2.get_schema(), schema_2)

        tr_2 = tr_2.set_schema(schema_2)
        with self.assertRaises(RuntimeError):
            tr_2.apply_schema('numpy')

        # Case 3
        input_data_3 = {'f': 20}
        fork_3 = ForkNode(
            'base', [ForkNode('f', [ChildNode('float', FloatDataType())])])

        tr_3 = TreeRow(input_data_3)
        schema_3 = TreeSchema(base_fork_node=fork_3)

        assert tr_3.row is None
        tr_3 = tr_3.build_row(input_data_3, 'numpy')

        self.assertNotEqual(tr_3.get_schema(), schema_3)

        tr_3 = tr_3.set_schema(schema_3)
        with self.assertRaises(RuntimeError):
            tr_3.apply_schema('numpy')