def test_get_schema(self):
    """Check ``TreeRow.get_schema`` before and after ``set_schema``.

    The row is built from a single ``'foo'`` key; the test then swaps in
    a schema whose only child is ``'foo-new'`` and verifies that
    ``get_schema`` returns exactly that replacement.
    """
    tr = TreeRow({'foo': "2018-01-01"})

    # Before any explicit schema is set, the test expects a TreeSchema
    # whose base fork lists the input key.
    self.assertIsInstance(tr.get_schema(), TreeSchema)
    self.assertIn(
        "foo", tr.get_schema().base_fork_node.get_children_names())

    new_schema = TreeSchema(base_fork_node=ForkNode(
        name='base',
        children=[
            ChildNode(name='foo-new',
                      data_type=DateDataType(resolution='D',
                                             format_string="%Y-%m-%d"))
        ]))
    tr.set_schema(new_schema)

    # After replacement only the new child name may be reported.
    self.assertIsInstance(tr.get_schema(), TreeSchema)
    self.assertNotIn(
        "foo", tr.get_schema().base_fork_node.get_children_names())
    self.assertIn(
        "foo-new", tr.get_schema().base_fork_node.get_children_names())
    self.assertEqual(tr.get_schema(), new_schema)
def test__get_tree_row(self):
    """Exercise ``TreeDataSet._get_tree_row`` for four input combinations.

    The input row is either a plain dictionary or an already built
    ``TreeRow``, and the schema is either ``None`` or an explicit schema
    whose two date leaves are overridden with a day-resolution
    ``DateDataType``.  In every case the result must be a ``TreeRow``
    carrying the expected schema and a row equal to the input data.
    """
    data = self.get_json_data_same_schema()[0]

    def with_day_dates(schema):
        """Apply the day-resolution date type to both date leaves."""
        for path in ('level1-date', 'level1-fork/level2-date'):
            # A fresh DateDataType per call, as in the hand-written setup.
            schema = schema.set_data_type(
                path,
                DateDataType(resolution='D', format_string='%Y-%m-%d'))
        return schema

    def check(tree_row, expected_schema):
        """Assert type, schema and row content of a produced row."""
        self.assertIsInstance(tree_row, TreeRow)
        self.assertEqual(expected_schema, tree_row.schema)
        self._assert_equal_dictionaries(data, tree_row.row)

    # Case 1: Dictionary + no schema
    expected_schema = self.get_schema_for_json_data_same_schema()
    tr = TreeDataSet._get_tree_row(input_row=data, schema=None,
                                   method='numpy')
    check(tr, expected_schema)

    # Case 2: Dictionary + single schema
    expected_schema = with_day_dates(
        self.get_schema_for_json_data_same_schema())
    # The schema passed in is derived from the row produced in case 1.
    schema = with_day_dates(tr.get_schema())
    tr = TreeDataSet._get_tree_row(input_row=data, schema=schema,
                                   method='numpy')
    check(tr, expected_schema)

    # Case 3: TreeRow + no schema
    tr = TreeRow(input_row=data).build_row(input_row=data, method='numpy')
    expected_schema = self.get_schema_for_json_data_same_schema()
    tr = TreeDataSet._get_tree_row(input_row=tr, schema=None,
                                   method='numpy')
    check(tr, expected_schema)

    # Case 4: TreeRow + schema
    tr = TreeRow(input_row=data).build_row(input_row=data, method='numpy')
    expected_schema = with_day_dates(
        self.get_schema_for_json_data_same_schema())
    schema = with_day_dates(tr.get_schema())
    tr = TreeDataSet._get_tree_row(input_row=tr, schema=schema,
                                   method='numpy')
    check(tr, expected_schema)
def test_apply_schema(self):
    """Check ``TreeRow.apply_schema`` on one matching and two mismatching schemas.

    Case 1 applies a schema that fits the input structure and compares
    the converted row with a hand-written expectation.  Cases 2 and 3
    apply schemas whose leaf/fork layout contradicts the data and expect
    ``RuntimeError``.
    """
    # Case 1
    input_data_1 = {
        "l1-f": "120.9",
        "l1-s": 34,
        "l1-d": "2018-01-04",
        "f": {
            "l2-f": "-120.9",
            "l2-s": 'YES',
            "l2-a": ["2018-01-04"]
        }
    }
    # Expected values after the schema is applied; note the leaf
    # 'l2-missing' is absent from the input and expected as 'nan'.
    output_data_1_exp = {
        "l1-f": 120.9,
        "l1-s": "34.0",
        "l1-d": np.datetime64("2018-01-04"),
        "f": {
            "l2-f": -120.9,
            "l2-s": 'YES',
            "l2-a": [np.datetime64("2018-01-04")],
            'l2-missing': 'nan'
        }
    }
    fork_1 = ForkNode('base', [
        ChildNode('l1-f', FloatDataType()),
        ChildNode('l1-s', StringDataType()),
        ChildNode('l1-d',
                  DateDataType(resolution='D', format_string="%Y-%m-%d")),
        ForkNode('f', [
            ChildNode('l2-f', FloatDataType()),
            ChildNode('l2-s', StringDataType()),
            ChildNode(
                'l2-a',
                ArrayDataType(
                    DateDataType(resolution='D',
                                 format_string="%Y-%m-%d"))),
            ChildNode('l2-missing', StringDataType())
        ])
    ])
    tr_1 = TreeRow(input_data_1)
    schema_1 = TreeSchema(base_fork_node=fork_1)
    # unittest assertion instead of a bare ``assert`` so the check is
    # not stripped when Python runs with -O.
    self.assertIsNone(tr_1.row)

    tr_1 = tr_1.build_row(input_data_1, 'numpy')
    # Before the explicit schema is applied neither row nor schema match.
    self.assertNotEqual(tr_1.row, output_data_1_exp)
    self.assertNotEqual(tr_1.get_schema(), schema_1)

    tr_1 = tr_1.set_schema(schema_1)
    tr_1 = tr_1.apply_schema('numpy')
    self.assertEqual(tr_1.row, output_data_1_exp)

    # Case 2: data holds a nested dict where the schema declares a leaf.
    input_data_2 = {'f': {'float': 20}}
    fork_2 = ForkNode('base', [ChildNode('f', FloatDataType())])
    tr_2 = TreeRow(input_data_2)
    schema_2 = TreeSchema(base_fork_node=fork_2)
    self.assertIsNone(tr_2.row)

    tr_2 = tr_2.build_row(input_data_2, 'numpy')
    self.assertNotEqual(tr_2.get_schema(), schema_2)

    tr_2 = tr_2.set_schema(schema_2)
    with self.assertRaises(RuntimeError):
        tr_2.apply_schema('numpy')

    # Case 3: data holds a scalar where the schema declares a fork.
    input_data_3 = {'f': 20}
    fork_3 = ForkNode(
        'base', [ForkNode('f', [ChildNode('float', FloatDataType())])])
    tr_3 = TreeRow(input_data_3)
    schema_3 = TreeSchema(base_fork_node=fork_3)
    self.assertIsNone(tr_3.row)

    tr_3 = tr_3.build_row(input_data_3, 'numpy')
    self.assertNotEqual(tr_3.get_schema(), schema_3)

    tr_3 = tr_3.set_schema(schema_3)
    with self.assertRaises(RuntimeError):
        tr_3.apply_schema('numpy')