def test_custom_transform_with_one_phase(self):
    """Apply a single-phase custom transform and check the resulting record.

    Only "field_3" declares a transform (``custom_functions_for_tests.sum(5)``);
    the test expects its value to go from 7 to 12 while the other fields and
    the schema are returned as they were given.
    """
    # Arrange
    schema = {
        ("field_1", ): FieldData(["string"], [], [], None),
        ("field_2", ): FieldData(["int"], [], [], None),
        ("field_3", ): FieldData(["int"], [], [custom_functions_for_tests.sum(5)], None),
    }
    record = {("field_1", ): "hola", ("field_2", ): 56, ("field_3", ): 7}
    # Same as the input except for the transformed field.
    expected = {**record, ("field_3", ): 12}

    # Act
    applier = TransformApplier({})
    actual_record, actual_schema = applier.apply(record, schema)

    # Assert
    self.assertDictEqual(actual_record, expected)
    # The schema must come back without changes.
    self.assertDictEqual(actual_schema, schema)
def test_apply(self):
    """Check that the nested schema picked by the selector is flattened.

    The selector attached to "field_2" always returns "NestSchema1", so the
    resulting flat schema is expected to contain that schema's field under
    the ("field_2", ...) prefix. The record itself must be returned as-is.
    """
    # Arrange
    def test_selector(key, record, custom_variables):
        # Fixed choice: the arguments are irrelevant for this test.
        return "NestSchema1"

    test_flat_schemas = {
        "base_schema": {
            ("field_1", ): FieldData(["string"], ["another_field"], [], None),
            ("field_2", ): FieldData(["NestSchema1", "NestSchema2"], [], [],
                                     test_selector),
        },
        "NestSchema1": {
            ("nested_field_1", ): FieldData(["int"], [], [], None)
        },
        # Registered under the same name that "field_2" declares, so both
        # declared alternatives resolve to an existing schema.
        "NestSchema2": {
            ("nested_field_2", ): FieldData(["int"], [], [], None)
        }
    }
    test_input_record = {("field_1", ): "hola"}

    # Act
    ne = NestedSchemaSelectorApplier(test_flat_schemas, {})
    res_record, res_complete_flat_schema = ne.apply(
        test_input_record, test_flat_schemas["base_schema"])

    # Assert
    self.assertDictEqual(res_record, test_input_record)  # Without changes
    self.assertListEqual(list(res_complete_flat_schema.keys()),
                         [("field_1", ), ("field_2", "nested_field_1")])
def test_transform_with_nested_schemas(self):
    """Run multi-phase transforms over a schema that contains nested fields.

    The schema mixes top-level fields with fields nested under "field_4",
    and one nested field reads its value from the custom variables handed
    to the applier. The schema is expected to come back unchanged.
    """
    # Arrange
    schema = {
        ("field_1", ): FieldData(["string"], [], [], None),
        ("field_2", ): FieldData(["int"], [], [None, copyFrom("field_3")], None),
        ("field_3", ): FieldData(
            ["int"], [],
            [custom_functions_for_tests.sum(1), copyFrom("field_2")], None),
        ("field_4", ): FieldData(["ExampleNestedSchema"], [], [], None),
        ("field_4", "nested_field_1"): FieldData(["int"], [], [], None),
        ("field_4", "nested_field_2"): FieldData(
            ["int"], [], [None, copyFrom("field_3")], None),
        ("field_4", "nested_field_3"): FieldData(
            ["int"], [],
            [None, None, copyFrom("field_3"), custom_functions_for_tests.sum(1)],
            None),
        ("field_4", "nested_field_4"): FieldData(["int"], [], [toNull()], None),
        ("field_4", "nested_field_5"): FieldData(
            ["int"], [],
            [get_from_custom_variable("example_variable_2")], None),
    }
    record = {("field_1", ): "hola", ("field_2", ): 56, ("field_3", ): 7}
    expected = {
        ("field_1", ): "hola",
        ("field_2", ): 8,
        ("field_3", ): 56,
        ("field_4", "nested_field_2"): 8,
        ("field_4", "nested_field_3"): 57,
        ("field_4", "nested_field_4"): None,
        # Value of "example_variable_2" below.
        ("field_4", "nested_field_5"): 9,
    }
    custom_variables = {
        "example_variable_1": 5,
        "example_variable_2": 9,
        "example_variable_3": 1,
    }

    # Act
    applier = TransformApplier(custom_variables)
    actual_record, actual_schema = applier.apply(record, schema)

    # Assert
    self.assertDictEqual(actual_record, expected)
    self.assertDictEqual(actual_schema, schema)
def select_nested_schema_and_add_their_fields(
        self, record: dict, flat_schema: dict, field_key: tuple,
        field_data: FieldData) -> dict:
    """Select a nested schema and add its fields to the resulting flat schema.

    The nested schema is chosen by the function stored in the "selector"
    value of the FieldData. The selector is invoked exactly once, with the
    field key, the record and the custom variables of this instance.

    :param record: The input record. It is passed to the selector.
    :type record: dict
    :param flat_schema: The input flat schema. It is not modified.
    :type flat_schema: dict
    :param field_key: The key to "analyze". It is concatenated with every
        key of the selected nested schema to build the new keys.
    :type field_key: tuple
    :param field_data: The FieldData object of the previous key.
    :type field_data: FieldData
    :raises RuntimeError: If the selected nested schema name is not one of
        the known flat schemas.
    :return: The input flat schema itself when no nested schema is
        selected; otherwise a new dict with the nested fields added.
    :rtype: dict
    """
    # Identify the nested schema name, calling the selector only once so
    # that a costly or stateful selector is not evaluated twice.
    selector = field_data.selector
    nested_schema_name = None
    if selector is not None:
        nested_schema_name = selector(field_key, record,
                                      self.custom_variables)

    # If there is no nested schema, just skip.
    if nested_schema_name is None:
        return flat_schema

    if nested_schema_name not in self.flat_schemas:
        raise RuntimeError(
            f"Invalid nested schema name: {nested_schema_name}")

    # Prefix every nested key with the parent key and merge the result on
    # top of the input schema (nested entries win on key collisions).
    nested_schema = self.flat_schemas[nested_schema_name]
    fields_to_add = {
        field_key + nested_key: nested_field_data
        for nested_key, nested_field_data in nested_schema.items()
    }
    return {**flat_schema, **fields_to_add}
def test_apply(self):
    """Check that fields missing from the flat schema are dropped.

    The input record carries "field_3", which the schema does not declare;
    the expected record keeps only "field_1". The schema itself must be
    returned without changes.
    """
    # Arrange
    test_flat_schema = {
        ("field_1", ): FieldData(["string"], [], [], []),
        ("field_renamed", ): FieldData(["string"], [], [], [])
    }
    input_record = {("field_1", ): "hola", ("field_3", ): "adios"}
    expected_record = {("field_1", ): "hola"}

    # Act
    res_record, res_schema = CleanApplier({}).apply(
        input_record, test_flat_schema)

    # Assert
    # assertDictEqual replaces the deprecated assertEquals alias, which no
    # longer exists in Python 3.12+.
    self.assertDictEqual(res_record, expected_record)
    self.assertDictEqual(res_schema, test_flat_schema)  # Without changes
def test_basic_renaming(self):
    """Check that a renamed field receives the value of its source field.

    "field_renamed" lists "field_2" in the second FieldData argument; the
    expected record contains the value 56 under both keys, and the schema
    is expected to come back unchanged.
    """
    # Arrange
    schema = {
        ("field_1", ): FieldData(["string"], [], [], []),
        ("field_renamed", ): FieldData(["string"], ["field_2"], [], []),
    }
    record = {("field_1", ): "hola", ("field_2", ): 56}
    # The original key is kept and the new key is added next to it.
    expected = {**record, ("field_renamed", ): 56}

    # Act
    applier = RenameApplier({})
    actual_record, actual_schema = applier.apply(record, schema)

    # Assert
    self.assertDictEqual(actual_record, expected)
    # The schema must come back without changes.
    self.assertDictEqual(actual_schema, schema)
def test_transform_with_one_phase(self):
    """Apply a single-phase ``copyFrom`` transform to a missing field.

    "field_3" is absent from the input record and declares
    ``copyFrom("field_2")``; the expected record has it set to 56, the
    value of "field_2". The schema is expected to come back unchanged.
    """
    # Arrange
    schema = {
        ("field_1", ): FieldData(["string"], [], [], None),
        ("field_2", ): FieldData(["int"], [], [], None),
        ("field_3", ): FieldData(["int"], [], [copyFrom("field_2")], None),
    }
    record = {("field_1", ): "hola", ("field_2", ): 56}
    # Input fields are kept; the copied field is added.
    expected = {**record, ("field_3", ): 56}

    # Act
    applier = TransformApplier({})
    actual_record, actual_schema = applier.apply(record, schema)

    # Assert
    self.assertDictEqual(actual_record, expected)
    # The schema must come back without changes.
    self.assertDictEqual(actual_schema, schema)
def test_record_transform_function(self):
    """Check transforms that write to a record key other than their own.

    "field_2" and "field_5" use ``collapse_values(..., "collapse_dict")``.
    The expected record has both fields set to None and a new
    ("collapse_dict", ) entry holding {"field_2": 7, "field_5": None}.
    The schema is expected to come back unchanged.
    """
    # Arrange
    test_schema = {
        ("field_1", ): FieldData(["string"], [], [], None),
        ("field_2", ): FieldData(["int"], [], [
            None,
            copyFrom("field_3"),
            custom_functions_for_tests.collapse_values(
                "field_2", "collapse_dict")
        ], None),
        ("field_3", ): FieldData(["int"], [], [], None),
        ("field_4", ): FieldData(["int"], [], [], None),
        ("field_5", ): FieldData(["int"], [], [
            custom_functions_for_tests.collapse_values(
                "field_5", "collapse_dict")
        ], None),
        ("field_6", ): FieldData(["int"], [], [], None)
    }
    input_record = {
        ("field_1", ): "hola",
        ("field_2", ): 56,
        ("field_3", ): 7,
        ("field_4", ): 78,
        ("field_6", ): 12
    }
    expected_record = {
        ("field_1", ): "hola",
        ("field_2", ): None,
        ("field_3", ): 7,
        ("field_4", ): 78,
        ("field_5", ): None,
        ("field_6", ): 12,
        ("collapse_dict", ): {
            "field_2": 7,
            "field_5": None
        }
    }

    # Act
    res_record, res_schema = TransformApplier({}).apply(
        input_record, test_schema)

    # Assert
    # No debug print here: assertDictEqual already reports a full diff of
    # both dictionaries when the comparison fails.
    self.assertDictEqual(res_record, expected_record)
    self.assertDictEqual(res_schema, test_schema)
def test_several_transform_phases(self):
    """Apply transforms spread over several phases and check the outcome.

    Each FieldData carries a list of transforms with None placeholders in
    some positions. The expected record shows "field_2" and "field_3"
    exchanging (partly transformed) values, "field_4" being nulled and
    "field_5" receiving the original value of "field_4". The schema is
    expected to come back unchanged.
    """
    # Arrange
    schema = {
        ("field_1", ): FieldData(["string"], [], [], None),
        ("field_2", ): FieldData(["int"], [], [None, copyFrom("field_3")], None),
        ("field_3", ): FieldData(
            ["int"], [],
            [custom_functions_for_tests.sum(1), copyFrom("field_2")], None),
        ("field_4", ): FieldData(["int"], [], [None, None, toNull()], None),
        ("field_5", ): FieldData(
            ["int"], [], [None, None, copyFrom("field_4")], None),
        ("field_6", ): FieldData(
            ["int"], [], [custom_functions_for_tests.sum(4)], None),
    }
    record = {
        ("field_1", ): "hola",
        ("field_2", ): 56,
        ("field_3", ): 7,
        ("field_4", ): 78,
        ("field_6", ): 12,
    }
    expected = {
        ("field_1", ): "hola",
        ("field_2", ): 8,
        ("field_3", ): 56,
        ("field_4", ): None,
        ("field_5", ): 78,
        ("field_6", ): 16,
    }

    # Act
    applier = TransformApplier({})
    actual_record, actual_schema = applier.apply(record, schema)

    # Assert
    self.assertDictEqual(actual_record, expected)
    self.assertDictEqual(actual_schema, schema)