def test_custom_transform_with_one_phase(self):
        """Check a schema whose only transform is a single custom function.

        ``field_3`` declares one transform phase built with
        ``custom_functions_for_tests.sum(5)``. The test expects its value to
        go from 7 to 12 (presumably the helper adds its argument), while the
        remaining fields and the schema itself come back unchanged.
        """
        # Arrange
        schema = {
            ("field_1", ): FieldData(["string"], [], [], None),
            ("field_2", ): FieldData(["int"], [], [], None),
            ("field_3", ): FieldData(
                ["int"], [], [custom_functions_for_tests.sum(5)], None),
        }
        source_record = {
            ("field_1", ): "hola",
            ("field_2", ): 56,
            ("field_3", ): 7,
        }
        wanted_record = {
            ("field_1", ): "hola",
            ("field_2", ): 56,
            ("field_3", ): 12,
        }

        # Act
        applier = TransformApplier({})
        actual_record, actual_schema = applier.apply(source_record, schema)

        # Assert
        self.assertDictEqual(actual_record, wanted_record)
        # The applier must hand the schema back untouched.
        self.assertDictEqual(actual_schema, schema)
Beispiel #2
0
    def test_apply(self):
        """Check that the fields of the selected nested schema are added.

        ``field_2`` may hold either of two nested schemas; the selector used
        here always answers ``"NestSchema1"``. The test expects the record to
        come back unchanged and the resulting flat schema to contain the keys
        ``("field_1",)`` and ``("field_2", "nested_field_1")``, in that order.
        """

        # Arrange
        def test_selector(key, record, custom_variables):
            # Constant choice: the arguments are deliberately ignored.
            return "NestSchema1"

        test_flat_schemas = {
            "base_schema": {
                ("field_1", ):
                FieldData(["string"], ["another_field"], [], None),
                ("field_2", ):
                FieldData(["NestSchema1", "NestSchema2"], [], [],
                          test_selector),
            },
            "NestSchema1": {
                ("nested_field_1", ): FieldData(["int"], [], [], None)
            },
            # Registered under the same name that field_2 declares as a
            # possible type, so the fixture is self-consistent.
            "NestSchema2": {
                ("nested_field_2", ): FieldData(["int"], [], [], None)
            }
        }

        test_input_record = {("field_1", ): "hola"}

        # Act
        ne = NestedSchemaSelectorApplier(test_flat_schemas, {})
        res_record, res_complete_flat_schema = ne.apply(
            test_input_record, test_flat_schemas["base_schema"])

        # Assert
        self.assertDictEqual(res_record, test_input_record)  # Without changes

        self.assertListEqual(list(res_complete_flat_schema.keys()),
                             [("field_1", ), ("field_2", "nested_field_1")])
    def test_transform_with_nested_schemas(self):
        """Check multi-phase transforms on a schema that has nested fields.

        Top-level and ``field_4``-nested fields declare transforms in
        different phases (``None`` entries mark phases without a transform).
        One nested field reads ``example_variable_2`` from the custom
        variables given to the applier. The schema must come back unchanged.
        """
        # Arrange
        add_one_then_copy = [
            custom_functions_for_tests.sum(1),
            copyFrom("field_2"),
        ]
        schema = {
            ("field_1", ): FieldData(["string"], [], [], None),
            ("field_2", ): FieldData(
                ["int"], [], [None, copyFrom("field_3")], None),
            ("field_3", ): FieldData(["int"], [], add_one_then_copy, None),
            ("field_4", ): FieldData(["ExampleNestedSchema"], [], [], None),
            ("field_4", "nested_field_1"): FieldData(["int"], [], [], None),
            ("field_4", "nested_field_2"): FieldData(
                ["int"], [], [None, copyFrom("field_3")], None),
            ("field_4", "nested_field_3"): FieldData(
                ["int"], [],
                [None, None, copyFrom("field_3"),
                 custom_functions_for_tests.sum(1)],
                None),
            ("field_4", "nested_field_4"): FieldData(
                ["int"], [], [toNull()], None),
            ("field_4", "nested_field_5"): FieldData(
                ["int"], [],
                [get_from_custom_variable("example_variable_2")], None),
        }

        source_record = {
            ("field_1", ): "hola",
            ("field_2", ): 56,
            ("field_3", ): 7,
        }

        wanted_record = {
            ("field_1", ): "hola",
            ("field_2", ): 8,
            ("field_3", ): 56,
            ("field_4", "nested_field_2"): 8,
            ("field_4", "nested_field_3"): 57,
            ("field_4", "nested_field_4"): None,
            ("field_4", "nested_field_5"): 9,
        }

        custom_variables = {
            "example_variable_1": 5,
            "example_variable_2": 9,
            "example_variable_3": 1,
        }

        # Act
        applier = TransformApplier(custom_variables)
        actual_record, actual_schema = applier.apply(source_record, schema)

        # Assert
        self.assertDictEqual(actual_record, wanted_record)
        self.assertDictEqual(actual_schema, schema)
    def select_nested_schema_and_add_their_fields(
            self, record: dict, flat_schema: dict, field_key: tuple,
            field_data: "FieldData") -> dict:
        """Select a nested schema and add its fields to the resulting flat schema.

        The nested schema name is obtained by calling the function stored in
        the ``selector`` attribute of ``field_data``. The selector is invoked
        at most once per call.

        :param record: The input record. It is passed to the selector.
        :type record: dict
        :param flat_schema: The input flat schema. It is not modified in
            place.
        :type flat_schema: dict
        :param field_key: The key to "analyze". It is concatenated with each
            key of the selected nested schema, so both must be of the same
            sequence type (flat-schema keys are expected to be tuples).
        :type field_key: tuple
        :param field_data: The FieldData object associated with ``field_key``.
        :type field_data: FieldData
        :raises RuntimeError: If the selector returns a name that is not a
            key of ``self.flat_schemas``.
        :return: ``flat_schema`` itself when no nested schema is selected;
            otherwise a new dict holding the original entries plus the
            fields of the nested schema, prefixed with ``field_key``.
        :rtype: dict
        """

        selector = field_data.selector

        # Fields without a selector cannot hold a nested schema.
        if selector is None:
            return flat_schema

        # Call the selector exactly once: it may be expensive or stateful.
        nested_schema_name = selector(field_key, record,
                                      self.custom_variables)

        # The selector may decide that no nested schema applies.
        if nested_schema_name is None:
            return flat_schema

        if nested_schema_name not in self.flat_schemas:
            raise RuntimeError(
                f"Invalid nested schema name: {nested_schema_name}")

        nested_schema = self.flat_schemas[nested_schema_name]

        # Prefix every nested key with the parent key to keep the schema flat.
        fields_to_add = {
            field_key + nested_key: nested_field_data
            for nested_key, nested_field_data in nested_schema.items()
        }

        return {**flat_schema, **fields_to_add}
    def test_apply(self):
        """Check that fields absent from the schema are dropped from the record.

        The input record carries ``field_3``, which the schema does not
        declare; the expected record only keeps ``field_1``. The schema must
        come back unchanged.
        """

        # Arrange
        test_flat_schema = {
            ("field_1", ): FieldData(["string"], [], [], []),
            ("field_renamed", ): FieldData(["string"], [], [], [])
        }

        input_record = {("field_1", ): "hola", ("field_3", ): "adios"}

        expected_record = {("field_1", ): "hola"}

        # Act
        res_record, res_schema = CleanApplier({}).apply(
            input_record, test_flat_schema)

        # Assert
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(res_record, expected_record)
        self.assertEqual(res_schema, test_flat_schema)  # Without changes
Beispiel #6
0
    def test_basic_renaming(self):
        """Check that a value is made available under its renamed key.

        ``field_renamed`` lists ``field_2`` in its second FieldData argument.
        The test expects the result to hold the value 56 under both
        ``field_2`` and ``field_renamed``, and the schema to be unchanged.
        """
        # Arrange
        schema = {
            ("field_1", ): FieldData(["string"], [], [], []),
            ("field_renamed", ): FieldData(["string"], ["field_2"], [], []),
        }
        source_record = {
            ("field_1", ): "hola",
            ("field_2", ): 56,
        }
        wanted_record = {
            ("field_1", ): "hola",
            ("field_2", ): 56,
            ("field_renamed", ): 56,
        }

        # Act
        applier = RenameApplier({})
        actual_record, actual_schema = applier.apply(source_record, schema)

        # Assert
        self.assertDictEqual(actual_record, wanted_record)
        # The applier must hand the schema back untouched.
        self.assertDictEqual(actual_schema, schema)
    def test_transform_with_one_phase(self):
        """Check a schema with a single ``copyFrom`` transform phase.

        ``field_3`` is missing from the input record and declares
        ``copyFrom("field_2")``; the test expects it to end up with the value
        of ``field_2`` (56). The schema must come back unchanged.
        """
        # Arrange
        schema = {
            ("field_1", ):
            FieldData(["string"], [], [], None),
            ("field_2", ):
            FieldData(["int"], [], [], None),
            ("field_3", ):
            FieldData(["int"], [], [copyFrom("field_2")], None),
        }
        source_record = {
            ("field_1", ): "hola",
            ("field_2", ): 56,
        }
        wanted_record = {
            ("field_1", ): "hola",
            ("field_2", ): 56,
            ("field_3", ): 56,
        }

        # Act
        applier = TransformApplier({})
        actual_record, actual_schema = applier.apply(source_record, schema)

        # Assert
        self.assertDictEqual(actual_record, wanted_record)
        # The applier must hand the schema back untouched.
        self.assertDictEqual(actual_schema, schema)
    def test_record_transform_function(self):
        """Check transforms that write to a key other than their own field.

        ``field_2`` and ``field_5`` use
        ``custom_functions_for_tests.collapse_values`` with the target name
        ``"collapse_dict"``. The test expects both fields to end up as
        ``None`` and a new ``("collapse_dict",)`` entry to hold their
        collected values (7 for ``field_2``, ``None`` for ``field_5``).
        The schema must come back unchanged.
        """

        # Arrange
        test_schema = {
            ("field_1", ):
            FieldData(["string"], [], [], None),
            ("field_2", ):
            FieldData(["int"], [], [
                None,
                copyFrom("field_3"),
                custom_functions_for_tests.collapse_values(
                    "field_2", "collapse_dict")
            ], None),
            ("field_3", ):
            FieldData(["int"], [], [], None),
            ("field_4", ):
            FieldData(["int"], [], [], None),
            ("field_5", ):
            FieldData(["int"], [], [
                custom_functions_for_tests.collapse_values(
                    "field_5", "collapse_dict")
            ], None),
            ("field_6", ):
            FieldData(["int"], [], [], None)
        }

        input_record = {
            ("field_1", ): "hola",
            ("field_2", ): 56,
            ("field_3", ): 7,
            ("field_4", ): 78,
            ("field_6", ): 12
        }

        expected_record = {
            ("field_1", ): "hola",
            ("field_2", ): None,
            ("field_3", ): 7,
            ("field_4", ): 78,
            ("field_5", ): None,
            ("field_6", ): 12,
            ("collapse_dict", ): {
                "field_2": 7,
                "field_5": None
            }
        }

        # Act
        res_record, res_schema = TransformApplier({}).apply(
            input_record, test_schema)

        # Assert
        # On failure assertDictEqual already reports the differing entries,
        # so no debug printing of the result is needed.
        self.assertDictEqual(res_record, expected_record)
        self.assertDictEqual(res_schema, test_schema)
    def test_several_transform_phases(self):
        """Check a schema whose fields declare transforms in several phases.

        ``None`` entries in a transform list mark phases in which the field
        has no transform. The expected record shows ``field_2`` and
        ``field_3`` exchanging values after ``field_3`` is first increased,
        ``field_5`` receiving the original ``field_4`` while ``field_4``
        becomes ``None``, and ``field_6`` going from 12 to 16. The schema
        must come back unchanged.
        """
        # Arrange
        add_one_then_copy = [
            custom_functions_for_tests.sum(1),
            copyFrom("field_2"),
        ]
        schema = {
            ("field_1", ): FieldData(["string"], [], [], None),
            ("field_2", ): FieldData(
                ["int"], [], [None, copyFrom("field_3")], None),
            ("field_3", ): FieldData(["int"], [], add_one_then_copy, None),
            ("field_4", ): FieldData(
                ["int"], [], [None, None, toNull()], None),
            ("field_5", ): FieldData(
                ["int"], [], [None, None, copyFrom("field_4")], None),
            ("field_6", ): FieldData(
                ["int"], [], [custom_functions_for_tests.sum(4)], None),
        }

        source_record = {
            ("field_1", ): "hola",
            ("field_2", ): 56,
            ("field_3", ): 7,
            ("field_4", ): 78,
            ("field_6", ): 12,
        }

        wanted_record = {
            ("field_1", ): "hola",
            ("field_2", ): 8,
            ("field_3", ): 56,
            ("field_4", ): None,
            ("field_5", ): 78,
            ("field_6", ): 16,
        }

        # Act
        applier = TransformApplier({})
        actual_record, actual_schema = applier.apply(source_record, schema)

        # Assert
        self.assertDictEqual(actual_record, wanted_record)
        self.assertDictEqual(actual_schema, schema)