Esempio n. 1
0
    def test_transform_dataset_shape_strips_whitespace(self):
        """Leading and trailing whitespace is stripped from string cell values.

        The input column holds values padded on the left, the right, and both
        sides; the shaped output is expected to hold the bare values, along
        with the ``MemberOrganization`` and ``ForceOverWrite`` columns.
        """
        dataset_shape_transformer = dataset_shape.DatasetShapeTransformer(
            MEMBER_ORGANIZATION_ID,
            TEST_SCHEMA,
            TEST_COLUMN_MAPPING,
            row_format=True,
            multiple_val_delimiter=",",
        )
        dataset = pd.DataFrame(data={"field1": ["  1", "2   ", "   3   "]})

        actual_shaped_dataset = dataset_shape_transformer.transform_dataset_shape(
            dataset
        )

        # NOTE(review): "sample_id" is presumably the value of
        # MEMBER_ORGANIZATION_ID -- confirm against the module constants.
        expected_shaped_dataset = pd.DataFrame(
            data={
                "field1": ["1", "2", "3"],
                "MemberOrganization": ["sample_id", "sample_id", "sample_id"],
                "ForceOverWrite": ["1", "1", "1"],
            }
        )

        # pd.testing is the public location of assert_frame_equal;
        # pd.util.testing was deprecated in pandas 1.0 and removed in 2.0.
        pd.testing.assert_frame_equal(
            expected_shaped_dataset, actual_shaped_dataset
        )
Esempio n. 2
0
    def test_transform_dataset_shape_multiple_values_blank_values(self):
        """A blank cell in a list-valued column becomes ``[""]`` in the output.

        The transformer is built with ``multiple_val_delimiter=","``. The
        expected ``field3`` column holds one-element lists, with the blank
        input cell mapped to a list containing the empty string.
        """
        dataset_shape_transformer = dataset_shape.DatasetShapeTransformer(
            MEMBER_ORGANIZATION_ID,
            TEST_SCHEMA,
            TEST_COLUMN_MAPPING,
            row_format=True,
            multiple_val_delimiter=",",
        )
        dataset = pd.DataFrame(
            data={
                "internal_column_name1": ["1", "2"],
                "internal_column_name2": ["3", "4"],
                "internal_column_name3": ["", "4"],
            }
        )

        actual_shaped_dataset = dataset_shape_transformer.transform_dataset_shape(
            dataset
        )

        # NOTE(review): the internal_column_nameN -> fieldN renaming and the
        # list-valued treatment of field3 presumably come from
        # TEST_COLUMN_MAPPING / TEST_SCHEMA -- confirm against those fixtures.
        expected_shaped_dataset = pd.DataFrame(
            data={
                "field1": ["1", "2"],
                "field2": ["3", "4"],
                "field3": [[""], ["4"]],
                "MemberOrganization": ["sample_id", "sample_id"],
                "ForceOverWrite": ["1", "1"],
            }
        )

        # pd.testing is the public location of assert_frame_equal;
        # pd.util.testing was deprecated in pandas 1.0 and removed in 2.0.
        pd.testing.assert_frame_equal(
            expected_shaped_dataset, actual_shaped_dataset
        )
Esempio n. 3
0
    def test_transform_dataset_shape_row_format_some_columns_present(self):
        """Only the supplied columns, plus the two added ones, appear in the output.

        The input provides two internal columns; the shaped frame is expected
        to expose exactly ``field1``, ``field2``, ``MemberOrganization`` and
        ``ForceOverWrite``, in that order.
        """
        dataset_shape_transformer = dataset_shape.DatasetShapeTransformer(
            MEMBER_ORGANIZATION_ID, TEST_SCHEMA, TEST_COLUMN_MAPPING, row_format=True
        )
        dataset = pd.DataFrame(
            data={"internal_column_name1": [1, 2], "internal_column_name2": [3, 4]}
        )

        actual_shaped_dataset = dataset_shape_transformer.transform_dataset_shape(
            dataset
        )

        expected_column_names = [
            "field1",
            "field2",
            "MemberOrganization",
            "ForceOverWrite",
        ]
        # Compare the full lists: a zip()-based pairwise check stops at the
        # shorter sequence, so missing or extra columns would go unnoticed
        # (and an empty result would pass vacuously).
        self.assertListEqual(
            list(actual_shaped_dataset.columns), expected_column_names
        )
Esempio n. 4
0
    def test_transform_dataset_shape_col_format(self):
        """Column-format input is expanded into one group of rows per milestone.

        With ``row_format=False`` the two input rows are expected to yield six
        output rows: two each for the ``Intake``, ``Exit`` and ``NinetyDays``
        values of ``MilestoneFlag``. ``field1`` is repeated for every
        milestone, while the other fields are filled only for the milestones
        that have a matching input column and are empty strings elsewhere.
        """
        dataset_shape_transformer = dataset_shape.DatasetShapeTransformer(
            MEMBER_ORGANIZATION_ID, TEST_SCHEMA_COL, {}, row_format=False
        )
        # NOTE(review): how each input column name (e.g. "Intakefield2",
        # "actual_field4") maps to a milestone and field is presumably defined
        # by TEST_SCHEMA_COL -- confirm against that fixture.
        dataset = pd.DataFrame(
            data={
                "field1": ["field1_1", "field1_2"],
                "Intakefield2": ["field2_1", "field2_2"],
                "Intakefield3": ["field3_1", "field3_2"],
                "Exitfield3": ["field3_3", "field3_4"],
                "actual_field4": ["field4_1", "field4_1"],
            }
        )

        actual_shaped_dataset = dataset_shape_transformer.transform_dataset_shape(
            dataset
        )

        expected_shaped_dataset = pd.DataFrame(
            data={
                "MilestoneFlag": [
                    "Intake",
                    "Intake",
                    "Exit",
                    "Exit",
                    "NinetyDays",
                    "NinetyDays",
                ],
                "field1": [
                    "field1_1",
                    "field1_2",
                    "field1_1",
                    "field1_2",
                    "field1_1",
                    "field1_2",
                ],
                "field2": ["field2_1", "field2_2", "", "", "", ""],
                "field3": ["field3_1", "field3_2", "field3_3", "field3_4", "", ""],
                "field4": ["", "", "", "", "field4_1", "field4_1"],
                "MemberOrganization": ["sample_id"] * 6,
                "ForceOverWrite": ["1"] * 6,
            }
        )
        # pd.testing is the public location of assert_frame_equal;
        # pd.util.testing was deprecated in pandas 1.0 and removed in 2.0.
        pd.testing.assert_frame_equal(
            expected_shaped_dataset, actual_shaped_dataset
        )
Esempio n. 5
0
    def test_transform_dataset_shape_datset_is_empty(self):
        """An empty, column-less dataset is expected to come back empty.

        The expected result has no columns at all, i.e. the
        ``MemberOrganization`` and ``ForceOverWrite`` columns seen in the
        other tests are not expected for empty input.
        """
        dataset_shape_transformer = dataset_shape.DatasetShapeTransformer(
            MEMBER_ORGANIZATION_ID, TEST_SCHEMA, TEST_COLUMN_MAPPING, row_format=True
        )

        empty_dataset = pd.DataFrame(columns=[])

        actual_shaped_dataset = dataset_shape_transformer.transform_dataset_shape(
            empty_dataset
        )

        expected_shaped_dataset = pd.DataFrame(columns=[])
        # pd.testing is the public location of assert_frame_equal;
        # pd.util.testing was deprecated in pandas 1.0 and removed in 2.0.
        pd.testing.assert_frame_equal(
            expected_shaped_dataset, actual_shaped_dataset
        )