def test__make_anonymization_mappings(self, mock_table):
    """Test that ``_make_anonymization_mappings`` creates the expected mappings.

    The ``_make_anonymization_mappings`` method should map values in the original
    data to fake values for non-id fields that are labeled pii.

    Setup:
    - Create a mocked Table whose metadata describes three fields (one pii field,
      one id field, and one non-pii field).

    Input:
    - Data that contains a pii field, an id field, and a non-pii field.

    Side Effects:
    - Expect ``_get_fake_values`` to be called with the number of unique values of
      the pii field.
    - Expect the resulting ``_ANONYMIZATION_MAPPINGS`` field to contain the pii
      field, with the correct number of mappings and keys.
    """
    # Setup
    metadata = Mock()
    metadata._ANONYMIZATION_MAPPINGS = {}
    foo_metadata = {
        'type': 'categorical',
        'pii': True,
        'pii_category': 'email',
    }
    metadata._fields_metadata = {
        'foo': foo_metadata,
        'bar': {
            'type': 'categorical',
        },
        'baz': {
            'type': 'id',
        },
    }
    # The three values must be distinct: the assertions below expect one mapping
    # per value, and mapping keys cannot repeat.
    foo_values = [
        'user1@example.com',
        'user2@example.com',
        'user3@example.com',
    ]
    data = pd.DataFrame({
        'foo': foo_values,
        'bar': ['a', 'b', 'c'],
        'baz': [1, 2, 3],
    })

    # Run
    Table._make_anonymization_mappings(metadata, data)

    # Assert
    # ``assert_called_once_with`` is the real mock assertion; the previous
    # ``assert mock.called_once_with(...)`` form only evaluated a truthy
    # auto-created attribute and could never fail.
    # NOTE(review): assumes ``mock_table`` is the object whose ``_get_fake_values``
    # the code under test calls - confirm against the patch target.
    mock_table._get_fake_values.assert_called_once_with(foo_metadata, 3)

    mappings = metadata._ANONYMIZATION_MAPPINGS[id(metadata)]
    assert len(mappings) == 1

    foo_mappings = mappings['foo']
    assert len(foo_mappings) == 3
    assert list(foo_mappings.keys()) == foo_values
def test__make_anonymization_mappings_unique_faked_value_in_field(self, mock_table):
    """Test that ``_make_anonymization_mappings`` creates mappings for anonymized values.

    The ``_make_anonymization_mappings`` method should map equal values in the
    original data to the same faked value.

    Input:
    - DataFrame with a field that should be anonymized based on the metadata
      description. One of its values appears twice.

    Side Effect:
    - Mappings are created from the original values to faked values, with a single
      mapping entry for the repeated value.
    """
    # Setup
    metadata = Mock()
    metadata._ANONYMIZATION_MAPPINGS = {}
    foo_metadata = {
        'type': 'categorical',
        'pii': True,
        'pii_category': 'email',
    }
    metadata._fields_metadata = {'foo': foo_metadata}
    # Three rows but only two distinct addresses, so the repeated one must share
    # a single mapping entry.
    repeated_email = 'user1@example.com'
    other_email = 'user2@example.com'
    data = pd.DataFrame({
        'foo': [repeated_email, other_email, repeated_email],
    })

    # Run
    Table._make_anonymization_mappings(metadata, data)

    # Assert
    # ``assert_called_once_with`` is the real mock assertion; the previous
    # ``assert mock.called_once_with(...)`` form only evaluated a truthy
    # auto-created attribute and could never fail.
    # NOTE(review): assumes ``mock_table`` is the object whose ``_get_fake_values``
    # the code under test calls - confirm against the patch target.
    mock_table._get_fake_values.assert_called_once_with(foo_metadata, 2)

    mappings = metadata._ANONYMIZATION_MAPPINGS[id(metadata)]
    assert len(mappings) == 1

    foo_mappings = mappings['foo']
    assert len(foo_mappings) == 2
    # NOTE(review): the expected key order assumes keys are produced in order of
    # first appearance in the column - confirm against the implementation.
    assert list(foo_mappings.keys()) == [repeated_email, other_email]