def test_given_name_and_phone_number_then_we_anonymize_correctly():
    """Mask a NAME via the DEFAULT operator and a PHONE_NUMBER via its own.

    NAME has no dedicated operator, so the "DEFAULT" mask applies; the
    expected output shows the whole 8-character span replaced by '*'.
    PHONE_NUMBER masks 6 characters counted from the end of its span; the
    trailing "4" of the input lies outside the 48-57 span and stays as is.
    """
    text = "hello world, my name is Jane Doe. My number is: 03-4453334"

    default_mask = OperatorConfig(
        "mask",
        {"masking_char": "*", "chars_to_mask": 20, "from_end": False},
    )
    phone_mask = OperatorConfig(
        "mask",
        {"masking_char": "*", "chars_to_mask": 6, "from_end": True},
    )
    operators = {"DEFAULT": default_mask, "PHONE_NUMBER": phone_mask}

    name_span = RecognizerResult(
        start=24, end=32, score=0.8, entity_type="NAME"
    )
    phone_span = RecognizerResult(
        start=48, end=57, score=0.95, entity_type="PHONE_NUMBER"
    )

    # Serialized engine result; items are listed phone number first.
    expected_json = (
        '{"text": "hello world, my name is ********. '
        'My number is: 03-******4", '
        '"items": ['
        '{"start": 48, "end": 57, "entity_type": "PHONE_NUMBER", '
        '"text": "03-******", "operator": "mask"}, '
        '{"start": 24, "end": 32, "entity_type": "NAME", '
        '"text": "********", "operator": "mask"}'
        ']}'
    )

    run_engine_and_validate(
        text, operators, [name_span, phone_span], expected_json
    )
def test_given_anonymize_with_encrypt_then_text_returned_with_encrypted_content():
    """Encrypt a PERSON span, then decrypt it again with the same key.

    The anonymizer output item is converted to an AnonymizerResult and fed
    to the DeanonymizeEngine; the round trip must give back the original
    sentence and the original span boundaries.
    """
    prefix = "My name is "
    person_name = "Chloë"
    text = prefix + person_name
    span_start, span_end = 11, 16
    key = "WmZq4t7w!z%C&F)J"

    person_span = RecognizerResult("PERSON", span_start, span_end, 0.8)
    encrypt_operators = {"PERSON": OperatorConfig("encrypt", {"key": key})}

    anonymized = AnonymizerEngine().anonymize(
        text, [person_span], encrypt_operators
    )
    assert len(anonymized.items) == 1

    # The deanonymizer takes AnonymizerResult entities, so convert the
    # single operator result produced above.
    encrypted_entities = [
        AnonymizerResult.from_operator_result(anonymized.items[0])
    ]

    engine = DeanonymizeEngine()
    decrypt_operators = {"PERSON": OperatorConfig(Decrypt.NAME, {"key": key})}
    decryption = engine.deanonymize(
        anonymized.text, encrypted_entities, decrypt_operators
    )

    assert decryption.text == "My name is Chloë"
    assert len(decryption.items) == 1
    restored = decryption.items[0]
    assert restored.text == "Chloë"
    assert restored.end == 16
    assert restored.start == 11
    assert restored.entity_type == "PERSON"
def test_given_invalid_json_then_we_fail_to_parse_it_to_operator_config():
    """OperatorConfig.from_json must reject a payload with no operator name."""
    # Only operator parameters are supplied; nothing names the operator.
    payload_without_operator = {
        "masking_char": "*",
        "chars_to_mask": 4,
        "from_end": True,
    }
    message = "Invalid input, operator config must contain operator_name"

    with pytest.raises(InvalidParamException, match=message):
        OperatorConfig.from_json(payload_without_operator)
def analyze_and_anonymize(self, text) -> str:
    """Analyze *text* as English and return its anonymized form.

    Every entity reported by ``self.analyzer`` is handled by the "redact"
    operator, registered under the "DEFAULT" key.

    :param text: the text to analyze and anonymize
    :return: the ``text`` attribute of the anonymizer's result
    """
    findings = self.analyzer.analyze(text=text, language='en')
    redact_by_default = {"DEFAULT": OperatorConfig("redact")}
    outcome = self.anonymizer.anonymize(
        text=text,
        analyzer_results=findings,
        operators=redact_by_default,
    )
    return outcome.text
def test_given_name_and_phone_number_without_anonymizers_then_we_use_default():
    """Entities with no matching operator fall back to the engine default.

    The only configured operator targets "ABC", which matches neither
    analyzer result. The expected output shows both entities handled by
    "replace", each becoming ``<ENTITY_TYPE>``.
    """
    text = "hello world, my name is Jane Doe. My number is: 03-4453334"

    # Deliberately keyed by an entity type that never appears in the results.
    unrelated_mask = OperatorConfig(
        "mask",
        {"masking_char": "*", "chars_to_mask": 6, "from_end": True},
    )
    operators = {"ABC": unrelated_mask}

    name_span = RecognizerResult(
        start=24, end=32, score=0.8, entity_type="NAME"
    )
    phone_span = RecognizerResult(
        start=48, end=57, score=0.95, entity_type="PHONE_NUMBER"
    )

    expected_json = (
        '{"text": "hello world, my name is <NAME>. '
        'My number is: <PHONE_NUMBER>4", '
        '"items": ['
        '{"start": 46, "end": 60, "entity_type": "PHONE_NUMBER", '
        '"text": "<PHONE_NUMBER>", "operator": "replace"}, '
        '{"start": 24, "end": 30, "entity_type": "NAME", '
        '"text": "<NAME>", "operator": "replace"}'
        ']}'
    )

    run_engine_and_validate(
        text, operators, [name_span, phone_span], expected_json
    )
def __check_or_add_default_operator(
    operators: Dict[str, OperatorConfig],
) -> Dict[str, OperatorConfig]:
    """Return an operators mapping that is guaranteed to have a "DEFAULT" entry.

    The caller's dict is never modified: when the default has to be added,
    a shallow copy carrying the extra entry is returned instead, so the
    same configuration dict can safely be reused across calls.

    :param operators: entity type -> operator configuration; may be None
        or empty.
    :return: ``operators`` itself when it already holds a truthy "DEFAULT"
        entry; otherwise a new dict with "DEFAULT" set to
        ``OperatorConfig(DEFAULT)``.
    """
    default_operator = OperatorConfig(DEFAULT)
    if not operators:
        return {"DEFAULT": default_operator}
    if operators.get("DEFAULT"):
        return operators
    # Copy rather than assign into the argument, so the caller's dict does
    # not silently gain a "DEFAULT" key as a side effect.
    return {**operators, "DEFAULT": default_operator}
def test_given_operator_decrypt_then_we_fail():
    """The anonymizer engine must refuse the "decrypt" operator."""
    text = "hello world, my name is Jane Doe. My number is: 03-4453334"
    decrypt_by_default = {
        "DEFAULT": OperatorConfig("decrypt", {"key": "key"})
    }
    name_span = RecognizerResult(
        start=24, end=32, score=0.8, entity_type="NAME"
    )
    engine = AnonymizerEngine()

    expected_failure = pytest.raises(
        InvalidParamException,
        match="Invalid operator class 'decrypt'.",
    )
    with expected_failure:
        engine.anonymize(text, [name_span], decrypt_by_default)
def operators_config_from_json(data: Dict) -> Dict[str, 'OperatorConfig']:
    """
    Build an OperatorConfig for every entry of the given mapping.

    :param data: mapping of key -> operator JSON, each value being handed
        to ``OperatorConfig.from_json``; may be None
    :return: dict with the same keys and the parsed OperatorConfig values,
        or an empty dict when ``data`` is None
    """
    if data is None:
        return {}

    parsed_configs = {}
    for key, operator_json in data.items():
        parsed_configs[key] = OperatorConfig.from_json(operator_json)
    return parsed_configs
def test_given_redact_and_replace_then_we_anonymize_successfully():
    """Combine "redact" for NAME with "replace" for PHONE_NUMBER.

    Per the expected output, the redacted name becomes an empty string
    (the "new_value" given to redact has no effect) and the phone number,
    whose "new_value" is empty, is rendered as ``<PHONE_NUMBER>``.
    """
    text = "hello world, my name is Jane Doe. My number is: 03-4453334"

    operators = {
        "NAME": OperatorConfig("redact", {"new_value": "ANONYMIZED"}),
        "PHONE_NUMBER": OperatorConfig("replace", {"new_value": ""}),
    }

    name_span = RecognizerResult(
        start=24, end=32, score=0.8, entity_type="NAME"
    )
    phone_span = RecognizerResult(
        start=48, end=57, score=0.95, entity_type="PHONE_NUMBER"
    )

    expected_json = (
        '{"text": "hello world, my name is . '
        'My number is: <PHONE_NUMBER>4", '
        '"items": ['
        '{"start": 40, "end": 54, "entity_type": "PHONE_NUMBER", '
        '"text": "<PHONE_NUMBER>", "operator": "replace"}, '
        '{"start": 24, "end": 24, "entity_type": "NAME", '
        '"text": "", "operator": "redact"}'
        ']}'
    )

    run_engine_and_validate(
        text, operators, [name_span, phone_span], expected_json
    )
def test_given_valid_json_then_we_parse_it_to_operator_config():
    """from_json turns "type" into the operator name and the rest into params."""
    mask_params = {
        "masking_char": "*",
        "chars_to_mask": 4,
        "from_end": True,
    }
    # A fresh dict is passed in, so mask_params stays untouched for the
    # comparison below.
    parsed = OperatorConfig.from_json({"type": "mask", **mask_params})

    assert parsed.operator_name == "mask"
    assert parsed.params == mask_params
def anonymizeName(text_to_anonymize):
    """Replace every detected PERSON entity in the text with a token.

    Uses the module-level ``analyzer`` and ``anonymizer``. The analysis is
    limited to the "PERSON" entity and English.

    :param text_to_anonymize: the text to process
    :return: the ``text`` attribute of the anonymizer's result
    """
    person_findings = analyzer.analyze(
        text=text_to_anonymize,
        entities=["PERSON"],
        language='en',
    )
    # generateToken(20) is called once per invocation, so all PERSON
    # entities found in this text share the same replacement value.
    person_replacement = OperatorConfig(
        "replace", {"new_value": generateToken(20)}
    )
    outcome = anonymizer.anonymize(
        text=text_to_anonymize,
        analyzer_results=person_findings,
        operators={"PERSON": person_replacement},
    )
    return outcome.text
def anonymize_text(text: str) -> str:
    """Anonymize *text*, replacing every detected entity with "<ANONYMIZED>".

    The analyzer and anonymizer are read from the ``.value`` of the
    ``broadcasted_analyzer`` / ``broadcasted_anonymizer`` objects, and the
    text is analyzed as English.

    Best effort: any exception is printed and swallowed, in which case the
    function returns None rather than a string.

    :param text: the text to anonymize
    :return: the anonymized text, or None when an exception occurred
    """
    try:
        analyzer_engine = broadcasted_analyzer.value
        anonymizer_engine = broadcasted_anonymizer.value
        findings = analyzer_engine.analyze(text=text, language="en")
        replace_everything = {
            "DEFAULT": OperatorConfig("replace", {"new_value": "<ANONYMIZED>"})
        }
        return anonymizer_engine.anonymize(
            text=text,
            analyzer_results=findings,
            operators=replace_everything,
        ).text
    except Exception as e:
        print(f"An exception occurred. {e}")
        # Spell out the fall-through result of the error path.
        return None
def test_given_short_key_then_we_fail():
    """Decrypting with the key "1234" must be rejected as an invalid key length."""
    text = "My name is S184CMt9Drj7QaKQ21JTrpYzghnboTF9pn/neN8JME0="
    encrypted_person = AnonymizerResult(
        start=11, end=55, entity_type="PERSON"
    )
    engine = DeanonymizeEngine()
    message = "Invalid input, key must be of length 128, 192 or 256 bits"

    with pytest.raises(InvalidParamException, match=message):
        # The operator config is built inside the block, so a failure
        # raised while constructing it is covered as well.
        engine.deanonymize(
            text,
            [encrypted_person],
            {"PERSON": OperatorConfig(Decrypt.NAME, {"key": "1234"})},
        )
def test_given_hash_then_we_anonymize_correctly(hash_type, result):
    """Hash every entity with the DEFAULT "hash" operator and compare to *result*.

    :param hash_type: value for the operator's "hash_type" parameter; when
        falsy the operator is configured with no parameters at all
    :param result: the expected engine output for this hash type
    """
    text = "hello world, my name is Jane Doe. My number is: 034453334"
    params = {"hash_type": hash_type} if hash_type else {}
    operators = {"DEFAULT": OperatorConfig("hash", params)}

    # FIRST_NAME (24-28) and LAST_NAME (29-32) lie inside the NAME span
    # (24-32), so the input contains overlapping results.
    spans = [
        RecognizerResult(
            start=48, end=57, score=0.95, entity_type="PHONE_NUMBER"
        ),
        RecognizerResult(
            start=24, end=28, score=0.8, entity_type="FIRST_NAME"
        ),
        RecognizerResult(
            start=29, end=32, score=0.6, entity_type="LAST_NAME"
        ),
        RecognizerResult(
            start=24, end=32, score=0.8, entity_type="NAME"
        ),
    ]

    run_engine_and_validate(text, operators, spans, result)
def test_given_operator_decrypt_with_valid_params_then_decrypt_text_successfully():
    """Decrypt an encrypted PERSON span through the DEFAULT operator.

    The decrypt operator is registered under "DEFAULT" rather than
    "PERSON", so this also checks that the default is used for the entity.
    """
    text = "My name is S184CMt9Drj7QaKQ21JTrpYzghnboTF9pn/neN8JME0="
    encrypted_person = AnonymizerResult(
        start=11, end=55, entity_type="PERSON"
    )
    engine = DeanonymizeEngine()
    decrypt_by_default = {
        "DEFAULT": OperatorConfig(Decrypt.NAME, {"key": "WmZq4t7w!z%C&F)J"})
    }

    decryption = engine.deanonymize(
        text, [encrypted_person], decrypt_by_default
    )

    assert decryption.text == "My name is Chloë"
    assert len(decryption.items) == 1
    restored = decryption.items[0]
    assert restored.text == "Chloë"
    assert restored.end == 16
    assert restored.start == 11
    assert restored.entity_type == "PERSON"
def test_given_anonymize_with_encrypt_then_text_returned_with_encrypted_content():
    """Encrypt a PERSON span and verify the ciphertext with AESCipher.

    The text before the span must be left alone, the span itself must no
    longer equal the plain name, and decrypting it with the same key must
    give the plain name back.
    """
    prefix = "My name is "
    person_name = "Chloë"
    text = prefix + person_name
    span_start, span_end = 11, 16
    key = "WmZq4t7w!z%C&F)J"

    person_span = RecognizerResult("PERSON", span_start, span_end, 0.8)
    encrypt_operators = {"PERSON": OperatorConfig("encrypt", {"key": key})}

    anonymized_text = AnonymizerEngine().anonymize(
        text, [person_span], encrypt_operators
    ).text

    assert anonymized_text[:span_start] == prefix

    # The entity is the last thing in the sentence, so everything from the
    # span start onwards is the encrypted value.
    ciphertext = anonymized_text[span_start:]
    assert ciphertext != person_name

    decrypted = AESCipher.decrypt(key.encode(), ciphertext)
    assert decrypted == person_name
def test_given_anonymize_called_with_error_scenarios_then_expected_errors_returned():
    """A multi-character "masking_char" must make anonymize() raise.

    ``pytest.raises`` is used instead of a manual try/except so the test
    fails explicitly if no exception is raised at all, and so it follows
    the same style as the other error-path tests.
    """
    text = "hello world, my name is Jane Doe. My number is: 03-4453334"
    # Built outside the ``with`` block: only the anonymize() call itself is
    # expected to raise.
    anonymizers = {
        "PHONE_NUMBER": OperatorConfig(
            "mask",
            {
                "masking_char": "non_character",
                "chars_to_mask": 6,
                "from_end": True,
            },
        )
    }
    analyzer_results = [RecognizerResult("PHONE_NUMBER", 48, 57, 0.95)]
    engine = AnonymizerEngine()

    with pytest.raises(
        InvalidParamException,
        match="Invalid input, masking_char must be a character",
    ):
        engine.anonymize(text, analyzer_results, anonymizers)
assert result_a.end == same_result_in_content.get("end") @pytest.mark.parametrize( "anonymizer_json, result", [({ "anonymizers": {} }, {}), ({}, {}), ({ "anonymizers": { "PHONE": { "type": "replace" } } }, { "PHONE": OperatorConfig("replace") }), ({ "anonymizers": { "PHONE": { "type": "redact", "param": "param", "param_1": "param_1" } } }, { "PHONE": OperatorConfig("redact", { "param": "param", "param_1": "param_1" })
from presidio_analyzer import AnalyzerEngine, PatternRecognizer
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities.engine import OperatorConfig

# Demo: detect PII in one sentence, then replace every finding with the
# same placeholder and print both intermediate and final results.
text_to_anonymize = "His name is Tom and his phone number is 212-555-5555"

analyzer = AnalyzerEngine()
anonymizer = AnonymizerEngine()

# Step 1: detection (the text is analyzed as English).
analyzer_results = analyzer.analyze(text=text_to_anonymize, language='en')
print("\nPII Detection:")
print(analyzer_results)

# Step 2: anonymization. The operator is registered under "DEFAULT", so it
# is not tied to one specific entity type.
default_replacement = OperatorConfig("replace", {"new_value": "<ANONYMIZED>"})
anonymized_results = anonymizer.anonymize(
    text=text_to_anonymize,
    analyzer_results=analyzer_results,
    operators={"DEFAULT": default_replacement},
)
print("\nPII Anonymization:")
print(anonymized_results.to_json())
"chars_to_mask": 4, "from_end": True }) def test_given_two_identical_entities_then_we_verify_they_are_equal(): one = OperatorConfig("name", {"key", "key"}) two = OperatorConfig("name", {"key", "key"}) assert one == two @pytest.mark.parametrize( # fmt: off "anonymizer_config", [ OperatorConfig("name1", {"key", "key"}), OperatorConfig("name1", {}), ], # fmt: on ) def test_given_two_different_entities_then_we_verify_they_are_equal( anonymizer_config): one = OperatorConfig("name", {"key", "key"}) assert one != anonymizer_config @pytest.mark.parametrize( # fmt: off "class_name", ["hash", "mask", "redact", "replace"], # fmt: on
def test_given_two_identical_entities_then_we_verify_they_are_equal():
    """Two OperatorConfig objects with the same name and params compare equal."""
    # Params are a mapping. The previous literal {"key", "key"} was a set
    # display (it evaluates to the one-element set {"key"}), so the test
    # never compared real parameter dicts.
    one = OperatorConfig("name", {"key": "key"})
    two = OperatorConfig("name", {"key": "key"})
    assert one == two
def test_given_two_different_entities_then_we_verify_they_are_equal(
    anonymizer_config,
):
    """An OperatorConfig must not equal one with a different configuration.

    Despite the "are_equal" suffix in the name (kept so the test id stays
    stable), this test asserts inequality.

    :param anonymizer_config: an OperatorConfig supplied by the test's
        parametrization that is expected to differ from the reference one
    """
    # Params are a mapping. The previous literal {"key", "key"} was a set
    # display (it evaluates to the one-element set {"key"}), not a dict.
    one = OperatorConfig("name", {"key": "key"})
    assert one != anonymizer_config
def test_given_json_then_anonymizer_config_is_created_properly(class_name):
    """from_json maps "type" to operator_name and the other keys to params.

    :param class_name: operator name under test, supplied by the test's
        parametrization
    """
    payload = {"type": class_name, "param_1": "my_parameter"}

    parsed = OperatorConfig.from_json(payload)

    assert parsed.operator_name == class_name
    assert parsed.params == {"param_1": "my_parameter"}