def test_given_name_and_phone_number_then_we_anonymize_correctly():
    """Mask a NAME and a PHONE_NUMBER and compare against the expected JSON.

    Only "DEFAULT" and "PHONE_NUMBER" operators are configured; the expected
    output shows the NAME span fully masked and the PHONE_NUMBER span with
    its last six characters masked.
    """
    source_text = "hello world, my name is Jane Doe. My number is: 03-4453334"

    default_mask = OperatorConfig(
        "mask",
        {"masking_char": "*", "chars_to_mask": 20, "from_end": False},
    )
    phone_mask = OperatorConfig(
        "mask",
        {"masking_char": "*", "chars_to_mask": 6, "from_end": True},
    )
    operators = {"DEFAULT": default_mask, "PHONE_NUMBER": phone_mask}

    name_hit = RecognizerResult(
        start=24, end=32, score=0.8, entity_type="NAME"
    )
    phone_hit = RecognizerResult(
        start=48, end=57, score=0.95, entity_type="PHONE_NUMBER"
    )

    expected_json = (
        '{"text": "hello world, my name is ********. My number is: '
        '03-******4", "items": [{"start": 48, "end": 57, "entity_type": '
        '"PHONE_NUMBER", "text": "03-******", "operator": "mask"}, '
        '{"start": 24, "end": 32, "entity_type": "NAME", '
        '"text": "********", "operator": "mask"}]}'
    )
    run_engine_and_validate(
        source_text, operators, [name_hit, phone_hit], expected_json
    )
def test_given_anonymize_with_encrypt_then_text_returned_with_encrypted_content():
    """Encrypt a PERSON span, then decrypt it again with the same key.

    The decrypted result must restore the original text and span offsets.
    """
    prefix = "My name is "
    secret_name = "Chloë"
    key = "WmZq4t7w!z%C&F)J"
    person_span = RecognizerResult("PERSON", 11, 16, 0.8)
    encrypt_operators = {"PERSON": OperatorConfig("encrypt", {"key": key})}

    encrypted = AnonymizerEngine().anonymize(
        prefix + secret_name, [person_span], encrypt_operators
    )

    assert len(encrypted.items) == 1
    # Convert the operator result into the entity shape deanonymize() takes.
    entities = [AnonymizerResult.from_operator_result(encrypted.items[0])]
    decrypt_operators = {"PERSON": OperatorConfig(Decrypt.NAME, {"key": key})}
    decrypted = DeanonymizeEngine().deanonymize(
        encrypted.text, entities, decrypt_operators
    )

    assert decrypted.text == "My name is Chloë"
    assert len(decrypted.items) == 1
    restored = decrypted.items[0]
    assert restored.text == "Chloë"
    assert restored.end == 16
    assert restored.start == 11
    assert restored.entity_type == "PERSON"
Esempio n. 3
0
def test_given_invalid_json_then_we_fail_to_parse_it_to_operator_config():
    """A payload carrying only mask parameters must be rejected by from_json.

    The expected failure is an InvalidParamException whose message says the
    operator name is missing.
    """
    payload_without_operator = {
        "masking_char": "*",
        "chars_to_mask": 4,
        "from_end": True,
    }
    with pytest.raises(
        InvalidParamException,
        match="Invalid input, operator config must contain operator_name",
    ):
        OperatorConfig.from_json(payload_without_operator)
    def analyze_and_anonymize(self, text) -> str:
        """Analyze ``text`` (English) and return the anonymized text.

        The findings of ``self.analyzer`` are handed to ``self.anonymizer``
        together with a single "redact" operator registered under "DEFAULT".

        :param text: the text to analyze and anonymize
        :return: the ``text`` attribute of the anonymizer result
        """
        findings = self.analyzer.analyze(text=text, language='en')
        redacted = self.anonymizer.anonymize(
            text=text,
            analyzer_results=findings,
            operators={"DEFAULT": OperatorConfig("redact")},
        )
        return redacted.text
def test_given_name_and_phone_number_without_anonymizers_then_we_use_default():
    """No operator matches the detected entities, so the default is expected.

    The only configured key is "ABC"; the expected JSON shows both spans
    handled by "replace" with ``<ENTITY_TYPE>`` placeholders.
    """
    source_text = "hello world, my name is Jane Doe. My number is: 03-4453334"

    unrelated_operator = OperatorConfig(
        "mask",
        {"masking_char": "*", "chars_to_mask": 6, "from_end": True},
    )
    operators = {"ABC": unrelated_operator}

    name_hit = RecognizerResult(
        start=24, end=32, score=0.8, entity_type="NAME"
    )
    phone_hit = RecognizerResult(
        start=48, end=57, score=0.95, entity_type="PHONE_NUMBER"
    )

    expected_json = (
        '{"text": "hello world, my name is <NAME>. My number is: '
        '<PHONE_NUMBER>4", "items": [{"start": 46, "end": 60, '
        '"entity_type": "PHONE_NUMBER", "text": "<PHONE_NUMBER>", '
        '"operator": "replace"}, {"start": 24, "end": 30, '
        '"entity_type": "NAME", "text": "<NAME>", '
        '"operator": "replace"}]}'
    )
    run_engine_and_validate(
        source_text, operators, [name_hit, phone_hit], expected_json
    )
Esempio n. 6
0
 def __check_or_add_default_operator(operators: Dict[
     str, OperatorConfig]) -> \
         Dict[str, OperatorConfig]:
     """
     Return an operators mapping that is guaranteed to have a "DEFAULT" entry.

     The caller's dictionary is never modified: when the default operator has
     to be added, it is added to a shallow copy.

     :param operators: entity type -> OperatorConfig mapping; may be None or
     empty
     :return: the given mapping when it already holds a truthy "DEFAULT"
     entry, otherwise a new dict with the given entries plus
     "DEFAULT": OperatorConfig(DEFAULT)
     """
     if operators and operators.get("DEFAULT"):
         return operators
     # Copy rather than assign into the argument, so a config dict that the
     # caller reuses does not silently gain a "DEFAULT" key.
     with_default = dict(operators) if operators else {}
     with_default["DEFAULT"] = OperatorConfig(DEFAULT)
     return with_default
def test_given_operator_decrypt_then_we_fail():
    """Using "decrypt" as an anonymizer operator must raise.

    The expected failure is an InvalidParamException naming the invalid
    operator class.
    """
    source_text = "hello world, my name is Jane Doe. My number is: 03-4453334"
    decrypt_as_default = {
        "DEFAULT": OperatorConfig("decrypt", {"key": "key"}),
    }
    name_hit = RecognizerResult(
        start=24, end=32, score=0.8, entity_type="NAME"
    )
    anonymizer_engine = AnonymizerEngine()

    with pytest.raises(
        InvalidParamException, match="Invalid operator class 'decrypt'."
    ):
        anonymizer_engine.anonymize(
            source_text, [name_hit], decrypt_as_default
        )
Esempio n. 8
0
    def operators_config_from_json(data: Dict) -> Dict[str, 'OperatorConfig']:
        """
        Build an OperatorConfig for every entry of an operators mapping.

        :param data: mapping of key -> operator JSON; each value is passed to
        OperatorConfig.from_json. May be None.
        :return: dict with the same keys and the parsed OperatorConfig values,
        or an empty dict when data is None
        """
        if data is None:
            return {}
        parsed = {}
        for entity_key, operator_json in data.items():
            parsed[entity_key] = OperatorConfig.from_json(operator_json)
        return parsed
def test_given_redact_and_replace_then_we_anonymize_successfully():
    """Redact a NAME and replace a PHONE_NUMBER, then compare the JSON.

    The expected output shows the NAME span removed entirely and the
    PHONE_NUMBER span, configured with an empty ``new_value``, rendered as
    ``<PHONE_NUMBER>``.
    """
    source_text = "hello world, my name is Jane Doe. My number is: 03-4453334"

    redact_name = OperatorConfig("redact", {"new_value": "ANONYMIZED"})
    replace_phone = OperatorConfig("replace", {"new_value": ""})
    operators = {"NAME": redact_name, "PHONE_NUMBER": replace_phone}

    name_hit = RecognizerResult(
        start=24, end=32, score=0.8, entity_type="NAME"
    )
    phone_hit = RecognizerResult(
        start=48, end=57, score=0.95, entity_type="PHONE_NUMBER"
    )

    expected_json = (
        '{"text": "hello world, my name is . My number is: '
        '<PHONE_NUMBER>4", "items": [{"start": 40, "end": 54, '
        '"entity_type": "PHONE_NUMBER", "text": "<PHONE_NUMBER>", '
        '"operator": "replace"}, {"start": 24, "end": 24, '
        '"entity_type": "NAME", "text": "", "operator": '
        '"redact"}]}'
    )
    run_engine_and_validate(
        source_text, operators, [name_hit, phone_hit], expected_json
    )
Esempio n. 10
0
def test_given_valid_json_then_we_parse_it_to_operator_config():
    """from_json must take the operator name from "type" and keep the rest.

    Every key other than "type" is expected to end up in ``params``.
    """
    mask_params = {
        "masking_char": "*",
        "chars_to_mask": 4,
        "from_end": True,
    }
    parsed = OperatorConfig.from_json({"type": "mask", **mask_params})

    assert parsed.operator_name == "mask"
    assert parsed.params == mask_params
Esempio n. 11
0
def anonymizeName(text_to_anonymize):
    """Replace PERSON entities in the text with a generated token.

    The module-level ``analyzer`` is asked for PERSON entities only
    (English); the module-level ``anonymizer`` then replaces them with the
    value returned by one ``generateToken(20)`` call.

    :param text_to_anonymize: the text to process
    :return: the ``text`` attribute of the anonymizer result
    """
    person_hits = analyzer.analyze(
        text=text_to_anonymize, entities=["PERSON"], language='en'
    )
    token_replacement = OperatorConfig(
        "replace", {"new_value": generateToken(20)}
    )
    outcome = anonymizer.anonymize(
        text=text_to_anonymize,
        analyzer_results=person_hits,
        operators={"PERSON": token_replacement},
    )
    return outcome.text
def anonymize_text(text: str) -> str:
    """Anonymize ``text`` using the broadcast analyzer and anonymizer.

    Both engines are read from the ``.value`` of ``broadcasted_analyzer`` and
    ``broadcasted_anonymizer``. A "replace" operator with the value
    "<ANONYMIZED>" is registered under "DEFAULT".

    :param text: the text to analyze (English) and anonymize
    :return: the anonymized text; if anything in the pipeline raises, the
        error is printed and ``None`` is returned instead (despite the
        ``str`` annotation)
    """
    try:
        detector = broadcasted_analyzer.value
        replacer = broadcasted_anonymizer.value
        findings = detector.analyze(text=text, language="en")
        replace_all = {
            "DEFAULT": OperatorConfig("replace", {"new_value": "<ANONYMIZED>"})
        }
        outcome = replacer.anonymize(
            text=text, analyzer_results=findings, operators=replace_all
        )
        # Kept inside the try so a failing attribute access is reported the
        # same way as any other failure.
        return outcome.text
    except Exception as e:
        print(f"An exception occurred. {e}")
        return None
def test_given_short_key_then_we_fail():
    """Decrypting with the four-character key "1234" must be rejected.

    The expected failure is an InvalidParamException listing the accepted
    key lengths.
    """
    encrypted_text = "My name is S184CMt9Drj7QaKQ21JTrpYzghnboTF9pn/neN8JME0="
    encrypted_span = AnonymizerResult(start=11, end=55, entity_type="PERSON")
    deanonymizer = DeanonymizeEngine()
    short_key_operators = {
        "PERSON": OperatorConfig(Decrypt.NAME, {"key": "1234"}),
    }

    with pytest.raises(
        InvalidParamException,
        match="Invalid input, key must be of length 128, 192 or 256 bits",
    ):
        deanonymizer.deanonymize(
            encrypted_text, [encrypted_span], short_key_operators
        )
Esempio n. 14
0
def test_given_hash_then_we_anonymize_correctly(hash_type, result):
    """Hash overlapping entity spans and compare with the expected result.

    :param hash_type: value for the "hash_type" operator parameter; when
        falsy, the operator is created with no parameters
    :param result: expected output handed to ``run_engine_and_validate``
    """
    source_text = "hello world, my name is Jane Doe. My number is: 034453334"
    hash_params = {"hash_type": hash_type} if hash_type else {}
    operators = {"DEFAULT": OperatorConfig("hash", hash_params)}

    # FIRST_NAME (24-28) and LAST_NAME (29-32) lie inside the NAME span
    # (24-32), so the engine receives overlapping results.
    detected = [
        RecognizerResult(
            start=48, end=57, score=0.95, entity_type="PHONE_NUMBER"
        ),
        RecognizerResult(
            start=24, end=28, score=0.8, entity_type="FIRST_NAME"
        ),
        RecognizerResult(
            start=29, end=32, score=0.6, entity_type="LAST_NAME"
        ),
        RecognizerResult(start=24, end=32, score=0.8, entity_type="NAME"),
    ]
    run_engine_and_validate(source_text, operators, detected, result)
def test_given_operator_decrypt_with_valid_params_then_decrypt_text_successfully():
    """Decrypt an encrypted PERSON span with a valid key.

    The decrypt operator is registered under "DEFAULT"; the result must hold
    the plain name and its new offsets.
    """
    encrypted_text = "My name is S184CMt9Drj7QaKQ21JTrpYzghnboTF9pn/neN8JME0="
    encrypted_span = AnonymizerResult(start=11, end=55, entity_type="PERSON")
    deanonymizer = DeanonymizeEngine()
    decrypt_operators = {
        "DEFAULT": OperatorConfig(Decrypt.NAME, {"key": "WmZq4t7w!z%C&F)J"}),
    }

    decrypted = deanonymizer.deanonymize(
        encrypted_text, [encrypted_span], decrypt_operators
    )

    assert decrypted.text == "My name is Chloë"
    assert len(decrypted.items) == 1
    restored = decrypted.items[0]
    assert restored.text == "Chloë"
    assert restored.end == 16
    assert restored.start == 11
    assert restored.entity_type == "PERSON"
Esempio n. 16
0
def test_given_anonymize_with_encrypt_then_text_returned_with_encrypted_content():
    """Encrypt a PERSON span and check the cipher text decrypts to the name.

    The text before the span must be untouched, the span itself must differ
    from the plain name, and AESCipher.decrypt with the same key must give
    the plain name back.
    """
    prefix = "My name is "
    plain_name = "Chloë"
    span_start = 11
    key = "WmZq4t7w!z%C&F)J"
    person_span = RecognizerResult("PERSON", span_start, 16, 0.8)
    encrypt_operators = {"PERSON": OperatorConfig("encrypt", {"key": key})}

    anonymized_text = AnonymizerEngine().anonymize(
        prefix + plain_name, [person_span], encrypt_operators
    ).text

    assert anonymized_text[:span_start] == prefix
    cipher_text = anonymized_text[span_start:]
    assert cipher_text != plain_name
    assert AESCipher.decrypt(key.encode(), cipher_text) == plain_name
Esempio n. 17
0
def test_given_anonymize_called_with_error_scenarios_then_expected_errors_returned():
    """A multi-character masking_char must produce the expected error text.

    Any exception raised by the engine is converted to its message and
    compared with the expected string.
    """
    source_text = "hello world, my name is Jane Doe. My number is: 03-4453334"
    invalid_mask = OperatorConfig(
        "mask",
        {"masking_char": "non_character", "chars_to_mask": 6, "from_end": True},
    )
    operators = {"PHONE_NUMBER": invalid_mask}
    phone_hit = RecognizerResult("PHONE_NUMBER", 48, 57, 0.95)
    anonymizer_engine = AnonymizerEngine()

    try:
        outcome = anonymizer_engine.anonymize(
            source_text, [phone_hit], operators
        )
    except Exception as error:
        outcome = str(error)

    assert outcome == "Invalid input, masking_char must be a character"
        assert result_a.end == same_result_in_content.get("end")


@pytest.mark.parametrize(
    "anonymizer_json, result",
    [({
        "anonymizers": {}
    }, {}), ({}, {}),
     ({
         "anonymizers": {
             "PHONE": {
                 "type": "replace"
             }
         }
     }, {
         "PHONE": OperatorConfig("replace")
     }),
     ({
         "anonymizers": {
             "PHONE": {
                 "type": "redact",
                 "param": "param",
                 "param_1": "param_1"
             }
         }
     }, {
         "PHONE":
         OperatorConfig("redact", {
             "param": "param",
             "param_1": "param_1"
         })
Esempio n. 19
0
from presidio_analyzer import AnalyzerEngine, PatternRecognizer
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities.engine import OperatorConfig

# Sample sentence with a person name and a phone number.
text_to_anonymize = "His name is Tom and his phone number is 212-555-5555"

analyzer = AnalyzerEngine()
anonymizer = AnonymizerEngine()

# Step 1: analyze the English text and show the raw findings.
analyzer_results = analyzer.analyze(text=text_to_anonymize, language='en')
print("\nPII Detection:", analyzer_results, sep="\n")

# Step 2: anonymize with a single "replace" operator registered under
# "DEFAULT", then show the result as JSON.
replace_operators = {
    "DEFAULT": OperatorConfig("replace", {"new_value": "<ANONYMIZED>"}),
}
anonymized_results = anonymizer.anonymize(
    text=text_to_anonymize,
    analyzer_results=analyzer_results,
    operators=replace_operators,
)
print("\nPII Anonymization:", anonymized_results.to_json(), sep="\n")
Esempio n. 20
0
            "chars_to_mask": 4,
            "from_end": True
        })


def test_given_two_identical_entities_then_we_verify_they_are_equal():
    """Two configs built from the same name and params must compare equal."""
    # params is a dict; the previous {"key", "key"} was a set literal with a
    # duplicated element, so the test never exercised dict params.
    one = OperatorConfig("name", {"key": "key"})
    two = OperatorConfig("name", {"key": "key"})
    assert one == two


@pytest.mark.parametrize(
    # fmt: off
    "anonymizer_config",
    [
        # Same params, different operator name.
        OperatorConfig("name1", {"key": "key"}),
        # Different operator name and empty params.
        OperatorConfig("name1", {}),
    ],
    # fmt: on
)
def test_given_two_different_entities_then_we_verify_they_are_equal(
        anonymizer_config):
    """Configs that differ from the reference must compare unequal.

    Despite the function name, the assertion checks inequality.

    :param anonymizer_config: a config that differs from the reference one
    """
    # params is a dict; the previous {"key", "key"} was a set literal with a
    # duplicated element.
    one = OperatorConfig("name", {"key": "key"})
    assert one != anonymizer_config


@pytest.mark.parametrize(
    # fmt: off
    "class_name",
    ["hash", "mask", "redact", "replace"],
    # fmt: on
Esempio n. 21
0
def test_given_two_identical_entities_then_we_verify_they_are_equal():
    """Two configs built from the same name and params must compare equal."""
    # params is a dict; the previous {"key", "key"} was a set literal with a
    # duplicated element, so the test never exercised dict params.
    one = OperatorConfig("name", {"key": "key"})
    two = OperatorConfig("name", {"key": "key"})
    assert one == two
Esempio n. 22
0
def test_given_two_different_entities_then_we_verify_they_are_equal(
        anonymizer_config):
    """The reference config must compare unequal to the supplied one.

    Despite the function name, the assertion checks inequality.

    :param anonymizer_config: a config that differs from the reference one
    """
    # params is a dict; the previous {"key", "key"} was a set literal with a
    # duplicated element.
    one = OperatorConfig("name", {"key": "key"})
    assert one != anonymizer_config
Esempio n. 23
0
def test_given_json_then_anonymizer_config_is_created_properly(class_name):
    """from_json must split "type" from the remaining keys.

    :param class_name: operator name placed under the "type" key
    """
    extra_params = {"param_1": "my_parameter"}
    payload = {"type": class_name, **extra_params}

    parsed = OperatorConfig.from_json(payload)

    assert parsed.operator_name == class_name
    assert parsed.params == extra_params