def test_given_name_and_phone_number_without_anonymizers_then_we_use_default():
    """Check entities without a matching operator get the default replace.

    The only configured operator is keyed by "ABC", which matches neither
    analyzer result, so the expected output shows both entities replaced by
    their ``<ENTITY_TYPE>`` placeholder with operator "replace".
    """
    source_text = "hello world, my name is Jane Doe. My number is: 03-4453334"

    name_span = RecognizerResult(
        entity_type="NAME", start=24, end=32, score=0.8)
    phone_span = RecognizerResult(
        entity_type="PHONE_NUMBER", start=48, end=57, score=0.95)

    # Mask settings registered under an entity type that is never detected.
    unrelated_mask = OperatorConfig(
        "mask",
        {"masking_char": "*", "chars_to_mask": 6, "from_end": True},
    )

    expected_json = (
        '{"text": "hello world, my name is <NAME>. My number is: '
        '<PHONE_NUMBER>4", "items": [{"start": 46, "end": 60, '
        '"entity_type": "PHONE_NUMBER", "text": "<PHONE_NUMBER>", '
        '"operator": "replace"}, {"start": 24, "end": 30, '
        '"entity_type": "NAME", "text": "<NAME>", '
        '"operator": "replace"}]}')

    run_engine_and_validate(
        source_text,
        {"ABC": unrelated_mask},
        [name_span, phone_span],
        expected_json,
    )
def test_given_name_and_phone_number_then_we_anonymize_correctly():
    """Check a DEFAULT mask and an entity-specific mask are both applied.

    NAME has no dedicated operator, so the expected output shows it masked by
    the "DEFAULT" config; PHONE_NUMBER uses its own config and is masked from
    the end.
    """
    source_text = "hello world, my name is Jane Doe. My number is: 03-4453334"

    fallback_mask = OperatorConfig(
        "mask",
        {"masking_char": "*", "chars_to_mask": 20, "from_end": False},
    )
    phone_mask = OperatorConfig(
        "mask",
        {"masking_char": "*", "chars_to_mask": 6, "from_end": True},
    )
    operators = {"DEFAULT": fallback_mask, "PHONE_NUMBER": phone_mask}

    detected = [
        RecognizerResult(entity_type="NAME", start=24, end=32, score=0.8),
        RecognizerResult(
            entity_type="PHONE_NUMBER", start=48, end=57, score=0.95),
    ]

    expected_json = (
        '{"text": "hello world, my name is ********. My number is: '
        '03-******4", "items": [{"start": 48, "end": 57, "entity_type": '
        '"PHONE_NUMBER", "text": "03-******", "operator": "mask"}, '
        '{"start": 24, "end": 32, "entity_type": "NAME", '
        '"text": "********", "operator": "mask"}]}')

    run_engine_and_validate(source_text, operators, detected, expected_json)
Exemple #3
0
def test_given_several_results_then_we_filter_them_and_get_correct_mocked_result(
):
    """Run anonymize with ``_operate`` stubbed and check the stub's result.

    Nine analyzer results are passed in. The engine's ``_operate`` attribute
    is replaced by the module-level ``_operate`` stub, which asserts on the
    entities it receives and returns a canned result; this test then checks
    that the canned result is what ``anonymize`` hands back.
    """
    # (entity_type, start, end, score), in the order given to the engine.
    raw_spans = [
        ("SSN", 48, 57, 0.55),
        ("FULL_NAME", 24, 32, 0.6),
        ("FIRST_NAME", 24, 28, 0.9),
        ("LAST_NAME", 29, 32, 0.6),
        ("NAME", 24, 30, 0.8),
        ("BLA", 18, 32, 0.8),
        ("BLA", 23, 35, 0.8),
        ("BLA", 28, 36, 0.8),
        ("PHONE_NUMBER", 48, 57, 0.95),
    ]
    analyzer_results = [
        RecognizerResult(
            entity_type=entity, start=begin, end=finish, score=confidence)
        for entity, begin, finish, confidence in raw_spans
    ]

    default_operator = OperatorConfig("replace", {})
    default_operator.operator_name = ""

    engine = AnonymizerEngine()
    # Replace the real implementation with the asserting stub.
    engine._operate = _operate

    outcome = engine.anonymize(
        "hello world, my name is Jane Doe. My number is: 034453334",
        analyzer_results,
        {"DEFAULT": default_operator},
    )

    assert outcome.text == "Number: I am your new text!"
    assert len(outcome.items) == 1
    only_item = outcome.items[0]
    assert only_item.operator == "hash"
    assert only_item.entity_type == "type"
    assert only_item.start == 0
    assert only_item.end == 35
    assert only_item.text == "text"
def test_given_hash_then_we_anonymize_correctly(hash_type, result):
    """Run the "hash" operator as DEFAULT over overlapping entities.

    :param hash_type: value for the operator's "hash_type" parameter; when
        falsy, the operator is configured with no parameters at all.
    :param result: passed to ``run_engine_and_validate`` as the last
        argument (presumably the expected output - confirm against helper).
    """
    source_text = "hello world, my name is Jane Doe. My number is: 034453334"
    hash_params = {"hash_type": hash_type} if hash_type else {}
    operators = {"DEFAULT": OperatorConfig("hash", hash_params)}

    detected = [
        RecognizerResult(
            entity_type="PHONE_NUMBER", start=48, end=57, score=0.95),
        RecognizerResult(
            entity_type="FIRST_NAME", start=24, end=28, score=0.8),
        RecognizerResult(
            entity_type="LAST_NAME", start=29, end=32, score=0.6),
        RecognizerResult(entity_type="NAME", start=24, end=32, score=0.8),
    ]

    run_engine_and_validate(source_text, operators, detected, result)
Exemple #5
0
def test_given_analyzer_result_with_an_incorrect_text_positions_then_we_fail(
        original_text, start, end):
    """Expect InvalidParamException for an analyzer result with bad positions.

    :param original_text: text handed to the engine; the expected message
        hard-codes a text length of 11.
    :param start: start offset of the analyzer result.
    :param end: end offset of the analyzer result.
    """
    engine = AnonymizerEngine()
    bad_span = RecognizerResult("type", start, end, 0.5)
    # Used as the ``match`` pattern for pytest.raises.
    expected_message = (
        f"Invalid analyzer result, start: {start} and end: "
        f"{end}, while text length is only 11."
    )

    with pytest.raises(InvalidParamException, match=expected_message):
        engine.anonymize(original_text, [bad_span], {})
Exemple #6
0
def test_given_default_anonymizer_then_we_use_it():
    """Check the "DEFAULT" replace operator is applied to an SSN entity."""
    engine = AnonymizerEngine()
    ssn_span = RecognizerResult("SSN", 7, 17, 0.8)
    operators = {
        "DEFAULT": OperatorConfig("replace", {"new_value": "and thank you"}),
    }

    anonymized = engine.anonymize("please REPLACE ME.", [ssn_span], operators)

    assert anonymized.text == "please and thank you."
Exemple #7
0
def _operate(text: str, text_metadata: List[PIIEntity],
             operators: Dict[str, OperatorConfig],
             operator: OperatorType) -> EngineResult:
    """Asserting stand-in assigned to ``AnonymizerEngine._operate`` in tests.

    Verifies the arguments it is called with and returns a fixed result.

    :param text: must equal the fixed sample sentence.
    :param text_metadata: must hold exactly four entities, including the
        PHONE_NUMBER span and the three "BLA" spans listed below.
    :param operators: must hold exactly one entry, keyed "DEFAULT".
    :param operator: must be ``OperatorType.Anonymize``.
    :return: a canned ``EngineResult`` with a single ``OperatorResult``.
    """
    assert text == "hello world, my name is Jane Doe. My number is: 034453334"
    assert len(text_metadata) == 4

    must_be_present = [
        RecognizerResult(
            entity_type="PHONE_NUMBER", start=48, end=57, score=0.95),
        RecognizerResult(entity_type="BLA", start=18, end=32, score=0.8),
        RecognizerResult(entity_type="BLA", start=23, end=35, score=0.8),
        RecognizerResult(entity_type="BLA", start=28, end=36, score=0.8),
    ]
    for entity in must_be_present:
        assert entity in text_metadata

    assert len(operators) == 1
    assert operators["DEFAULT"]
    assert operator == OperatorType.Anonymize

    canned_item = OperatorResult("text", "hash", 0, 35, "type")
    return EngineResult("Number: I am your new text!", [canned_item])
def test_given_operator_decrypt_then_we_fail():
    """Expect anonymize to reject the "decrypt" operator."""
    source_text = "hello world, my name is Jane Doe. My number is: 03-4453334"
    operators = {"DEFAULT": OperatorConfig("decrypt", {"key": "key"})}
    name_span = RecognizerResult(
        entity_type="NAME", start=24, end=32, score=0.8)
    engine = AnonymizerEngine()

    expected_error = pytest.raises(
        InvalidParamException,
        match="Invalid operator class 'decrypt'.",
    )
    with expected_error:
        engine.anonymize(source_text, [name_span], operators)
def test_given_intersacting_entities_then_we_anonymize_correctly():
    """Anonymize overlapping entities with no operators configured.

    Six overlapping analyzer results are supplied. The expected output
    contains replace items for FULL_NAME, LAST_NAME, PHONE_NUMBER and SSN
    only; FIRST_NAME and NAME do not appear in it.
    """
    source_text = "hello world, my name is Jane Doe. My number is: 03-4453334"

    # (entity_type, start, end, score), in the order given to the engine.
    raw_spans = (
        ("FULL_NAME", 24, 32, 0.6),
        ("PHONE_NUMBER", 48, 56, 0.95),
        ("SSN", 54, 57, 0.8),
        ("FIRST_NAME", 24, 28, 0.9),
        ("LAST_NAME", 29, 33, 0.6),
        ("NAME", 24, 30, 0.8),
    )
    detected = [
        RecognizerResult(
            entity_type=entity, start=begin, end=finish, score=confidence)
        for entity, begin, finish, confidence in raw_spans
    ]

    expected_json = (
        '{"text": "hello world, my name is <FULL_NAME><LAST_NAME> My '
        'number is: <PHONE_NUMBER><SSN>4", "items": [{"start": 75, '
        '"end": 80, "entity_type": "SSN", "text": "<SSN>", '
        '"operator": "replace"}, {"start": 61, "end": 75, '
        '"entity_type": "PHONE_NUMBER", "text": "<PHONE_NUMBER>", '
        '"operator": "replace"}, {"start": 35, "end": 46, '
        '"entity_type": "LAST_NAME", "text": "<LAST_NAME>", '
        '"operator": "replace"}, {"start": 24, "end": 35, '
        '"entity_type": "FULL_NAME", "text": "<FULL_NAME>", '
        '"operator": "replace"}]}')

    run_engine_and_validate(source_text, {}, detected, expected_json)
def test_given_redact_and_replace_then_we_anonymize_successfully():
    """Combine a redact operator for NAME with a replace for PHONE_NUMBER.

    Per the expected output, the NAME span becomes an empty string and the
    PHONE_NUMBER span, whose "new_value" is empty, becomes the
    ``<PHONE_NUMBER>`` placeholder.
    """
    source_text = "hello world, my name is Jane Doe. My number is: 03-4453334"

    redact_name = OperatorConfig("redact", {"new_value": "ANONYMIZED"})
    replace_phone = OperatorConfig("replace", {"new_value": ""})
    operators = {"NAME": redact_name, "PHONE_NUMBER": replace_phone}

    detected = [
        RecognizerResult(entity_type="NAME", start=24, end=32, score=0.8),
        RecognizerResult(
            entity_type="PHONE_NUMBER", start=48, end=57, score=0.95),
    ]

    expected_json = (
        '{"text": "hello world, my name is . My number is: '
        '<PHONE_NUMBER>4", "items": [{"start": 40, "end": 54, '
        '"entity_type": "PHONE_NUMBER", "text": "<PHONE_NUMBER>", '
        '"operator": "replace"}, {"start": 24, "end": 24, '
        '"entity_type": "NAME", "text": "", "operator": '
        '"redact"}]}')

    run_engine_and_validate(source_text, operators, detected, expected_json)
Exemple #11
0
    def analyzer_results_from_json(
            data: List[Dict]) -> List['RecognizerResult']:
        """
        Convert JSON-style analyzer results into RecognizerResult objects.

        :param data: list of dicts, each passed to RecognizerResult.from_json
        :return: the converted results, in the same order as ``data``
        :raises InvalidParamException: when ``data`` is None
        """
        if data is None:
            raise InvalidParamException(
                "Invalid input, "
                "request must contain analyzer results")

        converted = []
        for raw_result in data:
            converted.append(RecognizerResult.from_json(raw_result))
        return converted
def test_given_anonymize_called_with_error_scenarios_then_expected_errors_returned(
):
    """Expect a multi-character ``masking_char`` to be rejected.

    The mask operator for PHONE_NUMBER is configured with the string
    "non_character" as its masking character; anonymize must raise
    InvalidParamException with the exact message asserted below.
    """
    text = "hello world, my name is Jane Doe. My number is: 03-4453334"
    anonymizers = {
        "PHONE_NUMBER":
        OperatorConfig("mask", {
            "masking_char": "non_character",
            "chars_to_mask": 6,
            "from_end": True
        })
    }
    analyzer_results = [RecognizerResult("PHONE_NUMBER", 48, 57, 0.95)]

    engine = AnonymizerEngine()

    # pytest.raises fails the test with a clear message when nothing is
    # raised and pins the exception type, matching the other failure tests
    # here, instead of catching any Exception and comparing its text.
    with pytest.raises(InvalidParamException) as exc_info:
        engine.anonymize(text, analyzer_results, anonymizers)

    # Compare the whole message (not a regex search) to keep the check exact.
    assert str(exc_info.value) == (
        "Invalid input, masking_char must be a character")
def test_given_anonymize_with_encrypt_then_text_returned_with_encrypted_content(
):
    """Encrypt a PERSON span and confirm it decrypts back to the original.

    The text before the span must be left untouched, the span itself must
    differ from the plain name, and ``AESCipher.decrypt`` with the same key
    must return the plain name.
    """
    untouched_prefix = "My name is "
    plain_name = "Chloë"
    key = "WmZq4t7w!z%C&F)J"
    start_index, end_index = 11, 16

    person_span = RecognizerResult("PERSON", start_index, end_index, 0.8)
    operators = {"PERSON": OperatorConfig("encrypt", {"key": key})}

    engine_output = AnonymizerEngine().anonymize(
        untouched_prefix + plain_name, [person_span], operators)
    anonymized_text = engine_output.text

    assert anonymized_text[:start_index] == untouched_prefix

    # Everything after the prefix is the encrypted form of the name.
    cipher_text = anonymized_text[start_index:]
    assert cipher_text != plain_name

    round_tripped = AESCipher.decrypt(key.encode(), cipher_text)
    assert round_tripped == plain_name
        anonymizer_json, result):
    anonymizers_config = AppEntitiesConvertor.operators_config_from_json(
        anonymizer_json.get("anonymizers"))
    assert anonymizers_config == result


@pytest.mark.parametrize(
    "analyzer_json, result",
    [
        # Empty input converts to an empty list.
        ([], []),
        # A single JSON dict converts to the equivalent RecognizerResult.
        (
            [{"start": 24, "end": 32, "score": 0.8, "entity_type": "NAME"}],
            [RecognizerResult("NAME", 24, 32, 0.8)],
        ),
    ],
)
def test_given_anonymize_called_with_multiple_scenarios_then_expected_results_returned(
        analyzer_json, result):
    """Check ``AppEntitiesConvertor.analyzer_results_from_json`` conversion.

    :param analyzer_json: JSON-style list handed to the converter.
    :param result: list the converter is expected to return.
    """
    converted = AppEntitiesConvertor.analyzer_results_from_json(analyzer_json)

    assert converted == result


def test_given_valid_json_then_we_convert_it_to_decrypt_entities_list():
    data = {
        "text": "THIS IS MY TEXT",
        "anonymizer_results": [{
            "start": 0,
Exemple #15
0
def test_given_none_as_anonymziers_list_then_we_fall_to_default():
    """Call anonymize without operators and expect the ``<SSN>`` placeholder."""
    engine = AnonymizerEngine()
    ssn_span = RecognizerResult("SSN", 7, 17, 0.8)

    # No operators argument is passed at all.
    anonymized = engine.anonymize("please REPLACE ME.", [ssn_span])

    assert anonymized.text == "please <SSN>."
Exemple #16
0
def test_given_empty_text_to_engine_then_we_fail():
    """Expect InvalidParamException when the text to anonymize is empty."""
    engine = AnonymizerEngine()
    ssn_span = RecognizerResult("SSN", 0, 1, 0.5)

    expected_error = pytest.raises(
        InvalidParamException,
        match="Invalid input, text can not be empty",
    )
    with expected_error:
        engine.anonymize("", [ssn_span], {})
Exemple #17
0
def test_given_invalid_json_for_anonymizers_then_we_fail(
        anonymizers, result_text):
    """Expect InvalidParamException for an invalid operators configuration.

    :param anonymizers: operators argument passed to ``anonymize``.
    :param result_text: pattern the exception message must match.
    """
    with pytest.raises(InvalidParamException, match=result_text):
        # Everything is built inside the context, so an error raised while
        # constructing the engine or the result is captured as well.
        engine = AnonymizerEngine()
        spans = [RecognizerResult("number", 0, 4, 0)]
        engine.anonymize("this is my text", spans, anonymizers)