def test_given_anonymize_with_encrypt_then_text_returned_with_encrypted_content():
    """Round-trip a PERSON entity through the encrypt and decrypt operators.

    The name is anonymized with the "encrypt" operator, then the engine
    output is handed to DeanonymizeEngine with the same key. The decrypted
    text and the single restored item must match the original input.
    """
    prefix = "My name is "
    person_name = "Chloë"
    original_text = prefix + person_name
    span_start, span_end = 11, 16
    secret_key = "WmZq4t7w!z%C&F)J"
    recognized = [RecognizerResult("PERSON", span_start, span_end, 0.8)]
    encrypt_operators = {"PERSON": OperatorConfig("encrypt", {"key": secret_key})}

    anonymizer = AnonymizerEngine()
    encrypted = anonymizer.anonymize(original_text, recognized, encrypt_operators)

    # Exactly one entity was submitted, so exactly one item must come back.
    assert len(encrypted.items) == 1
    entities_to_restore = [AnonymizerResult.from_operator_result(encrypted.items[0])]

    deanonymizer = DeanonymizeEngine()
    decrypt_operators = {"PERSON": OperatorConfig(Decrypt.NAME, {"key": secret_key})}
    restored = deanonymizer.deanonymize(
        encrypted.text, entities_to_restore, decrypt_operators
    )

    assert restored.text == "My name is Chloë"
    assert len(restored.items) == 1
    restored_item = restored.items[0]
    assert restored_item.text == "Chloë"
    assert restored_item.end == 16
    assert restored_item.start == 11
    assert restored_item.entity_type == "PERSON"
def test_given_empty_text_to_engine_then_we_fail():
    """Anonymizing an empty string must raise InvalidParamException."""
    anonymizer = AnonymizerEngine()
    ssn_result = RecognizerResult("SSN", 0, 1, 0.5)
    expected_message = "Invalid input, text can not be empty"

    with pytest.raises(InvalidParamException, match=expected_message):
        anonymizer.anonymize("", [ssn_result], {})
def test_given_default_anonymizer_then_we_use_it():
    """A config registered under "DEFAULT" is applied to the SSN entity.

    No SSN-specific entry is supplied, so the replacement text must come
    from the "DEFAULT" entry.
    """
    anonymizer = AnonymizerEngine()
    source_text = "please REPLACE ME."
    ssn_result = RecognizerResult("SSN", 7, 17, 0.8)
    default_config = AnonymizerConfig("replace", {"new_value": "and thank you"})
    operators = {"DEFAULT": default_config}

    anonymized = anonymizer.anonymize(source_text, [ssn_result], operators)

    assert anonymized.text == "please and thank you."
def test_given_analyzer_result_with_an_incorrect_text_positions_then_we_fail(
    original_text, start, end
):
    """
    Check that the engine rejects an analyzer result with invalid offsets.

    :param original_text: text handed to the engine; the expected message
        hard-codes a length of 11, so the (not visible here) parametrization
        presumably supplies 11-character text - verify against the caller
    :param start: start offset of the analyzer result
    :param end: end offset of the analyzer result
    """
    import re

    engine = AnonymizerEngine()
    analyzer_result = RecognizerResult("type", start, end, 0.5)
    err_msg = (
        f"Invalid analyzer result, start: {start} and end: "
        f"{end}, while text length is only 11."
    )
    # pytest treats `match` as a regular expression (re.search), so the
    # message is escaped to compare it literally; otherwise the trailing "."
    # would match any character.
    with pytest.raises(InvalidParamException, match=re.escape(err_msg)):
        engine.anonymize(original_text, [analyzer_result], {})
def test_given_several_anonymizers_then_we_use_the_correct_one():
    """The config registered for PHONE_NUMBER is the one the engine applies.

    The config's anonymizer_class is swapped for MockAnonymizer, and the
    phone number span must be replaced by the text asserted below.
    """
    phone_result = RecognizerResult.from_json(
        {"score": 0.5, "entity_type": "PHONE_NUMBER", "start": 8, "end": 18}
    )
    mock_config = AnonymizerConfig("replace", {})
    mock_config.anonymizer_class = MockAnonymizer

    anonymizer = AnonymizerEngine()
    anonymized = anonymizer.anonymize(
        "Number: 0554555556", [phone_result], {"PHONE_NUMBER": mock_config}
    )

    assert anonymized.text == "Number: I am your new text!"
Esempio n. 6
0
    def handle_analyzer_results_json(cls, data: Dict) -> AnalyzerResults:
        """
        Build an AnalyzerResults collection from the request payload.

        Every entry found under the "analyzer_results" key is converted with
        RecognizerResult.from_json and appended in the order it appears.

        :param data: request payload; only its "analyzer_results" key is read
        :return: AnalyzerResults holding one RecognizerResult per entry
        :raises InvalidParamException: if "analyzer_results" is missing or None
        """
        parsed_results = AnalyzerResults()
        raw_results = data.get("analyzer_results")

        # Only a missing/None value is rejected; an empty list passes through.
        if raw_results is None:
            cls.logger.debug(
                "invalid input, json missing field: analyzer_results_json"
            )
            raise InvalidParamException(
                "Invalid input, request must contain analyzer results"
            )

        for raw_result in raw_results:
            parsed_results.append(RecognizerResult.from_json(raw_result))
        return parsed_results
Esempio n. 7
0
def test_given_anonymize_with_encrypt_then_text_returned_with_encrypted_content(
):
    """Encrypt a PERSON entity and check the output decrypts to the name.

    The text before the entity must be left untouched, the entity span must
    differ from the plain name, and AESCipher.decrypt with the same key must
    recover the plain name.
    """
    prefix = "My name is "
    person_name = "Chloë"
    original_text = prefix + person_name
    span_start = 11
    span_end = 16
    secret_key = "WmZq4t7w!z%C&F)J"
    recognized = [RecognizerResult("PERSON", span_start, span_end, 0.8)]
    operators = {"PERSON": AnonymizerConfig("encrypt", {"key": secret_key})}

    anonymizer = AnonymizerEngine()
    anonymized_text = anonymizer.anonymize(original_text, recognized, operators).text

    # Everything ahead of the entity span is expected to be unchanged.
    assert anonymized_text[:span_start] == prefix

    ciphertext = anonymized_text[span_start:]
    assert ciphertext != person_name

    decrypted = AESCipher.decrypt(secret_key.encode(), ciphertext)
    assert decrypted == person_name
def test_given_none_as_anonymziers_list_then_we_fall_to_default():
    """Calling anonymize without an anonymizers argument yields "<SSN>"."""
    anonymizer = AnonymizerEngine()
    source_text = "please REPLACE ME."
    ssn_result = RecognizerResult("SSN", 7, 17, 0.8)

    anonymized = anonymizer.anonymize(source_text, [ssn_result])

    assert anonymized.text == "please <SSN>."
def create_recognizer_result(entity_type: str, score: float, start: int, end: int):
    """
    Build a RecognizerResult through its from_json constructor.

    :param entity_type: value stored under the "entity_type" key
    :param score: value stored under the "score" key
    :param start: value stored under the "start" key
    :param end: value stored under the "end" key
    :return: whatever RecognizerResult.from_json returns for that payload
    """
    return RecognizerResult.from_json(
        {
            "entity_type": entity_type,
            "score": score,
            "start": start,
            "end": end,
        }
    )
Esempio n. 10
0
def test_given_json_for_creating_recognizer_result_without_text_then_creation_fails(
    request_json, result_text
):
    """
    Check that RecognizerResult.from_json rejects the given payload.

    :param request_json: payload passed to RecognizerResult.from_json
    :param result_text: expected value of the raised exception's err_msg
    """
    with pytest.raises(InvalidParamException) as exc_info:
        RecognizerResult.from_json(request_json)

    raised = exc_info.value
    assert result_text == raised.err_msg