def test_given_specific_anonymizer_then_we_use_it():
    """An entity-specific config must be chosen over the ``DEFAULT`` entry."""
    engine = AnonymizerEngine()
    # Characters 7..17 of the input text are "REPLACE ME".
    ssn_finding = AnalyzerResult("SSN", 7, 17, 0.8)
    default_config = AnonymizerConfig("replace",
                                      {"new_value": "and thank you"})
    redact_config = AnonymizerConfig("redact", {})
    anonymizers = {"DEFAULT": default_config, "SSN": redact_config}

    anonymized = engine.anonymize("please REPLACE ME.", [ssn_finding],
                                  anonymizers)

    # The SSN span is gone instead of being swapped for "and thank you",
    # which shows the "SSN" entry was used rather than "DEFAULT".
    assert anonymized == "please ."
def test_given_several_anonymizers_then_we_use_the_correct_one():
    """The engine must run the anonymizer class attached to the config."""
    phone_finding = AnalyzerResult.from_json(
        {"score": 0.5, "entity_type": "PHONE_NUMBER", "start": 8, "end": 18}
    )
    phone_config = AnonymizerConfig("replace", {})
    # Override the class so the output reveals which anonymizer actually ran.
    phone_config.anonymizer_class = MockAnonymizer

    engine = AnonymizerEngine()
    anonymized = engine.anonymize(
        "Number: 0554555556",
        [phone_finding],
        {"PHONE_NUMBER": phone_config},
    )

    assert anonymized == "Number: I am your new text!"
def obfuscate(text):
    """Analyze ``text`` and return whatever the anonymizer engine produces.

    Two configs are passed to the engine: a ``mask`` config keyed by
    ``"PHONE_NUMBER"`` and a ``replace`` config keyed by ``"DEFAULT"`` whose
    replacement value is ``<ANONYMIZED>``.
    """
    findings = analyze(text)
    engine = AnonymizerEngine()

    default_config = AnonymizerConfig("replace",
                                      {"new_value": "<ANONYMIZED>"})
    phone_config = AnonymizerConfig(
        "mask",
        {
            "type": "mask",
            "masking_char": "*",
            "chars_to_mask": 12,
            "from_end": True,
        },
    )

    return engine.anonymize(
        text=text,
        analyzer_results=findings,
        anonymizers_config={
            "DEFAULT": default_config,
            "PHONE_NUMBER": phone_config,
        },
    )
 def run_anonymizer(self, text):
     """Analyze ``text`` and anonymize it when the analyzer reports findings.

     Returns the result of ``self.anonymizer_engine.anonymize`` when the
     analyzer returns a non-empty result, otherwise ``None``.
     """
     findings = self.analyzer_engine.analyze(
         text=text,
         entities=[],
         language='en',
         score_threshold=0.5,
     )
     if not findings:
         # Nothing was detected: callers get None, not the original text.
         return None

     # NOTE(review): other snippets configure "replace" with a "new_value"
     # key; confirm that "replace_text" is really the intended parameter.
     person_config = AnonymizerConfig("replace",
                                      {"replace_text": "[GDPRREDACT]"})
     return self.anonymizer_engine.anonymize(
         text, findings, {"PERSON": person_config}
     )
def test_given_default_anonymizer_then_we_use_it():
    """With only a ``DEFAULT`` entry configured, it is applied to the span."""
    engine = AnonymizerEngine()
    # Characters 7..17 of the input text are "REPLACE ME".
    ssn_finding = RecognizerResult("SSN", 7, 17, 0.8)
    default_config = AnonymizerConfig(
        "replace", {"new_value": "and thank you"}
    )

    outcome = engine.anonymize(
        "please REPLACE ME.", [ssn_finding], {"DEFAULT": default_config}
    )

    assert outcome.text == "please and thank you."
Exemple #6
0
def anonymizeName(text_to_anonymize):
    """Anonymize ``PERSON`` findings using a ``replace`` config.

    The replacement value is obtained from ``generateToken(20)``, called once
    per invocation. Returns the anonymizer engine's result unchanged.
    """
    person_findings = analyzer.analyze(
        text=text_to_anonymize,
        entities=["PERSON"],
        language='en',
    )
    person_config = AnonymizerConfig(
        "replace", {"new_value": generateToken(20)}
    )
    return anonymizer.anonymize(
        text=text_to_anonymize,
        analyzer_results=person_findings,
        anonymizers_config={"PERSON": person_config},
    )
Exemple #7
0
def anonymizeEmail(text_to_anonymize):
    """Anonymize ``EMAIL_ADDRESS`` findings using a ``replace`` config.

    The replacement value is obtained from ``generateToken(20)``, called once
    per invocation. Returns the anonymizer engine's result unchanged.
    """
    email_findings = analyzer.analyze(
        text=text_to_anonymize,
        entities=["EMAIL_ADDRESS"],
        language='en',
    )
    email_config = AnonymizerConfig(
        "replace", {"new_value": generateToken(20)}
    )
    return anonymizer.anonymize(
        text=text_to_anonymize,
        analyzer_results=email_findings,
        anonymizers_config={"EMAIL_ADDRESS": email_config},
    )
def anonymize_text(text: str) -> str:
    """Run the analyzer on English ``text`` and anonymize its findings.

    A single ``"DEFAULT"`` config is supplied: ``replace`` with the value
    ``<ANONYMIZED>``. The engine's return value is passed back as-is.
    """
    analyzer = AnalyzerEngine()
    anonymizer = AnonymizerEngine()
    findings = analyzer.analyze(text=text, language="en")
    default_config = AnonymizerConfig("replace",
                                      {"new_value": "<ANONYMIZED>"})
    return anonymizer.anonymize(
        text=text,
        analyzer_results=findings,
        anonymizers_config={"DEFAULT": default_config},
    )
 def __get_anonymizer_config_by_entity_type(
         entity_type: str,
         anonymizers_config: Dict[str, AnonymizerConfig],
 ) -> AnonymizerConfig:
     """Choose the anonymizer config to use for ``entity_type``.

     Lookup order (the first truthy candidate wins):

     1. the entry registered under ``entity_type``;
     2. the entry registered under ``"DEFAULT"``;
     3. a fresh ``AnonymizerConfig(DEFAULT, {})`` built from the
        ``DEFAULT`` constant.
     """
     # ``or`` short-circuits, so the fallback config is only constructed
     # when neither lookup yields a truthy value.
     return (
         anonymizers_config.get(entity_type)
         or anonymizers_config.get("DEFAULT")
         or AnonymizerConfig(DEFAULT, {})
     )
def anonymize_text(text: str) -> str:
    """Analyze English ``text`` and return the anonymized text.

    A single ``"DEFAULT"`` operator is supplied: ``replace`` with the value
    ``<ANONYMIZED>``. Any exception raised along the way is printed and
    swallowed, in which case the function returns ``None``.
    """
    try:
        analyzer = AnalyzerEngine()
        anonymizer = AnonymizerEngine()
        findings = analyzer.analyze(text=text, language="en")
        default_operator = AnonymizerConfig(
            "replace", {"new_value": "<ANONYMIZED>"}
        )
        engine_result = anonymizer.anonymize(
            text=text,
            analyzer_results=findings,
            operators={"DEFAULT": default_operator},
        )
        return engine_result.text
    except Exception as e:
        # Best-effort: report the failure and return None to the caller.
        print(f"An exception occurred. {e}")
        return None
 def __extract_anonymizer_and_anonymize(
     self,
     entity_type: str,
     anonymizer_config: AnonymizerConfig,
     text_to_anonymize: str,
 ) -> str:
     """Instantiate the configured anonymizer and apply it to a text span.

     :param entity_type: entity type of the span being anonymized; it is
         passed on to the anonymizer under the ``"entity_type"`` key.
     :param anonymizer_config: supplies ``anonymizer_class`` and ``params``.
     :param text_to_anonymize: the text handed to the anonymizer.
     :return: whatever the anonymizer's ``anonymize`` call returns.

     Any exception raised by the anonymizer's ``validate`` or ``anonymize``
     propagates to the caller.
     """
     self.logger.debug(f"getting anonymizer for {entity_type}")
     anonymizer = anonymizer_config.anonymizer_class()
     self.logger.debug(
         f"validating anonymizer {anonymizer} for {entity_type}")
     # Validation sees only the user-supplied params, without "entity_type".
     anonymizer.validate(params=anonymizer_config.params)
     # Build a per-call copy instead of writing into the config's own dict:
     # the same config object can be reused for several entity types (and
     # across calls), so mutating it would leak "entity_type" into the
     # caller's configuration.
     params = {**anonymizer_config.params, "entity_type": entity_type}
     self.logger.debug(f"anonymizing {entity_type} with {anonymizer}")
     anonymized_text = anonymizer.anonymize(params=params,
                                            text=text_to_anonymize)
     return anonymized_text
Exemple #12
0
def test_given_anonymize_with_encrypt_then_text_returned_with_encrypted_content(
):
    """Encrypting a PERSON span leaves the prefix intact and is reversible."""
    plain_prefix = "My name is "
    plain_name = "Chloë"
    full_text = plain_prefix + plain_name
    start_index, end_index = 11, 16
    key = "WmZq4t7w!z%C&F)J"
    person_findings = [
        RecognizerResult("PERSON", start_index, end_index, 0.8),
    ]
    encrypt_config = {"PERSON": AnonymizerConfig("encrypt", {"key": key})}

    engine_result = AnonymizerEngine().anonymize(
        full_text, person_findings, encrypt_config
    )
    anonymized = engine_result.text

    # Everything before the entity must be untouched.
    assert anonymized[:start_index] == plain_prefix
    # The entity itself must no longer appear in clear text ...
    ciphertext = anonymized[start_index:]
    assert ciphertext != plain_name
    # ... and decrypting it with the same key must give the name back.
    decrypted = AESCipher.decrypt(key.encode(), ciphertext)
    assert decrypted == plain_name
def test_given_json_with_bad_anonymizer_name_then_we_fail(class_name):
    """``from_json`` must raise for an unrecognized anonymizer ``type``."""
    payload = {"type": class_name, "param_1": "my_parameter"}
    expected_message = f"Invalid anonymizer class '{class_name}'."
    with pytest.raises(InvalidParamException, match=expected_message):
        AnonymizerConfig.from_json(payload)
def test_given_json_then_anonymizer_config_is_created_properly(class_name):
    """``from_json`` resolves the class and keeps the remaining params."""
    payload = {"type": class_name, "param_1": "my_parameter"}

    config = AnonymizerConfig.from_json(payload)

    assert config.anonymizer_class
    resolved_name = config.anonymizer_class().anonymizer_name()
    assert resolved_name == class_name
    # The "type" key is not expected among the resulting params.
    assert config.params == {"param_1": "my_parameter"}