def test_given_specific_anonymizer_then_we_use_it():
    """An entity-specific config is preferred over the "DEFAULT" one.

    Both a "DEFAULT" ("replace") and an "SSN" ("redact") config are supplied
    for a single "SSN" finding covering "REPLACE ME". The expected output has
    the span removed, i.e. the "SSN" config was the one applied.
    """
    engine = AnonymizerEngine()
    source_text = "please REPLACE ME."
    ssn_finding = AnalyzerResult("SSN", 7, 17, 0.8)
    configs_by_entity = {
        "DEFAULT": AnonymizerConfig("replace", {"new_value": "and thank you"}),
        "SSN": AnonymizerConfig("redact", {}),
    }

    anonymized = engine.anonymize(source_text, [ssn_finding], configs_by_entity)

    assert anonymized == "please ."
def test_given_several_anonymizers_then_we_use_the_correct_one():
    """The anonymizer class set on the entity's config is the one that runs.

    The "PHONE_NUMBER" config has its ``anonymizer_class`` overridden with
    ``MockAnonymizer``; the expected output contains the mock's text in place
    of the phone number.
    """
    phone_finding = AnalyzerResult.from_json(
        {
            "score": 0.5,
            "entity_type": "PHONE_NUMBER",
            "start": 8,
            "end": 18,
        }
    )
    phone_config = AnonymizerConfig("replace", {})
    # Override the class after construction so the engine output is
    # attributable to the mock rather than to the "replace" anonymizer.
    phone_config.anonymizer_class = MockAnonymizer

    engine = AnonymizerEngine()
    anonymized = engine.anonymize(
        "Number: 0554555556",
        [phone_finding],
        {"PHONE_NUMBER": phone_config},
    )

    assert anonymized == "Number: I am your new text!"
def obfuscate(text):
    """Analyze ``text`` and return the anonymizer engine's output for it.

    Two anonymizer configs are supplied:

    * "DEFAULT": "replace" with the value "<ANONYMIZED>".
    * "PHONE_NUMBER": "mask" with "*" as the masking character, 12 characters
      to mask, counted from the end.

    :param text: the text to analyze and anonymize.
    :return: whatever ``AnonymizerEngine.anonymize`` returns for the findings.
    """
    findings = analyze(text)
    engine = AnonymizerEngine()

    configs_by_entity = {
        "DEFAULT": AnonymizerConfig("replace", {"new_value": "<ANONYMIZED>"}),
        "PHONE_NUMBER": AnonymizerConfig(
            "mask",
            {
                "type": "mask",
                "masking_char": "*",
                "chars_to_mask": 12,
                "from_end": True,
            },
        ),
    }

    return engine.anonymize(
        text=text,
        analyzer_results=findings,
        anonymizers_config=configs_by_entity,
    )
def run_anonymizer(self, text):
    """Analyze ``text`` and anonymize the findings, marking PERSON entities.

    ``self.analyzer_engine`` is run over the text (language 'en', score
    threshold 0.5). When it reports findings, they are handed to
    ``self.anonymizer_engine`` together with a "replace" config for the
    "PERSON" entity type that substitutes "[GDPRREDACT]".

    :param text: the text to analyze and anonymize.
    :return: the anonymizer engine's result, or ``None`` when the analyzer
        reports no findings.
    """
    results = self.analyzer_engine.analyze(
        text=text, entities=[], language='en', score_threshold=0.5
    )
    if not results:
        # Keep the existing contract: callers get None when nothing was found.
        return None

    # The "replace" anonymizer takes its replacement text under "new_value".
    # The previous "replace_text" key is not a parameter it reads, so the
    # custom "[GDPRREDACT]" marker was never applied.
    config = {"PERSON": AnonymizerConfig("replace", {"new_value": "[GDPRREDACT]"})}
    return self.anonymizer_engine.anonymize(text, results, config)
def test_given_default_anonymizer_then_we_use_it():
    """With only a "DEFAULT" config supplied, it is applied to the finding.

    A single "SSN" finding covers "REPLACE ME"; the expected text has that
    span replaced by the "DEFAULT" config's ``new_value``.
    """
    engine = AnonymizerEngine()
    source_text = "please REPLACE ME."
    ssn_finding = RecognizerResult("SSN", 7, 17, 0.8)
    default_config = AnonymizerConfig("replace", {"new_value": "and thank you"})

    outcome = engine.anonymize(
        source_text,
        [ssn_finding],
        {"DEFAULT": default_config},
    )

    assert outcome.text == "please and thank you."
def anonymizeName(text_to_anonymize):
    """Replace "PERSON" findings in the text with a generated token.

    The module-level ``analyzer`` is asked for "PERSON" entities only
    (language 'en'). ``generateToken(20)`` is called once per invocation, so
    the same token is configured as the replacement for every finding in
    this call.

    :param text_to_anonymize: the text to analyze and anonymize.
    :return: whatever the module-level ``anonymizer.anonymize`` returns.
    """
    person_findings = analyzer.analyze(
        text=text_to_anonymize,
        entities=["PERSON"],
        language='en',
    )
    person_config = AnonymizerConfig("replace", {"new_value": generateToken(20)})
    return anonymizer.anonymize(
        text=text_to_anonymize,
        analyzer_results=person_findings,
        anonymizers_config={"PERSON": person_config},
    )
def anonymizeEmail(text_to_anonymize):
    """Replace "EMAIL_ADDRESS" findings in the text with a generated token.

    The module-level ``analyzer`` is asked for "EMAIL_ADDRESS" entities only
    (language 'en'). ``generateToken(20)`` is called once per invocation, so
    the same token is configured as the replacement for every finding in
    this call.

    :param text_to_anonymize: the text to analyze and anonymize.
    :return: whatever the module-level ``anonymizer.anonymize`` returns.
    """
    email_findings = analyzer.analyze(
        text=text_to_anonymize,
        entities=["EMAIL_ADDRESS"],
        language='en',
    )
    email_config = AnonymizerConfig("replace", {"new_value": generateToken(20)})
    return anonymizer.anonymize(
        text=text_to_anonymize,
        analyzer_results=email_findings,
        anonymizers_config={"EMAIL_ADDRESS": email_config},
    )
def anonymize_text(text: str) -> str:
    """Analyze ``text`` (language "en") and anonymize the findings.

    Fresh ``AnalyzerEngine`` and ``AnonymizerEngine`` instances are created
    on every call. A single "DEFAULT" config is supplied, which replaces with
    "<ANONYMIZED>".

    :param text: the text to analyze and anonymize.
    :return: the value returned by ``AnonymizerEngine.anonymize``.
    """
    analyzer_engine = AnalyzerEngine()
    anonymizer_engine = AnonymizerEngine()

    findings = analyzer_engine.analyze(text=text, language="en")
    default_config = AnonymizerConfig("replace", {"new_value": "<ANONYMIZED>"})

    return anonymizer_engine.anonymize(
        text=text,
        analyzer_results=findings,
        anonymizers_config={"DEFAULT": default_config},
    )
def __get_anonymizer_config_by_entity_type(
    entity_type: str,
    anonymizers_config: Dict[str, AnonymizerConfig],
) -> AnonymizerConfig:
    """Pick the anonymizer config to use for ``entity_type``.

    Lookup order:

    1. the entry stored under ``entity_type``;
    2. otherwise the entry stored under "DEFAULT";
    3. otherwise a new ``AnonymizerConfig(DEFAULT, {})`` built from the
       module-level ``DEFAULT`` anonymizer name.

    A missing key and a falsy stored value are treated the same way: both
    fall through to the next step.

    :param entity_type: entity type to look up.
    :param anonymizers_config: mapping of entity type to anonymizer config.
    :return: the selected (or newly created fallback) config.
    """
    # ``or`` short-circuits, so each fallback is evaluated only when the
    # preceding lookup produced nothing usable.
    return (
        anonymizers_config.get(entity_type)
        or anonymizers_config.get("DEFAULT")
        or AnonymizerConfig(DEFAULT, {})
    )
def anonymize_text(text: str) -> str:
    """Analyze ``text`` (language "en") and return the anonymized text.

    Fresh ``AnalyzerEngine`` and ``AnonymizerEngine`` instances are created
    on every call. A single "DEFAULT" operator is supplied, which replaces
    with "<ANONYMIZED>".

    This is best-effort: any ``Exception`` raised along the way is printed
    and swallowed, in which case the function returns ``None`` despite the
    ``str`` annotation.

    :param text: the text to analyze and anonymize.
    :return: the ``text`` attribute of the anonymizer result, or ``None`` if
        an exception occurred.
    """
    try:
        analyzer_engine = AnalyzerEngine()
        anonymizer_engine = AnonymizerEngine()

        findings = analyzer_engine.analyze(text=text, language="en")
        default_operator = AnonymizerConfig(
            "replace", {"new_value": "<ANONYMIZED>"}
        )
        outcome = anonymizer_engine.anonymize(
            text=text,
            analyzer_results=findings,
            operators={"DEFAULT": default_operator},
        )
        return outcome.text
    except Exception as e:
        print(f"An exception occurred. {e}")
        return None
def __extract_anonymizer_and_anonymize(
    self,
    entity_type: str,
    anonymizer_config: AnonymizerConfig,
    text_to_anonymize: str,
) -> str:
    """Instantiate the configured anonymizer, validate it and apply it.

    The anonymizer is created from ``anonymizer_config.anonymizer_class``,
    validated against ``anonymizer_config.params``, and then called with
    those params plus an ``"entity_type"`` entry.

    :param entity_type: entity type of the span being anonymized; passed to
        the anonymizer under the ``"entity_type"`` params key.
    :param anonymizer_config: config providing the anonymizer class and its
        params. It is not modified by this call.
    :param text_to_anonymize: the text handed to the anonymizer.
    :return: the value returned by the anonymizer's ``anonymize`` method.
    """
    self.logger.debug(f"getting anonymizer for {entity_type}")
    anonymizer = anonymizer_config.anonymizer_class()

    self.logger.debug(
        f"validating anonymizer {anonymizer} for {entity_type}")
    anonymizer.validate(params=anonymizer_config.params)

    # Work on a copy: the same config object can be reused for several
    # entity types (e.g. a "DEFAULT" entry), and writing "entity_type" into
    # its params dict would leak that key into the caller's configuration.
    params = dict(anonymizer_config.params)
    params["entity_type"] = entity_type

    self.logger.debug(f"anonymizing {entity_type} with {anonymizer}")
    return anonymizer.anonymize(params=params, text=text_to_anonymize)
def test_given_anonymize_with_encrypt_then_text_returned_with_encrypted_content():
    """The "encrypt" anonymizer output decrypts back to the original span.

    A "PERSON" finding covers the name at the end of the text. The test
    checks that the prefix is untouched, that the tail no longer equals the
    plain name, and that decrypting the tail with the same key yields the
    name again.
    """
    plain_prefix = "My name is "
    plain_name = "Chloë"
    full_text = plain_prefix + plain_name
    name_start = 11
    name_end = 16
    aes_key = "WmZq4t7w!z%C&F)J"

    findings = [RecognizerResult("PERSON", name_start, name_end, 0.8)]
    configs_by_entity = {"PERSON": AnonymizerConfig("encrypt", {"key": aes_key})}

    anonymized_text = AnonymizerEngine().anonymize(
        full_text, findings, configs_by_entity
    ).text

    # Everything before the finding must pass through unchanged.
    assert anonymized_text[:name_start] == plain_prefix

    encrypted_tail = anonymized_text[name_start:]
    assert encrypted_tail != plain_name

    decrypted_name = AESCipher.decrypt(aes_key.encode(), encrypted_tail)
    assert decrypted_name == plain_name
def test_given_json_with_bad_anonymizer_name_then_we_fail(class_name):
    """``AnonymizerConfig.from_json`` rejects an unknown anonymizer type.

    :param class_name: the invalid value placed under the "type" key; it is
        also expected to appear in the raised error message.
    """
    payload = {"type": class_name, "param_1": "my_parameter"}
    expected_message = f"Invalid anonymizer class '{class_name}'."

    with pytest.raises(InvalidParamException, match=expected_message):
        AnonymizerConfig.from_json(payload)
def test_given_json_then_anonymizer_config_is_created_properly(class_name):
    """``AnonymizerConfig.from_json`` builds a config from a valid payload.

    Checks that an anonymizer class was resolved, that an instance of it
    reports ``class_name`` as its name, and that the resulting params hold
    only the non-"type" entry of the payload.

    :param class_name: the value placed under the "type" key.
    """
    payload = {"type": class_name, "param_1": "my_parameter"}

    config = AnonymizerConfig.from_json(payload)

    assert config.anonymizer_class
    resolved_name = config.anonymizer_class().anonymizer_name()
    assert resolved_name == class_name
    assert config.params == {"param_1": "my_parameter"}