def _update_recognizers_based_on_entities_to_keep(
    self, analyzer_engine: AnalyzerEngine
):
    """Reconcile ``self.entities`` with what the analyzer engine supports.

    Prints the entities the engine reports for ``self.language``, defaults
    ``self.entities`` to that set when it is empty, and prints a notice for
    every requested entity the engine does not report.

    If ORGANIZATION is requested but not reported by the engine, it is
    enabled on the first recognizer named ``SpacyRecognizer`` or
    ``StanzaRecognizer`` (if any).

    :param analyzer_engine: Engine queried via ``get_supported_entities``
        and ``get_recognizers``; a matching recognizer's
        ``supported_entities`` list may be mutated in place.
    :return: None. ``self.entities`` is only reassigned when it was empty.
    """
    supported_entities = analyzer_engine.get_supported_entities(
        language=self.language
    )
    print("Entities supported by this Presidio Analyzer instance:")
    print(", ".join(supported_entities))

    if not self.entities:
        # Copy rather than alias, so later changes to self.entities cannot
        # leak into the list object handed back by the engine.
        self.entities = list(supported_entities)

    for entity in self.entities:
        if entity not in supported_entities:
            print(
                f"Entity {entity} is not supported by this instance of Presidio Analyzer Engine"
            )

    # Nothing more to do unless ORGANIZATION was requested and is missing.
    if "ORGANIZATION" not in self.entities or "ORGANIZATION" in supported_entities:
        return

    ner_recognizer = next(
        (
            rec
            for rec in analyzer_engine.get_recognizers()
            if rec.name in ("SpacyRecognizer", "StanzaRecognizer")
        ),
        None,
    )
    if ner_recognizer is None:
        return

    if "ORGANIZATION" not in ner_recognizer.supported_entities:
        ner_recognizer.supported_entities.append("ORGANIZATION")
    # ORGANIZATION is already in self.entities (checked above), so it is not
    # appended again; doing so would leave a duplicate entry.
    print("Added ORGANIZATION as a supported entity from spaCy/Stanza")
def test_when_get_supported_fields_then_return_all_languages(
    mock_registry, unit_test_guid, nlp_engine
):
    """Supported entities queried without a language argument.

    Builds an engine from the ``mock_registry`` and ``nlp_engine`` fixtures
    and expects exactly three entities, including CREDIT_CARD, DOMAIN_NAME
    and PHONE_NUMBER.
    """
    engine = AnalyzerEngine(registry=mock_registry, nlp_engine=nlp_engine)

    supported = engine.get_supported_entities()

    assert len(supported) == 3
    for expected_entity in ("CREDIT_CARD", "DOMAIN_NAME", "PHONE_NUMBER"):
        assert expected_entity in supported
def test_when_get_supported_fields_specific_language_then_return_single_result(
    loaded_registry, unit_test_guid, nlp_engine
):
    """Supported entities queried for one specific language.

    Registers a pattern recognizer for the ROCKET entity with supported
    language ``"ru"`` and expects ROCKET to be the only entity reported
    when the engine is asked about ``"ru"``.
    """
    engine = AnalyzerEngine(registry=loaded_registry, nlp_engine=nlp_engine)

    rocket_recognizer = PatternRecognizer(
        "ROCKET",
        name="Rocket recognizer RU",
        patterns=[Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)],
        supported_language="ru",
    )
    engine.registry.add_recognizer(rocket_recognizer)

    russian_entities = engine.get_supported_entities(language="ru")

    assert len(russian_entities) == 1
    assert "ROCKET" in russian_entities