def _update_recognizers_based_on_entities_to_keep(
        self, analyzer_engine: "AnalyzerEngine"):
    """Reconcile ``self.entities`` with what the analyzer engine supports.

    Prints the entities the engine reports for ``self.language``. When
    ``self.entities`` is empty (or ``None``) it defaults to a copy of that
    list. Each requested entity the engine does not report is announced
    with a printed message; it is not removed from ``self.entities``.

    If ORGANIZATION is requested but not reported by the engine, it is
    added to the ``supported_entities`` of the first registered recognizer
    named "SpacyRecognizer" or "StanzaRecognizer". Nothing happens when no
    such recognizer is registered.

    Args:
        analyzer_engine: Engine providing
            ``get_supported_entities(language=...)`` and
            ``get_recognizers()``.
    """
    supported_entities = analyzer_engine.get_supported_entities(
        language=self.language)
    print("Entities supported by this Presidio Analyzer instance:")
    print(", ".join(supported_entities))

    if not self.entities:
        # Copy so later edits to self.entities never leak into the list
        # object handed out by the engine (and vice versa).
        self.entities = list(supported_entities)

    supported_lookup = set(supported_entities)
    for entity in self.entities:
        if entity not in supported_lookup:
            print(
                f"Entity {entity} is not supported by this instance of Presidio Analyzer Engine"
            )

    organization = "ORGANIZATION"
    if organization not in self.entities or organization in supported_lookup:
        return

    nlp_recognizer_names = ("SpacyRecognizer", "StanzaRecognizer")
    nlp_recognizer = next(
        (rec for rec in analyzer_engine.get_recognizers()
         if rec.name in nlp_recognizer_names),
        None,
    )
    if nlp_recognizer is None:
        return

    # get_recognizers() is not filtered by language, so the recognizer may
    # already list ORGANIZATION; avoid adding it twice.
    if organization not in nlp_recognizer.supported_entities:
        nlp_recognizer.supported_entities.append(organization)
    # ORGANIZATION is already in self.entities here (checked above), so it
    # must not be appended again.
    print(
        "Added ORGANIZATION as a supported entity from spaCy/Stanza"
    )
# Example no. 2
# 0
def test_when_get_recognizers_then_returns_supported_language():
    """Requesting recognizers for "ru" returns exactly one recognizer.

    A single Russian pattern recognizer is added to a mocked registry, and
    the engine is then asked for the recognizers of that language.
    """
    registry = RecognizerRegistryMock()
    registry.add_recognizer(
        PatternRecognizer(
            "ROCKET",
            name="Rocket recognizer RU",
            patterns=[Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)],
            supported_language="ru",
        )
    )
    engine = AnalyzerEngine(registry=registry, nlp_engine=NlpEngineMock())

    russian_recognizers = engine.get_recognizers(language="ru")

    # Only one mocked Russian recognizer is expected to come back.
    assert len(russian_recognizers) == 1