def __init__(self, stopwords=[], punct_words=[], nlp_artifacts=None): self.stopwords = stopwords self.punct_words = punct_words if nlp_artifacts is None: self.nlp_artifacts = NlpArtifacts([], [], [], [], None, "en") else: self.nlp_artifacts = nlp_artifacts
def loaded_analyzer_engine(loaded_registry, app_tracer):
    """Build an AnalyzerEngine wired with a mocked NLP engine.

    The mock engine carries empty English artifacts; decision-process
    logging is enabled so traces can be inspected in tests.
    """
    empty_artifacts = NlpArtifacts([], [], [], [], None, "en")
    nlp_mock = NlpEngineMock(
        stopwords=[], punct_words=[], nlp_artifacts=empty_artifacts
    )
    engine = AnalyzerEngine(
        loaded_registry,
        nlp_mock,
        app_tracer=app_tracer,
        log_decision_process=True,
    )
    return engine
def doc_to_nlp_artifact(self, doc, language):
    """Convert a processed spaCy doc into an NlpArtifacts bundle.

    Collects token texts, lemmas, and character offsets in a single pass
    over the doc, alongside the doc's named entities.
    """
    token_texts = []
    token_lemmas = []
    token_offsets = []
    for token in doc:
        token_texts.append(token.text)
        token_lemmas.append(token.lemma_)
        token_offsets.append(token.idx)
    return NlpArtifacts(
        entities=doc.ents,
        tokens=token_texts,
        tokens_indices=token_offsets,
        lemmas=token_lemmas,
        nlp_engine=self,
        language=language,
    )
def __init__(self, *args, **kwargs):
    """Set up shared test fixtures: registry, tracer, and a loaded engine."""
    super(TestAnalyzerEngine, self).__init__(*args, **kwargs)
    self.loaded_registry = MockRecognizerRegistry(RecognizerStoreApiMock())
    empty_artifacts = NlpArtifacts([], [], [], [], None, "en")
    self.app_tracer = AppTracerMock(enable_interpretability=True)
    # Engine under test uses a mocked NLP engine so no real model loads.
    nlp_mock = MockNlpEngine(
        stopwords=[], punct_words=[], nlp_artifacts=empty_artifacts
    )
    self.loaded_analyzer_engine = AnalyzerEngine(
        self.loaded_registry,
        nlp_mock,
        app_tracer=self.app_tracer,
        enable_trace_pii=True,
    )
    self.unit_test_guid = "00000000-0000-0000-0000-000000000000"
def _doc_to_nlp_artifact(self, doc: Doc, language: str) -> NlpArtifacts:
    """Wrap a processed spaCy Doc as NlpArtifacts.

    NOTE(review): the Doc object itself is passed as ``tokens`` (a Doc is
    iterable over its tokens), rather than a list of token strings —
    presumably intentional so downstream code can access full token
    objects; confirm against NlpArtifacts consumers.
    """
    token_lemmas = [tok.lemma_ for tok in doc]
    token_starts = [tok.idx for tok in doc]
    return NlpArtifacts(
        entities=doc.ents,
        tokens=doc,
        tokens_indices=token_starts,
        lemmas=token_lemmas,
        nlp_engine=self,
        language=language,
    )
def test_text_with_context_improves_score(self):
    """Real NLP artifacts (with context) must yield strictly higher scores
    than empty artifacts for every context sentence."""
    nlp_engine = TESTS_NLP_ENGINE
    empty_artifacts = NlpArtifacts([], [], [], [], None, "en")
    for item in self.context_sentences:
        text, recognizer, entities = item[0], item[1], item[2]
        real_artifacts = nlp_engine.process_text(text, "en")
        no_context = recognizer.analyze(text, entities, empty_artifacts)
        with_context = recognizer.analyze(text, entities, real_artifacts)
        # Same matches either way; context only affects confidence.
        assert len(no_context) == len(with_context)
        for base, boosted in zip(no_context, with_context):
            assert base.score < boosted.score
def test_context_custom_recognizer(self):
    """A custom PatternRecognizer is also enhanced by context.

    This additionally covers a specific edge case: the pattern includes a
    preceding space (' rocket'), which misaligns the regex match against
    the tokenized text (the token is just 'rocket'). The context-window
    lookup must handle that misalignment to find the correct window.
    """
    nlp_engine = TESTS_NLP_ENGINE
    empty_artifacts = NlpArtifacts([], [], [], [], None, "en")
    rocket_recognizer = PatternRecognizer(
        supported_entity="ROCKET",
        name="rocketrecognizer",
        context=["cool"],
        patterns=[Pattern("rocketpattern", "\\s+(rocket)", 0.3)],
    )
    text = "hi, this is a cool ROCKET"
    entities = ["ROCKET"]
    real_artifacts = nlp_engine.process_text(text, "en")
    no_context = rocket_recognizer.analyze(text, entities, empty_artifacts)
    with_context = rocket_recognizer.analyze(text, entities, real_artifacts)
    assert len(no_context) == len(with_context)
    for base, boosted in zip(no_context, with_context):
        assert base.score < boosted.score
def mock_nlp_artifacts():
    """Return empty English NlpArtifacts for tests that need no real NLP."""
    return NlpArtifacts(
        entities=[],
        tokens=[],
        tokens_indices=[],
        lemmas=[],
        nlp_engine=None,
        language="en",
    )