def test_remove_pattern_recognizer(self):
        """
        A custom recognizer added to the store shows up in the registry,
        and disappears again once it is removed from the store by name.
        """
        spaceship_recognizer = PatternRecognizer(
            "SPACESHIP",
            name="Spaceship recognizer",
            patterns=[Pattern("spaceship pattern",
                              r'\W*(spaceship)\W*',
                              0.8)])

        # The registry reads its custom recognizers through the store mock
        store_api = RecognizerStoreApiMock()
        registry = RecognizerRegistry(store_api)

        # Freshly created store: no custom recognizers yet
        assert len(registry.get_custom_recognizers()) == 0

        # Register the "spaceship" recognizer in the store
        store_api.add_custom_pattern_recognizer(spaceship_recognizer)

        # The registry now reports exactly one custom recognizer
        assert len(registry.get_custom_recognizers()) == 1

        # Drop the recognizer again, addressing it by its name
        store_api.remove_recognizer("Spaceship recognizer")

        # Back to an empty list of custom recognizers
        assert len(registry.get_custom_recognizers()) == 0
Exemple #2
0
 def __init__(self,
              registry=None,
              nlp_engine=None):
     """
     Set up the engine with an NLP engine and a recognizer registry,
     then load the registry's predefined recognizers.

     :param registry: recognizer registry to use; when None, a new
     RecognizerRegistry is created for this instance
     :param nlp_engine: NLP engine to use; when None, a new
     SpacyNlpEngine is created for this instance
     """
     # Defaults are built per call. Default-argument expressions are
     # evaluated only once, when the function is defined, so writing
     # `registry=RecognizerRegistry()` in the signature would make every
     # instance share one registry (and reload recognizers into it on
     # each construction) and one NLP engine.
     if nlp_engine is None:
         nlp_engine = SpacyNlpEngine()
     if registry is None:
         registry = RecognizerRegistry()

     # load nlp module
     self.nlp_engine = nlp_engine
     # prepare registry
     self.registry = registry
     # load all recognizers
     registry.load_predefined_recognizers()
    def __init__(self,
                 registry=None,
                 nlp_engine=None,
                 app_tracer=None,
                 enable_trace_pii=False,
                 default_score_threshold=None):
        """
        Create an AnalyzerEngine, the class orchestrating the detection
        of PII entities and all related logic.

        Any collaborator that is not supplied (or is falsy) is replaced
        by a default instance.
        :param registry: instance of type RecognizerRegistry
        :param nlp_engine: instance of type NlpEngine
        (for example SpacyNlpEngine)
        :param app_tracer: instance of type AppTracer,
        used to trace the logic used during each request
        :param enable_trace_pii: bool,
        defines whether PII values should be traced or not.
        :param default_score_threshold: Minimum confidence value
        for detected entities to be returned; None is stored as 0
        """
        # Default collaborators are imported lazily, only when needed
        if not nlp_engine:
            from analyzer.nlp_engine import SpacyNlpEngine
            nlp_engine = SpacyNlpEngine()
        if not registry:
            from analyzer import RecognizerRegistry
            registry = RecognizerRegistry()
        app_tracer = app_tracer or AppTracer()

        self.nlp_engine = nlp_engine
        self.registry = registry

        # A registry that already holds recognizers is left as it is;
        # an empty one gets the predefined recognizers loaded
        if not registry.recognizers:
            registry.load_predefined_recognizers()

        self.app_tracer = app_tracer
        self.enable_trace_pii = enable_trace_pii
        self.default_score_threshold = (
            0 if default_score_threshold is None
            else default_score_threshold)
    def test_add_pattern_recognizer(self):
        """
        A pattern recognizer added to the store is returned by the
        registry with its name and pattern name intact.
        """
        rocket_recognizer = PatternRecognizer(
            "ROCKET",
            name="Rocket recognizer",
            patterns=[Pattern("rocket pattern", r'\W*(rocket)\W*', 0.8)])

        # Start from an empty store: the registry reports nothing
        store_api = RecognizerStoreApiMock()
        registry = RecognizerRegistry(store_api)
        assert len(registry.get_custom_recognizers()) == 0

        # Register the "rocket" recognizer in the store
        store_api.add_custom_pattern_recognizer(rocket_recognizer)

        # Exactly that recognizer must now be visible via the registry
        found = registry.get_custom_recognizers()
        assert len(found) == 1
        only_recognizer = found[0]
        assert only_recognizer.patterns[0].name == "rocket pattern"
        assert only_recognizer.name == "Rocket recognizer"
Exemple #5
0
 def test_when_allFields_is_true_and_entities_not_empty_exception(self):
     """
     Applying a request that sets allFields AND lists an explicit field
     is expected to raise ValueError.
     """
     engine = AnalyzerEngine(registry=RecognizerRegistry())

     analyze_request = AnalyzeRequest()
     analyze_request.text = ("My name is David and I live in Seattle."
                             "Domain: microsoft.com ")
     analyze_request.analyzeTemplate.allFields = True

     # Explicit field entry alongside allFields=True
     credit_card_field = analyze_request.analyzeTemplate.fields.add()
     credit_card_field.name = 'CREDIT_CARD'
     credit_card_field.minScore = '0.5'

     with pytest.raises(ValueError):
         engine.Apply(analyze_request, None)
Exemple #6
0
 def test_when_allFields_is_true_full_recognizers_list_return_all_fields(
         self):
     """
     With allFields set and no explicit fields, the analysis results
     must include PERSON, LOCATION and DOMAIN_NAME for the sample text.
     """
     engine = AnalyzerEngine(RecognizerRegistry())

     analyze_request = AnalyzeRequest()
     analyze_request.analyzeTemplate.allFields = True
     analyze_request.text = ("My name is David and I live in Seattle."
                             "Domain: microsoft.com ")

     response = engine.Apply(analyze_request, None)

     # Collect the entity name of every returned result
     returned_entities = []
     for result in response.analyzeResults:
         returned_entities.append(result.field.name)

     assert response.analyzeResults is not None
     for expected_entity in ("PERSON", "LOCATION", "DOMAIN_NAME"):
         assert expected_entity in returned_entities
Exemple #7
0
    def test_cache_logic(self):
        """
        Check how the registry's view of custom recognizers follows the
        store: an addition made without a hash update stays invisible,
        while an addition with a hash update is picked up and costs
        exactly one storage access.
        """
        rocket_recognizer = PatternRecognizer(
            "ROCKET",
            name="Rocket recognizer",
            patterns=[Pattern("rocket pattern", r'\W*(rocket)\W*', 0.8)])

        # ---- Negative flow -------------------------------------------
        store_api = RecognizerStoreApiMock()
        registry = RecognizerRegistry(store_api)

        # Empty store: nothing is returned, and because no hash was
        # returned the underlying storage is not expected to be read
        assert len(registry.get_custom_recognizers()) == 0
        assert store_api.times_accessed_storage == 0

        # Add a recognizer but deliberately leave the hash untouched
        store_api.add_custom_pattern_recognizer(rocket_recognizer,
                                                skip_hash_update=True)

        # The hash is unchanged, so the registry still reports the stale
        # (empty) result, without the newly added recognizer ...
        assert len(registry.get_custom_recognizers()) == 0
        # ... and the underlying storage was still not accessed
        assert store_api.times_accessed_storage == 0

        # ---- Positive flow -------------------------------------------
        # Same scenario on a fresh store, this time updating the hash
        store_api = RecognizerStoreApiMock()
        registry = RecognizerRegistry(store_api)

        registry.get_custom_recognizers()
        assert store_api.times_accessed_storage == 0

        store_api.add_custom_pattern_recognizer(rocket_recognizer,
                                                skip_hash_update=False)

        # The new recognizer is visible now
        assert len(registry.get_custom_recognizers()) == 1
        # and fetching it took one storage access
        assert store_api.times_accessed_storage == 1
 def get_mock_recognizer_registry(self):
     """
     Build a RecognizerRegistry backed by a RecognizerStoreApiMock and
     pre-populated with four mock pattern recognizers and one mock
     custom recognizer.

     NOTE(review): the helper arguments look like
     (language, entity/entities, name) - confirm against the helpers.
     """
     pattern_specs = (
         ("en", "PERSON", "1"),
         ("de", "PERSON", "2"),
         ("de", "ADDRESS", "3"),
         ("he", "ADDRESS", "4"),
     )
     recognizers = [
         self.get_mock_pattern_recognizer(*spec) for spec in pattern_specs
     ]
     # The fifth recognizer is a custom one covering two entities
     recognizers.append(
         self.get_mock_custom_recognizer("he", ["PERSON", "ADDRESS"], "5"))

     store_api = RecognizerStoreApiMock()
     return RecognizerRegistry(store_api, recognizers)