def loaded_analyzer_engine(loaded_registry, app_tracer):
    """Return an ``AnalyzerEngine`` wired to ``loaded_registry`` and a mocked NLP engine.

    The NLP mock is given empty stopword / punctuation lists and an
    ``NlpArtifacts`` instance built from four empty lists, ``None`` and ``"en"``.
    Decision-process logging is switched on and routed through ``app_tracer``.

    :param loaded_registry: recognizer registry handed to the engine.
    :param app_tracer: tracer object forwarded to the engine.
    :return: the constructed ``AnalyzerEngine``.
    """
    empty_artifacts = NlpArtifacts([], [], [], [], None, "en")
    nlp_engine_stub = NlpEngineMock(
        stopwords=[],
        punct_words=[],
        nlp_artifacts=empty_artifacts,
    )
    return AnalyzerEngine(
        loaded_registry,
        nlp_engine_stub,
        app_tracer=app_tracer,
        log_decision_process=True,
    )
def test_when_allFields_is_true_and_entities_not_empty_exception():
    """``Apply`` must raise ``ValueError`` when ``allFields`` is set together with explicit fields."""
    engine = AnalyzerEngine(
        registry=RecognizerRegistry(),
        nlp_engine=NlpEngineMock(),
    )

    request = AnalyzeRequest()
    request.text = (
        "My name is David and I live in Seattle."
        "Domain: microsoft.com "
    )

    # Contradictory template: "all fields" is requested AND a specific field is listed.
    request.analyzeTemplate.allFields = True
    credit_card_field = request.analyzeTemplate.fields.add()
    credit_card_field.name = "CREDIT_CARD"
    credit_card_field.minScore = "0.5"

    with pytest.raises(ValueError):
        engine.Apply(request, None)
def test_removed_pattern_recognizer_doesnt_work(unit_test_guid):
    """A custom recognizer yields results only while it is present in the store mock.

    The same text is analyzed three times: before the recognizer is added
    (no results expected), after it is added (one ``SPACESHIP`` result expected),
    and after it is removed again (no results expected).
    """
    spaceship_recognizer = PatternRecognizer(
        "SPACESHIP",
        name="Spaceship recognizer",
        patterns=[Pattern("spaceship pattern", r"\W*(spaceship)\W*", 0.8)],
    )

    store_mock = RecognizerStoreApiMock()
    engine = AnalyzerEngine(
        registry=MockRecognizerRegistry(store_mock),
        nlp_engine=NlpEngineMock(),
    )
    sample_text = "spaceship is my favorite transportation"
    requested_entities = ["CREDIT_CARD", "SPACESHIP"]

    def run_analysis():
        # Every phase of the test issues exactly the same request.
        return engine.analyze(
            correlation_id=unit_test_guid,
            text=sample_text,
            entities=requested_entities,
            language="en",
            all_fields=False,
        )

    # Nothing registered yet: the entity must not be detected.
    findings = run_analysis()
    assert len(findings) == 0

    # Register the spaceship recognizer and expect a single hit.
    store_mock.add_custom_pattern_recognizer(spaceship_recognizer)
    findings = run_analysis()
    assert len(findings) == 1
    assert_result(findings[0], "SPACESHIP", 0, 10, 0.8)

    # Remove it by name and expect the analyzer to return nothing again.
    store_mock.remove_recognizer("Spaceship recognizer")
    findings = run_analysis()
    assert len(findings) == 0
def test_when_entities_is_none_then_return_all_fields(loaded_registry):
    """Omitting ``entities`` from ``analyze`` should return results for every entity type.

    The call passes only ``text``, ``score_threshold`` (0) and ``language``;
    the test then expects credit card, phone number and domain name entities
    among the returned results.

    :param loaded_registry: recognizer registry fixture handed to the engine.
    """
    analyze_engine = AnalyzerEngine(registry=loaded_registry,
                                    nlp_engine=NlpEngineMock())
    threshold = 0
    text = (" Credit card: 4095-2609-9393-4932, my phone is 425 8829090 "
            "Domain: microsoft.com")

    response = analyze_engine.analyze(text=text, score_threshold=threshold,
                                      language="en")

    # Guard BEFORE iterating: placed after the comprehension this assertion
    # could never fail, because iterating ``None`` raises ``TypeError`` first.
    assert response is not None

    # A distinct loop variable avoids shadowing ``response`` inside the comprehension.
    returned_entities = [result.entity_type for result in response]

    assert "CREDIT_CARD" in returned_entities
    assert "PHONE_NUMBER" in returned_entities
    assert "DOMAIN_NAME" in returned_entities
def test_when_allFields_is_true_return_all_fields():
    """``Apply`` with ``allFields`` set should report every entity type found in the text.

    The request uses a results score threshold of 0 and expects credit card,
    phone number and domain name fields among ``response.analyzeResults``.
    """
    analyze_engine = AnalyzerEngine(registry=MockRecognizerRegistry(),
                                    nlp_engine=NlpEngineMock())
    request = AnalyzeRequest()
    request.analyzeTemplate.allFields = True
    request.analyzeTemplate.resultsScoreThreshold = 0
    request.text = (
        " Credit card: 4095-2609-9393-4932, my phone is 425 8829090 "
        "Domain: microsoft.com")

    response = analyze_engine.Apply(request, None)

    # Guard BEFORE iterating: after the comprehension this assertion would be
    # unreachable for a ``None`` value, since iterating it raises first.
    assert response.analyzeResults is not None

    returned_entities = [result.field.name for result in response.analyzeResults]

    assert "CREDIT_CARD" in returned_entities
    assert "PHONE_NUMBER" in returned_entities
    assert "DOMAIN_NAME" in returned_entities
def test_when_get_recognizers_then_returns_supported_language():
    """``get_recognizers(language="ru")`` should return the single recognizer declared for ``"ru"``."""
    rocket_pattern = Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)
    russian_recognizer = PatternRecognizer(
        "ROCKET",
        name="Rocket recognizer RU",
        patterns=[rocket_pattern],
        supported_language="ru",
    )

    registry_mock = RecognizerRegistryMock()
    registry_mock.add_recognizer(russian_recognizer)

    engine = AnalyzerEngine(registry=registry_mock, nlp_engine=NlpEngineMock())
    recognizers = engine.get_recognizers(language="ru")

    # Exactly one recognizer is expected back for "ru".
    assert len(recognizers) == 1
def test_when_default_threshold_is_zero_then_all_results_pass(
        loaded_registry, unit_test_guid):
    """With no explicit ``score_threshold``, two results are expected for the sample text.

    :param loaded_registry: recognizer registry fixture handed to the engine.
    :param unit_test_guid: correlation id fixture forwarded to ``analyze``.
    """
    # A dedicated engine is built here from the loaded registry and a mocked NLP engine.
    engine = AnalyzerEngine(
        registry=loaded_registry,
        nlp_engine=NlpEngineMock(),
    )

    # ``score_threshold`` is deliberately not passed, so the engine's default applies.
    findings = engine.analyze(
        correlation_id=unit_test_guid,
        text=" Credit card: 4095-2609-9393-4932, my phone is 425 8829090",
        entities=["CREDIT_CARD", "PHONE_NUMBER"],
        language="en",
    )

    assert len(findings) == 2
def test_get_recognizers_returns_supported_language():
    """``GetAllRecognizers`` for ``"ru"`` should return the single custom recognizer declared for ``"ru"``."""
    russian_recognizer = PatternRecognizer(
        "ROCKET",
        name="Rocket recognizer RU",
        patterns=[Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)],
        supported_language="ru",
    )

    store_mock = RecognizerStoreApiMock()
    store_mock.add_custom_pattern_recognizer(russian_recognizer)

    engine = AnalyzerEngine(
        registry=MockRecognizerRegistry(store_mock),
        nlp_engine=NlpEngineMock(),
    )

    recognizers = engine.GetAllRecognizers(
        RecognizersAllRequest(language="ru"), None)

    # Exactly one recognizer is expected back for "ru".
    assert len(recognizers) == 1
def test_get_recognizers_returns_added_custom():
    """Adding a custom recognizer to the store mock grows the ``"en"`` recognizer list by one."""
    # Number of recognizers the test expects before any custom one is added.
    predefined_count = 15

    rocket_recognizer = PatternRecognizer(
        "ROCKET",
        name="Rocket recognizer",
        patterns=[Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)],
    )

    store_mock = RecognizerStoreApiMock()
    engine = AnalyzerEngine(
        registry=MockRecognizerRegistry(store_mock),
        nlp_engine=NlpEngineMock(),
    )
    english_request = RecognizersAllRequest(language="en")

    # Baseline: only the predefined recognizers are reported.
    recognizers = engine.GetAllRecognizers(english_request, None)
    assert len(recognizers) == predefined_count

    # After registering the custom recognizer, one more is reported.
    store_mock.add_custom_pattern_recognizer(rocket_recognizer)
    recognizers = engine.GetAllRecognizers(english_request, None)
    assert len(recognizers) == predefined_count + 1
def test_added_pattern_recognizer_works(unit_test_guid):
    """A custom recognizer added to the store mock is picked up by later ``analyze`` calls.

    The same request is issued before and after the recognizer is added:
    first no results are expected, then a single ``ROCKET`` result.
    """
    rocket_recognizer = PatternRecognizer(
        "ROCKET",
        name="Rocket recognizer",
        patterns=[Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)],
    )

    store_mock = RecognizerStoreApiMock()
    engine = AnalyzerEngine(
        registry=MockRecognizerRegistry(store_mock),
        nlp_engine=NlpEngineMock(),
    )
    sample_text = "rocket is my favorite transportation"
    requested_entities = ["CREDIT_CARD", "ROCKET"]

    def run_analysis():
        # Both phases of the test issue exactly the same request.
        return engine.analyze(
            correlation_id=unit_test_guid,
            text=sample_text,
            entities=requested_entities,
            language="en",
            all_fields=False,
        )

    # Before registration the entity must not be detected.
    findings = run_analysis()
    assert len(findings) == 0

    # Register the rocket recognizer with the store mock.
    store_mock.add_custom_pattern_recognizer(rocket_recognizer)

    # Now the entity is expected exactly once.
    findings = run_analysis()
    assert len(findings) == 1
    assert_result(findings[0], "ROCKET", 0, 7, 0.8)
def test_when_analyze_added_pattern_recognizer_then_succeed(unit_test_guid):
    """A recognizer added to the registry mock is picked up by later ``analyze`` calls.

    The same request is issued before and after ``add_recognizer``:
    first no results are expected, then a single ``ROCKET`` result.
    """
    rocket_recognizer = PatternRecognizer(
        "ROCKET",
        name="Rocket recognizer",
        patterns=[Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)],
    )

    registry_mock = RecognizerRegistryMock()
    engine = AnalyzerEngine(registry=registry_mock, nlp_engine=NlpEngineMock())
    sample_text = "rocket is my favorite transportation"
    requested_entities = ["CREDIT_CARD", "ROCKET"]

    def run_analysis():
        # Both phases of the test issue exactly the same request.
        return engine.analyze(
            correlation_id=unit_test_guid,
            text=sample_text,
            entities=requested_entities,
            language="en",
        )

    # Before registration the entity must not be detected.
    findings = run_analysis()
    assert len(findings) == 0

    # Register the rocket recognizer directly on the registry mock.
    registry_mock.add_recognizer(rocket_recognizer)

    # Now the entity is expected exactly once.
    findings = run_analysis()
    assert len(findings) == 1
    assert_result(findings[0], "ROCKET", 0, 7, 0.8)