예제 #1
0
def loaded_analyzer_engine(loaded_registry, app_tracer):
    mock_nlp_artifacts = NlpArtifacts([], [], [], [], None, "en")
    analyzer_engine = AnalyzerEngine(
        loaded_registry,
        NlpEngineMock(stopwords=[], punct_words=[], nlp_artifacts=mock_nlp_artifacts),
        app_tracer=app_tracer,
        log_decision_process=True,
    )
    return analyzer_engine
def test_when_allFields_is_true_and_entities_not_empty_exception():
    analyze_engine = AnalyzerEngine(registry=RecognizerRegistry(),
                                    nlp_engine=NlpEngineMock())
    request = AnalyzeRequest()
    request.text = "My name is David and I live in Seattle." "Domain: microsoft.com "
    request.analyzeTemplate.allFields = True
    new_field = request.analyzeTemplate.fields.add()
    new_field.name = "CREDIT_CARD"
    new_field.minScore = "0.5"
    with pytest.raises(ValueError):
        analyze_engine.Apply(request, None)
def test_removed_pattern_recognizer_doesnt_work(unit_test_guid):
    pattern = Pattern("spaceship pattern", r"\W*(spaceship)\W*", 0.8)
    pattern_recognizer = PatternRecognizer("SPACESHIP",
                                           name="Spaceship recognizer",
                                           patterns=[pattern])

    # Make sure the analyzer doesn't get this entity
    recognizers_store_api_mock = RecognizerStoreApiMock()
    analyze_engine = AnalyzerEngine(
        registry=MockRecognizerRegistry(recognizers_store_api_mock),
        nlp_engine=NlpEngineMock(),
    )
    text = "spaceship is my favorite transportation"
    entities = ["CREDIT_CARD", "SPACESHIP"]

    results = analyze_engine.analyze(
        correlation_id=unit_test_guid,
        text=text,
        entities=entities,
        language="en",
        all_fields=False,
    )

    assert len(results) == 0

    # Add a new recognizer for the word "rocket" (case insensitive)
    recognizers_store_api_mock.add_custom_pattern_recognizer(
        pattern_recognizer)
    # Check that the entity is recognized:
    results = analyze_engine.analyze(
        correlation_id=unit_test_guid,
        text=text,
        entities=entities,
        language="en",
        all_fields=False,
    )
    assert len(results) == 1
    assert_result(results[0], "SPACESHIP", 0, 10, 0.8)

    # Remove recognizer
    recognizers_store_api_mock.remove_recognizer("Spaceship recognizer")
    # Test again to see we didn't get any results
    results = analyze_engine.analyze(
        correlation_id=unit_test_guid,
        text=text,
        entities=entities,
        language="en",
        all_fields=False,
    )

    assert len(results) == 0
예제 #4
0
def test_when_entities_is_none_then_return_all_fields(loaded_registry):
    analyze_engine = AnalyzerEngine(registry=loaded_registry,
                                    nlp_engine=NlpEngineMock())
    threshold = 0
    text = (" Credit card: 4095-2609-9393-4932,  my phone is 425 8829090 "
            "Domain: microsoft.com")
    response = analyze_engine.analyze(text=text,
                                      score_threshold=threshold,
                                      language="en")
    returned_entities = [response.entity_type for response in response]

    assert response is not None
    assert "CREDIT_CARD" in returned_entities
    assert "PHONE_NUMBER" in returned_entities
    assert "DOMAIN_NAME" in returned_entities
def test_when_allFields_is_true_return_all_fields():
    analyze_engine = AnalyzerEngine(registry=MockRecognizerRegistry(),
                                    nlp_engine=NlpEngineMock())
    request = AnalyzeRequest()
    request.analyzeTemplate.allFields = True
    request.analyzeTemplate.resultsScoreThreshold = 0
    request.text = (
        " Credit card: 4095-2609-9393-4932,  my phone is 425 8829090 "
        "Domain: microsoft.com")
    response = analyze_engine.Apply(request, None)
    returned_entities = [field.field.name for field in response.analyzeResults]

    assert response.analyzeResults is not None
    assert "CREDIT_CARD" in returned_entities
    assert "PHONE_NUMBER" in returned_entities
    assert "DOMAIN_NAME" in returned_entities
예제 #6
0
def test_when_get_recognizers_then_returns_supported_language():
    pattern = Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)
    pattern_recognizer = PatternRecognizer(
        "ROCKET",
        name="Rocket recognizer RU",
        patterns=[pattern],
        supported_language="ru",
    )
    mock_recognizer_registry = RecognizerRegistryMock()
    mock_recognizer_registry.add_recognizer(pattern_recognizer)
    analyze_engine = AnalyzerEngine(
        registry=mock_recognizer_registry,
        nlp_engine=NlpEngineMock(),
    )
    response = analyze_engine.get_recognizers(language="ru")
    # there is only 1 mocked russian recognizer
    assert len(response) == 1
예제 #7
0
def test_when_default_threshold_is_zero_then_all_results_pass(
        loaded_registry, unit_test_guid):
    text = " Credit card: 4095-2609-9393-4932,  my phone is 425 8829090"
    language = "en"
    entities = ["CREDIT_CARD", "PHONE_NUMBER"]

    # This analyzer engine is different from the global one, as this one
    # also loads SpaCy so it can detect the phone number entity

    analyzer_engine = AnalyzerEngine(registry=loaded_registry,
                                     nlp_engine=NlpEngineMock())
    results = analyzer_engine.analyze(
        correlation_id=unit_test_guid,
        text=text,
        entities=entities,
        language=language,
    )

    assert len(results) == 2
def test_get_recognizers_returns_supported_language():
    pattern = Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)
    pattern_recognizer = PatternRecognizer(
        "ROCKET",
        name="Rocket recognizer RU",
        patterns=[pattern],
        supported_language="ru",
    )

    recognizers_store_api_mock = RecognizerStoreApiMock()
    recognizers_store_api_mock.add_custom_pattern_recognizer(
        pattern_recognizer)
    analyze_engine = AnalyzerEngine(
        registry=MockRecognizerRegistry(recognizers_store_api_mock),
        nlp_engine=NlpEngineMock(),
    )
    request = RecognizersAllRequest(language="ru")
    response = analyze_engine.GetAllRecognizers(request, None)
    # there is only 1 mocked russian recognizer
    assert len(response) == 1
def test_get_recognizers_returns_added_custom():
    pattern = Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)
    pattern_recognizer = PatternRecognizer("ROCKET",
                                           name="Rocket recognizer",
                                           patterns=[pattern])

    recognizers_store_api_mock = RecognizerStoreApiMock()

    analyze_engine = AnalyzerEngine(
        registry=MockRecognizerRegistry(recognizers_store_api_mock),
        nlp_engine=NlpEngineMock(),
    )
    request = RecognizersAllRequest(language="en")
    response = analyze_engine.GetAllRecognizers(request, None)
    # there are 15 predefined recognizers
    assert len(response) == 15
    recognizers_store_api_mock.add_custom_pattern_recognizer(
        pattern_recognizer)
    response = analyze_engine.GetAllRecognizers(request, None)
    # there are 15 predefined recognizers and one custom
    assert len(response) == 16
def test_added_pattern_recognizer_works(unit_test_guid):
    pattern = Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)
    pattern_recognizer = PatternRecognizer("ROCKET",
                                           name="Rocket recognizer",
                                           patterns=[pattern])

    # Make sure the analyzer doesn't get this entity
    recognizers_store_api_mock = RecognizerStoreApiMock()
    analyze_engine = AnalyzerEngine(
        registry=MockRecognizerRegistry(recognizers_store_api_mock),
        nlp_engine=NlpEngineMock(),
    )
    text = "rocket is my favorite transportation"
    entities = ["CREDIT_CARD", "ROCKET"]

    results = analyze_engine.analyze(
        correlation_id=unit_test_guid,
        text=text,
        entities=entities,
        language="en",
        all_fields=False,
    )

    assert len(results) == 0

    # Add a new recognizer for the word "rocket" (case insensitive)
    recognizers_store_api_mock.add_custom_pattern_recognizer(
        pattern_recognizer)

    # Check that the entity is recognized:
    results = analyze_engine.analyze(
        correlation_id=unit_test_guid,
        text=text,
        entities=entities,
        language="en",
        all_fields=False,
    )

    assert len(results) == 1
    assert_result(results[0], "ROCKET", 0, 7, 0.8)
예제 #11
0
def test_when_analyze_added_pattern_recognizer_then_succeed(unit_test_guid):
    pattern = Pattern("rocket pattern", r"\W*(rocket)\W*", 0.8)
    pattern_recognizer = PatternRecognizer("ROCKET",
                                           name="Rocket recognizer",
                                           patterns=[pattern])

    mock_recognizer_registry = RecognizerRegistryMock()

    # Make sure the analyzer doesn't get this entity
    analyze_engine = AnalyzerEngine(
        registry=mock_recognizer_registry,
        nlp_engine=NlpEngineMock(),
    )
    text = "rocket is my favorite transportation"
    entities = ["CREDIT_CARD", "ROCKET"]

    results = analyze_engine.analyze(
        correlation_id=unit_test_guid,
        text=text,
        entities=entities,
        language="en",
    )

    assert len(results) == 0

    # Add a new recognizer for the word "rocket" (case insensitive)
    mock_recognizer_registry.add_recognizer(pattern_recognizer)

    # Check that the entity is recognized:
    results = analyze_engine.analyze(
        correlation_id=unit_test_guid,
        text=text,
        entities=entities,
        language="en",
    )

    assert len(results) == 1
    assert_result(results[0], "ROCKET", 0, 7, 0.8)