Beispiel #1
0
def test_given_ad_hoc_deny_list_recognizer_the_right_entities_are_returned():
    """Ad-hoc deny-list recognizers supplied in the request produce entities.

    The request carries two deny-list recognizers (MR_TITLE and MS_TITLE).
    The text only contains "Mr.", so the expected result holds one MR_TITLE
    span next to the PERSON and US_DRIVER_LICENSE entities.
    """
    payload = r"""
    {
        "text": "Mr. John Smith's drivers license is AC432223",
        "language": "en",
        "ad_hoc_recognizers":[
            {
            "name": "Mr. Recognizer",
            "supported_language": "en",
            "deny_list": ["Mr", "Mr.", "Mister"],
            "supported_entity":"MR_TITLE"
            },
            {
            "name": "Ms. Recognizer",
            "supported_language": "en",
            "deny_list": ["Ms", "Ms.", "Miss", "Mrs", "Mrs."],
            "supported_entity":"MS_TITLE"
            }
        ]
    }
     """

    expected = """
     [
         {"entity_type": "PERSON", "start": 4, "end": 14, "score": 0.85, "analysis_explanation":null},
         {"entity_type": "US_DRIVER_LICENSE", "start": 36, "end": 44, "score": 0.6499999999999999, "analysis_explanation":null},
         {"entity_type": "MR_TITLE", "start": 0, "end": 3, "score": 1.0, "analysis_explanation":null}
     ]
     """

    status, body = analyze(payload)

    assert status == 200
    assert equal_json_strings(expected, body)
Beispiel #2
0
def test_given_ad_hoc_pattern_recognizer_context_raises_confidence():
    """An ad-hoc pattern recognizer with context words gets a boosted score.

    The zip-code pattern is declared with a score of 0.01 and the context
    words "zip" and "code"; the expected ZIP result carries a score of 0.4.
    """
    payload = r"""
     {
         "text": "John Smith drivers license is AC432223. Zip code: 10023",
         "language": "en",
         "ad_hoc_recognizers":[
             {
                "name": "Zip code Recognizer",
                "supported_language": "en",
                "patterns": [
                    {
                    "name": "zip code (weak)", 
                    "regex": "(\\b\\d{5}(?:\\-\\d{4})?\\b)", 
                    "score": 0.01
                    }
                ],
                "context": ["zip", "code"],
                "supported_entity":"ZIP"
            }
        ]
     }
     """

    expected = """
     [
         {"entity_type": "PERSON", "start": 0, "end": 10, "score": 0.85, "analysis_explanation":null},
         {"entity_type": "US_DRIVER_LICENSE", "start": 30, "end": 38, "score": 0.6499999999999999, "analysis_explanation":null},
         {"entity_type": "ZIP", "start": 50, "end": 55, "score": 0.4, "analysis_explanation":null}
     ]
     """

    status, body = analyze(payload)

    assert status == 200
    assert equal_json_strings(expected, body)
Beispiel #3
0
def test_given_wrong_ad_hoc_json_exception_is_given():
    """A malformed ad-hoc pattern definition is rejected with status 400.

    The pattern entry uses the keys "type", "bebex" and "confidence"; the
    expected error message reports 'type' as an unexpected keyword argument.
    """
    bad_payload = r"""
      {
          "text": "John Smith drivers license is AC432223. Zip code: 10023",
          "language": "en",
          "ad_hoc_recognizers":[
              {
                 "name": "Zip code Recognizer",
                 "supported_language": "en",
                 "patterns": [
                     {
                     "type": "zip code (weak)", 
                     "bebex": "(\\b\\d{5}(?:\\-\\d{4})?\\b)", 
                     "confidence": 0.01
                     }
                 ],
                 "supported_entity":"ZIP"
             }
         ]
      }
      """

    expected = """
    {
        "error":"Failed to parse /analyze request for AnalyzerEngine.analyze(). __init__() got an unexpected keyword argument \'type\'"
    }
    """

    status, body = analyze(bad_payload)

    # The error payload is checked before the status code, as in the original.
    assert equal_json_strings(expected, body)
    assert status == 400
Beispiel #4
0
def test_given_decision_process_enabled_for_analyze_input_then_return_response_with_decision_process():
    """With "return_decision_process": true each result carries an explanation.

    The expected response has a populated "analysis_explanation" object
    (recognizer, pattern, original and final score, context details) for both
    the PERSON and the US_DRIVER_LICENSE entity.
    """
    payload = """
    {
        "text": "John Smith drivers license is AC432223", "language": "en", "return_decision_process": true
    }
    """

    # Non-raw string: the four backslashes in "pattern" become an escaped
    # backslash in the JSON text.
    expected = """
    [
        {"entity_type": "PERSON", "start": 0, "end": 10, "score": 0.85, 
        "analysis_explanation": {
            "recognizer": "SpacyRecognizer", "pattern_name": null, "pattern": null, "original_score": 0.85, "score": 0.85, 
            "textual_explanation": "Identified as PERSON by Spacy's Named Entity Recognition", 
            "score_context_improvement": 0, "supportive_context_word": "", "validation_result": null 
            }
        },
        {"entity_type": "US_DRIVER_LICENSE", "start": 30, "end": 38, "score": 0.6499999999999999, 
        "analysis_explanation": {
            "recognizer": "UsLicenseRecognizer", "pattern_name": "Driver License - Alphanumeric (weak)", 
            "pattern": "\\\\b([A-Z][0-9]{3,6}|[A-Z][0-9]{5,9}|[A-Z][0-9]{6,8}|[A-Z][0-9]{4,8}|[A-Z][0-9]{9,11}|[A-Z]{1,2}[0-9]{5,6}|H[0-9]{8}|V[0-9]{6}|X[0-9]{8}|A-Z]{2}[0-9]{2,5}|[A-Z]{2}[0-9]{3,7}|[0-9]{2}[A-Z]{3}[0-9]{5,6}|[A-Z][0-9]{13,14}|[A-Z][0-9]{18}|[A-Z][0-9]{6}R|[A-Z][0-9]{9}|[A-Z][0-9]{1,12}|[0-9]{9}[A-Z]|[A-Z]{2}[0-9]{6}[A-Z]|[0-9]{8}[A-Z]{2}|[0-9]{3}[A-Z]{2}[0-9]{4}|[A-Z][0-9][A-Z][0-9][A-Z]|[0-9]{7,8}[A-Z])\\\\b", 
            "original_score": 0.3, "score": 0.6499999999999999, "textual_explanation": null, 
            "score_context_improvement": 0.3499999999999999, "supportive_context_word": "driver", "validation_result": null
            }
        }
    ]
    """

    status, body = analyze(payload)

    assert status == 200
    assert equal_json_strings(expected, body)
Beispiel #5
0
def test_given_no_analyze_text_input_then_return_error():
    """An empty JSON object as request yields status 500 and "No text provided"."""
    expected = """
        {"error": "No text provided"}
    """

    status, body = analyze("{}")

    assert status == 500
    assert equal_json_strings(expected, body)
Beispiel #6
0
def test_given_a_trace_true_analyze_input_then_return_normal_response():
    """A request that includes a "trace" field is still answered with 200.

    Only the status code is verified; the response content is not inspected.
    """
    payload = """
    {
        "text": "John Smith drivers license is AC432223", 
        "language": "en", "trace": "1"
    }
    """

    status, _ = analyze(payload)

    assert status == 200
Beispiel #7
0
def _read_resource_as_single_line(resources_dir, file_name):
    """Return the rows of a resource file, each stripped, joined by one space.

    :param resources_dir: directory that contains the resource file
    :param file_name: name of the UTF-8 encoded file inside ``resources_dir``
    :return: the file content collapsed into a single line
    """
    with open(Path(resources_dir, file_name), encoding="utf-8") as f:
        # split("\n") (not splitlines()) is kept on purpose so that a trailing
        # newline still contributes an empty row, exactly as before.
        rows = f.read().split("\n")
    return " ".join(row.strip() for row in rows)


def test_demo_website_text_returns_correct_anonymized_version():
    """Analyze and anonymize the demo text and compare with the stored result.

    The text of ``resources/demo.txt`` is analyzed (language "en", score
    threshold 0.35), the analyzer results are passed to the anonymizer, and
    the anonymized text must equal ``resources/demo_anonymized.txt``.
    Both service calls must answer with status 200; without these checks a
    failing call would only show up later as an obscure parsing error or a
    text mismatch.
    """
    resources_dir = Path(Path(__file__).resolve().parent.parent, "resources")
    text = _read_resource_as_single_line(resources_dir, "demo.txt")

    # Analyzer request info
    analyzer_request = {
        "text": text,
        "language": "en",
        "score_threshold": 0.35,
    }

    # Call analyzer
    analyzer_status_code, analyzer_content = analyze(json.dumps(analyzer_request))
    assert analyzer_status_code == 200, analyzer_content
    analyzer_data = json.loads(analyzer_content)

    # Anonymizer request info
    anonymizer_request = {
        "text": analyzer_request["text"],
        "analyzer_results": analyzer_data,
    }

    # Call anonymizer
    anonymizer_status_code, anonymizer_response = anonymize(
        json.dumps(anonymizer_request)
    )
    assert anonymizer_status_code == 200, anonymizer_response
    actual_anonymized_text = json.loads(anonymizer_response)["text"]

    # Expected output
    expected_anonymized_text = _read_resource_as_single_line(
        resources_dir, "demo_anonymized.txt"
    )

    assert expected_anonymized_text == actual_anonymized_text
Beispiel #8
0
def test_given_a_incorrect_analyze_language_input_then_return_error():
    """A request with the language code "zz" yields status 500 and an error.

    The expected error states that no matching recognizers were found.
    """
    payload = """
    {
        "text": "John Smith drivers license is AC432223", "language": "zz"
    }
    """
    expected = """ 
         {"error": "No matching recognizers were found to serve the request."}
    """

    status, body = analyze(payload)

    assert status == 500
    assert equal_json_strings(expected, body)
Beispiel #9
0
def test_given_analyze_text_no_language_input_then_return_error():
    """A request without a "language" field yields 500 and "No language provided"."""
    payload = """
    {
        "text": "John Smith drivers license is AC432223"
    }
    """
    expected = """ 
        {"error": "No language provided"} 
    """

    status, body = analyze(payload)

    assert status == 500
    assert equal_json_strings(expected, body)
Beispiel #10
0
def test_given_a_correct_analyze_input_then_return_full_response():
    """A plain text/language request returns both expected entities.

    The expected response lists a PERSON and a US_DRIVER_LICENSE result, each
    with a null "analysis_explanation".
    """
    payload = """
    {
        "text": "John Smith drivers license is AC432223",
        "language": "en"
    }
    """
    expected = """
    [
        {"entity_type": "PERSON", "start": 0, "end": 10, "score": 0.85, "analysis_explanation":null},
        {"entity_type": "US_DRIVER_LICENSE", "start": 30, "end": 38, "score": 0.6499999999999999, "analysis_explanation":null}
    ]
    """

    status, body = analyze(payload)

    assert status == 200
    assert equal_json_strings(expected, body)
Beispiel #11
0
def test_given_analyze_threshold_input_then_return_result_above_threshold():
    """With "score_threshold": 0.7 only the PERSON result (0.85) is expected."""
    payload = """
    {
        "text": "John Smith drivers license is AC432223", 
        "language": "en", "score_threshold": 0.7
    }
    """
    expected = """
    [
        {"entity_type": "PERSON", "start": 0, "end": 10, "score": 0.85, 
        "analysis_explanation": null
        }
    ]
    """

    status, body = analyze(payload)

    assert status == 200
    assert equal_json_strings(expected, body)
def analyze_and_assert(analyzer_request, expected_response):
    """Call analyze() with the request, verify the response and return it parsed.

    :param analyzer_request: object serialized with ``json.dumps`` to form the
        request body
    :param expected_response: JSON string compared with the response content
        through ``equal_json_strings``
    :return: the response content parsed with ``json.loads``
    """
    status, content = analyze(json.dumps(analyzer_request))
    assert status == 200
    assert equal_json_strings(expected_response, content)
    return json.loads(content)