Example #1
0
def test_given_a_unsupported_language_for_supported_entities_then_expect_an_error():
    """Query supported entities with ``language=he`` and expect an error.

    The call must report status 500 and a JSON body carrying the
    "No matching recognizers" error message.
    """
    status, body = analyzer_supported_entities("language=he")

    expected_error = """
       {"error": "No matching recognizers were found to serve the request."}
    """
    # Check the status first so a wrong code is reported before the body diff.
    assert status == 500
    assert equal_json_strings(expected_error, body)
Example #2
0
def test_given_an_illegal_input_for_supported_entities_then_igonre_and_proceed():
    """Query supported entities with an unrecognized query parameter.

    The request is expected to succeed (status 200) and the body must be
    JSON-equal to the entity list spelled out below.
    """
    status, body = analyzer_supported_entities("uknown=input")

    expected_entities = """ 
        ["PHONE_NUMBER", "US_DRIVER_LICENSE", "US_PASSPORT", "SG_NRIC_FIN", "LOCATION", "CREDIT_CARD", 
         "CRYPTO", "UK_NHS", "US_SSN", "US_BANK_NUMBER", "EMAIL_ADDRESS", "DATE_TIME", "IP_ADDRESS",
          "PERSON", "IBAN_CODE", "NRP", "US_ITIN", "DOMAIN_NAME"]
    """
    # Check the status first so a wrong code is reported before the body diff.
    assert status == 200
    assert equal_json_strings(expected_entities, body)
def test_given_a_correct_analyze_input_with_high_threshold_and_unmatched_entities_then_anonymize_partially():
    """Analyze with a 0.7 score threshold, then anonymize what was found.

    Steps:
      1. Fetch the supported entities for ``language=en`` (must return 200).
      2. Analyze the sample text restricted to those entities with
         ``score_threshold`` 0.7; only a PERSON result (0-10, score 0.85)
         is expected.
      3. Anonymize using the value returned by ``analyze_and_assert`` as
         ``analyzer_results``; only the PERSON span is expected to be
         replaced, leaving the licence number in the output text.
    """
    status, body = analyzer_supported_entities("language=en")

    assert status == 200
    supported_entities = json.loads(body)

    text = "John Smith drivers license is AC432223"

    analyzer_request = {
        "text": text,
        "language": "en",
        "score_threshold": 0.7,
        "entities": supported_entities,
    }

    expected_analysis = """
    [
        {"entity_type": "PERSON", "start": 0, "end": 10, "score": 0.85, "analysis_explanation": null}
    ]
    """

    analyzer_results = analyze_and_assert(analyzer_request, expected_analysis)

    # Per-entity operator configuration; DEFAULT applies to any other entity.
    operators = {
        "DEFAULT": {"type": "replace", "new_value": "ANONYMIZED"},
        "US_DRIVER_LICENSE": {
            "type": "mask",
            "masking_char": "*",
            "chars_to_mask": 4,
            "from_end": True,
        },
        "PERSON": {"type": "replace", "new_value": "<PERSON>"},
    }

    anonymizer_request = {
        "text": text,
        "anonymizers": operators,
        "analyzer_results": analyzer_results,
    }

    expected_anonymization = """{"text": "<PERSON> drivers license is AC432223", "items": [{"operator": "replace", "entity_type": "PERSON", "start": 0, "end": 8, "text": "<PERSON>"}]}"""

    anonymize_and_assert(anonymizer_request, expected_anonymization)