Ejemplo n.º 1
0
def test_given_anonymize_called_with_valid_request_then_expected_valid_response_returned():
    """Send a well-formed anonymize request and check the anonymized output.

    The request carries overlapping NAME / FIRST_NAME / LAST_NAME results plus
    a PHONE_NUMBER result. The expected body replaces the name with the
    DEFAULT replacement value and masks the last four characters of the number.
    """
    payload = """
    {
        "text": "hello world, my name is Jane Doe. My number is: 034453334",
        "anonymizers": {
            "DEFAULT": { "type": "replace", "new_value": "ANONYMIZED" },
            "PHONE_NUMBER": { "type": "mask", "masking_char": "*", "chars_to_mask": 4, "from_end": true }
        },
        "analyzer_results": [
            { "start": 24, "end": 32, "score": 0.8, "entity_type": "NAME" },
            { "start": 24, "end": 28, "score": 0.8, "entity_type": "FIRST_NAME" },
            { "start": 29, "end": 32, "score": 0.6, "entity_type": "LAST_NAME" },
            { "start": 48, "end": 57,  "score": 0.95,
                "entity_type": "PHONE_NUMBER" }
        ]
    }
    """

    status, content = anonymize(payload)

    expected = """{"text": "hello world, my name is ANONYMIZED. My number is: 03445****", "items": [{"anonymizer": "mask", "entity_type": "PHONE_NUMBER", "start": 50, "end": 59, "anonymized_text": "03445****"}, {"anonymizer": "replace", "entity_type": "NAME", "start": 24, "end": 34, "anonymized_text": "ANONYMIZED"}]}"""
    assert status == 200
    assert equal_json_strings(expected, content)
Ejemplo n.º 2
0
def test_given_decision_process_enabled_for_analyze_input_then_return_response_with_decision_process():
    """Analyze with ``return_decision_process`` enabled.

    Each expected result carries a populated ``analysis_explanation`` object
    (recognizer, pattern, scores, context word) instead of ``null``.
    """
    payload = """
    {
        "text": "John Smith drivers license is AC432223", "language": "en", "return_decision_process": true
    }
    """
    status, content = analyze(payload)

    # The doubled backslashes in "pattern" survive both Python and JSON
    # unescaping, so the compared value holds a literal ``\b``.
    expected = """
    [
        {"entity_type": "PERSON", "start": 0, "end": 10, "score": 0.85, 
        "analysis_explanation": {
            "recognizer": "SpacyRecognizer", "pattern_name": null, "pattern": null, "original_score": 0.85, "score": 0.85, 
            "textual_explanation": "Identified as PERSON by Spacy's Named Entity Recognition", 
            "score_context_improvement": 0, "supportive_context_word": "", "validation_result": null 
            }
        },
        {"entity_type": "US_DRIVER_LICENSE", "start": 30, "end": 38, "score": 0.6499999999999999, 
        "analysis_explanation": {
            "recognizer": "UsLicenseRecognizer", "pattern_name": "Driver License - Alphanumeric (weak)", 
            "pattern": "\\\\b([A-Z][0-9]{3,6}|[A-Z][0-9]{5,9}|[A-Z][0-9]{6,8}|[A-Z][0-9]{4,8}|[A-Z][0-9]{9,11}|[A-Z]{1,2}[0-9]{5,6}|H[0-9]{8}|V[0-9]{6}|X[0-9]{8}|A-Z]{2}[0-9]{2,5}|[A-Z]{2}[0-9]{3,7}|[0-9]{2}[A-Z]{3}[0-9]{5,6}|[A-Z][0-9]{13,14}|[A-Z][0-9]{18}|[A-Z][0-9]{6}R|[A-Z][0-9]{9}|[A-Z][0-9]{1,12}|[0-9]{9}[A-Z]|[A-Z]{2}[0-9]{6}[A-Z]|[0-9]{8}[A-Z]{2}|[0-9]{3}[A-Z]{2}[0-9]{4}|[A-Z][0-9][A-Z][0-9][A-Z]|[0-9]{7,8}[A-Z])\\\\b", 
            "original_score": 0.3, "score": 0.6499999999999999, "textual_explanation": null, 
            "score_context_improvement": 0.3499999999999999, "supportive_context_word": "driver", "validation_result": null
            }
        }
    ]
    """
    assert status == 200
    assert equal_json_strings(expected, content)
Ejemplo n.º 3
0
def test_given_decrypt_called_with_invalid_key_then_invalid_input_response_returned():
    """Deanonymize with a ``decrypt`` operator whose key has an invalid length.

    The call is expected to answer 422 with a key-length error message.
    """
    text = "e6HnOMnIxbd4a8Qea44LshQDnjvxwzBIaAz + YqHNnMW2mC5r3AWoay8Spsoajyyy"
    decrypt_operator = {"type": "decrypt", "key": "invalidkey"}
    # A single result spanning the whole input text.
    whole_text_result = {"start": 0, "end": len(text), "entity_type": "NUMBER"}
    request_body = {
        "text": text,
        "deanonymizers": {"NUMBER": decrypt_operator},
        "anonymizer_results": [whole_text_result],
    }

    serialized_request = json.dumps(request_body)
    status, content = deanonymize(serialized_request)

    expected = """
    {
        "error": "Invalid input, key must be of length 128, 192 or 256 bits"
    }
    """

    assert status == 422
    assert equal_json_strings(expected, content)
Ejemplo n.º 4
0
def test_given_decrypt_called_with_encrypted_text_then_decrypted_text_returned():
    """Deanonymize an encrypted string with the ``decrypt`` operator.

    With the key "1111111111111111" the expected body contains the plain text
    "text_for_encryption" and one item describing the decrypted span.
    """
    text = "e6HnOMnIxbd4a8Qea44LshQDnjvxwzBIaAz+YqHNnMW2mC5r3AWoay8Spsoajyyy"
    decrypt_operator = {"type": "decrypt", "key": "1111111111111111"}
    # A single result spanning the whole input text.
    whole_text_result = {"start": 0, "end": len(text), "entity_type": "NUMBER"}
    request_body = {
        "text": text,
        "deanonymizers": {"NUMBER": decrypt_operator},
        "anonymizer_results": [whole_text_result],
    }

    serialized_request = json.dumps(request_body)
    status, content = deanonymize(serialized_request)

    expected = """{"text": "text_for_encryption", "items": [{"start": 0, "end": 19, "operator":"decrypt", "text": "text_for_encryption","entity_type":"NUMBER"}]}"""

    assert status == 200
    assert equal_json_strings(expected, content)
Ejemplo n.º 5
0
def test_given_ad_hoc_deny_list_recognizer_the_right_entities_are_returned():
    """Analyze with two ad-hoc deny-list recognizers supplied in the request.

    Only the "Mr." title occurs in the text, so the expected results contain
    an MR_TITLE entity and no MS_TITLE entity, next to PERSON and
    US_DRIVER_LICENSE.
    """
    payload = r"""
    {
        "text": "Mr. John Smith's drivers license is AC432223",
        "language": "en",
        "ad_hoc_recognizers":[
            {
            "name": "Mr. Recognizer",
            "supported_language": "en",
            "deny_list": ["Mr", "Mr.", "Mister"],
            "supported_entity":"MR_TITLE"
            },
            {
            "name": "Ms. Recognizer",
            "supported_language": "en",
            "deny_list": ["Ms", "Ms.", "Miss", "Mrs", "Mrs."],
            "supported_entity":"MS_TITLE"
            }
        ]
    }
     """

    status, content = analyze(payload)

    expected = """
     [
         {"entity_type": "PERSON", "start": 4, "end": 14, "score": 0.85, "analysis_explanation":null},
         {"entity_type": "US_DRIVER_LICENSE", "start": 36, "end": 44, "score": 0.6499999999999999, "analysis_explanation":null},
         {"entity_type": "MR_TITLE", "start": 0, "end": 3, "score": 1.0, "analysis_explanation":null}
     ]
     """
    assert status == 200
    assert equal_json_strings(expected, content)
Ejemplo n.º 6
0
def test_given_ad_hoc_pattern_recognizer_context_raises_confidence():
    """Analyze with an ad-hoc pattern recognizer that declares context words.

    The zip-code pattern is registered with score 0.01 and context words
    "zip" / "code"; the expected ZIP result carries score 0.4.
    """
    # Raw string: the JSON-level ``\\b`` / ``\\d`` escapes are sent verbatim.
    payload = r"""
     {
         "text": "John Smith drivers license is AC432223. Zip code: 10023",
         "language": "en",
         "ad_hoc_recognizers":[
             {
                "name": "Zip code Recognizer",
                "supported_language": "en",
                "patterns": [
                    {
                    "name": "zip code (weak)", 
                    "regex": "(\\b\\d{5}(?:\\-\\d{4})?\\b)", 
                    "score": 0.01
                    }
                ],
                "context": ["zip", "code"],
                "supported_entity":"ZIP"
            }
        ]
     }
     """

    status, content = analyze(payload)

    expected = """
     [
         {"entity_type": "PERSON", "start": 0, "end": 10, "score": 0.85, "analysis_explanation":null},
         {"entity_type": "US_DRIVER_LICENSE", "start": 30, "end": 38, "score": 0.6499999999999999, "analysis_explanation":null},
         {"entity_type": "ZIP", "start": 50, "end": 55, "score": 0.4, "analysis_explanation":null}
     ]
     """
    assert status == 200
    assert equal_json_strings(expected, content)
Ejemplo n.º 7
0
def test_given_wrong_ad_hoc_json_exception_is_given():
    """Analyze with an ad-hoc recognizer whose pattern uses unknown field names.

    The pattern object uses "type" / "bebex" / "confidence" where the other
    tests in this file use "name" / "regex" / "score". The expected answer is
    a 400 with a parse error that mentions the unexpected keyword ``type``.
    """
    bad_payload = r"""
      {
          "text": "John Smith drivers license is AC432223. Zip code: 10023",
          "language": "en",
          "ad_hoc_recognizers":[
              {
                 "name": "Zip code Recognizer",
                 "supported_language": "en",
                 "patterns": [
                     {
                     "type": "zip code (weak)", 
                     "bebex": "(\\b\\d{5}(?:\\-\\d{4})?\\b)", 
                     "confidence": 0.01
                     }
                 ],
                 "supported_entity":"ZIP"
             }
         ]
      }
      """
    status, content = analyze(bad_payload)

    expected = """
    {
        "error":"Failed to parse /analyze request for AnalyzerEngine.analyze(). __init__() got an unexpected keyword argument \'type\'"
    }
    """

    # The body is checked before the status code, as in the original test.
    assert equal_json_strings(expected, content)
    assert status == 400
Ejemplo n.º 8
0
def test_given_no_image_then_we_fail():
    """Call ``redact`` with an empty string and expect a 422 error body."""
    # black redact
    expected = """
        {"error": "Invalid parameter, please add image data"}
    """
    result = redact("")
    assert result.status_code == 422
    actual = result.content.decode()
    assert equal_json_strings(actual, expected)
Ejemplo n.º 9
0
def test_given_anonymizers_called_then_expected_anonymizers_list_returned():
    """Request the anonymizer list and compare it with the expected names."""
    status, content = anonymizers()

    expected = """
        ["hash", "mask", "redact", "replace", "encrypt", "custom"]
    """

    assert status == 200
    assert equal_json_strings(expected, content)
Ejemplo n.º 10
0
def test_given_no_analyze_text_input_then_return_error():
    """Analyze an empty JSON object; expect a 500 "No text provided" error."""
    payload = "{}"

    status, content = analyze(payload)

    expected = """
        {"error": "No text provided"}
    """
    assert status == 500
    assert equal_json_strings(expected, content)
Ejemplo n.º 11
0
def test_given_a_unsupported_language_for_supported_entities_then_expect_an_error():
    """Query supported entities with ``language=he``; expect a 500 error body."""
    query = "language=he"

    status, content = analyzer_supported_entities(query)

    expected = """
       {"error": "No matching recognizers were found to serve the request."}
    """
    assert status == 500
    assert equal_json_strings(expected, content)
Ejemplo n.º 12
0
def test_given_decrypt_called_with_missing_key_then_invalid_input_response_returned():
    """Deanonymize a payload that contains only ``text``.

    With no operators or results supplied, the expected answer is a 200 whose
    body echoes the input text and has an empty ``items`` list.
    """
    payload = """
    {
        "text": "e6HnOMnIxbd4a8Qea44LshQDnjvxwzBIaAz+YqHNnMW2mC5r3AWoay8Spsoajyyy"
    }
    """

    status, content = deanonymize(payload)

    expected = """{"text": "e6HnOMnIxbd4a8Qea44LshQDnjvxwzBIaAz+YqHNnMW2mC5r3AWoay8Spsoajyyy", "items": []}"""
    assert status == 200
    assert equal_json_strings(expected, content)
Ejemplo n.º 13
0
def test_given_a_incorrect_analyze_language_input_then_return_error():
    """Analyze with language code "zz"; expect a 500 no-recognizers error."""
    payload = """
    {
        "text": "John Smith drivers license is AC432223", "language": "zz"
    }
    """

    status, content = analyze(payload)

    # The status code is checked before the expected body is built, as in the
    # original test.
    assert status == 500
    expected = """ 
         {"error": "No matching recognizers were found to serve the request."}
    """
    assert equal_json_strings(expected, content)
Ejemplo n.º 14
0
def test_given_analyze_text_no_language_input_then_return_error():
    """Analyze a payload without ``language``; expect a 500 error body."""
    payload = """
    {
        "text": "John Smith drivers license is AC432223"
    }
    """

    status, content = analyze(payload)

    expected = """ 
        {"error": "No language provided"} 
    """
    assert status == 500
    assert equal_json_strings(expected, content)
Ejemplo n.º 15
0
def test_given_an_illegal_input_for_supported_entities_then_igonre_and_proceed():
    """Query supported entities with an unrecognized query parameter.

    The expected answer is still a 200 with the entity list below.
    """
    query = "uknown=input"

    status, content = analyzer_supported_entities(query)

    expected = """ 
        ["PHONE_NUMBER", "US_DRIVER_LICENSE", "US_PASSPORT", "SG_NRIC_FIN", "LOCATION", "CREDIT_CARD", 
         "CRYPTO", "UK_NHS", "US_SSN", "US_BANK_NUMBER", "EMAIL_ADDRESS", "DATE_TIME", "IP_ADDRESS",
          "PERSON", "IBAN_CODE", "NRP", "US_ITIN", "DOMAIN_NAME"]
    """
    assert status == 200
    assert equal_json_strings(expected, content)
Ejemplo n.º 16
0
def test_given_decrypt_called_with_missing_payload_then_bad_request_response_returned():
    """Deanonymize an empty JSON object; expect 400 "Invalid request json"."""
    payload = """
    { }
    """

    status, content = deanonymize(payload)

    expected = """
    {
        "error": "Invalid request json"
    }
    """

    assert status == 400
    assert equal_json_strings(expected, content)
Ejemplo n.º 17
0
def test_given_anonymize_called_with_custom_then_bad_request_error_returned():
    """Anonymize with the ``custom`` operator type and expect it to be rejected.

    The request asks for a "custom" anonymizer with a lambda given as a
    string. The expected answer is a 400 with the body
    ``{"error": "Custom type anonymizer is not supported"}``.
    """
    # The two addresses are 16 characters each so they line up with the
    # analyzer_results spans 39-55 and 60-76. The previous fixture held the
    # 17-character placeholder "[email protected]", which did not match those
    # offsets.
    # NOTE(review): the original addresses are unknown; these are stand-ins of
    # the right length — confirm against the upstream test if available.
    request_body = """
    {
        "text": "The user has the following two emails: jane@example.com and john@example.com",
        "anonymizers": {
            "DEFAULT": { "type": "custom", "new_value": "lambda x:  x[::-1]" }            
        },
        "analyzer_results": [
            { "start": 39, "end": 55, "score": 1.0, "entity_type": "EMAIL_ADDRESS" },
            { "start": 60, "end": 76, "score": 1.0, "entity_type": "EMAIL_ADDRESS" }
        ]
    }
    """
    response_status, response_content = anonymize(request_body)

    expected_response = '{"error": "Custom type anonymizer is not supported"}'
    assert response_status == 400
    assert equal_json_strings(expected_response, response_content)
Ejemplo n.º 18
0
def test_given_decrypt_called_with_missing_text_then_invalid_input_response_returned():
    """Deanonymize a payload that has a ``key`` but no ``text``.

    The expected answer is a 422 stating that text can not be empty.
    """
    payload = """
    {
        "key": "1111111111111111"
    }
    """

    status, content = deanonymize(payload)

    expected = """
    {
        "error": "Invalid input, text can not be empty"
    }
    """

    assert status == 422
    assert equal_json_strings(expected, content)
Ejemplo n.º 19
0
def test_given_a_correct_analyze_input_then_return_full_response():
    """Analyze a valid English sentence.

    The expected results are PERSON and US_DRIVER_LICENSE, each with a
    ``null`` ``analysis_explanation``.
    """
    payload = """
    {
        "text": "John Smith drivers license is AC432223",
        "language": "en"
    }
    """

    status, content = analyze(payload)

    expected = """
    [
        {"entity_type": "PERSON", "start": 0, "end": 10, "score": 0.85, "analysis_explanation":null},
        {"entity_type": "US_DRIVER_LICENSE", "start": 30, "end": 38, "score": 0.6499999999999999, "analysis_explanation":null}
    ]
    """
    assert status == 200
    assert equal_json_strings(expected, content)
Ejemplo n.º 20
0
def test_given_anonymize_called_with_deformed_body_then_internal_server_error_returned():
    """Anonymize with a body that is not valid JSON.

    The payload has trailing commas inside ``anonymizers`` and
    ``analyzer_results``. The expected answer is a 500 "Internal server error".
    """
    # The trailing commas are the "deformation" under test; keep them.
    deformed_payload = """
    {
        "text": "hello world, my name is Jane Doe. My number is: 034453334",
        "anonymizers": {
            "DEFAULT": {"type": "replace", "new_value": "ANONYMIZED"},
        },
        "analyzer_results": [
            {"start": 24, "end": 32, "score": 0.8, "entity_type": "NAME"},
            {"start": 24, "end": 28, "score": 0.8, "entity_type": "FIRST_NAME"},
        ]
    }
    """
    status, content = anonymize(deformed_payload)

    expected = '{"error": "Internal server error"}'
    assert status == 500
    assert equal_json_strings(expected, content)
Ejemplo n.º 21
0
def test_given_anonymize_called_with_empty_analyzer_results_then_unchanged_text_is_returned():
    """Anonymize with an empty ``analyzer_results`` list.

    The expected answer is a 200 whose body echoes the input text and has an
    empty ``items`` list.
    """
    payload = """
    {
        "text": "hello world! nice to meet you!",
        "anonymizers": {
            "DEFAULT": { "type": "replace", "new_value": "ANONYMIZED" },
            "PHONE_NUMBER": { "type": "mask", "masking_char": "*", "chars_to_mask": 4, "from_end": true }
        },
        "analyzer_results": [

        ]
    }
    """
    status, content = anonymize(payload)

    expected = """{"text": "hello world! nice to meet you!", "items": []}"""
    assert status == 200
    assert equal_json_strings(expected, content)
Ejemplo n.º 22
0
def test_given_anonymize_called_with_empty_text_then_invalid_input_message_returned():
    """Anonymize with an empty ``text``; expect a 422 empty-text error."""
    payload = """
    {
        "text": "",
        "anonymizers": {
            "DEFAULT": { "type": "replace", "new_value": "ANONYMIZED" }
        },
        "analyzer_results": [
            { "start": 24, "end": 32, "score": 0.8, "entity_type": "NAME" }
        ]
    }
    """

    status, content = anonymize(payload)

    expected = '{"error": "Invalid input, text can not be empty"}'
    assert status == 422
    assert equal_json_strings(expected, content)
Ejemplo n.º 23
0
def test_given_decrypt_called_with_missing_text_then_invalid_input_response_returned():
    """Call ``decrypt`` with a ``key`` but no ``text``.

    The expected answer is a 422 "Expected parameter text".
    """
    payload = """
    {
        "key": "1111111111111111"
    }
    """

    status, content = decrypt(payload)

    expected = """
    {
        "error": "Expected parameter text"
    }
    """

    assert status == 422
    assert equal_json_strings(expected, content)
Ejemplo n.º 24
0
def test_given_analyze_threshold_input_then_return_result_above_threshold():
    """Analyze with ``score_threshold`` set to 0.7.

    Only the PERSON result (score 0.85) is expected in the response.
    """
    payload = """
    {
        "text": "John Smith drivers license is AC432223", 
        "language": "en", "score_threshold": 0.7
    }
    """

    status, content = analyze(payload)

    expected = """
    [
        {"entity_type": "PERSON", "start": 0, "end": 10, "score": 0.85, 
        "analysis_explanation": null
        }
    ]
    """
    assert status == 200
    assert equal_json_strings(expected, content)
Ejemplo n.º 25
0
def test_given_decrypt_called_with_invalid_key_then_invalid_input_response_returned():
    """Call ``decrypt`` with the key "invalidkey".

    The expected answer is a 422 with a key-length error message.
    """
    payload = """
    {
        "key": "invalidkey",
        "text": "e6HnOMnIxbd4a8Qea44LshQDnjvxwzBIaAz+YqHNnMW2mC5r3AWoay8Spsoajyyy"
    }
    """

    status, content = decrypt(payload)

    expected = """
    {
        "error": "Invalid input, key must be of length 128, 192 or 256 bits"
    }
    """

    assert status == 422
    assert equal_json_strings(expected, content)
Ejemplo n.º 26
0
def test_given_decrypt_called_with_encrypted_text_then_decrypted_text_returned():
    """Call ``decrypt`` with the key "1111111111111111" and an encrypted text.

    The expected answer is a 200 whose ``result`` is "text_for_encryption".
    """
    payload = """
    {
        "key": "1111111111111111",
        "text": "e6HnOMnIxbd4a8Qea44LshQDnjvxwzBIaAz+YqHNnMW2mC5r3AWoay8Spsoajyyy"
    }
    """

    status, content = decrypt(payload)

    expected = """
    {
        "result": "text_for_encryption"
    }
    """

    assert status == 200
    assert equal_json_strings(expected, content)
def anonymize_and_assert(anonymizer_request, expected_response):
    """Serialize the request, call ``anonymize`` and check the outcome.

    Asserts that the status is 200 and that the response body matches
    ``expected_response`` according to ``equal_json_strings``.
    """
    serialized_request = json.dumps(anonymizer_request)
    status, content = anonymize(serialized_request)
    assert status == 200
    assert equal_json_strings(expected_response, content)
def analyze_and_assert(analyzer_request, expected_response):
    """Serialize the request, call ``analyze``, check the outcome, return data.

    Asserts that the status is 200 and that the response body matches
    ``expected_response`` according to ``equal_json_strings``, then returns
    the response body parsed with ``json.loads``.
    """
    serialized_request = json.dumps(analyzer_request)
    status, content = analyze(serialized_request)
    assert status == 200
    assert equal_json_strings(expected_response, content)
    return json.loads(content)