コード例 #1
0
    def test_redact_with_pii_and_only_redaction(self):
        """Check that redact() masks the detected span without calling classification.

        The mocked client reports one SSN entity at offsets 0-4; the test expects
        contains_pii_entities to be untouched, detect_pii_documents to be called
        exactly once, and the first four characters to be replaced by asterisks.
        """
        source_text = "Some Random text"
        ssn_entity = {
            'Score': 0.534,
            'Type': 'SSN',
            'BeginOffset': 0,
            'EndOffset': 4,
        }

        comprehend_client = MagicMock()
        comprehend_client.contains_pii_entities.return_value = [
            Document(text=source_text, pii_classification={'SSN': 0.53}),
        ]
        comprehend_client.detect_pii_documents.return_value = [
            Document(text=source_text,
                     pii_classification={'SSN': 0.53},
                     pii_entities=[ssn_entity]),
        ]

        # Collaborators are built in the same order the call site evaluates them.
        classification_segmenter = Segmenter(DOCUMENT_MAX_SIZE_CONTAINS_PII_ENTITIES)
        detection_segmenter = Segmenter(DOCUMENT_MAX_SIZE_DETECT_PII_ENTITIES)
        pii_redactor = Redactor(RedactionConfig())
        redaction_config = RedactionConfig()

        document = redact(source_text, classification_segmenter, detection_segmenter,
                          pii_redactor, comprehend_client, redaction_config,
                          DEFAULT_LANGUAGE_CODE)

        comprehend_client.contains_pii_entities.assert_not_called()
        comprehend_client.detect_pii_documents.assert_called_once()
        assert document.redacted_text == "**** Random text"
コード例 #2
0
 def test_get_interested_pii_true(self):
     """Check that get_interested_pii() returns a non-empty result for an SSN classification.

     Covers both a default RedactionConfig and one restricted to the SSN type
     with a 0.7 confidence threshold (the classified score is 0.734).
     """
     # Case 1: default configuration.
     found_with_defaults = get_interested_pii(
         Document(text="Some Random text", pii_classification={'SSN': 0.534}),
         RedactionConfig())
     assert len(found_with_defaults) > 0

     # Case 2: explicit entity type and threshold below the classified score.
     found_with_ssn_filter = get_interested_pii(
         Document(text="Some Random text", pii_classification={'SSN': 0.734}),
         RedactionConfig(pii_entity_types=['SSN'], confidence_threshold=0.7))
     assert len(found_with_ssn_filter) > 0
コード例 #3
0
    def test_redact_with_no_pii_and_classification(self):
        """Check that redact() leaves the text unchanged when classification finds no PII.

        The mocked client returns an empty pii_classification; the test expects
        contains_pii_entities to be called once, detect_pii_documents never, and
        the redacted text to equal the input.
        """
        source_text = "Some Random text"

        comprehend_client = MagicMock()
        comprehend_client.contains_pii_entities.return_value = [
            Document(text=source_text, pii_classification={}),
        ]

        # Collaborators are built in the same order the call site evaluates them.
        classification_segmenter = Segmenter(DOCUMENT_MAX_SIZE_CONTAINS_PII_ENTITIES)
        detection_segmenter = Segmenter(DOCUMENT_MAX_SIZE_DETECT_PII_ENTITIES)
        pii_redactor = Redactor(RedactionConfig())
        redaction_config = RedactionConfig()

        document = redact(source_text, classification_segmenter, detection_segmenter,
                          pii_redactor, comprehend_client, redaction_config,
                          DEFAULT_LANGUAGE_CODE)

        comprehend_client.contains_pii_entities.assert_called_once()
        comprehend_client.detect_pii_documents.assert_not_called()
        assert document.redacted_text == "Some Random text"
def redact_pii_documents_handler(event, context):
    """Redaction Lambda function handler.

    Validates the incoming event, reads optional JSON invoke arguments from
    ``event[S3OL_CONFIGURATION][PAYLOAD]``, downloads the requested object via
    the presigned URL in ``event[GET_OBJECT_CONTEXT]``, redacts it with
    ``redact`` and sends the UTF-8 encoded result back through the S3 client.

    Any exception raised while doing so is passed to ``ExceptionHandler``
    together with the request route and token. When
    ``PUBLISH_CLOUD_WATCH_METRICS`` is truthy, metrics are published in the
    ``finally`` clause regardless of the outcome.

    Args:
        event: Mapping indexed with REQUEST_ID, S3OL_CONFIGURATION,
            GET_OBJECT_CONTEXT and USER_REQUEST.
        context: Object exposing ``get_remaining_time_in_millis()``.
    """
    LOG.info('Received event with requestId: %s', event[REQUEST_ID])
    LOG.debug(f'Raw event {event}')

    InputEventValidator.validate(event)

    s3ol_configuration = event[S3OL_CONFIGURATION]
    raw_payload = s3ol_configuration[PAYLOAD]
    if raw_payload:
        invoke_args = json.loads(raw_payload)
    else:
        invoke_args = {}
    language_code = invoke_args.get(LANGUAGE_CODE, DEFAULT_LANGUAGE_CODE)
    redaction_config = RedactionConfig(**invoke_args)

    object_get_context = event[GET_OBJECT_CONTEXT]
    s3ol_access_point = s3ol_configuration[S3OL_ACCESS_POINT_ARN]

    s3 = S3Client(s3ol_access_point)
    cloud_watch = CloudWatchClient()
    comprehend = ComprehendClient(
        s3ol_access_point=s3ol_access_point,
        session_id=event[REQUEST_ID],
        user_agent=DEFAULT_USER_AGENT,
        endpoint_url=COMPREHEND_ENDPOINT_URL,
    )
    exception_handler = ExceptionHandler(s3)

    LOG.debug(f"Pii Entity Types to be redacted:{redaction_config.pii_entity_types!s}")

    # State shared with the nested task so the finally clause can report on it
    # even when the task fails part-way through.
    processed_document = False
    document = Document('')

    try:
        def time_bound_task():
            """Download the object, redact it and send the response to S3."""
            nonlocal processed_document
            nonlocal document

            PartialObjectRequestValidator.validate(event)
            pii_classification_segmenter = Segmenter(DOCUMENT_MAX_SIZE_CONTAINS_PII_ENTITIES)
            pii_redaction_segmenter = Segmenter(DOCUMENT_MAX_SIZE_DETECT_PII_ENTITIES)
            redactor = Redactor(redaction_config)

            download_started = time.time()
            text, http_headers, status_code = s3.download_file_from_presigned_url(
                object_get_context[INPUT_S3_URL],
                event[USER_REQUEST][HEADERS],
            )
            download_finished = time.time()
            LOG.info(f"Downloaded the file in : {(download_finished - download_started)} seconds")

            document = redact(
                text,
                pii_classification_segmenter,
                pii_redaction_segmenter,
                redactor,
                comprehend,
                redaction_config,
                language_code,
            )
            processed_document = True
            redaction_finished = time.time()
            LOG.info(f"Pii redaction completed within {(redaction_finished - download_finished)} seconds. Returning back the response to S3")

            # Content length must describe the encoded bytes, not the character count.
            redacted_text_bytes = document.redacted_text.encode('utf-8')
            http_headers[CONTENT_LENGTH] = len(redacted_text_bytes)
            s3.respond_back_with_data(
                redacted_text_bytes,
                http_headers,
                object_get_context[REQUEST_ROUTE],
                object_get_context[REQUEST_TOKEN],
                status_code,
            )

        # Leave RESERVED_TIME_FOR_CLEANUP out of the budget handed to the task.
        time_budget = context.get_remaining_time_in_millis() - RESERVED_TIME_FOR_CLEANUP
        execute_task_with_timeout(time_budget, time_bound_task)
    except Exception as generated_exception:
        exception_handler.handle_exception(
            generated_exception,
            object_get_context[REQUEST_ROUTE],
            object_get_context[REQUEST_TOKEN],
        )
    finally:
        if PUBLISH_CLOUD_WATCH_METRICS:
            pii_entities = get_interested_pii(document, redaction_config)
            pii_found = len(pii_entities) > 0
            publish_metrics(cloud_watch, s3, comprehend, processed_document, pii_found,
                            language_code, s3ol_access_point, pii_entities)

    LOG.info("Responded back to s3 successfully")
コード例 #5
0
 def test_redaction_default_redaction_config(self):
     """Check Redactor.redact() with a default RedactionConfig.

     Two entities are supplied (NAME scored 0.234, CREDIT_DEBIT_NUMBER scored
     0.765); the expected output masks only the card number span with asterisks.
     """
     text = "Hello Zhang Wei. Your AnyCompany Financial Services, LLC credit card account 1111-0000-1111-0000 has a minimum payment of $24.53"
     redactor = Redactor(RedactionConfig())

     name_entity = {
         'Score': 0.234,
         'Type': 'NAME',
         'BeginOffset': 6,
         'EndOffset': 16,
     }
     card_entity = {
         'Score': 0.765,
         'Type': 'CREDIT_DEBIT_NUMBER',
         'BeginOffset': 77,
         'EndOffset': 96,
     }
     redacted_text = redactor.redact(text, [name_entity, card_entity])

     expected_redaction = "Hello Zhang Wei. Your AnyCompany Financial Services, LLC credit card account ******************* has a minimum payment of $24.53"
     assert expected_redaction == redacted_text
コード例 #6
0
 def test_redaction_with_replace_entity_type(self):
     """Check Redactor.redact() in REPLACE_WITH_PII_ENTITY_TYPE mode restricted to NAME.

     With a 0.6 confidence threshold, the expected output replaces the NAME
     span with "[NAME]" and leaves the CREDIT_DEBIT_NUMBER span untouched.
     """
     text = "Hello Zhang Wei. Your AnyCompany Financial Services, LLC credit card account 1111-0000-1111-0000 has a minimum payment of $24.53"
     name_only_config = RedactionConfig(
         pii_entity_types=['NAME'],
         mask_mode=REPLACE_WITH_PII_ENTITY_TYPE,
         confidence_threshold=0.6,
     )
     redactor = Redactor(name_only_config)

     name_entity = {
         'Score': 0.634,
         'Type': 'NAME',
         'BeginOffset': 6,
         'EndOffset': 15,
     }
     card_entity = {
         'Score': 0.765,
         'Type': 'CREDIT_DEBIT_NUMBER',
         'BeginOffset': 77,
         'EndOffset': 96,
     }
     redacted_text = redactor.redact(text, [name_entity, card_entity])

     expected_redaction = "Hello [NAME]. Your AnyCompany Financial Services, LLC credit card account 1111-0000-1111-0000 has a minimum payment of $24.53"
     assert expected_redaction == redacted_text
コード例 #7
0
 def test_redaction_with_no_entities(self):
     """Check that Redactor.redact() returns the input unchanged for an empty entity list."""
     original_text = "Hello Zhang Wei. Your AnyCompany Financial Services, LLC credit card account 1111-0000-1111-0000 has a minimum payment of $24.53"
     no_entities = []
     default_redactor = Redactor(RedactionConfig())

     result = default_redactor.redact(original_text, no_entities)

     assert original_text == result