def test_s3_client_download_file_from_presigned_retry(self, mocked_get):
        """Expect S3DownloadException and exactly five calls on ``mocked_get``."""
        client = S3Client(s3ol_access_point="Random_access_point")

        with self.assertRaises(S3DownloadException):
            client.download_file_from_presigned_url(PRESIGNED_URL_TEST, {})

        # The download must have been attempted five times before giving up.
        assert mocked_get.call_count == 5
    def test_s3_client_download_file_from_presigned_unicode_decode_error_error(
            self, mocked_get):
        """Expect UnsupportedFileException and a single call on ``mocked_get``."""
        client = S3Client(s3ol_access_point="Random_access_point")

        with self.assertRaises(UnsupportedFileException):
            client.download_file_from_presigned_url(PRESIGNED_URL_TEST, {})

        # Exactly one call: this failure must not trigger another attempt.
        mocked_get.assert_called_once()
    def test_s3_client_download_file_from_presigned_url_access_denied(
            self, mocked_get):
        """Expect S3DownloadException and a single call on ``mocked_get``."""
        client = S3Client(s3ol_access_point="Random_access_point")

        with self.assertRaises(S3DownloadException):
            client.download_file_from_presigned_url(PRESIGNED_URL_TEST, {})

        # Exactly one call: this failure must not trigger another attempt.
        mocked_get.assert_called_once()
def redact_pii_documents_handler(event, context):
    """Redaction Lambda function handler.

    Validates the event, downloads the requested object through the presigned
    URL carried in the event, redacts PII from its text and writes the redacted
    bytes back through ``S3Client.respond_back_with_data``. The download, redaction
    and response run as one task bounded by the Lambda's remaining time minus
    ``RESERVED_TIME_FOR_CLEANUP``. Any ``Exception`` raised by that task is handed
    to ``ExceptionHandler.handle_exception``; CloudWatch metrics are published
    afterwards when ``PUBLISH_CLOUD_WATCH_METRICS`` is set.

    Args:
        event: Invocation event. Read keys: ``REQUEST_ID``, ``S3OL_CONFIGURATION``
            (``PAYLOAD``, ``S3OL_ACCESS_POINT_ARN``), ``GET_OBJECT_CONTEXT``
            (``INPUT_S3_URL``, ``REQUEST_ROUTE``, ``REQUEST_TOKEN``) and
            ``USER_REQUEST`` (``HEADERS``).
        context: Lambda context; only ``get_remaining_time_in_millis()`` is used.

    Raises:
        Whatever ``InputEventValidator.validate``, ``json.loads`` or the client
        constructors raise: those run before the ``try`` block and are not
        routed through the exception handler.
    """
    LOG.info('Received event with requestId: %s', event[REQUEST_ID])
    # Lazy %-formatting: the event is only stringified when DEBUG is enabled.
    # NOTE(review): the raw event carries the presigned input URL and the caller's
    # headers (both are read from it below) - confirm logging it is acceptable.
    LOG.debug('Raw event %s', event)

    InputEventValidator.validate(event)
    s3ol_configuration = event[S3OL_CONFIGURATION]
    payload = s3ol_configuration[PAYLOAD]
    # An empty/falsy payload means "no overrides": fall back to defaults.
    invoke_args = json.loads(payload) if payload else {}
    language_code = invoke_args.get(LANGUAGE_CODE, DEFAULT_LANGUAGE_CODE)
    redaction_config = RedactionConfig(**invoke_args)
    object_get_context = event[GET_OBJECT_CONTEXT]
    s3ol_access_point = s3ol_configuration[S3OL_ACCESS_POINT_ARN]
    s3 = S3Client(s3ol_access_point)
    cloud_watch = CloudWatchClient()
    comprehend = ComprehendClient(s3ol_access_point=s3ol_access_point, session_id=event[REQUEST_ID],
                                  user_agent=DEFAULT_USER_AGENT, endpoint_url=COMPREHEND_ENDPOINT_URL)

    exception_handler = ExceptionHandler(s3)

    LOG.debug("Pii Entity Types to be redacted:%s", redaction_config.pii_entity_types)
    # Both are updated by the nested task and read in ``finally`` for metrics, so
    # they need defaults that are valid even if the task fails early.
    processed_document = False
    document = Document('')

    try:
        def time_bound_task():
            """Download, redact and respond; run under the timeout below."""
            nonlocal processed_document
            nonlocal document
            PartialObjectRequestValidator.validate(event)
            pii_classification_segmenter = Segmenter(DOCUMENT_MAX_SIZE_CONTAINS_PII_ENTITIES)
            pii_redaction_segmenter = Segmenter(DOCUMENT_MAX_SIZE_DETECT_PII_ENTITIES)
            redactor = Redactor(redaction_config)

            # perf_counter is monotonic, so the measured durations cannot be
            # distorted by wall-clock adjustments.
            download_started = time.perf_counter()
            text, http_headers, status_code = s3.download_file_from_presigned_url(
                object_get_context[INPUT_S3_URL], event[USER_REQUEST][HEADERS])
            download_finished = time.perf_counter()
            LOG.info("Downloaded the file in : %s seconds", download_finished - download_started)

            document = redact(text, pii_classification_segmenter, pii_redaction_segmenter, redactor,
                              comprehend, redaction_config, language_code)
            processed_document = True
            redaction_finished = time.perf_counter()
            LOG.info("Pii redaction completed within %s seconds. Returning back the response to S3",
                     redaction_finished - download_finished)

            redacted_text_bytes = document.redacted_text.encode('utf-8')
            # Redaction can change the size, so the length is recomputed from the
            # encoded bytes rather than reusing the downloaded object's header.
            http_headers[CONTENT_LENGTH] = len(redacted_text_bytes)
            s3.respond_back_with_data(redacted_text_bytes, http_headers, object_get_context[REQUEST_ROUTE],
                                      object_get_context[REQUEST_TOKEN], status_code)

        # Keep RESERVED_TIME_FOR_CLEANUP out of the budget so the except/finally
        # handling below still has time to run before the Lambda is stopped.
        execute_task_with_timeout(context.get_remaining_time_in_millis() - RESERVED_TIME_FOR_CLEANUP, time_bound_task)
    except Exception as generated_exception:
        exception_handler.handle_exception(generated_exception, object_get_context[REQUEST_ROUTE],
                                           object_get_context[REQUEST_TOKEN])
    finally:
        if PUBLISH_CLOUD_WATCH_METRICS:
            pii_entities = get_interested_pii(document, redaction_config)
            publish_metrics(cloud_watch, s3, comprehend, processed_document, len(pii_entities) > 0, language_code,
                            s3ol_access_point, pii_entities)

    LOG.info("Responded back to s3 successfully")
 def test_s3_client_download_file_from_presigned_url_200_ok(
         self, mocked_get):
     """Successful download returns text, headers and status; GET gets the caller's headers."""
     request_headers = {'some-header': 'header-value'}
     client = S3Client(s3ol_access_point="Random_access_point")

     downloaded = client.download_file_from_presigned_url(PRESIGNED_URL_TEST, request_headers)
     text, response_http_headers, status_code = downloaded

     assert text == 'Test'
     assert response_http_headers == {'Content-Length': '4'}
     assert status_code == S3_STATUS_CODES.OK_200
     # The caller's headers are forwarded and a 10 second timeout is applied.
     mocked_get.assert_called_with(PRESIGNED_URL_TEST, headers=request_headers, timeout=10)
    def test_s3_client_respond_back_with_error(self, mocked_boto3):
        """respond_back_with_error forwards its arguments to write_get_object_response."""
        boto_client = MagicMock()
        mocked_boto3.client.return_value = boto_client
        s3_client = S3Client(s3ol_access_point="Random_access_point")

        error_arguments = {
            'status_code': S3_STATUS_CODES.PRECONDITION_FAILED_412,
            'error_code': S3_ERROR_CODES.PreconditionFailed,
            'error_message': "Some Error",
            'request_route': "Route",
            'request_token': "q2334",
        }
        s3_client.respond_back_with_error(**error_arguments)

        # The status and error code reach the client as 412 and 'PreconditionFailed'.
        expected_call = {
            'StatusCode': 412,
            'ErrorCode': 'PreconditionFailed',
            'ErrorMessage': "Some Error",
            'RequestRoute': 'Route',
            'RequestToken': "q2334",
        }
        boto_client.write_get_object_response.assert_called_once_with(**expected_call)
    def test_s3_client_respond_back_with_data_partial_data(self, mocked_boto3):
        """A 206 response forwards Content-Range as ContentRange and drops the unknown header."""
        boto_client = MagicMock()
        mocked_boto3.client.return_value = boto_client
        s3_client = S3Client(s3ol_access_point="Random_access_point")

        response_headers = {
            "Content-Range": "0-1200",
            "SomeRandomHeader": '0123'
        }
        s3_client.respond_back_with_data(
            data='SomeData',
            headers=response_headers,
            request_route="Route",
            request_token="q2334",
            status_code=S3_STATUS_CODES.PARTIAL_CONTENT_206)

        # "SomeRandomHeader" must not appear among the forwarded arguments.
        expected_call = {
            'Body': 'SomeData',
            'ContentRange': "0-1200",
            'RequestRoute': 'Route',
            'RequestToken': "q2334",
            'StatusCode': 206,
        }
        boto_client.write_get_object_response.assert_called_once_with(**expected_call)
    def test_s3_client_respond_back_with_data_default_status_code(
            self, mocked_boto3):
        """Without an explicit status code, the response is written with StatusCode=200."""
        boto_client = MagicMock()
        mocked_boto3.client.return_value = boto_client
        s3_client = S3Client(s3ol_access_point="Random_access_point")

        response_headers = {
            "ContentRange": "0-100",
            "SomeRandomHeader": '0123',
            "Content-Length": "101"
        }
        s3_client.respond_back_with_data(data='SomeData',
                                         headers=response_headers,
                                         request_route="Route",
                                         request_token="q2334")

        # Only "Content-Length" is forwarded, as the integer ContentLength=101;
        # the un-hyphenated "ContentRange" and the unknown header are not passed on.
        expected_call = {
            'Body': 'SomeData',
            'ContentLength': 101,
            'RequestRoute': 'Route',
            'RequestToken': "q2334",
            'StatusCode': 200,
        }
        boto_client.write_get_object_response.assert_called_once_with(**expected_call)