def test_s3_client_download_file_from_presigned_retry(self, mocked_get):
    """Expect S3DownloadException and exactly five calls to the mocked GET.

    ``mocked_get`` is injected from outside this block (presumably by a
    ``patch`` decorator that makes each request fail -- TODO confirm).
    """
    client = S3Client(s3ol_access_point="Random_access_point")

    with self.assertRaises(S3DownloadException):
        client.download_file_from_presigned_url(PRESIGNED_URL_TEST, {})

    # Five GET calls must have been made before the exception surfaced.
    assert mocked_get.call_count == 5
def test_s3_client_download_file_from_presigned_unicode_decode_error_error(self, mocked_get):
    """Expect UnsupportedFileException and a single call to the mocked GET.

    ``mocked_get`` is injected from outside this block (presumably set up
    to return content that cannot be decoded -- TODO confirm).
    """
    client = S3Client(s3ol_access_point="Random_access_point")

    with self.assertRaises(UnsupportedFileException):
        client.download_file_from_presigned_url(PRESIGNED_URL_TEST, {})

    # Exactly one GET call is expected for this failure.
    mocked_get.assert_called_once()
def test_s3_client_download_file_from_presigned_url_access_denied(self, mocked_get):
    """Expect S3DownloadException and a single call to the mocked GET.

    ``mocked_get`` is injected from outside this block (presumably set up
    to return an access-denied response -- TODO confirm).
    """
    client = S3Client(s3ol_access_point="Random_access_point")

    with self.assertRaises(S3DownloadException):
        client.download_file_from_presigned_url(PRESIGNED_URL_TEST, {})

    # Exactly one GET call is expected for this failure.
    mocked_get.assert_called_once()
def redact_pii_documents_handler(event, context):
    """Redaction Lambda function handler.

    Validates the event, downloads the requested object through
    ``s3.download_file_from_presigned_url``, runs ``redact`` on its text and
    sends the UTF-8 encoded redacted text back via
    ``s3.respond_back_with_data``. The download/redact/respond work runs
    inside ``execute_task_with_timeout`` with the remaining Lambda time minus
    ``RESERVED_TIME_FOR_CLEANUP`` as its budget.

    Args:
        event: Mapping describing the request. The keys read here are
            ``REQUEST_ID``, ``S3OL_CONFIGURATION`` (``PAYLOAD`` and
            ``S3OL_ACCESS_POINT_ARN``), ``GET_OBJECT_CONTEXT``
            (``INPUT_S3_URL``, ``REQUEST_ROUTE``, ``REQUEST_TOKEN``) and
            ``USER_REQUEST`` (``HEADERS``).
        context: Lambda context object; only
            ``get_remaining_time_in_millis()`` is used.

    Returns:
        None.

    Raises:
        Whatever ``InputEventValidator.validate`` or the client/config
        constructors raise: they run before the ``try`` block. Exceptions
        raised by the time-bound task are passed to
        ``ExceptionHandler.handle_exception`` instead of propagating.
    """
    # Lazy %-style arguments: the (potentially large) event is only rendered
    # to text when the corresponding log level is actually enabled.
    LOG.info('Received event with requestId: %s', event[REQUEST_ID])
    LOG.debug('Raw event %s', event)
    InputEventValidator.validate(event)

    # An empty/missing payload means "use the default redaction settings".
    payload = event[S3OL_CONFIGURATION][PAYLOAD]
    invoke_args = json.loads(payload) if payload else {}
    language_code = invoke_args.get(LANGUAGE_CODE, DEFAULT_LANGUAGE_CODE)
    redaction_config = RedactionConfig(**invoke_args)

    object_get_context = event[GET_OBJECT_CONTEXT]
    s3ol_access_point = event[S3OL_CONFIGURATION][S3OL_ACCESS_POINT_ARN]
    s3 = S3Client(s3ol_access_point)
    cloud_watch = CloudWatchClient()
    comprehend = ComprehendClient(
        s3ol_access_point=s3ol_access_point,
        session_id=event[REQUEST_ID],
        user_agent=DEFAULT_USER_AGENT,
        endpoint_url=COMPREHEND_ENDPOINT_URL,
    )
    exception_handler = ExceptionHandler(s3)
    LOG.debug("Pii Entity Types to be redacted:%s", redaction_config.pii_entity_types)

    # Both values are rebound by the time-bound task and read again in the
    # ``finally`` block, so they need defaults that are valid if the task
    # fails before producing a document.
    processed_document = False
    document = Document('')

    def time_bound_task():
        nonlocal processed_document
        nonlocal document
        PartialObjectRequestValidator.validate(event)
        pii_classification_segmenter = Segmenter(DOCUMENT_MAX_SIZE_CONTAINS_PII_ENTITIES)
        pii_redaction_segmenter = Segmenter(DOCUMENT_MAX_SIZE_DETECT_PII_ENTITIES)
        redactor = Redactor(redaction_config)

        # perf_counter is monotonic, so the logged durations cannot be skewed
        # by wall-clock adjustments.
        download_started = time.perf_counter()
        text, http_headers, status_code = s3.download_file_from_presigned_url(
            object_get_context[INPUT_S3_URL], event[USER_REQUEST][HEADERS])
        download_finished = time.perf_counter()
        LOG.info("Downloaded the file in : %s seconds", download_finished - download_started)

        document = redact(text, pii_classification_segmenter, pii_redaction_segmenter,
                          redactor, comprehend, redaction_config, language_code)
        processed_document = True
        redaction_finished = time.perf_counter()
        LOG.info("Pii redaction completed within %s seconds. Returning back the response to S3",
                 redaction_finished - download_finished)

        # Redaction can change the byte length, so Content-Length is
        # recomputed from the encoded redacted text.
        redacted_text_bytes = document.redacted_text.encode('utf-8')
        http_headers[CONTENT_LENGTH] = len(redacted_text_bytes)
        s3.respond_back_with_data(redacted_text_bytes, http_headers,
                                  object_get_context[REQUEST_ROUTE],
                                  object_get_context[REQUEST_TOKEN], status_code)

    try:
        execute_task_with_timeout(
            context.get_remaining_time_in_millis() - RESERVED_TIME_FOR_CLEANUP,
            time_bound_task)
    except Exception as generated_exception:
        # Top-level boundary: every failure is handed to the exception
        # handler together with the route/token of the pending request.
        exception_handler.handle_exception(generated_exception,
                                           object_get_context[REQUEST_ROUTE],
                                           object_get_context[REQUEST_TOKEN])
    finally:
        if PUBLISH_CLOUD_WATCH_METRICS:
            pii_entities = get_interested_pii(document, redaction_config)
            publish_metrics(cloud_watch, s3, comprehend, processed_document,
                            len(pii_entities) > 0, language_code,
                            s3ol_access_point, pii_entities)

    LOG.info("Responded back to s3 successfully")
def test_s3_client_download_file_from_presigned_url_200_ok(self, mocked_get):
    """Check the (text, headers, status) triple returned for a successful download.

    ``mocked_get`` is injected from outside this block (presumably set up
    to return a 200 response with body 'Test' -- TODO confirm).
    """
    request_headers = {'some-header': 'header-value'}
    client = S3Client(s3ol_access_point="Random_access_point")

    downloaded = client.download_file_from_presigned_url(PRESIGNED_URL_TEST, request_headers)
    text, response_http_headers, status_code = downloaded

    assert text == 'Test'
    assert response_http_headers == {'Content-Length': '4'}
    assert status_code == S3_STATUS_CODES.OK_200
    # The caller's headers must be passed to the GET call, with a 10 s timeout.
    mocked_get.assert_called_with(PRESIGNED_URL_TEST, timeout=10, headers=request_headers)
def test_s3_client_respond_back_with_error(self, mocked_boto3):
    """Check the keyword arguments ``respond_back_with_error`` passes to boto3.

    ``mocked_boto3`` is injected from outside this block; its ``client()``
    return value is replaced with a MagicMock so the outgoing
    ``write_get_object_response`` call can be inspected.
    """
    boto_client = MagicMock()
    mocked_boto3.client.return_value = boto_client
    client = S3Client(s3ol_access_point="Random_access_point")

    client.respond_back_with_error(
        status_code=S3_STATUS_CODES.PRECONDITION_FAILED_412,
        error_code=S3_ERROR_CODES.PreconditionFailed,
        error_message="Some Error",
        request_route="Route",
        request_token="q2334",
    )

    # Status and error code are expected as their plain int/str values.
    expected_kwargs = {
        'StatusCode': 412,
        'ErrorCode': 'PreconditionFailed',
        'ErrorMessage': "Some Error",
        'RequestRoute': 'Route',
        'RequestToken': "q2334",
    }
    boto_client.write_get_object_response.assert_called_once_with(**expected_kwargs)
def test_s3_client_respond_back_with_data_partial_data(self, mocked_boto3):
    """Check the boto3 call made for a 206 response carrying a Content-Range header.

    ``mocked_boto3`` is injected from outside this block; its ``client()``
    return value is replaced with a MagicMock so the outgoing
    ``write_get_object_response`` call can be inspected.
    """
    boto_client = MagicMock()
    mocked_boto3.client.return_value = boto_client
    client = S3Client(s3ol_access_point="Random_access_point")

    response_headers = {"Content-Range": "0-1200", "SomeRandomHeader": '0123'}
    client.respond_back_with_data(
        data='SomeData',
        headers=response_headers,
        request_route="Route",
        request_token="q2334",
        status_code=S3_STATUS_CODES.PARTIAL_CONTENT_206,
    )

    # "Content-Range" is expected as ContentRange; "SomeRandomHeader" must
    # not appear in the call at all.
    expected_kwargs = {
        'Body': 'SomeData',
        'ContentRange': "0-1200",
        'RequestRoute': 'Route',
        'RequestToken': "q2334",
        'StatusCode': 206,
    }
    boto_client.write_get_object_response.assert_called_once_with(**expected_kwargs)
def test_s3_client_respond_back_with_data_default_status_code(self, mocked_boto3):
    """Check the boto3 call made when ``status_code`` is not supplied.

    ``mocked_boto3`` is injected from outside this block; its ``client()``
    return value is replaced with a MagicMock so the outgoing
    ``write_get_object_response`` call can be inspected.
    """
    boto_client = MagicMock()
    mocked_boto3.client.return_value = boto_client
    client = S3Client(s3ol_access_point="Random_access_point")

    response_headers = {
        "ContentRange": "0-100",
        "SomeRandomHeader": '0123',
        "Content-Length": "101",
    }
    client.respond_back_with_data(
        data='SomeData',
        headers=response_headers,
        request_route="Route",
        request_token="q2334",
    )

    # Expected: StatusCode is 200, "Content-Length" arrives as the integer
    # ContentLength, and neither "ContentRange" (no hyphen) nor
    # "SomeRandomHeader" is present in the call.
    expected_kwargs = {
        'Body': 'SomeData',
        'ContentLength': 101,
        'RequestRoute': 'Route',
        'RequestToken': "q2334",
        'StatusCode': 200,
    }
    boto_client.write_get_object_response.assert_called_once_with(**expected_kwargs)