def test_it_handles_old_version_delete_failures(
    mock_handle,
    mock_delete,
    mock_s3,
    mock_delete_versions,
    mock_save,
    message_stub,
):
    """Check that a failure to delete old versions reaches the error handler.

    ``mock_delete_versions`` is configured to raise ``DeleteOldVersionsError``
    and the test asserts that ``mock_handle`` receives the matching message.
    """
    # Arrange: the same mock stands in for the filesystem, the object returned
    # by ``open`` and the context manager that yields the source object.
    source_object = MagicMock(version_id="abc123")
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.return_value = mock_s3
    mock_s3.__enter__.return_value = source_object
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    mock_save.return_value = "new_version123"
    mock_delete_versions.side_effect = DeleteOldVersionsError(
        errors=["access denied"]
    )
    message = message_stub(
        RoleArn="arn:aws:iam:account_id:role/rolename",
        DeleteOldVersions=True,
        Object="s3://bucket/path/basic.parquet",
    )

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = "Unable to delete previous versions: access denied"
    mock_handle.assert_called_with(ANY, ANY, expected_message)
def test_happy_path_when_queue_not_empty_for_compressed_json(
    mock_save,
    mock_emit,
    mock_delete,
    mock_s3,
    mock_session,
    mock_verify_integrity,
    message_stub,
):
    """Run ``execute`` on a gzipped JSON object and check every collaborator call.

    Asserts how the object is opened, which arguments the delete, save and
    integrity-check mocks receive, and that the saved buffer is a
    ``pa.BufferReader``.
    """
    # Arrange
    expected_column = {"Column": "customer_id", "MatchIds": ["12345", "23456"]}
    source_file = MagicMock(version_id="abc123")
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.return_value = mock_s3
    mock_s3.__enter__.return_value = source_file
    mock_save.return_value = "new_version123"
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    message = message_stub(Object="s3://bucket/path/basic.json.gz", Format="json")

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    mock_s3.open.assert_called_with("s3://bucket/path/basic.json.gz", "rb")
    mock_delete.assert_called_with(source_file, [expected_column], "json", True)
    mock_save.assert_called_with(
        ANY, ANY, ANY, "bucket", "path/basic.json.gz", "abc123"
    )
    mock_emit.assert_called()
    mock_session.assert_called_with(None)
    mock_verify_integrity.assert_called_with(
        ANY, "bucket", "path/basic.json.gz", "abc123", "new_version123"
    )
    # Third positional argument of the save call is the buffer being uploaded.
    saved_buffer = mock_save.call_args[0][2]
    assert saved_buffer.read
    # must be BufferReader for zero-copy
    assert isinstance(saved_buffer, pa.BufferReader)
def test_it_provides_logs_for_failed_version_integrity_check_and_performs_rollback(
    mock_error_handler,
    mock_delete,
    mock_load,
    mock_verify_integrity,
    rollback_mock,
    message_stub,
):
    """Check error reporting and rollback when the integrity check fails.

    ``mock_verify_integrity`` raises ``IntegrityCheckFailedError``; the test
    asserts the error handler message and the arguments given to
    ``rollback_mock``.
    """
    # Arrange
    loaded_file = MagicMock()
    loaded_file.num_row_groups = 1
    mock_load.return_value = loaded_file
    integrity_error = IntegrityCheckFailedError(
        "Some error", MagicMock(), "bucket", "path/basic.parquet", "new_version"
    )
    mock_verify_integrity.side_effect = integrity_error
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})

    # Act
    execute("https://queue/url", message_stub(), "receipt_handle")

    # Assert
    mock_verify_integrity.assert_called()
    mock_error_handler.assert_called_with(
        ANY, ANY, "Object version integrity check failed: Some error"
    )
    rollback_mock.assert_called_with(
        ANY, "bucket", "path/basic.parquet", "new_version", on_error=ANY
    )
Exemple #4
0
def test_happy_path_when_queue_not_empty(mock_save, mock_emit, mock_delete,
                                         mock_s3, mock_load, mock_session,
                                         mock_verify_integrity, message_stub):
    """Run ``execute`` on a parquet object and check every collaborator call.

    Asserts how the object is opened, which arguments the delete, save and
    integrity-check mocks receive, and that the saved buffer is a
    ``pa.BufferReader``.
    """
    # Arrange
    expected_column = {"Column": "customer_id", "MatchIds": ["12345", "23456"]}
    loaded_file = MagicMock()
    loaded_file.num_row_groups = 1
    mock_load.return_value = loaded_file
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.return_value = mock_s3
    mock_s3.__enter__.return_value = MagicMock(version_id="abc123")
    mock_save.return_value = "new_version123"
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    message = message_stub(Object="s3://bucket/path/basic.parquet")

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    mock_s3.open.assert_called_with("s3://bucket/path/basic.parquet", "rb")
    mock_delete.assert_called_with(loaded_file, [expected_column])
    mock_save.assert_called_with(
        ANY, ANY, ANY, "bucket", "path/basic.parquet", "abc123"
    )
    mock_emit.assert_called()
    mock_session.assert_called_with(None)
    mock_verify_integrity.assert_called_with(
        ANY, "bucket", "path/basic.parquet", "abc123", "new_version123"
    )
    # Third positional argument of the save call is the buffer being uploaded.
    saved_buffer = mock_save.call_args[0][2]
    assert saved_buffer.read
    # must be BufferReader for zero-copy
    assert isinstance(saved_buffer, pa.BufferReader)
Exemple #5
0
def test_it_handles_s3_permission_issues(mock_error_handler, mock_s3, message_stub):
    """Check that a ``ClientError`` raised while opening the object is reported.

    ``mock_s3.open`` raises ``ClientError`` for ``GetObject``; the test asserts
    that the message passed to the error handler starts with ``"ClientError:"``.
    """
    # Arrange
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.side_effect = ClientError({}, "GetObject")
    # Act
    execute("https://queue/url", message_stub(), "receipt_handle")
    # Assert
    # Guard first: ``call_args`` is None on a mock that was never called, so
    # indexing it would raise a confusing TypeError instead of a clear failure.
    mock_error_handler.assert_called()
    msg = mock_error_handler.call_args[0][2]
    assert msg.startswith("ClientError:")
Exemple #6
0
def test_it_handles_io_errors(mock_error_handler, mock_s3, message_stub):
    """Check that an ``IOError`` raised while opening the object is reported."""
    # Arrange: opening the object fails with an I/O error.
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.side_effect = IOError("an error")

    # Act
    message = message_stub()
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = "Unable to retrieve object: an error"
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
def test_it_handles_arrow_exceptions(mock_error_handler, mock_delete,
                                     message_stub):
    """Check that an ``ArrowException`` from the delete step is reported."""
    # Arrange: the delete mock raises an Arrow error.
    mock_delete.side_effect = ArrowException("FAIL")

    # Act
    message = message_stub()
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = "Apache Arrow processing error: FAIL"
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
Exemple #8
0
def test_it_handles_file_too_big(mock_error_handler, mock_s3, message_stub):
    """Check that a ``MemoryError`` raised while opening the object is reported."""
    # Arrange: opening the object fails with a memory error.
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.side_effect = MemoryError("Too big")

    # Act
    message = message_stub()
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = "Insufficient memory to work on object: Too big"
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
Exemple #9
0
def test_it_handles_generic_error(mock_error_handler, mock_s3, message_stub):
    """Check that a ``RuntimeError`` raised while opening the object is reported."""
    # Arrange: opening the object fails with an error that has no dedicated
    # message in the assertions of this file.
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.side_effect = RuntimeError("Some Error")

    # Act
    message = message_stub()
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = "Unknown error during message processing: Some Error"
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
Exemple #10
0
def test_it_handles_unversioned_buckets(mock_error_handler, mock_s3,
                                        mock_versioning, message_stub):
    """Check that a ``ValueError`` from the versioning mock is reported.

    Also asserts that ``mock_versioning`` was called with ``'bucket'`` as its
    second argument.
    """
    # Arrange: the versioning mock raises.
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_versioning.side_effect = ValueError("Versioning validation Error")

    # Act
    message = message_stub()
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = "Unprocessable message: Versioning validation Error"
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
    mock_versioning.assert_called_with(ANY, "bucket")
Exemple #11
0
def test_it_handles_missing_col_exceptions(
    mock_build_matches, mock_error_handler, mock_delete, message_stub
):
    """Check that a ``KeyError`` from the delete step is reported.

    ``mock_build_matches`` is not referenced in the body; presumably it is
    injected by a patch outside this view and must stay in the signature
    (confirm against the decorators).
    """
    # Arrange: the delete mock raises a KeyError.
    mock_delete.side_effect = KeyError("FAIL")

    # Act
    message = message_stub()
    execute("https://queue/url", message, "receipt_handle")

    # Assert: the expected text contains the key wrapped in single quotes.
    expected_message = "Apache Arrow processing error: 'FAIL'"
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
Exemple #12
0
def test_it_handles_arrow_exceptions(mock_error_handler, mock_delete,
                                     mock_load, message_stub):
    """Check that an ``ArrowException`` from the delete step is reported.

    This variant also stubs ``mock_load`` with a one-row-group file and expects
    the "Parquet processing error" message.
    """
    # Arrange
    loaded_file = MagicMock()
    loaded_file.num_row_groups = 1
    mock_load.return_value = loaded_file
    mock_delete.side_effect = ArrowException("FAIL")

    # Act
    message = message_stub()
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = "Parquet processing error: FAIL"
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
def test_it_provides_logs_for_acl_fail(mock_save, mock_error_handler,
                                       mock_delete, message_stub):
    """Check the message reported when the save mock raises for ``PutObjectAcl``."""
    # Arrange: deletion succeeds, saving raises a ClientError.
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    mock_save.side_effect = ClientError({}, "PutObjectAcl")

    # Act
    message = message_stub()
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = (
        "ClientError: An error occurred (Unknown) when calling the PutObjectAcl "
        "operation: Unknown. Redacted object uploaded successfully but unable to "
        "restore WRITE ACL"
    )
    mock_save.assert_called()
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
def test_it_assumes_role(mock_delete, mock_s3, mock_session, message_stub):
    """Check that the message's ``RoleArn`` is passed to ``mock_session``."""
    # Arrange
    role_arn = "arn:aws:iam:account_id:role/rolename"
    source_object = MagicMock(version_id="abc123")
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.return_value = mock_s3
    mock_s3.__enter__.return_value = source_object
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    message = message_stub(
        RoleArn=role_arn,
        Object="s3://bucket/path/basic.parquet",
    )

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    mock_session.assert_called_with(role_arn)
Exemple #15
0
def test_it_provides_logs_for_get_latest_version_fail(
    mock_error_handler, mock_delete, mock_verify_integrity, message_stub
):
    """Check the message reported when the integrity check raises a listing error.

    ``mock_verify_integrity`` raises whatever
    ``get_list_object_versions_error()`` returns.
    """
    # Arrange
    mock_verify_integrity.side_effect = get_list_object_versions_error()
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})

    # Act
    message = message_stub()
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    expected_message = (
        "ClientError: An error occurred (InvalidArgument) when calling the "
        "ListObjectVersions operation: Invalid version id specified. Could "
        "not verify redacted object version integrity"
    )
    mock_verify_integrity.assert_called()
    mock_error_handler.assert_called_with(ANY, ANY, expected_message)
def test_it_removes_old_versions(mock_delete, mock_s3, mock_delete_versions,
                                 mock_save, message_stub):
    """Check that ``DeleteOldVersions=True`` triggers the delete-versions mock.

    Asserts that the version id returned by ``mock_save`` is passed as the
    fourth argument.
    """
    # Arrange
    new_version_id = "new_version123"
    source_object = MagicMock(version_id="abc123")
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_s3.open.return_value = mock_s3
    mock_s3.__enter__.return_value = source_object
    mock_save.return_value = new_version_id
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})
    message = message_stub(
        RoleArn="arn:aws:iam:account_id:role/rolename",
        DeleteOldVersions=True,
        Object="s3://bucket/path/basic.parquet",
    )

    # Act
    execute("https://queue/url", message, "receipt_handle")

    # Assert
    mock_delete_versions.assert_called_with(ANY, ANY, ANY, new_version_id)
def test_it_handles_no_deletions(mock_handle, mock_save, mock_emit,
                                 mock_delete, mock_s3, message_stub):
    """Check the behaviour when the delete step reports zero deleted rows.

    Asserts that nothing is saved or emitted and that the error handler
    receives the "no rows required deletion" message.
    """
    # Arrange: the delete mock reports that no rows were removed.
    mock_s3.S3FileSystem.return_value = mock_s3
    mock_delete.return_value = pa.BufferOutputStream(), {"DeletedRows": 0}
    # Act
    execute(
        "https://queue/url",
        message_stub(Object="s3://bucket/path/basic.parquet"),
        "receipt_handle",
    )
    # Assert
    mock_s3.open.assert_called_with("s3://bucket/path/basic.parquet", "rb")
    mock_save.assert_not_called()
    mock_emit.assert_not_called()
    mock_handle.assert_called_with(
        ANY,
        ANY,
        "Unprocessable message: The object s3://bucket/path/basic.parquet "
        "was processed successfully but no rows required deletion",
    )
def test_it_provides_logs_for_failed_rollback_generic_error(
        mock_error_handler, mock_delete, mock_verify_integrity, message_stub):
    """Check both error-handler calls when rollback fails with a generic error.

    The integrity check raises ``IntegrityCheckFailedError`` carrying a client
    whose ``delete_object`` raises ``Exception``; the test asserts the exact
    ordered list of error-handler calls.
    """
    # Arrange: the client attached to the integrity error cannot delete.
    failing_client = MagicMock()
    failing_client.delete_object.side_effect = Exception("error!!")
    integrity_error = IntegrityCheckFailedError(
        "Some error", failing_client, "bucket", "test/basic.parquet",
        "new_version")
    mock_verify_integrity.side_effect = integrity_error
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})

    # Act
    execute("https://queue/url", message_stub(), "receipt_handle")

    # Assert
    integrity_call = call(
        ANY, ANY, "Object version integrity check failed: Some error")
    rollback_call = call(
        ANY,
        ANY,
        "Unknown error: error!!. Version rollback caused by version integrity conflict failed",
        "ObjectRollbackFailed",
        False,
    )
    mock_verify_integrity.assert_called()
    assert mock_error_handler.call_args_list == [integrity_call, rollback_call]
Exemple #19
0
def test_it_provides_logs_for_failed_rollback_client_error(
        mock_error_handler, mock_delete, mock_load, mock_verify_integrity,
        message_stub):
    """Check both error-handler calls when rollback fails with a ``ClientError``.

    The integrity check raises ``IntegrityCheckFailedError`` carrying a client
    whose ``delete_object`` raises ``ClientError``; the test asserts the exact
    ordered list of error-handler calls.
    """
    # Arrange
    loaded_file = MagicMock()
    loaded_file.num_row_groups = 1
    mock_load.return_value = loaded_file
    # The client attached to the integrity error cannot delete.
    failing_client = MagicMock()
    failing_client.delete_object.side_effect = ClientError({}, "DeleteObject")
    integrity_error = IntegrityCheckFailedError(
        "Some error", failing_client, "bucket", "test/basic.parquet",
        "new_version")
    mock_verify_integrity.side_effect = integrity_error
    mock_delete.return_value = (pa.BufferOutputStream(), {"DeletedRows": 1})

    # Act
    execute("https://queue/url", message_stub(), "receipt_handle")

    # Assert
    integrity_call = call(
        ANY, ANY, "Object version integrity check failed: Some error")
    rollback_call = call(
        ANY,
        ANY,
        "ClientError: An error occurred (Unknown) when calling the DeleteObject operation: Unknown. "
        "Version rollback caused by version integrity conflict failed",
        "ObjectRollbackFailed",
        False,
    )
    mock_verify_integrity.assert_called()
    assert mock_error_handler.call_args_list == [integrity_call, rollback_call]
Exemple #20
0
def test_it_validates_messages_with_invalid_body(mock_error_handler):
    """Check that a body that is not JSON results in an error-handler call."""
    # Arrange
    invalid_body = "NOT JSON"

    # Act
    execute("https://queue/url", invalid_body, "receipt_handle")

    # Assert
    mock_error_handler.assert_called()
Exemple #21
0
def test_it_validates_messages_with_missing_keys(mock_error_handler):
    """Check that an empty JSON object body results in an error-handler call."""
    # Arrange
    empty_body = "{}"

    # Act
    execute("https://queue/url", empty_body, "receipt_handle")

    # Assert
    mock_error_handler.assert_called()