def test_delete_remove_leaked_stored_objects_and_uploaded_files(self):
    # If the user deletes a workflow, all data associated with that
    # workflow should disappear. Postgres handles DB objects; but Django's
    # ORM doesn't do a great job with StoredObjects and UploadedFiles.
    #
    # This test isn't about minutae. It's just: if the user deletes a
    # Workflow, make sure all data gets deleted.
    #
    # TODO fix all other bugs that leak data.
    workflow = Workflow.create_and_init()
    step = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )

    # "Leak" a StoredObject by writing its file to S3 but neglecting to
    # write an accompanying StoredObject record.
    leaked_so_key = f"{workflow.id}/{step.id}/1234.dat"
    minio.put_bytes(minio.StoredObjectsBucket, leaked_so_key, b"1234")

    # Add UploadedFile, missing a DB entry. (Even if we fix all bugs that
    # leak an S3 object after deleting a DB entry [and 2019-06-03 there are
    # still more] we'll still need to handle missing DB entries from legacy
    # code.)
    leaked_uf_key = f"{step.uploaded_file_prefix}{uuid.uuid4()}.csv"
    minio.put_bytes(minio.UserFilesBucket, leaked_uf_key, b"A\nb")

    workflow.delete()

    # Both leaked S3 objects must be gone after the delete
    self.assertFalse(minio.exists(minio.StoredObjectsBucket, leaked_so_key))
    self.assertFalse(minio.exists(minio.UserFilesBucket, leaked_uf_key))
def healthz(request):
    """Return 200 OK if database and minio connections are ok."""
    # An S3 round-trip proves connectivity; the key need not exist.
    minio.exists(minio.UserFilesBucket, "healthz")  # do not crash
    # A trivial query proves the database connection works.
    with connection.cursor() as cursor:
        cursor.execute("SELECT 1")
    return HttpResponse(b"OK", content_type="text/plain; charset=utf-8")
def test_convert_to_uploaded_file_happy_path(self):
    workflow = Workflow.create_and_init()
    step = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )
    in_progress = step.in_progress_uploads.create()
    minio.put_bytes(in_progress.Bucket, in_progress.get_upload_key(), b"1234567")

    uploaded_file = in_progress.convert_to_uploaded_file("test sheet.xlsx")

    self.assertEqual(uploaded_file.uuid, str(in_progress.id))
    final_key = step.uploaded_file_prefix + str(in_progress.id) + ".xlsx"
    # New file on S3 has the right bytes...
    self.assertEqual(
        minio.get_object_with_data(minio.UserFilesBucket, final_key)["Body"],
        b"1234567",
    )
    # ...and metadata advertising the user-chosen filename.
    self.assertEqual(
        minio.client.head_object(Bucket=minio.UserFilesBucket, Key=final_key)[
            "ContentDisposition"
        ],
        "attachment; filename*=UTF-8''test%20sheet.xlsx",
    )
    # InProgressUpload is completed, both in memory and in the database
    self.assertEqual(in_progress.is_completed, True)
    in_progress.refresh_from_db()
    self.assertEqual(in_progress.is_completed, True)  # also on DB
    # The temporary upload object is deleted from S3
    self.assertFalse(
        minio.exists(minio.UserFilesBucket, in_progress.get_upload_key())
    )
def test_delete_deletes_from_s3(self):
    minio.put_bytes(minio.StoredObjectsBucket, "test.dat", b"abcd")
    workflow = Workflow.create_and_init()
    step = workflow.tabs.first().wf_modules.create(order=0, slug="step-1")
    stored_object = step.stored_objects.create(size=4, key="test.dat")
    # Deleting the DB record must also delete the backing S3 object
    stored_object.delete()
    self.assertFalse(minio.exists(minio.StoredObjectsBucket, "test.dat"))
def test_delete_wfmodule(self):
    result = RenderResult(
        arrow_table({"A": [1]}), [RenderError(I18nMessage("X", []), [])], {}
    )
    cache_render_result(self.workflow, self.wf_module, self.delta.id, result)
    cached_key = crr_parquet_key(self.wf_module.cached_render_result)
    # Deleting the step must clear its cached render result from S3
    self.wf_module.delete()
    self.assertFalse(minio.exists(BUCKET, cached_key))
def test_delete_tab_deletes_from_s3(self):
    minio.put_bytes(minio.StoredObjectsBucket, "test.dat", b"abcd")
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.create(position=1)
    step = tab.wf_modules.create(order=0, slug="step-1")
    step.stored_objects.create(
        size=4, bucket=minio.StoredObjectsBucket, key="test.dat"
    )
    # Cascade-deleting a tab must also wipe its steps' S3 data
    tab.delete()
    self.assertFalse(minio.exists(minio.StoredObjectsBucket, "test.dat"))
def test_clear(self):
    result = RenderResult(arrow_table({"A": [1]}))
    cache_render_result(self.workflow, self.wf_module, self.delta.id, result)
    cached_key = crr_parquet_key(self.wf_module.cached_render_result)

    clear_cached_render_result_for_wf_module(self.wf_module)

    # Reload from the DB: the cached result must be gone there, too
    refreshed = WfModule.objects.get(id=self.wf_module.id)
    self.assertIsNone(refreshed.cached_render_result)
    # ...and the Parquet file must be gone from S3
    self.assertFalse(minio.exists(BUCKET, cached_key))
def test_delete_s3_data_leaked_file(self):
    # Delete a file with our UUID but without an UploadedFile.
    workflow = Workflow.create_and_init()
    step = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )
    in_progress = step.in_progress_uploads.create()
    leaked_key = step.uploaded_file_prefix + str(in_progress.id) + ".xlsx"
    minio.put_bytes(minio.UserFilesBucket, leaked_key, b"1234567")
    in_progress.delete_s3_data()
    self.assertFalse(minio.exists(minio.UserFilesBucket, leaked_key))
def test_delete_remove_uploaded_data_by_prefix_in_case_model_missing(self):
    workflow = Workflow.create_and_init()
    step = workflow.tabs.first().wf_modules.create(order=0, slug="step-1")
    file_uuid = str(uuidgen.uuid4())
    key = step.uploaded_file_prefix + file_uuid
    minio.put_bytes(minio.UserFilesBucket, key, b"A\n1")
    # Don't create the UploadedFile. Simulates races during upload/delete
    # that could write a file on S3 but not in our database.
    # step.uploaded_files.create(name='t.csv', size=3, uuid=file_uuid, key=key)
    step.delete()  # do not crash
    self.assertFalse(minio.exists(minio.UserFilesBucket, key))
def test_complete_happy_path(self, queue_render, send_update):
    send_update.side_effect = async_noop
    queue_render.side_effect = async_noop
    _init_module("x")
    self.kernel.migrate_params.side_effect = lambda m, p: p
    workflow = Workflow.create_and_init()
    step = workflow.tabs.first().wf_modules.create(
        order=0,
        slug="step-123",
        module_id_name="x",
        file_upload_api_token="abc123",
        params={"file": None},
    )
    upload = step.in_progress_uploads.create()
    file_uuid = str(upload.id)
    key = upload.get_upload_key()
    minio.put_bytes(upload.Bucket, key, b"1234567")

    with self.assertLogs(level=logging.INFO):
        # Logs ChangeParametersCommand's migrate_params()
        response = self.client.post(
            f"/api/v1/workflows/{workflow.id}/steps/step-123/uploads/{upload.id}",
            {"filename": "test.csv"},
            content_type="application/json",
            HTTP_AUTHORIZATION="Bearer abc123",
        )
    self.assertEqual(response.status_code, 200)

    # Upload and its S3 data were deleted
    self.assertFalse(minio.exists(upload.Bucket, key))
    upload.refresh_from_db()
    self.assertTrue(upload.is_completed)

    # Final upload was created
    uploaded_file = step.uploaded_files.first()
    self.assertEqual(
        uploaded_file.key, f"wf-{workflow.id}/wfm-{step.id}/{file_uuid}.csv"
    )
    self.assertEqual(
        minio.get_object_with_data(minio.UserFilesBucket, uploaded_file.key)[
            "Body"
        ],
        b"1234567",
    )
    self.assertEqual(uploaded_file.name, "test.csv")

    # Return value includes uuid
    data = json.loads(response.content)
    self.assertEqual(data["uuid"], file_uuid)
    self.assertEqual(data["name"], "test.csv")
    self.assertEqual(data["size"], 7)

    # ChangeParametersCommand ran
    step.refresh_from_db()
    self.assertEqual(step.params, {"file": file_uuid})

    # Send deltas
    send_update.assert_called()
    queue_render.assert_called()
def test_delete_s3_data_ignore_non_leaked_file(self):
    """delete_s3_data() must keep a file that has an UploadedFile record."""
    workflow = Workflow.create_and_init()
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )
    ipu = wf_module.in_progress_uploads.create()
    key = wf_module.uploaded_file_prefix + str(ipu.id) + ".xlsx"
    minio.put_bytes(minio.UserFilesBucket, key, b"1234567")
    # The UploadedFile record marks this S3 object as _not_ leaked.
    # (Bug fix: uuid was `str(self.id)` -- the TestCase's bound `id`
    # method -- instead of the upload's id.)
    wf_module.uploaded_files.create(
        name="text.xlsx", size=7, uuid=str(ipu.id), key=key
    )
    ipu.delete_s3_data()
    # Bug fix: the file is referenced by an UploadedFile, so it must
    # survive delete_s3_data(); the original assertion (`assertFalse`)
    # contradicted this test's name and duplicated the leaked-file test.
    self.assertTrue(minio.exists(minio.UserFilesBucket, key))
def test_finish_upload_happy_path(self, send_update):
    user = User.objects.create(username="******", email="*****@*****.**")
    workflow = Workflow.create_and_init(owner=user)
    step = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )
    upload = step.in_progress_uploads.create(
        id="147a9f5d-5b3e-41c3-a968-a84a5a9d587f"
    )
    key = upload.get_upload_key()
    minio.put_bytes(upload.Bucket, key, b"1234567")
    send_update.side_effect = async_noop

    response = self.run_handler(
        finish_upload,
        user=user,
        workflow=workflow,
        wfModuleId=step.id,
        key=key,
        filename="test sheet.csv",
    )
    self.assertResponse(
        response, data={"uuid": "147a9f5d-5b3e-41c3-a968-a84a5a9d587f"}
    )

    # The uploaded file is deleted
    self.assertFalse(minio.exists(upload.Bucket, key))

    # A new upload is created
    uploaded_file = step.uploaded_files.first()
    self.assertEqual(uploaded_file.name, "test sheet.csv")
    self.assertEqual(uploaded_file.size, 7)
    self.assertEqual(uploaded_file.uuid, "147a9f5d-5b3e-41c3-a968-a84a5a9d587f")
    self.assertEqual(uploaded_file.bucket, upload.Bucket)
    final_key = (
        f"wf-{workflow.id}/wfm-{step.id}/"
        "147a9f5d-5b3e-41c3-a968-a84a5a9d587f.csv"
    )
    self.assertEqual(uploaded_file.key, final_key)

    # The file has the right bytes and metadata
    self.assertEqual(
        minio.get_object_with_data(minio.UserFilesBucket, final_key)["Body"],
        b"1234567",
    )
    self.assertEqual(
        minio.client.head_object(Bucket=minio.UserFilesBucket, Key=final_key)[
            "ContentDisposition"
        ],
        "attachment; filename*=UTF-8''test%20sheet.csv",
    )

    # wf_module is updated
    send_update.assert_called()
def test_abort_upload_happy_path_after_complete(self):
    user = User.objects.create(username="******", email="*****@*****.**")
    workflow = Workflow.create_and_init(owner=user)
    step = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )
    upload = step.in_progress_uploads.create(
        id="147a9f5d-5b3e-41c3-a968-a84a5a9d587f"
    )
    key = upload.get_upload_key()
    minio.put_bytes(upload.Bucket, key, b"1234567")

    response = self.run_handler(
        abort_upload, user=user, workflow=workflow, wfModuleId=step.id, key=key
    )
    self.assertResponse(response, data=None)

    step.refresh_from_db()
    # Aborting must remove the partially-uploaded object from S3
    self.assertFalse(minio.exists(upload.Bucket, key))
def test_abort(self):
    _init_module("x")
    workflow = Workflow.create_and_init()
    step = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-123", module_id_name="x", file_upload_api_token="abc123"
    )
    upload = step.in_progress_uploads.create()
    key = upload.get_upload_key()
    minio.put_bytes(upload.Bucket, key, b"1234567")

    response = self.client.delete(
        f"/api/v1/workflows/{workflow.id}/steps/step-123/uploads/{upload.id}",
        HTTP_AUTHORIZATION="Bearer abc123",
    )

    self.assertEqual(response.status_code, 200)
    self.assertEqual(json.loads(response.content), {})
    self.assertFalse(minio.exists(upload.Bucket, key))  # file was deleted
    upload.refresh_from_db()
    self.assertTrue(upload.is_completed)
def move_uploaded_file(workflow, wf_module, uploaded_file):
    """
    Move files from /uuid.ext to /wf-1/wfm-2/uuid.ext.

    This helps delete leaked files and find problem files.
    """
    from cjwstate import minio

    bucket = uploaded_file.bucket
    old_key = uploaded_file.key
    if "/" in old_key:
        # Already namespaced -- nothing to move.
        return

    new_key = f"wf-{workflow.id}/wfm-{wf_module.id}/{old_key}"
    # Bug fix: this was an f-string (`f"Move %s/%s..."`) carrying %-style
    # placeholders; use a plain format string with lazy %-args as the
    # logging API intends.
    logger.info("Move %s/%s to %s/%s", bucket, old_key, bucket, new_key)

    try:
        minio.copy(bucket, new_key, f"{bucket}/{old_key}")
        minio.remove(bucket, old_key)
    except minio.error.NoSuchKey:
        # old_key is missing. Two possibilities:
        #
        # 1. We're re-running this script after it failed once with
        #    atomic=True (which used to be set, by accident); the move already
        #    succeeded but the DB doesn't know it. In that case, continue
        #    because this error actually means, "all is well."
        # 2. The file didn't exist to begin with. In that case, write a blank
        #    file in its stead. That way the user will remark, "hey, Workbench
        #    ate my file!" instead of undefined behavior (which is worse).
        #    https://www.pivotaltracker.com/story/show/163336822
        if minio.exists(bucket, new_key):
            pass  # "all is well"
        else:
            # write an empty file
            minio.put_bytes(bucket, new_key, b"")
            uploaded_file.size = 0
            uploaded_file.save(update_fields=["size"])
    uploaded_file.key = new_key
    uploaded_file.save(update_fields=["key"])