Exemple #1
0
    def test_delete_remove_leaked_stored_objects_and_uploaded_files(self):
        """Deleting a Workflow also removes S3 objects that lack DB records."""
        # Big picture: once a user deletes a workflow, every piece of data
        # tied to it should vanish. Postgres takes care of the DB rows, but
        # Django's ORM doesn't do a great job with StoredObjects and
        # UploadedFiles.
        #
        # This test isn't about minutiae; it only checks that deleting a
        # Workflow deletes all of its data.
        #
        # TODO fix all other bugs that leak data.
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0, slug="step-1", module_id_name="x"
        )

        # Simulate a leaked StoredObject: the file is on S3, yet no
        # StoredObject row accompanies it.
        leaked_stored_key = f"{workflow.id}/{step.id}/1234.dat"
        minio.put_bytes(minio.StoredObjectsBucket, leaked_stored_key, b"1234")

        # Simulate an uploaded file with no DB entry. (Even once every bug
        # that leaks an S3 object after deleting a DB entry is fixed [and as
        # of 2019-06-03 more remain], legacy code has already produced files
        # whose DB entries are missing.)
        leaked_upload_key = f"{step.uploaded_file_prefix}{uuid.uuid4()}.csv"
        minio.put_bytes(minio.UserFilesBucket, leaked_upload_key, b"A\nb")

        workflow.delete()

        # Neither leaked object may survive the delete.
        self.assertFalse(
            minio.exists(minio.StoredObjectsBucket, leaked_stored_key)
        )
        self.assertFalse(minio.exists(minio.UserFilesBucket, leaked_upload_key))
Exemple #2
0
def healthz(request):
    """
    Health check: answer "OK" once minio and the database have both responded.

    Neither probe's result is inspected. Each call merely has to complete
    without raising; any exception propagates to the caller.
    """
    # Probe minio. Whether the key exists is irrelevant.
    minio.exists(minio.UserFilesBucket, "healthz")  # do not crash

    # Probe the database with the cheapest possible query.
    with connection.cursor() as db_cursor:
        db_cursor.execute("SELECT 1")

    body = b"OK"
    return HttpResponse(body, content_type="text/plain; charset=utf-8")
Exemple #3
0
 def test_convert_to_uploaded_file_happy_path(self):
     """convert_to_uploaded_file() stores the final file and completes the upload."""
     workflow = Workflow.create_and_init()
     step = workflow.tabs.first().wf_modules.create(
         order=0, slug="step-1", module_id_name="x"
     )
     upload = step.in_progress_uploads.create()
     minio.put_bytes(upload.Bucket, upload.get_upload_key(), b"1234567")

     converted = upload.convert_to_uploaded_file("test sheet.xlsx")

     # The UploadedFile reuses the in-progress upload's id as its uuid
     self.assertEqual(converted.uuid, str(upload.id))

     # The final file on S3 has the right bytes ...
     final_key = f"{step.uploaded_file_prefix}{upload.id}.xlsx"
     stored = minio.get_object_with_data(minio.UserFilesBucket, final_key)
     self.assertEqual(stored["Body"], b"1234567")
     # ... and the right metadata
     head = minio.client.head_object(
         Bucket=minio.UserFilesBucket, Key=final_key
     )
     self.assertEqual(
         head["ContentDisposition"],
         "attachment; filename*=UTF-8''test%20sheet.xlsx",
     )

     # InProgressUpload is completed, both in memory and in the DB
     self.assertEqual(upload.is_completed, True)
     upload.refresh_from_db()
     self.assertEqual(upload.is_completed, True)

     # The temporary upload is no longer in the user-files bucket
     upload_key = upload.get_upload_key()
     self.assertFalse(minio.exists(minio.UserFilesBucket, upload_key))
Exemple #4
0
 def test_delete_deletes_from_s3(self):
     """Deleting a StoredObject also removes its file from the bucket."""
     bucket, key = minio.StoredObjectsBucket, "test.dat"
     minio.put_bytes(bucket, key, b"abcd")

     workflow = Workflow.create_and_init()
     first_tab = workflow.tabs.first()
     step = first_tab.wf_modules.create(order=0, slug="step-1")
     stored_object = step.stored_objects.create(size=4, key=key)

     stored_object.delete()

     self.assertFalse(minio.exists(bucket, key))
Exemple #5
0
    def test_delete_wfmodule(self):
        """Deleting a WfModule removes its cached render result from BUCKET."""
        table = arrow_table({"A": [1]})
        errors = [RenderError(I18nMessage("X", []), [])]
        cache_render_result(
            self.workflow,
            self.wf_module,
            self.delta.id,
            RenderResult(table, errors, {}),
        )

        # Look up the key before deleting the step that owns the result.
        key_before_delete = crr_parquet_key(self.wf_module.cached_render_result)

        self.wf_module.delete()

        self.assertFalse(minio.exists(BUCKET, key_before_delete))
 def test_delete_tab_deletes_from_s3(self):
     """Deleting a Tab removes the files of the StoredObjects beneath it."""
     bucket, key = minio.StoredObjectsBucket, "test.dat"
     minio.put_bytes(bucket, key, b"abcd")

     workflow = Workflow.create_and_init()
     extra_tab = workflow.tabs.create(position=1)
     step = extra_tab.wf_modules.create(order=0, slug="step-1")
     step.stored_objects.create(size=4, bucket=bucket, key=key)

     extra_tab.delete()

     self.assertFalse(minio.exists(bucket, key))
Exemple #7
0
    def test_clear(self):
        """Clearing a cached render result empties the DB field and BUCKET."""
        result = RenderResult(arrow_table({"A": [1]}))
        cache_render_result(
            self.workflow, self.wf_module, self.delta.id, result
        )
        # Capture the key before clearing.
        key_before_clear = crr_parquet_key(self.wf_module.cached_render_result)

        clear_cached_render_result_for_wf_module(self.wf_module)

        # A fresh copy from the database has no cached result ...
        reloaded = WfModule.objects.get(id=self.wf_module.id)
        self.assertIsNone(reloaded.cached_render_result)
        # ... and the stored file is gone.
        self.assertFalse(minio.exists(BUCKET, key_before_clear))
Exemple #8
0
 def test_delete_s3_data_leaked_file(self):
     """delete_s3_data() removes a final-location file that has no UploadedFile."""
     workflow = Workflow.create_and_init()
     step = workflow.tabs.first().wf_modules.create(
         order=0, slug="step-1", module_id_name="x"
     )
     upload = step.in_progress_uploads.create()

     # A file named after our UUID exists, but no UploadedFile row does.
     leaked_key = f"{step.uploaded_file_prefix}{upload.id}.xlsx"
     minio.put_bytes(minio.UserFilesBucket, leaked_key, b"1234567")

     upload.delete_s3_data()

     self.assertFalse(minio.exists(minio.UserFilesBucket, leaked_key))
Exemple #9
0
 def test_delete_remove_uploaded_data_by_prefix_in_case_model_missing(self):
     """Deleting a WfModule removes files under its prefix that lack DB rows."""
     workflow = Workflow.create_and_init()
     first_tab = workflow.tabs.first()
     step = first_tab.wf_modules.create(order=0, slug="step-1")

     file_uuid = str(uuidgen.uuid4())
     orphan_key = f"{step.uploaded_file_prefix}{file_uuid}"
     minio.put_bytes(minio.UserFilesBucket, orphan_key, b"A\n1")
     # Deliberately no UploadedFile row for this key. This simulates races
     # during upload/delete that could write a file on S3 but not in our
     # database.

     step.delete()  # do not crash

     self.assertFalse(minio.exists(minio.UserFilesBucket, orphan_key))
Exemple #10
0
 def test_complete_happy_path(self, queue_render, send_update):
     """POSTing to the upload URL finalizes the upload and sets the file param."""
     # Make the injected async collaborators do nothing.
     queue_render.side_effect = async_noop
     send_update.side_effect = async_noop
     _init_module("x")
     self.kernel.migrate_params.side_effect = lambda m, p: p

     workflow = Workflow.create_and_init()
     step = workflow.tabs.first().wf_modules.create(
         order=0,
         slug="step-123",
         module_id_name="x",
         file_upload_api_token="abc123",
         params={"file": None},
     )
     upload = step.in_progress_uploads.create()
     upload_uuid = str(upload.id)
     upload_key = upload.get_upload_key()
     minio.put_bytes(upload.Bucket, upload_key, b"1234567")

     url = f"/api/v1/workflows/{workflow.id}/steps/step-123/uploads/{upload.id}"
     with self.assertLogs(level=logging.INFO):
         # Logs ChangeParametersCommand's migrate_params()
         response = self.client.post(
             url,
             {"filename": "test.csv"},
             content_type="application/json",
             HTTP_AUTHORIZATION="Bearer abc123",
         )
     self.assertEqual(response.status_code, 200)

     # The temporary S3 data is gone and the upload is marked completed
     self.assertFalse(minio.exists(upload.Bucket, upload_key))
     upload.refresh_from_db()
     self.assertTrue(upload.is_completed)

     # Final upload was created, with the right key, bytes and name
     uploaded_file = step.uploaded_files.first()
     expected_key = f"wf-{workflow.id}/wfm-{step.id}/{upload_uuid}.csv"
     self.assertEqual(uploaded_file.key, expected_key)
     stored = minio.get_object_with_data(
         minio.UserFilesBucket, uploaded_file.key
     )
     self.assertEqual(stored["Body"], b"1234567")
     self.assertEqual(uploaded_file.name, "test.csv")

     # Response body describes the new file, including its uuid
     payload = json.loads(response.content)
     self.assertEqual(payload["uuid"], upload_uuid)
     self.assertEqual(payload["name"], "test.csv")
     self.assertEqual(payload["size"], 7)

     # ChangeParametersCommand ran
     step.refresh_from_db()
     self.assertEqual(step.params, {"file": upload_uuid})

     # Send deltas
     send_update.assert_called()
     queue_render.assert_called()
Exemple #11
0
 def test_delete_s3_data_ignore_non_leaked_file(self):
     """
     delete_s3_data() leaves the final file alone when an UploadedFile owns it.

     Counterpart of the leaked-file scenario: here the file at the final key
     is backed by an UploadedFile row, so it is not a leak and must survive.
     """
     workflow = Workflow.create_and_init()
     wf_module = workflow.tabs.first().wf_modules.create(order=0,
                                                         slug="step-1",
                                                         module_id_name="x")
     ipu = wf_module.in_progress_uploads.create()
     key = wf_module.uploaded_file_prefix + str(ipu.id) + ".xlsx"
     minio.put_bytes(minio.UserFilesBucket, key, b"1234567")
     # The UploadedFile must carry the *upload's* id. (`self` is the test
     # case here, and `self.id` is unittest's TestCase.id method -- passing
     # str(self.id) stored a method repr, so the row never matched the upload
     # and this test silently exercised the "leaked" path.)
     # NOTE(review): assumes delete_s3_data() pairs UploadedFiles with the
     # upload via this uuid -- confirm against the model.
     wf_module.uploaded_files.create(name="text.xlsx",
                                     size=7,
                                     uuid=str(ipu.id),
                                     key=key)
     ipu.delete_s3_data()
     # "Ignore" means the non-leaked file is still there afterwards.
     self.assertTrue(minio.exists(minio.UserFilesBucket, key))
 def test_finish_upload_happy_path(self, send_update):
     """finish_upload turns an in-progress upload into an UploadedFile."""
     send_update.side_effect = async_noop
     upload_id = "147a9f5d-5b3e-41c3-a968-a84a5a9d587f"

     user = User.objects.create(username="******", email="*****@*****.**")
     workflow = Workflow.create_and_init(owner=user)
     step = workflow.tabs.first().wf_modules.create(
         order=0, slug="step-1", module_id_name="x"
     )
     upload = step.in_progress_uploads.create(id=upload_id)
     upload_key = upload.get_upload_key()
     minio.put_bytes(upload.Bucket, upload_key, b"1234567")

     response = self.run_handler(
         finish_upload,
         user=user,
         workflow=workflow,
         wfModuleId=step.id,
         key=upload_key,
         filename="test sheet.csv",
     )
     self.assertResponse(response, data={"uuid": upload_id})

     # The temporary upload is deleted
     self.assertFalse(minio.exists(upload.Bucket, upload_key))

     # An UploadedFile now exists with the expected attributes
     uploaded_file = step.uploaded_files.first()
     self.assertEqual(uploaded_file.name, "test sheet.csv")
     self.assertEqual(uploaded_file.size, 7)
     self.assertEqual(uploaded_file.uuid, upload_id)
     self.assertEqual(uploaded_file.bucket, upload.Bucket)
     final_key = f"wf-{workflow.id}/wfm-{step.id}/{upload_id}.csv"
     self.assertEqual(uploaded_file.key, final_key)

     # The final file has the right bytes ...
     stored = minio.get_object_with_data(minio.UserFilesBucket, final_key)
     self.assertEqual(stored["Body"], b"1234567")
     # ... and the right metadata
     head = minio.client.head_object(
         Bucket=minio.UserFilesBucket, Key=final_key
     )
     self.assertEqual(
         head["ContentDisposition"],
         "attachment; filename*=UTF-8''test%20sheet.csv",
     )

     # wf_module is updated
     send_update.assert_called()
 def test_abort_upload_happy_path_after_complete(self):
     """abort_upload responds with no data and removes the uploaded bytes."""
     user = User.objects.create(username="******", email="*****@*****.**")
     workflow = Workflow.create_and_init(owner=user)
     step = workflow.tabs.first().wf_modules.create(
         order=0, slug="step-1", module_id_name="x"
     )
     upload = step.in_progress_uploads.create(
         id="147a9f5d-5b3e-41c3-a968-a84a5a9d587f"
     )
     upload_key = upload.get_upload_key()
     minio.put_bytes(upload.Bucket, upload_key, b"1234567")

     response = self.run_handler(
         abort_upload,
         user=user,
         workflow=workflow,
         wfModuleId=step.id,
         key=upload_key,
     )

     self.assertResponse(response, data=None)
     step.refresh_from_db()
     # The uploaded bytes are gone from the bucket
     self.assertFalse(minio.exists(upload.Bucket, upload_key))
Exemple #14
0
 def test_abort(self):
     """DELETE on the upload URL removes the file and completes the upload."""
     _init_module("x")
     workflow = Workflow.create_and_init()
     first_tab = workflow.tabs.first()
     step = first_tab.wf_modules.create(
         order=0,
         slug="step-123",
         module_id_name="x",
         file_upload_api_token="abc123",
     )
     upload = step.in_progress_uploads.create()
     upload_key = upload.get_upload_key()
     minio.put_bytes(upload.Bucket, upload_key, b"1234567")

     url = f"/api/v1/workflows/{workflow.id}/steps/step-123/uploads/{upload.id}"
     response = self.client.delete(url, HTTP_AUTHORIZATION="Bearer abc123")

     # Empty JSON object, HTTP 200
     self.assertEqual(response.status_code, 200)
     self.assertEqual(json.loads(response.content), {})

     # file was deleted
     self.assertFalse(minio.exists(upload.Bucket, upload_key))

     # The upload row is still there, flagged as completed
     upload.refresh_from_db()
     self.assertTrue(upload.is_completed)
def move_uploaded_file(workflow, wf_module, uploaded_file):
    """
    Move files from /uuid.ext to /wf-1/wfm-2/uuid.ext.

    This helps delete leaked files and find problem files.

    Parameters:
        workflow: object whose ``id`` forms the ``wf-<id>`` key segment.
        wf_module: object whose ``id`` forms the ``wfm-<id>`` key segment.
        uploaded_file: record with ``bucket``, ``key`` and ``size``
            attributes and a ``save(update_fields=...)`` method. Its ``key``
            (and, if the file is missing from storage, its ``size``) is
            updated and saved.

    Returns None. A file whose key already contains "/" is left untouched.
    """
    from cjwstate import minio

    bucket = uploaded_file.bucket
    old_key = uploaded_file.key
    if "/" in old_key:
        # Key already has a path segment: nothing to move.
        return

    new_key = f"wf-{workflow.id}/wfm-{wf_module.id}/{old_key}"

    # Lazy %-style logging: the template must be a plain string, not an
    # f-string.
    logger.info("Move %s/%s to %s/%s", bucket, old_key, bucket, new_key)
    try:
        minio.copy(bucket, new_key, f"{bucket}/{old_key}")
        minio.remove(bucket, old_key)
    except minio.error.NoSuchKey:
        # old_key is missing. Two possibilities:
        #
        # 1. We're re-running this script after it failed once with
        #    atomic=True (which used to be set, by accident); the move already
        #    succeeded but the DB doesn't know it. In that case, continue
        #    because this error actually means, "all is well."
        # 2. The file didn't exist to begin with. In that case, write a blank
        #    file in its stead. That way the user will remark, "hey, Workbench
        #    ate my file!" instead of undefined behavior (which is worse).
        #    https://www.pivotaltracker.com/story/show/163336822
        if minio.exists(bucket, new_key):
            pass  # "all is well"
        else:
            # write an empty file
            minio.put_bytes(bucket, new_key, b"")
            uploaded_file.size = 0
            uploaded_file.save(update_fields=["size"])
    # In every surviving path the file now lives at new_key; record that.
    uploaded_file.key = new_key
    uploaded_file.save(update_fields=["key"])