def test_convert_to_uploaded_file_happy_path(self):
    workflow = Workflow.create_and_init()
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )
    ipu = wf_module.in_progress_uploads.create()
    minio.put_bytes(ipu.Bucket, ipu.get_upload_key(), b"1234567")
    uploaded_file = ipu.convert_to_uploaded_file("test sheet.xlsx")
    self.assertEqual(uploaded_file.uuid, str(ipu.id))
    final_key = wf_module.uploaded_file_prefix + str(ipu.id) + ".xlsx"
    # New file on S3 has the right bytes and metadata
    self.assertEqual(
        minio.get_object_with_data(minio.UserFilesBucket, final_key)["Body"],
        b"1234567",
    )
    self.assertEqual(
        minio.client.head_object(Bucket=minio.UserFilesBucket, Key=final_key)[
            "ContentDisposition"
        ],
        "attachment; filename*=UTF-8''test%20sheet.xlsx",
    )
    # InProgressUpload is completed
    self.assertEqual(ipu.is_completed, True)
    ipu.refresh_from_db()
    self.assertEqual(ipu.is_completed, True)  # also on DB
    # Uploaded file is deleted
    self.assertFalse(minio.exists(minio.UserFilesBucket, ipu.get_upload_key()))
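
# The ContentDisposition assertion above relies on RFC 5987 percent-encoding
# of the filename. A minimal sketch of how such a header value can be built
# with the standard library -- an illustration, not necessarily how
# convert_to_uploaded_file() constructs it internally:
import urllib.parse

def content_disposition(filename: str) -> str:
    """Build an RFC 6266 "attachment" header value with a UTF-8 filename*."""
    quoted = urllib.parse.quote(filename, safe="")  # "test sheet.xlsx" -> "test%20sheet.xlsx"
    return "attachment; filename*=UTF-8''" + quoted

assert (
    content_disposition("test sheet.xlsx")
    == "attachment; filename*=UTF-8''test%20sheet.xlsx"
)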
def test_db_minio_syntax_error_is_runtime_error(self):
    mv = ModuleVersion.create_or_replace_from_spec(
        {
            "id_name": "regtest9",
            "name": "regtest9 v1",
            "category": "Clean",
            "parameters": [{"id_name": "url", "type": "string"}],
        },
        source_version_hash="b1c2d3",
    )
    bio = io.BytesIO()
    with zipfile.ZipFile(bio, mode="w") as zf:
        zf.writestr(
            "regtest9.yaml",
            json.dumps({**mv.spec, "parameters": "not an Array"}).encode("utf-8"),
        )
        zf.writestr("regtest9.py", b"def render(")
    minio.put_bytes(
        minio.ExternalModulesBucket,
        "regtest9/regtest9.b1c2d3.zip",
        bytes(bio.getbuffer()),
    )
    with self.assertRaises(RuntimeError) as cm:
        MODULE_REGISTRY.latest("regtest9")
    self.assertIsInstance(cm.exception.__cause__, SyntaxError)
def test_db_minio_use_cache_for_same_version(self):
    mv = ModuleVersion.create_or_replace_from_spec(
        {
            "id_name": "regtest4",
            "name": "regtest4 v1",
            "category": "Clean",
            "parameters": [{"id_name": "url", "type": "string"}],
        },
        source_version_hash="b1c2d2",
    )
    bio = io.BytesIO()
    with zipfile.ZipFile(bio, mode="w") as zf:
        zf.writestr("regtest4.yaml", json.dumps(mv.spec).encode("utf-8"))
        zf.writestr("regtest4.py", b"def render(table, params):\n return table")
    minio.put_bytes(
        minio.ExternalModulesBucket,
        "regtest4/regtest4.b1c2d2.zip",
        bytes(bio.getbuffer()),
    )
    zf1 = MODULE_REGISTRY.latest("regtest4")
    zf2 = MODULE_REGISTRY.latest("regtest4")
    self.assertIs(zf2, zf1)
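
# test_db_minio_use_cache_for_same_version asserts object identity (assertIs),
# implying MODULE_REGISTRY memoizes per (id_name, version). A schematic sketch
# of that caching pattern with hypothetical helper callables -- not the
# project's actual registry code. It also matches
# test_db_minio_refresh_cache_for_new_version below: a new version is a new
# cache key, so it forces a fresh download.
from typing import Any, Callable, Dict, Tuple

class RegistrySketch:
    def __init__(
        self,
        query_latest_version: Callable[[str], str],
        download_and_validate: Callable[[str, str], Any],
    ):
        self._query_latest_version = query_latest_version
        self._download_and_validate = download_and_validate
        self._cache: Dict[Tuple[str, str], Any] = {}

    def latest(self, module_id: str) -> Any:
        version = self._query_latest_version(module_id)  # e.g. "b1c2d2"
        key = (module_id, version)
        if key not in self._cache:
            # Only a new version triggers a download. Sha1-named zips are
            # immutable, so a cached entry never goes stale for its key.
            self._cache[key] = self._download_and_validate(module_id, version)
        return self._cache[key]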
def test_delete_remove_leaked_stored_objects_and_uploaded_files(self):
    workflow = Workflow.create_and_init()
    # If the user deletes a workflow, all data associated with that
    # workflow should disappear. Postgres handles DB objects; but Django's
    # ORM doesn't do a great job with StoredObjects and UploadedFiles.
    #
    # This test isn't about minutiae. It's just: if the user deletes a
    # Workflow, make sure all data gets deleted.
    #
    # TODO fix all other bugs that leak data.
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )
    # "Leak" a StoredObject by writing its file to S3 but neglecting to
    # write an accompanying StoredObject record.
    stored_object_key = f"{workflow.id}/{wf_module.id}/1234.dat"
    minio.put_bytes(minio.StoredObjectsBucket, stored_object_key, b"1234")
    # Add an UploadedFile that is missing a DB entry. (Even if we fix all
    # bugs that leak an S3 object after deleting a DB entry [and 2019-06-03
    # there are still more], we'll still need to handle missing DB entries
    # from legacy code.)
    uploaded_file_key = f"{wf_module.uploaded_file_prefix}{uuid.uuid4()}.csv"
    minio.put_bytes(minio.UserFilesBucket, uploaded_file_key, b"A\nb")
    workflow.delete()
    self.assertFalse(minio.exists(minio.StoredObjectsBucket, stored_object_key))
    self.assertFalse(minio.exists(minio.UserFilesBucket, uploaded_file_key))
def test_db_minio_validate_code_with_kernel(self):
    mv = ModuleVersion.create_or_replace_from_spec(
        {
            "id_name": "regtest7",
            "name": "regtest7 v1",
            "category": "Clean",
            "parameters": [{"id_name": "url", "type": "string"}],
        },
        source_version_hash="b1c2d3",
    )
    bio = io.BytesIO()
    with zipfile.ZipFile(bio, mode="w") as zf:
        zf.writestr("regtest7.yaml", json.dumps(mv.spec).encode("utf-8"))
        zf.writestr(
            "regtest7.py", b"def render(table, params):\n return table\nfoo()"
        )
    minio.put_bytes(
        minio.ExternalModulesBucket,
        "regtest7/regtest7.b1c2d3.zip",
        bytes(bio.getbuffer()),
    )
    with self.assertRaises(RuntimeError) as cm:
        MODULE_REGISTRY.latest("regtest7")
    self.assertIsInstance(cm.exception.__cause__, ModuleExitedError)
def test_db_minio_latest_load_deprecated_simple(self):
    mv = ModuleVersion.create_or_replace_from_spec(
        {
            "id_name": "regtest2",
            "name": "regtest2 v1",
            "category": "Clean",
            "parameters": [{"id_name": "url", "type": "string"}],
        },
        source_version_hash="b1c2d2",
    )
    minio.put_bytes(
        minio.ExternalModulesBucket,
        "regtest2/b1c2d2/regtest2.py",
        b"def render(table, params):\n return table",
    )
    minio.put_bytes(
        minio.ExternalModulesBucket,
        "regtest2/b1c2d2/regtest2.yaml",
        json.dumps(mv.spec).encode("utf-8"),
    )
    zf = MODULE_REGISTRY.latest("regtest2")
    self.assertEqual(zf.get_spec(), ModuleSpec(**mv.spec))
    self.assertIsNone(zf.get_optional_html())
def test_clean_file_happy_path(self):
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.wf_modules.create(module_id_name="uploadfile", order=0, slug="step-1")
    id = str(uuid.uuid4())
    key = f"wf-{workflow.id}/wfm-{step.id}/{id}"
    minio.put_bytes(minio.UserFilesBucket, key, b"1234")
    UploadedFile.objects.create(
        wf_module=step,
        name="x.csv.gz",
        size=4,
        uuid=id,
        bucket=minio.UserFilesBucket,
        key=key,
    )
    with ExitStack() as inner_stack:
        context = self._render_context(wf_module_id=step.id, exit_stack=inner_stack)
        result: Path = clean_value(ParamDType.File(), id, context)
        self.assertIsInstance(result, Path)
        self.assertEqual(result.read_bytes(), b"1234")
        self.assertEqual(result.suffixes, [".csv", ".gz"])
    # Assert that once `exit_stack` goes out of scope, the file is deleted
    self.assertFalse(result.exists())
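
# The test above leans on contextlib.ExitStack to tie the downloaded file's
# lifetime to the render: when the stack unwinds, the temporary file is gone.
# A self-contained sketch of that stdlib pattern (no cjwstate involved):
import tempfile
from contextlib import ExitStack
from pathlib import Path

with ExitStack() as stack:
    tmp = stack.enter_context(tempfile.NamedTemporaryFile(suffix=".csv.gz"))
    tmp.write(b"1234")
    tmp.flush()
    path = Path(tmp.name)
    assert path.read_bytes() == b"1234"
    assert path.suffixes == [".csv", ".gz"]
# Leaving the ExitStack closed -- and thereby deleted -- the temporary file.
assert not path.exists()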
def test_clean_file_wrong_wf_module(self):
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.wf_modules.create(module_id_name="uploadfile", order=0, slug="step-1")
    step2 = tab.wf_modules.create(module_id_name="uploadfile", order=1, slug="step-2")
    id = str(uuid.uuid4())
    key = f"wf-{workflow.id}/wfm-{step.id}/{id}"
    minio.put_bytes(minio.UserFilesBucket, key, b"1234")
    UploadedFile.objects.create(
        wf_module=step2,
        name="x.csv.gz",
        size=4,
        uuid=id,
        bucket=minio.UserFilesBucket,
        key=key,
    )
    context = self._render_context(wf_module_id=step.id)
    result = clean_value(ParamDType.File(), id, context)
    self.assertIsNone(result)
    # Assert that if a temporary file was created to house the download, it
    # no longer exists.
    self.assertListEqual(list(self.basedir.iterdir()), [])
def test_fetch_integration(self, send_update, queue_render):
    queue_render.side_effect = async_value(None)
    send_update.side_effect = async_value(None)
    workflow = Workflow.create_and_init()
    ModuleVersion.create_or_replace_from_spec(
        {"id_name": "mod", "name": "Mod", "category": "Clean", "parameters": []},
        source_version_hash="abc123",
    )
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="mod"
    )
    minio.put_bytes(
        minio.ExternalModulesBucket,
        "mod/abc123/code.py",
        b"import pandas as pd\n"
        b"def fetch(params): return pd.DataFrame({'A': [1]})\n"
        b"def render(table, params): return table",
    )
    cjwstate.modules.init_module_system()
    now = timezone.now()
    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            fetch.fetch(workflow_id=workflow.id, wf_module_id=wf_module.id, now=now)
        )
    wf_module.refresh_from_db()
    so = wf_module.stored_objects.get(stored_at=wf_module.stored_data_version)
    with minio.temporarily_download(so.bucket, so.key) as parquet_path:
        table = pyarrow.parquet.read_table(str(parquet_path), use_threads=False)
    assert_arrow_table_equals(table, {"A": [1]})
    workflow.refresh_from_db()
    queue_render.assert_called_with(workflow.id, workflow.last_delta_id)
    send_update.assert_called()
def test_delete_deletes_from_s3(self):
    minio.put_bytes(minio.StoredObjectsBucket, "test.dat", b"abcd")
    workflow = Workflow.create_and_init()
    wf_module = workflow.tabs.first().wf_modules.create(order=0, slug="step-1")
    so = wf_module.stored_objects.create(size=4, key="test.dat")
    so.delete()
    self.assertFalse(minio.exists(minio.StoredObjectsBucket, "test.dat"))
def test_invalid_parquet_is_corrupt_cache_error(self):
    result = RenderResult(arrow_table({"A": [1]}))
    cache_render_result(self.workflow, self.wf_module, self.delta.id, result)
    crr = self.wf_module.cached_render_result
    minio.put_bytes(BUCKET, crr_parquet_key(crr), b"NOT PARQUET")
    with tempfile_context() as arrow_path:
        with self.assertRaises(CorruptCacheError):
            load_cached_render_result(crr, arrow_path)
def test_delete_tab_deletes_from_s3(self):
    minio.put_bytes(minio.StoredObjectsBucket, "test.dat", b"abcd")
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.create(position=1)
    wf_module = tab.wf_modules.create(order=0, slug="step-1")
    wf_module.stored_objects.create(
        size=4, bucket=minio.StoredObjectsBucket, key="test.dat"
    )
    tab.delete()
    self.assertFalse(minio.exists(minio.StoredObjectsBucket, "test.dat"))
def test_resume_backtrack_on_corrupt_cache_error(self):
    module_zipfile = create_module_zipfile("mod")
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    # step1: cached result is fresh -- but CORRUPT
    step1 = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="mod",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    rendercache.cache_render_result(
        workflow,
        step1,
        workflow.last_delta_id,
        RenderResult(arrow_table({"A": [1]})),
    )
    minio.put_bytes(
        # Write corrupted data -- will lead to CorruptCacheError
        rendercache.io.BUCKET,
        rendercache.io.crr_parquet_key(step1.cached_render_result),
        b"CORRUPT",
    )
    # step2: no cached result -- must re-render
    step2 = tab.wf_modules.create(order=1, slug="step-2", module_id_name="mod")
    tab_flow = TabFlow(
        tab.to_arrow(),
        [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ],
    )
    with patch.object(Kernel, "render", side_effect=mock_render({"B": [2]})):
        with self._execute(
            workflow, tab_flow, {}, expect_log_level=logging.ERROR
        ) as result:
            expected = RenderResult(arrow_table({"B": [2]}))
            assert_render_result_equals(result, expected)
        self.assertEqual(
            # called with step1, then step2
            Kernel.render.call_count,
            2,
        )
        self.assertRegex(
            # Output is to the correct file
            Kernel.render.call_args[1]["output_filename"],
            r"execute-tab-output.*\.arrow",
        )
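
# Both corrupt-cache tests in this section encode the same contract: when
# reading a step's cached render result raises CorruptCacheError, the executor
# falls back to re-rendering that step and everything downstream instead of
# crashing. A schematic, self-contained sketch of that control flow with
# hypothetical helper callables -- not the project's actual executor (which
# also invalidates downstream caches; elided here):
import logging

class CorruptCacheError(Exception):
    """Stand-in for rendercache's CorruptCacheError."""

def render_tab_sketch(steps, load_cached, render):
    """Walk steps in order; re-render any step whose cache is absent or corrupt."""
    last_result = None
    for step in steps:
        try:
            cached = load_cached(step)  # None when absent; raises when corrupt
        except CorruptCacheError:
            logging.exception("Corrupt cache for %r; re-rendering", step)
            cached = None
        if cached is None:
            cached = render(step, last_result)  # recompute from upstream output
        last_result = cached
    return last_result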
def test_delete_s3_data_leaked_file(self):
    # Delete a file with our UUID but without an UploadedFile.
    workflow = Workflow.create_and_init()
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )
    ipu = wf_module.in_progress_uploads.create()
    key = wf_module.uploaded_file_prefix + str(ipu.id) + ".xlsx"
    minio.put_bytes(minio.UserFilesBucket, key, b"1234567")
    ipu.delete_s3_data()
    self.assertFalse(minio.exists(minio.UserFilesBucket, key))
def test_delete_remove_uploaded_data_by_prefix_in_case_model_missing(self):
    workflow = Workflow.create_and_init()
    wf_module = workflow.tabs.first().wf_modules.create(order=0, slug="step-1")
    uuid = str(uuidgen.uuid4())
    key = wf_module.uploaded_file_prefix + uuid
    minio.put_bytes(minio.UserFilesBucket, key, b"A\n1")
    # Don't create the UploadedFile. Simulates races during upload/delete
    # that could write a file on S3 but not in our database.
    # wf_module.uploaded_files.create(name='t.csv', size=3, uuid=uuid, key=key)
    wf_module.delete()  # do not crash
    self.assertFalse(minio.exists(minio.UserFilesBucket, key))
def test_resume_backtrack_on_corrupt_cache_error(self, fake_load_module):
    ModuleVersion.create_or_replace_from_spec(
        {"id_name": "mod", "name": "Mod", "category": "Clean", "parameters": []}
    )
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    # step1: cached result is fresh -- but CORRUPT
    step1 = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="mod",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    rendercache.cache_render_result(
        workflow,
        step1,
        workflow.last_delta_id,
        RenderResult(arrow_table({"A": [1]})),
    )
    minio.put_bytes(
        # Write corrupted data -- will lead to CorruptCacheError
        rendercache.io.BUCKET,
        rendercache.io.crr_parquet_key(step1.cached_render_result),
        b"CORRUPT",
    )
    # step2: no cached result -- must re-render
    step2 = tab.wf_modules.create(order=1, slug="step-2", module_id_name="mod")
    tab_flow = TabFlow(
        tab.to_arrow(),
        [
            ExecuteStep(step1, ParamDType.Dict({}), {}),
            ExecuteStep(step2, ParamDType.Dict({}), {}),
        ],
    )
    expected = RenderResult(arrow_table({"B": [2]}))
    fake_load_module.return_value.render.return_value = expected
    with self._execute(
        workflow, tab_flow, {}, expect_log_level=logging.ERROR
    ) as result:
        assert_render_result_equals(result, expected)
    self.assertEqual(
        # called with step1, then step2
        fake_load_module.return_value.render.call_count,
        2,
    )
    self.assertRegex(
        # Output is to the correct file
        fake_load_module.return_value.render.call_args[1]["output_filename"],
        r"execute-tab-output.*\.arrow",
    )
def create_module_zipfile(
    module_id: str = "testmodule",
    *,
    version: Optional[str] = None,
    spec_kwargs: Dict[str, Any] = {},
    python_code: str = "",
    html: Optional[str] = None,
    js_module: str = "",
    extra_file_contents: Dict[str, bytes] = {},
) -> ModuleZipfile:
    """
    Create a ModuleZipfile, stored in the database and minio.

    If `version` is not supplied, generate one from the sha1 of the zipfile.
    This is usually what you want: minio reads after overwrites are only
    _eventually_ consistent, so if you 1. write a file; 2. overwrite it; and
    3. read it, the read might return the file from step 1 or the file from
    step 2. A sha1 version means an overwrite never modifies existing data,
    sidestepping the problem.
    """
    spec = {
        "id_name": module_id,
        "name": "Test Module",
        "category": "Clean",
        "parameters": [],
        **spec_kwargs,
    }
    bio = io.BytesIO()
    with zipfile.ZipFile(bio, mode="w") as zf:
        zf.writestr(module_id + ".yaml", json.dumps(spec))
        zf.writestr(module_id + ".py", python_code.encode("utf-8"))
        if html is not None:
            zf.writestr(module_id + ".html", html.encode("utf-8"))
        if js_module:
            zf.writestr(module_id + ".js", js_module.encode("utf-8"))
        for path, content in extra_file_contents.items():
            zf.writestr(path, content)
    data = bytes(bio.getbuffer())
    if version is None:
        sha1 = hashlib.sha1()
        sha1.update(data)
        version = sha1.hexdigest()
    minio.put_bytes(
        minio.ExternalModulesBucket,
        "%s/%s.%s.zip" % (module_id, module_id, version),
        data,
    )
    ModuleVersion.objects.create(
        id_name=module_id,
        source_version_hash=version,
        spec=spec,
        js_module=js_module,
    )
    return MODULE_REGISTRY.latest(module_id)
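
# Typical usage in a test, exercising the defaults and keyword-only arguments
# defined above -- the module id, spec and code here are illustrative:
module_zipfile = create_module_zipfile(
    "mymodule",
    spec_kwargs={"parameters": [{"id_name": "url", "type": "string"}]},
    python_code="def render(table, params):\n    return table",
    html="<!DOCTYPE html><html><body>hi</body></html>",
)
# Because `version` was omitted, the object lands at a sha1-derived key,
# e.g. "mymodule/mymodule.<sha1-of-zip>.zip".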
def test_complete_happy_path(self, queue_render, send_update):
    send_update.side_effect = async_noop
    queue_render.side_effect = async_noop
    _init_module("x")
    self.kernel.migrate_params.side_effect = lambda m, p: p
    workflow = Workflow.create_and_init()
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0,
        slug="step-123",
        module_id_name="x",
        file_upload_api_token="abc123",
        params={"file": None},
    )
    upload = wf_module.in_progress_uploads.create()
    uuid = str(upload.id)
    key = upload.get_upload_key()
    minio.put_bytes(upload.Bucket, key, b"1234567")
    with self.assertLogs(level=logging.INFO):
        # Logs ChangeParametersCommand's migrate_params()
        response = self.client.post(
            f"/api/v1/workflows/{workflow.id}/steps/step-123/uploads/{upload.id}",
            {"filename": "test.csv"},
            content_type="application/json",
            HTTP_AUTHORIZATION="Bearer abc123",
        )
    self.assertEqual(response.status_code, 200)
    # Upload and its S3 data were deleted
    self.assertFalse(minio.exists(upload.Bucket, key))
    upload.refresh_from_db()
    self.assertTrue(upload.is_completed)
    # Final upload was created
    uploaded_file = wf_module.uploaded_files.first()
    self.assertEqual(
        uploaded_file.key, f"wf-{workflow.id}/wfm-{wf_module.id}/{uuid}.csv"
    )
    self.assertEqual(
        minio.get_object_with_data(minio.UserFilesBucket, uploaded_file.key)["Body"],
        b"1234567",
    )
    self.assertEqual(uploaded_file.name, "test.csv")
    # Return value includes uuid
    data = json.loads(response.content)
    self.assertEqual(data["uuid"], uuid)
    self.assertEqual(data["name"], "test.csv")
    self.assertEqual(data["size"], 7)
    # ChangeParametersCommand ran
    wf_module.refresh_from_db()
    self.assertEqual(wf_module.params, {"file": uuid})
    # Send deltas
    send_update.assert_called()
    queue_render.assert_called()
def test_delete_s3_data_ignore_non_leaked_file(self):
    workflow = Workflow.create_and_init()
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )
    ipu = wf_module.in_progress_uploads.create()
    key = wf_module.uploaded_file_prefix + str(ipu.id) + ".xlsx"
    minio.put_bytes(minio.UserFilesBucket, key, b"1234567")
    # An UploadedFile record tracks this key, so the file is not leaked:
    # delete_s3_data() must leave it alone.
    wf_module.uploaded_files.create(
        name="text.xlsx", size=7, uuid=str(ipu.id), key=key
    )
    ipu.delete_s3_data()
    self.assertTrue(minio.exists(minio.UserFilesBucket, key))
def test_db_minio_refresh_cache_for_new_version(self):
    v1 = ModuleVersion.create_or_replace_from_spec(
        {
            "id_name": "regtest5",
            "name": "regtest5 v1",
            "category": "Clean",
            "parameters": [{"id_name": "url", "type": "string"}],
        },
        source_version_hash="b1c2d2",
    )
    bio = io.BytesIO()
    with zipfile.ZipFile(bio, mode="w") as zf:
        zf.writestr("regtest5.yaml", json.dumps(v1.spec).encode("utf-8"))
        zf.writestr("regtest5.py", b"def render(table, params):\n return table")
    minio.put_bytes(
        minio.ExternalModulesBucket,
        "regtest5/regtest5.b1c2d2.zip",
        bytes(bio.getbuffer()),
    )
    zipfile1 = MODULE_REGISTRY.latest("regtest5")
    ModuleVersion.create_or_replace_from_spec(
        {
            "id_name": "regtest5",
            "name": "regtest5 v2",
            "category": "Clean",
            "parameters": [{"id_name": "url", "type": "string"}],
        },
        source_version_hash="b1c2d3",
    )
    minio.put_bytes(
        minio.ExternalModulesBucket,
        "regtest5/regtest5.b1c2d3.zip",
        bytes(bio.getbuffer()),  # reuse zipfile to save lines of code
    )
    zipfile2 = MODULE_REGISTRY.latest("regtest5")
    self.assertIsNot(zipfile2, zipfile1)
    self.assertEqual(zipfile2.version, "b1c2d3")
def test_duplicate_bytes(self):
    key = f"{self.workflow.id}/{self.step1.id}/{uuid1()}"
    minio.put_bytes(minio.StoredObjectsBucket, key, b"12345")
    self.step2 = self.step1.tab.wf_modules.create(order=1, slug="step-2")
    so1 = self.step1.stored_objects.create(
        bucket=minio.StoredObjectsBucket, key=key, size=5
    )
    so2 = so1.duplicate(self.step2)
    # new StoredObject should have same time, different file with same contents
    self.assertEqual(so1.stored_at, so2.stored_at)
    self.assertEqual(so1.size, so2.size)
    self.assertEqual(so1.bucket, so2.bucket)
    self.assertNotEqual(so1.key, so2.key)
    self.assertEqual(
        minio.get_object_with_data(so2.bucket, so2.key)["Body"], b"12345"
    )
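
# StoredObject.duplicate() above must yield a second S3 object with identical
# bytes under a new key. A minimal sketch of one way to do that, reusing the
# minio.copy() call shape from move_uploaded_file() at the end of this section
# (server-side copy, no download) -- a hypothetical method body, not the
# project's actual one:
from uuid import uuid1

def duplicate_sketch(self, to_wf_module):
    from cjwstate import minio

    new_key = f"{to_wf_module.tab.workflow_id}/{to_wf_module.id}/{uuid1()}"
    # copy(dest_bucket, dest_key, "source-bucket/source-key")
    minio.copy(self.bucket, new_key, f"{self.bucket}/{self.key}")
    return to_wf_module.stored_objects.create(
        stored_at=self.stored_at, bucket=self.bucket, key=new_key, size=self.size
    )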
def test_abort_missing_upload_is_404(self):
    _init_module("x")
    workflow = Workflow.create_and_init()
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-123", module_id_name="x", file_upload_api_token="abc123"
    )
    upload = wf_module.in_progress_uploads.create()
    key = upload.get_upload_key()
    minio.put_bytes(upload.Bucket, key, b"1234567")
    response = self.client.delete(
        f"/api/v1/workflows/{workflow.id}/steps/step-123/uploads/dcc00084-812d-4769-bf77-94518f18ff3d",
        HTTP_AUTHORIZATION="Bearer abc123",
    )
    self.assertEqual(response.status_code, 404)
    self.assertEqual(
        json.loads(response.content)["error"]["code"], "upload-not-found"
    )
def test_finish_upload_happy_path(self, send_update):
    user = User.objects.create(username="******", email="*****@*****.**")
    workflow = Workflow.create_and_init(owner=user)
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )
    in_progress_upload = wf_module.in_progress_uploads.create(
        id="147a9f5d-5b3e-41c3-a968-a84a5a9d587f"
    )
    key = in_progress_upload.get_upload_key()
    minio.put_bytes(in_progress_upload.Bucket, key, b"1234567")
    send_update.side_effect = async_noop
    response = self.run_handler(
        finish_upload,
        user=user,
        workflow=workflow,
        wfModuleId=wf_module.id,
        key=key,
        filename="test sheet.csv",
    )
    self.assertResponse(
        response, data={"uuid": "147a9f5d-5b3e-41c3-a968-a84a5a9d587f"}
    )
    # The uploaded file is deleted
    self.assertFalse(minio.exists(in_progress_upload.Bucket, key))
    # A new upload is created
    uploaded_file = wf_module.uploaded_files.first()
    self.assertEqual(uploaded_file.name, "test sheet.csv")
    self.assertEqual(uploaded_file.size, 7)
    self.assertEqual(uploaded_file.uuid, "147a9f5d-5b3e-41c3-a968-a84a5a9d587f")
    self.assertEqual(uploaded_file.bucket, in_progress_upload.Bucket)
    final_key = (
        f"wf-{workflow.id}/wfm-{wf_module.id}"
        "/147a9f5d-5b3e-41c3-a968-a84a5a9d587f.csv"
    )
    self.assertEqual(uploaded_file.key, final_key)
    # The file has the right bytes and metadata
    self.assertEqual(
        minio.get_object_with_data(minio.UserFilesBucket, final_key)["Body"],
        b"1234567",
    )
    self.assertEqual(
        minio.client.head_object(Bucket=minio.UserFilesBucket, Key=final_key)[
            "ContentDisposition"
        ],
        "attachment; filename*=UTF-8''test%20sheet.csv",
    )
    # wf_module is updated
    send_update.assert_called()
def test_abort_upload_happy_path_after_complete(self):
    user = User.objects.create(username="******", email="*****@*****.**")
    workflow = Workflow.create_and_init(owner=user)
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-1", module_id_name="x"
    )
    in_progress_upload = wf_module.in_progress_uploads.create(
        id="147a9f5d-5b3e-41c3-a968-a84a5a9d587f"
    )
    key = in_progress_upload.get_upload_key()
    minio.put_bytes(in_progress_upload.Bucket, key, b"1234567")
    response = self.run_handler(
        abort_upload, user=user, workflow=workflow, wfModuleId=wf_module.id, key=key
    )
    self.assertResponse(response, data=None)
    wf_module.refresh_from_db()
    self.assertFalse(minio.exists(in_progress_upload.Bucket, key))
def test_abort(self):
    _init_module("x")
    workflow = Workflow.create_and_init()
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-123", module_id_name="x", file_upload_api_token="abc123"
    )
    upload = wf_module.in_progress_uploads.create()
    key = upload.get_upload_key()
    minio.put_bytes(upload.Bucket, key, b"1234567")
    response = self.client.delete(
        f"/api/v1/workflows/{workflow.id}/steps/step-123/uploads/{upload.id}",
        HTTP_AUTHORIZATION="Bearer abc123",
    )
    self.assertEqual(response.status_code, 200)
    self.assertEqual(json.loads(response.content), {})
    self.assertFalse(minio.exists(upload.Bucket, key))  # file was deleted
    upload.refresh_from_db()
    self.assertTrue(upload.is_completed)
def test_complete_json_form_error(self):
    _init_module("x")
    workflow = Workflow.create_and_init()
    wf_module = workflow.tabs.first().wf_modules.create(
        order=0, slug="step-123", module_id_name="x", file_upload_api_token="abc123"
    )
    upload = wf_module.in_progress_uploads.create()
    key = upload.get_upload_key()
    minio.put_bytes(upload.Bucket, key, b"1234567")
    response = self.client.post(
        f"/api/v1/workflows/{workflow.id}/steps/step-123/uploads/{upload.id}",
        {"filename": None},
        content_type="application/json",
        HTTP_AUTHORIZATION="Bearer abc123",
    )
    self.assertEqual(response.status_code, 400)
    error = json.loads(response.content)["error"]
    self.assertEqual(error["code"], "body-has-errors")
    self.assertIn("filename", error["errors"])
def test_wf_module_duplicate_copy_uploaded_file(self):
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(order=0, slug="step-1", module_id_name="upload")
    uuid = str(uuidgen.uuid4())
    key = f"{wf_module.uploaded_file_prefix}{uuid}.csv"
    minio.put_bytes(minio.UserFilesBucket, key, b"1234567")
    # Write the uuid to the old module -- we'll check the new module points
    # to a valid file
    wf_module.params = {"file": uuid, "has_header": True}
    wf_module.save(update_fields=["params"])
    uploaded_file = wf_module.uploaded_files.create(
        name="t.csv", uuid=uuid, bucket=minio.UserFilesBucket, key=key, size=7
    )
    workflow2 = Workflow.create_and_init()
    tab2 = workflow2.tabs.first()
    wf_module2 = wf_module.duplicate_into_new_workflow(tab2)
    uploaded_file2 = wf_module2.uploaded_files.first()
    self.assertIsNotNone(uploaded_file2)
    # New file gets same uuid -- because it's the same file and we don't
    # want to edit params during copy
    self.assertEqual(uploaded_file2.uuid, uuid)
    self.assertEqual(wf_module2.params["file"], uuid)
    self.assertTrue(
        # The new file should be in a different path
        uploaded_file2.key.startswith(wf_module2.uploaded_file_prefix)
    )
    self.assertEqual(uploaded_file2.name, "t.csv")
    self.assertEqual(uploaded_file2.size, 7)
    self.assertEqual(uploaded_file2.created_at, uploaded_file.created_at)
    self.assertEqual(
        minio.get_object_with_data(uploaded_file2.bucket, uploaded_file2.key)["Body"],
        b"1234567",
    )
def test_db_minio_latest_order_by_last_update_time(self):
    # old version
    ModuleVersion.create_or_replace_from_spec(
        {
            "id_name": "regtest1",
            "name": "regtest1 v1",
            "category": "Clean",
            "parameters": [{"id_name": "url", "type": "string"}],
        },
        source_version_hash="b1c2d3",
    )
    time.sleep(0.000002)  # guarantee a newer timestamp
    # new version
    v2 = ModuleVersion.create_or_replace_from_spec(
        {
            "id_name": "regtest1",
            "name": "regtest1 v2",
            "category": "Clean",
            "parameters": [{"id_name": "url", "type": "string"}],
        },
        source_version_hash="b1c2d2",
    )
    bio = io.BytesIO()
    with zipfile.ZipFile(bio, mode="w") as zf:
        zf.writestr("regtest1.yaml", json.dumps(v2.spec).encode("utf-8"))
        zf.writestr("regtest1.py", b"def render(table, params):\n return table")
    minio.put_bytes(
        minio.ExternalModulesBucket,
        "regtest1/regtest1.b1c2d2.zip",
        bytes(bio.getbuffer()),
    )
    zf = MODULE_REGISTRY.latest("regtest1")
    self.assertEqual(zf.get_spec(), ModuleSpec(**v2.spec))
def test_db_minio_latest_load_deprecated_html(self):
    mv = ModuleVersion.create_or_replace_from_spec(
        {
            "id_name": "regtest3",
            "name": "regtest3 v2",
            "category": "Clean",
            "parameters": [{"id_name": "url", "type": "string"}],
        },
        source_version_hash="b1c2d2",
    )
    minio.put_bytes(
        minio.ExternalModulesBucket,
        "regtest3/b1c2d2/regtest3.py",
        b"def render(table, params):\n return table",
    )
    minio.put_bytes(
        minio.ExternalModulesBucket,
        "regtest3/b1c2d2/regtest3.yaml",
        json.dumps(mv.spec).encode("utf-8"),
    )
    html = (
        "<!DOCTYPE html><html><head><title>Hi</title></head>"
        "<body>Hello, world!</body></html>"
    )
    minio.put_bytes(
        minio.ExternalModulesBucket,
        "regtest3/b1c2d2/regtest3.html",
        html.encode("utf-8"),
    )
    zf = MODULE_REGISTRY.latest("regtest3")
    self.assertEqual(zf.get_optional_html(), html)
def move_uploaded_file(workflow, wf_module, uploaded_file):
    """
    Move files from /uuid.ext to /wf-1/wfm-2/uuid.ext.

    This helps delete leaked files and find problem files.
    """
    from cjwstate import minio

    bucket = uploaded_file.bucket
    old_key = uploaded_file.key
    if "/" in old_key:
        return

    new_key = f"wf-{workflow.id}/wfm-{wf_module.id}/{old_key}"
    logger.info("Move %s/%s to %s/%s", bucket, old_key, bucket, new_key)
    try:
        minio.copy(bucket, new_key, f"{bucket}/{old_key}")
        minio.remove(bucket, old_key)
    except minio.error.NoSuchKey:
        # old_key is missing. Two possibilities:
        #
        # 1. We're re-running this script after it failed once with
        #    atomic=True (which used to be set, by accident); the move already
        #    succeeded but the DB doesn't know it. In that case, continue
        #    because this error actually means, "all is well."
        # 2. The file didn't exist to begin with. In that case, write a blank
        #    file in its stead. That way the user will remark, "hey, Workbench
        #    ate my file!" instead of hitting undefined behavior (which is
        #    worse). https://www.pivotaltracker.com/story/show/163336822
        if minio.exists(bucket, new_key):
            pass  # "all is well"
        else:
            # write an empty file
            minio.put_bytes(bucket, new_key, b"")
            uploaded_file.size = 0
            uploaded_file.save(update_fields=["size"])
    uploaded_file.key = new_key
    uploaded_file.save(update_fields=["key"])
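
# A sketch of how this helper might be driven from a Django data migration,
# looping over every UploadedFile. The app label and relation names here are
# assumptions for illustration, not the project's actual migration:
def forward(apps, schema_editor):
    UploadedFile = apps.get_model("server", "UploadedFile")  # hypothetical app label
    for uploaded_file in UploadedFile.objects.select_related(
        "wf_module__tab__workflow"
    ):
        wf_module = uploaded_file.wf_module
        workflow = wf_module.tab.workflow
        # Safe to re-run: move_uploaded_file() returns early once the key
        # already contains "/".
        move_uploaded_file(workflow, wf_module, uploaded_file)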