def _do_create_result(
    workflow_id: int, wf_module: WfModule, result: FetchResult, now: timezone.datetime
) -> None:
    """
    Do database manipulations for create_result().

    Modify `wf_module` in-place.

    Do *not* do the logic in ChangeDataVersionCommand. We're creating a new
    version, not doing something undoable.

    Raise WfModule.DoesNotExist or Workflow.DoesNotExist in case of a race.
    """
    error = ""
    if result.errors:
        if result.errors[0].message.id != "TODO_i18n":
            raise RuntimeError("TODO handle i18n-ready fetch-result errors")
        elif result.errors[0].quick_fixes:
            raise RuntimeError("TODO handle quick fixes from fetches")
        else:
            error = result.errors[0].message.args["text"]

    with _locked_wf_module(workflow_id, wf_module):
        storedobjects.create_stored_object(
            workflow_id, wf_module.id, result.path, stored_at=now
        )
        storedobjects.enforce_storage_limits(wf_module)

        wf_module.fetch_error = error
        wf_module.is_busy = False
        wf_module.last_update_check = now
        wf_module.save(update_fields=["fetch_error", "is_busy", "last_update_check"])

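# `_do_create_result` above (and the variants below) rely on a
# `_locked_wf_module` context manager that is not shown here. A minimal sketch
# of what such a helper might look like, assuming Django row locks via
# `select_for_update()` -- the real helper may differ (e.g., a cooperative-lock
# pattern on the Workflow):
import contextlib

from django.db import transaction


@contextlib.contextmanager
def _locked_wf_module(workflow_id: int, wf_module: WfModule):
    """Hold a lock on the Workflow row while mutating `wf_module`.

    Raise Workflow.DoesNotExist or WfModule.DoesNotExist on a race.
    """
    with transaction.atomic():
        # Lock the Workflow row; raises Workflow.DoesNotExist on a race.
        Workflow.objects.select_for_update().get(id=workflow_id)
        # Re-read the WfModule; raises WfModule.DoesNotExist if it was deleted.
        wf_module.refresh_from_db()
        yield
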
def test_fetch_result_deleted_file_means_none(self):
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    with parquet_file({"A": [1]}) as path:
        so = create_stored_object(workflow.id, wf_module.id, path)
    wf_module.stored_data_version = so.stored_at
    wf_module.save(update_fields=["stored_data_version"])
    # Now delete the file on S3 -- but leave the DB pointing to it.
    minio.remove(so.bucket, so.key)

    def render(*args, fetch_result, **kwargs):
        self.assertIsNone(fetch_result)
        return RenderResult()

    with self._stub_module(render):
        self.run_with_async_db(
            execute_wfmodule(
                self.chroot_context,
                workflow,
                wf_module,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )

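# The tests in this file build fixture data with a `parquet_file` helper whose
# definition is not shown. A minimal sketch under the assumption that it writes
# a column dict to a temporary Parquet file and yields its Path:
import contextlib
import tempfile
from pathlib import Path

import pyarrow as pa
import pyarrow.parquet


@contextlib.contextmanager
def parquet_file(columns: dict):
    """Yield the Path of a temporary Parquet file holding `columns`."""
    table = pa.table(columns)
    with tempfile.NamedTemporaryFile(suffix=".parquet") as tf:
        pyarrow.parquet.write_table(table, tf.name)
        yield Path(tf.name)
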
def test_fetch_result_happy_path(self):
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
        fetch_error="maybe an error",
    )
    with parquet_file({"A": [1]}) as path:
        so = create_stored_object(workflow.id, wf_module.id, path)
    wf_module.stored_data_version = so.stored_at
    wf_module.save(update_fields=["stored_data_version"])

    def render(*args, fetch_result, **kwargs):
        self.assertEqual(
            fetch_result.errors,
            [RenderError(I18nMessage.TODO_i18n("maybe an error"))],
        )
        assert_arrow_table_equals(
            pyarrow.parquet.read_table(str(fetch_result.path)), {"A": [1]}
        )
        return RenderResult()

    with self._stub_module(render):
        self.run_with_async_db(
            execute_wfmodule(
                workflow,
                wf_module,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                Path("/unused"),
            )
        )

def test_load_selected_stored_object(self):
    workflow = Workflow.create_and_init()
    step = workflow.tabs.first().steps.create(
        order=0, slug="step-1", module_id_name="foodeleted"
    )
    with parquet_file({"A": [1]}) as path1:
        storedobjects.create_stored_object(workflow.id, step.id, path1)
    with parquet_file({"A": [2]}) as path2:
        so2 = storedobjects.create_stored_object(workflow.id, step.id, path2)
    with parquet_file({"A": [3]}) as path3:
        storedobjects.create_stored_object(workflow.id, step.id, path3)
    step.stored_data_version = so2.stored_at
    step.save(update_fields=["stored_data_version"])

    result = self.run_with_async_db(
        fetch.load_database_objects(workflow.id, step.id)
    )
    self.assertEqual(result[3], so2)
    self.assertEqual(result.stored_object, so2)

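# The last two assertions above check the same value twice -- positionally and
# by name -- which implies `fetch.load_database_objects` returns a NamedTuple
# with `stored_object` as its fourth field. A hypothetical sketch of that
# return shape (every field name other than `stored_object` is an assumption):
from typing import Any, NamedTuple


class DatabaseObjects(NamedTuple):
    step: Any             # assumed: the Step being fetched
    module_zipfile: Any   # assumed
    migrated_params: Any  # assumed
    stored_object: Any    # index 3 -- the test above relies on this position
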
def test_wf_module_duplicate(self):
    workflow = Workflow.create_and_init()
    step1 = workflow.tabs.first().wf_modules.create(order=0, slug="step-1")

    # store data to test that it is duplicated
    with tempfile_context() as path1:
        path1.write_bytes(b"12345")
        create_stored_object(workflow.id, step1.id, path1)
    with tempfile_context() as path2:
        path2.write_bytes(b"23456")
        so2 = create_stored_object(workflow.id, step1.id, path2)
    step1.secrets = {"do not copy": {"name": "evil", "secret": "evil"}}
    step1.stored_data_version = so2.stored_at
    step1.save(update_fields=["stored_data_version"])

    # duplicate into another workflow, as we would do when duplicating a workflow
    workflow2 = Workflow.create_and_init()
    tab2 = workflow2.tabs.first()
    step1d = step1.duplicate_into_new_workflow(tab2)
    step1d.refresh_from_db()  # test what we actually have in the db

    self.assertEqual(step1d.slug, "step-1")
    self.assertEqual(step1d.workflow, workflow2)
    self.assertEqual(step1d.module_id_name, step1.module_id_name)
    self.assertEqual(step1d.order, step1.order)
    self.assertEqual(step1d.notes, step1.notes)
    self.assertEqual(step1d.last_update_check, step1.last_update_check)
    self.assertEqual(step1d.is_collapsed, step1.is_collapsed)
    self.assertEqual(step1d.params, step1.params)
    self.assertEqual(step1d.secrets, {})

    # Stored data should contain a clone of content only, not complete version
    # history
    self.assertEqual(step1d.stored_objects.count(), 1)
    self.assertEqual(step1d.stored_data_version, step1.stored_data_version)
    so2d = step1d.stored_objects.first()
    # The StoredObject was copied byte for byte into a different file
    self.assertNotEqual(so2d.key, so2.key)
    self.assertEqual(
        minio.get_object_with_data(minio.StoredObjectsBucket, so2d.key)["Body"],
        minio.get_object_with_data(minio.StoredObjectsBucket, so2.key)["Body"],
    )

def test_fetch_result_happy_path(self):
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
        fetch_errors=[
            RenderError(I18nMessage("foo", {}, "module")),
            RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
        ],
    )
    with parquet_file({"A": [1]}) as path:
        so = create_stored_object(workflow.id, step.id, path)
    step.stored_data_version = so.stored_at
    step.save(update_fields=["stored_data_version"])

    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": True},
        python_code=textwrap.dedent(
            """
            import pyarrow as pa
            import pyarrow.parquet  # `pa.parquet` is not importable by default
            import pandas as pd
            from pandas.testing import assert_frame_equal
            from cjwkernel.types import RenderError, I18nMessage

            def render(table, params, *, fetch_result, **kwargs):
                assert fetch_result.errors == [
                    RenderError(I18nMessage("foo", {}, "module")),
                    RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
                ]
                # assert_frame_equal() needs DataFrames, not Arrow tables
                fetch_dataframe = pa.parquet.read_table(
                    str(fetch_result.path)
                ).to_pandas()
                assert_frame_equal(fetch_dataframe, pd.DataFrame({"A": [1]}))
                return pd.DataFrame()
            """
        ),
    )

    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            execute_step(
                chroot_context=self.chroot_context,
                workflow=workflow,
                step=step,
                module_zipfile=module_zipfile,
                params={},
                tab_name=tab.name,
                input_path=self.empty_table_path,
                input_table_columns=[],
                tab_results={},
                output_path=self.output_path,
            )
        )

def _do_create_result(
    workflow_id: int, step: Step, result: FetchResult, now: datetime.datetime
) -> None:
    """Do database manipulations for create_result().

    Modify `step` in-place.

    Do *not* do the logic in SetStepDataVersion. We're creating a new version,
    not doing something undoable.

    Raise Step.DoesNotExist or Workflow.DoesNotExist in case of a race.
    """
    with _locked_step(workflow_id, step):
        storedobjects.create_stored_object(
            workflow_id, step.id, result.path, stored_at=now
        )
        storedobjects.delete_old_files_to_enforce_storage_limits(step=step)
        # Assume caller sends new list to clients via SetStepDataVersion
        step.fetch_errors = result.errors
        step.is_busy = False
        step.last_update_check = now
        step.save(update_fields=["fetch_errors", "is_busy", "last_update_check"])

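# `_do_create_result` runs synchronous ORM code, while the tests here drive
# everything through `run_with_async_db`. A plausible async caller, assuming
# Django Channels' `database_sync_to_async`; the name `create_result` comes
# from the docstring above, but its exact signature is an assumption:
from channels.db import database_sync_to_async


async def create_result(
    workflow_id: int, step: Step, result: FetchResult, now: datetime.datetime
) -> None:
    # Run the blocking database work on Channels' database thread pool.
    await database_sync_to_async(_do_create_result)(workflow_id, step, result, now)
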
def _do_create_result(
    workflow_id: int, wf_module: WfModule, result: FetchResult, now: timezone.datetime
) -> None:
    """
    Do database manipulations for create_result().

    Modify `wf_module` in-place.

    Do *not* do the logic in ChangeDataVersionCommand. We're creating a new
    version, not doing something undoable.

    Raise WfModule.DoesNotExist or Workflow.DoesNotExist in case of a race.
    """
    with _locked_wf_module(workflow_id, wf_module):
        storedobjects.create_stored_object(
            workflow_id, wf_module.id, result.path, stored_at=now
        )
        storedobjects.enforce_storage_limits(wf_module)
        wf_module.fetch_errors = result.errors
        wf_module.is_busy = False
        wf_module.last_update_check = now
        wf_module.save(update_fields=["fetch_errors", "is_busy", "last_update_check"])

def test_fetch_result_deleted_file_means_none(self):
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    with parquet_file({"A": [1]}) as path:
        so = create_stored_object(workflow.id, step.id, path)
    step.stored_data_version = so.stored_at
    step.save(update_fields=["stored_data_version"])
    # Now delete the file on S3 -- but leave the DB pointing to it.
    s3.remove(s3.StoredObjectsBucket, so.key)

    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": True},
        python_code=textwrap.dedent(
            """
            import pandas as pd

            def render(table, params, *, fetch_result, **kwargs):
                assert fetch_result is None
                return pd.DataFrame()
            """
        ),
    )

    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            execute_step(
                chroot_context=self.chroot_context,
                workflow=workflow,
                step=step,
                module_zipfile=module_zipfile,
                params={},
                tab_name=tab.name,
                input_path=self.empty_table_path,
                input_table_columns=[],
                tab_results={},
                output_path=self.output_path,
            )
        )

def test_fetch_result_deleted_file_means_none(self):
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    with parquet_file({"A": [1]}) as path:
        so = create_stored_object(workflow.id, wf_module.id, path)
    wf_module.stored_data_version = so.stored_at
    wf_module.save(update_fields=["stored_data_version"])
    # Now delete the file on S3 -- but leave the DB pointing to it.
    minio.remove(minio.StoredObjectsBucket, so.key)

    # The assertion now lives in the sandboxed module code, not a local stub.
    module_zipfile = create_module_zipfile(
        "x",
        python_code=textwrap.dedent(
            """
            import pandas as pd

            def render(table, params, *, fetch_result, **kwargs):
                assert fetch_result is None
                return pd.DataFrame()
            """
        ),
    )

    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            execute_wfmodule(
                self.chroot_context,
                workflow,
                wf_module,
                module_zipfile,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )