def test_fetch_result_happy_path(self):
    """fetch_error and the stored Parquet file both reach render()'s fetch_result."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
        fetch_error="maybe an error",
    )
    with parquet_file({"A": [1]}) as path:
        so = create_stored_object(workflow.id, wf_module.id, path)
    # Point the step at the StoredObject we just uploaded.
    wf_module.stored_data_version = so.stored_at
    wf_module.save(update_fields=["stored_data_version"])

    def render(*args, fetch_result, **kwargs):
        # The step's fetch_error must arrive as a render error...
        self.assertEqual(
            fetch_result.errors,
            [RenderError(I18nMessage.TODO_i18n("maybe an error"))],
        )
        # ...and fetch_result.path must contain the stored Parquet data.
        assert_arrow_table_equals(
            pyarrow.parquet.read_table(str(fetch_result.path)), {"A": [1]}
        )
        return RenderResult()

    with self._stub_module(render):
        self.run_with_async_db(
            execute_wfmodule(
                # NOTE(review): sibling tests pass `self.chroot_context` as the
                # first argument and `self.output_path` last; this call omits
                # the former and passes Path("/unused") -- confirm against the
                # execute_wfmodule signature in scope for this file.
                workflow,
                wf_module,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                Path("/unused"),
            )
        )
def test_fetch_result_deleted_file_means_none(self):
    """If the DB points at a StoredObject whose S3 file is gone, fetch_result is None."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    with parquet_file({"A": [1]}) as path:
        so = create_stored_object(workflow.id, wf_module.id, path)
    wf_module.stored_data_version = so.stored_at
    wf_module.save(update_fields=["stored_data_version"])
    # Now delete the file on S3 -- but leave the DB pointing to it.
    minio.remove(so.bucket, so.key)

    def render(*args, fetch_result, **kwargs):
        self.assertIsNone(fetch_result)
        return RenderResult()

    with self._stub_module(render):
        self.run_with_async_db(
            execute_wfmodule(
                self.chroot_context,
                workflow,
                wf_module,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )
def _load_tab_flows(workflow: Workflow, delta_id: int) -> List[TabFlow]:
    """
    Query `workflow` for each tab's `TabFlow` (ordered by tab position).

    Raise `ModuleError` or `ValueError` if migrate_params() fails. Failed
    migration means the whole execute can't happen.
    """
    ret = []
    with workflow.cooperative_lock():  # reloads workflow
        # Another delta was applied since the caller queued this render; the
        # result would be stale, so bail out early.
        if workflow.last_delta_id != delta_id:
            raise UnneededExecution
        for tab_model in workflow.live_tabs.all():
            steps = [
                ExecuteStep(
                    step,
                    # Fall back to an empty param schema when the module
                    # version is unknown.
                    (
                        step.module_version.param_schema
                        if step.module_version is not None
                        else ParamDType.Dict({})
                    ),
                    # We need to invoke the kernel and migrate _all_ modules'
                    # params (WfModule.get_params), because we can only check
                    # for tab cycles after migrating (and before calling any
                    # render()).
                    _get_migrated_params(step),
                )
                for step in tab_model.live_wf_modules.all()
            ]
            ret.append(TabFlow(Tab(tab_model.slug, tab_model.name), steps))
    return ret
def test_fetch_result_no_bucket_or_key_stored_object_means_none(self):
    """A StoredObject with empty bucket/key yields fetch_result=None."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
        stored_data_version=timezone.now(),
    )
    # StoredObject exists in the DB but points nowhere on S3.
    wf_module.stored_objects.create(
        stored_at=wf_module.stored_data_version,
        bucket="",
        key="",
        size=0,
        hash="whatever",
    )

    def render(*args, fetch_result, **kwargs):
        self.assertIsNone(fetch_result)
        return RenderResult()

    with self._stub_module(render):
        self.run_with_async_db(
            execute_wfmodule(
                self.chroot_context,
                workflow,
                wf_module,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )
def test_fetch_result_no_stored_object_means_none(self):
    """A step with no StoredObject at all renders with fetch_result=None."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    # Real module code asserts (inside the sandbox) that fetch_result is None.
    module_zipfile = create_module_zipfile(
        "x",
        python_code=textwrap.dedent(
            """
            import pandas as pd
            def render(table, params, *, fetch_result, **kwargs):
                assert fetch_result is None
                return pd.DataFrame()
            """
        ),
    )
    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            execute_wfmodule(
                self.chroot_context,
                workflow,
                wf_module,
                module_zipfile,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )
def test_report_module_error(self):
    """A crashed module process (SIGKILL) becomes a user-visible error result."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
    )

    def render(*args, fetch_result, **kwargs):
        # Simulate the module process dying with signal 9 (SIGKILL).
        raise ModuleExitedError(-9, "")

    with self._stub_module(render):
        result = self.run_with_async_db(
            execute_wfmodule(
                self.chroot_context,
                workflow,
                wf_module,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )
    self.assertEqual(
        result,
        RenderResult(
            errors=[
                RenderError(
                    I18nMessage.TODO_i18n(
                        "Something unexpected happened. We have been notified and are "
                        "working to fix it. If this persists, contact us. Error code: "
                        "SIGKILL"
                    )
                )
            ]
        ),
    )
def test_render_using_tab_output(self):
    """A TabOutput param reaches render() with slug, name, columns and dataframe."""

    def render(table, params):
        self.assertEqual(params["tabparam"].slug, "tab-1")
        self.assertEqual(params["tabparam"].name, "Tab 1")
        self.assertEqual(
            params["tabparam"].columns,
            {
                "X": ptypes.RenderColumn("X", "number", "{:,d}"),
                "Y": ptypes.RenderColumn("Y", "text", None),
            },
        )
        assert_frame_equal(
            params["tabparam"].dataframe,
            pd.DataFrame({"X": [1], "Y": ["y"]}),
        )

    with arrow_table_context(
        {"X": [1], "Y": ["y"]},
        columns=[
            Column("X", ColumnType.Number("{:,d}")),
            Column("Y", ColumnType.Text()),
        ],
        dir=self.basedir,
    ) as atable:
        self._test_render(
            render,
            params={"tabparam": TabOutput(Tab("tab-1", "Tab 1"), atable)},
        )
def test_clean_tabs_happy_path(self):
    """A Multitab param resolves each slug to the matching tab's TabOutput."""
    tab_b = Tab("tab-2", "Tab 2")
    tab_c = Tab("tab-3", "Tab 3")
    output_b = arrow_table({"B": [1]})
    output_c = arrow_table({"C": [1]})
    context = self._render_context(
        tab_results={
            tab_b: RenderResult(output_b),
            tab_c: RenderResult(output_c),
        }
    )
    cleaned = clean_value(ParamDType.Multitab(), ["tab-2", "tab-3"], context)
    self.assertEqual(
        cleaned,
        [TabOutput(tab_b, output_b), TabOutput(tab_c, output_c)],
    )
def test_render_without_input_or_loads_data_raises_no_loaded_data(self):
    """A non-loads_data module with no input produces NoLoadedDataError."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": False},
        python_code="def render(table, params): return None",
    )
    result = self.run_with_async_db(
        execute_step(
            self.chroot_context,
            workflow,
            step,
            module_zipfile,
            {},
            Tab(tab.slug, tab.name),
            RenderResult(),
            {},
            self.output_path,
        )
    )
    assert_render_result_equals(
        result,
        RenderResult(
            errors=[
                RenderError(
                    I18nMessage("py.renderer.execute.step.NoLoadedDataError", {}, None)
                )
            ]
        ),
    )
def _test_render(
    self,
    render_fn,
    arrow_table_dict=None,
    arrow_table=None,
    params=None,
    tab=Tab("tab-1", "Tab 1"),
    fetch_result=None,
    output_filename=None,
):
    """
    Patch `render_fn` in as the module's render function, invoke
    `module.render_thrift()`, and return the result converted back to Arrow.

    `arrow_table_dict` is only consulted when `arrow_table` is None, in which
    case an Arrow table is built from it inside `self.basedir`.
    """
    # Fix: the previous defaults `arrow_table_dict={}` / `params={}` were
    # mutable default arguments (shared across calls). Use None sentinels;
    # omitting them behaves exactly as before.
    if arrow_table_dict is None:
        arrow_table_dict = {}
    if params is None:
        params = {}
    with ExitStack() as ctx:
        if arrow_table is None:
            arrow_table = ctx.enter_context(
                arrow_table_context(arrow_table_dict, dir=self.basedir)
            )
        ctx.enter_context(patch.object(module, "render", render_fn))
        # NOTE(review): the `output_filename` parameter is accepted but never
        # used -- a fresh temporary file is always created instead. Confirm
        # whether callers rely on passing it.
        out_filename = ctx.enter_context(tempfile_context(dir=self.basedir)).name
        thrift_result = module.render_thrift(
            ttypes.RenderRequest(
                str(self.basedir),
                arrow_arrow_table_to_thrift(arrow_table),
                arrow_params_to_thrift(Params(params)),
                arrow_tab_to_thrift(tab),
                arrow_fetch_result_to_thrift(fetch_result)
                if fetch_result is not None
                else None,
                out_filename,
            )
        )
        return thrift_render_result_to_arrow(thrift_result, self.basedir)
def test_fetch_result_no_stored_object_means_none(self):
    """A step with no StoredObject at all renders with fetch_result=None."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
    )

    def render(*args, fetch_result, **kwargs):
        self.assertIsNone(fetch_result)
        return RenderResult()

    with self._stub_module(render):
        self.run_with_async_db(
            execute_wfmodule(
                self.chroot_context,
                workflow,
                wf_module,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )
def test_fetch_result_deleted_stored_object_means_none(self):
    """A stored_data_version with no matching StoredObject yields fetch_result=None."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
        # wf_module.stored_data_version is buggy: it can point at a nonexistent
        # StoredObject. Let's do that.
        stored_data_version=timezone.now(),
    )

    def render(*args, fetch_result, **kwargs):
        self.assertIsNone(fetch_result)
        return RenderResult()

    with self._stub_module(render):
        self.run_with_async_db(
            execute_wfmodule(
                self.chroot_context,
                workflow,
                wf_module,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )
def test_clean_tabs_preserve_ordering(self):
    """Tabs come back in RenderContext order, not the order the param lists them."""
    second = Tab("tab-2", "Tab 2")
    third = Tab("tab-3", "Tab 3")
    second_result = RenderResult(arrow_table({"B": [1]}))
    third_result = RenderResult(arrow_table({"C": [1]}))
    # RenderContext's dict ordering determines desired tab order. (Python 3.7
    # spec: dict is ordered in insertion order. CPython 3.6 and PyPy 7 do
    # this, too.)
    context = self._render_context(
        tab_results={
            third: third_result,
            second: second_result,
        }
    )
    # Supply wrongly-ordered tab slugs; renderprep should reorder them.
    cleaned = clean_value(ParamDType.Multitab(), ["tab-2", "tab-3"], context)
    self.assertEqual([output.tab.slug for output in cleaned], ["tab-3", "tab-2"])
def render(
    self,
    compiled_module: CompiledModule,
    chroot_context: ChrootContext,
    basedir: Path,
    input_table: ArrowTable,
    params: Params,
    tab: Tab,
    fetch_result: Optional[FetchResult],
    output_filename: str,
) -> RenderResult:
    """
    Run the module's `render_thrift()` function and return its result.

    Raise ModuleError if the module has a bug.
    """
    chroot_dir = chroot_context.chroot.root
    # The module runs chrooted: translate `basedir` into the path the module
    # will see from inside the chroot.
    basedir_seen_by_module = Path("/") / basedir.relative_to(chroot_dir)
    request = ttypes.RenderRequest(
        str(basedir_seen_by_module),
        input_table.to_thrift(),
        params.to_thrift(),
        tab.to_thrift(),
        None if fetch_result is None else fetch_result.to_thrift(),
        output_filename,
    )
    try:
        with chroot_context.writable_file(basedir / output_filename):
            result = self._run_in_child(
                chroot_dir=chroot_dir,
                network_config=pyspawner.NetworkConfig(),  # TODO disallow networking
                compiled_module=compiled_module,
                timeout=self.render_timeout,
                result=ttypes.RenderResult(),
                function="render_thrift",
                args=[request],
            )
    finally:
        # Even on error/timeout, leave the chroot clean for the next module.
        chroot_context.clear_unowned_edits()
    if result.table.filename and result.table.filename != output_filename:
        raise ModuleExitedError(0, "Module wrote to wrong output file")
    try:
        # RenderResult.from_thrift() verifies all filenames passed by the
        # module are in the directory the module has access to. It assumes
        # the Arrow file (if there is one) is untrusted, so it can raise
        # ValidateError
        render_result = RenderResult.from_thrift(result, basedir)
    except ValidateError as err:
        raise ModuleExitedError(0, "Module produced invalid data: %s" % str(err))
    return render_result
def test_fetch_result_happy_path(self):
    """fetch_errors and the stored Parquet file both reach the module's render()."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
        fetch_errors=[
            RenderError(I18nMessage("foo", {}, "module")),
            RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
        ],
    )
    with parquet_file({"A": [1]}) as path:
        so = create_stored_object(workflow.id, step.id, path)
    step.stored_data_version = so.stored_at
    step.save(update_fields=["stored_data_version"])
    # The module itself (running in the sandbox) asserts the fetch_result
    # carries the step's fetch_errors and the stored Parquet data.
    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": True},
        python_code=textwrap.dedent(
            """
            import pyarrow as pa
            import pandas as pd
            from pandas.testing import assert_frame_equal
            from cjwkernel.types import RenderError, I18nMessage

            def render(table, params, *, fetch_result, **kwargs):
                assert fetch_result.errors == [
                    RenderError(I18nMessage("foo", {}, "module")),
                    RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
                ]
                fetch_dataframe = pa.parquet.read_table(str(fetch_result.path))
                assert_frame_equal(fetch_dataframe, pd.DataFrame({"A": [1]}))
                return pd.DataFrame()
            """
        ),
    )
    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            execute_step(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )
def test_email_delta_when_errors_change(self, email_delta):
    """Email the owner when a re-render's errors differ from the cached errors."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id - 1,
        notifications=True,
    )
    # We need to actually populate the cache to set up the test. The code
    # under test will only try to open the render result if the database
    # says there's something there.
    rendercache.cache_render_result(
        workflow,
        step,
        workflow.last_delta_id - 1,
        RenderResult(
            errors=[
                RenderError(
                    I18nMessage("py.renderer.execute.step.noModule", {}, None)
                )
            ]
        ),
    )
    step.last_relevant_delta_id = workflow.last_delta_id
    step.save(update_fields=["last_relevant_delta_id"])
    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": True},
        # returns different error
        python_code='import pandas as pd\ndef render(table, params): return [{"id": "err"}]',
    )
    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            execute_step(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )
    email_delta.assert_called()  # there's new data
def test_email_delta_when_stale_crr_is_unreachable(self, email_delta, read_cache):
    """When the stale cached result has no Parquet file, skip reading it but still email."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id - 1,
        notifications=True,
    )
    # We need to actually populate the cache to set up the test. The code
    # under test will only try to open the render result if the database
    # says there's something there.
    rendercache.cache_render_result(
        workflow,
        step,
        workflow.last_delta_id - 1,
        RenderResult(arrow_table({})),  # does not write a Parquet file
    )
    step.last_relevant_delta_id = workflow.last_delta_id
    step.save(update_fields=["last_relevant_delta_id"])
    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": True},
        # returns different data
        python_code='import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})',
    )
    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            execute_step(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )
    read_cache.assert_not_called()  # it would give CorruptCacheError
    email_delta.assert_called()  # there's new data
def test_email_delta(self, email_delta):
    """Email the owner when new render output differs from the cached output."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id - 1,
        notifications=True,
    )
    # Cache an "old" result, then advance the step so a re-render happens.
    rendercache.cache_render_result(
        workflow,
        wf_module,
        workflow.last_delta_id - 1,
        RenderResult(arrow_table({"A": [1]})),
    )
    wf_module.last_relevant_delta_id = workflow.last_delta_id
    wf_module.save(update_fields=["last_relevant_delta_id"])
    module_zipfile = create_module_zipfile(
        "x",
        python_code='import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})',
    )
    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            execute_wfmodule(
                self.chroot_context,
                workflow,
                wf_module,
                module_zipfile,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )
    email_delta.assert_called()
    delta = email_delta.call_args[0][0]
    # The emailed delta describes exactly the old and new results.
    self.assertEqual(delta.user, workflow.owner)
    self.assertEqual(delta.workflow, workflow)
    self.assertEqual(delta.wf_module, wf_module)
    self.assertEqual(delta.old_result, RenderResult(arrow_table({"A": [1]})))
    self.assertEqual(delta.new_result, RenderResult(arrow_table({"A": [2]})))
def test_email_delta_ignore_corrupt_cache_error(self, email_delta, read_cache):
    """A CorruptCacheError while reading the old result suppresses the email."""
    read_cache.side_effect = rendercache.CorruptCacheError
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id - 1,
        notifications=True,
    )
    # We need to actually populate the cache to set up the test. The code
    # under test will only try to open the render result if the database
    # says there's something there.
    rendercache.cache_render_result(
        workflow,
        wf_module,
        workflow.last_delta_id - 1,
        RenderResult(arrow_table({"A": [1]})),
    )
    wf_module.last_relevant_delta_id = workflow.last_delta_id
    wf_module.save(update_fields=["last_relevant_delta_id"])
    module_zipfile = create_module_zipfile(
        "x",
        # returns different data -- but CorruptCacheError means we won't care.
        python_code='import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})',
    )
    with self.assertLogs(level=logging.ERROR):
        self.run_with_async_db(
            execute_wfmodule(
                self.chroot_context,
                workflow,
                wf_module,
                module_zipfile,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )
    email_delta.assert_not_called()
def test_email_delta(self, email_delta):
    """Email the owner when a stubbed render's output differs from the cached output."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id - 1,
        notifications=True,
    )
    # Cache an "old" result, then advance the step so a re-render happens.
    rendercache.cache_render_result(
        workflow,
        wf_module,
        workflow.last_delta_id - 1,
        RenderResult(arrow_table({"A": [1]})),
    )
    wf_module.last_relevant_delta_id = workflow.last_delta_id
    wf_module.save(update_fields=["last_relevant_delta_id"])
    # table2 must stay alive while the stubbed render runs, so the render
    # happens inside this context.
    with arrow_table_context({"A": [2]}) as table2:

        def render(*args, **kwargs):
            return RenderResult(table2)

        with self._stub_module(render):
            self.run_with_async_db(
                execute_wfmodule(
                    self.chroot_context,
                    workflow,
                    wf_module,
                    {},
                    Tab(tab.slug, tab.name),
                    RenderResult(),
                    {},
                    self.output_path,
                )
            )
    email_delta.assert_called()
    delta = email_delta.call_args[0][0]
    # The emailed delta describes exactly the old and new results.
    self.assertEqual(delta.user, workflow.owner)
    self.assertEqual(delta.workflow, workflow)
    self.assertEqual(delta.wf_module, wf_module)
    self.assertEqual(delta.old_result, RenderResult(arrow_table({"A": [1]})))
    self.assertEqual(delta.new_result, RenderResult(arrow_table({"A": [2]})))
def test_fetch_result_deleted_file_means_none(self):
    """If the DB points at a StoredObject whose S3 file is gone, render with fetch_result=None."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    with parquet_file({"A": [1]}) as path:
        so = create_stored_object(workflow.id, step.id, path)
    step.stored_data_version = so.stored_at
    step.save(update_fields=["stored_data_version"])
    # Now delete the file on S3 -- but leave the DB pointing to it.
    s3.remove(s3.StoredObjectsBucket, so.key)
    # Fix: removed an unused local `render()` stub that was never wired up --
    # this test exercises the real module code in `module_zipfile`, which
    # itself asserts (inside the sandbox) that fetch_result is None.
    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": True},
        python_code=textwrap.dedent(
            """
            import pandas as pd
            def render(table, params, *, fetch_result, **kwargs):
                assert fetch_result is None
                return pd.DataFrame()
            """
        ),
    )
    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            execute_step(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )
def test_email_delta_ignore_corrupt_cache_error(self, email_delta, read_cache):
    """A CorruptCacheError while reading the old result suppresses the email."""
    read_cache.side_effect = rendercache.CorruptCacheError
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id - 1,
        notifications=True,
    )
    # We need to actually populate the cache to set up the test. The code
    # under test will only try to open the render result if the database
    # says there's something there.
    rendercache.cache_render_result(
        workflow,
        wf_module,
        workflow.last_delta_id - 1,
        RenderResult(arrow_table({"A": [1]})),
    )
    wf_module.last_relevant_delta_id = workflow.last_delta_id
    wf_module.save(update_fields=["last_relevant_delta_id"])
    # The stub returns different data -- but CorruptCacheError means we won't
    # be able to compare, so no email should go out.
    with arrow_table_context({"A": [2]}) as table2:

        def render(*args, **kwargs):
            return RenderResult(table2)

        with self._stub_module(render):
            with self.assertLogs(level=logging.ERROR):
                self.run_with_async_db(
                    execute_wfmodule(
                        self.chroot_context,
                        workflow,
                        wf_module,
                        {},
                        Tab(tab.slug, tab.name),
                        RenderResult(),
                        {},
                        self.output_path,
                    )
                )
    email_delta.assert_not_called()
def test_report_module_error(self):
    """A module that raises at render time yields a user-visible-bug error."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    # Module code calls an undefined name, so the sandboxed process exits
    # with a NameError.
    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": True},
        python_code="def render(table, params):\n undefined()",
    )
    with self.assertLogs(level=logging.INFO):
        result = self.run_with_async_db(
            execute_step(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )
    assert_render_result_equals(
        result,
        RenderResult(
            errors=[
                RenderError(
                    I18nMessage(
                        "py.renderer.execute.step.user_visible_bug_during_render",
                        {
                            "message": "exit code 1: NameError: name 'undefined' is not defined"
                        },
                        None,
                    )
                )
            ]
        ),
    )
def test_clean_multicolumn_from_other_tab(self):
    """Multicolumn with tab_parameter keeps only columns present in that tab."""
    other_tab = Tab("tab-2", "Tab 2")
    other_tab_result = RenderResult(arrow_table({"A-from-tab-2": [1, 2]}))
    schema = ParamDType.Dict(
        {
            "tab": ParamDType.Tab(),
            "columns": ParamDType.Multicolumn(tab_parameter="tab"),
        }
    )
    params = {"tab": "tab-2", "columns": ["A-from-tab-1", "A-from-tab-2"]}
    context = self._render_context(
        input_table=arrow_table({"A-from-tab-1": [1]}),
        tab_results={other_tab: other_tab_result},
        params=params,
    )
    cleaned = clean_value(schema, params, context)
    # cleaned["tab"] is not what we're testing here.
    self.assertEqual(cleaned["columns"], ["A-from-tab-2"])
def _load_tab_flows(workflow: Workflow, delta_id: int) -> List[TabFlow]:
    """Query `workflow` for each tab's `TabFlow` (ordered by tab position).

    Raise `ModuleError` or `ValueError` if migrate_params() fails. Failed
    migration means the whole execute can't happen.
    """
    ret = []
    with workflow.cooperative_lock():  # reloads workflow
        # Another delta was applied since the caller queued this render; the
        # result would be stale, so bail out early.
        if workflow.last_delta_id != delta_id:
            raise UnneededExecution
        # Fetch every latest module once, up front, instead of per-step.
        module_zipfiles = MODULE_REGISTRY.all_latest()
        for tab_model in workflow.live_tabs.all():
            steps = [
                _build_execute_step(step, module_zipfiles=module_zipfiles)
                for step in tab_model.live_steps.all()
            ]
            ret.append(TabFlow(Tab(tab_model.slug, tab_model.name), steps))
    return ret
def render(
    self,
    compiled_module: CompiledModule,
    basedir: Path,
    input_table: ArrowTable,
    params: Params,
    tab: Tab,
    fetch_result: Optional[FetchResult],
    output_filename: str,
) -> RenderResult:
    """
    Run the module's `render_thrift()` in a chrooted child and return its result.

    Raises ModuleExitedError when the module misbehaves (wrong output file).
    """
    request = ttypes.RenderRequest(
        str(basedir),
        input_table.to_thrift(),
        params.to_thrift(),
        tab.to_thrift(),
        None if fetch_result is None else fetch_result.to_thrift(),
        output_filename,
    )
    # `extract_paths` suggests the output file is copied out of the chroot
    # when the context exits, so post-processing happens after the `with`.
    # NOTE(review): block extent reconstructed from collapsed source -- verify.
    with _chroot_dir_context(
        provide_paths=[basedir],
        extract_paths=[basedir / output_filename],
    ) as chroot:
        result = self._run_in_child(
            chroot=chroot,
            chroot_paths=[basedir]
            + DATA_PATHS
            + PARQUET_PATHS
            + NETWORKING_PATHS,  # TODO nix networking
            compiled_module=compiled_module,
            timeout=self.render_timeout,
            result=ttypes.RenderResult(),
            function="render_thrift",
            args=[request],
        )
    if result.table.filename and result.table.filename != output_filename:
        raise ModuleExitedError(0, "Module wrote to wrong output file")
    # RenderResult.from_thrift() verifies all filenames passed by the
    # module are in the directory the module has access to.
    render_result = RenderResult.from_thrift(result, basedir)
    if render_result.table.table is not None:
        validate(render_result.table.table, render_result.table.metadata)
    return render_result
def test_fetch_result_no_bucket_or_key_stored_object_means_none(self):
    """A StoredObject with an empty key yields fetch_result=None."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
        # NOTE(review): naive datetime; sibling tests use timezone-aware
        # timestamps -- confirm the column tolerates naive values.
        stored_data_version=datetime.datetime.now(),
    )
    # StoredObject exists in the DB but points nowhere on S3.
    step.stored_objects.create(
        stored_at=step.stored_data_version, key="", size=0, hash="whatever"
    )
    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": True},
        python_code=textwrap.dedent(
            """
            import pandas as pd
            def render(table, params, *, fetch_result, **kwargs):
                assert fetch_result is None
                return pd.DataFrame()
            """
        ),
    )
    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            execute_step(
                self.chroot_context,
                workflow,
                step,
                module_zipfile,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
        )
def test_email_no_delta_when_errors_stay_the_same(self, email_delta):
    """Do not email when the new render's errors equal the cached errors."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id - 1,
        notifications=True,
    )
    # We need to actually populate the cache to set up the test. The code
    # under test will only try to open the render result if the database
    # says there's something there.
    rendercache.cache_render_result(
        workflow,
        step,
        workflow.last_delta_id - 1,
        RenderResult(
            errors=[
                RenderError(
                    I18nMessage("py.renderer.execute.step.noModule", {}, None)
                )
            ]
        ),
    )
    step.last_relevant_delta_id = workflow.last_delta_id
    step.save(update_fields=["last_relevant_delta_id"])
    # Rendering with no module produces the same noModule error as cached.
    self.run_with_async_db(
        execute_step(
            self.chroot_context,
            workflow,
            step,
            None,  # module_zipfile
            {},
            Tab(tab.slug, tab.name),
            RenderResult(),
            {},
            self.output_path,
        )
    )
    email_delta.assert_not_called()  # error is the same error
def test_load_dynamic(self):
    """Load module code from S3 by version hash and render with it."""
    code = b"def render(table, params):\n return table * 2"
    # Upload the module source where the loader expects it:
    # <bucket>/imported/<version>/<module>.py
    minio.client.put_object(
        Bucket=minio.ExternalModulesBucket,
        Key="imported/abcdef/imported.py",
        Body=code,
        ContentLength=len(code),
    )
    with self.assertLogs("cjwstate.modules.loaded_module"):
        lm = LoadedModule.for_module_version(
            MockModuleVersion("imported", "abcdef", ParamDType.Dict({}), "now")
        )
    self.assertEqual(lm.name, "imported:abcdef")
    # This ends up being kinda an integration test.
    with ExitStack() as ctx:
        basedir = Path(ctx.enter_context(tempdir_context(prefix="test-basedir-")))
        # The sandboxed module process must be able to read these paths.
        basedir.chmod(0o755)
        input_table = ctx.enter_context(
            arrow_table_context({"A": [1]}, dir=basedir)
        )
        input_table.path.chmod(0o644)
        output_tf = ctx.enter_context(tempfile.NamedTemporaryFile(dir=basedir))
        ctx.enter_context(self.assertLogs("cjwstate.modules.loaded_module"))
        result = lm.render(
            basedir=basedir,
            input_table=input_table,
            params=Params({"col": "A"}),
            tab=Tab("tab-1", "Tab 1"),
            fetch_result=None,
            output_filename=Path(output_tf.name).name,
        )
        assert_render_result_equals(result, RenderResult(arrow_table({"A": [2]})))
def test_render_with_tab_name(self):
    """The module's render() receives the tab name as the `tab_name` kwarg."""

    def verify(table, params, *, tab_name):
        self.assertEqual(tab_name, "Tab X")

    self._test_render(verify, tab=Tab("tab-1", "Tab X"))