def test_clean_tab_omit_unused_tabs_from_tab_outputs(self):
    # Three tabs have results, but the params only reference "tab-1", so
    # tab_outputs is expected to hold that single tab.
    schema = ParamSchema.Dict({"x": ParamSchema.Tab()})
    params = {"x": "tab-1"}
    available_tabs = {
        Tab("tab-1", "Tab 1"): StepResult(Path("tab-1.arrow"), [TEXT("A")]),
        Tab("tab-2", "Tab 2"): StepResult(Path("tab-2.arrow"), [TEXT("A")]),
        Tab("tab-3", "Tab 3"): StepResult(Path("tab-3.arrow"), [TEXT("A")]),
    }

    prepared = self._call_prep_params(schema, params, tab_results=available_tabs)

    self.assertEqual(
        prepared.tab_outputs,
        {"tab-1": TabOutput("Tab 1", "tab-1.arrow")},
    )
def test_execute_empty_tab(self):
    # A tab flow with no steps is expected to yield a result with no
    # columns, and an output file equal to an empty table.
    wf = Workflow.create_and_init()
    db_tab = wf.tabs.first()
    empty_flow = TabFlow(Tab(db_tab.slug, db_tab.name), [])

    with self._execute(wf, empty_flow, {}) as (step_result, output_path):
        expected_result = StepResult(output_path, [])
        self.assertEqual(step_result, expected_result)
        self.assertEqual(load_trusted_arrow_file(output_path), make_table())
def test_clean_tabs_tab_cycle(self):
    # The selected tab's result is None; cleaning a Multitab value that
    # references it is expected to raise TabCycleError.
    cyclic_results = {Tab("tab-1", "Tab 1"): None}

    with self.assertRaises(TabCycleError):
        self._call_clean_value(
            ParamSchema.Multitab(),
            ["tab-1"],
            tab_results=cyclic_results,
        )
def test_clean_tab_unreachable(self):
    # The referenced tab has a result with an empty column list; cleaning
    # a Tab value pointing at it is expected to raise
    # TabOutputUnreachableError.
    broken_results = {
        Tab("tab-error", "Buggy Tab"): StepResult(Path("tab-error.arrow"), []),
    }

    with self.assertRaises(TabOutputUnreachableError):
        self._call_clean_value(
            ParamSchema.Tab(),
            "tab-error",
            tab_results=broken_results,
        )
def test_clean_tabs_tab_unreachable(self):
    # Multitab counterpart of the single-tab "unreachable" case: the only
    # selected tab has a result with no columns.
    columnless_result = StepResult(Path("tab-1.arrow"), [])
    tab_results = {Tab("tab-1", "Tab 1"): columnless_result}

    with self.assertRaises(TabOutputUnreachableError):
        self._call_clean_value(
            ParamSchema.Multitab(),
            ["tab-1"],
            tab_results=tab_results,
        )
def test_resume_backtrack_on_corrupt_cache_error(self):
    # step-1 has a fresh cache entry whose stored bytes are overwritten with
    # garbage; step-2 has no cache entry. Render is expected to run for both
    # steps, and an ERROR-level log is expected.
    mod_zip = create_module_zipfile("mod", spec_kwargs={"loads_data": True})
    wf = Workflow.create_and_init()
    db_tab = wf.tabs.first()

    # step-1: cached at the current delta -- then corrupted below
    first_step = db_tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="mod",
        last_relevant_delta_id=wf.last_delta_id,
    )
    cached_table = make_table(make_column("A", [1]))
    write_to_rendercache(wf, first_step, wf.last_delta_id, cached_table)
    first_step.refresh_from_db()

    # Overwrite the cached file -- will lead to CorruptCacheError
    bucket = rendercache.io.BUCKET
    cache_key = rendercache.io.crr_parquet_key(first_step.cached_render_result)
    s3.put_bytes(bucket, cache_key, b"CORRUPT")

    # step-2: nothing cached -- must re-render
    second_step = db_tab.steps.create(order=1, slug="step-2", module_id_name="mod")

    flow = TabFlow(
        Tab(db_tab.slug, db_tab.name),
        [ExecuteStep(step, mod_zip, {}) for step in (first_step, second_step)],
    )

    rendered_table = make_table(make_column("B", ["b"]))
    render_patch = patch.object(
        Kernel, "render", side_effect=mock_render(rendered_table)
    )
    with render_patch:
        with self._execute(
            wf, flow, {}, expect_log_level=logging.ERROR
        ) as (result, path):
            expected = StepResult(path, [Column("B", ColumnType.Text())])
            self.assertEqual(result, expected)

        # Rendered step-1 first, then step-2
        self.assertEqual(Kernel.render.call_count, 2)

        # The last render call wrote to the tab-output file
        last_call_kwargs = Kernel.render.call_args[1]
        self.assertRegex(
            last_call_kwargs["output_filename"],
            r"execute-tab-output.*\.arrow",
        )
def test_execute_partial_cache_hit(self):
    # step-1 is cached at the current delta; step-2 is cached at an older
    # delta. Render is expected to be called exactly once.
    mod_zip = create_module_zipfile("mod", spec_kwargs={"loads_data": True})
    wf = Workflow.create_and_init()
    db_tab = wf.tabs.first()

    # step-1: fresh cache entry. Should not render.
    fresh_step = db_tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="mod",
        last_relevant_delta_id=wf.last_delta_id,
    )
    fresh_table = make_table(make_column("A", ["a"]))
    write_to_rendercache(wf, fresh_step, wf.last_delta_id, fresh_table)

    # step-2: cache entry written for the previous delta, so it is stale
    # and must be re-rendered
    stale_step = db_tab.steps.create(
        order=1,
        slug="step-2",
        module_id_name="mod",
        last_relevant_delta_id=wf.last_delta_id,
    )
    stale_table = make_table(make_column("B", ["b"]))
    write_to_rendercache(wf, stale_step, wf.last_delta_id - 1, stale_table)

    flow = TabFlow(
        Tab(db_tab.slug, db_tab.name),
        [ExecuteStep(step, mod_zip, {}) for step in (fresh_step, stale_step)],
    )

    rendered_table = make_table(make_column("C", ["c"]))
    render_patch = patch.object(
        Kernel, "render", side_effect=mock_render(rendered_table)
    )
    with render_patch:
        with self._execute(wf, flow, {}) as (result, path):
            expected = StepResult(path, [Column("C", ColumnType.Text())])
            self.assertEqual(result, expected)
            assert_arrow_table_equals(load_trusted_arrow_file(path), rendered_table)

        # Only step-2 rendered; step-1 came from cache
        Kernel.render.assert_called_once()

        # The render call wrote to the tab-output file
        last_call_kwargs = Kernel.render.call_args[1]
        self.assertRegex(
            last_call_kwargs["output_filename"],
            r"execute-tab-output.*\.arrow",
        )
def test_clean_tabs_happy_path(self):
    # A Multitab param naming two tabs that both have results: the params
    # pass through and both tabs appear in the tab outputs.
    schema = ParamSchema.Dict({"x": ParamSchema.Multitab()})
    params = {"x": ["tab-2", "tab-3"]}
    tab_results = {
        Tab("tab-2", "Tab 2"): StepResult(Path("tab-2.arrow"), [NUMBER("B")]),
        Tab("tab-3", "Tab 3"): StepResult(Path("tab-3.arrow"), [NUMBER("C")]),
    }

    actual = self._call_prep_params(schema, params, tab_results=tab_results)

    expected = PrepParamsResult(
        {"x": ["tab-2", "tab-3"]},
        {
            "tab-2": TabOutput("Tab 2", "tab-2.arrow"),
            "tab-3": TabOutput("Tab 3", "tab-3.arrow"),
        },
        uploaded_files={},
    )
    self.assertEqual(actual, expected)
def test_clean_tabs_preserve_ordering(self):
    # "x" lists tab-2 before tab-3, but tab_results lists tab-3 first.
    # The prepared params are expected to follow the tab_results order.
    schema = ParamSchema.Dict({"x": ParamSchema.Multitab()})
    misordered_params = {"x": ["tab-2", "tab-3"]}
    tab_results = {
        Tab("tab-3", "Tab 3"): StepResult(Path("tab-3.arrow"), [NUMBER("C")]),
        Tab("tab-2", "Tab 2"): StepResult(Path("tab-2.arrow"), [NUMBER("B")]),
    }

    actual = self._call_prep_params(
        schema,
        misordered_params,
        tab_results=tab_results,
    )

    expected = PrepParamsResult(
        {"x": ["tab-3", "tab-2"]},
        {
            "tab-3": TabOutput("Tab 3", "tab-3.arrow"),
            "tab-2": TabOutput("Tab 2", "tab-2.arrow"),
        },
        uploaded_files={},
    )
    self.assertEqual(actual, expected)
def test_clean_tab_happy_path(self):
    # A Tab param naming a tab that has a result: the slug passes through
    # and the tab appears in tab_outputs.
    schema = ParamSchema.Dict({"x": ParamSchema.Tab()})
    params = {"x": "tab-1"}
    tab_results = {
        Tab("tab-1", "Tab 1"): StepResult(Path("tab-1.arrow"), [TEXT("A")]),
    }

    actual = self._call_prep_params(schema, params, tab_results=tab_results)

    expected = PrepParamsResult(
        {"x": "tab-1"},
        tab_outputs={"tab-1": TabOutput("Tab 1", "tab-1.arrow")},
        uploaded_files={},
    )
    self.assertEqual(actual, expected)
def test_clean_multicolumn_from_other_tab(self):
    # "columns" is tied to the "tab" param. The input table has
    # "A-from-tab-1" and tab-2 has "A-from-tab-2"; only the column that
    # exists in tab-2 is expected to survive.
    tab_param = ParamSchema.Tab()
    columns_param = ParamSchema.Multicolumn(tab_parameter="tab")
    schema = ParamSchema.Dict({"tab": tab_param, "columns": columns_param})

    requested = {
        "tab": "tab-2",
        "columns": ["A-from-tab-1", "A-from-tab-2"],
    }
    other_tab_result = StepResult(Path("tab-2.arrow"), [NUMBER("A-from-tab-2")])

    prepared = self._call_prep_params(
        schema,
        requested,
        input_table_columns=[NUMBER("A-from-tab-1")],
        tab_results={Tab("tab-2", "Tab 2"): other_tab_result},
    )

    self.assertEqual(prepared.params["columns"], ["A-from-tab-2"])
def test_execute_cache_hit(self):
    # Both steps have cache entries written at the workflow's current
    # delta. Render must not be called, and the output is expected to equal
    # the last step's cached table.
    first_cached = make_table(make_column("A", [1]))
    last_cached = make_table(make_column("B", [2], format="${:,}"))
    mod_zip = create_module_zipfile("mod", spec_kwargs={"loads_data": True})
    wf = Workflow.create_and_init()
    db_tab = wf.tabs.first()

    first_step = db_tab.steps.create(
        order=0,
        slug="step-1",
        last_relevant_delta_id=wf.last_delta_id,
    )
    write_to_rendercache(wf, first_step, wf.last_delta_id, first_cached)

    last_step = db_tab.steps.create(
        order=1,
        slug="step-2",
        last_relevant_delta_id=wf.last_delta_id,
    )
    write_to_rendercache(wf, last_step, wf.last_delta_id, last_cached)

    flow = TabFlow(
        Tab(db_tab.slug, db_tab.name),
        [ExecuteStep(step, mod_zip, {}) for step in (first_step, last_step)],
    )

    # If render were called, it would produce this table instead
    unwanted_table = make_table(make_column("No", ["bad"]))
    render_patch = patch.object(
        Kernel, "render", side_effect=mock_render(unwanted_table)
    )
    with render_patch:
        with self._execute(wf, flow, {}) as (result, path):
            expected_columns = [Column("B", ColumnType.Number(format="${:,}"))]
            self.assertEqual(result, StepResult(path, expected_columns))
            assert_arrow_table_equals(load_trusted_arrow_file(path), last_cached)

        Kernel.render.assert_not_called()
def test_execute_cache_miss(self):
    # No render cache is written for either step, so render is expected to
    # be called once per step.
    mod_zip = create_module_zipfile("mod", spec_kwargs={"loads_data": True})
    wf = Workflow.create_and_init()
    db_tab = wf.tabs.first()

    first_step = db_tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="mod",
        last_relevant_delta_id=wf.last_delta_id,
    )
    second_step = db_tab.steps.create(
        order=1,
        slug="step-2",
        module_id_name="mod",
        last_relevant_delta_id=wf.last_delta_id,
    )

    flow = TabFlow(
        Tab(db_tab.slug, db_tab.name),
        [ExecuteStep(step, mod_zip, {}) for step in (first_step, second_step)],
    )

    rendered_table = make_table(make_column("A", ["a"]))
    render_patch = patch.object(
        Kernel, "render", side_effect=mock_render(rendered_table)
    )
    with render_patch:
        with self._execute(wf, flow, {}) as (result, path):
            expected = StepResult(path, [Column("A", ColumnType.Text())])
            self.assertEqual(result, expected)
            assert_arrow_table_equals(load_trusted_arrow_file(path), rendered_table)

        # Both step-1 and step-2 rendered
        self.assertEqual(Kernel.render.call_count, 2)

        # The last render call wrote to the tab-output file
        last_call_kwargs = Kernel.render.call_args[1]
        self.assertRegex(
            last_call_kwargs["output_filename"],
            r"execute-tab-output.*\.arrow",
        )
def test_clean_tab_cycle(self):
    # The selected tab's result is None; cleaning a Tab value that
    # references it is expected to raise TabCycleError.
    cyclic_results = {Tab("tab-1", "Tab 1"): None}

    with self.assertRaises(TabCycleError):
        self._call_clean_value(
            ParamSchema.Tab(),
            "tab-1",
            tab_results=cyclic_results,
        )