def test_execute_empty_tab(self):
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    tab_flow = TabFlow(Tab(tab.slug, tab.name), [])
    with self._execute(workflow, tab_flow, {}) as (result, path):
        self.assertEqual(result, StepResult(path, []))
        self.assertEqual(load_trusted_arrow_file(path), make_table())

def test_resume_backtrack_on_corrupt_cache_error(self): module_zipfile = create_module_zipfile("mod") workflow = Workflow.create_and_init() tab = workflow.tabs.first() # step1: cached result is fresh -- but CORRUPT step1 = tab.wf_modules.create( order=0, slug="step-1", module_id_name="mod", last_relevant_delta_id=workflow.last_delta_id, ) rendercache.cache_render_result( workflow, step1, workflow.last_delta_id, RenderResult(arrow_table({"A": [1]})), ) minio.put_bytes( # Write corrupted data -- will lead to CorruptCacheError rendercache.io.BUCKET, rendercache.io.crr_parquet_key(step1.cached_render_result), b"CORRUPT", ) # step2: no cached result -- must re-render step2 = tab.wf_modules.create(order=1, slug="step-2", module_id_name="mod") tab_flow = TabFlow( tab.to_arrow(), [ ExecuteStep(step1, module_zipfile, {}), ExecuteStep(step2, module_zipfile, {}), ], ) with patch.object(Kernel, "render", side_effect=mock_render({"B": [2]})): with self._execute(workflow, tab_flow, {}, expect_log_level=logging.ERROR) as result: expected = RenderResult(arrow_table({"B": [2]})) assert_render_result_equals(result, expected) self.assertEqual( # called with step1, then step2 Kernel.render.call_count, 2, ) self.assertRegex( # Output is to the correct file Kernel.render.call_args[1]["output_filename"], r"execute-tab-output.*\.arrow", )
def test_execute_partial_cache_hit(self): module_zipfile = create_module_zipfile("mod") workflow = Workflow.create_and_init() tab = workflow.tabs.first() # step1: cached result is fresh. Should not render. step1 = tab.wf_modules.create( order=0, slug="step-1", module_id_name="mod", last_relevant_delta_id=workflow.last_delta_id, ) rendercache.cache_render_result( workflow, step1, workflow.last_delta_id, RenderResult(arrow_table({"A": [1]})), ) # step2: cached result is stale, so must be re-rendered step2 = tab.wf_modules.create( order=1, slug="step-2", module_id_name="mod", last_relevant_delta_id=workflow.last_delta_id - 1, ) rendercache.cache_render_result( workflow, step2, workflow.last_delta_id - 1, RenderResult(arrow_table({"B": [2]})), ) step2.last_relevant_delta_id = workflow.last_delta_id step2.save(update_fields=["last_relevant_delta_id"]) tab_flow = TabFlow( tab.to_arrow(), [ ExecuteStep(step1, module_zipfile, {}), ExecuteStep(step2, module_zipfile, {}), ], ) with patch.object(Kernel, "render", side_effect=mock_render({"B": [3]})): with self._execute(workflow, tab_flow, {}) as result: expected = RenderResult(arrow_table({"B": [3]})) assert_render_result_equals(result, expected) Kernel.render.assert_called_once() # step2, not step1 self.assertRegex( # Output is to the correct file Kernel.render.call_args[1]["output_filename"], r"execute-tab-output.*\.arrow", )
def test_execute_partial_cache_hit(self, fake_load_module):
    ModuleVersion.create_or_replace_from_spec(
        {"id_name": "mod", "name": "Mod", "category": "Clean", "parameters": []}
    )
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    # step1: cached result is fresh. Should not render.
    step1 = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="mod",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    rendercache.cache_render_result(
        workflow,
        step1,
        workflow.last_delta_id,
        RenderResult(arrow_table({"A": [1]})),
    )
    # step2: cached result is stale, so must be re-rendered
    step2 = tab.wf_modules.create(
        order=1,
        slug="step-2",
        module_id_name="mod",
        last_relevant_delta_id=workflow.last_delta_id - 1,
    )
    rendercache.cache_render_result(
        workflow,
        step2,
        workflow.last_delta_id - 1,
        RenderResult(arrow_table({"B": [2]})),
    )
    step2.last_relevant_delta_id = workflow.last_delta_id
    step2.save(update_fields=["last_relevant_delta_id"])

    tab_flow = TabFlow(
        tab.to_arrow(),
        [
            ExecuteStep(step1, ParamDType.Dict({}), {}),
            ExecuteStep(step2, ParamDType.Dict({}), {}),
        ],
    )

    expected = RenderResult(arrow_table({"B": [3]}))
    fake_load_module.return_value.render.return_value = expected

    with self._execute(workflow, tab_flow, {}) as result:
        assert_render_result_equals(result, expected)

    fake_load_module.return_value.render.assert_called_once()  # step2, not step1
    self.assertRegex(
        # Output is to the correct file
        fake_load_module.return_value.render.call_args[1]["output_filename"],
        r"execute-tab-output.*\.arrow",
    )

def test_resume_backtrack_on_corrupt_cache_error(self, fake_load_module):
    ModuleVersion.create_or_replace_from_spec(
        {"id_name": "mod", "name": "Mod", "category": "Clean", "parameters": []}
    )
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    # step1: cached result is fresh -- but CORRUPT
    step1 = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="mod",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    rendercache.cache_render_result(
        workflow,
        step1,
        workflow.last_delta_id,
        RenderResult(arrow_table({"A": [1]})),
    )
    minio.put_bytes(
        # Write corrupted data -- will lead to CorruptCacheError
        rendercache.io.BUCKET,
        rendercache.io.crr_parquet_key(step1.cached_render_result),
        b"CORRUPT",
    )
    # step2: no cached result -- must re-render
    step2 = tab.wf_modules.create(order=1, slug="step-2", module_id_name="mod")

    tab_flow = TabFlow(
        tab.to_arrow(),
        [
            ExecuteStep(step1, ParamDType.Dict({}), {}),
            ExecuteStep(step2, ParamDType.Dict({}), {}),
        ],
    )

    expected = RenderResult(arrow_table({"B": [2]}))
    fake_load_module.return_value.render.return_value = expected

    with self._execute(
        workflow, tab_flow, {}, expect_log_level=logging.ERROR
    ) as result:
        assert_render_result_equals(result, expected)

    self.assertEqual(
        # called with step1, then step2
        fake_load_module.return_value.render.call_count,
        2,
    )
    self.assertRegex(
        # Output is to the correct file
        fake_load_module.return_value.render.call_args[1]["output_filename"],
        r"execute-tab-output.*\.arrow",
    )

def test_resume_backtrack_on_corrupt_cache_error(self):
    module_zipfile = create_module_zipfile("mod", spec_kwargs={"loads_data": True})
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    # step1: cached result is fresh -- but CORRUPT
    step1 = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="mod",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    write_to_rendercache(
        workflow, step1, workflow.last_delta_id, make_table(make_column("A", [1]))
    )
    step1.refresh_from_db()
    s3.put_bytes(
        # Write corrupted data -- will lead to CorruptCacheError
        rendercache.io.BUCKET,
        rendercache.io.crr_parquet_key(step1.cached_render_result),
        b"CORRUPT",
    )
    # step2: no cached result -- must re-render
    step2 = tab.steps.create(order=1, slug="step-2", module_id_name="mod")

    tab_flow = TabFlow(
        Tab(tab.slug, tab.name),
        [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ],
    )

    new_table = make_table(make_column("B", ["b"]))

    with patch.object(Kernel, "render", side_effect=mock_render(new_table)):
        with self._execute(
            workflow, tab_flow, {}, expect_log_level=logging.ERROR
        ) as (result, path):
            self.assertEqual(
                result, StepResult(path, [Column("B", ColumnType.Text())])
            )

        self.assertEqual(
            # called with step1, then step2
            Kernel.render.call_count,
            2,
        )
        self.assertRegex(
            # Output is to the correct file
            Kernel.render.call_args[1]["output_filename"],
            r"execute-tab-output.*\.arrow",
        )

def test_execute_partial_cache_hit(self):
    module_zipfile = create_module_zipfile("mod", spec_kwargs={"loads_data": True})
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    # step1: cached result is fresh. Should not render.
    step1 = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="mod",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    write_to_rendercache(
        workflow, step1, workflow.last_delta_id, make_table(make_column("A", ["a"]))
    )
    # step2: cached result is stale, so must be re-rendered
    step2 = tab.steps.create(
        order=1,
        slug="step-2",
        module_id_name="mod",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    write_to_rendercache(
        workflow,
        step2,
        workflow.last_delta_id - 1,
        make_table(make_column("B", ["b"])),
    )

    tab_flow = TabFlow(
        Tab(tab.slug, tab.name),
        [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ],
    )

    new_table = make_table(make_column("C", ["c"]))

    with patch.object(Kernel, "render", side_effect=mock_render(new_table)):
        with self._execute(workflow, tab_flow, {}) as (result, path):
            self.assertEqual(
                result, StepResult(path, [Column("C", ColumnType.Text())])
            )
            assert_arrow_table_equals(load_trusted_arrow_file(path), new_table)

        Kernel.render.assert_called_once()  # step2, not step1
        self.assertRegex(
            # Output is to the correct file
            Kernel.render.call_args[1]["output_filename"],
            r"execute-tab-output.*\.arrow",
        )

def test_execute_cache_hit(self):
    cached_table1 = make_table(make_column("A", [1]))
    cached_table2 = make_table(make_column("B", [2], format="${:,}"))
    module_zipfile = create_module_zipfile("mod", spec_kwargs={"loads_data": True})
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step1 = tab.steps.create(
        order=0, slug="step-1", last_relevant_delta_id=workflow.last_delta_id
    )
    write_to_rendercache(workflow, step1, workflow.last_delta_id, cached_table1)
    step2 = tab.steps.create(
        order=1, slug="step-2", last_relevant_delta_id=workflow.last_delta_id
    )
    write_to_rendercache(workflow, step2, workflow.last_delta_id, cached_table2)

    tab_flow = TabFlow(
        Tab(tab.slug, tab.name),
        [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ],
    )

    unwanted_table = make_table(make_column("No", ["bad"]))

    with patch.object(Kernel, "render", side_effect=mock_render(unwanted_table)):
        with self._execute(workflow, tab_flow, {}) as (result, path):
            self.assertEqual(
                result,
                StepResult(path, [Column("B", ColumnType.Number(format="${:,}"))]),
            )
            assert_arrow_table_equals(load_trusted_arrow_file(path), cached_table2)

        Kernel.render.assert_not_called()

def test_execute_cache_miss(self, fake_load_module):
    ModuleVersion.create_or_replace_from_spec(
        {"id_name": "mod", "name": "Mod", "category": "Clean", "parameters": []}
    )
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step1 = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="mod",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    step2 = tab.wf_modules.create(
        order=1,
        slug="step-2",
        module_id_name="mod",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    tab_flow = TabFlow(
        tab.to_arrow(),
        [
            ExecuteStep(step1, ParamDType.Dict({}), {}),
            ExecuteStep(step2, ParamDType.Dict({}), {}),
        ],
    )

    expected = RenderResult(arrow_table({"B": [2]}))
    fake_load_module.return_value.render.return_value = expected

    with self._execute(workflow, tab_flow, {}) as result:
        assert_render_result_equals(result, expected)

    self.assertEqual(
        fake_load_module.return_value.render.call_count, 2  # step1, then step2
    )
    self.assertRegex(
        # Output is to the correct file
        fake_load_module.return_value.render.call_args[1]["output_filename"],
        r"execute-tab-output.*\.arrow",
    )

def test_execute_cache_miss(self):
    module_zipfile = create_module_zipfile("mod", spec_kwargs={"loads_data": True})
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step1 = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="mod",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    step2 = tab.steps.create(
        order=1,
        slug="step-2",
        module_id_name="mod",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    tab_flow = TabFlow(
        Tab(tab.slug, tab.name),
        [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ],
    )

    table = make_table(make_column("A", ["a"]))

    with patch.object(Kernel, "render", side_effect=mock_render(table)):
        with self._execute(workflow, tab_flow, {}) as (result, path):
            self.assertEqual(
                result, StepResult(path, [Column("A", ColumnType.Text())])
            )
            assert_arrow_table_equals(load_trusted_arrow_file(path), table)

        self.assertEqual(Kernel.render.call_count, 2)  # step1, then step2
        self.assertRegex(
            # Output is to the correct file
            Kernel.render.call_args[1]["output_filename"],
            r"execute-tab-output.*\.arrow",
        )

def test_execute_cache_hit(self):
    module_zipfile = create_module_zipfile("mod")
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step1 = tab.wf_modules.create(
        order=0, slug="step-1", last_relevant_delta_id=workflow.last_delta_id
    )
    rendercache.cache_render_result(
        workflow,
        step1,
        workflow.last_delta_id,
        RenderResult(arrow_table({"A": [1]})),
    )
    step2 = tab.wf_modules.create(
        order=1, slug="step-2", last_relevant_delta_id=workflow.last_delta_id
    )
    rendercache.cache_render_result(
        workflow,
        step2,
        workflow.last_delta_id,
        RenderResult(arrow_table({"B": [2]})),
    )

    tab_flow = TabFlow(
        tab.to_arrow(),
        [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ],
    )

    with patch.object(Kernel, "render", side_effect=mock_render({"No": ["bad"]})):
        with self._execute(workflow, tab_flow, {}) as result:
            assert_render_result_equals(
                result, RenderResult(arrow_table({"B": [2]}), [])
            )

def test_execute_cache_miss(self): module_zipfile = create_module_zipfile("mod") workflow = Workflow.create_and_init() tab = workflow.tabs.first() step1 = tab.wf_modules.create( order=0, slug="step-1", module_id_name="mod", last_relevant_delta_id=workflow.last_delta_id, ) step2 = tab.wf_modules.create( order=1, slug="step-2", module_id_name="mod", last_relevant_delta_id=workflow.last_delta_id, ) tab_flow = TabFlow( tab.to_arrow(), [ ExecuteStep(step1, module_zipfile, {}), ExecuteStep(step2, module_zipfile, {}), ], ) with patch.object(Kernel, "render", side_effect=mock_render({"B": [2]})): with self._execute(workflow, tab_flow, {}) as result: expected = RenderResult(arrow_table({"B": [2]})) assert_render_result_equals(result, expected) self.assertEqual(Kernel.render.call_count, 2) # step2, not step1 self.assertRegex( # Output is to the correct file Kernel.render.call_args[1]["output_filename"], r"execute-tab-output.*\.arrow", )
def test_execute_cache_hit(self, fake_module):
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step1 = tab.wf_modules.create(
        order=0, slug="step-1", last_relevant_delta_id=workflow.last_delta_id
    )
    rendercache.cache_render_result(
        workflow,
        step1,
        workflow.last_delta_id,
        RenderResult(arrow_table({"A": [1]})),
    )
    step2 = tab.wf_modules.create(
        order=1, slug="step-2", last_relevant_delta_id=workflow.last_delta_id
    )
    rendercache.cache_render_result(
        workflow,
        step2,
        workflow.last_delta_id,
        RenderResult(arrow_table({"B": [2]})),
    )

    tab_flow = TabFlow(
        tab.to_arrow(),
        [
            ExecuteStep(step1, ParamDType.Dict({}), {}),
            ExecuteStep(step2, ParamDType.Dict({}), {}),
        ],
    )

    with self._execute(workflow, tab_flow, {}) as result:
        assert_render_result_equals(
            result, RenderResult(arrow_table({"B": [2]}), [])
        )

    fake_module.assert_not_called()

def test_execute_empty_tab(self):
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    tab_flow = TabFlow(tab.to_arrow(), [])
    with self._execute(workflow, tab_flow, {}) as result:
        assert_render_result_equals(result, RenderResult())
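

# NOTE: hypothetical sketch of the mock_render() helper that several tests above
# patch in as Kernel.render's side_effect. The real helper is defined elsewhere in
# this test module; this sketch only assumes that Kernel.render receives an
# output_filename keyword argument, that the rendered table must be written there
# in Arrow file format, and that a RenderResult with no errors signals success.
# The cjwkernel import path and the RenderResult(errors=[]) construction are
# assumptions; adjust to the actual Kernel API if it differs.
import pyarrow as pa

from cjwkernel.types import RenderResult  # assumed import path


def mock_render(output_table):
    """Return a Kernel.render() stand-in that always "renders" output_table."""

    def render(*args, output_filename, **kwargs):
        # Accept either a pyarrow.Table or a plain dict of columns, since both
        # forms appear in the tests above.
        table = (
            output_table
            if isinstance(output_table, pa.Table)
            else pa.table(output_table)
        )
        # Write the canned table to the path the executor chose for this step.
        with pa.ipc.RecordBatchFileWriter(output_filename, table.schema) as writer:
            writer.write_table(table)
        return RenderResult(errors=[])  # no errors: pretend the render succeeded

    return render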