def test_duplicate_copies_fresh_cache(self):
    """Duplicating a step must copy its fresh render cache into the new workflow."""
    # The cache's filename depends on workflow_id and step_id.
    # Duplicating it would need more complex code :).
    original_table = make_table(make_column("A", [1], format="${:,.2f}"))
    write_to_rendercache(
        self.workflow,
        self.step,
        1,
        table=original_table,
        errors=[RenderError(I18nMessage("X", {}, None))],
        json={"foo": "bar"},
    )
    other_workflow = Workflow.objects.create()
    other_tab = other_workflow.tabs.create(position=0)
    duplicate = self.step.duplicate_into_new_workflow(other_tab)
    duplicate_crr = duplicate.cached_render_result
    # Same cache contents, re-keyed to the new workflow/step at delta 0.
    expected = replace(
        self.step.cached_render_result,
        workflow_id=other_workflow.id,
        step_id=duplicate.id,
        delta_id=0,
    )
    self.assertEqual(duplicate_crr, expected)
    with open_cached_render_result(duplicate_crr) as result2:
        assert_arrow_table_equals(result2.table, original_table)
        self.assertEqual(
            result2.errors, [RenderError(I18nMessage("X", {}, None))]
        )
        self.assertEqual(result2.json, {"foo": "bar"})
def test_execute_new_revision(self):
    """A stale cached result must be discarded and re-rendered by the module."""
    wf = Workflow.create_and_init()
    tab = wf.tabs.first()
    create_module_zipfile(
        "mod",
        spec_kwargs={"loads_data": True},
        python_code='import pandas as pd\ndef render(table, params): return pd.DataFrame({"B": [2]})',
    )
    stale_step = tab.steps.create(
        order=0,
        slug="step-1",
        last_relevant_delta_id=2,
        module_id_name="mod",
    )
    # Cache is written for delta 1 while the step expects delta 2: stale.
    write_to_rendercache(wf, stale_step, 1, make_table(make_column("A", ["a"])))
    self._execute(wf)
    stale_step.refresh_from_db()
    with open_cached_render_result(stale_step.cached_render_result) as result:
        # The fresh render (B=[2]) replaced the stale cache (A=["a"]).
        assert_arrow_table_equals(result.table, make_table(make_column("B", [2])))
def test_deprecated_current_table_json(self):
    """The legacy public-moduledata JSON endpoint serves the cached table."""
    cached = make_table(make_column("A", ["a", "b"]))
    write_to_rendercache(self.workflow, self.step2, 2, cached)
    url = f"/public/moduledata/live/{self.step2.id}.json"
    response = self.client.get(url)
    self.assertEqual(response.status_code, 200)
    # One JSON object per row.
    self.assertEqual(read_streaming_json(response), [{"A": "a"}, {"A": "b"}])
def test_deprecated_current_table_csv(self):
    """The legacy public-moduledata CSV endpoint serves the cached table."""
    cached = make_table(make_column("A", ["a", "b"]))
    write_to_rendercache(self.workflow, self.step2, 2, cached)
    url = f"/public/moduledata/live/{self.step2.id}.csv"
    response = self.client.get(url)
    self.assertEqual(response.status_code, 200)
    body = b"".join(response.streaming_content)
    self.assertEqual(body, b"A\na\nb")
def test_tile_row_out_of_bounds(self):
    """Requesting a tile row past the table's extent must 404."""
    write_to_rendercache(
        self.workflow, self.step2, 2, make_table(make_column("A", [1]))
    )
    # Tile (1,0): row-tile 1 does not exist for a one-row table.
    url = f"/workflows/{self.workflow.id}/tiles/step-2/delta-2/1,0.json"
    response = self.client.get(url)
    self.assertEqual(response.status_code, status.NOT_FOUND)
    self.assertEqual(json.loads(response.content), {"error": "tile out of bounds"})
def test_json(self):
    """The tile endpoint serves the cached table as row-major JSON."""
    write_to_rendercache(
        self.workflow, self.step2, 2, make_table(make_column("A", ["a", "b"]))
    )
    url = f"/workflows/{self.workflow.id}/tiles/step-2/delta-2/0,0.json"
    response = self.client.get(url)
    self.assertEqual(response.status_code, 200)
    # Rows come back as lists of cell values.
    self.assertEqual(json.loads(response.content), {"rows": [["a"], ["b"]]})
def test_current_table_json(self):
    """current-result-table.json streams the cached table as JSON records."""
    write_to_rendercache(
        self.workflow, self.step2, 2, make_table(make_column("A", ["a", "b"]))
    )
    url = f"/workflows/{self.workflow.id}/steps/step-2/current-result-table.json"
    response = self.client.get(url)
    self.assertEqual(response.status_code, 200)
    self.assertEqual(read_streaming_json(response), [{"A": "a"}, {"A": "b"}])
def test_current_table_csv(self):
    """current-result-table.csv streams the cached table as CSV."""
    write_to_rendercache(
        self.workflow, self.step2, 2, make_table(make_column("A", ["a", "b"]))
    )
    url = f"/workflows/{self.workflow.id}/steps/step-2/current-result-table.csv"
    response = self.client.get(url)
    self.assertEqual(response.status_code, 200)
    body = b"".join(response.streaming_content)
    self.assertEqual(body, b"A\na\nb")
def test_empty_json(self):
    """A cached result whose JSON payload is {} counts as "no JSON": 404."""
    write_to_rendercache(
        self.workflow, self.step, 1, make_table(make_column("A", [1])), json={}
    )
    response = self._request()
    self.assertEqual(response.status_code, status.NOT_FOUND)
    self.assertEqual(
        json.loads(response.content), {"error": "render result has no JSON"}
    )
def test_disallow_non_text(self):
    """Value counts over a non-text (numeric) column come back empty."""
    write_to_rendercache(
        self.workflow,
        self.step1,
        self.step1.last_relevant_delta_id,
        make_table(make_column("A", [1, 2, 3, 2, 1])),
    )
    response = self._request("A")
    self.assertEqual(response.status_code, 200)
    # Numbers are not counted -- only text columns produce values.
    self.assertEqual(json.loads(response.content), {"values": {}})
def test_resume_backtrack_on_corrupt_cache_error(self):
    """Corrupt cached Parquet for step1 must force re-rendering step1 AND step2."""
    module_zipfile = create_module_zipfile(
        "mod", spec_kwargs={"loads_data": True})
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    # step1: cached result is fresh -- but CORRUPT
    step1 = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="mod",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    write_to_rendercache(workflow, step1, workflow.last_delta_id,
                         make_table(make_column("A", [1])))
    step1.refresh_from_db()
    s3.put_bytes(
        # Write corrupted data -- will lead to CorruptCacheError
        rendercache.io.BUCKET,
        rendercache.io.crr_parquet_key(step1.cached_render_result),
        b"CORRUPT",
    )
    # step2: no cached result -- must re-render
    step2 = tab.steps.create(order=1, slug="step-2", module_id_name="mod")
    tab_flow = TabFlow(
        Tab(tab.slug, tab.name),
        [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ],
    )
    new_table = make_table(make_column("B", ["b"]))
    with patch.object(Kernel, "render", side_effect=mock_render(new_table)):
        # The corrupt cache is reported at ERROR level; the executor recovers.
        with self._execute(workflow, tab_flow, {},
                           expect_log_level=logging.ERROR) as (result, path):
            self.assertEqual(
                result, StepResult(path, [Column("B", ColumnType.Text())]))
            self.assertEqual(
                # called with step1, then step2
                Kernel.render.call_count,
                2,
            )
            self.assertRegex(
                # Output is to the correct file
                Kernel.render.call_args[1]["output_filename"],
                r"execute-tab-output.*\.arrow",
            )
def test_execute_partial_cache_hit(self):
    """Only steps with a stale cache re-render; fresh cached steps are skipped."""
    module_zipfile = create_module_zipfile(
        "mod", spec_kwargs={"loads_data": True})
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    # step1: cached result is fresh. Should not render.
    step1 = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="mod",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    write_to_rendercache(workflow, step1, workflow.last_delta_id,
                         make_table(make_column("A", ["a"])))
    # step2: cached result is stale, so must be re-rendered
    step2 = tab.steps.create(
        order=1,
        slug="step-2",
        module_id_name="mod",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    write_to_rendercache(
        workflow,
        step2,
        workflow.last_delta_id - 1,  # one delta behind: stale
        make_table(make_column("B", ["b"])),
    )
    tab_flow = TabFlow(
        Tab(tab.slug, tab.name),
        [
            ExecuteStep(step1, module_zipfile, {}),
            ExecuteStep(step2, module_zipfile, {}),
        ],
    )
    new_table = make_table(make_column("C", ["c"]))
    with patch.object(Kernel, "render", side_effect=mock_render(new_table)):
        with self._execute(workflow, tab_flow, {}) as (result, path):
            self.assertEqual(
                result, StepResult(path, [Column("C", ColumnType.Text())]))
            assert_arrow_table_equals(load_trusted_arrow_file(path), new_table)
            Kernel.render.assert_called_once()  # step2, not step1
            self.assertRegex(
                # Output is to the correct file
                Kernel.render.call_args[1]["output_filename"],
                r"execute-tab-output.*\.arrow",
            )
def test_duplicate_ignores_stale_cache(self):
    """A stale cached result must not be copied into a duplicated workflow."""
    # write to the wrong delta ID: "stale"
    write_to_rendercache(
        self.workflow, self.step, 5, make_table(make_column("A", [1]))
    )
    new_workflow = Workflow.objects.create()
    new_tab = new_workflow.tabs.create(position=0)
    duplicate = self.step.duplicate_into_new_workflow(new_tab)
    # The duplicate starts with no cache at all.
    self.assertIsNone(duplicate.cached_render_result)
    self.assertEqual(duplicate.cached_render_result_status, None)
def test_delete_step(self):
    """Deleting a step must remove its cached Parquet file from s3."""
    write_to_rendercache(
        self.workflow,
        self.step,
        1,
        table=make_table(make_column("A", [1])),
        errors=[RenderError(I18nMessage("X", {}, None), [])],
        json={"foo": "bar"},
    )
    cached_key = crr_parquet_key(self.step.cached_render_result)
    self.step.delete()
    # The Parquet blob must be gone along with the step.
    self.assertFalse(s3.exists(BUCKET, cached_key))
def test_json(self):
    """The endpoint serves the cached render result's JSON payload verbatim."""
    write_to_rendercache(
        self.workflow,
        self.step,
        1,
        make_table(make_column("A", [1])),
        json={"hello": "world!"},
    )
    response = self._request()
    self.assertEqual(response.status_code, status.OK)
    self.assertEqual(json.loads(response.content), {"hello": "world!"})
def test_str(self):
    """Value counts on a text column tally each distinct non-null value."""
    write_to_rendercache(
        self.workflow,
        self.step1,
        self.step1.last_relevant_delta_id,
        make_table(make_column("A", ["a", "b", "b", "a", "c", None])),
    )
    response = self._request("A")
    self.assertEqual(response.status_code, 200)
    # The None value is excluded from the counts.
    self.assertEqual(
        json.loads(response.content), {"values": {"a": 2, "b": 2, "c": 1}}
    )
def test_corrupt_cache_error(self):
    """A missing Parquet file behind a valid DB cache row must 404, not 500."""
    write_to_rendercache(
        self.workflow, self.step2, 2, make_table(make_column("A", [1]))
    )
    # Break the cache: the DB row remains but its Parquet file is deleted.
    delete_parquet_files_for_step(self.workflow.id, self.step2.id)
    url = f"/workflows/{self.workflow.id}/tiles/step-2/delta-2/0,0.json"
    response = self.client.get(url)
    self.assertEqual(response.status_code, status.NOT_FOUND)
    self.assertEqual(
        json.loads(response.content),
        {"error": "result went away; please try again with another delta_id"},
    )
def test_wrong_column(self):
    """Requesting value counts for a column absent from the table must 404."""
    write_to_rendercache(
        self.workflow,
        self.step1,
        self.step1.last_relevant_delta_id,
        make_table(make_column("A", ["a"])),
    )
    response = self._request("B")
    self.assertEqual(response.status_code, status.NOT_FOUND)
    self.assertEqual(
        json.loads(response.content), {"error": 'column "B" not found'}
    )
def test_init_state(self):
    """The embed page must inline window.initState with workflow + step data."""
    create_module_zipfile("chart", spec_kwargs={"html_output": True}, html="hi")
    write_to_rendercache(
        self.workflow, self.step, 1, make_table(make_column("A", [1])), json={}
    )
    with self.assertLogs("cjwstate.params", level="INFO"):
        response = self._request()
    self.assertEqual(response.status_code, status.OK)
    # Pull the serialized init state out of the rendered HTML.
    match = re.search(br"window.initState =([^\n]*)", response.content)
    init_state = json.loads(match.group(1))
    self.assertEqual(init_state["workflow"]["id"], self.workflow.id)
    self.assertEqual(init_state["step"]["module"], "chart")
    self.assertEqual(init_state["step"]["slug"], "step-1")
def test_load_input_cached_render_result(self):
    """load_database_objects must expose the previous step's cached result."""
    table = make_table(make_column("A", [1]))
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step1 = tab.steps.create(
        order=0, slug="step-1", last_relevant_delta_id=workflow.last_delta_id
    )
    write_to_rendercache(workflow, step1, workflow.last_delta_id, table)
    step2 = tab.steps.create(order=1, slug="step-2")
    result = self.run_with_async_db(
        fetch.load_database_objects(workflow.id, step2.id)
    )
    # The positional slot and the named attribute must agree.
    self.assertEqual(result[4], step1.cached_render_result)
    self.assertEqual(result.input_cached_render_result, step1.cached_render_result)
def test_email_delta_when_errors_change(self, email_delta):
    """A render whose errors differ from the cached errors must email the owner."""
    user = create_test_user()
    workflow = Workflow.create_and_init(owner_id=user.id)
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
        notifications=True,
    )
    # We need to actually populate the cache to set up the test. The code
    # under test will only try to open the render result if the database
    # says there's something there.
    write_to_rendercache(
        workflow,
        step,
        workflow.last_delta_id - 1,  # stale
        table=make_table(),
        errors=[
            RenderError(I18nMessage("py.renderer.execute.step.noModule", {}, None))
        ],
    )
    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": True},
        # returns different error
        python_code='import pandas as pd\ndef render(table, params): return [{"id": "err"}]',
    )
    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            execute_step(
                chroot_context=self.chroot_context,
                workflow=workflow,
                step=step,
                module_zipfile=module_zipfile,
                params={},
                tab_name=tab.name,
                input_path=self.empty_table_path,
                input_table_columns=[],
                tab_results={},
                output_path=self.output_path,
            )
        )
    email_delta.assert_called()  # there's new data
def test_auth_report_viewer_allowed_auto_report_chart(self):
    """A report-viewer may access a chart step shown in the auto-report."""
    write_to_rendercache(
        self.workflow,
        self.step,
        1,
        make_table(make_column("A", [1])),
        json={"hello": "world!"},
    )
    # Turn the step into an html_output ("chart") step.
    create_module_zipfile("chart", spec_kwargs={"html_output": True})
    self.step.module_id_name = "chart"
    self.step.save(update_fields=["module_id_name"])
    viewer = create_test_user("alice", "*****@*****.**")
    self.workflow.acl.create(email="*****@*****.**", role=Role.REPORT_VIEWER)
    self.client.force_login(viewer)
    response = self._request()
    self.assertEqual(response.status_code, 200, "Should have access to Chart step")
def test_current_table_zero_columns(self):
    """A zero-column table must serve as empty CSV and empty JSON."""
    write_to_rendercache(self.workflow, self.step2, 2, make_table())
    base_url = f"/workflows/{self.workflow.id}/steps/step-2/current-result-table"
    # CSV
    csv_response = self.client.get(base_url + ".csv")
    self.assertEqual(csv_response.status_code, status.OK)
    self.assertEqual(list(csv_response.streaming_content), [])
    # JSON
    json_response = self.client.get(base_url + ".json")
    self.assertEqual(json_response.status_code, status.OK)
    self.assertEqual(read_streaming_json(json_response), [])
def test_workflow_view_triggers_render_if_stale_cache(self):
    """Opening a workflow whose cache is out of date must queue a re-render."""
    step = self.tab1.steps.create(
        order=0,
        slug="step-1",
        last_relevant_delta_id=1,
        cached_render_result_delta_id=1,
    )
    # Cache a result at delta 1...
    write_to_rendercache(
        self.workflow1, step, 1, make_table(make_column("A", ["a"]))
    )
    # ...then advance the step so the cache becomes stale.
    step.last_relevant_delta_id = 2
    step.save(update_fields=["last_relevant_delta_id"])
    self.client.force_login(self.user)
    self.client.get("/workflows/%d/" % self.workflow1.id)
    self.queue_render.assert_called_with(
        self.workflow1.id, self.workflow1.last_delta_id
    )
def test_cached_result_has_wrong_delta_id(self):
    """A tile URL naming delta-2 must 404 once the cache moves to delta 3."""
    write_to_rendercache(
        self.workflow, self.step2, 2, make_table(make_column("A", [1]))
    )
    # Move the step (and its cache pointer) to delta 3; the URL still says 2.
    self.step2.cached_render_result_delta_id = 3
    self.step2.last_relevant_delta_id = 3
    self.step2.save(
        update_fields=["cached_render_result_delta_id", "last_relevant_delta_id"]
    )
    url = f"/workflows/{self.workflow.id}/tiles/step-2/delta-2/0,0.json"
    response = self.client.get(url)
    self.assertEqual(response.status_code, status.NOT_FOUND)
    self.assertEqual(
        json.loads(response.content), {"error": "delta_id result not cached"}
    )
def test_cached_result_has_wrong_delta_id(self):
    """A cache written for an outdated delta must read as absent: 404."""
    write_to_rendercache(
        self.workflow,
        self.step,
        1,
        make_table(make_column("A", [1])),
        json={"hello": "world"},
    )
    # Advance the step past the cached delta so the cache is stale.
    self.step.last_relevant_delta_id = 3
    self.step.save(update_fields=["last_relevant_delta_id"])
    response = self._request()
    self.assertEqual(response.status_code, status.NOT_FOUND)
    self.assertEqual(
        json.loads(response.content), {"error": "render result not in cache"}
    )
def test_email_delta_ignore_corrupt_cache_error(self, email_delta, read_cache):
    """A CorruptCacheError while reading the prior result must suppress email."""
    user = create_test_user()
    workflow = Workflow.create_and_init(owner_id=user.id)
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
        notifications=True,
    )
    # We need to actually populate the cache to set up the test. The code
    # under test will only try to open the render result if the database
    # says there's something there.
    write_to_rendercache(
        workflow,
        step,
        workflow.last_delta_id - 1,  # stale
        make_table(make_column("A", [1])),
    )
    # Simulate corruption: any read of the cache raises CorruptCacheError.
    read_cache.side_effect = rendercache.CorruptCacheError
    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": True},
        # returns different data -- but CorruptCacheError means we won't care.
        python_code='import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})',
    )
    with self.assertLogs(level=logging.ERROR):
        self.run_with_async_db(
            execute_step(
                chroot_context=self.chroot_context,
                workflow=workflow,
                step=step,
                module_zipfile=module_zipfile,
                params={},
                tab_name=tab.name,
                input_path=self.empty_table_path,
                input_table_columns=[],
                tab_results={},
                output_path=self.output_path,
            )
        )
    email_delta.assert_not_called()
def test_auth_report_viewer_allowed_custom_report_chart(self):
    """A report-viewer may access a chart step included in a custom report."""
    write_to_rendercache(
        self.workflow,
        self.step,
        1,
        make_table(make_column("A", [1])),
        json={"hello": "world!"},
    )
    viewer = create_test_user("alice", "*****@*****.**")
    self.workflow.acl.create(email="*****@*****.**", role=Role.REPORT_VIEWER)
    # Switch to a custom report that contains this step as a Chart block.
    self.workflow.has_custom_report = True
    self.workflow.save(update_fields=["has_custom_report"])
    self.workflow.blocks.create(
        position=0, slug="block-1", block_type="Chart", step_id=self.step.id
    )
    self.client.force_login(viewer)
    response = self._request()
    self.assertEqual(response.status_code, 200, "Should have access to Chart step")
def test_email_delta(self, email_delta):
    """Rendering new data for a notifications-enabled step must email the owner."""
    user = create_test_user()
    workflow = Workflow.create_and_init(owner_id=user.id)
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
        notifications=True,  # owner wants an email on new data
    )
    write_to_rendercache(
        workflow,
        step,
        workflow.last_delta_id - 1,  # stale
        make_table(make_column("A", [1])),
    )
    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": True},
        # renders A=[2], different from the cached A=[1]
        python_code='import pandas as pd\ndef render(table, params): return pd.DataFrame({"A": [2]})',
    )
    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            execute_step(
                chroot_context=self.chroot_context,
                workflow=workflow,
                step=step,
                module_zipfile=module_zipfile,
                params={},
                tab_name=tab.name,
                input_path=self.empty_table_path,
                input_table_columns=[],
                tab_results={},
                output_path=self.output_path,
            )
        )
    email_delta.assert_called()
    # The emailed delta carries the owner, workflow and step that changed.
    delta = email_delta.call_args[0][0]
    self.assertEqual(delta.user, workflow.owner)
    self.assertEqual(delta.workflow, workflow)
    self.assertEqual(delta.step, step)
def test_email_delta_when_fresh_crr_is_unreachable(self, email_delta):
    """An "unreachable" (empty) render replacing cached data still emails."""
    user = create_test_user()
    workflow = Workflow.create_and_init(owner_id=user.id)
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
        notifications=True,
    )
    # Cache non-empty data at a stale delta so the new render is a change.
    write_to_rendercache(
        workflow,
        step,
        workflow.last_delta_id - 1,
        make_table(make_column("A", [1])),
    )
    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": True},
        # returns empty result -- meaning, "unreachable"
        python_code="import pandas as pd\ndef render(table, params): return pd.DataFrame({})",
    )
    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            execute_step(
                chroot_context=self.chroot_context,
                workflow=workflow,
                step=step,
                module_zipfile=module_zipfile,
                params={},
                tab_name=tab.name,
                input_path=self.empty_table_path,
                input_table_columns=[],
                tab_results={},
                output_path=self.output_path,
            )
        )
    email_delta.assert_called()  # there's new data -- or, well, non-data