def test_clean_tab_tab_delete_race_raises_unneededexecution(self):
    """
    Deleting a tab while a render is in flight raises UnneededExecution.

    The value clean_value() would return is irrelevant: the render() result
    is never saved once this WfModule's delta has changed. UnneededExecution
    is the shortest exit, because the caller is expected to raise it anyway
    (unlike `Tab.DoesNotExist`).
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    delta_id = workflow.last_delta_id

    # The output 'render' _believes_ the tab still has.
    stale_output = ProcessResult(pd.DataFrame({'A': [1, 2]}))
    step = tab.wf_modules.create(order=0, last_relevant_delta_id=delta_id)
    step.cache_render_result(delta_id, stale_output)

    # The user deletes the tab...
    tab.is_deleted = True
    tab.save(update_fields=['is_deleted'])

    # ...and, as in reality, the step's last_relevant_delta_id moves on.
    step.last_relevant_delta_id += 1
    step.save(update_fields=['last_relevant_delta_id'])

    tab_shapes = {
        tab.slug: StepResultShape('ok', stale_output.table_shape),
    }
    context = RenderContext(workflow.id, None, None, tab_shapes, None)

    with self.assertRaises(UnneededExecution):
        clean_value(ParamDType.Tab(), tab.slug, context)
def test_clean_tab_wf_module_changed_raises_unneededexecution(self):
    """
    A tab whose output changes while a render is in flight raises
    UnneededExecution.

    The value clean_value() would return is irrelevant: the render() result
    is never saved once this WfModule's delta has changed. Raising
    UnneededExecution is the simplest contract to enforce.
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    delta_id = workflow.last_delta_id

    # The output 'render' _believes_ the tab still has.
    stale_output = ProcessResult(pd.DataFrame({'A': [1, 2]}))
    step = tab.wf_modules.create(order=0, last_relevant_delta_id=delta_id)
    step.cache_render_result(delta_id, stale_output)

    # As in reality, the step's last_relevant_delta_id moves on.
    step.last_relevant_delta_id += 1
    step.save(update_fields=['last_relevant_delta_id'])

    tab_shapes = {
        tab.slug: StepResultShape('ok', stale_output.table_shape),
    }
    context = RenderContext(workflow.id, None, None, tab_shapes, None)

    with self.assertRaises(UnneededExecution):
        clean_value(ParamDType.Tab(), tab.slug, context)
def test_clean_multicolumn_from_other_tab(self):
    """
    A Multicolumn with `tab_parameter` keeps only the referenced tab's columns.

    The input table has 'A-from-tab-1' and the referenced tab's cached output
    has 'A-from-tab-2'; only the latter is expected in the cleaned value.
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    delta_id = workflow.last_delta_id

    other_tab_output = ProcessResult(pd.DataFrame({'A-from-tab-2': [1, 2]}))
    step = tab.wf_modules.create(order=0, last_relevant_delta_id=delta_id)
    step.cache_render_result(delta_id, other_tab_output)

    schema = ParamDType.Dict({
        'tab': ParamDType.Tab(),
        'columns': ParamDType.Multicolumn(tab_parameter='tab'),
    })
    params = {
        'tab': tab.slug,
        'columns': ['A-from-tab-1', 'A-from-tab-2'],
    }

    input_shape = TableShape(3, [
        Column('A-from-tab-1', ColumnType.NUMBER()),
    ])
    tab_shapes = {
        tab.slug: StepResultShape('ok', other_tab_output.table_shape),
    }
    context = RenderContext(
        workflow.id, None, input_shape, tab_shapes, params)

    cleaned = clean_value(schema, params, context)

    # cleaned['tab'] is deliberately not checked in this test
    self.assertEqual(cleaned['columns'], ['A-from-tab-2'])
def test_clean_multicolumn_from_other_tab(self):
    """
    A Multicolumn with `tab_parameter` keeps only the referenced tab's columns.

    The input table has "A-from-tab-1" and the referenced tab's cached output
    has "A-from-tab-2"; only the latter is expected in the cleaned value.
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    delta_id = workflow.last_delta_id

    other_tab_output = ProcessResult(pd.DataFrame({"A-from-tab-2": [1, 2]}))
    step = tab.wf_modules.create(
        order=0, slug="step-1", last_relevant_delta_id=delta_id)
    step.cache_render_result(delta_id, other_tab_output)

    schema = ParamDType.Dict({
        "tab": ParamDType.Tab(),
        "columns": ParamDType.Multicolumn(tab_parameter="tab"),
    })
    params = {
        "tab": tab.slug,
        "columns": ["A-from-tab-1", "A-from-tab-2"],
    }

    input_shape = TableShape(3, [Column("A-from-tab-1", ColumnType.NUMBER())])
    tab_shapes = {
        tab.slug: StepResultShape("ok", other_tab_output.table_shape),
    }
    context = RenderContext(
        workflow.id, None, input_shape, tab_shapes, params)

    cleaned = clean_value(schema, params, context)

    # cleaned['tab'] is deliberately not checked in this test
    self.assertEqual(cleaned["columns"], ["A-from-tab-2"])
def test_clean_tabs_preserve_ordering(self):
    """
    Multitab values are cleaned into the RenderContext's tab order.

    The slugs are supplied as [tab-3, tab-2]; the cleaned result is expected
    as [tab-2, tab-3], matching the key order of the tab-shapes dict.
    """
    tab2_output = ProcessResult(pd.DataFrame({"A": [1, 2]}))
    tab3_output = ProcessResult(pd.DataFrame({"B": [2, 3]}))

    workflow = Workflow.create_and_init()
    delta_id = workflow.last_delta_id
    tab1 = workflow.tabs.first()
    # NOTE(review): both new tabs are created with position=1 -- confirm
    # this is intentional (ordering below comes from the context dict).
    tab2 = workflow.tabs.create(position=1, slug="tab-2", name="Tab 2")
    tab3 = workflow.tabs.create(position=1, slug="tab-3", name="Tab 3")

    step2 = tab2.wf_modules.create(
        order=0, slug="step-1", last_relevant_delta_id=delta_id)
    step2.cache_render_result(delta_id, tab2_output)
    step3 = tab3.wf_modules.create(
        order=0, slug="step-2", last_relevant_delta_id=delta_id)
    step3.cache_render_result(delta_id, tab3_output)

    # RenderContext's dict ordering determines desired tab order. (Python
    # 3.7 spec: dict is ordered in insertion order. CPython 3.6 and PyPy 7
    # do this, too.)
    tab_shapes = {
        tab1.slug: None,
        tab2.slug: StepResultShape("ok", tab2_output.table_shape),
        tab3.slug: StepResultShape("ok", tab3_output.table_shape),
    }
    context = RenderContext(workflow.id, None, None, tab_shapes, None)

    # Pass the slugs in the wrong order; cleaning should put them in order.
    wrong_order = [tab3.slug, tab2.slug]
    result = clean_value(ParamDType.Multitab(), wrong_order, context)

    expectations = [
        (tab2, "A", [1, 2]),
        (tab3, "B", [2, 3]),
    ]
    for index, (tab, colname, values) in enumerate(expectations):
        self.assertEqual(result[index].slug, tab.slug)
        self.assertEqual(result[index].name, tab.name)
        self.assertEqual(
            result[index].columns,
            {colname: RenderColumn(colname, "number", "{:,}")},
        )
        assert_frame_equal(
            result[index].dataframe, pd.DataFrame({colname: values}))
def test_clean_tabs_preserve_ordering(self):
    """
    Multitab values nested in a Dict are cleaned into the RenderContext's
    tab order.

    The slugs are supplied as [tab-3, tab-2]; the cleaned result is expected
    as [tab-2, tab-3], matching the key order of the tab-shapes dict.
    """
    tab2_output = ProcessResult(pd.DataFrame({'A': [1, 2]}))
    tab3_output = ProcessResult(pd.DataFrame({'B': [2, 3]}))

    workflow = Workflow.create_and_init()
    delta_id = workflow.last_delta_id
    tab1 = workflow.tabs.first()
    tab2 = workflow.tabs.create(position=1, slug='tab-2', name='Tab 2')
    tab3 = workflow.tabs.create(position=1, slug='tab-3', name='Tab 3')

    step2 = tab2.wf_modules.create(order=0, last_relevant_delta_id=delta_id)
    step2.cache_render_result(delta_id, tab2_output)
    step3 = tab3.wf_modules.create(order=0, last_relevant_delta_id=delta_id)
    step3.cache_render_result(delta_id, tab3_output)

    # RenderContext's dict ordering determines desired tab order. (Python
    # 3.7 spec: dict is ordered in insertion order. CPython 3.6 and PyPy 7
    # do this, too.)
    tab_shapes = {
        tab1.slug: None,
        tab2.slug: StepResultShape('ok', tab2_output.table_shape),
        tab3.slug: StepResultShape('ok', tab3_output.table_shape),
    }
    # NOTE(review): RenderContext gets four positional arguments here and
    # RenderColumn gets two below, while sibling tests pass five and three
    # respectively -- confirm against the current signatures.
    context = RenderContext(workflow.id, None, tab_shapes, None)

    schema = ParamDType.Dict({'tabs': ParamDType.Multitab()})
    # Pass the slugs in the wrong order; cleaning should put them in order.
    params = {'tabs': [tab3.slug, tab2.slug]}
    result = clean_value(schema, params, context)

    expectations = [
        (tab2, 'A', [1, 2]),
        (tab3, 'B', [2, 3]),
    ]
    for index, (tab, colname, values) in enumerate(expectations):
        self.assertEqual(result['tabs'][index].slug, tab.slug)
        self.assertEqual(result['tabs'][index].name, tab.name)
        self.assertEqual(result['tabs'][index].columns, {
            colname: RenderColumn(colname, 'number'),
        })
        assert_frame_equal(result['tabs'][index].dataframe,
                           pd.DataFrame({colname: values}))
async def execute_workflow(workflow: Workflow, delta_id: int) -> None:
    """
    Make sure every `workflow.tabs[*].live_wf_modules` has a fresh cached
    render result.

    Raise UnneededExecution if the inputs go stale along the way (from then
    on, nobody cares about the results).

    WEBSOCKET NOTES: wf_modules are executed one after another. After each
    execution, clients are notified of its new columns and status.
    """
    # raises UnneededExecution
    remaining_flows = await _load_tab_flows(workflow, delta_id)

    # tab_shapes records each tab's output, which other tabs may consume as
    # input. Every entry starts out as `None`. Tabs run dependencies-first,
    # so if a WfModule needs a `tab_shape` that is still `None`, that tab
    # _couldn't_ be rendered first -- prompting a `TabCycleError`.
    #
    # `tab_shapes.keys()` yields tab slugs in the Workflow's tab order -- the
    # order chosen by the user.
    tab_shapes: Dict[str, Optional[StepResultShape]] = {
        flow.tab_slug: None for flow in remaining_flows
    }

    # Run one tab_flow at a time.
    #
    # No DB lock is held across the loop: it can run for a long time; several
    # copies may run at once (even on different computers); and `await`
    # doesn't work with locks.
    while remaining_flows:
        runnable_flows, blocked_flows = partition_ready_and_dependent(
            remaining_flows)

        if not runnable_flows:
            # Every remaining flow is dependent -- so they all have cycles.
            # Run them last; they can spot their cycles through `tab_shapes`.
            break

        for tab_flow in runnable_flows:
            result = await execute_tab_flow(workflow, tab_flow, tab_shapes)
            tab_shapes[tab_flow.tab_slug] = StepResultShape(
                result.status, result.table_shape)
            del result  # recover ram

        remaining_flows = blocked_flows  # next round

    # Whatever is left in `remaining_flows` has cycles. Run those flows
    # without updating `tab_shapes`, because none of them should see the
    # output from any other. (If tab1 and tab 2 depend on each other, they
    # should both have the same error: "Cycle"; their order of execution
    # shouldn't matter.)
    for tab_flow in remaining_flows:
        await execute_tab_flow(workflow, tab_flow, tab_shapes)
def test_clean_tabs_happy_path(self):
    """
    A Multitab value with one valid slug cleans to one tab result carrying
    that tab's slug, name, columns and dataframe.
    """
    workflow = Workflow.create_and_init()
    tab1 = workflow.tabs.first()
    delta_id = workflow.last_delta_id

    tab1_output = ProcessResult(pd.DataFrame({'A': [1, 2]}))
    step = tab1.wf_modules.create(order=0, last_relevant_delta_id=delta_id)
    step.cache_render_result(delta_id, tab1_output)

    tab_shapes = {
        tab1.slug: StepResultShape('ok', tab1_output.table_shape),
    }
    context = RenderContext(workflow.id, None, None, tab_shapes, None)

    result = clean_value(ParamDType.Multitab(), [tab1.slug], context)

    self.assertEqual(result[0].slug, tab1.slug)
    self.assertEqual(result[0].name, tab1.name)
    expected_columns = {
        'A': RenderColumn('A', 'number', '{:,}'),
    }
    self.assertEqual(result[0].columns, expected_columns)
    assert_frame_equal(result[0].dataframe, pd.DataFrame({'A': [1, 2]}))
def test_clean_tab_happy_path(self):
    """
    A Tab value with a valid slug cleans to a tab result carrying that
    tab's slug, name, columns and dataframe.
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    delta_id = workflow.last_delta_id

    tab_output = ProcessResult(pd.DataFrame({"A": [1, 2]}))
    step = tab.wf_modules.create(
        order=0, slug="step-1", last_relevant_delta_id=delta_id)
    step.cache_render_result(delta_id, tab_output)

    tab_shapes = {tab.slug: StepResultShape("ok", tab_output.table_shape)}
    context = RenderContext(workflow.id, None, None, tab_shapes, None)

    result = clean_value(ParamDType.Tab(), tab.slug, context)

    self.assertEqual(result.slug, tab.slug)
    self.assertEqual(result.name, tab.name)
    expected_columns = {"A": RenderColumn("A", "number", "{:,}")}
    self.assertEqual(result.columns, expected_columns)
    assert_frame_equal(result.dataframe, pd.DataFrame({"A": [1, 2]}))
def test_clean_tab_tab_error_raises_cycle(self):
    """
    Cleaning a Tab value whose tab shape has status 'error' raises
    TabOutputUnreachableError.
    """
    errored_shape = StepResultShape('error', TableShape(0, []))
    tab_shapes = {'tab-1': errored_shape}
    context = RenderContext(None, None, None, tab_shapes, None)

    with self.assertRaises(TabOutputUnreachableError):
        clean_value(ParamDType.Tab(), 'tab-1', context)