def test_clean_tab_wf_module_changed_raises_unneededexecution(self):
    """
    A tab whose step changed mid-render makes clean_value() bail out.

    The context still lists the tab's old output shape, but the step's
    last_relevant_delta_id has since moved on. The render() result would
    never be saved in that situation, so the return value is irrelevant;
    raising UnneededExecution is the simplest contract to enforce.
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    delta_id = workflow.last_delta_id
    step = tab.wf_modules.create(order=0, last_relevant_delta_id=delta_id)

    # The output 'render' _believes_ this tab has.
    stale_output = ProcessResult(pd.DataFrame({'A': [1, 2]}))
    step.cache_render_result(delta_id, stale_output)

    # Simulate reality: the step's delta changes behind render's back.
    step.last_relevant_delta_id += 1
    step.save(update_fields=['last_relevant_delta_id'])

    tab_shapes = {tab.slug: StepResultShape('ok', stale_output.table_shape)}
    context = RenderContext(workflow.id, None, None, tab_shapes, None)

    with self.assertRaises(UnneededExecution):
        clean_value(ParamDType.Tab(), tab.slug, context)
def test_clean_column_missing_becomes_empty_string(self):
    """A Column value not found in the context's table shape cleans to ''."""
    table_shape = TableShape(3, [Column('A', ColumnType.NUMBER())])
    context = RenderContext(None, None, table_shape, None, None)

    cleaned = clean_value(ParamDType.Column(), 'B', context)

    self.assertEqual(cleaned, '')
def test_clean_column_valid(self):
    """A Column value found in the context's table shape is kept as-is."""
    table_shape = TableShape(3, [Column('A', ColumnType.NUMBER())])
    context = RenderContext(None, None, table_shape, None, None)

    cleaned = clean_value(ParamDType.Column(), 'A', context)

    self.assertEqual(cleaned, 'A')
def test_clean_file_no_uploaded_file(self):
    """A File value set to a UUID nothing was uploaded under cleans to None."""
    workflow = Workflow.create_and_init()
    step = workflow.tabs.first().wf_modules.create(
        module_id_name='uploadfile',
        order=0,
    )
    context = RenderContext(workflow.id, step.id, None, None, None)
    unknown_uuid = str(uuid.uuid4())

    cleaned = clean_value(ParamDType.File(), unknown_uuid, context)

    self.assertIsNone(cleaned)
def test_clean_multichartseries_non_number_is_prompting_error(self):
    """
    Multichartseries entries on non-number columns raise PromptingError.

    The text column and the datetime column are each expected as their own
    WrongColumnType error, paired with frozenset({"number"}).
    """
    table_shape = TableShape(
        3,
        [Column("A", ColumnType.TEXT()), Column("B", ColumnType.DATETIME())],
    )
    context = RenderContext(None, None, table_shape, None, None)
    series = [
        {"column": "A", "color": "#aaaaaa"},
        {"column": "B", "color": "#cccccc"},
    ]
    number_only = frozenset({"number"})
    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", number_only),
        PromptingError.WrongColumnType(["B"], "datetime", number_only),
    ]

    with self.assertRaises(PromptingError) as cm:
        clean_value(ParamDType.Multichartseries(), series, context)

    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_multicolumn_from_other_tab(self):
    """
    A Multicolumn with tab_parameter keeps only the selected tab's columns.

    "A-from-tab-1" appears only in the context's own table shape and is
    dropped; "A-from-tab-2" is a column of the selected tab's cached output
    and is kept.
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    delta_id = workflow.last_delta_id
    step = tab.wf_modules.create(
        order=0,
        slug="step-1",
        last_relevant_delta_id=delta_id,
    )
    selected_tab_output = ProcessResult(pd.DataFrame({"A-from-tab-2": [1, 2]}))
    step.cache_render_result(delta_id, selected_tab_output)

    schema = ParamDType.Dict(
        {
            "tab": ParamDType.Tab(),
            "columns": ParamDType.Multicolumn(tab_parameter="tab"),
        }
    )
    params = {"tab": tab.slug, "columns": ["A-from-tab-1", "A-from-tab-2"]}
    own_shape = TableShape(3, [Column("A-from-tab-1", ColumnType.NUMBER())])
    tab_shapes = {
        tab.slug: StepResultShape("ok", selected_tab_output.table_shape),
    }
    context = RenderContext(workflow.id, None, own_shape, tab_shapes, params)

    cleaned = clean_value(schema, params, context)

    # cleaned["tab"] is deliberately not checked by this test.
    self.assertEqual(cleaned["columns"], ["A-from-tab-2"])
def test_clean_tab_tab_delete_race_raises_unneededexecution(self):
    """
    A tab deleted mid-render makes clean_value() raise UnneededExecution.

    The render() result would never be saved once the step's delta has
    changed, so the return value does not matter. UnneededExecution is the
    quickest exit: unlike `Tab.DoesNotExist`, it is an error the caller is
    meant to raise anyway.
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    delta_id = workflow.last_delta_id
    step = tab.wf_modules.create(order=0, last_relevant_delta_id=delta_id)

    # The output 'render' _believes_ this tab has.
    stale_output = ProcessResult(pd.DataFrame({'A': [1, 2]}))
    step.cache_render_result(delta_id, stale_output)

    # The user deletes the tab ...
    tab.is_deleted = True
    tab.save(update_fields=['is_deleted'])

    # ... and, as would happen in reality, the step's delta changes too.
    step.last_relevant_delta_id += 1
    step.save(update_fields=['last_relevant_delta_id'])

    tab_shapes = {tab.slug: StepResultShape('ok', stale_output.table_shape)}
    context = RenderContext(workflow.id, None, None, tab_shapes, None)

    with self.assertRaises(UnneededExecution):
        clean_value(ParamDType.Tab(), tab.slug, context)
def test_clean_file_happy_path(self):
    """
    A File value naming this step's upload cleans to a local pathlib.Path.

    The path must hold the uploaded bytes, keep the upload's ".csv.gz"
    suffixes, and stop existing on the filesystem once the last reference
    to the returned object is dropped.
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wfm = tab.wf_modules.create(
        module_id_name="uploadfile", order=0, slug="step-1"
    )
    # Deliberately not called `id`, which would shadow the builtin.
    file_uuid = str(uuid.uuid4())
    # f-strings interpolate with plain `{...}`; a `${...}` placeholder would
    # put literal "$" characters into the key.
    key = f"wf-{workflow.id}/wfm-{wfm.id}/{file_uuid}"
    minio.put_bytes(minio.UserFilesBucket, key, b"1234")
    UploadedFile.objects.create(
        wf_module=wfm,
        name="x.csv.gz",
        size=4,
        uuid=file_uuid,
        bucket=minio.UserFilesBucket,
        key=key,
    )
    context = RenderContext(workflow.id, wfm.id, None, None, None)

    result = clean_value(ParamDType.File(), file_uuid, context)

    self.assertIsInstance(result, pathlib.Path)
    self.assertEqual(result.read_bytes(), b"1234")
    self.assertEqual(result.suffixes, [".csv", ".gz"])

    # Assert that once the path object goes out of scope, the file is gone.
    str_path = str(result)  # remember the filesystem path
    del result  # should finalize, deleting the file on the filesystem
    with self.assertRaises(FileNotFoundError):
        os.open(str_path, os.O_RDONLY)
def test_clean_multicolumn_prompting_error_convert_to_text(self):
    """
    A text-only Multicolumn given non-text columns raises PromptingError.

    The value is supplied as the comma-separated string "A,B". Both
    offending columns are expected in a single WrongColumnType error whose
    second argument is None.
    """
    # TODO make this _automatic_ instead of quick-fix?
    # ... but for now: prompt for a Quick Fix.
    context = RenderContext(
        None,
        None,
        TableShape(
            3,
            [
                Column("A", ColumnType.NUMBER()),
                Column("B", ColumnType.DATETIME()),
                Column("C", ColumnType.TEXT()),
            ],
        ),
        None,
        None,
    )
    # Build the schema outside assertRaises so that only clean_value() can
    # satisfy the expected exception.
    schema = ParamDType.Multicolumn(column_types=frozenset({"text"}))

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, "A,B", context)

    self.assertEqual(
        cm.exception.errors,
        [PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))],
    )
def test_clean_multichartseries_non_number_is_prompting_error(self):
    """
    Multichartseries entries on non-number columns raise PromptingError.

    The text column and the datetime column are each expected as their own
    WrongColumnType error, paired with frozenset({'number'}).
    """
    table_shape = TableShape(
        3,
        [Column('A', ColumnType.TEXT()), Column('B', ColumnType.DATETIME())],
    )
    context = RenderContext(None, None, table_shape, None, None)
    series = [
        {'column': 'A', 'color': '#aaaaaa'},
        {'column': 'B', 'color': '#cccccc'},
    ]
    number_only = frozenset({'number'})
    expected_errors = [
        PromptingError.WrongColumnType(['A'], 'text', number_only),
        PromptingError.WrongColumnType(['B'], 'datetime', number_only),
    ]

    with self.assertRaises(PromptingError) as cm:
        clean_value(ParamDType.Multichartseries(), series, context)

    self.assertEqual(cm.exception.errors, expected_errors)
def test_dict_prompting_error_concatenate_different_types(self):
    """
    A Dict's PromptingError lists the errors of all its child params.

    "x" points at a text column and "y" at a datetime column while both
    accept only "number"; one WrongColumnType per column is expected, in
    that order.
    """
    table_shape = TableShape(
        3,
        [Column("A", ColumnType.TEXT()), Column("B", ColumnType.DATETIME())],
    )
    context = RenderContext(None, None, table_shape, None, None)
    number_only = frozenset({"number"})
    schema = ParamDType.Dict(
        {
            "x": ParamDType.Column(column_types=frozenset({"number"})),
            "y": ParamDType.Column(column_types=frozenset({"number"})),
        }
    )
    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", number_only),
        PromptingError.WrongColumnType(["B"], "datetime", number_only),
    ]

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, {"x": "A", "y": "B"}, context)

    self.assertEqual(cm.exception.errors, expected_errors)
def test_list_prompting_error_concatenate_different_type_to_text(self):
    """
    A List of text-only Columns merges its errors into one WrongColumnType.

    The number column and the datetime column are expected together in a
    single error whose second argument is None.
    """
    table_shape = TableShape(
        3,
        [Column("A", ColumnType.NUMBER()), Column("B", ColumnType.DATETIME())],
    )
    context = RenderContext(None, None, table_shape, None, None)
    text_column = ParamDType.Column(column_types=frozenset({"text"}))
    schema = ParamDType.List(inner_dtype=text_column)
    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"})),
    ]

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, ["A", "B"], context)

    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_multicolumn_from_other_tab(self):
    """
    A Multicolumn with tab_parameter keeps only the selected tab's columns.

    'A-from-tab-1' appears only in the context's own table shape and is
    dropped; 'A-from-tab-2' is a column of the selected tab's cached output
    and is kept.
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    delta_id = workflow.last_delta_id
    step = tab.wf_modules.create(order=0, last_relevant_delta_id=delta_id)
    selected_tab_output = ProcessResult(pd.DataFrame({'A-from-tab-2': [1, 2]}))
    step.cache_render_result(delta_id, selected_tab_output)

    schema = ParamDType.Dict({
        'tab': ParamDType.Tab(),
        'columns': ParamDType.Multicolumn(tab_parameter='tab'),
    })
    params = {'tab': tab.slug, 'columns': ['A-from-tab-1', 'A-from-tab-2']}
    own_shape = TableShape(3, [Column('A-from-tab-1', ColumnType.NUMBER())])
    tab_shapes = {
        tab.slug: StepResultShape('ok', selected_tab_output.table_shape),
    }
    context = RenderContext(workflow.id, None, own_shape, tab_shapes, params)

    cleaned = clean_value(schema, params, context)

    # cleaned['tab'] is deliberately not checked by this test.
    self.assertEqual(cleaned['columns'], ['A-from-tab-2'])
def test_clean_multicolumn_sort_in_table_order(self):
    """Multicolumn values are reordered to follow the table's column order."""
    columns_in_table_order = [
        Column('B', ColumnType.NUMBER()),
        Column('A', ColumnType.NUMBER()),
    ]
    context = RenderContext(
        None, None, TableShape(3, columns_in_table_order), None, None
    )

    cleaned = clean_value(ParamDType.Multicolumn(), ['A', 'B'], context)

    self.assertEqual(cleaned, ['B', 'A'])
def test_clean_tab_missing_tab_selected_gives_none(self):
    """
    If the user has selected a nonexistent tab, pretend tab is blank.

    The JS side of things will see the nonexistent tab, but not render().
    """
    # An empty tab-results dict: 'tab-XXX' cannot be found in it.
    context = RenderContext(None, None, None, {}, None)

    result = clean_value(ParamDType.Tab(), 'tab-XXX', context)

    # assertIsNone checks identity, so a value that merely compares equal
    # to None cannot slip through.
    self.assertIsNone(result)
def test_clean_normal_dict(self):
    """A Dict of a String and an Integer is returned unchanged by cleaning."""
    schema = ParamDType.Dict({
        'str': ParamDType.String(),
        'int': ParamDType.Integer(),
    })
    value = {'str': 'foo', 'int': 3}
    # Snapshot taken before cleaning: the expected outcome is a no-op.
    expected = value.copy()
    context = RenderContext(None, None, None, None, None)

    cleaned = clean_value(schema, value, context)

    self.assertEqual(cleaned, expected)
def test_clean_multicolumn_missing_is_removed(self):
    """Multicolumn entries absent from the table shape are dropped."""
    table_shape = TableShape(
        3,
        [Column('A', ColumnType.NUMBER()), Column('B', ColumnType.NUMBER())],
    )
    context = RenderContext(None, None, table_shape, None, None)
    selected = ['A', 'X', 'B']  # 'X' is not a column of the table

    cleaned = clean_value(ParamDType.Multicolumn(), selected, context)

    self.assertEqual(cleaned, ['A', 'B'])
def test_clean_column_prompting_error_convert_to_number(self):
    """A number-only Column pointed at a text column raises PromptingError."""
    table_shape = TableShape(3, [Column('A', ColumnType.TEXT())])
    context = RenderContext(None, None, table_shape, None, None)
    number_only = frozenset({'number'})
    schema = ParamDType.Column(column_types=number_only)
    expected_errors = [
        PromptingError.WrongColumnType(['A'], 'text', frozenset({'number'})),
    ]

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, 'A', context)

    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_multicolumn_sort_in_table_order(self):
    """Multicolumn values are reordered to follow the table's column order."""
    columns_in_table_order = [
        Column("B", ColumnType.NUMBER()),
        Column("A", ColumnType.NUMBER()),
    ]
    context = RenderContext(
        None, None, TableShape(3, columns_in_table_order), None, None
    )

    cleaned = clean_value(ParamDType.Multicolumn(), ["A", "B"], context)

    self.assertEqual(cleaned, ["B", "A"])
def test_clean_multicolumn_missing_is_removed(self):
    """Multicolumn entries absent from the table shape are dropped."""
    table_shape = TableShape(
        3,
        [Column("A", ColumnType.NUMBER()), Column("B", ColumnType.NUMBER())],
    )
    context = RenderContext(None, None, table_shape, None, None)
    selected = ["A", "X", "B"]  # "X" is not a column of the table

    cleaned = clean_value(ParamDType.Multicolumn(), selected, context)

    self.assertEqual(cleaned, ["A", "B"])
def test_list_prompting_error_concatenate_same_type(self):
    """
    A List of number-only Columns merges same-typed errors into one.

    Both text columns are expected together in a single WrongColumnType
    error whose second argument is 'text'.
    """
    table_shape = TableShape(
        3,
        [Column('A', ColumnType.TEXT()), Column('B', ColumnType.TEXT())],
    )
    context = RenderContext(None, None, table_shape, None, None)
    number_column = ParamDType.Column(column_types=frozenset({'number'}))
    schema = ParamDType.List(inner_dtype=number_column)
    expected_errors = [
        PromptingError.WrongColumnType(
            ['A', 'B'], 'text', frozenset({'number'})
        ),
    ]

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, ['A', 'B'], context)

    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_file_wrong_wf_module(self):
    """
    A File value naming another step's upload cleans to None.

    The UploadedFile row is attached to `wfm2`, while the context is built
    for `wfm`.
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wfm = tab.wf_modules.create(module_id_name='uploadfile', order=0)
    wfm2 = tab.wf_modules.create(module_id_name='uploadfile', order=1)
    # Deliberately not called `id`, which would shadow the builtin.
    file_uuid = str(uuid.uuid4())
    # f-strings interpolate with plain `{...}`; a `${...}` placeholder would
    # put literal "$" characters into the key.
    key = f'wf-{workflow.id}/wfm-{wfm.id}/{file_uuid}'
    minio.put_bytes(minio.UserFilesBucket, key, b'1234')
    UploadedFile.objects.create(
        wf_module=wfm2,
        name='x.csv.gz',
        size=4,
        uuid=file_uuid,
        bucket=minio.UserFilesBucket,
        key=key,
    )
    context = RenderContext(workflow.id, wfm.id, None, None, None)

    result = clean_value(ParamDType.File(), file_uuid, context)

    self.assertIsNone(result)
def test_clean_tabs_preserve_ordering(self):
    """
    Multitab values come back in the context's tab order, not as supplied.

    Tabs 3 and 2 are requested in that (wrong) order; the cleaned list is
    expected to hold tab 2 first, each entry exposing its slug, name,
    columns and dataframe.
    """
    tab2_frame = pd.DataFrame({"A": [1, 2]})
    tab3_frame = pd.DataFrame({"B": [2, 3]})
    tab2_output = ProcessResult(tab2_frame)
    tab3_output = ProcessResult(tab3_frame)

    workflow = Workflow.create_and_init()
    delta_id = workflow.last_delta_id
    tab1 = workflow.tabs.first()
    # NOTE(review): tab2 and tab3 are both created with position=1 --
    # confirm whether tab3 was meant to get position=2.
    tab2 = workflow.tabs.create(position=1, slug="tab-2", name="Tab 2")
    tab3 = workflow.tabs.create(position=1, slug="tab-3", name="Tab 3")
    step2 = tab2.wf_modules.create(
        order=0, slug="step-1", last_relevant_delta_id=delta_id
    )
    step2.cache_render_result(delta_id, tab2_output)
    step3 = tab3.wf_modules.create(
        order=0, slug="step-2", last_relevant_delta_id=delta_id
    )
    step3.cache_render_result(delta_id, tab3_output)

    # The desired tab order is the insertion order of this dict. (Python
    # 3.7 guarantees insertion-ordered dicts; CPython 3.6 and PyPy 7
    # behave the same way.)
    tab_shapes = {
        tab1.slug: None,
        tab2.slug: StepResultShape("ok", tab2_output.table_shape),
        tab3.slug: StepResultShape("ok", tab3_output.table_shape),
    }
    context = RenderContext(workflow.id, None, None, tab_shapes, None)

    # Ask for the tabs in the wrong order; cleaning should reorder them.
    result = clean_value(ParamDType.Multitab(), [tab3.slug, tab2.slug], context)

    expectations = [
        (tab2, "A", [1, 2]),
        (tab3, "B", [2, 3]),
    ]
    for index, (tab, column_name, values) in enumerate(expectations):
        cleaned_tab = result[index]
        self.assertEqual(cleaned_tab.slug, tab.slug)
        self.assertEqual(cleaned_tab.name, tab.name)
        self.assertEqual(
            cleaned_tab.columns,
            {column_name: RenderColumn(column_name, "number", "{:,}")},
        )
        assert_frame_equal(
            cleaned_tab.dataframe, pd.DataFrame({column_name: values})
        )
def test_clean_column_prompting_error_convert_to_text(self):
    """
    A text-only Column pointed at a number column raises PromptingError.

    The expected WrongColumnType error has None as its second argument.
    """
    # TODO make this conversion _automatic_ rather than a quick-fix?
    # Think of Regex: we would probably hand the module a text Series
    # _separately_ from the input DataFrame, so that Regex could emit a new
    # Text column while leaving the input column's data type alone.
    #
    # ... until then: prompt for a Quick Fix.
    table_shape = TableShape(3, [Column("A", ColumnType.NUMBER())])
    context = RenderContext(None, None, table_shape, None, None)
    schema = ParamDType.Column(column_types=frozenset({"text"}))
    expected_errors = [
        PromptingError.WrongColumnType(["A"], None, frozenset({"text"})),
    ]

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, "A", context)

    self.assertEqual(cm.exception.errors, expected_errors)
def _render_context(
    self,
    *,
    wf_module_id=None,
    input_table=None,
    tab_results=None,
    params=None,
    exit_stack=None,
) -> RenderContext:
    """
    Build a RenderContext, filling in this test case's defaults.

    All arguments are keyword-only:

    * wf_module_id, input_table: handed to RenderContext unchanged.
    * tab_results, params: handed to RenderContext; when omitted (None),
      a fresh empty dict is used for each call.
    * exit_stack: handed to RenderContext; `self.exit_stack` when omitted.

    `basedir` is always taken from `self.basedir`.
    """
    # Defaults are created per call rather than in the signature: a `{}`
    # default would be one dict object shared by every call.
    if tab_results is None:
        tab_results = {}
    if params is None:
        params = {}
    if exit_stack is None:
        exit_stack = self.exit_stack
    return RenderContext(
        wf_module_id=wf_module_id,
        input_table=input_table,
        tab_results=tab_results,
        basedir=self.basedir,
        exit_stack=exit_stack,
        params=params,
    )
def test_clean_tabs_happy_path(self):
    """
    A Multitab value naming one rendered tab cleans to that tab's data.

    The single cleaned entry is expected to expose the tab's slug, name,
    columns and cached dataframe.
    """
    workflow = Workflow.create_and_init()
    tab1 = workflow.tabs.first()
    delta_id = workflow.last_delta_id
    step = tab1.wf_modules.create(order=0, last_relevant_delta_id=delta_id)
    tab1_output = ProcessResult(pd.DataFrame({'A': [1, 2]}))
    step.cache_render_result(delta_id, tab1_output)

    tab_shapes = {tab1.slug: StepResultShape('ok', tab1_output.table_shape)}
    context = RenderContext(workflow.id, None, None, tab_shapes, None)

    result = clean_value(ParamDType.Multitab(), [tab1.slug], context)

    cleaned_tab = result[0]
    expected_columns = {'A': RenderColumn('A', 'number', '{:,}')}
    self.assertEqual(cleaned_tab.slug, tab1.slug)
    self.assertEqual(cleaned_tab.name, tab1.name)
    self.assertEqual(cleaned_tab.columns, expected_columns)
    assert_frame_equal(cleaned_tab.dataframe, pd.DataFrame({'A': [1, 2]}))
def test_clean_multichartseries_missing_is_removed(self):
    """Multichartseries entries whose column is not in the table are dropped."""
    table_shape = TableShape(
        3,
        [Column('A', ColumnType.NUMBER()), Column('B', ColumnType.NUMBER())],
    )
    context = RenderContext(None, None, table_shape, None, None)
    series = [
        {'column': 'A', 'color': '#aaaaaa'},
        {'column': 'C', 'color': '#cccccc'},  # 'C' is not in the table
    ]

    cleaned = clean_value(ParamDType.Multichartseries(), series, context)

    self.assertEqual(cleaned, [{'column': 'A', 'color': '#aaaaaa'}])
def test_clean_multicolumn_prompting_error_convert_to_text(self):
    """
    A text-only Multicolumn given non-text columns raises PromptingError.

    The value is supplied as the comma-separated string 'A,B'. One
    WrongColumnType error per offending column is expected: 'number' for
    A and 'datetime' for B.
    """
    # TODO make this _automatic_ instead of quick-fix?
    # ... but for now: prompt for a Quick Fix.
    context = RenderContext(
        None,
        None,
        TableShape(3, [
            Column('A', ColumnType.NUMBER()),
            Column('B', ColumnType.DATETIME()),
            Column('C', ColumnType.TEXT()),
        ]),
        None,
        None,
    )
    # Build the schema outside assertRaises so that only clean_value() can
    # satisfy the expected exception.
    schema = ParamDType.Multicolumn(column_types=frozenset({'text'}))

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, 'A,B', context)

    self.assertEqual(cm.exception.errors, [
        PromptingError.WrongColumnType(['A'], 'number', frozenset({'text'})),
        PromptingError.WrongColumnType(['B'], 'datetime',
                                       frozenset({'text'})),
    ])
def test_clean_multicolumn_from_other_tab_that_does_not_exist(self):
    """
    A Multicolumn whose tab_parameter names a missing tab cleans to [].

    The tab-results dict given to the context is empty, so 'tab-missing'
    cannot be found in it.
    """
    # The other tab would not exist if the user selected and then deleted
    # it.
    workflow = Workflow.create_and_init()
    schema = ParamDType.Dict({
        'tab': ParamDType.Tab(),
        'columns': ParamDType.Multicolumn(tab_parameter='tab'),
    })
    params = {'tab': 'tab-missing', 'columns': ['A-from-tab']}
    context = RenderContext(
        workflow.id,
        None,
        TableShape(3, [
            Column('A-from-tab-1', ColumnType.NUMBER()),
        ]),
        {},
        params,
    )

    result = clean_value(schema, params, context)

    # result['tab'] is not what we're testing here
    self.assertEqual(result['columns'], [])
def test_dict_prompting_error_concatenate_different_types(self):
    """
    A Dict's PromptingError lists the errors of all its child params.

    'x' points at a text column and 'y' at a datetime column while both
    accept only 'number'; one WrongColumnType per column is expected, in
    that order.
    """
    table_shape = TableShape(
        3,
        [Column('A', ColumnType.TEXT()), Column('B', ColumnType.DATETIME())],
    )
    context = RenderContext(None, None, table_shape, None, None)
    number_only = frozenset({'number'})
    schema = ParamDType.Dict({
        'x': ParamDType.Column(column_types=frozenset({'number'})),
        'y': ParamDType.Column(column_types=frozenset({'number'})),
    })
    expected_errors = [
        PromptingError.WrongColumnType(['A'], 'text', number_only),
        PromptingError.WrongColumnType(['B'], 'datetime', number_only),
    ]

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, {'x': 'A', 'y': 'B'}, context)

    self.assertEqual(cm.exception.errors, expected_errors)