def test_clean_multicolumn_sort_in_table_order(self):
    """Multicolumn values come back in table order, not in the order given."""
    # The table lists B before A -- the reverse of the requested order.
    table_columns = [
        Column('B', ColumnType.NUMBER()),
        Column('A', ColumnType.NUMBER()),
    ]
    input_shape = TableShape(3, table_columns)

    cleaned = clean_value(ParamDType.Multicolumn(), ['A', 'B'], input_shape)

    self.assertEqual(cleaned, ['B', 'A'])
def test_clean_multicolumn_from_other_tab(self):
    """A Multicolumn with tab_parameter keeps only columns of that tab's output."""
    other_tab_result = ProcessResult(pd.DataFrame({'A-from-tab-2': [1, 2]}))
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()

    # Add one step to the tab and cache other_tab_result as its output.
    step = tab.wf_modules.create(
        order=0,
        last_relevant_delta_id=workflow.last_delta_id,
    )
    step.cache_render_result(workflow.last_delta_id, other_tab_result)

    schema = ParamDType.Dict({
        'tab': ParamDType.Tab(),
        'columns': ParamDType.Multicolumn(tab_parameter='tab'),
    })
    params = {
        'tab': tab.slug,
        'columns': ['A-from-tab-1', 'A-from-tab-2'],
    }

    # The input table only has 'A-from-tab-1'; the selected tab only has
    # 'A-from-tab-2'.
    input_shape = TableShape(3, [
        Column('A-from-tab-1', ColumnType.NUMBER()),
    ])
    tab_shapes = {
        tab.slug: StepResultShape('ok', other_tab_result.table_shape),
    }
    context = RenderContext(workflow.id, None, input_shape, tab_shapes, params)

    cleaned = clean_value(schema, params, context)

    # cleaned['tab'] is not what we're testing here
    self.assertEqual(cleaned['columns'], ['A-from-tab-2'])
def test_list_prompting_error_concatenate_different_type_to_text(self):
    """
    Expect one WrongColumnType error listing both columns.

    The columns have different types (number and datetime) and the schema
    wants text; the expected error carries None as its second argument.
    """
    input_shape = TableShape(
        3,
        [Column("A", ColumnType.NUMBER()), Column("B", ColumnType.DATETIME())],
    )
    context = RenderContext(None, None, input_shape, None, None)

    text_column = ParamDType.Column(column_types=frozenset({"text"}))
    schema = ParamDType.List(inner_dtype=text_column)

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, ["A", "B"], context)

    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_dict_prompting_error_concatenate_different_types(self):
    """Expect a separate WrongColumnType error per wrongly-typed Dict entry."""
    input_shape = TableShape(
        3,
        [Column("A", ColumnType.TEXT()), Column("B", ColumnType.DATETIME())],
    )
    context = RenderContext(None, None, input_shape, None, None)

    # Both "x" and "y" want a number column; A is text and B is datetime.
    schema = ParamDType.Dict(
        {
            key: ParamDType.Column(column_types=frozenset({"number"}))
            for key in ("x", "y")
        }
    )

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, {"x": "A", "y": "B"}, context)

    wanted = frozenset({"number"})
    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", wanted),
        PromptingError.WrongColumnType(["B"], "datetime", wanted),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_multichartseries_non_number_is_prompting_error(self):
    """Expect one WrongColumnType error per non-number chart series column."""
    input_shape = TableShape(3, [
        Column('A', ColumnType.TEXT()),
        Column('B', ColumnType.DATETIME()),
    ])
    context = RenderContext(None, None, input_shape, None, None)

    # One series per column; neither column is a number.
    series = [
        {'column': name, 'color': color}
        for name, color in (('A', '#aaaaaa'), ('B', '#cccccc'))
    ]

    with self.assertRaises(PromptingError) as cm:
        clean_value(ParamDType.Multichartseries(), series, context)

    wanted = frozenset({'number'})
    self.assertEqual(cm.exception.errors, [
        PromptingError.WrongColumnType(['A'], 'text', wanted),
        PromptingError.WrongColumnType(['B'], 'datetime', wanted),
    ])
def test_clean_multicolumn_from_other_tab(self):
    """Columns are validated against the tab named by tab_parameter."""
    cached_output = ProcessResult(pd.DataFrame({"A-from-tab-2": [1, 2]}))
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()

    # Create a single step ("step-1") in the tab and cache its render result.
    step = tab.wf_modules.create(
        order=0,
        slug="step-1",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    step.cache_render_result(workflow.last_delta_id, cached_output)

    schema = ParamDType.Dict(
        {
            "tab": ParamDType.Tab(),
            "columns": ParamDType.Multicolumn(tab_parameter="tab"),
        }
    )
    params = {"tab": tab.slug, "columns": ["A-from-tab-1", "A-from-tab-2"]}

    input_shape = TableShape(3, [Column("A-from-tab-1", ColumnType.NUMBER())])
    shapes_by_tab_slug = {
        tab.slug: StepResultShape("ok", cached_output.table_shape),
    }
    context = RenderContext(
        workflow.id, None, input_shape, shapes_by_tab_slug, params
    )

    cleaned = clean_value(schema, params, context)

    # cleaned['tab'] is not what we're testing here
    self.assertEqual(cleaned["columns"], ["A-from-tab-2"])
def from_wf_module(wf_module: "WfModule") -> "CachedRenderResult":
    """
    Build a CachedRenderResult from a WfModule's cached fields, or return None.

    Returns None when `wf_module.cached_render_result_delta_id` is None or
    when `wf_module.cached_render_result_columns` is None.

    This does not read the _result_ from disk. If you want a "snapshot in
    time" of the ProcessResult you need a lock, like this:

        # Lock the workflow, making sure we don't overwrite data
        with workflow.cooperative_lock():
            # Read from database
            cached_result = wf_module.get_cached_render_result()
            # Read from disk
            cached_result.result

    (There's not much point in reading from disk within this method,
    because a "snapshot in time" is impossible anyway: half the data is in
    the database and the other half is on disk.)
    """
    # Nothing has been cached for this module.
    if wf_module.cached_render_result_delta_id is None:
        return None

    delta_id = wf_module.cached_render_result_delta_id
    status = wf_module.cached_render_result_status
    error = wf_module.cached_render_result_error
    columns = wf_module.cached_render_result_columns
    nrows = wf_module.cached_render_result_nrows

    # TODO [2019-01-24] once we've deployed and wiped all caches, nix this
    # 'columns' check and assume 'columns' is always set when we get here
    if columns is None:
        # this cached value is stale because _Workbench_ has been updated
        # and doesn't support it any more
        return None

    # cached_render_result_json is sometimes a memoryview, so normalize it
    # to bytes. Empty bytes mean there is no JSON to parse.
    raw_json = bytes(wf_module.cached_render_result_json)
    json_dict = json.loads(raw_json) if raw_json else None

    # Coerce each stored dict to a QuickFix; a falsy stored value becomes [].
    stored_quick_fixes = wf_module.cached_render_result_quick_fixes or []
    quick_fixes = [QuickFix(**kwargs) for kwargs in stored_quick_fixes]

    # Keep in mind: the returned object's `.result` has not been loaded yet.
    # It might not exist when we do try reading it.
    return CachedRenderResult(
        workflow_id=wf_module.workflow_id,
        wf_module_id=wf_module.id,
        delta_id=delta_id,
        status=status,
        error=error,
        json=json_dict,
        quick_fixes=quick_fixes,
        table_shape=TableShape(nrows, columns),
    )
def test_clean_column_happy_path(self):
    """A number column passed to a number-only Column param is returned as-is."""
    input_shape = TableShape(3, [Column("A", ColumnType.NUMBER())])
    schema = ParamDType.Column(column_types=frozenset({"number"}))

    cleaned = clean_value(schema, "A", input_shape)

    self.assertEqual(cleaned, "A")
def test_clean_column_valid(self):
    """A Column value naming a column in the input table is returned unchanged."""
    input_shape = TableShape(3, [Column('A', ColumnType.NUMBER())])
    context = RenderContext(None, None, input_shape, None, None)

    cleaned = clean_value(ParamDType.Column(), 'A', context)

    self.assertEqual(cleaned, 'A')
def test_clean_column_missing_becomes_empty_string(self):
    """A Column value naming a column absent from the table cleans to ''."""
    # The table has only column A; the param asks for B.
    input_shape = TableShape(3, [Column('A', ColumnType.NUMBER())])
    context = RenderContext(None, None, input_shape, None, None)

    cleaned = clean_value(ParamDType.Column(), 'B', context)

    self.assertEqual(cleaned, '')
def test_clean_multicolumn_sort_in_table_order(self):
    """Selected columns are reordered to follow the table's column order."""
    col_b = Column("B", ColumnType.NUMBER())
    col_a = Column("A", ColumnType.NUMBER())
    # B precedes A in the table, while the param lists A first.
    input_shape = TableShape(3, [col_b, col_a])

    cleaned = clean_value(ParamDType.Multicolumn(), ["A", "B"], input_shape)

    self.assertEqual(cleaned, ["B", "A"])
def test_clean_multichartseries_non_number_is_prompting_error(self):
    """Text and datetime series columns each produce a WrongColumnType error."""
    text_column = Column("A", ColumnType.TEXT())
    datetime_column = Column("B", ColumnType.DATETIME())
    input_shape = TableShape(3, [text_column, datetime_column])
    context = RenderContext(None, None, input_shape, None, None)

    series_a = {"column": "A", "color": "#aaaaaa"}
    series_b = {"column": "B", "color": "#cccccc"}

    with self.assertRaises(PromptingError) as cm:
        clean_value(ParamDType.Multichartseries(), [series_a, series_b], context)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "datetime", frozenset({"number"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_multicolumn_prompting_error_convert_to_text(self):
    """
    Expect one WrongColumnType error covering both non-text columns.

    The value "A,B" selects a number and a datetime column while the schema
    wants text; the expected error carries None as its second argument.
    """
    # TODO make this _automatic_ instead of quick-fix?
    # ... but for now: prompt for a Quick Fix.
    table_columns = [
        Column("A", ColumnType.NUMBER()),
        Column("B", ColumnType.DATETIME()),
        Column("C", ColumnType.TEXT()),
    ]
    context = RenderContext(None, None, TableShape(3, table_columns), None, None)

    with self.assertRaises(PromptingError) as cm:
        schema = ParamDType.Multicolumn(column_types=frozenset({"text"}))
        clean_value(schema, "A,B", context)

    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_multicolumn_missing_is_removed(self):
    """Column names absent from the input table are dropped from the value."""
    table_columns = [
        Column("A", ColumnType.NUMBER()),
        Column("B", ColumnType.NUMBER()),
    ]
    input_shape = TableShape(3, table_columns)

    # "X" is not a column of the table.
    requested = ["A", "X", "B"]
    cleaned = clean_value(ParamDType.Multicolumn(), requested, input_shape)

    self.assertEqual(cleaned, ["A", "B"])
def test_clean_multicolumn_missing_is_removed(self):
    """Only the requested columns that exist in the table survive cleaning."""
    col_a = Column('A', ColumnType.NUMBER())
    col_b = Column('B', ColumnType.NUMBER())
    input_shape = TableShape(3, [col_a, col_b])

    # 'X' does not exist in the table, so it should not be in the result.
    cleaned = clean_value(
        ParamDType.Multicolumn(),
        ['A', 'X', 'B'],
        input_shape,
    )

    self.assertEqual(cleaned, ['A', 'B'])
def test_clean_normal_dict(self):
    """A Dict of String and Integer params is expected to clean to an equal dict."""
    input_shape = TableShape(3, [Column("A", ColumnType.NUMBER())])
    schema = ParamDType.Dict(
        {
            "str": ParamDType.String(),
            "int": ParamDType.Integer(),
        }
    )
    value = {"str": "foo", "int": 3}
    # Copy taken before cleaning: the result should equal the input (no-op).
    expected = dict(value)

    cleaned = clean_value(schema, value, input_shape)

    self.assertEqual(cleaned, expected)
def test_clean_multicolumn_sort_in_table_order(self):
    """With a RenderContext, Multicolumn values are sorted into table order."""
    # Table order (B, A) is the reverse of the order in the param value.
    input_shape = TableShape(3, [
        Column('B', ColumnType.NUMBER()),
        Column('A', ColumnType.NUMBER()),
    ])
    context = RenderContext(None, None, input_shape, None, None)

    cleaned = clean_value(ParamDType.Multicolumn(), ['A', 'B'], context)

    self.assertEqual(cleaned, ['B', 'A'])
def test_clean_column_prompting_error_convert_to_number(self):
    """A text column given to a number-only Column param raises PromptingError."""
    input_shape = TableShape(3, [Column('A', ColumnType.TEXT())])
    schema = ParamDType.Column(column_types=frozenset({'number'}))

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, 'A', input_shape)

    expected_errors = [
        PromptingError.WrongColumnType(['A'], 'text', frozenset({'number'})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_normal_dict(self):
    """Cleaning a Dict with only String and Integer entries changes nothing."""
    input_shape = TableShape(3, [Column('A', ColumnType.NUMBER())])

    entries = {
        'str': ParamDType.String(),
        'int': ParamDType.Integer(),
    }
    schema = ParamDType.Dict(entries)

    value = {'str': 'foo', 'int': 3}
    expected = dict(value)  # no-op: snapshot of the value before cleaning

    cleaned = clean_value(schema, value, input_shape)

    self.assertEqual(cleaned, expected)
def test_clean_multicolumn_missing_is_removed(self):
    """With a RenderContext, unknown column names are removed from the value."""
    input_shape = TableShape(3, [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.NUMBER()),
    ])
    context = RenderContext(None, None, input_shape, None, None)

    # 'X' is not in the table.
    requested = ['A', 'X', 'B']
    cleaned = clean_value(ParamDType.Multicolumn(), requested, context)

    self.assertEqual(cleaned, ['A', 'B'])
def test_clean_multichartseries_missing_is_removed(self):
    """A chart series whose column is not in the table is dropped."""
    input_shape = TableShape(3, [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.NUMBER()),
    ])
    context = RenderContext(None, None, input_shape, None, None)

    kept_series = {'column': 'A', 'color': '#aaaaaa'}
    # Column 'C' does not exist in the table.
    missing_series = {'column': 'C', 'color': '#cccccc'}

    cleaned = clean_value(
        ParamDType.Multichartseries(),
        [kept_series, missing_series],
        context,
    )

    self.assertEqual(cleaned, [{'column': 'A', 'color': '#aaaaaa'}])
def test_clean_column_missing_becomes_empty_string(self):
    """Inside a Dict, a Column value naming a missing column cleans to ''."""
    # The table has only column A; the 'column' param asks for B.
    input_shape = TableShape(3, [Column('A', ColumnType.NUMBER)])
    context = RenderContext(None, input_shape, None, None)

    schema = ParamDType.Dict({'column': ParamDType.Column()})

    cleaned = clean_value(schema, {'column': 'B'}, context)

    self.assertEqual(cleaned, {'column': ''})
def test_clean_multicolumn_sort_in_table_order(self):
    """The cleaned Multicolumn list follows the table's column order."""
    first = Column("B", ColumnType.NUMBER())
    second = Column("A", ColumnType.NUMBER())
    context = RenderContext(
        None, None, TableShape(3, [first, second]), None, None
    )

    # Requested as A, B; the table has B first.
    cleaned = clean_value(ParamDType.Multicolumn(), ["A", "B"], context)

    self.assertEqual(cleaned, ["B", "A"])
def test_clean_multicolumn_missing_is_removed(self):
    """Inside a Dict, a comma-separated Multicolumn value loses missing names."""
    input_shape = TableShape(3, [
        Column('A', ColumnType.NUMBER),
        Column('B', ColumnType.NUMBER),
    ])
    context = RenderContext(None, input_shape, None, None)

    schema = ParamDType.Dict({'columns': ParamDType.Multicolumn()})

    # 'X' is not a column of the table.
    cleaned = clean_value(schema, {'columns': 'A,X,B'}, context)

    self.assertEqual(cleaned, {'columns': 'A,B'})
def test_clean_column_prompting_error_convert_to_number(self):
    """Expect a WrongColumnType error when a text column must be a number."""
    input_shape = TableShape(3, [Column("A", ColumnType.TEXT())])
    context = RenderContext(None, None, input_shape, None, None)
    number_column = ParamDType.Column(column_types=frozenset({"number"}))

    with self.assertRaises(PromptingError) as cm:
        clean_value(number_column, "A", context)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_multicolumn_missing_is_removed(self):
    """Requested columns that the table lacks do not appear in the result."""
    table_columns = [
        Column("A", ColumnType.NUMBER()),
        Column("B", ColumnType.NUMBER()),
    ]
    context = RenderContext(None, None, TableShape(3, table_columns), None, None)

    # "X" is requested but is not one of the table's columns.
    cleaned = clean_value(ParamDType.Multicolumn(), ["A", "X", "B"], context)

    self.assertEqual(cleaned, ["A", "B"])
def test_list_prompting_error_concatenate_same_type(self):
    """
    Expect one WrongColumnType error listing both text columns.

    Both columns share the same wrong type ('text'), and the expected error
    names that type along with both column names.
    """
    input_shape = TableShape(3, [
        Column('A', ColumnType.TEXT()),
        Column('B', ColumnType.TEXT()),
    ])
    context = RenderContext(None, None, input_shape, None, None)

    number_column = ParamDType.Column(column_types=frozenset({'number'}))
    schema = ParamDType.List(inner_dtype=number_column)

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, ['A', 'B'], context)

    expected_errors = [
        PromptingError.WrongColumnType(['A', 'B'], 'text',
                                       frozenset({'number'})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_column_prompting_error_convert_to_text(self):
    """A number column given to a text-only Column param raises PromptingError."""
    # TODO make this _automatic_ instead of quick-fix?
    # Consider Regex. We probably want to pass the module a text Series
    # _separately_ from the input DataFrame. That way Regex can output
    # a new Text column but preserve its input column's data type.
    #
    # ... but for now: prompt for a Quick Fix.
    input_shape = TableShape(3, [Column('A', ColumnType.NUMBER())])
    schema = ParamDType.Column(column_types=frozenset({'text'}))

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, 'A', input_shape)

    expected_errors = [
        PromptingError.WrongColumnType(['A'], 'number', frozenset({'text'})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_multicolumn_prompting_error_convert_to_text(self):
    """Expect one WrongColumnType error per non-text column named in 'A,B'."""
    # TODO make this _automatic_ instead of quick-fix?
    # ... but for now: prompt for a Quick Fix.
    table_columns = [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.DATETIME()),
        Column('C', ColumnType.TEXT()),
    ]
    input_shape = TableShape(3, table_columns)

    with self.assertRaises(PromptingError) as cm:
        schema = ParamDType.Multicolumn(column_types=frozenset({'text'}))
        clean_value(schema, 'A,B', input_shape)

    wanted = frozenset({'text'})
    expected_errors = [
        PromptingError.WrongColumnType(['A'], 'number', wanted),
        PromptingError.WrongColumnType(['B'], 'datetime', wanted),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_multicolumn_from_other_tab_that_does_not_exist(self):
    """A Multicolumn bound to a tab slug with no known tab cleans to []."""
    # The other tab would not exist if the user selected and then deleted
    # it.
    workflow = Workflow.create_and_init()

    schema = ParamDType.Dict({
        'tab': ParamDType.Tab(),
        'columns': ParamDType.Multicolumn(tab_parameter='tab'),
    })
    param_values = {'tab': 'tab-missing', 'columns': ['A-from-tab']}
    params = Params(schema, param_values, {})

    input_shape = TableShape(3, [
        Column('A-from-tab-1', ColumnType.NUMBER()),
    ])
    # The tab-shapes mapping is empty: 'tab-missing' has no entry in it.
    context = RenderContext(workflow.id, None, input_shape, {}, params)

    result = clean_value(schema, param_values, context)

    # result['tab'] is not what we're testing here
    self.assertEqual(result['columns'], [])