def test_dict_prompting_error_concatenate_different_types(self):
    """Dict params yield one WrongColumnType error per mistyped column.

    Both "x" and "y" accept only number columns; "A" is text and "B" is
    datetime, so the expected errors list has one entry per column.
    """
    table_shape = TableShape(
        3,
        [Column("A", ColumnType.TEXT()), Column("B", ColumnType.DATETIME())],
    )
    context = RenderContext(None, None, table_shape, None, None)
    schema = ParamDType.Dict(
        {
            "x": ParamDType.Column(column_types=frozenset({"number"})),
            "y": ParamDType.Column(column_types=frozenset({"number"})),
        }
    )

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, {"x": "A", "y": "B"}, context)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "datetime", frozenset({"number"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_condition_timestamp_wrong_value(self):
    """A timestamp condition with an unparseable value prompts the user.

    Column "A" holds timestamps; comparing it against the string
    "Yesterday" must raise PromptingError carrying a single
    CannotCoerceValueToTimestamp error.
    """
    timestamps = pa.array([datetime.now()], pa.timestamp("ns"))
    context = self._render_context(input_table=arrow_table({"A": timestamps}))
    condition = {
        "operation": "timestamp_is_greater_than",
        "column": "A",
        "value": "Yesterday",
        "isCaseSensitive": False,
        "isRegex": False,
    }

    with self.assertRaises(PromptingError) as cm:
        clean_value(ParamDType.Condition(), condition, context)

    expected_errors = [PromptingError.CannotCoerceValueToTimestamp("Yesterday")]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_list_prompting_error_concatenate_different_type_to_text(self):
    """Mistyped columns headed for a text-only List share a single error.

    "A" (number) and "B" (datetime) differ in type, yet the expected
    result is one WrongColumnType naming both columns, with type None.
    """
    columns = [
        Column("A", ColumnType.NUMBER()),
        Column("B", ColumnType.DATETIME()),
    ]
    context = RenderContext(None, None, TableShape(3, columns), None, None)
    text_column = ParamDType.Column(column_types=frozenset({"text"}))
    schema = ParamDType.List(inner_dtype=text_column)

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, ["A", "B"], context)

    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_tab_tab_delete_race_raises_unneededexecution(self):
    """
    Raise UnneededExecution when the tab is deleted while rendering.

    The return value would not matter anyway: the render() result is never
    saved once this WfModule's delta has changed. UnneededExecution is
    simply the quickest way out -- it is an error the caller is meant to
    raise anyway, unlike `Tab.DoesNotExist`.
    """
    # What 'render' _believes_ the tab's output to be
    rendered = ProcessResult(pd.DataFrame({'A': [1, 2]}))
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.wf_modules.create(
        order=0,
        last_relevant_delta_id=workflow.last_delta_id,
    )
    step.cache_render_result(workflow.last_delta_id, rendered)

    # Mark the tab as deleted
    tab.is_deleted = True
    tab.save(update_fields=['is_deleted'])
    # Mimic reality: the step's last_relevant_delta_id moves on
    step.last_relevant_delta_id += 1
    step.save(update_fields=['last_relevant_delta_id'])

    tab_shapes = {tab.slug: StepResultShape('ok', rendered.table_shape)}
    context = RenderContext(workflow.id, None, None, tab_shapes, None)

    with self.assertRaises(UnneededExecution):
        clean_value(ParamDType.Tab(), tab.slug, context)
def test_clean_multichartseries_non_number_is_prompting_error(self):
    """Multichartseries entries on non-number columns raise PromptingError.

    "A" is text and "B" is datetime; the expected errors list holds one
    WrongColumnType per column, each wanting "number".
    """
    table_shape = TableShape(
        3,
        [Column("A", ColumnType.TEXT()), Column("B", ColumnType.DATETIME())],
    )
    context = RenderContext(None, None, table_shape, None, None)
    series = [
        {"column": "A", "color": "#aaaaaa"},
        {"column": "B", "color": "#cccccc"},
    ]

    with self.assertRaises(PromptingError) as cm:
        clean_value(ParamDType.Multichartseries(), series, context)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "datetime", frozenset({"number"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_multichartseries_non_number_is_prompting_error(self):
    """Multichartseries entries on non-number columns raise PromptingError.

    The input table has a string column "A" and a timestamp column "B";
    the expected errors are one WrongColumnType per column ("text" and
    "datetime"), each wanting "number".
    """
    input_table = arrow_table(
        {
            "A": ["a"],
            "B": pa.array([datetime.now()], pa.timestamp("ns")),
        }
    )
    context = self._render_context(input_table=input_table)
    series = [
        {"column": "A", "color": "#aaaaaa"},
        {"column": "B", "color": "#cccccc"},
    ]

    with self.assertRaises(PromptingError) as cm:
        clean_value(ParamDType.Multichartseries(), series, context)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "datetime", frozenset({"number"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_tab_wf_module_changed_raises_unneededexecution(self):
    """
    Raise UnneededExecution when a tab's output changes while rendering.

    The return value would not matter anyway: the render() result is never
    saved once this WfModule's delta has changed. UnneededExecution seems
    like the simplest contract to enforce.
    """
    # What 'render' _believes_ the tab's output to be
    rendered = ProcessResult(pd.DataFrame({'A': [1, 2]}))
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.wf_modules.create(
        order=0,
        last_relevant_delta_id=workflow.last_delta_id,
    )
    step.cache_render_result(workflow.last_delta_id, rendered)

    # Mimic reality: the step's last_relevant_delta_id moves on
    step.last_relevant_delta_id += 1
    step.save(update_fields=['last_relevant_delta_id'])

    tab_shapes = {tab.slug: StepResultShape('ok', rendered.table_shape)}
    context = RenderContext(workflow.id, None, None, tab_shapes, None)

    with self.assertRaises(UnneededExecution):
        clean_value(ParamDType.Tab(), tab.slug, context)
def test_clean_multicolumn_prompting_error_convert_to_text(self):
    """Non-text columns in a text-only Multicolumn prompt for a Quick Fix.

    The value "A,B" selects a number column and a datetime column; the
    expected result is a single WrongColumnType naming both columns, with
    type None.
    """
    # TODO make this _automatic_ instead of quick-fix?
    # ... but for now: prompt for a Quick Fix.
    context = RenderContext(
        None,
        None,
        TableShape(
            3,
            [
                Column("A", ColumnType.NUMBER()),
                Column("B", ColumnType.DATETIME()),
                Column("C", ColumnType.TEXT()),
            ],
        ),
        None,
        None,
    )
    # Build the schema outside assertRaises so that only clean_value() can
    # satisfy the expected exception.
    schema = ParamDType.Multicolumn(column_types=frozenset({"text"}))

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, "A,B", context)

    self.assertEqual(
        cm.exception.errors,
        [PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))],
    )
def test_clean_multichartseries_non_number_is_prompting_error(self):
    """Multichartseries entries on non-number columns raise PromptingError.

    'A' is text and 'B' is datetime; the expected errors list holds one
    WrongColumnType per column, each wanting 'number'.
    """
    columns = [
        Column('A', ColumnType.TEXT()),
        Column('B', ColumnType.DATETIME()),
    ]
    context = RenderContext(None, None, TableShape(3, columns), None, None)
    series = [
        {'column': 'A', 'color': '#aaaaaa'},
        {'column': 'B', 'color': '#cccccc'},
    ]

    with self.assertRaises(PromptingError) as cm:
        clean_value(ParamDType.Multichartseries(), series, context)

    expected_errors = [
        PromptingError.WrongColumnType(['A'], 'text', frozenset({'number'})),
        PromptingError.WrongColumnType(['B'], 'datetime',
                                       frozenset({'number'})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_column_prompting_error_convert_to_number(self):
    """A text column given to a number-only Column param prompts the user.

    The expected error is a single WrongColumnType for "A", reporting
    "text" where "number" is wanted.
    """
    input_table = arrow_table({"A": ["1"]})
    context = self._render_context(input_table=input_table)
    schema = ParamDType.Column(column_types=frozenset({"number"}))

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, "A", context)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_column_prompting_error_convert_to_number(self):
    """A text column given to a number-only Column param prompts the user.

    The expected error is a single WrongColumnType for 'A', reporting
    'text' where 'number' is wanted.
    """
    table_shape = TableShape(3, [Column('A', ColumnType.TEXT())])
    context = RenderContext(None, None, table_shape, None, None)
    schema = ParamDType.Column(column_types=frozenset({'number'}))

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, 'A', context)

    expected_errors = [
        PromptingError.WrongColumnType(['A'], 'text', frozenset({'number'})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_list_prompting_error_concatenate_same_type(self):
    """Mistyped columns of the same type are merged into one error.

    "A" and "B" are both text but the List's inner Column wants numbers;
    the expected result is one WrongColumnType naming both columns.
    """
    input_table = arrow_table({"A": ["1"], "B": ["2"]})
    context = self._render_context(input_table=input_table)
    number_column = ParamDType.Column(column_types=frozenset({"number"}))
    schema = ParamDType.List(inner_dtype=number_column)

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, ["A", "B"], context)

    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], "text", frozenset({"number"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_list_prompting_error_concatenate_different_type_to_text(self):
    """Mistyped columns headed for a text-only List share a single error.

    "A" holds an int and "B" a datetime, yet the expected result is one
    WrongColumnType naming both columns, with type None.
    """
    input_table = arrow_table({"A": [1], "B": [datetime.now()]})
    context = self._render_context(input_table=input_table)
    text_column = ParamDType.Column(column_types=frozenset({"text"}))
    schema = ParamDType.List(inner_dtype=text_column)

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, ["A", "B"], context)

    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_multicolumn_prompting_error_convert_to_text(self):
    """Non-text columns in a text-only Multicolumn prompt for a Quick Fix.

    "A" holds an int and "B" a datetime; the expected result is a single
    WrongColumnType naming both columns, with type None.
    """
    # TODO make this _automatic_ instead of quick-fix?
    # ... but for now: prompt for a Quick Fix.
    context = self._render_context(
        input_table=arrow_table({"A": [1], "B": [datetime.now()], "C": ["x"]})
    )
    # Build the schema outside assertRaises so that only clean_value() can
    # satisfy the expected exception.
    schema = ParamDType.Multicolumn(column_types=frozenset({"text"}))

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, ["A", "B"], context)

    self.assertEqual(
        cm.exception.errors,
        [PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))],
    )
def test_clean_file_happy_path(self):
    """A File param resolves to a local path holding the uploaded bytes.

    The test uploads b"1234" as "x.csv.gz", then asserts that
    clean_value() returns a pathlib.Path with the same contents and the
    suffixes [".csv", ".gz"], and that the file is gone once the returned
    path object is deleted.
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wfm = tab.wf_modules.create(module_id_name="uploadfile", order=0, slug="step-1")
    # Named `file_uuid` so the builtin `id` is not shadowed.
    file_uuid = str(uuid.uuid4())
    # Plain `{...}` placeholders: the former `${...}` (JavaScript syntax)
    # left literal "$" characters in the key.
    key = f"wf-{workflow.id}/wfm-{wfm.id}/{file_uuid}"
    minio.put_bytes(minio.UserFilesBucket, key, b"1234")
    UploadedFile.objects.create(
        wf_module=wfm,
        name="x.csv.gz",
        size=4,
        uuid=file_uuid,
        bucket=minio.UserFilesBucket,
        key=key,
    )
    context = RenderContext(workflow.id, wfm.id, None, None, None)

    result = clean_value(ParamDType.File(), file_uuid, context)

    self.assertIsInstance(result, pathlib.Path)
    self.assertEqual(result.read_bytes(), b"1234")
    self.assertEqual(result.suffixes, [".csv", ".gz"])

    # Assert that once `result` goes out of scope, the file is deleted
    str_path = str(result)  # get the filesystem path
    del result  # should finalize, deleting the file on the filesystem
    # Check existence instead of os.open(): a failing test would otherwise
    # leave an open file descriptor behind.
    self.assertFalse(os.path.exists(str_path))
def test_clean_column_missing_becomes_empty_string(self):
    """A Column value naming an absent column is cleaned to ''.

    The table only has column 'A'; asking for 'B' must return ''.
    """
    table_shape = TableShape(3, [Column('A', ColumnType.NUMBER())])
    context = RenderContext(None, None, table_shape, None, None)

    cleaned = clean_value(ParamDType.Column(), 'B', context)

    self.assertEqual(cleaned, '')
def test_clean_float_with_int_value(self):
    """An int supplied for a Float param is cleaned to a float."""
    # ParamDType.Float values may arrive as `int`: they come from parsed
    # JSON, which only has Numbers and so can give "3" instead of "3.0".
    # We want to pass that as `float` in the `params` dict.
    cleaned = clean_value(ParamDType.Float(), 3, None)

    self.assertEqual(cleaned, 3.0)
    self.assertIsInstance(cleaned, float)
def test_list_prompting_error_concatenate_same_type(self):
    """Mistyped columns of the same type are merged into one error.

    'A' and 'B' are both text but the List's inner Column wants numbers;
    the expected result is one WrongColumnType naming both columns.
    """
    columns = [
        Column('A', ColumnType.TEXT()),
        Column('B', ColumnType.TEXT()),
    ]
    context = RenderContext(None, None, TableShape(3, columns), None, None)
    number_column = ParamDType.Column(column_types=frozenset({'number'}))
    schema = ParamDType.List(inner_dtype=number_column)

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, ['A', 'B'], context)

    expected_errors = [
        PromptingError.WrongColumnType(['A', 'B'], 'text',
                                       frozenset({'number'})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_file_wrong_wf_module(self):
    """A File param whose upload belongs to another step cleans to None.

    The UploadedFile row is attached to `step2`, while the render context
    is for `step`; the test asserts that clean_value() returns None and
    that `self.basedir` is left empty.
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.wf_modules.create(module_id_name="uploadfile", order=0, slug="step-1")
    step2 = tab.wf_modules.create(module_id_name="uploadfile", order=1, slug="step-2")
    # Named `file_uuid` so the builtin `id` is not shadowed.
    file_uuid = str(uuid.uuid4())
    # Plain `{...}` placeholders: the former `${...}` (JavaScript syntax)
    # left literal "$" characters in the key.
    key = f"wf-{workflow.id}/wfm-{step.id}/{file_uuid}"
    minio.put_bytes(minio.UserFilesBucket, key, b"1234")
    UploadedFile.objects.create(
        wf_module=step2,
        name="x.csv.gz",
        size=4,
        uuid=file_uuid,
        bucket=minio.UserFilesBucket,
        key=key,
    )
    context = self._render_context(wf_module_id=step.id)

    result = clean_value(ParamDType.File(), file_uuid, context)

    self.assertIsNone(result)
    # Assert that if a temporary file was created to house the download, it
    # no longer exists.
    self.assertListEqual(list(self.basedir.iterdir()), [])
def test_clean_column_valid(self):
    """A Column value naming an existing column is returned unchanged."""
    table_shape = TableShape(3, [Column('A', ColumnType.NUMBER())])
    context = RenderContext(None, None, table_shape, None, None)

    cleaned = clean_value(ParamDType.Column(), 'A', context)

    self.assertEqual(cleaned, 'A')
def test_clean_file_happy_path(self):
    """A File param resolves to a local Path holding the uploaded bytes.

    The test uploads b"1234" as "x.csv.gz", then asserts that
    clean_value() returns a Path with the same contents and the suffixes
    [".csv", ".gz"], and that the file no longer exists after the
    ExitStack passed to the render context has closed.
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.wf_modules.create(module_id_name="uploadfile", order=0, slug="step-1")
    # Named `file_uuid` so the builtin `id` is not shadowed.
    file_uuid = str(uuid.uuid4())
    # Plain `{...}` placeholders: the former `${...}` (JavaScript syntax)
    # left literal "$" characters in the key.
    key = f"wf-{workflow.id}/wfm-{step.id}/{file_uuid}"
    minio.put_bytes(minio.UserFilesBucket, key, b"1234")
    UploadedFile.objects.create(
        wf_module=step,
        name="x.csv.gz",
        size=4,
        uuid=file_uuid,
        bucket=minio.UserFilesBucket,
        key=key,
    )

    with ExitStack() as inner_stack:
        context = self._render_context(wf_module_id=step.id, exit_stack=inner_stack)
        result: Path = clean_value(ParamDType.File(), file_uuid, context)
        self.assertIsInstance(result, Path)
        self.assertEqual(result.read_bytes(), b"1234")
        self.assertEqual(result.suffixes, [".csv", ".gz"])

    # Assert that once `exit_stack` goes out of scope, file is deleted
    self.assertFalse(result.exists())
def test_clean_condition_and_or_simplify(self):
    """Single-child "and"/"or" wrappers collapse to the inner condition.

    An "and" wrapping an "or" wrapping one "cell_is_blank" condition is
    expected to clean down to just the "cell_is_blank" operation and its
    column.
    """
    context = self._render_context(input_table=arrow_table({"A": [1]}))
    blank_check = {
        "operation": "cell_is_blank",
        "column": "A",
        "value": "",
        "isCaseSensitive": False,
        "isRegex": False,
    }
    nested = {
        "operation": "and",
        "conditions": [
            {"operation": "or", "conditions": [blank_check]},
        ],
    }

    cleaned = clean_value(ParamDType.Condition(), nested, context)

    self.assertEqual(cleaned, {"operation": "cell_is_blank", "column": "A"})
def test_clean_multicolumn_sort_in_table_order(self):
    """Multicolumn values come back in the table's column order.

    The table lists "B" before "A", so ["A", "B"] is expected to clean to
    ["B", "A"].
    """
    input_table = arrow_table({"B": [1], "A": [2]})
    context = self._render_context(input_table=input_table)

    cleaned = clean_value(ParamDType.Multicolumn(), ["A", "B"], context)

    self.assertEqual(cleaned, ["B", "A"])
def test_clean_column_prompting_error_convert_to_text(self):
    """A non-text column given to a text-only Column param prompts the user.

    The expected error is a single WrongColumnType for "A" with type None.
    """
    # TODO make this _automatic_ instead of quick-fix?
    # Think of Regex: we would probably hand the module a text Series
    # _separately_ from the input DataFrame, so that Regex can emit a new
    # Text column while keeping its input column's data type.
    #
    # ... but for now: prompt for a Quick Fix.
    input_table = arrow_table({"A": [1]})
    context = self._render_context(input_table=input_table)
    schema = ParamDType.Column(column_types=frozenset({"text"}))

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, "A", context)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], None, frozenset({"text"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_condition_not(self):
    """A "text_is_not" condition is rewritten as "not" around "text_is".

    The inner condition is expected to keep the original column, value and
    flags.
    """
    context = self._render_context(input_table=arrow_table({"A": ["a"]}))
    negated = {
        "operation": "text_is_not",
        "column": "A",
        "value": "a",
        "isCaseSensitive": False,
        "isRegex": False,
    }

    cleaned = clean_value(ParamDType.Condition(), negated, context)

    expected = {
        "operation": "not",
        "condition": {
            "operation": "text_is",
            "column": "A",
            "value": "a",
            "isCaseSensitive": False,
            "isRegex": False,
        },
    }
    self.assertEqual(cleaned, expected)
def test_clean_multicolumn_from_other_tab(self):
    """A Multicolumn with `tab_parameter` keeps the other tab's columns.

    The input table has 'A-from-tab-1' and the tab's cached output has
    'A-from-tab-2'; only 'A-from-tab-2' is expected in the cleaned
    'columns' value.
    """
    rendered = ProcessResult(pd.DataFrame({'A-from-tab-2': [1, 2]}))
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.wf_modules.create(
        order=0,
        last_relevant_delta_id=workflow.last_delta_id,
    )
    step.cache_render_result(workflow.last_delta_id, rendered)

    schema = ParamDType.Dict({
        'tab': ParamDType.Tab(),
        'columns': ParamDType.Multicolumn(tab_parameter='tab'),
    })
    params = {
        'tab': tab.slug,
        'columns': ['A-from-tab-1', 'A-from-tab-2'],
    }
    input_shape = TableShape(3, [Column('A-from-tab-1', ColumnType.NUMBER())])
    tab_shapes = {tab.slug: StepResultShape('ok', rendered.table_shape)}
    context = RenderContext(workflow.id, None, input_shape, tab_shapes, params)

    cleaned = clean_value(schema, params, context)

    # cleaned['tab'] is not under test here
    self.assertEqual(cleaned['columns'], ['A-from-tab-2'])
def test_clean_file_no_uploaded_file(self):
    """A File param with a UUID that has no UploadedFile cleans to None."""
    workflow = Workflow.create_and_init()
    first_tab = workflow.tabs.first()
    step = first_tab.wf_modules.create(module_id_name='uploadfile', order=0)
    context = RenderContext(workflow.id, step.id, None, None, None)
    unknown_uuid = str(uuid.uuid4())

    cleaned = clean_value(ParamDType.File(), unknown_uuid, context)

    self.assertIsNone(cleaned)
def test_clean_multicolumn_from_other_tab(self):
    """A Multicolumn with `tab_parameter` keeps the other tab's columns.

    The input table has "A-from-tab-1" and the tab's cached output has
    "A-from-tab-2"; only "A-from-tab-2" is expected in the cleaned
    "columns" value.
    """
    rendered = ProcessResult(pd.DataFrame({"A-from-tab-2": [1, 2]}))
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.wf_modules.create(
        order=0,
        slug="step-1",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    step.cache_render_result(workflow.last_delta_id, rendered)

    schema = ParamDType.Dict(
        {
            "tab": ParamDType.Tab(),
            "columns": ParamDType.Multicolumn(tab_parameter="tab"),
        }
    )
    params = {
        "tab": tab.slug,
        "columns": ["A-from-tab-1", "A-from-tab-2"],
    }
    input_shape = TableShape(3, [Column("A-from-tab-1", ColumnType.NUMBER())])
    tab_shapes = {tab.slug: StepResultShape("ok", rendered.table_shape)}
    context = RenderContext(workflow.id, None, input_shape, tab_shapes, params)

    cleaned = clean_value(schema, params, context)

    # cleaned["tab"] is not under test here
    self.assertEqual(cleaned["columns"], ["A-from-tab-2"])
def test_clean_multichartseries_missing_is_removed(self):
    """Multichartseries entries naming absent columns are dropped.

    The table has "A" and "B"; the entry for "C" is expected to disappear
    while the entry for "A" is kept as-is.
    """
    input_table = arrow_table({"A": [1], "B": [1]})
    context = self._render_context(input_table=input_table)
    series = [
        {"column": "A", "color": "#aaaaaa"},
        {"column": "C", "color": "#cccccc"},
    ]

    cleaned = clean_value(ParamDType.Multichartseries(), series, context)

    self.assertEqual(cleaned, [{"column": "A", "color": "#aaaaaa"}])
def test_clean_multicolumn_missing_is_removed(self):
    """Multicolumn values naming absent columns are dropped.

    The table has "A" and "B"; "X" is expected to be removed from
    ["A", "X", "B"].
    """
    input_table = arrow_table({"A": [1], "B": [1]})
    context = self._render_context(input_table=input_table)

    cleaned = clean_value(ParamDType.Multicolumn(), ["A", "X", "B"], context)

    self.assertEqual(cleaned, ["A", "B"])