def test_clean_column_happy_path(self):
    """A number column passes a number-only Column param unchanged."""
    metadata = TableMetadata(3, [Column("A", ColumnType.Number())])
    dtype = ParamDType.Column(column_types=frozenset({"number"}))

    cleaned = clean_value(dtype, "A", metadata)

    self.assertEqual(cleaned, "A")
def test_dict_prompting_error_concatenate_same_type(self):
    """Two text columns rejected by number-only params yield one error.

    Both dict entries want a number column and both selected columns
    hold text, so the expected error list has a single WrongColumnType
    entry naming columns "A" and "B" together.
    """
    context = self._render_context(
        input_table=arrow_table({"A": ["1"], "B": ["2"]})
    )
    # One fresh number-only Column param per dict key, in "x", "y" order.
    schema = ParamDType.Dict(
        {
            key: ParamDType.Column(column_types=frozenset({"number"}))
            for key in ("x", "y")
        }
    )
    value = {"x": "A", "y": "B"}

    with self.assertRaises(PromptingError) as raised:
        clean_value(schema, value, context)

    self.assertEqual(
        raised.exception.errors,
        [
            PromptingError.WrongColumnType(
                ["A", "B"], "text", frozenset({"number"})
            )
        ],
    )
def test_clean_column_prompting_error_convert_to_number(self):
    """A text column given to a number-only Column param raises a prompt."""
    # NOTE(review): a later definition in this file uses the same method
    # name; if both live in one class, the later one replaces this test.
    context = self._render_context(input_table=arrow_table({"A": ["1"]}))
    dtype = ParamDType.Column(column_types=frozenset({"number"}))

    with self.assertRaises(PromptingError) as raised:
        clean_value(dtype, "A", context)

    self.assertEqual(
        raised.exception.errors,
        [PromptingError.WrongColumnType(["A"], "text", frozenset({"number"}))],
    )
def test_clean_column_prompting_error_convert_to_number(self):
    """A text column given to a number-only Column param raises a prompt.

    This variant describes the input with a TableMetadata rather than a
    render context.
    """
    # NOTE(review): an earlier definition in this file uses the same method
    # name; if both live in one class, this one replaces the earlier test.
    metadata = TableMetadata(3, [Column("A", ColumnType.Text())])
    dtype = ParamDType.Column(column_types=frozenset({"number"}))

    with self.assertRaises(PromptingError) as raised:
        clean_value(dtype, "A", metadata)

    self.assertEqual(
        raised.exception.errors,
        [PromptingError.WrongColumnType(["A"], "text", frozenset({"number"}))],
    )
def test_list_prompting_error_concatenate_different_type_to_text(self):
    """Non-text columns of different types share one text-only error.

    Column "A" holds an integer and "B" holds a datetime. The list's inner
    param accepts only text, and the expected result is a single
    WrongColumnType entry for both columns whose second argument is None.
    """
    table = arrow_table({"A": [1], "B": [datetime.now()]})
    context = self._render_context(input_table=table)
    text_column = ParamDType.Column(column_types=frozenset({"text"}))
    schema = ParamDType.List(inner_dtype=text_column)

    with self.assertRaises(PromptingError) as raised:
        clean_value(schema, ["A", "B"], context)

    self.assertEqual(
        raised.exception.errors,
        [PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))],
    )
def test_dict_prompting_error(self):
    """Each dict entry with a wrong column type contributes its own error.

    Both columns are text; "col1" wants a number column and "col2" wants a
    datetime column, so two separate WrongColumnType entries are expected,
    in that order.
    """
    metadata = TableMetadata(
        3,
        [Column("A", ColumnType.Text()), Column("B", ColumnType.Text())],
    )
    number_param = ParamDType.Column(column_types=frozenset({"number"}))
    datetime_param = ParamDType.Column(column_types=frozenset({"datetime"}))
    schema = ParamDType.Dict({"col1": number_param, "col2": datetime_param})

    with self.assertRaises(PromptingError) as raised:
        clean_value(schema, {"col1": "A", "col2": "B"}, metadata)

    self.assertEqual(
        raised.exception.errors,
        [
            PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
            PromptingError.WrongColumnType(
                ["B"], "text", frozenset({"datetime"})
            ),
        ],
    )
def test_dict_prompting_error_concatenate_different_types(self):
    """Columns of different types get separate errors for the same want.

    Both dict entries want a number column, but "A" is text and "B" is a
    timestamp column, so the expected list has one WrongColumnType entry
    per column ("text" and "datetime" respectively).
    """
    timestamps = pa.array([datetime.now()], pa.timestamp("ns"))
    table = arrow_table({"A": ["1"], "B": timestamps})
    context = self._render_context(input_table=table)
    # One fresh number-only Column param per dict key, in "x", "y" order.
    schema = ParamDType.Dict(
        {
            key: ParamDType.Column(column_types=frozenset({"number"}))
            for key in ("x", "y")
        }
    )

    with self.assertRaises(PromptingError) as raised:
        clean_value(schema, {"x": "A", "y": "B"}, context)

    self.assertEqual(
        raised.exception.errors,
        [
            PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
            PromptingError.WrongColumnType(
                ["B"], "datetime", frozenset({"number"})
            ),
        ],
    )
def test_clean_column_prompting_error_convert_to_text(self):
    """A number column given to a text-only Column param raises a prompt."""
    # TODO make this _automatic_ instead of quick-fix?
    # Consider Regex. We probably want to pass the module a text Series
    # _separately_ from the input DataFrame. That way Regex can output
    # a new Text column but preserve its input column's data type.
    #
    # ... but for now: prompt for a Quick Fix.
    context = self._render_context(input_table=arrow_table({"A": [1]}))
    dtype = ParamDType.Column(column_types=frozenset({"text"}))

    with self.assertRaises(PromptingError) as raised:
        clean_value(dtype, "A", context)

    self.assertEqual(
        raised.exception.errors,
        [PromptingError.WrongColumnType(["A"], None, frozenset({"text"}))],
    )
def test_clean_column_missing_becomes_empty_string(self):
    """Selecting a column absent from the input table cleans to ""."""
    table = arrow_table({"A": [1]})
    context = self._render_context(input_table=table)

    cleaned = clean_value(ParamDType.Column(), "B", context)

    self.assertEqual(cleaned, "")
def test_clean_column_valid(self):
    """Selecting a column present in the input table keeps its name."""
    table = arrow_table({"A": [1]})
    context = self._render_context(input_table=table)

    cleaned = clean_value(ParamDType.Column(), "A", context)

    self.assertEqual(cleaned, "A")
def test_clean_column_missing(self):
    """A column name not listed in the table metadata cleans to ""."""
    metadata = TableMetadata(3, [Column("A", ColumnType.Number())])

    cleaned = clean_value(ParamDType.Column(), "B", metadata)

    self.assertEqual(cleaned, "")
def test_clean_column_tab_parameter_is_error(self):
    """Cleaning a Column param that sets tab_parameter is a RuntimeError."""
    metadata = TableMetadata(3, [Column("A", ColumnType.Number())])
    expected_message = "Unsupported: fetch column with tab_parameter"

    # Build the param inside the context manager too, so whichever call
    # raises is covered by the assertion.
    with self.assertRaisesRegex(RuntimeError, expected_message):
        clean_value(ParamDType.Column(tab_parameter="tab-2"), "A", metadata)
def test_clean_column_no_input_is_empty(self):
    """With a default-constructed TableMetadata, a column cleans to ""."""
    cleaned = clean_value(ParamDType.Column(), "A", TableMetadata())

    self.assertEqual(cleaned, "")