def test_quick_fixes_convert_to_text(self):
    """A text-wanted WrongColumnType renders one error with a "converttotext" fix."""
    prompting_error = PromptingError(
        [PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))]
    )

    expected_message = I18nMessage(
        "py.renderer.execute.types.PromptingError.WrongColumnType.as_error_message.shouldBeText",
        {"columns": 2, "0": "A", "1": "B"},
        None,
    )
    expected_quick_fix = QuickFix(
        I18nMessage(
            "py.renderer.execute.types.PromptingError.WrongColumnType.as_quick_fixes.shouldBeText",
            {},
            None,
        ),
        QuickFixAction.PrependStep("converttotext", {"colnames": ["A", "B"]}),
    )

    self.assertEqual(
        prompting_error.as_render_errors(),
        [RenderError(expected_message, [expected_quick_fix])],
    )
def test_clean_multichartseries_non_number_is_prompting_error(self):
    """Text and timestamp series columns each produce a wants-number error."""
    series = [
        {"column": "A", "color": "#aaaaaa"},
        {"column": "B", "color": "#cccccc"},
    ]
    wanted_types = frozenset({"number"})

    with self.assertRaises(PromptingError) as raised:
        self._call_clean_value(
            ParamSchema.Multichartseries(),
            series,
            input_table_columns=[TEXT("A"), TIMESTAMP("B")],
        )

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", wanted_types),
        PromptingError.WrongColumnType(["B"], "timestamp", wanted_types),
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def _(schema: ParamSchema.Column, value: str, input_metadata: TableMetadata) -> str:
    """Clean a single-column parameter value against the input table.

    Returns ``value`` when it names an input column whose type the schema
    accepts, or ``""`` when the table has no columns or ``value`` is not one
    of them.

    Raises:
        RuntimeError: if ``schema.tab_parameter`` is set (unsupported here).
        PromptingError: if the column exists but its type is not in
            ``schema.column_types``.
    """
    if schema.tab_parameter:
        raise RuntimeError("Unsupported: fetch column with tab_parameter")

    if not input_metadata.columns:
        return ""

    columns_by_name = {col.name: col for col in input_metadata.columns}
    try:
        column = columns_by_name[value]
    except KeyError:
        return ""  # Null column

    allowed_types = schema.column_types
    if not allowed_types:
        # No type restriction: any existing column is acceptable.
        return value

    actual_type = _column_type_name(column.type)
    if actual_type in allowed_types:
        return value

    # When text is acceptable, the found type is reported as None.
    found_type = None if "text" in allowed_types else actual_type
    raise PromptingError(
        [PromptingError.WrongColumnType([value], found_type, allowed_types)]
    )
def test_dict_prompting_error_concatenate_different_types(self):
    """Dict children with different found types report separate errors."""
    wanted_types = frozenset({"number"})
    schema = ParamSchema.Dict(
        {
            "x": ParamSchema.Column(column_types=frozenset({"number"})),
            "y": ParamSchema.Column(column_types=frozenset({"number"})),
        }
    )
    params = {"x": "A", "y": "B"}

    with self.assertRaises(PromptingError) as raised:
        self._call_clean_value(
            schema,
            params,
            input_table_columns=[TEXT("A"), TIMESTAMP("B")],
        )

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", wanted_types),
        PromptingError.WrongColumnType(["B"], "timestamp", wanted_types),
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_quick_fixes_no_conversions_yet(self):
    """A timestamp column where a number is wanted renders no quick fixes."""
    # Let's see how our users get stuck and *then* decide whether to build
    # other, more esoteric converters. [2021-05-03, adamhooper] *I* would
    # love a UNIX timestamp <=> integer converter; but would other users be
    # too confused if a quick-fix suggested to add one in the wrong place?
    prompting_error = PromptingError(
        [PromptingError.WrongColumnType(["A"], "timestamp", frozenset({"number"}))]
    )

    expected_message = I18nMessage(
        "py.renderer.execute.types.PromptingError.WrongColumnType.general.message.without_convert_buttons",
        {
            "columns": 1,
            "0": "A",
            "found_type": "timestamp",
            "best_wanted_type": "number",
        },
        None,
    )

    self.assertEqual(
        prompting_error.as_render_errors(),
        [RenderError(expected_message, [])],
    )
def test_quick_fixes(self):
    """Each WrongColumnType renders its own error with a text-to-number fix."""
    message_id = "py.renderer.execute.types.PromptingError.WrongColumnType.general.message.before_convert_buttons"
    quick_fix_id = "py.renderer.execute.types.PromptingError.WrongColumnType.general.quick_fix"

    prompting_error = PromptingError(
        [
            PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
            PromptingError.WrongColumnType(["B", "C"], "text", frozenset({"number"})),
        ]
    )

    # Expected rendering of the single-column error on "A".
    expected_for_a = RenderError(
        I18nMessage(
            message_id,
            {"columns": 1, "0": "A", "found_type": "text"},
            None,
        ),
        [
            QuickFix(
                I18nMessage(quick_fix_id, {"wanted_type": "number"}, None),
                QuickFixAction.PrependStep("converttexttonumber", {"colnames": ["A"]}),
            )
        ],
    )
    # Expected rendering of the two-column error on "B" and "C".
    expected_for_b_and_c = RenderError(
        I18nMessage(
            message_id,
            {"columns": 2, "0": "B", "1": "C", "found_type": "text"},
            None,
        ),
        [
            QuickFix(
                I18nMessage(quick_fix_id, {"wanted_type": "number"}, None),
                QuickFixAction.PrependStep(
                    "converttexttonumber", {"colnames": ["B", "C"]}
                ),
            )
        ],
    )

    self.assertEqual(
        prompting_error.as_render_errors(),
        [expected_for_a, expected_for_b_and_c],
    )
def test_quick_fixes_multiple_conversions(self):
    """A text column with several wanted types gets one quick fix per type."""
    # For example, "linechart" X axis may be temporal or number
    prompting_error = PromptingError(
        [
            PromptingError.WrongColumnType(
                ["A"], "text", frozenset({"number", "date", "timestamp"})
            )
        ]
    )

    # (wanted type, module to prepend), in the order the fixes are expected.
    conversions = [
        ("date", "converttexttodate"),
        ("number", "converttexttonumber"),
        ("timestamp", "convert-date"),
    ]
    expected_quick_fixes = [
        QuickFix(
            I18nMessage(
                "py.renderer.execute.types.PromptingError.WrongColumnType.general.quick_fix",
                {"wanted_type": wanted_type},
                None,
            ),
            QuickFixAction.PrependStep(module_slug, {"colnames": ["A"]}),
        )
        for wanted_type, module_slug in conversions
    ]
    expected_message = I18nMessage(
        "py.renderer.execute.types.PromptingError.WrongColumnType.general.message.before_convert_buttons",
        {"columns": 1, "0": "A", "found_type": "text"},
        None,
    )

    self.assertEqual(
        prompting_error.as_render_errors(),
        [RenderError(expected_message, expected_quick_fixes)],
    )
def _(
    schema: ParamSchema.Multicolumn, value: List[str], input_metadata: TableMetadata
) -> List[str]:
    """Clean a multicolumn parameter value against the input table.

    Requested names that are not input columns are dropped. The surviving
    names are returned in the order the columns appear in
    ``input_metadata.columns``, not in the order they were requested.

    Returns:
        The list of requested column names that exist and whose type the
        schema accepts. (The annotation used to say ``str``; the function has
        always returned a list.)

    Raises:
        RuntimeError: if ``schema.tab_parameter`` is set (unsupported here).
        PromptingError: via ``error_agg.raise_if_nonempty()`` -- judging by
            its name, raised when at least one requested column has a type
            outside ``schema.column_types``.
    """
    if schema.tab_parameter:
        raise RuntimeError("Unsupported: fetch multicolumn with tab_parameter")

    error_agg = PromptingErrorAggregator()
    requested_colnames = set(value)
    allowed_types = schema.column_types

    valid_colnames = []
    # ignore colnames not in valid_columns
    # iterate in table order
    for column in input_metadata.columns:
        if column.name not in requested_colnames:
            continue

        if allowed_types and _column_type_name(column.type) not in allowed_types:
            # When text is acceptable, the found type is reported as None.
            if "text" in allowed_types:
                found_type = None
            else:
                found_type = _column_type_name(column.type)
            error_agg.add(
                PromptingError.WrongColumnType(
                    [column.name], found_type, allowed_types
                )
            )
        else:
            valid_colnames.append(column.name)

    error_agg.raise_if_nonempty()

    return valid_colnames
def _(self, schema: ParamSchema.Multicolumn, value: List[str]) -> List[str]:
    """Clean a multicolumn parameter value against the relevant tab's columns.

    The candidate columns come from
    ``self.output_columns_for_tab_parameter(schema.tab_parameter)``, used here
    as a mapping of column name to column. Requested names absent from that
    mapping are dropped; the rest are returned in the mapping's iteration
    order (presumably table order), not in the order requested.

    Returns:
        The list of requested column names that exist and whose type the
        schema accepts. (The annotation used to say ``str``; the method has
        always returned a list.)

    Raises:
        PromptingError: via ``error_agg.raise_if_nonempty()`` -- judging by
            its name, raised when at least one requested column has a type
            outside ``schema.column_types``.
    """
    valid_columns = self.output_columns_for_tab_parameter(schema.tab_parameter)

    error_agg = PromptingErrorAggregator()
    requested_colnames = set(value)
    allowed_types = schema.column_types

    valid_colnames = []
    # ignore colnames not in valid_columns
    # iterate in table order
    for colname, column in valid_columns.items():
        if colname not in requested_colnames:
            continue

        if allowed_types and _column_type_name(column.type) not in allowed_types:
            # When text is acceptable, the found type is reported as None.
            if "text" in allowed_types:
                found_type = None
            else:
                found_type = _column_type_name(column.type)
            error_agg.add(
                PromptingError.WrongColumnType(
                    [column.name], found_type, allowed_types
                )
            )
        else:
            valid_colnames.append(column.name)

    error_agg.raise_if_nonempty()

    return valid_colnames
def _(self, schema: ParamSchema.Column, value: str) -> str:
    """Clean a single-column parameter value against the relevant tab's columns.

    Returns ``value`` when it names a column whose type the schema accepts,
    or ``""`` when it is not among the columns returned by
    ``self.output_columns_for_tab_parameter(schema.tab_parameter)``.

    Raises:
        PromptingError: if the column exists but its type is not in
            ``schema.column_types``.
    """
    columns_by_name = self.output_columns_for_tab_parameter(schema.tab_parameter)
    if value not in columns_by_name:
        return ""  # Null column

    column = columns_by_name[value]
    allowed_types = schema.column_types
    if not allowed_types:
        # No type restriction: any existing column is acceptable.
        return value

    actual_type = _column_type_name(column.type)
    if actual_type in allowed_types:
        return value

    # When text is acceptable, the found type is reported as None.
    found_type = None if "text" in allowed_types else actual_type
    raise PromptingError(
        [PromptingError.WrongColumnType([value], found_type, allowed_types)]
    )
def test_clean_column_prompting_error_convert_to_text(self):
    """A number column where text is wanted reports found type None."""
    wanted_types = frozenset({"text"})

    with self.assertRaises(PromptingError) as raised:
        self._call_clean_value(
            ParamSchema.Column(column_types=frozenset({"text"})),
            "A",
            input_table_columns=[NUMBER("A")],
        )

    expected_errors = [PromptingError.WrongColumnType(["A"], None, wanted_types)]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_clean_condition_timestamp_wrong_column_type_and_wrong_value(self):
    """A timestamp condition reports both the column-type and the value error."""
    condition = {
        "operation": "timestamp_is_greater_than",
        "column": "A",
        "value": "Yesterday",
        "isCaseSensitive": False,
        "isRegex": False,
    }

    with self.assertRaises(PromptingError) as raised:
        self._call_clean_value(
            ParamSchema.Condition(),
            condition,
            input_table_columns=[NUMBER("A")],
        )

    # The column-type error is expected before the value-coercion error.
    expected_errors = [
        PromptingError.WrongColumnType(
            ["A"], "number", frozenset({"date", "timestamp"})
        ),
        PromptingError.CannotCoerceValueToTimestamp("Yesterday"),
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_clean_condition_number_wrong_column_type_and_wrong_value(self):
    """A number condition reports both the column-type and the value error."""
    condition = {
        "operation": "number_is",
        "column": "A",
        "value": "bad",
        "isCaseSensitive": False,
        "isRegex": False,
    }

    with self.assertRaises(PromptingError) as raised:
        self._call_clean_value(
            ParamSchema.Condition(),
            condition,
            input_table_columns=[TEXT("A")],
        )

    # The column-type error is expected before the value-coercion error.
    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.CannotCoerceValueToNumber("bad"),
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_clean_column_prompting_error_convert_to_number(self):
    """A text column where a number is wanted reports found type "text"."""
    table_shape = TableMetadata(3, [Column("A", ColumnType.Text())])
    wanted_types = frozenset({"number"})

    with self.assertRaises(PromptingError) as raised:
        clean_value(
            ParamSchema.Column(column_types=frozenset({"number"})),
            "A",
            table_shape,
        )

    expected_errors = [PromptingError.WrongColumnType(["A"], "text", wanted_types)]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_dict_prompting_error(self):
    """Each failing Dict child contributes its own WrongColumnType error."""
    table_shape = TableMetadata(
        3,
        [Column("A", ColumnType.Text()), Column("B", ColumnType.Text())],
    )
    schema = ParamSchema.Dict(
        {
            "col1": ParamSchema.Column(column_types=frozenset({"number"})),
            "col2": ParamSchema.Column(column_types=frozenset({"timestamp"})),
        }
    )
    params = {"col1": "A", "col2": "B"}

    with self.assertRaises(PromptingError) as raised:
        clean_value(schema, params, table_shape)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "text", frozenset({"timestamp"})),
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_list_prompting_error_concatenate_same_type(self):
    """Two text columns failing the same number check merge into one error."""
    column_schema = ParamSchema.Column(column_types=frozenset({"number"}))
    schema = ParamSchema.List(inner_schema=column_schema)

    with self.assertRaises(PromptingError) as raised:
        self._call_clean_value(
            schema,
            ["A", "B"],
            input_table_columns=[TEXT("A"), TEXT("B")],
        )

    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], "text", frozenset({"number"}))
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_list_prompting_error_concatenate_different_type_to_text(self):
    """Number and timestamp columns wanting text merge into one error."""
    column_schema = ParamSchema.Column(column_types=frozenset({"text"}))
    schema = ParamSchema.List(inner_schema=column_schema)

    with self.assertRaises(PromptingError) as raised:
        self._call_clean_value(
            schema,
            ["A", "B"],
            input_table_columns=[NUMBER("A"), TIMESTAMP("B")],
        )

    # Found type is None for both columns, so they share a single error.
    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_clean_column_prompting_error_convert_to_text(self):
    """A number column where text is wanted reports found type None."""
    # TODO make this _automatic_ instead of quick-fix?
    # Consider Regex. We probably want to pass the module a text Series
    # _separately_ from the input DataFrame. That way Regex can output
    # a new Text column but preserve its input column's data type.
    #
    # ... but for now: prompt for a Quick Fix.
    table_shape = TableMetadata(3, [Column("A", ColumnType.Number())])
    wanted_types = frozenset({"text"})

    with self.assertRaises(PromptingError) as raised:
        clean_value(
            ParamSchema.Column(column_types=frozenset({"text"})),
            "A",
            table_shape,
        )

    expected_errors = [PromptingError.WrongColumnType(["A"], None, wanted_types)]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_clean_condition_number_wrong_value(self):
    """A non-numeric value on a number column yields only a coercion error."""
    condition = {
        "operation": "number_is",
        "column": "A",
        "value": "bad",
        "isCaseSensitive": False,
        "isRegex": False,
    }

    with self.assertRaises(PromptingError) as raised:
        self._call_clean_value(
            ParamSchema.Condition(),
            condition,
            input_table_columns=[NUMBER("A")],
        )

    expected_errors = [PromptingError.CannotCoerceValueToNumber("bad")]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_clean_multicolumn_prompting_error_convert_to_text(self):
    """Non-text columns in a text-only multicolumn share one error."""
    wanted_types = frozenset({"text"})

    with self.assertRaises(PromptingError) as raised:
        schema = ParamSchema.Multicolumn(column_types=frozenset({"text"}))
        self._call_clean_value(
            schema,
            ["A", "B"],
            input_table_columns=[NUMBER("A"), TIMESTAMP("B"), TEXT("C")],
        )

    expected_errors = [PromptingError.WrongColumnType(["A", "B"], None, wanted_types)]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_clean_condition_not_with_subclause_error(self):
    """A "text_is" condition on a number column yields a wants-text error."""
    condition = {
        "operation": "text_is",
        "column": "A",
        "value": "",
        "isCaseSensitive": False,
        "isRegex": False,
    }

    with self.assertRaises(PromptingError) as raised:
        self._call_clean_value(
            ParamSchema.Condition(),
            condition,
            input_table_columns=[NUMBER("A")],
        )

    expected_errors = [
        PromptingError.WrongColumnType(["A"], None, frozenset({"text"}))
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_clean_condition_timestamp_wrong_value(self):
    """An unparseable value on a timestamp column yields only a coercion error."""
    condition = {
        "operation": "timestamp_is_greater_than",
        "column": "A",
        "value": "Yesterday",
        "isCaseSensitive": False,
        "isRegex": False,
    }

    with self.assertRaises(PromptingError) as raised:
        self._call_clean_value(
            ParamSchema.Condition(),
            condition,
            input_table_columns=[TIMESTAMP("A")],
        )

    expected_errors = [PromptingError.CannotCoerceValueToTimestamp("Yesterday")]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_clean_multicolumn_prompting_error_convert_to_text(self):
    """Non-text columns in a text-only multicolumn share one error.

    The value is passed to ``clean_value`` as the string ``"A,B"``.
    """
    # TODO make this _automatic_ instead of quick-fix?
    # ... but for now: prompt for a Quick Fix.
    table_shape = TableMetadata(
        3,
        [
            Column("A", ColumnType.Number()),
            Column("B", ColumnType.Timestamp()),
            Column("C", ColumnType.Text()),
        ],
    )
    wanted_types = frozenset({"text"})

    with self.assertRaises(PromptingError) as raised:
        schema = ParamSchema.Multicolumn(column_types=frozenset({"text"}))
        clean_value(schema, "A,B", table_shape)

    expected_errors = [PromptingError.WrongColumnType(["A", "B"], None, wanted_types)]
    self.assertEqual(raised.exception.errors, expected_errors)
def _clean_condition_recursively(
    value: Dict[str, Any], column_types: Dict[str, str]
) -> Tuple[Optional[Dict[str, Any]], List[PromptingError]]:
    """Clean one condition (and, recursively, its sub-conditions).

    Args:
        value: condition dict with at least an "operation" key. "and"/"or"
            conditions carry a "conditions" list; every other non-empty
            operation carries a "column", and number/timestamp operations
            also read "value".
        column_types: mapping of column name to its type name ("text",
            "number", "date", "timestamp", ...).

    Returns:
        A ``(clean_condition, errors)`` pair. ``clean_condition`` is None
        when there is nothing usable: empty operation, unknown column, a
        group with no usable children, or a leaf that produced errors.
        ``errors`` lists every problem found, in discovery order.
    """
    operation = value["operation"]

    # Empty operation: nothing selected yet.
    if operation == "":
        return None, []

    # Group: clean every child, keep the usable ones, collect all errors.
    if operation in {"and", "or"}:
        group_errors = []
        kept = []
        for child in value["conditions"]:
            child_condition, child_errors = _clean_condition_recursively(
                child, column_types
            )
            group_errors.extend(child_errors)
            if child_condition is not None:
                kept.append(child_condition)

        n_kept = len(kept)
        if n_kept == 0:
            return None, group_errors
        if n_kept == 1:
            # A group with a single usable child collapses to that child.
            return kept[0], group_errors
        return {"operation": operation, "conditions": kept}, group_errors

    # Inverse operation: clean the positive form, then wrap it in "not".
    if operation in _InverseOperations:
        positive_value = {**value, "operation": _InverseOperations[operation]}
        positive_condition, positive_errors = _clean_condition_recursively(
            positive_value, column_types
        )
        if positive_condition is None:
            return None, positive_errors
        return {"operation": "not", "condition": positive_condition}, positive_errors

    # Leaf comparison against a single column.
    colname = value["column"]
    if colname not in column_types:
        # No valid column selected.
        #
        # It would be nice to warn on invalid column ... but [2020-11-16]
        # we don't have a way to do that, because the default params are
        # empty and we validate them. More-general problem of the
        # same flavor: https://www.pivotaltracker.com/story/show/174473146
        return None, []

    column_type = column_types[colname]
    leaf_errors = []

    if operation.startswith("text"):
        if column_type == "text":
            # Text conditions pass through unchanged.
            return value, leaf_errors
        leaf_errors.append(
            PromptingError.WrongColumnType([colname], None, frozenset(["text"]))
        )
        return None, leaf_errors

    if operation.startswith("number"):
        if column_type != "number":
            leaf_errors.append(
                PromptingError.WrongColumnType(
                    [colname], column_type, frozenset(["number"])
                )
            )
        # Check the value even when the column type is wrong, so both
        # problems are reported together.
        try:
            number_value = float(value["value"])
        except ValueError:
            leaf_errors.append(
                PromptingError.CannotCoerceValueToNumber(value["value"])
            )
        if leaf_errors:
            return None, leaf_errors
        return (
            {"operation": operation, "column": colname, "value": number_value},
            leaf_errors,
        )

    if operation.startswith("timestamp"):
        if column_type not in {"date", "timestamp"}:
            leaf_errors.append(
                PromptingError.WrongColumnType(
                    [colname],
                    column_type,
                    frozenset(["date", "timestamp"]),
                )
            )
        # Check the value even when the column type is wrong, so both
        # problems are reported together.
        try:
            _validate_iso8601_string(value["value"])
        except ValueError:
            leaf_errors.append(
                PromptingError.CannotCoerceValueToTimestamp(value["value"])
            )
        if leaf_errors:
            return None, leaf_errors
        return (
            {"operation": operation, "column": colname, "value": value["value"]},
            leaf_errors,
        )

    # Only "cell..." operations remain; they need no value.
    assert operation.startswith("cell")
    return {"operation": operation, "column": colname}, leaf_errors