def test_param_schema_explicit(self):
    """An explicit ``param_schema`` is parsed into the matching ParamDType tree."""
    # Hand-written schema, as a module author would supply it for a
    # "custom" parameter.
    explicit_schema = {
        "id_name": {
            "type": "dict",
            "properties": {
                "x": {"type": "integer"},
                "y": {"type": "string", "default": "X"},
            },
        }
    }
    spec = ModuleSpec(
        id_name="x",
        name="x",
        category="Clean",
        parameters=[{"id_name": "whee", "type": "custom"}],
        param_schema=explicit_schema,
    )

    actual = spec.get_param_schema()

    inner = ParamDType.Dict(
        {"x": ParamDType.Integer(), "y": ParamDType.String(default="X")}
    )
    expected = ParamDType.Dict({"id_name": inner})
    self.assertEqual(actual, expected)
def test_clean_normal_dict(self):
    """Cleaning a dict of plain string/integer values returns an equal dict."""
    context = self._render_context()
    schema = ParamDType.Dict(
        {
            "str": ParamDType.String(),
            "int": ParamDType.Integer(),
        }
    )
    value = {"str": "foo", "int": 3}
    # Snapshot taken before cleaning: the cleaned value should equal the input.
    unchanged = value.copy()

    cleaned = clean_value(schema, value, context)

    self.assertEqual(cleaned, unchanged)
def test_clean_normal_dict(self):
    """Cleaning a dict of plain string/integer values returns an equal dict."""
    number_column = Column("A", ColumnType.Number())
    input_shape = TableMetadata(3, [number_column])
    schema = ParamDType.Dict(
        {
            "str": ParamDType.String(),
            "int": ParamDType.Integer(),
        }
    )
    value = {"str": "foo", "int": 3}
    # Snapshot taken before cleaning: the cleaned value should equal the input.
    unchanged = value.copy()

    cleaned = clean_value(schema, value, input_shape)

    self.assertEqual(cleaned, unchanged)
def test_param_schema_implicit(self):
    """Without ``param_schema``, the schema is derived from the parameter list."""
    string_param = {"id_name": "foo", "type": "string", "default": "X"}
    secret_param = {
        "id_name": "bar",
        "type": "secret",
        "secret_logic": {"provider": "oauth2", "service": "google"},
    }
    menu_param = {
        "id_name": "baz",
        "type": "menu",
        "options": [
            {"value": "a", "label": "A"},
            "separator",
            {"value": "c", "label": "C"},
        ],
        "default": "c",
    }
    spec = ModuleSpec(
        id_name="googlesheets",
        name="x",
        category="Clean",
        parameters=[string_param, secret_param, menu_param],
    )

    actual = spec.get_param_schema()

    expected = ParamDType.Dict(
        {
            "foo": ParamDType.String(default="X"),
            # secret is not in param_schema
            "baz": ParamDType.Enum(choices=frozenset({"a", "c"}), default="c"),
        }
    )
    self.assertEqual(actual, expected)
def get_param_schema(self) -> ParamDType.Dict:
    """Return the dict dtype describing this module's params.

    When ``self.param_schema`` is set, it is wrapped as the ``properties``
    of a ``"dict"`` schema and parsed with ``ParamDType.parse``. Otherwise
    the schema is built from ``self.param_fields``, skipping every field
    whose ``dtype`` is ``None``.
    """
    explicit_schema = self.param_schema

    if explicit_schema is None:
        # Usual case: infer the schema from the module's parameter types.
        # Keying by id_name in a dict keeps the schema insensitive to
        # parameter ordering.
        inferred = {
            field.id_name: field.dtype
            for field in self.param_fields
            if field.dtype is not None
        }
        return ParamDType.Dict(inferred)

    # The module author wrote a schema in the YAML to define how 'custom'
    # parameters are stored.
    return ParamDType.parse({"type": "dict", "properties": explicit_schema})
def test_list_prompting_error_concatenate_same_type(self):
    """Two text columns in a number-only list yield one combined error."""
    table = arrow_table({"A": ["1"], "B": ["2"]})
    context = self._render_context(input_table=table)
    number_only_column = ParamDType.Column(column_types=frozenset({"number"}))
    schema = ParamDType.List(inner_dtype=number_only_column)

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, ["A", "B"], context)

    # Both offending columns are reported in a single WrongColumnType entry.
    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], "text", frozenset({"number"}))
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_multicolumn_sort_in_table_order(self):
    """Selected columns come back in the table's column order."""
    # The table lists "B" before "A"; the selection lists them the other way.
    table_columns = [
        Column("B", ColumnType.Number()),
        Column("A", ColumnType.Number()),
    ]
    input_shape = TableMetadata(3, table_columns)

    cleaned = clean_value(ParamDType.Multicolumn(), ["A", "B"], input_shape)

    self.assertEqual(cleaned, ["B", "A"])
def test_clean_condition_timestamp_wrong_value(self):
    """A timestamp comparison against a non-timestamp string is a prompting error."""
    timestamps = pa.array([datetime.now()], pa.timestamp("ns"))
    context = self._render_context(input_table=arrow_table({"A": timestamps}))
    condition = {
        "operation": "timestamp_is_greater_than",
        "column": "A",
        "value": "Yesterday",
        "isCaseSensitive": False,
        "isRegex": False,
    }

    with self.assertRaises(PromptingError) as cm:
        clean_value(ParamDType.Condition(), condition, context)

    expected_errors = [PromptingError.CannotCoerceValueToTimestamp("Yesterday")]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_condition_and_or_simplify(self):
    """An and/or nest holding a single leaf is cleaned down to that leaf."""
    context = self._render_context(input_table=arrow_table({"A": [1]}))
    leaf = {
        "operation": "cell_is_blank",
        "column": "A",
        "value": "",
        "isCaseSensitive": False,
        "isRegex": False,
    }
    nested = {
        "operation": "and",
        "conditions": [{"operation": "or", "conditions": [leaf]}],
    }

    cleaned = clean_value(ParamDType.Condition(), nested, context)

    # Only the operation and column are expected in the cleaned leaf.
    self.assertEqual(cleaned, {"operation": "cell_is_blank", "column": "A"})
def test_clean_float_with_int_value(self):
    # A ParamDType.Float value may arrive as an `int`: values come from
    # json.parse(), which only gives Numbers, so it can give "3" instead of
    # "3.0". We want the `params` dict to carry it as a `float`.
    cleaned = clean_value(ParamDType.Float(), 3, None)

    self.assertEqual(cleaned, 3.0)
    self.assertIsInstance(cleaned, float)
def test_list_prompting_error_concatenate_different_type_to_text(self):
    """Number and timestamp columns in a text-only list share one error."""
    table = arrow_table(
        {
            "A": [1],
            "B": pa.array([datetime.now()], pa.timestamp("ns")),
        }
    )
    context = self._render_context(input_table=table)
    text_only_column = ParamDType.Column(column_types=frozenset({"text"}))
    schema = ParamDType.List(inner_dtype=text_only_column)

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, ["A", "B"], context)

    # The combined error carries None as the found type.
    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_column_happy_path(self):
    """A number column passes through a number-only Column param unchanged."""
    input_shape = TableMetadata(3, [Column("A", ColumnType.Number())])
    schema = ParamDType.Column(column_types=frozenset({"number"}))

    cleaned = clean_value(schema, "A", input_shape)

    self.assertEqual(cleaned, "A")
def test_clean_condition_not(self):
    """``text_is_not`` is cleaned into ``not`` wrapping a ``text_is`` condition."""
    context = self._render_context(input_table=arrow_table({"A": ["a"]}))
    negated_input = {
        "operation": "text_is_not",
        "column": "A",
        "value": "a",
        "isCaseSensitive": False,
        "isRegex": False,
    }

    cleaned = clean_value(ParamDType.Condition(), negated_input, context)

    positive_condition = {
        "operation": "text_is",
        "column": "A",
        "value": "a",
        "isCaseSensitive": False,
        "isRegex": False,
    }
    self.assertEqual(
        cleaned, {"operation": "not", "condition": positive_condition}
    )
def test_clean_multicolumn_sort_in_table_order(self):
    """Selected columns come back in the table's column order."""
    # The table lists "B" before "A"; the selection lists them the other way.
    table = arrow_table({"B": [1], "A": [2]})
    context = self._render_context(input_table=table)

    cleaned = clean_value(ParamDType.Multicolumn(), ["A", "B"], context)

    self.assertEqual(cleaned, ["B", "A"])
def test_clean_multichartseries_non_number_is_prompting_error(self):
    """Each non-number series column is reported as its own WrongColumnType."""
    table = arrow_table(
        {
            "A": ["a"],
            "B": pa.array([datetime.now()], pa.timestamp("ns")),
        }
    )
    context = self._render_context(input_table=table)
    series = [
        {"column": "A", "color": "#aaaaaa"},
        {"column": "B", "color": "#cccccc"},
    ]

    with self.assertRaises(PromptingError) as cm:
        clean_value(ParamDType.Multichartseries(), series, context)

    wanted_types = frozenset({"number"})
    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", wanted_types),
        PromptingError.WrongColumnType(["B"], "datetime", wanted_types),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_multichartseries_missing_is_removed(self):
    """A series whose column is absent from the input table is dropped."""
    table = arrow_table({"A": [1], "B": [1]})
    context = self._render_context(input_table=table)
    present_series = {"column": "A", "color": "#aaaaaa"}
    missing_series = {"column": "C", "color": "#cccccc"}  # no column "C" in table

    cleaned = clean_value(
        ParamDType.Multichartseries(), [present_series, missing_series], context
    )

    self.assertEqual(cleaned, [{"column": "A", "color": "#aaaaaa"}])
def test_clean_multicolumn_missing_is_removed(self):
    """Column names absent from the input table are dropped from the selection."""
    table = arrow_table({"A": [1], "B": [1]})
    context = self._render_context(input_table=table)
    selection = ["A", "X", "B"]  # "X" is not a column of the table

    cleaned = clean_value(ParamDType.Multicolumn(), selection, context)

    self.assertEqual(cleaned, ["A", "B"])
def test_clean_multicolumn_missing_is_removed(self):
    """Column names absent from the input table are dropped from the selection."""
    table_columns = [
        Column("A", ColumnType.Number()),
        Column("B", ColumnType.Number()),
    ]
    input_shape = TableMetadata(3, table_columns)
    selection = ["A", "X", "B"]  # "X" is not a column of the table

    cleaned = clean_value(ParamDType.Multicolumn(), selection, input_shape)

    self.assertEqual(cleaned, ["A", "B"])
def test_clean_multicolumn_from_other_tab_that_does_not_exist(self):
    """Columns tied to a tab missing from ``tab_results`` clean to an empty list."""
    # A tab goes missing when the user selects it and then deletes it.
    schema = ParamDType.Dict(
        {
            "tab": ParamDType.Tab(),
            "columns": ParamDType.Multicolumn(tab_parameter="tab"),
        }
    )
    params = {"tab": "tab-missing", "columns": ["A-from-tab-1"]}
    input_table = arrow_table({"A-from-tab-1": [1]})
    context = self._render_context(
        input_table=input_table,
        tab_results={},
        params=params,
    )

    cleaned = clean_value(schema, params, context)

    # Only "columns" is under test here; cleaned["tab"] is not checked.
    self.assertEqual(cleaned["columns"], [])
def test_clean_tab_missing_tab_selected_gives_none(self):
    """
    A selected tab slug that matches no tab is treated as a blank tab.

    Nonexistent tab slugs are visible to JS, but render() does not see them.
    """
    context = self._render_context(tab_results={})

    cleaned = clean_value(ParamDType.Tab(), "tab-XXX", context)

    self.assertEqual(cleaned, None)
def test_map_parse(self):
    """A "map" schema with a nested dict value parses to the matching Map dtype."""
    json_schema = {
        "type": "map",
        "value_dtype": {
            "type": "dict",  # test nesting
            "properties": {"foo": {"type": "string"}},
        },
    }

    parsed = ParamDType.parse(json_schema)

    expected = ParamDType.Map(
        value_dtype=ParamDType.Dict(properties={"foo": ParamDType.String()})
    )
    # The dtypes are compared through their repr.
    self.assertEqual(repr(parsed), repr(expected))
def test_clean_column_prompting_error_convert_to_number(self):
    """A text column given to a number-only Column param raises a prompting error."""
    table = arrow_table({"A": ["1"]})
    context = self._render_context(input_table=table)
    wanted_types = frozenset({"number"})
    schema = ParamDType.Column(column_types=wanted_types)

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, "A", context)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"}))
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_file_no_uploaded_file(self):
    """A File param naming a random UUID cleans to None and leaves no files behind."""
    workflow = Workflow.create_and_init()
    first_tab = workflow.tabs.first()
    step = first_tab.steps.create(
        module_id_name="uploadfile",
        order=0,
        slug="step-1",
    )
    context = self._render_context(step_id=step.id)
    unknown_file_id = str(uuid.uuid4())

    cleaned = clean_value(ParamDType.File(), unknown_file_id, context)

    self.assertIsNone(cleaned)
    # If a temporary file was created to house the download, it must be
    # gone by now: basedir is expected to be empty.
    leftover_paths = list(self.basedir.iterdir())
    self.assertListEqual(leftover_paths, [])
def test_clean_multicolumn_from_other_tab(self):
    """Only columns present in the selected tab's output survive cleaning."""
    tab2 = Tab("tab-2", "Tab 2")
    tab2_output_table = arrow_table({"A-from-tab-2": [1, 2]})
    schema = ParamDType.Dict(
        {
            "tab": ParamDType.Tab(),
            "columns": ParamDType.Multicolumn(tab_parameter="tab"),
        }
    )
    params = {
        "tab": "tab-2",
        "columns": ["A-from-tab-1", "A-from-tab-2"],
    }
    input_table = arrow_table({"A-from-tab-1": [1]})
    context = self._render_context(
        input_table=input_table,
        tab_results={tab2: RenderResult(tab2_output_table)},
        params=params,
    )

    cleaned = clean_value(schema, params, context)

    # Only "columns" is under test here; cleaned["tab"] is not checked.
    self.assertEqual(cleaned["columns"], ["A-from-tab-2"])
def test_dict_prompting_error(self):
    """Wrong-type errors from each dict entry are reported together."""
    table = arrow_table({"A": ["a"], "B": ["b"]})
    context = self._render_context(input_table=table)
    number_column = ParamDType.Column(column_types=frozenset({"number"}))
    timestamp_column = ParamDType.Column(column_types=frozenset({"timestamp"}))
    schema = ParamDType.Dict({"col1": number_column, "col2": timestamp_column})
    value = {"col1": "A", "col2": "B"}

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, value, context)

    # One error per offending entry, in the order of the schema's properties.
    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "text", frozenset({"timestamp"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_condition_empty_column_is_none(self):
    """A condition with an empty column cleans to None, alone or nested."""
    context = self._render_context(input_table=arrow_table({"A": [1]}))

    # Case 1: the bare condition.
    bare_condition = {
        "operation": "text_is",
        "column": "",
        "value": "",
        "isCaseSensitive": False,
        "isRegex": False,
    }
    cleaned_bare = clean_value(ParamDType.Condition(), bare_condition, context)
    self.assertEqual(cleaned_bare, None)

    # Case 2: the same condition inside a broader and/or.
    nested_condition = {
        "operation": "and",
        "conditions": [
            {
                "operation": "or",
                "conditions": [
                    {
                        "operation": "text_is",
                        "column": "",
                        "value": "",
                        "isCaseSensitive": False,
                        "isRegex": False,
                    }
                ],
            }
        ],
    }
    cleaned_nested = clean_value(ParamDType.Condition(), nested_condition, context)
    self.assertEqual(cleaned_nested, None)
def test_clean_column_prompting_error_convert_to_number(self):
    """A text column given to a number-only Column param raises a prompting error."""
    text_column = Column("A", ColumnType.Text())
    input_shape = TableMetadata(3, [text_column])
    schema = ParamDType.Column(column_types=frozenset({"number"}))

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, "A", input_shape)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"}))
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_clean_condition_empty_and_and_or_are_none(self):
    """An ``and`` holding only an empty ``or`` cleans to None."""
    context = self._render_context(input_table=arrow_table({"A": [1]}))
    empty_or = {"operation": "or", "conditions": []}
    condition = {"operation": "and", "conditions": [empty_or]}

    cleaned = clean_value(ParamDType.Condition(), condition, context)

    self.assertEqual(cleaned, None)
def dtype(self) -> Optional[ParamDType]:
    """Return an Option-wrapped dict dtype with four string properties.

    The properties are ``id``, ``name``, ``url`` and ``mimeType``, each a
    ``ParamDType.String()``.
    """
    string_keys = ("id", "name", "url", "mimeType")
    # Every key gets its own String() instance, in declaration order.
    properties = {key: ParamDType.String() for key in string_keys}
    return ParamDType.Option(ParamDType.Dict(properties))
def test_list_dtype(self):
    """A ParamSpec of type "list" yields the correctly nested DTypes."""
    spec_dict = {
        "id_name": "p",
        "type": "list",
        "child_parameters": [
            {"id_name": "intparam", "type": "integer", "name": "my number"},
            {"id_name": "colparam", "type": "column", "name": "my column"},
        ],
    }
    param_spec = ParamSpec.from_dict(spec_dict)

    expected_spec = ParamSpec.List(
        id_name="p",
        child_parameters=[
            ParamSpec.Integer(id_name="intparam", name="my number"),
            ParamSpec.Column(id_name="colparam", name="my column"),
        ],
    )
    self.assertEqual(param_spec, expected_spec)

    actual_dtype = param_spec.dtype
    expected_dtype = DT.List(
        DT.Dict({"intparam": DT.Integer(), "colparam": DT.Column()})
    )
    # Comparing repr() output is effectively a deep comparison of the dtypes.
    self.assertEqual(repr(actual_dtype), repr(expected_dtype))