def test_param_schema_explicit():
    """An explicit ``param_schema`` in the spec is loaded as nested ParamSchema objects."""
    raw_spec = {
        "id_name": "x",
        "name": "x",
        "category": "Clean",
        "parameters": [{"id_name": "whee", "type": "custom"}],
        "param_schema": {
            "id_name": {
                "type": "dict",
                "properties": {
                    "x": {"type": "integer"},
                    "y": {"type": "string", "default": "X"},
                },
            }
        },
    }

    spec = load_spec(raw_spec)

    expected_inner = ParamSchema.Dict(
        {"x": ParamSchema.Integer(), "y": ParamSchema.String(default="X")}
    )
    assert spec.param_schema == ParamSchema.Dict({"id_name": expected_inner})
def test_render_uploaded_files(self):
    """render_arrow_v1() is given ``uploaded_files`` keyed by the file param's value."""
    file_id = "406b5e37-f217-4e87-b6b2-eede3bec6492"
    uploaded_at = datetime(2021, 4, 21, 12, 4, 5)

    def render_arrow_v1(table, params, *, uploaded_files, **kwargs):
        self.assertEqual(params["file"], file_id)
        uploaded_file = uploaded_files[params["file"]]
        self.assertEqual(uploaded_file.name, "x.data")
        self.assertEqual(uploaded_file.uploaded_at, uploaded_at)
        return ArrowRenderResult(make_table())

    schema = ParamSchema.Dict({"file": ParamSchema.File()})
    with ModuleTestEnv(param_schema=schema, render_arrow_v1=render_arrow_v1) as env:
        # Write the file into the environment's basedir before rendering
        temp_path = env.basedir / f"{file_id}_x.data"
        temp_path.write_bytes(b"hello, world!")
        upload = UploadedFile(
            name="x.data",
            filename=temp_path.name,
            uploaded_at=uploaded_at,
        )
        env.call_render(
            make_table(),
            params={"file": file_id},
            uploaded_files={file_id: upload},
        )
def test_render_using_tab_output(self):
    """A Tab param reaches a pandas-style render() with name, columns and dataframe."""

    def render(table, params):
        tab_value = params["tabparam"]
        self.assertEqual(tab_value.name, "Tab 1")
        expected_columns = {
            "X": ptypes.RenderColumn("X", "number", "{:,d}"),
            "Y": ptypes.RenderColumn("Y", "text", None),
        }
        self.assertEqual(tab_value.columns, expected_columns)
        assert_frame_equal(tab_value.dataframe, pd.DataFrame({"X": [1], "Y": ["y"]}))

    schema = ParamSchema.Dict({"tabparam": ParamSchema.Tab()})
    with ModuleTestEnv(param_schema=schema, render=render) as env:
        # The tab's output is written as an Arrow file inside env.basedir
        with arrow_table_context(
            make_column("X", [1], format="{:,d}"),
            make_column("Y", ["y"]),
            dir=env.basedir,
        ) as (path, _):
            tab_output = TabOutput(tab_name="Tab 1", table_filename=path.name)
            env.call_render(
                make_table(),
                params={"tabparam": "tab-1"},
                tab_outputs={"tab-1": tab_output},
            )
def test_render_empty_file_param(self):
    """A File param whose value is None is passed to render() as None."""

    def render(arrow_table, params, output_path, *args, **kwargs):
        self.assertIsNone(params["file"])

    file_schema = ParamSchema.Dict({"file": ParamSchema.File()})
    with ModuleTestEnv(param_schema=file_schema, render=render) as env:
        empty_params = {"file": None}
        env.call_render(make_table(), empty_params)
def test_param_schema_implicit():
    """Without an explicit ``param_schema``, one is derived from ``parameters``."""
    string_param = {"id_name": "foo", "type": "string", "default": "X"}
    secret_param = {
        "id_name": "bar",
        "type": "secret",
        "secret_logic": {"provider": "oauth2", "service": "google"},
    }
    menu_param = {
        "id_name": "baz",
        "type": "menu",
        "options": [
            {"value": "a", "label": "A"},
            "separator",
            {"value": "c", "label": "C"},
        ],
        "default": "c",
    }

    spec = load_spec(
        {
            "id_name": "googlesheets",
            "name": "x",
            "category": "Clean",
            "parameters": [string_param, secret_param, menu_param],
        }
    )

    expected = ParamSchema.Dict(
        {
            "foo": ParamSchema.String(default="X"),
            # secret is not in param_schema
            "baz": ParamSchema.Enum(choices=frozenset({"a", "c"}), default="c"),
        }
    )
    assert spec.param_schema == expected
def test_dict_prompting_error_concatenate_different_types(self):
    """Wrong-type errors from two Column params are both reported, one per column."""
    number_only = frozenset({"number"})
    schema = ParamSchema.Dict(
        {
            "x": ParamSchema.Column(column_types=number_only),
            "y": ParamSchema.Column(column_types=number_only),
        }
    )
    params = {"x": "A", "y": "B"}

    with self.assertRaises(PromptingError) as cm:
        self._call_clean_value(
            schema,
            params,
            input_table_columns=[TEXT("A"), TIMESTAMP("B")],
        )

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "timestamp", frozenset({"number"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def test_render_tab_outputs(self):
    """render_arrow_v1() is given ``tab_outputs`` with the tab's name and Arrow table."""

    def render_arrow_v1(table, params, *, tab_outputs, **kwargs):
        self.assertEqual(params["tab"], "tab-x")
        tab_output = tab_outputs["tab-x"]
        self.assertEqual(tab_output.tab_name, "Tab X")
        expected_table = make_table(
            make_column("X", [1], format="{:,d}"),
            make_column("Y", ["y"]),
        )
        assert_arrow_table_equals(tab_output.table, expected_table)
        return ArrowRenderResult(make_table())

    schema = ParamSchema.Dict({"tab": ParamSchema.Tab()})
    with ModuleTestEnv(param_schema=schema, render_arrow_v1=render_arrow_v1) as env:
        # The tab's output is written as an Arrow file inside env.basedir
        with arrow_table_context(
            make_column("X", [1], format="{:,d}"),
            make_column("Y", ["y"]),
            dir=env.basedir,
        ) as (path, _):
            outputs = {"tab-x": TabOutput(tab_name="Tab X", table_filename=path.name)}
            env.call_render(
                make_table(),
                params={"tab": "tab-x"},
                tab_outputs=outputs,
            )
def test_clean_file_safe_filename(self):
    """An upload whose name contains path separators gets a sanitized local filename.

    The uploaded file's ``name`` is reported unchanged, while the ``filename``
    in the result has "/" and "$" replaced with "-".
    """
    file_uuid = "6e00511a-8ac4-4b72-9acc-9d069992b5cf"
    unsafe_name = "/etc/passwd.$/etc/passwd"

    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(module_id_name="uploadfile", order=0, slug="step-1")
    # Plain f-string placeholders: "${...}" is JavaScript template syntax and
    # would leave stray literal "$" characters in the key.
    key = f"wf-{workflow.id}/wfm-{step.id}/{file_uuid}"
    s3.put_bytes(s3.UserFilesBucket, key, b"1234")
    model = UploadedFileModel.objects.create(
        step=step,
        name=unsafe_name,
        size=4,
        uuid=file_uuid,
        key=key,
    )

    with ExitStack() as inner_stack:
        result = self._call_prep_params(
            ParamSchema.Dict({"file": ParamSchema.File()}),
            {"file": file_uuid},
            step_id=step.id,
            exit_stack=inner_stack,
        )
        self.assertEqual(
            result.uploaded_files[file_uuid],
            UploadedFile(
                unsafe_name,
                f"{file_uuid}_-etc-passwd.--etc-passwd",
                model.created_at,
            ),
        )
def test_dict_recurse():
    """parse() turns the nested ``properties`` of a dict spec into ParamSchema objects."""
    raw = {"type": "dict", "properties": {"x": {"type": "string"}}}
    parsed = parse(raw)
    assert parsed == ParamSchema.Dict(properties={"x": ParamSchema.String()})
def test_clean_normal_dict(self):
    """Cleaning a dict of already-valid String/Integer values returns an equal dict."""
    schema = ParamSchema.Dict(
        {
            "str": ParamSchema.String(),
            "int": ParamSchema.Integer(),
        }
    )
    value = {"str": "foo", "int": 3}
    expected = value.copy()  # no-op

    cleaned = self._call_clean_value(schema, value)

    self.assertEqual(cleaned, expected)
def test_clean_normal_dict(self):
    """clean_value() on a dict of already-valid String/Integer values is a no-op."""
    input_shape = TableMetadata(3, [Column("A", ColumnType.Number())])
    schema = ParamSchema.Dict(
        {
            "str": ParamSchema.String(),
            "int": ParamSchema.Integer(),
        }
    )
    value = {"str": "foo", "int": 3}
    expected = value.copy()  # no-op

    cleaned = clean_value(schema, value, input_shape)

    self.assertEqual(cleaned, expected)
def test_render_with_no_kwargs(self):
    """A render() that accepts only (table, params) can still be called."""

    def render(table, params):
        return table * params["n"]

    schema = ParamSchema.Dict({"n": ParamSchema.Float()})
    with ModuleTestEnv(param_schema=schema, render=render) as env:
        input_table = make_table(make_column("A", [1]))
        outcome = env.call_render(input_table, {"n": 2})
        expected_table = make_table(make_column("A", [2]))
        assert_arrow_table_equals(outcome.read_table(), expected_table)
def test_clean_tab_omit_unused_tabs_from_tab_outputs(self):
    """Only the tab selected by the param appears in ``tab_outputs``."""
    available_tabs = {
        Tab("tab-1", "Tab 1"): StepResult(Path("tab-1.arrow"), [TEXT("A")]),
        Tab("tab-2", "Tab 2"): StepResult(Path("tab-2.arrow"), [TEXT("A")]),
        Tab("tab-3", "Tab 3"): StepResult(Path("tab-3.arrow"), [TEXT("A")]),
    }

    result = self._call_prep_params(
        ParamSchema.Dict({"x": ParamSchema.Tab()}),
        {"x": "tab-1"},
        tab_results=available_tabs,
    )

    expected_outputs = {"tab-1": TabOutput("Tab 1", "tab-1.arrow")}
    self.assertEqual(result.tab_outputs, expected_outputs)
def test_clean_file_happy_path(self):
    """A File param yields an UploadedFile entry and a temporary local copy.

    While the ExitStack is open, the file's bytes are readable from
    ``self.basedir``; after the stack closes, the file no longer exists.
    """
    file_uuid = "6e00511a-8ac4-4b72-9acc-9d069992b5cf"
    local_filename = f"{file_uuid}_x.csv.gz"

    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(module_id_name="uploadfile", order=0, slug="step-1")
    # Plain f-string placeholders: "${...}" is JavaScript template syntax and
    # would leave stray literal "$" characters in the key.
    key = f"wf-{workflow.id}/wfm-{step.id}/{file_uuid}"
    s3.put_bytes(s3.UserFilesBucket, key, b"1234")
    model = UploadedFileModel.objects.create(
        step=step,
        name="x.csv.gz",
        size=4,
        uuid=file_uuid,
        key=key,
    )

    local_path = self.basedir / local_filename
    with ExitStack() as inner_stack:
        result = self._call_prep_params(
            ParamSchema.Dict({"file": ParamSchema.File()}),
            {"file": file_uuid},
            step_id=step.id,
            exit_stack=inner_stack,
        )
        self.assertEqual(
            result,
            PrepParamsResult(
                {"file": file_uuid},
                tab_outputs={},
                uploaded_files={
                    file_uuid: UploadedFile(
                        "x.csv.gz",
                        local_filename,
                        model.created_at,
                    )
                },
            ),
        )
        self.assertEqual(local_path.read_bytes(), b"1234")

    # Assert that once `inner_stack` has been closed, the file is deleted
    self.assertFalse(local_path.exists())
def test_render_file_param(self):
    """A File param is handed to render() as an object whose read_bytes() works."""

    def render(arrow_table, params, output_path, *args, **kwargs):
        self.assertEqual(params["file"].read_bytes(), b"hi")

    file_id = "839526fa-1adb-4eec-9d29-f5b4d2fbba30"
    schema = ParamSchema.Dict({"file": ParamSchema.File()})
    with ModuleTestEnv(param_schema=schema, render=render) as env:
        filename = f"{file_id}_x.tar.gz"
        upload_path = env.basedir / filename
        upload_path.write_bytes(b"hi")
        upload = UploadedFile("x.tar.gz", filename, datetime.now())
        env.call_render(
            make_table(),
            {"file": file_id},
            uploaded_files={file_id: upload},
        )
def test_clean_tab_happy_path(self):
    """A Tab param keeps its slug and contributes one entry to ``tab_outputs``."""
    tab_results = {
        Tab("tab-1", "Tab 1"): StepResult(Path("tab-1.arrow"), [TEXT("A")]),
    }

    result = self._call_prep_params(
        ParamSchema.Dict({"x": ParamSchema.Tab()}),
        {"x": "tab-1"},
        tab_results=tab_results,
    )

    expected = PrepParamsResult(
        {"x": "tab-1"},
        tab_outputs={"tab-1": TabOutput("Tab 1", "tab-1.arrow")},
        uploaded_files={},
    )
    self.assertEqual(result, expected)
def test_clean_multicolumn_from_other_tab(self):
    """A Multicolumn with ``tab_parameter`` keeps only columns found in that tab."""
    schema = ParamSchema.Dict(
        {
            "tab": ParamSchema.Tab(),
            "columns": ParamSchema.Multicolumn(tab_parameter="tab"),
        }
    )
    params = {"tab": "tab-2", "columns": ["A-from-tab-1", "A-from-tab-2"]}
    other_tab_results = {
        Tab("tab-2", "Tab 2"): StepResult(
            Path("tab-2.arrow"), [NUMBER("A-from-tab-2")]
        ),
    }

    result = self._call_prep_params(
        schema,
        params,
        input_table_columns=[NUMBER("A-from-tab-1")],
        tab_results=other_tab_results,
    )

    self.assertEqual(result.params["columns"], ["A-from-tab-2"])
def test_clean_multicolumn_from_other_tab_that_does_not_exist(self):
    """A Multicolumn pointing at a missing tab is cleaned to an empty list."""
    # The other tab would not exist if the user selected and then deleted
    # it.
    schema = ParamSchema.Dict(
        {
            "tab": ParamSchema.Tab(),
            "columns": ParamSchema.Multicolumn(tab_parameter="tab"),
        }
    )
    params = {"tab": "tab-missing", "columns": ["A-from-tab-1"]}

    result = self._call_prep_params(
        schema=schema,
        params=params,
        input_table_columns=[NUMBER("A-from-tab-1")],
        tab_results={},
    )

    # result.params['tab'] is not what we're testing here
    self.assertEqual(result.params["columns"], [])
def test_clean_tabs_happy_path(self):
    """A Multitab param keeps its slugs and lists every selected tab's output."""
    tab_results = {
        Tab("tab-2", "Tab 2"): StepResult(Path("tab-2.arrow"), [NUMBER("B")]),
        Tab("tab-3", "Tab 3"): StepResult(Path("tab-3.arrow"), [NUMBER("C")]),
    }

    result = self._call_prep_params(
        ParamSchema.Dict({"x": ParamSchema.Multitab()}),
        {"x": ["tab-2", "tab-3"]},
        tab_results=tab_results,
    )

    expected = PrepParamsResult(
        {"x": ["tab-2", "tab-3"]},
        {
            "tab-2": TabOutput("Tab 2", "tab-2.arrow"),
            "tab-3": TabOutput("Tab 3", "tab-3.arrow"),
        },
        uploaded_files={},
    )
    self.assertEqual(result, expected)
def test_param_schema_includes_empty_tuples():
    """Field-less ParamSchema types still appear in the derived param_schema."""
    # Bug on 2021-04-21: empty NamedTuple ParamSchema classes evaluate to
    # False; but they should still be included in the param_schema.
    parameters = [
        {"id_name": "timezone", "name": "timezone", "type": "timezone"},
        {"id_name": "tab", "name": "tab", "type": "tab"},
        {"id_name": "condition", "type": "condition"},
    ]

    spec = load_spec(
        {
            "id_name": "x",
            "name": "x",
            "category": "Clean",
            "parameters": parameters,
        }
    )

    expected = ParamSchema.Dict(
        {
            "timezone": ParamSchema.Timezone(),
            "tab": ParamSchema.Tab(),
            "condition": ParamSchema.Condition(),
        }
    )
    assert spec.param_schema == expected
def test_dict_prompting_error(self):
    """Wrong-type errors from each Column param in a dict are all reported."""
    text_columns = [Column("A", ColumnType.Text()), Column("B", ColumnType.Text())]
    input_shape = TableMetadata(3, text_columns)
    schema = ParamSchema.Dict(
        {
            "col1": ParamSchema.Column(column_types=frozenset({"number"})),
            "col2": ParamSchema.Column(column_types=frozenset({"timestamp"})),
        }
    )
    params = {"col1": "A", "col2": "B"}

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, params, input_shape)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "text", frozenset({"timestamp"})),
    ]
    self.assertEqual(cm.exception.errors, expected_errors)
def _(self, schema: ParamSchema.Multichartseries, value: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Clean every chart series in ``value``, keeping those with a truthy column.

    Each entry is cleaned with ``self.clean_value()`` against a
    ``{"color", "column"}`` dict schema. Errors from any PromptingError raised
    while cleaning are collected in a PromptingErrorAggregator, whose
    ``raise_if_nonempty()`` is called before the cleaned list is returned.
    """
    # Recurse to clean_value(ParamSchema.Column) to clear missing columns
    series_schema = ParamSchema.Dict(
        {
            "color": ParamSchema.String(default="#000000"),
            "column": ParamSchema.Column(column_types=frozenset(["number"])),
        }
    )
    errors = PromptingErrorAggregator()
    cleaned_series = []
    for series in value:
        try:
            cleaned = self.clean_value(series_schema, series)
        except PromptingError as err:
            errors.extend(err.errors)
            continue
        if cleaned["column"]:  # it's a valid column
            cleaned_series.append(cleaned)
    errors.raise_if_nonempty()
    return cleaned_series
def test_clean_tabs_preserve_ordering(self):
    """Multitab values come back in the order the tabs appear in ``tab_results``."""
    # "x" gives wrongly-ordered tabs; renderprep should reorder them.
    tab_results = {
        Tab("tab-3", "Tab 3"): StepResult(Path("tab-3.arrow"), [NUMBER("C")]),
        Tab("tab-2", "Tab 2"): StepResult(Path("tab-2.arrow"), [NUMBER("B")]),
    }

    result = self._call_prep_params(
        ParamSchema.Dict({"x": ParamSchema.Multitab()}),
        {"x": ["tab-2", "tab-3"]},
        tab_results=tab_results,
    )

    expected = PrepParamsResult(
        {"x": ["tab-3", "tab-2"]},
        {
            "tab-3": TabOutput("Tab 3", "tab-3.arrow"),
            "tab-2": TabOutput("Tab 2", "tab-2.arrow"),
        },
        uploaded_files={},
    )
    self.assertEqual(result, expected)
def test_validate_invalid_child(self):
    """validate() raises ValueError("...not a string...") for a bad child value."""
    bad_value = {"foo": 3}
    with pytest.raises(ValueError, match="not a string"):
        schema = S.Dict({"foo": S.String()})
        schema.validate(bad_value)
def __init__(self, param_schema: ParamSchema = ParamSchema.Dict({}), **defs):
    """Store ``defs`` with a ``"ModuleSpec"`` entry wrapping ``param_schema``.

    A ``"ModuleSpec"`` key passed in ``defs`` replaces the generated entry.
    """
    merged = {"ModuleSpec": MockModuleSpec(param_schema)}
    merged.update(defs)
    self.defs = merged
def test_default(self):
    """A Dict schema's default is the dict of its children's defaults."""
    schema = S.Dict(
        {
            "foo": S.String(default="FOO"),
            "bar": S.Integer(default=3),
        }
    )
    assert schema.default == {"foo": "FOO", "bar": 3}
def test_validate_ok(self):
    """validate() does not raise for a dict with exactly the expected keys and types."""
    schema = S.Dict(
        {
            "foo": S.String(default="FOO"),
            "bar": S.Integer(default=3),
        }
    )
    valid_value = {"foo": "FOO", "bar": 3}
    schema.validate(valid_value)
class MockModuleVersion(NamedTuple):
    """Stand-in module version record; every field has a default."""

    # Module identifier
    id_name: str = "mod"
    # Placeholder version hash
    source_version_hash: str = "abc123"
    # Defaults to a schema with no parameters
    param_schema: ParamSchema.Dict = ParamSchema.Dict({})
def test_validate_not_dict(self):
    """validate() raises ValueError("...not a dict...") when given a list."""
    not_a_dict = []
    with pytest.raises(ValueError, match="not a dict"):
        schema = S.Dict({"foo": S.String()})
        schema.validate(not_a_dict)
def test_validate_extra_key(self):
    """validate() raises ValueError("...wrong keys...") when an extra key is present."""
    value_with_extra = {"foo": "x", "bar": "y"}
    with pytest.raises(ValueError, match="wrong keys"):
        schema = S.Dict({"foo": S.String()})
        schema.validate(value_with_extra)