def test_migrate_params_impl_missing_output(self):
    """Expect ValueError when the migrated params lack a key the schema has."""
    lm = LoadedModule('x', '1', migrate_params_impl=lambda x: x)
    schema = ParamDTypeDict({'x': ParamDTypeString()})
    migrated_input = {}  # should have 'x' key

    with self.assertRaises(ValueError):
        lm.migrate_params(schema, migrated_input)
def test_migrate_params_impl_extra_output(self):
    """Expect ValueError when migrated params hold a key the schema lacks."""
    empty_schema = ParamDType.Dict({})
    lm = LoadedModule('x', '1', empty_schema,
                      migrate_params_impl=lambda x: x)
    unexpected_values = {'x': 'should not be here'}

    with self.assertRaises(ValueError):
        lm.migrate_params(unexpected_values)
def test_migrate_params_impl_wrong_output_type(self):
    """Expect ValueError when a migrated value has the wrong type."""
    lm = LoadedModule('x', '1', migrate_params_impl=lambda x: x)
    schema = ParamDTypeDict({'x': ParamDTypeString()})
    mistyped_values = {'x': 2}  # should be str

    with self.assertRaises(ValueError):
        lm.migrate_params(schema, mistyped_values)
def test_render_dynamic_default(self):
    """With no render_impl given, render() should hand back the input table."""
    lm = LoadedModule('int', '1', ParamDType.Dict({}))

    with self.assertLogs():
        input_result = ProcessResult(pd.DataFrame({'A': [1]}))
        result = lm.render(input_result, {}, tab_name='x',
                           fetch_result=None)

    expected = ProcessResult(pd.DataFrame({'A': [1]}))
    self.assertEqual(result, expected)
def test_render_dynamic_default(self):
    """With no render_impl given, render() should wrap the input table."""
    lm = LoadedModule('int', '1', True)

    with self.assertLogs():
        params = MockParams()
        table = pd.DataFrame({'A': [1]})
        result = lm.render(params, table, fetch_result=None)

    expected = ProcessResult(pd.DataFrame({'A': [1]}))
    self.assertEqual(result, expected)
def test_migrate_params_impl_exception(self):
    """An exception inside migrate_params_impl should surface as ValueError."""
    def migrate_params(params):
        # Deliberately raise KeyError: the key is absent from the empty dict.
        {}['a']

    expected_message = r'migrate_params\(\) raised KeyError'
    lm = LoadedModule('x', '1', migrate_params_impl=migrate_params)

    with self.assertRaisesRegex(ValueError, expected_message):
        lm.migrate_params([], {})
def test_migrate_params_impl_exception(self):
    """An exception inside migrate_params_impl should surface as ValueError."""
    def migrate_params(params):
        # Deliberately raise KeyError: the key is absent from the empty dict.
        {}["a"]

    empty_schema = ParamDType.Dict({})
    lm = LoadedModule("x", "1", empty_schema,
                      migrate_params_impl=migrate_params)
    expected_message = r"migrate_params\(\) raised KeyError"

    with self.assertRaisesRegex(ValueError, expected_message):
        lm.migrate_params({})
def test_migrate_params_impl_wrong_output_type(self):
    """Expect ValueError when a migrated value does not fit the schema type."""
    schema = ParamDType.Dict({"x": ParamDType.String()})
    lm = LoadedModule("x", "1", schema, migrate_params_impl=lambda x: x)
    mistyped_values = {"x": 2}  # should be str

    with self.assertRaises(ValueError):
        lm.migrate_params(mistyped_values)
def test_migrate_params_impl_missing_output(self):
    """Expect ValueError when migrated params lack a key the schema has."""
    schema = ParamDType.Dict({"x": ParamDType.String()})
    lm = LoadedModule("x", "1", schema, migrate_params_impl=lambda x: x)
    incomplete_values = {}  # should have 'x' key

    with self.assertRaises(ValueError):
        lm.migrate_params(incomplete_values)
def test_render_dynamic_default(self):
    """With no render_impl given, render() should hand back the input table."""
    empty_schema = ParamDType.Dict({})
    lm = LoadedModule("int", "1", empty_schema)

    with self.assertLogs():
        input_result = ProcessResult(pd.DataFrame({"A": [1]}))
        result = lm.render(
            input_result, {}, tab_name="x", fetch_result=None
        )

    self.assertEqual(result, ProcessResult(pd.DataFrame({"A": [1]})))
def test_render_truncate_and_sanitize(self):
    """render() should truncate, then sanitize, the module's result."""
    calls = []

    def record_truncate():
        calls.append('truncate')

    def record_sanitize():
        calls.append('sanitize')

    # Swap the result's post-processing hooks for recorders so the test
    # can observe which ones render() invokes, and in what order.
    retval = ProcessResult(pd.DataFrame({'A': [1]}))
    retval.truncate_in_place_if_too_big = record_truncate
    retval.sanitize_in_place = record_sanitize

    lm = LoadedModule('int', '1', False,
                      render_impl=lambda _a, _b: retval)
    with self.assertLogs():
        lm.render(MockParams(), pd.DataFrame(), fetch_result=None)

    self.assertEqual(calls, ['truncate', 'sanitize'])
def test_migrate_params_impl(self):
    """migrate_params() should return what migrate_params_impl produces."""
    def migrate_params(params):
        # Rename the old keys 'a'/'b' to the schema's 'x'/'y'.
        return {"x": params["a"], "y": params["b"]}

    schema = ParamDType.Dict(
        {"x": ParamDType.Integer(), "y": ParamDType.Integer()}
    )
    lm = LoadedModule("x", "1", schema, migrate_params_impl=migrate_params)

    old_values = {"a": 1, "b": 2}
    self.assertEqual(lm.migrate_params(old_values), {"x": 1, "y": 2})
def test_render_truncate(self):
    """render() should ask the module's result to truncate itself."""
    calls = []

    def record_truncate():
        calls.append("truncate")

    # Replace the truncation hook with a recorder so the call is observable.
    retval = ProcessResult(pd.DataFrame({"A": [1]}))
    retval.truncate_in_place_if_too_big = record_truncate

    lm = LoadedModule(
        "int", "1", ParamDType.Dict({}), render_impl=lambda _a, _b: retval
    )
    with self.assertLogs():
        empty_input = ProcessResult()
        lm.render(empty_input, {}, tab_name="x", fetch_result=None)

    self.assertEqual(calls, ["truncate"])
def test_migrate_params_impl(self):
    """migrate_params() should return what migrate_params_impl produces."""
    def migrate_params(params):
        # Rename the old keys 'a'/'b' to the schema's 'x'/'y'.
        return {'x': params['a'], 'y': params['b']}

    schema = ParamDTypeDict({
        'x': ParamDTypeInteger(),
        'y': ParamDTypeInteger(),
    })
    lm = LoadedModule('x', '1', migrate_params_impl=migrate_params)

    old_values = {'a': 1, 'b': 2}
    migrated = lm.migrate_params(schema, old_values)
    self.assertEqual(migrated, {'x': 1, 'y': 2})
def test_migrate_params_default(self):
    """Without a migrate_params_impl, values get coerced to fit the schema.

    The expectation below covers three cases: a missing key is filled
    with its default, a wrongly-typed value is converted, and a valid
    value is left as-is.
    """
    lm = LoadedModule('x', '1', migrate_params_impl=None)
    schema = ParamDTypeDict({
        'missing': ParamDTypeString(default='x'),
        'wrong_type': ParamDTypeBoolean(),
        'ok': ParamDTypeInteger(),
    })
    stored_values = {'wrong_type': 'true', 'ok': 3}

    migrated = lm.migrate_params(schema, stored_values)

    expected = {'missing': 'x', 'wrong_type': True, 'ok': 3}
    self.assertEqual(migrated, expected)
def test_render_use_input_columns_as_try_fallback_columns(self):
    """The input's column (with its format) should appear in the output.

    The render function returns a bare DataFrame with no format info.
    """
    def render(table, params):
        return pd.DataFrame({'A': [1]})

    column = Column('A', ColumnType.NUMBER('{:,d}'))
    lm = LoadedModule('int', '1', ParamDType.Dict({}), render_impl=render)

    with self.assertLogs():
        input_result = ProcessResult(pd.DataFrame({'A': [1]}),
                                     columns=[column])
        result = lm.render(input_result, {}, tab_name='x',
                           fetch_result=None)

    self.assertEqual(result.columns, [column])
def test_render_format_columns(self):
    """A "column_formats" entry in render()'s dict should set column format."""
    # More of an integration test....
    def render(table, params):
        return {
            "dataframe": pd.DataFrame({"A": [1]}),
            "column_formats": {"A": "{:,d}"},
        }

    empty_schema = ParamDType.Dict({})
    lm = LoadedModule("int", "1", empty_schema, render_impl=render)

    with self.assertLogs():
        empty_input = ProcessResult()
        result = lm.render(empty_input, {}, tab_name="x", fetch_result=None)

    expected_columns = [Column("A", ColumnType.NUMBER("{:,d}"))]
    self.assertEqual(result.columns, expected_columns)
def test_render_truncate(self):
    """render() should ask the module's result to truncate itself."""
    calls = []

    def record_truncate():
        calls.append('truncate')

    # Replace the truncation hook with a recorder so the call is observable.
    retval = ProcessResult(pd.DataFrame({'A': [1]}))
    retval.truncate_in_place_if_too_big = record_truncate

    lm = LoadedModule('int', '1', ParamDType.Dict({}),
                      render_impl=lambda _a, _b: retval)
    with self.assertLogs():
        empty_input = ProcessResult()
        lm.render(empty_input, {}, tab_name='x', fetch_result=None)

    self.assertEqual(calls, ['truncate'])
def test_render_with_tab_name(self):
    """render() should forward tab_name to a render_impl that accepts it."""
    captured = {}

    def render(table, params, *, tab_name):
        # Record the keyword argument so it can be checked after the call.
        captured['tab_name'] = tab_name

    lm = LoadedModule('int', '1', ParamDType.Dict({}), render_impl=render)
    in_result = ProcessResult(pd.DataFrame({'A': [0]}))

    with self.assertLogs():
        lm.render(in_result, {}, 'Tab X', None)

    # .get() yields None if render() was never invoked, failing the check.
    self.assertEqual(captured.get('tab_name'), 'Tab X')
def test_render_use_input_columns_as_try_fallback_columns(self):
    """The input's column (with its format) should appear in the output.

    The render function returns a bare DataFrame with no format info.
    """
    def render(table, params):
        return pd.DataFrame({"A": [1]})

    column = Column("A", ColumnType.NUMBER("{:,d}"))
    lm = LoadedModule("int", "1", ParamDType.Dict({}), render_impl=render)

    with self.assertLogs():
        input_result = ProcessResult(
            pd.DataFrame({"A": [1]}), columns=[column]
        )
        result = lm.render(
            input_result, {}, tab_name="x", fetch_result=None
        )

    self.assertEqual(result.columns, [column])
def test_migrate_params_default(self):
    """Without a migrate_params_impl, values get coerced to fit the schema.

    The expectation below covers three cases: a missing key is filled
    with its default, a wrongly-typed value is converted, and a valid
    value is left as-is.
    """
    schema = ParamDType.Dict(
        {
            "missing": ParamDType.String(default="x"),
            "wrong_type": ParamDType.Boolean(),
            "ok": ParamDType.Integer(),
        }
    )
    lm = LoadedModule("x", "1", schema, migrate_params_impl=None)
    stored_values = {"wrong_type": "true", "ok": 3}

    migrated = lm.migrate_params(stored_values)

    expected = {"missing": "x", "wrong_type": True, "ok": 3}
    self.assertEqual(migrated, expected)
def test_render_cannot_coerce_output(self):
    """Log and display error to user when module output is invalid."""
    def render(table, params, **kwargs):
        return {'foo': 'bar'}  # not a valid retval

    lm = LoadedModule('int', '1', render_impl=render)

    # The invalid return value must be logged at ERROR level ...
    with self.assertLogs(level=logging.ERROR):
        result = lm.render(ProcessResult(), {}, tab_name='x',
                           fetch_result=None)

    # ... and reported to the user through the result's error message.
    self.assertRegex(result.error, (
        r'ValueError: ProcessResult input must only contain '
        r'\{dataframe, error, json, quick_fixes\} '
    ))
def test_load_dynamic_is_cached(self):
    """Loading the same module version twice should reuse the first load."""
    code = b'def render(table, params):\n return table * 2'
    minio.minio_client.put_object(
        minio.ExternalModulesBucket,
        'imported/abcdef/imported.py',
        io.BytesIO(code),
        len(code),
    )

    with self.assertLogs('server.models.loaded_module'):
        first_version = MockModuleVersion('imported', 'abcdef', 'now')
        lm = LoadedModule.for_module_version_sync(first_version)

    # With module_from_spec patched to None, any attempt to call it would
    # fail -- so the second load must not go through it.
    with patch('importlib.util.module_from_spec', None):
        second_version = MockModuleVersion('imported', 'abcdef', 'now')
        lm2 = LoadedModule.for_module_version_sync(second_version)

    self.assertIs(lm.render_impl, lm2.render_impl)
def test_load_dynamic(self):
    """A module uploaded to the external-modules bucket loads and renders."""
    code = b'def render(table, params):\n return table * 2'
    minio.client.put_object(
        Bucket=minio.ExternalModulesBucket,
        Key='imported/abcdef/imported.py',
        Body=code,
        ContentLength=len(code),
    )

    with self.assertLogs('server.models.loaded_module'):
        module_version = MockModuleVersion('imported', 'abcdef',
                                           ParamDType.Dict({}), 'now')
        lm = LoadedModule.for_module_version_sync(module_version)

    self.assertEqual(lm.name, 'imported:abcdef')

    # We can't test that render_impl is exactly something, because we
    # don't have a handle on the loaded Python module outside of
    # LoadedModule. So we'll test by executing it.
    #
    # This ends up being kinda an integration test.
    with self.assertLogs('server.models.loaded_module'):
        input_result = ProcessResult(pd.DataFrame({'A': [1, 2]}))
        result = lm.render(input_result, {'col': 'A'}, tab_name='x',
                           fetch_result=ProcessResult())

    self.assertEqual(result.error, '')
    doubled = pd.DataFrame({'A': [2, 4]})
    assert_frame_equal(result.dataframe, doubled)
def test_change_parameters_deny_invalid_params(self, load_module):
    """Creating a ChangeParametersCommand with a bad value raises ValueError."""
    workflow = Workflow.create_and_init()
    first_tab = workflow.tabs.first()
    wf_module = first_tab.wf_modules.create(
        order=0,
        module_id_name='x',
        last_relevant_delta_id=workflow.last_delta_id,
        params={'x': 1},
    )

    module_spec = {
        'id_name': 'x',
        'name': 'x',
        'category': 'Clean',
        'parameters': [
            {'id_name': 'x', 'type': 'integer'},
        ],
    }
    ModuleVersion.create_or_replace_from_spec(module_spec)

    schema = ParamDType.Dict({
        'x': ParamDType.Integer(),
    })
    load_module.return_value = LoadedModule(
        'x', '1', schema, migrate_params_impl=lambda x: x
    )

    with self.assertRaises(ValueError):
        # Now the user requests to change params, giving an invalid param.
        command = ChangeParametersCommand.create(
            workflow=workflow,
            wf_module=wf_module,
            new_values={'x': 'Threeve'},
        )
        self.run_with_async_db(command)
def test_load_static(self):
    """A built-in module resolves to its in-tree render function."""
    # Test with a _real_ static module
    module_version = MockModuleVersion('pastecsv', '(ignored)',
                                       ParamDType.Dict({}), 'now')

    lm = LoadedModule.for_module_version_sync(module_version)

    self.assertEqual(lm.name, 'pastecsv:internal')
    self.assertEqual(lm.render_impl, server.modules.pastecsv.render)
def _test_fetch(self, fn, wf_module, save, load) -> ProcessResult:
    """
    Stub out a `fetch` method for `wf_module`. Return result.

    `load` and `save` are mocks: `load` is made to return a LoadedModule
    whose fetch is `fn`, and the returned value is the third positional
    argument that `save` was called with.
    """
    if wf_module.module_version is None:
        # White-box: we aren't testing what happens in the (valid) case
        # that a ModuleVersion has been deleted while in use. Pretend it's
        # there.
        wf_module._module_version = ModuleVersion(spec={'parameters': []})

    # Fall back to 1 when there is no tab/workflow in the database
    # (attribute lookup raises AttributeError in that case).
    workflow_id = getattr(wf_module, 'workflow_id', 1)

    # Mock the module we load, so it calls fn() directly.
    loaded_module = LoadedModule('test', '1', False, fetch_impl=fn)
    loaded_module.fetch = fn
    load.return_value = loaded_module
    save.return_value = future_none

    # Mock wf_module.save(), which we aren't testing.
    wf_module.save = Mock()

    with self.assertLogs(fetch.__name__, logging.DEBUG):
        self.run_with_async_db(
            fetch.fetch_wf_module(workflow_id, wf_module, timezone.now())
        )

    save.assert_called_once()
    positional_args = save.call_args[0]
    self.assertEqual(positional_args[0], workflow_id)
    self.assertEqual(positional_args[1], wf_module)
    return positional_args[2]
def test_fetch_invalid_retval_is_error(self):
    """Invalid fetch output is logged (without secrets) and shown as error."""
    async def fetch(params, *, secrets, **kwargs):
        # Secrets arrive as their own keyword argument; params must hold
        # only the non-secret values.
        self.assertEqual(params, {
            'url': 'http://example.org',
        })
        self.assertEqual(secrets, {
            'a-secret': {
                'name': 'DO NOT LOG',
                'secret': 'NO NO NO'
            },
        })
        return pd.DataFrame({'A': [1, '2']})  # mixed types -- invalid

    lm = LoadedModule('int', '1', ParamDType.Dict({}), fetch_impl=fetch)
    params = dict(url='http://example.org')
    secrets = {'a-secret': {'name': 'DO NOT LOG', 'secret': 'NO NO NO'}}

    with self.assertLogs(level=logging.ERROR) as cm:
        result = call_fetch(lm, params, secrets=secrets)

    # Should log an exception, which will email us helpful debugging
    # info
    log_line = cm.output[0]
    self.assertRegex(log_line, r'int\.fetch gave invalid output')
    self.assertRegex(log_line, r'workflow=1')
    # Escape regex metacharacters so the params JSON is matched literally.
    self.assertRegex(log_line, r'\{"url": "http://example\.org"\}')
    self.assertNotRegex(log_line, r'DO NOT LOG')
    self.assertRegex(log_line, r"invalid value 1 in column 'A'")

    # Should inform the user, who can follow up with the dev
    self.assertEqual(
        result,
        ProcessResult(error=(
            "Fetch produced invalid data: invalid value 1 in column 'A' "
            '(object values must all be str)'
        ))
    )
def test_fetch_static_params(self):
    """fetch() receives the params it was called with and can index them."""
    async def fetch(params, *, workflow_id, **kwargs):
        # params supports lookup by key; echo two values back as a table.
        return ProcessResult(
            pd.DataFrame({
                'foo': [params['foo']],
                'bar': [params['bar']],
            })
        )

    lm = LoadedModule('int', '1', ParamDType.Dict({}), fetch_impl=fetch)

    with self.assertLogs():
        result = call_fetch(lm, {
            'foo': 'bar',
            'bar': 'baz'
        }, workflow_id=123)

    self.assertEqual(result.error, '')
    self.assertEqual(
        result,
        ProcessResult(pd.DataFrame({
            'foo': ['bar'],
            'bar': ['baz'],
        }))
    )
def test_render_invalid_return_dict_is_error(self):
    """A render() dict with an unknown key produces a user-facing error."""
    def render(table, params):
        return {'table': pd.DataFrame({'A': [1]})}  # should be 'dataframe'

    lm = LoadedModule('int', '1', ParamDType.Dict({}), render_impl=render)

    with self.assertLogs():
        result = lm.render(ProcessResult(), {}, tab_name='x',
                           fetch_result=None)

    # Regex metacharacters ('.', '{', '}') are escaped so the message is
    # matched literally rather than by accident.
    self.assertRegex(
        result.error,
        (r'Something unexpected happened\. We have been notified and are '
         r'working to fix it\. If this persists, contact us\. Error code: '
         r'ProcessResult input must only contain \{dataframe, error, json, '
         r'quick_fixes, column_formats\}')
    )