def test_migrate_params_impl_extra_output(self):
    """Expect ValueError when migrate_params() returns a key the schema lacks.

    The schema is an empty Dict and the migration is the identity function,
    so the 'x' key passed in comes straight back out.
    """
    empty_schema = ParamDType.Dict({})
    loaded = LoadedModule(
        'x', '1', empty_schema, migrate_params_impl=lambda x: x
    )
    unexpected_params = {'x': 'should not be here'}
    with self.assertRaises(ValueError):
        loaded.migrate_params(unexpected_params)
def test_load_static(self):
    """Load the 'pastecsv' module and check its name and render_impl."""
    # Test with a _real_ static module
    module_version = MockModuleVersion(
        'pastecsv', '(ignored)', ParamDType.Dict({}), 'now'
    )
    loaded = LoadedModule.for_module_version_sync(module_version)

    self.assertEqual(loaded.name, 'pastecsv:internal')
    self.assertEqual(loaded.render_impl, server.modules.pastecsv.render)
def test_fetch_static_params(self):
    """fetch() can read the params given to call_fetch() by key.

    The fetch implementation copies params['foo'] and params['bar'] into a
    one-row table; the test then checks that table and an empty error.
    """

    async def fetch(params, *, workflow_id, **kwargs):
        # params is indexed by key here. The original note said "Params are
        # a Params object" -- TODO confirm against call_fetch().
        return ProcessResult(
            pd.DataFrame({
                'foo': [params['foo']],
                'bar': [params['bar']],
            })
        )

    lm = LoadedModule('int', '1', ParamDType.Dict({}), fetch_impl=fetch)
    with self.assertLogs():
        result = call_fetch(
            lm,
            {'foo': 'bar', 'bar': 'baz'},
            workflow_id=123,
        )

    self.assertEqual(result.error, '')
    self.assertEqual(
        result,
        ProcessResult(pd.DataFrame({'foo': ['bar'], 'bar': ['baz']})),
    )
def test_load_dynamic_ignore_test_py(self):
    """Load a dynamic module stored next to setup.py and test_imported.py.

    After uploading the three files, the loaded module's render() must still
    double the input table.
    """
    source = b'def render(table, params):\n return table * 2'
    minio.client.put_object(
        Bucket=minio.ExternalModulesBucket,
        Key='imported/abcdef/imported.py',
        Body=source,
        ContentLength=len(source),
    )

    # write other .py files that aren't module code and should be ignored
    for ignored_key in (
        'imported/abcdef/setup.py',
        'imported/abcdef/test_imported.py',
    ):
        minio.client.put_object(
            Bucket=minio.ExternalModulesBucket,
            Key=ignored_key,
            Body=b'',
            ContentLength=0,
        )

    module_version = MockModuleVersion(
        'imported', 'abcdef', ParamDType.Dict({}), 'now'
    )
    with self.assertLogs('server.models.loaded_module'):
        loaded = LoadedModule.for_module_version_sync(module_version)

    # render_impl cannot be compared to a known function: the Python module
    # that LoadedModule imported is not reachable from here. Execute it
    # instead, which makes this something of an integration test.
    with self.assertLogs('server.models.loaded_module'):
        result = loaded.render(
            ProcessResult(pd.DataFrame({'A': [1, 2]})),
            {'col': 'A'},
            tab_name='x',
            fetch_result=ProcessResult(),
        )

    self.assertEqual(result.error, '')
    assert_frame_equal(result.dataframe, pd.DataFrame({'A': [2, 4]}))
def test_crashing_fetch(self, load_module):
    """A fetch() that raises is logged, and the update check is still recorded.

    The mocked module's fetch raises ValueError. The test expects an ERROR
    log record carrying that exception, and expects last_update_check and
    next_update to be written to the database anyway.
    """

    async def fake_fetch(*args, **kwargs):
        raise ValueError('boo')

    fake_module = Mock(LoadedModule)
    load_module.return_value = fake_module
    fake_module.param_schema = ParamDType.Dict({})
    fake_module.migrate_params.side_effect = lambda x: x
    fake_module.fetch.side_effect = fake_fetch

    workflow = Workflow.objects.create()
    tab = workflow.tabs.create(position=0)
    wf_module = tab.wf_modules.create(
        order=0,
        next_update=parser.parse('Aug 28 1999 2:24PM UTC'),
        update_interval=600,
        module_id_name='x',
    )
    # No trailing comma here: a comma would store a 1-tuple instead of the
    # ModuleVersion itself.
    wf_module._module_version = ModuleVersion(spec={'parameters': []})

    now = parser.parse('Aug 28 1999 2:34:02PM UTC')
    due_for_update = parser.parse('Aug 28 1999 2:44PM UTC')

    with self.assertLogs(fetch.__name__, level='ERROR') as cm:
        # We should log the actual error
        self.run_with_async_db(
            fetch.fetch_wf_module(workflow.id, wf_module, now)
        )
    self.assertEqual(cm.records[0].exc_info[0], ValueError)

    wf_module.refresh_from_db()
    # [adamhooper, 2018-10-26] while fiddling with tests, I changed the
    # behavior to record the update check even when module fetch fails.
    # Previously, an exception would prevent updating last_update_check,
    # and I think that must be wrong.
    self.assertEqual(wf_module.last_update_check, now)
    self.assertEqual(wf_module.next_update, due_for_update)
def test_change_parameters_deny_invalid_params(self, load_module):
    """ChangeParametersCommand.create() raises ValueError for a bad value.

    The module declares integer param 'x'; the requested new value is a
    string.
    """
    workflow = Workflow.create_and_init()
    first_tab = workflow.tabs.first()
    wf_module = first_tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
        params={"x": 1},
    )

    module_spec = {
        "id_name": "x",
        "name": "x",
        "category": "Clean",
        "parameters": [{"id_name": "x", "type": "integer"}],
    }
    ModuleVersion.create_or_replace_from_spec(module_spec)

    integer_schema = ParamDType.Dict({"x": ParamDType.Integer()})
    load_module.return_value = LoadedModule(
        "x", "1", integer_schema, migrate_params_impl=lambda x: x
    )

    with self.assertRaises(ValueError):
        # Now the user requests to change params, giving an invalid param.
        self.run_with_async_db(
            ChangeParametersCommand.create(
                workflow=workflow,
                wf_module=wf_module,
                new_values={"x": "Threeve"},
            )
        )
def test_fetch_wf_module(self, save_result, load_module):
    """fetch_wf_module() hands the fetch result to save_result.

    Also checks that last_update_check and next_update are stored on the
    step afterwards.
    """
    result = ProcessResult(pd.DataFrame({"A": [1]}), error="hi")

    async def stub_fetch(*args, **kwargs):
        return result

    # A spec'd mock stands in for the real LoadedModule.
    fake_module = Mock(LoadedModule)
    fake_module.param_schema = ParamDType.Dict({})
    fake_module.migrate_params.side_effect = lambda x: x
    fake_module.fetch.side_effect = stub_fetch
    load_module.return_value = fake_module
    save_result.side_effect = async_noop

    workflow = Workflow.create_and_init()
    first_tab = workflow.tabs.first()
    wf_module = first_tab.wf_modules.create(
        order=0,
        slug="step-1",
        auto_update_data=True,
        next_update=parser.parse("Aug 28 1999 2:24PM UTC"),
        update_interval=600,
    )
    wf_module._module_version = ModuleVersion(spec={"parameters": []})

    now = parser.parse("Aug 28 1999 2:24:02PM UTC")
    due_for_update = parser.parse("Aug 28 1999 2:34PM UTC")

    pending = fetch.fetch_wf_module(workflow.id, wf_module, now)
    self.run_with_async_db(pending)

    save_result.assert_called_with(workflow.id, wf_module, result)

    wf_module.refresh_from_db()
    self.assertEqual(wf_module.last_update_check, now)
    self.assertEqual(wf_module.next_update, due_for_update)
def test_fetch_invalid_retval_is_error(self):
    """A fetch() returning a mixed-type column becomes an error result.

    The ERROR log line must mention the module, workflow and params, and
    must not contain the secret's name.
    """
    expected_secrets = {
        "a-secret": {"name": "DO NOT LOG", "secret": "NO NO NO"}
    }

    async def fetch(params, *, secrets, **kwargs):
        # params holds only the plain values; secrets arrive through their
        # own keyword argument.
        self.assertEqual(params, {"url": "http://example.org"})
        self.assertEqual(
            secrets,
            {"a-secret": {"name": "DO NOT LOG", "secret": "NO NO NO"}},
        )
        return pd.DataFrame({"A": [1, "2"]})  # mixed types -- invalid

    lm = LoadedModule("int", "1", ParamDType.Dict({}), fetch_impl=fetch)
    params = dict(url="http://example.org")

    with self.assertLogs(level=logging.ERROR) as cm:
        result = call_fetch(lm, params, secrets=expected_secrets)

    # Should log an exception, which will email us helpful debugging info
    first_line = cm.output[0]
    self.assertRegex(first_line, r"int\.fetch gave invalid output")
    self.assertRegex(first_line, r"workflow=1")
    self.assertRegex(first_line, r'{"url": "http://example.org"}')
    self.assertNotRegex(first_line, r"DO NOT LOG")
    self.assertRegex(first_line, r"invalid value 1 in column 'A'")

    # Should inform the user, who can follow up with the dev
    expected_error = (
        "Fetch produced invalid data: invalid value 1 in column 'A' "
        "(object values must all be str)"
    )
    self.assertEqual(result, ProcessResult(error=expected_error))
def test_load_dynamic(self):
    """Upload a module's source, load it, and run its render().

    Checks the loaded module's name and that render() doubles the table.
    """
    source = b"def render(table, params):\n return table * 2"
    minio.client.put_object(
        Bucket=minio.ExternalModulesBucket,
        Key="imported/abcdef/imported.py",
        Body=source,
        ContentLength=len(source),
    )

    module_version = MockModuleVersion(
        "imported", "abcdef", ParamDType.Dict({}), "now"
    )
    with self.assertLogs("server.models.loaded_module"):
        loaded = LoadedModule.for_module_version_sync(module_version)

    self.assertEqual(loaded.name, "imported:abcdef")

    # render_impl cannot be compared to a known function: the Python module
    # that LoadedModule imported is not reachable from here. Execute it
    # instead, which makes this something of an integration test.
    input_result = ProcessResult(pd.DataFrame({"A": [1, 2]}))
    with self.assertLogs("server.models.loaded_module"):
        result = loaded.render(
            input_result,
            {"col": "A"},
            tab_name="x",
            fetch_result=ProcessResult(),
        )

    self.assertEqual(result.error, "")
    assert_frame_equal(result.dataframe, pd.DataFrame({"A": [2, 4]}))
def test_fetch_invalid_retval_is_error(self):
    """Invalid fetch() output is logged at ERROR and reported to the user.

    The log line is checked for module, workflow, params and the validation
    message, and checked not to contain the secret's name.
    """
    secret_values = {
        'a-secret': {'name': 'DO NOT LOG', 'secret': 'NO NO NO'},
    }

    async def fetch(params, *, secrets, **kwargs):
        # params holds only the plain values; secrets arrive through their
        # own keyword argument.
        self.assertEqual(params, {'url': 'http://example.org'})
        self.assertEqual(
            secrets,
            {'a-secret': {'name': 'DO NOT LOG', 'secret': 'NO NO NO'}},
        )
        return pd.DataFrame({'A': [1, '2']})  # mixed types -- invalid

    lm = LoadedModule('int', '1', ParamDType.Dict({}), fetch_impl=fetch)
    params = dict(url='http://example.org')

    with self.assertLogs(level=logging.ERROR) as cm:
        result = call_fetch(lm, params, secrets=secret_values)

    # Should log an exception, which will email us helpful debugging info
    logged = cm.output[0]
    required_patterns = (
        r'int\.fetch gave invalid output',
        r'workflow=1',
        r'{"url": "http://example.org"}',
    )
    for pattern in required_patterns:
        self.assertRegex(logged, pattern)
    self.assertNotRegex(logged, r'DO NOT LOG')
    self.assertRegex(logged, r"invalid value 1 in column 'A'")

    # Should inform the user, who can follow up with the dev
    message = (
        "Fetch produced invalid data: invalid value 1 in column 'A' "
        '(object values must all be str)'
    )
    self.assertEqual(result, ProcessResult(error=message))
def test_list_dtype(self):
    """A 'list' ParamSpec parses its children and exposes a nested dtype."""
    # Check that ParamSpec's with List type produce correct nested DTypes
    child_dicts = [
        {'id_name': 'intparam', 'type': 'integer', 'name': 'my number'},
        {'id_name': 'colparam', 'type': 'column', 'name': 'my column'},
    ]
    param_spec = ParamSpec.from_dict(
        dict(id_name='p', type='list', child_parameters=child_dicts)
    )

    expected_spec = ParamSpec.List(
        id_name='p',
        child_parameters=[
            ParamSpec.Integer(id_name='intparam', name='my number'),
            ParamSpec.Column(id_name='colparam', name='my column'),
        ],
    )
    self.assertEqual(param_spec, expected_spec)

    dtype = param_spec.dtype
    expected_dtype = DT.List(
        DT.Dict({'intparam': DT.Integer(), 'colparam': DT.Column()})
    )

    # effectively do a deep compare with repr
    self.assertEqual(repr(dtype), repr(expected_dtype))
def test_render_dynamic_default(self):
    """With no render_impl given, render() returns the input table as-is."""
    loaded = LoadedModule('int', '1', ParamDType.Dict({}))
    input_result = ProcessResult(pd.DataFrame({'A': [1]}))

    with self.assertLogs():
        result = loaded.render(
            input_result, {}, tab_name='x', fetch_result=None
        )

    self.assertEqual(result, ProcessResult(pd.DataFrame({'A': [1]})))
def test_migrate_params_impl_missing_output(self):
    """Expect ValueError when migrate_params() output lacks a schema key.

    The schema declares string param 'x'; the identity migration is given
    an empty dict.
    """
    schema = ParamDType.Dict({'x': ParamDType.String()})
    loaded = LoadedModule('x', '1', schema, migrate_params_impl=lambda x: x)

    with self.assertRaises(ValueError):
        # should have 'x' key
        loaded.migrate_params({})
def test_migrate_params_impl_wrong_output_type(self):
    """Expect ValueError when migrate_params() output has the wrong type.

    The schema declares 'x' as a string; the identity migration is given an
    int for 'x'.
    """
    schema = ParamDType.Dict({'x': ParamDType.String()})
    loaded = LoadedModule('x', '1', schema, migrate_params_impl=lambda x: x)

    with self.assertRaises(ValueError):
        # should be str
        loaded.migrate_params({'x': 2})
def test_migrate_params_impl_exception(self):
    """A KeyError inside migrate_params_impl surfaces as ValueError."""

    def migrate_params(params):
        # Looking up a missing key raises KeyError on purpose.
        {}["a"]

    empty_schema = ParamDType.Dict({})
    loaded = LoadedModule(
        "x", "1", empty_schema, migrate_params_impl=migrate_params
    )

    expected_message = r"migrate_params\(\) raised KeyError"
    with self.assertRaisesRegex(ValueError, expected_message):
        loaded.migrate_params({})
def test_fetch_workflow_id(self):
    """fetch() receives the workflow_id that was given to call_fetch()."""

    async def fetch(params, *, workflow_id, **kwargs):
        # Echo the id back in a table so the test can inspect it.
        table = pd.DataFrame({'A': [workflow_id]})
        return ProcessResult(table)

    loaded = LoadedModule('int', '1', ParamDType.Dict({}), fetch_impl=fetch)
    with self.assertLogs():
        result = call_fetch(loaded, {}, workflow_id=123)

    self.assertEqual(result, ProcessResult(pd.DataFrame({'A': [123]})))
def test_load_dynamic_is_cached(self):
    """Loading the same module version twice yields the same render_impl.

    The second load runs with importlib.util.module_from_spec replaced by
    None.
    """
    source = b'def render(table, params):\n return table * 2'
    minio.client.put_object(
        Bucket=minio.ExternalModulesBucket,
        Key='imported/abcdef/imported.py',
        Body=source,
        ContentLength=len(source),
    )

    with self.assertLogs('server.models.loaded_module'):
        first = LoadedModule.for_module_version_sync(
            MockModuleVersion(
                'imported', 'abcdef', ParamDType.Dict({}), 'now'
            )
        )

    with patch('importlib.util.module_from_spec', None):
        second = LoadedModule.for_module_version_sync(
            MockModuleVersion(
                'imported', 'abcdef', ParamDType.Dict({}), 'now'
            )
        )

    self.assertIs(first.render_impl, second.render_impl)
def test_change_parameters_across_module_versions(self, load_module):
    """New values are applied on top of migrate_params() output.

    Also checks that running the command backward restores the params
    exactly as they were stored.
    """
    workflow = Workflow.create_and_init()
    first_tab = workflow.tabs.first()

    # The step was saved when module 'x' was at version '1'. That version's
    # code is not needed here -- it might be long gone by the time
    # ChangeParametersCommand is called.
    wf_module = first_tab.wf_modules.create(
        order=0,
        module_id_name='x',
        last_relevant_delta_id=workflow.last_delta_id,
        params={'version': 'v1', 'x': 1},  # version-'1' params
    )

    # Now install version '2' of module 'x'.
    #
    # Version '2''s migrate_params() could do anything; in this test, it
    # simply changes 'version' from 'v1' to 'v2'
    v2_spec = {
        'id_name': 'x',
        'name': 'x',
        'category': 'Clean',
        'parameters': [
            {'id_name': 'version', 'type': 'string'},
            {'id_name': 'x', 'type': 'integer'},
        ],
    }
    ModuleVersion.create_or_replace_from_spec(
        v2_spec, source_version_hash='2'
    )

    v2_schema = ParamDType.Dict({
        'version': ParamDType.String(),
        'x': ParamDType.Integer(),
    })
    load_module.return_value = LoadedModule(
        'x',
        '2',
        v2_schema,
        migrate_params_impl=lambda params: {**params, 'version': 'v2'},
    )

    # Now the user requests to change params.
    #
    # The user was _viewing_ version '2' of module 'x', though
    # `wf_module.params` was at version 1. (Workbench ran
    # `migrate_params()` without saving the result when it presented
    # `params` to the user.) So the changes should apply atop _migrated_
    # params.
    creation = ChangeParametersCommand.create(
        workflow=workflow,
        wf_module=wf_module,
        new_values={'x': 2},
    )
    cmd = self.run_with_async_db(creation)
    self.assertEqual(
        wf_module.params,
        {
            'version': 'v2',  # migrate_params() ran
            'x': 2,  # and we applied changes on top of its output
        },
    )

    self.run_with_async_db(cmd.backward())
    self.assertEqual(
        wf_module.params,
        {
            'version': 'v1',  # exactly what we had before
            'x': 1,
        },
    )
def test_fetch_workflow_id(self):
    """fetch() is given the id of the workflow that owns the step."""
    workflow = Workflow.objects.create()
    wf_module = workflow.tabs.create(position=0).wf_modules.create(
        order=0, slug="step-1"
    )

    async def fetch(params, *, workflow_id, **kwargs):
        self.assertEqual(workflow_id, workflow.id)

    empty_schema = ParamDType.Dict({})
    self._test_fetch(fetch, DefaultMigrateParams, wf_module, empty_schema)
def test_fetch_get_stored_dataframe_no_stored_data_frame(self):
    """get_stored_dataframe() gives None for a freshly created step."""
    workflow = Workflow.objects.create()
    wf_module = workflow.tabs.create(position=0).wf_modules.create(
        order=0, slug="step-1"
    )

    async def fetch(params, *, get_stored_dataframe, **kwargs):
        stored = await get_stored_dataframe()
        self.assertIsNone(stored)

    empty_schema = ParamDType.Dict({})
    self._test_fetch(fetch, DefaultMigrateParams, wf_module, empty_schema)
def test_fetch_get_params(self):
    """fetch() is given the params that are stored on the step."""
    workflow = Workflow.objects.create()
    wf_module = workflow.tabs.create(position=0).wf_modules.create(
        order=0, params={'foo': 'bar'}
    )

    async def fetch(params, **kwargs):
        self.assertEqual(params, {'foo': 'bar'})

    schema = ParamDType.Dict({'foo': ParamDType.String()})
    self._test_fetch(fetch, DefaultMigrateParams, wf_module, schema)
def test_fetch_get_workflow_owner_anonymous(self):
    """get_workflow_owner() gives None when the workflow has no owner."""
    workflow = Workflow.objects.create(owner=None)
    wf_module = workflow.tabs.create(position=0).wf_modules.create(order=0)

    async def fetch(params, *, get_workflow_owner, **kwargs):
        found_owner = await get_workflow_owner()
        self.assertIsNone(found_owner)

    empty_schema = ParamDType.Dict({})
    self._test_fetch(fetch, DefaultMigrateParams, wf_module, empty_schema)
def test_fetch_get_workflow_owner(self):
    """get_workflow_owner() gives the User who owns the workflow."""
    owner = User.objects.create(username='******', email='*****@*****.**')
    workflow = Workflow.objects.create(owner=owner)
    wf_module = workflow.tabs.create(position=0).wf_modules.create(order=0)

    async def fetch(params, *, get_workflow_owner, **kwargs):
        found_owner = await get_workflow_owner()
        self.assertEqual(found_owner, owner)

    empty_schema = ParamDType.Dict({})
    self._test_fetch(fetch, DefaultMigrateParams, wf_module, empty_schema)
def test_load_dynamic_is_cached(self):
    """A second load of the same module version reuses render_impl.

    The second load happens while importlib.util.module_from_spec is
    patched to None.
    """
    module_source = b"def render(table, params):\n return table * 2"
    upload_args = dict(
        Bucket=minio.ExternalModulesBucket,
        Key="imported/abcdef/imported.py",
        Body=module_source,
        ContentLength=len(module_source),
    )
    minio.client.put_object(**upload_args)

    with self.assertLogs("server.models.loaded_module"):
        initial = LoadedModule.for_module_version_sync(
            MockModuleVersion(
                "imported", "abcdef", ParamDType.Dict({}), "now"
            )
        )

    with patch("importlib.util.module_from_spec", None):
        repeat = LoadedModule.for_module_version_sync(
            MockModuleVersion(
                "imported", "abcdef", ParamDType.Dict({}), "now"
            )
        )

    self.assertIs(initial.render_impl, repeat.render_impl)
def test_migrate_params_impl_exception(self):
    """migrate_params() reports a KeyError from the impl as ValueError."""

    def migrate_params(params):
        # Looking up a missing key raises KeyError on purpose.
        {}['a']

    lm = LoadedModule(
        'x',
        '1',
        ParamDType.Dict({}),
        migrate_params_impl=migrate_params,
    )

    pattern = r'migrate_params\(\) raised KeyError'
    with self.assertRaisesRegex(ValueError, pattern):
        lm.migrate_params({})
def test_render_dynamic_default(self):
    """A module built without render_impl passes the table through."""
    module = LoadedModule("int", "1", ParamDType.Dict({}))
    table_in = ProcessResult(pd.DataFrame({"A": [1]}))

    with self.assertLogs():
        result = module.render(
            table_in, {}, tab_name="x", fetch_result=None
        )

    expected = ProcessResult(pd.DataFrame({"A": [1]}))
    self.assertEqual(result, expected)
def test_fetch_get_stored_dataframe(self):
    """fetch() can await the get_stored_dataframe given to call_fetch()."""
    stored_df_getter = async_mock(return_value=pd.DataFrame({"A": [1]}))

    async def fetch(params, *, get_stored_dataframe, **kwargs):
        stored = await get_stored_dataframe()
        return ProcessResult(stored)

    loaded = LoadedModule("int", "1", ParamDType.Dict({}), fetch_impl=fetch)
    with self.assertLogs():
        result = call_fetch(
            loaded, {}, get_stored_dataframe=stored_df_getter
        )

    self.assertEqual(result, ProcessResult(pd.DataFrame({"A": [1]})))
def test_fetch_sync_no_params(self):
    """A plain (non-async) fetch(params) returning a DataFrame is accepted."""
    table = pd.DataFrame({'A': [1]})

    def fetch(params):
        # Synchronous, and takes no keyword arguments.
        return table

    loaded = LoadedModule('int', 1, ParamDType.Dict({}), fetch_impl=fetch)
    with self.assertLogs():
        result = call_fetch(loaded, {})

    self.assertEqual(result.error, '')
    assert_frame_equal(result.dataframe, table)
def test_migrate_params_impl(self):
    """migrate_params() returns what migrate_params_impl produced.

    The impl renames keys 'a'/'b' to 'x'/'y'; the schema declares both as
    integers.
    """

    def migrate_params(params):
        return {"x": params["a"], "y": params["b"]}

    schema = ParamDType.Dict(
        {"x": ParamDType.Integer(), "y": ParamDType.Integer()}
    )
    loaded = LoadedModule(
        "x", "1", schema, migrate_params_impl=migrate_params
    )

    migrated = loaded.migrate_params({"a": 1, "b": 2})
    self.assertEqual(migrated, {"x": 1, "y": 2})
def test_fetch_secrets(self):
    """fetch() receives the secrets dict that was given to call_fetch()."""

    async def fetch(params, *, secrets, **kwargs):
        # Echo repr(secrets) in a table so the test can compare it.
        return pd.DataFrame({'A': [repr(secrets)]})

    loaded = LoadedModule('int', '1', ParamDType.Dict({}), fetch_impl=fetch)
    secret = {'name': 'n', 'secret': 's'}

    with self.assertLogs():
        # The secrets dict is call_fetch()'s third positional argument.
        result = call_fetch(loaded, {}, {'x': secret})

    expected_table = pd.DataFrame({'A': [repr({'x': secret})]})
    self.assertEqual(result, ProcessResult(expected_table))