def test_wf_module_duplicate(self):
    """Duplicating a WfModule copies its parameters and only the *current*
    stored-data version, not the whole fetch history."""
    self.createTestWorkflow()
    wfm1 = self.wfmodule1

    # store data to test that it is duplicated
    s1 = wfm1.store_fetched_table(mock_csv_table)  # older version, kept only to create history
    s2 = wfm1.store_fetched_table(mock_csv_table2)
    wfm1.set_fetched_data_version(s2)
    self.assertEqual(len(wfm1.list_fetched_data_versions()), 2)

    # duplicate into another workflow, as we would do when duplicating a workflow
    workflow2 = add_new_workflow("Test Workflow 2")
    InitWorkflowCommand.create(workflow2)
    wfm1d = wfm1.duplicate(workflow2)
    wfm1d.refresh_from_db()  # test what we actually have in the db

    # Scalar fields are carried over verbatim
    self.assertEqual(wfm1d.workflow, workflow2)
    self.assertEqual(wfm1d.module_version, wfm1.module_version)
    self.assertEqual(wfm1d.order, wfm1.order)
    self.assertEqual(wfm1d.notes, wfm1.notes)
    self.assertEqual(wfm1d.last_update_check, wfm1.last_update_check)
    self.assertEqual(wfm1d.is_collapsed, wfm1.is_collapsed)
    self.assertEqual(wfm1d.stored_data_version, wfm1.stored_data_version)

    # parameters should be duplicated
    self.assertEqual(
        ParameterVal.objects.filter(wf_module=wfm1d).count(),
        ParameterVal.objects.filter(wf_module=wfm1).count())

    # Stored data should contain a clone of content only, not complete version history
    self.assertIsNotNone(wfm1d.stored_data_version)
    self.assertEqual(wfm1d.stored_data_version, wfm1.stored_data_version)
    self.assertTrue(wfm1d.retrieve_fetched_table().equals(
        wfm1.retrieve_fetched_table()))
    self.assertEqual(len(wfm1d.list_fetched_data_versions()), 1)
def test_execute_new_revision(self, fake_load_module):
    """A newer delta invalidates the cached render, so execute re-renders."""
    workflow = Workflow.objects.create()
    delta1 = InitWorkflowCommand.create(workflow)
    wf_module = workflow.wf_modules.create(
        order=1, last_relevant_delta_id=delta1.id)

    # Cache a result against delta1 ...
    result1 = ProcessResult(pd.DataFrame({'A': [1]}))
    wf_module.cache_render_result(delta1.id, result1)
    wf_module.save()

    # ... then advance last_relevant_delta to delta2, making the cache stale
    result2 = ProcessResult(pd.DataFrame({'B': [2]}))
    delta2 = InitWorkflowCommand.create(workflow)
    wf_module.last_relevant_delta_id = delta2.id
    wf_module.save(update_fields=['last_relevant_delta_id'])

    fake_module = Mock(LoadedModule)
    fake_load_module.return_value = fake_module
    fake_module.render.return_value = result2

    self._execute(workflow)

    wf_module.refresh_from_db()
    # The fresh cache must now hold the newly-rendered result
    self.assertEqual(
        wf_module.get_cached_render_result(only_fresh=True).result,
        result2)
def clear_deltas(self):
    """Become a single-Delta Workflow."""
    from server.models.commands import InitWorkflowCommand

    try:
        # NOTE(review): if this import ever failed, the except clause below
        # would raise NameError on `Delta` — consider importing above the try.
        from server.models import Delta
        first_delta = self.deltas.get(prev_delta_id=None)
    except Delta.DoesNotExist:
        # Invariant failed. Defensive programming: recover.
        first_delta = InitWorkflowCommand.create(self)

    if not isinstance(first_delta, InitWorkflowCommand):
        # Invariant failed: first delta should be InitWorkflowCommand.
        # Defensive programming: recover. Delete _every_ Delta, and then
        # add the one that belongs.
        first_delta.delete()
        first_delta = InitWorkflowCommand.create(self)
    else:
        # First delta is valid: point the workflow at it.
        self.last_delta_id = first_delta.id
        self.save(update_fields=['last_delta_id'])

    try:
        # Select the _second_ delta.
        second_delta = first_delta.next_delta
    except Delta.DoesNotExist:
        # We're already a 1-delta Workflow
        return

    # Drop everything after the first delta, then clean up orphans.
    second_delta.delete_with_successors()
    self.delete_orphan_soft_deleted_models()
def create_and_init(**kwargs):
    """Create and return a _valid_ Workflow: one with a Tab and a Delta."""
    # Import here to avoid a circular dependency at module-load time.
    from server.models.commands import InitWorkflowCommand

    with transaction.atomic():
        new_workflow = Workflow.objects.create(**kwargs)
        InitWorkflowCommand.create(new_workflow)
        new_workflow.tabs.create(position=0, slug='tab-1', name='Tab 1')
    return new_workflow
def test_duplicate_copies_fresh_cache(self):
    # The cache's filename depends on workflow_id and wf_module_id.
    # Duplicating it would need more complex code :).
    result = ProcessResult(pandas.DataFrame({"a": [1]}))
    # Cache against the module's current delta, so the entry is "fresh"
    self.wf_module.cache_render_result(self.delta.id, result)

    workflow2 = Workflow.objects.create()
    tab2 = workflow2.tabs.create(position=0)
    InitWorkflowCommand.create(workflow2)

    dup = self.wf_module.duplicate_into_new_workflow(tab2)

    # A fresh cache entry must be copied along with the module
    dup_cached_result = dup.cached_render_result
    self.assertIsNotNone(dup_cached_result)
    self.assertEqual(dup_cached_result.result, result)
def test_duplicate_ignores_stale_cache(self):
    # The cache's filename depends on workflow_id and wf_module_id.
    # Duplicating it would need more complex code :).
    stale_result = ProcessResult(pandas.DataFrame({'a': [1]}))

    # Cache under delta 2 while the module says delta 1 is relevant,
    # so the cached entry is stale and must not survive duplication.
    self.wf_module.last_relevant_delta_id = 1
    self.wf_module.cache_render_result(2, stale_result)
    self.wf_module.save()

    other_workflow = Workflow.objects.create()
    InitWorkflowCommand.create(other_workflow)

    duplicated = self.wf_module.duplicate(other_workflow)

    self.assertIsNone(duplicated.get_cached_render_result())
def test_duplicate_ignores_stale_cache(self):
    # The cache's filename depends on workflow_id and wf_module_id.
    # Duplicating it would need more complex code :).
    result = ProcessResult(pandas.DataFrame({"a": [1]}))
    self.wf_module.cache_render_result(self.delta.id, result)

    # Now simulate a new delta that hasn't been rendered
    self.wf_module.last_relevant_delta_id += 1
    self.wf_module.save(update_fields=["last_relevant_delta_id"])

    workflow2 = Workflow.objects.create()
    tab2 = workflow2.tabs.create(position=0)
    InitWorkflowCommand.create(workflow2)

    dup = self.wf_module.duplicate_into_new_workflow(tab2)

    # The stale cache entry must not be copied to the duplicate
    dup_cached_result = dup.cached_render_result
    self.assertIsNone(dup_cached_result)
def test_wf_module_duplicate_disable_auto_update(self):
    """
    Duplicates should be lightweight by default: no auto-updating.
    """
    source_workflow = Workflow.objects.create()
    InitWorkflowCommand.create(source_workflow)
    source_module = source_workflow.wf_modules.create(
        order=0,
        auto_update_data=True,
        update_interval=600,
    )

    target_workflow = Workflow.objects.create()
    InitWorkflowCommand.create(target_workflow)
    copy = source_module.duplicate(target_workflow)

    # Auto-update is switched off on the copy ...
    self.assertEqual(copy.auto_update_data, False)
    # ... but the configured interval is carried over.
    self.assertEqual(copy.update_interval, 600)
def setUp(self): super().setUp() # log in self.queue_render_patcher = patch.object(rabbitmq, "queue_render") self.queue_render = self.queue_render_patcher.start() self.queue_render.return_value = future_none self.log_patcher = patch("server.utils.log_user_event_from_request") self.log_patch = self.log_patcher.start() self.factory = APIRequestFactory() self.workflow1 = Workflow.objects.create(name="Workflow 1", owner=self.user) self.delta = InitWorkflowCommand.create(self.workflow1) self.tab1 = self.workflow1.tabs.create(position=0) self.module_version1 = ModuleVersion.create_or_replace_from_spec({ "id_name": "module1", "name": "Module 1", "category": "Clean", "parameters": [], }) # Add another user, with one public and one private workflow self.otheruser = User.objects.create(username="******", email="*****@*****.**", password="******") self.other_workflow_private = Workflow.objects.create( name="Other workflow private", owner=self.otheruser) self.other_workflow_public = Workflow.objects.create( name="Other workflow public", owner=self.otheruser, public=True)
def test_render_other_renderer_rendering_so_skip(self, execute):
    """If another renderer holds the lock, log+ack without executing or requeueing."""
    execute.side_effect = async_noop
    workflow = Workflow.objects.create()
    delta = InitWorkflowCommand.create(workflow)
    ack = Mock(name='ack', side_effect=async_noop)
    requeue = Mock(name='requeue', side_effect=async_noop)

    async def inner():
        with self.assertLogs('renderer', level='INFO') as cm:
            await render_workflow_and_maybe_requeue(
                FailedRenderLocker(), workflow.id, delta.id,
                ack, requeue,
            )
            # Exactly one INFO line explaining why the render was skipped
            self.assertEqual(cm.output, [
                (f'INFO:renderer.render:Workflow {workflow.id} is '
                 'being rendered elsewhere; ignoring'),
            ])

    self.run_with_async_db(inner())
    execute.assert_not_called()
    ack.assert_called()
    requeue.assert_not_called()
def test_render_unknown_error_so_crash(self, execute):
    """An unexpected exception from execute() is logged as ERROR; the
    message is acked (not requeued) so it doesn't loop forever."""
    # Test what happens when our `renderer.execute` module is buggy and
    # raises something it shouldn't raise.
    execute.side_effect = FileNotFoundError
    workflow = Workflow.objects.create()
    delta = InitWorkflowCommand.create(workflow)
    ack = Mock(name='ack', side_effect=async_noop)
    requeue = Mock(name='requeue', side_effect=async_noop)

    async def inner():
        with self.assertLogs('renderer', level='INFO') as cm:
            await render_workflow_and_maybe_requeue(
                SuccessfulRenderLocker(), workflow.id, delta.id,
                ack, requeue,
            )
            # Fix: the pattern must be a raw string. In a plain literal,
            # '\d' is an invalid escape sequence (DeprecationWarning today,
            # a SyntaxError in future Python versions).
            self.assertRegex(
                cm.output[0],
                r'^ERROR:renderer.render:Error during render of workflow \d'
            )

    self.run_with_async_db(inner())
    ack.assert_called()
    requeue.assert_not_called()
def test_fetch_get_input_dataframe_stale_cache(self):
    """get_input_dataframe() returns None when the upstream cache is stale."""
    table = pd.DataFrame({'A': [1]})
    workflow = Workflow.objects.create()
    delta1 = InitWorkflowCommand.create(workflow)
    delta2 = InitWorkflowCommand.create(workflow)
    # Cache against delta1 while the module says delta2 is relevant:
    # the cached render result is therefore stale.
    wfm1 = workflow.wf_modules.create(order=0,
                                      last_relevant_delta_id=delta2.id)
    wfm1.cache_render_result(delta1.id, ProcessResult(table))
    wfm1.save()
    wfm2 = workflow.wf_modules.create(order=1)

    async def fetch(params, *, get_input_dataframe, **kwargs):
        self.assertIsNone(await get_input_dataframe())

    self._test_fetch(fetch, wfm2)
def test_change_parameters_on_soft_deleted_tab(self):
    """ChangeParametersCommand.create() yields None for a module on a
    soft-deleted tab (no command should be recorded)."""
    workflow = Workflow.objects.create()
    delta = InitWorkflowCommand.create(workflow)
    tab = workflow.tabs.create(position=0, is_deleted=True)
    ModuleVersion.create_or_replace_from_spec({
        'id_name': 'loadurl',
        'name': 'loadurl',
        'category': 'Clean',
        'parameters': [
            {'id_name': 'url', 'type': 'string'},
        ]
    })
    wf_module = tab.wf_modules.create(
        order=0,
        module_id_name='loadurl',
        last_relevant_delta_id=delta.id,
        params={'url': ''}
    )
    cmd = self.run_with_async_db(ChangeParametersCommand.create(
        workflow=workflow,
        wf_module=wf_module,
        new_values={'url': 'https://example.com'}
    ))
    # Deleted tab => no command is created
    self.assertIsNone(cmd)
def test_change_parameters_on_soft_deleted_tab(self):
    """ChangeParametersCommand.create() yields None for a module on a
    soft-deleted tab (no command should be recorded)."""
    workflow = Workflow.objects.create()
    delta = InitWorkflowCommand.create(workflow)
    tab = workflow.tabs.create(position=0, is_deleted=True)
    ModuleVersion.create_or_replace_from_spec({
        "id_name": "loadurl",
        "name": "loadurl",
        "category": "Clean",
        "parameters": [{
            "id_name": "url",
            "type": "string"
        }],
    })
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="loadurl",
        last_relevant_delta_id=delta.id,
        params={"url": ""},
    )
    cmd = self.run_with_async_db(
        ChangeParametersCommand.create(
            workflow=workflow,
            wf_module=wf_module,
            new_values={"url": "https://example.com"},
        ))
    # Deleted tab => no command is created
    self.assertIsNone(cmd)
def setUp(self): super().setUp() # log in self.queue_render_patcher = patch.object(rabbitmq, 'queue_render') self.queue_render = self.queue_render_patcher.start() self.queue_render.return_value = future_none self.log_patcher = patch('server.utils.log_user_event_from_request') self.log_patch = self.log_patcher.start() self.factory = APIRequestFactory() self.workflow1 = Workflow.objects.create(name='Workflow 1', owner=self.user) self.delta = InitWorkflowCommand.create(self.workflow1) self.tab1 = self.workflow1.tabs.create(position=0) self.module_version1 = ModuleVersion.create_or_replace_from_spec({ 'id_name': 'module1', 'name': 'Module 1', 'category': 'Clean', 'parameters': [] }) # Add another user, with one public and one private workflow self.otheruser = User.objects.create(username='******', email='*****@*****.**', password='******') self.other_workflow_private = Workflow.objects.create( name="Other workflow private", owner=self.otheruser) self.other_workflow_public = Workflow.objects.create( name="Other workflow public", owner=self.otheruser, public=True)
def test_wf_module_duplicate_disable_auto_update(self):
    """
    Duplicates should be lightweight by default: no auto-updating.
    """
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(order=0, auto_update_data=True,
                                      update_interval=600)

    workflow2 = Workflow.create_and_init()
    # create_and_init() already made the InitWorkflowCommand and a tab at
    # position 0, so re-creating either was redundant — and a second tab
    # at position 0 would collide with the existing one. Reuse the tab,
    # mirroring how `workflow` above is set up.
    tab2 = workflow2.tabs.first()
    wf_module2 = wf_module.duplicate(tab2)

    # auto-update is disabled on the copy, but its interval is preserved
    self.assertEqual(wf_module2.auto_update_data, False)
    self.assertEqual(wf_module2.update_interval, 600)
def _init_workflow_for_lesson(workflow, lesson): InitWorkflowCommand.create(workflow) # Create each wfModule of each tab tab_dicts = lesson.initial_workflow.tabs for position, tab_dict in enumerate(tab_dicts): # Set selected module to last wfmodule in stack tab = workflow.tabs.create( position=position, slug=f'tab-{position + 1}', name=tab_dict['name'], selected_wf_module_position=len(tab_dict['wfModules']) - 1 ) for order, wfm in enumerate(tab_dict['wfModules']): _add_wf_module_to_tab(wfm, order, tab, workflow.last_delta_id, lesson)
def setUp(self):
    """One workflow, one tab, one module whose cache is keyed to self.delta."""
    super().setUp()
    self.workflow = Workflow.objects.create()
    self.delta = InitWorkflowCommand.create(self.workflow)
    self.tab = self.workflow.tabs.create(position=0)
    self.wf_module = self.tab.wf_modules.create(
        order=0,
        slug="step-1",
        last_relevant_delta_id=self.delta.id
    )
def setUp(self):
    """One user-owned workflow with one tab and one module."""
    super().setUp()
    self.user = User.objects.create(username='******',
                                    email='*****@*****.**')
    self.workflow = Workflow.objects.create(owner=self.user)
    self.tab = self.workflow.tabs.create(position=0)
    self.delta = InitWorkflowCommand.create(self.workflow)
    self.wf_module = self.tab.wf_modules.create(
        order=0, last_relevant_delta_id=self.delta.id)
def test_email_no_delta_when_not_changed(self, email, fake_load_module):
    """No notification email when re-rendering produces identical output."""
    workflow = Workflow.objects.create()
    delta1 = InitWorkflowCommand.create(workflow)
    delta2 = InitWorkflowCommand.create(workflow)
    wf_module = workflow.wf_modules.create(
        order=0, last_relevant_delta_id=delta2.id)
    # Cache the old result under delta1; delta2 forces a re-render
    wf_module.cache_render_result(delta1.id,
                                  ProcessResult(pd.DataFrame({'A': [1]})))
    wf_module.notifications = True
    wf_module.save()

    fake_loaded_module = Mock(LoadedModule)
    fake_load_module.return_value = fake_loaded_module
    # Render returns the _same_ data as the cached result
    result2 = ProcessResult(pd.DataFrame({'A': [1]}))
    fake_loaded_module.render.return_value = result2

    self._execute(workflow)

    email.assert_not_called()
def test_fetch_get_input_dataframe_empty_cache(self):
    """get_input_dataframe() is None when the upstream module never rendered."""
    workflow = Workflow.objects.create()
    init_delta = InitWorkflowCommand.create(workflow)

    # Upstream module exists but has no cached render result at all.
    workflow.wf_modules.create(order=0, last_relevant_delta_id=init_delta.id)
    downstream = workflow.wf_modules.create(order=1)

    async def fetch(params, *, get_input_dataframe, **kwargs):
        self.assertIsNone(await get_input_dataframe())

    self._test_fetch(fetch, downstream)
def test_fetch_get_input_dataframe_stale_cache(self):
    """get_input_dataframe() returns None when the upstream cache is stale."""
    table = pd.DataFrame({'A': [1]})
    workflow = Workflow.objects.create()
    tab = workflow.tabs.create(position=0)
    delta1 = InitWorkflowCommand.create(workflow)
    wfm1 = tab.wf_modules.create(order=0, last_relevant_delta_id=delta1.id)
    wfm1.cache_render_result(delta1.id, ProcessResult(table))

    # Now make wfm1's output stale
    delta2 = InitWorkflowCommand.create(workflow)
    wfm1.last_relevant_delta_id = delta2.id
    wfm1.save(update_fields=['last_relevant_delta_id'])

    wfm2 = tab.wf_modules.create(order=1)

    async def fetch(params, *, get_input_dataframe, **kwargs):
        self.assertIsNone(await get_input_dataframe())

    self._test_fetch(fetch, wfm2)
def test_fetch_get_input_dataframe_empty_cache(self):
    """get_input_dataframe() is None when the upstream module never rendered."""
    workflow = Workflow.objects.create()
    tab = workflow.tabs.create(position=0)
    init_delta = InitWorkflowCommand.create(workflow)

    # Upstream module exists but has no cached render result at all.
    tab.wf_modules.create(order=0, last_relevant_delta_id=init_delta.id)
    downstream = tab.wf_modules.create(order=1)

    async def fetch(params, *, get_input_dataframe, **kwargs):
        self.assertIsNone(await get_input_dataframe())

    self._test_fetch(fetch, DefaultMigrateParams, downstream,
                     ParamDType.Dict({}))
def _duplicate(self, name: str, owner: Optional[User],
               session_key: Optional[str]) -> 'Workflow':
    """Clone this Workflow — tabs included — under a new name/owner.

    The copy is always private and records this workflow as its original.
    """
    with self.cooperative_lock():
        wf = Workflow.objects.create(
            name=name,
            owner=owner,
            original_workflow_id=self.pk,
            anonymous_owner_session_key=session_key,
            selected_tab_position=self.selected_tab_position,
            public=False,
            last_delta=None)

        # Set wf.last_delta and wf.last_delta_id, so we can render.
        # Import here to avoid circular deps
        from server.models.commands import InitWorkflowCommand
        InitWorkflowCommand.create(wf)

        tabs = list(self.live_tabs)
        for tab in tabs:
            tab.duplicate(wf)

        return wf
def test_render_or_requeue_render(self, execute):
    """When the render lock is acquired, execute the render and never requeue."""
    execute.return_value = future_none
    workflow = Workflow.objects.create()
    init_delta = InitWorkflowCommand.create(workflow)
    requeue = Mock(name='requeue', return_value=future_none)

    with self.assertLogs():
        self.run_with_async_db(render_or_requeue(
            SuccessfulRenderLocker, requeue, workflow.id, init_delta.id))

    execute.assert_called_with(workflow, init_delta.id)
    requeue.assert_not_called()
def test_execute_cache_hit(self, fake_module):
    """With fresh cached renders for every module, no module code runs."""
    workflow = Workflow.objects.create()
    tab = workflow.tabs.create(position=0)
    delta = InitWorkflowCommand.create(workflow)
    wf_module1 = tab.wf_modules.create(order=0,
                                       last_relevant_delta_id=delta.id)
    result1 = ProcessResult(pd.DataFrame({'A': [1]}))
    wf_module1.cache_render_result(delta.id, result1)
    wf_module2 = tab.wf_modules.create(order=1,
                                       last_relevant_delta_id=delta.id)
    result2 = ProcessResult(pd.DataFrame({'B': [2]}))
    wf_module2.cache_render_result(delta.id, result2)

    # NOTE(review): no execute/render call is visible between the cache
    # setup and this assertion — presumably a `self._execute(workflow)`
    # belongs here (as in sibling tests); confirm the test actually
    # exercises the cache-hit path.
    fake_module.assert_not_called()
def test_execute_mark_unreachable(self, send_delta_async, fake_load_module):
    """After a module errors, every downstream module becomes 'unreachable'."""
    send_delta_async.return_value = fake_future
    workflow = Workflow.objects.create()
    delta = InitWorkflowCommand.create(workflow)
    wf_module1 = workflow.wf_modules.create(
        order=0, last_relevant_delta_id=delta.id)
    wf_module2 = workflow.wf_modules.create(
        order=1, last_relevant_delta_id=delta.id)
    wf_module3 = workflow.wf_modules.create(
        order=2, last_relevant_delta_id=delta.id)

    # Every render call fails with the same error
    fake_module = Mock(LoadedModule)
    fake_load_module.return_value = fake_module
    fake_module.render.return_value = ProcessResult(error='foo')

    self._execute(workflow)

    # First module holds the actual error ...
    wf_module1.refresh_from_db()
    self.assertEqual(wf_module1.get_cached_render_result().status,
                     'error')
    self.assertEqual(wf_module1.get_cached_render_result().result,
                     ProcessResult(error='foo'))

    # ... and everything after it is unreachable with an empty result
    wf_module2.refresh_from_db()
    self.assertEqual(wf_module2.get_cached_render_result().status,
                     'unreachable')
    self.assertEqual(wf_module2.get_cached_render_result().result,
                     ProcessResult())

    wf_module3.refresh_from_db()
    self.assertEqual(wf_module3.get_cached_render_result().status,
                     'unreachable')
    self.assertEqual(wf_module3.get_cached_render_result().result,
                     ProcessResult())

    # The final broadcast to clients reports wf_module3's unreachable state
    send_delta_async.assert_called_with(
        workflow.id,
        {
            'updateWfModules': {
                str(wf_module3.id): {
                    'output_status': 'unreachable',
                    'quick_fixes': [],
                    'output_error': '',
                    'output_columns': [],
                    'output_n_rows': 0,
                    'last_relevant_delta_id': delta.id,
                    'cached_render_result_delta_id': delta.id,
                }
            }
        })
def test_fetch_get_input_dataframe_happy_path(self):
    """get_input_dataframe() returns the upstream module's fresh cached table."""
    table = pd.DataFrame({'A': [1]})
    workflow = Workflow.objects.create()
    tab = workflow.tabs.create(position=0)
    delta = InitWorkflowCommand.create(workflow)
    # Upstream module with a fresh cached render of `table`
    wfm1 = tab.wf_modules.create(order=0, last_relevant_delta_id=delta.id)
    wfm1.cache_render_result(delta.id, ProcessResult(table))
    wfm1.save()
    wfm2 = tab.wf_modules.create(order=1)

    async def fetch(params, *, get_input_dataframe, **kwargs):
        assert_frame_equal(await get_input_dataframe(), table)

    self._test_fetch(fetch, wfm2)
def test_email_no_delta_when_not_changed(self, email, fake_load_module):
    """No notification email when re-rendering produces identical output."""
    workflow = Workflow.objects.create()
    tab = workflow.tabs.create(position=0)
    delta1 = InitWorkflowCommand.create(workflow)
    wf_module = tab.wf_modules.create(order=0,
                                      last_relevant_delta_id=delta1.id,
                                      notifications=True)
    wf_module.cache_render_result(delta1.id,
                                  ProcessResult(pd.DataFrame({'A': [1]})))

    # Now make a new delta, so we need to re-render. The render function's
    # output won't change.
    delta2 = InitWorkflowCommand.create(workflow)
    wf_module.last_relevant_delta_id = delta2.id
    wf_module.save(update_fields=['last_relevant_delta_id'])

    fake_loaded_module = Mock(LoadedModule)
    fake_load_module.return_value = fake_loaded_module
    result2 = ProcessResult(pd.DataFrame({'A': [1]}))
    fake_loaded_module.render.return_value = result2

    self._execute(workflow)

    email.assert_not_called()
def setUp(self):
    """One workflow with one tab and a minimal 'loadurl' module spec."""
    super().setUp()
    self.workflow = Workflow.objects.create()
    self.tab = self.workflow.tabs.create(position=0)
    # Minimal module with a single string parameter, pinned to version 1.0
    self.module_version = ModuleVersion.create_or_replace_from_spec({
        'id_name': 'loadurl',
        'name': 'Load URL',
        'category': 'Clean',
        'parameters': [
            {'id_name': 'url', 'type': 'string'},
        ],
    }, source_version_hash='1.0')
    self.delta = InitWorkflowCommand.create(self.workflow)