def test_get_state(self):
    """Verify the state a workflow derives from the states of its modules.

    A workflow whose modules all succeeded is expected to report success.
    After a canceled module and one more successful module have been added
    to the module list, a workflow that is created from that list is
    expected to report canceled.
    """
    # Initial module list: two successful modules.
    handles = [
        ModuleHandle(identifier=module_id,
                     command=python_cell(source='print 2+2'),
                     external_form='TEST MODULE',
                     state=state.MODULE_SUCCESS)
        for module_id in ('MOD0', 'MOD1')
    ]
    workflow = WorkflowHandle(identifier='0',
                              branch_id='0',
                              modules=handles,
                              descriptor=WorkflowDescriptor(
                                  identifier='0', action=ACTION_CREATE))
    self.assertTrue(workflow.get_state().is_success)
    # Grow the same list by a canceled module that is followed by another
    # successful module. Both reuse the identifier 'MOD1'.
    for module_state in (state.MODULE_CANCELED, state.MODULE_SUCCESS):
        handles.append(
            ModuleHandle(identifier='MOD1',
                         command=python_cell(source='print 2+2'),
                         external_form='TEST MODULE',
                         state=module_state))
    workflow = WorkflowHandle(identifier='0',
                              branch_id='0',
                              modules=handles,
                              descriptor=WorkflowDescriptor(
                                  identifier='0', action=ACTION_CREATE))
    self.assertTrue(workflow.get_state().is_canceled)
def test_pending_append(self):
    """Append a workflow that mixes completed and pending modules.

    A viztrail is created on disk and its default branch is filled with ten
    workflows, each one extending the previous head by one successful
    module. A further workflow is then appended that keeps the first five
    modules of the head and re-adds the last five as pending copies. The
    resulting head is checked twice: on the handle that is returned by
    append_workflow and on the branch that is re-loaded from disk.
    """
    base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
    os.makedirs(base_path)
    vt = OSViztrailHandle.create_viztrail(identifier='ABC',
                                          properties={},
                                          base_path=base_path)
    branch = vt.get_default_branch()
    for i in range(10):
        ts = get_current_time()
        command = python_cell(source='print ' + str(i) + '+' + str(i))
        module = OSModuleHandle.create_module(
            command=command,
            external_form='print ' + str(i) + '+' + str(i),
            state=MODULE_SUCCESS,
            outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
            provenance=ModuleProvenance(),
            timestamp=ModuleTimestamp(created_at=ts,
                                      started_at=ts,
                                      finished_at=ts),
            module_folder=vt.modules_folder,
            object_store=vt.object_store)
        # The first workflow consists of the new module only. All later
        # workflows extend the module list of the current branch head.
        modules = ([module] if branch.head is None else
                   branch.head.modules + [module])
        branch.append_workflow(modules=modules,
                               action=ACTION_INSERT,
                               command=command)
    head_modules = branch.get_head().modules
    before_ids = [m.identifier for m in head_modules]

    def check_head(workflow):
        # The first five modules are the handles that were kept from the
        # previous head. The remaining five are expected to have
        # identifiers that did not exist before the append.
        for m in workflow.modules[:5]:
            self.assertTrue(m.identifier in before_ids)
        for m in workflow.modules[5:]:
            self.assertFalse(m.identifier in before_ids)
        self.assertEqual(len(workflow.modules), 10)
        self.assertEqual(workflow.descriptor.identifier, '0000000A')
        self.assertEqual(workflow.descriptor.action, ACTION_DELETE)
        self.assertEqual(workflow.descriptor.package_id, PACKAGE_PYTHON)
        self.assertEqual(workflow.descriptor.command_id, PYTHON_CODE)

    # Keep the first half of the head as is and re-add the second half as
    # pending modules that only carry the command and its external form.
    retained_modules = head_modules[:5]
    pending_modules = [
        ModuleHandle(command=m.command, external_form=m.external_form)
        for m in head_modules[5:]
    ]
    appended = branch.append_workflow(modules=retained_modules,
                                      pending_modules=pending_modules,
                                      action=ACTION_DELETE,
                                      command=head_modules[-1].command)
    check_head(appended)
    # Re-load the viztrail from disk and repeat the checks on the branch
    # head. The history holds the ten initial workflows plus the new one.
    vt = OSViztrailHandle.load_viztrail(base_path)
    branch = vt.get_default_branch()
    history = branch.get_history()
    self.assertEqual(len(history), 11)
    check_head(branch.get_head())
def replace_workflow_module(self, project_id, branch_id, module_id, command):
    """Replace a module in the workflow at the head of a viztrail branch.

    A new workflow is appended to the branch in which the identified module
    is replaced by a module for the given command. The replaced module and
    all modules that follow it are added as pending modules and execution
    is started at the replaced module.

    Returns the list of affected modules in the modified workflow, i.e.,
    the replaced module and everything after it. The result is None if the
    project or branch is unknown, if the branch head is missing or has no
    modules, or if the head contains no module with the given identifier.

    Parameters
    ----------
    project_id: string
        Unique project identifier
    branch_id : string
        Unique branch identifier
    module_id : string
        Identifier of the module that is being replaced
    command : vizier.viztrail.command.ModuleCommand
        Specification of the command that is to be evaluated

    Returns
    -------
    list(vizier.viztrail.module.base.ModuleHandle)

    Raises
    ------
    ValueError
        If the current head of the branch is active.
    """
    with self.backend.lock:
        # Resolve the branch and make sure it has a non-empty head.
        branch = self.projects.get_branch(project_id=project_id,
                                          branch_id=branch_id)
        if branch is None:
            return None
        head = branch.get_head()
        if head is None:
            return None
        if len(head.modules) == 0:
            return None
        # Modules of an active workflow cannot be replaced.
        if head.is_active:
            raise ValueError('cannot replace in active workflow')
        # Position of the first module that has the requested identifier.
        modules = head.modules
        module_index = next(
            (pos for pos, m in enumerate(modules)
             if m.identifier == module_id),
            None)
        if module_index is None:
            return None
        # The replaced module runs against the datasets of its predecessor
        # (or against an empty state if it is the first module).
        datasets = (modules[module_index - 1].datasets
                    if module_index > 0 else dict())
        # Build the handle for the replaced module. The initial state is
        # provided by the backend. Resource information is carried over
        # from the provenance of the module that is being replaced.
        task_state = self.backend.next_task_state()
        declaration = self.packages[command.package_id].get(
            command.command_id)
        replaced_module = ModuleHandle(
            command=command,
            state=task_state,
            external_form=command.to_external_form(command=declaration,
                                                   datasets=datasets),
            provenance=ModuleProvenance(
                resources=modules[module_index].provenance.resources))
        # All modules after the replaced one are copied into new handles
        # and added as pending modules of the new workflow.
        pending_modules = [replaced_module] + [
            ModuleHandle(command=m.command,
                         external_form=m.external_form,
                         datasets=m.datasets,
                         outputs=m.outputs,
                         provenance=m.provenance)
            for m in modules[module_index + 1:]
        ]
        workflow = branch.append_workflow(modules=modules[:module_index],
                                          action=wf.ACTION_REPLACE,
                                          command=replaced_module.command,
                                          pending_modules=pending_modules)
        self.execute_module(project_id=project_id,
                            branch_id=branch_id,
                            module=workflow.modules[module_index],
                            datasets=datasets)
        return workflow.modules[module_index:]
def insert_workflow_module(self, project_id, branch_id, before_module_id, command):
    """Insert a module into the workflow at the head of a viztrail branch.

    The new module is placed before the module with the identifier that is
    given as before_module_id. A new workflow is appended to the branch in
    which the inserted module and all modules from the insert position
    onwards are pending modules.

    If the current head of the branch is active the inserted module is
    created in pending state and no execution is started here. Otherwise,
    the initial state is provided by the backend and the inserted module is
    executed.

    Returns the list of affected modules in the modified workflow, i.e.,
    the inserted module and everything after it. The result is None if the
    project or branch is unknown, if the branch head is missing or has no
    modules, or if the head contains no module with the given identifier.

    Parameters
    ----------
    project_id: string
        Unique project identifier
    branch_id : string
        Unique branch identifier
    before_module_id : string
        Insert new module before module with given identifier.
    command : vizier.viztrail.command.ModuleCommand
        Specification of the command that is to be executed by the
        inserted workflow module

    Returns
    -------
    list(vizier.viztrail.module.base.ModuleHandle)
    """
    with self.backend.lock:
        # Resolve the branch and make sure it has a non-empty head.
        branch = self.projects.get_branch(project_id=project_id,
                                          branch_id=branch_id)
        if branch is None:
            return None
        head = branch.get_head()
        if head is None:
            return None
        if len(head.modules) == 0:
            return None
        # Position of the first module that has the requested identifier.
        # The new module is inserted at this position.
        modules = head.modules
        module_index = next(
            (pos for pos, m in enumerate(modules)
             if m.identifier == before_module_id),
            None)
        if module_index is None:
            return None
        # The inserted module runs against the datasets of the module that
        # precedes it (or against an empty state at the first position).
        datasets = (modules[module_index - 1].datasets
                    if module_index > 0 else dict())
        # A module that is inserted into an active workflow is pending.
        # Otherwise the initial state depends on the backend.
        module_state = (mstate.MODULE_PENDING if head.is_active else
                        self.backend.next_task_state())
        declaration = self.packages[command.package_id].get(
            command.command_id)
        inserted_module = ModuleHandle(
            command=command,
            state=module_state,
            external_form=command.to_external_form(command=declaration,
                                                   datasets=datasets))
        # The module at the insert position and all modules after it are
        # copied into new handles that follow the inserted module.
        pending_modules = [inserted_module] + [
            ModuleHandle(command=m.command,
                         external_form=m.external_form,
                         datasets=m.datasets,
                         outputs=m.outputs,
                         provenance=m.provenance)
            for m in modules[module_index:]
        ]
        workflow = branch.append_workflow(modules=modules[:module_index],
                                          action=wf.ACTION_INSERT,
                                          command=inserted_module.command,
                                          pending_modules=pending_modules)
        # Execution is only started here if the previous head is idle.
        if not head.is_active:
            self.execute_module(project_id=project_id,
                                branch_id=branch_id,
                                module=workflow.modules[module_index],
                                datasets=datasets)
        return workflow.modules[module_index:]
def delete_workflow_module(self, project_id, branch_id, module_id):
    """Delete a module from the workflow at the head of a viztrail branch.

    A new workflow without the identified module is appended to the branch.
    Starting at the position of the deleted module, the remaining modules
    are checked against the dataset state that results from the deletion.
    The first module whose provenance requires execution, and all modules
    after it, become pending modules of the new workflow and execution is
    started at that module. If no remaining module requires execution the
    new workflow consists of the remaining modules as they are.

    Returns the list of remaining modules in the modified workflow that
    are affected by the deletion, starting at the position of the deleted
    module. The list is empty if nothing had to be executed. The result is
    None if the project or branch is unknown, if the branch head is missing
    or has no modules, or if the head contains no module with the given
    identifier.

    Parameters
    ----------
    project_id: string
        Unique project identifier
    branch_id: string
        Unique branch identifier
    module_id : string
        Unique module identifier

    Returns
    -------
    list(vizier.viztrail.module.base.ModuleHandle)

    Raises
    ------
    ValueError
        If the current head of the branch is active.
    """
    with self.backend.lock:
        # Get the handle for the specified branch and the branch head
        branch = self.projects.get_branch(project_id=project_id,
                                          branch_id=branch_id)
        if branch is None:
            return None
        head = branch.get_head()
        if head is None or len(head.modules) == 0:
            return None
        # Raise ValueError if the head workflow is active
        if head.is_active:
            raise ValueError('cannot delete from active workflow')
        # Get the index of the module that is being deleted
        module_index = next(
            (i for i, m in enumerate(head.modules)
             if m.identifier == module_id),
            None)
        if module_index is None:
            return None
        deleted_module = head.modules[module_index]
        # Create module list for new workflow. After the deletion,
        # module_index points to the first module that followed the deleted
        # one (if any).
        modules = head.modules[:module_index] + head.modules[module_index + 1:]
        module_count = len(modules)
        # Nothing can require re-execution if the last module was deleted
        # (which includes the case that the module list is now empty).
        if module_index < module_count:
            # Get the context for the first module that may require
            # re-execution
            if module_index > 0:
                datasets = modules[module_index - 1].datasets
            else:
                datasets = dict()
            # Keep track of the first remaining module that was affected
            # by the delete
            first_remaining_module = module_index
            # Skip over modules that do not have to be executed again and
            # update their dataset state along the way.
            while not modules[module_index].provenance.requires_exec(
                    datasets):
                if module_index == module_count - 1:
                    # Update the counter before we exit the loop. Otherwise
                    # the last module would be executed.
                    # NOTE(review): the dataset state of the last module is
                    # not refreshed on this path, unlike for the modules
                    # handled below - confirm that this is intended.
                    module_index = module_count
                    break
                m = modules[module_index]
                datasets = m.provenance.get_database_state(datasets)
                # NOTE(review): m is the same handle object that is
                # contained in the module list of the previous head.
                m.datasets = datasets
                module_index += 1
            if module_index < module_count:
                # The module that module_index points to has to be
                # executed. Create a workflow that contains pending copies
                # of all modules that require execution and run the first
                # of these modules.
                command = modules[module_index].command
                external_form = command.to_external_form(
                    command=self.packages[command.package_id].get(
                        command.command_id),
                    datasets=datasets)
                # Replace original modules with pending modules for those
                # that need to be executed. The state of the first module
                # depends on the state of the backend. The following
                # modules are created without an explicit state.
                pending_modules = [
                    ModuleHandle(
                        command=command,
                        state=self.backend.next_task_state(),
                        external_form=external_form,
                        provenance=modules[module_index].provenance)
                ]
                for m in modules[module_index + 1:]:
                    pending_modules.append(
                        ModuleHandle(command=m.command,
                                     external_form=m.external_form,
                                     datasets=m.datasets,
                                     outputs=m.outputs,
                                     provenance=m.provenance))
                workflow = branch.append_workflow(
                    modules=modules[:module_index],
                    action=wf.ACTION_DELETE,
                    command=deleted_module.command,
                    pending_modules=pending_modules)
                self.execute_module(project_id=project_id,
                                    branch_id=branch_id,
                                    module=workflow.modules[module_index],
                                    datasets=datasets)
                return workflow.modules[first_remaining_module:]
        # None of the remaining modules requires execution. The new
        # workflow is complete as is.
        branch.append_workflow(modules=modules,
                               action=wf.ACTION_DELETE,
                               command=deleted_module.command)
        return list()
def append_workflow_module(self, project_id, branch_id, command):
    """Append a module to the workflow at the head of a viztrail branch.

    A new workflow is appended to the branch that contains the modules of
    the current head followed by a module for the given command. If the
    current head is not active and the backend reports that it can execute
    the command, the command is run immediately and the new module is
    added in success or error state. Otherwise, the new module is added in
    pending state (head is active), canceled state (last module of the
    head is in error or canceled state), or the state that is provided by
    the backend. In the latter case execution of the module is started.

    Returns the handle for the new module in the modified workflow. The
    result is None if the specified project or branch do not exist.

    Parameters
    ----------
    project_id: string
        Unique project identifier
    branch_id : string
        Unique branch identifier
    command : vizier.viztrail.command.ModuleCommand
        Specification of the command that is to be executed by the
        appended workflow module

    Returns
    -------
    vizier.viztrail.module.base.ModuleHandle
    """
    with self.backend.lock:
        branch = self.projects.get_branch(project_id=project_id,
                                          branch_id=branch_id)
        if branch is None:
            return None
        # Start from the context of an empty workflow and override it with
        # the database state, module list and status flags of the branch
        # head if the head has at least one module.
        datasets, modules = dict(), list()
        is_active, is_error = False, False
        head = branch.get_head()
        if head is not None and len(head.modules) > 0:
            datasets = head.modules[-1].datasets
            modules = head.modules
            is_active = head.is_active
            is_error = (head.modules[-1].is_error
                        or head.modules[-1].is_canceled)
        # External representation of the command
        declaration = self.packages[command.package_id].get(
            command.command_id)
        external_form = command.to_external_form(command=declaration,
                                                 datasets=datasets)
        if not is_active and self.backend.can_execute(command):
            # Run the command right away and record start and end time.
            ts_start = get_current_time()
            task = TaskHandle(task_id=get_unique_identifier(),
                              project_id=project_id,
                              controller=self)
            result = self.backend.execute(task=task,
                                          command=command,
                                          context=task_context(datasets))
            ts = ModuleTimestamp(created_at=ts_start,
                                 started_at=ts_start,
                                 finished_at=get_current_time())
            # The module handle reflects the outcome of the execution.
            # Only a successful module has datasets and provenance.
            if result.is_success:
                previous_state = (dict() if len(modules) == 0 else
                                  modules[-1].datasets)
                module = ModuleHandle(
                    state=mstate.MODULE_SUCCESS,
                    command=command,
                    external_form=external_form,
                    timestamp=ts,
                    datasets=result.provenance.get_database_state(
                        previous_state),
                    outputs=result.outputs,
                    provenance=result.provenance)
            else:
                module = ModuleHandle(state=mstate.MODULE_ERROR,
                                      command=command,
                                      external_form=external_form,
                                      timestamp=ts,
                                      outputs=result.outputs)
            workflow = branch.append_workflow(modules=modules,
                                              action=wf.ACTION_APPEND,
                                              command=command,
                                              pending_modules=[module])
        else:
            # The new module is pending if the workflow is active and
            # canceled if the last module did not complete successfully.
            # Otherwise the state depends on the associated backend.
            if is_active:
                state = mstate.MODULE_PENDING
            elif is_error:
                state = mstate.MODULE_CANCELED
            else:
                state = self.backend.next_task_state()
            appended = ModuleHandle(state=state,
                                    command=command,
                                    external_form=external_form)
            workflow = branch.append_workflow(modules=modules,
                                              action=wf.ACTION_APPEND,
                                              command=command,
                                              pending_modules=[appended])
            # Only start execution for an idle workflow whose new module
            # was not canceled.
            if not (is_active or state == mstate.MODULE_CANCELED):
                self.execute_module(project_id=project_id,
                                    branch_id=branch_id,
                                    module=workflow.modules[-1],
                                    datasets=datasets)
        return workflow.modules[-1]