Esempio n. 1
0
 def test_get_state(self):
     """Check the aggregate state a workflow reports for its modules.

     A workflow that contains only successful modules is expected to report
     success. After a canceled module has been added to the module list the
     workflow is expected to report canceled, even though a successful
     module follows the canceled one.
     """

     def make_module(identifier, module_state):
         # All test modules share command and external form; only the
         # identifier and the module state vary.
         return ModuleHandle(identifier=identifier,
                             command=python_cell(source='print 2+2'),
                             external_form='TEST MODULE',
                             state=module_state)

     def make_workflow(module_list):
         return WorkflowHandle(identifier='0',
                               branch_id='0',
                               modules=module_list,
                               descriptor=WorkflowDescriptor(
                                   identifier='0', action=ACTION_CREATE))

     modules = [
         make_module('MOD0', state.MODULE_SUCCESS),
         make_module('MOD1', state.MODULE_SUCCESS),
     ]
     self.assertTrue(make_workflow(modules).get_state().is_success)
     # Grow the same list by a canceled module followed by a successful one
     # and build a second workflow from it.
     modules.extend([
         make_module('MOD1', state.MODULE_CANCELED),
         make_module('MOD1', state.MODULE_SUCCESS),
     ])
     self.assertTrue(make_workflow(modules).get_state().is_canceled)
 def test_pending_append(self):
     """Test appending a workflow with pending modules to a branch.

     Builds a branch history of ten workflows, each adding one successfully
     executed module. Then appends a workflow that keeps the first five
     modules of the head and passes fresh handles for the remaining five as
     pending modules. The resulting workflow is verified twice: once as
     returned by append_workflow and once as the branch head after the
     viztrail has been loaded again from base_path.
     """
     base_path = os.path.join(os.path.abspath(REPO_DIR), 'ABC')
     os.makedirs(base_path)
     vt = OSViztrailHandle.create_viztrail(identifier='ABC',
                                           properties={},
                                           base_path=base_path)
     branch = vt.get_default_branch()
     for i in range(10):
         ts = get_current_time()
         source = 'print ' + str(i) + '+' + str(i)
         command = python_cell(source=source)
         module = OSModuleHandle.create_module(
             command=command,
             external_form=source,
             state=MODULE_SUCCESS,
             outputs=ModuleOutputs(stdout=[TextOutput(str(i + i))]),
             provenance=ModuleProvenance(),
             timestamp=ModuleTimestamp(created_at=ts,
                                       started_at=ts,
                                       finished_at=ts),
             module_folder=vt.modules_folder,
             object_store=vt.object_store)
         # The very first workflow has no predecessor to copy modules from.
         if branch.head is not None:
             modules = branch.head.modules + [module]
         else:
             modules = [module]
         branch.append_workflow(modules=modules,
                                action=ACTION_INSERT,
                                command=command)
     head_modules = branch.get_head().modules
     before_ids = [m.identifier for m in head_modules]
     # New handles without identifier for the last five modules; the test
     # below expects them to end up with identifiers not seen before.
     pending_modules = [
         ModuleHandle(command=m.command, external_form=m.external_form)
         for m in head_modules[5:]
     ]
     wf = branch.append_workflow(modules=head_modules[:5],
                                 pending_modules=pending_modules,
                                 action=ACTION_DELETE,
                                 command=head_modules[-1].command)

     def check_workflow(workflow):
         # The first five modules are the retained ones, the last five must
         # carry identifiers that did not exist before the append.
         for m in workflow.modules[:5]:
             self.assertTrue(m.identifier in before_ids)
         for m in workflow.modules[5:]:
             self.assertFalse(m.identifier in before_ids)
         self.assertEqual(len(workflow.modules), 10)
         self.assertEqual(workflow.descriptor.identifier, '0000000A')
         self.assertEqual(workflow.descriptor.action, ACTION_DELETE)
         self.assertEqual(workflow.descriptor.package_id, PACKAGE_PYTHON)
         self.assertEqual(workflow.descriptor.command_id, PYTHON_CODE)

     check_workflow(wf)
     # Load the viztrail again and repeat the checks on the branch head.
     vt = OSViztrailHandle.load_viztrail(base_path)
     branch = vt.get_default_branch()
     history = branch.get_history()
     self.assertEqual(len(history), 11)
     check_workflow(branch.get_head())
Esempio n. 3
0
    def replace_workflow_module(self, project_id, branch_id, module_id,
                                command):
        """Substitute a module in the workflow at the head of a viztrail
        branch with a module that runs the given command. A new workflow is
        appended to the branch and execution of the replaced module is
        started.

        Returns the modules of the new workflow starting at the replaced
        module. The result is None if no branch is found for the given
        project and branch identifier, if the branch has no head or the head
        has no modules, or if the head contains no module with the given
        identifier.

        Parameters
        ----------
        project_id: string
            Unique project identifier
        branch_id : string
            Unique branch identifier
        module_id : string
            Identifier of the module that is being replaced
        command : vizier.viztrail.command.ModuleCommand
            Specification of the command that is to be evaluated

        Returns
        -------
        list(vizier.viztrail.module.base.ModuleHandle)

        Raises
        ------
        ValueError
            If the current head of the branch is active
        """
        with self.backend.lock:
            # Resolve the branch and its current head workflow
            branch = self.projects.get_branch(project_id=project_id,
                                              branch_id=branch_id)
            if branch is None:
                return None
            head = branch.get_head()
            if head is None:
                return None
            if len(head.modules) == 0:
                return None
            # An active head workflow cannot be modified
            if head.is_active:
                raise ValueError('cannot replace in active workflow')
            modules = head.modules
            # Position of the first module with a matching identifier
            module_index = next(
                (pos for pos, candidate in enumerate(modules)
                 if candidate.identifier == module_id), None)
            if module_index is None:
                return None
            # The replaced module sees the datasets of its predecessor, or
            # an empty dictionary if it is the first module.
            datasets = (modules[module_index - 1].datasets
                        if module_index > 0 else dict())
            # The initial state is whatever the backend reports as the next
            # task state. Resource information from the provenance of the
            # previous module execution is carried over.
            initial_state = self.backend.next_task_state()
            external_form = command.to_external_form(
                command=self.packages[command.package_id].get(
                    command.command_id),
                datasets=datasets)
            provenance = ModuleProvenance(
                resources=modules[module_index].provenance.resources)
            replaced_module = ModuleHandle(command=command,
                                           state=initial_state,
                                           external_form=external_form,
                                           provenance=provenance)
            # The replaced module and copies of all modules that follow it
            # form the pending part of the new workflow.
            successors = [
                ModuleHandle(command=succ.command,
                             external_form=succ.external_form,
                             datasets=succ.datasets,
                             outputs=succ.outputs,
                             provenance=succ.provenance)
                for succ in modules[module_index + 1:]
            ]
            pending_modules = [replaced_module] + successors
            workflow = branch.append_workflow(modules=modules[:module_index],
                                              action=wf.ACTION_REPLACE,
                                              command=replaced_module.command,
                                              pending_modules=pending_modules)
            self.execute_module(project_id=project_id,
                                branch_id=branch_id,
                                module=workflow.modules[module_index],
                                datasets=datasets)
            return workflow.modules[module_index:]
Esempio n. 4
0
    def insert_workflow_module(self, project_id, branch_id, before_module_id,
                               command):
        """Add a new module to the workflow at the head of the given viztrail
        branch, placing it directly before the module whose identifier is
        given as before_module_id. A new workflow is appended to the branch
        and becomes the result of the operation.

        If the current head is active the inserted module is created in
        pending state and no execution is started by this call. Otherwise
        the module state is obtained from the backend and execution of the
        inserted module is started.

        Returns the modules of the new workflow starting at the inserted
        module. The result is None if no branch is found for the given
        project and branch identifier, if the branch has no head or the head
        has no modules, or if the head contains no module with the given
        identifier.

        Parameters
        ----------
        project_id: string
            Unique project identifier
        branch_id : string
            Unique branch identifier
        before_module_id : string
            Insert new module before module with given identifier.
        command : vizier.viztrail.command.ModuleCommand
            Specification of the command that is to be executed by the inserted
            workflow module

        Returns
        -------
        list(vizier.viztrail.module.base.ModuleHandle)
        """
        with self.backend.lock:
            # Resolve the branch and its current head workflow
            branch = self.projects.get_branch(project_id=project_id,
                                              branch_id=branch_id)
            if branch is None:
                return None
            head = branch.get_head()
            if head is None:
                return None
            if len(head.modules) == 0:
                return None

            modules = head.modules
            # Position of the module that will follow the inserted one
            module_index = next(
                (pos for pos, candidate in enumerate(modules)
                 if candidate.identifier == before_module_id), None)
            if module_index is None:
                return None

            # The inserted module sees the datasets of the module that
            # precedes the insert position, or an empty dictionary when it
            # becomes the first module.
            datasets = (modules[module_index - 1].datasets
                        if module_index > 0 else dict())
            # Pending while the head workflow is active, otherwise the state
            # is taken from the backend.
            state = (mstate.MODULE_PENDING
                     if head.is_active else self.backend.next_task_state())
            external_form = command.to_external_form(
                command=self.packages[command.package_id].get(
                    command.command_id),
                datasets=datasets)
            inserted_module = ModuleHandle(command=command,
                                           state=state,
                                           external_form=external_form)
            # The inserted module followed by copies of the module at the
            # insert position and everything after it.
            followers = [
                ModuleHandle(command=succ.command,
                             external_form=succ.external_form,
                             datasets=succ.datasets,
                             outputs=succ.outputs,
                             provenance=succ.provenance)
                for succ in modules[module_index:]
            ]
            pending_modules = [inserted_module] + followers
            workflow = branch.append_workflow(modules=modules[:module_index],
                                              action=wf.ACTION_INSERT,
                                              command=inserted_module.command,
                                              pending_modules=pending_modules)
            # Execution is only triggered here when the previous head is
            # not active.
            if not head.is_active:
                self.execute_module(project_id=project_id,
                                    branch_id=branch_id,
                                    module=workflow.modules[module_index],
                                    datasets=datasets)
            return workflow.modules[module_index:]
Esempio n. 5
0
    def delete_workflow_module(self, project_id, branch_id, module_id):
        """Remove the module with the given identifier from the workflow at
        the head of the viztrail branch. A new workflow without the module is
        appended to the branch. Modules that follow the deleted one are only
        re-executed from the first module whose provenance reports that it
        requires execution.

        Returns the modules of the new workflow starting at the position of
        the deleted module if a module is re-executed, and an empty list
        otherwise. The result is None if no branch is found for the given
        project and branch identifier, if the branch has no head or the head
        has no modules, or if the head contains no module with the given
        identifier.

        Parameters
        ----------
        project_id: string
            Unique project identifier
        branch_id: string
            Unique branch identifier
        module_id : string
            Unique module identifier

        Returns
        -------
        list(vizier.viztrail.module.base.ModuleHandle)

        Raises
        ------
        ValueError
            If the current head of the branch is active
        """
        with self.backend.lock:
            # Resolve the branch and its current head workflow
            branch = self.projects.get_branch(project_id=project_id,
                                              branch_id=branch_id)
            if branch is None:
                return None
            head = branch.get_head()
            if head is None or len(head.modules) == 0:
                return None
            # An active head workflow cannot be modified
            if head.is_active:
                raise ValueError('cannot delete from active workflow')
            # Position of the first module with a matching identifier
            position = next((pos for pos, candidate in enumerate(head.modules)
                             if candidate.identifier == module_id), None)
            if position is None:
                return None
            deleted_module = head.modules[position]
            # Module list of the new workflow: everything but the deleted one
            remaining = head.modules[:position] + head.modules[position + 1:]
            total = len(remaining)
            if total == 0 or position >= total:
                # The list is empty or the last module was deleted, so there
                # is nothing that could need re-execution.
                branch.append_workflow(modules=remaining,
                                       action=wf.ACTION_DELETE,
                                       command=deleted_module.command)
                return list()
            # Context for the first module behind the deleted one
            datasets = (remaining[position - 1].datasets
                        if position > 0 else dict())
            # Remember where the affected modules start; the cursor below
            # moves past modules that can be kept without re-execution.
            first_remaining_module = position
            cursor = position
            while not remaining[cursor].provenance.requires_exec(datasets):
                if cursor == total - 1:
                    # Move the cursor past the end before leaving the loop.
                    # Otherwise the last module would be executed.
                    print('No need to execute anything')
                    cursor = total
                    break
                # Advance the database state over the skipped module and
                # store it with that module.
                skipped = remaining[cursor]
                datasets = skipped.provenance.get_database_state(datasets)
                skipped.datasets = datasets
                cursor += 1
            if cursor >= total:
                # No module required execution; the workflow is complete.
                branch.append_workflow(modules=remaining,
                                       action=wf.ACTION_DELETE,
                                       command=deleted_module.command)
                return list()
            # The module at the cursor has to run. It gets its state from
            # the backend; copies of all later modules are appended as
            # pending modules without an explicit state.
            command = remaining[cursor].command
            external_form = command.to_external_form(
                command=self.packages[command.package_id].get(
                    command.command_id),
                datasets=datasets)
            first_pending = ModuleHandle(
                command=command,
                state=self.backend.next_task_state(),
                external_form=external_form,
                provenance=remaining[cursor].provenance)
            pending_modules = [first_pending]
            pending_modules.extend(
                ModuleHandle(command=later.command,
                             external_form=later.external_form,
                             datasets=later.datasets,
                             outputs=later.outputs,
                             provenance=later.provenance)
                for later in remaining[cursor + 1:])
            workflow = branch.append_workflow(modules=remaining[:cursor],
                                              action=wf.ACTION_DELETE,
                                              command=deleted_module.command,
                                              pending_modules=pending_modules)
            self.execute_module(project_id=project_id,
                                branch_id=branch_id,
                                module=workflow.modules[cursor],
                                datasets=datasets)
            return workflow.modules[first_remaining_module:]
Esempio n. 6
0
    def append_workflow_module(self, project_id, branch_id, command):
        """Add a module that runs the given command at the end of the
        workflow at the head of the given viztrail branch. A new workflow is
        appended to the branch.

        If the head is not active and the backend reports that it can
        execute the command, the command is run right away and the new
        module is created in success or error state. Otherwise the module is
        created pending (active head), canceled (last module of the head is
        in error or canceled state) or in the state given by the backend, in
        which case its execution is started.

        Returns the handle for the last module in the new workflow. The
        result is None if no branch is found for the given project and
        branch identifier.

        Parameters
        ----------
        project_id: string
            Unique project identifier
        branch_id : string
            Unique branch identifier
        command : vizier.viztrail.command.ModuleCommand
            Specification of the command that is to be executed by the appended
            workflow module

        Returns
        -------
        vizier.viztrail.module.base.ModuleHandle
        """
        with self.backend.lock:
            # Resolve the branch
            branch = self.projects.get_branch(project_id=project_id,
                                              branch_id=branch_id)
            if branch is None:
                return None
            # Derive the current database state, the existing module list
            # and the activity/error flags from the branch head. An absent
            # or empty head yields empty defaults.
            head = branch.get_head()
            if head is None or len(head.modules) == 0:
                datasets = dict()
                modules = list()
                is_active = False
                is_error = False
            else:
                datasets = head.modules[-1].datasets
                modules = head.modules
                is_active = head.is_active
                is_error = head.modules[-1].is_error or head.modules[
                    -1].is_canceled
            # External representation of the command
            external_form = command.to_external_form(
                command=self.packages[command.package_id].get(
                    command.command_id),
                datasets=datasets)
            # The backend is only asked whether it can execute the command
            # when the workflow is not active.
            run_immediately = not is_active and self.backend.can_execute(
                command)
            if run_immediately:
                ts_start = get_current_time()
                task = TaskHandle(task_id=get_unique_identifier(),
                                  project_id=project_id,
                                  controller=self)
                result = self.backend.execute(task=task,
                                              command=command,
                                              context=task_context(datasets))
                ts = ModuleTimestamp(created_at=ts_start,
                                     started_at=ts_start,
                                     finished_at=get_current_time())
                # The handle for the executed module depends on the outcome
                if not result.is_success:
                    module = ModuleHandle(state=mstate.MODULE_ERROR,
                                          command=command,
                                          external_form=external_form,
                                          timestamp=ts,
                                          outputs=result.outputs)
                else:
                    new_state = result.provenance.get_database_state(
                        modules[-1].datasets if len(modules) > 0 else dict())
                    module = ModuleHandle(state=mstate.MODULE_SUCCESS,
                                          command=command,
                                          external_form=external_form,
                                          timestamp=ts,
                                          datasets=new_state,
                                          outputs=result.outputs,
                                          provenance=result.provenance)
                workflow = branch.append_workflow(modules=modules,
                                                  action=wf.ACTION_APPEND,
                                                  command=command,
                                                  pending_modules=[module])
            else:
                # Pending for an active workflow, canceled when the last
                # module failed or was canceled, otherwise the state comes
                # from the backend.
                if is_active:
                    state = mstate.MODULE_PENDING
                elif is_error:
                    state = mstate.MODULE_CANCELED
                else:
                    state = self.backend.next_task_state()
                appended = ModuleHandle(state=state,
                                        command=command,
                                        external_form=external_form)
                workflow = branch.append_workflow(modules=modules,
                                                  action=wf.ACTION_APPEND,
                                                  command=command,
                                                  pending_modules=[appended])
                # Neither pending nor canceled modules are executed here
                if not (is_active or state == mstate.MODULE_CANCELED):
                    self.execute_module(project_id=project_id,
                                        branch_id=branch_id,
                                        module=workflow.modules[-1],
                                        datasets=datasets)
        return workflow.modules[-1]