Esempio n. 1
0
    def ev_close(self, worker):
        """
        Finalize the action once its worker has terminated.

        Calls the base Action handler and self.server.lustre_check(), then
        sends one of the 'timeout', 'done' or 'failed' events to the server
        and sets the action status accordingly.
        """
        Action.ev_close(self, worker)
        self.server.lustre_check()

        # Timeout: the event carries no result object.
        if worker.did_timeout():
            self.server.action_event(self, 'timeout')
            self.set_status(ACT_ERROR)
            return

        retcode = worker.retcode()

        # Non-zero exit code: attach the worker output to the error result.
        if retcode != 0:
            failure = ErrorResult(worker.read(), self.duration, retcode)
            self.server.action_event(self, 'failed', failure)
            self.set_status(ACT_ERROR)
            return

        # Zero exit code: the action succeeded.
        success = Result(duration=self.duration, retcode=retcode)
        self.server.action_event(self, 'done', success)
        self.set_status(ACT_OK)
Esempio n. 2
0
    def ev_close(self, worker):
        """
        Inspect the fsck exit code and emit the matching event.

        Exit codes 0, 1, 2 and 4 are reported as 'done' (ACT_OK), so a run
        that corrected errors is still considered successful. Any other exit
        code is reported as 'failed' (ACT_ERROR). Timeouts are delegated to
        FSAction.ev_close().
        """
        if worker.did_timeout():
            return FSAction.ev_close(self, worker)

        # Deliberately bypass FSAction.ev_close(): call the layer above it.
        Action.ev_close(self, worker)

        self.comp.lustre_check()

        retcode = worker.retcode()

        # Exit codes handled as success here (see man fsck):
        # 0=NOERROR, 1=OK_BUT_CORRECTION, 2=OK_BUT_REBOOT,
        # 4=errors found but left uncorrected (-n).
        if retcode not in (0, 1, 2, 4):
            # action failed
            output = "\n".join(self._output)
            failure = ErrorResult(output, self.duration, retcode)
            self.comp.action_event(self, 'failed', failure)
            self.set_status(ACT_ERROR)
            return

        # action succeeded; exit code 0 keeps the default Result message
        result = Result(duration=self.duration, retcode=retcode)
        if retcode == 4:  # -n
            result.message = "Errors found but NOT corrected"
        elif retcode in (1, 2):
            result.message = "Errors corrected"
        self.comp.action_event(self, 'done', result)
        self.set_status(ACT_OK)
Esempio n. 3
0
File: Fsck.py Progetto: thiell/shine
    def ev_close(self, worker):
        """
        Translate the fsck termination status into a component event.

        A timeout is handed over to FSAction.ev_close(). Otherwise exit codes
        0, 1, 2 and 4 produce a 'done' event and ACT_OK (fsck having fixed
        some errors still counts as success), and every other exit code
        produces a 'failed' event and ACT_ERROR.
        """
        if worker.did_timeout():
            return FSAction.ev_close(self, worker)

        # FSAction.ev_close() is skipped on purpose; only the upper layer
        # handler is run.
        Action.ev_close(self, worker)

        self.comp.lustre_check()

        exit_code = worker.retcode()

        # Codes accepted as success (see man fsck):
        # 0=NOERROR, 1=OK_BUT_CORRECTION, 2=OK_BUT_REBOOT,
        # 4=errors found but left uncorrected (-n).
        if exit_code not in (0, 1, 2, 4):
            # action failed
            text = "\n".join(self._output)
            error = ErrorResult(text, self.duration, exit_code)
            self.comp.action_event(self, 'failed', error)
            self.set_status(ACT_ERROR)
            return

        # action succeeded; exit code 0 keeps the default Result message
        result = Result(duration=self.duration, retcode=exit_code)
        if exit_code == 4:  # -n
            result.message = "Errors found but NOT corrected"
        elif exit_code in (1, 2):
            result.message = "Errors corrected"
        self.comp.action_event(self, 'done', result)
        self.set_status(ACT_OK)
Esempio n. 4
0
    def ev_close(self, worker):
        """
        Set the action status and report copy errors per node.

        On timeout, the timed-out nodes are reported through
        self.fs._handle_shine_proxy_error(). Otherwise the status is ACT_OK
        when the highest return code is 0; if not, every buffer from nodes
        with a non-zero return code is reported as "Copy failed" and the
        status is ACT_ERROR.
        """
        Action.ev_close(self, worker)

        # Action timed out
        if worker.did_timeout():
            timed_out = NodeSet.fromlist(worker.iter_keys_timeout())
            self.fs._handle_shine_proxy_error(timed_out, "Nodes timed out")
            self.set_status(ACT_ERROR)
            return

        # Action succeeded
        # NOTE(review): max() raises ValueError if iter_retcodes() yields
        # nothing -- confirm this cannot happen without a timeout.
        if max(rc for rc, _ in worker.iter_retcodes()) == 0:
            self.set_status(ACT_OK)
            return

        # Action failed
        for rc, failed_nodes in worker.iter_retcodes():
            if rc != 0:
                # Avoid warnings, flag this component in error state
                for comp in self._comps or []:
                    comp.sanitize_state(nodes=worker.nodes)

                buffers = worker.iter_buffers(match_keys=failed_nodes)
                for output, keys in buffers:
                    nodeset = NodeSet.fromlist(keys)
                    self.fs._handle_shine_proxy_error(
                        nodeset, "Copy failed: %s" % output)

        self.set_status(ACT_ERROR)
Esempio n. 5
0
    def ev_close(self, worker):
        """
        Derive the action status from the worker and report failures.

        Timed-out nodes are reported through
        self.fs._handle_shine_proxy_error(). When there is no timeout and the
        highest return code is 0 the status is ACT_OK. Otherwise each output
        buffer from nodes with a non-zero return code is reported as
        "Copy failed" and the status is ACT_ERROR.
        """
        Action.ev_close(self, worker)

        # Action timed out
        if worker.did_timeout():
            late_nodes = NodeSet.fromlist(worker.iter_keys_timeout())
            self.fs._handle_shine_proxy_error(late_nodes, "Nodes timed out")
            self.set_status(ACT_ERROR)
            return

        # Action succeeded
        # NOTE(review): max() raises ValueError if iter_retcodes() yields
        # nothing -- confirm this cannot happen without a timeout.
        highest_rc = max(rc for rc, _ in worker.iter_retcodes())
        if highest_rc == 0:
            self.set_status(ACT_OK)
            return

        # Action failed
        for rc, bad_nodes in worker.iter_retcodes():
            if rc != 0:
                # Avoid warnings, flag this component in error state
                for comp in self._comps or []:
                    comp.sanitize_state(nodes=worker.nodes)

                for output, keys in worker.iter_buffers(match_keys=bad_nodes):
                    self.fs._handle_shine_proxy_error(
                        NodeSet.fromlist(keys), "Copy failed: %s" % output)

        self.set_status(ACT_ERROR)
Esempio n. 6
0
    def ev_close(self, worker):
        """
        End of proxy command.

        Sets ACT_ERROR immediately on timeout. Otherwise sends the 'done'
        event to every component and sanitizes its state. It then reports an
        error for each output buffer of nodes with a non-zero return code,
        for each entry in self._errpickle and for self._silentnodes. The
        final status is ACT_ERROR if any return code was non-zero, else
        ACT_OK.
        """
        Action.ev_close(self, worker)

        # First check that the shine command ran without bugs, node crash,
        # etc.: all node return codes are verified and the component state is
        # changed on the bad nodes.

        # Action timed out
        if worker.did_timeout():
            self.set_status(ACT_ERROR)
            return

        final_status = ACT_OK

        # Remove the 'proxy' running action for each component.
        for comp in self._comps or []:
            # This special event helps to keep track of undergoing actions
            # (see ev_start())
            comp.action_event(self, 'done')
            comp.sanitize_state(nodes=worker.nodes)

        # Gather nodes by return code
        for rc, failed_nodes in worker.iter_retcodes():
            # Remote command returns only RUNTIME_ERROR (See RemoteCommand)
            # some common remote errors:
            # rc 127 = command not found
            # rc 126 = found but not executable
            # rc 1 = python failure...
            if rc == 0:
                continue

            # If there is at least one error, the action is on error.
            final_status = ACT_ERROR

            # Gather these nodes by buffer
            is_failed = failed_nodes.__contains__
            for buffers, keys in self._outputs.walk(match=is_failed):
                # Handle proxy command error
                nodeset = NodeSet.fromlist(keys)
                text = "Remote action %s failed: %s\n" % (self.action,
                                                          buffers)
                self.fs._handle_shine_proxy_error(nodeset, text)

        # Raise errors for each unpickling error,
        # which could happen mostly when Shine exits with 0.
        for buffers, keys in self._errpickle.walk():
            self.fs._handle_shine_proxy_error(NodeSet.fromlist(keys),
                                              str(buffers))

        # Raise an error for nodes without output
        if len(self._silentnodes) > 0:
            self.fs._handle_shine_proxy_error(
                self._silentnodes,
                "Remote action %s failed: No response" % self.action)

        self.set_status(final_status)
Esempio n. 7
0
    def ev_close(self, worker):
        """
        Set the final action status from the worker outcome.

        The status is ACT_OK only when the worker did not time out and the
        highest return code it reports is 0; otherwise it is ACT_ERROR.
        """
        Action.ev_close(self, worker)

        # Short-circuit: return codes are only examined when there was no
        # timeout.
        # NOTE(review): max() raises ValueError if iter_retcodes() yields
        # nothing -- confirm this cannot happen without a timeout.
        succeeded = (not worker.did_timeout()
                     and max(rc for rc, _ in worker.iter_retcodes()) == 0)

        self.set_status(ACT_OK if succeeded else ACT_ERROR)
Esempio n. 8
0
    def ev_close(self, worker):
        """
        Compute and store the action status when the worker closes.

        A timeout, or any return code above 0, yields ACT_ERROR; the status
        is ACT_OK only when the highest reported return code is 0.
        """
        Action.ev_close(self, worker)

        if worker.did_timeout():
            # Action timed out
            outcome = ACT_ERROR
        else:
            # NOTE(review): max() raises ValueError if iter_retcodes() yields
            # nothing -- confirm this cannot happen without a timeout.
            highest_rc = max(rc for rc, _ in worker.iter_retcodes())
            # Action succeeded only if no node returned a non-zero code
            outcome = ACT_OK if highest_rc == 0 else ACT_ERROR

        self.set_status(outcome)
Esempio n. 9
0
    def ev_close(self, worker):
        """
        Report the worker outcome to the server and set the action status.

        After the base Action handler and self.server.lustre_check(), the
        server receives a 'timeout' event, a 'done' event with a Result, or
        a 'failed' event with an ErrorResult built from the worker output.
        """
        Action.ev_close(self, worker)
        self.server.lustre_check()

        # Action timed out
        if worker.did_timeout():
            self.server.action_event(self, 'timeout')
            self.set_status(ACT_ERROR)
            return

        exit_code = worker.retcode()

        # Action succeeded
        if exit_code == 0:
            outcome = Result(duration=self.duration, retcode=exit_code)
            self.server.action_event(self, 'done', outcome)
            self.set_status(ACT_OK)
            return

        # Action failed
        outcome = ErrorResult(worker.read(), self.duration, exit_code)
        self.server.action_event(self, 'failed', outcome)
        self.set_status(ACT_ERROR)
Esempio n. 10
0
    def ev_close(self, worker):
        """
        End of proxy command.

        Sets ACT_ERROR immediately on timeout. Otherwise removes the 'proxy'
        running action from every component and forces any component whose
        state is None or INPROGRESS to RUNTIME_ERROR. It then reports an
        error for each output buffer of nodes with a non-zero return code,
        for each entry in self._errpickle and for self._silentnodes. The
        final status is ACT_ERROR if any return code was non-zero, else
        ACT_OK.
        """
        Action.ev_close(self, worker)

        # Before all, we must check if shine command ran without bugs, node
        # crash, etc...
        # So we need to verify all node retcodes and change the component state
        # on the bad nodes.

        # Action timed out
        if worker.did_timeout():
            self.set_status(ACT_ERROR)
            return

        status = ACT_OK

        # Remove the 'proxy' running action for each component.
        if self._comps:
            for comp in self._comps:
                # XXX: This should be changed using a real event for proxy.
                comp._del_action('proxy')

                if comp.state is None:
                    comp.state = RUNTIME_ERROR

                # At this step, there should be no more INPROGRESS component.
                # If yes, this is a bug, change state to RUNTIME_ERROR.
                # INPROGRESS management could be change using running action
                # list.
                # Starting with v1.3, there is no more code setting INPROGRESS.
                # This is for compatibility with older clients.
                elif comp.state == INPROGRESS:
                    actions = ""
                    pending = comp._list_action()
                    if len(pending):
                        actions = "actions: " + ", ".join(pending)
                    # Write to stderr explicitly: the Python 2 only
                    # "print >> stream" statement raises TypeError when run
                    # under Python 3.
                    sys.stderr.write("ERROR: bad state for %s: %d %s\n" %
                                     (comp.label, comp.state, actions))
                    comp.state = RUNTIME_ERROR

        # Gather nodes by return code
        for rc, nodes in worker.iter_retcodes():
            # Remote command returns only RUNTIME_ERROR (See RemoteCommand)
            # some common remote errors:
            # rc 127 = command not found
            # rc 126 = found but not executable
            # rc 1 = python failure...
            if rc != 0:

                # If there is at least one error, the action is on error.
                status = ACT_ERROR

                # Gather these nodes by buffer. The membership test is bound
                # to the failed node set before the inner loop rebinds names.
                key = nodes.__contains__
                for buffers, keys in self._outputs.walk(match=key):
                    # Handle proxy command error
                    failed = NodeSet.fromlist(keys)
                    msg = "Remote action %s failed: %s\n" % \
                                                        (self.action, buffers)
                    self.fs._handle_shine_proxy_error(failed, msg)

        # Raise errors for each unpickling error,
        # which could happen mostly when Shine exits with 0.
        for buffers, keys in self._errpickle.walk():
            self.fs._handle_shine_proxy_error(NodeSet.fromlist(keys),
                                              str(buffers))

        # Raise an error for nodes without output
        if len(self._silentnodes) > 0:
            msg = "Remote action %s failed: No response" % self.action
            self.fs._handle_shine_proxy_error(self._silentnodes, msg)

        self.set_status(status)