    def vm_poll(self, vm):
        """
        vm_poll -- Polls a running VM, updates its status, and returns its state

        Parameters:
            vm -- vm to poll

        Note: If the VM no longer appears to be running, it will be destroyed.
        """

        # Create an epr for our poll command
        vm_epr = nimbus_xml.ws_epr_factory(vm.id, vm.clusteraddr)

        # Create workspace poll command
        ws_cmd = self.vmpoll_factory(vm_epr)
        log.verbose("(vm_poll) - Running Nimbus poll command:\n%s" % string.join(ws_cmd, " "))

        # Execute the workspace poll (wait, retrieve return code, stdout, and stderr)
        (poll_return, poll_out, poll_err) = self.vm_execwait(ws_cmd, env=vm.get_env())
        poll_out = poll_out + poll_err
        with self.vms_lock:

            # Parse the hostname and VM status from the poll output
            vm.hostname = self._extract_hostname(poll_out)
            new_status = self._extract_state(poll_out)
            if new_status == "Destroyed":
                log.info("Discarding VM %s because Nimbus has destroyed it" % vm.id)
                self.vm_destroy(vm, shutdown_first=False)
                vm.status = new_status

            elif vm.status != new_status:
                vm.last_state_change = int(time.time())
                vm.status = new_status

            # If there was some other error we're not aware of (temporary network problem, etc...)
            elif (poll_return != 0):
                log.warning("(vm_poll) - Failed polling VM %s (ID: %s): %s %s" % (vm.name, vm.id, poll_out, poll_err))
                log.debug("(vm_poll) - Setting VM status to \'Error\'")
                vm.status = "Error"

        # Tidy up and return
        os.remove(vm_epr)
        vm.lastpoll = int(time.time())
        return vm.status
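# --- Illustrative usage (not from the original module) ---
# The method above is normally driven by a monitoring thread that walks a
# cluster's VM list. The sketch below is a hypothetical polling loop; the
# `cluster` object, its `vms` list, `vms_lock`, and the module-level `log`
# are assumed to match the attributes used in the snippets on this page.
import time

def poll_cluster(cluster, interval=60):
    """Poll every VM registered with `cluster` and log any VM left in Error."""
    while True:
        # Copy the list under the lock so vm_poll/vm_destroy may mutate it safely.
        with cluster.vms_lock:
            vms = list(cluster.vms)
        for vm in vms:
            status = cluster.vm_poll(vm)
            if status == "Error":
                # vm_poll leaves errored VMs in place; a later pass can retry or destroy them.
                log.debug("VM %s is in an Error state" % vm.id)
        time.sleep(interval)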
Example #2
    def vm_poll(self, vm):
        """
        vm_poll -- Polls a running VM, updates its status, and returns its state

        Parameters:
            vm -- vm to poll

        Note: If the VM no longer appears to be running, it will be destroyed.
        """
        # Retiring is not actually a bad state; we just don't want it to be overwritten
        bad_status = ("Destroyed", "NoProxy", "ExpiredProxy")
        special_status = ("Retiring", "TempBanned", "HeldBadReqs",
                          "HTTPFail", "BrokenPipe")
        # Create an epr for our poll command
        vm_epr = nimbus_xml.ws_epr_factory(vm.id, vm.clusteraddr,
                                           vm.clusterport)

        # Create workspace poll command
        ws_cmd = self.vmpoll_factory(vm_epr)
        log.verbose("Polling Nimbus with:\n%s" % string.join(ws_cmd, " "))

        # Execute the workspace poll (wait, retrieve return code, stdout, and stderr)
        (poll_return, poll_out, poll_err) = self.vm_execwait(ws_cmd,
                                                             env=vm.get_env())
        poll_out = poll_out + poll_err

        with self.vms_lock:

            # Parse the VM status from the poll output
            #vm.hostname = self._extract_hostname(poll_out)
            new_status = self._extract_state(poll_out)
            if new_status == "Destroyed":
                self.vm_destroy(vm,
                                shutdown_first=False,
                                reason="Nimbus has already destroyed VM")
                vm.status = new_status

            elif new_status == "NoProxy":
                vm.override_status = new_status
                log.error(
                    "Problem polling VM %s. You don't have a valid proxy." %
                    vm.id)

            elif new_status == "ExpiredProxy":
                vm.override_status = new_status
                log.error(
                    "Problem polling VM %s. Your proxy expired. Proxy File: %s"
                    % (vm.id, vm.proxy_file))

            elif new_status == "ConnectionRefused":
                vm.override_status = new_status
                log.error("Unable to connect to nimbus service on %s" %
                          vm.clusteraddr)

            elif new_status == "BrokenPipe":
                vm.override_status = new_status
                log.error(
                    "Broken Pipe error on %s. Check max_clients in libvirtd.conf on nodes."
                    % vm.clusteraddr)

            elif vm.status != new_status:
                vm.last_state_change = int(time.time())
                log.debug("VM: %s on %s. Changed from %s to %s." %
                          (vm.id, self.name, vm.status, new_status))
                vm.status = new_status

            elif (vm.override_status is not None and new_status not in bad_status
                  and vm.override_status not in special_status):
                vm.override_status = None
                vm.errorconnect = None

            # If there was some other error we're not aware of (temporary network problem, etc...)
            elif (poll_return != 0):
                if not poll_out:
                    poll_out = "No Output returned."
                if not poll_err:
                    poll_err = "No Error output returned."
                log.warning("There was a problem polling VM %s: %s %s %s" %
                            (vm.id, poll_out, poll_err, poll_return))

        # Tidy up and return
        os.remove(vm_epr)
        vm.lastpoll = int(time.time())
        return vm.status
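# --- Illustrative helper (not from the original module) ---
# `_extract_state` is referenced above but not shown. The sketch below is a
# hypothetical version of such a helper, assuming the workspace poll output
# contains a line of the form "State: <state>" and that the error conditions
# handled above can be detected by substring matching; the real parsing rules
# may differ.
import re

def _extract_state_sketch(output):
    """Map raw workspace poll output to a coarse VM status string."""
    if not output:
        return "Error"
    lowered = output.lower()
    if "proxy expired" in lowered:
        return "ExpiredProxy"
    if "no proxy" in lowered or "defective credential" in lowered:
        return "NoProxy"
    if "connection refused" in lowered:
        return "ConnectionRefused"
    if "broken pipe" in lowered:
        return "BrokenPipe"
    match = re.search(r"State:\s*(\w+)", output)
    if match:
        return match.group(1)  # e.g. "Running", "Propagated", "Destroyed"
    return "Error"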
Example #3
    def vm_destroy(self,
                   vm,
                   return_resources=True,
                   reason="",
                   shutdown_first=True):
        """
        Shutdown, destroy, and return the resources of a VM to its cluster

        Parameters:
        vm -- vm to shutdown and destroy
        return_resources -- if set to False, do not return the VM's resources to the cluster
        reason -- optional string describing why the VM was destroyed (included in the final log message)
        shutdown_first -- if set to False, destroy the VM without attempting a graceful shutdown first
        """

        # Create an epr for workspace.sh
        vm_epr = nimbus_xml.ws_epr_factory(vm.id, vm.clusteraddr,
                                           vm.clusterport)
        if vm.clusteraddr != self.network_address:
            log.error(
                "Attempting to destroy a VM on the wrong cluster - VM belongs to %s, but this is %s. Aborting."
                % (vm.clusteraddr, self.network_address))
            return -1

        if shutdown_first:
            # Create the workspace command with shutdown option
            shutdown_cmd = self.vmshutdown_factory(vm_epr)
            log.verbose("Shutting down VM with command: " +
                        string.join(shutdown_cmd, " "))

            # Execute the workspace shutdown command.
            shutdown_return = self.vm_exec_silent(shutdown_cmd,
                                                  env=vm.get_env())
            if (shutdown_return != 0):
                log.debug(
                    "(vm_destroy) - VM shutdown request failed, moving directly to destroy."
                )
            else:
                log.verbose(
                    "(vm_destroy) - workspace shutdown command executed successfully."
                )
                # Sleep for a few seconds to allow for proper shutdown
                log.verbose("Waiting %ss for VM to shut down..." %
                            self.VM_SHUTDOWN)
                time.sleep(self.VM_SHUTDOWN)

        # Create the workspace command with destroy option as a list (priv.)
        destroy_cmd = self.vmdestroy_factory(vm_epr)
        log.verbose("Destroying VM with command: " +
                    string.join(destroy_cmd, " "))

        # Execute the workspace destroy command: wait for return, stdout to log.
        (destroy_return, destroy_out,
         destroy_error) = self.vm_execwait(destroy_cmd, env=vm.get_env())
        destroy_out = destroy_out + destroy_error

        # Check destroy return code. If successful, continue. Otherwise, set VM to
        # error state (wait, and the polling thread will attempt a destroy later)
        if (destroy_return != 0):

            if "Destroyed" == self._extract_state(destroy_error):
                log.debug("VM %s seems to have already been destroyed." %
                          vm.id)
            else:
                if not destroy_out:
                    destroy_out = "No Output returned."
                if not destroy_error:
                    destroy_error = "No Error output returned."
                log.warning(
                    "VM %s was not correctly destroyed: %s %s %s" %
                    (vm.id, destroy_out, destroy_error, destroy_return))
                vm.status = "Error"
                os.remove(vm_epr)
                return destroy_return

        # Delete VM proxy
        if (vm.get_proxy_file()):
            log.verbose("Cleaning up proxy for VM %s (%s)" %
                        (vm.id, vm.get_proxy_file()))
            try:
                os.remove(vm.get_proxy_file())
            except:
                log.exception("Problem removing VM proxy file")

        # Return checked-out resources and remove the VM from the Cluster's 'vms' list
        with self.vms_lock:
            try:
                self.vms.remove(vm)
            except ValueError:
                log.error(
                    "Attempted to remove a VM that had already been removed from the list."
                )
                return_resources = False
        if return_resources:
            self.resource_return(vm)

        # Delete EPR
        os.remove(vm_epr)

        log.info("Destroyed VM: %s Name: %s Reason: %s" %
                 (vm.id, vm.hostname, reason))

        return destroy_return
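# --- Illustrative helpers (not from the original module) ---
# `vm_execwait` and `vm_exec_silent` are used above but not shown. The sketch
# below shows one plausible implementation on top of subprocess, assuming they
# simply run the workspace command, wait for it to finish, and return either
# (returncode, stdout, stderr) or just the return code; the real helpers may
# add timeouts, logging, or environment handling beyond this.
import subprocess

def vm_execwait(cmd, env=None):
    """Run `cmd` (an argument list), wait, and return (returncode, stdout, stderr)."""
    proc = subprocess.Popen(cmd, env=env,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out, err = proc.communicate()
    return (proc.returncode, out, err)

def vm_exec_silent(cmd, env=None):
    """Run `cmd`, discard its output, and return only the exit code."""
    returncode, _out, _err = vm_execwait(cmd, env=env)
    return returncode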
Example #4
    def vm_poll(self, vm):
        """
        vm_poll -- Polls a running VM, updates its status, and returns its state

        Parameters:
            vm -- vm to poll

        Note: If the VM no longer appears to be running, it will be destroyed.
        """
        # Retiring is not actually a bad state; we just don't want it to be overwritten
        bad_status = ("Destroyed", "NoProxy", "ExpiredProxy")
        special_status = ("Retiring", "TempBanned", "HeldBadReqs", "HTTPFail", "BrokenPipe")
        # Create an epr for our poll command
        vm_epr = nimbus_xml.ws_epr_factory(vm.id, vm.clusteraddr, vm.clusterport)

        # Create workspace poll command
        ws_cmd = self.vmpoll_factory(vm_epr)
        log.verbose("Polling Nimbus with:\n%s" % string.join(ws_cmd, " "))

        # Execute the workspace poll (wait, retrieve return code, stdout, and stderr)
        (poll_return, poll_out, poll_err) = self.vm_execwait(ws_cmd, env=vm.get_env())
        poll_out = poll_out + poll_err

        with self.vms_lock:

            # Parse the VM status from the poll output
            #vm.hostname = self._extract_hostname(poll_out)
            new_status = self._extract_state(poll_out)
            if new_status == "Destroyed":
                self.vm_destroy(vm, shutdown_first=False, reason="Nimbus has already destroyed VM")
                vm.status = new_status

            elif new_status == "NoProxy":
                vm.override_status = new_status
                log.error("Problem polling VM %s. You don't have a valid proxy." % vm.id)

            elif new_status == "ExpiredProxy":
                vm.override_status = new_status
                log.error("Problem polling VM %s. Your proxy expired. Proxy File: %s" % (vm.id, vm.proxy_file))

            elif new_status == "ConnectionRefused":
                vm.override_status = new_status
                log.error("Unable to connect to nimbus service on %s" % vm.clusteraddr)

            elif new_status == "BrokenPipe":
                vm.override_status = new_status
                log.error("Broken Pipe error on %s. Check max_clients in libvirtd.conf on nodes." % vm.clusteraddr)

            elif vm.status != new_status:
                vm.last_state_change = int(time.time())
                log.debug("VM: %s on %s. Changed from %s to %s." % (vm.id, self.name, vm.status, new_status))
                vm.status = new_status

            elif vm.override_status is not None and new_status not in bad_status and vm.override_status not in special_status:
                vm.override_status = None
                vm.errorconnect = None

            # If there was some other error we're not aware of (temporary network problem, etc...)
            elif (poll_return != 0):
                if not poll_out:
                    poll_out = "No Output returned."
                if not poll_err:
                    poll_err = "No Error output returned."
                log.warning("There was a problem polling VM %s: %s %s %s" % (vm.id, poll_out, poll_err, poll_return))

        # Tidy up and return
        os.remove(vm_epr)
        vm.lastpoll = int(time.time())
        return vm.status
Example #5
    def vm_destroy(self, vm, return_resources=True, reason="", shutdown_first=True):
        """
        Shutdown, destroy, and return the resources of a VM to its cluster

        Parameters:
        vm -- vm to shutdown and destroy
        return_resources -- if set to False, do not return the VM's resources to the cluster
        reason -- optional string describing why the VM was destroyed (included in the final log message)
        shutdown_first -- if set to False, destroy the VM without attempting a graceful shutdown first
        """

        # Create an epr for workspace.sh
        vm_epr = nimbus_xml.ws_epr_factory(vm.id, vm.clusteraddr, vm.clusterport)
        if vm.clusteraddr != self.network_address:
            log.error("Attempting to destroy a VM on the wrong cluster - VM belongs to %s, but this is %s. Aborting." % (vm.clusteraddr, self.network_address))
            return -1

        if shutdown_first:
            # Create the workspace command with shutdown option
            shutdown_cmd = self.vmshutdown_factory(vm_epr)
            log.verbose("Shutting down VM with command: " + string.join(shutdown_cmd, " "))

            # Execute the workspace shutdown command.
            shutdown_return = self.vm_exec_silent(shutdown_cmd, env=vm.get_env())
            if (shutdown_return != 0):
                log.debug("(vm_destroy) - VM shutdown request failed, moving directly to destroy.")
            else:
                log.verbose("(vm_destroy) - workspace shutdown command executed successfully.")
                # Sleep for a few seconds to allow for proper shutdown
                log.verbose("Waiting %ss for VM to shut down..." % self.VM_SHUTDOWN)
                time.sleep(self.VM_SHUTDOWN)


        # Create the workspace command with destroy option as a list (priv.)
        destroy_cmd = self.vmdestroy_factory(vm_epr)
        log.verbose("Destroying VM with command: " + string.join(destroy_cmd, " "))

        # Execute the workspace destroy command: wait for return, stdout to log.
        (destroy_return, destroy_out, destroy_error) = self.vm_execwait(destroy_cmd, env=vm.get_env())
        destroy_out = destroy_out + destroy_error


        # Check destroy return code. If successful, continue. Otherwise, set VM to
        # error state (wait, and the polling thread will attempt a destroy later)
        if (destroy_return != 0):

            if "Destroyed" == self._extract_state(destroy_error):
                log.debug("VM %s seems to have already been destroyed." % vm.id)
            else:
                if not destroy_out:
                    destroy_out = "No Output returned."
                if not destroy_error:
                    destroy_error = "No Error output returned."
                log.warning("VM %s was not correctly destroyed: %s %s %s" % (vm.id, destroy_out, destroy_error, destroy_return))
                vm.status = "Error"
                os.remove(vm_epr)
                return destroy_return

        # Delete VM proxy
        if vm.get_proxy_file():
            log.verbose("Cleaning up proxy for VM %s (%s)" % (vm.id, vm.get_proxy_file()))
            try:
                os.remove(vm.get_proxy_file())
            except:
                log.exception("Problem removing VM proxy file")

        # Return checked-out resources and remove the VM from the Cluster's 'vms' list
        with self.vms_lock:
            try:
                self.vms.remove(vm)
            except ValueError:
                log.error("Attempted to remove a VM that had already been removed from the list.")
                return_resources = False
        if return_resources:
            self.resource_return(vm)

        # Delete EPR
        os.remove(vm_epr)


        log.info("Destroyed VM: %s Name: %s Reason: %s" % (vm.id, vm.hostname, reason))

        return destroy_return
    def vm_destroy(self, vm, return_resources=True, shutdown_first=True):
        """
        Shutdown, destroy, and return the resources of a VM to its cluster

        Parameters:
        vm -- vm to shutdown and destroy
        return_resources -- if set to False, do not return the VM's resources to the cluster
        shutdown_first -- if set to False, destroy the VM without attempting a graceful shutdown first
        """

        # Create an epr for workspace.sh
        vm_epr = nimbus_xml.ws_epr_factory(vm.id, vm.clusteraddr)

        if shutdown_first:
            # Create the workspace command with shutdown option
            shutdown_cmd = self.vmshutdown_factory(vm_epr)
            log.verbose("Shutting down VM with command: " + string.join(shutdown_cmd, " "))

            # Execute the workspace shutdown command.
            shutdown_return = self.vm_exec_silent(shutdown_cmd, env=vm.get_env())
            if (shutdown_return != 0):
                log.debug("(vm_destroy) - VM shutdown request failed, moving directly to destroy.")
            else:
                log.debug("(vm_destroy) - workspace shutdown command executed successfully.")
                # Sleep for a few seconds to allow for a proper shutdown
                log.debug("Waiting %ss for VM to shut down..." % self.VM_SHUTDOWN)
                time.sleep(self.VM_SHUTDOWN)


        # Create the workspace command with destroy option as a list (priv.)
        destroy_cmd = self.vmdestroy_factory(vm_epr)
        log.verbose("Destroying VM with command: " + string.join(destroy_cmd, " "))

        # Execute the workspace destroy command: wait for return, stdout to log.
        (destroy_return, destroy_out, destroy_error) = self.vm_execwait(destroy_cmd, env=vm.get_env())
        destroy_out = destroy_out + destroy_error


        # Check destroy return code. If successful, continue. Otherwise, set VM to
        # error state (wait, and the polling thread will attempt a destroy later)
        if (destroy_return != 0):

            if "Destroyed" == self._extract_state(destroy_out):
                log.debug("VM %s seems to have already been destroyed." % vm.id)
            else:
                log.warning("(vm_destroy) - VM %s was not correctly destroyed: %s %s" % (vm.id, destroy_out, destroy_error))
                vm.status = "Error"
                if vm.errorcount < config.polling_error_threshold:
                    return destroy_return


        # Return checked-out resources and remove the VM from the Cluster's 'vms' list
        with self.vms_lock:
            try:
                self.vms.remove(vm)
            except ValueError:
                log.error("Attempted to remove a VM that had already been removed from the list.")
                return_resources = False
        if return_resources:
            self.resource_return(vm)

        # Delete EPR
        os.remove(vm_epr)

        log.info("Destroyed vm %s on %s" % (vm.id, vm.clusteraddr))

        return destroy_return
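# --- Illustrative helper (not from the original module) ---
# `nimbus_xml.ws_epr_factory` is used throughout but not shown. Judging from the
# calls above, it writes an endpoint-reference (EPR) file identifying the
# workspace on a given Nimbus service and returns the file's path, which the
# caller later removes with os.remove. The sketch below is a hypothetical
# stand-in; the element names, default port, and service URL layout are
# illustrative and are not guaranteed to match the real Nimbus EPR schema.
import os
import tempfile

def ws_epr_factory_sketch(workspace_id, clusteraddr, clusterport=8443):
    """Write a minimal EPR-style XML file for `workspace_id` and return its path."""
    epr_xml = (
        '<WORKSPACE_EPR xmlns:wsa="http://www.w3.org/2005/08/addressing">\n'
        '  <wsa:Address>https://%s:%s/wsrf/services/WorkspaceService</wsa:Address>\n'
        '  <WorkspaceKey>%s</WorkspaceKey>\n'
        '</WORKSPACE_EPR>\n' % (clusteraddr, clusterport, workspace_id)
    )
    (handle, path) = tempfile.mkstemp(suffix=".epr")
    with os.fdopen(handle, "w") as epr_file:
        epr_file.write(epr_xml)
    return path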