def vm_poll(self, vm): """ vm_poll -- Polls a running VM, updates its status, and returns its state Parameters: vm -- vm to poll Note: If VM does not appear to be running any longer, it will be destroyed. """ # Create an epr for our poll command vm_epr = nimbus_xml.ws_epr_factory(vm.id, vm.clusteraddr) # Create workspace poll command ws_cmd = self.vmpoll_factory(vm_epr) log.verbose("(vm_poll) - Running Nimbus poll command:\n%s" % string.join(ws_cmd, " ")) # Execute the workspace poll (wait, retrieve return code, stdout, and stderr) (poll_return, poll_out, poll_err) = self.vm_execwait(ws_cmd, env=vm.get_env()) poll_out = poll_out + poll_err with self.vms_lock: # Print output, and parse the VM status from it vm.hostname = self._extract_hostname(poll_out) new_status = self._extract_state(poll_out) if new_status == "Destroyed": log.info("Discarding VM %s because Nimbus has destroyed it" % vm.id) self.vm_destroy(vm, shutdown_first=False) vm.status = new_status elif vm.status != new_status: vm.last_state_change = int(time.time()) vm.status = new_status # If there was some other error we're not aware of (temporary network problem, etc...) elif (poll_return != 0): log.warning("(vm_poll) - Failed polling VM %s (ID: %s): %s %s" % (vm.name, vm.id, poll_out, poll_err)) log.debug("(vm_poll) - Setting VM status to \'Error\'") vm.status = "Error" # Tidy up and return os.remove(vm_epr) vm.lastpoll = int(time.time()) return vm.status
def vm_poll(self, vm): """ vm_poll -- Polls a running VM, updates its status, and returns its state Parameters: vm -- vm to poll Note: If VM does not appear to be running any longer, it will be destroyed. """ # Retire not actually bad, just don't want that state overwritten bad_status = ("Destroyed", "NoProxy", "ExpiredProxy") special_status = ("Retiring", "TempBanned", "HeldBadReqs", "HTTPFail, BrokenPipe") # Create an epr for our poll command vm_epr = nimbus_xml.ws_epr_factory(vm.id, vm.clusteraddr, vm.clusterport) # Create workspace poll command ws_cmd = self.vmpoll_factory(vm_epr) log.verbose("Polling Nimbus with:\n%s" % string.join(ws_cmd, " ")) # Execute the workspace poll (wait, retrieve return code, stdout, and stderr) (poll_return, poll_out, poll_err) = self.vm_execwait(ws_cmd, env=vm.get_env()) poll_out = poll_out + poll_err with self.vms_lock: # Print output, and parse the VM status from it #vm.hostname = self._extract_hostname(poll_out) new_status = self._extract_state(poll_out) if new_status == "Destroyed": self.vm_destroy(vm, shutdown_first=False, reason="Nimbus has already destroyed VM") vm.status = new_status elif new_status == "NoProxy": vm.override_status = new_status log.error( "Problem polling VM %s. You don't have a valid proxy." % vm.id) elif new_status == "ExpiredProxy": vm.override_status = new_status log.error( "Problem polling VM %s. Your proxy expired. Proxy File: %s" % (vm.id, vm.proxy_file)) elif new_status == "ConnectionRefused": vm.override_status = new_status log.error("Unable to connect to nimbus service on %s" % vm.clusteraddr) elif new_status == "BrokenPipe": vm.override_status = new_status log.error( "Broken Pipe error on %s. Check max_clients in libvirtd.conf on nodes." % vm.clusteraddr) elif vm.status != new_status: vm.last_state_change = int(time.time()) log.debug("VM: %s on %s. Changed from %s to %s." % (vm.id, self.name, vm.status, new_status)) vm.status = new_status elif vm.override_status != None and new_status not in bad_status and vm.override_status not in special_status: vm.override_status = None vm.errorconnect = None # If there was some other error we're not aware of (temporary network problem, etc...) elif (poll_return != 0): if poll_out == "" or poll_out == None: poll_out = "No Output returned." if poll_err == "" or poll_err == None: poll_err = "No Error output returned." log.warning("There was a problem polling VM %s: %s %s %s" % (vm.id, poll_out, poll_err, poll_return)) # Tidy up and return os.remove(vm_epr) vm.lastpoll = int(time.time()) return vm.status
def vm_destroy(self, vm, return_resources=True, reason="", shutdown_first=True): """ Shutdown, destroy and return resources of a VM to it's cluster Parameters: vm -- vm to shutdown and destroy return_resources -- if set to false, do not return resources from VM to cluster shutdown_first -- if set to false, will first call a shutdown before destroying """ # Create an epr for workspace.sh vm_epr = nimbus_xml.ws_epr_factory(vm.id, vm.clusteraddr, vm.clusterport) if vm.clusteraddr != self.network_address: log.error( "Attempting to destroy a VM on wrong cluster - vm belongs to %s, but this is %s. Abort" % (vm.clusteraddr, self.networ_address)) return -1 if shutdown_first: # Create the workspace command with shutdown option shutdown_cmd = self.vmshutdown_factory(vm_epr) log.verbose("Shutting down VM with command: " + string.join(shutdown_cmd, " ")) # Execute the workspace shutdown command. shutdown_return = self.vm_exec_silent(shutdown_cmd, env=vm.get_env()) if (shutdown_return != 0): log.debug( "(vm_destroy) - VM shutdown request failed, moving directly to destroy." ) else: log.verbose( "(vm_destroy) - workspace shutdown command executed successfully." ) # Sleep for a few seconds to allow for proper shutdown log.verbose("Waiting %ss for VM to shut down..." % self.VM_SHUTDOWN) time.sleep(self.VM_SHUTDOWN) # Create the workspace command with destroy option as a list (priv.) destroy_cmd = self.vmdestroy_factory(vm_epr) log.verbose("Destroying VM with command: " + string.join(destroy_cmd, " ")) # Execute the workspace destroy command: wait for return, stdout to log. (destroy_return, destroy_out, destroy_error) = self.vm_execwait(destroy_cmd, env=vm.get_env()) destroy_out = destroy_out + destroy_error # Check destroy return code. If successful, continue. Otherwise, set VM to # error state (wait, and the polling thread will attempt a destroy later) if (destroy_return != 0): if "Destroyed" == self._extract_state(destroy_error): log.debug("VM %s seems to have already been destroyed." % vm.id) else: if destroy_out == "" or destroy_out == None: destroy_out = "No Output returned." if destroy_error == "" or destroy_error == None: destroy_error = "No Error output returned." log.warning( "VM %s was not correctly destroyed: %s %s %s" % (vm.id, destroy_out, destroy_error, destroy_return)) vm.status = "Error" os.remove(vm_epr) return destroy_return # Delete VM proxy if (vm.get_proxy_file()): log.verbose("Cleaning up proxy for VM %s (%s)" % (vm.id, vm.get_proxy_file())) try: os.remove(vm.get_proxy_file()) except: log.exception("Problem removing VM proxy file") # Return checked out resources And remove VM from the Cluster's 'vms' list with self.vms_lock: try: self.vms.remove(vm) except ValueError: log.error( "Attempted to remove vm from list that was already removed." ) return_resources = False if return_resources: self.resource_return(vm) # Delete EPR os.remove(vm_epr) log.info("Destroyed VM: %s Name: %s Reason: %s" % (vm.id, vm.hostname, reason)) return destroy_return
def vm_poll(self, vm): """ vm_poll -- Polls a running VM, updates its status, and returns its state Parameters: vm -- vm to poll Note: If VM does not appear to be running any longer, it will be destroyed. """ # Retire not actually bad, just don't want that state overwritten bad_status = ("Destroyed", "NoProxy", "ExpiredProxy") special_status = ("Retiring", "TempBanned", "HeldBadReqs", "HTTPFail, BrokenPipe") # Create an epr for our poll command vm_epr = nimbus_xml.ws_epr_factory(vm.id, vm.clusteraddr, vm.clusterport) # Create workspace poll command ws_cmd = self.vmpoll_factory(vm_epr) log.verbose("Polling Nimbus with:\n%s" % string.join(ws_cmd, " ")) # Execute the workspace poll (wait, retrieve return code, stdout, and stderr) (poll_return, poll_out, poll_err) = self.vm_execwait(ws_cmd, env=vm.get_env()) poll_out = poll_out + poll_err with self.vms_lock: # Print output, and parse the VM status from it #vm.hostname = self._extract_hostname(poll_out) new_status = self._extract_state(poll_out) if new_status == "Destroyed": self.vm_destroy(vm, shutdown_first=False, reason="Nimbus has already destroyed VM") vm.status = new_status elif new_status == "NoProxy": vm.override_status = new_status log.error("Problem polling VM %s. You don't have a valid proxy." % vm.id) elif new_status == "ExpiredProxy": vm.override_status = new_status log.error("Problem polling VM %s. Your proxy expired. Proxy File: %s" % (vm.id, vm.proxy_file)) elif new_status == "ConnectionRefused": vm.override_status = new_status log.error("Unable to connect to nimbus service on %s" % vm.clusteraddr) elif new_status == "BrokenPipe": vm.override_status = new_status log.error("Broken Pipe error on %s. Check max_clients in libvirtd.conf on nodes." % vm.clusteraddr) elif vm.status != new_status: vm.last_state_change = int(time.time()) log.debug("VM: %s on %s. Changed from %s to %s." % (vm.id, self.name, vm.status, new_status)) vm.status = new_status elif vm.override_status != None and new_status not in bad_status and vm.override_status not in special_status: vm.override_status = None vm.errorconnect = None # If there was some other error we're not aware of (temporary network problem, etc...) elif (poll_return != 0): if poll_out == "" or poll_out == None: poll_out = "No Output returned." if poll_err == "" or poll_err == None: poll_err = "No Error output returned." log.warning("There was a problem polling VM %s: %s %s %s" % (vm.id, poll_out, poll_err, poll_return)) # Tidy up and return os.remove(vm_epr) vm.lastpoll = int(time.time()) return vm.status
def vm_destroy(self, vm, return_resources=True, reason="", shutdown_first=True): """ Shutdown, destroy and return resources of a VM to it's cluster Parameters: vm -- vm to shutdown and destroy return_resources -- if set to false, do not return resources from VM to cluster shutdown_first -- if set to false, will first call a shutdown before destroying """ # Create an epr for workspace.sh vm_epr = nimbus_xml.ws_epr_factory(vm.id, vm.clusteraddr, vm.clusterport) if vm.clusteraddr != self.network_address: log.error("Attempting to destroy a VM on wrong cluster - vm belongs to %s, but this is %s. Abort" % (vm.clusteraddr, self.networ_address)) return -1 if shutdown_first: # Create the workspace command with shutdown option shutdown_cmd = self.vmshutdown_factory(vm_epr) log.verbose("Shutting down VM with command: " + string.join(shutdown_cmd, " ")) # Execute the workspace shutdown command. shutdown_return = self.vm_exec_silent(shutdown_cmd, env=vm.get_env()) if (shutdown_return != 0): log.debug("(vm_destroy) - VM shutdown request failed, moving directly to destroy.") else: log.verbose("(vm_destroy) - workspace shutdown command executed successfully.") # Sleep for a few seconds to allow for proper shutdown log.verbose("Waiting %ss for VM to shut down..." % self.VM_SHUTDOWN) time.sleep(self.VM_SHUTDOWN) # Create the workspace command with destroy option as a list (priv.) destroy_cmd = self.vmdestroy_factory(vm_epr) log.verbose("Destroying VM with command: " + string.join(destroy_cmd, " ")) # Execute the workspace destroy command: wait for return, stdout to log. (destroy_return, destroy_out, destroy_error) = self.vm_execwait(destroy_cmd, env=vm.get_env()) destroy_out = destroy_out + destroy_error # Check destroy return code. If successful, continue. Otherwise, set VM to # error state (wait, and the polling thread will attempt a destroy later) if (destroy_return != 0): if "Destroyed" == self._extract_state(destroy_error): log.debug("VM %s seems to have already been destroyed." % vm.id) else: if destroy_out == "" or destroy_out == None: destroy_out = "No Output returned." if destroy_error == "" or destroy_error == None: destroy_error = "No Error output returned." log.warning("VM %s was not correctly destroyed: %s %s %s" % (vm.id, destroy_out, destroy_error, destroy_return)) vm.status = "Error" os.remove(vm_epr) return destroy_return # Delete VM proxy if (vm.get_proxy_file()) : log.verbose("Cleaning up proxy for VM %s (%s)" % (vm.id, vm.get_proxy_file())) try: os.remove(vm.get_proxy_file()) except: log.exception("Problem removing VM proxy file") # Return checked out resources And remove VM from the Cluster's 'vms' list with self.vms_lock: try: self.vms.remove(vm) except ValueError: log.error("Attempted to remove vm from list that was already removed.") return_resources = False if return_resources: self.resource_return(vm) # Delete EPR os.remove(vm_epr) log.info("Destroyed VM: %s Name: %s Reason: %s" % (vm.id, vm.hostname, reason)) return destroy_return
def vm_destroy(self, vm, return_resources=True, shutdown_first=True):
    """
    Shut down and destroy a VM, and return its resources to its cluster

    Parameters:
        vm -- vm to shutdown and destroy
        return_resources -- if set to false, do not return resources from VM to cluster
        shutdown_first -- if set to false, skip the shutdown call and destroy the VM immediately
    """
    # Create an EPR for workspace.sh
    vm_epr = nimbus_xml.ws_epr_factory(vm.id, vm.clusteraddr)

    if shutdown_first:
        # Create the workspace command with shutdown option
        shutdown_cmd = self.vmshutdown_factory(vm_epr)
        log.verbose("Shutting down VM with command: " + string.join(shutdown_cmd, " "))

        # Execute the workspace shutdown command.
        shutdown_return = self.vm_exec_silent(shutdown_cmd, env=vm.get_env())
        if (shutdown_return != 0):
            log.debug("(vm_destroy) - VM shutdown request failed, moving directly to destroy.")
        else:
            log.debug("(vm_destroy) - workspace shutdown command executed successfully.")
            # Sleep for a few seconds to allow for proper shutdown
            log.debug("Waiting %ss for VM to shut down..." % self.VM_SHUTDOWN)
            time.sleep(self.VM_SHUTDOWN)

    # Create the workspace command with destroy option as a list (priv.)
    destroy_cmd = self.vmdestroy_factory(vm_epr)
    log.verbose("Destroying VM with command: " + string.join(destroy_cmd, " "))

    # Execute the workspace destroy command: wait for return, stdout to log.
    (destroy_return, destroy_out, destroy_error) = self.vm_execwait(destroy_cmd, env=vm.get_env())
    destroy_out = destroy_out + destroy_error

    # Check the destroy return code. If successful, continue. Otherwise, set the VM to an
    # error state (wait, and the polling thread will attempt a destroy later)
    if (destroy_return != 0):
        if "Destroyed" == self._extract_state(destroy_out):
            log.debug("VM %s seems to have already been destroyed." % vm.id)
        else:
            log.warning("(vm_destroy) - VM %s was not correctly destroyed: %s %s" % (vm.id, destroy_out, destroy_error))
            vm.status = "Error"
            if vm.errorcount < config.polling_error_threshold:
                # Clean up the EPR before the early return so it isn't leaked
                os.remove(vm_epr)
                return destroy_return

    # Return checked-out resources and remove the VM from the cluster's 'vms' list
    with self.vms_lock:
        try:
            self.vms.remove(vm)
        except ValueError:
            log.error("Attempted to remove a VM from the list that was already removed.")
            return_resources = False
    if return_resources:
        self.resource_return(vm)

    # Delete the EPR
    os.remove(vm_epr)

    log.info("Destroyed VM %s on %s" % (vm.id, vm.clusteraddr))

    return destroy_return
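# Hedged sketch of the command factories used above. The workspace.sh path
# and the exact flag names (--destroy, --shutdown, -e) are assumptions for
# illustration; the real vmdestroy_factory / vmshutdown_factory are defined
# elsewhere in this class and may build the command differently.
WORKSPACE_CMD = "workspace.sh"  # assumed to be on PATH

def vmdestroy_factory_sketch(epr_file):
    """Build a workspace destroy command as an argument list."""
    return [WORKSPACE_CMD, "--destroy", "-e", epr_file]

def vmshutdown_factory_sketch(epr_file):
    """Build a workspace shutdown command as an argument list."""
    return [WORKSPACE_CMD, "--shutdown", "-e", epr_file]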