def registerNodeManager(self, host, instances):
    """Called by the NM every so often as a keep-alive/state polling -- state changes here are NOT AUTHORITATIVE

    Stamps the contact time for the reporting host, copies the version,
    memory and core count it reports onto the stored host record, and
    flips the stored host between ``HostState.VersionMismatch`` and
    ``HostState.Normal`` according to the reported version.

    Args:
        host: host record sent by the node manager. When ``host.id`` is
            None the id is looked up by ``host.name`` and written back
            onto ``host``.
        instances: instances the node manager reports.

    Returns:
        The id of the registering host.

    Raises:
        TashiException: ``Errors.NoSuchHost`` when the name does not match
            exactly one known host, ``Errors.NoSuchHostId`` when the stored
            record for that id carries a different name.
    """
    # Handle a new registration
    if host.id is None:
        hostList = [
            h for h in self.data.getHosts().itervalues()
            if h.name == host.name
        ]
        if len(hostList) != 1:
            raise TashiException(
                d={
                    'errno': Errors.NoSuchHost,
                    'msg': 'A host with name %s is not identifiable' % (host.name)
                })
        host.id = hostList[0].id

    # Check if remote host information matches mine
    oldHost = self.data.acquireHost(host.id)
    try:
        if oldHost.name != host.name:
            raise TashiException(
                d={
                    'errno': Errors.NoSuchHostId,
                    'msg': 'Host id and hostname mismatch'
                })

        # Kept as an equality test on purpose: a value of None does not
        # count as "down" here.
        if oldHost.up == False:
            self.__upHost(oldHost)
        self.hostLastContactTime[host.id] = self.__now()
        oldHost.version = host.version
        oldHost.memory = host.memory
        oldHost.cores = host.cores

        # compare whether CM / NM versions are compatible
        if host.version != version and not self.allowMismatchedVersions:
            oldHost.state = HostState.VersionMismatch
        if host.version == version and oldHost.state == HostState.VersionMismatch:
            oldHost.state = HostState.Normal

        # let the host communicate what it is running
        # and note that the information is not stale
        for instance in instances:
            if instance.state == InstanceState.Exited:
                self.log.warning("%s reporting exited instance %s, ignoring." %
                                 (host.name, instance.id))
                continue
            self.instanceLastContactTime.setdefault(instance.id, 0)
    finally:
        # Always hand the host back: an exception anywhere above would
        # otherwise leave the host locked for every later caller.
        self.data.releaseHost(oldHost)
    return host.id
def __normalize(self, instance):
    """Reset and validate a caller-supplied instance description.

    Clears the id, vmId and hostId fields, marks the instance as not
    decayed and Pending, scrubs the name down to letters, digits, '-' and
    '.', checks the core and memory requests against the configured
    limits, and drops every hint whose key starts with "__".

    Args:
        instance: the instance to normalize; it is modified in place.

    Returns:
        The same ``instance`` object.

    Raises:
        TashiException: ``Errors.InvalidInstance`` when the name is already
            in use (and duplicates are not allowed) or when cores/memory
            fall outside ``1..maxCores`` / ``1..maxMemory``.
    """
    instance.id = None
    instance.vmId = None
    instance.hostId = None
    instance.decayed = False
    instance.name = scrubString(
        instance.name,
        allowed="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-.")
    instance.state = InstanceState.Pending
    # XXXstroucki At some point, check userId
    if not self.allowDuplicateNames:
        for i in self.data.getInstances().itervalues():
            if i.name == instance.name:
                raise TashiException(
                    d={
                        'errno': Errors.InvalidInstance,
                        'msg': "The name %s is already in use" % (instance.name)
                    })
    if instance.cores < 1:
        raise TashiException(
            d={
                'errno': Errors.InvalidInstance,
                'msg': "Number of cores must be >= 1"
            })
    if instance.cores > self.maxCores:
        raise TashiException(
            d={
                'errno': Errors.InvalidInstance,
                'msg': "Number of cores must be <= %d" % (self.maxCores)
            })
    if instance.memory < 1:
        raise TashiException(
            d={
                'errno': Errors.InvalidInstance,
                'msg': "Amount of memory must be >= 1"
            })
    if instance.memory > self.maxMemory:
        raise TashiException(
            d={
                'errno': Errors.InvalidInstance,
                'msg': "Amount of memory must be <= %d" % (self.maxMemory)
            })
    # Make sure disk spec is valid
    # Make sure network spec is valid
    # Ignore internal hints
    # Walk a snapshot of the keys: deleting from a dict while iterating the
    # dict itself raises "dictionary changed size during iteration".
    for hint in list(instance.hints):
        if hint.startswith("__"):
            del instance.hints[hint]
    return instance
def parseDisks(arg):
    """Parse a comma separated "image[:persistent]" list into disk configs.

    Each entry is split at the first ":"; the left part must be the name
    of an image reported by ``client.getImages()``, the right part is
    converted with ``boolean`` and defaults to "False" when empty.

    Args:
        arg: the raw command line value, e.g. "base.img:True,scratch.img".

    Returns:
        A list of ``DiskConfiguration`` objects, one per entry.

    Raises:
        ValueError: when the argument cannot be parsed.

    An unknown image name prints the error message and exits the process
    with status -1.
    """
    # A set gives constant-time membership checks for every disk entry.
    validImageNames = set(i.imageName for i in client.getImages())
    try:
        disks = []
        for strDisk in arg.split(","):
            strDisk = strDisk.strip()
            (l, __s, r) = stringPartition(strDisk, ":")
            if l not in validImageNames:
                raise TashiException({
                    'msg': "Invalid disk image name: %s. See \"tashi-client getImages\" for a list of valid images." % l
                })
            if r == "":
                r = "False"
            r = boolean(r)
            disks.append(DiskConfiguration(d={'uri': l, 'persistent': r}))
        return disks
    except TashiException as e:
        # Single-argument call form prints the same on Python 2 and 3.
        print(e.msg)
        sys.exit(-1)
    except Exception:
        # Only genuine parse failures become ValueError; KeyboardInterrupt
        # and SystemExit are no longer swallowed by a bare except.
        raise ValueError("Incorrect format for disks argument")
def suspendVm(self, instanceId):
    """Move a running instance to Suspending and ask its host to suspend it.

    The instance is acquired only for the state transition; it is released
    again before the node manager is contacted.

    Args:
        instanceId: id of the instance to suspend.

    Returns:
        A human readable "<name> is suspending." message.

    Raises:
        TashiException: from the state transition when the instance is not
            Running, or ``Errors.UnableToSuspend`` when the call to the
            node manager fails.
    """
    instance = self.data.acquireInstance(instanceId)
    try:
        self.__stateTransition(instance, InstanceState.Running,
                               InstanceState.Suspending)
    finally:
        # Release on every exit path, not only on TashiException, so an
        # unexpected error cannot leave the instance locked.
        self.data.releaseInstance(instance)

    self.__ACCOUNT("CM VM SUSPEND", instance=instance)
    hostname = self.data.getHost(instance.hostId).name
    destination = "suspend/%d_%s" % (instance.id, instance.name)
    try:
        self.proxy[hostname].suspendVm(instance.vmId, destination)
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # are not rewritten into an UnableToSuspend error.
        self.log.exception('suspendVm failed for host %s vmId %d' %
                           (hostname, instance.vmId))
        raise TashiException(
            d={
                'errno': Errors.UnableToSuspend,
                'msg': 'Failed to suspend %s' % (instance.name)
            })
    return "%s is suspending." % (instance.name)
def acquireInstance(self, instanceId):
    """Load an instance row, lock it and flag it busy.

    Spins on ``instanceLock`` until the instance is not flagged in
    ``instanceBusy``, then reads the row, attaches the per-instance lock
    as ``_lock``, acquires it and sets the busy flag.

    Args:
        instanceId: id of the instance to acquire.

    Returns:
        The instance built by ``makeListInstance``, with ``_lock`` held.

    Raises:
        TashiException: ``Errors.NoSuchInstanceId`` when no row matches.
    """
    # Leave the loop holding instanceLock; let go of it while the instance
    # is still flagged busy so its holder can make progress.
    while True:
        self.instanceLock.acquire()
        if not self.instanceBusy.setdefault(instanceId, False):
            break
        self.instanceLock.release()

    try:
        row = self.executeStatement(
            "SELECT * from instances WHERE id = %d" % (instanceId)).fetchone()
        if not row:
            raise TashiException(
                d={
                    'errno': Errors.NoSuchInstanceId,
                    'msg': "No such instanceId - %d" % (instanceId)
                })
        instance = self.makeListInstance(row)
        # One lock object per instance id, created on first use.
        instance._lock = self.instanceLocks.setdefault(instance.id,
                                                       threading.Lock())
        instance._lock.acquire()
        self.instanceBusy[instance.id] = True
    finally:
        self.instanceLock.release()
    return instance
def getHost(self, _id):
    """Return the host stored under ``_id``.

    Args:
        _id: key into ``self.hosts``.

    Returns:
        The stored host object.

    Raises:
        TashiException: ``Errors.NoSuchHostId`` when nothing (or a falsy
            value) is stored under ``_id``.
    """
    found = self.hosts.get(_id)
    if found:
        return found
    raise TashiException(d={
        'errno': Errors.NoSuchHostId,
        'msg': "No such hostId - %s" % (_id)
    })
def getInstance(self, _id):
    """Return the instance stored under ``_id``.

    Args:
        _id: key into ``self.instances``.

    Returns:
        The stored instance object.

    Raises:
        TashiException: ``Errors.NoSuchInstanceId`` when nothing (or a
            falsy value) is stored under ``_id``.
    """
    instance = self.instances.get(_id, None)
    if not instance:
        # %s with a one-tuple: a non-integer id (None, a string, a tuple)
        # must still produce NoSuchInstanceId rather than a TypeError from
        # the message formatting. Integer ids render exactly as before.
        raise TashiException(
            d={
                'errno': Errors.NoSuchInstanceId,
                'msg': "No such instanceId - %s" % (_id,)
            })
    return instance
def getUser():
    """Return the id of the user this client acts as.

    The user name is ``client.username`` when set, otherwise the USER
    environment variable (falling back to "unknown"). ``fetchUsers()`` is
    called first, then ``users`` is searched for that name.

    Returns:
        The ``id`` of the first entry in ``users`` whose name matches.

    Raises:
        TashiException: when no entry matches.
    """
    fetchUsers()
    userStr = client.username
    if userStr is None:
        userStr = os.getenv("USER", "unknown")
    for key in users:
        entry = users[key]
        if entry.name == userStr:
            return entry.id
    raise TashiException({'msg': "Unknown user %s" % (userStr)})
def checkHid(host):
    """Resolve a host given as a numeric id or as a host name.

    Args:
        host: an integer, a string holding an integer, or a host name.

    Returns:
        The host id. A numeric argument is returned as an int without
        being checked against the host list.

    Raises:
        TashiException: when a non-numeric argument names no known host.
    """
    hosts = client.getHosts()
    hostId = None
    try:
        hostId = int(host)
    except (TypeError, ValueError):
        # Not a number: treat it as a host name. Only the two errors int()
        # raises for bad input are handled, so Ctrl-C is not swallowed.
        for h in hosts:
            if h.name == host:
                hostId = h.id
    if hostId is None:
        raise TashiException({'msg': "Unknown host %s" % (str(host))})
    # XXXstroucki permissions for host related stuff?
    return hostId
def newFunc(*args, **kw):
    """Call ``oldFunc`` and optionally convert stray exceptions.

    A ``TashiException`` always propagates unchanged. Anything else is
    replaced by a ``TashiException`` carrying ``Errors.ConvertedException``
    and the formatted traceback when the first positional argument has
    ``convertExceptions`` set; otherwise it is re-raised as is.
    """
    try:
        result = oldFunc(*args, **kw)
    except TashiException:
        raise
    except:
        # Deliberate catch-all: this wrapper is the conversion point.
        owner = args[0]
        if not owner.convertExceptions:
            raise
        raise TashiException(
            d={
                'errno': Errors.ConvertedException,
                'msg': traceback.format_exc(10)
            })
    return result
def checkUid(user):
    """Resolve a user given as a numeric id or as a user name.

    Args:
        user: an integer, a string holding an integer, or a user name.

    Returns:
        The user id. A numeric argument is returned as an int without
        being checked against the user list.

    Raises:
        TashiException: when a non-numeric argument names no known user.
    """
    users = client.getUsers()
    userId = None
    try:
        userId = int(user)
    except (TypeError, ValueError):
        # Not a number: treat it as a user name. Only the two errors int()
        # raises for bad input are handled, so Ctrl-C is not swallowed.
        for u in users:
            if u.name == user:
                userId = u.id
    if userId is None:
        raise TashiException({'msg': "Unknown user %s" % (str(user))})
    # XXXstroucki permissions for user related stuff?
    return userId
def acquireInstance(self, instanceId):
    """Look up an instance and acquire its per-instance lock.

    ``instanceLock`` is held for the duration of the lookup and of the
    per-instance lock acquisition, and is always released before
    returning or raising.

    Args:
        instanceId: key into ``self.instances``.

    Returns:
        The stored instance, with ``instance._lock`` acquired.

    Raises:
        TashiException: ``Errors.NoSuchInstanceId`` when the id is unknown.
    """
    self.acquireLock(self.instanceLock)
    try:
        instance = self.instances.get(instanceId, None)
        if instance is None:
            # %s with a one-tuple so a non-integer id still yields
            # NoSuchInstanceId instead of a TypeError while formatting;
            # integer ids render exactly as before.
            raise TashiException(
                d={
                    'errno': Errors.NoSuchInstanceId,
                    'msg': "No such instanceId - %s" % (instanceId,)
                })
        self.acquireLock(instance._lock)
    finally:
        self.releaseLock(self.instanceLock)
    return instance
def checkIid(instance):
    """Resolve an instance name or numeric id and check access to it.

    Args:
        instance: an integer, a string holding an integer, or an instance
            name.

    Returns:
        The instance id.

    Raises:
        TashiException: when a non-numeric argument names no known
            instance, or when the matching instance belongs to another
            user and the caller is not uid 0.
    """
    userId = getUser()
    instances = client.getInstances()
    instanceId = None
    try:
        instanceId = int(instance)
    except (TypeError, ValueError):
        # Not a number: treat it as an instance name. Only the two errors
        # int() raises for bad input are handled.
        for i in instances:
            if i.name == instance:
                instanceId = i.id
    if instanceId is None:
        raise TashiException({'msg': "Unknown instance %s" % (str(instance))})
    # A separate loop variable keeps the caller's argument intact instead
    # of silently rebinding the ``instance`` parameter.
    for candidate in instances:
        if candidate.id == instanceId:
            # XXXstroucki uid 0 to have superuser access
            # how about admin groups?
            if (candidate.userId != userId
                    and candidate.userId is not None
                    and userId != 0):
                raise TashiException({
                    'msg': "You don't have permissions on VM %s" % candidate.name
                })
    return instanceId
def __stateTransition(self, instance, old, cur):
    """Move ``instance`` to state ``cur``, optionally checking its state.

    Args:
        instance: the instance whose ``state`` is updated in place.
        old: required current state; a falsy value skips the check.
        cur: the state to set.

    Raises:
        TashiException: ``Errors.IncorrectVmState`` when ``old`` is given
            and the instance is in a different state.
    """
    present = instance.state
    if old and present != old:
        raise TashiException(
            d={
                'errno': Errors.IncorrectVmState,
                'msg': "VmState is not %s - it is %s" %
                       (vmStates[old], vmStates[present])
            })
    # Nothing to write when the instance is already in the target state.
    if present != cur:
        instance.state = cur
def resumeVm(self, instance, name):
    """Resume a suspended VM on this node manager.

    Marks the instance Resuming, assigns it to this host, asks the VMM to
    resume it, records it in ``self.instances`` under the new vmId and
    starts ``__resumeVmHelper`` on a separate thread.

    Args:
        instance: the instance to resume; state, hostId and vmId are
            updated in place.
        name: passed through to ``vmm.resumeVm`` and the helper thread.

    Returns:
        The vmId returned by the VMM.

    Raises:
        TashiException: ``Errors.UnableToResume`` when resuming or
            starting the helper thread fails.
    """
    self.__ACCOUNT("NM VM RESUME", instance=instance)
    instance.state = InstanceState.Resuming
    instance.hostId = self.id
    try:
        instance.vmId = self.vmm.resumeVm(instance, name)
        self.instances[instance.vmId] = instance
        threading.Thread(target=self.__resumeVmHelper,
                         args=(instance, name)).start()
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # are not rewritten into an UnableToResume error.
        self.log.exception('resumeVm failed')
        raise TashiException(
            d={
                'errno': Errors.UnableToResume,
                'msg': "resumeVm failed on the node manager"
            })
    return instance.vmId
def getHost(self, in_id):
    """Fetch one host row by id and build a host object from it.

    Args:
        in_id: the host id; it is converted with ``int()``.

    Returns:
        The object produced by ``makeListHost`` for the matching row.

    Raises:
        TypeError: re-raised (after logging) when ``int(in_id)`` fails
            with a TypeError.
        TashiException: ``Errors.NoSuchHostId`` when no row matches.
    """
    try:
        _id = int(in_id)
    except TypeError:
        self.log.exception("Argument to getHost was not integer: %s" % in_id)
        raise

    query = "SELECT * FROM hosts WHERE id = %d" % _id
    row = self.executeStatement(query).fetchone()
    if row is None:
        raise TashiException(d={
            'errno': Errors.NoSuchHostId,
            'msg': "No such hostId - %s" % (_id)
        })
    return self.makeListHost(row)
def releaseInstance(self, instance):
    """Release the per-instance lock held on ``instance``.

    Args:
        instance: an object whose exact type is ``Instance``.

    Raises:
        TypeError: when ``instance`` is not exactly an ``Instance``; the
            lock is not touched in that case.
        TashiException: ``Errors.NoSuchInstanceId`` when the instance id
            is not in ``self.instances``. The lock is released regardless.
    """
    actual = type(instance)
    if actual is not Instance:
        self.log.exception(
            "Argument is not of type Instance, but of type %s" % (actual))
        raise TypeError

    try:
        known = instance.id in self.instances
        # MPR: should never be true, but good to check
        if not known:
            raise TashiException(
                d={
                    'errno': Errors.NoSuchInstanceId,
                    'msg': "No such instanceId - %d" % (instance.id)
                })
    finally:
        self.releaseLock(instance._lock)
def releaseHost(self, host):
    """Save state and release the locks taken for ``host``.

    Calls ``self.save()``, then releases the per-host lock and
    ``self.hostLock``, in that order.

    Args:
        host: an object whose exact type is ``Host``.

    Raises:
        TypeError: when ``host`` is not exactly a ``Host``; no lock is
            touched in that case.
        TashiException: ``Errors.NoSuchHostId`` when the host id is not in
            ``self.hosts``. Saving and unlocking still happen.
    """
    if type(host) is not Host:
        self.log.exception("Argument is not of type Host, but of type %s" %
                           (type(host)))
        raise TypeError
    try:
        if host.id not in self.hosts:
            # MPR: should never be true, but good to check
            raise TashiException(
                d={
                    'errno': Errors.NoSuchHostId,
                    'msg': "No such hostId - %s" % (host.id)
                })
    finally:
        # Nested so a failure in save() or in releasing the per-host lock
        # can no longer leave hostLock held.
        try:
            self.save()
        finally:
            try:
                self.releaseLock(host._lock)
            finally:
                self.hostLock.release()
def acquireHost(self, hostId):
    """Look up a host and return it with its locks held.

    On success both ``self.hostLock`` and the per-host lock (attached to
    the host as ``_lock``) are left held for the caller. On any failure
    ``self.hostLock`` is released again before the exception propagates.

    Args:
        hostId: the host id; must be exactly of type ``int``.

    Returns:
        The stored host object.

    Raises:
        TypeError: when ``hostId`` is not an ``int``.
        TashiException: ``Errors.NoSuchHostId`` when the id is unknown.
    """
    if type(hostId) is not int:
        self.log.exception("Argument is not of type int, but of type %s" %
                           (type(hostId)))
        raise TypeError

    self.hostLock.acquire()
    try:
        host = self.hosts.get(hostId, None)
        if host is None:
            raise TashiException(
                d={
                    'errno': Errors.NoSuchHostId,
                    'msg': "No such hostId - %s" % (hostId)
                })
        # hostLocks dict added when registerHost was implemented, otherwise newly added hosts don't have _lock
        self.hostLocks[hostId] = self.hostLocks.get(hostId, threading.Lock())
        host._lock = self.hostLocks[host.id]
        self.acquireLock(host._lock)
    except BaseException:
        # The caller receives no host on failure, so it has nothing to
        # release; give hostLock back here or the next caller blocks.
        self.hostLock.release()
        raise
    return host
def getInstance(self, in_id):
    """Fetch one instance row by id and build an instance from it.

    Args:
        in_id: the instance id; it is converted with ``int()``.

    Returns:
        The object produced by ``makeListInstance`` for the first
        matching row.

    Raises:
        TypeError: re-raised (after logging) when ``int(in_id)`` fails
            with a TypeError.
        TashiException: ``Errors.NoSuchInstanceId`` when no row matches.
    """
    try:
        _id = int(in_id)
    except TypeError:
        self.log.exception("Argument to getInstance was not integer: %s" %
                           in_id)
        raise

    query = "SELECT * FROM instances WHERE id = %d" % (_id)
    # XXXstroucki should only return one row.
    # what about migration? should it be enforced?
    row = self.executeStatement(query).fetchone()
    if row:
        return self.makeListInstance(row)
    raise TashiException(
        d={
            'errno': Errors.NoSuchInstanceId,
            'msg': "No such instanceId - %d" % (_id)
        })
def __getInstance(self, vmId):
    """Return the instance known under ``vmId`` on this host.

    When the vmId is not in ``self.instances`` the table is refreshed once
    through ``__loadVmInfo`` and the lookup is retried.

    Args:
        vmId: key into ``self.instances``.

    Returns:
        The instance, with ``hostId`` set to this node's id.

    Raises:
        TashiException: ``Errors.NoSuchVmId`` when the vmId is still
            unknown after the refresh.
    """
    found = self.instances.get(vmId, None)
    if found is None:
        # refresh self.instances if not found
        self.__loadVmInfo()
        found = self.instances.get(vmId, None)
    if found is None:
        raise TashiException(
            d={
                'errno': Errors.NoSuchVmId,
                'msg': "There is no vmId %d on this host" % (vmId)
            })
    # XXXstroucki: force to my own hostId here. Is this the
    # right place?
    found.hostId = self.id
    return found
def __shutdownOrDestroyMany(method, basename):
    """Shut down or destroy every instance named "<basename>-<digit>...".

    Args:
        method: "shutdown" or "destroy".
        basename: the common name prefix, without the trailing dash.

    Returns:
        None.

    Raises:
        ValueError: when a matching instance is found and ``method`` is
            neither "shutdown" nor "destroy".
        TashiException: when no instance matches ``basename``, or from
            ``checkIid`` when the caller lacks permission on a match.
    """
    instances = client.getInstances()
    prefix = basename + "-"
    digitAt = len(prefix)
    count = 0
    for i in instances:
        # Slice rather than index: a name that is exactly "<basename>-" has
        # no character after the dash; the empty slice is simply not a
        # digit, where indexing raised IndexError.
        if (i.name.startswith(prefix)
                and i.name[digitAt:digitAt + 1].isdigit()):
            # checking permissions here
            checkIid(i.name)
            if method == "shutdown":
                client.shutdownVm(i.id)
            elif method == "destroy":
                client.destroyVm(i.id)
            else:
                raise ValueError("Unknown method")
            count = count + 1
    if count == 0:
        raise TashiException({'msg': "%s is an unused basename" % basename})
    return None
def __do(self, name, *args, **kwargs):
    """Invoke attribute ``name`` of the connection with a timeout.

    Connects first when there is no connection. Both the attribute lookup
    and the call itself run through a ``TimeoutThread`` on a separate
    thread and are waited on for 10 seconds. The connection is dropped
    (set to None) when the lookup times out or when the call stage raises.

    Args:
        name: attribute to fetch from the connection and call.
        *args, **kwargs: forwarded to the fetched callable.

    Returns:
        Whatever the call produced.

    Raises:
        TimeoutException: when the attribute lookup times out.
        TashiException: when the fetched attribute is not callable.
        Exception / TashiException: when the call returned an object whose
            exact type is one of those two, that object is raised.
    """
    if self.connection is None:
        self.__connect()

    threadname = "%s:%s" % (self.host, self.port)

    # XXXstroucki: Use 10 second timeout, ok?
    # XXXstroucki: does this fn touch the network?
    lookup = TimeoutThread(getattr, (self.connection, name, None))
    threading.Thread(name=threadname, target=lookup.run).start()
    try:
        remotefn = lookup.wait(timeout=10)
    except TimeoutException:
        self.connection = None
        raise

    try:
        if not callable(remotefn):
            raise TashiException({'msg': '%s not callable' % name})
        # XXXstroucki: Use 10 second timeout, ok?
        invocation = TimeoutThread(remotefn, args, kwargs)
        threading.Thread(name=threadname, target=invocation.run).start()
        returns = invocation.wait(timeout=10.0)
    except:
        # Any failure here invalidates the connection; the error is passed on.
        self.connection = None
        raise

    # if we get a remote exception, raise it locally
    resultType = type(returns)
    if resultType is Exception or resultType is TashiException:
        raise returns
    return returns
def activateVm(self, instanceId, host):
    """Place an instance on ``host`` and start (or resume) it there.

    Steps, in order:
      1. Acquire the stored host record, verify name, ``up`` flag and
         ``HostState.Normal``, then release it again.
      2. Acquire the instance, move it to Resuming (when the
         '__resume_source' hint is present) or Activating, set its
         hostId and release it.
      3. Call ``resumeVm`` or ``instantiateVm`` on the host's proxy with
         no lock held.
      4. Re-acquire the instance to record the vmId, or to clean up when
         the call failed or the instance turned Destroying meanwhile.

    Args:
        instanceId: id of the instance to activate.
        host: target host; its ``id`` and ``name`` are read.

    Returns:
        "success" or "failure".

    Raises:
        TashiException: ``Errors.HostNameMismatch``, ``Errors.HostNotUp``
            or ``Errors.HostStateError`` when the target host check fails.
    """
    # XXXstroucki: check my idea of the host's capacity before
    # trying.
    dataHost = self.data.acquireHost(host.id)
    # Every rejection below releases the host before raising.
    if (dataHost.name != host.name):
        self.data.releaseHost(dataHost)
        raise TashiException(d={
            'errno': Errors.HostNameMismatch,
            'msg': "Mismatched target host"
        })
    if (not dataHost.up):
        self.data.releaseHost(dataHost)
        raise TashiException(d={
            'errno': Errors.HostNotUp,
            'msg': "Target host is not up"
        })
    if (dataHost.state != HostState.Normal):
        self.data.releaseHost(dataHost)
        raise TashiException(
            d={
                'errno': Errors.HostStateError,
                'msg': "Target host state is not normal"
            })
    self.data.releaseHost(dataHost)
    instance = self.data.acquireInstance(instanceId)
    # NOTE(review): an exception from __ACCOUNT or __stateTransition here
    # would leave the instance acquired -- confirm whether that can happen.
    self.__ACCOUNT("CM VM ACTIVATE", instance=instance)
    if ('__resume_source' in instance.hints):
        self.__stateTransition(instance, None, InstanceState.Resuming)
    else:
        # XXXstroucki should held VMs be continually tried? Or be explicitly set back to pending?
        #self.__stateTransition(instance, InstanceState.Pending, InstanceState.Activating)
        self.__stateTransition(instance, None, InstanceState.Activating)
    instance.hostId = host.id
    self.data.releaseInstance(instance)
    # The remote call is made with the instance released; the object is
    # still read here for its hints.
    try:
        if ('__resume_source' in instance.hints):
            vmId = self.proxy[host.name].resumeVm(
                instance, instance.hints['__resume_source'])
        else:
            vmId = self.proxy[host.name].instantiateVm(instance)
    except Exception:
        instance = self.data.acquireInstance(instanceId)
        # NOTE(review): `is` compares identity, not value; `==` may be the
        # safer comparison for a state constant -- confirm.
        if (instance.state is InstanceState.Destroying):  # Special case for if destroyVm is called during initialization and initialization fails
            # No releaseInstance after removeInstance: presumably the
            # removal gives up the instance lock itself -- TODO confirm.
            self.data.removeInstance(instance)
        else:
            # XXXstroucki what can we do about pending hosts in the scheduler?
            # put them at the end of the queue and keep trying?
            self.__stateTransition(instance, None, InstanceState.Held)
            instance.hostId = None
            self.data.releaseInstance(instance)
        return "failure"
    instance = self.data.acquireInstance(instanceId)
    instance.vmId = vmId
    if (instance.state is InstanceState.Destroying):  # Special case for if destroyVm is called during initialization
        try:
            self.proxy[host.name].destroyVm(vmId)
            self.data.removeInstance(instance)
        except Exception:
            self.log.exception('destroyVm failed for host %s vmId %d' %
                               (host.name, instance.vmId))
            self.data.releaseInstance(instance)
            return "failure"
    else:
        if ('__resume_source' not in instance.hints):
            # XXXstroucki should we just wait for NM to update?
            #self.__stateTransition(instance, InstanceState.Activating, InstanceState.Running)
            pass
        self.data.releaseInstance(instance)
    return "success"