def pool(self, key, image):
    """ pool - Get information about pool(s) of VMs
    """
    self.log.debug("Received pool request(%s, %s)" % (key, image))
    if self.validateKey(key):
        pools = self.tango.preallocator.getAllPools()
        self.log.info("All pools found")
        if image == "":
            result = self.status.obtained_all_pools
        else:
            if image in pools:
                pools = {image: pools[image]}
                self.log.info("Pool image found: %s" % image)
                result = self.status.obtained_pool
            else:
                self.log.info("Invalid image name: %s" % image)
                result = self.status.pool_not_found
        result["pools"] = pools
        result["low_water_mark"] = TangoIntValue("low_water_mark", -1).get()
        result["max_pool_size"] = TangoIntValue("max_pool_size", -1).get()
        return result
    else:
        self.log.info("Key not recognized: %s" % key)
        return self.status.wrong_key
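
# For illustration only: a sketch of the dict the handler above returns for a
# valid key and a known image. The values are hypothetical; each "pools"
# entry has the {"total": ..., "free": ...} shape produced by
# Preallocator.getPool() below, and the two watermark fields default to -1
# when the corresponding Config options are unset.
#
#   {
#       ...status fields from self.status.obtained_pool...,
#       "pools": {"myimage.img": {"total": [1000, 1001], "free": [1001]}},
#       "low_water_mark": -1,
#       "max_pool_size": -1,
#   }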
def test_sharedInt(self):
    if Config.USE_REDIS:
        num1 = TangoIntValue("nextID", 1000)
        num2 = TangoIntValue("nextID", 3000)
        self.assertEqual(num1.get(), 1000)
        self.assertEqual(num1.get(), num2.get())
    else:
        return
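
# A minimal in-memory sketch of the semantics the test above relies on; the
# real TangoIntValue is Redis-backed so values are shared across processes.
# The interface (get/set/increment) matches its uses elsewhere in this
# section; the class-level _store dict is a stand-in for the shared store.
class TangoIntValueSketch:
    _store = {}  # shared across all instances, like a Redis database

    def __init__(self, name, value):
        # Only the first initial value for a given name wins, which is why
        # num2.get() above returns 1000, not 3000.
        self._store.setdefault(name, value)
        self.name = name

    def get(self):
        return self._store[self.name]

    def set(self, value):
        self._store[self.name] = value

    def increment(self):
        self._store[self.name] += 1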
class Preallocator(object):

    def __init__(self, vmms):
        self.machines = TangoDictionary("machines")
        self.lock = threading.Lock()
        self.nextID = TangoIntValue("nextID", 1000)
        self.vmms = vmms
        self.log = logging.getLogger("Preallocator")

    def poolSize(self, vmName):
        """ poolSize - returns the size of the vmName pool, for external callers
        """
        if vmName not in self.machines:
            return 0
        else:
            return len(self.machines.get(vmName)[0])

    def update(self, vm, num):
        """ update - Updates the number of machines of a certain type
        to be preallocated.

        This function is called via the TangoServer HTTP interface.
        It will validate the request, update the machine list, and
        then spawn child threads to do the creation and destruction
        of machines as necessary.
        """
        self.lock.acquire()
        if vm.name not in self.machines:
            self.machines.set(vm.name, [[], TangoQueue(vm.name)])
            self.log.debug("Creating empty pool of %s instances" % (vm.name))
        self.lock.release()

        delta = num - len(self.machines.get(vm.name)[0])
        if delta > 0:
            # We need more machines; spin them up.
            self.log.debug("update: Creating %d new %s instances" %
                           (delta, vm.name))
            threading.Thread(target=self.__create, args=(vm, delta)).start()
        elif delta < 0:
            # We have too many machines; remove them from the pool.
            self.log.debug("update: Destroying %d preallocated %s instances" %
                           (-delta, vm.name))
            for i in range(-1 * delta):
                threading.Thread(target=self.__destroy, args=(vm,)).start()
        # If delta == 0 then we already have the perfect number!

    def allocVM(self, vmName):
        """ allocVM - Allocate a VM from the free list
        """
        vm = None
        if vmName in self.machines:
            self.lock.acquire()
            if not self.machines.get(vmName)[1].empty():
                vm = self.machines.get(vmName)[1].get_nowait()
            self.lock.release()

        # If we're not reusing instances, then crank up a replacement
        if vm and not Config.REUSE_VMS:
            threading.Thread(target=self.__create, args=(vm, 1)).start()

        return vm

    def freeVM(self, vm):
        """ freeVM - Returns a VM instance to the free list
        """
        # Sanity check: Return a VM to the free list only if it is
        # still a member of the pool.
        not_found = False
        self.lock.acquire()
        if vm and vm.id in self.machines.get(vm.name)[0]:
            machine = self.machines.get(vm.name)
            machine[1].put(vm)
            self.machines.set(vm.name, machine)
        else:
            not_found = True
        self.lock.release()

        # The VM is no longer in the pool; destroy it.
        if not_found:
            vmms = self.vmms[vm.vmms]
            vmms.safeDestroyVM(vm)

    def addVM(self, vm):
        """ addVM - add a particular VM instance to the pool
        """
        self.lock.acquire()
        machine = self.machines.get(vm.name)
        machine[0].append(vm.id)
        self.machines.set(vm.name, machine)
        self.lock.release()

    def removeVM(self, vm):
        """ removeVM - remove a particular VM instance from the pool
        """
        self.lock.acquire()
        machine = self.machines.get(vm.name)
        machine[0].remove(vm.id)
        self.machines.set(vm.name, machine)
        self.lock.release()

    def _getNextID(self):
        """ _getNextID - returns next ID to be used for a preallocated
        VM. Preallocated VMs have 4-digit ID numbers between 1000 and 9999.
        """
        self.lock.acquire()
        id = self.nextID.get()
        self.nextID.increment()
        if self.nextID.get() > 9999:
            self.nextID.set(1000)
        self.lock.release()
        return id

    def __create(self, vm, cnt):
        """ __create - Creates count VMs and adds them to the pool

        This function should always be called in a thread since it
        might take a long time to complete.
        """
        vmms = self.vmms[vm.vmms]
        self.log.debug("__create: Using VMMS %s " % (Config.VMMS_NAME))
        for i in range(cnt):
            newVM = copy.deepcopy(vm)
            newVM.id = self._getNextID()
            self.log.debug("__create|calling initializeVM")
            vmms.initializeVM(newVM)
            self.log.debug("__create|done with initializeVM")
            time.sleep(Config.CREATEVM_SECS)
            self.addVM(newVM)
            self.freeVM(newVM)
            self.log.debug("__create: Added vm %s to pool %s " %
                           (newVM.id, newVM.name))

    def __destroy(self, vm):
        """ __destroy - Removes a VM from the pool

        If the user asks for fewer preallocated VMs, then we will
        remove some excess ones. This function should be called in a
        thread context. Notice that we can only remove a free vm, so
        it's possible we might not be able to satisfy the request if
        the free list is empty.
        """
        self.lock.acquire()
        dieVM = self.machines.get(vm.name)[1].get_nowait()
        self.lock.release()

        if dieVM:
            self.removeVM(dieVM)
            vmms = self.vmms[dieVM.vmms]
            vmms.safeDestroyVM(dieVM)

    def createVM(self, vm):
        """ createVM - Called in non-thread context to create a single
        VM and add it to the pool
        """
        vmms = self.vmms[vm.vmms]
        newVM = copy.deepcopy(vm)
        newVM.id = self._getNextID()
        self.log.info("createVM|calling initializeVM")
        vmms.initializeVM(newVM)
        self.log.info("createVM|done with initializeVM")
        self.addVM(newVM)
        self.freeVM(newVM)
        self.log.debug("createVM: Added vm %s to pool %s" %
                       (newVM.id, newVM.name))

    def destroyVM(self, vmName, id):
        """ destroyVM - Called by the delVM API function to remove and
        destroy a particular VM instance from a pool. We only allow
        this function when the system is quiescent (pool size == free
        size).
        """
        if vmName not in self.machines:
            return -1

        dieVM = None
        self.lock.acquire()
        size = self.machines.get(vmName)[1].qsize()
        if size == len(self.machines.get(vmName)[0]):
            for i in range(size):
                vm = self.machines.get(vmName)[1].get_nowait()
                if vm.id != id:
                    self.machines.get(vmName)[1].put(vm)
                else:
                    dieVM = vm
        self.lock.release()

        if dieVM:
            self.removeVM(dieVM)
            vmms = self.vmms[dieVM.vmms]
            vmms.safeDestroyVM(dieVM)
            return 0
        else:
            return -1

    def getAllPools(self):
        result = {}
        for vmName in self.machines:
            result[vmName] = self.getPool(vmName)
        return result

    def getPool(self, vmName):
        """ getPool - returns the members of a pool and its free list
        """
        result = {}
        if vmName not in self.machines:
            return result

        result["total"] = []
        result["free"] = []
        free_list = []
        self.lock.acquire()
        size = self.machines.get(vmName)[1].qsize()
        for i in range(size):
            vm = self.machines.get(vmName)[1].get_nowait()
            free_list.append(vm.id)
            machine = self.machines.get(vmName)
            machine[1].put(vm)
            self.machines.set(vmName, machine)
        self.lock.release()

        result["total"] = self.machines.get(vmName)[0]
        result["free"] = free_list
        return result
class Preallocator:

    def __init__(self, vmms):
        self.machines = TangoDictionary("machines")
        self.lock = threading.Lock()
        self.nextID = TangoIntValue("nextID", 1000)
        self.vmms = vmms
        self.log = logging.getLogger("Preallocator-" + str(os.getpid()))
        self.low_water_mark = TangoIntValue("low_water_mark", -1)
        if (hasattr(Config, 'POOL_SIZE_LOW_WATER_MARK') and
                Config.POOL_SIZE_LOW_WATER_MARK >= 0):
            self.low_water_mark.set(Config.POOL_SIZE_LOW_WATER_MARK)

    def poolSize(self, vmName):
        """ poolSize - returns the size of the vmName pool, for external callers
        """
        if vmName not in self.machines.keys():
            return 0
        else:
            return len(self.machines.get(vmName)[0])

    def freePoolSize(self, vmName):
        """ freePoolSize - returns the size of the vmName free pool,
        for external callers
        """
        if vmName in self.machines.keys():
            return self.machines.get(vmName)[1].qsize()
        else:
            return 0

    def incrementPoolSize(self, vm, delta):
        """ incrementPoolSize - Called by jobQueue to create the pool
        and allocate the given number of vms
        """
        self.log.debug("incrementPoolSize| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("incrementPoolSize| acquired lock on preallocator")
            if vm.name not in self.machines.keys():
                self.machines.set(vm.name, [[], TangoQueue(vm.name)])
                # see comments in jobManager.py for the same call
                self.machines.get(vm.name)[1].make_empty()
                self.log.debug("Creating empty pool of %s instances" % (vm.name))
        self.log.debug("incrementPoolSize| released lock on preallocator")

        self.log.debug("incrementPoolSize: add %d new %s instances" %
                       (delta, vm.name))
        threading.Thread(target=self.__create, args=(vm, delta)).start()

    def update(self, vm, num):
        """ update - Updates the number of machines of a certain type
        to be preallocated.

        This function is called via the TangoServer HTTP interface.
        It will validate the request, update the machine list, and
        then spawn child threads to do the creation and destruction
        of machines as necessary.
        """
        self.log.debug("update| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("update| acquired lock on preallocator")
            if vm.name not in self.machines.keys():
                self.machines.set(vm.name, [[], TangoQueue(vm.name)])
                # see comments in jobManager.py for the same call
                self.machines.get(vm.name)[1].make_empty()
                self.log.debug("Creating empty pool of %s instances" % (vm.name))
        self.log.debug("update| released lock on preallocator")

        delta = num - len(self.machines.get(vm.name)[0])
        if delta > 0:
            # We need more machines; spin them up.
            self.log.debug("update: Creating %d new %s instances" %
                           (delta, vm.name))
            threading.Thread(target=self.__create, args=(vm, delta)).start()
        elif delta < 0:
            # We have too many machines; remove them from the pool.
            self.log.debug("update: Destroying %d preallocated %s instances" %
                           (-delta, vm.name))
            for i in range(-1 * delta):
                threading.Thread(target=self.__destroy, args=(vm,)).start()
        # If delta == 0 then we already have the perfect number!
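
    # Worked example of the delta logic above (hypothetical numbers): if the
    # "myimage.img" pool currently holds 3 VM ids in its total list, then
    # update(vm, 5) computes delta = 2 and starts one creator thread that
    # builds both new instances, while update(vm, 1) computes delta = -2 and
    # starts two destroyer threads that each pop one free VM.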
    def allocVM(self, vmName):
        """ allocVM - Allocate a VM from the free list
        """
        vm = None
        if vmName in self.machines.keys():
            self.log.debug("allocVM| acquiring lock on preallocator")
            with self.lock:
                self.log.debug("allocVM| acquired lock on preallocator")
                if not self.machines.get(vmName)[1].empty():
                    self.log.debug("allocVM| getting (nowait)")
                    vm = self.machines.get(vmName)[1].get_nowait()
                    self.log.debug("allocVM| got (nowait)")
            self.log.debug("allocVM| released lock on " + vmName)

        # If we're not reusing instances, then crank up a replacement
        if vm and not Config.REUSE_VMS:
            threading.Thread(target=self.__create, args=(vm, 1)).start()

        return vm

    def addToFreePool(self, vm):
        """ addToFreePool - Returns a VM instance to the free list
        """
        self.log.debug("addToFreePool| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("addToFreePool| acquired lock on preallocator")
            machine = self.machines.get(vm.name)
            self.log.info("addToFreePool: add %s to free pool" % vm.id)
            machine[1].put(vm)
            self.machines.set(vm.name, machine)
        self.log.debug("addToFreePool| released lock on preallocator")

    def freeVM(self, vm, jobQueue):
        """ freeVM - Returns a VM instance to the free list, or destroys
        it if the free pool is already over the low water mark.
        """
        # Sanity check: Return a VM to the free list only if it is
        # still a member of the pool.
        not_found = False
        should_destroy = False

        # We must access jobQueue prior to acquiring the preallocator lock,
        # since otherwise we may have deadlock. (In other places in the
        # codebase, we acquire the lock on the jobQueue and THEN the lock
        # on the preallocator.)
        self.log.debug("freeVM| acquiring lock on jobQueue")
        with jobQueue.queueLock:
            self.log.debug("freeVM| acquired lock on jobQueue")
            numReadyJobs = jobQueue.numReadyJobsUnsafe()

            self.log.debug("freeVM| acquiring lock on preallocator")
            with self.lock:
                self.log.debug("freeVM| acquired lock on preallocator")
                if vm and vm.id in self.machines.get(vm.name)[0]:
                    lwm = self.low_water_mark.get()
                    if (lwm >= 0 and vm.name in self.machines.keys() and
                            self.freePoolSize(vm.name) - numReadyJobs >= lwm):
                        self.log.info(
                            "freeVM: over low water mark (%d). will destroy %s" %
                            (lwm, vm.id))
                        should_destroy = True
                    else:
                        machine = self.machines.get(vm.name)
                        self.log.info("freeVM: return %s to free pool" % vm.id)
                        machine[1].put(vm)
                        self.machines.set(vm.name, machine)
                else:
                    self.log.info(
                        "freeVM: not found in pool %s. will destroy %s" %
                        (vm.name, vm.id))
                    not_found = True
            self.log.debug("freeVM| released lock on preallocator")
        self.log.debug("freeVM| released lock on jobQueue")

        # Destroy the VM if it was not in the pool, or if the free pool is
        # over its low water mark.
        if not_found or should_destroy:
            self.log.info("freeVM: will destroy %s" % vm.id)
            vmms = self.vmms[vm.vmms]
            self.removeVM(vm)
            vmms.safeDestroyVM(vm)

    def addVM(self, vm):
        """ addVM - add a particular VM instance to the pool
        """
        self.log.debug("addVM| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("addVM| acquired lock on preallocator")
            machine = self.machines.get(vm.name)
            machine[0].append(vm.id)
            self.log.info("addVM: add %s" % vm.id)
            self.machines.set(vm.name, machine)
        self.log.debug("addVM| released lock on preallocator")

    # Note: This function is called from removeVM() to handle the case when
    # a vm is in the free pool. In theory this should never happen, but we
    # want to ensure that. To solve the problem cleanly, preallocator should
    # provide ONE primitive to add/remove a vm from both total and free
    # pools, instead of two disjoint ones.
    def removeFromFreePool(self, vm):
        self.log.debug("removeFromFreePool| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("removeFromFreePool| acquired lock on preallocator")
            size = self.machines.get(vm.name)[1].qsize()
            self.log.info("removeFromFreePool: %s in pool %s" % (vm.id, vm.name))
            for i in range(size):  # go through free pool
                freeVM = self.machines.get(vm.name)[1].get_nowait()
                # put it back into the free pool, if it is not our vm
                if freeVM.id != vm.id:
                    self.machines.get(vm.name)[1].put(freeVM)
                else:
                    self.log.info("removeFromFreePool: found %s in pool %s" %
                                  (vm.id, vm.name))
                    # don't put this particular vm back into the free pool;
                    # that is the removal
        self.log.debug("removeFromFreePool| released lock on preallocator")

    def removeVM(self, vm):
        """ removeVM - remove a particular VM instance from the pool
        """
        self.log.debug("removeVM| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("removeVM| acquired lock on preallocator")
            machine = self.machines.get(vm.name)
            if vm.id not in machine[0]:
                self.log.error("removeVM: %s NOT found in pool %s" %
                               (vm.id, vm.name))
                return
            self.log.info("removeVM: %s" % vm.id)
            machine[0].remove(vm.id)
            self.machines.set(vm.name, machine)
        self.log.debug("removeVM| released lock on preallocator")

        self.removeFromFreePool(vm)  # also remove from free pool, just in case

    def _getNextID(self):
        """ _getNextID - returns next ID to be used for a preallocated
        VM. Preallocated VMs have 4-digit ID numbers between 1000 and 9999.
        """
        self.log.debug("_getNextID| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("_getNextID| acquired lock on preallocator")
            id = self.nextID.get()
            self.nextID.increment()
            if self.nextID.get() > 9999:
                self.nextID.set(1000)
        self.log.debug("_getNextID| released lock on preallocator")
        return id

    def __create(self, vm, cnt):
        """ __create - Creates count VMs and adds them to the pool

        This function should always be called in a thread since it
        might take a long time to complete.
        """
        vmms = self.vmms[vm.vmms]
        self.log.debug("__create: Using VMMS %s " % (Config.VMMS_NAME))
        for i in range(cnt):
            newVM = copy.deepcopy(vm)
            newVM.id = self._getNextID()
            self.log.debug("__create|calling initializeVM")
            ret = vmms.initializeVM(newVM)
            if not ret:  # ret is None when initialization fails
                self.log.debug("__create|failed initializeVM")
                continue
            self.log.debug("__create|done with initializeVM")
            time.sleep(Config.CREATEVM_SECS)
            self.addVM(newVM)
            self.addToFreePool(newVM)
            self.log.debug("__create: Added vm %s to pool %s " %
                           (newVM.id, newVM.name))

    def __destroy(self, vm):
        """ __destroy - Removes a VM from the pool

        If the user asks for fewer preallocated VMs, then we will
        remove some excess ones. This function should be called in a
        thread context. Notice that we can only remove a free vm, so
        it's possible we might not be able to satisfy the request if
        the free list is empty.
        """
        self.log.debug("__destroy| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("__destroy| acquired lock on preallocator")
            dieVM = self.machines.get(vm.name)[1].get_nowait()
        self.log.debug("__destroy| released lock on preallocator")

        if dieVM:
            self.log.info("__destroy: %s" % dieVM.id)
            self.removeVM(dieVM)
            vmms = self.vmms[dieVM.vmms]
            vmms.safeDestroyVM(dieVM)

    def createVM(self, vm):
        """ createVM - Called in non-thread context to create a single
        VM and add it to the pool
        """
        vmms = self.vmms[vm.vmms]
        newVM = copy.deepcopy(vm)
        newVM.id = self._getNextID()
        self.log.info("createVM|calling initializeVM")
        ret = vmms.initializeVM(newVM)
        if not ret:
            self.log.debug("createVM|failed initializeVM")
            return
        self.log.info("createVM|done with initializeVM %s" % newVM.id)
        self.addVM(newVM)
        self.addToFreePool(newVM)
        self.log.debug("createVM: Added vm %s to pool %s" %
                       (newVM.id, newVM.name))

    def destroyVM(self, vmName, id):
        """ destroyVM - Called by the delVM API function to remove and
        destroy a particular VM instance from a pool. We only allow
        this function when the system is quiescent (pool size == free
        size).
        """
        if vmName not in self.machines.keys():
            return -1

        dieVM = None
        self.log.debug("destroyVM| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("destroyVM| acquired lock on preallocator")
            size = self.machines.get(vmName)[1].qsize()
            self.log.info("destroyVM: free:total pool %d:%d" %
                          (size, len(self.machines.get(vmName)[0])))
            if size == len(self.machines.get(vmName)[0]):
                for i in range(size):
                    vm = self.machines.get(vmName)[1].get_nowait()
                    if vm.id != id:
                        self.log.info("destroyVM: put to free pool id:vm.id %s:%s" %
                                      (id, vm.id))
                        self.machines.get(vmName)[1].put(vm)
                    else:
                        self.log.info("destroyVM: will call removeVM %s" % id)
                        dieVM = vm
        self.log.debug("destroyVM| released lock on preallocator")

        if dieVM:
            self.removeVM(dieVM)
            vmms = self.vmms[dieVM.vmms]
            vmms.safeDestroyVM(dieVM)
            return 0
        else:
            return -1

    def getAllPools(self):
        result = {}
        for vmName in self.machines.keys():
            result[vmName] = self.getPool(vmName)
        return result

    def getPool(self, vmName):
        """ getPool - returns the members of a pool and its free list
        """
        result = {}
        if vmName not in self.machines.keys():
            return result

        result["total"] = []
        result["free"] = []
        free_list = []
        self.log.debug("getPool| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("getPool| acquired lock on preallocator")
            size = self.machines.get(vmName)[1].qsize()
            for i in range(size):
                vm = self.machines.get(vmName)[1].get_nowait()
                free_list.append(vm.id)
                machine = self.machines.get(vmName)
                machine[1].put(vm)
                self.machines.set(vmName, machine)
        self.log.debug("getPool| released lock on preallocator")

        result["total"] = self.machines.get(vmName)[0]
        result["free"] = free_list
        self.log.info("getPool: free pool %s" %
                      ', '.join(str(x) for x in result["free"]))
        self.log.info("getPool: total pool %s" %
                      ', '.join(str(x) for x in result["total"]))
        return result
class JobQueue:

    def __init__(self, preallocator):
        # Create two dictionaries that, for each job currently in the
        # dictionary, also maintain a mapping from output file to the job.
        # This allows easy, constant-time lookup of a job based on its
        # output file.
        self.liveJobs = WrappingDictionary("liveJobsWrapped",
                                           TangoDictionary("liveJobs"),
                                           lambda j: j.outputFile)
        self.deadJobs = WrappingDictionary("deadJobsWrapped",
                                           TangoDictionary("deadJobs"),
                                           lambda j: j.outputFile)
        self.queueLock = threading.Lock()
        self.preallocator = preallocator
        self.log = logging.getLogger("JobQueue")
        self.nextID = 1
        self.max_pool_size = TangoIntValue("max_pool_size", -1)
        if (hasattr(Config, 'MAX_POOL_SIZE') and
                Config.MAX_POOL_SIZE >= 0):
            self.max_pool_size.set(Config.MAX_POOL_SIZE)

    def _getNextID(self):
        """_getNextID - updates and returns the next ID to be used for a job

        Jobs have IDs between 1 and MAX_JOBID.
        """
        self.log.debug("_getNextID|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("_getNextID|Acquired lock to job queue.")
            id = self.nextID

            # If a job already exists in the queue at nextID, then try to
            # find an empty ID. If the queue is full, then return -1.
            keys = self.liveJobs.keys()
            if str(id) in keys:
                id = -1
                for i in range(1, Config.MAX_JOBID + 1):
                    if str(i) not in keys:
                        id = i
                        break

            self.nextID += 1
            if self.nextID > Config.MAX_JOBID:
                self.nextID = 1
        self.log.debug("_getNextID|Released lock to job queue.")
        return id

    def add(self, job):
        """add - add job to live queue

        This function assigns an ID number to a job and then adds it
        to the queue of live jobs.
        """
        if not isinstance(job, TangoJob):
            return -1

        self.log.debug("add|Getting next ID")
        job.setId(self._getNextID())
        if job.id == -1:
            self.log.info("add|JobQueue is full")
            return -1
        self.log.debug("add|Got next ID: " + str(job.id))
        self.log.info("add|Unassigning job ID: %d" % (job.id))
        job.makeUnassigned()
        job.retries = 0

        # Add the job to the queue. Careful not to append the trace until
        # we know the job has actually been added to the queue.
        self.log.debug("add|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("add| Acquired lock to job queue.")
            self.liveJobs.set(job.id, job)
            job.appendTrace("Added job %s:%d to queue" % (job.name, job.id))
            self.log.debug("Ref: " + str(job._remoteLocation))
            self.log.debug("job_id: " + str(job.id))
            self.log.debug("job_name: " + str(job.name))
        self.log.debug("add|Releasing lock to job queue.")

        self.log.info("Added job %s:%d to queue, details = %s" %
                      (job.name, job.id, str(job.__dict__)))
        return str(job.id)

    def addDead(self, job):
        """ addDead - add a job to the dead queue.

        Called by validateJob when a job validation fails.
        """
        if not isinstance(job, TangoJob):
            return -1

        job.setId(self._getNextID())
        self.log.info("addDead|Unassigning job %s" % str(job.id))
        job.makeUnassigned()
        job.retries = 0

        self.log.debug("addDead|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("addDead|Acquired lock to job queue.")
            self.deadJobs.set(job.id, job)
        self.log.debug("addDead|Released lock to job queue.")

        return job.id

    def remove(self, id):
        """remove - Remove job from live queue
        """
        status = -1
        self.log.debug("remove|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("remove|Acquired lock to job queue.")
            if str(id) in self.liveJobs.keys():
                self.liveJobs.delete(id)
                status = 0
        self.log.debug("remove|Released lock to job queue.")

        if status == 0:
            self.log.debug("Removed job %s from queue" % id)
        else:
            self.log.error("Job %s not found in queue" % id)
        return status

    class JobStatus:
        NOT_FOUND = 0
        WAITING = 1
        RUNNING = 2
        DEAD = 3

    def findRemovingWaiting(self, outputFile):
        """ findRemovingWaiting - find the job with the given output file.
        If the found job is live but unrun ("waiting"), move it from the
        live queue to the dead queue. Always return the status of the
        found job.
        """
        self.log.debug("findRemovingWaiting|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("findRemovingWaiting|Acquired lock to job queue.")
            liveJobResult = self.liveJobs.getWrapped(outputFile)
            deadJobResult = self.deadJobs.getWrapped(outputFile)

            if liveJobResult:
                (id, job) = liveJobResult
                status = JobQueue.JobStatus.WAITING if job.isNotAssigned() \
                    else JobQueue.JobStatus.RUNNING
            elif deadJobResult:
                (id, job) = deadJobResult
                status = JobQueue.JobStatus.DEAD
            else:
                (id, job) = (None, None)
                status = JobQueue.JobStatus.NOT_FOUND

            if status == JobQueue.JobStatus.WAITING:
                self.makeDeadUnsafe(id, "Requested by findRemovingWaiting")
        self.log.debug("findRemovingWaiting|Released lock to job queue.")
        return id, job, status

    def delJob(self, id, deadjob):
        """ delJob - Implements delJob() interface call

        @param id - The id of the job to remove
        @param deadjob - If 0, move the job from the live queue to the
        dead queue. If non-zero, remove the job from the dead queue and
        discard it.
        """
        if deadjob == 0:
            return self.makeDead(id, "Requested by operator")
        else:
            status = -1
            self.log.debug("delJob| Acquiring lock to job queue.")
            with self.queueLock:
                self.log.debug("delJob| Acquired lock to job queue.")
                if str(id) in self.deadJobs.keys():
                    self.deadJobs.delete(id)
                    status = 0
            self.log.debug("delJob| Released lock to job queue.")

            if status == 0:
                self.log.debug("Removed job %s from dead queue" % id)
            else:
                self.log.error("Job %s not found in dead queue" % id)
            return status

    def isLive(self, id):
        self.log.debug("isLive| Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("isLive| Acquired lock to job queue.")
            isLive = self.liveJobs.get(id)
        self.log.debug("isLive| Released lock to job queue.")
        return isLive

    def get(self, id):
        """get - retrieve job from live queue

        @param id - the id of the job to retrieve
        """
        self.log.debug("get| Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("get| Acquired lock to job queue.")
            if str(id) in self.liveJobs.keys():
                job = self.liveJobs.get(id)
            else:
                job = None
        self.log.debug("get| Released lock to job queue.")
        return job

    def getNextPendingJob(self):
        """getNextPendingJob - Returns ID of next pending job from queue.
        Called by JobManager when Config.REUSE_VMS==False
        """
        with self.queueLock:
            limitingKeys = defaultdict(int)
            for id, job in self.liveJobs.iteritems():
                if not job.isNotAssigned():
                    limitingKeys[job.limitingKey] += 1
            max_concurrent = 0
            if (hasattr(Config, 'MAX_CONCURRENT_JOBS') and
                    Config.MAX_CONCURRENT_JOBS):
                max_concurrent = Config.MAX_CONCURRENT_JOBS
            for id, job in self.liveJobs.iteritems():
                if job.isNotAssigned() and (
                        max_concurrent <= 0 or
                        limitingKeys[job.limitingKey] < max_concurrent):
                    return id
            return None

    # Create or enlarge a pool if there is no free vm to use and
    # the limit for the pool has not been reached yet
    def incrementPoolSizeIfNecessary(self, job):
        max_ps = self.max_pool_size.get()
        if (self.preallocator.freePoolSize(job.vm.name) == 0 and
                self.preallocator.poolSize(job.vm.name) < max_ps):
            increment = 1
            if (hasattr(Config, 'POOL_ALLOC_INCREMENT') and
                    Config.POOL_ALLOC_INCREMENT):
                increment = Config.POOL_ALLOC_INCREMENT
            self.preallocator.incrementPoolSize(job.vm, increment)

    def getNextPendingJobReuse(self, target_id=None):
        """getNextPendingJobReuse - Returns ID of next pending job and
        its VM. Called by JobManager when Config.REUSE_VMS==True
        """
        self.log.debug("getNextPendingJobReuse| Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("getNextPendingJobReuse| Acquired lock to job queue.")
            limitingKeys = defaultdict(int)
            for id, job in self.liveJobs.iteritems():
                if not job.isNotAssigned():
                    limitingKeys[job.limitingKey] += 1
            self.log.debug("getNextPendingJobReuse| Done checking limitingKeys")

            max_concurrent = 0
            if (hasattr(Config, 'MAX_CONCURRENT_JOBS') and
                    Config.MAX_CONCURRENT_JOBS):
                max_concurrent = Config.MAX_CONCURRENT_JOBS

            for id, job in self.liveJobs.iteritems():
                # if target_id is set, we are only interested in that id
                if target_id and target_id != id:
                    continue

                # If the job hasn't been assigned to a worker yet, see if
                # there is a free VM
                if job.isNotAssigned() and (
                        max_concurrent <= 0 or
                        limitingKeys[job.limitingKey] < max_concurrent):
                    self.log.debug(
                        "getNextPendingJobReuse| Incrementing poolsize if necessary")
                    self.incrementPoolSizeIfNecessary(job)
                    self.log.debug(
                        "getNextPendingJobReuse| Done incrementing poolsize if necessary")
                    self.log.debug("getNextPendingJobReuse| Allocating vm")
                    vm = self.preallocator.allocVM(job.vm.name)
                    self.log.debug("getNextPendingJobReuse| Done allocating vm")
                    if vm:
                        self.log.info("getNextPendingJobReuse alloc vm %s to job %s" %
                                      (vm, id))
                        self.log.debug(
                            "getNextPendingJobReuse| Released lock to job queue.")
                        return (id, vm)
        self.log.debug("getNextPendingJobReuse| Released lock to job queue.")
        return (None, None)

    # Returns the number of jobs that are ready to be assigned to a VM.
    # NOTE: the client must manually obtain the queueLock before calling this.
    def numReadyJobsUnsafe(self):
        count = 0
        max_concurrent = 0
        if (hasattr(Config, 'MAX_CONCURRENT_JOBS') and
                Config.MAX_CONCURRENT_JOBS):
            max_concurrent = Config.MAX_CONCURRENT_JOBS
        limitingKeys = defaultdict(int)
        for id, job in self.liveJobs.iteritems():
            if not job.isNotAssigned():
                limitingKeys[job.limitingKey] += 1
        for id, job in self.liveJobs.iteritems():
            if job.isNotAssigned() and (
                    max_concurrent <= 0 or
                    limitingKeys[job.limitingKey] < max_concurrent):
                count += 1
        return count

    def assignJob(self, jobId):
        """ assignJob - marks a job as assigned
        """
        self.log.debug("assignJob| Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("assignJob| Acquired lock to job queue.")
            job = self.liveJobs.get(jobId)
            self.log.debug("assignJob| Retrieved job.")
            self.log.info("assignJob|Assigning job ID: %s" % str(job.id))
            job.makeAssigned()
        self.log.debug("assignJob| Released lock to job queue.")

    def unassignJob(self, jobId):
        """ unassignJob - marks a job as unassigned
        """
        self.log.debug("unassignJob| Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("unassignJob| Acquired lock to job queue.")
            job = self.liveJobs.get(jobId)
            if job.retries is None:
                job.retries = 0
            else:
                job.retries += 1
                Config.job_retries += 1
            self.log.info("unassignJob|Unassigning job %s" % str(job.id))
            job.makeUnassigned()
        self.log.debug("unassignJob| Released lock to job queue.")

    def makeDead(self, id, reason):
        """ makeDead - move a job from the live queue to the dead queue
        """
        self.log.info("makeDead| Making dead job ID: " + str(id) + " " + reason)
        self.log.debug("makeDead| Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("makeDead| Acquired lock to job queue.")
            status = self.makeDeadUnsafe(id, reason)
        self.log.debug("makeDead| Released lock to job queue.")
        return status

    # Thread-unsafe version of makeDead that acquires no locks.
    def makeDeadUnsafe(self, id, reason):
        status = -1
        if str(id) in self.liveJobs.keys():
            self.log.info("makeDead| Found job ID: %s in the live queue" % (id))
            status = 0
            job = self.liveJobs.get(id)
            self.log.info("Terminated job %s:%d: %s" %
                          (job.name, job.id, reason))
            self.deadJobs.set(id, job)
            self.liveJobs.delete(id)
            job.appendTrace(reason)
        return status

    def getInfo(self):
        info = {}
        info['size'] = len(self.liveJobs.keys())
        info['size_deadjobs'] = len(self.deadJobs.keys())
        return info

    def reset(self):
        self.liveJobs._clean()
        self.deadJobs._clean()
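
# A sketch of the queue lifecycle, assuming a caller that already has a
# TangoJob instance ("job" below is hypothetical); the method names and the
# REUSE_VMS split mirror the code above.
#
#   jobQueue = JobQueue(preallocator)
#   id = jobQueue.add(job)              # "-1" / -1 means the queue is full
#
#   # JobManager polling loop, with Config.REUSE_VMS == True:
#   id, vm = jobQueue.getNextPendingJobReuse()
#   if id:
#       jobQueue.assignJob(id)          # mark it assigned
#       ...run the job on vm...
#       jobQueue.makeDead(id, "Job completed")  # move to the dead queue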