Esempio n. 1
0
 def __init__(self, preallocator):
     """Set up an empty job queue.

     preallocator is kept on the instance unchanged.
     """
     # Plain bookkeeping attributes.
     self.nextID = 1
     self.preallocator = preallocator
     self.log = logging.getLogger("JobQueue")
     self.queueLock = threading.Lock()

     # Named dictionaries holding the live and the dead jobs.
     self.liveJobs = TangoDictionary("liveJobs")
     self.deadJobs = TangoDictionary("deadJobs")
Esempio n. 2
0
    def __init__(self, preallocator):
        """Create the bookkeeping structures for autograder jobs.

        liveJobs holds jobs that are waiting to be assigned as well as
        jobs that are currently running.

        deadJobs holds jobs that have completed, or that were 'deleted'
        while they were still in the live jobs queue.

        unassignedJobs is a FIFO queue of jobs that are pending
        assignment. Invariant: every job in this queue must also be
        present in liveJobs.

        queueLock protects all of the internal data structures of
        JobQueue. It is needed because there are multiple worker threads
        and they might be using the makeUnassigned api.
        """
        # Plain bookkeeping attributes.
        self.nextID = 1
        self.preallocator = preallocator
        self.log = logging.getLogger("JobQueue")
        self.queueLock = threading.Lock()

        # Named containers, created in the same relative order as before.
        self.liveJobs = TangoDictionary("liveJobs")
        self.deadJobs = TangoDictionary("deadJobs")
        self.unassignedJobs = TangoQueue("unassignedLiveJobs")
Esempio n. 3
0
 def __init__(self, preallocator):
     """Initialise the queue state; preallocator is stored as given."""
     self.preallocator = preallocator
     self.nextID = 1  # first job id that will be tried

     self.queueLock = threading.Lock()
     self.log = logging.getLogger("JobQueue")

     # One named dictionary per job category.
     self.liveJobs = TangoDictionary("liveJobs")
     self.deadJobs = TangoDictionary("deadJobs")
Esempio n. 4
0
 def __init__(self, vmms):
     """Initialise the preallocator state.

     vmms is stored on the instance unchanged. The logger name carries
     the current process id.
     """
     self.vmms = vmms
     self.lock = threading.Lock()
     self.log = logging.getLogger("Preallocator-" + str(os.getpid()))

     self.machines = TangoDictionary("machines")
     self.nextID = TangoIntValue("nextID", 1000)
     self.low_water_mark = TangoIntValue("low_water_mark", -1)

     # Only override the value when the configuration defines a
     # non-negative POOL_SIZE_LOW_WATER_MARK.
     if hasattr(Config, 'POOL_SIZE_LOW_WATER_MARK'):
         if Config.POOL_SIZE_LOW_WATER_MARK >= 0:
             self.low_water_mark.set(Config.POOL_SIZE_LOW_WATER_MARK)
Esempio n. 5
0
 def __init__(self, preallocator):
     """Initialise the queue state; preallocator is stored as given.

     Both job dictionaries are wrapped so that, for every job they
     currently hold, a mapping from the job's output file to the job is
     maintained as well. That gives easy, constant-time lookup of a job
     by output file.
     """
     live_store = TangoDictionary("liveJobs")
     self.liveJobs = WrappingDictionary("liveJobsWrapped",
                                        live_store,
                                        lambda job: job.outputFile)

     dead_store = TangoDictionary("deadJobs")
     self.deadJobs = WrappingDictionary("deadJobsWrapped",
                                        dead_store,
                                        lambda job: job.outputFile)

     self.queueLock = threading.Lock()
     self.preallocator = preallocator
     self.log = logging.getLogger("JobQueue")
     self.nextID = 1

     self.max_pool_size = TangoIntValue("max_pool_size", -1)
     # Only override the value when the configuration defines a
     # non-negative MAX_POOL_SIZE.
     if hasattr(Config, 'MAX_POOL_SIZE'):
         if Config.MAX_POOL_SIZE >= 0:
             self.max_pool_size.set(Config.MAX_POOL_SIZE)
Esempio n. 6
0
 def __init__(self, vmms):
     """Initialise the preallocator state; vmms is stored as given."""
     self.vmms = vmms
     self.lock = threading.Lock()
     self.log = logging.getLogger("Preallocator")

     # Named pool dictionary and id counter (constructed with 1000).
     self.machines = TangoDictionary("machines")
     self.nextID = TangoIntValue("nextID", 1000)
Esempio n. 7
0
class Preallocator(object):
    """Keeps pools of preallocated VM instances, keyed by VM name.

    Each value stored in ``self.machines`` is a two-element list:
    index 0 is the list of ids of every VM belonging to the pool and
    index 1 is the queue of VMs that are currently free.
    """

    def __init__(self, vmms):
        """Initialise the preallocator state; vmms is stored as given."""
        self.machines = TangoDictionary("machines")
        self.lock = threading.Lock()
        self.nextID = TangoIntValue("nextID", 1000)
        self.vmms = vmms
        self.log = logging.getLogger("Preallocator")

    def poolSize(self, vmName):
        """ poolSize - returns the size of the vmName pool, for external callers

        Returns 0 when no pool with that name exists.
        """
        if vmName not in self.machines:
            return 0
        return len(self.machines.get(vmName)[0])

    def update(self, vm, num):
        """ update - Updates the number of machines of a certain type
        to be preallocated.

        This function is called via the TangoServer HTTP interface.
        It will validate the request, update the machine list, and
        then create or destroy machines as necessary.

        @param vm - template VM; vm.name selects the pool
        @param num - desired number of machines in the pool
        """
        # The context manager guarantees the lock is released even if
        # the backing store raises.
        with self.lock:
            if vm.name not in self.machines:
                self.machines.set(vm.name, [[], TangoQueue(vm.name)])
                self.log.debug("Creating empty pool of %s instances" %
                               (vm.name))

        delta = num - len(self.machines.get(vm.name)[0])
        if delta > 0:
            # We need more self.machines, spin them up.
            self.log.debug("update: Creating %d new %s instances" %
                           (delta, vm.name))
            # NOTE(review): self.__create(vm, delta) is evaluated here, in
            # the calling thread, and its return value (None) becomes the
            # thread target, so creation is effectively synchronous. Left
            # unchanged on purpose: callers that poll poolSize() may rely
            # on the pool being populated when update() returns. Confirm
            # before switching to target=self.__create, args=(vm, delta).
            threading.Thread(target=self.__create(vm, delta)).start()

        elif delta < 0:
            # We have too many self.machines, remove them from the pool
            self.log.debug("update: Destroying %d preallocated %s instances" %
                           (-delta, vm.name))
            for _ in range(-delta):
                # NOTE(review): same synchronous-call pattern as above.
                threading.Thread(target=self.__destroy(vm)).start()

        # If delta == 0 then we are the perfect number!

    def allocVM(self, vmName):
        """ allocVM - Allocate a VM from the free list

        Returns the allocated VM, or None when the pool does not exist
        or has no free VM.
        """
        vm = None
        # Only touch the pool (and the lock) when the pool exists.
        # Previously the lock was acquired conditionally but the pool was
        # indexed and the lock released unconditionally.
        if vmName in self.machines:
            with self.lock:
                if not self.machines.get(vmName)[1].empty():
                    vm = self.machines.get(vmName)[1].get_nowait()

        # If we're not reusing instances, then crank up a replacement
        if vm and not Config.REUSE_VMS:
            # NOTE(review): __create runs synchronously here (see update).
            threading.Thread(target=self.__create(vm, 1)).start()

        return vm

    def freeVM(self, vm):
        """ freeVM - Returns a VM instance to the free list

        A VM that is no longer a member of its pool is destroyed
        instead. A falsy vm is ignored.
        """
        if not vm:
            # Nothing to return or destroy; the old code went on to
            # dereference vm.vmms and raised AttributeError.
            return

        # Sanity check: Return a VM to the free list only if it is
        # still a member of the pool.
        not_found = False
        with self.lock:
            if vm.id in self.machines.get(vm.name)[0]:
                machine = self.machines.get(vm.name)
                machine[1].put(vm)
                self.machines.set(vm.name, machine)
            else:
                not_found = True

        # The VM is no longer in the pool.
        if not_found:
            vmms = self.vmms[vm.vmms]
            vmms.safeDestroyVM(vm)

    def addVM(self, vm):
        """ addVM - add a particular VM instance to the pool
        """
        with self.lock:
            machine = self.machines.get(vm.name)
            machine[0].append(vm.id)
            self.machines.set(vm.name, machine)

    def removeVM(self, vm):
        """ removeVM - remove a particular VM instance from the pool
        """
        with self.lock:
            machine = self.machines.get(vm.name)
            machine[0].remove(vm.id)
            self.machines.set(vm.name, machine)

    def _getNextID(self):
        """ _getNextID - returns next ID to be used for a preallocated
        VM.  Preallocated VM's have 4-digit ID numbers between 1000
        and 9999.
        """
        with self.lock:
            next_id = self.nextID.get()
            self.nextID.increment()
            # Wrap around once the counter leaves the 4-digit range.
            if self.nextID.get() > 9999:
                self.nextID.set(1000)
        return next_id

    def __create(self, vm, cnt):
        """ __create - Creates count VMs and adds them to the pool

        This function should always be called in a thread since it
        might take a long time to complete.
        """
        vmms = self.vmms[vm.vmms]
        self.log.debug("__create: Using VMMS %s " % (Config.VMMS_NAME))
        for _ in range(cnt):
            newVM = copy.deepcopy(vm)
            newVM.id = self._getNextID()
            self.log.debug("__create|calling initializeVM")
            vmms.initializeVM(newVM)
            self.log.debug("__create|done with initializeVM")
            time.sleep(Config.CREATEVM_SECS)

            # Register the VM as a pool member, then mark it free.
            self.addVM(newVM)
            self.freeVM(newVM)
            self.log.debug("__create: Added vm %s to pool %s " %
                           (newVM.id, newVM.name))

    def __destroy(self, vm):
        """ __destroy - Removes a VM from the pool

        If the user asks for fewer preallocated VMs, then we will
        remove some excess ones. This function should be called in a
        thread context. Notice that we can only remove a free vm, so
        it's possible we might not be able to satisfy the request if
        the free list is empty.
        """
        with self.lock:
            dieVM = self.machines.get(vm.name)[1].get_nowait()

        if dieVM:
            self.removeVM(dieVM)
            vmms = self.vmms[vm.vmms]
            vmms.safeDestroyVM(dieVM)

    def createVM(self, vm):
        """ createVM - Called in non-thread context to create a single
        VM and add it to the pool
        """
        vmms = self.vmms[vm.vmms]
        newVM = copy.deepcopy(vm)
        newVM.id = self._getNextID()

        self.log.info("createVM|calling initializeVM")
        vmms.initializeVM(newVM)
        self.log.info("createVM|done with initializeVM")

        self.addVM(newVM)
        self.freeVM(newVM)
        self.log.debug("createVM: Added vm %s to pool %s" %
                       (newVM.id, newVM.name))

    def destroyVM(self, vmName, id):
        """ destroyVM - Called by the delVM API function to remove and
        destroy a particular VM instance from a pool. We only allow
        this function when the system is quiescent (pool size == free
        size)

        Returns 0 when the VM was found and destroyed, -1 otherwise.
        """
        if vmName not in self.machines:
            return -1

        dieVM = None
        with self.lock:
            size = self.machines.get(vmName)[1].qsize()
            if size == len(self.machines.get(vmName)[0]):
                # Cycle through the whole free queue once, putting back
                # every VM except the one to destroy.
                for _ in range(size):
                    vm = self.machines.get(vmName)[1].get_nowait()
                    if vm.id != id:
                        self.machines.get(vmName)[1].put(vm)
                    else:
                        dieVM = vm

        if dieVM:
            self.removeVM(dieVM)
            # Use the VM being destroyed, not the last VM the loop saw.
            vmms = self.vmms[dieVM.vmms]
            vmms.safeDestroyVM(dieVM)
            return 0
        return -1

    def getAllPools(self):
        """ getAllPools - returns getPool() for every pool, keyed by name
        """
        result = {}
        for vmName in self.machines:
            result[vmName] = self.getPool(vmName)
        return result

    def getPool(self, vmName):
        """ getPool - returns the members of a pool and its free list

        Returns an empty dict when the pool does not exist, otherwise a
        dict with the keys "total" (all member ids) and "free" (ids of
        the VMs currently in the free queue).
        """
        result = {}
        if vmName not in self.machines:
            return result

        free_list = []
        with self.lock:
            # Drain and refill the free queue to read the ids it holds.
            size = self.machines.get(vmName)[1].qsize()
            for _ in range(size):
                vm = self.machines.get(vmName)[1].get_nowait()
                free_list.append(vm.id)
                machine = self.machines.get(vmName)
                machine[1].put(vm)
                self.machines.set(vmName, machine)

        result["total"] = self.machines.get(vmName)[0]
        result["free"] = free_list
        return result
Esempio n. 8
0
class JobQueue:
    """Queue of live and dead jobs, guarded by a single lock.

    Every access to liveJobs / deadJobs below happens inside
    ``with self.queueLock`` so the lock is released even when an
    operation raises.
    """

    def __init__(self, preallocator):
        """Initialise the queue state; preallocator is stored as given."""
        self.liveJobs = TangoDictionary("liveJobs")
        self.deadJobs = TangoDictionary("deadJobs")
        self.queueLock = threading.Lock()
        self.preallocator = preallocator
        self.log = logging.getLogger("JobQueue")
        self.nextID = 1

    def _getNextID(self):
        """_getNextID - updates and returns the next ID to be used for a job

        Jobs have ID's between 1 and MAX_JOBID. Returns -1 when every
        id is already taken by a live job.
        """
        self.log.debug("_getNextID|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("_getNextID|Acquired lock to job queue.")
            next_id = self.nextID

            # If a job already exists in the queue at nextID, then try to
            # find an empty ID. If the queue is full, then return -1.
            keys = self.liveJobs.keys()
            if str(next_id) in keys:
                next_id = -1
                # range (not xrange) so this runs on Python 2 and 3.
                for candidate in range(1, Config.MAX_JOBID + 1):
                    if str(candidate) not in keys:
                        next_id = candidate
                        break

            self.nextID += 1
            if self.nextID > Config.MAX_JOBID:
                self.nextID = 1
        self.log.debug("_getNextID|Released lock to job queue.")
        return next_id

    def add(self, job):
        """add - add job to live queue

        This function assigns an ID number to a job and then adds it
        to the queue of live jobs.

        Returns the job id as a string, or -1 when job is not a
        TangoJob or no id is available.
        """
        if not isinstance(job, TangoJob):
            return -1
        self.log.debug("add|Getting next ID")
        job.setId(self._getNextID())
        if job.id == -1:
            self.log.info("add|JobQueue is full")
            return -1
        self.log.debug("add|Gotten next ID: " + str(job.id))
        self.log.info("add|Unassigning job ID: %d" % (job.id))
        job.makeUnassigned()
        job.retries = 0

        # Add the job to the queue. Careful not to append the trace until we
        # know the job has actually been added to the queue.
        self.log.debug("add|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("add| Acquired lock to job queue.")

            self.liveJobs.set(job.id, job)
            job.appendTrace("%s|Added job %s:%d to queue" %
                            (datetime.utcnow().ctime(), job.name, job.id))

            self.log.debug("Ref: " + str(job._remoteLocation))
            self.log.debug("job_id: " + str(job.id))
            self.log.debug("job_name: " + str(job.name))
        self.log.debug("add|Releasing lock to job queue.")

        self.log.info("Added job %s:%d to queue, details = %s" %
                      (job.name, job.id, str(job.__dict__)))

        return str(job.id)

    def addDead(self, job):
        """ addDead - add a job to the dead queue.

        Called by validateJob when a job validation fails.
        Returns the job id, or -1 when job is not a TangoJob.
        """
        if not isinstance(job, TangoJob):
            return -1
        job.setId(self._getNextID())
        self.log.info("addDead|Unassigning job %s" % str(job.id))
        job.makeUnassigned()
        job.retries = 0

        self.log.debug("addDead|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("addDead|Acquired lock to job queue.")
            self.deadJobs.set(job.id, job)
        self.log.debug("addDead|Released lock to job queue.")

        return job.id

    def remove(self, id):
        """remove - Remove job from live queue

        Returns 0 when the job was removed, -1 when it was not found.
        """
        status = -1
        self.log.debug("remove|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("remove|Acquired lock to job queue.")
            if str(id) in self.liveJobs.keys():
                self.liveJobs.delete(id)
                status = 0
        self.log.debug("remove|Relased lock to job queue.")

        if status == 0:
            self.log.debug("Removed job %s from queue" % id)
        else:
            self.log.error("Job %s not found in queue" % id)
        return status

    def delJob(self, id, deadjob):
        """ delJob - Implements delJob() interface call
        @param id - The id of the job to remove
        @param deadjob - If 0, move the job from the live queue to the
        dead queue. If non-zero, remove the job from the dead queue
        and discard.

        Returns 0 on success and -1 when the job was not found.
        """
        if deadjob == 0:
            return self.makeDead(id, "Requested by operator")

        status = -1
        with self.queueLock:
            self.log.debug("delJob| Acquired lock to job queue.")
            if str(id) in self.deadJobs.keys():
                self.deadJobs.delete(id)
                status = 0
        self.log.debug("delJob| Released lock to job queue.")

        if status == 0:
            self.log.debug("Removed job %s from dead queue" % id)
        else:
            self.log.error("Job %s not found in dead queue" % id)
        return status

    def get(self, id):
        """get - retrieve job from live queue
        @param id - the id of the job to retrieve

        Returns the job, or None when it is not in the live queue.
        """
        with self.queueLock:
            self.log.debug("get| Acquired lock to job queue.")
            if str(id) in self.liveJobs.keys():
                job = self.liveJobs.get(id)
            else:
                job = None
        self.log.debug("get| Released lock to job queue.")
        return job

    def getNextPendingJob(self):
        """getNextPendingJob - Returns ID of next pending job from queue.
        Called by JobManager when Config.REUSE_VMS==False

        Returns None when no live job is unassigned.
        """
        with self.queueLock:
            for id, job in self.liveJobs.iteritems():
                if job.isNotAssigned():
                    return id
        return None

    def getNextPendingJobReuse(self):
        """getNextPendingJobReuse - Returns ID of next pending job and its VM.
        Called by JobManager when Config.REUSE_VMS==True

        Returns (id, vm) for the first unassigned job that gets a VM,
        or (None, None) when there is none.
        """
        with self.queueLock:
            for id, job in self.liveJobs.iteritems():

                # Create a pool if necessary
                if self.preallocator.poolSize(job.vm.name) == 0:
                    self.preallocator.update(job.vm, Config.POOL_SIZE)

                # If the job hasn't been assigned to a worker yet, see if
                # there is a free VM
                if job.isNotAssigned():
                    vm = self.preallocator.allocVM(job.vm.name)
                    if vm:
                        return (id, vm)

        return (None, None)

    def assignJob(self, jobId):
        """ assignJob - marks a job to be assigned
        """
        with self.queueLock:
            self.log.debug("assignJob| Acquired lock to job queue.")
            job = self.liveJobs.get(jobId)
            self.log.debug("assignJob| Retrieved job.")
            self.log.info("assignJob|Assigning job ID: %s" % str(job.id))
            job.makeAssigned()

            self.log.debug("assignJob| Releasing lock to job queue.")
        self.log.debug("assignJob| Released lock to job queue.")

    def unassignJob(self, jobId):
        """ unassignJob - marks a job to be unassigned

        Also bumps the job's retry counter (and Config.job_retries)
        when the job already has a retry count.
        """
        with self.queueLock:
            self.log.debug("unassignJob| Acquired lock to job queue.")
            job = self.liveJobs.get(jobId)
            if job.retries is None:
                job.retries = 0
            else:
                job.retries += 1
                Config.job_retries += 1

            self.log.info("unassignJob|Unassigning job %s" % str(job.id))
            job.makeUnassigned()
        self.log.debug("unassignJob| Released lock to job queue.")

    def makeDead(self, id, reason):
        """ makeDead - move a job from live queue to dead queue

        @param id - id of the job to move
        @param reason - text appended to the job trace
        Returns 0 when the job was moved, -1 when it was not live.
        """
        self.log.info("makeDead| Making dead job ID: " + str(id))
        with self.queueLock:
            self.log.debug("makeDead| Acquired lock to job queue.")
            status = -1
            if str(id) in self.liveJobs.keys():
                # %s rather than %d: the id may arrive as a string.
                self.log.info("makeDead| Found job ID: %s in the live queue"
                              % (id))
                status = 0
                job = self.liveJobs.get(id)
                self.log.info("Terminated job %s:%d: %s" %
                              (job.name, job.id, reason))
                self.deadJobs.set(id, job)
                self.liveJobs.delete(id)
                job.appendTrace("%s|%s" %
                                (datetime.utcnow().ctime(), reason))
        self.log.debug("makeDead| Released lock to job queue.")
        return status

    def getInfo(self):
        """getInfo - returns the number of live and dead jobs
        """
        info = {}
        info['size'] = len(self.liveJobs.keys())
        info['size_deadjobs'] = len(self.deadJobs.keys())

        return info

    def reset(self):
        """reset - clears the live and dead job dictionaries
        """
        self.liveJobs._clean()
        self.deadJobs._clean()
Esempio n. 9
0
class JobQueue(object):
    def __init__(self, preallocator):
        """
        Here we maintain several data structures used to keep track of the
        jobs present for the autograder.

        Live jobs contains:
        - jobs that are yet to be assigned and run
        - jobs that are currently running

        Dead jobs contains:
        - jobs that have been completed, or have been 'deleted' when in
          the live jobs queue

        Unassigned jobs:
        This is a FIFO queue of jobs that are pending assignment.
        - We enforce the invariant that all jobs in this queue must be
          present in live jobs

        queueLock protects all the internal data structure of JobQueue. This
        is needed since there are multiple worker threads and they might be
        using the makeUnassigned api. It is always taken through a
        ``with`` block so it is released even when an operation raises.
        """
        self.liveJobs = TangoDictionary("liveJobs")
        self.deadJobs = TangoDictionary("deadJobs")
        self.unassignedJobs = TangoQueue("unassignedLiveJobs")
        self.queueLock = threading.Lock()
        self.preallocator = preallocator
        self.log = logging.getLogger("JobQueue")
        self.nextID = 1

    def _getNextID(self):
        """_getNextID - updates and returns the next ID to be used for a job
        Jobs have ID's between 1 and MAX_JOBID.
        Returns -1 when every id is taken by a live job.
        """
        self.log.debug("_getNextID|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("_getNextID|Acquired lock to job queue.")
            next_id = self.nextID

            # If there is a live job in the queue with nextID, the id is
            # already taken. We then look for a free id by walking over
            # all possible job ids and picking the first one that is not
            # used by any live job. -1 means no free id was found.
            keys = self.liveJobs.keys()
            if str(next_id) in keys:
                next_id = -1
                for candidate in range(1, Config.MAX_JOBID + 1):
                    if str(candidate) not in keys:
                        next_id = candidate
                        break

            self.nextID += 1
            if self.nextID > Config.MAX_JOBID:
                # Wrap around if job ids go over max job ids avail
                self.nextID = 1
        self.log.debug("_getNextID|Released lock to job queue.")
        return next_id

    def remove(self, id):
        """remove - Remove job from live queue

        The id is also removed from the unassigned jobs queue.
        Returns 0 when the job was in the live queue, -1 otherwise.
        """
        status = -1
        self.log.debug("remove|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("remove|Acquired lock to job queue.")
            if id in self.liveJobs:
                self.liveJobs.delete(id)
                status = 0
            self.unassignedJobs.remove(int(id))
        self.log.debug("remove|Relased lock to job queue.")

        if status == 0:
            self.log.debug("Removed job %s from queue" % id)
        else:
            self.log.error("Job %s not found in queue" % id)
        return status

    def add(self, job):
        """add - add job to live queue
        This function assigns an ID number to a *new* job and then adds it
        to the queue of live jobs.
        Returns the job id on success, -1 otherwise
        """
        if not isinstance(job, TangoJob):
            return -1

        # Get an id for the new job
        self.log.debug("add|Getting next ID")
        nextId = self._getNextID()
        if nextId == -1:
            self.log.info("add|JobQueue is full")
            return -1
        job.setId(nextId)
        self.log.debug("add|Gotten next ID: " + str(job.id))

        self.log.info("add|Unassigning job ID: %d" % (job.id))
        # Make the job unassigned
        job.makeUnassigned()

        # Since we assume that the job is new, we set the number of retries
        # of this job to 0
        job.retries = 0

        # Add the job to the queue. Careful not to append the trace until we
        # know the job has actually been added to the queue.
        self.log.debug("add|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("add| Acquired lock to job queue.")

            # Adds the job to the live jobs dictionary
            self.liveJobs.set(job.id, job)

            # Add this to the unassigned job queue too
            self.unassignedJobs.put(int(job.id))

            job.appendTrace("%s|Added job %s:%d to queue" %
                            (datetime.utcnow().ctime(), job.name, job.id))

            self.log.debug("Ref: " + str(job._remoteLocation))
            self.log.debug("job_id: " + str(job.id))
            self.log.debug("job_name: " + str(job.name))
        self.log.debug("add|Releasing lock to job queue.")

        self.log.info("Added job %s:%s to queue, details = %s" %
                      (job.name, job.id, str(job.__dict__)))

        return str(job.id)

    def addDead(self, job):
        """ addDead - add a job to the dead queue.
        Called by validateJob when a job validation fails.
        Returns -1 on failure and the job id on success
        """
        if not isinstance(job, TangoJob):
            return -1

        # Get an id for the new job. The log prefixes below used to say
        # "add|", copied from add().
        self.log.debug("addDead|Getting next ID")
        nextId = self._getNextID()
        if nextId == -1:
            self.log.info("addDead|JobQueue is full")
            return -1
        job.setId(nextId)
        self.log.debug("addDead|Gotten next ID: " + str(job.id))

        self.log.info("addDead|Unassigning job %s" % str(job.id))
        job.makeUnassigned()
        job.retries = 0

        self.log.debug("addDead|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("addDead|Acquired lock to job queue.")
            # We add the job into the dead jobs dictionary
            self.deadJobs.set(job.id, job)
        self.log.debug("addDead|Released lock to job queue.")

        return job.id

    def delJob(self, id, deadjob):
        """ delJob - Implements delJob() interface call
        @param id - The id of the job to remove
        @param deadjob - If 0, move the job from the live queue to the
        dead queue. If non-zero, remove the job from the dead queue
        and discard.

        Returns 0 on success and -1 when the job was not found.
        """
        if deadjob == 0:
            return self.makeDead(id, "Requested by operator")

        status = -1
        with self.queueLock:
            self.log.debug("delJob| Acquired lock to job queue.")
            if id in self.deadJobs:
                self.deadJobs.delete(id)
                status = 0
        self.log.debug("delJob| Released lock to job queue.")

        if status == 0:
            self.log.debug("Removed job %s from dead queue" % id)
        else:
            self.log.error("Job %s not found in dead queue" % id)
        return status

    def get(self, id):
        """get - retrieve job from live queue
        @param id - the id of the job to retrieve
        """
        with self.queueLock:
            self.log.debug("get| Acquired lock to job queue.")
            job = self.liveJobs.get(id)
        self.log.debug("get| Released lock to job queue.")
        return job

    def assignJob(self, jobId):
        """ assignJob - marks a job to be assigned

        The job id is removed from the unassigned jobs queue.
        """
        with self.queueLock:
            self.log.debug("assignJob| Acquired lock to job queue.")

            job = self.liveJobs.get(jobId)

            # Remove the current job from the queue
            self.unassignedJobs.remove(int(jobId))

            self.log.debug("assignJob| Retrieved job.")
            self.log.info("assignJob|Assigning job ID: %s" % str(job.id))
            job.makeAssigned()

            self.log.debug("assignJob| Releasing lock to job queue.")
        self.log.debug("assignJob| Released lock to job queue.")

    def unassignJob(self, jobId):
        """ unassignJob - marks a job to be unassigned
            Note: We assume here that a job is to be rescheduled or
            'retried' when you unassign it. This retry is done by
            the worker.
        """
        with self.queueLock:
            self.log.debug("unassignJob| Acquired lock to job queue.")

            # Get the current job
            job = self.liveJobs.get(jobId)

            # Increment the number of retries
            if job.retries is None:
                job.retries = 0
            else:
                job.retries += 1
                Config.job_retries += 1

            self.log.info("unassignJob|Unassigning job %s" % str(job.id))
            job.makeUnassigned()

            # Since the assumption is that the job is being retried,
            # we simply add the job to the unassigned jobs queue without
            # removing anything from it
            self.unassignedJobs.put(int(jobId))
        self.log.debug("unassignJob| Released lock to job queue.")

    def makeDead(self, id, reason):
        """ makeDead - move a job from live queue to dead queue

        @param id - id of the job to move
        @param reason - text appended to the job trace
        Returns 0 when the job was moved, -1 when it was not live.
        """
        self.log.info("makeDead| Making dead job ID: " + str(id))
        with self.queueLock:
            self.log.debug("makeDead| Acquired lock to job queue.")
            status = -1
            # Check to make sure that the job is in the live jobs queue
            if id in self.liveJobs:
                self.log.info("makeDead| Found job ID: %s in the live queue" %
                              (id))
                status = 0
                job = self.liveJobs.get(id)
                self.log.info("Terminated job %s:%s: %s" %
                              (job.name, job.id, reason))

                # Add the job to the dead jobs dictionary
                self.deadJobs.set(id, job)
                # Remove the job from the live jobs dictionary
                self.liveJobs.delete(id)

                # Remove the job from the unassigned live jobs queue
                self.unassignedJobs.remove(int(id))

                job.appendTrace("%s|%s" %
                                (datetime.utcnow().ctime(), reason))
        self.log.debug("makeDead| Released lock to job queue.")
        return status

    def getInfo(self):
        """ getInfo - returns the sizes of the live, dead and
                      unassigned job containers
        """
        info = {}
        info['size'] = len(self.liveJobs.keys())
        info['size_deadjobs'] = len(self.deadJobs.keys())
        info['size_unassignedjobs'] = self.unassignedJobs.qsize()

        return info

    def reset(self):
        """ reset - resets and clears all the internal dictionaries
                    and queues
        """
        self.liveJobs._clean()
        self.deadJobs._clean()
        self.unassignedJobs._clean()

    def getNextPendingJob(self):
        """Gets the next unassigned live job. Note that this is a
           blocking function and we will block till there is an available
           job.

           Raises Exception when the dequeued id has no live job; the
           queue lock is released before the exception propagates.
        """
        # Blocks till the next item is added
        id = self.unassignedJobs.get()

        self.log.debug("_getNextPendingJob|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("_getNextPendingJob|Acquired lock to job queue.")

            # Get the corresponding job
            job = self.liveJobs.get(id)
            if job is None:
                # Raised inside the with block so the lock is released;
                # the old code left the queue locked for good here.
                raise Exception("Cannot find unassigned job in live jobs")

            self.log.debug("getNextPendingJob| Releasing lock to job queue.")
        self.log.debug("getNextPendingJob| Released lock to job queue.")
        return job

    def reuseVM(self, job):
        """Helps a job reuse a vm. This is called if CONFIG.REUSE_VM is
           set to true.

           Returns a vm for the job; this may be None when the job is
           unassigned and no VM could be allocated. Raises Exception
           when the job is assigned but has no vm.
        """

        # Create a pool if necessary
        # This is when there is no existing pool for the vm name required.
        if self.preallocator.poolSize(job.vm.name) == 0:
            self.preallocator.update(job.vm, Config.POOL_SIZE)

        # If the job hasn't been assigned to a worker yet, we try to
        # allocate a new vm for this job
        if job.isNotAssigned():
            # Note: This could return None, when all VMs are being used
            return self.preallocator.allocVM(job.vm.name)

        # In the case where a job is already assigned, it should have
        # a vm, and we just return that vm here
        if job.vm:
            return job.vm
        raise Exception("Job assigned without vm")
Esempio n. 10
0
 def __init__(self, vmms):
     """Create the preallocator's state; vmms is kept as passed in."""
     self.log = logging.getLogger("Preallocator")
     self.lock = threading.Lock()
     self.vmms = vmms

     # Pool dictionary first, then the id counter (constructed with 1000).
     self.machines = TangoDictionary("machines")
     self.nextID = TangoIntValue("nextID", 1000)
Esempio n. 11
0
class Preallocator:
    """Manage pools of preallocated VMs, one pool per VM name.

    ``self.machines`` maps a VM name to a two-element list
    ``[total, free]``: ``total`` is a list with the id of every VM that
    belongs to the pool and ``free`` is a ``TangoQueue`` holding the VM
    objects that are currently available. ``self.lock`` guards updates to
    these structures and to ``self.nextID``.
    """

    def __init__(self, vmms):
        """ __init__ - create an empty preallocator

        vmms is indexed by ``vm.vmms`` to find the backend object used to
        initialize and destroy VMs.
        """
        self.machines = TangoDictionary("machines")
        self.lock = threading.Lock()
        self.nextID = TangoIntValue("nextID", 1000)
        self.vmms = vmms
        self.log = logging.getLogger("Preallocator")

    def poolSize(self, vmName):
        """ poolSize - returns the size of the vmName pool, for external callers

        Returns 0 when no pool exists for vmName.
        """
        if vmName not in self.machines.keys():
            return 0
        else:
            return len(self.machines.get(vmName)[0])

    def update(self, vm, num):
        """ update - Updates the number of machines of a certain type
        to be preallocated.

        This function is called via the TangoServer HTTP interface.
        It creates the pool for vm.name if it does not exist yet, then
        creates or destroys machines until the pool holds num of them.
        """
        with self.lock:
            if vm.name not in self.machines.keys():
                self.machines.set(vm.name, [[], TangoQueue(vm.name)])
                self.log.debug("Creating empty pool of %s instances" % (vm.name))

        delta = num - len(self.machines.get(vm.name)[0])
        if delta > 0:
            # We need more self.machines, spin them up.
            self.log.debug("update: Creating %d new %s instances" % (delta, vm.name))
            # NOTE(review): the target expression CALLS __create right here,
            # so creation runs synchronously in the caller and the Thread is
            # started with target=None. Kept as-is because callers may rely
            # on the pool being populated when update() returns; confirm
            # before turning this into target=self.__create, args=(...).
            threading.Thread(target=self.__create(vm, delta)).start()

        elif delta < 0:
            # We have too many self.machines, remove them from the pool
            self.log.debug("update: Destroying %d preallocated %s instances" % (-delta, vm.name))
            for _ in range(-1 * delta):
                # NOTE(review): same as above, __destroy runs synchronously.
                threading.Thread(target=self.__destroy(vm)).start()

        # If delta == 0 then we are the perfect number!

    def allocVM(self, vmName):
        """ allocVM - Allocate a VM from the free list

        Returns the VM taken from the free queue, or None when there is no
        pool named vmName or its free queue is empty.
        """
        vm = None
        if vmName in self.machines.keys():
            # Only touch the pool (and the lock) when the pool exists; the
            # queue lookup and the release used to run unconditionally and
            # failed for unknown pool names.
            with self.lock:
                free_queue = self.machines.get(vmName)[1]
                if not free_queue.empty():
                    vm = free_queue.get_nowait()

        # If we're not reusing instances, then crank up a replacement
        if vm and not Config.REUSE_VMS:
            # NOTE(review): __create is invoked synchronously here, see the
            # note in update().
            threading.Thread(target=self.__create(vm, 1)).start()

        return vm

    def freeVM(self, vm):
        """ freeVM - Returns a VM instance to the free list

        A VM whose id is still listed in its pool goes back on the free
        queue. A VM that is no longer a pool member (or whose pool does not
        exist) is destroyed through its VMMS instead. A falsy vm is logged
        and ignored.
        """
        if not vm:
            # Nothing to return and nothing to destroy.
            self.log.error("freeVM: called without a vm")
            return

        # Sanity check: Return a VM to the free list only if it is
        # still a member of the pool.
        with self.lock:
            machine = self.machines.get(vm.name)
            in_pool = machine is not None and vm.id in machine[0]
            if in_pool:
                machine[1].put(vm)
                self.machines.set(vm.name, machine)

        # The VM is no longer in the pool.
        if not in_pool:
            vmms = self.vmms[vm.vmms]
            vmms.safeDestroyVM(vm)

    def addVM(self, vm):
        """ addVM - add a particular VM instance to the pool

        Appends vm.id to the pool's id list; the free queue is not touched.
        """
        with self.lock:
            machine = self.machines.get(vm.name)
            machine[0].append(vm.id)
            self.machines.set(vm.name, machine)

    def removeVM(self, vm):
        """ removeVM - remove a particular VM instance from the pool

        Removes vm.id from the pool's id list; raises ValueError (from
        list.remove) when the id is not listed.
        """
        with self.lock:
            machine = self.machines.get(vm.name)
            machine[0].remove(vm.id)
            self.machines.set(vm.name, machine)

    def _getNextID(self):
        """ _getNextID - returns next ID to be used for a preallocated
        VM.  Preallocated VM's have 4-digit ID numbers between 1000
        and 9999.
        """
        with self.lock:
            id = self.nextID.get()

            self.nextID.increment()

            # Wrap around once the counter leaves the 4-digit range.
            if self.nextID.get() > 9999:
                self.nextID.set(1000)

        return id

    def __create(self, vm, cnt):
        """ __create - Creates count VMs and adds them to the pool

        This function should always be called in a thread since it
        might take a long time to complete.
        """
        vmms = self.vmms[vm.vmms]
        self.log.debug("__create: Using VMMS %s " % (Config.VMMS_NAME))
        for _ in range(cnt):
            # Each new VM is a deep copy of the template with a fresh id.
            newVM = copy.deepcopy(vm)
            newVM.id = self._getNextID()
            self.log.debug("__create|calling initializeVM")
            vmms.initializeVM(newVM)
            self.log.debug("__create|done with initializeVM")
            time.sleep(Config.CREATEVM_SECS)

            # Register the id first so freeVM accepts the VM as a member.
            self.addVM(newVM)
            self.freeVM(newVM)
            self.log.debug("__create: Added vm %s to pool %s " % (newVM.id, newVM.name))

    def __destroy(self, vm):
        """ __destroy - Removes a VM from the pool

        If the user asks for fewer preallocated VMs, then we will
        remove some excess ones. This function should be called in a
        thread context. Notice that we can only remove a free vm, so
        it's possible we might not be able to satisfy the request if
        the free list is empty.
        """
        dieVM = None
        with self.lock:
            free_queue = self.machines.get(vm.name)[1]
            # Same emptiness check allocVM uses before get_nowait().
            if not free_queue.empty():
                dieVM = free_queue.get_nowait()

        if dieVM:
            self.removeVM(dieVM)
            vmms = self.vmms[vm.vmms]
            vmms.safeDestroyVM(dieVM)

    def createVM(self, vm):
        """ createVM - Called in non-thread context to create a single
        VM and add it to the pool
        """

        vmms = self.vmms[vm.vmms]
        newVM = copy.deepcopy(vm)
        newVM.id = self._getNextID()

        self.log.info("createVM|calling initializeVM")
        vmms.initializeVM(newVM)
        self.log.info("createVM|done with initializeVM")

        self.addVM(newVM)
        self.freeVM(newVM)
        self.log.debug("createVM: Added vm %s to pool %s" % (newVM.id, newVM.name))

    def destroyVM(self, vmName, id):
        """ destroyVM - Called by the delVM API function to remove and
        destroy a particular VM instance from a pool. We only allow
        this function when the system is quiescent (pool size == free
        size)

        Returns 0 when the VM was found and destroyed, -1 otherwise.
        """
        if vmName not in self.machines.keys():
            return -1

        dieVM = None
        with self.lock:
            size = self.machines.get(vmName)[1].qsize()
            if size == len(self.machines.get(vmName)[0]):
                # Cycle through the whole free queue once, putting back
                # every VM except the one we were asked to destroy.
                for _ in range(size):
                    candidate = self.machines.get(vmName)[1].get_nowait()
                    if candidate.id != id:
                        self.machines.get(vmName)[1].put(candidate)
                    else:
                        dieVM = candidate

        if dieVM:
            self.removeVM(dieVM)
            # Look up the backend through the VM being destroyed, not
            # through whichever VM happened to leave the queue last.
            vmms = self.vmms[dieVM.vmms]
            vmms.safeDestroyVM(dieVM)
            return 0
        else:
            return -1

    def getAllPools(self):
        """ getAllPools - returns getPool() for every known pool, keyed
        by VM name
        """
        result = {}
        for vmName in self.machines.keys():
            result[vmName] = self.getPool(vmName)
        return result

    def getPool(self, vmName):
        """ getPool - returns the members of a pool and its free list

        Returns an empty dict when the pool does not exist, otherwise a
        dict with "total" (the pool's id list) and "free" (the ids of the
        VMs on the free queue).
        """
        result = {}
        if vmName not in self.machines.keys():
            return result

        result["total"] = []
        result["free"] = []
        free_list = []
        with self.lock:
            size = self.machines.get(vmName)[1].qsize()
            # The queue is inspected by taking each VM off and putting it
            # straight back.
            for _ in range(size):
                vm = self.machines.get(vmName)[1].get_nowait()
                free_list.append(vm.id)
                machine = self.machines.get(vmName)
                machine[1].put(vm)
                self.machines.set(vmName, machine)

        result["total"] = self.machines.get(vmName)[0]
        result["free"] = free_list
        return result
Esempio n. 12
0
class Preallocator:
    """Manage pools of preallocated VMs, one pool per VM name.

    ``self.machines`` maps a VM name to a two-element list
    ``[total, free]``: ``total`` is a list with the id of every VM that
    belongs to the pool and ``free`` is a ``TangoQueue`` holding the VM
    objects that are currently available. ``self.lock`` guards updates to
    these structures and to ``self.nextID``. ``self.low_water_mark`` is -1
    unless ``Config.POOL_SIZE_LOW_WATER_MARK`` is set to a value >= 0.
    """

    def __init__(self, vmms):
        """ __init__ - create an empty preallocator

        vmms is indexed by ``vm.vmms`` to find the backend object used to
        initialize and destroy VMs.
        """
        self.machines = TangoDictionary("machines")
        self.lock = threading.Lock()
        self.nextID = TangoIntValue("nextID", 1000)
        self.vmms = vmms
        self.log = logging.getLogger("Preallocator-" + str(os.getpid()))
        self.low_water_mark = TangoIntValue("low_water_mark", -1)
        if (hasattr(Config, 'POOL_SIZE_LOW_WATER_MARK')
                and Config.POOL_SIZE_LOW_WATER_MARK >= 0):
            self.low_water_mark.set(Config.POOL_SIZE_LOW_WATER_MARK)

    def poolSize(self, vmName):
        """ poolSize - returns the size of the vmName pool, for external callers

        Returns 0 when no pool exists for vmName.
        """
        if vmName not in self.machines.keys():
            return 0
        else:
            return len(self.machines.get(vmName)[0])

    def freePoolSize(self, vmName):
        """ freePoolSize - returns the size of the vmName free pool, for external callers

        Returns 0 when no pool exists for vmName. Does not take self.lock.
        """
        if vmName in self.machines.keys():
            return self.machines.get(vmName)[1].qsize()
        else:
            return 0

    def incrementPoolSize(self, vm, delta):
        """
        Called by jobQueue to create the pool and allocate given number of vms
        """

        self.log.debug("incrementPoolSize| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("incrementPoolSize| acquired lock on preallocator")
            if vm.name not in self.machines.keys():
                self.machines.set(vm.name, [[], TangoQueue(vm.name)])
                # see comments in jobManager.py for the same call
                self.machines.get(vm.name)[1].make_empty()
                self.log.debug("Creating empty pool of %s instances" %
                               (vm.name))
        self.log.debug("incrementPoolSize| released lock on preallocator")

        self.log.debug("incrementPoolSize: add %d new %s instances" %
                       (delta, vm.name))
        # NOTE(review): the target expression CALLS __create right here, so
        # creation runs synchronously in the caller and the Thread is
        # started with target=None. Kept as-is because callers may rely on
        # the VMs existing on return; confirm before making it asynchronous.
        threading.Thread(target=self.__create(vm, delta)).start()

    def update(self, vm, num):
        """ update - Updates the number of machines of a certain type
        to be preallocated.

        This function is called via the TangoServer HTTP interface.
        It creates the pool for vm.name if it does not exist yet, then
        creates or destroys machines until the pool holds num of them.
        """
        self.log.debug("update| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("update| acquired lock on preallocator")
            if vm.name not in self.machines.keys():
                self.machines.set(vm.name, [[], TangoQueue(vm.name)])
                # see comments in jobManager.py for the same call
                self.machines.get(vm.name)[1].make_empty()
                self.log.debug("Creating empty pool of %s instances" %
                               (vm.name))
        self.log.debug("update| released lock on preallocator")

        delta = num - len(self.machines.get(vm.name)[0])
        if delta > 0:
            # We need more self.machines, spin them up.
            self.log.debug("update: Creating %d new %s instances" %
                           (delta, vm.name))
            # NOTE(review): __create is invoked synchronously here, see the
            # note in incrementPoolSize().
            threading.Thread(target=self.__create(vm, delta)).start()

        elif delta < 0:
            # We have too many self.machines, remove them from the pool
            self.log.debug("update: Destroying %d preallocated %s instances" %
                           (-delta, vm.name))
            for _ in range(-1 * delta):
                # NOTE(review): __destroy is likewise invoked synchronously.
                threading.Thread(target=self.__destroy(vm)).start()

        # If delta == 0 then we are the perfect number!

    def allocVM(self, vmName):
        """ allocVM - Allocate a VM from the free list

        Returns the VM taken from the free queue, or None when there is no
        pool named vmName or its free queue is empty.
        """
        vm = None
        if vmName in self.machines.keys():
            self.log.debug("allocVM| acquiring lock on preallocator")
            with self.lock:
                self.log.debug("allocVM| acquired lock on preallocator")

                if not self.machines.get(vmName)[1].empty():
                    self.log.debug("allocVM| getting (nowait)")
                    vm = self.machines.get(vmName)[1].get_nowait()
                    self.log.debug("allocVM| got (nowait)")

            self.log.debug("allocVM| released lock on " + vmName)

        # If we're not reusing instances, then crank up a replacement
        if vm and not Config.REUSE_VMS:
            # NOTE(review): __create is invoked synchronously here, see the
            # note in incrementPoolSize().
            threading.Thread(target=self.__create(vm, 1)).start()

        return vm

    def addToFreePool(self, vm):
        """ addToFreePool - Returns a VM instance to the free list

        Unlike freeVM, no membership or low-water-mark check is made.
        """

        self.log.debug("addToFreePool| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("addToFreePool| acquired lock on preallocator")
            machine = self.machines.get(vm.name)
            self.log.info("addToFreePool: add %s to free pool" % vm.id)
            machine[1].put(vm)
            self.machines.set(vm.name, machine)
        self.log.debug("addToFreePool| released lock on preallocator")

    def freeVM(self, vm, jobQueue):
        """ freeVM - Returns a VM instance to the free list

        A pool member goes back on the free queue unless a low water mark
        is set (>= 0) and the free pool size minus the number of ready jobs
        already reaches it; in that case the VM is removed and destroyed.
        A VM that is not a pool member (or whose pool does not exist) is
        destroyed as well. A falsy vm is logged and ignored.
        """
        if not vm:
            # Nothing to return and nothing to destroy.
            self.log.error("freeVM: called without a vm")
            return

        # Sanity check: Return a VM to the free list only if it is
        # still a member of the pool.
        not_found = False
        should_destroy = False

        # We must access jobQueue prior to acquiring the preallocator lock,
        # since otherwise we may have deadlock. (In other places in the codebase,
        # we acquire the lock on the jobQueue and THEN the lock on the
        # preallocator.)
        self.log.debug("freeVM| acquiring lock on jobQueue")
        with jobQueue.queueLock:
            self.log.debug("freeVM| acquired lock on jobQueue")
            numReadyJobs = jobQueue.numReadyJobsUnsafe()

            self.log.debug("freeVM| acquiring lock on preallocator")
            with self.lock:
                self.log.debug("freeVM| acquired lock on preallocator")
                machine = self.machines.get(vm.name)
                # A missing pool is handled like a VM that is not a member.
                if machine is not None and vm.id in machine[0]:
                    lwm = self.low_water_mark.get()
                    if (lwm >= 0 and vm.name in self.machines.keys() and
                            self.freePoolSize(vm.name) - numReadyJobs >= lwm):
                        self.log.info(
                            "freeVM: over low water mark (%d). will destroy %s"
                            % (lwm, vm.id))
                        should_destroy = True
                    else:
                        self.log.info("freeVM: return %s to free pool" % vm.id)
                        machine[1].put(vm)
                        self.machines.set(vm.name, machine)
                else:
                    self.log.info(
                        "freeVM: not found in pool %s.  will destroy %s" %
                        (vm.name, vm.id))
                    not_found = True
            self.log.debug("freeVM| released lock on preallocator")
        self.log.debug("freeVM| released lock on jobQueue")

        # The VM is no longer in the pool.
        if not_found or should_destroy:
            self.log.info("freeVM: will destroy %s" % vm.id)
            vmms = self.vmms[vm.vmms]
            self.removeVM(vm)
            vmms.safeDestroyVM(vm)

    def addVM(self, vm):
        """ addVM - add a particular VM instance to the pool

        Appends vm.id to the pool's id list; the free queue is not touched.
        """
        self.log.debug("addVM| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("addVM| acquired lock on preallocator")
            machine = self.machines.get(vm.name)
            machine[0].append(vm.id)
            self.log.info("addVM: add %s" % vm.id)
            self.machines.set(vm.name, machine)
        self.log.debug("addVM| released lock on preallocator")

    # Note: This function is called from removeVM() to handle the case when a vm
    # is in free pool.  In theory this should never happen but we want to ensure
    # that.  To solve the problem cleanly, preallocator should provide ONE primitive
    # to add/remove a vm from both total and free pools, instead of two disjoint ones.
    def removeFromFreePool(self, vm):
        """ removeFromFreePool - drop vm from its pool's free queue

        Every queued VM is taken off once and put back unless its id equals
        vm.id. Nothing happens when the pool does not exist.
        """
        self.log.debug("removeFromFreePool| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("removeFromFreePool| acquired lock on preallocator")
            machine = self.machines.get(vm.name)
            if machine is not None:
                free_queue = machine[1]
                size = free_queue.qsize()
                self.log.info("removeFromFreePool: %s in pool %s" %
                              (vm.id, vm.name))
                for _ in range(size):  # go through free pool
                    candidate = free_queue.get_nowait()
                    # put it back into free pool, if not our vm. Compare
                    # against the id of the VM being removed (not the
                    # builtin ``id``) and keep ``vm`` itself untouched.
                    if candidate.id != vm.id:
                        free_queue.put(candidate)
                    else:
                        self.log.info("removeFromFreePool: found %s in pool %s" %
                                      (candidate.id, candidate.name))
                        # don't put this particular vm back to free pool, that is removal
        self.log.debug("removeFromFreePool| released lock on preallocator")

    def removeVM(self, vm):
        """ removeVM - remove a particular VM instance from the pool

        Logs an error and returns when the VM is not listed in the pool (or
        the pool does not exist); otherwise removes the id from the pool's
        id list and then purges the VM from the free queue as well.
        """
        self.log.debug("removeVM| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("removeVM| acquired lock on preallocator")
            machine = self.machines.get(vm.name)
            if machine is None or vm.id not in machine[0]:
                # Two placeholders for the two arguments; with a single
                # placeholder the formatting itself raised TypeError.
                self.log.error("removeVM: %s NOT found in pool %s" %
                               (vm.id, vm.name))
                return

            self.log.info("removeVM: %s" % vm.id)
            machine[0].remove(vm.id)
            self.machines.set(vm.name, machine)
        self.log.debug("removeVM| released lock on preallocator")

        self.removeFromFreePool(vm)  # also remove from free pool, just in case

    def _getNextID(self):
        """ _getNextID - returns next ID to be used for a preallocated
        VM.  Preallocated VM's have 4-digit ID numbers between 1000
        and 9999.
        """
        self.log.debug("_getNextID| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("_getNextID| acquired lock on preallocator")
            id = self.nextID.get()

            self.nextID.increment()

            # Wrap around once the counter leaves the 4-digit range.
            if self.nextID.get() > 9999:
                self.nextID.set(1000)

        self.log.debug("_getNextID| released lock on preallocator")
        return id

    def __create(self, vm, cnt):
        """ __create - Creates count VMs and adds them to the pool

        This function should always be called in a thread since it
        might take a long time to complete. A VM whose initializeVM call
        returns a falsy value is skipped.
        """

        vmms = self.vmms[vm.vmms]
        self.log.debug("__create: Using VMMS %s " % (Config.VMMS_NAME))
        for _ in range(cnt):
            # Each new VM is a deep copy of the template with a fresh id.
            newVM = copy.deepcopy(vm)
            newVM.id = self._getNextID()
            self.log.debug("__create|calling initializeVM")
            ret = vmms.initializeVM(newVM)
            if not ret:  # ret is None when fails
                self.log.debug("__create|failed initializeVM")
                continue
            self.log.debug("__create|done with initializeVM")
            time.sleep(Config.CREATEVM_SECS)

            self.addVM(newVM)
            self.addToFreePool(newVM)
            self.log.debug("__create: Added vm %s to pool %s " %
                           (newVM.id, newVM.name))

    def __destroy(self, vm):
        """ __destroy - Removes a VM from the pool

        If the user asks for fewer preallocated VMs, then we will
        remove some excess ones. This function should be called in a
        thread context. Notice that we can only remove a free vm, so
        it's possible we might not be able to satisfy the request if
        the free list is empty.
        """
        dieVM = None
        self.log.debug("__destroy| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("__destroy| acquired lock on preallocator")
            free_queue = self.machines.get(vm.name)[1]
            # Same emptiness check allocVM uses before get_nowait().
            if not free_queue.empty():
                dieVM = free_queue.get_nowait()
        self.log.debug("__destroy| released lock on preallocator")

        if dieVM:
            self.log.info("__destroy: %s" % dieVM.id)
            self.removeVM(dieVM)
            vmms = self.vmms[vm.vmms]
            vmms.safeDestroyVM(dieVM)

    def createVM(self, vm):
        """ createVM - Called in non-thread context to create a single
        VM and add it to the pool

        Returns without adding anything when initializeVM returns a falsy
        value.
        """

        vmms = self.vmms[vm.vmms]
        newVM = copy.deepcopy(vm)
        newVM.id = self._getNextID()

        self.log.info("createVM|calling initializeVM")
        ret = vmms.initializeVM(newVM)
        if not ret:
            self.log.debug("createVM|failed initializeVM")
            return
        self.log.info("createVM|done with initializeVM %s" % newVM.id)

        self.addVM(newVM)
        self.addToFreePool(newVM)
        self.log.debug("createVM: Added vm %s to pool %s" %
                       (newVM.id, newVM.name))

    def destroyVM(self, vmName, id):
        """ destroyVM - Called by the delVM API function to remove and
        destroy a particular VM instance from a pool. We only allow
        this function when the system is quiescent (pool size == free
        size)

        Returns 0 when the VM was found and destroyed, -1 otherwise.
        """
        if vmName not in self.machines.keys():
            return -1

        dieVM = None
        self.log.debug("destroyVM| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("destroyVM| acquired lock on preallocator")
            size = self.machines.get(vmName)[1].qsize()
            self.log.info("destroyVM: free:total pool %d:%d" %
                          (size, len(self.machines.get(vmName)[0])))
            if (size == len(self.machines.get(vmName)[0])):
                # Cycle through the whole free queue once, putting back
                # every VM except the one we were asked to destroy.
                for _ in range(size):
                    vm = self.machines.get(vmName)[1].get_nowait()
                    if vm.id != id:
                        self.log.info(
                            "destroyVM: put to free pool id:vm.id %s:%s" %
                            (id, vm.id))
                        self.machines.get(vmName)[1].put(vm)
                    else:
                        self.log.info("destroyVM: will call removeVM %s" % id)
                        dieVM = vm
        self.log.debug("destroyVM| released lock on preallocator")

        if dieVM:
            self.removeVM(dieVM)
            # Look up the backend through the VM being destroyed, not
            # through whichever VM happened to leave the queue last.
            vmms = self.vmms[dieVM.vmms]
            vmms.safeDestroyVM(dieVM)
            return 0
        else:
            return -1

    def getAllPools(self):
        """ getAllPools - returns getPool() for every known pool, keyed
        by VM name
        """
        result = {}
        for vmName in self.machines.keys():
            result[vmName] = self.getPool(vmName)
        return result

    def getPool(self, vmName):
        """ getPool - returns the members of a pool and its free list

        Returns an empty dict when the pool does not exist, otherwise a
        dict with "total" (the pool's id list) and "free" (the ids of the
        VMs on the free queue).
        """
        result = {}
        if vmName not in self.machines.keys():
            return result

        result["total"] = []
        result["free"] = []
        free_list = []
        self.log.debug("getPool| acquiring lock on preallocator")
        with self.lock:
            self.log.debug("getPool| acquired lock on preallocator")
            size = self.machines.get(vmName)[1].qsize()
            # The queue is inspected by taking each VM off and putting it
            # straight back.
            for _ in range(size):
                vm = self.machines.get(vmName)[1].get_nowait()
                free_list.append(vm.id)
                machine = self.machines.get(vmName)
                machine[1].put(vm)
                self.machines.set(vmName, machine)
        self.log.debug("getPool| released lock on preallocator")

        result["total"] = self.machines.get(vmName)[0]
        result["free"] = free_list
        self.log.info("getPool: free pool %s" %
                      ', '.join(str(x) for x in result["free"]))
        self.log.info("getPool: total pool %s" %
                      ', '.join(str(x) for x in result["total"]))

        return result
Esempio n. 13
0
class JobQueue:
    """Keep track of jobs in a live dictionary and a dead dictionary.

    ``liveJobs`` and ``deadJobs`` are TangoDictionary objects keyed by job
    id (membership is tested against ``str(id)``). ``queueLock`` is held
    around every access made by the methods below except ``getInfo`` and
    ``reset``.
    """

    def __init__(self, preallocator):
        """ __init__ - create empty live/dead dictionaries

        preallocator is used by getNextPendingJobReuse to size pools and
        allocate VMs.
        """
        self.liveJobs = TangoDictionary("liveJobs")
        self.deadJobs = TangoDictionary("deadJobs")
        self.queueLock = threading.Lock()
        self.preallocator = preallocator
        self.log = logging.getLogger("JobQueue")
        self.nextID = 1

    def _getNextID(self):
        """_getNextID - updates and returns the next ID to be used for a job

        Jobs have ID's between 1 and MAX_JOBID. Returns -1 when every id in
        that range is already present in the live queue.
        """
        self.log.debug("_getNextID|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("_getNextID|Acquired lock to job queue.")
            id = self.nextID

            # If a job already exists in the queue at nextID, then try to find
            # an empty ID. If the queue is full, then return -1.
            keys = self.liveJobs.keys()
            if (str(id) in keys):
                id = -1
                # range() behaves the same on Python 2 and 3 here.
                for i in range(1, Config.MAX_JOBID + 1):
                    if (str(i) not in keys):
                        id = i
                        break

            # The counter advances (and wraps) regardless of which id was
            # actually handed out.
            self.nextID += 1
            if self.nextID > Config.MAX_JOBID:
                self.nextID = 1
        self.log.debug("_getNextID|Released lock to job queue.")
        return id

    def add(self, job):
        """add - add job to live queue

        This function assigns an ID number to a job and then adds it
        to the queue of live jobs.

        Returns the job id as a string, or -1 when job is not a TangoJob
        or no id is available.
        """
        if (not isinstance(job, TangoJob)):
            return -1
        self.log.debug("add|Getting next ID")
        job.setId(self._getNextID())
        if (job.id == -1):
            self.log.info("add|JobQueue is full")
            return -1
        self.log.debug("add|Gotten next ID: " + str(job.id))
        self.log.info("add|Unassigning job %s" % str(job.id))
        job.makeUnassigned()
        job.retries = 0

        # Add the job to the queue. Careful not to append the trace until we
        # know the job has actually been added to the queue.
        self.log.debug("add|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("add| Acquired lock to job queue.")

            self.liveJobs.set(job.id, job)
            job.appendTrace("%s|Added job %s:%d to queue" %
                            (datetime.utcnow().ctime(), job.name, job.id))

            self.log.debug("Ref: " + str(job._remoteLocation))
            self.log.debug("job_id: " + str(job.id))
            self.log.debug("job_name: " + str(job.name))

        self.log.debug("add|Releasing lock to job queue.")

        self.log.info("Added job %s:%d to queue" % (job.name, job.id))
        self.log.info("Job id: " + str(job.id))
        self.log.info("Job details: " + str(job.__dict__))

        return str(job.id)

    def addDead(self, job):
        """ addDead - add a job to the dead queue.

        Called by validateJob when a job validation fails.

        Returns the job id, or -1 when job is not a TangoJob.
        """
        if (not isinstance(job, TangoJob)):
            return -1
        job.setId(self._getNextID())
        self.log.info("addDead|Unassigning job %s" % str(job.id))
        job.makeUnassigned()
        job.retries = 0

        self.log.debug("addDead|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("addDead|Acquired lock to job queue.")

            self.deadJobs.set(job.id, job)
        self.log.debug("addDead|Released lock to job queue.")

        return job.id

    def remove(self, id):
        """remove - Remove job from live queue

        Returns 0 when the job was removed, -1 when it was not found.
        """
        status = -1
        self.log.debug("remove|Acquiring lock to job queue.")
        with self.queueLock:
            self.log.debug("remove|Acquired lock to job queue.")
            if str(id) in self.liveJobs.keys():
                self.liveJobs.delete(id)
                status = 0

        self.log.debug("remove|Relased lock to job queue.")

        if status == 0:
            self.log.debug("Removed job %s from queue" % id)
        else:
            self.log.error("Job %s not found in queue" % id)
        return status

    def delJob(self, id, deadjob):
        """ delJob - Implements delJob() interface call
        @param id - The id of the job to remove
        @param deadjob - If 0, move the job from the live queue to the
        dead queue. If non-zero, remove the job from the dead queue
        and discard.

        Returns 0 on success, -1 when the job was not found.
        """
        if deadjob == 0:
            return self.makeDead(id, "Requested by operator")
        else:
            status = -1
            with self.queueLock:
                self.log.debug("delJob| Acquired lock to job queue.")
                if str(id) in self.deadJobs.keys():
                    self.deadJobs.delete(id)
                    status = 0
            self.log.debug("delJob| Released lock to job queue.")

            if status == 0:
                self.log.debug("Removed job %s from dead queue" % id)
            else:
                self.log.error("Job %s not found in dead queue" % id)
            return status

    def get(self, id):
        """get - retrieve job from live queue
        @param id - the id of the job to retrieve

        Returns the job, or None when the id is not in the live queue.
        """
        with self.queueLock:
            self.log.debug("get| Acquired lock to job queue.")
            if str(id) in self.liveJobs.keys():
                job = self.liveJobs.get(id)
            else:
                job = None
        self.log.debug("get| Released lock to job queue.")
        return job

    def getNextPendingJob(self):
        """getNextPendingJob - Returns ID of next pending job from queue.
        Called by JobManager when Config.REUSE_VMS==False

        Returns None when no live job is unassigned.
        """
        with self.queueLock:
            for job_id, job in self.liveJobs.iteritems():
                if job.isNotAssigned():
                    return job_id
        return None

    def getNextPendingJobReuse(self):
        """getNextPendingJobReuse - Returns ID of next pending job and its VM.
        Called by JobManager when Config.REUSE_VMS==True

        Returns (None, None) when no unassigned job could be given a VM.
        """
        with self.queueLock:
            for job_id, job in self.liveJobs.iteritems():

                # Create a pool if necessary
                if self.preallocator.poolSize(job.vm.name) == 0:
                    self.preallocator.update(job.vm, Config.POOL_SIZE)

                # If the job hasn't been assigned to a worker yet, see if there
                # is a free VM
                if (job.isNotAssigned()):
                    vm = self.preallocator.allocVM(job.vm.name)
                    if vm:
                        return (job_id, vm)

        return (None, None)

    def assignJob(self, jobId):
        """ assignJob - marks a job to be assigned

        Raises AttributeError when jobId is not in the live queue; the
        lock is released in that case too.
        """
        with self.queueLock:
            self.log.debug("assignJob| Acquired lock to job queue.")
            job = self.liveJobs.get(jobId)
            self.log.debug("assignJob| Retrieved job.")
            self.log.info("assignJob|Assigning job %s" % str(job.id))
            job.makeAssigned()

            self.log.debug("assignJob| Releasing lock to job queue.")
        self.log.debug("assignJob| Released lock to job queue.")

    def unassignJob(self, jobId):
        """ unassignJob - marks a job to be unassigned

        job.retries is set to 0 when it was None; otherwise it and
        Config.job_retries are both incremented. Raises AttributeError when
        jobId is not in the live queue; the lock is released in that case
        too.
        """
        with self.queueLock:
            self.log.debug("unassignJob| Acquired lock to job queue.")
            job = self.liveJobs.get(jobId)
            self.log.info("unassignJob|Unassigning job %s" % str(job.id))
            job.makeUnassigned()
            if job.retries is None:
                job.retries = 0
            else:
                job.retries += 1
                Config.job_retries += 1

        self.log.debug("unassignJob| Released lock to job queue.")

    def makeDead(self, id, reason):
        """ makeDead - move a job from live queue to dead queue

        Returns 0 when the job was moved, -1 when it was not in the live
        queue. The reason is appended to the job's trace.
        """
        self.log.info("makeDead| Making dead: " + str(id))
        with self.queueLock:
            self.log.debug("makeDead| Acquired lock to job queue.")
            status = -1
            if str(id) in self.liveJobs.keys():
                self.log.info("makeDead| Job is in the queue")
                status = 0
                job = self.liveJobs.get(id)
                self.liveJobs.delete(id)
                self.log.info("Terminated job %s:%d: %s" %
                              (job.name, job.id, reason))
                self.deadJobs.set(id, job)
                job.appendTrace("%s|%s" % (datetime.utcnow().ctime(), reason))
        self.log.debug("makeDead| Released lock to job queue.")
        return status

    def getInfo(self):
        """ getInfo - returns the number of live and dead jobs

        The result has the keys 'size' and 'size_deadjobs'. The lock is
        not taken.
        """

        info = {}
        info['size'] = len(self.liveJobs.keys())
        info['size_deadjobs'] = len(self.deadJobs.keys())

        return info

    def reset(self):
        """ reset - calls _clean() on both job dictionaries """
        self.liveJobs._clean()
        self.deadJobs._clean()
Esempio n. 14
0
    def runDictionaryTests(self):
        """Exercise the basic TangoDictionary operations on a fresh dictionary.

        Uses ``self.test_entries`` as the fixture and checks, in order:
        the empty state, set/get/membership, items(), keys()/values(),
        lookups of a missing key, overwriting a key and deleting it.
        """
        # A dictionary that has never been written to reports no content.
        test_dict = TangoDictionary("test")
        self.assertEqual(test_dict.keys(), [])
        self.assertEqual(test_dict.values(), [])

        # Store every fixture entry ...
        for key in self.test_entries:
            test_dict.set(key, self.test_entries[key])

        # ... and read each one back through membership and get().
        for key in self.test_entries:
            self.assertTrue(key in test_dict)
            self.assertEqual(test_dict.get(key), self.test_entries[key])

        for (key, val) in test_dict.items():
            self.assertEqual(self.test_entries.get(key), val)

        # keys() is expected to return the keys as strings, in fixture order.
        self.assertEqual(test_dict.keys(),
                         [str(key) for key in self.test_entries.keys()])
        self.assertEqual(test_dict.values(), list(self.test_entries.values()))
        # A missing key is not a member and get() returns None for it.
        self.assertTrue("key_not_present" not in test_dict)
        self.assertEqual(test_dict.get("key_not_present"), None)

        # set() on "key" followed by get() must return the new value.
        test_dict.set("key", "new_value")
        self.assertEqual(test_dict.get("key"), "new_value")

        # After delete() the key is no longer a member.
        test_dict.delete("key")
        self.assertTrue("key" not in test_dict)