Ejemplo n.º 1
0
    def setUp(self):

        if Config.USE_REDIS:
            __db = redis.StrictRedis(Config.REDIS_HOSTNAME,
                                     Config.REDIS_PORT,
                                     db=0)
            __db.flushall()

        self.job1 = TangoJob(
            name="sample_job_1",
            vm="ilter.img",
            outputFile="sample_job_1_output",
            input=[],
            timeout=30,
            notifyURL="notifyMeUrl",
            maxOutputFileSize=4096,
        )

        self.job2 = TangoJob(
            name="sample_job_2",
            vm="ilter.img",
            outputFile="sample_job_2_output",
            input=[],
            timeout=30,
            notifyURL="notifyMeUrl",
            maxOutputFileSize=4096,
        )

        self.jobQueue = JobQueue(None)
        self.jobQueue.reset()
        self.jobId1 = self.jobQueue.add(self.job1)
        self.jobId2 = self.jobQueue.add(self.job2)
Ejemplo n.º 2
0
    def __init__(self):
        self.daemon = True

        vmms = None
        if Config.VMMS_NAME == "tashiSSH":
            from vmms.tashiSSH import TashiSSH
            vmms = TashiSSH()
        elif Config.VMMS_NAME == "ec2SSH":
            from vmms.ec2SSH import Ec2SSH
            vmms = Ec2SSH()
        elif Config.VMMS_NAME == "localDocker":
            from vmms.localDocker import LocalDocker
            vmms = LocalDocker()
        elif Config.VMMS_NAME == "distDocker":
            from vmms.distDocker import DistDocker
            vmms = DistDocker()

        self.preallocator = Preallocator({Config.VMMS_NAME: vmms})
        self.jobQueue = JobQueue(self.preallocator)
        if not Config.USE_REDIS:
            # creates a local Job Manager if there is no persistent
            # memory between processes. Otherwise, JobManager will
            # be initiated separately
            JobManager(self.jobQueue).start()

        logging.basicConfig(
            filename=Config.LOGFILE,
            format="%(levelname)s|%(asctime)s|%(name)s|%(message)s",
            level=Config.LOGLEVEL,
        )
        self.start_time = time.time()
        self.log = logging.getLogger("TangoServer")
        self.log.info("Starting Tango server")
Ejemplo n.º 3
0
Archivo: tango.py Proyecto: cg2v/Tango
    def __init__(self):
        self.daemon = True
        
        vmms = None
        if Config.VMMS_NAME == "tashiSSH":
            from vmms.tashiSSH import TashiSSH
            vmms = TashiSSH()
        elif Config.VMMS_NAME == "ec2SSH":
            from vmms.ec2SSH import Ec2SSH
            vmms = Ec2SSH()
        elif Config.VMMS_NAME == "localDocker":
            from vmms.localDocker import LocalDocker
            vmms = LocalDocker()
        elif Config.VMMS_NAME == "distDocker":
            from vmms.distDocker import DistDocker
            vmms = DistDocker()

        self.preallocator = Preallocator({Config.VMMS_NAME: vmms})
        self.jobQueue = JobQueue(self.preallocator)
        if not Config.USE_REDIS:
            # creates a local Job Manager if there is no persistent
            # memory between processes. Otherwise, JobManager will
            # be initiated separately
            JobManager(self.jobQueue).start()
        
        logging.basicConfig(
            filename=Config.LOGFILE,
            format="%(levelname)s|%(asctime)s|%(name)s|%(message)s",
            level=Config.LOGLEVEL,
        )
        self.start_time = time.time()
        self.log = logging.getLogger("TangoServer")
        self.log.info("Starting Tango server")
 def setUp(self):
     self.app = www.app.test_client()
     try:
         os.remove(quePath)
     except:
         pass
     self.jobQueue = JobQueue(quePath)
Ejemplo n.º 5
0
    def setUp(self):

        if Config.USE_REDIS:
            __db = redis.StrictRedis(
                Config.REDIS_HOSTNAME, Config.REDIS_PORT, db=0)
            __db.flushall()

        self.job1 = TangoJob(
            name="sample_job_1",
            vm="ilter.img",
            outputFile="sample_job_1_output",
            input=[],
            timeout=30,
            notifyURL="notifyMeUrl",
            maxOutputFileSize=4096)

        self.job2 = TangoJob(
            name="sample_job_2",
            vm="ilter.img",
            outputFile="sample_job_2_output",
            input=[],
            timeout=30,
            notifyURL="notifyMeUrl",
            maxOutputFileSize=4096)

        self.jobQueue = JobQueue(None)
        self.jobQueue.reset()
        self.jobId1 = self.jobQueue.add(self.job1)
        self.jobId2 = self.jobQueue.add(self.job2)
Ejemplo n.º 6
0
 def setUp(self):
     try:
         os.remove(quePath)
     except:
         pass
     self.que = JobQueue(quePath)
     self.jobProcess = JobProcess(quePath)
Ejemplo n.º 7
0
 def setUp(s):
     try:
         #os.remove(quePath)
         os.remove(serverQueuePath)
     except:
         pass
     www.app.config['UNIT_TEST'] = True
     s.app = www.app.test_client()
     s.que = JobQueue(quePath)
Ejemplo n.º 8
0
def getStatus(id, queuePath):

    # Retrieve the status and result of the given job ID.
    # @param id: the job ID
    # @param queuePath: the job queue path
    # @returns: a dict of the form: {'status': <status>, 'result': <dict>}
    #           where result is None if the job is not found;
    #           only Success and Error may have an optional result; if
    #           there is no result, no result property is returned
    statusResult = JobQueue(queuePath).getStatus(id)
    if statusResult == None:
        raise ErrorResp('unknown job ID of: ' + str(id))
    return statusResult
Ejemplo n.º 9
0
    def __init__(self):

        logging.basicConfig(
                filename = self.LOGFILE,
                format = "%(levelname)s|%(asctime)s|%(name)s|%(message)s",
                level = Config.LOGLEVEL
                )

        vmms = None

        if Config.VMMS_NAME == "localSSH":
            from vmms.localSSH import LocalSSH
            vmms = LocalSSH()
        elif Config.VMMS_NAME == "tashiSSH":
            from vmms.tashiSSH import TashiSSH
            vmms = TashiSSH()
        elif Config.VMMS_NAME == "ec2SSH":
            from vmms.ec2SSH import Ec2SSH
            vmms = Ec2SSH()
        elif Config.VMMS_NAME == "localDocker":
            from vmms.localDocker import LocalDocker
            vmms = LocalDocker()
            

        self.vmms = {Config.VMMS_NAME: vmms}
        self.preallocator = Preallocator(self.vmms)
        self.queue = JobQueue(self.preallocator)

        if not Config.USE_REDIS:
            # creates a local Job Manager if there is no persistent
            # memory between processes. Otherwise, JobManager will
            # be initiated separately
            JobManager(self.queue, self.vmms, self.preallocator)

        self.tango = TangoServer(self.queue, self.preallocator, self.vmms)

        logging.basicConfig(
            filename=self.LOGFILE,
            format="%(levelname)s|%(asctime)s|%(name)s|%(message)s",
            level=Config.LOGLEVEL
        )

        logging.getLogger('boto').setLevel(logging.INFO)
        self.log = logging.getLogger("TangoREST")
        self.log.info("Starting RESTful Tango server")
        self.status = Status()
Ejemplo n.º 10
0
def add(email, operation, parms, ctx):

    # Add a job to the tail end of the job queue.
    # @param         email: email/username requesting the job
    # @param    operation: job operation to run; the python module that
    #                      contains the calcMain() function should be in the
    #                      file, <operation>_www.py
    # @param        parms: parameters as a python dict to be passed to
    #                      <operation>_www.py.calcMain()
    # @params         ctx: the job context holding information for the postCalc
    # @returns: (jobId, status)

    # Extract any doNotEmail flag.
    doNotEmail = parms.pop('doNotEmail', None)

    if 'map' in parms:
        ctx.map = parms['map']
    elif 'mapId' in parms:
        ctx.map = parms['mapId']

    if email != None:
        ctx.email = email

    packedTask = _packTask(operation, parms, ctx)
    queuePath = ctx.app.jobQueuePath
    jobId = JobQueue(queuePath).add(id, packedTask, email, doNotEmail)

    # Get the status of the job just added to the queue.
    result = getStatus(jobId, queuePath)

    # Run the job now.
    if not ctx.app.unitTest:
        _runNow(jobId, ctx.app.jobProcessPath, queuePath)

    # Return the id and status.
    return {
        'status': 'InJobQueue',
        'jobId': jobId,
        'jobStatusUrl': ctx.app.jobStatusUrl + str(jobId),
    }
Ejemplo n.º 11
0
def main(args):
    queuePath = args[0]
    id = int(args[1])

    # TODO these should be wrapped with a try-except because any errors here
    # will not be reported in the server log.
    jobProcess = JobProcess(queuePath)
    operation, parms, ctx = jobProcess.unpackTask(jobProcess.queue.getTask(id))

    try:
        status, result = jobProcess.run(id, operation, parms, ctx)
    except Exception as e:
        status = 'Error'
        result = _formatError(str(e), traceback.format_exc(100), operation,
                              parms)
    except:
        status = 'Error'
        result = _formatError(None, traceback.format_exc(100), operation,
                              parms)

    # Set the completion status.
    JobQueue(queuePath).setResult(id, status, result, ctx, operation)
Ejemplo n.º 12
0
class TangoServer:
    """ TangoServer - Implements the API functions that the server accepts
    """
    def __init__(self):
        self.daemon = True

        vmms = None
        if Config.VMMS_NAME == "tashiSSH":
            from vmms.tashiSSH import TashiSSH
            vmms = TashiSSH()
        elif Config.VMMS_NAME == "ec2SSH":
            from vmms.ec2SSH import Ec2SSH
            vmms = Ec2SSH()
        elif Config.VMMS_NAME == "localDocker":
            from vmms.localDocker import LocalDocker
            vmms = LocalDocker()
        elif Config.VMMS_NAME == "distDocker":
            from vmms.distDocker import DistDocker
            vmms = DistDocker()

        self.preallocator = Preallocator({Config.VMMS_NAME: vmms})
        self.jobQueue = JobQueue(self.preallocator)
        if not Config.USE_REDIS:
            # creates a local Job Manager if there is no persistent
            # memory between processes. Otherwise, JobManager will
            # be initiated separately
            JobManager(self.jobQueue).start()

        logging.basicConfig(
            filename=Config.LOGFILE,
            format="%(levelname)s|%(asctime)s|%(name)s|%(message)s",
            level=Config.LOGLEVEL,
        )
        self.start_time = time.time()
        self.log = logging.getLogger("TangoServer")
        self.log.info("Starting Tango server")

    def addJob(self, job):
        """ addJob - Add a job to the job queue
        """
        Config.job_requests += 1
        self.log.debug("Received addJob request")
        ret = self.__validateJob(job, self.preallocator.vmms)
        self.log.info("Done validating job %s" % (job.name))
        if ret == 0:
            return self.jobQueue.add(job)
        else:
            self.jobQueue.addDead(job)
            return -1

    def delJob(self, id, deadjob):
        """ delJob - Delete a job
        @param id: Id of job to delete
        @param deadjob - If 0, move the job from the live queue to the
        dead queue. If non-zero, remove the job from the dead queue
        and discard. Use with caution!
        """
        self.log.debug("Received delJob(%d, %d) request" % (id, deadjob))
        return self.jobQueue.delJob(id, deadjob)

    def getJobs(self, item):
        """ getJobs - Return the list of live jobs (item == 0) or the
        list of dead jobs (item == -1).
        """
        try:
            self.log.debug("Received getJobs(%s) request" % (item))

            if item == -1:  # return the list of dead jobs
                return self.jobQueue.deadJobs.values()

            elif item == 0:  # return the list of live jobs
                return self.jobQueue.liveJobs.values()

            else:  # invalid parameter
                return []
        except Exception as e:
            self.log.debug("getJobs: %s" % str(e))

    def preallocVM(self, vm, num):
        """ preallocVM - Set the pool size for VMs of type vm to num
        """
        self.log.debug("Received preallocVM(%s,%d)request" % (vm.name, num))
        try:
            vmms = self.preallocator.vmms[vm.vmms]
            if not vm or num < 0:
                return -2
            if vm.image not in vmms.getImages():
                self.log.error("Invalid image name")
                return -3
            (name, ext) = os.path.splitext(vm.image)
            vm.name = name
            self.preallocator.update(vm, num)
            return 0
        except Exception as err:
            self.log.error("preallocVM failed: %s" % err)
            return -1

    def getVMs(self, vmms_name):
        """ getVMs - return the list of VMs managed by the service vmms_name
        """
        self.log.debug("Received getVMs request(%s)" % vmms_name)
        try:
            if vmms_name in self.preallocator.vmms:
                vmms_inst = self.preallocator.vmms[vmms_name]
                return vmms_inst.getVMs()
            else:
                return []
        except Exception as err:
            self.log.error("getVMs request failed: %s" % err)
            return []

    def delVM(self, vmName, id):
        """ delVM - delete a specific VM instance from a pool
        """
        self.log.debug("Received delVM request(%s, %d)" % (vmName, id))
        try:
            if not vmName or vmName == "" or not id:
                return -1
            return self.preallocator.destroyVM(vmName, id)
        except Exception as err:
            self.log.error("delVM request failed: %s" % err)
            return -1

    def getPool(self, vmName):
        """ getPool - Return the current members of a pool and its free list
        """
        self.log.debug("Received getPool request(%s)" % (vmName))
        try:
            if not vmName or vmName == "":
                return []
            result = self.preallocator.getPool(vmName)
            return [
                "pool_size=%d" % len(result["pool"]),
                "free_size=%d" % len(result["free"]),
                "pool=%s" % result["pool"],
                "free=%s" % result["free"]
            ]

        except Exception as err:
            self.log.error("getPool request failed: %s" % err)
            return []

    def getInfo(self):
        """ getInfo - return various statistics about the Tango daemon
        """
        stats = {}
        stats['elapsed_secs'] = time.time() - self.start_time
        stats['job_requests'] = Config.job_requests
        stats['job_retries'] = Config.job_retries
        stats['waitvm_timeouts'] = Config.waitvm_timeouts
        stats['runjob_timeouts'] = Config.runjob_timeouts
        stats['copyin_errors'] = Config.copyin_errors
        stats['runjob_errors'] = Config.runjob_errors
        stats['copyout_errors'] = Config.copyout_errors
        stats['num_threads'] = threading.activeCount()

        return stats

    #
    # Helper functions
    #
    def resetTango(self, vmms):
        """ resetTango - resets Tango to a clean predictable state and
        ensures that it has a working virtualization environment. A side
        effect is that also checks that each supported VMMS is actually
        running.
        """
        self.log.debug("Received resetTango request.")

        try:
            # For each supported VMM system, get the instances it knows about,
            # and kill those in the current Tango name space.
            for vmms_name in vmms:
                vobj = vmms[vmms_name]
                vms = vobj.getVMs()
                self.log.debug("Pre-existing VMs: %s" %
                               [vm.name for vm in vms])
                namelist = []
                for vm in vms:
                    if re.match("%s-" % Config.PREFIX, vm.name):
                        vobj.destroyVM(vm)
                        # Need a consistent abstraction for a vm between
                        # interfaces
                        namelist.append(vm.name)
                if namelist:
                    self.log.warning("Killed these %s VMs on restart: %s" %
                                     (vmms_name, namelist))

            for _, job in self.jobQueue.liveJobs.iteritems():
                if not job.isNotAssigned():
                    job.makeUnassigned()
                self.log.debug("job: %s, assigned: %s" %
                               (str(job.name), str(job.assigned)))
        except Exception as err:
            self.log.error("resetTango: Call to VMMS %s failed: %s" %
                           (vmms_name, err))
            os._exit(1)

    def __validateJob(self, job, vmms):
        """ validateJob - validate the input arguments in an addJob request.
        """
        errors = 0

        # If this isn't a Tango job then bail with an error
        if (not isinstance(job, TangoJob)):
            return -1

        # Every job must have a name
        if not job.name:
            self.log.error("validateJob: Missing job.name")
            job.appendTrace("%s|validateJob: Missing job.name" %
                            (datetime.utcnow().ctime()))
            errors += 1

        # Check the virtual machine field
        if not job.vm:
            self.log.error("validateJob: Missing job.vm")
            job.appendTrace("%s|validateJob: Missing job.vm" %
                            (datetime.utcnow().ctime()))
            errors += 1
        else:
            if not job.vm.image:
                self.log.error("validateJob: Missing job.vm.image")
                job.appendTrace("%s|validateJob: Missing job.vm.image" %
                                (datetime.utcnow().ctime()))
                errors += 1
            else:
                vobj = vmms[Config.VMMS_NAME]
                imgList = vobj.getImages()
                if job.vm.image not in imgList:
                    self.log.error("validateJob: Image not found: %s" %
                                   job.vm.image)
                    job.appendTrace("%s|validateJob: Image not found: %s" %
                                    (datetime.utcnow().ctime(), job.vm.image))
                    job.appendTrace("%s|validateJob: Images available: %s" %
                                    (datetime.utcnow().ctime(), imgList))
                    errors += 1
                else:
                    (name, ext) = os.path.splitext(job.vm.image)
                    job.vm.name = name

            if not job.vm.vmms:
                self.log.error("validateJob: Missing job.vm.vmms")
                job.appendTrace("%s|validateJob: Missing job.vm.vmms" %
                                (datetime.utcnow().ctime()))
                errors += 1
            else:
                if job.vm.vmms not in vmms:
                    self.log.error("validateJob: Invalid vmms name: %s" %
                                   job.vm.vmms)
                    job.appendTrace("%s|validateJob: Invalid vmms name: %s" %
                                    (datetime.utcnow().ctime(), job.vm.vmms))
                    errors += 1

        # Check the output file
        if not job.outputFile:
            self.log.error("validateJob: Missing job.outputFile")
            job.appendTrace("%s|validateJob: Missing job.outputFile" %
                            (datetime.utcnow().ctime()))
            errors += 1
        else:
            if not os.path.exists(os.path.dirname(job.outputFile)):
                self.log.error("validateJob: Bad output path: %s",
                               job.outputFile)
                job.appendTrace("%s|validateJob: Bad output path: %s" %
                                (datetime.utcnow().ctime(), job.outputFile))
                errors += 1

        # Check for max output file size parameter
        if not job.maxOutputFileSize:
            self.log.debug(
                "validateJob: Setting job.maxOutputFileSize "
                "to default value: %d bytes", Config.MAX_OUTPUT_FILE_SIZE)
            job.maxOutputFileSize = Config.MAX_OUTPUT_FILE_SIZE

        # Check the list of input files
        hasMakefile = False
        for inputFile in job.input:
            if not inputFile.localFile:
                self.log.error("validateJob: Missing inputFile.localFile")
                job.appendTrace("%s|validateJob: Missing inputFile.localFile" %
                                (datetime.utcnow().ctime()))
                errors += 1
            else:
                if not os.path.exists(os.path.dirname(job.outputFile)):
                    self.log.error("validateJob: Bad output path: %s",
                                   job.outputFile)
                    job.appendTrace(
                        "%s|validateJob: Bad output path: %s" %
                        (datetime.utcnow().ctime(), job.outputFile))
                    errors += 1

            if inputFile.destFile == 'Makefile':
                hasMakefile = True

        # Check if input files include a Makefile
        if not hasMakefile:
            self.log.error("validateJob: Missing Makefile in input files.")
            job.appendTrace(
                "%s|validateJob: Missing Makefile in input files." %
                (datetime.utcnow().ctime()))
            errors += 1

        # Check if job timeout has been set; If not set timeout to default
        if not job.timeout or job.timeout <= 0:
            self.log.debug(
                "validateJob: Setting job.timeout to"
                " default config value: %d secs", Config.RUNJOB_TIMEOUT)
            job.timeout = Config.RUNJOB_TIMEOUT

        # Any problems, return an error status
        if errors > 0:
            self.log.error("validateJob: Job rejected: %d errors" % errors)
            job.appendTrace("%s|validateJob: Job rejected: %d errors" %
                            (datetime.utcnow().ctime(), errors))
            return -1
        else:
            return 0
Ejemplo n.º 13
0
Archivo: tango.py Proyecto: cg2v/Tango
class TangoServer:

    """ TangoServer - Implements the API functions that the server accepts
    """

    def __init__(self):
        self.daemon = True
        
        vmms = None
        if Config.VMMS_NAME == "tashiSSH":
            from vmms.tashiSSH import TashiSSH
            vmms = TashiSSH()
        elif Config.VMMS_NAME == "ec2SSH":
            from vmms.ec2SSH import Ec2SSH
            vmms = Ec2SSH()
        elif Config.VMMS_NAME == "localDocker":
            from vmms.localDocker import LocalDocker
            vmms = LocalDocker()
        elif Config.VMMS_NAME == "distDocker":
            from vmms.distDocker import DistDocker
            vmms = DistDocker()

        self.preallocator = Preallocator({Config.VMMS_NAME: vmms})
        self.jobQueue = JobQueue(self.preallocator)
        if not Config.USE_REDIS:
            # creates a local Job Manager if there is no persistent
            # memory between processes. Otherwise, JobManager will
            # be initiated separately
            JobManager(self.jobQueue).start()
        
        logging.basicConfig(
            filename=Config.LOGFILE,
            format="%(levelname)s|%(asctime)s|%(name)s|%(message)s",
            level=Config.LOGLEVEL,
        )
        self.start_time = time.time()
        self.log = logging.getLogger("TangoServer")
        self.log.info("Starting Tango server")

    def addJob(self, job):
        """ addJob - Add a job to the job queue
        """
        Config.job_requests += 1
        self.log.debug("Received addJob request")
        ret = self.__validateJob(job, self.preallocator.vmms)
        self.log.info("Done validating job %s" % (job.name))
        if ret == 0:
            return self.jobQueue.add(job)
        else:
            self.jobQueue.addDead(job)
            return -1

    def delJob(self, id, deadjob):
        """ delJob - Delete a job
        @param id: Id of job to delete
        @param deadjob - If 0, move the job from the live queue to the
        dead queue. If non-zero, remove the job from the dead queue
        and discard. Use with caution!
        """
        self.log.debug("Received delJob(%d, %d) request" % (id, deadjob))
        return self.jobQueue.delJob(id, deadjob)

    def getJobs(self, item):
        """ getJobs - Return the list of live jobs (item == 0) or the
        list of dead jobs (item == -1).
        """
        try:
            self.log.debug("Received getJobs(%s) request" % (item))

            if item == -1:  # return the list of dead jobs
                return self.jobQueue.deadJobs.values()

            elif item == 0:  # return the list of live jobs
                return self.jobQueue.liveJobs.values()

            else:  # invalid parameter
                return []
        except Exception as e:
            self.log.debug("getJobs: %s" % str(e))

    def preallocVM(self, vm, num):
        """ preallocVM - Set the pool size for VMs of type vm to num
        """
        self.log.debug("Received preallocVM(%s,%d)request"
                       % (vm.name, num))
        try:
            vmms = self.preallocator.vmms[vm.vmms]
            if not vm or num < 0:
                return -2
            if vm.image not in vmms.getImages():
                self.log.error("Invalid image name")
                return -3
            (name, ext) = os.path.splitext(vm.image)
            vm.name = name
            self.preallocator.update(vm, num)
            return 0
        except Exception as err:
            self.log.error("preallocVM failed: %s" % err)
            return -1

    def getVMs(self, vmms_name):
        """ getVMs - return the list of VMs managed by the service vmms_name
        """
        self.log.debug("Received getVMs request(%s)" % vmms_name)
        try:
            if vmms_name in self.preallocator.vmms:
                vmms_inst = self.preallocator.vmms[vmms_name]
                return vmms_inst.getVMs()
            else:
                return []
        except Exception as err:
            self.log.error("getVMs request failed: %s" % err)
            return []

    def delVM(self, vmName, id):
        """ delVM - delete a specific VM instance from a pool
        """
        self.log.debug("Received delVM request(%s, %d)" % (vmName, id))
        try:
            if not vmName or vmName == "" or not id:
                return -1
            return self.preallocator.destroyVM(vmName, id)
        except Exception as err:
            self.log.error("delVM request failed: %s" % err)
            return -1

    def getPool(self, vmName):
        """ getPool - Return the current members of a pool and its free list
        """
        self.log.debug("Received getPool request(%s)" % (vmName))
        try:
            if not vmName or vmName == "":
                return []
            result = self.preallocator.getPool(vmName)
            return ["pool_size=%d" % len(result["pool"]),
                    "free_size=%d" % len(result["free"]),
                    "pool=%s" % result["pool"],
                    "free=%s" % result["free"]]

        except Exception as err:
            self.log.error("getPool request failed: %s" % err)
            return []

    def getInfo(self):
        """ getInfo - return various statistics about the Tango daemon
        """
        stats = {}
        stats['elapsed_secs'] = time.time() - self.start_time;
        stats['job_requests'] = Config.job_requests
        stats['job_retries'] = Config.job_retries
        stats['waitvm_timeouts'] = Config.waitvm_timeouts
        stats['runjob_timeouts'] = Config.runjob_timeouts
        stats['copyin_errors'] = Config.copyin_errors
        stats['runjob_errors'] = Config.runjob_errors
        stats['copyout_errors'] = Config.copyout_errors
        stats['num_threads'] = threading.activeCount()
        
        return stats

    #
    # Helper functions
    #
    def resetTango(self, vmms):
        """ resetTango - resets Tango to a clean predictable state and
        ensures that it has a working virtualization environment. A side
        effect is that also checks that each supported VMMS is actually
        running.
        """
        self.log.debug("Received resetTango request.")

        try:
            # For each supported VMM system, get the instances it knows about,
            # and kill those in the current Tango name space.
            for vmms_name in vmms:
                vobj = vmms[vmms_name]
                vms = vobj.getVMs()
                self.log.debug("Pre-existing VMs: %s" % [vm.name for vm in vms])
                namelist = []
                for vm in vms:
                    if re.match("%s-" % Config.PREFIX, vm.name):
                        vobj.destroyVM(vm)
                        # Need a consistent abstraction for a vm between
                        # interfaces
                        namelist.append(vm.name)
                if namelist:
                    self.log.warning("Killed these %s VMs on restart: %s" %
                                (vmms_name, namelist))

            for _, job in self.jobQueue.liveJobs.iteritems():
                if not job.isNotAssigned():
                    job.makeUnassigned()
                self.log.debug("job: %s, assigned: %s" %
                               (str(job.name), str(job.assigned)))
        except Exception as err:
            self.log.error("resetTango: Call to VMMS %s failed: %s" %
                      (vmms_name, err))
            os._exit(1)


    def __validateJob(self, job, vmms):
        """ validateJob - validate the input arguments in an addJob request.
        """
        errors = 0

        # If this isn't a Tango job then bail with an error
        if (not isinstance(job, TangoJob)):
            return -1

        # Every job must have a name
        if not job.name:
            self.log.error("validateJob: Missing job.name")
            job.appendTrace("%s|validateJob: Missing job.name" %
                            (datetime.utcnow().ctime()))
            errors += 1

        # Check the virtual machine field
        if not job.vm:
            self.log.error("validateJob: Missing job.vm")
            job.appendTrace("%s|validateJob: Missing job.vm" %
                            (datetime.utcnow().ctime()))
            errors += 1
        else:
            if not job.vm.image:
                self.log.error("validateJob: Missing job.vm.image")
                job.appendTrace("%s|validateJob: Missing job.vm.image" %
                                (datetime.utcnow().ctime()))
                errors += 1
            else:
                vobj = vmms[Config.VMMS_NAME]
                imgList = vobj.getImages()
                if job.vm.image not in imgList:
                    self.log.error("validateJob: Image not found: %s" %
                              job.vm.image)
                    job.appendTrace("%s|validateJob: Image not found: %s" %
                                    (datetime.utcnow().ctime(), job.vm.image))
                    errors += 1
                else:
                    (name, ext) = os.path.splitext(job.vm.image)
                    job.vm.name = name

            if not job.vm.vmms:
                self.log.error("validateJob: Missing job.vm.vmms")
                job.appendTrace("%s|validateJob: Missing job.vm.vmms" %
                                (datetime.utcnow().ctime()))
                errors += 1
            else:
                if job.vm.vmms not in vmms:
                    self.log.error("validateJob: Invalid vmms name: %s" % job.vm.vmms)
                    job.appendTrace("%s|validateJob: Invalid vmms name: %s" %
                                    (datetime.utcnow().ctime(), job.vm.vmms))
                    errors += 1

        # Check the output file
        if not job.outputFile:
            self.log.error("validateJob: Missing job.outputFile")
            job.appendTrace("%s|validateJob: Missing job.outputFile" % (datetime.utcnow().ctime()))           
            errors += 1
        else:
            if not os.path.exists(os.path.dirname(job.outputFile)):
                self.log.error("validateJob: Bad output path: %s", job.outputFile)
                job.appendTrace("%s|validateJob: Bad output path: %s" %
                                (datetime.utcnow().ctime(), job.outputFile))
                errors += 1

        # Check for max output file size parameter
        if not job.maxOutputFileSize:
            self.log.debug("validateJob: Setting job.maxOutputFileSize "
                      "to default value: %d bytes", Config.MAX_OUTPUT_FILE_SIZE)
            job.maxOutputFileSize = Config.MAX_OUTPUT_FILE_SIZE

        # Check the list of input files
        hasMakefile = False
        for inputFile in job.input:
            if not inputFile.localFile:
                self.log.error("validateJob: Missing inputFile.localFile")
                job.appendTrace("%s|validateJob: Missing inputFile.localFile" %
                            (datetime.utcnow().ctime()))
                errors += 1
            else:
                if not os.path.exists(os.path.dirname(job.outputFile)):
                    self.log.error("validateJob: Bad output path: %s", job.outputFile)
                    job.appendTrace("%s|validateJob: Bad output path: %s" %
                                    (datetime.utcnow().ctime(), job.outputFile))
                    errors += 1

            if inputFile.destFile == 'Makefile':
                hasMakefile = True

        # Check if input files include a Makefile
        if not hasMakefile:
            self.log.error("validateJob: Missing Makefile in input files.")
            job.appendTrace("%s|validateJob: Missing Makefile in input files." % (datetime.utcnow().ctime()))
            errors+=1    

        # Check if job timeout has been set; If not set timeout to default
        if not job.timeout or job.timeout <= 0:
            self.log.debug("validateJob: Setting job.timeout to"
                      " default config value: %d secs", Config.RUNJOB_TIMEOUT)
            job.timeout = Config.RUNJOB_TIMEOUT

        # Any problems, return an error status
        if errors > 0:
            self.log.error("validateJob: Job rejected: %d errors" % errors)
            job.appendTrace("%s|validateJob: Job rejected: %d errors" %
                                (datetime.utcnow().ctime(), errors))
            return -1
        else:
            return 0
Ejemplo n.º 14
0
class TestJobQueue(unittest.TestCase):
    def setUp(self):

        if Config.USE_REDIS:
            __db = redis.StrictRedis(Config.REDIS_HOSTNAME,
                                     Config.REDIS_PORT,
                                     db=0)
            __db.flushall()

        self.job1 = TangoJob(
            name="sample_job_1",
            vm="ilter.img",
            outputFile="sample_job_1_output",
            input=[],
            timeout=30,
            notifyURL="notifyMeUrl",
            maxOutputFileSize=4096,
        )

        self.job2 = TangoJob(
            name="sample_job_2",
            vm="ilter.img",
            outputFile="sample_job_2_output",
            input=[],
            timeout=30,
            notifyURL="notifyMeUrl",
            maxOutputFileSize=4096,
        )

        self.jobQueue = JobQueue(None)
        self.jobQueue.reset()
        self.jobId1 = self.jobQueue.add(self.job1)
        self.jobId2 = self.jobQueue.add(self.job2)

    def test_sharedInt(self):
        if Config.USE_REDIS:
            num1 = TangoIntValue("nextID", 1000)
            num2 = TangoIntValue("nextID", 3000)
            self.assertEqual(num1.get(), 1000)
            self.assertEqual(num1.get(), num2.get())
        else:
            return

    def test_job(self):
        self.job1.makeUnassigned()
        self.assertTrue(self.job1.isNotAssigned())

        job = self.jobQueue.get(self.jobId1)
        self.assertTrue(job.isNotAssigned())

        self.job1.makeAssigned()
        print("Checkout:")
        self.assertFalse(self.job1.isNotAssigned())
        self.assertFalse(job.isNotAssigned())

    def test_add(self):
        info = self.jobQueue.getInfo()
        self.assertEqual(info["size"], 2)

    def test_addToUnassigned(self):
        info = self.jobQueue.getInfo()
        self.assertEqual(info["size_unassignedjobs"], 2)

    def test_addDead(self):
        return self.assertEqual(1, 1)

    def test_delJob(self):
        self.jobQueue.delJob(self.jobId1, 0)
        info = self.jobQueue.getInfo()
        self.assertEqual(info["size"], 1)
        self.assertEqual(info["size_deadjobs"], 1)
        self.assertEqual(info["size_unassignedjobs"], 1)

        self.jobQueue.delJob(self.jobId1, 1)
        info = self.jobQueue.getInfo()
        self.assertEqual(info["size_deadjobs"], 0)
        self.assertEqual(info["size"], 1)
        self.assertEqual(info["size_unassignedjobs"], 1)

        return False

    def test_get(self):
        ret_job_1 = self.jobQueue.get(self.jobId1)
        self.assertEqual(str(ret_job_1.id), self.jobId1)

        ret_job_2 = self.jobQueue.get(self.jobId2)
        self.assertEqual(str(ret_job_2.id), self.jobId2)

    def test_getNextPendingJob(self):
        self.jobQueue.assignJob(self.jobId2)
        # job 2 should have been removed from unassigned queue
        info = self.jobQueue.getInfo()
        self.assertEqual(info["size_unassignedjobs"], 1)
        self.jobQueue.assignJob(self.jobId1)
        info = self.jobQueue.getInfo()
        self.assertEqual(info["size_unassignedjobs"], 0)
        self.jobQueue.unassignJob(self.jobId1)
        info = self.jobQueue.getInfo()
        self.assertEqual(info["size_unassignedjobs"], 1)
        job = self.jobQueue.getNextPendingJob()
        self.assertMultiLineEqual(str(job.id), self.jobId1)

    def test_getNextPendingJob2(self):
        job = self.jobQueue.getNextPendingJob()
        self.assertMultiLineEqual(str(job.id), self.jobId1)
        job = self.jobQueue.getNextPendingJob()
        self.assertMultiLineEqual(str(job.id), self.jobId2)

    def test_assignJob(self):
        self.jobQueue.assignJob(self.jobId1)
        job = self.jobQueue.get(self.jobId1)
        self.assertFalse(job.isNotAssigned())

    def test_unassignJob(self):
        self.jobQueue.assignJob(self.jobId1)
        job = self.jobQueue.get(self.jobId1)
        self.assertTrue(job.assigned)

        self.jobQueue.unassignJob(self.jobId1)
        job = self.jobQueue.get(self.jobId1)
        return self.assertEqual(job.assigned, False)

    def test_makeDead(self):
        info = self.jobQueue.getInfo()
        self.assertEqual(info["size_deadjobs"], 0)
        self.assertEqual(info["size_unassignedjobs"], 2)
        self.jobQueue.makeDead(self.jobId1, "test")
        info = self.jobQueue.getInfo()
        self.assertEqual(info["size_deadjobs"], 1)
        self.assertEqual(info["size_unassignedjobs"], 1)

    def test__getNextID(self):
        init_id = self.jobQueue.nextID
        for i in range(1, Config.MAX_JOBID + 100):
            id = self.jobQueue._getNextID()
            self.assertNotEqual(str(id), self.jobId1)
        self.jobQueue.nextID = init_id
Ejemplo n.º 15
0
class TangoServer:
    """ TangoServer - Implements the API functions that the server accepts
    """
    def __init__(self):
        self.daemon = True

        # init logging early, or some logging will be lost
        logging.basicConfig(
            filename=Config.LOGFILE,
            format="%(levelname)s|%(asctime)s|%(name)s|%(message)s",
            level=Config.LOGLEVEL,
        )

        vmms = None
        if Config.VMMS_NAME == "tashiSSH":
            from vmms.tashiSSH import TashiSSH
            vmms = TashiSSH()
        elif Config.VMMS_NAME == "ec2SSH":
            from vmms.ec2SSH import Ec2SSH
            vmms = Ec2SSH()
        elif Config.VMMS_NAME == "localDocker":
            from vmms.localDocker import LocalDocker
            vmms = LocalDocker()
        elif Config.VMMS_NAME == "distDocker":
            from vmms.distDocker import DistDocker
            vmms = DistDocker()

        self.preallocator = Preallocator({Config.VMMS_NAME: vmms})
        self.jobQueue = JobQueue(self.preallocator)
        if not Config.USE_REDIS:
            # creates a local Job Manager if there is no persistent
            # memory between processes. Otherwise, JobManager will
            # be initiated separately
            JobManager(self.jobQueue).start()

        self.start_time = time.time()
        self.log = logging.getLogger("TangoServer")
        self.log.info("Starting Tango server")

    def addJob(self, job):
        """ addJob - Add a job to the job queue
        """
        Config.job_requests += 1
        self.log.debug("Received addJob request")
        ret = self.__validateJob(job, self.preallocator.vmms)
        self.log.info("Done validating job %s" % (job.name))
        if ret == 0:
            return self.jobQueue.add(job)
        else:
            self.jobQueue.addDead(job)
            return -1

    def delJob(self, id, deadjob):
        """ delJob - Delete a job
        @param id: Id of job to delete
        @param deadjob - If 0, move the job from the live queue to the
        dead queue. If non-zero, remove the job from the dead queue
        and discard. Use with caution!
        """
        self.log.debug("Received delJob(%d, %d) request" % (id, deadjob))
        return self.jobQueue.delJob(id, deadjob)

    def cancelJobWithPath(self, outFilePath):
        """ cancelJobWithPath - when this function returns, one of the following
        is true:
          1. The job with the specified output file does not exist
          2. the job with the specified output file has finished running normally
          3. The job with the specified output file has been cancelled
          4. The job was found, and it's running, but cancellation failed.
        In case 1, NOT_FOUND is returned.
                2, ALREADY_COMPLETED is returned.
                3, SUCCEEDED is returned.
                4, FAILED is returned.
        """
        self.log.debug("Received cancelJobWithPath(%s) request" %
                       (outFilePath))

        id, job, job_status = self.jobQueue.findRemovingWaiting(outFilePath)
        self.log.debug("cancelJobWithPath: Found a job %s with status %s" %
                       (job, job_status))

        if job_status == JobQueue.JobStatus.NOT_FOUND:
            return CancellationStatus.NOT_FOUND
        elif job_status == JobQueue.JobStatus.DEAD:
            return CancellationStatus.ALREADY_COMPLETED
        elif job_status == JobQueue.JobStatus.RUNNING:
            return self.killUntilJobComplete(id, job)
        else:
            assert job_status == JobQueue.JobStatus.WAITING
            # In this case, findRemovingLive has moved the live job to the dead
            # queue, and we have nothing to worry about.
            # Let's notify autolab that the job is done.
            if job.notifyURL:
                outputFileName = job.outputFile.split("/")[
                    -1]  # get filename from path
                files = {
                    'file': unicode('Job was cancelled before it started.')
                }
                hdrs = {'Filename': outputFileName}
                self.log.debug("Sending request to %s" % job.notifyURL)

                def worker():
                    requests.post(job.notifyURL,
                                  files=files,
                                  headers=hdrs,
                                  data={'runningTimeSeconds': 0},
                                  verify=False)

                threading.Thread(target=worker).start()
            return CancellationStatus.SUCCEEDED

    def killUntilJobComplete(self, id, job):
        """ Here's the contract:
        If the job is currently running (i.e. it could complete at some point
        in the future), then this method will return only when the job is
        complete. It tries to help by repeatedly `pkill`ing the process. But
        a compliant implementation could just block until the job completes
        on its own.

        On success, returns SUCCEEDED;
        on failure, return FAILED (compliant w above method)
        """
        self.log.debug("Received killUntilJobComplete request")

        vm = job.vm
        for _ in xrange(0, Config.CANCEL_RETRIES):
            # Returns 0 on success.
            if self.preallocator.vmms[vm.vmms].kill(vm) == 0:
                return CancellationStatus.SUCCEEDED

        return CancellationStatus.FAILED

    def getJobs(self, item):
        """ getJobs - Return the list of live jobs (item == 0) or the
        list of dead jobs (item == -1).

        ^ You gotta be kidding me. Is this an API for number lovers.
        """
        try:
            self.log.debug("Received getJobs(%s) request" % (item))

            if item == -1:  # return the list of dead jobs
                return self.jobQueue.deadJobs.values()

            elif item == 0:  # return the list of live jobs
                return self.jobQueue.liveJobs.values()

            else:  # invalid parameter
                return []
        except Exception as e:
            self.log.debug("getJobs: %s" % str(e))

    def preallocVM(self, vm, num):
        """ preallocVM - Set the pool size for VMs of type vm to num
        """
        self.log.debug("Received preallocVM(%s,%d)request" % (vm.name, num))
        try:
            vmms = self.preallocator.vmms[vm.vmms]
            if not vm or num < 0:
                return -2
            if not vmms.isValidImage(vm.image):
                self.log.error("Invalid image name")
                return -3
            (name, ext) = os.path.splitext(vm.image)
            vm.name = name
            self.preallocator.update(vm, num)
            return 0
        except Exception as err:
            self.log.error("preallocVM failed: %s" % err)
            return -1

    def getVMs(self, vmms_name):
        """ getVMs - return the list of VMs managed by the service vmms_name
        """
        self.log.debug("Received getVMs request(%s)" % vmms_name)
        try:
            if vmms_name in self.preallocator.vmms:
                vmms_inst = self.preallocator.vmms[vmms_name]
                return vmms_inst.getVMs()
            else:
                return []
        except Exception as err:
            self.log.error("getVMs request failed: %s" % err)
            return []

    def delVM(self, vmName, id):
        """ delVM - delete a specific VM instance from a pool
        """
        self.log.debug("Received delVM request(%s, %d)" % (vmName, id))
        try:
            if not vmName or vmName == "" or not id:
                return -1
            return self.preallocator.destroyVM(vmName, id)
        except Exception as err:
            self.log.error("delVM request failed: %s" % err)
            return -1

    def getPool(self, vmName):
        """ getPool - Return the current members of a pool and its free list
        """
        self.log.debug("Received getPool request(%s)" % (vmName))
        try:
            if not vmName or vmName == "":
                return []
            result = self.preallocator.getPool(vmName)
            return [
                "pool_size=%d" % len(result["pool"]),
                "free_size=%d" % len(result["free"]),
                "pool=%s" % result["pool"],
                "free=%s" % result["free"]
            ]

        except Exception as err:
            self.log.error("getPool request failed: %s" % err)
            return []

    def getInfo(self):
        """ getInfo - return various statistics about the Tango daemon
        """
        stats = {}
        stats['elapsed_secs'] = time.time() - self.start_time
        stats['job_requests'] = Config.job_requests
        stats['job_retries'] = Config.job_retries
        stats['waitvm_timeouts'] = Config.waitvm_timeouts
        stats['runjob_timeouts'] = Config.runjob_timeouts
        stats['copyin_errors'] = Config.copyin_errors
        stats['runjob_errors'] = Config.runjob_errors
        stats['copyout_errors'] = Config.copyout_errors
        stats['num_threads'] = threading.activeCount()

        return stats

    def setScaleParams(self, low_water_mark, max_pool_size):
        self.preallocator.low_water_mark.set(low_water_mark)
        self.jobQueue.max_pool_size.set(max_pool_size)
        return 0

    def runningTimeForOutputFile(self, outputFile):
        self.log.debug("Received runningTimeForOutputFile(%s)" % outputFile)
        liveJobTuple = self.jobQueue.liveJobs.getWrapped(outputFile)
        if liveJobTuple:
            (_, liveJob) = liveJobTuple
            self.log.debug(str(liveJob.startTime))
            return liveJob.runningTime()
        return None

    #
    # Helper functions
    #

    # NOTE: This function should be called by ONLY jobManager.  The rest servers
    # shouldn't call this function.
    def resetTango(self, vmms):
        """ resetTango - resets Tango to a clean predictable state and
        ensures that it has a working virtualization environment. A side
        effect is that also checks that each supported VMMS is actually
        running.
        """

        # There are two cases this function is called: 1. Tango has a fresh start.
        # Then we want to destroy all instances in Tango's name space.  2. Job
        # Manager is restarted after a previous crash.  Then we want to destroy
        # the "busy" instances prior to the crash and leave the "free" onces intact.

        self.log.debug("Received resetTango request.")

        try:
            # For each supported VMM system, get the instances it knows about
            # in the current Tango name space and kill those not in free pools.
            for vmms_name in vmms:
                vobj = vmms[vmms_name]

                # Round up all instances in the free pools.
                allFreeVMs = []
                for key in self.preallocator.machines.keys():
                    freePool = self.preallocator.getPool(key)["free"]
                    for vmId in freePool:
                        vmName = vobj.instanceName(vmId, key)
                        allFreeVMs.append(vmName)
                self.log.info("vms in all free pools: %s" % allFreeVMs)

                # For each in Tango's name space, destroy the onces in free pool.
                # AND remove it from Tango's internal bookkeeping.
                vms = vobj.getVMs()
                self.log.debug("Pre-existing VMs: %s" %
                               [vm.name for vm in vms])
                destroyedList = []
                removedList = []
                for vm in vms:
                    if re.match("%s-" % Config.PREFIX, vm.name):

                        # Todo: should have an one-call interface to destroy the
                        # machine AND to keep the interval data consistent.
                        if vm.name not in allFreeVMs:
                            destroyedList.append(vm.name)
                            vobj.destroyVM(vm)

                            # also remove it from "total" set of the pool
                            (prefix, vmId, poolName) = vm.name.split("-")
                            machine = self.preallocator.machines.get(poolName)
                            if not machine:  # the pool may not exist
                                continue

                            if int(vmId) in machine[0]:
                                removedList.append(vm.name)
                                machine[0].remove(int(vmId))
                            self.preallocator.machines.set(poolName, machine)

                if destroyedList:
                    self.log.warning("Killed these %s VMs on restart: %s" %
                                     (vmms_name, destroyedList))
                if removedList:
                    self.log.warning("Removed these %s VMs from their pools" %
                                     (removedList))

            for _, job in self.jobQueue.liveJobs.iteritems():
                if not job.isNotAssigned():
                    job.makeUnassigned()
                self.log.debug("job: %s, assigned: %s" %
                               (str(job.name), str(job.assigned)))
        except Exception as err:
            self.log.error("resetTango: Call to VMMS %s failed: %s" %
                           (vmms_name, err))
            os._exit(1)

    def __validateJob(self, job, vmms):
        """ validateJob - validate the input arguments in an addJob request.
        """
        errors = 0

        # If this isn't a Tango job then bail with an error
        if (not isinstance(job, TangoJob)):
            return -1

        # Every job must have a name
        if not job.name:
            self.log.error("validateJob: Missing job.name")
            job.appendTrace("validateJob: Missing job.name")
            errors += 1

        # Check the virtual machine field
        if not job.vm:
            self.log.error("validateJob: Missing job.vm")
            job.appendTrace("validateJob: Missing job.vm")
            errors += 1
        else:
            if not job.vm.image:
                self.log.error("validateJob: Missing job.vm.image")
                job.appendTrace("validateJob: Missing job.vm.image")
                errors += 1
            else:
                vobj = vmms[Config.VMMS_NAME]
                if not vobj.isValidImage(job.vm.image):
                    self.log.error("validateJob: Image not found: %s" %
                                   job.vm.image)

                    job.appendTrace("validateJob: Image not found: %s" %
                                    job.vm.image)
                    errors += 1
                else:
                    (name, ext) = os.path.splitext(job.vm.image)
                    job.vm.name = name

            if not job.vm.vmms:
                self.log.error("validateJob: Missing job.vm.vmms")
                job.appendTrace("validateJob: Missing job.vm.vmms")
                errors += 1
            else:
                if job.vm.vmms not in vmms:
                    self.log.error("validateJob: Invalid vmms name: %s" %
                                   job.vm.vmms)
                    job.appendTrace("validateJob: Invalid vmms name: %s" %
                                    job.vm.vmms)
                    errors += 1

        # Check the output file
        if not job.outputFile:
            self.log.error("validateJob: Missing job.outputFile")
            job.appendTrace("validateJob: Missing job.outputFile")
            errors += 1
        else:
            if not os.path.exists(os.path.dirname(job.outputFile)):
                self.log.error("validateJob: Bad output path: %s" %
                               job.outputFile)
                job.appendTrace("validateJob: Bad output path: %s" %
                                job.outputFile)
                errors += 1

        # Check for max output file size parameter
        if not job.maxOutputFileSize:
            self.log.debug(
                "validateJob: Setting job.maxOutputFileSize "
                "to default value: %d bytes", Config.MAX_OUTPUT_FILE_SIZE)
            job.maxOutputFileSize = Config.MAX_OUTPUT_FILE_SIZE

        # Check the list of input files
        hasMakefile = False
        for inputFile in job.input:
            if not inputFile.localFile:
                self.log.error("validateJob: Missing inputFile.localFile")
                job.appendTrace("validateJob: Missing inputFile.localFile")
                errors += 1
            else:
                if not os.path.exists(os.path.dirname(job.outputFile)):
                    self.log.error("validateJob: Bad output path: %s" %
                                   job.outputFile)
                    job.appendTrace("validateJob: Bad output path: %s" %
                                    job.outputFile)
                    errors += 1

            if inputFile.destFile == 'Makefile':
                hasMakefile = True

        # Check if input files include a Makefile
        if not hasMakefile:
            self.log.error("validateJob: Missing Makefile in input files.")
            job.appendTrace("validateJob: Missing Makefile in input files.")
            errors += 1

        # Check if job timeout has been set; If not set timeout to default
        if not job.timeout or job.timeout <= 0:
            self.log.debug(
                "validateJob: Setting job.timeout to"
                " default config value: %d secs", Config.RUNJOB_TIMEOUT)
            job.timeout = Config.RUNJOB_TIMEOUT

        # Any problems, return an error status
        if errors > 0:
            self.log.error("validateJob: Job rejected: %d errors" % errors)
            job.appendTrace("validateJob: Job rejected: %d errors" % errors)
            return -1
        else:
            return 0
Ejemplo n.º 16
0
 def __init__(s, queuePath):
     s.queuePath = queuePath
     s._connection_cache = {}
     s.queue = JobQueue(queuePath)
Ejemplo n.º 17
0
class TestJobQueue(unittest.TestCase):

    def setUp(self):

        if Config.USE_REDIS:
            __db = redis.StrictRedis(
                Config.REDIS_HOSTNAME, Config.REDIS_PORT, db=0)
            __db.flushall()

        self.job1 = TangoJob(
            name="sample_job_1",
            vm="ilter.img",
            outputFile="sample_job_1_output",
            input=[],
            timeout=30,
            notifyURL="notifyMeUrl",
            maxOutputFileSize=4096)

        self.job2 = TangoJob(
            name="sample_job_2",
            vm="ilter.img",
            outputFile="sample_job_2_output",
            input=[],
            timeout=30,
            notifyURL="notifyMeUrl",
            maxOutputFileSize=4096)

        self.jobQueue = JobQueue(None)
        self.jobQueue.reset()
        self.jobId1 = self.jobQueue.add(self.job1)
        self.jobId2 = self.jobQueue.add(self.job2)

    def test_sharedInt(self):
        if Config.USE_REDIS:
            num1 = TangoIntValue("nextID", 1000)
            num2 = TangoIntValue("nextID", 3000)
            self.assertEqual(num1.get(), 1000)
            self.assertEqual(num1.get(), num2.get())
        else:
            return

    def test_job(self):
        self.job1.makeUnassigned()
        self.assertTrue(self.job1.isNotAssigned())

        job = self.jobQueue.get(self.jobId1)
        self.assertTrue(job.isNotAssigned())

        self.job1.makeAssigned()
        print "Checkout:"
        self.assertFalse(self.job1.isNotAssigned())
        self.assertFalse(job.isNotAssigned())

    def test_add(self):
        info = self.jobQueue.getInfo()
        self.assertEqual(info['size'], 2)

    def test_addDead(self):
        return self.assertEqual(1, 1)

    def test_remove(self):
        self.jobQueue.remove(self.jobId1)
        info = self.jobQueue.getInfo()
        self.assertEqual(info['size'], 1)

        self.jobQueue.remove(self.jobId2)
        info = self.jobQueue.getInfo()
        self.assertEqual(info['size'], 0)

    def test_delJob(self):
        self.jobQueue.delJob(self.jobId1, 0)
        info = self.jobQueue.getInfo()
        self.assertEqual(info['size'], 1)
        self.assertEqual(info['size_deadjobs'], 1)

        self.jobQueue.delJob(self.jobId1, 1)
        info = self.jobQueue.getInfo()
        self.assertEqual(info['size_deadjobs'], 0)

        return False

    def test_get(self):
        ret_job_1 = self.jobQueue.get(self.jobId1)
        self.assertEqual(str(ret_job_1.id), self.jobId1)

        ret_job_2 = self.jobQueue.get(self.jobId2)
        self.assertEqual(str(ret_job_2.id), self.jobId2)

    def test_getNextPendingJob(self):
        self.jobQueue.assignJob(self.jobId2)
        self.jobQueue.unassignJob(self.jobId1)
        exp_id = self.jobQueue.getNextPendingJob()
        self.assertMultiLineEqual(exp_id, self.jobId1)

    def test_getNextPendingJobReuse(self):
        return False

    def test_assignJob(self):
        self.jobQueue.assignJob(self.jobId1)
        job = self.jobQueue.get(self.jobId1)
        self.assertFalse(job.isNotAssigned())

    def test_unassignJob(self):
        self.jobQueue.assignJob(self.jobId1)
        job = self.jobQueue.get(self.jobId1)
        self.assertTrue(job.assigned)

        self.jobQueue.unassignJob(self.jobId1)
        job = self.jobQueue.get(self.jobId1)
        return self.assertEqual(job.assigned, False)

    def test_makeDead(self):
        info = self.jobQueue.getInfo()
        self.assertEqual(info['size_deadjobs'], 0)
        self.jobQueue.makeDead(self.jobId1, "test")
        info = self.jobQueue.getInfo()
        self.assertEqual(info['size_deadjobs'], 1)

    def test__getNextID(self):

        init_id = self.jobQueue.nextID
        for i in xrange(1, Config.MAX_JOBID + 100):
            id = self.jobQueue._getNextID()
            self.assertNotEqual(str(id), self.jobId1)

        self.jobQueue.nextID = init_id
Ejemplo n.º 18
0
if __name__ == "__main__":

    if not Config.USE_REDIS:
        print(
            "You need to have Redis running to be able to initiate stand-alone\
         JobManager")
    else:
        vmms = None

        if Config.VMMS_NAME == "localSSH":
            from vmms.localSSH import LocalSSH
            vmms = LocalSSH()
        elif Config.VMMS_NAME == "tashiSSH":
            from vmms.tashiSSH import TashiSSH
            vmms = TashiSSH()
        elif Config.VMMS_NAME == "ec2SSH":
            from vmms.ec2SSH import Ec2SSH
            vmms = Ec2SSH()
        elif Config.VMMS_NAME == "localDocker":
            from vmms.localDocker import LocalDocker
            vmms = LocalDocker()

        vmms = {Config.VMMS_NAME: vmms}
        preallocator = Preallocator(vmms)
        queue = JobQueue(preallocator)

        JobManager(queue, vmms, preallocator)

        print("Starting the stand-alone Tango JobManager")
Ejemplo n.º 19
0
def getAll(queuePath):

    # Dump all jobs in the queue.
    # @param queuePath: the job queue path
    # @returns: an array of jobs in an object
    return {'jobs': JobQueue(queuePath).getAll()}