Esempio n. 1
0
    def __init__(self):
        """Set up logging, the VMMS backend, the preallocator and the job queue.

        Logging is configured before any other component is created so that
        records emitted while those components start up are handled by the
        file handler configured here.
        """
        self.daemon = True

        # Configure the root logger first: basicConfig() used to run after
        # the backend, queue and job manager were already created.
        logging.basicConfig(
            filename=Config.LOGFILE,
            format="%(levelname)s|%(asctime)s|%(name)s|%(message)s",
            level=Config.LOGLEVEL,
        )

        # Only the backend selected by Config.VMMS_NAME is imported.
        # NOTE(review): vmms stays None for an unrecognized name.
        vmms = None
        if Config.VMMS_NAME == "tashiSSH":
            from vmms.tashiSSH import TashiSSH
            vmms = TashiSSH()
        elif Config.VMMS_NAME == "ec2SSH":
            from vmms.ec2SSH import Ec2SSH
            vmms = Ec2SSH()
        elif Config.VMMS_NAME == "localDocker":
            from vmms.localDocker import LocalDocker
            vmms = LocalDocker()
        elif Config.VMMS_NAME == "distDocker":
            from vmms.distDocker import DistDocker
            vmms = DistDocker()

        self.preallocator = Preallocator({Config.VMMS_NAME: vmms})
        self.jobQueue = JobQueue(self.preallocator)
        if not Config.USE_REDIS:
            # creates a local Job Manager if there is no persistent
            # memory between processes. Otherwise, JobManager will
            # be initiated separately
            JobManager(self.jobQueue).start()

        self.start_time = time.time()
        self.log = logging.getLogger("TangoServer")
        self.log.info("Starting Tango server")
Esempio n. 2
0
File: tango.py Progetto: cg2v/Tango
    def __init__(self):
        """Set up logging, the VMMS backend, the preallocator and the job queue.

        Logging is configured before any other component is created so that
        records emitted while those components start up are handled by the
        file handler configured here.
        """
        self.daemon = True

        # Configure the root logger first: basicConfig() used to run after
        # the backend, queue and job manager were already created.
        logging.basicConfig(
            filename=Config.LOGFILE,
            format="%(levelname)s|%(asctime)s|%(name)s|%(message)s",
            level=Config.LOGLEVEL,
        )

        # Only the backend selected by Config.VMMS_NAME is imported.
        # NOTE(review): vmms stays None for an unrecognized name.
        vmms = None
        if Config.VMMS_NAME == "tashiSSH":
            from vmms.tashiSSH import TashiSSH
            vmms = TashiSSH()
        elif Config.VMMS_NAME == "ec2SSH":
            from vmms.ec2SSH import Ec2SSH
            vmms = Ec2SSH()
        elif Config.VMMS_NAME == "localDocker":
            from vmms.localDocker import LocalDocker
            vmms = LocalDocker()
        elif Config.VMMS_NAME == "distDocker":
            from vmms.distDocker import DistDocker
            vmms = DistDocker()

        self.preallocator = Preallocator({Config.VMMS_NAME: vmms})
        self.jobQueue = JobQueue(self.preallocator)
        if not Config.USE_REDIS:
            # creates a local Job Manager if there is no persistent
            # memory between processes. Otherwise, JobManager will
            # be initiated separately
            JobManager(self.jobQueue).start()

        self.start_time = time.time()
        self.log = logging.getLogger("TangoServer")
        self.log.info("Starting Tango server")
Esempio n. 3
0
    def __init__(self):
        """Configure logging and build the components behind the REST server.

        Creates the VMMS backend selected by Config.VMMS_NAME, the
        preallocator, the job queue, a local JobManager when Redis is not in
        use, the TangoServer and the Status object.
        """
        # Configure the root logger once, before anything else is created.
        # This call used to be repeated further down; basicConfig() does
        # nothing once the root logger has handlers, so the repeat was dead
        # code and has been dropped.
        logging.basicConfig(
            filename=self.LOGFILE,
            format="%(levelname)s|%(asctime)s|%(name)s|%(message)s",
            level=Config.LOGLEVEL,
        )

        # Only the backend selected by Config.VMMS_NAME is imported.
        # NOTE(review): vmms stays None for an unrecognized name.
        vmms = None
        if Config.VMMS_NAME == "localSSH":
            from vmms.localSSH import LocalSSH
            vmms = LocalSSH()
        elif Config.VMMS_NAME == "tashiSSH":
            from vmms.tashiSSH import TashiSSH
            vmms = TashiSSH()
        elif Config.VMMS_NAME == "ec2SSH":
            from vmms.ec2SSH import Ec2SSH
            vmms = Ec2SSH()
        elif Config.VMMS_NAME == "localDocker":
            from vmms.localDocker import LocalDocker
            vmms = LocalDocker()

        self.vmms = {Config.VMMS_NAME: vmms}
        self.preallocator = Preallocator(self.vmms)
        self.queue = JobQueue(self.preallocator)

        if not Config.USE_REDIS:
            # creates a local Job Manager if there is no persistent
            # memory between processes. Otherwise, JobManager will
            # be initiated separately
            # NOTE(review): the instance is neither stored nor started here;
            # presumably the constructor takes care of that - confirm.
            JobManager(self.queue, self.vmms, self.preallocator)

        self.tango = TangoServer(self.queue, self.preallocator, self.vmms)

        logging.getLogger('boto').setLevel(logging.INFO)
        self.log = logging.getLogger("TangoREST")
        self.log.info("Starting RESTful Tango server")
        self.status = Status()
Esempio n. 4
0
class TangoServer:
    """ TangoServer - Implements the API functions that the server accepts
    """
    def __init__(self):
        """Set up logging, the VMMS backend, the preallocator and the job queue.

        Logging is configured before any other component is created so that
        records emitted while those components start up are handled by the
        file handler configured here.
        """
        self.daemon = True

        # Configure the root logger first: basicConfig() used to run after
        # the backend, queue and job manager were already created.
        logging.basicConfig(
            filename=Config.LOGFILE,
            format="%(levelname)s|%(asctime)s|%(name)s|%(message)s",
            level=Config.LOGLEVEL,
        )

        # Only the backend selected by Config.VMMS_NAME is imported.
        # NOTE(review): vmms stays None for an unrecognized name.
        vmms = None
        if Config.VMMS_NAME == "tashiSSH":
            from vmms.tashiSSH import TashiSSH
            vmms = TashiSSH()
        elif Config.VMMS_NAME == "ec2SSH":
            from vmms.ec2SSH import Ec2SSH
            vmms = Ec2SSH()
        elif Config.VMMS_NAME == "localDocker":
            from vmms.localDocker import LocalDocker
            vmms = LocalDocker()
        elif Config.VMMS_NAME == "distDocker":
            from vmms.distDocker import DistDocker
            vmms = DistDocker()

        self.preallocator = Preallocator({Config.VMMS_NAME: vmms})
        self.jobQueue = JobQueue(self.preallocator)
        if not Config.USE_REDIS:
            # creates a local Job Manager if there is no persistent
            # memory between processes. Otherwise, JobManager will
            # be initiated separately
            JobManager(self.jobQueue).start()

        self.start_time = time.time()
        self.log = logging.getLogger("TangoServer")
        self.log.info("Starting Tango server")

    def addJob(self, job):
        """ addJob - Validate a job and hand it to the job queue

        Increments Config.job_requests on every call. A job that passes
        validation is added to the queue and the result of jobQueue.add()
        is returned; a rejected job goes to jobQueue.addDead() and -1 is
        returned.
        """
        Config.job_requests += 1
        self.log.debug("Received addJob request")
        status = self.__validateJob(job, self.preallocator.vmms)
        self.log.info("Done validating job %s" % (job.name))

        if status != 0:
            # Rejected jobs are still recorded, on the dead queue.
            self.jobQueue.addDead(job)
            return -1
        return self.jobQueue.add(job)

    def delJob(self, id, deadjob):
        """ delJob - Delete a job
        @param id: Id of job to delete
        @param deadjob - If 0, move the job from the live queue to the
        dead queue. If non-zero, remove the job from the dead queue
        and discard. Use with caution!
        """
        self.log.debug("Received delJob(%d, %d) request" % (id, deadjob))
        return self.jobQueue.delJob(id, deadjob)

    def getJobs(self, item):
        """ getJobs - Return the list of live jobs (item == 0) or the
        list of dead jobs (item == -1).
        """
        try:
            self.log.debug("Received getJobs(%s) request" % (item))

            if item == -1:  # return the list of dead jobs
                return self.jobQueue.deadJobs.values()

            elif item == 0:  # return the list of live jobs
                return self.jobQueue.liveJobs.values()

            else:  # invalid parameter
                return []
        except Exception as e:
            self.log.debug("getJobs: %s" % str(e))

    def preallocVM(self, vm, num):
        """ preallocVM - Set the pool size for VMs of type vm to num
        """
        self.log.debug("Received preallocVM(%s,%d)request" % (vm.name, num))
        try:
            vmms = self.preallocator.vmms[vm.vmms]
            if not vm or num < 0:
                return -2
            if vm.image not in vmms.getImages():
                self.log.error("Invalid image name")
                return -3
            (name, ext) = os.path.splitext(vm.image)
            vm.name = name
            self.preallocator.update(vm, num)
            return 0
        except Exception as err:
            self.log.error("preallocVM failed: %s" % err)
            return -1

    def getVMs(self, vmms_name):
        """ getVMs - return the list of VMs managed by the service vmms_name
        """
        self.log.debug("Received getVMs request(%s)" % vmms_name)
        try:
            if vmms_name in self.preallocator.vmms:
                vmms_inst = self.preallocator.vmms[vmms_name]
                return vmms_inst.getVMs()
            else:
                return []
        except Exception as err:
            self.log.error("getVMs request failed: %s" % err)
            return []

    def delVM(self, vmName, id):
        """ delVM - delete a specific VM instance from a pool
        """
        self.log.debug("Received delVM request(%s, %d)" % (vmName, id))
        try:
            if not vmName or vmName == "" or not id:
                return -1
            return self.preallocator.destroyVM(vmName, id)
        except Exception as err:
            self.log.error("delVM request failed: %s" % err)
            return -1

    def getPool(self, vmName):
        """ getPool - Return the current members of a pool and its free list
        """
        self.log.debug("Received getPool request(%s)" % (vmName))
        try:
            if not vmName or vmName == "":
                return []
            result = self.preallocator.getPool(vmName)
            return [
                "pool_size=%d" % len(result["pool"]),
                "free_size=%d" % len(result["free"]),
                "pool=%s" % result["pool"],
                "free=%s" % result["free"]
            ]

        except Exception as err:
            self.log.error("getPool request failed: %s" % err)
            return []

    def getInfo(self):
        """ getInfo - return various statistics about the Tango daemon

        @return dict holding the seconds elapsed since self.start_time, the
        counters kept on Config and the number of live threads
        """
        return {
            'elapsed_secs': time.time() - self.start_time,
            'job_requests': Config.job_requests,
            'job_retries': Config.job_retries,
            'waitvm_timeouts': Config.waitvm_timeouts,
            'runjob_timeouts': Config.runjob_timeouts,
            'copyin_errors': Config.copyin_errors,
            'runjob_errors': Config.runjob_errors,
            'copyout_errors': Config.copyout_errors,
            # active_count() is the supported spelling; activeCount() is a
            # deprecated alias of the same function.
            'num_threads': threading.active_count(),
        }

    #
    # Helper functions
    #
    def resetTango(self, vmms):
        """ resetTango - resets Tango to a clean predictable state and
        ensures that it has a working virtualization environment. A side
        effect is that also checks that each supported VMMS is actually
        running.
        """
        self.log.debug("Received resetTango request.")

        try:
            # For each supported VMM system, get the instances it knows about,
            # and kill those in the current Tango name space.
            for vmms_name in vmms:
                vobj = vmms[vmms_name]
                vms = vobj.getVMs()
                self.log.debug("Pre-existing VMs: %s" %
                               [vm.name for vm in vms])
                namelist = []
                for vm in vms:
                    if re.match("%s-" % Config.PREFIX, vm.name):
                        vobj.destroyVM(vm)
                        # Need a consistent abstraction for a vm between
                        # interfaces
                        namelist.append(vm.name)
                if namelist:
                    self.log.warning("Killed these %s VMs on restart: %s" %
                                     (vmms_name, namelist))

            for _, job in self.jobQueue.liveJobs.iteritems():
                if not job.isNotAssigned():
                    job.makeUnassigned()
                self.log.debug("job: %s, assigned: %s" %
                               (str(job.name), str(job.assigned)))
        except Exception as err:
            self.log.error("resetTango: Call to VMMS %s failed: %s" %
                           (vmms_name, err))
            os._exit(1)

    def __validateJob(self, job, vmms):
        """ validateJob - validate the input arguments in an addJob request.

        @param job: the job to validate; anything that is not a TangoJob is
        rejected immediately
        @param vmms: mapping of VMMS name to VMMS object
        @return 0 if the job is acceptable, -1 otherwise

        Side effects: sets job.vm.name to the image name without its
        extension, and applies the configured defaults to
        job.maxOutputFileSize and job.timeout when they are unset. Every
        problem found is logged and appended to the job trace.
        """
        # If this isn't a Tango job then bail with an error
        if not isinstance(job, TangoJob):
            return -1

        def trace(message):
            # Trace entries carry their own timestamp prefix.
            job.appendTrace("%s|%s" % (datetime.utcnow().ctime(), message))

        def reject(message):
            # Record one validation failure in both the log and the trace;
            # returns 1 so callers can add it to the error count.
            self.log.error(message)
            trace(message)
            return 1

        errors = 0

        # Every job must have a name
        if not job.name:
            errors += reject("validateJob: Missing job.name")

        # Check the virtual machine field
        if not job.vm:
            errors += reject("validateJob: Missing job.vm")
        else:
            if not job.vm.image:
                errors += reject("validateJob: Missing job.vm.image")
            else:
                vobj = vmms[Config.VMMS_NAME]
                imgList = vobj.getImages()
                if job.vm.image not in imgList:
                    errors += reject("validateJob: Image not found: %s" %
                                     (job.vm.image,))
                    trace("validateJob: Images available: %s" % (imgList,))
                else:
                    job.vm.name = os.path.splitext(job.vm.image)[0]

            if not job.vm.vmms:
                errors += reject("validateJob: Missing job.vm.vmms")
            elif job.vm.vmms not in vmms:
                errors += reject("validateJob: Invalid vmms name: %s" %
                                 (job.vm.vmms,))

        # Check the output file
        if not job.outputFile:
            errors += reject("validateJob: Missing job.outputFile")
        elif not os.path.exists(os.path.dirname(job.outputFile)):
            errors += reject("validateJob: Bad output path: %s" %
                             (job.outputFile,))

        # Check for max output file size parameter
        if not job.maxOutputFileSize:
            self.log.debug(
                "validateJob: Setting job.maxOutputFileSize "
                "to default value: %d bytes", Config.MAX_OUTPUT_FILE_SIZE)
            job.maxOutputFileSize = Config.MAX_OUTPUT_FILE_SIZE

        # Check the list of input files
        hasMakefile = False
        for inputFile in job.input:
            if not inputFile.localFile:
                errors += reject("validateJob: Missing inputFile.localFile")
            elif job.outputFile and not os.path.exists(
                    os.path.dirname(job.outputFile)):
                # A missing outputFile is skipped here: it has already been
                # reported above, and os.path.dirname() raises on None.
                # NOTE(review): this repeats the output-path check once per
                # input file; it looks like it was meant to validate
                # inputFile.localFile - confirm before changing.
                errors += reject("validateJob: Bad output path: %s" %
                                 (job.outputFile,))

            if inputFile.destFile == 'Makefile':
                hasMakefile = True

        # Check if input files include a Makefile
        if not hasMakefile:
            errors += reject("validateJob: Missing Makefile in input files.")

        # Check if job timeout has been set; If not set timeout to default
        if not job.timeout or job.timeout <= 0:
            self.log.debug(
                "validateJob: Setting job.timeout to"
                " default config value: %d secs", Config.RUNJOB_TIMEOUT)
            job.timeout = Config.RUNJOB_TIMEOUT

        # Any problems, return an error status
        if errors > 0:
            reject("validateJob: Job rejected: %d errors" % errors)
            return -1
        return 0
Esempio n. 5
0
class TangoServer:
    """ TangoServer - Implements the API functions that the server accepts
    """
    def __init__(self):
        """Configure logging, then build the backend, preallocator and queue.

        A local JobManager is started only when Redis is not in use.
        """
        self.daemon = True

        # Logging comes first, otherwise records emitted during start-up
        # would be lost.
        logging.basicConfig(
            filename=Config.LOGFILE,
            format="%(levelname)s|%(asctime)s|%(name)s|%(message)s",
            level=Config.LOGLEVEL,
        )

        # Instantiate the backend that matches the configured name; it
        # stays None when the name is not one of the four known values.
        backend = None
        if Config.VMMS_NAME == "tashiSSH":
            from vmms.tashiSSH import TashiSSH
            backend = TashiSSH()
        elif Config.VMMS_NAME == "ec2SSH":
            from vmms.ec2SSH import Ec2SSH
            backend = Ec2SSH()
        elif Config.VMMS_NAME == "localDocker":
            from vmms.localDocker import LocalDocker
            backend = LocalDocker()
        elif Config.VMMS_NAME == "distDocker":
            from vmms.distDocker import DistDocker
            backend = DistDocker()

        backends = {Config.VMMS_NAME: backend}
        self.preallocator = Preallocator(backends)
        self.jobQueue = JobQueue(self.preallocator)

        # Without Redis there is no persistent memory shared between
        # processes, so the job manager has to live in this one. With Redis
        # it is initiated separately.
        if not Config.USE_REDIS:
            manager = JobManager(self.jobQueue)
            manager.start()

        self.start_time = time.time()
        self.log = logging.getLogger("TangoServer")
        self.log.info("Starting Tango server")

    def addJob(self, job):
        """ addJob - Validate a job and hand it to the job queue

        Increments Config.job_requests on every call. A job that passes
        validation is added to the queue and the result of jobQueue.add()
        is returned; a rejected job goes to jobQueue.addDead() and -1 is
        returned.
        """
        Config.job_requests += 1
        self.log.debug("Received addJob request")
        status = self.__validateJob(job, self.preallocator.vmms)
        self.log.info("Done validating job %s" % (job.name))

        if status != 0:
            # Rejected jobs are still recorded, on the dead queue.
            self.jobQueue.addDead(job)
            return -1
        return self.jobQueue.add(job)

    def delJob(self, id, deadjob):
        """ delJob - Delete a job
        @param id: Id of job to delete
        @param deadjob - If 0, move the job from the live queue to the
        dead queue. If non-zero, remove the job from the dead queue
        and discard. Use with caution!
        """
        self.log.debug("Received delJob(%d, %d) request" % (id, deadjob))
        return self.jobQueue.delJob(id, deadjob)

    def cancelJobWithPath(self, outFilePath):
        """ cancelJobWithPath - Cancel the job that writes to outFilePath

        The job is looked up with jobQueue.findRemovingWaiting() and the
        result depends on the status that lookup reports:
          NOT_FOUND -> CancellationStatus.NOT_FOUND
          DEAD      -> CancellationStatus.ALREADY_COMPLETED
          RUNNING   -> whatever killUntilJobComplete() returns
                       (SUCCEEDED or FAILED)
          WAITING   -> CancellationStatus.SUCCEEDED; when the job has a
                       notifyURL, a notification is posted to it from a
                       background thread
        """
        self.log.debug("Received cancelJobWithPath(%s) request" %
                       (outFilePath))

        lookup = self.jobQueue.findRemovingWaiting(outFilePath)
        job_id, job, job_status = lookup
        self.log.debug("cancelJobWithPath: Found a job %s with status %s" %
                       (job, job_status))

        if job_status == JobQueue.JobStatus.NOT_FOUND:
            return CancellationStatus.NOT_FOUND
        if job_status == JobQueue.JobStatus.DEAD:
            return CancellationStatus.ALREADY_COMPLETED
        if job_status == JobQueue.JobStatus.RUNNING:
            return self.killUntilJobComplete(job_id, job)

        assert job_status == JobQueue.JobStatus.WAITING
        # A waiting job has already been moved to the dead queue by the
        # lookup above (per the original comment), so all that is left is
        # to tell autolab that the job is done.
        if not job.notifyURL:
            return CancellationStatus.SUCCEEDED

        # Only the file name, not the whole path, goes into the header.
        outputFileName = job.outputFile.split("/")[-1]
        payload = {
            'file': unicode('Job was cancelled before it started.')
        }
        headers = {'Filename': outputFileName}
        self.log.debug("Sending request to %s" % job.notifyURL)

        def notify():
            # NOTE(review): verify=False disables TLS certificate checks.
            requests.post(job.notifyURL,
                          files=payload,
                          headers=headers,
                          data={'runningTimeSeconds': 0},
                          verify=False)

        # The POST is done off-thread so the caller is not blocked on it.
        notifier = threading.Thread(target=notify)
        notifier.start()
        return CancellationStatus.SUCCEEDED

    def killUntilJobComplete(self, id, job):
        """ killUntilJobComplete - Repeatedly try to kill a running job

        Stated contract: if the job is currently running, this method is
        meant to return only once the job is complete. This implementation
        asks the VMMS that owns job.vm to kill() it, at most
        Config.CANCEL_RETRIES times, and stops at the first attempt that
        reports 0.

        @param id: job id; not used by this implementation
        @param job: the job whose vm is to be killed
        @return CancellationStatus.SUCCEEDED as soon as one kill() returns
        0, CancellationStatus.FAILED when no attempt did
        """
        self.log.debug("Received killUntilJobComplete request")

        target = job.vm
        for _attempt in xrange(0, Config.CANCEL_RETRIES):
            # kill() returns 0 on success.
            result = self.preallocator.vmms[target.vmms].kill(target)
            if result == 0:
                return CancellationStatus.SUCCEEDED

        return CancellationStatus.FAILED

    def getJobs(self, item):
        """ getJobs - Return the list of live jobs (item == 0) or the
        list of dead jobs (item == -1).

        ^ You gotta be kidding me. Is this an API for number lovers.
        """
        try:
            self.log.debug("Received getJobs(%s) request" % (item))

            if item == -1:  # return the list of dead jobs
                return self.jobQueue.deadJobs.values()

            elif item == 0:  # return the list of live jobs
                return self.jobQueue.liveJobs.values()

            else:  # invalid parameter
                return []
        except Exception as e:
            self.log.debug("getJobs: %s" % str(e))

    def preallocVM(self, vm, num):
        """ preallocVM - Set the pool size for VMs of type vm to num
        """
        self.log.debug("Received preallocVM(%s,%d)request" % (vm.name, num))
        try:
            vmms = self.preallocator.vmms[vm.vmms]
            if not vm or num < 0:
                return -2
            if not vmms.isValidImage(vm.image):
                self.log.error("Invalid image name")
                return -3
            (name, ext) = os.path.splitext(vm.image)
            vm.name = name
            self.preallocator.update(vm, num)
            return 0
        except Exception as err:
            self.log.error("preallocVM failed: %s" % err)
            return -1

    def getVMs(self, vmms_name):
        """ getVMs - return the list of VMs managed by the service vmms_name
        """
        self.log.debug("Received getVMs request(%s)" % vmms_name)
        try:
            if vmms_name in self.preallocator.vmms:
                vmms_inst = self.preallocator.vmms[vmms_name]
                return vmms_inst.getVMs()
            else:
                return []
        except Exception as err:
            self.log.error("getVMs request failed: %s" % err)
            return []

    def delVM(self, vmName, id):
        """ delVM - delete a specific VM instance from a pool
        """
        self.log.debug("Received delVM request(%s, %d)" % (vmName, id))
        try:
            if not vmName or vmName == "" or not id:
                return -1
            return self.preallocator.destroyVM(vmName, id)
        except Exception as err:
            self.log.error("delVM request failed: %s" % err)
            return -1

    def getPool(self, vmName):
        """ getPool - Return the current members of a pool and its free list
        """
        self.log.debug("Received getPool request(%s)" % (vmName))
        try:
            if not vmName or vmName == "":
                return []
            result = self.preallocator.getPool(vmName)
            return [
                "pool_size=%d" % len(result["pool"]),
                "free_size=%d" % len(result["free"]),
                "pool=%s" % result["pool"],
                "free=%s" % result["free"]
            ]

        except Exception as err:
            self.log.error("getPool request failed: %s" % err)
            return []

    def getInfo(self):
        """ getInfo - return various statistics about the Tango daemon

        @return dict holding the seconds elapsed since self.start_time, the
        counters kept on Config and the number of live threads
        """
        return {
            'elapsed_secs': time.time() - self.start_time,
            'job_requests': Config.job_requests,
            'job_retries': Config.job_retries,
            'waitvm_timeouts': Config.waitvm_timeouts,
            'runjob_timeouts': Config.runjob_timeouts,
            'copyin_errors': Config.copyin_errors,
            'runjob_errors': Config.runjob_errors,
            'copyout_errors': Config.copyout_errors,
            # active_count() is the supported spelling; activeCount() is a
            # deprecated alias of the same function.
            'num_threads': threading.active_count(),
        }

    def setScaleParams(self, low_water_mark, max_pool_size):
        self.preallocator.low_water_mark.set(low_water_mark)
        self.jobQueue.max_pool_size.set(max_pool_size)
        return 0

    def runningTimeForOutputFile(self, outputFile):
        self.log.debug("Received runningTimeForOutputFile(%s)" % outputFile)
        liveJobTuple = self.jobQueue.liveJobs.getWrapped(outputFile)
        if liveJobTuple:
            (_, liveJob) = liveJobTuple
            self.log.debug(str(liveJob.startTime))
            return liveJob.runningTime()
        return None

    #
    # Helper functions
    #

    # NOTE: This function should be called by ONLY jobManager.  The rest servers
    # shouldn't call this function.
    def resetTango(self, vmms):
        """ resetTango - resets Tango to a clean predictable state and
        ensures that it has a working virtualization environment. A side
        effect is that also checks that each supported VMMS is actually
        running.
        """

        # There are two cases this function is called: 1. Tango has a fresh start.
        # Then we want to destroy all instances in Tango's name space.  2. Job
        # Manager is restarted after a previous crash.  Then we want to destroy
        # the "busy" instances prior to the crash and leave the "free" onces intact.

        self.log.debug("Received resetTango request.")

        try:
            # For each supported VMM system, get the instances it knows about
            # in the current Tango name space and kill those not in free pools.
            for vmms_name in vmms:
                vobj = vmms[vmms_name]

                # Round up all instances in the free pools.
                allFreeVMs = []
                for key in self.preallocator.machines.keys():
                    freePool = self.preallocator.getPool(key)["free"]
                    for vmId in freePool:
                        vmName = vobj.instanceName(vmId, key)
                        allFreeVMs.append(vmName)
                self.log.info("vms in all free pools: %s" % allFreeVMs)

                # For each in Tango's name space, destroy the onces in free pool.
                # AND remove it from Tango's internal bookkeeping.
                vms = vobj.getVMs()
                self.log.debug("Pre-existing VMs: %s" %
                               [vm.name for vm in vms])
                destroyedList = []
                removedList = []
                for vm in vms:
                    if re.match("%s-" % Config.PREFIX, vm.name):

                        # Todo: should have an one-call interface to destroy the
                        # machine AND to keep the interval data consistent.
                        if vm.name not in allFreeVMs:
                            destroyedList.append(vm.name)
                            vobj.destroyVM(vm)

                            # also remove it from "total" set of the pool
                            (prefix, vmId, poolName) = vm.name.split("-")
                            machine = self.preallocator.machines.get(poolName)
                            if not machine:  # the pool may not exist
                                continue

                            if int(vmId) in machine[0]:
                                removedList.append(vm.name)
                                machine[0].remove(int(vmId))
                            self.preallocator.machines.set(poolName, machine)

                if destroyedList:
                    self.log.warning("Killed these %s VMs on restart: %s" %
                                     (vmms_name, destroyedList))
                if removedList:
                    self.log.warning("Removed these %s VMs from their pools" %
                                     (removedList))

            for _, job in self.jobQueue.liveJobs.iteritems():
                if not job.isNotAssigned():
                    job.makeUnassigned()
                self.log.debug("job: %s, assigned: %s" %
                               (str(job.name), str(job.assigned)))
        except Exception as err:
            self.log.error("resetTango: Call to VMMS %s failed: %s" %
                           (vmms_name, err))
            os._exit(1)

    def __validateJob(self, job, vmms):
        """ validateJob - validate the input arguments in an addJob request.

        @param job: the job to validate; anything that is not a TangoJob is
        rejected immediately
        @param vmms: mapping of VMMS name to VMMS object
        @return 0 if the job is acceptable, -1 otherwise

        Side effects: sets job.vm.name to the image name without its
        extension, and applies the configured defaults to
        job.maxOutputFileSize and job.timeout when they are unset. Every
        problem found is logged and appended to the job trace.
        """
        # If this isn't a Tango job then bail with an error
        if not isinstance(job, TangoJob):
            return -1

        def reject(message):
            # Record one validation failure in both the log and the trace;
            # returns 1 so callers can add it to the error count.
            self.log.error(message)
            job.appendTrace(message)
            return 1

        errors = 0

        # Every job must have a name
        if not job.name:
            errors += reject("validateJob: Missing job.name")

        # Check the virtual machine field
        if not job.vm:
            errors += reject("validateJob: Missing job.vm")
        else:
            if not job.vm.image:
                errors += reject("validateJob: Missing job.vm.image")
            else:
                vobj = vmms[Config.VMMS_NAME]
                if not vobj.isValidImage(job.vm.image):
                    errors += reject("validateJob: Image not found: %s" %
                                     (job.vm.image,))
                else:
                    job.vm.name = os.path.splitext(job.vm.image)[0]

            if not job.vm.vmms:
                errors += reject("validateJob: Missing job.vm.vmms")
            elif job.vm.vmms not in vmms:
                errors += reject("validateJob: Invalid vmms name: %s" %
                                 (job.vm.vmms,))

        # Check the output file
        if not job.outputFile:
            errors += reject("validateJob: Missing job.outputFile")
        elif not os.path.exists(os.path.dirname(job.outputFile)):
            errors += reject("validateJob: Bad output path: %s" %
                             (job.outputFile,))

        # Check for max output file size parameter
        if not job.maxOutputFileSize:
            self.log.debug(
                "validateJob: Setting job.maxOutputFileSize "
                "to default value: %d bytes", Config.MAX_OUTPUT_FILE_SIZE)
            job.maxOutputFileSize = Config.MAX_OUTPUT_FILE_SIZE

        # Check the list of input files
        hasMakefile = False
        for inputFile in job.input:
            if not inputFile.localFile:
                errors += reject("validateJob: Missing inputFile.localFile")
            elif job.outputFile and not os.path.exists(
                    os.path.dirname(job.outputFile)):
                # A missing outputFile is skipped here: it has already been
                # reported above, and os.path.dirname() raises on None.
                # NOTE(review): this repeats the output-path check once per
                # input file; it looks like it was meant to validate
                # inputFile.localFile - confirm before changing.
                errors += reject("validateJob: Bad output path: %s" %
                                 (job.outputFile,))

            if inputFile.destFile == 'Makefile':
                hasMakefile = True

        # Check if input files include a Makefile
        if not hasMakefile:
            errors += reject("validateJob: Missing Makefile in input files.")

        # Check if job timeout has been set; If not set timeout to default
        if not job.timeout or job.timeout <= 0:
            self.log.debug(
                "validateJob: Setting job.timeout to"
                " default config value: %d secs", Config.RUNJOB_TIMEOUT)
            job.timeout = Config.RUNJOB_TIMEOUT

        # Any problems, return an error status
        if errors > 0:
            reject("validateJob: Job rejected: %d errors" % errors)
            return -1
        return 0
Esempio n. 6
0
# Stand-alone entry point: builds the VMMS backend, preallocator and job
# queue and hands them to a JobManager. It only does so when Redis is in
# use; otherwise it prints a hint and does nothing.
if __name__ == "__main__":

    if not Config.USE_REDIS:
        # Adjacent string literals keep the message on one line. The
        # backslash continuation used before put the next line's
        # indentation into the printed text.
        print(
            "You need to have Redis running to be able to initiate "
            "stand-alone JobManager")
    else:
        # Only the backend selected by Config.VMMS_NAME is imported.
        # NOTE(review): vmms stays None for an unrecognized name.
        vmms = None

        if Config.VMMS_NAME == "localSSH":
            from vmms.localSSH import LocalSSH
            vmms = LocalSSH()
        elif Config.VMMS_NAME == "tashiSSH":
            from vmms.tashiSSH import TashiSSH
            vmms = TashiSSH()
        elif Config.VMMS_NAME == "ec2SSH":
            from vmms.ec2SSH import Ec2SSH
            vmms = Ec2SSH()
        elif Config.VMMS_NAME == "localDocker":
            from vmms.localDocker import LocalDocker
            vmms = LocalDocker()

        # From here on vmms is the name -> backend mapping.
        vmms = {Config.VMMS_NAME: vmms}
        preallocator = Preallocator(vmms)
        queue = JobQueue(preallocator)

        JobManager(queue, vmms, preallocator)

        print("Starting the stand-alone Tango JobManager")
Esempio n. 7
0
File: tango.py Progetto: cg2v/Tango
class TangoServer:

    """ TangoServer - Implements the API functions that the server accepts
    """

    def __init__(self):
        """ __init__ - Configure logging, load the VMMS backend named by
        Config.VMMS_NAME, build the preallocator and job queue, and start
        a local JobManager when Redis is not in use.
        """
        self.daemon = True

        # Configure logging before anything else is constructed so that
        # records emitted while the backend, queue and JobManager start up
        # already go to the configured log file.
        logging.basicConfig(
            filename=Config.LOGFILE,
            format="%(levelname)s|%(asctime)s|%(name)s|%(message)s",
            level=Config.LOGLEVEL,
        )
        self.log = logging.getLogger("TangoServer")

        vmms = None
        if Config.VMMS_NAME == "tashiSSH":
            from vmms.tashiSSH import TashiSSH
            vmms = TashiSSH()
        elif Config.VMMS_NAME == "ec2SSH":
            from vmms.ec2SSH import Ec2SSH
            vmms = Ec2SSH()
        elif Config.VMMS_NAME == "localDocker":
            from vmms.localDocker import LocalDocker
            vmms = LocalDocker()
        elif Config.VMMS_NAME == "distDocker":
            from vmms.distDocker import DistDocker
            vmms = DistDocker()

        if vmms is None:
            # The server still starts (as before), but make the
            # misconfiguration visible instead of failing obscurely later.
            self.log.warning("No VMMS loaded for VMMS_NAME: %s" %
                             (Config.VMMS_NAME,))

        self.preallocator = Preallocator({Config.VMMS_NAME: vmms})
        self.jobQueue = JobQueue(self.preallocator)
        if not Config.USE_REDIS:
            # creates a local Job Manager if there is no persistent
            # memory between processes. Otherwise, JobManager will
            # be initiated separately
            JobManager(self.jobQueue).start()

        self.start_time = time.time()
        self.log.info("Starting Tango server")

    def addJob(self, job):
        """ addJob - Add a job to the job queue
        @param job: the job to validate and enqueue
        @return: the value of jobQueue.add(job) when validation succeeds;
        otherwise the job is handed to jobQueue.addDead and -1 is returned.
        """
        Config.job_requests += 1
        self.log.debug("Received addJob request")
        ret = self.__validateJob(job, self.preallocator.vmms)
        # getattr: validation rejects objects that are not TangoJobs, and
        # those may not have a name attribute at all.
        self.log.info("Done validating job %s" %
                      (getattr(job, "name", None),))
        if ret == 0:
            return self.jobQueue.add(job)
        self.jobQueue.addDead(job)
        return -1

    def delJob(self, id, deadjob):
        """ delJob - Delete a job
        @param id: Id of job to delete
        @param deadjob - If 0, move the job from the live queue to the
        dead queue. If non-zero, remove the job from the dead queue
        and discard. Use with caution!
        @return: whatever jobQueue.delJob returns
        """
        self.log.debug("Received delJob(%d, %d) request" % (id, deadjob))
        return self.jobQueue.delJob(id, deadjob)

    def getJobs(self, item):
        """ getJobs - Return the list of live jobs (item == 0) or the
        list of dead jobs (item == -1).
        Any other value of item, or a failure while reading the queue,
        yields an empty list so that callers can always iterate the result.
        """
        try:
            self.log.debug("Received getJobs(%s) request" % (item))

            if item == -1:  # return the list of dead jobs
                return self.jobQueue.deadJobs.values()

            if item == 0:  # return the list of live jobs
                return self.jobQueue.liveJobs.values()

            return []  # invalid parameter
        except Exception as e:
            self.log.error("getJobs: %s" % str(e))
            return []

    def preallocVM(self, vm, num):
        """ preallocVM - Set the pool size for VMs of type vm to num
        @param vm: machine description; vm.vmms, vm.image and vm.name are used
        @param num: requested pool size
        @return: 0 on success, -2 for a missing vm or negative num,
        -3 when vm.image is not offered by the VMMS, -1 on any other failure.
        """
        try:
            # Validate before touching vm so that a missing vm yields -2
            # instead of an AttributeError escaping to the caller.
            if not vm or num < 0:
                return -2
            self.log.debug("Received preallocVM(%s,%d)request"
                           % (vm.name, num))
            vmms = self.preallocator.vmms[vm.vmms]
            if vm.image not in vmms.getImages():
                self.log.error("Invalid image name")
                return -3
            # The pool is keyed by the image name without its extension.
            (name, ext) = os.path.splitext(vm.image)
            vm.name = name
            self.preallocator.update(vm, num)
            return 0
        except Exception as err:
            self.log.error("preallocVM failed: %s" % err)
            return -1

    def getVMs(self, vmms_name):
        """ getVMs - return the list of VMs managed by the service vmms_name
        Returns an empty list for an unknown service or on failure.
        """
        self.log.debug("Received getVMs request(%s)" % vmms_name)
        try:
            if vmms_name in self.preallocator.vmms:
                vmms_inst = self.preallocator.vmms[vmms_name]
                return vmms_inst.getVMs()
            return []
        except Exception as err:
            self.log.error("getVMs request failed: %s" % err)
            return []

    def delVM(self, vmName, id):
        """ delVM - delete a specific VM instance from a pool
        @param vmName: name of the pool
        @param id: id of the instance to destroy
        @return: the value of preallocator.destroyVM, or -1 when an
        argument is missing/falsy or the call fails.
        """
        # Reject missing arguments before formatting them: "%d" raises on
        # a None id.
        if not vmName or not id:
            return -1
        try:
            self.log.debug("Received delVM request(%s, %d)" % (vmName, id))
            return self.preallocator.destroyVM(vmName, id)
        except Exception as err:
            self.log.error("delVM request failed: %s" % err)
            return -1

    def getPool(self, vmName):
        """ getPool - Return the current members of a pool and its free list
        The result is a list of "key=value" strings (pool_size, free_size,
        pool, free); an empty list is returned for an empty name or on
        failure.
        """
        self.log.debug("Received getPool request(%s)" % (vmName))
        try:
            if not vmName:
                return []
            result = self.preallocator.getPool(vmName)
            return ["pool_size=%d" % len(result["pool"]),
                    "free_size=%d" % len(result["free"]),
                    "pool=%s" % result["pool"],
                    "free=%s" % result["free"]]

        except Exception as err:
            self.log.error("getPool request failed: %s" % err)
            return []

    def getInfo(self):
        """ getInfo - return various statistics about the Tango daemon
        The result is a dict of the counters kept on Config plus the
        elapsed time since construction and the current thread count.
        """
        stats = {}
        stats['elapsed_secs'] = time.time() - self.start_time
        stats['job_requests'] = Config.job_requests
        stats['job_retries'] = Config.job_retries
        stats['waitvm_timeouts'] = Config.waitvm_timeouts
        stats['runjob_timeouts'] = Config.runjob_timeouts
        stats['copyin_errors'] = Config.copyin_errors
        stats['runjob_errors'] = Config.runjob_errors
        stats['copyout_errors'] = Config.copyout_errors
        # active_count() is the non-deprecated spelling of activeCount().
        stats['num_threads'] = threading.active_count()

        return stats

    #
    # Helper functions
    #
    def resetTango(self, vmms):
        """ resetTango - resets Tango to a clean predictable state and
        ensures that it has a working virtualization environment. A side
        effect is that also checks that each supported VMMS is actually
        running.
        @param vmms: mapping of VMMS name to VMMS object
        Any failure is logged and terminates the process via os._exit(1).
        """
        self.log.debug("Received resetTango request.")

        # Bound up front so the error handler can always format it, even
        # when the failure happens before the first loop iteration.
        vmms_name = None
        try:
            # Literal prefix match: the prefix is a name, not a regular
            # expression, so metacharacters in it must not widen the match.
            prefix = "%s-" % Config.PREFIX

            # For each supported VMM system, get the instances it knows about,
            # and kill those in the current Tango name space.
            for vmms_name in vmms:
                vobj = vmms[vmms_name]
                vms = vobj.getVMs()
                self.log.debug("Pre-existing VMs: %s" % [vm.name for vm in vms])
                namelist = []
                for vm in vms:
                    if vm.name.startswith(prefix):
                        vobj.destroyVM(vm)
                        # Need a consistent abstraction for a vm between
                        # interfaces
                        namelist.append(vm.name)
                if namelist:
                    self.log.warning("Killed these %s VMs on restart: %s" %
                                     (vmms_name, namelist))

            # Any live job that was assigned to a (now destroyed) VM goes
            # back to the unassigned state.
            for _, job in self.jobQueue.liveJobs.iteritems():
                if not job.isNotAssigned():
                    job.makeUnassigned()
                self.log.debug("job: %s, assigned: %s" %
                               (str(job.name), str(job.assigned)))
        except Exception as err:
            self.log.error("resetTango: Call to VMMS %s failed: %s" %
                           (vmms_name, err))
            os._exit(1)

    def __logJobError(self, job, message):
        """ logJobError - Record one validation failure: log it as an
        error and append it, prefixed with a UTC timestamp, to the job trace.
        """
        self.log.error(message)
        job.appendTrace("%s|%s" % (datetime.utcnow().ctime(), message))

    def __validateJob(self, job, vmms):
        """ validateJob - validate the input arguments in an addJob request.
        @param job: the job to check; missing maxOutputFileSize/timeout are
        filled in from Config, and job.vm.name is derived from the image
        @param vmms: mapping of VMMS name to VMMS object
        @return: 0 when the job is acceptable, -1 otherwise. Every problem
        found is logged and appended to the job trace.
        """
        # If this isn't a Tango job then bail with an error
        if not isinstance(job, TangoJob):
            return -1

        errors = 0

        # Every job must have a name
        if not job.name:
            self.__logJobError(job, "validateJob: Missing job.name")
            errors += 1

        # Check the virtual machine field
        if not job.vm:
            self.__logJobError(job, "validateJob: Missing job.vm")
            errors += 1
        else:
            if not job.vm.image:
                self.__logJobError(job, "validateJob: Missing job.vm.image")
                errors += 1
            else:
                vobj = vmms[Config.VMMS_NAME]
                imgList = vobj.getImages()
                if job.vm.image not in imgList:
                    self.__logJobError(
                        job,
                        "validateJob: Image not found: %s" % job.vm.image)
                    errors += 1
                else:
                    # The VM name is the image name without its extension.
                    (name, ext) = os.path.splitext(job.vm.image)
                    job.vm.name = name

            if not job.vm.vmms:
                self.__logJobError(job, "validateJob: Missing job.vm.vmms")
                errors += 1
            elif job.vm.vmms not in vmms:
                self.__logJobError(
                    job,
                    "validateJob: Invalid vmms name: %s" % job.vm.vmms)
                errors += 1

        # Check the output file
        if not job.outputFile:
            self.__logJobError(job, "validateJob: Missing job.outputFile")
            errors += 1
        elif not os.path.exists(os.path.dirname(job.outputFile)):
            self.__logJobError(
                job, "validateJob: Bad output path: %s" % job.outputFile)
            errors += 1

        # Check for max output file size parameter
        if not job.maxOutputFileSize:
            self.log.debug("validateJob: Setting job.maxOutputFileSize "
                           "to default value: %d bytes",
                           Config.MAX_OUTPUT_FILE_SIZE)
            job.maxOutputFileSize = Config.MAX_OUTPUT_FILE_SIZE

        # Check the list of input files
        hasMakefile = False
        for inputFile in job.input or []:
            if not inputFile.localFile:
                self.__logJobError(
                    job, "validateJob: Missing inputFile.localFile")
                errors += 1
            elif not os.path.exists(inputFile.localFile):
                # Check the input file itself. The output path was already
                # validated once above; re-checking it here per input file
                # double-counted errors and crashed on a missing outputFile.
                self.__logJobError(
                    job,
                    "validateJob: Input file %s not found" %
                    inputFile.localFile)
                errors += 1

            if inputFile.destFile == 'Makefile':
                hasMakefile = True

        # Check if input files include a Makefile
        if not hasMakefile:
            self.__logJobError(
                job, "validateJob: Missing Makefile in input files.")
            errors += 1

        # Check if job timeout has been set; If not set timeout to default
        if not job.timeout or job.timeout <= 0:
            self.log.debug("validateJob: Setting job.timeout to"
                           " default config value: %d secs",
                           Config.RUNJOB_TIMEOUT)
            job.timeout = Config.RUNJOB_TIMEOUT

        # Any problems, return an error status
        if errors > 0:
            self.__logJobError(
                job, "validateJob: Job rejected: %d errors" % errors)
            return -1
        return 0
Esempio n. 8
0
class TangoREST:

    """ TangoREST - Key-authenticated wrapper around TangoServer. Each API
    method validates the client key and returns a status object taken from
    (or created by) self.status.
    """

    COURSELABS = Config.COURSELABS
    OUTPUT_FOLDER = "output"
    LOGFILE = Config.LOGFILE

    # Replace with choice of key store and override validateKey.
    # This key is just for testing.
    keys = Config.KEYS

    def __init__(self):
        """ __init__ - Configure logging, load the VMMS backend named by
        Config.VMMS_NAME and build the preallocator, job queue and
        TangoServer that the REST methods delegate to.
        """
        # Configured once, up front; a second basicConfig call would be a
        # no-op because the root logger already has a handler by then.
        logging.basicConfig(
            filename=self.LOGFILE,
            format="%(levelname)s|%(asctime)s|%(name)s|%(message)s",
            level=Config.LOGLEVEL
        )

        vmms = None

        if Config.VMMS_NAME == "localSSH":
            from vmms.localSSH import LocalSSH
            vmms = LocalSSH()
        elif Config.VMMS_NAME == "tashiSSH":
            from vmms.tashiSSH import TashiSSH
            vmms = TashiSSH()
        elif Config.VMMS_NAME == "ec2SSH":
            from vmms.ec2SSH import Ec2SSH
            vmms = Ec2SSH()
        elif Config.VMMS_NAME == "localDocker":
            from vmms.localDocker import LocalDocker
            vmms = LocalDocker()

        self.vmms = {Config.VMMS_NAME: vmms}
        self.preallocator = Preallocator(self.vmms)
        self.queue = JobQueue(self.preallocator)

        if not Config.USE_REDIS:
            # creates a local Job Manager if there is no persistent
            # memory between processes. Otherwise, JobManager will
            # be initiated separately
            JobManager(self.queue, self.vmms, self.preallocator)

        self.tango = TangoServer(self.queue, self.preallocator, self.vmms)

        logging.getLogger('boto').setLevel(logging.INFO)
        self.log = logging.getLogger("TangoREST")
        self.log.info("Starting RESTful Tango server")
        self.status = Status()

    def validateKey(self, key):
        """ validateKey - Validates key provided by client
        @return: True when key equals one of the entries of self.keys
        """
        return any(el == key for el in self.keys)

    def getDirName(self, key, courselab):
        """ getDirName - Computes directory name
        """
        return "%s-%s" % (key, courselab)

    def getDirPath(self, key, courselab):
        """ getDirPath - Computes directory path
        """
        labName = self.getDirName(key, courselab)
        return "%s/%s" % (self.COURSELABS, labName)

    def getOutPath(self, key, courselab):
        """ getOutPath - Computes output directory path
        """
        labPath = self.getDirPath(key, courselab)
        return "%s/%s" % (labPath, self.OUTPUT_FOLDER)

    def _fileMD5(self, path):
        """ fileMD5 - Return the hex MD5 digest of the file at path.
        The file is read as bytes and always closed; errors from opening
        or reading it propagate to the caller.
        """
        with open(path, "rb") as handle:
            return hashlib.md5(handle.read()).hexdigest()

    def computeMD5(self, directory):
        """ computeMD5 - Computes the MD5 hash of given files in the
        given directory
        @return: list of {'md5': ..., 'localFile': ...} dicts, one per
        readable entry; entries that raise IOError are skipped.
        """
        result = []
        for elem in os.listdir(directory):
            try:
                md5hash = self._fileMD5("%s/%s" % (directory, elem))
            except IOError:
                continue
            result.append({'md5': md5hash, 'localFile': elem})
        return result

    def createTangoMachine(self, image, vmms=Config.VMMS_NAME,
                           vmObj=None):
        """ createTangoMachine - Creates a tango machine object from image
        @param image: image name, also used as the machine name
        @param vmms: name of the VMMS the machine belongs to
        @param vmObj: dict with "cores" and "memory"; defaults to
        1 core / 512 memory when omitted
        """
        if vmObj is None:
            # Fresh dict per call rather than a shared mutable default.
            vmObj = {'cores': 1, 'memory': 512}
        return TangoMachine(
            name=image,
            vmms=vmms,
            image="%s" % (image),
            cores=vmObj["cores"],
            memory=vmObj["memory"],
            disk=None,
            network=None)

    def convertJobObj(self, dirName, jobObj):
        """ convertJobObj - Converts a dictionary into a TangoJob object
        @param dirName: courselab directory name under COURSELABS
        @param jobObj: dict with 'jobName', 'output_file', 'timeout',
        'files', 'image' and optionally 'callback_url'
        """

        name = jobObj['jobName']
        outputFile = "%s/%s/%s/%s" % (self.COURSELABS,
                                      dirName,
                                      self.OUTPUT_FOLDER,
                                      jobObj['output_file'])
        timeout = jobObj['timeout']
        maxOutputFileSize = Config.MAX_OUTPUT_FILE_SIZE
        notifyURL = None
        if 'callback_url' in jobObj:
            notifyURL = jobObj['callback_url']

        # List of input files
        inputFiles = []
        for fileObj in jobObj['files']:
            inputFiles.append(InputFile(
                localFile="%s/%s/%s" % (self.COURSELABS, dirName,
                                        fileObj['localFile']),
                destFile=fileObj['destFile']))

        # VM object
        vm = self.createTangoMachine(jobObj["image"])

        job = TangoJob(
            name=name,
            vm=vm,
            outputFile=outputFile,
            input=inputFiles,
            timeout=timeout,
            notifyURL=notifyURL,
            maxOutputFileSize=maxOutputFileSize)

        self.log.debug("inputFiles: %s" %
                       [inFile.localFile for inFile in inputFiles])
        self.log.debug("outputFile: %s" % outputFile)
        return job

    def convertTangoMachineObj(self, tangoMachine):
        """ convertVMObj - Converts a TangoMachine object into a dictionary
        """
        # May need to convert instance_id
        vm = dict()
        vm['network'] = tangoMachine.network
        vm['resume'] = tangoMachine.resume
        vm['image'] = tangoMachine.image
        vm['memory'] = tangoMachine.memory
        vm['vmms'] = tangoMachine.vmms
        vm['cores'] = tangoMachine.cores
        vm['disk'] = tangoMachine.disk
        vm['id'] = tangoMachine.id
        vm['name'] = tangoMachine.name
        return vm

    def convertInputFileObj(self, inputFile):
        """ convertInputFileObj - Converts an InputFile object into a dictionary
        """
        return {
            'destFile': inputFile.destFile,
            'localFile': inputFile.localFile,
        }

    def convertTangoJobObj(self, tangoJobObj):
        """ convertTangoJobObj - Converts a TangoJob object into a dictionary
        """
        job = dict()
        # Convert scalar attributes first
        job['retries'] = tangoJobObj.retries
        job['outputFile'] = tangoJobObj.outputFile
        job['name'] = tangoJobObj.name
        job['notifyURL'] = tangoJobObj.notifyURL
        job['maxOutputFileSize'] = tangoJobObj.maxOutputFileSize
        job['assigned'] = tangoJobObj.assigned
        job['timeout'] = tangoJobObj.timeout
        job['id'] = tangoJobObj.id
        job['trace'] = tangoJobObj.trace

        # Convert VM object
        job['vm'] = self.convertTangoMachineObj(tangoJobObj.vm)

        # Convert InputFile objects
        job['input'] = [self.convertInputFileObj(inputFile)
                        for inputFile in tangoJobObj.input]

        return job
    ##
    # Tango RESTful API
    ##

    def open(self, key, courselab):
        """ open - Return a list of md5 hashes for each input file in the
        key-courselab directory and make one if the directory doesn't exist
        """
        self.log.debug("Received open request(%s, %s)" % (key, courselab))
        if self.validateKey(key):
            labPath = self.getDirPath(key, courselab)
            try:
                if os.path.exists(labPath):
                    self.log.info(
                        "Found directory for (%s, %s)" % (key, courselab))
                    # NOTE(review): this stores the file list on the
                    # self.status.found_dir object itself - confirm that
                    # sharing it between requests is intended.
                    statusObj = self.status.found_dir
                    statusObj['files'] = self.computeMD5(labPath)
                    return statusObj
                else:
                    outputPath = self.getOutPath(key, courselab)
                    os.makedirs(outputPath)
                    self.log.info(
                        "Created directory for (%s, %s)" % (key, courselab))
                    statusObj = self.status.made_dir
                    statusObj["files"] = []
                    return statusObj
            except Exception as e:
                self.log.error("open request failed: %s" % str(e))
                return self.status.create(-1, str(e))
        else:
            self.log.info("Key not recognized: %s" % key)
            return self.status.wrong_key

    def upload(self, key, courselab, file, body):
        """ upload - Upload file as an input file in key-courselab
        @param file: name of the file inside the courselab directory
        @param body: content to store
        The write is skipped (status file_exists) only when the target
        file already exists with the same MD5 as body.
        """
        self.log.debug("Received upload request(%s, %s, %s)" %
                       (key, courselab, file))
        if (self.validateKey(key)):
            labPath = self.getDirPath(key, courselab)
            try:
                if os.path.exists(labPath):
                    absPath = "%s/%s" % (labPath, file)
                    if os.path.exists(absPath):
                        # Compare against the target file only; matching
                        # some *other* file in the directory must not
                        # suppress the upload.
                        fileMD5 = hashlib.md5(body).hexdigest()
                        if fileMD5 == self._fileMD5(absPath):
                            return self.status.file_exists
                    with open(absPath, "wt") as fh:
                        fh.write(body)
                    self.log.info(
                        "Uploaded file to (%s, %s, %s)" %
                        (key, courselab, file))
                    return self.status.file_uploaded
                else:
                    self.log.info(
                        "Courselab for (%s, %s) not found" % (key, courselab))
                    return self.status.wrong_courselab
            except Exception as e:
                self.log.error("upload request failed: %s" % str(e))
                return self.status.create(-1, str(e))
        else:
            self.log.info("Key not recognized: %s" % key)
            return self.status.wrong_key

    def addJob(self, key, courselab, jobStr):
        """ addJob - Add the job to be processed by Tango
        @param jobStr: JSON description of the job (see convertJobObj)
        @return: status job_added carrying 'jobId' on success; a -1 status
        carrying the job trace when Tango rejects the job, or carrying the
        exception text when parsing/adding raises.
        """
        self.log.debug("Received addJob request(%s, %s, %s)" %
                       (key, courselab, jobStr))
        if (self.validateKey(key)):
            labName = self.getDirName(key, courselab)
            try:
                jobObj = json.loads(jobStr)
                job = self.convertJobObj(labName, jobObj)
                jobId = self.tango.addJob(job)
                self.log.debug("Done adding job")
                if (jobId == -1):
                    self.log.info("Failed to add job to tango")
                    return self.status.create(-1, job.trace)
                self.log.info("Successfully added job to tango")
                result = self.status.job_added
                result['jobId'] = jobId
                return result
            except Exception as e:
                # Report where the failure happened on stdout in addition
                # to the log entry.
                exc_type, exc_obj, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                print(exc_type, fname, exc_tb.tb_lineno)
                self.log.error("addJob request failed: %s" % str(e))
                return self.status.create(-1, str(e))
        else:
            self.log.info("Key not recognized: %s" % key)
            return self.status.wrong_key

    def poll(self, key, courselab, outputFile):
        """ poll - Poll for the output file in key-courselab
        @return: the file content when the output file exists, otherwise
        status out_not_found
        """
        self.log.debug("Received poll request(%s, %s, %s)" %
                       (key, courselab, outputFile))
        if (self.validateKey(key)):
            outputPath = self.getOutPath(key, courselab)
            outfilePath = "%s/%s" % (outputPath, outputFile)
            if os.path.exists(outfilePath):
                self.log.info("Output file (%s, %s, %s) found" %
                              (key, courselab, outputFile))
                # 'with' closes the file even if the read fails.
                with open(outfilePath) as output:
                    return output.read()
            self.log.info("Output file (%s, %s, %s) not found" %
                          (key, courselab, outputFile))
            return self.status.out_not_found
        else:
            self.log.info("Key not recognized: %s" % key)
            return self.status.wrong_key

    def info(self, key):
        """ info - Returns basic status for the Tango service such as uptime, number of jobs etc
        """
        self.log.debug("Received info request (%s)" % (key))
        if (self.validateKey(key)):
            info = self.tango.getInfo()
            result = self.status.obtained_info
            result['info'] = info
            return result
        else:
            self.log.info("Key not recognized: %s" % key)
            return self.status.wrong_key

    def jobs(self, key, deadJobs):
        """ jobs - Returns the list of live jobs (deadJobs == 0) or the list of dead jobs (deadJobs == 1)
        Any other value of deadJobs yields an empty job list.
        """
        self.log.debug("Received jobs request (%s, %s)" % (key, deadJobs))
        if (self.validateKey(key)):
            jobs = list()
            result = self.status.obtained_jobs
            wantDead = int(deadJobs)
            if wantDead == 0:
                # 'or []' guards against the server handing back None.
                jobs = self.tango.getJobs(0) or []
                self.log.debug(
                    "Retrieved live jobs (deadJobs = %s)" % deadJobs)
            elif wantDead == 1:
                jobs = self.tango.getJobs(-1) or []
                self.log.debug(
                    "Retrieved dead jobs (deadJobs = %s)" % deadJobs)
            result['jobs'] = [self.convertTangoJobObj(job) for job in jobs]

            return result
        else:
            self.log.info("Key not recognized: %s" % key)
            return self.status.wrong_key

    def pool(self, key, image):
        """ pool - Get information about a pool of VMs spawned from image
        @param image: image file name; must end in ".img"
        """
        self.log.debug("Received pool request(%s, %s)" % (key, image))
        if self.validateKey(key):
            if not image or not image.endswith(".img"):
                self.log.info("Invalid image name")
                return self.status.invalid_image
            # Pools are looked up by the image name without ".img".
            image = image[:-4]
            info = self.preallocator.getPool(image)
            if len(info["pool"]) == 0:
                self.log.info("Pool image not found: %s" % image)
                return self.status.pool_not_found
            self.log.info("Pool image found: %s" % image)
            result = self.status.obtained_pool
            result["total"] = info["pool"]
            result["free"] = info["free"]
            return result
        else:
            self.log.info("Key not recognized: %s" % key)
            return self.status.wrong_key

    def prealloc(self, key, image, num, vmStr):
        """ prealloc - Create a pool of num instances spawned from image
        @param image: image file name; must end in ".img"
        @param num: pool size (converted with int())
        @param vmStr: JSON dict with "cores" and "memory", or "" for the
        defaults of createTangoMachine
        """
        self.log.debug("Received prealloc request(%s, %s, %s)" %
                       (key, image, num))
        if self.validateKey(key):
            if not image or not image.endswith(".img"):
                self.log.info("Invalid image name")
                return self.status.invalid_image
            if vmStr != "":
                vmObj = json.loads(vmStr)
                vm = self.createTangoMachine(image, vmObj=vmObj)
            else:
                vm = self.createTangoMachine(image)
            success = self.tango.preallocVM(vm, int(num))
            # Every non-zero code is a failure, not only -1; otherwise an
            # invalid argument or unknown image is reported as success.
            if success != 0:
                self.log.info("Failed to preallocated VMs")
                return self.status.prealloc_failed
            self.log.info("Successfully preallocated VMs")
            return self.status.preallocated
        else:
            self.log.info("Key not recognized: %s" % key)
            return self.status.wrong_key

    def resetTango(self):
        """ Destroys VMs associated with this namespace. Used for admin
            purposes only.
        """
        self.log.debug("Received resetTango request.")
        self.tango.resetTango(self.vmms)