Example #1
    def kill(self, obj):
        """
        Kill the job instance

        obj -- a single Job, or a Task (all of its jobs are killed)

        The job list is written to a jobs file by self.createJobsFile and
        handed to "arckill -i". Lines of the arckill output that carry a
        job URL are treated as per-job failures and appended to that job's
        runningJob.errors.

        Raises SchedulerError for any other argument type, or when arckill
        exits with a non-zero status.
        """
        if isinstance(obj, Job):
            jobList = [obj]
        elif isinstance(obj, Task):
            jobList = obj.jobs
        else:
            raise SchedulerError('wrong argument type', str(type(obj)))

        jobsFile, arcId2job = self.createJobsFile(jobList, "Will kill")

        cmd = self.pre_arcCmd + "arckill -i " + jobsFile.name
        output, stat = self.ExecuteCommand(cmd)
        if stat != 0:
            raise SchedulerError('arckill returned %i' % stat, output, cmd)

        for line in output.split('\n'):
            # If a job URL ("arcId") occurs on a line of output, it tends
            # to be an error message:
            errorMatch = re.match(r".*: *(gsiftp://[a-zA-Z0-9.-]+\S*/\w*)",
                                  line)
            if errorMatch:
                arcId = errorMatch.group(1)
                job = arcId2job[arcId]
                job.runningJob.errors.append("Killing job %s failed: %s" %
                                             (job['name'], line))
Example #2
    def getOutput(self, obj, outdir=''):
        """
        Retrieve (move) job output from cache directory to outdir
        User files from CondorG appear asynchronously in the cache directory

        obj    -- a Job or a Task; a RunningJob is rejected
        outdir -- destination directory; for a Task an empty value falls
                  back to obj['outputDirectory']

        Raises SchedulerError when obj is a RunningJob, when the
        runningJob of a Job is not valid, or when obj has any other type.
        Jobs of a Task whose runningJob is not valid are silently skipped.
        """
        if isinstance(obj, RunningJob):  # The object passed is a RunningJob
            raise SchedulerError(
                'Operation not possible',
                'CondorG cannot retrieve files when passed RunningJob')

        elif isinstance(obj, Job):  # The object passed is a Job
            # check for the RunningJob integrity
            if not self.valid(obj.runningJob):
                raise SchedulerError('invalid object', str(obj.runningJob))

            # retrieve output
            self.getCondorOutput(obj, outdir)

        elif isinstance(obj, Task):  # The object passed is a Task
            if outdir == '':
                outdir = obj['outputDirectory']

            for job in obj.jobs:
                if self.valid(job.runningJob):
                    self.getCondorOutput(job, outdir)

        else:  # unknown object type
            raise SchedulerError('wrong argument type', str(type(obj)))
Example #3
    def kill(self, obj):
        # Cancel a job through the glite-wms-job-cancel command line tool.
        # Only the single-Job case is visible in this block.
        # NOTE(review): the bare text line below looks like a docstring whose
        # triple-quote delimiters were lost - confirm against the original.
        kill job

        # NOTE(review): jobsToKill is never used in the visible code; the
        # block may have been cut short - confirm.
        jobsToKill = []

        # the object passed is a job
        if type(obj) == Job and self.valid(obj.runningJob):

            # check for the RunningJob integrity
            schedIdList = str(obj.runningJob['schedulerId']).strip()

            command = "glite-wms-job-cancel --json --noint " + schedIdList

            # the command line is prefixed with self.proxyString
            out, ret = self.ExecuteCommand(self.proxyString + command)

            if ret != 0:
                # NOTE(review): this raise is cut off - its remaining
                # argument(s) and closing parenthesis are not visible.
                raise SchedulerError('error executing glite-wms-job-cancel',
            elif ret == 0:
                # NOTE(review): a "try:" line appears to be missing here; the
                # "except" below has no matching opener.
                    ## try to see if we got a real json
                    # NOTE(review): eval() executes whatever expression the
                    # command printed; a JSON parser would be safer.
                    result = eval(out)
                except SyntaxError, ex:
                    ## not possible to evaluate json - try as string
                    if out.find("result: success") == -1:
                        raise SchedulerError('error', out)
                    # NOTE(review): judging by the indentation, an "else:"
                    # belonging to the try presumably preceded this comment.
                    ## if was a json...
                    if 'result' in result:
                        if not result['result'] == "success":
                            raise SchedulerError('error', result)
                        # NOTE(review): an "else:" (no 'result' key) presumably
                        # preceded this raise - confirm.
                        raise SchedulerError('Missing result', result)
Example #4
    def checkUserProxy(self):
        """
        Retrieve the user proxy for the task
        If the proxy is valid pass, otherwise raise an exception

        The outcome is cached in self.validProxy: voms-proxy-info is only
        run while that attribute is None, later calls return the cached
        value (True or False) without raising.

        Returns True when the command output carries a "timeleft" field
        different from "0:00:00".
        Raises SchedulerError("Missing Proxy", ...) when the field is
        absent and SchedulerError("Proxy Expired", ...) when it is
        "0:00:00".
        """
        if self.validProxy is not None:
            return self.validProxy

        command = 'voms-proxy-info'

        if self.cert != '':
            command += ' --file ' + self.cert

        output, ret = self.ExecuteCommand(command)

        try:
            # keep only what follows the "timeleft" label
            output = output.split("timeleft  :")[1].strip()
        except IndexError:
            # label not found: no usable proxy information in the output
            self.validProxy = False
            raise SchedulerError("Missing Proxy", output.strip())

        if output == "0:00:00":
            self.validProxy = False
            raise SchedulerError("Proxy Expired", output.strip())

        self.validProxy = True
        return self.validProxy
Example #5
    def purgeService(self, obj):
        """
        purge the service used by the scheduler from job files
        not available for every scheduler

        obj -- a RunningJob, a Job or a Task

        Every concerned running job is flagged as ended and closed, with
        statusScheduler set to "Cleared" and getOutputTime set to the
        current time. For a Task, jobs whose runningJob is not valid are
        skipped and jobs in error are left untouched; their error
        messages are collected instead.

        Raises SchedulerError when some jobs of a Task are in error, and
        for any object that matches none of the three cases (this
        includes a RunningJob or Job that is not valid).
        """

        # check the proxy

        # perform action
        # NOTE(review): no call into self.schedObj is visible here, unlike
        # in the sibling getOutput - confirm this is intended.
        timestamp = int(time.time())

        def markCleared(runningJob):
            # flag one running job as purged
            runningJob['status'] = 'E'
            runningJob['closed'] = 'Y'
            runningJob['getOutputTime'] = timestamp
            runningJob['statusScheduler'] = "Cleared"

        # the object passed is a runningJob
        if isinstance(obj, RunningJob) and self.schedObj.valid(obj):
            markCleared(obj)

        # the object passed is a job
        elif isinstance(obj, Job) and self.schedObj.valid(obj.runningJob):
            markCleared(obj.runningJob)

        # the object passed is a Task
        elif isinstance(obj, Task):

            # error messages collector
            errors = ''

            # update objects
            for job in obj.jobs:

                # skip jobs not requested for action
                if not self.schedObj.valid(job.runningJob):
                    continue

                # evaluate errors: if not, update
                if job.runningJob.isError():
                    errors += str(job.runningJob.errors)
                else:
                    markCleared(job.runningJob)

            # handle errors
            if errors != '':
                raise SchedulerError('interaction failed for some jobs',
                                     errors)

        # unknown object type
        else:
            raise SchedulerError('wrong argument type', str(type(obj)))
Example #6
    def __init__(self, **args):
        # Set up the gLite scheduler: read the options from the keyword
        # arguments, build the proxy prefix used for CLI commands, work out
        # self.commandQueryPath from the environment, compile the regular
        # expressions and check the installed UI version.

        # call super class init method
        super(SchedulerGLite, self).__init__(**args)

        # some initializations
        self.warnings = []

        # typical options
        self.vo = args.get("vo", "cms")
        self.service = args.get("service", "")
        self.config = args.get("config", "")
        self.delegationId = args.get("proxyname", "bossproxy")

        # rename output files with submission number
        self.renameOutputFiles = args.get("renameOutputFiles", 0)
        self.renameOutputFiles = int(self.renameOutputFiles)

        # x509 string & hackEnv for CLI commands
        # NOTE(review): self.cert is not set in this method; presumably the
        # super class __init__ provides it - confirm.
        # NOTE(review): an "else:" appears to be missing before the second
        # pair of assignments; as written they always overwrite the first.
        if self.cert != '':
            self.proxyString = "env X509_USER_PROXY=" + self.cert + ' '
            self.hackEnv = hackTheEnv()
            self.proxyString = ''
            self.hackEnv = hackTheEnv('env')

        # this section requires an improvement....
        # NOTE(review): both assignments below end in a line continuation
        # whose following line (the path suffix) is not visible, and the
        # final raise presumably sat under an "else:" - confirm against the
        # original before relying on this section.
        if os.environ.get('CRABDIR'):
            self.commandQueryPath = os.environ.get('CRABDIR') + \
        elif os.environ.get('PRODCOMMON_ROOT'):
            self.commandQueryPath = os.environ.get('PRODCOMMON_ROOT') + \
            # Impossible to locate GLiteQueryStatus.py ...
            raise SchedulerError('Impossible to locate GLiteQueryStatus.py ')

        # cache pattern to optimize reg-exp substitution
        self.pathPattern = re.compile('location:([\S]+)$', re.M)
        self.patternCE = re.compile('(?<= - ).*(?=:)', re.M)

        # init BossliteJsonDecoder specialized class
        self.myJSONDecoder = BossliteJsonDecoder()

        # Raise an error if UI is older than 3.2 ...
        # The check passes when the output of either command starts with
        # '3.2' (find(...) == 0); the second command is only tried when the
        # first one does not match.
        version, ret = self.ExecuteCommand('glite-version')
        version = version.strip()
        if version.find('3.2') != 0:
            version1, ret1 = self.ExecuteCommand(
                'glite-version -n glite-UI -v')
            version1 = version1.strip()
            if version1.find('3.2') != 0:
                raise SchedulerError('SchedulerGLite is allowed on UI >3.2')

        # job killed per CLI call (tunable value)
        self.killThreshold = 100
Example #7
    def getOutput(self, obj, outdir):
        """
        retrieve output or just put it in the destination directory

        obj    -- a RunningJob, a Job or a Task
        outdir -- destination directory, passed on to the scheduler plugin

        The retrieval itself is delegated to self.schedObj.getOutput.
        Afterwards every concerned running job is flagged as ended and
        closed, with statusScheduler set to "Retrieved" and getOutputTime
        set to the current time. For a Task, jobs whose runningJob is not
        valid are skipped and jobs in error are left untouched; their
        error messages are collected instead.

        Raises SchedulerError when some jobs of a Task are in error, and
        for any object that matches none of the three cases (this
        includes a RunningJob or Job that is not valid).
        """

        # check the proxy

        # perform action
        self.schedObj.getOutput(obj, outdir)
        timestamp = int(time.time())

        def markRetrieved(runningJob):
            # flag one running job as retrieved
            runningJob['status'] = 'E'
            runningJob['closed'] = 'Y'
            runningJob['getOutputTime'] = timestamp
            runningJob['statusScheduler'] = "Retrieved"

        # the object passed is a runningJob
        if isinstance(obj, RunningJob) and self.schedObj.valid(obj):
            markRetrieved(obj)

        # the object passed is a job
        elif isinstance(obj, Job) and self.schedObj.valid(obj.runningJob):
            markRetrieved(obj.runningJob)

        # the object passed is a Task
        elif isinstance(obj, Task):

            # error messages collector
            errors = ''

            # update objects
            for job in obj.jobs:

                # skip jobs not requested for action
                if not self.schedObj.valid(job.runningJob):
                    continue

                # evaluate errors: if not, update
                if job.runningJob.isError():
                    errors += str(job.runningJob.errors)
                else:
                    markRetrieved(job.runningJob)

            # handle errors
            if errors != '':
                raise SchedulerError('interaction failed for some jobs',
                                     errors)

        # unknown object type
        else:
            raise SchedulerError('wrong argument type', str(type(obj)))
Example #8
    def postMortem(self, obj, schedulerId, outfile, service):
        # Collect condor_history and condor_q information for one job by
        # running both commands on the remote host.
        # obj must be a Task; outfile must be a non-empty file name.
        # NOTE(review): the bare text line below looks like a docstring whose
        # triple-quote delimiters were lost - confirm against the original.
        Get detailed postMortem job info

        if not type(obj) == Task:
            # NOTE(review): objType is not a parameter or local of this
            # method; unless it is a module-level name this line raises
            # NameError instead of SchedulerError - confirm.
            raise SchedulerError('Wrong argument type or object type',
                                 str(type(obj)) + ' ' + str(objType))

        if not outfile:
            raise SchedulerError('Empty filename',
                                 'postMortem called with empty logfile name')

        taskId = obj['name']
        # the condor id is whatever follows the last '//' of the scheduler id
        condorId = schedulerId.split('//')[-1]
        header = '========= LOGGING INFO FOR %s =========\n' % schedulerId
        horsep = '\n' + 80 * '=' + '\n'
        sep1 = '\n========= OUTPUT OF : Condor_history -match 1 -l %s =========\n' % condorId
        sep2 = '\n========= OUTPUT OF : Condor_q -l  %s =========\n' % condorId


        # NOTE(review): no write to fp and no fp.close() is visible, and
        # header/horsep/sep1/sep2 are never used; the lines that wrote the
        # report presumably got lost - confirm.
        fp = open(outfile, 'w')

        command = '%s %s %s %s ' \
                  % (self.unsetenvScram, self.remoteCommand, self.gsisshOptions, self.remoteUserHost)
        command += ' "condor_history -match 1 -userlog %s/condor.log -l %s"' % \
                   (taskId, condorId)
        (status, output) = commands.getstatusoutput(command)
        if (status):
            # NOTE(review): the body of the next "if" is not visible.
            if "already exists" in output:


        # the following condor_q only makes sense if job status
        # is 1(Idle), 2(Run) or 5(Held) but may cost little to do always

        command = '%s %s %s %s ' \
                  % (self.unsetenvScram, self.remoteCommand, self.gsisshOptions, self.remoteUserHost)
        command += ' "condor_q -l %s"' % condorId
        (status, output) = commands.getstatusoutput(command)


Example #9
    def getOutput(self, obj, outdir=''):
        # Fetch job output files from the remote host with
        # self.rsyncFromRemoteHost. obj may be a Job or a Task; a RunningJob
        # is rejected.
        # NOTE(review): the bare text line below looks like a docstring whose
        # triple-quote delimiters were lost - confirm against the original.
        Retrieve (move) job output from cache directory to outdir

        filesToGet = []
        if type(obj) == RunningJob:  # The object passed is a RunningJob
            raise SchedulerError(
                'Operation not possible',
                'Condor cannot retrieve files when passed RunningJob')
        elif type(obj) == Job:  # The object passed is a Job
            # check for the RunningJob integrity
            if not self.valid(obj.runningJob):
                raise SchedulerError('Invalid object', \
                                str( obj.runningJob ))
            filesToGet = obj['outputFiles']

        # the object passed is a Task
        elif type(obj) == Task:
            taskId = obj['name']
            # the task name is also kept on the instance
            self.taskId = taskId
            if outdir == '':
                outdir = obj['outputDirectory']
            for job in obj.jobs:
                # NOTE(review): the body of this "if" is not visible;
                # presumably it added the job's output files to filesToGet.
                if self.valid(job.runningJob):

        # unknown object type
        # NOTE(review): an "else:" appears to be missing before this raise.
            raise SchedulerError('Wrong argument type', str(type(obj)))

        retval, stdout = \
            self.rsyncFromRemoteHost(self.remoteUserHost, outdir, filesToGet )

        # a non-zero retval is treated as an rsync failure
        if retval:
            # uh oh
            if type(obj) == Job:
                jobList = [obj]
                # NOTE(review): an "else:" appears to be missing before the
                # next assignment; as written it overwrites the line above.
                jobList = obj.jobs
            for job in jobList:
                for fileName in job['outputFiles']:
                    # report every expected file that is absent from outdir
                    if not os.access(outdir + '/' + fileName, os.F_OK):
                        msg = "Could not retrieve file %s." % fileName
                        # NOTE(review): the format arguments of the next
                        # statement, and whatever used msg, are not visible.
                        msg += " Rsync failed with status,output=\n%d\n%s" % \

            # NOTE(review): the body of this "if" is not visible.
            if "already exists" in stdout:
Example #10
    def query(self, obj, service='', objType='node'):
        # Query the state of every valid job of a Task by running squeue once
        # per job, then store the scheduler state, the mapped status and the
        # execution host in job.runningJob.
        # The variable names still say qstat/pbs although the command run is
        # squeue.
        # NOTE(review): the two bare text lines below look like a docstring
        # whose triple-quote delimiters were lost - confirm.
        query status and eventually other scheduler related information
        It may use single 'node' scheduler id or bulk id for association
        if type(obj) != Task:
            raise SchedulerError('wrong argument type', str(type(obj)))

        # NOTE(review): jobids is never used in the visible code.
        jobids = []
        for job in obj.jobs:
            if not self.valid(job.runningJob): continue
            # NOTE(review): this local shadows the builtin id()
            id = str(job.runningJob['schedulerId']).strip()
            #p = subprocess.Popen( ['qstat', '-x', id], stdout=subprocess.PIPE,
            # NOTE(review): the Popen call below is cut off - the squeue
            # format string after '-o', the end of the argument list and the
            # stdout/stderr keywords are not visible.
            p = subprocess.Popen([
                'squeue', '-h', '-o',
                '-j', id
            qstat_output, \
                qstat_error = p.communicate()
            qstat_return = p.returncode

            if qstat_return:
                #if qstat_return != 153: # 153 means the job isn't there
                # NOTE(review): the trailing comment still mentions 153 while
                # the code compares against 1 - confirm which is meant.
                if qstat_return != 1:  # 153 means the job isn't there
                    self.logging.error('Error in job query for ' + id)
                    self.logging.error('SLURM stdout: \n %s' % qstat_output)
                    self.logging.error('SLURM stderr: \n %s' % qstat_error)
                    raise SchedulerError(
                        'SLURM error', '%s: %s' % (qstat_error, qstat_return))

            host = ''
            # empty output is reported as 'Done'
            if len(qstat_output) == 0:
                pbs_stat = 'Done'
                # NOTE(review): an "else:" appears to be missing here, and
                # the two slice expressions below are cut off after the "+".
                if qstat_output.find('</exec_host>') >= 0:
                    host = qstat_output[qstat_output.find('<exec_host>') +
                if qstat_output.find('</job_state>') >= 0:
                    pbs_stat = qstat_output[qstat_output.find('<job_state>') +

            # the scheduler state is translated through self.status_map
            job.runningJob['statusScheduler'] = pbs_stat
            job.runningJob['status'] = self.status_map[pbs_stat]
            job.runningJob['destination'] = host
Example #11
    def kill(self, obj):
        # Kill every valid job of obj with bkill, one command per job.
        # NOTE(review): the bare text lines below look like a docstring whose
        # triple-quote delimiters were lost - confirm against the original.
        kill the job instance

        does not return
        # NOTE(review): r is compiled but never used in the visible code.
        r = re.compile("Job <(\d+)> is being terminated")
        rFinished = re.compile("Job <(\d+)>: Job has already finished")
        # for jobid in schedIdList:
        for job in obj.jobs:
            # NOTE(review): the body of this "if" is not visible; presumably
            # it was "continue" (skip jobs that are not valid).
            if not self.valid(job.runningJob):
            jobid = str(job.runningJob['schedulerId']).strip()
            command = 'bkill ' + str(jobid)
            if self.ksuCmd:
                # write a ksu tmpFile
                cmd = '%s\n' % command
                # createCommand returns the command to run and a file name
                command, fname = self.createCommand(cmd, obj)

            out, ret = self.executeCommandWrapper(command)

            # the temporary file only exists when ksuCmd is set
            if self.ksuCmd: os.unlink(fname)
            # a job that has already finished is reported as a failure
            mFailed = rFinished.search(out)
            if mFailed:
                # NOTE(review): this raise is cut off - its last argument(s)
                # and the closing parenthesis are not visible.
                raise SchedulerError(
                    "Unable to kill job " + jobid + " . Reason: ", out,
Example #12
    def kill(self, obj):
        """
        kill the job instance

        does not return

        Runs "qdel <schedulerId>" for every job of obj whose runningJob
        is valid. A job counts as killed when the qdel output contains
        either of the two confirmation messages matched below.

        Raises SchedulerError at the first job for which neither message
        is found; the remaining jobs are then not processed.
        """
        registered = re.compile(r"has registered the job (\d+) for deletion")
        deleted = re.compile(r"has deleted job (\d+)")  #by Leo

        for job in obj.jobs:
            # skip jobs that are not valid
            if not self.valid(job.runningJob):
                continue

            jobid = str(job.runningJob['schedulerId']).strip()
            cmd = 'qdel ' + str(jobid)
            out, ret = self.ExecuteCommand(cmd)

            if not registered.search(out) and not deleted.search(out):
                raise SchedulerError(
                    "Unable to kill job #" + str(job['jobId']) + " (SGE id:" +
                    jobid + ") . Reason: ", out)
Example #13
    def postMortem(self, obj, outfile):
        """
        execute any post mortem command such as logging-info

        obj     -- a RunningJob, a Job or a Task
        outfile -- file name handed to the scheduler plugin

        The work is delegated to self.schedObj.postMortem, once for a
        RunningJob or a Job and once per job for a Task (jobs without a
        runningJob are skipped). The service comes from
        self.parameters['service'].

        Raises SchedulerError for any other argument type.
        """

        # check the proxy

        # the object passed is a runningJob
        if isinstance(obj, RunningJob):
            self.schedObj.postMortem(obj, obj['schedulerId'], outfile,
                                     self.parameters['service'])

        # the object passed is a job
        elif isinstance(obj, Job):
            self.schedObj.postMortem(obj, obj.runningJob['schedulerId'],
                                     outfile, self.parameters['service'])

        # the object passed is a Task
        elif isinstance(obj, Task):
            for job in obj.jobs:
                if job.runningJob is None:
                    continue
                self.schedObj.postMortem(obj, job.runningJob['schedulerId'],
                                         outfile, self.parameters['service'])

        # unknown object type
        else:
            raise SchedulerError('wrong argument type', str(type(obj)))
Example #14
    def getOutput(self, obj, outdir=''):
        # Skeleton for output retrieval: resolves and checks the destination
        # directory, then walks the valid jobs of obj. The retrieval step
        # itself is not visible in this block.
        # NOTE(review): the bare text line below looks like a docstring whose
        # triple-quote delimiters were lost - confirm against the original.
        retrieve output or just put it in the destination directory

        # obj can be a task, a job or even a running job
        # several possibilities:
        # 1) connect to a service and perform a remote copy
        # 2) just eventually copy the local output to the destination dir
        # 3) wrap a CLI command like glite-wms-job-output

        # NOTE(review): errorList is never used in the visible code.
        errorList = []

        # fall back to the object's own output directory when none is given
        if outdir == '' and obj['outputDirectory'] is not None:
            outdir = obj['outputDirectory']

        # NOTE(review): the condition only tests that the directory exists,
        # although the message speaks of permissions.
        if outdir != '' and not os.path.exists(outdir):
            raise SchedulerError( 'Permission denied', \
                                  'Unable to write files in ' + outdir )

        # retrieve scheduler id list
        # NOTE(review): schedIdList is never filled in the visible code.
        schedIdList = {}
        for job in obj.jobs:
            # NOTE(review): this "if" has no statement in its body, only
            # comments; the code that followed is not visible.
            if self.valid(job.runningJob):
                # retrieve output
                # if error: job.runningJob.errors.append( error )
Example #15
    def getOutput(self, obj, outdir):
        # For a Task, move the output files of every valid job from
        # <outputDirectory>/temp to outdir with one "mv" command per file.
        # Nothing is done for other object types in the visible code.
        # NOTE(review): the bare text lines below look like a docstring whose
        # triple-quote delimiters were lost - confirm against the original.
        retrieve output or just put it in the destination directory

        does not return
        #output ends up in the wrong location with a user defined
        #output directory...Thus we have to move it to the correct
        #directory here....
        #print "SchedulerSGE:getOutput called!"

        if type(obj) == Task:
            #           oldoutdir=obj[ 'outputDirectory' ]
            oldoutdir = obj[
                'outputDirectory'] + '/temp'  ## copy new output  files from temp"
            if (outdir != oldoutdir):
                for job in obj.jobs:
                    # NOTE(review): jobid is only used by the commented-out
                    # print below.
                    jobid = job['id']
                    #print "job:"+str(jobid)
                    if self.valid(job.runningJob):
                        #print "is valid"
                        for outFile in job['outputFiles']:
                            #print "outputFile:"+outFile
                            command = "mv " + oldoutdir + "/" + outFile + " " + outdir + "/. \n"
                            #print command
                            out, ret = self.ExecuteCommand(command)
                            # any output at all is treated as a failure; the
                            # return code ret is not looked at
                            if (out != ""):
                                # NOTE(review): this raise is cut off - its
                                # remaining argument(s) and closing
                                # parenthesis are not visible.
                                raise SchedulerError('unable to move file',
                                #raise SchedulerError("unable to move file "+oldoutdir+"/"+outFile+" ",out)
Example #16
    def query(self, obj, objType='node'):
        """
        query status and eventually other scheduler related information

        obj     -- object exposing .jobs; handed unchanged to the plugin
        objType -- handed unchanged to the plugin (default 'node')

        The query is delegated to self.schedObj.query together with
        self.parameters['service'].

        Raises SchedulerError carrying the concatenated error lists of
        all jobs whose runningJob reports an error afterwards.
        """

        # check the proxy

        # delegate query to scheduler plugin
        self.schedObj.query(obj, self.parameters['service'], objType)

        # error messages collector: one entry per job in error
        errors = ''.join([str(job.runningJob.errors)
                          for job in obj.jobs
                          if job.runningJob.isError()])

        # handle errors
        if errors != '':
            raise SchedulerError('interaction failed for some jobs', errors)
Example #17
class Scheduler(object):
    """
    Upper layer for scheduler interaction
    """

    def __init__(self, scheduler, parameters=None):
        """
        Load the scheduler plugin named by 'scheduler'.

        scheduler  -- name of both the plugin module under
                      ProdCommon.BossLite.Scheduler and the class in it
        parameters -- optional dict of options; merged over the defaults
                      (empty 'user_proxy', 'service' and 'config') and
                      passed to the plugin constructor as keyword arguments

        Raises SchedulerError when the module does not define the class,
        or when importing or instantiating the plugin fails.
        """

        # define scheduler parameters
        self.scheduler = scheduler
        defaults = {'user_proxy': '', 'service': '', 'config': ''}
        if parameters is not None:
            defaults.update(parameters)
        self.parameters = defaults

        # load scheduler plugin
        try:
            module = __import__(
                'ProdCommon.BossLite.Scheduler.' + self.scheduler,
                globals(), locals(), [self.scheduler])
            schedClass = vars(module)[self.scheduler]
            self.schedObj = schedClass(**self.parameters)
        except KeyError as e:
            msg = 'Scheduler interface ' + self.scheduler + ' not found'
            raise SchedulerError(msg, str(e))
        except Exception as e:
            # any other failure is reported under its exception class name
            raise SchedulerError(e.__class__.__name__, str(e))
Example #18
    def submitJob(self, job, task=None, requirements=''):
        """Need to copy the inputsandbox to WN before submitting a job

        The qsub arguments come from self.decode(job, task, requirements).

        Returns a tuple (map, taskId, queue): map associates job['name']
        with the job id parsed from the qsub output, taskId is always
        None and queue is always "all".

        Raises SchedulerError('error', <qsub output>) when the output
        does not contain the "Your job <id> ... has been submitted" line.
        The return code of qsub is not looked at.
        """

        arg = self.decode(job, task, requirements)

        command = "qsub " + arg
        out, ret = self.ExecuteCommand(command)
        self.logging.debug("crab:  %s" % out)

        m = re.search(r"Your job (\d+) .* has been submitted", out)
        if m is None:
            raise SchedulerError('error', out)

        jobId = m.group(1)
        queue = "all"
        taskId = None
        return {job['name']: jobId}, taskId, queue
Example #19
 def pbs_conn(self):
     """Connect to the default PBS server and return the connection handle.

     A negative handle from pbs.pbs_connect is treated as a failure: the
     PBS error code and text are logged and a SchedulerError is raised.
     """
     handle = pbs.pbs_connect(pbs.pbs_default())
     if not (handle < 0):
         return handle

     code, text = pbs.error()
     self.logging.error('Error in PBS server conncet')
     detail = str(code) + ': ' + text
     self.logging.error('PBS error code ' + detail)
     raise SchedulerError('PBS error', detail)
Example #20
    def getOutput(self, obj, outdir=''):
        """
        Get output files from jobs in 'obj' and put them in 'outdir', and
        remove the job from the CE.

        obj    -- a Task (all of its jobs) or a single Job
        outdir -- destination directory; for a Task an empty value falls
                  back to obj['outputDirectory']. Must not end up empty.

        Raises SchedulerError for any other argument type and whenever
        arcget, mv or rm exits with a non-zero status.
        """
        if isinstance(obj, Task):
            joblist = obj.jobs
            if outdir == '':
                outdir = obj['outputDirectory']
        elif isinstance(obj, Job):
            joblist = [obj]
        else:
            raise SchedulerError('wrong argument type', str(type(obj)))

        assert outdir != ''
        if not outdir.endswith('/'):
            outdir += '/'

        jobsFile, arcId2job = self.createJobsFile(joblist, "Will fetch")

        # Create a tmp dir where ngget can create its subdirs of job
        # output. Use outdir as the parent dir, to keep moving of files
        # afterwards within the same files system (faster!)
        tmpdir = tempfile.mkdtemp(prefix="joboutputs.", dir=outdir)

        cmd = self.pre_arcCmd + 'arcget -i %s -dir %s' % (jobsFile.name,
                                                          tmpdir)
        self.logging.debug("Running command: %s" % cmd)
        output, stat = self.ExecuteCommand(cmd)
        self.logging.debug("Output of arcget: %s" % output)
        if stat != 0:
            raise SchedulerError('arcget returned %i' % stat, output, cmd)

        # Copy the downloaded files to their final destination
        cmd = 'mv %s/*/* %s' % (tmpdir, outdir)
        self.logging.debug("Moving files from %s/* to %s" % (tmpdir, outdir))
        output, stat = self.ExecuteCommand(cmd)
        if stat != 0:
            raise SchedulerError('mv returned %i' % stat, output, cmd)

        # Remove the tmp output dir
        cmd = 'rm -r %s' % tmpdir
        output, stat = self.ExecuteCommand(cmd)
        if stat != 0:
            raise SchedulerError('rm returned %i' % stat, output, cmd)
Example #21
    def submitJob(self, job, task=None, requirements=''):
        """ Need to copy the inputsandbox to WN before submitting a job

        The bsub arguments come from self.decode(job, task, requirements)
        and the command is run through self.executeCommandWrapper. With
        self.ksuCmd set, the command is wrapped into a script by
        self.createCommand (run from /tmp) and the script file is removed
        afterwards; otherwise bsub is run from task['outputDirectory'].

        Returns a tuple (map, taskId, queue): map associates job['name']
        with the job id parsed from the bsub output, taskId is always
        None and queue is the queue name parsed from the same line.

        Raises SchedulerError when the command exits with a non-zero
        status or when its output lacks the "Job <id> is submitted" line.
        """

        arg = self.decode(job, task, requirements)

        # command = "bsub " + arg
        chDir = "pushd . > /dev/null ; "
        resetDir = " ; popd > /dev/null"
        command = " bsub " + arg + resetDir

        if self.ksuCmd:
            chDir += "cd /tmp; "
            cmd = "#!/usr/bin/pagsh.krb\n"
            cmd += "aklog\n"
            cmd += '%s %s\n' % (chDir, command)
            command, fname = self.createCommand(cmd, task)
        else:
            # execute bsub in the directory where files have be returned
            chDir += " cd %s ;" % task['outputDirectory']
            command = '%s %s' % (chDir, command)
        out, ret = self.executeCommandWrapper(command)

        # the script file only exists when ksuCmd is set
        if self.ksuCmd:
            os.unlink(fname)
        if ret != 0:
            raise SchedulerError('Error in submit', out, command)

        m = re.search(r"Job <(\d+)> is submitted.*<(\w+)>", out)
        if m is None:
            # log the explicit refusal, if there is one, before giving up
            if re.search(r"Job not submitted.*<(\w+)>", out) is not None:
                self.logging.error("Job NOT submitted")
            raise SchedulerError('Cannot submit ', out, command)

        jobId = m.group(1)
        queue = m.group(2)
        taskId = None
        #print "Your job identifier is: ", taskId, queue
        return {job['name']: jobId}, taskId, queue
Example #22
    def query(self, obj, service='', objType='node'):
        """
        query status and eventually other scheduler related information
        It may use single 'node' scheduler id or bulk id for association

        obj must be a Task. For each of its jobs with a valid runningJob
        the 'job_state' and 'exec_host' attributes are requested through
        pbs.pbs_statjob and stored as runningJob['statusScheduler'],
        runningJob['status'] (mapped through self.status_map) and
        runningJob['destination']. A job for which nothing is returned
        with PBS error 15001 is reported as 'Done'.

        service and objType are not used here.

        Raises SchedulerError when obj is not a Task or when the query
        fails with any PBS error other than 15001.
        """
        if not isinstance(obj, Task):
            raise SchedulerError('wrong argument type', str(type(obj)))

        # NOTE(review): the connection is never released in this method;
        # confirm whether the pbs bindings require an explicit disconnect.
        conn = self.pbs_conn()
        attrl = pbs.new_attrl(2)
        attrl[0].name = 'job_state'
        attrl[1].name = 'exec_host'

        for job in obj.jobs:
            if not self.valid(job.runningJob):
                continue
            jobId = str(job.runningJob['schedulerId']).strip()
            jobstat = pbs.pbs_statjob(conn, jobId, attrl, 'Null')

            if not jobstat:
                err, err_text = pbs.error()
                if err != 15001:  # unknown job (probably finished)
                    self.logging.error('Error in job query for ' + jobId)
                    self.logging.error('PBS error code ' + str(err) + ': ' +
                                       err_text)
                    raise SchedulerError('PBS error',
                                         str(err) + ': ' + err_text)

            host = ''
            if not jobstat:
                pbs_stat = 'Done'
            else:
                pbs_stat = jobstat[0].attribs[0].value
                if len(jobstat[0].attribs) > 1:
                    host = jobstat[0].attribs[1].value
            job.runningJob['statusScheduler'] = pbs_stat
            job.runningJob['status'] = self.status_map[pbs_stat]
            job.runningJob['destination'] = host

Example #23
    def purgeService(self, obj):
        """
        Purge job (even bulk) from service

        Placeholder implementation: 'out' is a fixed string that does not
        contain 'error', so nothing is purged and nothing is raised. A
        real implementation would put the service response in 'out'.
        """

        # not always available...
        # it may be useful to connect to a remote service and purge job sandbox

        out = "whatever"
        if out.find('error') >= 0:
            raise SchedulerError("Unable to purge job", out)
Example #24
    def __init__(self, **args):
        """
        Set up the PBS scheduler from keyword arguments.

        Required keys: 'jobScriptDir', 'jobResDir', 'queue', 'resources'.
        Optional key: 'workDir' (default '').

        'resources' is a comma separated list of name=value items stored
        in self.res_dict; only the first '=' of an item separates name
        from value, so values may contain '=' themselves.

        Also builds self.pbs_env (comma separated PBS_O_* settings taken
        from the current environment, working directory and default PBS
        server) and self.status_map (PBS job state -> internal status).

        Raises SchedulerError for a non-empty resource item without '='.
        Raises KeyError when a required key or one of the environment
        variables read below is missing.
        """
        super(SchedulerPbs, self).__init__(**args)
        self.jobScriptDir = args['jobScriptDir']
        self.jobResDir = args['jobResDir']
        self.queue = args['queue']
        self.workerNodeWorkDir = args.get('workDir', '')

        self.res_dict = {}
        for a in args['resources'].split(','):
            if len(a) > 0:
                if a.find("=") != -1:
                    res, val = a.split('=', 1)
                    self.res_dict.update({res: val})
                else:
                    raise SchedulerError("PBS error",
                                         "Unkown resource format: " + a)

        env = []
        for v in ('HOME', 'LANG', 'LOGNAME', 'MAIL', 'PATH', 'SHELL'):
            env.append('PBS_O_' + v + '=' + os.environ[v])

        env.append('PBS_O_WORKDIR=' + os.getcwd())
        env.append('PBS_O_HOST=' + pbs.pbs_default())
        #if 'use_proxy' in args:
        #     if args['use_proxy'] == 1:
        #         proxy_location = ''
        #         try:
        #             proxy_location = os.environ['X509_USER_PROXY']
        #         except:
        #             proxy_location = '/tmp/x509up_u'+ repr(os.getuid())

        #         msg, ret = self.ExecuteCommand('cp ' + proxy_location + " " + self.cert)
        ##          proxy_path = self.getUserProxy()
        #         env.append('X509_USER_PROXY=' + self.cert)
        #         env.append('X509_USER_CERT=' + self.cert)
        #         env.append('X509_USER_KEY=' + self.cert)
        #     else:
        #         raise SchedulerError(str(args), self.cert)

        self.pbs_env = ','.join(env)

        self.status_map = {
            'E': 'R',
            'H': 'SS',
            'Q': 'SS',
            'R': 'R',
            'S': 'R',
            'T': 'R',
            'W': 'SS',
            'Done': 'SD',
            'C': 'SD'
        }
Example #25
    def matchResources(self, obj, requirements='', config='', service=''):
        """
        resources list match

        Placeholder implementation: 'out' is a fixed string that does not
        contain 'error', so nothing is looked up, nothing is raised and
        None is returned. None of the arguments is used.
        """

        # several possibilities:
        # 1) connect to a service and ask
        # 2) wrap a CLI command like glite-wms-job-listmatch
        # 3) not available... skip
        # 4) there is a useful lcgInfo...

        out = "whatever"
        if out.find('error') >= 0:
            raise SchedulerError("Unable to find resources", out)
Example #26
    def getOutput(self, obj, outdir=''):
        # Fetch the output of each job with its own arcget call into a
        # temporary directory under outdir, then move the files to outdir
        # and remove the temporary directory.
        # NOTE(review): the two bare text lines below look like a docstring
        # whose triple-quote delimiters were lost - confirm.
        Get output files from jobs in 'obj' and put them in 'outdir', and  
        remove the job from the CE.
        if type(obj) == Task:
            self.logging.debug("getOutput called for %i jobs" % len(obj.jobs))
            joblist = obj.jobs
            if outdir == '':
                outdir = obj['outputDirectory']
        elif type(obj) == Job:
            self.logging.debug("getOutput called for 1 job")
            joblist = [obj]
            # NOTE(review): an "else:" appears to be missing before this
            # raise; as written every Job is rejected.
            raise SchedulerError('wrong argument type', str(type(obj)))

        assert outdir != ''
        # make sure outdir ends with a slash
        if outdir[-1] != '/': outdir += '/'

        for job in joblist:
            # one temporary directory per job, created inside outdir
            tmpdir = tempfile.mkdtemp(prefix="joboutputs.", dir=outdir)

            cmd = self.pre_arcCmd + 'arcget --timeout=600 %s --dir %s' % (
                job.runningJob['schedulerId'], tmpdir)
            self.logging.debug("Running command: %s" % cmd)
            output, stat = self.ExecuteCommand(cmd)
            self.logging.debug("Status and output of arcget: %i, '%s'" %
                               (stat, output))
            # NOTE(review): from here on lines appear to be missing - msg is
            # built three times but never used, the "else:" branches that the
            # indentation suggests are not visible, and the second msg
            # assignment is cut off after "% (". Presumably each msg was
            # logged and appended to job.runningJob.errors - confirm.
            if stat != 0:
                msg = "arcget failed with status %i: %s" % (stat, output)
                # Copy the downloaded files to their final destination
                cmd = 'mv %s/*/* %s' % (tmpdir, outdir)
                self.logging.debug("Moving files from %s/* to %s" %
                                   (tmpdir, outdir))
                output, stat = self.ExecuteCommand(cmd)
                if stat != 0:
                    msg = "Moving files to final destination failed: %s" % (
                    cmd = ' rm -r %s' % (tmpdir)
                    self.logging.debug("Removing tempdir %s" % (tmpdir))
                    output, stat = self.ExecuteCommand(cmd)
                    if stat != 0:
                        msg = "Removing tempdir: %s" % (output)
Example #27
    def kill(self, obj):
        """
        Kill the jobs of 'obj' by running 'qdel' on each scheduler id.

        Jobs whose runningJob does not pass self.valid() are skipped.

        Raises SchedulerError as soon as one 'qdel' call exits with a
        non-zero status; jobs after the failing one are not processed.
        """
        for job in obj.jobs:
            if not self.valid(job.runningJob):
                continue

            # Named job_id (not 'id') so the builtin id() is not shadowed.
            job_id = str(job.runningJob['schedulerId']).strip()

            # NOTE(review): the tail of this Popen call was truncated in the
            # reviewed source.  Folding stderr into stdout is an assumption,
            # chosen so that qdel's diagnostics end up in the text that is
            # logged and raised below -- confirm.
            p = subprocess.Popen(['qdel', job_id], stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
            qdel_output, _ = p.communicate()
            qdel_return = p.returncode

            if qdel_return != 0:
                self.logging.error('Error in job kill for ' + job_id)
                self.logging.error('PBS Error stdout: %s' % qdel_output)
                raise SchedulerError('PBS Error in kill', qdel_output)
Example #28
    def submit(self, task, requirements='', config='', service=''):
        """
        user submission function

        Submits every job in task.jobs, one command per job: the command
        line is built by self.decodeJob(job, task, requirements) and run
        through self.ExecuteCommand.

        'config' and 'service' are accepted but not used by this method.

        Returns the tuple (retMap, taskId, queue):
        - retMap maps job['name'] to the job id parsed from the output
          ("Job <id> is submitted ... <queue>") of each successful submit
        - taskId is always None here (no parent id is produced)
        - queue is the queue name parsed from the last successful submit,
          or None if no job was submitted

        When the output reports "Job not submitted", the failure is logged
        and appended to job.runningJob.errors; the remaining jobs are still
        processed.

        Raises SchedulerError if a submit command returns non-zero.
        """
        taskId = None
        queue = None
        retMap = {}

        # Compiled once; the patterns do not depend on the job.
        submitted = re.compile(r"Job <(\d+)> is submitted.*<(\w+)>")
        not_submitted = re.compile(r"Job not submitted.*<(\w+)>")

        for job in task.jobs:
            command = self.decodeJob(job, task, requirements)
            out, ret = self.ExecuteCommand(command)
            if ret != 0:
                raise SchedulerError('Error in submit', out, command)

            m = submitted.search(out)
            if m is not None:
                jobId = m.group(1)
                queue = m.group(2)
                retMap[job['name']] = jobId
            elif not_submitted.search(out) is not None:
                self.logging.error("Job NOT submitted: %s" % out)
                # NOTE(review): the arguments look swapped relative to the
                # message wording (output first, command second); kept as
                # found -- confirm before changing.
                job.runningJob.errors.append('Cannot submit using %s: %s' %
                                             (out, command))

        return retMap, taskId, queue
Example #29
    def query(self, obj, service='', objType='node'):
        """
        query status and eventually other scheduler related information

        Runs 'bjobs <schedulerId>' once for every job in obj.jobs whose
        runningJob passes self.valid(), and stores what it can parse in
        job.runningJob:
        - 'statusScheduler': the status word reported by bjobs, or 'DONE'
          when bjobs answers "Job <id> is not found"
        - 'status': self.statusMap[statusScheduler]
        - 'destination': the host field, only when the full bjobs line
          could be parsed

        'service' and 'objType' are accepted but not used by this method.

        Raises SchedulerError if a bjobs command returns non-zero.
        """
        # ask for the job informations, mainly status
        # some systems allow a query job per job, others also bulk queries

        # Short form: job id, one skipped field, status.
        r = re.compile(r"(\d+)\s+\w+\s+(\w+).*")
        # Full form: job id, skipped field, status, queue, skipped field,
        # host (presumably the execution host -- confirm against bjobs
        # column layout).
        rfull = re.compile(r"(\d+)\s+\w+\s+(\w+)\s+(\w+)\s+\w+\s+(\w+).*")
        rnotfound = re.compile(r"Job <(\d+)> is not found")

        for job in obj.jobs:

            if not self.valid(job.runningJob):
                continue

            jobid = str(job.runningJob['schedulerId']).strip()
            command = 'bjobs ' + str(jobid)
            out, ret = self.ExecuteCommand(command)
            if ret != 0:
                raise SchedulerError('Error in status query', out, command)

            host = None
            st = None
            if rnotfound.search(out):
                # A job the scheduler no longer knows is reported as done.
                st = 'DONE'
            else:
                mfull = rfull.search(out)
                if mfull:
                    _, st, _, host = mfull.groups()
                else:
                    m = r.search(out)
                    if m:
                        _, st = m.groups()

            if st:
                job.runningJob['statusScheduler'] = st
                job.runningJob['status'] = self.statusMap[st]
            if host:
                job.runningJob['destination'] = host
Example #30
    def kill(self, obj):
        """
        Kill the jobs of 'obj' through the PBS connection from pbs_conn().

        Jobs whose runningJob does not pass self.valid() are skipped; every
        other job is deleted with pbs.pbs_deljob using its scheduler id.

        Raises SchedulerError as soon as one deletion returns a non-zero
        result; jobs after the failing one are not processed.
        """
        # NOTE(review): no call that releases 'conn' is visible in the
        # reviewed source -- confirm whether the connection must be closed
        # on the error and success paths.
        conn = self.pbs_conn()

        for job in obj.jobs:
            if not self.valid(job.runningJob):
                continue

            # Named job_id (not 'id') so the builtin id() is not shadowed.
            job_id = str(job.runningJob['schedulerId']).strip()
            res = pbs.pbs_deljob(conn, job_id, '')

            if res != 0:
                err, err_text = pbs.error()
                self.logging.error('Error in job kill for ' + job_id)
                self.logging.error('PBS error code ' + str(err) + ': ' +
                                   err_text)
                raise SchedulerError('PBS error', str(err) + ': ' + err_text)
