Example #1
0
    def getClusterAd(self):
        """
        _getClusterAd_

        Return common cluster classad

        scriptFile & Output/Error/Log filenames shortened to
        avoid condorg submission errors from >256 chars paths

        Attributes named in the SUBMIT_ATTRS / SUBMIT_EXPRS condor
        configuration knobs are copied into the ad as well, unless the ad
        already defines them or they are listed in params_to_skip.

        :return: the populated classad.ClassAd after it has been passed
                 through convertFromUnicodeToStr
        """
        ad = classad.ClassAd()

        #ad['universe'] = "vanilla"
        ad['Requirements'] = classad.ExprTree(self.reqStr)
        ad['ShouldTransferFiles'] = "YES"
        ad['WhenToTransferOutput'] = "ON_EXIT"
        ad['UserLogUseXML'] = True
        ad['JobNotification'] = 0
        ad['Cmd'] = self.scriptFile
        # Short, per-proc file names built at evaluation time from the job ids
        ad['Out'] = classad.ExprTree('strcat("condor.", ClusterId, ".", ProcId, ".out")')
        ad['Err'] = classad.ExprTree('strcat("condor.", ClusterId, ".", ProcId, ".err")')
        ad['UserLog'] = classad.ExprTree('strcat("condor.", ClusterId, ".", ProcId, ".log")')

        ad['WMAgent_AgentName'] = self.agent

        # NOTE(review): `Unknown` is unquoted, so it reads as an attribute
        # reference rather than the string "Unknown" -- confirm this is intended
        ad['JOBGLIDEIN_CMSSite'] = classad.ExprTree('isUndefined(GLIDEIN_CMSSite) ? Unknown : GLIDEIN_CMSSite')

        ad['JobLeaseDuration'] = classad.ExprTree('isUndefined(MachineAttrMaxHibernateTime0) ? 1200 : MachineAttrMaxHibernateTime0')

        # Required for global pool accounting
        ad['AcctGroup'] = self.acctGroup
        ad['AcctGroupUser'] = self.acctGroupUser

        # Customized classAds for this plugin
        ad['DESIRED_Archs'] = "INTEL,X86_64"

        ad['x509userproxy'] = self.x509userproxy
        ad['x509userproxysubject'] = self.x509userproxysubject

        ad['Rank'] = 0.0
        ad['TransferIn'] = False

        # TODO: remove when 8.5.7 is deployed
        # Gather the attribute names listed in the two knobs. A knob that is
        # not configured contributes nothing instead of raising KeyError.
        params_to_add = []
        for knob in ('SUBMIT_ATTRS', 'SUBMIT_EXPRS'):
            if knob in htcondor.param:
                params_to_add.extend(htcondor.param[knob].split())

        params_to_skip = ('accounting_group', 'use_x509userproxy', 'PostJobPrio2', 'JobAdInformationAttrs')
        for param in params_to_add:
            # Never override what was set above, and ignore the skipped names
            if param in ad or param in params_to_skip:
                continue
            if param in htcondor.param:
                ad[param] = classad.ExprTree(htcondor.param[param])

        ad = convertFromUnicodeToStr(ad)
        return ad
Example #2
0
    def getProcAds(self, jobList):
        """
        _getProcAds_

        Return list of job specific classads for submission

        :param jobList: iterable of dict-like jobs. Each job must provide
            'cache_dir', 'sandbox', 'packageDir', 'id', 'retry_count',
            'possibleSites', 'potentialSites', 'requestName', 'jobid',
            'taskName', 'taskType' and 'taskID'; every other key read here
            is optional and falls back to a default.
        :return: list of (classad.ClassAd, 1) tuples, one per job
                 (presumably 1 is the number of procs to queue for that
                 ad -- confirm against the submit call)
        """
        # Compiled once: the same pattern is applied to every job
        groupPattern = re.compile("^[a-zA-Z0-9_]+_([a-zA-Z0-9]+)-")

        classAds = []
        for job in jobList:
            # Attributes are collected in a plain dict first and copied into
            # a ClassAd only after the unicode -> str conversion below
            ad = {}

            ad['Iwd'] = job['cache_dir']
            ad['TransferInput'] = "%s,%s/%s,%s" % (job['sandbox'], job['packageDir'],
                                                   'JobPackage.pkl', self.unpacker)
            ad['Arguments'] = "%s %i" % (os.path.basename(job['sandbox']), job['id'])

            ad['TransferOutput'] = "Report.%i.pkl" % job["retry_count"]

            sites = ','.join(sorted(job.get('possibleSites')))
            ad['DESIRED_Sites'] = sites

            sites = ','.join(sorted(job.get('potentialSites')))
            ad['ExtDESIRED_Sites'] = sites

            ad['WMAgent_RequestName'] = job['requestName']

            # CMSGroups is the token captured by the pattern from the
            # request name, or undefined when the name does not match
            match = groupPattern.match(job['requestName'])
            if match:
                ad['CMSGroups'] = match.group(1)
            else:
                ad['CMSGroups'] = classad.Value.Undefined

            ad['WMAgent_JobID'] = job['jobid']
            ad['WMAgent_SubTaskName'] = job['taskName']
            ad['CMS_JobType'] = job['taskType']

            # Handling for AWS, cloud and opportunistic resources
            ad['AllowOpportunistic'] = job.get('allowOpportunistic', False)

            if job.get('inputDataset'):
                ad['DESIRED_CMSDataset'] = job['inputDataset']
            else:
                ad['DESIRED_CMSDataset'] = classad.Value.Undefined

            if job.get('inputDatasetLocations'):
                sites = ','.join(sorted(job['inputDatasetLocations']))
                ad['DESIRED_CMSDataLocations'] = sites
            else:
                ad['DESIRED_CMSDataLocations'] = classad.Value.Undefined

            # HighIO and repack jobs
            ad['Requestioslots'] = 1 if job['taskType'] in ["Merge", "Cleanup", "LogCollect"] else 0
            ad['RequestRepackslots'] = 1 if job['taskType'] == 'Repack' else 0

            # Performance and resource estimates (including JDL magic tweaks)
            origCores = job.get('numberOfCores', 1)
            # estimatedJobTime is divided by 60 to get minutes; default is 12 hours
            estimatedMins = int(job['estimatedJobTime'] / 60.0) if job.get('estimatedJobTime') else 12 * 60
            estimatedMinsSingleCore = estimatedMins * origCores
            ad['EstimatedSingleCoreMins'] = estimatedMinsSingleCore
            ad['OriginalMaxWallTimeMins'] = estimatedMins
            # For now, assume a 15 minute job startup overhead -- condor will round this up further
            ad['MaxWallTimeMins'] = classad.ExprTree('WMCore_ResizeJob ? (EstimatedSingleCoreMins/RequestCpus + 15) : OriginalMaxWallTimeMins')

            requestMemory = int(job['estimatedMemoryUsage']) if job.get('estimatedMemoryUsage', None) else 1000
            ad['OriginalMemory'] = requestMemory
            ad['ExtraMemory'] = self.extraMem
            ad['RequestMemory'] = classad.ExprTree('OriginalMemory + ExtraMemory * (WMCore_ResizeJob ? (RequestCpus-OriginalCpus) : 0)')

            requestDisk = int(job['estimatedDiskUsage']) if job.get('estimatedDiskUsage', None) else 20 * 1000 * 1000 * origCores
            ad['RequestDisk'] = requestDisk

            # Set up JDL for multithreaded jobs.
            # By default, RequestCpus will evaluate to whatever CPU request was in the workflow.
            # If the job is labelled as resizable, then the logic is more complex:
            # - If the job is running in a slot with N cores, this should evaluate to N
            # - If the job is being matched against a machine, match all available CPUs, provided
            # they are between min and max CPUs.
            # - Otherwise, just use the original CPU count.
            # Floor division keeps the default identical on Python 2 and 3
            ad['MinCores'] = int(job.get('minCores', max(1, origCores // 2)))
            ad['MaxCores'] = max(int(job.get('maxCores', origCores)), origCores)
            ad['OriginalCpus'] = origCores
            # Prefer slots that are closest to our MaxCores without going over.
            # If the slot size is _greater_ than our MaxCores, we prefer not to
            # use it - we might unnecessarily fragment the slot.
            ad['Rank'] = classad.ExprTree('isUndefined(Cpus) ? 0 : ifThenElse(Cpus > MaxCores, -Cpus, Cpus)')
            # Record the number of CPUs utilized at match time.  We'll use this later
            # for monitoring and accounting.  Defaults to 0; once matched, it'll
            # put an attribute in the job  MATCH_EXP_JOB_GLIDEIN_Cpus = 4
            ad['JOB_GLIDEIN_Cpus'] = "$$(Cpus:0)"
            # Make sure the resize request stays within MinCores and MaxCores.
            ad['RequestResizedCpus'] = classad.ExprTree('(Cpus>MaxCores) ? MaxCores : ((Cpus < MinCores) ? MinCores : Cpus)')
            # If the job is running, then we should report the matched CPUs in RequestCpus - but only if there are sane
            # values.  Otherwise, we just report the original CPU request
            ad['JobCpus'] = classad.ExprTree('((JobStatus =!= 1) && (JobStatus =!= 5) && !isUndefined(MATCH_EXP_JOB_GLIDEIN_Cpus) && (int(MATCH_EXP_JOB_GLIDEIN_Cpus) isnt error)) ? int(MATCH_EXP_JOB_GLIDEIN_Cpus) : OriginalCpus')

            # Cpus is taken from the machine ad - hence it is only defined when we are doing negotiation.
            # Otherwise, we use either the cores in the running job (if available) or the original cores.
            ad['RequestCpus'] = classad.ExprTree('WMCore_ResizeJob ? (!isUndefined(Cpus) ? RequestResizedCpus : JobCpus) : OriginalCpus')
            ad['WMCore_ResizeJob'] = bool(job.get('resizeJob', False))

            taskPriority = int(job.get('taskPriority', self.defaultTaskPriority))
            priority = int(job.get('priority', 0))
            ad['JobPrio'] = int(priority + taskPriority * self.maxTaskPriority)
            ad['PostJobPrio1'] = int(-1 * len(job.get('potentialSites', [])))
            ad['PostJobPrio2'] = int(-1 * job['taskID'])

            # Add OS requirements for jobs
            scramArch = job.get('scramArch')
            if scramArch is not None and scramArch.startswith("slc6_"):
                ad['REQUIRED_OS'] = "rhel6"
            else:
                ad['REQUIRED_OS'] = "any"

            ad = convertFromUnicodeToStr(ad)
            condorAd = classad.ClassAd()
            # items() is available on both Python 2 and 3 (iteritems is not)
            for key, value in ad.items():
                condorAd[key] = value
            classAds.append((condorAd, 1))

        return classAds
    def getProcAds(self, jobList):
        """
        _getProcAds_

        Return list of job specific classads for submission

        :param jobList: iterable of dict-like jobs. Each job must provide
            'cache_dir', 'sandbox', 'packageDir', 'id', 'retry_count',
            'possibleSites', 'potentialSites', 'requestName', 'jobid',
            'taskName', 'taskType' and 'taskID'; every other key read here
            is optional and falls back to a default.
        :return: list of (ad, 1) tuples, one per job, where ad is what
                 convertFromUnicodeToStr returns for the job's ClassAd
                 (presumably 1 is the number of procs to queue for that
                 ad -- confirm against the submit call)
        """
        # Compiled once: the same pattern is applied to every job
        groupPattern = re.compile("^[a-zA-Z0-9_]+_([a-zA-Z0-9]+)-")

        classAds = []
        for job in jobList:
            ad = classad.ClassAd()

            ad['Iwd'] = job['cache_dir']
            ad['TransferInput'] = "%s,%s/%s,%s" % (
                job['sandbox'], job['packageDir'], 'JobPackage.pkl',
                self.unpacker)
            ad['Arguments'] = "%s %i" % (os.path.basename(
                job['sandbox']), job['id'])

            ad['TransferOutput'] = "Report.%i.pkl" % job["retry_count"]

            ad['JobMachineAttrs'] = "GLIDEIN_CMSSite"
            ad['JobAdInformationAttrs'] = "JobStatus,QDate,EnteredCurrentStatus,JobStartDate,DESIRED_Sites,ExtDESIRED_Sites,WMAgent_JobID,MATCH_EXP_JOBGLIDEIN_CMSSite"

            sites = ','.join(sorted(job.get('possibleSites')))
            ad['DESIRED_Sites'] = sites

            sites = ','.join(sorted(job.get('potentialSites')))
            ad['ExtDESIRED_Sites'] = sites

            ad['WMAgent_RequestName'] = job['requestName']

            # CMSGroups is the token captured by the pattern from the
            # request name, or undefined when the name does not match
            match = groupPattern.match(job['requestName'])
            if match:
                ad['CMSGroups'] = match.group(1)
            else:
                ad['CMSGroups'] = classad.Value.Undefined

            ad['WMAgent_JobID'] = job['jobid']
            ad['WMAgent_SubTaskName'] = job['taskName']
            ad['CMS_JobType'] = job['taskType']

            # Handling for AWS, cloud and opportunistic resources
            ad['AllowOpportunistic'] = job.get('allowOpportunistic', False)

            if job.get('inputDataset'):
                ad['DESIRED_CMSDataset'] = job['inputDataset']
            else:
                ad['DESIRED_CMSDataset'] = classad.Value.Undefined

            if job.get('inputDatasetLocations'):
                sites = ','.join(sorted(job['inputDatasetLocations']))
                ad['DESIRED_CMSDataLocations'] = sites
            else:
                ad['DESIRED_CMSDataLocations'] = classad.Value.Undefined

            # HighIO and repack jobs
            ad['Requestioslots'] = 1 if job['taskType'] in [
                "Merge", "Cleanup", "LogCollect"
            ] else 0
            ad['RequestRepackslots'] = 1 if job['taskType'] == 'Repack' else 0

            # Performance and resource estimates
            numberOfCores = job.get('numberOfCores', 1)
            ad['RequestCpus'] = numberOfCores
            ad['RequestMemory'] = int(job['estimatedMemoryUsage']) if job.get(
                'estimatedMemoryUsage', None) else 1000
            ad['RequestDisk'] = int(job['estimatedDiskUsage']) if job.get(
                'estimatedDiskUsage',
                None) else 20 * 1000 * 1000 * numberOfCores
            # estimatedJobTime is divided by 60 to get minutes; without an
            # estimate fall back to 12 hours (12 * 60 minutes)
            if job.get('estimatedJobTime', None):
                ad['MaxWallTimeMins'] = int(job['estimatedJobTime']) / 60.0
            else:
                ad['MaxWallTimeMins'] = 12 * 60

            # A value int() cannot convert (bad string, None, ...) is logged
            # and replaced by the default instead of aborting the submission
            taskPriority = job.get('taskPriority', self.defaultTaskPriority)
            try:
                taskPriority = int(taskPriority)
            except (TypeError, ValueError):
                logging.error("Job taskPriority %s not an int, using default",
                              taskPriority)
                taskPriority = self.defaultTaskPriority

            priority = job.get('priority', 0)
            try:
                priority = int(priority)
            except (TypeError, ValueError):
                logging.error("Job priority %s not an int, using 0", priority)
                priority = 0

            ad['JobPrio'] = int(taskPriority + priority * self.maxTaskPriority)

            postJobPrio1 = -1 * len(job.get('potentialSites', []))
            postJobPrio2 = -1 * job['taskID']

            ad['PostJobPrio1'] = int(postJobPrio1)
            ad['PostJobPrio2'] = int(postJobPrio2)

            # Add OS requirements for jobs
            scramArch = job.get('scramArch')
            if scramArch is not None and scramArch.startswith("slc6_"):
                ad['REQUIRED_OS'] = "rhel6"
            else:
                ad['REQUIRED_OS'] = "any"

            # NOTE(review): a classad.ClassAd (not a plain dict) is handed to
            # convertFromUnicodeToStr -- confirm the helper supports that type
            ad = convertFromUnicodeToStr(ad)
            classAds.append((ad, 1))

        return classAds
    def getClusterAd(self):
        """
        _getClusterAd_

        Return common cluster classad

        scriptFile & Output/Error/Log filenames shortened to
        avoid condorg submission errors from >256 chars paths

        Attributes named in the SUBMIT_ATTRS / SUBMIT_EXPRS condor
        configuration knobs are copied into the ad as well, unless the ad
        already defines them or they are listed in params_to_skip.

        :return: the populated classad.ClassAd after it has been passed
                 through convertFromUnicodeToStr
        """
        ad = classad.ClassAd()

        #ad['universe'] = "vanilla"
        ad['Requirements'] = classad.ExprTree(self.reqStr)
        ad['ShouldTransferFiles'] = "YES"
        ad['WhenToTransferOutput'] = "ON_EXIT"
        ad['UserLogUseXML'] = True
        ad['JobNotification'] = 0
        ad['Cmd'] = self.scriptFile
        # Short, per-proc file names built at evaluation time from the job ids
        ad['Out'] = classad.ExprTree(
            'strcat("condor.", ClusterId, ".", ProcId, ".out")')
        ad['Err'] = classad.ExprTree(
            'strcat("condor.", ClusterId, ".", ProcId, ".err")')
        ad['UserLog'] = classad.ExprTree(
            'strcat("condor.", ClusterId, ".", ProcId, ".log")')

        ad['WMAgent_AgentName'] = self.agent

        # NOTE(review): `Unknown` is unquoted, so it reads as an attribute
        # reference rather than the string "Unknown" -- confirm this is intended
        ad['JOBGLIDEIN_CMSSite'] = classad.ExprTree(
            'isUndefined(GLIDEIN_CMSSite) ? Unknown : GLIDEIN_CMSSite')

        ad['JobLeaseDuration'] = classad.ExprTree(
            'isUndefined(MachineAttrMaxHibernateTime0) ? 1200 : MachineAttrMaxHibernateTime0'
        )

        # Required for global pool accounting
        ad['AcctGroup'] = self.acctGroup
        ad['AcctGroupUser'] = self.acctGroupUser

        # Customized classAds for this plugin
        ad['DESIRED_Archs'] = "INTEL,X86_64"

        ad['x509userproxy'] = self.x509userproxy
        ad['x509userproxysubject'] = self.x509userproxysubject

        ad['Rank'] = 0.0
        ad['TransferIn'] = False

        # TODO: remove when 8.5.7 is deployed
        # Gather the attribute names listed in the two knobs. A knob that is
        # not configured contributes nothing instead of raising KeyError.
        params_to_add = []
        for knob in ('SUBMIT_ATTRS', 'SUBMIT_EXPRS'):
            if knob in htcondor.param:
                params_to_add.extend(htcondor.param[knob].split())

        params_to_skip = (
            'accounting_group', 'use_x509userproxy', 'PostJobPrio2',
            'JobAdInformationAttrs'
        )
        for param in params_to_add:
            # Never override what was set above, and ignore the skipped names
            if param in ad or param in params_to_skip:
                continue
            if param in htcondor.param:
                ad[param] = classad.ExprTree(htcondor.param[param])

        ad = convertFromUnicodeToStr(ad)
        return ad
Example #5
0
    def getProcAds(self, jobList):
        """
        _getProcAds_

        Return list of job specific classads for submission

        :param jobList: iterable of dict-like jobs. Each job must provide
            'cache_dir', 'sandbox', 'packageDir', 'id', 'retry_count',
            'possibleSites', 'potentialSites', 'requestName', 'jobid',
            'taskName', 'taskType' and 'taskID'; every other key read here
            is optional and falls back to a default.
        :return: list of (ad, 1) tuples, one per job, where ad is what
                 convertFromUnicodeToStr returns for the job's ClassAd
                 (presumably 1 is the number of procs to queue for that
                 ad -- confirm against the submit call)
        """
        # Compiled once: the same pattern is applied to every job
        groupPattern = re.compile("^[a-zA-Z0-9_]+_([a-zA-Z0-9]+)-")

        classAds = []
        for job in jobList:
            ad = classad.ClassAd()

            ad['Iwd'] = job['cache_dir']
            ad['TransferInput'] = "%s,%s/%s,%s" % (job['sandbox'], job['packageDir'],
                                                   'JobPackage.pkl', self.unpacker)
            ad['Arguments'] = "%s %i" % (os.path.basename(job['sandbox']), job['id'])

            ad['TransferOutput'] = "Report.%i.pkl" % job["retry_count"]

            ad['JobMachineAttrs'] = "GLIDEIN_CMSSite"
            ad['JobAdInformationAttrs'] = "JobStatus,QDate,EnteredCurrentStatus,JobStartDate,DESIRED_Sites,ExtDESIRED_Sites,WMAgent_JobID,MATCH_EXP_JOBGLIDEIN_CMSSite"

            sites = ','.join(sorted(job.get('possibleSites')))
            ad['DESIRED_Sites'] = sites

            sites = ','.join(sorted(job.get('potentialSites')))
            ad['ExtDESIRED_Sites'] = sites

            ad['WMAgent_RequestName'] = job['requestName']

            # CMSGroups is the token captured by the pattern from the
            # request name, or undefined when the name does not match
            match = groupPattern.match(job['requestName'])
            if match:
                ad['CMSGroups'] = match.group(1)
            else:
                ad['CMSGroups'] = classad.Value.Undefined

            ad['WMAgent_JobID'] = job['jobid']
            ad['WMAgent_SubTaskName'] = job['taskName']
            ad['CMS_JobType'] = job['taskType']

            # Handling for AWS, cloud and opportunistic resources
            ad['AllowOpportunistic'] = job.get('allowOpportunistic', False)

            if job.get('inputDataset'):
                ad['DESIRED_CMSDataset'] = job['inputDataset']
            else:
                ad['DESIRED_CMSDataset'] = classad.Value.Undefined

            if job.get('inputDatasetLocations'):
                sites = ','.join(sorted(job['inputDatasetLocations']))
                ad['DESIRED_CMSDataLocations'] = sites
            else:
                ad['DESIRED_CMSDataLocations'] = classad.Value.Undefined

            # HighIO jobs
            ad['Requestioslots'] = int(job.get('highIOjob', False))

            # Performance and resource estimates
            numberOfCores = job.get('numberOfCores', 1)
            ad['RequestCpus'] = numberOfCores
            ad['RequestMemory'] = int(job['estimatedMemoryUsage']) if job.get('estimatedMemoryUsage', None) else 1000
            ad['RequestDisk'] = int(job['estimatedDiskUsage']) if job.get('estimatedDiskUsage', None) else 20*1000*1000*numberOfCores
            # estimatedJobTime is divided by 60 to get minutes; without an
            # estimate fall back to 12 hours (12 * 60 minutes)
            if job.get('estimatedJobTime', None):
                ad['MaxWallTimeMins'] = int(job['estimatedJobTime'])/60.0
            else:
                ad['MaxWallTimeMins'] = 12*60

            # A value int() cannot convert (bad string, None, ...) is logged
            # and replaced by the default instead of aborting the submission
            taskPriority = job.get('taskPriority', self.defaultTaskPriority)
            try:
                taskPriority = int(taskPriority)
            except (TypeError, ValueError):
                logging.error("Job taskPriority %s not an int, using default", taskPriority)
                taskPriority = self.defaultTaskPriority

            priority = job.get('priority', 0)
            try:
                priority = int(priority)
            except (TypeError, ValueError):
                logging.error("Job priority %s not an int, using 0", priority)
                priority = 0

            ad['JobPrio'] = int(taskPriority + priority * self.maxTaskPriority)

            postJobPrio1 = -1 * len(job.get('potentialSites', []))
            postJobPrio2 = -1 * job['taskID']

            ad['PostJobPrio1'] = int(postJobPrio1)
            ad['PostJobPrio2'] = int(postJobPrio2)

            # Add OS requirements for jobs
            scramArch = job.get('scramArch')
            if scramArch is not None and scramArch.startswith("slc6_"):
                ad['REQUIRED_OS'] = "rhel6"
            else:
                ad['REQUIRED_OS'] = "any"

            # NOTE(review): a classad.ClassAd (not a plain dict) is handed to
            # convertFromUnicodeToStr -- confirm the helper supports that type
            ad = convertFromUnicodeToStr(ad)
            classAds.append((ad, 1))

        return classAds