def insertJobInQueue(self, job, classAdJob):
    """Validate an individual job's requirements and insert it into the Task Queue.

    Reads the JobRequirements section of the job ClassAd, builds the
    requirements dictionary expected by the TaskQueueDB and inserts the
    job with its user priority. If the insertion fails, the job is
    force-removed from the queue and an error is returned.
    """
    reqClassAd = ClassAd(classAdJob.get_expression("JobRequirements"))
    requirements = {}
    for field in self.taskQueueDB.getSingleValueTQDefFields():
        if not reqClassAd.lookupAttribute(field):
            continue
        # CPUTime is numeric; every other single-value field is a string
        if field == 'CPUTime':
            requirements[field] = reqClassAd.getAttributeInt(field)
        else:
            requirements[field] = reqClassAd.getAttributeString(field)
    for field in self.taskQueueDB.getMultiValueTQDefFields():
        if reqClassAd.lookupAttribute(field):
            requirements[field] = reqClassAd.getListFromExpression(field)
    priority = reqClassAd.getAttributeInt('UserPriority')
    insertResult = self.taskQueueDB.insertJob(job, requirements, priority)
    if insertResult['OK']:
        return S_OK()
    self.log.error("Cannot insert job %s in task queue: %s" % (job, insertResult['Message']))
    # Force removing the job from the TQ if it was actually inserted
    deleteResult = self.taskQueueDB.deleteJob(job)
    if deleteResult['OK'] and deleteResult['Value']:
        self.log.info("Job %s removed from the TQ" % job)
    return S_ERROR("Cannot insert in task queue")
def insertJobInQueue( self, job, classAdJob ):
  """ Check individual job and add to the Task Queue eventually.

      The JobRequirements expression is parsed into its own ClassAd, the
      task-queue definition fields are collected into a dictionary and the
      job is inserted with its UserPriority. On failure the job is removed
      from the TQ again (in case it was partially inserted).
  """
  reqAd = ClassAd( classAdJob.get_expression( "JobRequirements" ) )
  requirementsDict = {}
  for field in self.taskQueueDB.getSingleValueTQDefFields():
    if not reqAd.lookupAttribute( field ):
      continue
    # CPUTime is an integer attribute, all other single-value fields are strings
    getValue = reqAd.getAttributeInt if field == 'CPUTime' else reqAd.getAttributeString
    requirementsDict[field] = getValue( field )
  for field in self.taskQueueDB.getMultiValueTQDefFields():
    if reqAd.lookupAttribute( field ):
      requirementsDict[field] = reqAd.getListFromExpression( field )
  userPriority = reqAd.getAttributeInt( 'UserPriority' )
  insertResult = self.taskQueueDB.insertJob( job, requirementsDict, userPriority )
  if insertResult['OK']:
    return S_OK()
  self.log.error( "Cannot insert job %s in task queue: %s" % ( job, insertResult[ 'Message' ] ) )
  # Force removing the job from the TQ if it was actually inserted
  removal = self.taskQueueDB.deleteJob( job )
  if removal['OK'] and removal['Value']:
    self.log.info( "Job %s removed from the TQ" % job )
  return S_ERROR( "Cannot insert in task queue" )
def __processResourceDescription(self, resourceDescription):
    """Normalise a resource description into a plain dictionary.

    :param resourceDescription: either a JDL string or a dictionary of
                                resource parameters
    :return: dict of task-queue fields plus optional JobID / DIRACVersion /
             VirtualOrganization; on an unparsable JDL an S_ERROR dict is
             returned instead (callers must check the type)
    """
    # Check and form the resource description dictionary
    resourceDict = {}
    if type(resourceDescription) in StringTypes:
        classAdAgent = ClassAd(resourceDescription)
        if not classAdAgent.isOK():
            # NOTE: inconsistent return type (S_ERROR vs plain dict) kept
            # for backward compatibility with existing callers
            return S_ERROR('Illegal Resource JDL')
        gLogger.verbose(classAdAgent.asJDL())
        for name in gTaskQueueDB.getSingleValueTQDefFields():
            if classAdAgent.lookupAttribute(name):
                if name == 'CPUTime':
                    resourceDict[name] = classAdAgent.getAttributeInt(name)
                else:
                    resourceDict[name] = classAdAgent.getAttributeString(name)
        for name in gTaskQueueDB.getMultiValueMatchFields():
            if classAdAgent.lookupAttribute(name):
                # BUGFIX: SubmitPool is a list expression; reading it with
                # getAttributeString (as before) lost the list structure.
                # The sibling implementations in this file parse it with
                # getListFromExpression, so do the same here.
                if name == 'SubmitPool':
                    resourceDict[name] = classAdAgent.getListFromExpression(name)
                else:
                    resourceDict[name] = classAdAgent.getAttributeString(name)
        # Check if a JobID is requested
        if classAdAgent.lookupAttribute('JobID'):
            resourceDict['JobID'] = classAdAgent.getAttributeInt('JobID')
        if classAdAgent.lookupAttribute('DIRACVersion'):
            resourceDict['DIRACVersion'] = classAdAgent.getAttributeString('DIRACVersion')
        if classAdAgent.lookupAttribute('VirtualOrganization'):
            resourceDict['VirtualOrganization'] = classAdAgent.getAttributeString('VirtualOrganization')
    else:
        for name in gTaskQueueDB.getSingleValueTQDefFields():
            if name in resourceDescription:
                resourceDict[name] = resourceDescription[name]
        for name in gTaskQueueDB.getMultiValueMatchFields():
            if name in resourceDescription:
                resourceDict[name] = resourceDescription[name]
        if 'JobID' in resourceDescription:
            resourceDict['JobID'] = resourceDescription['JobID']
        if 'DIRACVersion' in resourceDescription:
            resourceDict['DIRACVersion'] = resourceDescription['DIRACVersion']
        if 'VirtualOrganization' in resourceDescription:
            resourceDict['VirtualOrganization'] = resourceDescription['VirtualOrganization']
    return resourceDict
def __processResourceDescription(self, resourceDescription):
    """Translate a resource description (JDL string or dict) into a flat dict.

    Extracts the task-queue definition fields, an optional JobID and a fixed
    set of pilot bookkeeping keys. An unparsable JDL yields an S_ERROR dict.
    """
    resourceDict = {}
    if type(resourceDescription) in StringTypes:
        ad = ClassAd(resourceDescription)
        if not ad.isOK():
            return S_ERROR('Illegal Resource JDL')
        gLogger.verbose(ad.asJDL())
        for name in gTaskQueueDB.getSingleValueTQDefFields():
            if not ad.lookupAttribute(name):
                continue
            # CPUTime is numeric, all other single-value fields are strings
            if name == 'CPUTime':
                resourceDict[name] = ad.getAttributeInt(name)
            else:
                resourceDict[name] = ad.getAttributeString(name)
        for name in gTaskQueueDB.getMultiValueMatchFields():
            if not ad.lookupAttribute(name):
                continue
            # SubmitPool is a list expression, everything else a plain string
            if name == 'SubmitPool':
                resourceDict[name] = ad.getListFromExpression(name)
            else:
                resourceDict[name] = ad.getAttributeString(name)
        # Check if a JobID is requested
        if ad.lookupAttribute('JobID'):
            resourceDict['JobID'] = ad.getAttributeInt('JobID')
        for k in ('DIRACVersion', 'ReleaseVersion', 'ReleaseProject',
                  'VirtualOrganization'):
            if ad.lookupAttribute(k):
                resourceDict[k] = ad.getAttributeString(k)
    else:
        for name in gTaskQueueDB.getSingleValueTQDefFields():
            if resourceDescription.has_key(name):
                resourceDict[name] = resourceDescription[name]
        for name in gTaskQueueDB.getMultiValueMatchFields():
            if resourceDescription.has_key(name):
                resourceDict[name] = resourceDescription[name]
        if resourceDescription.has_key('JobID'):
            resourceDict['JobID'] = resourceDescription['JobID']
        for k in ('DIRACVersion', 'ReleaseVersion', 'ReleaseProject',
                  'VirtualOrganization', 'PilotReference',
                  'PilotInfoReportedFlag', 'PilotBenchmark', 'LHCbPlatform'):
            if k in resourceDescription:
                resourceDict[k] = resourceDescription[k]
    return resourceDict
def _processResourceDescription( self, resourceDescription ):
  """ Check and form the resource description dictionary
      resourceDescription is a ceDict coming from a JobAgent, for example.

      Raises ValueError when a JDL string cannot be parsed.
  """
  resourceDict = {}
  if type( resourceDescription ) in StringTypes:
    agentAd = ClassAd( resourceDescription )
    if not agentAd.isOK():
      raise ValueError( 'Illegal Resource JDL' )
    self.log.verbose( agentAd.asJDL() )
    for field in singleValueDefFields:
      if agentAd.lookupAttribute( field ):
        # CPUTime is the only integer among the single-value fields
        reader = agentAd.getAttributeInt if field == 'CPUTime' else agentAd.getAttributeString
        resourceDict[field] = reader( field )
    for field in multiValueMatchFields:
      if agentAd.lookupAttribute( field ):
        # SubmitPool is the only list-valued match field
        reader = agentAd.getListFromExpression if field == 'SubmitPool' else agentAd.getAttributeString
        resourceDict[field] = reader( field )
    # A specific JobID may be requested by the pilot
    if agentAd.lookupAttribute( 'JobID' ):
      resourceDict['JobID'] = agentAd.getAttributeInt( 'JobID' )
    for key in ( 'DIRACVersion', 'ReleaseVersion', 'ReleaseProject', 'VirtualOrganization' ):
      if agentAd.lookupAttribute( key ):
        resourceDict[ key ] = agentAd.getAttributeString( key )
  else:
    for field in singleValueDefFields:
      if field in resourceDescription:
        resourceDict[field] = resourceDescription[field]
    for field in multiValueMatchFields:
      if field in resourceDescription:
        resourceDict[field] = resourceDescription[field]
    if 'JobID' in resourceDescription:
      resourceDict['JobID'] = resourceDescription['JobID']
    for key in ( 'DIRACVersion', 'ReleaseVersion', 'ReleaseProject', 'VirtualOrganization',
                 'PilotReference', 'PilotBenchmark', 'PilotInfoReportedFlag' ):
      if key in resourceDescription:
        resourceDict[ key ] = resourceDescription[ key ]
  return resourceDict
def _processResourceDescription( self, resourceDescription ):
  """ Check and form the resource description dictionary
      resourceDescription is a ceDict coming from a JobAgent, for example.

      Raises ValueError when a JDL string cannot be parsed.
  """
  resourceDict = {}
  if isinstance( resourceDescription, basestring ):
    jdlAd = ClassAd( resourceDescription )
    if not jdlAd.isOK():
      raise ValueError( 'Illegal Resource JDL' )
    self.log.verbose( jdlAd.asJDL() )
    for attr in singleValueDefFields:
      if not jdlAd.lookupAttribute( attr ):
        continue
      # CPUTime is numeric; the other single-value fields are strings
      if attr == 'CPUTime':
        resourceDict[attr] = jdlAd.getAttributeInt( attr )
      else:
        resourceDict[attr] = jdlAd.getAttributeString( attr )
    for attr in multiValueMatchFields:
      if not jdlAd.lookupAttribute( attr ):
        continue
      # SubmitPool carries a list expression, the rest are plain strings
      if attr == 'SubmitPool':
        resourceDict[attr] = jdlAd.getListFromExpression( attr )
      else:
        resourceDict[attr] = jdlAd.getAttributeString( attr )
    # An explicit JobID may be requested
    if jdlAd.lookupAttribute( 'JobID' ):
      resourceDict['JobID'] = jdlAd.getAttributeInt( 'JobID' )
    for attr in ( 'DIRACVersion', 'ReleaseVersion', 'ReleaseProject', 'VirtualOrganization' ):
      if jdlAd.lookupAttribute( attr ):
        resourceDict[ attr ] = jdlAd.getAttributeString( attr )
  else:
    # Plain dictionary: copy the recognised keys through unchanged
    copyKeys = list( singleValueDefFields ) + list( multiValueMatchFields )
    copyKeys += [ 'JobID', 'DIRACVersion', 'ReleaseVersion', 'ReleaseProject',
                  'VirtualOrganization', 'PilotReference', 'PilotBenchmark',
                  'PilotInfoReportedFlag' ]
    for attr in copyKeys:
      if attr in resourceDescription:
        resourceDict[ attr ] = resourceDescription[ attr ]
  return resourceDict
def __processResourceDescription( self, resourceDescription ):
  """ Turn a resource description (JDL string or dictionary) into a flat dict.

      Collects the task-queue definition fields, an optional JobID and a
      fixed set of pilot bookkeeping keys. Returns S_ERROR on a bad JDL.
  """
  resourceDict = {}
  if type( resourceDescription ) in StringTypes:
    agentAd = ClassAd( resourceDescription )
    if not agentAd.isOK():
      return S_ERROR( 'Illegal Resource JDL' )
    gLogger.verbose( agentAd.asJDL() )
    for field in gTaskQueueDB.getSingleValueTQDefFields():
      if not agentAd.lookupAttribute( field ):
        continue
      # CPUTime is the only integer single-value field
      if field == 'CPUTime':
        resourceDict[field] = agentAd.getAttributeInt( field )
      else:
        resourceDict[field] = agentAd.getAttributeString( field )
    for field in gTaskQueueDB.getMultiValueMatchFields():
      if not agentAd.lookupAttribute( field ):
        continue
      # SubmitPool is the only list-valued match field
      if field == 'SubmitPool':
        resourceDict[field] = agentAd.getListFromExpression( field )
      else:
        resourceDict[field] = agentAd.getAttributeString( field )
    # Check if a JobID is requested
    if agentAd.lookupAttribute( 'JobID' ):
      resourceDict['JobID'] = agentAd.getAttributeInt( 'JobID' )
    for key in ( 'DIRACVersion', 'ReleaseVersion', 'ReleaseProject', 'VirtualOrganization' ):
      if agentAd.lookupAttribute( key ):
        resourceDict[ key ] = agentAd.getAttributeString( key )
  else:
    for field in gTaskQueueDB.getSingleValueTQDefFields():
      if field in resourceDescription:
        resourceDict[field] = resourceDescription[field]
    for field in gTaskQueueDB.getMultiValueMatchFields():
      if field in resourceDescription:
        resourceDict[field] = resourceDescription[field]
    if 'JobID' in resourceDescription:
      resourceDict['JobID'] = resourceDescription['JobID']
    for key in ( 'DIRACVersion', 'ReleaseVersion', 'ReleaseProject', 'VirtualOrganization',
                 'PilotReference', 'PilotInfoReportedFlag', 'PilotBenchmark' ):
      if key in resourceDescription:
        resourceDict[ key ] = resourceDescription[ key ]
  return resourceDict
def __processResourceDescription( self, resourceDescription ):
  """ Turn a resource description into a flat dictionary.

      :param resourceDescription: JDL string or dictionary of resource parameters
      :return: dict of task-queue fields plus optional JobID / DIRACVersion /
               VirtualOrganization; S_ERROR dict on an unparsable JDL
               (callers must check the type)
  """
  # Check and form the resource description dictionary
  resourceDict = {}
  if type( resourceDescription ) in StringTypes:
    classAdAgent = ClassAd( resourceDescription )
    if not classAdAgent.isOK():
      return S_ERROR( 'Illegal Resource JDL' )
    gLogger.verbose( classAdAgent.asJDL() )
    for name in gTaskQueueDB.getSingleValueTQDefFields():
      if classAdAgent.lookupAttribute( name ):
        if name == 'CPUTime':
          resourceDict[name] = classAdAgent.getAttributeInt( name )
        else:
          resourceDict[name] = classAdAgent.getAttributeString( name )
    for name in gTaskQueueDB.getMultiValueMatchFields():
      if classAdAgent.lookupAttribute( name ):
        # BUGFIX: SubmitPool is a list expression; reading it with
        # getAttributeString (as before) lost the list structure.
        # The sibling implementations in this file use
        # getListFromExpression for it, so do the same here.
        if name == 'SubmitPool':
          resourceDict[name] = classAdAgent.getListFromExpression( name )
        else:
          resourceDict[name] = classAdAgent.getAttributeString( name )
    # Check if a JobID is requested
    if classAdAgent.lookupAttribute( 'JobID' ):
      resourceDict['JobID'] = classAdAgent.getAttributeInt( 'JobID' )
    if classAdAgent.lookupAttribute( 'DIRACVersion' ):
      resourceDict['DIRACVersion'] = classAdAgent.getAttributeString( 'DIRACVersion' )
    if classAdAgent.lookupAttribute( 'VirtualOrganization' ):
      resourceDict['VirtualOrganization'] = classAdAgent.getAttributeString( 'VirtualOrganization' )
  else:
    for name in gTaskQueueDB.getSingleValueTQDefFields():
      if name in resourceDescription:
        resourceDict[name] = resourceDescription[name]
    for name in gTaskQueueDB.getMultiValueMatchFields():
      if name in resourceDescription:
        resourceDict[name] = resourceDescription[name]
    if 'JobID' in resourceDescription:
      resourceDict['JobID'] = resourceDescription['JobID']
    if 'DIRACVersion' in resourceDescription:
      resourceDict['DIRACVersion'] = resourceDescription['DIRACVersion']
    if 'VirtualOrganization' in resourceDescription:
      resourceDict['VirtualOrganization'] = resourceDescription['VirtualOrganization']
  return resourceDict
def _getProcessingType(self, jobID):
    """Get the Processing Type from the JDL, until it is promoted to a real Attribute.

    Falls back to "unknown" when the JDL cannot be fetched or the attribute
    is absent.
    """
    result = self.jobDB.getJobJDL(jobID, original=True)
    if not result["OK"]:
        return "unknown"
    classAdJob = ClassAd(result["Value"])
    if not classAdJob.lookupAttribute("ProcessingType"):
        return "unknown"
    return classAdJob.getAttributeString("ProcessingType")
def __getProcessingType(self, jobID):
    """ Get the Processing Type from the JDL, until it is promoted to a real Attribute

        Returns 'unknown' when the JDL is unavailable or the attribute is missing.
    """
    processingType = 'unknown'
    result = self.jobDB.getJobJDL(jobID, original=True)
    if result['OK']:
        classAdJob = ClassAd(result['Value'])
        if classAdJob.lookupAttribute('ProcessingType'):
            processingType = classAdJob.getAttributeString('ProcessingType')
    return processingType
def __getProcessingType( self, jobID ):
  """ Get the Processing Type from the JDL, until it is promoted to a real Attribute

      'unknown' is returned when the JDL cannot be read or lacks the attribute.
  """
  default = 'unknown'
  result = self.jobDB.getJobJDL( jobID, original = True )
  if not result['OK']:
    return default
  jdlAd = ClassAd( result['Value'] )
  return jdlAd.getAttributeString( 'ProcessingType' ) if jdlAd.lookupAttribute( 'ProcessingType' ) else default
def __getProcessingType(self, jobID):
    """ Get the Processing Type from the JDL, until it is promoted to a real Attribute

        Defaults to "unknown" on any failure to read the JDL or the attribute.
    """
    result = self.jobDB.getJobJDL(jobID, original=True)
    if result["OK"]:
        jdlAd = ClassAd(result["Value"])
        if jdlAd.lookupAttribute("ProcessingType"):
            return jdlAd.getAttributeString("ProcessingType")
    return "unknown"
def submitNewBigJob( self ):
  """Submit the wrapped DIRAC job to the Hadoop/Hive endpoint.

  Downloads the job input sandbox to a server-side scratch area, copies it
  to the Hadoop master with the Hive client, obtains the owner's proxy and
  submits the job.

  :return: S_OK( endpointJobId ) or S_ERROR( message )
  """
  result = jobDB.getJobJDL( str( self.__jobID ) , True )
  classAdJob = ClassAd( result['Value'] )
  executableFile = ""
  if classAdJob.lookupAttribute( 'Executable' ):
    executableFile = classAdJob.getAttributeString( 'Executable' )
  tempPath = self.__tmpSandBoxDir
  dirac = Dirac()
  if not os.path.exists( tempPath ):
    os.makedirs( tempPath )
  settingJobSandBoxDir = dirac.getInputSandbox( self.__jobID, tempPath )
  self.log.info( 'Writting temporal SandboxDir in Server', settingJobSandBoxDir )
  moveData = self.__tmpSandBoxDir + "/InputSandbox" + str( self.__jobID )
  HiveV1Cli = HiveV1Client( self.__User , self.__publicIP )
  returned = HiveV1Cli.dataCopy( moveData, self.__tmpSandBoxDir )
  self.log.info( 'Copy the job contain to the Hadoop Master with HIVE: ', returned )
  jobInfo = jobDB.getJobAttributes( self.__jobID )
  if not jobInfo['OK']:
    # BUGFIX: failed results carry the reason under 'Message'; 'Value'
    # does not exist on an error result and raised a KeyError here
    return S_ERROR( jobInfo['Message'] )
  proxy = ""
  jobInfo = jobInfo['Value']
  # Reuse a cached proxy when available, otherwise request a fresh one
  if gProxyManager.userHasProxy( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] ):
    proxy = gProxyManager.downloadProxyToFile( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )
  else:
    proxy = self.__requestProxyFromProxyManager( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )
  HiveJob = "InputSandbox" + str( self.__jobID ) + "/" + executableFile
  HiveJobOutput = str( self.__jobID ) + "_" + executableFile + "_out"
  # NOTE(review): proxy['chain'] assumes the proxy call returned a dict with
  # a 'chain' key -- confirm both branches above guarantee that shape
  returned = HiveV1Cli.jobSubmit( tempPath, HiveJob, proxy['chain'], HiveJobOutput )
  self.log.info( 'Launch Hadoop-Hive job to the Master: ', returned )
  if not returned['OK']:
    return S_ERROR( returned['Message'] )
  self.log.info( 'Hadoop-Hive Job ID: ', returned['Value'] )
  return S_OK( returned['Value'] )
def export_submitJob(self, jobDesc):
    """ Submit a single job to DIRAC WMS

        :param jobDesc: JDL job description string
        :return: S_OK with the new job ID (a list of IDs for a parametric
                 job) or S_ERROR on refusal / failure. The result also
                 carries 'JobID' and 'requireProxyUpload' keys.
    """
    if self.peerUsesLimitedProxy:
        return S_ERROR("Can't submit using a limited proxy! (bad boy!)")
    # Check job submission permission
    result = self.jobPolicy.getJobPolicy()
    if not result['OK']:
        return S_ERROR('Failed to get job policies')
    policyDict = result['Value']
    if not policyDict[RIGHT_SUBMIT]:
        return S_ERROR('Job submission not authorized')
    #jobDesc is JDL for now; normalise the outer brackets
    jobDesc = jobDesc.strip()
    # BUGFIX: an empty description used to raise IndexError on jobDesc[0]
    if not jobDesc:
        return S_ERROR('Empty job description')
    if jobDesc[0] != "[":
        jobDesc = "[%s" % jobDesc
    if jobDesc[-1] != "]":
        jobDesc = "%s]" % jobDesc
    # Check if the job is a parameteric one
    jobClassAd = ClassAd(jobDesc)
    parametricJob = False
    if jobClassAd.lookupAttribute('Parameters'):
        parametricJob = True
        if jobClassAd.isAttributeList('Parameters'):
            # Explicit list of parameter values
            parameterList = jobClassAd.getListFromExpression('Parameters')
        else:
            # 'Parameters' is a count: generate the values from
            # ParameterStart / ParameterStep / ParameterFactor
            pStep = 0
            pFactor = 1
            pStart = 1
            nParameters = jobClassAd.getAttributeInt('Parameters')
            if not nParameters:
                value = jobClassAd.get_expression('Parameters')
                return S_ERROR(
                    'Illegal value for Parameters JDL field: %s' % value)
            if jobClassAd.lookupAttribute('ParameterStart'):
                value = jobClassAd.get_expression(
                    'ParameterStart').replace('"', '')
                # BUGFIX: catch only ValueError -- the previous bare
                # 'except:' clauses also swallowed exit signals
                try:
                    pStart = int(value)
                except ValueError:
                    try:
                        pStart = float(value)
                    except ValueError:
                        return S_ERROR(
                            'Illegal value for ParameterStart JDL field: %s'
                            % value)
            if jobClassAd.lookupAttribute('ParameterStep'):
                pStep = jobClassAd.getAttributeInt('ParameterStep')
                if not pStep:
                    pStep = jobClassAd.getAttributeFloat('ParameterStep')
                    if not pStep:
                        value = jobClassAd.get_expression('ParameterStep')
                        return S_ERROR(
                            'Illegal value for ParameterStep JDL field: %s'
                            % value)
            if jobClassAd.lookupAttribute('ParameterFactor'):
                pFactor = jobClassAd.getAttributeInt('ParameterFactor')
                if not pFactor:
                    pFactor = jobClassAd.getAttributeFloat(
                        'ParameterFactor')
                    if not pFactor:
                        value = jobClassAd.get_expression(
                            'ParameterFactor')
                        return S_ERROR(
                            'Illegal value for ParameterFactor JDL field: %s'
                            % value)
            # Geometric-arithmetic progression: p[i+1] = p[i]*factor + step
            parameterList = list()
            parameterList.append(pStart)
            for i in range(nParameters - 1):
                parameterList.append(parameterList[i] * pFactor + pStep)
        if len(parameterList) > self.maxParametricJobs:
            return S_ERROR(
                'The number of parametric jobs exceeded the limit of %d'
                % self.maxParametricJobs)
        # Expand one JDL per parameter; %s -> value, %n -> zero-padded index
        jobDescList = []
        nParam = len(parameterList) - 1
        for n, p in enumerate(parameterList):
            newJobDesc = jobDesc.replace('%s', str(p)).replace(
                '%n', str(n).zfill(len(str(nParam))))
            newClassAd = ClassAd(newJobDesc)
            for attr in ['Parameters', 'ParameterStep', 'ParameterFactor']:
                newClassAd.deleteAttribute(attr)
            # NOTE(review): list-valued parameters ('{...}') are inserted
            # via insertAttributeInt, presumably to keep the raw expression
            # unquoted -- confirm against the ClassAd API
            if isinstance(p, str) and p.startswith('{'):
                newClassAd.insertAttributeInt('Parameter', str(p))
            else:
                newClassAd.insertAttributeString('Parameter', str(p))
            newClassAd.insertAttributeInt('ParameterNumber', n)
            newJDL = newClassAd.asJDL()
            jobDescList.append(newJDL)
    else:
        jobDescList = [jobDesc]
    jobIDList = []
    for jobDescription in jobDescList:
        result = gJobDB.insertNewJobIntoDB(jobDescription, self.owner,
                                           self.ownerDN, self.ownerGroup,
                                           self.diracSetup)
        if not result['OK']:
            return result
        jobID = result['JobID']
        gLogger.info('Job %s added to the JobDB for %s/%s' %
                     (jobID, self.ownerDN, self.ownerGroup))
        gJobLoggingDB.addLoggingRecord(jobID, result['Status'],
                                       result['MinorStatus'],
                                       source='JobManager')
        jobIDList.append(jobID)
    #Set persistency flag
    retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup)
    if 'Value' not in retVal or not retVal['Value']:
        gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True)
    if parametricJob:
        result = S_OK(jobIDList)
    else:
        result = S_OK(jobIDList[0])
    result['JobID'] = result['Value']
    result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired()
    self.__sendNewJobsToMind(jobIDList)
    return result
def _processResourceDescription(self, resourceDescription):
    """ Check and form the resource description dictionary
        resourceDescription is a ceDict coming from a JobAgent, for example.

        Accepts either a JDL string or a dictionary; raises ValueError on
        an unparsable JDL. Returns a flat dict of task-queue fields plus
        optional JobID, pilot bookkeeping keys and resource 'Tag' entries.
    """
    resourceDict = {}
    if isinstance(resourceDescription, basestring):
        # JDL string: parse it and pull out the recognised attributes
        classAdAgent = ClassAd(resourceDescription)
        if not classAdAgent.isOK():
            raise ValueError('Illegal Resource JDL')
        self.log.verbose(classAdAgent.asJDL())
        for name in singleValueDefFields:
            if classAdAgent.lookupAttribute(name):
                # CPUTime is the only integer single-value field
                if name == 'CPUTime':
                    resourceDict[name] = classAdAgent.getAttributeInt(name)
                else:
                    resourceDict[name] = classAdAgent.getAttributeString(
                        name)
        for name in multiValueMatchFields:
            if classAdAgent.lookupAttribute(name):
                # SubmitPool is a list expression; the rest are strings
                if name == 'SubmitPool':
                    resourceDict[
                        name] = classAdAgent.getListFromExpression(name)
                else:
                    resourceDict[name] = classAdAgent.getAttributeString(
                        name)
        # Check if a JobID is requested
        if classAdAgent.lookupAttribute('JobID'):
            resourceDict['JobID'] = classAdAgent.getAttributeInt('JobID')
        for k in ('DIRACVersion', 'ReleaseVersion', 'ReleaseProject',
                  'VirtualOrganization'):
            if classAdAgent.lookupAttribute(k):
                resourceDict[k] = classAdAgent.getAttributeString(k)
    else:
        # Plain dictionary: copy the recognised keys through unchanged
        for name in singleValueDefFields:
            if resourceDescription.has_key(name):
                resourceDict[name] = resourceDescription[name]
        for name in multiValueMatchFields:
            if resourceDescription.has_key(name):
                resourceDict[name] = resourceDescription[name]
        if resourceDescription.has_key('JobID'):
            resourceDict['JobID'] = resourceDescription['JobID']
        # Convert MaxRAM and NumberOfCores parameters into a list of tags
        maxRAM = resourceDescription.get('MaxRAM')
        nCores = resourceDescription.get('NumberOfProcessors')
        for param, key in [(maxRAM, 'GB'), (nCores, 'Cores')]:
            if param:
                try:
                    # NOTE(review): int(param)/1000 is applied to BOTH
                    # values; for typical NumberOfProcessors (< 1000) this
                    # yields 0, so no 'Cores' tags are ever produced --
                    # presumably only MaxRAM (MB -> GB) needs the /1000;
                    # confirm the intended behavior before changing
                    intValue = int(param) / 1000
                    if intValue <= 128:
                        # Tag every capacity up to the limit, e.g. 1GB..4GB
                        paramList = range(1, intValue + 1)
                        paramTags = [
                            '%d%s' % (par, key) for par in paramList
                        ]
                        resourceDict.setdefault("Tag", []).extend(paramTags)
                except ValueError:
                    pass
        if 'Tag' in resourceDict:
            # De-duplicate the accumulated tags
            resourceDict['Tag'] = list(set(resourceDict['Tag']))
        for k in ('DIRACVersion', 'ReleaseVersion', 'ReleaseProject',
                  'VirtualOrganization', 'PilotReference', 'PilotBenchmark',
                  'PilotInfoReportedFlag'):
            if k in resourceDescription:
                resourceDict[k] = resourceDescription[k]
    return resourceDict
def _processResourceDescription( self, resourceDescription ):
  """ Check and form the resource description dictionary
      resourceDescription is a ceDict coming from a JobAgent, for example.

      Accepts either a JDL string or a dictionary; raises ValueError on an
      unparsable JDL. Returns a flat dict of task-queue fields plus optional
      JobID, pilot bookkeeping keys and resource 'Tag' entries.
  """
  resourceDict = {}
  if isinstance( resourceDescription, basestring ):
    # JDL string: parse it and pull out the recognised attributes
    classAdAgent = ClassAd( resourceDescription )
    if not classAdAgent.isOK():
      raise ValueError( 'Illegal Resource JDL' )
    self.log.verbose( classAdAgent.asJDL() )
    for name in singleValueDefFields:
      if classAdAgent.lookupAttribute( name ):
        # CPUTime is the only integer single-value field
        if name == 'CPUTime':
          resourceDict[name] = classAdAgent.getAttributeInt( name )
        else:
          resourceDict[name] = classAdAgent.getAttributeString( name )
    for name in multiValueMatchFields:
      if classAdAgent.lookupAttribute( name ):
        # SubmitPool is a list expression; the rest are plain strings
        if name == 'SubmitPool':
          resourceDict[name] = classAdAgent.getListFromExpression( name )
        else:
          resourceDict[name] = classAdAgent.getAttributeString( name )
    # Check if a JobID is requested
    if classAdAgent.lookupAttribute( 'JobID' ):
      resourceDict['JobID'] = classAdAgent.getAttributeInt( 'JobID' )
    for k in ( 'DIRACVersion', 'ReleaseVersion', 'ReleaseProject', 'VirtualOrganization' ):
      if classAdAgent.lookupAttribute( k ):
        resourceDict[ k ] = classAdAgent.getAttributeString( k )
  else:
    # Plain dictionary: copy the recognised keys through unchanged
    for name in singleValueDefFields:
      if resourceDescription.has_key( name ):
        resourceDict[name] = resourceDescription[name]
    for name in multiValueMatchFields:
      if resourceDescription.has_key( name ):
        resourceDict[name] = resourceDescription[name]
    if 'JobID' in resourceDescription:
      resourceDict['JobID'] = resourceDescription['JobID']
    # Convert MaxRAM and NumberOfCores parameters into a list of tags
    maxRAM = resourceDescription.get( 'MaxRAM' )
    nCores = resourceDescription.get( 'NumberOfProcessors' )
    for param, key in [ ( maxRAM, 'GB' ), ( nCores, 'Cores' ) ]:
      if param:
        try:
          # NOTE(review): int(param)/1000 is applied to BOTH values; for
          # typical NumberOfProcessors (< 1000) this yields 0, so no
          # 'Cores' tags are ever produced -- presumably only MaxRAM
          # (MB -> GB) needs the /1000; confirm before changing
          intValue = int( param )/1000
          if intValue <= 128:
            # Tag every capacity up to the limit, e.g. 1GB..4GB
            paramList = range( 1, intValue + 1 )
            paramTags = [ '%d%s' % ( par, key ) for par in paramList ]
            resourceDict.setdefault( "Tag", [] ).extend( paramTags )
        except ValueError:
          pass
    if 'Tag' in resourceDict:
      # De-duplicate the accumulated tags
      resourceDict['Tag'] = list( set( resourceDict['Tag'] ) )
    for k in ( 'DIRACVersion', 'ReleaseVersion', 'ReleaseProject', 'VirtualOrganization',
               'PilotReference', 'PilotBenchmark', 'PilotInfoReportedFlag' ):
      if k in resourceDescription:
        resourceDict[ k ] = resourceDescription[ k ]
  return resourceDict
def submitNewBigJob( self ):
  """Submit the wrapped DIRAC job to a Hadoop (interactive) endpoint.

  Creates a server-side scratch folder, downloads the job input sandbox
  into it, copies it to the Hadoop master, builds the `hadoop jar` command
  line from the job name / dataset and submits it with the owner's proxy.

  :return: S_OK( endpointJobId ) or S_ERROR( message )
  """
  # 1.- create the temporary scratch folder for this job
  self.log.debug( 'Step1::: mkdir temp folder' )
  tempPath = self.__tmpSandBoxDir + str( self.__jobID ) + "/"
  dirac = Dirac()
  if not os.path.exists( tempPath ):
    os.makedirs( tempPath )
  # 2.- download the input sandbox into the temp folder
  self.log.debug( 'Step2::: download inputsand to temp folder' )
  settingJobSandBoxDir = dirac.getInputSandbox( self.__jobID, tempPath )
  self.log.info( 'Writting temporal SandboxDir in Server', settingJobSandBoxDir )
  moveData = tempPath + "/InputSandbox" + str( self.__jobID )
  # 3.- copy the sandbox contents to the Hadoop master
  self.log.debug( 'Step2::: download inputsandbox to temp folder' )
  HadoopV1InteractiveCli = HadoopV1InteractiveClient( self.__User , self.__publicIP, self.__Port )
  returned = HadoopV1InteractiveCli.dataCopy( tempPath, self.__tmpSandBoxDir )
  self.log.debug( 'Returned of copy the job contain to the Hadoop Master with HadoopInteractive::: ', returned )
  # Get the executable name from the job JDL
  result = jobDB.getJobJDL( str( self.__jobID ) , True )
  classAdJob = ClassAd( result['Value'] )
  executableFile = ""
  if classAdJob.lookupAttribute( 'Executable' ):
    executableFile = classAdJob.getAttributeString( 'Executable' )
  self.log.debug( 'Step3::: Get executable file: ', executableFile )
  jobInfo = jobDB.getJobAttributes( self.__jobID )
  if not jobInfo['OK']:
    # BUGFIX: error results carry the reason under 'Message'; 'Value'
    # does not exist on an error result and raised a KeyError here
    return S_ERROR( jobInfo['Message'] )
  proxy = ""
  jobInfo = jobInfo['Value']
  # Reuse a cached proxy when available, otherwise request a fresh one
  if gProxyManager.userHasProxy( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] ):
    proxy = gProxyManager.downloadProxyToFile( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )
  else:
    proxy = self.__requestProxyFromProxyManager( jobInfo["OwnerDN"], jobInfo["OwnerGroup"] )
  HadoopInteractiveJob = "InputSandbox" + str( self.__jobID ) + "/" + executableFile
  HadoopInteractiveJobOutput = tempPath + str( self.__jobID ) + "_" + executableFile + "_out"
  # 4.- build the command line; a multi-word job name selects the
  # "named job" form, otherwise the dataset path is used
  # (the unused HadoopInteractiveJobCommand local and the commented-out
  # "random writer" variants were removed)
  if len( re.split( " ", self.__JobName ) ) > 1:
    nameParts = re.split( " ", self.__JobName )
    name_job = nameParts[0] + " " + nameParts[1]
    cmd = "hadoop jar " + tempPath + HadoopInteractiveJob + " " + name_job + " " + tempPath + "/InputSandbox" + str( self.__jobID ) + "/" + "/dataset-USC-a-grep '[and]+'"
  else:
    # Drop the first two components of the dataset path
    datasetname = ""
    for pathPart in re.split( "/", self.__Dataset )[2:]:
      datasetname = datasetname + "/" + pathPart
    cmd = "hadoop jar " + tempPath + HadoopInteractiveJob + " " + self.__JobName + " " + datasetname + " " + tempPath + "/" + self.__JobName.replace( " ", "" ) + "_" + str( self.__jobID )
  self.log.debug( 'Step4::: Making CMD for submission: ', cmd )
  self.log.debug( 'Step5::: Submit file to hadoop: ' )
  returned = HadoopV1InteractiveCli.jobSubmit( tempPath, HadoopInteractiveJob, proxy['chain'],
                                               HadoopInteractiveJobOutput, cmd )
  self.log.info( 'Launch Hadoop-HadoopInteractive job to the Master: ', returned )
  if not returned['OK']:
    return S_ERROR( returned['Message'] )
  self.log.info( 'Hadoop-HadoopInteractive Job ID: ', returned['Value'] )
  return S_OK( returned['Value'] )
def selectJob( self, resourceDescription ):
  """ Main job selection function to find the highest priority job matching the resource capacity

      The resource description is either a JDL string or a dictionary. The
      pilot DIRAC version is optionally checked, the site mask is applied,
      per-site job limits are added as extra match conditions, and the
      matched job is flagged 'Matched' before its JDL and owner attributes
      are returned.
  """
  startTime = time.time()
  # Check and form the resource description dictionary
  resourceDict = {}
  if type( resourceDescription ) in StringTypes:
    # JDL string: parse it and pull out the recognised attributes
    classAdAgent = ClassAd( resourceDescription )
    if not classAdAgent.isOK():
      return S_ERROR( 'Illegal Resource JDL' )
    gLogger.verbose( classAdAgent.asJDL() )
    for name in gTaskQueueDB.getSingleValueTQDefFields():
      if classAdAgent.lookupAttribute( name ):
        # CPUTime is the only integer single-value field
        if name == 'CPUTime':
          resourceDict[name] = classAdAgent.getAttributeInt( name )
        else:
          resourceDict[name] = classAdAgent.getAttributeString( name )
    for name in gTaskQueueDB.getMultiValueMatchFields():
      if classAdAgent.lookupAttribute( name ):
        # NOTE(review): multi-value fields (e.g. SubmitPool) are read as
        # plain strings here, while sibling implementations in this file
        # use getListFromExpression -- confirm which is intended
        resourceDict[name] = classAdAgent.getAttributeString( name )
    # Check if a JobID is requested
    if classAdAgent.lookupAttribute( 'JobID' ):
      resourceDict['JobID'] = classAdAgent.getAttributeInt( 'JobID' )
    if classAdAgent.lookupAttribute( 'DIRACVersion' ):
      resourceDict['DIRACVersion'] = classAdAgent.getAttributeString( 'DIRACVersion' )
    if classAdAgent.lookupAttribute( 'VirtualOrganization' ):
      resourceDict['VirtualOrganization'] = classAdAgent.getAttributeString( 'VirtualOrganization' )
  else:
    # Plain dictionary: copy the recognised keys through unchanged
    for name in gTaskQueueDB.getSingleValueTQDefFields():
      if resourceDescription.has_key( name ):
        resourceDict[name] = resourceDescription[name]
    for name in gTaskQueueDB.getMultiValueMatchFields():
      if resourceDescription.has_key( name ):
        resourceDict[name] = resourceDescription[name]
    if resourceDescription.has_key( 'JobID' ):
      resourceDict['JobID'] = resourceDescription['JobID']
    if resourceDescription.has_key( 'DIRACVersion' ):
      resourceDict['DIRACVersion'] = resourceDescription['DIRACVersion']
    if resourceDescription.has_key( 'VirtualOrganization' ):
      resourceDict['VirtualOrganization'] = resourceDescription['VirtualOrganization']
  # Check the pilot DIRAC version
  if self.checkPilotVersion:
    if not 'DIRACVersion' in resourceDict:
      return S_ERROR( 'Version check requested and not provided by Pilot' )
    # Check if the matching Request provides a VirtualOrganization
    if 'VirtualOrganization' in resourceDict:
      voName = resourceDict['VirtualOrganization']
    # Check if the matching Request provides an OwnerGroup
    elif 'OwnerGroup' in resourceDict:
      voName = getVOForGroup( resourceDict['OwnerGroup'] )
    # else take the default VirtualOrganization for the installation
    else:
      voName = getVOForGroup( '' )
    # The required pilot version is configured per VO and setup
    self.pilotVersion = gConfig.getValue( '/Operations/%s/%s/Versions/PilotVersion' % ( voName, self.setup ), '' )
    if self.pilotVersion and resourceDict['DIRACVersion'] != self.pilotVersion:
      return S_ERROR( 'Pilot version does not match the production version %s:%s' % \
                      ( resourceDict['DIRACVersion'], self.pilotVersion ) )
  # Get common site mask and check the agent site
  result = gJobDB.getSiteMask( siteState = 'Active' )
  if result['OK']:
    maskList = result['Value']
  else:
    return S_ERROR( 'Internal error: can not get site mask' )
  if not 'Site' in resourceDict:
    return S_ERROR( 'Missing Site Name in Resource JDL' )
  siteName = resourceDict['Site']
  if resourceDict['Site'] not in maskList:
    # Masked site: still allow matching by CE only, if one was given
    if 'GridCE' in resourceDict:
      del resourceDict['Site']
    else:
      return S_ERROR( 'Site not in mask and GridCE not specified' )
  resourceDict['Setup'] = self.serviceInfoDict['clientSetup']
  if DEBUG:
    print "Resource description:"
    for key, value in resourceDict.items():
      print key.rjust( 20 ), value
  # Check if Job Limits are imposed onto the site
  extraConditions = {}
  if self.siteJobLimits:
    result = self.getExtraConditions( siteName )
    if result['OK']:
      extraConditions = result['Value']
  if extraConditions:
    gLogger.info( 'Job Limits for site %s are: %s' % ( siteName, str( extraConditions ) ) )
  # Ask the task queue for the highest-priority matching job
  result = gTaskQueueDB.matchAndGetJob( resourceDict, extraConditions = extraConditions )
  if DEBUG:
    print result
  if not result['OK']:
    return result
  result = result['Value']
  if not result['matchFound']:
    return S_ERROR( 'No match found' )
  jobID = result['jobId']
  resAtt = gJobDB.getJobAttributes( jobID, ['OwnerDN', 'OwnerGroup', 'Status'] )
  if not resAtt['OK']:
    return S_ERROR( 'Could not retrieve job attributes' )
  if not resAtt['Value']:
    return S_ERROR( 'No attributes returned for job' )
  if not resAtt['Value']['Status'] == 'Waiting':
    # Stale TQ entry: the job moved on; drop it from the task queue
    # NOTE(review): the match still proceeds after this -- confirm that
    # handing out a non-Waiting job is intended
    gLogger.error( 'Job %s matched by the TQ is not in Waiting state' % str( jobID ) )
    result = gTaskQueueDB.deleteJob( jobID )
  # Mark the job as taken and record the state change
  result = gJobDB.setJobStatus( jobID, status = 'Matched', minor = 'Assigned' )
  result = gJobLoggingDB.addLoggingRecord( jobID, status = 'Matched', minor = 'Assigned', source = 'Matcher' )
  result = gJobDB.getJobJDL( jobID )
  if not result['OK']:
    return S_ERROR( 'Failed to get the job JDL' )
  resultDict = {}
  resultDict['JDL'] = result['Value']
  resultDict['JobID'] = jobID
  matchTime = time.time() - startTime
  gLogger.info( "Match time: [%s]" % str( matchTime ) )
  gMonitor.addMark( "matchTime", matchTime )
  # Get some extra stuff into the response returned
  resOpt = gJobDB.getJobOptParameters( jobID )
  if resOpt['OK']:
    for key, value in resOpt['Value'].items():
      resultDict[key] = value
  resAtt = gJobDB.getJobAttributes( jobID, ['OwnerDN', 'OwnerGroup'] )
  if not resAtt['OK']:
    return S_ERROR( 'Could not retrieve job attributes' )
  if not resAtt['Value']:
    return S_ERROR( 'No attributes returned for job' )
  resultDict['DN'] = resAtt['Value']['OwnerDN']
  resultDict['Group'] = resAtt['Value']['OwnerGroup']
  return S_OK( resultDict )
def execute( self ):
  """Main Agent code:
    1.- Query TaskQueueDB for existing TQs
    2.- Count Pending Jobs
    3.- Submit Jobs

  Walks every configured director and each of its running Big Data
  endpoints, matches waiting TaskQueue jobs against the endpoint's
  software stack (by job name pattern), accumulates one submission
  request per endpoint in ``bigDataJobsToSubmit``, and finally queues
  the actual submissions on the director thread pools.

  Returns DIRAC.S_OK() on success, or the failed S_ERROR structure of
  the first TaskQueueDB/JobDB call that fails.
  """
  self.__checkSubmitPools()

  bigDataJobsToSubmit = {}
  bigDataJobIdsToSubmit = {}  # NOTE(review): never read afterwards — appears unused

  for directorName, directorDict in self.directors.items():
    self.log.verbose( 'Checking Director:', directorName )
    self.log.verbose( 'RunningEndPoints:', directorDict['director'].runningEndPoints )
    for runningEndPointName in directorDict['director'].runningEndPoints:
      runningEndPointDict = directorDict['director'].runningEndPoints[runningEndPointName]
      NameNode = runningEndPointDict['NameNode']

      # Count jobs already Submitted or Running on this endpoint; DB
      # lookup failures are silently treated as "0 jobs" here.
      jobsByEndPoint = 0
      result = BigDataDB.getBigDataJobsByStatusAndEndpoint( 'Submitted', NameNode )
      if result['OK']:
        jobsByEndPoint += len( result['Value'] )
      result = BigDataDB.getBigDataJobsByStatusAndEndpoint( 'Running', NameNode )
      if result['OK']:
        jobsByEndPoint += len( result['Value'] )
      self.log.verbose( 'Checking Jobs By EndPoint %s:' % jobsByEndPoint )
      jobLimitsEndPoint = runningEndPointDict['LimitQueueJobsEndPoint']

      # Remaining submission slots for this endpoint; skip it when full
      bigDataJobs = 0
      if jobsByEndPoint >= jobLimitsEndPoint:
        self.log.info( '%s >= %s Running jobs reach job limits: %s, skipping' %
                       ( jobsByEndPoint, jobLimitsEndPoint, runningEndPointName ) )
        continue
      else:
        bigDataJobs = jobLimitsEndPoint - jobsByEndPoint
      requirementsDict = runningEndPointDict['Requirements']

      self.log.info( 'Requirements Dict: ', requirementsDict )
      result = taskQueueDB.getMatchingTaskQueues( requirementsDict )
      if not result['OK']:
        self.log.error( 'Could not retrieve TaskQueues from TaskQueueDB', result['Message'] )
        return result

      taskQueueDict = result['Value']
      self.log.info( 'Task Queues Dict: ', taskQueueDict )
      # Aggregated counters over all matching task queues of this endpoint
      jobs = 0
      priority = 0
      cpu = 0
      jobsID = 0  # NOTE(review): never used below
      self.log.info( 'Pending Jobs from TaskQueue, which not matching before: ', self.pendingTaskQueueJobs )
      for tq in taskQueueDict:
        jobs += taskQueueDict[tq]['Jobs']
        priority += taskQueueDict[tq]['Priority']
        cpu += taskQueueDict[tq]['Jobs'] * taskQueueDict[tq]['CPUTime']

        #Matching of Jobs with BigData Softwares
        #This process is following the sequence:
        #Retrieve a job from taskqueueDict
        #Get job name and try to match with the resources
        #If not match store the var pendingTaskQueueJobs for the
        #next iteration
        #
        #This matching is doing with the following JobName Pattern
        # NameSoftware _ SoftwareVersion _ HighLanguageName _ HighLanguageVersion _ DataSetName
        #extract a job from the TaskQueue
        if tq not in self.pendingTaskQueueJobs.keys():
          self.pendingTaskQueueJobs[tq] = {}
        getJobFromTaskQueue = taskQueueDB.matchAndGetJob( taskQueueDict[tq] )
        if not getJobFromTaskQueue['OK']:
          self.log.error( 'Could not get Job and FromTaskQueue', getJobFromTaskQueue['Message'] )
          return getJobFromTaskQueue

        jobInfo = getJobFromTaskQueue['Value']
        jobID = jobInfo['jobId']
        jobAttrInfo = jobDB.getJobAttributes( jobID )
        if not jobAttrInfo['OK']:
          self.log.error( 'Could not get Job Attributes', jobAttrInfo['Message'] )
          return jobAttrInfo
        jobInfoUniq = jobAttrInfo['Value']
        jobName = jobInfoUniq['JobName']
        # Park the job as "pending"; it is removed again below on a
        # successful software match
        self.pendingTaskQueueJobs[tq][jobID] = jobName

        result = jobDB.getJobJDL( jobID, True )
        # NOTE(review): result['OK'] is not checked before using
        # result['Value'] — a JobDB failure would raise here; confirm
        # whether getJobJDL can fail in practice
        classAdJob = ClassAd( result['Value'] )
        arguments = 0
        if classAdJob.lookupAttribute( 'Arguments' ):
          arguments = classAdJob.getAttributeString( 'Arguments' )
        #if not classAdJob.lookupAttribute( 'Arguments' ):
        #  continue
        jobsToSubmit = self.matchingJobsForBDSubmission( arguments,
                                                         runningEndPointName,
                                                         runningEndPointDict['BigDataSoftware'],
                                                         runningEndPointDict['BigDataSoftwareVersion'],
                                                         runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                         runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                         jobID )
        if ( jobsToSubmit == "OK" ):
          if directorName not in bigDataJobsToSubmit:
            bigDataJobsToSubmit[directorName] = {}
          if runningEndPointName not in bigDataJobsToSubmit[directorName]:
            bigDataJobsToSubmit[directorName][runningEndPointName] = {}
          # NOTE(review): only ONE job per endpoint survives here — each
          # match overwrites the previous dict for this endpoint
          bigDataJobsToSubmit[directorName][runningEndPointName] = { 'JobId': jobID,
                                                                     'JobName': jobName,
                                                                     'TQPriority': priority,
                                                                     'CPUTime': cpu,
                                                                     'BigDataEndpoint': runningEndPointName,
                                                                     'BigDataEndpointNameNode': runningEndPointDict['NameNode'],
                                                                     'BdSoftware': runningEndPointDict['BigDataSoftware'],
                                                                     'BdSoftwareVersion': runningEndPointDict['BigDataSoftwareVersion'],
                                                                     'HLLName' : runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                                     'HLLVersion' : runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                                     'NumBigDataJobsAllowedToSubmit': bigDataJobs,
                                                                     'SiteName': runningEndPointDict['SiteName'],
                                                                     'PublicIP': runningEndPointDict['PublicIP'],
                                                                     'User': runningEndPointDict['User'],
                                                                     'Port': runningEndPointDict['Port'],
                                                                     'UsePilot': runningEndPointDict['UsePilot'],
                                                                     'IsInteractive': runningEndPointDict['IsInteractive'],
                                                                     'Arguments': arguments }
          del self.pendingTaskQueueJobs[tq][jobID]
        else:
          self.log.error( jobsToSubmit )

      # Second pass: retry jobs left pending by earlier iterations
      # against the CURRENT endpoint
      self.log.info( 'Pending Jobs from TaskQueue, which not matching after: ', self.pendingTaskQueueJobs )
      for tq in self.pendingTaskQueueJobs.keys():
        # Python 2 .keys() returns a list copy, so the del below while
        # looping is safe
        for jobid in self.pendingTaskQueueJobs[tq].keys():
          result = jobDB.getJobJDL( jobid, True )
          classAdJob = ClassAd( result['Value'] )
          arguments = 0
          if classAdJob.lookupAttribute( 'Arguments' ):
            arguments = classAdJob.getAttributeString( 'Arguments' )
          #if not classAdJob.lookupAttribute( 'Arguments' ):
          #  continue
          #do the match with the runningEndPoint
          jobsToSubmit = self.matchingJobsForBDSubmission( arguments,
                                                           runningEndPointName,
                                                           runningEndPointDict['BigDataSoftware'],
                                                           runningEndPointDict['BigDataSoftwareVersion'],
                                                           runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                           runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                           jobid )
          if ( jobsToSubmit == "OK" ):
            if directorName not in bigDataJobsToSubmit:
              bigDataJobsToSubmit[directorName] = {}
            if runningEndPointName not in bigDataJobsToSubmit[directorName]:
              bigDataJobsToSubmit[directorName][runningEndPointName] = {}
            bigDataJobsToSubmit[directorName][runningEndPointName] = { 'JobId': jobid,
                                                                       'JobName': self.pendingTaskQueueJobs[tq][jobid],
                                                                       'TQPriority': priority,
                                                                       'CPUTime': cpu,
                                                                       'BigDataEndpoint': runningEndPointName,
                                                                       'BigDataEndpointNameNode': runningEndPointDict['NameNode'],
                                                                       'BdSoftware': runningEndPointDict['BigDataSoftware'],
                                                                       'BdSoftwareVersion': runningEndPointDict['BigDataSoftwareVersion'],
                                                                       'HLLName' : runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                                       'HLLVersion' : runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                                       'NumBigDataJobsAllowedToSubmit': bigDataJobs,
                                                                       'SiteName': runningEndPointDict['SiteName'],
                                                                       'PublicIP': runningEndPointDict['PublicIP'],
                                                                       'User': runningEndPointDict['User'],
                                                                       'Port': runningEndPointDict['Port'],
                                                                       'UsePilot': runningEndPointDict['UsePilot'],
                                                                       'IsInteractive': runningEndPointDict['IsInteractive'],
                                                                       'Arguments': arguments }
            del self.pendingTaskQueueJobs[tq][jobid]
          else:
            self.log.error( jobsToSubmit )

      if not jobs and not self.pendingTaskQueueJobs:
        self.log.info( 'No matching jobs for %s found, skipping' % NameNode )
        continue
      self.log.info( '___BigDataJobsTo Submit:', bigDataJobsToSubmit )

  # Queue the actual submissions on the directors' thread pools
  for directorName, JobsToSubmitDict in bigDataJobsToSubmit.items():
    for runningEndPointName, jobsToSubmitDict in JobsToSubmitDict.items():
      if self.directors[directorName]['isEnabled']:
        self.log.info( 'Requesting submission to %s of %s' % ( runningEndPointName, directorName ) )

        director = self.directors[directorName]['director']
        pool = self.pools[self.directors[directorName]['pool']]

        jobIDs = JobsToSubmitDict[runningEndPointName]['JobId']
        jobName = JobsToSubmitDict[runningEndPointName]['JobName']
        endpoint = JobsToSubmitDict[runningEndPointName]['BigDataEndpoint']
        runningSiteName = JobsToSubmitDict[runningEndPointName]['SiteName']
        NameNode = JobsToSubmitDict[runningEndPointName]['BigDataEndpointNameNode']
        BigDataSoftware = JobsToSubmitDict[runningEndPointName]['BdSoftware']
        BigDataSoftwareVersion = JobsToSubmitDict[runningEndPointName]['BdSoftwareVersion']
        HLLName = JobsToSubmitDict[runningEndPointName]['HLLName']
        HLLVersion = JobsToSubmitDict[runningEndPointName]['HLLVersion']
        PublicIP = JobsToSubmitDict[runningEndPointName]['PublicIP']
        User = JobsToSubmitDict[runningEndPointName]['User']
        Port = JobsToSubmitDict[runningEndPointName]['Port']
        UsePilot = JobsToSubmitDict[runningEndPointName]['UsePilot']
        IsInteractive = JobsToSubmitDict[runningEndPointName]['IsInteractive']
        Arguments = JobsToSubmitDict[runningEndPointName]['Arguments']
        numBigDataJobsAllowed = JobsToSubmitDict[runningEndPointName]['NumBigDataJobsAllowedToSubmit']

        ret = pool.generateJobAndQueueIt( director.submitBigDataJobs,
                                          args = ( endpoint, numBigDataJobsAllowed, runningSiteName,
                                                   NameNode, BigDataSoftware, BigDataSoftwareVersion, HLLName,
                                                   HLLVersion, PublicIP, Port, jobIDs, runningEndPointName,
                                                   jobName, User, self.jobDataset, UsePilot, IsInteractive ),
                                          oCallback = self.callBack,
                                          oExceptionCallback = director.exceptionCallBack,
                                          blocking = False )
        if not ret['OK']:
          # Disable submission until next iteration
          self.directors[directorName]['isEnabled'] = False
        else:
          time.sleep( self.am_getOption( 'ThreadStartDelay' ) )

  if 'Default' in self.pools:
    # only for those in "Default' thread Pool
    # for pool in self.pools:
    self.pools['Default'].processAllResults()

  return DIRAC.S_OK()
def selectJob( self, resourceDescription ): """ Main job selection function to find the highest priority job matching the resource capacity """ startTime = time.time() # Check and form the resource description dictionary resourceDict = {} if type( resourceDescription ) in StringTypes: classAdAgent = ClassAd( resourceDescription ) if not classAdAgent.isOK(): return S_ERROR( 'Illegal Resource JDL' ) gLogger.verbose( classAdAgent.asJDL() ) for name in taskQueueDB.getSingleValueTQDefFields(): if classAdAgent.lookupAttribute( name ): if name == 'CPUTime': resourceDict[name] = classAdAgent.getAttributeInt( name ) else: resourceDict[name] = classAdAgent.getAttributeString( name ) for name in taskQueueDB.getMultiValueMatchFields(): if classAdAgent.lookupAttribute( name ): resourceDict[name] = classAdAgent.getAttributeString( name ) # Check if a JobID is requested if classAdAgent.lookupAttribute( 'JobID' ): resourceDict['JobID'] = classAdAgent.getAttributeInt( 'JobID' ) if classAdAgent.lookupAttribute( 'DIRACVersion' ): resourceDict['DIRACVersion'] = classAdAgent.getAttributeString( 'DIRACVersion' ) else: for name in taskQueueDB.getSingleValueTQDefFields(): if resourceDescription.has_key( name ): resourceDict[name] = resourceDescription[name] for name in taskQueueDB.getMultiValueMatchFields(): if resourceDescription.has_key( name ): resourceDict[name] = resourceDescription[name] if resourceDescription.has_key( 'JobID' ): resourceDict['JobID'] = resourceDescription['JobID'] if resourceDescription.has_key( 'DIRACVersion' ): resourceDict['DIRACVersion'] = resourceDescription['DIRACVersion'] # Check the pilot DIRAC version if self.checkPilotVersion: if 'DIRACVersion' in resourceDict: if self.pilotVersion and resourceDict['DIRACVersion'] != self.pilotVersion: return S_ERROR( 'Pilot version does not match the production version %s:%s' % \ ( resourceDict['DIRACVersion'], self.pilotVersion ) ) # Get common site mask and check the agent site result = jobDB.getSiteMask( siteState = 
'Active' ) if result['OK']: maskList = result['Value'] else: return S_ERROR( 'Internal error: can not get site mask' ) if not 'Site' in resourceDict: return S_ERROR( 'Missing Site Name in Resource JDL' ) siteName = resourceDict['Site'] if resourceDict['Site'] not in maskList: if 'GridCE' in resourceDict: del resourceDict['Site'] else: return S_ERROR( 'Site not in mask and GridCE not specified' ) resourceDict['Setup'] = self.serviceInfoDict['clientSetup'] if DEBUG: print "Resource description:" for k, v in resourceDict.items(): print k.rjust( 20 ), v # Check if Job Limits are imposed onto the site extraConditions = {} if self.siteJobLimits: result = self.getExtraConditions( siteName ) if result['OK']: extraConditions = result['Value'] if extraConditions: gLogger.info( 'Job Limits for site %s are: %s' % ( siteName, str( extraConditions ) ) ) result = taskQueueDB.matchAndGetJob( resourceDict, extraConditions = extraConditions ) if DEBUG: print result if not result['OK']: return result result = result['Value'] if not result['matchFound']: return S_ERROR( 'No match found' ) jobID = result['jobId'] resAtt = jobDB.getJobAttributes( jobID, ['OwnerDN', 'OwnerGroup', 'Status'] ) if not resAtt['OK']: return S_ERROR( 'Could not retrieve job attributes' ) if not resAtt['Value']: return S_ERROR( 'No attributes returned for job' ) if not resAtt['Value']['Status'] == 'Waiting': gLogger.error( 'Job %s matched by the TQ is not in Waiting state' % str( jobID ) ) result = taskQueueDB.deleteJob( jobID ) result = jobDB.setJobStatus( jobID, status = 'Matched', minor = 'Assigned' ) result = jobLoggingDB.addLoggingRecord( jobID, status = 'Matched', minor = 'Assigned', source = 'Matcher' ) result = jobDB.getJobJDL( jobID ) if not result['OK']: return S_ERROR( 'Failed to get the job JDL' ) resultDict = {} resultDict['JDL'] = result['Value'] resultDict['JobID'] = jobID matchTime = time.time() - startTime gLogger.info( "Match time: [%s]" % str( matchTime ) ) gMonitor.addMark( "matchTime", 
matchTime ) # Get some extra stuff into the response returned resOpt = jobDB.getJobOptParameters( jobID ) if resOpt['OK']: for key, value in resOpt['Value'].items(): resultDict[key] = value resAtt = jobDB.getJobAttributes( jobID, ['OwnerDN', 'OwnerGroup'] ) if not resAtt['OK']: return S_ERROR( 'Could not retrieve job attributes' ) if not resAtt['Value']: return S_ERROR( 'No attributes returned for job' ) resultDict['DN'] = resAtt['Value']['OwnerDN'] resultDict['Group'] = resAtt['Value']['OwnerGroup'] return S_OK( resultDict )
def export_submitJob( self, jobDesc ):
  """ Submit a single job to DIRAC WMS.

      The job description is a JDL string; if it carries a 'Parameters'
      attribute, one job is created per parameter value (explicit list,
      or arithmetic/geometric sequence built from ParameterStart /
      ParameterStep / ParameterFactor).

      :param jobDesc: JDL string, with or without enclosing brackets
      :return: S_OK with the new JobID (or the list of JobIDs for a
               parametric job) plus 'JobID' and 'requireProxyUpload'
               entries, or S_ERROR
  """
  if self.peerUsesLimitedProxy:
    return S_ERROR( "Can't submit using a limited proxy! (bad boy!)" )

  # Check job submission permission
  result = self.jobPolicy.getJobPolicy()
  if not result['OK']:
    return S_ERROR( 'Failed to get job policies' )
  policyDict = result['Value']
  if not policyDict[ RIGHT_SUBMIT ]:
    return S_ERROR( 'Job submission not authorized' )

  #jobDesc is JDL for now
  jobDesc = jobDesc.strip()
  if jobDesc[0] != "[":
    jobDesc = "[%s" % jobDesc
  if jobDesc[-1] != "]":
    jobDesc = "%s]" % jobDesc

  # Check if the job is a parameteric one
  jobClassAd = ClassAd( jobDesc )
  parametricJob = False
  if jobClassAd.lookupAttribute( 'Parameters' ):
    parametricJob = True
    if jobClassAd.isAttributeList( 'Parameters' ):
      # Explicit list of parameter values
      parameterList = jobClassAd.getListFromExpression( 'Parameters' )
    else:
      # Sequence definition: Parameters is the number of values,
      # generated as p[i+1] = p[i] * ParameterFactor + ParameterStep
      pStep = 0
      pFactor = 1
      pStart = 1
      nParameters = jobClassAd.getAttributeInt( 'Parameters' )
      if not nParameters:
        value = jobClassAd.get_expression( 'Parameters' )
        return S_ERROR( 'Illegal value for Parameters JDL field: %s' % value )

      if jobClassAd.lookupAttribute( 'ParameterStart' ):
        # Strip quotes so both int and float literals can be parsed
        value = jobClassAd.get_expression( 'ParameterStart' ).replace( '"', '' )
        try:
          pStart = int( value )
        except:
          try:
            pStart = float( value )
          except:
            return S_ERROR( 'Illegal value for ParameterStart JDL field: %s' % value )

      if jobClassAd.lookupAttribute( 'ParameterStep' ):
        # Try integer first, then float; 0/unparseable is rejected
        pStep = jobClassAd.getAttributeInt( 'ParameterStep' )
        if not pStep:
          pStep = jobClassAd.getAttributeFloat( 'ParameterStep' )
          if not pStep:
            value = jobClassAd.get_expression( 'ParameterStep' )
            return S_ERROR( 'Illegal value for ParameterStep JDL field: %s' % value )

      if jobClassAd.lookupAttribute( 'ParameterFactor' ):
        pFactor = jobClassAd.getAttributeInt( 'ParameterFactor' )
        if not pFactor:
          pFactor = jobClassAd.getAttributeFloat( 'ParameterFactor' )
          if not pFactor:
            value = jobClassAd.get_expression( 'ParameterFactor' )
            return S_ERROR( 'Illegal value for ParameterFactor JDL field: %s' % value )

      parameterList = list()
      parameterList.append( pStart )
      for i in range( nParameters - 1 ):
        parameterList.append( parameterList[i] * pFactor + pStep )

    if len( parameterList ) > self.maxParametricJobs:
      return S_ERROR( 'The number of parametric jobs exceeded the limit of %d' % self.maxParametricJobs )

    # Build one JDL per parameter: '%s' expands to the value, '%n' to
    # the zero-padded parameter index
    jobDescList = []
    nParam = len(parameterList) - 1
    for n,p in enumerate(parameterList):
      newJobDesc = jobDesc.replace('%s',str(p)).replace('%n',str(n).zfill(len(str(nParam))))
      newClassAd = ClassAd(newJobDesc)
      for attr in ['Parameters','ParameterStep','ParameterFactor']:
        newClassAd.deleteAttribute(attr)
      if type( p ) == type ( ' ' ) and p.startswith('{'):
        # NOTE(review): a string is passed through insertAttributeInt —
        # presumably to keep list-valued parameters ('{...}') unquoted
        # in the JDL; confirm against the ClassAd API
        newClassAd.insertAttributeInt( 'Parameter',str(p) )
      else:
        newClassAd.insertAttributeString( 'Parameter', str( p ) )
      newClassAd.insertAttributeInt( 'ParameterNumber', n )
      newJDL = newClassAd.asJDL()
      jobDescList.append( newJDL )
  else:
    jobDescList = [ jobDesc ]

  jobIDList = []
  for jobDescription in jobDescList:
    result = gJobDB.insertNewJobIntoDB( jobDescription, self.owner, self.ownerDN, self.ownerGroup, self.diracSetup )
    if not result['OK']:
      return result

    jobID = result['JobID']
    gLogger.info( 'Job %s added to the JobDB for %s/%s' % ( jobID, self.ownerDN, self.ownerGroup ) )

    gJobLoggingDB.addLoggingRecord( jobID, result['Status'], result['MinorStatus'], source = 'JobManager' )

    jobIDList.append( jobID )

  #Set persistency flag
  retVal = gProxyManager.getUserPersistence( self.ownerDN, self.ownerGroup )
  if 'Value' not in retVal or not retVal[ 'Value' ]:
    gProxyManager.setPersistency( self.ownerDN, self.ownerGroup, True )

  # Parametric submission returns the full list; single submission the
  # one JobID. 'JobID' duplicates 'Value' for client convenience.
  if parametricJob:
    result = S_OK( jobIDList )
  else:
    result = S_OK( jobIDList[0] )

  result['JobID'] = result['Value']
  result[ 'requireProxyUpload' ] = self.__checkIfProxyUploadIsRequired()
  self.__sendNewJobsToMind( jobIDList )
  return result
def export_submitJob( self, jobDesc ):
  """ Submit a single job (or a parametric bunch) to the DIRAC WMS.

      :param jobDesc: JDL string, with or without enclosing brackets
      :return: S_OK with the new JobID (or list of JobIDs for a
               parametric job), or S_ERROR
  """
  # Limited proxies may never submit
  if self.peerUsesLimitedProxy:
    return S_ERROR( "Can't submit using a limited proxy! (bad boy!)" )

  # Verify that the caller's credentials grant the submission right
  policyResult = self.jobPolicy.getJobPolicy()
  if not policyResult['OK']:
    return S_ERROR( 'Failed to get job policies' )
  if not policyResult['Value'][ RIGHT_SUBMIT ]:
    return S_ERROR( 'Job submission not authorized' )

  # The description is JDL: normalize the enclosing brackets
  jobDesc = jobDesc.strip()
  if jobDesc[0] != "[":
    jobDesc = "[%s" % jobDesc
  if jobDesc[-1] != "]":
    jobDesc = "%s]" % jobDesc

  # A 'Parameters' attribute marks a parametric job
  jobClassAd = ClassAd( jobDesc )
  parametricJob = jobClassAd.lookupAttribute( 'Parameters' )
  if parametricJob:
    if jobClassAd.isAttributeList( 'Parameters' ):
      # Explicit list of parameter values
      parameterList = jobClassAd.getListFromExpression( 'Parameters' )
    else:
      # Arithmetic sequence: Parameters is the count,
      # ParameterStart/ParameterStep define the progression
      nParameters = jobClassAd.getAttributeInt( 'Parameters' )
      if not nParameters:
        value = jobClassAd.get_expression( 'Parameters' )
        return S_ERROR( 'Illegal value for Parameters JDL field: %s' % value )
      if not jobClassAd.lookupAttribute( 'ParameterStart' ):
        return S_ERROR( 'Missing JDL field ParameterStart' )
      pStart = jobClassAd.getAttributeInt( 'ParameterStart' )
      if not jobClassAd.lookupAttribute( 'ParameterStep' ):
        return S_ERROR( 'Missing JDL field ParameterStep' )
      pStep = jobClassAd.getAttributeInt( 'ParameterStep' )
      if not pStep:
        value = jobClassAd.get_expression( 'ParameterStep' )
        return S_ERROR( 'Illegal value for ParameterStep JDL field: %s' % value )
      parameterList = list( range( pStart, pStart + pStep * nParameters, pStep ) )

    if len( parameterList ) > MAX_PARAMETRIC_JOBS:
      return S_ERROR( 'The number of parametric jobs exceeded the limit of %d' % MAX_PARAMETRIC_JOBS )

    # One job description per parameter value: '%s' expands to the value
    jobDescList = [ jobDesc.replace( '%s', str( p ) ) for p in parameterList ]
  else:
    jobDescList = [ jobDesc ]

  # Insert every description into the JobDB, logging each new job
  jobIDList = []
  for jobDescription in jobDescList:
    result = gJobDB.insertNewJobIntoDB( jobDescription, self.owner, self.ownerDN,
                                        self.ownerGroup, self.diracSetup )
    if not result['OK']:
      return result
    jobID = result['JobID']
    gLogger.info( 'Job %s added to the JobDB for %s/%s' % ( jobID, self.ownerDN, self.ownerGroup ) )
    gJobLoggingDB.addLoggingRecord( jobID, result['Status'], result['MinorStatus'], source = 'JobManager' )
    jobIDList.append( jobID )

  # Make sure the user proxy is kept persistent
  retVal = gProxyManager.getUserPersistence( self.ownerDN, self.ownerGroup )
  if 'Value' not in retVal or not retVal[ 'Value' ]:
    gProxyManager.setPersistency( self.ownerDN, self.ownerGroup, True )

  # Parametric submission returns the whole list, plain submission one ID
  if parametricJob:
    result = S_OK( jobIDList )
  else:
    result = S_OK( jobIDList[0] )
  result['JobID'] = result['Value']
  result[ 'requireProxyUpload' ] = self.__checkIfProxyUploadIsRequired()
  return result
def selectJob(self, resourceDescription):
    """Main job selection function to find the highest priority job
    matching the resource capacity.

    :param resourceDescription: either a JDL string or a dictionary
                                describing the capacity of the resource
    :return: S_OK with a dict holding the job JDL, JobID, owner DN and
             group (plus any optional job parameters), or S_ERROR
    """
    startTime = time.time()

    # Check and form the resource description dictionary
    resourceDict = {}
    if type(resourceDescription) in StringTypes:
        # JDL string description
        classAdAgent = ClassAd(resourceDescription)
        if not classAdAgent.isOK():
            return S_ERROR("Illegal Resource JDL")
        gLogger.verbose(classAdAgent.asJDL())

        for name in taskQueueDB.getSingleValueTQDefFields():
            if classAdAgent.lookupAttribute(name):
                if name == "CPUTime":
                    resourceDict[name] = classAdAgent.getAttributeInt(name)
                else:
                    resourceDict[name] = classAdAgent.getAttributeString(name)

        for name in taskQueueDB.getMultiValueMatchFields():
            if classAdAgent.lookupAttribute(name):
                resourceDict[name] = classAdAgent.getAttributeString(name)

        # Check if a JobID is requested
        if classAdAgent.lookupAttribute("JobID"):
            resourceDict["JobID"] = classAdAgent.getAttributeInt("JobID")

        if classAdAgent.lookupAttribute("DIRACVersion"):
            resourceDict["DIRACVersion"] = classAdAgent.getAttributeString("DIRACVersion")

    else:
        # Dictionary description
        for name in taskQueueDB.getSingleValueTQDefFields():
            if resourceDescription.has_key(name):
                resourceDict[name] = resourceDescription[name]

        for name in taskQueueDB.getMultiValueMatchFields():
            if resourceDescription.has_key(name):
                resourceDict[name] = resourceDescription[name]

        if resourceDescription.has_key("JobID"):
            resourceDict["JobID"] = resourceDescription["JobID"]
        if resourceDescription.has_key("DIRACVersion"):
            resourceDict["DIRACVersion"] = resourceDescription["DIRACVersion"]

    # Check the pilot DIRAC version
    # NOTE(review): if the pilot provides no DIRACVersion the check is
    # silently skipped here, while another Matcher variant in this file
    # rejects such pilots — confirm which behavior is intended
    if self.checkPilotVersion:
        if "DIRACVersion" in resourceDict:
            if self.pilotVersion and resourceDict["DIRACVersion"] != self.pilotVersion:
                return S_ERROR(
                    "Pilot version does not match the production version %s:%s"
                    % (resourceDict["DIRACVersion"], self.pilotVersion)
                )

    # Get common site mask and check the agent site
    result = jobDB.getSiteMask(siteState="Active")
    if result["OK"]:
        maskList = result["Value"]
    else:
        return S_ERROR("Internal error: can not get site mask")

    if not "Site" in resourceDict:
        return S_ERROR("Missing Site Name in Resource JDL")

    siteName = resourceDict["Site"]
    if resourceDict["Site"] not in maskList:
        # A masked-out site may still match site-less jobs if it has a CE
        if "GridCE" in resourceDict:
            del resourceDict["Site"]
        else:
            return S_ERROR("Site not in mask and GridCE not specified")

    resourceDict["Setup"] = self.serviceInfoDict["clientSetup"]

    if DEBUG:
        print "Resource description:"
        for k, v in resourceDict.items():
            print k.rjust(20), v

    # Check if Job Limits are imposed onto the site
    extraConditions = {}
    if self.siteJobLimits:
        result = self.getExtraConditions(siteName)
        if result["OK"]:
            extraConditions = result["Value"]
    if extraConditions:
        gLogger.info("Job Limits for site %s are: %s" % (siteName, str(extraConditions)))

    result = taskQueueDB.matchAndGetJob(resourceDict, extraConditions=extraConditions)

    if DEBUG:
        print result

    if not result["OK"]:
        return result
    result = result["Value"]
    if not result["matchFound"]:
        return S_ERROR("No match found")

    jobID = result["jobId"]
    resAtt = jobDB.getJobAttributes(jobID, ["OwnerDN", "OwnerGroup", "Status"])
    if not resAtt["OK"]:
        return S_ERROR("Could not retrieve job attributes")
    if not resAtt["Value"]:
        return S_ERROR("No attributes returned for job")
    if not resAtt["Value"]["Status"] == "Waiting":
        # Inconsistent TQ entry: drop it from the task queue.
        # NOTE(review): processing then continues and the job is still
        # marked Matched below — confirm this fall-through is intended
        gLogger.error("Job %s matched by the TQ is not in Waiting state" % str(jobID))
        result = taskQueueDB.deleteJob(jobID)

    result = jobDB.setJobStatus(jobID, status="Matched", minor="Assigned")
    result = jobLoggingDB.addLoggingRecord(jobID, status="Matched", minor="Assigned", source="Matcher")
    result = jobDB.getJobJDL(jobID)
    if not result["OK"]:
        return S_ERROR("Failed to get the job JDL")

    resultDict = {}
    resultDict["JDL"] = result["Value"]
    resultDict["JobID"] = jobID

    matchTime = time.time() - startTime
    gLogger.info("Match time: [%s]" % str(matchTime))
    gMonitor.addMark("matchTime", matchTime)

    # Get some extra stuff into the response returned
    resOpt = jobDB.getJobOptParameters(jobID)
    if resOpt["OK"]:
        for key, value in resOpt["Value"].items():
            resultDict[key] = value
    resAtt = jobDB.getJobAttributes(jobID, ["OwnerDN", "OwnerGroup"])
    if not resAtt["OK"]:
        return S_ERROR("Could not retrieve job attributes")
    if not resAtt["Value"]:
        return S_ERROR("No attributes returned for job")
    resultDict["DN"] = resAtt["Value"]["OwnerDN"]
    resultDict["Group"] = resAtt["Value"]["OwnerGroup"]
    return S_OK(resultDict)