def myRequest():
  """Build a file-removal request, validate it, and submit it to the request DB."""
  req = Request()
  req.RequestName = 'myAwesomeRemovalRequest.xml'
  req.JobID = 0
  req.SourceComponent = "myScript"

  # single RemoveFile operation over one hard-coded LFN
  removalOp = Operation()
  removalOp.Type = "RemoveFile"
  fileToRemove = File()
  fileToRemove.LFN = "/ilc/user/s/sailer/test.txt"
  removalOp.addFile( fileToRemove )
  req.addOperation( removalOp )

  # refuse to submit anything the validator rejects
  validation = RequestValidator().validate( req )
  if not validation['OK']:
    raise RuntimeError( "Failover request is not valid: %s" % validation['Message'] )
  print("It is a GOGOGO")
  result = ReqClient().putRequest( req )
  print(result)
def __deleteSandboxFromExternalBackend( self, SEName, SEPFN ):
  """Delete a sandbox stored on an external SE.

  Depending on the CS option "DelayedExternalDeletion" (default True), either
  enqueue a PhysicalRemoval operation in the RMS or delete the file directly
  from the storage element.
  """
  if not self.getCSOption( "DelayedExternalDeletion", True ):
    # direct, synchronous deletion against the storage element
    gLogger.info( "Deleting external Sandbox" )
    try:
      return StorageElement( SEName ).removeFile( SEPFN )
    except Exception as e:
      gLogger.exception( "RM raised an exception while trying to delete a remote sandbox" )
      return S_ERROR( "RM raised an exception while trying to delete a remote sandbox" )

  # delayed deletion: hand the removal over to the request management system
  gLogger.info( "Setting deletion request" )
  try:
    deletionRequest = Request()
    deletionRequest.RequestName = "RemoteSBDeletion:%s|%s:%s" % ( SEName, SEPFN, time.time() )
    removalOp = Operation()
    removalOp.Type = "PhysicalRemoval"
    removalOp.TargetSE = SEName
    sandboxFile = File()
    sandboxFile.PFN = SEPFN
    removalOp.addFile( sandboxFile )
    deletionRequest.addOperation( removalOp )
    return ReqClient().putRequest( deletionRequest )
  except Exception as e:
    gLogger.exception( "Exception while setting deletion request" )
    return S_ERROR( "Cannot set deletion request: %s" % str( e ) )
def test02Props( self ):
  """ props """
  # # valid values
  req = Request()

  req.RequestID = 1
  self.assertEqual( req.RequestID, 1 )

  req.RequestName = "test"
  self.assertEqual( req.RequestName, "test" )

  req.JobID = 1
  self.assertEqual( req.JobID, 1 )

  # each datetime-valued property accepts both an ISO string and a datetime,
  # and always reads back as a datetime
  epoch = datetime.datetime( 1970, 1, 1, 0, 0, 0 )
  for attrName in ( "CreationTime", "SubmitTime", "LastUpdate" ):
    setattr( req, attrName, "1970-01-01 00:00:00" )
    self.assertEqual( getattr( req, attrName ), epoch )
    setattr( req, attrName, epoch )
    self.assertEqual( getattr( req, attrName ), epoch )

  req.Error = ""
def test06Dirty( self ):
  """ dirty records

  Round-trip a request through putRequest/getRequest while deleting and
  replacing operations in between, checking the stored copy stays consistent.
  """
  db = RequestDB()
  r = Request()
  r.RequestName = "dirty"
  # three identical ReplicateAndRegister operations, one scheduled file each
  op1 = Operation( { "Type": "ReplicateAndRegister", "TargetSE": "CERN-USER"} )
  op1 += File( {"LFN": "/a/b/c/1", "Status": "Scheduled", "Checksum": "123456", "ChecksumType": "ADLER32" } )
  op2 = Operation( { "Type": "ReplicateAndRegister", "TargetSE": "CERN-USER"} )
  op2 += File( {"LFN": "/a/b/c/2", "Status": "Scheduled", "Checksum": "123456", "ChecksumType": "ADLER32" } )
  op3 = Operation( { "Type": "ReplicateAndRegister", "TargetSE": "CERN-USER"} )
  op3 += File( {"LFN": "/a/b/c/3", "Status": "Scheduled", "Checksum": "123456", "ChecksumType": "ADLER32" } )
  r += op1
  r += op2
  r += op3
  put = db.putRequest( r )
  self.assertEqual( put["OK"], True, "1. putRequest failed: %s" % put.get( "Message", "" ) )
  reqID = put['Value']
  r = db.getRequest( reqID )
  self.assertEqual( r["OK"], True, "1. getRequest failed: %s" % r.get( "Message", "" ) )
  r = r["Value"]
  # drop the first operation and store again: the DB must persist the removal
  del r[0]
  self.assertEqual( len( r ), 2, "1. len wrong" )
  put = db.putRequest( r )
  self.assertEqual( put["OK"], True, "2. putRequest failed: %s" % put.get( "Message", "" ) )
  reqID = put['Value']
  r = db.getRequest( reqID )
  self.assertEqual( r["OK"], True, "2. getRequest failed: %s" % r.get( "Message", "" ) )
  r = r["Value"]
  self.assertEqual( len( r ), 2, "2. len wrong" )
  # replace the first remaining operation in place with a brand-new one
  op4 = Operation( { "Type": "ReplicateAndRegister", "TargetSE": "CERN-USER"} )
  op4 += File( {"LFN": "/a/b/c/4", "Status": "Scheduled", "Checksum": "123456", "ChecksumType": "ADLER32" } )
  r[0] = op4
  put = db.putRequest( r )
  self.assertEqual( put["OK"], True, "3. putRequest failed: %s" % put.get( "Message", "" ) )
  reqID = put['Value']
  r = db.getRequest( reqID )
  self.assertEqual( r["OK"], True, "3. getRequest failed: %s" % r.get( "Message", "" ) )
  r = r["Value"]
  self.assertEqual( len( r ), 2, "3. len wrong" )
  # cleanup
  delete = db.deleteRequest( reqID )
  self.assertEqual( delete["OK"], True, delete['Message'] if 'Message' in delete else 'OK' )
def prepareTransformationTasks( self, transBody, taskDict, owner = '', ownerGroup = '', ownerDN = '' ):
  """ Prepare tasks, given a taskDict, that is created (with some manipulation) by the DB

  For every task with input data, builds a Request containing a single
  transfer operation over the task's LFNs, validates it, and stores it in
  taskDict[taskID]['TaskObject'].

  :param transBody: transformation body; if of the form '<type>;<operation>',
                    the operation part overrides the default 'ReplicateAndRegister'
  :param taskDict: taskID -> parameter dict as produced by the DB
  :param owner: request owner user name; looked up from the proxy when empty
  :param ownerGroup: request owner group; looked up from the proxy when empty
  :param ownerDN: request owner DN; resolved from the owner name when empty
  :return: S_OK( taskDict ) or an error structure from any failing lookup/validation
  """
  # fall back to the current proxy identity when credentials are not given
  if ( not owner ) or ( not ownerGroup ):
    res = getProxyInfo( False, False )
    if not res['OK']:
      return res
    proxyInfo = res['Value']
    owner = proxyInfo['username']
    ownerGroup = proxyInfo['group']
  if not ownerDN:
    res = getDNForUsername( owner )
    if not res['OK']:
      return res
    ownerDN = res['Value'][0]
  requestOperation = 'ReplicateAndRegister'
  if transBody:
    try:
      _requestType, requestOperation = transBody.split( ';' )
    except AttributeError:
      # transBody is not a string (e.g. None) -- keep the default operation
      pass
  for taskID in sorted( taskDict ):
    paramDict = taskDict[taskID]
    if paramDict['InputData']:
      transID = paramDict['TransformationID']
      oRequest = Request()
      transfer = Operation()
      transfer.Type = requestOperation
      transfer.TargetSE = paramDict['TargetSE']
      # NOTE(review): 'basestring' is Python-2 only; also, if InputData is
      # neither list nor string, 'files' from a previous iteration would be
      # silently reused -- TODO confirm InputData is always list or string
      if isinstance( paramDict['InputData'], list ):
        files = paramDict['InputData']
      elif isinstance( paramDict['InputData'], basestring ):
        files = paramDict['InputData'].split( ';' )
      for lfn in files:
        trFile = File()
        trFile.LFN = lfn
        transfer.addFile( trFile )
      oRequest.addOperation( transfer )
      oRequest.RequestName = _requestName( transID, taskID )
      oRequest.OwnerDN = ownerDN
      oRequest.OwnerGroup = ownerGroup
      # abort the whole preparation on the first invalid request
      isValid = self.requestValidator.validate( oRequest )
      if not isValid['OK']:
        return isValid
      taskDict[taskID]['TaskObject'] = oRequest
  return S_OK( taskDict )
def _sendToFailover( rpcStub ):
  """ Create a ForwardDISET operation for failover """
  # wrap the RPC stub into a ForwardDISET operation and enqueue it
  failoverRequest = Request()
  failoverRequest.RequestName = "Accounting.DataStore.%s.%s" % ( time.time(), random.random() )
  forwardOp = Operation()
  forwardOp.Type = "ForwardDISET"
  forwardOp.Arguments = DEncode.encode( rpcStub )
  failoverRequest.addOperation( forwardOp )
  return ReqClient().putRequest( failoverRequest )
def test03sql( self ):
  """ sql insert or update

  An Operation serialised before its parent request has a RequestID must
  raise AttributeError("RequestID not set").
  """
  operation = Operation()
  operation.Type = "ReplicateAndRegister"
  request = Request()
  request.RequestName = "testRequest"
  request.RequestID = 1
  # # no parent request set
  try:
    operation.toSQL()
  # 'except Exception as error' replaces the Python-2-only
  # 'except Exception, error' form, consistent with the rest of the file
  except Exception as error:
    self.assertEqual( isinstance( error, AttributeError ), True, "wrong exc raised" )
    self.assertEqual( str( error ), "RequestID not set", "wrong exc reason" )
def _sendToFailover( rpcStub ):
  """ Create a ForwardDISET operation for failover """
  try:
    failover = Request()
    failover.RequestName = "Accounting.DataStore.%s.%s" % ( time.time(), random.random() )
    disetOp = Operation()
    disetOp.Type = "ForwardDISET"
    disetOp.Arguments = DEncode.encode( rpcStub )
    failover.addOperation( disetOp )
    return ReqClient().putRequest( failover )
  # We catch all the exceptions, because it should never crash
  except Exception as e:  # pylint: disable=broad-except
    return S_ERROR( ERMSUKN, "Exception sending accounting failover request: %s" % repr( e ) )
def __setRemovalRequest( self, lfn, ownerDN, ownerGroup ):
  """ Set removal request with the given credentials """
  removalRequest = Request()
  removalRequest.OwnerDN = ownerDN
  removalRequest.OwnerGroup = ownerGroup
  removalRequest.RequestName = os.path.basename( lfn ).strip() + '_removal_request.xml'
  removalRequest.SourceComponent = 'JobCleaningAgent'

  # one RemoveFile operation carrying the single LFN
  removeOp = Operation()
  removeOp.Type = 'RemoveFile'
  targetFile = File()
  targetFile.LFN = lfn
  removeOp.addFile( targetFile )
  removalRequest.addOperation( removeOp )

  return ReqClient().putRequest( removalRequest )
def prepareTransformationTasks( self, transBody, taskDict, owner = '', ownerGroup = '' ):
  """ Prepare tasks, given a taskDict, that is created (with some manipulation) by the DB

  For every task with input data, builds a Request with one transfer
  operation (default 'ReplicateAndRegister', overridden by the transformation
  body) over the task's LFNs, validates it, and stores it in
  taskDict[taskID]['TaskObject'].

  :param transBody: transformation body; '<type>;<operation>' overrides the default operation
  :param taskDict: taskID -> parameter dict as produced by the DB
  :param owner: DN stored as the request OwnerDN
  :param ownerGroup: group stored as the request OwnerGroup
  :return: S_OK( taskDict ) or the first validation error
  """
  requestOperation = 'ReplicateAndRegister'
  if transBody:
    try:
      _requestType, requestOperation = transBody.split( ';' )
    except AttributeError:
      # transBody is not a string -- keep the default operation
      pass
  for taskID in sorted( taskDict ):
    paramDict = taskDict[taskID]
    if paramDict['InputData']:
      transID = paramDict['TransformationID']
      oRequest = Request()
      transfer = Operation()
      transfer.Type = requestOperation
      transfer.TargetSE = paramDict['TargetSE']
      # isinstance() instead of the type( x ) == type( [] ) anti-pattern,
      # consistent with the sibling implementation of this method
      if isinstance( paramDict['InputData'], list ):
        files = paramDict['InputData']
      elif isinstance( paramDict['InputData'], str ):
        files = paramDict['InputData'].split( ';' )
      else:
        # unexpected InputData type: skip this task instead of silently
        # reusing the 'files' binding left over from a previous iteration
        continue
      for lfn in files:
        trFile = File()
        trFile.LFN = lfn
        transfer.addFile( trFile )
      oRequest.addOperation( transfer )
      oRequest.RequestName = str( transID ).zfill( 8 ) + '_' + str( taskID ).zfill( 8 )
      oRequest.OwnerDN = owner
      oRequest.OwnerGroup = ownerGroup
      isValid = gRequestValidator.validate( oRequest )
      if not isValid['OK']:
        return isValid
      taskDict[taskID]['TaskObject'] = oRequest
  return S_OK( taskDict )
def createRequest(self, requestName, archiveLFN, lfnChunk):
    """Create the Request.

    Assembles an archiving request from the configured switches: always an
    ArchiveFiles operation over *lfnChunk*, then optionally tarball
    replication, migration check, archive-replica registration, replica
    removal, file removal, and removal of the original tarball replica.

    :param requestName: name given to the new Request
    :param archiveLFN: LFN of the tarball produced by the ArchiveFiles operation
    :param lfnChunk: list of LFNs to be archived
    :return: the fully assembled Request (not yet submitted)
    """
    request = Request()
    request.RequestName = requestName
    self._checkReplicaSites(request, lfnChunk)
    archiveFiles = Operation()
    archiveFiles.Type = "ArchiveFiles"
    # parameters for the ArchiveFiles executor, passed as an encoded dict
    archiveFiles.Arguments = DEncode.encode({
        "SourceSE": self.sourceSEs[0],
        "TarballSE": self.switches["TarballSE"],
        "RegisterDescendent": self.switches["RegisterDescendent"],
        "ArchiveLFN": archiveLFN,
    })
    self.addLFNs(archiveFiles, lfnChunk)
    request.addOperation(archiveFiles)
    # Replicate the Tarball, ArchiveFiles will upload it
    if self.switches.get("ReplicateTarball"):
        replicateAndRegisterTarBall = Operation()
        replicateAndRegisterTarBall.Type = "ReplicateAndRegister"
        replicateAndRegisterTarBall.TargetSE = self.targetSE
        opFile = File()
        opFile.LFN = archiveLFN
        replicateAndRegisterTarBall.addFile(opFile)
        request.addOperation(replicateAndRegisterTarBall)
    if self.switches.get("CheckMigration"):
        checkMigrationTarBall = Operation()
        checkMigrationTarBall.Type = "CheckMigration"
        # the migration is checked where the tarball ended up: the replication
        # target if it was replicated, otherwise the original TarballSE
        migrationTarget = self.targetSE if self.switches.get(
            "ReplicateTarball") else self.switches["TarballSE"]
        checkMigrationTarBall.TargetSE = migrationTarget
        opFile = File()
        opFile.LFN = archiveLFN
        checkMigrationTarBall.addFile(opFile)
        request.addOperation(checkMigrationTarBall)
    # Register Archive Replica for LFNs
    if self.switches.get("ArchiveSE"):
        registerArchived = Operation()
        registerArchived.Type = "RegisterReplica"
        registerArchived.TargetSE = self.switches.get("ArchiveSE")
        self.addLFNs(registerArchived, lfnChunk, addPFN=True)
        request.addOperation(registerArchived)
    # Remove all Other Replicas for LFNs
    if self.switches.get("RemoveReplicas"):
        removeArchiveReplicas = Operation()
        removeArchiveReplicas.Type = "RemoveReplica"
        removeArchiveReplicas.TargetSE = ",".join(self.replicaSEs)
        self.addLFNs(removeArchiveReplicas, lfnChunk)
        request.addOperation(removeArchiveReplicas)
    # Remove all Replicas for LFNs
    if self.switches.get("RemoveFiles"):
        removeArchiveFiles = Operation()
        removeArchiveFiles.Type = "RemoveFile"
        self.addLFNs(removeArchiveFiles, lfnChunk)
        request.addOperation(removeArchiveFiles)
    # Remove Original tarball replica
    if self.switches.get("ReplicateTarball"):
        removeTarballOrg = Operation()
        removeTarballOrg.Type = "RemoveReplica"
        removeTarballOrg.TargetSE = self.sourceSEs[0]
        opFile = File()
        opFile.LFN = archiveLFN
        removeTarballOrg.addFile(opFile)
        request.addOperation(removeTarballOrg)
    return request
from DIRAC.RequestManagementSystem.private.RequestValidator import RequestValidator
from DIRAC.Resources.Catalog.FileCatalog import FileCatalog

reqClient = ReqClient()
fc = FileCatalog()
# targetSE == 'All' means delete the files entirely, otherwise only the
# replicas on the given SE
requestOperation = 'RemoveReplica'
if targetSE == 'All':
    requestOperation = 'RemoveFile'
# process the LFNs in chunks of 100 files, one request per chunk
for lfnList in breakListIntoChunks(lfns, 100):
    oRequest = Request()
    # pseudo-random request name derived from the current time
    requestName = "%s_%s" % (md5(repr(time.time())).hexdigest()[:16],
                             md5(repr(time.time())).hexdigest()[:16])
    oRequest.RequestName = requestName
    oOperation = Operation()
    oOperation.Type = requestOperation
    oOperation.TargetSE = targetSE
    res = fc.getFileMetadata(lfnList)
    if not res['OK']:
        print("Can't get file metadata: %s" % res['Message'])
        DIRAC.exit(1)
    if res['Value']['Failed']:
        print(
            "Could not get the file metadata of the following, so skipping them:"
        )
        for fFile in res['Value']['Failed']:
            print(fFile)
def test01fullChain(self):
    """Full put/get/inspect/delete cycle through the ReqClient against the DB.

    Exercises putRequest, DB summaries before and after the request is picked
    up, the various read methods, adding a second request, and final cleanup.
    """
    put = self.requestClient.putRequest(self.request)
    self.assertTrue(put['OK'])
    self.assertEqual(type(put['Value']), long)
    reqID = put['Value']
    # # summary
    ret = RequestDB().getDBSummary()
    self.assertEqual(ret,
                     {'OK': True,
                      'Value': {'Operation': {'ReplicateAndRegister': {'Waiting': 1}},
                                'Request': {'Waiting': 1},
                                'File': {'Waiting': 2}}})
    get = self.requestClient.getRequest(reqID)
    self.assertTrue(get['OK'])
    self.assertEqual(isinstance(get['Value'], Request), True)
    # # summary - the request became "Assigned"
    res = RequestDB().getDBSummary()
    self.assertEqual(res,
                     {'OK': True,
                      'Value': {'Operation': {'ReplicateAndRegister': {'Waiting': 1}},
                                'Request': {'Assigned': 1},
                                'File': {'Waiting': 2}}})
    res = self.requestClient.getRequestInfo(reqID)
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    # file status works both with a single LFN and with a list of LFNs
    res = self.requestClient.getRequestFileStatus(reqID, self.file.LFN)
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    res = self.requestClient.getRequestFileStatus(reqID, [self.file.LFN])
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    res = self.requestClient.getDigest(reqID)
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    # requests can be looked up by the job they belong to
    res = self.requestClient.readRequestsForJobs([123])
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    self.assertTrue(isinstance(res['Value']['Successful'][123], Request))
    # Adding new request
    request2 = Request()
    request2.RequestName = "RequestManagerHandlerTests-2"
    request2.OwnerDN = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=cibak/CN=605919/CN=Krzysztof Ciba"
    request2.OwnerGroup = "dirac_user"
    request2.JobID = 456
    request2.addOperation(self.operation)
    # # update
    res = self.requestClient.putRequest(request2)
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    reqID2 = res['Value']
    # # get summary again
    ret = RequestDB().getDBSummary()
    self.assertEqual(ret,
                     {'OK': True,
                      'Value': {'Operation': {'ReplicateAndRegister': {'Waiting': 2}},
                                'Request': {'Waiting': 1, 'Assigned': 1},
                                'File': {'Waiting': 4}}})
    delete = self.requestClient.deleteRequest(reqID)
    self.assertEqual(delete['OK'], True, delete['Message'] if 'Message' in delete else 'OK')
    delete = self.requestClient.deleteRequest(reqID2)
    self.assertEqual(delete['OK'], True, delete['Message'] if 'Message' in delete else 'OK')
    # # should be empty now
    ret = RequestDB().getDBSummary()
    self.assertEqual(ret,
                     {'OK': True,
                      'Value': {'Operation': {}, 'Request': {}, 'File': {}}})
def execute(self):
    """The JobAgent execution method.

    One agent cycle: check the CE for free slots, verify remaining CPU time
    when in filling mode, ask the Matcher for a job, validate the returned
    JDL, and submit the payload to the computing element, rescheduling the
    job on failure.
    """
    # Temporary mechanism to pass a shutdown message to the agent
    if os.path.exists('/var/lib/dirac_drain'):
        return self.__finish('Node is being drained by an operator')
    # Check if we can match jobs at all
    self.log.verbose('Job Agent execution loop')
    result = self.computingElement.available()
    if not result['OK']:
        self.log.info('Resource is not available', result['Message'])
        return self.__finish('CE Not Available')
    ceInfoDict = result['CEInfoDict']
    runningJobs = ceInfoDict.get("RunningJobs")
    availableSlots = result['Value']
    if not availableSlots:
        if runningJobs:
            # nothing to do this cycle, but jobs are still running: not an error
            self.log.info('No available slots', ': %d running jobs' % runningJobs)
            return S_OK('Job Agent cycle complete with %d running jobs' % runningJobs)
        self.log.info('CE is not available (and there are no running jobs)')
        return self.__finish('CE Not Available')
    if self.jobCount:
        # Only call timeLeft utility after a job has been picked up
        self.log.info('Attempting to check CPU time left for filling mode')
        if self.fillingMode:
            self.timeLeft = self.computeCPUWorkLeft()
            self.log.info('normalized CPU units remaining in slot', self.timeLeft)
            if self.timeLeft <= self.minimumTimeLeft:
                return self.__finish('No more time left')
            # Need to update the Configuration so that the new value is published in the next matching request
            result = self.computingElement.setCPUTimeLeft(cpuTimeLeft=self.timeLeft)
            if not result['OK']:
                return self.__finish(result['Message'])
            # Update local configuration to be used by submitted job wrappers
            localCfg = CFG()
            if self.extraOptions:
                localConfigFile = os.path.join('.', self.extraOptions)
            else:
                localConfigFile = os.path.join(rootPath, "etc", "dirac.cfg")
            localCfg.loadFromFile(localConfigFile)
            if not localCfg.isSection('/LocalSite'):
                localCfg.createNewSection('/LocalSite')
            localCfg.setOption('/LocalSite/CPUTimeLeft', self.timeLeft)
            localCfg.writeToFile(localConfigFile)
        else:
            return self.__finish('Filling Mode is Disabled')
    # if we are here we assume that a job can be matched
    result = self.computingElement.getDescription()
    if not result['OK']:
        return result
    # We can have several prioritized job retrieval strategies
    # NOTE(review): if result['Value'] is neither dict nor list, ceDictList
    # stays unbound and the loop below raises -- presumably never the case
    if isinstance(result['Value'], dict):
        ceDictList = [result['Value']]
    elif isinstance(result['Value'], list):
        # This is the case for Pool ComputingElement, and parameter 'MultiProcessorStrategy'
        ceDictList = result['Value']
    for ceDict in ceDictList:
        # Add pilot information
        gridCE = gConfig.getValue('LocalSite/GridCE', 'Unknown')
        if gridCE != 'Unknown':
            ceDict['GridCE'] = gridCE
        if 'PilotReference' not in ceDict:
            ceDict['PilotReference'] = str(self.pilotReference)
        ceDict['PilotBenchmark'] = self.cpuFactor
        ceDict['PilotInfoReportedFlag'] = self.pilotInfoReportedFlag
        # Add possible job requirements
        result = gConfig.getOptionsDict('/AgentJobRequirements')
        if result['OK']:
            requirementsDict = result['Value']
            ceDict.update(requirementsDict)
            self.log.info('Requirements:', requirementsDict)
        self.log.verbose('CE dict', ceDict)
        # here finally calling the matcher
        start = time.time()
        jobRequest = MatcherClient().requestJob(ceDict)
        matchTime = time.time() - start
        self.log.info('MatcherTime', '= %.2f (s)' % (matchTime))
        if jobRequest['OK']:
            # stop at the first strategy that yields a job
            break
    self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches',
                                                    self.stopAfterFailedMatches)
    if not jobRequest['OK']:
        # if we don't match a job, independently from the reason,
        # we wait a bit longer before trying again
        self.am_setOption("PollingTime", int(self.am_getOption("PollingTime") * 1.5))
        if re.search('No match found', jobRequest['Message']):
            self.log.notice('Job request OK, but no match found',
                            ': %s' % (jobRequest['Message']))
            self.matchFailedCount += 1
            if self.matchFailedCount > self.stopAfterFailedMatches:
                return self.__finish('Nothing to do for more than %d cycles' %
                                     self.stopAfterFailedMatches)
            return S_OK(jobRequest['Message'])
        elif jobRequest['Message'].find("seconds timeout") != -1:
            self.log.error('Timeout while requesting job', jobRequest['Message'])
            self.matchFailedCount += 1
            if self.matchFailedCount > self.stopAfterFailedMatches:
                return self.__finish('Nothing to do for more than %d cycles' %
                                     self.stopAfterFailedMatches)
            return S_OK(jobRequest['Message'])
        elif jobRequest['Message'].find("Pilot version does not match") != -1:
            errorMsg = 'Pilot version does not match the production version'
            self.log.error(errorMsg, jobRequest['Message'].replace(errorMsg, ''))
            return S_ERROR(jobRequest['Message'])
        else:
            self.log.notice('Failed to get jobs', ': %s' % (jobRequest['Message']))
            self.matchFailedCount += 1
            if self.matchFailedCount > self.stopAfterFailedMatches:
                return self.__finish('Nothing to do for more than %d cycles' %
                                     self.stopAfterFailedMatches)
            return S_OK(jobRequest['Message'])
    # Reset the Counter
    self.matchFailedCount = 0
    # If we are here it is because we matched a job
    matcherInfo = jobRequest['Value']
    if not self.pilotInfoReportedFlag:
        # Check the flag after the first access to the Matcher
        self.pilotInfoReportedFlag = matcherInfo.get('PilotInfoReportedFlag', False)
    jobID = matcherInfo['JobID']
    jobReport = JobReport(jobID, 'JobAgent@%s' % self.siteName)
    # the matcher must return all three parameters, each non-empty
    matcherParams = ['JDL', 'DN', 'Group']
    for param in matcherParams:
        if param not in matcherInfo:
            jobReport.setJobStatus(status='Failed',
                                   minor='Matcher did not return %s' % (param))
            return self.__finish('Matcher Failed')
        elif not matcherInfo[param]:
            jobReport.setJobStatus(status='Failed',
                                   minor='Matcher returned null %s' % (param))
            return self.__finish('Matcher Failed')
        else:
            self.log.verbose('Matcher returned', '%s = %s ' % (param, matcherInfo[param]))
    jobJDL = matcherInfo['JDL']
    jobGroup = matcherInfo['Group']
    ownerDN = matcherInfo['DN']
    # everything else the matcher returned is passed through to the optimizer
    optimizerParams = {}
    for key in matcherInfo:
        if key not in matcherParams:
            optimizerParams[key] = matcherInfo[key]
    parameters = self._getJDLParameters(jobJDL)
    if not parameters['OK']:
        jobReport.setJobStatus(status='Failed', minor='Could Not Extract JDL Parameters')
        self.log.warn('Could Not Extract JDL Parameters', parameters['Message'])
        return self.__finish('JDL Problem')
    params = parameters['Value']
    if 'JobID' not in params:
        msg = 'Job has not JobID defined in JDL parameters'
        jobReport.setJobStatus(status='Failed', minor=msg)
        self.log.warn(msg)
        return self.__finish('JDL Problem')
    else:
        jobID = params['JobID']
    if 'JobType' not in params:
        self.log.warn('Job has no JobType defined in JDL parameters')
        jobType = 'Unknown'
    else:
        jobType = params['JobType']
    if 'CPUTime' not in params:
        self.log.warn('Job has no CPU requirement defined in JDL parameters')
    # Job requirements for determining the number of processors
    # the minimum number of processors requested
    processors = int(params.get('NumberOfProcessors',
                                int(params.get('MinNumberOfProcessors', 1))))
    # the maximum number of processors allowed to the payload
    maxNumberOfProcessors = int(params.get('MaxNumberOfProcessors', 0))
    # need or not the whole node for the job
    wholeNode = 'WholeNode' in params
    mpTag = 'MultiProcessor' in params.get('Tags', [])
    if self.extraOptions and 'dirac-jobexec' in params.get('Executable', '').strip():
        # propagate the agent's extra options to dirac-jobexec payloads
        params['Arguments'] = (params.get('Arguments', '') + ' ' + self.extraOptions).strip()
        params['ExtraOptions'] = self.extraOptions
    self.log.verbose('Job request successful: \n', jobRequest['Value'])
    self.log.info('Received',
                  'JobID=%s, JobType=%s, OwnerDN=%s, JobGroup=%s' %
                  (jobID, jobType, ownerDN, jobGroup))
    self.jobCount += 1
    try:
        jobReport.setJobParameter(par_name='MatcherServiceTime',
                                  par_value=str(matchTime),
                                  sendFlag=False)
        if 'BOINC_JOB_ID' in os.environ:
            # Report BOINC environment
            for thisp in ('BoincUserID', 'BoincHostID', 'BoincHostPlatform', 'BoincHostName'):
                jobReport.setJobParameter(par_name=thisp,
                                          par_value=gConfig.getValue(
                                              '/LocalSite/%s' % thisp, 'Unknown'),
                                          sendFlag=False)
        jobReport.setJobStatus(status='Matched', minor='Job Received by Agent', sendFlag=False)
        result_setupProxy = self._setupProxy(ownerDN, jobGroup)
        if not result_setupProxy['OK']:
            return self._rescheduleFailedJob(jobID,
                                             result_setupProxy['Message'],
                                             self.stopOnApplicationFailure)
        proxyChain = result_setupProxy.get('Value')
        # Save the job jdl for external monitoring
        self.__saveJobJDLRequest(jobID, jobJDL)
        software = self._checkInstallSoftware(jobID, params, ceDict, jobReport)
        if not software['OK']:
            self.log.error('Failed to install software for job', '%s' % (jobID))
            errorMsg = software['Message']
            if not errorMsg:
                errorMsg = 'Failed software installation'
            return self._rescheduleFailedJob(jobID, errorMsg, self.stopOnApplicationFailure)
        self.log.debug('Before self._submitJob() (%sCE)' % (self.ceName))
        result_submitJob = self._submitJob(
            jobID=jobID,
            jobParams=params,
            resourceParams=ceDict,
            optimizerParams=optimizerParams,
            proxyChain=proxyChain,
            jobReport=jobReport,
            processors=processors,
            wholeNode=wholeNode,
            maxNumberOfProcessors=maxNumberOfProcessors,
            mpTag=mpTag)
        # Committing the JobReport before evaluating the result of job submission
        res = jobReport.commit()
        if not res['OK']:
            # could not reach the job state service: fall back to a failover request
            resFD = jobReport.generateForwardDISET()
            if not resFD['OK']:
                self.log.error("Error generating ForwardDISET operation", resFD['Message'])
            else:
                # Here we create the Request.
                op = resFD['Value']
                request = Request()
                requestName = 'jobAgent_%s' % jobID
                request.RequestName = requestName.replace('"', '')
                request.JobID = jobID
                request.SourceComponent = "JobAgent_%s" % jobID
                request.addOperation(op)
                # This might fail, but only a message would be printed.
                self._sendFailoverRequest(request)
        if not result_submitJob['OK']:
            return self.__finish(result_submitJob['Message'])
        elif 'PayloadFailed' in result_submitJob:
            # Do not keep running and do not overwrite the Payload error
            message = 'Payload execution failed with error code %s' % result_submitJob['PayloadFailed']
            if self.stopOnApplicationFailure:
                return self.__finish(message, self.stopOnApplicationFailure)
            else:
                self.log.info(message)
        self.log.debug('After %sCE submitJob()' % (self.ceName))
    except Exception as subExcept:  # pylint: disable=broad-except
        self.log.exception("Exception in submission", "", lException=subExcept, lExcInfo=True)
        return self._rescheduleFailedJob(jobID,
                                         'Job processing failed with exception',
                                         self.stopOnApplicationFailure)
    return S_OK('Job Agent cycle complete')
def main():
    """Command-line entry point: build and submit a PutAndRegister request.

    Expects exactly four positional arguments: requestName, LFN, local PFN,
    and target SE. Exits the process with a non-zero code on any error.
    """
    from DIRAC.Core.Base.Script import parseCommandLine
    parseCommandLine()
    import DIRAC
    from DIRAC import gLogger
    args = Script.getPositionalArgs()
    requestName = None
    LFN = None
    PFN = None
    targetSE = None
    if len(args) != 4:
        Script.showHelp()
    else:
        requestName = args[0]
        LFN = args[1]
        PFN = args[2]
        targetSE = args[3]
    if not os.path.isabs(LFN):
        gLogger.error("LFN should be absolute path!!!")
        DIRAC.exit(-1)
    gLogger.info("will create request '%s' with 'PutAndRegister' "
                 "operation using %s pfn and %s target SE" % (requestName, PFN, targetSE))
    # deferred imports: only needed once the arguments are validated
    from DIRAC.RequestManagementSystem.Client.Request import Request
    from DIRAC.RequestManagementSystem.Client.Operation import Operation
    from DIRAC.RequestManagementSystem.Client.File import File
    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.Core.Utilities.Adler import fileAdler
    # the local source file must exist and be a regular file
    if not os.path.exists(PFN):
        gLogger.error("%s does not exist" % PFN)
        DIRAC.exit(-1)
    if not os.path.isfile(PFN):
        gLogger.error("%s is not a file" % PFN)
        DIRAC.exit(-1)
    PFN = os.path.abspath(PFN)
    size = os.path.getsize(PFN)
    adler32 = fileAdler(PFN)
    # build the request: one PutAndRegister operation over one file
    request = Request()
    request.RequestName = requestName
    putAndRegister = Operation()
    putAndRegister.Type = "PutAndRegister"
    putAndRegister.TargetSE = targetSE
    opFile = File()
    opFile.LFN = LFN
    opFile.PFN = PFN
    opFile.Size = size
    opFile.Checksum = adler32
    opFile.ChecksumType = "ADLER32"
    putAndRegister.addFile(opFile)
    request.addOperation(putAndRegister)
    reqClient = ReqClient()
    putRequest = reqClient.putRequest(request)
    if not putRequest["OK"]:
        gLogger.error("unable to put request '%s': %s" % (requestName, putRequest["Message"]))
        DIRAC.exit(-1)
    gLogger.always("Request '%s' has been put to ReqDB for execution." % requestName)
    gLogger.always(
        "You can monitor its status using command: 'dirac-rms-request %s'" % requestName)
    DIRAC.exit(0)
def prepareTransformationTasks( self, transBody, taskDict, owner = '', ownerGroup = '', ownerDN = '' ):
  """ Prepare tasks, given a taskDict, that is created (with some manipulation) by the DB

  Builds one Request per task with a single transfer operation over the
  task's input LFNs. Tasks whose request fails validation are dropped from
  taskDict instead of aborting the whole batch.

  :param transBody: transformation body; '<type>;<operation>' overrides the
                    default 'ReplicateAndRegister' operation
  :param taskDict: taskID -> parameter dict as produced by the DB
  :param owner: request owner user name; looked up from the proxy when empty
  :param ownerGroup: request owner group; looked up from the proxy when empty
  :param ownerDN: request owner DN; resolved from the owner name when empty
  :return: S_OK( taskDict ) with 'TaskObject' filled in for surviving tasks
  """
  if not taskDict:
    return S_OK({})
  # fall back to the current proxy identity when credentials are not given
  if ( not owner ) or ( not ownerGroup ):
    res = getProxyInfo( False, False )
    if not res['OK']:
      return res
    proxyInfo = res['Value']
    owner = proxyInfo['username']
    ownerGroup = proxyInfo['group']
  if not ownerDN:
    res = getDNForUsername( owner )
    if not res['OK']:
      return res
    ownerDN = res['Value'][0]
  requestOperation = 'ReplicateAndRegister'
  if transBody:
    try:
      _requestType, requestOperation = transBody.split( ';' )
    except AttributeError:
      # transBody is not a string -- keep the default operation
      pass
  # Do not remove sorted, we might pop elements in the loop
  for taskID in sorted( taskDict ):
    paramDict = taskDict[taskID]
    transID = paramDict['TransformationID']
    oRequest = Request()
    transfer = Operation()
    transfer.Type = requestOperation
    transfer.TargetSE = paramDict['TargetSE']
    # If there are input files
    if paramDict['InputData']:
      # NOTE(review): 'basestring' is Python-2 only -- confirm this module
      # is not meant to run under py3
      if isinstance( paramDict['InputData'], list ):
        files = paramDict['InputData']
      elif isinstance( paramDict['InputData'], basestring ):
        files = paramDict['InputData'].split( ';' )
      for lfn in files:
        trFile = File()
        trFile.LFN = lfn
        transfer.addFile( trFile )
    oRequest.addOperation( transfer )
    oRequest.RequestName = _requestName( transID, taskID )
    oRequest.OwnerDN = ownerDN
    oRequest.OwnerGroup = ownerGroup
    isValid = self.requestValidator.validate( oRequest )
    if not isValid['OK']:
      self.log.error( "Error creating request for task", "%s %s" % ( taskID, isValid ) )
      # This works because we loop over a copy of the keys !
      taskDict.pop( taskID )
      continue
    taskDict[taskID]['TaskObject'] = oRequest
  return S_OK( taskDict )
from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
from DIRAC.RequestManagementSystem.private.RequestValidator import gRequestValidator
from DIRAC.Resources.Catalog.FileCatalog import FileCatalog

reqClient = ReqClient()
fc = FileCatalog()
# targetSE == 'All' means delete the files entirely, otherwise only the
# replicas on the given SE
requestOperation = 'RemoveReplica'
if targetSE == 'All':
  requestOperation = 'RemoveFile'
# one request per chunk of 100 LFNs
for lfnList in breakListIntoChunks( lfns, 100 ):
  oRequest = Request()
  # pseudo-random request name derived from the current time
  requestName = "%s_%s" % ( md5( repr( time.time() ) ).hexdigest()[:16],
                            md5( repr( time.time() ) ).hexdigest()[:16] )
  oRequest.RequestName = requestName
  oOperation = Operation()
  oOperation.Type = requestOperation
  oOperation.TargetSE = targetSE
  res = fc.getFileMetadata( lfnList )
  if not res['OK']:
    # print() calls instead of py2-only print statements, consistent with the
    # other scripts in this file and valid on both interpreter major versions
    print( "Can't get file metadata: %s" % res['Message'] )
    DIRAC.exit( 1 )
  if res['Value']['Failed']:
    print( "Could not get the file metadata of the following, so skipping them:" )
    for fFile in res['Value']['Failed']:
      print( fFile )
  lfnMetadata = res['Value']['Successful']
def test05FTS( self ):
  """ FTS state machine

  Check that file status changes propagate up through the operation to the
  request, including when a new operation is inserted before the FTS one.
  """
  req = Request()
  req.RequestName = "FTSTest"
  ftsTransfer = Operation()
  ftsTransfer.Type = "ReplicateAndRegister"
  ftsTransfer.TargetSE = "CERN-USER"
  ftsFile = File()
  ftsFile.LFN = "/a/b/c"
  ftsFile.Checksum = "123456"
  ftsFile.ChecksumType = "Adler32"
  ftsTransfer.addFile( ftsFile )
  req.addOperation( ftsTransfer )
  # a fresh request with one waiting file is itself Waiting
  self.assertEqual( req.Status, "Waiting", "1. wrong request status: %s" % req.Status )
  self.assertEqual( ftsTransfer.Status, "Waiting", "1. wrong ftsStatus status: %s" % ftsTransfer.Status )
  # # scheduled
  ftsFile.Status = "Scheduled"
  self.assertEqual( ftsTransfer.Status, "Scheduled", "2. wrong status for ftsTransfer: %s" % ftsTransfer.Status )
  self.assertEqual( req.Status, "Scheduled", "2. wrong status for request: %s" % req.Status )
  # # add new operation before FTS
  insertBefore = Operation()
  insertBefore.Type = "RegisterReplica"
  insertBefore.TargetSE = "CERN-USER"
  insertFile = File()
  insertFile.LFN = "/a/b/c"
  insertFile.PFN = "http://foo/bar"
  insertBefore.addFile( insertFile )
  req.insertBefore( insertBefore, ftsTransfer )
  # the request follows the earliest non-finished operation
  self.assertEqual( insertBefore.Status, "Waiting", "3. wrong status for insertBefore: %s" % insertBefore.Status )
  self.assertEqual( ftsTransfer.Status, "Scheduled", "3. wrong status for ftsStatus: %s" % ftsTransfer.Status )
  self.assertEqual( req.Status, "Waiting", "3. wrong status for request: %s" % req.Status )
  # # prev done
  insertFile.Status = "Done"
  self.assertEqual( insertBefore.Status, "Done", "4. wrong status for insertBefore: %s" % insertBefore.Status )
  self.assertEqual( ftsTransfer.Status, "Scheduled", "4. wrong status for ftsStatus: %s" % ftsTransfer.Status )
  self.assertEqual( req.Status, "Scheduled", "4. wrong status for request: %s" % req.Status )
  # # reschedule
  ftsFile.Status = "Waiting"
  self.assertEqual( insertBefore.Status, "Done", "5. wrong status for insertBefore: %s" % insertBefore.Status )
  self.assertEqual( ftsTransfer.Status, "Waiting", "5. wrong status for ftsStatus: %s" % ftsTransfer.Status )
  self.assertEqual( req.Status, "Waiting", "5. wrong status for request: %s" % req.Status )
  # # fts done
  ftsFile.Status = "Done"
  self.assertEqual( insertBefore.Status, "Done", "5. wrong status for insertBefore: %s" % insertBefore.Status )
  self.assertEqual( ftsTransfer.Status, "Done", "5. wrong status for ftsStatus: %s" % ftsTransfer.Status )
  self.assertEqual( req.Status, "Done", "5. wrong status for request: %s" % req.Status )
def main():
    """Create 'ReplicateAndRegister' request(s) for a list of LFNs and put them to the ReqDB."""
    Script.registerSwitch("C:", "Catalog=", "Catalog to use")
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(" requestName: a request name")
    Script.registerArgument(" LFNs: single LFN or file with LFNs")
    Script.registerArgument(["targetSE: target SE"])
    Script.parseCommandLine()

    catalog = None
    for switchName, switchValue in Script.getUnprocessedSwitches():
        if switchName == "C" or switchName.lower() == "catalog":
            catalog = switchValue

    args = Script.getPositionalArgs()
    if len(args) < 3:
        Script.showHelp(exitCode=1)

    requestName = args[0]
    lfnList = getLFNList(args[1])
    # Each remaining positional argument may itself be a comma-separated SE list.
    targetSEs = list({se for targetSE in args[2:] for se in targetSE.split(",")})

    gLogger.info("Will create request '%s' with 'ReplicateAndRegister' "
                 "operation using %s lfns and %s target SEs" % (requestName, len(lfnList), len(targetSEs)))

    from DIRAC.RequestManagementSystem.Client.Request import Request
    from DIRAC.RequestManagementSystem.Client.Operation import Operation
    from DIRAC.RequestManagementSystem.Client.File import File
    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
    from DIRAC.Core.Utilities.List import breakListIntoChunks

    # One request per chunk of at most 100 LFNs.
    lfnChunks = breakListIntoChunks(lfnList, 100)
    multiRequests = len(lfnChunks) > 1

    error = 0
    count = 0
    reqClient = ReqClient()
    fc = FileCatalog()
    requestIDs = []
    for lfnChunk in lfnChunks:
        metaDatas = fc.getFileMetadata(lfnChunk)
        if not metaDatas["OK"]:
            gLogger.error("unable to read metadata for lfns: %s" % metaDatas["Message"])
            error = -1
            continue
        metaDatas = metaDatas["Value"]
        for failedLFN, reason in metaDatas["Failed"].items():
            gLogger.error("skipping %s: %s" % (failedLFN, reason))
        # Keep only LFNs whose metadata could be read.
        lfnChunk = set(metaDatas["Successful"])
        if not lfnChunk:
            gLogger.error("LFN list is empty!!!")
            error = -1
            continue
        if len(lfnChunk) > Operation.MAX_FILES:
            gLogger.error("too many LFNs, max number of files per operation is %s" % Operation.MAX_FILES)
            error = -1
            continue

        count += 1
        request = Request()
        request.RequestName = requestName if not multiRequests else "%s_%d" % (requestName, count)

        replicateAndRegister = Operation()
        replicateAndRegister.Type = "ReplicateAndRegister"
        replicateAndRegister.TargetSE = ",".join(targetSEs)
        if catalog is not None:
            replicateAndRegister.Catalog = catalog

        for lfn in lfnChunk:
            metaDict = metaDatas["Successful"][lfn]
            opFile = File()
            opFile.LFN = lfn
            opFile.Size = metaDict["Size"]
            if "Checksum" in metaDict:
                # # should check checksum type, now assuming Adler32 (metaDict["ChecksumType"] = 'AD'
                opFile.Checksum = metaDict["Checksum"]
                opFile.ChecksumType = "ADLER32"
            replicateAndRegister.addFile(opFile)

        request.addOperation(replicateAndRegister)
        putRequest = reqClient.putRequest(request)
        if not putRequest["OK"]:
            gLogger.error("unable to put request '%s': %s" % (request.RequestName, putRequest["Message"]))
            error = -1
            continue
        requestIDs.append(str(putRequest["Value"]))
        if not multiRequests:
            gLogger.always("Request '%s' has been put to ReqDB for execution." % request.RequestName)

    if multiRequests:
        gLogger.always("%d requests have been put to ReqDB for execution, with name %s_<num>" % (count, requestName))
    if requestIDs:
        gLogger.always("RequestID(s): %s" % " ".join(requestIDs))
    gLogger.always("You can monitor requests' status using command: 'dirac-rms-request <requestName/ID>'")
    DIRAC.exit(error)
ar_mock = MagicMock() ar_mock.commit.return_value = {'OK': True, 'Value': ''} jr_mock = MagicMock() jr_mock.setApplicationStatus.return_value = {'OK': True, 'Value': ''} jr_mock.generateForwardDISET.return_value = {'OK': True, 'Value': Operation()} jr_mock.setJobParameter.return_value = {'OK': True, 'Value': 'pippo'} fr_mock = MagicMock() fr_mock.getFiles.return_value = {} fr_mock.setFileStatus.return_value = {'OK': True, 'Value': ''} fr_mock.commit.return_value = {'OK': True, 'Value': ''} fr_mock.generateForwardDISET.return_value = {'OK': True, 'Value': Operation()} rc_mock = Request() rc_mock.RequestName = 'aRequestName' rc_mock.OwnerDN = 'pippo' rc_mock.OwnerGroup = 'pippoGRP' rOp = Operation() rOp.Type = 'PutAndRegister' rOp.TargetSE = 'anSE' f = File() f.LFN = '/foo/bar.py' f.PFN = '/foo/bar.py' rOp.addFile(f) rc_mock.addOperation(rOp) wf_commons = [{'PRODUCTION_ID': prod_id, 'JOB_ID': prod_job_id, 'eventType': '123456789', 'jobType': 'merge', 'configName': 'aConfigName', 'configVersion': 'aConfigVersion', 'outputDataFileMask': '', 'BookkeepingLFNs': 'aa', 'ProductionOutputData': 'ProductionOutputData', 'numberOfEvents': '100',
def prepareTransformationTasks(self, transBody, taskDict, owner='', ownerGroup='', ownerDN=''): """ Prepare tasks, given a taskDict, that is created (with some manipulation) by the DB """ if (not owner) or (not ownerGroup): res = getProxyInfo(False, False) if not res['OK']: return res proxyInfo = res['Value'] owner = proxyInfo['username'] ownerGroup = proxyInfo['group'] if not ownerDN: res = getDNForUsername(owner) if not res['OK']: return res ownerDN = res['Value'][0] requestOperation = 'ReplicateAndRegister' if transBody: try: _requestType, requestOperation = transBody.split(';') except AttributeError: pass for taskID in sorted(taskDict): paramDict = taskDict[taskID] if paramDict['InputData']: transID = paramDict['TransformationID'] oRequest = Request() transfer = Operation() transfer.Type = requestOperation transfer.TargetSE = paramDict['TargetSE'] if type(paramDict['InputData']) == type([]): files = paramDict['InputData'] elif type(paramDict['InputData']) == type(''): files = paramDict['InputData'].split(';') for lfn in files: trFile = File() trFile.LFN = lfn transfer.addFile(trFile) oRequest.addOperation(transfer) oRequest.RequestName = _requestName(transID, taskID) oRequest.OwnerDN = ownerDN oRequest.OwnerGroup = ownerGroup isValid = RequestValidator().validate(oRequest) if not isValid['OK']: return isValid taskDict[taskID]['TaskObject'] = oRequest return S_OK(taskDict)
def main():
    """Create and submit 'MoveReplica' request(s) moving the given LFNs to the target SEs."""
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(" sourceSE: source SE")
    Script.registerArgument(" LFN: LFN or file containing a List of LFNs")
    Script.registerArgument(["targetSE: target SEs"])
    Script.parseCommandLine()

    import DIRAC
    from DIRAC import gLogger

    # parseCommandLine show help when mandatory arguments are not specified or incorrect argument
    args = Script.getPositionalArgs()
    sourceSE = args[0]
    lfnList = getLFNList(args[1])
    # Remaining positional arguments are target SEs, each possibly comma-separated.
    targetSEs = list({se for targetSE in args[2:] for se in targetSE.split(",")})

    gLogger.info("Will create request with 'MoveReplica' "
                 "operation using %s lfns and %s target SEs" % (len(lfnList), len(targetSEs)))

    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.RequestManagementSystem.Client.Request import Request
    from DIRAC.RequestManagementSystem.Client.Operation import Operation
    from DIRAC.RequestManagementSystem.Client.File import File
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
    from DIRAC.Core.Utilities.List import breakListIntoChunks

    # One request per chunk of at most 100 LFNs.
    lfnChunks = breakListIntoChunks(lfnList, 100)
    multiRequests = len(lfnChunks) > 1

    error = 0
    count = 0
    reqClient = ReqClient()
    fc = FileCatalog()
    for lfnChunk in lfnChunks:
        metaDatas = fc.getFileMetadata(lfnChunk)
        if not metaDatas["OK"]:
            gLogger.error("unable to read metadata for lfns: %s" % metaDatas["Message"])
            error = -1
            continue
        metaDatas = metaDatas["Value"]
        for failedLFN, reason in metaDatas["Failed"].items():
            gLogger.error("skipping %s: %s" % (failedLFN, reason))
        # Keep only LFNs whose metadata could be read.
        lfnChunk = set(metaDatas["Successful"])
        if not lfnChunk:
            gLogger.error("LFN list is empty!!!")
            error = -1
            continue
        if len(lfnChunk) > Operation.MAX_FILES:
            gLogger.error("too many LFNs, max number of files per operation is %s" % Operation.MAX_FILES)
            error = -1
            continue

        count += 1
        # Time-based pseudo-random request name.
        request = Request()
        request.RequestName = "%s_%s" % (
            md5(repr(time.time()).encode()).hexdigest()[:16],
            md5(repr(time.time()).encode()).hexdigest()[:16],
        )

        moveReplica = Operation()
        moveReplica.Type = "MoveReplica"
        moveReplica.SourceSE = sourceSE
        moveReplica.TargetSE = ",".join(targetSEs)

        for lfn in lfnChunk:
            metaDict = metaDatas["Successful"][lfn]
            opFile = File()
            opFile.LFN = lfn
            opFile.Size = metaDict["Size"]
            if "Checksum" in metaDict:
                # # should check checksum type, now assuming Adler32 (metaDict["ChecksumType"] = 'AD'
                opFile.Checksum = metaDict["Checksum"]
                opFile.ChecksumType = "ADLER32"
            moveReplica.addFile(opFile)

        request.addOperation(moveReplica)
        result = reqClient.putRequest(request)
        if not result["OK"]:
            gLogger.error("Failed to submit Request: %s" % (result["Message"]))
            error = -1
            continue
        if not multiRequests:
            gLogger.always("Request %d submitted successfully" % result["Value"])

    if multiRequests:
        gLogger.always("%d requests have been submitted" % (count))
    DIRAC.exit(error)
def test06Dirty(self): """ dirty records """ db = RequestDB() r = Request() r.RequestName = "dirty" op1 = Operation({ "Type": "ReplicateAndRegister", "TargetSE": "CERN-USER" }) op1 += File({ "LFN": "/a/b/c/1", "Status": "Scheduled", "Checksum": "123456", "ChecksumType": "ADLER32" }) op2 = Operation({ "Type": "ReplicateAndRegister", "TargetSE": "CERN-USER" }) op2 += File({ "LFN": "/a/b/c/2", "Status": "Scheduled", "Checksum": "123456", "ChecksumType": "ADLER32" }) op3 = Operation({ "Type": "ReplicateAndRegister", "TargetSE": "CERN-USER" }) op3 += File({ "LFN": "/a/b/c/3", "Status": "Scheduled", "Checksum": "123456", "ChecksumType": "ADLER32" }) r += op1 r += op2 r += op3 put = db.putRequest(r) self.assertEqual(put["OK"], True, "1. putRequest failed: %s" % put.get("Message", "")) reqID = put['Value'] r = db.getRequest(reqID) self.assertEqual(r["OK"], True, "1. getRequest failed: %s" % r.get("Message", "")) r = r["Value"] del r[0] self.assertEqual(len(r), 2, "1. len wrong") put = db.putRequest(r) self.assertEqual(put["OK"], True, "2. putRequest failed: %s" % put.get("Message", "")) reqID = put['Value'] r = db.getRequest(reqID) self.assertEqual(r["OK"], True, "2. getRequest failed: %s" % r.get("Message", "")) r = r["Value"] self.assertEqual(len(r), 2, "2. len wrong") op4 = Operation({ "Type": "ReplicateAndRegister", "TargetSE": "CERN-USER" }) op4 += File({ "LFN": "/a/b/c/4", "Status": "Scheduled", "Checksum": "123456", "ChecksumType": "ADLER32" }) r[0] = op4 put = db.putRequest(r) self.assertEqual(put["OK"], True, "3. putRequest failed: %s" % put.get("Message", "")) reqID = put['Value'] r = db.getRequest(reqID) self.assertEqual(r["OK"], True, "3. getRequest failed: %s" % r.get("Message", "")) r = r["Value"] self.assertEqual(len(r), 2, "3. len wrong") delete = db.deleteRequest(reqID) self.assertEqual(delete["OK"], True, delete['Message'] if 'Message' in delete else 'OK')
# NOTE: Python 2 code (uses `long` and 1L/2L/4L literals).
def test01fullChain( self ):
    """Full request life cycle: put, DB summaries, status queries, second request."""
    put = self.requestClient.putRequest( self.request )
    self.assert_( put['OK'] )
    self.assertEqual( type( put['Value'] ), long )
    reqID = put['Value']
    # # summary
    ret = RequestDB().getDBSummary()
    self.assertEqual( ret, { 'OK': True,
                             'Value': { 'Operation': { 'ReplicateAndRegister': { 'Waiting': 1L } },
                                        'Request': { 'Waiting': 1L },
                                        'File': { 'Waiting': 2L} } } )
    get = self.requestClient.getRequest( reqID )
    self.assert_( get['OK'] )
    self.assertEqual( isinstance( get['Value'], Request ), True )
    # # summary - the request became "Assigned"
    res = RequestDB().getDBSummary()
    self.assertEqual( res, { 'OK': True,
                             'Value': { 'Operation': { 'ReplicateAndRegister': { 'Waiting': 1L } },
                                        'Request': { 'Assigned': 1L },
                                        'File': { 'Waiting': 2L} } } )
    # Assorted read-only queries must all succeed on the stored request.
    res = self.requestClient.getRequestInfo( reqID )
    self.assertEqual( res['OK'], True, res['Message'] if 'Message' in res else 'OK' )
    res = self.requestClient.getRequestFileStatus( reqID, self.file.LFN )
    self.assertEqual( res['OK'], True, res['Message'] if 'Message' in res else 'OK' )
    res = self.requestClient.getRequestFileStatus( reqID, [self.file.LFN] )
    self.assertEqual( res['OK'], True, res['Message'] if 'Message' in res else 'OK' )
    res = self.requestClient.getDigest( reqID )
    self.assertEqual( res['OK'], True, res['Message'] if 'Message' in res else 'OK' )
    res = self.requestClient.readRequestsForJobs( [123] )
    self.assertEqual( res['OK'], True, res['Message'] if 'Message' in res else 'OK' )
    self.assert_( isinstance( res['Value']['Successful'][123], Request ) )
    # Adding new request
    request2 = Request()
    request2.RequestName = "RequestManagerHandlerTests-2"
    request2.OwnerDN = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=cibak/CN=605919/CN=Krzysztof Ciba"
    request2.OwnerGroup = "dirac_user"
    request2.JobID = 456
    request2.addOperation( self.operation )
    # # update
    res = self.requestClient.putRequest( request2 )
    self.assertEqual( res['OK'], True, res['Message'] if 'Message' in res else 'OK' )
    reqID2 = res['Value']
    # # get summary again
    ret = RequestDB().getDBSummary()
    self.assertEqual( ret, { 'OK': True,
                             'Value': { 'Operation': { 'ReplicateAndRegister': {'Waiting': 2L } },
                                        'Request': { 'Waiting': 1L, 'Assigned': 1L },
                                        'File': { 'Waiting': 4L} } } )
elif path.find("http://") > -1: gLogger.error("Path %s was not foreseen!" % path) gLogger.error( "Location not known, upload to location yourself, and publish in CS manually" ) return S_ERROR() else: lfnpath = "%s%s" % (path, os.path.basename(appTar)) res = datMan.putAndRegister( lfnpath, appTar, ops.getValue('Software/BaseStorageElement', "CERN-SRM")) if not res['OK']: return res request = Request() requestClient = ReqClient() request.RequestName = 'copy_%s' % os.path.basename(appTar).replace( ".tgz", "").replace(".tar.gz", "") request.SourceComponent = 'ReplicateILCSoft' copies_at = ops.getValue('Software/CopiesAt', []) for copies in copies_at: transfer = Operation() transfer.Type = "ReplicateAndRegister" transfer.TargetSE = copies trFile = File() trFile.LFN = lfnpath trFile.GUID = "" transfer.addFile(trFile) request.addOperation(transfer) res = RequestValidator().validate(request) if not res['OK']: return res
def createRequest(self, requestName, archiveLFN, lfnChunk):
    """Create the Request."""

    def _tarballOperation(operationType, targetSE):
        # Build an operation acting on the single tarball file at archiveLFN.
        op = Operation()
        op.Type = operationType
        op.TargetSE = targetSE
        tarball = File()
        tarball.LFN = archiveLFN
        op.addFile(tarball)
        return op

    request = Request()
    request.RequestName = requestName
    self._checkReplicaSites(request, lfnChunk)

    # Archive the chunk of LFNs into a tarball; ArchiveFiles also uploads it.
    archiveFiles = Operation()
    archiveFiles.Type = 'ArchiveFiles'
    archiveFiles.Arguments = DEncode.encode({
        'SourceSE': self.sourceSEs[0],
        'TarballSE': self.switches['TarballSE'],
        'RegisterDescendent': self.switches['RegisterDescendent'],
        'ArchiveLFN': archiveLFN})
    self.addLFNs(archiveFiles, lfnChunk)
    request.addOperation(archiveFiles)

    # Replicate the Tarball, ArchiveFiles will upload it
    if self.switches.get('ReplicateTarball'):
        request.addOperation(_tarballOperation('ReplicateAndRegister', self.targetSE))

    # Optionally wait for the tarball to be migrated (to tape) at its final SE.
    if self.switches.get('CheckMigration'):
        migrationTarget = self.targetSE if self.switches.get(
            'ReplicateTarball') else self.switches['TarballSE']
        request.addOperation(_tarballOperation('CheckMigration', migrationTarget))

    # Register Archive Replica for LFNs
    if self.switches.get('ArchiveSE'):
        registerArchived = Operation()
        registerArchived.Type = 'RegisterReplica'
        registerArchived.TargetSE = self.switches.get('ArchiveSE')
        self.addLFNs(registerArchived, lfnChunk, addPFN=True)
        request.addOperation(registerArchived)

    # Remove all Other Replicas for LFNs
    if self.switches.get('RemoveReplicas'):
        removeArchiveReplicas = Operation()
        removeArchiveReplicas.Type = 'RemoveReplica'
        removeArchiveReplicas.TargetSE = ','.join(self.replicaSEs)
        self.addLFNs(removeArchiveReplicas, lfnChunk)
        request.addOperation(removeArchiveReplicas)

    # Remove all Replicas for LFNs
    if self.switches.get('RemoveFiles'):
        removeArchiveFiles = Operation()
        removeArchiveFiles.Type = 'RemoveFile'
        self.addLFNs(removeArchiveFiles, lfnChunk)
        request.addOperation(removeArchiveFiles)

    # Remove Original tarball replica
    if self.switches.get('ReplicateTarball'):
        request.addOperation(_tarballOperation('RemoveReplica', self.sourceSEs[0]))

    return request
self.assert_( res['OK'] ) res = self.requestClient.getRequestNamesForJobs( [123] ) self.assert_( res['OK'] ) self.assertEqual( res['Value'], {'Successful': {123L:self.request.RequestName}, 'Failed': {}} ) res = self.requestClient.getRequestNamesList() self.assert_( res['OK'] ) res = self.requestClient.readRequestsForJobs( [123] ) self.assert_( res['OK'] ) self.assert_( isinstance( res['Value']['Successful'][123], Request ) ) # Adding new request request2 = Request() request2.RequestName = "RequestManagerHandlerTests-2" request2.OwnerDN = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=cibak/CN=605919/CN=Krzysztof Ciba" request2.OwnerGroup = "dirac_user" request2.JobID = 456 request2.addOperation( self.operation ) # # update res = self.requestClient.putRequest( request2 ) self.assert_( res['OK'] ) # # get summary again ret = RequestDB().getDBSummary() self.assertEqual( ret, { 'OK': True, 'Value': { 'Operation': { 'ReplicateAndRegister': {'Waiting': 2L } }, 'Request': { 'Waiting': 1L, 'Assigned': 1L },
def test01fullChain(self):
    """Full request life cycle: put, summaries, queries, a second request, then delete both."""
    put = self.requestClient.putRequest(self.request)
    self.assertTrue(put['OK'], put)
    self.assertTrue(isinstance(put['Value'], six.integer_types))
    reqID = put['Value']
    # # summary
    ret = self.requestClient.getDBSummary()
    self.assertTrue(ret['OK'])
    self.assertEqual(
        ret['Value'], {
            'Operation': {'ReplicateAndRegister': {'Waiting': 1}},
            'Request': {'Waiting': 1},
            'File': {'Waiting': 2}})
    get = self.requestClient.getRequest(reqID)
    self.assertTrue(get['OK'])
    self.assertEqual(isinstance(get['Value'], Request), True)
    # # summary - the request became "Assigned"
    res = self.requestClient.getDBSummary()
    self.assertTrue(res['OK'])
    self.assertEqual(
        res['Value'], {
            'Operation': {'ReplicateAndRegister': {'Waiting': 1}},
            'Request': {'Assigned': 1},
            'File': {'Waiting': 2}})
    # Assorted read-only queries must all succeed on the stored request.
    res = self.requestClient.getRequestInfo(reqID)
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    res = self.requestClient.getRequestFileStatus(reqID, self.file.LFN)
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    res = self.requestClient.getRequestFileStatus(reqID, [self.file.LFN])
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    res = self.requestClient.getDigest(reqID)
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    res = self.requestClient.readRequestsForJobs([123])
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    self.assertTrue(isinstance(res['Value']['Successful'][123], Request))
    proxyInfo = getProxyInfo()['Value']
    # Adding new request
    request2 = Request()
    request2.RequestName = "RequestManagerHandlerTests-2"
    # BUGFIX: the owner credentials were previously assigned to self.request
    # (already stored above, so the assignment had no effect) instead of request2.
    request2.OwnerDN = proxyInfo['identity']
    request2.OwnerGroup = proxyInfo['group']
    request2.JobID = 456
    request2.addOperation(self.operation)
    # # update
    res = self.requestClient.putRequest(request2)
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    reqID2 = res['Value']
    # # get summary again
    ret = self.requestClient.getDBSummary()
    self.assertTrue(ret['OK'])
    self.assertEqual(
        ret['Value'], {
            'Operation': {'ReplicateAndRegister': {'Waiting': 2}},
            'Request': {'Waiting': 1, 'Assigned': 1},
            'File': {'Waiting': 4}})
    # Delete both requests and check the DB is empty again.
    delete = self.requestClient.deleteRequest(reqID)
    self.assertEqual(delete['OK'], True, delete['Message'] if 'Message' in delete else 'OK')
    delete = self.requestClient.deleteRequest(reqID2)
    self.assertEqual(delete['OK'], True, delete['Message'] if 'Message' in delete else 'OK')
    # # should be empty now
    ret = self.requestClient.getDBSummary()
    self.assertTrue(ret['OK'])
    self.assertEqual(ret['Value'], {'Operation': {}, 'Request': {}, 'File': {}})
shutil.copy(appTar,"%s%s" % (final_path, os.path.basename(appTar))) except EnvironmentError, x: gLogger.error("Could not copy because %s" % x) return S_ERROR("Could not copy because %s" % x) elif path.find("http://") > -1: gLogger.error("Path %s was not foreseen!" % path) gLogger.error("Location not known, upload to location yourself, and publish in CS manually") return S_ERROR() else: lfnpath = "%s%s" % (path, os.path.basename(appTar)) res = datMan.putAndRegister(lfnpath, appTar, ops.getValue('Software/BaseStorageElement', "CERN-SRM")) if not res['OK']: return res request = Request() requestClient = ReqClient() request.RequestName = 'copy_%s' % os.path.basename(appTar).replace(".tgz", "").replace(".tar.gz", "") request.SourceComponent = 'ReplicateILCSoft' copies_at = ops.getValue('Software/CopiesAt', []) for copies in copies_at: transfer = Operation() transfer.Type = "ReplicateAndRegister" transfer.TargetSE = copies trFile = File() trFile.LFN = lfnpath trFile.GUID = "" transfer.addFile(trFile) request.addOperation(transfer) res = RequestValidator().validate(request) if not res['OK']: return res
def main():
    """Submit a 'PutAndRegister' request uploading one local file to a target SE."""
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument("requestName: a request name")
    Script.registerArgument("LFN: logical file name")
    Script.registerArgument("localFile: local file you want to put")
    Script.registerArgument("targetSE: target SE")
    Script.parseCommandLine()

    import DIRAC
    from DIRAC import gLogger

    # parseCommandLine show help when mandatory arguments are not specified or incorrect argument
    requestName, LFN, PFN, targetSE = Script.getPositionalArgs(group=True)

    if not os.path.isabs(LFN):
        gLogger.error("LFN should be absolute path!!!")
        DIRAC.exit(-1)

    gLogger.info("will create request '%s' with 'PutAndRegister' "
                 "operation using %s pfn and %s target SE" % (requestName, PFN, targetSE))

    from DIRAC.RequestManagementSystem.Client.Request import Request
    from DIRAC.RequestManagementSystem.Client.Operation import Operation
    from DIRAC.RequestManagementSystem.Client.File import File
    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.Core.Utilities.Adler import fileAdler

    # The local file must exist and be a regular file.
    if not os.path.exists(PFN):
        gLogger.error("%s does not exist" % PFN)
        DIRAC.exit(-1)
    if not os.path.isfile(PFN):
        gLogger.error("%s is not a file" % PFN)
        DIRAC.exit(-1)

    PFN = os.path.abspath(PFN)
    size = os.path.getsize(PFN)
    adler32 = fileAdler(PFN)

    request = Request()
    request.RequestName = requestName

    putAndRegister = Operation()
    putAndRegister.Type = "PutAndRegister"
    putAndRegister.TargetSE = targetSE

    opFile = File()
    opFile.LFN = LFN
    opFile.PFN = PFN
    opFile.Size = size
    opFile.Checksum = adler32
    opFile.ChecksumType = "ADLER32"
    putAndRegister.addFile(opFile)
    request.addOperation(putAndRegister)

    reqClient = ReqClient()
    putRequest = reqClient.putRequest(request)
    if not putRequest["OK"]:
        gLogger.error("unable to put request '%s': %s" % (requestName, putRequest["Message"]))
        DIRAC.exit(-1)

    gLogger.always("Request '%s' has been put to ReqDB for execution." % requestName)
    gLogger.always("You can monitor its status using command: 'dirac-rms-request %s'" % requestName)
    DIRAC.exit(0)
def test01fullChain(self):
    """Full request life cycle: put, DB summaries, queries, a second request, then cleanup."""
    put = self.requestClient.putRequest(self.request)
    self.assertTrue(put['OK'], put)
    self.assertEqual(type(put['Value']), long)  # NOTE: Python 2 (`long` literal)
    reqID = put['Value']
    # # summary
    ret = RequestDB().getDBSummary()
    self.assertEqual(ret, {'OK': True,
                           'Value': {'Operation': {'ReplicateAndRegister': {'Waiting': 1}},
                                     'Request': {'Waiting': 1},
                                     'File': {'Waiting': 2}}})
    get = self.requestClient.getRequest(reqID)
    self.assertTrue(get['OK'])
    self.assertEqual(isinstance(get['Value'], Request), True)
    # # summary - the request became "Assigned"
    res = RequestDB().getDBSummary()
    self.assertEqual(res, {'OK': True,
                           'Value': {'Operation': {'ReplicateAndRegister': {'Waiting': 1}},
                                     'Request': {'Assigned': 1},
                                     'File': {'Waiting': 2}}})
    # Assorted read-only queries must all succeed on the stored request.
    res = self.requestClient.getRequestInfo(reqID)
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    res = self.requestClient.getRequestFileStatus(reqID, self.file.LFN)
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    res = self.requestClient.getRequestFileStatus(reqID, [self.file.LFN])
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    res = self.requestClient.getDigest(reqID)
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    res = self.requestClient.readRequestsForJobs([123])
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    self.assertTrue(isinstance(res['Value']['Successful'][123], Request))
    proxyInfo = getProxyInfo()['Value']
    # Adding new request
    request2 = Request()
    request2.RequestName = "RequestManagerHandlerTests-2"
    # NOTE(review): the next two lines set the owner on self.request (already put
    # above, so this has no effect) rather than on request2 -- presumably request2
    # was intended; confirm before changing.
    self.request.OwnerDN = proxyInfo['identity']
    self.request.OwnerGroup = proxyInfo['group']
    request2.JobID = 456
    request2.addOperation(self.operation)
    # # update
    res = self.requestClient.putRequest(request2)
    self.assertEqual(res['OK'], True, res['Message'] if 'Message' in res else 'OK')
    reqID2 = res['Value']
    # # get summary again
    ret = RequestDB().getDBSummary()
    self.assertEqual(ret,
                     {'OK': True,
                      'Value': {'Operation': {'ReplicateAndRegister': {'Waiting': 2}},
                                'Request': {'Waiting': 1, 'Assigned': 1},
                                'File': {'Waiting': 4}}})
    # Delete both requests; the DB summary should be empty afterwards.
    delete = self.requestClient.deleteRequest(reqID)
    self.assertEqual(delete['OK'], True, delete['Message'] if 'Message' in delete else 'OK')
    delete = self.requestClient.deleteRequest(reqID2)
    self.assertEqual(delete['OK'], True, delete['Message'] if 'Message' in delete else 'OK')
    # # should be empty now
    ret = RequestDB().getDBSummary()
    self.assertEqual(ret, {'OK': True,
                           'Value': {'Operation': {},
                                     'Request': {},
                                     'File': {}}})
gLogger.error("LFN list is empty!!!") error = -1 continue if len(lfnChunk) > Operation.MAX_FILES: gLogger.error( "too many LFNs, max number of files per operation is %s" % Operation.MAX_FILES) error = -1 continue count += 1 request = Request() request.RequestName = "%s_%s" % (md5(repr( time.time())).hexdigest()[:16], md5(repr( time.time())).hexdigest()[:16]) moveReplica = Operation() moveReplica.Type = 'MoveReplica' moveReplica.SourceSE = sourceSE moveReplica.TargetSE = ",".join(targetSEs) for lfn in lfnChunk: metaDict = metaDatas["Successful"][lfn] opFile = File() opFile.LFN = lfn opFile.Size = metaDict["Size"] if "Checksum" in metaDict: # # should check checksum type, now assuming Adler32 (metaDict["ChecksumType"] = 'AD'
def test01fullChain(self):
    """Full request life cycle checked against deltas of the initial DB summary."""
    # Take the initial summary so all later checks are relative (DB need not be empty).
    ret = self.requestClient.getDBSummary()
    self.assertTrue(ret["OK"])
    initialSummary = ret["Value"]
    put = self.requestClient.putRequest(self.request)
    self.assertTrue(put["OK"], put)
    self.assertTrue(isinstance(put["Value"], six.integer_types))
    reqID = put["Value"]
    # summary
    ret = self.requestClient.getDBSummary()
    self.assertTrue(ret["OK"])
    self._checkSummary(
        initialSummary,
        [
            ("Operation", "ReplicateAndRegister", "Waiting", 1),
            (None, "Request", "Waiting", 1),
            (None, "File", "Waiting", 2),
        ],
    )
    get = self.requestClient.getRequest(reqID)
    self.assertTrue(get["OK"])
    self.assertEqual(isinstance(get["Value"], Request), True)
    # # summary - the request became "Assigned"
    self._checkSummary(
        initialSummary,
        [
            ("Operation", "ReplicateAndRegister", "Waiting", 1),
            (None, "Request", "Assigned", 1),
            (None, "File", "Waiting", 2),
        ],
    )
    # Assorted read-only queries must all succeed on the stored request.
    res = self.requestClient.getRequestInfo(reqID)
    self.assertEqual(res["OK"], True, res["Message"] if "Message" in res else "OK")
    res = self.requestClient.getRequestFileStatus(reqID, self.file.LFN)
    self.assertEqual(res["OK"], True, res["Message"] if "Message" in res else "OK")
    res = self.requestClient.getRequestFileStatus(reqID, [self.file.LFN])
    self.assertEqual(res["OK"], True, res["Message"] if "Message" in res else "OK")
    res = self.requestClient.getDigest(reqID)
    self.assertEqual(res["OK"], True, res["Message"] if "Message" in res else "OK")
    res = self.requestClient.readRequestsForJobs([123])
    self.assertEqual(res["OK"], True, res["Message"] if "Message" in res else "OK")
    self.assertTrue(isinstance(res["Value"]["Successful"][123], Request))
    proxyInfo = getProxyInfo()["Value"]
    # Adding new request
    request2 = Request()
    request2.RequestName = "RequestManagerHandlerTests-2"
    # NOTE(review): the next two lines set the owner on self.request (already put
    # above, so this has no effect) rather than on request2 -- presumably request2
    # was intended; confirm before changing.
    self.request.OwnerDN = proxyInfo["identity"]
    self.request.OwnerGroup = proxyInfo["group"]
    request2.JobID = 456
    request2.addOperation(self.operation)
    # # update
    res = self.requestClient.putRequest(request2)
    self.assertEqual(res["OK"], True, res["Message"] if "Message" in res else "OK")
    reqID2 = res["Value"]
    # # get summary again
    ret = self.requestClient.getDBSummary()
    self.assertTrue(ret["OK"])
    self._checkSummary(
        initialSummary,
        [
            ("Operation", "ReplicateAndRegister", "Waiting", 2),
            (None, "Request", "Waiting", 1),
            (None, "Request", "Assigned", 1),
            (None, "File", "Waiting", 4),
        ],
    )
    # Delete both requests; the summary should return to its initial state.
    delete = self.requestClient.deleteRequest(reqID)
    self.assertEqual(delete["OK"], True, delete["Message"] if "Message" in delete else "OK")
    delete = self.requestClient.deleteRequest(reqID2)
    self.assertEqual(delete["OK"], True, delete["Message"] if "Message" in delete else "OK")
    # # should be empty now
    ret = self.requestClient.getDBSummary()
    self.assertTrue(ret["OK"])
    self.assertEqual(ret["Value"], initialSummary)
def test05FTS(self):
    """ FTS state machine """
    # Build a request with a single ReplicateAndRegister (FTS) operation
    # holding one file; everything should start out "Waiting".
    req = Request()
    req.RequestName = "FTSTest"
    ftsTransfer = Operation()
    ftsTransfer.Type = "ReplicateAndRegister"
    ftsTransfer.TargetSE = "CERN-USER"
    ftsFile = File()
    ftsFile.LFN = "/a/b/c"
    ftsFile.Checksum = "123456"
    ftsFile.ChecksumType = "Adler32"
    ftsTransfer.addFile(ftsFile)
    req.addOperation(ftsTransfer)
    self.assertEqual(req.Status, "Waiting",
                     "1. wrong request status: %s" % req.Status)
    self.assertEqual(ftsTransfer.Status, "Waiting",
                     "1. wrong ftsStatus status: %s" % ftsTransfer.Status)
    # # scheduled
    # Scheduling the only file propagates "Scheduled" up to operation and request.
    ftsFile.Status = "Scheduled"
    self.assertEqual(
        ftsTransfer.Status, "Scheduled",
        "2. wrong status for ftsTransfer: %s" % ftsTransfer.Status)
    self.assertEqual(req.Status, "Scheduled",
                     "2. wrong status for request: %s" % req.Status)
    # # add new operation before FTS
    # Inserting a waiting RegisterReplica before the scheduled transfer makes the
    # request "Waiting" again while the FTS operation itself stays "Scheduled".
    insertBefore = Operation()
    insertBefore.Type = "RegisterReplica"
    insertBefore.TargetSE = "CERN-USER"
    insertFile = File()
    insertFile.LFN = "/a/b/c"
    insertFile.PFN = "http://foo/bar"
    insertBefore.addFile(insertFile)
    req.insertBefore(insertBefore, ftsTransfer)
    self.assertEqual(
        insertBefore.Status, "Waiting",
        "3. wrong status for insertBefore: %s" % insertBefore.Status)
    self.assertEqual(
        ftsTransfer.Status, "Scheduled",
        "3. wrong status for ftsStatus: %s" % ftsTransfer.Status)
    self.assertEqual(req.Status, "Waiting",
                     "3. wrong status for request: %s" % req.Status)
    # # prev done
    # Completing the inserted operation hands "Scheduled" back to the request.
    insertFile.Status = "Done"
    self.assertEqual(
        insertBefore.Status, "Done",
        "4. wrong status for insertBefore: %s" % insertBefore.Status)
    self.assertEqual(
        ftsTransfer.Status, "Scheduled",
        "4. wrong status for ftsStatus: %s" % ftsTransfer.Status)
    self.assertEqual(req.Status, "Scheduled",
                     "4. wrong status for request: %s" % req.Status)
    # # reschedule
    # Putting the file back to "Waiting" reschedules operation and request.
    ftsFile.Status = "Waiting"
    self.assertEqual(
        insertBefore.Status, "Done",
        "5. wrong status for insertBefore: %s" % insertBefore.Status)
    self.assertEqual(
        ftsTransfer.Status, "Waiting",
        "5. wrong status for ftsStatus: %s" % ftsTransfer.Status)
    self.assertEqual(req.Status, "Waiting",
                     "5. wrong status for request: %s" % req.Status)
    # # fts done
    # BUGFIX: these messages were mislabeled "5."; renumbered to "6." so a
    # failure reports the correct step (message text only, assertions unchanged).
    ftsFile.Status = "Done"
    self.assertEqual(
        insertBefore.Status, "Done",
        "6. wrong status for insertBefore: %s" % insertBefore.Status)
    self.assertEqual(
        ftsTransfer.Status, "Done",
        "6. wrong status for ftsStatus: %s" % ftsTransfer.Status)
    self.assertEqual(req.Status, "Done",
                     "6. wrong status for request: %s" % req.Status)
gLogger.error( "skipping %s: %s" % ( failedLFN, reason ) ) lfnChunk = set( metaDatas["Successful"] ) if not lfnChunk: gLogger.error( "LFN list is empty!!!" ) error = -1 continue if len( lfnChunk ) > Operation.MAX_FILES: gLogger.error( "too many LFNs, max number of files per operation is %s" % Operation.MAX_FILES ) error = -1 continue count += 1 request = Request() request.RequestName = requestName if not multiRequests else '%s_%d' % ( requestName, count ) replicateAndRegister = Operation() replicateAndRegister.Type = "ReplicateAndRegister" replicateAndRegister.TargetSE = ",".join( targetSEs ) if catalog is not None: replicateAndRegister.Catalog = catalog for lfn in lfnChunk: metaDict = metaDatas["Successful"][lfn] opFile = File() opFile.LFN = lfn opFile.Size = metaDict["Size"] if "Checksum" in metaDict: # # should check checksum type, now assuming Adler32 (metaDict["ChecksumType"] = 'AD'
gLogger.error( "skipping %s: %s" % ( failedLFN, reason ) ) lfnChunk = set( metaDatas["Successful"] ) if not lfnChunk: gLogger.error( "LFN list is empty!!!" ) error = -1 continue if len( lfnChunk ) > Operation.MAX_FILES: gLogger.error( "too many LFNs, max number of files per operation is %s" % Operation.MAX_FILES ) error = -1 continue count += 1 request = Request() request.RequestName = requestName if not multiRequests else '%s_%d' % ( requestName, count ) replicateAndRegister = Operation() replicateAndRegister.Type = "ReplicateAndRegister" replicateAndRegister.TargetSE = ",".join( targetSEs ) for lfn in lfnChunk: metaDict = metaDatas["Successful"][lfn] opFile = File() opFile.LFN = lfn opFile.Size = metaDict["Size"] if "Checksum" in metaDict: # # should check checksum type, now assuming Adler32 (metaDict["ChecksumType"] = 'AD' opFile.Checksum = metaDict["Checksum"] opFile.ChecksumType = "ADLER32"
Script.showHelp() from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient from DIRAC.RequestManagementSystem.Client.Request import Request from DIRAC.RequestManagementSystem.Client.Operation import Operation from DIRAC.RequestManagementSystem.Client.File import File from DIRAC.RequestManagementSystem.private.RequestValidator import RequestValidator from DIRAC.Resources.Catalog.FileCatalog import FileCatalog reqClient = ReqClient() fc = FileCatalog() for lfnList in breakListIntoChunks( lfns, 100 ): oRequest = Request() oRequest.RequestName = "%s_%s" % ( md5( repr( time.time() ) ).hexdigest()[:16], md5( repr( time.time() ) ).hexdigest()[:16] ) replicateAndRegister = Operation() replicateAndRegister.Type = 'ReplicateAndRegister' replicateAndRegister.TargetSE = targetSE res = fc.getFileMetadata( lfnList ) if not res['OK']: print "Can't get file metadata: %s" % res['Message'] DIRAC.exit( 1 ) if res['Value']['Failed']: print "Could not get the file metadata of the following, so skipping them:" for fFile in res['Value']['Failed']: print fFile lfnMetadata = res['Value']['Successful']
def execute(self):
    """The JobAgent execution method.

    One agent cycle: shuffle the configured queues, verify that the local
    computing element still has free slots, then for each queue obtain a
    pilot proxy, check the remote CE capacity, and repeatedly match and
    submit jobs to that CE until no further job matches or a slot check
    fails.  Failed interactions with a queue increment
    ``self.failedQueues[queueName]``.

    :return: S_OK / S_ERROR structure (DIRAC return-value convention used
             throughout this method); S_OK("Push Job Agent cycle complete")
             on a normally completed cycle.
    """
    self.log.verbose("Job Agent execution loop")

    # Shuffle so queues are not always probed in the same order across cycles
    queueDictItems = list(self.queueDict.items())
    random.shuffle(queueDictItems)

    # Check that there are enough slots locally; a truthy result["Value"]
    # also ends the cycle (CE busy / nothing to do)
    result = self._checkCEAvailability(self.computingElement)
    if not result["OK"] or result["Value"]:
        return result

    for queueName, queueDictionary in queueDictItems:
        # Make sure there is no problem with the queue before trying to submit
        if not self._allowedToSubmit(queueName):
            continue

        # Get a working proxy for the pilot credentials of this agent
        ce = queueDictionary["CE"]
        cpuTime = 86400 * 3  # request a proxy long enough for 3 days
        self.log.verbose("Getting pilot proxy", "for %s/%s %d long" % (self.pilotDN, self.pilotGroup, cpuTime))
        result = gProxyManager.getPilotProxyFromDIRACGroup(self.pilotDN, self.pilotGroup, cpuTime)
        if not result["OK"]:
            return result
        proxy = result["Value"]
        result = proxy.getRemainingSecs()  # pylint: disable=no-member
        if not result["OK"]:
            return result
        lifetime_secs = result["Value"]
        ce.setProxy(proxy, lifetime_secs)

        # Check that there are enough slots in the remote CE to match a job
        result = self._checkCEAvailability(ce)
        if not result["OK"] or result["Value"]:
            self.failedQueues[queueName] += 1
            continue

        # Get environment details and enhance them
        result = self._getCEDict(ce)
        if not result["OK"]:
            self.failedQueues[queueName] += 1
            continue
        ceDictList = result["Value"]

        for ceDict in ceDictList:
            # Information about number of processors might not be returned in CE.getCEStatus()
            ceDict["NumberOfProcessors"] = ce.ceParameters.get("NumberOfProcessors")
            self._setCEDict(ceDict)

        # Update the configuration with the names of the Site, CE and queue to target
        # This is used in the next stages
        self._updateConfiguration("Site", queueDictionary["Site"])
        self._updateConfiguration("GridCE", queueDictionary["CEName"])
        self._updateConfiguration("CEQueue", queueDictionary["QueueName"])
        self._updateConfiguration("RemoteExecution", True)

        # Try to match a job; keep matching/submitting until the matcher
        # returns an error or a capacity check breaks out of the loop
        jobRequest = self._matchAJob(ceDictList)
        while jobRequest["OK"]:
            # Check matcher information returned
            matcherParams = ["JDL", "DN", "Group"]
            matcherInfo = jobRequest["Value"]
            jobID = matcherInfo["JobID"]
            jobReport = JobReport(jobID, "PushJobAgent@%s" % self.siteName)
            result = self._checkMatcherInfo(matcherInfo, matcherParams, jobReport)
            if not result["OK"]:
                self.failedQueues[queueName] += 1
                break

            jobJDL = matcherInfo["JDL"]
            jobGroup = matcherInfo["Group"]
            ownerDN = matcherInfo["DN"]
            ceDict = matcherInfo["CEDict"]
            matchTime = matcherInfo["matchTime"]

            # Everything the matcher returned beyond the mandatory keys is
            # forwarded as optimizer parameters
            optimizerParams = {}
            for key in matcherInfo:
                if key not in matcherParams:
                    optimizerParams[key] = matcherInfo[key]

            # Get JDL parameters
            parameters = self._getJDLParameters(jobJDL)
            if not parameters["OK"]:
                jobReport.setJobStatus(status=JobStatus.FAILED, minorStatus="Could Not Extract JDL Parameters")
                self.log.warn("Could Not Extract JDL Parameters", parameters["Message"])
                self.failedQueues[queueName] += 1
                break

            params = parameters["Value"]
            result = self._extractValuesFromJobParams(params, jobReport)
            if not result["OK"]:
                self.failedQueues[queueName] += 1
                break
            submissionParams = result["Value"]
            # NOTE: jobID is re-read from the extracted parameters here,
            # overwriting the value taken from matcherInfo above
            jobID = submissionParams["jobID"]
            jobType = submissionParams["jobType"]

            self.log.verbose("Job request successful: \n", jobRequest["Value"])
            self.log.info(
                "Received", "JobID=%s, JobType=%s, OwnerDN=%s, JobGroup=%s" % (jobID, jobType, ownerDN, jobGroup)
            )
            try:
                jobReport.setJobParameter(par_name="MatcherServiceTime", par_value=str(matchTime), sendFlag=False)
                jobReport.setJobStatus(
                    status=JobStatus.MATCHED, minorStatus="Job Received by Agent", sendFlag=False
                )

                # Setup proxy with the job owner's credentials; on failure the
                # job is rescheduled rather than failed outright
                result_setupProxy = self._setupProxy(ownerDN, jobGroup)
                if not result_setupProxy["OK"]:
                    result = self._rescheduleFailedJob(jobID, result_setupProxy["Message"])
                    self.failedQueues[queueName] += 1
                    break
                proxyChain = result_setupProxy.get("Value")

                # Check software and install them if required
                software = self._checkInstallSoftware(jobID, params, ceDict, jobReport)
                if not software["OK"]:
                    self.log.error("Failed to install software for job", "%s" % (jobID))
                    errorMsg = software["Message"]
                    if not errorMsg:
                        errorMsg = "Failed software installation"
                    result = self._rescheduleFailedJob(jobID, errorMsg)
                    self.failedQueues[queueName] += 1
                    break

                # Submit the job to the CE
                self.log.debug("Before self._submitJob() (%sCE)" % (self.ceName))
                result_submitJob = self._submitJob(
                    jobID=jobID,
                    jobParams=params,
                    resourceParams=ceDict,
                    optimizerParams=optimizerParams,
                    proxyChain=proxyChain,
                    jobReport=jobReport,
                    processors=submissionParams["processors"],
                    wholeNode=submissionParams["wholeNode"],
                    maxNumberOfProcessors=submissionParams["maxNumberOfProcessors"],
                    mpTag=submissionParams["mpTag"],
                )

                # Committing the JobReport before evaluating the result of job submission.
                # If the commit fails, fall back to a failover Request carrying a
                # ForwardDISET operation so the status updates are not lost.
                res = jobReport.commit()
                if not res["OK"]:
                    resFD = jobReport.generateForwardDISET()
                    if not resFD["OK"]:
                        self.log.error("Error generating ForwardDISET operation", resFD["Message"])
                    elif resFD["Value"]:
                        # Here we create the Request.
                        op = resFD["Value"]
                        request = Request()
                        requestName = "jobAgent_%s" % jobID
                        # Strip double quotes: the jobID-derived name must be safe
                        # for the request machinery
                        request.RequestName = requestName.replace('"', "")
                        request.JobID = jobID
                        request.SourceComponent = "JobAgent_%s" % jobID
                        request.addOperation(op)
                        # This might fail, but only a message would be printed.
                        self._sendFailoverRequest(request)

                if not result_submitJob["OK"]:
                    self.log.error("Error during submission", result_submitJob["Message"])
                    self.failedQueues[queueName] += 1
                    break
                elif "PayloadFailed" in result_submitJob:
                    # Do not keep running and do not overwrite the Payload error
                    message = "Payload execution failed with error code %s" % result_submitJob["PayloadFailed"]
                    self.log.info(message)

                self.log.debug("After %sCE submitJob()" % (self.ceName))

                # Check that there are enough slots locally
                result = self._checkCEAvailability(self.computingElement)
                if not result["OK"] or result["Value"]:
                    return result

                # Check that there are enough slots in the remote CE to match a new job
                result = self._checkCEAvailability(ce)
                if not result["OK"] or result["Value"]:
                    self.failedQueues[queueName] += 1
                    break

                # Try to match a new job
                jobRequest = self._matchAJob(ceDictList)
            except Exception as subExcept:  # pylint: disable=broad-except
                self.log.exception("Exception in submission", "", lException=subExcept, lExcInfo=True)
                result = self._rescheduleFailedJob(jobID, "Job processing failed with exception")
                self.failedQueues[queueName] += 1
                break

        # The while loop ended with an error from the matcher: record it and
        # move on to the next queue
        if not jobRequest["OK"]:
            self._checkMatchingIssues(jobRequest)
            self.failedQueues[queueName] += 1
            continue

    return S_OK("Push Job Agent cycle complete")