class TransferAgent(AgentModule, RequestAgentMixIn): def initialize(self): self.RequestDBClient = RequestClient() self.ReplicaManager = ReplicaManager() self.DataLog = DataLoggingClient() gMonitor.registerActivity("Iteration", "Agent Loops", "TransferAgent", "Loops/min", gMonitor.OP_SUM) gMonitor.registerActivity("Execute", "Request Processed", "TransferAgent", "Requests/min", gMonitor.OP_SUM) gMonitor.registerActivity("Done", "Request Completed", "TransferAgent", "Requests/min", gMonitor.OP_SUM) gMonitor.registerActivity("Replicate and register", "Replicate and register operations", "TransferAgent", "Attempts/min", gMonitor.OP_SUM) gMonitor.registerActivity("Replicate", "Replicate operations", "TransferAgent", "Attempts/min", gMonitor.OP_SUM) gMonitor.registerActivity("Put and register", "Put and register operations", "TransferAgent", "Attempts/min", gMonitor.OP_SUM) gMonitor.registerActivity("Put", "Put operations", "TransferAgent", "Attempts/min", gMonitor.OP_SUM) gMonitor.registerActivity("Replication successful", "Successful replications", "TransferAgent", "Successful/min", gMonitor.OP_SUM) gMonitor.registerActivity("Put successful", "Successful puts", "TransferAgent", "Successful/min", gMonitor.OP_SUM) gMonitor.registerActivity("Replication failed", "Failed replications", "TransferAgent", "Failed/min", gMonitor.OP_SUM) gMonitor.registerActivity("Put failed", "Failed puts", "TransferAgent", "Failed/min", gMonitor.OP_SUM) gMonitor.registerActivity("Replica registration successful", "Successful replica registrations", "TransferAgent", "Successful/min", gMonitor.OP_SUM) gMonitor.registerActivity("File registration successful", "Successful file registrations", "TransferAgent", "Successful/min", gMonitor.OP_SUM) gMonitor.registerActivity("Replica registration failed", "Failed replica registrations", "TransferAgent", "Failed/min", gMonitor.OP_SUM) gMonitor.registerActivity("File registration failed", "Failed file registrations", "TransferAgent", "Failed/min", gMonitor.OP_SUM) self.maxNumberOfThreads = self.am_getOption('NumberOfThreads', 1) self.threadPoolDepth = self.am_getOption('ThreadPoolDepth', 1) self.threadPool = ThreadPool(1, self.maxNumberOfThreads) # This sets the Default Proxy to used as that defined under # /Operations/Shifter/DataManager # the shifterProxy option in the Configuration can be used to change this default. self.am_setOption('shifterProxy', 'DataManager') return S_OK() def execute(self): for i in range(self.threadPoolDepth): requestExecutor = ThreadedJob(self.executeRequest) self.threadPool.queueJob(requestExecutor) self.threadPool.processResults() return self.executeRequest() def executeRequest(self): ################################################ # Get a request from request DB gMonitor.addMark("Iteration", 1) res = self.RequestDBClient.getRequest('transfer') if not res['OK']: gLogger.info( "TransferAgent.execute: Failed to get request from database.") return S_OK() elif not res['Value']: gLogger.info( "TransferAgent.execute: No requests to be executed found.") return S_OK() requestString = res['Value']['RequestString'] requestName = res['Value']['RequestName'] sourceServer = res['Value']['Server'] try: jobID = int(res['Value']['JobID']) except: jobID = 0 gLogger.info("TransferAgent.execute: Obtained request %s" % requestName) result = self.RequestDBClient.getCurrentExecutionOrder( requestName, sourceServer) if result['OK']: currentOrder = result['Value'] else: return S_OK('Can not get the request execution order') oRequest = RequestContainer(request=requestString) ################################################ # Find the number of sub-requests from the request res = oRequest.getNumSubRequests('transfer') if not res['OK']: errStr = "TransferAgent.execute: Failed to obtain number of transfer subrequests." gLogger.error(errStr, res['Message']) return S_OK() gLogger.info("TransferAgent.execute: Found %s sub requests." % res['Value']) ################################################ # For all the sub-requests in the request modified = False for ind in range(res['Value']): gMonitor.addMark("Execute", 1) gLogger.info("TransferAgent.execute: Processing sub-request %s." % ind) subRequestAttributes = oRequest.getSubRequestAttributes( ind, 'transfer')['Value'] if subRequestAttributes['ExecutionOrder']: subExecutionOrder = int(subRequestAttributes['ExecutionOrder']) else: subExecutionOrder = 0 subStatus = subRequestAttributes['Status'] if subStatus == 'Waiting' and subExecutionOrder <= currentOrder: subRequestFiles = oRequest.getSubRequestFiles( ind, 'transfer')['Value'] operation = subRequestAttributes['Operation'] subRequestError = '' ################################################ # If the sub-request is a put and register operation if operation == 'putAndRegister' or operation == 'putAndRegisterAndRemove': gLogger.info( "TransferAgent.execute: Attempting to execute %s sub-request." % operation) diracSE = str(subRequestAttributes['TargetSE']) catalog = '' if subRequestAttributes.has_key('Catalogue'): catalog = subRequestAttributes['Catalogue'] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': gMonitor.addMark("Put and register", 1) lfn = str(subRequestFile['LFN']) file = subRequestFile['PFN'] guid = subRequestFile['GUID'] addler = subRequestFile['Addler'] res = self.ReplicaManager.putAndRegister( lfn, file, diracSE, guid=guid, checksum=addler, catalog=catalog) if res['OK']: if res['Value']['Successful'].has_key(lfn): if not res['Value']['Successful'][ lfn].has_key('put'): gMonitor.addMark("Put failed", 1) self.DataLog.addFileRecord( lfn, 'PutFail', diracSE, '', 'TransferAgent') gLogger.info( "TransferAgent.execute: Failed to put %s to %s." % (lfn, diracSE)) subRequestError = "Put operation failed for %s to %s" % ( lfn, diracSE) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Error', 'Put failed') elif not res['Value']['Successful'][ lfn].has_key('register'): gMonitor.addMark("Put successful", 1) gMonitor.addMark( "File registration failed", 1) self.DataLog.addFileRecord( lfn, 'Put', diracSE, '', 'TransferAgent') self.DataLog.addFileRecord( lfn, 'RegisterFail', diracSE, '', 'TransferAgent') gLogger.info( "TransferAgent.execute: Successfully put %s to %s in %s seconds." % (lfn, diracSE, res['Value'] ['Successful'][lfn]['put'])) gLogger.info( "TransferAgent.execute: Failed to register %s to %s." % (lfn, diracSE)) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done') oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Error', 'Registration failed') subRequestError = "Registration failed for %s to %s" % ( lfn, diracSE) fileDict = res['Value']['Failed'][lfn][ 'register'] registerRequestDict = { 'Attributes': { 'TargetSE': fileDict['TargetSE'], 'Operation': 'registerFile' }, 'Files': [{ 'LFN': fileDict['LFN'], 'PFN': fileDict['PFN'], 'Size': fileDict['Size'], 'Addler': fileDict['Addler'], 'GUID': fileDict['GUID'] }] } gLogger.info( "TransferAgent.execute: Setting registration request for failed file." ) oRequest.addSubRequest( registerRequestDict, 'register') modified = True else: gMonitor.addMark("Put successful", 1) gMonitor.addMark( "File registration successful", 1) self.DataLog.addFileRecord( lfn, 'Put', diracSE, '', 'TransferAgent') self.DataLog.addFileRecord( lfn, 'Register', diracSE, '', 'TransferAgent') gLogger.info( "TransferAgent.execute: Successfully put %s to %s in %s seconds." % (lfn, diracSE, res['Value'] ['Successful'][lfn]['put'])) gLogger.info( "TransferAgent.execute: Successfully registered %s to %s in %s seconds." % (lfn, diracSE, res['Value'] ['Successful'][lfn]['register'])) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done') modified = True else: gMonitor.addMark("Put failed", 1) self.DataLog.addFileRecord( lfn, 'PutFail', diracSE, '', 'TransferAgent') errStr = "TransferAgent.execute: Failed to put and register file." gLogger.error( errStr, "%s %s %s" % (lfn, diracSE, res['Value']['Failed'][lfn])) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Error', 'Complete file failure') subRequestError = "Failed to put and register file" else: gMonitor.addMark("Put failed", 1) self.DataLog.addFileRecord( lfn, 'PutFail', diracSE, '', 'TransferAgent') errStr = "TransferAgent.execute: Completely failed to put and register file." gLogger.error(errStr, res['Message']) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Error', 'RM call failure') subRequestError = operation + " RM call file" else: gLogger.info( "TransferAgent.execute: File already completed." ) ################################################ # If the sub-request is a put operation elif operation == 'put': gLogger.info( "TransferAgent.execute: Attempting to execute %s sub-request." % operation) diracSE = subRequestAttributes['TargetSE'] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': gMonitor.addMark("Put", 1) lfn = subRequestFile['LFN'] file = subRequestFile['PFN'] res = self.ReplicaManager.put(lfn, file, diracSE) if res['OK']: if res['Value']['Successful'].has_key(lfn): gMonitor.addMark("Put successful", 1) self.DataLog.addFileRecord( lfn, 'Put', diracSE, '', 'TransferAgent') gLogger.info( "TransferAgent.execute: Successfully put %s to %s in %s seconds." % (lfn, diracSE, res['Value']['Successful'][lfn])) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done') modified = True else: gMonitor.addMark("Put failed", 1) self.DataLog.addFileRecord( lfn, 'PutFail', diracSE, '', 'TransferAgent') errStr = "TransferAgent.execute: Failed to put file." gLogger.error( errStr, "%s %s %s" % (lfn, diracSE, res['Value']['Failed'][lfn])) subRequestError = "Put operation failed for %s to %s" % ( lfn, diracSE) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Error', 'Put failed') else: gMonitor.addMark("Put failed", 1) self.DataLog.addFileRecord( lfn, 'PutFail', diracSE, '', 'TransferAgent') errStr = "TransferAgent.execute: Completely failed to put file." gLogger.error(errStr, res['Message']) subRequestError = "Put RM call failed for %s to %s" % ( lfn, diracSE) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Error', 'Put RM call failed') else: gLogger.info( "TransferAgent.execute: File already completed." ) ################################################ # If the sub-request is a replicate and register operation elif operation == 'replicateAndRegister' or operation == 'replicateAndRegisterAndRemove': gLogger.info( "TransferAgent.execute: Attempting to execute %s sub-request." % operation) targetSE = subRequestAttributes['TargetSE'] sourceSE = subRequestAttributes['SourceSE'] if sourceSE == "None": sourceSE = '' for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': gMonitor.addMark("Replicate and register", 1) lfn = subRequestFile['LFN'] res = self.ReplicaManager.replicateAndRegister( lfn, targetSE, sourceSE=sourceSE) if res['OK']: if res['Value']['Successful'].has_key(lfn): if not res['Value']['Successful'][ lfn].has_key('replicate'): gLogger.info( "TransferAgent.execute: Failed to replicate %s to %s." % (lfn, targetSE)) gMonitor.addMark( "Replication failed", 1) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, "Error", "Replication failed") subRequestError = "Replication failed for %s to %s" % ( lfn, targetSE) elif not res['Value']['Successful'][ lfn].has_key('register'): gMonitor.addMark( "Replication successful", 1) gMonitor.addMark( "Replica registration failed", 1) gLogger.info( "TransferAgent.execute: Successfully replicated %s to %s in %s seconds." % (lfn, targetSE, res['Value'] ['Successful'][lfn]['replicate'])) gLogger.info( "TransferAgent.execute: Failed to register %s to %s." % (lfn, targetSE)) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done') oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Error', 'Registration failed') subRequestError = "Registration failed for %s to %s" % ( lfn, targetSE) fileDict = res['Value']['Failed'][lfn][ 'register'] registerRequestDict = { 'Attributes': { 'TargetSE': fileDict['TargetSE'], 'Operation': 'registerReplica' }, 'Files': [{ 'LFN': fileDict['LFN'], 'PFN': fileDict['PFN'] }] } gLogger.info( "TransferAgent.execute: Setting registration request for failed replica." ) oRequest.addSubRequest( registerRequestDict, 'register') modified = True else: gMonitor.addMark( "Replication successful", 1) gMonitor.addMark( "Replica registration successful", 1) gLogger.info( "TransferAgent.execute: Successfully replicated %s to %s in %s seconds." % (lfn, targetSE, res['Value'] ['Successful'][lfn]['replicate'])) gLogger.info( "TransferAgent.execute: Successfully registered %s to %s in %s seconds." % (lfn, targetSE, res['Value'] ['Successful'][lfn]['register'])) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done') modified = True else: gMonitor.addMark("Replication failed", 1) errStr = "TransferAgent.execute: Failed to replicate and register file." gLogger.error( errStr, "%s %s %s" % (lfn, targetSE, res['Value']['Failed'][lfn])) else: gMonitor.addMark("Replication failed", 1) errStr = "TransferAgent.execute: Completely failed to replicate and register file." gLogger.error(errStr, res['Message']) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Error', 'RM call failure') subRequestError = operation + " RM call failed" else: gLogger.info( "TransferAgent.execute: File already completed." ) ################################################ # If the sub-request is a replicate operation elif operation == 'replicate': gLogger.info( "TransferAgent.execute: Attempting to execute %s sub-request." % operation) targetSE = subRequestAttributes['TargetSE'] sourceSE = subRequestAttributes['SourceSE'] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': gMonitor.addMark("Replicate", 1) lfn = subRequestFile['LFN'] res = self.ReplicaManager.replicate( lfn, targetSE, sourceSE=sourceSE) if res['OK']: if res['Value']['Successful'].has_key(lfn): gMonitor.addMark("Replication successful", 1) gLogger.info( "TransferAgent.execute: Successfully replicated %s to %s in %s seconds." % (lfn, diracSE, res['Value']['Successful'][lfn])) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done') modified = True else: gMonitor.addMark("Replication failed", 1) errStr = "TransferAgent.execute: Failed to replicate file." gLogger.error( errStr, "%s %s %s" % (lfn, targetSE, res['Value']['Failed'][lfn])) subRequestError = "Replicate operation failed for %s to %s" % ( lfn, targetSE) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Error', 'Put failed') else: gMonitor.addMark("Replication failed", 1) errStr = "TransferAgent.execute: Completely failed to replicate file." gLogger.error(errStr, res['Message']) subRequestError = "Replicate RM call failed for %s to %s" % ( lfn, targetSE) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Error', 'Replicate RM call failed') else: gLogger.info( "TransferAgent.execute: File already completed." ) ################################################ # If the sub-request is a get operation elif operation == 'get': gLogger.info( "TransferAgent.execute: Attempting to execute %s sub-request." % operation) sourceSE = subRequestAttributes['TargetSE'] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': lfn = str(subRequestFile['LFN']) pfn = str(subRequestFile['PFN']) got = False if sourceSE and pfn: res = self.ReplicaManager.getStorageFile( pfn, sourceSE) if res['Value']['Successful'].has_key(pfn): got = True else: res = self.ReplicaManager.getFile(lfn) if res['Value']['Successful'].has_key(lfn): got = False if got: gLogger.info( "TransferAgent.execute: Successfully got %s." % lfn) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done') modified = True else: errStr = "TransferAgent.execute: Failed to get file." gLogger.error(errStr, lfn) else: gLogger.info( "TransferAgent.execute: File already completed." ) ################################################ # If the sub-request is none of the above types else: gLogger.error( "TransferAgent.execute: Operation not supported.", operation) if subRequestError: oRequest.setSubRequestAttributeValue( ind, 'transfer', 'Error', subRequestError) ################################################ # Determine whether there are any active files if oRequest.isSubRequestEmpty(ind, 'transfer')['Value']: oRequest.setSubRequestStatus(ind, 'transfer', 'Done') gMonitor.addMark("Done", 1) ################################################ # If the sub-request is already in terminal state else: gLogger.info( "TransferAgent.execute: Sub-request %s is status '%s' and not to be executed." % (ind, subRequestAttributes['Status'])) ################################################ # Generate the new request string after operation requestString = oRequest.toXML()['Value'] res = self.RequestDBClient.updateRequest(requestName, requestString, sourceServer) if modified and jobID: result = self.finalizeRequest(requestName, jobID, sourceServer) return S_OK()
class RemovalAgent(AgentModule, RequestAgentMixIn): """ This Agent takes care of executing "removal" request from the RequestManagement system """ def __init__(self, *args): """ Initialize the base class and define some extra data members """ AgentModule.__init__(self, *args) self.requestDBClient = None self.replicaManager = None self.maxNumberOfThreads = 4 self.maxRequestsInQueue = 100 self.threadPool = None self.timeOutCounter = 0 self.pendingRequests = True def initialize(self): """ Called by the framework upon startup, before any cycle (execute method bellow) """ self.requestDBClient = RequestClient() # the RequestAgentMixIn needs the capitalized version, until is is fixed keep this. self.RequestDBClient = self.requestDBClient self.replicaManager = ReplicaManager() gMonitor.registerActivity("Iteration", "Agent Loops", "RemovalAgent", "Loops/min", gMonitor.OP_SUM) gMonitor.registerActivity("Execute", "Request Processed", "RemovalAgent", "Requests/min", gMonitor.OP_SUM) gMonitor.registerActivity("Done", "Request Completed", "RemovalAgent", "Requests/min", gMonitor.OP_SUM) gMonitor.registerActivity("PhysicalRemovalAtt", "Physical removals attempted", "RemovalAgent", "Removal/min", gMonitor.OP_SUM) gMonitor.registerActivity("PhysicalRemovalDone", "Successful physical removals", "RemovalAgent", "Removal/min", gMonitor.OP_SUM) gMonitor.registerActivity("PhysicalRemovalFail", "Failed physical removals", "RemovalAgent", "Removal/min", gMonitor.OP_SUM) gMonitor.registerActivity("PhysicalRemovalSize", "Physically removed size", "RemovalAgent", "Bytes", gMonitor.OP_ACUM) gMonitor.registerActivity("ReplicaRemovalAtt", "Replica removal attempted", "RemovalAgent", "Removal/min", gMonitor.OP_SUM) gMonitor.registerActivity("ReplicaRemovalDone", "Successful replica removals", "RemovalAgent", "Removal/min", gMonitor.OP_SUM) gMonitor.registerActivity("ReplicaRemovalFail", "Failed replica removals", "RemovalAgent", "Removal/min", gMonitor.OP_SUM) gMonitor.registerActivity("RemoveFileAtt", "File removal attempted", "RemovalAgent", "Removal/min", gMonitor.OP_SUM) gMonitor.registerActivity("RemoveFileDone", "File removal done", "RemovalAgent", "Removal/min", gMonitor.OP_SUM) gMonitor.registerActivity("RemoveFileFail", "File removal failed", "RemovalAgent", "Removal/min", gMonitor.OP_SUM) self.maxNumberOfThreads = self.am_getOption('NumberOfThreads', self.maxNumberOfThreads) self.maxRequestsInQueue = self.am_getOption('RequestsInQueue', self.maxRequestsInQueue) self.threadPool = ThreadPool(1, self.maxNumberOfThreads, self.maxRequestsInQueue) # Set the ThreadPool in daemon mode to process new ThreadedJobs as they are inserted self.threadPool.daemonize() self.maxRequests = self.am_getOption('MaxRequestsPerCycle', 1200.) # This sets the Default Proxy to used as that defined under # /Operations/Shifter/DataManager # the shifterProxy option in the Configuration can be used to change this default. self.am_setOption('shifterProxy', 'DataManager') return S_OK() def execute(self): """ Fill the TreadPool with ThreadJobs """ self.pendingRequests = True self.maxRequests = min( 10000., self.am_getOption('MaxRequestsPerCycle', self.maxRequests)) requestCounter = 0 while self.pendingRequests: if requestCounter > self.maxRequests: break requestCounter += 1 requestExecutor = ThreadedJob(self.executeRequest) ret = self.threadPool.queueJob(requestExecutor) if not ret['OK']: break time.sleep(0.1) if self.timeOutCounter: gLogger.error('Timeouts during removal execution:', self.timeOutCounter) return S_OK() def executeRequest(self): """ Do the actual work in the Thread """ ################################################ # Get a request from request DB gMonitor.addMark("Iteration", 1) res = self.requestDBClient.getRequest('removal') if not res['OK']: gLogger.info( "RemovalAgent.execute: Failed to get request from database.") return S_OK() elif not res['Value']: gLogger.info( "RemovalAgent.execute: No requests to be executed found.") self.pendingRequests = False return S_OK() requestString = res['Value']['RequestString'] requestName = res['Value']['RequestName'] sourceServer = res['Value']['Server'] jobID = 0 try: jobID = int(res['Value']['JobID']) except: gLogger.warn( "RemovalAgent.execute: JobID not present or malformed in request '%s', will use 0 instead." % requestName) gLogger.info("RemovalAgent.execute: Obtained request %s" % requestName) try: result = self.requestDBClient.getCurrentExecutionOrder( requestName, sourceServer) if result['OK']: currentOrder = result['Value'] else: gLogger.error('Can not get the request execution order') self.requestDBClient.updateRequest(requestName, requestString, sourceServer) return S_OK('Can not get the request execution order') oRequest = RequestContainer(request=requestString) ################################################ # Find the number of sub-requests from the request res = oRequest.getNumSubRequests('removal') if not res['OK']: errStr = "RemovalAgent.execute: Failed to obtain number of removal subrequests." gLogger.error(errStr, res['Message']) return S_OK() gLogger.info("RemovalAgent.execute: Found %s sub requests." % res['Value']) ################################################ # For all the sub-requests in the request modified = False for ind in range(res['Value']): gMonitor.addMark("Execute", 1) gLogger.info( "RemovalAgent.execute: Processing sub-request %s." % ind) subRequestAttributes = oRequest.getSubRequestAttributes( ind, 'removal')['Value'] subExecutionOrder = int(subRequestAttributes['ExecutionOrder']) subStatus = subRequestAttributes['Status'] if subStatus == 'Waiting' and subExecutionOrder <= currentOrder: subRequestFiles = oRequest.getSubRequestFiles( ind, 'removal')['Value'] operation = subRequestAttributes['Operation'] ################################################ # If the sub-request is a physical removal operation if operation == 'physicalRemoval': gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation) diracSEs = subRequestAttributes['TargetSE'].split(',') physicalFiles = [] pfnToLfn = {} for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': pfn = str(subRequestFile['PFN']) lfn = str(subRequestFile['LFN']) pfnToLfn[pfn] = lfn physicalFiles.append(pfn) gMonitor.addMark('PhysicalRemovalAtt', len(physicalFiles)) failed = {} errMsg = {} for diracSE in diracSEs: res = self.replicaManager.removeStorageFile( physicalFiles, diracSE) if res['OK']: for pfn in res['Value']['Failed'].keys(): if not failed.has_key(pfn): failed[pfn] = {} failed[pfn][diracSE] = res['Value'][ 'Failed'][pfn] else: errMsg[diracSE] = res['Message'] for pfn in physicalFiles: if not failed.has_key(pfn): failed[pfn] = {} failed[pfn][diracSE] = 'Completely' # Now analyse the results failedPFNs = failed.keys() pfnsOK = [ pfn for pfn in physicalFiles if not pfn in failedPFNs ] gMonitor.addMark('PhysicalRemovalDone', len(pfnsOK)) for pfn in pfnsOK: gLogger.info( "RemovalAgent.execute: Successfully removed %s at %s" % (pfn, str(diracSEs))) res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', pfnToLfn[pfn], 'Status', 'Done') if not res['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ('Done', pfnToLfn[pfn])) modified = True if failed: gMonitor.addMark('PhysicalRemovalFail', len(failedPFNs)) for pfn in failedPFNs: for diracSE in failed[pfn].keys(): if type(failed[pfn] [diracSE]) in StringTypes: if re.search( 'no such file or directory', failed[pfn][diracSE].lower()): gLogger.info( "RemovalAgent.execute: File did not exist.", pfn) res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', pfnToLfn[pfn], 'Status', 'Done') if not res['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ('Done', pfnToLfn[pfn])) modified = True else: gLogger.info( "RemovalAgent.execute: Failed to remove file.", "%s at %s - %s" % (pfn, diracSE, failed[pfn][diracSE])) if errMsg: for diracSE in errMsg.keys(): errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s", diracSE gLogger.error(errStr, errMsg[diracSE]) ################################################ # If the sub-request is a physical removal operation elif operation == 'removeFile': gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation) lfns = [] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': lfn = str(subRequestFile['LFN']) lfns.append(lfn) gMonitor.addMark('RemoveFileAtt', len(lfns)) res = self.replicaManager.removeFile(lfns) if res['OK']: gMonitor.addMark( 'RemoveFileDone', len(res['Value']['Successful'].keys())) for lfn in res['Value']['Successful'].keys(): gLogger.info( "RemovalAgent.execute: Successfully removed %s." % lfn) result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done') if not result['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ('Done', lfn)) modified = True gMonitor.addMark( 'RemoveFileFail', len(res['Value']['Failed'].keys())) for lfn in res['Value']['Failed'].keys(): if type(res['Value']['Failed'] [lfn]) in StringTypes: if re.search( 'no such file or directory', res['Value']['Failed'] [lfn].lower()): gLogger.info( "RemovalAgent.execute: File did not exist.", lfn) result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done') if not result['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ('Done', lfn)) modified = True else: gLogger.info( "RemovalAgent.execute: Failed to remove file:", "%s %s" % (lfn, res['Value']['Failed'][lfn])) else: gMonitor.addMark('RemoveFileFail', len(lfns)) errStr = "RemovalAgent.execute: Completely failed to remove files files." gLogger.error(errStr, res['Message']) ################################################ # If the sub-request is a physical removal operation elif operation == 'replicaRemoval': gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation) diracSEs = subRequestAttributes['TargetSE'].split(',') lfns = [] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': lfn = str(subRequestFile['LFN']) lfns.append(lfn) gMonitor.addMark('ReplicaRemovalAtt', len(lfns)) failed = {} errMsg = {} for diracSE in diracSEs: res = self.replicaManager.removeReplica( diracSE, lfns) if res['OK']: for lfn in res['Value']['Failed'].keys(): errorMessage = str( res['Value']['Failed'][lfn]) if errorMessage.find( 'Write access not permitted for this credential.' ) != -1: if self.__getProxyAndRemoveReplica( diracSE, lfn): continue if errorMessage.find( 'seconds timeout for "__gfal_wrapper" call' ) != -1: self.timeOutCounter += 1 if not failed.has_key(lfn): failed[lfn] = {} failed[lfn][diracSE] = res['Value'][ 'Failed'][lfn] else: errMsg[diracSE] = res['Message'] for lfn in lfns: if not failed.has_key(lfn): failed[lfn] = {} failed[lfn][diracSE] = 'Completely' # Now analyse the results failedLFNs = failed.keys() lfnsOK = [lfn for lfn in lfns if not lfn in failedLFNs] gMonitor.addMark('ReplicaRemovalDone', len(lfnsOK)) for lfn in lfnsOK: gLogger.info( "RemovalAgent.execute: Successfully removed %s at %s" % (lfn, str(diracSEs))) res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done') if not res['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ('Done', lfn)) modified = True if failed: gMonitor.addMark('PhysicalRemovalFail', len(failedLFNs)) for lfn in failedLFNs: for diracSE in failed[lfn].keys(): if type(failed[lfn] [diracSE]) in StringTypes: if re.search( 'no such file or directory', failed[lfn][diracSE].lower()): gLogger.info( "RemovalAgent.execute: File did not exist.", lfn) res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done') if not res['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ('Done', lfn)) modified = True else: gLogger.info( "RemovalAgent.execute: Failed to remove file.", "%s at %s - %s" % (lfn, diracSE, failed[lfn][diracSE])) if errMsg: for diracSE in errMsg.keys(): errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s", diracSE gLogger.error(errStr, errMsg[diracSE]) ################################################ # If the sub-request is a request to the online system to retransfer elif operation == 'reTransfer': gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation) diracSE = subRequestAttributes['TargetSE'] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': pfn = str(subRequestFile['PFN']) lfn = str(subRequestFile['LFN']) res = self.replicaManager.onlineRetransfer( diracSE, pfn) if res['OK']: if res['Value']['Successful'].has_key(pfn): gLogger.info( "RemovalAgent.execute: Successfully requested retransfer of %s." % pfn) result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done') if not result['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ('Done', lfn)) modified = True else: errStr = "RemovalAgent.execute: Failed to request retransfer." gLogger.error( errStr, "%s %s %s" % (pfn, diracSE, res['Value']['Failed'][pfn])) else: errStr = "RemovalAgent.execute: Completely failed to request retransfer." gLogger.error(errStr, res['Message']) else: gLogger.info( "RemovalAgent.execute: File already completed." ) ################################################ # If the sub-request is none of the above types else: gLogger.error( "RemovalAgent.execute: Operation not supported.", operation) ################################################ # Determine whether there are any active files if oRequest.isSubRequestEmpty(ind, 'removal')['Value']: oRequest.setSubRequestStatus(ind, 'removal', 'Done') gMonitor.addMark("Done", 1) ################################################ # If the sub-request is already in terminal state else: gLogger.info( "RemovalAgent.execute:", "Sub-request %s is status '%s' and not to be executed." % (ind, subRequestAttributes['Status'])) ################################################ # Generate the new request string after operation newrequestString = oRequest.toXML()['Value'] except: # if something fails return the original request back to the server res = self.requestDBClient.updateRequest(requestName, requestString, sourceServer) return S_OK() res = self.requestDBClient.updateRequest(requestName, newrequestString, sourceServer) if modified and jobID: result = self.finalizeRequest(requestName, jobID, sourceServer) return S_OK() def __getProxyAndRemoveReplica(self, diracSE, lfn): """ get a proxy from the owner of the file and try to remove it returns True if it succeeds, False otherwise """ result = self.replicaManager.getCatalogDirectoryMetadata( lfn, singleFile=True) if not result['OK']: gLogger.error("Could not get metadata info", result['Message']) return False ownerRole = result['Value']['OwnerRole'] ownerDN = result['Value']['OwnerDN'] if ownerRole[0] != "/": ownerRole = "/%s" % ownerRole userProxy = '' for ownerGroup in Registry.getGroupsWithVOMSAttribute(ownerRole): result = gProxyManager.downloadVOMSProxy( ownerDN, ownerGroup, limited=True, requiredVOMSAttribute=ownerRole) if not result['OK']: gLogger.verbose( 'Failed to retrieve voms proxy for %s : %s:' % (ownerDN, ownerRole), result['Message']) continue userProxy = result['Value'] gLogger.verbose("Got proxy for %s@%s [%s]" % (ownerDN, ownerGroup, ownerRole)) break if not userProxy: return False result = userProxy.dumpAllToFile() if not result['OK']: gLogger.verbose(result['Message']) return False upFile = result['Value'] prevProxyEnv = os.environ['X509_USER_PROXY'] os.environ['X509_USER_PROXY'] = upFile try: res = self.replicaManager.removeReplica(diracSE, lfn) if res['OK'] and lfn in res['Value']['Successful']: gLogger.verbose('Removed %s from %s' % (lfn, diracSE)) return True finally: os.environ['X509_USER_PROXY'] = prevProxyEnv os.unlink(upFile) return False def finalize(self): """ Called by the Agent framework to cleanly end execution. In this case this module will wait until all pending ThreadedJbos in the ThreadPool get executed """ self.threadPool.processAllResults() return S_OK()
class RegistrationAgent( AgentModule, RequestAgentMixIn ): def initialize( self ): self.RequestDBClient = RequestClient() self.ReplicaManager = ReplicaManager() self.DataLog = DataLoggingClient() self.maxNumberOfThreads = self.am_getOption( 'NumberOfThreads', 1 ) self.threadPoolDepth = self.am_getOption( 'ThreadPoolDepth', 1 ) self.threadPool = ThreadPool( 1, self.maxNumberOfThreads ) # This sets the Default Proxy to used as that defined under # /Operations/Shifter/DataManager # the shifterProxy option in the Configuration can be used to change this default. self.am_setOption( 'shifterProxy', 'DataManager' ) return S_OK() def execute( self ): for i in range( self.threadPoolDepth ): requestExecutor = ThreadedJob( self.executeRequest ) self.threadPool.queueJob( requestExecutor ) self.threadPool.processResults() return self.executeRequest() def executeRequest( self ): ################################################ # Get a request from request DB res = self.RequestDBClient.getRequest( 'register' ) if not res['OK']: gLogger.info( "RegistrationAgent.execute: Failed to get request from database." ) return S_OK() elif not res['Value']: gLogger.info( "RegistrationAgent.execute: No requests to be executed found." ) return S_OK() requestString = res['Value']['RequestString'] requestName = res['Value']['RequestName'] sourceServer = res['Value']['Server'] try: jobID = int( res['Value']['JobID'] ) except: jobID = 0 gLogger.info( "RegistrationAgent.execute: Obtained request %s" % requestName ) result = self.RequestDBClient.getCurrentExecutionOrder( requestName, sourceServer ) if result['OK']: currentOrder = result['Value'] else: return S_OK( 'Can not get the request execution order' ) oRequest = RequestContainer( request = requestString ) ################################################ # Find the number of sub-requests from the request res = oRequest.getNumSubRequests( 'register' ) if not res['OK']: errStr = "RegistrationAgent.execute: Failed to obtain number of transfer subrequests." gLogger.error( errStr, res['Message'] ) return S_OK() gLogger.info( "RegistrationAgent.execute: Found %s sub requests." % res['Value'] ) ################################################ # For all the sub-requests in the request modified = False for ind in range( res['Value'] ): gLogger.info( "RegistrationAgent.execute: Processing sub-request %s." % ind ) subRequestAttributes = oRequest.getSubRequestAttributes( ind, 'register' )['Value'] subExecutionOrder = int( subRequestAttributes['ExecutionOrder'] ) subStatus = subRequestAttributes['Status'] if subStatus == 'Waiting' and subExecutionOrder <= currentOrder: subRequestFiles = oRequest.getSubRequestFiles( ind, 'register' )['Value'] operation = subRequestAttributes['Operation'] ################################################ # If the sub-request is a register file operation if operation == 'registerFile': gLogger.info( "RegistrationAgent.execute: Attempting to execute %s sub-request." % operation ) diracSE = str( subRequestAttributes['TargetSE'] ) if diracSE == 'SE': # We do not care about SE, put any there diracSE = "CERN-FAILOVER" catalog = subRequestAttributes['Catalogue'] if catalog == "None": catalog = '' subrequest_done = True for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': lfn = subRequestFile.get( 'LFN', '' ) if lfn: lfn = str( lfn ) physicalFile = subRequestFile.get( 'PFN', '' ) if physicalFile: physicalFile = str( physicalFile ) fileSize = subRequestFile.get( 'Size', 0 ) if fileSize: fileSize = int( fileSize ) fileGuid = subRequestFile.get( 'GUID', '' ) if fileGuid: fileGuid = str( fileGuid ) checksum = subRequestFile.get( 'Addler', '' ) if checksum: checksum = str( checksum ) if catalog == 'BookkeepingDB': diracSE = 'CERN-HIST' fileTuple = ( lfn, physicalFile, fileSize, diracSE, fileGuid, checksum ) res = self.ReplicaManager.registerFile( fileTuple, catalog ) print res if not res['OK']: self.DataLog.addFileRecord( lfn, 'RegisterFail', diracSE, '', 'RegistrationAgent' ) errStr = "RegistrationAgent.execute: Completely failed to register file." gLogger.error( errStr, res['Message'] ) subrequest_done = False elif lfn in res['Value']['Failed'].keys(): self.DataLog.addFileRecord( lfn, 'RegisterFail', diracSE, '', 'RegistrationAgent' ) errStr = "RegistrationAgent.execute: Completely failed to register file." gLogger.error( errStr, res['Value']['Failed'][lfn] ) subrequest_done = False else: self.DataLog.addFileRecord( lfn, 'Register', diracSE, '', 'TransferAgent' ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done' ) modified = True else: gLogger.info( "RegistrationAgent.execute: File already completed." ) if subrequest_done: oRequest.setSubRequestStatus( ind, 'register', 'Done' ) ################################################ # If the sub-request is none of the above types else: gLogger.error( "RegistrationAgent.execute: Operation not supported.", operation ) ################################################ # Determine whether there are any active files if oRequest.isSubRequestEmpty( ind, 'register' )['Value']: oRequest.setSubRequestStatus( ind, 'register', 'Done' ) ################################################ # If the sub-request is already in terminal state else: gLogger.info( "RegistrationAgent.execute: Sub-request %s is status '%s' and not to be executed." % ( ind, subRequestAttributes['Status'] ) ) ################################################ # Generate the new request string after operation requestString = oRequest.toXML()['Value'] res = self.RequestDBClient.updateRequest( requestName, requestString, sourceServer ) if modified and jobID: result = self.finalizeRequest( requestName, jobID, sourceServer ) return S_OK()
class RemovalAgent( AgentModule, RequestAgentMixIn ): """ This Agent takes care of executing "removal" request from the RequestManagement system """ def __init__( self, *args ): """ Initialize the base class and define some extra data members """ AgentModule.__init__( self, *args ) self.requestDBClient = None self.replicaManager = None self.maxNumberOfThreads = 4 self.maxRequestsInQueue = 100 self.threadPool = None self.timeOutCounter = 0 self.pendingRequests = True def initialize( self ): """ Called by the framework upon startup, before any cycle (execute method bellow) """ self.requestDBClient = RequestClient() # the RequestAgentMixIn needs the capitalized version, until is is fixed keep this. self.RequestDBClient = self.requestDBClient self.replicaManager = ReplicaManager() gMonitor.registerActivity( "Iteration", "Agent Loops", "RemovalAgent", "Loops/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Execute", "Request Processed", "RemovalAgent", "Requests/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Done", "Request Completed", "RemovalAgent", "Requests/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "PhysicalRemovalAtt", "Physical removals attempted", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "PhysicalRemovalDone", "Successful physical removals", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "PhysicalRemovalFail", "Failed physical removals", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "PhysicalRemovalSize", "Physically removed size", "RemovalAgent", "Bytes", gMonitor.OP_ACUM ) gMonitor.registerActivity( "ReplicaRemovalAtt", "Replica removal attempted", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "ReplicaRemovalDone", "Successful replica removals", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "ReplicaRemovalFail", "Failed replica removals", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "RemoveFileAtt", "File removal attempted", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "RemoveFileDone", "File removal done", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "RemoveFileFail", "File removal failed", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) self.maxNumberOfThreads = self.am_getOption( 'NumberOfThreads', self.maxNumberOfThreads ) self.maxRequestsInQueue = self.am_getOption( 'RequestsInQueue', self.maxRequestsInQueue ) self.threadPool = ThreadPool( 1, self.maxNumberOfThreads, self.maxRequestsInQueue ) # Set the ThreadPool in daemon mode to process new ThreadedJobs as they are inserted self.threadPool.daemonize() # This sets the Default Proxy to used as that defined under # /Operations/Shifter/DataManager # the shifterProxy option in the Configuration can be used to change this default. self.am_setOption( 'shifterProxy', 'DataManager' ) return S_OK() def execute( self ): """ Fill the TreadPool with ThreadJobs """ self.pendingRequests = True while self.pendingRequests: requestExecutor = ThreadedJob( self.executeRequest ) ret = self.threadPool.queueJob( requestExecutor ) if not ret['OK']: break time.sleep( 0.1 ) if self.timeOutCounter: gLogger.error( 'Timeouts during removal execution:', self.timeOutCounter ) return S_OK() def executeRequest( self ): """ Do the actual work in the Thread """ ################################################ # Get a request from request DB gMonitor.addMark( "Iteration", 1 ) res = self.requestDBClient.getRequest( 'removal' ) if not res['OK']: gLogger.info( "RemovalAgent.execute: Failed to get request from database." ) return S_OK() elif not res['Value']: gLogger.info( "RemovalAgent.execute: No requests to be executed found." ) self.pendingRequests = False return S_OK() requestString = res['Value']['RequestString'] requestName = res['Value']['RequestName'] sourceServer = res['Value']['Server'] try: jobID = int( res['Value']['JobID'] ) except ValueError: jobID = 0 gLogger.info( "RemovalAgent.execute: Obtained request %s" % requestName ) try: result = self.requestDBClient.getCurrentExecutionOrder( requestName, sourceServer ) if result['OK']: currentOrder = result['Value'] else: gLogger.error( 'Can not get the request execution order' ) self.requestDBClient.updateRequest( requestName, requestString, sourceServer ) return S_OK( 'Can not get the request execution order' ) oRequest = RequestContainer( request = requestString ) ################################################ # Find the number of sub-requests from the request res = oRequest.getNumSubRequests( 'removal' ) if not res['OK']: errStr = "RemovalAgent.execute: Failed to obtain number of removal subrequests." gLogger.error( errStr, res['Message'] ) return S_OK() gLogger.info( "RemovalAgent.execute: Found %s sub requests." % res['Value'] ) ################################################ # For all the sub-requests in the request modified = False for ind in range( res['Value'] ): gMonitor.addMark( "Execute", 1 ) gLogger.info( "RemovalAgent.execute: Processing sub-request %s." % ind ) subRequestAttributes = oRequest.getSubRequestAttributes( ind, 'removal' )['Value'] subExecutionOrder = int( subRequestAttributes['ExecutionOrder'] ) subStatus = subRequestAttributes['Status'] if subStatus == 'Waiting' and subExecutionOrder <= currentOrder: subRequestFiles = oRequest.getSubRequestFiles( ind, 'removal' )['Value'] operation = subRequestAttributes['Operation'] ################################################ # If the sub-request is a physical removal operation if operation == 'physicalRemoval': gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation ) diracSEs = subRequestAttributes['TargetSE'].split( ',' ) physicalFiles = [] pfnToLfn = {} for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': pfn = str( subRequestFile['PFN'] ) lfn = str( subRequestFile['LFN'] ) pfnToLfn[pfn] = lfn physicalFiles.append( pfn ) gMonitor.addMark( 'PhysicalRemovalAtt', len( physicalFiles ) ) failed = {} errMsg = {} for diracSE in diracSEs: res = self.replicaManager.removeStorageFile( physicalFiles, diracSE ) if res['OK']: for pfn in res['Value']['Failed'].keys(): if not failed.has_key( pfn ): failed[pfn] = {} failed[pfn][diracSE] = res['Value']['Failed'][pfn] else: errMsg[diracSE] = res['Message'] for pfn in physicalFiles: if not failed.has_key( pfn ): failed[pfn] = {} failed[pfn][diracSE] = 'Completely' # Now analyse the results failedPFNs = failed.keys() pfnsOK = [pfn for pfn in physicalFiles if not pfn in failedPFNs] gMonitor.addMark( 'PhysicalRemovalDone', len( pfnsOK ) ) for pfn in pfnsOK: gLogger.info( "RemovalAgent.execute: Successfully removed %s at %s" % ( pfn, str( diracSEs ) ) ) res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', pfnToLfn[pfn], 'Status', 'Done' ) if not res['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', pfnToLfn[pfn] ) ) modified = True if failed: gMonitor.addMark( 'PhysicalRemovalFail', len( failedPFNs ) ) for pfn in failedPFNs: for diracSE in failed[pfn].keys(): if type( failed[pfn][diracSE] ) in StringTypes: if re.search( 'no such file or directory', failed[pfn][diracSE].lower() ): gLogger.info( "RemovalAgent.execute: File did not exist.", pfn ) res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', pfnToLfn[pfn], 'Status', 'Done' ) if not res['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', pfnToLfn[pfn] ) ) modified = True else: gLogger.info( "RemovalAgent.execute: Failed to remove file.", "%s at %s - %s" % ( pfn, diracSE, failed[pfn][diracSE] ) ) if errMsg: for diracSE in errMsg.keys(): errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s", diracSE gLogger.error( errStr, errMsg[diracSE] ) ################################################ # If the sub-request is a physical removal operation elif operation == 'removeFile': gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation ) lfns = [] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': lfn = str( subRequestFile['LFN'] ) lfns.append( lfn ) gMonitor.addMark( 'RemoveFileAtt', len( lfns ) ) res = self.replicaManager.removeFile( lfns ) if res['OK']: gMonitor.addMark( 'RemoveFileDone', len( res['Value']['Successful'].keys() ) ) for lfn in res['Value']['Successful'].keys(): gLogger.info( "RemovalAgent.execute: Successfully removed %s." % lfn ) result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' ) if not result['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) ) modified = True gMonitor.addMark( 'RemoveFileFail', len( res['Value']['Failed'].keys() ) ) for lfn in res['Value']['Failed'].keys(): if type( res['Value']['Failed'][lfn] ) in StringTypes: if re.search( 'no such file or directory', res['Value']['Failed'][lfn].lower() ): gLogger.info( "RemovalAgent.execute: File did not exist.", lfn ) result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' ) if not result['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) ) modified = True else: gLogger.info( "RemovalAgent.execute: Failed to remove file:", "%s %s" % ( lfn, res['Value']['Failed'][lfn] ) ) else: gMonitor.addMark( 'RemoveFileFail', len( lfns ) ) errStr = "RemovalAgent.execute: Completely failed to remove files files." gLogger.error( errStr, res['Message'] ) ################################################ # If the sub-request is a physical removal operation elif operation == 'replicaRemoval': gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation ) diracSEs = subRequestAttributes['TargetSE'].split( ',' ) lfns = [] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': lfn = str( subRequestFile['LFN'] ) lfns.append( lfn ) gMonitor.addMark( 'ReplicaRemovalAtt', len( lfns ) ) failed = {} errMsg = {} for diracSE in diracSEs: res = self.replicaManager.removeReplica( diracSE, lfns ) if res['OK']: for lfn in res['Value']['Failed'].keys(): errorMessage = str( res['Value']['Failed'][lfn] ) if errorMessage.find( 'Write access not permitted for this credential.' ) != -1: if self.__getProxyAndRemoveReplica( diracSE, lfn ): continue if errorMessage.find( 'seconds timeout for "__gfal_wrapper" call' ) != -1: self.timeOutCounter += 1 if not failed.has_key( lfn ): failed[lfn] = {} failed[lfn][diracSE] = res['Value']['Failed'][lfn] else: errMsg[diracSE] = res['Message'] for lfn in lfns: if not failed.has_key( lfn ): failed[lfn] = {} failed[lfn][diracSE] = 'Completely' # Now analyse the results failedLFNs = failed.keys() lfnsOK = [lfn for lfn in lfns if not lfn in failedLFNs] gMonitor.addMark( 'ReplicaRemovalDone', len( lfnsOK ) ) for lfn in lfnsOK: gLogger.info( "RemovalAgent.execute: Successfully removed %s at %s" % ( lfn, str( diracSEs ) ) ) res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' ) if not res['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) ) modified = True if failed: gMonitor.addMark( 'PhysicalRemovalFail', len( failedLFNs ) ) for lfn in failedLFNs: for diracSE in failed[lfn].keys(): if type( failed[lfn][diracSE] ) in StringTypes: if re.search( 'no such file or directory', failed[lfn][diracSE].lower() ): gLogger.info( "RemovalAgent.execute: File did not exist.", lfn ) res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' ) if not res['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) ) modified = True else: gLogger.info( "RemovalAgent.execute: Failed to remove file.", "%s at %s - %s" % ( lfn, diracSE, failed[lfn][diracSE] ) ) if errMsg: for diracSE in errMsg.keys(): errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s", diracSE gLogger.error( errStr, errMsg[diracSE] ) ################################################ # If the sub-request is a request to the online system to retransfer elif operation == 'reTransfer': gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation ) diracSE = subRequestAttributes['TargetSE'] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': pfn = str( subRequestFile['PFN'] ) lfn = str( subRequestFile['LFN'] ) res = self.replicaManager.onlineRetransfer( diracSE, pfn ) if res['OK']: if res['Value']['Successful'].has_key( pfn ): gLogger.info( "RemovalAgent.execute: Successfully requested retransfer of %s." % pfn ) result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' ) if not result['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) ) modified = True else: errStr = "RemovalAgent.execute: Failed to request retransfer." gLogger.error( errStr, "%s %s %s" % ( pfn, diracSE, res['Value']['Failed'][pfn] ) ) else: errStr = "RemovalAgent.execute: Completely failed to request retransfer." gLogger.error( errStr, res['Message'] ) else: gLogger.info( "RemovalAgent.execute: File already completed." ) ################################################ # If the sub-request is none of the above types else: gLogger.error( "RemovalAgent.execute: Operation not supported.", operation ) ################################################ # Determine whether there are any active files if oRequest.isSubRequestEmpty( ind, 'removal' )['Value']: oRequest.setSubRequestStatus( ind, 'removal', 'Done' ) gMonitor.addMark( "Done", 1 ) ################################################ # If the sub-request is already in terminal state else: gLogger.info( "RemovalAgent.execute:", "Sub-request %s is status '%s' and not to be executed." % ( ind, subRequestAttributes['Status'] ) ) ################################################ # Generate the new request string after operation newrequestString = oRequest.toXML()['Value'] except: # if something fails return the original request back to the server res = self.requestDBClient.updateRequest( requestName, requestString, sourceServer ) return S_OK() res = self.requestDBClient.updateRequest( requestName, newrequestString, sourceServer ) if modified and jobID: result = self.finalizeRequest( requestName, jobID, sourceServer ) return S_OK() def __getProxyAndRemoveReplica( self, diracSE, lfn ): """ get a proxy from the owner of the file and try to remove it returns True if it succeeds, False otherwise """ result = self.replicaManager.getCatalogDirectoryMetadata( lfn, singleFile = True ) if not result[ 'OK' ]: gLogger.error( "Could not get metadata info", result[ 'Message' ] ) return False ownerRole = result[ 'Value' ][ 'OwnerRole' ] ownerDN = result[ 'Value' ][ 'OwnerDN' ] if ownerRole[0] != "/": ownerRole = "/%s" % ownerRole userProxy = '' for ownerGroup in Registry.getGroupsWithVOMSAttribute( ownerRole ): result = gProxyManager.downloadVOMSProxy( ownerDN, ownerGroup, limited = True, requiredVOMSAttribute = ownerRole ) if not result[ 'OK' ]: gLogger.verbose ( 'Failed to retrieve voms proxy for %s : %s:' % ( ownerDN, ownerRole ), result[ 'Message' ] ) continue userProxy = result[ 'Value' ] gLogger.verbose( "Got proxy for %s@%s [%s]" % ( ownerDN, ownerGroup, ownerRole ) ) break if not userProxy: return False result = userProxy.dumpAllToFile() if not result[ 'OK' ]: gLogger.verbose( result[ 'Message' ] ) return False upFile = result[ 'Value' ] prevProxyEnv = os.environ[ 'X509_USER_PROXY' ] os.environ[ 'X509_USER_PROXY' ] = upFile try: res = self.replicaManager.removeReplica( diracSE, lfn ) if res['OK'] and lfn in res[ 'Value' ]['Successful']: gLogger.verbose( 'Removed %s from %s' % ( lfn, diracSE ) ) return True finally: os.environ[ 'X509_USER_PROXY' ] = prevProxyEnv os.unlink( upFile ) return False def finalize( self ): """ Called by the Agent framework to cleanly end execution. In this case this module will wait until all pending ThreadedJbos in the ThreadPool get executed """ self.threadPool.processAllResults() return S_OK()
class RegistrationAgent(AgentModule, RequestAgentMixIn): def initialize(self): self.RequestDBClient = RequestClient() self.ReplicaManager = ReplicaManager() self.DataLog = DataLoggingClient() self.maxNumberOfThreads = self.am_getOption('NumberOfThreads', 1) self.threadPoolDepth = self.am_getOption('ThreadPoolDepth', 1) self.threadPool = ThreadPool(1, self.maxNumberOfThreads) # This sets the Default Proxy to used as that defined under # /Operations/Shifter/DataManager # the shifterProxy option in the Configuration can be used to change this default. self.am_setOption('shifterProxy', 'DataManager') return S_OK() def execute(self): for i in range(self.threadPoolDepth): requestExecutor = ThreadedJob(self.executeRequest) self.threadPool.queueJob(requestExecutor) self.threadPool.processResults() return self.executeRequest() def executeRequest(self): ################################################ # Get a request from request DB res = self.RequestDBClient.getRequest('register') if not res['OK']: gLogger.info( "RegistrationAgent.execute: Failed to get request from database." ) return S_OK() elif not res['Value']: gLogger.info( "RegistrationAgent.execute: No requests to be executed found.") return S_OK() requestString = res['Value']['RequestString'] requestName = res['Value']['RequestName'] sourceServer = res['Value']['Server'] try: jobID = int(res['Value']['JobID']) except: jobID = 0 gLogger.info("RegistrationAgent.execute: Obtained request %s" % requestName) result = self.RequestDBClient.getCurrentExecutionOrder( requestName, sourceServer) if result['OK']: currentOrder = result['Value'] else: return S_OK('Can not get the request execution order') oRequest = RequestContainer(request=requestString) ################################################ # Find the number of sub-requests from the request res = oRequest.getNumSubRequests('register') if not res['OK']: errStr = "RegistrationAgent.execute: Failed to obtain number of transfer subrequests." gLogger.error(errStr, res['Message']) return S_OK() gLogger.info("RegistrationAgent.execute: Found %s sub requests." % res['Value']) ################################################ # For all the sub-requests in the request modified = False for ind in range(res['Value']): gLogger.info( "RegistrationAgent.execute: Processing sub-request %s." % ind) subRequestAttributes = oRequest.getSubRequestAttributes( ind, 'register')['Value'] subExecutionOrder = int(subRequestAttributes['ExecutionOrder']) subStatus = subRequestAttributes['Status'] if subStatus == 'Waiting' and subExecutionOrder <= currentOrder: subRequestFiles = oRequest.getSubRequestFiles( ind, 'register')['Value'] operation = subRequestAttributes['Operation'] ################################################ # If the sub-request is a register file operation if operation == 'registerFile': gLogger.info( "RegistrationAgent.execute: Attempting to execute %s sub-request." % operation) diracSE = str(subRequestAttributes['TargetSE']) if diracSE == 'SE': # We do not care about SE, put any there diracSE = "CERN-FAILOVER" catalog = subRequestAttributes['Catalogue'] if catalog == "None": catalog = '' subrequest_done = True for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': lfn = subRequestFile.get('LFN', '') if lfn: lfn = str(lfn) physicalFile = subRequestFile.get('PFN', '') if physicalFile: physicalFile = str(physicalFile) fileSize = subRequestFile.get('Size', 0) if fileSize: fileSize = int(fileSize) fileGuid = subRequestFile.get('GUID', '') if fileGuid: fileGuid = str(fileGuid) checksum = subRequestFile.get('Addler', '') if checksum: checksum = str(checksum) if catalog == 'BookkeepingDB': diracSE = 'CERN-HIST' fileTuple = (lfn, physicalFile, fileSize, diracSE, fileGuid, checksum) res = self.ReplicaManager.registerFile( fileTuple, catalog) print res if not res['OK']: self.DataLog.addFileRecord( lfn, 'RegisterFail', diracSE, '', 'RegistrationAgent') errStr = "RegistrationAgent.execute: Completely failed to register file." gLogger.error(errStr, res['Message']) subrequest_done = False elif lfn in res['Value']['Failed'].keys(): self.DataLog.addFileRecord( lfn, 'RegisterFail', diracSE, '', 'RegistrationAgent') errStr = "RegistrationAgent.execute: Completely failed to register file." gLogger.error(errStr, res['Value']['Failed'][lfn]) subrequest_done = False else: self.DataLog.addFileRecord( lfn, 'Register', diracSE, '', 'TransferAgent') oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done') modified = True else: gLogger.info( "RegistrationAgent.execute: File already completed." ) if subrequest_done: oRequest.setSubRequestStatus(ind, 'register', 'Done') ################################################ # If the sub-request is none of the above types else: gLogger.error( "RegistrationAgent.execute: Operation not supported.", operation) ################################################ # Determine whether there are any active files if oRequest.isSubRequestEmpty(ind, 'register')['Value']: oRequest.setSubRequestStatus(ind, 'register', 'Done') ################################################ # If the sub-request is already in terminal state else: gLogger.info( "RegistrationAgent.execute: Sub-request %s is status '%s' and not to be executed." % (ind, subRequestAttributes['Status'])) ################################################ # Generate the new request string after operation requestString = oRequest.toXML()['Value'] res = self.RequestDBClient.updateRequest(requestName, requestString, sourceServer) if modified and jobID: result = self.finalizeRequest(requestName, jobID, sourceServer) return S_OK()
class RemovalAgent( AgentModule, RequestAgentMixIn ): """ This Agent takes care of executing "removal" request from the RequestManagement system """ def __init__( self, *args ): """ Initialize the base class and define some extra data members """ AgentModule.__init__( self, *args ) self.requestDBClient = None self.replicaManager = None self.maxNumberOfThreads = 4 self.maxRequestsInQueue = 100 self.threadPool = None def initialize( self ): """ Called by the framework upon startup, before any cycle (execute method bellow) """ self.requestDBClient = RequestClient() self.replicaManager = ReplicaManager() gMonitor.registerActivity( "Iteration", "Agent Loops", "RemovalAgent", "Loops/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Execute", "Request Processed", "RemovalAgent", "Requests/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Done", "Request Completed", "RemovalAgent", "Requests/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "PhysicalRemovalAtt", "Physical removals attempted", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "PhysicalRemovalDone", "Successful physical removals", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "PhysicalRemovalFail", "Failed physical removals", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "PhysicalRemovalSize", "Physically removed size", "RemovalAgent", "Bytes", gMonitor.OP_ACUM ) gMonitor.registerActivity( "ReplicaRemovalAtt", "Replica removal attempted", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "ReplicaRemovalDone", "Successful replica removals", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "ReplicaRemovalFail", "Failed replica removals", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "RemoveFileAtt", "File removal attempted", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "RemoveFileDone", "File removal done", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "RemoveFileFail", "File removal failed", "RemovalAgent", "Removal/min", gMonitor.OP_SUM ) self.maxNumberOfThreads = self.am_getOption( 'NumberOfThreads', self.maxNumberOfThreads ) self.maxRequestsInQueue = self.am_getOption( 'RequestsInQueue', self.maxRequestsInQueue ) self.threadPool = ThreadPool( 1, self.maxNumberOfThreads, self.maxRequestsInQueue ) # Set the ThreadPool in daemon mode to process new ThreadedJobs as they are inserted self.threadPool.daemonize() # This sets the Default Proxy to used as that defined under # /Operations/Shifter/DataManager # the shifterProxy option in the Configuration can be used to change this default. self.am_setOption( 'shifterProxy', 'DataManager' ) return S_OK() def execute( self ): """ Fill the TreadPool with ThreadJobs """ while True: requestExecutor = ThreadedJob( self.executeRequest ) ret = self.threadPool.queueJob( requestExecutor ) if not ret['OK']: break return S_OK() def executeRequest( self ): """ Do the actual work in the Thread """ ################################################ # Get a request from request DB gMonitor.addMark( "Iteration", 1 ) res = self.requestDBClient.getRequest( 'removal' ) if not res['OK']: gLogger.info( "RemovalAgent.execute: Failed to get request from database." ) return S_OK() elif not res['Value']: gLogger.info( "RemovalAgent.execute: No requests to be executed found." ) return S_OK() requestString = res['Value']['RequestString'] requestName = res['Value']['RequestName'] sourceServer = res['Value']['Server'] try: jobID = int( res['Value']['JobID'] ) except ValueError: jobID = 0 gLogger.info( "RemovalAgent.execute: Obtained request %s" % requestName ) result = self.requestDBClient.getCurrentExecutionOrder( requestName, sourceServer ) if result['OK']: currentOrder = result['Value'] else: gLogger.error( 'Can not get the request execution order' ) return S_OK( 'Can not get the request execution order' ) oRequest = RequestContainer( request = requestString ) ################################################ # Find the number of sub-requests from the request res = oRequest.getNumSubRequests( 'removal' ) if not res['OK']: errStr = "RemovalAgent.execute: Failed to obtain number of removal subrequests." gLogger.error( errStr, res['Message'] ) return S_OK() gLogger.info( "RemovalAgent.execute: Found %s sub requests." % res['Value'] ) ################################################ # For all the sub-requests in the request modified = False for ind in range( res['Value'] ): gMonitor.addMark( "Execute", 1 ) gLogger.info( "RemovalAgent.execute: Processing sub-request %s." % ind ) subRequestAttributes = oRequest.getSubRequestAttributes( ind, 'removal' )['Value'] subExecutionOrder = int( subRequestAttributes['ExecutionOrder'] ) subStatus = subRequestAttributes['Status'] if subStatus == 'Waiting' and subExecutionOrder <= currentOrder: subRequestFiles = oRequest.getSubRequestFiles( ind, 'removal' )['Value'] operation = subRequestAttributes['Operation'] ################################################ # If the sub-request is a physical removal operation if operation == 'physicalRemoval': gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation ) diracSEs = subRequestAttributes['TargetSE'].split( ',' ) physicalFiles = [] pfnToLfn = {} for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': pfn = str( subRequestFile['PFN'] ) lfn = str( subRequestFile['LFN'] ) pfnToLfn[pfn] = lfn physicalFiles.append( pfn ) gMonitor.addMark( 'PhysicalRemovalAtt', len( physicalFiles ) ) failed = {} errMsg = {} for diracSE in diracSEs: res = self.replicaManager.removeStorageFile( physicalFiles, diracSE ) if res['OK']: for pfn in res['Value']['Failed'].keys(): if not failed.has_key( pfn ): failed[pfn] = {} failed[pfn][diracSE] = res['Value']['Failed'][pfn] else: errMsg[diracSE] = res['Message'] for pfn in physicalFiles: if not failed.has_key( pfn ): failed[pfn] = {} failed[pfn][diracSE] = 'Completely' # Now analyse the results failedPFNs = failed.keys() pfnsOK = [pfn for pfn in physicalFiles if not pfn in failedPFNs] gMonitor.addMark( 'PhysicalRemovalDone', len( pfnsOK ) ) for pfn in pfnsOK: gLogger.info( "RemovalAgent.execute: Successfully removed %s at %s" % ( pfn, str( diracSEs ) ) ) res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', pfnToLfn[pfn], 'Status', 'Done' ) if not res['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', pfnToLfn[pfn] ) ) modified = True if failed: gMonitor.addMark( 'PhysicalRemovalFail', len( failedPFNs ) ) for pfn in failedPFNs: for diracSE in failed[pfn].keys(): if type( failed[pfn][diracSE] ) in StringTypes: if re.search( 'no such file or directory', failed[pfn][diracSE].lower() ): gLogger.info( "RemovalAgent.execute: File did not exist.", pfn ) res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', pfnToLfn[pfn], 'Status', 'Done' ) if not res['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', pfnToLfn[pfn] ) ) modified = True else: gLogger.info( "RemovalAgent.execute: Failed to remove file.", "%s at %s - %s" % ( pfn, diracSE, failed[pfn][diracSE] ) ) if errMsg: for diracSE in errMsg.keys(): errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s", diracSE gLogger.error( errStr, errMsg[diracSE] ) ################################################ # If the sub-request is a physical removal operation elif operation == 'removeFile': gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation ) lfns = [] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': lfn = str( subRequestFile['LFN'] ) lfns.append( lfn ) gMonitor.addMark( 'RemoveFileAtt', len( lfns ) ) res = self.replicaManager.removeFile( lfns ) if res['OK']: gMonitor.addMark( 'RemoveFileDone', len( res['Value']['Successful'].keys() ) ) for lfn in res['Value']['Successful'].keys(): gLogger.info( "RemovalAgent.execute: Successfully removed %s." % lfn ) result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' ) if not result['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) ) modified = True gMonitor.addMark( 'RemoveFileFail', len( res['Value']['Failed'].keys() ) ) for lfn in res['Value']['Failed'].keys(): if type( res['Value']['Failed'][lfn] ) in StringTypes: if re.search( 'no such file or directory', res['Value']['Failed'][lfn].lower() ): gLogger.info( "RemovalAgent.execute: File did not exist.", lfn ) result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' ) if not result['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) ) modified = True else: gLogger.info( "RemovalAgent.execute: Failed to remove file:", "%s %s" % ( lfn, res['Value']['Failed'][lfn] ) ) else: gMonitor.addMark( 'RemoveFileFail', len( lfns ) ) errStr = "RemovalAgent.execute: Completely failed to remove files files." gLogger.error( errStr, res['Message'] ) ################################################ # If the sub-request is a physical removal operation elif operation == 'replicaRemoval': gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation ) diracSEs = subRequestAttributes['TargetSE'].split( ',' ) lfns = [] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': lfn = str( subRequestFile['LFN'] ) lfns.append( lfn ) gMonitor.addMark( 'ReplicaRemovalAtt', len( lfns ) ) failed = {} errMsg = {} for diracSE in diracSEs: res = self.replicaManager.removeReplica( diracSE, lfns ) if res['OK']: for lfn in res['Value']['Failed'].keys(): if not failed.has_key( lfn ): failed[lfn] = {} failed[lfn][diracSE] = res['Value']['Failed'][lfn] else: errMsg[diracSE] = res['Message'] for lfn in lfns: if not failed.has_key( lfn ): failed[lfn] = {} failed[lfn][diracSE] = 'Completely' # Now analyse the results failedLFNs = failed.keys() lfnsOK = [lfn for lfn in lfns if not lfn in failedLFNs] gMonitor.addMark( 'ReplicaRemovalDone', len( lfnsOK ) ) for lfn in lfnsOK: gLogger.info( "RemovalAgent.execute: Successfully removed %s at %s" % ( lfn, str( diracSEs ) ) ) res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' ) if not res['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) ) modified = True if failed: gMonitor.addMark( 'PhysicalRemovalFail', len( failedLFNs ) ) for lfn in failedLFNs: for diracSE in failed[lfn].keys(): if type( failed[lfn][diracSE] ) in StringTypes: if re.search( 'no such file or directory', failed[lfn][diracSE].lower() ): gLogger.info( "RemovalAgent.execute: File did not exist.", lfn ) res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' ) if not res['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) ) modified = True else: gLogger.info( "RemovalAgent.execute: Failed to remove file.", "%s at %s - %s" % ( lfn, diracSE, failed[lfn][diracSE] ) ) if errMsg: for diracSE in errMsg.keys(): errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s", diracSE gLogger.error( errStr, errMsg[diracSE] ) ################################################ # If the sub-request is a request to the online system to retransfer elif operation == 'reTransfer': gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation ) diracSE = subRequestAttributes['TargetSE'] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': pfn = str( subRequestFile['PFN'] ) lfn = str( subRequestFile['LFN'] ) res = self.replicaManager.onlineRetransfer( diracSE, pfn ) if res['OK']: if res['Value']['Successful'].has_key( pfn ): gLogger.info( "RemovalAgent.execute: Successfully requested retransfer of %s." % pfn ) result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' ) if not result['OK']: gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) ) modified = True else: errStr = "RemovalAgent.execute: Failed to request retransfer." gLogger.error( errStr, "%s %s %s" % ( pfn, diracSE, res['Value']['Failed'][pfn] ) ) else: errStr = "RemovalAgent.execute: Completely failed to request retransfer." gLogger.error( errStr, res['Message'] ) else: gLogger.info( "RemovalAgent.execute: File already completed." ) ################################################ # If the sub-request is none of the above types else: gLogger.error( "RemovalAgent.execute: Operation not supported.", operation ) ################################################ # Determine whether there are any active files if oRequest.isSubRequestEmpty( ind, 'removal' )['Value']: oRequest.setSubRequestStatus( ind, 'removal', 'Done' ) gMonitor.addMark( "Done", 1 ) ################################################ # If the sub-request is already in terminal state else: gLogger.info( "RemovalAgent.execute:", "Sub-request %s is status '%s' and not to be executed." % ( ind, subRequestAttributes['Status'] ) ) ################################################ # Generate the new request string after operation requestString = oRequest.toXML()['Value'] res = self.requestDBClient.updateRequest( requestName, requestString, sourceServer ) if modified and jobID: result = self.finalizeRequest( requestName, jobID, sourceServer ) return S_OK() def finalize( self ): """ Called by the Agent framework to cleanly end execution. In this case this module will wait until all pending ThreadedJbos in the ThreadPool get executed """ self.threadPool.processAllResults() return S_OK()
class TransferAgent( AgentModule, RequestAgentMixIn ): def initialize( self ): self.RequestDBClient = RequestClient() self.ReplicaManager = ReplicaManager() self.DataLog = DataLoggingClient() gMonitor.registerActivity( "Iteration", "Agent Loops", "TransferAgent", "Loops/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Execute", "Request Processed", "TransferAgent", "Requests/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Done", "Request Completed", "TransferAgent", "Requests/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Replicate and register", "Replicate and register operations", "TransferAgent", "Attempts/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Replicate", "Replicate operations", "TransferAgent", "Attempts/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Put and register", "Put and register operations", "TransferAgent", "Attempts/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Put", "Put operations", "TransferAgent", "Attempts/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Replication successful", "Successful replications", "TransferAgent", "Successful/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Put successful", "Successful puts", "TransferAgent", "Successful/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Replication failed", "Failed replications", "TransferAgent", "Failed/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Put failed", "Failed puts", "TransferAgent", "Failed/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Replica registration successful", "Successful replica registrations", "TransferAgent", "Successful/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "File registration successful", "Successful file registrations", "TransferAgent", "Successful/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "Replica registration failed", "Failed replica registrations", "TransferAgent", "Failed/min", gMonitor.OP_SUM ) gMonitor.registerActivity( "File registration failed", "Failed file registrations", "TransferAgent", "Failed/min", gMonitor.OP_SUM ) self.maxNumberOfThreads = self.am_getOption( 'NumberOfThreads', 1 ) self.threadPoolDepth = self.am_getOption( 'ThreadPoolDepth', 1 ) self.threadPool = ThreadPool( 1, self.maxNumberOfThreads ) # This sets the Default Proxy to used as that defined under # /Operations/Shifter/DataManager # the shifterProxy option in the Configuration can be used to change this default. self.am_setOption( 'shifterProxy', 'DataManager' ) return S_OK() def execute( self ): for i in range( self.threadPoolDepth ): requestExecutor = ThreadedJob( self.executeRequest ) self.threadPool.queueJob( requestExecutor ) self.threadPool.processResults() return self.executeRequest() def executeRequest( self ): ################################################ # Get a request from request DB gMonitor.addMark( "Iteration", 1 ) res = self.RequestDBClient.getRequest( 'transfer' ) if not res['OK']: gLogger.info( "TransferAgent.execute: Failed to get request from database." ) return S_OK() elif not res['Value']: gLogger.info( "TransferAgent.execute: No requests to be executed found." ) return S_OK() requestString = res['Value']['RequestString'] requestName = res['Value']['RequestName'] sourceServer = res['Value']['Server'] try: jobID = int( res['Value']['JobID'] ) except: jobID = 0 gLogger.info( "TransferAgent.execute: Obtained request %s" % requestName ) result = self.RequestDBClient.getCurrentExecutionOrder( requestName, sourceServer ) if result['OK']: currentOrder = result['Value'] else: return S_OK( 'Can not get the request execution order' ) oRequest = RequestContainer( request = requestString ) ################################################ # Find the number of sub-requests from the request res = oRequest.getNumSubRequests( 'transfer' ) if not res['OK']: errStr = "TransferAgent.execute: Failed to obtain number of transfer subrequests." gLogger.error( errStr, res['Message'] ) return S_OK() gLogger.info( "TransferAgent.execute: Found %s sub requests." % res['Value'] ) ################################################ # For all the sub-requests in the request modified = False for ind in range( res['Value'] ): gMonitor.addMark( "Execute", 1 ) gLogger.info( "TransferAgent.execute: Processing sub-request %s." % ind ) subRequestAttributes = oRequest.getSubRequestAttributes( ind, 'transfer' )['Value'] if subRequestAttributes['ExecutionOrder']: subExecutionOrder = int( subRequestAttributes['ExecutionOrder'] ) else: subExecutionOrder = 0 subStatus = subRequestAttributes['Status'] if subStatus == 'Waiting' and subExecutionOrder <= currentOrder: subRequestFiles = oRequest.getSubRequestFiles( ind, 'transfer' )['Value'] operation = subRequestAttributes['Operation'] subRequestError = '' ################################################ # If the sub-request is a put and register operation if operation == 'putAndRegister' or operation == 'putAndRegisterAndRemove': gLogger.info( "TransferAgent.execute: Attempting to execute %s sub-request." % operation ) diracSE = str( subRequestAttributes['TargetSE'] ) catalog = '' if subRequestAttributes.has_key( 'Catalogue' ): catalog = subRequestAttributes['Catalogue'] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': gMonitor.addMark( "Put and register", 1 ) lfn = str( subRequestFile['LFN'] ) file = subRequestFile['PFN'] guid = subRequestFile['GUID'] addler = subRequestFile['Addler'] res = self.ReplicaManager.putAndRegister( lfn, file, diracSE, guid = guid, checksum = addler, catalog = catalog ) if res['OK']: if res['Value']['Successful'].has_key( lfn ): if not res['Value']['Successful'][lfn].has_key( 'put' ): gMonitor.addMark( "Put failed", 1 ) self.DataLog.addFileRecord( lfn, 'PutFail', diracSE, '', 'TransferAgent' ) gLogger.info( "TransferAgent.execute: Failed to put %s to %s." % ( lfn, diracSE ) ) subRequestError = "Put operation failed for %s to %s" % ( lfn, diracSE ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn,'Error', 'Put failed' ) elif not res['Value']['Successful'][lfn].has_key( 'register' ): gMonitor.addMark( "Put successful", 1 ) gMonitor.addMark( "File registration failed", 1 ) self.DataLog.addFileRecord( lfn, 'Put', diracSE, '', 'TransferAgent' ) self.DataLog.addFileRecord( lfn, 'RegisterFail', diracSE, '', 'TransferAgent' ) gLogger.info( "TransferAgent.execute: Successfully put %s to %s in %s seconds." % ( lfn, diracSE, res['Value']['Successful'][lfn]['put'] ) ) gLogger.info( "TransferAgent.execute: Failed to register %s to %s." % ( lfn, diracSE ) ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done' ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn,'Error', 'Registration failed' ) subRequestError = "Registration failed for %s to %s" % ( lfn, diracSE ) fileDict = res['Value']['Failed'][lfn]['register'] registerRequestDict = {'Attributes':{'TargetSE': fileDict['TargetSE'], 'Operation':'registerFile'}, 'Files':[{'LFN': fileDict['LFN'], 'PFN':fileDict['PFN'], 'Size':fileDict['Size'], 'Addler':fileDict['Addler'], 'GUID':fileDict['GUID']}]} gLogger.info( "TransferAgent.execute: Setting registration request for failed file." ) oRequest.addSubRequest( registerRequestDict, 'register' ) modified = True else: gMonitor.addMark( "Put successful", 1 ) gMonitor.addMark( "File registration successful", 1 ) self.DataLog.addFileRecord( lfn, 'Put', diracSE, '', 'TransferAgent' ) self.DataLog.addFileRecord( lfn, 'Register', diracSE, '', 'TransferAgent' ) gLogger.info( "TransferAgent.execute: Successfully put %s to %s in %s seconds." % ( lfn, diracSE, res['Value']['Successful'][lfn]['put'] ) ) gLogger.info( "TransferAgent.execute: Successfully registered %s to %s in %s seconds." % ( lfn, diracSE, res['Value']['Successful'][lfn]['register'] ) ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done' ) modified = True else: gMonitor.addMark( "Put failed", 1 ) self.DataLog.addFileRecord( lfn, 'PutFail', diracSE, '', 'TransferAgent' ) errStr = "TransferAgent.execute: Failed to put and register file." gLogger.error( errStr, "%s %s %s" % ( lfn, diracSE, res['Value']['Failed'][lfn] ) ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn,'Error', 'Complete file failure' ) subRequestError = "Failed to put and register file" else: gMonitor.addMark( "Put failed", 1 ) self.DataLog.addFileRecord( lfn, 'PutFail', diracSE, '', 'TransferAgent' ) errStr = "TransferAgent.execute: Completely failed to put and register file." gLogger.error( errStr, res['Message'] ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn,'Error', 'RM call failure' ) subRequestError = operation + " RM call file" else: gLogger.info( "TransferAgent.execute: File already completed." ) ################################################ # If the sub-request is a put operation elif operation == 'put': gLogger.info( "TransferAgent.execute: Attempting to execute %s sub-request." % operation ) diracSE = subRequestAttributes['TargetSE'] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': gMonitor.addMark( "Put", 1 ) lfn = subRequestFile['LFN'] file = subRequestFile['PFN'] res = self.ReplicaManager.put( lfn, file, diracSE ) if res['OK']: if res['Value']['Successful'].has_key( lfn ): gMonitor.addMark( "Put successful", 1 ) self.DataLog.addFileRecord( lfn, 'Put', diracSE, '', 'TransferAgent' ) gLogger.info( "TransferAgent.execute: Successfully put %s to %s in %s seconds." % ( lfn, diracSE, res['Value']['Successful'][lfn] ) ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done' ) modified = True else: gMonitor.addMark( "Put failed", 1 ) self.DataLog.addFileRecord( lfn, 'PutFail', diracSE, '', 'TransferAgent' ) errStr = "TransferAgent.execute: Failed to put file." gLogger.error( errStr, "%s %s %s" % ( lfn, diracSE, res['Value']['Failed'][lfn] ) ) subRequestError = "Put operation failed for %s to %s" % ( lfn, diracSE ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn,'Error', 'Put failed' ) else: gMonitor.addMark( "Put failed", 1 ) self.DataLog.addFileRecord( lfn, 'PutFail', diracSE, '', 'TransferAgent' ) errStr = "TransferAgent.execute: Completely failed to put file." gLogger.error( errStr, res['Message'] ) subRequestError = "Put RM call failed for %s to %s" % ( lfn, diracSE ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn,'Error', 'Put RM call failed' ) else: gLogger.info( "TransferAgent.execute: File already completed." ) ################################################ # If the sub-request is a replicate and register operation elif operation == 'replicateAndRegister' or operation == 'replicateAndRegisterAndRemove': gLogger.info( "TransferAgent.execute: Attempting to execute %s sub-request." % operation ) targetSE = subRequestAttributes['TargetSE'] sourceSE = subRequestAttributes['SourceSE'] if sourceSE == "None": sourceSE = '' for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': gMonitor.addMark( "Replicate and register", 1 ) lfn = subRequestFile['LFN'] res = self.ReplicaManager.replicateAndRegister( lfn, targetSE, sourceSE = sourceSE ) if res['OK']: if res['Value']['Successful'].has_key( lfn ): if not res['Value']['Successful'][lfn].has_key( 'replicate' ): gLogger.info( "TransferAgent.execute: Failed to replicate %s to %s." % ( lfn, targetSE ) ) gMonitor.addMark( "Replication failed", 1 ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn,"Error", "Replication failed" ) subRequestError = "Replication failed for %s to %s" % ( lfn, targetSE ) elif not res['Value']['Successful'][lfn].has_key( 'register' ): gMonitor.addMark( "Replication successful", 1 ) gMonitor.addMark( "Replica registration failed", 1 ) gLogger.info( "TransferAgent.execute: Successfully replicated %s to %s in %s seconds." % ( lfn, targetSE, res['Value']['Successful'][lfn]['replicate'] ) ) gLogger.info( "TransferAgent.execute: Failed to register %s to %s." % ( lfn, targetSE ) ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done' ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Error', 'Registration failed' ) subRequestError = "Registration failed for %s to %s" % ( lfn, targetSE ) fileDict = res['Value']['Failed'][lfn]['register'] registerRequestDict = {'Attributes':{'TargetSE': fileDict['TargetSE'], 'Operation':'registerReplica'}, 'Files':[{'LFN': fileDict['LFN'], 'PFN':fileDict['PFN']}]} gLogger.info( "TransferAgent.execute: Setting registration request for failed replica." ) oRequest.addSubRequest( registerRequestDict, 'register' ) modified = True else: gMonitor.addMark( "Replication successful", 1 ) gMonitor.addMark( "Replica registration successful", 1 ) gLogger.info( "TransferAgent.execute: Successfully replicated %s to %s in %s seconds." % ( lfn, targetSE, res['Value']['Successful'][lfn]['replicate'] ) ) gLogger.info( "TransferAgent.execute: Successfully registered %s to %s in %s seconds." % ( lfn, targetSE, res['Value']['Successful'][lfn]['register'] ) ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done' ) modified = True else: gMonitor.addMark( "Replication failed", 1 ) errStr = "TransferAgent.execute: Failed to replicate and register file." gLogger.error( errStr, "%s %s %s" % ( lfn, targetSE, res['Value']['Failed'][lfn] ) ) else: gMonitor.addMark( "Replication failed", 1 ) errStr = "TransferAgent.execute: Completely failed to replicate and register file." gLogger.error( errStr, res['Message'] ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn,'Error', 'RM call failure' ) subRequestError = operation + " RM call failed" else: gLogger.info( "TransferAgent.execute: File already completed." ) ################################################ # If the sub-request is a replicate operation elif operation == 'replicate': gLogger.info( "TransferAgent.execute: Attempting to execute %s sub-request." % operation ) targetSE = subRequestAttributes['TargetSE'] sourceSE = subRequestAttributes['SourceSE'] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': gMonitor.addMark( "Replicate", 1 ) lfn = subRequestFile['LFN'] res = self.ReplicaManager.replicate( lfn, targetSE, sourceSE = sourceSE ) if res['OK']: if res['Value']['Successful'].has_key( lfn ): gMonitor.addMark( "Replication successful", 1 ) gLogger.info( "TransferAgent.execute: Successfully replicated %s to %s in %s seconds." % ( lfn, diracSE, res['Value']['Successful'][lfn] ) ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done' ) modified = True else: gMonitor.addMark( "Replication failed", 1 ) errStr = "TransferAgent.execute: Failed to replicate file." gLogger.error( errStr, "%s %s %s" % ( lfn, targetSE, res['Value']['Failed'][lfn] ) ) subRequestError = "Replicate operation failed for %s to %s" % ( lfn, targetSE ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn,'Error', 'Put failed' ) else: gMonitor.addMark( "Replication failed", 1 ) errStr = "TransferAgent.execute: Completely failed to replicate file." gLogger.error( errStr, res['Message'] ) subRequestError = "Replicate RM call failed for %s to %s" % ( lfn, targetSE ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn,'Error', 'Replicate RM call failed' ) else: gLogger.info( "TransferAgent.execute: File already completed." ) ################################################ # If the sub-request is a get operation elif operation == 'get': gLogger.info( "TransferAgent.execute: Attempting to execute %s sub-request." % operation ) sourceSE = subRequestAttributes['TargetSE'] for subRequestFile in subRequestFiles: if subRequestFile['Status'] == 'Waiting': lfn = str( subRequestFile['LFN'] ) pfn = str( subRequestFile['PFN'] ) got = False if sourceSE and pfn: res = self.ReplicaManager.getStorageFile( pfn, sourceSE ) if res['Value']['Successful'].has_key( pfn ): got = True else: res = self.ReplicaManager.getFile( lfn ) if res['Value']['Successful'].has_key( lfn ): got = False if got: gLogger.info( "TransferAgent.execute: Successfully got %s." % lfn ) oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done' ) modified = True else: errStr = "TransferAgent.execute: Failed to get file." gLogger.error( errStr, lfn ) else: gLogger.info( "TransferAgent.execute: File already completed." ) ################################################ # If the sub-request is none of the above types else: gLogger.error( "TransferAgent.execute: Operation not supported.", operation ) if subRequestError: oRequest.setSubRequestAttributeValue( ind, 'transfer', 'Error', subRequestError ) ################################################ # Determine whether there are any active files if oRequest.isSubRequestEmpty( ind, 'transfer' )['Value']: oRequest.setSubRequestStatus( ind, 'transfer', 'Done' ) gMonitor.addMark( "Done", 1 ) ################################################ # If the sub-request is already in terminal state else: gLogger.info( "TransferAgent.execute: Sub-request %s is status '%s' and not to be executed." % ( ind, subRequestAttributes['Status'] ) ) ################################################ # Generate the new request string after operation requestString = oRequest.toXML()['Value'] res = self.RequestDBClient.updateRequest( requestName, requestString, sourceServer ) if modified and jobID: result = self.finalizeRequest( requestName, jobID, sourceServer ) return S_OK()