def printRequest(request, status=None, full=False, verbose=True, terse=False):
    """Print a request (summary or full dump) followed by its FTS jobs.

    :param request: request object; its RequestName, RequestID, Status, Error,
        JobID, CreationTime, LastUpdate, OwnerDN and OwnerGroup attributes are
        read and iterating over it yields its operations
    :param status: status printed as the "in DB" one when it differs from
        ``request.Status``; ``request.Status`` is used when it is empty
    :param bool full: print ``request.toJSON()['Value']`` through prettyPrint
        instead of the summary
    :param bool verbose: forwarded to printOperation
    :param bool terse: only print the operations whose Status is 'Failed'
    """
    global output
    if full:
        # presumably prettyPrint appends its text to the module-level
        # ``output`` that is reset here -- confirm against prettyPrint
        output = ''
        prettyPrint(request.toJSON()['Value'])
        gLogger.always(output)
    else:
        if not status:
            status = request.Status
        gLogger.always("Request name='%s' ID=%s Status='%s'%s%s%s" % (
            request.RequestName,
            request.RequestID,
            request.Status,
            " ('%s' in DB)" % status if status != request.Status else '',
            (" Error='%s'" % request.Error) if request.Error and request.Error.strip() else "",
            (" Job=%s" % request.JobID) if request.JobID else ""))
        gLogger.always("Created %s, Updated %s" % (request.CreationTime, request.LastUpdate))
        if request.OwnerDN:
            gLogger.always("Owner: '%s', Group: %s" % (request.OwnerDN, request.OwnerGroup))
        for indexOperation in enumerate(request):
            op = indexOperation[1]
            if not terse or op.Status == 'Failed':
                printOperation(indexOperation, verbose, onlyFailed=terse)

    # Check if FTS job exists. This is done last and is best effort: a request
    # without an ID has nothing to look up, and a missing FTS client module
    # must not prevent the request itself from being printed.
    if not request.RequestID:
        return
    try:
        from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
    except ImportError as err:
        gLogger.debug("Could not instantiate FtsClient because of Exception", repr(err))
        return
    res = FTSClient().getFTSJobsForRequest(request.RequestID)
    if res['OK']:
        ftsJobs = res['Value']
        if ftsJobs:
            gLogger.always(' FTS jobs associated: %s' % ','.join(
                '%s (%s)' % (job.FTSGUID, job.Status) for job in ftsJobs))
def __init__(self, operation=None, csPath=None):
    """Build the handler, register its monitoring activities and its clients.

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
    super(ReplicateAndRegister, self).__init__(operation, csPath)

    # (name, description) pairs, registered in this order: the per-file
    # replication/registration counters first, then the FTS scheduling ones
    monitoredActivities = (
        ("ReplicateAndRegisterAtt", "Replicate and register attempted"),
        ("ReplicateOK", "Replications successful"),
        ("ReplicateFail", "Replications failed"),
        ("RegisterOK", "Registrations successful"),
        ("RegisterFail", "Registrations failed"),
        ("FTSScheduleAtt", "Files schedule attempted"),
        ("FTSScheduleOK", "File schedule successful"),
        ("FTSScheduleFail", "File schedule failed"),
    )
    for activityName, activityDescription in monitoredActivities:
        gMonitor.registerActivity(activityName, activityDescription,
                                  "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)

    # Clients
    self.fc = FileCatalog()
    # the FTS client is imported and created only when FTSMode is set and true
    if hasattr(self, "FTSMode") and self.FTSMode:
        from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
        self.ftsClient = FTSClient()
def printFTSJobs(request):
    """Print the FTS jobs associated to a request.

    The FTS3 service is queried first; the old FTS service is only queried
    when the FTS3 ping does not succeed. Nothing is done for a request whose
    RequestID is not set.

    :param request: Request object
    """
    try:
        if not request.RequestID:
            return

        # We try first the new FTS3 system
        from DIRAC.DataManagementSystem.Client.FTS3Client import FTS3Client
        fts3Client = FTS3Client()
        res = fts3Client.ping()
        if res['OK']:
            fts3Jobs = []
            for op in request:
                opRes = fts3Client.getOperationsFromRMSOpID(op.OperationID)
                if not opRes['OK']:
                    continue
                for fts3Op in opRes['Value']:
                    fts3Jobs.extend(fts3Op.ftsJobs)
            if fts3Jobs:
                jobLines = ['%s@%s (%s)' % (job.ftsGUID, job.ftsServer, job.status)
                            for job in fts3Jobs]
                gLogger.always('\n\nFTS3 jobs associated: \n%s' % '\n'.join(jobLines))
            return

        # If we are here, the attempt with the new FTS3 system did not work,
        # let's try the old FTS system
        gLogger.debug("Could not instantiate FTS3Client", res)
        from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
        ftsClient = FTSClient()
        res = ftsClient.ping()
        if not res['OK']:
            gLogger.debug("Could not instantiate FtsClient", res)
            return

        res = ftsClient.getFTSJobsForRequest(request.RequestID)
        if res['OK'] and res['Value']:
            jobDescriptions = ['%s (%s)' % (job.FTSGUID, job.Status) for job in res['Value']]
            gLogger.always(' FTS jobs associated: %s' % ','.join(jobDescriptions))

    # ImportError can be thrown for the old client
    # AttributeError can be thrown because the deserialization will not have
    # happened correctly on the new fts3 (CC7 typically), and the error is not
    # properly propagated
    except (ImportError, AttributeError) as err:
        gLogger.debug("Could not instantiate FtsClient because of Exception", repr(err))
def printFTSJobs(request):
    """Print the FTS jobs associated to a request.

    The FTS3 service is queried first; the old FTS service is only queried
    when the FTS3 ping does not succeed. Nothing is done for a request whose
    RequestID is not set.

    :param request: Request object
    """
    try:
        if not request.RequestID:
            return

        # We try first the new FTS3 system
        from DIRAC.DataManagementSystem.Client.FTS3Client import FTS3Client
        fts3Client = FTS3Client()
        res = fts3Client.ping()
        if res['OK']:
            fts3Jobs = []
            for op in request:
                opRes = fts3Client.getOperationsFromRMSOpID(op.OperationID)
                if not opRes['OK']:
                    continue
                for fts3Op in opRes['Value']:
                    fts3Jobs.extend(fts3Op.ftsJobs)
            if fts3Jobs:
                jobLines = ['%s@%s (%s)' % (job.ftsGUID, job.ftsServer, job.status)
                            for job in fts3Jobs]
                gLogger.always('\n\nFTS3 jobs associated: \n%s' % '\n'.join(jobLines))
            return

        # If we are here, the attempt with the new FTS3 system did not work,
        # let's try the old FTS system
        gLogger.debug("Could not instantiate FTS3Client", res)
        from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
        ftsClient = FTSClient()
        res = ftsClient.ping()
        if not res['OK']:
            gLogger.debug("Could not instantiate FtsClient", res)
            return

        res = ftsClient.getFTSJobsForRequest(request.RequestID)
        if res['OK'] and res['Value']:
            jobDescriptions = ['%s (%s)' % (job.FTSGUID, job.Status) for job in res['Value']]
            gLogger.always(' FTS jobs associated: %s' % ','.join(jobDescriptions))

    # ImportError can be thrown for the old client
    # AttributeError can be thrown because the deserialization will not have
    # happened correctly on the new fts3 (CC7 typically), and the error is not
    # properly propagated
    except (ImportError, AttributeError) as err:
        gLogger.debug("Could not instantiate FtsClient because of Exception", repr(err))
def printRequest(request, status=None, full=False, verbose=True, terse=False):
    """Print a request (summary or full dump) followed by its FTS jobs.

    :param request: request object; its RequestName, Status, Error, JobID,
        CreationTime, LastUpdate, NotBefore, OwnerDN and OwnerGroup attributes
        are read, RequestID is treated as optional, and iterating over it
        yields its operations
    :param status: status printed as the "in DB" one when it differs from
        ``request.Status``; ``request.Status`` is used when it is empty
    :param bool full: print the decoded ``request.toJSON()['Value']`` through
        prettyPrint instead of the summary
    :param bool verbose: forwarded to printOperation
    :param bool terse: only print the operations whose Status is 'Failed'
    """
    global output

    # The FTS client is optional: it is only kept when the request has an ID,
    # the module can be imported and the service answers the ping.
    ftsClient = None
    try:
        # RequestID may not be set yet (the summary line below allows for it),
        # so it must not be read as a plain attribute here
        if getattr(request, 'RequestID', None):
            from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
            ftsClient = FTSClient()
            res = ftsClient.ping()
            if not res['OK']:
                gLogger.debug("Could not instantiate FtsClient", res)
                ftsClient = None
    except ImportError as err:
        gLogger.debug("Could not instantiate FtsClient because of Exception", repr(err))

    if full:
        # presumably prettyPrint appends its text to the module-level
        # ``output`` that is reset here -- confirm against prettyPrint
        output = ''
        prettyPrint(json.loads(request.toJSON()['Value']))
        gLogger.always(output)
    else:
        if not status:
            status = request.Status
        gLogger.always("Request name='%s' ID=%s Status='%s'%s%s%s" % (
            request.RequestName,
            request.RequestID if hasattr(request, 'RequestID') else '(not set yet)',
            request.Status,
            " ('%s' in DB)" % status if status != request.Status else '',
            (" Error='%s'" % request.Error) if request.Error and request.Error.strip() else "",
            (" Job=%s" % request.JobID) if request.JobID else ""))
        gLogger.always("Created %s, Updated %s%s" % (
            request.CreationTime,
            request.LastUpdate,
            (", NotBefore %s" % request.NotBefore) if request.NotBefore else ""))
        if request.OwnerDN:
            gLogger.always("Owner: '%s', Group: %s" % (request.OwnerDN, request.OwnerGroup))
        for indexOperation in enumerate(request):
            op = indexOperation[1]
            if not terse or op.Status == 'Failed':
                printOperation(indexOperation, verbose, onlyFailed=terse)

    if ftsClient:
        # Check if FTS job exists
        res = ftsClient.getFTSJobsForRequest(request.RequestID)
        if res['OK']:
            ftsJobs = res['Value']
            if ftsJobs:
                gLogger.always(' FTS jobs associated: %s' % ','.join(
                    '%s (%s)' % (job.FTSGUID, job.Status) for job in ftsJobs))
def printFTSJobs(request):
    """Print the FTS jobs associated to a request.

    The FTS3 service is queried first; the old FTS service is only queried
    when the FTS3 ping does not succeed. Nothing is done for a request whose
    RequestID is not set. The whole lookup is best effort: import and
    attribute errors are logged at debug level and otherwise ignored.

    :param request: Request object
    """
    try:
        if request.RequestID:

            # We try first the new FTS3 system
            from DIRAC.DataManagementSystem.Client.FTS3Client import FTS3Client
            fts3Client = FTS3Client()
            res = fts3Client.ping()

            if res['OK']:
                associatedFTS3Jobs = []
                for op in request:
                    res = fts3Client.getOperationsFromRMSOpID(op.OperationID)
                    if res['OK']:
                        for fts3Op in res['Value']:
                            associatedFTS3Jobs.extend(fts3Op.ftsJobs)
                if associatedFTS3Jobs:
                    gLogger.always(
                        '\n\nFTS3 jobs associated: \n%s' % '\n'.join(
                            '%s@%s (%s)' % (job.ftsGUID, job.ftsServer, job.status)
                            for job in associatedFTS3Jobs))
                return

            # If we are here, the attempt with the new FTS3 system did not
            # work, let's try the old FTS system
            gLogger.debug("Could not instantiate FTS3Client", res)
            from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
            ftsClient = FTSClient()
            res = ftsClient.ping()
            if not res['OK']:
                gLogger.debug("Could not instantiate FtsClient", res)
                return

            res = ftsClient.getFTSJobsForRequest(request.RequestID)
            if res['OK']:
                ftsJobs = res['Value']
                if ftsJobs:
                    gLogger.always(' FTS jobs associated: %s' % ','.join(
                        '%s (%s)' % (job.FTSGUID, job.Status) for job in ftsJobs))

    # ImportError can be thrown when a client module is not available.
    # AttributeError is caught as well so that a request lacking RequestID, or
    # an FTS3 operation/job object lacking one of the attributes read above,
    # does not abort the caller's printout.
    except (ImportError, AttributeError) as err:
        gLogger.debug("Could not instantiate FtsClient because of Exception", repr(err))
def __init__(self, operation=None, csPath=None):
    """Build the handler, register its monitoring activities and its clients.

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
    super(ReplicateAndRegister, self).__init__(operation, csPath)

    # (name, description) pairs, registered in this order: the per-file
    # replication/registration counters first, then the FTS scheduling ones
    monitoredActivities = (
        ("ReplicateAndRegisterAtt", "Replicate and register attempted"),
        ("ReplicateOK", "Replications successful"),
        ("ReplicateFail", "Replications failed"),
        ("RegisterOK", "Registrations successful"),
        ("RegisterFail", "Registrations failed"),
        ("FTSScheduleAtt", "Files schedule attempted"),
        ("FTSScheduleOK", "File schedule successful"),
        ("FTSScheduleFail", "File schedule failed"),
    )
    for activityName, activityDescription in monitoredActivities:
        gMonitor.registerActivity(activityName, activityDescription,
                                  "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)

    # SE cache, starts empty
    self.seCache = {}

    # Clients
    self.fc = FileCatalog()
    self.ftsClient = FTSClient()
def printRequest(request, status=None, full=False, verbose=True, terse=False):
    global output
    # Best-effort creation of the FTS client: any failure to import or
    # instantiate it is logged at debug level and leaves ftsClient as None.
    ftsClient = None
    try:
        from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
        ftsClient = FTSClient()
    except Exception as e:
        gLogger.debug("Could not instantiate FtsClient", e)
def __init__(self, operation=None, csPath=None):
    """Build the handler, register its monitoring activities and its clients.

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
    super(ReplicateAndRegister, self).__init__(operation, csPath)

    # (name, description) pairs, registered in this order: the per-file
    # replication/registration counters first, then the FTS scheduling ones
    monitoredActivities = (
        ("ReplicateAndRegisterAtt", "Replicate and register attempted"),
        ("ReplicateOK", "Replications successful"),
        ("ReplicateFail", "Replications failed"),
        ("RegisterOK", "Registrations successful"),
        ("RegisterFail", "Registrations failed"),
        ("FTSScheduleAtt", "Files schedule attempted"),
        ("FTSScheduleOK", "File schedule successful"),
        ("FTSScheduleFail", "File schedule failed"),
    )
    for activityName, activityDescription in monitoredActivities:
        gMonitor.registerActivity(activityName, activityDescription,
                                  "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)

    # SE cache, starts empty
    self.seCache = {}

    # Clients
    self.rm = ReplicaManager()
    self.ftsClient = FTSClient()
def ftsClient(cls):
    """Return the FTS client stored on the class, creating it on first use."""
    if cls.__ftsClient:
        return cls.__ftsClient
    # nothing cached yet (or a falsy placeholder): build the client and keep it
    cls.__ftsClient = FTSClient()
    return cls.__ftsClient
def ftsClient(self):
    """Return the FTS client stored on the instance, creating it on first use."""
    if self.__ftsClient:
        return self.__ftsClient
    # nothing cached yet (or a falsy placeholder): build the client and keep it
    self.__ftsClient = FTSClient()
    return self.__ftsClient
def ftsTransfer(self):
    """Replicate and register using FTS.

    Waiting files that have valid source replicas and at least one missing
    target are scheduled through the FTS client; whatever is still waiting
    afterwards is handed over to :meth:`dmTransfer`.

    :return: the failed result of the RSS check, of the metadata lookup or of
        the FTS scheduling; S_OK with a message when targets are banned for
        writing; S_OK() when FTS returned nothing; otherwise the result of
        ``self.dmTransfer(fromFTS=True)``
    """
    self.log.info("scheduling files in FTS...")

    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
        gMonitor.addMark("FTSScheduleAtt")
        gMonitor.addMark("FTSScheduleFail")
        return bannedTargets

    if bannedTargets['Value']:
        return S_OK("%s targets are banned for writing" % ",".join(bannedTargets['Value']))

    # Can continue now
    self.log.verbose("No targets banned for writing")

    # {lfn: [opFile, validReplicas, validTargets]}
    toSchedule = {}
    # minutes by which the next execution of the request is postponed
    delayExecution = 0
    # number of files hit by each error message, reported once after the loop
    errors = defaultdict(int)
    for opFile in self.getWaitingFilesList():
        opFile.Error = ''
        gMonitor.addMark("FTSScheduleAtt")
        # # check replicas
        replicas = self._filterReplicas(opFile)
        if not replicas["OK"]:
            continue
        replicas = replicas["Value"]

        validReplicas = replicas.get("Valid")
        noMetaReplicas = replicas.get("NoMetadata")
        noReplicas = replicas.get('NoReplicas')
        badReplicas = replicas.get('Bad')
        noActiveReplicas = replicas.get('NoActiveReplicas')

        if validReplicas:
            validTargets = list(set(self.operation.targetSEList) - set(validReplicas))
            if not validTargets:
                self.log.info("file %s is already present at all targets" % opFile.LFN)
                opFile.Status = "Done"
            else:
                toSchedule[opFile.LFN] = [opFile, validReplicas, validTargets]
        else:
            gMonitor.addMark("FTSScheduleFail")
            if noMetaReplicas:
                err = "Couldn't get metadata"
                errors[err] += 1
                self.log.verbose("unable to schedule '%s', %s at %s" %
                                 (opFile.LFN, err, ','.join(noMetaReplicas)))
                opFile.Error = err
            elif noReplicas:
                err = "File doesn't exist"
                errors[err] += 1
                self.log.error("Unable to schedule transfer",
                               "%s %s at %s" % (opFile.LFN, err, ','.join(noReplicas)))
                opFile.Error = err
                opFile.Status = 'Failed'
            elif badReplicas:
                err = "All replicas have a bad checksum"
                errors[err] += 1
                self.log.error("Unable to schedule transfer",
                               "%s, %s at %s" % (opFile.LFN, err, ','.join(badReplicas)))
                opFile.Error = err
                opFile.Status = 'Failed'
            elif noActiveReplicas:
                err = "No active replica found"
                errors[err] += 1
                self.log.verbose("Unable to schedule transfer",
                                 "%s, %s at %s" % (opFile.LFN, err, ','.join(noActiveReplicas)))
                opFile.Error = err
                # All source SEs are banned, delay execution by 1 hour
                delayExecution = 60

    if delayExecution:
        self.log.info("Delay execution of the request by %d minutes" % delayExecution)
        self.request.delayNextExecution(delayExecution)

    # Log error counts; items() instead of iteritems() so the loop does not
    # depend on the Python 2 only dict API
    for error, count in errors.items():
        self.log.error(error, 'for %d files' % count)

    res = self._addMetadataToFiles(toSchedule)
    if not res['OK']:
        return res
    # an empty/None Value is treated as "nothing to schedule"
    filesToSchedule = res['Value'] or {}
    filesToScheduleList = [(filesToSchedule[lfn][0].toJSON()['Value'],
                            toSchedule[lfn][1],
                            toSchedule[lfn][2])
                           for lfn in filesToSchedule]

    if filesToScheduleList:
        ftsSchedule = FTSClient().ftsSchedule(self.request.RequestID,
                                              self.operation.OperationID,
                                              filesToScheduleList)
        if not ftsSchedule["OK"]:
            self.log.error("Completely failed to schedule to FTS:", ftsSchedule["Message"])
            return ftsSchedule

        # might have nothing to schedule
        ftsSchedule = ftsSchedule["Value"]
        if not ftsSchedule:
            return S_OK()

        self.log.info("%d files have been scheduled to FTS" % len(ftsSchedule['Successful']))
        for opFile in self.operation:
            fileID = opFile.FileID
            if fileID in ftsSchedule["Successful"]:
                gMonitor.addMark("FTSScheduleOK", 1)
                opFile.Status = "Scheduled"
                self.log.debug("%s has been scheduled for FTS" % opFile.LFN)
            elif fileID in ftsSchedule["Failed"]:
                gMonitor.addMark("FTSScheduleFail", 1)
                opFile.Error = ftsSchedule["Failed"][fileID]
                if 'sourceSURL equals to targetSURL' in opFile.Error:
                    # In this case there is no need to continue
                    opFile.Status = 'Failed'
                self.log.warn("unable to schedule %s for FTS: %s" % (opFile.LFN, opFile.Error))
    else:
        self.log.info("No files to schedule after metadata checks")

    # Just in case some transfers could not be scheduled, try them with RM
    return self.dmTransfer(fromFTS=True)
if __name__ == "__main__":

    # Exactly one positional argument is expected; anything else shows the help.
    # NOTE(review): the code below reads args[0] unconditionally, so showHelp()
    # is presumably expected to terminate the script -- confirm.
    args = Script.getPositionalArgs()
    if len( args ) != 1:
        Script.showHelp()
    # The argument is the request ID and must parse as an integer.
    try:
        requestID = long( args[0] )
    except ValueError:
        DIRAC.gLogger.error( "requestID should be an integer" )
        DIRAC.exit( -1 )

    from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
    from DIRAC.DataManagementSystem.Client.FTSJob import FTSJob

    ftsClient = FTSClient()

    # Query the jobs of the request in every state listed by FTSJob
    # (INITSTATES, TRANSSTATES and FINALSTATES concatenated).
    ftsJobs = ftsClient.getFTSJobsForRequest( requestID, list( FTSJob.INITSTATES + FTSJob.TRANSSTATES + FTSJob.FINALSTATES ) )
    if not ftsJobs["OK"]:
        DIRAC.gLogger.error( ftsJobs["Message"] )
        DIRAC.exit( -1 )
    ftsJobs = ftsJobs["Value"]

    # No job at all is reported and is a normal (exit code 0) outcome.
    if not ftsJobs:
        DIRAC.gLogger.always( "No FTSJobs found for requestID %s" % requestID )
        DIRAC.exit( 0 )

    DIRAC.gLogger.always( "Found %s FTSJobs for requestID %s" % ( len( ftsJobs ), requestID ) )

    # NOTE(review): not used in the visible part of the script; presumably the
    # FTSJob / FTSFile attribute names printed further down -- confirm.
    jobKeys = ( "SourceSE", "TargetSE", "Status", "Files", "Size", "Completeness", "CreationTime", "SubmitTime", "LastUpdate", "Error" )
    fileKeys = ( "SourceSURL", "TargetSURL", "Attempt", "Status", "Error" )
class ReplicateAndRegister(DMSRequestOperationsBase):
    """
    .. class:: ReplicateAndRegister

    ReplicateAndRegister operation handler: replicates the files of an
    operation to its target SEs, through FTS when FTSMode is enabled for the
    handler and the request owner's group is not banned, through the data
    manager otherwise.
    """

    def __init__(self, operation=None, csPath=None):
        """c'tor

        :param self: self reference
        :param Operation operation: Operation instance
        :param str csPath: CS path for this handler
        """
        super(ReplicateAndRegister, self).__init__(operation, csPath)

        # (name, description) pairs, registered in this order: the per-file
        # replication/registration counters first, then the FTS scheduling ones
        monitoredActivities = (
            ("ReplicateAndRegisterAtt", "Replicate and register attempted"),
            ("ReplicateOK", "Replications successful"),
            ("ReplicateFail", "Replications failed"),
            ("RegisterOK", "Registrations successful"),
            ("RegisterFail", "Registrations failed"),
            ("FTSScheduleAtt", "Files schedule attempted"),
            ("FTSScheduleOK", "File schedule successful"),
            ("FTSScheduleFail", "File schedule failed"),
        )
        for activityName, activityDescription in monitoredActivities:
            gMonitor.registerActivity(activityName, activityDescription,
                                      "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)

        # Clients
        self.fc = FileCatalog()
        # the FTS client is imported and created only when FTSMode is set and true
        if getattr(self, "FTSMode", False):
            from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
            self.ftsClient = FTSClient()

    def __call__(self):
        """Execute the operation.

        :return: result of :meth:`ftsTransfer` or :meth:`dmTransfer`
        """
        # # check replicas first; a failure is logged but does not stop the transfer
        checkReplicas = self.__checkReplicas()
        if not checkReplicas["OK"]:
            self.log.error('Failed to check replicas', checkReplicas["Message"])

        if getattr(self, "FTSMode", False):
            bannedGroups = getattr(self, "FTSBannedGroups", ())
            if self.request.OwnerGroup in bannedGroups:
                self.log.verbose("usage of FTS system is banned for request's owner")
                return self.dmTransfer()
            return self.ftsTransfer()
        return self.dmTransfer()

    def __checkReplicas(self):
        """Check done replicas and update file states.

        Files in 'Waiting' or 'Scheduled' status are looked up in the file
        catalog: those reported as missing are set to 'Failed', those already
        present at every target SE are set to 'Done'.

        :return: S_OK(), or the failed catalog result
        """
        waitingFiles = dict((opFile.LFN, opFile) for opFile in self.operation
                            if opFile.Status in ("Waiting", "Scheduled"))
        if not waitingFiles:
            # nothing to look up, spare the catalog call
            return S_OK()
        targetSESet = set(self.operation.targetSEList)

        replicas = self.fc.getReplicas(list(waitingFiles))
        if not replicas["OK"]:
            self.log.error('Failed to get replicas', replicas["Message"])
            return replicas

        for failedLFN, errStr in replicas["Value"]["Failed"].items():
            waitingFiles[failedLFN].Error = errStr
            # catalog message containing "such file" (case-insensitive) is
            # taken as "the file does not exist"
            if "such file" in errStr.lower():
                self.log.error("File does not exists", failedLFN)
                gMonitor.addMark("ReplicateFail", len(targetSESet))
                waitingFiles[failedLFN].Status = "Failed"

        for successfulLFN, reps in replicas["Value"]["Successful"].items():
            if targetSESet.issubset(set(reps)):
                self.log.info("file %s has been replicated to all targets" % successfulLFN)
                waitingFiles[successfulLFN].Status = "Done"

        return S_OK()

    def _addMetadataToFiles(self, toSchedule):
        """Add metadata to those files that need to be scheduled through FTS.

        The GUID, Checksum, ChecksumType and Size returned by the file catalog
        are copied onto each operation file.

        :param dict toSchedule: {'lfn1': [opFile, validReplicas, validTargets],
                                 'lfn2': [opFile, validReplicas, validTargets]}
        :return: S_OK() when there is nothing to schedule, the failed catalog
            result, or S_OK with a list of
            (opFile JSON, validReplicas, validTargets) tuples for the files
            whose metadata could be read
        """
        if not toSchedule:
            self.log.info("No files to schedule")
            return S_OK()

        self.log.info("found %s files to schedule, getting metadata from FC" % len(toSchedule))
        res = self.fc.getFileMetadata(list(toSchedule))
        if not res['OK']:
            return res
        if res['Value']['Failed']:
            self.log.warn("Can't schedule %d files: problems getting the metadata: %s" %
                          (len(res['Value']['Failed']), ', '.join(res['Value']['Failed'])))
        metadata = res['Value']['Successful']

        filesToScheduleList = []
        for lfn, lfnMetadata in metadata.items():
            opFileToSchedule, validReplicas, validTargets = toSchedule[lfn][:3]
            opFileToSchedule.GUID = lfnMetadata['GUID']
            opFileToSchedule.Checksum = lfnMetadata['Checksum']
            opFileToSchedule.ChecksumType = lfnMetadata['ChecksumType']
            opFileToSchedule.Size = lfnMetadata['Size']
            filesToScheduleList.append((opFileToSchedule.toJSON()['Value'],
                                        validReplicas,
                                        validTargets))

        return S_OK(filesToScheduleList)

    def _filterReplicas(self, opFile):
        """Filter out banned/invalid source SEs (delegates to filterReplicas)."""
        return filterReplicas(opFile, logger=self.log, dataManager=self.dm)

    def ftsTransfer(self):
        """Replicate and register using FTS.

        :return: the failed result of the RSS check, of the metadata lookup or
            of the FTS scheduling; S_OK with a message when targets are banned
            for writing; S_OK() when FTS returned nothing; otherwise the
            result of ``self.dmTransfer(fromFTS=True)``
        """
        self.log.info("scheduling files in FTS...")

        bannedTargets = self.checkSEsRSS()
        if not bannedTargets['OK']:
            gMonitor.addMark("FTSScheduleAtt")
            gMonitor.addMark("FTSScheduleFail")
            return bannedTargets

        if bannedTargets['Value']:
            return S_OK("%s targets are banned for writing" % ",".join(bannedTargets['Value']))

        # Can continue now
        self.log.verbose("No targets banned for writing")

        # {lfn: [opFile, validReplicas, validTargets]}
        toSchedule = {}

        for opFile in self.getWaitingFilesList():
            opFile.Error = ''
            gMonitor.addMark("FTSScheduleAtt")
            # # check replicas
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                # same report as in dmTransfer, so the skipped file is traceable
                self.log.error('Failed to check replicas', replicas["Message"])
                continue
            replicas = replicas["Value"]

            validReplicas = replicas["Valid"]
            noMetaReplicas = replicas["NoMetadata"]
            noReplicas = replicas['NoReplicas']
            badReplicas = replicas['Bad']
            noPFN = replicas['NoPFN']

            if validReplicas:
                validTargets = list(set(self.operation.targetSEList) - set(validReplicas))
                if not validTargets:
                    self.log.info("file %s is already present at all targets" % opFile.LFN)
                    opFile.Status = "Done"
                else:
                    toSchedule[opFile.LFN] = [opFile, validReplicas, validTargets]
            else:
                gMonitor.addMark("FTSScheduleFail")
                if noMetaReplicas:
                    self.log.warn("unable to schedule '%s', couldn't get metadata at %s" %
                                  (opFile.LFN, ','.join(noMetaReplicas)))
                    opFile.Error = "Couldn't get metadata"
                elif noReplicas:
                    self.log.error("Unable to schedule transfer",
                                   "File %s doesn't exist at %s" % (opFile.LFN, ','.join(noReplicas)))
                    opFile.Error = 'No replicas found'
                    opFile.Status = 'Failed'
                elif badReplicas:
                    self.log.error("Unable to schedule transfer",
                                   "File %s, all replicas have a bad checksum at %s" %
                                   (opFile.LFN, ','.join(badReplicas)))
                    opFile.Error = 'All replicas have a bad checksum'
                    opFile.Status = 'Failed'
                elif noPFN:
                    self.log.warn("unable to schedule %s, could not get a PFN at %s" %
                                  (opFile.LFN, ','.join(noPFN)))

        res = self._addMetadataToFiles(toSchedule)
        if not res['OK']:
            return res
        filesToScheduleList = res['Value']

        if filesToScheduleList:
            ftsSchedule = self.ftsClient.ftsSchedule(self.request.RequestID,
                                                     self.operation.OperationID,
                                                     filesToScheduleList)
            if not ftsSchedule["OK"]:
                self.log.error("Completely failed to schedule to FTS:", ftsSchedule["Message"])
                return ftsSchedule

            # might have nothing to schedule
            ftsSchedule = ftsSchedule["Value"]
            if not ftsSchedule:
                return S_OK()

            self.log.info("%d files have been scheduled to FTS" % len(ftsSchedule['Successful']))
            for opFile in self.operation:
                fileID = opFile.FileID
                if fileID in ftsSchedule["Successful"]:
                    gMonitor.addMark("FTSScheduleOK", 1)
                    opFile.Status = "Scheduled"
                    self.log.debug("%s has been scheduled for FTS" % opFile.LFN)
                elif fileID in ftsSchedule["Failed"]:
                    gMonitor.addMark("FTSScheduleFail", 1)
                    opFile.Error = ftsSchedule["Failed"][fileID]
                    if 'sourceSURL equals to targetSURL' in opFile.Error:
                        # In this case there is no need to continue
                        opFile.Status = 'Failed'
                    self.log.warn("unable to schedule %s for FTS: %s" % (opFile.LFN, opFile.Error))
        else:
            self.log.info("No files to schedule after metadata checks")

        # Just in case some transfers could not be scheduled, try them with RM
        return self.dmTransfer(fromFTS=True)

    def dmTransfer(self, fromFTS=False):
        """Replicate and register using dataManager.

        :param bool fromFTS: only selects the log message announcing the transfer
        :return: the failed result of an RSS check; S_OK with a message when
            the source or some targets are banned; S_OK() otherwise (per-file
            problems are reported through opFile.Error / opFile.Status)
        """
        # # source SE requested by the operation, if any
        sourceSE = self.operation.SourceSE if self.operation.SourceSE else None
        if sourceSE:
            # # check source se for read
            bannedSource = self.checkSEsRSS(sourceSE, 'ReadAccess')
            if not bannedSource["OK"]:
                gMonitor.addMark("ReplicateAndRegisterAtt", len(self.operation))
                gMonitor.addMark("ReplicateFail", len(self.operation))
                return bannedSource

            if bannedSource["Value"]:
                self.operation.Error = "SourceSE %s is banned for reading" % sourceSE
                self.log.info(self.operation.Error)
                return S_OK(self.operation.Error)

        # # check targetSEs for write
        bannedTargets = self.checkSEsRSS()
        if not bannedTargets['OK']:
            gMonitor.addMark("ReplicateAndRegisterAtt", len(self.operation))
            gMonitor.addMark("ReplicateFail", len(self.operation))
            return bannedTargets

        if bannedTargets['Value']:
            self.operation.Error = "%s targets are banned for writing" % ",".join(bannedTargets['Value'])
            return S_OK(self.operation.Error)

        # Can continue now
        self.log.verbose("No targets banned for writing")

        # # get waiting files. If none just return
        waitingFiles = self.getWaitingFilesList()
        if not waitingFiles:
            return S_OK()

        # # loop over files
        if fromFTS:
            self.log.info("Trying transfer using replica manager as FTS failed")
        else:
            self.log.info("Transferring files using Data manager...")
        for opFile in waitingFiles:
            gMonitor.addMark("ReplicateAndRegisterAtt", 1)
            opFile.Error = ''
            lfn = opFile.LFN

            # Check if replica is at the specified source
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                self.log.error('Failed to check replicas', replicas["Message"])
                continue
            replicas = replicas["Value"]
            validReplicas = replicas["Valid"]
            noMetaReplicas = replicas["NoMetadata"]
            noReplicas = replicas['NoReplicas']
            badReplicas = replicas['Bad']
            noPFN = replicas['NoPFN']

            if not validReplicas:
                gMonitor.addMark("ReplicateFail")
                if noMetaReplicas:
                    self.log.warn("unable to replicate '%s', couldn't get metadata at %s" %
                                  (opFile.LFN, ','.join(noMetaReplicas)))
                    opFile.Error = "Couldn't get metadata"
                elif noReplicas:
                    self.log.error("Unable to replicate",
                                   "File %s doesn't exist at %s" % (opFile.LFN, ','.join(noReplicas)))
                    opFile.Error = 'No replicas found'
                    opFile.Status = 'Failed'
                elif badReplicas:
                    self.log.error("Unable to replicate",
                                   "%s, all replicas have a bad checksum at %s" %
                                   (opFile.LFN, ','.join(badReplicas)))
                    opFile.Error = 'All replicas have a bad checksum'
                    opFile.Status = 'Failed'
                elif noPFN:
                    self.log.warn("unable to replicate %s, could not get a PFN" % opFile.LFN)
                continue

            # # source used for this file: the operation's one when the file has
            # # a valid replica there, else the first valid replica. Kept in a
            # # per-file variable so that the fallback chosen for one file is
            # # neither used nor reported as "specified" for the following ones.
            fileSourceSE = sourceSE
            if fileSourceSE not in validReplicas:
                if fileSourceSE:
                    self.log.warn("%s is not at specified sourceSE %s, changed to %s" %
                                  (lfn, fileSourceSE, validReplicas[0]))
                fileSourceSE = validReplicas[0]

            # # loop over targetSE
            catalogs = self.operation.Catalog
            if catalogs:
                catalogs = [cat.strip() for cat in catalogs.split(',')]

            for targetSE in self.operation.targetSEList:

                # # call DataManager
                if targetSE in validReplicas:
                    self.log.warn("Request to replicate %s to an existing location: %s" % (lfn, targetSE))
                    opFile.Status = 'Done'
                    continue
                res = self.dm.replicateAndRegister(lfn, targetSE, sourceSE=fileSourceSE, catalog=catalogs)
                if not res["OK"]:
                    gMonitor.addMark("ReplicateFail", 1)
                    opFile.Error = "DataManager error: %s" % res["Message"]
                    self.log.error("DataManager error", res["Message"])
                    continue

                if lfn not in res["Value"]["Successful"]:
                    gMonitor.addMark("ReplicateFail", 1)
                    reason = res["Value"]["Failed"][lfn]
                    self.log.error("Failed to replicate and register",
                                   "File %s at %s:" % (lfn, targetSE), reason)
                    opFile.Error = reason
                    continue

                timings = res["Value"]["Successful"][lfn]
                if "replicate" not in timings:
                    self.log.error("Failed to replicate", "%s to %s" % (lfn, targetSE))
                    gMonitor.addMark("ReplicateFail", 1)
                    opFile.Error = "Failed to replicate"
                    continue

                repTime = timings["replicate"]
                prString = "file %s replicated at %s in %s s." % (lfn, targetSE, repTime)
                gMonitor.addMark("ReplicateOK", 1)

                if "register" in timings:
                    gMonitor.addMark("RegisterOK", 1)
                    regTime = timings["register"]
                    prString += ' and registered in %s s.' % regTime
                    self.log.info(prString)
                else:
                    gMonitor.addMark("RegisterFail", 1)
                    prString += " but failed to register"
                    self.log.warn(prString)

                    opFile.Error = "Failed to register"
                    # # add register replica operation
                    registerOperation = self.getRegisterOperation(opFile, targetSE, type='RegisterReplica')
                    self.request.insertAfter(registerOperation, self.operation)

            # the file is done only if no target left an error behind
            if not opFile.Error:
                if len(self.operation.targetSEList) > 1:
                    self.log.info("file %s has been replicated to all targetSEs" % lfn)
                opFile.Status = "Done"

        return S_OK()
if __name__ == "__main__":

    # Exactly one positional argument is expected; anything else shows the help.
    # NOTE(review): the code below reads args[0] unconditionally, so showHelp()
    # is presumably expected to terminate the script -- confirm.
    args = Script.getPositionalArgs()
    if len(args) != 1:
        Script.showHelp()
    # The argument is the request ID and must parse as an integer.
    try:
        requestID = long(args[0])
    except ValueError:
        DIRAC.gLogger.error("requestID should be an integer")
        DIRAC.exit(-1)

    from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
    from DIRAC.DataManagementSystem.Client.FTSJob import FTSJob

    ftsClient = FTSClient()

    # Query the jobs of the request in every state listed by FTSJob
    # (INITSTATES, TRANSSTATES and FINALSTATES concatenated).
    ftsJobs = ftsClient.getFTSJobsForRequest(
        requestID, list(FTSJob.INITSTATES + FTSJob.TRANSSTATES + FTSJob.FINALSTATES))
    if not ftsJobs["OK"]:
        DIRAC.gLogger.error(ftsJobs["Message"])
        DIRAC.exit(-1)
    ftsJobs = ftsJobs["Value"]

    # No job at all is reported and is a normal (exit code 0) outcome.
    if not ftsJobs:
        DIRAC.gLogger.always("No FTSJobs found for requestID %s" % requestID)
        DIRAC.exit(0)

    DIRAC.gLogger.always("Found %s FTSJobs for requestID %s" % (len(ftsJobs), requestID))
# Three positional arguments are required: the FTS site name, the FTS server
# and the maximum number of active jobs (which must be an integer).
args = Script.getPositionalArgs()
maxActiveJobs = 50
ftsSite = ftsServer = ""
if len(args) != 3:
    Script.showHelp()
    DIRAC.exit(0)
else:
    ftsSite, ftsServer, maxActiveJobs = args
    try:
        maxActiveJobs = int(maxActiveJobs)
    except ValueError as error:
        gLogger.error(error)
        DIRAC.exit(-1)

ftsClient = FTSClient()

# Refuse to go on when a site with the same name is already known to the FTS client.
ftsSites = ftsClient.getFTSSitesList()
if not ftsSites["OK"]:
    gLogger.error("unable to read FTSSites: %s" % ftsSites["Message"])
    DIRAC.exit(-1)
ftsSites = ftsSites["Value"]
for site in ftsSites:
    if site.Name == ftsSite:
        gLogger.error("FTSSite '%s' is present in FTSDB!!!" % ftsSite)
        DIRAC.exit(-1)

# NOTE(review): this rebinds the name ``getSites`` from the function to its
# result; left as is because code beyond this excerpt may read the result
# under that name.
getSites = getSites()
if not getSites["OK"]:
    gLogger.error("unable to read sites defined in CS!!!")
from DIRAC.Core.Base import Script
# The usage text is the module docstring followed by a one-line synopsis.
Script.setUsageMessage( '\n'.join( [ __doc__, 'Usage:', ' %s [option|cfgfile]' % Script.scriptName ] ) )
from operator import itemgetter

if __name__ == "__main__":

    # The command line is parsed before the remaining DIRAC modules are imported.
    from DIRAC.Core.Base.Script import parseCommandLine
    parseCommandLine()

    import DIRAC
    from DIRAC import gLogger, gConfig

    from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
    ftsClient = FTSClient()

    # Fetch the DB summary; a failed call is reported and ends the script with -1.
    ret = ftsClient.getDBSummary()
    if not ret["OK"]:
        gLogger.error( ret["Message"] )
        DIRAC.exit( -1 )
    ret = ret["Value"]

    # ic numbers the sections as they are printed.
    ic = 1

    # The "FTSSite" section is printed only when present and non-empty:
    # one line per site with its Name and FTSServer.
    ftsSites = ret.get( "FTSSite", None )
    if ftsSites:
        gLogger.always( "[%d] FTSSites:" % ic )
        ic += 1
        for ftsSite in ftsSites:
            gLogger.always( "- %-20s (%s)" % ( ftsSite["Name"], ftsSite["FTSServer"] ) )
Script.setUsageMessage('\n'.join( [__doc__, 'Usage:', ' %s [option|cfgfile]' % Script.scriptName])) from operator import itemgetter if __name__ == "__main__": from DIRAC.Core.Base.Script import parseCommandLine parseCommandLine() import DIRAC from DIRAC import gLogger, gConfig from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient ftsClient = FTSClient() ret = ftsClient.getDBSummary() if not ret["OK"]: gLogger.error(ret["Message"]) DIRAC.exit(-1) ret = ret["Value"] ic = 1 ftsSites = ret.get("FTSSite", None) if ftsSites: gLogger.always("[%d] FTSSites:" % ic) ic += 1 for ftsSite in ftsSites: gLogger.always("- %-20s (%s)" %
def setUp(self):
    """Build the fixtures used by the test case.

    Creates three FTSSite objects, then ``numberOfJobs`` FTSJob objects that
    cycle through ``self.statuses`` and ``self.ses``, each holding a single
    FTSFile. Finally counts the jobs in 'Submitted' state and instantiates
    an FTSClient.
    """
    gLogger.setLevel('NOTICE')

    # (site name, FTS server URL) pairs for the sites under test
    siteSpecs = (
        ('CERN.ch',
         'https://fts22-t0-export.cern.ch:8443/glite-data-transfer-fts/services/FileTransfer'),
        ('PIC.es',
         'https://fts.pic.es:8443/glite-data-transfer-fts/services/FileTransfer'),
        ('RAL.uk',
         'https://lcgfts.gridpp.rl.ac.uk:8443/glite-data-transfer-fts/services/FileTransfer'),
    )
    self.ftsSites = [FTSSite(ftsServer=server, name=siteName)
                     for siteName, server in siteSpecs]

    self.ses = ['CERN-USER', 'RAL-USER']
    self.statuses = ['Submitted', 'Finished', 'FinishedDirty', 'Active', 'Ready']

    self.submitted = 0
    self.numberOfJobs = 10
    self.opIDs = []
    self.ftsJobs = []

    for index in xrange(self.numberOfJobs):

        # operation IDs cycle over 0, 1, 2; remember each one once
        opID = index % 3
        if opID not in self.opIDs:
            self.opIDs.append(opID)

        job = FTSJob()
        job.FTSGUID = str(uuid.uuid4())
        job.FTSServer = self.ftsSites[0].FTSServer
        job.Status = self.statuses[index % len(self.statuses)]
        job.OperationID = opID
        if job.Status in FTSJob.FINALSTATES:
            job.Completeness = 100
        if job.Status == 'Active':
            job.Completeness = 90
        job.SourceSE = self.ses[index % len(self.ses)]
        job.TargetSE = 'PIC-USER'
        job.RequestID = 12345

        transfer = FTSFile()
        transfer.FileID = index + 1
        transfer.OperationID = index + 1
        transfer.LFN = '/a/b/c/%d' % index
        transfer.Size = 1000000
        transfer.OperationID = opID
        transfer.SourceSE = job.SourceSE
        transfer.TargetSE = job.TargetSE
        transfer.SourceSURL = 'foo://source.bar.baz/%s' % transfer.LFN
        transfer.TargetSURL = 'foo://target.bar.baz/%s' % transfer.LFN

        # a 'FinishedDirty' job carries one failed file
        isDirty = job.Status == 'FinishedDirty'
        if isDirty:
            transfer.Status = 'Failed'
        else:
            transfer.Status = 'Waiting'
        transfer.RequestID = 12345
        transfer.Checksum = 'addler'
        transfer.ChecksumType = 'adler32'
        transfer.FTSGUID = job.FTSGUID

        if isDirty:
            job.FailedFiles = 1
            job.FailedSize = transfer.Size

        job.addFile(transfer)
        self.ftsJobs.append(job)

    self.submitted = sum(1 for job in self.ftsJobs if job.Status == 'Submitted')

    self.ftsClient = FTSClient()
class ReplicateAndRegister( DMSRequestOperationsBase ):
  """
  .. class:: ReplicateAndRegister

  ReplicateAndRegister operation handler.

  Replicates the files of an operation to the operation's target SEs, either
  by scheduling them through FTS (:meth:`ftsTransfer`) or by calling the data
  manager directly (:meth:`rmTransfer`).
  """

  def __init__( self, operation = None, csPath = None ):
    """c'tor

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
    super( ReplicateAndRegister, self ).__init__( operation, csPath )
    # # own gMonitor stuff for files, followed by the FTS scheduling counters
    activities = ( ( "ReplicateAndRegisterAtt", "Replicate and register attempted" ),
                   ( "ReplicateOK", "Replications successful" ),
                   ( "ReplicateFail", "Replications failed" ),
                   ( "RegisterOK", "Registrations successful" ),
                   ( "RegisterFail", "Registrations failed" ),
                   ( "FTSScheduleAtt", "Files schedule attempted" ),
                   ( "FTSScheduleOK", "File schedule successful" ),
                   ( "FTSScheduleFail", "File schedule failed" ) )
    for activityName, description in activities:
      gMonitor.registerActivity( activityName, description,
                                 "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # SE cache
    self.seCache = {}

    # Clients
    self.fc = FileCatalog()
    self.ftsClient = FTSClient()

  def __call__( self ):
    """ Execute the operation.

    Updates file states from the catalog first, then transfers through FTS when
    the handler has a true `FTSMode` attribute and the request's owner group is
    not listed in `FTSBannedGroups`; otherwise transfers with the data manager.

    :return: result structure of the transfer method that was used
    """
    # # check replicas first; a failure here is logged but does not stop the transfer
    checkReplicas = self.__checkReplicas()
    if not checkReplicas["OK"]:
      self.log.error( checkReplicas["Message"] )

    if getattr( self, "FTSMode", False ):
      bannedGroups = getattr( self, "FTSBannedGroups", () )
      if self.request.OwnerGroup in bannedGroups:
        self.log.info( "usage of FTS system is banned for request's owner" )
        return self.rmTransfer()
      return self.ftsTransfer()
    return self.rmTransfer()

  def __checkReplicas( self ):
    """ Check done replicas and update file states.

    Files in 'Waiting' or 'Scheduled' state are looked up in the file catalog:
    a file reported missing is set to 'Failed', a file already present at every
    target SE is set to 'Done'.

    :return: S_OK(), or the failed result of the catalog call
    """
    waitingFiles = dict( [ ( opFile.LFN, opFile ) for opFile in self.operation
                           if opFile.Status in ( "Waiting", "Scheduled" ) ] )
    targetSESet = set( self.operation.targetSEList )

    replicas = self.fc.getReplicas( waitingFiles.keys() )
    if not replicas["OK"]:
      self.log.error( replicas["Message"] )
      return replicas

    reMissing = re.compile( "no such file or directory" )
    for failedLFN, errStr in replicas["Value"]["Failed"].items():
      waitingFiles[failedLFN].Error = errStr
      if reMissing.search( errStr.lower() ):
        self.log.error( "file %s does not exists" % failedLFN )
        gMonitor.addMark( "ReplicateFail", len( targetSESet ) )
        waitingFiles[failedLFN].Status = "Failed"

    for successfulLFN, reps in replicas["Value"]["Successful"].items():
      if targetSESet.issubset( set( reps ) ):
        self.log.info( "file %s has been replicated to all targets" % successfulLFN )
        waitingFiles[successfulLFN].Status = "Done"

    return S_OK()

  def _addMetadataToFiles( self, toSchedule ):
    """ Add metadata to those files that need to be scheduled through FTS

    toSchedule is a dictionary:
    {'lfn1': [opFile, validReplicas, validTargets], 'lfn2': [opFile, validReplicas, validTargets]}

    :return: S_OK( [ ( opFileJSON, validReplicas, validTargets ), ... ] ) for the
             files whose metadata could be read, S_OK() when `toSchedule` is
             empty, or the failed result of the catalog call
    """
    if not toSchedule:
      self.log.info( "No files to schedule" )
      return S_OK()

    self.log.info( "found %s files to schedule, getting metadata from FC" % len( toSchedule ) )
    lfns = toSchedule.keys()

    res = self.fc.getFileMetadata( lfns )
    if not res['OK']:
      return res

    failedMetadata = res['Value']['Failed']
    if failedMetadata:
      self.log.warn( "Can't schedule %d files: problems getting the metadata: %s" % ( len( failedMetadata ),
                                                                                ', '.join( failedMetadata ) ) )
    metadata = res['Value']['Successful']

    filesToScheduleList = []
    for lfnToSchedule, lfnMetadata in metadata.items():
      opFileToSchedule, validReplicas, validTargets = toSchedule[lfnToSchedule][:3]
      opFileToSchedule.GUID = lfnMetadata['GUID']
      opFileToSchedule.Checksum = lfnMetadata['Checksum']
      opFileToSchedule.ChecksumType = lfnMetadata['CheckSumType']
      opFileToSchedule.Size = lfnMetadata['Size']

      filesToScheduleList.append( ( opFileToSchedule.toJSON()['Value'], validReplicas, validTargets ) )

    return S_OK( filesToScheduleList )

  def _filterReplicas( self, opFile ):
    """ filter out banned/invalid source SEs (delegates to filterReplicas) """
    return filterReplicas( opFile, logger = self.log, dataManager = self.dm, seCache = self.seCache )

  def ftsTransfer( self ):
    """ Replicate and register using FTS.

    Waiting files with at least one valid replica and at least one missing target
    are scheduled through the FTS client. Whatever is still waiting afterwards is
    handed over to :meth:`rmTransfer`.

    :return: a failed result from the RSS check, the metadata lookup or the FTS
             scheduling call; S_OK( message ) when targets are banned for writing;
             otherwise the result of rmTransfer( fromFTS = True )
    """
    self.log.info( "scheduling files in FTS..." )

    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark( "FTSScheduleAtt" )
      gMonitor.addMark( "FTSScheduleFail" )
      return bannedTargets

    if bannedTargets['Value']:
      return S_OK( "%s targets are banned for writing" % ",".join( bannedTargets['Value'] ) )

    # Can continue now
    self.log.verbose( "No targets banned for writing" )

    toSchedule = {}

    for opFile in self.getWaitingFilesList():
      opFile.Error = ''
      gMonitor.addMark( "FTSScheduleAtt" )
      # # check replicas
      replicas = self._filterReplicas( opFile )
      if not replicas["OK"]:
        continue
      replicas = replicas["Value"]

      validReplicas = replicas["Valid"]
      bannedReplicas = replicas["Banned"]
      noReplicas = replicas['NoReplicas']
      badReplicas = replicas['Bad']
      noPFN = replicas['NoPFN']

      if not validReplicas:
        # nothing usable as a source: record why; only the definitive cases fail the file
        gMonitor.addMark( "FTSScheduleFail" )
        if bannedReplicas:
          self.log.warn( "unable to schedule '%s', replicas only at banned SEs" % opFile.LFN )
        elif noReplicas:
          self.log.error( "unable to schedule %s, file doesn't exist" % opFile.LFN )
          opFile.Error = 'No replicas found'
          opFile.Status = 'Failed'
        elif badReplicas:
          self.log.error( "unable to schedule %s, all replicas have a bad checksum" % opFile.LFN )
          opFile.Error = 'All replicas have a bad checksum'
          opFile.Status = 'Failed'
        elif noPFN:
          self.log.warn( "unable to schedule %s, could not get a PFN" % opFile.LFN )
        continue

      validTargets = list( set( self.operation.targetSEList ) - set( validReplicas ) )
      if not validTargets:
        self.log.info( "file %s is already present at all targets" % opFile.LFN )
        opFile.Status = "Done"
      else:
        toSchedule[opFile.LFN] = [ opFile, validReplicas, validTargets ]

    res = self._addMetadataToFiles( toSchedule )
    if not res['OK']:
      return res
    filesToScheduleList = res['Value']

    if filesToScheduleList:

      ftsSchedule = self.ftsClient.ftsSchedule( self.request.RequestID,
                                                self.operation.OperationID,
                                                filesToScheduleList )
      if not ftsSchedule["OK"]:
        self.log.error( ftsSchedule["Message"] )
        return ftsSchedule

      # might have nothing to schedule
      ftsSchedule = ftsSchedule["Value"]
      if not ftsSchedule:
        return S_OK()

      for fileID in ftsSchedule["Successful"]:
        gMonitor.addMark( "FTSScheduleOK", 1 )
        for opFile in self.operation:
          if fileID == opFile.FileID:
            opFile.Status = "Scheduled"
            self.log.debug( "%s has been scheduled for FTS" % opFile.LFN )
      self.log.info( "%d files have been scheduled to FTS" % len( ftsSchedule['Successful'] ) )

      for fileID in ftsSchedule["Failed"]:
        gMonitor.addMark( "FTSScheduleFail", 1 )
        for opFile in self.operation:
          if fileID == opFile.FileID:
            opFile.Error = ftsSchedule["Failed"][fileID]
            if 'sourceSURL equals to targetSURL' in opFile.Error:
              # In this case there is no need to continue
              opFile.Status = 'Failed'
            self.log.warn( "unable to schedule %s for FTS: %s" % ( opFile.LFN, opFile.Error ) )
    else:
      self.log.info( "No files to schedule after metadata checks" )

    # Just in case some transfers could not be scheduled, try them with RM
    return self.rmTransfer( fromFTS = True )

  def rmTransfer( self, fromFTS = False ):
    """ Replicate and register using dataManager.

    :param bool fromFTS: True when called as a fallback from ftsTransfer
                         (only changes the log message)
    :return: S_OK() once all waiting files have been processed, S_OK( message ) when
             the source or a target SE is banned, or the failed RSS result
    """
    # # get waiting files. If none just return
    waitingFiles = self.getWaitingFilesList()
    if not waitingFiles:
      return S_OK()
    if fromFTS:
      self.log.info( "Trying transfer using replica manager as FTS failed" )
    else:
      self.log.info( "Transferring files using Data manager..." )

    # # source SE requested by the operation (None when unspecified)
    requestedSourceSE = self.operation.SourceSE if self.operation.SourceSE else None
    if requestedSourceSE:
      # # check source se for read
      sourceRead = self.rssSEStatus( requestedSourceSE, "ReadAccess" )
      if not sourceRead["OK"]:
        self.log.info( sourceRead["Message"] )
        for opFile in self.operation:
          opFile.Error = sourceRead["Message"]
        self.operation.Error = sourceRead["Message"]
        gMonitor.addMark( "ReplicateAndRegisterAtt", len( self.operation ) )
        gMonitor.addMark( "ReplicateFail", len( self.operation ) )
        return sourceRead

      if not sourceRead["Value"]:
        self.operation.Error = "SourceSE %s is banned for reading" % requestedSourceSE
        self.log.info( self.operation.Error )
        return S_OK( self.operation.Error )

    # # check targetSEs for write
    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark( "ReplicateAndRegisterAtt", len( self.operation ) )
      gMonitor.addMark( "ReplicateFail", len( self.operation ) )
      return bannedTargets

    if bannedTargets['Value']:
      return S_OK( "%s targets are banned for writing" % ",".join( bannedTargets['Value'] ) )

    # Can continue now
    self.log.verbose( "No targets banned for writing" )

    catalog = self.operation.Catalog

    # # loop over files
    for opFile in waitingFiles:

      gMonitor.addMark( "ReplicateAndRegisterAtt", 1 )
      opFile.Error = ''
      lfn = opFile.LFN

      # Check if replica is at the specified source
      replicas = self._filterReplicas( opFile )
      if not replicas["OK"]:
        self.log.error( replicas["Message"] )
        continue
      validReplicas = replicas["Value"]["Valid"]
      if not validReplicas:
        self.log.warn( "unable to find valid replicas for %s" % lfn )
        continue

      # The source is chosen per file: a fallback picked for one file must not
      # replace the requested source SE for the files that follow.
      sourceSE = requestedSourceSE
      if sourceSE not in validReplicas:
        # # get the first one in the list
        if sourceSE:
          self.log.warn( "%s is not at specified sourceSE %s, changed to %s" % ( lfn, sourceSE, validReplicas[0] ) )
        sourceSE = validReplicas[0]

      # # loop over targetSE
      for targetSE in self.operation.targetSEList:

        # # call DataManager
        if targetSE == sourceSE:
          self.log.warn( "Request to replicate %s to the source SE: %s" % ( lfn, sourceSE ) )
          continue

        res = self.dm.replicateAndRegister( lfn, targetSE, sourceSE = sourceSE, catalog = catalog )
        if not res["OK"]:
          gMonitor.addMark( "ReplicateFail", 1 )
          opFile.Error = "DataManager error: %s" % res["Message"]
          self.log.error( opFile.Error )
          continue

        if lfn not in res["Value"]["Successful"]:
          gMonitor.addMark( "ReplicateFail", 1 )
          reason = res["Value"]["Failed"][lfn]
          self.log.error( "failed to replicate and register file %s at %s:" % ( lfn, targetSE ), reason )
          opFile.Error = reason
          continue

        lfnResult = res["Value"]["Successful"][lfn]
        if "replicate" not in lfnResult:
          self.log.error( "failed to replicate %s to %s." % ( lfn, targetSE ) )
          gMonitor.addMark( "ReplicateFail", 1 )
          opFile.Error = "Failed to replicate"
          continue

        repTime = lfnResult["replicate"]
        prString = "file %s replicated at %s in %s s." % ( lfn, targetSE, repTime )
        gMonitor.addMark( "ReplicateOK", 1 )

        if "register" in lfnResult:
          gMonitor.addMark( "RegisterOK", 1 )
          regTime = lfnResult["register"]
          prString += ' and registered in %s s.' % regTime
          self.log.info( prString )
        else:
          gMonitor.addMark( "RegisterFail", 1 )
          prString += " but failed to register"
          self.log.warn( prString )

          opFile.Error = "Failed to register"
          # # add register replica operation
          registerOperation = self.getRegisterOperation( opFile, targetSE )
          self.request.insertAfter( registerOperation, self.operation )

      # no error recorded for any target: the file is complete
      if not opFile.Error:
        if len( self.operation.targetSEList ) > 1:
          self.log.info( "file %s has been replicated to all targetSEs" % lfn )
        opFile.Status = "Done"

    return S_OK()
class ReplicateAndRegister(OperationHandlerBase, DMSRequestOperationsBase):
    """
    .. class:: ReplicateAndRegister

    ReplicateAndRegister operation handler.

    Replicates the files of an operation to the operation's target SEs, either
    by scheduling them through FTS (:meth:`ftsTransfer`) or by calling the
    ReplicaManager directly (:meth:`rmTransfer`).
    """

    def __init__(self, operation=None, csPath=None):
        """c'tor

        :param self: self reference
        :param Operation operation: Operation instance
        :param str csPath: CS path for this handler
        """
        super(ReplicateAndRegister, self).__init__(operation, csPath)
        # # own gMonitor stuff for files, followed by the FTS scheduling counters
        activities = (
            ("ReplicateAndRegisterAtt", "Replicate and register attempted"),
            ("ReplicateOK", "Replications successful"),
            ("ReplicateFail", "Replications failed"),
            ("RegisterOK", "Registrations successful"),
            ("RegisterFail", "Registrations failed"),
            ("FTSScheduleAtt", "Files schedule attempted"),
            ("FTSScheduleOK", "File schedule successful"),
            ("FTSScheduleFail", "File schedule failed"),
        )
        for activityName, description in activities:
            gMonitor.registerActivity(activityName, description,
                                      "RequestExecutingAgent", "Files/min",
                                      gMonitor.OP_SUM)
        # # SE cache, keyed by SE name
        self.seCache = {}

        # Clients
        self.rm = ReplicaManager()
        self.ftsClient = FTSClient()

    def __call__(self):
        """ Execute the operation.

        Updates file states from the catalog first, then transfers through FTS
        when the handler has a true `FTSMode` attribute and the request's owner
        group is not listed in `FTSBannedGroups`; otherwise transfers with the
        ReplicaManager.

        :return: result structure of the transfer method that was used
        """
        # # check replicas first; a failure is logged but does not stop the transfer
        checkReplicas = self.__checkReplicas()
        if not checkReplicas["OK"]:
            self.log.error(checkReplicas["Message"])

        if getattr(self, "FTSMode", False):
            bannedGroups = getattr(self, "FTSBannedGroups", ())
            if self.request.OwnerGroup in bannedGroups:
                self.log.info(
                    "usage of FTS system is banned for request's owner")
                return self.rmTransfer()
            return self.ftsTransfer()
        return self.rmTransfer()

    def __checkReplicas(self):
        """ Check done replicas and update file states.

        Files in 'Waiting' or 'Scheduled' state are looked up in the catalog:
        a file reported missing is set to 'Failed', a file already present at
        every target SE is set to 'Done'.

        :return: S_OK(), or the failed result of the catalog call
        """
        waitingFiles = dict([(opFile.LFN, opFile) for opFile in self.operation
                             if opFile.Status in ("Waiting", "Scheduled")])
        targetSESet = set(self.operation.targetSEList)

        replicas = self.rm.getCatalogReplicas(waitingFiles.keys())
        if not replicas["OK"]:
            self.log.error(replicas["Message"])
            return replicas

        reMissing = re.compile("no such file or directory")
        for failedLFN, errStr in replicas["Value"]["Failed"].items():
            waitingFiles[failedLFN].Error = errStr
            if reMissing.search(errStr.lower()):
                self.log.error("file %s does not exists" % failedLFN)
                gMonitor.addMark("ReplicateFail", len(targetSESet))
                waitingFiles[failedLFN].Status = "Failed"

        for successfulLFN, reps in replicas["Value"]["Successful"].items():
            if targetSESet.issubset(set(reps)):
                self.log.info("file %s has been replicated to all targets" %
                              successfulLFN)
                waitingFiles[successfulLFN].Status = "Done"

        return S_OK()

    def _addMetadataToFiles(self, toSchedule):
        """ Add metadata to those files that need to be scheduled through FTS

        toSchedule is a dictionary:
        {'lfn1': [opFile, validReplicas, validTargets], 'lfn2': [opFile, validReplicas, validTargets]}

        :return: S_OK([(opFileJSON, validReplicas, validTargets), ...]) for the
                 files whose metadata could be read, S_OK() when `toSchedule`
                 is empty, or the failed result of the catalog call
        """
        if not toSchedule:
            self.log.info("No files to schedule")
            return S_OK()

        self.log.info(
            "found %s files to schedule, getting metadata from FC" %
            len(toSchedule))
        lfns = toSchedule.keys()

        res = self.rm.getCatalogFileMetadata(lfns)
        if not res['OK']:
            return res

        failedMetadata = res['Value']['Failed']
        if failedMetadata:
            self.log.warn(
                "Can't schedule %d files: problems getting the metadata: %s" %
                (len(failedMetadata), ', '.join(failedMetadata)))
        metadata = res['Value']['Successful']

        filesToScheduleList = []
        for lfnToSchedule, lfnMetadata in metadata.items():
            opFileToSchedule, validReplicas, validTargets = toSchedule[lfnToSchedule][:3]
            opFileToSchedule.GUID = lfnMetadata['GUID']
            opFileToSchedule.Checksum = lfnMetadata['Checksum']
            opFileToSchedule.ChecksumType = lfnMetadata['CheckSumType']
            opFileToSchedule.Size = lfnMetadata['Size']

            filesToScheduleList.append(
                (opFileToSchedule.toJSON()['Value'], validReplicas, validTargets))

        return S_OK(filesToScheduleList)

    def _filterReplicas(self, opFile):
        """ Filter out banned/invalid source SEs.

        :param opFile: operation file whose active replicas are classified
        :return: S_OK({"Valid": [...], "Banned": [...], "Bad": [...]}) with SE
                 names; the failed lookup result when active replicas cannot be
                 read; S_ERROR(reason) when the file is reported missing (the
                 file is then also set to 'Failed')
        """
        from DIRAC.Core.Utilities.Adler import compareAdler
        ret = {"Valid": [], "Banned": [], "Bad": []}

        replicas = self.rm.getActiveReplicas(opFile.LFN)
        if not replicas["OK"]:
            # a failed lookup has no "Value" to inspect: hand the error back
            self.log.error(replicas["Message"])
            return replicas
        replicas = replicas["Value"]

        # same pattern and unanchored search as __checkReplicas uses
        reNotExists = re.compile("no such file or directory")
        failed = replicas["Failed"].get(opFile.LFN, "")
        if reNotExists.search(failed.lower()):
            opFile.Status = "Failed"
            opFile.Error = failed
            return S_ERROR(failed)

        replicas = replicas["Successful"].get(opFile.LFN, {})

        for repSEName in replicas:

            seRead = self.rssSEStatus(repSEName, "ReadAccess")
            if not seRead["OK"]:
                self.log.info(seRead["Message"])
                ret["Banned"].append(repSEName)
                continue
            if not seRead["Value"]:
                # an SE banned for reading must not end up in the "Valid" list
                self.log.info("StorageElement '%s' is banned for reading" %
                              (repSEName))
                ret["Banned"].append(repSEName)
                continue

            repSE = self.seCache.get(repSEName, None)
            if not repSE:
                repSE = StorageElement(repSEName, "SRM2")
                # store under the SE name, which is the key used for the lookup above
                self.seCache[repSEName] = repSE

            pfn = repSE.getPfnForLfn(opFile.LFN)
            if not pfn["OK"]:
                self.log.warn("unable to create pfn for %s lfn: %s" %
                              (opFile.LFN, pfn["Message"]))
                ret["Banned"].append(repSEName)
                continue
            pfn = pfn["Value"]

            repSEMetadata = repSE.getFileMetadata(pfn, singleFile=True)
            if not repSEMetadata["OK"]:
                self.log.warn(repSEMetadata["Message"])
                ret["Banned"].append(repSEName)
                continue
            repSEMetadata = repSEMetadata["Value"]

            seChecksum = repSEMetadata.get("Checksum")
            if opFile.Checksum and seChecksum and not compareAdler(
                    seChecksum, opFile.Checksum):
                self.log.warn(" %s checksum mismatch: %s %s:%s" %
                              (opFile.LFN, opFile.Checksum, repSE, seChecksum))
                ret["Bad"].append(repSEName)
                continue
            # # if we're here repSE is OK
            ret["Valid"].append(repSEName)

        return S_OK(ret)

    def ftsTransfer(self):
        """ Replicate and register using FTS.

        Waiting files with at least one valid replica and at least one missing
        target are scheduled through the FTS client. Whatever is still waiting
        afterwards is handed over to :meth:`rmTransfer`.

        :return: a failed result from the RSS check, the metadata lookup or the
                 FTS scheduling call; S_OK(message) when targets are banned for
                 writing; otherwise the result of rmTransfer(fromFTS=True)
        """
        self.log.info("scheduling files in FTS...")

        bannedTargets = self.checkSEsRSS()
        if not bannedTargets['OK']:
            gMonitor.addMark("FTSScheduleAtt")
            gMonitor.addMark("FTSScheduleFail")
            return bannedTargets

        if bannedTargets['Value']:
            return S_OK("%s targets are banned for writing" %
                        ",".join(bannedTargets['Value']))

        # Can continue now
        self.log.verbose("No targets banned for writing")

        toSchedule = {}

        for opFile in self.getWaitingFilesList():
            opFile.Error = ''
            gMonitor.addMark("FTSScheduleAtt")
            # # check replicas
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                continue
            replicas = replicas["Value"]

            validReplicas = replicas["Valid"]
            bannedReplicas = replicas["Banned"]

            if not validReplicas:
                if bannedReplicas:
                    self.log.warn(
                        "unable to schedule '%s', replicas only at banned SEs"
                        % opFile.LFN)
                    gMonitor.addMark("FTSScheduleFail")
                # no usable source: leave the file waiting
                continue

            validTargets = list(
                set(self.operation.targetSEList) - set(validReplicas))
            if not validTargets:
                self.log.info("file %s is already present at all targets" %
                              opFile.LFN)
                opFile.Status = "Done"
                continue

            toSchedule[opFile.LFN] = [opFile, validReplicas, validTargets]

        res = self._addMetadataToFiles(toSchedule)
        if not res['OK']:
            return res
        filesToScheduleList = res['Value']

        if filesToScheduleList:

            ftsSchedule = self.ftsClient.ftsSchedule(
                self.request.RequestID, self.operation.OperationID,
                filesToScheduleList)
            if not ftsSchedule["OK"]:
                self.log.error(ftsSchedule["Message"])
                return ftsSchedule

            # might have nothing to schedule
            ftsSchedule = ftsSchedule["Value"]
            if not ftsSchedule:
                return S_OK()

            for fileID in ftsSchedule["Successful"]:
                gMonitor.addMark("FTSScheduleOK", 1)
                for opFile in self.operation:
                    if fileID == opFile.FileID:
                        opFile.Status = "Scheduled"
                        self.log.always("%s has been scheduled for FTS" %
                                        opFile.LFN)

            failedFiles = ftsSchedule["Failed"]
            # Presumably a {fileID: reason} mapping (TODO confirm against
            # FTSClient.ftsSchedule): iterating a mapping directly yields only
            # its keys, so go through items(). A sequence of (fileID, reason)
            # pairs is still accepted as-is.
            if hasattr(failedFiles, "items"):
                failedItems = failedFiles.items()
            else:
                failedItems = failedFiles
            for fileID, reason in failedItems:
                gMonitor.addMark("FTSScheduleFail", 1)
                for opFile in self.operation:
                    if fileID == opFile.FileID:
                        opFile.Error = reason
                        self.log.error("unable to schedule %s for FTS: %s" %
                                       (opFile.LFN, opFile.Error))
        else:
            self.log.info("No files to schedule after metadata checks")

        # Just in case some transfers could not be scheduled, try them with RM
        return self.rmTransfer(fromFTS=True)

    def rmTransfer(self, fromFTS=False):
        """ Replicate and register using ReplicaManager.

        :param bool fromFTS: True when called as a fallback from ftsTransfer
                             (only changes the log message)
        :return: S_OK() once all waiting files have been processed,
                 S_OK(message) when the source or a target SE is banned, or the
                 failed RSS result
        """
        # # get waiting files. If none just return
        waitingFiles = self.getWaitingFilesList()
        if not waitingFiles:
            return S_OK()
        if fromFTS:
            self.log.info(
                "Trying transfer using replica manager as FTS failed")
        else:
            self.log.info("Transferring files using replica manager...")

        # # source SE requested by the operation (None when unspecified)
        requestedSourceSE = self.operation.SourceSE if self.operation.SourceSE else None
        if requestedSourceSE:
            # # check source se for read
            sourceRead = self.rssSEStatus(requestedSourceSE, "ReadAccess")
            if not sourceRead["OK"]:
                self.log.info(sourceRead["Message"])
                for opFile in self.operation:
                    opFile.Error = sourceRead["Message"]
                    opFile.Status = "Failed"
                self.operation.Error = sourceRead["Message"]
                gMonitor.addMark("ReplicateAndRegisterAtt",
                                 len(self.operation))
                gMonitor.addMark("ReplicateFail", len(self.operation))
                return sourceRead

            if not sourceRead["Value"]:
                self.operation.Error = "SourceSE %s is banned for reading" % requestedSourceSE
                self.log.info(self.operation.Error)
                return S_OK(self.operation.Error)

        # # check targetSEs for write
        bannedTargets = self.checkSEsRSS()
        if not bannedTargets['OK']:
            gMonitor.addMark("ReplicateAndRegisterAtt", len(self.operation))
            gMonitor.addMark("ReplicateFail", len(self.operation))
            return bannedTargets

        if bannedTargets['Value']:
            return S_OK("%s targets are banned for writing" %
                        ",".join(bannedTargets['Value']))

        # Can continue now
        self.log.verbose("No targets banned for writing")

        # # loop over files
        for opFile in waitingFiles:

            gMonitor.addMark("ReplicateAndRegisterAtt", 1)
            opFile.Error = ''
            lfn = opFile.LFN

            # Check if replica is at the specified source
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                self.log.error(replicas["Message"])
                continue
            validReplicas = replicas["Value"]["Valid"]
            if not validReplicas:
                self.log.warn("unable to find valid replicas for %s" % lfn)
                continue

            # The source is chosen per file: a fallback picked for one file
            # must not replace the requested source SE for the files that follow.
            sourceSE = requestedSourceSE
            if sourceSE not in validReplicas:
                # # get the first one in the list
                if sourceSE:
                    self.log.warn(
                        "%s is not at specified sourceSE %s, changed to %s" %
                        (lfn, sourceSE, validReplicas[0]))
                sourceSE = validReplicas[0]

            # # loop over targetSE
            for targetSE in self.operation.targetSEList:

                # # call ReplicaManager
                if targetSE == sourceSE:
                    self.log.warn(
                        "Request to replicate %s to the source SE: %s" %
                        (lfn, sourceSE))
                    continue

                res = self.rm.replicateAndRegister(lfn, targetSE,
                                                   sourceSE=sourceSE)
                if not res["OK"]:
                    gMonitor.addMark("ReplicateFail", 1)
                    opFile.Error = "ReplicaManager error: %s" % res["Message"]
                    self.log.error(opFile.Error)
                    continue

                if lfn not in res["Value"]["Successful"]:
                    gMonitor.addMark("ReplicateFail", 1)
                    reason = res["Value"]["Failed"][lfn]
                    self.log.error(
                        "failed to replicate and register file %s at %s: %s" %
                        (lfn, targetSE, reason))
                    opFile.Error = reason
                    continue

                lfnResult = res["Value"]["Successful"][lfn]
                if "replicate" not in lfnResult:
                    self.log.error("failed to replicate %s to %s." %
                                   (lfn, targetSE))
                    gMonitor.addMark("ReplicateFail", 1)
                    opFile.Error = "Failed to replicate"
                    continue

                repTime = lfnResult["replicate"]
                prString = "file %s replicated at %s in %s s." % (
                    lfn, targetSE, repTime)
                gMonitor.addMark("ReplicateOK", 1)

                if "register" in lfnResult:
                    gMonitor.addMark("RegisterOK", 1)
                    regTime = lfnResult["register"]
                    prString += ' and registered in %s s.' % regTime
                    self.log.info(prString)
                else:
                    gMonitor.addMark("RegisterFail", 1)
                    prString += " but failed to register"
                    self.log.warn(prString)

                    opFile.Error = "Failed to register"
                    opFile.Status = "Failed"
                    # # add register replica operation
                    registerOperation = self.getRegisterOperation(
                        opFile, targetSE)
                    self.request.insertAfter(registerOperation, self.operation)

            # no error recorded for any target: the file is complete
            if not opFile.Error:
                if len(self.operation.targetSEList) > 1:
                    self.log.info(
                        "file %s has been replicated to all targetSEs" % lfn)
                opFile.Status = "Done"

        return S_OK()