def __validateChecksums( self, se, migratedFileIDs, migratingFiles ):
    """ Obtain the checksums in the catalog if not present and check against the checksum from the storage.

    :param se: name of the SE the files migrated to (used as log prefix)
    :param migratedFileIDs: dict { fileID : checksum reported by the storage }
    :param migratingFiles: dict { fileID : { 'LFN', 'PFN', 'SE', 'Checksum', ... } }
    :return: S_OK( { 'MatchingFiles' : [fileID, ...], 'MismatchFiles' : [fileID, ...] } )
    """
    lfnFileID = {}
    checksumToObtain = []
    # Collect LFNs for which the catalog checksum is not yet cached
    for fileID in migratedFileIDs.keys():
        if not migratingFiles[fileID]['Checksum']:
            lfn = migratingFiles[fileID]['LFN']
            checksumToObtain.append( lfn )
            lfnFileID[lfn] = fileID
    if checksumToObtain:
        res = self.ReplicaManager.getCatalogFileMetadata( checksumToObtain )
        if not res['OK']:
            gLogger.error( "[%s] __validateChecksums: Failed to obtain file checksums" % se )
            return res
        for lfn, error in res['Value']['Failed'].items():
            gLogger.error( "[%s] __validateChecksums: Failed to get file checksum" % se, "%s %s" % ( lfn, error ) )
        for lfn, metadata in res['Value']['Successful'].items():
            migratingFiles[lfnFileID[lfn]]['Checksum'] = metadata['CheckSumValue']
    mismatchFiles = []
    matchFiles = []
    checksumMismatches = []
    fileRecords = []
    for fileID, seChecksum in migratedFileIDs.items():
        lfn = migratingFiles[fileID]['LFN']
        catalogChecksum = migratingFiles[fileID]['Checksum']
        if not seChecksum:
            gLogger.error( "[%s] __validateChecksums: Storage checksum not available" % se,
                           migratingFiles[fileID]['PFN'] )
        elif not compareAdler( seChecksum, catalogChecksum ):
            gLogger.error( "[%s] __validateChecksums: Storage and catalog checksum mismatch" % se,
                           "%s '%s' '%s'" % ( migratingFiles[fileID]['PFN'], seChecksum, catalogChecksum ) )
            mismatchFiles.append( fileID )
            pfn = migratingFiles[fileID]['PFN']
            # FIX: use a local name for the replica's SE instead of clobbering the 'se'
            # argument, which is used as the log prefix for the rest of the method
            replicaSE = migratingFiles[fileID]['SE']
            checksumMismatches.append( ( lfn, pfn, replicaSE, 'CatalogPFNChecksumMismatch' ) )
            # FIX: the data-logging labels were swapped - the mismatch branch recorded
            # 'Checksum match' and the match branch recorded 'Checksum mismatch'
            fileRecords.append( ( lfn, 'Checksum mismatch', '%s@%s' % ( seChecksum, replicaSE ), '',
                                  'MigrationMonitoringAgent' ) )
        else:
            fileRecords.append( ( lfn, 'Checksum match', '%s@%s' % ( seChecksum, se ), '',
                                  'MigrationMonitoringAgent' ) )
            matchFiles.append( fileID )
    # Add the data logging records
    self.DataLog.addFileRecords( fileRecords )
    if checksumMismatches:
        # Update the (mis)matching checksums (in the integrityDB and) in the migration monitoring db
        self.__reportProblematicReplicas( checksumMismatches )
        res = self.MigrationMonitoringDB.setMigratingReplicaStatus( mismatchFiles, 'ChecksumFail' )
        if not res['OK']:
            gLogger.error( "[%s] __validateChecksums: Failed to update checksum mismatching files." % se,
                           res['Message'] )
    if matchFiles:
        res = self.MigrationMonitoringDB.setMigratingReplicaStatus( matchFiles, 'ChecksumMatch' )
        if not res['OK']:
            # FIX: this error concerns the *matching* files update, not the mismatching one
            gLogger.error( "[%s] __validateChecksums: Failed to update checksum matching files." % se,
                           res['Message'] )
    resDict = {'MatchingFiles':matchFiles, 'MismatchFiles':mismatchFiles}
    return S_OK( resDict )
def compareChecksum(self, lfns):
    """Compare the checksum of each file in the FC (LFC) with the checksum of its physical replicas.

    :param lfns: list of LFNs to check
    :return: S_OK with a dict of sub-dictionaries keyed by
             'AllReplicasCorrupted', 'SomeReplicasCorrupted', 'MissingReplica',
             'MissingAllReplicas' and 'NoReplicas'
    """
    retDict = {'AllReplicasCorrupted': {}, 'SomeReplicasCorrupted': {},
               'MissingReplica': {}, 'MissingAllReplicas': {}, 'NoReplicas': {}}
    chunkSize = 100  # bulk-query size for catalog and SE calls
    replicas = {}
    setLfns = set(lfns)
    # Reuse replica info already fetched in a previous call
    cachedLfns = setLfns & set(self.cachedReplicas)
    for lfn in cachedLfns:
        replicas[lfn] = self.cachedReplicas[lfn]
    lfnsLeft = list(setLfns - cachedLfns)
    if lfnsLeft:
        self.__write("Get replicas for %d files (chunks of %d): " % (len(lfnsLeft), chunkSize))
        for lfnChunk in breakListIntoChunks(lfnsLeft, chunkSize):
            self.__write('.')
            replicasRes = self.fileCatalog.getReplicas(lfnChunk)
            if not replicasRes['OK']:
                gLogger.error("error: %s" % replicasRes['Message'])
                return S_ERROR(errno.ENOENT, "error: %s" % replicasRes['Message'])
            replicasRes = replicasRes['Value']
            if replicasRes['Failed']:
                # Files with no replicas in the catalog
                retDict['NoReplicas'].update(replicasRes['Failed'])
            replicas.update(replicasRes['Successful'])
    self.__write("Get FC metadata for %d files to be checked: " % len(lfns))
    metadata = {}
    for lfnChunk in breakListIntoChunks(replicas, chunkSize):
        self.__write('.')
        res = self.fileCatalog.getFileMetadata(lfnChunk)
        if not res['OK']:
            return S_ERROR(errno.ENOENT, "error %s" % res['Message'])
        metadata.update(res['Value']['Successful'])
    gLogger.notice("Check existence and compare checksum file by file...")
    csDict = {}
    seFiles = {}
    # Reverse the LFN->SE dictionary
    nReps = 0
    for lfn in replicas:
        # Seed each file's entry with the catalog checksum; SE mismatches are added later
        csDict.setdefault(lfn, {})['LFCChecksum'] = metadata.get(
            lfn, {}).get('Checksum')
        for se in replicas[lfn]:
            seFiles.setdefault(se, []).append(lfn)
            nReps += 1
    gLogger.notice('Getting checksum of %d replicas in %d SEs' % (nReps, len(seFiles)))
    checkSum = {}
    lfnNotExisting = {}
    lfnNoInfo = {}
    # Silence the logger during the (noisy) SE queries; restored below
    logLevel = gLogger.getLevel()
    gLogger.setLevel('FATAL')
    for num, se in enumerate(sorted(seFiles)):
        self.__write('\n%d. At %s (%d files): ' % (num, se, len(seFiles[se])))
        oSe = StorageElement(se)
        notFound = 0
        for surlChunk in breakListIntoChunks(seFiles[se], chunkSize):
            self.__write('.')
            # NOTE: 'metadata' is reused here for the SE query result,
            # shadowing the FC metadata dict (already consumed into csDict)
            metadata = oSe.getFileMetadata(surlChunk)
            if not metadata['OK']:
                gLogger.error("Error: getFileMetadata returns %s. Ignore those replicas" % (
                    metadata['Message']))
                # Remove from list of replicas as we don't know whether it is OK or
                # not
                for lfn in seFiles[se]:
                    lfnNoInfo.setdefault(lfn, []).append(se)
            else:
                metadata = metadata['Value']
                notFound += len(metadata['Failed'])
                for lfn in metadata['Failed']:
                    lfnNotExisting.setdefault(lfn, []).append(se)
                for lfn in metadata['Successful']:
                    checkSum.setdefault(
                        lfn, {})[se] = metadata['Successful'][lfn]['Checksum']
        if notFound:
            gLogger.error('%d files not found' % notFound)
    gLogger.setLevel(logLevel)
    gLogger.notice('Verifying checksum of %d files' % len(replicas))
    for lfn in replicas:
        # get the lfn checksum from the FC
        replicaDict = replicas[lfn]
        oneGoodReplica = False
        allGoodReplicas = True
        lfcChecksum = csDict[lfn].pop('LFCChecksum')
        for se in replicaDict:
            # If replica doesn't exist skip check
            if se in lfnNotExisting.get(lfn, []):
                allGoodReplicas = False
                continue
            if se in lfnNoInfo.get(lfn, []):
                # If there is no info, a priori it could be good
                oneGoodReplica = True
                continue
            # get the surls metadata and compare the checksum
            surlChecksum = checkSum.get(lfn, {}).get(se, '')
            if not surlChecksum or not compareAdler(lfcChecksum, surlChecksum):
                # if lfcChecksum does not match surlChecksum
                csDict[lfn][se] = {'PFNChecksum': surlChecksum}
                gLogger.info("ERROR!! checksum mismatch at %s for LFN %s: LFC checksum: %s , PFN checksum : %s " % (
                    se, lfn, lfcChecksum, surlChecksum))
                allGoodReplicas = False
            else:
                oneGoodReplica = True
        # Classify the file based on what was found above
        if not oneGoodReplica:
            if lfn in lfnNotExisting:
                gLogger.info("=> All replicas are missing", lfn)
                retDict['MissingAllReplicas'][lfn] = 'All'
            else:
                gLogger.info("=> All replicas have bad checksum", lfn)
                retDict['AllReplicasCorrupted'][lfn] = csDict[lfn]
        elif not allGoodReplicas:
            if lfn in lfnNotExisting:
                gLogger.info("=> At least one replica missing", lfn)
                retDict['MissingReplica'][lfn] = lfnNotExisting[lfn]
            else:
                gLogger.info("=> At least one replica with good Checksum", lfn)
                retDict['SomeReplicasCorrupted'][lfn] = csDict[lfn]
    return S_OK(retDict)
def resolveSource( self ):
    """ resolve source SE eligible for submission

    Builds the source SURL for every file of the transfer, then validates the
    source replica (existence, availability, cache status, size and checksum
    against the catalog). Files failing any step are marked 'Failed' with a
    'Reason' parameter.

    :param self: self reference
    """
    toResolve = [ lfn for lfn in self.fileDict ]
    if not toResolve:
        return S_OK()
    # Refresh catalog metadata and replica caches for all files
    res = self.__updateMetadataCache( toResolve )
    if not res['OK']:
        return res
    res = self.__updateReplicaCache( toResolve )
    if not res['OK']:
        return res
    for lfn in toResolve:
        if self.fileDict[lfn].get( "Status", "" ) == "Failed":
            continue
        replicas = self.catalogReplicas.get( lfn, {} )
        if self.sourceSE not in replicas:
            gLogger.warn("resolveSource: skipping %s - not replicas at SourceSE %s" % ( lfn, self.sourceSE ) )
            self.__setFileParameter( lfn, 'Reason', "No replica at SourceSE" )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
            continue
        # Turn the replica into an SRM2 PFN with the port included
        res = self.oSourceSE.getPfnForProtocol( replicas[self.sourceSE], 'SRM2', withPort = True )
        if not res['OK']:
            gLogger.warn("resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
            self.__setFileParameter( lfn, 'Reason', res['Message'] )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
            continue
        res = self.setSourceSURL( lfn, res['Value'] )
        if not res['OK']:
            gLogger.warn("resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
            self.__setFileParameter( lfn, 'Reason', res['Message'] )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
            continue
    # Map source PFN -> LFN for the files that got a source SURL
    toResolve = {}
    for lfn in self.fileDict:
        if "Source" in self.fileDict[lfn]:
            toResolve[self.fileDict[lfn]['Source']] = lfn
    if not toResolve:
        return S_ERROR( "No eligible Source files" )
    res = self.oSourceSE.getFileMetadata( toResolve.keys() )
    if not res['OK']:
        return S_ERROR( "Failed to check source file metadata" )
    for pfn, error in res['Value']['Failed'].items():
        lfn = toResolve[pfn]
        if re.search( 'File does not exist', error ):
            gLogger.warn("resolveSource: skipping %s - source file does not exists" % lfn )
            self.__setFileParameter( lfn, 'Reason', "Source file does not exist" )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
        else:
            gLogger.warn("resolveSource: skipping %s - failed to get source metadata" % lfn )
            self.__setFileParameter( lfn, 'Reason', "Failed to get Source metadata" )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
    for pfn, metadata in res['Value']['Successful'].items():
        lfn = toResolve[pfn]
        if metadata['Unavailable']:
            gLogger.warn("resolveSource: skipping %s - source file unavailable" % lfn )
            self.__setFileParameter( lfn, 'Reason', "Source file Unavailable" )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
        elif metadata['Lost']:
            gLogger.warn("resolveSource: skipping %s - source file lost" % lfn )
            self.__setFileParameter( lfn, 'Reason', "Source file Lost" )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
        elif not metadata['Cached']:
            # File is on tape only: trigger a prestage; the file is only marked
            # Failed if the prestage request itself fails
            gLogger.warn("resolveSource: source file %s not cached, prestaging..." % lfn )
            stage = self.replicaManager.prestageStorageFile( pfn, self.sourceSE, singleFile = True )
            if not stage["OK"]:
                gLogger.warn("resolveSource: skipping %s - %s" % ( lfn, stage["Message"] ) )
                self.__setFileParameter( lfn, 'Reason', stage["Message"] )
                self.__setFileParameter( lfn, 'Status', 'Failed' )
        elif metadata['Size'] != self.catalogMetadata[lfn]['Size']:
            gLogger.warn("resolveSource: skipping %s - source file size mismatch" % lfn )
            self.__setFileParameter( lfn, 'Reason', "Source size mismatch" )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
        elif self.catalogMetadata[lfn]['Checksum'] and metadata['Checksum'] and \
              not ( compareAdler( metadata['Checksum'], self.catalogMetadata[lfn]['Checksum'] ) ):
            # Only compare when both checksums are known
            gLogger.warn("resolveSource: skipping %s - source file checksum mismatch" % lfn )
            self.__setFileParameter( lfn, 'Reason', "Source checksum mismatch" )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
    return S_OK()
def __resolveSource( self ):
    """ Resolve and validate the source SURL for every file that does not have one yet.

    Files failing any step get a 'Reason' parameter and are set to 'Failed'.

    :return: S_OK() or S_ERROR if no file is eligible / metadata cannot be checked
    """
    # FIX: replaced Python-2-only dict.has_key() and redundant .keys() iteration
    # with 'in' membership tests (behavior unchanged, Python 3 compatible)
    toResolve = [ lfn for lfn in self.fileDict
                  if 'Source' not in self.fileDict[lfn] and self.fileDict[lfn].get( 'Status' ) != 'Failed' ]
    if not toResolve:
        return S_OK()
    res = self.__updateMetadataCache( toResolve )
    if not res['OK']:
        return res
    res = self.__updateReplicaCache( toResolve )
    if not res['OK']:
        return res
    for lfn in toResolve:
        if self.fileDict[lfn].get( 'Status' ) == 'Failed':
            continue
        replicas = self.catalogReplicas.get( lfn, {} )
        if self.sourceSE not in replicas:
            self.__setFileParameter( lfn, 'Reason', "No replica at SourceSE" )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
            continue
        res = self.oSourceSE.getPfnForProtocol( replicas[self.sourceSE], 'SRM2', withPort = True )
        if not res['OK']:
            self.__setFileParameter( lfn, 'Reason', res['Message'] )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
            continue
        res = self.setSourceSURL( lfn, res['Value'] )
        if not res['OK']:
            self.__setFileParameter( lfn, 'Reason', res['Message'] )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
            continue
    # Map source PFN -> LFN for the files that now have a source
    toResolve = {}
    for lfn in self.fileDict:
        if 'Source' in self.fileDict[lfn]:
            toResolve[self.fileDict[lfn]['Source']] = lfn
    if not toResolve:
        return S_ERROR( "No eligible Source files" )
    res = self.oSourceSE.getFileMetadata( list( toResolve ) )
    if not res['OK']:
        return S_ERROR( "Failed to check source file metadata" )
    for pfn, error in res['Value']['Failed'].items():
        lfn = toResolve[pfn]
        if re.search( 'File does not exist', error ):
            self.__setFileParameter( lfn, 'Reason', "Source file does not exist" )
        else:
            self.__setFileParameter( lfn, 'Reason', "Failed to get Source metadata" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
    for pfn, metadata in res['Value']['Successful'].items():
        lfn = toResolve[pfn]
        # Validate the source replica; first failing check wins
        if metadata['Unavailable']:
            reason = "Source file Unavailable"
        elif metadata['Lost']:
            reason = "Source file Lost"
        elif not metadata['Cached']:
            reason = "Source file not Cached"
        elif metadata['Size'] != self.catalogMetadata[lfn]['Size']:
            reason = "Source size mismatch"
        elif self.catalogMetadata[lfn]['Checksum'] and metadata['Checksum'] and \
              not compareAdler( metadata['Checksum'], self.catalogMetadata[lfn]['Checksum'] ):
            reason = "Source checksum mismatch"
        else:
            continue
        self.__setFileParameter( lfn, 'Reason', reason )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
    return S_OK()
def resolveSource( self ):
    """ resolve source SE eligible for submission

    Resolves the source URL for every non-failed file, validates the source
    replica and handles tape staging: files not in cache are sent to prestage
    and set to 'Staging'; previously 'Staging' files found cached are moved
    back to 'Waiting'.

    :param self: self reference
    """
    # Avoid resolving sources twice
    if self.sourceResolved:
        return S_OK()
    # Only resolve files that need a transfer
    toResolve = [ lfn for lfn in self.fileDict if self.fileDict[lfn].get( "Status", "" ) != "Failed" ]
    if not toResolve:
        return S_OK()
    res = self.__updateMetadataCache( toResolve )
    if not res['OK']:
        return res
    res = self.__updateReplicaCache( toResolve )
    if not res['OK']:
        return res
    # Define the source URLs
    for lfn in toResolve:
        replicas = self.catalogReplicas.get( lfn, {} )
        if self.sourceSE not in replicas:
            gLogger.warn( "resolveSource: skipping %s - not replicas at SourceSE %s" % ( lfn, self.sourceSE ) )
            self.__setFileParameter( lfn, 'Reason', "No replica at SourceSE" )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
            continue
        res = returnSingleResult( self.oSourceSE.getURL( lfn, protocol = 'srm' ) )
        if not res['OK']:
            gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
            self.__setFileParameter( lfn, 'Reason', res['Message'] )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
            continue
        res = self.setSourceSURL( lfn, res['Value'] )
        if not res['OK']:
            gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
            self.__setFileParameter( lfn, 'Reason', res['Message'] )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
            continue
    toResolve = []
    for lfn in self.fileDict:
        if "Source" in self.fileDict[lfn]:
            toResolve.append( lfn )
    if not toResolve:
        return S_ERROR( "No eligible Source files" )
    # Get metadata of the sources, to check for existance, availability and caching
    res = self.oSourceSE.getFileMetadata( toResolve )
    if not res['OK']:
        return S_ERROR( "Failed to check source file metadata" )
    for lfn, error in res['Value']['Failed'].items():
        if re.search( 'File does not exist', error ):
            gLogger.warn( "resolveSource: skipping %s - source file does not exists" % lfn )
            self.__setFileParameter( lfn, 'Reason', "Source file does not exist" )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
        else:
            gLogger.warn( "resolveSource: skipping %s - failed to get source metadata" % lfn )
            self.__setFileParameter( lfn, 'Reason', "Failed to get Source metadata" )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
    toStage = []
    nbStagedFiles = 0
    for lfn, metadata in res['Value']['Successful'].items():
        lfnStatus = self.fileDict.get( lfn, {} ).get( 'Status' )
        if metadata.get( 'Unavailable', False ):
            gLogger.warn( "resolveSource: skipping %s - source file unavailable" % lfn )
            self.__setFileParameter( lfn, 'Reason', "Source file Unavailable" )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
        elif metadata.get( 'Lost', False ):
            gLogger.warn( "resolveSource: skipping %s - source file lost" % lfn )
            self.__setFileParameter( lfn, 'Reason', "Source file Lost" )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
        elif not metadata.get( 'Cached', metadata['Accessible'] ):
            # 'Cached' falls back to 'Accessible' when the SE reports no cache info
            if lfnStatus != 'Staging':
                toStage.append( lfn )
        elif metadata['Size'] != self.catalogMetadata[lfn]['Size']:
            gLogger.warn( "resolveSource: skipping %s - source file size mismatch" % lfn )
            self.__setFileParameter( lfn, 'Reason', "Source size mismatch" )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
        elif self.catalogMetadata[lfn]['Checksum'] and metadata['Checksum'] and \
              not compareAdler( metadata['Checksum'], self.catalogMetadata[lfn]['Checksum'] ):
            gLogger.warn( "resolveSource: skipping %s - source file checksum mismatch" % lfn )
            self.__setFileParameter( lfn, 'Reason', "Source checksum mismatch" )
            self.__setFileParameter( lfn, 'Status', 'Failed' )
        elif lfnStatus == 'Staging':
            # file that was staging is now cached
            self.__setFileParameter( lfn, 'Status', 'Waiting' )
            nbStagedFiles += 1
    # Some files were being staged
    if nbStagedFiles:
        self.log.info( 'resolveSource: %d files have been staged' % nbStagedFiles )
    # Launching staging of files not in cache
    if toStage:
        gLogger.warn( "resolveSource: %s source files not cached, prestaging..." % len( toStage ) )
        stage = self.oSourceSE.prestageFile( toStage )
        if not stage["OK"]:
            gLogger.error( "resolveSource: error is prestaging", stage["Message"] )
            for lfn in toStage:
                self.__setFileParameter( lfn, 'Reason', stage["Message"] )
                self.__setFileParameter( lfn, 'Status', 'Failed' )
        else:
            for lfn in toStage:
                if lfn in stage['Value']['Successful']:
                    self.__setFileParameter( lfn, 'Status', 'Staging' )
                elif lfn in stage['Value']['Failed']:
                    self.__setFileParameter( lfn, 'Reason', stage['Value']['Failed'][lfn] )
                    self.__setFileParameter( lfn, 'Status', 'Failed' )
    self.sourceResolved = True
    return S_OK()
def filterReplicas( opFile, logger = None, dataManager = None, seCache = None ):
    """ filter out banned/invalid source SEs

    :param opFile: operation file (uses .LFN and .Checksum; may set .Status/.Error)
    :param logger: logger to use (defaults to gLogger)
    :param dataManager: DataManager instance (a fresh one is created if None)
    :param seCache: optional dict caching StorageElement objects by SE name
    :return: S_OK( { 'Valid': [...], 'Banned': [...], 'Bad': [...], 'NoReplicas': [...], 'NoPFN': [...] } )
    """
    from DIRAC.Core.Utilities.Adler import compareAdler
    if not logger:
        logger = gLogger
    if not dataManager:
        dataManager = DataManager()
    if not seCache:
        seCache = {}
    log = logger.getSubLogger( "filterReplicas" )
    ret = { "Valid" : [], "Banned" : [], "Bad" : [], 'NoReplicas':[], 'NoPFN':[] }
    replicas = dataManager.getActiveReplicas( opFile.LFN )
    if not replicas["OK"]:
        log.error( replicas["Message"] )
        return replicas
    # FIX: the previous pattern "not such file or directory" could never match the
    # actual storage error text ("no such file or directory" once lowercased);
    # use the same loose pattern as the other filterReplicas implementations
    reNotExists = re.compile( r".*such file.*" )
    replicas = replicas["Value"]
    failed = replicas["Failed"].get( opFile.LFN , "" )
    if reNotExists.match( failed.lower() ):
        opFile.Status = "Failed"
        opFile.Error = failed
        return S_ERROR( failed )
    replicas = replicas["Successful"].get( opFile.LFN, {} )
    for repSEName in replicas:
        # Reuse a cached StorageElement object when available
        repSE = seCache[repSEName] if repSEName in seCache else \
                seCache.setdefault( repSEName, StorageElement( repSEName ) )
        pfn = repSE.getPfnForLfn( opFile.LFN )
        if not pfn["OK"] or opFile.LFN not in pfn['Value']['Successful']:
            log.warn( "unable to create pfn for %s lfn at %s: %s" % ( opFile.LFN,
                                                                     repSEName,
                                                                     pfn.get( 'Message', pfn.get( 'Value', {} ).get( 'Failed', {} ).get( opFile.LFN ) ) ) )
            ret["NoPFN"].append( repSEName )
        else:
            pfn = pfn["Value"]['Successful'][ opFile.LFN ]
            repSEMetadata = repSE.getFileMetadata( pfn )
            error = repSEMetadata.get( 'Message', repSEMetadata.get( 'Value', {} ).get( 'Failed', {} ).get( pfn ) )
            if error:
                log.warn( 'unable to get metadata at %s for %s' % ( repSEName, opFile.LFN ), error )
                if 'File does not exist' in error:
                    ret['NoReplicas'].append( repSEName )
                else:
                    log.verbose( "StorageElement '%s' is banned for reading" % ( repSEName ) )
                    ret["Banned"].append( repSEName )
            else:
                repSEMetadata = repSEMetadata['Value']['Successful'][pfn]
                seChecksum = repSEMetadata.get( "Checksum" )
                if opFile.Checksum and seChecksum and not compareAdler( seChecksum, opFile.Checksum ) :
                    # FIX: log the SE *name*, not the StorageElement object
                    log.warn( " %s checksum mismatch: %s %s:%s" % ( opFile.LFN,
                                                                   opFile.Checksum,
                                                                   repSEName,
                                                                   seChecksum ) )
                    ret["Bad"].append( repSEName )
                else:
                    # # if we're here repSE is OK
                    ret["Valid"].append( repSEName )
    return S_OK( ret )
def _filterReplicas(self, opFile):
    """ filter out banned/invalid source SEs

    :param opFile: operation file (uses .LFN and .Checksum; may set .Status/.Error)
    :return: S_OK( { 'Valid': [...], 'Banned': [...], 'Bad': [...] } )
             or the failed getActiveReplicas result / S_ERROR for a missing file
    """
    from DIRAC.Core.Utilities.Adler import compareAdler
    ret = {"Valid": [], "Banned": [], "Bad": []}
    replicas = self.rm.getActiveReplicas(opFile.LFN)
    if not replicas["OK"]:
        self.log.error(replicas["Message"])
        # FIX: previously fell through and crashed on replicas["Value"] below
        return replicas
    # FIX: the old pattern "not such file or directory" never matched the actual
    # storage error ("no such file or directory"); use the loose pattern
    reNotExists = re.compile(r".*such file.*")
    replicas = replicas["Value"]
    failed = replicas["Failed"].get(opFile.LFN, "")
    if reNotExists.match(failed.lower()):
        opFile.Status = "Failed"
        opFile.Error = failed
        return S_ERROR(failed)
    replicas = replicas["Successful"].get(opFile.LFN, {})
    for repSEName in replicas:
        seRead = self.rssSEStatus(repSEName, "ReadAccess")
        if not seRead["OK"]:
            self.log.info(seRead["Message"])
            ret["Banned"].append(repSEName)
            continue
        if not seRead["Value"]:
            self.log.info(
                "StorageElement '%s' is banned for reading" % (repSEName))
            # FIX: the banned branch previously only logged and fell through,
            # so read-banned SEs could still end up in 'Valid'
            ret["Banned"].append(repSEName)
            continue
        repSE = self.seCache.get(repSEName, None)
        if not repSE:
            repSE = StorageElement(repSEName, "SRM2")
            # FIX: cache was keyed by the StorageElement object (self.seCache[repSE]),
            # so lookups by name never hit and a new SE was built every call
            self.seCache[repSEName] = repSE
        pfn = repSE.getPfnForLfn(opFile.LFN)
        if not pfn["OK"]:
            self.log.warn("unable to create pfn for %s lfn: %s" %
                          (opFile.LFN, pfn["Message"]))
            ret["Banned"].append(repSEName)
            continue
        pfn = pfn["Value"]
        repSEMetadata = repSE.getFileMetadata(pfn, singleFile=True)
        if not repSEMetadata["OK"]:
            self.log.warn(repSEMetadata["Message"])
            ret["Banned"].append(repSEName)
            continue
        repSEMetadata = repSEMetadata["Value"]
        seChecksum = repSEMetadata.get("Checksum")
        if opFile.Checksum and seChecksum and not compareAdler(
                seChecksum, opFile.Checksum):
            self.log.warn(" %s checksum mismatch: %s %s:%s" %
                          (opFile.LFN, opFile.Checksum, repSE, seChecksum))
            ret["Bad"].append(repSEName)
            continue
        # # if we're here repSE is OK
        ret["Valid"].append(repSEName)
    return S_OK(ret)
def compareChecksum(self, lfns):
    """Compare the checksum of each file in the FC with the checksum of its physical replicas.

    :param lfns: list of LFNs to check
    :return: S_OK with a dict of sub-dictionaries keyed by
             "AllReplicasCorrupted", "SomeReplicasCorrupted", "MissingReplica",
             "MissingAllReplicas" and "NoReplicas"
    """
    retDict = {
        "AllReplicasCorrupted": {},
        "SomeReplicasCorrupted": {},
        "MissingReplica": {},
        "MissingAllReplicas": {},
        "NoReplicas": {},
    }
    chunkSize = 100  # bulk-query size for catalog and SE calls
    replicas = {}
    setLfns = set(lfns)
    # Reuse replica info already fetched in a previous call
    cachedLfns = setLfns & set(self.cachedReplicas)
    for lfn in cachedLfns:
        replicas[lfn] = self.cachedReplicas[lfn]
    lfnsLeft = list(setLfns - cachedLfns)
    if lfnsLeft:
        self.__write("Get replicas for %d files (chunks of %d): " % (len(lfnsLeft), chunkSize))
        for lfnChunk in breakListIntoChunks(lfnsLeft, chunkSize):
            self.__write(".")
            replicasRes = self.fileCatalog.getReplicas(lfnChunk)
            if not replicasRes["OK"]:
                gLogger.error("error: %s" % replicasRes["Message"])
                return S_ERROR(errno.ENOENT, "error: %s" % replicasRes["Message"])
            replicasRes = replicasRes["Value"]
            if replicasRes["Failed"]:
                # Files with no replicas in the catalog
                retDict["NoReplicas"].update(replicasRes["Failed"])
            replicas.update(replicasRes["Successful"])
    self.__write("Get FC metadata for %d files to be checked: " % len(lfns))
    metadata = {}
    for lfnChunk in breakListIntoChunks(replicas, chunkSize):
        self.__write(".")
        res = self.fileCatalog.getFileMetadata(lfnChunk)
        if not res["OK"]:
            return S_ERROR(errno.ENOENT, "error %s" % res["Message"])
        metadata.update(res["Value"]["Successful"])
    gLogger.notice("Check existence and compare checksum file by file...")
    csDict = {}
    seFiles = {}
    # Reverse the LFN->SE dictionary
    nReps = 0
    for lfn in replicas:
        # Seed each file's entry with the catalog checksum; SE mismatches are added later
        csDict.setdefault(lfn, {})["FCChecksum"] = metadata.get(
            lfn, {}).get("Checksum")
        for se in replicas[lfn]:
            seFiles.setdefault(se, []).append(lfn)
            nReps += 1
    gLogger.notice("Getting checksum of %d replicas in %d SEs" % (nReps, len(seFiles)))
    checkSum = {}
    lfnNotExisting = {}
    lfnNoInfo = {}
    # Silence the logger during the (noisy) SE queries; restored below
    logLevel = gLogger.getLevel()
    gLogger.setLevel("FATAL")
    for num, se in enumerate(sorted(seFiles)):
        self.__write("\n%d. At %s (%d files): " % (num, se, len(seFiles[se])))
        oSe = StorageElement(se)
        notFound = 0
        for surlChunk in breakListIntoChunks(seFiles[se], chunkSize):
            self.__write(".")
            # NOTE: 'metadata' is reused here for the SE query result,
            # shadowing the FC metadata dict (already consumed into csDict)
            metadata = oSe.getFileMetadata(surlChunk)
            if not metadata["OK"]:
                gLogger.error(
                    "Error: getFileMetadata returns %s. Ignore those replicas" %
                    (metadata["Message"]))
                # Remove from list of replicas as we don't know whether it is OK or
                # not
                for lfn in seFiles[se]:
                    lfnNoInfo.setdefault(lfn, []).append(se)
            else:
                metadata = metadata["Value"]
                notFound += len(metadata["Failed"])
                for lfn in metadata["Failed"]:
                    lfnNotExisting.setdefault(lfn, []).append(se)
                for lfn in metadata["Successful"]:
                    checkSum.setdefault(
                        lfn, {})[se] = metadata["Successful"][lfn]["Checksum"]
        if notFound:
            gLogger.error("%d files not found" % notFound)
    gLogger.setLevel(logLevel)
    gLogger.notice("Verifying checksum of %d files" % len(replicas))
    for lfn in replicas:
        # get the lfn checksum from the FC
        replicaDict = replicas[lfn]
        oneGoodReplica = False
        allGoodReplicas = True
        fcChecksum = csDict[lfn].pop("FCChecksum")
        for se in replicaDict:
            # If replica doesn't exist skip check
            if se in lfnNotExisting.get(lfn, []):
                allGoodReplicas = False
                continue
            if se in lfnNoInfo.get(lfn, []):
                # If there is no info, a priori it could be good
                oneGoodReplica = True
                continue
            # get the surls metadata and compare the checksum
            surlChecksum = checkSum.get(lfn, {}).get(se, "")
            if not surlChecksum or not compareAdler(
                    fcChecksum, surlChecksum):
                # if fcChecksum does not match surlChecksum
                csDict[lfn][se] = {"PFNChecksum": surlChecksum}
                gLogger.info(
                    "ERROR!! checksum mismatch at %s for LFN %s: FC checksum: %s , PFN checksum : %s " %
                    (se, lfn, fcChecksum, surlChecksum))
                allGoodReplicas = False
            else:
                oneGoodReplica = True
        # Classify the file based on what was found above
        if not oneGoodReplica:
            if lfn in lfnNotExisting:
                gLogger.info("=> All replicas are missing", lfn)
                retDict["MissingAllReplicas"][lfn] = "All"
            else:
                gLogger.info("=> All replicas have bad checksum", lfn)
                retDict["AllReplicasCorrupted"][lfn] = csDict[lfn]
        elif not allGoodReplicas:
            if lfn in lfnNotExisting:
                gLogger.info("=> At least one replica missing", lfn)
                retDict["MissingReplica"][lfn] = lfnNotExisting[lfn]
            else:
                gLogger.info("=> At least one replica with good Checksum", lfn)
                retDict["SomeReplicasCorrupted"][lfn] = csDict[lfn]
    return S_OK(retDict)
def filterReplicas( opFile, logger = None, dataManager = None ):
    """Sort the replicas of ``opFile`` into usability categories.

    :param opFile: operation file (uses .LFN and .Checksum; may set .Status,
                   .Error, .Checksum and .ChecksumType)
    :param logger: logger to use (defaults to gLogger)
    :param dataManager: DataManager instance (a fresh one is created if None)
    :return: S_OK( dict with 'Valid', 'NoMetadata', 'Bad', 'NoReplicas', 'NoPFN' lists )
    """
    if logger is None:
        logger = gLogger
    if dataManager is None:
        dataManager = DataManager()
    log = logger.getSubLogger( "filterReplicas" )
    result = dict( ( key, [] ) for key in ( "Valid", "NoMetadata", "Bad", "NoReplicas", "NoPFN" ) )
    activeReps = dataManager.getActiveReplicas( opFile.LFN )
    if not activeReps["OK"]:
        log.error( 'Failed to get active replicas', activeReps["Message"] )
        return activeReps
    activeReps = activeReps["Value"]
    # A "no such file" failure means the file itself is gone: fail the whole file
    noSuchFile = re.compile( r".*such file.*" )
    failure = activeReps["Failed"].get( opFile.LFN, "" )
    if noSuchFile.match( failure.lower() ):
        opFile.Status = "Failed"
        opFile.Error = failure
        return S_ERROR( failure )
    seReplicas = activeReps["Successful"].get( opFile.LFN, {} )
    if not opFile.Checksum:
        # Set Checksum to FC checksum if not set in the request
        fcMeta = FileCatalog().getFileMetadata( opFile.LFN )
        fileMeta = fcMeta.get( 'Value', {} ).get( 'Successful', {} ).get( opFile.LFN, {} )
        fcChecksum = fileMeta.get( 'Checksum', '' )
        # Replace opFile.Checksum if it doesn't match a valid FC checksum
        if fcChecksum:
            opFile.Checksum = fcChecksum
            opFile.ChecksumType = fileMeta.get( 'ChecksumType', 'Adler32' )
    for seName in seReplicas:
        metaRes = StorageElement( seName ).getFileMetadata( opFile.LFN )
        error = metaRes.get( 'Message',
                             metaRes.get( 'Value', {} ).get( 'Failed', {} ).get( opFile.LFN ) )
        if error:
            log.warn( 'unable to get metadata at %s for %s' % ( seName, opFile.LFN ),
                      error.replace( '\n', '' ) )
            bucket = 'NoReplicas' if 'File does not exist' in error else "NoMetadata"
            result[bucket].append( seName )
            continue
        seChecksum = metaRes['Value']['Successful'][opFile.LFN].get( "Checksum" )
        checksumsAgree = bool( opFile.Checksum and seChecksum and
                               compareAdler( seChecksum, opFile.Checksum ) )
        bothUnset = not opFile.Checksum and not seChecksum
        if checksumsAgree or bothUnset:
            # # All checksums are OK
            result["Valid"].append( seName )
        else:
            log.warn( " %s checksum mismatch, FC: '%s' @%s: '%s'" % ( opFile.LFN,
                                                                     opFile.Checksum,
                                                                     seName,
                                                                     seChecksum ) )
            result["Bad"].append( seName )
    return S_OK( result )
def filterReplicas( opFile, logger = None, dataManager = None, seCache = None ):
    """ filter out banned/invalid source SEs

    :param opFile: operation file (uses .LFN and .Checksum; may set .Status, .Error, .Checksum)
    :param logger: logger to use (defaults to gLogger)
    :param dataManager: DataManager instance (a fresh one is created if None)
    :param seCache: optional dict caching StorageElement objects by SE name
    :return: S_OK( { 'Valid': [...], 'NoMetadata': [...], 'Bad': [...], 'NoReplicas': [...], 'NoPFN': [...] } )
    """
    if not logger:
        logger = gLogger
    if not dataManager:
        dataManager = DataManager()
    if not seCache:
        seCache = {}
    log = logger.getSubLogger( "filterReplicas" )
    ret = { "Valid" : [], "NoMetadata" : [], "Bad" : [], 'NoReplicas':[], 'NoPFN':[] }
    replicas = dataManager.getActiveReplicas( opFile.LFN )
    if not replicas["OK"]:
        log.error( replicas["Message"] )
        return replicas
    # "no such file" in the catalog failure means the file itself is gone
    reNotExists = re.compile( r".*such file.*" )
    replicas = replicas["Value"]
    failed = replicas["Failed"].get( opFile.LFN , "" )
    if reNotExists.match( failed.lower() ):
        opFile.Status = "Failed"
        opFile.Error = failed
        return S_ERROR( failed )
    replicas = replicas["Successful"].get( opFile.LFN, {} )
    for repSEName in replicas:
        # Reuse a cached StorageElement object when available
        repSE = seCache[repSEName] if repSEName in seCache else \
                seCache.setdefault( repSEName, StorageElement( repSEName ) )
        pfn = repSE.getPfnForLfn( opFile.LFN )
        if not pfn["OK"] or opFile.LFN not in pfn['Value']['Successful']:
            log.warn( "unable to create pfn for %s lfn at %s: %s" % ( opFile.LFN,
                                                                     repSEName,
                                                                     pfn.get( 'Message', pfn.get( 'Value', {} ).get( 'Failed', {} ).get( opFile.LFN ) ) ) )
            ret["NoPFN"].append( repSEName )
        else:
            pfn = pfn["Value"]['Successful'][ opFile.LFN ]
            repSEMetadata = repSE.getFileMetadata( pfn )
            # Either an overall failure ('Message') or a per-PFN failure
            error = repSEMetadata.get( 'Message', repSEMetadata.get( 'Value', {} ).get( 'Failed', {} ).get( pfn ) )
            if error:
                log.warn( 'unable to get metadata at %s for %s' % ( repSEName, opFile.LFN ), error.replace( '\n', '' ) )
                if 'File does not exist' in error:
                    ret['NoReplicas'].append( repSEName )
                else:
                    ret["NoMetadata"].append( repSEName )
            else:
                repSEMetadata = repSEMetadata['Value']['Successful'][pfn]
                seChecksum = repSEMetadata.get( "Checksum" )
                if opFile.Checksum and seChecksum and not compareAdler( seChecksum, opFile.Checksum ) :
                    # The checksum in the request may be wrong, check with FC
                    fcMetadata = FileCatalog().getFileMetadata( opFile.LFN )
                    fcChecksum = fcMetadata.get( 'Value', {} ).get( 'Successful', {} ).get( opFile.LFN, {} ).get( 'Checksum' )
                    if fcChecksum and fcChecksum != opFile.Checksum and compareAdler( fcChecksum , seChecksum ):
                        # The FC agrees with the storage: trust it and fix the request checksum
                        opFile.Checksum = fcChecksum
                        ret['Valid'].append( repSEName )
                    else:
                        log.warn( " %s checksum mismatch, request: %s @%s: %s" % ( opFile.LFN,
                                                                                  opFile.Checksum,
                                                                                  repSEName,
                                                                                  seChecksum ) )
                        ret["Bad"].append( repSEName )
                else:
                    # # if we're here repSE is OK
                    ret["Valid"].append( repSEName )
    return S_OK( ret )
def resolveSource(self):
    """ resolve source SE eligible for submission

    Turns the catalog replica at self.sourceSE into a protocol SURL for
    each file, then validates the source metadata (existence,
    availability, caching, size, checksum).  Files without a usable
    source are marked Status='Failed' with a 'Reason' parameter.

    :param self: self reference
    :return: S_OK() normally, S_ERROR when no file has an eligible source
    """
    toResolve = list(self.fileDict)
    if not toResolve:
        return S_OK()
    res = self.__updateMetadataCache(toResolve)
    if not res['OK']:
        return res
    res = self.__updateReplicaCache(toResolve)
    if not res['OK']:
        return res
    # Build the source SURL for every file that is not already failed
    for lfn in toResolve:
        if self.fileDict[lfn].get("Status", "") == "Failed":
            continue
        replicas = self.catalogReplicas.get(lfn, {})
        if self.sourceSE not in replicas:
            # fixed log message: was "not replicas at SourceSE"
            gLogger.warn(
                "resolveSource: skipping %s - no replicas at SourceSE %s" % (lfn, self.sourceSE))
            self.__setFileParameter(lfn, 'Reason', "No replica at SourceSE")
            self.__setFileParameter(lfn, 'Status', 'Failed')
            continue
        res = self.oSourceSE.getPfnForProtocol(replicas[self.sourceSE], 'SRM2', withPort=True)
        if not res['OK']:
            gLogger.warn("resolveSource: skipping %s - %s" % (lfn, res["Message"]))
            self.__setFileParameter(lfn, 'Reason', res['Message'])
            self.__setFileParameter(lfn, 'Status', 'Failed')
            continue
        res = self.setSourceSURL(lfn, res['Value'])
        if not res['OK']:
            gLogger.warn("resolveSource: skipping %s - %s" % (lfn, res["Message"]))
            self.__setFileParameter(lfn, 'Reason', res['Message'])
            self.__setFileParameter(lfn, 'Status', 'Failed')
            continue
    # Map resolved source PFNs back to their LFNs
    toResolve = {}
    for lfn in self.fileDict:
        if "Source" in self.fileDict[lfn]:
            toResolve[self.fileDict[lfn]['Source']] = lfn
    if not toResolve:
        return S_ERROR("No eligible Source files")
    res = self.oSourceSE.getFileMetadata(toResolve.keys())
    if not res['OK']:
        return S_ERROR("Failed to check source file metadata")
    for pfn, error in res['Value']['Failed'].items():
        lfn = toResolve[pfn]
        if re.search('File does not exist', error):
            # fixed log message grammar: was "does not exists"
            gLogger.warn(
                "resolveSource: skipping %s - source file does not exist" % lfn)
            self.__setFileParameter(lfn, 'Reason', "Source file does not exist")
            self.__setFileParameter(lfn, 'Status', 'Failed')
        else:
            gLogger.warn(
                "resolveSource: skipping %s - failed to get source metadata" % lfn)
            self.__setFileParameter(lfn, 'Reason', "Failed to get Source metadata")
            self.__setFileParameter(lfn, 'Status', 'Failed')
    # Validate the metadata of every reachable source replica
    for pfn, metadata in res['Value']['Successful'].items():
        lfn = toResolve[pfn]
        if metadata['Unavailable']:
            gLogger.warn(
                "resolveSource: skipping %s - source file unavailable" % lfn)
            self.__setFileParameter(lfn, 'Reason', "Source file Unavailable")
            self.__setFileParameter(lfn, 'Status', 'Failed')
        elif metadata['Lost']:
            gLogger.warn("resolveSource: skipping %s - source file lost" % lfn)
            self.__setFileParameter(lfn, 'Reason', "Source file Lost")
            self.__setFileParameter(lfn, 'Status', 'Failed')
        elif not metadata['Cached']:
            # file is on tape only: trigger a prestage, fail the file if that fails
            gLogger.warn(
                "resolveSource: source file %s not cached, prestaging..." % lfn)
            stage = self.replicaManager.prestageStorageFile(
                pfn, self.sourceSE, singleFile=True)
            if not stage["OK"]:
                gLogger.warn("resolveSource: skipping %s - %s" % (lfn, stage["Message"]))
                self.__setFileParameter(lfn, 'Reason', stage["Message"])
                self.__setFileParameter(lfn, 'Status', 'Failed')
        elif metadata['Size'] != self.catalogMetadata[lfn]['Size']:
            gLogger.warn(
                "resolveSource: skipping %s - source file size mismatch" % lfn)
            self.__setFileParameter(lfn, 'Reason', "Source size mismatch")
            self.__setFileParameter(lfn, 'Status', 'Failed')
        elif self.catalogMetadata[lfn]['Checksum'] and metadata['Checksum'] and \
                not (compareAdler(metadata['Checksum'], self.catalogMetadata[lfn]['Checksum'])):
            gLogger.warn(
                "resolveSource: skipping %s - source file checksum mismatch" % lfn)
            self.__setFileParameter(lfn, 'Reason', "Source checksum mismatch")
            self.__setFileParameter(lfn, 'Status', 'Failed')
    return S_OK()
def filterReplicas(opFile, logger=None, dataManager=None):
    """ filter out banned/invalid source SEs

    Classifies each replica of opFile into Valid / NoMetadata / Bad /
    NoReplicas / NoPFN; may update opFile.Checksum(ChecksumType) from
    the File Catalog or the storage when the request has none.
    """
    logger = gLogger if logger is None else logger
    dataManager = DataManager() if dataManager is None else dataManager
    subLog = logger.getSubLogger("filterReplicas")
    buckets = {"Valid": [], "NoMetadata": [], "Bad": [], "NoReplicas": [], "NoPFN": []}
    activeRes = dataManager.getActiveReplicas(opFile.LFN)
    if not activeRes["OK"]:
        subLog.error("Failed to get active replicas", activeRes["Message"])
        return activeRes
    activeRes = activeRes["Value"]
    failedMsg = activeRes["Failed"].get(opFile.LFN, "")
    if re.compile(r".*such file.*").match(failedMsg.lower()):
        # the catalog itself reports the file as missing
        opFile.Status = "Failed"
        opFile.Error = failedMsg
        return S_ERROR(failedMsg)
    replicas = activeRes["Successful"].get(opFile.LFN, {})
    noReplicas = False
    if not replicas:
        # no active replica: look at all replicas to tell "inactive" from "none"
        allRes = dataManager.getReplicas(opFile.LFN)
        if not allRes["OK"]:
            return allRes
        allRes = allRes["Value"]["Successful"].get(opFile.LFN, {})
        if allRes:
            # probe the inactive replicas: maybe the file exists nowhere at all
            replicas = allRes
        else:
            buckets["NoReplicas"].append(None)
            noReplicas = True
        subLog.warn("File has no%s replica in File Catalog" % ("" if noReplicas else " active"), opFile.LFN)
    if not opFile.Checksum:
        # the request carries no checksum: take the File Catalog one
        fcMetadata = FileCatalog().getFileMetadata(opFile.LFN)
        fcChecksum = fcMetadata.get("Value", {}).get("Successful", {}).get(opFile.LFN, {}).get("Checksum")
        if fcChecksum:
            opFile.Checksum = fcChecksum
            opFile.ChecksumType = fcMetadata["Value"]["Successful"][opFile.LFN].get("ChecksumType", "Adler32")
    for repSEName in replicas:
        metaRes = StorageElement(repSEName).getFileMetadata(opFile.LFN)
        metaError = metaRes.get("Message", metaRes.get("Value", {}).get("Failed", {}).get(opFile.LFN))
        if metaError:
            subLog.warn("unable to get metadata at %s for %s" % (repSEName, opFile.LFN), metaError.replace("\n", ""))
            if "File does not exist" in metaError:
                buckets["NoReplicas"].append(repSEName)
            else:
                buckets["NoMetadata"].append(repSEName)
        elif noReplicas:
            # a replica responded after all: clear the NoReplicas marker
            buckets["NoReplicas"] = []
        else:
            seChecksum = metaRes["Value"]["Successful"][opFile.LFN].get("Checksum")
            if not seChecksum and opFile.Checksum:
                # storage has no checksum: drop the request one so it can't mismatch
                opFile.Checksum = None
                opFile.ChecksumType = None
            elif seChecksum and not opFile.Checksum:
                opFile.Checksum = seChecksum
            if not opFile.Checksum or not seChecksum or compareAdler(seChecksum, opFile.Checksum):
                buckets["Valid"].append(repSEName)
            else:
                subLog.warn(
                    " %s checksum mismatch, FC: '%s' @%s: '%s'"
                    % (opFile.LFN, opFile.Checksum, repSEName, seChecksum))
                buckets["Bad"].append(repSEName)
    return S_OK(buckets)
def filterReplicas( opFile, logger = None, dataManager = None ):
  """ filter out banned/invalid source SEs

  Classifies each replica of opFile into Valid / NoMetadata / Bad /
  NoReplicas / NoPFN; may update opFile.Checksum(ChecksumType) from
  the File Catalog or the storage when the request value is absent
  or malformed.
  """
  if logger is None:
    logger = gLogger
  if dataManager is None:
    dataManager = DataManager()
  log = logger.getSubLogger( "filterReplicas" )
  ret = { "Valid" : [], "NoMetadata" : [], "Bad" : [], 'NoReplicas':[], 'NoPFN':[] }
  replicas = dataManager.getActiveReplicas( opFile.LFN )
  if not replicas["OK"]:
    log.error( 'Failed to get active replicas', replicas["Message"] )
    return replicas
  reNotExists = re.compile( r".*such file.*" )
  replicas = replicas["Value"]
  failed = replicas["Failed"].get( opFile.LFN , "" )
  if reNotExists.match( failed.lower() ):
    opFile.Status = "Failed"
    opFile.Error = failed
    return S_ERROR( failed )
  replicas = replicas["Successful"].get( opFile.LFN, {} )
  noReplicas = False
  if not replicas:
    allReplicas = dataManager.getReplicas( opFile.LFN )
    if allReplicas['OK']:
      allReplicas = allReplicas['Value']['Successful'].get( opFile.LFN, {} )
      if not allReplicas:
        ret['NoReplicas'].append( None )
        noReplicas = True
      else:
        # We try inactive replicas to see if maybe the file doesn't exist at all
        replicas = allReplicas
      log.warn( "File has no%s replica in File Catalog" % ( '' if noReplicas else ' active' ), opFile.LFN )
    else:
      return allReplicas
  # Bug fix: hexAdlerToInt() returns False for a malformed checksum, so test
  # identity ('is False'), not equality - a numeric checksum of 0 compares
  # equal to False and would be misclassified (matches the sibling versions)
  if not opFile.Checksum or hexAdlerToInt( opFile.Checksum ) is False:
    # Set Checksum to FC checksum if not set in the request
    fcMetadata = FileCatalog().getFileMetadata( opFile.LFN )
    fcChecksum = fcMetadata.get( 'Value', {} ).get( 'Successful', {} ).get( opFile.LFN, {} ).get( 'Checksum' )
    # Replace opFile.Checksum if it doesn't match a valid FC checksum
    if fcChecksum:
      if hexAdlerToInt( fcChecksum ) is not False:
        opFile.Checksum = fcChecksum
        opFile.ChecksumType = fcMetadata['Value']['Successful'][opFile.LFN].get( 'ChecksumType', 'Adler32' )
      else:
        opFile.Checksum = None
  for repSEName in replicas:
    repSEMetadata = StorageElement( repSEName ).getFileMetadata( opFile.LFN )
    error = repSEMetadata.get( 'Message', repSEMetadata.get( 'Value', {} ).get( 'Failed', {} ).get( opFile.LFN ) )
    if error:
      log.warn( 'unable to get metadata at %s for %s' % ( repSEName, opFile.LFN ), error.replace( '\n', '' ) )
      if 'File does not exist' in error:
        ret['NoReplicas'].append( repSEName )
      else:
        ret["NoMetadata"].append( repSEName )
    elif not noReplicas:
      repSEMetadata = repSEMetadata['Value']['Successful'][opFile.LFN]
      seChecksum = hexAdlerToInt( repSEMetadata.get( "Checksum" ) )
      # same identity fix here: False means malformed, 0 would equal False
      if seChecksum is False and opFile.Checksum:
        ret['NoMetadata'].append( repSEName )
      elif not seChecksum and opFile.Checksum:
        opFile.Checksum = None
        opFile.ChecksumType = None
      elif seChecksum and ( not opFile.Checksum or opFile.Checksum == 'False' ):
        # Use the SE checksum and force type to be Adler32
        opFile.Checksum = seChecksum
        opFile.ChecksumType = 'Adler32'
      if not opFile.Checksum or not seChecksum or compareAdler( seChecksum, opFile.Checksum ):
        # # All checksums are OK
        ret["Valid"].append( repSEName )
      else:
        log.warn( " %s checksum mismatch, FC: '%s' @%s: '%s'" % ( opFile.LFN, opFile.Checksum, repSEName, seChecksum ) )
        ret["Bad"].append( repSEName )
    else:
      # If a replica was found somewhere, don't set the file as no replicas
      ret['NoReplicas'] = []
  return S_OK( ret )
def __validateChecksums(self, se, migratedFileIDs, migratingFiles):
    """ Obtain the checksums in the catalog if not present and check
    against the checksum from the storage.

    :param se: storage element name, used as log prefix
    :param migratedFileIDs: dict fileID -> checksum reported by the storage
    :param migratingFiles: dict fileID -> file info dict (LFN, PFN, SE, Checksum, ...)
    :return: S_OK({'MatchingFiles': [...], 'MismatchFiles': [...]})
    """
    lfnFileID = {}
    checksumToObtain = []
    # Collect the LFNs whose catalog checksum is still unknown
    for fileID in migratedFileIDs.keys():
        if not migratingFiles[fileID]['Checksum']:
            lfn = migratingFiles[fileID]['LFN']
            checksumToObtain.append(lfn)
            lfnFileID[lfn] = fileID
    if checksumToObtain:
        res = self.ReplicaManager.getCatalogFileMetadata(checksumToObtain)
        if not res['OK']:
            gLogger.error(
                "[%s] __validateChecksums: Failed to obtain file checksums" % se)
            return res
        for lfn, error in res['Value']['Failed'].items():
            gLogger.error(
                "[%s] __validateChecksums: Failed to get file checksum" % se,
                "%s %s" % (lfn, error))
        for lfn, metadata in res['Value']['Successful'].items():
            migratingFiles[lfnFileID[lfn]]['Checksum'] = metadata['CheckSumValue']
    mismatchFiles = []
    matchFiles = []
    checksumMismatches = []
    fileRecords = []
    for fileID, seChecksum in migratedFileIDs.items():
        lfn = migratingFiles[fileID]['LFN']
        catalogChecksum = migratingFiles[fileID]['Checksum']
        if not seChecksum:
            gLogger.error(
                "[%s] __validateChecksums: Storage checksum not available" % se,
                migratingFiles[fileID]['PFN'])
        elif not compareAdler(seChecksum, catalogChecksum):
            gLogger.error(
                "[%s] __validateChecksums: Storage and catalog checksum mismatch" % se,
                "%s '%s' '%s'" % (migratingFiles[fileID]['PFN'], seChecksum, catalogChecksum))
            mismatchFiles.append(fileID)
            pfn = migratingFiles[fileID]['PFN']
            # Bug fix: use a local name instead of clobbering the 'se'
            # parameter, which is reused in later log messages
            replicaSE = migratingFiles[fileID]['SE']
            checksumMismatches.append(
                (lfn, pfn, replicaSE, 'CatalogPFNChecksumMismatch'))
            # Bug fix: the record labels were swapped - this is the mismatch branch
            fileRecords.append(
                (lfn, 'Checksum mismatch', '%s@%s' % (seChecksum, replicaSE), '', 'MigrationMonitoringAgent'))
        else:
            # Bug fix: ... and this is the match branch
            fileRecords.append(
                (lfn, 'Checksum match', '%s@%s' % (seChecksum, se), '', 'MigrationMonitoringAgent'))
            matchFiles.append(fileID)
    # Add the data logging records
    self.DataLog.addFileRecords(fileRecords)
    if checksumMismatches:
        # Update the (mis)matching checksums (in the integrityDB and) in the
        # migration monitoring db
        self.__reportProblematicReplicas(checksumMismatches)
        res = self.MigrationMonitoringDB.setMigratingReplicaStatus(
            mismatchFiles, 'ChecksumFail')
        if not res['OK']:
            gLogger.error(
                "[%s] __validateChecksums: Failed to update checksum mismatching files." % se,
                res['Message'])
    if matchFiles:
        res = self.MigrationMonitoringDB.setMigratingReplicaStatus(
            matchFiles, 'ChecksumMatch')
        if not res['OK']:
            # Bug fix: this error concerns the *matching* files
            gLogger.error(
                "[%s] __validateChecksums: Failed to update checksum matching files." % se,
                res['Message'])
    resDict = {'MatchingFiles': matchFiles, 'MismatchFiles': mismatchFiles}
    return S_OK(resDict)
def resolveSource( self ):
  """ resolve source SE eligible for submission

  Resolves a protocol SURL for each file's replica at the source SE,
  then validates the source metadata (existence, availability, size,
  checksum) and drives the tape-staging state machine: files not in
  the disk cache are prestaged and flagged 'Staging'; files whose
  staging completed are moved back to 'Waiting'.

  :param self: self reference
  :return: S_OK() normally, S_ERROR when no file has an eligible source
  """
  # Avoid resolving sources twice
  if self.sourceResolved:
    return S_OK()
  # Only resolve files that need a transfer
  toResolve = [ lfn for lfn in self.fileDict if self.fileDict[lfn].get( "Status", "" ) != "Failed" ]
  if not toResolve:
    return S_OK()
  res = self.__updateMetadataCache( toResolve )
  if not res['OK']:
    return res
  res = self.__updateReplicaCache( toResolve )
  if not res['OK']:
    return res
  # Define the source URLs
  for lfn in toResolve:
    replicas = self.catalogReplicas.get( lfn, {} )
    if self.sourceSE not in replicas:
      gLogger.warn( "resolveSource: skipping %s - not replicas at SourceSE %s" % ( lfn, self.sourceSE ) )
      self.__setFileParameter( lfn, 'Reason', "No replica at SourceSE" )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
      continue
    # Fix first the PFN
    # fall back to the catalog replica value when the SE cannot build a PFN
    pfn = self.oSourceSE.getPfnForLfn( lfn ).get( 'Value', {} ).get( 'Successful', {} ).get( lfn, replicas[self.sourceSE] )
    res = returnSingleResult( self.oSourceSE.getPfnForProtocol( pfn, protocol = 'SRM2', withPort = True ) )
    if not res['OK']:
      gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
      self.__setFileParameter( lfn, 'Reason', res['Message'] )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
      continue
    res = self.setSourceSURL( lfn, res['Value'] )
    if not res['OK']:
      gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
      self.__setFileParameter( lfn, 'Reason', res['Message'] )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
      continue
  # Map every resolved source PFN back to its LFN
  toResolve = {}
  for lfn in self.fileDict:
    if "Source" in self.fileDict[lfn]:
      toResolve[self.fileDict[lfn]['Source']] = lfn
  if not toResolve:
    return S_ERROR( "No eligible Source files" )
  # Get metadata of the sources, to check for existance, availability and caching
  res = self.oSourceSE.getFileMetadata( toResolve.keys() )
  if not res['OK']:
    return S_ERROR( "Failed to check source file metadata" )
  # Files whose metadata could not be obtained are failed individually
  for pfn, error in res['Value']['Failed'].items():
    lfn = toResolve[pfn]
    if re.search( 'File does not exist', error ):
      gLogger.warn( "resolveSource: skipping %s - source file does not exists" % lfn )
      self.__setFileParameter( lfn, 'Reason', "Source file does not exist" )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    else:
      gLogger.warn( "resolveSource: skipping %s - failed to get source metadata" % lfn )
      self.__setFileParameter( lfn, 'Reason', "Failed to get Source metadata" )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
  toStage = []
  nbStagedFiles = 0
  # Validate each reachable source replica and track the staging state
  for pfn, metadata in res['Value']['Successful'].items():
    lfn = toResolve[pfn]
    lfnStatus = self.fileDict.get( lfn, {} ).get( 'Status' )
    if metadata['Unavailable']:
      gLogger.warn( "resolveSource: skipping %s - source file unavailable" % lfn )
      self.__setFileParameter( lfn, 'Reason', "Source file Unavailable" )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    elif metadata['Lost']:
      gLogger.warn( "resolveSource: skipping %s - source file lost" % lfn )
      self.__setFileParameter( lfn, 'Reason', "Source file Lost" )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    elif not metadata['Cached']:
      # not on disk: queue a prestage unless one is already in progress
      if lfnStatus != 'Staging':
        toStage.append( pfn )
    elif metadata['Size'] != self.catalogMetadata[lfn]['Size']:
      gLogger.warn( "resolveSource: skipping %s - source file size mismatch" % lfn )
      self.__setFileParameter( lfn, 'Reason', "Source size mismatch" )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    elif self.catalogMetadata[lfn]['Checksum'] and metadata['Checksum'] and \
        not compareAdler( metadata['Checksum'], self.catalogMetadata[lfn]['Checksum'] ):
      gLogger.warn( "resolveSource: skipping %s - source file checksum mismatch" % lfn )
      self.__setFileParameter( lfn, 'Reason', "Source checksum mismatch" )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    elif lfnStatus == 'Staging':
      # file that was staging is now cached
      self.__setFileParameter( lfn, 'Status', 'Waiting' )
      nbStagedFiles += 1
  # Some files were being staged
  if nbStagedFiles:
    self.log.info( 'resolveSource: %d files have been staged' % nbStagedFiles )
  # Launching staging of files not in cache
  if toStage:
    gLogger.warn( "resolveSource: %s source files not cached, prestaging..." % len( toStage ) )
    stage = self.oSourceSE.prestageFile( toStage )
    if not stage["OK"]:
      # whole prestage call failed: fail all the files that were queued
      gLogger.error( "resolveSource: error is prestaging - %s" % stage["Message"] )
      for pfn in toStage:
        lfn = toResolve[pfn]
        self.__setFileParameter( lfn, 'Reason', stage["Message"] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
    else:
      # mark per-file outcome of the prestage request
      for pfn in toStage:
        lfn = toResolve[pfn]
        if pfn in stage['Value']['Successful']:
          self.__setFileParameter( lfn, 'Status', 'Staging' )
        elif pfn in stage['Value']['Failed']:
          self.__setFileParameter( lfn, 'Reason', stage['Value']['Failed'][pfn] )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
  self.sourceResolved = True
  return S_OK()
def filterReplicas(opFile, logger=None, dataManager=None, seCache=None):
    """ filter out banned/invalid source SEs

    Sorts every replica of opFile into Valid / NoMetadata / Bad /
    NoReplicas / NoPFN buckets and returns them in S_OK.

    :param opFile: file object carrying LFN, Checksum, Status, Error attributes
    :param logger: logger to use (defaults to gLogger)
    :param dataManager: DataManager instance (a fresh one is built when absent)
    :param seCache: optional dict caching StorageElement objects by SE name
    """
    if not logger:
        logger = gLogger
    if not dataManager:
        dataManager = DataManager()
    if not seCache:
        seCache = {}
    log = logger.getSubLogger("filterReplicas")
    ret = {"Valid": [], "NoMetadata": [], "Bad": [], 'NoReplicas': [], 'NoPFN': []}
    replicas = dataManager.getActiveReplicas(opFile.LFN)
    if not replicas["OK"]:
        log.error(replicas["Message"])
        return replicas
    # Bug fix: the previous literal pattern "not such file or directory"
    # never matched the actual "no such file ..." messages (and re.match
    # anchors at the start, so the wildcards are required) - restored the
    # pattern used by every other version of this function
    reNotExists = re.compile(r".*such file.*")
    replicas = replicas["Value"]
    failed = replicas["Failed"].get(opFile.LFN, "")
    if reNotExists.match(failed.lower()):
        opFile.Status = "Failed"
        opFile.Error = failed
        return S_ERROR(failed)
    replicas = replicas["Successful"].get(opFile.LFN, {})
    for repSEName in replicas:
        # reuse a cached StorageElement; only build one when not cached yet
        repSE = seCache[repSEName] if repSEName in seCache else \
            seCache.setdefault(repSEName, StorageElement(repSEName))
        pfn = repSE.getPfnForLfn(opFile.LFN)
        if not pfn["OK"] or opFile.LFN not in pfn['Value']['Successful']:
            log.warn(
                "unable to create pfn for %s lfn at %s: %s" %
                (opFile.LFN, repSEName,
                 pfn.get('Message', pfn.get('Value', {}).get('Failed', {}).get(opFile.LFN))))
            ret["NoPFN"].append(repSEName)
        else:
            pfn = pfn["Value"]['Successful'][opFile.LFN]
            repSEMetadata = repSE.getFileMetadata(pfn)
            error = repSEMetadata.get(
                'Message', repSEMetadata.get('Value', {}).get('Failed', {}).get(pfn))
            if error:
                log.warn(
                    'unable to get metadata at %s for %s' % (repSEName, opFile.LFN),
                    error.replace('\n', ''))
                if 'File does not exist' in error:
                    ret['NoReplicas'].append(repSEName)
                else:
                    ret["NoMetadata"].append(repSEName)
            else:
                repSEMetadata = repSEMetadata['Value']['Successful'][pfn]
                seChecksum = repSEMetadata.get("Checksum")
                if opFile.Checksum and seChecksum and not compareAdler(
                        seChecksum, opFile.Checksum):
                    # The checksum in the request may be wrong, check with FC
                    fcMetadata = FileCatalog().getFileMetadata(opFile.LFN)
                    fcChecksum = fcMetadata.get('Value', {}).get(
                        'Successful', {}).get(opFile.LFN, {}).get('Checksum')
                    if fcChecksum and fcChecksum != opFile.Checksum and compareAdler(
                            fcChecksum, seChecksum):
                        opFile.Checksum = fcChecksum
                        ret['Valid'].append(repSEName)
                    else:
                        log.warn(" %s checksum mismatch, request: %s @%s: %s" %
                                 (opFile.LFN, opFile.Checksum, repSEName, seChecksum))
                        ret["Bad"].append(repSEName)
                else:
                    # # if we're here repSE is OK
                    ret["Valid"].append(repSEName)
    return S_OK(ret)
def filterReplicas(opFile, logger=None, dataManager=None):
    """ filter out banned/invalid source SEs

    Returns S_OK with a dict of lists (Valid / Bad / NoMetadata /
    NoReplicas / NoActiveReplicas / ...) classifying each replica of
    opFile; may rewrite opFile.Checksum(ChecksumType) from the File
    Catalog or the storage.
    """
    logger = gLogger if logger is None else logger
    dataManager = DataManager() if dataManager is None else dataManager
    subLog = logger.getSubLogger("filterReplicas")
    verdicts = defaultdict(list)
    activeRes = dataManager.getActiveReplicas(opFile.LFN, getUrl=False)
    if not activeRes["OK"]:
        subLog.error('Failed to get active replicas', activeRes["Message"])
        return activeRes
    activeRes = activeRes["Value"]
    failedMsg = activeRes["Failed"].get(opFile.LFN, "")
    if re.compile(r".*such file.*").match(failedMsg.lower()):
        # the catalog itself reports the file as missing
        opFile.Status = "Failed"
        opFile.Error = failedMsg
        return S_ERROR(failedMsg)
    replicas = activeRes["Successful"].get(opFile.LFN, {})
    noReplicas = False
    if not replicas:
        allRes = dataManager.getReplicas(opFile.LFN, getUrl=False)
        if not allRes['OK']:
            return allRes
        inactive = allRes['Value']['Successful'].get(opFile.LFN, {})
        if inactive:
            # There are replicas but we cannot get metadata because the replica is not active
            verdicts['NoActiveReplicas'] += list(inactive)
        else:
            verdicts['NoReplicas'].append(None)
            noReplicas = True
        subLog.verbose("File has no%s replica in File Catalog" % ('' if noReplicas else ' active'), opFile.LFN)
    if not opFile.Checksum or hexAdlerToInt(opFile.Checksum) is False:
        # request checksum absent or malformed: fall back to the File Catalog value
        fcMetadata = FileCatalog().getFileMetadata(opFile.LFN)
        fcChecksum = fcMetadata.get('Value', {}).get('Successful', {}).get(opFile.LFN, {}).get('Checksum')
        if fcChecksum:
            if hexAdlerToInt(fcChecksum) is False:
                opFile.Checksum = None
            else:
                opFile.Checksum = fcChecksum
                opFile.ChecksumType = fcMetadata['Value']['Successful'][opFile.LFN].get('ChecksumType', 'Adler32')
    if not replicas:
        # nothing left to probe: return what was collected so far
        return S_OK(verdicts)
    for repSEName in replicas:
        metaRes = StorageElement(repSEName).getFileMetadata(opFile.LFN)
        metaError = metaRes.get('Message', metaRes.get('Value', {}).get('Failed', {}).get(opFile.LFN))
        if metaError:
            subLog.warn('unable to get metadata at %s for %s' % (repSEName, opFile.LFN), metaError.replace('\n', ''))
            bucket = 'NoReplicas' if 'File does not exist' in metaError else 'NoMetadata'
            verdicts[bucket].append(repSEName)
        elif noReplicas:
            # If a replica was found somewhere, don't set the file as no replicas
            verdicts['NoReplicas'] = []
        else:
            seMeta = metaRes['Value']['Successful'][opFile.LFN]
            # integer Adler value, or False when the SE checksum is malformed
            seChecksum = hexAdlerToInt(seMeta.get("Checksum"))
            if seChecksum is False and opFile.Checksum:
                verdicts['NoMetadata'].append(repSEName)
            elif not seChecksum and opFile.Checksum:
                opFile.Checksum = None
                opFile.ChecksumType = None
            elif seChecksum and (not opFile.Checksum or opFile.Checksum == 'False'):
                # adopt the SE checksum (back in hex form), forcing Adler32
                opFile.Checksum = intAdlerToHex(seChecksum)
                opFile.ChecksumType = 'Adler32'
            if not opFile.Checksum or not seChecksum or compareAdler(intAdlerToHex(seChecksum), opFile.Checksum):
                verdicts["Valid"].append(repSEName)
            else:
                subLog.warn(" %s checksum mismatch, FC: '%s' @%s: '%s'" %
                            (opFile.LFN, opFile.Checksum, repSEName, intAdlerToHex(seChecksum)))
                verdicts["Bad"].append(repSEName)
    return S_OK(verdicts)
def filterReplicas(opFile, logger=None, dataManager=None):
    """ filter out banned/invalid source SEs

    Classifies each replica of opFile into Valid / Bad / NoMetadata /
    NoReplicas / NoActiveReplicas buckets (a defaultdict of lists) and
    may rewrite opFile.Checksum(ChecksumType) from the File Catalog or
    the storage when the request value is missing or malformed.
    """
    if logger is None:
        logger = gLogger
    if dataManager is None:
        dataManager = DataManager()
    log = logger.getSubLogger("filterReplicas")
    result = defaultdict(list)
    replicas = dataManager.getActiveReplicas(opFile.LFN, getUrl=False)
    if not replicas["OK"]:
        log.error('Failed to get active replicas', replicas["Message"])
        return replicas
    reNotExists = re.compile(r".*such file.*")
    replicas = replicas["Value"]
    failed = replicas["Failed"].get(opFile.LFN, "")
    if reNotExists.match(failed.lower()):
        # the catalog itself reports the file as missing
        opFile.Status = "Failed"
        opFile.Error = failed
        return S_ERROR(failed)
    replicas = replicas["Successful"].get(opFile.LFN, {})
    noReplicas = False
    if not replicas:
        # no active replica: check all replicas to tell "inactive" from "none"
        allReplicas = dataManager.getReplicas(opFile.LFN, getUrl=False)
        if allReplicas['OK']:
            allReplicas = allReplicas['Value']['Successful'].get(opFile.LFN, {})
            if not allReplicas:
                result['NoReplicas'].append(None)
                noReplicas = True
            else:
                # There are replicas but we cannot get metadata because the replica is not active
                result['NoActiveReplicas'] += list(allReplicas)
            log.verbose("File has no%s replica in File Catalog" % ('' if noReplicas else ' active'), opFile.LFN)
        else:
            return allReplicas
    # hexAdlerToInt() returns False (not 0) for a malformed value, hence the identity test
    if not opFile.Checksum or hexAdlerToInt(opFile.Checksum) is False:
        # Set Checksum to FC checksum if not set in the request
        fcMetadata = FileCatalog().getFileMetadata(opFile.LFN)
        fcChecksum = fcMetadata.get(
            'Value', {}).get(
            'Successful', {}).get(
            opFile.LFN, {}).get('Checksum')
        # Replace opFile.Checksum if it doesn't match a valid FC checksum
        if fcChecksum:
            if hexAdlerToInt(fcChecksum) is not False:
                opFile.Checksum = fcChecksum
                opFile.ChecksumType = fcMetadata['Value']['Successful'][opFile.LFN].get('ChecksumType', 'Adler32')
            else:
                opFile.Checksum = None
    # If no replica was found, return what we collected as information
    if not replicas:
        return S_OK(result)
    for repSEName in replicas:
        repSEMetadata = StorageElement(repSEName).getFileMetadata(opFile.LFN)
        error = repSEMetadata.get('Message', repSEMetadata.get('Value', {}).get('Failed', {}).get(opFile.LFN))
        if error:
            log.warn('unable to get metadata at %s for %s' % (repSEName, opFile.LFN), error.replace('\n', ''))
            if 'File does not exist' in error:
                result['NoReplicas'].append(repSEName)
            else:
                result["NoMetadata"].append(repSEName)
        elif not noReplicas:
            repSEMetadata = repSEMetadata['Value']['Successful'][opFile.LFN]
            seChecksum = hexAdlerToInt(repSEMetadata.get("Checksum"))
            # As from here seChecksum is an integer or False, not a hex string!
            if seChecksum is False and opFile.Checksum:
                # malformed SE checksum while the request has one: metadata unusable
                result['NoMetadata'].append(repSEName)
            elif not seChecksum and opFile.Checksum:
                # storage has no checksum: drop the request one so it can't mismatch
                opFile.Checksum = None
                opFile.ChecksumType = None
            elif seChecksum and (not opFile.Checksum or opFile.Checksum == 'False'):
                # Use the SE checksum (convert to hex) and force type to be Adler32
                opFile.Checksum = intAdlerToHex(seChecksum)
                opFile.ChecksumType = 'Adler32'
            if not opFile.Checksum or not seChecksum or compareAdler(
                    intAdlerToHex(seChecksum), opFile.Checksum):
                # # All checksums are OK
                result["Valid"].append(repSEName)
            else:
                log.warn(" %s checksum mismatch, FC: '%s' @%s: '%s'" %
                         (opFile.LFN, opFile.Checksum, repSEName, intAdlerToHex(seChecksum)))
                result["Bad"].append(repSEName)
        else:
            # If a replica was found somewhere, don't set the file as no replicas
            result['NoReplicas'] = []
    return S_OK(result)
def filterReplicas( opFile, logger = None, dataManager = None ):
  """ filter out banned/invalid source SEs

  Buckets every replica of opFile into Valid / NoMetadata / Bad /
  NoReplicas / NoPFN and returns the buckets wrapped in S_OK; may
  update opFile.Checksum(ChecksumType) from the File Catalog or
  the storage when the request carries none.
  """
  if logger is None:
    logger = gLogger
  if dataManager is None:
    dataManager = DataManager()
  subLog = logger.getSubLogger( "filterReplicas" )
  buckets = { "Valid" : [], "NoMetadata" : [], "Bad" : [], 'NoReplicas':[], 'NoPFN':[] }
  activeReplicas = dataManager.getActiveReplicas( opFile.LFN )
  if not activeReplicas["OK"]:
    subLog.error( 'Failed to get active replicas', activeReplicas["Message"] )
    return activeReplicas
  activeReplicas = activeReplicas["Value"]
  failedMsg = activeReplicas["Failed"].get( opFile.LFN, "" )
  if re.match( r".*such file.*", failedMsg.lower() ):
    # the catalog itself reports the file as missing
    opFile.Status = "Failed"
    opFile.Error = failedMsg
    return S_ERROR( failedMsg )
  replicas = activeReplicas["Successful"].get( opFile.LFN, {} )
  noReplicas = False
  if not replicas:
    allReplicas = dataManager.getReplicas( opFile.LFN )
    if not allReplicas['OK']:
      return allReplicas
    allReplicas = allReplicas['Value']['Successful'].get( opFile.LFN, {} )
    if allReplicas:
      # probe the inactive replicas: maybe the file exists nowhere at all
      replicas = allReplicas
    else:
      buckets['NoReplicas'].append( None )
      noReplicas = True
    subLog.warn( "File has no%s replica in File Catalog" % ( '' if noReplicas else ' active' ), opFile.LFN )
  if not opFile.Checksum:
    # the request carries no checksum: take the File Catalog one
    fcMetadata = FileCatalog().getFileMetadata( opFile.LFN )
    fcChecksum = fcMetadata.get( 'Value', {} ).get( 'Successful', {} ).get( opFile.LFN, {} ).get( 'Checksum' )
    if fcChecksum:
      opFile.Checksum = fcChecksum
      opFile.ChecksumType = fcMetadata['Value']['Successful'][opFile.LFN].get( 'ChecksumType', 'Adler32' )
  for repSEName in replicas:
    metaRes = StorageElement( repSEName ).getFileMetadata( opFile.LFN )
    metaError = metaRes.get( 'Message', metaRes.get( 'Value', {} ).get( 'Failed', {} ).get( opFile.LFN ) )
    if metaError:
      subLog.warn( 'unable to get metadata at %s for %s' % ( repSEName, opFile.LFN ), metaError.replace( '\n', '' ) )
      if 'File does not exist' in metaError:
        buckets['NoReplicas'].append( repSEName )
      else:
        buckets["NoMetadata"].append( repSEName )
      continue
    if noReplicas:
      # If a replica was found somewhere, don't set the file as no replicas
      buckets['NoReplicas'] = []
      continue
    seChecksum = metaRes['Value']['Successful'][opFile.LFN].get( "Checksum" )
    if not seChecksum and opFile.Checksum:
      # storage has no checksum: drop the request one so it can't mismatch
      opFile.Checksum = None
      opFile.ChecksumType = None
    elif seChecksum and not opFile.Checksum:
      opFile.Checksum = seChecksum
    if not opFile.Checksum or not seChecksum or compareAdler( seChecksum, opFile.Checksum ):
      buckets["Valid"].append( repSEName )
    else:
      subLog.warn( " %s checksum mismatch, FC: '%s' @%s: '%s'" % ( opFile.LFN, opFile.Checksum, repSEName, seChecksum ) )
      buckets["Bad"].append( repSEName )
  return S_OK( buckets )
def __resolveSource(self):
    """ Resolve a source SURL for every file that has none yet and
    validate the source replica (existence, availability, caching,
    size, checksum).  Unusable files get Status='Failed' and a 'Reason'.

    :return: S_OK() normally, S_ERROR when no file has an eligible source
    """
    # Modernized: dict.has_key() was removed in Python 3; use the
    # 'in' operator and iterate dicts directly instead of via .keys()
    toResolve = []
    for lfn in self.fileDict:
        if ('Source' not in self.fileDict[lfn]) and (
                self.fileDict[lfn].get('Status') != 'Failed'):
            toResolve.append(lfn)
    if not toResolve:
        return S_OK()
    res = self.__updateMetadataCache(toResolve)
    if not res['OK']:
        return res
    res = self.__updateReplicaCache(toResolve)
    if not res['OK']:
        return res
    # Build the transfer SURL for each file's replica at the source SE
    for lfn in toResolve:
        if self.fileDict[lfn].get('Status') == 'Failed':
            continue
        replicas = self.catalogReplicas.get(lfn, {})
        if self.sourceSE not in replicas:
            self.__setFileParameter(lfn, 'Reason', "No replica at SourceSE")
            self.__setFileParameter(lfn, 'Status', 'Failed')
            continue
        res = self.oSourceSE.getPfnForProtocol(replicas[self.sourceSE], 'SRM2', withPort=True)
        if not res['OK']:
            self.__setFileParameter(lfn, 'Reason', res['Message'])
            self.__setFileParameter(lfn, 'Status', 'Failed')
            continue
        res = self.setSourceSURL(lfn, res['Value'])
        if not res['OK']:
            self.__setFileParameter(lfn, 'Reason', res['Message'])
            self.__setFileParameter(lfn, 'Status', 'Failed')
            continue
    # Map every resolved source PFN back to its LFN
    toResolve = {}
    for lfn in self.fileDict:
        if 'Source' in self.fileDict[lfn]:
            toResolve[self.fileDict[lfn]['Source']] = lfn
    if not toResolve:
        return S_ERROR("No eligible Source files")
    res = self.oSourceSE.getFileMetadata(list(toResolve))
    if not res['OK']:
        return S_ERROR("Failed to check source file metadata")
    # Files whose metadata could not be obtained are failed individually
    for pfn, error in res['Value']['Failed'].items():
        lfn = toResolve[pfn]
        if re.search('File does not exist', error):
            self.__setFileParameter(lfn, 'Reason', "Source file does not exist")
        else:
            self.__setFileParameter(lfn, 'Reason', "Failed to get Source metadata")
        self.__setFileParameter(lfn, 'Status', 'Failed')
    # Validate each reachable source replica
    for pfn, metadata in res['Value']['Successful'].items():
        lfn = toResolve[pfn]
        if metadata['Unavailable']:
            self.__setFileParameter(lfn, 'Reason', "Source file Unavailable")
            self.__setFileParameter(lfn, 'Status', 'Failed')
        elif metadata['Lost']:
            self.__setFileParameter(lfn, 'Reason', "Source file Lost")
            self.__setFileParameter(lfn, 'Status', 'Failed')
        elif not metadata['Cached']:
            # this variant does not prestage: a non-cached source is fatal
            self.__setFileParameter(lfn, 'Reason', "Source file not Cached")
            self.__setFileParameter(lfn, 'Status', 'Failed')
        elif metadata['Size'] != self.catalogMetadata[lfn]['Size']:
            self.__setFileParameter(lfn, 'Reason', "Source size mismatch")
            self.__setFileParameter(lfn, 'Status', 'Failed')
        elif self.catalogMetadata[lfn]['Checksum'] and metadata['Checksum'] and \
                not (compareAdler(metadata['Checksum'], self.catalogMetadata[lfn]['Checksum'])):
            self.__setFileParameter(lfn, 'Reason', "Source checksum mismatch")
            self.__setFileParameter(lfn, 'Status', 'Failed')
    return S_OK()