class DataIntegrityClient(Client):
    """Client for the DataManagement/DataIntegrity service.

    The following methods are supported in the service but are not mentioned
    explicitly here:

        getProblematic()
            Obtains a problematic file from the IntegrityDB based on the LastUpdate time
        getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB
        getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found
        getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB
        getTransformationProblematics(prodID)
            Obtains the problematics for a given production
        incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID
        changeProblematicPrognosis(fileID, newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis
        setProblematicStatus(fileID, status)
            Updates the status of a problematic in the integrityDB
        removeProblematic(self, fileID)
            This removes the specified file ID from the integrity DB
        insertProblematic(sourceComponent, fileMetadata)
            Inserts file with supplied metadata into the integrity DB
    """

    def __init__(self, **kwargs):
        """Point the RPC client at the DataIntegrity service and create the
        data-management helpers used by the check/resolve methods."""
        Client.__init__(self, **kwargs)
        self.setServer('DataManagement/DataIntegrity')
        self.dm = DataManager()
        self.fc = FileCatalog()

    ##########################################################################
    #
    # This section contains the specific methods for LFC->SE checks
    #

    def catalogDirectoryToSE(self, lfnDir):
        """Obtain the replica and metadata information from the catalog for the
        supplied directory (or list of directories) and check it against the
        storage elements.

        :param lfnDir: catalog directory path or list of paths
        :return: S_OK({'CatalogMetadata': metadataDict, 'CatalogReplicas': replicaDict})
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        if type(lfnDir) in types.StringTypes:
            lfnDir = [lfnDir]
        res = self.__getCatalogDirectoryContents(lfnDir)
        if not res['OK']:
            return res
        replicas = res['Value']['Replicas']
        catalogMetadata = res['Value']['Metadata']
        res = self.__checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        }
        return S_OK(resDict)

    def catalogFileToSE(self, lfns):
        """Obtain the replica and metadata information from the catalog for the
        supplied LFN(s) and check it against the storage elements.

        :param lfns: LFN or list of LFNs
        :return: S_OK({'CatalogMetadata': metadataDict, 'CatalogReplicas': replicaDict})
        """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        if type(lfns) in types.StringTypes:
            lfns = [lfns]
        res = self.__getCatalogMetadata(lfns)
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        res = self.__getCatalogReplicas(catalogMetadata.keys())
        if not res['OK']:
            return res
        replicas = res['Value']
        res = self.__checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        }
        return S_OK(resDict)

    def checkPhysicalFiles(self, replicas, catalogMetadata, ses=None):
        """Take the supplied replica and metadata information (already obtained
        from the catalog) and check it against the storage elements.

        :param replicas: {lfn: {se: url}} replica information
        :param catalogMetadata: {lfn: metadataDict} catalog metadata
        :param ses: optional list of SE names to restrict the check to
        """
        # NOTE: default was a mutable [] — replaced by None to avoid the
        # shared-mutable-default pitfall; semantics are unchanged.
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        return self.__checkPhysicalFiles(replicas, catalogMetadata, ses=ses)

    def __checkPhysicalFiles(self, replicas, catalogMetadata, ses=None):
        """Obtain the physical file metadata and check the metadata against the
        catalog entries, reporting size mismatches as CatalogPFNSizeMismatch."""
        seLfns = {}
        for lfn, replicaDict in replicas.items():
            for se, _url in replicaDict.items():
                # If an SE restriction list was supplied, skip other SEs
                if ses and (se not in ses):
                    continue
                seLfns.setdefault(se, []).append(lfn)
        gLogger.info('%s %s' %
                     ('Storage Element'.ljust(20), 'Replicas'.rjust(20)))
        for se in sortList(seLfns):
            files = len(seLfns[se])
            gLogger.info('%s %s' % (se.ljust(20), str(files).rjust(20)))
            lfns = seLfns[se]
            sizeMismatch = []
            res = self.__checkPhysicalFileMetadata(lfns, se)
            if not res['OK']:
                gLogger.error('Failed to get physical file metadata.',
                              res['Message'])
                return res
            for lfn, metadata in res['Value'].items():
                if lfn in catalogMetadata:
                    # Zero-size storage files are reported separately
                    # (PFNZeroSize) so are not flagged as a size mismatch here
                    if (metadata['Size'] != catalogMetadata[lfn]['Size']) and (
                            metadata['Size'] != 0):
                        sizeMismatch.append((lfn, 'deprecatedUrl', se,
                                             'CatalogPFNSizeMismatch'))
            if sizeMismatch:
                self.__reportProblematicReplicas(sizeMismatch, se,
                                                 'CatalogPFNSizeMismatch')
        return S_OK()

    def __checkPhysicalFileMetadata(self, lfns, se):
        """Obtain the physical file metadata from the SE and check the files
        are available, reporting missing/lost/unavailable/zero-size replicas.

        :return: S_OK({lfn: storageMetadataDict}) for the accessible files
        """
        gLogger.info('Checking the integrity of %s physical files at %s' %
                     (len(lfns), se))
        res = StorageElement(se).getFileMetadata(lfns)
        if not res['OK']:
            gLogger.error('Failed to get metadata for lfns.', res['Message'])
            return res
        lfnMetadataDict = res['Value']['Successful']
        # If the replicas are completely missing
        missingReplicas = []
        for lfn, reason in res['Value']['Failed'].items():
            if re.search('File does not exist', reason):
                missingReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNMissing'))
        if missingReplicas:
            self.__reportProblematicReplicas(missingReplicas, se, 'PFNMissing')
        lostReplicas = []
        unavailableReplicas = []
        zeroSizeReplicas = []
        # If the files are not accessible
        for lfn, lfnMetadata in lfnMetadataDict.items():
            if lfnMetadata['Lost']:
                lostReplicas.append((lfn, 'deprecatedUrl', se, 'PFNLost'))
            if lfnMetadata['Unavailable']:
                unavailableReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNUnavailable'))
            if lfnMetadata['Size'] == 0:
                zeroSizeReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNZeroSize'))
        if lostReplicas:
            self.__reportProblematicReplicas(lostReplicas, se, 'PFNLost')
        if unavailableReplicas:
            self.__reportProblematicReplicas(unavailableReplicas, se,
                                             'PFNUnavailable')
        if zeroSizeReplicas:
            self.__reportProblematicReplicas(zeroSizeReplicas, se,
                                             'PFNZeroSize')
        gLogger.info(
            'Checking the integrity of physical files at %s complete' % se)
        return S_OK(lfnMetadataDict)

    ##########################################################################
    #
    # This section contains the specific methods for SE->LFC checks
    #

    def storageDirectoryToCatalog(self, lfnDir, storageElement):
        """Obtain the files found on the storage element in the supplied
        directories, determine whether they exist in the catalog and check
        their metadata elements."""
        gLogger.info("-" * 40)
        gLogger.info("Performing the SE->LFC check at %s" % storageElement)
        gLogger.info("-" * 40)
        if type(lfnDir) in types.StringTypes:
            lfnDir = [lfnDir]
        res = self.__getStorageDirectoryContents(lfnDir, storageElement)
        if not res['OK']:
            return res
        storageFileMetadata = res['Value']
        if storageFileMetadata:
            return self.__checkCatalogForSEFiles(storageFileMetadata,
                                                 storageElement)
        return S_OK({'CatalogMetadata': {}, 'StorageMetadata': {}})

    def __checkCatalogForSEFiles(self, storageMetadata, storageElement):
        """Check that the storage files are registered in the catalog at this
        SE and that the catalog size matches the storage size."""
        gLogger.info('Checking %s storage files exist in the catalog' %
                     len(storageMetadata))
        res = self.fc.getReplicas(storageMetadata)
        if not res['OK']:
            gLogger.error("Failed to get replicas for LFN", res['Message'])
            return res
        failedLfns = res['Value']['Failed']
        successfulLfns = res['Value']['Successful']
        notRegisteredLfns = []
        for lfn in storageMetadata:
            if lfn in failedLfns:
                if 'No such file or directory' in failedLfns[lfn]:
                    notRegisteredLfns.append(
                        (lfn, 'deprecatedUrl', storageElement,
                         'LFNNotRegistered'))
                    failedLfns.pop(lfn)
            elif storageElement not in successfulLfns[lfn]:
                notRegisteredLfns.append(
                    (lfn, 'deprecatedUrl', storageElement,
                     'LFNNotRegistered'))
        if notRegisteredLfns:
            self.__reportProblematicReplicas(notRegisteredLfns,
                                             storageElement,
                                             'LFNNotRegistered')
        if failedLfns:
            return S_ERROR('Failed to obtain replicas')
        # For the LFNs found to be registered obtain the file metadata from
        # the catalog and verify against the storage metadata
        res = self.__getCatalogMetadata(storageMetadata)
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        sizeMismatch = []
        for lfn, lfnCatalogMetadata in catalogMetadata.items():
            lfnStorageMetadata = storageMetadata[lfn]
            if (lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size']) and (
                    lfnStorageMetadata['Size'] != 0):
                sizeMismatch.append((lfn, 'deprecatedUrl', storageElement,
                                     'CatalogPFNSizeMismatch'))
        if sizeMismatch:
            self.__reportProblematicReplicas(sizeMismatch, storageElement,
                                             'CatalogPFNSizeMismatch')
        gLogger.info('Checking storage files exist in the catalog complete')
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'StorageMetadata': storageMetadata
        }
        return S_OK(resDict)

    def getStorageDirectoryContents(self, lfnDir, storageElement):
        """Take the supplied LFN directories and recursively obtain the files
        in the supplied storage element."""
        return self.__getStorageDirectoryContents(lfnDir, storageElement)

    def __getStorageDirectoryContents(self, lfnDir, storageElement):
        """Obtain the contents of the supplied directory on the storage,
        recursing into sub-directories and reporting zero-size files.

        :return: S_OK({lfn: storageFileMetadata})
        """
        gLogger.info('Obtaining the contents for %s directories at %s' %
                     (len(lfnDir), storageElement))
        se = StorageElement(storageElement)
        res = se.exists(lfnDir)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of directories",
                          res['Message'])
            return res
        for directory, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existance of directory',
                          '%s %s' % (directory, error))
        if res['Value']['Failed']:
            return S_ERROR('Failed to determine existance of directory')
        directoryExists = res['Value']['Successful']
        activeDirs = []
        for directory in sorted(directoryExists):
            exists = directoryExists[directory]
            if exists:
                activeDirs.append(directory)
        allFiles = {}
        while len(activeDirs) > 0:
            currentDir = activeDirs[0]
            res = se.listDirectory(currentDir)
            activeDirs.remove(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents',
                              res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error(
                    'Failed to get directory contents',
                    '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
                return S_ERROR(res['Value']['Failed'][currentDir])
            else:
                dirContents = res['Value']['Successful'][currentDir]
                # NOTE(review): 'Successful' here is presumed to map URL->LFN;
                # extending with the mapping iterates its keys (URLs) — confirm
                # this matches what se.listDirectory expects for recursion.
                activeDirs.extend(
                    se.getLFNFromURL(dirContents['SubDirs']).get(
                        'Value', {}).get('Successful', []))
                fileURLMetadata = dirContents['Files']
                fileMetadata = {}
                res = se.getLFNFromURL(fileURLMetadata)
                if not res['OK']:
                    gLogger.error('Failed to get directory content LFNs',
                                  res['Message'])
                    return res
                for url, error in res['Value']['Failed'].items():
                    gLogger.error("Failed to get LFN for URL",
                                  "%s %s" % (url, error))
                if res['Value']['Failed']:
                    return S_ERROR("Failed to get LFNs for PFNs")
                urlLfns = res['Value']['Successful']
                for urlLfn, lfn in urlLfns.items():
                    fileMetadata[lfn] = fileURLMetadata[urlLfn]
                allFiles.update(fileMetadata)
        zeroSizeFiles = []
        # sorted() materialises the keys so popping while iterating is safe
        for lfn in sorted(allFiles):
            if os.path.basename(lfn) == 'dirac_directory':
                allFiles.pop(lfn)
            else:
                metadata = allFiles[lfn]
                if metadata['Size'] == 0:
                    zeroSizeFiles.append(
                        (lfn, 'deprecatedUrl', storageElement, 'PFNZeroSize'))
        if zeroSizeFiles:
            self.__reportProblematicReplicas(zeroSizeFiles, storageElement,
                                             'PFNZeroSize')
        gLogger.info('Obtained at total of %s files for directories at %s' %
                     (len(allFiles), storageElement))
        return S_OK(allFiles)

    def __getStoragePathExists(self, lfnPaths, storageElement):
        """Determine which of the supplied paths physically exist on the
        supplied storage element.

        :return: S_OK({lfn: True}) containing only the paths that exist
        """
        gLogger.info('Determining the existance of %d files at %s' %
                     (len(lfnPaths), storageElement))
        se = StorageElement(storageElement)
        res = se.exists(lfnPaths)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of paths",
                          res['Message'])
            return res
        for lfnPath, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existance of path',
                          '%s %s' % (lfnPath, error))
        if res['Value']['Failed']:
            return S_ERROR('Failed to determine existance of paths')
        pathExists = res['Value']['Successful']
        resDict = {}
        for lfn, exists in pathExists.items():
            if exists:
                resDict[lfn] = True
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the specific methods for obtaining replica and
    # metadata information from the catalog
    #

    def __getCatalogDirectoryContents(self, lfnDir):
        """Obtain the contents of the supplied directories from the catalog,
        recursing into sub-directories; report zero-replica and zero-size
        files.

        :return: S_OK({'Metadata': {lfn: metadata}, 'Replicas': {lfn: {se: pfn}}})
        """
        gLogger.info('Obtaining the catalog contents for %s directories' %
                     len(lfnDir))
        activeDirs = lfnDir
        allFiles = {}
        while len(activeDirs) > 0:
            currentDir = activeDirs[0]
            res = self.fc.listDirectory(currentDir)
            activeDirs.remove(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents',
                              res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                # was dict.has_key() — removed deprecated idiom
                gLogger.error(
                    'Failed to get directory contents',
                    '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend(dirContents['SubDirs'])
                allFiles.update(dirContents['Files'])
        zeroReplicaFiles = []
        zeroSizeFiles = []
        allReplicaDict = {}
        allMetadataDict = {}
        for lfn, lfnDict in allFiles.items():
            lfnReplicas = {}
            for se, replicaDict in lfnDict['Replicas'].items():
                lfnReplicas[se] = replicaDict['PFN']
            if not lfnReplicas:
                zeroReplicaFiles.append(lfn)
            allReplicaDict[lfn] = lfnReplicas
            allMetadataDict[lfn] = lfnDict['MetaData']
            if lfnDict['MetaData']['Size'] == 0:
                zeroSizeFiles.append(lfn)
        if zeroReplicaFiles:
            self.__reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        if zeroSizeFiles:
            self.__reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
        gLogger.info(
            'Obtained at total of %s files for the supplied directories' %
            len(allMetadataDict))
        resDict = {'Metadata': allMetadataDict, 'Replicas': allReplicaDict}
        return S_OK(resDict)

    def __getCatalogReplicas(self, lfns):
        """Obtain the file replicas from the catalog while checking that there
        are replicas; zero-replica files are reported as LFNZeroReplicas."""
        gLogger.info('Obtaining the replicas for %s files' % len(lfns))
        zeroReplicaFiles = []
        res = self.fc.getReplicas(lfns, allStatus=True)
        if not res['OK']:
            gLogger.error('Failed to get catalog replicas', res['Message'])
            return res
        allReplicas = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].items():
            if re.search('File has zero replicas', error):
                zeroReplicaFiles.append(lfn)
        if zeroReplicaFiles:
            self.__reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        gLogger.info('Obtaining the replicas for files complete')
        return S_OK(allReplicas)

    def __getCatalogMetadata(self, lfns):
        """Obtain the file metadata from the catalog while checking the files
        exist; missing and zero-size files are reported as problematic."""
        if not lfns:
            return S_OK({})
        gLogger.info('Obtaining the catalog metadata for %s files' %
                     len(lfns))
        missingCatalogFiles = []
        zeroSizeFiles = []
        res = self.fc.getFileMetadata(lfns)
        if not res['OK']:
            gLogger.error('Failed to get catalog metadata', res['Message'])
            return res
        allMetadata = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].items():
            if re.search('No such file or directory', error):
                missingCatalogFiles.append(lfn)
        if missingCatalogFiles:
            self.__reportProblematicFiles(missingCatalogFiles,
                                          'LFNCatalogMissing')
        for lfn, metadata in allMetadata.items():
            if metadata['Size'] == 0:
                zeroSizeFiles.append(lfn)
        if zeroSizeFiles:
            self.__reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
        gLogger.info('Obtaining the catalog metadata complete')
        return S_OK(allMetadata)

    ##########################################################################
    #
    # This section contains the methods for inserting problematic files into
    # the integrity DB
    #

    def __reportProblematicFiles(self, lfns, reason):
        """Simple wrapper function around setFileProblematic."""
        gLogger.info('The following %s files were found with %s' %
                     (len(lfns), reason))
        for lfn in sortList(lfns):
            gLogger.info(lfn)
        res = self.setFileProblematic(lfns,
                                      reason,
                                      sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with files',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with files')

    def setFileProblematic(self, lfn, reason, sourceComponent=''):
        """Update the status of the file in the FileCatalog and the IntegrityDB.

        :param lfn: the LFN (or list of LFNs) of the file
        :param reason: given to the integrity DB; should reflect the problem
                       observed with the file
        :param sourceComponent: the component issuing the request
        """
        if type(lfn) == types.ListType:
            lfns = lfn
        elif type(lfn) in types.StringTypes:
            # accept unicode LFNs too, consistent with the other public
            # methods which use types.StringTypes
            lfns = [lfn]
        else:
            errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setFileProblematic: Attempting to update %s files."
            % len(lfns))
        fileMetadata = {}
        for lfn in lfns:
            fileMetadata[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': '',
                'SE': ''
            }
        res = self.insertProblematic(sourceComponent, fileMetadata)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB"
            )
        return res

    def __reportProblematicReplicas(self, replicaTuple, se, reason):
        """Simple wrapper function around setReplicaProblematic."""
        gLogger.info('The following %s files had %s at %s' %
                     (len(replicaTuple), reason, se))
        # local names deliberately do not shadow the se/reason parameters
        for lfn, _pfn, _se, _reason in sortList(replicaTuple):
            if lfn:
                gLogger.info(lfn)
        res = self.setReplicaProblematic(
            replicaTuple, sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with replicas',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with replicas')

    def setReplicaProblematic(self, replicaTuple, sourceComponent=''):
        """Update the status of the replica in the FileCatalog and the
        IntegrityDB.

        The supplied replica information should be (a list of) tuples
        (lfn, pfn, se, prognosis) where:

        :param lfn: the LFN of the file
        :param pfn: the PFN if available (otherwise '')
        :param se: the storage element of the problematic replica
                   (otherwise '')
        :param prognosis: given to the integrity DB; should reflect the
                          problem observed with the file
        :param sourceComponent: the component issuing the request
        """
        if type(replicaTuple) == types.TupleType:
            replicaTuple = [replicaTuple]
        elif type(replicaTuple) == types.ListType:
            pass
        else:
            errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas."
            % len(replicaTuple))
        replicaDict = {}
        for lfn, pfn, se, reason in replicaTuple:
            replicaDict[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': pfn,
                'SE': se
            }
        res = self.insertProblematic(sourceComponent, replicaDict)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB"
            )
            return res
        for lfn in replicaDict.keys():
            replicaDict[lfn]['Status'] = 'Problematic'
        res = self.fc.setReplicaStatus(replicaDict)
        if not res['OK']:
            errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
            gLogger.error(errStr, res['Message'])
            return res
        failed = res['Value']['Failed']
        successful = res['Value']['Successful']
        resDict = {'Successful': successful, 'Failed': failed}
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the resolution methods for various prognoses
    #

    def __updateCompletedFiles(self, prognosis, fileID):
        """Mark the supplied problematic as Resolved in the integrity DB."""
        gLogger.info("%s file (%d) is resolved" % (prognosis, fileID))
        return self.setProblematicStatus(fileID, 'Resolved')

    def __returnProblematicError(self, fileID, res):
        """Increment the retry count for the file and propagate the error."""
        self.incrementProblematicRetry(fileID)
        gLogger.error('DataIntegrityClient failure', res['Message'])
        return res

    def __updateReplicaToChecked(self, problematicDict):
        """Set the replica status to Checked in the catalog and mark the
        problematic as resolved."""
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        prognosis = problematicDict['Prognosis']
        problematicDict['Status'] = 'Checked'
        res = returnSingleResult(
            self.fc.setReplicaStatus({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        gLogger.info("%s replica (%d) is updated to Checked status" %
                     (prognosis, fileID))
        return self.__updateCompletedFiles(prognosis, fileID)

    def resolveCatalogPFNSizeMismatch(self, problematicDict):
        """Take the problematic dictionary returned by the integrity DB and
        resolve the CatalogPFNSizeMismatch prognosis by comparing the catalog,
        storage and bookkeeping sizes."""
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']
        res = returnSingleResult(self.fc.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']
        res = returnSingleResult(StorageElement(se).getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        bkKCatalog = FileCatalog(['BookkeepingDB'])
        res = returnSingleResult(bkKCatalog.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        bookkeepingSize = res['Value']
        if bookkeepingSize == catalogSize == storageSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) matched all registered sizes."
                % fileID)
            return self.__updateReplicaToChecked(problematicDict)
        if (catalogSize == bookkeepingSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also"
                % fileID)
            res = returnSingleResult(self.fc.getReplicas(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            if len(res['Value']) <= 1:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has no other replicas."
                    % fileID)
                return S_ERROR(
                    "Not removing catalog file mismatch since the only replica"
                )
            else:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..."
                    % fileID)
                res = self.dm.removeReplica(se, lfn)
                if not res['OK']:
                    return self.__returnProblematicError(fileID, res)
                return self.__updateCompletedFiles('CatalogPFNSizeMismatch',
                                                   fileID)
        if (catalogSize != bookkeepingSize) and (bookkeepingSize ==
                                                 storageSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size"
                % fileID)
            res = self.__updateReplicaToChecked(problematicDict)
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.changeProblematicPrognosis(fileID,
                                                   'BKCatalogSizeMismatch')
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count"
            % fileID)
        return self.incrementProblematicRetry(fileID)

    def resolvePFNNotRegistered(self, problematicDict):
        """Take the problematic dictionary returned by the integrity DB and
        resolve the PFNNotRegistered prognosis by registering the replica or
        removing the orphaned storage file."""
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']
        se = StorageElement(seName)
        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            # The file does not exist in the catalog
            res = returnSingleResult(se.removeFile(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        res = returnSingleResult(se.getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info("PFNNotRegistered replica (%d) found to be missing."
                         % fileID)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        elif not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageMetadata = res['Value']
        if storageMetadata['Lost']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        if storageMetadata['Unavailable']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count"
                % fileID)
            return self.incrementProblematicRetry(fileID)
        # HACK until we can obtain the space token descriptions through GFAL
        site = seName.split('_')[0].split('-')[0]
        if not storageMetadata['Cached']:
            if lfn.endswith('.raw'):
                seName = '%s-RAW' % site
            else:
                seName = '%s-RDST' % site
        elif storageMetadata['Migrated']:
            if lfn.startswith('/lhcb/data'):
                seName = '%s_M-DST' % site
            else:
                seName = '%s_MC_M-DST' % site
        else:
            if lfn.startswith('/lhcb/data'):
                seName = '%s-DST' % site
            else:
                seName = '%s_MC-DST' % site
        problematicDict['SE'] = seName
        res = returnSingleResult(se.getURL(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        problematicDict['PFN'] = res['Value']
        res = returnSingleResult(self.fc.addReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        res = returnSingleResult(self.fc.getFileMetadata(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']['Size'] != storageMetadata['Size']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNNotRegistered', fileID)

    def resolveLFNCatalogMissing(self, problematicDict):
        """Take the problematic dictionary returned by the integrity DB and
        resolve the LFNCatalogMissing prognosis."""
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            return self.__updateCompletedFiles('LFNCatalogMissing', fileID)
        # Remove the file from all catalogs
        # RF_NOTE : here I can do it because it's a single file, but otherwise
        # I would need to sort the path
        res = returnSingleResult(self.fc.removeFile(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        return self.__updateCompletedFiles('LFNCatalogMissing', fileID)

    def resolvePFNMissing(self, problematicDict):
        """Take the problematic dictionary returned by the integrity DB and
        resolve the PFNMissing prognosis."""
        se = problematicDict['SE']
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            gLogger.info("PFNMissing file (%d) no longer exists in catalog" %
                         fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)
        res = returnSingleResult(StorageElement(se).exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            gLogger.info("PFNMissing replica (%d) is no longer missing" %
                         fileID)
            return self.__updateReplicaToChecked(problematicDict)
        gLogger.info("PFNMissing replica (%d) does not exist" % fileID)
        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        replicas = res['Value']
        seSite = se.split('_')[0].split('-')[0]
        found = False
        # removed a leftover debug "print replicas" statement here
        for replicaSE in replicas.keys():
            if re.search(seSite, replicaSE):
                found = True
                problematicDict['SE'] = replicaSE
                se = replicaSE
        if not found:
            gLogger.info(
                "PFNMissing replica (%d) is no longer registered at SE. Resolved."
                % fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)
        gLogger.info(
            "PFNMissing replica (%d) does not exist. Removing from catalog..."
            % fileID)
        res = returnSingleResult(self.fc.removeReplica({lfn:
                                                        problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if len(replicas) == 1:
            gLogger.info(
                "PFNMissing replica (%d) had a single replica. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'LFNZeroReplicas')
        res = self.dm.replicateAndRegister(problematicDict['LFN'], se)
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('PFNMissing', fileID)

    def resolvePFNUnavailable(self, problematicDict):
        """Take the problematic dictionary returned by the integrity DB and
        resolve the PFNUnavailable prognosis."""
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']
        res = returnSingleResult(StorageElement(se).getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            # The file is no longer Unavailable but has now dissapeared
            # completely
            gLogger.info(
                "PFNUnavailable replica (%d) found to be missing. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        if (not res['OK']) or res['Value']['Unavailable']:
            gLogger.info(
                "PFNUnavailable replica (%d) found to still be Unavailable" %
                fileID)
            return self.incrementProblematicRetry(fileID)
        if res['Value']['Lost']:
            gLogger.info(
                "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        gLogger.info("PFNUnavailable replica (%d) is no longer Unavailable" %
                     fileID)
        # Need to make the replica okay in the Catalog
        return self.__updateReplicaToChecked(problematicDict)

    def resolvePFNZeroSize(self, problematicDict):
        """Take the problematic dictionary returned by the integrity DB and
        resolve the PFNZeroSize prognosis."""
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']
        se = StorageElement(seName)
        res = returnSingleResult(se.getFileSize(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info(
                "PFNZeroSize replica (%d) found to be missing. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        if not res['OK']:
            # any other failure previously fell through to a KeyError on
            # res['Value']; propagate it properly instead
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        if storageSize == 0:
            res = returnSingleResult(se.removeFile(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            gLogger.info(
                "PFNZeroSize replica (%d) removed. Updating prognosis" %
                problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        res = returnSingleResult(self.fc.getReplicas(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if seName not in res['Value']:
            gLogger.info(
                "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNNotRegistered')
        res = returnSingleResult(self.fc.getFileMetadata(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']['Size']
        if catalogSize != storageSize:
            gLogger.info(
                "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNZeroSize', fileID)

    ############################################################################################

    def resolveLFNZeroReplicas(self, problematicDict):
        """Take the problematic dictionary returned by the integrity DB and
        resolve the LFNZeroReplicas prognosis."""
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if res['OK'] and res['Value']:
            gLogger.info("LFNZeroReplicas file (%d) found to have replicas" %
                         fileID)
        else:
            gLogger.info(
                "LFNZeroReplicas file (%d) does not have replicas. Checking storage..."
                % fileID)
            pfnsFound = False
            for storageElementName in sorted(
                    gConfig.getValue(
                        'Resources/StorageElementGroups/Tier1_MC_M-DST', [])):
                res = self.__getStoragePathExists([lfn], storageElementName)
                if not res['OK']:
                    # could not query this SE; try the remaining ones
                    continue
                if lfn in res['Value']:
                    gLogger.info(
                        "LFNZeroReplicas file (%d) found storage file at %s" %
                        (fileID, storageElementName))
                    self.__reportProblematicReplicas(
                        [(lfn, 'deprecatedUrl', storageElementName,
                          'PFNNotRegistered')], storageElementName,
                        'PFNNotRegistered')
                    pfnsFound = True
            if not pfnsFound:
                gLogger.info(
                    "LFNZeroReplicas file (%d) did not have storage files. Removing..."
                    % fileID)
                res = returnSingleResult(self.fc.removeFile(lfn))
                if not res['OK']:
                    gLogger.error('DataIntegrityClient: failed to remove file',
                                  res['Message'])
                    # Increment the number of retries for this file
                    # (was self.server.incrementProblematicRetry — self.server
                    # does not exist on this Client subclass)
                    self.incrementProblematicRetry(fileID)
                    return res
                gLogger.info("LFNZeroReplicas file (%d) removed from catalog" %
                             fileID)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('LFNZeroReplicas', fileID)
class CatalogPlugInTestCase(unittest.TestCase):
    """Base class for the CatalogPlugin test case."""

    def setUp(self):
        # Metadata fields common to files and directories.
        common = [
            "Status",
            "ChecksumType",
            "OwnerRole",
            "CreationDate",
            "Checksum",
            "ModificationDate",
            "OwnerDN",
            "Mode",
            "GUID",
            "Size",
        ]
        self.fullMetadata = common
        self.dirMetadata = common + ["NumberOfSubPaths"]
        self.fileMetadata = common + ["NumberOfLinks"]
        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        self.assertTrue(self.catalog.isOK())
        self.destDir = "/lhcb/test/unit-test/TestCatalogPlugin"
        self.link = "%s/link" % self.destDir
        # Start from a clean slate, then create the working directory.
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        self.parseResult(res, self.destDir)
        # Register a couple of files for the tests to work with.
        self.numberOfFiles = 2
        self.files = []
        for index in range(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, index)
            self.assertTrue(self.registerFile(lfn))
            self.files.append(lfn)

    def registerFile(self, lfn):
        """Register one dummy file under *lfn* and return the per-path result."""
        guid = makeGuid()
        registration = {
            lfn: {
                "PFN": "protocol://host:port/storage/path%s" % lfn,
                "Size": 10000000,
                "SE": "DIRAC-storage",
                "GUID": guid,
                "Checksum": stringAdler(guid),
            }
        }
        res = self.catalog.addFile(registration)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        """Assert that *res* reports success for *path* and return its value."""
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        successful = res["Value"]["Successful"]
        self.assertTrue(successful)
        self.assertTrue(path in successful)
        return successful[path]

    def parseError(self, res, path):
        """Assert that *res* reports failure for *path* and return the error."""
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        failed = res["Value"]["Failed"]
        self.assertTrue(failed)
        self.assertTrue(path in failed)
        return failed[path]

    def cleanDirectory(self):
        """Purge any registered files, then remove the working directory."""
        res = self.catalog.exists(self.destDir)
        if not self.parseResult(res, self.destDir):
            return
        res = self.catalog.listDirectory(self.destDir)
        listing = self.parseResult(res, self.destDir)
        lfns = list(listing["Files"])
        if lfns:
            self.purgeFiles(lfns)
        res = self.catalog.removeDirectory(self.destDir)
        self.assertTrue(self.parseResult(res, self.destDir))

    def purgeFiles(self, lfns):
        """Remove every replica of each LFN, then the catalog entry itself."""
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            for se, pfn in self.parseResult(res, lfn).items():
                res = self.catalog.removeReplica({lfn: {"PFN": pfn, "SE": se}})
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()
class CatalogPlugInTestCase(unittest.TestCase):
    """ Base class for the CatalogPlugin test case """

    def setUp(self):
        self.fullMetadata = [
            'Status', 'ChecksumType', 'OwnerRole', 'CreationDate', 'Checksum',
            'ModificationDate', 'OwnerDN', 'Mode', 'GUID', 'Size'
        ]
        self.dirMetadata = self.fullMetadata + ['NumberOfSubPaths']
        self.fileMetadata = self.fullMetadata + ['NumberOfLinks']
        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        self.assertTrue(valid)
        self.destDir = '/lhcb/test/unit-test/TestCatalogPlugin'
        self.link = "%s/link" % self.destDir
        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        # range() replaces the Python 2-only xrange() (NameError on Python 3)
        for i in range(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assertTrue(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        """Register one dummy file under *lfn* and return the per-path result."""
        pfn = 'protocol://host:port/storage/path%s' % lfn
        size = 10000000
        se = 'DIRAC-storage'
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {}
        fileDict[lfn] = {
            'PFN': pfn,
            'Size': size,
            'SE': se,
            'GUID': guid,
            'Checksum': adler
        }
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        """Assert that *res* reports success for *path* and return its value."""
        self.assertTrue(res['OK'])
        self.assertTrue(res['Value'])
        self.assertTrue(res['Value']['Successful'])
        self.assertTrue(path in res['Value']['Successful'])
        return res['Value']['Successful'][path]

    def parseError(self, res, path):
        """Assert that *res* reports failure for *path* and return the error."""
        self.assertTrue(res['OK'])
        self.assertTrue(res['Value'])
        self.assertTrue(res['Value']['Failed'])
        self.assertTrue(path in res['Value']['Failed'])
        return res['Value']['Failed'][path]

    def cleanDirectory(self):
        """Purge any registered files, then remove the working directory."""
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        # list(...) snapshots the keys; matches the sibling py3 variant of this class
        toRemove = list(returnValue['Files'])
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assertTrue(returnValue)

    def purgeFiles(self, lfns):
        """Remove every replica of each LFN, then the catalog entry itself."""
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {}
                repDict[lfn] = {'PFN': pfn, 'SE': se}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()
class RequestPreparationAgent(AgentModule):
    """Stager agent step: validates New cache replicas against the FileCatalog
    and moves the acceptable ones to Waiting (or flags them terminally failed)."""

    def initialize(self):
        # Clients used throughout the agent cycle.
        self.fileCatalog = FileCatalog()
        self.dm = DataManager()
        self.stagerClient = StorageManagerClient()
        self.dataIntegrityClient = DataIntegrityClient()
        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')
        return S_OK()

    def execute(self):
        """ This is the first logical task to be executed and manages the New->Waiting transition of the Replicas """
        res = self.__getNewReplicas()
        if not res['OK']:
            gLogger.fatal("RequestPreparation.prepareNewReplicas: Failed to get replicas from StagerDB.", res['Message'])
            return res
        if not res['Value']:
            gLogger.info("There were no New replicas found")
            return res
        replicas = res['Value']['Replicas']
        replicaIDs = res['Value']['ReplicaIDs']
        gLogger.info("RequestPreparation.prepareNewReplicas: Obtained %s New replicas for preparation." % len(replicaIDs))
        # Check if the files exist in the FileCatalog
        res = self.__getExistingFiles(replicas)
        if not res['OK']:
            return res
        exist = res['Value']['Exist']
        terminal = res['Value']['Missing']
        failed = res['Value']['Failed']
        if not exist:
            gLogger.error('RequestPreparation.prepareNewReplicas: Failed to determine the existence of any file')
            return S_OK()
        # Replicas of unregistered LFNs are terminally failed.
        terminalReplicaIDs = {}
        for lfn, reason in terminal.items():
            for replicaID in replicas[lfn].values():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info("RequestPreparation.prepareNewReplicas: %s files exist in the FileCatalog." % len(exist))
        if terminal:
            gLogger.info("RequestPreparation.prepareNewReplicas: %s files do not exist in the FileCatalog." % len(terminal))
        # Obtain the file sizes from the FileCatalog
        res = self.__getFileSize(exist)
        if not res['OK']:
            return res
        failed.update(res['Value']['Failed'])
        terminal = res['Value']['ZeroSize']
        fileSizes = res['Value']['FileSizes']
        if not fileSizes:
            gLogger.error('RequestPreparation.prepareNewReplicas: Failed determine sizes of any files')
            return S_OK()
        # Zero-size registrations are also terminal.
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info("RequestPreparation.prepareNewReplicas: Obtained %s file sizes from the FileCatalog." % len(fileSizes))
        if terminal:
            gLogger.info("RequestPreparation.prepareNewReplicas: %s files registered with zero size in the FileCatalog." % len(terminal))
        # Obtain the replicas from the FileCatalog
        res = self.__getFileReplicas(fileSizes.keys())
        if not res['OK']:
            return res
        failed.update(res['Value']['Failed'])
        terminal = res['Value']['ZeroReplicas']
        fileReplicas = res['Value']['Replicas']
        if not fileReplicas:
            gLogger.error('RequestPreparation.prepareNewReplicas: Failed determine replicas for any files')
            return S_OK()
        # LFNs with no active replica at all are terminal.
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info("RequestPreparation.prepareNewReplicas: Obtained replica information for %s file from the FileCatalog." % len(fileReplicas))
        if terminal:
            gLogger.info("RequestPreparation.prepareNewReplicas: %s files registered with zero replicas in the FileCatalog." % len(terminal))
        # Check the replicas exist at the requested site
        replicaMetadata = []
        for lfn, requestedSEs in replicas.items():
            lfnReplicas = fileReplicas.get(lfn)
            # This should not happen in principle, but it was seen
            # after a corrupted staging request has entered the DB
            if not lfnReplicas:
                gLogger.error("Missing replicas information", "%s %s" % (lfn, requestedSEs))
                continue
            for requestedSE, replicaID in requestedSEs.items():
                if not requestedSE in lfnReplicas.keys():
                    terminalReplicaIDs[replicaID] = "LFN not registered at requested SE"
                    # NOTE(review): requestedSEs IS replicas[lfn]; popping while
                    # iterating its .items() raises RuntimeError on Python 3 when
                    # this branch is hit — confirm and iterate a copy if needed.
                    replicas[lfn].pop(requestedSE)
                else:
                    replicaMetadata.append((replicaID, lfnReplicas[requestedSE], fileSizes[lfn]))
        # Update the states of the files in the database
        if terminalReplicaIDs:
            gLogger.info("RequestPreparation.prepareNewReplicas: %s replicas are terminally failed." % len(terminalReplicaIDs))
            # res = self.stagerClient.updateReplicaFailure( terminalReplicaIDs )
            res = self.stagerClient.updateReplicaFailure(terminalReplicaIDs)
            if not res['OK']:
                gLogger.error("RequestPreparation.prepareNewReplicas: Failed to update replica failures.", res['Message'])
        if replicaMetadata:
            gLogger.info("RequestPreparation.prepareNewReplicas: %s replica metadata to be updated." % len(replicaMetadata))
            # Sets the Status='Waiting' of CacheReplicas records that are OK with catalogue checks
            res = self.stagerClient.updateReplicaInformation(replicaMetadata)
            if not res['OK']:
                gLogger.error("RequestPreparation.prepareNewReplicas: Failed to update replica metadata.", res['Message'])
        return S_OK()

    def __getNewReplicas(self):
        """ This obtains the New replicas from the Replicas table and for each LFN the requested storage element """
        # First obtain the New replicas from the CacheReplicas table
        res = self.stagerClient.getCacheReplicas({'Status': 'New'})
        if not res['OK']:
            gLogger.error("RequestPreparation.__getNewReplicas: Failed to get replicas with New status.", res['Message'])
            return res
        if not res['Value']:
            gLogger.debug("RequestPreparation.__getNewReplicas: No New replicas found to process.")
            return S_OK()
        else:
            gLogger.debug("RequestPreparation.__getNewReplicas: Obtained %s New replicas(s) to process." % len(res['Value']))
        replicas = {}
        replicaIDs = {}
        # Build LFN -> {SE: replicaID} and replicaID -> (LFN, SE) maps.
        for replicaID, info in res['Value'].items():
            lfn = info['LFN']
            storageElement = info['SE']
            replicas.setdefault(lfn, {})[storageElement] = replicaID
            replicaIDs[replicaID] = (lfn, storageElement)
        return S_OK({'Replicas': replicas, 'ReplicaIDs': replicaIDs})

    def __getExistingFiles(self, lfns):
        """ This checks that the files exist in the FileCatalog. """
        res = self.fileCatalog.exists(list(set(lfns)))
        if not res['OK']:
            gLogger.error("RequestPreparation.__getExistingFiles: Failed to determine whether files exist.", res['Message'])
            return res
        failed = res['Value']['Failed']
        success = res['Value']['Successful']
        exist = [lfn for lfn, exists in success.items() if exists]
        missing = list(set(success) - set(exist))
        if missing:
            reason = 'LFN not registered in the FC'
            gLogger.warn("RequestPreparation.__getExistingFiles: %s" % reason, '\n'.join([''] + missing))
            self.__reportProblematicFiles(missing, 'LFN-LFC-DoesntExist')
            missing = dict.fromkeys(missing, reason)
        else:
            missing = {}
        return S_OK({'Exist': exist, 'Missing': missing, 'Failed': failed})

    def __getFileSize(self, lfns):
        """ This obtains the file size from the FileCatalog. """
        fileSizes = {}
        zeroSize = {}
        res = self.fileCatalog.getFileSize(lfns)
        if not res['OK']:
            gLogger.error("RequestPreparation.__getFileSize: Failed to get sizes for files.", res['Message'])
            return res
        failed = res['Value']['Failed']
        # Separate usable sizes from zero-size registrations.
        for lfn, size in res['Value']['Successful'].items():
            if size == 0:
                zeroSize[lfn] = "LFN registered with zero size in the FileCatalog"
            else:
                fileSizes[lfn] = size
        if zeroSize:
            for lfn, reason in zeroSize.items():
                gLogger.warn("RequestPreparation.__getFileSize: %s" % reason, lfn)
            self.__reportProblematicFiles(zeroSize.keys(), 'LFN-LFC-ZeroSize')
        return S_OK({'FileSizes': fileSizes, 'ZeroSize': zeroSize, 'Failed': failed})

    def __getFileReplicas(self, lfns):
        """ This obtains the replicas from the FileCatalog. """
        replicas = {}
        noReplicas = {}
        res = self.dm.getActiveReplicas(lfns)
        if not res['OK']:
            gLogger.error("RequestPreparation.__getFileReplicas: Failed to obtain file replicas.", res['Message'])
            return res
        failed = res['Value']['Failed']
        for lfn, lfnReplicas in res['Value']['Successful'].items():
            if len(lfnReplicas.keys()) == 0:
                noReplicas[lfn] = "LFN registered with zero replicas in the FileCatalog"
            else:
                replicas[lfn] = lfnReplicas
        if noReplicas:
            for lfn, reason in noReplicas.items():
                gLogger.warn("RequestPreparation.__getFileReplicas: %s" % reason, lfn)
            self.__reportProblematicFiles(noReplicas.keys(), 'LFN-LFC-NoReplicas')
        return S_OK({'Replicas': replicas, 'ZeroReplicas': noReplicas, 'Failed': failed})

    def __reportProblematicFiles(self, lfns, reason):
        # NOTE(review): this early return disables problematic-file reporting;
        # everything below it is currently dead code — confirm this is intentional.
        return S_OK()
        res = self.dataIntegrityClient.setFileProblematic(lfns, reason, sourceComponent='RequestPreparationAgent')
        if not res['OK']:
            gLogger.error("RequestPreparation.__reportProblematicFiles: Failed to report missing files.", res['Message'])
            return res
        if res['Value']['Successful']:
            gLogger.info("RequestPreparation.__reportProblematicFiles: Successfully reported %s missing files." % len(res['Value']['Successful']))
        if res['Value']['Failed']:
            gLogger.info("RequestPreparation.__reportProblematicFiles: Failed to report %s problematic files." % len(res['Value']['Failed']))
        return res
class RequestPreparationAgent(AgentModule):
    """Stager agent step: validates New cache replicas against the FileCatalog
    and moves the acceptable ones to Waiting (or flags them terminally failed)."""

    def initialize(self):
        # Clients used throughout the agent cycle.
        self.fileCatalog = FileCatalog()
        self.dm = DataManager()
        self.stagerClient = StorageManagerClient()
        self.dataIntegrityClient = DataIntegrityClient()
        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption("shifterProxy", "DataManager")
        return S_OK()

    def execute(self):
        """This is the first logical task to be executed and manages the New->Waiting transition of the Replicas"""
        res = self.__getNewReplicas()
        if not res["OK"]:
            gLogger.fatal("RequestPreparation.prepareNewReplicas: Failed to get replicas from StagerDB.", res["Message"])
            return res
        if not res["Value"]:
            gLogger.info("There were no New replicas found")
            return res
        replicas = res["Value"]["Replicas"]
        replicaIDs = res["Value"]["ReplicaIDs"]
        gLogger.info("RequestPreparation.prepareNewReplicas: Obtained %s New replicas for preparation." % len(replicaIDs))
        # Check if the files exist in the FileCatalog
        res = self.__getExistingFiles(replicas)
        if not res["OK"]:
            return res
        exist = res["Value"]["Exist"]
        terminal = res["Value"]["Missing"]
        failed = res["Value"]["Failed"]
        if not exist:
            gLogger.error("RequestPreparation.prepareNewReplicas: Failed to determine the existence of any file")
            return S_OK()
        # Replicas of unregistered LFNs are terminally failed.
        terminalReplicaIDs = {}
        for lfn, reason in terminal.items():
            for replicaID in replicas[lfn].values():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info("RequestPreparation.prepareNewReplicas: %s files exist in the FileCatalog." % len(exist))
        if terminal:
            gLogger.info("RequestPreparation.prepareNewReplicas: %s files do not exist in the FileCatalog." % len(terminal))
        # Obtain the file sizes from the FileCatalog
        res = self.__getFileSize(exist)
        if not res["OK"]:
            return res
        failed.update(res["Value"]["Failed"])
        terminal = res["Value"]["ZeroSize"]
        fileSizes = res["Value"]["FileSizes"]
        if not fileSizes:
            gLogger.error("RequestPreparation.prepareNewReplicas: Failed determine sizes of any files")
            return S_OK()
        # Zero-size registrations are also terminal.
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info("RequestPreparation.prepareNewReplicas: Obtained %s file sizes from the FileCatalog." % len(fileSizes))
        if terminal:
            gLogger.info("RequestPreparation.prepareNewReplicas: %s files registered with zero size in the FileCatalog." % len(terminal))
        # Obtain the replicas from the FileCatalog
        res = self.__getFileReplicas(list(fileSizes))
        if not res["OK"]:
            return res
        failed.update(res["Value"]["Failed"])
        terminal = res["Value"]["ZeroReplicas"]
        fileReplicas = res["Value"]["Replicas"]
        if not fileReplicas:
            gLogger.error("RequestPreparation.prepareNewReplicas: Failed determine replicas for any files")
            return S_OK()
        # LFNs with no active replica at all are terminal.
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info("RequestPreparation.prepareNewReplicas: Obtained replica information for %s file from the FileCatalog." % len(fileReplicas))
        if terminal:
            gLogger.info("RequestPreparation.prepareNewReplicas: %s files registered with zero replicas in the FileCatalog." % len(terminal))
        # Check the replicas exist at the requested site
        replicaMetadata = []
        for lfn, requestedSEs in replicas.items():
            lfnReplicas = fileReplicas.get(lfn)
            # This should not happen in principle, but it was seen
            # after a corrupted staging request has entered the DB
            if not lfnReplicas:
                gLogger.error("Missing replicas information", "%s %s" % (lfn, requestedSEs))
                continue
            for requestedSE, replicaID in requestedSEs.items():
                if requestedSE not in lfnReplicas.keys():
                    terminalReplicaIDs[replicaID] = "LFN not registered at requested SE"
                    # NOTE(review): requestedSEs IS replicas[lfn]; popping while
                    # iterating its .items() raises RuntimeError on Python 3 when
                    # this branch is hit — confirm and iterate a copy if needed.
                    replicas[lfn].pop(requestedSE)
                else:
                    replicaMetadata.append((replicaID, lfnReplicas[requestedSE], fileSizes[lfn]))
        # Update the states of the files in the database
        if terminalReplicaIDs:
            gLogger.info("RequestPreparation.prepareNewReplicas: %s replicas are terminally failed." % len(terminalReplicaIDs))
            # res = self.stagerClient.updateReplicaFailure( terminalReplicaIDs )
            res = self.stagerClient.updateReplicaFailure(terminalReplicaIDs)
            if not res["OK"]:
                gLogger.error("RequestPreparation.prepareNewReplicas: Failed to update replica failures.", res["Message"])
        if replicaMetadata:
            gLogger.info("RequestPreparation.prepareNewReplicas: %s replica metadata to be updated." % len(replicaMetadata))
            # Sets the Status='Waiting' of CacheReplicas records that are OK with catalogue checks
            res = self.stagerClient.updateReplicaInformation(replicaMetadata)
            if not res["OK"]:
                gLogger.error("RequestPreparation.prepareNewReplicas: Failed to update replica metadata.", res["Message"])
        return S_OK()

    def __getNewReplicas(self):
        """This obtains the New replicas from the Replicas table and for each LFN the requested storage element"""
        # First obtain the New replicas from the CacheReplicas table
        res = self.stagerClient.getCacheReplicas({"Status": "New"})
        if not res["OK"]:
            gLogger.error("RequestPreparation.__getNewReplicas: Failed to get replicas with New status.", res["Message"])
            return res
        if not res["Value"]:
            gLogger.debug("RequestPreparation.__getNewReplicas: No New replicas found to process.")
            return S_OK()
        else:
            gLogger.debug("RequestPreparation.__getNewReplicas: Obtained %s New replicas(s) to process." % len(res["Value"]))
        replicas = {}
        replicaIDs = {}
        # Build LFN -> {SE: replicaID} and replicaID -> (LFN, SE) maps.
        for replicaID, info in res["Value"].items():
            lfn = info["LFN"]
            storageElement = info["SE"]
            replicas.setdefault(lfn, {})[storageElement] = replicaID
            replicaIDs[replicaID] = (lfn, storageElement)
        return S_OK({"Replicas": replicas, "ReplicaIDs": replicaIDs})

    def __getExistingFiles(self, lfns):
        """This checks that the files exist in the FileCatalog."""
        res = self.fileCatalog.exists(list(set(lfns)))
        if not res["OK"]:
            gLogger.error("RequestPreparation.__getExistingFiles: Failed to determine whether files exist.", res["Message"])
            return res
        failed = res["Value"]["Failed"]
        success = res["Value"]["Successful"]
        exist = [lfn for lfn, exists in success.items() if exists]
        missing = list(set(success) - set(exist))
        if missing:
            reason = "LFN not registered in the FC"
            gLogger.warn("RequestPreparation.__getExistingFiles: %s" % reason, "\n".join([""] + missing))
            self.__reportProblematicFiles(missing, "LFN-LFC-DoesntExist")
            missing = dict.fromkeys(missing, reason)
        else:
            missing = {}
        return S_OK({"Exist": exist, "Missing": missing, "Failed": failed})

    def __getFileSize(self, lfns):
        """This obtains the file size from the FileCatalog."""
        fileSizes = {}
        zeroSize = {}
        res = self.fileCatalog.getFileSize(lfns)
        if not res["OK"]:
            gLogger.error("RequestPreparation.__getFileSize: Failed to get sizes for files.", res["Message"])
            return res
        failed = res["Value"]["Failed"]
        # Separate usable sizes from zero-size registrations.
        for lfn, size in res["Value"]["Successful"].items():
            if size == 0:
                zeroSize[lfn] = "LFN registered with zero size in the FileCatalog"
            else:
                fileSizes[lfn] = size
        if zeroSize:
            for lfn, reason in zeroSize.items():
                gLogger.warn("RequestPreparation.__getFileSize: %s" % reason, lfn)
            self.__reportProblematicFiles(zeroSize.keys(), "LFN-LFC-ZeroSize")
        return S_OK({"FileSizes": fileSizes, "ZeroSize": zeroSize, "Failed": failed})

    def __getFileReplicas(self, lfns):
        """This obtains the replicas from the FileCatalog."""
        replicas = {}
        noReplicas = {}
        res = self.dm.getActiveReplicas(lfns)
        if not res["OK"]:
            gLogger.error("RequestPreparation.__getFileReplicas: Failed to obtain file replicas.", res["Message"])
            return res
        failed = res["Value"]["Failed"]
        for lfn, lfnReplicas in res["Value"]["Successful"].items():
            if len(lfnReplicas) == 0:
                noReplicas[lfn] = "LFN registered with zero replicas in the FileCatalog"
            else:
                replicas[lfn] = lfnReplicas
        if noReplicas:
            for lfn, reason in noReplicas.items():
                gLogger.warn("RequestPreparation.__getFileReplicas: %s" % reason, lfn)
            self.__reportProblematicFiles(list(noReplicas), "LFN-LFC-NoReplicas")
        return S_OK({"Replicas": replicas, "ZeroReplicas": noReplicas, "Failed": failed})

    def __reportProblematicFiles(self, lfns, reason):
        # NOTE(review): this early return disables problematic-file reporting;
        # everything below it is currently dead code — confirm this is intentional.
        return S_OK()
        res = self.dataIntegrityClient.setFileProblematic(lfns, reason, sourceComponent="RequestPreparationAgent")
        if not res["OK"]:
            gLogger.error("RequestPreparation.__reportProblematicFiles: Failed to report missing files.", res["Message"])
            return res
        if res["Value"]["Successful"]:
            gLogger.info("RequestPreparation.__reportProblematicFiles: Successfully reported %s missing files." % len(res["Value"]["Successful"]))
        if res["Value"]["Failed"]:
            gLogger.info("RequestPreparation.__reportProblematicFiles: Failed to report %s problematic files." % len(res["Value"]["Failed"]))
        return res
class ValidateOutputDataAgent(AgentModule):
    """Agent that runs integrity checks on the output data of transformations
    in 'ValidatingOutput' status and advances their status accordingly."""

    def __init__(self, *args, **kwargs):
        """ c'tor """
        AgentModule.__init__(self, *args, **kwargs)
        self.consistencyInspector = ConsistencyInspector()
        self.integrityClient = DataIntegrityClient()
        self.fc = FileCatalog()
        self.transClient = TransformationClient()
        self.fileCatalogClient = FileCatalogClient()
        # Transformation types to treat: CS option first, Operations default second.
        agentTSTypes = self.am_getOption('TransformationTypes', [])
        if agentTSTypes:
            self.transformationTypes = agentTSTypes
        else:
            self.transformationTypes = Operations().getValue('Transformations/DataProcessing', ['MCSimulation', 'Merge'])
        self.directoryLocations = sorted(self.am_getOption('DirectoryLocations', ['TransformationDB', 'MetadataCatalog']))
        self.transfidmeta = self.am_getOption('TransfIDMeta', "TransformationID")
        self.enableFlag = True

    #############################################################################

    def initialize(self):
        """ Sets defaults """
        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')
        gLogger.info("Will treat the following transformation types: %s" % str(self.transformationTypes))
        gLogger.info("Will search for directories in the following locations: %s" % str(self.directoryLocations))
        gLogger.info("Will use %s as metadata tag name for TransformationID" % self.transfidmeta)
        return S_OK()

    #############################################################################

    def execute(self):
        """ The VerifyOutputData execution method """
        self.enableFlag = self.am_getOption('EnableFlag', 'True')
        if not self.enableFlag == 'True':
            self.log.info("VerifyOutputData is disabled by configuration option 'EnableFlag'")
            return S_OK('Disabled via CS flag')
        gLogger.info("-" * 40)
        self.updateWaitingIntegrity()
        gLogger.info("-" * 40)
        res = self.transClient.getTransformations({'Status': 'ValidatingOutput', 'Type': self.transformationTypes})
        if not res['OK']:
            gLogger.error("Failed to get ValidatingOutput transformations", res['Message'])
            return res
        transDicts = res['Value']
        if not transDicts:
            gLogger.info("No transformations found in ValidatingOutput status")
            return S_OK()
        gLogger.info("Found %s transformations in ValidatingOutput status" % len(transDicts))
        for transDict in transDicts:
            transID = transDict['TransformationID']
            res = self.checkTransformationIntegrity(int(transID))
            if not res['OK']:
                gLogger.error("Failed to perform full integrity check for transformation %d" % transID)
            else:
                self.finalizeCheck(transID)
                gLogger.info("-" * 40)
        return S_OK()

    def updateWaitingIntegrity(self):
        """ Get 'WaitingIntegrity' transformations, update to 'ValidatedOutput' """
        gLogger.info("Looking for transformations in the WaitingIntegrity status to update")
        res = self.transClient.getTransformations({'Status': 'WaitingIntegrity'})
        if not res['OK']:
            gLogger.error("Failed to get WaitingIntegrity transformations", res['Message'])
            return res
        transDicts = res['Value']
        if not transDicts:
            gLogger.info("No transformations found in WaitingIntegrity status")
            return S_OK()
        gLogger.info("Found %s transformations in WaitingIntegrity status" % len(transDicts))
        for transDict in transDicts:
            transID = transDict['TransformationID']
            gLogger.info("-" * 40)
            res = self.integrityClient.getTransformationProblematics(int(transID))
            if not res['OK']:
                gLogger.error("Failed to determine waiting problematics for transformation", res['Message'])
            elif not res['Value']:
                # No outstanding problematics: the transformation output is validated.
                res = self.transClient.setTransformationParameter(transID, 'Status', 'ValidatedOutput')
                if not res['OK']:
                    gLogger.error("Failed to update status of transformation %s to ValidatedOutput" % (transID))
                else:
                    gLogger.info("Updated status of transformation %s to ValidatedOutput" % (transID))
            else:
                gLogger.info("%d problematic files for transformation %s were found" % (len(res['Value']), transID))
        return

    #############################################################################
    #
    # Get the transformation directories for checking
    #

    def getTransformationDirectories(self, transID):
        """ Get the directories for the supplied transformation from the transformation system """
        directories = []
        if 'TransformationDB' in self.directoryLocations:
            res = self.transClient.getTransformationParameters(transID, ['OutputDirectories'])
            if not res['OK']:
                gLogger.error("Failed to obtain transformation directories", res['Message'])
                return res
            # Older servers may return the list serialised as a string.
            if not isinstance(res['Value'], list):
                transDirectories = ast.literal_eval(res['Value'])
            else:
                transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)
        if 'MetadataCatalog' in self.directoryLocations:
            res = self.fileCatalogClient.findDirectoriesByMetadata({self.transfidmeta: transID})
            if not res['OK']:
                gLogger.error("Failed to obtain metadata catalog directories", res['Message'])
                return res
            transDirectories = res['Value']
            directories = self._addDirs(transID, transDirectories, directories)
        if not directories:
            gLogger.info("No output directories found")
        directories = sorted(directories)
        return S_OK(directories)

    @staticmethod
    def _addDirs(transID, newDirs, existingDirs):
        """Append to *existingDirs* the *newDirs* entries that contain the
        zero-padded transformation ID; returns the (possibly extended) list."""
        for nDir in newDirs:
            transStr = str(transID).zfill(8)
            if re.search(transStr, nDir):
                if nDir not in existingDirs:
                    existingDirs.append(nDir)
        return existingDirs

    #############################################################################

    def checkTransformationIntegrity(self, transID):
        """ This method contains the real work """
        gLogger.info("-" * 40)
        gLogger.info("Checking the integrity of transformation %s" % transID)
        gLogger.info("-" * 40)
        res = self.getTransformationDirectories(transID)
        if not res['OK']:
            return res
        directories = res['Value']
        if not directories:
            return S_OK()
        ######################################################
        #
        # This check performs Catalog->SE for possible output directories
        #
        res = self.fc.exists(directories)
        if not res['OK']:
            gLogger.error('Failed to check directory existence', res['Message'])
            return res
        # FIX: 'Failed' is a {directory: error} mapping — iterating the dict
        # directly yields bare keys and the 2-tuple unpacking raised ValueError
        # whenever it was non-empty; iterate .items() instead.
        for directory, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existance of directory', '%s %s' % (directory, error))
        if res['Value']['Failed']:
            return S_ERROR("Failed to determine the existance of directories")
        directoryExists = res['Value']['Successful']
        for directory in sorted(directoryExists.keys()):
            if not directoryExists[directory]:
                continue
            iRes = self.consistencyInspector.catalogDirectoryToSE(directory)
            if not iRes['OK']:
                gLogger.error(iRes['Message'])
                return iRes
        gLogger.info("-" * 40)
        gLogger.info("Completed integrity check for transformation %s" % transID)
        return S_OK()

    def finalizeCheck(self, transID):
        """ Move to 'WaitingIntegrity' or 'ValidatedOutput' """
        res = self.integrityClient.getTransformationProblematics(int(transID))
        if not res['OK']:
            gLogger.error("Failed to determine whether there were associated problematic files", res['Message'])
            newStatus = ''
        elif res['Value']:
            gLogger.info("%d problematic files for transformation %s were found" % (len(res['Value']), transID))
            newStatus = "WaitingIntegrity"
        else:
            gLogger.info("No problematics were found for transformation %s" % transID)
            newStatus = "ValidatedOutput"
        if newStatus:
            res = self.transClient.setTransformationParameter(transID, 'Status', newStatus)
            if not res['OK']:
                gLogger.error("Failed to update status of transformation %s to %s" % (transID, newStatus))
            else:
                gLogger.info("Updated status of transformation %s to %s" % (transID, newStatus))
        gLogger.info("-" * 40)
        return S_OK()
class CatalogPlugInTestCase(unittest.TestCase):
  """ Base class for the CatalogPlugin test case.

      setUp() connects the catalog under test, wipes and re-creates a dedicated
      test directory and registers a couple of working files; tearDown() cleans
      the directory again.  parseResult/parseError assert on and extract the
      per-path payload of DIRAC's {'Successful': ..., 'Failed': ...} results.
  """

  def setUp(self):
    """ Prepare the catalog connection and register self.numberOfFiles test files. """
    self.fullMetadata = ['Status', 'CheckSumType', 'OwnerRole', 'CreationDate', 'Checksum',
                         'ModificationDate', 'OwnerDN', 'Mode', 'GUID', 'Size']
    self.dirMetadata = self.fullMetadata + ['NumberOfSubPaths']
    self.fileMetadata = self.fullMetadata + ['NumberOfLinks']
    self.catalog = FileCatalog(catalogs=[catalogClientToTest])
    valid = self.catalog.isOK()
    # assertTrue replaces the deprecated TestCase.assert_ alias
    self.assertTrue(valid)
    self.destDir = '/lhcb/test/unit-test/TestCatalogPlugin'
    self.link = "%s/link" % self.destDir
    # Clean the existing directory
    self.cleanDirectory()
    res = self.catalog.createDirectory(self.destDir)
    self.parseResult(res, self.destDir)
    # Register some files to work with
    self.numberOfFiles = 2
    self.files = []
    for i in range(self.numberOfFiles):
      lfn = "%s/testFile_%d" % (self.destDir, i)
      res = self.registerFile(lfn)
      self.assertTrue(res)
      self.files.append(lfn)

  def registerFile(self, lfn):
    """ Register a single dummy file (fabricated PFN/size/GUID/checksum) and
        return the per-path result of the addFile call. """
    pfn = 'protocol://host:port/storage/path%s' % lfn
    size = 10000000
    se = 'DIRAC-storage'
    guid = makeGuid()
    adler = stringAdler(guid)
    fileDict = {}
    fileDict[lfn] = {'PFN': pfn, 'Size': size, 'SE': se, 'GUID': guid, 'Checksum': adler}
    res = self.catalog.addFile(fileDict)
    return self.parseResult(res, lfn)

  def parseResult(self, res, path):
    """ Assert res is an OK result with 'path' in 'Successful' and return its value. """
    self.assertTrue(res['OK'])
    self.assertTrue(res['Value'])
    self.assertTrue(res['Value']['Successful'])
    # 'path in dict' replaces the Python-2-only dict.has_key()
    self.assertTrue(path in res['Value']['Successful'])
    return res['Value']['Successful'][path]

  def parseError(self, res, path):
    """ Assert res is an OK result with 'path' in 'Failed' and return its error. """
    self.assertTrue(res['OK'])
    self.assertTrue(res['Value'])
    self.assertTrue(res['Value']['Failed'])
    self.assertTrue(path in res['Value']['Failed'])
    return res['Value']['Failed'][path]

  def cleanDirectory(self):
    """ Remove all replicas/files below the test directory, then the directory itself. """
    res = self.catalog.exists(self.destDir)
    returnValue = self.parseResult(res, self.destDir)
    if not returnValue:
      # Nothing registered under destDir: nothing to clean
      return
    res = self.catalog.listDirectory(self.destDir)
    returnValue = self.parseResult(res, self.destDir)
    toRemove = returnValue['Files'].keys()
    if toRemove:
      self.purgeFiles(toRemove)
    res = self.catalog.removeDirectory(self.destDir)
    returnValue = self.parseResult(res, self.destDir)
    self.assertTrue(returnValue)

  def purgeFiles(self, lfns):
    """ Remove every replica of each supplied LFN, then the catalog entry itself. """
    for lfn in lfns:
      res = self.catalog.getReplicas(lfn, True)
      replicas = self.parseResult(res, lfn)
      for se, pfn in replicas.items():
        repDict = {}
        repDict[lfn] = {'PFN': pfn, 'SE': se}
        res = self.catalog.removeReplica(repDict)
        self.parseResult(res, lfn)
      res = self.catalog.removeFile(lfn)
      self.parseResult(res, lfn)

  def tearDown(self):
    """ Standard unittest hook: clean the test directory after each test. """
    self.cleanDirectory()
class ValidateOutputDataAgent(AgentModule):
  """ Agent validating the output data of transformations.

      Each cycle it (a) promotes 'WaitingIntegrity' transformations whose
      problematics have been cleared to 'ValidatedOutput', and (b) runs
      Catalog->SE and SE->Catalog consistency checks on the output directories
      of every 'ValidatingOutput' transformation, after which the
      transformation is moved to 'WaitingIntegrity' (problematics found) or
      'ValidatedOutput' (clean).
  """

  def __init__(self, *args, **kwargs):
    """ c'tor — create the client instances and read the agent options. """
    AgentModule.__init__(self, *args, **kwargs)

    self.integrityClient = DataIntegrityClient()
    self.fc = FileCatalog()
    self.transClient = TransformationClient()
    self.fileCatalogClient = FileCatalogClient()

    # Transformation types to treat; CS option wins over the Operations default
    agentTSTypes = self.am_getOption('TransformationTypes', [])
    if agentTSTypes:
      self.transformationTypes = agentTSTypes
    else:
      self.transformationTypes = Operations().getValue('Transformations/DataProcessing',
                                                       ['MCSimulation', 'Merge'])
    # Where to look for a transformation's output directories
    self.directoryLocations = sorted(self.am_getOption('DirectoryLocations',
                                                       ['TransformationDB', 'MetadataCatalog']))
    # Storage elements checked in the SE->Catalog direction
    self.activeStorages = sorted(self.am_getOption('ActiveSEs', []))
    # Metadata key holding the transformation ID in the file catalog
    self.transfidmeta = self.am_getOption('TransfIDMeta', "TransformationID")
    self.enableFlag = True

  #############################################################################

  def initialize(self):
    """ Sets defaults """
    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption('shifterProxy', 'DataManager')

    gLogger.info("Will treat the following transformation types: %s" % str(self.transformationTypes))
    gLogger.info("Will search for directories in the following locations: %s" % str(self.directoryLocations))
    gLogger.info("Will check the following storage elements: %s" % str(self.activeStorages))
    gLogger.info("Will use %s as metadata tag name for TransformationID" % self.transfidmeta)
    return S_OK()

  #############################################################################

  def execute(self):
    """ The VerifyOutputData execution method """
    # EnableFlag is handled as the *string* 'True'/'False' (CS convention)
    self.enableFlag = self.am_getOption('EnableFlag', 'True')
    if not self.enableFlag == 'True':
      self.log.info("VerifyOutputData is disabled by configuration option 'EnableFlag'")
      return S_OK('Disabled via CS flag')

    gLogger.info("-" * 40)
    self.updateWaitingIntegrity()
    gLogger.info("-" * 40)

    res = self.transClient.getTransformations({'Status': 'ValidatingOutput',
                                               'Type': self.transformationTypes})
    if not res['OK']:
      gLogger.error("Failed to get ValidatingOutput transformations", res['Message'])
      return res
    transDicts = res['Value']
    if not transDicts:
      gLogger.info("No transformations found in ValidatingOutput status")
      return S_OK()
    gLogger.info("Found %s transformations in ValidatingOutput status" % len(transDicts))
    for transDict in transDicts:
      transID = transDict['TransformationID']
      res = self.checkTransformationIntegrity(int(transID))
      if not res['OK']:
        # A failed check only logs; the transformation stays in ValidatingOutput
        gLogger.error("Failed to perform full integrity check for transformation %d" % transID)
      else:
        self.finalizeCheck(transID)
        gLogger.info("-" * 40)
    return S_OK()

  def updateWaitingIntegrity(self):
    """ Get 'WaitingIntegrity' transformations, update to 'ValidatedOutput' """
    gLogger.info("Looking for transformations in the WaitingIntegrity status to update")
    res = self.transClient.getTransformations({'Status': 'WaitingIntegrity'})
    if not res['OK']:
      gLogger.error("Failed to get WaitingIntegrity transformations", res['Message'])
      return res
    transDicts = res['Value']
    if not transDicts:
      gLogger.info("No transformations found in WaitingIntegrity status")
      return S_OK()
    gLogger.info("Found %s transformations in WaitingIntegrity status" % len(transDicts))
    for transDict in transDicts:
      transID = transDict['TransformationID']
      gLogger.info("-" * 40)
      res = self.integrityClient.getTransformationProblematics(int(transID))
      if not res['OK']:
        gLogger.error("Failed to determine waiting problematics for transformation", res['Message'])
      elif not res['Value']:
        # No outstanding problematics: the transformation can be validated
        res = self.transClient.setTransformationParameter(transID, 'Status', 'ValidatedOutput')
        if not res['OK']:
          gLogger.error("Failed to update status of transformation %s to ValidatedOutput" % (transID))
        else:
          gLogger.info("Updated status of transformation %s to ValidatedOutput" % (transID))
      else:
        gLogger.info("%d problematic files for transformation %s were found" % (len(res['Value']), transID))
    return

  #############################################################################
  #
  # Get the transformation directories for checking
  #

  def getTransformationDirectories(self, transID):
    """ Get the directories for the supplied transformation from the transformation system

        :param transID: transformation ID
        :return: S_OK(sorted list of directory paths) or an error result
    """
    directories = []
    if 'TransformationDB' in self.directoryLocations:
      res = self.transClient.getTransformationParameters(transID, ['OutputDirectories'])
      if not res['OK']:
        gLogger.error("Failed to obtain transformation directories", res['Message'])
        return res
      # OutputDirectories is stored as a newline-separated string
      transDirectories = res['Value'].splitlines()
      directories = self._addDirs(transID, transDirectories, directories)

    if 'MetadataCatalog' in self.directoryLocations:
      res = self.fileCatalogClient.findDirectoriesByMetadata({self.transfidmeta: transID})
      if not res['OK']:
        gLogger.error("Failed to obtain metadata catalog directories", res['Message'])
        return res
      transDirectories = res['Value']
      directories = self._addDirs(transID, transDirectories, directories)

    if not directories:
      gLogger.info("No output directories found")
    directories = sorted(directories)
    return S_OK(directories)

  @staticmethod
  def _addDirs(transID, newDirs, existingDirs):
    """ Add to existingDirs those entries of newDirs whose path contains the
        8-digit zero-padded transformation ID; return the (mutated) list. """
    for nDir in newDirs:
      transStr = str(transID).zfill(8)
      if re.search(transStr, nDir):
        if not nDir in existingDirs:
          existingDirs.append(nDir)
    return existingDirs

  #############################################################################

  def checkTransformationIntegrity(self, transID):
    """ This method contains the real work: Catalog->SE and SE->Catalog checks
        for every existing output directory of the transformation. """
    gLogger.info("-" * 40)
    gLogger.info("Checking the integrity of transformation %s" % transID)
    gLogger.info("-" * 40)
    res = self.getTransformationDirectories(transID)
    if not res['OK']:
      return res
    directories = res['Value']
    if not directories:
      return S_OK()

    ######################################################
    #
    # This check performs Catalog->SE for possible output directories
    #
    res = self.fc.exists(directories)
    if not res['OK']:
      gLogger.error(res['Message'])
      return res
    # NOTE(review): 'Failed' from FileCatalog calls is normally a dict keyed by
    # path; unpacking as (directory, error) assumes .items()-like pairs — verify.
    for directory, error in res['Value']['Failed']:
      gLogger.error('Failed to determine existance of directory', '%s %s' % (directory, error))
    if res['Value']['Failed']:
      return S_ERROR("Failed to determine the existance of directories")
    directoryExists = res['Value']['Successful']
    for directory in sorted(directoryExists.keys()):
      if not directoryExists[directory]:
        continue
      iRes = self.integrityClient.catalogDirectoryToSE(directory)
      if not iRes['OK']:
        gLogger.error(iRes['Message'])
        return iRes

    ######################################################
    #
    # This check performs SE->Catalog for possible output directories
    #
    for storageElementName in sorted(self.activeStorages):
      res = self.integrityClient.storageDirectoryToCatalog(directories, storageElementName)
      if not res['OK']:
        gLogger.error(res['Message'])
        return res

    gLogger.info("-" * 40)
    gLogger.info("Completed integrity check for transformation %s" % transID)
    return S_OK()

  def finalizeCheck(self, transID):
    """ Move to 'WaitingIntegrity' or 'ValidatedOutput'

        Problematic files recorded -> 'WaitingIntegrity'; none -> 'ValidatedOutput';
        lookup failure -> status left unchanged. Always returns S_OK().
    """
    res = self.integrityClient.getTransformationProblematics(int(transID))
    if not res['OK']:
      gLogger.error("Failed to determine whether there were associated problematic files",
                    res['Message'])
      newStatus = ''
    elif res['Value']:
      gLogger.info("%d problematic files for transformation %s were found" % (len(res['Value']), transID))
      newStatus = "WaitingIntegrity"
    else:
      gLogger.info("No problematics were found for transformation %s" % transID)
      newStatus = "ValidatedOutput"
    if newStatus:
      res = self.transClient.setTransformationParameter(transID, 'Status', newStatus)
      if not res['OK']:
        gLogger.error("Failed to update status of transformation %s to %s" % (transID, newStatus))
      else:
        gLogger.info("Updated status of transformation %s to %s" % (transID, newStatus))
    gLogger.info("-" * 40)
    return S_OK()
class DataIntegrityClient(Client):
  """ Client for the DataManagement/DataIntegrity service, plus the resolution
      logic for the various problematic-file prognoses.

  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB
  """

  def __init__(self, **kwargs):
    """ c'tor — point the client at the DataIntegrity service and create the
        DataManager / FileCatalog helpers used by the resolution methods. """
    super(DataIntegrityClient, self).__init__(**kwargs)
    self.setServer('DataManagement/DataIntegrity')
    self.dm = DataManager()
    self.fc = FileCatalog()

  def setFileProblematic(self, lfn, reason, sourceComponent=''):
    """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if isinstance(lfn, list):
      lfns = lfn
    elif isinstance(lfn, basestring):
      lfns = [lfn]
    else:
      errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
      gLogger.error(errStr)
      return S_ERROR(errStr)
    gLogger.info("DataIntegrityClient.setFileProblematic: Attempting to update %s files." % len(lfns))
    fileMetadata = {}
    for lfn in lfns:
      # No replica information is known here, so PFN/SE are left empty
      fileMetadata[lfn] = {'Prognosis': reason, 'LFN': lfn, 'PFN': '', 'SE': ''}
    res = self.insertProblematic(sourceComponent, fileMetadata)
    if not res['OK']:
      # Fixed: the message previously named setReplicaProblematic
      gLogger.error("DataIntegrityClient.setFileProblematic: Failed to insert problematics to integrity DB")
    return res

  def reportProblematicReplicas(self, replicaTuple, se, reason):
    """ Simple wrapper function around setReplicaProblematic """
    gLogger.info('The following %s files had %s at %s' % (len(replicaTuple), reason, se))
    # Underscore-named loop variables avoid shadowing the se/reason parameters
    for lfn, _pfn, _se, _reason in sorted(replicaTuple):
      if lfn:
        gLogger.info(lfn)
    res = self.setReplicaProblematic(replicaTuple, sourceComponent='DataIntegrityClient')
    if not res['OK']:
      gLogger.info('Failed to update integrity DB with replicas', res['Message'])
    else:
      gLogger.info('Successfully updated integrity DB with replicas')

  def setReplicaProblematic(self, replicaTuple, sourceComponent=''):
    """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaDict should be of the form {lfn :{'PFN':pfn,'SE':se,'Prognosis':prognosis}

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if isinstance(replicaTuple, tuple):
      replicaTuple = [replicaTuple]
    elif isinstance(replicaTuple, list):
      pass
    else:
      errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
      gLogger.error(errStr)
      return S_ERROR(errStr)
    gLogger.info("DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas." % len(replicaTuple))
    replicaDict = {}
    for lfn, pfn, se, reason in replicaTuple:
      replicaDict[lfn] = {'Prognosis': reason, 'LFN': lfn, 'PFN': pfn, 'SE': se}
    res = self.insertProblematic(sourceComponent, replicaDict)
    if not res['OK']:
      gLogger.error("DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB")
      return res
    # Mark the catalog replicas as Problematic as well
    for lfn in replicaDict.keys():
      replicaDict[lfn]['Status'] = 'Problematic'
    res = self.fc.setReplicaStatus(replicaDict)
    if not res['OK']:
      errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
      gLogger.error(errStr, res['Message'])
      return res
    failed = res['Value']['Failed']
    successful = res['Value']['Successful']
    resDict = {'Successful': successful, 'Failed': failed}
    return S_OK(resDict)

  ##########################################################################
  #
  # This section contains the resolution methods for various prognoses
  #

  def __updateCompletedFiles(self, prognosis, fileID):
    """ Mark the problematic as Resolved in the integrity DB. """
    gLogger.info("%s file (%d) is resolved" % (prognosis, fileID))
    return self.setProblematicStatus(fileID, 'Resolved')

  def __returnProblematicError(self, fileID, res):
    """ Log a resolution failure, bump the retry counter and pass the error back. """
    self.incrementProblematicRetry(fileID)
    gLogger.error('DataIntegrityClient failure', res['Message'])
    return res

  def __updateReplicaToChecked(self, problematicDict):
    """ Set the catalog replica status to 'Checked' and resolve the problematic. """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']
    prognosis = problematicDict['Prognosis']
    problematicDict['Status'] = 'Checked'
    res = returnSingleResult(self.fc.setReplicaStatus({lfn: problematicDict}))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    gLogger.info("%s replica (%d) is updated to Checked status" % (prognosis, fileID))
    return self.__updateCompletedFiles(prognosis, fileID)

  def resolveCatalogPFNSizeMismatch(self, problematicDict):
    """ This takes the problematic dictionary returned by the integrity DB and
        resolved the CatalogPFNSizeMismatch prognosis

        Compares the catalog, storage and bookkeeping sizes and acts on
        whichever pair agrees; if all three disagree, only the retry counter
        is incremented.
    """
    lfn = problematicDict['LFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = returnSingleResult(self.fc.getFileSize(lfn))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    catalogSize = res['Value']
    res = returnSingleResult(StorageElement(se).getFileSize(lfn))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    storageSize = res['Value']
    bkKCatalog = FileCatalog(['BookkeepingDB'])
    res = returnSingleResult(bkKCatalog.getFileSize(lfn))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    bookkeepingSize = res['Value']
    if bookkeepingSize == catalogSize == storageSize:
      gLogger.info("CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID)
      return self.__updateReplicaToChecked(problematicDict)
    if catalogSize == bookkeepingSize:
      # The storage copy is the odd one out: remove it if other replicas exist
      gLogger.info("CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID)
      res = returnSingleResult(self.fc.getReplicas(lfn))
      if not res['OK']:
        return self.__returnProblematicError(fileID, res)
      if len(res['Value']) <= 1:
        gLogger.info("CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID)
        return S_ERROR("Not removing catalog file mismatch since the only replica")
      else:
        gLogger.info("CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID)
        res = self.dm.removeReplica(se, lfn)
        if not res['OK']:
          return self.__returnProblematicError(fileID, res)
        return self.__updateCompletedFiles('CatalogPFNSizeMismatch', fileID)
    if (catalogSize != bookkeepingSize) and (bookkeepingSize == storageSize):
      # The catalog entry is wrong: escalate to a BK/catalog mismatch
      gLogger.info("CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID)
      res = self.__updateReplicaToChecked(problematicDict)
      if not res['OK']:
        return self.__returnProblematicError(fileID, res)
      return self.changeProblematicPrognosis(fileID, 'BKCatalogSizeMismatch')
    gLogger.info("CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count" % fileID)
    return self.incrementProblematicRetry(fileID)

  # FIXME: Unused?
  def resolvePFNNotRegistered(self, problematicDict):
    """ This takes the problematic dictionary returned by the integrity DB and
        resolved the PFNNotRegistered prognosis

        If the file is not in the catalog the storage copy is removed;
        otherwise the replica is (re)registered, after remapping the SE name
        from the storage metadata flags.
    """
    lfn = problematicDict['LFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement(seName)
    res = returnSingleResult(self.fc.exists(lfn))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    if not res['Value']:
      # The file does not exist in the catalog
      res = returnSingleResult(se.removeFile(lfn))
      if not res['OK']:
        return self.__returnProblematicError(fileID, res)
      return self.__updateCompletedFiles('PFNNotRegistered', fileID)
    res = returnSingleResult(se.getFileMetadata(lfn))
    if (not res['OK']) and (re.search('File does not exist', res['Message'])):
      gLogger.info("PFNNotRegistered replica (%d) found to be missing." % fileID)
      return self.__updateCompletedFiles('PFNNotRegistered', fileID)
    elif not res['OK']:
      return self.__returnProblematicError(fileID, res)
    storageMetadata = res['Value']
    if storageMetadata['Lost']:
      gLogger.info("PFNNotRegistered replica (%d) found to be Lost. Updating prognosis" % fileID)
      return self.changeProblematicPrognosis(fileID, 'PFNLost')
    if storageMetadata['Unavailable']:
      gLogger.info("PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count" % fileID)
      return self.incrementProblematicRetry(fileID)

    # HACK until we can obtain the space token descriptions through GFAL
    site = seName.split('_')[0].split('-')[0]
    if not storageMetadata['Cached']:
      if lfn.endswith('.raw'):
        seName = '%s-RAW' % site
      else:
        seName = '%s-RDST' % site
    elif storageMetadata['Migrated']:
      if lfn.startswith('/lhcb/data'):
        seName = '%s_M-DST' % site
      else:
        seName = '%s_MC_M-DST' % site
    else:
      if lfn.startswith('/lhcb/data'):
        seName = '%s-DST' % site
      else:
        seName = '%s_MC-DST' % site
    problematicDict['SE'] = seName
    res = returnSingleResult(se.getURL(lfn))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)

    problematicDict['PFN'] = res['Value']

    res = returnSingleResult(self.fc.addReplica({lfn: problematicDict}))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    res = returnSingleResult(self.fc.getFileMetadata(lfn))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    if res['Value']['Size'] != storageMetadata['Size']:
      gLogger.info("PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis" % fileID)
      return self.changeProblematicPrognosis(fileID, 'CatalogPFNSizeMismatch')
    return self.__updateCompletedFiles('PFNNotRegistered', fileID)

  # FIXME: Unused?
  def resolveLFNCatalogMissing(self, problematicDict):
    """ This takes the problematic dictionary returned by the integrity DB and
        resolved the LFNCatalogMissing prognosis

        If the LFN reappeared in the catalog the problematic is resolved;
        otherwise the file is removed from all catalogs.
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult(self.fc.exists(lfn))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    if res['Value']:
      return self.__updateCompletedFiles('LFNCatalogMissing', fileID)
    # Remove the file from all catalogs
    # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
    res = returnSingleResult(self.fc.removeFile(lfn))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    return self.__updateCompletedFiles('LFNCatalogMissing', fileID)

  # FIXME: Unused?
  def resolvePFNMissing(self, problematicDict):
    """ This takes the problematic dictionary returned by the integrity DB and
        resolved the PFNMissing prognosis

        If the storage copy is confirmed missing, the replica is removed from
        the catalog and, when other replicas exist, re-replicated to the SE.
    """
    se = problematicDict['SE']
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult(self.fc.exists(lfn))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    if not res['Value']:
      gLogger.info("PFNMissing file (%d) no longer exists in catalog" % fileID)
      return self.__updateCompletedFiles('PFNMissing', fileID)

    res = returnSingleResult(StorageElement(se).exists(lfn))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    if res['Value']:
      gLogger.info("PFNMissing replica (%d) is no longer missing" % fileID)
      return self.__updateReplicaToChecked(problematicDict)
    gLogger.info("PFNMissing replica (%d) does not exist" % fileID)
    res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    replicas = res['Value']
    seSite = se.split('_')[0].split('-')[0]
    found = False
    # Removed a leftover debug 'print replicas' statement here
    for replicaSE in replicas.keys():
      if re.search(seSite, replicaSE):
        found = True
        problematicDict['SE'] = replicaSE
        se = replicaSE
    if not found:
      gLogger.info("PFNMissing replica (%d) is no longer registered at SE. Resolved." % fileID)
      return self.__updateCompletedFiles('PFNMissing', fileID)
    gLogger.info("PFNMissing replica (%d) does not exist. Removing from catalog..." % fileID)
    res = returnSingleResult(self.fc.removeReplica({lfn: problematicDict}))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    if len(replicas) == 1:
      gLogger.info("PFNMissing replica (%d) had a single replica. Updating prognosis" % fileID)
      return self.changeProblematicPrognosis(fileID, 'LFNZeroReplicas')
    res = self.dm.replicateAndRegister(problematicDict['LFN'], se)
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles('PFNMissing', fileID)

  # FIXME: Unused?
  def resolvePFNUnavailable(self, problematicDict):
    """ This takes the problematic dictionary returned by the integrity DB and
        resolved the PFNUnavailable prognosis

        Re-examines the storage metadata and either escalates (PFNMissing,
        PFNLost), retries later, or marks the replica Checked again.
    """
    lfn = problematicDict['LFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = returnSingleResult(StorageElement(se).getFileMetadata(lfn))
    if (not res['OK']) and (re.search('File does not exist', res['Message'])):
      # The file is no longer Unavailable but has now dissapeared completely
      gLogger.info("PFNUnavailable replica (%d) found to be missing. Updating prognosis" % fileID)
      return self.changeProblematicPrognosis(fileID, 'PFNMissing')
    if (not res['OK']) or res['Value']['Unavailable']:
      gLogger.info("PFNUnavailable replica (%d) found to still be Unavailable" % fileID)
      return self.incrementProblematicRetry(fileID)
    if res['Value']['Lost']:
      gLogger.info("PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis" % fileID)
      return self.changeProblematicPrognosis(fileID, 'PFNLost')
    gLogger.info("PFNUnavailable replica (%d) is no longer Unavailable" % fileID)
    # Need to make the replica okay in the Catalog
    return self.__updateReplicaToChecked(problematicDict)

  # FIXME: Unused?
  def resolvePFNZeroSize(self, problematicDict):
    """ This takes the problematic dictionary returned by the integrity DB and
        resolves the PFNZeroSize prognosis

        A zero-size storage copy is removed (prognosis -> PFNMissing); a
        non-zero copy is cross-checked against the catalog registration.
    """
    lfn = problematicDict['LFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement(seName)

    res = returnSingleResult(se.getFileSize(lfn))
    if (not res['OK']) and (re.search('File does not exist', res['Message'])):
      gLogger.info("PFNZeroSize replica (%d) found to be missing. Updating prognosis" % problematicDict['FileID'])
      return self.changeProblematicPrognosis(fileID, 'PFNMissing')
    if not res['OK']:
      # Previously any other failure fell through to res['Value'] and crashed
      return self.__returnProblematicError(fileID, res)
    storageSize = res['Value']
    if storageSize == 0:
      res = returnSingleResult(se.removeFile(lfn))
      if not res['OK']:
        return self.__returnProblematicError(fileID, res)
      gLogger.info("PFNZeroSize replica (%d) removed. Updating prognosis" % problematicDict['FileID'])
      return self.changeProblematicPrognosis(fileID, 'PFNMissing')

    res = returnSingleResult(self.fc.getReplicas(lfn))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    if seName not in res['Value']:
      gLogger.info("PFNZeroSize replica (%d) not registered in catalog. Updating prognosis" % problematicDict['FileID'])
      return self.changeProblematicPrognosis(fileID, 'PFNNotRegistered')
    res = returnSingleResult(self.fc.getFileMetadata(lfn))
    if not res['OK']:
      return self.__returnProblematicError(fileID, res)
    catalogSize = res['Value']['Size']
    if catalogSize != storageSize:
      gLogger.info("PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis"
                   % problematicDict['FileID'])
      return self.changeProblematicPrognosis(fileID, 'CatalogPFNSizeMismatch')
    return self.__updateCompletedFiles('PFNZeroSize', fileID)

  ############################################################################################

  # FIXME: Unused?
  def resolveLFNZeroReplicas(self, problematicDict):
    """ This takes the problematic dictionary returned by the integrity DB and
        resolves the LFNZeroReplicas prognosis

        Scans the Tier1_MC_M-DST storage group for orphan storage files; if
        none are found the LFN is removed from the catalog.
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
    if res['OK'] and res['Value']:
      gLogger.info("LFNZeroReplicas file (%d) found to have replicas" % fileID)
    else:
      gLogger.info("LFNZeroReplicas file (%d) does not have replicas. Checking storage..." % fileID)
      pfnsFound = False
      for storageElementName in sorted(gConfig.getValue('Resources/StorageElementGroups/Tier1_MC_M-DST', [])):
        # NOTE(review): __getStoragePathExists is not defined in this class as
        # visible here, and its result is used without an OK check — confirm.
        res = self.__getStoragePathExists([lfn], storageElementName)
        if lfn in res['Value']:
          gLogger.info("LFNZeroReplicas file (%d) found storage file at %s" % (fileID, storageElementName))
          self.reportProblematicReplicas(
              [(lfn, 'deprecatedUrl', storageElementName, 'PFNNotRegistered')],
              storageElementName, 'PFNNotRegistered')
          pfnsFound = True
      if not pfnsFound:
        gLogger.info("LFNZeroReplicas file (%d) did not have storage files. Removing..." % fileID)
        res = returnSingleResult(self.fc.removeFile(lfn))
        if not res['OK']:
          gLogger.error('DataIntegrityClient: failed to remove file', res['Message'])
          # Increment the number of retries for this file
          # Fixed: was self.server.incrementProblematicRetry — 'server' is not
          # an attribute of this class; every other call site uses self.
          self.incrementProblematicRetry(fileID)
          return res
        gLogger.info("LFNZeroReplicas file (%d) removed from catalog" % fileID)
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles('LFNZeroReplicas', fileID)

  def _reportProblematicFiles(self, lfns, reason):
    """ Simple wrapper function around setFileProblematic """
    gLogger.info('The following %s files were found with %s' % (len(lfns), reason))
    for lfn in sorted(lfns):
      gLogger.info(lfn)
    res = self.setFileProblematic(lfns, reason, sourceComponent='DataIntegrityClient')
    if not res['OK']:
      gLogger.info('Failed to update integrity DB with files', res['Message'])
    else:
      gLogger.info('Successfully updated integrity DB with files')
class ValidateOutputDataAgent(DIRACValidateOutputDataAgent):
  """ Simple extension of base class

      Adds a BK->Catalog->SE check pass on top of the base agent's directory
      checks, and extends the directory search with the StorageUsage service.
  """

  def __init__(self, *args, **kwargs):
    """ c'tor """
    DIRACValidateOutputDataAgent.__init__(self, *args, **kwargs)
    # Clients are created lazily in initialize(); placeholders only here
    self.integrityClient = None
    self.fileCatalog = None
    self.transClient = None
    self.storageUsageClient = None

  def initialize(self):
    """ standard initialize method for DIRAC agents """
    res = DIRACValidateOutputDataAgent.initialize(self)
    if not res['OK']:
      return res
    self.integrityClient = DataIntegrityClient()
    self.fileCatalog = FileCatalog()
    self.transClient = TransformationClient()
    self.storageUsageClient = StorageUsageClient()
    return S_OK()

  def checkTransformationIntegrity(self, prodID):
    """ This method contains the real work

        Runs a BK->Catalog->SE pass for the whole production, then
        Catalog->BK and Catalog->SE passes for each existing output directory,
        restricted to files not already covered by the BK pass.

        :param prodID: production (transformation) ID
    """
    gLogger.info("-" * 40)
    gLogger.info("Checking the integrity of production %s" % prodID)
    gLogger.info("-" * 40)

    res = self.getTransformationDirectories(prodID)
    if not res['OK']:
      return res
    directories = res['Value']

    ######################################################
    #
    # This check performs BK->Catalog->SE
    #
    res = self.integrityClient.productionToCatalog(prodID)
    if not res['OK']:
      gLogger.error(res['Message'])
      return res
    bk2catalogMetadata = res['Value']['CatalogMetadata']
    bk2catalogReplicas = res['Value']['CatalogReplicas']
    res = self.integrityClient.checkPhysicalFiles(bk2catalogReplicas, bk2catalogMetadata)
    if not res['OK']:
      gLogger.error(res['Message'])
      return res

    if not directories:
      return S_OK()

    ######################################################
    #
    # This check performs Catalog->BK and Catalog->SE for possible output directories
    #
    res = self.fileCatalog.exists(directories)
    if not res['OK']:
      gLogger.error(res['Message'])
      return res
    # NOTE(review): 'Failed' from FileCatalog calls is normally a dict keyed by
    # path; unpacking as (directory, error) assumes .items()-like pairs — verify.
    for directory, error in res['Value']['Failed']:
      gLogger.error('Failed to determine existance of directory', '%s %s' % (directory, error))
    if res['Value']['Failed']:
      return S_ERROR("Failed to determine the existance of directories")
    directoryExists = res['Value']['Successful']
    for directory in sorted(directoryExists.keys()):
      if not directoryExists[directory]:
        continue
      iRes = self.integrityClient.catalogDirectoryToBK(directory)
      if not iRes['OK']:
        gLogger.error(iRes['Message'])
        return iRes
      catalogDirMetadata = iRes['Value']['CatalogMetadata']
      catalogDirReplicas = iRes['Value']['CatalogReplicas']
      catalogMetadata = {}
      catalogReplicas = {}
      # Only check files not already covered by the BK->Catalog pass above
      for lfn in catalogDirMetadata.keys():
        if lfn not in bk2catalogMetadata.keys():
          catalogMetadata[lfn] = catalogDirMetadata[lfn]
          if lfn in catalogDirReplicas:
            catalogReplicas[lfn] = catalogDirReplicas[lfn]
      if not catalogMetadata:
        continue
      res = self.integrityClient.checkPhysicalFiles(catalogReplicas, catalogMetadata)
      if not res['OK']:
        gLogger.error(res['Message'])
        return res
    return S_OK()

  def getTransformationDirectories(self, transID):
    """ get the directories for the supplied transformation from the transformation system

    :param self: self reference
    :param int transID: transformation ID
    """
    # Start from the base-class sources (TransformationDB, MetadataCatalog)
    res = DIRACValidateOutputDataAgent.getTransformationDirectories(self, transID)
    if res['OK']:
      directories = res['Value']
    else:
      return res
    # Optionally extend with directories known to the StorageUsage service
    if 'StorageUsage' in self.directoryLocations:
      res = self.storageUsageClient.getStorageDirectories('', '', transID, [])
      if not res['OK']:
        self.log.error("Failed to obtain storage usage directories", res['Message'])
        return res
      transDirectories = res['Value']
      directories = self._addDirs(transID, transDirectories, directories)
    if not directories:
      self.log.info("No output directories found")
    directories = sorted(directories)
    return S_OK(directories)
class DataIntegrityClient( Client ): """ The following methods are supported in the service but are not mentioned explicitly here: getProblematic() Obtains a problematic file from the IntegrityDB based on the LastUpdate time getPrognosisProblematics(prognosis) Obtains all the problematics of a particular prognosis from the integrityDB getProblematicsSummary() Obtains a count of the number of problematics for each prognosis found getDistinctPrognosis() Obtains the distinct prognosis found in the integrityDB getTransformationProblematics(prodID) Obtains the problematics for a given production incrementProblematicRetry(fileID) Increments the retry count for the supplied file ID changeProblematicPrognosis(fileID,newPrognosis) Changes the prognosis of the supplied file to the new prognosis setProblematicStatus(fileID,status) Updates the status of a problematic in the integrityDB removeProblematic(self,fileID) This removes the specified file ID from the integrity DB insertProblematic(sourceComponent,fileMetadata) Inserts file with supplied metadata into the integrity DB """ def __init__( self, **kwargs ): Client.__init__( self, **kwargs ) self.setServer( 'DataManagement/DataIntegrity' ) self.dm = DataManager() self.fc = FileCatalog() ########################################################################## # # This section contains the specific methods for LFC->SE checks # def catalogDirectoryToSE( self, lfnDir ): """ This obtains the replica and metadata information from the catalog for the supplied directory and checks against the storage elements. 
""" gLogger.info( "-" * 40 ) gLogger.info( "Performing the LFC->SE check" ) gLogger.info( "-" * 40 ) if type( lfnDir ) in types.StringTypes: lfnDir = [lfnDir] res = self.__getCatalogDirectoryContents( lfnDir ) if not res['OK']: return res replicas = res['Value']['Replicas'] catalogMetadata = res['Value']['Metadata'] res = self.__checkPhysicalFiles( replicas, catalogMetadata ) if not res['OK']: return res resDict = {'CatalogMetadata':catalogMetadata, 'CatalogReplicas':replicas} return S_OK( resDict ) def catalogFileToSE( self, lfns ): """ This obtains the replica and metadata information from the catalog and checks against the storage elements. """ gLogger.info( "-" * 40 ) gLogger.info( "Performing the LFC->SE check" ) gLogger.info( "-" * 40 ) if type( lfns ) in types.StringTypes: lfns = [lfns] res = self.__getCatalogMetadata( lfns ) if not res['OK']: return res catalogMetadata = res['Value'] res = self.__getCatalogReplicas( catalogMetadata.keys() ) if not res['OK']: return res replicas = res['Value'] res = self.__checkPhysicalFiles( replicas, catalogMetadata ) if not res['OK']: return res resDict = {'CatalogMetadata':catalogMetadata, 'CatalogReplicas':replicas} return S_OK( resDict ) def checkPhysicalFiles( self, replicas, catalogMetadata, ses = [] ): """ This obtains takes the supplied replica and metadata information obtained from the catalog and checks against the storage elements. 
""" gLogger.info( "-" * 40 ) gLogger.info( "Performing the LFC->SE check" ) gLogger.info( "-" * 40 ) return self.__checkPhysicalFiles( replicas, catalogMetadata, ses = ses ) def __checkPhysicalFiles( self, replicas, catalogMetadata, ses = [] ): """ This obtains the physical file metadata and checks the metadata against the catalog entries """ sePfns = {} pfnLfns = {} for lfn, replicaDict in replicas.items(): for se, pfn in replicaDict.items(): if ( ses ) and ( se not in ses ): continue if not sePfns.has_key( se ): sePfns[se] = [] sePfns[se].append( pfn ) pfnLfns[pfn] = lfn gLogger.info( '%s %s' % ( 'Storage Element'.ljust( 20 ), 'Replicas'.rjust( 20 ) ) ) for site in sortList( sePfns.keys() ): files = len( sePfns[site] ) gLogger.info( '%s %s' % ( site.ljust( 20 ), str( files ).rjust( 20 ) ) ) for se in sortList( sePfns.keys() ): pfns = sePfns[se] pfnDict = {} for pfn in pfns: pfnDict[pfn] = pfnLfns[pfn] sizeMismatch = [] res = self.__checkPhysicalFileMetadata( pfnDict, se ) if not res['OK']: gLogger.error( 'Failed to get physical file metadata.', res['Message'] ) return res for pfn, metadata in res['Value'].items(): if catalogMetadata.has_key( pfnLfns[pfn] ): if ( metadata['Size'] != catalogMetadata[pfnLfns[pfn]]['Size'] ) and ( metadata['Size'] != 0 ): sizeMismatch.append( ( pfnLfns[pfn], pfn, se, 'CatalogPFNSizeMismatch' ) ) if sizeMismatch: self.__reportProblematicReplicas( sizeMismatch, se, 'CatalogPFNSizeMismatch' ) return S_OK() def __checkPhysicalFileMetadata( self, pfnLfns, se ): """ Check obtain the physical file metadata and check the files are available """ gLogger.info( 'Checking the integrity of %s physical files at %s' % ( len( pfnLfns ), se ) ) res = StorageElement( se ).getFileMetadata( pfnLfns.keys() ) if not res['OK']: gLogger.error( 'Failed to get metadata for pfns.', res['Message'] ) return res pfnMetadataDict = res['Value']['Successful'] # If the replicas are completely missing missingReplicas = [] for pfn, reason in 
res['Value']['Failed'].items(): if re.search( 'File does not exist', reason ): missingReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNMissing' ) ) if missingReplicas: self.__reportProblematicReplicas( missingReplicas, se, 'PFNMissing' ) lostReplicas = [] unavailableReplicas = [] zeroSizeReplicas = [] # If the files are not accessible for pfn, pfnMetadata in pfnMetadataDict.items(): if pfnMetadata['Lost']: lostReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNLost' ) ) if pfnMetadata['Unavailable']: unavailableReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNUnavailable' ) ) if pfnMetadata['Size'] == 0: zeroSizeReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNZeroSize' ) ) if lostReplicas: self.__reportProblematicReplicas( lostReplicas, se, 'PFNLost' ) if unavailableReplicas: self.__reportProblematicReplicas( unavailableReplicas, se, 'PFNUnavailable' ) if zeroSizeReplicas: self.__reportProblematicReplicas( zeroSizeReplicas, se, 'PFNZeroSize' ) gLogger.info( 'Checking the integrity of physical files at %s complete' % se ) return S_OK( pfnMetadataDict ) ########################################################################## # # This section contains the specific methods for SE->LFC checks # def storageDirectoryToCatalog( self, lfnDir, storageElement ): """ This obtains the file found on the storage element in the supplied directories and determines whether they exist in the catalog and checks their metadata elements """ gLogger.info( "-" * 40 ) gLogger.info( "Performing the SE->LFC check at %s" % storageElement ) gLogger.info( "-" * 40 ) if type( lfnDir ) in types.StringTypes: lfnDir = [lfnDir] res = self.__getStorageDirectoryContents( lfnDir, storageElement ) if not res['OK']: return res storageFileMetadata = res['Value'] if storageFileMetadata: return self.__checkCatalogForSEFiles( storageFileMetadata, storageElement ) return S_OK( {'CatalogMetadata':{}, 'StorageMetadata':{}} ) def __checkCatalogForSEFiles( self, storageMetadata, storageElement ): gLogger.info( 'Checking %s 
storage files exist in the catalog' % len( storageMetadata ) ) # RF_NOTE : this comment is completely wrong # First get all the PFNs as they should be registered in the catalog res = StorageElement( storageElement ).getPfnForProtocol( storageMetadata.keys(), withPort = False ) if not res['OK']: gLogger.error( "Failed to get registered PFNs for physical files", res['Message'] ) return res for pfn, error in res['Value']['Failed'].items(): gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) ) if res['Value']['Failed']: return S_ERROR( 'Failed to obtain registered PFNs from physical file' ) for original, registered in res['Value']['Successful'].items(): storageMetadata[registered] = storageMetadata.pop( original ) # Determine whether these PFNs are registered and if so obtain the LFN res = self.fc.getLFNForPFN( storageMetadata.keys() ) if not res['OK']: gLogger.error( "Failed to get registered LFNs for PFNs", res['Message'] ) return res failedPfns = res['Value']['Failed'] notRegisteredPfns = [] for pfn, error in failedPfns.items(): if re.search( 'No such file or directory', error ): notRegisteredPfns.append( ( storageMetadata[pfn]['LFN'], pfn, storageElement, 'PFNNotRegistered' ) ) failedPfns.pop( pfn ) if notRegisteredPfns: self.__reportProblematicReplicas( notRegisteredPfns, storageElement, 'PFNNotRegistered' ) if failedPfns: return S_ERROR( 'Failed to obtain LFNs for PFNs' ) pfnLfns = res['Value']['Successful'] for pfn in storageMetadata.keys(): pfnMetadata = storageMetadata.pop( pfn ) if pfn in pfnLfns.keys(): lfn = pfnLfns[pfn] storageMetadata[lfn] = pfnMetadata storageMetadata[lfn]['PFN'] = pfn # For the LFNs found to be registered obtain the file metadata from the catalog and verify against the storage metadata res = self.__getCatalogMetadata( storageMetadata.keys() ) if not res['OK']: return res catalogMetadata = res['Value'] sizeMismatch = [] for lfn, lfnCatalogMetadata in catalogMetadata.items(): lfnStorageMetadata = 
storageMetadata[lfn] if ( lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size'] ) and ( lfnStorageMetadata['Size'] != 0 ): sizeMismatch.append( ( lfn, storageMetadata[lfn]['PFN'], storageElement, 'CatalogPFNSizeMismatch' ) ) if sizeMismatch: self.__reportProblematicReplicas( sizeMismatch, storageElement, 'CatalogPFNSizeMismatch' ) gLogger.info( 'Checking storage files exist in the catalog complete' ) resDict = {'CatalogMetadata':catalogMetadata, 'StorageMetadata':storageMetadata} return S_OK( resDict ) def getStorageDirectoryContents( self, lfnDir, storageElement ): """ This obtains takes the supplied lfn directories and recursively obtains the files in the supplied storage element """ return self.__getStorageDirectoryContents( lfnDir, storageElement ) def __getStorageDirectoryContents( self, lfnDir, storageElement ): """ Obtians the contents of the supplied directory on the storage """ gLogger.info( 'Obtaining the contents for %s directories at %s' % ( len( lfnDir ), storageElement ) ) se = StorageElement( storageElement ) res = se.getPfnForLfn( lfnDir ) if not res['OK']: gLogger.error( "Failed to get PFNs for directories", res['Message'] ) return res for directory, error in res['Value']['Failed'].items(): gLogger.error( 'Failed to obtain directory PFN from LFNs', '%s %s' % ( directory, error ) ) if res['Value']['Failed']: return S_ERROR( 'Failed to obtain directory PFN from LFNs' ) storageDirectories = res['Value']['Successful'].values() res = se.exists( storageDirectories ) if not res['OK']: gLogger.error( "Failed to obtain existance of directories", res['Message'] ) return res for directory, error in res['Value']['Failed'].items(): gLogger.error( 'Failed to determine existance of directory', '%s %s' % ( directory, error ) ) if res['Value']['Failed']: return S_ERROR( 'Failed to determine existance of directory' ) directoryExists = res['Value']['Successful'] activeDirs = [] for directory in sortList( directoryExists.keys() ): exists = 
directoryExists[directory] if exists: activeDirs.append( directory ) allFiles = {} while len( activeDirs ) > 0: currentDir = activeDirs[0] res = se.listDirectory( currentDir ) activeDirs.remove( currentDir ) if not res['OK']: gLogger.error( 'Failed to get directory contents', res['Message'] ) return res elif res['Value']['Failed'].has_key( currentDir ): gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Value']['Failed'][currentDir] ) ) return S_ERROR( res['Value']['Failed'][currentDir] ) else: dirContents = res['Value']['Successful'][currentDir] activeDirs.extend( dirContents['SubDirs'] ) fileMetadata = dirContents['Files'] # RF_NOTE This ugly trick is needed because se.getPfnPath does not follow the Successful/Failed convention # res = { "Successful" : {}, "Failed" : {} } # for pfn in fileMetadata: # inRes = se.getPfnPath( pfn ) # if inRes["OK"]: # res["Successful"][pfn] = inRes["Value"] # else: # res["Failed"][pfn] = inRes["Message"] res = se.getLfnForPfn( fileMetadata.keys() ) if not res['OK']: gLogger.error( 'Failed to get directory content LFNs', res['Message'] ) return res for pfn, error in res['Value']['Failed'].items(): gLogger.error( "Failed to get LFN for PFN", "%s %s" % ( pfn, error ) ) if res['Value']['Failed']: return S_ERROR( "Failed to get LFNs for PFNs" ) pfnLfns = res['Value']['Successful'] for pfn, lfn in pfnLfns.items(): fileMetadata[pfn]['LFN'] = lfn allFiles.update( fileMetadata ) zeroSizeFiles = [] lostFiles = [] unavailableFiles = [] for pfn in sortList( allFiles.keys() ): if os.path.basename( pfn ) == 'dirac_directory': allFiles.pop( pfn ) else: metadata = allFiles[pfn] if metadata['Size'] == 0: zeroSizeFiles.append( ( metadata['LFN'], pfn, storageElement, 'PFNZeroSize' ) ) # if metadata['Lost']: # lostFiles.append((metadata['LFN'],pfn,storageElement,'PFNLost')) # if metadata['Unavailable']: # unavailableFiles.append((metadata['LFN'],pfn,storageElement,'PFNUnavailable')) if zeroSizeFiles: 
self.__reportProblematicReplicas( zeroSizeFiles, storageElement, 'PFNZeroSize' ) if lostFiles: self.__reportProblematicReplicas( lostFiles, storageElement, 'PFNLost' ) if unavailableFiles: self.__reportProblematicReplicas( unavailableFiles, storageElement, 'PFNUnavailable' ) gLogger.info( 'Obtained at total of %s files for directories at %s' % ( len( allFiles ), storageElement ) ) return S_OK( allFiles ) def __getStoragePathExists( self, lfnPaths, storageElement ): gLogger.info( 'Determining the existance of %d files at %s' % ( len( lfnPaths ), storageElement ) ) se = StorageElement( storageElement ) res = se.getPfnForLfn( lfnPaths ) if not res['OK']: gLogger.error( "Failed to get PFNs for LFNs", res['Message'] ) return res for lfnPath, error in res['Value']['Failed'].items(): gLogger.error( 'Failed to obtain PFN from LFN', '%s %s' % ( lfnPath, error ) ) if res['Value']['Failed']: return S_ERROR( 'Failed to obtain PFNs from LFNs' ) lfnPfns = res['Value']['Successful'] pfnLfns = {} for lfn, pfn in lfnPfns.items(): pfnLfns[pfn] = lfn res = se.exists( pfnLfns ) if not res['OK']: gLogger.error( "Failed to obtain existance of paths", res['Message'] ) return res for lfnPath, error in res['Value']['Failed'].items(): gLogger.error( 'Failed to determine existance of path', '%s %s' % ( lfnPath, error ) ) if res['Value']['Failed']: return S_ERROR( 'Failed to determine existance of paths' ) pathExists = res['Value']['Successful'] resDict = {} for pfn, exists in pathExists.items(): if exists: resDict[pfnLfns[pfn]] = pfn return S_OK( resDict ) ########################################################################## # # This section contains the specific methods for obtaining replica and metadata information from the catalog # def __getCatalogDirectoryContents( self, lfnDir ): """ Obtain the contents of the supplied directory """ gLogger.info( 'Obtaining the catalog contents for %s directories' % len( lfnDir ) ) activeDirs = lfnDir allFiles = {} while len( activeDirs ) > 0: 
currentDir = activeDirs[0] res = self.fc.listDirectory( currentDir ) activeDirs.remove( currentDir ) if not res['OK']: gLogger.error( 'Failed to get directory contents', res['Message'] ) return res elif res['Value']['Failed'].has_key( currentDir ): gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Value']['Failed'][currentDir] ) ) else: dirContents = res['Value']['Successful'][currentDir] activeDirs.extend( dirContents['SubDirs'] ) allFiles.update( dirContents['Files'] ) zeroReplicaFiles = [] zeroSizeFiles = [] allReplicaDict = {} allMetadataDict = {} for lfn, lfnDict in allFiles.items(): lfnReplicas = {} for se, replicaDict in lfnDict['Replicas'].items(): lfnReplicas[se] = replicaDict['PFN'] if not lfnReplicas: zeroReplicaFiles.append( lfn ) allReplicaDict[lfn] = lfnReplicas allMetadataDict[lfn] = lfnDict['MetaData'] if lfnDict['MetaData']['Size'] == 0: zeroSizeFiles.append( lfn ) if zeroReplicaFiles: self.__reportProblematicFiles( zeroReplicaFiles, 'LFNZeroReplicas' ) if zeroSizeFiles: self.__reportProblematicFiles( zeroSizeFiles, 'LFNZeroSize' ) gLogger.info( 'Obtained at total of %s files for the supplied directories' % len( allMetadataDict ) ) resDict = {'Metadata':allMetadataDict, 'Replicas':allReplicaDict} return S_OK( resDict ) def __getCatalogReplicas( self, lfns ): """ Obtain the file replicas from the catalog while checking that there are replicas """ gLogger.info( 'Obtaining the replicas for %s files' % len( lfns ) ) zeroReplicaFiles = [] res = self.fc.getReplicas( lfns, allStatus = True ) if not res['OK']: gLogger.error( 'Failed to get catalog replicas', res['Message'] ) return res allReplicas = res['Value']['Successful'] for lfn, error in res['Value']['Failed'].items(): if re.search( 'File has zero replicas', error ): zeroReplicaFiles.append( lfn ) if zeroReplicaFiles: self.__reportProblematicFiles( zeroReplicaFiles, 'LFNZeroReplicas' ) gLogger.info( 'Obtaining the replicas for files complete' ) return S_OK( allReplicas ) 
def __getCatalogMetadata( self, lfns ): """ Obtain the file metadata from the catalog while checking they exist """ if not lfns: return S_OK( {} ) gLogger.info( 'Obtaining the catalog metadata for %s files' % len( lfns ) ) missingCatalogFiles = [] zeroSizeFiles = [] res = self.fc.getFileMetadata( lfns ) if not res['OK']: gLogger.error( 'Failed to get catalog metadata', res['Message'] ) return res allMetadata = res['Value']['Successful'] for lfn, error in res['Value']['Failed'].items(): if re.search( 'No such file or directory', error ): missingCatalogFiles.append( lfn ) if missingCatalogFiles: self.__reportProblematicFiles( missingCatalogFiles, 'LFNCatalogMissing' ) for lfn, metadata in allMetadata.items(): if metadata['Size'] == 0: zeroSizeFiles.append( lfn ) if zeroSizeFiles: self.__reportProblematicFiles( zeroSizeFiles, 'LFNZeroSize' ) gLogger.info( 'Obtaining the catalog metadata complete' ) return S_OK( allMetadata ) ########################################################################## # # This section contains the methods for inserting problematic files into the integrity DB # def __reportProblematicFiles( self, lfns, reason ): """ Simple wrapper function around setFileProblematic """ gLogger.info( 'The following %s files were found with %s' % ( len( lfns ), reason ) ) for lfn in sortList( lfns ): gLogger.info( lfn ) res = self.setFileProblematic( lfns, reason, sourceComponent = 'DataIntegrityClient' ) if not res['OK']: gLogger.info( 'Failed to update integrity DB with files', res['Message'] ) else: gLogger.info( 'Successfully updated integrity DB with files' ) def setFileProblematic( self, lfn, reason, sourceComponent = '' ): """ This method updates the status of the file in the FileCatalog and the IntegrityDB lfn - the lfn of the file reason - this is given to the integrity DB and should reflect the problem observed with the file sourceComponent is the component issuing the request. 
""" if type( lfn ) == types.ListType: lfns = lfn elif type( lfn ) == types.StringType: lfns = [lfn] else: errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN." gLogger.error( errStr ) return S_ERROR( errStr ) gLogger.info( "DataIntegrityClient.setFileProblematic: Attempting to update %s files." % len( lfns ) ) fileMetadata = {} for lfn in lfns: fileMetadata[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':'', 'SE':''} res = self.insertProblematic( sourceComponent, fileMetadata ) if not res['OK']: gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB" ) return res def __reportProblematicReplicas( self, replicaTuple, se, reason ): """ Simple wrapper function around setReplicaProblematic """ gLogger.info( 'The following %s files had %s at %s' % ( len( replicaTuple ), reason, se ) ) for lfn, pfn, se, reason in sortList( replicaTuple ): if lfn: gLogger.info( lfn ) else: gLogger.info( pfn ) res = self.setReplicaProblematic( replicaTuple, sourceComponent = 'DataIntegrityClient' ) if not res['OK']: gLogger.info( 'Failed to update integrity DB with replicas', res['Message'] ) else: gLogger.info( 'Successfully updated integrity DB with replicas' ) def setReplicaProblematic( self, replicaTuple, sourceComponent = '' ): """ This method updates the status of the replica in the FileCatalog and the IntegrityDB The supplied replicaDict should be of the form {lfn :{'PFN':pfn,'SE':se,'Prognosis':prognosis} lfn - the lfn of the file pfn - the pfn if available (otherwise '') se - the storage element of the problematic replica (otherwise '') prognosis - this is given to the integrity DB and should reflect the problem observed with the file sourceComponent is the component issuing the request. 
""" if type( replicaTuple ) == types.TupleType: replicaTuple = [replicaTuple] elif type( replicaTuple ) == types.ListType: pass else: errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples." gLogger.error( errStr ) return S_ERROR( errStr ) gLogger.info( "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas." % len( replicaTuple ) ) replicaDict = {} for lfn, pfn, se, reason in replicaTuple: replicaDict[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':pfn, 'SE':se} res = self.insertProblematic( sourceComponent, replicaDict ) if not res['OK']: gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB" ) return res for lfn in replicaDict.keys(): replicaDict[lfn]['Status'] = 'Problematic' res = self.fc.setReplicaStatus( replicaDict ) if not res['OK']: errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas." gLogger.error( errStr, res['Message'] ) return res failed = res['Value']['Failed'] successful = res['Value']['Successful'] resDict = {'Successful':successful, 'Failed':failed} return S_OK( resDict ) ########################################################################## # # This section contains the resolution methods for various prognoses # def __updateCompletedFiles( self, prognosis, fileID ): gLogger.info( "%s file (%d) is resolved" % ( prognosis, fileID ) ) return self.setProblematicStatus( fileID, 'Resolved' ) def __returnProblematicError( self, fileID, res ): self.incrementProblematicRetry( fileID ) gLogger.error( res['Message'] ) return res def __getRegisteredPFNLFN( self, pfn, storageElement ): res = StorageElement( storageElement ).getPfnForProtocol( pfn, withPort = False ) if not res['OK']: gLogger.error( "Failed to get registered PFN for physical files", res['Message'] ) return res for pfn, error in res['Value']['Failed'].items(): gLogger.error( 'Failed to obtain registered PFN for physical file', '%s 
%s' % ( pfn, error ) ) return S_ERROR( 'Failed to obtain registered PFNs from physical file' ) registeredPFN = res['Value']['Successful'][pfn] res = Utils.executeSingleFileOrDirWrapper( self.fc.getLFNForPFN( registeredPFN ) ) if ( not res['OK'] ) and re.search( 'No such file or directory', res['Message'] ): return S_OK( False ) return S_OK( res['Value'] ) def __updateReplicaToChecked( self, problematicDict ): lfn = problematicDict['LFN'] fileID = problematicDict['FileID'] prognosis = problematicDict['Prognosis'] problematicDict['Status'] = 'Checked' res = Utils.executeSingleFileOrDirWrapper( self.fc.setReplicaStatus( {lfn:problematicDict} ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) gLogger.info( "%s replica (%d) is updated to Checked status" % ( prognosis, fileID ) ) return self.__updateCompletedFiles( prognosis, fileID ) def resolveCatalogPFNSizeMismatch( self, problematicDict ): """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis """ lfn = problematicDict['LFN'] pfn = problematicDict['PFN'] se = problematicDict['SE'] fileID = problematicDict['FileID'] res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileSize( lfn ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) catalogSize = res['Value'] res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).getFileSize( pfn ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) storageSize = res['Value'] bkKCatalog = FileCatalog( ['BookkeepingDB'] ) res = Utils.executeSingleFileOrDirWrapper( bkKCatalog.getFileSize( lfn ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) bookkeepingSize = res['Value'] if bookkeepingSize == catalogSize == storageSize: gLogger.info( "CatalogPFNSizeMismatch replica (%d) matched all registered sizes." 
% fileID ) return self.__updateReplicaToChecked( problematicDict ) if ( catalogSize == bookkeepingSize ): gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID ) res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) if len( res['Value'] ) <= 1: gLogger.info( "CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID ) return S_ERROR( "Not removing catalog file mismatch since the only replica" ) else: gLogger.info( "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID ) res = self.dm.removeReplica( se, lfn ) if not res['OK']: return self.__returnProblematicError( fileID, res ) return self.__updateCompletedFiles( 'CatalogPFNSizeMismatch', fileID ) if ( catalogSize != bookkeepingSize ) and ( bookkeepingSize == storageSize ): gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID ) res = self.__updateReplicaToChecked( problematicDict ) if not res['OK']: return self.__returnProblematicError( fileID, res ) return self.changeProblematicPrognosis( fileID, 'BKCatalogSizeMismatch' ) gLogger.info( "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. 
Updating retry count" % fileID ) return self.incrementProblematicRetry( fileID ) def resolvePFNNotRegistered( self, problematicDict ): """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis """ lfn = problematicDict['LFN'] pfn = problematicDict['PFN'] seName = problematicDict['SE'] fileID = problematicDict['FileID'] se = StorageElement( seName ) res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) if not res['Value']: # The file does not exist in the catalog res = Utils.executeSingleFileOrDirWrapper( se.removeFile( pfn ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) return self.__updateCompletedFiles( 'PFNNotRegistered', fileID ) res = Utils.executeSingleFileOrDirWrapper( se.getFileMetadata( pfn ) ) if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ): gLogger.info( "PFNNotRegistered replica (%d) found to be missing." % fileID ) return self.__updateCompletedFiles( 'PFNNotRegistered', fileID ) elif not res['OK']: return self.__returnProblematicError( fileID, res ) storageMetadata = res['Value'] if storageMetadata['Lost']: gLogger.info( "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis" % fileID ) return self.changeProblematicPrognosis( fileID, 'PFNLost' ) if storageMetadata['Unavailable']: gLogger.info( "PFNNotRegistered replica (%d) found to be Unavailable. 
Updating retry count" % fileID ) return self.incrementProblematicRetry( fileID ) # HACK until we can obtain the space token descriptions through GFAL site = seName.split( '_' )[0].split( '-' )[0] if not storageMetadata['Cached']: if lfn.endswith( '.raw' ): seName = '%s-RAW' % site else: seName = '%s-RDST' % site elif storageMetadata['Migrated']: if lfn.startswith( '/lhcb/data' ): seName = '%s_M-DST' % site else: seName = '%s_MC_M-DST' % site else: if lfn.startswith( '/lhcb/data' ): seName = '%s-DST' % site else: seName = '%s_MC-DST' % site problematicDict['SE'] = seName res = se.getPfnForProtocol( pfn, withPort = False ) if not res['OK']: return self.__returnProblematicError( fileID, res ) for pfn, error in res['Value']['Failed'].items(): gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) ) return S_ERROR( 'Failed to obtain registered PFNs from physical file' ) problematicDict['PFN'] = res['Value']['Successful'][pfn] res = Utils.executeSingleFileOrDirWrapper( self.fc.addReplica( {lfn:problematicDict} ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileMetadata( lfn ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) if res['Value']['Size'] != storageMetadata['Size']: gLogger.info( "PFNNotRegistered replica (%d) found with catalog size mismatch. 
Updating prognosis" % fileID ) return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' ) return self.__updateCompletedFiles( 'PFNNotRegistered', fileID ) def resolveLFNCatalogMissing( self, problematicDict ): """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis """ lfn = problematicDict['LFN'] fileID = problematicDict['FileID'] res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) if res['Value']: return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID ) # Remove the file from all catalogs # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path res = Utils.executeSingleFileOrDirWrapper( self.fc.removeFile( lfn ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID ) def resolvePFNMissing( self, problematicDict ): """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis """ pfn = problematicDict['PFN'] se = problematicDict['SE'] lfn = problematicDict['LFN'] fileID = problematicDict['FileID'] res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) if not res['Value']: gLogger.info( "PFNMissing file (%d) no longer exists in catalog" % fileID ) return self.__updateCompletedFiles( 'PFNMissing', fileID ) res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).exists( pfn ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) if res['Value']: gLogger.info( "PFNMissing replica (%d) is no longer missing" % fileID ) return self.__updateReplicaToChecked( problematicDict ) gLogger.info( "PFNMissing replica (%d) does not exist" % fileID ) res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn, allStatus = True ) ) 
if not res['OK']: return self.__returnProblematicError( fileID, res ) replicas = res['Value'] seSite = se.split( '_' )[0].split( '-' )[0] found = False print replicas for replicaSE in replicas.keys(): if re.search( seSite, replicaSE ): found = True problematicDict['SE'] = replicaSE se = replicaSE if not found: gLogger.info( "PFNMissing replica (%d) is no longer registered at SE. Resolved." % fileID ) return self.__updateCompletedFiles( 'PFNMissing', fileID ) gLogger.info( "PFNMissing replica (%d) does not exist. Removing from catalog..." % fileID ) res = Utils.executeSingleFileOrDirWrapper( self.fc.removeReplica( {lfn:problematicDict} ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) if len( replicas ) == 1: gLogger.info( "PFNMissing replica (%d) had a single replica. Updating prognosis" % fileID ) return self.changeProblematicPrognosis( fileID, 'LFNZeroReplicas' ) res = self.dm.replicateAndRegister( problematicDict['LFN'], se ) if not res['OK']: return self.__returnProblematicError( fileID, res ) # If we get here the problem is solved so we can update the integrityDB return self.__updateCompletedFiles( 'PFNMissing', fileID ) def resolvePFNUnavailable( self, problematicDict ): """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis """ pfn = problematicDict['PFN'] se = problematicDict['SE'] fileID = problematicDict['FileID'] res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).getFileMetadata( pfn ) ) if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ): # The file is no longer Unavailable but has now dissapeared completely gLogger.info( "PFNUnavailable replica (%d) found to be missing. 
Updating prognosis" % fileID ) return self.changeProblematicPrognosis( fileID, 'PFNMissing' ) if ( not res['OK'] ) or res['Value']['Unavailable']: gLogger.info( "PFNUnavailable replica (%d) found to still be Unavailable" % fileID ) return self.incrementProblematicRetry( fileID ) if res['Value']['Lost']: gLogger.info( "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis" % fileID ) return self.changeProblematicPrognosis( fileID, 'PFNLost' ) gLogger.info( "PFNUnavailable replica (%d) is no longer Unavailable" % fileID ) # Need to make the replica okay in the Catalog return self.__updateReplicaToChecked( problematicDict ) def resolvePFNZeroSize( self, problematicDict ): """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis """ pfn = problematicDict['PFN'] seName = problematicDict['SE'] fileID = problematicDict['FileID'] se = StorageElement( seName ) res = Utils.executeSingleFileOrDirWrapper( se.getFileSize( pfn ) ) if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ): gLogger.info( "PFNZeroSize replica (%d) found to be missing. Updating prognosis" % problematicDict['FileID'] ) return self.changeProblematicPrognosis( fileID, 'PFNMissing' ) storageSize = res['Value'] if storageSize == 0: res = Utils.executeSingleFileOrDirWrapper( se.removeFile( pfn ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) gLogger.info( "PFNZeroSize replica (%d) removed. Updating prognosis" % problematicDict['FileID'] ) return self.changeProblematicPrognosis( fileID, 'PFNMissing' ) res = self.__getRegisteredPFNLFN( pfn, seName ) if not res['OK']: return self.__returnProblematicError( fileID, res ) lfn = res['Value'] if not lfn: gLogger.info( "PFNZeroSize replica (%d) not registered in catalog. 
Updating prognosis" % problematicDict['FileID'] ) return self.changeProblematicPrognosis( fileID, 'PFNNotRegistered' ) res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileMetadata( lfn ) ) if not res['OK']: return self.__returnProblematicError( fileID, res ) catalogSize = res['Value']['Size'] if catalogSize != storageSize: gLogger.info( "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis" % problematicDict['FileID'] ) return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' ) return self.__updateCompletedFiles( 'PFNZeroSize', fileID ) ############################################################################################ def resolveLFNZeroReplicas( self, problematicDict ): """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis """ lfn = problematicDict['LFN'] fileID = problematicDict['FileID'] res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn, allStatus = True ) ) if res['OK'] and res['Value']: gLogger.info( "LFNZeroReplicas file (%d) found to have replicas" % fileID ) else: gLogger.info( "LFNZeroReplicas file (%d) does not have replicas. Checking storage..." % fileID ) pfnsFound = False for storageElementName in sortList( gConfig.getValue( 'Resources/StorageElementGroups/Tier1_MC_M-DST', [] ) ): res = self.__getStoragePathExists( [lfn], storageElementName ) if res['Value'].has_key( lfn ): gLogger.info( "LFNZeroReplicas file (%d) found storage file at %s" % ( fileID, storageElementName ) ) pfn = res['Value'][lfn] self.__reportProblematicReplicas( [( lfn, pfn, storageElementName, 'PFNNotRegistered' )], storageElementName, 'PFNNotRegistered' ) pfnsFound = True if not pfnsFound: gLogger.info( "LFNZeroReplicas file (%d) did not have storage files. Removing..." 
% fileID ) res = Utils.executeSingleFileOrDirWrapper( self.fc.removeFile( lfn ) ) if not res['OK']: gLogger.error( res['Message'] ) # Increment the number of retries for this file self.server.incrementProblematicRetry( fileID ) return res gLogger.info( "LFNZeroReplicas file (%d) removed from catalog" % fileID ) # If we get here the problem is solved so we can update the integrityDB return self.__updateCompletedFiles( 'LFNZeroReplicas', fileID )
class RequestPreparationAgent(AgentModule):
    """Stager agent handling the New -> Waiting transition of cache replicas.

    Replicas found in the StagerDB with Status='New' are validated against
    the FileCatalog (existence, non-zero size, presence at the requested SE);
    invalid ones are terminally failed, valid ones have their metadata
    recorded and are promoted to 'Waiting'.
    """

    def initialize(self):
        """Instantiate the clients used by the agent and set the shifter proxy."""
        self.fileCatalog = FileCatalog()
        self.dm = DataManager()
        self.stagerClient = StorageManagerClient()
        self.dataIntegrityClient = DataIntegrityClient()
        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')
        return S_OK()

    def execute(self):
        """Agent execution entry point: process the New replicas."""
        res = self.prepareNewReplicas()
        return res

    def prepareNewReplicas(self):
        """ This is the first logical task to be executed and manages the
        New->Waiting transition of the Replicas
        """
        res = self.__getNewReplicas()
        if not res['OK']:
            gLogger.fatal(
                "RequestPreparation.prepareNewReplicas: Failed to get replicas from StagerDB.",
                res['Message'])
            return res
        if not res['Value']:
            gLogger.info("There were no New replicas found")
            return res
        replicas = res['Value']['Replicas']
        replicaIDs = res['Value']['ReplicaIDs']
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained %s New replicas for preparation."
            % len(replicaIDs))

        # Check that the files exist in the FileCatalog
        res = self.__getExistingFiles(replicas.keys())
        if not res['OK']:
            return res
        exist = res['Value']['Exist']
        terminal = res['Value']['Missing']
        failed = res['Value']['Failed']
        if not exist:
            gLogger.error(
                'RequestPreparation.prepareNewReplicas: Failed determine existance of any files'
            )
            return S_OK()
        terminalReplicaIDs = {}
        for lfn, reason in terminal.items():
            # Every replica of a missing file is terminally failed
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: %s files exist in the FileCatalog."
            % len(exist))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files do not exist in the FileCatalog."
                % len(terminal))

        # Obtain the file sizes from the FileCatalog
        res = self.__getFileSize(exist)
        if not res['OK']:
            return res
        failed.update(res['Value']['Failed'])
        terminal = res['Value']['ZeroSize']
        fileSizes = res['Value']['FileSizes']
        if not fileSizes:
            gLogger.error(
                'RequestPreparation.prepareNewReplicas: Failed determine sizes of any files'
            )
            return S_OK()
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained %s file sizes from the FileCatalog."
            % len(fileSizes))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files registered with zero size in the FileCatalog."
                % len(terminal))

        # Obtain the replicas from the FileCatalog
        res = self.__getFileReplicas(fileSizes.keys())
        if not res['OK']:
            return res
        failed.update(res['Value']['Failed'])
        terminal = res['Value']['ZeroReplicas']
        fileReplicas = res['Value']['Replicas']
        if not fileReplicas:
            gLogger.error(
                'RequestPreparation.prepareNewReplicas: Failed determine replicas for any files'
            )
            return S_OK()
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained replica information for %s file from the FileCatalog."
            % len(fileReplicas))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files registered with zero replicas in the FileCatalog."
                % len(terminal))

        # Check the replicas exist at the requested site
        replicaMetadata = []
        for lfn, requestedSEs in replicas.items():
            lfnReplicas = fileReplicas[lfn]
            for requestedSE, replicaID in requestedSEs.items():
                if requestedSE not in lfnReplicas:
                    terminalReplicaIDs[
                        replicaID] = "LFN not registered at requested SE"
                    replicas[lfn].pop(requestedSE)
                else:
                    replicaMetadata.append(
                        (replicaID, lfnReplicas[requestedSE], fileSizes[lfn]))

        # Update the states of the files in the database
        if terminalReplicaIDs:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s replicas are terminally failed."
                % len(terminalReplicaIDs))
            res = self.stagerClient.updateReplicaFailure(terminalReplicaIDs)
            if not res['OK']:
                gLogger.error(
                    "RequestPreparation.prepareNewReplicas: Failed to update replica failures.",
                    res['Message'])
        if replicaMetadata:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s replica metadata to be updated."
                % len(replicaMetadata))
            # Sets the Status='Waiting' of CacheReplicas records that are OK with catalogue checks
            res = self.stagerClient.updateReplicaInformation(replicaMetadata)
            if not res['OK']:
                gLogger.error(
                    "RequestPreparation.prepareNewReplicas: Failed to update replica metadata.",
                    res['Message'])
        return S_OK()

    def __getNewReplicas(self):
        """ This obtains the New replicas from the Replicas table and for each
        LFN the requested storage element
        """
        # First obtain the New replicas from the CacheReplicas table
        res = self.stagerClient.getCacheReplicas({'Status': 'New'})
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getNewReplicas: Failed to get replicas with New status.",
                res['Message'])
            return res
        if not res['Value']:
            gLogger.debug(
                "RequestPreparation.__getNewReplicas: No New replicas found to process."
            )
            return S_OK()
        else:
            gLogger.debug(
                "RequestPreparation.__getNewReplicas: Obtained %s New replicas(s) to process."
                % len(res['Value']))
        replicas = {}
        replicaIDs = {}
        for replicaID, info in res['Value'].items():
            lfn = info['LFN']
            storageElement = info['SE']
            # setdefault replaces the has_key() check (removed in Python 3)
            replicas.setdefault(lfn, {})[storageElement] = replicaID
            replicaIDs[replicaID] = (lfn, storageElement)
        return S_OK({'Replicas': replicas, 'ReplicaIDs': replicaIDs})

    def __getExistingFiles(self, lfns):
        """ This checks that the files exist in the FileCatalog. """
        filesExist = []
        missing = {}
        res = self.fileCatalog.exists(lfns)
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getExistingFiles: Failed to determine whether files exist.",
                res['Message'])
            return res
        failed = res['Value']['Failed']
        for lfn, exists in res['Value']['Successful'].items():
            if exists:
                filesExist.append(lfn)
            else:
                missing[lfn] = 'LFN not registered in the FileCatalog'
        if missing:
            for lfn, reason in missing.items():
                gLogger.warn(
                    "RequestPreparation.__getExistingFiles: %s" % reason, lfn)
            self.__reportProblematicFiles(missing.keys(),
                                          'LFN-LFC-DoesntExist')
        return S_OK({
            'Exist': filesExist,
            'Missing': missing,
            'Failed': failed
        })

    def __getFileSize(self, lfns):
        """ This obtains the file size from the FileCatalog. """
        fileSizes = {}
        zeroSize = {}
        res = self.fileCatalog.getFileSize(lfns)
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getFileSize: Failed to get sizes for files.",
                res['Message'])
            return res
        failed = res['Value']['Failed']
        for lfn, size in res['Value']['Successful'].items():
            if size == 0:
                zeroSize[
                    lfn] = "LFN registered with zero size in the FileCatalog"
            else:
                fileSizes[lfn] = size
        if zeroSize:
            for lfn, reason in zeroSize.items():
                gLogger.warn("RequestPreparation.__getFileSize: %s" % reason,
                             lfn)
            self.__reportProblematicFiles(zeroSize.keys(), 'LFN-LFC-ZeroSize')
        return S_OK({
            'FileSizes': fileSizes,
            'ZeroSize': zeroSize,
            'Failed': failed
        })

    def __getFileReplicas(self, lfns):
        """ This obtains the replicas from the FileCatalog. """
        replicas = {}
        noReplicas = {}
        res = self.dm.getActiveReplicas(lfns)
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getFileReplicas: Failed to obtain file replicas.",
                res['Message'])
            return res
        failed = res['Value']['Failed']
        for lfn, lfnReplicas in res['Value']['Successful'].items():
            if not lfnReplicas:
                noReplicas[
                    lfn] = "LFN registered with zero replicas in the FileCatalog"
            else:
                replicas[lfn] = lfnReplicas
        if noReplicas:
            for lfn, reason in noReplicas.items():
                gLogger.warn(
                    "RequestPreparation.__getFileReplicas: %s" % reason, lfn)
            self.__reportProblematicFiles(noReplicas.keys(),
                                          'LFN-LFC-NoReplicas')
        return S_OK({
            'Replicas': replicas,
            'ZeroReplicas': noReplicas,
            'Failed': failed
        })

    def __reportProblematicFiles(self, lfns, reason):
        """Report problematic files to the DataIntegrity DB.

        NOTE(review): the early return below deliberately short-circuits the
        reporting; the implementation that follows is unreachable until it is
        removed. Kept verbatim to preserve the current behaviour — confirm
        whether the disable is still wanted.
        """
        return S_OK()
        res = self.dataIntegrityClient.setFileProblematic(
            lfns, reason, sourceComponent='RequestPreparationAgent')
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__reportProblematicFiles: Failed to report missing files.",
                res['Message'])
            return res
        if res['Value']['Successful']:
            gLogger.info(
                "RequestPreparation.__reportProblematicFiles: Successfully reported %s missing files."
                % len(res['Value']['Successful']))
        if res['Value']['Failed']:
            gLogger.info(
                "RequestPreparation.__reportProblematicFiles: Failed to report %s problematic files."
                % len(res['Value']['Failed']))
        return res
class CatalogPlugInTestCase(unittest.TestCase):
    """Base class for the CatalogPlugin test case."""

    def setUp(self):
        """Create the catalog client, clean the test area and register test files."""
        # Metadata fields common to files and directories
        self.fullMetadata = [
            "Status",
            "ChecksumType",
            "OwnerRole",
            "CreationDate",
            "Checksum",
            "ModificationDate",
            "OwnerDN",
            "Mode",
            "GUID",
            "Size",
        ]
        self.dirMetadata = self.fullMetadata + ["NumberOfSubPaths"]
        self.fileMetadata = self.fullMetadata + ["NumberOfLinks"]
        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        # assertTrue replaces the deprecated assert_ alias throughout
        self.assertTrue(valid)
        self.destDir = "/lhcb/test/unit-test/TestCatalogPlugin"
        self.link = "%s/link" % self.destDir
        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        for i in range(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assertTrue(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        """Register a single dummy file in the catalog; return the per-path result."""
        pfn = "protocol://host:port/storage/path%s" % lfn
        size = 10000000
        se = "DIRAC-storage"
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {}
        fileDict[lfn] = {"PFN": pfn, "Size": size, "SE": se, "GUID": guid, "Checksum": adler}
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        """Assert that *res* succeeded for *path* and return the successful value."""
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        self.assertTrue(res["Value"]["Successful"])
        # 'in' replaces dict.has_key(), removed in Python 3
        self.assertTrue(path in res["Value"]["Successful"])
        return res["Value"]["Successful"][path]

    def parseError(self, res, path):
        """Assert that *res* failed for *path* and return the failure reason."""
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        self.assertTrue(res["Value"]["Failed"])
        self.assertTrue(path in res["Value"]["Failed"])
        return res["Value"]["Failed"][path]

    def cleanDirectory(self):
        """Remove the test directory and everything it contains, if it exists."""
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        toRemove = returnValue["Files"].keys()
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assertTrue(returnValue)

    def purgeFiles(self, lfns):
        """Remove every replica of each LFN, then remove the files themselves."""
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {}
                repDict[lfn] = {"PFN": pfn, "SE": se}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()
class DataIntegrityClient( Client ):
  """Client for the DataManagement/DataIntegrity service.

  The following methods are supported in the service but are not mentioned
  explicitly here:

      getProblematic()
        Obtains a problematic file from the IntegrityDB based on the LastUpdate time
      getPrognosisProblematics(prognosis)
        Obtains all the problematics of a particular prognosis from the integrityDB
      getProblematicsSummary()
        Obtains a count of the number of problematics for each prognosis found
      getDistinctPrognosis()
        Obtains the distinct prognosis found in the integrityDB
      getTransformationProblematics(prodID)
        Obtains the problematics for a given production
      incrementProblematicRetry(fileID)
        Increments the retry count for the supplied file ID
      changeProblematicPrognosis(fileID,newPrognosis)
        Changes the prognosis of the supplied file to the new prognosis
      setProblematicStatus(fileID,status)
        Updates the status of a problematic in the integrityDB
      removeProblematic(self,fileID)
        This removes the specified file ID from the integrity DB
      insertProblematic(sourceComponent,fileMetadata)
        Inserts file with supplied metadata into the integrity DB
  """

  def __init__( self, **kwargs ):
    super(DataIntegrityClient, self).__init__( **kwargs )
    self.setServer( 'DataManagement/DataIntegrity' )
    self.dm = DataManager()
    self.fc = FileCatalog()

  def setFileProblematic( self, lfn, reason, sourceComponent = '' ):
    """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if isinstance( lfn, list ):
      lfns = lfn
    elif isinstance( lfn, basestring ):
      lfns = [lfn]
    else:
      errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setFileProblematic: Attempting to update %s files." % len( lfns ) )
    fileMetadata = {}
    for lfn in lfns:
      fileMetadata[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':'', 'SE':''}
    res = self.insertProblematic( sourceComponent, fileMetadata )
    if not res['OK']:
      # message previously (incorrectly) named setReplicaProblematic
      gLogger.error( "DataIntegrityClient.setFileProblematic: Failed to insert problematics to integrity DB" )
    return res

  def reportProblematicReplicas( self, replicaTuple, se, reason ):
    """ Simple wrapper function around setReplicaProblematic """
    gLogger.info( 'The following %s files had %s at %s' % ( len( replicaTuple ), reason, se ) )
    for lfn, _pfn, se, reason in sorted( replicaTuple ):
      if lfn:
        gLogger.info( lfn )
    res = self.setReplicaProblematic( replicaTuple, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with replicas', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with replicas' )

  def setReplicaProblematic( self, replicaTuple, sourceComponent = '' ):
    """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaDict should be of the form {lfn :{'PFN':pfn,'SE':se,'Prognosis':prognosis}

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if isinstance( replicaTuple, tuple ):
      replicaTuple = [replicaTuple]
    elif isinstance( replicaTuple, list ):
      pass
    else:
      errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas." % len( replicaTuple ) )
    replicaDict = {}
    for lfn, pfn, se, reason in replicaTuple:
      replicaDict[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':pfn, 'SE':se}
    res = self.insertProblematic( sourceComponent, replicaDict )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB" )
      return res
    for lfn in replicaDict.keys():
      replicaDict[lfn]['Status'] = 'Problematic'
    res = self.fc.setReplicaStatus( replicaDict )
    if not res['OK']:
      errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
      gLogger.error( errStr, res['Message'] )
      return res
    failed = res['Value']['Failed']
    successful = res['Value']['Successful']
    resDict = {'Successful':successful, 'Failed':failed}
    return S_OK( resDict )

  ##########################################################################
  #
  # This section contains the resolution methods for various prognoses
  #

  def __updateCompletedFiles( self, prognosis, fileID ):
    # Mark the problematic as resolved in the integrity DB
    gLogger.info( "%s file (%d) is resolved" % ( prognosis, fileID ) )
    return self.setProblematicStatus( fileID, 'Resolved' )

  def __returnProblematicError( self, fileID, res ):
    # Record the failure and bump the retry counter before propagating the error
    self.incrementProblematicRetry( fileID )
    gLogger.error( 'DataIntegrityClient failure', res['Message'] )
    return res

  def __updateReplicaToChecked( self, problematicDict ):
    # Flip the replica status to Checked in the catalog, then mark resolved
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']
    prognosis = problematicDict['Prognosis']
    problematicDict['Status'] = 'Checked'
    res = returnSingleResult( self.fc.setReplicaStatus( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    gLogger.info( "%s replica (%d) is updated to Checked status" % ( prognosis, fileID ) )
    return self.__updateCompletedFiles( prognosis, fileID )

  def resolveCatalogPFNSizeMismatch( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and
        resolves the CatalogPFNSizeMismatch prognosis
    """
    lfn = problematicDict['LFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']
    res = returnSingleResult( StorageElement( se ).getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    bkKCatalog = FileCatalog( ['BookkeepingDB'] )
    res = returnSingleResult( bkKCatalog.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    bookkeepingSize = res['Value']
    if bookkeepingSize == catalogSize == storageSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    if catalogSize == bookkeepingSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID )
      res = returnSingleResult( self.fc.getReplicas( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      if len( res['Value'] ) <= 1:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID )
        return S_ERROR( "Not removing catalog file mismatch since the only replica" )
      else:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID )
        res = self.dm.removeReplica( se, lfn )
        if not res['OK']:
          return self.__returnProblematicError( fileID, res )
        return self.__updateCompletedFiles( 'CatalogPFNSizeMismatch', fileID )
    if ( catalogSize != bookkeepingSize ) and ( bookkeepingSize == storageSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID )
      res = self.__updateReplicaToChecked( problematicDict )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.changeProblematicPrognosis( fileID, 'BKCatalogSizeMismatch' )
    gLogger.info( "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count" % fileID )
    return self.incrementProblematicRetry( fileID )

  #FIXME: Unused?
  def resolvePFNNotRegistered( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and
        resolves the PFNNotRegistered prognosis
    """
    lfn = problematicDict['LFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )
    res = returnSingleResult( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      # The file does not exist in the catalog
      res = returnSingleResult( se.removeFile( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    res = returnSingleResult( se.getFileMetadata( lfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNNotRegistered replica (%d) found to be missing." % fileID )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    elif not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageMetadata = res['Value']
    if storageMetadata['Lost']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    if storageMetadata['Unavailable']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count" % fileID )
      return self.incrementProblematicRetry( fileID )

    # HACK until we can obtain the space token descriptions through GFAL
    site = seName.split( '_' )[0].split( '-' )[0]
    if not storageMetadata['Cached']:
      if lfn.endswith( '.raw' ):
        seName = '%s-RAW' % site
      else:
        seName = '%s-RDST' % site
    elif storageMetadata['Migrated']:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s_M-DST' % site
      else:
        seName = '%s_MC_M-DST' % site
    else:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s-DST' % site
      else:
        seName = '%s_MC-DST' % site
    problematicDict['SE'] = seName
    res = returnSingleResult( se.getURL( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )

    problematicDict['PFN'] = res['Value']
    res = returnSingleResult( self.fc.addReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    res = returnSingleResult( self.fc.getFileMetadata( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']['Size'] != storageMetadata['Size']:
      gLogger.info( "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )

  #FIXME: Unused?
  def resolveLFNCatalogMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and
        resolves the LFNCatalogMissing prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )
    # Remove the file from all catalogs
    # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
    res = returnSingleResult( self.fc.removeFile( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )

  #FIXME: Unused?
  def resolvePFNMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and
        resolves the PFNMissing prognosis
    """
    se = problematicDict['SE']
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      gLogger.info( "PFNMissing file (%d) no longer exists in catalog" % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )

    res = returnSingleResult( StorageElement( se ).exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      gLogger.info( "PFNMissing replica (%d) is no longer missing" % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    gLogger.info( "PFNMissing replica (%d) does not exist" % fileID )
    res = returnSingleResult( self.fc.getReplicas( lfn, allStatus = True ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    replicas = res['Value']
    seSite = se.split( '_' )[0].split( '-' )[0]
    found = False
    # was a bare Py2 "print replicas" debug statement; use the logger instead
    gLogger.debug( "PFNMissing replicas: %s" % replicas )
    for replicaSE in replicas.keys():
      if re.search( seSite, replicaSE ):
        found = True
        problematicDict['SE'] = replicaSE
        se = replicaSE
    if not found:
      gLogger.info( "PFNMissing replica (%d) is no longer registered at SE. Resolved." % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )
    gLogger.info( "PFNMissing replica (%d) does not exist. Removing from catalog..." % fileID )
    res = returnSingleResult( self.fc.removeReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if len( replicas ) == 1:
      gLogger.info( "PFNMissing replica (%d) had a single replica. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'LFNZeroReplicas' )
    res = self.dm.replicateAndRegister( problematicDict['LFN'], se )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'PFNMissing', fileID )

  #FIXME: Unused?
  def resolvePFNUnavailable( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and
        resolves the PFNUnavailable prognosis
    """
    lfn = problematicDict['LFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = returnSingleResult( StorageElement( se ).getFileMetadata( lfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      # The file is no longer Unavailable but has now dissapeared completely
      gLogger.info( "PFNUnavailable replica (%d) found to be missing. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    if ( not res['OK'] ) or res['Value']['Unavailable']:
      gLogger.info( "PFNUnavailable replica (%d) found to still be Unavailable" % fileID )
      return self.incrementProblematicRetry( fileID )
    if res['Value']['Lost']:
      gLogger.info( "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    gLogger.info( "PFNUnavailable replica (%d) is no longer Unavailable" % fileID )
    # Need to make the replica okay in the Catalog
    return self.__updateReplicaToChecked( problematicDict )

  #FIXME: Unused?
  def resolvePFNZeroSize( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and
        resolves the PFNZeroSize prognosis
    """
    lfn = problematicDict['LFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )

    res = returnSingleResult( se.getFileSize( lfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNZeroSize replica (%d) found to be missing. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    storageSize = res['Value']
    if storageSize == 0:
      res = returnSingleResult( se.removeFile( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      gLogger.info( "PFNZeroSize replica (%d) removed. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )

    res = returnSingleResult( self.fc.getReplicas( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if seName not in res['Value']:
      gLogger.info( "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNNotRegistered' )
    res = returnSingleResult( self.fc.getFileMetadata( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']['Size']
    if catalogSize != storageSize:
      gLogger.info( "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNZeroSize', fileID )

  ############################################################################################

  #FIXME: Unused?
  def resolveLFNZeroReplicas( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and
        resolves the LFNZeroReplicas prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.getReplicas( lfn, allStatus = True ) )
    if res['OK'] and res['Value']:
      gLogger.info( "LFNZeroReplicas file (%d) found to have replicas" % fileID )
    else:
      gLogger.info( "LFNZeroReplicas file (%d) does not have replicas. Checking storage..." % fileID )
      pfnsFound = False
      for storageElementName in sorted( gConfig.getValue( 'Resources/StorageElementGroups/Tier1_MC_M-DST', [] ) ):
        res = self.__getStoragePathExists( [lfn], storageElementName )
        if lfn in res['Value']:
          gLogger.info( "LFNZeroReplicas file (%d) found storage file at %s" % ( fileID, storageElementName ) )
          self.reportProblematicReplicas( [( lfn, 'deprecatedUrl', storageElementName, 'PFNNotRegistered' )], storageElementName, 'PFNNotRegistered' )
          pfnsFound = True
      if not pfnsFound:
        gLogger.info( "LFNZeroReplicas file (%d) did not have storage files. Removing..." % fileID )
        res = returnSingleResult( self.fc.removeFile( lfn ) )
        if not res['OK']:
          gLogger.error( 'DataIntegrityClient: failed to remove file', res['Message'] )
          # Increment the number of retries for this file
          # (was self.server.incrementProblematicRetry: self.server is never
          # defined on this client, which raised AttributeError)
          self.incrementProblematicRetry( fileID )
          return res
        gLogger.info( "LFNZeroReplicas file (%d) removed from catalog" % fileID )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'LFNZeroReplicas', fileID )

  def _reportProblematicFiles( self, lfns, reason ):
    """ Simple wrapper function around setFileProblematic """
    gLogger.info( 'The following %s files were found with %s' % ( len( lfns ), reason ) )
    for lfn in sorted( lfns ):
      gLogger.info( lfn )
    res = self.setFileProblematic( lfns, reason, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with files', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with files' )
# Build the list of directories to check: each command-line argument is either
# a file containing one directory per line, or a directory name itself.
directories = []
for inputFileName in inputNames:
  try:
    inputFile = open( inputFileName, 'r' )
    stringIn = inputFile.read()
    directories += stringIn.splitlines()
    inputFile.close()
  except IOError:
    # Not a readable file: treat the argument itself as a directory name
    # (was a bare except:, narrowed to the open/read failure it guards)
    directories.append( inputFileName )

######################################################
#
# This check performs Catalog->BK and Catalog->SE for possible output directories
#
res = fc.exists( directories )
if not res['OK']:
  gLogger.error( res['Message'] )
  DIRAC.exit( -2 )
# 'Failed' is a dict {directory: error}: iterate items(), not the dict itself
# (the original unpacked the key strings and broke at runtime)
for directory, error in res['Value']['Failed'].items():
  gLogger.error( 'Failed to determine existence of directory', '%s %s' % ( directory, error ) )
if res['Value']['Failed']:
  DIRAC.exit( -2 )
directoryExists = res['Value']['Successful']
for directory in sorted( directoryExists ):
  if not directoryExists[directory]:
    continue
  gLogger.info( "Checking the integrity of %s" % directory )
  iRes = integrity.catalogDirectoryToBK( directory )
  if not iRes['OK']:
    gLogger.error( 'Error getting directory content:', iRes['Message'] )