Example no. 1
  def getOutgoingFiles( self, transferDict ):
    """
    Get list of files to be processed from InputPath
    """
    inputFCName = transferDict['InputFC']
    inputPath = transferDict['InputPath']

    if inputFCName == 'LocalDisk':
      files = []
      try:
        for fileName in os.listdir( inputPath ):
          if os.path.isfile( os.path.join( inputPath, fileName ) ):
            files.append( fileName )
      except OSError:
        # Input directory may be missing or unreadable; return what we have
        pass
      return files

    inputFC = FileCatalog( [inputFCName] )
    result = inputFC.listDirectory( inputPath, True )

    if not result['OK']:
      self.log.error( result['Message'] )
      return []
    if inputPath not in result['Value']['Successful']:
      self.log.error( result['Value']['Failed'][inputPath] )
      return []

    subDirs = result['Value']['Successful'][inputPath]['SubDirs']
    files = result['Value']['Successful'][inputPath]['Files']
    for subDir in subDirs:
      self.log.info( 'Ignoring subdirectory:', subDir )
    return files.keys()
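
For orientation, a minimal sketch of how this method might be driven. The transferDict keys ('InputFC', 'InputPath') come from the code above; the agent object and the path are hypothetical:

# Hypothetical driver; 'agent' is whatever object defines getOutgoingFiles.
transferDict = {
    'InputFC': 'LocalDisk',         # or a catalog name understood by FileCatalog
    'InputPath': '/data/outgoing',  # directory to scan for files
}
for fileName in agent.getOutgoingFiles(transferDict):
    print(fileName)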
Example no. 2

  def __getCatalogDirectoryContents(self, directories):
    """ get catalog contents under paths :directories:

    :param self: self reference
    :param list directories: list of paths in catalog
    """
    self.log.info('Obtaining the catalog contents for %d directories:' % len(directories))
    for directory in directories:
      self.log.info(directory)
    activeDirs = list(directories)  # work on a copy; entries are removed as they are processed
    allFiles = {}
    fc = FileCatalog()
    while activeDirs:
      currentDir = activeDirs[0]
      res = returnSingleResult(fc.listDirectory(currentDir))
      activeDirs.remove(currentDir)
      if not res['OK'] and 'Directory does not exist' in res['Message']:  # FIXME: DFC should return errno
        self.log.info("The supplied directory %s does not exist" % currentDir)
      elif not res['OK']:
        if "No such file or directory" in res['Message']:
          self.log.info("%s: %s" % (currentDir, res['Message']))
        else:
          self.log.error("Failed to get directory %s content: %s" % (currentDir, res['Message']))
      else:
        dirContents = res['Value']
        activeDirs.extend(dirContents['SubDirs'])
        allFiles.update(dirContents['Files'])
    self.log.info("Found %d files" % len(allFiles))
    return S_OK(allFiles.keys())
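
Both methods above follow DIRAC's standard return convention: every call yields a dict with 'OK' set, carrying either 'Value' (frequently split into 'Successful' and 'Failed' maps keyed by path) or 'Message'. A self-contained sketch of that checking pattern, with a stubbed listDirectory standing in for the real catalog call:

from DIRAC import S_OK, S_ERROR

def listDirectoryStub(path):
    # Illustrative stand-in for FileCatalog.listDirectory()
    if path == '/vo/data':
        contents = {'SubDirs': {}, 'Files': {'/vo/data/f1': {}}}
        return S_OK({'Successful': {path: contents}, 'Failed': {}})
    return S_ERROR('Directory does not exist')

res = listDirectoryStub('/vo/data')
if not res['OK']:
    print(res['Message'])
elif '/vo/data' in res['Value']['Successful']:
    print(sorted(res['Value']['Successful']['/vo/data']['Files']))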
Example no. 3
    def getOutgoingFiles(self, transferDict):
        """
    Get list of files to be processed from InputPath
    """
        inputFCName = transferDict['InputFC']
        inputPath = transferDict['InputPath']

        if inputFCName == 'LocalDisk':
            files = []
            try:
                for fileName in os.listdir(inputPath):
                    if os.path.isfile(os.path.join(inputPath, fileName)):
                        files.append(fileName)
            except OSError:
                # Input directory may be missing or unreadable
                pass
            return files

        inputFC = FileCatalog([inputFCName])
        result = inputFC.listDirectory(inputPath, True)

        if not result['OK']:
            self.log.error(result['Message'])
            return []
        if inputPath not in result['Value']['Successful']:
            self.log.error(result['Value']['Failed'][inputPath])
            return []

        subDirs = result['Value']['Successful'][inputPath]['SubDirs']
        files = result['Value']['Successful'][inputPath]['Files']
        for subDir in subDirs:
            self.log.info('Ignoring subdirectory:', subDir)
        return files.keys()
Example no. 4
    def __getCatalogDirectoryContents(self, directories):
        """ get catalog contents under paths :directories:

    :param self: self reference
    :param list directories: list of paths in catalog
    """
        self.log.info('Obtaining the catalog contents for %d directories:' %
                      len(directories))
        for directory in directories:
            self.log.info(directory)
        activeDirs = list(directories)  # copy so the caller's list is not mutated
        allFiles = {}
        fc = FileCatalog()
        while activeDirs:
            currentDir = activeDirs[0]
            res = returnSingleResult(fc.listDirectory(currentDir))
            activeDirs.remove(currentDir)
            if not res['OK'] and 'Directory does not exist' in res['Message']:
                # FIXME: DFC should return errno
                self.log.info("The supplied directory %s does not exist" %
                              currentDir)
            elif not res['OK']:
                if "No such file or directory" in res['Message']:
                    self.log.info("%s: %s" % (currentDir, res['Message']))
                else:
                    self.log.error("Failed to get directory %s content: %s" %
                                   (currentDir, res['Message']))
            else:
                dirContents = res['Value']
                activeDirs.extend(dirContents['SubDirs'])
                allFiles.update(dirContents['Files'])
        self.log.info("Found %d files" % len(allFiles))
        return S_OK(allFiles.keys())
Example no. 5
class DataIntegrityClient(Client):
    """
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB

  """
    def __init__(self, **kwargs):

        Client.__init__(self, **kwargs)
        self.setServer('DataManagement/DataIntegrity')
        self.dm = DataManager()
        self.fc = FileCatalog()

    ##########################################################################
    #
    # This section contains the specific methods for LFC->SE checks
    #

    def catalogDirectoryToSE(self, lfnDir):
        """ This obtains the replica and metadata information from the catalog for the supplied directory and checks against the storage elements.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        if type(lfnDir) in types.StringTypes:
            lfnDir = [lfnDir]
        res = self.__getCatalogDirectoryContents(lfnDir)
        if not res['OK']:
            return res
        replicas = res['Value']['Replicas']
        catalogMetadata = res['Value']['Metadata']
        res = self.__checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        }
        return S_OK(resDict)

    def catalogFileToSE(self, lfns):
        """ This obtains the replica and metadata information from the catalog and checks against the storage elements.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        if type(lfns) in types.StringTypes:
            lfns = [lfns]
        res = self.__getCatalogMetadata(lfns)
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        res = self.__getCatalogReplicas(catalogMetadata.keys())
        if not res['OK']:
            return res
        replicas = res['Value']
        res = self.__checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        }
        return S_OK(resDict)

    def checkPhysicalFiles(self, replicas, catalogMetadata, ses=[]):
        """ This obtains takes the supplied replica and metadata information obtained from the catalog and checks against the storage elements.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        return self.__checkPhysicalFiles(replicas, catalogMetadata, ses=ses)

    def __checkPhysicalFiles(self, replicas, catalogMetadata, ses=[]):
        """ This obtains the physical file metadata and checks the metadata against the catalog entries
    """
        seLfns = {}
        for lfn, replicaDict in replicas.items():
            for se, _url in replicaDict.items():
                if (ses) and (se not in ses):
                    continue
                seLfns.setdefault(se, []).append(lfn)
        gLogger.info('%s %s' %
                     ('Storage Element'.ljust(20), 'Replicas'.rjust(20)))

        for se in sortList(seLfns):
            files = len(seLfns[se])
            gLogger.info('%s %s' % (se.ljust(20), str(files).rjust(20)))

            lfns = seLfns[se]
            sizeMismatch = []
            res = self.__checkPhysicalFileMetadata(lfns, se)
            if not res['OK']:
                gLogger.error('Failed to get physical file metadata.',
                              res['Message'])
                return res
            for lfn, metadata in res['Value'].items():
                if lfn in catalogMetadata:
                    if (metadata['Size'] != catalogMetadata[lfn]['Size']) and (
                            metadata['Size'] != 0):
                        sizeMismatch.append((lfn, 'deprecatedUrl', se,
                                             'CatalogPFNSizeMismatch'))
            if sizeMismatch:
                self.__reportProblematicReplicas(sizeMismatch, se,
                                                 'CatalogPFNSizeMismatch')
        return S_OK()

    def __checkPhysicalFileMetadata(self, lfns, se):
        """ Check obtain the physical file metadata and check the files are available
    """
        gLogger.info('Checking the integrity of %s physical files at %s' %
                     (len(lfns), se))

        res = StorageElement(se).getFileMetadata(lfns)

        if not res['OK']:
            gLogger.error('Failed to get metadata for lfns.', res['Message'])
            return res
        lfnMetadataDict = res['Value']['Successful']
        # If the replicas are completely missing
        missingReplicas = []
        for lfn, reason in res['Value']['Failed'].items():
            if re.search('File does not exist', reason):
                missingReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNMissing'))
        if missingReplicas:
            self.__reportProblematicReplicas(missingReplicas, se, 'PFNMissing')
        lostReplicas = []
        unavailableReplicas = []
        zeroSizeReplicas = []
        # If the files are not accessible
        for lfn, lfnMetadata in lfnMetadataDict.items():
            if lfnMetadata['Lost']:
                lostReplicas.append((lfn, 'deprecatedUrl', se, 'PFNLost'))
            if lfnMetadata['Unavailable']:
                unavailableReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNUnavailable'))
            if lfnMetadata['Size'] == 0:
                zeroSizeReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNZeroSize'))
        if lostReplicas:
            self.__reportProblematicReplicas(lostReplicas, se, 'PFNLost')
        if unavailableReplicas:
            self.__reportProblematicReplicas(unavailableReplicas, se,
                                             'PFNUnavailable')
        if zeroSizeReplicas:
            self.__reportProblematicReplicas(zeroSizeReplicas, se,
                                             'PFNZeroSize')
        gLogger.info(
            'Checking the integrity of physical files at %s complete' % se)
        return S_OK(lfnMetadataDict)

    ##########################################################################
    #
    # This section contains the specific methods for SE->LFC checks
    #

    def storageDirectoryToCatalog(self, lfnDir, storageElement):
        """ This obtains the file found on the storage element in the supplied directories and determines whether they exist in the catalog and checks their metadata elements
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the SE->LFC check at %s" % storageElement)
        gLogger.info("-" * 40)
        if type(lfnDir) in types.StringTypes:
            lfnDir = [lfnDir]
        res = self.__getStorageDirectoryContents(lfnDir, storageElement)
        if not res['OK']:
            return res
        storageFileMetadata = res['Value']
        if storageFileMetadata:
            return self.__checkCatalogForSEFiles(storageFileMetadata,
                                                 storageElement)
        return S_OK({'CatalogMetadata': {}, 'StorageMetadata': {}})

    def __checkCatalogForSEFiles(self, storageMetadata, storageElement):
        gLogger.info('Checking %s storage files exist in the catalog' %
                     len(storageMetadata))

        res = self.fc.getReplicas(storageMetadata)
        if not res['OK']:
            gLogger.error("Failed to get replicas for LFN", res['Message'])
            return res
        failedLfns = res['Value']['Failed']
        successfulLfns = res['Value']['Successful']
        notRegisteredLfns = []

        for lfn in storageMetadata:
            if lfn in failedLfns:
                if 'No such file or directory' in failedLfns[lfn]:
                    notRegisteredLfns.append(
                        (lfn, 'deprecatedUrl', storageElement,
                         'LFNNotRegistered'))
                    failedLfns.pop(lfn)
            elif storageElement not in successfulLfns[lfn]:
                notRegisteredLfns.append(
                    (lfn, 'deprecatedUrl', storageElement, 'LFNNotRegistered'))

        if notRegisteredLfns:
            self.__reportProblematicReplicas(notRegisteredLfns, storageElement,
                                             'LFNNotRegistered')
        if failedLfns:
            return S_ERROR('Failed to obtain replicas')

        # For the LFNs found to be registered obtain the file metadata from the catalog and verify against the storage metadata
        res = self.__getCatalogMetadata(storageMetadata)
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        sizeMismatch = []
        for lfn, lfnCatalogMetadata in catalogMetadata.items():
            lfnStorageMetadata = storageMetadata[lfn]
            if (lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size']) and (
                    lfnStorageMetadata['Size'] != 0):
                sizeMismatch.append((lfn, 'deprecatedUrl', storageElement,
                                     'CatalogPFNSizeMismatch'))
        if sizeMismatch:
            self.__reportProblematicReplicas(sizeMismatch, storageElement,
                                             'CatalogPFNSizeMismatch')
        gLogger.info('Checking storage files exist in the catalog complete')
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'StorageMetadata': storageMetadata
        }
        return S_OK(resDict)

    def getStorageDirectoryContents(self, lfnDir, storageElement):
        """ This obtains takes the supplied lfn directories and recursively obtains the files in the supplied storage element
    """
        return self.__getStorageDirectoryContents(lfnDir, storageElement)

    def __getStorageDirectoryContents(self, lfnDir, storageElement):
        """ Obtians the contents of the supplied directory on the storage
    """
        gLogger.info('Obtaining the contents for %s directories at %s' %
                     (len(lfnDir), storageElement))

        se = StorageElement(storageElement)

        res = se.exists(lfnDir)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of directories",
                          res['Message'])
            return res
        for directory, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existence of directory',
                          '%s %s' % (directory, error))
        if res['Value']['Failed']:
            return S_ERROR('Failed to determine existence of directory')
        directoryExists = res['Value']['Successful']
        activeDirs = []
        for directory in sorted(directoryExists):
            exists = directoryExists[directory]
            if exists:
                activeDirs.append(directory)
        allFiles = {}
        while len(activeDirs) > 0:
            currentDir = activeDirs[0]
            res = se.listDirectory(currentDir)
            activeDirs.remove(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents',
                              res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error(
                    'Failed to get directory contents',
                    '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
                return S_ERROR(res['Value']['Failed'][currentDir])
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend(
                    se.getLFNFromURL(dirContents['SubDirs']).get(
                        'Value', {}).get('Successful', []))
                fileURLMetadata = dirContents['Files']
                fileMetadata = {}
                res = se.getLFNFromURL(fileURLMetadata)
                if not res['OK']:
                    gLogger.error('Failed to get directory content LFNs',
                                  res['Message'])
                    return res

                for url, error in res['Value']['Failed'].items():
                    gLogger.error("Failed to get LFN for URL",
                                  "%s %s" % (url, error))
                if res['Value']['Failed']:
                    return S_ERROR("Failed to get LFNs for PFNs")
                urlLfns = res['Value']['Successful']
                for urlLfn, lfn in urlLfns.items():
                    fileMetadata[lfn] = fileURLMetadata[urlLfn]
                allFiles.update(fileMetadata)

        zeroSizeFiles = []

        for lfn in sorted(allFiles):
            if os.path.basename(lfn) == 'dirac_directory':
                allFiles.pop(lfn)
            else:
                metadata = allFiles[lfn]
                if metadata['Size'] == 0:
                    zeroSizeFiles.append(
                        (lfn, 'deprecatedUrl', storageElement, 'PFNZeroSize'))
        if zeroSizeFiles:
            self.__reportProblematicReplicas(zeroSizeFiles, storageElement,
                                             'PFNZeroSize')

        gLogger.info('Obtained a total of %s files for directories at %s' %
                     (len(allFiles), storageElement))
        return S_OK(allFiles)

    def __getStoragePathExists(self, lfnPaths, storageElement):
        gLogger.info('Determining the existence of %d files at %s' %
                     (len(lfnPaths), storageElement))

        se = StorageElement(storageElement)

        res = se.exists(lfnPaths)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of paths",
                          res['Message'])
            return res
        for lfnPath, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existence of path',
                          '%s %s' % (lfnPath, error))
        if res['Value']['Failed']:
            return S_ERROR('Failed to determine existence of paths')
        pathExists = res['Value']['Successful']
        resDict = {}
        for lfn, exists in pathExists.items():
            if exists:
                resDict[lfn] = True
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the specific methods for obtaining replica and metadata information from the catalog
    #

    def __getCatalogDirectoryContents(self, lfnDir):
        """ Obtain the contents of the supplied directory
    """
        gLogger.info('Obtaining the catalog contents for %s directories' %
                     len(lfnDir))

        activeDirs = lfnDir
        allFiles = {}
        while len(activeDirs) > 0:
            currentDir = activeDirs[0]
            res = self.fc.listDirectory(currentDir)
            activeDirs.remove(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents',
                              res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error(
                    'Failed to get directory contents',
                    '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend(dirContents['SubDirs'])
                allFiles.update(dirContents['Files'])

        zeroReplicaFiles = []
        zeroSizeFiles = []
        allReplicaDict = {}
        allMetadataDict = {}
        for lfn, lfnDict in allFiles.items():
            lfnReplicas = {}
            for se, replicaDict in lfnDict['Replicas'].items():
                lfnReplicas[se] = replicaDict['PFN']
            if not lfnReplicas:
                zeroReplicaFiles.append(lfn)
            allReplicaDict[lfn] = lfnReplicas
            allMetadataDict[lfn] = lfnDict['MetaData']
            if lfnDict['MetaData']['Size'] == 0:
                zeroSizeFiles.append(lfn)
        if zeroReplicaFiles:
            self.__reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        if zeroSizeFiles:
            self.__reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
        gLogger.info(
            'Obtained a total of %s files for the supplied directories' %
            len(allMetadataDict))
        resDict = {'Metadata': allMetadataDict, 'Replicas': allReplicaDict}
        return S_OK(resDict)

    def __getCatalogReplicas(self, lfns):
        """ Obtain the file replicas from the catalog while checking that there are replicas
    """
        gLogger.info('Obtaining the replicas for %s files' % len(lfns))

        zeroReplicaFiles = []
        res = self.fc.getReplicas(lfns, allStatus=True)
        if not res['OK']:
            gLogger.error('Failed to get catalog replicas', res['Message'])
            return res
        allReplicas = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].items():
            if re.search('File has zero replicas', error):
                zeroReplicaFiles.append(lfn)
        if zeroReplicaFiles:
            self.__reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        gLogger.info('Obtaining the replicas for files complete')
        return S_OK(allReplicas)

    def __getCatalogMetadata(self, lfns):
        """ Obtain the file metadata from the catalog while checking they exist
    """
        if not lfns:
            return S_OK({})
        gLogger.info('Obtaining the catalog metadata for %s files' % len(lfns))

        missingCatalogFiles = []
        zeroSizeFiles = []
        res = self.fc.getFileMetadata(lfns)
        if not res['OK']:
            gLogger.error('Failed to get catalog metadata', res['Message'])
            return res
        allMetadata = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].items():
            if re.search('No such file or directory', error):
                missingCatalogFiles.append(lfn)
        if missingCatalogFiles:
            self.__reportProblematicFiles(missingCatalogFiles,
                                          'LFNCatalogMissing')
        for lfn, metadata in allMetadata.items():
            if metadata['Size'] == 0:
                zeroSizeFiles.append(lfn)
        if zeroSizeFiles:
            self.__reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
        gLogger.info('Obtaining the catalog metadata complete')
        return S_OK(allMetadata)

    ##########################################################################
    #
    # This section contains the methods for inserting problematic files into the integrity DB
    #

    def __reportProblematicFiles(self, lfns, reason):
        """ Simple wrapper function around setFileProblematic """
        gLogger.info('The following %s files were found with %s' %
                     (len(lfns), reason))
        for lfn in sortList(lfns):
            gLogger.info(lfn)
        res = self.setFileProblematic(lfns,
                                      reason,
                                      sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with files',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with files')

    def setFileProblematic(self, lfn, reason, sourceComponent=''):
        """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if type(lfn) == types.ListType:
            lfns = lfn
        elif type(lfn) == types.StringType:
            lfns = [lfn]
        else:
            errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setFileProblematic: Attempting to update %s files."
            % len(lfns))
        fileMetadata = {}
        for lfn in lfns:
            fileMetadata[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': '',
                'SE': ''
            }
        res = self.insertProblematic(sourceComponent, fileMetadata)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB"
            )
        return res

    def __reportProblematicReplicas(self, replicaTuple, se, reason):
        """ Simple wrapper function around setReplicaProblematic """
        gLogger.info('The following %s files had %s at %s' %
                     (len(replicaTuple), reason, se))
        for lfn, _pfn, se, reason in sortList(replicaTuple):
            if lfn:
                gLogger.info(lfn)
        res = self.setReplicaProblematic(replicaTuple,
                                         sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with replicas',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with replicas')

    def setReplicaProblematic(self, replicaTuple, sourceComponent=''):
        """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaTuple should be a tuple, or list of tuples, of the form (lfn, pfn, se, prognosis)

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if type(replicaTuple) == types.TupleType:
            replicaTuple = [replicaTuple]
        elif type(replicaTuple) == types.ListType:
            pass
        else:
            errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas."
            % len(replicaTuple))
        replicaDict = {}
        for lfn, pfn, se, reason in replicaTuple:
            replicaDict[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': pfn,
                'SE': se
            }
        res = self.insertProblematic(sourceComponent, replicaDict)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB"
            )
            return res
        for lfn in replicaDict.keys():
            replicaDict[lfn]['Status'] = 'Problematic'

        res = self.fc.setReplicaStatus(replicaDict)
        if not res['OK']:
            errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
            gLogger.error(errStr, res['Message'])
            return res
        failed = res['Value']['Failed']
        successful = res['Value']['Successful']
        resDict = {'Successful': successful, 'Failed': failed}
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the resolution methods for various prognoses
    #

    def __updateCompletedFiles(self, prognosis, fileID):
        gLogger.info("%s file (%d) is resolved" % (prognosis, fileID))
        return self.setProblematicStatus(fileID, 'Resolved')

    def __returnProblematicError(self, fileID, res):
        self.incrementProblematicRetry(fileID)
        gLogger.error('DataIntegrityClient failure', res['Message'])
        return res


#   def __getRegisteredPFNLFN( self, pfn, storageElement ):
#
#     res = StorageElement( storageElement ).getURL( pfn )
#     if not res['OK']:
#       gLogger.error( "Failed to get registered PFN for physical files", res['Message'] )
#       return res
#     for pfn, error in res['Value']['Failed'].items():
#       gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
#       return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
#     registeredPFN = res['Value']['Successful'][pfn]
#     res = returnSingleResult( self.fc.getLFNForPFN( registeredPFN ) )
#     if ( not res['OK'] ) and re.search( 'No such file or directory', res['Message'] ):
#       return S_OK( False )
#     return S_OK( res['Value'] )

    def __updateReplicaToChecked(self, problematicDict):
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        prognosis = problematicDict['Prognosis']
        problematicDict['Status'] = 'Checked'

        res = returnSingleResult(
            self.fc.setReplicaStatus({lfn: problematicDict}))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        gLogger.info("%s replica (%d) is updated to Checked status" %
                     (prognosis, fileID))
        return self.__updateCompletedFiles(prognosis, fileID)

    def resolveCatalogPFNSizeMismatch(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']
        res = returnSingleResult(StorageElement(se).getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        bkKCatalog = FileCatalog(['BookkeepingDB'])
        res = returnSingleResult(bkKCatalog.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        bookkeepingSize = res['Value']
        if bookkeepingSize == catalogSize == storageSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) matched all registered sizes."
                % fileID)
            return self.__updateReplicaToChecked(problematicDict)
        if (catalogSize == bookkeepingSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also"
                % fileID)
            res = returnSingleResult(self.fc.getReplicas(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            if len(res['Value']) <= 1:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has no other replicas."
                    % fileID)
                return S_ERROR(
                    "Not removing catalog file mismatch since it is the only replica"
                )
            else:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..."
                    % fileID)
                res = self.dm.removeReplica(se, lfn)
                if not res['OK']:
                    return self.__returnProblematicError(fileID, res)
                return self.__updateCompletedFiles('CatalogPFNSizeMismatch',
                                                   fileID)
        if (catalogSize != bookkeepingSize) and (bookkeepingSize
                                                 == storageSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size"
                % fileID)
            res = self.__updateReplicaToChecked(problematicDict)
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.changeProblematicPrognosis(fileID,
                                                   'BKCatalogSizeMismatch')
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count"
            % fileID)
        return self.incrementProblematicRetry(fileID)

    def resolvePFNNotRegistered(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)
        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            # The file does not exist in the catalog
            res = returnSingleResult(se.removeFile(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        res = returnSingleResult(se.getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info("PFNNotRegistered replica (%d) found to be missing." %
                         fileID)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        elif not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageMetadata = res['Value']
        if storageMetadata['Lost']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        if storageMetadata['Unavailable']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count"
                % fileID)
            return self.incrementProblematicRetry(fileID)

        # HACK until we can obtain the space token descriptions through GFAL
        site = seName.split('_')[0].split('-')[0]
        if not storageMetadata['Cached']:
            if lfn.endswith('.raw'):
                seName = '%s-RAW' % site
            else:
                seName = '%s-RDST' % site
        elif storageMetadata['Migrated']:
            if lfn.startswith('/lhcb/data'):
                seName = '%s_M-DST' % site
            else:
                seName = '%s_MC_M-DST' % site
        else:
            if lfn.startswith('/lhcb/data'):
                seName = '%s-DST' % site
            else:
                seName = '%s_MC-DST' % site

        problematicDict['SE'] = seName
        res = returnSingleResult(se.getURL(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)

        problematicDict['PFN'] = res['Value']

        res = returnSingleResult(self.fc.addReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        res = returnSingleResult(self.fc.getFileMetadata(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']['Size'] != storageMetadata['Size']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNNotRegistered', fileID)

    def resolveLFNCatalogMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            return self.__updateCompletedFiles('LFNCatalogMissing', fileID)
        # Remove the file from all catalogs
        # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
        res = returnSingleResult(self.fc.removeFile(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        return self.__updateCompletedFiles('LFNCatalogMissing', fileID)

    def resolvePFNMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
        se = problematicDict['SE']
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            gLogger.info("PFNMissing file (%d) no longer exists in catalog" %
                         fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)

        res = returnSingleResult(StorageElement(se).exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            gLogger.info("PFNMissing replica (%d) is no longer missing" %
                         fileID)
            return self.__updateReplicaToChecked(problematicDict)
        gLogger.info("PFNMissing replica (%d) does not exist" % fileID)
        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        replicas = res['Value']
        seSite = se.split('_')[0].split('-')[0]
        found = False
        gLogger.debug('Registered replicas: %s' % replicas)
        for replicaSE in replicas.keys():
            if re.search(seSite, replicaSE):
                found = True
                problematicDict['SE'] = replicaSE
                se = replicaSE
        if not found:
            gLogger.info(
                "PFNMissing replica (%d) is no longer registered at SE. Resolved."
                % fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)
        gLogger.info(
            "PFNMissing replica (%d) does not exist. Removing from catalog..."
            % fileID)
        res = returnSingleResult(self.fc.removeReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if len(replicas) == 1:
            gLogger.info(
                "PFNMissing replica (%d) had a single replica. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'LFNZeroReplicas')
        res = self.dm.replicateAndRegister(problematicDict['LFN'], se)
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('PFNMissing', fileID)

    def resolvePFNUnavailable(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(StorageElement(se).getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            # The file is no longer Unavailable but has now disappeared completely
            gLogger.info(
                "PFNUnavailable replica (%d) found to be missing. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        if (not res['OK']) or res['Value']['Unavailable']:
            gLogger.info(
                "PFNUnavailable replica (%d) found to still be Unavailable" %
                fileID)
            return self.incrementProblematicRetry(fileID)
        if res['Value']['Lost']:
            gLogger.info(
                "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        gLogger.info("PFNUnavailable replica (%d) is no longer Unavailable" %
                     fileID)
        # Need to make the replica okay in the Catalog
        return self.__updateReplicaToChecked(problematicDict)

    def resolvePFNZeroSize(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)

        res = returnSingleResult(se.getFileSize(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info(
                "PFNZeroSize replica (%d) found to be missing. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        storageSize = res['Value']
        if storageSize == 0:
            res = returnSingleResult(se.removeFile(lfn))

            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            gLogger.info(
                "PFNZeroSize replica (%d) removed. Updating prognosis" %
                problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')

        res = returnSingleResult(self.fc.getReplicas(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if seName not in res['Value']:
            gLogger.info(
                "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNNotRegistered')
        res = returnSingleResult(self.fc.getFileMetadata(lfn))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']['Size']
        if catalogSize != storageSize:
            gLogger.info(
                "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNZeroSize', fileID)

    ############################################################################################

    def resolveLFNZeroReplicas(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if res['OK'] and res['Value']:
            gLogger.info("LFNZeroReplicas file (%d) found to have replicas" %
                         fileID)
        else:
            gLogger.info(
                "LFNZeroReplicas file (%d) does not have replicas. Checking storage..."
                % fileID)
            pfnsFound = False
            for storageElementName in sorted(
                    gConfig.getValue(
                        'Resources/StorageElementGroups/Tier1_MC_M-DST', [])):
                res = self.__getStoragePathExists([lfn], storageElementName)
                if lfn in res['Value']:
                    gLogger.info(
                        "LFNZeroReplicas file (%d) found storage file at %s" %
                        (fileID, storageElementName))
                    self.__reportProblematicReplicas(
                        [(lfn, 'deprecatedUrl', storageElementName,
                          'PFNNotRegistered')], storageElementName,
                        'PFNNotRegistered')
                    pfnsFound = True
            if not pfnsFound:
                gLogger.info(
                    "LFNZeroReplicas file (%d) did not have storage files. Removing..."
                    % fileID)
                res = returnSingleResult(self.fc.removeFile(lfn))
                if not res['OK']:
                    gLogger.error('DataIntegrityClient: failed to remove file',
                                  res['Message'])
                    # Increment the number of retries for this file
                    self.incrementProblematicRetry(fileID)
                    return res
                gLogger.info("LFNZeroReplicas file (%d) removed from catalog" %
                             fileID)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('LFNZeroReplicas', fileID)
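
A sketch of how these resolution methods might be driven end to end. getPrognosisProblematics is part of the service interface listed in the class docstring; the shape of the returned records and the dispatch-by-prognosis pattern are assumptions for illustration:

# Hypothetical driver loop; record fields ('Prognosis', 'FileID', 'LFN', 'SE')
# mirror the problematicDict keys used by the resolve* methods above.
client = DataIntegrityClient()
res = client.getPrognosisProblematics('PFNMissing')
if res['OK']:
    for problematicDict in res['Value']:
        resolve = getattr(client, 'resolve%s' % problematicDict['Prognosis'], None)
        if resolve is not None:
            resolve(problematicDict)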
Example no. 6
class CatalogPlugInTestCase(unittest.TestCase):
    """ Base class for the CatalogPlugin test case """
    def setUp(self):
        self.fullMetadata = [
            'Status', 'ChecksumType', 'OwnerRole', 'CreationDate', 'Checksum',
            'ModificationDate', 'OwnerDN', 'Mode', 'GUID', 'Size'
        ]
        self.dirMetadata = self.fullMetadata + ['NumberOfSubPaths']
        self.fileMetadata = self.fullMetadata + ['NumberOfLinks']

        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        self.assertTrue(valid)
        self.destDir = '/lhcb/test/unit-test/TestCatalogPlugin'
        self.link = "%s/link" % self.destDir

        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)

        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        for i in xrange(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assertTrue(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        pfn = 'protocol://host:port/storage/path%s' % lfn
        size = 10000000
        se = 'DIRAC-storage'
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {}
        fileDict[lfn] = {
            'PFN': pfn,
            'Size': size,
            'SE': se,
            'GUID': guid,
            'Checksum': adler
        }
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        self.assertTrue(res['OK'])
        self.assertTrue(res['Value'])
        self.assertTrue(res['Value']['Successful'])
        self.assertTrue(path in res['Value']['Successful'])
        return res['Value']['Successful'][path]

    def parseError(self, res, path):
        self.assertTrue(res['OK'])
        self.assertTrue(res['Value'])
        self.assertTrue(res['Value']['Failed'])
        self.assertTrue(path in res['Value']['Failed'])
        return res['Value']['Failed'][path]

    def cleanDirectory(self):
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        toRemove = returnValue['Files'].keys()
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assertTrue(returnValue)

    def purgeFiles(self, lfns):
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {}
                repDict[lfn] = {'PFN': pfn, 'SE': se}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()
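
A sketch of how this test case might be executed; catalogClientToTest is a module-level name the setUp above assumes, so it is assigned here explicitly, and the catalog name itself is illustrative:

import unittest

if __name__ == '__main__':
    catalogClientToTest = 'FileCatalogDB'  # assumed; must name a real catalog plug-in
    suite = unittest.defaultTestLoader.loadTestsFromTestCase(CatalogPlugInTestCase)
    unittest.TextTestRunner(verbosity=2).run(suite)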
Example no. 7
def main():
    days = 0
    months = 0
    years = 0
    wildcard = None
    baseDir = ''
    emptyDirsFlag = False
    Script.registerSwitch("D:", "Days=",
                          "Match files older than number of days [%s]" % days)
    Script.registerSwitch(
        "M:", "Months=",
        "Match files older than number of months [%s]" % months)
    Script.registerSwitch(
        "Y:", "Years=", "Match files older than number of years [%s]" % years)
    Script.registerSwitch("w:", "Wildcard=",
                          "Wildcard for matching filenames [All]")
    Script.registerSwitch(
        "b:", "BaseDir=",
        "Base directory to begin search (default /[vo]/user/[initial]/[username])"
    )
    Script.registerSwitch("e", "EmptyDirs",
                          "Create a list of empty directories")

    Script.parseCommandLine(ignoreErrors=False)

    for switch in Script.getUnprocessedSwitches():
        if switch[0] == "D" or switch[0].lower() == "days":
            days = int(switch[1])
        if switch[0] == "M" or switch[0].lower() == "months":
            months = int(switch[1])
        if switch[0] == "Y" or switch[0].lower() == "years":
            years = int(switch[1])
        if switch[0].lower() == "w" or switch[0].lower() == "wildcard":
            wildcard = '*' + switch[1]
        if switch[0].lower() == "b" or switch[0].lower() == "basedir":
            baseDir = switch[1]
        if switch[0].lower() == "e" or switch[0].lower() == "emptydirs":
            emptyDirsFlag = True

    import DIRAC
    from DIRAC import gLogger
    from DIRAC.ConfigurationSystem.Client.Helpers.Registry import getVOForGroup
    from DIRAC.Core.Security.ProxyInfo import getProxyInfo
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
    from datetime import datetime, timedelta
    import sys
    import os
    import time
    import fnmatch
    fc = FileCatalog()

    def isOlderThan(cTimeStruct, days):
        timeDelta = timedelta(days=days)
        maxCTime = datetime.utcnow() - timeDelta
        if cTimeStruct < maxCTime:
            return True
        return False

    withMetadata = False
    if days or months or years:
        withMetadata = True
    totalDays = 0
    if years:
        totalDays += 365 * years
    if months:
        totalDays += 30 * months
    if days:
        totalDays += days

    res = getProxyInfo(False, False)
    if not res['OK']:
        gLogger.error("Failed to get client proxy information.",
                      res['Message'])
        DIRAC.exit(2)
    proxyInfo = res['Value']
    if proxyInfo['secondsLeft'] == 0:
        gLogger.error("Proxy expired")
        DIRAC.exit(2)
    username = proxyInfo['username']
    vo = ''
    if 'group' in proxyInfo:
        vo = getVOForGroup(proxyInfo['group'])
    if not baseDir:
        if not vo:
            gLogger.error('Could not determine VO')
            Script.showHelp()
        baseDir = '/%s/user/%s/%s' % (vo, username[0], username)

    baseDir = baseDir.rstrip('/')

    gLogger.notice('Will search for files in %s%s' %
                   (baseDir, (' matching %s' % wildcard) if wildcard else ''))
    activeDirs = [baseDir]

    allFiles = []
    emptyDirs = []

    while len(activeDirs) > 0:
        currentDir = activeDirs.pop()
        res = fc.listDirectory(currentDir, withMetadata, timeout=360)
        if not res['OK']:
            gLogger.error("Error retrieving directory contents",
                          "%s %s" % (currentDir, res['Message']))
        elif currentDir in res['Value']['Failed']:
            gLogger.error(
                "Error retrieving directory contents",
                "%s %s" % (currentDir, res['Value']['Failed'][currentDir]))
        else:
            dirContents = res['Value']['Successful'][currentDir]
            subdirs = dirContents['SubDirs']
            files = dirContents['Files']
            if not subdirs and not files:
                emptyDirs.append(currentDir)
                gLogger.notice('%s: empty directory' % currentDir)
            else:
                for subdir in sorted(subdirs, reverse=True):
                    if (not withMetadata) or isOlderThan(
                            subdirs[subdir]['CreationDate'], totalDays):
                        activeDirs.append(subdir)
                for filename in sorted(files):
                    fileOK = False
                    if (not withMetadata) or isOlderThan(
                            files[filename]['MetaData']['CreationDate'],
                            totalDays):
                        if wildcard is None or fnmatch.fnmatch(
                                filename, wildcard):
                            fileOK = True
                    if not fileOK:
                        files.pop(filename)
                allFiles += sorted(files)

                if len(files) or len(subdirs):
                    gLogger.notice(
                        "%s: %d files%s, %d sub-directories" %
                        (currentDir, len(files), ' matching'
                         if withMetadata or wildcard else '', len(subdirs)))

    outputFileName = '%s.lfns' % baseDir.replace('/%s' % vo,
                                                 '%s' % vo).replace('/', '-')
    with open(outputFileName, 'w') as outputFile:
        for lfn in sorted(allFiles):
            outputFile.write(lfn + '\n')
    gLogger.notice('%d matched files have been put in %s' %
                   (len(allFiles), outputFileName))

    if emptyDirsFlag:
        outputFileName = '%s.emptydirs' % baseDir.replace(
            '/%s' % vo, '%s' % vo).replace('/', '-')
        with open(outputFileName, 'w') as outputFile:
            for emptyDir in sorted(emptyDirs):
                outputFile.write(emptyDir + '\n')
        gLogger.notice('%d empty directories have been put in %s' %
                       (len(emptyDirs), outputFileName))

    DIRAC.exit(0)
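The script above relies on an isOlderThan helper that is defined elsewhere in the original tool. A minimal sketch of what such a helper could look like, assuming (an assumption, not the original implementation) that the catalog returns CreationDate either as a datetime object or as a 'YYYY-MM-DD HH:MM:SS' string:

from datetime import datetime, timedelta

def isOlderThan(creationDate, totalDays):
    # Hypothetical helper; the real implementation may differ.
    if isinstance(creationDate, basestring):  # Python 2 string check, as in the snippets here
        creationDate = datetime.strptime(creationDate, '%Y-%m-%d %H:%M:%S')
    return creationDate < datetime.utcnow() - timedelta(days=totalDays)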
Example n. 8
if not baseDir:
  if not vo:
    gLogger.error( 'Could not determine VO' )
    Script.showHelp()
  baseDir = '/%s/user/%s/%s' % ( vo, username[0], username )

baseDir = baseDir.rstrip( '/' )

gLogger.info( 'Will search for files in %s' % baseDir )
activeDirs = [baseDir]

allFiles = []
emptyDirs = []
while len( activeDirs ) > 0:
  currentDir = activeDirs.pop()
  res = fc.listDirectory( currentDir, withMetadata, timeout = 360 )
  if not res['OK']:
    gLogger.error( "Error retrieving directory contents", "%s %s" % ( currentDir, res['Message'] ) )
  elif currentDir in res['Value']['Failed']:
    gLogger.error( "Error retrieving directory contents", "%s %s" % ( currentDir, res['Value']['Failed'][currentDir] ) )
  else:
    dirContents = res['Value']['Successful'][currentDir]
    subdirs = dirContents['SubDirs']
    files = dirContents['Files']
    if not subdirs and not files:
      emptyDirs.append( currentDir )
      gLogger.notice( '%s: empty directory' % currentDir )
    else:
      for subdir in sorted( subdirs, reverse = True ):
        if ( not withMetadata ) or isOlderThan( subdirs[subdir]['CreationDate'], totalDays ):
          activeDirs.append( subdir )
Example n. 9
elif mct in ['pn', 'ps']:
    mcname = 'proton'
else:
    gLogger.error('Unknown config extension: ', mct)
    DIRAC.exit(2)

gLogger.notice('Working with prodName ', prodName)

from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
cat = FileCatalog()

BASE_PROD_DIR = '/vo.cta.in2p3.fr/MC/PROD2/'

topMCDir = os.path.join(BASE_PROD_DIR, prodName, 'prod-2_13112014_corsika',
                        mcname)
res = cat.listDirectory(topMCDir)
NB_FILES_DIR = {}
TOTAL_SIZE_DIR = {}
gLogger.notice('Looking for Data files...')
subdirs = res['Value']['Successful'].values()[0]['SubDirs'].keys()
for adir in subdirs:
    tag = adir.split('/')[-1].split('_')[-1]
    print tag,
    NB_FILES_DIR[tag] = 0
    TOTAL_SIZE_DIR[tag] = 0
    subres = cat.listDirectory(os.path.join(adir, 'Data'))
    for xxx in subres['Value']['Successful'].values()[0]['SubDirs'].keys():
        sizeDir = cat.getDirectorySize(xxx)['Value']['Successful']
        for key, val in sizeDir.items():
            print val['Files'],
            NB_FILES_DIR[tag] += val['Files']
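The snippet above breaks off before TOTAL_SIZE_DIR is updated. A plausible continuation of the inner loop, assuming each getDirectorySize entry also carries a 'TotalSize' key next to 'Files' (a hypothetical key; the visible code only reads 'Files'):

        for key, val in sizeDir.items():
            NB_FILES_DIR[tag] += val['Files']
            TOTAL_SIZE_DIR[tag] += val.get('TotalSize', 0)  # hypothetical key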
Example n. 10
class DIRACBackend(GridBackend):
    """Grid backend using the GFAL command line tools `gfal-*`."""

    def __init__(self, **kwargs):
        GridBackend.__init__(self, catalogue_prefix='', **kwargs)

        from DIRAC.Core.Base import Script
        Script.initialize()
        from DIRAC.FrameworkSystem.Client.ProxyManagerClient import ProxyManagerClient
        self.pm = ProxyManagerClient()

        proxy = self.pm.getUserProxiesInfo()
        if not proxy['OK']:
            raise BackendException("Proxy error.")

        from DIRAC.Interfaces.API.Dirac import Dirac
        self.dirac = Dirac()

        from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
        self.fc = FileCatalog()
        from DIRAC.DataManagementSystem.Client.DataManager import DataManager
        self.dm = DataManager()

        self._xattr_cmd = sh.Command('gfal-xattr').bake(_tty_out=False)
        self._replica_checksum_cmd = sh.Command('gfal-sum').bake(_tty_out=False)
        self._bringonline_cmd = sh.Command('gfal-legacy-bringonline').bake(_tty_out=False)
        self._cp_cmd = sh.Command('gfal-copy').bake(_tty_out=False)
        self._ls_se_cmd = sh.Command('gfal-ls').bake(color='never', _tty_out=False)
        self._move_cmd = sh.Command('gfal-rename').bake(_tty_out=False)
        self._mkdir_cmd = sh.Command('gfal-mkdir').bake(_tty_out=False)

        self._replicate_cmd = sh.Command('dirac-dms-replicate-lfn').bake(_tty_out=False)
        self._add_cmd = sh.Command('dirac-dms-add-file').bake(_tty_out=False)

    @staticmethod
    def _check_return_value(ret):
        if not ret['OK']:
            raise BackendException("Failed: %s" % ret['Message'])
        for path, error in ret['Value']['Failed'].items():
            if ('No such' in error) or ('Directory does not' in error):
                raise DoesNotExistException("No such file or directory.")
            else:
                raise BackendException(error)
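    # For reference: _check_return_value consumes DIRAC's standard return
    # structure. An illustrative (made-up) success value looks like
    #   {'OK': True,
    #    'Value': {'Successful': {'/some/lfn': True},
    #              'Failed': {'/other/lfn': 'No such file or directory'}}}
    # while an error return carries {'OK': False, 'Message': '...'}.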

    def _is_dir(self, lurl):
        isdir = self.fc.isDirectory(lurl)
        self._check_return_value(isdir)
        return isdir['Value']['Successful'][lurl]

    def _is_file(self, lurl):
        isfile = self.fc.isFile(lurl)
        self._check_return_value(isfile)
        return isfile['Value']['Successful'][lurl]

    def _get_dir_entry(self, lurl, infodict=None):
        """Take a lurl and return a DirEntry."""
        # If no dictionary with the information is specified, get it from the catalogue
        try:
            md = infodict['MetaData']
        except TypeError:
            md = self.fc.getFileMetadata(lurl)
            if not md['OK']:
                raise BackendException("Failed to list path '%s': %s" % (lurl, md['Message']))
            for path, error in md['Value']['Failed'].items():
                if 'No such file' in error:
                    # File does not exist, maybe a directory?
                    md = self.fc.getDirectoryMetadata(lurl)
                    for path, error in md['Value']['Failed'].items():
                        raise DoesNotExistException("No such file or directory.")
                else:
                    raise BackendException(md['Value']['Failed'][lurl])
            md = md['Value']['Successful'][lurl]
        return DirEntry(
            posixpath.basename(lurl),
            mode=oct(md.get('Mode', -1)),
            links=md.get('links', -1),
            gid=md['OwnerGroup'],
            uid=md['Owner'],
            size=md.get('Size', -1),
            modified=str(md.get('ModificationDate', '?')))

    def _iter_directory(self, lurl):
        """Iterate over entries in a directory."""

        ret = self.fc.listDirectory(lurl)
        if not ret['OK']:
            raise BackendException("Failed to list path '%s': %s" % (lurl, ret['Message']))
        for path, error in ret['Value']['Failed'].items():
            if 'Directory does not' in error:
                # Dir does not exist, maybe a File?
                if self._is_file(lurl):
                    lst = [(lurl, None)]
                    break
                else:
                    raise DoesNotExistException("No such file or Directory.")
            else:
                raise BackendException(ret['Value']['Failed'][lurl])
        else:
            # Sort items by keys, i.e. paths
            lst = sorted(ret['Value']['Successful'][lurl]['Files'].items()
                         + ret['Value']['Successful'][lurl]['SubDirs'].items())

        for item in lst:
            yield item # = path, dict

    def _ls(self, lurl, **kwargs):
        # Translate keyword arguments
        d = kwargs.pop('directory', False)

        if d:
            # Just the requested entry itself
            yield self._get_dir_entry(lurl)
            return

        for path, info in self._iter_directory(lurl):
            yield self._get_dir_entry(path, info)

    def _ls_se(self, surl, **kwargs):
        # Translate keyword arguments
        d = kwargs.pop('directory', False)
        args = []
        if d:
            args.append('-d')
        args.append('-l')
        args.append(surl)
        try:
            output = self._ls_se_cmd(*args, **kwargs)
        except sh.ErrorReturnCode as e:
            if 'No such file' in e.stderr:
                raise DoesNotExistException("No such file or Directory.")
            else:
                raise BackendException(e.stderr)
        for line in output:
            fields = line.split()
            mode, links, gid, uid, size = fields[:5]
            name = fields[-1]
            modified = ' '.join(fields[5:-1])
            yield DirEntry(name, mode=mode, links=int(links), gid=gid, uid=uid, size=int(size), modified=modified)

    def _replicas(self, lurl, **kwargs):
        # Check the lurl actually exists; the generator must be consumed
        # for the check to run
        next(self._ls(lurl, directory=True))

        rep = self.dirac.getReplicas(lurl)
        self._check_return_value(rep)
        rep = rep['Value']['Successful'][lurl]

        return rep.values()

    def _exists(self, surl, **kwargs):
        try:
            ret = self._ls_se_cmd(surl, '-d', '-l', **kwargs).strip()
        except sh.ErrorReturnCode as e:
            if 'No such file' in e.stderr:
                return False
            else:
                if len(e.stderr) == 0:
                    raise BackendException(e.stdout)
                else:
                    raise BackendException(e.stderr)
        else:
            return ret[0] != 'd' # Return `False` for directories

    def _register(self, surl, lurl, verbose=False, **kwargs):
        # Register an existing physical copy in the file catalogue
        se = storage.get_SE(surl).name
        # See if file already exists in DFC
        ret = self.fc.getFileMetadata(lurl)
        try:
            self._check_return_value(ret)
        except DoesNotExistException:
            # Add new file
            size = next(self._ls_se(surl, directory=True)).size
            checksum = self.checksum(surl)
            guid = str(uuid.uuid4()) # The guid does not seem to be important. Make it unique if possible.
            ret = self.dm.registerFile((lurl, surl, size, se, guid, checksum))
        else:
            # Add new replica
            ret = self.dm.registerReplica((lurl, surl, se))

        self._check_return_value(ret)
        if verbose:
            print_("Successfully registered replica %s of %s from %s."%(surl, lurl, se))
        return True

    def _deregister(self, surl, lurl, verbose=False, **kwargs):
        # DIRAC only needs to know the SE name to deregister a replica
        se = storage.get_SE(surl).name
        ret = self.dm.removeReplicaFromCatalog(se, [lurl])
        self._check_return_value(ret)
        if verbose:
            print_("Successfully deregistered replica of %s from %s."%(lurl, se))
        return True

    def _state(self, surl, **kwargs):
        try:
            state = self._xattr_cmd(surl, 'user.status', **kwargs).strip()
        except sh.ErrorReturnCode as e:
            if "No such file" in e.stderr:
                raise DoesNotExistException("No such file or Directory.")
            state = '?'
        except sh.SignalException_SIGSEGV:
            state = '?'
        return state

    def _checksum(self, surl, **kwargs):
        try:
            checksum = self._replica_checksum_cmd(surl, 'ADLER32', **kwargs).split()[1]
        except sh.ErrorReturnCode:
            checksum = '?'
        except sh.SignalException_SIGSEGV:
            checksum = '?'
        except IndexError:
            checksum = '?'
        return checksum

    def _bringonline(self, surl, timeout, verbose=False, **kwargs):
        if verbose:
            out = sys.stdout
        else:
            out = None
        # gfal does not notice when files come online, it seems
        # Just send a single short request, then check regularly

        end = time.time() + timeout

        try:
            self._bringonline_cmd('-t', 10, surl, _out=out, **kwargs)
        except sh.ErrorReturnCode as e:
            # The command fails if the file is not online
            # To be expected after 10 seconds
            if "No such file" in e.stderr:
                # Except when the file does not actually exist on the tape storage
                raise DoesNotExistException("No such file or Directory.")

        wait = 5
        while True:
            if verbose:
                print_("Checking replica state...")
            if self.is_online(surl):
                if verbose:
                    print_("Replica brought online.")
                return True

            time_left = end - time.time()
            if time_left <= 0:
                if verbose:
                    print_("Could not bring replica online.")
                return False

            wait *= 2
            if time_left < wait:
                wait = time_left

            if verbose:
                print_("Timeout remaining: %d s"%(time_left))
                print_("Checking again in: %d s"%(wait))
            time.sleep(wait)

    def _replicate(self, source_surl, destination_surl, lurl, verbose=False, **kwargs):
        if verbose:
            out = sys.stdout
        else:
            out = None

        source = storage.get_SE(source_surl).name
        destination = storage.get_SE(destination_surl).name
        try:
            self._replicate_cmd(lurl, destination, source, _out=out, **kwargs)
        except sh.ErrorReturnCode as e:
            if 'No such file' in e.stderr:
                raise DoesNotExistException("No such file or directory.")
            else:
                if len(e.stderr) == 0:
                    raise BackendException(e.stdout)
                else:
                    raise BackendException(e.stderr)

        return True

    def _get(self, surl, localpath, verbose=False, **kwargs):
        if verbose:
            out = sys.stdout
        else:
            out = None
        try:
            self._cp_cmd('-f', '--checksum', 'ADLER32', surl, localpath, _out=out, **kwargs)
        except sh.ErrorReturnCode as e:
            if 'No such file' in e.stderr:
                raise DoesNotExistException("No such file or directory.")
            else:
                if len(e.stderr) == 0:
                    raise BackendException(e.stdout)
                else:
                    raise BackendException(e.stderr)
        return os.path.isfile(localpath)

    def _put(self, localpath, surl, lurl, verbose=False, **kwargs):
        if verbose:
            out = sys.stdout
        else:
            out = None
        se = storage.get_SE(surl).name

        try:
            self._add_cmd(lurl, localpath, se, _out=out, **kwargs)
        except sh.ErrorReturnCode as e:
            if 'No such file' in e.stderr:
                raise DoesNotExistException("No such file or directory.")
            else:
                if len(e.stderr) == 0:
                    raise BackendException(e.stdout)
                else:
                    raise BackendException(e.stderr)
        return True

    def _remove(self, surl, lurl, last=False, verbose=False, **kwargs):
        se = storage.get_SE(surl).name

        if last:
            # Delete lfn
            if verbose:
                print_("Removing all replicas of %s."%(lurl,))
            ret = self.dm.removeFile([lurl])
        else:
            if verbose:
                print_("Removing replica of %s from %s."%(lurl, se))
            ret = self.dm.removeReplica(se, [lurl])

        if not ret['OK']:
            raise BackendException('Failed: %s'%(ret['Message']))

        for lurl, error in ret['Value']['Failed'].items():
            if 'No such file' in error:
                raise DoesNotExistException("No such file or directory.")
            else:
                raise BackendException(error)

        return True

    def _rmdir(self, lurl, verbose=False):
        """Remove an empty directory from the catalogue."""
        rep = self.fc.removeDirectory(lurl)
        self._check_return_value(rep)
        return True

    def _move_replica(self, surl, new_surl, verbose=False, **kwargs):
        if verbose:
            out = sys.stdout
        else:
            out = None

        try:
            folder = posixpath.dirname(new_surl)
            self._mkdir_cmd(folder, '-p', _out=out, **kwargs)
            self._move_cmd(surl, new_surl, _out=out, **kwargs)
        except sh.ErrorReturnCode as e:
            if 'No such file' in e.stderr:
                raise DoesNotExistException("No such file or directory.")
            else:
                if len(e.stderr) == 0:
                    raise BackendException(e.stdout)
                else:
                    raise BackendException(e.stderr)
        return True
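A minimal usage sketch for the backend above, assuming a valid DIRAC proxy is available, that GridBackend needs no further constructor arguments, and that DirEntry exposes its constructor fields as attributes (all assumptions):

backend = DIRACBackend()
# List a catalogue directory via the internal helper (hypothetical LFN path):
for entry in backend._ls('/some.vo/user/u/username'):
    print_("%s %s" % (entry.name, entry.size))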
Example n. 11
class StorageUsageAgent(AgentModule):
    ''' .. class:: StorageUsageAgent


  :param FileCatalog catalog: FileCatalog instance
  :param mixed storageUsage: StorageUsageDB instance or its rpc client
  :param int pollingTime: polling time
  :param int activePeriod: active period in weeks
  :param threading.Lock dataLock: data lock
  :param threading.Lock replicaListLock: replica list lock
  :param DictCache proxyCache: creds cache
  '''
    catalog = None
    storageUsage = None
    pollingTime = 43200
    activePeriod = 0
    dataLock = None  # threading.Lock()
    replicaListLock = None  # threading.Lock()
    proxyCache = None  # DictCache()
    enableStartupSleep = True  # Enable a random sleep so not all the user agents start together

    def __init__(self, *args, **kwargs):
        ''' c'tor
    '''
        AgentModule.__init__(self, *args, **kwargs)

        self.__baseDir = '/lhcb'
        self.__baseDirLabel = "_".join(List.fromChar(self.__baseDir, "/"))
        self.__ignoreDirsList = []
        self.__keepDirLevels = 4

        self.__startExecutionTime = long(time.time())
        self.__dirExplorer = DirectoryExplorer(reverse=True)
        self.__processedDirs = 0
        self.__directoryOwners = {}
        self.catalog = FileCatalog()
        self.__maxToPublish = self.am_getOption('MaxDirectories', 5000)
        if self.am_getOption('DirectDB', False):
            self.storageUsage = StorageUsageDB()
        else:
            # Set a timeout of 0.1 seconds per directory (factor 5 margin)
            self.storageUsage = RPCClient('DataManagement/StorageUsage',
                                          timeout=self.am_getOption(
                                              'Timeout',
                                              int(self.__maxToPublish * 0.1)))
        self.activePeriod = self.am_getOption('ActivePeriod',
                                              self.activePeriod)
        self.dataLock = threading.Lock()
        self.replicaListLock = threading.Lock()
        self.proxyCache = DictCache(removeProxy)
        self.__noProxy = set()
        self.__catalogType = None
        self.__recalculateUsage = Operations().getValue(
            'DataManagement/RecalculateDirSize', False)
        self.enableStartupSleep = self.am_getOption('EnableStartupSleep',
                                                    self.enableStartupSleep)
        self.__publishDirQueue = {}
        self.__dirsToPublish = {}
        self.__replicaFilesUsed = set()
        self.__replicaListFilesDir = ""

    def initialize(self):
        ''' agent initialisation '''

        self.am_setOption("PollingTime", self.pollingTime)

        if self.enableStartupSleep:
            rndSleep = random.randint(1, self.pollingTime)
            self.log.info("Sleeping for %s seconds" % rndSleep)
            time.sleep(rndSleep)

        # This sets the Default Proxy to be used, as defined under
        # /Operations/Shifter/DataManager;
        # the shifterProxy option in the configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        return S_OK()

    def __writeReplicasListFiles(self, dirPathList):
        ''' dump replicas list to files '''
        self.replicaListLock.acquire()
        try:
            self.log.info("Dumping replicas for %s dirs" % len(dirPathList))
            result = self.catalog.getDirectoryReplicas(dirPathList)
            if not result['OK']:
                self.log.error("Could not get directory replicas",
                               "%s -> %s" % (dirPathList, result['Message']))
                return result
            resData = result['Value']
            filesOpened = {}
            for dirPath in dirPathList:
                if dirPath in result['Value']['Failed']:
                    self.log.error(
                        "Could not get directory replicas",
                        "%s -> %s" % (dirPath, resData['Failed'][dirPath]))
                    continue
                dirData = resData['Successful'][dirPath]
                for lfn in dirData:
                    for seName in dirData[lfn]:
                        if seName not in filesOpened:
                            filePath = os.path.join(
                                self.__replicaListFilesDir,
                                "replicas.%s.%s.filling" %
                                (seName, self.__baseDirLabel))
                            # Check if file is opened and if not open it
                            if seName not in filesOpened:
                                if seName not in self.__replicaFilesUsed:
                                    self.__replicaFilesUsed.add(seName)
                                    filesOpened[seName] = file(filePath, "w")
                                else:
                                    filesOpened[seName] = file(filePath, "a")
                        # seName file is opened. Write
                        filesOpened[seName].write("%s -> %s\n" %
                                                  (lfn, dirData[lfn][seName]))
            # Close the files
            for seName in filesOpened:
                filesOpened[seName].close()
            return S_OK()
        finally:
            self.replicaListLock.release()

    def __resetReplicaListFiles(self):
        ''' prepare directories for replica list files '''
        self.__replicaFilesUsed = set()
        self.__replicaListFilesDir = os.path.join(
            self.am_getOption("WorkDirectory"), "replicaLists")
        mkDir(self.__replicaListFilesDir)
        self.log.info("Replica Lists directory is %s" %
                      self.__replicaListFilesDir)

    def __replicaListFilesDone(self):
        ''' rotate replicas list files '''
        self.replicaListLock.acquire()
        try:
            old = re.compile(r"^replicas\.([a-zA-Z0-9\-_]*)\.%s\.old$" %
                             self.__baseDirLabel)
            current = re.compile(r"^replicas\.([a-zA-Z0-9\-_]*)\.%s$" %
                                 self.__baseDirLabel)
            filling = re.compile(
                r"^replicas\.([a-zA-Z0-9\-_]*)\.%s\.filling$" %
                self.__baseDirLabel)
            # Delete old
            for fileName in os.listdir(self.__replicaListFilesDir):
                match = old.match(fileName)
                if match:
                    os.unlink(
                        os.path.join(self.__replicaListFilesDir, fileName))
            # Current -> old
            for fileName in os.listdir(self.__replicaListFilesDir):
                match = current.match(fileName)
                if match:
                    newFileName = "replicas.%s.%s.old" % (match.group(1),
                                                          self.__baseDirLabel)
                    self.log.info(
                        "Moving \n %s\n to \n %s" %
                        (os.path.join(self.__replicaListFilesDir, fileName),
                         os.path.join(self.__replicaListFilesDir,
                                      newFileName)))
                    os.rename(
                        os.path.join(self.__replicaListFilesDir, fileName),
                        os.path.join(self.__replicaListFilesDir, newFileName))
            # filling to current
            for fileName in os.listdir(self.__replicaListFilesDir):
                match = filling.match(fileName)
                if match:
                    newFileName = "replicas.%s.%s" % (match.group(1),
                                                      self.__baseDirLabel)
                    self.log.info(
                        "Moving \n %s\n to \n %s" %
                        (os.path.join(self.__replicaListFilesDir, fileName),
                         os.path.join(self.__replicaListFilesDir,
                                      newFileName)))
                    os.rename(
                        os.path.join(self.__replicaListFilesDir, fileName),
                        os.path.join(self.__replicaListFilesDir, newFileName))

            return S_OK()
        finally:
            self.replicaListLock.release()

    def __printSummary(self):
        ''' pretty print summary '''
        res = self.storageUsage.getStorageSummary()
        if res['OK']:
            self.log.notice("Storage Usage Summary")
            self.log.notice(
                "============================================================")
            self.log.notice(
                "%-40s %20s %20s" %
                ('Storage Element', 'Number of files', 'Total size'))

            for se in sorted(res['Value']):
                site = se.split('_')[0].split('-')[0]
                gMonitor.registerActivity("%s-used" % se,
                                          "%s usage" % se,
                                          "StorageUsage/%s usage" % site,
                                          "",
                                          gMonitor.OP_MEAN,
                                          bucketLength=600)
                gMonitor.registerActivity("%s-files" % se,
                                          "%s files" % se,
                                          "StorageUsage/%s files" % site,
                                          "Files",
                                          gMonitor.OP_MEAN,
                                          bucketLength=600)

            time.sleep(2)

            for se in sorted(res['Value']):
                usage = res['Value'][se]['Size']
                files = res['Value'][se]['Files']
                self.log.notice("%-40s %20s %20s" %
                                (se, str(files), str(usage)))
                gMonitor.addMark("%s-used" % se, usage)
                gMonitor.addMark("%s-files" % se, files)

    def execute(self):
        ''' execution in one cycle '''
        self.__publishDirQueue = {}
        self.__dirsToPublish = {}
        self.__baseDir = self.am_getOption('BaseDirectory', '/lhcb')
        self.__baseDirLabel = "_".join(List.fromChar(self.__baseDir, "/"))
        self.__ignoreDirsList = self.am_getOption('Ignore', [])
        self.__keepDirLevels = self.am_getOption("KeepDirLevels", 4)

        self.__startExecutionTime = long(time.time())
        self.__dirExplorer = DirectoryExplorer(reverse=True)
        self.__resetReplicaListFiles()
        self.__noProxy = set()
        self.__processedDirs = 0
        self.__directoryOwners = {}

        self.__printSummary()

        self.__dirExplorer.addDir(self.__baseDir)
        self.log.notice("Initiating with %s as base directory." %
                        self.__baseDir)
        # Loop over all the directories and sub-directories
        totalIterTime = 0.0
        numIterations = 0.0
        iterMaxDirs = 100
        while self.__dirExplorer.isActive():
            startT = time.time()
            d2E = [
                self.__dirExplorer.getNextDir() for _i in xrange(iterMaxDirs)
                if self.__dirExplorer.isActive()
            ]
            self.__exploreDirList(d2E)
            iterTime = time.time() - startT
            totalIterTime += iterTime
            numIterations += len(d2E)
            self.log.verbose("Query took %.2f seconds for %s dirs" %
                             (iterTime, len(d2E)))
        self.log.verbose("Average query time: %2.f secs/dir" %
                         (totalIterTime / numIterations))

        # Publish remaining directories
        self.__publishData(background=False)

        # Move replica list files
        self.__replicaListFilesDone()

        # Clean records older than 1 day
        self.log.info("Finished recursive directory search.")

        if self.am_getOption("PurgeOutdatedRecords", True):
            elapsedTime = time.time() - self.__startExecutionTime
            outdatedSeconds = max(
                max(self.am_getOption("PollingTime"), elapsedTime) * 2, 86400)
            result = self.storageUsage.purgeOutdatedEntries(
                self.__baseDir, long(outdatedSeconds), self.__ignoreDirsList)
            if not result['OK']:
                return result
            self.log.notice("Purged %s outdated records" % result['Value'])
        return S_OK()

    def __exploreDirList(self, dirList):
        ''' collect directory size for directory in :dirList: '''
        # Normalise dirList first
        dirList = [os.path.realpath(d) for d in dirList]
        self.log.notice("Retrieving info for %s dirs" % len(dirList))
        # For top directories, no files anyway, hence no need to get full size
        dirContents = {}
        failed = {}
        successfull = {}
        startTime = time.time()
        nbDirs = len(dirList)
        chunkSize = 10
        if self.__catalogType == 'DFC' or dirList == [self.__baseDir]:
            # Get the content of the directory as anyway this is needed
            for dirChunk in breakListIntoChunks(dirList, chunkSize):
                res = self.catalog.listDirectory(dirChunk, True, timeout=600)
                if not res['OK']:
                    failed.update(dict.fromkeys(dirChunk, res['Message']))
                else:
                    failed.update(res['Value']['Failed'])
                    dirContents.update(res['Value']['Successful'])
            self.log.info(
                'Time to retrieve content of %d directories: %.1f seconds' %
                (nbDirs, time.time() - startTime))
            for dirPath in failed:
                dirList.remove(dirPath)
        # We don't need to get the storage usage if there are no files...
        dirListSize = [
            d for d in dirList if dirContents.get(d, {}).get('Files')
        ]

        startTime1 = time.time()
        # __recalculateUsage enables to recompute the directory usage in case the internal table is wrong
        for args in [(d, True, self.__recalculateUsage)
                     for d in breakListIntoChunks(dirListSize, chunkSize)]:
            res = self.catalog.getDirectorySize(*args, timeout=600)
            if not res['OK']:
                failed.update(dict.fromkeys(args[0], res['Message']))
            else:
                failed.update(res['Value']['Failed'])
                successfull.update(res['Value']['Successful'])
        errorReason = {}
        for dirPath in failed:
            error = str(failed[dirPath])
            errorReason.setdefault(error, []).append(dirPath)
        for error in errorReason:
            self.log.error(
                'Failed to get directory info',
                '- %s for:\n\t%s' % (error, '\n\t'.join(errorReason[error])))
        self.log.info('Time to retrieve size of %d directories: %.1f seconds' %
                      (len(dirListSize), time.time() - startTime1))
        for dirPath in [d for d in dirList if d not in failed]:
            metadata = successfull.get(dirPath, {})
            if 'SubDirs' in metadata:
                self.__processDir(dirPath, metadata)
            else:
                if not self.__catalogType:
                    self.log.info('Catalog type determined to be DFC')
                    self.__catalogType = 'DFC'
                self.__processDirDFC(dirPath, metadata, dirContents[dirPath])
        self.log.info('Time to process %d directories: %.1f seconds' %
                      (nbDirs, time.time() - startTime))
        notCommitted = len(self.__publishDirQueue) + len(self.__dirsToPublish)
        self.log.notice(
            "%d dirs to be explored, %d done. %d not yet committed." %
            (self.__dirExplorer.getNumRemainingDirs(), self.__processedDirs,
             notCommitted))

    def __processDirDFC(self, dirPath, metadata, subDirectories):
        ''' gets the list of subdirs that the DFC doesn't return, set the metadata like the FC
    and then call the same method as for the FC '''
        if 'SubDirs' not in subDirectories:
            self.log.error('No subdirectory item for directory', dirPath)
            return
        dirMetadata = {
            'Files': 0,
            'TotalSize': 0,
            'ClosedDirs': [],
            'SiteUsage': {}
        }
        if 'PhysicalSize' in metadata:
            dirMetadata['Files'] = metadata['LogicalFiles']
            dirMetadata['TotalSize'] = metadata['LogicalSize']
            dirMetadata['SiteUsage'] = metadata['PhysicalSize'].copy()
            dirMetadata['SiteUsage'].pop('TotalFiles', None)
            dirMetadata['SiteUsage'].pop('TotalSize', None)
        subDirs = subDirectories['SubDirs'].copy()
        dirMetadata['SubDirs'] = subDirs
        dirUsage = dirMetadata['SiteUsage']
        errorReason = {}
        for subDir in subDirs:
            self.__directoryOwners.setdefault(
                subDir,
                (subDirs[subDir]['Owner'], subDirs[subDir]['OwnerGroup']))
            subDirs[subDir] = subDirs[subDir].get('CreationTime', dateTime())
            if dirUsage:
                # This part here is for removing the recursivity introduced by the DFC
                args = [subDir]
                if len(subDir.split('/')) > self.__keepDirLevels:
                    args += [True, self.__recalculateUsage]
                result = self.catalog.getDirectorySize(*args)
                if not result['OK']:
                    errorReason.setdefault(str(result['Message']),
                                           []).append(subDir)
                else:
                    metadata = result['Value']['Successful'].get(subDir)
                    if metadata:
                        dirMetadata['Files'] -= metadata['LogicalFiles']
                        dirMetadata['TotalSize'] -= metadata['LogicalSize']
                    else:
                        errorReason.setdefault(
                            str(result['Value']['Failed'][subDir]),
                            []).append(subDir)
                if 'PhysicalSize' in metadata and dirUsage:
                    seUsage = metadata['PhysicalSize']
                    seUsage.pop('TotalFiles', None)
                    seUsage.pop('TotalSize', None)
                    for se in seUsage:
                        if se not in dirUsage:
                            self.log.error('SE used in subdir but not in dir',
                                           se)
                        else:
                            dirUsage[se]['Files'] -= seUsage[se]['Files']
                            dirUsage[se]['Size'] -= seUsage[se]['Size']
        for error in errorReason:
            self.log.error(
                'Failed to get directory info',
                '- %s for:\n\t%s' % (error, '\n\t'.join(errorReason[error])))
        for se, usage in dirUsage.items():
            # Both values should be zero, or both non-zero
            if not usage['Files'] and not usage['Size']:
                dirUsage.pop(se)
            elif not usage['Files'] * usage['Size']:
                self.log.error('Directory inconsistent',
                               '%s @ %s: %s' % (dirPath, se, str(usage)))
        return self.__processDir(dirPath, dirMetadata)

    def __processDir(self, dirPath, dirMetadata):
        ''' calculate nb of files and size of :dirPath:, remove it if it's empty '''
        subDirs = dirMetadata['SubDirs']
        closedDirs = dirMetadata['ClosedDirs']
        ##############################
        # FIXME: Until we understand why closed dirs are not working...
        ##############################
        closedDirs = []
        prStr = "%s: found %s sub-directories" % (dirPath, len(subDirs)
                                                  if subDirs else 'no')
        if closedDirs:
            prStr += ", %s are closed (ignored)" % len(closedDirs)
        for rmDir in closedDirs + self.__ignoreDirsList:
            subDirs.pop(rmDir, None)
        numberOfFiles = long(dirMetadata['Files'])
        totalSize = long(dirMetadata['TotalSize'])
        if numberOfFiles:
            prStr += " and %s files (%s bytes)" % (numberOfFiles, totalSize)
        else:
            prStr += " and no files"
        self.log.notice(prStr)
        if closedDirs:
            self.log.verbose("Closed dirs:\n %s" % '\n'.join(closedDirs))
        siteUsage = dirMetadata['SiteUsage']
        if numberOfFiles > 0:
            dirData = {
                'Files': numberOfFiles,
                'TotalSize': totalSize,
                'SEUsage': siteUsage
            }
            self.__addDirToPublishQueue(dirPath, dirData)
            # Print statistics
            self.log.verbose(
                "%-40s %20s %20s" %
                ('Storage Element', 'Number of files', 'Total size'))
            for storageElement in sorted(siteUsage):
                usageDict = siteUsage[storageElement]
                self.log.verbose(
                    "%-40s %20s %20s" % (storageElement, str(
                        usageDict['Files']), str(usageDict['Size'])))
        # If it's empty delete it
        elif len(subDirs) == 0 and len(closedDirs) == 0:
            if dirPath != self.__baseDir:
                self.removeEmptyDir(dirPath)
                return
        # We don't need the cached information about owner
        self.__directoryOwners.pop(dirPath, None)
        rightNow = dateTime()
        chosenDirs = [
            subDir
            for subDir in subDirs if not self.activePeriod or timeInterval(
                subDirs[subDir], self.activePeriod * week).includes(rightNow)
        ]

        self.__dirExplorer.addDirList(chosenDirs)
        self.__processedDirs += 1

    def __getOwnerProxy(self, dirPath):
        ''' get owner creds for :dirPath: '''
        self.log.verbose("Retrieving dir metadata...")
        # get owner from the cached information; if missing, try getDirectoryMetadata
        ownerName, ownerGroup = self.__directoryOwners.pop(
            dirPath, (None, None))
        if not ownerName or not ownerGroup:
            result = returnSingleResult(
                self.catalog.getDirectoryMetadata(dirPath))
            if not result['OK'] or 'OwnerRole' not in result['Value']:
                self.log.error("Could not get metadata info",
                               result['Message'])
                return result
            ownerRole = result['Value']['OwnerRole']
            ownerDN = result['Value']['OwnerDN']
            if ownerRole[0] != "/":
                ownerRole = "/%s" % ownerRole
            cacheKey = (ownerDN, ownerRole)
            ownerName = 'unknown'
            byGroup = False
        else:
            ownerDN = Registry.getDNForUsername(ownerName)
            if not ownerDN['OK']:
                self.log.error("Could not get DN from user name",
                               ownerDN['Message'])
                return ownerDN
            ownerDN = ownerDN['Value'][0]
            # This bloody method returns directly a string!!!!
            ownerRole = Registry.getVOMSAttributeForGroup(ownerGroup)
            byGroup = True
            # Get all groups for that VOMS Role, and add lhcb_user as in DFC this is a safe value
        ownerGroups = Registry.getGroupsWithVOMSAttribute(ownerRole) + [
            'lhcb_user'
        ]

        downErrors = []
        for ownerGroup in ownerGroups:
            if byGroup:
                ownerRole = None
                cacheKey = (ownerDN, ownerGroup)
            if cacheKey in self.__noProxy:
                return S_ERROR("Proxy not available")
                # Getting the proxy...
            upFile = self.proxyCache.get(cacheKey, 3600)
            if upFile and os.path.exists(upFile):
                self.log.verbose(
                    'Returning cached proxy for %s %s@%s [%s] in %s' %
                    (ownerName, ownerDN, ownerGroup, ownerRole, upFile))
                return S_OK(upFile)
            if ownerRole:
                result = gProxyManager.downloadVOMSProxy(
                    ownerDN,
                    ownerGroup,
                    limited=False,
                    requiredVOMSAttribute=ownerRole)
            else:
                result = gProxyManager.downloadProxy(ownerDN,
                                                     ownerGroup,
                                                     limited=False)
            if not result['OK']:
                downErrors.append("%s : %s" % (cacheKey, result['Message']))
                continue
            userProxy = result['Value']
            secsLeft = max(0, userProxy.getRemainingSecs()['Value'])
            upFile = userProxy.dumpAllToFile()
            if upFile['OK']:
                upFile = upFile['Value']
            else:
                return upFile
            self.proxyCache.add(cacheKey, secsLeft, upFile)
            self.log.info("Got proxy for %s %s@%s [%s]" %
                          (ownerName, ownerDN, ownerGroup, ownerRole))
            return S_OK(upFile)
        self.__noProxy.add(cacheKey)
        return S_ERROR("Could not download proxy for user (%s, %s):\n%s " %
                       (ownerDN, ownerRole, "\n ".join(downErrors)))

    def removeEmptyDir(self, dirPath):
        self.log.notice("Deleting empty directory %s" % dirPath)
        for useOwnerProxy in (False, True):
            result = self.__removeEmptyDir(dirPath,
                                           useOwnerProxy=useOwnerProxy)
            if result['OK']:
                self.log.info(
                    "Successfully removed empty directory from File Catalog and StorageUsageDB"
                )
                break
        return result

    def __removeEmptyDir(self, dirPath, useOwnerProxy=True):
        ''' unlink empty folder :dirPath: '''
        from DIRAC.ConfigurationSystem.Client.ConfigurationData import gConfigurationData
        if len(List.fromChar(dirPath, "/")) < self.__keepDirLevels:
            return S_OK()

        if useOwnerProxy:
            result = self.__getOwnerProxy(dirPath)
            if not result['OK']:
                if 'Proxy not available' not in result['Message']:
                    self.log.error(result['Message'])
                return result

            upFile = result['Value']
            prevProxyEnv = os.environ['X509_USER_PROXY']
            os.environ['X509_USER_PROXY'] = upFile
        try:
            gConfigurationData.setOptionInCFG(
                '/DIRAC/Security/UseServerCertificate', 'false')
            # res = self.catalog.removeDirectory( dirPath )
            res = self.catalog.writeCatalogs[0][1].removeDirectory(dirPath)
            if not res['OK']:
                self.log.error(
                    "Error removing empty directory from File Catalog.",
                    res['Message'])
                return res
            elif dirPath in res['Value']['Failed']:
                self.log.error(
                    "Failed to remove empty directory from File Catalog.",
                    res['Value']['Failed'][dirPath])
                self.log.debug(str(res))
                return S_ERROR(res['Value']['Failed'][dirPath])
            res = self.storageUsage.removeDirectory(dirPath)
            if not res['OK']:
                self.log.error(
                    "Failed to remove empty directory from Storage Usage database.",
                    res['Message'])
                return res
            return S_OK()
        finally:
            gConfigurationData.setOptionInCFG(
                '/DIRAC/Security/UseServerCertificate', 'true')
            if useOwnerProxy:
                os.environ['X509_USER_PROXY'] = prevProxyEnv

    def __addDirToPublishQueue(self, dirName, dirData):
        ''' enqueue :dirName: and :dirData: for publishing '''
        self.__publishDirQueue[dirName] = dirData
        numDirsToPublish = len(self.__publishDirQueue)
        if numDirsToPublish and numDirsToPublish % self.am_getOption(
                "PublishClusterSize", 100) == 0:
            self.__publishData(background=True)

    def __publishData(self, background=True):
        ''' publish data in a separate daemon thread '''
        self.dataLock.acquire()
        try:
            # Dump to file
            if self.am_getOption("DumpReplicasToFile", False):
                pass
                # repThread = threading.Thread( target = self.__writeReplicasListFiles,
                #                              args = ( list( self.__publishDirQueue ), ) )
            self.__dirsToPublish.update(self.__publishDirQueue)
            self.__publishDirQueue = {}
        finally:
            self.dataLock.release()
        if background:
            pubThread = threading.Thread(target=self.__executePublishData)
            pubThread.setDaemon(1)
            pubThread.start()
        else:
            self.__executePublishData()

    def __executePublishData(self):
        ''' publication thread target '''
        self.dataLock.acquire()
        try:
            if not self.__dirsToPublish:
                self.log.info("No data to be published")
                return
            if len(self.__dirsToPublish) > self.__maxToPublish:
                toPublish = {}
                for dirName in sorted(
                        self.__dirsToPublish)[:self.__maxToPublish]:
                    toPublish[dirName] = self.__dirsToPublish.pop(dirName)
            else:
                toPublish = self.__dirsToPublish
            self.log.info("Publishing usage for %d directories" %
                          len(toPublish))
            res = self.storageUsage.publishDirectories(toPublish)
            if res['OK']:
                # All went OK: clear the dictionary (it may be the data member itself)
                toPublish.clear()
            else:
                # Put back dirs to be published, due to the error
                self.__dirsToPublish.update(toPublish)
                self.log.error("Failed to publish directories", res['Message'])
            return res
        finally:
            self.dataLock.release()
Example n. 12
if not baseDir:
    if not vo:
        gLogger.error('Could not determine VO')
        Script.showHelp()
    baseDir = '/%s/user/%s/%s' % (vo, username[0], username)

baseDir = baseDir.rstrip('/')

gLogger.info('Will search for files in %s' % baseDir)
activeDirs = [baseDir]

allFiles = []
emptyDirs = []
while len(activeDirs) > 0:
    currentDir = activeDirs.pop()
    res = fc.listDirectory(currentDir, withMetadata)
    if not res['OK']:
        gLogger.error("Error retrieving directory contents",
                      "%s %s" % (currentDir, res['Message']))
    elif currentDir in res['Value']['Failed']:
        gLogger.error(
            "Error retrieving directory contents",
            "%s %s" % (currentDir, res['Value']['Failed'][currentDir]))
    else:
        dirContents = res['Value']['Successful'][currentDir]
        subdirs = dirContents['SubDirs']
        files = dirContents['Files']
        if not subdirs and not files:
            emptyDirs.append(currentDir)
            gLogger.notice('%s: empty directory' % currentDir)
        else:
Example n. 13
if not baseDir:
  if not vo:
    gLogger.error( 'Could not determine VO' )
    Script.showHelp()
  baseDir = '/%s/user/%s/%s' % ( vo, username[0], username )

baseDir = baseDir.rstrip( '/' )

gLogger.info( 'Will search for files in %s' % baseDir )
activeDirs = [baseDir]

allFiles = []
emptyDirs = []
while len( activeDirs ) > 0:
  currentDir = activeDirs.pop()
  res = fc.listDirectory( currentDir, withMetadata )
  if not res['OK']:
    gLogger.error( "Error retrieving directory contents", "%s %s" % ( currentDir, res['Message'] ) )
  elif currentDir in res['Value']['Failed']:
    gLogger.error( "Error retrieving directory contents", "%s %s" % ( currentDir, res['Value']['Failed'][currentDir] ) )
  else:
    dirContents = res['Value']['Successful'][currentDir]
    subdirs = dirContents['SubDirs']
    files = dirContents['Files']
    if not subdirs and not files:
      emptyDirs.append( currentDir )
      gLogger.notice( '%s: empty directory' % currentDir )
    else:
      for subdir in sorted( subdirs, reverse = True ):
        if ( not withMetadata ) or isOlderThan( subdirs[subdir]['CreationDate'], totalDays ):
          activeDirs.append( subdir )
Example n. 14
class CatalogPlugInTestCase(unittest.TestCase):
  """ Base class for the CatalogPlugin test case """

  def setUp(self):
    self.fullMetadata = ['Status', 'CheckSumType', 'OwnerRole', 'CreationDate', 'Checksum', 'ModificationDate', 'OwnerDN', 'Mode', 'GUID', 'Size']
    self.dirMetadata = self.fullMetadata + ['NumberOfSubPaths']
    self.fileMetadata = self.fullMetadata + ['NumberOfLinks']

    self.catalog = FileCatalog(catalogs=[catalogClientToTest])
    valid = self.catalog.isOK()
    self.assert_(valid)
    self.destDir = '/lhcb/test/unit-test/TestCatalogPlugin'
    self.link = "%s/link" % self.destDir

    # Clean the existing directory
    self.cleanDirectory()
    res = self.catalog.createDirectory(self.destDir)
    returnValue = self.parseResult(res,self.destDir)

    # Register some files to work with
    self.numberOfFiles = 2
    self.files = []
    for i in range(self.numberOfFiles):
      lfn = "%s/testFile_%d" % (self.destDir,i)
      res = self.registerFile(lfn)
      self.assert_(res)
      self.files.append(lfn)

  def registerFile(self,lfn):
    pfn = 'protocol://host:port/storage/path%s' % lfn
    size = 10000000
    se = 'DIRAC-storage'
    guid = makeGuid()
    adler = stringAdler(guid)
    fileDict = {}
    fileDict[lfn] = {'PFN':pfn,'Size':size,'SE':se,'GUID':guid,'Checksum':adler}
    res = self.catalog.addFile(fileDict)
    return self.parseResult(res,lfn)

  def parseResult(self,res,path):
    self.assert_(res['OK'])
    self.assert_(res['Value'])
    self.assert_(res['Value']['Successful'])
    self.assert_(res['Value']['Successful'].has_key(path))
    return res['Value']['Successful'][path]

  def parseError(self,res,path):
    self.assert_(res['OK'])
    self.assert_(res['Value'])
    self.assert_(res['Value']['Failed'])
    self.assert_(res['Value']['Failed'].has_key(path))
    return res['Value']['Failed'][path]    

  def cleanDirectory(self):
    res = self.catalog.exists(self.destDir)
    returnValue = self.parseResult(res,self.destDir)
    if not returnValue:
      return
    res = self.catalog.listDirectory(self.destDir)  
    returnValue = self.parseResult(res,self.destDir)
    toRemove = returnValue['Files'].keys()
    if toRemove:
      self.purgeFiles(toRemove)
    res = self.catalog.removeDirectory(self.destDir)
    returnValue = self.parseResult(res,self.destDir)
    self.assert_(returnValue)

  def purgeFiles(self,lfns):
    for lfn in lfns:
      res = self.catalog.getReplicas(lfn,True)
      replicas = self.parseResult(res,lfn)
      for se,pfn in replicas.items():
        repDict = {}
        repDict[lfn] = {'PFN':pfn,'SE':se}
        res = self.catalog.removeReplica(repDict)
        self.parseResult(res,lfn)   
      res = self.catalog.removeFile(lfn)
      self.parseResult(res,lfn)

  def tearDown(self):
    self.cleanDirectory()
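A minimal sketch of how this test case might be run, assuming catalogClientToTest is defined in the surrounding module (e.g. the name of the catalog plug-in under test):

import unittest

if __name__ == '__main__':
  # Standard-library runner; collects the test methods defined above.
  suite = unittest.defaultTestLoader.loadTestsFromTestCase(CatalogPlugInTestCase)
  unittest.TextTestRunner(verbosity=2).run(suite)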
Example n. 15
if not baseDir:
  if not vo:
    gLogger.error( 'Could not determine VO' )
    Script.showHelp()
  baseDir = '/%s/user/%s/%s' % ( vo, username[0], username )

baseDir = baseDir.rstrip('/')

gLogger.info( 'Will search for files in %s' % baseDir )
activeDirs = [baseDir]

allFiles = []
emptyDirs = []
while len( activeDirs ) > 0:
  currentDir = activeDirs[0]
  res = fc.listDirectory( currentDir, verbose )
  activeDirs.remove( currentDir )
  if not res['OK']:
    gLogger.error( "Error retrieving directory contents", "%s %s" % ( currentDir, res['Message'] ) )
  elif res['Value']['Failed'].has_key( currentDir ):
    gLogger.error( "Error retrieving directory contents", "%s %s" % ( currentDir, res['Value']['Failed'][currentDir] ) )
  else:
    dirContents = res['Value']['Successful'][currentDir]
    subdirs = dirContents['SubDirs']
    empty = True
    for subdir, metadata in subdirs.items():
      if ( not verbose ) or isOlderThan( metadata['CreationDate'], totalDays ):
        activeDirs.append( subdir )
      empty = False
    for filename, fileInfo in dirContents['Files'].items():
      metadata = fileInfo['MetaData']
Example n. 16
class DataIntegrityClient( Client ):

  """  
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB
 
  """

  def __init__( self, **kwargs ):

    Client.__init__( self, **kwargs )
    self.setServer( 'DataManagement/DataIntegrity' )
    self.dm = DataManager()
    self.fc = FileCatalog()

  ##########################################################################
  #
  # This section contains the specific methods for LFC->SE checks
  #

  def catalogDirectoryToSE( self, lfnDir ):
    """ This obtains the replica and metadata information from the catalog for the supplied directory and checks against the storage elements.
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the LFC->SE check" )
    gLogger.info( "-" * 40 )
    if type( lfnDir ) in types.StringTypes:
      lfnDir = [lfnDir]
    res = self.__getCatalogDirectoryContents( lfnDir )
    if not res['OK']:
      return res
    replicas = res['Value']['Replicas']
    catalogMetadata = res['Value']['Metadata']
    res = self.__checkPhysicalFiles( replicas, catalogMetadata )
    if not res['OK']:
      return res
    resDict = {'CatalogMetadata':catalogMetadata, 'CatalogReplicas':replicas}
    return S_OK( resDict )
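  # A minimal invocation sketch (hypothetical directory path):
  #   client = DataIntegrityClient()
  #   res = client.catalogDirectoryToSE( '/lhcb/user/s/someuser' )
  #   if res['OK']:
  #     gLogger.info( 'Checked %s catalogued files' % len( res['Value']['CatalogMetadata'] ) )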

  def catalogFileToSE( self, lfns ):
    """ This obtains the replica and metadata information from the catalog and checks against the storage elements.
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the LFC->SE check" )
    gLogger.info( "-" * 40 )
    if type( lfns ) in types.StringTypes:
      lfns = [lfns]
    res = self.__getCatalogMetadata( lfns )
    if not res['OK']:
      return res
    catalogMetadata = res['Value']
    res = self.__getCatalogReplicas( catalogMetadata.keys() )
    if not res['OK']:
      return res
    replicas = res['Value']
    res = self.__checkPhysicalFiles( replicas, catalogMetadata )
    if not res['OK']:
      return res
    resDict = {'CatalogMetadata':catalogMetadata, 'CatalogReplicas':replicas}
    return S_OK( resDict )

  def checkPhysicalFiles( self, replicas, catalogMetadata, ses = [] ):
    """ This takes the supplied replica and metadata information obtained from the catalog and checks it against the storage elements.
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the LFC->SE check" )
    gLogger.info( "-" * 40 )
    return self.__checkPhysicalFiles( replicas, catalogMetadata, ses = ses )

  def __checkPhysicalFiles( self, replicas, catalogMetadata, ses = [] ):
    """ This obtains the physical file metadata and checks the metadata against the catalog entries
    """
    sePfns = {}
    pfnLfns = {}
    for lfn, replicaDict in replicas.items():
      for se, pfn in replicaDict.items():
        if ( ses ) and ( se not in ses ):
          continue
        if not sePfns.has_key( se ):
          sePfns[se] = []
        sePfns[se].append( pfn )
        pfnLfns[pfn] = lfn
    gLogger.info( '%s %s' % ( 'Storage Element'.ljust( 20 ), 'Replicas'.rjust( 20 ) ) )
    for site in sortList( sePfns.keys() ):
      files = len( sePfns[site] )
      gLogger.info( '%s %s' % ( site.ljust( 20 ), str( files ).rjust( 20 ) ) )

    for se in sortList( sePfns.keys() ):
      pfns = sePfns[se]
      pfnDict = {}
      for pfn in pfns:
        pfnDict[pfn] = pfnLfns[pfn]
      sizeMismatch = []
      res = self.__checkPhysicalFileMetadata( pfnDict, se )
      if not res['OK']:
        gLogger.error( 'Failed to get physical file metadata.', res['Message'] )
        return res
      for pfn, metadata in res['Value'].items():
        if pfnLfns[pfn] in catalogMetadata:
          if ( metadata['Size'] != catalogMetadata[pfnLfns[pfn]]['Size'] ) and ( metadata['Size'] != 0 ):
            sizeMismatch.append( ( pfnLfns[pfn], pfn, se, 'CatalogPFNSizeMismatch' ) )
      if sizeMismatch:
        self.__reportProblematicReplicas( sizeMismatch, se, 'CatalogPFNSizeMismatch' )
    return S_OK()

  def __checkPhysicalFileMetadata( self, pfnLfns, se ):
    """ Check obtain the physical file metadata and check the files are available
    """
    gLogger.info( 'Checking the integrity of %s physical files at %s' % ( len( pfnLfns ), se ) )


    res = StorageElement( se ).getFileMetadata( pfnLfns.keys() )

    if not res['OK']:
      gLogger.error( 'Failed to get metadata for pfns.', res['Message'] )
      return res
    pfnMetadataDict = res['Value']['Successful']
    # If the replicas are completely missing
    missingReplicas = []
    for pfn, reason in res['Value']['Failed'].items():
      if re.search( 'File does not exist', reason ):
        missingReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNMissing' ) )
    if missingReplicas:
      self.__reportProblematicReplicas( missingReplicas, se, 'PFNMissing' )
    lostReplicas = []
    unavailableReplicas = []
    zeroSizeReplicas = []
    # If the files are not accessible
    for pfn, pfnMetadata in pfnMetadataDict.items():
      if pfnMetadata['Lost']:
        lostReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNLost' ) )
      if pfnMetadata['Unavailable']:
        unavailableReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNUnavailable' ) )
      if pfnMetadata['Size'] == 0:
        zeroSizeReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNZeroSize' ) )
    if lostReplicas:
      self.__reportProblematicReplicas( lostReplicas, se, 'PFNLost' )
    if unavailableReplicas:
      self.__reportProblematicReplicas( unavailableReplicas, se, 'PFNUnavailable' )
    if zeroSizeReplicas:
      self.__reportProblematicReplicas( zeroSizeReplicas, se, 'PFNZeroSize' )
    gLogger.info( 'Checking the integrity of physical files at %s complete' % se )
    return S_OK( pfnMetadataDict )

  ##########################################################################
  #
  # This section contains the specific methods for SE->LFC checks
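  # These checks work in the opposite direction: the files physically
  # present on the storage element are listed first, then each one is
  # checked to be registered in the catalog with a consistent size.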
  #

  def storageDirectoryToCatalog( self, lfnDir, storageElement ):
    """ This obtains the file found on the storage element in the supplied directories and determines whether they exist in the catalog and checks their metadata elements
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the SE->LFC check at %s" % storageElement )
    gLogger.info( "-" * 40 )
    if type( lfnDir ) in types.StringTypes:
      lfnDir = [lfnDir]
    res = self.__getStorageDirectoryContents( lfnDir, storageElement )
    if not res['OK']:
      return res
    storageFileMetadata = res['Value']
    if storageFileMetadata:
      return self.__checkCatalogForSEFiles( storageFileMetadata, storageElement )
    return S_OK( {'CatalogMetadata':{}, 'StorageMetadata':{}} )

  def __checkCatalogForSEFiles( self, storageMetadata, storageElement ):
    gLogger.info( 'Checking %s storage files exist in the catalog' % len( storageMetadata ) )

    # Convert the storage PFNs to the form in which they should be registered in the catalog
    res = StorageElement( storageElement ).getPfnForProtocol( storageMetadata.keys(), withPort = False )
    if not res['OK']:
      gLogger.error( "Failed to get registered PFNs for physical files", res['Message'] )
      return res
    for pfn, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain registered PFNs for physical files' )
    for original, registered in res['Value']['Successful'].items():
      storageMetadata[registered] = storageMetadata.pop( original )
    # Determine whether these PFNs are registered and if so obtain the LFN
    res = self.fc.getLFNForPFN( storageMetadata.keys() )
    if not res['OK']:
      gLogger.error( "Failed to get registered LFNs for PFNs", res['Message'] )
      return res
    failedPfns = res['Value']['Failed']
    notRegisteredPfns = []
    for pfn, error in failedPfns.items():
      if re.search( 'No such file or directory', error ):
        notRegisteredPfns.append( ( storageMetadata[pfn]['LFN'], pfn, storageElement, 'PFNNotRegistered' ) )
        failedPfns.pop( pfn )
    if notRegisteredPfns:
      self.__reportProblematicReplicas( notRegisteredPfns, storageElement, 'PFNNotRegistered' )
    if failedPfns:
      return S_ERROR( 'Failed to obtain LFNs for PFNs' )
    pfnLfns = res['Value']['Successful']
    for pfn in storageMetadata.keys():
      pfnMetadata = storageMetadata.pop( pfn )
      if pfn in pfnLfns.keys():
        lfn = pfnLfns[pfn]
        storageMetadata[lfn] = pfnMetadata
        storageMetadata[lfn]['PFN'] = pfn
    # For the LFNs found to be registered obtain the file metadata from the catalog and verify against the storage metadata
    res = self.__getCatalogMetadata( storageMetadata.keys() )
    if not res['OK']:
      return res
    catalogMetadata = res['Value']
    sizeMismatch = []
    for lfn, lfnCatalogMetadata in catalogMetadata.items():
      lfnStorageMetadata = storageMetadata[lfn]
      if ( lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size'] ) and ( lfnStorageMetadata['Size'] != 0 ):
        sizeMismatch.append( ( lfn, storageMetadata[lfn]['PFN'], storageElement, 'CatalogPFNSizeMismatch' ) )
    if sizeMismatch:
      self.__reportProblematicReplicas( sizeMismatch, storageElement, 'CatalogPFNSizeMismatch' )
    gLogger.info( 'Checking storage files exist in the catalog complete' )
    resDict = {'CatalogMetadata':catalogMetadata, 'StorageMetadata':storageMetadata}
    return S_OK( resDict )

  def getStorageDirectoryContents( self, lfnDir, storageElement ):
    """ This obtains takes the supplied lfn directories and recursively obtains the files in the supplied storage element
    """
    return self.__getStorageDirectoryContents( lfnDir, storageElement )

  def __getStorageDirectoryContents( self, lfnDir, storageElement ):
    """ Obtians the contents of the supplied directory on the storage
    """
    gLogger.info( 'Obtaining the contents for %s directories at %s' % ( len( lfnDir ), storageElement ) )

    se = StorageElement( storageElement )
    res = se.getPfnForLfn( lfnDir )

    if not res['OK']:
      gLogger.error( "Failed to get PFNs for directories", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain directory PFN from LFNs', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectories = res['Value']['Successful'].values()
    res = se.exists( storageDirectories )
    if not res['OK']:
      gLogger.error( "Failed to obtain existance of directories", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to determine existence of directory', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to determine existence of directory' )
    directoryExists = res['Value']['Successful']
    activeDirs = []
    for directory in sortList( directoryExists.keys() ):
      exists = directoryExists[directory]
      if exists:
        activeDirs.append( directory )
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = se.listDirectory( currentDir )
      activeDirs.remove( currentDir )
      if not res['OK']:
        gLogger.error( 'Failed to get directory contents', res['Message'] )
        return res
      elif currentDir in res['Value']['Failed']:
        gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Value']['Failed'][currentDir] ) )
        return S_ERROR( res['Value']['Failed'][currentDir] )
      else:
        dirContents = res['Value']['Successful'][currentDir]
        activeDirs.extend( dirContents['SubDirs'] )
        fileMetadata = dirContents['Files']

        # RF_NOTE This ugly trick is needed because se.getPfnPath does not follow the Successful/Failed convention
#         res = { "Successful" : {}, "Failed" : {} }
#         for pfn in fileMetadata:
#           inRes = se.getPfnPath( pfn )
#           if inRes["OK"]:
#             res["Successful"][pfn] = inRes["Value"]
#           else:
#             res["Failed"][pfn] = inRes["Message"]
        res = se.getLfnForPfn( fileMetadata.keys() )
        if not res['OK']:
          gLogger.error( 'Failed to get directory content LFNs', res['Message'] )
          return res

        for pfn, error in res['Value']['Failed'].items():
          gLogger.error( "Failed to get LFN for PFN", "%s %s" % ( pfn, error ) )
        if res['Value']['Failed']:
          return S_ERROR( "Failed to get LFNs for PFNs" )
        pfnLfns = res['Value']['Successful']
        for pfn, lfn in pfnLfns.items():
          fileMetadata[pfn]['LFN'] = lfn
        allFiles.update( fileMetadata )
    zeroSizeFiles = []
    lostFiles = []
    unavailableFiles = []
    for pfn in sortList( allFiles.keys() ):
      if os.path.basename( pfn ) == 'dirac_directory':
        allFiles.pop( pfn )
      else:
        metadata = allFiles[pfn]
        if metadata['Size'] == 0:
          zeroSizeFiles.append( ( metadata['LFN'], pfn, storageElement, 'PFNZeroSize' ) )
        # if metadata['Lost']:
        #  lostFiles.append((metadata['LFN'],pfn,storageElement,'PFNLost'))
        # if metadata['Unavailable']:
        #  unavailableFiles.append((metadata['LFN'],pfn,storageElement,'PFNUnavailable'))
    if zeroSizeFiles:
      self.__reportProblematicReplicas( zeroSizeFiles, storageElement, 'PFNZeroSize' )
    if lostFiles:
      self.__reportProblematicReplicas( lostFiles, storageElement, 'PFNLost' )
    if unavailableFiles:
      self.__reportProblematicReplicas( unavailableFiles, storageElement, 'PFNUnavailable' )
    gLogger.info( 'Obtained a total of %s files for directories at %s' % ( len( allFiles ), storageElement ) )
    return S_OK( allFiles )

  def __getStoragePathExists( self, lfnPaths, storageElement ):
    gLogger.info( 'Determining the existence of %d files at %s' % ( len( lfnPaths ), storageElement ) )

    se = StorageElement( storageElement )
    res = se.getPfnForLfn( lfnPaths )
    if not res['OK']:
      gLogger.error( "Failed to get PFNs for LFNs", res['Message'] )
      return res
    for lfnPath, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain PFN from LFN', '%s %s' % ( lfnPath, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain PFNs from LFNs' )
    lfnPfns = res['Value']['Successful']
    pfnLfns = {}
    for lfn, pfn in lfnPfns.items():
      pfnLfns[pfn] = lfn

    res = se.exists( pfnLfns )
    if not res['OK']:
      gLogger.error( "Failed to obtain existance of paths", res['Message'] )
      return res
    for lfnPath, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to determine existence of path', '%s %s' % ( lfnPath, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to determine existence of paths' )
    pathExists = res['Value']['Successful']
    resDict = {}
    for pfn, exists in pathExists.items():
      if exists:
        resDict[pfnLfns[pfn]] = pfn
    return S_OK( resDict )

  ##########################################################################
  #
  # This section contains the specific methods for obtaining replica and metadata information from the catalog
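  # Directory trees are walked iteratively: a work list of active
  # directories is drained one entry at a time, with newly discovered
  # subdirectories appended back onto the list.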
  #

  def __getCatalogDirectoryContents( self, lfnDir ):
    """ Obtain the contents of the supplied directory
    """
    gLogger.info( 'Obtaining the catalog contents for %s directories' % len( lfnDir ) )

    activeDirs = lfnDir
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = self.fc.listDirectory( currentDir )
      activeDirs.remove( currentDir )
      if not res['OK']:
        gLogger.error( 'Failed to get directory contents', res['Message'] )
        return res
      elif currentDir in res['Value']['Failed']:
        gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Value']['Failed'][currentDir] ) )
      else:
        dirContents = res['Value']['Successful'][currentDir]
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )

    zeroReplicaFiles = []
    zeroSizeFiles = []
    allReplicaDict = {}
    allMetadataDict = {}
    for lfn, lfnDict in allFiles.items():
      lfnReplicas = {}
      for se, replicaDict in lfnDict['Replicas'].items():
        lfnReplicas[se] = replicaDict['PFN']
      if not lfnReplicas:
        zeroReplicaFiles.append( lfn )
      allReplicaDict[lfn] = lfnReplicas
      allMetadataDict[lfn] = lfnDict['MetaData']
      if lfnDict['MetaData']['Size'] == 0:
        zeroSizeFiles.append( lfn )
    if zeroReplicaFiles:
      self.__reportProblematicFiles( zeroReplicaFiles, 'LFNZeroReplicas' )
    if zeroSizeFiles:
      self.__reportProblematicFiles( zeroSizeFiles, 'LFNZeroSize' )
    gLogger.info( 'Obtained a total of %s files for the supplied directories' % len( allMetadataDict ) )
    resDict = {'Metadata':allMetadataDict, 'Replicas':allReplicaDict}
    return S_OK( resDict )

  def __getCatalogReplicas( self, lfns ):
    """ Obtain the file replicas from the catalog while checking that there are replicas
    """
    gLogger.info( 'Obtaining the replicas for %s files' % len( lfns ) )

    zeroReplicaFiles = []
    res = self.fc.getReplicas( lfns, allStatus = True )
    if not res['OK']:
      gLogger.error( 'Failed to get catalog replicas', res['Message'] )
      return res
    allReplicas = res['Value']['Successful']
    for lfn, error in res['Value']['Failed'].items():
      if re.search( 'File has zero replicas', error ):
        zeroReplicaFiles.append( lfn )
    if zeroReplicaFiles:
      self.__reportProblematicFiles( zeroReplicaFiles, 'LFNZeroReplicas' )
    gLogger.info( 'Obtaining the replicas for files complete' )
    return S_OK( allReplicas )

  def __getCatalogMetadata( self, lfns ):
    """ Obtain the file metadata from the catalog while checking they exist
    """
    if not lfns:
      return S_OK( {} )
    gLogger.info( 'Obtaining the catalog metadata for %s files' % len( lfns ) )

    missingCatalogFiles = []
    zeroSizeFiles = []
    res = self.fc.getFileMetadata( lfns )
    if not res['OK']:
      gLogger.error( 'Failed to get catalog metadata', res['Message'] )
      return res
    allMetadata = res['Value']['Successful']
    for lfn, error in res['Value']['Failed'].items():
      if re.search( 'No such file or directory', error ):
        missingCatalogFiles.append( lfn )
    if missingCatalogFiles:
      self.__reportProblematicFiles( missingCatalogFiles, 'LFNCatalogMissing' )
    for lfn, metadata in allMetadata.items():
      if metadata['Size'] == 0:
        zeroSizeFiles.append( lfn )
    if zeroSizeFiles:
      self.__reportProblematicFiles( zeroSizeFiles, 'LFNZeroSize' )
    gLogger.info( 'Obtaining the catalog metadata complete' )
    return S_OK( allMetadata )

  ##########################################################################
  #
  # This section contains the methods for inserting problematic files into the integrity DB
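  # Problematic replicas are reported as ( lfn, pfn, se, reason ) tuples
  # and problematic files as bare LFNs; both routes end in a call to
  # insertProblematic() on the integrity DB.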
  #

  def __reportProblematicFiles( self, lfns, reason ):
    """ Simple wrapper function around setFileProblematic """
    gLogger.info( 'The following %s files were found with %s' % ( len( lfns ), reason ) )
    for lfn in sortList( lfns ):
      gLogger.info( lfn )
    res = self.setFileProblematic( lfns, reason, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with files', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with files' )

  def setFileProblematic( self, lfn, reason, sourceComponent = '' ):
    """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if type( lfn ) == types.ListType:
      lfns = lfn
    elif type( lfn ) == types.StringType:
      lfns = [lfn]
    else:
      errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setFileProblematic: Attempting to update %s files." % len( lfns ) )
    fileMetadata = {}
    for lfn in lfns:
      fileMetadata[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':'', 'SE':''}
    res = self.insertProblematic( sourceComponent, fileMetadata )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB" )
    return res

  def __reportProblematicReplicas( self, replicaTuple, se, reason ):
    """ Simple wrapper function around setReplicaProblematic """
    gLogger.info( 'The following %s files had %s at %s' % ( len( replicaTuple ), reason, se ) )
    for lfn, pfn, se, reason in sortList( replicaTuple ):
      if lfn:
        gLogger.info( lfn )
      else:
        gLogger.info( pfn )
    res = self.setReplicaProblematic( replicaTuple, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with replicas', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with replicas' )

  def setReplicaProblematic( self, replicaTuple, sourceComponent = '' ):
    """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaTuple should be a ( lfn, pfn, se, prognosis ) tuple or a list of such tuples, where:

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if type( replicaTuple ) == types.TupleType:
      replicaTuple = [replicaTuple]
    elif type( replicaTuple ) == types.ListType:
      pass
    else:
      errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas." % len( replicaTuple ) )
    replicaDict = {}
    for lfn, pfn, se, reason in replicaTuple:
      replicaDict[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':pfn, 'SE':se}
    res = self.insertProblematic( sourceComponent, replicaDict )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB" )
      return res
    for lfn in replicaDict.keys():
      replicaDict[lfn]['Status'] = 'Problematic'

    res = self.fc.setReplicaStatus( replicaDict )
    if not res['OK']:
      errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
      gLogger.error( errStr, res['Message'] )
      return res
    failed = res['Value']['Failed']
    successful = res['Value']['Successful']
    resDict = {'Successful':successful, 'Failed':failed}
    return S_OK( resDict )

  ##########################################################################
  #
  # This section contains the resolution methods for various prognoses
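  # Each resolve<Prognosis> method below consumes the problematic
  # dictionary returned by the integrity DB and either marks the entry
  # resolved, changes its prognosis, or increments its retry count.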
  #

  def __updateCompletedFiles( self, prognosis, fileID ):
    gLogger.info( "%s file (%d) is resolved" % ( prognosis, fileID ) )
    return self.setProblematicStatus( fileID, 'Resolved' )

  def __returnProblematicError( self, fileID, res ):
    self.incrementProblematicRetry( fileID )
    gLogger.error( res['Message'] )
    return res

  def __getRegisteredPFNLFN( self, pfn, storageElement ):

    res = StorageElement( storageElement ).getPfnForProtocol( pfn, withPort = False )
    if not res['OK']:
      gLogger.error( "Failed to get registered PFN for physical files", res['Message'] )
      return res
    for pfn, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
      return S_ERROR( 'Failed to obtain registered PFN for physical file' )
    registeredPFN = res['Value']['Successful'][pfn]
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getLFNForPFN( registeredPFN ) )
    if ( not res['OK'] ) and re.search( 'No such file or directory', res['Message'] ):
      return S_OK( False )
    return S_OK( res['Value'] )

  def __updateReplicaToChecked( self, problematicDict ):
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']
    prognosis = problematicDict['Prognosis']
    problematicDict['Status'] = 'Checked'

    res = Utils.executeSingleFileOrDirWrapper( self.fc.setReplicaStatus( {lfn:problematicDict} ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    gLogger.info( "%s replica (%d) is updated to Checked status" % ( prognosis, fileID ) )
    return self.__updateCompletedFiles( prognosis, fileID )

  def resolveCatalogPFNSizeMismatch( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
    lfn = problematicDict['LFN']
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']


    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']
    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).getFileSize( pfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    bkKCatalog = FileCatalog( ['BookkeepingDB'] )
    res = Utils.executeSingleFileOrDirWrapper( bkKCatalog.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    bookkeepingSize = res['Value']
    if bookkeepingSize == catalogSize == storageSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    if ( catalogSize == bookkeepingSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID )
      res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      if len( res['Value'] ) <= 1:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID )
        return S_ERROR( "Not removing catalog file mismatch since the only replica" )
      else:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID )
        res = self.dm.removeReplica( se, lfn )
        if not res['OK']:
          return self.__returnProblematicError( fileID, res )
        return self.__updateCompletedFiles( 'CatalogPFNSizeMismatch', fileID )
    if ( catalogSize != bookkeepingSize ) and ( bookkeepingSize == storageSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID )
      res = self.__updateReplicaToChecked( problematicDict )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.changeProblematicPrognosis( fileID, 'BKCatalogSizeMismatch' )
    gLogger.info( "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count" % fileID )
    return self.incrementProblematicRetry( fileID )

  def resolvePFNNotRegistered( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
    lfn = problematicDict['LFN']
    pfn = problematicDict['PFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      # The file does not exist in the catalog
      res = Utils.executeSingleFileOrDirWrapper( se.removeFile( pfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    res = Utils.executeSingleFileOrDirWrapper( se.getFileMetadata( pfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNNotRegistered replica (%d) found to be missing." % fileID )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    elif not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageMetadata = res['Value']
    if storageMetadata['Lost']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    if storageMetadata['Unavailable']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count" % fileID )
      return self.incrementProblematicRetry( fileID )

    # HACK until we can obtain the space token descriptions through GFAL
    site = seName.split( '_' )[0].split( '-' )[0]
    if not storageMetadata['Cached']:
      if lfn.endswith( '.raw' ):
        seName = '%s-RAW' % site
      else:
        seName = '%s-RDST' % site
    elif storageMetadata['Migrated']:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s_M-DST' % site
      else:
        seName = '%s_MC_M-DST' % site
    else:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s-DST' % site
      else:
        seName = '%s_MC-DST' % site

    problematicDict['SE'] = seName
    res = se.getPfnForProtocol( pfn, withPort = False )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    for pfn, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
      return S_ERROR( 'Failed to obtain registered PFN for physical file' )
    problematicDict['PFN'] = res['Value']['Successful'][pfn]

    res = Utils.executeSingleFileOrDirWrapper( self.fc.addReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileMetadata( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']['Size'] != storageMetadata['Size']:
      gLogger.info( "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )

  def resolveLFNCatalogMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )
    # Remove the file from all catalogs
    # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
    res = Utils.executeSingleFileOrDirWrapper( self.fc.removeFile( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )

  def resolvePFNMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      gLogger.info( "PFNMissing file (%d) no longer exists in catalog" % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )

    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).exists( pfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      gLogger.info( "PFNMissing replica (%d) is no longer missing" % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    gLogger.info( "PFNMissing replica (%d) does not exist" % fileID )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn, allStatus = True ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    replicas = res['Value']
    seSite = se.split( '_' )[0].split( '-' )[0]
    found = False
    gLogger.debug( 'Replicas: %s' % str( replicas ) )
    for replicaSE in replicas.keys():
      if re.search( seSite, replicaSE ):
        found = True
        problematicDict['SE'] = replicaSE
        se = replicaSE
    if not found:
      gLogger.info( "PFNMissing replica (%d) is no longer registered at SE. Resolved." % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )
    gLogger.info( "PFNMissing replica (%d) does not exist. Removing from catalog..." % fileID )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.removeReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if len( replicas ) == 1:
      gLogger.info( "PFNMissing replica (%d) had a single replica. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'LFNZeroReplicas' )
    res = self.dm.replicateAndRegister( problematicDict['LFN'], se )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'PFNMissing', fileID )

  def resolvePFNUnavailable( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).getFileMetadata( pfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      # The file is no longer Unavailable but has now disappeared completely
      gLogger.info( "PFNUnavailable replica (%d) found to be missing. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    if ( not res['OK'] ) or res['Value']['Unavailable']:
      gLogger.info( "PFNUnavailable replica (%d) found to still be Unavailable" % fileID )
      return self.incrementProblematicRetry( fileID )
    if res['Value']['Lost']:
      gLogger.info( "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    gLogger.info( "PFNUnavailable replica (%d) is no longer Unavailable" % fileID )
    # Need to make the replica okay in the Catalog
    return self.__updateReplicaToChecked( problematicDict )

  def resolvePFNZeroSize( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
    pfn = problematicDict['PFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )

    res = Utils.executeSingleFileOrDirWrapper( se.getFileSize( pfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNZeroSize replica (%d) found to be missing. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    storageSize = res['Value']
    if storageSize == 0:
      res = Utils.executeSingleFileOrDirWrapper( se.removeFile( pfn ) )

      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      gLogger.info( "PFNZeroSize replica (%d) removed. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    res = self.__getRegisteredPFNLFN( pfn, seName )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    lfn = res['Value']
    if not lfn:
      gLogger.info( "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNNotRegistered' )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileMetadata( lfn ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']['Size']
    if catalogSize != storageSize:
      gLogger.info( "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNZeroSize', fileID )

  ############################################################################################

  def resolveLFNZeroReplicas( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn, allStatus = True ) )
    if res['OK'] and res['Value']:
      gLogger.info( "LFNZeroReplicas file (%d) found to have replicas" % fileID )
    else:
      gLogger.info( "LFNZeroReplicas file (%d) does not have replicas. Checking storage..." % fileID )
      pfnsFound = False
      for storageElementName in sortList( gConfig.getValue( 'Resources/StorageElementGroups/Tier1_MC_M-DST', [] ) ):
        res = self.__getStoragePathExists( [lfn], storageElementName )
        if res['OK'] and lfn in res['Value']:
          gLogger.info( "LFNZeroReplicas file (%d) found storage file at %s" % ( fileID, storageElementName ) )
          pfn = res['Value'][lfn]
          self.__reportProblematicReplicas( [( lfn, pfn, storageElementName, 'PFNNotRegistered' )], storageElementName, 'PFNNotRegistered' )
          pfnsFound = True
      if not pfnsFound:
        gLogger.info( "LFNZeroReplicas file (%d) did not have storage files. Removing..." % fileID )
        res = Utils.executeSingleFileOrDirWrapper( self.fc.removeFile( lfn ) )
        if not res['OK']:
          gLogger.error( res['Message'] )
          # Increment the number of retries for this file
          self.incrementProblematicRetry( fileID )
          return res
        gLogger.info( "LFNZeroReplicas file (%d) removed from catalog" % fileID )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'LFNZeroReplicas', fileID )
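
A minimal usage sketch for the client above (hedged: it assumes a DIRAC-initialised environment, the import path follows the usual DIRAC layout, and the directory name is purely illustrative):

from DIRAC.Core.Base import Script
Script.parseCommandLine()  # initialise the DIRAC environment before any client call

from DIRAC import gLogger
from DIRAC.DataManagementSystem.Client.DataIntegrityClient import DataIntegrityClient

client = DataIntegrityClient()
res = client.catalogDirectoryToSE( '/lhcb/test/integrity' )  # hypothetical LFN directory
if not res['OK']:
  gLogger.error( res['Message'] )
else:
  gLogger.notice( '%d files checked against their storage elements' % len( res['Value']['CatalogMetadata'] ) )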
Example n. 17
class CatalogPlugInTestCase(unittest.TestCase):
    """Base class for the CatalogPlugin test case"""
    def setUp(self):
        self.fullMetadata = [
            "Status",
            "ChecksumType",
            "OwnerRole",
            "CreationDate",
            "Checksum",
            "ModificationDate",
            "OwnerDN",
            "Mode",
            "GUID",
            "Size",
        ]
        self.dirMetadata = self.fullMetadata + ["NumberOfSubPaths"]
        self.fileMetadata = self.fullMetadata + ["NumberOfLinks"]

        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        self.assertTrue(valid)
        self.destDir = "/lhcb/test/unit-test/TestCatalogPlugin"
        self.link = "%s/link" % self.destDir

        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)

        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        for i in range(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assertTrue(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        pfn = "protocol://host:port/storage/path%s" % lfn
        size = 10000000
        se = "DIRAC-storage"
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {}
        fileDict[lfn] = {
            "PFN": pfn,
            "Size": size,
            "SE": se,
            "GUID": guid,
            "Checksum": adler
        }
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        self.assertTrue(res["Value"]["Successful"])
        self.assertTrue(path in res["Value"]["Successful"])
        return res["Value"]["Successful"][path]

    def parseError(self, res, path):
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        self.assertTrue(res["Value"]["Failed"])
        self.assertTrue(path in res["Value"]["Failed"])
        return res["Value"]["Failed"][path]

    def cleanDirectory(self):
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        toRemove = list(returnValue["Files"])
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assertTrue(returnValue)

    def purgeFiles(self, lfns):
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {}
                repDict[lfn] = {"PFN": pfn, "SE": se}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()
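
The class references a module-level catalogClientToTest; a minimal runner sketch, assuming the surrounding script supplies that name (the value below is purely illustrative):

if __name__ == "__main__":
    catalogClientToTest = "FileCatalog"  # hypothetical plug-in name
    suite = unittest.defaultTestLoader.loadTestsFromTestCase(CatalogPlugInTestCase)
    unittest.TextTestRunner(verbosity=2).run(suite)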
Example n. 18
  if not vo:
    gLogger.error( 'Could not determine VO' )
    Script.showHelp()
  baseDir = '/%s/user/%s/%s' % ( vo, username[0], username )

baseDir = baseDir.rstrip( '/' )

gLogger.notice( 'Will search for files in %s%s' % ( baseDir, ( ' matching %s' % wildcard ) if wildcard else '' ) )
activeDirs = [baseDir]

allFiles = []
emptyDirs = []

while len( activeDirs ) > 0:
  currentDir = activeDirs.pop()
  res = fc.listDirectory( currentDir, withMetadata, timeout = 360 )
  if not res['OK']:
    gLogger.error( "Error retrieving directory contents", "%s %s" % ( currentDir, res['Message'] ) )
  elif currentDir in res['Value']['Failed']:
    gLogger.error( "Error retrieving directory contents", "%s %s" % ( currentDir, res['Value']['Failed'][currentDir] ) )
  else:
    dirContents = res['Value']['Successful'][currentDir]
    subdirs = dirContents['SubDirs']
    files = dirContents['Files']
    if not subdirs and not files:
      emptyDirs.append( currentDir )
      gLogger.notice( '%s: empty directory' % currentDir )
    else:
      for subdir in sorted( subdirs, reverse = True ):
        if ( not withMetadata ) or isOlderThan( subdirs[subdir]['CreationDate'], totalDays ):
          activeDirs.append( subdir )
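
The loop above relies on an isOlderThan helper presumably defined earlier in the script; a minimal sketch of such a helper, assuming CreationDate is a datetime.datetime and totalDays an integer:

from datetime import datetime, timedelta

def isOlderThan( creationDate, totalDays ):
  # True when the catalog entry was created more than totalDays ago
  return creationDate < datetime.utcnow() - timedelta( days = totalDays )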
Example n. 19
    if not vo:
        gLogger.error('Could not determine VO')
        Script.showHelp()
    baseDir = '/%s/user/%s/%s' % (vo, username[0], username)

baseDir = baseDir.rstrip('/')

gLogger.info('Will search for files in %s' % baseDir)
activeDirs = [baseDir]

allFiles = []
allDirs = []
emptyDirs = []
while len(activeDirs) > 0:
    currentDir = activeDirs[0]
    res = fc.listDirectory(currentDir, verbose)
    activeDirs.remove(currentDir)
    if not res['OK']:
        gLogger.error("Error retrieving directory contents",
                      "%s %s" % (currentDir, res['Message']))
    elif currentDir in res['Value']['Failed']:
        gLogger.error(
            "Error retrieving directory contents",
            "%s %s" % (currentDir, res['Value']['Failed'][currentDir]))
    else:
        dirContents = res['Value']['Successful'][currentDir]
        subdirs = dirContents['SubDirs']
        empty = True
        for subdir, metadata in subdirs.items():
            if (not verbose) or isOlderThan(metadata['CreationDate'],
                                            totalDays):
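
Both variants unpack the same DIRAC convention: listDirectory returns an S_OK envelope whose 'Value' carries 'Successful' and 'Failed' dictionaries keyed by path. A generic sketch of that unpacking (the helper name is illustrative):

def splitResult(res, path):
    # Returns (value, error) for one path from a Successful/Failed result
    if not res['OK']:
        return None, res['Message']
    if path in res['Value']['Failed']:
        return None, res['Value']['Failed'][path]
    return res['Value']['Successful'].get(path), None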
Example n. 20
class CatalogPlugInTestCase(unittest.TestCase):
    """ Base class for the CatalogPlugin test case """

    def setUp(self):
        self.fullMetadata = [
            "Status",
            "ChecksumType",
            "OwnerRole",
            "CreationDate",
            "Checksum",
            "ModificationDate",
            "OwnerDN",
            "Mode",
            "GUID",
            "Size",
        ]
        self.dirMetadata = self.fullMetadata + ["NumberOfSubPaths"]
        self.fileMetadata = self.fullMetadata + ["NumberOfLinks"]

        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        self.assert_(valid)
        self.destDir = "/lhcb/test/unit-test/TestCatalogPlugin"
        self.link = "%s/link" % self.destDir

        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)

        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        for i in range(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assert_(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        pfn = "protocol://host:port/storage/path%s" % lfn
        size = 10000000
        se = "DIRAC-storage"
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {}
        fileDict[lfn] = {"PFN": pfn, "Size": size, "SE": se, "GUID": guid, "Checksum": adler}
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        self.assert_(res["OK"])
        self.assert_(res["Value"])
        self.assert_(res["Value"]["Successful"])
        self.assert_(res["Value"]["Successful"].has_key(path))
        return res["Value"]["Successful"][path]

    def parseError(self, res, path):
        self.assert_(res["OK"])
        self.assert_(res["Value"])
        self.assert_(res["Value"]["Failed"])
        self.assert_(res["Value"]["Failed"].has_key(path))
        return res["Value"]["Failed"][path]

    def cleanDirectory(self):
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        toRemove = returnValue["Files"].keys()
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assert_(returnValue)

    def purgeFiles(self, lfns):
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {}
                repDict[lfn] = {"PFN": pfn, "SE": se}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()