Example #1
  def resolveCatalogPFNSizeMismatch( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
    lfn = problematicDict['LFN']
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']
    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).getFileSize( pfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    bkKCatalog = FileCatalog( ['BookkeepingDB'] )
    res = Utils.executeSingleFileOrDirWrapper( bkKCatalog.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    bookkeepingSize = res['Value']
    if bookkeepingSize == catalogSize == storageSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    if catalogSize == bookkeepingSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID )
      res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      if len( res['Value'] ) <= 1:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID )
        return S_ERROR( "Not removing catalog file mismatch since it is the only replica" )
      else:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID )
        res = self.dm.removeReplica( se, lfn )
        if not res['OK']:
          return self.__returnProblematicError( fileID, res )
        return self.__updateCompletedFiles( 'CatalogPFNSizeMismatch', fileID )
    if ( catalogSize != bookkeepingSize ) and ( bookkeepingSize == storageSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID )
      res = self.__updateReplicaToChecked( problematicDict )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.changeProblematicPrognosis( fileID, 'BKCatalogSizeMismatch' )
    gLogger.info( "CatalogPFNSizeMismatch replica (%d) all sizes found to mismatch. Updating retry count" % fileID )
    return self.incrementProblematicRetry( fileID )
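
Every branch above follows DIRAC's S_OK / S_ERROR result convention. As a
minimal illustrative sketch (the real helpers live in
DIRAC.Core.Utilities.ReturnValues and may carry extra fields), the
dictionaries read by the res['OK'] / res['Value'] / res['Message'] checks
look roughly like this:

  def S_OK( value = None ):
    # success: callers test res['OK'] and read res['Value']
    return { 'OK': True, 'Value': value }

  def S_ERROR( message = '' ):
    # failure: callers test res['OK'] and read res['Message']
    return { 'OK': False, 'Message': message }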
Example #2
  def resolveCatalogPFNSizeMismatch( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
    lfn = problematicDict['LFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']
    res = returnSingleResult( StorageElement( se ).getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    bkKCatalog = FileCatalog( ['BookkeepingDB'] )
    res = returnSingleResult( bkKCatalog.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    bookkeepingSize = res['Value']
    if bookkeepingSize == catalogSize == storageSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    if catalogSize == bookkeepingSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID )
      res = returnSingleResult( self.fc.getReplicas( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      if len( res['Value'] ) <= 1:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID )
        return S_ERROR( "Not removing catalog file mismatch since it is the only replica" )
      else:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID )
        res = self.dm.removeReplica( se, lfn )
        if not res['OK']:
          return self.__returnProblematicError( fileID, res )
        return self.__updateCompletedFiles( 'CatalogPFNSizeMismatch', fileID )
    if ( catalogSize != bookkeepingSize ) and ( bookkeepingSize == storageSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID )
      res = self.__updateReplicaToChecked( problematicDict )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.changeProblematicPrognosis( fileID, 'BKCatalogSizeMismatch' )
    gLogger.info( "CatalogPFNSizeMismatch replica (%d) all sizes found to mismatch. Updating retry count" % fileID )
    return self.incrementProblematicRetry( fileID )
Example #3
class RequestPreparationAgent( AgentModule ):

  def initialize( self ):
    self.fileCatalog = FileCatalog()
    self.dm = DataManager()
    self.stagerClient = StorageManagerClient()
    self.dataIntegrityClient = DataIntegrityClient()
    # This sets the default proxy to be used, as defined under
    # /Operations/Shifter/DataManager.
    # The shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()

  def execute( self ):
    """ This is the first logical task to be executed and manages the New->Waiting transition of the Replicas
    """
    res = self.__getNewReplicas()
    if not res['OK']:
      gLogger.fatal( "RequestPreparation.prepareNewReplicas: Failed to get replicas from StagerDB.", res['Message'] )
      return res
    if not res['Value']:
      gLogger.info( "There were no New replicas found" )
      return res
    replicas = res['Value']['Replicas']
    replicaIDs = res['Value']['ReplicaIDs']
    gLogger.info( "RequestPreparation.prepareNewReplicas: Obtained %s New replicas for preparation." % len( replicaIDs ) )

    # Check if the files exist in the FileCatalog
    res = self.__getExistingFiles( replicas )
    if not res['OK']:
      return res
    exist = res['Value']['Exist']
    terminal = res['Value']['Missing']
    failed = res['Value']['Failed']
    if not exist:
      gLogger.error( 'RequestPreparation.prepareNewReplicas: Failed to determine the existence of any file' )
      return S_OK()
    terminalReplicaIDs = {}
    for lfn, reason in terminal.items():
      for replicaID in replicas[lfn].values():
        terminalReplicaIDs[replicaID] = reason
      replicas.pop( lfn )
    gLogger.info( "RequestPreparation.prepareNewReplicas: %s files exist in the FileCatalog." % len( exist ) )
    if terminal:
      gLogger.info( "RequestPreparation.prepareNewReplicas: %s files do not exist in the FileCatalog." % len( terminal ) )

    # Obtain the file sizes from the FileCatalog
    res = self.__getFileSize( exist )
    if not res['OK']:
      return res
    failed.update( res['Value']['Failed'] )
    terminal = res['Value']['ZeroSize']
    fileSizes = res['Value']['FileSizes']
    if not fileSizes:
      gLogger.error( 'RequestPreparation.prepareNewReplicas: Failed to determine sizes of any files' )
      return S_OK()
    for lfn, reason in terminal.items():
      for _se, replicaID in replicas[lfn].items():
        terminalReplicaIDs[replicaID] = reason
      replicas.pop( lfn )
    gLogger.info( "RequestPreparation.prepareNewReplicas: Obtained %s file sizes from the FileCatalog." % len( fileSizes ) )
    if terminal:
      gLogger.info( "RequestPreparation.prepareNewReplicas: %s files registered with zero size in the FileCatalog." % len( terminal ) )

    # Obtain the replicas from the FileCatalog
    res = self.__getFileReplicas( fileSizes.keys() )
    if not res['OK']:
      return res
    failed.update( res['Value']['Failed'] )
    terminal = res['Value']['ZeroReplicas']
    fileReplicas = res['Value']['Replicas']
    if not fileReplicas:
      gLogger.error( 'RequestPreparation.prepareNewReplicas: Failed to determine replicas for any files' )
      return S_OK()
    for lfn, reason in terminal.items():
      for _se, replicaID in replicas[lfn].items():
        terminalReplicaIDs[replicaID] = reason
      replicas.pop( lfn )
    gLogger.info( "RequestPreparation.prepareNewReplicas: Obtained replica information for %s files from the FileCatalog." % len( fileReplicas ) )
    if terminal:
      gLogger.info( "RequestPreparation.prepareNewReplicas: %s files registered with zero replicas in the FileCatalog." % len( terminal ) )

    # Check the replicas exist at the requested site
    replicaMetadata = []
    for lfn, requestedSEs in replicas.items():
      lfnReplicas = fileReplicas.get( lfn )

      # This should not happen in principle, but it was seen
      # after a corrupted staging request has entered the DB
      if not lfnReplicas:
        gLogger.error( "Missing replicas information", "%s %s" % ( lfn, requestedSEs ) )
        continue

      for requestedSE, replicaID in requestedSEs.items():
        if requestedSE not in lfnReplicas:
          terminalReplicaIDs[replicaID] = "LFN not registered at requested SE"
          replicas[lfn].pop( requestedSE )
        else:
          replicaMetadata.append( ( replicaID, lfnReplicas[requestedSE], fileSizes[lfn] ) )

    # Update the states of the files in the database
    if terminalReplicaIDs:
      gLogger.info( "RequestPreparation.prepareNewReplicas: %s replicas are terminally failed." % len( terminalReplicaIDs ) )
      res = self.stagerClient.updateReplicaFailure( terminalReplicaIDs )
      if not res['OK']:
        gLogger.error( "RequestPreparation.prepareNewReplicas: Failed to update replica failures.", res['Message'] )
    if replicaMetadata:
      gLogger.info( "RequestPreparation.prepareNewReplicas: %s replica metadata to be updated." % len( replicaMetadata ) )
      # Sets the Status='Waiting' of CacheReplicas records that are OK with catalogue checks
      res = self.stagerClient.updateReplicaInformation( replicaMetadata )
      if not res['OK']:
        gLogger.error( "RequestPreparation.prepareNewReplicas: Failed to update replica metadata.", res['Message'] )
    return S_OK()

  def __getNewReplicas( self ):
    """ This obtains the New replicas from the Replicas table and for each LFN the requested storage element """
    # First obtain the New replicas from the CacheReplicas table
    res = self.stagerClient.getCacheReplicas( {'Status':'New'} )
    if not res['OK']:
      gLogger.error( "RequestPreparation.__getNewReplicas: Failed to get replicas with New status.", res['Message'] )
      return res
    if not res['Value']:
      gLogger.debug( "RequestPreparation.__getNewReplicas: No New replicas found to process." )
      return S_OK()
    else:
      gLogger.debug( "RequestPreparation.__getNewReplicas: Obtained %s New replica(s) to process." % len( res['Value'] ) )
    replicas = {}
    replicaIDs = {}
    for replicaID, info in res['Value'].items():
      lfn = info['LFN']
      storageElement = info['SE']
      replicas.setdefault( lfn, {} )[storageElement] = replicaID
      replicaIDs[replicaID] = ( lfn, storageElement )
    return S_OK( {'Replicas':replicas, 'ReplicaIDs':replicaIDs} )
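
  # For reference, a minimal sketch of the two structures assembled above
  # (the LFN and SE names are purely illustrative):
  #   replicas   = { '/some/lfn.dst': { 'SOME-SE': 42, 'OTHER-SE': 43 } }
  #   replicaIDs = { 42: ( '/some/lfn.dst', 'SOME-SE' ),
  #                  43: ( '/some/lfn.dst', 'OTHER-SE' ) }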

  def __getExistingFiles( self, lfns ):
    """ This checks that the files exist in the FileCatalog. """
    res = self.fileCatalog.exists( list( set( lfns ) ) )
    if not res['OK']:
      gLogger.error( "RequestPreparation.__getExistingFiles: Failed to determine whether files exist.", res['Message'] )
      return res
    failed = res['Value']['Failed']
    success = res['Value']['Successful']
    exist = [lfn for lfn, exists in success.items() if exists]
    missing = list( set( success ) - set( exist ) )
    if missing:
      reason = 'LFN not registered in the FC'
      gLogger.warn( "RequestPreparation.__getExistingFiles: %s" % reason, '\n'.join( [''] + missing ) )
      self.__reportProblematicFiles( missing, 'LFN-LFC-DoesntExist' )
      missing = dict.fromkeys( missing, reason )
    else:
      missing = {}
    return S_OK( {'Exist':exist, 'Missing':missing, 'Failed':failed} )

  def __getFileSize( self, lfns ):
    """ This obtains the file size from the FileCatalog. """
    fileSizes = {}
    zeroSize = {}
    res = self.fileCatalog.getFileSize( lfns )
    if not res['OK']:
      gLogger.error( "RequestPreparation.__getFileSize: Failed to get sizes for files.", res['Message'] )
      return res
    failed = res['Value']['Failed']
    for lfn, size in res['Value']['Successful'].items():
      if size == 0:
        zeroSize[lfn] = "LFN registered with zero size in the FileCatalog"
      else:
        fileSizes[lfn] = size
    if zeroSize:
      for lfn, reason in zeroSize.items():
        gLogger.warn( "RequestPreparation.__getFileSize: %s" % reason, lfn )
      self.__reportProblematicFiles( zeroSize.keys(), 'LFN-LFC-ZeroSize' )
    return S_OK( {'FileSizes':fileSizes, 'ZeroSize':zeroSize, 'Failed':failed} )

  def __getFileReplicas( self, lfns ):
    """ This obtains the replicas from the FileCatalog. """
    replicas = {}
    noReplicas = {}
    res = self.dm.getActiveReplicas( lfns )
    if not res['OK']:
      gLogger.error( "RequestPreparation.__getFileReplicas: Failed to obtain file replicas.", res['Message'] )
      return res
    failed = res['Value']['Failed']
    for lfn, lfnReplicas in res['Value']['Successful'].items():
      if not lfnReplicas:
        noReplicas[lfn] = "LFN registered with zero replicas in the FileCatalog"
      else:
        replicas[lfn] = lfnReplicas
    if noReplicas:
      for lfn, reason in noReplicas.items():
        gLogger.warn( "RequestPreparation.__getFileReplicas: %s" % reason, lfn )
      self.__reportProblematicFiles( noReplicas.keys(), 'LFN-LFC-NoReplicas' )
    return S_OK( {'Replicas':replicas, 'ZeroReplicas':noReplicas, 'Failed':failed} )

  def __reportProblematicFiles( self, lfns, reason ):
    return S_OK()
    res = self.dataIntegrityClient.setFileProblematic( lfns, reason, sourceComponent = 'RequestPreparationAgent' )
    if not res['OK']:
      gLogger.error( "RequestPreparation.__reportProblematicFiles: Failed to report missing files.", res['Message'] )
      return res
    if res['Value']['Successful']:
      gLogger.info( "RequestPreparation.__reportProblematicFiles: Successfully reported %s missing files." % len( res['Value']['Successful'] ) )
    if res['Value']['Failed']:
      gLogger.info( "RequestPreparation.__reportProblematicFiles: Failed to report %s problematic files." % len( res['Value']['Failed'] ) )
    return res
Example #4
class RequestPreparationAgent(AgentModule):
    def initialize(self):
        self.fileCatalog = FileCatalog()
        self.dm = DataManager()
        self.stagerClient = StorageManagerClient()
        self.dataIntegrityClient = DataIntegrityClient()
        # This sets the default proxy to be used, as defined under
        # /Operations/Shifter/DataManager.
        # The shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption("shifterProxy", "DataManager")

        return S_OK()

    def execute(self):
        """This is the first logical task to be executed and manages the New->Waiting transition of the Replicas"""
        res = self.__getNewReplicas()
        if not res["OK"]:
            gLogger.fatal(
                "RequestPreparation.prepareNewReplicas: Failed to get replicas from StagerDB.", res["Message"]
            )
            return res
        if not res["Value"]:
            gLogger.info("There were no New replicas found")
            return res
        replicas = res["Value"]["Replicas"]
        replicaIDs = res["Value"]["ReplicaIDs"]
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained %s New replicas for preparation." % len(replicaIDs)
        )

        # Check if the files exist in the FileCatalog
        res = self.__getExistingFiles(replicas)
        if not res["OK"]:
            return res
        exist = res["Value"]["Exist"]
        terminal = res["Value"]["Missing"]
        failed = res["Value"]["Failed"]
        if not exist:
            gLogger.error("RequestPreparation.prepareNewReplicas: Failed to determine the existence of any file")
            return S_OK()
        terminalReplicaIDs = {}
        for lfn, reason in terminal.items():
            for replicaID in replicas[lfn].values():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info("RequestPreparation.prepareNewReplicas: %s files exist in the FileCatalog." % len(exist))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files do not exist in the FileCatalog." % len(terminal)
            )

        # Obtain the file sizes from the FileCatalog
        res = self.__getFileSize(exist)
        if not res["OK"]:
            return res
        failed.update(res["Value"]["Failed"])
        terminal = res["Value"]["ZeroSize"]
        fileSizes = res["Value"]["FileSizes"]
        if not fileSizes:
            gLogger.error("RequestPreparation.prepareNewReplicas: Failed determine sizes of any files")
            return S_OK()
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained %s file sizes from the FileCatalog." % len(fileSizes)
        )
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files registered with zero size in the FileCatalog."
                % len(terminal)
            )

        # Obtain the replicas from the FileCatalog
        res = self.__getFileReplicas(list(fileSizes))
        if not res["OK"]:
            return res
        failed.update(res["Value"]["Failed"])
        terminal = res["Value"]["ZeroReplicas"]
        fileReplicas = res["Value"]["Replicas"]
        if not fileReplicas:
            gLogger.error("RequestPreparation.prepareNewReplicas: Failed determine replicas for any files")
            return S_OK()
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained replica information for %s files from the FileCatalog."
            % len(fileReplicas)
        )
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files registered with zero replicas in the FileCatalog."
                % len(terminal)
            )

        # Check the replicas exist at the requested site
        replicaMetadata = []
        for lfn, requestedSEs in replicas.items():
            lfnReplicas = fileReplicas.get(lfn)

            # This should not happen in principle, but it was seen
            # after a corrupted staging request has entered the DB
            if not lfnReplicas:
                gLogger.error("Missing replicas information", "%s %s" % (lfn, requestedSEs))
                continue

            for requestedSE, replicaID in requestedSEs.items():
                if requestedSE not in lfnReplicas:
                    terminalReplicaIDs[replicaID] = "LFN not registered at requested SE"
                    replicas[lfn].pop(requestedSE)
                else:
                    replicaMetadata.append((replicaID, lfnReplicas[requestedSE], fileSizes[lfn]))

        # Update the states of the files in the database
        if terminalReplicaIDs:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s replicas are terminally failed." % len(terminalReplicaIDs)
            )
            res = self.stagerClient.updateReplicaFailure(terminalReplicaIDs)
            if not res["OK"]:
                gLogger.error(
                    "RequestPreparation.prepareNewReplicas: Failed to update replica failures.", res["Message"]
                )
        if replicaMetadata:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s replica metadata to be updated." % len(replicaMetadata)
            )
            # Sets the Status='Waiting' of CacheReplicas records that are OK with catalogue checks
            res = self.stagerClient.updateReplicaInformation(replicaMetadata)
            if not res["OK"]:
                gLogger.error(
                    "RequestPreparation.prepareNewReplicas: Failed to update replica metadata.", res["Message"]
                )
        return S_OK()

    def __getNewReplicas(self):
        """This obtains the New replicas from the Replicas table and for each LFN the requested storage element"""
        # First obtain the New replicas from the CacheReplicas table
        res = self.stagerClient.getCacheReplicas({"Status": "New"})
        if not res["OK"]:
            gLogger.error(
                "RequestPreparation.__getNewReplicas: Failed to get replicas with New status.", res["Message"]
            )
            return res
        if not res["Value"]:
            gLogger.debug("RequestPreparation.__getNewReplicas: No New replicas found to process.")
            return S_OK()
        else:
            gLogger.debug(
                "RequestPreparation.__getNewReplicas: Obtained %s New replica(s) to process." % len(res["Value"])
            )
        replicas = {}
        replicaIDs = {}
        for replicaID, info in res["Value"].items():
            lfn = info["LFN"]
            storageElement = info["SE"]
            replicas.setdefault(lfn, {})[storageElement] = replicaID
            replicaIDs[replicaID] = (lfn, storageElement)
        return S_OK({"Replicas": replicas, "ReplicaIDs": replicaIDs})

    def __getExistingFiles(self, lfns):
        """This checks that the files exist in the FileCatalog."""
        res = self.fileCatalog.exists(list(set(lfns)))
        if not res["OK"]:
            gLogger.error(
                "RequestPreparation.__getExistingFiles: Failed to determine whether files exist.", res["Message"]
            )
            return res
        failed = res["Value"]["Failed"]
        success = res["Value"]["Successful"]
        exist = [lfn for lfn, exists in success.items() if exists]
        missing = list(set(success) - set(exist))
        if missing:
            reason = "LFN not registered in the FC"
            gLogger.warn("RequestPreparation.__getExistingFiles: %s" % reason, "\n".join([""] + missing))
            self.__reportProblematicFiles(missing, "LFN-LFC-DoesntExist")
            missing = dict.fromkeys(missing, reason)
        else:
            missing = {}
        return S_OK({"Exist": exist, "Missing": missing, "Failed": failed})

    def __getFileSize(self, lfns):
        """This obtains the file size from the FileCatalog."""
        fileSizes = {}
        zeroSize = {}
        res = self.fileCatalog.getFileSize(lfns)
        if not res["OK"]:
            gLogger.error("RequestPreparation.__getFileSize: Failed to get sizes for files.", res["Message"])
            return res
        failed = res["Value"]["Failed"]
        for lfn, size in res["Value"]["Successful"].items():
            if size == 0:
                zeroSize[lfn] = "LFN registered with zero size in the FileCatalog"
            else:
                fileSizes[lfn] = size
        if zeroSize:
            for lfn, reason in zeroSize.items():
                gLogger.warn("RequestPreparation.__getFileSize: %s" % reason, lfn)
            self.__reportProblematicFiles(zeroSize.keys(), "LFN-LFC-ZeroSize")
        return S_OK({"FileSizes": fileSizes, "ZeroSize": zeroSize, "Failed": failed})

    def __getFileReplicas(self, lfns):
        """This obtains the replicas from the FileCatalog."""
        replicas = {}
        noReplicas = {}
        res = self.dm.getActiveReplicas(lfns)
        if not res["OK"]:
            gLogger.error("RequestPreparation.__getFileReplicas: Failed to obtain file replicas.", res["Message"])
            return res
        failed = res["Value"]["Failed"]
        for lfn, lfnReplicas in res["Value"]["Successful"].items():
            if len(lfnReplicas) == 0:
                noReplicas[lfn] = "LFN registered with zero replicas in the FileCatalog"
            else:
                replicas[lfn] = lfnReplicas
        if noReplicas:
            for lfn, reason in noReplicas.items():
                gLogger.warn("RequestPreparation.__getFileReplicas: %s" % reason, lfn)
            self.__reportProblematicFiles(list(noReplicas), "LFN-LFC-NoReplicas")
        return S_OK({"Replicas": replicas, "ZeroReplicas": noReplicas, "Failed": failed})

    def __reportProblematicFiles(self, lfns, reason):
        # NOTE: the early return below short-circuits this method; the reporting
        # code that follows is currently unreachable.
        return S_OK()
        res = self.dataIntegrityClient.setFileProblematic(lfns, reason, sourceComponent="RequestPreparationAgent")
        if not res["OK"]:
            gLogger.error(
                "RequestPreparation.__reportProblematicFiles: Failed to report missing files.", res["Message"]
            )
            return res
        if res["Value"]["Successful"]:
            gLogger.info(
                "RequestPreparation.__reportProblematicFiles: Successfully reported %s missing files."
                % len(res["Value"]["Successful"])
            )
        if res["Value"]["Failed"]:
            gLogger.info(
                "RequestPreparation.__reportProblematicFiles: Failed to report %s problematic files."
                % len(res["Value"]["Failed"])
            )
        return res
Example #5
class PluginUtilities(object):
  """
  Utility class used by plugins
  """

  def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None,
               debug=False, transInThread=None, transID=None):
    """
    c'tor

    Setting defaults
    """
    # clients
    if transClient is None:
      self.transClient = TransformationClient()
    else:
      self.transClient = transClient
    if dataManager is None:
      self.dm = DataManager()
    else:
      self.dm = dataManager
    if fc is None:
      self.fc = FileCatalog()
    else:
      self.fc = fc

    self.dmsHelper = DMSHelpers()

    self.plugin = plugin
    self.transID = transID
    self.params = {}
    self.groupSize = 0
    self.maxFiles = 0
    self.cachedLFNSize = {}
    self.transString = ''
    self.debug = debug
    if transInThread is None:
      self.transInThread = {}
    else:
      self.transInThread = transInThread

    self.log = gLogger.getSubLogger(self.plugin +
                                    self.transInThread.get(self.transID, ' [NoThread] [%s] ' % self.transID))
    # FIXME: This doesn't work (yet) but should soon, will allow scripts to get the context
    self.log.showHeaders(True)

  def logVerbose(self, message, param=''):
    """ logger helper """
    if self.debug:
      log = gLogger.getSubLogger(self.plugin + ' (V)' +
                                 self.transInThread.get(self.transID, ' [NoThread] [%s] ' % self.transID))
      log.info(message, param)
    else:
      self.log.verbose(message, param)

  def logDebug(self, message, param=''):
    """ logger helper """
    self.log.debug(message, param)

  def logInfo(self, message, param=''):
    """ logger helper """
    self.log.info(message, param)

  def logWarn(self, message, param=''):
    """ logger helper """
    self.log.warn(message, param)

  def logError(self, message, param=''):
    """ logger helper """
    self.log.error(message, param)

  def logException(self, message, param='', lException=False):
    """ logger helper """
    self.log.exception(message, param, lException)

  def setParameters(self, params):
    """ Set the transformation parameters and extract transID """
    self.params = params
    self.transID = params['TransformationID']
    self.log = gLogger.getSubLogger(self.plugin +
                                    self.transInThread.get(self.transID, ' [NoThread] [%s] ' % self.transID))

  # @timeThis
  def groupByReplicas(self, files, status):
    """
    Generates tasks based on the location of the input data

    :param dict files:
              {'/this/is/at.1': ['SE1'],
               '/this/is/at.12': ['SE1', 'SE2'],
               '/this/is/at.2': ['SE2'],
               '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
               '/this/is/at_23': ['SE2', 'SE3'],
               '/this/is/at_4': ['SE4']}

    """
    tasks = []
    nTasks = 0

    if not files:
      return S_OK(tasks)

    files = dict(files)

    # Parameters
    if not self.groupSize:
      self.groupSize = self.getPluginParam('GroupSize', 10)
    flush = (status == 'Flush')
    self.logVerbose(
        "groupByReplicas: %d files, groupSize %d, flush %s" %
        (len(files), self.groupSize, flush))

    # Consider files by groups of SEs, a file is only in one group
    # Then consider files site by site, but a file can now be at more than one site
    for groupSE in (True, False):
      if not files:
        break
      seFiles = getFileGroups(files, groupSE=groupSE)
      self.logDebug("fileGroups set: ", seFiles)

      for replicaSE in sortSEs(seFiles):
        lfns = seFiles[replicaSE]
        if lfns:
          tasksLfns = breakListIntoChunks(lfns, self.groupSize)
          lfnsInTasks = []
          for taskLfns in tasksLfns:
            if flush or (len(taskLfns) >= self.groupSize):
              tasks.append((replicaSE, taskLfns))
              lfnsInTasks += taskLfns
          # In case the file was at more than one site, remove it from the other sites' list
          # Remove files from global list
          for lfn in lfnsInTasks:
            files.pop(lfn)
          if not groupSE:
            # Remove files from other SEs
            for se in [se for se in seFiles if se != replicaSE]:
              seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in lfnsInTasks]
      self.logVerbose(
          "groupByReplicas: %d tasks created (groupSE %s)" %
          (len(tasks) - nTasks, str(groupSE)), "%d files not included in tasks" %
          len(files))
      nTasks = len(tasks)

    return S_OK(tasks)
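
  # The task list returned above pairs an SE (or comma-joined SE-group) name
  # with a chunk of LFNs, e.g. (illustrative names only):
  #   tasks = [ ( 'SE1', ['/lfn/a', '/lfn/b'] ),
  #             ( 'SE1,SE2', ['/lfn/c'] ) ]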

  def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False):
    """
    Split files in groups according to the size and create tasks for a given SE
    """
    tasks = []
    if fileSizes is None:
      fileSizes = self._getFileSize(lfns).get('Value')
    if fileSizes is None:
      self.logWarn('Error getting file sizes, no tasks created')
      return tasks
    taskLfns = []
    taskSize = 0
    if not self.groupSize:
      # input size in GB converted to bytes
      self.groupSize = float(self.getPluginParam('GroupSize', 1.)) * 1000 * 1000 * 1000
    if not self.maxFiles:
      # FIXME: prepare for changing the name of the ambiguous CS option
      self.maxFiles = self.getPluginParam('MaxFilesPerTask', self.getPluginParam('MaxFiles', 100))
    lfns = sorted(lfns, key=fileSizes.get)
    for lfn in lfns:
      size = fileSizes.get(lfn, 0)
      if size:
        if size > self.groupSize:
          tasks.append((replicaSE, [lfn]))
        else:
          taskSize += size
          taskLfns.append(lfn)
          if (taskSize > self.groupSize) or (len(taskLfns) >= self.maxFiles):
            tasks.append((replicaSE, taskLfns))
            taskLfns = []
            taskSize = 0
    if flush and taskLfns:
      tasks.append((replicaSE, taskLfns))
    if not tasks and not flush and taskLfns:
      self.logVerbose(
          'Not enough data to create a task, and flush not set (%d bytes for groupSize %d)' %
          (taskSize, self.groupSize))
    return tasks
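
  # Hypothetical walk-through of the rule above, assuming groupSize = 10 bytes,
  # a large maxFiles and fileSizes = {'a': 4, 'b': 4, 'c': 4, 'd': 12}
  # (files are processed in ascending size order):
  #   'a' and 'b' accumulate (4, then 8 <= 10); 'c' brings the total to 12 > 10,
  #   which closes the task ( replicaSE, ['a', 'b', 'c'] );
  #   'd' alone exceeds groupSize and becomes its own task ( replicaSE, ['d'] )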

  # @timeThis
  def groupBySize(self, files, status):
    """
    Generate a task for a given amount of data
    """
    tasks = []
    nTasks = 0

    if not files:
      return S_OK(tasks)

    files = dict(files)
    # Parameters
    if not self.groupSize:
      # input size in GB converted to bytes
      self.groupSize = float(self.getPluginParam('GroupSize', 1)) * 1000 * 1000 * 1000
    flush = (status == 'Flush')
    self.logVerbose(
        "groupBySize: %d files, groupSize: %d, flush: %s" %
        (len(files), self.groupSize, flush))

    # Get the file sizes
    res = self._getFileSize(list(files))
    if not res['OK']:
      return res
    fileSizes = res['Value']

    for groupSE in (True, False):
      if not files:
        break
      seFiles = getFileGroups(files, groupSE=groupSE)

      for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles):
        lfns = seFiles[replicaSE]
        newTasks = self.createTasksBySize(lfns, replicaSE, fileSizes=fileSizes, flush=flush)
        lfnsInTasks = []
        for task in newTasks:
          lfnsInTasks += task[1]
        tasks += newTasks

        # Remove the selected files from the size cache
        self.clearCachedFileSize(lfnsInTasks)
        if not groupSE:
          # Remove files from other SEs
          for se in [se for se in seFiles if se != replicaSE]:
            seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in lfnsInTasks]
        # Remove files from global list
        for lfn in lfnsInTasks:
          files.pop(lfn)

      self.logVerbose(
          "groupBySize: %d tasks created with groupSE %s" %
          (len(tasks) - nTasks, str(groupSE)))
      self.logVerbose("groupBySize: %d files have not been included in tasks" % len(files))
      nTasks = len(tasks)

    self.logVerbose("Grouped %d files by size" % len(files))
    return S_OK(tasks)

  def getExistingCounters(self, normalise=False, requestedSites=None):
    """ Get the count of TransformationFiles per UsedSE, optionally normalised or filtered by site """
    if requestedSites is None:
      requestedSites = []
    res = self.transClient.getCounters('TransformationFiles', ['UsedSE'],
                                       {'TransformationID': self.params['TransformationID']})
    if not res['OK']:
      return res
    usageDict = {}
    for usedDict, count in res['Value']:
      usedSE = usedDict['UsedSE']
      if usedSE != 'Unknown':
        usageDict[usedSE] = count
    if requestedSites:
      siteDict = {}
      for se, count in usageDict.items():
        res = getSitesForSE(se)
        if not res['OK']:
          return res
        for site in res['Value']:
          if site in requestedSites:
            siteDict[site] = count
      usageDict = siteDict.copy()
    if normalise:
      usageDict = self._normaliseShares(usageDict)
    return S_OK(usageDict)

  # @timeThis
  def _getFileSize(self, lfns):
    """ Get file size from a cache, if not from the catalog
    #FIXME: have to fill the cachedLFNSize!
    """
    lfns = list(lfns)
    cachedLFNSize = dict(self.cachedLFNSize)

    fileSizes = {}
    for lfn in [lfn for lfn in lfns if lfn in cachedLFNSize]:
      fileSizes[lfn] = cachedLFNSize[lfn]
    self.logDebug(
        "Found cache hit for File size for %d files out of %d" %
        (len(fileSizes), len(lfns)))
    lfns = [lfn for lfn in lfns if lfn not in cachedLFNSize]
    if lfns:
      fileSizes = self._getFileSizeFromCatalog(lfns, fileSizes)
      if not fileSizes['OK']:
        self.logError(fileSizes['Message'])
        return fileSizes
      fileSizes = fileSizes['Value']
    return S_OK(fileSizes)

  # @timeThis
  def _getFileSizeFromCatalog(self, lfns, fileSizes):
    """
    Get file size from the catalog
    """
    lfns = list(lfns)
    fileSizes = dict(fileSizes)

    res = self.fc.getFileSize(lfns)
    if not res['OK']:
      return S_ERROR("Failed to get sizes for all files: %s" % res['Message'])
    if res['Value']['Failed']:
      errorReason = sorted(set(res['Value']['Failed'].values()))
      self.logWarn("Failed to get sizes for %d files:" % len(res['Value']['Failed']), errorReason)
    fileSizes.update(res['Value']['Successful'])
    self.cachedLFNSize.update(res['Value']['Successful'])
    self.logVerbose("Got size of %d files from catalog" % len(lfns))
    return S_OK(fileSizes)

  def clearCachedFileSize(self, lfns):
    """ Utility function
    """
    for lfn in [lfn for lfn in lfns if lfn in self.cachedLFNSize]:
      self.cachedLFNSize.pop(lfn)

  def getPluginParam(self, name, default=None):
    """ Get plugin parameters using specific settings or settings defined in the CS
        Caution: the type returned is that of the default value
    """
    # get the value of a parameter looking 1st in the CS
    if default is not None:
      valueType = type(default)
    else:
      valueType = None
    # First look at a generic value...
    optionPath = "TransformationPlugins/%s" % (name)
    value = Operations().getValue(optionPath, None)
    self.logVerbose("Default plugin param %s: '%s'" % (optionPath, value))
    # Then look at a plugin-specific value
    optionPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
    value = Operations().getValue(optionPath, value)
    self.logVerbose("Specific plugin param %s: '%s'" % (optionPath, value))
    if value is not None:
      default = value
    # Finally look at a transformation-specific parameter
    value = self.params.get(name, default)
    self.logVerbose(
        "Transformation plugin param %s: '%s'. Convert to %s" %
        (name, value, str(valueType)))
    if valueType and not isinstance(value, valueType):
      if valueType is list:
        try:
          value = ast.literal_eval(value) if value and value != 'None' else []
        # literal_eval('SE-DST') -> ValueError
        # literal_eval('SE_MC-DST') -> SyntaxError
        # Don't ask...
        except (ValueError, SyntaxError):
          value = [val for val in value.replace(' ', '').split(',') if val]

      elif valueType is int:
        value = int(value)
      elif valueType is float:
        value = float(value)
      elif valueType is bool:
        if value in ('False', 'No', 'None', None, 0):
          value = False
        else:
          value = bool(value)
      elif valueType is not str:
        self.logWarn(
            "Unknown parameter type (%s) for %s, passed as string" %
            (str(valueType), name))
    self.logVerbose("Final plugin param %s: '%s'" % (name, value))
    return value
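
  # With made-up CS values, the coercion driven by the default's type above
  # behaves like this (all option names and values are hypothetical):
  #   getPluginParam('GroupSize', 10)  with CS value '50'      -> 50 (int)
  #   getPluginParam('TargetSEs', [])  with CS value 'SE1,SE2' -> ['SE1', 'SE2']
  #   getPluginParam('UseRAW', False)  with CS value 'No'      -> False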

  @staticmethod
  def _normaliseShares(originalShares):
    """ Normalize shares to 1 """
    total = sum(float(share) for share in originalShares.values())
    return dict([(site, 100. * float(share) / total if total else 0.)
                 for site, share in originalShares.items()])
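
  # Worked example of the normalisation (shares are made up):
  #   PluginUtilities._normaliseShares( {'SiteA': 3, 'SiteB': 1} )
  #   -> {'SiteA': 75.0, 'SiteB': 25.0}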

  def uniqueSEs(self, ses):
    """ return a list of SEs that are not physically the same """
    newSEs = []
    for se in ses:
      if not self.isSameSEInList(se, newSEs):
        newSEs.append(se)
    return newSEs

  def isSameSE(self, se1, se2):
    """ Check if 2 SEs are indeed the same.

        :param se1: name of the first StorageElement
        :param se2: name of the second StorageElement

        :returns: True/False if they are considered the same.
                  See :py:mod:`~DIRAC.Resources.Storage.StorageElement.StorageElementItem.isSameSE`
    """
    if se1 == se2:
      return True

    return StorageElement(se1).isSameSE(StorageElement(se2))

  def isSameSEInList(self, se1, seList):
    """ Check if an SE is the same as any in a list """
    if se1 in seList:
      return True
    for se in seList:
      if self.isSameSE(se1, se):
        return True
    return False

  def closerSEs(self, existingSEs, targetSEs, local=False):
    """ Order the targetSEs such that the first ones are closer to existingSEs. Keep all elements in targetSEs
    """
    setTarget = set(targetSEs)
    sameSEs = set([se1 for se1 in setTarget for se2 in existingSEs if self.isSameSE(se1, se2)])
    targetSEs = setTarget - set(sameSEs)
    if targetSEs:
      # Some SEs are left, look for sites
      existingSites = [self.dmsHelper.getLocalSiteForSE(se).get('Value')
                       for se in existingSEs]
      existingSites = set([site for site in existingSites if site])
      closeSEs = set([se for se in targetSEs
                      if self.dmsHelper.getLocalSiteForSE(se).get('Value') in existingSites])
      otherSEs = targetSEs - closeSEs
      targetSEs = list(closeSEs)
      random.shuffle(targetSEs)
      if not local and otherSEs:
        otherSEs = list(otherSEs)
        random.shuffle(otherSEs)
        targetSEs += otherSEs
    else:
      targetSEs = []
    return (targetSEs + list(sameSEs)) if not local else targetSEs

  @staticmethod
  def seParamtoList(inputParam):
    """Transform ``inputParam`` to list.

    :param inputParam: can be string, list, or string representation of list
    :returns: list
    """
    if not inputParam:
      return []
    if inputParam.count('['):
      return eval(inputParam)  # pylint: disable=eval-used
    elif isinstance(inputParam, list):
      return inputParam
    return [inputParam]
Example #6
class DataIntegrityClient(Client):
    """Client exposing the DataIntegrity Service."""
    def __init__(self, **kwargs):

        super(DataIntegrityClient, self).__init__(**kwargs)
        self.setServer('DataManagement/DataIntegrity')
        self.dm = DataManager()
        self.fc = FileCatalog()

    def setFileProblematic(self, lfn, reason, sourceComponent=''):
        """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if isinstance(lfn, list):
            lfns = lfn
        elif isinstance(lfn, six.string_types):
            lfns = [lfn]
        else:
            errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be a list or a single LFN."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setFileProblematic: Attempting to update %s files."
            % len(lfns))
        fileMetadata = {}
        for lfn in lfns:
            fileMetadata[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': '',
                'SE': ''
            }
        res = self.insertProblematic(sourceComponent, fileMetadata)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setFileProblematic: Failed to insert problematic files into the integrity DB"
            )
        return res

    def reportProblematicReplicas(self, replicaTuple, se, reason):
        """ Simple wrapper function around setReplicaProblematic """
        gLogger.info('The following %s files had %s at %s' %
                     (len(replicaTuple), reason, se))
        for lfn, _pfn, _se, _reason in sorted(replicaTuple):
            if lfn:
                gLogger.info(lfn)
        res = self.setReplicaProblematic(replicaTuple,
                                         sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with replicas',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with replicas')

    def setReplicaProblematic(self, replicaTuple, sourceComponent=''):
        """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaTuple should be of the form (lfn, pfn, se, prognosis)

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if isinstance(replicaTuple, tuple):
            replicaTuple = [replicaTuple]
        elif isinstance(replicaTuple, list):
            pass
        else:
            errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas."
            % len(replicaTuple))
        replicaDict = {}
        for lfn, pfn, se, reason in replicaTuple:
            replicaDict[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': pfn,
                'SE': se
            }
        res = self.insertProblematic(sourceComponent, replicaDict)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB"
            )
            return res
        for lfn in replicaDict:
            replicaDict[lfn]['Status'] = 'Problematic'

        res = self.fc.setReplicaStatus(replicaDict)
        if not res['OK']:
            errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
            gLogger.error(errStr, res['Message'])
            return res
        failed = res['Value']['Failed']
        successful = res['Value']['Successful']
        resDict = {'Successful': successful, 'Failed': failed}
        return S_OK(resDict)
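
    # A hypothetical call, matching the ( lfn, pfn, se, reason ) tuples
    # unpacked above:
    #   client = DataIntegrityClient()
    #   res = client.setReplicaProblematic(
    #       [ ( '/some/lfn.dst', '', 'SOME-SE', 'CatalogPFNSizeMismatch' ) ],
    #       sourceComponent='DataIntegrityClient' )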

    ##########################################################################
    #
    # This section contains the resolution methods for various prognoses
    #

    def __updateCompletedFiles(self, prognosis, fileID):
        gLogger.info("%s file (%d) is resolved" % (prognosis, fileID))
        return self.setProblematicStatus(fileID, 'Resolved')

    def __returnProblematicError(self, fileID, res):
        self.incrementProblematicRetry(fileID)
        gLogger.error('DataIntegrityClient failure', res['Message'])
        return res

    def __updateReplicaToChecked(self, problematicDict):
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        prognosis = problematicDict['Prognosis']
        problematicDict['Status'] = 'Checked'

        res = returnSingleResult(
            self.fc.setReplicaStatus({lfn: problematicDict}))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        gLogger.info("%s replica (%d) is updated to Checked status" %
                     (prognosis, fileID))
        return self.__updateCompletedFiles(prognosis, fileID)

    def resolveCatalogPFNSizeMismatch(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']
        res = returnSingleResult(StorageElement(se).getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        bkKCatalog = FileCatalog(['BookkeepingDB'])
        res = returnSingleResult(bkKCatalog.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        bookkeepingSize = res['Value']
        if bookkeepingSize == catalogSize == storageSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) matched all registered sizes."
                % fileID)
            return self.__updateReplicaToChecked(problematicDict)
        if catalogSize == bookkeepingSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also"
                % fileID)
            res = returnSingleResult(self.fc.getReplicas(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            if len(res['Value']) <= 1:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has no other replicas."
                    % fileID)
                return S_ERROR(
                    "Not removing catalog file mismatch since it is the only replica"
                )
            else:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..."
                    % fileID)
                res = self.dm.removeReplica(se, lfn)
                if not res['OK']:
                    return self.__returnProblematicError(fileID, res)
                return self.__updateCompletedFiles('CatalogPFNSizeMismatch',
                                                   fileID)
        if (catalogSize != bookkeepingSize) and (bookkeepingSize
                                                 == storageSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size"
                % fileID)
            res = self.__updateReplicaToChecked(problematicDict)
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.changeProblematicPrognosis(fileID,
                                                   'BKCatalogSizeMismatch')
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) all sizes found to mismatch. Updating retry count"
            % fileID)
        return self.incrementProblematicRetry(fileID)

    ############################################################################################

    def _reportProblematicFiles(self, lfns, reason):
        """ Simple wrapper function around setFileProblematic
    """
        gLogger.info('The following %s files were found with %s' %
                     (len(lfns), reason))
        for lfn in sorted(lfns):
            gLogger.info(lfn)
        res = self.setFileProblematic(lfns,
                                      reason,
                                      sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with files',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with files')
Example #7
class helper_TransferAgent(object):

  def __init__(self, transferAgent, gTransferDB):
    self.transferAgent = transferAgent
    self.transferDB = gTransferDB
    gLogger.info("Creating File Catalog")
    self.fileCatalog = FileCatalog()

  def helper_add_transfer(self, result):
    if not result:
      gLogger.error("There is no information")
      return False

    res = self.transferDB.get_TransferRequest(condDict={
                                                      "id": result.trans_req_id
                                                      })
    if not res["OK"]:
      return False
    req_list = res["Value"]
    if len(req_list) != 1:
      return False
    req = TransRequestEntryWithID._make(req_list[0])

    # construct the info
    info = {"id": result.id,
            "LFN": result.LFN,
            "srcSE": req.srcSE,
            "dstSE": req.dstSE,
            "retransfer": -1,
            "error": ""}
    # Add the Transfer
    worker = gTransferFactory.generate(req.protocol, info)
    self.transferAgent.transfer_worker.append(worker)
    # Change the status
    self.helper_status_update(
        self.transferDB.tables["TransferFileList"],
        result.id,
        {"status":"transfer", 
          "start_time":datetime.datetime.utcnow()})
    # Add Accounting:
    d = {}
    d["User"] = req.username
    d["Source"] = req.srcSE
    d["Destination"] = req.dstSE
    d["Protocol"] = req.protocol
    d["FinalStatus"] = "OK"
    d["TransferSize"] = 0 # TODO
    r = self.fileCatalog.getFileSize(result.LFN)
    if r["OK"]:
      if r["Value"]["Successful"]:
        d["TransferSize"] = r["Value"]["Successful"][result.LFN]
    d["TransferTime"] = 1 # 1s 
    d["TransferOK"] = 1
    d["TransferTotal"] = 1
    acct_dt = DataTransfer()
    acct_dt.setValuesFromDict(d)
    acct_dt.setNowAsStartAndEndTime()
    # save it 
    worker.acct_dt = acct_dt

    return True

  def helper_remove_transfer(self, worker):
    info = worker.info
    gLogger.info("File.id = %d -> finish" % info["id"])
    self.helper_status_update(
        self.transferDB.tables["TransferFileList"],
        info["id"],
        {"status":"finish", 
          "finish_time": datetime.datetime.utcnow()})
    # Accounting
    acct_dt = worker.acct_dt
    acct_dt.setEndTime()
    # TODO
    d = {}
    td = acct_dt.endTime-acct_dt.startTime
    td_s = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
    d["TransferTime"] = td_s # 1s 
    if info["error"]:
      d["FinalStatus"] = "FAILED"
      d["TransferOK"] = 0
    else:  
      d["FinalStatus"] = "OK"
      d["TransferOK"] = 1

    acct_dt.setValuesFromDict(d)

    acct_dt.commit()
    gLogger.info("Submit Accounting Data")
    
  def helper_check_request(self):
    """
      Check whether the *transfer* requests are done:
      when all of a request's files are *finish*, the request
      itself becomes *finish*.
    """
    infoDict = {"status": "transfer"}
    res = self.transferDB.get_TransferRequest(condDict = infoDict)
    if not res["OK"]:
      return
    reqlist = map(TransRequestEntryWithID._make, res["Value"])
    for req in reqlist:
      res = self.transferDB._query(
          'select count(*) from %(table)s where trans_req_id = %(id)d and status not in %(status_list)s' % {
             "table": self.transferDB.tables["TransferFileList"], 
             "id": req.id,
             "status_list": '("finish", "kill")' # XXX finish or kill means this request is ok.
             }
          )
      if not res["OK"]:
        # TODO
        continue
      count = res["Value"][0][0]
      if count == 0:
        # if all status is finish,
        # the req status --> finish
        gLogger.info("req.id %d change from %s to finish" % (req.id, req.status))
        self.helper_status_update(
            self.transferDB.tables["TransferRequest"],
            req.id,
            {"status":"finish"})
    return 

  def helper_get_new_request(self):
    # 1. get the *new* File in the <<Transfer File List>>.
    #    if we get, goto <<Add New Transfer>>
    already_load_status = False
    result_new_file = self.helper_get_new_File()
    # 1.1 2014.04.20
    #     The other requests should be loaded as well,
    #     so we must not return immediately
    if result_new_file:
      already_load_status = True
    # 2. if we can't get one, we should get a *new* request
    #    from the <<Transfer Request>>.
    #    if we can't get, return False. STOP
    self.helper_check_request()
    result = self.helper_get_new_request_entry()
    if result:
      # 3. add the filelist in the dataset to the << Transfer File List >>
      condDict = {"name":result.dataset}  
      res = self.transferDB.get_Dataset(condDict)
      if not res["OK"]:
        gLogger.error(res)
        return None
      filelist = res["Value"]
      # update the status in << Request >>
      if len(filelist) > 0:
        req_status = "transfer"
      else:
        req_status = "finish"
      self.helper_status_update(self.transferDB.tables["TransferRequest"],
                                result.id,
                                {"status":req_status})
      self.transferDB.insert_TransferFileList(result.id, filelist)
    # 4. get the *new* File Again.
    # 5. can't get, return False. STOP
    # 4.prelude
    #    If already loaded, return the last result
    if already_load_status and result_new_file:
      return result_new_file
      
    result = self.helper_get_new_File()
    return result

  def helper_get_new_request_entry(self):
    """
    TransRequestEntryWithID(
      id=1L, 
      username='******', 
      dataset='my-dataset', 
      srcSE='IHEP-USER', 
      dstSE='IHEPD-USER', 
      submit_time=datetime.datetime(2013, 3, 13, 20, 9, 34), 
      status='new')
    """
    condDict = {"status": "new"}
    res = self.transferDB.get_TransferRequest(condDict)
    if not res["OK"]:
      return None
    req_list = res["Value"]
    len_req = len(req_list)
    if len_req:
      # random select
      tmp_idx = random.randint(0, len_req-1)
      return TransRequestEntryWithID._make(req_list[tmp_idx])
    return None

  def helper_get_new_File(self):
    """
    >>> helper.helper_get_new_File()
    TransFileListEntryWithID(
      id=1L, 
      LFN='/path/does/not/exist', 
      trans_req_id=1L, 
      start_time=None, 
      finish_time=None, 
      status='new')
    """
    condDict = {"status": "new"}
    res = self.transferDB.get_TransferFileList(condDict)
    if not res["OK"]:
      gLogger.error(res)
      return None
    filelist = res["Value"]
    gLogger.info("Filelist:")
    gLogger.info(filelist)
    len_files = len(filelist)
    if len_files > 0:
      tmp_idx = random.randint(0, len_files-1)
      gLogger.info("get file entry index randomly: %d/%d"%(tmp_idx, len_files))
      gLogger.info("get file entry", filelist[tmp_idx])
      return TransFileListEntryWithID._make(filelist[tmp_idx])
    return None

  def helper_status_update(self, table, id, toUpdate):
    res = self.transferDB.updateFields(
                              table,
                              updateDict = toUpdate,
                              condDict = {"id":id},
                              )
    gLogger.debug(res)

  def helper_error_report(self, worker, reason):
    self.helper_status_update(self.transferDB.tables["TransferFileList"],
                              worker.info["id"],
                              {"error": reason})

  def check_worker_status(self, worker):
    """check whether the file transfer is kill(in DB)"""
    res = self.transferDB.getFields(self.transferDB.tables["TransferFileList"],
                                    outFields = ["status"],
                                    condDict = {"id":worker.info["id"]})
    if not res["OK"]:
      gLogger.error(res)
      return

    if not res["Value"]:
      return

    if len(res["Value"]) != 1:
      gLogger.error(res)
      return 

    status = res["Value"][0][0]
    if status == "kill":
      gLogger.info("check worker should be killed: ", status)
      worker.proc.kill()
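
The helpers above all follow DIRAC's result convention: every call returns an S_OK/S_ERROR dictionary whose "OK" key must be tested before "Value" is read (on failure, "Message" holds the error). A minimal sketch of that convention, with a hypothetical helper standing in for the DB calls:

# Minimal sketch of the S_OK/S_ERROR convention used throughout this agent.
# S_OK, S_ERROR and gLogger are real DIRAC utilities; get_unfinished_count is
# invented here purely for illustration.
from DIRAC import gLogger, S_OK, S_ERROR

def get_unfinished_count(trans_req_id):
  """Hypothetical stand-in for the count query in helper_check_request."""
  if trans_req_id < 0:
    return S_ERROR("invalid request id")
  return S_OK(0)  # a real implementation would query the TransferDB here

res = get_unfinished_count(1)
if not res["OK"]:
  gLogger.error(res["Message"])  # S_ERROR carries a Message
elif res["Value"] == 0:
  gLogger.info("request can be marked finish")  # S_OK carries a Value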
Ejemplo n.º 8
0
class TransformationPlugin( PluginBase ):
  """ A TransformationPlugin object should be instantiated by every transformation.
  """

  def __init__( self, plugin, transClient = None, dataManager = None ):
    """ plugin name has to be passed in: it will then be executed as one of the functions below, e.g.
        plugin = 'BySize' will execute TransformationPlugin('BySize')._BySize()
    """
    super( TransformationPlugin, self ).__init__( plugin )

    self.data = {}
    self.files = False
    if transClient is None:
      self.transClient = TransformationClient()
    else:
      self.transClient = transClient

    if dataManager is None:
      self.dm = DataManager()
    else:
      self.dm = dataManager

    self.fc = FileCatalog()


  def isOK( self ):
    self.valid = True
    if ( not self.data ) or ( not self.params ):
      self.valid = False
    return self.valid

  def setInputData( self, data ):
    self.data = data

  def setTransformationFiles( self, files ): #TODO ADDED
    self.files = files

  def _Standard( self ):
    """ Simply group by replica location
    """
    res = self._groupByReplicas()
    if not res['OK']:
      return res
    newTasks = []
    for _se, lfns in res['Value']:
      newTasks.append( ( '', lfns ) )
    return S_OK( newTasks )

  def _BySize( self ):
    """ Alias for groupBySize
    """
    return self._groupBySize()

  def _Broadcast( self ):
    """ This plug-in takes files found at the sourceSE and broadcasts to all (or a selection of) targetSEs.
    """
    if not self.params:
      return S_ERROR( "TransformationPlugin._Broadcast: The 'Broadcast' plugin requires additional parameters." )

    targetseParam = self.params['TargetSE']
    targetSEs = []
    sourceSEs = eval( self.params['SourceSE'] )
    if targetseParam.count( '[' ):
      targetSEs = eval( targetseParam )
    elif type(targetseParam)==type([]):
      targetSEs = targetseParam
    else:
      targetSEs = [targetseParam]
    #sourceSEs = eval(self.params['SourceSE'])
    #targetSEs = eval(self.params['TargetSE'])
    destinations = int( self.params.get( 'Destinations', 0 ) )
    if destinations and ( destinations >= len(targetSEs) ):
      destinations = 0

    status = self.params['Status']
    groupSize = self.params['GroupSize']  # number of files per task

    fileGroups = self._getFileGroups( self.data )  # groups LFNs by SE
    targetSELfns = {}
    for replicaSE, lfns in fileGroups.items():
      ses = replicaSE.split( ',' )
      #sourceSites = self._getSitesForSEs(ses)
      atSource = False
      for se in ses:
        if se in sourceSEs:
          atSource = True
      if not atSource:
        continue

      for lfn in lfns:
        targets = []
        sources = self._getSitesForSEs( ses )
        random.shuffle( targetSEs )
        for targetSE in targetSEs:
          site = self._getSiteForSE( targetSE )['Value']
          if not site in sources:
            if ( destinations ) and ( len( targets ) >= destinations ):
              continue
            sources.append( site )
          targets.append( targetSE )#after all, if someone wants to copy to the source, it's his choice
        strTargetSEs = str.join( ',', sorted( targets ) )
        if not targetSELfns.has_key( strTargetSEs ):
          targetSELfns[strTargetSEs] = []
        targetSELfns[strTargetSEs].append( lfn )
    tasks = []
    for ses, lfns in targetSELfns.items():
      tasksLfns = breakListIntoChunks(lfns, groupSize)
      for taskLfns in tasksLfns:
        if ( status == 'Flush' ) or ( len( taskLfns ) >= int( groupSize ) ):
          #do not allow groups smaller than the groupSize, except if transformation is in flush state
          tasks.append( ( ses, taskLfns ) )
    return S_OK( tasks )

  def _ByShare( self, shareType = 'CPU' ):
    """ first get the shares from the CS, and then makes the grouping looking at the history
    """
    res = self._getShares( shareType, normalise = True )
    if not res['OK']:
      return res
    cpuShares = res['Value']
    gLogger.info( "Obtained the following target shares (%):" )
    for site in sorted( cpuShares.keys() ):
      gLogger.info( "%s: %.1f" % ( site.ljust( 15 ), cpuShares[site] ) )

    # Get the existing destinations from the transformationDB
    res = self._getExistingCounters( requestedSites = cpuShares.keys() )
    if not res['OK']:
      gLogger.error( "Failed to get existing file share", res['Message'] )
      return res
    existingCount = res['Value']
    if existingCount:
      gLogger.info( "Existing site utilization (%):" )
      normalisedExistingCount = self._normaliseShares( existingCount.copy() )
      for se in sorted( normalisedExistingCount.keys() ):
        gLogger.info( "%s: %.1f" % ( se.ljust( 15 ), normalisedExistingCount[se] ) )

    # Group the input files by their existing replicas
    res = self._groupByReplicas()
    if not res['OK']:
      return res
    replicaGroups = res['Value']

    tasks = []
    # For the replica groups 
    for replicaSE, lfns in replicaGroups:
      possibleSEs = replicaSE.split( ',' )
      # Determine the next site based on requested shares, existing usage and candidate sites
      res = self._getNextSite( existingCount, cpuShares, candidates = self._getSitesForSEs( possibleSEs ) )
      if not res['OK']:
        gLogger.error( "Failed to get next destination SE", res['Message'] )
        continue
      targetSite = res['Value']
      # Resolve the ses for the target site
      res = getSEsForSite( targetSite )
      if not res['OK']:
        continue
      ses = res['Value']
      # Determine the selected SE and create the task 
      for chosenSE in ses:
        if chosenSE in possibleSEs:
          tasks.append( ( chosenSE, lfns ) )
          if not existingCount.has_key( targetSite ):
            existingCount[targetSite] = 0
          existingCount[targetSite] += len( lfns )
    return S_OK( tasks )

  def _getShares( self, shareType, normalise = False ):
    """ Takes share from the CS, eventually normalize them
    """
    res = gConfig.getOptionsDict( '/Resources/Shares/%s' % shareType )
    if not res['OK']:
      return res
    if not res['Value']:
      return S_ERROR( "/Resources/Shares/%s option contains no shares" % shareType )
    shares = res['Value']
    for site, value in shares.items():
      shares[site] = float( value )
    if normalise:
      shares = self._normaliseShares( shares )
    if not shares:
      return S_ERROR( "No non-zero shares defined" )
    return S_OK( shares )

  def _getExistingCounters( self, normalise = False, requestedSites = [] ):
    res = self.transClient.getCounters( 'TransformationFiles', ['UsedSE'],
                                        {'TransformationID':self.params['TransformationID']} )
    if not res['OK']:
      return res
    usageDict = {}
    for usedDict, count in res['Value']:
      usedSE = usedDict['UsedSE']
      if usedSE != 'Unknown':
        usageDict[usedSE] = count
    if requestedSites:
      siteDict = {}
      for se, count in usageDict.items():
        res = getSitesForSE( se, gridName = 'LCG' )
        if not res['OK']:
          return res
        for site in res['Value']:
          if site in requestedSites:
            siteDict[site] = count
      usageDict = siteDict.copy()
    if normalise:
      usageDict = self._normaliseShares( usageDict )
    return S_OK( usageDict )

  @classmethod
  def _normaliseShares( cls, originalShares ):
    shares = originalShares.copy()
    total = 0.0
    for site in shares.keys():
      share = float( shares[site] )
      shares[site] = share
      total += share
    for site in shares.keys():
      share = 100.0 * ( shares[site] / total )
      shares[site] = share
    return shares
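
  # Worked example (values invented for illustration): _normaliseShares
  # rescales the shares so that they sum to 100, e.g.
  #   _normaliseShares( {'LCG.CERN.ch': 3.0, 'LCG.IN2P3.fr': 1.0} )
  #     -> {'LCG.CERN.ch': 75.0, 'LCG.IN2P3.fr': 25.0}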

  def _getNextSite( self, existingCount, cpuShares, candidates = [] ):
    # normalise the shares
    siteShare = self._normaliseShares( existingCount )
    # then fill the missing share values to 0
    for site in cpuShares.keys():
      if ( not siteShare.has_key( site ) ):
        siteShare[site] = 0.0
    # determine which site is furthest from its share
    chosenSite = ''
    minShareShortFall = -float( "inf" )
    for site, cpuShare in cpuShares.items():
      if ( candidates ) and not ( site in candidates ):
        continue
      if not cpuShare:
        continue
      existingShare = siteShare[site]
      shareShortFall = cpuShare - existingShare
      if shareShortFall > minShareShortFall:
        minShareShortFall = shareShortFall
        chosenSite = site
    return S_OK( chosenSite )
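
  # Illustrative trace of _getNextSite (numbers invented): with
  # cpuShares = {'SiteA': 75.0, 'SiteB': 25.0} and
  # existingCount = {'SiteA': 9, 'SiteB': 1} (normalised to 90.0 / 10.0),
  # the shortfalls are SiteA: 75 - 90 = -15 and SiteB: 25 - 10 = +15,
  # so 'SiteB', the site furthest below its target share, is chosen.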

  def _groupByReplicas( self ):
    """ Generates a job based on the location of the input data """
    if not self.params:
      return S_ERROR( "TransformationPlugin._Standard: The 'Standard' plug-in requires parameters." )
    status = self.params['Status']
    groupSize = self.params['GroupSize']
    # Group files by SE
    fileGroups = self._getFileGroups( self.data )
    # Create tasks based on the group size
    tasks = []
    for replicaSE in sorted( fileGroups.keys() ):
      lfns = fileGroups[replicaSE]
      tasksLfns = breakListIntoChunks( lfns, groupSize )
      for taskLfns in tasksLfns:
        if ( status == 'Flush' ) or ( len( taskLfns ) >= int( groupSize ) ):
          tasks.append( ( replicaSE, taskLfns ) )
    return S_OK( tasks )

  def _groupBySize( self ):
    """ Generate a task for a given amount of data """
    if not self.params:
      return S_ERROR( "TransformationPlugin._BySize: The 'BySize' plug-in requires parameters." )
    status = self.params['Status']
    requestedSize = float( self.params['GroupSize'] ) * 1000 * 1000 * 1000 # input size in GB converted to bytes
    maxFiles = self.params.get( 'MaxFiles', 100 )
    # Group files by SE
    fileGroups = self._getFileGroups( self.data )
    # Get the file sizes
    res = self.fc.getFileSize( self.data )
    if not res['OK']:
      return S_ERROR( "Failed to get sizes for files" )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to get sizes for all files" )
    fileSizes = res['Value']['Successful']
    tasks = []
    for replicaSE, lfns in fileGroups.items():
      taskLfns = []
      taskSize = 0
      for lfn in lfns:
        taskSize += fileSizes[lfn]
        taskLfns.append( lfn )
        if ( taskSize > requestedSize ) or ( len( taskLfns ) >= maxFiles ):
          tasks.append( ( replicaSE, taskLfns ) )
          taskLfns = []
          taskSize = 0
      if ( status == 'Flush' ) and taskLfns:
        tasks.append( ( replicaSE, taskLfns ) )
    return S_OK( tasks )

  @classmethod
  def _getFileGroups( cls, fileReplicas ):
    """ get file groups dictionary { "SE1,SE2,SE3" : [ lfn1, lfn2 ], ... }
    
    :param dict fileReplicas: { lfn : [SE1, SE2, SE3], ... }
    """
    fileGroups = {}
    for lfn, replicas in fileReplicas.items():
      replicaSEs = ",".join( sorted( list( set( replicas ) ) ) )
      if replicaSEs not in fileGroups:
        fileGroups[replicaSEs] = []
      fileGroups[replicaSEs].append( lfn )
    return fileGroups
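
  # Illustrative example of _getFileGroups (LFNs and SE names invented):
  #   _getFileGroups( { '/lfn/a': ['SE1', 'SE2'],
  #                     '/lfn/b': ['SE2', 'SE1'],
  #                     '/lfn/c': ['SE1'] } )
  #     -> { 'SE1,SE2': ['/lfn/a', '/lfn/b'], 'SE1': ['/lfn/c'] }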

  @classmethod
  def _getSiteForSE( cls, se ):
    """ Get site name for the given SE
    """
    result = getSitesForSE( se, gridName = 'LCG' )
    if not result['OK']:
      return result
    if result['Value']:
      return S_OK( result['Value'][0] )
    return S_OK( '' )

  @classmethod
  def _getSitesForSEs( cls, seList ):
    """ Get all the sites for the given SE list
    """
    sites = []
    for se in seList:
      result = getSitesForSE( se, gridName = 'LCG' )
      if result['OK']:
        sites += result['Value']
    return sites
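
The __init__ docstring above describes the dispatch convention: the plugin name selects the corresponding _<Name> method. A minimal usage sketch, under the assumption that self.params can be assigned directly for testing (a real transformation agent fills it from the TransformationDB; all names and LFNs below are invented):

# Usage sketch only; calls _Standard() directly, as the docstring describes.
plugin = TransformationPlugin( 'Standard' )
plugin.params = { 'TransformationID': 1, 'Status': 'Flush', 'GroupSize': 2 }
plugin.setInputData( { '/lfn/a': ['SE1'], '/lfn/b': ['SE1'], '/lfn/c': ['SE2'] } )
if plugin.isOK():
  res = plugin._Standard()
  if res['OK']:
    print res['Value']  # e.g. [('', ['/lfn/a', '/lfn/b']), ('', ['/lfn/c'])]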
Ejemplo n.º 9
0
class DataIntegrityClient( Client ):

  """
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB

  """

  def __init__( self, **kwargs ):

    super( DataIntegrityClient, self ).__init__( **kwargs )
    self.setServer( 'DataManagement/DataIntegrity' )
    self.dm = DataManager()
    self.fc = FileCatalog()

  def setFileProblematic( self, lfn, reason, sourceComponent = '' ):
    """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if isinstance( lfn, list ):
      lfns = lfn
    elif isinstance( lfn, basestring ):
      lfns = [lfn]
    else:
      errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setFileProblematic: Attempting to update %s files." % len( lfns ) )
    fileMetadata = {}
    for lfn in lfns:
      fileMetadata[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':'', 'SE':''}
    res = self.insertProblematic( sourceComponent, fileMetadata )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB" )
    return res

  def reportProblematicReplicas( self, replicaTuple, se, reason ):
    """ Simple wrapper function around setReplicaProblematic """
    gLogger.info( 'The following %s files had %s at %s' % ( len( replicaTuple ), reason, se ) )
    for lfn, _pfn, se, reason in sorted( replicaTuple ):
      if lfn:
        gLogger.info( lfn )
    res = self.setReplicaProblematic( replicaTuple, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with replicas', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with replicas' )

  def setReplicaProblematic( self, replicaTuple, sourceComponent = '' ):
    """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaDict should be of the form {lfn :{'PFN':pfn,'SE':se,'Prognosis':prognosis}

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if isinstance( replicaTuple, tuple ):
      replicaTuple = [replicaTuple]
    elif isinstance( replicaTuple, list ):
      pass
    else:
      errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas." % len( replicaTuple ) )
    replicaDict = {}
    for lfn, pfn, se, reason in replicaTuple:
      replicaDict[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':pfn, 'SE':se}
    res = self.insertProblematic( sourceComponent, replicaDict )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB" )
      return res
    for lfn in replicaDict.keys():
      replicaDict[lfn]['Status'] = 'Problematic'

    res = self.fc.setReplicaStatus( replicaDict )
    if not res['OK']:
      errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
      gLogger.error( errStr, res['Message'] )
      return res
    failed = res['Value']['Failed']
    successful = res['Value']['Successful']
    resDict = {'Successful':successful, 'Failed':failed}
    return S_OK( resDict )

  ##########################################################################
  #
  # This section contains the resolution methods for various prognoses
  #

  def __updateCompletedFiles( self, prognosis, fileID ):
    gLogger.info( "%s file (%d) is resolved" % ( prognosis, fileID ) )
    return self.setProblematicStatus( fileID, 'Resolved' )

  def __returnProblematicError( self, fileID, res ):
    self.incrementProblematicRetry( fileID )
    gLogger.error( 'DataIntegrityClient failure', res['Message'] )
    return res

  def __updateReplicaToChecked( self, problematicDict ):
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']
    prognosis = problematicDict['Prognosis']
    problematicDict['Status'] = 'Checked'

    res = returnSingleResult( self.fc.setReplicaStatus( {lfn:problematicDict} ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    gLogger.info( "%s replica (%d) is updated to Checked status" % ( prognosis, fileID ) )
    return self.__updateCompletedFiles( prognosis, fileID )

  def resolveCatalogPFNSizeMismatch( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
    lfn = problematicDict['LFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']
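    # Resolution matrix for the three sizes gathered below:
    #   catalog == bookkeeping == storage  -> replica is fine, mark it Checked
    #   catalog == bookkeeping != storage  -> storage copy is bad: remove the
    #                                         replica if another one exists
    #   catalog != bookkeeping == storage  -> catalog entry is bad: mark the
    #                                         replica Checked and change the
    #                                         prognosis to BKCatalogSizeMismatch
    #   all three differ                   -> no safe action, bump retry count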


    res = returnSingleResult( self.fc.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']
    res = returnSingleResult( StorageElement( se ).getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    bkKCatalog = FileCatalog( ['BookkeepingDB'] )
    res = returnSingleResult( bkKCatalog.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    bookkeepingSize = res['Value']
    if bookkeepingSize == catalogSize == storageSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    if catalogSize == bookkeepingSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID )
      res = returnSingleResult( self.fc.getReplicas( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      if len( res['Value'] ) <= 1:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID )
        return S_ERROR( "Not removing catalog file mismatch since the only replica" )
      else:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID )
        res = self.dm.removeReplica( se, lfn )
        if not res['OK']:
          return self.__returnProblematicError( fileID, res )
        return self.__updateCompletedFiles( 'CatalogPFNSizeMismatch', fileID )
    if ( catalogSize != bookkeepingSize ) and ( bookkeepingSize == storageSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID )
      res = self.__updateReplicaToChecked( problematicDict )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.changeProblematicPrognosis( fileID, 'BKCatalogSizeMismatch' )
    gLogger.info( "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count" % fileID )
    return self.incrementProblematicRetry( fileID )

  ############################################################################################

  def _reportProblematicFiles( self, lfns, reason ):
    """ Simple wrapper function around setFileProblematic
    """
    gLogger.info( 'The following %s files were found with %s' % ( len( lfns ), reason ) )
    for lfn in sorted( lfns ):
      gLogger.info( lfn )
    res = self.setFileProblematic( lfns, reason, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with files', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with files' )
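
setReplicaProblematic and its reporting wrapper above exchange replicas as (lfn, pfn, se, reason) tuples. A hypothetical invocation (the LFN, PFN and SE names are invented):

# Hypothetical call; the tuple layout is (lfn, pfn, se, reason).
client = DataIntegrityClient()
replicaTuple = ( '/lfn/some/file',                      # lfn
                 'srm://se.example.org/lfn/some/file',  # pfn ('' if unknown)
                 'EXAMPLE-SE',                          # storage element
                 'CatalogPFNSizeMismatch' )             # prognosis / reason
res = client.setReplicaProblematic( replicaTuple, sourceComponent = 'DataIntegrityClient' )
if not res['OK']:
  gLogger.error( res['Message'] )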
Ejemplo n.º 10
0
class DataIntegrityClient( Client ):

  """  
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB
 
  """

  def __init__( self, **kwargs ):

    Client.__init__( self, **kwargs )
    self.setServer( 'DataManagement/DataIntegrity' )
    self.dm = DataManager()
    self.fc = FileCatalog()

  ##########################################################################
  #
  # This section contains the specific methods for LFC->SE checks
  #

  def catalogDirectoryToSE( self, lfnDir ):
    """ This obtains the replica and metadata information from the catalog for the supplied directory and checks against the storage elements.
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the LFC->SE check" )
    gLogger.info( "-" * 40 )
    if type( lfnDir ) in types.StringTypes:
      lfnDir = [lfnDir]
    res = self.__getCatalogDirectoryContents( lfnDir )
    if not res['OK']:
      return res
    replicas = res['Value']['Replicas']
    catalogMetadata = res['Value']['Metadata']
    res = self.__checkPhysicalFiles( replicas, catalogMetadata )
    if not res['OK']:
      return res
    resDict = {'CatalogMetadata':catalogMetadata, 'CatalogReplicas':replicas}
    return S_OK( resDict )

  def catalogFileToSE( self, lfns ):
    """ This obtains the replica and metadata information from the catalog and checks against the storage elements.
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the LFC->SE check" )
    gLogger.info( "-" * 40 )
    if type( lfns ) in types.StringTypes:
      lfns = [lfns]
    res = self.__getCatalogMetadata( lfns )
    if not res['OK']:
      return res
    catalogMetadata = res['Value']
    res = self.__getCatalogReplicas( catalogMetadata.keys() )
    if not res['OK']:
      return res
    replicas = res['Value']
    res = self.__checkPhysicalFiles( replicas, catalogMetadata )
    if not res['OK']:
      return res
    resDict = {'CatalogMetadata':catalogMetadata, 'CatalogReplicas':replicas}
    return S_OK( resDict )

  def checkPhysicalFiles( self, replicas, catalogMetadata, ses = [] ):
    """ This obtains takes the supplied replica and metadata information obtained from the catalog and checks against the storage elements.
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the LFC->SE check" )
    gLogger.info( "-" * 40 )
    return self.__checkPhysicalFiles( replicas, catalogMetadata, ses = ses )

  def __checkPhysicalFiles( self, replicas, catalogMetadata, ses = [] ):
    """ This obtains the physical file metadata and checks the metadata against the catalog entries
    """
    sePfns = {}
    pfnLfns = {}
    for lfn, replicaDict in replicas.items():
      for se, pfn in replicaDict.items():
        if ( ses ) and ( se not in ses ):
          continue
        if not sePfns.has_key( se ):
          sePfns[se] = []
        sePfns[se].append( pfn )
        pfnLfns[pfn] = lfn
    gLogger.info( '%s %s' % ( 'Storage Element'.ljust( 20 ), 'Replicas'.rjust( 20 ) ) )
    for site in sortList( sePfns.keys() ):
      files = len( sePfns[site] )
      gLogger.info( '%s %s' % ( site.ljust( 20 ), str( files ).rjust( 20 ) ) )

    for se in sortList( sePfns.keys() ):
      pfns = sePfns[se]
      pfnDict = {}
      for pfn in pfns:
        pfnDict[pfn] = pfnLfns[pfn]
      sizeMismatch = []
      res = self.__checkPhysicalFileMetadata( pfnDict, se )
      if not res['OK']:
        gLogger.error( 'Failed to get physical file metadata.', res['Message'] )
        return res
      for pfn, metadata in res['Value'].items():
        if catalogMetadata.has_key( pfnLfns[pfn] ):
          if ( metadata['Size'] != catalogMetadata[pfnLfns[pfn]]['Size'] ) and ( metadata['Size'] != 0 ):
            sizeMismatch.append( ( pfnLfns[pfn], pfn, se, 'CatalogPFNSizeMismatch' ) )
      if sizeMismatch:
        self.__reportProblematicReplicas( sizeMismatch, se, 'CatalogPFNSizeMismatch' )
    return S_OK()
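
  # Expected input shapes for the check above (values invented):
  #   replicas        = { '/lfn/a': { 'EXAMPLE-SE': 'srm://se.example.org/lfn/a' } }
  #   catalogMetadata = { '/lfn/a': { 'Size': 1024 } }
  # Replicas whose physical size differs from the catalogue size (and is
  # non-zero) are reported with the 'CatalogPFNSizeMismatch' prognosis.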

  def __checkPhysicalFileMetadata( self, pfnLfns, se ):
    """ Check obtain the physical file metadata and check the files are available
    """
    gLogger.info( 'Checking the integrity of %s physical files at %s' % ( len( pfnLfns ), se ) )


    res = StorageElement( se ).getFileMetadata( pfnLfns.keys() )

    if not res['OK']:
      gLogger.error( 'Failed to get metadata for pfns.', res['Message'] )
      return res
    pfnMetadataDict = res['Value']['Successful']
    # If the replicas are completely missing
    missingReplicas = []
    for pfn, reason in res['Value']['Failed'].items():
      if re.search( 'File does not exist', reason ):
        missingReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNMissing' ) )
    if missingReplicas:
      self.__reportProblematicReplicas( missingReplicas, se, 'PFNMissing' )
    lostReplicas = []
    unavailableReplicas = []
    zeroSizeReplicas = []
    # If the files are not accessible
    for pfn, pfnMetadata in pfnMetadataDict.items():
      if pfnMetadata['Lost']:
        lostReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNLost' ) )
      if pfnMetadata['Unavailable']:
        unavailableReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNUnavailable' ) )
      if pfnMetadata['Size'] == 0:
        zeroSizeReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNZeroSize' ) )
    if lostReplicas:
      self.__reportProblematicReplicas( lostReplicas, se, 'PFNLost' )
    if unavailableReplicas:
      self.__reportProblematicReplicas( unavailableReplicas, se, 'PFNUnavailable' )
    if zeroSizeReplicas:
      self.__reportProblematicReplicas( zeroSizeReplicas, se, 'PFNZeroSize' )
    gLogger.info( 'Checking the integrity of physical files at %s complete' % se )
    return S_OK( pfnMetadataDict )

  ##########################################################################
  #
  # This section contains the specific methods for SE->LFC checks
  #

  def storageDirectoryToCatalog( self, lfnDir, storageElement ):
    """ This obtains the file found on the storage element in the supplied directories and determines whether they exist in the catalog and checks their metadata elements
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the SE->LFC check at %s" % storageElement )
    gLogger.info( "-" * 40 )
    if type( lfnDir ) in types.StringTypes:
      lfnDir = [lfnDir]
    res = self.__getStorageDirectoryContents( lfnDir, storageElement )
    if not res['OK']:
      return res
    storageFileMetadata = res['Value']
    if storageFileMetadata:
      return self.__checkCatalogForSEFiles( storageFileMetadata, storageElement )
    return S_OK( {'CatalogMetadata':{}, 'StorageMetadata':{}} )

  def __checkCatalogForSEFiles( self, storageMetadata, storageElement ):
    gLogger.info( 'Checking %s storage files exist in the catalog' % len( storageMetadata ) )

    # RF_NOTE : this comment is completely wrong
    # First get all the PFNs as they should be registered in the catalog
    res = StorageElement( storageElement ).getPfnForProtocol( storageMetadata.keys(), withPort = False )
    if not res['OK']:
      gLogger.error( "Failed to get registered PFNs for physical files", res['Message'] )
      return res
    for pfn, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
    for original, registered in res['Value']['Successful'].items():
      storageMetadata[registered] = storageMetadata.pop( original )
    # Determine whether these PFNs are registered and if so obtain the LFN
    res = self.fc.getLFNForPFN( storageMetadata.keys() )
    if not res['OK']:
      gLogger.error( "Failed to get registered LFNs for PFNs", res['Message'] )
      return res
    failedPfns = res['Value']['Failed']
    notRegisteredPfns = []
    for pfn, error in failedPfns.items():
      if re.search( 'No such file or directory', error ):
        notRegisteredPfns.append( ( storageMetadata[pfn]['LFN'], pfn, storageElement, 'PFNNotRegistered' ) )
        failedPfns.pop( pfn )
    if notRegisteredPfns:
      self.__reportProblematicReplicas( notRegisteredPfns, storageElement, 'PFNNotRegistered' )
    if failedPfns:
      return S_ERROR( 'Failed to obtain LFNs for PFNs' )
    pfnLfns = res['Value']['Successful']
    for pfn in storageMetadata.keys():
      pfnMetadata = storageMetadata.pop( pfn )
      if pfn in pfnLfns.keys():
        lfn = pfnLfns[pfn]
        storageMetadata[lfn] = pfnMetadata
        storageMetadata[lfn]['PFN'] = pfn
    # For the LFNs found to be registered obtain the file metadata from the catalog and verify against the storage metadata
    res = self.__getCatalogMetadata( storageMetadata.keys() )
    if not res['OK']:
      return res
    catalogMetadata = res['Value']
    sizeMismatch = []
    for lfn, lfnCatalogMetadata in catalogMetadata.items():
      lfnStorageMetadata = storageMetadata[lfn]
      if ( lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size'] ) and ( lfnStorageMetadata['Size'] != 0 ):
        sizeMismatch.append( ( lfn, storageMetadata[lfn]['PFN'], storageElement, 'CatalogPFNSizeMismatch' ) )
    if sizeMismatch:
      self.__reportProblematicReplicas( sizeMismatch, storageElement, 'CatalogPFNSizeMismatch' )
    gLogger.info( 'Checking storage files exist in the catalog complete' )
    resDict = {'CatalogMetadata':catalogMetadata, 'StorageMetadata':storageMetadata}
    return S_OK( resDict )

  def getStorageDirectoryContents( self, lfnDir, storageElement ):
    """ This obtains takes the supplied lfn directories and recursively obtains the files in the supplied storage element
    """
    return self.__getStorageDirectoryContents( lfnDir, storageElement )

  def __getStorageDirectoryContents( self, lfnDir, storageElement ):
    """ Obtians the contents of the supplied directory on the storage
    """
    gLogger.info( 'Obtaining the contents for %s directories at %s' % ( len( lfnDir ), storageElement ) )

    se = StorageElement( storageElement )
    res = se.getPfnForLfn( lfnDir )

    if not res['OK']:
      gLogger.error( "Failed to get PFNs for directories", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain directory PFN from LFNs', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectories = res['Value']['Successful'].values()
    res = se.exists( storageDirectories )
    if not res['OK']:
      gLogger.error( "Failed to obtain existance of directories", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to determine existence of directory', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to determine existence of directory' )
    directoryExists = res['Value']['Successful']
    activeDirs = []
    for directory in sortList( directoryExists.keys() ):
      exists = directoryExists[directory]
      if exists:
        activeDirs.append( directory )
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = se.listDirectory( currentDir )
      activeDirs.remove( currentDir )
      if not res['OK']:
        gLogger.error( 'Failed to get directory contents', res['Message'] )
        return res
      elif res['Value']['Failed'].has_key( currentDir ):
        gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Value']['Failed'][currentDir] ) )
        return S_ERROR( res['Value']['Failed'][currentDir] )
      else:
        dirContents = res['Value']['Successful'][currentDir]
        activeDirs.extend( dirContents['SubDirs'] )
        fileMetadata = dirContents['Files']

        # RF_NOTE This ugly trick is needed because se.getPfnPath does not follow the Successful/Failed convention
#         res = { "Successful" : {}, "Failed" : {} }
#         for pfn in fileMetadata:
#           inRes = se.getPfnPath( pfn )
#           if inRes["OK"]:
#             res["Successful"][pfn] = inRes["Value"]
#           else:
#             res["Failed"][pfn] = inRes["Message"]
        res = se.getLfnForPfn( fileMetadata.keys() )
        if not res['OK']:
          gLogger.error( 'Failed to get directory content LFNs', res['Message'] )
          return res

        for pfn, error in res['Value']['Failed'].items():
          gLogger.error( "Failed to get LFN for PFN", "%s %s" % ( pfn, error ) )
        if res['Value']['Failed']:
          return S_ERROR( "Failed to get LFNs for PFNs" )
        pfnLfns = res['Value']['Successful']
        for pfn, lfn in pfnLfns.items():
          fileMetadata[pfn]['LFN'] = lfn
        allFiles.update( fileMetadata )
    zeroSizeFiles = []
    lostFiles = []
    unavailableFiles = []
    for pfn in sortList( allFiles.keys() ):
      if os.path.basename( pfn ) == 'dirac_directory':
        allFiles.pop( pfn )
      else:
        metadata = allFiles[pfn]
        if metadata['Size'] == 0:
          zeroSizeFiles.append( ( metadata['LFN'], pfn, storageElement, 'PFNZeroSize' ) )
        # if metadata['Lost']:
        #  lostFiles.append((metadata['LFN'],pfn,storageElement,'PFNLost'))
        # if metadata['Unavailable']:
        #  unavailableFiles.append((metadata['LFN'],pfn,storageElement,'PFNUnavailable'))
    if zeroSizeFiles:
      self.__reportProblematicReplicas( zeroSizeFiles, storageElement, 'PFNZeroSize' )
    if lostFiles:
      self.__reportProblematicReplicas( lostFiles, storageElement, 'PFNLost' )
    if unavailableFiles:
      self.__reportProblematicReplicas( unavailableFiles, storageElement, 'PFNUnavailable' )
    gLogger.info( 'Obtained a total of %s files for directories at %s' % ( len( allFiles ), storageElement ) )
    return S_OK( allFiles )

  def __getStoragePathExists( self, lfnPaths, storageElement ):
    gLogger.info( 'Determining the existence of %d files at %s' % ( len( lfnPaths ), storageElement ) )

    se = StorageElement( storageElement )
    res = se.getPfnForLfn( lfnPaths )
    if not res['OK']:
      gLogger.error( "Failed to get PFNs for LFNs", res['Message'] )
      return res
    for lfnPath, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain PFN from LFN', '%s %s' % ( lfnPath, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain PFNs from LFNs' )
    lfnPfns = res['Value']['Successful']
    pfnLfns = {}
    for lfn, pfn in lfnPfns.items():
      pfnLfns[pfn] = lfn

    res = se.exists( pfnLfns )
    if not res['OK']:
      gLogger.error( "Failed to obtain existance of paths", res['Message'] )
      return res
    for lfnPath, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to determine existence of path', '%s %s' % ( lfnPath, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to determine existence of paths' )
    pathExists = res['Value']['Successful']
    resDict = {}
    for pfn, exists in pathExists.items():
      if exists:
        resDict[pfnLfns[pfn]] = pfn
    return S_OK( resDict )

  ##########################################################################
  #
  # This section contains the specific methods for obtaining replica and metadata information from the catalog
  #

  def __getCatalogDirectoryContents( self, lfnDir ):
    """ Obtain the contents of the supplied directory
    """
    gLogger.info( 'Obtaining the catalog contents for %s directories' % len( lfnDir ) )

    activeDirs = lfnDir
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = self.fc.listDirectory( currentDir )
      activeDirs.remove( currentDir )
      if not res['OK']:
        gLogger.error( 'Failed to get directory contents', res['Message'] )
        return res
      elif res['Value']['Failed'].has_key( currentDir ):
        gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Value']['Failed'][currentDir] ) )
      else:
        dirContents = res['Value']['Successful'][currentDir]
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )

    zeroReplicaFiles = []
    zeroSizeFiles = []
    allReplicaDict = {}
    allMetadataDict = {}
    for lfn, lfnDict in allFiles.items():
      lfnReplicas = {}
      for se, replicaDict in lfnDict['Replicas'].items():
        lfnReplicas[se] = replicaDict['PFN']
      if not lfnReplicas:
        zeroReplicaFiles.append( lfn )
      allReplicaDict[lfn] = lfnReplicas
      allMetadataDict[lfn] = lfnDict['MetaData']
      if lfnDict['MetaData']['Size'] == 0:
        zeroSizeFiles.append( lfn )
    if zeroReplicaFiles:
      self.__reportProblematicFiles( zeroReplicaFiles, 'LFNZeroReplicas' )
    if zeroSizeFiles:
      self.__reportProblematicFiles( zeroSizeFiles, 'LFNZeroSize' )
    gLogger.info( 'Obtained a total of %s files for the supplied directories' % len( allMetadataDict ) )
    resDict = {'Metadata':allMetadataDict, 'Replicas':allReplicaDict}
    return S_OK( resDict )

  def __getCatalogReplicas( self, lfns ):
    """ Obtain the file replicas from the catalog while checking that there are replicas
    """
    gLogger.info( 'Obtaining the replicas for %s files' % len( lfns ) )

    zeroReplicaFiles = []
    res = self.fc.getReplicas( lfns, allStatus = True )
    if not res['OK']:
      gLogger.error( 'Failed to get catalog replicas', res['Message'] )
      return res
    allReplicas = res['Value']['Successful']
    for lfn, error in res['Value']['Failed'].items():
      if re.search( 'File has zero replicas', error ):
        zeroReplicaFiles.append( lfn )
    if zeroReplicaFiles:
      self.__reportProblematicFiles( zeroReplicaFiles, 'LFNZeroReplicas' )
    gLogger.info( 'Obtaining the replicas for files complete' )
    return S_OK( allReplicas )

  def __getCatalogMetadata( self, lfns ):
    """ Obtain the file metadata from the catalog while checking they exist
    """
    if not lfns:
      return S_OK( {} )
    gLogger.info( 'Obtaining the catalog metadata for %s files' % len( lfns ) )

    missingCatalogFiles = []
    zeroSizeFiles = []
    res = self.fc.getFileMetadata( lfns )
    if not res['OK']:
      gLogger.error( 'Failed to get catalog metadata', res['Message'] )
      return res
    allMetadata = res['Value']['Successful']
    for lfn, error in res['Value']['Failed'].items():
      if re.search( 'No such file or directory', error ):
        missingCatalogFiles.append( lfn )
    if missingCatalogFiles:
      self.__reportProblematicFiles( missingCatalogFiles, 'LFNCatalogMissing' )
    for lfn, metadata in allMetadata.items():
      if metadata['Size'] == 0:
        zeroSizeFiles.append( lfn )
    if zeroSizeFiles:
      self.__reportProblematicFiles( zeroSizeFiles, 'LFNZeroSize' )
    gLogger.info( 'Obtaining the catalog metadata complete' )
    return S_OK( allMetadata )

  ##########################################################################
  #
  # This section contains the methods for inserting problematic files into the integrity DB
  #

  def __reportProblematicFiles( self, lfns, reason ):
    """ Simple wrapper function around setFileProblematic """
    gLogger.info( 'The following %s files were found with %s' % ( len( lfns ), reason ) )
    for lfn in sortList( lfns ):
      gLogger.info( lfn )
    res = self.setFileProblematic( lfns, reason, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with files', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with files' )

  def setFileProblematic( self, lfn, reason, sourceComponent = '' ):
    """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if type( lfn ) == types.ListType:
      lfns = lfn
    elif type( lfn ) == types.StringType:
      lfns = [lfn]
    else:
      errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setFileProblematic: Attempting to update %s files." % len( lfns ) )
    fileMetadata = {}
    for lfn in lfns:
      fileMetadata[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':'', 'SE':''}
    res = self.insertProblematic( sourceComponent, fileMetadata )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB" )
    return res

  def __reportProblematicReplicas( self, replicaTuple, se, reason ):
    """ Simple wrapper function around setReplicaProblematic """
    gLogger.info( 'The following %s files had %s at %s' % ( len( replicaTuple ), reason, se ) )
    for lfn, pfn, se, reason in sortList( replicaTuple ):
      if lfn:
        gLogger.info( lfn )
      else:
        gLogger.info( pfn )
    res = self.setReplicaProblematic( replicaTuple, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with replicas', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with replicas' )

  def setReplicaProblematic( self, replicaTuple, sourceComponent = '' ):
    """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaDict should be of the form {lfn :{'PFN':pfn,'SE':se,'Prognosis':prognosis}

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if type( replicaTuple ) == types.TupleType:
      replicaTuple = [replicaTuple]
    elif type( replicaTuple ) == types.ListType:
      pass
    else:
      errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas." % len( replicaTuple ) )
    replicaDict = {}
    for lfn, pfn, se, reason in replicaTuple:
      replicaDict[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':pfn, 'SE':se}
    res = self.insertProblematic( sourceComponent, replicaDict )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB" )
      return res
    for lfn in replicaDict.keys():
      replicaDict[lfn]['Status'] = 'Problematic'

    res = self.fc.setReplicaStatus( replicaDict )
    if not res['OK']:
      errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
      gLogger.error( errStr, res['Message'] )
      return res
    failed = res['Value']['Failed']
    successful = res['Value']['Successful']
    resDict = {'Successful':successful, 'Failed':failed}
    return S_OK( resDict )

  ##########################################################################
  #
  # This section contains the resolution methods for various prognoses
  #

  def __updateCompletedFiles( self, prognosis, fileID ):
    gLogger.info( "%s file (%d) is resolved" % ( prognosis, fileID ) )
    return self.setProblematicStatus( fileID, 'Resolved' )

  def __returnProblematicError( self, fileID, res ):
    self.incrementProblematicRetry( fileID )
    gLogger.error( res['Message'] )
    return res

  def __getRegisteredPFNLFN( self, pfn, storageElement ):

    res = StorageElement( storageElement ).getPfnForProtocol( pfn, withPort = False )
    if not res['OK']:
      gLogger.error( "Failed to get registered PFN for physical files", res['Message'] )
      return res
    for pfn, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
      return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
    registeredPFN = res['Value']['Successful'][pfn]
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getLFNForPFN( registeredPFN ) )
    if ( not res['OK'] ) and re.search( 'No such file or directory', res['Message'] ):
      return S_OK( False )
    return S_OK( res['Value'] )

  def __updateReplicaToChecked( self, problematicDict ):
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']
    prognosis = problematicDict['Prognosis']
    problematicDict['Status'] = 'Checked'

    res = Utils.executeSingleFileOrDirWrapper( self.fc.setReplicaStatus( {lfn:problematicDict} ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    gLogger.info( "%s replica (%d) is updated to Checked status" % ( prognosis, fileID ) )
    return self.__updateCompletedFiles( prognosis, fileID )

  def resolveCatalogPFNSizeMismatch( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
    lfn = problematicDict['LFN']
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']


    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']
    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).getFileSize( pfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    bkKCatalog = FileCatalog( ['BookkeepingDB'] )
    res = Utils.executeSingleFileOrDirWrapper( bkKCatalog.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    bookkeepingSize = res['Value']
    if bookkeepingSize == catalogSize == storageSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    if ( catalogSize == bookkeepingSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID )
      res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      if len( res['Value'] ) <= 1:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID )
        return S_ERROR( "Not removing catalog file mismatch since the only replica" )
      else:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID )
        res = self.dm.removeReplica( se, lfn )
        if not res['OK']:
          return self.__returnProblematicError( fileID, res )
        return self.__updateCompletedFiles( 'CatalogPFNSizeMismatch', fileID )
    if ( catalogSize != bookkeepingSize ) and ( bookkeepingSize == storageSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID )
      res = self.__updateReplicaToChecked( problematicDict )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.changeProblematicPrognosis( fileID, 'BKCatalogSizeMismatch' )
    gLogger.info( "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count" % fileID )
    return self.incrementProblematicRetry( fileID )

  def resolvePFNNotRegistered( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
    lfn = problematicDict['LFN']
    pfn = problematicDict['PFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      # The file does not exist in the catalog
      res = Utils.executeSingleFileOrDirWrapper( se.removeFile( pfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    res = Utils.executeSingleFileOrDirWrapper( se.getFileMetadata( pfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNNotRegistered replica (%d) found to be missing." % fileID )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    elif not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageMetadata = res['Value']
    if storageMetadata['Lost']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    if storageMetadata['Unavailable']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count" % fileID )
      return self.incrementProblematicRetry( fileID )

    # HACK until we can obtain the space token descriptions through GFAL
    site = seName.split( '_' )[0].split( '-' )[0]
    if not storageMetadata['Cached']:
      if lfn.endswith( '.raw' ):
        seName = '%s-RAW' % site
      else:
        seName = '%s-RDST' % site
    elif storageMetadata['Migrated']:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s_M-DST' % site
      else:
        seName = '%s_MC_M-DST' % site
    else:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s-DST' % site
      else:
        seName = '%s_MC-DST' % site

    problematicDict['SE'] = seName
    res = se.getPfnForProtocol( pfn, withPort = False )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    for pfn, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
      return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
    problematicDict['PFN'] = res['Value']['Successful'][pfn]

    res = Utils.executeSingleFileOrDirWrapper( self.fc.addReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileMetadata( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']['Size'] != storageMetadata['Size']:
      gLogger.info( "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )

  def resolveLFNCatalogMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )
    # Remove the file from all catalogs
    # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
    res = Utils.executeSingleFileOrDirWrapper( self.fc.removeFile( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )

  def resolvePFNMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      gLogger.info( "PFNMissing file (%d) no longer exists in catalog" % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )

    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).exists( pfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      gLogger.info( "PFNMissing replica (%d) is no longer missing" % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    gLogger.info( "PFNMissing replica (%d) does not exist" % fileID )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn, allStatus = True ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    replicas = res['Value']
    seSite = se.split( '_' )[0].split( '-' )[0]
    found = False
    for replicaSE in replicas.keys():
      if re.search( seSite, replicaSE ):
        found = True
        problematicDict['SE'] = replicaSE
        se = replicaSE
    if not found:
      gLogger.info( "PFNMissing replica (%d) is no longer registered at SE. Resolved." % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )
    gLogger.info( "PFNMissing replica (%d) does not exist. Removing from catalog..." % fileID )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.removeReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if len( replicas ) == 1:
      gLogger.info( "PFNMissing replica (%d) had a single replica. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'LFNZeroReplicas' )
    res = self.dm.replicateAndRegister( problematicDict['LFN'], se )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'PFNMissing', fileID )

  def resolvePFNUnavailable( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).getFileMetadata( pfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      # The file is no longer Unavailable but has now disappeared completely
      gLogger.info( "PFNUnavailable replica (%d) found to be missing. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    if ( not res['OK'] ) or res['Value']['Unavailable']:
      gLogger.info( "PFNUnavailable replica (%d) found to still be Unavailable" % fileID )
      return self.incrementProblematicRetry( fileID )
    if res['Value']['Lost']:
      gLogger.info( "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    gLogger.info( "PFNUnavailable replica (%d) is no longer Unavailable" % fileID )
    # Need to make the replica okay in the Catalog
    return self.__updateReplicaToChecked( problematicDict )

  def resolvePFNZeroSize( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
    pfn = problematicDict['PFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )

    res = Utils.executeSingleFileOrDirWrapper( se.getFileSize( pfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNZeroSize replica (%d) found to be missing. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    if storageSize == 0:
      res = Utils.executeSingleFileOrDirWrapper( se.removeFile( pfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      gLogger.info( "PFNZeroSize replica (%d) removed. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    res = self.__getRegisteredPFNLFN( pfn, seName )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    lfn = res['Value']
    if not lfn:
      gLogger.info( "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNNotRegistered' )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileMetadata( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']['Size']
    if catalogSize != storageSize:
      gLogger.info( "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNZeroSize', fileID )

  ############################################################################################

  def resolveLFNZeroReplicas( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn, allStatus = True ) )
    if res['OK'] and res['Value']:
      gLogger.info( "LFNZeroReplicas file (%d) found to have replicas" % fileID )
    else:
      gLogger.info( "LFNZeroReplicas file (%d) does not have replicas. Checking storage..." % fileID )
      pfnsFound = False
      for storageElementName in sortList( gConfig.getValue( 'Resources/StorageElementGroups/Tier1_MC_M-DST', [] ) ):
        res = self.__getStoragePathExists( [lfn], storageElementName )
        if not res['OK']:
          gLogger.error( res['Message'] )
          continue
        if lfn in res['Value']:
          gLogger.info( "LFNZeroReplicas file (%d) found storage file at %s" % ( fileID, storageElementName ) )
          pfn = res['Value'][lfn]
          self.__reportProblematicReplicas( [( lfn, pfn, storageElementName, 'PFNNotRegistered' )], storageElementName, 'PFNNotRegistered' )
          pfnsFound = True
      if not pfnsFound:
        gLogger.info( "LFNZeroReplicas file (%d) did not have storage files. Removing..." % fileID )
        res = Utils.executeSingleFileOrDirWrapper( self.fc.removeFile( lfn ) )
        if not res['OK']:
          gLogger.error( res['Message'] )
          # Increment the number of retries for this file
          self.incrementProblematicRetry( fileID )
          return res
        gLogger.info( "LFNZeroReplicas file (%d) removed from catalog" % fileID )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'LFNZeroReplicas', fileID )
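
  # A minimal dispatch sketch (hypothetical, not part of the original class):
  # the integrity DB record is assumed to also carry a 'Prognosis' key naming
  # one of the resolve methods above.
  def resolveProblematic( self, problematicDict ):
    """ Dispatch a problematic record to the matching resolve method (sketch) """
    prognosis = problematicDict.get( 'Prognosis', '' )
    method = getattr( self, 'resolve%s' % prognosis, None )
    if method is None:
      return S_ERROR( "No resolution method for prognosis '%s'" % prognosis )
    return method( problematicDict )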
Example No. 11
class PluginUtilities(object):
    """
  Utility class used by plugins
  """
    def __init__(self,
                 plugin='Standard',
                 transClient=None,
                 dataManager=None,
                 fc=None,
                 debug=False,
                 transInThread=None,
                 transID=None):
        """
    c'tor

    Setting defaults
    """
        # clients
        if transClient is None:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        if dataManager is None:
            self.dm = DataManager()
        else:
            self.dm = dataManager
        if fc is None:
            self.fc = FileCatalog()
        else:
            self.fc = fc

        self.dmsHelper = DMSHelpers()

        self.plugin = plugin
        self.transID = transID
        self.params = {}
        self.groupSize = 0
        self.maxFiles = 0
        self.cachedLFNSize = {}
        self.transString = ''
        self.debug = debug
        self.seConfig = {}
        if transInThread is None:
            self.transInThread = {}
        else:
            self.transInThread = transInThread

        self.log = gLogger.getSubLogger("%s/PluginUtilities" % plugin)

    def logVerbose(self, message, param=''):
        if self.debug:
            self.log.info('(V)' + self.transString + message, param)
        else:
            self.log.verbose(self.transString + message, param)

    def logDebug(self, message, param=''):
        self.log.debug(self.transString + message, param)

    def logInfo(self, message, param=''):
        self.log.info(self.transString + message, param)

    def logWarn(self, message, param=''):
        self.log.warn(self.transString + message, param)

    def logError(self, message, param=''):
        self.log.error(self.transString + message, param)

    def logException(self, message, param='', lException=False):
        self.log.exception(self.transString + message, param, lException)

    def setParameters(self, params):
        self.params = params
        self.transID = params['TransformationID']
        self.transString = self.transInThread.get(
            self.transID,
            ' [NoThread] [%d] ' % self.transID) + '%s: ' % self.plugin

    @timeThis
    def groupByReplicas(self, files, status):
        """
    Generates tasks based on the location of the input data

   :param dict files:
              {'/this/is/at.1': ['SE1'],
               '/this/is/at.12': ['SE1', 'SE2'],
               '/this/is/at.2': ['SE2'],
               '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
               '/this/is/at_23': ['SE2', 'SE3'],
               '/this/is/at_4': ['SE4']}

    """
        tasks = []
        nTasks = 0

        if not len(files):
            return S_OK(tasks)

        files = dict(files)

        # Parameters
        if not self.groupSize:
            self.groupSize = self.getPluginParam('GroupSize', 10)
        flush = (status == 'Flush')
        self.logVerbose("groupByReplicas: %d files, groupSize %d, flush %s" %
                        (len(files), self.groupSize, flush))

        # Consider files by groups of SEs, a file is only in one group
        # Then consider files site by site, but a file can now be at more than one site
        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)
            self.logDebug("fileGroups set: ", seFiles)

            for replicaSE in sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                if lfns:
                    tasksLfns = breakListIntoChunks(lfns, self.groupSize)
                    lfnsInTasks = []
                    for taskLfns in tasksLfns:
                        if (flush and not groupSE) or (len(taskLfns) >=
                                                       self.groupSize):
                            tasks.append((replicaSE, taskLfns))
                            lfnsInTasks += taskLfns
                    # In case the file was at more than one site, remove it from the other sites' list
                    # Remove files from global list
                    for lfn in lfnsInTasks:
                        files.pop(lfn)
                    if not groupSE:
                        # Remove files from other SEs
                        for se in [se for se in seFiles if se != replicaSE]:
                            seFiles[se] = [
                                lfn for lfn in seFiles[se]
                                if lfn not in lfnsInTasks
                            ]
            self.logVerbose(
                "groupByReplicas: %d tasks created (groupSE %s), %d files not included in tasks"
                % (len(tasks) - nTasks, str(groupSE), len(files)))
            nTasks = len(tasks)

        return S_OK(tasks)

    def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False):
        """
    Split files in groups according to the size and create tasks for a given SE
    """
        tasks = []
        if fileSizes is None:
            fileSizes = self._getFileSize(lfns).get('Value')
        if fileSizes is None:
            self.logWarn('Error getting file sizes, no tasks created')
            return tasks
        taskLfns = []
        taskSize = 0
        if not self.groupSize:
            self.groupSize = float(
                self.getPluginParam('GroupSize', 1.)
            ) * 1000 * 1000 * 1000  # input size in GB converted to bytes
        if not self.maxFiles:
            self.maxFiles = self.getPluginParam('MaxFiles', 100)
        lfns = sorted(lfns, key=fileSizes.get)
        for lfn in lfns:
            size = fileSizes.get(lfn, 0)
            if size:
                if size > self.groupSize:
                    tasks.append((replicaSE, [lfn]))
                else:
                    taskSize += size
                    taskLfns.append(lfn)
                    if (taskSize > self.groupSize) or (len(taskLfns) >=
                                                       self.maxFiles):
                        tasks.append((replicaSE, taskLfns))
                        taskLfns = []
                        taskSize = 0
        if flush and taskLfns:
            tasks.append((replicaSE, taskLfns))
        return tasks
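
    # Illustrative behaviour (hypothetical sizes; groupSize = 1 GB, maxFiles = 100):
    #   createTasksBySize(['/a', '/b', '/c'], 'SE1',
    #                     fileSizes={'/a': 5e9, '/b': 1e8, '/c': 2e8}, flush=True)
    #   -> [('SE1', ['/a']), ('SE1', ['/b', '/c'])]
    # Files above groupSize become single-file tasks; smaller files are packed
    # until groupSize or maxFiles is reached, or flushed at the end.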

    @timeThis
    def groupBySize(self, files, status):
        """
    Generate a task for a given amount of data
    """
        tasks = []
        nTasks = 0

        if not len(files):
            return S_OK(tasks)

        files = dict(files)
        # Parameters
        if not self.groupSize:
            self.groupSize = float(self.getPluginParam(
                'GroupSize',
                1)) * 1000 * 1000 * 1000  # input size in GB converted to bytes
        flush = (status == 'Flush')
        self.logVerbose("groupBySize: %d files, groupSize: %d, flush: %s" %
                        (len(files), self.groupSize, flush))

        # Get the file sizes
        res = self._getFileSize(files.keys())
        if not res['OK']:
            return res
        fileSizes = res['Value']

        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)

            for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                newTasks = self.createTasksBySize(lfns,
                                                  replicaSE,
                                                  fileSizes=fileSizes,
                                                  flush=flush)
                lfnsInTasks = []
                for task in newTasks:
                    lfnsInTasks += task[1]
                tasks += newTasks

                # Remove the selected files from the size cache
                self.clearCachedFileSize(lfnsInTasks)
                if not groupSE:
                    # Remove files from other SEs
                    for se in [se for se in seFiles if se != replicaSE]:
                        seFiles[se] = [
                            lfn for lfn in seFiles[se]
                            if lfn not in lfnsInTasks
                        ]
                # Remove files from global list
                for lfn in lfnsInTasks:
                    files.pop(lfn)

            self.logVerbose("groupBySize: %d tasks created with groupSE %s" %
                            (len(tasks) - nTasks, str(groupSE)))
            self.logVerbose(
                "groupBySize: %d files have not been included in tasks" %
                len(files))
            nTasks = len(tasks)

        self.logVerbose("Grouped %d files by size" % len(files))
        return S_OK(tasks)

    def getExistingCounters(self, normalise=False, requestedSites=None):
        res = self.transClient.getCounters(
            'TransformationFiles', ['UsedSE'],
            {'TransformationID': self.params['TransformationID']})
        if not res['OK']:
            return res
        usageDict = {}
        for usedDict, count in res['Value']:
            usedSE = usedDict['UsedSE']
            if usedSE != 'Unknown':
                usageDict[usedSE] = count
        if requestedSites:
            siteDict = {}
            for se, count in usageDict.items():
                res = getSitesForSE(se)
                if not res['OK']:
                    return res
                for site in res['Value']:
                    if site in requestedSites:
                        siteDict[site] = count
            usageDict = siteDict.copy()
        if normalise:
            usageDict = self._normaliseShares(usageDict)
        return S_OK(usageDict)

    @timeThis
    def _getFileSize(self, lfns):
        """ Get file size from a cache, if not from the catalog
    #FIXME: have to fill the cachedLFNSize!
    """
        lfns = list(lfns)
        cachedLFNSize = dict(self.cachedLFNSize)

        fileSizes = {}
        for lfn in [lfn for lfn in lfns if lfn in cachedLFNSize]:
            fileSizes[lfn] = cachedLFNSize[lfn]
        self.logDebug("Found cache hit for File size for %d files out of %d" %
                      (len(fileSizes), len(lfns)))
        lfns = [lfn for lfn in lfns if lfn not in cachedLFNSize]
        if lfns:
            fileSizes = self._getFileSizeFromCatalog(lfns, fileSizes)
            if not fileSizes['OK']:
                self.logError(fileSizes['Message'])
                return fileSizes
            fileSizes = fileSizes['Value']
        return S_OK(fileSizes)

    @timeThis
    def _getFileSizeFromCatalog(self, lfns, fileSizes):
        """
    Get file size from the catalog
    """
        lfns = list(lfns)
        fileSizes = dict(fileSizes)

        res = self.fc.getFileSize(lfns)
        if not res['OK']:
            return S_ERROR("Failed to get sizes for all files: %s" %
                           res['Message'])
        if res['Value']['Failed']:
            errorReason = sorted(set(res['Value']['Failed'].values()))
            self.logWarn(
                "Failed to get sizes for %d files:" %
                len(res['Value']['Failed']), errorReason)
        fileSizes.update(res['Value']['Successful'])
        self.cachedLFNSize.update((res['Value']['Successful']))
        self.logVerbose("Got size of %d files from catalog" % len(lfns))
        return S_OK(fileSizes)

    def clearCachedFileSize(self, lfns):
        """ Utility function
    """
        for lfn in [lfn for lfn in lfns if lfn in self.cachedLFNSize]:
            self.cachedLFNSize.pop(lfn)

    def getPluginParam(self, name, default=None):
        """ Get plugin parameters using specific settings or settings defined in the CS
        Caution: the type returned is that of the default value
    """
        # get the value of a parameter looking 1st in the CS
        if default is not None:
            valueType = type(default)
        else:
            valueType = None
        # First look at a generic value...
        optionPath = "TransformationPlugins/%s" % (name)
        value = Operations().getValue(optionPath, None)
        self.logVerbose("Default plugin param %s: '%s'" % (optionPath, value))
        # Then look at a plugin-specific value
        optionPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
        value = Operations().getValue(optionPath, value)
        self.logVerbose("Specific plugin param %s: '%s'" % (optionPath, value))
        if value is not None:
            default = value
        # Finally look at a transformation-specific parameter
        value = self.params.get(name, default)
        self.logVerbose("Transformation plugin param %s: '%s'. Convert to %s" %
                        (name, value, str(valueType)))
        if valueType and type(value) is not valueType:
            if valueType is list:
                try:
                    value = ast.literal_eval(
                        value) if value and value != 'None' else []
                except Exception:
                    value = [
                        val for val in value.replace(' ', '').split(',') if val
                    ]
            elif valueType is int:
                value = int(value)
            elif valueType is float:
                value = float(value)
            elif valueType is bool:
                if value in ('False', 'No', 'None', None, 0):
                    value = False
                else:
                    value = bool(value)
            elif valueType is not str:
                self.logWarn(
                    "Unknown parameter type (%s) for %s, passed as string" %
                    (str(valueType), name))
        self.logVerbose("Final plugin param %s: '%s'" % (name, value))
        return value
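
    # Coercion examples (hypothetical CS values): with default [] and the CS
    # string 'SE1,SE2', ast.literal_eval fails and the comma-split fallback
    # yields ['SE1', 'SE2']; with default 1. and CS string '2', the result is 2.0.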

    @staticmethod
    def _normaliseShares(originalShares):
        shares = originalShares.copy()
        total = 0.0
        for site in shares.keys():
            share = float(shares[site])
            shares[site] = share
            total += share
        for site in shares.keys():
            share = 100.0 * (shares[site] / total)
            shares[site] = share
        return shares
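
    # e.g. _normaliseShares({'SiteA': 1, 'SiteB': 3}) -> {'SiteA': 25.0, 'SiteB': 75.0}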

    def uniqueSEs(self, ses):
        newSEs = []
        for se in ses:
            if not self.isSameSEInList(se, newSEs):
                newSEs.append(se)
        return newSEs

    def isSameSE(self, se1, se2):
        if se1 == se2:
            return True
        for se in (se1, se2):
            if se not in self.seConfig:
                self.seConfig[se] = {}
                res = StorageElement(se).getStorageParameters('SRM2')
                if res['OK']:
                    params = res['Value']
                    for item in ('Host', 'Path'):
                        self.seConfig[se][item] = params[item].replace(
                            't1d1', 't0d1')
                else:
                    self.logError(
                        "Error getting StorageElement parameters for %s" % se,
                        res['Message'])

        return self.seConfig[se1] == self.seConfig[se2]

    def isSameSEInList(self, se1, seList):
        if se1 in seList:
            return True
        for se in seList:
            if self.isSameSE(se1, se):
                return True
        return False

    def closerSEs(self, existingSEs, targetSEs, local=False):
        """ Order the targetSEs such that the first ones are closer to existingSEs. Keep all elements in targetSEs
    """
        setTarget = set(targetSEs)
        sameSEs = set([
            se1 for se1 in setTarget for se2 in existingSEs
            if self.isSameSE(se1, se2)
        ])
        targetSEs = setTarget - set(sameSEs)
        if targetSEs:
            # Some SEs are left, look for sites
            existingSites = [
                self.dmsHelper.getLocalSiteForSE(se).get('Value')
                for se in existingSEs if not self.dmsHelper.isSEArchive(se)
            ]
            existingSites = set([site for site in existingSites if site])
            closeSEs = set([
                se for se in targetSEs if self.dmsHelper.getLocalSiteForSE(
                    se).get('Value') in existingSites
            ])
            # print existingSEs, existingSites, targetSEs, closeSEs
            otherSEs = targetSEs - closeSEs
            targetSEs = list(closeSEs)
            random.shuffle(targetSEs)
            if not local and otherSEs:
                otherSEs = list(otherSEs)
                random.shuffle(otherSEs)
                targetSEs += otherSEs
        else:
            targetSEs = []
        return (targetSEs + list(sameSEs)) if not local else targetSEs
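
# Minimal usage sketch (assumptions: a configured DIRAC client environment and
# the replica map below; output shape is illustrative):
#   pu = PluginUtilities(plugin='Standard', transID=1)
#   pu.setParameters({'TransformationID': 1})
#   res = pu.groupByReplicas({'/lfn/a': ['SE1'], '/lfn/b': ['SE1']}, 'Flush')
#   if res['OK']:
#       print res['Value']  # e.g. [('SE1', ['/lfn/a', '/lfn/b'])]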
Example No. 12
class RequestPreparationAgent(AgentModule):
    def initialize(self):
        self.fileCatalog = FileCatalog()
        self.dm = DataManager()
        self.stagerClient = StorageManagerClient()
        self.dataIntegrityClient = DataIntegrityClient()
        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        return S_OK()

    def execute(self):
        res = self.prepareNewReplicas()
        return res

    def prepareNewReplicas(self):
        """ This is the first logical task to be executed and manages the New->Waiting transition of the Replicas
    """
        res = self.__getNewReplicas()
        if not res['OK']:
            gLogger.fatal(
                "RequestPreparation.prepareNewReplicas: Failed to get replicas from StagerDB.",
                res['Message'])
            return res
        if not res['Value']:
            gLogger.info("There were no New replicas found")
            return res
        replicas = res['Value']['Replicas']
        replicaIDs = res['Value']['ReplicaIDs']
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained %s New replicas for preparation."
            % len(replicaIDs))

        # Check that the files exist in the FileCatalog
        res = self.__getExistingFiles(replicas.keys())
        if not res['OK']:
            return res
        exist = res['Value']['Exist']
        terminal = res['Value']['Missing']
        failed = res['Value']['Failed']
        if not exist:
            gLogger.error(
                'RequestPreparation.prepareNewReplicas: Failed to determine existence of any files'
            )
            return S_OK()
        terminalReplicaIDs = {}
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: %s files exist in the FileCatalog."
            % len(exist))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files do not exist in the FileCatalog."
                % len(terminal))

        # Obtain the file sizes from the FileCatalog
        res = self.__getFileSize(exist)
        if not res['OK']:
            return res
        failed.update(res['Value']['Failed'])
        terminal = res['Value']['ZeroSize']
        fileSizes = res['Value']['FileSizes']
        if not fileSizes:
            gLogger.error(
                'RequestPreparation.prepareNewReplicas: Failed to determine sizes of any files'
            )
            return S_OK()
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained %s file sizes from the FileCatalog."
            % len(fileSizes))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files registered with zero size in the FileCatalog."
                % len(terminal))

        # Obtain the replicas from the FileCatalog
        res = self.__getFileReplicas(fileSizes.keys())
        if not res['OK']:
            return res
        failed.update(res['Value']['Failed'])
        terminal = res['Value']['ZeroReplicas']
        fileReplicas = res['Value']['Replicas']
        if not fileReplicas:
            gLogger.error(
                'RequestPreparation.prepareNewReplicas: Failed to determine replicas for any files'
            )
            return S_OK()
        for lfn, reason in terminal.items():
            for _se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained replica information for %s file from the FileCatalog."
            % len(fileReplicas))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files registered with zero replicas in the FileCatalog."
                % len(terminal))

        # Check the replicas exist at the requested site
        replicaMetadata = []
        for lfn, requestedSEs in replicas.items():
            lfnReplicas = fileReplicas[lfn]
            for requestedSE, replicaID in requestedSEs.items():
                if requestedSE not in lfnReplicas:
                    terminalReplicaIDs[
                        replicaID] = "LFN not registered at requested SE"
                    replicas[lfn].pop(requestedSE)
                else:
                    replicaMetadata.append(
                        (replicaID, lfnReplicas[requestedSE], fileSizes[lfn]))

        # Update the states of the files in the database
        if terminalReplicaIDs:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s replicas are terminally failed."
                % len(terminalReplicaIDs))
            res = self.stagerClient.updateReplicaFailure(terminalReplicaIDs)
            if not res['OK']:
                gLogger.error(
                    "RequestPreparation.prepareNewReplicas: Failed to update replica failures.",
                    res['Message'])
        if replicaMetadata:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s replica metadata to be updated."
                % len(replicaMetadata))
            # Sets the Status='Waiting' of CacheReplicas records that are OK with catalogue checks
            res = self.stagerClient.updateReplicaInformation(replicaMetadata)
            if not res['OK']:
                gLogger.error(
                    "RequestPreparation.prepareNewReplicas: Failed to update replica metadata.",
                    res['Message'])
        return S_OK()

    def __getNewReplicas(self):
        """ This obtains the New replicas from the Replicas table and for each LFN the requested storage element """
        # First obtain the New replicas from the CacheReplicas table
        res = self.stagerClient.getCacheReplicas({'Status': 'New'})
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getNewReplicas: Failed to get replicas with New status.",
                res['Message'])
            return res
        if not res['Value']:
            gLogger.debug(
                "RequestPreparation.__getNewReplicas: No New replicas found to process."
            )
            return S_OK()
        else:
            gLogger.debug(
                "RequestPreparation.__getNewReplicas: Obtained %s New replicas(s) to process."
                % len(res['Value']))
        replicas = {}
        replicaIDs = {}
        for replicaID, info in res['Value'].items():
            lfn = info['LFN']
            storageElement = info['SE']
            if lfn not in replicas:
                replicas[lfn] = {}
            replicas[lfn][storageElement] = replicaID
            replicaIDs[replicaID] = (lfn, storageElement)
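        # Shape of the result (illustrative):
        #   {'Replicas':   {'/lfn/x': {'SE1': 1, 'SE2': 2}},
        #    'ReplicaIDs': {1: ('/lfn/x', 'SE1'), 2: ('/lfn/x', 'SE2')}}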
        return S_OK({'Replicas': replicas, 'ReplicaIDs': replicaIDs})

    def __getExistingFiles(self, lfns):
        """ This checks that the files exist in the FileCatalog. """
        filesExist = []
        missing = {}
        res = self.fileCatalog.exists(lfns)
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getExistingFiles: Failed to determine whether files exist.",
                res['Message'])
            return res
        failed = res['Value']['Failed']
        for lfn, exists in res['Value']['Successful'].items():
            if exists:
                filesExist.append(lfn)
            else:
                missing[lfn] = 'LFN not registered in the FileCatalog'
        if missing:
            for lfn, reason in missing.items():
                gLogger.warn(
                    "RequestPreparation.__getExistingFiles: %s" % reason, lfn)
            self.__reportProblematicFiles(missing.keys(),
                                          'LFN-LFC-DoesntExist')
        return S_OK({
            'Exist': filesExist,
            'Missing': missing,
            'Failed': failed
        })

    def __getFileSize(self, lfns):
        """ This obtains the file size from the FileCatalog. """
        fileSizes = {}
        zeroSize = {}
        res = self.fileCatalog.getFileSize(lfns)
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getFileSize: Failed to get sizes for files.",
                res['Message'])
            return res
        failed = res['Value']['Failed']
        for lfn, size in res['Value']['Successful'].items():
            if size == 0:
                zeroSize[
                    lfn] = "LFN registered with zero size in the FileCatalog"
            else:
                fileSizes[lfn] = size
        if zeroSize:
            for lfn, reason in zeroSize.items():
                gLogger.warn("RequestPreparation.__getFileSize: %s" % reason,
                             lfn)
            self.__reportProblematicFiles(zeroSize.keys(), 'LFN-LFC-ZeroSize')
        return S_OK({
            'FileSizes': fileSizes,
            'ZeroSize': zeroSize,
            'Failed': failed
        })

    def __getFileReplicas(self, lfns):
        """ This obtains the replicas from the FileCatalog. """
        replicas = {}
        noReplicas = {}
        res = self.dm.getActiveReplicas(lfns)
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getFileReplicas: Failed to obtain file replicas.",
                res['Message'])
            return res
        failed = res['Value']['Failed']
        for lfn, lfnReplicas in res['Value']['Successful'].items():
            if not lfnReplicas:
                noReplicas[
                    lfn] = "LFN registered with zero replicas in the FileCatalog"
            else:
                replicas[lfn] = lfnReplicas
        if noReplicas:
            for lfn, reason in noReplicas.items():
                gLogger.warn(
                    "RequestPreparation.__getFileReplicas: %s" % reason, lfn)
            self.__reportProblematicFiles(noReplicas.keys(),
                                          'LFN-LFC-NoReplicas')
        return S_OK({
            'Replicas': replicas,
            'ZeroReplicas': noReplicas,
            'Failed': failed
        })

    def __reportProblematicFiles(self, lfns, reason):
        # Reporting is currently short-circuited here; the integrity client
        # call below is kept for reference but is never reached.
        return S_OK()
        res = self.dataIntegrityClient.setFileProblematic(
            lfns, reason, sourceComponent='RequestPreparationAgent')
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__reportProblematicFiles: Failed to report missing files.",
                res['Message'])
            return res
        if res['Value']['Successful']:
            gLogger.info(
                "RequestPreparation.__reportProblematicFiles: Successfully reported %s missing files."
                % len(res['Value']['Successful']))
        if res['Value']['Failed']:
            gLogger.info(
                "RequestPreparation.__reportProblematicFiles: Failed to report %s problematic files."
                % len(res['Value']['Failed']))
        return res
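
# Agent flow in brief (sketch of the above): getCacheReplicas({'Status': 'New'})
# -> catalogue checks (existence, size, active replicas) -> terminally failed
# replica IDs go to updateReplicaFailure, the rest are moved to 'Waiting' via
# updateReplicaInformation.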
Example No. 13
class helper_TransferAgent(object):
    def __init__(self, transferAgent, gTransferDB):
        self.transferAgent = transferAgent
        self.transferDB = gTransferDB
        gLogger.info("Creating File Catalog")
        self.fileCatalog = FileCatalog()

    def helper_add_transfer(self, result):
        if not result:
            gLogger.error("There is no infomation")
            return False

        res = self.transferDB.get_TransferRequest(
            condDict={"id": result.trans_req_id})
        if not res["OK"]:
            return False
        req_list = res["Value"]
        if len(req_list) != 1:
            return False
        req = TransRequestEntryWithID._make(req_list[0])

        # construct the info
        info = {
            "id": result.id,
            "LFN": result.LFN,
            "srcSE": req.srcSE,
            "dstSE": req.dstSE,
            "retransfer": -1,
            "error": ""
        }
        # Add the Transfer
        worker = gTransferFactory.generate(req.protocol, info)
        if worker is None:
            return True
        self.transferAgent.transfer_worker.append(worker)
        # Change the status
        self.helper_status_update(self.transferDB.tables["TransferFileList"],
                                  result.id, {
                                      "status": "transfer",
                                      "start_time": datetime.datetime.utcnow()
                                  })
        # Add Accounting:
        d = {}
        d["User"] = req.username
        d["Source"] = req.srcSE
        d["Destination"] = req.dstSE
        d["Protocol"] = req.protocol
        d["FinalStatus"] = "OK"
        d["TransferSize"] = 0  # TODO
        r = self.fileCatalog.getFileSize(result.LFN)
        if r["OK"]:
            if r["Value"]["Successful"]:
                d["TransferSize"] = r["Value"]["Successful"][result.LFN]
        d["TransferTime"] = 1  # 1s
        d["TransferOK"] = 1
        d["TransferTotal"] = 1
        acct_dt = DataTransfer()
        acct_dt.setValuesFromDict(d)
        acct_dt.setNowAsStartAndEndTime()
        # save it
        worker.acct_dt = acct_dt

        return True

    def helper_remove_transfer(self, worker):
        info = worker.info
        gLogger.info("File.id = %d -> finish" % info["id"])
        self.helper_status_update(
            self.transferDB.tables["TransferFileList"], info["id"], {
                "status": "finish",
                "finish_time": datetime.datetime.utcnow()
            })
        # Accounting
        acct_dt = worker.acct_dt
        acct_dt.setEndTime()
        # TODO
        d = {}
        td = acct_dt.endTime - acct_dt.startTime
        td_s = (td.microseconds +
                (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
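        # equivalent to td.total_seconds() (available from Python 2.7)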
        d["TransferTime"] = td_s  # 1s
        if info["error"]:
            d["FinalStatus"] = "FAILED"
            d["TransferOK"] = 0
        else:
            d["FinalStatus"] = "OK"
            d["TransferOK"] = 1

        acct_dt.setValuesFromDict(d)

        acct_dt.commit()
        gLogger.info("Submit Accounting Data")

    def helper_check_request(self):
        """
      check if the *transfer* request are ok.
      if the whole files are *finish*, then this request
      will become *finish*.
    """
        infoDict = {"status": "transfer"}
        res = self.transferDB.get_TransferRequest(condDict=infoDict)
        if not res["OK"]:
            return
        reqlist = map(TransRequestEntryWithID._make, res["Value"])
        for req in reqlist:
            res = self.transferDB._query(
                'select count(*) from %(table)s where trans_req_id = %(id)d and status not in %(status_list)s'
                % {
                    "table": self.transferDB.tables["TransferFileList"],
                    "id": req.id,
                    "status_list":
                    '("finish", "kill")'  # XXX finish or kill means this request is ok.
                })
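            # Resulting SQL (illustrative, with id = 1):
            #   select count(*) from TransferFileList
            #   where trans_req_id = 1 and status not in ("finish", "kill")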
            if not res["OK"]:
                # TODO
                continue
            count = res["Value"][0][0]
            if count == 0:
                # if all status is finish,
                # the req status --> finish
                gLogger.info("req.id %d change from %s to finish" %
                             (req.id, req.status))
                self.helper_status_update(
                    self.transferDB.tables["TransferRequest"], req.id,
                    {"status": "finish"})
        return

    def helper_get_new_request(self):
        # 1. get the *new* File in the <<Transfer File List>>.
        #    if we get, goto <<Add New Transfer>>
        already_load_status = False
        result_new_file = self.helper_get_new_File()
        # 1.1 2014.04.20
        #     The other requests should also be loaded,
        #     so we must not return immediately
        if result_new_file:
            already_load_status = True
        # 2. if we can't get one, we should get a *new* request
        #    from the <<Transfer Request>>.
        #    if we can't get one, return False. STOP
        self.helper_check_request()
        result = self.helper_get_new_request_entry()
        if result:
            # 3. add the filelist in the dataset to the << Transfer File List >>
            condDict = {"name": result.dataset}
            res = self.transferDB.get_Dataset(condDict)
            if not res["OK"]:
                gLogger.error(res)
                return None
            filelist = res["Value"]
            # update the status in << Request >>
            if len(filelist) > 0:
                req_status = "transfer"
            else:
                req_status = "finish"
            self.helper_status_update(
                self.transferDB.tables["TransferRequest"], result.id,
                {"status": req_status})
            self.transferDB.insert_TransferFileList(result.id, filelist)
        # 4. get the *new* File Again.
        # 5. can't get, return False. STOP
        # 4.prelude
        #    If already loaded, return the last result
        if already_load_status and result_new_file:
            return result_new_file

        result = self.helper_get_new_File()
        return result

    def helper_get_new_request_entry(self):
        """
    TransRequestEntryWithID(
      id=1L, 
      username='******', 
      dataset='my-dataset', 
      srcSE='IHEP-USER', 
      dstSE='IHEPD-USER', 
      submit_time=datetime.datetime(2013, 3, 13, 20, 9, 34), 
      status='new')
    """
        condDict = {"status": "new"}
        res = self.transferDB.get_TransferRequest(condDict)
        if not res["OK"]:
            return None
        req_list = res["Value"]
        len_req = len(req_list)
        if len_req:
            # random select
            tmp_idx = random.randint(0, len_req - 1)
            return TransRequestEntryWithID._make(req_list[tmp_idx])
        return None

    def helper_get_new_File(self):
        """
    >>> helper.helper_get_new_File()
    TransFileListEntryWithID(
      id=1L, 
      LFN='/path/does/not/exist', 
      trans_req_id=1L, 
      start_time=None, 
      finish_time=None, 
      status='new')
    """
        condDict = {"status": "new"}
        res = self.transferDB.get_TransferFileList(condDict)
        if not res["OK"]:
            gLogger.error(res)
            return None
        filelist = res["Value"]
        gLogger.info("Filelist:")
        gLogger.info(filelist)
        len_files = len(filelist)
        if len_files > 0:
            tmp_idx = random.randint(0, len_files - 1)
            gLogger.info("get file entry index randomly: %d/%d" %
                         (tmp_idx, len_files))
            gLogger.info("get file entry", filelist[tmp_idx])
            return TransFileListEntryWithID._make(filelist[tmp_idx])
        return None

    def helper_status_update(self, table, id, toUpdate):
        res = self.transferDB.updateFields(
            table,
            updateDict=toUpdate,
            condDict={"id": id},
        )
        gLogger.debug(res)

    def helper_error_report(self, worker, reason):
        self.helper_status_update(self.transferDB.tables["TransferFileList"],
                                  worker.info["id"], {"error": reason})

    def check_worker_status(self, worker):
        """check whether the file transfer is kill(in DB)"""
        res = self.transferDB.getFields(
            self.transferDB.tables["TransferFileList"],
            outFields=["status"],
            condDict={"id": worker.info["id"]})
        if not res["OK"]:
            gLogger.error(res)
            return

        if not res["Value"]:
            return

        if len(res["Value"]) != 1:
            gLogger.error(res)
            return

        status = res["Value"][0][0]
        if status == "kill":
            gLogger.info("check worker should be killed: ", status)
            worker.proc.kill()
Example No. 14
class PluginUtilities(object):
  """
  Utility class used by plugins
  """

  def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None,
               debug=False, transInThread=None, transID=None):
    """
    c'tor

    Setting defaults
    """
    # clients
    if transClient is None:
      self.transClient = TransformationClient()
    else:
      self.transClient = transClient
    if dataManager is None:
      self.dm = DataManager()
    else:
      self.dm = dataManager
    if fc is None:
      self.fc = FileCatalog()
    else:
      self.fc = fc

    self.dmsHelper = DMSHelpers()

    self.plugin = plugin
    self.transID = transID
    self.params = {}
    self.groupSize = 0
    self.maxFiles = 0
    self.cachedLFNSize = {}
    self.transString = ''
    self.debug = debug
    if transInThread is None:
      self.transInThread = {}
    else:
      self.transInThread = transInThread

    self.log = gLogger.getSubLogger(plugin)

  def logVerbose(self, message, param=''):
    """ logger helper """
    if self.debug:
      self.log.info('(V)' + self.transString + message, param)
    else:
      self.log.verbose(self.transString + message, param)

  def logDebug(self, message, param=''):
    """ logger helper """
    self.log.debug(self.transString + message, param)

  def logInfo(self, message, param=''):
    """ logger helper """
    self.log.info(self.transString + message, param)

  def logWarn(self, message, param=''):
    """ logger helper """
    self.log.warn(self.transString + message, param)

  def logError(self, message, param=''):
    """ logger helper """
    self.log.error(self.transString + message, param)

  def logException(self, message, param='', lException=False):
    """ logger helper """
    self.log.exception(self.transString + message, param, lException)

  def setParameters(self, params):
    """ Set the transformation parameters and extract transID """
    self.params = params
    self.transID = params['TransformationID']
    self.transString = self.transInThread.get(self.transID, ' [NoThread] [%d] ' % self.transID)

  # @timeThis
  def groupByReplicas(self, files, status):
    """
    Generates tasks based on the location of the input data

   :param dict files:
              {'/this/is/at.1': ['SE1'],
               '/this/is/at.12': ['SE1', 'SE2'],
               '/this/is/at.2': ['SE2'],
               '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
               '/this/is/at_23': ['SE2', 'SE3'],
               '/this/is/at_4': ['SE4']}

    """
    tasks = []
    nTasks = 0

    if not files:
      return S_OK(tasks)

    files = dict(files)

    # Parameters
    if not self.groupSize:
      self.groupSize = self.getPluginParam('GroupSize', 10)
    flush = (status == 'Flush')
    self.logVerbose(
        "groupByReplicas: %d files, groupSize %d, flush %s" %
        (len(files), self.groupSize, flush))

    # Consider files by groups of SEs, a file is only in one group
    # Then consider files site by site, but a file can now be at more than one site
    for groupSE in (True, False):
      if not files:
        break
      seFiles = getFileGroups(files, groupSE=groupSE)
      self.logDebug("fileGroups set: ", seFiles)

      for replicaSE in sortSEs(seFiles):
        lfns = seFiles[replicaSE]
        if lfns:
          tasksLfns = breakListIntoChunks(lfns, self.groupSize)
          lfnsInTasks = []
          for taskLfns in tasksLfns:
            if flush or (len(taskLfns) >= self.groupSize):
              tasks.append((replicaSE, taskLfns))
              lfnsInTasks += taskLfns
          # In case the file was at more than one site, remove it from the other sites' list
          # Remove files from global list
          for lfn in lfnsInTasks:
            files.pop(lfn)
          if not groupSE:
            # Remove files from other SEs
            for se in [se for se in seFiles if se != replicaSE]:
              seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in lfnsInTasks]
      self.logVerbose(
          "groupByReplicas: %d tasks created (groupSE %s)" %
          (len(tasks) - nTasks, str(groupSE)), "%d files not included in tasks" %
          len(files))
      nTasks = len(tasks)

    return S_OK(tasks)

  def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False):
    """
    Split files in groups according to the size and create tasks for a given SE
    """
    tasks = []
    if fileSizes is None:
      fileSizes = self._getFileSize(lfns).get('Value')
    if fileSizes is None:
      self.logWarn('Error getting file sizes, no tasks created')
      return tasks
    taskLfns = []
    taskSize = 0
    if not self.groupSize:
      # input size in GB converted to bytes
      self.groupSize = float(self.getPluginParam('GroupSize', 1.)) * 1000 * 1000 * 1000
    if not self.maxFiles:
      # FIXME: prepare for changing the name of the ambiguous CS option
      self.maxFiles = self.getPluginParam('MaxFilesPerTask', self.getPluginParam('MaxFiles', 100))
    lfns = sorted(lfns, key=fileSizes.get)
    for lfn in lfns:
      size = fileSizes.get(lfn, 0)
      if size:
        if size > self.groupSize:
          tasks.append((replicaSE, [lfn]))
        else:
          taskSize += size
          taskLfns.append(lfn)
          if (taskSize > self.groupSize) or (len(taskLfns) >= self.maxFiles):
            tasks.append((replicaSE, taskLfns))
            taskLfns = []
            taskSize = 0
    if flush and taskLfns:
      tasks.append((replicaSE, taskLfns))
    if not tasks and not flush and taskLfns:
      self.logVerbose(
          'Not enough data to create a task, and flush not set (%d bytes for groupSize %d)' %
          (taskSize, self.groupSize))
    return tasks

  # @timeThis
  def groupBySize(self, files, status):
    """
    Generate a task for a given amount of data
    """
    tasks = []
    nTasks = 0

    if not len(files):
      return S_OK(tasks)

    files = dict(files)
    # Parameters
    if not self.groupSize:
      # input size in GB converted to bytes
      self.groupSize = float(self.getPluginParam('GroupSize', 1)) * 1000 * 1000 * 1000
    flush = (status == 'Flush')
    self.logVerbose(
        "groupBySize: %d files, groupSize: %d, flush: %s" %
        (len(files), self.groupSize, flush))

    # Get the file sizes
    res = self._getFileSize(files.keys())
    if not res['OK']:
      return res
    fileSizes = res['Value']

    for groupSE in (True, False):
      if not files:
        break
      seFiles = getFileGroups(files, groupSE=groupSE)

      for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles):
        lfns = seFiles[replicaSE]
        newTasks = self.createTasksBySize(lfns, replicaSE, fileSizes=fileSizes, flush=flush)
        lfnsInTasks = []
        for task in newTasks:
          lfnsInTasks += task[1]
        tasks += newTasks

        # Remove the selected files from the size cache
        self.clearCachedFileSize(lfnsInTasks)
        if not groupSE:
          # Remove files from other SEs
          for se in [se for se in seFiles if se != replicaSE]:
            seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in lfnsInTasks]
        # Remove files from global list
        for lfn in lfnsInTasks:
          files.pop(lfn)

      self.logVerbose(
          "groupBySize: %d tasks created with groupSE %s" %
          (len(tasks) - nTasks, str(groupSE)))
      self.logVerbose("groupBySize: %d files have not been included in tasks" % len(files))
      nTasks = len(tasks)

    self.logVerbose("Grouped %d files by size" % len(files))
    return S_OK(tasks)

  def getExistingCounters(self, normalise=False, requestedSites=None):
    res = self.transClient.getCounters('TransformationFiles', ['UsedSE'],
                                       {'TransformationID': self.params['TransformationID']})
    if not res['OK']:
      return res
    usageDict = {}
    for usedDict, count in res['Value']:
      usedSE = usedDict['UsedSE']
      if usedSE != 'Unknown':
        usageDict[usedSE] = count
    if requestedSites:
      siteDict = {}
      for se, count in usageDict.items():
        res = getSitesForSE(se)
        if not res['OK']:
          return res
        for site in res['Value']:
          if site in requestedSites:
            siteDict[site] = count
      usageDict = siteDict.copy()
    if normalise:
      usageDict = self._normaliseShares(usageDict)
    return S_OK(usageDict)

  # @timeThis
  def _getFileSize(self, lfns):
    """ Get file size from a cache, if not from the catalog
    #FIXME: have to fill the cachedLFNSize!
    """
    lfns = list(lfns)
    cachedLFNSize = dict(self.cachedLFNSize)

    fileSizes = {}
    for lfn in [lfn for lfn in lfns if lfn in cachedLFNSize]:
      fileSizes[lfn] = cachedLFNSize[lfn]
    self.logDebug(
        "Found cache hit for File size for %d files out of %d" %
        (len(fileSizes), len(lfns)))
    lfns = [lfn for lfn in lfns if lfn not in cachedLFNSize]
    if lfns:
      fileSizes = self._getFileSizeFromCatalog(lfns, fileSizes)
      if not fileSizes['OK']:
        self.logError(fileSizes['Message'])
        return fileSizes
      fileSizes = fileSizes['Value']
    return S_OK(fileSizes)

  # @timeThis
  def _getFileSizeFromCatalog(self, lfns, fileSizes):
    """
    Get file size from the catalog
    """
    lfns = list(lfns)
    fileSizes = dict(fileSizes)

    res = self.fc.getFileSize(lfns)
    if not res['OK']:
      return S_ERROR("Failed to get sizes for all files: %s" % res['Message'])
    if res['Value']['Failed']:
      errorReason = sorted(set(res['Value']['Failed'].values()))
      self.logWarn("Failed to get sizes for %d files:" % len(res['Value']['Failed']), errorReason)
    fileSizes.update(res['Value']['Successful'])
    self.cachedLFNSize.update((res['Value']['Successful']))
    self.logVerbose("Got size of %d files from catalog" % len(lfns))
    return S_OK(fileSizes)

  def clearCachedFileSize(self, lfns):
    """ Utility function
    """
    for lfn in [lfn for lfn in lfns if lfn in self.cachedLFNSize]:
      self.cachedLFNSize.pop(lfn)

  def getPluginParam(self, name, default=None):
    """ Get plugin parameters using specific settings or settings defined in the CS
        Caution: the type returned is that of the default value
    """
    # get the value of a parameter looking 1st in the CS
    if default is not None:
      valueType = type(default)
    else:
      valueType = None
    # First look at a generic value...
    optionPath = "TransformationPlugins/%s" % (name)
    value = Operations().getValue(optionPath, None)
    self.logVerbose("Default plugin param %s: '%s'" % (optionPath, value))
    # Then look at a plugin-specific value
    optionPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
    value = Operations().getValue(optionPath, value)
    self.logVerbose("Specific plugin param %s: '%s'" % (optionPath, value))
    if value is not None:
      default = value
    # Finally look at a transformation-specific parameter
    value = self.params.get(name, default)
    self.logVerbose(
        "Transformation plugin param %s: '%s'. Convert to %s" %
        (name, value, str(valueType)))
    if valueType and not isinstance(value, valueType):
      if valueType is list:
        try:
          value = ast.literal_eval(value) if value and value != 'None' else []
        # literal_eval('SE-DST') -> ValueError
        # literal_eval('SE_MC-DST') -> SyntaxError
        # Don't ask...
        except (ValueError, SyntaxError):
          value = [val for val in value.replace(' ', '').split(',') if val]

      elif valueType is int:
        value = int(value)
      elif valueType is float:
        value = float(value)
      elif valueType is bool:
        if value in ('False', 'No', 'None', None, 0):
          value = False
        else:
          value = bool(value)
      elif valueType is not str:
        self.logWarn(
            "Unknown parameter type (%s) for %s, passed as string" %
            (str(valueType), name))
    self.logVerbose("Final plugin param %s: '%s'" % (name, value))
    return value

  @staticmethod
  def _normaliseShares(originalShares):
    """ Normalize shares to 1 """
    total = sum(float(share) for share in originalShares.values())
    return dict([(site, 100. * float(share) / total if total else 0.)
                 for site, share in originalShares.items()])

  def uniqueSEs(self, ses):
    """ return a list of SEs that are not physically the same """
    newSEs = []
    for se in ses:
      if not self.isSameSEInList(se, newSEs):
        newSEs.append(se)
    return newSEs

  def isSameSE(self, se1, se2):
    """ Check if 2 SEs are indeed the same.

        :param se1: name of the first StorageElement
        :param se2: name of the second StorageElement

        :returns: True/False if they are considered the same.
                  See :py:mod:`~DIRAC.Resources.Storage.StorageElement.StorageElementItem.isSameSE`
    """
    if se1 == se2:
      return True

    return StorageElement(se1).isSameSE(StorageElement(se2))

  def isSameSEInList(self, se1, seList):
    """ Check if an SE is the same as any in a list """
    if se1 in seList:
      return True
    for se in seList:
      if self.isSameSE(se1, se):
        return True
    return False
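
  # Sketch (hypothetical SE names): isSameSEInList lets uniqueSEs collapse
  # aliases pointing at the same physical endpoint, e.g.
  #   self.uniqueSEs(['CERN-DST', 'CERN-DST-ALIAS', 'CNAF-DST'])
  # keeps a single CERN entry if StorageElement.isSameSE reports the two
  # names identical.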

  def closerSEs(self, existingSEs, targetSEs, local=False):
    """ Order the targetSEs such that the first ones are closer to existingSEs. Keep all elements in targetSEs
    """
    setTarget = set(targetSEs)
    sameSEs = set(se1 for se1 in setTarget for se2 in existingSEs if self.isSameSE(se1, se2))
    targetSEs = setTarget - sameSEs
    if targetSEs:
      # Some SEs are left, look for sites
      existingSites = [self.dmsHelper.getLocalSiteForSE(se).get('Value')
                       for se in existingSEs]
      existingSites = set([site for site in existingSites if site])
      closeSEs = set([se for se in targetSEs
                      if self.dmsHelper.getLocalSiteForSE(se).get('Value') in existingSites])
      # print existingSEs, existingSites, targetSEs, closeSEs
      otherSEs = targetSEs - closeSEs
      targetSEs = list(closeSEs)
      random.shuffle(targetSEs)
      if not local and otherSEs:
        otherSEs = list(otherSEs)
        random.shuffle(otherSEs)
        targetSEs += otherSEs
    else:
      targetSEs = []
    return (targetSEs + list(sameSEs)) if not local else targetSEs
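
  # Ordering sketch for closerSEs (hypothetical SEs): target SEs co-located
  # with an existing replica come first (shuffled), then the remaining target
  # SEs (shuffled, unless local=True), and SEs identical to an existing one
  # are appended at the very end; with local=True only co-located SEs are kept.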
Example #15
class DataIntegrityClient(Client):
    """
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB

  """
    def __init__(self, **kwargs):

        super(DataIntegrityClient, self).__init__(**kwargs)
        self.setServer('DataManagement/DataIntegrity')
        self.dm = DataManager()
        self.fc = FileCatalog()

    def setFileProblematic(self, lfn, reason, sourceComponent=''):
        """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if isinstance(lfn, list):
            lfns = lfn
        elif isinstance(lfn, basestring):
            lfns = [lfn]
        else:
            errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setFileProblematic: Attempting to update %s files."
            % len(lfns))
        fileMetadata = {}
        for lfn in lfns:
            fileMetadata[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': '',
                'SE': ''
            }
        res = self.insertProblematic(sourceComponent, fileMetadata)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setFileProblematic: Failed to insert problematics to integrity DB"
            )
        return res
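
    # Usage sketch (hypothetical LFN and component name): flag a single file
    # so that the integrity agents will process it later:
    #   client = DataIntegrityClient()
    #   client.setFileProblematic('/lhcb/data/2018/file1.dst',
    #                             'LFNZeroReplicas', sourceComponent='SomeAgent')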

    def reportProblematicReplicas(self, replicaTuple, se, reason):
        """ Simple wrapper function around setReplicaProblematic """
        gLogger.info('The following %s files had %s at %s' %
                     (len(replicaTuple), reason, se))
        for lfn, _pfn, _se, _reason in sorted(replicaTuple):
            if lfn:
                gLogger.info(lfn)
        res = self.setReplicaProblematic(replicaTuple,
                                         sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with replicas',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with replicas')

    def setReplicaProblematic(self, replicaTuple, sourceComponent=''):
        """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaDict should be of the form {lfn: {'PFN': pfn, 'SE': se, 'Prognosis': prognosis}}

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if isinstance(replicaTuple, tuple):
            replicaTuple = [replicaTuple]
        elif isinstance(replicaTuple, list):
            pass
        else:
            errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas."
            % len(replicaTuple))
        replicaDict = {}
        for lfn, pfn, se, reason in replicaTuple:
            replicaDict[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': pfn,
                'SE': se
            }
        res = self.insertProblematic(sourceComponent, replicaDict)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB"
            )
            return res
        for lfn in replicaDict:
            replicaDict[lfn]['Status'] = 'Problematic'

        res = self.fc.setReplicaStatus(replicaDict)
        if not res['OK']:
            errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
            gLogger.error(errStr, res['Message'])
            return res
        failed = res['Value']['Failed']
        successful = res['Value']['Successful']
        resDict = {'Successful': successful, 'Failed': failed}
        return S_OK(resDict)
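
    # Usage sketch (hypothetical values): each replica is described by a
    # (lfn, pfn, se, prognosis) tuple; the PFN may be '' when unknown:
    #   client.setReplicaProblematic(
    #       ('/lhcb/data/2018/file1.dst', '', 'CERN-DST', 'PFNMissing'),
    #       sourceComponent='SomeAgent')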

    ##########################################################################
    #
    # This section contains the resolution methods for various prognoses
    #

    def __updateCompletedFiles(self, prognosis, fileID):
        gLogger.info("%s file (%d) is resolved" % (prognosis, fileID))
        return self.setProblematicStatus(fileID, 'Resolved')

    def __returnProblematicError(self, fileID, res):
        self.incrementProblematicRetry(fileID)
        gLogger.error('DataIntegrityClient failure', res['Message'])
        return res

    def __updateReplicaToChecked(self, problematicDict):
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        prognosis = problematicDict['Prognosis']
        problematicDict['Status'] = 'Checked'

        res = returnSingleResult(
            self.fc.setReplicaStatus({lfn: problematicDict}))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        gLogger.info("%s replica (%d) is updated to Checked status" %
                     (prognosis, fileID))
        return self.__updateCompletedFiles(prognosis, fileID)

    def resolveCatalogPFNSizeMismatch(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']
        res = returnSingleResult(StorageElement(se).getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        bkKCatalog = FileCatalog(['BookkeepingDB'])
        res = returnSingleResult(bkKCatalog.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        bookkeepingSize = res['Value']
        if bookkeepingSize == catalogSize == storageSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) matched all registered sizes."
                % fileID)
            return self.__updateReplicaToChecked(problematicDict)
        if catalogSize == bookkeepingSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also"
                % fileID)
            res = returnSingleResult(self.fc.getReplicas(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            if len(res['Value']) <= 1:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has no other replicas."
                    % fileID)
                return S_ERROR(
                    "Not removing catalog file mismatch since it is the only replica"
                )
            else:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..."
                    % fileID)
                res = self.dm.removeReplica(se, lfn)
                if not res['OK']:
                    return self.__returnProblematicError(fileID, res)
                return self.__updateCompletedFiles('CatalogPFNSizeMismatch',
                                                   fileID)
        if (catalogSize != bookkeepingSize) and (bookkeepingSize
                                                 == storageSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size"
                % fileID)
            res = self.__updateReplicaToChecked(problematicDict)
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.changeProblematicPrognosis(fileID,
                                                   'BKCatalogSizeMismatch')
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count"
            % fileID)
        return self.incrementProblematicRetry(fileID)
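
    # Decision table for the three sizes compared above (C = catalog,
    # S = storage, B = bookkeeping):
    #   C == S == B : replica set to Checked, problematic resolved
    #   C == B != S : storage copy is bad; replica removed if others exist
    #   B == S != C : catalog is wrong; prognosis -> BKCatalogSizeMismatch
    #   all differ  : retry count incremented for a later pass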

    #FIXME: Unused?
    def resolvePFNNotRegistered(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)
        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            # The file does not exist in the catalog
            res = returnSingleResult(se.removeFile(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        res = returnSingleResult(se.getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info("PFNNotRegistered replica (%d) found to be missing." %
                         fileID)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        elif not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageMetadata = res['Value']
        if storageMetadata['Lost']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        if storageMetadata['Unavailable']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count"
                % fileID)
            return self.incrementProblematicRetry(fileID)

        # HACK until we can obtain the space token descriptions through GFAL
        site = seName.split('_')[0].split('-')[0]
        if not storageMetadata['Cached']:
            if lfn.endswith('.raw'):
                seName = '%s-RAW' % site
            else:
                seName = '%s-RDST' % site
        elif storageMetadata['Migrated']:
            if lfn.startswith('/lhcb/data'):
                seName = '%s_M-DST' % site
            else:
                seName = '%s_MC_M-DST' % site
        else:
            if lfn.startswith('/lhcb/data'):
                seName = '%s-DST' % site
            else:
                seName = '%s_MC-DST' % site

        problematicDict['SE'] = seName
        res = returnSingleResult(se.getURL(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)

        problematicDict['PFN'] = res['Value']

        res = returnSingleResult(self.fc.addReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        res = returnSingleResult(self.fc.getFileMetadata(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']['Size'] != storageMetadata['Size']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNNotRegistered', fileID)
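
    # Summary of the space-token HACK above: the target SE name is rebuilt
    # from the site prefix and the storage metadata flags:
    #   not Cached           -> <site>-RAW for *.raw files, else <site>-RDST
    #   Cached and Migrated  -> <site>_M-DST for /lhcb/data, else <site>_MC_M-DST
    #   Cached, not Migrated -> <site>-DST for /lhcb/data, else <site>_MC-DST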

    #FIXME: Unused?
    def resolveLFNCatalogMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            return self.__updateCompletedFiles('LFNCatalogMissing', fileID)
        # Remove the file from all catalogs
        # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
        res = returnSingleResult(self.fc.removeFile(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        return self.__updateCompletedFiles('LFNCatalogMissing', fileID)

    #FIXME: Unused?
    def resolvePFNMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
        se = problematicDict['SE']
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            gLogger.info("PFNMissing file (%d) no longer exists in catalog" %
                         fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)

        res = returnSingleResult(StorageElement(se).exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            gLogger.info("PFNMissing replica (%d) is no longer missing" %
                         fileID)
            return self.__updateReplicaToChecked(problematicDict)
        gLogger.info("PFNMissing replica (%d) does not exist" % fileID)
        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        replicas = res['Value']
        seSite = se.split('_')[0].split('-')[0]
        found = False
        for replicaSE in replicas:
            if re.search(seSite, replicaSE):
                found = True
                problematicDict['SE'] = replicaSE
                se = replicaSE
        if not found:
            gLogger.info(
                "PFNMissing replica (%d) is no longer registered at SE. Resolved."
                % fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)
        gLogger.info(
            "PFNMissing replica (%d) does not exist. Removing from catalog..."
            % fileID)
        res = returnSingleResult(self.fc.removeReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if len(replicas) == 1:
            gLogger.info(
                "PFNMissing replica (%d) had a single replica. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'LFNZeroReplicas')
        res = self.dm.replicateAndRegister(problematicDict['LFN'], se)
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('PFNMissing', fileID)

    #FIXME: Unused?
    def resolvePFNUnavailable(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(StorageElement(se).getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            # The file is no longer Unavailable but has now disappeared completely
            gLogger.info(
                "PFNUnavailable replica (%d) found to be missing. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        if (not res['OK']) or res['Value']['Unavailable']:
            gLogger.info(
                "PFNUnavailable replica (%d) found to still be Unavailable" %
                fileID)
            return self.incrementProblematicRetry(fileID)
        if res['Value']['Lost']:
            gLogger.info(
                "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        gLogger.info("PFNUnavailable replica (%d) is no longer Unavailable" %
                     fileID)
        # Need to make the replica okay in the Catalog
        return self.__updateReplicaToChecked(problematicDict)

    #FIXME: Unused?
    def resolvePFNZeroSize(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)

        res = returnSingleResult(se.getFileSize(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info(
                "PFNZeroSize replica (%d) found to be missing. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        storageSize = res['Value']
        if storageSize == 0:
            res = returnSingleResult(se.removeFile(lfn))

            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            gLogger.info(
                "PFNZeroSize replica (%d) removed. Updating prognosis" %
                problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')

        res = returnSingleResult(self.fc.getReplicas(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if seName not in res['Value']:
            gLogger.info(
                "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNNotRegistered')
        res = returnSingleResult(self.fc.getFileMetadata(lfn))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']['Size']
        if catalogSize != storageSize:
            gLogger.info(
                "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNZeroSize', fileID)

    ############################################################################################

    #FIXME: Unused?
    def resolveLFNZeroReplicas(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if res['OK'] and res['Value']:
            gLogger.info("LFNZeroReplicas file (%d) found to have replicas" %
                         fileID)
        else:
            gLogger.info(
                "LFNZeroReplicas file (%d) does not have replicas. Checking storage..."
                % fileID)
            pfnsFound = False
            for storageElementName in sorted(
                    gConfig.getValue(
                        'Resources/StorageElementGroups/Tier1_MC_M-DST', [])):
                res = self.__getStoragePathExists([lfn], storageElementName)
                if res['OK'] and lfn in res['Value']:
                    gLogger.info(
                        "LFNZeroReplicas file (%d) found storage file at %s" %
                        (fileID, storageElementName))
                    self.reportProblematicReplicas(
                        [(lfn, 'deprecatedUrl', storageElementName,
                          'PFNNotRegistered')], storageElementName,
                        'PFNNotRegistered')
                    pfnsFound = True
            if not pfnsFound:
                gLogger.info(
                    "LFNZeroReplicas file (%d) did not have storage files. Removing..."
                    % fileID)
                res = returnSingleResult(self.fc.removeFile(lfn))
                if not res['OK']:
                    gLogger.error('DataIntegrityClient: failed to remove file',
                                  res['Message'])
                    # Increment the number of retries for this file
                    self.incrementProblematicRetry(fileID)
                    return res
                gLogger.info("LFNZeroReplicas file (%d) removed from catalog" %
                             fileID)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('LFNZeroReplicas', fileID)

    def _reportProblematicFiles(self, lfns, reason):
        """ Simple wrapper function around setFileProblematic
    """
        gLogger.info('The following %s files were found with %s' %
                     (len(lfns), reason))
        for lfn in sorted(lfns):
            gLogger.info(lfn)
        res = self.setFileProblematic(lfns,
                                      reason,
                                      sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with files',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with files')
Example #16
class DataIntegrityClient(Client):
    """
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB

  """
    def __init__(self, **kwargs):

        Client.__init__(self, **kwargs)
        self.setServer('DataManagement/DataIntegrity')
        self.dm = DataManager()
        self.fc = FileCatalog()

    ##########################################################################
    #
    # This section contains the specific methods for LFC->SE checks
    #

    def catalogDirectoryToSE(self, lfnDir):
        """ This obtains the replica and metadata information from the catalog for the supplied directory and checks against the storage elements.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        if type(lfnDir) in types.StringTypes:
            lfnDir = [lfnDir]
        res = self.__getCatalogDirectoryContents(lfnDir)
        if not res['OK']:
            return res
        replicas = res['Value']['Replicas']
        catalogMetadata = res['Value']['Metadata']
        res = self.__checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        }
        return S_OK(resDict)

    def catalogFileToSE(self, lfns):
        """ This obtains the replica and metadata information from the catalog and checks against the storage elements.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        if type(lfns) in types.StringTypes:
            lfns = [lfns]
        res = self.__getCatalogMetadata(lfns)
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        res = self.__getCatalogReplicas(catalogMetadata.keys())
        if not res['OK']:
            return res
        replicas = res['Value']
        res = self.__checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        }
        return S_OK(resDict)

    def checkPhysicalFiles(self, replicas, catalogMetadata, ses=[]):
        """ This obtains takes the supplied replica and metadata information obtained from the catalog and checks against the storage elements.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        return self.__checkPhysicalFiles(replicas, catalogMetadata, ses=ses)

    def __checkPhysicalFiles(self, replicas, catalogMetadata, ses=[]):
        """ This obtains the physical file metadata and checks the metadata against the catalog entries
    """
        seLfns = {}
        for lfn, replicaDict in replicas.items():
            for se, _url in replicaDict.items():
                if (ses) and (se not in ses):
                    continue
                seLfns.setdefault(se, []).append(lfn)
        gLogger.info('%s %s' %
                     ('Storage Element'.ljust(20), 'Replicas'.rjust(20)))

        for se in sortList(seLfns):
            files = len(seLfns[se])
            gLogger.info('%s %s' % (se.ljust(20), str(files).rjust(20)))

            lfns = seLfns[se]
            sizeMismatch = []
            res = self.__checkPhysicalFileMetadata(lfns, se)
            if not res['OK']:
                gLogger.error('Failed to get physical file metadata.',
                              res['Message'])
                return res
            for lfn, metadata in res['Value'].items():
                if lfn in catalogMetadata:
                    if (metadata['Size'] != catalogMetadata[lfn]['Size']) and (
                            metadata['Size'] != 0):
                        sizeMismatch.append((lfn, 'deprecatedUrl', se,
                                             'CatalogPFNSizeMismatch'))
            if sizeMismatch:
                self.__reportProblematicReplicas(sizeMismatch, se,
                                                 'CatalogPFNSizeMismatch')
        return S_OK()

    def __checkPhysicalFileMetadata(self, lfns, se):
        """ Check obtain the physical file metadata and check the files are available
    """
        gLogger.info('Checking the integrity of %s physical files at %s' %
                     (len(lfns), se))

        res = StorageElement(se).getFileMetadata(lfns)

        if not res['OK']:
            gLogger.error('Failed to get metadata for lfns.', res['Message'])
            return res
        lfnMetadataDict = res['Value']['Successful']
        # If the replicas are completely missing
        missingReplicas = []
        for lfn, reason in res['Value']['Failed'].items():
            if re.search('File does not exist', reason):
                missingReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNMissing'))
        if missingReplicas:
            self.__reportProblematicReplicas(missingReplicas, se, 'PFNMissing')
        lostReplicas = []
        unavailableReplicas = []
        zeroSizeReplicas = []
        # If the files are not accessible
        for lfn, lfnMetadata in lfnMetadataDict.items():
            if lfnMetadata['Lost']:
                lostReplicas.append((lfn, 'deprecatedUrl', se, 'PFNLost'))
            if lfnMetadata['Unavailable']:
                unavailableReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNUnavailable'))
            if lfnMetadata['Size'] == 0:
                zeroSizeReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNZeroSize'))
        if lostReplicas:
            self.__reportProblematicReplicas(lostReplicas, se, 'PFNLost')
        if unavailableReplicas:
            self.__reportProblematicReplicas(unavailableReplicas, se,
                                             'PFNUnavailable')
        if zeroSizeReplicas:
            self.__reportProblematicReplicas(zeroSizeReplicas, se,
                                             'PFNZeroSize')
        gLogger.info(
            'Checking the integrity of physical files at %s complete' % se)
        return S_OK(lfnMetadataDict)
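
    # Prognoses reported by this check: PFNMissing for replicas absent from
    # the storage, PFNLost and PFNUnavailable from the storage metadata flags,
    # and PFNZeroSize for zero-length files.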

    ##########################################################################
    #
    # This section contains the specific methods for SE->LFC checks
    #

    def storageDirectoryToCatalog(self, lfnDir, storageElement):
        """ This obtains the file found on the storage element in the supplied directories and determines whether they exist in the catalog and checks their metadata elements
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the SE->LFC check at %s" % storageElement)
        gLogger.info("-" * 40)
        if type(lfnDir) in types.StringTypes:
            lfnDir = [lfnDir]
        res = self.__getStorageDirectoryContents(lfnDir, storageElement)
        if not res['OK']:
            return res
        storageFileMetadata = res['Value']
        if storageFileMetadata:
            return self.__checkCatalogForSEFiles(storageFileMetadata,
                                                 storageElement)
        return S_OK({'CatalogMetadata': {}, 'StorageMetadata': {}})

    def __checkCatalogForSEFiles(self, storageMetadata, storageElement):
        gLogger.info('Checking %s storage files exist in the catalog' %
                     len(storageMetadata))

        res = self.fc.getReplicas(storageMetadata)
        if not res['OK']:
            gLogger.error("Failed to get replicas for LFN", res['Message'])
            return res
        failedLfns = res['Value']['Failed']
        successfulLfns = res['Value']['Successful']
        notRegisteredLfns = []

        for lfn in storageMetadata:
            if lfn in failedLfns:
                if 'No such file or directory' in failedLfns[lfn]:
                    notRegisteredLfns.append(
                        (lfn, 'deprecatedUrl', storageElement,
                         'LFNNotRegistered'))
                    failedLfns.pop(lfn)
            elif storageElement not in successfulLfns[lfn]:
                notRegisteredLfns.append(
                    (lfn, 'deprecatedUrl', storageElement, 'LFNNotRegistered'))

        if notRegisteredLfns:
            self.__reportProblematicReplicas(notRegisteredLfns, storageElement,
                                             'LFNNotRegistered')
        if failedLfns:
            return S_ERROR('Failed to obtain replicas')

        # For the LFNs found to be registered obtain the file metadata from the catalog and verify against the storage metadata
        res = self.__getCatalogMetadata(storageMetadata)
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        sizeMismatch = []
        for lfn, lfnCatalogMetadata in catalogMetadata.items():
            lfnStorageMetadata = storageMetadata[lfn]
            if (lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size']) and (
                    lfnStorageMetadata['Size'] != 0):
                sizeMismatch.append((lfn, 'deprecatedUrl', storageElement,
                                     'CatalogPFNSizeMismatch'))
        if sizeMismatch:
            self.__reportProblematicReplicas(sizeMismatch, storageElement,
                                             'CatalogPFNSizeMismatch')
        gLogger.info('Checking storage files exist in the catalog complete')
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'StorageMetadata': storageMetadata
        }
        return S_OK(resDict)

    def getStorageDirectoryContents(self, lfnDir, storageElement):
        """ This obtains takes the supplied lfn directories and recursively obtains the files in the supplied storage element
    """
        return self.__getStorageDirectoryContents(lfnDir, storageElement)

    def __getStorageDirectoryContents(self, lfnDir, storageElement):
        """ Obtians the contents of the supplied directory on the storage
    """
        gLogger.info('Obtaining the contents for %s directories at %s' %
                     (len(lfnDir), storageElement))

        se = StorageElement(storageElement)

        res = se.exists(lfnDir)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of directories",
                          res['Message'])
            return res
        for directory, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existance of directory',
                          '%s %s' % (directory, error))
        if res['Value']['Failed']:
            return S_ERROR('Failed to determine existance of directory')
        directoryExists = res['Value']['Successful']
        activeDirs = []
        for directory in sorted(directoryExists):
            exists = directoryExists[directory]
            if exists:
                activeDirs.append(directory)
        allFiles = {}
        while len(activeDirs) > 0:
            currentDir = activeDirs[0]
            res = se.listDirectory(currentDir)
            activeDirs.remove(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents',
                              res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error(
                    'Failed to get directory contents',
                    '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
                return S_ERROR(res['Value']['Failed'][currentDir])
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend(
                    se.getLFNFromURL(dirContents['SubDirs']).get(
                        'Value', {}).get('Successful', []))
                fileURLMetadata = dirContents['Files']
                fileMetadata = {}
                res = se.getLFNFromURL(fileURLMetadata)
                if not res['OK']:
                    gLogger.error('Failed to get directory content LFNs',
                                  res['Message'])
                    return res

                for url, error in res['Value']['Failed'].items():
                    gLogger.error("Failed to get LFN for URL",
                                  "%s %s" % (url, error))
                if res['Value']['Failed']:
                    return S_ERROR("Failed to get LFNs for PFNs")
                urlLfns = res['Value']['Successful']
                for urlLfn, lfn in urlLfns.items():
                    fileMetadata[lfn] = fileURLMetadata[urlLfn]
                allFiles.update(fileMetadata)

        zeroSizeFiles = []

        for lfn in sorted(allFiles):
            if os.path.basename(lfn) == 'dirac_directory':
                allFiles.pop(lfn)
            else:
                metadata = allFiles[lfn]
                if metadata['Size'] == 0:
                    zeroSizeFiles.append(
                        (lfn, 'deprecatedUrl', storageElement, 'PFNZeroSize'))
        if zeroSizeFiles:
            self.__reportProblematicReplicas(zeroSizeFiles, storageElement,
                                             'PFNZeroSize')

        gLogger.info('Obtained a total of %s files for directories at %s' %
                     (len(allFiles), storageElement))
        return S_OK(allFiles)
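
    # Traversal sketch: directories are walked breadth-first through the
    # activeDirs work list; storage URLs are mapped back to LFNs with
    # getLFNFromURL, 'dirac_directory' marker files are discarded, and any
    # zero-size file is reported with the PFNZeroSize prognosis.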

    def __getStoragePathExists(self, lfnPaths, storageElement):
        gLogger.info('Determining the existence of %d files at %s' %
                     (len(lfnPaths), storageElement))

        se = StorageElement(storageElement)

        res = se.exists(lfnPaths)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of paths",
                          res['Message'])
            return res
        for lfnPath, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existance of path',
                          '%s %s' % (lfnPath, error))
        if res['Value']['Failed']:
            return S_ERROR('Failed to determine existance of paths')
        pathExists = res['Value']['Successful']
        resDict = {}
        for lfn, exists in pathExists.items():
            if exists:
                resDict[lfn] = True
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the specific methods for obtaining replica and metadata information from the catalog
    #

    def __getCatalogDirectoryContents(self, lfnDir):
        """ Obtain the contents of the supplied directory
    """
        gLogger.info('Obtaining the catalog contents for %s directories' %
                     len(lfnDir))

        activeDirs = lfnDir
        allFiles = {}
        while len(activeDirs) > 0:
            currentDir = activeDirs[0]
            res = self.fc.listDirectory(currentDir)
            activeDirs.remove(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents',
                              res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error(
                    'Failed to get directory contents',
                    '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend(dirContents['SubDirs'])
                allFiles.update(dirContents['Files'])

        zeroReplicaFiles = []
        zeroSizeFiles = []
        allReplicaDict = {}
        allMetadataDict = {}
        for lfn, lfnDict in allFiles.items():
            lfnReplicas = {}
            for se, replicaDict in lfnDict['Replicas'].items():
                lfnReplicas[se] = replicaDict['PFN']
            if not lfnReplicas:
                zeroReplicaFiles.append(lfn)
            allReplicaDict[lfn] = lfnReplicas
            allMetadataDict[lfn] = lfnDict['MetaData']
            if lfnDict['MetaData']['Size'] == 0:
                zeroSizeFiles.append(lfn)
        if zeroReplicaFiles:
            self.__reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        if zeroSizeFiles:
            self.__reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
        gLogger.info(
            'Obtained a total of %s files for the supplied directories' %
            len(allMetadataDict))
        resDict = {'Metadata': allMetadataDict, 'Replicas': allReplicaDict}
        return S_OK(resDict)

    def __getCatalogReplicas(self, lfns):
        """ Obtain the file replicas from the catalog while checking that there are replicas
    """
        gLogger.info('Obtaining the replicas for %s files' % len(lfns))

        zeroReplicaFiles = []
        res = self.fc.getReplicas(lfns, allStatus=True)
        if not res['OK']:
            gLogger.error('Failed to get catalog replicas', res['Message'])
            return res
        allReplicas = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].items():
            if re.search('File has zero replicas', error):
                zeroReplicaFiles.append(lfn)
        if zeroReplicaFiles:
            self.__reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        gLogger.info('Obtaining the replicas for files complete')
        return S_OK(allReplicas)

    def __getCatalogMetadata(self, lfns):
        """ Obtain the file metadata from the catalog while checking they exist
    """
        if not lfns:
            return S_OK({})
        gLogger.info('Obtaining the catalog metadata for %s files' % len(lfns))

        missingCatalogFiles = []
        zeroSizeFiles = []
        res = self.fc.getFileMetadata(lfns)
        if not res['OK']:
            gLogger.error('Failed to get catalog metadata', res['Message'])
            return res
        allMetadata = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].items():
            if re.search('No such file or directory', error):
                missingCatalogFiles.append(lfn)
        if missingCatalogFiles:
            self.__reportProblematicFiles(missingCatalogFiles,
                                          'LFNCatalogMissing')
        for lfn, metadata in allMetadata.items():
            if metadata['Size'] == 0:
                zeroSizeFiles.append(lfn)
        if zeroSizeFiles:
            self.__reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
        gLogger.info('Obtaining the catalog metadata complete')
        return S_OK(allMetadata)
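
    # Prognoses reported by this check: LFNCatalogMissing for files absent
    # from the catalog and LFNZeroSize for files registered with size 0.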

    ##########################################################################
    #
    # This section contains the methods for inserting problematic files into the integrity DB
    #

    def __reportProblematicFiles(self, lfns, reason):
        """ Simple wrapper function around setFileProblematic """
        gLogger.info('The following %s files were found with %s' %
                     (len(lfns), reason))
        for lfn in sortList(lfns):
            gLogger.info(lfn)
        res = self.setFileProblematic(lfns,
                                      reason,
                                      sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with files',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with files')

    def setFileProblematic(self, lfn, reason, sourceComponent=''):
        """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if type(lfn) == types.ListType:
            lfns = lfn
        elif type(lfn) == types.StringType:
            lfns = [lfn]
        else:
            errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setFileProblematic: Attempting to update %s files."
            % len(lfns))
        fileMetadata = {}
        for lfn in lfns:
            fileMetadata[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': '',
                'SE': ''
            }
        res = self.insertProblematic(sourceComponent, fileMetadata)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setFileProblematic: Failed to insert problematics to integrity DB"
            )
        return res

    def __reportProblematicReplicas(self, replicaTuple, se, reason):
        """ Simple wrapper function around setReplicaProblematic """
        gLogger.info('The following %s files had %s at %s' %
                     (len(replicaTuple), reason, se))
        for lfn, _pfn, _se, _reason in sortList(replicaTuple):
            if lfn:
                gLogger.info(lfn)
        res = self.setReplicaProblematic(replicaTuple,
                                         sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with replicas',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with replicas')

    def setReplicaProblematic(self, replicaTuple, sourceComponent=''):
        """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaDict should be of the form {lfn: {'PFN': pfn, 'SE': se, 'Prognosis': prognosis}}

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if type(replicaTuple) == types.TupleType:
            replicaTuple = [replicaTuple]
        elif type(replicaTuple) == types.ListType:
            pass
        else:
            errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas."
            % len(replicaTuple))
        replicaDict = {}
        for lfn, pfn, se, reason in replicaTuple:
            replicaDict[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': pfn,
                'SE': se
            }
        res = self.insertProblematic(sourceComponent, replicaDict)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB"
            )
            return res
        for lfn in replicaDict:
            replicaDict[lfn]['Status'] = 'Problematic'

        res = self.fc.setReplicaStatus(replicaDict)
        if not res['OK']:
            errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
            gLogger.error(errStr, res['Message'])
            return res
        failed = res['Value']['Failed']
        successful = res['Value']['Successful']
        resDict = {'Successful': successful, 'Failed': failed}
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the resolution methods for various prognoses
    #

    def __updateCompletedFiles(self, prognosis, fileID):
        gLogger.info("%s file (%d) is resolved" % (prognosis, fileID))
        return self.setProblematicStatus(fileID, 'Resolved')

    def __returnProblematicError(self, fileID, res):
        self.incrementProblematicRetry(fileID)
        gLogger.error('DataIntegrityClient failure', res['Message'])
        return res


#   def __getRegisteredPFNLFN( self, pfn, storageElement ):
#
#     res = StorageElement( storageElement ).getURL( pfn )
#     if not res['OK']:
#       gLogger.error( "Failed to get registered PFN for physical files", res['Message'] )
#       return res
#     for pfn, error in res['Value']['Failed'].items():
#       gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
#       return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
#     registeredPFN = res['Value']['Successful'][pfn]
#     res = returnSingleResult( self.fc.getLFNForPFN( registeredPFN ) )
#     if ( not res['OK'] ) and re.search( 'No such file or directory', res['Message'] ):
#       return S_OK( False )
#     return S_OK( res['Value'] )

    def __updateReplicaToChecked(self, problematicDict):
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        prognosis = problematicDict['Prognosis']
        problematicDict['Status'] = 'Checked'

        res = returnSingleResult(
            self.fc.setReplicaStatus({lfn: problematicDict}))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        gLogger.info("%s replica (%d) is updated to Checked status" %
                     (prognosis, fileID))
        return self.__updateCompletedFiles(prognosis, fileID)

    def resolveCatalogPFNSizeMismatch(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']
        res = returnSingleResult(StorageElement(se).getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        bkKCatalog = FileCatalog(['BookkeepingDB'])
        res = returnSingleResult(bkKCatalog.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        bookkeepingSize = res['Value']
        if bookkeepingSize == catalogSize == storageSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) matched all registered sizes."
                % fileID)
            return self.__updateReplicaToChecked(problematicDict)
        if catalogSize == bookkeepingSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also"
                % fileID)
            res = returnSingleResult(self.fc.getReplicas(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            if len(res['Value']) <= 1:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has no other replicas."
                    % fileID)
                return S_ERROR(
                    "Not removing catalog file mismatch since it is the only replica"
                )
            else:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..."
                    % fileID)
                res = self.dm.removeReplica(se, lfn)
                if not res['OK']:
                    return self.__returnProblematicError(fileID, res)
                return self.__updateCompletedFiles('CatalogPFNSizeMismatch',
                                                   fileID)
        if (catalogSize != bookkeepingSize) and (bookkeepingSize
                                                 == storageSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size"
                % fileID)
            res = self.__updateReplicaToChecked(problematicDict)
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.changeProblematicPrognosis(fileID,
                                                   'BKCatalogSizeMismatch')
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count"
            % fileID)
        return self.incrementProblematicRetry(fileID)

    def resolvePFNNotRegistered(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)
        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            # The file does not exist in the catalog
            res = returnSingleResult(se.removeFile(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        res = returnSingleResult(se.getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info("PFNNotRegistered replica (%d) found to be missing." %
                         fileID)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        elif not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageMetadata = res['Value']
        if storageMetadata['Lost']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        if storageMetadata['Unavailable']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count"
                % fileID)
            return self.incrementProblematicRetry(fileID)

        # HACK until we can obtain the space token descriptions through GFAL
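        # Note (summary of the branches below): the target SE name is inferred
        # from the storage metadata flags:
        #   not Cached           -> <site>-RAW for '.raw' files, else <site>-RDST
        #   Cached and Migrated  -> <site>_M-DST for /lhcb/data, else <site>_MC_M-DST
        #   Cached, not Migrated -> <site>-DST for /lhcb/data, else <site>_MC-DST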
        site = seName.split('_')[0].split('-')[0]
        if not storageMetadata['Cached']:
            if lfn.endswith('.raw'):
                seName = '%s-RAW' % site
            else:
                seName = '%s-RDST' % site
        elif storageMetadata['Migrated']:
            if lfn.startswith('/lhcb/data'):
                seName = '%s_M-DST' % site
            else:
                seName = '%s_MC_M-DST' % site
        else:
            if lfn.startswith('/lhcb/data'):
                seName = '%s-DST' % site
            else:
                seName = '%s_MC-DST' % site

        problematicDict['SE'] = seName
        res = returnSingleResult(se.getURL(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)

        problematicDict['PFN'] = res['Value']

        res = returnSingleResult(self.fc.addReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        res = returnSingleResult(self.fc.getFileMetadata(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']['Size'] != storageMetadata['Size']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNNotRegistered', fileID)

    def resolveLFNCatalogMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            return self.__updateCompletedFiles('LFNCatalogMissing', fileID)
        # Remove the file from all catalogs
        # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
        res = returnSingleResult(self.fc.removeFile(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        return self.__updateCompletedFiles('LFNCatalogMissing', fileID)

    def resolvePFNMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
        se = problematicDict['SE']
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            gLogger.info("PFNMissing file (%d) no longer exists in catalog" %
                         fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)

        res = returnSingleResult(StorageElement(se).exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            gLogger.info("PFNMissing replica (%d) is no longer missing" %
                         fileID)
            return self.__updateReplicaToChecked(problematicDict)
        gLogger.info("PFNMissing replica (%d) does not exist" % fileID)
        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        replicas = res['Value']
        seSite = se.split('_')[0].split('-')[0]
        found = False
        gLogger.debug("PFNMissing: registered replicas: %s" % replicas)
        for replicaSE in replicas.keys():
            if re.search(seSite, replicaSE):
                found = True
                problematicDict['SE'] = replicaSE
                se = replicaSE
        if not found:
            gLogger.info(
                "PFNMissing replica (%d) is no longer registered at SE. Resolved."
                % fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)
        gLogger.info(
            "PFNMissing replica (%d) does not exist. Removing from catalog..."
            % fileID)
        res = returnSingleResult(self.fc.removeReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if len(replicas) == 1:
            gLogger.info(
                "PFNMissing replica (%d) had a single replica. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'LFNZeroReplicas')
        res = self.dm.replicateAndRegister(problematicDict['LFN'], se)
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('PFNMissing', fileID)
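
    # Recovery flow applied above (summary added for clarity): if the LFN left
    # the catalog or the replica reappeared, the entry is resolved or checked;
    # a replica that is genuinely missing is deregistered from the catalog and,
    # when other copies remain, re-replicated to the same storage element.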

    def resolvePFNUnavailable(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(StorageElement(se).getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            # The file is no longer Unavailable but has now disappeared completely
            gLogger.info(
                "PFNUnavailable replica (%d) found to be missing. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        if (not res['OK']) or res['Value']['Unavailable']:
            gLogger.info(
                "PFNUnavailable replica (%d) found to still be Unavailable" %
                fileID)
            return self.incrementProblematicRetry(fileID)
        if res['Value']['Lost']:
            gLogger.info(
                "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        gLogger.info("PFNUnavailable replica (%d) is no longer Unavailable" %
                     fileID)
        # Need to make the replica okay in the Catalog
        return self.__updateReplicaToChecked(problematicDict)
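
    # State transitions applied above (summary of the code):
    #   storage reports 'File does not exist' -> prognosis changed to 'PFNMissing'
    #   still Unavailable (or query failed)   -> retry count incremented
    #   Lost                                  -> prognosis changed to 'PFNLost'
    #   otherwise                             -> replica set back to 'Checked'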

    def resolvePFNZeroSize(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)

        res = returnSingleResult(se.getFileSize(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info(
                "PFNZeroSize replica (%d) found to be missing. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        elif not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        if storageSize == 0:
            res = returnSingleResult(se.removeFile(lfn))

            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            gLogger.info(
                "PFNZeroSize replica (%d) removed. Updating prognosis" %
                problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')

        res = returnSingleResult(self.fc.getReplicas(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if seName not in res['Value']:
            gLogger.info(
                "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNNotRegistered')
        res = returnSingleResult(self.fc.getFileMetadata(lfn))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']['Size']
        if catalogSize != storageSize:
            gLogger.info(
                "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNZeroSize', fileID)
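
    # Outcomes of resolvePFNZeroSize above (summary of the code):
    #   file absent on storage         -> prognosis 'PFNMissing'
    #   zero-size file                 -> removed from storage, then 'PFNMissing'
    #   replica not in the catalog     -> prognosis 'PFNNotRegistered'
    #   catalog/storage sizes disagree -> prognosis 'CatalogPFNSizeMismatch'
    #   sizes agree                    -> problematic resolved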

    ############################################################################################

    def resolveLFNZeroReplicas(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if res['OK'] and res['Value']:
            gLogger.info("LFNZeroReplicas file (%d) found to have replicas" %
                         fileID)
        else:
            gLogger.info(
                "LFNZeroReplicas file (%d) does not have replicas. Checking storage..."
                % fileID)
            pfnsFound = False
            for storageElementName in sorted(
                    gConfig.getValue(
                        'Resources/StorageElementGroups/Tier1_MC_M-DST', [])):
                res = self.__getStoragePathExists([lfn], storageElementName)
                if res['OK'] and lfn in res['Value']:
                    gLogger.info(
                        "LFNZeroReplicas file (%d) found storage file at %s" %
                        (fileID, storageElementName))
                    self.__reportProblematicReplicas(
                        [(lfn, 'deprecatedUrl', storageElementName,
                          'PFNNotRegistered')], storageElementName,
                        'PFNNotRegistered')
                    pfnsFound = True
            if not pfnsFound:
                gLogger.info(
                    "LFNZeroReplicas file (%d) did not have storage files. Removing..."
                    % fileID)
                res = returnSingleResult(self.fc.removeFile(lfn))
                if not res['OK']:
                    gLogger.error('DataIntegrityClient: failed to remove file',
                                  res['Message'])
                    # Increment the number of retries for this file
                    self.incrementProblematicRetry(fileID)
                    return res
                gLogger.info("LFNZeroReplicas file (%d) removed from catalog" %
                             fileID)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('LFNZeroReplicas', fileID)
Ejemplo n.º 17
0
class DataIntegrityClient( Client ):

  """
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB

  """

  def __init__( self, **kwargs ):

    super(DataIntegrityClient, self).__init__( **kwargs )
    self.setServer( 'DataManagement/DataIntegrity' )
    self.dm = DataManager()
    self.fc = FileCatalog()

  def setFileProblematic( self, lfn, reason, sourceComponent = '' ):
    """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if isinstance( lfn, list ):
      lfns = lfn
    elif isinstance( lfn, basestring ):
      lfns = [lfn]
    else:
      errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setFileProblematic: Attempting to update %s files." % len( lfns ) )
    fileMetadata = {}
    for lfn in lfns:
      fileMetadata[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':'', 'SE':''}
    res = self.insertProblematic( sourceComponent, fileMetadata )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB" )
    return res
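
  # Hypothetical usage sketch (the LFN, reason and component name below are
  # placeholders, not from the source):
  #
  #   client = DataIntegrityClient()
  #   res = client.setFileProblematic( '/lhcb/data/example.raw',
  #                                    'LFNCatalogMissing',
  #                                    sourceComponent = 'SomeAgent' )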

  def reportProblematicReplicas( self, replicaTuple, se, reason ):
    """ Simple wrapper function around setReplicaProblematic """
    gLogger.info( 'The following %s files had %s at %s' % ( len( replicaTuple ), reason, se ) )
    for lfn, _pfn, se, reason in sorted( replicaTuple ):
      if lfn:
        gLogger.info( lfn )
    res = self.setReplicaProblematic( replicaTuple, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with replicas', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with replicas' )

  def setReplicaProblematic( self, replicaTuple, sourceComponent = '' ):
    """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaTuple should be a tuple of the form ( lfn, pfn, se, prognosis ), or a list of such tuples, where:

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if isinstance( replicaTuple, tuple ):
      replicaTuple = [replicaTuple]
    elif isinstance( replicaTuple, list ):
      pass
    else:
      errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas." % len( replicaTuple ) )
    replicaDict = {}
    for lfn, pfn, se, reason in replicaTuple:
      replicaDict[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':pfn, 'SE':se}
    res = self.insertProblematic( sourceComponent, replicaDict )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB" )
      return res
    for lfn in replicaDict.keys():
      replicaDict[lfn]['Status'] = 'Problematic'

    res = self.fc.setReplicaStatus( replicaDict )
    if not res['OK']:
      errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
      gLogger.error( errStr, res['Message'] )
      return res
    failed = res['Value']['Failed']
    successful = res['Value']['Successful']
    resDict = {'Successful':successful, 'Failed':failed}
    return S_OK( resDict )
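
  # Hypothetical usage sketch: each entry is ( lfn, pfn, se, prognosis ); the
  # values below are placeholders, not from the source:
  #
  #   res = client.setReplicaProblematic(
  #       ( '/lhcb/data/example.raw', '', 'CERN-RAW', 'PFNMissing' ),
  #       sourceComponent = 'DataIntegrityClient' )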

  ##########################################################################
  #
  # This section contains the resolution methods for various prognoses
  #

  def __updateCompletedFiles( self, prognosis, fileID ):
    gLogger.info( "%s file (%d) is resolved" % ( prognosis, fileID ) )
    return self.setProblematicStatus( fileID, 'Resolved' )

  def __returnProblematicError( self, fileID, res ):
    self.incrementProblematicRetry( fileID )
    gLogger.error( 'DataIntegrityClient failure', res['Message'] )
    return res

  def __updateReplicaToChecked( self, problematicDict ):
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']
    prognosis = problematicDict['Prognosis']
    problematicDict['Status'] = 'Checked'

    res = returnSingleResult( self.fc.setReplicaStatus( {lfn:problematicDict} ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    gLogger.info( "%s replica (%d) is updated to Checked status" % ( prognosis, fileID ) )
    return self.__updateCompletedFiles( prognosis, fileID )

  def resolveCatalogPFNSizeMismatch( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
    lfn = problematicDict['LFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']


    res = returnSingleResult( self.fc.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']
    res = returnSingleResult( StorageElement( se ).getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    bkKCatalog = FileCatalog( ['BookkeepingDB'] )
    res = returnSingleResult( bkKCatalog.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    bookkeepingSize = res['Value']
    if bookkeepingSize == catalogSize == storageSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    if catalogSize == bookkeepingSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID )
      res = returnSingleResult( self.fc.getReplicas( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      if len( res['Value'] ) <= 1:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID )
        return S_ERROR( "Not removing catalog file mismatch since it is the only replica" )
      else:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID )
        res = self.dm.removeReplica( se, lfn )
        if not res['OK']:
          return self.__returnProblematicError( fileID, res )
        return self.__updateCompletedFiles( 'CatalogPFNSizeMismatch', fileID )
    if ( catalogSize != bookkeepingSize ) and ( bookkeepingSize == storageSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID )
      res = self.__updateReplicaToChecked( problematicDict )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.changeProblematicPrognosis( fileID, 'BKCatalogSizeMismatch' )
    gLogger.info( "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count" % fileID )
    return self.incrementProblematicRetry( fileID )

  #FIXME: Unused?
  def resolvePFNNotRegistered( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
    lfn = problematicDict['LFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )
    res = returnSingleResult( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      # The file does not exist in the catalog
      res = returnSingleResult( se.removeFile( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    res = returnSingleResult( se.getFileMetadata( lfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNNotRegistered replica (%d) found to be missing." % fileID )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    elif not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageMetadata = res['Value']
    if storageMetadata['Lost']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    if storageMetadata['Unavailable']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count" % fileID )
      return self.incrementProblematicRetry( fileID )

    # HACK until we can obtain the space token descriptions through GFAL
    site = seName.split( '_' )[0].split( '-' )[0]
    if not storageMetadata['Cached']:
      if lfn.endswith( '.raw' ):
        seName = '%s-RAW' % site
      else:
        seName = '%s-RDST' % site
    elif storageMetadata['Migrated']:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s_M-DST' % site
      else:
        seName = '%s_MC_M-DST' % site
    else:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s-DST' % site
      else:
        seName = '%s_MC-DST' % site

    problematicDict['SE'] = seName
    res = returnSingleResult( se.getURL( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )

    problematicDict['PFN'] = res['Value']

    res = returnSingleResult( self.fc.addReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    res = returnSingleResult( self.fc.getFileMetadata( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']['Size'] != storageMetadata['Size']:
      gLogger.info( "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )

  #FIXME: Unused?
  def resolveLFNCatalogMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )
    # Remove the file from all catalogs
    # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
    res = returnSingleResult( self.fc.removeFile( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )

  #FIXME: Unused?
  def resolvePFNMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
    se = problematicDict['SE']
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      gLogger.info( "PFNMissing file (%d) no longer exists in catalog" % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )

    res = returnSingleResult( StorageElement( se ).exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      gLogger.info( "PFNMissing replica (%d) is no longer missing" % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    gLogger.info( "PFNMissing replica (%d) does not exist" % fileID )
    res = returnSingleResult( self.fc.getReplicas( lfn, allStatus = True ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    replicas = res['Value']
    seSite = se.split( '_' )[0].split( '-' )[0]
    found = False
    gLogger.debug( "PFNMissing: registered replicas: %s" % replicas )
    for replicaSE in replicas.keys():
      if re.search( seSite, replicaSE ):
        found = True
        problematicDict['SE'] = replicaSE
        se = replicaSE
    if not found:
      gLogger.info( "PFNMissing replica (%d) is no longer registered at SE. Resolved." % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )
    gLogger.info( "PFNMissing replica (%d) does not exist. Removing from catalog..." % fileID )
    res = returnSingleResult( self.fc.removeReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if len( replicas ) == 1:
      gLogger.info( "PFNMissing replica (%d) had a single replica. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'LFNZeroReplicas' )
    res = self.dm.replicateAndRegister( problematicDict['LFN'], se )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'PFNMissing', fileID )

  #FIXME: Unused?
  def resolvePFNUnavailable( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
    lfn = problematicDict['LFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = returnSingleResult( StorageElement( se ).getFileMetadata( lfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      # The file is no longer Unavailable but has now disappeared completely
      gLogger.info( "PFNUnavailable replica (%d) found to be missing. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    if ( not res['OK'] ) or res['Value']['Unavailable']:
      gLogger.info( "PFNUnavailable replica (%d) found to still be Unavailable" % fileID )
      return self.incrementProblematicRetry( fileID )
    if res['Value']['Lost']:
      gLogger.info( "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    gLogger.info( "PFNUnavailable replica (%d) is no longer Unavailable" % fileID )
    # Need to make the replica okay in the Catalog
    return self.__updateReplicaToChecked( problematicDict )

  #FIXME: Unused?
  def resolvePFNZeroSize( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
    lfn = problematicDict['LFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )

    res = returnSingleResult( se.getFileSize( lfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNZeroSize replica (%d) found to be missing. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    elif not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    if storageSize == 0:
      res = returnSingleResult( se.removeFile( lfn ) )

      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      gLogger.info( "PFNZeroSize replica (%d) removed. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )


    res = returnSingleResult( self.fc.getReplicas( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if seName not in res['Value']:
      gLogger.info( "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNNotRegistered' )
    res = returnSingleResult( self.fc.getFileMetadata( lfn ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']['Size']
    if catalogSize != storageSize:
      gLogger.info( "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNZeroSize', fileID )

  ############################################################################################

  #FIXME: Unused?
  def resolveLFNZeroReplicas( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.getReplicas( lfn, allStatus = True ) )
    if res['OK'] and res['Value']:
      gLogger.info( "LFNZeroReplicas file (%d) found to have replicas" % fileID )
    else:
      gLogger.info( "LFNZeroReplicas file (%d) does not have replicas. Checking storage..." % fileID )
      pfnsFound = False
      for storageElementName in sorted( gConfig.getValue( 'Resources/StorageElementGroups/Tier1_MC_M-DST', [] ) ):
        res = self.__getStoragePathExists( [lfn], storageElementName )
        if res['OK'] and lfn in res['Value']:
          gLogger.info( "LFNZeroReplicas file (%d) found storage file at %s" % ( fileID, storageElementName ) )
          self.reportProblematicReplicas( [( lfn, 'deprecatedUrl', storageElementName, 'PFNNotRegistered' )], storageElementName, 'PFNNotRegistered' )
          pfnsFound = True
      if not pfnsFound:
        gLogger.info( "LFNZeroReplicas file (%d) did not have storage files. Removing..." % fileID )
        res = returnSingleResult( self.fc.removeFile( lfn ) )
        if not res['OK']:
          gLogger.error( 'DataIntegrityClient: failed to remove file', res['Message'] )
          # Increment the number of retries for this file
          self.incrementProblematicRetry( fileID )
          return res
        gLogger.info( "LFNZeroReplicas file (%d) removed from catalog" % fileID )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'LFNZeroReplicas', fileID )


  def _reportProblematicFiles( self, lfns, reason ):
    """ Simple wrapper function around setFileProblematic
    """
    gLogger.info( 'The following %s files were found with %s' % ( len( lfns ), reason ) )
    for lfn in sorted( lfns ):
      gLogger.info( lfn )
    res = self.setFileProblematic( lfns, reason, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with files', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with files' )
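
  # Hypothetical usage of the wrapper above (the LFN is a placeholder):
  #
  #   client = DataIntegrityClient()
  #   client._reportProblematicFiles( [ '/lhcb/data/example.raw' ],
  #                                   'LFNCatalogMissing' )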