Example #1 (score: 0)
  def initialize( self ):
    """ agent's initialisation

    Creates the TransferDB handler, reads the configuration options and
    configures the optional checksum test.

    :param self: self reference
    """
    # # save transferDB handler
    self.transferDB = TransferDB()
    # # read config options
    self.maxJobsPerChannel = self.am_getOption( 'MaxJobsPerChannel', self.maxJobsPerChannel )
    self.log.info( "max jobs/channel = %s" % self.maxJobsPerChannel )

    # # checksum test
    self.cksmTest = bool( self.am_getOption( "ChecksumTest", False ) )
    # # checksum type (only relevant when the test is enabled)
    if self.cksmTest:
      self.cksmType = str( self.am_getOption( "ChecksumType", self.__defaultCksmType ) ).upper()
      if self.cksmType and self.cksmType not in ( "ADLER32", "MD5", "SHA1" ):
        self.log.warn( "unknown checksum type: %s, will use default %s" % ( self.cksmType, self.__defaultCksmType ) )
        self.cksmType = self.__defaultCksmType

    # BUGFIX: the original dict literal evaluated the "enabled" message
    # (and hence self.cksmType) even when the checksum test was disabled,
    # which raises AttributeError if self.cksmType was never set; build
    # the message lazily instead.
    if self.cksmTest:
      cksmMsg = "enabled using %s checksum" % self.cksmType
    else:
      cksmMsg = "disabled"
    self.log.info( "checksum test is %s" % cksmMsg )

    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    self.filesBeingStaged = {}
    return S_OK()
Example #2 (score: 0)
    def initialize(self):
        """ Agent initialisation.

        Instantiates the TransferDB handler and selects the DataManager
        shifter proxy.
        """
        self.TransferDB = TransferDB()

        # Run under the proxy defined at /Operations/Shifter/DataManager;
        # the shifterProxy configuration option can override this default.
        self.am_setOption('shifterProxy', 'DataManager')

        return S_OK()
Example #3 (score: 0)
 def initialize(self):
     """ Agent initialisation.

     Creates the TransferDB handler, selects the DataManager shifter proxy
     and starts a daemonized thread pool sized by MinThreads/MaxThreads.
     """
     self.transferDB = TransferDB()
     self.am_setOption("shifterProxy", "DataManager")
     self.minThreads = self.am_getOption("MinThreads", self.minThreads)
     self.maxThreads = self.am_getOption("MaxThreads", self.maxThreads)
     # Normalise the limits: use absolute values and enforce min <= max.
     bounds = sorted([abs(self.minThreads), abs(self.maxThreads)])
     self.minThreads, self.maxThreads = bounds[0], bounds[1]
     self.log.info("ThreadPool min threads = %s" % self.minThreads)
     self.log.info("ThreadPool max threads = %s" % self.maxThreads)
     self.threadPool = ThreadPool(self.minThreads, self.maxThreads)
     self.threadPool.daemonize()
     return S_OK()
Example #4 (score: 0)
    def initialize(self):
        """ Agent initialisation.

        Resolves the agent's configuration section and instantiates the DB
        and client helpers used during execution.
        """
        self.section = PathFinder.getAgentSection(AGENT_NAME)
        # Database handlers and clients used by the agent.
        self.RequestDB = RequestDBMySQL()
        self.TransferDB = TransferDB()
        self.DataLog = DataLoggingClient()
        self.factory = StorageFactory()
        self.rm = ReplicaManager()

        # Run under the proxy defined at /Operations/Shifter/DataManager;
        # the shifterProxy configuration option can override this default.
        self.am_setOption('shifterProxy', 'DataManager')

        return S_OK()
Example #5 (score: 0)
 def transferDB(cls):
     """ TransferDB getter: create the handler on first access and cache it.

     :param cls: class reference
     """
     db = cls.__transferDB
     if not db:
         db = TransferDB()
         cls.__transferDB = db
     return db
Example #6 (score: 0)
  def initialize( self ):
    """ agent's initialisation

    Creates the TransferDB handler, reads the configuration options and
    configures the optional checksum test.

    :param self: self reference
    """
    ## save transferDB handler
    self.transferDB = TransferDB()
    ## read config options
    self.maxJobsPerChannel = self.am_getOption( 'MaxJobsPerChannel', 2 )
    self.log.info("max jobs/channel = %s" % self.maxJobsPerChannel )

    ## checksum test
    self.cksmTest = bool( self.am_getOption( "ChecksumTest", False ) )
    ## checksum type (only relevant when the test is enabled)
    if self.cksmTest:
      self.cksmType = str( self.am_getOption( "ChecksumType", self.__defaultCksmType ) ).upper()
      if self.cksmType and self.cksmType not in ( "ADLER32", "MD5", "SHA1" ):
        self.log.warn("unknown checksum type: %s, will use default %s" % ( self.cksmType, self.__defaultCksmType ) )
        self.cksmType = self.__defaultCksmType

    ## BUGFIX: the original dict literal evaluated the "enabled" message
    ## (and hence self.cksmType) even when the checksum test was disabled,
    ## which raises AttributeError if self.cksmType was never set; build
    ## the message lazily instead.
    if self.cksmTest:
      cksmMsg = "enabled using %s checksum" % self.cksmType
    else:
      cksmMsg = "disabled"
    self.log.info( "checksum test is %s" % cksmMsg )

    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )
    return S_OK()
Example #7 (score: 0)
    def transferDB(self):
        """ TransferDB facade.

        Lazily instantiates the TransferDB handler and caches it on the
        instance.

        :param self: self reference
        """
        db = self.__transferDB
        if not db:
            db = TransferDB()
            self.__transferDB = db
        return db
Example #8 (score: 0)
def initializeTransferDBMonitoringHandler(serviceInfo):
    """ Handler initialization: create the module-level TransferDB handle.

    :param tuple serviceInfo: service info
    """
    global gTransferDB
    gTransferDB = TransferDB()
    return S_OK()
Example #9 (score: 0)
 def initialize( self ):
   """ Agent initialisation: create the TransferDB handler and select the
   DataManager shifter proxy. """
   self.transferDB = TransferDB()
   # Run under the proxy defined at /Operations/Shifter/DataManager; the
   # shifterProxy configuration option can override this default.
   self.am_setOption( 'shifterProxy', 'DataManager' )
   return S_OK()
Example #10 (score: 0)
  def initialize( self ):
    """ Agent initialisation.

    Creates the TransferDB handler, reads the MaxJobsPerChannel option
    (default 2) and selects the DataManager shifter proxy.
    """
    self.TransferDB = TransferDB()
    self.maxJobsPerChannel = self.am_getOption( 'MaxJobsPerChannel', 2 )
    # Run under the proxy defined at /Operations/Shifter/DataManager; the
    # shifterProxy configuration option can override this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )
    return S_OK()
Example #11 (score: 0)
 def initialize( self ):
   """ Agent initialisation: TransferDB handler, shifter proxy and a
   daemonized thread pool sized from MinThreads/MaxThreads. """
   self.transferDB = TransferDB()
   self.am_setOption( "shifterProxy", "DataManager" )
   self.minThreads = self.am_getOption( "MinThreads", self.minThreads )
   self.maxThreads = self.am_getOption( "MaxThreads", self.maxThreads )
   # Normalise: absolute values; smallest becomes min, largest becomes max.
   limits = sorted( [ abs( self.minThreads ), abs( self.maxThreads ) ] )
   self.minThreads = limits[0]
   self.maxThreads = limits[1]
   self.log.info( "ThreadPool min threads = %s" % self.minThreads )
   self.log.info( "ThreadPool max threads = %s" % self.maxThreads )
   self.threadPool = ThreadPool( self.minThreads, self.maxThreads )
   self.threadPool.daemonize()
   return S_OK()
Example #12 (score: 0)
  def initialize( self ):
    """ Agent initialisation.

    Resolves the agent's configuration section and instantiates the DB and
    client helpers used during execution.
    """
    self.section = PathFinder.getAgentSection( AGENT_NAME )
    # Database handlers and clients used by the agent.
    self.RequestDB = RequestDBMySQL()
    self.TransferDB = TransferDB()
    self.DataLog = DataLoggingClient()
    self.factory = StorageFactory()
    self.rm = ReplicaManager()
    # Run under the proxy defined at /Operations/Shifter/DataManager; the
    # shifterProxy configuration option can override this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )
    return S_OK()
def initializeTransferDBMonitoringHandler( serviceInfo ):
  """ handler initialization

  Creates the module-level TransferDB handle and checks that the configured
  data path exists and is a writable directory, creating it if missing.

  :param tuple serviceInfo: service info
  """

  global transferDB
  transferDB = TransferDB()

  monitoringSection = PathFinder.getServiceSection( "DataManagement/TransferDBMonitoring" )
  # Get data location
  retDict = gConfig.getOption( "%s/DataLocation" % monitoringSection )
  if not retDict["OK"]:
    return retDict
  dataPath = retDict["Value"].strip()
  # Relative paths are anchored at the instance path.  Using startswith()
  # also avoids an IndexError on an empty DataLocation value (the original
  # indexed dataPath[0] directly).
  if not dataPath.startswith( "/" ):
    dataPath = os.path.realpath( "%s/%s" % ( gConfig.getValue( '/LocalSite/InstancePath', rootPath ), dataPath ) )
  gLogger.info( "Data will be written into %s path" % dataPath )

  ## check data path
  try:
    if os.path.exists( dataPath ):
      ## must be a directory...
      if not os.path.isdir( dataPath ):
        return S_ERROR( "Data path %s exists, but points to a file!" % dataPath )
      ## ...and writable
      if not os.access( dataPath, os.W_OK ):
        return S_ERROR( "Data path %s exists, but it is not writable!" % dataPath )
    else:
      ## create it
      os.makedirs( dataPath )
    # BUGFIX: the original fell through after os.makedirs() and implicitly
    # returned None; always report success explicitly.
    return S_OK()
  # BUGFIX: replaced the Python 2-only "except X, e" syntax with "as".
  except ( OSError, IOError ) as anError:
    return S_ERROR( str( anError ) )
Example #14 (score: 0)
class FTSMonitorAgent( AgentModule ):
  """ Agent monitoring submitted FTS requests.

  For every active FTS request found in the TransferDB the agent queries the
  FTS server for a status summary and, once a request is terminal, propagates
  the per-file success/failure states back to the Channel and FileToFTS
  tables.
  """

  def initialize( self ):
    """ Agent initialisation: create the TransferDB handler and run under
    the DataManager shifter proxy. """
    self.TransferDB = TransferDB()

    # This sets the Default Proxy to used as that defined under 
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()

  def execute( self ):
    """ Execution in one cycle: fetch all active FTS requests and monitor
    each of them in turn. """

    #########################################################################
    #  Get the details for all active FTS requests
    gLogger.info( 'Obtaining requests to monitor' )
    res = self.TransferDB.getFTSReq()
    if not res['OK']:
      gLogger.error( "Failed to get FTS requests", res['Message'] )
      return res
    if not res['Value']:
      gLogger.info( "FTSMonitorAgent. No FTS requests found to monitor." )
      return S_OK()
    ftsReqs = res['Value']
    gLogger.info( 'Found %s FTS jobs' % len( ftsReqs ) )

    #######################################################################
    # Check them all....
    i = 1
    for ftsReqDict in ftsReqs:
      infoStr = "\n\n##################################################################################\n\n"
      infoStr = "%sStarting monitoring loop %s of %s\n\n" % ( infoStr, i, len( ftsReqs ) )
      gLogger.info( infoStr )
      # NOTE(review): the result of monitorTransfer is assigned but never
      # checked, so a failed monitoring pass does not abort the cycle.
      res = self.monitorTransfer( ftsReqDict )
      i += 1
    return S_OK()

  def monitorTransfer( self, ftsReqDict ):
    """ Monitors transfer  obtained from TransferDB

    :param dict ftsReqDict: FTS request record (FTSReqID, FTSGuid,
        FTSServer, ChannelID, SourceSE, TargetSE)
    """

    ftsReqID = ftsReqDict['FTSReqID']
    ftsGUID = ftsReqDict['FTSGuid']
    ftsServer = ftsReqDict['FTSServer']
    channelID = ftsReqDict['ChannelID']
    sourceSE = ftsReqDict['SourceSE']
    targetSE = ftsReqDict['TargetSE']

    oFTSRequest = FTSRequest()
    oFTSRequest.setFTSServer( ftsServer )
    oFTSRequest.setFTSGUID( ftsGUID )
    oFTSRequest.setSourceSE( sourceSE )
    oFTSRequest.setTargetSE( targetSE )

    #########################################################################
    # Perform summary update of the FTS Request and update FTSReq entries.
    gLogger.info( 'Perform summary update of the FTS Request' )
    infoStr = "Monitoring FTS Job:\n\n"
    infoStr = "%sglite-transfer-status -s %s -l %s\n" % ( infoStr, ftsServer, ftsGUID )
    infoStr = "%s%s%s\n" % ( infoStr, 'FTS GUID:'.ljust( 20 ), ftsGUID )
    infoStr = "%s%s%s\n\n" % ( infoStr, 'FTS Server:'.ljust( 20 ), ftsServer )
    gLogger.info( infoStr )
    res = oFTSRequest.summary()
    # Record the monitoring timestamp regardless of the summary outcome.
    self.TransferDB.setFTSReqLastMonitor( ftsReqID )
    if not res['OK']:
      gLogger.error( "Failed to update the FTS request summary", res['Message'] )
      return res
    res = oFTSRequest.dumpSummary()
    if not res['OK']:
      gLogger.error( "Failed to get FTS request summary", res['Message'] )
      return res
    gLogger.info( res['Value'] )
    res = oFTSRequest.getPercentageComplete()
    if not res['OK']:
      gLogger.error( "Failed to get FTS percentage complete", res['Message'] )
      return res
    gLogger.info( 'FTS Request found to be %.1f percent complete' % res['Value'] )
    self.TransferDB.setFTSReqAttribute( ftsReqID, 'PercentageComplete', res['Value'] )
    self.TransferDB.addLoggingEvent( ftsReqID, res['Value'] )

    #########################################################################
    # Update the information in the TransferDB if the transfer is terminal.
    res = oFTSRequest.isRequestTerminal()
    if not res['OK']:
      gLogger.error( "Failed to determine whether FTS request terminal", res['Message'] )
      return res
    if not res['Value']:
      return S_OK()
    gLogger.info( 'FTS Request found to be terminal, updating file states' )

    #########################################################################
    # Get the LFNS associated to the FTS request
    gLogger.info( 'Obtaining the LFNs associated to this request' )
    res = self.TransferDB.getFTSReqLFNs( ftsReqID )
    if not res['OK']:
      gLogger.error( "Failed to obtain FTS request LFNs", res['Message'] )
      return res
    files = res['Value']
    if not files:
      gLogger.error( 'No files present for transfer' )
      return S_ERROR( 'No files were found in the DB' )
    lfns = files.keys()
    gLogger.info( 'Obtained %s files' % len( lfns ) )
    for lfn in lfns:
      oFTSRequest.setLFN( lfn )

    res = oFTSRequest.monitor()
    if not res['OK']:
      gLogger.error( "Failed to perform detailed monitoring of FTS request", res['Message'] )
      return res
    res = oFTSRequest.getFailed()
    if not res['OK']:
      gLogger.error( "Failed to obtained failed files for FTS request", res['Message'] )
      return res
    failedFiles = res['Value']
    res = oFTSRequest.getDone()
    if not res['OK']:
      gLogger.error( "Failed to obtained successful files for FTS request", res['Message'] )
      return res
    completedFiles = res['Value']

    # Build the FileToFTS update tuples for completed files:
    # ( fileID, status, reason, retries, transferTime ).
    fileToFTSUpdates = []
    completedFileIDs = []
    for lfn in completedFiles:
      fileID = files[lfn]
      completedFileIDs.append( fileID )
      transferTime = 0
      res = oFTSRequest.getTransferTime( lfn )
      if res['OK']:
        transferTime = res['Value']
      fileToFTSUpdates.append( ( fileID, 'Completed', '', 0, transferTime ) )

    # Failed files: missing-source failures are rescheduled, others retried.
    filesToRetry = []
    filesToReschedule = []
    for lfn in failedFiles:
      fileID = files[lfn]
      failReason = ''
      res = oFTSRequest.getFailReason( lfn )
      if res['OK']:
        failReason = res['Value']
      if self.missingSource( failReason ):
        gLogger.error( 'The source SURL does not exist.', '%s %s' % ( lfn, oFTSRequest.getSourceSURL( lfn ) ) )
        filesToReschedule.append( fileID )
      else:
        filesToRetry.append( fileID )
      gLogger.error( 'Failed to replicate file on channel.', "%s %s" % ( channelID, failReason ) )
      fileToFTSUpdates.append( ( fileID, 'Failed', failReason, 0, 0 ) )

    # Push all state changes to the TransferDB; the request is finalized
    # only when every update succeeded.
    allUpdated = True
    if filesToRetry:
      gLogger.info( 'Updating the Channel table for files to retry' )
      res = self.TransferDB.resetFileChannelStatus( channelID, filesToRetry )
      if not res['OK']:
        gLogger.error( 'Failed to update the Channel table for file to retry.', res['Message'] )
        allUpdated = False
    for fileID in filesToReschedule:
      gLogger.info( 'Updating the Channel table for files to reschedule' )
      res = self.TransferDB.setFileChannelStatus( channelID, fileID, 'Failed' )
      if not res['OK']:
        gLogger.error( 'Failed to update Channel table for failed files.', res['Message'] )
        allUpdated = False

    if completedFileIDs:
      gLogger.info( 'Updating the Channel table for successful files' )
      res = self.TransferDB.updateCompletedChannelStatus( channelID, completedFileIDs )
      if not res['OK']:
        gLogger.error( 'Failed to update the Channel table for successful files.', res['Message'] )
        allUpdated = False
      gLogger.info( 'Updating the Channel table for ancestors of successful files' )
      res = self.TransferDB.updateAncestorChannelStatus( channelID, completedFileIDs )
      if not res['OK']:
        gLogger.error( 'Failed to update the Channel table for ancestors of successful files.', res['Message'] )
        allUpdated = False

      #
      #  cibak: temporary switch off of regitstration in FileToCat
      #
      #gLogger.info( 'Updating the FileToCat table for successful files' )
      #res = self.TransferDB.setRegistrationWaiting( channelID, completedFileIDs )
      #if not res['OK']:
      #  gLogger.error( 'Failed to update the FileToCat table for successful files.', res['Message'] )
      #  allUpdated = False

    if fileToFTSUpdates:
      gLogger.info( 'Updating the FileToFTS table for files' )
      res = self.TransferDB.setFileToFTSFileAttributes( ftsReqID, channelID, fileToFTSUpdates )
      if not res['OK']:
        gLogger.error( 'Failed to update the FileToFTS table for files.', res['Message'] )
        allUpdated = False

    if allUpdated:
      res = oFTSRequest.finalize()
      if not res['OK']:
        gLogger.error( "Failed to perform the finalization for the FTS request", res['Message'] )
        return res

      gLogger.info( 'Adding logging event for FTS request' )
      # Now set the FTSReq status to terminal so that it is not monitored again
      res = self.TransferDB.addLoggingEvent( ftsReqID, 'Finished' )
      if not res['OK']:
        gLogger.error( 'Failed to add logging event for FTS Request', res['Message'] )

      gLogger.info( 'Updating FTS request status' )
      res = self.TransferDB.setFTSReqStatus( ftsReqID, 'Finished' )
      if not res['OK']:
        gLogger.error( 'Failed update FTS Request status', res['Message'] )
    return S_OK()

  def missingSource( self, failReason ):
    """ Check whether the FTS failure message indicates a missing source file.

    :param str failReason: failure message returned by the FTS server
    :return: 1 when the message matches a known missing-source pattern,
        0 otherwise
    """
    missingSourceErrors = ['SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] Failed',
                           'SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] No such file or directory',
                           'SOURCE error during PREPARATION phase: \[INVALID_PATH\] Failed',
                           'SOURCE error during PREPARATION phase: \[INVALID_PATH\] The requested file either does not exist',
                           'TRANSFER error during TRANSFER phase: \[INVALID_PATH\] the server sent an error response: 500 500 Command failed. : open error: No such file or directory',
                           'SOURCE error during TRANSFER_PREPARATION phase: \[USER_ERROR\] source file doesnt exist']
    for error in missingSourceErrors:
      if re.search( error, failReason ):
        return 1
    return 0
Example #15 (score: 0)
class FTSMonitorAgent( AgentModule ):
  """
  .. class:: FTSMonitorAgent

  Agent monitoring submitted FTS requests: updates request summaries in the
  TransferDB, cleans up jobs that have expired on the FTS server and, for
  terminal requests, propagates per-file states to the Channel, FileToFTS
  and FileToCat tables.
  """
  ## transfer DB handle
  transferDB = None

  def initialize( self ):
    """ agent's initialisation """
    self.transferDB = TransferDB()
    # This sets the Default Proxy to used as that defined under 
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )
    return S_OK()

  def execute( self ):
    """ execution in one cycle """

    #########################################################################
    #  Get the details for all active FTS requests
    self.log.info( 'Obtaining requests to monitor' )
    res = self.transferDB.getFTSReq()
    if not res['OK']:
      self.log.error( "Failed to get FTS requests", res['Message'] )
      return res
    if not res['Value']:
      self.log.info( "FTSMonitorAgent. No FTS requests found to monitor." )
      return S_OK()
    ftsReqs = res['Value']
    self.log.info( 'Found %s FTS jobs' % len( ftsReqs ) )

    #######################################################################
    # Check them all....
    i = 1
    for ftsReqDict in ftsReqs:
      infoStr = "\n\n##################################################################################\n\n"
      infoStr = "%sStarting monitoring loop %s of %s\n\n" % ( infoStr, i, len( ftsReqs ) )
      self.log.info( infoStr )
      # NOTE(review): the result of monitorTransfer is assigned but never
      # checked, so a failed monitoring pass does not abort the cycle.
      res = self.monitorTransfer( ftsReqDict )
      i += 1
    return S_OK()

  def monitorTransfer( self, ftsReqDict ):
    """ monitors transfer  obtained from TransferDB

    :param dict ftsReqDict: FTS request record (FTSReqID, FTSGuid,
        FTSServer, ChannelID, SourceSE, TargetSE)
    """

    ftsReqID = ftsReqDict['FTSReqID']
    ftsGUID = ftsReqDict['FTSGuid']
    ftsServer = ftsReqDict['FTSServer']
    channelID = ftsReqDict['ChannelID']
    sourceSE = ftsReqDict['SourceSE']
    targetSE = ftsReqDict['TargetSE']

    oFTSRequest = FTSRequest()
    oFTSRequest.setFTSServer( ftsServer )
    oFTSRequest.setFTSGUID( ftsGUID )
    oFTSRequest.setSourceSE( sourceSE )
    oFTSRequest.setTargetSE( targetSE )

    #########################################################################
    # Perform summary update of the FTS Request and update FTSReq entries.
    self.log.info( 'Perform summary update of the FTS Request' )
    infoStr = "Monitoring FTS Job:\n\n"
    infoStr = "%sglite-transfer-status -s %s -l %s\n" % ( infoStr, ftsServer, ftsGUID )
    infoStr = "%s%s%s\n" % ( infoStr, 'FTS GUID:'.ljust( 20 ), ftsGUID )
    infoStr = "%s%s%s\n\n" % ( infoStr, 'FTS Server:'.ljust( 20 ), ftsServer )
    self.log.info( infoStr )
    res = oFTSRequest.summary()
    # Record the monitoring timestamp regardless of the summary outcome.
    self.transferDB.setFTSReqLastMonitor( ftsReqID )
    if not res['OK']:
      self.log.error( "Failed to update the FTS request summary", res['Message'] )
      if "getTransferJobSummary2: Not authorised to query request" in res["Message"]:
        self.log.error("FTS job is not existing at the FTS server anymore, will clean it up on TransferDB side")

        ## get fileIDs
        fileIDs = self.transferDB.getFTSReqFileIDs( ftsReqID )
        if not fileIDs["OK"]:
          self.log.error("Unable to retrieve FileIDs associated to %s request" % ftsReqID )
          return fileIDs
        fileIDs = fileIDs["Value"]

        ## update FileToFTS table, this is just a clean up, no worry if somethings goes wrong
        for fileID in fileIDs:
          fileStatus = self.transferDB.setFileToFTSFileAttribute( ftsReqID, fileID, 
                                                                  "Status", "Failed" )
          if not fileStatus["OK"]:
            self.log.error("Unable to set FileToFTS status to Failed for FileID %s: %s" % ( fileID, 
                                                                                           fileStatus["Message"] ) )

          failReason = self.transferDB.setFileToFTSFileAttribute( ftsReqID, fileID, 
                                                                  "Reason", "FTS job expired on server" )
          if not failReason["OK"]:
            self.log.error("Unable to set FileToFTS reason for FileID %s: %s" % ( fileID, 
                                                                                 failReason["Message"] ) )

        ## update Channel table
        resetChannels = self.transferDB.resetFileChannelStatus( channelID, fileIDs )
        if not resetChannels["OK"]:
          self.log.error("Failed to reset Channel table for files to retry")
          return resetChannels

        ## update FTSReq table
        self.log.info( 'Setting FTS request status to Finished' )
        ftsReqStatus = self.transferDB.setFTSReqStatus( ftsReqID, 'Finished' )
        if not ftsReqStatus['OK']:
          self.log.error( 'Failed update FTS Request status', ftsReqStatus['Message'] )
          return ftsReqStatus
        ## if we land here, everything should be OK
        return S_OK()

      return res
    res = oFTSRequest.dumpSummary()
    if not res['OK']:
      self.log.error( "Failed to get FTS request summary", res['Message'] )
      return res
    self.log.info( res['Value'] )
    res = oFTSRequest.getPercentageComplete()
    if not res['OK']:
      self.log.error( "Failed to get FTS percentage complete", res['Message'] )
      return res
    self.log.info( 'FTS Request found to be %.1f percent complete' % res['Value'] )
    self.transferDB.setFTSReqAttribute( ftsReqID, 'PercentageComplete', res['Value'] )
    self.transferDB.addLoggingEvent( ftsReqID, res['Value'] )

    #########################################################################
    # Update the information in the TransferDB if the transfer is terminal.
    res = oFTSRequest.isRequestTerminal()
    if not res['OK']:
      self.log.error( "Failed to determine whether FTS request terminal", res['Message'] )
      return res
    if not res['Value']:
      return S_OK()
    self.log.info( 'FTS Request found to be terminal, updating file states' )

    #########################################################################
    # Get the LFNS associated to the FTS request
    self.log.info( 'Obtaining the LFNs associated to this request' )
    res = self.transferDB.getFTSReqLFNs( ftsReqID, channelID, sourceSE )
    if not res['OK']:
      self.log.error( "Failed to obtain FTS request LFNs", res['Message'] )
      return res
    files = res['Value']
    if not files:
      self.log.error( 'No files present for transfer' )
      return S_ERROR( 'No files were found in the DB' )
    lfns = files.keys()
    self.log.info( 'Obtained %s files' % len( lfns ) )
    for lfn in lfns:
      oFTSRequest.setLFN( lfn )

    res = oFTSRequest.monitor()
    if not res['OK']:
      self.log.error( "Failed to perform detailed monitoring of FTS request", res['Message'] )
      return res
    res = oFTSRequest.getFailed()
    if not res['OK']:
      self.log.error( "Failed to obtained failed files for FTS request", res['Message'] )
      return res
    failedFiles = res['Value']
    res = oFTSRequest.getDone()
    if not res['OK']:
      self.log.error( "Failed to obtained successful files for FTS request", res['Message'] )
      return res
    completedFiles = res['Value']

    # An LFN can be included more than once if it was entered into more than one Request. 
    # FTS will only do the transfer once. We need to identify all FileIDs
    res = self.transferDB.getFTSReqFileIDs( ftsReqID )
    if not res['OK']:
      self.log.error( 'Failed to get FileIDs associated to FTS Request', res['Message'] )
      return res
    fileIDs = res['Value']
    res = self.transferDB.getAttributesForFilesList( fileIDs, ['LFN'] )
    if not res['OK']:
      self.log.error( 'Failed to get LFNs associated to FTS Request', res['Message'] )
      return res
    fileIDDict = res['Value']

    # FileToFTS update tuples: ( fileID, status, reason, retries, transferTime ).
    fileToFTSUpdates = []
    completedFileIDs = []

    # Failed files: missing-source failures are failed outright, others retried.
    filesToRetry = []
    filesToFail = []

    for fileID, fileDict in fileIDDict.items():
      lfn = fileDict['LFN']
      if lfn in completedFiles:
        completedFileIDs.append( fileID )
        transferTime = 0
        res = oFTSRequest.getTransferTime( lfn )
        if res['OK']:
          transferTime = res['Value']
        fileToFTSUpdates.append( ( fileID, 'Completed', '', 0, transferTime ) )

      if lfn in failedFiles:
        failReason = ''
        res = oFTSRequest.getFailReason( lfn )
        if res['OK']:
          failReason = res['Value']
        if self.missingSource( failReason ):
          self.log.error( 'The source SURL does not exist.', '%s %s' % ( lfn, oFTSRequest.getSourceSURL( lfn ) ) )
          filesToFail.append( fileID )
        else:
          filesToRetry.append( fileID )
        self.log.error( 'Failed to replicate file on channel.', "%s %s" % ( channelID, failReason ) )
        fileToFTSUpdates.append( ( fileID, 'Failed', failReason, 0, 0 ) )

    # Push all state changes to the TransferDB; the request is finalized
    # only when every update succeeded.
    allUpdated = True
    if filesToRetry:
      self.log.info( 'Updating the Channel table for files to retry' )
      res = self.transferDB.resetFileChannelStatus( channelID, filesToRetry )
      if not res['OK']:
        self.log.error( 'Failed to update the Channel table for file to retry.', res['Message'] )
        allUpdated = False
    for fileID in filesToFail:
      self.log.info( 'Updating the Channel table for files to reschedule' )
      res = self.transferDB.setFileChannelStatus( channelID, fileID, 'Failed' )
      if not res['OK']:
        self.log.error( 'Failed to update Channel table for failed files.', res['Message'] )
        allUpdated = False

    if completedFileIDs:
      self.log.info( 'Updating the Channel table for successful files' )
      res = self.transferDB.updateCompletedChannelStatus( channelID, completedFileIDs )
      if not res['OK']:
        self.log.error( 'Failed to update the Channel table for successful files.', res['Message'] )
        allUpdated = False
      self.log.info( 'Updating the Channel table for ancestors of successful files' )
      res = self.transferDB.updateAncestorChannelStatus( channelID, completedFileIDs )
      if not res['OK']:
        self.log.error( 'Failed to update the Channel table for ancestors of successful files.', res['Message'] )
        allUpdated = False

    if fileToFTSUpdates:
      self.log.info( 'Updating the FileToFTS table for files' )
      res = self.transferDB.setFileToFTSFileAttributes( ftsReqID, channelID, fileToFTSUpdates )
      if not res['OK']:
        self.log.error( 'Failed to update the FileToFTS table for files.', res['Message'] )
        allUpdated = False

    if allUpdated:
      res = oFTSRequest.finalize()
      if not res['OK']:
        self.log.error( "Failed to perform the finalization for the FTS request", res['Message'] )
        return res

      self.log.info( 'Adding logging event for FTS request' )
      # Now set the FTSReq status to terminal so that it is not monitored again
      res = self.transferDB.addLoggingEvent( ftsReqID, 'Finished' )
      if not res['OK']:
        self.log.error( 'Failed to add logging event for FTS Request', res['Message'] )

      # NOTE(review): res['OK'] is not checked here before reading
      # res['Value'] — a failed getFailedRegistrations() would raise.
      res = oFTSRequest.getFailedRegistrations()
      failedRegistrations = res['Value']
      regFailedFileIDs = []
      regDoneFileIDs = []
      regForgetFileIDs = []
      for fileID, fileDict in fileIDDict.items():
        lfn = fileDict['LFN']
        if lfn in failedRegistrations:
          self.log.info( 'Setting failed registration in FileToCat back to Waiting', lfn )
          regFailedFileIDs.append( fileID )
          # if the LFN appears more than once, FileToCat needs to be reset only once 
          del failedRegistrations[lfn]
        elif lfn in completedFiles:
          regDoneFileIDs.append( fileID )
        elif fileID in filesToFail:
          regForgetFileIDs.append( fileID )


      if regFailedFileIDs:
        res = self.transferDB.setRegistrationWaiting( channelID, regFailedFileIDs )
        if not res['OK']:
          self.log.error( 'Failed to reset entries in FileToCat', res['Message'] )
          return res

      if regDoneFileIDs:
        res = self.transferDB.setRegistrationDone( channelID, regDoneFileIDs )
        if not res['OK']:
          self.log.error( 'Failed to set entries Done in FileToCat', res['Message'] )
          return res

      if regForgetFileIDs:
        # This entries could also be set to Failed, but currently there is no method to do so.
        res = self.transferDB.setRegistrationDone( channelID, regForgetFileIDs )
        if not res['OK']:
          self.log.error( 'Failed to set entries Done in FileToCat', res['Message'] )
          return res

      self.log.info( 'Updating FTS request status' )
      res = self.transferDB.setFTSReqStatus( ftsReqID, 'Finished' )
      if not res['OK']:
        self.log.error( 'Failed update FTS Request status', res['Message'] )
    return S_OK()

  def missingSource( self, failReason ):
    """ check if message sent by FTS server is concering missing source file 

    :param self: self reference
    :param str failReason: message sent by FTS server
    :return: 1 when the message matches a known missing-source pattern,
        0 otherwise
    """
    missingSourceErrors = [ 
      'SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] Failed',
      'SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] No such file or directory',
      'SOURCE error during PREPARATION phase: \[INVALID_PATH\] Failed',
      'SOURCE error during PREPARATION phase: \[INVALID_PATH\] The requested file either does not exist',
      'TRANSFER error during TRANSFER phase: \[INVALID_PATH\] the server sent an error response: 500 500 Command failed. : open error: No such file or directory',
      'SOURCE error during TRANSFER_PREPARATION phase: \[USER_ERROR\] source file doesnt exist' ]
    for error in missingSourceErrors:
      if re.search( error, failReason ):
        return 1
    return 0
Example #16 (score: 0)
class FTSMonitorAgent( AgentModule ):
  """
  .. class:: FTSMonitorAgent

  Monitor submitted FTS jobs.
  """
  # # transfer DB handle
  transferDB = None
  # # thread pool
  threadPool = None
  # # min threads
  minThreads = 1
  # # max threads
  maxThreads = 10

  # # missing source regexp patterns
  missingSourceErrors = [
    re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] Failed" ),
    re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] No such file or directory" ),
    re.compile( r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] Failed" ),
    re.compile( r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] The requested file either does not exist" ),
    re.compile( r"TRANSFER error during TRANSFER phase: \[INVALID_PATH\] the server sent an error response: 500 500"\
               " Command failed. : open error: No such file or directory" ),
    re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[USER_ERROR\] source file doesnt exist" ) ]

  def initialize( self ):
    """ agent's initialisation

    Creates the TransferDB handle, forces the DataManager shifter proxy,
    reads and normalises the MinThreads/MaxThreads options and starts the
    daemonized thread pool.
    """
    self.transferDB = TransferDB()
    self.am_setOption( "shifterProxy", "DataManager" )
    self.minThreads = self.am_getOption( "MinThreads", self.minThreads )
    self.maxThreads = self.am_getOption( "MaxThreads", self.maxThreads )
    # guard against negative or swapped option values
    minmax = ( abs( self.minThreads ), abs( self.maxThreads ) )
    self.minThreads, self.maxThreads = min( minmax ), max( minmax )
    self.log.info( "ThreadPool min threads = %s" % self.minThreads )
    self.log.info( "ThreadPool max threads = %s" % self.maxThreads )
    self.threadPool = ThreadPool( self.minThreads, self.maxThreads )
    self.threadPool.daemonize()
    return S_OK()

  def execute( self ):
    """ push jobs to the thread pool """
    self.log.info( "Obtaining requests to monitor" )
    res = self.transferDB.getFTSReq()
    if not res["OK"]:
      self.log.error( "Failed to get FTS requests", res['Message'] )
      return res
    if not res["Value"]:
      self.log.info( "No FTS requests found to monitor." )
      return S_OK()
    ftsReqs = res["Value"]
    self.log.info( "Found %s FTS jobs" % len( ftsReqs ) )
    i = 1
    for ftsJob in ftsReqs:
      # block until a pool slot accepts the job
      while True:
        self.log.debug( "submitting FTS Job %s FTSReqID=%s to monitor" % ( i, ftsJob["FTSReqID"] ) )
        ret = self.threadPool.generateJobAndQueueIt( self.monitorTransfer, args = ( ftsJob, ), )
        if ret["OK"]:
          i += 1
          break
        # # sleep 1 second to proceed
        time.sleep( 1 )

    self.threadPool.processAllResults()
    return S_OK()

  def ftsJobExpired( self, ftsReqID, channelID ):
    """ clean up when FTS job had expired on the server side

    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    """
    log = gLogger.getSubLogger( "@%s" % str( ftsReqID ) )
    fileIDs = self.transferDB.getFTSReqFileIDs( ftsReqID )
    if not fileIDs["OK"]:
      log.error( "Unable to retrieve FileIDs associated to %s request" % ftsReqID )
      return fileIDs
    fileIDs = fileIDs["Value"]

    # # update FileToFTS table, this is just a clean up, no worry if somethings goes wrong
    for fileID in fileIDs:
      fileStatus = self.transferDB.setFileToFTSFileAttribute( ftsReqID, fileID,
                                                              "Status", "Failed" )
      if not fileStatus["OK"]:
        log.error( "Unable to set FileToFTS status to 'Failed' for FileID %s: %s" % ( fileID,
                                                                                     fileStatus["Message"] ) )

      failReason = self.transferDB.setFileToFTSFileAttribute( ftsReqID, fileID,
                                                              "Reason", "FTS job expired on server" )
      if not failReason["OK"]:
        log.error( "Unable to set FileToFTS reason for FileID %s: %s" % ( fileID,
                                                                         failReason["Message"] ) )
    # # update Channel table
    resetChannels = self.transferDB.resetFileChannelStatus( channelID, fileIDs )
    if not resetChannels["OK"]:
      log.error( "Failed to reset Channel table for files to retry" )
      return resetChannels

    # # update FTSReq table
    log.info( "Setting FTS request status to 'Finished'" )
    ftsReqStatus = self.transferDB.setFTSReqStatus( ftsReqID, "Finished" )
    if not ftsReqStatus["OK"]:
      log.error( "Failed update FTS Request status", ftsReqStatus["Message"] )
      return ftsReqStatus

    # # if we land here, everything should be OK
    return S_OK()

  def monitorTransfer( self, ftsReqDict ):
    """ monitors transfer obtained from TransferDB

    Performs the summary update of the FTS request; when the request is found
    terminal, hands over to :meth:`terminalRequest`.

    :param dict ftsReqDict: FTS job dictionary
    """
    ftsReqID = ftsReqDict.get( "FTSReqID" )
    ftsGUID = ftsReqDict.get( "FTSGuid" )
    ftsServer = ftsReqDict.get( "FTSServer" )
    channelID = ftsReqDict.get( "ChannelID" )
    sourceSE = ftsReqDict.get( "SourceSE" )
    targetSE = ftsReqDict.get( "TargetSE" )

    oFTSRequest = FTSRequest()
    oFTSRequest.setFTSServer( ftsServer )
    oFTSRequest.setFTSGUID( ftsGUID )
    oFTSRequest.setSourceSE( sourceSE )
    oFTSRequest.setTargetSE( targetSE )

    log = gLogger.getSubLogger( "@%s" % str( ftsReqID ) )

    #########################################################################
    # Perform summary update of the FTS Request and update FTSReq entries.
    log.info( "Perform summary update of the FTS Request" )
    infoStr = [ "glite-transfer-status -s %s -l %s" % ( ftsServer, ftsGUID ) ]
    infoStr.append( "FTS GUID:   %s" % ftsGUID )
    infoStr.append( "FTS Server: %s" % ftsServer )
    log.info( "\n".join( infoStr ) )
    res = oFTSRequest.summary()
    self.transferDB.setFTSReqLastMonitor( ftsReqID )
    if not res["OK"]:
      log.error( "Failed to update the FTS request summary", res["Message"] )
      if "getTransferJobSummary2: Not authorised to query request" in res["Message"]:
        log.error( "FTS job is not existing at the FTS server anymore, will clean it up on TransferDB side" )
        cleanUp = self.ftsJobExpired( ftsReqID, channelID )
        if not cleanUp["OK"]:
          log.error( cleanUp["Message"] )
        return cleanUp
      return res

    res = oFTSRequest.dumpSummary()
    if not res['OK']:
      log.error( "Failed to get FTS request summary", res["Message"] )
      return res
    log.info( res['Value'] )
    res = oFTSRequest.getPercentageComplete()
    if not res['OK']:
      log.error( "Failed to get FTS percentage complete", res["Message"] )
      return res
    log.info( 'FTS Request found to be %.1f percent complete' % res["Value"] )
    self.transferDB.setFTSReqAttribute( ftsReqID, "PercentageComplete", res["Value"] )
    self.transferDB.addLoggingEvent( ftsReqID, res["Value"] )

    #########################################################################
    # Update the information in the TransferDB if the transfer is terminal.
    res = oFTSRequest.isRequestTerminal()
    if not res["OK"]:
      log.error( "Failed to determine whether FTS request terminal", res["Message"] )
      return res
    if not res["Value"]:
      return S_OK()
    # # request is terminal
    return self.terminalRequest( oFTSRequest, ftsReqID, channelID, sourceSE )

  def terminalRequest( self, oFTSRequest, ftsReqID, channelID, sourceSE ):
    """ process terminal FTS job

    :param FTSRequest oFTSRequest: FTSRequest instance
    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    :param str sourceSE: FTSReq.SourceSE
    """
    log = gLogger.getSubLogger( "@%s" % ftsReqID )

    log.info( "FTS Request found to be terminal, updating file states" )
    #########################################################################
    # Get the LFNS associated to the FTS request
    log.info( "Obtaining the LFNs associated to this request" )
    res = self.transferDB.getFTSReqLFNs( ftsReqID, channelID, sourceSE )
    if not res["OK"]:
      log.error( "Failed to obtain FTS request LFNs", res['Message'] )
      return res
    files = res["Value"]
    if not files:
      log.error( "No files present for transfer" )
      return S_ERROR( "No files were found in the DB" )

    lfns = files.keys()
    log.debug( "Obtained %s files" % len( lfns ) )
    for lfn in lfns:
      oFTSRequest.setLFN( lfn )

    res = oFTSRequest.monitor()
    if not res["OK"]:
      log.error( "Failed to perform detailed monitoring of FTS request", res["Message"] )
      return res
    res = oFTSRequest.getFailed()
    if not res["OK"]:
      log.error( "Failed to obtained failed files for FTS request", res["Message"] )
      return res
    failedFiles = res["Value"]
    res = oFTSRequest.getDone()
    if not res["OK"]:
      log.error( "Failed to obtained successful files for FTS request", res["Message"] )
      return res
    completedFiles = res["Value"]

    # An LFN can be included more than once if it was entered into more than one Request.
    # FTS will only do the transfer once. We need to identify all FileIDs
    res = self.transferDB.getFTSReqFileIDs( ftsReqID )
    if not res["OK"]:
      log.error( "Failed to get FileIDs associated to FTS Request", res["Message"] )
      return res
    fileIDs = res["Value"]
    res = self.transferDB.getAttributesForFilesList( fileIDs, ["LFN"] )
    if not res["OK"]:
      log.error( "Failed to get LFNs associated to FTS Request", res["Message"] )
      return res
    fileIDDict = res["Value"]

    fileToFTSUpdates = []
    completedFileIDs = []
    filesToRetry = []
    filesToFail = []

    for fileID, fileDict in fileIDDict.items():
      lfn = fileDict['LFN']
      if lfn in completedFiles:
        completedFileIDs.append( fileID )
        transferTime = 0
        res = oFTSRequest.getTransferTime( lfn )
        if res["OK"]:
          transferTime = res["Value"]
        fileToFTSUpdates.append( ( fileID, "Completed", "", 0, transferTime ) )

      if lfn in failedFiles:
        failReason = ""
        res = oFTSRequest.getFailReason( lfn )
        if res["OK"]:
          failReason = res["Value"]
        if "Source file/user checksum mismatch" in failReason:
          # corrupted source: do not retry, reschedule instead
          filesToFail.append( fileID )
          continue
        if self.missingSource( failReason ):
          log.error( "The source SURL does not exist.", "%s %s" % ( lfn, oFTSRequest.getSourceSURL( lfn ) ) )
          filesToFail.append( fileID )
        else:
          filesToRetry.append( fileID )
        log.error( "Failed to replicate file on channel.", "%s %s" % ( channelID, failReason ) )
        fileToFTSUpdates.append( ( fileID, "Failed", failReason, 0, 0 ) )

    # # update TransferDB.FileToFTS table
    updateFileToFTS = self.updateFileToFTS( ftsReqID, channelID,
                                            filesToRetry, filesToFail,
                                            completedFileIDs, fileToFTSUpdates )

    if updateFileToFTS["OK"] and updateFileToFTS["Value"]:
      res = oFTSRequest.finalize()
      if not res["OK"]:
        log.error( "Failed to perform the finalization for the FTS request", res["Message"] )
        return res

      log.info( 'Adding logging event for FTS request' )
      # Now set the FTSReq status to terminal so that it is not monitored again
      res = self.transferDB.addLoggingEvent( ftsReqID, 'Finished' )
      if not res['OK']:
        log.error( 'Failed to add logging event for FTS Request', res['Message'] )

      # update TransferDB.FileToCat table
      updateFileToCat = self.updateFileToCat( oFTSRequest, channelID, fileIDDict, completedFiles, filesToFail )
      if not updateFileToCat["OK"]:
        log.error( updateFileToCat["Message"] )

      log.debug( "Updating FTS request status" )
      res = self.transferDB.setFTSReqStatus( ftsReqID, 'Finished' )
      if not res['OK']:
        log.error( 'Failed update FTS Request status', res['Message'] )
    return S_OK()


  def updateFileToFTS( self, ftsReqID, channelID, filesToRetry, filesToFail, completedFileIDs, fileToFTSUpdates ):
    """ update TransferDB.FileToFTS table for finished request

    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    :param list filesToRetry: FileIDs to retry
    :param list filesToFail: FileIDs for failed files
    :param list completedFileIDs: files completed
    :param list fileToFTSUpdates: ( fileID, status, reason, retries, transferTime ) tuples
    :return: S_OK( bool ) -- True when every table update succeeded
    """
    log = gLogger.getSubLogger( "@%s" % ftsReqID )

    allUpdated = True

    res = self.transferDB.resetFileChannelStatus( channelID, filesToRetry ) if filesToRetry else S_OK()
    if not res["OK"]:
      log.error( "Failed to update the Channel table for file to retry.", res["Message"] )
      allUpdated = False

    for fileID in filesToFail:
      log.info( "Updating the Channel table for files to reschedule" )
      res = self.transferDB.setFileToReschedule( fileID )
      if not res["OK"]:
        log.error( "Failed to update Channel table for failed files.", res["Message"] )
        allUpdated = False
      elif res["Value"] == "max reschedule attempt reached":
        # BUGFIX: the format string had no %s placeholder, so the '%' operator
        # raised TypeError at runtime
        log.error( "setting Channel status to 'Failed' : %s" % res["Value"] )
        res = self.transferDB.setFileChannelStatus( channelID, fileID, 'Failed' )
        if not res["OK"]:
          log.error( "Failed to update Channel table for failed files.", res["Message"] )
          allUpdated = False

    if completedFileIDs:
      res = self.transferDB.updateCompletedChannelStatus( channelID, completedFileIDs )
      if not res["OK"]:
        log.error( "Failed to update the Channel table for successful files.", res["Message"] )
        allUpdated = False
      res = self.transferDB.updateAncestorChannelStatus( channelID, completedFileIDs )
      if not res["OK"]:
        log.error( 'Failed to update the Channel table for ancestors of successful files.', res['Message'] )
        allUpdated = False

    if fileToFTSUpdates:
      res = self.transferDB.setFileToFTSFileAttributes( ftsReqID, channelID, fileToFTSUpdates )
      if not res["OK"]:
        log.error( "Failed to update the FileToFTS table for files.", res["Message"] )
        allUpdated = False

    return S_OK( allUpdated )

  def updateFileToCat( self, oFTSRequest, channelID, fileIDDict, completedFiles, filesToFail ):
    """ update TransferDB.FileToCat table for finished request

    :param FTSRequest oFTSRequest: FTSRequest instance
    :param int channelID: FTSReq.ChannelID
    :param dict fileIDDict: fileID -> { 'LFN' : ... } mapping
    :param list completedFiles: LFNs transferred successfully
    :param list filesToFail: FileIDs of files that will not be retried
    """
    res = oFTSRequest.getFailedRegistrations()
    # BUGFIX: check the return status before dereferencing 'Value'
    if not res["OK"]:
      res["Message"] = "Failed to get failed registrations: %s" % res["Message"]
      return res
    failedRegistrations = res["Value"]
    regFailedFileIDs = []
    regDoneFileIDs = []
    regForgetFileIDs = []
    for fileID, fileDict in fileIDDict.items():
      lfn = fileDict['LFN']

      if lfn in failedRegistrations:
        regFailedFileIDs.append( fileID )
        # if the LFN appears more than once, FileToCat needs to be reset only once
        del failedRegistrations[lfn]
      elif lfn in completedFiles:
        regDoneFileIDs.append( fileID )
      elif fileID in filesToFail:
        regForgetFileIDs.append( fileID )

    res = self.transferDB.setRegistrationWaiting( channelID, regFailedFileIDs ) if regFailedFileIDs else S_OK()
    if not res["OK"]:
      res["Message"] = "Failed to reset entries in FileToCat: %s" % res["Message"]
      return res

    res = self.transferDB.setRegistrationDone( channelID, regDoneFileIDs ) if regDoneFileIDs else S_OK()
    if not res["OK"]:
      res["Message"] = "Failed to set entries Done in FileToCat: %s" % res["Message"]
      return res

    # This entries could also be set to Failed, but currently there is no method to do so.
    res = self.transferDB.setRegistrationDone( channelID, regForgetFileIDs ) if regForgetFileIDs else S_OK()
    if not res["OK"]:
      res["Message"] = "Failed to set entries Done in FileToCat: %s" % res["Message"]
      return res

    return S_OK()

  @classmethod
  def missingSource( cls, failReason ):
    """ check if message sent by FTS server concerns a missing source file

    :param str failReason: message sent by FTS server
    :return: 1 if the failure indicates a missing source file, 0 otherwise
    """
    for error in cls.missingSourceErrors:
      if error.search( failReason ):
        return 1
    return 0
Ejemplo n.º 17
0
class FTSMonitorAgent(AgentModule):
    """ Monitor submitted FTS jobs and update the TransferDB with their state. """

    def initialize(self):
        """ Agent initialisation: create the TransferDB handle and force the
        DataManager shifter proxy.
        """
        self.TransferDB = TransferDB()

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        return S_OK()

    def execute(self):
        """ Execution cycle: monitor every active FTS request found in the DB. """

        #########################################################################
        #  Get the details for all active FTS requests
        gLogger.info('Obtaining requests to monitor')
        res = self.TransferDB.getFTSReq()
        if not res['OK']:
            gLogger.error("Failed to get FTS requests", res['Message'])
            return res
        if not res['Value']:
            gLogger.info("FTSMonitorAgent. No FTS requests found to monitor.")
            return S_OK()
        ftsReqs = res['Value']
        gLogger.info('Found %s FTS jobs' % len(ftsReqs))

        #######################################################################
        # Check them all....
        i = 1
        for ftsReqDict in ftsReqs:
            infoStr = "\n\n##################################################################################\n\n"
            infoStr = "%sStarting monitoring loop %s of %s\n\n" % (
                infoStr, i, len(ftsReqs))
            gLogger.info(infoStr)
            res = self.monitorTransfer(ftsReqDict)
            i += 1
        return S_OK()

    def monitorTransfer(self, ftsReqDict):
        """ Monitor a single transfer obtained from TransferDB.

        Performs the summary update of the FTS request; when the request is
        terminal, updates the Channel / FileToFTS tables and sets the FTSReq
        status to 'Finished'.

        :param dict ftsReqDict: FTS job dictionary from TransferDB.getFTSReq
        """

        ftsReqID = ftsReqDict['FTSReqID']
        ftsGUID = ftsReqDict['FTSGuid']
        ftsServer = ftsReqDict['FTSServer']
        channelID = ftsReqDict['ChannelID']
        sourceSE = ftsReqDict['SourceSE']
        targetSE = ftsReqDict['TargetSE']

        oFTSRequest = FTSRequest()
        oFTSRequest.setFTSServer(ftsServer)
        oFTSRequest.setFTSGUID(ftsGUID)
        oFTSRequest.setSourceSE(sourceSE)
        oFTSRequest.setTargetSE(targetSE)

        #########################################################################
        # Perform summary update of the FTS Request and update FTSReq entries.
        gLogger.info('Perform summary update of the FTS Request')
        infoStr = "Monitoring FTS Job:\n\n"
        infoStr = "%sglite-transfer-status -s %s -l %s\n" % (
            infoStr, ftsServer, ftsGUID)
        infoStr = "%s%s%s\n" % (infoStr, 'FTS GUID:'.ljust(20), ftsGUID)
        infoStr = "%s%s%s\n\n" % (infoStr, 'FTS Server:'.ljust(20), ftsServer)
        gLogger.info(infoStr)
        res = oFTSRequest.summary()
        self.TransferDB.setFTSReqLastMonitor(ftsReqID)
        if not res['OK']:
            gLogger.error("Failed to update the FTS request summary",
                          res['Message'])
            return res
        res = oFTSRequest.dumpSummary()
        if not res['OK']:
            gLogger.error("Failed to get FTS request summary", res['Message'])
            return res
        gLogger.info(res['Value'])
        res = oFTSRequest.getPercentageComplete()
        if not res['OK']:
            gLogger.error("Failed to get FTS percentage complete",
                          res['Message'])
            return res
        gLogger.info('FTS Request found to be %.1f percent complete' %
                     res['Value'])
        self.TransferDB.setFTSReqAttribute(ftsReqID, 'PercentageComplete',
                                           res['Value'])
        self.TransferDB.addLoggingEvent(ftsReqID, res['Value'])

        #########################################################################
        # Update the information in the TransferDB if the transfer is terminal.
        res = oFTSRequest.isRequestTerminal()
        if not res['OK']:
            gLogger.error("Failed to determine whether FTS request terminal",
                          res['Message'])
            return res
        if not res['Value']:
            return S_OK()
        gLogger.info('FTS Request found to be terminal, updating file states')

        #########################################################################
        # Get the LFNS associated to the FTS request
        gLogger.info('Obtaining the LFNs associated to this request')
        # NOTE(review): another copy of this agent calls
        # getFTSReqLFNs( ftsReqID, channelID, sourceSE ) -- confirm which
        # TransferDB API version this snippet targets
        res = self.TransferDB.getFTSReqLFNs(ftsReqID)
        if not res['OK']:
            gLogger.error("Failed to obtain FTS request LFNs", res['Message'])
            return res
        files = res['Value']
        if not files:
            gLogger.error('No files present for transfer')
            return S_ERROR('No files were found in the DB')
        lfns = files.keys()
        gLogger.info('Obtained %s files' % len(lfns))
        for lfn in lfns:
            oFTSRequest.setLFN(lfn)

        res = oFTSRequest.monitor()
        if not res['OK']:
            gLogger.error(
                "Failed to perform detailed monitoring of FTS request",
                res['Message'])
            return res
        res = oFTSRequest.getFailed()
        if not res['OK']:
            gLogger.error("Failed to obtained failed files for FTS request",
                          res['Message'])
            return res
        failedFiles = res['Value']
        res = oFTSRequest.getDone()
        if not res['OK']:
            gLogger.error(
                "Failed to obtained successful files for FTS request",
                res['Message'])
            return res
        completedFiles = res['Value']

        fileToFTSUpdates = []
        completedFileIDs = []
        for lfn in completedFiles:
            fileID = files[lfn]
            completedFileIDs.append(fileID)
            transferTime = 0
            res = oFTSRequest.getTransferTime(lfn)
            if res['OK']:
                transferTime = res['Value']
            fileToFTSUpdates.append((fileID, 'Completed', '', 0, transferTime))

        filesToRetry = []
        filesToReschedule = []
        for lfn in failedFiles:
            fileID = files[lfn]
            failReason = ''
            res = oFTSRequest.getFailReason(lfn)
            if res['OK']:
                failReason = res['Value']
            if self.missingSource(failReason):
                gLogger.error('The source SURL does not exist.',
                              '%s %s' % (lfn, oFTSRequest.getSourceSURL(lfn)))
                filesToReschedule.append(fileID)
            else:
                filesToRetry.append(fileID)
            gLogger.error('Failed to replicate file on channel.',
                          "%s %s" % (channelID, failReason))
            fileToFTSUpdates.append((fileID, 'Failed', failReason, 0, 0))

        # table updates below are best-effort; 'allUpdated' gates finalization
        allUpdated = True
        if filesToRetry:
            gLogger.info('Updating the Channel table for files to retry')
            res = self.TransferDB.resetFileChannelStatus(
                channelID, filesToRetry)
            if not res['OK']:
                gLogger.error(
                    'Failed to update the Channel table for file to retry.',
                    res['Message'])
                allUpdated = False
        for fileID in filesToReschedule:
            gLogger.info('Updating the Channel table for files to reschedule')
            res = self.TransferDB.setFileChannelStatus(channelID, fileID,
                                                       'Failed')
            if not res['OK']:
                gLogger.error(
                    'Failed to update Channel table for failed files.',
                    res['Message'])
                allUpdated = False

        if completedFileIDs:
            gLogger.info('Updating the Channel table for successful files')
            res = self.TransferDB.updateCompletedChannelStatus(
                channelID, completedFileIDs)
            if not res['OK']:
                gLogger.error(
                    'Failed to update the Channel table for successful files.',
                    res['Message'])
                allUpdated = False
            gLogger.info(
                'Updating the Channel table for ancestors of successful files')
            res = self.TransferDB.updateAncestorChannelStatus(
                channelID, completedFileIDs)
            if not res['OK']:
                gLogger.error(
                    'Failed to update the Channel table for ancestors of successful files.',
                    res['Message'])
                allUpdated = False

            #
            #  cibak: temporary switch off of regitstration in FileToCat
            #
            #gLogger.info( 'Updating the FileToCat table for successful files' )
            #res = self.TransferDB.setRegistrationWaiting( channelID, completedFileIDs )
            #if not res['OK']:
            #  gLogger.error( 'Failed to update the FileToCat table for successful files.', res['Message'] )
            #  allUpdated = False

        if fileToFTSUpdates:
            gLogger.info('Updating the FileToFTS table for files')
            res = self.TransferDB.setFileToFTSFileAttributes(
                ftsReqID, channelID, fileToFTSUpdates)
            if not res['OK']:
                gLogger.error(
                    'Failed to update the FileToFTS table for files.',
                    res['Message'])
                allUpdated = False

        if allUpdated:
            res = oFTSRequest.finalize()
            if not res['OK']:
                gLogger.error(
                    "Failed to perform the finalization for the FTS request",
                    res['Message'])
                return res

            gLogger.info('Adding logging event for FTS request')
            # Now set the FTSReq status to terminal so that it is not monitored again
            res = self.TransferDB.addLoggingEvent(ftsReqID, 'Finished')
            if not res['OK']:
                gLogger.error('Failed to add logging event for FTS Request',
                              res['Message'])

            gLogger.info('Updating FTS request status')
            res = self.TransferDB.setFTSReqStatus(ftsReqID, 'Finished')
            if not res['OK']:
                gLogger.error('Failed update FTS Request status',
                              res['Message'])
        return S_OK()

    def missingSource(self, failReason):
        """ Check whether a failure message sent by the FTS server concerns a
        missing source file.

        :param str failReason: message sent by FTS server
        :return: 1 if the failure indicates a missing source file, 0 otherwise
        """
        # raw strings: '\[' is a regex escape, not a Python string escape
        missingSourceErrors = [
            r'SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] Failed',
            r'SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] No such file or directory',
            r'SOURCE error during PREPARATION phase: \[INVALID_PATH\] Failed',
            r'SOURCE error during PREPARATION phase: \[INVALID_PATH\] The requested file either does not exist',
            r'TRANSFER error during TRANSFER phase: \[INVALID_PATH\] the server sent an error response: 500 500 Command failed. : open error: No such file or directory',
            r'SOURCE error during TRANSFER_PREPARATION phase: \[USER_ERROR\] source file doesnt exist'
        ]
        for error in missingSourceErrors:
            if re.search(error, failReason):
                return 1
        return 0
Ejemplo n.º 18
0
class FTSSubmitAgent( AgentModule ):
  """ Submit FTS jobs for files waiting on eligible channels in TransferDB. """

  def initialize( self ):
    """ Agent initialisation: create the TransferDB handle, read the
    MaxJobsPerChannel option (default 2) and force the DataManager shifter proxy.
    """
    self.TransferDB = TransferDB()
    self.maxJobsPerChannel = self.am_getOption( 'MaxJobsPerChannel', 2 )

    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )
    return S_OK()

  def execute( self ):
    """ Execution cycle: submit one FTS job per eligible channel. """

    #########################################################################
    #  Obtain the eligible channels for submission.
    gLogger.info( 'Obtaining channels eligible for submission.' )
    res = self.TransferDB.selectChannelsForSubmission( self.maxJobsPerChannel )
    if not res['OK']:
      gLogger.error( "Failed to retrieve channels for submission.", res['Message'] )
      return S_OK()
    elif not res['Value']:
      gLogger.info( "FTSSubmitAgent. No channels eligible for submission." )
      return S_OK()
    channelDicts = res['Value']
    gLogger.info( 'Found %s eligible channels.' % len( channelDicts ) )

    #########################################################################
    # Submit to all the eligible waiting channels.
    i = 1
    for channelDict in channelDicts:
      infoStr = "\n\n##################################################################################\n\n"
      infoStr = "%sStarting submission loop %s of %s\n\n" % ( infoStr, i, len( channelDicts ) )
      gLogger.info( infoStr )
      res = self.submitTransfer( channelDict )
      i += 1
    return S_OK()

  def submitTransfer( self, channelDict ):
    """ This method creates and submits FTS jobs based on information it gets from the DB

    :param dict channelDict: channel description ( 'ChannelID', 'NumFiles' ) as
      returned by TransferDB.selectChannelsForSubmission
    """

    # Create the FTSRequest object for preparing the submission
    oFTSRequest = FTSRequest()
    channelID = channelDict['ChannelID']
    filesPerJob = channelDict['NumFiles']

    #########################################################################
    #  Obtain the first files in the selected channel.
    gLogger.info( "FTSSubmitAgent.submitTransfer: Attempting to obtain files to transfer on channel %s" % channelID )
    # ask for twice the nominal job size so the job can be filled
    res = self.TransferDB.getFilesForChannel( channelID, 2 * filesPerJob )
    if not res['OK']:
      errStr = 'FTSSubmitAgent.%s' % res['Message']
      gLogger.error( errStr )
      return S_OK()
    if not res['Value']:
      gLogger.info( "FTSSubmitAgent.submitTransfer: No files to found for channel." )
      return S_OK()
    filesDict = res['Value']
    gLogger.info( 'Obtained %s files for channel' % len( filesDict['Files'] ) )

    sourceSE = filesDict['SourceSE']
    oFTSRequest.setSourceSE( sourceSE )
    targetSE = filesDict['TargetSE']
    oFTSRequest.setTargetSE( targetSE )
    gLogger.info( "FTSSubmitAgent.submitTransfer: Attempting to obtain files for %s to %s channel." % ( sourceSE, targetSE ) )
    files = filesDict['Files']

    #########################################################################
    #  Populate the FTS Request with the files.
    gLogger.info( 'Populating the FTS request with file information' )
    fileIDs = []
    totalSize = 0
    fileIDSizes = {}
    # 'fileDict' instead of 'file': do not shadow the builtin
    for fileDict in files:
      lfn = fileDict['LFN']
      oFTSRequest.setLFN( lfn )
      oFTSRequest.setSourceSURL( lfn, fileDict['SourceSURL'] )
      oFTSRequest.setTargetSURL( lfn, fileDict['TargetSURL'] )
      fileID = fileDict['FileID']
      fileIDs.append( fileID )
      totalSize += fileDict['Size']
      fileIDSizes[fileID] = fileDict['Size']

    #########################################################################
    #  Submit the FTS request and retrieve the FTS GUID/Server
    gLogger.info( 'Submitting the FTS request' )
    res = oFTSRequest.submit()
    if not res['OK']:
      errStr = "FTSSubmitAgent.%s" % res['Message']
      gLogger.error( errStr )
      gLogger.info( 'Updating the Channel table for files to retry' )
      res = self.TransferDB.resetFileChannelStatus( channelID, fileIDs )
      if not res['OK']:
        gLogger.error( 'Failed to update the Channel table for file to retry.', res['Message'] )
      return S_ERROR( errStr )
    ftsGUID = res['Value']['ftsGUID']
    ftsServer = res['Value']['ftsServer']
    infoStr = """Submitted FTS Job:
    
              FTS Guid: %s
              FTS Server: %s
              ChannelID: %s
              SourceSE: %s
              TargetSE: %s
              Files: %s

""" % ( ftsGUID, ftsServer, str( channelID ), sourceSE, targetSE, str( len( files ) ) )
    gLogger.info( infoStr )

    #########################################################################
    #  Insert the FTS Req details and add the number of files and size
    res = self.TransferDB.insertFTSReq( ftsGUID, ftsServer, channelID )
    if not res['OK']:
      errStr = "FTSSubmitAgent.%s" % res['Message']
      gLogger.error( errStr )
      return S_ERROR( errStr )
    ftsReqID = res['Value']
    gLogger.info( 'Obtained FTS RequestID %s' % ftsReqID )
    res = self.TransferDB.setFTSReqAttribute( ftsReqID, 'SourceSE', sourceSE )
    if not res['OK']:
      gLogger.error( "Failed to set SourceSE for FTSRequest", res['Message'] )
    res = self.TransferDB.setFTSReqAttribute( ftsReqID, 'TargetSE', targetSE )
    if not res['OK']:
      gLogger.error( "Failed to set TargetSE for FTSRequest", res['Message'] )
    res = self.TransferDB.setFTSReqAttribute( ftsReqID, 'NumberOfFiles', len( fileIDs ) )
    if not res['OK']:
      gLogger.error( "Failed to set NumberOfFiles for FTSRequest", res['Message'] )
    res = self.TransferDB.setFTSReqAttribute( ftsReqID, 'TotalSize', totalSize )
    if not res['OK']:
      gLogger.error( "Failed to set TotalSize for FTSRequest", res['Message'] )

    #########################################################################
    #  Insert the submission event in the FTSReqLogging table
    event = 'Submitted'
    res = self.TransferDB.addLoggingEvent( ftsReqID, event )
    if not res['OK']:
      errStr = "FTSSubmitAgent.%s" % res['Message']
      gLogger.error( errStr )

    #########################################################################
    #  Insert the FileToFTS details and remove the files from the channel
    gLogger.info( 'Setting the files as Executing in the Channel table' )
    res = self.TransferDB.setChannelFilesExecuting( channelID, fileIDs )
    if not res['OK']:
      gLogger.error( 'Failed to update the Channel tables for files.', res['Message'] )

    lfns = []
    fileToFTSFileAttributes = []
    for fileDict in files:
      lfn = fileDict['LFN']
      fileID = fileDict['FileID']
      lfns.append( lfn )
      fileToFTSFileAttributes.append( ( fileID, fileIDSizes[fileID] ) )

    gLogger.info( 'Populating the FileToFTS table with file information' )
    res = self.TransferDB.setFTSReqFiles( ftsReqID, channelID, fileToFTSFileAttributes )
    if not res['OK']:
      gLogger.error( 'Failed to populate the FileToFTS table with files.' )
    # BUGFIX: the method previously fell off the end returning None while every
    # other exit path returns an S_OK/S_ERROR result dictionary
    return S_OK()
Ejemplo n.º 19
0
 def setUp(self):
     """ Test fixture set-up: create fresh TransferDB and RequestDB handles.

     NOTE(review): 'mysql' presumably selects the RequestDB backend -- confirm
     against the RequestDB constructor.
     """
     self.TransferDB = TransferDB()
     self.RequestDB = RequestDB('mysql')
Ejemplo n.º 20
0
class FTSSubmitAgent(AgentModule):
    """Agent creating and submitting FTS transfer jobs.

    Files waiting on a TransferDB channel are bundled into FTS jobs via the
    FTSRequest helper and submitted; successfully submitted files are then
    marked as Executing in the Channel table.
    """

    def initialize(self):
        """Create the TransferDB handle and read configuration options."""
        self.TransferDB = TransferDB()
        # maximum number of FTS jobs submitted per channel in one cycle
        self.maxJobsPerChannel = self.am_getOption('MaxJobsPerChannel', 2)

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')
        return S_OK()

    def execute(self):
        """Submit FTS jobs for every channel eligible in this cycle."""

        #########################################################################
        #  Obtain the eligible channels for submission.
        gLogger.info('Obtaining channels eligible for submission.')
        res = self.TransferDB.selectChannelsForSubmission(
            self.maxJobsPerChannel)
        if not res['OK']:
            gLogger.error("Failed to retrieve channels for submission.",
                          res['Message'])
            return S_OK()
        elif not res['Value']:
            gLogger.info(
                "FTSSubmitAgent. No channels eligible for submission.")
            return S_OK()
        channelDicts = res['Value']
        gLogger.info('Found %s eligible channels.' % len(channelDicts))

        #########################################################################
        # Submit to all the eligible waiting channels.
        i = 1
        for channelDict in channelDicts:
            infoStr = "\n\n##################################################################################\n\n"
            infoStr = "%sStarting submission loop %s of %s\n\n" % (
                infoStr, i, len(channelDicts))
            gLogger.info(infoStr)
            # submission failures are logged inside submitTransfer; keep going
            self.submitTransfer(channelDict)
            i += 1
        return S_OK()

    def submitTransfer(self, channelDict):
        """Create and submit one FTS job from the files waiting on a channel.

        :param dict channelDict: channel info as returned by
            TransferDB.selectChannelsForSubmission ('ChannelID', 'NumFiles')
        """

        # Create the FTSRequest object for preparing the submission
        oFTSRequest = FTSRequest()
        channelID = channelDict['ChannelID']
        filesPerJob = channelDict['NumFiles']

        #########################################################################
        #  Obtain the first files in the selected channel.
        gLogger.info(
            "FTSSubmitAgent.submitTransfer: Attempting to obtain files to transfer on channel %s"
            % channelID)
        # request twice the nominal job size to fill the job after any skips
        res = self.TransferDB.getFilesForChannel(channelID, 2 * filesPerJob)
        if not res['OK']:
            errStr = 'FTSSubmitAgent.%s' % res['Message']
            gLogger.error(errStr)
            return S_OK()
        if not res['Value']:
            gLogger.info(
                "FTSSubmitAgent.submitTransfer: No files to found for channel."
            )
            return S_OK()
        filesDict = res['Value']
        gLogger.info('Obtained %s files for channel' % len(filesDict['Files']))

        sourceSE = filesDict['SourceSE']
        oFTSRequest.setSourceSE(sourceSE)
        targetSE = filesDict['TargetSE']
        oFTSRequest.setTargetSE(targetSE)
        gLogger.info(
            "FTSSubmitAgent.submitTransfer: Attempting to obtain files for %s to %s channel."
            % (sourceSE, targetSE))
        files = filesDict['Files']

        #########################################################################
        #  Populate the FTS Request with the files.
        gLogger.info('Populating the FTS request with file information')
        fileIDs = []
        totalSize = 0
        fileIDSizes = {}
        # NOTE: renamed loop variable so it no longer shadows the builtin 'file'
        for fileMeta in files:
            lfn = fileMeta['LFN']
            oFTSRequest.setLFN(lfn)
            oFTSRequest.setSourceSURL(lfn, fileMeta['SourceSURL'])
            oFTSRequest.setTargetSURL(lfn, fileMeta['TargetSURL'])
            fileID = fileMeta['FileID']
            fileIDs.append(fileID)
            totalSize += fileMeta['Size']
            fileIDSizes[fileID] = fileMeta['Size']

        #########################################################################
        #  Submit the FTS request and retrieve the FTS GUID/Server
        gLogger.info('Submitting the FTS request')
        res = oFTSRequest.submit()
        if not res['OK']:
            errStr = "FTSSubmitAgent.%s" % res['Message']
            gLogger.error(errStr)
            # put the files back to Waiting so a later cycle retries them
            gLogger.info('Updating the Channel table for files to retry')
            res = self.TransferDB.resetFileChannelStatus(channelID, fileIDs)
            if not res['OK']:
                gLogger.error(
                    'Failed to update the Channel table for file to retry.',
                    res['Message'])
            return S_ERROR(errStr)
        ftsGUID = res['Value']['ftsGUID']
        ftsServer = res['Value']['ftsServer']
        infoStr = """Submitted FTS Job:
    
              FTS Guid: %s
              FTS Server: %s
              ChannelID: %s
              SourceSE: %s
              TargetSE: %s
              Files: %s

""" % (ftsGUID, ftsServer, str(channelID), sourceSE, targetSE, str(len(files)))
        gLogger.info(infoStr)

        #########################################################################
        #  Insert the FTS Req details and add the number of files and size
        res = self.TransferDB.insertFTSReq(ftsGUID, ftsServer, channelID)
        if not res['OK']:
            errStr = "FTSSubmitAgent.%s" % res['Message']
            gLogger.error(errStr)
            return S_ERROR(errStr)
        ftsReqID = res['Value']
        gLogger.info('Obtained FTS RequestID %s' % ftsReqID)
        # attribute updates below are best-effort: failures are logged only
        res = self.TransferDB.setFTSReqAttribute(ftsReqID, 'SourceSE',
                                                 sourceSE)
        if not res['OK']:
            gLogger.error("Failed to set SourceSE for FTSRequest",
                          res['Message'])
        res = self.TransferDB.setFTSReqAttribute(ftsReqID, 'TargetSE',
                                                 targetSE)
        if not res['OK']:
            gLogger.error("Failed to set TargetSE for FTSRequest",
                          res['Message'])
        res = self.TransferDB.setFTSReqAttribute(ftsReqID, 'NumberOfFiles',
                                                 len(fileIDs))
        if not res['OK']:
            gLogger.error("Failed to set NumberOfFiles for FTSRequest",
                          res['Message'])
        res = self.TransferDB.setFTSReqAttribute(ftsReqID, 'TotalSize',
                                                 totalSize)
        if not res['OK']:
            gLogger.error("Failed to set TotalSize for FTSRequest",
                          res['Message'])

        #########################################################################
        #  Insert the submission event in the FTSReqLogging table
        event = 'Submitted'
        res = self.TransferDB.addLoggingEvent(ftsReqID, event)
        if not res['OK']:
            errStr = "FTSSubmitAgent.%s" % res['Message']
            gLogger.error(errStr)

        #########################################################################
        #  Insert the FileToFTS details and remove the files from the channel
        gLogger.info('Setting the files as Executing in the Channel table')
        res = self.TransferDB.setChannelFilesExecuting(channelID, fileIDs)
        if not res['OK']:
            gLogger.error('Failed to update the Channel tables for files.',
                          res['Message'])

        lfns = []
        fileToFTSFileAttributes = []
        for fileMeta in files:
            lfn = fileMeta['LFN']
            fileID = fileMeta['FileID']
            lfns.append(lfn)
            fileToFTSFileAttributes.append((fileID, fileIDSizes[fileID]))

        gLogger.info('Populating the FileToFTS table with file information')
        res = self.TransferDB.setFTSReqFiles(ftsReqID, channelID,
                                             fileToFTSFileAttributes)
        if not res['OK']:
            gLogger.error('Failed to populate the FileToFTS table with files.')
Ejemplo n.º 21
0
class FTSSubmitAgent( AgentModule ):
  """
  .. class:: FTSSubmitAgent

  This class is submitting previously scheduled files to the FTS system using helper class FTSRequest.

  Files to be transferred are read from TransferDB.Channel table, only those with Status = 'Waiting'.
  After submission TransferDB.Channel.Status is set to 'Executing'. The rest of state propagation is
  done in FTSMonitorAgent.

  An information about newly created FTS request is held in TransferDB.FTSReq (request itself) table and
  TransferDB.FileToFTS (files in request) and TransferDB.FileToCat (files to be registered, for
  failover only).
  """
  ## placeholder for TransferDB reference
  transferDB = None
  ## placeholder for max jobs per channel
  maxJobsPerChannel = 2
  ## placeholder for checksum test option flag
  cksmTest = False
  ## placeholder for checksum type
  cksmType = ""
  ## default checksum type
  __defaultCksmType = "ADLER32"

  def initialize( self ):
    """ agent's initialisation

    :param self: self reference
    """
    ## save transferDB handler
    self.transferDB = TransferDB()
    ## read config options, falling back to the class-level defaults
    self.maxJobsPerChannel = self.am_getOption( 'MaxJobsPerChannel', self.maxJobsPerChannel )
    self.log.info("max jobs/channel = %s" % self.maxJobsPerChannel )

    ## checksum test flag
    self.cksmTest = bool( self.am_getOption( "ChecksumTest", False ) )
    ## checksum type, validated against the supported algorithms
    if self.cksmTest:
      self.cksmType = str( self.am_getOption( "ChecksumType", self.__defaultCksmType ) ).upper()
      if self.cksmType and self.cksmType not in ( "ADLER32", "MD5", "SHA1" ):
        self.log.warn("unknown checksum type: %s, will use default %s" % ( self.cksmType, self.__defaultCksmType ) )
        self.cksmType = self.__defaultCksmType

    self.log.info( "checksum test is %s" % ( { True : "enabled using %s checksum" % self.cksmType,
                                               False : "disabled"}[self.cksmTest] ) )

    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )
    return S_OK()

  def execute( self ):
    """ execution in one agent's cycle

    :param self: self reference
    """

    #########################################################################
    #  Obtain the eligible channels for submission.
    self.log.info( 'Obtaining channels eligible for submission.' )
    res = self.transferDB.selectChannelsForSubmission( self.maxJobsPerChannel )
    if not res['OK']:
      self.log.error( "Failed to retrieve channels for submission.", res['Message'] )
      return S_OK()
    elif not res['Value']:
      self.log.info( "FTSSubmitAgent. No channels eligible for submission." )
      return S_OK()
    channelDicts = res['Value']
    self.log.info( 'Found %s eligible channels.' % len( channelDicts ) )

    #########################################################################
    # Submit to all the eligible waiting channels.
    i = 1
    for channelDict in channelDicts:
      infoStr = "\n\n##################################################################################\n\n"
      infoStr = "%sStarting submission loop %s of %s\n\n" % ( infoStr, i, len( channelDicts ) )
      self.log.info( infoStr )
      ## submission failures are logged inside submitTransfer; keep going
      res = self.submitTransfer( channelDict )
      i += 1
    return S_OK()

  def submitTransfer( self, channelDict ):
    """ create and submit FTS jobs based on information it gets from the DB

    :param self: self reference
    :param dict channelDict: dict with channel info as read from TransferDB.selectChannelsForSubmission
    """

    # Create the FTSRequest object for preparing the submission
    oFTSRequest = FTSRequest()
    channelID = channelDict['ChannelID']
    filesPerJob = channelDict['NumFiles']

    #########################################################################
    #  Obtain the first files in the selected channel.
    self.log.info( "FTSSubmitAgent.submitTransfer: Attempting to obtain files to transfer on channel %s" % channelID )
    ## request twice the nominal job size to fill the job after any skips
    res = self.transferDB.getFilesForChannel( channelID, 2 * filesPerJob )
    if not res['OK']:
      errStr = 'FTSSubmitAgent.%s' % res['Message']
      self.log.error( errStr )
      return S_OK()
    if not res['Value']:
      self.log.info( "FTSSubmitAgent.submitTransfer: No files to found for channel." )
      return S_OK()
    filesDict = res['Value']
    self.log.info( 'Obtained %s files for channel' % len( filesDict['Files'] ) )

    sourceSE = filesDict['SourceSE']
    oFTSRequest.setSourceSE( sourceSE )
    targetSE = filesDict['TargetSE']
    oFTSRequest.setTargetSE( targetSE )
    self.log.info( "FTSSubmitAgent.submitTransfer: Attempting to obtain files for %s to %s channel." % ( sourceSE, 
                                                                                                         targetSE ) )
    files = filesDict['Files']

    ## enable/disable cksm test
    oFTSRequest.setCksmTest( self.cksmTest )
    if self.cksmType:
      oFTSRequest.setCksmType( self.cksmType )

    #########################################################################
    #  Populate the FTS Request with the files.
    self.log.info( 'Populating the FTS request with file information' )
    fileIDs = []
    totalSize = 0
    fileIDSizes = {}
    for fileMeta in files:
      lfn = fileMeta['LFN']
      oFTSRequest.setLFN( lfn )
      oFTSRequest.setSourceSURL( lfn, fileMeta['SourceSURL'] )
      oFTSRequest.setTargetSURL( lfn, fileMeta['TargetSURL'] )
      fileID = fileMeta['FileID']
      fileIDs.append( fileID )
      totalSize += fileMeta['Size']
      fileIDSizes[fileID] = fileMeta['Size']

    #########################################################################
    #  Submit the FTS request and retrieve the FTS GUID/Server
    self.log.info( 'Submitting the FTS request' )
    res = oFTSRequest.submit()
    if not res['OK']:
      errStr = "FTSSubmitAgent.%s" % res['Message']
      self.log.error( errStr )
      ## put the files back to Waiting so a later cycle retries them
      self.log.info( 'Updating the Channel table for files to retry' )
      res = self.transferDB.resetFileChannelStatus( channelID, fileIDs )
      if not res['OK']:
        self.log.error( 'Failed to update the Channel table for file to retry.', res['Message'] )
      return S_ERROR( errStr )
    ftsGUID = res['Value']['ftsGUID']
    ftsServer = res['Value']['ftsServer']
    infoStr = """Submitted FTS Job:
    
              FTS Guid: %s
              FTS Server: %s
              ChannelID: %s
              SourceSE: %s
              TargetSE: %s
              Files: %s

""" % ( ftsGUID, ftsServer, str( channelID ), sourceSE, targetSE, str( len( files ) ) )
    self.log.info( infoStr )

    ## filter out skipped files
    failedFiles = oFTSRequest.getFailed()
    if not failedFiles["OK"]:
      self.log.warn("Unable to read skipped LFNs.")
    failedFiles = failedFiles["Value"] if "Value" in failedFiles else []
    failedIDs = [ meta["FileID"] for meta in files if meta["LFN"] in failedFiles ]
    ## only submitted
    fileIDs = [ fileID for fileID in fileIDs if fileID not in failedIDs ]
    ## subtract failed from total size
    totalSize -= sum( [ meta["Size"] for meta in files if meta["LFN"] in failedFiles ] )

    #########################################################################
    #  Insert the FTS Req details and add the number of files and size
    res = self.transferDB.insertFTSReq( ftsGUID, ftsServer, channelID )
    if not res['OK']:
      errStr = "FTSSubmitAgent.%s" % res['Message']
      self.log.error( errStr )
      return S_ERROR( errStr )
    ftsReqID = res['Value']
    self.log.info( 'Obtained FTS RequestID %s' % ftsReqID )
    ## attribute updates below are best-effort: failures are logged only
    res = self.transferDB.setFTSReqAttribute( ftsReqID, 'SourceSE', sourceSE )
    if not res['OK']:
      self.log.error( "Failed to set SourceSE for FTSRequest", res['Message'] )
    res = self.transferDB.setFTSReqAttribute( ftsReqID, 'TargetSE', targetSE )
    if not res['OK']:
      self.log.error( "Failed to set TargetSE for FTSRequest", res['Message'] )
    res = self.transferDB.setFTSReqAttribute( ftsReqID, 'NumberOfFiles', len( fileIDs ) )
    if not res['OK']:
      self.log.error( "Failed to set NumberOfFiles for FTSRequest", res['Message'] )
    res = self.transferDB.setFTSReqAttribute( ftsReqID, 'TotalSize', totalSize )
    if not res['OK']:
      self.log.error( "Failed to set TotalSize for FTSRequest", res['Message'] )

    #########################################################################
    #  Insert the submission event in the FTSReqLogging table
    event = 'Submitted'
    res = self.transferDB.addLoggingEvent( ftsReqID, event )
    if not res['OK']:
      errStr = "FTSSubmitAgent.%s" % res['Message']
      self.log.error( errStr )

    #########################################################################
    #  Insert the FileToFTS details and remove the files from the channel
    self.log.info( 'Setting the files as Executing in the Channel table' )
    res = self.transferDB.setChannelFilesExecuting( channelID, fileIDs )
    if not res['OK']:
      self.log.error( 'Failed to update the Channel tables for files.', res['Message'] )

    lfns = []
    fileToFTSFileAttributes = []
    for fileMeta in files:
      lfn = fileMeta['LFN']
      fileID = fileMeta['FileID']
      lfns.append( lfn )
      fileToFTSFileAttributes.append( ( fileID, fileIDSizes[fileID] ) )

    self.log.info( 'Populating the FileToFTS table with file information' )
    res = self.transferDB.setFTSReqFiles( ftsReqID, channelID, fileToFTSFileAttributes )
    if not res['OK']:
      self.log.error( 'Failed to populate the FileToFTS table with files.' )
Ejemplo n.º 22
0
 def setUp(self):
     """Test fixture setup: create a fresh TransferDB handle."""
     self.TransferDB = TransferDB()
Ejemplo n.º 23
0
class FTSMonitorAgent(AgentModule):
    """
  .. class:: FTSMonitorAgent

  Monitor submitted FTS jobs.
  """
    # # transfer DB handle
    transferDB = None
    # # thread pool
    threadPool = None
    # # min threads
    minThreads = 1
    # # max threads
    maxThreads = 10

    # # missing source regexp patterns
    missingSourceErrors = [
      re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] Failed" ),
      re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] No such file or directory" ),
      re.compile( r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] Failed" ),
      re.compile( r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] The requested file either does not exist" ),
      re.compile( r"TRANSFER error during TRANSFER phase: \[INVALID_PATH\] the server sent an error response: 500 500"\
                 " Command failed. : open error: No such file or directory" ),
      re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[USER_ERROR\] source file doesnt exist" ) ]

    def initialize(self):
        """Set up the monitor agent: DB handle, shifter proxy and thread pool."""
        self.transferDB = TransferDB()
        self.am_setOption("shifterProxy", "DataManager")
        # read the thread-pool bounds from configuration ...
        self.minThreads = self.am_getOption("MinThreads", self.minThreads)
        self.maxThreads = self.am_getOption("MaxThreads", self.maxThreads)
        # ... then normalise them: drop signs and sort so min <= max
        bounds = sorted([abs(self.minThreads), abs(self.maxThreads)])
        self.minThreads, self.maxThreads = bounds[0], bounds[1]
        self.log.info("ThreadPool min threads = %s" % self.minThreads)
        self.log.info("ThreadPool max threads = %s" % self.maxThreads)
        self.threadPool = ThreadPool(self.minThreads, self.maxThreads)
        self.threadPool.daemonize()
        return S_OK()

    def execute(self):
        """Queue every FTS request read from TransferDB into the thread pool."""
        self.log.info("Obtaining requests to monitor")
        res = self.transferDB.getFTSReq()
        if not res["OK"]:
            self.log.error("Failed to get FTS requests", res['Message'])
            return res
        if not res["Value"]:
            self.log.info("No FTS requests found to monitor.")
            return S_OK()
        ftsReqs = res["Value"]
        self.log.info("Found %s FTS jobs" % len(ftsReqs))
        jobIndex = 1
        for ftsJob in ftsReqs:
            queued = False
            while not queued:
                self.log.debug("submitting FTS Job %s FTSReqID=%s to monitor" %
                               (jobIndex, ftsJob["FTSReqID"]))
                ret = self.threadPool.generateJobAndQueueIt(
                    self.monitorTransfer,
                    args=(ftsJob, ),
                )
                if ret["OK"]:
                    jobIndex += 1
                    queued = True
                else:
                    # pool is saturated - wait a second and retry the same job
                    time.sleep(1)

        self.threadPool.processAllResults()
        return S_OK()

    def ftsJobExpired(self, ftsReqID, channelID):
        """Clean up TransferDB state for an FTS job that expired server-side.

    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    """
        log = gLogger.getSubLogger("@%s" % str(ftsReqID))
        res = self.transferDB.getFTSReqFileIDs(ftsReqID)
        if not res["OK"]:
            log.error("Unable to retrieve FileIDs associated to %s request" %
                      ftsReqID)
            return res
        fileIDs = res["Value"]

        # best effort: flag every file Failed with an explanatory reason,
        # logging (but not aborting on) individual update failures
        for fileID in fileIDs:
            statusRes = self.transferDB.setFileToFTSFileAttribute(
                ftsReqID, fileID, "Status", "Failed")
            if not statusRes["OK"]:
                log.error(
                    "Unable to set FileToFTS status to 'Failed' for FileID %s: %s"
                    % (fileID, statusRes["Message"]))
            reasonRes = self.transferDB.setFileToFTSFileAttribute(
                ftsReqID, fileID, "Reason", "FTS job expired on server")
            if not reasonRes["OK"]:
                log.error("Unable to set FileToFTS reason for FileID %s: %s" %
                          (fileID, reasonRes["Message"]))

        # put the files back into the channel so they can be retried
        resetRes = self.transferDB.resetFileChannelStatus(channelID, fileIDs)
        if not resetRes["OK"]:
            log.error("Failed to reset Channel table for files to retry")
            return resetRes

        # close the request so it is no longer monitored
        log.info("Setting FTS request status to 'Finished'")
        finishRes = self.transferDB.setFTSReqStatus(ftsReqID, "Finished")
        if not finishRes["OK"]:
            log.error("Failed update FTS Request status",
                      finishRes["Message"])
            return finishRes

        return S_OK()

    def monitorTransfer(self, ftsReqDict):
        """Monitor one FTS job described by a TransferDB record.

    :param dict ftsReqDict: FTS job dictionary
    """
        ftsReqID = ftsReqDict.get("FTSReqID")
        ftsGUID = ftsReqDict.get("FTSGuid")
        ftsServer = ftsReqDict.get("FTSServer")
        channelID = ftsReqDict.get("ChannelID")
        sourceSE = ftsReqDict.get("SourceSE")
        targetSE = ftsReqDict.get("TargetSE")

        # rebuild the FTSRequest helper from the stored attributes
        ftsReq = FTSRequest()
        ftsReq.setFTSServer(ftsServer)
        ftsReq.setFTSGUID(ftsGUID)
        ftsReq.setSourceSE(sourceSE)
        ftsReq.setTargetSE(targetSE)

        log = gLogger.getSubLogger("@%s" % str(ftsReqID))

        # summary update of the FTS request; record the monitoring timestamp
        # whether or not the summary call succeeded
        log.info("Perform summary update of the FTS Request")
        log.info("\n".join([
            "glite-transfer-status -s %s -l %s" % (ftsServer, ftsGUID),
            "FTS GUID:   %s" % ftsGUID,
            "FTS Server: %s" % ftsServer,
        ]))
        summary = ftsReq.summary()
        self.transferDB.setFTSReqLastMonitor(ftsReqID)
        if not summary["OK"]:
            log.error("Failed to update the FTS request summary",
                      summary["Message"])
            if "getTransferJobSummary2: Not authorised to query request" in summary[
                    "Message"]:
                log.error(
                    "FTS job is not existing at the FTS server anymore, will clean it up on TransferDB side"
                )
                cleanUp = self.ftsJobExpired(ftsReqID, channelID)
                if not cleanUp["OK"]:
                    log.error(cleanUp["Message"])
                return cleanUp
            return summary

        dump = ftsReq.dumpSummary()
        if not dump['OK']:
            log.error("Failed to get FTS request summary", dump["Message"])
            return dump
        log.info(dump['Value'])
        progress = ftsReq.getPercentageComplete()
        if not progress['OK']:
            log.error("Failed to get FTS percentage complete",
                      progress["Message"])
            return progress
        log.info('FTS Request found to be %.1f percent complete' %
                 progress["Value"])
        self.transferDB.setFTSReqAttribute(ftsReqID, "PercentageComplete",
                                           progress["Value"])
        self.transferDB.addLoggingEvent(ftsReqID, progress["Value"])

        # hand terminal requests over for final state propagation
        terminal = ftsReq.isRequestTerminal()
        if not terminal["OK"]:
            log.error("Failed to determine whether FTS request terminal",
                      terminal["Message"])
            return terminal
        if not terminal["Value"]:
            return S_OK()
        return self.terminalRequest(ftsReq, ftsReqID, channelID, sourceSE)

    def terminalRequest(self, oFTSRequest, ftsReqID, channelID, sourceSE):
        """ process terminal FTS job: classify each file as completed / retry /
    fail, update the FileToFTS and Channel tables, and close the request

    :param FTSRequest oFTSRequest: FTSRequest instance
    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    :param str sourceSE: FTSReq.SourceSE
    """
        log = gLogger.getSubLogger("@%s" % ftsReqID)

        log.info("FTS Request found to be terminal, updating file states")
        #########################################################################
        # Get the LFNS associated to the FTS request
        log.info("Obtaining the LFNs associated to this request")
        res = self.transferDB.getFTSReqLFNs(ftsReqID, channelID, sourceSE)
        if not res["OK"]:
            log.error("Failed to obtain FTS request LFNs", res['Message'])
            return res
        files = res["Value"]
        if not files:
            log.error("No files present for transfer")
            return S_ERROR("No files were found in the DB")

        lfns = files.keys()
        log.debug("Obtained %s files" % len(lfns))
        # register every LFN with the helper before running detailed monitoring
        for lfn in lfns:
            oFTSRequest.setLFN(lfn)

        res = oFTSRequest.monitor()
        if not res["OK"]:
            log.error("Failed to perform detailed monitoring of FTS request",
                      res["Message"])
            return res
        res = oFTSRequest.getFailed()
        if not res["OK"]:
            log.error("Failed to obtained failed files for FTS request",
                      res["Message"])
            return res
        failedFiles = res["Value"]
        res = oFTSRequest.getDone()
        if not res["OK"]:
            log.error("Failed to obtained successful files for FTS request",
                      res["Message"])
            return res
        completedFiles = res["Value"]

        # An LFN can be included more than once if it was entered into more than one Request.
        # FTS will only do the transfer once. We need to identify all FileIDs
        res = self.transferDB.getFTSReqFileIDs(ftsReqID)
        if not res["OK"]:
            log.error("Failed to get FileIDs associated to FTS Request",
                      res["Message"])
            return res
        fileIDs = res["Value"]
        res = self.transferDB.getAttributesForFilesList(fileIDs, ["LFN"])
        if not res["OK"]:
            log.error("Failed to get LFNs associated to FTS Request",
                      res["Message"])
            return res
        fileIDDict = res["Value"]

        # per-file outcome buckets accumulated in the loop below
        fileToFTSUpdates = []
        completedFileIDs = []
        filesToRetry = []
        filesToFail = []

        for fileID, fileDict in fileIDDict.items():
            lfn = fileDict['LFN']
            if lfn in completedFiles:
                completedFileIDs.append(fileID)
                # transfer time is recorded best-effort; 0 when unavailable
                transferTime = 0
                res = oFTSRequest.getTransferTime(lfn)
                if res["OK"]:
                    transferTime = res["Value"]
                fileToFTSUpdates.append(
                    (fileID, "Completed", "", 0, transferTime))

            if lfn in failedFiles:
                failReason = ""
                res = oFTSRequest.getFailReason(lfn)
                if res["OK"]:
                    failReason = res["Value"]
                # checksum mismatch: fail outright, no FileToFTS update appended
                if "Source file/user checksum mismatch" in failReason:
                    filesToFail.append(fileID)
                    continue
                # missing source SURL cannot succeed on retry -> fail;
                # anything else is considered transient -> retry
                if self.missingSource(failReason):
                    log.error("The source SURL does not exist.",
                              "%s %s" % (lfn, oFTSRequest.getSourceSURL(lfn)))
                    filesToFail.append(fileID)
                else:
                    filesToRetry.append(fileID)
                log.error("Failed to replicate file on channel.",
                          "%s %s" % (channelID, failReason))
                fileToFTSUpdates.append((fileID, "Failed", failReason, 0, 0))

        # # update TransferDB.FileToFTS table
        updateFileToFTS = self.updateFileToFTS(ftsReqID, channelID,
                                               filesToRetry, filesToFail,
                                               completedFileIDs,
                                               fileToFTSUpdates)

        # finalize only when every FileToFTS/Channel update succeeded;
        # otherwise the request stays open and is re-processed next cycle
        if updateFileToFTS["OK"] and updateFileToFTS["Value"]:
            res = oFTSRequest.finalize()
            if not res["OK"]:
                log.error(
                    "Failed to perform the finalization for the FTS request",
                    res["Message"])
                return res

            log.info('Adding logging event for FTS request')
            # Now set the FTSReq status to terminal so that it is not monitored again
            res = self.transferDB.addLoggingEvent(ftsReqID, 'Finished')
            if not res['OK']:
                log.error('Failed to add logging event for FTS Request',
                          res['Message'])

            # update TransferDB.FileToCat table
            updateFileToCat = self.updateFileToCat(oFTSRequest, channelID,
                                                   fileIDDict, completedFiles,
                                                   filesToFail)
            if not updateFileToCat["OK"]:
                log.error(updateFileToCat["Message"])

            log.debug("Updating FTS request status")
            res = self.transferDB.setFTSReqStatus(ftsReqID, 'Finished')
            if not res['OK']:
                log.error('Failed update FTS Request status', res['Message'])
        return S_OK()

    def updateFileToFTS(self, ftsReqID, channelID, filesToRetry, filesToFail,
                        completedFileIDs, fileToFTSUpdates):
        """ update TransferDB.FileToFTS and Channel tables for a finished request

    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    :param list filesToRetry: FileIDs to retry
    :param list filesToFail: FileIDs for failed files
    :param list completedFileIDs: files completed
    :param list fileToFTSUpdates: per-file attribute tuples for FileToFTS
    :return: S_OK( allUpdated ) - allUpdated is True only if every DB update succeeded
    """
        log = gLogger.getSubLogger("@%s" % ftsReqID)

        allUpdated = True

        res = self.transferDB.resetFileChannelStatus(
            channelID, filesToRetry) if filesToRetry else S_OK()
        if not res["OK"]:
            log.error("Failed to update the Channel table for file to retry.",
                      res["Message"])
            allUpdated = False

        for fileID in filesToFail:
            log.info("Updating the Channel table for files to reschedule")
            res = self.transferDB.setFileToReschedule(fileID)
            if not res["OK"]:
                log.error("Failed to update Channel table for failed files.",
                          res["Message"])
                allUpdated = False
            elif res["Value"] == "max reschedule attempt reached":
                # FIX: the original used "..." % res["Value"] on a format string
                # with no placeholder, raising TypeError instead of logging
                log.error("setting Channel status to 'Failed' : %s" %
                          res["Value"])
                res = self.transferDB.setFileChannelStatus(
                    channelID, fileID, 'Failed')
                if not res["OK"]:
                    log.error(
                        "Failed to update Channel table for failed files.",
                        res["Message"])
                    allUpdated = False

        if completedFileIDs:
            res = self.transferDB.updateCompletedChannelStatus(
                channelID, completedFileIDs)
            if not res["OK"]:
                log.error(
                    "Failed to update the Channel table for successful files.",
                    res["Message"])
                allUpdated = False
            res = self.transferDB.updateAncestorChannelStatus(
                channelID, completedFileIDs)
            if not res["OK"]:
                log.error(
                    'Failed to update the Channel table for ancestors of successful files.',
                    res['Message'])
                allUpdated = False

        if fileToFTSUpdates:
            res = self.transferDB.setFileToFTSFileAttributes(
                ftsReqID, channelID, fileToFTSUpdates)
            if not res["OK"]:
                log.error("Failed to update the FileToFTS table for files.",
                          res["Message"])
                allUpdated = False

        return S_OK(allUpdated)

    def updateFileToCat(self, oFTSRequest, channelID, fileIDDict,
                        completedFiles, filesToFail):
        """ update TransferDB.FileToCat table for finished request

        :param FTSRequest oFTSRequest: FTSRequest instance
        :param int channelID: FTSReq.ChannelID
        :param dict fileIDDict: fileID -> file info dict, each entry holding at least 'LFN'
        :param list completedFiles: LFNs transferred successfully
        :param list filesToFail: FileIDs of failed files
        :return: S_OK() or the failing S_ERROR structure with an amended Message
        """
        res = oFTSRequest.getFailedRegistrations()
        if not res["OK"]:
            # previously res["Value"] was read unconditionally, raising KeyError
            # whenever getFailedRegistrations returned S_ERROR
            res["Message"] = "Failed to get failed registrations: %s" % res[
                "Message"]
            return res
        failedRegistrations = res["Value"]
        regFailedFileIDs = []
        regDoneFileIDs = []
        regForgetFileIDs = []
        for fileID, fileDict in fileIDDict.items():
            lfn = fileDict['LFN']

            if lfn in failedRegistrations:
                regFailedFileIDs.append(fileID)
                # if the LFN appears more than once, FileToCat needs to be reset only once
                del failedRegistrations[lfn]
            elif lfn in completedFiles:
                regDoneFileIDs.append(fileID)
            elif fileID in filesToFail:
                regForgetFileIDs.append(fileID)

        res = self.transferDB.setRegistrationWaiting(
            channelID, regFailedFileIDs) if regFailedFileIDs else S_OK()
        if not res["OK"]:
            res["Message"] = "Failed to reset entries in FileToCat: %s" % res[
                "Message"]
            return res

        res = self.transferDB.setRegistrationDone(
            channelID, regDoneFileIDs) if regDoneFileIDs else S_OK()
        if not res["OK"]:
            res["Message"] = "Failed to set entries Done in FileToCat: %s" % res[
                "Message"]
            return res

        # This entries could also be set to Failed, but currently there is no method to do so.
        res = self.transferDB.setRegistrationDone(
            channelID, regForgetFileIDs) if regForgetFileIDs else S_OK()
        if not res["OK"]:
            res["Message"] = "Failed to set entries Done in FileToCat: %s" % res[
                "Message"]
            return res

        return S_OK()

    @classmethod
    def missingSource(cls, failReason):
        """ check whether the message sent by the FTS server concerns a missing source file

        :param str failReason: message sent by FTS server
        :return: 1 if any known missing-source pattern matches, 0 otherwise
        """
        # patterns are tried in order; any() stops at the first match,
        # mirroring the original early return
        matched = any(pattern.search(failReason)
                      for pattern in cls.missingSourceErrors)
        return 1 if matched else 0
Ejemplo n.º 24
0
class ReplicationScheduler( AgentModule ):
  """
  .. class:: ReplicationScheduler

  Agent scheduling 'transfer' requests: for every Waiting file of a transfer
  sub-request it determines a replication tree and adds each hop of that tree
  to the corresponding TransferDB channel, from which the FTS submission agent
  picks the files up.
  """

  def initialize( self ):
    """ agent initialisation: create DB clients / helpers and set the shifter proxy """
    self.section = PathFinder.getAgentSection( AGENT_NAME )
    self.RequestDB = RequestDBMySQL()
    self.TransferDB = TransferDB()
    self.DataLog = DataLoggingClient()
    self.factory = StorageFactory()
    self.rm = ReplicaManager()

    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()

  def execute( self ):
    """ The main agent execution method """

    # This allows dynamic changing of the throughput timescale.
    # NOTE: a hard-coded debug override (self.throughputTimescale = 60 * 60 * 1)
    # used to follow here, defeating the configuration option; removed — the
    # default (3600) is the same value, so default behaviour is unchanged.
    self.throughputTimescale = self.am_getOption( 'ThroughputTimescale', 3600 )

    ######################################################################################
    #
    #  Obtain information on the current state of the channel queues
    #

    res = self.TransferDB.getChannelQueues()
    if not res['OK']:
      errStr = "ReplicationScheduler._execute: Failed to get channel queues from TransferDB."
      gLogger.error( errStr, res['Message'] )
      return S_OK()
    if not res['Value']:
      gLogger.info( "ReplicationScheduler._execute: No active channels found for replication." )
      return S_OK()
    channels = res['Value']

    res = self.TransferDB.getChannelObservedThroughput( self.throughputTimescale )
    if not res['OK']:
      errStr = "ReplicationScheduler._execute: Failed to get observed throughput from TransferDB."
      gLogger.error( errStr, res['Message'] )
      return S_OK()
    if not res['Value']:
      gLogger.info( "ReplicationScheduler._execute: No active channels found for replication." )
      return S_OK()
    bandwidths = res['Value']

    self.strategyHandler = StrategyHandler( bandwidths, channels, self.section )

    processedRequests = []
    while True:

      ######################################################################################
      #
      #  Fetch one waiting transfer request per iteration; the loop terminates when
      #  no request is found or when a request is seen for the second time.
      #

      gLogger.info( "ReplicationScheduler._execute: Contacting RequestDB for suitable requests." )
      res = self.RequestDB.getRequest( 'transfer' )
      if not res['OK']:
        gLogger.error( "ReplicationScheduler._execute: Failed to get a request list from RequestDB.", res['Message'] )
        # previously this branch did 'continue', spinning forever on a persistent
        # DB error; end the cycle like the other error paths instead
        return S_OK()
      if not res['Value']:
        gLogger.info( "ReplicationScheduler._execute: No requests found in RequestDB." )
        return S_OK()
      requestString = res['Value']['RequestString']
      requestName = res['Value']['RequestName']
      gLogger.info( "ReplicationScheduler._execute: Obtained Request %s from RequestDB." % ( requestName ) )

      ######################################################################################
      #
      #  The request must then be parsed to obtain the sub-requests, their attributes and files.
      #

      gLogger.info( 'ReplicationScheduler._execute: Parsing Request %s.' % ( requestName ) )
      oRequest = RequestContainer( requestString )
      res = oRequest.getAttribute( 'RequestID' )
      if not res['OK']:
        gLogger.error( 'ReplicationScheduler._execute: Failed to get requestID.', res['Message'] )
        return S_ERROR( 'ReplicationScheduler._execute: Failed to get number of sub-requests.' )
      requestID = res['Value']
      if requestID in processedRequests:
        # Break the loop once we have iterated once over all requests
        res = self.RequestDB.updateRequest( requestName, requestString )
        if not res['OK']:
          gLogger.error( "Failed to update request", "%s %s" % ( requestName, res['Message'] ) )
        return S_OK()

      processedRequests.append( requestID )

      res = oRequest.getNumSubRequests( 'transfer' )
      if not res['OK']:
        gLogger.error( 'ReplicationScheduler._execute: Failed to get number of sub-requests.', res['Message'] )
        return S_ERROR( 'ReplicationScheduler._execute: Failed to get number of sub-requests.' )
      numberRequests = res['Value']
      gLogger.info( "ReplicationScheduler._execute: '%s' found with %s sub-requests." % ( requestName, numberRequests ) )

      ######################################################################################
      #
      #  The important request attributes are the source and target SEs.
      #

      for ind in range( numberRequests ):
        gLogger.info( "ReplicationScheduler._execute: Treating sub-request %s from '%s'." % ( ind, requestName ) )
        attributes = oRequest.getSubRequestAttributes( ind, 'transfer' )['Value']
        if attributes['Status'] != 'Waiting':
          #  If the sub-request is already in terminal state
          gLogger.info( "ReplicationScheduler._execute: Sub-request %s is status '%s' and  not to be executed." % ( ind, attributes['Status'] ) )
          continue

        sourceSE = attributes['SourceSE']
        targetSE = attributes['TargetSE']
        # NOTE: this normalisation should arguably live in the transfer request class
        if type( targetSE ) in types.StringTypes:
          # split() without a comma simply yields [targetSE], so no branching needed
          targetSEs = targetSE.split( ',' )
        else:
          # previously 'targetSEs' was left unbound for non-string values,
          # causing a NameError when the replication tree loop ran
          targetSEs = list( targetSE )
        operation = attributes['Operation']
        reqRepStrategy = None
        if operation in self.strategyHandler.getSupportedStrategies():
          reqRepStrategy = operation

        ######################################################################################
        #
        # Then obtain the file attribute of interest are the  LFN and FileID
        #

        res = oRequest.getSubRequestFiles( ind, 'transfer' )
        if not res['OK']:
          gLogger.error( 'ReplicationScheduler._execute: Failed to obtain sub-request files.' , res['Message'] )
          continue
        files = res['Value']
        gLogger.info( "ReplicationScheduler._execute: Sub-request %s found with %s files." % ( ind, len( files ) ) )
        filesDict = {}
        # 'subRequestFile' renamed from 'file' (shadowed a builtin)
        for subRequestFile in files:
          lfn = subRequestFile['LFN']
          if subRequestFile['Status'] != 'Waiting':
            gLogger.debug( "ReplicationScheduler._execute: %s will not be scheduled because it is %s." % ( lfn, subRequestFile['Status'] ) )
          else:
            filesDict[lfn] = subRequestFile['FileID']
        if not filesDict:
          gLogger.info( "ReplicationScheduler._execute: No Waiting files found for request" )
          continue
        notSched = len( files ) - len( filesDict )
        if notSched:
          gLogger.info( "ReplicationScheduler._execute: %d files found not Waiting" % notSched )

        ######################################################################################
        #
        #  Now obtain replica information for the files associated to the sub-request.
        #

        lfns = filesDict.keys()
        gLogger.info( "ReplicationScheduler._execute: Obtaining replica information for %d sub-request files." % len( lfns ) )
        res = self.rm.getCatalogReplicas( lfns )
        if not res['OK']:
          gLogger.error( "ReplicationScheduler._execute: Failed to get replica information.", res['Message'] )
          continue
        for lfn, failure in res['Value']['Failed'].items():
          gLogger.error( "ReplicationScheduler._execute: Failed to get replicas.", '%s: %s' % ( lfn, failure ) )
        replicas = res['Value']['Successful']
        if not replicas.keys():
          gLogger.error( "ReplicationScheduler._execute: Failed to get replica information for all files." )
          continue

        ######################################################################################
        #
        #  Now obtain the file sizes for the files associated to the sub-request.
        #

        lfns = replicas.keys()
        gLogger.info( "ReplicationScheduler._execute: Obtaining file sizes for %d sub-request files." % len( lfns ) )
        res = self.rm.getCatalogFileMetadata( lfns )
        if not res['OK']:
          gLogger.error( "ReplicationScheduler._execute: Failed to get file size information.", res['Message'] )
          continue
        for lfn, failure in res['Value']['Failed'].items():
          gLogger.error( 'ReplicationScheduler._execute: Failed to get file size.', '%s: %s' % ( lfn, failure ) )
        metadata = res['Value']['Successful']
        if not metadata.keys():
          gLogger.error( "ReplicationScheduler._execute: Failed to get metadata for all files." )
          continue

        ######################################################################################
        #
        # For each LFN determine the replication tree
        #

        for lfn in sortList( metadata.keys() ):
          fileSize = metadata[lfn]['Size']
          lfnReps = replicas[lfn]
          fileID = filesDict[lfn]

          # keep only the targets where the file is not yet present
          targets = []
          for targetSE in targetSEs:
            if targetSE in lfnReps.keys():
              gLogger.debug( "ReplicationScheduler.execute: %s already present at %s." % ( lfn, targetSE ) )
            else:
              targets.append( targetSE )
          if not targets:
            gLogger.info( "ReplicationScheduler.execute: %s present at all targets." % lfn )
            oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Done' )
            continue
          if not lfnReps:
            gLogger.error( "ReplicationScheduler.execute: The file has no replicas.", lfn )
            continue
          res = self.strategyHandler.determineReplicationTree( sourceSE, targets, lfnReps, fileSize, strategy = reqRepStrategy )
          if not res['OK']:
            gLogger.error( "ReplicationScheduler.execute: Failed to determine replication tree.", res['Message'] )
            continue
          tree = res['Value']

          ######################################################################################
          #
          # For each item in the replication tree obtain the source and target SURLS
          #

          # 'hopDict' renamed from 'dict' (shadowed a builtin)
          for channelID, hopDict in tree.items():
            gLogger.info( "ReplicationScheduler.execute: processing for channel %d %s" % ( channelID, str( hopDict ) ) )
            hopSourceSE = hopDict['SourceSE']
            hopDestSE = hopDict['DestSE']
            hopAncestor = hopDict['Ancestor']

            # Get the sourceSURL
            if hopAncestor:
              # the hop waits until its ancestor hop has completed
              status = 'Waiting%s' % ( hopAncestor )
              res = self.obtainLFNSURL( hopSourceSE, lfn )
              if not res['OK']:
                errStr = res['Message']
                gLogger.error( errStr )
                return S_ERROR( errStr )
              sourceSURL = res['Value']
            else:
              status = 'Waiting'
              res = self.resolvePFNSURL( hopSourceSE, lfnReps[hopSourceSE] )
              if not res['OK']:
                # fall back to the catalogue PFN if protocol resolution fails
                sourceSURL = lfnReps[hopSourceSE]
              else:
                sourceSURL = res['Value']

            # Get the targetSURL
            res = self.obtainLFNSURL( hopDestSE, lfn )
            if not res['OK']:
              errStr = res['Message']
              gLogger.error( errStr )
              return S_ERROR( errStr )
            targetSURL = res['Value']

            ######################################################################################
            #
            # For each item in the replication tree add the file to the channel
            #
            res = self.TransferDB.addFileToChannel( channelID, fileID, hopSourceSE, sourceSURL, hopDestSE, targetSURL, fileSize, fileStatus = status )
            if not res['OK']:
              errStr = res['Message']
              gLogger.error( "ReplicationScheduler._execute: Failed to add File to Channel." , "%s %s" % ( fileID, channelID ) )
              return S_ERROR( errStr )
            res = self.TransferDB.addFileRegistration( channelID, fileID, lfn, targetSURL, hopDestSE )
            if not res['OK']:
              errStr = res['Message']
              gLogger.error( "ReplicationScheduler._execute: Failed to add File registration." , "%s %s" % ( fileID, channelID ) )
              # roll back the channel entry so the file is not half-scheduled
              result = self.TransferDB.removeFileFromChannel( channelID, fileID )
              if not result['OK']:
                errStr += result['Message']
                gLogger.error( "ReplicationScheduler._execute: Failed to remove File." , "%s %s" % ( fileID, channelID ) )
              return S_ERROR( errStr )
            oRequest.setSubRequestFileAttributeValue( ind, 'transfer', lfn, 'Status', 'Scheduled' )
          res = self.TransferDB.addReplicationTree( fileID, tree )
          if not res['OK']:
            # result was previously discarded; log so the failure is not silent
            gLogger.error( "ReplicationScheduler._execute: Failed to add replication tree.", res['Message'] )

        if oRequest.isSubRequestEmpty( ind, 'transfer' )['Value']:
          oRequest.setSubRequestStatus( ind, 'transfer', 'Scheduled' )

      ################################################
      #  Generate the new request string after operation
      requestString = oRequest.toXML()['Value']
      res = self.RequestDB.updateRequest( requestName, requestString )
      if not res['OK']:
        gLogger.error( "ReplicationScheduler._execute: Failed to update request", "%s %s" % ( requestName, res['Message'] ) )

  def obtainLFNSURL( self, targetSE, lfn ):
    """ Creates the target SURL for the storage and LFN supplied

    :param str targetSE: target storage element name
    :param str lfn: logical file name
    """
    res = self.factory.getStorages( targetSE, protocolList = ['SRM2'] )
    if not res['OK']:
      errStr = 'ReplicationScheduler._execute: Failed to create SRM2 storage for %s: %s. ' % ( targetSE, res['Message'] )
      gLogger.error( errStr )
      return S_ERROR( errStr )
    storageObjects = res['Value']['StorageObjects']
    for storageObject in storageObjects:
      res = storageObject.getCurrentURL( lfn )
      if res['OK']:
        return res
    gLogger.error( 'ReplicationScheduler._execute: Failed to get SRM compliant storage.' , targetSE )
    return S_ERROR( 'ReplicationScheduler._execute: Failed to get SRM compliant storage.' )

  def resolvePFNSURL( self, sourceSE, pfn ):
    """ Resolve the supplied PFN to its SRM-compliant URL on the given storage element

    (docstring previously a copy-paste of obtainLFNSURL's)

    :param str sourceSE: source storage element name
    :param str pfn: physical file name to resolve
    """
    res = self.rm.getPfnForProtocol( [pfn], sourceSE )
    if not res['OK']:
      return res
    if pfn in res['Value']['Failed'].keys():
      return S_ERROR( res['Value']['Failed'][pfn] )
    return S_OK( res['Value']['Successful'][pfn] )
Ejemplo n.º 25
0
class FTSSubmitAgent( AgentModule ):
  """
  .. class:: FTSSubmitAgent

  This class is submitting previously scheduled files to the FTS system using helper class FTSRequest.

  Files to be transferred are read from TransferDB.Channel table, only those with Status = 'Waiting'.
  After submission TransferDB.Channel.Status is set to 'Executing'. The rest of state propagation is
  done in FTSMonitorAgent.

  An information about newly created FTS request is hold in TransferDB.FTSReq (request itself) table and
  TransferDB.FileToFTS (files in request) and TransferDB.FileToCat (files to be registered, for
  failover only).
  """
  # # placeholder for TransferDB reference
  transferDB = None
  # # placeholder for max jobs per channel
  maxJobsPerChannel = 2
  # # placeholder for checksum test option flag
  cksmTest = False
  # # placeholder for checksum type
  cksmType = ""
  # # default checksum type
  __defaultCksmType = "ADLER32"

  def initialize( self ):
    """ agent's initialisation

    :param self: self reference
    """
    # # save transferDB handler
    self.transferDB = TransferDB()
    # # read config options
    self.maxJobsPerChannel = self.am_getOption( 'MaxJobsPerChannel', self.maxJobsPerChannel )
    self.log.info( "max jobs/channel = %s" % self.maxJobsPerChannel )

    # # checksum test
    self.cksmTest = bool( self.am_getOption( "ChecksumTest", False ) )
    # # checksum type; fall back to the default for unsupported values
    if self.cksmTest:
      self.cksmType = str( self.am_getOption( "ChecksumType", self.__defaultCksmType ) ).upper()
      if self.cksmType and self.cksmType not in ( "ADLER32", "MD5", "SHA1" ):
        self.log.warn( "unknown checksum type: %s, will use default %s" % ( self.cksmType, self.__defaultCksmType ) )
        self.cksmType = self.__defaultCksmType

    self.log.info( "checksum test is %s" % ( { True : "enabled using %s checksum" % self.cksmType,
                                               False : "disabled"}[self.cksmTest] ) )

    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    # # per-channel cache of LFNs known to be in 'Staging' state
    self.filesBeingStaged = {}
    return S_OK()

  def execute( self ):
    """ execution in one agent's cycle

    :param self: self reference
    """

    #########################################################################
    #  Obtain the eligible channels for submission.
    self.log.info( 'Obtaining channels eligible for submission.' )
    res = self.transferDB.selectChannelsForSubmission( self.maxJobsPerChannel )
    if not res['OK']:
      self.log.error( "Failed to retrieve channels for submission.", res['Message'] )
      return S_OK()
    elif not res['Value']:
      self.log.info( "FTSSubmitAgent. No channels eligible for submission." )
      return S_OK()
    channelDicts = res['Value']
    self.log.info( 'Found %s eligible channels.' % len( channelDicts ) )

    #########################################################################
    # Submit to all the eligible waiting channels.
    i = 1
    for channelDict in channelDicts:
      infoStr = "\n\n##################################################################################\n\n"
      infoStr = "%sStarting submission loop %s of %s\n\n" % ( infoStr, i, len( channelDicts ) )
      self.log.info( infoStr )
      # submission failures are best-effort: log inside submitTransfer and carry on
      self.submitTransfer( channelDict )
      i += 1
    return S_OK()

  def submitTransfer( self, channelDict ):
    """ create and submit FTS jobs based on information it gets from the DB

    :param self: self reference
    :param dict channelDict: dict with channel info as read from TransferDB.selectChannelsForSubmission
    """

    channelID = channelDict['ChannelID']
    filesPerJob = channelDict['NumFiles']

    #########################################################################
    #  Obtain the first files in the selected channel.
    self.log.info( "FTSSubmitAgent.submitTransfer: Attempting to obtain files for channel %s: %s to %s"
                   % ( channelID, channelDict['Source'], channelDict['Destination'] ) )
    res = self.transferDB.getFilesForChannel( channelID, 2 * filesPerJob )
    if not res['OK']:
      errStr = 'FTSSubmitAgent.%s' % res['Message']
      self.log.error( errStr )
      return S_OK()
    if not res['Value']:
      self.log.info( "FTSSubmitAgent.submitTransfer: No files found for channel." )
      return S_OK()
    filesDict = res['Value']
    sourceSE = filesDict['SourceSE']
    targetSE = filesDict['TargetSE']
    self.log.info( 'Obtained %s files for channel %s to %s' % ( len( filesDict['Files'] ), sourceSE, targetSE ) )
    if self.filesBeingStaged.get( channelID ):
      self.log.info( '%d files are currently in staging status' % len( self.filesBeingStaged[channelID] ) )

    # Create the FTSRequest object for preparing the submission
    oFTSRequest = FTSRequest()
    oFTSRequest.setSourceSE( sourceSE )
    oFTSRequest.setTargetSE( targetSE )
    files = filesDict['Files']

    # # enable/disable cksm test
    oFTSRequest.setCksmTest( self.cksmTest )
    if self.cksmType:
      oFTSRequest.setCksmType( self.cksmType )

    #########################################################################
    #  Populate the FTS Request with the files.
    self.log.info( 'Populating the FTS request with file information' )
    fileIDs = []
    totalSize = 0
    fileIDSizes = {}
    lfns = set()
    for fileMeta in files:
      lfn = fileMeta['LFN']
      lfns.add( lfn )
      oFTSRequest.setLFN( lfn )
      oFTSRequest.setSourceSURL( lfn, fileMeta['SourceSURL'] )
      oFTSRequest.setTargetSURL( lfn, fileMeta['TargetSURL'] )
      if lfn in self.filesBeingStaged.get( channelID, [] ):
        oFTSRequest.setStatus( lfn, 'Staging' )
      fileID = fileMeta['FileID']
      fileIDs.append( fileID )
      totalSize += fileMeta['Size']
      fileIDSizes[fileID] = fileMeta['Size']

    # # files whose source turned out to be missing must be rescheduled
    oFTSRequest.resolveSource()
    noSource = [ lfn for lfn, fileInfo in oFTSRequest.fileDict.items()
                     if fileInfo.get( "Status", "" ) == "Failed" and fileInfo.get( "Reason", "" ) in ( "No replica at SourceSE",
                                                                                                   "Source file does not exist" ) ]
    toReschedule = [fileMeta["FileID"] for fileMeta in files if fileMeta["LFN"] in noSource]

    if toReschedule:
      self.log.info( "Found %s files to reschedule" % len( toReschedule ) )
      for fileID in toReschedule:
        res = self.transferDB.setFileToReschedule( fileID )
        if not res["OK"]:
          self.log.error( "Failed to update Channel table for failed files.", res["Message"] )
        elif res["Value"] == "max reschedule attempt reached":
          # (original format string had no %s placeholder and raised TypeError here)
          self.log.error( "setting Channel status to 'Failed' : %s" % res["Value"] )
          res = self.transferDB.setFileChannelStatus( channelID, fileID, 'Failed' )
          if not res["OK"]:
            self.log.error( "Failed to update Channel table for failed files.", res["Message"] )

    #########################################################################
    #  Submit the FTS request and retrieve the FTS GUID/Server
    self.log.info( 'Submitting the FTS request' )
    res = oFTSRequest.submit()
    if not res['OK']:
      errStr = "FTSSubmitAgent.submit: %s" % res['Message']
      self.log.error( errStr )
      self.log.info( 'Updating the Channel table for files to retry' )
      res = self.transferDB.resetFileChannelStatus( channelID, fileIDs )
      if not res['OK']:
        self.log.error( 'Failed to update the Channel table for file to retry.', res['Message'] )
      return S_ERROR( errStr )
    ftsGUID = res['Value']['ftsGUID']
    ftsServer = res['Value']['ftsServer']
    nbSubmitted = res['Value']['submittedFiles']
    infoStr = """Submitted FTS Job:

              FTS Guid: %s
              FTS Server: %s
              ChannelID: %s
              SourceSE: %s
              TargetSE: %s
              Files: %s

""" % ( ftsGUID, ftsServer, str( channelID ), sourceSE, targetSE, str( nbSubmitted ) )
    self.log.info( infoStr )

    # # filter out skipped files
    failedFiles = oFTSRequest.getFailed()['Value']
    stagingFiles = oFTSRequest.getStaging()['Value']
    # cache files being staged
    self.filesBeingStaged.setdefault( channelID, set() ).update( stagingFiles )
    submittedFiles = lfns.difference( failedFiles, stagingFiles )
    # files being submitted are staged
    self.filesBeingStaged[channelID] -= submittedFiles
    failedIDs = set( [ meta["FileID"] for meta in files if meta["LFN"] in failedFiles ] )
    stagingIDs = set( [ meta["FileID"] for meta in files if meta["LFN"] in stagingFiles ] )
    # # only submitted
    submittedIDs = set( fileIDs ) - failedIDs - stagingIDs
    # # only count the submitted size
    totalSize = sum( [ meta["Size"] for meta in files if meta["FileID"] in submittedIDs ] )

    #########################################################################
    #  Insert the FTS Req details and add the number of files and size
    res = self.transferDB.insertFTSReq( ftsGUID, ftsServer, channelID )
    if not res['OK']:
      errStr = "FTSSubmitAgent.%s" % res['Message']
      self.log.error( errStr )
      return S_ERROR( errStr )
    ftsReqID = res['Value']
    self.log.info( 'Obtained FTS RequestID %s' % ftsReqID )
    res = self.transferDB.setFTSReqAttribute( ftsReqID, 'SourceSE', sourceSE )
    if not res['OK']:
      self.log.error( "Failed to set SourceSE for FTSRequest", res['Message'] )
    res = self.transferDB.setFTSReqAttribute( ftsReqID, 'TargetSE', targetSE )
    if not res['OK']:
      self.log.error( "Failed to set TargetSE for FTSRequest", res['Message'] )
    res = self.transferDB.setFTSReqAttribute( ftsReqID, 'NumberOfFiles', len( submittedIDs ) )
    if not res['OK']:
      self.log.error( "Failed to set NumberOfFiles for FTSRequest", res['Message'] )
    res = self.transferDB.setFTSReqAttribute( ftsReqID, 'TotalSize', totalSize )
    if not res['OK']:
      self.log.error( "Failed to set TotalSize for FTSRequest", res['Message'] )

    #########################################################################
    #  Insert the submission event in the FTSReqLogging table
    event = 'Submitted'
    res = self.transferDB.addLoggingEvent( ftsReqID, event )
    if not res['OK']:
      errStr = "FTSSubmitAgent.%s" % res['Message']
      self.log.error( errStr )

    #########################################################################
    #  Insert the FileToFTS details and remove the files from the channel
    self.log.info( 'Setting the files as Executing in the Channel table' )
    res = self.transferDB.setChannelFilesExecuting( channelID, list( submittedIDs ) )
    if not res['OK']:
      self.log.error( 'Failed to update the Channel tables for files.', res['Message'] )

    lfns = []
    fileToFTSFileAttributes = []
    for fileMeta in files:
      fileID = fileMeta['FileID']
      # Staging is not an error case
      if fileID not in stagingIDs:
        lfns.append( fileMeta['LFN'] )
        fileToFTSFileAttributes.append( ( fileID, fileIDSizes[fileID] ) )

    self.log.info( 'Populating the FileToFTS table with file information' )
    res = self.transferDB.setFTSReqFiles( ftsReqID, channelID, fileToFTSFileAttributes )
    if not res['OK']:
      self.log.error( 'Failed to populate the FileToFTS table with files.' )

    # previously the method fell off the end, returning None instead of a result structure
    return S_OK()
Ejemplo n.º 26
0
class ReplicationScheduler(AgentModule):
    def initialize(self):
        """ Agent initialisation: create the database clients and storage
        helpers used by execute() and select the shifter proxy.
        """
        self.section = PathFinder.getAgentSection(AGENT_NAME)

        # Database clients.
        self.RequestDB = RequestDBMySQL()
        self.TransferDB = TransferDB()
        self.DataLog = DataLoggingClient()

        # Storage helpers.
        self.factory = StorageFactory()
        self.rm = ReplicaManager()

        # Run with the default proxy defined under
        # /Operations/Shifter/DataManager; the shifterProxy option in the
        # Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        return S_OK()

    def execute(self):
        """ The main agent execution method.

        Pulls 'transfer' requests from the RequestDB one at a time, parses
        their sub-requests, and for each Waiting file asks the StrategyHandler
        for a replication tree, scheduling every hop of the tree in the
        TransferDB channel tables.  Returns S_OK from inside the loop when the
        RequestDB is exhausted or every request has been visited once.
        """

        # This allows dynamic changing of the throughput timescale
        self.throughputTimescale = self.am_getOption('ThroughputTimescale',
                                                     3600)
        # NOTE(review): the configured value above is immediately overridden
        # by this hard-coded one hour -- confirm which one is intended.
        self.throughputTimescale = 60 * 60 * 1
        #print 'ThroughputTimescale:',self.throughputTimescale
        ######################################################################################
        #
        #  Obtain information on the current state of the channel queues
        #

        res = self.TransferDB.getChannelQueues()
        if not res['OK']:
            errStr = "ReplicationScheduler._execute: Failed to get channel queues from TransferDB."
            gLogger.error(errStr, res['Message'])
            return S_OK()
        if not res['Value']:
            gLogger.info(
                "ReplicationScheduler._execute: No active channels found for replication."
            )
            return S_OK()
        channels = res['Value']

        res = self.TransferDB.getChannelObservedThroughput(
            self.throughputTimescale)
        if not res['OK']:
            errStr = "ReplicationScheduler._execute: Failed to get observed throughput from TransferDB."
            gLogger.error(errStr, res['Message'])
            return S_OK()
        if not res['Value']:
            gLogger.info(
                "ReplicationScheduler._execute: No active channels found for replication."
            )
            return S_OK()
        bandwidths = res['Value']

        # The strategy handler decides, per file, which channel hops to use
        # given the current queues and observed bandwidths.
        self.strategyHandler = StrategyHandler(bandwidths, channels,
                                               self.section)

        processedRequests = []
        requestsPresent = True
        while requestsPresent:

            ######################################################################################
            #
            #  The first step is to obtain a transfer request from the RequestDB which should be scheduled.
            #

            gLogger.info(
                "ReplicationScheduler._execute: Contacting RequestDB for suitable requests."
            )
            res = self.RequestDB.getRequest('transfer')
            if not res['OK']:
                gLogger.error(
                    "ReplicationScheduler._execute: Failed to get a request list from RequestDB.",
                    res['Message'])
                # NOTE(review): 'continue' retries immediately; a persistent
                # RequestDB failure turns this while loop into a busy loop.
                continue
            if not res['Value']:
                gLogger.info(
                    "ReplicationScheduler._execute: No requests found in RequestDB."
                )
                # NOTE(review): the flag is redundant -- the return below exits
                # before the while condition is re-evaluated.
                requestsPresent = False
                return S_OK()
            requestString = res['Value']['RequestString']
            requestName = res['Value']['RequestName']
            gLogger.info(
                "ReplicationScheduler._execute: Obtained Request %s from RequestDB."
                % (requestName))

            ######################################################################################
            #
            #  The request must then be parsed to obtain the sub-requests, their attributes and files.
            #

            logStr = 'ReplicationScheduler._execute: Parsing Request %s.' % (
                requestName)
            gLogger.info(logStr)
            oRequest = RequestContainer(requestString)
            res = oRequest.getAttribute('RequestID')
            if not res['OK']:
                gLogger.error(
                    'ReplicationScheduler._execute: Failed to get requestID.',
                    res['Message'])
                return S_ERROR(
                    'ReplicationScheduler._execute: Failed to get number of sub-requests.'
                )
            requestID = res['Value']
            # Seeing the same request twice means we have cycled through
            # everything the RequestDB currently offers: put it back and stop.
            if requestID in processedRequests:
                # Break the loop once we have iterated once over all requests
                res = self.RequestDB.updateRequest(requestName, requestString)
                if not res['OK']:
                    gLogger.error("Failed to update request",
                                  "%s %s" % (requestName, res['Message']))
                return S_OK()

            processedRequests.append(requestID)

            res = oRequest.getNumSubRequests('transfer')
            if not res['OK']:
                gLogger.error(
                    'ReplicationScheduler._execute: Failed to get number of sub-requests.',
                    res['Message'])
                return S_ERROR(
                    'ReplicationScheduler._execute: Failed to get number of sub-requests.'
                )
            numberRequests = res['Value']
            gLogger.info(
                "ReplicationScheduler._execute: '%s' found with %s sub-requests."
                % (requestName, numberRequests))

            ######################################################################################
            #
            #  The important request attributes are the source and target SEs.
            #

            for ind in range(numberRequests):
                gLogger.info(
                    "ReplicationScheduler._execute: Treating sub-request %s from '%s'."
                    % (ind, requestName))
                attributes = oRequest.getSubRequestAttributes(
                    ind, 'transfer')['Value']
                if attributes['Status'] != 'Waiting':
                    #  If the sub-request is already in terminal state
                    gLogger.info(
                        "ReplicationScheduler._execute: Sub-request %s is status '%s' and  not to be executed."
                        % (ind, attributes['Status']))
                    continue

                sourceSE = attributes['SourceSE']
                targetSE = attributes['TargetSE']
                """ This section should go in the transfer request class """
                # NOTE(review): types.StringTypes is Python 2 only.  If
                # targetSE is ever NOT a string, targetSEs is never assigned
                # and the per-file loop below raises NameError (or silently
                # reuses the previous sub-request's value) -- confirm TargetSE
                # is always a (possibly comma-separated) string.
                if type(targetSE) in types.StringTypes:
                    if re.search(',', targetSE):
                        targetSEs = targetSE.split(',')
                    else:
                        targetSEs = [targetSE]
                """----------------------------------------------------- """
                operation = attributes['Operation']
                # Use the operation name itself as the replication strategy
                # when the StrategyHandler supports one of that name.
                reqRepStrategy = None
                if operation in self.strategyHandler.getSupportedStrategies():
                    reqRepStrategy = operation

                ######################################################################################
                #
                # Then obtain the file attribute of interest are the  LFN and FileID
                #

                res = oRequest.getSubRequestFiles(ind, 'transfer')
                if not res['OK']:
                    gLogger.error(
                        'ReplicationScheduler._execute: Failed to obtain sub-request files.',
                        res['Message'])
                    continue
                files = res['Value']
                gLogger.info(
                    "ReplicationScheduler._execute: Sub-request %s found with %s files."
                    % (ind, len(files)))
                # Map LFN -> FileID for the files still waiting to be scheduled.
                filesDict = {}
                # NOTE(review): 'file' shadows the Python 2 builtin here.
                for file in files:
                    lfn = file['LFN']
                    if file['Status'] != 'Waiting':
                        gLogger.debug(
                            "ReplicationScheduler._execute: %s will not be scheduled because it is %s."
                            % (lfn, file['Status']))
                    else:
                        fileID = file['FileID']
                        filesDict[lfn] = fileID
                if not filesDict:
                    gLogger.info(
                        "ReplicationScheduler._execute: No Waiting files found for request"
                    )
                    continue
                notSched = len(files) - len(filesDict)
                if notSched:
                    gLogger.info(
                        "ReplicationScheduler._execute: %d files found not Waiting"
                        % notSched)

                ######################################################################################
                #
                #  Now obtain replica information for the files associated to the sub-request.
                #

                lfns = filesDict.keys()
                gLogger.info(
                    "ReplicationScheduler._execute: Obtaining replica information for %d sub-request files."
                    % len(lfns))
                res = self.rm.getCatalogReplicas(lfns)
                if not res['OK']:
                    gLogger.error(
                        "ReplicationScheduler._execute: Failed to get replica information.",
                        res['Message'])
                    continue
                for lfn, failure in res['Value']['Failed'].items():
                    gLogger.error(
                        "ReplicationScheduler._execute: Failed to get replicas.",
                        '%s: %s' % (lfn, failure))
                replicas = res['Value']['Successful']
                if not replicas.keys():
                    gLogger.error(
                        "ReplicationScheduler._execute: Failed to get replica information for all files."
                    )
                    continue

                ######################################################################################
                #
                #  Now obtain the file sizes for the files associated to the sub-request.
                #

                lfns = replicas.keys()
                gLogger.info(
                    "ReplicationScheduler._execute: Obtaining file sizes for %d sub-request files."
                    % len(lfns))
                res = self.rm.getCatalogFileMetadata(lfns)
                if not res['OK']:
                    gLogger.error(
                        "ReplicationScheduler._execute: Failed to get file size information.",
                        res['Message'])
                    continue
                for lfn, failure in res['Value']['Failed'].items():
                    gLogger.error(
                        'ReplicationScheduler._execute: Failed to get file size.',
                        '%s: %s' % (lfn, failure))
                metadata = res['Value']['Successful']
                if not metadata.keys():
                    gLogger.error(
                        "ReplicationScheduler._execute: Failed to get metadata for all files."
                    )
                    continue

                ######################################################################################
                #
                # For each LFN determine the replication tree
                #

                for lfn in sortList(metadata.keys()):
                    fileSize = metadata[lfn]['Size']
                    lfnReps = replicas[lfn]
                    fileID = filesDict[lfn]

                    # Only replicate to target SEs that do not yet hold the file.
                    targets = []
                    for targetSE in targetSEs:
                        if targetSE in lfnReps.keys():
                            gLogger.debug(
                                "ReplicationScheduler.execute: %s already present at %s."
                                % (lfn, targetSE))
                        else:
                            targets.append(targetSE)
                    if not targets:
                        gLogger.info(
                            "ReplicationScheduler.execute: %s present at all targets."
                            % lfn)
                        oRequest.setSubRequestFileAttributeValue(
                            ind, 'transfer', lfn, 'Status', 'Done')
                        continue
                    if not lfnReps:
                        gLogger.error(
                            "ReplicationScheduler.execute: The file has no replicas.",
                            lfn)
                        continue
                    res = self.strategyHandler.determineReplicationTree(
                        sourceSE,
                        targets,
                        lfnReps,
                        fileSize,
                        strategy=reqRepStrategy)
                    if not res['OK']:
                        gLogger.error(
                            "ReplicationScheduler.execute: Failed to determine replication tree.",
                            res['Message'])
                        continue
                    tree = res['Value']

                    ######################################################################################
                    #
                    # For each item in the replication tree obtain the source and target SURLS
                    #

                    # NOTE(review): 'dict' shadows the builtin in this loop body.
                    for channelID, dict in tree.items():
                        gLogger.info(
                            "ReplicationScheduler.execute: processing for channel %d %s"
                            % (channelID, str(dict)))
                        hopSourceSE = dict['SourceSE']
                        hopDestSE = dict['DestSE']
                        hopAncestor = dict['Ancestor']

                        # Get the sourceSURL
                        if hopAncestor:
                            # Intermediate hop: encode the ancestor hop in the
                            # status so this hop waits for it to complete.
                            status = 'Waiting%s' % (hopAncestor)
                            res = self.obtainLFNSURL(hopSourceSE, lfn)
                            if not res['OK']:
                                errStr = res['Message']
                                gLogger.error(errStr)
                                return S_ERROR(errStr)
                            sourceSURL = res['Value']
                        else:
                            status = 'Waiting'
                            res = self.resolvePFNSURL(hopSourceSE,
                                                      lfnReps[hopSourceSE])
                            if not res['OK']:
                                # Best effort: fall back to the raw catalog PFN.
                                sourceSURL = lfnReps[hopSourceSE]
                            else:
                                sourceSURL = res['Value']

                        # Get the targetSURL
                        res = self.obtainLFNSURL(hopDestSE, lfn)
                        if not res['OK']:
                            errStr = res['Message']
                            gLogger.error(errStr)
                            return S_ERROR(errStr)
                        targetSURL = res['Value']

                        ######################################################################################
                        #
                        # For each item in the replication tree add the file to the channel
                        #
                        res = self.TransferDB.addFileToChannel(
                            channelID,
                            fileID,
                            hopSourceSE,
                            sourceSURL,
                            hopDestSE,
                            targetSURL,
                            fileSize,
                            fileStatus=status)
                        if not res['OK']:
                            errStr = res['Message']
                            gLogger.error(
                                "ReplicationScheduler._execute: Failed to add File to Channel.",
                                "%s %s" % (fileID, channelID))
                            return S_ERROR(errStr)
                        res = self.TransferDB.addFileRegistration(
                            channelID, fileID, lfn, targetSURL, hopDestSE)
                        if not res['OK']:
                            errStr = res['Message']
                            gLogger.error(
                                "ReplicationScheduler._execute: Failed to add File registration.",
                                "%s %s" % (fileID, channelID))
                            # Roll back the channel entry so the hop is not
                            # transferred without a registration record.
                            result = self.TransferDB.removeFileFromChannel(
                                channelID, fileID)
                            if not result['OK']:
                                errStr += result['Message']
                                gLogger.error(
                                    "ReplicationScheduler._execute: Failed to remove File.",
                                    "%s %s" % (fileID, channelID))
                            return S_ERROR(errStr)
                        oRequest.setSubRequestFileAttributeValue(
                            ind, 'transfer', lfn, 'Status', 'Scheduled')
                    # NOTE(review): the result of addReplicationTree is never
                    # checked -- a failure here would go unnoticed.
                    res = self.TransferDB.addReplicationTree(fileID, tree)

                if oRequest.isSubRequestEmpty(ind, 'transfer')['Value']:
                    oRequest.setSubRequestStatus(ind, 'transfer', 'Scheduled')

            ################################################
            #  Generate the new request string after operation
            requestString = oRequest.toXML()['Value']
            res = self.RequestDB.updateRequest(requestName, requestString)
            if not res['OK']:
                gLogger.error(
                    "ReplicationScheduler._execute: Failed to update request",
                    "%s %s" % (requestName, res['Message']))

    def obtainLFNSURL(self, targetSE, lfn):
        """ Construct the SURL for *lfn* on the storage element *targetSE*.

        Tries each SRM2-capable storage object configured for the SE in turn
        and returns the first URL that can be built.

        :param targetSE: name of the target storage element
        :param lfn: logical file name to convert into a SURL
        :return: S_OK( surl ) on success, S_ERROR otherwise
        """
        res = self.factory.getStorages(targetSE, protocolList=['SRM2'])
        if not res['OK']:
            # Fixed: messages previously misattributed themselves to _execute.
            errStr = 'ReplicationScheduler.obtainLFNSURL: Failed to create SRM2 storage for %s: %s. ' % (
                targetSE, res['Message'])
            gLogger.error(errStr)
            return S_ERROR(errStr)
        storageObjects = res['Value']['StorageObjects']
        # Return the first storage object able to produce a URL for the LFN.
        for storageObject in storageObjects:
            res = storageObject.getCurrentURL(lfn)
            if res['OK']:
                return res
        gLogger.error(
            'ReplicationScheduler.obtainLFNSURL: Failed to get SRM compliant storage.',
            targetSE)
        return S_ERROR(
            'ReplicationScheduler.obtainLFNSURL: Failed to get SRM compliant storage.'
        )

    def resolvePFNSURL(self, sourceSE, pfn):
        """ Resolve *pfn* into an SRM-compliant URL on *sourceSE*.

        (Docstring fixed: it previously described obtainLFNSURL.)

        :param sourceSE: name of the source storage element
        :param pfn: physical file name to resolve
        :return: S_OK( surl ) on success, S_ERROR with the failure reason
                 reported by the replica manager otherwise
        """
        res = self.rm.getPfnForProtocol([pfn], sourceSE)
        if not res['OK']:
            return res
        # Membership test directly on the dict; '.keys()' was redundant.
        if pfn in res['Value']['Failed']:
            return S_ERROR(res['Value']['Failed'][pfn])
        return S_OK(res['Value']['Successful'][pfn])