def __transferIfNotRegistered( self, file, transferDict ):
   result = self.isRegisteredInOutputCatalog( file, transferDict )
   if not result[ 'OK' ]:
     self.log.error( result[ 'Message' ] )
     return result
   #Already registered. Need to delete
   if result[ 'Value' ]:
     self.log.info( "Transfer file %s is already registered in the output catalog" % file )
     #Delete
     filePath = os.path.join( transferDict[ 'InputPath' ], file )
     if transferDict[ 'InputFC' ] == 'LocalDisk':
       os.unlink( filePath )
     else:
       # the catalog path (filePath) is used when checking and removing replicas
       inputFC = FileCatalog( [ transferDict['InputFC'] ] )
       replicaDict = inputFC.getReplicas( filePath )
       if not replicaDict['OK']:
         self.log.error( "Error deleting file", replicaDict['Message'] )
       elif filePath not in replicaDict['Value']['Successful']:
         self.log.error( "Error deleting file", replicaDict['Value']['Failed'][filePath] )
       else:
         seList = replicaDict['Value']['Successful'][filePath].keys()
         for seName in seList:
           se = StorageElement( seName )
           self.log.info( 'Removing from %s:' % se.name, filePath )
           se.removeFile( filePath )
         inputFC.removeFile( filePath )
     self.log.info( "File %s deleted from %s" % ( file, transferDict[ 'InputFC' ] ) )
     self.__processingFiles.discard( file )
     return S_OK( file )
   #Do the transfer
   return self.__retrieveAndUploadFile( file, transferDict )
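For orientation, the transferDict consulted by this helper carries at least the keys read in the snippet above. A minimal, purely illustrative sketch of its shape (values are placeholders, not taken from a real agent configuration):

# Hypothetical illustration of the transferDict keys used above; values are placeholders.
transferDict = {
    'InputFC': 'LocalDisk',             # or a catalog name understood by FileCatalog
    'InputPath': '/some/input/path',    # directory holding the source copy of 'file'
    'OutputFC': 'FileCatalog',          # catalog queried by isRegisteredInOutputCatalog
    'OutputPath': '/grid/output/path',  # LFN directory for the uploaded file
    'OutputSE': 'SE-ONE,SE-TWO',        # comma-separated SEs, parsed with List.fromChar
}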
Example #2
def main():
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(
        ("LFN:      LFN",
         "File:     File name containing a list of affected LFNs"))
    Script.registerArgument(" SE:       Name of Storage Element")
    Script.registerArgument(" Status:   New Status for the replica")
    Script.parseCommandLine(ignoreErrors=False)

    import DIRAC
    from DIRAC import gLogger
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
    import os

    # parseCommandLine shows help when mandatory arguments are missing or incorrect
    inputFileName, storageElement, status = Script.getPositionalArgs(
        group=True)

    if os.path.exists(inputFileName):
        inputFile = open(inputFileName, "r")
        string = inputFile.read()
        inputFile.close()
        lfns = sorted(string.splitlines())
    else:
        lfns = [inputFileName]

    fc = FileCatalog()

    res = fc.getReplicas(lfns, allStatus=True)
    if not res["OK"]:
        gLogger.error("Failed to get catalog replicas.", res["Message"])
        DIRAC.exit(-1)
    lfnDict = {}
    for lfn, error in res["Value"]["Failed"].items():
        gLogger.error("Failed to get replicas for file.",
                      "%s:%s" % (lfn, error))
    for lfn, replicas in res["Value"]["Successful"].items():
        if storageElement not in replicas.keys():
            gLogger.error("LFN not registered at provided storage element.",
                          "%s %s" % (lfn, storageElement))
        else:
            lfnDict[lfn] = {
                "SE": storageElement,
                "PFN": replicas[storageElement],
                "Status": status
            }
    if not lfnDict:
        gLogger.error("No files found at the supplied storage element.")
        DIRAC.exit(2)

    res = fc.setReplicaStatus(lfnDict)
    if not res["OK"]:
        gLogger.error("Failed to set catalog replica status.", res["Message"])
        DIRAC.exit(-1)
    for lfn, error in res["Value"]["Failed"].items():
        gLogger.error("Failed to set replica status for file.",
                      "%s:%s" % (lfn, error))
    gLogger.notice("Successfully updated the status of %d files at %s." %
                   (len(res["Value"]["Successful"].keys()), storageElement))
    DIRAC.exit(0)
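The dictionary handed to FileCatalog.setReplicaStatus above maps each LFN to its storage element, PFN and new status. A minimal sketch of such a call, assuming an initialised DIRAC client environment (the LFN, SE name, PFN and status value are placeholders):

# Minimal sketch; all values below are hypothetical.
from DIRAC.Resources.Catalog.FileCatalog import FileCatalog

fc = FileCatalog()
res = fc.setReplicaStatus({
    '/vo/user/some/file.dat': {
        'SE': 'SOME-SE',
        'PFN': 'srm://some-se.example.org/vo/user/some/file.dat',
        'Status': 'SomeStatus',
    }
})
if res['OK']:
    print(res['Value']['Successful'], res['Value']['Failed'])
else:
    print(res['Message'])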
def main():
    Script.parseCommandLine(ignoreErrors=False)

    import DIRAC
    from DIRAC import gLogger
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
    import os

    args = Script.getPositionalArgs()
    if len(args) != 3:
        Script.showHelp()

    inputFileName = args[0]
    storageElement = args[1]
    status = args[2]

    if os.path.exists(inputFileName):
        inputFile = open(inputFileName, 'r')
        string = inputFile.read()
        inputFile.close()
        lfns = sorted(string.splitlines())
    else:
        lfns = [inputFileName]

    fc = FileCatalog()

    res = fc.getReplicas(lfns, allStatus=True)
    if not res['OK']:
        gLogger.error("Failed to get catalog replicas.", res['Message'])
        DIRAC.exit(-1)
    lfnDict = {}
    for lfn, error in res['Value']['Failed'].items():
        gLogger.error("Failed to get replicas for file.",
                      "%s:%s" % (lfn, error))
    for lfn, replicas in res['Value']['Successful'].items():
        if storageElement not in replicas.keys():
            gLogger.error("LFN not registered at provided storage element.",
                          "%s %s" % (lfn, storageElement))
        else:
            lfnDict[lfn] = {
                'SE': storageElement,
                'PFN': replicas[storageElement],
                'Status': status
            }
    if not lfnDict:
        gLogger.error("No files found at the supplied storage element.")
        DIRAC.exit(2)

    res = fc.setReplicaStatus(lfnDict)
    if not res['OK']:
        gLogger.error("Failed to set catalog replica status.", res['Message'])
        DIRAC.exit(-1)
    for lfn, error in res['Value']['Failed'].items():
        gLogger.error("Failed to set replica status for file.",
                      "%s:%s" % (lfn, error))
    gLogger.notice("Successfully updated the status of %d files at %s." %
                   (len(res['Value']['Successful'].keys()), storageElement))
    DIRAC.exit(0)
Example #4
  def __call__( self ):
    """ action for 'removeFile' operation  """
    # # get waiting files
    waitingFiles = self.getWaitingFilesList()
    fc = FileCatalog( self.operation.catalogList )

    res = fc.getReplicas( [wf.LFN for wf in waitingFiles] )
    if not res['OK']:
      gMonitor.addMark( "RemoveFileAtt" )
      gMonitor.addMark( "RemoveFileFail" )
      return res

    # We check the status of the SE from the LFN that are successful
    # No idea what to do with the others...
    succ = res['Value']['Successful']
    targetSEs = set( [se for lfn in succ for se in succ[lfn] ] )

    if targetSEs:
      bannedTargets = self.checkSEsRSS( targetSEs, access = 'RemoveAccess' )
      if not bannedTargets['OK']:
        gMonitor.addMark( "RemoveFileAtt" )
        gMonitor.addMark( "RemoveFileFail" )
        return bannedTargets

      if bannedTargets['Value']:
        return S_OK( "%s targets are banned for removal" % ",".join( bannedTargets['Value'] ) )

    # # prepare waiting file dict
    toRemoveDict = dict( [ ( opFile.LFN, opFile ) for opFile in waitingFiles ] )
    gMonitor.addMark( "RemoveFileAtt", len( toRemoveDict ) )

    # # 1st step - bulk removal
    self.log.debug( "bulk removal of %s files" % len( toRemoveDict ) )
    bulkRemoval = self.bulkRemoval( toRemoveDict )
    if not bulkRemoval["OK"]:
      self.log.error( "Bulk file removal failed", bulkRemoval["Message"] )
    else:
      gMonitor.addMark( "RemoveFileOK", len( toRemoveDict ) - len( bulkRemoval["Value"] ) )
      toRemoveDict = bulkRemoval["Value"]

    # # 2nd step - single file removal
    for lfn, opFile in toRemoveDict.items():
      self.log.info( "removing single file %s" % lfn )
      singleRemoval = self.singleRemoval( opFile )
      if not singleRemoval["OK"]:
        self.log.error( 'Error removing single file', singleRemoval["Message"] )
        gMonitor.addMark( "RemoveFileFail", 1 )
      else:
        self.log.info( "file %s has been removed" % lfn )
        gMonitor.addMark( "RemoveFileOK", 1 )

    # # set
    failedFiles = [ ( lfn, opFile ) for ( lfn, opFile ) in toRemoveDict.items()
                    if opFile.Status in ( "Failed", "Waiting" ) ]
    if failedFiles:
      self.operation.Error = "failed to remove %d files" % len( failedFiles )

    return S_OK()
 def isRegisteredInOutputCatalog( self, file, transferDict ):
   fc = FileCatalog( [ transferDict[ 'OutputFC' ] ] )
   lfn = os.path.join( transferDict['OutputPath'], os.path.basename( file ) )
   result = fc.getReplicas( lfn )
   if not result[ 'OK' ]:
     return result
   if lfn not in result[ 'Value' ][ 'Successful' ]:
     return S_OK( False )
   replicas = result[ 'Value' ][ 'Successful' ][ lfn ]
   for seName in List.fromChar( transferDict[ 'OutputSE' ], "," ):
     if seName in replicas:
       self.log.verbose( "Transfer file %s is already registered in %s SE" % ( file, seName ) )
       return S_OK( True )
   return S_OK( False )
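All of these snippets follow the same DIRAC return convention: a call returns a dictionary with 'OK', carrying 'Message' on failure or 'Value' on success, and bulk catalog calls further split 'Value' into 'Successful' and 'Failed' dictionaries keyed by LFN. A minimal sketch of unpacking a getReplicas result, assuming an initialised DIRAC client environment (the LFN is a placeholder):

# Minimal sketch of the S_OK / S_ERROR and Successful / Failed pattern used above.
from DIRAC.Resources.Catalog.FileCatalog import FileCatalog

res = FileCatalog().getReplicas('/vo/some/file.dat')
if not res['OK']:
    print('call failed:', res['Message'])
else:
    for lfn, seDict in res['Value']['Successful'].items():
        print(lfn, 'has replicas at', list(seDict))  # seDict maps SE name -> PFN/URL
    for lfn, reason in res['Value']['Failed'].items():
        print(lfn, 'failed:', reason)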
Example #6
 def isRegisteredInOutputCatalog(self, file, transferDict):
     fc = FileCatalog([transferDict['OutputFC']])
     lfn = os.path.join(transferDict['OutputPath'], os.path.basename(file))
     result = fc.getReplicas(lfn)
     if not result['OK']:
         return result
     if lfn not in result['Value']['Successful']:
         return S_OK(False)
     replicas = result['Value']['Successful'][lfn]
     for seName in List.fromChar(transferDict['OutputSE'], ","):
         if seName in replicas:
             self.log.verbose(
                 "Transfer file %s is already registered in %s SE" %
                 (file, seName))
             return S_OK(True)
     return S_OK(False)
Example #7
def main():
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(("LocalFile: Path to local file containing LFNs",
                             "LFN:       Logical File Name"))
    Script.registerArgument(" SE:        Storage Element")
    Script.registerArgument(" status:    status")
    Script.parseCommandLine()

    from DIRAC import exit as DIRACExit
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog

    catalog = FileCatalog()
    import os

    # parseCommandLine shows help when mandatory arguments are missing or incorrect
    inputFileName, se, newStatus = Script.getPositionalArgs(group=True)

    if os.path.exists(inputFileName):
        inputFile = open(inputFileName, "r")
        string = inputFile.read()
        lfns = string.splitlines()
        inputFile.close()
    else:
        lfns = [inputFileName]

    res = catalog.getReplicas(lfns, True)
    if not res["OK"]:
        print(res["Message"])
        DIRACExit(-1)
    replicas = res["Value"]["Successful"]

    lfnDict = {}
    for lfn in lfns:
        lfnDict[lfn] = {}
        lfnDict[lfn]["SE"] = se
        lfnDict[lfn]["Status"] = newStatus
        lfnDict[lfn]["PFN"] = replicas[lfn][se]

    res = catalog.setReplicaStatus(lfnDict)
    if not res["OK"]:
        print("ERROR:", res["Message"])
    if res["Value"]["Failed"]:
        print("Failed to update %d replica status" %
              len(res["Value"]["Failed"]))
    if res["Value"]["Successful"]:
        print("Successfully updated %d replica status" %
              len(res["Value"]["Successful"]))
def main():
  Script.parseCommandLine()

  from DIRAC import exit as DIRACExit
  from DIRAC.Resources.Catalog.FileCatalog import FileCatalog
  catalog = FileCatalog()
  import os
  args = Script.getPositionalArgs()
  if len(args) != 3:
    Script.showHelp(exitCode=1)
  else:
    inputFileName = args[0]
    se = args[1]
    newStatus = args[2]

  if os.path.exists(inputFileName):
    inputFile = open(inputFileName, 'r')
    string = inputFile.read()
    lfns = string.splitlines()
    inputFile.close()
  else:
    lfns = [inputFileName]

  res = catalog.getReplicas(lfns, True)
  if not res['OK']:
    print(res['Message'])
    DIRACExit(-1)
  replicas = res['Value']['Successful']

  lfnDict = {}
  for lfn in lfns:
    lfnDict[lfn] = {}
    lfnDict[lfn]['SE'] = se
    lfnDict[lfn]['Status'] = newStatus
    lfnDict[lfn]['PFN'] = replicas[lfn][se]

  res = catalog.setReplicaStatus(lfnDict)
  if not res['OK']:
    print("ERROR:", res['Message'])
  if res['Value']['Failed']:
    print("Failed to update %d replica status" % len(res['Value']['Failed']))
  if res['Value']['Successful']:
    print("Successfully updated %d replica status" % len(res['Value']['Successful']))
Example #9
  def __getSEListFromReplicas(self, lfnDict):
    """ Get the SEs which have a replica of the lfn
    @param: self - self reference
    @param: string lfn - lfn for which the replicas are retrieved
    @returns S_ERROR when retrieving replicas failed
             S_OK(SEList) otherwise

    """
    fc = FileCatalog()
    # lfnDict = {lfn : True}
    res = fc.getReplicas(lfnDict)
    if not res['OK']:
      self.log.debug("readFederation.__compareFileListWithCatalog: Completely failed to get Replicas")
      return S_ERROR("getReplicas: %s" % res['Message'])
    
    res = res['Value']
    # if not lfn in res['Successful']:
    #   self.log.debug("readFederation.__compareFileListWithCatalog: Failed to get Replicas")
    #   return S_ERROR("getReplicas: %s" % res['Failed'][lfn])
    
    # we have a list of replicas for a given LFN. SEList contains all the SE
    # that store that file according to the catalog
    return res.get('Successful', None)
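Note that on success this method hands back the 'Successful' mapping of LFN to {SE: PFN}; a caller that only wants the SE names still has to flatten it, for example (a sketch with placeholder values):

# Hypothetical flattening of the returned {lfn: {seName: pfn}} mapping into SE names.
successful = {'/vo/some/file.dat': {'SE-ONE': 'srm://se-one/...', 'SE-TWO': 'srm://se-two/...'}}
seList = sorted({se for replicas in successful.values() for se in replicas})
print(seList)  # ['SE-ONE', 'SE-TWO']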
Example #10
 def __transferIfNotRegistered(self, file, transferDict):
     result = self.isRegisteredInOutputCatalog(file, transferDict)
     if not result['OK']:
         self.log.error(result['Message'])
         return result
     #Already registered. Need to delete
     if result['Value']:
         self.log.info(
             "Transfer file %s is already registered in the output catalog"
             % file)
         #Delete
         filePath = os.path.join(transferDict['InputPath'], file)
         if transferDict['InputFC'] == 'LocalDisk':
             os.unlink(filePath)
         else:
             inputFC = FileCatalog([transferDict['InputFC']])
             replicaDict = inputFC.getReplicas(filePath)
             if not replicaDict['OK']:
                 self.log.error("Error deleting file",
                                replicaDict['Message'])
             elif filePath not in replicaDict['Value']['Successful']:
                 self.log.error("Error deleting file",
                                replicaDict['Value']['Failed'][filePath])
             else:
                 seList = replicaDict['Value']['Successful'][filePath].keys()
                 for seName in seList:
                     se = StorageElement(seName)
                     self.log.info('Removing from %s:' % se.name, filePath)
                     se.removeFile(filePath)
                 inputFC.removeFile(filePath)
         self.log.info("File %s deleted from %s" %
                       (file, transferDict['InputFC']))
         self.__processingFiles.discard(file)
         return S_OK(file)
     #Do the transfer
     return self.__retrieveAndUploadFile(file, transferDict)
class ReplicateAndRegister( DMSRequestOperationsBase ):
  """
  .. class:: ReplicateAndRegister

  ReplicateAndRegister operation handler
  """

  def __init__( self, operation = None, csPath = None ):
    """c'tor

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
    super( ReplicateAndRegister, self ).__init__( operation, csPath )
    # # own gMonitor stuff for files
    gMonitor.registerActivity( "ReplicateAndRegisterAtt", "Replicate and register attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateOK", "Replications successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateFail", "Replications failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterOK", "Registrations successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterFail", "Registrations failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # for FTS
    gMonitor.registerActivity( "FTSScheduleAtt", "Files schedule attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleOK", "File schedule successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleFail", "File schedule failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # SE cache
    self.seCache = {}

    # Clients
    self.fc = FileCatalog()
    self.ftsClient = FTSClient()

  def __call__( self ):
    """ call me maybe """
    # # check replicas first
    checkReplicas = self.__checkReplicas()
    if not checkReplicas["OK"]:
      self.log.error( checkReplicas["Message"] )
    if hasattr( self, "FTSMode" ) and getattr( self, "FTSMode" ):
      bannedGroups = getattr( self, "FTSBannedGroups" ) if hasattr( self, "FTSBannedGroups" ) else ()
      if self.request.OwnerGroup in bannedGroups:
        self.log.info( "usage of FTS system is banned for request's owner" )
        return self.rmTransfer()
      return self.ftsTransfer()
    return self.rmTransfer()

  def __checkReplicas( self ):
    """ check done replicas and update file states  """
    waitingFiles = dict( [ ( opFile.LFN, opFile ) for opFile in self.operation
                          if opFile.Status in ( "Waiting", "Scheduled" ) ] )
    targetSESet = set( self.operation.targetSEList )

    replicas = self.fc.getReplicas( waitingFiles.keys() )
    if not replicas["OK"]:
      self.log.error( replicas["Message"] )
      return replicas

    reMissing = re.compile( "no such file or directory" )
    for failedLFN, errStr in replicas["Value"]["Failed"].items():
      waitingFiles[failedLFN].Error = errStr
      if reMissing.search( errStr.lower() ):
        self.log.error( "file %s does not exists" % failedLFN )
        gMonitor.addMark( "ReplicateFail", len( targetSESet ) )
        waitingFiles[failedLFN].Status = "Failed"

    for successfulLFN, reps in replicas["Value"]["Successful"].items():
      if targetSESet.issubset( set( reps ) ):
        self.log.info( "file %s has been replicated to all targets" % successfulLFN )
        waitingFiles[successfulLFN].Status = "Done"

    return S_OK()

  def _addMetadataToFiles( self, toSchedule ):
    """ Add metadata to those files that need to be scheduled through FTS

        toSchedule is a dictionary:
        {'lfn1': [opFile, validReplicas, validTargets], 'lfn2': [opFile, validReplicas, validTargets]}
    """
    if toSchedule:
      self.log.info( "found %s files to schedule, getting metadata from FC" % len( toSchedule ) )
      lfns = toSchedule.keys()
    else:
      self.log.info( "No files to schedule" )
      return S_OK()

    res = self.fc.getFileMetadata( lfns )
    if not res['OK']:
      return res
    else:
      if res['Value']['Failed']:
        self.log.warn( "Can't schedule %d files: problems getting the metadata: %s" % ( len( res['Value']['Failed'] ),
                                                                                ', '.join( res['Value']['Failed'] ) ) )
      metadata = res['Value']['Successful']

    filesToScheduleList = []

    for lfnsToSchedule, lfnMetadata in metadata.items():
      opFileToSchedule = toSchedule[lfnsToSchedule][0]
      opFileToSchedule.GUID = lfnMetadata['GUID']
      opFileToSchedule.Checksum = metadata[lfnsToSchedule]['Checksum']
      opFileToSchedule.ChecksumType = metadata[lfnsToSchedule]['CheckSumType']
      opFileToSchedule.Size = metadata[lfnsToSchedule]['Size']

      filesToScheduleList.append( ( opFileToSchedule.toJSON()['Value'],
                                    toSchedule[lfnsToSchedule][1],
                                    toSchedule[lfnsToSchedule][2] ) )

    return S_OK( filesToScheduleList )



  def _filterReplicas( self, opFile ):
    """ filter out banned/invalid source SEs """
    return filterReplicas( opFile, logger = self.log, dataManager = self.dm, seCache = self.seCache )

  def ftsTransfer( self ):
    """ replicate and register using FTS """

    self.log.info( "scheduling files in FTS..." )

    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark( "FTSScheduleAtt" )
      gMonitor.addMark( "FTSScheduleFail" )
      return bannedTargets

    if bannedTargets['Value']:
      return S_OK( "%s targets are banned for writing" % ",".join( bannedTargets['Value'] ) )

    # Can continue now
    self.log.verbose( "No targets banned for writing" )

    toSchedule = {}

    for opFile in self.getWaitingFilesList():
      opFile.Error = ''
      gMonitor.addMark( "FTSScheduleAtt" )
      # # check replicas
      replicas = self._filterReplicas( opFile )
      if not replicas["OK"]:
        continue
      replicas = replicas["Value"]

      validReplicas = replicas["Valid"]
      bannedReplicas = replicas["Banned"]
      noReplicas = replicas['NoReplicas']
      badReplicas = replicas['Bad']
      noPFN = replicas['NoPFN']

      if not validReplicas:
        gMonitor.addMark( "FTSScheduleFail" )
        if bannedReplicas:
          self.log.warn( "unable to schedule '%s', replicas only at banned SEs" % opFile.LFN )
        elif noReplicas:
          self.log.error( "unable to schedule %s, file doesn't exist" % opFile.LFN )
          opFile.Error = 'No replicas found'
          opFile.Status = 'Failed'
        elif badReplicas:
          self.log.error( "unable to schedule %s, all replicas have a bad checksum" % opFile.LFN )
          opFile.Error = 'All replicas have a bad checksum'
          opFile.Status = 'Failed'
        elif noPFN:
          self.log.warn( "unable to schedule %s, could not get a PFN" % opFile.LFN )

      else:
        validTargets = list( set( self.operation.targetSEList ) - set( validReplicas ) )
        if not validTargets:
          self.log.info( "file %s is already present at all targets" % opFile.LFN )
          opFile.Status = "Done"
        else:
          toSchedule[opFile.LFN] = [ opFile, validReplicas, validTargets ]

    res = self._addMetadataToFiles( toSchedule )
    if not res['OK']:
      return res
    else:
      filesToScheduleList = res['Value']


    if filesToScheduleList:

      ftsSchedule = self.ftsClient.ftsSchedule( self.request.RequestID,
                                                self.operation.OperationID,
                                                filesToScheduleList )
      if not ftsSchedule["OK"]:
        self.log.error( ftsSchedule["Message"] )
        return ftsSchedule

      # might have nothing to schedule
      ftsSchedule = ftsSchedule["Value"]
      if not ftsSchedule:
        return S_OK()

      for fileID in ftsSchedule["Successful"]:
        gMonitor.addMark( "FTSScheduleOK", 1 )
        for opFile in self.operation:
          if fileID == opFile.FileID:
            opFile.Status = "Scheduled"
            self.log.debug( "%s has been scheduled for FTS" % opFile.LFN )
      self.log.info( "%d files have been scheduled to FTS" % len( ftsSchedule['Successful'] ) )

      for fileID in ftsSchedule["Failed"]:
        gMonitor.addMark( "FTSScheduleFail", 1 )
        for opFile in self.operation:
          if fileID == opFile.FileID:
            opFile.Error = ftsSchedule["Failed"][fileID]
            if 'sourceSURL equals to targetSURL' in opFile.Error:
              # In this case there is no need to continue
              opFile.Status = 'Failed'
            self.log.warn( "unable to schedule %s for FTS: %s" % ( opFile.LFN, opFile.Error ) )
    else:
      self.log.info( "No files to schedule after metadata checks" )

    # Just in case some transfers could not be scheduled, try them with RM
    return self.rmTransfer( fromFTS = True )

  def rmTransfer( self, fromFTS = False ):
    """ replicate and register using dataManager  """
    # # get waiting files. If none just return
    waitingFiles = self.getWaitingFilesList()
    if not waitingFiles:
      return S_OK()
    if fromFTS:
      self.log.info( "Trying transfer using replica manager as FTS failed" )
    else:
      self.log.info( "Transferring files using Data manager..." )
    # # source SE
    sourceSE = self.operation.SourceSE if self.operation.SourceSE else None
    if sourceSE:
      # # check source se for read
      sourceRead = self.rssSEStatus( sourceSE, "ReadAccess" )
      if not sourceRead["OK"]:
        self.log.info( sourceRead["Message"] )
        for opFile in self.operation:
          opFile.Error = sourceRead["Message"]
        self.operation.Error = sourceRead["Message"]
        gMonitor.addMark( "ReplicateAndRegisterAtt", len( self.operation ) )
        gMonitor.addMark( "ReplicateFail", len( self.operation ) )
        return sourceRead

      if not sourceRead["Value"]:
        self.operation.Error = "SourceSE %s is banned for reading" % sourceSE
        self.log.info( self.operation.Error )
        return S_OK( self.operation.Error )

    # # check targetSEs for write
    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark( "ReplicateAndRegisterAtt", len( self.operation ) )
      gMonitor.addMark( "ReplicateFail", len( self.operation ) )
      return bannedTargets

    if bannedTargets['Value']:
      return S_OK( "%s targets are banned for writing" % ",".join( bannedTargets['Value'] ) )

    # Can continue now
    self.log.verbose( "No targets banned for writing" )

    # # loop over files
    for opFile in waitingFiles:

      gMonitor.addMark( "ReplicateAndRegisterAtt", 1 )
      opFile.Error = ''
      lfn = opFile.LFN

      # Check if replica is at the specified source
      replicas = self._filterReplicas( opFile )
      if not replicas["OK"]:
        self.log.error( replicas["Message"] )
        continue
      replicas = replicas["Value"]
      if not replicas["Valid"]:
        self.log.warn( "unable to find valid replicas for %s" % lfn )
        continue
      # # get the first one in the list
      if sourceSE not in replicas['Valid']:
        if sourceSE:
          self.log.warn( "%s is not at specified sourceSE %s, changed to %s" % ( lfn, sourceSE, replicas["Valid"][0] ) )
        sourceSE = replicas["Valid"][0]

      # # loop over targetSE
      catalog = self.operation.Catalog
      for targetSE in self.operation.targetSEList:

        # # call DataManager
        if targetSE == sourceSE:
          self.log.warn( "Request to replicate %s to the source SE: %s" % ( lfn, sourceSE ) )
          continue
        res = self.dm.replicateAndRegister( lfn, targetSE, sourceSE = sourceSE, catalog = catalog )
        if res["OK"]:

          if lfn in res["Value"]["Successful"]:

            if "replicate" in res["Value"]["Successful"][lfn]:

              repTime = res["Value"]["Successful"][lfn]["replicate"]
              prString = "file %s replicated at %s in %s s." % ( lfn, targetSE, repTime )

              gMonitor.addMark( "ReplicateOK", 1 )

              if "register" in res["Value"]["Successful"][lfn]:

                gMonitor.addMark( "RegisterOK", 1 )
                regTime = res["Value"]["Successful"][lfn]["register"]
                prString += ' and registered in %s s.' % regTime
                self.log.info( prString )
              else:

                gMonitor.addMark( "RegisterFail", 1 )
                prString += " but failed to register"
                self.log.warn( prString )

                opFile.Error = "Failed to register"
                # # add register replica operation
                registerOperation = self.getRegisterOperation( opFile, targetSE )
                self.request.insertAfter( registerOperation, self.operation )

            else:

              self.log.error( "failed to replicate %s to %s." % ( lfn, targetSE ) )
              gMonitor.addMark( "ReplicateFail", 1 )
              opFile.Error = "Failed to replicate"

          else:

            gMonitor.addMark( "ReplicateFail", 1 )
            reason = res["Value"]["Failed"][lfn]
            self.log.error( "failed to replicate and register file %s at %s:" % ( lfn, targetSE ), reason )
            opFile.Error = reason

        else:

          gMonitor.addMark( "ReplicateFail", 1 )
          opFile.Error = "DataManager error: %s" % res["Message"]
          self.log.error( opFile.Error )

      if not opFile.Error:
        if len( self.operation.targetSEList ) > 1:
          self.log.info( "file %s has been replicated to all targetSEs" % lfn )
        opFile.Status = "Done"


    return S_OK()
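The filesToScheduleList passed to ftsClient.ftsSchedule above is assembled from triplets of (serialised opFile, valid source SEs, missing target SEs), as built in _addMetadataToFiles. A sketch of a single entry with placeholder values (the first element stands in for whatever opFile.toJSON()['Value'] returns):

# Hypothetical single entry of filesToScheduleList; all values are placeholders.
opFileSerialised = {'LFN': '/vo/some/file.dat'}  # stand-in for opFile.toJSON()['Value']
validReplicas = ['SE-SOURCE']                    # SEs already holding a good replica
validTargets = ['SE-TARGET']                     # target SEs still missing the file
filesToScheduleList = [(opFileSerialised, validReplicas, validTargets)]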
Example #12
class ReplicateAndRegister(DMSRequestOperationsBase):
  """
  .. class:: ReplicateAndRegister

  ReplicateAndRegister operation handler
  """

  def __init__(self, operation=None, csPath=None):
    """c'tor

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
    super(ReplicateAndRegister, self).__init__(operation, csPath)
    # # own gMonitor stuff for files
    gMonitor.registerActivity("ReplicateAndRegisterAtt", "Replicate and register attempted",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    gMonitor.registerActivity("ReplicateOK", "Replications successful",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    gMonitor.registerActivity("ReplicateFail", "Replications failed",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    gMonitor.registerActivity("RegisterOK", "Registrations successful",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    gMonitor.registerActivity("RegisterFail", "Registrations failed",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    # # for FTS
    gMonitor.registerActivity("FTSScheduleAtt", "Files schedule attempted",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    gMonitor.registerActivity("FTSScheduleOK", "File schedule successful",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    gMonitor.registerActivity("FTSScheduleFail", "File schedule failed",
                              "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM)
    # # SE cache

    # Clients
    self.fc = FileCatalog()

  def __call__(self):
    """ call me maybe """
    # # check replicas first
    checkReplicas = self.__checkReplicas()
    if not checkReplicas["OK"]:
      self.log.error('Failed to check replicas', checkReplicas["Message"])
    if hasattr(self, "FTSMode") and getattr(self, "FTSMode"):
      bannedGroups = getattr(self, "FTSBannedGroups") if hasattr(self, "FTSBannedGroups") else ()
      if self.request.OwnerGroup in bannedGroups:
        self.log.verbose("usage of FTS system is banned for request's owner")
        return self.dmTransfer()

      if getattr(self, 'UseNewFTS3', False):
        return self.fts3Transfer()
      else:
        return self.ftsTransfer()

    return self.dmTransfer()

  def __checkReplicas(self):
    """ check done replicas and update file states  """
    waitingFiles = dict([(opFile.LFN, opFile) for opFile in self.operation
                         if opFile.Status in ("Waiting", "Scheduled")])
    targetSESet = set(self.operation.targetSEList)

    replicas = self.fc.getReplicas(waitingFiles.keys())
    if not replicas["OK"]:
      self.log.error('Failed to get replicas', replicas["Message"])
      return replicas

    reMissing = re.compile(r".*such file.*")
    for failedLFN, errStr in replicas["Value"]["Failed"].iteritems():
      waitingFiles[failedLFN].Error = errStr
      if reMissing.search(errStr.lower()):
        self.log.error("File does not exists", failedLFN)
        gMonitor.addMark("ReplicateFail", len(targetSESet))
        waitingFiles[failedLFN].Status = "Failed"

    for successfulLFN, reps in replicas["Value"]["Successful"].iteritems():
      if targetSESet.issubset(set(reps)):
        self.log.info("file %s has been replicated to all targets" % successfulLFN)
        waitingFiles[successfulLFN].Status = "Done"

    return S_OK()

  def _addMetadataToFiles(self, toSchedule):
    """ Add metadata to those files that need to be scheduled through FTS

        toSchedule is a dictionary:
        {'lfn1': [opFile, ...], 'lfn2': [opFile, ...]} (the first element is the opFile)
    """
    if toSchedule:
      self.log.info("found %s files to schedule, getting metadata from FC" % len(toSchedule))
    else:
      self.log.verbose("No files to schedule")
      return S_OK([])

    res = self.fc.getFileMetadata(toSchedule.keys())
    if not res['OK']:
      return res
    else:
      if res['Value']['Failed']:
        self.log.warn("Can't schedule %d files: problems getting the metadata: %s" %
                      (len(res['Value']['Failed']), ', '.join(res['Value']['Failed'])))
      metadata = res['Value']['Successful']

    filesToSchedule = {}

    for lfn, lfnMetadata in metadata.iteritems():
      opFileToSchedule = toSchedule[lfn][0]
      opFileToSchedule.GUID = lfnMetadata['GUID']
      # In principle this is defined already in filterReplicas()
      if not opFileToSchedule.Checksum:
        opFileToSchedule.Checksum = metadata[lfn]['Checksum']
        opFileToSchedule.ChecksumType = metadata[lfn]['ChecksumType']
      opFileToSchedule.Size = metadata[lfn]['Size']

      filesToSchedule[opFileToSchedule.LFN] = opFileToSchedule

    return S_OK(filesToSchedule)

  def _filterReplicas(self, opFile):
    """ filter out banned/invalid source SEs """
    return filterReplicas(opFile, logger=self.log, dataManager=self.dm)

  def ftsTransfer(self):
    """ replicate and register using FTS """

    self.log.info("scheduling files in FTS...")

    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark("FTSScheduleAtt")
      gMonitor.addMark("FTSScheduleFail")
      return bannedTargets

    if bannedTargets['Value']:
      return S_OK("%s targets are banned for writing" % ",".join(bannedTargets['Value']))

    # Can continue now
    self.log.verbose("No targets banned for writing")

    toSchedule = {}

    delayExecution = 0
    errors = defaultdict(int)
    for opFile in self.getWaitingFilesList():
      opFile.Error = ''
      gMonitor.addMark("FTSScheduleAtt")
      # # check replicas
      replicas = self._filterReplicas(opFile)
      if not replicas["OK"]:
        continue
      replicas = replicas["Value"]

      validReplicas = replicas.get("Valid")
      noMetaReplicas = replicas.get("NoMetadata")
      noReplicas = replicas.get('NoReplicas')
      badReplicas = replicas.get('Bad')
      noActiveReplicas = replicas.get('NoActiveReplicas')

      if validReplicas:
        validTargets = list(set(self.operation.targetSEList) - set(validReplicas))
        if not validTargets:
          self.log.info("file %s is already present at all targets" % opFile.LFN)
          opFile.Status = "Done"
        else:
          toSchedule[opFile.LFN] = [opFile, validReplicas, validTargets]
      else:
        gMonitor.addMark("FTSScheduleFail")
        if noMetaReplicas:
          err = "Couldn't get metadata"
          errors[err] += 1
          self.log.verbose(
              "unable to schedule '%s', %s at %s" %
              (opFile.LFN, err, ','.join(noMetaReplicas)))
          opFile.Error = err
        elif noReplicas:
          err = "File doesn't exist"
          errors[err] += 1
          self.log.error("Unable to schedule transfer",
                         "%s %s at %s" % (opFile.LFN, err, ','.join(noReplicas)))
          opFile.Error = err
          opFile.Status = 'Failed'
        elif badReplicas:
          err = "All replicas have a bad checksum"
          errors[err] += 1
          self.log.error("Unable to schedule transfer",
                         "%s, %s at %s" % (opFile.LFN, err, ','.join(badReplicas)))
          opFile.Error = err
          opFile.Status = 'Failed'
        elif noActiveReplicas:
          err = "No active replica found"
          errors[err] += 1
          self.log.verbose("Unable to schedule transfer",
                           "%s, %s at %s" % (opFile.LFN, err, ','.join(noActiveReplicas)))
          opFile.Error = err
          # All source SEs are banned, delay execution by 1 hour
          delayExecution = 60

    if delayExecution:
      self.log.info("Delay execution of the request by %d minutes" % delayExecution)
      self.request.delayNextExecution(delayExecution)
    # Log error counts
    for error, count in errors.iteritems():
      self.log.error(error, 'for %d files' % count)

    filesToScheduleList = []
    res = self._addMetadataToFiles(toSchedule)
    if not res['OK']:
      return res
    else:
      filesToSchedule = res['Value']

      for lfn in filesToSchedule:
        filesToScheduleList.append((filesToSchedule[lfn][0].toJSON()['Value'],
                                    toSchedule[lfn][1],
                                    toSchedule[lfn][2]))

    if filesToScheduleList:

      ftsSchedule = FTSClient().ftsSchedule(self.request.RequestID,
                                            self.operation.OperationID,
                                            filesToScheduleList)
      if not ftsSchedule["OK"]:
        self.log.error("Completely failed to schedule to FTS:", ftsSchedule["Message"])
        return ftsSchedule

      # might have nothing to schedule
      ftsSchedule = ftsSchedule["Value"]
      if not ftsSchedule:
        return S_OK()

      self.log.info("%d files have been scheduled to FTS" % len(ftsSchedule['Successful']))
      for opFile in self.operation:
        fileID = opFile.FileID
        if fileID in ftsSchedule["Successful"]:
          gMonitor.addMark("FTSScheduleOK", 1)
          opFile.Status = "Scheduled"
          self.log.debug("%s has been scheduled for FTS" % opFile.LFN)
        elif fileID in ftsSchedule["Failed"]:
          gMonitor.addMark("FTSScheduleFail", 1)
          opFile.Error = ftsSchedule["Failed"][fileID]
          if 'sourceSURL equals to targetSURL' in opFile.Error:
            # In this case there is no need to continue
            opFile.Status = 'Failed'
          self.log.warn("unable to schedule %s for FTS: %s" % (opFile.LFN, opFile.Error))
    else:
      self.log.info("No files to schedule after metadata checks")

    # Just in case some transfers could not be scheduled, try them with RM
    return self.dmTransfer(fromFTS=True)

  def _checkExistingFTS3Operations(self):
    """
       Check if there are ongoing FTS3Operation for the current RMS Operation

       Under some conditions, we can be trying to schedule files while
       there is still an FTS transfer going on. This typically happens
       when the REA hangs. To prevent further race condition, we check
       if there are FTS3Operations in a non Final state matching the
       current operation ID. If so, we put the corresponding files in
       scheduled mode. We will then wait till the FTS3 Operation performs
       the callback

       :returns: S_OK with True if we can go on, False if we should stop the processing
    """

    res = FTS3Client().getOperationsFromRMSOpID(self.operation.OperationID)

    if not res['OK']:
      self.log.debug(
          "Could not get FTS3Operations matching OperationID",
          self.operation.OperationID)
      return res

    existingFTSOperations = res['Value']
    # It is ok to have FTS Operations in a final state, so we
    # care only about the others
    unfinishedFTSOperations = [
        ops for ops in existingFTSOperations if ops.status not in FTS3TransferOperation.FINAL_STATES]

    if not unfinishedFTSOperations:
      self.log.debug("No ongoing FTS3Operations, all good")
      return S_OK(True)

    self.log.warn("Some FTS3Operations already exist for the RMS Operation:",
                  [op.operationID for op in unfinishedFTSOperations])

    # This would really be a screwed up situation !
    if len(unfinishedFTSOperations) > 1:
      self.log.warn("That's a serious problem !!")

    # We take the rmsFileID of the files in the Operations,
    # find the corresponding File object, and set them scheduled
    rmsFileIDsToSetScheduled = set(
        [ftsFile.rmsFileID for ftsOp in unfinishedFTSOperations for ftsFile in ftsOp.ftsFiles])

    for opFile in self.operation:
      # If it is in the DB, it has a FileID
      opFileID = opFile.FileID
      if opFileID in rmsFileIDsToSetScheduled:
        self.log.warn("Setting RMSFile as already scheduled", opFileID)
        opFile.Status = "Scheduled"

    # We return here such that the Request is set back to Scheduled in the DB
    # With no further modification
    return S_OK(False)

  def fts3Transfer(self):
    """ replicate and register using FTS3 """

    self.log.info("scheduling files in FTS3...")

    # Check first if we do not have ongoing transfers

    res = self._checkExistingFTS3Operations()
    if not res['OK']:
      return res

    # if res['Value'] is False
    # it means that there are ongoing transfers
    # and we should stop here
    if res['Value'] is False:
      # return S_OK such that the request is put back
      return S_OK()

    fts3Files = []
    toSchedule = {}

    # Dict which maps the FileID to the object
    rmsFilesIds = {}

    for opFile in self.getWaitingFilesList():
      rmsFilesIds[opFile.FileID] = opFile

      opFile.Error = ''
      gMonitor.addMark("FTSScheduleAtt")
      # # check replicas
      replicas = self._filterReplicas(opFile)
      if not replicas["OK"]:
        continue
      replicas = replicas["Value"]

      validReplicas = replicas["Valid"]
      noMetaReplicas = replicas["NoMetadata"]
      noReplicas = replicas['NoReplicas']
      badReplicas = replicas['Bad']
      noPFN = replicas['NoPFN']

      if validReplicas:
        validTargets = list(set(self.operation.targetSEList) - set(validReplicas))
        if not validTargets:
          self.log.info("file %s is already present at all targets" % opFile.LFN)
          opFile.Status = "Done"
        else:
          toSchedule[opFile.LFN] = [opFile, validTargets]

      else:
        gMonitor.addMark("FTSScheduleFail")
        if noMetaReplicas:
          self.log.warn("unable to schedule '%s', couldn't get metadata at %s" % (opFile.LFN, ','.join(noMetaReplicas)))
          opFile.Error = "Couldn't get metadata"
        elif noReplicas:
          self.log.error(
              "Unable to schedule transfer", "File %s doesn't exist at %s" %
              (opFile.LFN, ','.join(noReplicas)))
          opFile.Error = 'No replicas found'
          opFile.Status = 'Failed'
        elif badReplicas:
          self.log.error(
              "Unable to schedule transfer",
              "File %s, all replicas have a bad checksum at %s" %
              (opFile.LFN,
               ','.join(badReplicas)))
          opFile.Error = 'All replicas have a bad checksum'
          opFile.Status = 'Failed'
        elif noPFN:
          self.log.warn(
              "unable to schedule %s, could not get a PFN at %s" %
              (opFile.LFN, ','.join(noPFN)))

    res = self._addMetadataToFiles(toSchedule)
    if not res['OK']:
      return res
    else:
      filesToSchedule = res['Value']

      for lfn in filesToSchedule:
        opFile = filesToSchedule[lfn]
        validTargets = toSchedule[lfn][1]
        for targetSE in validTargets:
          ftsFile = FTS3File.fromRMSFile(opFile, targetSE)
          fts3Files.append(ftsFile)

    if fts3Files:
      res = Registry.getUsernameForDN(self.request.OwnerDN)
      if not res['OK']:
        self.log.error(
            "Cannot get username for DN", "%s %s" %
            (self.request.OwnerDN, res['Message']))
        return res

      username = res['Value']
      fts3Operation = FTS3TransferOperation.fromRMSObjects(self.request, self.operation, username)
      fts3Operation.ftsFiles = fts3Files

      ftsSchedule = FTS3Client().persistOperation(fts3Operation)
      if not ftsSchedule["OK"]:
        self.log.error("Completely failed to schedule to FTS3:", ftsSchedule["Message"])
        return ftsSchedule

      # might have nothing to schedule
      ftsSchedule = ftsSchedule["Value"]
      self.log.info("Scheduled with FTS3Operation id %s" % ftsSchedule)

      self.log.info("%d files have been scheduled to FTS3" % len(fts3Files))

      for ftsFile in fts3Files:
        opFile = rmsFilesIds[ftsFile.rmsFileID]
        gMonitor.addMark("FTSScheduleOK", 1)
        opFile.Status = "Scheduled"
        self.log.debug("%s has been scheduled for FTS" % opFile.LFN)
    else:
      self.log.info("No files to schedule after metadata checks")

    # Just in case some transfers could not be scheduled, try them with RM
    return self.dmTransfer(fromFTS=True)

  def dmTransfer(self, fromFTS=False):
    """ replicate and register using dataManager  """
    # # get waiting files. If none just return
    # # source SE
    sourceSE = self.operation.SourceSE if self.operation.SourceSE else None
    if sourceSE:
      # # check source se for read
      bannedSource = self.checkSEsRSS(sourceSE, 'ReadAccess')
      if not bannedSource["OK"]:
        gMonitor.addMark("ReplicateAndRegisterAtt", len(self.operation))
        gMonitor.addMark("ReplicateFail", len(self.operation))
        return bannedSource

      if bannedSource["Value"]:
        self.operation.Error = "SourceSE %s is banned for reading" % sourceSE
        self.log.info(self.operation.Error)
        return S_OK(self.operation.Error)

    # # check targetSEs for write
    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark("ReplicateAndRegisterAtt", len(self.operation))
      gMonitor.addMark("ReplicateFail", len(self.operation))
      return bannedTargets

    if bannedTargets['Value']:
      self.operation.Error = "%s targets are banned for writing" % ",".join(bannedTargets['Value'])
      return S_OK(self.operation.Error)

    # Can continue now
    self.log.verbose("No targets banned for writing")

    waitingFiles = self.getWaitingFilesList()
    if not waitingFiles:
      return S_OK()
    # # loop over files
    if fromFTS:
      self.log.info("Trying transfer using replica manager as FTS failed")
    else:
      self.log.info("Transferring files using Data manager...")
    errors = defaultdict(int)
    delayExecution = 0
    for opFile in waitingFiles:
      if opFile.Error in ("Couldn't get metadata",
                          "File doesn't exist",
                          'No active replica found',
                          "All replicas have a bad checksum",):
        err = "File already in error status"
        errors[err] += 1

      gMonitor.addMark("ReplicateAndRegisterAtt", 1)
      opFile.Error = ''
      lfn = opFile.LFN

      # Check if replica is at the specified source
      replicas = self._filterReplicas(opFile)
      if not replicas["OK"]:
        self.log.error('Failed to check replicas', replicas["Message"])
        continue
      replicas = replicas["Value"]
      validReplicas = replicas.get("Valid")
      noMetaReplicas = replicas.get("NoMetadata")
      noReplicas = replicas.get('NoReplicas')
      badReplicas = replicas.get('Bad')
      noActiveReplicas = replicas.get('NoActiveReplicas')

      if not validReplicas:
        gMonitor.addMark("ReplicateFail")
        if noMetaReplicas:
          err = "Couldn't get metadata"
          errors[err] += 1
          self.log.verbose(
              "unable to replicate '%s', couldn't get metadata at %s" %
              (opFile.LFN, ','.join(noMetaReplicas)))
          opFile.Error = err
        elif noReplicas:
          err = "File doesn't exist"
          errors[err] += 1
          self.log.verbose(
              "Unable to replicate", "File %s doesn't exist at %s" %
              (opFile.LFN, ','.join(noReplicas)))
          opFile.Error = err
          opFile.Status = 'Failed'
        elif badReplicas:
          err = "All replicas have a bad checksum"
          errors[err] += 1
          self.log.error(
              "Unable to replicate", "%s, all replicas have a bad checksum at %s" %
              (opFile.LFN, ','.join(badReplicas)))
          opFile.Error = err
          opFile.Status = 'Failed'
        elif noActiveReplicas:
          err = "No active replica found"
          errors[err] += 1
          self.log.verbose("Unable to schedule transfer",
                           "%s, %s at %s" % (opFile.LFN, err, ','.join(noActiveReplicas)))
          opFile.Error = err
          # All source SEs are banned, delay execution by 1 hour
          delayExecution = 60
        continue
      # # get the first one in the list
      if sourceSE not in validReplicas:
        if sourceSE:
          err = "File not at specified source"
          errors[err] += 1
          self.log.warn(
              "%s is not at specified sourceSE %s, changed to %s" %
              (lfn, sourceSE, validReplicas[0]))
        sourceSE = validReplicas[0]

      # # loop over targetSE
      catalogs = self.operation.Catalog
      if catalogs:
        catalogs = [cat.strip() for cat in catalogs.split(',')]

      for targetSE in self.operation.targetSEList:

        # # call DataManager
        if targetSE in validReplicas:
          self.log.warn("Request to replicate %s to an existing location: %s" % (lfn, targetSE))
          opFile.Status = 'Done'
          continue
        res = self.dm.replicateAndRegister(lfn, targetSE, sourceSE=sourceSE, catalog=catalogs)
        if res["OK"]:

          if lfn in res["Value"]["Successful"]:

            if "replicate" in res["Value"]["Successful"][lfn]:

              repTime = res["Value"]["Successful"][lfn]["replicate"]
              prString = "file %s replicated at %s in %s s." % (lfn, targetSE, repTime)

              gMonitor.addMark("ReplicateOK", 1)

              if "register" in res["Value"]["Successful"][lfn]:

                gMonitor.addMark("RegisterOK", 1)
                regTime = res["Value"]["Successful"][lfn]["register"]
                prString += ' and registered in %s s.' % regTime
                self.log.info(prString)
              else:

                gMonitor.addMark("RegisterFail", 1)
                prString += " but failed to register"
                self.log.warn(prString)

                opFile.Error = "Failed to register"
                # # add register replica operation
                registerOperation = self.getRegisterOperation(
                    opFile, targetSE, type='RegisterReplica')
                self.request.insertAfter(registerOperation, self.operation)

            else:

              self.log.error("Failed to replicate", "%s to %s" % (lfn, targetSE))
              gMonitor.addMark("ReplicateFail", 1)
              opFile.Error = "Failed to replicate"

          else:

            gMonitor.addMark("ReplicateFail", 1)
            reason = res["Value"]["Failed"][lfn]
            self.log.error(
                "Failed to replicate and register", "File %s at %s:" %
                (lfn, targetSE), reason)
            opFile.Error = reason

        else:

          gMonitor.addMark("ReplicateFail", 1)
          opFile.Error = "DataManager error: %s" % res["Message"]
          self.log.error("DataManager error", res["Message"])

      if not opFile.Error:
        if len(self.operation.targetSEList) > 1:
          self.log.info("file %s has been replicated to all targetSEs" % lfn)
        opFile.Status = "Done"
    # Log error counts
    if delayExecution:
      self.log.info("Delay execution of the request by %d minutes" % delayExecution)
      self.request.delayNextExecution(delayExecution)
    for error, count in errors.iteritems():
      self.log.error(error, 'for %d files' % count)

    return S_OK()
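Both ftsTransfer and dmTransfer above branch on the categories returned by _filterReplicas (i.e. filterReplicas). Judging only from the keys read in these handlers, its 'Value' has roughly the following shape; this is a hypothetical illustration, not the actual filterReplicas output:

# Shape inferred from the keys accessed in the handlers above; contents are placeholders.
filteredReplicas = {
    'Valid': ['SE-ONE'],     # SEs holding a usable replica
    'NoMetadata': [],        # SEs where file metadata could not be obtained
    'NoReplicas': [],        # populated when the file is not in the catalog
    'Bad': [],               # SEs whose replica has a bad checksum
    'NoActiveReplicas': [],  # all source SEs banned; execution gets delayed
    'NoPFN': [],             # SEs for which no PFN could be constructed (older handler / fts3Transfer)
}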
Example #13
inputFileName = args[0]
storageElement = args[1]
status = args[2]

if os.path.exists(inputFileName):
    inputFile = open(inputFileName, 'r')
    string = inputFile.read()
    inputFile.close()
    lfns = sorted(string.splitlines())
else:
    lfns = [inputFileName]

fc = FileCatalog()

replicaDict = {}
res = fc.getReplicas(lfns, allStatus=True)
if not res['OK']:
    gLogger.error("Failed to get catalog replicas.", res['Message'])
    DIRAC.exit(-1)
lfnDict = {}
for lfn, error in res['Value']['Failed'].items():
    gLogger.error("Failed to get replicas for file.", "%s:%s" % (lfn, error))
for lfn, replicas in res['Value']['Successful'].items():
    if storageElement not in replicas.keys():
        gLogger.error("LFN not registered at provided storage element.",
                      "%s %s" % (lfn, storageElement))
    else:
        lfnDict[lfn] = {
            'SE': storageElement,
            'PFN': replicas[storageElement],
            'Status': status
        }
Example #14
class FTSRequest( object ):
  """
  .. class:: FTSRequest

  Helper class for FTS job submission and monitoring.
  """

  # # default checksum type
  __defaultCksmType = "ADLER32"
  # # flag to disable/enable checksum test, default: disabled
  __cksmTest = False

  def __init__( self ):
    """c'tor

    :param self: self reference
    """
    self.log = gLogger.getSubLogger( self.__class__.__name__, True )

    # # final states tuple
    self.finalStates = ( 'Canceled', 'Failed', 'Hold',
                         'Finished', 'FinishedDirty' )
    # # failed states tuple
    self.failedStates = ( 'Canceled', 'Failed',
                          'Hold', 'FinishedDirty' )
    # # successful states tuple
    self.successfulStates = ( 'Finished', 'Done' )
    # # all file states tuple
    self.fileStates = ( 'Done', 'Active', 'Pending', 'Ready', 'Canceled', 'Failed',
                        'Finishing', 'Finished', 'Submitted', 'Hold', 'Waiting' )

    self.newlyCompletedFiles = []
    self.newlyFailedFiles = []

    self.statusSummary = {}

    # # request status
    self.requestStatus = 'Unknown'

    # # dict for FTS job files
    self.fileDict = {}
    # # dict for replicas information
    self.catalogReplicas = {}
    # # dict for metadata information
    self.catalogMetadata = {}
    # # dict for files that failed to register
    self.failedRegistrations = {}

    # # placeholder for FileCatalog reference
    self.oCatalog = None

    # # submit timestamp
    self.submitTime = ''

    # # placeholder FTS job GUID
    self.ftsGUID = ''
    # # placeholder for FTS server URL
    self.ftsServer = ''
    # # not used
    self.priority = 3

    # # flag marking FTS job completeness
    self.isTerminal = False
    # # completeness percentage
    self.percentageComplete = 0.0

    # # source SE name
    self.sourceSE = ''
    # # flag marking source SE validity
    self.sourceValid = False
    # # source space token
    self.sourceToken = ''

    # # target SE name
    self.targetSE = ''
    # # flag marking target SE validity
    self.targetValid = False
    # # target space token
    self.targetToken = ''

    # # whatever
    self.dumpStr = ''

    # # placeholder for surl file
    self.surlFile = None

    # # placeholder for target StorageElement
    self.oTargetSE = None
    # # placeholder for source StorageElement
    self.oSourceSE = None

    # # checksum type, set it to default
    self.__cksmType = self.__defaultCksmType
    # # disable checksum test by default
    self.__cksmTest = False


    # # statuses that prevent submitting to FTS
    self.noSubmitStatus = ( 'Failed', 'Done', 'Staging' )

    # # were sources resolved?
    self.sourceResolved = False

    # # Number of file transfers actually submitted
    self.submittedFiles = 0

  ####################################################################
  #
  #  Methods for setting/getting/checking the SEs
  #

  def setSourceSE( self, se ):
    """ set SE for source

    :param self: self reference
    :param str se: source SE name
    """
    if se == self.targetSE:
      return S_ERROR( "SourceSE is TargetSE" )
    self.sourceSE = se
    self.oSourceSE = StorageElement( self.sourceSE )
    return self.__checkSourceSE()

  def getSourceSE( self ):
    """ source SE getter

    :param self: self reference
    """
    if not self.sourceSE:
      return S_ERROR( "Source SE not defined" )
    return S_OK( self.sourceSE )

  def setSourceToken( self, token ):
    """ set source space token

    :param self: self reference
    :param str token: source space token
    """
    self.sourceToken = token
    return S_OK()

  def getSourceToken( self ):
    """ source space token getter

    :param self: self reference
    """
    if not self.sourceToken:
      return S_ERROR( "Source token not defined" )
    return S_OK( self.sourceToken )

  def __checkSourceSE( self ):
    """ check source SE availability

    :param self: self reference
    """
    if not self.sourceSE:
      return S_ERROR( "SourceSE not set" )
    res = self.oSourceSE.isValid( 'Read' )
    if not res['OK']:
      return S_ERROR( "SourceSE not available for reading" )
    res = self.__getSESpaceToken( self.oSourceSE )
    if not res['OK']:
      self.log.error( "FTSRequest failed to get SRM Space Token for SourceSE", res['Message'] )
      return S_ERROR( "SourceSE does not support FTS transfers" )

    self.sourceToken = res['Value']

    if self.__cksmTest:
      cksmRes = self.oSourceSE.getChecksumType()
      if not cksmRes["OK"]:
        self.log.error( "Unable to get checksum type for SourceSE %s: %s" % ( self.sourceSE,
                                                                              cksmRes["Message"] ) )
        # cannot determine the SE checksum type, disable the checksum test
        self.__cksmTest = False
      else:
        cksmType = cksmRes["Value"]
        if cksmType in ( "NONE", "NULL" ):
          self.log.warn( "Checksum type set to %s at SourceSE %s, disabling checksum test" % ( cksmType,
                                                                                               self.sourceSE ) )
          self.__cksmTest = False
        elif cksmType != self.__cksmType:
          self.log.warn( "Checksum type mismatch, disabling checksum test" )
          self.__cksmTest = False

    self.sourceValid = True
    return S_OK()

  def setTargetSE( self, se ):
    """ set target SE

    :param self: self reference
    :param str se: target SE name
    """
    if se == self.sourceSE:
      return S_ERROR( "TargetSE is SourceSE" )
    self.targetSE = se
    self.oTargetSE = StorageElement( self.targetSE )
    return self.__checkTargetSE()

  def getTargetSE( self ):
    """ target SE getter

    :param self: self reference
    """
    if not self.targetSE:
      return S_ERROR( "Target SE not defined" )
    return S_OK( self.targetSE )

  def setTargetToken( self, token ):
    """ target space token setter

    :param self: self reference
    :param str token: target space token
    """
    self.targetToken = token
    return S_OK()

  def getTargetToken( self ):
    """ target space token getter

    :param self: self reference
    """
    if not self.targetToken:
      return S_ERROR( "Target token not defined" )
    return S_OK( self.targetToken )

  def __checkTargetSE( self ):
    """ check target SE availability

    :param self: self reference
    """
    if not self.targetSE:
      return S_ERROR( "TargetSE not set" )
    res = self.oTargetSE.isValid( 'Write' )
    if not res['OK']:
      return S_ERROR( "TargetSE not available for writing" )
    res = self.__getSESpaceToken( self.oTargetSE )
    if not res['OK']:
      self.log.error( "FTSRequest failed to get SRM Space Token for TargetSE", res['Message'] )
      return S_ERROR( "TargetSE does not support FTS transfers" )

    self.targetToken = res['Value']

    # # check checksum types
    if self.__cksmTest:
      cksmRes = self.oTargetSE.getChecksumType()
      if not cksmRes["OK"]:
        self.log.error( "Unable to get checksum type for TargetSE %s: %s" % ( self.targetSE,
                                                                              cksmRes["Message"] ) )
        # cannot determine the SE checksum type, disable the checksum test
        self.__cksmTest = False
      else:
        cksmType = cksmRes["Value"]
        if cksmType in ( "NONE", "NULL" ):
          self.log.warn( "Checksum type set to %s at TargetSE %s, disabling checksum test" % ( cksmType,
                                                                                               self.targetSE ) )
          self.__cksmTest = False
        elif cksmType != self.__cksmType:
          self.log.warn( "Checksum type mismatch, disabling checksum test" )
          self.__cksmTest = False

    self.targetValid = True
    return S_OK()

  @staticmethod
  def __getSESpaceToken( oSE ):
    """ get space token from StorageElement instance

    :param self: self reference
    :param StorageElement oSE: StorageElement instance
    """
    res = oSE.getStorageParameters( "SRM2" )
    if not res['OK']:
      return res
    return S_OK( res['Value'].get( 'SpaceToken' ) )

  ####################################################################
  #
  #  Methods for setting/getting FTS request parameters
  #

  def setFTSGUID( self, guid ):
    """ FTS job GUID setter

    :param self: self reference
    :param str guid: string containing GUID
    """
    if not checkGuid( guid ):
      return S_ERROR( "Incorrect GUID format" )
    self.ftsGUID = guid
    return S_OK()

  def getFTSGUID( self ):
    """ FTS job GUID getter

    :param self: self reference
    """
    if not self.ftsGUID:
      return S_ERROR( "FTSGUID not set" )
    return S_OK( self.ftsGUID )

  def setFTSServer( self, server ):
    """ FTS server setter

    :param self: self reference
    :param str server: FTS server URL
    """
    self.ftsServer = server
    return S_OK()

  def getFTSServer( self ):
    """ FTS server getter

    :param self: self reference
    """
    if not self.ftsServer:
      return S_ERROR( "FTSServer not set" )
    return S_OK( self.ftsServer )

  def setPriority( self, priority ):
    """ set priority for FTS job

    :param self: self reference
    :param int priority: a new priority
    """
    if not type( priority ) in ( IntType, LongType ):
      return S_ERROR( "Priority must be integer" )
    if priority < 0:
      priority = 0
    elif priority > 5:
      priority = 5
    self.priority = priority
    return S_OK( self.priority )

  def getPriority( self ):
    """ FTS job priority getter

    :param self: self reference
    """
    return S_OK( self.priority )

  def getPercentageComplete( self ):
    """ get completness percentage

    :param self: self reference
    """
    completedFiles = 0
    totalFiles = 0
    for state in self.statusSummary:
      if state in self.successfulStates:
        completedFiles += self.statusSummary[state]
      totalFiles += self.statusSummary[state]
    self.percentageComplete = ( float( completedFiles ) * 100.0 ) / float( totalFiles )
    return S_OK( self.percentageComplete )

  def isRequestTerminal( self ):
    """ check if FTS job has terminated

    :param self: self reference
    """
    if self.requestStatus in self.finalStates:
      self.isTerminal = True
    return S_OK( self.isTerminal )

  def getStatus( self ):
    """ get FTS job status

    :param self: self reference
    """
    return S_OK( self.requestStatus )


  def setCksmType( self, cksm = None ):
    """ set checksum type to use

    :param self: self reference
    :param mixed cksm: checksum type, should be one of 'Adler32', 'md5', 'sha1', None
    """
    if str( cksm ).upper() not in ( "ADLER32", "MD5", "SHA1", "NONE" ):
      return S_ERROR( "Not supported checksum type: %s" % str( cksm ) )
    if not cksm:
      self.__cksmType = None
      return S_OK( False )
    self.__cksmType = str( cksm ).upper()
    return S_OK( True )

  def getCksmType( self ):
    """ get checksum type

    :param self: self reference
    """
    return S_OK( self.__cksmType )

  def setCksmTest( self, cksmTest = False ):
    """ set cksm test

    :param self: self reference
    :param bool cksmTest: flag to enable/disable checksum test
    """
    self.__cksmTest = bool( cksmTest )
    return S_OK( self.__cksmTest )

  def getCksmTest( self ):
    """ get cksm test flag

    :param self: self reference
    """
    return S_OK( self.__cksmTest )

  ####################################################################
  #
  #  Methods for setting/getting/checking files and their metadata
  #

  def setLFN( self, lfn ):
    """ add LFN :lfn: to :fileDict:

    :param self: self reference
    :param str lfn: LFN to add
    """
    self.fileDict.setdefault( lfn, {'Status':'Waiting'} )
    return S_OK()

  def setStatus( self, lfn, status ):
    """ set status of a file """
    return( self.__setFileParameter( lfn, 'Status', status ) )

  def setSourceSURL( self, lfn, surl ):
    """ source SURL setter

    :param self: self reference
    :param str lfn: LFN
    :param str surl: source SURL
    """
    target = self.fileDict[lfn].get( 'Target' )
    if target == surl:
      return S_ERROR( "Source and target the same" )
    return( self.__setFileParameter( lfn, 'Source', surl ) )

  def getSourceSURL( self, lfn ):
    """ get source SURL for LFN :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Source' )

  def setTargetSURL( self, lfn, surl ):
    """ set target SURL for LFN :lfn:

    :param self: self reference
    :param str lfn: LFN
    :param str surl: target SURL
    """
    source = self.fileDict[lfn].get( 'Source' )
    if source == surl:
      return S_ERROR( "Source and target the same" )
    return( self.__setFileParameter( lfn, 'Target', surl ) )

  def getTargetSURL( self, lfn ):
    """ target SURL getter

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Target' )

  def getFailReason( self, lfn ):
    """ get fail reason for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Reason' )

  def getRetries( self, lfn ):
    """ get number of attepmts made to transfer file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Retries' )

  def getTransferTime( self, lfn ):
    """ get duration of transfer for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Duration' )

  def getFailed( self ):
    """ get list of wrongly transferred LFNs

    :param self: self reference
    """
    return S_OK( [ lfn for lfn in self.fileDict
                   if self.fileDict[lfn].get( 'Status', '' ) in self.failedStates ] )

  def getStaging( self ):
    """ get files set for prestaging """
    return S_OK( [lfn for lfn in self.fileDict
                  if self.fileDict[lfn].get( 'Status', '' ) == 'Staging'] )

  def getDone( self ):
    """ get list of succesfully transferred LFNs

    :param self: self reference
    """
    return S_OK( [ lfn for lfn in self.fileDict
                   if self.fileDict[lfn].get( 'Status', '' ) in self.successfulStates ] )

  def __setFileParameter( self, lfn, paramName, paramValue ):
    """ set :paramName: to :paramValue: for :lfn: file

    :param self: self reference
    :param str lfn: LFN
    :param str paramName: parameter name
    :param mixed paramValue: a new parameter value
    """
    self.setLFN( lfn )
    self.fileDict[lfn][paramName] = paramValue
    return S_OK()

  def __getFileParameter( self, lfn, paramName ):
    """ get value of :paramName: for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    :param str paramName: parameter name
    """
    if lfn not in self.fileDict:
      return S_ERROR( "Supplied file not set" )
    if paramName not in self.fileDict[lfn]:
      return S_ERROR( "%s not set for file" % paramName )
    return S_OK( self.fileDict[lfn][paramName] )

  ####################################################################
  #
  #  Methods for submission
  #

  def submit( self, monitor = False, printOutput = True ):
    """ submit FTS job

    :param self: self reference
    :param bool monitor: flag to monitor progress of FTS job
    :param bool printOutput: flag to print output of execution to stdout
    """
    res = self.__isSubmissionValid()
    if not res['OK']:
      return res
    res = self.__createSURLPairFile()
    if not res['OK']:
      return res
    res = self.__submitFTSTransfer()
    if not res['OK']:
      return res
    resDict = { 'ftsGUID' : self.ftsGUID, 'ftsServer' : self.ftsServer, 'submittedFiles' : self.submittedFiles }
    if monitor or printOutput:
      gLogger.always( "Submitted %s@%s" % ( self.ftsGUID, self.ftsServer ) )
      if monitor:
        self.monitor( untilTerminal = True, printOutput = printOutput )
    return S_OK( resDict )
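
  # Illustrative usage sketch (added note, not part of the original class). The SE
  # names and LFN below are placeholders; assuming a configured DIRAC client, a
  # replication could be driven roughly like this:
  #
  #   ftsReq = FTSRequest()
  #   ftsReq.setSourceSE( 'SOURCE-SE' )
  #   ftsReq.setTargetSE( 'TARGET-SE' )
  #   ftsReq.setLFN( '/somevo/user/s/someuser/file.dat' )
  #   result = ftsReq.submit( monitor = False, printOutput = True )
  #   if not result['OK']:
  #     gLogger.error( result['Message'] )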

  def __isSubmissionValid( self ):
    """ check validity of job before submission

    :param self: self reference
    """
    if not self.fileDict:
      return S_ERROR( "No files set" )
    if not self.sourceValid:
      return S_ERROR( "SourceSE not valid" )
    if not self.targetValid:
      return S_ERROR( "TargetSE not valid" )
    if not self.ftsServer:
      res = self.__resolveFTSServer()
      if not res['OK']:
        return S_ERROR( "FTSServer not valid" )
    self.resolveSource()
    self.resolveTarget()
    res = self.__filesToSubmit()
    if not res['OK']:
      return S_ERROR( "No files to submit" )
    return S_OK()

  def __getCatalogObject( self ):
    """ CatalogInterface instance facade

    :param self: self reference
    """
    try:
      if not self.oCatalog:
        self.oCatalog = FileCatalog()
      return S_OK()
    except Exception as error:
      return S_ERROR( "Failed to instantiate FileCatalog: %s" % error )

  def __updateReplicaCache( self, lfns = None, overwrite = False ):
    """ update replica cache for list of :lfns:

    :param self: self reference
    :param mixed lfns: list of LFNs
    :param bool overwrite: flag to trigger cache clearing and updating
    """
    if not lfns:
      lfns = self.fileDict.keys()
    toUpdate = [ lfn for lfn in lfns if ( lfn not in self.catalogReplicas ) or overwrite ]
    if not toUpdate:
      return S_OK()
    res = self.__getCatalogObject()
    if not res['OK']:
      return res
    res = self.oCatalog.getReplicas( toUpdate )
    if not res['OK']:
      return S_ERROR( "Failed to update replica cache: %s" % res['Message'] )
    for lfn, error in res['Value']['Failed'].items():
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    for lfn, replicas in res['Value']['Successful'].items():
      self.catalogReplicas[lfn] = replicas
    return S_OK()

  def __updateMetadataCache( self, lfns = None, overwrite = False ):
    """ update metadata cache for list of LFNs

    :param self: self reference
    :param list lfns: list of LFNs
    :param bool overwrite: flag to trigger cache clearing and updating
    """
    if not lfns:
      lfns = self.fileDict.keys()
    toUpdate = [ lfn for lfn in lfns if ( lfn not in self.catalogMetadata ) or overwrite ]
    if not toUpdate:
      return S_OK()
    res = self.__getCatalogObject()
    if not res['OK']:
      return res
    res = self.oCatalog.getFileMetadata( toUpdate )
    if not res['OK']:
      return S_ERROR( "Failed to get source catalog metadata: %s" % res['Message'] )
    for lfn, error in res['Value']['Failed'].items():
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    for lfn, metadata in res['Value']['Successful'].items():
      self.catalogMetadata[lfn] = metadata
    return S_OK()

  def resolveSource( self ):
    """ resolve source SE eligible for submission

    :param self: self reference
    """

    # Avoid resolving sources twice
    if self.sourceResolved:
      return S_OK()
    # Only resolve files that need a transfer
    toResolve = [ lfn for lfn in self.fileDict if self.fileDict[lfn].get( "Status", "" ) != "Failed" ]
    if not toResolve:
      return S_OK()
    res = self.__updateMetadataCache( toResolve )
    if not res['OK']:
      return res
    res = self.__updateReplicaCache( toResolve )
    if not res['OK']:
      return res

    # Define the source URLs
    for lfn in toResolve:
      replicas = self.catalogReplicas.get( lfn, {} )
      if self.sourceSE not in replicas:
        gLogger.warn( "resolveSource: skipping %s - not replicas at SourceSE %s" % ( lfn, self.sourceSE ) )
        self.__setFileParameter( lfn, 'Reason', "No replica at SourceSE" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      # Fix first the PFN
      pfn = self.oSourceSE.getPfnForLfn( lfn ).get( 'Value', {} ).get( 'Successful', {} ).get( lfn, replicas[self.sourceSE] )
      res = Utils.executeSingleFileOrDirWrapper( self.oSourceSE.getPfnForProtocol( pfn, protocol = 'SRM2', withPort = True ) )
      if not res['OK']:
        gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      res = self.setSourceSURL( lfn, res['Value'] )
      if not res['OK']:
        gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue

    toResolve = {}
    for lfn in self.fileDict:
      if "Source" in self.fileDict[lfn]:
        toResolve[self.fileDict[lfn]['Source']] = lfn
    if not toResolve:
      return S_ERROR( "No eligible Source files" )

    # Get metadata of the sources, to check for existence, availability and caching
    res = self.oSourceSE.getFileMetadata( toResolve.keys() )
    if not res['OK']:
      return S_ERROR( "Failed to check source file metadata" )

    for pfn, error in res['Value']['Failed'].items():
      lfn = toResolve[pfn]
      if re.search( 'File does not exist', error ):
        gLogger.warn( "resolveSource: skipping %s - source file does not exists" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file does not exist" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        gLogger.warn( "resolveSource: skipping %s - failed to get source metadata" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Failed to get Source metadata" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
    toStage = []

    nbStagedFiles = 0
    for pfn, metadata in res['Value']['Successful'].items():
      lfn = toResolve[pfn]
      lfnStatus = self.fileDict.get( lfn, {} ).get( 'Status' )
      if metadata['Unavailable']:
        gLogger.warn( "resolveSource: skipping %s - source file unavailable" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file Unavailable" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif metadata['Lost']:
        gLogger.warn( "resolveSource: skipping %s - source file lost" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file Lost" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif not metadata['Cached']:
        if lfnStatus != 'Staging':
          toStage.append( pfn )
      elif metadata['Size'] != self.catalogMetadata[lfn]['Size']:
        gLogger.warn( "resolveSource: skipping %s - source file size mismatch" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source size mismatch" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif self.catalogMetadata[lfn]['Checksum'] and metadata['Checksum'] and \
            not ( compareAdler( metadata['Checksum'], self.catalogMetadata[lfn]['Checksum'] ) ):
        gLogger.warn( "resolveSource: skipping %s - source file checksum mismatch" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source checksum mismatch" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif lfnStatus == 'Staging':
        # file that was staging is now cached
        self.__setFileParameter( lfn, 'Status', 'Waiting' )
        nbStagedFiles += 1

    # Some files were being staged
    if nbStagedFiles:
      self.log.info( 'resolveSource: %d files have been staged' % nbStagedFiles )

    # Launching staging of files not in cache
    if toStage:
      gLogger.warn( "resolveSource: %s source files not cached, prestaging..." % len( toStage ) )
      stage = self.oSourceSE.prestageFile( toStage )
      if not stage["OK"]:
        gLogger.error( "resolveSource: error is prestaging - %s" % stage["Message"] )
        for pfn in toStage:
          lfn = toResolve[pfn]
          self.__setFileParameter( lfn, 'Reason', stage["Message"] )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        for pfn in toStage:
          lfn = toResolve[pfn]
          if pfn in stage['Value']['Successful']:
            self.__setFileParameter( lfn, 'Status', 'Staging' )
          elif pfn in stage['Value']['Failed']:
            self.__setFileParameter( lfn, 'Reason', stage['Value']['Failed'][pfn] )
            self.__setFileParameter( lfn, 'Status', 'Failed' )

    self.sourceResolved = True
    return S_OK()

  def resolveTarget( self ):
    """ find target SE eligible for submission

    :param self: self reference
    """
    toResolve = [ lfn for lfn in self.fileDict
                 if self.fileDict[lfn].get( 'Status' ) not in self.noSubmitStatus ]
    if not toResolve:
      return S_OK()
    res = self.__updateReplicaCache( toResolve )
    if not res['OK']:
      return res
    for lfn in toResolve:
      res = self.oTargetSE.getPfnForLfn( lfn )
      if not res['OK'] or lfn not in res['Value']['Successful']:
        gLogger.warn( "resolveTarget: skipping %s - failed to create target pfn" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Failed to create Target" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      pfn = res['Value']['Successful'][lfn]
      res = self.oTargetSE.getPfnForProtocol( pfn, protocol = 'SRM2', withPort = True )
      if not res['OK'] or pfn not in res['Value']['Successful']:
        reason = res.get( 'Message', res.get( 'Value', {} ).get( 'Failed', {} ).get( pfn ) )
        gLogger.warn( "resolveTarget: skipping %s - %s" % ( lfn, reason ) )
        self.__setFileParameter( lfn, 'Reason', reason )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      pfn = res['Value']['Successful'][pfn]
      res = self.setTargetSURL( lfn, pfn )
      if not res['OK']:
        gLogger.warn( "resolveTarget: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
    toResolve = {}
    for lfn in self.fileDict:
      if "Target" in self.fileDict[lfn]:
        toResolve[self.fileDict[lfn]['Target']] = lfn
    if not toResolve:
      return S_ERROR( "No eligible Target files" )
    res = self.oTargetSE.exists( toResolve.keys() )
    if not res['OK']:
      return S_ERROR( "Failed to check target existence" )
    for pfn, error in res['Value']['Failed'].items():
      lfn = toResolve[pfn]
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    toRemove = []
    for pfn, exists in res['Value']['Successful'].items():
      if exists:
        lfn = toResolve[pfn]
        res = self.getSourceSURL( lfn )
        if not res['OK']:
          gLogger.warn( "resolveTarget: skipping %s - target exists" % lfn )
          self.__setFileParameter( lfn, 'Reason', "Target exists" )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
        elif res['Value'] == pfn:
          gLogger.warn( "resolveTarget: skipping %s - source and target pfns are the same" % lfn )
          self.__setFileParameter( lfn, 'Reason', "Source and Target the same" )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
        else:
          toRemove.append( pfn )
    if toRemove:
      self.oTargetSE.removeFile( toRemove )
    return S_OK()

  def __filesToSubmit( self ):
    """
    check if there is at least one file to submit

    :return: S_OK if at least one file is present, S_ERROR otherwise
    """
    for lfn in self.fileDict:
      lfnStatus = self.fileDict[lfn].get( 'Status' )
      source = self.fileDict[lfn].get( 'Source' )
      target = self.fileDict[lfn].get( 'Target' )
      if lfnStatus not in self.noSubmitStatus and source and target:
        return S_OK()
    return S_ERROR()

  def __createSURLPairFile( self ):
    """ create LFNs file for glite-transfer-submit command

    This file contains one line for each file to be transferred:

    sourceSURL targetSURL [CHECKSUMTYPE:CHECKSUM]

    :param self: self reference
    """
    fd, fileName = tempfile.mkstemp()
    surlFile = os.fdopen( fd, 'w' )
    for lfn in self.fileDict:
      lfnStatus = self.fileDict[lfn].get( 'Status' )
      source = self.fileDict[lfn].get( 'Source' )
      target = self.fileDict[lfn].get( 'Target' )
      if lfnStatus not in self.noSubmitStatus and source and target:
        cksmStr = ""
        # # add cksmType:cksm only if cksmType is specified, else let FTS decide by itself
        if self.__cksmTest and self.__cksmType:
          checkSum = self.catalogMetadata.get( lfn, {} ).get( 'Checksum' )
          if checkSum:
            cksmStr = " %s:%s" % ( self.__cksmType, intAdlerToHex( hexAdlerToInt( checkSum ) ) )
        surlFile.write( "%s %s%s\n" % ( source, target, cksmStr ) )
        self.submittedFiles += 1
    surlFile.close()
    self.surlFile = fileName
    return S_OK()

  def __submitFTSTransfer( self ):
    """ create and execute glite-transfer-submit CLI command

    :param self: self reference
    """
    comm = [ 'glite-transfer-submit', '-s', self.ftsServer, '-f', self.surlFile, '-o' ]
    if self.targetToken:
      comm += [ '-t', self.targetToken ]
    if self.sourceToken:
      comm += [ '-S', self.sourceToken ]
    if self.__cksmTest:
      comm.append( "--compare-checksums" )
    gLogger.verbose( 'Executing %s' % ' '.join( comm ) )
    res = executeGridCommand( '', comm )
    os.remove( self.surlFile )
    if not res['OK']:
      return res
    returnCode, output, errStr = res['Value']
    if returnCode != 0:
      return S_ERROR( errStr )
    guid = output.replace( '\n', '' )
    if not checkGuid( guid ):
      return S_ERROR( 'Wrong GUID format returned' )
    self.ftsGUID = guid
    # if self.priority != 3:
    #  comm = ['glite-transfer-setpriority','-s', self.ftsServer,self.ftsGUID,str(self.priority)]
    #  executeGridCommand('',comm)
    return res

  def __getFTSServer( self, site ):
    """ get FTS server endpoint for :site: from the CS

    :param str site: site name
    """
    try:
      configPath = '/Resources/FTSEndpoints/%s' % site
      endpointURL = gConfig.getValue( configPath )
      if not endpointURL:
        errStr = "FTSRequest.__getFTSServer: Failed to find FTS endpoint, check CS entry for '%s'." % site
        return S_ERROR( errStr )
      return S_OK( endpointURL )
    except Exception:
      return S_ERROR( 'FTSRequest.__getFTSServer: Failed to obtain endpoint details from CS' )
Example #15
0
class CatalogPlugInTestCase(unittest.TestCase):
  """ Base class for the CatalogPlugin test case """

  def setUp(self):
    self.fullMetadata = ['Status', 'CheckSumType', 'OwnerRole', 'CreationDate', 'Checksum', 'ModificationDate', 'OwnerDN', 'Mode', 'GUID', 'Size']
    self.dirMetadata = self.fullMetadata + ['NumberOfSubPaths']
    self.fileMetadata = self.fullMetadata + ['NumberOfLinks']

    self.catalog = FileCatalog(catalogs=[catalogClientToTest])
    valid = self.catalog.isOK()
    self.assert_(valid)
    self.destDir = '/lhcb/test/unit-test/TestCatalogPlugin'
    self.link = "%s/link" % self.destDir

    # Clean the existing directory
    self.cleanDirectory()
    res = self.catalog.createDirectory(self.destDir)
    returnValue = self.parseResult(res,self.destDir)

    # Register some files to work with
    self.numberOfFiles = 2
    self.files = []
    for i in range(self.numberOfFiles):
      lfn = "%s/testFile_%d" % (self.destDir,i)
      res = self.registerFile(lfn)
      self.assert_(res)
      self.files.append(lfn)

  def registerFile(self,lfn):
    pfn = 'protocol://host:port/storage/path%s' % lfn
    size = 10000000
    se = 'DIRAC-storage'
    guid = makeGuid()
    adler = stringAdler(guid)
    fileDict = {}
    fileDict[lfn] = {'PFN':pfn,'Size':size,'SE':se,'GUID':guid,'Checksum':adler}
    res = self.catalog.addFile(fileDict)
    return self.parseResult(res,lfn)

  def parseResult(self,res,path):
    self.assert_(res['OK'])
    self.assert_(res['Value'])
    self.assert_(res['Value']['Successful'])
    self.assert_(res['Value']['Successful'].has_key(path))
    return res['Value']['Successful'][path]

  def parseError(self,res,path):
    self.assert_(res['OK'])
    self.assert_(res['Value'])
    self.assert_(res['Value']['Failed'])
    self.assert_(res['Value']['Failed'].has_key(path))
    return res['Value']['Failed'][path]    

  def cleanDirectory(self):
    res = self.catalog.exists(self.destDir)
    returnValue = self.parseResult(res,self.destDir)
    if not returnValue:
      return
    res = self.catalog.listDirectory(self.destDir)  
    returnValue = self.parseResult(res,self.destDir)
    toRemove = returnValue['Files'].keys()
    if toRemove:
      self.purgeFiles(toRemove)
    res = self.catalog.removeDirectory(self.destDir)
    returnValue = self.parseResult(res,self.destDir)
    self.assert_(returnValue)

  def purgeFiles(self,lfns):
    for lfn in lfns:
      res = self.catalog.getReplicas(lfn,True)
      replicas = self.parseResult(res,lfn)
      for se,pfn in replicas.items():
        repDict = {}
        repDict[lfn] = {'PFN':pfn,'SE':se}
        res = self.catalog.removeReplica(repDict)
        self.parseResult(res,lfn)   
      res = self.catalog.removeFile(lfn)
      self.parseResult(res,lfn)

  def tearDown(self):
    self.cleanDirectory()
Example #16
0
    def __retrieveAndUploadFile(self, file, outputDict):
        """
    Retrieve, Upload, and remove
    """
        fileName = file
        inputPath = outputDict['InputPath']
        inputFCName = outputDict['InputFC']
        inBytes = 0
        if inputFCName == 'LocalDisk':
            inFile = file
            file = os.path.join(inputPath, file)
        else:
            inputFC = FileCatalog([inputFCName])

            inFile = os.path.join(inputPath, file)
            replicaDict = inputFC.getReplicas(inFile)
            if not replicaDict['OK']:
                self.log.error(replicaDict['Message'])
                return S_ERROR(fileName)
            if not inFile in replicaDict['Value']['Successful']:
                self.log.error(replicaDict['Value']['Failed'][inFile])
                return S_ERROR(fileName)
            seList = replicaDict['Value']['Successful'][inFile].keys()

            inputSE = StorageElement(seList[0])
            self.log.info('Retrieving from %s:' % inputSE.name, inFile)
            # ret = inputSE.getFile( inFile )
            # lcg_util binding prevents multithreading, use subprocess instead
            res = pythonCall(2 * 3600, inputSE.getFile, inFile)
            if not res['OK']:
                self.log.error(res['Message'])
                return S_ERROR(fileName)
            ret = res['Value']
            if not ret['OK']:
                self.log.error(ret['Message'])
                return S_ERROR(fileName)
            if not inFile in ret['Value']['Successful']:
                self.log.error(ret['Value']['Failed'][inFile])
                return S_ERROR(fileName)

        if os.path.isfile(file):
            inBytes = os.stat(file)[6]

        outputPath = outputDict['OutputPath']
        outputFCName = outputDict['OutputFC']
        replicaManager = ReplicaManager()
        outFile = os.path.join(outputPath, os.path.basename(file))
        transferOK = False
        for outputSEName in List.fromChar(outputDict['OutputSE'], ","):
            outputSE = StorageElement(outputSEName)
            self.log.info('Trying to upload to %s:' % outputSE.name, outFile)
            # ret = replicaManager.putAndRegister( outFile, os.path.realpath( file ), outputSE.name, catalog=outputFCName )
            # lcg_util binding prevents multithreading, use subprocess instead
            result = pythonCall(2 * 3600,
                                replicaManager.putAndRegister,
                                outFile,
                                os.path.realpath(file),
                                outputSE.name,
                                catalog=outputFCName)
            if result['OK'] and result['Value']['OK']:
                if outFile in result['Value']['Value']['Successful']:
                    transferOK = True
                    break
                else:
                    self.log.error(result['Value']['Value']['Failed'][outFile])
            else:
                if result['OK']:
                    self.log.error(result['Value']['Message'])
                else:
                    self.log.error(result['Message'])

        if not transferOK:
            return S_ERROR(fileName)

        if result['OK'] or inputFCName != 'LocalDisk':
            os.unlink(file)

        if not result['OK']:
            self.log.error(result['Message'])
            return S_ERROR(fileName)

        self.log.info("Finished transferring %s [%s bytes]" %
                      (inFile, inBytes))
        self.__okTransferredFiles += 1
        self.__okTransferredBytes += inBytes

        if inputFCName == 'LocalDisk':
            return S_OK(fileName)

        # Now the file is on final SE/FC, remove from input SE/FC
        for se in seList:
            se = StorageElement(se)
            self.log.info('Removing from %s:' % se.name, inFile)
            se.removeFile(inFile)

        inputFC.removeFile(inFile)

        return S_OK(fileName)
Example #17
0
class CatalogPlugInTestCase(unittest.TestCase):
    """ Base class for the CatalogPlugin test case """

    def setUp(self):
        self.fullMetadata = [
            "Status",
            "ChecksumType",
            "OwnerRole",
            "CreationDate",
            "Checksum",
            "ModificationDate",
            "OwnerDN",
            "Mode",
            "GUID",
            "Size",
        ]
        self.dirMetadata = self.fullMetadata + ["NumberOfSubPaths"]
        self.fileMetadata = self.fullMetadata + ["NumberOfLinks"]

        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        self.assert_(valid)
        self.destDir = "/lhcb/test/unit-test/TestCatalogPlugin"
        self.link = "%s/link" % self.destDir

        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)

        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        for i in range(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assert_(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        pfn = "protocol://host:port/storage/path%s" % lfn
        size = 10000000
        se = "DIRAC-storage"
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {}
        fileDict[lfn] = {"PFN": pfn, "Size": size, "SE": se, "GUID": guid, "Checksum": adler}
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        self.assert_(res["OK"])
        self.assert_(res["Value"])
        self.assert_(res["Value"]["Successful"])
        self.assert_(res["Value"]["Successful"].has_key(path))
        return res["Value"]["Successful"][path]

    def parseError(self, res, path):
        self.assert_(res["OK"])
        self.assert_(res["Value"])
        self.assert_(res["Value"]["Failed"])
        self.assert_(res["Value"]["Failed"].has_key(path))
        return res["Value"]["Failed"][path]

    def cleanDirectory(self):
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        toRemove = returnValue["Files"].keys()
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assert_(returnValue)

    def purgeFiles(self, lfns):
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {}
                repDict[lfn] = {"PFN": pfn, "SE": se}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()
Example #18
0
class FTSRequest( object ):
  """
  .. class:: FTSRequest

  Helper class for FTS job submission and monitoring.
  """

  # # default checksum type
  __defaultCksmType = "ADLER32"
  # # flag to disable/enable checksum test, default: disabled
  __cksmTest = False

  def __init__( self ):
    """c'tor

    :param self: self reference
    """
    self.log = gLogger.getSubLogger( self.__class__.__name__, True )

    # # final states tuple
    self.finalStates = ( 'Canceled', 'Failed', 'Hold',
                         'Finished', 'FinishedDirty' )
    # # failed states tuple
    self.failedStates = ( 'Canceled', 'Failed',
                          'Hold', 'FinishedDirty' )
    # # successful states tuple
    self.successfulStates = ( 'Finished', 'Done' )
    # # all file states tuple
    self.fileStates = ( 'Done', 'Active', 'Pending', 'Ready', 'Canceled', 'Failed',
                        'Finishing', 'Finished', 'Submitted', 'Hold', 'Waiting' )

    self.statusSummary = {}

    # # request status
    self.requestStatus = 'Unknown'

    # # dict for FTS job files
    self.fileDict = {}
    # # dict for replicas information
    self.catalogReplicas = {}
    # # dict for metadata information
    self.catalogMetadata = {}
    # # dict for files that failed to register
    self.failedRegistrations = {}

    # # placeholder for FileCatalog reference
    self.oCatalog = None

    # # submit timestamp
    self.submitTime = ''

    # # placeholder FTS job GUID
    self.ftsGUID = ''
    # # placeholder for FTS server URL
    self.ftsServer = ''

    # # flag marking FTS job completeness
    self.isTerminal = False
    # # completeness percentage
    self.percentageComplete = 0.0

    # # source SE name
    self.sourceSE = ''
    # # flag marking source SE validity
    self.sourceValid = False
    # # source space token
    self.sourceToken = ''

    # # target SE name
    self.targetSE = ''
    # # flag marking target SE validity
    self.targetValid = False
    # # target space token
    self.targetToken = ''

    # # placeholder for target StorageElement
    self.oTargetSE = None
    # # placeholder for source StorageElement
    self.oSourceSE = None

    # # checksum type, set it to default
    self.__cksmType = self.__defaultCksmType
    # # disable checksum test by default
    self.__cksmTest = False

    # # statuses that prevent submitting to FTS
    self.noSubmitStatus = ( 'Failed', 'Done', 'Staging' )

    # # were sources resolved?
    self.sourceResolved = False

    # # Number of file transfers actually submitted
    self.submittedFiles = 0
    self.transferTime = 0

    self.submitCommand = Operations().getValue( 'DataManagement/FTSPlacement/FTS2/SubmitCommand', 'glite-transfer-submit' )
    self.monitorCommand = Operations().getValue( 'DataManagement/FTSPlacement/FTS2/MonitorCommand', 'glite-transfer-status' )
    self.ftsJob = None
    self.ftsFiles = []

  ####################################################################
  #
  #  Methods for setting/getting/checking the SEs
  #

  def setSourceSE( self, se ):
    """ set SE for source

    :param self: self reference
    :param str se: source SE name
    """
    if se == self.targetSE:
      return S_ERROR( "SourceSE is TargetSE" )
    self.sourceSE = se
    self.oSourceSE = StorageElement( self.sourceSE )
    return self.__checkSourceSE()

  def __checkSourceSE( self ):
    """ check source SE availability

    :param self: self reference
    """
    if not self.sourceSE:
      return S_ERROR( "SourceSE not set" )
    res = self.oSourceSE.isValid( 'Read' )
    if not res['OK']:
      return S_ERROR( "SourceSE not available for reading" )
    res = self.__getSESpaceToken( self.oSourceSE )
    if not res['OK']:
      self.log.error( "FTSRequest failed to get SRM Space Token for SourceSE", res['Message'] )
      return S_ERROR( "SourceSE does not support FTS transfers" )

    self.sourceToken = res['Value']

    if self.__cksmTest:
      cksmRes = self.oSourceSE.getChecksumType()
      if not cksmRes["OK"]:
        self.log.error( "Unable to get checksum type for SourceSE %s: %s" % ( self.sourceSE,
                                                                              cksmRes["Message"] ) )
        # cannot determine the SE checksum type, disable the checksum test
        self.__cksmTest = False
      else:
        cksmType = cksmRes["Value"]
        if cksmType in ( "NONE", "NULL" ):
          self.log.warn( "Checksum type set to %s at SourceSE %s, disabling checksum test" % ( cksmType,
                                                                                               self.sourceSE ) )
          self.__cksmTest = False
        elif cksmType != self.__cksmType:
          self.log.warn( "Checksum type mismatch, disabling checksum test" )
          self.__cksmTest = False

    self.sourceValid = True
    return S_OK()

  def setTargetSE( self, se ):
    """ set target SE

    :param self: self reference
    :param str se: target SE name
    """
    if se == self.sourceSE:
      return S_ERROR( "TargetSE is SourceSE" )
    self.targetSE = se
    self.oTargetSE = StorageElement( self.targetSE )
    return self.__checkTargetSE()

  def setTargetToken( self, token ):
    """ target space token setter

    :param self: self reference
    :param str token: target space token
    """
    self.targetToken = token
    return S_OK()

  def __checkTargetSE( self ):
    """ check target SE availability

    :param self: self reference
    """
    if not self.targetSE:
      return S_ERROR( "TargetSE not set" )
    res = self.oTargetSE.isValid( 'Write' )
    if not res['OK']:
      return S_ERROR( "TargetSE not available for writing" )
    res = self.__getSESpaceToken( self.oTargetSE )
    if not res['OK']:
      self.log.error( "FTSRequest failed to get SRM Space Token for TargetSE", res['Message'] )
      return S_ERROR( "TargetSE does not support FTS transfers" )

    self.targetToken = res['Value']

    # # check checksum types
    if self.__cksmTest:
      cksmRes = self.oTargetSE.getChecksumType()
      if not cksmRes["OK"]:
        self.log.error( "Unable to get checksum type for TargetSE %s: %s" % ( self.targetSE,
                                                                              cksmRes["Message"] ) )
        # cannot determine the SE checksum type, disable the checksum test
        self.__cksmTest = False
      else:
        cksmType = cksmRes["Value"]
        if cksmType in ( "NONE", "NULL" ):
          self.log.warn( "Checksum type set to %s at TargetSE %s, disabling checksum test" % ( cksmType,
                                                                                               self.targetSE ) )
          self.__cksmTest = False
        elif cksmType != self.__cksmType:
          self.log.warn( "Checksum type mismatch, disabling checksum test" )
          self.__cksmTest = False

    self.targetValid = True
    return S_OK()

  @staticmethod
  def __getSESpaceToken( oSE ):
    """ get space token from StorageElement instance

    :param self: self reference
    :param StorageElement oSE: StorageElement instance
    """
    res = oSE.getStorageParameters( "SRM2" )
    if not res['OK']:
      return res
    return S_OK( res['Value'].get( 'SpaceToken' ) )

  ####################################################################
  #
  #  Methods for setting/getting FTS request parameters
  #

  def setFTSGUID( self, guid ):
    """ FTS job GUID setter

    :param self: self reference
    :param str guid: string containing GUID
    """
    if not checkGuid( guid ):
      return S_ERROR( "Incorrect GUID format" )
    self.ftsGUID = guid
    return S_OK()


  def setFTSServer( self, server ):
    """ FTS server setter

    :param self: self reference
    :param str server: FTS server URL
    """
    self.ftsServer = server
    return S_OK()

  def isRequestTerminal( self ):
    """ check if FTS job has terminated

    :param self: self reference
    """
    if self.requestStatus in self.finalStates:
      self.isTerminal = True
    return S_OK( self.isTerminal )

  def setCksmTest( self, cksmTest = False ):
    """ set cksm test

    :param self: self reference
    :param bool cksmTest: flag to enable/disable checksum test
    """
    self.__cksmTest = bool( cksmTest )
    return S_OK( self.__cksmTest )

  ####################################################################
  #
  #  Methods for setting/getting/checking files and their metadata
  #

  def setLFN( self, lfn ):
    """ add LFN :lfn: to :fileDict:

    :param self: self reference
    :param str lfn: LFN to add
    """
    self.fileDict.setdefault( lfn, {'Status':'Waiting'} )
    return S_OK()

  def setSourceSURL( self, lfn, surl ):
    """ source SURL setter

    :param self: self reference
    :param str lfn: LFN
    :param str surl: source SURL
    """
    target = self.fileDict[lfn].get( 'Target' )
    if target == surl:
      return S_ERROR( "Source and target the same" )
    return self.__setFileParameter( lfn, 'Source', surl )

  def getSourceSURL( self, lfn ):
    """ get source SURL for LFN :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Source' )

  def setTargetSURL( self, lfn, surl ):
    """ set target SURL for LFN :lfn:

    :param self: self reference
    :param str lfn: LFN
    :param str surl: target SURL
    """
    source = self.fileDict[lfn].get( 'Source' )
    if source == surl:
      return S_ERROR( "Source and target the same" )
    return self.__setFileParameter( lfn, 'Target', surl )

  def getFailReason( self, lfn ):
    """ get fail reason for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Reason' )

  def getRetries( self, lfn ):
    """ get number of attepmts made to transfer file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Retries' )

  def getTransferTime( self, lfn ):
    """ get duration of transfer for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Duration' )

  def getFailed( self ):
    """ get list of wrongly transferred LFNs

    :param self: self reference
    """
    return S_OK( [ lfn for lfn in self.fileDict
                   if self.fileDict[lfn].get( 'Status', '' ) in self.failedStates ] )

  def getStaging( self ):
    """ get files set for prestaging """
    return S_OK( [lfn for lfn in self.fileDict
                  if self.fileDict[lfn].get( 'Status', '' ) == 'Staging'] )

  def getDone( self ):
    """ get list of succesfully transferred LFNs

    :param self: self reference
    """
    return S_OK( [ lfn for lfn in self.fileDict
                   if self.fileDict[lfn].get( 'Status', '' ) in self.successfulStates ] )

  def __setFileParameter( self, lfn, paramName, paramValue ):
    """ set :paramName: to :paramValue: for :lfn: file

    :param self: self reference
    :param str lfn: LFN
    :param str paramName: parameter name
    :param mixed paramValue: a new parameter value
    """
    self.setLFN( lfn )
    self.fileDict[lfn][paramName] = paramValue
    return S_OK()

  def __getFileParameter( self, lfn, paramName ):
    """ get value of :paramName: for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    :param str paramName: parameter name
    """
    if lfn not in self.fileDict:
      return S_ERROR( "Supplied file not set" )
    if paramName not in self.fileDict[lfn]:
      return S_ERROR( "%s not set for file" % paramName )
    return S_OK( self.fileDict[lfn][paramName] )

  ####################################################################
  #
  #  Methods for submission
  #

  def submit( self, monitor = False, printOutput = True ):
    """ submit FTS job

    :param self: self reference
    :param bool monitor: flag to monitor progress of FTS job
    :param bool printOutput: flag to print output of execution to stdout
    """
    res = self.__prepareForSubmission()
    if not res['OK']:
      return res
    res = self.__submitFTSTransfer()
    if not res['OK']:
      return res
    resDict = { 'ftsGUID' : self.ftsGUID, 'ftsServer' : self.ftsServer, 'submittedFiles' : self.submittedFiles }
    if monitor or printOutput:
      gLogger.always( "Submitted %s@%s" % ( self.ftsGUID, self.ftsServer ) )
      if monitor:
        self.monitor( untilTerminal = True, printOutput = printOutput, full = False )
    return S_OK( resDict )

  def __prepareForSubmission( self ):
    """ check validity of job before submission

    :param self: self reference
    """
    if not self.fileDict:
      return S_ERROR( "No files set" )
    if not self.sourceValid:
      return S_ERROR( "SourceSE not valid" )
    if not self.targetValid:
      return S_ERROR( "TargetSE not valid" )
    if not self.ftsServer:
      res = self.__resolveFTSServer()
      if not res['OK']:
        return S_ERROR( "FTSServer not valid" )
    self.resolveSource()
    self.resolveTarget()
    res = self.__filesToSubmit()
    if not res['OK']:
      return S_ERROR( "No files to submit" )
    return S_OK()

  def __getCatalogObject( self ):
    """ CatalogInterface instance facade

    :param self: self reference
    """
    try:
      if not self.oCatalog:
        self.oCatalog = FileCatalog()
      return S_OK()
    except Exception as error:
      return S_ERROR( "Failed to instantiate FileCatalog: %s" % error )

  def __updateReplicaCache( self, lfns = None, overwrite = False ):
    """ update replica cache for list of :lfns:

    :param self: self reference
    :param mixed lfns: list of LFNs
    :param bool overwrite: flag to trigger cache clearing and updating
    """
    if not lfns:
      lfns = self.fileDict.keys()
    toUpdate = [ lfn for lfn in lfns if ( lfn not in self.catalogReplicas ) or overwrite ]
    if not toUpdate:
      return S_OK()
    res = self.__getCatalogObject()
    if not res['OK']:
      return res
    res = self.oCatalog.getReplicas( toUpdate )
    if not res['OK']:
      return S_ERROR( "Failed to update replica cache: %s" % res['Message'] )
    for lfn, error in res['Value']['Failed'].items():
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    for lfn, replicas in res['Value']['Successful'].items():
      self.catalogReplicas[lfn] = replicas
    return S_OK()

  def __updateMetadataCache( self, lfns = None ):
    """ update metadata cache for list of LFNs

    :param self: self reference
    :param list lfns: list of LFNs
    """
    if not lfns:
      lfns = self.fileDict.keys()
    toUpdate = [ lfn for lfn in lfns if lfn not in self.catalogMetadata ]
    if not toUpdate:
      return S_OK()
    res = self.__getCatalogObject()
    if not res['OK']:
      return res
    res = self.oCatalog.getFileMetadata( toUpdate )
    if not res['OK']:
      return S_ERROR( "Failed to get source catalog metadata: %s" % res['Message'] )
    for lfn, error in res['Value']['Failed'].items():
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    for lfn, metadata in res['Value']['Successful'].items():
      self.catalogMetadata[lfn] = metadata
    return S_OK()

  def resolveSource( self ):
    """ resolve source SE eligible for submission

    :param self: self reference
    """

    # Avoid resolving sources twice
    if self.sourceResolved:
      return S_OK()
    # Only resolve files that need a transfer
    toResolve = [ lfn for lfn in self.fileDict if self.fileDict[lfn].get( "Status", "" ) != "Failed" ]
    if not toResolve:
      return S_OK()
    res = self.__updateMetadataCache( toResolve )
    if not res['OK']:
      return res
    res = self.__updateReplicaCache( toResolve )
    if not res['OK']:
      return res

    # Define the source URLs
    for lfn in toResolve:
      replicas = self.catalogReplicas.get( lfn, {} )
      if self.sourceSE not in replicas:
        gLogger.warn( "resolveSource: skipping %s - not replicas at SourceSE %s" % ( lfn, self.sourceSE ) )
        self.__setFileParameter( lfn, 'Reason', "No replica at SourceSE" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      # Fix first the PFN
      pfn = self.oSourceSE.getPfnForLfn( lfn ).get( 'Value', {} ).get( 'Successful', {} ).get( lfn, replicas[self.sourceSE] )
      res = returnSingleResult( self.oSourceSE.getPfnForProtocol( pfn, protocol = 'SRM2', withPort = True ) )
      if not res['OK']:
        gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      res = self.setSourceSURL( lfn, res['Value'] )
      if not res['OK']:
        gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue

    toResolve = {}
    for lfn in self.fileDict:
      if "Source" in self.fileDict[lfn]:
        toResolve[self.fileDict[lfn]['Source']] = lfn
    if not toResolve:
      return S_ERROR( "No eligible Source files" )

    # Get metadata of the sources, to check for existence, availability and caching
    res = self.oSourceSE.getFileMetadata( toResolve.keys() )
    if not res['OK']:
      return S_ERROR( "Failed to check source file metadata" )

    for pfn, error in res['Value']['Failed'].items():
      lfn = toResolve[pfn]
      if re.search( 'File does not exist', error ):
        gLogger.warn( "resolveSource: skipping %s - source file does not exists" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file does not exist" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        gLogger.warn( "resolveSource: skipping %s - failed to get source metadata" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Failed to get Source metadata" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
    toStage = []

    nbStagedFiles = 0
    for pfn, metadata in res['Value']['Successful'].items():
      lfn = toResolve[pfn]
      lfnStatus = self.fileDict.get( lfn, {} ).get( 'Status' )
      if metadata['Unavailable']:
        gLogger.warn( "resolveSource: skipping %s - source file unavailable" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file Unavailable" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif metadata['Lost']:
        gLogger.warn( "resolveSource: skipping %s - source file lost" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file Lost" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif not metadata['Cached']:
        if lfnStatus != 'Staging':
          toStage.append( pfn )
      elif metadata['Size'] != self.catalogMetadata[lfn]['Size']:
        gLogger.warn( "resolveSource: skipping %s - source file size mismatch" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source size mismatch" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif self.catalogMetadata[lfn]['Checksum'] and metadata['Checksum'] and \
            not compareAdler( metadata['Checksum'], self.catalogMetadata[lfn]['Checksum'] ):
        gLogger.warn( "resolveSource: skipping %s - source file checksum mismatch" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source checksum mismatch" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif lfnStatus == 'Staging':
        # file that was staging is now cached
        self.__setFileParameter( lfn, 'Status', 'Waiting' )
        nbStagedFiles += 1

    # Some files were being staged
    if nbStagedFiles:
      self.log.info( 'resolveSource: %d files have been staged' % nbStagedFiles )

    # Launching staging of files not in cache
    if toStage:
      gLogger.warn( "resolveSource: %s source files not cached, prestaging..." % len( toStage ) )
      stage = self.oSourceSE.prestageFile( toStage )
      if not stage["OK"]:
        gLogger.error( "resolveSource: error is prestaging - %s" % stage["Message"] )
        for pfn in toStage:
          lfn = toResolve[pfn]
          self.__setFileParameter( lfn, 'Reason', stage["Message"] )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        for pfn in toStage:
          lfn = toResolve[pfn]
          if pfn in stage['Value']['Successful']:
            self.__setFileParameter( lfn, 'Status', 'Staging' )
          elif pfn in stage['Value']['Failed']:
            self.__setFileParameter( lfn, 'Reason', stage['Value']['Failed'][pfn] )
            self.__setFileParameter( lfn, 'Status', 'Failed' )

    self.sourceResolved = True
    return S_OK()

  def resolveTarget( self ):
    """ find target SE eligible for submission

    :param self: self reference
    """
    toResolve = [ lfn for lfn in self.fileDict
                 if self.fileDict[lfn].get( 'Status' ) not in self.noSubmitStatus ]
    if not toResolve:
      return S_OK()
    res = self.__updateReplicaCache( toResolve )
    if not res['OK']:
      return res
    for lfn in toResolve:
      res = self.oTargetSE.getPfnForLfn( lfn )
      if not res['OK'] or lfn not in res['Value']['Successful']:
        gLogger.warn( "resolveTarget: skipping %s - failed to create target pfn" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Failed to create Target" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      pfn = res['Value']['Successful'][lfn]
      res = self.oTargetSE.getPfnForProtocol( pfn, protocol = 'SRM2', withPort = True )
      if not res['OK'] or pfn not in res['Value']['Successful']:
        reason = res.get( 'Message', res.get( 'Value', {} ).get( 'Failed', {} ).get( pfn ) )
        gLogger.warn( "resolveTarget: skipping %s - %s" % ( lfn, reason ) )
        self.__setFileParameter( lfn, 'Reason', reason )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      pfn = res['Value']['Successful'][pfn]
      res = self.setTargetSURL( lfn, pfn )
      if not res['OK']:
        gLogger.warn( "resolveTarget: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
    toResolve = {}
    for lfn in self.fileDict:
      if "Target" in self.fileDict[lfn]:
        toResolve[self.fileDict[lfn]['Target']] = lfn
    if not toResolve:
      return S_ERROR( "No eligible Target files" )
    res = self.oTargetSE.exists( toResolve.keys() )
    if not res['OK']:
      return S_ERROR( "Failed to check target existence" )
    for pfn, error in res['Value']['Failed'].items():
      lfn = toResolve[pfn]
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    toRemove = []
    for pfn, exists in res['Value']['Successful'].items():
      if exists:
        lfn = toResolve[pfn]
        res = self.getSourceSURL( lfn )
        if not res['OK']:
          gLogger.warn( "resolveTarget: skipping %s - target exists" % lfn )
          self.__setFileParameter( lfn, 'Reason', "Target exists" )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
        elif res['Value'] == pfn:
          gLogger.warn( "resolveTarget: skipping %s - source and target pfns are the same" % lfn )
          self.__setFileParameter( lfn, 'Reason', "Source and Target the same" )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
        else:
          toRemove.append( pfn )
    if toRemove:
      self.oTargetSE.removeFile( toRemove )
    return S_OK()

  def __filesToSubmit( self ):
    """
    check if there is at least one file to submit

    :return: S_OK if at least one file is present, S_ERROR otherwise
    """
    for lfn in self.fileDict:
      lfnStatus = self.fileDict[lfn].get( 'Status' )
      source = self.fileDict[lfn].get( 'Source' )
      target = self.fileDict[lfn].get( 'Target' )
      if lfnStatus not in self.noSubmitStatus and source and target:
        return S_OK()
    return S_ERROR()

  def __createFTSFiles( self ):
    """ create LFNs file for glite-transfer-submit command

    This file consists of one line for each file to be transferred:

    sourceSURL targetSURL [CHECKSUMTYPE:CHECKSUM]
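
    e.g. (hypothetical SURLs and ADLER32 checksum):
      srm://src.example.org/data/run1/file.raw srm://dst.example.org/data/run1/file.raw ADLER32:0a1b2c3d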

    :param self: self reference
    """
    self.__updateMetadataCache()
    for lfn in self.fileDict:
      lfnStatus = self.fileDict[lfn].get( 'Status' )
      if lfnStatus not in self.noSubmitStatus:
        cksmStr = ""
        # # add cksmType:cksm only if cksmType is specified, else let FTS decide by itself
        if self.__cksmTest and self.__cksmType:
          checkSum = self.catalogMetadata.get( lfn, {} ).get( 'Checksum' )
          if checkSum:
            cksmStr = " %s:%s" % ( self.__cksmType, intAdlerToHex( hexAdlerToInt( checkSum ) ) )
        ftsFile = FTSFile()
        ftsFile.LFN = lfn
        ftsFile.SourceSURL = self.fileDict[lfn].get( 'Source' )
        ftsFile.TargetSURL = self.fileDict[lfn].get( 'Target' )
        ftsFile.SourceSE = self.sourceSE
        ftsFile.TargetSE = self.targetSE
        ftsFile.Status = self.fileDict[lfn].get( 'Status' )
        ftsFile.Checksum = cksmStr
        ftsFile.Size = self.catalogMetadata.get( lfn, {} ).get( 'Size' )
        self.ftsFiles.append( ftsFile )
        self.submittedFiles += 1
    return S_OK()

  def __createFTSJob( self, guid = None ):
    self.__createFTSFiles()
    ftsJob = FTSJob()
    ftsJob.RequestID = 0
    ftsJob.OperationID = 0
    ftsJob.SourceSE = self.sourceSE
    ftsJob.TargetSE = self.targetSE
    ftsJob.SourceToken = self.sourceToken
    ftsJob.TargetToken = self.targetToken
    ftsJob.FTSServer = self.ftsServer
    if guid:
      ftsJob.FTSGUID = guid

    for ftsFile in self.ftsFiles:
      ftsFile.Attempt += 1
      ftsFile.Error = ""
      ftsJob.addFile( ftsFile )
    self.ftsJob = ftsJob

  def __submitFTSTransfer( self ):
    """ create and execute glite-transfer-submit CLI command

    :param self: self reference
    """
    log = gLogger.getSubLogger( 'Submit' )
    self.__createFTSJob()

    submit = self.ftsJob.submitFTS2( command = self.submitCommand )
    if not submit["OK"]:
      log.error( "unable to submit FTSJob: %s" % submit["Message"] )
      return submit

    log.info( "FTSJob '%s'@'%s' has been submitted" % ( self.ftsJob.FTSGUID, self.ftsJob.FTSServer ) )

    # # update statuses for job files
    for ftsFile in self.ftsJob:
      ftsFile.FTSGUID = self.ftsJob.FTSGUID
      ftsFile.Status = "Submitted"
      ftsFile.Attempt += 1

    log.info( "FTSJob '%s'@'%s' has been submitted" % ( self.ftsJob.FTSGUID, self.ftsJob.FTSServer ) )
    self.ftsGUID = self.ftsJob.FTSGUID
    return S_OK()

  def __resolveFTSServer( self ):
    """
    resolve FTS server to use, it should be the closest one from target SE

    :param self: self reference
    """
    from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getFTSServersForSites
    if not self.targetSE:
      return S_ERROR( "Target SE not set" )
    res = getSitesForSE( self.targetSE )
    if not res['OK'] or not res['Value']:
      return S_ERROR( "Could not determine target site" )
    targetSites = res['Value']

    targetSite = ''
    for targetSite in targetSites:
      targetFTS = getFTSServersForSites( [targetSite] )
      if targetFTS['OK']:
        ftsTarget = targetFTS['Value'][targetSite]
        if ftsTarget:
          self.ftsServer = ftsTarget
          return S_OK( self.ftsServer )
      else:
        return targetFTS
    return S_ERROR( 'No FTS server found for %s' % targetSite )

  ####################################################################
  #
  #  Methods for monitoring
  #

  def summary( self, untilTerminal = False, printOutput = False ):
    """ summary of FTS job

    :param self: self reference
    :param bool untilTerminal: flag to monitor FTS job to its final state
    :param bool printOutput: flag to print out monitoring information to the stdout
    """
    res = self.__isSummaryValid()
    if not res['OK']:
      return res
    while not self.isTerminal:
      res = self.__parseOutput( full = True )
      if not res['OK']:
        return res
      if untilTerminal:
        self.__print()
      self.isRequestTerminal()
      if res['Value'] or ( not untilTerminal ):
        break
      time.sleep( 1 )
    if untilTerminal:
      print ""
    if printOutput and ( not untilTerminal ):
      return self.dumpSummary( printOutput = printOutput )
    return S_OK()

  def monitor( self, untilTerminal = False, printOutput = False, full = True ):
    """ monitor FTS job

    :param self: self reference
    :param bool untilTerminal: flag to monitor FTS job to its final state
    :param bool printOutput: flag to print out monitoring information to the stdout
    """
    if not self.ftsJob:
      self.resolveSource()
      self.__createFTSJob( self.ftsGUID )
    res = self.__isSummaryValid()
    if not res['OK']:
      return res
    if untilTerminal:
      res = self.summary( untilTerminal = untilTerminal, printOutput = printOutput )
      if not res['OK']:
        return res
    res = self.__parseOutput( full = full )
    if not res['OK']:
      return res
    if untilTerminal:
      self.finalize()
    if printOutput:
      self.dump()
    return res

  def dumpSummary( self, printOutput = False ):
    """ get FTS job summary as str

    :param self: self reference
    :param bool printOutput: print summary to stdout
    """

    outStr = ''
    for status in sorted( self.statusSummary ):
      if self.statusSummary[status]:
        outStr = '%s\t%-10s : %-10s\n' % ( outStr, status, str( self.statusSummary[status] ) )
    outStr = outStr.rstrip( '\n' )
    if printOutput:
      print outStr
    return S_OK( outStr )

  def __print( self ):
    """ print progress bar of FTS job completeness to stdout

    :param self: self reference
    """
    width = 100
    bits = int( ( width * self.percentageComplete ) / 100 )
    outStr = "|%s>%s| %.1f%s %s %s" % ( "="*bits, " "*( width - bits ),
                                        self.percentageComplete, "%",
                                        self.requestStatus, " "*10 )
    sys.stdout.write( "%s\r" % ( outStr ) )
    sys.stdout.flush()

  def dump( self ):
    """ print FTS job parameters and files to stdout

    :param self: self reference
    """
    print "%-10s : %-10s" % ( "Status", self.requestStatus )
    print "%-10s : %-10s" % ( "Source", self.sourceSE )
    print "%-10s : %-10s" % ( "Target", self.targetSE )
    print "%-10s : %-128s" % ( "Server", self.ftsServer )
    print "%-10s : %-128s" % ( "GUID", self.ftsGUID )
    for lfn in sorted( self.fileDict ):
      print "\n  %-15s : %-128s" % ( 'LFN', lfn )
      for key in ['Source', 'Target', 'Status', 'Reason', 'Duration']:
        print "  %-15s : %-128s" % ( key, str( self.fileDict[lfn].get( key ) ) )
    return S_OK()

  def __isSummaryValid( self ):
    """ check validity of FTS job summary report

    :param self: self reference
    """
    if not self.ftsServer:
      return S_ERROR( "FTSServer not set" )
    if not self.ftsGUID:
      return S_ERROR( "FTSGUID not set" )
    return S_OK()

  def __parseOutput( self, full = False ):
    """ execute glite-transfer-status command and parse its output

    :param self: self reference
    :param bool full: glite-transfer-status verbosity level, when set, collect information of files as well
    """
    monitor = self.ftsJob.monitorFTS2( command = self.monitorCommand, full = full )
    if not monitor['OK']:
      return monitor
    self.percentageComplete = self.ftsJob.Completeness
    self.requestStatus = self.ftsJob.Status
    self.submitTime = self.ftsJob.SubmitTime

    statusSummary = monitor['Value']
    if statusSummary:
      for state in statusSummary:
        self.statusSummary[state] = statusSummary[state]

    self.transferTime = 0
    for ftsFile in self.ftsJob:
      lfn = ftsFile.LFN
      self.__setFileParameter( lfn, 'Status', ftsFile.Status )
      self.__setFileParameter( lfn, 'Reason', ftsFile.Error )
      self.__setFileParameter( lfn, 'Duration', ftsFile._duration )
      targetURL = self.__getFileParameter( lfn, 'Target' )
      if not targetURL['OK']:
        self.__setFileParameter( lfn, 'Target', ftsFile.TargetSURL )
      self.transferTime += int( ftsFile._duration )
    return S_OK()

  ####################################################################
  #
  #  Methods for finalization
  #

  def finalize( self ):
    """ finalize FTS job

    :param self: self reference
    """
    self.__updateMetadataCache()
    transEndTime = dateTime()
    regStartTime = time.time()
    res = self.getTransferStatistics()
    transDict = res['Value']

    res = self.__registerSuccessful( transDict['transLFNs'] )

    regSuc, regTotal = res['Value']
    regTime = time.time() - regStartTime
    if self.sourceSE and self.targetSE:
      self.__sendAccounting( regSuc, regTotal, regTime, transEndTime, transDict )
    return S_OK()

  def getTransferStatistics( self ):
    """ collect information of Transfers that can be used by Accounting

    :param self: self reference
    """
    transDict = { 'transTotal': len( self.fileDict ),
                  'transLFNs': [],
                  'transOK': 0,
                  'transSize': 0 }

    for lfn in self.fileDict:
      if self.fileDict[lfn].get( 'Status' ) in self.successfulStates:
        if self.fileDict[lfn].get( 'Duration', 0 ):
          transDict['transLFNs'].append( lfn )
          transDict['transOK'] += 1
          if lfn in self.catalogMetadata:
            transDict['transSize'] += self.catalogMetadata[lfn].get( 'Size', 0 )

    return S_OK( transDict )

  def getFailedRegistrations( self ):
    """ get failed registrations dict

    :param self: self reference
    """
    return S_OK( self.failedRegistrations )

  def __registerSuccessful( self, transLFNs ):
    """ register successfully transferred files to the catalogs,
    fill failedRegistrations dict for files that failed to register

    :param self: self reference
    :param list transLFNs: LFNs in FTS job
    """
    self.failedRegistrations = {}
    toRegister = {}
    for lfn in transLFNs:
      res = returnSingleResult( self.oTargetSE.getPfnForProtocol( self.fileDict[lfn].get( 'Target' ), protocol = 'SRM2', withPort = False ) )
      if not res['OK']:
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        toRegister[lfn] = { 'PFN' : res['Value'], 'SE' : self.targetSE }
    if not toRegister:
      return S_OK( ( 0, 0 ) )
    res = self.__getCatalogObject()
    if not res['OK']:
      self.failedRegistrations = toRegister
      self.log.error( 'Failed to get Catalog Object', res['Message'] )
      return S_OK( ( 0, len( toRegister ) ) )
    res = self.oCatalog.addReplica( toRegister )
    if not res['OK']:
      self.failedRegistrations = toRegister
      self.log.error( 'Failed to add replicas to the catalog', res['Message'] )
      return S_OK( ( 0, len( toRegister ) ) )
    for lfn, error in res['Value']['Failed'].items():
      self.failedRegistrations[lfn] = toRegister[lfn]
      self.log.error( 'Registration of Replica failed', '%s : %s' % ( lfn, str( error ) ) )
    return S_OK( ( len( res['Value']['Successful'] ), len( toRegister ) ) )

  def __sendAccounting( self, regSuc, regTotal, regTime, transEndTime, transDict ):
    """ send accounting record

    :param self: self reference
    :param regSuc: number of files successfully registered
    :param regTotal: number of files attempted to register
    :param regTime: time spent on registration
    :param transEndTime: time stamp at the end of the FTS job
    :param dict transDict: dict holding counters for transferred files, their sizes and successful transfers
    """

    oAccounting = DataOperation()
    oAccounting.setEndTime( transEndTime )
    oAccounting.setStartTime( self.submitTime )

    accountingDict = {}
    accountingDict['OperationType'] = 'replicateAndRegister'
    result = getProxyInfo()
    if not result['OK']:
      userName = 'unknown'
    else:
      userName = result['Value'].get( 'username', 'unknown' )
    accountingDict['User'] = userName
    accountingDict['Protocol'] = 'FTS' if 'fts3' not in self.ftsServer else 'FTS3'
    accountingDict['RegistrationTime'] = regTime
    accountingDict['RegistrationOK'] = regSuc
    accountingDict['RegistrationTotal'] = regTotal
    accountingDict['TransferOK'] = transDict['transOK']
    accountingDict['TransferTotal'] = transDict['transTotal']
    accountingDict['TransferSize'] = transDict['transSize']
    accountingDict['FinalStatus'] = self.requestStatus
    accountingDict['Source'] = self.sourceSE
    accountingDict['Destination'] = self.targetSE
    accountingDict['TransferTime'] = self.transferTime
    oAccounting.setValuesFromDict( accountingDict )
    self.log.verbose( "Attempting to commit accounting message..." )
    oAccounting.commit()
    self.log.verbose( "...committed." )
    return S_OK()
class CatalogPlugInTestCase(unittest.TestCase):
    """ Base class for the CatalogPlugin test case """
    def setUp(self):
        self.fullMetadata = [
            'Status', 'ChecksumType', 'OwnerRole', 'CreationDate', 'Checksum',
            'ModificationDate', 'OwnerDN', 'Mode', 'GUID', 'Size'
        ]
        self.dirMetadata = self.fullMetadata + ['NumberOfSubPaths']
        self.fileMetadata = self.fullMetadata + ['NumberOfLinks']

        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        self.assertTrue(valid)
        self.destDir = '/lhcb/test/unit-test/TestCatalogPlugin'
        self.link = "%s/link" % self.destDir

        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)

        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        for i in xrange(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assertTrue(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        pfn = 'protocol://host:port/storage/path%s' % lfn
        size = 10000000
        se = 'DIRAC-storage'
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {}
        fileDict[lfn] = {
            'PFN': pfn,
            'Size': size,
            'SE': se,
            'GUID': guid,
            'Checksum': adler
        }
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        self.assertTrue(res['OK'])
        self.assertTrue(res['Value'])
        self.assertTrue(res['Value']['Successful'])
        self.assertTrue(path in res['Value']['Successful'])
        return res['Value']['Successful'][path]

    def parseError(self, res, path):
        self.assertTrue(res['OK'])
        self.assertTrue(res['Value'])
        self.assertTrue(res['Value']['Failed'])
        self.assertTrue(path in res['Value']['Failed'])
        return res['Value']['Failed'][path]

    def cleanDirectory(self):
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        toRemove = returnValue['Files'].keys()
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assertTrue(returnValue)

    def purgeFiles(self, lfns):
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {}
                repDict[lfn] = {'PFN': pfn, 'SE': se}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()
  Script.showHelp()
  DIRACExit( -1 )
else:
  inputFileName = args[0]
  se = args[1]
  newStatus = args[2]

if os.path.exists(inputFileName):
  inputFile = open(inputFileName,'r')
  string = inputFile.read()
  lfns = string.splitlines()
  inputFile.close()
else:
  lfns = [inputFileName]

res = catalog.getReplicas( lfns, True )
if not res['OK']:
  print res['Message']
  DIRACExit( -1 )
replicas = res['Value']['Successful']

lfnDict = {}
for lfn in lfns:
  lfnDict[lfn] = {}
  lfnDict[lfn]['SE'] = se
  lfnDict[lfn]['Status'] = newStatus
  lfnDict[lfn]['PFN'] = replicas[lfn][se]

res = catalog.setReplicaStatus( lfnDict )
if not res['OK']:
  print "ERROR:",res['Message']
  def __retrieveAndUploadFile( self, file, outputDict ):
    """
    Retrieve, Upload, and remove
    """
    fileName = file
    inputPath = outputDict['InputPath']
    inputFCName = outputDict['InputFC']
    inBytes = 0
    if inputFCName == 'LocalDisk':
      inFile = file
      file = os.path.join( inputPath, file )
    else:
      inputFC = FileCatalog( [inputFCName] )

      inFile = os.path.join( inputPath, file )
      replicaDict = inputFC.getReplicas( inFile )
      if not replicaDict['OK']:
        self.log.error( replicaDict['Message'] )
        return S_ERROR( fileName )
      if not inFile in replicaDict['Value']['Successful']:
        self.log.error( replicaDict['Value']['Failed'][inFile] )
        return S_ERROR( fileName )
      seList = replicaDict['Value']['Successful'][inFile].keys()

      inputSE = StorageElement( seList[0] )
      self.log.info( 'Retrieving from %s:' % inputSE.name, inFile )
      # ret = inputSE.getFile( inFile )
      # the lcg_util binding prevents multithreading, use a subprocess instead
      res = pythonCall( 2 * 3600, inputSE.getFile, inFile )
      if not res['OK']:
        self.log.error( res['Message'] )
        return S_ERROR( fileName )
      ret = res['Value']
      if not ret['OK']:
        self.log.error( ret['Message'] )
        return S_ERROR( fileName )
      if not inFile in ret['Value']['Successful']:
        self.log.error( ret['Value']['Failed'][inFile] )
        return S_ERROR( fileName )

    if os.path.isfile( file ):
      inBytes = os.stat( file )[6]

    outputPath = outputDict['OutputPath']
    outputFCName = outputDict['OutputFC']
    replicaManager = ReplicaManager()
    outFile = os.path.join( outputPath, os.path.basename( file ) )
    transferOK = False
    for outputSEName in List.fromChar( outputDict['OutputSE'], "," ):
      outputSE = StorageElement( outputSEName )
      self.log.info( 'Trying to upload to %s:' % outputSE.name, outFile )
      # ret = replicaManager.putAndRegister( outFile, os.path.realpath( file ), outputSE.name, catalog=outputFCName )
      # the lcg_util binding prevents multithreading, use a subprocess instead
      result = pythonCall( 2 * 3600, replicaManager.putAndRegister, outFile, os.path.realpath( file ), outputSE.name, catalog = outputFCName )
      if result['OK'] and result['Value']['OK']:
        if outFile in result['Value']['Value']['Successful']:
          transferOK = True
          break
        else:
          self.log.error( result['Value']['Value']['Failed'][outFile] )
      else:
        if result['OK']:
          self.log.error( result['Value']['Message'] )
        else:
          self.log.error( result['Message'] )

    if not transferOK:
      return S_ERROR( fileName )

    if result['OK'] or inputFCName != 'LocalDisk':
      os.unlink( file )

    if not result['OK']:
      self.log.error( ret['Message'] )
      return S_ERROR( fileName )

    self.log.info( "Finished transferring %s [%s bytes]" % ( inFile, inBytes ) )
    self.__okTransferredFiles += 1
    self.__okTransferredBytes += inBytes

    if inputFCName == 'LocalDisk':
      return S_OK( fileName )

    # Now the file is on final SE/FC, remove from input SE/FC
    for se in seList:
      se = StorageElement( se )
      self.log.info( 'Removing from %s:' % se.name, inFile )
      se.removeFile( inFile )

    inputFC.removeFile( inFile )

    return S_OK( fileName )
class RequestPreparationAgent( AgentModule ):

  def initialize( self ):
    self.fileCatalog = FileCatalog()
    #self.stagerClient = StorageManagerClient()
    self.dataIntegrityClient = DataIntegrityClient()
    self.storageDB = StorageManagementDB()
    # This sets the default proxy to be used to the one defined under
    # /Operations/Shifter/DataManager.
    # The shifterProxy option in the Configuration can be used to change this default.
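    # For example, in a hypothetical CS layout the option could be set as:
    #   Systems/StorageManagement/Production/Agents/RequestPreparationAgent/shifterProxy = DataManager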
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()

  def execute( self ):
    res = self.prepareNewReplicas()
    return res

  def prepareNewReplicas( self ):
    """ This is the first logical task to be executed and manages the New->Waiting transition of the Replicas
    """
    res = self.__getNewReplicas()
    if not res['OK']:
      gLogger.fatal( "RequestPreparation.prepareNewReplicas: Failed to get replicas from StagerDB.", res['Message'] )
      return res
    if not res['Value']:
      gLogger.info( "There were no New replicas found" )
      return res
    replicas = res['Value']['Replicas']
    replicaIDs = res['Value']['ReplicaIDs']
    gLogger.info( "RequestPreparation.prepareNewReplicas: Obtained %s New replicas for preparation." % len( replicaIDs ) )

    # Check that the files exist in the FileCatalog
    res = self.__getExistingFiles( replicas.keys() )
    if not res['OK']:
      return res
    exist = res['Value']['Exist']
    terminal = res['Value']['Missing']
    failed = res['Value']['Failed']
    if not exist:
      gLogger.error( 'RequestPreparation.prepareNewReplicas: Failed to determine the existence of any files' )
      return S_OK()
    terminalReplicaIDs = {}
    for lfn, reason in terminal.items():
      for se, replicaID in replicas[lfn].items():
        terminalReplicaIDs[replicaID] = reason
      replicas.pop( lfn )
    gLogger.info( "RequestPreparation.prepareNewReplicas: %s files exist in the FileCatalog." % len( exist ) )
    if terminal:
      gLogger.info( "RequestPreparation.prepareNewReplicas: %s files do not exist in the FileCatalog." % len( terminal ) )

    # Obtain the file sizes from the FileCatalog
    res = self.__getFileSize( exist )
    if not res['OK']:
      return res
    failed.update( res['Value']['Failed'] )
    terminal = res['Value']['ZeroSize']
    fileSizes = res['Value']['FileSizes']
    if not fileSizes:
      gLogger.error( 'RequestPreparation.prepareNewReplicas: Failed to determine the size of any files' )
      return S_OK()
    for lfn, reason in terminal.items():
      for se, replicaID in replicas[lfn].items():
        terminalReplicaIDs[replicaID] = reason
      replicas.pop( lfn )
    gLogger.info( "RequestPreparation.prepareNewReplicas: Obtained %s file sizes from the FileCatalog." % len( fileSizes ) )
    if terminal:
      gLogger.info( "RequestPreparation.prepareNewReplicas: %s files registered with zero size in the FileCatalog." % len( terminal ) )

    # Obtain the replicas from the FileCatalog
    res = self.__getFileReplicas( fileSizes.keys() )
    if not res['OK']:
      return res
    failed.update( res['Value']['Failed'] )
    terminal = res['Value']['ZeroReplicas']
    fileReplicas = res['Value']['Replicas']
    if not fileReplicas:
      gLogger.error( 'RequestPreparation.prepareNewReplicas: Failed to determine replicas for any files' )
      return S_OK()
    for lfn, reason in terminal.items():
      for se, replicaID in replicas[lfn].items():
        terminalReplicaIDs[replicaID] = reason
      replicas.pop( lfn )
    gLogger.info( "RequestPreparation.prepareNewReplicas: Obtained replica information for %s file from the FileCatalog." % len( fileReplicas ) )
    if terminal:
      gLogger.info( "RequestPreparation.prepareNewReplicas: %s files registered with zero replicas in the FileCatalog." % len( terminal ) )

    # Check the replicas exist at the requested site
    replicaMetadata = []
    for lfn, requestedSEs in replicas.items():
      lfnReplicas = fileReplicas[lfn]
      for requestedSE, replicaID in requestedSEs.items():
        if not requestedSE in lfnReplicas.keys():
          terminalReplicaIDs[replicaID] = "LFN not registered at requested SE"
          replicas[lfn].pop( requestedSE )
        else:
          replicaMetadata.append( ( replicaID, lfnReplicas[requestedSE], fileSizes[lfn] ) )

    # Update the states of the files in the database
    if terminalReplicaIDs:
      gLogger.info( "RequestPreparation.prepareNewReplicas: %s replicas are terminally failed." % len( terminalReplicaIDs ) )
      #res = self.stagerClient.updateReplicaFailure( terminalReplicaIDs )
      res = self.storageDB.updateReplicaFailure( terminalReplicaIDs )
      if not res['OK']:
        gLogger.error( "RequestPreparation.prepareNewReplicas: Failed to update replica failures.", res['Message'] )
    if replicaMetadata:
      gLogger.info( "RequestPreparation.prepareNewReplicas: %s replica metadata to be updated." % len( replicaMetadata ) )
      # Sets the Status='Waiting' of CacheReplicas records that are OK with catalogue checks
      res = self.storageDB.updateReplicaInformation( replicaMetadata )
      if not res['OK']:
        gLogger.error( "RequestPreparation.prepareNewReplicas: Failed to update replica metadata.", res['Message'] )
    return S_OK()

  def __getNewReplicas( self ):
    """ This obtains the New replicas from the Replicas table and for each LFN the requested storage element """
    # First obtain the New replicas from the CacheReplicas table
    res = self.storageDB.getCacheReplicas( {'Status':'New'} )
    if not res['OK']:
      gLogger.error( "RequestPreparation.__getNewReplicas: Failed to get replicas with New status.", res['Message'] )
      return res
    if not res['Value']:
      gLogger.debug( "RequestPreparation.__getNewReplicas: No New replicas found to process." )
      return S_OK()
    else:
      gLogger.debug( "RequestPreparation.__getNewReplicas: Obtained %s New replicas(s) to process." % len( res['Value'] ) )
    replicas = {}
    replicaIDs = {}
    for replicaID, info in res['Value'].items():
      lfn = info['LFN']
      storageElement = info['SE']
      if lfn not in replicas:
        replicas[lfn] = {}
      replicas[lfn][storageElement] = replicaID
      replicaIDs[replicaID] = ( lfn, storageElement )
    return S_OK( {'Replicas':replicas, 'ReplicaIDs':replicaIDs} )

  def __getExistingFiles( self, lfns ):
    """ This checks that the files exist in the FileCatalog. """
    filesExist = []
    missing = {}
    res = self.fileCatalog.exists( lfns )
    if not res['OK']:
      gLogger.error( "RequestPreparation.__getExistingFiles: Failed to determine whether files exist.", res['Message'] )
      return res
    failed = res['Value']['Failed']
    for lfn, exists in res['Value']['Successful'].items():
      if exists:
        filesExist.append( lfn )
      else:
        missing[lfn] = 'LFN not registered in the FileCatalog'
    if missing:
      for lfn, reason in missing.items():
        gLogger.warn( "RequestPreparation.__getExistingFiles: %s" % reason, lfn )
      self.__reportProblematicFiles( missing.keys(), 'LFN-LFC-DoesntExist' )
    return S_OK( {'Exist':filesExist, 'Missing':missing, 'Failed':failed} )

  def __getFileSize( self, lfns ):
    """ This obtains the file size from the FileCatalog. """
    failed = []
    fileSizes = {}
    zeroSize = {}
    res = self.fileCatalog.getFileSize( lfns )
    if not res['OK']:
      gLogger.error( "RequestPreparation.__getFileSize: Failed to get sizes for files.", res['Message'] )
      return res
    failed = res['Value']['Failed']
    for lfn, size in res['Value']['Successful'].items():
      if size == 0:
        zeroSize[lfn] = "LFN registered with zero size in the FileCatalog"
      else:
        fileSizes[lfn] = size
    if zeroSize:
      for lfn, reason in zeroSize.items():
        gLogger.warn( "RequestPreparation.__getFileSize: %s" % reason, lfn )
      self.__reportProblematicFiles( zeroSize.keys(), 'LFN-LFC-ZeroSize' )
    return S_OK( {'FileSizes':fileSizes, 'ZeroSize':zeroSize, 'Failed':failed} )

  def __getFileReplicas( self, lfns ):
    """ This obtains the replicas from the FileCatalog. """
    replicas = {}
    noReplicas = {}
    res = self.fileCatalog.getReplicas( lfns )
    if not res['OK']:
      gLogger.error( "RequestPreparation.__getFileReplicas: Failed to obtain file replicas.", res['Message'] )
      return res
    failed = res['Value']['Failed']
    for lfn, lfnReplicas in res['Value']['Successful'].items():
      if not lfnReplicas:
        noReplicas[lfn] = "LFN registered with zero replicas in the FileCatalog"
      else:
        replicas[lfn] = lfnReplicas
    if noReplicas:
      for lfn, reason in noReplicas.items():
        gLogger.warn( "RequestPreparation.__getFileReplicas: %s" % reason, lfn )
      self.__reportProblematicFiles( noReplicas.keys(), 'LFN-LFC-NoReplicas' )
    return S_OK( {'Replicas':replicas, 'ZeroReplicas':noReplicas, 'Failed':failed} )

  def __reportProblematicFiles( self, lfns, reason ):
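    # NOTE: reporting is currently disabled by the early return below; the remaining code is never reached.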
    return S_OK()
    res = self.dataIntegrityClient.setFileProblematic( lfns, reason, self.name )
    if not res['OK']:
      gLogger.error( "RequestPreparation.__reportProblematicFiles: Failed to report missing files.", res['Message'] )
      return res
    if res['Value']['Successful']:
      gLogger.info( "RequestPreparation.__reportProblematicFiles: Successfully reported %s missing files." % len( res['Value']['Successful'] ) )
    if res['Value']['Failed']:
      gLogger.info( "RequestPreparation.__reportProblematicFiles: Failed to report %s problematic files." % len( res['Value']['Failed'] ) )
    return res
Example #23
class DataIntegrityClient( Client ):

  """
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB

  """

  def __init__( self, **kwargs ):

    super( DataIntegrityClient, self ).__init__( **kwargs )
    self.setServer( 'DataManagement/DataIntegrity' )
    self.dm = DataManager()
    self.fc = FileCatalog()

  def setFileProblematic( self, lfn, reason, sourceComponent = '' ):
    """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if isinstance( lfn, list ):
      lfns = lfn
    elif isinstance( lfn, basestring ):
      lfns = [lfn]
    else:
      errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setFileProblematic: Attempting to update %s files." % len( lfns ) )
    fileMetadata = {}
    for lfn in lfns:
      fileMetadata[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':'', 'SE':''}
    res = self.insertProblematic( sourceComponent, fileMetadata )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB" )
    return res

  def reportProblematicReplicas( self, replicaTuple, se, reason ):
    """ Simple wrapper function around setReplicaProblematic """
    gLogger.info( 'The following %s files had %s at %s' % ( len( replicaTuple ), reason, se ) )
    for lfn, _pfn, se, reason in sorted( replicaTuple ):
      if lfn:
        gLogger.info( lfn )
    res = self.setReplicaProblematic( replicaTuple, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with replicas', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with replicas' )

  def setReplicaProblematic( self, replicaTuple, sourceComponent = '' ):
    """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaTuple should be a tuple (or list of tuples) of the form ( lfn, pfn, se, prognosis )

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
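
        For example (hypothetical values):
          ( '/lhcb/data/2012/file.dst', 'srm://se.example.org/lhcb/data/2012/file.dst', 'CERN-DST', 'PFNZeroSize' )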
    """
    if isinstance( replicaTuple, tuple ):
      replicaTuple = [replicaTuple]
    elif isinstance( replicaTuple, list ):
      pass
    else:
      errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas." % len( replicaTuple ) )
    replicaDict = {}
    for lfn, pfn, se, reason in replicaTuple:
      replicaDict[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':pfn, 'SE':se}
    res = self.insertProblematic( sourceComponent, replicaDict )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB" )
      return res
    for lfn in replicaDict.keys():
      replicaDict[lfn]['Status'] = 'Problematic'

    res = self.fc.setReplicaStatus( replicaDict )
    if not res['OK']:
      errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
      gLogger.error( errStr, res['Message'] )
      return res
    failed = res['Value']['Failed']
    successful = res['Value']['Successful']
    resDict = {'Successful':successful, 'Failed':failed}
    return S_OK( resDict )

  ##########################################################################
  #
  # This section contains the resolution methods for various prognoses
  #

  def __updateCompletedFiles( self, prognosis, fileID ):
    gLogger.info( "%s file (%d) is resolved" % ( prognosis, fileID ) )
    return self.setProblematicStatus( fileID, 'Resolved' )

  def __returnProblematicError( self, fileID, res ):
    self.incrementProblematicRetry( fileID )
    gLogger.error( 'DataIntegrityClient failure', res['Message'] )
    return res

  def __updateReplicaToChecked( self, problematicDict ):
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']
    prognosis = problematicDict['Prognosis']
    problematicDict['Status'] = 'Checked'

    res = returnSingleResult( self.fc.setReplicaStatus( {lfn:problematicDict} ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    gLogger.info( "%s replica (%d) is updated to Checked status" % ( prognosis, fileID ) )
    return self.__updateCompletedFiles( prognosis, fileID )

  def resolveCatalogPFNSizeMismatch( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
    lfn = problematicDict['LFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']


    res = returnSingleResult( self.fc.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']
    res = returnSingleResult( StorageElement( se ).getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    bkKCatalog = FileCatalog( ['BookkeepingDB'] )
    res = returnSingleResult( bkKCatalog.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    bookkeepingSize = res['Value']
    if bookkeepingSize == catalogSize == storageSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    if catalogSize == bookkeepingSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID )
      res = returnSingleResult( self.fc.getReplicas( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      if len( res['Value'] ) <= 1:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID )
        return S_ERROR( "Not removing catalog file mismatch since the only replica" )
      else:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID )
        res = self.dm.removeReplica( se, lfn )
        if not res['OK']:
          return self.__returnProblematicError( fileID, res )
        return self.__updateCompletedFiles( 'CatalogPFNSizeMismatch', fileID )
    if ( catalogSize != bookkeepingSize ) and ( bookkeepingSize == storageSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID )
      res = self.__updateReplicaToChecked( problematicDict )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.changeProblematicPrognosis( fileID, 'BKCatalogSizeMismatch' )
    gLogger.info( "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count" % fileID )
    return self.incrementProblematicRetry( fileID )

  ############################################################################################

  def _reportProblematicFiles( self, lfns, reason ):
    """ Simple wrapper function around setFileProblematic
    """
    gLogger.info( 'The following %s files were found with %s' % ( len( lfns ), reason ) )
    for lfn in sorted( lfns ):
      gLogger.info( lfn )
    res = self.setFileProblematic( lfns, reason, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with files', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with files' )

  #FIXME: Unused?
  def resolvePFNNotRegistered( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
    lfn = problematicDict['LFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )
    res = returnSingleResult( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      # The file does not exist in the catalog
      res = returnSingleResult( se.removeFile( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    res = returnSingleResult( se.getFileMetadata( lfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNNotRegistered replica (%d) found to be missing." % fileID )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    elif not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageMetadata = res['Value']
    if storageMetadata['Lost']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    if storageMetadata['Unavailable']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count" % fileID )
      return self.incrementProblematicRetry( fileID )

    # HACK until we can obtain the space token descriptions through GFAL
    site = seName.split( '_' )[0].split( '-' )[0]
    if not storageMetadata['Cached']:
      if lfn.endswith( '.raw' ):
        seName = '%s-RAW' % site
      else:
        seName = '%s-RDST' % site
    elif storageMetadata['Migrated']:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s_M-DST' % site
      else:
        seName = '%s_MC_M-DST' % site
    else:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s-DST' % site
      else:
        seName = '%s_MC-DST' % site

    problematicDict['SE'] = seName
    res = returnSingleResult( se.getURL( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )

    problematicDict['PFN'] = res['Value']

    res = returnSingleResult( self.fc.addReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    res = returnSingleResult( self.fc.getFileMetadata( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']['Size'] != storageMetadata['Size']:
      gLogger.info( "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )

  #FIXME: Unused?
  def resolveLFNCatalogMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )
    # Remove the file from all catalogs
    # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
    res = returnSingleResult( self.fc.removeFile( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )

  #FIXME: Unused?
  def resolvePFNMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
    se = problematicDict['SE']
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      gLogger.info( "PFNMissing file (%d) no longer exists in catalog" % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )

    res = returnSingleResult( StorageElement( se ).exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      gLogger.info( "PFNMissing replica (%d) is no longer missing" % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    gLogger.info( "PFNMissing replica (%d) does not exist" % fileID )
    res = returnSingleResult( self.fc.getReplicas( lfn, allStatus = True ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    replicas = res['Value']
    seSite = se.split( '_' )[0].split( '-' )[0]
    found = False
    gLogger.verbose( "Available replicas: %s" % replicas )
    for replicaSE in replicas.keys():
      if re.search( seSite, replicaSE ):
        found = True
        problematicDict['SE'] = replicaSE
        se = replicaSE
    if not found:
      gLogger.info( "PFNMissing replica (%d) is no longer registered at SE. Resolved." % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )
    gLogger.info( "PFNMissing replica (%d) does not exist. Removing from catalog..." % fileID )
    res = returnSingleResult( self.fc.removeReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if len( replicas ) == 1:
      gLogger.info( "PFNMissing replica (%d) had a single replica. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'LFNZeroReplicas' )
    res = self.dm.replicateAndRegister( problematicDict['LFN'], se )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'PFNMissing', fileID )

  #FIXME: Unused?
  def resolvePFNUnavailable( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
    lfn = problematicDict['LFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = returnSingleResult( StorageElement( se ).getFileMetadata( lfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      # The file is no longer Unavailable but has now disappeared completely
      gLogger.info( "PFNUnavailable replica (%d) found to be missing. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    if ( not res['OK'] ) or res['Value']['Unavailable']:
      gLogger.info( "PFNUnavailable replica (%d) found to still be Unavailable" % fileID )
      return self.incrementProblematicRetry( fileID )
    if res['Value']['Lost']:
      gLogger.info( "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    gLogger.info( "PFNUnavailable replica (%d) is no longer Unavailable" % fileID )
    # Need to make the replica okay in the Catalog
    return self.__updateReplicaToChecked( problematicDict )

  #FIXME: Unused?
  def resolvePFNZeroSize( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
    lfn = problematicDict['LFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )

    res = returnSingleResult( se.getFileSize( lfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNZeroSize replica (%d) found to be missing. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    storageSize = res['Value']
    if storageSize == 0:
      res = returnSingleResult( se.removeFile( lfn ) )

      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      gLogger.info( "PFNZeroSize replica (%d) removed. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )


    res = returnSingleResult( self.fc.getReplicas( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if seName not in res['Value']:
      gLogger.info( "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNNotRegistered' )
    res = returnSingleResult( self.fc.getFileMetadata( lfn ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']['Size']
    if catalogSize != storageSize:
      gLogger.info( "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNZeroSize', fileID )

  ############################################################################################

  #FIXME: Unused?
  def resolveLFNZeroReplicas( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = returnSingleResult( self.fc.getReplicas( lfn, allStatus = True ) )
    if res['OK'] and res['Value']:
      gLogger.info( "LFNZeroReplicas file (%d) found to have replicas" % fileID )
    else:
      gLogger.info( "LFNZeroReplicas file (%d) does not have replicas. Checking storage..." % fileID )
      pfnsFound = False
      for storageElementName in sorted( gConfig.getValue( 'Resources/StorageElementGroups/Tier1_MC_M-DST', [] ) ):
        res = self.__getStoragePathExists( [lfn], storageElementName )
        if lfn in res['Value']:
          gLogger.info( "LFNZeroReplicas file (%d) found storage file at %s" % ( fileID, storageElementName ) )
          self.reportProblematicReplicas( [( lfn, 'deprecatedUrl', storageElementName, 'PFNNotRegistered' )], storageElementName, 'PFNNotRegistered' )
          pfnsFound = True
      if not pfnsFound:
        gLogger.info( "LFNZeroReplicas file (%d) did not have storage files. Removing..." % fileID )
        res = returnSingleResult( self.fc.removeFile( lfn ) )
        if not res['OK']:
          gLogger.error( 'DataIntegrityClient: failed to remove file', res['Message'] )
          # Increment the number of retries for this file
          self.server.incrementProblematicRetry( fileID )
          return res
        gLogger.info( "LFNZeroReplicas file (%d) removed from catalog" % fileID )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'LFNZeroReplicas', fileID )


  def _reportProblematicFiles( self, lfns, reason ):
    """ Simple wrapper function around setFileProblematic
    """
    gLogger.info( 'The following %s files were found with %s' % ( len( lfns ), reason ) )
    for lfn in sorted( lfns ):
      gLogger.info( lfn )
    res = self.setFileProblematic( lfns, reason, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with files', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with files' )
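A minimal usage sketch of the wrapper above, assuming the client class is importable as DIRAC.DataManagementSystem.Client.DataIntegrityClient (adjust the import to your DIRAC layout); the LFNs and the 'LFN-LFC-ZeroSize' prognosis label are illustrative only:

from DIRAC import gLogger
from DIRAC.DataManagementSystem.Client.DataIntegrityClient import DataIntegrityClient

def reportZeroSizeFiles( lfns ):
  """ Flag the supplied LFNs as problematic with an illustrative prognosis label """
  client = DataIntegrityClient()
  res = client.setFileProblematic( lfns, 'LFN-LFC-ZeroSize', sourceComponent = 'ExampleScript' )
  if not res['OK']:
    gLogger.error( 'Failed to update integrity DB with files', res['Message'] )
  return res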
Example #25
0
class DataIntegrityClient(Client):
    """Client exposing the DataIntegrity Service."""
    def __init__(self, **kwargs):

        super(DataIntegrityClient, self).__init__(**kwargs)
        self.setServer('DataManagement/DataIntegrity')
        self.dm = DataManager()
        self.fc = FileCatalog()

    def setFileProblematic(self, lfn, reason, sourceComponent=''):
        """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if isinstance(lfn, list):
            lfns = lfn
        elif isinstance(lfn, six.string_types):
            lfns = [lfn]
        else:
            errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setFileProblematic: Attempting to update %s files."
            % len(lfns))
        fileMetadata = {}
        for lfn in lfns:
            fileMetadata[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': '',
                'SE': ''
            }
        res = self.insertProblematic(sourceComponent, fileMetadata)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB"
            )
        return res

    def reportProblematicReplicas(self, replicaTuple, se, reason):
        """ Simple wrapper function around setReplicaProblematic """
        gLogger.info('The following %s files had %s at %s' %
                     (len(replicaTuple), reason, se))
        for lfn, _pfn, se, reason in sorted(replicaTuple):
            if lfn:
                gLogger.info(lfn)
        res = self.setReplicaProblematic(replicaTuple,
                                         sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with replicas',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with replicas')

    def setReplicaProblematic(self, replicaTuple, sourceComponent=''):
        """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaDict should be of the form {lfn :{'PFN':pfn,'SE':se,'Prognosis':prognosis}

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if isinstance(replicaTuple, tuple):
            replicaTuple = [replicaTuple]
        elif isinstance(replicaTuple, list):
            pass
        else:
            errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas."
            % len(replicaTuple))
        replicaDict = {}
        for lfn, pfn, se, reason in replicaTuple:
            replicaDict[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': pfn,
                'SE': se
            }
        res = self.insertProblematic(sourceComponent, replicaDict)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB"
            )
            return res
        for lfn in replicaDict.keys():
            replicaDict[lfn]['Status'] = 'Problematic'

        res = self.fc.setReplicaStatus(replicaDict)
        if not res['OK']:
            errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
            gLogger.error(errStr, res['Message'])
            return res
        failed = res['Value']['Failed']
        successful = res['Value']['Successful']
        resDict = {'Successful': successful, 'Failed': failed}
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the resolution methods for various prognoses
    #

    def __updateCompletedFiles(self, prognosis, fileID):
        gLogger.info("%s file (%d) is resolved" % (prognosis, fileID))
        return self.setProblematicStatus(fileID, 'Resolved')

    def __returnProblematicError(self, fileID, res):
        self.incrementProblematicRetry(fileID)
        gLogger.error('DataIntegrityClient failure', res['Message'])
        return res

    def __updateReplicaToChecked(self, problematicDict):
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        prognosis = problematicDict['Prognosis']
        problematicDict['Status'] = 'Checked'

        res = returnSingleResult(
            self.fc.setReplicaStatus({lfn: problematicDict}))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        gLogger.info("%s replica (%d) is updated to Checked status" %
                     (prognosis, fileID))
        return self.__updateCompletedFiles(prognosis, fileID)

    def resolveCatalogPFNSizeMismatch(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']
        res = returnSingleResult(StorageElement(se).getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        bkKCatalog = FileCatalog(['BookkeepingDB'])
        res = returnSingleResult(bkKCatalog.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        bookkeepingSize = res['Value']
        if bookkeepingSize == catalogSize == storageSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) matched all registered sizes."
                % fileID)
            return self.__updateReplicaToChecked(problematicDict)
        if catalogSize == bookkeepingSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also"
                % fileID)
            res = returnSingleResult(self.fc.getReplicas(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            if len(res['Value']) <= 1:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has no other replicas."
                    % fileID)
                return S_ERROR(
                    "Not removing catalog file mismatch since the only replica"
                )
            else:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..."
                    % fileID)
                res = self.dm.removeReplica(se, lfn)
                if not res['OK']:
                    return self.__returnProblematicError(fileID, res)
                return self.__updateCompletedFiles('CatalogPFNSizeMismatch',
                                                   fileID)
        if (catalogSize != bookkeepingSize) and (bookkeepingSize
                                                 == storageSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size"
                % fileID)
            res = self.__updateReplicaToChecked(problematicDict)
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.changeProblematicPrognosis(fileID,
                                                   'BKCatalogSizeMismatch')
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count"
            % fileID)
        return self.incrementProblematicRetry(fileID)
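    # Summary sketch of the size checks performed above (no new behaviour):
    #   bookkeeping == catalog == storage   -> replica marked 'Checked'
    #   catalog == bookkeeping != storage   -> remove the bad replica (only if other replicas exist)
    #   bookkeeping == storage != catalog   -> re-check replica, reclassify as 'BKCatalogSizeMismatch'
    #   all three sizes differ              -> increment the retry counter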

    ############################################################################################

    def _reportProblematicFiles(self, lfns, reason):
        """ Simple wrapper function around setFileProblematic
    """
        gLogger.info('The following %s files were found with %s' %
                     (len(lfns), reason))
        for lfn in sorted(lfns):
            gLogger.info(lfn)
        res = self.setFileProblematic(lfns,
                                      reason,
                                      sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with files',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with files')
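A short, hypothetical sketch of the (lfn, pfn, se, reason) tuples consumed by reportProblematicReplicas / setReplicaProblematic above; the LFNs, the SE name and the import path are assumptions:

from DIRAC.DataManagementSystem.Client.DataIntegrityClient import DataIntegrityClient

replicaTuples = [
    ("/lhcb/test/integrity/file_1", "", "CERN-DST", "PFNMissing"),
    ("/lhcb/test/integrity/file_2", "", "CERN-DST", "PFNMissing"),
]
client = DataIntegrityClient()
client.reportProblematicReplicas(replicaTuples, "CERN-DST", "PFNMissing")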
class CatalogPlugInTestCase(unittest.TestCase):
    """Base class for the CatalogPlugin test case"""
    def setUp(self):
        self.fullMetadata = [
            "Status",
            "ChecksumType",
            "OwnerRole",
            "CreationDate",
            "Checksum",
            "ModificationDate",
            "OwnerDN",
            "Mode",
            "GUID",
            "Size",
        ]
        self.dirMetadata = self.fullMetadata + ["NumberOfSubPaths"]
        self.fileMetadata = self.fullMetadata + ["NumberOfLinks"]

        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        self.assertTrue(valid)
        self.destDir = "/lhcb/test/unit-test/TestCatalogPlugin"
        self.link = "%s/link" % self.destDir

        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)

        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        for i in range(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assertTrue(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        pfn = "protocol://host:port/storage/path%s" % lfn
        size = 10000000
        se = "DIRAC-storage"
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {}
        fileDict[lfn] = {
            "PFN": pfn,
            "Size": size,
            "SE": se,
            "GUID": guid,
            "Checksum": adler
        }
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        self.assertTrue(res["Value"]["Successful"])
        self.assertTrue(path in res["Value"]["Successful"])
        return res["Value"]["Successful"][path]

    def parseError(self, res, path):
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        self.assertTrue(res["Value"]["Failed"])
        self.assertTrue(path in res["Value"]["Failed"])
        return res["Value"]["Failed"][path]

    def cleanDirectory(self):
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        toRemove = list(returnValue["Files"])
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assertTrue(returnValue)

    def purgeFiles(self, lfns):
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {}
                repDict[lfn] = {"PFN": pfn, "SE": se}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()
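A hypothetical runner for the test case above; catalogClientToTest, makeGuid and stringAdler are assumed to be provided by the surrounding test module, as in the original file:

import unittest

if __name__ == "__main__":
    suite = unittest.defaultTestLoader.loadTestsFromTestCase(CatalogPlugInTestCase)
    unittest.TextTestRunner(verbosity=2).run(suite)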
Example #27
0
if len(args) != 3:
    Script.showHelp(exitCode=1)
else:
    inputFileName = args[0]
    se = args[1]
    newStatus = args[2]

if os.path.exists(inputFileName):
    inputFile = open(inputFileName, 'r')
    string = inputFile.read()
    lfns = string.splitlines()
    inputFile.close()
else:
    lfns = [inputFileName]

catalog = FileCatalog()  # assumed: FileCatalog imported in the truncated part above, instantiated as in the other examples
res = catalog.getReplicas(lfns, True)
if not res['OK']:
    print(res['Message'])
    DIRACExit(-1)
replicas = res['Value']['Successful']

lfnDict = {}
for lfn in lfns:
    # Skip files for which the catalog lookup failed or that have no replica at the requested SE
    if lfn not in replicas or se not in replicas[lfn]:
        print("WARNING: no replica of %s registered at %s" % (lfn, se))
        continue
    lfnDict[lfn] = {'SE': se, 'Status': newStatus, 'PFN': replicas[lfn][se]}

res = catalog.setReplicaStatus(lfnDict)
if not res['OK']:
    print("ERROR:", res['Message'])
Example #28
0
  def __call__( self ):
    """ action for 'removeFile' operation  """
    # # get waiting files
    waitingFiles = self.getWaitingFilesList()
    fc = FileCatalog( self.operation.catalogList )

    res = fc.getReplicas( [wf.LFN for wf in waitingFiles] )
    if not res['OK']:
      gMonitor.addMark( "RemoveFileAtt" )
      gMonitor.addMark( "RemoveFileFail" )
      return res

    # We check the status of the SE from the LFN that are successful
    # No idea what to do with the others...
    replicas = res['Value']['Successful']
    targetSEs = set( [se for lfn in replicas for se in replicas[lfn] ] )

    if targetSEs:
      # Check if SEs are allowed for removal, but don't fail the operation yet if SEs are always banned
      bannedTargets = self.checkSEsRSS( targetSEs, access = 'RemoveAccess', failIfBanned = False )
      if not bannedTargets['OK']:
        gMonitor.addMark( "RemoveFileAtt" )
        gMonitor.addMark( "RemoveFileFail" )
        return bannedTargets
      bannedTargets = set( bannedTargets['Value'] )
    else:
      bannedTargets = set()

    # # prepare waiting file dict
    # # We take only files that have no replica at the banned SEs (files with no replica information are kept)
    toRemoveDict = dict( ( opFile.LFN, opFile ) for opFile in waitingFiles \
                         if not bannedTargets or not bannedTargets.intersection( replicas.get( opFile.LFN, [] ) ) )
    # If some SEs are always banned, set Failed the files that cannot be removed
    if bannedTargets and 'always banned' in self.operation.Error:
      for opFile in waitingFiles:
        if opFile.LFN not in toRemoveDict:
          # Set the files that cannot be removed Failed
          opFile.Error = self.operation.Error
          opFile.Status = "Failed"
      if not toRemoveDict:
        # If there are no files that can be removed, exit, else try once to remove them anyway
        return S_OK( "%s targets are always banned for removal" % ",".join( sorted( bannedTargets ) ) )

    if toRemoveDict:
      gMonitor.addMark( "RemoveFileAtt", len( toRemoveDict ) )
      # # 1st step - bulk removal
      self.log.debug( "bulk removal of %s files" % len( toRemoveDict ) )
      bulkRemoval = self.bulkRemoval( toRemoveDict )
      if not bulkRemoval["OK"]:
        self.log.error( "Bulk file removal failed", bulkRemoval["Message"] )
      else:
        gMonitor.addMark( "RemoveFileOK", len( toRemoveDict ) - len( bulkRemoval["Value"] ) )
        toRemoveDict = bulkRemoval["Value"]

      # # 2nd step - single file removal
      for lfn, opFile in toRemoveDict.iteritems():
        self.log.info( "removing single file %s" % lfn )
        singleRemoval = self.singleRemoval( opFile )
        if not singleRemoval["OK"]:
          self.log.error( 'Error removing single file', singleRemoval["Message"] )
          gMonitor.addMark( "RemoveFileFail", 1 )
        else:
          self.log.info( "file %s has been removed" % lfn )
          gMonitor.addMark( "RemoveFileOK", 1 )

      # # set
      failedFiles = [ ( lfn, opFile ) for ( lfn, opFile ) in toRemoveDict.iteritems()
                      if opFile.Status in ( "Failed", "Waiting" ) ]
      if failedFiles:
        self.operation.Error = "failed to remove %d files" % len( failedFiles )

    if bannedTargets:
      return S_OK( "%s targets are banned for removal" % ",".join( sorted( bannedTargets ) ) )
    return S_OK()
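A standalone sketch, with made-up LFNs and SE names, of the banned-SE filter applied to the waiting files above: a file is kept for removal only when none of its replicas sit at a banned SE.

replicas = {
    "/lhcb/data/fileA": ["CERN-DST", "GRIDKA-DST"],
    "/lhcb/data/fileB": ["PIC-DST"],
}
bannedTargets = set(["GRIDKA-DST"])

toRemove = [lfn for lfn, ses in replicas.items()
            if not bannedTargets or not bannedTargets.intersection(ses)]
# toRemove == ["/lhcb/data/fileB"]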
Example #29
0
class RequestPreparationAgent(AgentModule):
    def initialize(self):
        self.fileCatalog = FileCatalog()
        #self.stagerClient = StorageManagerClient()
        self.dataIntegrityClient = DataIntegrityClient()
        self.storageDB = StorageManagementDB()
        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        return S_OK()

    def execute(self):
        res = self.prepareNewReplicas()
        return res

    def prepareNewReplicas(self):
        """ This is the first logical task to be executed and manages the New->Waiting transition of the Replicas
    """
        res = self.__getNewReplicas()
        if not res['OK']:
            gLogger.fatal(
                "RequestPreparation.prepareNewReplicas: Failed to get replicas from StagerDB.",
                res['Message'])
            return res
        if not res['Value']:
            gLogger.info("There were no New replicas found")
            return res
        replicas = res['Value']['Replicas']
        replicaIDs = res['Value']['ReplicaIDs']
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained %s New replicas for preparation."
            % len(replicaIDs))

        # Check that the files exist in the FileCatalog
        res = self.__getExistingFiles(replicas.keys())
        if not res['OK']:
            return res
        exist = res['Value']['Exist']
        terminal = res['Value']['Missing']
        failed = res['Value']['Failed']
        if not exist:
            gLogger.error(
                'RequestPreparation.prepareNewReplicas: Failed to determine the existence of any files'
            )
            return S_OK()
        terminalReplicaIDs = {}
        for lfn, reason in terminal.items():
            for se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: %s files exist in the FileCatalog."
            % len(exist))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files do not exist in the FileCatalog."
                % len(terminal))

        # Obtain the file sizes from the FileCatalog
        res = self.__getFileSize(exist)
        if not res['OK']:
            return res
        failed.update(res['Value']['Failed'])
        terminal = res['Value']['ZeroSize']
        fileSizes = res['Value']['FileSizes']
        if not fileSizes:
            gLogger.error(
                'RequestPreparation.prepareNewReplicas: Failed to determine the size of any files'
            )
            return S_OK()
        for lfn, reason in terminal.items():
            for se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained %s file sizes from the FileCatalog."
            % len(fileSizes))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files registered with zero size in the FileCatalog."
                % len(terminal))

        # Obtain the replicas from the FileCatalog
        res = self.__getFileReplicas(fileSizes.keys())
        if not res['OK']:
            return res
        failed.update(res['Value']['Failed'])
        terminal = res['Value']['ZeroReplicas']
        fileReplicas = res['Value']['Replicas']
        if not fileReplicas:
            gLogger.error(
                'RequestPreparation.prepareNewReplicas: Failed to determine replicas for any files'
            )
            return S_OK()
        for lfn, reason in terminal.items():
            for se, replicaID in replicas[lfn].items():
                terminalReplicaIDs[replicaID] = reason
            replicas.pop(lfn)
        gLogger.info(
            "RequestPreparation.prepareNewReplicas: Obtained replica information for %s file from the FileCatalog."
            % len(fileReplicas))
        if terminal:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s files registered with zero replicas in the FileCatalog."
                % len(terminal))

        # Check the replicas exist at the requested site
        replicaMetadata = []
        for lfn, requestedSEs in replicas.items():
            lfnReplicas = fileReplicas[lfn]
            for requestedSE, replicaID in requestedSEs.items():
                if requestedSE not in lfnReplicas:
                    terminalReplicaIDs[
                        replicaID] = "LFN not registered at requested SE"
                    replicas[lfn].pop(requestedSE)
                else:
                    replicaMetadata.append(
                        (replicaID, lfnReplicas[requestedSE], fileSizes[lfn]))

        # Update the states of the files in the database
        if terminalReplicaIDs:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s replicas are terminally failed."
                % len(terminalReplicaIDs))
            #res = self.stagerClient.updateReplicaFailure( terminalReplicaIDs )
            res = self.storageDB.updateReplicaFailure(terminalReplicaIDs)
            if not res['OK']:
                gLogger.error(
                    "RequestPreparation.prepareNewReplicas: Failed to update replica failures.",
                    res['Message'])
        if replicaMetadata:
            gLogger.info(
                "RequestPreparation.prepareNewReplicas: %s replica metadata to be updated."
                % len(replicaMetadata))
            # Sets the Status='Waiting' of CacheReplicas records that are OK with catalogue checks
            res = self.storageDB.updateReplicaInformation(replicaMetadata)
            if not res['OK']:
                gLogger.error(
                    "RequestPreparation.prepareNewReplicas: Failed to update replica metadata.",
                    res['Message'])
        return S_OK()

    def __getNewReplicas(self):
        """ This obtains the New replicas from the Replicas table and for each LFN the requested storage element """
        # First obtain the New replicas from the CacheReplicas table
        res = self.storageDB.getCacheReplicas({'Status': 'New'})
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getNewReplicas: Failed to get replicas with New status.",
                res['Message'])
            return res
        if not res['Value']:
            gLogger.debug(
                "RequestPreparation.__getNewReplicas: No New replicas found to process."
            )
            return S_OK()
        else:
            gLogger.debug(
                "RequestPreparation.__getNewReplicas: Obtained %s New replicas(s) to process."
                % len(res['Value']))
        replicas = {}
        replicaIDs = {}
        for replicaID, info in res['Value'].items():
            lfn = info['LFN']
            storageElement = info['SE']
            if lfn not in replicas:
                replicas[lfn] = {}
            replicas[lfn][storageElement] = replicaID
            replicaIDs[replicaID] = (lfn, storageElement)
        return S_OK({'Replicas': replicas, 'ReplicaIDs': replicaIDs})
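        # Shape of the structure returned above (illustrative values):
        #   'Replicas':   { '/lhcb/data/someFile': { 'CERN-RAW': 1234 } }
        #   'ReplicaIDs': { 1234: ('/lhcb/data/someFile', 'CERN-RAW') }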

    def __getExistingFiles(self, lfns):
        """ This checks that the files exist in the FileCatalog. """
        filesExist = []
        missing = {}
        res = self.fileCatalog.exists(lfns)
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getExistingFiles: Failed to determine whether files exist.",
                res['Message'])
            return res
        failed = res['Value']['Failed']
        for lfn, exists in res['Value']['Successful'].items():
            if exists:
                filesExist.append(lfn)
            else:
                missing[lfn] = 'LFN not registered in the FileCatalog'
        if missing:
            for lfn, reason in missing.items():
                gLogger.warn(
                    "RequestPreparation.__getExistingFiles: %s" % reason, lfn)
            self.__reportProblematicFiles(missing.keys(),
                                          'LFN-LFC-DoesntExist')
        return S_OK({
            'Exist': filesExist,
            'Missing': missing,
            'Failed': failed
        })

    def __getFileSize(self, lfns):
        """ This obtains the file size from the FileCatalog. """
        failed = []
        fileSizes = {}
        zeroSize = {}
        res = self.fileCatalog.getFileSize(lfns)
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getFileSize: Failed to get sizes for files.",
                res['Message'])
            return res
        failed = res['Value']['Failed']
        for lfn, size in res['Value']['Successful'].items():
            if size == 0:
                zeroSize[
                    lfn] = "LFN registered with zero size in the FileCatalog"
            else:
                fileSizes[lfn] = size
        if zeroSize:
            for lfn, reason in zeroSize.items():
                gLogger.warn("RequestPreparation.__getFileSize: %s" % reason,
                             lfn)
            self.__reportProblematicFiles(zeroSize.keys(), 'LFN-LFC-ZeroSize')
        return S_OK({
            'FileSizes': fileSizes,
            'ZeroSize': zeroSize,
            'Failed': failed
        })

    def __getFileReplicas(self, lfns):
        """ This obtains the replicas from the FileCatalog. """
        replicas = {}
        noReplicas = {}
        res = self.fileCatalog.getReplicas(lfns)
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__getFileReplicas: Failed to obtain file replicas.",
                res['Message'])
            return res
        failed = res['Value']['Failed']
        for lfn, lfnReplicas in res['Value']['Successful'].items():
            if len(lfnReplicas.keys()) == 0:
                noReplicas[
                    lfn] = "LFN registered with zero replicas in the FileCatalog"
            else:
                replicas[lfn] = lfnReplicas
        if noReplicas:
            for lfn, reason in noReplicas.items():
                gLogger.warn(
                    "RequestPreparation.__getFileReplicas: %s" % reason, lfn)
            self.__reportProblematicFiles(noReplicas.keys(),
                                          'LFN-LFC-NoReplicas')
        return S_OK({
            'Replicas': replicas,
            'ZeroReplicas': noReplicas,
            'Failed': failed
        })

    def __reportProblematicFiles(self, lfns, reason):
        # Problematic-file reporting is deliberately short-circuited here;
        # the code below is kept for reference but never executed.
        return S_OK()
        res = self.dataIntegrityClient.setFileProblematic(
            lfns, reason, self.name)
        if not res['OK']:
            gLogger.error(
                "RequestPreparation.__reportProblematicFiles: Failed to report missing files.",
                res['Message'])
            return res
        if res['Value']['Successful']:
            gLogger.info(
                "RequestPreparation.__reportProblematicFiles: Successfully reported %s missing files."
                % len(res['Value']['Successful']))
        if res['Value']['Failed']:
            gLogger.info(
                "RequestPreparation.__reportProblematicFiles: Failed to report %s problematic files."
                % len(res['Value']['Failed']))
        return res
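The terminal-failure bookkeeping used three times in prepareNewReplicas above can be summarised with a small standalone sketch (made-up LFN, SE names and replica IDs):

replicas = {"/lhcb/data/badFile": {"CERN-RAW": 1234, "GRIDKA-RAW": 5678}}
terminal = {"/lhcb/data/badFile": "LFN not registered in the FileCatalog"}

terminalReplicaIDs = {}
for lfn, reason in terminal.items():
    for _se, replicaID in replicas[lfn].items():
        terminalReplicaIDs[replicaID] = reason
    replicas.pop(lfn)
# terminalReplicaIDs == {1234: "LFN not registered in the FileCatalog",
#                        5678: "LFN not registered in the FileCatalog"}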
Example #30
0
class FTSRequest( object ):
  """
  .. class:: FTSRequest

  Helper class for FTS job submission and monitoring.
  """

  # # default checksum type
  __defaultCksmType = "ADLER32"
  # # flag to disable/enable checksum test, default: disabled
  __cksmTest = False

  def __init__( self ):
    """c'tor

    :param self: self reference
    """
    self.log = gLogger.getSubLogger( self.__class__.__name__, True )

    # # final states tuple
    self.finalStates = ( 'Canceled', 'Failed', 'Hold',
                         'Finished', 'FinishedDirty' )
    # # failed states tuple
    self.failedStates = ( 'Canceled', 'Failed',
                          'Hold', 'FinishedDirty' )
    # # successful states tuple
    self.successfulStates = ( 'Finished', 'Done' )
    # # all file states tuple
    self.fileStates = ( 'Done', 'Active', 'Pending', 'Ready', 'Canceled', 'Failed',
                        'Finishing', 'Finished', 'Submitted', 'Hold', 'Waiting' )

    self.statusSummary = {}

    # # request status
    self.requestStatus = 'Unknown'

    # # dict for FTS job files
    self.fileDict = {}
    # # dict for replicas information
    self.catalogReplicas = {}
    # # dict for metadata information
    self.catalogMetadata = {}
    # # dict for files that failed to register
    self.failedRegistrations = {}

    # # placeholder for FileCatalog reference
    self.oCatalog = None

    # # submit timestamp
    self.submitTime = ''

    # # placeholder FTS job GUID
    self.ftsGUID = ''
    # # placeholder for FTS server URL
    self.ftsServer = ''

    # # flag marking FTS job completeness
    self.isTerminal = False
    # # completeness percentage
    self.percentageComplete = 0.0

    # # source SE name
    self.sourceSE = ''
    # # flag marking source SE validity
    self.sourceValid = False
    # # source space token
    self.sourceToken = ''

    # # target SE name
    self.targetSE = ''
    # # flag marking target SE validity
    self.targetValid = False
    # # target space token
    self.targetToken = ''

    # # placeholder for target StorageElement
    self.oTargetSE = None
    # # placeholder for source StorageElement
    self.oSourceSE = None

    # # checksum type, set it to default
    self.__cksmType = self.__defaultCksmType
    # # disable checksum test by default
    self.__cksmTest = False

    # # statuses that prevent submitting to FTS
    self.noSubmitStatus = ( 'Failed', 'Done', 'Staging' )

    # # were sources resolved?
    self.sourceResolved = False

    # # Number of file transfers actually submitted
    self.submittedFiles = 0
    self.transferTime = 0

    self.submitCommand = Operations().getValue( 'DataManagement/FTSPlacement/FTS2/SubmitCommand', 'glite-transfer-submit' )
    self.monitorCommand = Operations().getValue( 'DataManagement/FTSPlacement/FTS2/MonitorCommand', 'glite-transfer-status' )
    self.ftsVersion = Operations().getValue( 'DataManagement/FTSVersion', 'FTS2' )
    self.ftsJob = None
    self.ftsFiles = []

  ####################################################################
  #
  #  Methods for setting/getting/checking the SEs
  #

  def setSourceSE( self, se ):
    """ set SE for source

    :param self: self reference
    :param str se: source SE name
    """
    if se == self.targetSE:
      return S_ERROR( "SourceSE is TargetSE" )
    self.sourceSE = se
    self.oSourceSE = StorageElement( self.sourceSE )
    return self.__checkSourceSE()

  def __checkSourceSE( self ):
    """ check source SE availability

    :param self: self reference
    """
    if not self.sourceSE:
      return S_ERROR( "SourceSE not set" )
    res = self.oSourceSE.isValid( 'Read' )
    if not res['OK']:
      return S_ERROR( "SourceSE not available for reading" )
    res = self.__getSESpaceToken( self.oSourceSE )
    if not res['OK']:
      self.log.error( "FTSRequest failed to get SRM Space Token for SourceSE", res['Message'] )
      return S_ERROR( "SourceSE does not support FTS transfers" )

    if self.__cksmTest:
      cksmType = self.oSourceSE.checksumType()
      if cksmType in ( "NONE", "NULL" ):
        self.log.warn( "Checksum type set to %s at SourceSE %s, disabling checksum test" % ( cksmType,
                                                                                            self.sourceSE ) )
        self.__cksmTest = False
      elif cksmType != self.__cksmType:
        self.log.warn( "Checksum type mismatch, disabling checksum test" )
        self.__cksmTest = False

    self.sourceToken = res['Value']
    self.sourceValid = True
    return S_OK()

  def setTargetSE( self, se ):
    """ set target SE

    :param self: self reference
    :param str se: target SE name
    """
    if se == self.sourceSE:
      return S_ERROR( "TargetSE is SourceSE" )
    self.targetSE = se
    self.oTargetSE = StorageElement( self.targetSE )
    return self.__checkTargetSE()

  def setTargetToken( self, token ):
    """ target space token setter

    :param self: self reference
    :param str token: target space token
    """
    self.targetToken = token
    return S_OK()

  def __checkTargetSE( self ):
    """ check target SE availability

    :param self: self reference
    """
    if not self.targetSE:
      return S_ERROR( "TargetSE not set" )
    res = self.oTargetSE.isValid( 'Write' )
    if not res['OK']:
      return S_ERROR( "TargetSE not available for writing" )
    res = self.__getSESpaceToken( self.oTargetSE )
    if not res['OK']:
      self.log.error( "FTSRequest failed to get SRM Space Token for TargetSE", res['Message'] )
      return S_ERROR( "TargetSE does not support FTS transfers" )

    # # check checksum types
    if self.__cksmTest:
      cksmType = self.oTargetSE.checksumType()
      if cksmType in ( "NONE", "NULL" ):
        self.log.warn( "Checksum type set to %s at TargetSE %s, disabling checksum test" % ( cksmType,
                                                                                            self.targetSE ) )
        self.__cksmTest = False
      elif cksmType != self.__cksmType:
        self.log.warn( "Checksum type mismatch, disabling checksum test" )
        self.__cksmTest = False

    self.targetToken = res['Value']
    self.targetValid = True
    return S_OK()

  @staticmethod
  def __getSESpaceToken( oSE ):
    """ get space token from StorageElement instance

    :param self: self reference
    :param StorageElement oSE: StorageElement instance
    """
    res = oSE.getStorageParameters( protocol = 'srm' )
    if not res['OK']:
      return res
    return S_OK( res['Value'].get( 'SpaceToken' ) )

  ####################################################################
  #
  #  Methods for setting/getting FTS request parameters
  #

  def setFTSGUID( self, guid ):
    """ FTS job GUID setter

    :param self: self reference
    :param str guid: string containing the GUID
    """
    if not checkGuid( guid ):
      return S_ERROR( "Incorrect GUID format" )
    self.ftsGUID = guid
    return S_OK()


  def setFTSServer( self, server ):
    """ FTS server setter

    :param self: self reference
    :param str server: FTS server URL
    """
    self.ftsServer = server
    return S_OK()

  def isRequestTerminal( self ):
    """ check if FTS job has terminated

    :param self: self reference
    """
    if self.requestStatus in self.finalStates:
      self.isTerminal = True
    return S_OK( self.isTerminal )

  def setCksmTest( self, cksmTest = False ):
    """ set cksm test

    :param self: self reference
    :param bool cksmTest: flag to enable/disable checksum test
    """
    self.__cksmTest = bool( cksmTest )
    return S_OK( self.__cksmTest )

  ####################################################################
  #
  #  Methods for setting/getting/checking files and their metadata
  #

  def setLFN( self, lfn ):
    """ add LFN :lfn: to :fileDict:

    :param self: self reference
    :param str lfn: LFN to add to
    """
    self.fileDict.setdefault( lfn, {'Status':'Waiting'} )
    return S_OK()

  def setSourceSURL( self, lfn, surl ):
    """ source SURL setter

    :param self: self reference
    :param str lfn: LFN
    :param str surl: source SURL
    """
    target = self.fileDict[lfn].get( 'Target' )
    if target == surl:
      return S_ERROR( "Source and target the same" )
    return self.__setFileParameter( lfn, 'Source', surl )

  def getSourceSURL( self, lfn ):
    """ get source SURL for LFN :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Source' )

  def setTargetSURL( self, lfn, surl ):
    """ set target SURL for LFN :lfn:

    :param self: self reference
    :param str lfn: LFN
    :param str surl: target SURL
    """
    source = self.fileDict[lfn].get( 'Source' )
    if source == surl:
      return S_ERROR( "Source and target the same" )
    return self.__setFileParameter( lfn, 'Target', surl )

  def getFailReason( self, lfn ):
    """ get fail reason for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Reason' )

  def getRetries( self, lfn ):
    """ get number of attepmts made to transfer file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Retries' )

  def getTransferTime( self, lfn ):
    """ get duration of transfer for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Duration' )

  def getFailed( self ):
    """ get list of wrongly transferred LFNs

    :param self: self reference
    """
    return S_OK( [ lfn for lfn in self.fileDict
                   if self.fileDict[lfn].get( 'Status', '' ) in self.failedStates ] )

  def getStaging( self ):
    """ get files set for prestaging """
    return S_OK( [lfn for lfn in self.fileDict
                  if self.fileDict[lfn].get( 'Status', '' ) == 'Staging'] )

  def getDone( self ):
    """ get list of succesfully transferred LFNs

    :param self: self reference
    """
    return S_OK( [ lfn for lfn in self.fileDict
                   if self.fileDict[lfn].get( 'Status', '' ) in self.successfulStates ] )

  def __setFileParameter( self, lfn, paramName, paramValue ):
    """ set :paramName: to :paramValue: for :lfn: file

    :param self: self reference
    :param str lfn: LFN
    :param str paramName: parameter name
    :param mixed paramValue: a new parameter value
    """
    self.setLFN( lfn )
    self.fileDict[lfn][paramName] = paramValue
    return S_OK()

  def __getFileParameter( self, lfn, paramName ):
    """ get value of :paramName: for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    :param str paramName: parameter name
    """
    if lfn not in self.fileDict:
      return S_ERROR( "Supplied file not set" )
    if paramName not in self.fileDict[lfn]:
      return S_ERROR( "%s not set for file" % paramName )
    return S_OK( self.fileDict[lfn][paramName] )

  ####################################################################
  #
  #  Methods for submission
  #

  def submit( self, monitor = False, printOutput = True ):
    """ submit FTS job

    :param self: self reference
    :param bool monitor: flag to monitor progress of FTS job
    :param bool printOutput: flag to print output of execution to stdout
    """
    res = self.__prepareForSubmission()
    if not res['OK']:
      return res
    res = self.__submitFTSTransfer()
    if not res['OK']:
      return res
    resDict = { 'ftsGUID' : self.ftsGUID, 'ftsServer' : self.ftsServer, 'submittedFiles' : self.submittedFiles }
    if monitor or printOutput:
      gLogger.always( "Submitted %s@%s" % ( self.ftsGUID, self.ftsServer ) )
      if monitor:
        self.monitor( untilTerminal = True, printOutput = printOutput, full = False )
    return S_OK( resDict )

  def __prepareForSubmission( self ):
    """ check validity of job before submission

    :param self: self reference
    """
    if not self.fileDict:
      return S_ERROR( "No files set" )
    if not self.sourceValid:
      return S_ERROR( "SourceSE not valid" )
    if not self.targetValid:
      return S_ERROR( "TargetSE not valid" )
    if not self.ftsServer:
      res = self.__resolveFTSServer()
      if not res['OK']:
        return S_ERROR( "FTSServer not valid" )
    self.resolveSource()
    self.resolveTarget()
    res = self.__filesToSubmit()
    if not res['OK']:
      return S_ERROR( "No files to submit" )
    return S_OK()

  def __getCatalogObject( self ):
    """ CatalogInterface instance facade

    :param self: self reference
    """
    try:
      if not self.oCatalog:
        self.oCatalog = FileCatalog()
      return S_OK()
    except Exception as error:
      return S_ERROR( "Failed to instantiate FileCatalog: %s" % error )

  def __updateReplicaCache( self, lfns = None, overwrite = False ):
    """ update replica cache for list of :lfns:

    :param self: self reference
    :param mixed lfns: list of LFNs
    :param bool overwrite: flag to trigger cache clearing and updating
    """
    if not lfns:
      lfns = self.fileDict.keys()
    toUpdate = [ lfn for lfn in lfns if ( lfn not in self.catalogReplicas ) or overwrite ]
    if not toUpdate:
      return S_OK()
    res = self.__getCatalogObject()
    if not res['OK']:
      return res
    res = self.oCatalog.getReplicas( toUpdate )
    if not res['OK']:
      return S_ERROR( "Failed to update replica cache: %s" % res['Message'] )
    for lfn, error in res['Value']['Failed'].items():
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    for lfn, replicas in res['Value']['Successful'].items():
      self.catalogReplicas[lfn] = replicas
    return S_OK()

  def __updateMetadataCache( self, lfns = None ):
    """ update metadata cache for list of LFNs

    :param self: self reference
    :param list lfns: list of LFNs
    """
    if not lfns:
      lfns = self.fileDict.keys()
    toUpdate = [ lfn for lfn in lfns if lfn not in self.catalogMetadata ]
    if not toUpdate:
      return S_OK()
    res = self.__getCatalogObject()
    if not res['OK']:
      return res
    res = self.oCatalog.getFileMetadata( toUpdate )
    if not res['OK']:
      return S_ERROR( "Failed to get source catalog metadata: %s" % res['Message'] )
    for lfn, error in res['Value']['Failed'].items():
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    for lfn, metadata in res['Value']['Successful'].items():
      self.catalogMetadata[lfn] = metadata
    return S_OK()

  def resolveSource( self ):
    """ resolve source SE eligible for submission

    :param self: self reference
    """

    # Avoid resolving sources twice
    if self.sourceResolved:
      return S_OK()
    # Only resolve files that need a transfer
    toResolve = [ lfn for lfn in self.fileDict if self.fileDict[lfn].get( "Status", "" ) != "Failed" ]
    if not toResolve:
      return S_OK()
    res = self.__updateMetadataCache( toResolve )
    if not res['OK']:
      return res
    res = self.__updateReplicaCache( toResolve )
    if not res['OK']:
      return res

    # Define the source URLs
    for lfn in toResolve:
      replicas = self.catalogReplicas.get( lfn, {} )
      if self.sourceSE not in replicas:
        gLogger.warn( "resolveSource: skipping %s - not replicas at SourceSE %s" % ( lfn, self.sourceSE ) )
        self.__setFileParameter( lfn, 'Reason', "No replica at SourceSE" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue

      res = returnSingleResult( self.oSourceSE.getURL( lfn, protocol = 'srm' ) )
      if not res['OK']:
        gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
      res = self.setSourceSURL( lfn, res['Value'] )
      if not res['OK']:
        gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue

    toResolve = []
    for lfn in self.fileDict:
      if "Source" in self.fileDict[lfn]:
        toResolve.append( lfn )
    if not toResolve:
      return S_ERROR( "No eligible Source files" )

    # Get metadata of the sources, to check for existence, availability and caching
    res = self.oSourceSE.getFileMetadata( toResolve )
    if not res['OK']:
      return S_ERROR( "Failed to check source file metadata" )

    for lfn, error in res['Value']['Failed'].items():
      if re.search( 'File does not exist', error ):
        gLogger.warn( "resolveSource: skipping %s - source file does not exists" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file does not exist" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        gLogger.warn( "resolveSource: skipping %s - failed to get source metadata" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Failed to get Source metadata" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
    toStage = []

    nbStagedFiles = 0
    for lfn, metadata in res['Value']['Successful'].items():
      lfnStatus = self.fileDict.get( lfn, {} ).get( 'Status' )
      if metadata.get( 'Unavailable', False ):
        gLogger.warn( "resolveSource: skipping %s - source file unavailable" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file Unavailable" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif metadata.get( 'Lost', False ):
        gLogger.warn( "resolveSource: skipping %s - source file lost" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source file Lost" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif not metadata.get( 'Cached', metadata['Accessible'] ):
        if lfnStatus != 'Staging':
          toStage.append( lfn )
      elif metadata['Size'] != self.catalogMetadata[lfn]['Size']:
        gLogger.warn( "resolveSource: skipping %s - source file size mismatch" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source size mismatch" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif self.catalogMetadata[lfn]['Checksum'] and metadata['Checksum'] and \
            not compareAdler( metadata['Checksum'], self.catalogMetadata[lfn]['Checksum'] ):
        gLogger.warn( "resolveSource: skipping %s - source file checksum mismatch" % lfn )
        self.__setFileParameter( lfn, 'Reason', "Source checksum mismatch" )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      elif lfnStatus == 'Staging':
        # file that was staging is now cached
        self.__setFileParameter( lfn, 'Status', 'Waiting' )
        nbStagedFiles += 1

    # Some files were being staged
    if nbStagedFiles:
      self.log.info( 'resolveSource: %d files have been staged' % nbStagedFiles )

    # Launching staging of files not in cache
    if toStage:
      gLogger.warn( "resolveSource: %s source files not cached, prestaging..." % len( toStage ) )
      stage = self.oSourceSE.prestageFile( toStage )
      if not stage["OK"]:
        gLogger.error( "resolveSource: error is prestaging", stage["Message"] )
        for lfn in toStage:
          self.__setFileParameter( lfn, 'Reason', stage["Message"] )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        for lfn in toStage:
          if lfn in stage['Value']['Successful']:
            self.__setFileParameter( lfn, 'Status', 'Staging' )
          elif lfn in stage['Value']['Failed']:
            self.__setFileParameter( lfn, 'Reason', stage['Value']['Failed'][lfn] )
            self.__setFileParameter( lfn, 'Status', 'Failed' )

    self.sourceResolved = True
    return S_OK()
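  # Per-file outcome of resolveSource above (summary, not new behaviour):
  #   file missing, lost, unavailable, or size/checksum mismatch -> Status 'Failed' with a Reason
  #   file not cached on disk                                    -> prestage requested, Status 'Staging'
  #   file previously 'Staging' and now cached                   -> Status reset to 'Waiting'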

  def resolveTarget( self ):
    """ find target SE eligible for submission

    :param self: self reference
    """
    toResolve = [ lfn for lfn in self.fileDict
                 if self.fileDict[lfn].get( 'Status' ) not in self.noSubmitStatus ]
    if not toResolve:
      return S_OK()
    res = self.__updateReplicaCache( toResolve )
    if not res['OK']:
      return res
    for lfn in toResolve:
      res = returnSingleResult( self.oTargetSE.getURL( lfn, protocol = 'srm' ) )
      if not res['OK']:
        reason = res['Message']
        gLogger.warn( "resolveTarget: skipping %s - %s" % ( lfn, reason ) )
        self.__setFileParameter( lfn, 'Reason', reason )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue

      res = self.setTargetSURL( lfn, res['Value'] )
      if not res['OK']:
        gLogger.warn( "resolveTarget: skipping %s - %s" % ( lfn, res["Message"] ) )
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
        continue
    toResolve = []
    for lfn in self.fileDict:
      if "Target" in self.fileDict[lfn]:
        toResolve.append( lfn )
    if not toResolve:
      return S_ERROR( "No eligible Target files" )
    res = self.oTargetSE.exists( toResolve )
    if not res['OK']:
      return S_ERROR( "Failed to check target existence" )
    for lfn, error in res['Value']['Failed'].items():
      self.__setFileParameter( lfn, 'Reason', error )
      self.__setFileParameter( lfn, 'Status', 'Failed' )
    toRemove = []
    for lfn, exists in res['Value']['Successful'].items():
      if exists:
        res = self.getSourceSURL( lfn )
        if not res['OK']:
          gLogger.warn( "resolveTarget: skipping %s - target exists" % lfn )
          self.__setFileParameter( lfn, 'Reason', "Target exists" )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
        elif res['Value'] == self.fileDict[lfn]['Target']:
          gLogger.warn( "resolveTarget: skipping %s - source and target pfns are the same" % lfn )
          self.__setFileParameter( lfn, 'Reason', "Source and Target the same" )
          self.__setFileParameter( lfn, 'Status', 'Failed' )
        else:
          toRemove.append( lfn )
    if toRemove:
      self.oTargetSE.removeFile( toRemove )
    return S_OK()

  def __filesToSubmit( self ):
    """
    check if there is at least one file to submit

    :return: S_OK if at least one file is present, S_ERROR otherwise
    """
    for lfn in self.fileDict:
      lfnStatus = self.fileDict[lfn].get( 'Status' )
      source = self.fileDict[lfn].get( 'Source' )
      target = self.fileDict[lfn].get( 'Target' )
      if lfnStatus not in self.noSubmitStatus and source and target:
        return S_OK()
    return S_ERROR()

  def __createFTSFiles( self ):
    """ create LFNs file for glite-transfer-submit command

    This file consists of one line for each file to be transferred:

    sourceSURL targetSURL [CHECKSUMTYPE:CHECKSUM]

    :param self: self reference
    """
    self.__updateMetadataCache()
    for lfn in self.fileDict:
      lfnStatus = self.fileDict[lfn].get( 'Status' )
      if lfnStatus not in self.noSubmitStatus:
        cksmStr = ""
        # # add cksmType:cksm only if cksmType is specified, else let FTS decide by itself
        if self.__cksmTest and self.__cksmType:
          checkSum = self.catalogMetadata.get( lfn, {} ).get( 'Checksum' )
          if checkSum:
            cksmStr = " %s:%s" % ( self.__cksmType, intAdlerToHex( hexAdlerToInt( checkSum ) ) )
        ftsFile = FTSFile()
        ftsFile.LFN = lfn
        ftsFile.SourceSURL = self.fileDict[lfn].get( 'Source' )
        ftsFile.TargetSURL = self.fileDict[lfn].get( 'Target' )
        ftsFile.SourceSE = self.sourceSE
        ftsFile.TargetSE = self.targetSE
        ftsFile.Status = self.fileDict[lfn].get( 'Status' )
        ftsFile.Checksum = cksmStr
        ftsFile.Size = self.catalogMetadata.get( lfn, {} ).get( 'Size' )
        self.ftsFiles.append( ftsFile )
        self.submittedFiles += 1
    return S_OK()

  def __createFTSJob( self, guid = None ):
    self.__createFTSFiles()
    ftsJob = FTSJob()
    ftsJob.RequestID = 0
    ftsJob.OperationID = 0
    ftsJob.SourceSE = self.sourceSE
    ftsJob.TargetSE = self.targetSE
    ftsJob.SourceToken = self.sourceToken
    ftsJob.TargetToken = self.targetToken
    ftsJob.FTSServer = self.ftsServer
    if guid:
      ftsJob.FTSGUID = guid

    for ftsFile in self.ftsFiles:
      ftsFile.Attempt += 1
      ftsFile.Error = ""
      ftsJob.addFile( ftsFile )
    self.ftsJob = ftsJob

  def __submitFTSTransfer( self ):
    """ create and execute glite-transfer-submit CLI command

    :param self: self reference
    """
    log = gLogger.getSubLogger( 'Submit' )
    self.__createFTSJob()

    submit = self.ftsJob.submitFTS( self.ftsVersion, command = self.submitCommand )
    if not submit["OK"]:
      log.error( "unable to submit FTSJob: %s" % submit["Message"] )
      return submit

    log.info( "FTSJob '%s'@'%s' has been submitted" % ( self.ftsJob.FTSGUID, self.ftsJob.FTSServer ) )

    # # update statuses for job files
    for ftsFile in self.ftsJob:
      ftsFile.FTSGUID = self.ftsJob.FTSGUID
      ftsFile.Status = "Submitted"
      ftsFile.Attempt += 1

    log.info( "FTSJob '%s'@'%s' has been submitted" % ( self.ftsJob.FTSGUID, self.ftsJob.FTSServer ) )
    self.ftsGUID = self.ftsJob.FTSGUID
    return S_OK()

  def __resolveFTSServer( self ):
    """
    resolve the FTS server to use; it should be the one closest to the target SE

    :param self: self reference
    """
    if self.ftsVersion.upper() == 'FTS2':

      from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getFTS2ServersForSites
      if not self.targetSE:
        return S_ERROR( "Target SE not set" )
      res = getSitesForSE( self.targetSE )
      if not res['OK'] or not res['Value']:
        return S_ERROR( "Could not determine target site" )
      targetSites = res['Value']

      targetSite = ''
      for targetSite in targetSites:
        targetFTS = getFTS2ServersForSites( [targetSite] )
        if targetFTS['OK']:
          ftsTarget = targetFTS['Value'][targetSite]
          if ftsTarget:
            self.ftsServer = ftsTarget
            return S_OK( self.ftsServer )
        else:
          return targetFTS

    elif self.ftsVersion.upper() == 'FTS3':

      from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getFTS3Servers
      res = getFTS3Servers()
      if not res['OK']:
        return res
      ftsServerList = res['Value']
      if ftsServerList:
        # Here we take the first one, regardless of the policy...
        # Unclean, but all this will disappear after refactoring the FTS code
        self.ftsServer = ftsServerList[0]
        return S_OK( self.ftsServer )

    else:
      return S_ERROR( 'Unknown FTS version %s' % self.ftsVersion )


    return S_ERROR( 'No FTS server found for %s' % self.targetSE )

  ####################################################################
  #
  #  Methods for monitoring
  #

  def summary( self, untilTerminal = False, printOutput = False ):
    """ summary of FTS job

    :param self: self reference
    :param bool untilTerminal: flag to monitor FTS job to its final state
    :param bool printOutput: flag to print out monitoring information to the stdout
    """
    res = self.__isSummaryValid()
    if not res['OK']:
      return res
    while not self.isTerminal:
      res = self.__parseOutput( full = True )
      if not res['OK']:
        return res
      if untilTerminal:
        self.__print()
      self.isRequestTerminal()
      if res['Value'] or ( not untilTerminal ):
        break
      time.sleep( 1 )
    if untilTerminal:
      print ""
    if printOutput and ( not untilTerminal ):
      return self.dumpSummary( printOutput = printOutput )
    return S_OK()

  def monitor( self, untilTerminal = False, printOutput = False, full = True ):
    """ monitor FTS job

    :param self: self reference
    :param bool untilTerminal: flag to monitor FTS job to its final state
    :param bool printOutput: flag to print out monitoring information to the stdout
    """
    if not self.ftsJob:
      self.resolveSource()
      self.__createFTSJob( self.ftsGUID )
    res = self.__isSummaryValid()
    if not res['OK']:
      return res
    if untilTerminal:
      res = self.summary( untilTerminal = untilTerminal, printOutput = printOutput )
      if not res['OK']:
        return res
    res = self.__parseOutput( full = full )
    if not res['OK']:
      return res
    if untilTerminal:
      self.finalize()
    if printOutput:
      self.dump()
    return res

  def dumpSummary( self, printOutput = False ):
    """ get FTS job summary as str

    :param self: self reference
    :param bool printOutput: print summary to stdout
    """

    outStr = ''
    for status in sorted( self.statusSummary ):
      if self.statusSummary[status]:
        outStr = '%s\t%-10s : %-10s\n' % ( outStr, status, str( self.statusSummary[status] ) )
    outStr = outStr.rstrip( '\n' )
    if printOutput:
      print outStr
    return S_OK( outStr )

  def __print( self ):
    """ print progress bar of FTS job completeness to stdout

    :param self: self reference
    """
    width = 100
    bits = int( ( width * self.percentageComplete ) / 100 )
    outStr = "|%s>%s| %.1f%s %s %s" % ( "="*bits, " "*( width - bits ),
                                        self.percentageComplete, "%",
                                        self.requestStatus, " "*10 )
    sys.stdout.write( "%s\r" % ( outStr ) )
    sys.stdout.flush()

  def dump( self ):
    """ print FTS job parameters and files to stdout

    :param self: self reference
    """
    print "%-10s : %-10s" % ( "Status", self.requestStatus )
    print "%-10s : %-10s" % ( "Source", self.sourceSE )
    print "%-10s : %-10s" % ( "Target", self.targetSE )
    print "%-10s : %-128s" % ( "Server", self.ftsServer )
    print "%-10s : %-128s" % ( "GUID", self.ftsGUID )
    for lfn in sorted( self.fileDict ):
      print "\n  %-15s : %-128s" % ( 'LFN', lfn )
      for key in ['Source', 'Target', 'Status', 'Reason', 'Duration']:
        print "  %-15s : %-128s" % ( key, str( self.fileDict[lfn].get( key ) ) )
    return S_OK()

  def __isSummaryValid( self ):
    """ check validity of FTS job summary report

    :param self: self reference
    """
    if not self.ftsServer:
      return S_ERROR( "FTSServer not set" )
    if not self.ftsGUID:
      return S_ERROR( "FTSGUID not set" )
    return S_OK()

  def __parseOutput( self, full = False ):
    """ execute glite-transfer-status command and parse its output

    :param self: self reference
    :param bool full: glite-transfer-status verbosity level; when set, collect per-file information as well
    """
    monitor = self.ftsJob.monitorFTS( self.ftsVersion, command = self.monitorCommand, full = full )
    if not monitor['OK']:
      return monitor
    self.percentageComplete = self.ftsJob.Completeness
    self.requestStatus = self.ftsJob.Status
    self.submitTime = self.ftsJob.SubmitTime

    statusSummary = monitor['Value']
    if statusSummary:
      for state in statusSummary:
        self.statusSummary[state] = statusSummary[state]

    self.transferTime = 0
    for ftsFile in self.ftsJob:
      lfn = ftsFile.LFN
      self.__setFileParameter( lfn, 'Status', ftsFile.Status )
      self.__setFileParameter( lfn, 'Reason', ftsFile.Error )
      self.__setFileParameter( lfn, 'Duration', ftsFile._duration )
      targetURL = self.__getFileParameter( lfn, 'Target' )
      if not targetURL['OK']:
        self.__setFileParameter( lfn, 'Target', ftsFile.TargetSURL )
      sourceURL = self.__getFileParameter( lfn, 'Source' )
      if not sourceURL['OK']:
        self.__setFileParameter( lfn, 'Source', ftsFile.SourceSURL )
      self.transferTime += int( ftsFile._duration )
    return S_OK()

  ####################################################################
  #
  #  Methods for finalization
  #

  def finalize( self ):
    """ finalize FTS job

    :param self: self reference
    """
    self.__updateMetadataCache()
    transEndTime = dateTime()
    regStartTime = time.time()
    res = self.getTransferStatistics()
    transDict = res['Value']

    res = self.__registerSuccessful( transDict['transLFNs'] )

    regSuc, regTotal = res['Value']
    regTime = time.time() - regStartTime
    if self.sourceSE and self.targetSE:
      self.__sendAccounting( regSuc, regTotal, regTime, transEndTime, transDict )
    return S_OK()

  def getTransferStatistics( self ):
    """ collect information of Transfers that can be used by Accounting

    :param self: self reference
    """
    transDict = { 'transTotal': len( self.fileDict ),
                  'transLFNs': [],
                  'transOK': 0,
                  'transSize': 0 }

    for lfn in self.fileDict:
      if self.fileDict[lfn].get( 'Status' ) in self.successfulStates:
        if self.fileDict[lfn].get( 'Duration', 0 ):
          transDict['transLFNs'].append( lfn )
          transDict['transOK'] += 1
          if lfn in self.catalogMetadata:
            transDict['transSize'] += self.catalogMetadata[lfn].get( 'Size', 0 )

    return S_OK( transDict )

  def getFailedRegistrations( self ):
    """ get failed registrations dict

    :param self: self reference
    """
    return S_OK( self.failedRegistrations )

  def __registerSuccessful( self, transLFNs ):
    """ register successfully transferred files to the catalogs,
    fill failedRegistrations dict for files that failed to register

    :param self: self reference
    :param list transLFNs: LFNs in FTS job
    """
    self.failedRegistrations = {}
    toRegister = {}
    for lfn in transLFNs:
      res = returnSingleResult( self.oTargetSE.getURL( self.fileDict[lfn].get( 'Target' ), protocol = 'srm' ) )
      if not res['OK']:
        self.__setFileParameter( lfn, 'Reason', res['Message'] )
        self.__setFileParameter( lfn, 'Status', 'Failed' )
      else:
        toRegister[lfn] = { 'PFN' : res['Value'], 'SE' : self.targetSE }
    if not toRegister:
      return S_OK( ( 0, 0 ) )
    res = self.__getCatalogObject()
    if not res['OK']:
      self.failedRegistrations = toRegister
      self.log.error( 'Failed to get Catalog Object', res['Message'] )
      return S_OK( ( 0, len( toRegister ) ) )
    res = self.oCatalog.addReplica( toRegister )
    if not res['OK']:
      self.failedRegistrations = toRegister
      self.log.error( 'Failed to add replicas to the catalog', res['Message'] )
      return S_OK( ( 0, len( toRegister ) ) )
    for lfn, error in res['Value']['Failed'].items():
      self.failedRegistrations[lfn] = toRegister[lfn]
      self.log.error( 'Registration of Replica failed', '%s : %s' % ( lfn, str( error ) ) )
    return S_OK( ( len( res['Value']['Successful'] ), len( toRegister ) ) )

  def __sendAccounting( self, regSuc, regTotal, regTime, transEndTime, transDict ):
    """ send accounting record

    :param self: self reference
    :param regSuc: number of files successfully registered
    :param regTotal: number of files attempted to register
    :param regTime: time stamp at the end of registration
    :param transEndTime: time stamp at the end of FTS job
    :param dict transDict: dict holding counters for the files being transferred, their sizes and successful transfers
    """

    oAccounting = DataOperation()
    oAccounting.setEndTime( transEndTime )
    oAccounting.setStartTime( self.submitTime )

    accountingDict = {}
    accountingDict['OperationType'] = 'replicateAndRegister'
    result = getProxyInfo()
    if not result['OK']:
      userName = '******'
    else:
      userName = result['Value'].get( 'username', 'unknown' )
    accountingDict['User'] = userName
    accountingDict['Protocol'] = 'FTS' if 'fts3' not in self.ftsServer else 'FTS3'
    accountingDict['RegistrationTime'] = regTime
    accountingDict['RegistrationOK'] = regSuc
    accountingDict['RegistrationTotal'] = regTotal
    accountingDict['TransferOK'] = transDict['transOK']
    accountingDict['TransferTotal'] = transDict['transTotal']
    accountingDict['TransferSize'] = transDict['transSize']
    accountingDict['FinalStatus'] = self.requestStatus
    accountingDict['Source'] = self.sourceSE
    accountingDict['Destination'] = self.targetSE
    accountingDict['TransferTime'] = self.transferTime
    oAccounting.setValuesFromDict( accountingDict )
    self.log.verbose( "Attempting to commit accounting message..." )
    oAccounting.commit()
    self.log.verbose( "...committed." )
    return S_OK()
inputFileName = args[0]
storageElement = args[1]
status = args[2]

if os.path.exists( inputFileName ):
  inputFile = open( inputFileName, 'r' )
  string = inputFile.read()
  inputFile.close()
  lfns = sortList( string.splitlines() )
else:
  lfns = [inputFileName]

fc = FileCatalog()

replicaDict = {}
res = fc.getReplicas( lfns, allStatus = True )
if not res['OK']:
  gLogger.error( "Failed to get catalog replicas.", res['Message'] )
  DIRAC.exit( -1 )
lfnDict = {}
for lfn, error in res['Value']['Failed'].items():
  gLogger.error( "Failed to get replicas for file.", "%s:%s" % ( lfn, error ) )
for lfn, replicas in res['Value']['Successful'].items():
  if not storageElement in replicas.keys():
    gLogger.error( "LFN not registered at provided storage element." , "%s %s" % ( lfn, storageElement ) )
  else:
    lfnDict[lfn] = {'SE':storageElement, 'PFN':replicas[storageElement], 'Status':status}
if not lfnDict:
  gLogger.error( "No files found at the supplied storage element." )
  DIRAC.exit( 2 )
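# For reference, each lfnDict entry built above has the shape (hypothetical values):
#   lfnDict['/some/lfn'] = { 'SE' : 'SOME-SE', 'PFN' : 'srm://some.se/some/lfn', 'Status' : 'Trash' }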
Example #32
0
class ReplicateAndRegister(DMSRequestOperationsBase):
    """
  .. class:: ReplicateAndRegister

  ReplicateAndRegister operation handler
  """
    def __init__(self, operation=None, csPath=None):
        """c'tor

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
        super(ReplicateAndRegister, self).__init__(operation, csPath)
        # # own gMonitor stuff for files
        gMonitor.registerActivity("ReplicateAndRegisterAtt",
                                  "Replicate and register attempted",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("ReplicateOK", "Replications successful",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("ReplicateFail", "Replications failed",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RegisterOK", "Registrations successful",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RegisterFail", "Registrations failed",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        # # for FTS
        gMonitor.registerActivity("FTSScheduleAtt", "Files schedule attempted",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("FTSScheduleOK", "File schedule successful",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("FTSScheduleFail", "File schedule failed",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        # # SE cache

        # Clients
        self.fc = FileCatalog()

    def __call__(self):
        """ call me maybe """
        # # check replicas first
        checkReplicas = self.__checkReplicas()
        if not checkReplicas["OK"]:
            self.log.error('Failed to check replicas',
                           checkReplicas["Message"])
        if hasattr(self, "FTSMode") and getattr(self, "FTSMode"):
            bannedGroups = getattr(self, "FTSBannedGroups") if hasattr(
                self, "FTSBannedGroups") else ()
            if self.request.OwnerGroup in bannedGroups:
                self.log.verbose(
                    "usage of FTS system is banned for request's owner")
                return self.dmTransfer()

            if getattr(self, 'UseNewFTS3', False):
                return self.fts3Transfer()
            else:
                return self.ftsTransfer()

        return self.dmTransfer()

    def __checkReplicas(self):
        """ check done replicas and update file states  """
        waitingFiles = dict([(opFile.LFN, opFile) for opFile in self.operation
                             if opFile.Status in ("Waiting", "Scheduled")])
        targetSESet = set(self.operation.targetSEList)

        replicas = self.fc.getReplicas(waitingFiles.keys())
        if not replicas["OK"]:
            self.log.error('Failed to get replicas', replicas["Message"])
            return replicas

        reMissing = re.compile(r".*such file.*")
        for failedLFN, errStr in replicas["Value"]["Failed"].iteritems():
            waitingFiles[failedLFN].Error = errStr
            if reMissing.search(errStr.lower()):
                self.log.error("File does not exists", failedLFN)
                gMonitor.addMark("ReplicateFail", len(targetSESet))
                waitingFiles[failedLFN].Status = "Failed"

        for successfulLFN, reps in replicas["Value"]["Successful"].iteritems():
            if targetSESet.issubset(set(reps)):
                self.log.info("file %s has been replicated to all targets" %
                              successfulLFN)
                waitingFiles[successfulLFN].Status = "Done"

        return S_OK()

    def _addMetadataToFiles(self, toSchedule):
        """ Add metadata to those files that need to be scheduled through FTS

        toSchedule is a dictionary:
        {'lfn1': opFile, 'lfn2': opFile}
    """
        if toSchedule:
            self.log.info(
                "found %s files to schedule, getting metadata from FC" %
                len(toSchedule))
        else:
            self.log.verbose("No files to schedule")
            return S_OK([])

        res = self.fc.getFileMetadata(toSchedule.keys())
        if not res['OK']:
            return res
        else:
            if res['Value']['Failed']:
                self.log.warn(
                    "Can't schedule %d files: problems getting the metadata: %s"
                    % (len(res['Value']['Failed']), ', '.join(
                        res['Value']['Failed'])))
            metadata = res['Value']['Successful']

        filesToSchedule = {}

        for lfn, lfnMetadata in metadata.iteritems():
            opFileToSchedule = toSchedule[lfn][0]
            opFileToSchedule.GUID = lfnMetadata['GUID']
            # In principle this is defined already in filterReplicas()
            if not opFileToSchedule.Checksum:
                opFileToSchedule.Checksum = metadata[lfn]['Checksum']
                opFileToSchedule.ChecksumType = metadata[lfn]['ChecksumType']
            opFileToSchedule.Size = metadata[lfn]['Size']

            filesToSchedule[opFileToSchedule.LFN] = opFileToSchedule

        return S_OK(filesToSchedule)

    def _filterReplicas(self, opFile):
        """ filter out banned/invalid source SEs """
        return filterReplicas(opFile, logger=self.log, dataManager=self.dm)

    def ftsTransfer(self):
        """ replicate and register using FTS """

        self.log.info("scheduling files in FTS...")

        bannedTargets = self.checkSEsRSS()
        if not bannedTargets['OK']:
            gMonitor.addMark("FTSScheduleAtt")
            gMonitor.addMark("FTSScheduleFail")
            return bannedTargets

        if bannedTargets['Value']:
            return S_OK("%s targets are banned for writing" %
                        ",".join(bannedTargets['Value']))

        # Can continue now
        self.log.verbose("No targets banned for writing")

        toSchedule = {}

        delayExecution = 0
        errors = defaultdict(int)
        for opFile in self.getWaitingFilesList():
            opFile.Error = ''
            gMonitor.addMark("FTSScheduleAtt")
            # # check replicas
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                continue
            replicas = replicas["Value"]

            validReplicas = replicas.get("Valid")
            noMetaReplicas = replicas.get("NoMetadata")
            noReplicas = replicas.get('NoReplicas')
            badReplicas = replicas.get('Bad')
            noActiveReplicas = replicas.get('NoActiveReplicas')

            if validReplicas:
                validTargets = list(
                    set(self.operation.targetSEList) - set(validReplicas))
                if not validTargets:
                    self.log.info("file %s is already present at all targets" %
                                  opFile.LFN)
                    opFile.Status = "Done"
                else:
                    toSchedule[opFile.LFN] = [
                        opFile, validReplicas, validTargets
                    ]
            else:
                gMonitor.addMark("FTSScheduleFail")
                if noMetaReplicas:
                    err = "Couldn't get metadata"
                    errors[err] += 1
                    self.log.verbose(
                        "unable to schedule '%s', %s at %s" %
                        (opFile.LFN, err, ','.join(noMetaReplicas)))
                    opFile.Error = err
                elif noReplicas:
                    err = "File doesn't exist"
                    errors[err] += 1
                    self.log.error(
                        "Unable to schedule transfer", "%s %s at %s" %
                        (opFile.LFN, err, ','.join(noReplicas)))
                    opFile.Error = err
                    opFile.Status = 'Failed'
                elif badReplicas:
                    err = "All replicas have a bad checksum"
                    errors[err] += 1
                    self.log.error(
                        "Unable to schedule transfer", "%s, %s at %s" %
                        (opFile.LFN, err, ','.join(badReplicas)))
                    opFile.Error = err
                    opFile.Status = 'Failed'
                elif noActiveReplicas:
                    err = "No active replica found"
                    errors[err] += 1
                    self.log.verbose(
                        "Unable to schedule transfer", "%s, %s at %s" %
                        (opFile.LFN, err, ','.join(noActiveReplicas)))
                    opFile.Error = err
                    # All source SEs are banned, delay execution by 1 hour
                    delayExecution = 60

        if delayExecution:
            self.log.info("Delay execution of the request by %d minutes" %
                          delayExecution)
            self.request.delayNextExecution(delayExecution)
        # Log error counts
        for error, count in errors.iteritems():
            self.log.error(error, 'for %d files' % count)

        filesToScheduleList = []
        res = self._addMetadataToFiles(toSchedule)
        if not res['OK']:
            return res
        else:
            filesToSchedule = res['Value']

            for lfn in filesToSchedule:
                filesToScheduleList.append(
                    (filesToSchedule[lfn].toJSON()['Value'],
                     toSchedule[lfn][1], toSchedule[lfn][2]))

        if filesToScheduleList:

            ftsSchedule = FTSClient().ftsSchedule(self.request.RequestID,
                                                  self.operation.OperationID,
                                                  filesToScheduleList)
            if not ftsSchedule["OK"]:
                self.log.error("Completely failed to schedule to FTS:",
                               ftsSchedule["Message"])
                return ftsSchedule

            # might have nothing to schedule
            ftsSchedule = ftsSchedule["Value"]
            if not ftsSchedule:
                return S_OK()

            self.log.info("%d files have been scheduled to FTS" %
                          len(ftsSchedule['Successful']))
            for opFile in self.operation:
                fileID = opFile.FileID
                if fileID in ftsSchedule["Successful"]:
                    gMonitor.addMark("FTSScheduleOK", 1)
                    opFile.Status = "Scheduled"
                    self.log.debug("%s has been scheduled for FTS" %
                                   opFile.LFN)
                elif fileID in ftsSchedule["Failed"]:
                    gMonitor.addMark("FTSScheduleFail", 1)
                    opFile.Error = ftsSchedule["Failed"][fileID]
                    if 'sourceSURL equals to targetSURL' in opFile.Error:
                        # In this case there is no need to continue
                        opFile.Status = 'Failed'
                    self.log.warn("unable to schedule %s for FTS: %s" %
                                  (opFile.LFN, opFile.Error))
        else:
            self.log.info("No files to schedule after metadata checks")

        # Just in case some transfers could not be scheduled, try them with RM
        return self.dmTransfer(fromFTS=True)

    def _checkExistingFTS3Operations(self):
        """
       Check if there are ongoing FTS3Operation for the current RMS Operation

       Under some conditions, we can be trying to schedule files while
       there is still an FTS transfer going on. This typically happens
       when the REA hangs. To prevent further race condition, we check
       if there are FTS3Operations in a non Final state matching the
       current operation ID. If so, we put the corresponding files in
       scheduled mode. We will then wait till the FTS3 Operation performs
       the callback

       :returns: S_OK with True if we can go on, False if we should stop the processing
    """

        res = FTS3Client().getOperationsFromRMSOpID(self.operation.OperationID)

        if not res['OK']:
            self.log.debug("Could not get FTS3Operations matching OperationID",
                           self.operation.OperationID)
            return res

        existingFTSOperations = res['Value']
        # It is ok to have FTS Operations in a final state, so we
        # care only about the others
        unfinishedFTSOperations = [
            ops for ops in existingFTSOperations
            if ops.status not in FTS3TransferOperation.FINAL_STATES
        ]

        if not unfinishedFTSOperations:
            self.log.debug("No ongoing FTS3Operations, all good")
            return S_OK(True)

        self.log.warn(
            "Some FTS3Operations already exist for the RMS Operation:",
            [op.operationID for op in unfinishedFTSOperations])

        # This would really be a screwed up situation !
        if len(unfinishedFTSOperations) > 1:
            self.log.warn("That's a serious problem !!")

        # We take the rmsFileID of the files in the Operations,
        # find the corresponding File object, and set them scheduled
        rmsFileIDsToSetScheduled = set([
            ftsFile.rmsFileID for ftsOp in unfinishedFTSOperations
            for ftsFile in ftsOp.ftsFiles
        ])

        for opFile in self.operation:
            # If it is in the DB, it has a FileID
            opFileID = opFile.FileID
            if opFileID in rmsFileIDsToSetScheduled:
                self.log.warn("Setting RMSFile as already scheduled", opFileID)
                opFile.Status = "Scheduled"

        # We return here such that the Request is set back to Scheduled in the DB
        # With no further modification
        return S_OK(False)

    def fts3Transfer(self):
        """ replicate and register using FTS3 """

        self.log.info("scheduling files in FTS3...")

        # Check first if we do not have ongoing transfers

        res = self._checkExistingFTS3Operations()
        if not res['OK']:
            return res

        # if res['Value'] is False
        # it means that there are ongoing transfers
        # and we should stop here
        if res['Value'] is False:
            # return S_OK such that the request is put back
            return S_OK()

        fts3Files = []
        toSchedule = {}

        # Dict which maps the FileID to the object
        rmsFilesIds = {}

        for opFile in self.getWaitingFilesList():
            rmsFilesIds[opFile.FileID] = opFile

            opFile.Error = ''
            gMonitor.addMark("FTSScheduleAtt")
            # # check replicas
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                continue
            replicas = replicas["Value"]

            validReplicas = replicas["Valid"]
            noMetaReplicas = replicas["NoMetadata"]
            noReplicas = replicas['NoReplicas']
            badReplicas = replicas['Bad']
            noPFN = replicas['NoPFN']

            if validReplicas:
                validTargets = list(
                    set(self.operation.targetSEList) - set(validReplicas))
                if not validTargets:
                    self.log.info("file %s is already present at all targets" %
                                  opFile.LFN)
                    opFile.Status = "Done"
                else:
                    toSchedule[opFile.LFN] = [opFile, validTargets]

            else:
                gMonitor.addMark("FTSScheduleFail")
                if noMetaReplicas:
                    self.log.warn(
                        "unable to schedule '%s', couldn't get metadata at %s"
                        % (opFile.LFN, ','.join(noMetaReplicas)))
                    opFile.Error = "Couldn't get metadata"
                elif noReplicas:
                    self.log.error(
                        "Unable to schedule transfer",
                        "File %s doesn't exist at %s" %
                        (opFile.LFN, ','.join(noReplicas)))
                    opFile.Error = 'No replicas found'
                    opFile.Status = 'Failed'
                elif badReplicas:
                    self.log.error(
                        "Unable to schedule transfer",
                        "File %s, all replicas have a bad checksum at %s" %
                        (opFile.LFN, ','.join(badReplicas)))
                    opFile.Error = 'All replicas have a bad checksum'
                    opFile.Status = 'Failed'
                elif noPFN:
                    self.log.warn(
                        "unable to schedule %s, could not get a PFN at %s" %
                        (opFile.LFN, ','.join(noPFN)))

        res = self._addMetadataToFiles(toSchedule)
        if not res['OK']:
            return res
        else:
            filesToSchedule = res['Value']

            for lfn in filesToSchedule:
                opFile = filesToSchedule[lfn]
                validTargets = toSchedule[lfn][1]
                for targetSE in validTargets:
                    ftsFile = FTS3File.fromRMSFile(opFile, targetSE)
                    fts3Files.append(ftsFile)

        if fts3Files:
            res = Registry.getUsernameForDN(self.request.OwnerDN)
            if not res['OK']:
                self.log.error(
                    "Cannot get username for DN",
                    "%s %s" % (self.request.OwnerDN, res['Message']))
                return res

            username = res['Value']
            fts3Operation = FTS3TransferOperation.fromRMSObjects(
                self.request, self.operation, username)
            fts3Operation.ftsFiles = fts3Files

            ftsSchedule = FTS3Client().persistOperation(fts3Operation)
            if not ftsSchedule["OK"]:
                self.log.error("Completely failed to schedule to FTS3:",
                               ftsSchedule["Message"])
                return ftsSchedule

            # might have nothing to schedule
            ftsSchedule = ftsSchedule["Value"]
            self.log.info("Scheduled with FTS3Operation id %s" % ftsSchedule)

            self.log.info("%d files have been scheduled to FTS3" %
                          len(fts3Files))

            for ftsFile in fts3Files:
                opFile = rmsFilesIds[ftsFile.rmsFileID]
                gMonitor.addMark("FTSScheduleOK", 1)
                opFile.Status = "Scheduled"
                self.log.debug("%s has been scheduled for FTS" % opFile.LFN)
        else:
            self.log.info("No files to schedule after metadata checks")

        # Just in case some transfers could not be scheduled, try them with RM
        return self.dmTransfer(fromFTS=True)

    def dmTransfer(self, fromFTS=False):
        """ replicate and register using dataManager  """
        # # get waiting files. If none just return
        # # source SE
        sourceSE = self.operation.SourceSE if self.operation.SourceSE else None
        if sourceSE:
            # # check source se for read
            bannedSource = self.checkSEsRSS(sourceSE, 'ReadAccess')
            if not bannedSource["OK"]:
                gMonitor.addMark("ReplicateAndRegisterAtt",
                                 len(self.operation))
                gMonitor.addMark("ReplicateFail", len(self.operation))
                return bannedSource

            if bannedSource["Value"]:
                self.operation.Error = "SourceSE %s is banned for reading" % sourceSE
                self.log.info(self.operation.Error)
                return S_OK(self.operation.Error)

        # # check targetSEs for write
        bannedTargets = self.checkSEsRSS()
        if not bannedTargets['OK']:
            gMonitor.addMark("ReplicateAndRegisterAtt", len(self.operation))
            gMonitor.addMark("ReplicateFail", len(self.operation))
            return bannedTargets

        if bannedTargets['Value']:
            self.operation.Error = "%s targets are banned for writing" % ",".join(
                bannedTargets['Value'])
            return S_OK(self.operation.Error)

        # Can continue now
        self.log.verbose("No targets banned for writing")

        waitingFiles = self.getWaitingFilesList()
        if not waitingFiles:
            return S_OK()
        # # loop over files
        if fromFTS:
            self.log.info(
                "Trying transfer using replica manager as FTS failed")
        else:
            self.log.info("Transferring files using Data manager...")
        errors = defaultdict(int)
        delayExecution = 0
        for opFile in waitingFiles:
            if opFile.Error in (
                    "Couldn't get metadata",
                    "File doesn't exist",
                    'No active replica found',
                    "All replicas have a bad checksum",
            ):
                err = "File already in error status"
                errors[err] += 1

            gMonitor.addMark("ReplicateAndRegisterAtt", 1)
            opFile.Error = ''
            lfn = opFile.LFN

            # Check if replica is at the specified source
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                self.log.error('Failed to check replicas', replicas["Message"])
                continue
            replicas = replicas["Value"]
            validReplicas = replicas.get("Valid")
            noMetaReplicas = replicas.get("NoMetadata")
            noReplicas = replicas.get('NoReplicas')
            badReplicas = replicas.get('Bad')
            noActiveReplicas = replicas.get('NoActiveReplicas')

            if not validReplicas:
                gMonitor.addMark("ReplicateFail")
                if noMetaReplicas:
                    err = "Couldn't get metadata"
                    errors[err] += 1
                    self.log.verbose(
                        "unable to replicate '%s', couldn't get metadata at %s"
                        % (opFile.LFN, ','.join(noMetaReplicas)))
                    opFile.Error = err
                elif noReplicas:
                    err = "File doesn't exist"
                    errors[err] += 1
                    self.log.verbose(
                        "Unable to replicate", "File %s doesn't exist at %s" %
                        (opFile.LFN, ','.join(noReplicas)))
                    opFile.Error = err
                    opFile.Status = 'Failed'
                elif badReplicas:
                    err = "All replicas have a bad checksum"
                    errors[err] += 1
                    self.log.error(
                        "Unable to replicate",
                        "%s, all replicas have a bad checksum at %s" %
                        (opFile.LFN, ','.join(badReplicas)))
                    opFile.Error = err
                    opFile.Status = 'Failed'
                elif noActiveReplicas:
                    err = "No active replica found"
                    errors[err] += 1
                    self.log.verbose(
                        "Unable to schedule transfer", "%s, %s at %s" %
                        (opFile.LFN, err, ','.join(noActiveReplicas)))
                    opFile.Error = err
                    # All source SEs are banned, delay execution by 1 hour
                    delayExecution = 60
                continue
            # # get the first one in the list
            if sourceSE not in validReplicas:
                if sourceSE:
                    err = "File not at specified source"
                    errors[err] += 1
                    self.log.warn(
                        "%s is not at specified sourceSE %s, changed to %s" %
                        (lfn, sourceSE, validReplicas[0]))
                sourceSE = validReplicas[0]

            # # loop over targetSE
            catalogs = self.operation.Catalog
            if catalogs:
                catalogs = [cat.strip() for cat in catalogs.split(',')]

            for targetSE in self.operation.targetSEList:

                # # call DataManager
                if targetSE in validReplicas:
                    self.log.warn(
                        "Request to replicate %s to an existing location: %s" %
                        (lfn, targetSE))
                    continue
                res = self.dm.replicateAndRegister(lfn,
                                                   targetSE,
                                                   sourceSE=sourceSE,
                                                   catalog=catalogs)
                if res["OK"]:

                    if lfn in res["Value"]["Successful"]:

                        if "replicate" in res["Value"]["Successful"][lfn]:

                            repTime = res["Value"]["Successful"][lfn][
                                "replicate"]
                            prString = "file %s replicated at %s in %s s." % (
                                lfn, targetSE, repTime)

                            gMonitor.addMark("ReplicateOK", 1)

                            if "register" in res["Value"]["Successful"][lfn]:

                                gMonitor.addMark("RegisterOK", 1)
                                regTime = res["Value"]["Successful"][lfn][
                                    "register"]
                                prString += ' and registered in %s s.' % regTime
                                self.log.info(prString)
                            else:

                                gMonitor.addMark("RegisterFail", 1)
                                prString += " but failed to register"
                                self.log.warn(prString)

                                opFile.Error = "Failed to register"
                                # # add register replica operation
                                registerOperation = self.getRegisterOperation(
                                    opFile, targetSE, type='RegisterReplica')
                                self.request.insertAfter(
                                    registerOperation, self.operation)

                        else:

                            self.log.error("Failed to replicate",
                                           "%s to %s" % (lfn, targetSE))
                            gMonitor.addMark("ReplicateFail", 1)
                            opFile.Error = "Failed to replicate"

                    else:

                        gMonitor.addMark("ReplicateFail", 1)
                        reason = res["Value"]["Failed"][lfn]
                        self.log.error("Failed to replicate and register",
                                       "File %s at %s:" % (lfn, targetSE),
                                       reason)
                        opFile.Error = reason

                else:

                    gMonitor.addMark("ReplicateFail", 1)
                    opFile.Error = "DataManager error: %s" % res["Message"]
                    self.log.error("DataManager error", res["Message"])

            if not opFile.Error:
                if len(self.operation.targetSEList) > 1:
                    self.log.info(
                        "file %s has been replicated to all targetSEs" % lfn)
                opFile.Status = "Done"
        # Log error counts
        if delayExecution:
            self.log.info("Delay execution of the request by %d minutes" %
                          delayExecution)
            self.request.delayNextExecution(delayExecution)
        for error, count in errors.iteritems():
            self.log.error(error, 'for %d files' % count)

        return S_OK()
Example #33
0
class DataIntegrityClient( Client ):

  """  
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB
 
  """

  def __init__( self, **kwargs ):

    Client.__init__( self, **kwargs )
    self.setServer( 'DataManagement/DataIntegrity' )
    self.dm = DataManager()
    self.fc = FileCatalog()

  ##########################################################################
  #
  # This section contains the specific methods for LFC->SE checks
  #

  def catalogDirectoryToSE( self, lfnDir ):
    """ This obtains the replica and metadata information from the catalog for the supplied directory and checks against the storage elements.
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the LFC->SE check" )
    gLogger.info( "-" * 40 )
    if type( lfnDir ) in types.StringTypes:
      lfnDir = [lfnDir]
    res = self.__getCatalogDirectoryContents( lfnDir )
    if not res['OK']:
      return res
    replicas = res['Value']['Replicas']
    catalogMetadata = res['Value']['Metadata']
    res = self.__checkPhysicalFiles( replicas, catalogMetadata )
    if not res['OK']:
      return res
    resDict = {'CatalogMetadata':catalogMetadata, 'CatalogReplicas':replicas}
    return S_OK( resDict )

  def catalogFileToSE( self, lfns ):
    """ This obtains the replica and metadata information from the catalog and checks against the storage elements.
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the LFC->SE check" )
    gLogger.info( "-" * 40 )
    if type( lfns ) in types.StringTypes:
      lfns = [lfns]
    res = self.__getCatalogMetadata( lfns )
    if not res['OK']:
      return res
    catalogMetadata = res['Value']
    res = self.__getCatalogReplicas( catalogMetadata.keys() )
    if not res['OK']:
      return res
    replicas = res['Value']
    res = self.__checkPhysicalFiles( replicas, catalogMetadata )
    if not res['OK']:
      return res
    resDict = {'CatalogMetadata':catalogMetadata, 'CatalogReplicas':replicas}
    return S_OK( resDict )

  def checkPhysicalFiles( self, replicas, catalogMetadata, ses = [] ):
    """ This obtains takes the supplied replica and metadata information obtained from the catalog and checks against the storage elements.
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the LFC->SE check" )
    gLogger.info( "-" * 40 )
    return self.__checkPhysicalFiles( replicas, catalogMetadata, ses = ses )

  def __checkPhysicalFiles( self, replicas, catalogMetadata, ses = [] ):
    """ This obtains the physical file metadata and checks the metadata against the catalog entries
    """
    sePfns = {}
    pfnLfns = {}
    for lfn, replicaDict in replicas.items():
      for se, pfn in replicaDict.items():
        if ( ses ) and ( se not in ses ):
          continue
        if not sePfns.has_key( se ):
          sePfns[se] = []
        sePfns[se].append( pfn )
        pfnLfns[pfn] = lfn
    gLogger.info( '%s %s' % ( 'Storage Element'.ljust( 20 ), 'Replicas'.rjust( 20 ) ) )
    for site in sortList( sePfns.keys() ):
      files = len( sePfns[site] )
      gLogger.info( '%s %s' % ( site.ljust( 20 ), str( files ).rjust( 20 ) ) )

    for se in sortList( sePfns.keys() ):
      pfns = sePfns[se]
      pfnDict = {}
      for pfn in pfns:
        pfnDict[pfn] = pfnLfns[pfn]
      sizeMismatch = []
      res = self.__checkPhysicalFileMetadata( pfnDict, se )
      if not res['OK']:
        gLogger.error( 'Failed to get physical file metadata.', res['Message'] )
        return res
      for pfn, metadata in res['Value'].items():
        if catalogMetadata.has_key( pfnLfns[pfn] ):
          if ( metadata['Size'] != catalogMetadata[pfnLfns[pfn]]['Size'] ) and ( metadata['Size'] != 0 ):
            sizeMismatch.append( ( pfnLfns[pfn], pfn, se, 'CatalogPFNSizeMismatch' ) )
      if sizeMismatch:
        self.__reportProblematicReplicas( sizeMismatch, se, 'CatalogPFNSizeMismatch' )
    return S_OK()

  def __checkPhysicalFileMetadata( self, pfnLfns, se ):
    """ Check obtain the physical file metadata and check the files are available
    """
    gLogger.info( 'Checking the integrity of %s physical files at %s' % ( len( pfnLfns ), se ) )


    res = StorageElement( se ).getFileMetadata( pfnLfns.keys() )

    if not res['OK']:
      gLogger.error( 'Failed to get metadata for pfns.', res['Message'] )
      return res
    pfnMetadataDict = res['Value']['Successful']
    # If the replicas are completely missing
    missingReplicas = []
    for pfn, reason in res['Value']['Failed'].items():
      if re.search( 'File does not exist', reason ):
        missingReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNMissing' ) )
    if missingReplicas:
      self.__reportProblematicReplicas( missingReplicas, se, 'PFNMissing' )
    lostReplicas = []
    unavailableReplicas = []
    zeroSizeReplicas = []
    # If the files are not accessible
    for pfn, pfnMetadata in pfnMetadataDict.items():
      if pfnMetadata['Lost']:
        lostReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNLost' ) )
      if pfnMetadata['Unavailable']:
        unavailableReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNUnavailable' ) )
      if pfnMetadata['Size'] == 0:
        zeroSizeReplicas.append( ( pfnLfns[pfn], pfn, se, 'PFNZeroSize' ) )
    if lostReplicas:
      self.__reportProblematicReplicas( lostReplicas, se, 'PFNLost' )
    if unavailableReplicas:
      self.__reportProblematicReplicas( unavailableReplicas, se, 'PFNUnavailable' )
    if zeroSizeReplicas:
      self.__reportProblematicReplicas( zeroSizeReplicas, se, 'PFNZeroSize' )
    gLogger.info( 'Checking the integrity of physical files at %s complete' % se )
    return S_OK( pfnMetadataDict )

  ##########################################################################
  #
  # This section contains the specific methods for SE->LFC checks
  #

  def storageDirectoryToCatalog( self, lfnDir, storageElement ):
    """ This obtains the file found on the storage element in the supplied directories and determines whether they exist in the catalog and checks their metadata elements
    """
    gLogger.info( "-" * 40 )
    gLogger.info( "Performing the SE->LFC check at %s" % storageElement )
    gLogger.info( "-" * 40 )
    if type( lfnDir ) in types.StringTypes:
      lfnDir = [lfnDir]
    res = self.__getStorageDirectoryContents( lfnDir, storageElement )
    if not res['OK']:
      return res
    storageFileMetadata = res['Value']
    if storageFileMetadata:
      return self.__checkCatalogForSEFiles( storageFileMetadata, storageElement )
    return S_OK( {'CatalogMetadata':{}, 'StorageMetadata':{}} )

  def __checkCatalogForSEFiles( self, storageMetadata, storageElement ):
    gLogger.info( 'Checking %s storage files exist in the catalog' % len( storageMetadata ) )

    # RF_NOTE : this comment is completely wrong
    # First get all the PFNs as they should be registered in the catalog
    res = StorageElement( storageElement ).getPfnForProtocol( storageMetadata.keys(), withPort = False )
    if not res['OK']:
      gLogger.error( "Failed to get registered PFNs for physical files", res['Message'] )
      return res
    for pfn, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
    for original, registered in res['Value']['Successful'].items():
      storageMetadata[registered] = storageMetadata.pop( original )
    # Determine whether these PFNs are registered and if so obtain the LFN
    res = self.fc.getLFNForPFN( storageMetadata.keys() )
    if not res['OK']:
      gLogger.error( "Failed to get registered LFNs for PFNs", res['Message'] )
      return res
    failedPfns = res['Value']['Failed']
    notRegisteredPfns = []
    for pfn, error in failedPfns.items():
      if re.search( 'No such file or directory', error ):
        notRegisteredPfns.append( ( storageMetadata[pfn]['LFN'], pfn, storageElement, 'PFNNotRegistered' ) )
        failedPfns.pop( pfn )
    if notRegisteredPfns:
      self.__reportProblematicReplicas( notRegisteredPfns, storageElement, 'PFNNotRegistered' )
    if failedPfns:
      return S_ERROR( 'Failed to obtain LFNs for PFNs' )
    pfnLfns = res['Value']['Successful']
    for pfn in storageMetadata.keys():
      pfnMetadata = storageMetadata.pop( pfn )
      if pfn in pfnLfns.keys():
        lfn = pfnLfns[pfn]
        storageMetadata[lfn] = pfnMetadata
        storageMetadata[lfn]['PFN'] = pfn
    # For the LFNs found to be registered obtain the file metadata from the catalog and verify against the storage metadata
    res = self.__getCatalogMetadata( storageMetadata.keys() )
    if not res['OK']:
      return res
    catalogMetadata = res['Value']
    sizeMismatch = []
    for lfn, lfnCatalogMetadata in catalogMetadata.items():
      lfnStorageMetadata = storageMetadata[lfn]
      if ( lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size'] ) and ( lfnStorageMetadata['Size'] != 0 ):
        sizeMismatch.append( ( lfn, storageMetadata[lfn]['PFN'], storageElement, 'CatalogPFNSizeMismatch' ) )
    if sizeMismatch:
      self.__reportProblematicReplicas( sizeMismatch, storageElement, 'CatalogPFNSizeMismatch' )
    gLogger.info( 'Checking storage files exist in the catalog complete' )
    resDict = {'CatalogMetadata':catalogMetadata, 'StorageMetadata':storageMetadata}
    return S_OK( resDict )

  def getStorageDirectoryContents( self, lfnDir, storageElement ):
    """ This obtains takes the supplied lfn directories and recursively obtains the files in the supplied storage element
    """
    return self.__getStorageDirectoryContents( lfnDir, storageElement )

  def __getStorageDirectoryContents( self, lfnDir, storageElement ):
    """ Obtians the contents of the supplied directory on the storage
    """
    gLogger.info( 'Obtaining the contents for %s directories at %s' % ( len( lfnDir ), storageElement ) )

    se = StorageElement( storageElement )
    res = se.getPfnForLfn( lfnDir )

    if not res['OK']:
      gLogger.error( "Failed to get PFNs for directories", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain directory PFN from LFNs', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain directory PFN from LFNs' )
    storageDirectories = res['Value']['Successful'].values()
    res = se.exists( storageDirectories )
    if not res['OK']:
      gLogger.error( "Failed to obtain existance of directories", res['Message'] )
      return res
    for directory, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to determine existance of directory', '%s %s' % ( directory, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to determine existance of directory' )
    directoryExists = res['Value']['Successful']
    activeDirs = []
    for directory in sortList( directoryExists.keys() ):
      exists = directoryExists[directory]
      if exists:
        activeDirs.append( directory )
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = se.listDirectory( currentDir )
      activeDirs.remove( currentDir )
      if not res['OK']:
        gLogger.error( 'Failed to get directory contents', res['Message'] )
        return res
      elif res['Value']['Failed'].has_key( currentDir ):
        gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Value']['Failed'][currentDir] ) )
        return S_ERROR( res['Value']['Failed'][currentDir] )
      else:
        dirContents = res['Value']['Successful'][currentDir]
        activeDirs.extend( dirContents['SubDirs'] )
        fileMetadata = dirContents['Files']

        # RF_NOTE This ugly trick is needed because se.getPfnPath does not follow the Successful/Failed convention
#         res = { "Successful" : {}, "Failed" : {} }
#         for pfn in fileMetadata:
#           inRes = se.getPfnPath( pfn )
#           if inRes["OK"]:
#             res["Successful"][pfn] = inRes["Value"]
#           else:
#             res["Failed"][pfn] = inRes["Message"]
        res = se.getLfnForPfn( fileMetadata.keys() )
        if not res['OK']:
          gLogger.error( 'Failed to get directory content LFNs', res['Message'] )
          return res

        for pfn, error in res['Value']['Failed'].items():
          gLogger.error( "Failed to get LFN for PFN", "%s %s" % ( pfn, error ) )
        if res['Value']['Failed']:
          return S_ERROR( "Failed to get LFNs for PFNs" )
        pfnLfns = res['Value']['Successful']
        for pfn, lfn in pfnLfns.items():
          fileMetadata[pfn]['LFN'] = lfn
        allFiles.update( fileMetadata )
    zeroSizeFiles = []
    lostFiles = []
    unavailableFiles = []
    for pfn in sortList( allFiles.keys() ):
      if os.path.basename( pfn ) == 'dirac_directory':
        allFiles.pop( pfn )
      else:
        metadata = allFiles[pfn]
        if metadata['Size'] == 0:
          zeroSizeFiles.append( ( metadata['LFN'], pfn, storageElement, 'PFNZeroSize' ) )
        # if metadata['Lost']:
        #  lostFiles.append((metadata['LFN'],pfn,storageElement,'PFNLost'))
        # if metadata['Unavailable']:
        #  unavailableFiles.append((metadata['LFN'],pfn,storageElement,'PFNUnavailable'))
    if zeroSizeFiles:
      self.__reportProblematicReplicas( zeroSizeFiles, storageElement, 'PFNZeroSize' )
    if lostFiles:
      self.__reportProblematicReplicas( lostFiles, storageElement, 'PFNLost' )
    if unavailableFiles:
      self.__reportProblematicReplicas( unavailableFiles, storageElement, 'PFNUnavailable' )
    gLogger.info( 'Obtained a total of %s files for directories at %s' % ( len( allFiles ), storageElement ) )
    return S_OK( allFiles )
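  # Sketch of the mapping returned above: allFiles is keyed by PFN, with the storage
  # metadata of each file plus an added 'LFN' entry, e.g. {pfn: {'Size': ..., 'LFN': lfn, ...}}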

  def __getStoragePathExists( self, lfnPaths, storageElement ):
    gLogger.info( 'Determining the existence of %d files at %s' % ( len( lfnPaths ), storageElement ) )

    se = StorageElement( storageElement )
    res = se.getPfnForLfn( lfnPaths )
    if not res['OK']:
      gLogger.error( "Failed to get PFNs for LFNs", res['Message'] )
      return res
    for lfnPath, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain PFN from LFN', '%s %s' % ( lfnPath, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to obtain PFNs from LFNs' )
    lfnPfns = res['Value']['Successful']
    pfnLfns = {}
    for lfn, pfn in lfnPfns.items():
      pfnLfns[pfn] = lfn

    res = se.exists( pfnLfns )
    if not res['OK']:
      gLogger.error( "Failed to obtain existance of paths", res['Message'] )
      return res
    for lfnPath, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to determine existance of path', '%s %s' % ( lfnPath, error ) )
    if res['Value']['Failed']:
      return S_ERROR( 'Failed to determine existance of paths' )
    pathExists = res['Value']['Successful']
    resDict = {}
    for pfn, exists in pathExists.items():
      if exists:
        resDict[pfnLfns[pfn]] = pfn
    return S_OK( resDict )
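  # Sketch of the mapping returned above: only the paths found to exist on the storage
  # are kept, keyed by LFN with the corresponding PFN as value, i.e. {lfn: pfn}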

  ##########################################################################
  #
  # This section contains the specific methods for obtaining replica and metadata information from the catalog
  #

  def __getCatalogDirectoryContents( self, lfnDir ):
    """ Obtain the contents of the supplied directory
    """
    gLogger.info( 'Obtaining the catalog contents for %s directories' % len( lfnDir ) )

    activeDirs = lfnDir
    allFiles = {}
    while len( activeDirs ) > 0:
      currentDir = activeDirs[0]
      res = self.fc.listDirectory( currentDir )
      activeDirs.remove( currentDir )
      if not res['OK']:
        gLogger.error( 'Failed to get directory contents', res['Message'] )
        return res
      elif res['Value']['Failed'].has_key( currentDir ):
        gLogger.error( 'Failed to get directory contents', '%s %s' % ( currentDir, res['Value']['Failed'][currentDir] ) )
      else:
        dirContents = res['Value']['Successful'][currentDir]
        activeDirs.extend( dirContents['SubDirs'] )
        allFiles.update( dirContents['Files'] )

    zeroReplicaFiles = []
    zeroSizeFiles = []
    allReplicaDict = {}
    allMetadataDict = {}
    for lfn, lfnDict in allFiles.items():
      lfnReplicas = {}
      for se, replicaDict in lfnDict['Replicas'].items():
        lfnReplicas[se] = replicaDict['PFN']
      if not lfnReplicas:
        zeroReplicaFiles.append( lfn )
      allReplicaDict[lfn] = lfnReplicas
      allMetadataDict[lfn] = lfnDict['MetaData']
      if lfnDict['MetaData']['Size'] == 0:
        zeroSizeFiles.append( lfn )
    if zeroReplicaFiles:
      self.__reportProblematicFiles( zeroReplicaFiles, 'LFNZeroReplicas' )
    if zeroSizeFiles:
      self.__reportProblematicFiles( zeroSizeFiles, 'LFNZeroSize' )
    gLogger.info( 'Obtained a total of %s files for the supplied directories' % len( allMetadataDict ) )
    resDict = {'Metadata':allMetadataDict, 'Replicas':allReplicaDict}
    return S_OK( resDict )
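  # Sketch of the dictionary returned above:
  #   {'Metadata': {lfn: metadataDict}, 'Replicas': {lfn: {seName: pfn}}}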

  def __getCatalogReplicas( self, lfns ):
    """ Obtain the file replicas from the catalog while checking that there are replicas
    """
    gLogger.info( 'Obtaining the replicas for %s files' % len( lfns ) )

    zeroReplicaFiles = []
    res = self.fc.getReplicas( lfns, allStatus = True )
    if not res['OK']:
      gLogger.error( 'Failed to get catalog replicas', res['Message'] )
      return res
    allReplicas = res['Value']['Successful']
    for lfn, error in res['Value']['Failed'].items():
      if re.search( 'File has zero replicas', error ):
        zeroReplicaFiles.append( lfn )
    if zeroReplicaFiles:
      self.__reportProblematicFiles( zeroReplicaFiles, 'LFNZeroReplicas' )
    gLogger.info( 'Obtaining the replicas for files complete' )
    return S_OK( allReplicas )

  def __getCatalogMetadata( self, lfns ):
    """ Obtain the file metadata from the catalog while checking they exist
    """
    if not lfns:
      return S_OK( {} )
    gLogger.info( 'Obtaining the catalog metadata for %s files' % len( lfns ) )

    missingCatalogFiles = []
    zeroSizeFiles = []
    res = self.fc.getFileMetadata( lfns )
    if not res['OK']:
      gLogger.error( 'Failed to get catalog metadata', res['Message'] )
      return res
    allMetadata = res['Value']['Successful']
    for lfn, error in res['Value']['Failed'].items():
      if re.search( 'No such file or directory', error ):
        missingCatalogFiles.append( lfn )
    if missingCatalogFiles:
      self.__reportProblematicFiles( missingCatalogFiles, 'LFNCatalogMissing' )
    for lfn, metadata in allMetadata.items():
      if metadata['Size'] == 0:
        zeroSizeFiles.append( lfn )
    if zeroSizeFiles:
      self.__reportProblematicFiles( zeroSizeFiles, 'LFNZeroSize' )
    gLogger.info( 'Obtaining the catalog metadata complete' )
    return S_OK( allMetadata )
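  # The successful entries are returned as {lfn: metadataDict}; the callers above rely on
  # at least the 'Size' key being present in each metadataDict.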

  ##########################################################################
  #
  # This section contains the methods for inserting problematic files into the integrity DB
  #

  def __reportProblematicFiles( self, lfns, reason ):
    """ Simple wrapper function around setFileProblematic """
    gLogger.info( 'The following %s files were found with %s' % ( len( lfns ), reason ) )
    for lfn in sortList( lfns ):
      gLogger.info( lfn )
    res = self.setFileProblematic( lfns, reason, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with files', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with files' )

  def setFileProblematic( self, lfn, reason, sourceComponent = '' ):
    """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if type( lfn ) == types.ListType:
      lfns = lfn
    elif type( lfn ) == types.StringType:
      lfns = [lfn]
    else:
      errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setFileProblematic: Attempting to update %s files." % len( lfns ) )
    fileMetadata = {}
    for lfn in lfns:
      fileMetadata[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':'', 'SE':''}
    res = self.insertProblematic( sourceComponent, fileMetadata )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB" )
    return res
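  # Hedged usage sketch for setFileProblematic (the LFN and prognosis are illustrative
  # placeholders, not values taken from this module):
  #   res = DataIntegrityClient().setFileProblematic( '/somevo/path/file.dst', 'LFNCatalogMissing',
  #                                                   sourceComponent = 'DataIntegrityClient' )
  #   if not res['OK']:
  #     gLogger.error( res['Message'] )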

  def __reportProblematicReplicas( self, replicaTuple, se, reason ):
    """ Simple wrapper function around setReplicaProblematic """
    gLogger.info( 'The following %s files had %s at %s' % ( len( replicaTuple ), reason, se ) )
    for lfn, pfn, se, reason in sortList( replicaTuple ):
      if lfn:
        gLogger.info( lfn )
      else:
        gLogger.info( pfn )
    res = self.setReplicaProblematic( replicaTuple, sourceComponent = 'DataIntegrityClient' )
    if not res['OK']:
      gLogger.info( 'Failed to update integrity DB with replicas', res['Message'] )
    else:
      gLogger.info( 'Successfully updated integrity DB with replicas' )

  def setReplicaProblematic( self, replicaTuple, sourceComponent = '' ):
    """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaDict should be of the form {lfn: {'PFN':pfn, 'SE':se, 'Prognosis':prognosis}}

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
    if type( replicaTuple ) == types.TupleType:
      replicaTuple = [replicaTuple]
    elif type( replicaTuple ) == types.ListType:
      pass
    else:
      errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
      gLogger.error( errStr )
      return S_ERROR( errStr )
    gLogger.info( "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas." % len( replicaTuple ) )
    replicaDict = {}
    for lfn, pfn, se, reason in replicaTuple:
      replicaDict[lfn] = {'Prognosis':reason, 'LFN':lfn, 'PFN':pfn, 'SE':se}
    res = self.insertProblematic( sourceComponent, replicaDict )
    if not res['OK']:
      gLogger.error( "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB" )
      return res
    for lfn in replicaDict.keys():
      replicaDict[lfn]['Status'] = 'Problematic'

    res = self.fc.setReplicaStatus( replicaDict )
    if not res['OK']:
      errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
      gLogger.error( errStr, res['Message'] )
      return res
    failed = res['Value']['Failed']
    successful = res['Value']['Successful']
    resDict = {'Successful':successful, 'Failed':failed}
    return S_OK( resDict )
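  # Hedged usage sketch for setReplicaProblematic (tuple fields follow the
  # (lfn, pfn, se, reason) order unpacked above; the values are illustrative placeholders):
  #   problematic = ( '/somevo/path/file.dst', 'srm://se.example/somevo/path/file.dst', 'SOME-SE', 'PFNZeroSize' )
  #   res = DataIntegrityClient().setReplicaProblematic( [ problematic ], sourceComponent = 'DataIntegrityClient' )
  #   if not res['OK']:
  #     gLogger.error( res['Message'] )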

  ##########################################################################
  #
  # This section contains the resolution methods for various prognoses
  #

  def __updateCompletedFiles( self, prognosis, fileID ):
    gLogger.info( "%s file (%d) is resolved" % ( prognosis, fileID ) )
    return self.setProblematicStatus( fileID, 'Resolved' )

  def __returnProblematicError( self, fileID, res ):
    self.incrementProblematicRetry( fileID )
    gLogger.error( res['Message'] )
    return res

  def __getRegisteredPFNLFN( self, pfn, storageElement ):

    res = StorageElement( storageElement ).getPfnForProtocol( pfn, withPort = False )
    if not res['OK']:
      gLogger.error( "Failed to get registered PFN for physical files", res['Message'] )
      return res
    for pfn, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
      return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
    registeredPFN = res['Value']['Successful'][pfn]
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getLFNForPFN( registeredPFN ) )
    if ( not res['OK'] ) and re.search( 'No such file or directory', res['Message'] ):
      return S_OK( False )
    return S_OK( res['Value'] )

  def __updateReplicaToChecked( self, problematicDict ):
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']
    prognosis = problematicDict['Prognosis']
    problematicDict['Status'] = 'Checked'

    res = Utils.executeSingleFileOrDirWrapper( self.fc.setReplicaStatus( {lfn:problematicDict} ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    gLogger.info( "%s replica (%d) is updated to Checked status" % ( prognosis, fileID ) )
    return self.__updateCompletedFiles( prognosis, fileID )

  def resolveCatalogPFNSizeMismatch( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
    lfn = problematicDict['LFN']
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']


    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']
    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).getFileSize( pfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    bkKCatalog = FileCatalog( ['BookkeepingDB'] )
    res = Utils.executeSingleFileOrDirWrapper( bkKCatalog.getFileSize( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    bookkeepingSize = res['Value']
    if bookkeepingSize == catalogSize == storageSize:
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) matched all registered sizes." % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    if ( catalogSize == bookkeepingSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also" % fileID )
      res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      if len( res['Value'] ) <= 1:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has no other replicas." % fileID )
        return S_ERROR( "Not removing catalog file mismatch since the only replica" )
      else:
        gLogger.info( "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..." % fileID )
        res = self.dm.removeReplica( se, lfn )
        if not res['OK']:
          return self.__returnProblematicError( fileID, res )
        return self.__updateCompletedFiles( 'CatalogPFNSizeMismatch', fileID )
    if ( catalogSize != bookkeepingSize ) and ( bookkeepingSize == storageSize ):
      gLogger.info( "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size" % fileID )
      res = self.__updateReplicaToChecked( problematicDict )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.changeProblematicPrognosis( fileID, 'BKCatalogSizeMismatch' )
    gLogger.info( "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count" % fileID )
    return self.incrementProblematicRetry( fileID )
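  # Decision summary for CatalogPFNSizeMismatch as implemented above:
  #   catalog == bookkeeping == storage   -> replica set to Checked, problem resolved
  #   catalog == bookkeeping != storage   -> remove the storage replica (unless it is the only one)
  #   bookkeeping == storage != catalog   -> replica set to Checked, reprognosed as BKCatalogSizeMismatch
  #   all three sizes differ              -> increment the retry count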

  def resolvePFNNotRegistered( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
    lfn = problematicDict['LFN']
    pfn = problematicDict['PFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      # The file does not exist in the catalog
      res = Utils.executeSingleFileOrDirWrapper( se.removeFile( pfn ) )
      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    res = Utils.executeSingleFileOrDirWrapper( se.getFileMetadata( pfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNNotRegistered replica (%d) found to be missing." % fileID )
      return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )
    elif not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageMetadata = res['Value']
    if storageMetadata['Lost']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    if storageMetadata['Unavailable']:
      gLogger.info( "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count" % fileID )
      return self.incrementProblematicRetry( fileID )

    # HACK until we can obtain the space token descriptions through GFAL
    site = seName.split( '_' )[0].split( '-' )[0]
    if not storageMetadata['Cached']:
      if lfn.endswith( '.raw' ):
        seName = '%s-RAW' % site
      else:
        seName = '%s-RDST' % site
    elif storageMetadata['Migrated']:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s_M-DST' % site
      else:
        seName = '%s_MC_M-DST' % site
    else:
      if lfn.startswith( '/lhcb/data' ):
        seName = '%s-DST' % site
      else:
        seName = '%s_MC-DST' % site

    problematicDict['SE'] = seName
    res = se.getPfnForProtocol( pfn, withPort = False )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    for pfn, error in res['Value']['Failed'].items():
      gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
      return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
    problematicDict['PFN'] = res['Value']['Successful'][pfn]

    res = Utils.executeSingleFileOrDirWrapper( self.fc.addReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileMetadata( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']['Size'] != storageMetadata['Size']:
      gLogger.info( "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNNotRegistered', fileID )

  def resolveLFNCatalogMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )
    # Remove the file from all catalogs
    # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
    res = Utils.executeSingleFileOrDirWrapper( self.fc.removeFile( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    return self.__updateCompletedFiles( 'LFNCatalogMissing', fileID )

  def resolvePFNMissing( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.exists( lfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if not res['Value']:
      gLogger.info( "PFNMissing file (%d) no longer exists in catalog" % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )

    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).exists( pfn ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if res['Value']:
      gLogger.info( "PFNMissing replica (%d) is no longer missing" % fileID )
      return self.__updateReplicaToChecked( problematicDict )
    gLogger.info( "PFNMissing replica (%d) does not exist" % fileID )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn, allStatus = True ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    replicas = res['Value']
    seSite = se.split( '_' )[0].split( '-' )[0]
    found = False
    gLogger.debug( 'Replicas: %s' % replicas )
    for replicaSE in replicas.keys():
      if re.search( seSite, replicaSE ):
        found = True
        problematicDict['SE'] = replicaSE
        se = replicaSE
    if not found:
      gLogger.info( "PFNMissing replica (%d) is no longer registered at SE. Resolved." % fileID )
      return self.__updateCompletedFiles( 'PFNMissing', fileID )
    gLogger.info( "PFNMissing replica (%d) does not exist. Removing from catalog..." % fileID )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.removeReplica( {lfn:problematicDict} ) )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    if len( replicas ) == 1:
      gLogger.info( "PFNMissing replica (%d) had a single replica. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'LFNZeroReplicas' )
    res = self.dm.replicateAndRegister( problematicDict['LFN'], se )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'PFNMissing', fileID )

  def resolvePFNUnavailable( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
    pfn = problematicDict['PFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( StorageElement( se ).getFileMetadata( pfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      # The file is no longer Unavailable but has now disappeared completely
      gLogger.info( "PFNUnavailable replica (%d) found to be missing. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    if ( not res['OK'] ) or res['Value']['Unavailable']:
      gLogger.info( "PFNUnavailable replica (%d) found to still be Unavailable" % fileID )
      return self.incrementProblematicRetry( fileID )
    if res['Value']['Lost']:
      gLogger.info( "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis" % fileID )
      return self.changeProblematicPrognosis( fileID, 'PFNLost' )
    gLogger.info( "PFNUnavailable replica (%d) is no longer Unavailable" % fileID )
    # Need to make the replica okay in the Catalog
    return self.__updateReplicaToChecked( problematicDict )

  def resolvePFNZeroSize( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
    pfn = problematicDict['PFN']
    seName = problematicDict['SE']
    fileID = problematicDict['FileID']

    se = StorageElement( seName )

    res = Utils.executeSingleFileOrDirWrapper( se.getFileSize( pfn ) )
    if ( not res['OK'] ) and ( re.search( 'File does not exist', res['Message'] ) ):
      gLogger.info( "PFNZeroSize replica (%d) found to be missing. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    elif not res['OK']:
      return self.__returnProblematicError( fileID, res )
    storageSize = res['Value']
    if storageSize == 0:
      res = Utils.executeSingleFileOrDirWrapper( se.removeFile( pfn ) )

      if not res['OK']:
        return self.__returnProblematicError( fileID, res )
      gLogger.info( "PFNZeroSize replica (%d) removed. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNMissing' )
    res = self.__getRegisteredPFNLFN( pfn, seName )
    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    lfn = res['Value']
    if not lfn:
      gLogger.info( "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'PFNNotRegistered' )
    res = Utils.executeSingleFileOrDirWrapper( self.fc.getFileMetadata( lfn ) )

    if not res['OK']:
      return self.__returnProblematicError( fileID, res )
    catalogSize = res['Value']['Size']
    if catalogSize != storageSize:
      gLogger.info( "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis" % problematicDict['FileID'] )
      return self.changeProblematicPrognosis( fileID, 'CatalogPFNSizeMismatch' )
    return self.__updateCompletedFiles( 'PFNZeroSize', fileID )
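  # Decision summary for PFNZeroSize as implemented above:
  #   file missing on the storage          -> reprognosed as PFNMissing
  #   storage size is zero                 -> replica removed, reprognosed as PFNMissing
  #   not registered in the catalog        -> reprognosed as PFNNotRegistered
  #   storage size differs from catalog    -> reprognosed as CatalogPFNSizeMismatch
  #   otherwise                            -> problem resolved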

  ############################################################################################

  def resolveLFNZeroReplicas( self, problematicDict ):
    """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
    lfn = problematicDict['LFN']
    fileID = problematicDict['FileID']

    res = Utils.executeSingleFileOrDirWrapper( self.fc.getReplicas( lfn, allStatus = True ) )
    if res['OK'] and res['Value']:
      gLogger.info( "LFNZeroReplicas file (%d) found to have replicas" % fileID )
    else:
      gLogger.info( "LFNZeroReplicas file (%d) does not have replicas. Checking storage..." % fileID )
      pfnsFound = False
      for storageElementName in sortList( gConfig.getValue( 'Resources/StorageElementGroups/Tier1_MC_M-DST', [] ) ):
        res = self.__getStoragePathExists( [lfn], storageElementName )
        if res['OK'] and res['Value'].has_key( lfn ):
          gLogger.info( "LFNZeroReplicas file (%d) found storage file at %s" % ( fileID, storageElementName ) )
          pfn = res['Value'][lfn]
          self.__reportProblematicReplicas( [( lfn, pfn, storageElementName, 'PFNNotRegistered' )], storageElementName, 'PFNNotRegistered' )
          pfnsFound = True
      if not pfnsFound:
        gLogger.info( "LFNZeroReplicas file (%d) did not have storage files. Removing..." % fileID )
        res = Utils.executeSingleFileOrDirWrapper( self.fc.removeFile( lfn ) )
        if not res['OK']:
          gLogger.error( res['Message'] )
          # Increment the number of retries for this file
          self.incrementProblematicRetry( fileID )
          return res
        gLogger.info( "LFNZeroReplicas file (%d) removed from catalog" % fileID )
    # If we get here the problem is solved so we can update the integrityDB
    return self.__updateCompletedFiles( 'LFNZeroReplicas', fileID )
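
# A minimal usage sketch for the storage-side listing shown above. The module path follows
# the standard DIRAC layout, and the directory and storage element names are illustrative
# placeholders, so treat this as a sketch rather than a verbatim recipe.
from DIRAC import gLogger
from DIRAC.DataManagementSystem.Client.DataIntegrityClient import DataIntegrityClient

def listStorageFiles():
  integrityClient = DataIntegrityClient()
  # Recursively list the supplied LFN directory on the storage element
  res = integrityClient.getStorageDirectoryContents( [ '/somevo/data/run1' ], 'SOME-SE' )
  if not res['OK']:
    gLogger.error( 'Failed to list storage directory', res['Message'] )
    return res
  gLogger.info( 'Found %s files on the storage' % len( res['Value'] ) )
  return res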
Example #34
0
class DataIntegrityClient(Client):
    """
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB

  """
    def __init__(self, **kwargs):

        Client.__init__(self, **kwargs)
        self.setServer('DataManagement/DataIntegrity')
        self.dm = DataManager()
        self.fc = FileCatalog()

    ##########################################################################
    #
    # This section contains the specific methods for LFC->SE checks
    #

    def catalogDirectoryToSE(self, lfnDir):
        """ This obtains the replica and metadata information from the catalog for the supplied directory and checks against the storage elements.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        if type(lfnDir) in types.StringTypes:
            lfnDir = [lfnDir]
        res = self.__getCatalogDirectoryContents(lfnDir)
        if not res['OK']:
            return res
        replicas = res['Value']['Replicas']
        catalogMetadata = res['Value']['Metadata']
        res = self.__checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        }
        return S_OK(resDict)

    def catalogFileToSE(self, lfns):
        """ This obtains the replica and metadata information from the catalog and checks against the storage elements.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        if type(lfns) in types.StringTypes:
            lfns = [lfns]
        res = self.__getCatalogMetadata(lfns)
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        res = self.__getCatalogReplicas(catalogMetadata.keys())
        if not res['OK']:
            return res
        replicas = res['Value']
        res = self.__checkPhysicalFiles(replicas, catalogMetadata)
        if not res['OK']:
            return res
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'CatalogReplicas': replicas
        }
        return S_OK(resDict)
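    # Sketch of the dictionary returned by the LFC->SE checks above:
    #   {'CatalogMetadata': {lfn: metadataDict}, 'CatalogReplicas': {lfn: {seName: url}}}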

    def checkPhysicalFiles(self, replicas, catalogMetadata, ses=[]):
        """ This obtains takes the supplied replica and metadata information obtained from the catalog and checks against the storage elements.
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the LFC->SE check")
        gLogger.info("-" * 40)
        return self.__checkPhysicalFiles(replicas, catalogMetadata, ses=ses)

    def __checkPhysicalFiles(self, replicas, catalogMetadata, ses=[]):
        """ This obtains the physical file metadata and checks the metadata against the catalog entries
    """
        seLfns = {}
        for lfn, replicaDict in replicas.items():
            for se, _url in replicaDict.items():
                if (ses) and (se not in ses):
                    continue
                seLfns.setdefault(se, []).append(lfn)
        gLogger.info('%s %s' %
                     ('Storage Element'.ljust(20), 'Replicas'.rjust(20)))

        for se in sortList(seLfns):
            files = len(seLfns[se])
            gLogger.info('%s %s' % (se.ljust(20), str(files).rjust(20)))

            lfns = seLfns[se]
            sizeMismatch = []
            res = self.__checkPhysicalFileMetadata(lfns, se)
            if not res['OK']:
                gLogger.error('Failed to get physical file metadata.',
                              res['Message'])
                return res
            for lfn, metadata in res['Value'].items():
                if lfn in catalogMetadata:
                    if (metadata['Size'] != catalogMetadata[lfn]['Size']) and (
                            metadata['Size'] != 0):
                        sizeMismatch.append((lfn, 'deprecatedUrl', se,
                                             'CatalogPFNSizeMismatch'))
            if sizeMismatch:
                self.__reportProblematicReplicas(sizeMismatch, se,
                                                 'CatalogPFNSizeMismatch')
        return S_OK()

    def __checkPhysicalFileMetadata(self, lfns, se):
        """ Check obtain the physical file metadata and check the files are available
    """
        gLogger.info('Checking the integrity of %s physical files at %s' %
                     (len(lfns), se))

        res = StorageElement(se).getFileMetadata(lfns)

        if not res['OK']:
            gLogger.error('Failed to get metadata for lfns.', res['Message'])
            return res
        lfnMetadataDict = res['Value']['Successful']
        # If the replicas are completely missing
        missingReplicas = []
        for lfn, reason in res['Value']['Failed'].items():
            if re.search('File does not exist', reason):
                missingReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNMissing'))
        if missingReplicas:
            self.__reportProblematicReplicas(missingReplicas, se, 'PFNMissing')
        lostReplicas = []
        unavailableReplicas = []
        zeroSizeReplicas = []
        # If the files are not accessible
        for lfn, lfnMetadata in lfnMetadataDict.items():
            if lfnMetadata['Lost']:
                lostReplicas.append((lfn, 'deprecatedUrl', se, 'PFNLost'))
            if lfnMetadata['Unavailable']:
                unavailableReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNUnavailable'))
            if lfnMetadata['Size'] == 0:
                zeroSizeReplicas.append(
                    (lfn, 'deprecatedUrl', se, 'PFNZeroSize'))
        if lostReplicas:
            self.__reportProblematicReplicas(lostReplicas, se, 'PFNLost')
        if unavailableReplicas:
            self.__reportProblematicReplicas(unavailableReplicas, se,
                                             'PFNUnavailable')
        if zeroSizeReplicas:
            self.__reportProblematicReplicas(zeroSizeReplicas, se,
                                             'PFNZeroSize')
        gLogger.info(
            'Checking the integrity of physical files at %s complete' % se)
        return S_OK(lfnMetadataDict)

    ##########################################################################
    #
    # This section contains the specific methods for SE->LFC checks
    #

    def storageDirectoryToCatalog(self, lfnDir, storageElement):
        """ This obtains the file found on the storage element in the supplied directories and determines whether they exist in the catalog and checks their metadata elements
    """
        gLogger.info("-" * 40)
        gLogger.info("Performing the SE->LFC check at %s" % storageElement)
        gLogger.info("-" * 40)
        if type(lfnDir) in types.StringTypes:
            lfnDir = [lfnDir]
        res = self.__getStorageDirectoryContents(lfnDir, storageElement)
        if not res['OK']:
            return res
        storageFileMetadata = res['Value']
        if storageFileMetadata:
            return self.__checkCatalogForSEFiles(storageFileMetadata,
                                                 storageElement)
        return S_OK({'CatalogMetadata': {}, 'StorageMetadata': {}})
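    # Hedged usage sketch (the directory and storage element names are illustrative placeholders):
    #   res = DataIntegrityClient().storageDirectoryToCatalog( '/somevo/data/run1', 'SOME-SE' )
    #   if res['OK']:
    #       catalogMetadata = res['Value']['CatalogMetadata']
    #       storageMetadata = res['Value']['StorageMetadata']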

    def __checkCatalogForSEFiles(self, storageMetadata, storageElement):
        gLogger.info('Checking %s storage files exist in the catalog' %
                     len(storageMetadata))

        res = self.fc.getReplicas(storageMetadata)
        if not res['OK']:
            gLogger.error("Failed to get replicas for LFN", res['Message'])
            return res
        failedLfns = res['Value']['Failed']
        successfulLfns = res['Value']['Successful']
        notRegisteredLfns = []

        for lfn in storageMetadata:
            if lfn in failedLfns:
                if 'No such file or directory' in failedLfns[lfn]:
                    notRegisteredLfns.append(
                        (lfn, 'deprecatedUrl', storageElement,
                         'LFNNotRegistered'))
                    failedLfns.pop(lfn)
            elif storageElement not in successfulLfns[lfn]:
                notRegisteredLfns.append(
                    (lfn, 'deprecatedUrl', storageElement, 'LFNNotRegistered'))

        if notRegisteredLfns:
            self.__reportProblematicReplicas(notRegisteredLfns, storageElement,
                                             'LFNNotRegistered')
        if failedLfns:
            return S_ERROR('Failed to obtain replicas')

        # For the LFNs found to be registered obtain the file metadata from the catalog and verify against the storage metadata
        res = self.__getCatalogMetadata(storageMetadata)
        if not res['OK']:
            return res
        catalogMetadata = res['Value']
        sizeMismatch = []
        for lfn, lfnCatalogMetadata in catalogMetadata.items():
            lfnStorageMetadata = storageMetadata[lfn]
            if (lfnStorageMetadata['Size'] != lfnCatalogMetadata['Size']) and (
                    lfnStorageMetadata['Size'] != 0):
                sizeMismatch.append((lfn, 'deprecatedUrl', storageElement,
                                     'CatalogPFNSizeMismatch'))
        if sizeMismatch:
            self.__reportProblematicReplicas(sizeMismatch, storageElement,
                                             'CatalogPFNSizeMismatch')
        gLogger.info('Checking storage files exist in the catalog complete')
        resDict = {
            'CatalogMetadata': catalogMetadata,
            'StorageMetadata': storageMetadata
        }
        return S_OK(resDict)

    def getStorageDirectoryContents(self, lfnDir, storageElement):
        """ This obtains takes the supplied lfn directories and recursively obtains the files in the supplied storage element
    """
        return self.__getStorageDirectoryContents(lfnDir, storageElement)

    def __getStorageDirectoryContents(self, lfnDir, storageElement):
        """ Obtians the contents of the supplied directory on the storage
    """
        gLogger.info('Obtaining the contents for %s directories at %s' %
                     (len(lfnDir), storageElement))

        se = StorageElement(storageElement)

        res = se.exists(lfnDir)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of directories",
                          res['Message'])
            return res
        for directory, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existance of directory',
                          '%s %s' % (directory, error))
        if res['Value']['Failed']:
            return S_ERROR('Failed to determine existance of directory')
        directoryExists = res['Value']['Successful']
        activeDirs = []
        for directory in sorted(directoryExists):
            exists = directoryExists[directory]
            if exists:
                activeDirs.append(directory)
        allFiles = {}
        while len(activeDirs) > 0:
            currentDir = activeDirs[0]
            res = se.listDirectory(currentDir)
            activeDirs.remove(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents',
                              res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error(
                    'Failed to get directory contents',
                    '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
                return S_ERROR(res['Value']['Failed'][currentDir])
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend(
                    se.getLFNFromURL(dirContents['SubDirs']).get(
                        'Value', {}).get('Successful', []))
                fileURLMetadata = dirContents['Files']
                fileMetadata = {}
                res = se.getLFNFromURL(fileURLMetadata)
                if not res['OK']:
                    gLogger.error('Failed to get directory content LFNs',
                                  res['Message'])
                    return res

                for url, error in res['Value']['Failed'].items():
                    gLogger.error("Failed to get LFN for URL",
                                  "%s %s" % (url, error))
                if res['Value']['Failed']:
                    return S_ERROR("Failed to get LFNs for PFNs")
                urlLfns = res['Value']['Successful']
                for urlLfn, lfn in urlLfns.items():
                    fileMetadata[lfn] = fileURLMetadata[urlLfn]
                allFiles.update(fileMetadata)

        zeroSizeFiles = []

        for lfn in sorted(allFiles):
            if os.path.basename(lfn) == 'dirac_directory':
                allFiles.pop(lfn)
            else:
                metadata = allFiles[lfn]
                if metadata['Size'] == 0:
                    zeroSizeFiles.append(
                        (lfn, 'deprecatedUrl', storageElement, 'PFNZeroSize'))
        if zeroSizeFiles:
            self.__reportProblematicReplicas(zeroSizeFiles, storageElement,
                                             'PFNZeroSize')

        gLogger.info('Obtained a total of %s files for directories at %s' %
                     (len(allFiles), storageElement))
        return S_OK(allFiles)

    def __getStoragePathExists(self, lfnPaths, storageElement):
        gLogger.info('Determining the existence of %d files at %s' %
                     (len(lfnPaths), storageElement))

        se = StorageElement(storageElement)

        res = se.exists(lfnPaths)
        if not res['OK']:
            gLogger.error("Failed to obtain existance of paths",
                          res['Message'])
            return res
        for lfnPath, error in res['Value']['Failed'].items():
            gLogger.error('Failed to determine existance of path',
                          '%s %s' % (lfnPath, error))
        if res['Value']['Failed']:
            return S_ERROR('Failed to determine existance of paths')
        pathExists = res['Value']['Successful']
        resDict = {}
        for lfn, exists in pathExists.items():
            if exists:
                resDict[lfn] = True
        return S_OK(resDict)
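    # Unlike the older variant earlier in this document, this version returns {lfn: True}
    # only for the paths found to exist on the storage element.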

    ##########################################################################
    #
    # This section contains the specific methods for obtaining replica and metadata information from the catalog
    #

    def __getCatalogDirectoryContents(self, lfnDir):
        """ Obtain the contents of the supplied directory
    """
        gLogger.info('Obtaining the catalog contents for %s directories' %
                     len(lfnDir))

        activeDirs = lfnDir
        allFiles = {}
        while len(activeDirs) > 0:
            currentDir = activeDirs[0]
            res = self.fc.listDirectory(currentDir)
            activeDirs.remove(currentDir)
            if not res['OK']:
                gLogger.error('Failed to get directory contents',
                              res['Message'])
                return res
            elif currentDir in res['Value']['Failed']:
                gLogger.error(
                    'Failed to get directory contents',
                    '%s %s' % (currentDir, res['Value']['Failed'][currentDir]))
            else:
                dirContents = res['Value']['Successful'][currentDir]
                activeDirs.extend(dirContents['SubDirs'])
                allFiles.update(dirContents['Files'])

        zeroReplicaFiles = []
        zeroSizeFiles = []
        allReplicaDict = {}
        allMetadataDict = {}
        for lfn, lfnDict in allFiles.items():
            lfnReplicas = {}
            for se, replicaDict in lfnDict['Replicas'].items():
                lfnReplicas[se] = replicaDict['PFN']
            if not lfnReplicas:
                zeroReplicaFiles.append(lfn)
            allReplicaDict[lfn] = lfnReplicas
            allMetadataDict[lfn] = lfnDict['MetaData']
            if lfnDict['MetaData']['Size'] == 0:
                zeroSizeFiles.append(lfn)
        if zeroReplicaFiles:
            self.__reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        if zeroSizeFiles:
            self.__reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
        gLogger.info(
            'Obtained a total of %s files for the supplied directories' %
            len(allMetadataDict))
        resDict = {'Metadata': allMetadataDict, 'Replicas': allReplicaDict}
        return S_OK(resDict)

    def __getCatalogReplicas(self, lfns):
        """ Obtain the file replicas from the catalog while checking that there are replicas
    """
        gLogger.info('Obtaining the replicas for %s files' % len(lfns))

        zeroReplicaFiles = []
        res = self.fc.getReplicas(lfns, allStatus=True)
        if not res['OK']:
            gLogger.error('Failed to get catalog replicas', res['Message'])
            return res
        allReplicas = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].items():
            if re.search('File has zero replicas', error):
                zeroReplicaFiles.append(lfn)
        if zeroReplicaFiles:
            self.__reportProblematicFiles(zeroReplicaFiles, 'LFNZeroReplicas')
        gLogger.info('Obtaining the replicas for files complete')
        return S_OK(allReplicas)

    def __getCatalogMetadata(self, lfns):
        """ Obtain the file metadata from the catalog while checking they exist
    """
        if not lfns:
            return S_OK({})
        gLogger.info('Obtaining the catalog metadata for %s files' % len(lfns))

        missingCatalogFiles = []
        zeroSizeFiles = []
        res = self.fc.getFileMetadata(lfns)
        if not res['OK']:
            gLogger.error('Failed to get catalog metadata', res['Message'])
            return res
        allMetadata = res['Value']['Successful']
        for lfn, error in res['Value']['Failed'].items():
            if re.search('No such file or directory', error):
                missingCatalogFiles.append(lfn)
        if missingCatalogFiles:
            self.__reportProblematicFiles(missingCatalogFiles,
                                          'LFNCatalogMissing')
        for lfn, metadata in allMetadata.items():
            if metadata['Size'] == 0:
                zeroSizeFiles.append(lfn)
        if zeroSizeFiles:
            self.__reportProblematicFiles(zeroSizeFiles, 'LFNZeroSize')
        gLogger.info('Obtaining the catalog metadata complete')
        return S_OK(allMetadata)

    ##########################################################################
    #
    # This section contains the methods for inserting problematic files into the integrity DB
    #

    def __reportProblematicFiles(self, lfns, reason):
        """ Simple wrapper function around setFileProblematic """
        gLogger.info('The following %s files were found with %s' %
                     (len(lfns), reason))
        for lfn in sortList(lfns):
            gLogger.info(lfn)
        res = self.setFileProblematic(lfns,
                                      reason,
                                      sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with files',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with files')

    def setFileProblematic(self, lfn, reason, sourceComponent=''):
        """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if type(lfn) == types.ListType:
            lfns = lfn
        elif type(lfn) == types.StringType:
            lfns = [lfn]
        else:
            errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setFileProblematic: Attempting to update %s files."
            % len(lfns))
        fileMetadata = {}
        for lfn in lfns:
            fileMetadata[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': '',
                'SE': ''
            }
        res = self.insertProblematic(sourceComponent, fileMetadata)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematics to integrity DB"
            )
        return res

    def __reportProblematicReplicas(self, replicaTuple, se, reason):
        """ Simple wrapper function around setReplicaProblematic """
        gLogger.info('The following %s files had %s at %s' %
                     (len(replicaTuple), reason, se))
        for lfn, _pfn, se, reason in sortList(replicaTuple):
            if lfn:
                gLogger.info(lfn)
        res = self.setReplicaProblematic(replicaTuple,
                                         sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with replicas',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with replicas')

    def setReplicaProblematic(self, replicaTuple, sourceComponent=''):
        """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaDict should be of the form {lfn: {'PFN':pfn, 'SE':se, 'Prognosis':prognosis}}

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if type(replicaTuple) == types.TupleType:
            replicaTuple = [replicaTuple]
        elif type(replicaTuple) == types.ListType:
            pass
        else:
            errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas."
            % len(replicaTuple))
        replicaDict = {}
        for lfn, pfn, se, reason in replicaTuple:
            replicaDict[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': pfn,
                'SE': se
            }
        res = self.insertProblematic(sourceComponent, replicaDict)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB"
            )
            return res
        for lfn in replicaDict.keys():
            replicaDict[lfn]['Status'] = 'Problematic'

        res = self.fc.setReplicaStatus(replicaDict)
        if not res['OK']:
            errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
            gLogger.error(errStr, res['Message'])
            return res
        failed = res['Value']['Failed']
        successful = res['Value']['Successful']
        resDict = {'Successful': successful, 'Failed': failed}
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the resolution methods for various prognoses
    #

    def __updateCompletedFiles(self, prognosis, fileID):
        gLogger.info("%s file (%d) is resolved" % (prognosis, fileID))
        return self.setProblematicStatus(fileID, 'Resolved')

    def __returnProblematicError(self, fileID, res):
        self.incrementProblematicRetry(fileID)
        gLogger.error('DataIntegrityClient failure', res['Message'])
        return res


#   def __getRegisteredPFNLFN( self, pfn, storageElement ):
#
#     res = StorageElement( storageElement ).getURL( pfn )
#     if not res['OK']:
#       gLogger.error( "Failed to get registered PFN for physical files", res['Message'] )
#       return res
#     for pfn, error in res['Value']['Failed'].items():
#       gLogger.error( 'Failed to obtain registered PFN for physical file', '%s %s' % ( pfn, error ) )
#       return S_ERROR( 'Failed to obtain registered PFNs from physical file' )
#     registeredPFN = res['Value']['Successful'][pfn]
#     res = returnSingleResult( self.fc.getLFNForPFN( registeredPFN ) )
#     if ( not res['OK'] ) and re.search( 'No such file or directory', res['Message'] ):
#       return S_OK( False )
#     return S_OK( res['Value'] )

    def __updateReplicaToChecked(self, problematicDict):
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        prognosis = problematicDict['Prognosis']
        problematicDict['Status'] = 'Checked'

        res = returnSingleResult(
            self.fc.setReplicaStatus({lfn: problematicDict}))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        gLogger.info("%s replica (%d) is updated to Checked status" %
                     (prognosis, fileID))
        return self.__updateCompletedFiles(prognosis, fileID)

    def resolveCatalogPFNSizeMismatch(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']
        res = returnSingleResult(StorageElement(se).getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        bkKCatalog = FileCatalog(['BookkeepingDB'])
        res = returnSingleResult(bkKCatalog.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        bookkeepingSize = res['Value']
        if bookkeepingSize == catalogSize == storageSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) matched all registered sizes."
                % fileID)
            return self.__updateReplicaToChecked(problematicDict)
        if (catalogSize == bookkeepingSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also"
                % fileID)
            res = returnSingleResult(self.fc.getReplicas(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            if len(res['Value']) <= 1:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has no other replicas."
                    % fileID)
                return S_ERROR(
                    "Not removing catalog file mismatch since the only replica"
                )
            else:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..."
                    % fileID)
                res = self.dm.removeReplica(se, lfn)
                if not res['OK']:
                    return self.__returnProblematicError(fileID, res)
                return self.__updateCompletedFiles('CatalogPFNSizeMismatch',
                                                   fileID)
        if (catalogSize != bookkeepingSize) and (bookkeepingSize
                                                 == storageSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size"
                % fileID)
            res = self.__updateReplicaToChecked(problematicDict)
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.changeProblematicPrognosis(fileID,
                                                   'BKCatalogSizeMismatch')
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) all sizes found to mismatch. Updating retry count"
            % fileID)
        return self.incrementProblematicRetry(fileID)

    def resolvePFNNotRegistered(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)
        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            # The file does not exist in the catalog
            res = returnSingleResult(se.removeFile(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        res = returnSingleResult(se.getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info("PFNNotRegistered replica (%d) found to be missing." %
                         fileID)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        elif not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageMetadata = res['Value']
        if storageMetadata['Lost']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        if storageMetadata['Unavailable']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count"
                % fileID)
            return self.incrementProblematicRetry(fileID)

        # HACK until we can obtain the space token descriptions through GFAL
        site = seName.split('_')[0].split('-')[0]
        if not storageMetadata['Cached']:
            if lfn.endswith('.raw'):
                seName = '%s-RAW' % site
            else:
                seName = '%s-RDST' % site
        elif storageMetadata['Migrated']:
            if lfn.startswith('/lhcb/data'):
                seName = '%s_M-DST' % site
            else:
                seName = '%s_MC_M-DST' % site
        else:
            if lfn.startswith('/lhcb/data'):
                seName = '%s-DST' % site
            else:
                seName = '%s_MC-DST' % site

        problematicDict['SE'] = seName
        res = returnSingleResult(se.getURL(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)

        problematicDict['PFN'] = res['Value']

        res = returnSingleResult(self.fc.addReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        res = returnSingleResult(self.fc.getFileMetadata(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']['Size'] != storageMetadata['Size']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNNotRegistered', fileID)

    def resolveLFNCatalogMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            return self.__updateCompletedFiles('LFNCatalogMissing', fileID)
        # Remove the file from all catalogs
        # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
        res = returnSingleResult(self.fc.removeFile(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        return self.__updateCompletedFiles('LFNCatalogMissing', fileID)

    def resolvePFNMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
        se = problematicDict['SE']
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            gLogger.info("PFNMissing file (%d) no longer exists in catalog" %
                         fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)

        res = returnSingleResult(StorageElement(se).exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            gLogger.info("PFNMissing replica (%d) is no longer missing" %
                         fileID)
            return self.__updateReplicaToChecked(problematicDict)
        gLogger.info("PFNMissing replica (%d) does not exist" % fileID)
        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        replicas = res['Value']
        seSite = se.split('_')[0].split('-')[0]
        found = False
        print(replicas)
        for replicaSE in replicas.keys():
            if re.search(seSite, replicaSE):
                found = True
                problematicDict['SE'] = replicaSE
                se = replicaSE
        if not found:
            gLogger.info(
                "PFNMissing replica (%d) is no longer registered at SE. Resolved."
                % fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)
        gLogger.info(
            "PFNMissing replica (%d) does not exist. Removing from catalog..."
            % fileID)
        res = returnSingleResult(self.fc.removeReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if len(replicas) == 1:
            gLogger.info(
                "PFNMissing replica (%d) had a single replica. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'LFNZeroReplicas')
        res = self.dm.replicateAndRegister(problematicDict['LFN'], se)
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('PFNMissing', fileID)

    def resolvePFNUnavailable(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(StorageElement(se).getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            # The file is no longer Unavailable but has now disappeared completely
            gLogger.info(
                "PFNUnavailable replica (%d) found to be missing. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        if (not res['OK']) or res['Value']['Unavailable']:
            gLogger.info(
                "PFNUnavailable replica (%d) found to still be Unavailable" %
                fileID)
            return self.incrementProblematicRetry(fileID)
        if res['Value']['Lost']:
            gLogger.info(
                "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        gLogger.info("PFNUnavailable replica (%d) is no longer Unavailable" %
                     fileID)
        # Need to make the replica okay in the Catalog
        return self.__updateReplicaToChecked(problematicDict)

    def resolvePFNZeroSize(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)

        res = returnSingleResult(se.getFileSize(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info(
                "PFNZeroSize replica (%d) found to be missing. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        storageSize = res['Value']
        if storageSize == 0:
            res = returnSingleResult(se.removeFile(lfn))

            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            gLogger.info(
                "PFNZeroSize replica (%d) removed. Updating prognosis" %
                problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')

        res = returnSingleResult(self.fc.getReplicas(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if seName not in res['Value']:
            gLogger.info(
                "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNNotRegistered')
        res = returnSingleResult(self.fc.getFileMetadata(lfn))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']['Size']
        if catalogSize != storageSize:
            gLogger.info(
                "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNZeroSize', fileID)

    ############################################################################################

    def resolveLFNZeroReplicas(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if res['OK'] and res['Value']:
            gLogger.info("LFNZeroReplicas file (%d) found to have replicas" %
                         fileID)
        else:
            gLogger.info(
                "LFNZeroReplicas file (%d) does not have replicas. Checking storage..."
                % fileID)
            pfnsFound = False
            for storageElementName in sorted(
                    gConfig.getValue(
                        'Resources/StorageElementGroups/Tier1_MC_M-DST', [])):
                res = self.__getStoragePathExists([lfn], storageElementName)
                if lfn in res['Value']:
                    gLogger.info(
                        "LFNZeroReplicas file (%d) found storage file at %s" %
                        (fileID, storageElementName))
                    self.__reportProblematicReplicas(
                        [(lfn, 'deprecatedUrl', storageElementName,
                          'PFNNotRegistered')], storageElementName,
                        'PFNNotRegistered')
                    pfnsFound = True
            if not pfnsFound:
                gLogger.info(
                    "LFNZeroReplicas file (%d) did not have storage files. Removing..."
                    % fileID)
                res = returnSingleResult(self.fc.removeFile(lfn))
                if not res['OK']:
                    gLogger.error('DataIntegrityClient: failed to remove file',
                                  res['Message'])
                    # Increment the number of retries for this file
                    self.incrementProblematicRetry(fileID)
                    return res
                gLogger.info("LFNZeroReplicas file (%d) removed from catalog" %
                             fileID)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('LFNZeroReplicas', fileID)
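
The resolve* methods above are normally driven from the problematics queue. Below is a minimal driver sketch: getPrognosisProblematics() is part of the client API, but the assumption that its 'Value' is a list of problematic dictionaries (and the "resolve%s" name mapping) is illustrative and should be checked against the DataIntegrity service.

# Hypothetical driver, not part of the example above.
from DIRAC import gLogger


def resolveProblematics(client, prognosis="CatalogPFNSizeMismatch"):
    """Fetch problematics of one prognosis and dispatch each to the matching resolve* method."""
    resolver = getattr(client, "resolve%s" % prognosis, None)
    if resolver is None:
        gLogger.error("No resolver implemented for prognosis", prognosis)
        return
    res = client.getPrognosisProblematics(prognosis)
    if not res["OK"]:
        gLogger.error("Failed to get problematics", res["Message"])
        return
    # 'Value' is assumed to be a list of problematic dictionaries (LFN, SE, FileID, Prognosis, ...)
    for problematicDict in res["Value"]:
        result = resolver(problematicDict)
        if not result["OK"]:
            gLogger.error("Failed to resolve problematic", result["Message"])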
Example #35
0
class DataIntegrityClient(Client):
    """
  The following methods are supported in the service but are not mentioned explicitly here:

          getProblematic()
             Obtains a problematic file from the IntegrityDB based on the LastUpdate time

          getPrognosisProblematics(prognosis)
            Obtains all the problematics of a particular prognosis from the integrityDB

          getProblematicsSummary()
            Obtains a count of the number of problematics for each prognosis found

          getDistinctPrognosis()
            Obtains the distinct prognosis found in the integrityDB

          getTransformationProblematics(prodID)
            Obtains the problematics for a given production

          incrementProblematicRetry(fileID)
            Increments the retry count for the supplied file ID

          changeProblematicPrognosis(fileID,newPrognosis)
            Changes the prognosis of the supplied file to the new prognosis

          setProblematicStatus(fileID,status)
            Updates the status of a problematic in the integrityDB

          removeProblematic(self,fileID)
            This removes the specified file ID from the integrity DB

          insertProblematic(sourceComponent,fileMetadata)
            Inserts file with supplied metadata into the integrity DB

  """
    def __init__(self, **kwargs):

        super(DataIntegrityClient, self).__init__(**kwargs)
        self.setServer('DataManagement/DataIntegrity')
        self.dm = DataManager()
        self.fc = FileCatalog()

    def setFileProblematic(self, lfn, reason, sourceComponent=''):
        """ This method updates the status of the file in the FileCatalog and the IntegrityDB

        lfn - the lfn of the file
        reason - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if isinstance(lfn, list):
            lfns = lfn
        elif isinstance(lfn, str):
            lfns = [lfn]
        else:
            errStr = "DataIntegrityClient.setFileProblematic: Supplied file info must be list or a single LFN."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setFileProblematic: Attempting to update %s files."
            % len(lfns))
        fileMetadata = {}
        for lfn in lfns:
            fileMetadata[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': '',
                'SE': ''
            }
        res = self.insertProblematic(sourceComponent, fileMetadata)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setFileProblematic: Failed to insert problematics to integrity DB"
            )
        return res

    def reportProblematicReplicas(self, replicaTuple, se, reason):
        """ Simple wrapper function around setReplicaProblematic """
        gLogger.info('The following %s files had %s at %s' %
                     (len(replicaTuple), reason, se))
        for lfn, _pfn, se, reason in sorted(replicaTuple):
            if lfn:
                gLogger.info(lfn)
        res = self.setReplicaProblematic(replicaTuple,
                                         sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with replicas',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with replicas')

    def setReplicaProblematic(self, replicaTuple, sourceComponent=''):
        """ This method updates the status of the replica in the FileCatalog and the IntegrityDB
        The supplied replicaDict should be of the form {lfn :{'PFN':pfn,'SE':se,'Prognosis':prognosis}

        lfn - the lfn of the file
        pfn - the pfn if available (otherwise '')
        se - the storage element of the problematic replica (otherwise '')
        prognosis - this is given to the integrity DB and should reflect the problem observed with the file

        sourceComponent is the component issuing the request.
    """
        if isinstance(replicaTuple, tuple):
            replicaTuple = [replicaTuple]
        elif isinstance(replicaTuple, list):
            pass
        else:
            errStr = "DataIntegrityClient.setReplicaProblematic: Supplied replica info must be a tuple or list of tuples."
            gLogger.error(errStr)
            return S_ERROR(errStr)
        gLogger.info(
            "DataIntegrityClient.setReplicaProblematic: Attempting to update %s replicas."
            % len(replicaTuple))
        replicaDict = {}
        for lfn, pfn, se, reason in replicaTuple:
            replicaDict[lfn] = {
                'Prognosis': reason,
                'LFN': lfn,
                'PFN': pfn,
                'SE': se
            }
        res = self.insertProblematic(sourceComponent, replicaDict)
        if not res['OK']:
            gLogger.error(
                "DataIntegrityClient.setReplicaProblematic: Failed to insert problematic to integrity DB"
            )
            return res
        for lfn in replicaDict.keys():
            replicaDict[lfn]['Status'] = 'Problematic'

        res = self.fc.setReplicaStatus(replicaDict)
        if not res['OK']:
            errStr = "DataIntegrityClient.setReplicaProblematic: Completely failed to update replicas."
            gLogger.error(errStr, res['Message'])
            return res
        failed = res['Value']['Failed']
        successful = res['Value']['Successful']
        resDict = {'Successful': successful, 'Failed': failed}
        return S_OK(resDict)

    ##########################################################################
    #
    # This section contains the resolution methods for various prognoses
    #

    def __updateCompletedFiles(self, prognosis, fileID):
        gLogger.info("%s file (%d) is resolved" % (prognosis, fileID))
        return self.setProblematicStatus(fileID, 'Resolved')

    def __returnProblematicError(self, fileID, res):
        self.incrementProblematicRetry(fileID)
        gLogger.error('DataIntegrityClient failure', res['Message'])
        return res

    def __updateReplicaToChecked(self, problematicDict):
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']
        prognosis = problematicDict['Prognosis']
        problematicDict['Status'] = 'Checked'

        res = returnSingleResult(
            self.fc.setReplicaStatus({lfn: problematicDict}))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        gLogger.info("%s replica (%d) is updated to Checked status" %
                     (prognosis, fileID))
        return self.__updateCompletedFiles(prognosis, fileID)

    def resolveCatalogPFNSizeMismatch(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the CatalogPFNSizeMismatch prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']
        res = returnSingleResult(StorageElement(se).getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageSize = res['Value']
        bkKCatalog = FileCatalog(['BookkeepingDB'])
        res = returnSingleResult(bkKCatalog.getFileSize(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        bookkeepingSize = res['Value']
        if bookkeepingSize == catalogSize == storageSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) matched all registered sizes."
                % fileID)
            return self.__updateReplicaToChecked(problematicDict)
        if catalogSize == bookkeepingSize:
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also"
                % fileID)
            res = returnSingleResult(self.fc.getReplicas(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            if len(res['Value']) <= 1:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has no other replicas."
                    % fileID)
                return S_ERROR(
                    "Not removing catalog file mismatch since it is the only replica"
                )
            else:
                gLogger.info(
                    "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..."
                    % fileID)
                res = self.dm.removeReplica(se, lfn)
                if not res['OK']:
                    return self.__returnProblematicError(fileID, res)
                return self.__updateCompletedFiles('CatalogPFNSizeMismatch',
                                                   fileID)
        if (catalogSize != bookkeepingSize) and (bookkeepingSize
                                                 == storageSize):
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size"
                % fileID)
            res = self.__updateReplicaToChecked(problematicDict)
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.changeProblematicPrognosis(fileID,
                                                   'BKCatalogSizeMismatch')
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) all sizes found to mismatch. Updating retry count"
            % fileID)
        return self.incrementProblematicRetry(fileID)

    #FIXME: Unused?
    def resolvePFNNotRegistered(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNNotRegistered prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)
        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            # The file does not exist in the catalog
            res = returnSingleResult(se.removeFile(lfn))
            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        res = returnSingleResult(se.getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info("PFNNotRegistered replica (%d) found to be missing." %
                         fileID)
            return self.__updateCompletedFiles('PFNNotRegistered', fileID)
        elif not res['OK']:
            return self.__returnProblematicError(fileID, res)
        storageMetadata = res['Value']
        if storageMetadata['Lost']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        if storageMetadata['Unavailable']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found to be Unavailable. Updating retry count"
                % fileID)
            return self.incrementProblematicRetry(fileID)

        # HACK until we can obtain the space token descriptions through GFAL
        site = seName.split('_')[0].split('-')[0]
        if not storageMetadata['Cached']:
            if lfn.endswith('.raw'):
                seName = '%s-RAW' % site
            else:
                seName = '%s-RDST' % site
        elif storageMetadata['Migrated']:
            if lfn.startswith('/lhcb/data'):
                seName = '%s_M-DST' % site
            else:
                seName = '%s_MC_M-DST' % site
        else:
            if lfn.startswith('/lhcb/data'):
                seName = '%s-DST' % site
            else:
                seName = '%s_MC-DST' % site

        problematicDict['SE'] = seName
        res = returnSingleResult(se.getURL(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)

        problematicDict['PFN'] = res['Value']

        res = returnSingleResult(self.fc.addReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        res = returnSingleResult(self.fc.getFileMetadata(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']['Size'] != storageMetadata['Size']:
            gLogger.info(
                "PFNNotRegistered replica (%d) found with catalog size mismatch. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNNotRegistered', fileID)

    #FIXME: Unused?
    def resolveLFNCatalogMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the LFNCatalogMissing prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            return self.__updateCompletedFiles('LFNCatalogMissing', fileID)
        # Remove the file from all catalogs
        # RF_NOTE : here I can do it because it's a single file, but otherwise I would need to sort the path
        res = returnSingleResult(self.fc.removeFile(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        return self.__updateCompletedFiles('LFNCatalogMissing', fileID)

    #FIXME: Unused?
    def resolvePFNMissing(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNMissing prognosis
    """
        se = problematicDict['SE']
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if not res['Value']:
            gLogger.info("PFNMissing file (%d) no longer exists in catalog" %
                         fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)

        res = returnSingleResult(StorageElement(se).exists(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if res['Value']:
            gLogger.info("PFNMissing replica (%d) is no longer missing" %
                         fileID)
            return self.__updateReplicaToChecked(problematicDict)
        gLogger.info("PFNMissing replica (%d) does not exist" % fileID)
        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        replicas = res['Value']
        seSite = se.split('_')[0].split('-')[0]
        found = False
        print(replicas)
        for replicaSE in replicas.keys():
            if re.search(seSite, replicaSE):
                found = True
                problematicDict['SE'] = replicaSE
                se = replicaSE
        if not found:
            gLogger.info(
                "PFNMissing replica (%d) is no longer registered at SE. Resolved."
                % fileID)
            return self.__updateCompletedFiles('PFNMissing', fileID)
        gLogger.info(
            "PFNMissing replica (%d) does not exist. Removing from catalog..."
            % fileID)
        res = returnSingleResult(self.fc.removeReplica({lfn: problematicDict}))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if len(replicas) == 1:
            gLogger.info(
                "PFNMissing replica (%d) had a single replica. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'LFNZeroReplicas')
        res = self.dm.replicateAndRegister(problematicDict['LFN'], se)
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('PFNMissing', fileID)

    #FIXME: Unused?
    def resolvePFNUnavailable(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolved the PFNUnavailable prognosis
    """
        lfn = problematicDict['LFN']
        se = problematicDict['SE']
        fileID = problematicDict['FileID']

        res = returnSingleResult(StorageElement(se).getFileMetadata(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            # The file is no longer Unavailable but has now disappeared completely
            gLogger.info(
                "PFNUnavailable replica (%d) found to be missing. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        if (not res['OK']) or res['Value']['Unavailable']:
            gLogger.info(
                "PFNUnavailable replica (%d) found to still be Unavailable" %
                fileID)
            return self.incrementProblematicRetry(fileID)
        if res['Value']['Lost']:
            gLogger.info(
                "PFNUnavailable replica (%d) is now found to be Lost. Updating prognosis"
                % fileID)
            return self.changeProblematicPrognosis(fileID, 'PFNLost')
        gLogger.info("PFNUnavailable replica (%d) is no longer Unavailable" %
                     fileID)
        # Need to make the replica okay in the Catalog
        return self.__updateReplicaToChecked(problematicDict)

    #FIXME: Unused?
    def resolvePFNZeroSize(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the PFNZeroSize prognosis
    """
        lfn = problematicDict['LFN']
        seName = problematicDict['SE']
        fileID = problematicDict['FileID']

        se = StorageElement(seName)

        res = returnSingleResult(se.getFileSize(lfn))
        if (not res['OK']) and (re.search('File does not exist',
                                          res['Message'])):
            gLogger.info(
                "PFNZeroSize replica (%d) found to be missing. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')
        storageSize = res['Value']
        if storageSize == 0:
            res = returnSingleResult(se.removeFile(lfn))

            if not res['OK']:
                return self.__returnProblematicError(fileID, res)
            gLogger.info(
                "PFNZeroSize replica (%d) removed. Updating prognosis" %
                problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNMissing')

        res = returnSingleResult(self.fc.getReplicas(lfn))
        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        if seName not in res['Value']:
            gLogger.info(
                "PFNZeroSize replica (%d) not registered in catalog. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID, 'PFNNotRegistered')
        res = returnSingleResult(self.fc.getFileMetadata(lfn))

        if not res['OK']:
            return self.__returnProblematicError(fileID, res)
        catalogSize = res['Value']['Size']
        if catalogSize != storageSize:
            gLogger.info(
                "PFNZeroSize replica (%d) size found to differ from registered metadata. Updating prognosis"
                % problematicDict['FileID'])
            return self.changeProblematicPrognosis(fileID,
                                                   'CatalogPFNSizeMismatch')
        return self.__updateCompletedFiles('PFNZeroSize', fileID)

    ############################################################################################

    #FIXME: Unused?
    def resolveLFNZeroReplicas(self, problematicDict):
        """ This takes the problematic dictionary returned by the integrity DB and resolves the LFNZeroReplicas prognosis
    """
        lfn = problematicDict['LFN']
        fileID = problematicDict['FileID']

        res = returnSingleResult(self.fc.getReplicas(lfn, allStatus=True))
        if res['OK'] and res['Value']:
            gLogger.info("LFNZeroReplicas file (%d) found to have replicas" %
                         fileID)
        else:
            gLogger.info(
                "LFNZeroReplicas file (%d) does not have replicas. Checking storage..."
                % fileID)
            pfnsFound = False
            for storageElementName in sorted(
                    gConfig.getValue(
                        'Resources/StorageElementGroups/Tier1_MC_M-DST', [])):
                res = self.__getStoragePathExists([lfn], storageElementName)
                if lfn in res['Value']:
                    gLogger.info(
                        "LFNZeroReplicas file (%d) found storage file at %s" %
                        (fileID, storageElementName))
                    self.reportProblematicReplicas(
                        [(lfn, 'deprecatedUrl', storageElementName,
                          'PFNNotRegistered')], storageElementName,
                        'PFNNotRegistered')
                    pfnsFound = True
            if not pfnsFound:
                gLogger.info(
                    "LFNZeroReplicas file (%d) did not have storage files. Removing..."
                    % fileID)
                res = returnSingleResult(self.fc.removeFile(lfn))
                if not res['OK']:
                    gLogger.error('DataIntegrityClient: failed to remove file',
                                  res['Message'])
                    # Increment the number of retries for this file
                    self.incrementProblematicRetry(fileID)
                    return res
                gLogger.info("LFNZeroReplicas file (%d) removed from catalog" %
                             fileID)
        # If we get here the problem is solved so we can update the integrityDB
        return self.__updateCompletedFiles('LFNZeroReplicas', fileID)

    def _reportProblematicFiles(self, lfns, reason):
        """ Simple wrapper function around setFileProblematic
    """
        gLogger.info('The following %s files were found with %s' %
                     (len(lfns), reason))
        for lfn in sorted(lfns):
            gLogger.info(lfn)
        res = self.setFileProblematic(lfns,
                                      reason,
                                      sourceComponent='DataIntegrityClient')
        if not res['OK']:
            gLogger.info('Failed to update integrity DB with files',
                         res['Message'])
        else:
            gLogger.info('Successfully updated integrity DB with files')
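
A short usage sketch for the wrappers above. The LFN and SE names are placeholders, and the (lfn, pfn, se, prognosis) tuple layout follows the setReplicaProblematic docstring; running it for real requires a valid proxy and catalog access.

# Placeholder values throughout; adjust LFN, SE and prognosis to your case.
from DIRAC import gLogger

client = DataIntegrityClient()
replicaTuple = ("/lhcb/data/2018/RAW/FULL/run/file.raw", "", "CERN-RAW", "PFNMissing")
res = client.setReplicaProblematic([replicaTuple], sourceComponent="ExampleScript")
if not res["OK"]:
    gLogger.error("Failed to flag replica as problematic", res["Message"])
else:
    gLogger.info("Replicas flagged as problematic", str(res["Value"]["Successful"]))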
Example #36
0
  def __call__( self ):
    """ action for 'removeFile' operation  """
    # # get waiting files
    waitingFiles = self.getWaitingFilesList()
    fc = FileCatalog( self.operation.catalogList )

    res = fc.getReplicas( [wf.LFN for wf in waitingFiles] )
    if not res['OK']:
      gMonitor.addMark( "RemoveFileAtt" )
      gMonitor.addMark( "RemoveFileFail" )
      return res

    # We check the status of the SEs for the LFNs that were resolved successfully
    # No idea what to do with the others...
    replicas = res['Value']['Successful']
    targetSEs = set( [se for lfn in replicas for se in replicas[lfn] ] )

    bannedTargets = set()
    if targetSEs:
      bannedTargets = self.checkSEsRSS( targetSEs, access = 'RemoveAccess' )
      if not bannedTargets['OK']:
        gMonitor.addMark( "RemoveFileAtt" )
        gMonitor.addMark( "RemoveFileFail" )
        return bannedTargets
      bannedTargets = set( bannedTargets['Value'] )
      if bannedTargets and 'always banned' in self.operation.Error:
        return S_OK( "%s targets are always banned for removal" % ",".join( sorted( bannedTargets ) ) )

    # # prepare waiting file dict
    # # We take only files that have no replica at the banned SEs; files with no replicas at all are kept
    toRemoveDict = dict( ( opFile.LFN, opFile ) for opFile in waitingFiles if not bannedTargets.intersection( replicas.get( opFile.LFN, [] ) ) )

    if toRemoveDict:
      gMonitor.addMark( "RemoveFileAtt", len( toRemoveDict ) )
      # # 1st step - bulk removal
      self.log.debug( "bulk removal of %s files" % len( toRemoveDict ) )
      bulkRemoval = self.bulkRemoval( toRemoveDict )
      if not bulkRemoval["OK"]:
        self.log.error( "Bulk file removal failed", bulkRemoval["Message"] )
      else:
        gMonitor.addMark( "RemoveFileOK", len( toRemoveDict ) - len( bulkRemoval["Value"] ) )
        toRemoveDict = bulkRemoval["Value"]

      # # 2nd step - single file removal
      for lfn, opFile in toRemoveDict.items():
        self.log.info( "removing single file %s" % lfn )
        singleRemoval = self.singleRemoval( opFile )
        if not singleRemoval["OK"]:
          self.log.error( 'Error removing single file', singleRemoval["Message"] )
          gMonitor.addMark( "RemoveFileFail", 1 )
        else:
          self.log.info( "file %s has been removed" % lfn )
          gMonitor.addMark( "RemoveFileOK", 1 )

      # # set
      failedFiles = [ ( lfn, opFile ) for ( lfn, opFile ) in toRemoveDict.items()
                      if opFile.Status in ( "Failed", "Waiting" ) ]
      if failedFiles:
        self.operation.Error = "failed to remove %d files" % len( failedFiles )

    if bannedTargets:
      return S_OK( "%s targets are banned for removal" % ",".join( sorted( bannedTargets ) ) )
    return S_OK()
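
The banned-SE filter in this handler is a plain set intersection; the toy example below (made-up LFNs and SE names) shows the same selection in isolation: a file stays eligible for removal only if none of its replicas sits at a banned SE, and a file with no known replicas stays eligible too.

# Made-up replica map and banned-SE set, mirroring the toRemoveDict construction above.
replicas = {
    "/lhcb/user/a/auser/file1": ["CERN-USER", "GRIDKA-USER"],
    "/lhcb/user/a/auser/file2": ["CNAF-USER"],
    "/lhcb/user/a/auser/file3": [],  # no replica known: still eligible for removal
}
bannedTargets = {"CNAF-USER"}

toRemove = [lfn for lfn, ses in replicas.items() if not bannedTargets.intersection(ses)]
print(toRemove)  # ['/lhcb/user/a/auser/file1', '/lhcb/user/a/auser/file3']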
Example #37
0
    def __call__(self):
        """action for 'removeFile' operation"""

        # The flag 'rmsMonitoring' is set by the RequestTask and is False by default.
        # Here we use 'createRMSRecord' (defined in OperationHandlerBase) to create the ES records.
        if self.rmsMonitoring:
            self.rmsMonitoringReporter = MonitoringReporter(
                monitoringType="RMSMonitoring")

        # # get waiting files
        waitingFiles = self.getWaitingFilesList()
        fc = FileCatalog(self.operation.catalogList)

        res = fc.getReplicas([wf.LFN for wf in waitingFiles])
        if not res["OK"]:
            if self.rmsMonitoring:
                for status in ["Attempted", "Failed"]:
                    self.rmsMonitoringReporter.addRecord(
                        self.createRMSRecord(status, len(waitingFiles)))
                self.rmsMonitoringReporter.commit()
            return res

        # We check the status of the SEs for the LFNs that were resolved successfully
        # No idea what to do with the others...
        replicas = res["Value"]["Successful"]
        targetSEs = set([se for lfn in replicas for se in replicas[lfn]])

        if targetSEs:
            # Check if SEs are allowed for remove but don't fail yet the operation if SEs are always banned
            bannedTargets = self.checkSEsRSS(targetSEs,
                                             access="RemoveAccess",
                                             failIfBanned=False)
            if not bannedTargets["OK"]:
                if self.rmsMonitoring:
                    for status in ["Attempted", "Failed"]:
                        self.rmsMonitoringReporter.addRecord(
                            self.createRMSRecord(status, len(replicas)))
                    self.rmsMonitoringReporter.commit()
                return bannedTargets
            bannedTargets = set(bannedTargets["Value"])
        else:
            bannedTargets = set()

        # # prepare waiting file dict
        # # We take only files that have no replica at the banned SEs; files with no replicas at all are kept
        toRemoveDict = dict(
            ((opFile.LFN, opFile) for opFile in waitingFiles
             if not bannedTargets
             or not bannedTargets.intersection(replicas.get(opFile.LFN, []))))
        # If some SEs are always banned, set Failed the files that cannot be removed
        if bannedTargets and "always banned" in self.operation.Error:
            for opFile in waitingFiles:
                if opFile.LFN not in toRemoveDict:
                    # Set the files that cannot be removed Failed
                    opFile.Error = self.operation.Error
                    opFile.Status = "Failed"

            if self.rmsMonitoring:
                self.rmsMonitoringReporter.addRecord(
                    self.createRMSRecord("Failed",
                                         len(waitingFiles) -
                                         len(toRemoveDict)))
                self.rmsMonitoringReporter.commit()

            if not toRemoveDict:
                # If there are no files that can be removed, exit, else try once to remove them anyway
                return S_OK("%s targets are always banned for removal" %
                            ",".join(sorted(bannedTargets)))

        if toRemoveDict:
            if self.rmsMonitoring:
                self.rmsMonitoringReporter.addRecord(
                    self.createRMSRecord("Attempted", len(toRemoveDict)))
            # # 1st step - bulk removal
            self.log.debug("bulk removal of %s files" % len(toRemoveDict))
            bulkRemoval = self.bulkRemoval(toRemoveDict)
            if not bulkRemoval["OK"]:
                self.log.error("Bulk file removal failed",
                               bulkRemoval["Message"])
            else:
                if self.rmsMonitoring:
                    self.rmsMonitoringReporter.addRecord(
                        self.createRMSRecord(
                            "Successful",
                            len(toRemoveDict) - len(bulkRemoval["Value"])))

            # # 2nd step - single file removal
            for lfn, opFile in toRemoveDict.items():
                self.log.info("removing single file %s" % lfn)
                singleRemoval = self.singleRemoval(opFile)
                if not singleRemoval["OK"]:
                    self.log.error("Error removing single file",
                                   singleRemoval["Message"])
                    if self.rmsMonitoring:
                        self.rmsMonitoringReporter.addRecord(
                            self.createRMSRecord("Failed", 1))
                else:
                    self.log.info("file %s has been removed" % lfn)
                    if self.rmsMonitoring:
                        self.rmsMonitoringReporter.addRecord(
                            self.createRMSRecord("Successful", 1))

            # # set
            failedFiles = [(lfn, opFile)
                           for (lfn, opFile) in toRemoveDict.items()
                           if opFile.Status in ("Failed", "Waiting")]
            if failedFiles:
                self.operation.Error = "failed to remove %d files" % len(
                    failedFiles)

        if self.rmsMonitoring:
            self.rmsMonitoringReporter.commit()

        if bannedTargets:
            return S_OK("%s targets are banned for removal" %
                        ",".join(sorted(bannedTargets)))
        return S_OK()
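
Both versions of this handler share the same two-step removal pattern: a bulk attempt first, then a per-file retry for whatever is left over. The condensed sketch below assumes, as in the code above, that bulkRemoval() returns the still-unremoved {lfn: opFile} entries in 'Value'.

# Condensed sketch; 'handler' stands for the operation handler instance above.
def removeWithFallback(handler, toRemoveDict):
    """Bulk removal first, then retry each leftover file individually."""
    bulk = handler.bulkRemoval(toRemoveDict)
    # On bulk failure retry every file; on success retry only the reported leftovers
    leftovers = toRemoveDict if not bulk["OK"] else bulk["Value"]
    for lfn, opFile in leftovers.items():
        single = handler.singleRemoval(opFile)
        if not single["OK"]:
            handler.log.error("Error removing single file", single["Message"])
        else:
            handler.log.info("file %s has been removed" % lfn)
    return leftovers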
Example #38
0
class ReplicateAndRegister( DMSRequestOperationsBase ):
  """
  .. class:: ReplicateAndRegister

  ReplicateAndRegister operation handler
  """

  def __init__( self, operation = None, csPath = None ):
    """c'tor

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
    super( ReplicateAndRegister, self ).__init__( operation, csPath )
    # # own gMonitor stuff for files
    gMonitor.registerActivity( "ReplicateAndRegisterAtt", "Replicate and register attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateOK", "Replications successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateFail", "Replications failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterOK", "Registrations successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterFail", "Registrations failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # for FTS
    gMonitor.registerActivity( "FTSScheduleAtt", "Files schedule attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleOK", "File schedule successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleFail", "File schedule failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # SE cache

    # Clients
    self.fc = FileCatalog()
    if hasattr( self, "FTSMode" ) and getattr( self, "FTSMode" ):
      from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
      self.ftsClient = FTSClient()

  def __call__( self ):
    """ call me maybe """
    # # check replicas first
    checkReplicas = self.__checkReplicas()
    if not checkReplicas["OK"]:
      self.log.error( 'Failed to check replicas', checkReplicas["Message"] )
    if hasattr( self, "FTSMode" ) and getattr( self, "FTSMode" ):
      bannedGroups = getattr( self, "FTSBannedGroups" ) if hasattr( self, "FTSBannedGroups" ) else ()
      if self.request.OwnerGroup in bannedGroups:
        self.log.verbose( "usage of FTS system is banned for request's owner" )
        return self.dmTransfer()
      return self.ftsTransfer()
    return self.dmTransfer()

  def __checkReplicas( self ):
    """ check done replicas and update file states  """
    waitingFiles = dict( [ ( opFile.LFN, opFile ) for opFile in self.operation
                          if opFile.Status in ( "Waiting", "Scheduled" ) ] )
    targetSESet = set( self.operation.targetSEList )

    replicas = self.fc.getReplicas( waitingFiles.keys() )
    if not replicas["OK"]:
      self.log.error( 'Failed to get replicas', replicas["Message"] )
      return replicas

    reMissing = re.compile( r".*such file.*" )
    for failedLFN, errStr in replicas["Value"]["Failed"].items():
      waitingFiles[failedLFN].Error = errStr
      if reMissing.search( errStr.lower() ):
        self.log.error( "File does not exist", failedLFN )
        gMonitor.addMark( "ReplicateFail", len( targetSESet ) )
        waitingFiles[failedLFN].Status = "Failed"

    for successfulLFN, reps in replicas["Value"]["Successful"].items():
      if targetSESet.issubset( set( reps ) ):
        self.log.info( "file %s has been replicated to all targets" % successfulLFN )
        waitingFiles[successfulLFN].Status = "Done"

    return S_OK()
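
  # Illustration of the subset test above, with assumed values:
  #   targetSESet = {"CERN-DST", "RAL-DST"}
  #   reps        = {"CERN-DST": "root://...", "RAL-DST": "root://..."}
  #   targetSESet.issubset(set(reps)) is True, so the file is marked "Done";
  #   if any target SE is still missing from reps, the file stays Waiting/Scheduled.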

  def _addMetadataToFiles( self, toSchedule ):
    """ Add metadata to those files that need to be scheduled through FTS

        toSchedule is a dictionary:
        {'lfn1': [opFile, validReplicas, validTargets], 'lfn2': [opFile, validReplicas, validTargets]}
    """
    if toSchedule:
      self.log.info( "found %s files to schedule, getting metadata from FC" % len( toSchedule ) )
      lfns = toSchedule.keys()
    else:
      self.log.info( "No files to schedule" )
      return S_OK()

    res = self.fc.getFileMetadata( lfns )
    if not res['OK']:
      return res
    else:
      if res['Value']['Failed']:
        self.log.warn( "Can't schedule %d files: problems getting the metadata: %s" % ( len( res['Value']['Failed'] ),
                                                                                        ', '.join( res['Value']['Failed'] ) ) )
      metadata = res['Value']['Successful']

    filesToScheduleList = []

    for lfnsToSchedule, lfnMetadata in metadata.items():
      opFileToSchedule = toSchedule[lfnsToSchedule][0]
      opFileToSchedule.GUID = lfnMetadata['GUID']
      opFileToSchedule.Checksum = lfnMetadata['Checksum']
      opFileToSchedule.ChecksumType = lfnMetadata['ChecksumType']
      opFileToSchedule.Size = lfnMetadata['Size']

      filesToScheduleList.append( ( opFileToSchedule.toJSON()['Value'],
                                    toSchedule[lfnsToSchedule][1],
                                    toSchedule[lfnsToSchedule][2] ) )

    return S_OK( filesToScheduleList )



  def _filterReplicas( self, opFile ):
    """ filter out banned/invalid source SEs """
    return filterReplicas( opFile, logger = self.log, dataManager = self.dm )

  def ftsTransfer( self ):
    """ replicate and register using FTS """

    self.log.info( "scheduling files in FTS..." )

    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark( "FTSScheduleAtt" )
      gMonitor.addMark( "FTSScheduleFail" )
      return bannedTargets

    if bannedTargets['Value']:
      return S_OK( "%s targets are banned for writing" % ",".join( bannedTargets['Value'] ) )

    # Can continue now
    self.log.verbose( "No targets banned for writing" )

    toSchedule = {}

    for opFile in self.getWaitingFilesList():
      opFile.Error = ''
      gMonitor.addMark( "FTSScheduleAtt" )
      # # check replicas
      replicas = self._filterReplicas( opFile )
      if not replicas["OK"]:
        continue
      replicas = replicas["Value"]

      validReplicas = replicas["Valid"]
      noMetaReplicas = replicas["NoMetadata"]
      noReplicas = replicas['NoReplicas']
      badReplicas = replicas['Bad']
      noPFN = replicas['NoPFN']

      if validReplicas:
        validTargets = list( set( self.operation.targetSEList ) - set( validReplicas ) )
        if not validTargets:
          self.log.info( "file %s is already present at all targets" % opFile.LFN )
          opFile.Status = "Done"
        else:
          toSchedule[opFile.LFN] = [ opFile, validReplicas, validTargets ]
      else:
        gMonitor.addMark( "FTSScheduleFail" )
        if noMetaReplicas:
          self.log.warn( "unable to schedule '%s', couldn't get metadata at %s" % ( opFile.LFN, ','.join( noMetaReplicas ) ) )
          opFile.Error = "Couldn't get metadata"
        elif noReplicas:
          self.log.error( "Unable to schedule transfer",
                          "File %s doesn't exist at %s" % ( opFile.LFN, ','.join( noReplicas ) ) )
          opFile.Error = 'No replicas found'
          opFile.Status = 'Failed'
        elif badReplicas:
          self.log.error( "Unable to schedule transfer",
                          "File %s, all replicas have a bad checksum at %s" % ( opFile.LFN, ','.join( badReplicas ) ) )
          opFile.Error = 'All replicas have a bad checksum'
          opFile.Status = 'Failed'
        elif noPFN:
          self.log.warn( "unable to schedule %s, could not get a PFN at %s" % ( opFile.LFN, ','.join( noPFN ) ) )

    res = self._addMetadataToFiles( toSchedule )
    if not res['OK']:
      return res
    else:
      filesToScheduleList = res['Value']


    if filesToScheduleList:

      ftsSchedule = self.ftsClient.ftsSchedule( self.request.RequestID,
                                                self.operation.OperationID,
                                                filesToScheduleList )
      if not ftsSchedule["OK"]:
        self.log.error( "Completely failed to schedule to FTS:", ftsSchedule["Message"] )
        return ftsSchedule

      # might have nothing to schedule
      ftsSchedule = ftsSchedule["Value"]
      if not ftsSchedule:
        return S_OK()

      self.log.info( "%d files have been scheduled to FTS" % len( ftsSchedule['Successful'] ) )
      for opFile in self.operation:
        fileID = opFile.FileID
        if fileID in ftsSchedule["Successful"]:
          gMonitor.addMark( "FTSScheduleOK", 1 )
          opFile.Status = "Scheduled"
          self.log.debug( "%s has been scheduled for FTS" % opFile.LFN )
        elif fileID in ftsSchedule["Failed"]:
          gMonitor.addMark( "FTSScheduleFail", 1 )
          opFile.Error = ftsSchedule["Failed"][fileID]
          if 'sourceSURL equals to targetSURL' in opFile.Error:
            # In this case there is no need to continue
            opFile.Status = 'Failed'
          self.log.warn( "unable to schedule %s for FTS: %s" % ( opFile.LFN, opFile.Error ) )
    else:
      self.log.info( "No files to schedule after metadata checks" )

    # Just in case some transfers could not be scheduled, try them with RM
    return self.dmTransfer( fromFTS = True )

  def dmTransfer( self, fromFTS = False ):
    """ replicate and register using dataManager  """
    # # get waiting files. If none just return
    # # source SE
    sourceSE = self.operation.SourceSE if self.operation.SourceSE else None
    if sourceSE:
      # # check source se for read
      bannedSource = self.checkSEsRSS( sourceSE, 'ReadAccess' )
      if not bannedSource["OK"]:
        gMonitor.addMark( "ReplicateAndRegisterAtt", len( self.operation ) )
        gMonitor.addMark( "ReplicateFail", len( self.operation ) )
        return bannedSource

      if bannedSource["Value"]:
        self.operation.Error = "SourceSE %s is banned for reading" % sourceSE
        self.log.info( self.operation.Error )
        return S_OK( self.operation.Error )

    # # check targetSEs for write
    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark( "ReplicateAndRegisterAtt", len( self.operation ) )
      gMonitor.addMark( "ReplicateFail", len( self.operation ) )
      return bannedTargets

    if bannedTargets['Value']:
      self.operation.Error = "%s targets are banned for writing" % ",".join( bannedTargets['Value'] )
      return S_OK( self.operation.Error )

    # Can continue now
    self.log.verbose( "No targets banned for writing" )

    waitingFiles = self.getWaitingFilesList()
    if not waitingFiles:
      return S_OK()
    # # loop over files
    if fromFTS:
      self.log.info( "Trying transfer using the DataManager as FTS scheduling failed" )
    else:
      self.log.info( "Transferring files using the DataManager..." )
    for opFile in waitingFiles:

      gMonitor.addMark( "ReplicateAndRegisterAtt", 1 )
      opFile.Error = ''
      lfn = opFile.LFN

      # Check if replica is at the specified source
      replicas = self._filterReplicas( opFile )
      if not replicas["OK"]:
        self.log.error( 'Failed to check replicas', replicas["Message"] )
        continue
      replicas = replicas["Value"]
      validReplicas = replicas["Valid"]
      noMetaReplicas = replicas["NoMetadata"]
      noReplicas = replicas['NoReplicas']
      badReplicas = replicas['Bad']
      noPFN = replicas['NoPFN']

      if not validReplicas:
        gMonitor.addMark( "ReplicateFail" )
        if noMetaReplicas:
          self.log.warn( "unable to replicate '%s', couldn't get metadata at %s" % ( opFile.LFN, ','.join( noMetaReplicas ) ) )
          opFile.Error = "Couldn't get metadata"
        elif noReplicas:
          self.log.error( "Unable to replicate", "File %s doesn't exist at %s" % ( opFile.LFN, ','.join( noReplicas ) ) )
          opFile.Error = 'No replicas found'
          opFile.Status = 'Failed'
        elif badReplicas:
          self.log.error( "Unable to replicate", "%s, all replicas have a bad checksum at %s" % ( opFile.LFN, ','.join( badReplicas ) ) )
          opFile.Error = 'All replicas have a bad checksum'
          opFile.Status = 'Failed'
        elif noPFN:
          self.log.warn( "unable to replicate %s, could not get a PFN" % opFile.LFN )
        continue
      # # get the first one in the list
      if sourceSE not in validReplicas:
        if sourceSE:
          self.log.warn( "%s is not at specified sourceSE %s, changed to %s" % ( lfn, sourceSE, validReplicas[0] ) )
        sourceSE = validReplicas[0]

      # # loop over targetSE
      catalogs = self.operation.Catalog
      if catalogs:
        catalogs = [ cat.strip() for cat in catalogs.split( ',' ) ]

      for targetSE in self.operation.targetSEList:

        # # call DataManager
        if targetSE in validReplicas:
          self.log.warn( "Request to replicate %s to an existing location: %s" % ( lfn, targetSE ) )
          opFile.Status = 'Done'
          continue
        res = self.dm.replicateAndRegister( lfn, targetSE, sourceSE = sourceSE, catalog = catalogs )
        if res["OK"]:

          if lfn in res["Value"]["Successful"]:

            if "replicate" in res["Value"]["Successful"][lfn]:

              repTime = res["Value"]["Successful"][lfn]["replicate"]
              prString = "file %s replicated at %s in %s s." % ( lfn, targetSE, repTime )

              gMonitor.addMark( "ReplicateOK", 1 )

              if "register" in res["Value"]["Successful"][lfn]:

                gMonitor.addMark( "RegisterOK", 1 )
                regTime = res["Value"]["Successful"][lfn]["register"]
                prString += ' and registered in %s s.' % regTime
                self.log.info( prString )
              else:

                gMonitor.addMark( "RegisterFail", 1 )
                prString += " but failed to register"
                self.log.warn( prString )

                opFile.Error = "Failed to register"
                # # add register replica operation
                registerOperation = self.getRegisterOperation( opFile, targetSE, type = 'RegisterReplica' )
                self.request.insertAfter( registerOperation, self.operation )

            else:

              self.log.error( "Failed to replicate", "%s to %s" % ( lfn, targetSE ) )
              gMonitor.addMark( "ReplicateFail", 1 )
              opFile.Error = "Failed to replicate"

          else:

            gMonitor.addMark( "ReplicateFail", 1 )
            reason = res["Value"]["Failed"][lfn]
            self.log.error( "Failed to replicate and register", "File %s at %s:" % ( lfn, targetSE ), reason )
            opFile.Error = reason

        else:

          gMonitor.addMark( "ReplicateFail", 1 )
          opFile.Error = "DataManager error: %s" % res["Message"]
          self.log.error( "DataManager error", res["Message"] )

      if not opFile.Error:
        if len( self.operation.targetSEList ) > 1:
          self.log.info( "file %s has been replicated to all targetSEs" % lfn )
        opFile.Status = "Done"


    return S_OK()
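

# A minimal, hedged usage sketch (not part of the original handler): calling
# DataManager.replicateAndRegister directly, the same call dmTransfer() makes above,
# and reading back the per-LFN 'Successful'/'Failed' structure it returns. The LFN
# and SE names are placeholders; only the call signature already used above is assumed.
def _exampleReplicateAndRegister():
  from DIRAC import S_OK, S_ERROR
  from DIRAC.DataManagementSystem.Client.DataManager import DataManager

  dm = DataManager()
  lfn = '/vo/user/example/file.dat'  # placeholder LFN
  res = dm.replicateAndRegister( lfn, 'TARGET-SE', sourceSE = 'SOURCE-SE' )
  if not res['OK']:
    # the call itself failed completely
    return res
  if lfn in res['Value']['Successful']:
    # e.g. {'replicate': 12.3, 'register': 0.4} with the time spent in each step
    return S_OK( res['Value']['Successful'][lfn] )
  return S_ERROR( res['Value']['Failed'][lfn] )
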
class BigDataJobScheduler( AgentModule ):

  def initialize( self ):
    """ Standard constructor
    """
    import threading

    self.__tmpSandBoxDir = "/tmp/"
    self.jobDataset = ""
    self.am_setOption( "PollingTime", 60.0 )

    self.am_setOption( "ThreadStartDelay", 1 )
    self.am_setOption( "SubmitPools", [] )
    self.am_setOption( "DefaultSubmitPools", [] )

    self.am_setOption( "minThreadsInPool", 0 )
    self.am_setOption( "maxThreadsInPool", 2 )
    self.am_setOption( "totalThreadsInPool", 40 )

    self.directors = {}
    self.pools = {}

    self.directorDict = {}
    self.pendingTaskQueueJobs = {}

    self.callBackLock = threading.Lock()

    return DIRAC.S_OK()

  def execute( self ):
    """Main Agent code:
      1.- Query TaskQueueDB for existing TQs
      2.- Count Pending Jobs
      3.- Submit Jobs
    """
    self.__checkSubmitPools()

    bigDataJobsToSubmit = {}
    bigDataJobIdsToSubmit = {}

    for directorName, directorDict in self.directors.items():
      self.log.verbose( 'Checking Director:', directorName )
      self.log.verbose( 'RunningEndPoints:', directorDict['director'].runningEndPoints )
      for runningEndPointName in directorDict['director'].runningEndPoints:
        runningEndPointDict = directorDict['director'].runningEndPoints[runningEndPointName]
        NameNode = runningEndPointDict['NameNode']
        jobsByEndPoint = 0
        result = BigDataDB.getBigDataJobsByStatusAndEndpoint( 'Submitted', NameNode )
        if result['OK']:
          jobsByEndPoint += len( result['Value'] )
        result = BigDataDB.getBigDataJobsByStatusAndEndpoint( 'Running', NameNode )
        if result['OK']:
          jobsByEndPoint += len( result['Value'] )
        self.log.verbose( 'Jobs already submitted or running at EndPoint %s: %s' % ( runningEndPointName, jobsByEndPoint ) )
        jobLimitsEndPoint = runningEndPointDict['LimitQueueJobsEndPoint']

        bigDataJobs = 0
        if jobsByEndPoint >= jobLimitsEndPoint:
          self.log.info( '%s >= %s: running jobs reached the limit for %s, skipping' % ( jobsByEndPoint, jobLimitsEndPoint, runningEndPointName ) )
          continue
        else:
          bigDataJobs = jobLimitsEndPoint - jobsByEndPoint
        requirementsDict = runningEndPointDict['Requirements']

        self.log.info( 'Requirements Dict: ', requirementsDict )
        result = taskQueueDB.getMatchingTaskQueues( requirementsDict )
        if not result['OK']:
          self.log.error( 'Could not retrieve TaskQueues from TaskQueueDB', result['Message'] )
          return result

        taskQueueDict = result['Value']
        self.log.info( 'Task Queues Dict: ', taskQueueDict )
        jobs = 0
        priority = 0
        cpu = 0
        jobsID = 0
        self.log.info( 'Pending jobs from the TaskQueue that did not match in previous iterations: ', self.pendingTaskQueueJobs )
        for tq in taskQueueDict:
          jobs += taskQueueDict[tq]['Jobs']
          priority += taskQueueDict[tq]['Priority']
          cpu += taskQueueDict[tq]['Jobs'] * taskQueueDict[tq]['CPUTime']

          # Matching of jobs with the BigData software.
          # The sequence is:
          #   1. retrieve a job from taskQueueDict
          #   2. get the job name/arguments and try to match them with the resources
          #   3. if there is no match, store the job in pendingTaskQueueJobs for the next iteration
          #
          # The matching uses the following JobName pattern:
          #   NameSoftware _ SoftwareVersion _ HighLanguageName _ HighLanguageVersion _ DataSetName
          # Extract a job from the TaskQueue
          if tq not in self.pendingTaskQueueJobs.keys():
            self.pendingTaskQueueJobs[tq] = {}
          getJobFromTaskQueue = taskQueueDB.matchAndGetJob( taskQueueDict[tq] )
          if not getJobFromTaskQueue['OK']:
            self.log.error( 'Could not get a job from the TaskQueue', getJobFromTaskQueue['Message'] )
            return getJobFromTaskQueue

          jobInfo = getJobFromTaskQueue['Value']
          jobID = jobInfo['jobId']
          jobAttrInfo = jobDB.getJobAttributes( jobID )

          if not jobAttrInfo['OK']:
            self.log.error( 'Could not get Job Attributes', jobAttrInfo['Message'] )
            return jobAttrInfo
          jobInfoUniq = jobAttrInfo['Value']
          jobName = jobInfoUniq['JobName']
          self.pendingTaskQueueJobs[tq][jobID] = jobName


          result = jobDB.getJobJDL( jobID, True )
          classAdJob = ClassAd( result['Value'] )
          arguments = 0
          if classAdJob.lookupAttribute( 'Arguments' ):
            arguments = classAdJob.getAttributeString( 'Arguments' )
          #if not classAdJob.lookupAttribute( 'Arguments' ):
          #  continue

          jobsToSubmit = self.matchingJobsForBDSubmission( arguments,
                                                       runningEndPointName,
                                                       runningEndPointDict['BigDataSoftware'],
                                                       runningEndPointDict['BigDataSoftwareVersion'],
                                                       runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                       runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                       jobID )
          if ( jobsToSubmit == "OK" ):
            if directorName not in bigDataJobsToSubmit:
              bigDataJobsToSubmit[directorName] = {}
            if runningEndPointName not in bigDataJobsToSubmit[directorName]:
              bigDataJobsToSubmit[directorName][runningEndPointName] = {}
            bigDataJobsToSubmit[directorName][runningEndPointName] = { 'JobId': jobID,
                                                        'JobName': jobName,
                                                        'TQPriority': priority,
                                                        'CPUTime': cpu,
                                                        'BigDataEndpoint': runningEndPointName,
                                                        'BigDataEndpointNameNode': runningEndPointDict['NameNode'],
                                                        'BdSoftware': runningEndPointDict['BigDataSoftware'],
                                                        'BdSoftwareVersion': runningEndPointDict['BigDataSoftwareVersion'],
                                                        'HLLName' : runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                        'HLLVersion' : runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                        'NumBigDataJobsAllowedToSubmit': bigDataJobs,
                                                        'SiteName': runningEndPointDict['SiteName'],
                                                        'PublicIP': runningEndPointDict['PublicIP'],
                                                        'User': runningEndPointDict['User'],
                                                        'Port': runningEndPointDict['Port'],
                                                        'UsePilot': runningEndPointDict['UsePilot'],
                                                        'IsInteractive': runningEndPointDict['IsInteractive'],
                                                        'Arguments': arguments }
            del self.pendingTaskQueueJobs[tq][jobID]
          else:
            self.log.error( 'Job not matched for BigData submission:', jobsToSubmit )
        self.log.info( 'Pending jobs from the TaskQueue that still do not match: ', self.pendingTaskQueueJobs )
        for tq in self.pendingTaskQueueJobs.keys():
          for jobid in self.pendingTaskQueueJobs[tq].keys():
            result = jobDB.getJobJDL( jobid, True )
            classAdJob = ClassAd( result['Value'] )
            arguments = 0
            if classAdJob.lookupAttribute( 'Arguments' ):
              arguments = classAdJob.getAttributeString( 'Arguments' )
            #if not classAdJob.lookupAttribute( 'Arguments' ):
            #  continue
            #do the match with the runningEndPoint
            jobsToSubmit = self.matchingJobsForBDSubmission( arguments,
                                                             runningEndPointName,
                                                             runningEndPointDict['BigDataSoftware'],
                                                             runningEndPointDict['BigDataSoftwareVersion'],
                                                             runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                             runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                             jobid )
            if ( jobsToSubmit == "OK" ):
              if directorName not in bigDataJobsToSubmit:
                bigDataJobsToSubmit[directorName] = {}
              if runningEndPointName not in bigDataJobsToSubmit[directorName]:
                bigDataJobsToSubmit[directorName][runningEndPointName] = {}
              bigDataJobsToSubmit[directorName][runningEndPointName] = { 'JobId': jobid,
                                                          'JobName': self.pendingTaskQueueJobs[tq][jobid],
                                                          'TQPriority': priority,
                                                          'CPUTime': cpu,
                                                          'BigDataEndpoint': runningEndPointName,
                                                          'BigDataEndpointNameNode': runningEndPointDict['NameNode'],
                                                          'BdSoftware': runningEndPointDict['BigDataSoftware'],
                                                          'BdSoftwareVersion': runningEndPointDict['BigDataSoftwareVersion'],
                                                          'HLLName' : runningEndPointDict['HighLevelLanguage']['HLLName'],
                                                          'HLLVersion' : runningEndPointDict['HighLevelLanguage']['HLLVersion'],
                                                          'NumBigDataJobsAllowedToSubmit': bigDataJobs,
                                                          'SiteName': runningEndPointDict['SiteName'],
                                                          'PublicIP': runningEndPointDict['PublicIP'],
                                                          'User': runningEndPointDict['User'],
                                                          'Port': runningEndPointDict['Port'],
                                                          'UsePilot': runningEndPointDict['UsePilot'],
                                                          'IsInteractive': runningEndPointDict['IsInteractive'],
                                                          'Arguments': arguments  }
              del self.pendingTaskQueueJobs[tq][jobid]
            else:
              self.log.error( 'Job not matched for BigData submission:', jobsToSubmit )
        if not jobs and not self.pendingTaskQueueJobs:
          self.log.info( 'No matching jobs for %s found, skipping' % NameNode )
          continue

        self.log.info( 'BigData jobs to submit:', bigDataJobsToSubmit )

    for directorName, JobsToSubmitDict in bigDataJobsToSubmit.items():
      for runningEndPointName, jobsToSubmitDict in JobsToSubmitDict.items():
        if self.directors[directorName]['isEnabled']:
          self.log.info( 'Requesting submission to %s via %s' % ( runningEndPointName, directorName ) )

          director = self.directors[directorName]['director']
          pool = self.pools[self.directors[directorName]['pool']]

          jobIDs = JobsToSubmitDict[runningEndPointName]['JobId']
          jobName = JobsToSubmitDict[runningEndPointName]['JobName']
          endpoint = JobsToSubmitDict[runningEndPointName]['BigDataEndpoint']
          runningSiteName = JobsToSubmitDict[runningEndPointName]['SiteName']
          NameNode = JobsToSubmitDict[runningEndPointName]['BigDataEndpointNameNode']
          BigDataSoftware = JobsToSubmitDict[runningEndPointName]['BdSoftware']
          BigDataSoftwareVersion = JobsToSubmitDict[runningEndPointName]['BdSoftwareVersion']
          HLLName = JobsToSubmitDict[runningEndPointName]['HLLName']
          HLLVersion = JobsToSubmitDict[runningEndPointName]['HLLVersion']
          PublicIP = JobsToSubmitDict[runningEndPointName]['PublicIP']
          User = JobsToSubmitDict[runningEndPointName]['User']
          Port = JobsToSubmitDict[runningEndPointName]['Port']
          UsePilot = JobsToSubmitDict[runningEndPointName]['UsePilot']
          IsInteractive = JobsToSubmitDict[runningEndPointName]['IsInteractive']
          Arguments = JobsToSubmitDict[runningEndPointName]['Arguments']
          numBigDataJobsAllowed = JobsToSubmitDict[runningEndPointName]['NumBigDataJobsAllowedToSubmit']

          ret = pool.generateJobAndQueueIt( director.submitBigDataJobs,
                                            args = ( endpoint, numBigDataJobsAllowed, runningSiteName, NameNode,
                                                     BigDataSoftware, BigDataSoftwareVersion, HLLName, HLLVersion,
                                                     PublicIP, Port, jobIDs, runningEndPointName, jobName, User, self.jobDataset, UsePilot, IsInteractive ),
                                            oCallback = self.callBack,
                                            oExceptionCallback = director.exceptionCallBack,
                                            blocking = False )
          if not ret['OK']:
            # Disable submission until next iteration
            self.directors[directorName]['isEnabled'] = False
          else:
            time.sleep( self.am_getOption( 'ThreadStartDelay' ) )

    if 'Default' in self.pools:
      # only for those in "Default' thread Pool
      # for pool in self.pools:
      self.pools['Default'].processAllResults()

    return DIRAC.S_OK()

  def matchingJobsForBDSubmission( self, arguments, bigdataendpoint, BigDataSoftware,
                                   BigDataSoftwareVersion, HLLName, HLLVersion, jobid ):
    """
     Jobs matching, first with the dataset and the SITE, find in the Database the matching with the Dataset key
     As the second step the endpoind is matched with the resulting SITES and in the case of 
     was matching, in the third step the job will be matched with the bigdatasoft of the SITE.
    """
    self.jobDataset = ""
    returned = jobDB.getInputData( jobid )
    if not returned['OK']:
      self.log.error( "There is not Input Data stored in the Job" )
      return "Error"

    if returned['Value'] != []:
      self.jobDataset = returned['Value'][0]

    if arguments == 0 and self.jobDataset == "":
      self.log.error( "Error reading the job arguments for BigData Submission:", arguments )
      return "Error"
    if arguments == 0 and self.jobDataset != "":
      self.fileCatalogue = FileCatalog()
      result = self.fileCatalogue.getReplicas( self.jobDataset )
      if not result['OK'] or result['Value']['Successful'] == {}:
        return S_ERROR( result )
      for SiteName in result['Value']['Successful'][self.jobDataset]:
        if bigdataendpoint in SiteName:
          return "OK"
      # None of the SEs holding the dataset corresponds to the requested endpoint
      return "Dataset is not located at the given BigData endpoint"

    self.log.info( "BigDataEndpoint", bigdataendpoint )
    self.log.info( "BigDataSoftware", BigDataSoftware )
    self.log.info( "BigDataSoftwareVersion", BigDataSoftwareVersion )
    self.log.info( "HLLName", HLLName )
    self.log.info( "HLLVersion", HLLVersion )

    jobNameSplitted = re.split( ' ', arguments )
    if len( jobNameSplitted ) < 4:
      self.log.error( "Not enough arguments for BigData submission:", arguments )
      return "Error"

    jobBigDataSoft = jobNameSplitted[0]
    if jobBigDataSoft not in BigDataDB.validSoftware:
      self.log.error( "Argument %s is not in the list of accepted BigData software:" % jobBigDataSoft, BigDataDB.validSoftware )
      return "Error"

    jobBigDataVersion = jobNameSplitted[1]
    if jobBigDataVersion not in BigDataDB.validSoftwareVersion:
      self.log.error( "Argument %s is not in the list of accepted BigData software versions:" % jobBigDataVersion, BigDataDB.validSoftwareVersion )
      return "Error"

    jobHHLSoft = jobNameSplitted[2]
    if jobHHLSoft not in BigDataDB.validHighLevelLang:
      self.log.error( "Argument %s is not in the list of accepted high-level languages:" % jobHHLSoft, BigDataDB.validHighLevelLang )
      return "Error"

    jobHHLVersion = jobNameSplitted[3]
    #if jobHHLVersion not in BigDataDB.validHighLevelLangVersion:
    #  self.log.error( "Argument %s for valid B.D. H.L. software version is not in the list of accepted:" % ( jobHHLVersion ), BigDataDB.validHighLevelLangVersion )
    #  return "Error"

    #Old-one
    #JobSiteNames = BigDataDB.getSiteNameByDataSet( self.jobDataset );
    self.fileCatalogue = FileCatalog()
    result = self.fileCatalogue.getReplicas( self.jobDataset )
    if not result['OK'] or result['Value']['Successful'] == {}:
      return S_ERROR( result )
    for SiteName in result['Value']['Successful'][self.jobDataset]:
      if bigdataendpoint in SiteName:
        if ( jobBigDataSoft == BigDataSoftware ) and ( jobBigDataVersion == BigDataSoftwareVersion ) and ( HLLName == jobHHLSoft ) and ( HLLVersion == jobHHLVersion ):
          return( "OK" )
        else:
          return "Dataset match with SiteName but Site doesn't have the software"

    return "Dataset does not match with any Site"

  def submitPilotsForTaskQueue( self, taskQueueDict, waitingPilots ):

    taskQueueID = taskQueueDict['TaskQueueID']
    maxCPU = maxCPUSegments[-1]
    extraPilotFraction = self.am_getOption( 'extraPilotFraction' )
    extraPilots = self.am_getOption( 'extraPilots' )

    taskQueuePriority = taskQueueDict['Priority']
    self.log.verbose( 'Priority for TaskQueue %s:' % taskQueueID, taskQueuePriority )
    taskQueueCPU = max( taskQueueDict['CPUTime'], self.am_getOption( 'lowestCPUBoost' ) )
    self.log.verbose( 'CPUTime  for TaskQueue %s:' % taskQueueID, taskQueueCPU )
    taskQueueJobs = taskQueueDict['Jobs']
    self.log.verbose( 'Jobs in TaskQueue %s:' % taskQueueID, taskQueueJobs )

    # Determine number of pilots to submit, boosting TaskQueues with low CPU requirements
    #pilotsToSubmit = poisson( ( self.pilotsPerPriority * taskQueuePriority +
    #                            self.pilotsPerJob * taskQueueJobs ) * maxCPU / taskQueueCPU )
    pilotsToSubmit = poisson( ( taskQueuePriority +
                                taskQueueJobs ) * maxCPU / taskQueueCPU )
    # limit the number of pilots according to the number of waiting jobs in the TaskQueue
    # and the number of already submitted pilots for that TaskQueue
    pilotsToSubmit = min( pilotsToSubmit, int( ( 1 + extraPilotFraction ) * taskQueueJobs ) + extraPilots - waitingPilots )
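    # Hedged worked example with assumed numbers: for taskQueuePriority = 1, taskQueueJobs = 10,
    # maxCPU = 500000 and taskQueueCPU = 250000 the Poisson mean above is
    # ( 1 + 10 ) * 500000 / 250000 = 22, so pilotsToSubmit is a draw around 22; with
    # extraPilotFraction = 0.2, extraPilots = 2 and waitingPilots = 5 the cap is
    # int( 1.2 * 10 ) + 2 - 5 = 9, so at most 9 pilots would be submitted for that TaskQueue.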
    if pilotsToSubmit <= 0:
      return DIRAC.S_OK( 0 )
    self.log.verbose( 'Submitting %s pilots for TaskQueue %s' % ( pilotsToSubmit, taskQueueID ) )

    return self.__submitPilots( taskQueueDict, pilotsToSubmit )

  def __submitPilots( self, taskQueueDict, pilotsToSubmit ):
    """
      Try to insert the submission in the corresponding Thread Pool, disable the Thread Pool
      until next iteration once it becomes full
    """
    # Check if a specific middleware is required
    if 'SubmitPools' in taskQueueDict:
      submitPools = taskQueueDict[ 'SubmitPools' ]
    else:
      submitPools = self.am_getOption( 'DefaultSubmitPools' )
    submitPools = DIRAC.List.randomize( submitPools )

    for submitPool in submitPools:
      self.log.verbose( 'Trying SubmitPool:', submitPool )

      if not submitPool in self.directors or not self.directors[submitPool]['isEnabled']:
        self.log.verbose( 'Not Enabled' )
        continue

      pool = self.pools[self.directors[submitPool]['pool']]
      director = self.directors[submitPool]['director']
      ret = pool.generateJobAndQueueIt( director.submitPilots,
                                        args = ( taskQueueDict, pilotsToSubmit, self.workDir ),
                                        oCallback = self.callBack,
                                        oExceptionCallback = director.exceptionCallBack,
                                        blocking = False )
      if not ret['OK']:
        # Disable submission until next iteration
        self.directors[submitPool]['isEnabled'] = False
      else:
        time.sleep( self.am_getOption( 'ThreadStartDelay' ) )
        break

    return DIRAC.S_OK( pilotsToSubmit )

  def __checkSubmitPools( self ):
    # this method is called at initialization and at the beginning of each execution cycle
    # in this way running parameters can be dynamically changed via the remote
    # configuration.

    # First update common Configuration for all Directors
    self.__configureDirector()

    # Now we need to initialize one thread for each Director in the List,
    # and check its configuration:
    for submitPool in self.am_getOption( 'SubmitPools' ):
      # check if the Director is initialized, then reconfigure
      if submitPool not in self.directors:
        # instantiate a new Director
        self.__createDirector( submitPool )

      self.__configureDirector( submitPool )

      # Now enable the director for this iteration, if some RB/WMS/CE is defined
      if submitPool in self.directors:
        if 'resourceBrokers' in dir( self.directors[submitPool]['director'] ) and self.directors[submitPool]['director'].resourceBrokers:
          self.directors[submitPool]['isEnabled'] = True
        if 'computingElements' in dir( self.directors[submitPool]['director'] ) and self.directors[submitPool]['director'].computingElements:
          self.directors[submitPool]['isEnabled'] = True

    # Now remove directors that are not enabled (they have been used but are no
    # longer required in the CS).
    pools = []
    for submitPool in self.directors.keys():
      if not self.directors[submitPool]['isEnabled']:
        self.log.info( 'Deleting Director for SubmitPool:', submitPool )
        director = self.directors[submitPool]['director']
        del self.directors[submitPool]
        del director
      else:
        pools.append( self.directors[submitPool]['pool'] )

    # Finally delete ThreadPools that are no longer in use
    for pool in self.pools:
      if pool != 'Default' and not pool in pools:
        pool = self.pools.pop( pool )
        # del self.pools[pool]
        del pool

  def __createDirector( self, submitPool ):
    """
     Instantiate a new BigDataDirector for the given SubmitPool
    """

    self.log.info( 'Creating Director for SubmitPool:', submitPool )
    # 1. get the BigDataDirector

    director = BigDataDirector( submitPool )
    directorName = '%sDirector' % submitPool

    self.log.info( 'Director Object instantiated:', directorName )

    # 2. check the requested ThreadPool (if not defined use the default one)
    directorPool = self.am_getOption( submitPool + '/Pool', 'Default' )
    if not directorPool in self.pools:
      self.log.info( 'Adding Thread Pool:', directorPool )
      poolName = self.__addPool( directorPool )
      if not poolName:
        self.log.error( 'Can not create Thread Pool:', directorPool )
        return

    # 3. add New director
    self.directors[ submitPool ] = { 'director': director,
                                     'pool': directorPool,
                                     'isEnabled': False,
                                   }

    self.log.verbose( 'Created Director for SubmitPool', submitPool )

    return

  def __configureDirector( self, submitPool = None ):
    # Update Configuration from CS
    # If submitPool is None:
    #   disable all Directors
    # else:
    #   update the Configuration for the BigDataDirector of that SubmitPool
    if submitPool is None:
      self.workDir = self.am_getOption( 'WorkDirectory' )
      # By default disable all directors
      for director in self.directors:
        self.directors[director]['isEnabled'] = False

    else:
      if submitPool not in self.directors:
        DIRAC.abort( -1, "Submit Pool not available", submitPool )
      director = self.directors[submitPool]['director']
      # Pass reference to our CS section so that defaults can be taken from there
      director.configure( self.am_getModuleParam( 'section' ), submitPool )

      # Enable the director for job submission
      self.directors[submitPool]['isEnabled'] = True

  def __addPool( self, poolName ):
    # create a new thread Pool, by default it has 2 executing threads and 40 requests
    # in the Queue

    if not poolName:
      return None
    if poolName in self.pools:
      return None
    pool = ThreadPool( self.am_getOption( 'minThreadsInPool' ),
                       self.am_getOption( 'maxThreadsInPool' ),
                       self.am_getOption( 'totalThreadsInPool' ) )
    # Daemonize except "Default" pool
    if poolName != 'Default':
      pool.daemonize()
    self.pools[poolName] = pool
    return poolName
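
  # Hedged sketch (not part of the original agent): how a ThreadPool created by __addPool()
  # is typically driven, mirroring the generateJobAndQueueIt() and processAllResults() calls
  # already used in execute() and __submitPilots(). The worker function and its argument are
  # placeholders.
  def _examplePoolUsage( self, poolName = 'Default' ):
    pool = self.pools[poolName]
    ret = pool.generateJobAndQueueIt( lambda item: DIRAC.S_OK( item ),  # placeholder worker
                                      args = ( 'some work item', ),
                                      oCallback = self.callBack,
                                      blocking = False )
    if ret['OK'] and poolName == 'Default':
      # the non-daemonized 'Default' pool needs its results drained explicitly
      pool.processAllResults()
    return ret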

  def callBack( self, threadedJob, submitResult ):
    if not submitResult['OK']:
      self.log.error( 'submitJobBigData Failed: ', submitResult['Message'] )
      if 'Value' in submitResult:
        self.callBackLock.acquire()
        self.callBackLock.release()
    else:
      self.log.info( 'New Job BigData Submitted' )
      self.callBackLock.acquire()
      self.callBackLock.release()