Example #1
def printRequest( request, status = None, full = False, verbose = True, terse = False ):
  from DIRAC.DataManagementSystem.Client.FTSClient                                  import FTSClient
  global output
  ftsClient = FTSClient()
  if full:
    output = ''
    prettyPrint( request.toJSON()['Value'] )
    gLogger.always( output )
  else:
    if not status:
      status = request.Status
    gLogger.always( "Request name='%s' ID=%s Status='%s'%s%s%s" % ( request.RequestName,
                                                                     request.RequestID,
                                                                     request.Status, " ('%s' in DB)" % status if status != request.Status else '',
                                                                     ( " Error='%s'" % request.Error ) if request.Error and request.Error.strip() else "" ,
                                                                     ( " Job=%s" % request.JobID ) if request.JobID else "" ) )
    gLogger.always( "Created %s, Updated %s" % ( request.CreationTime, request.LastUpdate ) )
    if request.OwnerDN:
      gLogger.always( "Owner: '%s', Group: %s" % ( request.OwnerDN, request.OwnerGroup ) )
    for indexOperation in enumerate( request ):
      op = indexOperation[1]
      if not terse or op.Status == 'Failed':
        printOperation( indexOperation, verbose, onlyFailed = terse )
  # Check if FTS job exists
  res = ftsClient.getFTSJobsForRequest( request.RequestID )
  if res['OK']:
    ftsJobs = res['Value']
    if ftsJobs:
      gLogger.always( '         FTS jobs associated: %s' % ','.join( ['%s (%s)' % ( job.FTSGUID, job.Status ) \
                                                               for job in ftsJobs] ) )
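A minimal usage sketch for printRequest() above, not part of the original example: it fetches a Request object by ID and hands it to the function. The ReqClient import path and the peekRequest() call follow DIRAC's RequestManagementSystem client, but treat the exact call and the request ID as assumptions here.

from DIRAC import gLogger
from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient

reqClient = ReqClient()
res = reqClient.peekRequest(123456)  # hypothetical request ID
if res['OK'] and res['Value']:
    printRequest(res['Value'], verbose=True)
else:
    gLogger.error("Could not retrieve request", res.get('Message', ''))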
Example #2
  def __init__( self, operation = None, csPath = None ):
    """c'tor

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
    super( ReplicateAndRegister, self ).__init__( operation, csPath )
    # # own gMonitor stuff for files
    gMonitor.registerActivity( "ReplicateAndRegisterAtt", "Replicate and register attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateOK", "Replications successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateFail", "Replications failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterOK", "Registrations successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterFail", "Registrations failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # for FTS
    gMonitor.registerActivity( "FTSScheduleAtt", "Files schedule attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleOK", "File schedule successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleFail", "File schedule failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # SE cache

    # Clients
    self.fc = FileCatalog()
    if hasattr( self, "FTSMode" ) and getattr( self, "FTSMode" ):
      from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
      self.ftsClient = FTSClient()
Example #3
def printFTSJobs(request):
    """ Prints the FTSJobs associated to a request

      :param request: Request object
  """

    try:
        if request.RequestID:

            # We try first the new FTS3 system

            from DIRAC.DataManagementSystem.Client.FTS3Client import FTS3Client
            fts3Client = FTS3Client()
            res = fts3Client.ping()

            if res['OK']:
                associatedFTS3Jobs = []
                for op in request:
                    res = fts3Client.getOperationsFromRMSOpID(op.OperationID)
                    if res['OK']:
                        for fts3Op in res['Value']:
                            associatedFTS3Jobs.extend(fts3Op.ftsJobs)
                if associatedFTS3Jobs:
                    gLogger.always(
                        '\n\nFTS3 jobs associated: \n%s' %
                        '\n'.join('%s@%s (%s)' %
                                  (job.ftsGUID, job.ftsServer, job.status)
                                  for job in associatedFTS3Jobs))
                return

            # If we are here, the attempt with the new FTS3 system did not work, let's try the old FTS system
            gLogger.debug("Could not instantiate FTS3Client", res)
            from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
            ftsClient = FTSClient()
            res = ftsClient.ping()
            if not res['OK']:
                gLogger.debug("Could not instantiate FtsClient", res)
                return

            res = ftsClient.getFTSJobsForRequest(request.RequestID)
            if res['OK']:
                ftsJobs = res['Value']
                if ftsJobs:
                    gLogger.always('         FTS jobs associated: %s' %
                                   ','.join('%s (%s)' %
                                            (job.FTSGUID, job.Status)
                                            for job in ftsJobs))

    # ImportError can be thrown for the old client
    # AttributeError can be thrown because the deserialization will not have
    # happened correctly on the new fts3 (CC7 typically), and the error is not
    # properly propagated
    except (ImportError, AttributeError) as err:
        gLogger.debug("Could not instantiate FtsClient because of Exception",
                      repr(err))
Example #4
def printFTSJobs(request):
  """ Prints the FTSJobs associated to a request

      :param request: Request object
  """

  try:
    if request.RequestID:

      # We try first the new FTS3 system

      from DIRAC.DataManagementSystem.Client.FTS3Client import FTS3Client
      fts3Client = FTS3Client()
      res = fts3Client.ping()

      if res['OK']:
        associatedFTS3Jobs = []
        for op in request:
          res = fts3Client.getOperationsFromRMSOpID(op.OperationID)
          if res['OK']:
            for fts3Op in res['Value']:
              associatedFTS3Jobs.extend(fts3Op.ftsJobs)
        if associatedFTS3Jobs:
          gLogger.always(
              '\n\nFTS3 jobs associated: \n%s' %
              '\n'.join(
                  '%s@%s (%s)' %
                  (job.ftsGUID,
                   job.ftsServer,
                   job.status) for job in associatedFTS3Jobs))
        return

      # If we are here, the attempt with the new FTS3 system did not work, let's try the old FTS system
      gLogger.debug("Could not instantiate FTS3Client", res)
      from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
      ftsClient = FTSClient()
      res = ftsClient.ping()
      if not res['OK']:
        gLogger.debug("Could not instantiate FtsClient", res)
        return

      res = ftsClient.getFTSJobsForRequest(request.RequestID)
      if res['OK']:
        ftsJobs = res['Value']
        if ftsJobs:
          gLogger.always('         FTS jobs associated: %s' % ','.join('%s (%s)' % (job.FTSGUID, job.Status)
                                                                       for job in ftsJobs))

  # ImportError can be thrown for the old client
  # AttributeError can be thrown because the deserialization will not have
  # happened correctly on the new fts3 (CC7 typically), and the error is not
  # properly propagated
  except (ImportError, AttributeError) as err:
    gLogger.debug("Could not instantiate FtsClient because of Exception", repr(err))
Example #5
def printRequest(request, status=None, full=False, verbose=True, terse=False):
    global output

    ftsClient = None
    try:
        if request.RequestID:
            from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
            ftsClient = FTSClient()
            res = ftsClient.ping()
            if not res['OK']:
                gLogger.debug("Could not instantiate FtsClient", res)
                ftsClient = None
    except ImportError as err:
        gLogger.debug("Could not instantiate FtsClient because of Exception",
                      repr(err))

    if full:
        output = ''
        prettyPrint(json.loads(request.toJSON()['Value']))
        gLogger.always(output)
    else:
        if not status:
            status = request.Status
        gLogger.always(
            "Request name='%s' ID=%s Status='%s'%s%s%s" %
            (request.RequestName, request.RequestID if hasattr(
                request, 'RequestID') else '(not set yet)', request.Status,
             " ('%s' in DB)" % status if status != request.Status else '',
             (" Error='%s'" % request.Error)
             if request.Error and request.Error.strip() else "",
             (" Job=%s" % request.JobID) if request.JobID else ""))
        gLogger.always("Created %s, Updated %s%s" %
                       (request.CreationTime, request.LastUpdate,
                        (", NotBefore %s" %
                         request.NotBefore) if request.NotBefore else ""))
        if request.OwnerDN:
            gLogger.always("Owner: '%s', Group: %s" %
                           (request.OwnerDN, request.OwnerGroup))
        for indexOperation in enumerate(request):
            op = indexOperation[1]
            if not terse or op.Status == 'Failed':
                printOperation(indexOperation, verbose, onlyFailed=terse)

    if ftsClient:
        # Check if FTS job exists
        res = ftsClient.getFTSJobsForRequest(request.RequestID)
        if res['OK']:
            ftsJobs = res['Value']
            if ftsJobs:
                gLogger.always('         FTS jobs associated: %s' %
                               ','.join('%s (%s)' % (job.FTSGUID, job.Status)
                                        for job in ftsJobs))
Example #6
def printFTSJobs(request):
  """ Prints the FTSJobs associated to a request

      :param request: Request object
  """

  try:
    if request.RequestID:

      # We try first the new FTS3 system

      from DIRAC.DataManagementSystem.Client.FTS3Client import FTS3Client
      fts3Client = FTS3Client()
      res = fts3Client.ping()

      if res['OK']:
        associatedFTS3Jobs = []
        for op in request:
          res = fts3Client.getOperationsFromRMSOpID(op.OperationID)
          if res['OK']:
            for fts3Op in res['Value']:
              associatedFTS3Jobs.extend(fts3Op.ftsJobs)
        if associatedFTS3Jobs:
          gLogger.always(
              '\n\nFTS3 jobs associated: \n%s' %
              '\n'.join(
                  '%s@%s (%s)' %
                  (job.ftsGUID,
                   job.ftsServer,
                   job.status) for job in associatedFTS3Jobs))
        return

      # If we are here, the attempt with the new FTS3 system did not work, let's try the old FTS system
      gLogger.debug("Could not instantiate FTS3Client", res)
      from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
      ftsClient = FTSClient()
      res = ftsClient.ping()
      if not res['OK']:
        gLogger.debug("Could not instantiate FtsClient", res)
        return

      res = ftsClient.getFTSJobsForRequest(request.RequestID)
      if res['OK']:
        ftsJobs = res['Value']
        if ftsJobs:
          gLogger.always('         FTS jobs associated: %s' % ','.join('%s (%s)' % (job.FTSGUID, job.Status)
                                                                       for job in ftsJobs))

  except ImportError as err:
    gLogger.debug("Could not instantiate FtsClient because of Exception", repr(err))
  def __init__( self, operation = None, csPath = None ):
    """c'tor

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
    super( ReplicateAndRegister, self ).__init__( operation, csPath )
    # # own gMonitor stuff for files
    gMonitor.registerActivity( "ReplicateAndRegisterAtt", "Replicate and register attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateOK", "Replications successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateFail", "Replications failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterOK", "Registrations successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterFail", "Registrations failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # for FTS
    gMonitor.registerActivity( "FTSScheduleAtt", "Files schedule attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleOK", "File schedule successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleFail", "File schedule failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # SE cache
    self.seCache = {}

    # Clients
    self.fc = FileCatalog()
    self.ftsClient = FTSClient()
Example #8
def printRequest(request, status=None, full=False, verbose=True, terse=False):
    global output

    ftsClient = None
    try:
        from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
        ftsClient = FTSClient()
    except Exception as e:
        gLogger.debug("Could not instantiate FtsClient", e)
Example #9
    def __init__(self, operation=None, csPath=None):
        """c'tor

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
        super(ReplicateAndRegister, self).__init__(operation, csPath)
        # # own gMonitor stuff for files
        gMonitor.registerActivity("ReplicateAndRegisterAtt",
                                  "Replicate and register attempted",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("ReplicateOK", "Replications successful",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("ReplicateFail", "Replications failed",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RegisterOK", "Registrations successful",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RegisterFail", "Registrations failed",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        # # for FTS
        gMonitor.registerActivity("FTSScheduleAtt", "Files schedule attempted",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("FTSScheduleOK", "File schedule successful",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("FTSScheduleFail", "File schedule failed",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        # # SE cache
        self.seCache = {}

        # Clients
        self.rm = ReplicaManager()
        self.ftsClient = FTSClient()
Example #10
 def ftsClient(cls):
     """ FTS client """
     if not cls.__ftsClient:
         cls.__ftsClient = FTSClient()
     return cls.__ftsClient
Example #11
 def ftsClient(self):
     """ FTSClient getter """
     if not self.__ftsClient:
         self.__ftsClient = FTSClient()
     return self.__ftsClient
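Examples #10 and #11 are lazy getters: the first call creates a single FTSClient and later calls return the cached instance (in the DIRAC source such getters typically sit behind a classmethod or property decorator, which the snippets do not show). Below is a minimal, self-contained sketch of the same caching pattern; the class name and the @property decorator are illustrative assumptions, not the DIRAC code.

from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient


class FTSAwareHandler(object):
    """Hypothetical handler that instantiates one FTSClient on first access."""
    __ftsClient = None

    @property
    def ftsClient(self):
        """FTSClient getter: create lazily, then reuse the cached instance."""
        if not self.__ftsClient:
            self.__ftsClient = FTSClient()
        return self.__ftsClient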
Example #12
    def ftsTransfer(self):
        """ replicate and register using FTS """

        self.log.info("scheduling files in FTS...")

        bannedTargets = self.checkSEsRSS()
        if not bannedTargets['OK']:
            gMonitor.addMark("FTSScheduleAtt")
            gMonitor.addMark("FTSScheduleFail")
            return bannedTargets

        if bannedTargets['Value']:
            return S_OK("%s targets are banned for writing" %
                        ",".join(bannedTargets['Value']))

        # Can continue now
        self.log.verbose("No targets banned for writing")

        toSchedule = {}

        delayExecution = 0
        errors = defaultdict(int)
        for opFile in self.getWaitingFilesList():
            opFile.Error = ''
            gMonitor.addMark("FTSScheduleAtt")
            # # check replicas
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                continue
            replicas = replicas["Value"]

            validReplicas = replicas.get("Valid")
            noMetaReplicas = replicas.get("NoMetadata")
            noReplicas = replicas.get('NoReplicas')
            badReplicas = replicas.get('Bad')
            noActiveReplicas = replicas.get('NoActiveReplicas')

            if validReplicas:
                validTargets = list(
                    set(self.operation.targetSEList) - set(validReplicas))
                if not validTargets:
                    self.log.info("file %s is already present at all targets" %
                                  opFile.LFN)
                    opFile.Status = "Done"
                else:
                    toSchedule[opFile.LFN] = [
                        opFile, validReplicas, validTargets
                    ]
            else:
                gMonitor.addMark("FTSScheduleFail")
                if noMetaReplicas:
                    err = "Couldn't get metadata"
                    errors[err] += 1
                    self.log.verbose(
                        "unable to schedule '%s', %s at %s" %
                        (opFile.LFN, err, ','.join(noMetaReplicas)))
                    opFile.Error = err
                elif noReplicas:
                    err = "File doesn't exist"
                    errors[err] += 1
                    self.log.error(
                        "Unable to schedule transfer", "%s %s at %s" %
                        (opFile.LFN, err, ','.join(noReplicas)))
                    opFile.Error = err
                    opFile.Status = 'Failed'
                elif badReplicas:
                    err = "All replicas have a bad checksum"
                    errors[err] += 1
                    self.log.error(
                        "Unable to schedule transfer", "%s, %s at %s" %
                        (opFile.LFN, err, ','.join(badReplicas)))
                    opFile.Error = err
                    opFile.Status = 'Failed'
                elif noActiveReplicas:
                    err = "No active replica found"
                    errors[err] += 1
                    self.log.verbose(
                        "Unable to schedule transfer", "%s, %s at %s" %
                        (opFile.LFN, err, ','.join(noActiveReplicas)))
                    opFile.Error = err
                    # All source SEs are banned, delay execution by 1 hour
                    delayExecution = 60

        if delayExecution:
            self.log.info("Delay execution of the request by %d minutes" %
                          delayExecution)
            self.request.delayNextExecution(delayExecution)
        # Log error counts
        for error, count in errors.iteritems():
            self.log.error(error, 'for %d files' % count)

        filesToScheduleList = []
        res = self._addMetadataToFiles(toSchedule)
        if not res['OK']:
            return res
        else:
            filesToSchedule = res['Value']

            for lfn in filesToSchedule:
                filesToScheduleList.append(
                    (filesToSchedule[lfn][0].toJSON()['Value'],
                     toSchedule[lfn][1], toSchedule[lfn][2]))

        if filesToScheduleList:

            ftsSchedule = FTSClient().ftsSchedule(self.request.RequestID,
                                                  self.operation.OperationID,
                                                  filesToScheduleList)
            if not ftsSchedule["OK"]:
                self.log.error("Completely failed to schedule to FTS:",
                               ftsSchedule["Message"])
                return ftsSchedule

            # might have nothing to schedule
            ftsSchedule = ftsSchedule["Value"]
            if not ftsSchedule:
                return S_OK()

            self.log.info("%d files have been scheduled to FTS" %
                          len(ftsSchedule['Successful']))
            for opFile in self.operation:
                fileID = opFile.FileID
                if fileID in ftsSchedule["Successful"]:
                    gMonitor.addMark("FTSScheduleOK", 1)
                    opFile.Status = "Scheduled"
                    self.log.debug("%s has been scheduled for FTS" %
                                   opFile.LFN)
                elif fileID in ftsSchedule["Failed"]:
                    gMonitor.addMark("FTSScheduleFail", 1)
                    opFile.Error = ftsSchedule["Failed"][fileID]
                    if 'sourceSURL equals to targetSURL' in opFile.Error:
                        # In this case there is no need to continue
                        opFile.Status = 'Failed'
                    self.log.warn("unable to schedule %s for FTS: %s" %
                                  (opFile.LFN, opFile.Error))
        else:
            self.log.info("No files to schedule after metadata checks")

        # Just in case some transfers could not be scheduled, try them with RM
        return self.dmTransfer(fromFTS=True)
Example #13
if __name__ == "__main__":

  args = Script.getPositionalArgs()
  if len( args ) != 1:
    Script.showHelp()
  try:
    requestID = long( args[0] )
  except ValueError:
    DIRAC.gLogger.error( "requestID should be an integer" )
    DIRAC.exit( -1 )


  from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
  from DIRAC.DataManagementSystem.Client.FTSJob import FTSJob

  ftsClient = FTSClient()

  ftsJobs = ftsClient.getFTSJobsForRequest( requestID, list( FTSJob.INITSTATES + FTSJob.TRANSSTATES + FTSJob.FINALSTATES ) )
  if not ftsJobs["OK"]:
    DIRAC.gLogger.error( ftsJobs["Message"] )
    DIRAC.exit( -1 )
  ftsJobs = ftsJobs["Value"]

  if not ftsJobs:
    DIRAC.gLogger.always( "No FTSJobs found for requestID %s" % requestID )
    DIRAC.exit( 0 )

  DIRAC.gLogger.always( "Found %s FTSJobs for requestID %s" % ( len( ftsJobs ), requestID ) )

  jobKeys = ( "SourceSE", "TargetSE", "Status", "Files", "Size", "Completeness", "CreationTime", "SubmitTime", "LastUpdate", "Error" )
  fileKeys = ( "SourceSURL", "TargetSURL", "Attempt", "Status", "Error" )
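The script above, like most snippets on this page, checks res["OK"] before reading res["Value"] and logs res["Message"] on failure; this is DIRAC's standard S_OK/S_ERROR return convention. A minimal sketch of that pattern follows; the helper function is illustrative and not part of the DIRAC API.

from DIRAC import S_OK, S_ERROR, gLogger

def countFTSJobs(ftsClient, requestID):
    """Illustrative helper: wrap getFTSJobsForRequest in the usual OK/Value/Message checks."""
    res = ftsClient.getFTSJobsForRequest(requestID)
    if not res['OK']:
        gLogger.error("getFTSJobsForRequest failed", res['Message'])
        return S_ERROR(res['Message'])
    return S_OK(len(res['Value']))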
Example #14
class ReplicateAndRegister( DMSRequestOperationsBase ):
  """
  .. class:: ReplicateAndRegister

  ReplicateAndRegister operation handler
  """

  def __init__( self, operation = None, csPath = None ):
    """c'tor

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
    super( ReplicateAndRegister, self ).__init__( operation, csPath )
    # # own gMonitor stuff for files
    gMonitor.registerActivity( "ReplicateAndRegisterAtt", "Replicate and register attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateOK", "Replications successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateFail", "Replications failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterOK", "Registrations successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterFail", "Registrations failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # for FTS
    gMonitor.registerActivity( "FTSScheduleAtt", "Files schedule attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleOK", "File schedule successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleFail", "File schedule failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # SE cache

    # Clients
    self.fc = FileCatalog()
    if hasattr( self, "FTSMode" ) and getattr( self, "FTSMode" ):
      from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
      self.ftsClient = FTSClient()

  def __call__( self ):
    """ call me maybe """
    # # check replicas first
    checkReplicas = self.__checkReplicas()
    if not checkReplicas["OK"]:
      self.log.error( 'Failed to check replicas', checkReplicas["Message"] )
    if hasattr( self, "FTSMode" ) and getattr( self, "FTSMode" ):
      bannedGroups = getattr( self, "FTSBannedGroups" ) if hasattr( self, "FTSBannedGroups" ) else ()
      if self.request.OwnerGroup in bannedGroups:
        self.log.verbose( "usage of FTS system is banned for request's owner" )
        return self.dmTransfer()
      return self.ftsTransfer()
    return self.dmTransfer()

  def __checkReplicas( self ):
    """ check done replicas and update file states  """
    waitingFiles = dict( [ ( opFile.LFN, opFile ) for opFile in self.operation
                          if opFile.Status in ( "Waiting", "Scheduled" ) ] )
    targetSESet = set( self.operation.targetSEList )

    replicas = self.fc.getReplicas( waitingFiles.keys() )
    if not replicas["OK"]:
      self.log.error( 'Failed to get replicas', replicas["Message"] )
      return replicas

    reMissing = re.compile( r".*such file.*" )
    for failedLFN, errStr in replicas["Value"]["Failed"].items():
      waitingFiles[failedLFN].Error = errStr
      if reMissing.search( errStr.lower() ):
        self.log.error( "File does not exists", failedLFN )
        gMonitor.addMark( "ReplicateFail", len( targetSESet ) )
        waitingFiles[failedLFN].Status = "Failed"

    for successfulLFN, reps in replicas["Value"]["Successful"].items():
      if targetSESet.issubset( set( reps ) ):
        self.log.info( "file %s has been replicated to all targets" % successfulLFN )
        waitingFiles[successfulLFN].Status = "Done"

    return S_OK()

  def _addMetadataToFiles( self, toSchedule ):
    """ Add metadata to those files that need to be scheduled through FTS

        toSchedule is a dictionary:
        {'lfn1': [opFile, validReplicas, validTargets], 'lfn2': [opFile, validReplicas, validTargets]}
    """
    if toSchedule:
      self.log.info( "found %s files to schedule, getting metadata from FC" % len( toSchedule ) )
      lfns = toSchedule.keys()
    else:
      self.log.info( "No files to schedule" )
      return S_OK()

    res = self.fc.getFileMetadata( lfns )
    if not res['OK']:
      return res
    else:
      if res['Value']['Failed']:
        self.log.warn( "Can't schedule %d files: problems getting the metadata: %s" % ( len( res['Value']['Failed'] ),
                                                                                        ', '.join( res['Value']['Failed'] ) ) )
      metadata = res['Value']['Successful']

    filesToScheduleList = []

    for lfnsToSchedule, lfnMetadata in metadata.items():
      opFileToSchedule = toSchedule[lfnsToSchedule][0]
      opFileToSchedule.GUID = lfnMetadata['GUID']
      opFileToSchedule.Checksum = metadata[lfnsToSchedule]['Checksum']
      opFileToSchedule.ChecksumType = metadata[lfnsToSchedule]['ChecksumType']
      opFileToSchedule.Size = metadata[lfnsToSchedule]['Size']

      filesToScheduleList.append( ( opFileToSchedule.toJSON()['Value'],
                                    toSchedule[lfnsToSchedule][1],
                                    toSchedule[lfnsToSchedule][2] ) )

    return S_OK( filesToScheduleList )



  def _filterReplicas( self, opFile ):
    """ filter out banned/invalid source SEs """
    return filterReplicas( opFile, logger = self.log, dataManager = self.dm )

  def ftsTransfer( self ):
    """ replicate and register using FTS """

    self.log.info( "scheduling files in FTS..." )

    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark( "FTSScheduleAtt" )
      gMonitor.addMark( "FTSScheduleFail" )
      return bannedTargets

    if bannedTargets['Value']:
      return S_OK( "%s targets are banned for writing" % ",".join( bannedTargets['Value'] ) )

    # Can continue now
    self.log.verbose( "No targets banned for writing" )

    toSchedule = {}

    for opFile in self.getWaitingFilesList():
      opFile.Error = ''
      gMonitor.addMark( "FTSScheduleAtt" )
      # # check replicas
      replicas = self._filterReplicas( opFile )
      if not replicas["OK"]:
        continue
      replicas = replicas["Value"]

      validReplicas = replicas["Valid"]
      noMetaReplicas = replicas["NoMetadata"]
      noReplicas = replicas['NoReplicas']
      badReplicas = replicas['Bad']
      noPFN = replicas['NoPFN']

      if validReplicas:
        validTargets = list( set( self.operation.targetSEList ) - set( validReplicas ) )
        if not validTargets:
          self.log.info( "file %s is already present at all targets" % opFile.LFN )
          opFile.Status = "Done"
        else:
          toSchedule[opFile.LFN] = [ opFile, validReplicas, validTargets ]
      else:
        gMonitor.addMark( "FTSScheduleFail" )
        if noMetaReplicas:
          self.log.warn( "unable to schedule '%s', couldn't get metadata at %s" % ( opFile.LFN, ','.join( noMetaReplicas ) ) )
          opFile.Error = "Couldn't get metadata"
        elif noReplicas:
          self.log.error( "Unable to schedule transfer",
                          "File %s doesn't exist at %s" % ( opFile.LFN, ','.join( noReplicas ) ) )
          opFile.Error = 'No replicas found'
          opFile.Status = 'Failed'
        elif badReplicas:
          self.log.error( "Unable to schedule transfer",
                          "File %s, all replicas have a bad checksum at %s" % ( opFile.LFN, ','.join( badReplicas ) ) )
          opFile.Error = 'All replicas have a bad checksum'
          opFile.Status = 'Failed'
        elif noPFN:
          self.log.warn( "unable to schedule %s, could not get a PFN at %s" % ( opFile.LFN, ','.join( noPFN ) ) )

    res = self._addMetadataToFiles( toSchedule )
    if not res['OK']:
      return res
    else:
      filesToScheduleList = res['Value']


    if filesToScheduleList:

      ftsSchedule = self.ftsClient.ftsSchedule( self.request.RequestID,
                                                self.operation.OperationID,
                                                filesToScheduleList )
      if not ftsSchedule["OK"]:
        self.log.error( "Completely failed to schedule to FTS:", ftsSchedule["Message"] )
        return ftsSchedule

      # might have nothing to schedule
      ftsSchedule = ftsSchedule["Value"]
      if not ftsSchedule:
        return S_OK()

      self.log.info( "%d files have been scheduled to FTS" % len( ftsSchedule['Successful'] ) )
      for opFile in self.operation:
        fileID = opFile.FileID
        if fileID in ftsSchedule["Successful"]:
          gMonitor.addMark( "FTSScheduleOK", 1 )
          opFile.Status = "Scheduled"
          self.log.debug( "%s has been scheduled for FTS" % opFile.LFN )
        elif fileID in ftsSchedule["Failed"]:
          gMonitor.addMark( "FTSScheduleFail", 1 )
          opFile.Error = ftsSchedule["Failed"][fileID]
          if 'sourceSURL equals to targetSURL' in opFile.Error:
            # In this case there is no need to continue
            opFile.Status = 'Failed'
          self.log.warn( "unable to schedule %s for FTS: %s" % ( opFile.LFN, opFile.Error ) )
    else:
      self.log.info( "No files to schedule after metadata checks" )

    # Just in case some transfers could not be scheduled, try them with RM
    return self.dmTransfer( fromFTS = True )

  def dmTransfer( self, fromFTS = False ):
    """ replicate and register using dataManager  """
    # # get waiting files. If none just return
    # # source SE
    sourceSE = self.operation.SourceSE if self.operation.SourceSE else None
    if sourceSE:
      # # check source se for read
      bannedSource = self.checkSEsRSS( sourceSE, 'ReadAccess' )
      if not bannedSource["OK"]:
        gMonitor.addMark( "ReplicateAndRegisterAtt", len( self.operation ) )
        gMonitor.addMark( "ReplicateFail", len( self.operation ) )
        return bannedSource

      if bannedSource["Value"]:
        self.operation.Error = "SourceSE %s is banned for reading" % sourceSE
        self.log.info( self.operation.Error )
        return S_OK( self.operation.Error )

    # # check targetSEs for write
    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark( "ReplicateAndRegisterAtt", len( self.operation ) )
      gMonitor.addMark( "ReplicateFail", len( self.operation ) )
      return bannedTargets

    if bannedTargets['Value']:
      self.operation.Error = "%s targets are banned for writing" % ",".join( bannedTargets['Value'] )
      return S_OK( self.operation.Error )

    # Can continue now
    self.log.verbose( "No targets banned for writing" )

    waitingFiles = self.getWaitingFilesList()
    if not waitingFiles:
      return S_OK()
    # # loop over files
    if fromFTS:
      self.log.info( "Trying transfer using replica manager as FTS failed" )
    else:
      self.log.info( "Transferring files using Data manager..." )
    for opFile in waitingFiles:

      gMonitor.addMark( "ReplicateAndRegisterAtt", 1 )
      opFile.Error = ''
      lfn = opFile.LFN

      # Check if replica is at the specified source
      replicas = self._filterReplicas( opFile )
      if not replicas["OK"]:
        self.log.error( 'Failed to check replicas', replicas["Message"] )
        continue
      replicas = replicas["Value"]
      validReplicas = replicas["Valid"]
      noMetaReplicas = replicas["NoMetadata"]
      noReplicas = replicas['NoReplicas']
      badReplicas = replicas['Bad']
      noPFN = replicas['NoPFN']

      if not validReplicas:
        gMonitor.addMark( "ReplicateFail" )
        if noMetaReplicas:
          self.log.warn( "unable to replicate '%s', couldn't get metadata at %s" % ( opFile.LFN, ','.join( noMetaReplicas ) ) )
          opFile.Error = "Couldn't get metadata"
        elif noReplicas:
          self.log.error( "Unable to replicate", "File %s doesn't exist at %s" % ( opFile.LFN, ','.join( noReplicas ) ) )
          opFile.Error = 'No replicas found'
          opFile.Status = 'Failed'
        elif badReplicas:
          self.log.error( "Unable to replicate", "%s, all replicas have a bad checksum at %s" % ( opFile.LFN, ','.join( badReplicas ) ) )
          opFile.Error = 'All replicas have a bad checksum'
          opFile.Status = 'Failed'
        elif noPFN:
          self.log.warn( "unable to replicate %s, could not get a PFN" % opFile.LFN )
        continue
      # # get the first one in the list
      if sourceSE not in validReplicas:
        if sourceSE:
          self.log.warn( "%s is not at specified sourceSE %s, changed to %s" % ( lfn, sourceSE, validReplicas[0] ) )
        sourceSE = validReplicas[0]

      # # loop over targetSE
      catalogs = self.operation.Catalog
      if catalogs:
        catalogs = [ cat.strip() for cat in catalogs.split( ',' ) ]

      for targetSE in self.operation.targetSEList:

        # # call DataManager
        if targetSE in validReplicas:
          self.log.warn( "Request to replicate %s to an existing location: %s" % ( lfn, targetSE ) )
          opFile.Status = 'Done'
          continue
        res = self.dm.replicateAndRegister( lfn, targetSE, sourceSE = sourceSE, catalog = catalogs )
        if res["OK"]:

          if lfn in res["Value"]["Successful"]:

            if "replicate" in res["Value"]["Successful"][lfn]:

              repTime = res["Value"]["Successful"][lfn]["replicate"]
              prString = "file %s replicated at %s in %s s." % ( lfn, targetSE, repTime )

              gMonitor.addMark( "ReplicateOK", 1 )

              if "register" in res["Value"]["Successful"][lfn]:

                gMonitor.addMark( "RegisterOK", 1 )
                regTime = res["Value"]["Successful"][lfn]["register"]
                prString += ' and registered in %s s.' % regTime
                self.log.info( prString )
              else:

                gMonitor.addMark( "RegisterFail", 1 )
                prString += " but failed to register"
                self.log.warn( prString )

                opFile.Error = "Failed to register"
                # # add register replica operation
                registerOperation = self.getRegisterOperation( opFile, targetSE, type = 'RegisterReplica' )
                self.request.insertAfter( registerOperation, self.operation )

            else:

              self.log.error( "Failed to replicate", "%s to %s" % ( lfn, targetSE ) )
              gMonitor.addMark( "ReplicateFail", 1 )
              opFile.Error = "Failed to replicate"

          else:

            gMonitor.addMark( "ReplicateFail", 1 )
            reason = res["Value"]["Failed"][lfn]
            self.log.error( "Failed to replicate and register", "File %s at %s:" % ( lfn, targetSE ), reason )
            opFile.Error = reason

        else:

          gMonitor.addMark( "ReplicateFail", 1 )
          opFile.Error = "DataManager error: %s" % res["Message"]
          self.log.error( "DataManager error", res["Message"] )

      if not opFile.Error:
        if len( self.operation.targetSEList ) > 1:
          self.log.info( "file %s has been replicated to all targetSEs" % lfn )
        opFile.Status = "Done"


    return S_OK()
Example #15
if __name__ == "__main__":

    args = Script.getPositionalArgs()
    if len(args) != 1:
        Script.showHelp()
    try:
        requestID = long(args[0])
    except ValueError:
        DIRAC.gLogger.error("requestID should be an integer")
        DIRAC.exit(-1)

    from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
    from DIRAC.DataManagementSystem.Client.FTSJob import FTSJob

    ftsClient = FTSClient()

    ftsJobs = ftsClient.getFTSJobsForRequest(
        requestID,
        list(FTSJob.INITSTATES + FTSJob.TRANSSTATES + FTSJob.FINALSTATES))
    if not ftsJobs["OK"]:
        DIRAC.gLogger.error(ftsJobs["Message"])
        DIRAC.exit(-1)
    ftsJobs = ftsJobs["Value"]

    if not ftsJobs:
        DIRAC.gLogger.always("No FTSJobs found for requestID %s" % requestID)
        DIRAC.exit(0)

    DIRAC.gLogger.always("Found %s FTSJobs for requestID %s" %
                         (len(ftsJobs), requestID))
Example #16
  args = Script.getPositionalArgs()

  maxActiveJobs = 50
  ftsSite = ftsServer = ""
  if not len( args ) == 3:
    Script.showHelp()
    DIRAC.exit( 0 )
  else:
    ftsSite, ftsServer, maxActiveJobs = args
    try:
      maxActiveJobs = int( maxActiveJobs )
    except ValueError as error:
      gLogger.error( error )
      DIRAC.exit( -1 )

  ftsClient = FTSClient()

  ftsSites = ftsClient.getFTSSitesList()
  if not ftsSites["OK"]:
    gLogger.error( "unable to read FTSSites: %s" % ftsSites["Message"] )
    DIRAC.exit( -1 )
  ftsSites = ftsSites["Value"]

  for site in ftsSites:
    if site.Name == ftsSite:
      gLogger.error( "FTSSite '%s' is present in FTSDB!!!" % ftsSite )
      DIRAC.exit( -1 )

  getSites = getSites()
  if not getSites["OK"]:
    gLogger.error( "unable to read sites defined in CS!!!" )
from DIRAC.Core.Base import Script
Script.setUsageMessage( '\n'.join( [ __doc__,
                                     'Usage:',
                                     ' %s [option|cfgfile]' % Script.scriptName ] ) )
from operator import itemgetter

if __name__ == "__main__":

  from DIRAC.Core.Base.Script import parseCommandLine
  parseCommandLine()

  import DIRAC
  from DIRAC import gLogger, gConfig

  from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
  ftsClient = FTSClient()

  ret = ftsClient.getDBSummary()
  if not ret["OK"]:
    gLogger.error( ret["Message"] )
    DIRAC.exit( -1 )
  ret = ret["Value"]

  ic = 1

  ftsSites = ret.get( "FTSSite", None )
  if ftsSites:
    gLogger.always( "[%d] FTSSites:" % ic )
    ic += 1
    for ftsSite in ftsSites:
      gLogger.always( "- %-20s (%s)" % ( ftsSite["Name"], ftsSite["FTSServer"] ) )
Script.setUsageMessage('\n'.join(
    [__doc__, 'Usage:',
     ' %s [option|cfgfile]' % Script.scriptName]))
from operator import itemgetter

if __name__ == "__main__":

    from DIRAC.Core.Base.Script import parseCommandLine
    parseCommandLine()

    import DIRAC
    from DIRAC import gLogger, gConfig

    from DIRAC.DataManagementSystem.Client.FTSClient import FTSClient
    ftsClient = FTSClient()

    ret = ftsClient.getDBSummary()
    if not ret["OK"]:
        gLogger.error(ret["Message"])
        DIRAC.exit(-1)
    ret = ret["Value"]

    ic = 1

    ftsSites = ret.get("FTSSite", None)
    if ftsSites:
        gLogger.always("[%d] FTSSites:" % ic)
        ic += 1
        for ftsSite in ftsSites:
            gLogger.always("- %-20s (%s)" %
Example #19
    def setUp(self):
        """ test case set up """

        gLogger.setLevel('NOTICE')

        self.ftsSites = [
            FTSSite(
                ftsServer=
                'https://fts22-t0-export.cern.ch:8443/glite-data-transfer-fts/services/FileTransfer',
                name='CERN.ch'),
            FTSSite(
                ftsServer=
                'https://fts.pic.es:8443/glite-data-transfer-fts/services/FileTransfer',
                name='PIC.es'),
            FTSSite(
                ftsServer=
                'https://lcgfts.gridpp.rl.ac.uk:8443/glite-data-transfer-fts/services/FileTransfer',
                name='RAL.uk'),
        ]

        self.ses = ['CERN-USER', 'RAL-USER']
        self.statuses = [
            'Submitted', 'Finished', 'FinishedDirty', 'Active', 'Ready'
        ]

        self.submitted = 0
        self.numberOfJobs = 10
        self.opIDs = []

        self.ftsJobs = []
        for i in xrange(self.numberOfJobs):

            opID = i % 3
            if opID not in self.opIDs:
                self.opIDs.append(opID)

            ftsJob = FTSJob()
            ftsJob.FTSGUID = str(uuid.uuid4())
            ftsJob.FTSServer = self.ftsSites[0].FTSServer
            ftsJob.Status = self.statuses[i % len(self.statuses)]
            ftsJob.OperationID = opID
            if ftsJob.Status in FTSJob.FINALSTATES:
                ftsJob.Completeness = 100
            if ftsJob.Status == 'Active':
                ftsJob.Completeness = 90
            ftsJob.SourceSE = self.ses[i % len(self.ses)]
            ftsJob.TargetSE = 'PIC-USER'
            ftsJob.RequestID = 12345

            ftsFile = FTSFile()
            ftsFile.FileID = i + 1
            ftsFile.OperationID = i + 1
            ftsFile.LFN = '/a/b/c/%d' % i
            ftsFile.Size = 1000000
            ftsFile.OperationID = opID
            ftsFile.SourceSE = ftsJob.SourceSE
            ftsFile.TargetSE = ftsJob.TargetSE
            ftsFile.SourceSURL = 'foo://source.bar.baz/%s' % ftsFile.LFN
            ftsFile.TargetSURL = 'foo://target.bar.baz/%s' % ftsFile.LFN
            ftsFile.Status = 'Waiting' if ftsJob.Status != 'FinishedDirty' else 'Failed'
            ftsFile.RequestID = 12345
            ftsFile.Checksum = 'addler'
            ftsFile.ChecksumType = 'adler32'

            ftsFile.FTSGUID = ftsJob.FTSGUID
            if ftsJob.Status == 'FinishedDirty':
                ftsJob.FailedFiles = 1
                ftsJob.FailedSize = ftsFile.Size

            ftsJob.addFile(ftsFile)
            self.ftsJobs.append(ftsJob)

        self.submitted = len(
            [i for i in self.ftsJobs if i.Status == 'Submitted'])

        self.ftsClient = FTSClient()
Example #20
class ReplicateAndRegister( DMSRequestOperationsBase ):
  """
  .. class:: ReplicateAndRegister

  ReplicateAndRegister operation handler
  """

  def __init__( self, operation = None, csPath = None ):
    """c'tor

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
    super( ReplicateAndRegister, self ).__init__( operation, csPath )
    # # own gMonitor stuff for files
    gMonitor.registerActivity( "ReplicateAndRegisterAtt", "Replicate and register attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateOK", "Replications successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicateFail", "Replications failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterOK", "Registrations successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RegisterFail", "Registrations failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # for FTS
    gMonitor.registerActivity( "FTSScheduleAtt", "Files schedule attempted",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleOK", "File schedule successful",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "FTSScheduleFail", "File schedule failed",
                               "RequestExecutingAgent", "Files/min", gMonitor.OP_SUM )
    # # SE cache
    self.seCache = {}

    # Clients
    self.fc = FileCatalog()
    self.ftsClient = FTSClient()

  def __call__( self ):
    """ call me maybe """
    # # check replicas first
    checkReplicas = self.__checkReplicas()
    if not checkReplicas["OK"]:
      self.log.error( checkReplicas["Message"] )
    if hasattr( self, "FTSMode" ) and getattr( self, "FTSMode" ):
      bannedGroups = getattr( self, "FTSBannedGroups" ) if hasattr( self, "FTSBannedGroups" ) else ()
      if self.request.OwnerGroup in bannedGroups:
        self.log.info( "usage of FTS system is banned for request's owner" )
        return self.rmTransfer()
      return self.ftsTransfer()
    return self.rmTransfer()

  def __checkReplicas( self ):
    """ check done replicas and update file states  """
    waitingFiles = dict( [ ( opFile.LFN, opFile ) for opFile in self.operation
                          if opFile.Status in ( "Waiting", "Scheduled" ) ] )
    targetSESet = set( self.operation.targetSEList )

    replicas = self.fc.getReplicas( waitingFiles.keys() )
    if not replicas["OK"]:
      self.log.error( replicas["Message"] )
      return replicas

    reMissing = re.compile( "no such file or directory" )
    for failedLFN, errStr in replicas["Value"]["Failed"].items():
      waitingFiles[failedLFN].Error = errStr
      if reMissing.search( errStr.lower() ):
        self.log.error( "file %s does not exists" % failedLFN )
        gMonitor.addMark( "ReplicateFail", len( targetSESet ) )
        waitingFiles[failedLFN].Status = "Failed"

    for successfulLFN, reps in replicas["Value"]["Successful"].items():
      if targetSESet.issubset( set( reps ) ):
        self.log.info( "file %s has been replicated to all targets" % successfulLFN )
        waitingFiles[successfulLFN].Status = "Done"

    return S_OK()

  def _addMetadataToFiles( self, toSchedule ):
    """ Add metadata to those files that need to be scheduled through FTS

        toSchedule is a dictionary:
        {'lfn1': [opFile, validReplicas, validTargets], 'lfn2': [opFile, validReplicas, validTargets]}
    """
    if toSchedule:
      self.log.info( "found %s files to schedule, getting metadata from FC" % len( toSchedule ) )
      lfns = toSchedule.keys()
    else:
      self.log.info( "No files to schedule" )
      return S_OK()

    res = self.fc.getFileMetadata( lfns )
    if not res['OK']:
      return res
    else:
      if res['Value']['Failed']:
        self.log.warn( "Can't schedule %d files: problems getting the metadata: %s" % ( len( res['Value']['Failed'] ),
                                                                                ', '.join( res['Value']['Failed'] ) ) )
      metadata = res['Value']['Successful']

    filesToScheduleList = []

    for lfnsToSchedule, lfnMetadata in metadata.items():
      opFileToSchedule = toSchedule[lfnsToSchedule][0]
      opFileToSchedule.GUID = lfnMetadata['GUID']
      opFileToSchedule.Checksum = metadata[lfnsToSchedule]['Checksum']
      opFileToSchedule.ChecksumType = metadata[lfnsToSchedule]['CheckSumType']
      opFileToSchedule.Size = metadata[lfnsToSchedule]['Size']

      filesToScheduleList.append( ( opFileToSchedule.toJSON()['Value'],
                                    toSchedule[lfnsToSchedule][1],
                                    toSchedule[lfnsToSchedule][2] ) )

    return S_OK( filesToScheduleList )



  def _filterReplicas( self, opFile ):
    """ filter out banned/invalid source SEs """
    return filterReplicas( opFile, logger = self.log, dataManager = self.dm, seCache = self.seCache )

  def ftsTransfer( self ):
    """ replicate and register using FTS """

    self.log.info( "scheduling files in FTS..." )

    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark( "FTSScheduleAtt" )
      gMonitor.addMark( "FTSScheduleFail" )
      return bannedTargets

    if bannedTargets['Value']:
      return S_OK( "%s targets are banned for writing" % ",".join( bannedTargets['Value'] ) )

    # Can continue now
    self.log.verbose( "No targets banned for writing" )

    toSchedule = {}

    for opFile in self.getWaitingFilesList():
      opFile.Error = ''
      gMonitor.addMark( "FTSScheduleAtt" )
      # # check replicas
      replicas = self._filterReplicas( opFile )
      if not replicas["OK"]:
        continue
      replicas = replicas["Value"]

      validReplicas = replicas["Valid"]
      bannedReplicas = replicas["Banned"]
      noReplicas = replicas['NoReplicas']
      badReplicas = replicas['Bad']
      noPFN = replicas['NoPFN']

      if not validReplicas:
        gMonitor.addMark( "FTSScheduleFail" )
        if bannedReplicas:
          self.log.warn( "unable to schedule '%s', replicas only at banned SEs" % opFile.LFN )
        elif noReplicas:
          self.log.error( "unable to schedule %s, file doesn't exist" % opFile.LFN )
          opFile.Error = 'No replicas found'
          opFile.Status = 'Failed'
        elif badReplicas:
          self.log.error( "unable to schedule %s, all replicas have a bad checksum" % opFile.LFN )
          opFile.Error = 'All replicas have a bad checksum'
          opFile.Status = 'Failed'
        elif noPFN:
          self.log.warn( "unable to schedule %s, could not get a PFN" % opFile.LFN )

      else:
        validTargets = list( set( self.operation.targetSEList ) - set( validReplicas ) )
        if not validTargets:
          self.log.info( "file %s is already present at all targets" % opFile.LFN )
          opFile.Status = "Done"
        else:
          toSchedule[opFile.LFN] = [ opFile, validReplicas, validTargets ]

    res = self._addMetadataToFiles( toSchedule )
    if not res['OK']:
      return res
    else:
      filesToScheduleList = res['Value']


    if filesToScheduleList:

      ftsSchedule = self.ftsClient.ftsSchedule( self.request.RequestID,
                                                self.operation.OperationID,
                                                filesToScheduleList )
      if not ftsSchedule["OK"]:
        self.log.error( ftsSchedule["Message"] )
        return ftsSchedule

      # might have nothing to schedule
      ftsSchedule = ftsSchedule["Value"]
      if not ftsSchedule:
        return S_OK()

      for fileID in ftsSchedule["Successful"]:
        gMonitor.addMark( "FTSScheduleOK", 1 )
        for opFile in self.operation:
          if fileID == opFile.FileID:
            opFile.Status = "Scheduled"
            self.log.debug( "%s has been scheduled for FTS" % opFile.LFN )
      self.log.info( "%d files have been scheduled to FTS" % len( ftsSchedule['Successful'] ) )

      for fileID in ftsSchedule["Failed"]:
        gMonitor.addMark( "FTSScheduleFail", 1 )
        for opFile in self.operation:
          if fileID == opFile.FileID:
            opFile.Error = ftsSchedule["Failed"][fileID]
            if 'sourceSURL equals to targetSURL' in opFile.Error:
              # In this case there is no need to continue
              opFile.Status = 'Failed'
            self.log.warn( "unable to schedule %s for FTS: %s" % ( opFile.LFN, opFile.Error ) )
    else:
      self.log.info( "No files to schedule after metadata checks" )

    # Just in case some transfers could not be scheduled, try them with RM
    return self.rmTransfer( fromFTS = True )

  def rmTransfer( self, fromFTS = False ):
    """ replicate and register using dataManager  """
    # # get waiting files. If none just return
    waitingFiles = self.getWaitingFilesList()
    if not waitingFiles:
      return S_OK()
    if fromFTS:
      self.log.info( "Trying transfer using replica manager as FTS failed" )
    else:
      self.log.info( "Transferring files using Data manager..." )
    # # source SE
    sourceSE = self.operation.SourceSE if self.operation.SourceSE else None
    if sourceSE:
      # # check source se for read
      sourceRead = self.rssSEStatus( sourceSE, "ReadAccess" )
      if not sourceRead["OK"]:
        self.log.info( sourceRead["Message"] )
        for opFile in self.operation:
          opFile.Error = sourceRead["Message"]
        self.operation.Error = sourceRead["Message"]
        gMonitor.addMark( "ReplicateAndRegisterAtt", len( self.operation ) )
        gMonitor.addMark( "ReplicateFail", len( self.operation ) )
        return sourceRead

      if not sourceRead["Value"]:
        self.operation.Error = "SourceSE %s is banned for reading" % sourceSE
        self.log.info( self.operation.Error )
        return S_OK( self.operation.Error )

    # # check targetSEs for write
    bannedTargets = self.checkSEsRSS()
    if not bannedTargets['OK']:
      gMonitor.addMark( "ReplicateAndRegisterAtt", len( self.operation ) )
      gMonitor.addMark( "ReplicateFail", len( self.operation ) )
      return bannedTargets

    if bannedTargets['Value']:
      return S_OK( "%s targets are banned for writing" % ",".join( bannedTargets['Value'] ) )

    # Can continue now
    self.log.verbose( "No targets banned for writing" )

    # # loop over files
    for opFile in waitingFiles:

      gMonitor.addMark( "ReplicateAndRegisterAtt", 1 )
      opFile.Error = ''
      lfn = opFile.LFN

      # Check if replica is at the specified source
      replicas = self._filterReplicas( opFile )
      if not replicas["OK"]:
        self.log.error( replicas["Message"] )
        continue
      replicas = replicas["Value"]
      if not replicas["Valid"]:
        self.log.warn( "unable to find valid replicas for %s" % lfn )
        continue
      # # get the first one in the list
      if sourceSE not in replicas['Valid']:
        if sourceSE:
          self.log.warn( "%s is not at specified sourceSE %s, changed to %s" % ( lfn, sourceSE, replicas["Valid"][0] ) )
        sourceSE = replicas["Valid"][0]

      # # loop over targetSE
      catalog = self.operation.Catalog
      for targetSE in self.operation.targetSEList:

        # # call DataManager
        if targetSE == sourceSE:
          self.log.warn( "Request to replicate %s to the source SE: %s" % ( lfn, sourceSE ) )
          continue
        res = self.dm.replicateAndRegister( lfn, targetSE, sourceSE = sourceSE, catalog = catalog )
        if res["OK"]:

          if lfn in res["Value"]["Successful"]:

            if "replicate" in res["Value"]["Successful"][lfn]:

              repTime = res["Value"]["Successful"][lfn]["replicate"]
              prString = "file %s replicated at %s in %s s." % ( lfn, targetSE, repTime )

              gMonitor.addMark( "ReplicateOK", 1 )

              if "register" in res["Value"]["Successful"][lfn]:

                gMonitor.addMark( "RegisterOK", 1 )
                regTime = res["Value"]["Successful"][lfn]["register"]
                prString += ' and registered in %s s.' % regTime
                self.log.info( prString )
              else:

                gMonitor.addMark( "RegisterFail", 1 )
                prString += " but failed to register"
                self.log.warn( prString )

                opFile.Error = "Failed to register"
                # # add register replica operation
                registerOperation = self.getRegisterOperation( opFile, targetSE )
                self.request.insertAfter( registerOperation, self.operation )

            else:

              self.log.error( "failed to replicate %s to %s." % ( lfn, targetSE ) )
              gMonitor.addMark( "ReplicateFail", 1 )
              opFile.Error = "Failed to replicate"

          else:

            gMonitor.addMark( "ReplicateFail", 1 )
            reason = res["Value"]["Failed"][lfn]
            self.log.error( "failed to replicate and register file %s at %s:" % ( lfn, targetSE ), reason )
            opFile.Error = reason

        else:

          gMonitor.addMark( "ReplicateFail", 1 )
          opFile.Error = "DataManager error: %s" % res["Message"]
          self.log.error( opFile.Error )

      if not opFile.Error:
        if len( self.operation.targetSEList ) > 1:
          self.log.info( "file %s has been replicated to all targetSEs" % lfn )
        opFile.Status = "Done"


    return S_OK()
Example #21
0
class ReplicateAndRegister(OperationHandlerBase, DMSRequestOperationsBase):
    """
  .. class:: ReplicateAndRegister

  ReplicateAndRegister operation handler
  """
    def __init__(self, operation=None, csPath=None):
        """c'tor

    :param self: self reference
    :param Operation operation: Operation instance
    :param str csPath: CS path for this handler
    """
        super(ReplicateAndRegister, self).__init__(operation, csPath)
        # # own gMonitor stuff for files
        gMonitor.registerActivity("ReplicateAndRegisterAtt",
                                  "Replicate and register attempted",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("ReplicateOK", "Replications successful",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("ReplicateFail", "Replications failed",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RegisterOK", "Registrations successful",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RegisterFail", "Registrations failed",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        # # for FTS
        gMonitor.registerActivity("FTSScheduleAtt", "Files schedule attempted",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("FTSScheduleOK", "File schedule successful",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("FTSScheduleFail", "File schedule failed",
                                  "RequestExecutingAgent", "Files/min",
                                  gMonitor.OP_SUM)
        # # SE cache
        self.seCache = {}

        # Clients
        self.rm = ReplicaManager()
        self.ftsClient = FTSClient()

    def __call__(self):
        """ call me maybe """
        # # check replicas first
        checkReplicas = self.__checkReplicas()
        if not checkReplicas["OK"]:
            self.log.error(checkReplicas["Message"])
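        # Choose the transfer machinery: when FTSMode is set (presumably via
        # this handler's CS options) and the request owner's group is not in
        # FTSBannedGroups, schedule the files through FTS; otherwise fall back
        # to direct transfers with the ReplicaManager.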
        if hasattr(self, "FTSMode") and getattr(self, "FTSMode"):
            bannedGroups = getattr(self, "FTSBannedGroups") if hasattr(
                self, "FTSBannedGroups") else ()
            if self.request.OwnerGroup in bannedGroups:
                self.log.info(
                    "usage of FTS system is banned for request's owner")
                return self.rmTransfer()
            return self.ftsTransfer()
        return self.rmTransfer()

    def __checkReplicas(self):
        """ check done replicas and update file states  """
        waitingFiles = dict([(opFile.LFN, opFile) for opFile in self.operation
                             if opFile.Status in ("Waiting", "Scheduled")])
        targetSESet = set(self.operation.targetSEList)
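        # A file is marked "Done" once every requested target SE already holds
        # a registered replica; files missing from the catalogue are failed.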

        replicas = self.rm.getCatalogReplicas(waitingFiles.keys())
        if not replicas["OK"]:
            self.log.error(replicas["Message"])
            return replicas

        reMissing = re.compile("no such file or directory")
        for failedLFN, errStr in replicas["Value"]["Failed"].items():
            waitingFiles[failedLFN].Error = errStr
            if reMissing.search(errStr.lower()):
                self.log.error("file %s does not exists" % failedLFN)
                gMonitor.addMark("ReplicateFail", len(targetSESet))
                waitingFiles[failedLFN].Status = "Failed"

        for successfulLFN, reps in replicas["Value"]["Successful"].items():
            if targetSESet.issubset(set(reps)):
                self.log.info("file %s has been replicated to all targets" %
                              successfulLFN)
                waitingFiles[successfulLFN].Status = "Done"

        return S_OK()

    def _addMetadataToFiles(self, toSchedule):
        """ Add metadata to those files that need to be scheduled through FTS

        toSchedule is a dictionary:
        {'lfn1': [opFile, validReplicas, validTargets], 'lfn2': [opFile, validReplicas, validTargets]}
    """
        if toSchedule:
            self.log.info(
                "found %s files to schedule, getting metadata from FC" %
                len(toSchedule))
            lfns = toSchedule.keys()
        else:
            self.log.info("No files to schedule")
            return S_OK()

        res = self.rm.getCatalogFileMetadata(lfns)
        if not res['OK']:
            return res
        else:
            if res['Value']['Failed']:
                self.log.warn(
                    "Can't schedule %d files: problems getting the metadata: %s"
                    % (len(res['Value']['Failed']), ', '.join(
                        res['Value']['Failed'])))
            metadata = res['Value']['Successful']

        filesToScheduleList = []

        for lfnsToSchedule, lfnMetadata in metadata.items():
            opFileToSchedule = toSchedule[lfnsToSchedule][0]
            opFileToSchedule.GUID = lfnMetadata['GUID']
            opFileToSchedule.Checksum = metadata[lfnsToSchedule]['Checksum']
            opFileToSchedule.ChecksumType = metadata[lfnsToSchedule][
                'CheckSumType']
            opFileToSchedule.Size = metadata[lfnsToSchedule]['Size']

            filesToScheduleList.append(
                (opFileToSchedule.toJSON()['Value'],
                 toSchedule[lfnsToSchedule][1], toSchedule[lfnsToSchedule][2]))

        return S_OK(filesToScheduleList)

    def _filterReplicas(self, opFile):
        """ filter out banned/invalid source SEs """

        from DIRAC.Core.Utilities.Adler import compareAdler
        ret = {"Valid": [], "Banned": [], "Bad": []}

        replicas = self.rm.getActiveReplicas(opFile.LFN)
        if not replicas["OK"]:
            self.log.error(replicas["Message"])
        reNotExists = re.compile("no such file or directory")
        replicas = replicas["Value"]
        failed = replicas["Failed"].get(opFile.LFN, "")
        if reNotExists.match(failed.lower()):
            opFile.Status = "Failed"
            opFile.Error = failed
            return S_ERROR(failed)

        replicas = replicas["Successful"][
            opFile.LFN] if opFile.LFN in replicas["Successful"] else {}

        for repSEName in replicas:

            seRead = self.rssSEStatus(repSEName, "ReadAccess")
            if not seRead["OK"]:
                self.log.info(seRead["Message"])
                ret["Banned"].append(repSEName)
                continue
            if not seRead["Value"]:
                self.log.info("StorageElement '%s' is banned for reading" %
                              (repSEName))

            repSE = self.seCache.get(repSEName, None)
            if not repSE:
                repSE = StorageElement(repSEName, "SRM2")
                self.seCache[repSEName] = repSE

            pfn = repSE.getPfnForLfn(opFile.LFN)
            if not pfn["OK"]:
                self.log.warn("unable to create pfn for %s lfn: %s" %
                              (opFile.LFN, pfn["Message"]))
                ret["Banned"].append(repSEName)
                continue
            pfn = pfn["Value"]

            repSEMetadata = repSE.getFileMetadata(pfn, singleFile=True)
            if not repSEMetadata["OK"]:
                self.log.warn(repSEMetadata["Message"])
                ret["Banned"].append(repSEName)
                continue
            repSEMetadata = repSEMetadata["Value"]

            seChecksum = repSEMetadata.get("Checksum")
            if opFile.Checksum and seChecksum and not compareAdler(
                    seChecksum, opFile.Checksum):
                self.log.warn(" %s checksum mismatch: %s %s:%s" %
                              (opFile.LFN, opFile.Checksum, repSE, seChecksum))
                ret["Bad"].append(repSEName)
                continue
            # # if we're here repSE is OK
            ret["Valid"].append(repSEName)

        return S_OK(ret)

    def ftsTransfer(self):
        """ replicate and register using FTS """

        self.log.info("scheduling files in FTS...")

        bannedTargets = self.checkSEsRSS()
        if not bannedTargets['OK']:
            gMonitor.addMark("FTSScheduleAtt")
            gMonitor.addMark("FTSScheduleFail")
            return bannedTargets

        if bannedTargets['Value']:
            return S_OK("%s targets are banned for writing" %
                        ",".join(bannedTargets['Value']))

        # Can continue now
        self.log.verbose("No targets banned for writing")

        toSchedule = {}
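        # Collect files still needing transfers: LFN -> [opFile, validReplicas,
        # validTargets], consumed by _addMetadataToFiles() below.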

        for opFile in self.getWaitingFilesList():
            opFile.Error = ''
            gMonitor.addMark("FTSScheduleAtt")
            # # check replicas
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                continue
            replicas = replicas["Value"]

            if not replicas["Valid"] and replicas["Banned"]:
                self.log.warn(
                    "unable to schedule '%s', replicas only at banned SEs" %
                    opFile.LFN)
                gMonitor.addMark("FTSScheduleFail")
                continue

            validReplicas = replicas["Valid"]
            bannedReplicas = replicas["Banned"]

            if not validReplicas and bannedReplicas:
                self.log.warn(
                    "unable to schedule '%s', replicas only at banned SEs" %
                    opFile.LFN)
                gMonitor.addMark("FTSScheduleFail")
                continue

            if validReplicas:
                validTargets = list(
                    set(self.operation.targetSEList) - set(validReplicas))
                if not validTargets:
                    self.log.info("file %s is already present at all targets" %
                                  opFile.LFN)
                    opFile.Status = "Done"
                    continue

                toSchedule[opFile.LFN] = [opFile, validReplicas, validTargets]

        res = self._addMetadataToFiles(toSchedule)
        if not res['OK']:
            return res
        else:
            filesToScheduleList = res['Value']

        if filesToScheduleList:
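            # Hand the files over to the FTS scheduler; the result is expected
            # to report scheduling outcomes per FileID in "Successful"/"Failed".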

            ftsSchedule = self.ftsClient.ftsSchedule(
                self.request.RequestID, self.operation.OperationID,
                filesToScheduleList)
            if not ftsSchedule["OK"]:
                self.log.error(ftsSchedule["Message"])
                return ftsSchedule

            # might have nothing to schedule
            ftsSchedule = ftsSchedule["Value"]
            if not ftsSchedule:
                return S_OK()

            for fileID in ftsSchedule["Successful"]:
                gMonitor.addMark("FTSScheduleOK", 1)
                for opFile in self.operation:
                    if fileID == opFile.FileID:
                        opFile.Status = "Scheduled"
                        self.log.always("%s has been scheduled for FTS" %
                                        opFile.LFN)

            for fileID, reason in ftsSchedule["Failed"].items():
                gMonitor.addMark("FTSScheduleFail", 1)
                for opFile in self.operation:
                    if fileID == opFile.FileID:
                        opFile.Error = reason
                        self.log.error("unable to schedule %s for FTS: %s" %
                                       (opFile.LFN, opFile.Error))
        else:
            self.log.info("No files to schedule after metadata checks")

        # Just in case some transfers could not be scheduled, try them with RM
        return self.rmTransfer(fromFTS=True)

    def rmTransfer(self, fromFTS=False):
        """ replicate and register using ReplicaManager  """
        # # get waiting files. If none just return
        waitingFiles = self.getWaitingFilesList()
        if not waitingFiles:
            return S_OK()
        if fromFTS:
            self.log.info(
                "Trying transfer using replica manager as FTS failed")
        else:
            self.log.info("Transferring files using replica manager...")
        # # source SE
        sourceSE = self.operation.SourceSE if self.operation.SourceSE else None
        if sourceSE:
            # # check source se for read
            sourceRead = self.rssSEStatus(sourceSE, "ReadAccess")
            if not sourceRead["OK"]:
                self.log.info(sourceRead["Message"])
                for opFile in self.operation:
                    opFile.Error = sourceRead["Message"]
                    opFile.Status = "Failed"
                self.operation.Error = sourceRead["Message"]
                gMonitor.addMark("ReplicateAndRegisterAtt",
                                 len(self.operation))
                gMonitor.addMark("ReplicateFail", len(self.operation))
                return sourceRead

            if not sourceRead["Value"]:
                self.operation.Error = "SourceSE %s is banned for reading" % sourceSE
                self.log.info(self.operation.Error)
                return S_OK(self.operation.Error)

        # # check targetSEs for write
        bannedTargets = self.checkSEsRSS()
        if not bannedTargets['OK']:
            gMonitor.addMark("ReplicateAndRegisterAtt", len(self.operation))
            gMonitor.addMark("ReplicateFail", len(self.operation))
            return bannedTargets

        if bannedTargets['Value']:
            return S_OK("%s targets are banned for writing" %
                        ",".join(bannedTargets['Value']))

        # Can continue now
        self.log.verbose("No targets banned for writing")

        # # loop over files
        for opFile in waitingFiles:

            gMonitor.addMark("ReplicateAndRegisterAtt", 1)
            opFile.Error = ''
            lfn = opFile.LFN

            # Check if replica is at the specified source
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                self.log.error(replicas["Message"])
                continue
            replicas = replicas["Value"]
            if not replicas["Valid"]:
                self.log.warn("unable to find valid replicas for %s" % lfn)
                continue
            # # get the first one in the list
            if sourceSE not in replicas['Valid']:
                if sourceSE:
                    self.log.warn(
                        "%s is not at specified sourceSE %s, changed to %s" %
                        (lfn, sourceSE, replicas["Valid"][0]))
                sourceSE = replicas["Valid"][0]

            # # loop over targetSE
            for targetSE in self.operation.targetSEList:

                # # call ReplicaManager
                if targetSE == sourceSE:
                    self.log.warn(
                        "Request to replicate %s to the source SE: %s" %
                        (lfn, sourceSE))
                    continue
                res = self.rm.replicateAndRegister(lfn,
                                                   targetSE,
                                                   sourceSE=sourceSE)

                if res["OK"]:

                    if lfn in res["Value"]["Successful"]:

                        if "replicate" in res["Value"]["Successful"][lfn]:

                            repTime = res["Value"]["Successful"][lfn][
                                "replicate"]
                            prString = "file %s replicated at %s in %s s." % (
                                lfn, targetSE, repTime)

                            gMonitor.addMark("ReplicateOK", 1)

                            if "register" in res["Value"]["Successful"][lfn]:

                                gMonitor.addMark("RegisterOK", 1)
                                regTime = res["Value"]["Successful"][lfn][
                                    "register"]
                                prString += ' and registered in %s s.' % regTime
                                self.log.info(prString)
                            else:

                                gMonitor.addMark("RegisterFail", 1)
                                prString += " but failed to register"
                                self.log.warn(prString)

                                opFile.Error = "Failed to register"
                                opFile.Status = "Failed"
                                # # add register replica operation
                                registerOperation = self.getRegisterOperation(
                                    opFile, targetSE)
                                self.request.insertAfter(
                                    registerOperation, self.operation)

                        else:

                            self.log.error("failed to replicate %s to %s." %
                                           (lfn, targetSE))
                            gMonitor.addMark("ReplicateFail", 1)
                            opFile.Error = "Failed to replicate"

                    else:

                        gMonitor.addMark("ReplicateFail", 1)
                        reason = res["Value"]["Failed"][lfn]
                        self.log.error(
                            "failed to replicate and register file %s at %s: %s"
                            % (lfn, targetSE, reason))
                        opFile.Error = reason

                else:

                    gMonitor.addMark("ReplicateFail", 1)
                    opFile.Error = "ReplicaManager error: %s" % res["Message"]
                    self.log.error(opFile.Error)

            if not opFile.Error:
                if len(self.operation.targetSEList) > 1:
                    self.log.info(
                        "file %s has been replicated to all targetSEs" % lfn)
                opFile.Status = "Done"

        return S_OK()
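
For reference, here is a minimal sketch of how a ReplicateAndRegister operation is typically created and submitted through the DIRAC RequestManagementSystem client API, so that a RequestExecutingAgent eventually hands it to a handler like the ones above. It assumes the standard Request/Operation/File/ReqClient classes; the SE names and the LFN are placeholders.

# Sketch only: submit a ReplicateAndRegister request (placeholder SEs and LFN).
from DIRAC.Core.Base.Script import parseCommandLine
parseCommandLine()  # initialise the DIRAC configuration

from DIRAC.RequestManagementSystem.Client.Request import Request
from DIRAC.RequestManagementSystem.Client.Operation import Operation
from DIRAC.RequestManagementSystem.Client.File import File
from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient

request = Request()
request.RequestName = "replicateAndRegisterExample"

op = Operation()
op.Type = "ReplicateAndRegister"
op.SourceSE = "SOURCE-SE"   # optional; the handler can pick a source itself
op.TargetSE = "TARGET-SE"   # comma-separated string for several targets

opFile = File()
opFile.LFN = "/some/vo/user/file.dat"   # placeholder LFN
op.addFile(opFile)

request.addOperation(op)

res = ReqClient().putRequest(request)
if not res["OK"]:
    print("Failed to submit request: %s" % res["Message"])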