Example #1
  def execute( self ):
    """ one cycle execution """
    log = gLogger.getSubLogger( "execute" )
    # # reset FTSGraph if expired
    now = datetime.datetime.now()
    if now > self.__ftsGraphValidStamp:
      log.info( "resetting expired FTS graph..." )
      resetFTSGraph = self.resetFTSGraph()
      if not resetFTSGraph["OK"]:
        log.error( "FTSGraph recreation error: %s" % resetFTSGraph["Message"] )
        return resetFTSGraph
      self.__ftsGraphValidStamp = now + datetime.timedelta( seconds = self.FTSGRAPH_REFRESH )
    # # update R/W access in FTSGraph if expired
    if now > self.__rwAccessValidStamp:
      log.info( "updating expired R/W access for SEs..." )
      try:
        self.updateLock().acquire()
        self.__ftsGraph.updateRWAccess()
      finally:
        self.updateLock().release()
        self.__rwAccessValidStamp = now + datetime.timedelta( seconds = self.RW_REFRESH )

    requestNames = self.requestClient().getRequestNamesList( [ "Scheduled" ] )
    if not requestNames["OK"]:
      log.error( "unable to read scheduled request names: %s" % requestNames["Message"] )
      return requestNames
    if not requestNames["Value"]:
      requestNames = self.__reqCache.keys()
    else:
      requestNames = [ req[0] for req in requestNames["Value"] ]
      requestNames = list( set ( requestNames + self.__reqCache.keys() ) )

    if not requestNames:
      log.info( "no 'Scheduled' requests to process" )
      return S_OK()

    log.info( "found %s requests to process:" % len( requestNames ) )
    log.info( " => from internal cache: %s" % ( len( self.__reqCache ) ) )
    log.info( " =>   new read from RMS: %s" % ( len( requestNames ) - len( self.__reqCache ) ) )

    for requestName in requestNames:
      request = self.getRequest( requestName )
      if not request["OK"]:
        log.error( request["Message"] )
        continue
      request = request["Value"]
      sTJId = request.RequestName
      while True:
        queue = self.threadPool().generateJobAndQueueIt( self.processRequest,
                                                         args = ( request, ),
                                                         sTJId = sTJId )
        if queue["OK"]:
          log.info( "request '%s' enqueued for execution" % sTJId )
          gMonitor.addMark( "RequestsAtt", 1 )
          break
        time.sleep( 1 )

    # # process all results
    self.threadPool().processAllResults()
    return S_OK()
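
Note: every call in these examples follows DIRAC's result-dictionary convention: functions return S_OK( value ) or S_ERROR( message ), and the caller branches on ["OK"] before touching ["Value"]. A minimal standalone sketch of the convention (the two helpers are re-implemented here for illustration only, no DIRAC import needed):

def S_OK( value = None ):
  return { "OK": True, "Value": value }

def S_ERROR( message ):
  return { "OK": False, "Message": message }

def getRequestNames():
  # any failure would come back as S_ERROR( "reason" )
  return S_OK( [ "reqA", "reqB" ] )

result = getRequestNames()
if not result["OK"]:
  print "error: %s" % result["Message"]
else:
  print "got %d requests" % len( result["Value"] )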
  def execute( self ):
    """ read requests from RequestClient and enqueue them into ProcessPool """
    gMonitor.addMark( "Iteration", 1 )
    # # requests (and so tasks) counter
    taskCounter = 0
    while taskCounter < self.__requestsPerCycle:
      self.log.debug( "execute: executing %d request in this cycle" % taskCounter )
      getRequest = self.requestClient().getRequest()
      if not getRequest["OK"]:
        self.log.error( "execute: %s" % getRequest["Message"] )
        break
      if not getRequest["Value"]:
        self.log.info( "execute: no more 'Waiting' requests to process" )
        break
      # # OK, we've got you
      request = getRequest["Value"]
      # # set task id
      taskID = request.RequestName
      # # save current request in cache
      self.cacheRequest( request )
      # # serialize to JSON
      requestJSON = request.toJSON()
      if not requestJSON["OK"]:
        self.log.error( "JSON serialization error: %s" % requestJSON["Message"] )
        break
      requestJSON = requestJSON["Value"]

      self.log.info( "processPool tasks idle = %s working = %s" % ( self.processPool().getNumIdleProcesses(),
                                                                    self.processPool().getNumWorkingProcesses() ) )

      while True:
        if not self.processPool().getFreeSlots():
          self.log.info( "No free slots available in processPool, will wait %d seconds to proceed" % self.__poolSleep )
          time.sleep( self.__poolSleep )
        else:
          self.log.info( "spawning task for request '%s'" % ( request.RequestName ) )
          timeOut = self.getTimeout( request )
          enqueue = self.processPool().createAndQueueTask( RequestTask,
                                                           kwargs = { "requestJSON" : requestJSON,
                                                                      "handlersDict" : self.handlersDict,
                                                                      "csPath" : self.__configPath,
                                                                      "agentName": self.agentName },
                                                           taskID = taskID,
                                                           blocking = True,
                                                           usePoolCallbacks = True,
                                                           timeOut = timeOut )
          if not enqueue["OK"]:
            self.log.error( enqueue["Message"] )
          else:
            self.log.debug( "successfully enqueued task '%s'" % taskID )
            # # update monitor
            gMonitor.addMark( "Processed", 1 )
            # # update request counter
            taskCounter += 1
            # # task created, a little time kick to proceed
            time.sleep( 0.1 )
            break

    # # clean return
    return S_OK()
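
Note: the inner while loop above is a slot-polling backoff: block until the ProcessPool reports a free worker, then submit exactly once. A sketch of the same pattern, assuming only the two pool methods used above:

import time

def enqueueWhenFree( pool, submit, pollSeconds = 5 ):
  """ block until pool has a free slot, then call submit() once """
  while True:
    if not pool.getFreeSlots():
      time.sleep( pollSeconds )
      continue
    return submit()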
  def __checkReplicas( self ):
    """ check done replicas and update file states  """
    waitingFiles = dict( [ ( opFile.LFN, opFile ) for opFile in self.operation
                          if opFile.Status in ( "Waiting", "Scheduled" ) ] )
    targetSESet = set( self.operation.targetSEList )

    replicas = self.fc.getReplicas( waitingFiles.keys() )
    if not replicas["OK"]:
      self.log.error( replicas["Message"] )
      return replicas

    reMissing = re.compile( "no such file or directory" )
    for failedLFN, errStr in replicas["Value"]["Failed"].items():
      waitingFiles[failedLFN].Error = errStr
      if reMissing.search( errStr.lower() ):
        self.log.error( "file %s does not exists" % failedLFN )
        gMonitor.addMark( "ReplicateFail", len( targetSESet ) )
        waitingFiles[failedLFN].Status = "Failed"

    for successfulLFN, reps in replicas["Value"]["Successful"].items():
      if targetSESet.issubset( set( reps ) ):
        self.log.info( "file %s has been replicated to all targets" % successfulLFN )
        waitingFiles[successfulLFN].Status = "Done"

    return S_OK()
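
Note: bulk catalog calls reply with a per-LFN split into "Successful" and "Failed", which is exactly what __checkReplicas walks. A standalone sketch of consuming such a reply (data is illustrative):

replicas = { "OK": True,
             "Value": { "Successful": { "/lfn/a": [ "SE1", "SE2" ] },
                        "Failed": { "/lfn/b": "No such file or directory" } } }

targetSESet = set( [ "SE1", "SE2" ] )
for lfn, reps in replicas["Value"]["Successful"].items():
  if targetSESet.issubset( set( reps ) ):
    print "%s has been replicated to all targets" % lfn
for lfn, error in replicas["Value"]["Failed"].items():
  print "%s failed: %s" % ( lfn, error )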
Example #4
    def execute(self):
        """ Main execution method
    """
        delta = time.time() - self.last_update
        if delta > self.reportPeriod:
            result = self.jobDB.getCounters("Jobs", ["Status", "Site"], {}, "")
            if not result["OK"]:
                return S_ERROR("Failed to get data from the Job Database")
            self.resultDB = result["Value"]
            self.last_update = time.time()

        totalDict = {}
        for status in MONITOR_STATUS:
            totalDict[status] = 0

        for row in self.resultDB:
            site = row[0]["Site"]
            status = row[0]["Status"]
            count = row[1]
            if site in MONITOR_SITES and status in MONITOR_STATUS:
                gLogger.verbose("Adding mark %s-%s: " % (status, site) + str(count))
                gMonitor.addMark("%s-%s" % (status, site), count)
            if status in totalDict:
                totalDict[status] += count

        for status in MONITOR_STATUS:
            gLogger.verbose("Adding mark %s-All sites: " % status + str(totalDict[status]))
            gMonitor.addMark("%s-All sites" % status, totalDict[status])

        return S_OK()
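
Note: gMonitor.addMark only accounts marks for activities that were registered beforehand, typically in the agent's initialize(). A sketch of that registration step; the registerActivity signature and import path are quoted from the DIRAC gMonitor API as remembered here, and the activity names are illustrative:

from DIRAC import gMonitor  # import path as in DIRAC v6, quoted from memory

# registerActivity( name, description, category, unit, operation )
gMonitor.registerActivity( "Iteration", "Agent cycles", "StatusMonitoringAgent",
                           "cycles", gMonitor.OP_SUM )
gMonitor.addMark( "Iteration", 1 )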
Example #5
 def _endReportToMonitoring( self, initialWallTime, initialCPUTime ):
   wallTime = time.time() - initialWallTime
   stats = os.times()
   cpuTime = stats[0] + stats[2] - initialCPUTime
   percentage = cpuTime / wallTime * 100.
   if percentage > 0:
     gMonitor.addMark( 'CPU', percentage )
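
Note: the mark reported above is CPU seconds consumed per wall-clock second, in percent. os.times() returns ( user, system, children-user, children-system, elapsed ), so stats[0] + stats[2] is the process's user time plus that of its children. The same bookkeeping standalone, with a zero-interval guard the snippet above omits:

import os, time

startWall = time.time()
stats = os.times()
startCPU = stats[0] + stats[2]

# ... do some work here ...

wallTime = time.time() - startWall
stats = os.times()
cpuTime = stats[0] + stats[2] - startCPU
if wallTime > 0:
  print "CPU usage: %.1f%%" % ( cpuTime / wallTime * 100. )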
Example #6
    def export_requestJob(self, resourceDescription):
        """ Serve a job to the request of an agent which is the highest priority
        one matching the agent's site capacity
    """

        result = self.selectJob(resourceDescription)
        gMonitor.addMark("matchesDone")
        return result
Example #7
  def execute( self ):
    """ execution in one cycle """

    now = datetime.datetime.utcnow()
    kickTime = now - datetime.timedelta( hours = self.KICK_GRACE_HOURS )
    rmTime = now - datetime.timedelta( days = self.DEL_GRACE_DAYS )

    # # kick
    statusList = [ "Assigned" ]
    requestNamesList = self.requestClient().getRequestNamesList( statusList, self.DEL_LIMIT )
    if not requestNamesList["OK"]:
      self.log.error( "execute: %s" % requestNamesList["Message"] )
      return requestNamesList

    requestNamesList = requestNamesList["Value"]
    kicked = 0
    for requestName, status, lastUpdate in requestNamesList:
      if lastUpdate < kickTime:
        self.log.info( "execute: kick assigned request '%s'" % requestName )
        getRequest = self.requestClient().peekRequest( requestName )
        if not getRequest["OK"]:
          self.log.error( "execute: unable to read request '%s': %s" % ( requestName, getRequest["Message"] ) )
          continue
        getRequest = getRequest["Value"]
        if getRequest:
          getRequest.Status = "Waiting"
          putRequest = self.requestClient().putRequest( getRequest )
          if not putRequest["OK"]:
            self.log.error( "execute: unable to put request '%s': %s" % ( requestName, putRequest["Message"] ) )
            continue
          else:
            self.log.verbose( "Kicked request %d" % putRequest['Value'] )
          kicked += 1

    # # delete
    statusList = [ "Done", "Failed" ] if self.DEL_FAILED else [ "Done" ]
    requestNamesList = self.requestClient().getRequestNamesList( statusList, self.DEL_LIMIT )
    if not requestNamesList["OK"]:
      self.log.error( "execute: %s" % requestNamesList["Message"] )
      return requestNamesList

    requestNamesList = requestNamesList["Value"]
    deleted = 0
    for requestName, status, lastUpdate in requestNamesList:
      if lastUpdate < rmTime:
        self.log.info( "execute: deleting request '%s' with status %s" % ( requestName, status ) )
        delRequest = self.requestClient().deleteRequest( requestName )
        if not delRequest["OK"]:
          self.log.error( "execute: unable to delete request '%s': %s" % ( requestName, delRequest["Message"] ) )
          continue
        deleted += 1

    gMonitor.addMark( "KickedRequests", kicked )
    gMonitor.addMark( "DeletedRequests", deleted )
    self.log.info( "execute: kicked assigned requests = %s" % kicked )
    self.log.info( "execute: deleted finished requests = %s" % deleted )
    return S_OK()
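
Note: both loops turn a grace period into an absolute cutoff computed once per cycle, then act on a record when its LastUpdate is older than that cutoff. A worked standalone example (the grace values are assumptions):

import datetime

now = datetime.datetime.utcnow()
kickTime = now - datetime.timedelta( hours = 2 )  # assuming KICK_GRACE_HOURS = 2
rmTime = now - datetime.timedelta( days = 7 )     # assuming DEL_GRACE_DAYS = 7

lastUpdate = now - datetime.timedelta( hours = 5 )
print lastUpdate < kickTime  # True: untouched for more than 2 hours, kick it
print lastUpdate < rmTime    # False: not old enough to delete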
Example #8
  def execute( self ):
    """ one cycle execution """

    # Don't use the server certificate otherwise the DFC won't let us write
    gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false' )


    log = gLogger.getSubLogger( "execute" )
    # # reset FTSPlacement if expired
    now = datetime.datetime.now()
    if now > self.__ftsPlacementValidStamp:
      log.info( "resetting expired FTS placement..." )
      resetFTSPlacement = self.resetFTSPlacement()
      if not resetFTSPlacement["OK"]:
        log.error( "FTSPlacement recreation error:" , resetFTSPlacement["Message"] )
        return resetFTSPlacement
      self.__ftsPlacementValidStamp = now + datetime.timedelta( seconds = self.FTSPLACEMENT_REFRESH )

    requestIDs = self.requestClient().getRequestIDsList( statusList = [ "Scheduled" ], limit = self.MAX_REQUESTS )
    if not requestIDs["OK"]:
      log.error( "unable to read scheduled request ids" , requestIDs["Message"] )
      return requestIDs
    if not requestIDs["Value"]:
      requestIDs = []
    else:
      requestIDs = [ req[0] for req in requestIDs["Value"] if req[0] not in self.__reqCache ]
    requestIDs += self.__reqCache.keys()

    if not requestIDs:
      log.info( "no 'Scheduled' requests to process" )
      return S_OK()

    log.info( "found %s requests to process:" % len( requestIDs ) )
    log.info( " => from internal cache: %s" % ( len( self.__reqCache ) ) )
    log.info( " =>   new read from RMS: %s" % ( len( requestIDs ) - len( self.__reqCache ) ) )

    for requestID in requestIDs:
      request = self.getRequest( requestID )
      if not request["OK"]:
        log.error( "Error getting request", "%s: %s" % ( requestID, request["Message"] ) )
        continue
      request = request["Value"]
      sTJId = request.RequestID
      while True:
        queue = self.threadPool().generateJobAndQueueIt( self.processRequest,
                                                         args = ( request, ),
                                                         sTJId = sTJId )
        if queue["OK"]:
          log.info( "Request enqueued for execution", sTJId )
          gMonitor.addMark( "RequestsAtt", 1 )
          break
        time.sleep( 1 )

    # # process all results
    self.threadPool().processAllResults()
    return S_OK()
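
Note: the ID bookkeeping above keeps every cached request and appends only freshly read IDs that are not already in the cache. The same merge on plain data:

reqCache = { 1: "requestA", 2: "requestB" }
freshIDs = [ 2, 3, 4 ]

requestIDs = [ rid for rid in freshIDs if rid not in reqCache ]
requestIDs += reqCache.keys()
print sorted( requestIDs )  # [1, 2, 3, 4]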
Example #9
  def __monitorJob( self, request, ftsJob ):
    """ execute FTSJob.monitorFTS for a given :ftsJob:
        if ftsJob is in a final state, finalize it

    :param Request request: ReqDB.Request instance
    :param FTSJob ftsJob: FTSDB.FTSJob instance
    """
    log = self.log.getSubLogger( "req_%s/%s/monitor/%s" % ( request.RequestID, request.RequestName, ftsJob.FTSGUID ) )
    log.info( "FTSJob '%s'@'%s'" % ( ftsJob.FTSGUID, ftsJob.FTSServer ) )

    # # this will be returned
    ftsFilesDict = dict( [ ( k, list() ) for k in ( "toRegister", "toSubmit", "toFail", "toReschedule", "toUpdate" ) ] )

    monitor = ftsJob.monitorFTS( self.__ftsVersion , command = self.MONITOR_COMMAND )
    if not monitor["OK"]:
      gMonitor.addMark( "FTSMonitorFail", 1 )
      log.error( monitor["Message"] )
      if "getTransferJobSummary2: Not authorised to query request" in monitor["Message"] or \
         'was not found' in monitor['Message'] or\
         "Not found" in monitor['Message'] or\
         'Unknown transfer state' in monitor['Message']:
        log.error( "FTSJob not known (expired on server?): delete it" )
        for ftsFile in ftsJob:
          ftsFile.Status = "Waiting"
          ftsFilesDict["toSubmit"].append( ftsFile )
        # #  No way further for that job: delete it
        res = self.ftsClient().deleteFTSJob( ftsJob.FTSJobID )
        if not res['OK']:
          log.error( "Unable to delete FTSJob", res['Message'] )
        return S_OK( ftsFilesDict )
      return monitor

    monitor = monitor["Value"]
    log.info( "FTSJob Status = %s Completeness = %s" % ( ftsJob.Status, ftsJob.Completeness ) )

    # # monitor status change
    gMonitor.addMark( "FTSJobs%s" % ftsJob.Status, 1 )

    if ftsJob.Status in FTSJob.FINALSTATES:
      finalizeFTSJob = self.__finalizeFTSJob( request, ftsJob )
      if not finalizeFTSJob["OK"]:
        if 'Unknown transfer state' in finalizeFTSJob['Message']:
          for ftsFile in ftsJob:
            ftsFile.Status = "Waiting"
            ftsFilesDict["toSubmit"].append( ftsFile )
          # #  No way further for that job: delete it
          res = self.ftsClient().deleteFTSJob( ftsJob.FTSJobID )
          if not res['OK']:
            log.error( "Unable to delete FTSJob", res['Message'] )
        else:
          log.error( finalizeFTSJob["Message"] )
          return finalizeFTSJob
      else:
        ftsFilesDict = self.updateFTSFileDict( ftsFilesDict, finalizeFTSJob["Value"] )

    return S_OK( ftsFilesDict )
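
Note: __monitorJob aggregates per-file outcomes into action buckets ( "toRegister", "toSubmit", ... ) and merges the bucket dicts coming back from sub-calls. A standalone stand-in for that merge; updateFTSFileDict itself is not shown in this listing:

ACTIONS = ( "toRegister", "toSubmit", "toFail", "toReschedule", "toUpdate" )

def newFilesDict():
  return dict( [ ( k, list() ) for k in ACTIONS ] )

def mergeFilesDict( target, update ):
  # bucket-wise concatenation; both sides use the same keys
  for k in ACTIONS:
    target[k] = target[k] + update.get( k, [] )
  return target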
 def execute( self ):
   self.enableFlag = self.am_getOption( 'EnableFlag', 'True' )
   if self.enableFlag != 'True':
     self.log.info( 'MigrationMonitoringAgent is disabled by configuration option %s/EnableFlag' % ( self.section ) )
     return S_OK( 'Disabled via CS flag' )
   gMonitor.addMark( "Iteration", 1 )
   self.NewToMigrating()
   for se in self.storageElements:
     gMonitor.addMark( "Iteration%s" % se, 1 )
     self.MigratingToMigrated( se )
   return S_OK()
Example #11
  def execute( self ):
    """ one cycle execution """

    now = datetime.datetime.now()
    kickTime = now - datetime.timedelta( hours = self.KICK_ASSIGNED_HOURS )
    rmTime = now - datetime.timedelta( days = self.DEL_GRACE_DAYS )

    kicked = 0
    deleted = 0

    # # select Assigned FTSJobs
    assignedFTSJobList = self.ftsClient().getFTSJobList( ["Assigned"], self.KICK_LIMIT )
    if not assignedFTSJobList["OK"]:
      self.log.error( "execute: %s" % assignedFTSJobList["Message"] )
      return assignedFTSJobList
    assignedFTSJobList = assignedFTSJobList["Value"]

    for ftsJob in assignedFTSJobList:
      if ftsJob.LastUpdate < kickTime:
        # # LastUpdate older than the cutoff: Assigned for too long, kick it back
        self.log.debug( "FTSJob %s is Assigned for too long and has to be kicked" % ftsJob.FTSGUID )
        kicked += 1
        ftsJob.Status = "Submitted"
      put = self.ftsClient().putFTSJob( ftsJob )
      if not put["OK"]:
        self.log.error( "execute: unable to put back FTSJob %s: %s" % ( ftsJob.FTSGUID, put["Message"] ) )
        return put

    finishedFTSJobList = self.ftsClient().getFTSJobList( list( FTSJob.FINALSTATES ), self.DEL_LIMIT )
    if not finishedFTSJobList["OK"]:
      self.log.error( "execute: %s" % finishedFTSJobList["Message"] )
      return finishedFTSJobList
    finishedFTSJobList = finishedFTSJobList["Value"]

    for ftsJob in finishedFTSJobList:
      if ftsJob.LastUpdate < rmTime:
        # # LastUpdate older than the removal cutoff: delete the job
        self.log.debug( "FTSJob %s is too old and has to be deleted" % ftsJob.FTSGUID )
        delJob = self.ftsClient().deleteFTSJob( ftsJob.FTSJobID )
        if not delJob["OK"]:
          self.log.error( "execute: %s" % delJob["Message"] )
          return delJob
        deleted += 1
      else:
        putJob = self.ftsClient().putFTSJob( ftsJob )
        if not putJob["OK"]:
          self.log.error( "execute: %s" % putJob["Message"] )
          return putJob

    self.log.info( "Assigned FTSJobs kicked %s Finished FTSJobs deleted %s" % ( kicked, deleted ) )
    gMonitor.addMark( "KickedFTSJobs", kicked )
    gMonitor.addMark( "DeletedFTSJobs", deleted )
    return S_OK()
Example #12
  def __call__( self ):
    """ action for 'removeFile' operation  """
    # # get waiting files
    waitingFiles = self.getWaitingFilesList()
    # # prepare waiting file dict
    toRemoveDict = dict( [ ( opFile.LFN, opFile ) for opFile in waitingFiles ] )
    gMonitor.addMark( "RemoveFileAtt", len( toRemoveDict ) )

    # # 1st step - bulk removal
    self.log.debug( "bulk removal of %s files" % len( toRemoveDict ) )
    bulkRemoval = self.bulkRemoval( toRemoveDict )
    if not bulkRemoval["OK"]:
      self.log.error( "bulk removal failed: %s" % bulkRemoval["Message"] )
    else:
      gMonitor.addMark( "RemoveFileOK", len( toRemoveDict ) - len( bulkRemoval["Value"] ) )
      toRemoveDict = bulkRemoval["Value"]

    # # 2nd step - single file removal
    for lfn, opFile in toRemoveDict.items():
      self.log.info( "removing single file %s" % lfn )
      singleRemoval = self.singleRemoval( opFile )
      if not singleRemoval["OK"]:
        self.log.error( 'Error removing single file', singleRemoval["Message"] )
        gMonitor.addMark( "RemoveFileFail", 1 )
      else:
        self.log.info( "file %s has been removed" % lfn )
        gMonitor.addMark( "RemoveFileOK", 1 )

    # # set
    failedFiles = [ ( lfn, opFile ) for ( lfn, opFile ) in toRemoveDict.items()
                    if opFile.Status in ( "Failed", "Waiting" ) ]
    if failedFiles:
      self.operation.Error = "failed to remove %d files" % len( failedFiles )

    return S_OK()
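
Note: the operation runs a two-phase removal: one bulk call first, then a per-file retry over whatever the bulk step reports as still pending. The shape of that hand-off on plain data ( bulkRemove here is a fake that leaves one file pending ):

def bulkRemove( files ):
  leftovers = { "/lfn/b": files["/lfn/b"] }
  return { "OK": True, "Value": leftovers }

toRemove = { "/lfn/a": "opFileA", "/lfn/b": "opFileB" }
res = bulkRemove( toRemove )
if res["OK"]:
  toRemove = res["Value"]
for lfn in toRemove:
  print "retrying %s file by file" % lfn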
Example #13
 def _startReportToMonitoring( self ):
   try:
     now = time.time()
     stats = os.times()
     cpuTime = stats[0] + stats[2]
     if now - self.__monitorLastStatsUpdate < 10:
       return ( now, cpuTime )
     # Send CPU consumption mark
     self.__monitorLastStatsUpdate = now
     # Send Memory consumption mark
     membytes = MemStat.VmB( 'VmRSS:' )
     if membytes:
       mem = membytes / ( 1024. * 1024. )
       gMonitor.addMark( 'MEM', mem )
     return( now, cpuTime )
   except Exception:
     return False
 def export_getReport( self, reportRequest ):
   """
    Plot an accounting report
   Arguments:
     - viewName : Name of view (easy!)
     - startTime
     - endTime
     - argsDict : Arguments to the view.
     - grouping
     - extraArgs
   """
   retVal = self.__checkPlotRequest( reportRequest )
   if not retVal[ 'OK' ]:
     return retVal
   reporter = MainReporter( gAccountingDB, self.serviceInfoDict[ 'clientSetup' ] )
   gMonitor.addMark( "reportsRequested" )
   reportRequest[ 'generatePlot' ] = False
   return reporter.generate( reportRequest, self.getRemoteCredentials() )
Example #15
  def __monitorJob( self, request, ftsJob ):
    """ execute FTSJob.monitorFTS2 for a given :ftsJob:
        if ftsJob is in a final state, finalize it

    :param Request request: ReqDB.Request instance
    :param FTSJob ftsJob: FTSDB.FTSJob instance
    """
    log = self.log.getSubLogger( "%s/monitor/%s" % ( request.RequestName, ftsJob.FTSGUID ) )
    log.info( "FTSJob '%s'@'%s'" % ( ftsJob.FTSGUID, ftsJob.FTSServer ) )

    # # this will be returned
    ftsFilesDict = dict( [ ( k, list() ) for k in ( "toRegister", "toSubmit", "toFail", "toReschedule", "toUpdate" ) ] )

    monitor = ftsJob.monitorFTS2()
    if not monitor["OK"]:
      gMonitor.addMark( "FTSMonitorFail", 1 )
      log.error( monitor["Message"] )
      if "getTransferJobSummary2: Not authorised to query request" in monitor["Message"]:
        log.error( "FTSJob not known (expired on server?)" )
        for ftsFile in ftsJob:
          ftsFile.Status = "Waiting"
          ftsFilesDict["toSubmit"] = ftsFile
        return S_OK( ftsFilesDict )
      return monitor

    monitor = monitor["Value"]
    log.info( "FTSJob Status = %s Completeness = %s" % ( ftsJob.Status, ftsJob.Completeness ) )

    # # monitor status change
    gMonitor.addMark( "FTSJobs%s" % ftsJob.Status, 1 )

    if ftsJob.Status in FTSJob.FINALSTATES:
      finalizeFTSJob = self.__finalizeFTSJob( request, ftsJob )
      if not finalizeFTSJob["OK"]:
        log.error( finalizeFTSJob["Message"] )
        return finalizeFTSJob
      ftsFilesDict = self.updateFTSFileDict( ftsFilesDict, finalizeFTSJob["Value"] )

    return S_OK( ftsFilesDict )
  def resultCallback( self, taskID, taskResult ):
    """ definition of request callback function
    
    :param self: self reference
    """
    self.log.info("%s result callback" %  taskID ) 

    if not taskResult["OK"]:
      self.log.error( "%s result callback: %s" % ( taskID, taskResult["Message"] ) )
      if taskResult["Message"] == "Timed out":
        self.resetRequest( taskID )
      self.deleteRequest( taskID )
      return
    
    self.deleteRequest( taskID )
    taskResult = taskResult["Value"]
    ## add monitoring info
    monitor = taskResult["monitor"] if "monitor" in taskResult else {}
    for mark, value in monitor.items():
      try:
        gMonitor.addMark( mark, value )
      except Exception, error:
        self.log.exception( str(error) )
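
Note: a finished task may ship its own monitoring marks back in a "monitor" dict, which the callback forwards to gMonitor one by one. The payload shape assumed by the loop above:

taskResult = { "OK": True,
               "Value": { "monitor": { "RequestsOK": 1, "FilesDone": 3 } } }

monitor = taskResult["Value"].get( "monitor", {} )
for mark, value in monitor.items():
  print "addMark( %s, %s )" % ( mark, value )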
Example #17
  def export_addFile( self, lfns ):
    """ Register supplied files """
    gMonitor.addMark( "AddFile", 1 )
    res = gFileCatalogDB.addFile( lfns, self.getRemoteCredentials() )
    if res['OK']:
      gMonitor.addMark( "AddFileSuccessful", len( res.get( 'Value', {} ).get( 'Successful', [] ) ) )
      gMonitor.addMark( "AddFileFailed", len( res.get( 'Value', {} ).get( 'Failed', [] ) ) )

    return res
Example #18
  def export_removeReplica( self, lfns ):
    """ Remove the supplied replicas """
    gMonitor.addMark( "RemoveReplica", 1 )
    res = gFileCatalogDB.removeReplica( lfns, self.getRemoteCredentials() )
    if res['OK']:
      gMonitor.addMark( "RemoveReplicaSuccessful", len( res.get( 'Value', {} ).get( 'Successful', [] ) ) )
      gMonitor.addMark( "RemoveReplicaFailed", len( res.get( 'Value', {} ).get( 'Failed', [] ) ) )

    return res
Example #19
    def export_addReplica(self, lfns):
        """ Register supplied replicas """
        gMonitor.addMark("AddReplica", 1)
        res = gFileCatalogDB.addReplica(lfns, self.getRemoteCredentials())
        if res["OK"]:
            gMonitor.addMark("AddReplicaSuccessful", len(res.get("Value", {}).get("Successful", [])))
            gMonitor.addMark("AddReplicaFailed", len(res.get("Value", {}).get("Failed", [])))

        return res
Example #20
    def export_removeFile(self, lfns):
        """ Remove the supplied lfns """
        gMonitor.addMark("RemoveFile", 1)
        res = gFileCatalogDB.removeFile(lfns, self.getRemoteCredentials())
        if res["OK"]:
            gMonitor.addMark("RemoveFileSuccessful", len(res.get("Value", {}).get("Successful", [])))
            gMonitor.addMark("RemoveFileFailed", len(res.get("Value", {}).get("Failed", [])))

        return res
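
Note: Examples #17-#20 differ only in the operation name; the success/failure counting is identical and guarded with .get() so a missing key counts as zero. It could be factored like this ( a hypothetical helper, not part of the DIRAC service API, assuming the module-level gMonitor used throughout these handlers ):

def markBulkResult( name, res ):
  # res follows the S_OK/S_ERROR convention with Successful/Failed maps
  if res["OK"]:
    value = res.get( "Value", {} )
    gMonitor.addMark( "%sSuccessful" % name, len( value.get( "Successful", [] ) ) )
    gMonitor.addMark( "%sFailed" % name, len( value.get( "Failed", [] ) ) )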
Example #21
  def execute(self):
    """ This agent is the smallest and (cutest) of all the DIRAC agents in existence.
    """
    gMonitor.addMark("Iteration",1)

    central = PathFinder.getServiceURL("RequestManagement/centralURL")
    if central:
      self.central = central
    local = PathFinder.getServiceURL("RequestManagement/localURL")
    if local:
      self.local = local

    res = self.RequestDBClient.serveRequest(url=self.local)
    if not res['OK']:
      gLogger.error("ZuziaAgent.execute: Failed to get request from database.",self.local)
      return S_OK()
    elif not res['Value']:
      gLogger.info("ZuziaAgent.execute: No requests to be executed found.")
      return S_OK()

    gMonitor.addMark("Attempted",1)

    requestString = res['Value']['RequestString']
    requestName = res['Value']['RequestName']
    gLogger.info("ZuziaAgent.execute: Obtained request %s" % requestName)

    gLogger.info("ZuziaAgent.execute: Attempting to set request to %s." % self.central)
    res = self.RequestDBClient.setRequest(requestName,requestString,self.central)
    if res['OK']:
      gMonitor.addMark("Successful",1)
      gLogger.info("ZuziaAgent.execute: Successfully put request.")
    else:
      gMonitor.addMark("Failed",1)
      gLogger.error("ZuziaAgent.execute: Failed to set request to", self.central)
      gLogger.info("ZuziaAgent.execute: Attempting to set request to %s." % self.local)
      res = self.RequestDBClient.setRequest(requestName,requestString,self.local)
      if res['OK']:
        gLogger.info("ZuziaAgent.execute: Successfully put request.")
      else:
        gLogger.error("ZuziaAgent.execute: Failed to set request to", self.local)
        while not res['OK']:
          gLogger.info("ZuziaAgent.execute: Attempting to set request to anywhere.")
          time.sleep(1)  # back off so a total outage does not become a busy loop
          res = self.RequestDBClient.setRequest(requestName,requestString)
        gLogger.info("ZuziaAgent.execute: Successfully put request to %s." % res["Server"])
    return S_OK()
Example #22
  def __call__( self ):
    """ call me maybe """
    # # counter for failed replicas
    failedReplicas = 0
    # # catalog to use
    catalog = self.operation.Catalog
    # # get waiting files
    waitingFiles = self.getWaitingFilesList()
    # # loop over files
    for opFile in waitingFiles:

      gMonitor.addMark( "RegisterReplicaAtt", 1 )

      # # get LFN
      lfn = opFile.LFN
      # # and others
      replicaTuple = ( lfn , opFile.PFN, self.operation.targetSEList[0] )
      # # call ReplicaManager
      registerReplica = self.replicaManager().registerReplica( replicaTuple, catalog )
      # # check results
      if not registerReplica["OK"] or lfn in registerReplica["Value"]["Failed"]:

        gMonitor.addMark( "RegisterReplicaFail", 1 )
        self.dataLoggingClient().addFileRecord( lfn, "RegisterReplicaFail", catalog, "", "RegisterReplica" )

        reason = registerReplica["Message"] if not registerReplica["OK"] else registerReplica["Value"]["Failed"][lfn]
        errorStr = "failed to register LFN %s: %s" % ( lfn, reason )
        opFile.Error = errorStr
        self.log.warn( errorStr )
        failedReplicas += 1

      else:

        gMonitor.addMark( "RegisterReplicaOK", 1 )
        self.dataLoggingClient().addFileRecord( lfn, "RegisterReplicaOK", catalog, "", "RegisterReplica" )

        self.log.info( "Replica %s has been registered at %s" % ( lfn, catalog ) )
        opFile.Status = "Done"

    # # final check
    if failedReplicas:
      self.log.info( "all replicas processed, %s replicas failed to register" % failedReplicas )
      self.operation.Error = "some replicas failed to register"
      return S_ERROR( self.operation.Error )

    return S_OK()
Example #23
  def __call__( self ):
    """ call me maybe """
    # # counter for failed files
    failedFiles = 0
    # # catalog to use
    catalog = self.operation.Catalog
    # # get waiting files
    waitingFiles = self.getWaitingFilesList()
    # # loop over files
    for opFile in waitingFiles:

      gMonitor.addMark( "RegisterAtt", 1 )

      # # get LFN
      lfn = opFile.LFN
      # # and others
      fileTuple = ( lfn , opFile.PFN, opFile.Size, self.operation.targetSEList[0], opFile.GUID, opFile.Checksum )
      # # call ReplicaManager
      registerFile = self.replicaManager().registerFile( fileTuple, catalog )
      # # check results
      if not registerFile["OK"] or lfn in registerFile["Value"]["Failed"]:

        gMonitor.addMark( "RegisterFail", 1 )
        self.dataLoggingClient().addFileRecord( lfn, "RegisterFail", catalog, "", "RegisterFile" )

        reason = registerFile["Message"] if not registerFile["OK"] else registerFile["Value"]["Failed"][lfn]
        errorStr = "failed to register LFN %s: %s" % ( lfn, reason )
        opFile.Error = errorStr
        self.log.warn( errorStr )
        failedFiles += 1

      else:

        gMonitor.addMark( "RegisterOK", 1 )
        self.dataLoggingClient().addFileRecord( lfn, "Register", catalog, "", "RegisterFile" )

        self.log.info( "file %s has been registered at %s" % ( lfn, catalog ) )
        opFile.Status = "Done"

    # # final check
    if failedFiles:
      self.log.info( "all files processed, %s files failed to register" % failedFiles )
      self.operation.Error = "some files failed to register"
      return S_ERROR( self.operation.Error )

    return S_OK()
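
Note: replica registration (Example #22) and file registration (Example #23) differ only in the tuple handed to the ReplicaManager. Their layouts, with illustrative values:

# LFN, PFN, SE
replicaTuple = ( "/lfn/a", "srm://se.example/path/a", "Target-SE" )
# LFN, PFN, size, SE, GUID, checksum (all values are made up)
fileTuple = ( "/lfn/a", "srm://se.example/path/a", 1024, "Target-SE",
              "11111111-2222-3333-4444-555555555555", "0a1b2c3d" )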
Example #24
    def __call__(self):
        """ call me maybe """
        # # list of targetSE
        targetSEs = self.operation.targetSEList
        if len(targetSEs) != 1:
            self.log.error(
                "wrong TargetSE attribute, expecting one entry, got %s" %
                len(targetSEs))
            self.operation.Error = "Wrongly formatted TargetSE"
            for opFile in self.operation:

                gMonitor.addMark("RegisterAtt", 1)
                gMonitor.addMark("RegisterFail", 1)
                self.dataLoggingClient().addFileRecord(opFile.LFN,
                                                       "RegisterFail",
                                                       self.operation.TargetSE,
                                                       "", "RegisterFile")

                opFile.Error = "wrongly formatted targetSE"
                opFile.Status = "Failed"

            self.operation.Status = "Failed"
            return S_ERROR(self.operation.Error)

        targetSE = targetSEs[0]
        # # counter for failed files
        failedFiles = 0
        # # catalog to use
        catalog = self.operation.Catalog
        # # get waiting files
        waitingFiles = self.getWaitingFilesList()
        # # loop over files
        for opFile in waitingFiles:

            gMonitor.addMark("RegisterAtt", 1)

            # # get LFN
            lfn = opFile.LFN
            # # and others
            fileTuple = (lfn, opFile.PFN, opFile.Size, targetSE, opFile.GUID,
                         opFile.Checksum)
            # # call ReplicaManager
            registerFile = self.replicaManager().registerFile(
                fileTuple, catalog)
            # # check results
            if not registerFile["OK"] or lfn in registerFile["Value"]["Failed"]:

                gMonitor.addMark("RegisterFail", 1)
                self.dataLoggingClient().addFileRecord(lfn, "RegisterFail",
                                                       targetSE, "",
                                                       "RegisterFile")

                reason = registerFile["Message"] if not registerFile[
                    "OK"] else registerFile["Value"]["Failed"][lfn]
                errorStr = "failed to register LFN %s: %s" % (lfn, reason)
                opFile.Error = errorStr
                self.log.warn(errorStr)
                failedFiles += 1

            else:

                gMonitor.addMark("RegisterOK", 1)
                self.dataLoggingClient().addFileRecord(lfn, "Register",
                                                       targetSE, "",
                                                       "RegisterFile")

                self.log.info("file %s has been registered at %s" %
                              (lfn, targetSE))
                opFile.Status = "Done"

        # # final check
        if failedFiles:
            self.log.info("all files processed, %s files failed to register" %
                          failedFiles)
            self.operation.Error = "some files failed to register"
            return S_ERROR(self.operation.Error)

        return S_OK()
Example #25
    def execute(self):
        ''' Main execution method
    '''

        gMonitor.addMark('Iteration', 1)
        # Get all the transformations
        result = self.transClient.getTransformations({
            'Status':
            'Active',
            'Type':
            self.transformationTypes
        })
        if not result['OK']:
            gLogger.error(
                "InputDataAgent.execute: Failed to get transformations.",
                result['Message'])
            return S_OK()

        # Process each transformation
        for transDict in result['Value']:
            transID = long(transDict['TransformationID'])
            res = self.transClient.getTransformationInputDataQuery(transID)
            if not res['OK']:
                if res['Message'] == 'No InputDataQuery found for transformation':
                    gLogger.info(
                        "InputDataAgent.execute: No input data query found for transformation %d"
                        % transID)
                else:
                    gLogger.error(
                        "InputDataAgent.execute: Failed to get input data query for %d"
                        % transID, res['Message'])
                continue
            inputDataQuery = res['Value']

            if self.refreshonly:
                # Determine the correct time stamp to use for this transformation
                if transID in self.timeLog:
                    if transID in self.fullTimeLog:
                        # If it is more than a day since the last reduced query, make a full query just in case
                        if (datetime.datetime.utcnow() -
                                self.fullTimeLog[transID]
                            ) < datetime.timedelta(
                                seconds=self.fullUpdatePeriod):
                            timeStamp = self.timeLog[transID]
                            if self.dateKey:
                                inputDataQuery[self.dateKey] = (
                                    timeStamp - datetime.timedelta(seconds=10)
                                ).strftime('%Y-%m-%d %H:%M:%S')
                            else:
                                gLogger.error(
                                    "DateKey was not set in the CS, cannot use the RefreshOnly"
                                )
                        else:
                            self.fullTimeLog[
                                transID] = datetime.datetime.utcnow()
                self.timeLog[transID] = datetime.datetime.utcnow()
                if transID not in self.fullTimeLog:
                    self.fullTimeLog[transID] = datetime.datetime.utcnow()

            # Perform the query to the metadata catalog
            gLogger.verbose(
                "Using input data query for transformation %d: %s" %
                (transID, str(inputDataQuery)))
            start = time.time()
            result = self.metadataClient.findFilesByMetadata(inputDataQuery)
            rtime = time.time() - start
            gLogger.verbose("Metadata catalog query time: %.2f seconds." %
                            (rtime))
            if not result['OK']:
                gLogger.error(
                    "InputDataAgent.execute: Failed to get response from the metadata catalog",
                    result['Message'])
                continue
            lfnList = result['Value']

            # Check if the number of files has changed since the last cycle
            nlfns = len(lfnList)
            gLogger.info(
                "%d files returned for transformation %d from the metadata catalog"
                % (nlfns, int(transID)))
            if transID in self.fileLog:
                if nlfns == self.fileLog[transID]:
                    gLogger.verbose(
                        'No new files in metadata catalog since last check')
            self.fileLog[transID] = nlfns

            # Add any new files to the transformation
            addedLfns = []
            if lfnList:
                gLogger.verbose('Processing %d lfns for transformation %d' %
                                (len(lfnList), transID))
                # Add the files to the transformation
                gLogger.verbose('Adding %d lfns for transformation %d' %
                                (len(lfnList), transID))
                result = self.transClient.addFilesToTransformation(
                    transID, sortList(lfnList))
                if not result['OK']:
                    gLogger.warn(
                        "InputDataAgent.execute: failed to add lfns to transformation",
                        result['Message'])
                    self.fileLog[transID] = 0
                else:
                    if result['Value']['Failed']:
                        for lfn, error in result['Value']['Failed'].items():
                            gLogger.warn(
                                "InputDataAgent.execute: Failed to add %s to transformation"
                                % lfn, error)
                    if result['Value']['Successful']:
                        for lfn, status in result['Value']['Successful'].items(
                        ):
                            if status == 'Added':
                                addedLfns.append(lfn)
                        gLogger.info(
                            "InputDataAgent.execute: Added %d files to transformation"
                            % len(addedLfns))

        return S_OK()
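
Note: the refreshonly branch keeps two timestamp logs per transformation: timeLog for the last incremental query and fullTimeLog for the last full one, falling back to a full query once per fullUpdatePeriod. The same bookkeeping on plain dicts:

import datetime

timeLog, fullTimeLog = {}, {}
fullUpdatePeriod = 86400  # one day, as the comment above suggests
transID = 42

now = datetime.datetime.utcnow()
if transID in timeLog and transID in fullTimeLog and \
   now - fullTimeLog[transID] < datetime.timedelta( seconds = fullUpdatePeriod ):
  since = timeLog[transID] - datetime.timedelta( seconds = 10 )  # small overlap
  print "incremental query since %s" % since.strftime( '%Y-%m-%d %H:%M:%S' )
else:
  fullTimeLog[transID] = now
  print "full query"
timeLog[transID] = now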
Example #26
    def dmTransfer(self, fromFTS=False):
        """ replicate and register using dataManager  """
        # # get waiting files. If none just return
        # # source SE
        sourceSE = self.operation.SourceSE if self.operation.SourceSE else None
        if sourceSE:
            # # check source se for read
            bannedSource = self.checkSEsRSS(sourceSE, 'ReadAccess')
            if not bannedSource["OK"]:
                gMonitor.addMark("ReplicateAndRegisterAtt",
                                 len(self.operation))
                gMonitor.addMark("ReplicateFail", len(self.operation))
                return bannedSource

            if bannedSource["Value"]:
                self.operation.Error = "SourceSE %s is banned for reading" % sourceSE
                self.log.info(self.operation.Error)
                return S_OK(self.operation.Error)

        # # check targetSEs for write
        bannedTargets = self.checkSEsRSS()
        if not bannedTargets['OK']:
            gMonitor.addMark("ReplicateAndRegisterAtt", len(self.operation))
            gMonitor.addMark("ReplicateFail", len(self.operation))
            return bannedTargets

        if bannedTargets['Value']:
            self.operation.Error = "%s targets are banned for writing" % ",".join(
                bannedTargets['Value'])
            return S_OK(self.operation.Error)

        # Can continue now
        self.log.verbose("No targets banned for writing")

        waitingFiles = self.getWaitingFilesList()
        if not waitingFiles:
            return S_OK()
        # # loop over files
        if fromFTS:
            self.log.info(
                "Trying transfer using replica manager as FTS failed")
        else:
            self.log.info("Transferring files using Data manager...")
        for opFile in waitingFiles:

            gMonitor.addMark("ReplicateAndRegisterAtt", 1)
            opFile.Error = ''
            lfn = opFile.LFN

            # Check if replica is at the specified source
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                self.log.error(replicas["Message"])
                continue
            replicas = replicas["Value"]
            validReplicas = replicas["Valid"]
            noMetaReplicas = replicas["NoMetadata"]
            noReplicas = replicas['NoReplicas']
            badReplicas = replicas['Bad']
            noPFN = replicas['NoPFN']

            if not validReplicas:
                gMonitor.addMark("ReplicateFail")
                if noMetaReplicas:
                    self.log.warn(
                        "unable to replicate '%s', couldn't get metadata at %s"
                        % (opFile.LFN, ','.join(noMetaReplicas)))
                    opFile.Error = "Couldn't get metadata"
                elif noReplicas:
                    self.log.error(
                        "unable to replicate %s, file doesn't exist at %s" %
                        (opFile.LFN, ','.join(noReplicas)))
                    opFile.Error = 'No replicas found'
                    opFile.Status = 'Failed'
                elif badReplicas:
                    self.log.error(
                        "unable to replicate %s, all replicas have a bad checksum at %s"
                        % (opFile.LFN, ','.join(badReplicas)))
                    opFile.Error = 'All replicas have a bad checksum'
                    opFile.Status = 'Failed'
                elif noPFN:
                    self.log.warn(
                        "unable to replicate %s, could not get a PFN" %
                        opFile.LFN)
                continue
            # # get the first one in the list
            if sourceSE not in validReplicas:
                if sourceSE:
                    self.log.warn(
                        "%s is not at specified sourceSE %s, changed to %s" %
                        (lfn, sourceSE, validReplicas[0]))
                sourceSE = validReplicas[0]

            # # loop over targetSE
            catalog = self.operation.Catalog
            for targetSE in self.operation.targetSEList:

                # # call DataManager
                if targetSE in validReplicas:
                    self.log.warn(
                        "Request to replicate %s to an existing location: %s" %
                        (lfn, targetSE))
                    opFile.Status = 'Done'
                    continue
                res = self.dm.replicateAndRegister(lfn,
                                                   targetSE,
                                                   sourceSE=sourceSE,
                                                   catalog=catalog)
                if res["OK"]:

                    if lfn in res["Value"]["Successful"]:

                        if "replicate" in res["Value"]["Successful"][lfn]:

                            repTime = res["Value"]["Successful"][lfn][
                                "replicate"]
                            prString = "file %s replicated at %s in %s s." % (
                                lfn, targetSE, repTime)

                            gMonitor.addMark("ReplicateOK", 1)

                            if "register" in res["Value"]["Successful"][lfn]:

                                gMonitor.addMark("RegisterOK", 1)
                                regTime = res["Value"]["Successful"][lfn][
                                    "register"]
                                prString += ' and registered in %s s.' % regTime
                                self.log.info(prString)
                            else:

                                gMonitor.addMark("RegisterFail", 1)
                                prString += " but failed to register"
                                self.log.warn(prString)

                                opFile.Error = "Failed to register"
                                # # add register replica operation
                                registerOperation = self.getRegisterOperation(
                                    opFile, targetSE, type='RegisterReplica')
                                self.request.insertAfter(
                                    registerOperation, self.operation)

                        else:

                            self.log.error("failed to replicate %s to %s." %
                                           (lfn, targetSE))
                            gMonitor.addMark("ReplicateFail", 1)
                            opFile.Error = "Failed to replicate"

                    else:

                        gMonitor.addMark("ReplicateFail", 1)
                        reason = res["Value"]["Failed"][lfn]
                        self.log.error(
                            "failed to replicate and register file %s at %s:" %
                            (lfn, targetSE), reason)
                        opFile.Error = reason

                else:

                    gMonitor.addMark("ReplicateFail", 1)
                    opFile.Error = "DataManager error: %s" % res["Message"]
                    self.log.error(opFile.Error)

            if not opFile.Error:
                if len(self.operation.targetSEList) > 1:
                    self.log.info(
                        "file %s has been replicated to all targetSEs" % lfn)
                opFile.Status = "Done"

        return S_OK()
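
Note: _filterReplicas (not shown in this listing) sorts each file's replicas into verdict buckets, and dmTransfer maps every non-empty bucket to an error and, for hard failures, a final file status. A sketch of that branching over the assumed verdict shape:

replicas = { "Valid": [ "SE1" ], "NoMetadata": [], "NoReplicas": [],
             "Bad": [], "NoPFN": [] }

if replicas["Valid"]:
  print "replicate from %s" % replicas["Valid"][0]
elif replicas["NoReplicas"] or replicas["Bad"]:
  print "hard failure: mark the file Failed"
else:
  print "soft failure: leave the file Waiting and retry next cycle"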
Example #27
 def __updateMigrationAccounting(self, se, migratingFiles, matchingFiles,
                                 mismatchingFiles, assumedEndTime,
                                 previousMonitorTime):
     """ Create accounting messages for the overall throughput observed and the total migration time for the files
 """
     allMigrated = matchingFiles + mismatchingFiles
     gMonitor.addMark("MigratedFiles%s" % se, len(allMigrated))
     gMonitor.addMark("TotalMigratedFiles%s" % se, len(allMigrated))
     lfnFileID = {}
     sizesToObtain = []
     for fileID in allMigrated:
         if not migratingFiles[fileID]['Size']:
             lfn = migratingFiles[fileID]['LFN']
             sizesToObtain.append(lfn)
             lfnFileID[lfn] = fileID
     if sizesToObtain:
         res = self.ReplicaManager.getCatalogFileSize(sizesToObtain)
         if not res['OK']:
             gLogger.error(
                 "[%s] __updateMigrationAccounting: Failed to obtain file sizes"
                 % se)
             return res
         for lfn, error in res['Value']['Failed'].items():
             gLogger.error(
                 "[%s] __updateAccounting: Failed to get file size" % se,
                 "%s %s" % (lfn, error))
             migratingFiles[lfnFileID[lfn]]['Size'] = 0
         for lfn, size in res['Value']['Successful'].items():
             migratingFiles[lfnFileID[lfn]]['Size'] = size
     totalSize = 0
     for fileID in allMigrated:
         size = migratingFiles[fileID]['Size']
         totalSize += size
         submitTime = migratingFiles[fileID]['SubmitTime']
          # migration time must be end minus submission, not the reverse
          timeDiff = assumedEndTime - submitTime
          migrationTime = (timeDiff.days * 86400) + (timeDiff.seconds) + (
              timeDiff.microseconds / 1000000.0)
         gMonitor.addMark("MigrationTime%s" % se, migrationTime)
         gDataStoreClient.addRegister(
             self.__initialiseAccountingObject('MigrationTime', se,
                                               submitTime, assumedEndTime,
                                               size))
         gDataStoreClient.addRegister(
             self.__initialiseAccountingObject('MigrationThroughput', se,
                                               previousMonitorTime,
                                               assumedEndTime, size))
         oDataOperation = self.__initialiseAccountingObject(
             'MigrationSuccess', se, submitTime, assumedEndTime, size)
         if fileID in mismatchingFiles:
             oDataOperation.setValueByKey('TransferOK', 0)
             oDataOperation.setValueByKey('FinalStatus', 'Failed')
         gDataStoreClient.addRegister(oDataOperation)
     gMonitor.addMark("TotalMigratedSize%s" % se, totalSize)
     gMonitor.addMark("ChecksumMismatches%s" % se, len(mismatchingFiles))
     gMonitor.addMark("TotalChecksumMismatches%s" % se,
                      len(mismatchingFiles))
     gMonitor.addMark("ChecksumMatches%s" % se, len(matchingFiles))
     gMonitor.addMark("TotalChecksumMatches%s" % se, len(matchingFiles))
     if allMigrated:
         gLogger.info(
             '[%s] __updateMigrationAccounting: Attempting to send accounting message...'
             % se)
         return gDataStoreClient.commit()
     return S_OK()
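
Note: with the subtraction fixed above, the migration time is plain timedelta arithmetic. Worked example:

import datetime

submitTime = datetime.datetime( 2013, 1, 1, 12, 0, 0 )
assumedEndTime = datetime.datetime( 2013, 1, 1, 12, 30, 15 )
timeDiff = assumedEndTime - submitTime
migrationTime = ( timeDiff.days * 86400 ) + timeDiff.seconds + ( timeDiff.microseconds / 1000000.0 )
print migrationTime  # 1815.0 seconds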
Example #28
    def ftsTransfer(self):
        """ replicate and register using FTS """

        self.log.info("scheduling files in FTS...")

        bannedTargets = self.checkSEsRSS()
        if not bannedTargets['OK']:
            gMonitor.addMark("FTSScheduleAtt")
            gMonitor.addMark("FTSScheduleFail")
            return bannedTargets

        if bannedTargets['Value']:
            return S_OK("%s targets are banned for writing" %
                        ",".join(bannedTargets['Value']))

        # Can continue now
        self.log.verbose("No targets banned for writing")

        toSchedule = {}

        for opFile in self.getWaitingFilesList():
            opFile.Error = ''
            gMonitor.addMark("FTSScheduleAtt")
            # # check replicas
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                continue
            replicas = replicas["Value"]

            if not replicas["Valid"] and replicas["Banned"]:
                self.log.warn(
                    "unable to schedule '%s', replicas only at banned SEs" %
                    opFile.LFN)
                gMonitor.addMark("FTSScheduleFail")
                continue

            validReplicas = replicas["Valid"]
            bannedReplicas = replicas["Banned"]

            if not validReplicas and bannedReplicas:
                self.log.warn(
                    "unable to schedule '%s', replicas only at banned SEs" %
                    opFile.LFN)
                gMonitor.addMark("FTSScheduleFail")
                continue

            if validReplicas:
                validTargets = list(
                    set(self.operation.targetSEList) - set(validReplicas))
                if not validTargets:
                    self.log.info("file %s is already present at all targets" %
                                  opFile.LFN)
                    opFile.Status = "Done"
                    continue

                toSchedule[opFile.LFN] = [opFile, validReplicas, validTargets]

        res = self._addMetadataToFiles(toSchedule)
        if not res['OK']:
            return res
        else:
            filesToScheduleList = res['Value']

        if filesToScheduleList:

            ftsSchedule = self.ftsClient.ftsSchedule(
                self.request.RequestID, self.operation.OperationID,
                filesToScheduleList)
            if not ftsSchedule["OK"]:
                self.log.error(ftsSchedule["Message"])
                return ftsSchedule

            # might have nothing to schedule
            ftsSchedule = ftsSchedule["Value"]
            if not ftsSchedule:
                return S_OK()

            for fileID in ftsSchedule["Successful"]:
                gMonitor.addMark("FTSScheduleOK", 1)
                for opFile in self.operation:
                    if fileID == opFile.FileID:
                        opFile.Status = "Scheduled"
                        self.log.always("%s has been scheduled for FTS" %
                                        opFile.LFN)

            for fileID, reason in ftsSchedule["Failed"]:
                gMonitor.addMark("FTSScheduleFail", 1)
                for opFile in self.operation:
                    if fileID == opFile.FileID:
                        opFile.Error = reason
                        self.log.error("unable to schedule %s for FTS: %s" %
                                       (opFile.LFN, opFile.Error))
        else:
            self.log.info("No files to schedule after metadata checks")

        # Just in case some transfers could not be scheduled, try them with RM
        return self.rmTransfer(fromFTS=True)
Example #29
  def execute( self ):
    ''' Main execution method
    '''

    gMonitor.addMark( 'Iteration', 1 )
    # Get all the transformations
    result = self.transClient.getTransformations( condDict = {'Status': self.transfStatuses }, timeout = 320 )
    if not result['OK']:
      gLogger.error( "UpdateTransformationCounters.execute: Failed to get transformations.", result['Message'] )
      return S_OK()
    # Process each transformation
    jobsStates = self.transClient.getTransformationCountersStatuses( 'Tasks' )['Value']
    filesStates = self.transClient.getTransformationCountersStatuses( 'Files' )['Value']

    for transDict in result['Value']:
      transID = long( transDict['TransformationID'] )
      gLogger.debug( "Looking at transformationID %d" % transID )
      counterDict = {}
      counterDict['TransformationID'] = transID

      #Take care of the Tasks' states
      gLogger.verbose( "Getting the tasks stats for Transformation %s" % transID )
      res = self.transClient.getTransformationTaskStats( transID )
      if not res['OK']:
        gLogger.warn( "Could not get Transformation Task Stats for transformation %s : %s" % ( transID,
                                                                                               res['Message'] ) )
        break
      else:
        taskDict = {}
        if res['Value']:
          taskDict = res['Value']
          gLogger.verbose( "Got %s tasks dict for transformation %s" % ( str( taskDict ), transID ) )
          for state in jobsStates:
            counterDict[state] = taskDict.get( state, 0 )
        else:
          gLogger.warn( "No Task Statuses found" )
          break

      #Now look for the files' states  
      gLogger.verbose( "Getting the files stats for Transformation %s" % transID )
      res = self.transClient.getTransformationStats( transID )
      if not res['OK']:
        gLogger.warn( "Could not get Transformation Stats for transformation %s : %s" % ( transID,
                                                                                          res['Message'] ) )
        break
      else:
        fileDict = {}
        if res['Value']:
          fileDict = res['Value']
          gLogger.debug( "Got %s file dict for transformation %s" % ( str( fileDict ), transID ) )
          for state in filesStates:
            counterDict[state] = fileDict.get( state, 0 )
        else:
          gLogger.warn( "No File Statuses found" )
          break

      gLogger.verbose( "Updating the counters for transformation %s" % transID )
      res = self.transClient.updateTransformationCounters( counterDict )
      if not res['OK']:
        gLogger.error( "Failed updating counters for transformation %s: %s" % ( transID, res['Message'] ) )
      else:
        gLogger.verbose( "Updated the counters of transformation %s" % transID )

    return S_OK()
Example #30
    def rmTransfer(self):
        """ replicate and register using ReplicaManager  """
        self.log.info("transferring files using replica manager...")
        # # source SE
        sourceSE = self.operation.SourceSE if self.operation.SourceSE else None
        if sourceSE:
            # # check source se for read
            sourceRead = self.rssSEStatus(sourceSE, "ReadAccess")
            if not sourceRead["OK"]:
                self.log.error(sourceRead["Message"])
                for opFile in self.operation:
                    opFile.Error = sourceRead["Message"]
                    opFile.Status = "Failed"
                self.operation.Error = sourceRead["Message"]
                gMonitor.addMark("ReplicateAndRegisterAtt",
                                 len(self.operation))
                gMonitor.addMark("ReplicateFail", len(self.operation))
                return sourceRead

            if not sourceRead["Value"]:
                self.operation.Error = "SourceSE %s is banned for reading" % sourceSE
                self.log.error(self.operation.Error)
                return S_ERROR(self.operation.Error)

        # # list of targetSEs
        targetSEs = self.operation.targetSEList
        # # check targetSEs for write access
        bannedTargets = []
        for targetSE in targetSEs:
            writeStatus = self.rssSEStatus(targetSE, "WriteAccess")
            if not writeStatus["OK"]:
                self.log.error(writeStatus["Message"])
                for opFile in self.operation:
                    opFile.Error = "unknown targetSE: %s" % targetSE
                    opFile.Status = "Failed"
                self.operation.Error = "unknown targetSE: %s" % targetSE
                return S_ERROR(self.operation.Error)

            if not writeStatus["Value"]:
                self.log.error("TargetSE %s in banned for writing right now" %
                               targetSE)
                bannedTargets.append(targetSE)
                self.operation.Error += "banned targetSE: %s;" % targetSE
        # # some targets are banned? return
        if bannedTargets:
            return S_ERROR("%s targets are banned for writing" %
                           ",".join(bannedTargets))

        # # loop over targetSE
        for targetSE in targetSEs:

            # # check target SE
            targetWrite = self.rssSEStatus(targetSE, "WriteAccess")
            if not targetWrite["OK"]:
                self.log.error(targetWrite["Message"])
                for opFile in self.operation:
                    opFile.Error = targetWrite["Message"]
                    opFile.Status = "Failed"
                self.operation.Error = targetWrite["Message"]
                return targetWrite
            if not targetWrite["Value"]:
                reason = "TargetSE %s is banned for writing" % targetSE
                self.log.error(reason)
                self.operation.Error = reason
                continue

            # # get waiting files
            waitingFiles = self.getWaitingFilesList()

            # # loop over files
            for opFile in waitingFiles:

                gMonitor.addMark("ReplicateAndRegisterAtt", 1)
                lfn = opFile.LFN

                if not sourceSE:
                    replicas = self._filterReplicas(opFile)
                    if not replicas["OK"]:
                        self.log.error(replicas["Message"])
                        continue
                    replicas = replicas["Value"]
                    if not replicas["Valid"]:
                        self.log.warn("unable to find valid replicas for %s" %
                                      lfn)
                        continue
                    # # get the first one in the list
                    sourceSE = replicas["Valid"][0]

                # # call ReplicaManager
                res = self.replicaManager().replicateAndRegister(
                    lfn, targetSE, sourceSE=sourceSE)

                if res["OK"]:

                    if lfn in res["Value"]["Successful"]:

                        if "replicate" in res["Value"]["Successful"][lfn]:

                            repTime = res["Value"]["Successful"][lfn][
                                "replicate"]
                            self.log.info("file %s replicated at %s in %s s." %
                                          (lfn, targetSE, repTime))

                            gMonitor.addMark("ReplicateOK", 1)

                            if "register" in res["Value"]["Successful"][lfn]:

                                gMonitor.addMark("RegisterOK", 1)
                                regTime = res["Value"]["Successful"][lfn][
                                    "register"]
                                self.log.info(
                                    "file %s registered at %s in %s s." %
                                    (lfn, targetSE, regTime))

                            else:

                                gMonitor.addMark("RegisterFail", 1)
                                self.log.info("failed to register %s at %s." %
                                              (lfn, targetSE))

                                opFile.Error = "Failed to register"
                                opFile.Status = "Failed"
                                # # add register replica operation
                                self.addRegisterReplica(opFile, targetSE)

                        else:

                            self.log.info("failed to replicate %s to %s." %
                                          (lfn, targetSE))
                            gMonitor.addMark("ReplicateFail", 1)
                            opFile.Error = "Failed to replicate"

                    else:

                        gMonitor.addMark("ReplicateFail", 1)
                        reason = res["Value"]["Failed"][lfn]
                        self.log.error(
                            "failed to replicate and register file %s at %s: %s"
                            % (lfn, targetSE, reason))
                        opFile.Error = reason

                else:

                    gMonitor.addMark("ReplicateFail", 1)
                    opFile.Error = "ReplicaManager error: %s" % res["Message"]
                    self.log.error(opFile.Error)

                if not opFile.Error:
                    self.log.info(
                        "file %s has been replicated to all targetSEs" % lfn)
                    opFile.Status = "Done"

        return S_OK()
Example #31
    def __call__(self):
        """ reTransfer operation execution """
        # # list of targetSEs
        targetSEs = self.operation.targetSEList
        # # check targetSE for write access
        targetSE = targetSEs[0]
        bannedTargets = self.checkSEsRSS(targetSE)
        if not bannedTargets['OK']:
            gMonitor.addMark("FileReTransferAtt")
            gMonitor.addMark("FileReTransferFail")
            return bannedTargets

        if bannedTargets['Value']:
            return S_OK("%s targets are banned for writing" %
                        ",".join(bannedTargets['Value']))

        # # get waiting files
        waitingFiles = self.getWaitingFilesList()
        # # prepare waiting files
        toRetransfer = dict([(opFile.PFN, opFile) for opFile in waitingFiles])

        gMonitor.addMark("FileReTransferAtt", len(toRetransfer))

        if len(targetSEs) != 1:
            error = "only one TargetSE allowed, got %d" % len(targetSEs)
            for opFile in toRetransfer.values():
                opFile.Error = error
                opFile.Status = "Failed"
            self.operation.Error = error
            gMonitor.addMark("FileReTransferFail", len(toRetransfer))
            return S_ERROR(error)

        se = StorageElement(targetSE)
        for opFile in toRetransfer.values():
            reTransfer = se.retransferOnlineFile(opFile.LFN)
            if not reTransfer["OK"]:
                opFile.Error = reTransfer["Message"]
                self.log.error("Retransfer failed", opFile.Error)
                gMonitor.addMark("FileReTransferFail", 1)
                continue
            reTransfer = reTransfer["Value"]
            if opFile.LFN in reTransfer["Failed"]:
                opFile.Error = reTransfer["Failed"][opFile.LFN]
                self.log.error("Retransfer failed", opFile.Error)
                gMonitor.addMark("FileReTransferFail", 1)
                continue
            opFile.Status = "Done"
            self.log.info("%s retransfer done" % opFile.LFN)
            gMonitor.addMark("FileReTransferOK", 1)

        return S_OK()
Example #32
 def export_listDirectory(self, lfns, verbose):
     """ List the contents of supplied directories """
     gMonitor.addMark('ListDirectory', 1)
     return gFileCatalogDB.listDirectory(lfns,
                                         self.getRemoteCredentials(),
                                         verbose=verbose)
Example #33
  def __call__( self ):
    """ LogUpload operation processing """
    # # list of targetSEs

    targetSEs = self.operation.targetSEList

    if len( targetSEs ) != 1:
      self.log.error( "wrong value for TargetSE list = %s, should contain only one target!" % targetSEs )
      self.operation.Error = "Wrong parameters: TargetSE should contain only one targetSE"
      for opFile in self.operation:

        opFile.Status = "Failed"
        opFile.Error = "Wrong parameters: TargetSE should contain only one targetSE"

        gMonitor.addMark( "LogUploadAtt", 1 )
        gMonitor.addMark( "LogUploadFail", 1 )

      return S_ERROR( "TargetSE should contain only one target, got %s" % targetSEs )

    targetSE = targetSEs[0]
    targetWrite = self.rssSEStatus( targetSE, "WriteAccess" )
    if not targetWrite["OK"]:
      self.log.error( targetWrite["Message"] )
      for opFile in self.operation:
        opFile.Status = "Failed"
        opFile.Error = "Wrong parameters: %s" % targetWrite["Message"]
        gMonitor.addMark( "LogUploadAtt", 1 )
        gMonitor.addMark( "LogUploadFail", 1 )
      self.operation.Error = targetWrite["Message"]
      return S_OK()

    if not targetWrite["Value"]:
      self.operation.Error = "TargetSE %s is banned for writing"
      return S_ERROR( self.operation.Error )

    # # get waiting files
    waitingFiles = self.getWaitingFilesList()

    # # loop over files
    for opFile in waitingFiles:
      # # get LFN
      lfn = opFile.LFN
      self.log.info( "processing file %s" % lfn )
      gMonitor.addMark( "LogUploadAtt", 1 )

      destinationFolder = '/'.join( lfn.split( '/' )[0:-1] )
      destinationSubFolder = "%03d" % ( int(( os.path.basename( lfn ) ).split( '_' )[1].split( '.' )[0]) / 1000)
      destination = destinationFolder + '/' + destinationSubFolder

      logUpload = self.dm.replicate( lfn, targetSE, destPath = destination, localCache = self.workDirectory )
      if not logUpload["OK"]:
        gMonitor.addMark( "LogUploadFail", 1 )
#         self.dataLoggingClient().addFileRecord( lfn, "LogUploadFail", targetSE, "", "LogUpload" )
        self.log.error( "completely failed to upload log file: %s" % logUpload["Message"] )
        opFile.Error = str( logUpload["Message"] )
        opFile.Attempt += 1
        self.operation.Error = opFile.Error
        if 'No such file or directory' in opFile.Error:
          opFile.Status = 'Failed'
        continue

      if lfn in logUpload['Value']:
        gMonitor.addMark( "LogUploadOK", 1 )
#         self.dataLoggingClient().addFileRecord( lfn, "LogUpload", targetSE, "", "LogUpload" )
        opFile.Status = 'Done'
        self.log.info( "Uploaded %s to %s" % ( lfn, targetSE ) )

    return S_OK()
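The destinationSubFolder arithmetic above buckets log files by the numeric index embedded in the file name, one subfolder per thousand files. A standalone sketch of that mapping, assuming LFNs of the shape <dir>/<prefix>_<number>.<ext> (the sample LFN is invented):

import os

def logDestination( lfn ):
  """ map e.g. /lhcb/logs/Run_12345.tar to /lhcb/logs/012 (12345 / 1000 = 12) """
  destinationFolder = '/'.join( lfn.split( '/' )[0:-1] )
  index = int( os.path.basename( lfn ).split( '_' )[1].split( '.' )[0] )
  destinationSubFolder = "%03d" % ( index / 1000 )  # integer division in Python 2
  return destinationFolder + '/' + destinationSubFolder

print logDestination( "/lhcb/logs/Run_12345.tar" )  # prints /lhcb/logs/012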
Example #34
    def __call__(self):
        """ request processing """

        self.log.debug("about to execute request")
        gMonitor.addMark("RequestAtt", 1)

        # # setup proxy for request owner
        setupProxy = self.setupProxy()
        if not setupProxy["OK"]:
            self.log.error(setupProxy["Message"])
            self.request.Error = setupProxy["Message"]
            return self.updateRequest()
        shifter = setupProxy["Value"]["Shifter"]
        proxyFile = setupProxy["Value"]["ProxyFile"]

        error = None
        while self.request.Status == "Waiting":

            # # get waiting operation
            operation = self.request.getWaiting()
            if not operation["OK"]:
                self.log.error(operation["Message"])
                return operation
            operation = operation["Value"]
            self.log.info("executing operation #%s '%s'" %
                          (operation.Order, operation.Type))

            # # and handler for it
            handler = self.getHandler(operation)
            if not handler["OK"]:
                self.log.error("unable to process operation %s: %s" %
                               (operation.Type, handler["Message"]))
                gMonitor.addMark("%s%s" % (operation.Type, "Fail"), 1)
                operation.Error = handler["Message"]
                break

            handler = handler["Value"]
            # # set shifters list in the handler
            handler.shifter = shifter
            # # and execute
            try:
                gMonitor.addMark("%s%s" % (operation.Type, "Att"), 1)
                exe = handler()
                if not exe["OK"]:
                    self.log.error("unable to process operation %s: %s" %
                                   (operation.Type, exe["Message"]))
                    gMonitor.addMark("%s%s" % (operation.Type, "Fail"), 1)
                    gMonitor.addMark("RequestFail", 1)
            except Exception, error:
                self.log.exception("hit by exception: %s" % str(error))
                gMonitor.addMark("%s%s" % (operation.Type, "Fail"), 1)
                gMonitor.addMark("RequestFail", 1)
                break

            # # operation status check
            if operation.Status == "Done":
                gMonitor.addMark("%s%s" % (operation.Type, "OK"), 1)
            elif operation.Status == "Failed":
                gMonitor.addMark("%s%s" % (operation.Type, "Fail"), 1)
            elif operation.Status in ("Waiting", "Scheduled"):
                # # no update for waiting or all files scheduled
                break
Example #35
    def execute(self):
        """ execution in one cycle """

        now = datetime.datetime.utcnow()
        kickTime = now - datetime.timedelta(hours=self.KICK_GRACE_HOURS)
        rmTime = now - datetime.timedelta(days=self.DEL_GRACE_DAYS)

        # # kick
        statusList = ["Assigned"]
        requestIDsList = self.requestClient().getRequestIDsList(
            statusList, self.KICK_LIMIT)
        if not requestIDsList["OK"]:
            self.log.error("execute: %s" % requestIDsList["Message"])
            return requestIDsList

        requestIDsList = requestIDsList["Value"]
        kicked = 0
        for requestID, status, lastUpdate in requestIDsList:
            reqStatus = self.requestClient().getRequestStatus(requestID)
            if not reqStatus['OK']:
                self.log.error(("execute: unable to get request status",
                                reqStatus['Message']))
                continue
            status = reqStatus['Value']
            if lastUpdate < kickTime and status == 'Assigned':
                getRequest = self.requestClient().peekRequest(requestID)
                if not getRequest["OK"]:
                    self.log.error("execute: unable to read request '%s': %s" %
                                   (requestID, getRequest["Message"]))
                    continue
                getRequest = getRequest["Value"]
                if getRequest and getRequest.LastUpdate < kickTime:
                    self.log.info(
                        "execute: kick assigned request (%s/'%s') in status %s"
                        %
                        (requestID, getRequest.RequestName, getRequest.Status))
                    putRequest = self.requestClient().putRequest(getRequest)
                    if not putRequest["OK"]:
                        self.log.error(
                            "execute: unable to put request (%s/'%s'): %s" %
                            (requestID, getRequest.RequestName,
                             putRequest["Message"]))
                        continue
                    else:
                        self.log.verbose("Kicked request %d" %
                                         putRequest['Value'])
                    kicked += 1

        # # delete
        statusList = ["Done", "Failed", "Canceled"
                      ] if self.DEL_FAILED else ["Done"]
        requestIDsList = self.requestClient().getRequestIDsList(
            statusList, self.DEL_LIMIT)
        if not requestIDsList["OK"]:
            self.log.error("execute: %s" % requestIDsList["Message"])
            return requestIDsList

        requestIDsList = requestIDsList["Value"]
        deleted = 0
        for requestID, status, lastUpdate in requestIDsList:
            if lastUpdate < rmTime:
                self.log.info("execute: deleting request '%s' with status %s" %
                              (requestID, status))
                delRequest = self.requestClient().deleteRequest(requestID)
                if not delRequest["OK"]:
                    self.log.error(
                        "execute: unable to delete request '%s': %s" %
                        (requestID, delRequest["Message"]))
                    continue
                deleted += 1

        gMonitor.addMark("KickedRequests", kicked)
        gMonitor.addMark("DeletedRequests", deleted)
        self.log.info("execute: kicked assigned requests = %s" % kicked)
        self.log.info("execute: deleted finished requests = %s" % deleted)
        return S_OK()
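The kick and delete passes above share one piece of arithmetic: two cut-off timestamps derived from the current time, so that anything untouched since kickTime is put back into circulation and anything finished before rmTime is purged. The window logic in isolation (the grace values are illustrative, not the agent's configured defaults):

import datetime

KICK_GRACE_HOURS = 1  # illustrative values, not the agent's configured defaults
DEL_GRACE_DAYS = 60

now = datetime.datetime.utcnow()
kickTime = now - datetime.timedelta( hours = KICK_GRACE_HOURS )
rmTime = now - datetime.timedelta( days = DEL_GRACE_DAYS )

lastUpdate = now - datetime.timedelta( hours = 2 )
print lastUpdate < kickTime  # True: assigned for over an hour, kick it
print lastUpdate < rmTime    # False: far too recent to delete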
Example #36
    def __call__(self):
        """ remove replicas """
        # # prepare list of targetSEs
        targetSEs = self.operation.targetSEList
        # # get waiting files
        waitingFiles = self.getWaitingFilesList()
        # # and prepare dict
        toRemoveDict = dict([(opFile.LFN, opFile) for opFile in waitingFiles])

        self.log.info("found %s replicas to delete from %s sites" %
                      (len(toRemoveDict), len(targetSEs)))
        gMonitor.addMark("RemoveReplicaAtt",
                         len(toRemoveDict) * len(targetSEs))

        # # check targetSEs for removal
        bannedTargets = []
        for targetSE in targetSEs:
            # removeStatus = self.rssSEStatus( targetSE, "RemoveAccess" )
            removeStatus = self.rssSEStatus(targetSE, "WriteAccess")

            if not removeStatus["OK"]:
                self.log.error(removeStatus["Message"])
                for opFile in self.operation:
                    opFile.Error = "unknown targetSE: %s" % targetSE
                    opFile.Status = "Failed"
                self.operation.Error = "unknown targetSE: %s" % targetSE
                return S_ERROR(self.operation.Error)

            if not removeStatus["Value"]:
                self.log.error("%s in banned for remove right now" % targetSE)
                bannedTargets.append(targetSE)
                self.operation.Error += "banned targetSE: %s;" % targetSE

        # # some targets are banned? return
        if bannedTargets:
            return S_ERROR("targets %s are banned for removal" %
                           ",".join(bannedTargets))

        # # keep status for each targetSE
        removalStatus = dict.fromkeys(toRemoveDict.keys(), None)
        for lfn in removalStatus:
            removalStatus[lfn] = dict.fromkeys(targetSEs, None)

        # # loop over targetSEs
        for targetSE in targetSEs:

            self.log.info("removing replicas at %s" % targetSE)

            # # 1st step - bulk removal
            bulkRemoval = self.bulkRemoval(toRemoveDict, targetSE)
            if not bulkRemoval["OK"]:
                self.log.error(bulkRemoval["Message"])
                return bulkRemoval
            bulkRemoval = bulkRemoval["Value"]

            # # update removal status for successful files
            removalOK = [
                opFile for opFile in bulkRemoval.values() if not opFile.Error
            ]
            for opFile in removalOK:
                removalStatus[opFile.LFN][targetSE] = ""
            gMonitor.addMark("RemoveReplicaOK", len(removalOK))

            # # 2nd step - process the rest again
            toRetry = dict([(lfn, opFile)
                            for lfn, opFile in bulkRemoval.items()
                            if opFile.Error])
            for lfn, opFile in toRetry.items():
                self.singleRemoval(opFile, targetSE)
                if not opFile.Error:
                    gMonitor.addMark("RemoveReplicaOK", 1)
                    removalStatus[lfn][targetSE] = ""
                else:
                    gMonitor.addMark("RemoveReplicaFail", 1)
                    removalStatus[lfn][targetSE] = opFile.Error

        # # update file status for waiting files
        failed = 0
        for opFile in self.operation:
            if opFile.Status == "Waiting":
                errors = [
                    error for error in removalStatus[opFile.LFN].values()
                    if error
                ]
                if errors:
                    failed += 1
                    opFile.Error = ",".join(errors)
                    if "Write access not permitted for this credential" in opFile.Error:
                        opFile.Status = "Failed"
                        continue
                opFile.Status = "Done"

        if failed:
            self.operation.Error = "failed to remove %s replicas" % failed

        return S_OK()
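The removal above is a two-step pattern that recurs in these handlers: attempt the whole batch in one bulk call, then retry only the leftovers one file at a time before deciding the final per-file status. The shape of that pattern, with hypothetical doBulk/doSingle stand-ins for bulkRemoval/singleRemoval:

def doBulk( items ):
  """ hypothetical bulk call: returns { item : error message or None } """
  return dict( ( item, None if item % 2 else "bulk failed" ) for item in items )

def doSingle( item ):
  """ hypothetical per-item retry, slower but more forgiving """
  return None  # pretend the retry always succeeds

results = doBulk( [ 1, 2, 3, 4 ] )
toRetry = [ item for item, error in results.items() if error ]
for item in toRetry:
  results[item] = doSingle( item )
print results  # every error is either cleared by the retry or kept for reporting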
Example #37
    def execute(self):
        """ execution in one cycle """

        now = datetime.datetime.utcnow()
        kickTime = now - datetime.timedelta(hours=self.KICK_GRACE_HOURS)
        rmTime = now - datetime.timedelta(days=self.DEL_GRACE_DAYS)

        # # kick
        statusList = ["Assigned"]
        requestNamesList = self.requestClient().getRequestNamesList(
            statusList, self.DEL_LIMIT)
        if not requestNamesList["OK"]:
            self.log.error("execute: %s" % requestNamesList["Message"])
            return requestNamesList

        requestNamesList = requestNamesList["Value"]
        kicked = 0
        for requestName, status, lastUpdate in requestNamesList:
            if lastUpdate < kickTime:
                self.log.info("execute: kick assigned request '%s'" %
                              requestName)
                getRequest = self.requestClient().peekRequest(requestName)
                if not getRequest["OK"]:
                    self.log.error("execute: unable to read request '%s': %s" %
                                   (requestName, getRequest["Message"]))
                    continue
                getRequest = getRequest["Value"]
                if getRequest:
                    getRequest.Status = "Waiting"
                    putRequest = self.requestClient().putRequest(getRequest)
                    if not putRequest["OK"]:
                        self.log.error(
                            "execute: unable to put request '%s': %s" %
                            (requestName, putRequest["Message"]))
                        continue
                    kicked += 1

        # # delete
        statusList = ["Done", "Failed"] if self.DEL_FAILED else ["Done"]
        requestNamesList = self.requestClient().getRequestNamesList(
            statusList, self.DEL_LIMIT)
        if not requestNamesList["OK"]:
            self.log.error("execute: %s" % requestNamesList["Message"])
            return requestNamesList

        requestNamesList = requestNamesList["Value"]
        deleted = 0
        for requestName, status, lastUpdate in requestNamesList:
            if lastUpdate < rmTime:
                self.log.info("execute: deleting request '%s' with status %s" %
                              (requestName, status))
                delRequest = self.requestClient().deleteRequest(requestName)
                if not delRequest["OK"]:
                    self.log.error(
                        "execute: unable to delete request '%s': %s" %
                        (requestName, delRequest["Message"]))
                    continue
                deleted += 1

        gMonitor.addMark("KickedRequests", kicked)
        gMonitor.addMark("DeletedRequests", deleted)
        self.log.info("execute: kicked assigned requests = %s" % kicked)
        self.log.info("execute: deleted finished requests = %s" % deleted)
        return S_OK()
Example #38
    def __call__(self):
        """ PutAndRegister operation processing """
        # # list of targetSEs

        targetSEs = self.operation.targetSEList

        if len(targetSEs) != 1:
            self.log.error(
                "wrong value for TargetSE list = %s, should contain only one target!"
                % targetSEs)
            self.operation.Error = "Wrong parameters: TargetSE should contain only one targetSE"
            for opFile in self.operation:

                opFile.Status = "Failed"
                opFile.Error = "Wrong parameters: TargetSE should contain only one targetSE"

                gMonitor.addMark("PutAtt", 1)
                gMonitor.addMark("PutFail", 1)

            return S_ERROR("TargetSE should contain only one target, got %s" %
                           targetSEs)

        targetSE = targetSEs[0]
        bannedTargets = self.checkSEsRSS(targetSE)
        if not bannedTargets['OK']:
            gMonitor.addMark("PutAtt")
            gMonitor.addMark("PutFail")
            return bannedTargets

        if bannedTargets['Value']:
            return S_OK("%s targets are banned for writing" %
                        ",".join(bannedTargets['Value']))

        # # get waiting files
        waitingFiles = self.getWaitingFilesList()

        # # loop over files
        for opFile in waitingFiles:
            # # get LFN
            lfn = opFile.LFN
            self.log.info("processing file %s" % lfn)
            gMonitor.addMark("PutAtt", 1)

            pfn = opFile.PFN
            guid = opFile.GUID
            checksum = opFile.Checksum

            # # finally, call the DataManager
            putAndRegister = DataManager(
                catalogs=self.operation.Catalog).putAndRegister(
                    lfn, pfn, targetSE, guid=guid, checksum=checksum)
            if not putAndRegister["OK"]:
                gMonitor.addMark("PutFail", 1)
                self.dataLoggingClient().addFileRecord(lfn, "PutFail",
                                                       targetSE, "",
                                                       "PutAndRegister")
                self.log.error(
                    "completely failed to put and register file: %s" %
                    putAndRegister["Message"])
                opFile.Error = str(putAndRegister["Message"])
                self.operation.Error = str(putAndRegister["Message"])
                continue

            putAndRegister = putAndRegister["Value"]

            if lfn in putAndRegister["Failed"]:
                gMonitor.addMark("PutFail", 1)
                self.dataLoggingClient().addFileRecord(lfn, "PutFail",
                                                       targetSE, "",
                                                       "PutAndRegister")

                reason = putAndRegister["Failed"][lfn]
                self.log.error("failed to put and register file %s at %s: %s" %
                               (lfn, targetSE, reason))
                opFile.Error = str(reason)
                self.operation.Error = str(reason)
                continue

            putAndRegister = putAndRegister["Successful"]
            if lfn in putAndRegister:

                if "put" not in putAndRegister[lfn]:

                    gMonitor.addMark("PutFail", 1)
                    self.dataLoggingClient().addFileRecord(
                        lfn, "PutFail", targetSE, "", "PutAndRegister")

                    self.log.info("failed to put %s to %s" % (lfn, targetSE))

                    opFile.Error = "put failed"
                    self.operation.Error = "put failed"
                    continue

                if "register" not in putAndRegister[lfn]:

                    gMonitor.addMark("PutOK", 1)
                    gMonitor.addMark("RegisterFail", 1)

                    self.dataLoggingClient().addFileRecord(
                        lfn, "Put", targetSE, "", "PutAndRegister")
                    self.dataLoggingClient().addFileRecord(
                        lfn, "RegisterFail", targetSE, "", "PutAndRegister")

                    self.log.info("put of %s to %s took %s seconds" %
                                  (lfn, targetSE, putAndRegister[lfn]["put"]))
                    self.log.error("register of %s to %s failed" %
                                   (lfn, targetSE))

                    opFile.Error = "failed to register %s at %s" % (lfn,
                                                                    targetSE)
                    opFile.Status = "Failed"

                    self.log.info(opFile.Error)
                    registerOperation = self.getRegisterOperation(
                        opFile, targetSE)
                    self.request.insertAfter(registerOperation, self.operation)
                    continue

                gMonitor.addMark("PutOK", 1)
                gMonitor.addMark("RegisterOK", 1)

                self.dataLoggingClient().addFileRecord(lfn, "Put", targetSE,
                                                       "", "PutAndRegister")
                self.dataLoggingClient().addFileRecord(lfn, "Register",
                                                       targetSE, "",
                                                       "PutAndRegister")

                opFile.Status = "Done"
                for op in ("put", "register"):
                    self.log.info("%s of %s to %s took %s seconds" %
                                  (op, lfn, targetSE, putAndRegister[lfn][op]))

        return S_OK()
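DataManager bulk operations such as putAndRegister and replicateAndRegister report per-LFN outcomes in nested "Successful"/"Failed" dictionaries, which is what the branching above unpacks step by step. A schematic of that structure (the LFNs and timings are invented):

res = { "OK" : True,
        "Value" : { "Successful" : { "/lhcb/a.dst" : { "put" : 1.2, "register" : 0.3 },
                                     "/lhcb/b.dst" : { "put" : 0.9 } },
                    "Failed" : { "/lhcb/c.dst" : "no such file" } } }

for lfn, report in res["Value"]["Successful"].items():
  if "register" not in report:
    print "%s stored but not registered, needs a follow-up RegisterFile" % lfn
  else:
    print "%s done (put %.1f s, register %.1f s)" % ( lfn, report["put"], report["register"] )
for lfn, reason in res["Value"]["Failed"].items():
  print "%s failed: %s" % ( lfn, reason )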
Example #39
    def selectJob(self, resourceDescription):
        """ Main job selection function to find the highest priority job
        matching the resource capacity
    """

        startTime = time.time()
        resourceDict = self.__processResourceDescription(resourceDescription)

        credDict = self.getRemoteCredentials()
        #Check credentials; a generic pilot may match any group in its VO
        if Properties.GENERIC_PILOT in credDict['properties']:
            #You can only match groups in the same VO
            vo = Registry.getVOForGroup(credDict['group'])
            result = Registry.getGroupsForVO(vo)
            if result['OK']:
                resourceDict['OwnerGroup'] = result['Value']
        else:
            #If it's a private pilot, the DN has to be the same
            if Properties.PILOT in credDict['properties']:
                gLogger.notice("Setting the resource DN to the credentials DN")
                resourceDict['OwnerDN'] = credDict['DN']
            #If it's a job sharing. The group has to be the same and just check that the DN (if any)
            # belongs to the same group
            elif Properties.JOB_SHARING in credDict['properties']:
                resourceDict['OwnerGroup'] = credDict['group']
                gLogger.notice(
                    "Setting the resource group to the credentials group")
                if 'OwnerDN' in resourceDict and resourceDict[
                        'OwnerDN'] != credDict['DN']:
                    ownerDN = resourceDict['OwnerDN']
                    result = Registry.getGroupsForDN(resourceDict['OwnerDN'])
                    if not result['OK'] or credDict['group'] not in result[
                            'Value']:
                        #DN is not in the same group! bad boy.
                        gLogger.notice(
                            "You cannot request jobs from DN %s. It does not belong to your group!"
                            % ownerDN)
                        resourceDict['OwnerDN'] = credDict['DN']
            #Nothing special, group and DN have to be the same
            else:
                resourceDict['OwnerDN'] = credDict['DN']
                resourceDict['OwnerGroup'] = credDict['group']

        # Check the pilot DIRAC version
        if self.__opsHelper.getValue("Pilot/CheckVersion", True):
            if 'ReleaseVersion' not in resourceDict:
                if 'DIRACVersion' not in resourceDict:
                    return S_ERROR(
                        'Version check requested and not provided by Pilot')
                else:
                    pilotVersion = resourceDict['DIRACVersion']
            else:
                pilotVersion = resourceDict['ReleaseVersion']

            validVersions = self.__opsHelper.getValue("Pilot/Version", [])
            if validVersions and pilotVersion not in validVersions:
                return S_ERROR( 'Pilot version does not match the production version %s not in ( %s )' % \
                               ( pilotVersion, ",".join( validVersions ) ) )
            #Check project if requested
            validProject = self.__opsHelper.getValue("Pilot/Project", "")
            if validProject:
                if 'ReleaseProject' not in resourceDict:
                    return S_ERROR(
                        "Version check requested but expected project %s not received"
                        % validProject)
                if resourceDict['ReleaseProject'] != validProject:
                    return S_ERROR(
                        "Version check requested but expected project %s != received %s"
                        % (validProject, resourceDict['ReleaseProject']))

        # Update pilot information
        pilotInfoReported = False
        pilotReference = resourceDict.get('PilotReference', '')
        if pilotReference:
            if "PilotInfoReportedFlag" in resourceDict and not resourceDict[
                    'PilotInfoReportedFlag']:
                gridCE = resourceDict.get('GridCE', 'Unknown')
                site = resourceDict.get('Site', 'Unknown')
                benchmark = resourceDict.get('PilotBenchmark', 0.0)
                gLogger.verbose(
                    'Reporting pilot info for %s: gridCE=%s, site=%s, benchmark=%f'
                    % (pilotReference, gridCE, site, benchmark))
                result = gPilotAgentsDB.setPilotStatus(pilotReference,
                                                       status='Running',
                                                       gridSite=gridCE,
                                                       destination=site,
                                                       benchmark=benchmark)
                if result['OK']:
                    pilotInfoReported = True

        #Check the site mask
        if 'Site' not in resourceDict:
            return S_ERROR('Missing Site Name in Resource JDL')

        # Get common site mask and check the agent site
        result = gJobDB.getSiteMask(siteState='Active')
        if not result['OK']:
            return S_ERROR('Internal error: can not get site mask')
        maskList = result['Value']

        siteName = resourceDict['Site']
        if siteName not in maskList:
            if 'GridCE' not in resourceDict:
                return S_ERROR('Site not in mask and GridCE not specified')
            #Even if the site is banned, if it defines a CE, it must be able to check it
            del resourceDict['Site']

        resourceDict['Setup'] = self.serviceInfoDict['clientSetup']

        gLogger.verbose("Resource description:")
        for key in resourceDict:
            gLogger.verbose("%s : %s" % (key.rjust(20), resourceDict[key]))

        negativeCond = self.__limiter.getNegativeCondForSite(siteName)
        result = gTaskQueueDB.matchAndGetJob(resourceDict,
                                             negativeCond=negativeCond)

        if DEBUG:
            print result

        if not result['OK']:
            return result
        result = result['Value']
        if not result['matchFound']:
            return S_ERROR('No match found')

        jobID = result['jobId']
        resAtt = gJobDB.getJobAttributes(jobID,
                                         ['OwnerDN', 'OwnerGroup', 'Status'])
        if not resAtt['OK']:
            return S_ERROR('Could not retrieve job attributes')
        if not resAtt['Value']:
            return S_ERROR('No attributes returned for job')
        if resAtt['Value']['Status'] != 'Waiting':
            gLogger.error('Job %s matched by the TQ is not in Waiting state' %
                          str(jobID))
            result = gTaskQueueDB.deleteJob(jobID)
            if not result['OK']:
                return result
            return S_ERROR("Job %s is not in Waiting state" % str(jobID))

        attNames = ['Status', 'MinorStatus', 'ApplicationStatus', 'Site']
        attValues = ['Matched', 'Assigned', 'Unknown', siteName]
        result = gJobDB.setJobAttributes(jobID, attNames, attValues)
        # result = gJobDB.setJobStatus( jobID, status = 'Matched', minor = 'Assigned' )
        result = gJobLoggingDB.addLoggingRecord(jobID,
                                                status='Matched',
                                                minor='Assigned',
                                                source='Matcher')

        result = gJobDB.getJobJDL(jobID)
        if not result['OK']:
            return S_ERROR('Failed to get the job JDL')

        resultDict = {}
        resultDict['JDL'] = result['Value']
        resultDict['JobID'] = jobID

        matchTime = time.time() - startTime
        gLogger.info("Match time: [%s]" % str(matchTime))
        gMonitor.addMark("matchTime", matchTime)

        # Get some extra stuff into the response returned
        resOpt = gJobDB.getJobOptParameters(jobID)
        if resOpt['OK']:
            for key, value in resOpt['Value'].items():
                resultDict[key] = value
        resAtt = gJobDB.getJobAttributes(jobID, ['OwnerDN', 'OwnerGroup'])
        if not resAtt['OK']:
            return S_ERROR('Could not retrieve job attributes')
        if not resAtt['Value']:
            return S_ERROR('No attributes returned for job')

        if self.__opsHelper.getValue("JobScheduling/CheckMatchingDelay", True):
            self.__limiter.updateDelayCounters(siteName, jobID)

        # Report pilot-job association
        if pilotReference:
            result = gPilotAgentsDB.setCurrentJobID(pilotReference, jobID)
            result = gPilotAgentsDB.setJobForPilot(jobID,
                                                   pilotReference,
                                                   updateStatus=False)

        resultDict['DN'] = resAtt['Value']['OwnerDN']
        resultDict['Group'] = resAtt['Value']['OwnerGroup']
        resultDict['PilotInfoReportedFlag'] = pilotInfoReported
        return S_OK(resultDict)
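The version gate inside selectJob reduces to: prefer ReleaseVersion, fall back to DIRACVersion, and reject anything outside the configured list. The same check extracted as a small function, with the configuration lookup replaced by a plain argument for the sketch:

def checkPilotVersion( resourceDict, validVersions ):
  """ sketch of the pilot version gate; returns an S_OK/S_ERROR-style dict """
  pilotVersion = resourceDict.get( 'ReleaseVersion', resourceDict.get( 'DIRACVersion' ) )
  if not pilotVersion:
    return { 'OK' : False, 'Message' : 'Version check requested and not provided by Pilot' }
  if validVersions and pilotVersion not in validVersions:
    return { 'OK' : False, 'Message' : 'Pilot version %s not in ( %s )' % ( pilotVersion,
                                                                            ",".join( validVersions ) ) }
  return { 'OK' : True, 'Value' : pilotVersion }

print checkPilotVersion( { 'DIRACVersion' : 'v6r20p5' }, [ 'v6r20p5' ] )  # OK
print checkPilotVersion( {}, [ 'v6r20p5' ] )                              # error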
Example #40
    def ftsTransfer(self):
        """ replicate and register using FTS """

        self.log.info("scheduling files in FTS...")

        bannedTargets = self.checkSEsRSS()
        if not bannedTargets['OK']:
            gMonitor.addMark("FTSScheduleAtt")
            gMonitor.addMark("FTSScheduleFail")
            return bannedTargets

        if bannedTargets['Value']:
            return S_OK("%s targets are banned for writing" %
                        ",".join(bannedTargets['Value']))

        # Can continue now
        self.log.verbose("No targets banned for writing")

        toSchedule = {}

        for opFile in self.getWaitingFilesList():
            opFile.Error = ''
            gMonitor.addMark("FTSScheduleAtt")
            # # check replicas
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                continue
            replicas = replicas["Value"]

            validReplicas = replicas["Valid"]
            noMetaReplicas = replicas["NoMetadata"]
            noReplicas = replicas['NoReplicas']
            badReplicas = replicas['Bad']
            noPFN = replicas['NoPFN']

            if validReplicas:
                validTargets = list(
                    set(self.operation.targetSEList) - set(validReplicas))
                if not validTargets:
                    self.log.info("file %s is already present at all targets" %
                                  opFile.LFN)
                    opFile.Status = "Done"
                else:
                    toSchedule[opFile.LFN] = [
                        opFile, validReplicas, validTargets
                    ]
            else:
                gMonitor.addMark("FTSScheduleFail")
                if noMetaReplicas:
                    self.log.warn(
                        "unable to schedule '%s', couldn't get metadata at %s"
                        % (opFile.LFN, ','.join(noMetaReplicas)))
                    opFile.Error = "Couldn't get metadata"
                elif noReplicas:
                    self.log.error(
                        "unable to schedule %s, file doesn't exist at %s" %
                        (opFile.LFN, ','.join(noReplicas)))
                    opFile.Error = 'No replicas found'
                    opFile.Status = 'Failed'
                elif badReplicas:
                    self.log.error(
                        "unable to schedule %s, all replicas have a bad checksum at %s"
                        % (opFile.LFN, ','.join(badReplicas)))
                    opFile.Error = 'All replicas have a bad checksum'
                    opFile.Status = 'Failed'
                elif noPFN:
                    self.log.warn(
                        "unable to schedule %s, could not get a PFN at %s" %
                        (opFile.LFN, ','.join(noPFN)))

        res = self._addMetadataToFiles(toSchedule)
        if not res['OK']:
            return res
        else:
            filesToScheduleList = res['Value']

        if filesToScheduleList:

            ftsSchedule = self.ftsClient.ftsSchedule(
                self.request.RequestID, self.operation.OperationID,
                filesToScheduleList)
            if not ftsSchedule["OK"]:
                self.log.error("Completely failed to schedule to FTS:",
                               ftsSchedule["Message"])
                return ftsSchedule

            # might have nothing to schedule
            ftsSchedule = ftsSchedule["Value"]
            if not ftsSchedule:
                return S_OK()

            self.log.info("%d files have been scheduled to FTS" %
                          len(ftsSchedule['Successful']))
            for opFile in self.operation:
                fileID = opFile.FileID
                if fileID in ftsSchedule["Successful"]:
                    gMonitor.addMark("FTSScheduleOK", 1)
                    opFile.Status = "Scheduled"
                    self.log.debug("%s has been scheduled for FTS" %
                                   opFile.LFN)
                elif fileID in ftsSchedule["Failed"]:
                    gMonitor.addMark("FTSScheduleFail", 1)
                    opFile.Error = ftsSchedule["Failed"][fileID]
                    if 'sourceSURL equals to targetSURL' in opFile.Error:
                        # In this case there is no need to continue
                        opFile.Status = 'Failed'
                    self.log.warn("unable to schedule %s for FTS: %s" %
                                  (opFile.LFN, opFile.Error))
        else:
            self.log.info("No files to schedule after metadata checks")

        # Just in case some transfers could not be scheduled, try them with RM
        return self.dmTransfer(fromFTS=True)
Example #41
    def rmTransfer(self, fromFTS=False):
        """ replicate and register using ReplicaManager  """
        # # get waiting files. If none just return
        waitingFiles = self.getWaitingFilesList()
        if not waitingFiles:
            return S_OK()
        if fromFTS:
            self.log.info(
                "Trying transfer using replica manager as FTS failed")
        else:
            self.log.info("Transferring files using replica manager...")
        # # source SE
        sourceSE = self.operation.SourceSE if self.operation.SourceSE else None
        if sourceSE:
            # # check source se for read
            sourceRead = self.rssSEStatus(sourceSE, "ReadAccess")
            if not sourceRead["OK"]:
                self.log.info(sourceRead["Message"])
                for opFile in self.operation:
                    opFile.Error = sourceRead["Message"]
                    opFile.Status = "Failed"
                self.operation.Error = sourceRead["Message"]
                gMonitor.addMark("ReplicateAndRegisterAtt",
                                 len(self.operation))
                gMonitor.addMark("ReplicateFail", len(self.operation))
                return sourceRead

            if not sourceRead["Value"]:
                self.operation.Error = "SourceSE %s is banned for reading" % sourceSE
                self.log.info(self.operation.Error)
                return S_OK(self.operation.Error)

        # # check targetSEs for write
        bannedTargets = self.checkSEsRSS()
        if not bannedTargets['OK']:
            gMonitor.addMark("ReplicateAndRegisterAtt", len(self.operation))
            gMonitor.addMark("ReplicateFail", len(self.operation))
            return bannedTargets

        if bannedTargets['Value']:
            return S_OK("%s targets are banned for writing" %
                        ",".join(bannedTargets['Value']))

        # Can continue now
        self.log.verbose("No targets banned for writing")

        # # loop over files
        for opFile in waitingFiles:

            gMonitor.addMark("ReplicateAndRegisterAtt", 1)
            opFile.Error = ''
            lfn = opFile.LFN

            # Check if replica is at the specified source
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                self.log.error(replicas["Message"])
                continue
            replicas = replicas["Value"]
            if not replicas["Valid"]:
                self.log.warn("unable to find valid replicas for %s" % lfn)
                continue
            # # get the first one in the list
            if sourceSE not in replicas['Valid']:
                if sourceSE:
                    self.log.warn(
                        "%s is not at specified sourceSE %s, changed to %s" %
                        (lfn, sourceSE, replicas["Valid"][0]))
                sourceSE = replicas["Valid"][0]

            # # loop over targetSE
            for targetSE in self.operation.targetSEList:

                # # call ReplicaManager
                if targetSE == sourceSE:
                    self.log.warn(
                        "Request to replicate %s to the source SE: %s" %
                        (lfn, sourceSE))
                    continue
                res = self.rm.replicateAndRegister(lfn,
                                                   targetSE,
                                                   sourceSE=sourceSE)

                if res["OK"]:

                    if lfn in res["Value"]["Successful"]:

                        if "replicate" in res["Value"]["Successful"][lfn]:

                            repTime = res["Value"]["Successful"][lfn][
                                "replicate"]
                            prString = "file %s replicated at %s in %s s." % (
                                lfn, targetSE, repTime)

                            gMonitor.addMark("ReplicateOK", 1)

                            if "register" in res["Value"]["Successful"][lfn]:

                                gMonitor.addMark("RegisterOK", 1)
                                regTime = res["Value"]["Successful"][lfn][
                                    "register"]
                                prString += ' and registered in %s s.' % regTime
                                self.log.info(prString)
                            else:

                                gMonitor.addMark("RegisterFail", 1)
                                prString += " but failed to register"
                                self.log.warn(prString)

                                opFile.Error = "Failed to register"
                                opFile.Status = "Failed"
                                # # add register replica operation
                                registerOperation = self.getRegisterOperation(
                                    opFile, targetSE)
                                self.request.insertAfter(
                                    registerOperation, self.operation)

                        else:

                            self.log.error("failed to replicate %s to %s." %
                                           (lfn, targetSE))
                            gMonitor.addMark("ReplicateFail", 1)
                            opFile.Error = "Failed to replicate"

                    else:

                        gMonitor.addMark("ReplicateFail", 1)
                        reason = res["Value"]["Failed"][lfn]
                        self.log.error(
                            "failed to replicate and register file %s at %s: %s"
                            % (lfn, targetSE, reason))
                        opFile.Error = reason

                else:

                    gMonitor.addMark("ReplicateFail", 1)
                    opFile.Error = "ReplicaManager error: %s" % res["Message"]
                    self.log.error(opFile.Error)

            if not opFile.Error:
                if len(self.operation.targetSEList) > 1:
                    self.log.info(
                        "file %s has been replicated to all targetSEs" % lfn)
                opFile.Status = "Done"

        return S_OK()
Example #42
  def execute( self ):
    subRequestAttributes = self.argumentsDict.get( 'SubRequestAttributes', {} )
    if not subRequestAttributes:
      return S_ERROR( "SubRequestAttributes not supplied" )
    subRequestFiles = self.argumentsDict.get( 'Files', {} )
    if not subRequestFiles:
      return S_ERROR( "Files not supplied" )
    targetSE = subRequestAttributes.get( 'TargetSE', '' )
    if not targetSE:
      return S_ERROR( "TargetSE not supplied" )
    catalog = subRequestAttributes.get( 'Catalogue', '' )
    status = subRequestAttributes.get( 'Status', 'Waiting' )
    if status != 'Waiting':
      return S_OK()

    for subRequestFile in subRequestFiles:
      lfn = str( subRequestFile.get( 'LFN', '' ) )
      if not lfn:
        self.log.error( "LFN not supplied" )
        continue
      pfn = subRequestFile.get( 'PFN', '' )
      if not pfn:
        self.log.error( "PFN not supplied" )
        continue
      addler = subRequestFile.get( 'Addler', '' )
      guid = subRequestFile.get( 'GUID', '' )
      status = subRequestFile.get( 'Status', 'Waiting' )
      if status != 'Waiting':
        self.log.info( "%s in %s status and not Waiting" % ( lfn, status ) )
        continue
      gMonitor.addMark( "Put and register", 1 )
      self.__getReplicaManager()
      res = self.rm.putAndRegister( lfn, pfn, targetSE, guid = guid, checksum = addler, catalog = catalog )
      if not res['OK']:
        gMonitor.addMark( "Put failed", 1 )
        self.log.error( "Failed to put file", res['Message'] )
        continue
      successful = res['Value']['Successful']
      failed = res['Value']['Failed']
      if ( lfn not in successful ) or ( 'put' not in successful[lfn] ):
        gMonitor.addMark( "Put failed", 1 )
        self.log.error( "Failed to put file" )
        continue
      if 'put' in successful[lfn]:
        gMonitor.addMark( "Put successful", 1 )
        gLogger.info( "Successfully put %s to %s in %s seconds." % ( lfn, targetSE, successful[lfn]['put'] ) )
        subRequestFile['Status'] = 'Done'
      if 'register' in successful[lfn]:
        gMonitor.addMark( "File registration successful", 1 )
        gLogger.info( "Successfully registered %s to %s in %s seconds." % ( lfn, targetSE, successful[lfn]['register'] ) )
      else:
        gMonitor.addMark( "File registration failed", 1 )
        gLogger.error( "Failed to register %s to %s." % ( lfn, targetSE ) )
        fileDict = failed[lfn]['register']
        registerRequestDict = { 'Attributes': { 'TargetSE': fileDict['TargetSE'], 'Operation': 'registerFile' },
                                'Files': [ { 'LFN': fileDict['LFN'], 'PFN': fileDict['PFN'], 'Size': fileDict['Size'],
                                             'Addler': fileDict['Addler'], 'GUID': fileDict['GUID'] } ] }
        print registerRequestDict
    return S_OK()
Example #43
    def __call__(self):
        """ reTransfer operation execution """
        # # list of targetSEs
        targetSEs = self.operation.targetSEList
        # # get waiting files
        waitingFiles = self.getWaitingFilesList()
        # # prepare waiting files
        toRetransfer = dict([(opFile.PFN, opFile) for opFile in waitingFiles])

        gMonitor.addMark("FileReTransferAtt", len(toRetransfer))

        if len(targetSEs) != 1:
            error = "only one TargetSE allowed, got %s" % len(targetSEs)
            for opFile in toRetransfer.values():
                opFile.Error = error
                opFile.Status = "Failed"
            self.operation.Error = error
            gMonitor.addMark("FileReTransferFail", len(toRetransfer))
            return S_ERROR(error)

        # # check targetSE for write access
        targetSE = targetSEs[0]
        writeStatus = self.rssSEStatus(targetSE, "WriteAccess")
        if not writeStatus["OK"]:
            self.log.error(writeStatus["Message"])
            for opFile in self.operation:
                opFile.Error = "unknown targetSE: %s" % targetSE
                opFile.Status = "Failed"
            self.operation.Error = "unknown targetSE: %s" % targetSE
            gMonitor.addMark("FileReTransferFail", len(toRetransfer))
            return S_ERROR(self.operation.Error)

        if not writeStatus["Value"]:
            self.log.error("TargetSE %s in banned for writing right now" %
                           targetSE)
            self.operation.Error += "banned targetSE: %s;" % targetSE
            gMonitor.addMark("FileReTransferFail", len(toRetransfer))
            return S_ERROR(self.operation.Error)

        for opFile in toRetransfer.values():
            reTransfer = self.replicaManager().onlineRetransfer(
                targetSE, opFile.PFN)
            if not reTransfer["OK"]:
                opFile.Error = reTransfer["Message"]
                self.log.error("%s retransfer failed: %s" % opFile.Error)
                gMonitor.addMark("FileReTransferFail", 1)
                continue
            reTransfer = reTransfer["Value"]
            if opFile.PFN in reTransfer["Failed"]:
                opFile.Error = reTransfer["Failed"][opFile.PFN]
                self.log.error("%s retransfer failed: %s" % opFile.Error)
                gMonitor.addMark("FileReTransferFail", 1)
                continue
            opFile.Status = "Done"
            self.log.info("%s retransfer done" % opFile.LFN)
            gMonitor.addMark("FileReTransferOK", 1)

        return S_OK()
Example #44
  def execute( self ):
    """ read requests from RequestClient and enqueue them into ProcessPool """
    gMonitor.addMark( "Iteration", 1 )
    # # requests (and so tasks) counter
    taskCounter = 0
    while taskCounter < self.__requestsPerCycle:
      self.log.debug( "execute: executing %d request in this cycle" % taskCounter )

      requestsToExecute = []

      if not self.__bulkRequest:
        self.log.info( "execute: ask for a single request" )
        getRequest = self.requestClient().getRequest()
        if not getRequest["OK"]:
          self.log.error( "execute: %s" % getRequest["Message"] )
          break
        if not getRequest["Value"]:
          self.log.info( "execute: no more 'Waiting' requests to process" )
          break
        requestsToExecute = [ getRequest["Value"] ]
      else:
        numberOfRequest = min( self.__bulkRequest, self.__requestsPerCycle - taskCounter )
        self.log.info( "execute: ask for %s requests" % numberOfRequest )
        getRequests = self.requestClient().getBulkRequests( numberOfRequest )
        if not getRequests["OK"]:
          self.log.error( "execute: %s" % getRequests["Message"] )
          break
        if not getRequests["Value"]:
          self.log.info( "execute: no more 'Waiting' requests to process" )
          break
        for rId in getRequests["Value"]["Failed"]:
          self.log.error( "execute: %s" % getRequests["Value"]["Failed"][rId] )

        requestsToExecute = getRequests["Value"]["Successful"].values()

      self.log.info( "execute: will execute %s requests " % len( requestsToExecute ) )

      for request in requestsToExecute:
        # # set task id
        taskID = request.RequestID
        # # save current request in cache
        self.cacheRequest( request )
        # # serialize to JSON
        result = request.toJSON()
        if not result['OK']:
          continue
        requestJSON = result['Value']

        self.log.info( "processPool tasks idle = %s working = %s" % ( self.processPool().getNumIdleProcesses(),
                                                                      self.processPool().getNumWorkingProcesses() ) )

        looping = 0
        while True:
          if not self.processPool().getFreeSlots():
            if not looping:
              self.log.info( "No free slots available in processPool, will wait %d seconds to proceed" % self.__poolSleep )
            time.sleep( self.__poolSleep )
            looping += 1
          else:
            if looping:
              self.log.info( "Free slot found after %d seconds" % looping * self.__poolSleep )
            looping = 0
            self.log.info( "spawning task for request '%s/%s'" % ( request.RequestID, request.RequestName ) )
            timeOut = self.getTimeout( request )
            enqueue = self.processPool().createAndQueueTask( RequestTask,
                                                             kwargs = { "requestJSON" : requestJSON,
                                                                        "handlersDict" : self.handlersDict,
                                                                        "csPath" : self.__configPath,
                                                                        "agentName": self.agentName },
                                                             taskID = taskID,
                                                             blocking = True,
                                                             usePoolCallbacks = True,
                                                             timeOut = timeOut )
            if not enqueue["OK"]:
              self.log.error( enqueue["Message"] )
            else:
              self.log.debug( "successfully enqueued task '%s'" % taskID )
              # # update monitor
              gMonitor.addMark( "Processed", 1 )
              # # update request counter
              taskCounter += 1
              # # task created, a little time kick to proceed
              time.sleep( 0.1 )
              break

    # # clean return
    return S_OK()
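The enqueue loop above spins until the process pool reports a free slot, sleeping self.__poolSleep seconds per attempt and logging the total wait. A stripped-down sketch of that wait-for-slot pattern, where hasFreeSlots is a hypothetical stand-in for self.processPool().getFreeSlots():

import time

def waitForFreeSlot(hasFreeSlots, poolSleep=5):
    # Block until hasFreeSlots() returns truthy; return seconds waited.
    waited = 0
    while not hasFreeSlots():
        time.sleep(poolSleep)
        waited += poolSleep
    return waited

The "Free slot found after %d seconds" message in the agent reports the same quantity this sketch accumulates in waited (looping * poolSleep).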
Example #45
0
    def selectJob(self, resourceDescription):
        """ Main job selection function to find the highest priority job
        matching the resource capacity
    """

        startTime = time.time()
        resourceDict = self.__processResourceDescription(resourceDescription)

        credDict = self.getRemoteCredentials()
        #Check credentials
        if Properties.GENERIC_PILOT not in credDict['properties']:
            #Not a generic pilot and requires a DN??? This smells fishy
            if 'OwnerDN' in resourceDict and resourceDict[
                    'OwnerDN'] != credDict['DN']:
                ownerDN = resourceDict['OwnerDN']
                if Properties.JOB_SHARING in credDict['properties']:
                    #Job sharing, is the DN in the same group?
                    result = Registry.getGroupsForDN(ownerDN)
                    if not result['OK']:
                        return S_ERROR(
                            "Requested owner DN %s does not have any group!" %
                            ownerDN)
                    groups = result['Value']
                    if credDict['group'] not in groups:
                        #DN is not in the same group! bad boy.
                        gLogger.notice(
                            "You cannot request jobs from DN %s. It does not belong to your group!"
                            % ownerDN)
                        resourceDict['OwnerDN'] = credDict['DN']
                else:
                    #No generic pilot and not JobSharing? DN has to be the same!
                    gLogger.notice("You can only match jobs for your DN (%s)" %
                                   credDict['DN'])
                    resourceDict['OwnerDN'] = credDict['DN']
            if Properties.PILOT not in credDict['properties']:
                #No pilot? Group has to be the same!
                if 'OwnerGroup' in resourceDict and resourceDict[
                        'OwnerGroup'] != credDict['group']:
                    gLogger.notice(
                        "You can only match jobs for your group (%s)" %
                        credDict['group'])
                resourceDict['OwnerGroup'] = credDict['group']

        # Check the pilot DIRAC version
        if self.checkPilotVersion:
            if 'DIRACVersion' not in resourceDict:
                return S_ERROR(
                    'Version check requested and not provided by Pilot')

            # Check if the matching Request provides a VirtualOrganization
            if 'VirtualOrganization' in resourceDict:
                voName = resourceDict['VirtualOrganization']
            # Check if the matching Request provides an OwnerGroup
            elif 'OwnerGroup' in resourceDict:
                voName = Registry.getVOForGroup(resourceDict['OwnerGroup'])
            # else take the default VirtualOrganization for the installation
            else:
                voName = Registry.getVOForGroup('')

            self.pilotVersion = gConfig.getValue(
                '/Operations/%s/%s/Versions/PilotVersion' %
                (voName, self.setup), '')
            if self.pilotVersion and resourceDict[
                    'DIRACVersion'] != self.pilotVersion:
                return S_ERROR( 'Pilot version does not match the production version %s:%s' % \
                               ( resourceDict['DIRACVersion'], self.pilotVersion ) )

        # Get common site mask and check the agent site
        result = gJobDB.getSiteMask(siteState='Active')
        if result['OK']:
            maskList = result['Value']
        else:
            return S_ERROR('Internal error: can not get site mask')

        if 'Site' not in resourceDict:
            return S_ERROR('Missing Site Name in Resource JDL')

        siteName = resourceDict['Site']
        if resourceDict['Site'] not in maskList:
            if 'GridCE' in resourceDict:
                del resourceDict['Site']
            else:
                return S_ERROR('Site not in mask and GridCE not specified')

        resourceDict['Setup'] = self.serviceInfoDict['clientSetup']

        if DEBUG:
            print "Resource description:"
            for key, value in resourceDict.items():
                print key.rjust(20), value

        # Check if Job Limits are imposed onto the site
        extraConditions = {}
        if self.siteJobLimits:
            result = self.getExtraConditions(siteName)
            if result['OK']:
                extraConditions = result['Value']
        if extraConditions:
            gLogger.info('Job Limits for site %s are: %s' %
                         (siteName, str(extraConditions)))

        result = gTaskQueueDB.matchAndGetJob(resourceDict,
                                             extraConditions=extraConditions)

        if DEBUG:
            print result

        if not result['OK']:
            return result
        result = result['Value']
        if not result['matchFound']:
            return S_ERROR('No match found')

        jobID = result['jobId']
        resAtt = gJobDB.getJobAttributes(jobID,
                                         ['OwnerDN', 'OwnerGroup', 'Status'])
        if not resAtt['OK']:
            return S_ERROR('Could not retrieve job attributes')
        if not resAtt['Value']:
            return S_ERROR('No attributes returned for job')
        if resAtt['Value']['Status'] != 'Waiting':
            gLogger.error('Job %s matched by the TQ is not in Waiting state' %
                          str(jobID))
            result = gTaskQueueDB.deleteJob(jobID)

        result = gJobDB.setJobStatus(jobID, status='Matched', minor='Assigned')
        result = gJobLoggingDB.addLoggingRecord(jobID,
                                                status='Matched',
                                                minor='Assigned',
                                                source='Matcher')

        result = gJobDB.getJobJDL(jobID)
        if not result['OK']:
            return S_ERROR('Failed to get the job JDL')

        resultDict = {}
        resultDict['JDL'] = result['Value']
        resultDict['JobID'] = jobID

        matchTime = time.time() - startTime
        gLogger.info("Match time: [%s]" % str(matchTime))
        gMonitor.addMark("matchTime", matchTime)

        # Get some extra stuff into the response returned
        resOpt = gJobDB.getJobOptParameters(jobID)
        if resOpt['OK']:
            for key, value in resOpt['Value'].items():
                resultDict[key] = value
        resAtt = gJobDB.getJobAttributes(jobID, ['OwnerDN', 'OwnerGroup'])
        if not resAtt['OK']:
            return S_ERROR('Could not retrieve job attributes')
        if not resAtt['Value']:
            return S_ERROR('No attributes returned for job')

        resultDict['DN'] = resAtt['Value']['OwnerDN']
        resultDict['Group'] = resAtt['Value']['OwnerGroup']
        return S_OK(resultDict)
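The ownership checks at the top of selectJob boil down to a small decision table: a generic pilot may match any job; with JOB_SHARING the requested OwnerDN is accepted if it belongs to the caller's group; otherwise the DN, and without the PILOT property also the group, is clamped back to the caller's own credentials. A hedged sketch of just that logic, with plain strings standing in for the Properties constants and groupsForDN standing in for Registry.getGroupsForDN:

def enforceOwnership(resourceDict, credDict, groupsForDN):
    # Clamp OwnerDN/OwnerGroup in resourceDict to what the credentials allow.
    props = credDict.get('properties', [])
    if 'GenericPilot' in props:  # stand-in for Properties.GENERIC_PILOT
        return resourceDict
    ownerDN = resourceDict.get('OwnerDN')
    if ownerDN and ownerDN != credDict['DN']:
        sharedGroups = groupsForDN(ownerDN) if 'JobSharing' in props else []
        if credDict['group'] not in sharedGroups:
            resourceDict['OwnerDN'] = credDict['DN']  # fall back to own DN
    if 'Pilot' not in props:  # stand-in for Properties.PILOT
        resourceDict['OwnerGroup'] = credDict['group']  # group must match too
    return resourceDict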
Example #46
0
    def MigratingToMigrated(self, se):
        """ Obtain the active files from the migration monitoring db and check their status
    """
        # First get the migrating files from the database
        gLogger.info(
            "[%s] MigratingToMigrated: Attempting to obtain 'Migrating' files."
            % se)
        res = self.__getFiles(se, 'Migrating')
        if not res['OK']:
            gLogger.error(
                "[%s] MigratingToMigrated: Failed to get 'Migrating' files." %
                se, res['Message'])
            return res
        pfnIDs = res['Value']['PFNIDs']
        if not pfnIDs:
            gLogger.info(
                "[%s] MigratingToMigrated: Found no 'Migrating' files." % se)
            return S_OK()
        migratingFiles = res['Value']['Files']
        gLogger.info("[%s] MigratingToMigrated: Found %d 'Migrating' files." %
                     (se, len(pfnIDs)))
        gMonitor.addMark("MigratingFiles%s" % se, len(pfnIDs))
        gLogger.info(
            "[%s] MigratingToMigrated: Obtaining physical file metadata for 'Migrating' files."
            % se)
        startTime = datetime.datetime.utcnow()
        res = self.__getMigratedFiles(se, pfnIDs.keys())
        if not res['OK']:
            gLogger.error(
                "[%s] MigratingToMigrated: Failed to get 'Migrating' file metadata."
                % se, res['Message'])
            return res
        assumedEndTime = datetime.datetime.utcnow() - (
            (datetime.datetime.utcnow() - startTime) / 2
        )  # Assumed that the files are found migrated midway through obtaining the metadata
        previousMonitorTime = self.lastMonitors[se]
        self.lastMonitors[se] = datetime.datetime.utcnow()
        terminal = res['Value']['Terminal']
        migrated = res['Value']['Migrated']

        # Update the problematic files in the integrity DB and update the MigrationMonitoringDB
        gLogger.info(
            "[%s] MigratingToMigrated: Found %d terminally failed files." %
            (se, len(terminal)))
        if terminal:
            replicaTuples = []
            terminalFileIDs = []
            for pfn, prognosis in terminal.items():
                fileID = pfnIDs[pfn]
                terminalFileIDs.append(fileID)
                lfn = migratingFiles[fileID]['LFN']
                # local name: do not clobber the se argument used in later logs
                fileSE = migratingFiles[fileID]['SE']
                replicaTuples.append((lfn, pfn, fileSE, prognosis))
            self.__reportProblematicReplicas(replicaTuples)
            res = self.MigrationMonitoringDB.setMigratingReplicaStatus(
                terminalFileIDs, 'Failed')
            if not res['OK']:
                gLogger.error(
                    "[%s] MigratingToMigrated: Failed to update terminal files."
                    % se, res['Message'])

        # Update the migrated files and send accounting
        gLogger.info("[%s] MigratingToMigrated: Found %d migrated files." %
                     (se, len(migrated)))
        if migrated:
            migratedFileIDs = {}
            for pfn, checksum in migrated.items():
                migratedFileIDs[pfnIDs[pfn]] = checksum
            #res = self.MigrationMonitoringDB.setMigratingReplicaStatus(migratedFileIDs.keys(),'Migrated')
            #if not res['OK']:
            #  gLogger.error("[%s] MigratingToMigrated: Failed to update migrated files." % se, res['Message'])
            # Check the checksums of the migrated files
            res = self.__validateChecksums(se, migratedFileIDs, migratingFiles)
            if not res['OK']:
                gLogger.error(
                    "[%s] MigratingToMigrated: Failed to perform checksum matching."
                    % se, res['Message'])
                matchingFiles = []
                mismatchingFiles = []
            else:
                matchingFiles = res['Value']['MatchingFiles']
                mismatchingFiles = res['Value']['MismatchFiles']
            # Create and send the accounting messages
            res = self.__updateMigrationAccounting(se, migratingFiles,
                                                   matchingFiles,
                                                   mismatchingFiles,
                                                   assumedEndTime,
                                                   previousMonitorTime)
            if not res['OK']:
                gLogger.error(
                    "[%s] MigratingToMigrated: Failed to send accounting for migrated files."
                    % se, res['Message'])
        return S_OK()
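The assumedEndTime above is just the midpoint between the start of the metadata query and now: the files are assumed to have finished migrating halfway through the lookup. The same arithmetic in isolation:

import datetime

def midpointTimestamp(startTime):
    # Midpoint between startTime and now, as in assumedEndTime above.
    now = datetime.datetime.utcnow()
    return now - (now - startTime) / 2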
Example #47
0
  def selectJob( self, resourceDescription, credDict ):
    """ Main job selection function to find the highest priority job matching the resource capacity
    """

    startTime = time.time()

    resourceDict = self._getResourceDict( resourceDescription, credDict )

    negativeCond = self.limiter.getNegativeCondForSite( resourceDict['Site'] )
    result = self.tqDB.matchAndGetJob( resourceDict, negativeCond = negativeCond )

    if not result['OK']:
      return result
    result = result['Value']
    if not result['matchFound']:
      self.log.info( "No match found" )
      raise RuntimeError( "No match found" )

    jobID = result['jobId']
    resAtt = self.jobDB.getJobAttributes( jobID, ['OwnerDN', 'OwnerGroup', 'Status'] )
    if not resAtt['OK']:
      raise RuntimeError( 'Could not retrieve job attributes' )
    if not resAtt['Value']:
      raise RuntimeError( "No attributes returned for job" )
    if resAtt['Value']['Status'] != 'Waiting':
      self.log.error( 'Job matched by the TQ is not in Waiting state', str( jobID ) )
      result = self.tqDB.deleteJob( jobID )
      if not result[ 'OK' ]:
        return result
      raise RuntimeError( "Job %s is not in Waiting state" % str( jobID ) )

    self._reportStatus( resourceDict, jobID )

    result = self.jobDB.getJobJDL( jobID )
    if not result['OK']:
      raise RuntimeError( "Failed to get the job JDL" )

    resultDict = {}
    resultDict['JDL'] = result['Value']
    resultDict['JobID'] = jobID

    matchTime = time.time() - startTime
    self.log.info( "Match time: [%s]" % str( matchTime ) )
    gMonitor.addMark( "matchTime", matchTime )

    # Get some extra stuff into the response returned
    resOpt = self.jobDB.getJobOptParameters( jobID )
    if resOpt['OK']:
      for key, value in resOpt['Value'].items():
        resultDict[key] = value
    resAtt = self.jobDB.getJobAttributes( jobID, ['OwnerDN', 'OwnerGroup'] )
    if not resAtt['OK']:
      raise RuntimeError( 'Could not retrieve job attributes' )
    if not resAtt['Value']:
      raise RuntimeError( 'No attributes returned for job' )

    if self.opsHelper.getValue( "JobScheduling/CheckMatchingDelay", True ):
      self.limiter.updateDelayCounters( resourceDict['Site'], jobID )

    pilotInfoReportedFlag = resourceDict.get( 'PilotInfoReportedFlag', False )
    if not pilotInfoReportedFlag:
      self._updatePilotInfo( resourceDict )
    self._updatePilotJobMapping( resourceDict, jobID )

    resultDict['DN'] = resAtt['Value']['OwnerDN']
    resultDict['Group'] = resAtt['Value']['OwnerGroup']
    resultDict['PilotInfoReportedFlag'] = True

    return resultDict
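Unlike the earlier Matcher variants, this selectJob signals failure by raising RuntimeError and returns a bare dict rather than S_OK. A hypothetical adapter, assuming the S_OK/S_ERROR helpers sketched earlier, would wrap it back into the result-dict convention:

def selectJobSafe(matcher, resourceDescription, credDict):
    # Adapt the raising API back to the {'OK': ...} convention.
    try:
        return S_OK(matcher.selectJob(resourceDescription, credDict))
    except RuntimeError as error:
        return S_ERROR(str(error))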
Example #48
0
  def __call__( self ):
    """ request processing """

    self.log.debug( "about to execute request" )
    gMonitor.addMark( "RequestAtt", 1 )

    # # setup proxy for request owner
    setupProxy = self.setupProxy()
    if not setupProxy["OK"]:
      self.request.Error = setupProxy["Message"]
      if 'has no proxy registered' in setupProxy["Message"]:
        self.log.error( 'Request set to Failed:', setupProxy["Message"] )
        # If user is no longer registered, fail the request
        for operation in self.request:
          for opFile in operation:
            opFile.Status = 'Failed'
          operation.Status = 'Failed'
      else:
        self.log.error( setupProxy["Message"] )
      return S_OK( self.request )
    shifter = setupProxy["Value"]["Shifter"]
    proxyFile = setupProxy["Value"]["ProxyFile"]

    error = None
    while self.request.Status == "Waiting":

      # # get waiting operation
      operation = self.request.getWaiting()
      if not operation["OK"]:
        self.log.error( operation["Message"] )
        return operation
      operation = operation["Value"]
      self.log.info( "executing operation #%s '%s'" % ( operation.Order, operation.Type ) )

      # # and handler for it
      handler = self.getHandler( operation )
      if not handler["OK"]:
        self.log.error( "unable to process operation %s: %s" % ( operation.Type, handler["Message"] ) )
        # gMonitor.addMark( "%s%s" % ( operation.Type, "Fail" ), 1 )
        operation.Error = handler["Message"]
        break

      handler = handler["Value"]
      # # set shifters list in the handler
      handler.shifter = shifter
      # # and execute
      pluginName = self.getPluginName( self.handlersDict.get( operation.Type ) )
      if self.standalone:
        useServerCertificate = gConfig.useServerCertificate()
      else:
        # Always use server certificates if executed within an agent
        useServerCertificate = True
      try:
        if pluginName:
          gMonitor.addMark( "%s%s" % ( pluginName, "Att" ), 1 )
        # Always use request owner proxy
        if useServerCertificate:
          gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'false' )
        exe = handler()
        if useServerCertificate:
          gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true' )
        if not exe["OK"]:
          self.log.error( "unable to process operation %s: %s" % ( operation.Type, exe["Message"] ) )
          if pluginName:
            gMonitor.addMark( "%s%s" % ( pluginName, "Fail" ), 1 )
          gMonitor.addMark( "RequestFail", 1 )
          if self.request.JobID:
            # Check if the job exists
            monitorServer = RPCClient( "WorkloadManagement/JobMonitoring", useCertificates = True )
            res = monitorServer.getJobPrimarySummary( int( self.request.JobID ) )
            if not res["OK"]:
              self.log.error( "RequestTask: Failed to get job %d status" % self.request.JobID )
            elif not res['Value']:
              self.log.warn( "RequestTask: job %d does not exist (anymore): failed request" % self.request.JobID )
              for opFile in operation:
                opFile.Status = 'Failed'
              if operation.Status != 'Failed':
                operation.Status = 'Failed'
              self.request.Error = 'Job no longer exists'
      except Exception, error:
        self.log.exception( "hit by exception: %s" % str( error ) )
        if pluginName:
          gMonitor.addMark( "%s%s" % ( pluginName, "Fail" ), 1 )
        gMonitor.addMark( "RequestFail", 1 )
        if useServerCertificate:
          gConfigurationData.setOptionInCFG( '/DIRAC/Security/UseServerCertificate', 'true' )
        break

      # # operation status check
      if operation.Status == "Done" and pluginName:
        gMonitor.addMark( "%s%s" % ( pluginName, "OK" ), 1 )
      elif operation.Status == "Failed" and pluginName:
        gMonitor.addMark( "%s%s" % ( pluginName, "Fail" ), 1 )
      elif operation.Status in ( "Waiting", "Scheduled" ):
        # # no update for waiting or all files scheduled
        break
Example #49
0
  def __call__( self ):
    """ perform physical removal operation """
    # # prepare targetSE list
    targetSEs = self.operation.targetSEList
    # # check targetSEs for removal
    bannedTargets = []
    for targetSE in targetSEs:
      removeStatus = self.rssSEStatus( targetSE, "Remove" )
      if not removeStatus["OK"]:
        self.log.error( removeStatus["Message"] )
        for opFile in self.operation:
          opFile.Error = "unknown targetSE: %s" % targetSE
          opFile.Status = "Failed"
        self.operation.Error = "unknown targetSE: %s" % targetSE
        return S_ERROR( self.operation.Error )

      if not removeStatus["Value"]:
        self.log.error( "%s in banned for remove right now" % targetSE )
        bannedTargets.append( targetSE )
        self.operation.Error += "banned targetSE: %s;" % targetSE
    # # some targets are banned? return
    if bannedTargets:
      return S_ERROR( "targets %s are banned for removal" % ",".join( bannedTargets ) )

    # # get waiting files
    waitingFiles = self.getWaitingFilesList()
    # # prepare pfn dict
    toRemoveDict = dict( [ ( opFile.PFN, opFile ) for opFile in waitingFiles ] )

    gMonitor.addMark( "PhysicalRemovalAtt", len( toRemoveDict ) * len( targetSEs ) )

    # # keep errors dict
    removalStatus = dict.fromkeys( toRemoveDict.keys(), None )
    for pfn in removalStatus:
      removalStatus[pfn] = dict.fromkeys( targetSEs, "" )

    for targetSE in targetSEs:

      self.log.info( "removing files from %s" % targetSE )

      # # 1st - bulk removal
      bulkRemoval = self.bulkRemoval( toRemoveDict, targetSE )
      if not bulkRemoval["OK"]:
        self.log.error( bulkRemoval["Message"] )
        self.operation.Error = bulkRemoval["Message"]
        return bulkRemoval

      bulkRemoval = bulkRemoval["Value"]

      for pfn, opFile in toRemoveDict.items():
        removalStatus[pfn][targetSE] = bulkRemoval["Failed"].get( pfn, "" )
        opFile.Error = removalStatus[pfn][targetSE]

      # # 2nd - single file removal
      toRetry = dict( [ ( pfn, opFile ) for pfn, opFile in toRemoveDict.items() if pfn in bulkRemoval["Failed"] ] )
      for pfn, opFile in toRetry.items():
        self.singleRemoval( opFile, targetSE )
        if not opFile.Error:
          removalStatus[pfn][targetSE] = ""
        else:
          gMonitor.addMark( "PhysicalRemovalFail", 1 )
          removalStatus[pfn][targetSE] = opFile.Error

    # # update file status for waiting files
    failed = 0
    for opFile in self.operation:
      if opFile.Status == "Waiting":
        errors = [ error for error in removalStatus[opFile.PFN].values() if error.strip() ]
        if errors:
          failed += 1
          opFile.Error = ",".join( errors )
          if "Write access not permitted for this credential" in opFile.Error:
            opFile.Status = "Failed"
            gMonitor.addMark( "PhysicalRemovalFail", len( errors ) )
            continue
        gMonitor.addMark( "PhysicalRemovalOK", len( targetSEs ) )
        gMonitor.addMark( "PhysicalRemovalSize", opFile.Size * len( targetSEs ) )
        opFile.Status = "Done"

    if failed:
      self.operation.Error = "failed to remove %s files" % failed

    return S_OK()
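The removal above follows a bulk-then-retry shape: one bulk call for everything, then a per-file retry for whatever the bulk call reported as failed, recording an error string (empty on success) per file. A generic sketch of that pattern, with bulkAction and singleAction as hypothetical callables:

def bulkThenSingle(items, bulkAction, singleAction):
    # bulkAction(items) -> {key: error} for failures only;
    # singleAction(key) -> error string, or empty/None on success.
    status = dict.fromkeys(items, "")
    failed = bulkAction(items)
    for key in failed:
        status[key] = singleAction(key) or ""
    return status  # {key: "" on success, error string otherwise}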
Example #50
0
def sendNumTaskQueues():
  result = taskQueueDB.getNumTaskQueues()
  if result[ 'OK' ]:
    gMonitor.addMark( 'numTQs', result[ 'Value' ] )
  else:
    gLogger.error( "Cannot get the number of task queues", result[ 'Message' ] )
Example #51
0
  def __call__( self ):
    """ reTransfer operation execution """
    # # list of targetSEs
    targetSEs = self.operation.targetSEList
    # # check targetSE for write access
    targetSE = targetSEs[0]
    bannedTargets = self.checkSEsRSS( targetSE )
    if not bannedTargets['OK']:
      gMonitor.addMark( "FileReTransferAtt" )
      gMonitor.addMark( "FileReTransferFail" )
      return bannedTargets

    if bannedTargets['Value']:
      return S_OK( "%s targets are banned for writing" % ",".join( bannedTargets['Value'] ) )

    # # get waiting files
    waitingFiles = self.getWaitingFilesList()
    # # prepare waiting files
    toRetransfer = dict( [ ( opFile.PFN, opFile ) for opFile in waitingFiles ] )

    gMonitor.addMark( "FileReTransferAtt", len( toRetransfer ) )

    if len( targetSEs ) != 1:
      error = "only one TargetSE allowed, got %d" % len( targetSEs )
      for opFile in toRetransfer.values():
        opFile.Error = error
        opFile.Status = "Failed"
      self.operation.Error = error
      gMonitor.addMark( "FileReTransferFail", len( toRetransfer ) )
      return S_ERROR( error )

    se = StorageElement( targetSE )
    for opFile in toRetransfer.values():
      reTransfer = se.retransferOnlineFile( opFile.PFN )
      if not reTransfer["OK"]:
        opFile.Error = reTransfer["Message"]
        self.log.error( "%s retransfer failed: %s" % opFile.Error )
        gMonitor.addMark( "FileReTransferFail", 1 )
        continue
      reTransfer = reTransfer["Value"]
      if opFile.PFN in reTransfer["Failed"]:
        opFile.Error = reTransfer["Failed"][opFile.PFN]
        self.log.error( "%s retransfer failed: %s" % opFile.Error )
        gMonitor.addMark( "FileReTransferFail", 1 )
        continue
      opFile.Status = "Done"
      self.log.info( "%s retransfer done" % opFile.LFN )
      gMonitor.addMark( "FileReTransferOK", 1 )

    return S_OK()
Example #52
0
  def selectJob( self, resourceDescription ):
    """ Main job selection function to find the highest priority job
        matching the resource capacity
    """

    startTime = time.time()

    # Check and form the resource description dictionary
    resourceDict = {}
    if type( resourceDescription ) in StringTypes:
      classAdAgent = ClassAd( resourceDescription )
      if not classAdAgent.isOK():
        return S_ERROR( 'Illegal Resource JDL' )
      gLogger.verbose( classAdAgent.asJDL() )

      for name in taskQueueDB.getSingleValueTQDefFields():
        if classAdAgent.lookupAttribute( name ):
          if name == 'CPUTime':
            resourceDict[name] = classAdAgent.getAttributeInt( name )
          else:
            resourceDict[name] = classAdAgent.getAttributeString( name )

      for name in taskQueueDB.getMultiValueMatchFields():
        if classAdAgent.lookupAttribute( name ):
          resourceDict[name] = classAdAgent.getAttributeString( name )

      # Check if a JobID is requested
      if classAdAgent.lookupAttribute( 'JobID' ):
        resourceDict['JobID'] = classAdAgent.getAttributeInt( 'JobID' )

      if classAdAgent.lookupAttribute( 'DIRACVersion' ):
        resourceDict['DIRACVersion'] = classAdAgent.getAttributeString( 'DIRACVersion' )

    else:
      for name in taskQueueDB.getSingleValueTQDefFields():
        if name in resourceDescription:
          resourceDict[name] = resourceDescription[name]

      for name in taskQueueDB.getMultiValueMatchFields():
        if name in resourceDescription:
          resourceDict[name] = resourceDescription[name]

      if 'JobID' in resourceDescription:
        resourceDict['JobID'] = resourceDescription['JobID']
      if 'DIRACVersion' in resourceDescription:
        resourceDict['DIRACVersion'] = resourceDescription['DIRACVersion']

    # Check the pilot DIRAC version
    if self.checkPilotVersion:
      if 'DIRACVersion' in resourceDict:
        if self.pilotVersion and resourceDict['DIRACVersion'] != self.pilotVersion:
          return S_ERROR( 'Pilot version does not match the production version %s:%s' % \
                         ( resourceDict['DIRACVersion'], self.pilotVersion ) )

    # Get common site mask and check the agent site
    result = jobDB.getSiteMask( siteState = 'Active' )
    if result['OK']:
      maskList = result['Value']
    else:
      return S_ERROR( 'Internal error: can not get site mask' )

    if 'Site' not in resourceDict:
      return S_ERROR( 'Missing Site Name in Resource JDL' )

    siteName = resourceDict['Site']
    if resourceDict['Site'] not in maskList:
      if 'GridCE' in resourceDict:
        del resourceDict['Site']
      else:
        return S_ERROR( 'Site not in mask and GridCE not specified' )

    resourceDict['Setup'] = self.serviceInfoDict['clientSetup']

    if DEBUG:
      print "Resource description:"
      for k, v in resourceDict.items():
        print k.rjust( 20 ), v

    # Check if Job Limits are imposed onto the site
    extraConditions = {}
    if self.siteJobLimits:
      result = self.getExtraConditions( siteName )
      if result['OK']:
        extraConditions = result['Value']
    if extraConditions:
      gLogger.info( 'Job Limits for site %s are: %s' % ( siteName, str( extraConditions ) ) )

    result = taskQueueDB.matchAndGetJob( resourceDict, extraConditions = extraConditions )

    if DEBUG:
      print result

    if not result['OK']:
      return result
    result = result['Value']
    if not result['matchFound']:
      return S_ERROR( 'No match found' )

    jobID = result['jobId']
    resAtt = jobDB.getJobAttributes( jobID, ['OwnerDN', 'OwnerGroup', 'Status'] )
    if not resAtt['OK']:
      return S_ERROR( 'Could not retrieve job attributes' )
    if not resAtt['Value']:
      return S_ERROR( 'No attributes returned for job' )
    if resAtt['Value']['Status'] != 'Waiting':
      gLogger.error( 'Job %s matched by the TQ is not in Waiting state' % str( jobID ) )
      result = taskQueueDB.deleteJob( jobID )

    result = jobDB.setJobStatus( jobID, status = 'Matched', minor = 'Assigned' )
    result = jobLoggingDB.addLoggingRecord( jobID,
                                           status = 'Matched',
                                           minor = 'Assigned',
                                           source = 'Matcher' )

    result = jobDB.getJobJDL( jobID )
    if not result['OK']:
      return S_ERROR( 'Failed to get the job JDL' )

    resultDict = {}
    resultDict['JDL'] = result['Value']
    resultDict['JobID'] = jobID

    matchTime = time.time() - startTime
    gLogger.info( "Match time: [%s]" % str( matchTime ) )
    gMonitor.addMark( "matchTime", matchTime )

    # Get some extra stuff into the response returned
    resOpt = jobDB.getJobOptParameters( jobID )
    if resOpt['OK']:
      for key, value in resOpt['Value'].items():
        resultDict[key] = value
    resAtt = jobDB.getJobAttributes( jobID, ['OwnerDN', 'OwnerGroup'] )
    if not resAtt['OK']:
      return S_ERROR( 'Could not retrieve job attributes' )
    if not resAtt['Value']:
      return S_ERROR( 'No attributes returned for job' )

    resultDict['DN'] = resAtt['Value']['OwnerDN']
    resultDict['Group'] = resAtt['Value']['OwnerGroup']
    return S_OK( resultDict )
Example #53
0
    def execute_request(self):
        """ Takes one DISET request and forward it to the destination service
    """
        gMonitor.addMark("Iteration", 1)
        if self.RequestDB:
            res = self.RequestDB.getRequest('diset')
        else:
            res = self.RequestDBClient.getRequest('diset', url=self.local)
        if not res['OK']:
            gLogger.error(
                "DISETForwardingAgent.execute: Failed to get request from database.",
                self.local)
            return S_OK()
        elif not res['Value']:
            gLogger.info(
                "DISETForwardingAgent.execute: No requests to be executed found."
            )
            return S_OK()

        gMonitor.addMark("Attempted", 1)
        requestString = res['Value']['RequestString']
        requestName = res['Value']['RequestName']
        try:
            jobID = int(res['Value']['JobID'])
        except (KeyError, TypeError, ValueError):
            jobID = 0
        gLogger.info("DISETForwardingAgent.execute: Obtained request %s" %
                     requestName)

        if self.RequestDB:
            result = self.RequestDB._getRequestAttribute(
                'RequestID', requestName=requestName)
            if not result['OK']:
                return S_OK('Can not get the request execution order')
            requestID = result['Value']
            result = self.RequestDB.getCurrentExecutionOrder(requestID)
        else:
            result = self.RequestDBClient.getCurrentExecutionOrder(
                requestName, self.local)
        if result['OK']:
            currentOrder = result['Value']
        else:
            return S_OK('Can not get the request execution order')

        oRequest = RequestContainer(request=requestString)
        requestAttributes = oRequest.getRequestAttributes()['Value']

        ################################################
        # Find the number of sub-requests from the request
        res = oRequest.getNumSubRequests('diset')
        if not res['OK']:
            errStr = "DISETForwardingAgent.execute: Failed to obtain number of diset subrequests."
            gLogger.error(errStr, res['Message'])
            return S_OK()

        gLogger.info(
            "DISETForwardingAgent.execute: Found %s sub requests for job %s" %
            (res['Value'], jobID))
        ################################################
        # For all the sub-requests in the request
        modified = False
        for ind in range(res['Value']):
            subRequestAttributes = oRequest.getSubRequestAttributes(
                ind, 'diset')['Value']
            subExecutionOrder = int(subRequestAttributes['ExecutionOrder'])
            subStatus = subRequestAttributes['Status']
            gLogger.info(
                "DISETForwardingAgent.execute: Processing sub-request %s with execution order %d"
                % (ind, subExecutionOrder))
            if subStatus == 'Waiting' and subExecutionOrder <= currentOrder:
                operation = subRequestAttributes['Operation']
                gLogger.info(
                    "DISETForwardingAgent.execute: Attempting to forward %s type."
                    % operation)
                rpcStubString = subRequestAttributes['Arguments']
                rpcStub, length = DEncode.decode(rpcStubString)
                res = executeRPCStub(rpcStub)
                if res['OK']:
                    gLogger.info(
                        "DISETForwardingAgent.execute: Successfully forwarded."
                    )
                    oRequest.setSubRequestStatus(ind, 'diset', 'Done')
                    gMonitor.addMark("Successful", 1)
                    modified = True
                elif res['Message'] == 'No Matching Job':
                    gLogger.warn(
                        "DISETForwardingAgent.execute: No corresponding job found. Setting to done."
                    )
                    oRequest.setSubRequestStatus(ind, 'diset', 'Done')
                else:
                    gLogger.error(
                        "DISETForwardingAgent.execute: Failed to forward request.",
                        res['Message'])
            else:
                gLogger.info(
                    "DISETForwardingAgent.execute: Sub-request %s is in status '%s' and not to be executed."
                    % (ind, subRequestAttributes['Status']))

        ################################################
        #  Generate the new request string after operation
        requestString = oRequest.toXML()['Value']
        if self.RequestDB:
            res = self.RequestDB.updateRequest(requestName, requestString)
        else:
            res = self.RequestDBClient.updateRequest(requestName,
                                                     requestString, self.local)
        if res['OK']:
            gLogger.info(
                "DISETForwardingAgent.execute: Successfully updated request.")
        else:
            gLogger.error(
                "DISETForwardingAgent.execute: Failed to update request to",
                self.local)

        if modified and jobID:
            result = self.RequestDBClient.finalizeRequest(
                requestName, jobID, self.local)

        return S_OK()
Example #54
0
    def execute(self):
        subRequestAttributes = self.argumentsDict.get('SubRequestAttributes',
                                                      {})
        if not subRequestAttributes:
            return S_ERROR("SubRequestAttributes not supplied")
        subRequestFiles = self.argumentsDict.get('Files', {})
        if not subRequestFiles:
            return S_ERROR("Files not supplied")
        targetSE = subRequestAttributes.get('TargetSE', '')
        if not targetSE:
            return S_ERROR("TargetSE not supplied")
        catalog = subRequestAttributes.get('Catalogue', '')
        status = subRequestAttributes.get('Status', 'Waiting')
        if status != 'Waiting':
            return S_OK()

        for subRequestFile in subRequestFiles:
            lfn = str(subRequestFile.get('LFN', ''))
            if not lfn:
                self.log.error("LFN not supplied")
                continue
            # avoid shadowing the builtin 'file'
            pfn = subRequestFile.get('PFN', '')
            if not pfn:
                self.log.error("PFN not supplied")
                continue
            addler = subRequestFile.get('Addler', '')
            guid = subRequestFile.get('GUID', '')
            status = subRequestFile.get('Status', 'Waiting')
            if status != 'Waiting':
                self.log.info("%s in %s status and not Waiting" %
                              (lfn, status))
                continue
            gMonitor.addMark("Put and register", 1)
            self.__getReplicaManager()
            res = self.rm.putAndRegister(lfn,
                                         pfn,
                                         targetSE,
                                         guid=guid,
                                         checksum=addler,
                                         catalog=catalog)
            if not res['OK']:
                gMonitor.addMark("Put failed", 1)
                self.log.error("Failed to put file", res['Message'])
                continue
            successful = res['Value']['Successful']
            failed = res['Value']['Failed']
            if lfn not in successful or 'put' not in successful[lfn]:
                gMonitor.addMark("Put failed", 1)
                self.log.error("Failed to put file")
                continue
            if 'put' in successful[lfn]:
                gMonitor.addMark("Put successful", 1)
                gLogger.info("Successfully put %s to %s in %s seconds." %
                             (lfn, targetSE, successful[lfn]['put']))
                subRequestFile['Status'] = 'Done'
            if 'register' in successful[lfn]:
                gMonitor.addMark("File registration successful", 1)
                gLogger.info(
                    "Successfully registered %s to %s in %s seconds." %
                    (lfn, targetSE, successful[lfn]['register']))
            else:
                gMonitor.addMark("File registration failed", 1)
                gLogger.error("Failed to register %s to %s." % (lfn, targetSE))
                fileDict = failed[lfn]['register']
                registerRequestDict = {
                    'Attributes': {
                        'TargetSE': fileDict['TargetSE'],
                        'Operation': 'registerFile'
                    },
                    'Files': [{
                        'LFN': fileDict['LFN'],
                        'PFN': fileDict['PFN'],
                        'Size': fileDict['Size'],
                        'Addler': fileDict['Addler'],
                        'GUID': fileDict['GUID']
                    }]
                }
                print registerRequestDict
        return S_OK()
Example #55
0
def htmlPageTitle():
  gMonitor.addMark( "pagesServed" )
  path = currentPath()
  return gWebConfig.getPageTitle( path )
Example #56
0
    def __call__(self):
        """ remove replicas """
        # # prepare list of targetSEs
        targetSEs = self.operation.targetSEList
        # # check targetSEs for removal
        bannedTargets = self.checkSEsRSS(targetSEs, access='RemoveAccess')
        if not bannedTargets['OK']:
            gMonitor.addMark("RemoveReplicaAtt")
            gMonitor.addMark("RemoveReplicaFail")
            return bannedTargets

        if bannedTargets['Value']:
            return S_OK("%s targets are banned for removal" %
                        ",".join(bannedTargets['Value']))

        # # get waiting files
        waitingFiles = self.getWaitingFilesList()
        # # and prepare dict
        toRemoveDict = dict([(opFile.LFN, opFile) for opFile in waitingFiles])

        self.log.info("todo: %s replicas to delete from %s sites" %
                      (len(toRemoveDict), len(targetSEs)))
        gMonitor.addMark("RemoveReplicaAtt",
                         len(toRemoveDict) * len(targetSEs))

        # # keep status for each targetSE
        removalStatus = dict.fromkeys(toRemoveDict.keys(), None)
        for lfn in removalStatus:
            removalStatus[lfn] = dict.fromkeys(targetSEs, None)

        # # loop over targetSEs
        for targetSE in targetSEs:

            self.log.info("removing replicas at %s" % targetSE)

            # # 1st step - bulk removal
            bulkRemoval = self.bulkRemoval(toRemoveDict, targetSE)
            if not bulkRemoval["OK"]:
                self.log.error('Bulk replica removal failed',
                               bulkRemoval["Message"])
                return bulkRemoval
            bulkRemoval = bulkRemoval["Value"]

            # # update removal status for successful files
            removalOK = [
                opFile for opFile in bulkRemoval.values() if not opFile.Error
            ]
            for opFile in removalOK:
                removalStatus[opFile.LFN][targetSE] = ""
            gMonitor.addMark("RemoveReplicaOK", len(removalOK))

            # # 2nd step - process the rest again
            toRetry = dict([(lfn, opFile)
                            for lfn, opFile in bulkRemoval.items()
                            if opFile.Error])
            for lfn, opFile in toRetry.items():
                self.singleRemoval(opFile, targetSE)
                if not opFile.Error:
                    gMonitor.addMark("RemoveReplicaOK", 1)
                    removalStatus[lfn][targetSE] = ""
                else:
                    gMonitor.addMark("RemoveReplicaFail", 1)
                    removalStatus[lfn][targetSE] = opFile.Error

        # # update file status for waiting files
        failed = 0
        for opFile in self.operation:
            if opFile.Status == "Waiting":
                errors = list(
                    set([
                        error
                        for error in removalStatus[opFile.LFN].values()
                        if error
                    ]))
                if errors:
                    opFile.Error = ",".join(errors)
                    # This seems to be the only offending error
                    if "Write access not permitted for this credential" in opFile.Error:
                        failed += 1
                        continue
                opFile.Status = "Done"

        if failed:
            self.operation.Error = "failed to remove %s replicas" % failed

        return S_OK()
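removalStatus here is a two-level dict, LFN -> SE -> error, and the final loop (with the opFile.LFN fix applied) reduces it to the unique non-empty errors per file. The reduction in isolation:

def collectErrors(removalStatus, lfn):
    # Unique non-empty error strings recorded for lfn across all target SEs.
    return sorted(set(e for e in removalStatus.get(lfn, {}).values() if e))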
Example #57
0
    gMonitor.flush()

    if error:
      return S_ERROR( error )

    # # request done?
    if self.request.Status == "Done":
      # # update request to the RequestDB
      self.log.info( 'updating request with status %s' % self.request.Status )
      update = self.updateRequest()
      if not update["OK"]:
        self.log.error( update["Message"] )
        return update
      self.log.info( "request '%s' is done" % self.request.RequestName )
      gMonitor.addMark( "RequestOK", 1 )
      # # and there is a job waiting for it? finalize!
      if self.request.JobID:
        attempts = 0
        while True:
          finalizeRequest = self.requestClient.finalizeRequest( self.request.RequestName, self.request.JobID )
          if not finalizeRequest["OK"]:
            if not attempts:
              self.log.error( "unable to finalize request %s: %s, will retry" % ( self.request.RequestName,
                                                                                finalizeRequest["Message"] ) )
            self.log.verbose( "Waiting 10 seconds" )
            attempts += 1
            if attempts == 10:
              self.log.error( "giving up finalize request after %d attempts" % attempts )
              return S_ERROR( 'Could not finalize request' )
Example #58
0
    def __call__(self):
        """ perform physical removal operation """
        bannedTargets = self.checkSEsRSS(access='RemoveAccess')
        if not bannedTargets['OK']:
            gMonitor.addMark("PhysicalRemovalAtt")
            gMonitor.addMark("PhysicalRemovalFail")
            return bannedTargets

        if bannedTargets['Value']:
            return S_OK("%s targets are banned for removal" %
                        ",".join(bannedTargets['Value']))

        # # get waiting files
        waitingFiles = self.getWaitingFilesList()
        # # prepare pfn dict
        toRemoveDict = dict([(opFile.PFN, opFile) for opFile in waitingFiles])

        targetSEs = self.operation.targetSEList
        gMonitor.addMark("PhysicalRemovalAtt",
                         len(toRemoveDict) * len(targetSEs))

        # # keep errors dict
        removalStatus = dict.fromkeys(toRemoveDict.keys(), None)
        for pfn in removalStatus:
            removalStatus[pfn] = dict.fromkeys(targetSEs, "")

        for targetSE in targetSEs:

            self.log.info("removing files from %s" % targetSE)

            # # 1st - bulk removal
            bulkRemoval = self.bulkRemoval(toRemoveDict, targetSE)
            if not bulkRemoval["OK"]:
                self.log.error(bulkRemoval["Message"])
                self.operation.Error = bulkRemoval["Message"]
                return bulkRemoval

            bulkRemoval = bulkRemoval["Value"]

            for pfn, opFile in toRemoveDict.items():
                removalStatus[pfn][targetSE] = bulkRemoval["Failed"].get(
                    pfn, "")
                opFile.Error = removalStatus[pfn][targetSE]

            # # 2nd - single file removal
            toRetry = dict([(pfn, opFile)
                            for pfn, opFile in toRemoveDict.items()
                            if pfn in bulkRemoval["Failed"]])
            for pfn, opFile in toRetry.items():
                self.singleRemoval(opFile, targetSE)
                if not opFile.Error:
                    removalStatus[pfn][targetSE] = ""
                else:
                    gMonitor.addMark("PhysicalRemovalFail", 1)
                    removalStatus[pfn][targetSE] = opFile.Error

        # # update file status for waiting files
        failed = 0
        for opFile in self.operation:
            if opFile.Status == "Waiting":
                errors = [
                    error for error in removalStatus[opFile.PFN].values()
                    if error.strip()
                ]
                if errors:
                    failed += 1
                    opFile.Error = ",".join(errors)
                    if "Write access not permitted for this credential" in opFile.Error:
                        opFile.Status = "Failed"
                        gMonitor.addMark("PhysicalRemovalFail", len(errors))
                        continue
                gMonitor.addMark("PhysicalRemovalOK", len(targetSEs))
                gMonitor.addMark("PhysicalRemovalSize",
                                 opFile.Size * len(targetSEs))
                opFile.Status = "Done"

        if failed:
            self.operation.Error = "failed to remove %s files" % failed

        return S_OK()
Example #59
0
  def execute( self ):
    ''' Main execution method
    '''

    gMonitor.addMark( 'Iteration', 1 )
    # Get all the transformations
    result = self.transClient.getTransformations( {'Status' : 'Active', 
                                                   'Type' : self.transformationTypes } )
    if not result['OK']:
      gLogger.error( "InputDataAgent.execute: Failed to get transformations.", result['Message'] )
      return S_OK()

    # Process each transformation
    for transDict in result['Value']:
      transID = long( transDict['TransformationID'] )
      res = self.transClient.getTransformationInputDataQuery( transID )
      if not res['OK']:
        if res['Message'] == 'No InputDataQuery found for transformation':
          gLogger.info( "InputDataAgent.execute: No input data query found for transformation %d" % transID )
        else:
          gLogger.error( "InputDataAgent.execute: Failed to get input data query for %d" % transID, res['Message'] )
        continue
      inputDataQuery = res['Value']

      if self.refreshonly:
        # Determine the correct time stamp to use for this transformation
        if transID in self.timeLog:
          if transID in self.fullTimeLog:
            # If it is more than a day since the last reduced query, make a full query just in case
            if ( datetime.datetime.utcnow() - self.fullTimeLog[transID] ) < datetime.timedelta( seconds = self.fullUpdatePeriod ):
              timeStamp = self.timeLog[transID]
              if self.dateKey:
                inputDataQuery[self.dateKey] = ( timeStamp - datetime.timedelta( seconds = 10 ) ).strftime( '%Y-%m-%d %H:%M:%S' )
              else:
                gLogger.error( "DateKey was not set in the CS, cannot use the RefreshOnly" )
            else:
              self.fullTimeLog[transID] = datetime.datetime.utcnow()
        self.timeLog[transID] = datetime.datetime.utcnow()
        if transID not in self.fullTimeLog:
          self.fullTimeLog[transID] = datetime.datetime.utcnow()

      # Perform the query to the metadata catalog
      gLogger.verbose( "Using input data query for transformation %d: %s" % ( transID, str( inputDataQuery ) ) )
      start = time.time()
      result = self.metadataClient.findFilesByMetadata( inputDataQuery )
      rtime = time.time() - start
      gLogger.verbose( "Metadata catalog query time: %.2f seconds." % ( rtime ) )
      if not result['OK']:
        gLogger.error( "InputDataAgent.execute: Failed to get response from the metadata catalog", result['Message'] )
        continue
      lfnList = result['Value']

      # Check if the number of files has changed since the last cycle
      nlfns = len( lfnList )
      gLogger.info( "%d files returned for transformation %d from the metadata catalog" % ( nlfns, int( transID ) ) )
      if transID in self.fileLog:
        if nlfns == self.fileLog[transID]:
          gLogger.verbose( 'No new files in metadata catalog since last check' )
      self.fileLog[transID] = nlfns

      # Add any new files to the transformation
      addedLfns = []
      if lfnList:
        gLogger.verbose( 'Processing %d lfns for transformation %d' % ( len( lfnList ), transID ) )
        # Add the files to the transformation
        gLogger.verbose( 'Adding %d lfns for transformation %d' % ( len( lfnList ), transID ) )
        result = self.transClient.addFilesToTransformation( transID, sorted( lfnList ) )
        if not result['OK']:
          gLogger.warn( "InputDataAgent.execute: failed to add lfns to transformation", result['Message'] )
          self.fileLog[transID] = 0
        else:
          if result['Value']['Failed']:
            for lfn, error in result['Value']['Failed'].items():
              gLogger.warn( "InputDataAgent.execute: Failed to add %s to transformation" % lfn, error )
          if result['Value']['Successful']:
            for lfn, status in result['Value']['Successful'].items():
              if status == 'Added':
                addedLfns.append( lfn )
            gLogger.info( "InputDataAgent.execute: Added %d files to transformation" % len( addedLfns ) )

    return S_OK()
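The refreshonly branch keeps two timestamps per transformation: the time of the last query and the time of the last full, unrestricted query. While the last full query is recent enough, the catalog query is narrowed to files newer than the last check minus a 10-second safety margin; otherwise a full query is made and its timestamp recorded. A sketch of that decision, assuming dicts mirroring the agent's timeLog/fullTimeLog attributes (it returns a datetime, where the agent formats it with strftime):

import datetime

def restrictQuery(transID, timeLog, fullTimeLog, fullUpdatePeriod):
    # Return the timestamp to restrict the query to, or None for a full query.
    now = datetime.datetime.utcnow()
    restrictTo = None
    lastFull = fullTimeLog.get(transID)
    if transID in timeLog and lastFull is not None:
        if now - lastFull < datetime.timedelta(seconds=fullUpdatePeriod):
            # recent full query: only look back to the last check, minus margin
            restrictTo = timeLog[transID] - datetime.timedelta(seconds=10)
        else:
            fullTimeLog[transID] = now  # due for another full query
    timeLog[transID] = now
    fullTimeLog.setdefault(transID, now)
    return restrictTo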
Example #60
0
    def ftsTransfer(self):
        """ replicate and register using FTS """

        self.log.info("scheduling files...")
        targetSEs = self.operation.targetSEList

        for targetSE in targetSEs:
            writeStatus = self.rssSEStatus(targetSE, "WriteAccess")
            if not writeStatus["OK"]:
                self.log.error(writeStatus["Message"])
                for opFile in self.operation:
                    opFile.Error = "unknown targetSE: %s" % targetSE
                    opFile.Status = "Failed"
                self.operation.Error = "unknown targetSE: %s" % targetSE
                return S_ERROR(self.operation.Error)

        toSchedule = []

        for opFile in self.getWaitingFilesList():
            gMonitor.addMark("FTSScheduleAtt", 1)
            # # check replicas
            replicas = self._filterReplicas(opFile)
            if not replicas["OK"]:
                continue
            replicas = replicas["Value"]

            if not replicas["Valid"] and replicas["Banned"]:
                self.log.warn(
                    "unable to schedule '%s', replicas only at banned SEs" %
                    opFile.LFN)
                gMonitor.addMark("FTSScheduleFail", 1)
                continue

            validReplicas = replicas["Valid"]
            bannedReplicas = replicas["Banned"]

            if not validReplicas and bannedReplicas:
                self.log.warn(
                    "unable to schedule '%s', replicas only at banned SEs" %
                    opFile.LFN)
                gMonitor.addMark("FTSScheduleFail", 1)
                continue

            if validReplicas:
                validTargets = list(
                    set(self.operation.targetSEList) - set(validReplicas))
                if not validTargets:
                    self.log.info("file %s is already present at all targets" %
                                  opFile.LFN)
                    opFile.Status = "Done"
                    continue
                toSchedule.append(
                    (opFile.toJSON()["Value"], validReplicas, validTargets))

        if toSchedule:
            self.log.info("found %s files to schedule" % len(toSchedule))

            ftsSchedule = self.ftsClient().ftsSchedule(
                self.request.RequestID, self.operation.OperationID, toSchedule)
            if not ftsSchedule["OK"]:
                self.log.error(ftsSchedule["Message"])
                return ftsSchedule

            ftsSchedule = ftsSchedule["Value"]

            for fileID in ftsSchedule["Successful"]:
                gMonitor.addMark("FTSScheduleOK", 1)
                for opFile in self.operation:
                    if fileID == opFile.FileID:
                        opFile.Status = "Scheduled"
                        self.log.always("%s has been scheduled for FTS" %
                                        opFile.LFN)

            for fileID, reason in ftsSchedule["Failed"]:
                gMonitor.addMark("FTSScheduleFail", 1)
                for opFile in self.operation:
                    if fileID == opFile.FileID:
                        opFile.Error = reason
                        self.log.error("unable to schedule %s for FTS: %s" %
                                       (opFile.LFN, opFile.Error))

        return S_OK()
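The scheduling filter above derives the still-missing destinations as a set difference between the operation's target SEs and the SEs already holding a valid replica; an empty difference means the file is done. In isolation:

def missingTargets(targetSEs, validReplicaSEs):
    # SEs still lacking a valid replica (input order is not preserved).
    return list(set(targetSEs) - set(validReplicaSEs))

# e.g. missingTargets(['SE-A', 'SE-B'], ['SE-A']) -> ['SE-B']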