Example no. 1
  def prepareNewJobs(self, maxFilesPerJob=100, maxAttemptsPerFile=10):

    log = gLogger.getSubLogger("_prepareNewJobs", child=True)

    filesToSubmit = self._getFilesToSubmit(maxAttemptsPerFile=maxAttemptsPerFile)
    log.debug("%s ftsFiles to submit" % len(filesToSubmit))

    newJobs = []

    # {targetSE : [FTS3Files] }
    filesGroupedByTarget = FTS3Utilities.groupFilesByTarget(filesToSubmit)

    for targetSE, ftsFiles in filesGroupedByTarget.iteritems():

      res = self._checkSEAccess(targetSE, 'ReadAccess', vo=self.vo)
      if not res['OK']:
        log.error(res)
        continue

      for ftsFilesChunk in breakListIntoChunks(ftsFiles, maxFilesPerJob):

        newJob = self._createNewJob('Staging', ftsFilesChunk, targetSE, sourceSE=targetSE)
        newJobs.append(newJob)

    return S_OK(newJobs)
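The pattern above (group the files by target SE, then cut each group into fixed-size chunks and build one job per chunk) can be sketched in plain Python. The chunking helper below only mimics what DIRAC's breakListIntoChunks does, and the SE names and file counts are made up.

# Minimal sketch (not the DIRAC utility): split a list into chunks of at most `size` items.
def break_list_into_chunks(items, size):
    return [items[i:i + size] for i in range(0, len(items), size)]

# Group "files" by target SE and build one "job" per chunk, mirroring prepareNewJobs above.
files_by_target = {'CERN-DST': list(range(250)), 'CNAF-DST': list(range(30))}
jobs = []
for target_se, fts_files in files_by_target.items():
    for chunk in break_list_into_chunks(fts_files, 100):
        jobs.append({'targetSE': target_se, 'files': chunk})

print(len(jobs))  # 4: three chunks for CERN-DST (100+100+50) plus one for CNAF-DST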
Example no. 2
 def __init__( self, taskID, timeWait, raiseException=False ):
   from DIRAC.Core.Base import Script
   Script.parseCommandLine()
   from DIRAC.FrameworkSystem.Client.Logger import gLogger
   self.log = gLogger.getSubLogger( self.__class__.__name__ + "/%s" % taskID )
   self.taskID = taskID
   self.timeWait = timeWait
   self.raiseException = raiseException
Example no. 3
 def setUp( self ):
   from DIRAC.Core.Base import Script
   Script.parseCommandLine()
   from DIRAC.FrameworkSystem.Client.Logger import gLogger
   gLogger.showHeaders( True )
   self.log = gLogger.getSubLogger( self.__class__.__name__ )
   self.processPool = ProcessPool( 4, 8, 8 ) 
   self.processPool.daemonize()
Example no. 4
 def __init__(self, timeout=False, bufferLimit=52428800):
     self.log = gLogger.getSubLogger("Subprocess")
     self.timeout = False
     try:
         self.changeTimeout(timeout)
         self.bufferLimit = int(bufferLimit)  # 50MB limit for data
     except Exception as x:
         self.log.exception("Failed initialisation of Subprocess object")
         raise x
Example no. 5
 def __init__( self, oTransport = None ):
   self.oTransport = oTransport
   self.__oMD5 = md5.md5()
   self.bFinishedTransmission = False
   self.bReceivedEOF = False
   self.direction = False
   self.packetSize = 1048576
   self.__fileBytes = 0
   self.__log = gLogger.getSubLogger( "FileHelper" )
Example no. 6
  def _monitorJob(self, ftsJob):
    """
        * query the FTS servers
        * update the FTSFile status
        * update the FTSJob status
    """
    # General try/except to avoid that the thread dies
    try:
      threadID = current_process().name
      log = gLogger.getSubLogger("_monitorJob/%s" % ftsJob.jobID, child=True)

      res = self.getFTS3Context(
          ftsJob.username, ftsJob.userGroup, ftsJob.ftsServer, threadID=threadID)

      if not res['OK']:
        log.error("Error getting context", res)
        return ftsJob, res

      context = res['Value']

      res = ftsJob.monitor(context=context)

      if not res['OK']:
        log.error("Error monitoring job", res)
        return ftsJob, res

      # { fileID : { Status, Error } }
      filesStatus = res['Value']

      # Specify the job ftsGUID to make sure we do not overwrite
      # status of files already taken by newer jobs
      res = self.fts3db.updateFileStatus(filesStatus, ftsGUID=ftsJob.ftsGUID)

      if not res['OK']:
        log.error("Error updating file fts status", "%s, %s" % (ftsJob.ftsGUID, res))
        return ftsJob, res

      upDict = {
          ftsJob.jobID: {
              'status': ftsJob.status,
              'error': ftsJob.error,
              'completeness': ftsJob.completeness,
              'operationID': ftsJob.operationID,
              'lastMonitor': True,
          }
      }
      res = self.fts3db.updateJobStatus(upDict)

      if ftsJob.status in ftsJob.FINAL_STATES:
        self.__sendAccounting(ftsJob)

      return ftsJob, res

    except Exception as e:
      return ftsJob, S_ERROR(0, "Exception %s" % repr(e))
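The enclosing try/except in _monitorJob is there so that no exception can kill the worker thread: failures are folded into the (job, result) pair handed to the callback. A minimal, self-contained sketch of that pattern, with a hypothetical unit of work standing in for the FTS query:

def _work(job):
    # hypothetical stand-in for querying the FTS server; may raise
    if job % 2:
        raise ValueError("odd job %s" % job)
    return {'OK': True, 'Value': job}

def safe_worker(job):
    # never let an exception escape the thread: turn it into an error result instead
    try:
        return job, _work(job)
    except Exception as e:
        return job, {'OK': False, 'Message': "Exception %s" % repr(e)}

for job in (2, 3):
    print(safe_worker(job))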
Example no. 7
  def getFTS3Context(self, username, group, ftsServer, threadID):
    """ Returns an fts3 context for a given user, group and fts server

        The context pool is per thread, and there is one context
        per tuple (user, group, server).
        We dump the proxy of a user to a file (shared by all the threads),
        and use it to make the context.
        The proxy needs a lifetime of at least 2h, it is cached for 1.5h, and
        the lifetime of the context is 45 minutes

        :param username: name of the user
        :param group: group of the user
        :param ftsServer: address of the server

        :returns: S_OK with the context object

    """

    log = gLogger.getSubLogger("getFTS3Context", child=True)

    contextes = self._globalContextCache.setdefault(threadID, DictCache())

    idTuple = (username, group, ftsServer)
    log.debug("Getting context for %s" % (idTuple, ))

    if not contextes.exists(idTuple, 2700):
      res = getDNForUsername(username)
      if not res['OK']:
        return res
      # We take the first DN returned
      userDN = res['Value'][0]

      log.debug("UserDN %s" % userDN)

      # We dump the proxy to a file.
      # It has to have a lifetime of at least 2 hours
      # and we cache it for 1.5 hours
      res = gProxyManager.downloadVOMSProxyToFile(
          userDN, group, requiredTimeLeft=7200, cacheTime=5400)
      if not res['OK']:
        return res

      proxyFile = res['Value']
      log.debug("Proxy file %s" % proxyFile)

      # We generate the context
      res = FTS3Job.generateContext(ftsServer, proxyFile)
      if not res['OK']:
        return res
      context = res['Value']

      # we add it to the cache for this thread for 1h
      contextes.add(idTuple, 3600, context)

    return S_OK(contextes.get(idTuple))
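The caching above keeps one context per (user, group, server) tuple in a per-thread cache, and reuses it only while enough validity remains. A rough approximation with a plain dictionary and timestamps (DictCache is the DIRAC utility being approximated; the user, group and server values below are invented):

import threading
import time

_caches = {}  # threadID -> { (user, group, server): (expiry_timestamp, context) }

def get_context(user, group, server, lifetime=3600, min_validity=2700):
    cache = _caches.setdefault(threading.current_thread().name, {})
    key = (user, group, server)
    entry = cache.get(key)
    # regenerate unless at least `min_validity` seconds of cache lifetime remain
    if entry is None or entry[0] - time.time() < min_validity:
        context = "context for %s" % (key,)  # stand-in for FTS3Job.generateContext
        cache[key] = (time.time() + lifetime, context)
    return cache[key][1]

print(get_context('someuser', 'some_group', 'https://fts3.example.org:8446'))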
Example no. 8
  def _monitorJobCallback(returnedValue):
    """ Callback when a job has been monitored
        :param returnedValue: value returned by the _monitorJob method
                              (ftsJob, standard dirac return struct)
    """

    ftsJob, res = returnedValue
    log = gLogger.getSubLogger("_monitorJobCallback/%s" % ftsJob.jobID, child=True)
    if not res['OK']:
      log.error("Error updating job status", res)
    else:
      log.debug("Successfully updated job status")
Example no. 9
 def __init__( self, taskID, timeWait, raiseException=False ):
   from DIRAC.Core.Base import Script
   Script.parseCommandLine()
   from DIRAC.FrameworkSystem.Client.Logger import gLogger
   self.log = gLogger.getSubLogger( self.__class__.__name__ + "/%s" % taskID )
   self.taskID = taskID
   self.log.always( "pid=%s task=%s I'm locked" % ( os.getpid(), self.taskID ) )
   gLock.acquire()
   self.log.always("you can't see that line, object is stuck by gLock" )
   self.timeWait = timeWait 
   self.raiseException = raiseException
   gLock.release()
Example no. 10
  def _treatOperationCallback(returnedValue):
    """ Callback when an operation has been treated

        :param returnedValue: value returned by the _treatOperation method
                              (ftsOperation, standard dirac return struct)
    """

    operation, res = returnedValue
    log = gLogger.getSubLogger("_treatOperationCallback/%s" % operation.operationID, child=True)
    if not res['OK']:
      log.error("Error treating operation", res)
    else:
      log.debug("Successfully treated operation")
Example no. 11
  def kickJobs(self):
    """ kick stuck jobs """

    log = gLogger.getSubLogger("kickJobs", child=True)

    res = self.fts3db.kickStuckJobs(limit=self.maxKick, kickDelay=self.kickDelay)
    if not res['OK']:
      return res

    kickedJobs = res['Value']
    log.info("Kicked %s stuck jobs" % kickedJobs)

    return S_OK()
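All of these snippets rely on DIRAC's return-value convention: every call returns a dict with an 'OK' key, plus 'Value' on success or 'Message' on failure (the fallback definitions in Example no. 40 below spell this out). A minimal sketch of how a result like the one from kickStuckJobs is produced and consumed, with a hypothetical stand-in for the DB call:

def S_OK(value=None):
    return {'OK': True, 'Value': value}

def S_ERROR(message=''):
    return {'OK': False, 'Message': message}

def kick_stuck_jobs(limit, kick_delay):
    # hypothetical stand-in for fts3db.kickStuckJobs
    return S_OK(2)

res = kick_stuck_jobs(limit=100, kick_delay=2)
if not res['OK']:
    print(res['Message'])
else:
    print("Kicked %s stuck jobs" % res['Value'])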
Example no. 12
  def deleteOperations(self):
    """ delete final operations """

    log = gLogger.getSubLogger("deleteOperations", child=True)

    res = self.fts3db.deleteFinalOperations(limit=self.maxDelete, deleteDelay=self.deleteDelay)
    if not res['OK']:
      return res

    deletedOperations = res['Value']
    log.info("Deleted %s final operations" % deletedOperations)

    return S_OK()
Example no. 13
  def __init__( self, timeout = False, bufferLimit = 52428800 ):
    """ c'tor

    :param int timeout: timeout in seconds
    :param int bufferLimit: buffer size, default 50MB
    """
    self.log = gLogger.getSubLogger( 'Subprocess' )
    self.timeout = False
    try:
      self.changeTimeout( timeout )
      self.bufferLimit = int( bufferLimit ) # 50MB limit for data
    except Exception as x:
      self.log.exception( 'Failed initialisation of Subprocess object' )
      raise x
Example no. 14
  def setUp( self ):
    """c'tor

    :param self: self reference
    """
    from DIRAC.Core.Base import Script
    Script.parseCommandLine()
    from DIRAC.FrameworkSystem.Client.Logger import gLogger
    gLogger.showHeaders( True )
    self.log = gLogger.getSubLogger( self.__class__.__name__ )
    self.processPool = ProcessPool( 4, 8, 8,
                                    poolCallback = self.poolCallback, 
                                    poolExceptionCallback = self.poolExceptionCallback )
    self.processPool.daemonize()
Example no. 15
  def init_on_load(self):
    """ This method initializes some attributes.
        It is called by sqlalchemy (which does not call __init__)
    """
    self._vo = None

    self.dManager = DataManager()
    self.rssClient = ResourceStatus()

    opID = getattr(self, 'operationID', None)
    loggerName = '%s/' % opID if opID else ''
    loggerName += 'req_%s/op_%s' % (self.rmsReqID, self.rmsOpID)

    self._log = gLogger.getSubLogger(loggerName, True)
Example no. 16
  def _constructRemovalJob(self, context, allTargetSURLs, failedLFNs, target_spacetoken):
    """ Build a job for removal

        Some attributes of the job are expected to be set
          * targetSE
          * activity (optional)
          * priority (optional)
          * filesToSubmit
          * operationID (optional, used as metadata for the job)


        :param context: fts3 context
        :param allTargetSURLs: dict {lfn:surl} for the target
        :param failedLFNs: set of LFNs in filesToSubmit for which there was a problem
        :param target_spacetoken: the space token of the target

        :return: S_OK( (job object, list of ftsFileIDs in the job))
    """

    log = gLogger.getSubLogger(
        "constructRemovalJob/%s/%s" %
        (self.operationID, self.targetSE), True)

    transfers = []
    fileIDsInTheJob = []
    for ftsFile in self.filesToSubmit:

      if ftsFile.lfn in failedLFNs:
        log.debug("Not preparing transfer for file %s" % ftsFile.lfn)
        continue

      transfers.append({'surl': allTargetSURLs[ftsFile.lfn],
                        'metadata': getattr(ftsFile, 'fileID')})
      fileIDsInTheJob.append(getattr(ftsFile, 'fileID'))

    # We add a few metadata to the fts job so that we can reuse them later on without
    # querying our DB.
    # source and target SE are just used for accounting purpose
    job_metadata = {
        'operationID': self.operationID,
        'sourceSE': self.sourceSE,
        'targetSE': self.targetSE}

    job = fts3.new_delete_job(transfers,
                              spacetoken=target_spacetoken,
                              metadata=job_metadata)
    job['params']['retry'] = 3
    job['params']['priority'] = self.priority

    return S_OK((job, fileIDsInTheJob))
Example no. 17
def selectUniqueRandomSource(ftsFiles, allowedSources=None):
  """
      For a list of FTS3File objects, select a random source and group the files by source.

      :param allowedSources: list of allowed sources
      :param ftsFiles: list of FTS3File objects

      :return:  S_OK({ sourceSE: [ FTS3Files] })

  """

  _log = gLogger.getSubLogger("selectUniqueRandomSource")

  allowedSourcesSet = set(allowedSources) if allowedSources else set()

  # destGroup will contain for each target SE a dict { source : [list of FTS3Files] }
  groupBySource = {}

  # For all files, check which possible sources they have
  res = _checkSourceReplicas(ftsFiles)
  if not res['OK']:
    return res

  filteredReplicas = res['Value']

  for ftsFile in ftsFiles:

    if ftsFile.lfn in filteredReplicas['Failed']:
      _log.error("Failed to get active replicas", "%s,%s" %
                 (ftsFile.lfn, filteredReplicas['Failed'][ftsFile.lfn]))
      continue

    replicaDict = filteredReplicas['Successful'][ftsFile.lfn]

    # Only consider the allowed sources

    # If we have a restriction, apply it, otherwise take all the replicas
    allowedReplicaSource = (set(replicaDict) & allowedSourcesSet) if allowedSourcesSet else replicaDict

    # pick a random source

    randSource = random.choice(list(allowedReplicaSource))  # one has to convert to list

    groupBySource.setdefault(randSource, []).append(ftsFile)

  return S_OK(groupBySource)
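A short usage sketch of the selection logic above, with made-up LFNs and storage element names in place of the _checkSourceReplicas result, showing how each file ends up under one randomly chosen (and optionally restricted) source:

import random

replicas = {
    '/vo/data/file1': ['CERN-DST', 'GRIDKA-DST'],
    '/vo/data/file2': ['CNAF-DST'],
}
allowed = {'CERN-DST', 'CNAF-DST'}

group_by_source = {}
for lfn, ses in replicas.items():
    # if we have a restriction, apply it, otherwise take all the replicas
    candidates = (set(ses) & allowed) if allowed else set(ses)
    source = random.choice(list(candidates))  # one has to convert to a list
    group_by_source.setdefault(source, []).append(lfn)

print(group_by_source)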
Example no. 18
    def kickOperations(self):
        """ Kick stuck operations

        :return: S_OK()/S_ERROR()
    """

        log = gLogger.getSubLogger("kickOperations", child=True)

        res = self.fts3db.kickStuckOperations(limit=self.maxKick,
                                              kickDelay=self.kickDelay)
        if not res['OK']:
            return res

        kickedOperations = res['Value']
        log.info("Kicked %s stuck operations" % kickedOperations)

        return S_OK()
Example no. 19
    def finalize(self, timeout=60):
        """
    Drain pool, shutdown processing in more or less clean way

    :param self: self reference
    :param timeout: seconds to wait before killing
    """
        # # start draining
        self.__draining = True
        # # join daemon process
        if self.__daemonProcess:
            self.__daemonProcess.join(timeout)
        # # process all tasks
        self.processAllResults(timeout)
        # # set stop event, all idle workers should be terminated
        self.__stopEvent.set()
        # # join idle workers
        start = time.time()
        log = gLogger.getSubLogger("ProcessPool/finalize")
        nWorkers = 9999999
        while self.__workersDict:
            self.__cleanDeadProcesses()
            if len(self.__workersDict) != nWorkers:
                nWorkers = len(self.__workersDict)
                log.debug("%d workers still active, timeout = %d" %
                          (nWorkers, timeout))
            if timeout <= 0 or time.time() - start >= timeout:
                break
            time.sleep(0.1)
        # # second clean up - join and terminate workers
        if self.__workersDict:
            log.debug(
                "After cleaning dead processes, %d workers still active, timeout = %d"
                % (len(self.__workersDict), timeout))
        for worker in self.__workersDict.values():
            if worker.is_alive():
                worker.terminate()
                worker.join(5)
        self.__cleanDeadProcesses()
        # third clean up - kill'em all!!!
        if self.__workersDict:
            log.debug(
                "After terminating processes, %d workers still active, timeout = %d, kill them"
                % (len(self.__workersDict), timeout))
        self.__filicide()
Example no. 20
    def monitorJobsLoop(self):
        """ * fetch the active FTSJobs from the DB
        * spawn a thread to monitor each of them

        :return: S_OK()/S_ERROR()
    """

        log = gLogger.getSubLogger("monitorJobs", child=True)
        log.debug("Size of the context cache %s" %
                  len(self._globalContextCache))

        log.debug("Getting active jobs")
        # get jobs from DB
        res = self.fts3db.getActiveJobs(limit=self.jobBulkSize,
                                        jobAssignmentTag=self.assignmentTag)

        if not res['OK']:
            log.error("Could not retrieve ftsJobs from the DB", res)
            return res

        activeJobs = res['Value']
        log.info("%s jobs to queue for monitoring" % len(activeJobs))

        # We store here the AsyncResult object on which we are going to wait
        applyAsyncResults = []

        # Starting the monitoring threads
        for ftsJob in activeJobs:
            log.debug("Queuing executing of ftsJob %s" % ftsJob.jobID)
            # queue the execution of self._monitorJob( ftsJob ) in the thread pool
            # The returned value is passed to _monitorJobCallback
            applyAsyncResults.append(
                self.jobsThreadPool.apply_async(
                    self._monitorJob, (ftsJob, ),
                    callback=self._monitorJobCallback))

        log.debug("All execution queued")

        # Waiting for all the monitoring to finish
        while not all([r.ready() for r in applyAsyncResults]):
            log.debug("Not all the tasks are finished")
            time.sleep(0.5)

        log.debug("All the tasks have completed")
        return S_OK()
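The monitoring and treatment loops all follow the same pattern: queue work on a thread pool with apply_async plus a callback, keep the AsyncResult objects, and poll ready() until everything is done. A self-contained sketch of that pattern with the standard-library ThreadPool (the job IDs and the monitor function are invented):

import time
from multiprocessing.pool import ThreadPool

def monitor_job(job_id):
    time.sleep(0.1)  # stand-in for querying the FTS server
    return job_id, {'OK': True}

def monitor_callback(returned_value):
    job_id, res = returned_value
    print("job %s -> %s" % (job_id, "ok" if res['OK'] else res['Message']))

pool = ThreadPool(4)
async_results = [pool.apply_async(monitor_job, (job_id,), callback=monitor_callback)
                 for job_id in range(8)]

# wait for all the monitoring tasks, as monitorJobsLoop does
while not all(r.ready() for r in async_results):
    time.sleep(0.05)

pool.close()
pool.join()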
Example no. 21
    def execute(self):
        """ One cycle execution

        :return: S_OK()/S_ERROR()
    """

        log = gLogger.getSubLogger("execute", child=True)

        log.info("Monitoring job")
        res = self.monitorJobsLoop()

        if not res['OK']:
            log.error("Error monitoring jobs", res)
            return res

        log.info("Treating operations")
        res = self.treatOperationsLoop()

        if not res['OK']:
            log.error("Error treating operations", res)
            return res

        log.info("Kicking stuck jobs")
        res = self.kickJobs()

        if not res['OK']:
            log.error("Error kicking jobs", res)
            return res

        log.info("Kicking stuck operations")
        res = self.kickOperations()

        if not res['OK']:
            log.error("Error kicking operations", res)
            return res

        log.info("Deleting final operations")
        res = self.deleteOperations()

        if not res['OK']:
            log.error("Error deleting operations", res)
            return res

        return S_OK()
Example no. 22
  def __init__(self, serverDict, serverPolicy="Random"):
    """
        Call the init of the parent, and initialize the list of FTS3 servers
    """

    self.log = gLogger.getSubLogger("FTS3ServerPolicy")

    self._serverDict = serverDict
    self._serverList = serverDict.keys()
    self._maxAttempts = len(self._serverList)
    self._nextServerID = 0
    self._resourceStatus = ResourceStatus()

    methName = "_%sServerPolicy" % serverPolicy.lower()
    if not hasattr(self, methName):
      self.log.error('Unknown server policy %s. Using Random instead' % serverPolicy)
      methName = "_randomServerPolicy"

    self._policyMethod = getattr(self, methName)
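The policy lookup above resolves a method by name with getattr and falls back to the random policy when the name is unknown. A stripped-down sketch of the same dispatch; the policy names and server addresses are invented:

import random

class ServerPolicy(object):
    def __init__(self, servers, server_policy="Random"):
        self._servers = list(servers)
        self._next = 0
        meth_name = "_%sServerPolicy" % server_policy.lower()
        if not hasattr(self, meth_name):
            print("Unknown server policy %s. Using Random instead" % server_policy)
            meth_name = "_randomServerPolicy"
        self._policy_method = getattr(self, meth_name)

    def _randomServerPolicy(self):
        return random.choice(self._servers)

    def _sequenceServerPolicy(self):
        server = self._servers[self._next % len(self._servers)]
        self._next += 1
        return server

policy = ServerPolicy(["fts3-a.example.org", "fts3-b.example.org"], "Sequence")
print(policy._policy_method())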
Example no. 23
  def finalize(self):
    """ finalize processing """
    # Joining all the ThreadPools
    log = gLogger.getSubLogger("Finalize")

    log.debug("Closing jobsThreadPool")

    self.jobsThreadPool.close()
    self.jobsThreadPool.join()

    log.debug("jobsThreadPool joined")

    log.debug("Closing opsThreadPool")

    self.opsThreadPool.close()
    self.opsThreadPool.join()

    log.debug("opsThreadPool joined")

    return S_OK()
Example no. 25
def generatePossibleTransfersBySources(ftsFiles, allowedSources=None):
  """
      For a list of FTS3File objects, group the possible transfer sources.
      CAUTION! A given LFN can have multiple sources;
               you still have to choose your source!

      :param allowedSources: list of allowed sources
      :param ftsFiles: list of FTS3File objects
      :return: S_OK({ sourceSE: [ FTS3Files] })

  """

  _log = gLogger.getSubLogger("generatePossibleTransfersBySources", True)

  # destGroup will contain for each target SE a dict { possible source : transfer metadata }
  groupBySource = {}

  # For all files, check which possible sources they have
  res = _checkSourceReplicas(ftsFiles)
  if not res['OK']:
    return res

  filteredReplicas = res['Value']

  for ftsFile in ftsFiles:

    if ftsFile.lfn in filteredReplicas['Failed']:
      _log.error("Failed to get active replicas", "%s,%s" %
                 (ftsFile.lfn, filteredReplicas['Failed'][ftsFile.lfn]))
      continue

    replicaDict = filteredReplicas['Successful'][ftsFile.lfn]

    for se in replicaDict:
      # if we are imposed a source, respect it
      if allowedSources and se not in allowedSources:
        continue

      groupBySource.setdefault(se, []).append(ftsFile)

  return S_OK(groupBySource)
Example no. 26
  def execute(self):
    """ one cycle execution """

    log = gLogger.getSubLogger("execute", child=True)

    log.info("Monitoring job")
    res = self.monitorJobsLoop()

    if not res['OK']:
      log.error("Error monitoring jobs", res)
      return res

    log.info("Treating operations")
    res = self.treatOperationsLoop()

    if not res['OK']:
      log.error("Error treating operations", res)
      return res

    log.info("Kicking stuck jobs")
    res = self.kickJobs()

    if not res['OK']:
      log.error("Error kicking jobs", res)
      return res

    log.info("Kicking stuck operations")
    res = self.kickOperations()

    if not res['OK']:
      log.error("Error kicking operations", res)
      return res

    log.info("Deleting final operations")
    res = self.deleteOperations()

    if not res['OK']:
      log.error("Error deleting operations", res)
      return res

    return S_OK()
Example no. 27
  def treatOperationsLoop(self):
    """ * Fetch all the FTSOperations which are not finished
        * Spawn a thread to treat each operation
    """

    log = gLogger.getSubLogger("treatOperations", child=True)

    log.debug("Size of the context cache %s" % len(self._globalContextCache))

    log.info("Getting non finished operations")

    res = self.fts3db.getNonFinishedOperations(
        limit=self.operationBulkSize, operationAssignmentTag=self.assignmentTag)

    if not res['OK']:
      log.error("Could not get incomplete operations", res)
      return res

    incompleteOperations = res['Value']

    log.info("Treating %s incomplete operations" % len(incompleteOperations))

    applyAsyncResults = []

    for operation in incompleteOperations:
      log.debug("Queuing executing of operation %s" % operation.operationID)
      # queue the execution of self._treatOperation( operation ) in the thread pool
      # The returned value is passed to _treatOperationCallback
      applyAsyncResults.append(self.opsThreadPool.apply_async(
          self._treatOperation, (operation, ), callback=self._treatOperationCallback))

    log.debug("All execution queued")

    # Waiting for all the treatments to finish
    while not all([r.ready() for r in applyAsyncResults]):
      log.debug("Not all the tasks are finished")
      time.sleep(0.5)

    log.debug("All the tasks have completed")

    return S_OK()
Example no. 29
    def __init__(self, timeout=False, bufferLimit=52428800):
        """c'tor

        :param int timeout: timeout in seconds
        :param int bufferLimit: buffer size, default 50MB
        """
        self.log = gLogger.getSubLogger("Subprocess")
        self.timeout = False
        try:
            self.changeTimeout(timeout)
            self.bufferLimit = int(bufferLimit)  # 50MB limit for data
        except Exception as x:
            self.log.exception("Failed initialisation of Subprocess object")
            raise x

        self.child = None
        self.childPID = 0
        self.childKilled = False
        self.callback = None
        self.bufferList = []
        self.cmdSeq = []
Example no. 30
  def monitorJobsLoop(self):
    """
        * fetch the active FTSJobs from the DB
        * spawn a thread to monitor each of them
    """

    log = gLogger.getSubLogger("monitorJobs", child=True)
    log.debug("Size of the context cache %s" % len(self._globalContextCache))

    log.debug("Getting active jobs")
    # get jobs from DB
    res = self.fts3db.getActiveJobs(limit=self.jobBulkSize, jobAssignmentTag=self.assignmentTag)

    if not res['OK']:
      log.error("Could not retrieve ftsJobs from the DB", res)
      return res

    activeJobs = res['Value']
    log.info("%s jobs to queue for monitoring" % len(activeJobs))

    # We store here the AsyncResult object on which we are going to wait
    applyAsyncResults = []

    # Starting the monitoring threads
    for ftsJob in activeJobs:
      log.debug("Queuing executing of ftsJob %s" % ftsJob.jobID)
      # queue the execution of self._monitorJob( ftsJob ) in the thread pool
      # The returned value is passed to _monitorJobCallback
      applyAsyncResults.append(self.jobsThreadPool.apply_async(
          self._monitorJob, (ftsJob, ), callback=self._monitorJobCallback))

    log.debug("All execution queued")

    # Waiting for all the monitoring to finish
    while not all([r.ready() for r in applyAsyncResults]):
      log.debug("Not all the tasks are finished")
      time.sleep(0.5)

    log.debug("All the tasks have completed")
    return S_OK()
Example no. 31
    def treatOperationsLoop(self):
        """ * Fetch all the FTSOperations which are not finished
        * Spawn a thread to treat each operation
    """

        log = gLogger.getSubLogger("treatOperations", child=True)

        thPool = ThreadPool(self.maxNumberOfThreads)

        log.info("Getting non finished operations")

        res = self.fts3db.getNonFinishedOperations(
            limit=self.operationBulkSize,
            operationAssignmentTag=self.assignmentTag)

        if not res['OK']:
            log.error("Could not get incomplete operations", res)
            return res

        incompleteOperations = res['Value']

        log.info("Treating %s incomplete operations" %
                 len(incompleteOperations))

        for operation in incompleteOperations:
            log.debug("Queuing executing of operation %s" %
                      operation.operationID)
            # queue the execution of self._treatOperation( operation ) in the thread pool
            # The returned value is passed to _treatOperationCallback
            thPool.apply_async(self._treatOperation, (operation, ),
                               callback=self._treatOperationCallback)

        log.debug("All execution queued")

        # Waiting for all the treatments to finish
        thPool.close()
        thPool.join()
        log.debug("thPool joined")
        return S_OK()
Example no. 32
def selectUniqueRandomSource(ftsFiles, allowedSources=None):
  """
      For a list of FTS3File objects, select a random source and group the files by source.

      :param allowedSources: list of allowed sources
      :param ftsFiles: list of FTS3File objects

      :return:  S_OK({ sourceSE: [ FTS3Files] })

  """

  _log = gLogger.getSubLogger("selectUniqueRandomSource")

  # destGroup will contain for each target SE a dict { source : [list of FTS3Files] }
  groupBySource = {}

  # For all files, check which possible sources they have
  res = _checkSourceReplicas(ftsFiles)
  if not res['OK']:
    return res

  filteredReplicas = res['Value']

  for ftsFile in ftsFiles:

    if ftsFile.lfn in filteredReplicas['Failed']:
      _log.error("Failed to get active replicas", "%s,%s" %
                 (ftsFile.lfn, filteredReplicas['Failed'][ftsFile.lfn]))
      continue

    replicaDict = filteredReplicas['Successful'][ftsFile.lfn]

    # pick a random source

    randSource = random.choice(list(replicaDict))  # one has to convert to list

    groupBySource.setdefault(randSource, []).append(ftsFile)

  return S_OK(groupBySource)
Example no. 33
    def init_on_load(self):
        """This method initializes some attributes.
        It is called by sqlalchemy (which does not call __init__)

        """
        self._vo = None

        # Note that in the case of an FTS3Operation created from an RMS
        # object, the members here will probably be "wrong" in the sense
        # that the VO will not be known by then.
        # It does not really matter however, since we do not perform anything
        # on an operation created this way, it's just to be then serialized
        # in the DB.
        self.dManager = DataManager()
        self.rssClient = ResourceStatus()
        self.fts3Plugin = FTS3Utilities.getFTS3Plugin(vo=self.vo)

        opID = getattr(self, "operationID", None)
        loggerName = "%s/" % opID if opID else ""
        loggerName += "req_%s/op_%s" % (self.rmsReqID, self.rmsOpID)

        self._log = gLogger.getSubLogger(loggerName)
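As the comments say, SQLAlchemy bypasses __init__ when it loads an object from the database, so transient attributes must be re-created in a separate hook. One way to register such a hook is SQLAlchemy's reconstructor decorator; this is a minimal sketch assuming SQLAlchemy 1.4+, not necessarily how DIRAC wires init_on_load:

from sqlalchemy import Column, Integer
from sqlalchemy.orm import declarative_base, reconstructor

Base = declarative_base()

class Operation(Base):
    __tablename__ = 'operations'
    operationID = Column(Integer, primary_key=True)

    def __init__(self, operationID=None):
        self.operationID = operationID
        self.init_on_load()

    @reconstructor
    def init_on_load(self):
        # re-create attributes that are not persisted in the DB
        self._log = "op_%s" % self.operationID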
Example no. 34
    def monitorJobsLoop(self):
        """
        * fetch the active FTSJobs from the DB
        * spawn a thread to monitor each of them
    """

        log = gLogger.getSubLogger("monitorJobs", child=True)

        thPool = ThreadPool(self.maxNumberOfThreads)

        log.debug("Getting active jobs")
        # get jobs from DB
        res = self.fts3db.getActiveJobs(limit=self.jobBulkSize,
                                        jobAssignmentTag=self.assignmentTag)

        if not res['OK']:
            log.error("Could not retrieve ftsJobs from the DB", res)
            return res

        activeJobs = res['Value']
        log.info("%s jobs to queue for monitoring" % len(activeJobs))

        # Starting the monitoring threads
        for ftsJob in activeJobs:
            log.debug("Queuing executing of ftsJob %s" % ftsJob.jobID)
            # queue the execution of self._monitorJob( ftsJob ) in the thread pool
            # The returned value is passed to _monitorJobCallback
            thPool.apply_async(self._monitorJob, (ftsJob, ),
                               callback=self._monitorJobCallback)

        log.debug("All execution queued")

        # Waiting for all the monitoring to finish
        thPool.close()
        thPool.join()
        log.debug("thPool joined")
        return S_OK()
Example no. 35
    def _treatOperation(self, operation):
        """ Treat one operation:
          * does the callback if the operation is finished
          * generate new jobs and submits them

        :param operation: the operation to treat

        :return: operation, S_OK()/S_ERROR()
    """
        try:
            threadID = current_process().name
            log = gLogger.getSubLogger("treatOperation/%s" %
                                       operation.operationID,
                                       child=True)

            # If the operation is totally processed
            # we perform the callback
            if operation.isTotallyProcessed():
                log.debug("FTS3Operation %s is totally processed" %
                          operation.operationID)
                res = operation.callback()

                if not res['OK']:
                    log.error("Error performing the callback", res)
                    log.info("Putting back the operation")
                    dbRes = self.fts3db.persistOperation(operation)

                    if not dbRes['OK']:
                        log.error("Could not persist operation", dbRes)

                    return operation, res

            else:
                log.debug("FTS3Operation %s is not totally processed yet" %
                          operation.operationID)

                # This flag is set to False if we want to stop the ongoing processing
                # of an operation, typically when the matching RMS Request has been
                # canceled (see below)
                continueOperationProcessing = True

                # Check the status of the associated RMS Request.
                # If it is canceled then we will not create new FTS3Jobs, and mark
                # this as FTS3Operation canceled.

                if operation.rmsReqID:
                    res = ReqClient().getRequestStatus(operation.rmsReqID)
                    if not res['OK']:
                        log.error("Could not get request status", res)
                        return operation, res
                    rmsReqStatus = res['Value']

                    if rmsReqStatus == 'Canceled':
                        log.info(
                            "The RMS Request is canceled, canceling the FTS3Operation",
                            "rmsReqID: %s, FTS3OperationID: %s" %
                            (operation.rmsReqID, operation.operationID))
                        operation.status = 'Canceled'
                        continueOperationProcessing = False

                if continueOperationProcessing:
                    res = operation.prepareNewJobs(
                        maxFilesPerJob=self.maxFilesPerJob,
                        maxAttemptsPerFile=self.maxAttemptsPerFile)

                    if not res['OK']:
                        log.error(
                            "Cannot prepare new Jobs",
                            "FTS3Operation %s : %s" %
                            (operation.operationID, res))
                        return operation, res

                    newJobs = res['Value']

                    log.debug("FTS3Operation %s: %s new jobs to be submitted" %
                              (operation.operationID, len(newJobs)))

                    for ftsJob in newJobs:
                        res = self._serverPolicy.chooseFTS3Server()
                        if not res['OK']:
                            log.error(res)
                            continue

                        ftsServer = res['Value']
                        log.debug("Use %s server" % ftsServer)

                        ftsJob.ftsServer = ftsServer

                        res = self.getFTS3Context(ftsJob.username,
                                                  ftsJob.userGroup,
                                                  ftsServer,
                                                  threadID=threadID)

                        if not res['OK']:
                            log.error("Could not get context", res)
                            continue

                        context = res['Value']
                        res = ftsJob.submit(context=context,
                                            protocols=self.thirdPartyProtocols)

                        if not res['OK']:
                            log.error(
                                "Could not submit FTS3Job",
                                "FTS3Operation %s : %s" %
                                (operation.operationID, res))
                            continue

                        operation.ftsJobs.append(ftsJob)

                        submittedFileIds = res['Value']
                        log.info(
                            "FTS3Operation %s: Submitted job for %s transfers"
                            % (operation.operationID, len(submittedFileIds)))

                # new jobs are put in the DB at the same time
            res = self.fts3db.persistOperation(operation)

            if not res['OK']:
                log.error("Could not persist operation", res)

            return operation, res

        except Exception as e:
            log.exception('Exception in the thread', repr(e))
            return operation, S_ERROR("Exception %s" % repr(e))
Example no. 36
    def monitorJobsLoop(self):
        """* fetch the active FTSJobs from the DB
        * spawn a thread to monitor each of them

        :return: S_OK()/S_ERROR()
        """

        log = gLogger.getSubLogger("monitorJobs")
        log.debug("Size of the context cache %s" %
                  len(self._globalContextCache))

        # Find the number of loops
        nbOfLoops, mod = divmod(self.jobBulkSize, JOB_MONITORING_BATCH_SIZE)
        if mod:
            nbOfLoops += 1

        log.debug("Getting active jobs")

        for loopId in range(nbOfLoops):

            log.info("Getting next batch of jobs to monitor",
                     "%s/%s" % (loopId, nbOfLoops))
            # get jobs from DB
            res = self.fts3db.getActiveJobs(
                limit=JOB_MONITORING_BATCH_SIZE,
                jobAssignmentTag=self.assignmentTag)

            if not res["OK"]:
                log.error("Could not retrieve ftsJobs from the DB", res)
                return res

            activeJobs = res["Value"]
            log.info("Jobs queued for monitoring", len(activeJobs))

            # We store here the AsyncResult object on which we are going to wait
            applyAsyncResults = []

            # Starting the monitoring threads
            for ftsJob in activeJobs:
                log.debug("Queuing executing of ftsJob %s" % ftsJob.jobID)
                # queue the execution of self._monitorJob( ftsJob ) in the thread pool
                # The returned value is passed to _monitorJobCallback
                applyAsyncResults.append(
                    self.jobsThreadPool.apply_async(
                        self._monitorJob, (ftsJob, ),
                        callback=self._monitorJobCallback))

            log.debug("All execution queued")

            # Waiting for all the monitoring to finish
            while not all([r.ready() for r in applyAsyncResults]):
                log.debug("Not all the tasks are finished")
                time.sleep(0.5)

            # If we got less to monitor than what we asked,
            # stop looping
            if len(activeJobs) < JOB_MONITORING_BATCH_SIZE:
                break

        log.debug("All the tasks have completed")
        return S_OK()
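The batch count computed at the top of this version is a ceiling division written with divmod; a tiny check of that arithmetic (the real value of JOB_MONITORING_BATCH_SIZE is not shown in the snippet, 20 is just an example):

def number_of_batches(bulk_size, batch_size):
    loops, remainder = divmod(bulk_size, batch_size)
    return loops + 1 if remainder else loops

assert number_of_batches(50, 20) == 3  # 20 + 20 + 10
assert number_of_batches(40, 20) == 2  # exact multiple, no extra loop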
Example no. 37
  def submit(self, context=None, ftsServer=None, ucert=None, pinTime=36000, protocols=None):
    """ submit the job to the FTS server

        Some attributes are expected to be defined for the submission to work:
          * type (set by FTS3Operation)
          * sourceSE (only for Transfer jobs)
          * targetSE
          * activity (optional)
          * priority (optional)
          * username
          * userGroup
          * filesToSubmit
          * operationID (optional, used as metadata for the job)

        We also expect the FTSFiles to have an ID defined, as it is given as transfer metadata

        :param pinTime: Time the file should be pinned on disk (used for transfers and staging)
                        Used only if the source SE is a tape storage
        :param context: fts3 context. If not given, it is created (see ftsServer & ucert param)
        :param ftsServer: the address of the fts server to submit to. Used only if context is
                          not given. If not given either, use the ftsServer object attribute

        :param ucert: path to the user certificate/proxy. Might be inferred by the fts cli (see its doc)
        :param protocols: list of protocols from which we should choose the protocol to use

        :returns: S_OK([FTSFiles ids of files submitted])
    """

    log = gLogger.getSubLogger("submit/%s/%s_%s" %
                               (self.operationID, self.sourceSE, self.targetSE), True)

    if not context:
      if not ftsServer:
        ftsServer = self.ftsServer
      context = fts3.Context(
          endpoint=ftsServer,
          ucert=ucert,
          request_class=ftsSSLRequest,
          verify=False)

    # Construct the target SURL
    res = self.__fetchSpaceToken(self.targetSE)
    if not res['OK']:
      return res
    target_spacetoken = res['Value']

    allLFNs = [ftsFile.lfn for ftsFile in self.filesToSubmit]

    if self.type == 'Transfer':
      res = self._constructTransferJob(
          pinTime,
          allLFNs,
          target_spacetoken,
          protocols=protocols)
    elif self.type == 'Staging':
      res = self._constructStagingJob(
          pinTime,
          allLFNs,
          target_spacetoken)
    # elif self.type == 'Removal':
    #   res = self._constructRemovalJob(context, allLFNs, failedLFNs, target_spacetoken)

    if not res['OK']:
      return res

    job, fileIDsInTheJob = res['Value']
    setFileIdsInTheJob = set(fileIDsInTheJob)

    try:
      self.ftsGUID = fts3.submit(context, job)
      log.info("Got GUID %s" % self.ftsGUID)

      # Only increase the amount of attempt
      # if we succeeded in submitting -> no ! Why did I do that ??
      for ftsFile in self.filesToSubmit:
        ftsFile.attempt += 1

        # This should never happen because a file should be "released"
        # first by the previous job.
        # But we just print a warning
        if ftsFile.ftsGUID is not None:
          log.warn(
              "FTSFile has a non NULL ftsGUID at job submission time",
              "FileID: %s existing ftsGUID: %s" %
              (ftsFile.fileID,
               ftsFile.ftsGUID))

        # `assign` the file to this job
        ftsFile.ftsGUID = self.ftsGUID
        if ftsFile.fileID in setFileIdsInTheJob:
          ftsFile.status = 'Submitted'

      now = datetime.datetime.utcnow().replace(microsecond=0)
      self.submitTime = now
      self.lastUpdate = now
      self.lastMonitor = now

    except FTS3ClientException as e:
      log.exception("Error at submission", repr(e))
      return S_ERROR("Error at submission: %s" % e)

    return S_OK(fileIDsInTheJob)
Example no. 38
  def _constructStagingJob(self, pinTime, allLFNs, target_spacetoken):
    """ Build a job for staging

        Some attributes of the job are expected to be set
          * targetSE
          * activity (optional)
          * priority (optional)
          * filesToSubmit
          * operationID (optional, used as metadata for the job)

        :param pinTime: pinning time in case staging is needed
        :param allLFNs: List of LFNs to stage
        :param failedLFNs: set of LFNs in filesToSubmit for which there was a problem
        :param target_spacetoken: the space token of the target

        :return: S_OK( (job object, list of ftsFileIDs in the job))
    """

    log = gLogger.getSubLogger(
        "constructStagingJob/%s/%s" %
        (self.operationID, self.targetSE), True)

    transfers = []
    fileIDsInTheJob = []

    # Set of LFNs for which we did not get an SRM URL
    failedLFNs = set()

    # getting all the target surls
    res = StorageElement(self.targetSE, vo=self.vo).getURL(allLFNs, protocol='srm')
    if not res['OK']:
      return res

    for lfn, reason in res['Value']['Failed'].iteritems():
      failedLFNs.add(lfn)
      log.error("Could not get target SURL", "%s %s" % (lfn, reason))

    allTargetSURLs = res['Value']['Successful']

    for ftsFile in self.filesToSubmit:

      if ftsFile.lfn in failedLFNs:
        log.debug("Not preparing transfer for file %s" % ftsFile.lfn)
        continue

      sourceSURL = targetSURL = allTargetSURLs[ftsFile.lfn]
      trans = fts3.new_transfer(sourceSURL,
                                targetSURL,
                                checksum='ADLER32:%s' % ftsFile.checksum,
                                filesize=ftsFile.size,
                                metadata=getattr(ftsFile, 'fileID'),
                                activity=self.activity)

      transfers.append(trans)
      fileIDsInTheJob.append(getattr(ftsFile, 'fileID'))

    # If the source is not a tape SE, we should set the
    # copy_pin_lifetime and bring_online params to None,
    # otherwise they will do an extra useless queue in FTS
    sourceIsTape = self.__isTapeSE(self.sourceSE)
    copy_pin_lifetime = pinTime if sourceIsTape else None
    bring_online = 86400 if sourceIsTape else None

    # We add a few metadata to the fts job so that we can reuse them later on without
    # querying our DB.
    # source and target SE are just used for accounting purpose
    job_metadata = {
        'operationID': self.operationID,
        'sourceSE': self.sourceSE,
        'targetSE': self.targetSE}

    job = fts3.new_job(transfers=transfers,
                       overwrite=True,
                       source_spacetoken=target_spacetoken,
                       spacetoken=target_spacetoken,
                       bring_online=bring_online,
                       copy_pin_lifetime=copy_pin_lifetime,
                       retry=3,
                       metadata=job_metadata,
                       priority=self.priority)

    return S_OK((job, fileIDsInTheJob))
Example no. 39
  def __init__( self, requestString, requestName, executionOrder, jobID, configPath ):
    """ c'tor

    :param self: self reference
    :param str requestString: XML serialised RequestContainer
    :param str requestName: request name
    :param list executionOrder: request execution order
    :param int jobID: jobID
    :param str sourceServer: request's source server
    :param str configPath: path in CS for parent agent
    """    
    ## fixtures

    ## python fixtures
    import os, os.path, sys, time, re, types
    self.makeGlobal( "os", os )
    self.makeGlobal( "os.path", os.path )
    self.makeGlobal( "sys", sys )
    self.makeGlobal( "time", time )
    self.makeGlobal( "re", re )
    ## export all Types from types
    [ self.makeGlobal( item, getattr( types, item ) ) for item in dir(types) if "Type" in item ]

    ## DIRAC fixtures
    from DIRAC.FrameworkSystem.Client.Logger import gLogger
    self.__log = gLogger.getSubLogger( "%s/%s" % ( self.__class__.__name__, str(requestName) ) )

    self.always = self.__log.always
    self.notice = self.__log.notice
    self.info = self.__log.info
    self.debug = self.__log.debug
    self.warn = self.__log.warn
    self.error = self.__log.error
    self.exception = self.__log.exception
    self.fatal = self.__log.fatal
    
    from DIRAC import S_OK, S_ERROR
    from DIRAC.ConfigurationSystem.Client.Config import gConfig
    from DIRAC.FrameworkSystem.Client.ProxyManagerClient import gProxyManager 
    from DIRAC.ConfigurationSystem.Client.Helpers.Registry import getGroupsWithVOMSAttribute
    from DIRAC.ConfigurationSystem.Client.ConfigurationData import gConfigurationData

    ## export DIRAC global tools and functions
    self.makeGlobal( "S_OK", S_OK )
    self.makeGlobal( "S_ERROR", S_ERROR )
    self.makeGlobal( "gLogger", gLogger )
    self.makeGlobal( "gConfig", gConfig )
    self.makeGlobal( "gProxyManager", gProxyManager ) 
    self.makeGlobal( "getGroupsWithVOMSAttribute", getGroupsWithVOMSAttribute )
    self.makeGlobal( "gConfigurationData", gConfigurationData )

    ## save request string
    self.requestString = requestString
    ## build request object
    from DIRAC.RequestManagementSystem.Client.RequestContainer import RequestContainer
    self.requestObj = RequestContainer( init = False )
    self.requestObj.parseRequest( request = self.requestString )
    ## save request name
    self.requestName = requestName
    ## .. and jobID
    self.jobID = jobID
    ## .. and execution order
    self.executionOrder = executionOrder

    ## save config path 
    self.__configPath = configPath
    ## set requestType
    self.setRequestType( gConfig.getValue( os.path.join( configPath, "RequestType" ), "" ) )
    ## get log level
    self.__log.setLevel( gConfig.getValue( os.path.join( configPath, self.__class__.__name__,  "LogLevel" ), "INFO" ) )
    ## clear monitoring
    self.__monitor = {}
    ## save DataManager proxy
    if "X509_USER_PROXY" in os.environ:
      self.info("saving path to current proxy file")
      self.__dataManagerProxy = os.environ["X509_USER_PROXY"]
    else:
      self.error("'X509_USER_PROXY' environment variable not set")
Example no. 40
    LockRing = None

try:
    from DIRAC.Core.Utilities.ReturnValues import S_OK, S_ERROR
except ImportError:

    def S_OK(val=""):
        """ dummy S_OK """
        return {'OK': True, 'Value': val}

    def S_ERROR(mess):
        """ dummy S_ERROR """
        return {'OK': False, 'Message': mess}


sLog = gLogger.getSubLogger(__name__)


class WorkingProcess(multiprocessing.Process):
    """
  .. class:: WorkingProcess

  WorkingProcess is a class that represents activity that runs in a separate process.

  It runs the main thread (process) in daemon mode, reading tasks from :pendingQueue:, executing
  them and pushing tasks with results back to the :resultsQueue:. If a task has a timeout value
  defined, a separate threading.Timer thread is started, killing the execution (and destroying the worker)
  after :ProcessTask.__timeOut: seconds.

  Main execution could also terminate in a few different ways:
Example no. 41
    def submit(self,
               context=None,
               ftsServer=None,
               ucert=None,
               pinTime=36000,
               protocols=None):
        """ submit the job to the FTS server

        Some attributes are expected to be defined for the submission to work:
          * type (set by FTS3Operation)
          * sourceSE (only for Transfer jobs)
          * targetSE
          * activity (optional)
          * priority (optional)
          * username
          * userGroup
          * filesToSubmit
          * operationID (optional, used as metadata for the job)

        We also expect the FTSFiles to have an ID defined, as it is given as transfer metadata

        :param pinTime: Time the file should be pinned on disk (used for transfers and staging)
                        Used only if the source SE is a tape storage
        :param context: fts3 context. If not given, it is created (see ftsServer & ucert param)
        :param ftsServer: the address of the fts server to submit to. Used only if context is
                          not given. If not given either, use the ftsServer object attribute

        :param ucert: path to the user certificate/proxy. Might be inferred by the fts cli (see its doc)
        :param protocols: list of protocols from which we should choose the protocol to use

        :returns: S_OK([FTSFiles ids of files submitted])
        """

        log = gLogger.getSubLogger(
            "submit/%s/%s_%s" %
            (self.operationID, self.sourceSE, self.targetSE), True)

        if not context:
            if not ftsServer:
                ftsServer = self.ftsServer
            context = fts3.Context(endpoint=ftsServer,
                                   ucert=ucert,
                                   request_class=ftsSSLRequest,
                                   verify=False)

        # Construct the target SURL
        res = self.__fetchSpaceToken(self.targetSE)
        if not res['OK']:
            return res
        target_spacetoken = res['Value']

        allLFNs = [ftsFile.lfn for ftsFile in self.filesToSubmit]

        if self.type == 'Transfer':
            res = self._constructTransferJob(pinTime,
                                             allLFNs,
                                             target_spacetoken,
                                             protocols=protocols)
        elif self.type == 'Staging':
            res = self._constructStagingJob(pinTime, allLFNs,
                                            target_spacetoken)
        # elif self.type == 'Removal':
        #   res = self._constructRemovalJob(context, allLFNs, failedLFNs, target_spacetoken)

        if not res['OK']:
            return res

        job, fileIDsInTheJob = res['Value']
        setFileIdsInTheJob = set(fileIDsInTheJob)

        try:
            self.ftsGUID = fts3.submit(context, job)
            log.info("Got GUID %s" % self.ftsGUID)

            # Only increase the amount of attempt
            # if we succeeded in submitting -> no ! Why did I do that ??
            for ftsFile in self.filesToSubmit:
                ftsFile.attempt += 1
                if ftsFile.fileID in setFileIdsInTheJob:
                    ftsFile.status = 'Submitted'

            now = datetime.datetime.utcnow().replace(microsecond=0)
            self.submitTime = now
            self.lastUpdate = now
            self.lastMonitor = now

        except FTS3ClientException as e:
            log.exception("Error at submission", repr(e))
            return S_ERROR("Error at submission: %s" % e)

        return S_OK(fileIDsInTheJob)
Example no. 42
import hashlib
import io
import threading

import six
from six import StringIO

from DIRAC.Core.Utilities.ReturnValues import S_OK, S_ERROR
from DIRAC.FrameworkSystem.Client.Logger import gLogger

try:
    # Python 2: "file" is built-in
    file_types = file, io.IOBase
except NameError:
    # Python 3: "file" fully replaced with IOBase
    file_types = (io.IOBase, )

gLogger = gLogger.getSubLogger("FileTransmissionHelper")


class FileHelper(object):

    __validDirections = ("toClient", "fromClient", "receive", "send")
    __directionsMapping = {"toClient": "send", "fromClient": "receive"}

    def __init__(self, oTransport=None, checkSum=True):
        self.oTransport = oTransport
        self.__checkMD5 = checkSum
        self.__oMD5 = hashlib.md5()
        self.bFinishedTransmission = False
        self.bReceivedEOF = False
        self.direction = False
        self.packetSize = 1048576
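FileHelper keeps a running hashlib.md5() and updates it packet by packet as data flows through the transport. A small self-contained sketch of that incremental checksumming over a byte stream, using the same 1 MiB packet size:

import hashlib
import io

def checksum_stream(stream, packet_size=1048576):
    digest = hashlib.md5()
    while True:
        chunk = stream.read(packet_size)
        if not chunk:
            break
        digest.update(chunk)
    return digest.hexdigest()

print(checksum_stream(io.BytesIO(b"some payload" * 1000)))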
Example no. 43
    def processResults(self):
        """
    Execute tasks' callbacks removing them from results queue

    :param self: self reference
    """
        processed = 0
        log = gLogger.getSubLogger('ProcessPool')
        while True:
            if (not log.debug(
                    "Start loop (t=0) queue size = %d, processed = %d" %
                (self.__resultsQueue.qsize(), processed)) and processed == 0
                    and self.__resultsQueue.qsize()):
                log.info("Process results, queue size = %d" %
                         self.__resultsQueue.qsize())
            start = time.time()
            self.__cleanDeadProcesses()
            log.debug("__cleanDeadProcesses", 't=%.2f' % (time.time() - start))
            if not self.__pendingQueue.empty():
                self.__spawnNeededWorkingProcesses()
                log.debug("__spawnNeededWorkingProcesses",
                          't=%.2f' % (time.time() - start))
            time.sleep(0.1)
            if self.__resultsQueue.empty():
                if self.__resultsQueue.qsize():
                    log.warn(
                        "Results queue is empty but has non zero size: %d" %
                        self.__resultsQueue.qsize())
                    # We only commit suicide if we reach a backlog greater than the maximum number of workers
                    if self.__resultsQueue.qsize() > self.__maxSize:
                        return -1
                    else:
                        return 0
                if processed == 0:
                    log.verbose("Process results, but queue is empty...")
                break
            # # get task
            task = self.__resultsQueue.get()
            log.debug("__resultsQueue.get", 't=%.2f' % (time.time() - start))
            # # execute callbacks
            try:
                task.doExceptionCallback()
                task.doCallback()
                log.debug("doCallback", 't=%.2f' % (time.time() - start))
                if task.usePoolCallbacks():
                    if self.__poolExceptionCallback and task.exceptionRaised():
                        self.__poolExceptionCallback(task.getTaskID(),
                                                     task.taskException())
                    if self.__poolCallback and task.taskResults():
                        self.__poolCallback(task.getTaskID(),
                                            task.taskResults())
                        log.debug("__poolCallback",
                                  't=%.2f' % (time.time() - start))
            except Exception as error:
                log.exception("Exception in callback", lException=error)
                pass
            processed += 1
        if processed:
            log.info("Processed %d results" % processed)
        else:
            log.debug("No results processed")
        return processed
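processResults invokes the optional pool-level callbacks with (taskID, result) and (taskID, exception), as seen above; a minimal sketch of callbacks with matching signatures (the names are illustrative):

def myPoolCallback(taskID, taskResult):
    # called with task.getTaskID() and task.taskResults()
    gLogger.info("Task %s finished: %s" % (taskID, taskResult))

def myPoolExceptionCallback(taskID, taskException):
    # called when the task raised an exception
    gLogger.error("Task %s failed: %s" % (taskID, taskException))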
Esempio n. 44
0
class AuthManager( object ):
  """ Handle Service Authorization
  """

  __authLogger = gLogger.getSubLogger( "Authorization" )
  KW_HOSTS_GROUP = 'hosts'
  KW_DN = 'DN'
  KW_GROUP = 'group'
  KW_EXTRA_CREDENTIALS = 'extraCredentials'
  KW_PROPERTIES = 'properties'
  KW_USERNAME = 'username'


  def __init__( self, authSection ):
    """
    Constructor

    :type authSection: string
    :param authSection: Section containing the authorization rules
    """
    self.authSection = authSection

  def authQuery( self, methodQuery, credDict, defaultProperties = False ):
    """
    Check if the query is authorized for a credentials dictionary

    :type  methodQuery: string
    :param methodQuery: Method to test
    :type  credDict: dictionary
    :param credDict: dictionary containing credentials for test. The dictionary can contain the DN
                        and selected group.
    :return: Boolean result of test
    """
    userString = ""
    if self.KW_DN in credDict:
      userString += "DN=%s" % credDict[ self.KW_DN ]
    if self.KW_GROUP in credDict:
      userString += " group=%s" % credDict[ self.KW_GROUP ]
    if self.KW_EXTRA_CREDENTIALS in credDict:
      userString += " extraCredentials=%s" % str( credDict[ self.KW_EXTRA_CREDENTIALS ] )
    self.__authLogger.verbose( "Trying to authenticate %s" % userString )
    # Get properties
    requiredProperties = self.getValidPropertiesForMethod( methodQuery, defaultProperties )
    # Extract valid groups
    validGroups = self.getValidGroups( requiredProperties )
    lowerCaseProperties = [ prop.lower() for prop in requiredProperties ]
    if not lowerCaseProperties:
      lowerCaseProperties = ['any']

    allowAll = "any" in lowerCaseProperties or "all" in lowerCaseProperties
    #Set no properties by default
    credDict[ self.KW_PROPERTIES ] = []
    #Check non secure backends
    if self.KW_DN not in credDict or not credDict[ self.KW_DN ]:
      if allowAll and not validGroups:
        self.__authLogger.verbose( "Accepted request from unsecure transport" )
        return True
      else:
        self.__authLogger.verbose( "Explicit property required and query seems to be coming through an unsecure transport" )
        return False
    #Check if query comes though a gateway/web server
    if self.forwardedCredentials( credDict ):
      self.__authLogger.verbose( "Query comes from a gateway" )
      self.unpackForwardedCredentials( credDict )
      return self.authQuery( methodQuery, credDict )
    #Get the properties
    #Check for invalid forwarding
    if self.KW_EXTRA_CREDENTIALS in credDict:
      #Invalid forwarding?
      if not isinstance ( credDict[ self.KW_EXTRA_CREDENTIALS ], basestring ):
        self.__authLogger.verbose( "The credentials seem to be forwarded by a host, but it is not a trusted one" )
        return False
    #Is it a host?
    if self.KW_EXTRA_CREDENTIALS in credDict and credDict[ self.KW_EXTRA_CREDENTIALS ] == self.KW_HOSTS_GROUP:
      #Get the nickname of the host
      credDict[ self.KW_GROUP ] = credDict[ self.KW_EXTRA_CREDENTIALS ]
    #HACK TO MAINTAIN COMPATIBILITY
    else:
      if self.KW_EXTRA_CREDENTIALS in credDict and self.KW_GROUP not in credDict:
        credDict[ self.KW_GROUP ] = credDict[ self.KW_EXTRA_CREDENTIALS ]
    #END OF HACK
    #Get the username
    if self.KW_DN in credDict and credDict[ self.KW_DN ]:
      if self.KW_GROUP not in credDict:
        result = CS.findDefaultGroupForDN( credDict[ self.KW_DN ] )
        if not result['OK']:
          return False
        credDict[ self.KW_GROUP ] = result['Value']
      if credDict[ self.KW_GROUP ] == self.KW_HOSTS_GROUP:
      #For host
        if not self.getHostNickName( credDict ):
          self.__authLogger.warn( "Host is invalid" )
          if not allowAll:
            return False
          #If all, then set anon credentials
          credDict[ self.KW_USERNAME ] = "anonymous"
          credDict[ self.KW_GROUP ] = "visitor"
      else:
      #For users
        if not self.getUsername( credDict ):
          self.__authLogger.warn( "User is invalid or does not belong to the group it's saying" )
          if not allowAll:
            return False
          #If all, then set anon credentials
          credDict[ self.KW_USERNAME ] = "anonymous"
          credDict[ self.KW_GROUP ] = "visitor"
    #If any or all in the props, allow
    allowGroup = not validGroups or credDict[ self.KW_GROUP ] in validGroups
    if allowAll and allowGroup:
      return True
    #Check authorized groups
    if "authenticated" in lowerCaseProperties and allowGroup:
      return True
    if not self.matchProperties( credDict, requiredProperties ):
      self.__authLogger.warn( "Client is not authorized\nValid properties: %s\nClient: %s" %
                               ( requiredProperties, credDict ) )
      return False
    elif not allowGroup:
      self.__authLogger.warn( "Client is not authorized\nValid groups: %s\nClient: %s" %
                               ( validGroups, credDict ) )
      return False
    return True

  def getHostNickName( self, credDict ):
    """
    Discover the host nickname associated to the DN.
    The nickname will be included in the credentials dictionary.

    :type  credDict: dictionary
    :param credDict: Credentials to check
    :return: Boolean specifying whether the nickname was found
    """
    if self.KW_DN not in credDict:
      return True
    if self.KW_GROUP not in credDict:
      return False
    retVal = CS.getHostnameForDN( credDict[ self.KW_DN ] )
    if not retVal[ 'OK' ]:
      gLogger.warn( "Cannot find hostname for DN %s: %s" % ( credDict[ self.KW_DN ], retVal[ 'Message' ] ) )
      return False
    credDict[ self.KW_USERNAME ] = retVal[ 'Value' ]
    credDict[ self.KW_PROPERTIES ] = CS.getPropertiesForHost( credDict[ self.KW_USERNAME ], [] )
    return True

  def getValidPropertiesForMethod( self, method, defaultProperties = False ):
    """
    Get all authorized groups for calling a method

    :type  method: string
    :param method: Method to test
    :return: List containing the allowed groups
    """
    authProps = gConfig.getValue( "%s/%s" % ( self.authSection, method ), [] )
    if authProps:
      return authProps
    if defaultProperties:
      self.__authLogger.verbose( "Using hardcoded properties for method %s : %s" % ( method, defaultProperties ) )
      if type( defaultProperties ) not in ( types.ListType, types.TupleType ):
        return List.fromChar( defaultProperties )
      return defaultProperties
    defaultPath = "%s/Default" % "/".join( method.split( "/" )[:-1] )
    authProps = gConfig.getValue( "%s/%s" % ( self.authSection, defaultPath ), [] )
    if authProps:
      self.__authLogger.verbose( "Method %s has no properties defined using %s" % ( method, defaultPath ) )
      return authProps
    self.__authLogger.verbose( "Method %s has no authorization rules defined. Allowing no properties" % method )
    return []

  def getValidGroups( self, rawProperties ):
    """  Get valid groups as specified in the method authorization rules

    :param list rawProperties: all method properties
    :return: list of allowed groups or []
    """
    validGroups = []
    for prop in list( rawProperties ):
      if prop.startswith( 'group:' ):
        rawProperties.remove( prop )
        prop = prop.replace( 'group:', '' )
        validGroups.append( prop )
      elif prop.startswith( 'vo:' ):
        rawProperties.remove( prop )
        vo = prop.replace( 'vo:', '' )
        result = getGroupsForVO( vo )
        if result['OK']:
          validGroups.extend( result['Value'] )

    validGroups = list( set( validGroups ) )
    return validGroups

  def forwardedCredentials( self, credDict ):
    """
    Check whether the credentials are being forwarded by a valid source

    :type  credDict: dictionary
    :param credDict: Credentials to check
    :return: Boolean with the result
    """
    if self.KW_EXTRA_CREDENTIALS in credDict and type( credDict[ self.KW_EXTRA_CREDENTIALS ] ) == types.TupleType:
      if self.KW_DN in credDict:
        retVal = CS.getHostnameForDN( credDict[ self.KW_DN ] )
        if retVal[ 'OK' ]:
          hostname = retVal[ 'Value' ]
          if Properties.TRUSTED_HOST in CS.getPropertiesForHost( hostname, [] ):
            return True
    return False

  def unpackForwardedCredentials( self, credDict ):
    """
    Extract the forwarded credentials

    :type  credDict: dictionary
    :param credDict: Credentials to unpack
    """
    credDict[ self.KW_DN ] = credDict[ self.KW_EXTRA_CREDENTIALS ][0]
    credDict[ self.KW_GROUP ] = credDict[ self.KW_EXTRA_CREDENTIALS ][1]
    del( credDict[ self.KW_EXTRA_CREDENTIALS ] )


  def getUsername( self, credDict ):
    """
    Discover the username associated to the DN. It will check if the selected group is valid.
    The username will be included in the credentials dictionary.

    :type  credDict: dictionary
    :param credDict: Credentials to check
    :return: Boolean specifying whether the username was found
    """
    if self.KW_DN not in credDict:
      return True
    if self.KW_GROUP not in credDict:
      result = CS.findDefaultGroupForDN( credDict[ self.KW_DN ] )
      if not result['OK']:
        return False
      credDict[ self.KW_GROUP ] = result['Value']
    credDict[ self.KW_PROPERTIES ] = CS.getPropertiesForGroup( credDict[ self.KW_GROUP ], [] )
    usersInGroup = CS.getUsersInGroup( credDict[ self.KW_GROUP ], [] )
    if not usersInGroup:
      return False
    retVal = CS.getUsernameForDN( credDict[ self.KW_DN ], usersInGroup )
    if retVal[ 'OK' ]:
      credDict[ self.KW_USERNAME ] = retVal[ 'Value' ]
      return True
    return False

  def matchProperties( self, credDict, validProps, caseSensitive = False ):
    """
    Return True if one or more properties are in the valid list of properties

    :type  credDict: dictionary
    :param credDict: Credentials whose properties are matched
    :type  validProps: list
    :param validProps: List of valid properties
    :return: list of matched properties (empty, hence falsy, if nothing matched)
    """

    #HACK: Map lower case properties to properties to make the check in lowercase but return the proper case
    if not caseSensitive:
      validProps = dict( ( prop.lower(), prop ) for prop in validProps )
    else:
      validProps = dict( ( prop, prop ) for prop in validProps )
    groupProperties = credDict[ self.KW_PROPERTIES ]
    foundProps = []
    for prop in groupProperties:
      if not caseSensitive:
        prop = prop.lower()
      if prop in validProps:
        foundProps.append( validProps[ prop ] )
    credDict[ self.KW_PROPERTIES ] = foundProps
    return foundProps
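A minimal usage sketch of the class above; the configuration section path, the DN, the group and the method name are hypothetical placeholders:

authManager = AuthManager("/Systems/DataManagement/Production/Services/FileCatalog/Authorization")
credDict = {
    "DN": "/DC=org/DC=example/CN=Jane Doe",  # hypothetical DN
    "group": "dirac_user",                   # hypothetical group
}
if authManager.authQuery("Read", credDict):
    gLogger.info("Access granted with properties %s" % credDict["properties"])
else:
    gLogger.warn("Access denied")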
Esempio n. 45
0
    def getFTS3Context(self, username, group, ftsServer, threadID):
        """ Returns an fts3 context for a given user, group and fts server

        The context pool is per thread, and there is one context
        per tuple (user, group, server).
        We dump the proxy of a user to a file (shared by all the threads),
        and use it to make the context.
        The proxy needs a lifetime of self.proxyLifetime, is cached for cacheTime = (2*lifetime/3) - 10mn,
        and the lifetime of the context is 45mn.
        cacheTime is chosen this way because the FTS3 server asks for a new proxy
        once 2/3rd of the existing proxy lifetime has elapsed, so we renew it just before.

        :param str username: name of the user
        :param str group: group of the user
        :param str ftsServer: address of the server
        :param str threadID: thread ID

        :returns: S_OK with the context object

    """

        log = gLogger.getSubLogger("getFTS3Context", child=True)

        contextes = self._globalContextCache.setdefault(threadID, DictCache())

        idTuple = (username, group, ftsServer)
        log.debug("Getting context for %s" % (idTuple, ))

        # We keep a context in the cache for 45 minutes
        # (so it needs to be valid for at least 15 more minutes, since we add it for one hour)
        if not contextes.exists(idTuple, 15 * 60):
            res = getDNForUsername(username)
            if not res['OK']:
                return res
            # We take the first DN returned
            userDN = res['Value'][0]

            log.debug("UserDN %s" % userDN)

            # We dump the proxy to a file.
            # It has to have a lifetime of self.proxyLifetime
            # Because the FTS3 servers cache it for 2/3rd of the lifetime
            # we should make our cache a bit less than 2/3rd of the lifetime
            cacheTime = int(2 * self.proxyLifetime / 3) - 600
            res = gProxyManager.downloadVOMSProxyToFile(
                userDN,
                group,
                requiredTimeLeft=self.proxyLifetime,
                cacheTime=cacheTime)
            if not res['OK']:
                return res

            proxyFile = res['Value']
            log.debug("Proxy file %s" % proxyFile)

            # We generate the context
            # In practice, the lifetime will be less than proxyLifetime
            # because we reuse a cached proxy. However, the cached proxy will
            # never force a redelegation, because it is recent enough for the FTS3 servers.
            # The delegation is forced when 2/3rd of the lifetime is left, and we get a fresh
            # one just before. So no problem
            res = FTS3Job.generateContext(ftsServer,
                                          proxyFile,
                                          lifetime=self.proxyLifetime)

            if not res['OK']:
                return res
            context = res['Value']

            # we add it to the cache for this thread for 1h
            contextes.add(idTuple, 3600, context)

        return S_OK(contextes.get(idTuple))
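To make the lifetime arithmetic in the docstring concrete, a small worked example, assuming an illustrative 12-hour proxyLifetime:

proxyLifetime = 12 * 3600  # 43200 s, illustrative value
cacheTime = int(2 * proxyLifetime / 3) - 600
# cacheTime = 28200 s, i.e. 7h50mn: the cached proxy file is refreshed well before
# the FTS3 server asks for a re-delegation (after 2/3 of the proxy lifetime)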
Esempio n. 46
0
  def __init__( self, requestString, requestName, executionOrder, jobID, configPath ):
    """ c'tor

    :param self: self reference
    :param str requestString: XML serialised RequestContainer
    :param str requestName: request name
    :param list executionOrder: request execution order
    :param int jobID: jobID
    :param str configPath: path in CS for parent agent
    """
    ## fixtures

    ## python fixtures
    import os, os.path, sys, time, re, types
    self.makeGlobal( "os", os )
    self.makeGlobal( "os.path", os.path )
    self.makeGlobal( "sys", sys )
    self.makeGlobal( "time", time )
    self.makeGlobal( "re", re )
    ## export all Types from types
    [ self.makeGlobal( item, getattr( types, item ) ) for item in dir(types) if "Type" in item ]

    ## DIRAC fixtures
    from DIRAC.FrameworkSystem.Client.Logger import gLogger
    self.__log = gLogger.getSubLogger( "%s/%s" % ( self.__class__.__name__, str(requestName) ) )

    self.always = self.__log.always
    self.notice = self.__log.notice
    self.info = self.__log.info
    self.debug = self.__log.debug
    self.warn = self.__log.warn
    self.error = self.__log.error
    self.exception = self.__log.exception
    self.fatal = self.__log.fatal
    
    from DIRAC import S_OK, S_ERROR
    from DIRAC.ConfigurationSystem.Client.Config import gConfig
    from DIRAC.FrameworkSystem.Client.ProxyManagerClient import gProxyManager 
    from DIRAC.ConfigurationSystem.Client.Helpers.Registry import getGroupsWithVOMSAttribute
    from DIRAC.ConfigurationSystem.Client.ConfigurationData import gConfigurationData

    ## export DIRAC global tools and functions
    self.makeGlobal( "S_OK", S_OK )
    self.makeGlobal( "S_ERROR", S_ERROR )
    self.makeGlobal( "gLogger", gLogger )
    self.makeGlobal( "gConfig", gConfig )
    self.makeGlobal( "gProxyManager", gProxyManager ) 
    self.makeGlobal( "getGroupsWithVOMSAttribute", getGroupsWithVOMSAttribute )
    self.makeGlobal( "gConfigurationData", gConfigurationData )

    ## save request string
    self.requestString = requestString
    ## build request object
    from DIRAC.RequestManagementSystem.Client.RequestContainer import RequestContainer
    self.requestObj = RequestContainer( init = False )
    self.requestObj.parseRequest( request = self.requestString )
    ## save request name
    self.requestName = requestName
    ## .. and jobID
    self.jobID = jobID
    ## .. and execution order
    self.executionOrder = executionOrder

    ## save config path 
    self.__configPath = configPath
    ## set requestType
    self.setRequestType( gConfig.getValue( os.path.join( configPath, "RequestType" ), "" ) )
    ## get log level
    self.__log.setLevel( gConfig.getValue( os.path.join( configPath, self.__class__.__name__,  "LogLevel" ), "INFO" ) )
    ## clear monitoring
    self.__monitor = {}
    ## save DataManager proxy
    if "X509_USER_PROXY" in os.environ:
      self.info("saving path to current proxy file")
      self.__dataManagerProxy = os.environ["X509_USER_PROXY"]
    else:
      self.error("'X509_USER_PROXY' environment variable not set")
Esempio n. 47
0
except ImportError:
  LockRing = None

try:
  from DIRAC.Core.Utilities.ReturnValues import S_OK, S_ERROR
except ImportError:
  def S_OK(val=""):
    """ dummy S_OK """
    return {'OK': True, 'Value': val}

  def S_ERROR(mess):
    """ dummy S_ERROR """
    return {'OK': False, 'Message': mess}


LOG = gLogger.getSubLogger(__name__)

class WorkingProcess(multiprocessing.Process):
  """
  .. class:: WorkingProcess

  WorkingProcess is a class that represents activity that runs in a separate process.

  It runs its main thread (process) in daemon mode, reading tasks from :pendingQueue:, executing
  them and pushing tasks with results back to the :resultsQueue:. If a task has a timeout value
  defined, a separate threading.Timer thread is started, killing execution (and destroying the worker)
  after :ProcessTask.__timeOut: seconds.

  Main execution could also terminate in a few different ways:

    * on every failed read attempt (from an empty :pendingQueue:), the idle loop counter is increased,
Esempio n. 48
0
  def _constructTransferJob(self, pinTime, allLFNs, target_spacetoken, protocols=None):
    """ Build a job for transfer

        Some attributes of the job are expected to be set
          * sourceSE
          * targetSE
          * activity (optional)
          * priority (optional)
          * filesToSubmit
          * operationID (optional, used as metadata for the job)


        :param pinTime: pinning time in case staging is needed
        :param allLFNs: list of LFNs to transfer
        :param target_spacetoken: the space token of the target
        :param protocols: list of protocols to restrict the protocol choice for the transfer

        :return: S_OK( (job object, list of ftsFileIDs in the job))
    """

    log = gLogger.getSubLogger(
        "constructTransferJob/%s/%s_%s" %
        (self.operationID, self.sourceSE, self.targetSE), True)

    res = self.__fetchSpaceToken(self.sourceSE)
    if not res['OK']:
      return res
    source_spacetoken = res['Value']

    failedLFNs = set()
    dstSE = StorageElement(self.targetSE, vo=self.vo)
    srcSE = StorageElement(self.sourceSE, vo=self.vo)

    # getting all the (source, dest) surls
    res = dstSE.generateTransferURLsBetweenSEs(allLFNs, srcSE, protocols=protocols)

    if not res['OK']:
      return res

    for lfn, reason in res['Value']['Failed'].iteritems():
      failedLFNs.add(lfn)
      log.error("Could not get source SURL", "%s %s" % (lfn, reason))

    allSrcDstSURLs = res['Value']['Successful']

    transfers = []

    fileIDsInTheJob = []

    for ftsFile in self.filesToSubmit:

      if ftsFile.lfn in failedLFNs:
        log.debug("Not preparing transfer for file %s" % ftsFile.lfn)
        continue

      sourceSURL, targetSURL = allSrcDstSURLs[ftsFile.lfn]

      if sourceSURL == targetSURL:
        log.error("sourceSURL equals to targetSURL", "%s" % ftsFile.lfn)
        ftsFile.error = "sourceSURL equals to targetSURL"
        ftsFile.status = 'Defunct'
        continue

      trans = fts3.new_transfer(sourceSURL,
                                targetSURL,
                                checksum='ADLER32:%s' % ftsFile.checksum,
                                filesize=ftsFile.size,
                                metadata=getattr(ftsFile, 'fileID'),
                                activity=self.activity)

      transfers.append(trans)
      fileIDsInTheJob.append(getattr(ftsFile, 'fileID'))

    # If the source is not a tape SE, we should set the
    # copy_pin_lifetime and bring_online params to None,
    # otherwise they will do an extra useless queue in FTS
    sourceIsTape = self.__isTapeSE(self.sourceSE)
    copy_pin_lifetime = pinTime if sourceIsTape else None
    bring_online = BRING_ONLINE_TIMEOUT if sourceIsTape else None

    if not transfers:
      log.error("No transfer possible!")
      return S_ERROR("No transfer possible")

    # We add a few metadata to the fts job so that we can reuse them later on without
    # querying our DB.
    # source and target SE are just used for accounting purpose
    job_metadata = {
        'operationID': self.operationID,
        'sourceSE': self.sourceSE,
        'targetSE': self.targetSE}

    job = fts3.new_job(transfers=transfers,
                       overwrite=True,
                       source_spacetoken=source_spacetoken,
                       spacetoken=target_spacetoken,
                       bring_online=bring_online,
                       copy_pin_lifetime=copy_pin_lifetime,
                       retry=3,
                       metadata=job_metadata,
                       priority=self.priority)

    return S_OK((job, fileIDsInTheJob))
Esempio n. 49
0
    def _constructTransferJob(self,
                              pinTime,
                              allLFNs,
                              target_spacetoken,
                              protocols=None):
        """ Build a job for transfer

        Some attributes of the job are expected to be set
          * sourceSE
          * targetSE
          * activity (optional)
          * priority (optional)
          * filesToSubmit
          * operationID (optional, used as metadata for the job)


        :param pinTime: pinning time in case staging is needed
        :param allLFNs: list of LFNs to transfer
        :param target_spacetoken: the space token of the target
        :param protocols: list of protocols to restrict the protocol choice for the transfer

        :return: S_OK( (job object, list of ftsFileIDs in the job))
    """

        log = gLogger.getSubLogger(
            "constructTransferJob/%s/%s_%s" %
            (self.operationID, self.sourceSE, self.targetSE), True)

        res = self.__fetchSpaceToken(self.sourceSE, self.vo)
        if not res['OK']:
            return res
        source_spacetoken = res['Value']

        failedLFNs = set()
        dstSE = StorageElement(self.targetSE, vo=self.vo)
        srcSE = StorageElement(self.sourceSE, vo=self.vo)

        # If the source is not a tape SE, we should set the
        # copy_pin_lifetime and bring_online params to None,
        # otherwise they will do an extra useless queue in FTS
        sourceIsTape = self.__isTapeSE(self.sourceSE, self.vo)
        copy_pin_lifetime = pinTime if sourceIsTape else None
        bring_online = BRING_ONLINE_TIMEOUT if sourceIsTape else None

        # getting all the (source, dest) surls
        res = dstSE.generateTransferURLsBetweenSEs(allLFNs,
                                                   srcSE,
                                                   protocols=protocols)

        if not res['OK']:
            return res

        for lfn, reason in res['Value']['Failed'].items():
            failedLFNs.add(lfn)
            log.error("Could not get source SURL", "%s %s" % (lfn, reason))

        allSrcDstSURLs = res['Value']['Successful']

        # This contains the staging URLs if they are different from the transfer URLs
        # (CTA...)
        allStageURLs = dict()

        # In case we are transferring from a tape system, and the stage protocol
        # is not the same as the transfer protocol, we generate the staging URLs
        # to do a multihop transfer. See below.
        if sourceIsTape:
            srcProto, _destProto = res['Value']['Protocols']
            if srcProto not in srcSE.localStageProtocolList:

                # As of version 3.10, FTS can only handle one file per multi hop
                # job. If we are here, that means that we need one, so make sure that
                # we only have a single file to transfer (this should have been checked
                # at the job construction step in FTS3Operation).
                # This test is important, because multiple files would result in the source
                # being deleted !
                if len(allLFNs) != 1:
                    log.debug(
                        "Multihop job has %s files while only 1 allowed" %
                        len(allLFNs))
                    return S_ERROR(
                        errno.E2BIG,
                        "Trying multihop job with more than one file !")

                res = srcSE.getURL(allSrcDstSURLs,
                                   protocol=srcSE.localStageProtocolList)

                if not res['OK']:
                    return res

                for lfn, reason in res['Value']['Failed'].items():
                    failedLFNs.add(lfn)
                    log.error("Could not get stage SURL",
                              "%s %s" % (lfn, reason))
                    allSrcDstSURLs.pop(lfn)

                allStageURLs = res['Value']['Successful']

        transfers = []

        fileIDsInTheJob = []

        for ftsFile in self.filesToSubmit:

            if ftsFile.lfn in failedLFNs:
                log.debug("Not preparing transfer for file %s" % ftsFile.lfn)
                continue

            sourceSURL, targetSURL = allSrcDstSURLs[ftsFile.lfn]
            stageURL = allStageURLs.get(ftsFile.lfn)

            if sourceSURL == targetSURL:
                log.error("sourceSURL equals to targetSURL",
                          "%s" % ftsFile.lfn)
                ftsFile.error = "sourceSURL equals to targetSURL"
                ftsFile.status = 'Defunct'
                continue

            ftsFileID = getattr(ftsFile, 'fileID')

            # Under normal circumstances, we simply submit an fts transfer as such:
            # * srcProto://myFile -> destProto://myFile
            #
            # Even in case of the source storage being a tape system, it works fine.
            # However, if the staging and transfer protocols are different (which might be the case for CTA),
            #  we use the multihop machinery to submit two sequential fts transfers:
            # one to stage, one to transfer.
            # It looks like such
            # * stageProto://myFile -> stageProto://myFile
            # * srcProto://myFile -> destProto://myFile

            if stageURL:

                # We do not set a fileID in the metadata
                # such that we do not update the DB when monitoring
                stageTrans_metadata = {'desc': 'PreStage %s' % ftsFileID}
                stageTrans = fts3.new_transfer(stageURL,
                                               stageURL,
                                               checksum='ADLER32:%s' %
                                               ftsFile.checksum,
                                               filesize=ftsFile.size,
                                               metadata=stageTrans_metadata,
                                               activity=self.activity)
                transfers.append(stageTrans)

            trans_metadata = {
                'desc': 'Transfer %s' % ftsFileID,
                'fileID': ftsFileID
            }
            trans = fts3.new_transfer(sourceSURL,
                                      targetSURL,
                                      checksum='ADLER32:%s' % ftsFile.checksum,
                                      filesize=ftsFile.size,
                                      metadata=trans_metadata,
                                      activity=self.activity)

            transfers.append(trans)
            fileIDsInTheJob.append(ftsFileID)

        if not transfers:
            log.error("No transfer possible!")
            return S_ERROR("No transfer possible")

        # We add a few metadata to the fts job so that we can reuse them later on without
        # querying our DB.
        # source and target SE are just used for accounting purpose
        job_metadata = {
            'operationID': self.operationID,
            'rmsReqID': self.rmsReqID,
            'sourceSE': self.sourceSE,
            'targetSE': self.targetSE
        }

        job = fts3.new_job(
            transfers=transfers,
            overwrite=True,
            source_spacetoken=source_spacetoken,
            spacetoken=target_spacetoken,
            bring_online=bring_online,
            copy_pin_lifetime=copy_pin_lifetime,
            retry=3,
            # Only check target vs specified, since we verify the source earlier
            verify_checksum='target',
            # if we have stage urls, then we need multihop
            multihop=bool(allStageURLs),
            metadata=job_metadata,
            priority=self.priority)

        return S_OK((job, fileIDsInTheJob))
Esempio n. 50
0
    def _constructTransferJob(self, pinTime, allLFNs, target_spacetoken, protocols=None):
        """Build a job for transfer

        Some attributes of the job are expected to be set
          * sourceSE
          * targetSE
          * multiHopSE (optional)
          * activity (optional)
          * priority (optional)
          * filesToSubmit
          * operationID (optional, used as metadata for the job)

        Note that, because of FTS limitations (and also because it anyway would be "not very smart"),
        multiHop can only use non-SRM disk storage as hops.


        :param pinTime: pinning time in case staging is needed
        :param allLFNs: list of LFNs to transfer
        :param target_spacetoken: the space token of the target
        :param protocols: list of protocols to restrict the protocol choice for the transfer

        :return: S_OK( (job object, list of ftsFileIDs in the job))
        """

        log = gLogger.getSubLogger(f"constructTransferJob/{self.operationID}/{self.sourceSE}_{self.targetSE}")

        isMultiHop = False

        # Check if it is a multiHop transfer
        if self.multiHopSE:
            if len(allLFNs) != 1:
                log.debug("Multihop job has %s files while only 1 allowed" % len(allLFNs))
                return S_ERROR(errno.E2BIG, "Trying multihop job with more than one file !")
            allHops = [(self.sourceSE, self.multiHopSE), (self.multiHopSE, self.targetSE)]
            isMultiHop = True
        else:
            allHops = [(self.sourceSE, self.targetSE)]

        nbOfHops = len(allHops)

        res = self.__fetchSpaceToken(self.sourceSE, self.vo)
        if not res["OK"]:
            return res
        source_spacetoken = res["Value"]

        failedLFNs = set()

        copy_pin_lifetime = None
        bring_online = None
        archive_timeout = None

        transfers = []

        fileIDsInTheJob = set()

        for hopId, (hopSrcSEName, hopDstSEName) in enumerate(allHops, start=1):

            # Again, this is relevant only for the very initial source
            # but code factorization is more important
            hopSrcIsTape = self.__isTapeSE(hopSrcSEName, self.vo)

            dstSE = StorageElement(hopDstSEName, vo=self.vo)
            srcSE = StorageElement(hopSrcSEName, vo=self.vo)

            # getting all the (source, dest) surls
            res = dstSE.generateTransferURLsBetweenSEs(allLFNs, srcSE, protocols=protocols)
            if not res["OK"]:
                return res

            for lfn, reason in res["Value"]["Failed"].items():
                failedLFNs.add(lfn)
                log.error("Could not get source SURL", "%s %s" % (lfn, reason))

            allSrcDstSURLs = res["Value"]["Successful"]
            srcProto, destProto = res["Value"]["Protocols"]

            # If the source is a tape SE, we should set the
            # copy_pin_lifetime and bring_online params
            # In case of multihop, this is relevant only for the
            # original source, but again, code factorization is more important
            if hopSrcIsTape:
                copy_pin_lifetime = pinTime
                bring_online = srcSE.options.get("BringOnlineTimeout", BRING_ONLINE_TIMEOUT)

            # If the destination is a tape, and the protocol supports it,
            # check if we want to have an archive timeout
            # In case of multihop, this is relevant only for the
            # final target, but again, code factorization is more important
            dstIsTape = self.__isTapeSE(hopDstSEName, self.vo)
            if dstIsTape and destProto in dstSE.localStageProtocolList:
                archive_timeout = dstSE.options.get("ArchiveTimeout")

            # This contains the staging URLs if they are different from the transfer URLs
            # (CTA...)
            allStageURLs = dict()

            # In case we are transferring from a tape system, and the stage protocol
            # is not the same as the transfer protocol, we generate the staging URLs
            # to do a multihop transfer. See below.
            if hopSrcIsTape and srcProto not in srcSE.localStageProtocolList:
                isMultiHop = True
                # As of version 3.10, FTS can only handle one file per multi hop
                # job. If we are here, that means that we need one, so make sure that
                # we only have a single file to transfer (this should have been checked
                # at the job construction step in FTS3Operation).
                # This test is important, because multiple files would result in the source
                # being deleted !
                if len(allLFNs) != 1:
                    log.debug("Multihop job has %s files while only 1 allowed" % len(allLFNs))
                    return S_ERROR(errno.E2BIG, "Trying multihop job with more than one file !")

                res = srcSE.getURL(allSrcDstSURLs, protocol=srcSE.localStageProtocolList)

                if not res["OK"]:
                    return res

                for lfn, reason in res["Value"]["Failed"].items():
                    failedLFNs.add(lfn)
                    log.error("Could not get stage SURL", "%s %s" % (lfn, reason))
                    allSrcDstSURLs.pop(lfn)

                allStageURLs = res["Value"]["Successful"]

            for ftsFile in self.filesToSubmit:

                if ftsFile.lfn in failedLFNs:
                    log.debug("Not preparing transfer for file %s" % ftsFile.lfn)
                    continue

                sourceSURL, targetSURL = allSrcDstSURLs[ftsFile.lfn]
                stageURL = allStageURLs.get(ftsFile.lfn)

                if sourceSURL == targetSURL:
                    log.error("sourceSURL equals to targetSURL", "%s" % ftsFile.lfn)
                    ftsFile.error = "sourceSURL equals to targetSURL"
                    ftsFile.status = "Defunct"
                    continue

                ftsFileID = getattr(ftsFile, "fileID")

                # Under normal circumstances, we simply submit an fts transfer as such:
                # * srcProto://myFile -> destProto://myFile
                #
                # Even in case of the source storage being a tape system, it works fine.
                # However, if the staging and transfer protocols are different (which might be the case for CTA),
                #  we use the multihop machinery to submit two sequential fts transfers:
                # one to stage, one to transfer.
                # It looks like such
                # * stageProto://myFile -> stageProto://myFile
                # * srcProto://myFile -> destProto://myFile

                if stageURL:

                    # We do not set a fileID in the metadata
                    # such that we do not update the DB when monitoring
                    stageTrans_metadata = {"desc": "PreStage %s" % ftsFileID}

                    # If we use an activity, also set it as file metadata
                    # for WLCG monitoring purposes
                    # https://its.cern.ch/jira/projects/DOMATPC/issues/DOMATPC-14?
                    if self.activity:
                        stageTrans_metadata["activity"] = self.activity

                    stageTrans = fts3.new_transfer(
                        stageURL,
                        stageURL,
                        checksum="ADLER32:%s" % ftsFile.checksum,
                        filesize=ftsFile.size,
                        metadata=stageTrans_metadata,
                        activity=self.activity,
                    )
                    transfers.append(stageTrans)

                # If it is the last hop only, we set the fileID metadata
                # for monitoring
                if hopId == nbOfHops:
                    trans_metadata = {"desc": "Transfer %s" % ftsFileID, "fileID": ftsFileID}
                else:
                    trans_metadata = {"desc": "MultiHop %s" % ftsFileID}

                # If we use an activity, also set it as file metadata
                # for WLCG monitoring purposes
                # https://its.cern.ch/jira/projects/DOMATPC/issues/DOMATPC-14?
                if self.activity:
                    trans_metadata["activity"] = self.activity

                # because of an xroot bug (https://github.com/xrootd/xrootd/issues/1433)
                # the checksum needs to be lowercase. It does not impact the other
                # protocol, so it's fine to put it here.
                # I only add it in this transfer and not the "staging" one above because it
                # impacts only root -> root transfers
                trans = fts3.new_transfer(
                    sourceSURL,
                    targetSURL,
                    checksum="ADLER32:%s" % ftsFile.checksum.lower(),
                    filesize=ftsFile.size,
                    metadata=trans_metadata,
                    activity=self.activity,
                )

                transfers.append(trans)
                fileIDsInTheJob.add(ftsFileID)

        if not transfers:
            log.error("No transfer possible!")
            return S_ERROR(errno.ENODATA, "No transfer possible")

        # We add a few metadata to the fts job so that we can reuse them later on without
        # querying our DB.
        # source and target SE are just used for accounting purpose
        job_metadata = {
            "operationID": self.operationID,
            "rmsReqID": self.rmsReqID,
            "sourceSE": self.sourceSE,
            "targetSE": self.targetSE,
        }

        if self.activity:
            job_metadata["activity"] = self.activity

        job = fts3.new_job(
            transfers=transfers,
            overwrite=True,
            source_spacetoken=source_spacetoken,
            spacetoken=target_spacetoken,
            bring_online=bring_online,
            copy_pin_lifetime=copy_pin_lifetime,
            retry=3,
            verify_checksum="target",  # Only check target vs specified, since we verify the source earlier
            multihop=isMultiHop,
            metadata=job_metadata,
            priority=self.priority,
            archive_timeout=archive_timeout,
        )

        return S_OK((job, fileIDsInTheJob))
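The multi-hop bookkeeping at the top of the method boils down to building the allHops list of (source, destination) pairs; a tiny sketch with hypothetical SE names:

# hypothetical SE names, only to illustrate how allHops is built above
sourceSE, targetSE, multiHopSE = "SOURCE-TAPE", "DEST-DISK", "HOP-DISK"

if multiHopSE:
    allHops = [(sourceSE, multiHopSE), (multiHopSE, targetSE)]  # two hops, single file only
else:
    allHops = [(sourceSE, targetSE)]  # the usual direct transfer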
Esempio n. 51
0
    def _constructStagingJob(self, pinTime, allLFNs, target_spacetoken):
        """ Build a job for staging

        Some attributes of the job are expected to be set
          * targetSE
          * activity (optional)
          * priority (optional)
          * filesToSubmit
          * operationID (optional, used as metadata for the job)

        :param pinTime: pinning time in case staging is needed
        :param allLFNs: List of LFNs to stage
        :param target_spacetoken: the space token of the target

        :return: S_OK( (job object, list of ftsFileIDs in the job))
    """

        log = gLogger.getSubLogger(
            "constructStagingJob/%s/%s" % (self.operationID, self.targetSE),
            True)

        transfers = []
        fileIDsInTheJob = []

        # Set of LFNs for which we did not get an SRM URL
        failedLFNs = set()

        # getting all the target surls
        res = StorageElement(self.targetSE, vo=self.vo).getURL(allLFNs,
                                                               protocol='srm')
        if not res['OK']:
            return res

        for lfn, reason in res['Value']['Failed'].iteritems():
            failedLFNs.add(lfn)
            log.error("Could not get target SURL", "%s %s" % (lfn, reason))

        allTargetSURLs = res['Value']['Successful']

        for ftsFile in self.filesToSubmit:

            if ftsFile.lfn in failedLFNs:
                log.debug("Not preparing transfer for file %s" % ftsFile.lfn)
                continue

            sourceSURL = targetSURL = allTargetSURLs[ftsFile.lfn]
            trans = fts3.new_transfer(sourceSURL,
                                      targetSURL,
                                      checksum='ADLER32:%s' % ftsFile.checksum,
                                      filesize=ftsFile.size,
                                      metadata=getattr(ftsFile, 'fileID'),
                                      activity=self.activity)

            transfers.append(trans)
            fileIDsInTheJob.append(getattr(ftsFile, 'fileID'))

        # If the source is not a tape SE, we should set the
        # copy_pin_lifetime and bring_online params to None,
        # otherwise they will do an extra useless queue in FTS
        sourceIsTape = self.__isTapeSE(self.sourceSE)
        copy_pin_lifetime = pinTime if sourceIsTape else None
        bring_online = 86400 if sourceIsTape else None

        # We add a few metadata to the fts job so that we can reuse them later on without
        # querying our DB.
        # source and target SE are just used for accounting purpose
        job_metadata = {
            'operationID': self.operationID,
            'sourceSE': self.sourceSE,
            'targetSE': self.targetSE
        }

        job = fts3.new_job(transfers=transfers,
                           overwrite=True,
                           source_spacetoken=target_spacetoken,
                           spacetoken=target_spacetoken,
                           bring_online=bring_online,
                           copy_pin_lifetime=copy_pin_lifetime,
                           retry=3,
                           metadata=job_metadata,
                           priority=self.priority)

        return S_OK((job, fileIDsInTheJob))
Esempio n. 52
0
  def submit(self, context=None, ftsServer=None, ucert=None, pinTime=36000, ):
    """ submit the job to the FTS server

        Some attributes are expected to be defined for the submission to work:
          * type (set by FTS3Operation)
          * sourceSE (only for Transfer jobs)
          * targetSE
          * activity (optional)
          * priority (optional)
          * username
          * userGroup
          * filesToSubmit
          * operationID (optional, used as metadata for the job)

        We also expect the FTSFiles have an ID defined, as it is given as transfer metadata

        :param pinTime: Time the file should be pinned on disk (used for transfers and staging)
                        Used only if the source SE is a tape storage
        :param context: fts3 context. If not given, it is created (see ftsServer & ucert param)
        :param ftsServer: the address of the fts server to submit to. Used only if context is
                          not given. if not given either, use the ftsServer object attribute

        :param ucert: path to the user certificate/proxy. Might be inferred by the fts cli (see its doc)

        :returns: S_OK([FTSFile ids of files submitted])
    """

    log = gLogger.getSubLogger("submit/%s/%s_%s" %
                               (self.operationID, self.sourceSE, self.targetSE), True)

    if not context:
      if not ftsServer:
        ftsServer = self.ftsServer
      context = fts3.Context(
          endpoint=ftsServer,
          ucert=ucert,
          request_class=ftsSSLRequest,
          verify=False)

    # Construct the target SURL
    res = self.__fetchSpaceToken(self.targetSE)
    if not res['OK']:
      return res
    target_spacetoken = res['Value']

    allLFNs = [ftsFile.lfn for ftsFile in self.filesToSubmit]

    failedLFNs = set()

    # getting all the target surls
    res = StorageElement(self.targetSE, vo=self.vo).getURL(allLFNs, protocol='srm')
    if not res['OK']:
      return res

    for lfn, reason in res['Value']['Failed'].iteritems():
      failedLFNs.add(lfn)
      log.error("Could not get target SURL", "%s %s" % (lfn, reason))

    allTargetSURLs = res['Value']['Successful']

    if self.type == 'Transfer':
      res = self._constructTransferJob(
          context,
          pinTime,
          allTargetSURLs,
          failedLFNs,
          target_spacetoken)
    elif self.type == 'Staging':
      res = self._constructStagingJob(
          context,
          pinTime,
          allTargetSURLs,
          failedLFNs,
          target_spacetoken)
    elif self.type == 'Removal':
      res = self._constructRemovalJob(context, allTargetSURLs, failedLFNs, target_spacetoken)

    if not res['OK']:
      return res

    job, fileIDsInTheJob = res['Value']
    setFileIdsInTheJob = set(fileIDsInTheJob)

    try:
      self.ftsGUID = fts3.submit(context, job)
      log.info("Got GUID %s" % self.ftsGUID)

      # Only increase the amount of attempt
      # if we succeeded in submitting -> no ! Why did I do that ??
      for ftsFile in self.filesToSubmit:
        ftsFile.attempt += 1
        if ftsFile.fileID in setFileIdsInTheJob:
          ftsFile.status = 'Submitted'

      now = datetime.datetime.utcnow().replace(microsecond=0)
      self.submitTime = now
      self.lastUpdate = now
      self.lastMonitor = now

    except FTS3ClientException as e:
      log.exception("Error at submission", repr(e))
      return S_ERROR("Error at submission: %s" % e)

    return S_OK(fileIDsInTheJob)
Esempio n. 53
0
    def _monitorJob(self, ftsJob):
        """ * query the FTS servers
        * update the FTSFile status
        * update the FTSJob status

        :param ftsJob: FTS job

        :return: ftsJob, S_OK()/S_ERROR()
    """
        # General try/except to avoid that the thread dies
        try:
            threadID = current_process().name
            log = gLogger.getSubLogger("_monitorJob/%s" % ftsJob.jobID,
                                       child=True)

            res = self.getFTS3Context(ftsJob.username,
                                      ftsJob.userGroup,
                                      ftsJob.ftsServer,
                                      threadID=threadID)

            if not res['OK']:
                log.error("Error getting context", res)
                return ftsJob, res

            context = res['Value']

            res = ftsJob.monitor(context=context)

            if not res['OK']:
                log.error("Error monitoring job", res)

                # If the job was not found on the server, update the DB
                if cmpError(res, errno.ESRCH):
                    res = self.fts3db.cancelNonExistingJob(
                        ftsJob.operationID, ftsJob.ftsGUID)

                return ftsJob, res

            # { fileID : { Status, Error } }
            filesStatus = res['Value']

            # Specify the job ftsGUID to make sure we do not overwrite
            # status of files already taken by newer jobs
            res = self.fts3db.updateFileStatus(filesStatus,
                                               ftsGUID=ftsJob.ftsGUID)

            if not res['OK']:
                log.error("Error updating file fts status",
                          "%s, %s" % (ftsJob.ftsGUID, res))
                return ftsJob, res

            upDict = {
                ftsJob.jobID: {
                    'status': ftsJob.status,
                    'error': ftsJob.error,
                    'completeness': ftsJob.completeness,
                    'operationID': ftsJob.operationID,
                    'lastMonitor': True,
                }
            }
            res = self.fts3db.updateJobStatus(upDict)

            if ftsJob.status in ftsJob.FINAL_STATES:
                self.__sendAccounting(ftsJob)

            return ftsJob, res

        except Exception as e:
            return ftsJob, S_ERROR(0, "Exception %s" % repr(e))
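The filesStatus mapping returned by ftsJob.monitor() and passed to updateFileStatus follows the { fileID : { Status, Error } } shape noted above; a hedged example of what such a dictionary could look like (fileIDs, key spelling and messages are illustrative only):

filesStatus = {
    1042: {"status": "FINISHED", "error": ""},
    1043: {"status": "FAILED", "error": "source file not found"},
}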
Esempio n. 54
0
  def _treatOperation(self, operation):
    """ Treat one operation:
          * does the callback if the operation is finished
          * generate new jobs and submits them

          :param operation: the operation to treat
          :param threadId: the id of the thread; it just has to be unique (used for the context cache)
    """
    try:
      threadID = current_process().name
      log = gLogger.getSubLogger("treatOperation/%s" % operation.operationID, child=True)

      # If the operation is totally processed
      # we perform the callback
      if operation.isTotallyProcessed():
        log.debug("FTS3Operation %s is totally processed" % operation.operationID)
        res = operation.callback()

        if not res['OK']:
          log.error("Error performing the callback", res)
          log.info("Putting back the operation")
          dbRes = self.fts3db.persistOperation(operation)

          if not dbRes['OK']:
            log.error("Could not persist operation", dbRes)

          return operation, res

      else:
        log.debug("FTS3Operation %s is not totally processed yet" % operation.operationID)

        res = operation.prepareNewJobs(
            maxFilesPerJob=self.maxFilesPerJob, maxAttemptsPerFile=self.maxAttemptsPerFile)

        if not res['OK']:
          log.error("Cannot prepare new Jobs", "FTS3Operation %s : %s" %
                    (operation.operationID, res))
          return operation, res

        newJobs = res['Value']

        log.debug("FTS3Operation %s: %s new jobs to be submitted" %
                  (operation.operationID, len(newJobs)))

        for ftsJob in newJobs:
          res = self._serverPolicy.chooseFTS3Server()
          if not res['OK']:
            log.error(res)
            continue

          ftsServer = res['Value']
          log.debug("Use %s server" % ftsServer)

          ftsJob.ftsServer = ftsServer

          res = self.getFTS3Context(
              ftsJob.username, ftsJob.userGroup, ftsServer, threadID=threadID)

          if not res['OK']:
            log.error("Could not get context", res)
            continue

          context = res['Value']
          res = ftsJob.submit(context=context, protocols=self.thirdPartyProtocols)

          if not res['OK']:
            log.error("Could not submit FTS3Job", "FTS3Operation %s : %s" %
                      (operation.operationID, res))
            continue

          operation.ftsJobs.append(ftsJob)

          submittedFileIds = res['Value']
          log.info("FTS3Operation %s: Submitted job for %s transfers" %
                   (operation.operationID, len(submittedFileIds)))

        # new jobs are put in the DB at the same time
      res = self.fts3db.persistOperation(operation)

      if not res['OK']:
        log.error("Could not persist operation", res)

      return operation, res

    except Exception as e:
      log.exception('Exception in the thread', repr(e))
      return operation, S_ERROR("Exception %s" % repr(e))
Esempio n. 55
0
  def _treatOperation(self, operation):
    """ Treat one operation:
          * does the callback if the operation is finished
          * generate new jobs and submits them

          :param operation: the operation to treat
          :param threadId: the id of the thread; it just has to be unique (used for the context cache)
    """
    try:
      threadID = current_process().name
      log = gLogger.getSubLogger("treatOperation/%s" % operation.operationID, child=True)

      # If the operation is totally processed
      # we perform the callback
      if operation.isTotallyProcessed():
        log.debug("FTS3Operation %s is totally processed" % operation.operationID)
        res = operation.callback()

        if not res['OK']:
          log.error("Error performing the callback", res)
          log.info("Putting back the operation")
          dbRes = self.fts3db.persistOperation(operation)

          if not dbRes['OK']:
            log.error("Could not persist operation", dbRes)

          return operation, res

      else:
        log.debug("FTS3Operation %s is not totally processed yet" % operation.operationID)

        res = operation.prepareNewJobs(
            maxFilesPerJob=self.maxFilesPerJob, maxAttemptsPerFile=self.maxAttemptsPerFile)

        if not res['OK']:
          log.error("Cannot prepare new Jobs", "FTS3Operation %s : %s" %
                    (operation.operationID, res))
          return operation, res

        newJobs = res['Value']

        log.debug("FTS3Operation %s: %s new jobs to be submitted" %
                  (operation.operationID, len(newJobs)))

        for ftsJob in newJobs:
          res = self._serverPolicy.chooseFTS3Server()
          if not res['OK']:
            log.error(res)
            continue

          ftsServer = res['Value']
          log.debug("Use %s server" % ftsServer)

          ftsJob.ftsServer = ftsServer

          res = self.getFTS3Context(
              ftsJob.username, ftsJob.userGroup, ftsServer, threadID=threadID)

          if not res['OK']:
            log.error("Could not get context", res)
            continue

          context = res['Value']
          res = ftsJob.submit(context=context)

          if not res['OK']:
            log.error("Could not submit FTS3Job", "FTS3Operation %s : %s" %
                      (operation.operationID, res))
            continue

          operation.ftsJobs.append(ftsJob)

          submittedFileIds = res['Value']
          log.info("FTS3Operation %s: Submitted job for %s transfers" %
                   (operation.operationID, len(submittedFileIds)))

        # new jobs are put in the DB at the same time
      res = self.fts3db.persistOperation(operation)

      if not res['OK']:
        log.error("Could not persist operation", res)

      return operation, res

    except Exception as e:
      log.exception('Exception in the thread', repr(e))
      return operation, S_ERROR("Exception %s" % repr(e))
Esempio n. 56
0
import os
try:
  import hashlib
  md5 = hashlib
except ImportError:
  import md5
import types
import threading
import cStringIO
import tarfile
import tempfile
from DIRAC.Core.Utilities.ReturnValues import S_OK, S_ERROR
from DIRAC.FrameworkSystem.Client.Logger import gLogger

gLogger = gLogger.getSubLogger( "FileTransmissionHelper" )

class FileHelper:

  __validDirections = ( "toClient", "fromClient", 'receive', 'send' )
  __directionsMapping = { 'toClient' : 'send', 'fromClient' : 'receive' }

  def __init__( self, oTransport = None, checkSum = True ):
    self.oTransport = oTransport
    self.__checkMD5 = checkSum
    self.__oMD5 = md5.md5()
    self.bFinishedTransmission = False
    self.bReceivedEOF = False
    self.direction = False
    self.packetSize = 1048576
    self.__fileBytes = 0