Example #1
    def execute(self):
        """ execution in one agent's cycle

    :param self: self reference
    """

        self.enableFlag = self.am_getOption('EnableFlag', self.enableFlag)
        if self.enableFlag != 'True':
            self.log.info(
                'TransformationCleaningAgent is disabled by configuration option EnableFlag'
            )
            return S_OK('Disabled via CS flag')

        # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
        res = self.transClient.getTransformations(
            {'Status': 'Cleaning', 'Type': self.transformationTypes})
        if res['OK']:
            for transDict in res['Value']:
                if self.shifterProxy:
                    self._executeClean(transDict)
                else:
                    self.log.info(
                        "Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeClean)(
                        transDict,
                        proxyUserDN=transDict['AuthorDN'],
                        proxyUserGroup=transDict['AuthorGroup'])
        else:
            self.log.error("Failed to get transformations", res['Message'])

        # Obtain the transformations in RemovingFiles status and remove the output files
        res = self.transClient.getTransformations(
            {'Status': 'RemovingFiles', 'Type': self.transformationTypes})
        if res['OK']:
            for transDict in res['Value']:
                if self.shifterProxy:
                    self._executeRemoval(transDict)
                else:
                    self.log.info(
                        "Removing files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeRemoval)(
                        transDict,
                        proxyUserDN=transDict['AuthorDN'],
                        proxyUserGroup=transDict['AuthorGroup'])
        else:
            self.log.error("Could not get the transformations", res['Message'])

        # Obtain the transformations in Completed status and archive if inactive for X days
        olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter)
        res = self.transClient.getTransformations(
            {
                'Status': 'Completed',
                'Type': self.transformationTypes
            },
            older=olderThanTime,
            timeStamp='LastUpdate')
        if res['OK']:
            for transDict in res['Value']:
                if self.shifterProxy:
                    self._executeArchive(transDict)
                else:
                    self.log.info(
                        "Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeArchive)(
                        transDict,
                        proxyUserDN=transDict['AuthorDN'],
                        proxyUserGroup=transDict['AuthorGroup'])
        else:
            self.log.error("Could not get the transformations", res['Message'])
        return S_OK()
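The core pattern in this example is wrapping a bound method with executeWithUserProxy and calling the returned wrapper with the transformation author's DN and group. Below is a minimal, hedged sketch of that calling convention, assuming a DIRAC installation and valid credentials; the import paths are the usual DIRAC locations but are an assumption here, and cleanOneTransformation plus the DN/group values are made-up placeholders.

# Hedged sketch of the executeWithUserProxy calling convention used above.
# Requires a DIRAC installation; the DN/group values are placeholders.
from DIRAC import S_OK
from DIRAC.Core.Utilities.Proxy import executeWithUserProxy

def cleanOneTransformation(transDict):
    # Hypothetical payload: anything that must run with the author's credentials.
    return S_OK(transDict['TransformationID'])

transDict = {'TransformationID': 1234,
             'AuthorDN': '/DC=example/CN=some user',  # placeholder DN
             'AuthorGroup': 'some_group'}             # placeholder group

# executeWithUserProxy(f) returns a wrapper; the proxyUserDN / proxyUserGroup
# keyword arguments select whose proxy is set up before f runs.
result = executeWithUserProxy(cleanOneTransformation)(
    transDict,
    proxyUserDN=transDict['AuthorDN'],
    proxyUserGroup=transDict['AuthorGroup'])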
Example #2
  def execute(self):
    """ execution in one agent's cycle

    :param self: self reference
    """

    self.enableFlag = self.am_getOption('EnableFlag', self.enableFlag)
    if self.enableFlag != 'True':
      self.log.info('TransformationCleaningAgent is disabled by configuration option EnableFlag')
      return S_OK('Disabled via CS flag')

    # Obtain the transformations in Cleaning status and remove any mention of the jobs/files
    res = self.transClient.getTransformations({'Status': 'Cleaning',
                                               'Type': self.transformationTypes})
    if res['OK']:
      for transDict in res['Value']:
        if self.shifterProxy:
          self._executeClean(transDict)
        else:
          self.log.info("Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" %
                        transDict)
          executeWithUserProxy(self._executeClean)(transDict,
                                                   proxyUserDN=transDict['AuthorDN'],
                                                   proxyUserGroup=transDict['AuthorGroup'])
    else:
      self.log.error("Failed to get transformations", res['Message'])

    # Obtain the transformations in RemovingFiles status and remove the output files
    res = self.transClient.getTransformations({'Status': 'RemovingFiles',
                                               'Type': self.transformationTypes})
    if res['OK']:
      for transDict in res['Value']:
        if self.shifterProxy:
          self._executeRemoval(transDict)
        else:
          self.log.info("Removing files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" %
                        transDict)
          executeWithUserProxy(self._executeRemoval)(transDict,
                                                     proxyUserDN=transDict['AuthorDN'],
                                                     proxyUserGroup=transDict['AuthorGroup'])
    else:
      self.log.error("Could not get the transformations", res['Message'])

    # Obtain the transformations in Completed status and archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter)
    res = self.transClient.getTransformations({'Status': 'Completed',
                                               'Type': self.transformationTypes},
                                              older=olderThanTime,
                                              timeStamp='LastUpdate')
    if res['OK']:
      for transDict in res['Value']:
        if self.shifterProxy:
          self._executeArchive(transDict)
        else:
          self.log.info("Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" %
                        transDict)
          executeWithUserProxy(self._executeArchive)(transDict,
                                                     proxyUserDN=transDict['AuthorDN'],
                                                     proxyUserGroup=transDict['AuthorGroup'])
    else:
      self.log.error("Could not get the transformations", res['Message'])
    return S_OK()
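All three loops in execute() repeat the same decision: run the action directly when a shifter proxy is configured, otherwise wrap it with executeWithUserProxy using the author's credentials. Below is a hedged sketch of how that decision could be factored into a small helper; _runWithAuthorProxy is a hypothetical name, not part of DIRAC.

  def _runWithAuthorProxy(self, method, transDict, action):
    """Hypothetical helper: run method(transDict) directly when a shifter proxy
    is configured, otherwise wrap it with executeWithUserProxy using the
    transformation author's DN and group (same pattern as execute() above)."""
    if self.shifterProxy:
      return method(transDict)
    self.log.info("%s transformation %s with %s, %s" %
                  (action, transDict['TransformationID'],
                   transDict['AuthorDN'], transDict['AuthorGroup']))
    return executeWithUserProxy(method)(transDict,
                                        proxyUserDN=transDict['AuthorDN'],
                                        proxyUserGroup=transDict['AuthorGroup'])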
Example #3
def _checkFilesToStage( seToLFNs, onlineLFNs, offlineLFNs, absentLFNs,
                        checkOnlyTapeSEs = None, jobLog = None,
                        proxyUserName = None,
                        proxyUserGroup = None,
                        executionLock = None ):
  """
  Checks on SEs whether the file is NEARLINE or ONLINE
  onlineLFNs, offlineLFNs and absentLFNs are modified to contain the files found online
  If checkOnlyTapeSEs is True, disk replicas are not checked
  As soon as a replica is found Online for a file, no further check is made
  """
  # Only check on storage if it is a tape SE
  if jobLog is None:
    logger = gLogger
  else:
    logger = jobLog
  if checkOnlyTapeSEs is None:
    # Default value is True
    checkOnlyTapeSEs = True

  failed = {}
  for se, lfnsInSEList in seToLFNs.iteritems():
    # If we have found already all files online at another SE, no need to check the others
    # but still we want to set the SE as Online if not a TapeSE
    vo = getVOForGroup( proxyUserGroup )
    seObj = StorageElement( se, vo = vo )
    status = seObj.getStatus()
    if not status['OK']:
      return status
    tapeSE = status['Value']['TapeSE']
    diskSE = status['Value']['DiskSE']
    # If requested to check only Tape SEs and the file is at a diskSE, we guess it is Online...
    filesToCheck = []
    for lfn in lfnsInSEList:
      # If the file had already been found accessible at an SE, only check that this one is on disk
      diskIsOK = checkOnlyTapeSEs or ( lfn in onlineLFNs )
      if diskIsOK and diskSE:
        onlineLFNs.setdefault( lfn, [] ).append( se )
      elif not diskIsOK:
        filesToCheck.append( lfn )
    if not filesToCheck:
      continue

    # Wrap the SE method with executeWithUserProxy
    fileMetadata = ( executeWithUserProxy( seObj.getFileMetadata )
                    ( filesToCheck,
                      proxyUserName = proxyUserName,
                      proxyUserGroup = proxyUserGroup,
                      executionLock = executionLock ) )

    if not fileMetadata['OK']:
      failed[se] = dict.fromkeys( filesToCheck, fileMetadata['Message'] )
    else:
      if fileMetadata['Value']['Failed']:
        failed[se] = fileMetadata['Value']['Failed']
      # is there at least one replica online?
      for lfn, mDict in fileMetadata['Value']['Successful'].iteritems():
        # SRM returns Cached, but others may only return Accessible
        if mDict.get( 'Cached', mDict['Accessible'] ):
          onlineLFNs.setdefault( lfn, [] ).append( se )
        elif tapeSE:
          # A file can be staged only at Tape SE
          offlineLFNs.setdefault( lfn, [] ).append( se )
        else:
          # File not available at a diskSE... we shall retry later
          pass

  # Doesn't matter if some files are Offline if they are also online
  for lfn in set( offlineLFNs ) & set( onlineLFNs ):
    offlineLFNs.pop( lfn )

  # If the file was found staged, ignore possible errors, but print out errors
  for se, failedLfns in failed.items():
    logger.error( "Errors when getting files metadata", 'at %s' % se )
    for lfn, reason in failedLfns.items():
      if lfn in onlineLFNs:
        logger.warn( reason, 'for %s, but there is an online replica' % lfn )
        failed[se].pop( lfn )
      else:
        logger.error( reason, 'for %s, no online replicas' % lfn )
        if cmpError( reason, errno.ENOENT ):
          absentLFNs.setdefault( lfn, [] ).append( se )
          failed[se].pop( lfn )
    if not failed[se]:
      failed.pop( se )
  # Find the files that do not exist at SE
  if failed:
    logger.error( "Error getting metadata", "for %d files" % len( set( lfn for lfnList in failed.itervalues() for lfn in lfnList ) ) )

  for lfn in absentLFNs:
    seList = absentLFNs[lfn]
    # FIXME: it is not possible to return an S_ERROR() here, so return only the message
    absentLFNs[lfn] = S_ERROR( errno.ENOENT, "File not at %s" % ','.join( seList ) )['Message']
  # Format the error for absent files
  return S_OK()
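A hedged usage sketch of this helper: the caller builds the seToLFNs mapping and passes empty dictionaries that the function fills in place (LFN -> list of SEs for online/offline, LFN -> error message for absent). The SE name, LFNs and credentials below are made-up placeholders, and a working DIRAC installation with a valid proxy is assumed.

# Hedged usage sketch: SE name, LFNs and credentials are placeholders.
seToLFNs = { 'SOME-TAPE-SE': [ '/some/vo/path/file1', '/some/vo/path/file2' ] }
onlineLFNs = {}
offlineLFNs = {}
absentLFNs = {}
res = _checkFilesToStage( seToLFNs, onlineLFNs, offlineLFNs, absentLFNs,
                          checkOnlyTapeSEs = True,
                          proxyUserName = 'someuser',     # placeholder
                          proxyUserGroup = 'some_group' ) # placeholder
if res['OK']:
  # The three dictionaries have been modified in place by the call above
  print( 'online : %s' % sorted( onlineLFNs ) )
  print( 'offline: %s' % sorted( offlineLFNs ) )
  print( 'absent : %s' % sorted( absentLFNs ) )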
Example #4
    def finalize(self):
        """Only at finalization: will clean ancient transformations (remnants)

            1) get the transformation IDs of jobs that are older than 1 year
            2) find the status of those transformations. Those "Cleaned" and "Archived" will be
               cleaned and archived (again)

        Why do this here? Basically, it's a race:

        1) the production manager submits a transformation
        2) the TransformationAgent, and a bit later the WorkflowTaskAgent, put that transformation in their
           internal queues, so they will eventually work on it during their (long-ish) cycles.
        3) 1 minute after creating the transformation, the production manager cleans it (by hand, for whatever reason).
           So, the status is changed to "Cleaning"
        4) the TransformationCleaningAgent cleans what has been created (maybe, nothing),
           then sets the transformation status to "Cleaned" or "Archived"
        5) a bit later the TransformationAgent, and later the WorkflowTaskAgent, kick in,
           creating tasks and jobs for a production that has effectively been cleaned (but these two agents don't know it yet).

        Of course, one could make one final check in the TransformationAgent or the WorkflowTaskAgent,
        but these two agents already do a lot of work and are pretty heavy.
        So, we should just clean up from time to time.
        The check added here runs only when the agent finalizes, and it is a fairly light operation anyway.
        """
        res = self.jobMonitoringClient.getJobGroups(
            None,
            datetime.utcnow() - timedelta(days=365))
        if not res["OK"]:
            self.log.error("Failed to get job groups", res["Message"])
            return res
        transformationIDs = res["Value"]
        if transformationIDs:
            res = self.transClient.getTransformations(
                {"TransformationID": transformationIDs})
            if not res["OK"]:
                self.log.error("Failed to get transformations", res["Message"])
                return res
            transformations = res["Value"]
            toClean = []
            toArchive = []
            for transDict in transformations:
                if transDict["Status"] == "Cleaned":
                    toClean.append(transDict)
                if transDict["Status"] == "Archived":
                    toArchive.append(transDict)

            for transDict in toClean:
                if self.shifterProxy:
                    self._executeClean(transDict)
                else:
                    self.log.info(
                        "Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeClean)(
                        transDict,
                        proxyUserDN=transDict["AuthorDN"],
                        proxyUserGroup=transDict["AuthorGroup"])

            for transDict in toArchive:
                if self.shifterProxy:
                    self._executeArchive(transDict)
                else:
                    self.log.info(
                        "Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s"
                        % transDict)
                    executeWithUserProxy(self._executeArchive)(
                        transDict,
                        proxyUserDN=transDict["AuthorDN"],
                        proxyUserGroup=transDict["AuthorGroup"])

            # Remove JobIDs that were unknown to the TransformationSystem
            jobGroupsToCheck = [
                str(transDict["TransformationID"]).zfill(8)
                for transDict in toClean + toArchive
            ]
            res = self.jobMonitoringClient.getJobs(
                {"JobGroup": jobGroupsToCheck})
            if not res["OK"]:
                return res
            jobIDsToRemove = [int(jobID) for jobID in res["Value"]]
            res = self.__removeWMSTasks(jobIDsToRemove)
            if not res["OK"]:
                return res

        return S_OK()
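The job-group lookup above matches jobs to transformations by using the transformation ID zero-padded to eight characters as the job group, which is what the zfill(8) call produces. A small standalone illustration (the IDs are made up):

# Transformation IDs are matched to WMS job groups by zero-padding to 8 digits.
transformationIDs = [42, 1234, 99999999]  # made-up example IDs
jobGroups = [str(tid).zfill(8) for tid in transformationIDs]
print(jobGroups)  # ['00000042', '00001234', '99999999']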