def execute(self):
    """Run one agent cycle over the transformations awaiting clean-up.

    The ``EnableFlag`` option is re-read first; unless it is the string
    ``'True'`` the cycle is skipped. Otherwise three queries are sent to the
    transformation client, each restricted to ``self.transformationTypes``:

    * status ``Cleaning``      -> ``self._executeClean``
    * status ``RemovingFiles`` -> ``self._executeRemoval``
    * status ``Completed`` (queried with ``older`` set to ``archiveAfter``
      days ago and ``timeStamp='LastUpdate'``) -> ``self._executeArchive``

    When ``self.shifterProxy`` is set the handler is called directly;
    otherwise it is wrapped with ``executeWithUserProxy`` and called with the
    transformation's ``AuthorDN`` / ``AuthorGroup``. A failed query is logged
    and the remaining phases still run.

    :param self: self reference
    :return: ``S_OK('Disabled via CS flag')`` when disabled, else ``S_OK()``
    """
    self.enableFlag = self.am_getOption('EnableFlag', self.enableFlag)
    if self.enableFlag != 'True':
        self.log.info('TransformationCleaningAgent is disabled by configuration option EnableFlag')
        return S_OK('Disabled via CS flag')

    # Phase 1: transformations in Cleaning status - remove any mention of the jobs/files
    cleaning = self.transClient.getTransformations({'Status': 'Cleaning',
                                                    'Type': self.transformationTypes})
    if not cleaning['OK']:
        self.log.error("Failed to get transformations", cleaning['Message'])
    else:
        for transformation in cleaning['Value']:
            if not self.shifterProxy:
                self.log.info("Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" %
                              transformation)
                runAsAuthor = executeWithUserProxy(self._executeClean)
                runAsAuthor(transformation,
                            proxyUserDN=transformation['AuthorDN'],
                            proxyUserGroup=transformation['AuthorGroup'])
            else:
                self._executeClean(transformation)

    # Phase 2: transformations in RemovingFiles status - remove the output files
    removing = self.transClient.getTransformations({'Status': 'RemovingFiles',
                                                    'Type': self.transformationTypes})
    if not removing['OK']:
        self.log.error("Could not get the transformations", removing['Message'])
    else:
        for transformation in removing['Value']:
            if not self.shifterProxy:
                self.log.info("Removing files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" %
                              transformation)
                runAsAuthor = executeWithUserProxy(self._executeRemoval)
                runAsAuthor(transformation,
                            proxyUserDN=transformation['AuthorDN'],
                            proxyUserGroup=transformation['AuthorGroup'])
            else:
                self._executeRemoval(transformation)

    # Phase 3: transformations in Completed status - archive if inactive for X days
    cutOff = datetime.utcnow() - timedelta(days=self.archiveAfter)
    completed = self.transClient.getTransformations({'Status': 'Completed',
                                                     'Type': self.transformationTypes},
                                                    older=cutOff,
                                                    timeStamp='LastUpdate')
    if not completed['OK']:
        self.log.error("Could not get the transformations", completed['Message'])
    else:
        for transformation in completed['Value']:
            if not self.shifterProxy:
                self.log.info("Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" %
                              transformation)
                runAsAuthor = executeWithUserProxy(self._executeArchive)
                runAsAuthor(transformation,
                            proxyUserDN=transformation['AuthorDN'],
                            proxyUserGroup=transformation['AuthorGroup'])
            else:
                self._executeArchive(transformation)

    return S_OK()
def execute(self):
    """Perform one agent cycle: clean, strip output files from, and archive transformations.

    Nothing is done (beyond logging) unless the ``EnableFlag`` option equals
    the string ``'True'``. Each of the three phases asks the transformation
    client for transformations of ``self.transformationTypes`` in a given
    status and hands every returned dictionary to the matching handler:
    ``Cleaning`` -> ``_executeClean``, ``RemovingFiles`` -> ``_executeRemoval``,
    ``Completed`` (with ``older`` = now minus ``archiveAfter`` days and
    ``timeStamp='LastUpdate'``) -> ``_executeArchive``.

    A query failure is only logged; later phases are still attempted.

    :param self: self reference
    :return: ``S_OK('Disabled via CS flag')`` when disabled, else ``S_OK()``
    """
    self.enableFlag = self.am_getOption('EnableFlag', self.enableFlag)
    if self.enableFlag != 'True':
        self.log.info('TransformationCleaningAgent is disabled by configuration option EnableFlag')
        return S_OK('Disabled via CS flag')

    def _dispatch(action, banner, entry):
        # With a shifter proxy the action runs as-is; otherwise announce it and
        # run it wrapped with the credentials of the transformation's author.
        if self.shifterProxy:
            action(entry)
            return
        self.log.info(banner % entry)
        executeWithUserProxy(action)(entry,
                                     proxyUserDN=entry['AuthorDN'],
                                     proxyUserGroup=entry['AuthorGroup'])

    # Transformations in Cleaning status: remove any mention of the jobs/files
    reply = self.transClient.getTransformations({'Status': 'Cleaning', 'Type': self.transformationTypes})
    if reply['OK']:
        for entry in reply['Value']:
            _dispatch(self._executeClean,
                      "Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s",
                      entry)
    else:
        self.log.error("Failed to get transformations", reply['Message'])

    # Transformations in RemovingFiles status: remove the output files
    reply = self.transClient.getTransformations({'Status': 'RemovingFiles', 'Type': self.transformationTypes})
    if reply['OK']:
        for entry in reply['Value']:
            _dispatch(self._executeRemoval,
                      "Removing files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s",
                      entry)
    else:
        self.log.error("Could not get the transformations", reply['Message'])

    # Transformations in Completed status: archive if inactive for X days
    olderThanTime = datetime.utcnow() - timedelta(days=self.archiveAfter)
    reply = self.transClient.getTransformations({'Status': 'Completed', 'Type': self.transformationTypes},
                                                older=olderThanTime,
                                                timeStamp='LastUpdate')
    if reply['OK']:
        for entry in reply['Value']:
            _dispatch(self._executeArchive,
                      "Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s",
                      entry)
    else:
        self.log.error("Could not get the transformations", reply['Message'])

    return S_OK()
def _checkFilesToStage(seToLFNs, onlineLFNs, offlineLFNs, absentLFNs,
                       checkOnlyTapeSEs=None, jobLog=None,
                       proxyUserName=None, proxyUserGroup=None,
                       executionLock=None):
    """Classify the replicas of each LFN as online, offline (stageable) or absent.

    ``onlineLFNs``, ``offlineLFNs`` and ``absentLFNs`` are dictionaries that are
    updated in place:

    * ``onlineLFNs[lfn]``  -> list of SEs where the file is considered online
    * ``offlineLFNs[lfn]`` -> list of tape SEs where the file is not cached
      (entries are dropped again for files that are also online somewhere)
    * ``absentLFNs[lfn]``  -> on return, the message of an ``ENOENT`` error
      naming the SEs where the file was reported missing

    :param seToLFNs: dictionary ``{SE name: iterable of LFNs}``
    :param checkOnlyTapeSEs: if true (the default when ``None``), a replica at
        a disk SE is assumed online without querying the storage
    :param jobLog: logger to use; ``gLogger`` is used when ``None``
    :param proxyUserName: forwarded to ``executeWithUserProxy``
    :param proxyUserGroup: forwarded to ``executeWithUserProxy``; also used to
        resolve the VO passed to ``StorageElement``
    :param executionLock: forwarded to ``executeWithUserProxy``
    :return: the failed ``getStatus()`` result of an SE if one occurs,
        otherwise ``S_OK()``
    """
    logger = gLogger if jobLog is None else jobLog
    if checkOnlyTapeSEs is None:
        # Default value is True
        checkOnlyTapeSEs = True

    failed = {}
    for se, lfnsInSEList in seToLFNs.items():
        vo = getVOForGroup(proxyUserGroup)
        seObj = StorageElement(se, vo=vo)
        status = seObj.getStatus()
        if not status['OK']:
            return status
        tapeSE = status['Value']['TapeSE']
        diskSE = status['Value']['DiskSE']

        filesToCheck = []
        for lfn in lfnsInSEList:
            # A disk replica is trusted either because only tape SEs are to be
            # checked, or because the file was already found online elsewhere.
            diskIsOK = checkOnlyTapeSEs or (lfn in onlineLFNs)
            if diskIsOK and diskSE:
                onlineLFNs.setdefault(lfn, []).append(se)
            elif not diskIsOK or (tapeSE and lfn not in onlineLFNs):
                # Tape replicas must be queried as long as no online replica is
                # known; without the second clause nothing was ever checked
                # when checkOnlyTapeSEs is true.
                filesToCheck.append(lfn)
        if not filesToCheck:
            continue

        # Wrap the SE method with executeWithUserProxy
        fileMetadata = executeWithUserProxy(seObj.getFileMetadata)(filesToCheck,
                                                                   proxyUserName=proxyUserName,
                                                                   proxyUserGroup=proxyUserGroup,
                                                                   executionLock=executionLock)
        if not fileMetadata['OK']:
            # The whole call failed: every requested file gets the same reason
            failed[se] = dict.fromkeys(filesToCheck, fileMetadata['Message'])
            continue

        if fileMetadata['Value']['Failed']:
            failed[se] = fileMetadata['Value']['Failed']
        # Is there at least one replica online?
        for lfn, mDict in fileMetadata['Value']['Successful'].items():
            # SRM returns Cached, but others may only return Accessible
            if mDict.get('Cached', mDict['Accessible']):
                onlineLFNs.setdefault(lfn, []).append(se)
            elif tapeSE:
                # A file can be staged only at Tape SE
                offlineLFNs.setdefault(lfn, []).append(se)
            # else: file not available at a disk SE... we shall retry later

    # Doesn't matter if some files are Offline if they are also online
    for lfn in set(offlineLFNs) & set(onlineLFNs):
        offlineLFNs.pop(lfn)

    # If the file was found staged, ignore possible errors, but print out errors.
    # Snapshots (list(...)) are iterated because entries are popped on the way.
    for se, failedLfns in list(failed.items()):
        logger.error("Errors when getting files metadata", 'at %s' % se)
        for lfn, reason in list(failedLfns.items()):
            if lfn in onlineLFNs:
                logger.warn(reason, 'for %s, but there is an online replica' % lfn)
                failedLfns.pop(lfn)
            else:
                logger.error(reason, 'for %s, no online replicas' % lfn)
                if cmpError(reason, errno.ENOENT):
                    # The file does not exist at this SE
                    absentLFNs.setdefault(lfn, []).append(se)
                    failedLfns.pop(lfn)
        if not failedLfns:
            failed.pop(se)

    if failed:
        stillFailing = set(lfn for lfnList in failed.values() for lfn in lfnList)
        logger.error("Error getting metadata", "for %d files" % len(stillFailing))

    # Format the error for absent files
    for lfn in absentLFNs:
        seList = absentLFNs[lfn]
        # FIXME: it is not possible to return here an S_ERROR(), return the message only
        absentLFNs[lfn] = S_ERROR(errno.ENOENT, "File not at %s" % ','.join(seList))['Message']

    return S_OK()
def finalize(self):
    """Only at finalization: clean ancient transformations (remnants) once more.

    Steps:

    1) ask the job monitoring client for the job groups, passing a cut-off
       date of 365 days ago; the returned values are used as transformation IDs
    2) look up those transformations; the ones in status "Cleaned" are cleaned
       and the ones in status "Archived" are archived (again)
    3) remove the WMS jobs still attached to the job groups of those
       transformations

    Why do this here? Basically, it's a race:

    1) the production manager submits a transformation
    2) the TransformationAgent, and a bit later the WorkflowTaskAgent, put the
       transformation in their internal queue, so eventually during their
       (long-ish) cycle they'll work on it
    3) one minute after creating the transformation, the production manager
       cleans it by hand, so the status is changed to "Cleaning"
    4) the TransformationCleaningAgent cleans what has been created (maybe
       nothing), then sets the status to "Cleaned" or "Archived"
    5) a bit later the TransformationAgent, and later the WorkflowTaskAgent,
       kick in, creating tasks and jobs for a production that is effectively
       cleaned (but these two agents don't know it yet)

    One could add a final check to those two agents, but they are already
    heavy, so instead the remnants are cleaned from time to time. This runs
    only when the agent finalizes.

    :return: the failed result of the first failing service call, otherwise
        ``S_OK()``
    """
    res = self.jobMonitoringClient.getJobGroups(None, datetime.utcnow() - timedelta(days=365))
    if not res["OK"]:
        self.log.error("Failed to get job groups", res["Message"])
        return res
    transformationIDs = res["Value"]
    if not transformationIDs:
        return S_OK()

    res = self.transClient.getTransformations({"TransformationID": transformationIDs})
    if not res["OK"]:
        self.log.error("Failed to get transformations", res["Message"])
        return res

    toClean = []
    toArchive = []
    for transDict in res["Value"]:
        status = transDict["Status"]
        if status == "Cleaned":
            toClean.append(transDict)
        elif status == "Archived":
            toArchive.append(transDict)

    for transDict in toClean:
        if self.shifterProxy:
            self._executeClean(transDict)
        else:
            self.log.info("Cleaning transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" %
                          transDict)
            executeWithUserProxy(self._executeClean)(transDict,
                                                     proxyUserDN=transDict["AuthorDN"],
                                                     proxyUserGroup=transDict["AuthorGroup"])

    for transDict in toArchive:
        if self.shifterProxy:
            self._executeArchive(transDict)
        else:
            self.log.info("Archiving files for transformation %(TransformationID)s with %(AuthorDN)s, %(AuthorGroup)s" %
                          transDict)
            executeWithUserProxy(self._executeArchive)(transDict,
                                                       proxyUserDN=transDict["AuthorDN"],
                                                       proxyUserGroup=transDict["AuthorGroup"])

    # Remove JobIDs that were unknown to the TransformationSystem.
    # Job groups are the transformation IDs zero-padded to 8 characters.
    jobGroupsToCheck = [str(transDict["TransformationID"]).zfill(8) for transDict in toClean + toArchive]
    if not jobGroupsToCheck:
        # Nothing was cleaned or archived: never query with an empty JobGroup
        # filter, since its result would be handed straight to job removal.
        return S_OK()

    res = self.jobMonitoringClient.getJobs({"JobGroup": jobGroupsToCheck})
    if not res["OK"]:
        self.log.error("Failed to get jobs", res["Message"])
        return res
    jobIDsToRemove = [int(jobID) for jobID in res["Value"]]
    res = self.__removeWMSTasks(jobIDsToRemove)
    if not res["OK"]:
        return res

    return S_OK()