class RSSCache:
    """ Cache with purgeThread integrated """

    def __init__(self, lifeTime, updateFunc=None, cacheHistoryLifeTime=None):
        """ Constructor """

        self.__lifeTime = lifeTime
        # lifetime of the history in hours, 24 by default
        self.__cacheHistoryLifeTime = cacheHistoryLifeTime or 24
        self.__updateFunc = updateFunc

        # RSSCache
        self.__rssCache = DictCache()
        self.__rssCacheStatus = []  # ( updateTime, message )
        self.__rssCacheLock = threading.Lock()

        # Create purgeThread
        self.__refreshStop = False
        self.__refreshThread = threading.Thread(target=self.__refreshCacheThreadRun)
        self.__refreshThread.setDaemon(True)

    def startRefreshThread(self):
        """ Run refresh thread. """
        self.__refreshThread.start()

    def stopRefreshThread(self):
        """ Stop refresh thread. """
        self.__refreshStop = True

    def isCacheAlive(self):
        """ Returns status of the cache refreshing thread """
        return S_OK(self.__refreshThread.is_alive())

    def setLifeTime(self, lifeTime):
        """ Set cache life time """
        self.__lifeTime = lifeTime

    def setCacheHistoryLifeTime(self, cacheHistoryLifeTime):
        """ Set cache history life time """
        self.__cacheHistoryLifeTime = cacheHistoryLifeTime

    def getCacheKeys(self):
        """ List all the keys stored in the cache. """
        self.__rssCacheLock.acquire()
        keys = self.__rssCache.getKeys()
        self.__rssCacheLock.release()
        return S_OK(keys)

    def acquireLock(self):
        """ Acquires RSSCache lock """
        self.__rssCacheLock.acquire()

    def releaseLock(self):
        """ Releases RSSCache lock """
        self.__rssCacheLock.release()

    def getCacheStatus(self):
        """ Return the latest cache status """
        self.__rssCacheLock.acquire()
        if self.__rssCacheStatus:
            res = dict([self.__rssCacheStatus[0]])
        else:
            res = {}
        self.__rssCacheLock.release()
        return S_OK(res)

    def getCacheHistory(self):
        """ Return the cache updates history """
        self.__rssCacheLock.acquire()
        res = dict(self.__rssCacheStatus)
        self.__rssCacheLock.release()
        return S_OK(res)

    def get(self, resourceKey):
        """ Gets the resource(s) status(es). Every resource can have multiple
        statuses, so in order to speed up things, we store them on the cache
        as follows::

          { (<resourceName>,<resourceStatusType0>) : whatever0,
            (<resourceName>,<resourceStatusType1>) : whatever1 }
        """
        # cacheKey = '%s#%s' % ( resourceName, resourceStatusType )
        self.__rssCacheLock.acquire()
        resourceStatus = self.__rssCache.get(resourceKey)
        self.__rssCacheLock.release()

        if resourceStatus:
            return S_OK({resourceKey: resourceStatus})
        return S_ERROR("Cannot get %s" % resourceKey)

    def getBulk(self, resourceKeys):
        """ Gets values for resourceKeys in one ATOMIC operation. """
        result = {}
        self.__rssCacheLock.acquire()
        try:
            for resourceKey in resourceKeys:
                resourceRow = self.__rssCache.get(resourceKey)
                if not resourceRow:
                    return S_ERROR("Cannot get %s" % resourceKey)
                result.update({resourceKey: resourceRow})
        finally:
            # release the lock on every exit path
            self.__rssCacheLock.release()
        return S_OK(result)

    def resetCache(self):
        """ Reset cache. """
        self.__rssCacheLock.acquire()
        self.__rssCache.purgeAll()
        self.__rssCacheLock.release()
        return S_OK()

    def refreshCache(self):
        """ Clears the cache and fetches its latest version. Not thread safe!
        Acquire the lock before using it, and release it afterwards.
        """
        self.__rssCache.purgeAll()

        if self.__updateFunc is None:
            return S_ERROR("RSSCache has no updateFunction")
        newCache = self.__updateFunc()
        if not newCache["OK"]:
            return newCache

        itemsAdded = self.__updateCache(newCache["Value"])
        return itemsAdded

    def refreshCacheAndHistory(self):
        """ Refreshes the cache and updates the history. Not thread safe:
        acquire the lock before using it, and release it right after.
        """
        refreshResult = self.refreshCache()

        now = datetime.datetime.utcnow()

        if self.__rssCacheStatus:
            # Check oldest record
            dateInserted, _message = self.__rssCacheStatus[-1]
            if dateInserted < now - datetime.timedelta(hours=self.__cacheHistoryLifeTime):
                self.__rssCacheStatus.pop()

        self.__rssCacheStatus.insert(0, (now, refreshResult))

    ################################################################################
    # Private methods

    def __updateCache(self, newCache):
        """ The new cache must be a dictionary, which should look like::

          { ( <resourceName>,<resourceStatusType0>) : whatever0,
            ( <resourceName>,<resourceStatusType1>) : whatever1 }
        """
        itemsCounter = 0

        for cacheKey, cacheValue in newCache.items():
            self.__rssCache.add(cacheKey, self.__lifeTime, value=cacheValue)
            itemsCounter += 1

        return S_OK(itemsCounter)

    def __refreshCacheThreadRun(self):
        """ Method that periodically refreshes the cache. """
        while not self.__refreshStop:
            self.__rssCacheLock.acquire()
            self.refreshCacheAndHistory()
            self.__rssCacheLock.release()

            time.sleep(self.__lifeTime)

        self.__refreshStop = False
class Cache(object): """ Cache basic class. WARNING: None of its methods is thread safe. Acquire / Release lock when using them ! """ def __init__(self, lifeTime, updateFunc): """ Constructor :Parameters: **lifeTime** - `int` Lifetime of the elements in the cache ( seconds ! ) **updateFunc** - `function` This function MUST return a S_OK | S_ERROR object. In the case of the first, its value must be a dictionary. """ # We set a 20% of the lifetime randomly, so that if we have thousands of jobs # starting at the same time, all the caches will not end at the same time. randomLifeTimeBias = 0.2 * random.random() self.log = gLogger.getSubLogger(self.__class__.__name__) self.__lifeTime = int(lifeTime * (1 + randomLifeTimeBias)) self.__updateFunc = updateFunc # The records returned from the cache must be valid at least 30 seconds. self.__validSeconds = 30 # Cache self.__cache = DictCache() self.__cacheLock = LockRing() self.__cacheLock.getLock(self.__class__.__name__) #............................................................................. # internal cache object getter def cacheKeys(self): """ Cache keys getter :returns: list with valid keys on the cache """ return self.__cache.getKeys(validSeconds=self.__validSeconds) #............................................................................. # acquire / release Locks def acquireLock(self): """ Acquires Cache lock """ self.__cacheLock.acquire(self.__class__.__name__) def releaseLock(self): """ Releases Cache lock """ self.__cacheLock.release(self.__class__.__name__) #............................................................................. # Cache getters def get(self, cacheKeys): """ Gets values for cacheKeys given, if all are found ( present on the cache and valid ), returns S_OK with the results. If any is not neither present not valid, returns S_ERROR. :Parameters: **cacheKeys** - `list` list of keys to be extracted from the cache :return: S_OK | S_ERROR """ result = {} for cacheKey in cacheKeys: cacheRow = self.__cache.get(cacheKey, validSeconds=self.__validSeconds) if not cacheRow: self.log.error(str(cacheKey)) return S_ERROR('Cannot get %s' % str(cacheKey)) result.update({cacheKey: cacheRow}) return S_OK(result) #............................................................................. # Cache refreshers def refreshCache(self): """ Purges the cache and gets fresh data from the update function. :return: S_OK | S_ERROR. If the first, its content is the new cache. """ self.log.verbose('refreshing...') self.__cache.purgeAll() newCache = self.__updateFunc() if not newCache['OK']: self.log.error(newCache['Message']) return newCache newCache = self.__updateCache(newCache['Value']) self.log.verbose('refreshed') return newCache #............................................................................. # Private methods def __updateCache(self, newCache): """ Given the new cache dictionary, updates the internal cache with it. It sets a duration to the entries of <self.__lifeTime> seconds. :Parameters: **newCache** - `dict` dictionary containing a new cache :return: dictionary. It is newCache argument. """ for cacheKey, cacheValue in newCache.items(): self.__cache.add(cacheKey, self.__lifeTime, value=cacheValue) # We are assuming nothing will fail while inserting in the cache. There is # no apparent reason to suspect from that piece of code. return S_OK(newCache)
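# --- Illustrative sketch (not in the original module): the calling pattern the
# Cache class expects. The update function and keys are hypothetical. None of
# the Cache methods lock internally, so the caller brackets them explicitly.
def _exampleCacheUsage():
    def updateSiteStatuses():
        # Must return S_OK( dict ), as required by the constructor docstring.
        return S_OK({'LCG.CERN.ch': 'Active', 'LCG.PIC.es': 'Degraded'})

    siteCache = Cache(lifeTime=600, updateFunc=updateSiteStatuses)

    siteCache.acquireLock()
    try:
        siteCache.refreshCache()
        result = siteCache.get(['LCG.CERN.ch'])
    finally:
        siteCache.releaseLock()

    if result['OK']:
        print(result['Value'])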
class DIRACPilotDirector(PilotDirector): """ DIRAC PilotDirector class """ def __init__(self, submitPool): """ Define some defaults and call parent __init__ """ self.gridMiddleware = 'DIRAC' PilotDirector.__init__(self, submitPool) self.computingElementList = COMPUTING_ELEMENTS self.computingElementDict = {} self.addComputingElement(self.computingElementList) self.siteName = gConfig.getValue('/LocalSite/Site', '') if not self.siteName: self.log.error( 'Can not run a Director if Site Name is not defined') sys.exit() self.__failingCECache = DictCache() self.__ticketsCECache = DictCache() def configure(self, csSection, submitPool): """ Here goes common configuration for DIRAC PilotDirector """ PilotDirector.configure(self, csSection, submitPool) self.reloadConfiguration(csSection, submitPool) self.__failingCECache.purgeExpired() self.__ticketsCECache.purgeExpired() for ce in self.__failingCECache.getKeys(): if ce in self.computingElementDict.keys(): try: del self.computingElementDict[ce] except: pass if self.computingElementDict: self.log.info(' ComputingElements:', ', '.join(self.computingElementDict.keys())) else: return # FIXME: this is to start testing _ceName, computingElementDict = self.computingElementDict.items()[0] self.computingElement = computingElementDict['CE'] self.log.debug(self.computingElement.getCEStatus()) self.log.info(' SiteName:', self.siteName) def configureFromSection(self, mySection): """ reload from CS """ PilotDirector.configureFromSection(self, mySection) self.computingElementList = gConfig.getValue( mySection + '/ComputingElements', self.computingElementList) self.addComputingElement(self.computingElementList) self.siteName = gConfig.getValue(mySection + '/SiteName', self.siteName) def addComputingElement(self, ceList): """ Check if a CE object for the current CE is available, instantiate one if necessary """ for CE in ceList: if CE not in self.computingElementDict: ceFactory = ComputingElementFactory() ceInstance = ceFactory.getCE(ceName=CE) if not ceInstance['OK']: self.log.error('Can not create CE object:', ceInstance['Message']) return self.computingElementDict[CE] = ceInstance[ 'Value'].ceConfigDict # add the 'CE' instance at the end to avoid being overwritten self.computingElementDict[CE]['CE'] = ceInstance['Value'] def _submitPilots(self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit, ceMask, submitPrivatePilot, privateTQ, proxy, pilotsPerJob): """ This method does the actual pilot submission to the DIRAC CE The logic is as follows: - If there are no available CE it return error - If there is no queue available in the CE's, it returns error - It creates a temp directory - It prepare a PilotScript """ taskQueueID = taskQueueDict['TaskQueueID'] # ownerDN = taskQueueDict['OwnerDN'] submittedPilots = 0 # if self.computingElement not in self.computingElementDict: # # Since we can exclude CEs from the list, it may become empty # return S_ERROR( ERROR_CE ) pilotRequirements = [] pilotRequirements.append(('CPUTime', taskQueueDict['CPUTime'])) # do we need to care about anything else? 
pilotRequirementsString = str(pilotRequirements) # Check that there are available queues for the Jobs: if self.enableListMatch: availableQueues = [] # now = Time.dateTime() cachedAvailableQueues = self.listMatchCache.get( pilotRequirementsString) if cachedAvailableQueues is None: availableQueues = self._listQueues(pilotRequirements) if availableQueues != False: self.listMatchCache.add(pilotRequirementsString, self.listMatchDelay, availableQueues) self.log.verbose( 'Available Queues for TaskQueue ', "%s: %s" % (taskQueueID, str(availableQueues))) else: availableQueues = cachedAvailableQueues if not availableQueues: return S_ERROR(ERROR_CE + ' TQ: %d' % taskQueueID) baseDir = os.getcwd() workingDirectory = tempfile.mkdtemp(prefix='TQ_%s_' % taskQueueID, dir=workDir) self.log.verbose('Using working Directory:', workingDirectory) os.chdir(workingDirectory) # set the Site Name pilotOptions.append("-n '%s'" % self.siteName) # submit pilots for every CE available for CE in self.computingElementDict.keys(): ceName = CE computingElement = self.computingElementDict[CE]['CE'] # add possible requirements from Site and CE for req, val in getResourceDict(ceName).items(): pilotOptions.append("-o '/AgentJobRequirements/%s=%s'" % (req, val)) ceConfigDict = self.computingElementDict[CE] httpProxy = '' if 'HttpProxy' in ceConfigDict: httpProxy = ceConfigDict['HttpProxy'] if 'JobExecDir' in ceConfigDict: pilotExecDir = ceConfigDict['JobExecDir'] try: pilotScript = self._writePilotScript(workingDirectory, pilotOptions, proxy, httpProxy, pilotExecDir) except: self.log.exception(ERROR_SCRIPT) try: os.chdir(baseDir) shutil.rmtree(workingDirectory) except: pass return S_ERROR(ERROR_SCRIPT) self.log.info("Pilots to submit: ", pilotsToSubmit) while submittedPilots < pilotsToSubmit: # Find out how many pilots can be submitted ret = computingElement.available() if not ret['OK']: self.log.error( 'Can not determine if pilot should be submitted: ', ret['Message']) break maxPilotsToSubmit = ret['Value'] self.log.info("Submit Pilots: ", maxPilotsToSubmit) if not maxPilotsToSubmit: break # submit the pilots and then check again for _i in range( min(maxPilotsToSubmit, pilotsToSubmit - submittedPilots)): submission = computingElement.submitJob( pilotScript, '', '') if not submission['OK']: self.log.error('Pilot submission failed: ', submission['Message']) # cleanup try: os.chdir(baseDir) shutil.rmtree(workingDirectory) except: pass return S_ERROR('Pilot submission failed after ' + str(submittedPilots) + ' pilots submitted successful') submittedPilots += 1 # let the batch system some time to digest the submitted job time.sleep(1) #next CE try: os.chdir(baseDir) shutil.rmtree(workingDirectory) except: pass return S_OK(submittedPilots) def _listQueues(self, pilotRequirements): """ For each defined CE return the list of Queues with available, running and waiting slots, matching the requirements of the pilots. 
Currently only CPU time is considered """ result = self.computingElement.available(pilotRequirements) if not result['OK']: self.log.error('Can not determine available queues', result['Message']) return False return result['Value'] def _writePilotScript(self, workingDirectory, pilotOptions, proxy, httpProxy, pilotExecDir): """ Prepare the script to execute the pilot For the moment it will do like Grid Pilots, a full DIRAC installation It assumes that the pilot script will have access to the submit working directory """ try: compressedAndEncodedProxy = base64.encodestring( bz2.compress(proxy.dumpAllToString()['Value'])).replace( '\n', '') compressedAndEncodedPilot = base64.encodestring( bz2.compress(open(self.pilot, "rb").read(), 9)).replace('\n', '') compressedAndEncodedInstall = base64.encodestring( bz2.compress(open(self.install, "rb").read(), 9)).replace('\n', '') except: self.log.exception( 'Exception during file compression of proxy, dirac-pilot or dirac-install' ) return S_ERROR( 'Exception during file compression of proxy, dirac-pilot or dirac-install' ) localPilot = """#!/bin/bash /usr/bin/env python << EOF # import os, stat, tempfile, sys, shutil, base64, bz2 try: pilotExecDir = '%(pilotExecDir)s' if not pilotExecDir: pilotExecDir = None pilotWorkingDirectory = tempfile.mkdtemp( suffix = 'pilot', prefix = 'DIRAC_', dir = pilotExecDir ) os.chdir( pilotWorkingDirectory ) open( 'proxy', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedProxy)s" ) ) ) open( '%(pilotScript)s', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedPilot)s" ) ) ) open( '%(installScript)s', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedInstall)s" ) ) ) os.chmod("proxy", stat.S_IRUSR | stat.S_IWUSR) os.chmod("%(pilotScript)s", stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) os.chmod("%(installScript)s", stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) if "LD_LIBRARY_PATH" not in os.environ: os.environ["LD_LIBRARY_PATH"]="" os.environ["X509_USER_PROXY"]=os.path.join(pilotWorkingDirectory, 'proxy') if "%(httpProxy)s": os.environ["HTTP_PROXY"]="%(httpProxy)s" os.environ["X509_CERT_DIR"]=os.path.join(pilotWorkingDirectory, 'etc/grid-security/certificates') # TODO: structure the output print '===========================================================' print 'Environment of execution host' for key in os.environ.keys(): print key + '=' + os.environ[key] print '===========================================================' except Exception, x: print >> sys.stderr, x sys.exit(-1) cmd = "python %(pilotScript)s %(pilotOptions)s" print 'Executing: ', cmd sys.stdout.flush() os.system( cmd ) shutil.rmtree( pilotWorkingDirectory ) EOF """ % { 'compressedAndEncodedProxy': compressedAndEncodedProxy, 'compressedAndEncodedPilot': compressedAndEncodedPilot, 'compressedAndEncodedInstall': compressedAndEncodedInstall, 'httpProxy': httpProxy, 'pilotScript': os.path.basename(self.pilot), 'installScript': os.path.basename(self.install), 'pilotOptions': ' '.join(pilotOptions), 'pilotExecDir': pilotExecDir } fd, name = tempfile.mkstemp(suffix='_pilotwrapper.py', prefix='DIRAC_', dir=workingDirectory) pilotWrapper = os.fdopen(fd, 'w') pilotWrapper.write(localPilot) pilotWrapper.close() return name def _getPilotProxyFromDIRACGroup(self, ownerDN, ownerGroup, requiredTimeLeft): """ Download a limited pilot proxy with VOMS extensions depending on the group """ #Assign VOMS attribute vomsAttr = CS.getVOMSAttributeForGroup(ownerGroup) if not vomsAttr: self.log.info( "Downloading a 
proxy without VOMS extensions for %s@%s" % (ownerDN, ownerGroup)) return gProxyManager.downloadProxy( ownerDN, ownerGroup, limited=True, requiredTimeLeft=requiredTimeLeft) else: self.log.info( "Downloading a proxy with '%s' VOMS extension for %s@%s" % (vomsAttr, ownerDN, ownerGroup)) return gProxyManager.downloadVOMSProxy( ownerDN, ownerGroup, limited=True, requiredTimeLeft=requiredTimeLeft, requiredVOMSAttribute=vomsAttr)
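# --- Illustrative sketch (not in the original module): the encode/decode round
# trip that _writePilotScript relies on to inline the proxy, pilot and install
# scripts into the wrapper. It uses the same Python 2 base64/bz2 calls as the
# director itself; the file name is hypothetical.
def _examplePilotPayloadEncoding():
    import base64
    import bz2

    payload = open('dirac-pilot.py', 'rb').read()   # hypothetical local file
    encoded = base64.encodestring(bz2.compress(payload, 9)).replace('\n', '')

    # Inside the generated wrapper script the operation is reversed:
    restored = bz2.decompress(base64.decodestring(encoded))
    assert restored == payload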
class DIRACPilotDirector(PilotDirector): """ DIRAC PilotDirector class """ def __init__( self, submitPool ): """ Define some defaults and call parent __init__ """ self.gridMiddleware = 'DIRAC' PilotDirector.__init__( self, submitPool ) self.computingElementList = COMPUTING_ELEMENTS self.computingElementDict = {} self.addComputingElement( self.computingElementList ) self.siteName = gConfig.getValue('/LocalSite/Site','') if not self.siteName: self.log.error( 'Can not run a Director if Site Name is not defined' ) sys.exit() self.__failingCECache = DictCache() self.__ticketsCECache = DictCache() def configure(self, csSection, submitPool ): """ Here goes common configuration for DIRAC PilotDirector """ PilotDirector.configure( self, csSection, submitPool ) self.reloadConfiguration( csSection, submitPool ) self.__failingCECache.purgeExpired() self.__ticketsCECache.purgeExpired() for ce in self.__failingCECache.getKeys(): if ce in self.computingElementDict.keys(): try: del self.computingElementDict[ce] except: pass if self.computingElementDict: self.log.info( ' ComputingElements:', ', '.join(self.computingElementDict.keys()) ) else: return # FIXME: this is to start testing _ceName, computingElementDict = self.computingElementDict.items()[0] self.computingElement = computingElementDict['CE'] self.log.debug( self.computingElement.getCEStatus() ) self.log.info( ' SiteName:', self.siteName ) def configureFromSection( self, mySection ): """ reload from CS """ PilotDirector.configureFromSection( self, mySection ) self.computingElementList = gConfig.getValue( mySection+'/ComputingElements' , self.computingElementList ) self.addComputingElement( self.computingElementList ) self.siteName = gConfig.getValue( mySection+'/SiteName' , self.siteName ) def addComputingElement(self, ceList): """ Check if a CE object for the current CE is available, instantiate one if necessary """ for CE in ceList: if CE not in self.computingElementDict: ceFactory = ComputingElementFactory( ) ceInstance = ceFactory.getCE( ceName = CE ) if not ceInstance['OK']: self.log.error('Can not create CE object:', ceInstance['Message']) return self.computingElementDict[CE] = ceInstance['Value'].ceConfigDict # add the 'CE' instance at the end to avoid being overwritten self.computingElementDict[CE]['CE'] = ceInstance['Value'] def _submitPilots( self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit, ceMask, submitPrivatePilot, privateTQ, proxy, pilotsPerJob ): """ This method does the actual pilot submission to the DIRAC CE The logic is as follows: - If there are no available CE it return error - If there is no queue available in the CE's, it returns error - It creates a temp directory - It prepare a PilotScript """ taskQueueID = taskQueueDict['TaskQueueID'] # ownerDN = taskQueueDict['OwnerDN'] submittedPilots = 0 # if self.computingElement not in self.computingElementDict: # # Since we can exclude CEs from the list, it may become empty # return S_ERROR( ERROR_CE ) pilotRequirements = [] pilotRequirements.append( ( 'CPUTime', taskQueueDict['CPUTime'] ) ) # do we need to care about anything else? 
pilotRequirementsString = str( pilotRequirements ) # Check that there are available queues for the Jobs: if self.enableListMatch: availableQueues = [] # now = Time.dateTime() cachedAvailableQueues = self.listMatchCache.get( pilotRequirementsString ) if cachedAvailableQueues is None: availableQueues = self._listQueues( pilotRequirements ) if availableQueues != False: self.listMatchCache.add( pilotRequirementsString, self.listMatchDelay, availableQueues ) self.log.verbose( 'Available Queues for TaskQueue ', "%s: %s" % ( taskQueueID, str(availableQueues) ) ) else: availableQueues = cachedAvailableQueues if not availableQueues: return S_ERROR( ERROR_CE + ' TQ: %d' % taskQueueID ) baseDir = os.getcwd() workingDirectory = tempfile.mkdtemp( prefix= 'TQ_%s_' % taskQueueID, dir = workDir ) self.log.verbose( 'Using working Directory:', workingDirectory ) os.chdir( workingDirectory ) # set the Site Name pilotOptions.append( "-n '%s'" % self.siteName) # submit pilots for every CE available for CE in self.computingElementDict.keys(): ceName = CE computingElement = self.computingElementDict[CE]['CE'] # add possible requirements from Site and CE for req, val in getResourceDict( ceName ).items(): pilotOptions.append( "-o '/AgentJobRequirements/%s=%s'" % ( req, val ) ) ceConfigDict = self.computingElementDict[CE] if 'ClientPlatform' in ceConfigDict: pilotOptions.append( "-p '%s'" % ceConfigDict['ClientPlatform']) if 'SharedArea' in ceConfigDict: pilotOptions.append( "-o '/LocalSite/SharedArea=%s'" % ceConfigDict['SharedArea'] ) # if 'CPUScalingFactor' in ceConfigDict: # pilotOptions.append( "-o '/LocalSite/CPUScalingFactor=%s'" % ceConfigDict['CPUScalingFactor'] ) # # if 'CPUNormalizationFactor' in ceConfigDict: # pilotOptions.append( "-o '/LocalSite/CPUNormalizationFactor=%s'" % ceConfigDict['CPUNormalizationFactor'] ) self.log.info( "pilotOptions: ", ' '.join(pilotOptions)) httpProxy = '' if 'HttpProxy' in ceConfigDict: httpProxy = ceConfigDict['HttpProxy'] if 'JobExecDir' in ceConfigDict: pilotExecDir = ceConfigDict['JobExecDir'] try: pilotScript = self._writePilotScript( workingDirectory, pilotOptions, proxy, httpProxy, pilotExecDir ) except: self.log.exception( ERROR_SCRIPT ) try: os.chdir( baseDir ) shutil.rmtree( workingDirectory ) except: pass return S_ERROR( ERROR_SCRIPT ) self.log.info("Pilots to submit: ", pilotsToSubmit) while submittedPilots < pilotsToSubmit: # Find out how many pilots can be submitted ret = computingElement.available( ) if not ret['OK']: self.log.error('Can not determine if pilot should be submitted: ', ret['Message']) break maxPilotsToSubmit = ret['Value'] self.log.info("Submit Pilots: ", maxPilotsToSubmit) if not maxPilotsToSubmit: break # submit the pilots and then check again for _i in range( min( maxPilotsToSubmit, pilotsToSubmit - submittedPilots ) ): submission = computingElement.submitJob(pilotScript, '', '') if not submission['OK']: self.log.error('Pilot submission failed: ', submission['Message']) # cleanup try: os.chdir( baseDir ) shutil.rmtree( workingDirectory ) except: pass return S_ERROR('Pilot submission failed after ' + str(submittedPilots) + ' pilots submitted successful') submittedPilots += 1 # let the batch system some time to digest the submitted job time.sleep(1) #next CE try: os.chdir( baseDir ) shutil.rmtree( workingDirectory ) except: pass return S_OK(submittedPilots) def _listQueues( self, pilotRequirements ): """ For each defined CE return the list of Queues with available, running and waiting slots, matching the requirements of the pilots. 
Currently only CPU time is considered """ result = self.computingElement.available( pilotRequirements ) if not result['OK']: self.log.error( 'Can not determine available queues', result['Message'] ) return False return result['Value'] def _writePilotScript( self, workingDirectory, pilotOptions, proxy, httpProxy, pilotExecDir ): """ Prepare the script to execute the pilot For the moment it will do like Grid Pilots, a full DIRAC installation It assumes that the pilot script will have access to the submit working directory """ try: compressedAndEncodedProxy = base64.encodestring( bz2.compress( proxy.dumpAllToString()['Value'] ) ).replace('\n','') compressedAndEncodedPilot = base64.encodestring( bz2.compress( open( self.pilot, "rb" ).read(), 9 ) ).replace('\n','') compressedAndEncodedInstall = base64.encodestring( bz2.compress( open( self.install, "rb" ).read(), 9 ) ).replace('\n','') except: self.log.exception('Exception during file compression of proxy, dirac-pilot or dirac-install') return S_ERROR('Exception during file compression of proxy, dirac-pilot or dirac-install') localPilot = """#!/bin/bash /usr/bin/env python << EOF # import os, stat, tempfile, sys, shutil, base64, bz2 try: pilotExecDir = '%(pilotExecDir)s' if not pilotExecDir: pilotExecDir = None pilotWorkingDirectory = tempfile.mkdtemp( suffix = 'pilot', prefix = 'DIRAC_', dir = pilotExecDir ) os.chdir( pilotWorkingDirectory ) open( 'proxy', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedProxy)s" ) ) ) open( '%(pilotScript)s', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedPilot)s" ) ) ) open( '%(installScript)s', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedInstall)s" ) ) ) os.chmod("proxy", stat.S_IRUSR | stat.S_IWUSR) os.chmod("%(pilotScript)s", stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) os.chmod("%(installScript)s", stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) if "LD_LIBRARY_PATH" not in os.environ: os.environ["LD_LIBRARY_PATH"]="" os.environ["X509_USER_PROXY"]=os.path.join(pilotWorkingDirectory, 'proxy') if "%(httpProxy)s": os.environ["HTTP_PROXY"]="%(httpProxy)s" os.environ["X509_CERT_DIR"]=os.path.join(pilotWorkingDirectory, 'etc/grid-security/certificates') # TODO: structure the output print '===========================================================' print 'Environment of execution host' for key in os.environ.keys(): print key + '=' + os.environ[key] print '===========================================================' except Exception, x: print >> sys.stderr, x sys.exit(-1) cmd = "python %(pilotScript)s %(pilotOptions)s" print 'Executing: ', cmd sys.stdout.flush() os.system( cmd ) shutil.rmtree( pilotWorkingDirectory ) EOF """ % { 'compressedAndEncodedProxy': compressedAndEncodedProxy, 'compressedAndEncodedPilot': compressedAndEncodedPilot, 'compressedAndEncodedInstall': compressedAndEncodedInstall, 'httpProxy': httpProxy, 'pilotScript': os.path.basename(self.pilot), 'installScript': os.path.basename(self.install), 'pilotOptions': ' '.join( pilotOptions ), 'pilotExecDir': pilotExecDir } fd, name = tempfile.mkstemp( suffix = '_pilotwrapper.py', prefix = 'DIRAC_', dir=workingDirectory) pilotWrapper = os.fdopen(fd, 'w') pilotWrapper.write( localPilot ) pilotWrapper.close() return name def _getPilotProxyFromDIRACGroup( self, ownerDN, ownerGroup, requiredTimeLeft ): """ Download a limited pilot proxy with VOMS extensions depending on the group """ #Assign VOMS attribute vomsAttr = CS.getVOMSAttributeForGroup( ownerGroup ) if not vomsAttr: 
self.log.info( "Downloading a proxy without VOMS extensions for %s@%s" % ( ownerDN, ownerGroup ) ) return gProxyManager.downloadProxy( ownerDN, ownerGroup, limited = True, requiredTimeLeft = requiredTimeLeft ) else: self.log.info( "Downloading a proxy with '%s' VOMS extension for %s@%s" % ( vomsAttr, ownerDN, ownerGroup ) ) return gProxyManager.downloadVOMSProxy( ownerDN, ownerGroup, limited = True, requiredTimeLeft = requiredTimeLeft, requiredVOMSAttribute = vomsAttr )
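# --- Illustrative sketch (not in the original module): the failing-CE
# bookkeeping pattern used by configure() above. A misbehaving CE is parked in
# a DictCache with a grace time (the add presumably happens where submissions
# fail, outside this excerpt); while the entry is alive the CE is excluded from
# the usable set. The CE names and grace time are hypothetical.
def _exampleFailingCECache():
    failingCECache = DictCache()
    failingCECache.add('ce01.example.org', 1800, value='repeated submission errors')

    usableCEs = {'ce01.example.org': {}, 'ce02.example.org': {}}
    failingCECache.purgeExpired()
    for ce in failingCECache.getKeys():
        usableCEs.pop(ce, None)
    return usableCEs   # only 'ce02.example.org' remains while the entry is alive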
class CredentialsClient: CONSUMER_GRACE_TIME = 3600 REQUEST_GRACE_TIME = 900 def __init__( self, RPCFunctor = None ): if not RPCFunctor: self.__RPCFunctor = RPCClient else: self.__RPCFunctor = RPCFunctor self.__tokens = DictCache() self.__requests = DictCache() self.__consumers = DictCache( deleteFunction = self.__cleanConsumerCache ) def __getRPC( self ): return self.__RPCFunctor( "WebAPI/Credentials" ) def __cleanReturn( self, result ): if 'rpcStub' in result: result.pop( 'rpcStub' ) return result ## # Consumer ## def generateConsumerPair( self, name, callback, icon, consumerKey = "" ): result = self.__getRPC().generateConsumerPair( name, callback, icon, consumerKey ) if not result[ 'OK' ]: return self.__cleanReturn( result ) self.__consumers.add( consumerKey, self.CONSUMER_GRACE_TIME, result[ 'Value' ] ) return self.__cleanReturn( result ) def getConsumerData( self, consumerKey ): cData = self.__consumers.get( consumerKey ) if cData: return S_OK( cData ) result = self.__getRPC().getConsumerData( consumerKey ) if not result[ 'OK' ]: return self.__cleanReturn( result ) self.__consumers.add( consumerKey, self.CONSUMER_GRACE_TIME, result[ 'Value' ] ) return self.__cleanReturn( result ) def deleteConsumer( self, consumerKey ): self.__consumers.delete( consumerKey ) result = self.__getRPC().deleteConsumer( consumerKey ) if result[ 'OK' ]: self.__cleanConsumerCache( { 'key' : consumerKey } ) return self.__cleanReturn( result ) def getAllConsumers( self ): result = self.__getRPC().getAllConsumers() if not result[ 'OK' ]: return self.__cleanReturn( result ) data = result[ 'Value' ] consIndex = { 'key': 0, 'name' : 0, 'callback' : 0, 'secret' : 0, 'icon' : 0 } for key in consIndex: consIndex[ key ] = data[ 'Parameters' ].find( key ) for record in data[ 'Records' ]: consData = {} for key in consIndex: consData[ key ] = record[ consIndex[ key ] ] self.__consumers.add( consData[ 'key' ], self.CONSUMER_GRACE_TIME, consData ) return self.__cleanReturn( result ) def __cleanConsumerCache( self, cData ): consumerKey = cData[ 'key' ] for dc in ( self.__tokens, self.__requests ): cKeys = dc.getKeys() for cKey in cKeys: if cKey[0] == consumerKey: dc.delete( cKey ) ## # Requests ## def generateRequest( self, consumerKey, callback = "" ): result = self.__getRPC().generateRequest( consumerKey, callback ) if not result[ 'OK' ]: return self.__cleanReturn( result ) requestData = result[ 'Value' ] self.__requests.add( requestData[ 'request' ], result[ 'lifeTime' ] - 5, requestData ) return self.__cleanReturn( result ) def getRequestData( self, request ): data = self.__requests.get( request ) if data: return S_OK( data ) result = self.__getRPC().getRequestData( request ) if not result[ 'OK' ]: return self.__cleanReturn( result ) self.__tokens.add( request, result[ 'lifeTime' ] - 5, result[ 'Value' ] ) return self.__cleanReturn( result ) def deleteRequest( self, request ): result = self.__getRPC().deleteRequest( request ) if not result[ 'OK' ]: return self.__cleanReturn( result ) cKeys = self.__requests.getKeys() for cKey in cKeys: if cKey[1] == request: self.__requests.delete( cKey ) return self.__cleanReturn( result ) ## # Verifiers ## def generateVerifier( self, consumerKey, request, userDN, userGroup, lifeTime = 3600 ): result = self.__getRPC().generateVerifier( consumerKey, request, userDN, userGroup, lifeTime ) return self.__cleanReturn( result ) def getVerifierData( self, verifier ): result = self.__getRPC().getVerifierData( verifier ) return self.__cleanReturn( result ) def deleteVerifier( self, verifier ): 
result = self.__getRPC().deleteVerifier( verifier ) return self.__cleanReturn( result ) def findVerifier( self, consumerKey, request ): result = self.__getRPC().findVerifier( consumerKey, request ) return self.__cleanReturn( result ) def setVerifierProperties( self, consumerKey, request, verifier, userDN, userGroup, lifeTime ): result = self.__getRPC().setVerifierProperties( consumerKey, request, verifier, userDN, userGroup, lifeTime ) return self.__cleanReturn( result ) ## # Tokens ## def generateToken( self, consumerKey, request, verifier ): result = self.__getRPC().generateToken( consumerKey, request, verifier ) if not result[ 'OK' ]: return self.__cleanReturn( result ) tokenData = result[ 'Value' ] cKey = ( consumerKey, tokenData[ 'token' ] ) self.__tokens.add( cKey, tokenData[ 'lifeTime' ] - 5, tokenData ) return S_OK( tokenData ) def getTokenData( self, consumerKey, token ): cKey = ( consumerKey, token ) tokenData = self.__tokens.get( cKey ) if tokenData: return S_OK( tokenData ) result = self.__getRPC().getTokenData( consumerKey, token ) if not result[ 'OK' ]: return self.__cleanReturn( result ) tokenData = result[ 'Value' ] self.__tokens.add( cKey, tokenData[ 'lifeTime' ] - 5, tokenData ) return self.__cleanReturn( result ) def revokeUserToken( self, userDN, userGroup, token ): result = self.__getRPC().revokeUserToken( userDN, userGroup, token ) if not result[ 'OK' ]: return self.__cleanReturn( result ) cKeys = self.__tokens.getKeys() for cKey in cKeys: if cKey[0] == userDN and cKey[1] == userGroup and cKey[3] == token: self.__tokens.delete( cKey ) return self.__cleanReturn( result ) def revokeToken( self, token ): result = self.__getRPC().revokeToken( token ) if not result[ 'OK' ]: return self.__cleanReturn( result ) cKeys = self.__tokens.getKeys() for cKey in cKeys: if cKey[3] == token: self.__tokens.delete( cKey ) return self.__cleanReturn( result ) def cleanExpired( self ): return self.__getRPC().cleanExpired() def getTokens( self, condDict = {} ): result = self.__getRPC().getTokens( condDict ) if not result[ 'OK' ]: return self.__cleanReturn( result ) params = result[ 'Value' ][ 'Parameters'] data = result[ 'Value' ][ 'Records' ] consumerKey = "unknown" token = unknown lifeTime = 0 for record in data: tokenData = {} for iPos in range( len( params ) ): if params[iPos] == "UserDN": tokenData[ 'userDN' ] = record[iPos] elif params[iPos] == "UserGroup": tokenData[ 'userGroup' ] = record[iPos] elif params[iPos] == "ConsumerKey": consumerKey = record[iPos] elif params[iPos] == "Token": token = record[iPos] elif params[iPos] == "Secret": tokenData[ 'secret' ] = record[iPos] elif params[iPos] == "LifeTime": tokenData[ 'lifeTime' ] = record[iPos] lifeTime = record[ iPos ] self.__tokens.add( ( consumerKey, token ), tokenData[ 'lifeTime' ], tokenData ) return self.__cleanReturn( result )
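# --- Illustrative sketch (not in the original module): the client-side caching
# behaviour of CredentialsClient. The consumer key is hypothetical and both
# calls need the WebAPI/Credentials RPC service to be reachable.
def _exampleConsumerLookup():
    creds = CredentialsClient()
    # The first lookup goes to the service and, if successful, lands in the
    # local DictCache with a CONSUMER_GRACE_TIME (3600 s) lifetime...
    first = creds.getConsumerData('someConsumerKey')
    # ...so a second lookup within that window is served without an RPC call.
    second = creds.getConsumerData('someConsumerKey')
    return first, second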
class GridPilotDirector(PilotDirector): """ Base Grid PilotDirector class Derived classes must declare: self.Middleware: It must correspond to the string before "PilotDirector". (For proper naming of the logger) self.ResourceBrokers: list of Brokers used by the Director. (For proper error reporting) """ def __init__(self, submitPool): """ Define some defaults and call parent __init__ """ self.gridEnv = GRIDENV self.cpuPowerRef = CPU_POWER_REF self.requirements = REQUIREMENTS self.rank = RANK self.fuzzyRank = FUZZY_RANK self.__failingWMSCache = DictCache() self.__ticketsWMSCache = DictCache() self.__listMatchWMSCache = DictCache() PilotDirector.__init__(self, submitPool) def configure(self, csSection, submitPool): """ Here goes common configuration for all Grid PilotDirectors """ PilotDirector.configure(self, csSection, submitPool) self.reloadConfiguration(csSection, submitPool) self.__failingWMSCache.purgeExpired() self.__ticketsWMSCache.purgeExpired() for rb in self.__failingWMSCache.getKeys(): if rb in self.resourceBrokers: try: self.resourceBrokers.remove(rb) except: pass self.resourceBrokers = List.randomize(self.resourceBrokers) if self.gridEnv: self.log.info(' GridEnv: ', self.gridEnv) if self.resourceBrokers: self.log.info(' ResourceBrokers:', ', '.join(self.resourceBrokers)) def configureFromSection(self, mySection): """ reload from CS """ PilotDirector.configureFromSection(self, mySection) self.gridEnv = gConfig.getValue(mySection + '/GridEnv', self.gridEnv) if not self.gridEnv: # No specific option found, try a general one setup = gConfig.getValue('/DIRAC/Setup', '') if setup: instance = gConfig.getValue( '/DIRAC/Setups/%s/WorkloadManagement' % setup, '') if instance: self.gridEnv = gConfig.getValue( '/Systems/WorkloadManagement/%s/GridEnv' % instance, '') self.resourceBrokers = gConfig.getValue(mySection + '/ResourceBrokers', self.resourceBrokers) self.cpuPowerRef = gConfig.getValue(mySection + '/CPUPowerRef', self.cpuPowerRef) self.requirements = gConfig.getValue(mySection + '/Requirements', self.requirements) self.rank = gConfig.getValue(mySection + '/Rank', self.rank) self.fuzzyRank = gConfig.getValue(mySection + '/FuzzyRank', self.fuzzyRank) def _submitPilots(self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit, ceMask, submitPrivatePilot, privateTQ, proxy, pilotsPerJob): """ This method does the actual pilot submission to the Grid RB The logic is as follows: - If there are no available RB it return error - If there is no VOMS extension in the proxy, return error - It creates a temp directory - Prepare a JDL it has some part common to gLite and LCG (the payload description) it has some part specific to each middleware """ taskQueueID = taskQueueDict['TaskQueueID'] # ownerDN = taskQueueDict['OwnerDN'] credDict = proxy.getCredentials()['Value'] ownerDN = credDict['identity'] ownerGroup = credDict['group'] if not self.resourceBrokers: # Since we can exclude RBs from the list, it may become empty return S_ERROR(ERROR_RB) # Need to get VOMS extension for the later interactions with WMS ret = gProxyManager.getVOMSAttributes(proxy) if not ret['OK']: self.log.error(ERROR_VOMS, ret['Message']) return S_ERROR(ERROR_VOMS) if not ret['Value']: return S_ERROR(ERROR_VOMS) vomsGroup = ret['Value'][0] workingDirectory = tempfile.mkdtemp(prefix='TQ_%s_' % taskQueueID, dir=workDir) self.log.verbose('Using working Directory:', workingDirectory) # Write JDL retDict = self._prepareJDL(taskQueueDict, workingDirectory, pilotOptions, pilotsPerJob, ceMask, submitPrivatePilot, privateTQ) jdl = 
retDict['JDL'] pilotRequirements = retDict['Requirements'] rb = retDict['RB'] if not jdl: try: shutil.rmtree(workingDirectory) except: pass return S_ERROR(ERROR_JDL) # Check that there are available queues for the Job: if self.enableListMatch: availableCEs = [] now = Time.dateTime() availableCEs = self.listMatchCache.get(pilotRequirements) if availableCEs == False: availableCEs = self._listMatch(proxy, jdl, taskQueueID, rb) if availableCEs != False: self.log.verbose('LastListMatch', now) self.log.verbose('AvailableCEs ', availableCEs) self.listMatchCache.add( pilotRequirements, self.listMatchDelay * 60, value=availableCEs) # it is given in minutes if not availableCEs: try: shutil.rmtree(workingDirectory) except: pass return S_ERROR(ERROR_CE + ' TQ: %d' % taskQueueID) # Now we are ready for the actual submission, so self.log.verbose('Submitting Pilots for TaskQueue', taskQueueID) submitRet = self._submitPilot(proxy, pilotsPerJob, jdl, taskQueueID, rb) try: shutil.rmtree(workingDirectory) except: pass if not submitRet: return S_ERROR('Pilot Submission Failed for TQ %d ' % taskQueueID) # pilotReference, resourceBroker = submitRet submittedPilots = 0 if pilotsPerJob != 1 and len(submitRet) != pilotsPerJob: # Parametric jobs are used for pilotReference, resourceBroker in submitRet: pilotReference = self._getChildrenReferences( proxy, pilotReference, taskQueueID) submittedPilots += len(pilotReference) pilotAgentsDB.addPilotTQReference(pilotReference, taskQueueID, ownerDN, ownerGroup, resourceBroker, self.gridMiddleware, pilotRequirements) else: for pilotReference, resourceBroker in submitRet: pilotReference = [pilotReference] submittedPilots += len(pilotReference) pilotAgentsDB.addPilotTQReference(pilotReference, taskQueueID, ownerDN, ownerGroup, resourceBroker, self.gridMiddleware, pilotRequirements) # add some sleep here time.sleep(0.1 * submittedPilots) if pilotsToSubmit > pilotsPerJob: # Additional submissions are necessary, need to get a new token and iterate. 
pilotsToSubmit -= pilotsPerJob result = gProxyManager.requestToken( ownerDN, ownerGroup, max(pilotsToSubmit, self.maxJobsInFillMode)) if not result['OK']: self.log.error(ERROR_TOKEN, result['Message']) result = S_ERROR(ERROR_TOKEN) result['Value'] = submittedPilots return result (token, numberOfUses) = result['Value'] for option in pilotOptions: if option.find('-o /Security/ProxyToken=') == 0: pilotOptions.remove(option) pilotOptions.append('-o /Security/ProxyToken=%s' % token) pilotsPerJob = max( 1, min(pilotsPerJob, int(numberOfUses / self.maxJobsInFillMode))) result = self._submitPilots(workDir, taskQueueDict, pilotOptions, pilotsToSubmit, ceMask, submitPrivatePilot, privateTQ, proxy, pilotsPerJob) if not result['OK']: if 'Value' not in result: result['Value'] = 0 result['Value'] += submittedPilots return result submittedPilots += result['Value'] return S_OK(submittedPilots) def _prepareJDL(self, taskQueueDict, workingDirectory, pilotOptions, pilotsToSubmit, ceMask, submitPrivatePilot, privateTQ): """ This method should be overridden in a subclass """ self.log.error( '_prepareJDL() method should be implemented in a subclass') sys.exit() def _JobJDL(self, taskQueueDict, pilotOptions, ceMask): """ The Job JDL is the same for LCG and GLite """ pilotJDL = 'Executable = "%s";\n' % os.path.basename(self.pilot) executable = self.pilot pilotJDL += 'Arguments = "%s";\n' % ' '.join(pilotOptions) pilotJDL += 'CPUTimeRef = %s;\n' % taskQueueDict['CPUTime'] pilotJDL += 'CPUPowerRef = %s;\n' % self.cpuPowerRef pilotJDL += """CPUWorkRef = real( CPUTimeRef * CPUPowerRef ); Lookup = "CPUScalingReferenceSI00=*"; cap = isList( other.GlueCECapability ) ? other.GlueCECapability : { "dummy" }; i0 = regexp( Lookup, cap[0] ) ? 0 : undefined; i1 = isString( cap[1] ) && regexp( Lookup, cap[1] ) ? 1 : i0; i2 = isString( cap[2] ) && regexp( Lookup, cap[2] ) ? 2 : i1; i3 = isString( cap[3] ) && regexp( Lookup, cap[3] ) ? 3 : i2; i4 = isString( cap[4] ) && regexp( Lookup, cap[4] ) ? 4 : i3; i5 = isString( cap[5] ) && regexp( Lookup, cap[5] ) ? 5 : i4; index = isString( cap[6] ) && regexp( Lookup, cap[6] ) ? 6 : i5; i = isUndefined( index ) ? 0 : index; QueuePowerRef = real( ! isUndefined( index ) ? 
int( substr( cap[i], size( Lookup ) - 1 ) ) : other.GlueHostBenchmarkSI00 ); QueueTimeRef = real( other.GlueCEPolicyMaxCPUTime * 60 ); QueueWorkRef = QueuePowerRef * QueueTimeRef; """ requirements = list(self.requirements) if 'GridCEs' in taskQueueDict and taskQueueDict['GridCEs']: # if there an explicit Grig CE requested by the TQ, remove the Ranking requirement for req in self.requirements: if req.strip().lower()[:6] == 'rank >': requirements.remove(req) requirements.append('QueueWorkRef > CPUWorkRef') siteRequirements = '\n || '.join( ['other.GlueCEInfoHostName == "%s"' % s for s in ceMask]) requirements.append("( %s\n )" % siteRequirements) pilotRequirements = '\n && '.join(requirements) pilotJDL += 'pilotRequirements = %s;\n' % pilotRequirements pilotJDL += 'Rank = %s;\n' % self.rank pilotJDL += 'FuzzyRank = %s;\n' % self.fuzzyRank pilotJDL += 'StdOutput = "%s";\n' % outputSandboxFiles[0] pilotJDL += 'StdError = "%s";\n' % outputSandboxFiles[1] pilotJDL += 'InputSandbox = { "%s" };\n' % '", "'.join( [self.install, executable]) pilotJDL += 'OutputSandbox = { %s };\n' % ', '.join( ['"%s"' % f for f in outputSandboxFiles]) self.log.verbose(pilotJDL) return (pilotJDL, pilotRequirements) def parseListMatchStdout(self, proxy, cmd, taskQueueID, rb): """ Parse List Match stdout to return list of matched CE's """ self.log.verbose('Executing List Match for TaskQueue', taskQueueID) start = time.time() ret = executeGridCommand(proxy, cmd, self.gridEnv) if not ret['OK']: self.log.error('Failed to execute List Match:', ret['Message']) self.__sendErrorMail(rb, 'List Match', cmd, ret, proxy) return False if ret['Value'][0] != 0: self.log.error('Error executing List Match:', str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3])) self.__sendErrorMail(rb, 'List Match', cmd, ret, proxy) return False self.log.info('List Match Execution Time: %.2f for TaskQueue %d' % ((time.time() - start), taskQueueID)) stdout = ret['Value'][1] stderr = ret['Value'][2] availableCEs = [] # Parse std.out for line in List.fromChar(stdout, '\n'): if re.search('/jobmanager-', line) or re.search('/cream-', line): # TODO: the line has to be stripped from extra info availableCEs.append(line) if not availableCEs: self.log.info('List-Match failed to find CEs for TaskQueue', taskQueueID) self.log.info(stdout) self.log.info(stderr) else: self.log.debug('List-Match returns:', str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3])) self.log.info( 'List-Match found %s CEs for TaskQueue' % len(availableCEs), taskQueueID) self.log.verbose(', '.join(availableCEs)) return availableCEs def parseJobSubmitStdout(self, proxy, cmd, taskQueueID, rb): """ Parse Job Submit stdout to return pilot reference """ start = time.time() self.log.verbose('Executing Job Submit for TaskQueue', taskQueueID) ret = executeGridCommand(proxy, cmd, self.gridEnv) if not ret['OK']: self.log.error('Failed to execute Job Submit:', ret['Message']) self.__sendErrorMail(rb, 'Job Submit', cmd, ret, proxy) return False if ret['Value'][0] != 0: self.log.error('Error executing Job Submit:', str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3])) self.__sendErrorMail(rb, 'Job Submit', cmd, ret, proxy) return False self.log.info('Job Submit Execution Time: %.2f for TaskQueue %d' % ((time.time() - start), taskQueueID)) stdout = ret['Value'][1] stderr = ret['Value'][2] submittedPilot = None failed = 1 rb = '' for line in List.fromChar(stdout, '\n'): m = re.search("(https:\S+)", line) if (m): glite_id = m.group(1) submittedPilot = glite_id if not rb: m = re.search("https://(.+):.+", 
glite_id) rb = m.group(1) failed = 0 if failed: self.log.error('Job Submit returns no Reference:', str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3])) return False self.log.info('Reference %s for TaskQueue %s' % (glite_id, taskQueueID)) return glite_id, rb def _writeJDL(self, filename, jdlList): try: f = open(filename, 'w') f.write('\n'.join(jdlList)) f.close() except Exception, x: self.log.exception() return '' return filename
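# --- Illustrative sketch (not in the original module): the reference extraction
# performed by parseJobSubmitStdout. The first https URL found on a stdout line
# becomes the pilot reference and its host the resource broker. The sample line
# is made up.
def _exampleParseSubmitLine():
    import re
    line = 'https://wms01.example.org:9000/AbCdEfGh123'   # hypothetical WMS output
    m = re.search(r"(https:\S+)", line)
    if not m:
        return None
    gliteId = m.group(1)                                   # full pilot reference
    rb = re.search(r"https://(.+):.+", gliteId).group(1)   # 'wms01.example.org'
    return gliteId, rb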
class Cache( object ): """ Cache basic class. WARNING: None of its methods is thread safe. Acquire / Release lock when using them ! """ def __init__( self, lifeTime, updateFunc ): """ Constructor :Parameters: **lifeTime** - `int` Lifetime of the elements in the cache ( seconds ! ) **updateFunc** - `function` This function MUST return a S_OK | S_ERROR object. In the case of the first, its value must be a dictionary. """ # We set a 20% of the lifetime randomly, so that if we have thousands of jobs # starting at the same time, all the caches will not end at the same time. randomLifeTimeBias = 0.2 * random.random() self.log = gLogger.getSubLogger( self.__class__.__name__ ) self.__lifeTime = int( lifeTime * ( 1 + randomLifeTimeBias ) ) self.__updateFunc = updateFunc # The records returned from the cache must be valid at least 10 seconds. self.__validSeconds = 10 # Cache self.__cache = DictCache() self.__cacheLock = LockRing() self.__cacheLock.getLock( self.__class__.__name__ ) #............................................................................. # internal cache object getter def cacheKeys( self ): """ Cache keys getter :returns: list with valid keys on the cache """ return self.__cache.getKeys( validSeconds = self.__validSeconds ) #............................................................................. # acquire / release Locks def acquireLock( self ): """ Acquires Cache lock """ self.__cacheLock.acquire( self.__class__.__name__ ) def releaseLock( self ): """ Releases Cache lock """ self.__cacheLock.release( self.__class__.__name__) #............................................................................. # Cache getters def get( self, cacheKeys ): """ Gets values for cacheKeys given, if all are found ( present on the cache and valid ), returns S_OK with the results. If any is not neither present not valid, returns S_ERROR. :Parameters: **cacheKeys** - `list` list of keys to be extracted from the cache :return: S_OK | S_ERROR """ result = {} for cacheKey in cacheKeys: cacheRow = self.__cache.get( cacheKey, validSeconds = self.__validSeconds ) if not cacheRow: self.log.error( str( cacheKey ) ) return S_ERROR( 'Cannot get %s' % str( cacheKey ) ) result.update( { cacheKey : cacheRow } ) return S_OK( result ) #............................................................................. # Cache refreshers def refreshCache( self ): """ Purges the cache and gets fresh data from the update function. :return: S_OK | S_ERROR. If the first, its content is the new cache. """ self.log.verbose( 'refreshing...' ) self.__cache.purgeAll() newCache = self.__updateFunc() if not newCache[ 'OK' ]: self.log.error( newCache[ 'Message' ] ) return newCache newCache = self.__updateCache( newCache[ 'Value' ] ) self.log.verbose( 'refreshed' ) return newCache #............................................................................. # Private methods def __updateCache( self, newCache ): """ Given the new cache dictionary, updates the internal cache with it. It sets a duration to the entries of <self.__lifeTime> seconds. :Parameters: **newCache** - `dict` dictionary containing a new cache :return: dictionary. It is newCache argument. """ for cacheKey, cacheValue in newCache.items(): self.__cache.add( cacheKey, self.__lifeTime, value = cacheValue ) # We are assuming nothing will fail while inserting in the cache. There is # no apparent reason to suspect from that piece of code. return S_OK( newCache )
class GridPilotDirector( PilotDirector ):
  """
    Base Grid PilotDirector class
    Derived classes must declare:
      self.Middleware: It must correspond to the string before "PilotDirector".
        (For proper naming of the logger)
      self.ResourceBrokers: list of Brokers used by the Director.
        (For proper error reporting)
  """

  def __init__( self, submitPool ):
    """
      Define some defaults and call parent __init__
    """
    self.gridEnv = GRIDENV

    self.cpuPowerRef = CPU_POWER_REF
    self.requirements = REQUIREMENTS
    self.rank = RANK
    self.fuzzyRank = FUZZY_RANK

    self.__failingWMSCache = DictCache()
    self.__ticketsWMSCache = DictCache()
    self.__listMatchWMSCache = DictCache()

    PilotDirector.__init__( self, submitPool )

  def configure( self, csSection, submitPool ):
    """
      Here goes common configuration for all Grid PilotDirectors
    """
    PilotDirector.configure( self, csSection, submitPool )
    self.reloadConfiguration( csSection, submitPool )

    self.__failingWMSCache.purgeExpired()
    self.__ticketsWMSCache.purgeExpired()
    for rb in self.__failingWMSCache.getKeys():
      if rb in self.resourceBrokers:
        try:
          self.resourceBrokers.remove( rb )
        except:
          pass

    self.resourceBrokers = List.randomize( self.resourceBrokers )

    if self.gridEnv:
      self.log.info( ' GridEnv: ', self.gridEnv )
    if self.resourceBrokers:
      self.log.info( ' ResourceBrokers:', ', '.join( self.resourceBrokers ) )

  def configureFromSection( self, mySection ):
    """
      reload from CS
    """
    PilotDirector.configureFromSection( self, mySection )

    self.gridEnv = gConfig.getValue( mySection + '/GridEnv', self.gridEnv )
    if not self.gridEnv:
      # No specific option found, try a general one
      setup = gConfig.getValue( '/DIRAC/Setup', '' )
      if setup:
        instance = gConfig.getValue( '/DIRAC/Setups/%s/WorkloadManagement' % setup, '' )
        if instance:
          self.gridEnv = gConfig.getValue( '/Systems/WorkloadManagement/%s/GridEnv' % instance, '' )

    self.resourceBrokers = gConfig.getValue( mySection + '/ResourceBrokers', self.resourceBrokers )

    self.cpuPowerRef  = gConfig.getValue( mySection + '/CPUPowerRef', self.cpuPowerRef )
    self.requirements = gConfig.getValue( mySection + '/Requirements', self.requirements )
    self.rank         = gConfig.getValue( mySection + '/Rank', self.rank )
    self.fuzzyRank    = gConfig.getValue( mySection + '/FuzzyRank', self.fuzzyRank )

  def _submitPilots( self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit,
                     ceMask, submitPrivatePilot, privateTQ, proxy, pilotsPerJob ):
    """
      This method does the actual pilot submission to the Grid RB.
      The logic is as follows:
        - If there are no available RBs, it returns an error
        - If there is no VOMS extension in the proxy, it returns an error
        - It creates a temp directory
        - It prepares a JDL:
          it has some part common to gLite and LCG (the payload description),
          and some part specific to each middleware
    """
    taskQueueID = taskQueueDict['TaskQueueID']
    # ownerDN = taskQueueDict['OwnerDN']
    credDict = proxy.getCredentials()['Value']
    ownerDN = credDict['identity']
    ownerGroup = credDict[ 'group' ]

    if not self.resourceBrokers:
      # Since we can exclude RBs from the list, it may become empty
      return S_ERROR( ERROR_RB )

    # Need to get VOMS extension for the later interactions with WMS
    ret = gProxyManager.getVOMSAttributes( proxy )
    if not ret['OK']:
      self.log.error( ERROR_VOMS, ret['Message'] )
      return S_ERROR( ERROR_VOMS )
    if not ret['Value']:
      return S_ERROR( ERROR_VOMS )
    vomsGroup = ret['Value'][0]

    workingDirectory = tempfile.mkdtemp( prefix = 'TQ_%s_' % taskQueueID, dir = workDir )
    self.log.verbose( 'Using working Directory:', workingDirectory )

    # Write JDL
    retDict = self._prepareJDL( taskQueueDict, workingDirectory, pilotOptions, pilotsPerJob,
                                ceMask, submitPrivatePilot, privateTQ )
    jdl = retDict['JDL']
    pilotRequirements = retDict['Requirements']
    rb = retDict['RB']
    if not jdl:
      try:
        shutil.rmtree( workingDirectory )
      except:
        pass
      return S_ERROR( ERROR_JDL )

    # Check that there are available queues for the Job:
    if self.enableListMatch:
      availableCEs = []
      now = Time.dateTime()
      availableCEs = self.listMatchCache.get( pilotRequirements )
      if availableCEs == False:
        availableCEs = self._listMatch( proxy, jdl, taskQueueID, rb )
        if availableCEs != False:
          self.log.verbose( 'LastListMatch', now )
          self.log.verbose( 'AvailableCEs ', availableCEs )
          self.listMatchCache.add( pilotRequirements, self.listMatchDelay * 60,
                                   value = availableCEs )  # listMatchDelay is given in minutes

      if not availableCEs:
        try:
          shutil.rmtree( workingDirectory )
        except:
          pass
        return S_ERROR( ERROR_CE + ' TQ: %d' % taskQueueID )

    # Now we are ready for the actual submission
    self.log.verbose( 'Submitting Pilots for TaskQueue', taskQueueID )
    submitRet = self._submitPilot( proxy, pilotsPerJob, jdl, taskQueueID, rb )
    try:
      shutil.rmtree( workingDirectory )
    except:
      pass
    if not submitRet:
      return S_ERROR( 'Pilot Submission Failed for TQ %d ' % taskQueueID )
    # pilotReference, resourceBroker = submitRet

    submittedPilots = 0

    if pilotsPerJob != 1 and len( submitRet ) != pilotsPerJob:
      # Parametric jobs are used
      for pilotReference, resourceBroker in submitRet:
        pilotReference = self._getChildrenReferences( proxy, pilotReference, taskQueueID )
        submittedPilots += len( pilotReference )
        pilotAgentsDB.addPilotTQReference( pilotReference, taskQueueID, ownerDN,
                                           ownerGroup, resourceBroker, self.gridMiddleware,
                                           pilotRequirements )
    else:
      for pilotReference, resourceBroker in submitRet:
        pilotReference = [ pilotReference ]
        submittedPilots += len( pilotReference )
        pilotAgentsDB.addPilotTQReference( pilotReference, taskQueueID, ownerDN,
                                           ownerGroup, resourceBroker, self.gridMiddleware,
                                           pilotRequirements )

    # add some sleep here
    time.sleep( 0.1 * submittedPilots )

    if pilotsToSubmit > pilotsPerJob:
      # Additional submissions are necessary, need to get a new token and iterate.
      pilotsToSubmit -= pilotsPerJob
      result = gProxyManager.requestToken( ownerDN, ownerGroup, max( pilotsToSubmit, self.maxJobsInFillMode ) )
      if not result[ 'OK' ]:
        self.log.error( ERROR_TOKEN, result['Message'] )
        result = S_ERROR( ERROR_TOKEN )
        result['Value'] = submittedPilots
        return result
      ( token, numberOfUses ) = result[ 'Value' ]

      for option in pilotOptions:
        if option.find( '-o /Security/ProxyToken=' ) == 0:
          pilotOptions.remove( option )
      pilotOptions.append( '-o /Security/ProxyToken=%s' % token )

      pilotsPerJob = max( 1, min( pilotsPerJob, int( numberOfUses / self.maxJobsInFillMode ) ) )
      result = self._submitPilots( workDir, taskQueueDict, pilotOptions,
                                   pilotsToSubmit, ceMask,
                                   submitPrivatePilot, privateTQ,
                                   proxy, pilotsPerJob )
      if not result['OK']:
        if 'Value' not in result:
          result['Value'] = 0
        result['Value'] += submittedPilots
        return result
      submittedPilots += result['Value']

    return S_OK( submittedPilots )

  def _prepareJDL( self, taskQueueDict, workingDirectory, pilotOptions, pilotsToSubmit,
                   ceMask, submitPrivatePilot, privateTQ ):
    """
      This method should be overridden in a subclass
    """
    self.log.error( '_prepareJDL() method should be implemented in a subclass' )
    sys.exit()

  def _JobJDL( self, taskQueueDict, pilotOptions, ceMask ):
    """
      The Job JDL is the same for LCG and gLite
    """
    pilotJDL = 'Executable     = "%s";\n' % os.path.basename( self.pilot )
    executable = self.pilot

    pilotJDL += 'Arguments     = "%s";\n' % ' '.join( pilotOptions )

    pilotJDL += 'CPUTimeRef    = %s;\n' % taskQueueDict['CPUTime']

    pilotJDL += 'CPUPowerRef   = %s;\n' % self.cpuPowerRef

    pilotJDL += """CPUWorkRef = real( CPUTimeRef * CPUPowerRef );
Lookup        = "CPUScalingReferenceSI00=*";
cap   = isList( other.GlueCECapability ) ? other.GlueCECapability : { "dummy" };
i0    = regexp( Lookup, cap[0] ) ? 0 : undefined;
i1    = isString( cap[1] ) && regexp( Lookup, cap[1] ) ? 1 : i0;
i2    = isString( cap[2] ) && regexp( Lookup, cap[2] ) ? 2 : i1;
i3    = isString( cap[3] ) && regexp( Lookup, cap[3] ) ? 3 : i2;
i4    = isString( cap[4] ) && regexp( Lookup, cap[4] ) ? 4 : i3;
i5    = isString( cap[5] ) && regexp( Lookup, cap[5] ) ? 5 : i4;
index = isString( cap[6] ) && regexp( Lookup, cap[6] ) ? 6 : i5;
i     = isUndefined( index ) ? 0 : index;
QueuePowerRef = real( ! isUndefined( index ) ? int( substr( cap[i], size( Lookup ) - 1 ) ) : other.GlueHostBenchmarkSI00 );
QueueTimeRef  = real( other.GlueCEPolicyMaxCPUTime * 60 );
QueueWorkRef  = QueuePowerRef * QueueTimeRef;
"""

    requirements = list( self.requirements )
    if 'GridCEs' in taskQueueDict and taskQueueDict['GridCEs']:
      # if there is an explicit Grid CE requested by the TQ, remove the Ranking requirement
      for req in self.requirements:
        if req.strip().lower()[:6] == 'rank >':
          requirements.remove( req )

    requirements.append( 'QueueWorkRef > CPUWorkRef' )

    siteRequirements = '\n || '.join( [ 'other.GlueCEInfoHostName == "%s"' % s for s in ceMask ] )
    requirements.append( "( %s\n )" % siteRequirements )

    pilotRequirements = '\n && '.join( requirements )

    pilotJDL += 'pilotRequirements  = %s;\n' % pilotRequirements

    pilotJDL += 'Rank = %s;\n' % self.rank
    pilotJDL += 'FuzzyRank = %s;\n' % self.fuzzyRank
    pilotJDL += 'StdOutput     = "%s";\n' % outputSandboxFiles[0]
    pilotJDL += 'StdError      = "%s";\n' % outputSandboxFiles[1]

    pilotJDL += 'InputSandbox  = { "%s" };\n' % '", "'.join( [ self.install, executable ] )

    pilotJDL += 'OutputSandbox = { %s };\n' % ', '.join( [ '"%s"' % f for f in outputSandboxFiles ] )

    self.log.verbose( pilotJDL )

    return ( pilotJDL, pilotRequirements )

  def parseListMatchStdout( self, proxy, cmd, taskQueueID, rb ):
    """
      Parse List Match stdout to return the list of matched CEs
    """
    self.log.verbose( 'Executing List Match for TaskQueue', taskQueueID )

    start = time.time()
    ret = executeGridCommand( proxy, cmd, self.gridEnv )

    if not ret['OK']:
      self.log.error( 'Failed to execute List Match:', ret['Message'] )
      self.__sendErrorMail( rb, 'List Match', cmd, ret, proxy )
      return False
    if ret['Value'][0] != 0:
      self.log.error( 'Error executing List Match:', str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) )
      self.__sendErrorMail( rb, 'List Match', cmd, ret, proxy )
      return False
    self.log.info( 'List Match Execution Time: %.2f for TaskQueue %d' % ( ( time.time() - start ), taskQueueID ) )

    stdout = ret['Value'][1]
    stderr = ret['Value'][2]
    availableCEs = []
    # Parse std.out
    for line in List.fromChar( stdout, '\n' ):
      if re.search( '/jobmanager-', line ) or re.search( '/cream-', line ):
        # TODO: the line has to be stripped from extra info
        availableCEs.append( line )

    if not availableCEs:
      self.log.info( 'List-Match failed to find CEs for TaskQueue', taskQueueID )
      self.log.info( stdout )
      self.log.info( stderr )
    else:
      self.log.debug( 'List-Match returns:', str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) )
      self.log.info( 'List-Match found %s CEs for TaskQueue' % len( availableCEs ), taskQueueID )
      self.log.verbose( ', '.join( availableCEs ) )

    return availableCEs

  def parseJobSubmitStdout( self, proxy, cmd, taskQueueID, rb ):
    """
      Parse Job Submit stdout to return the pilot reference
    """
    start = time.time()
    self.log.verbose( 'Executing Job Submit for TaskQueue', taskQueueID )

    ret = executeGridCommand( proxy, cmd, self.gridEnv )

    if not ret['OK']:
      self.log.error( 'Failed to execute Job Submit:', ret['Message'] )
      self.__sendErrorMail( rb, 'Job Submit', cmd, ret, proxy )
      return False
    if ret['Value'][0] != 0:
      self.log.error( 'Error executing Job Submit:', str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) )
      self.__sendErrorMail( rb, 'Job Submit', cmd, ret, proxy )
      return False
    self.log.info( 'Job Submit Execution Time: %.2f for TaskQueue %d' % ( ( time.time() - start ), taskQueueID ) )

    stdout = ret['Value'][1]
    stderr = ret['Value'][2]

    submittedPilot = None

    failed = 1
    rb = ''
    for line in List.fromChar( stdout, '\n' ):
      m = re.search( "(https:\S+)", line )
      if m:
        glite_id = m.group( 1 )
        submittedPilot = glite_id
        if not rb:
          m = re.search( "https://(.+):.+", glite_id )
          rb = m.group( 1 )
        failed = 0
    if failed:
      self.log.error( 'Job Submit returns no Reference:', str( ret['Value'][0] ) + '\n'.join( ret['Value'][1:3] ) )
      return False

    self.log.info( 'Reference %s for TaskQueue %s' % ( glite_id, taskQueueID ) )

    return glite_id, rb

  def _writeJDL( self, filename, jdlList ):
    try:
      f = open( filename, 'w' )
      f.write( '\n'.join( jdlList ) )
      f.close()
    except Exception, x:
      self.log.exception()
      return ''

    return filename
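#...............................................................................
# Illustrative sketch (not part of the class above): how the reference
# extraction in parseJobSubmitStdout works. The sample stdout below is invented;
# the real text comes from the gLite/LCG submission command, but the parsing
# relies only on the two regular expressions shown, which pick out the https job
# reference and the WMS host it points to.

def _examplePilotReferenceParsing():
  """ Hypothetical helper demonstrating the stdout parsing logic. """
  import re

  sampleStdout = 'The job has been successfully submitted:\n' \
                 'https://wms01.example.org:9000/AbCdEfGh123'

  pilotReference = None
  resourceBroker = ''
  for line in sampleStdout.split( '\n' ):
    m = re.search( "(https:\S+)", line )
    if m:
      pilotReference = m.group( 1 )
      if not resourceBroker:
        resourceBroker = re.search( "https://(.+):.+", pilotReference ).group( 1 )

  # pilotReference -> 'https://wms01.example.org:9000/AbCdEfGh123'
  # resourceBroker -> 'wms01.example.org'
  return pilotReference, resourceBroker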
class Cache:
    """
    Cache basic class.

    WARNING: None of its methods is thread safe. Acquire / Release lock when
    using them !
    """

    def __init__(self, lifeTime, updateFunc):
        """
        Constructor

        :Parameters:
          **lifeTime** - `int`
            Lifetime of the elements in the cache ( seconds ! )
          **updateFunc** - `function`
            This function MUST return a S_OK | S_ERROR object. In the case of the
            first, its value must be a dictionary.
        """
        # We set a 20% of the lifetime randomly, so that if we have thousands of jobs
        # starting at the same time, all the caches will not expire at the same time.
        randomLifeTimeBias = 0.2 * random.random()

        self.log = gLogger.getSubLogger(self.__class__.__name__)

        self.__lifeTime = int(lifeTime * (1 + randomLifeTimeBias))
        self.__updateFunc = updateFunc
        # The records returned from the cache must be valid for at least 30 seconds.
        self.__validSeconds = 30

        # Cache
        self.__cache = DictCache()
        self.__cacheLock = LockRing()
        self.__cacheLock.getLock(self.__class__.__name__)

    # internal cache object getter

    def cacheKeys(self):
        """
        Cache keys getter

        :returns: list with keys in the cache valid for at least twice the validity period of the element
        """
        # Here we need more than the validity period because of the logic of the matching:
        # * get all the keys with validity T
        # * for each key K, get the element K with validity T
        # This logic fails for elements just at the limit of the required time
        return self.__cache.getKeys(validSeconds=self.__validSeconds * 2)

    # acquire / release Locks

    def acquireLock(self):
        """Acquires Cache lock"""
        self.__cacheLock.acquire(self.__class__.__name__)

    def releaseLock(self):
        """Releases Cache lock"""
        self.__cacheLock.release(self.__class__.__name__)

    # Cache getters

    def get(self, cacheKeys):
        """
        Gets values for the cacheKeys given. If all are found ( present on the cache
        and valid ), returns S_OK with the results. If any is either not present or
        not valid, returns S_ERROR.

        :Parameters:
          **cacheKeys** - `list`
            list of keys to be extracted from the cache

        :return: S_OK | S_ERROR
        """
        result = {}

        for cacheKey in cacheKeys:
            cacheRow = self.__cache.get(cacheKey, validSeconds=self.__validSeconds)

            if not cacheRow:
                return S_ERROR("Cannot get %s" % str(cacheKey))
            result.update({cacheKey: cacheRow})

        return S_OK(result)

    def check(self, cacheKeys, vO):
        """
        Modified get() method. Attempts to find keys with a vO value appended or
        with the 'all' value appended. The cacheKeys passed in are 'flattened'
        cache keys (no vO).

        Gets values for the cacheKeys given. If all are found ( present on the cache
        and valid ), returns S_OK with the results. If any is either not present or
        not valid, returns S_ERROR.

        :Parameters:
          **cacheKeys** - `list`
            list of keys to be extracted from the cache

        :return: S_OK | S_ERROR
        """
        result = {}

        for cacheKey in cacheKeys:
            longCacheKey = cacheKey + ("all",)
            cacheRow = self.__cache.get(longCacheKey, validSeconds=self.__validSeconds)
            if not cacheRow:
                longCacheKey = cacheKey + (vO,)
                cacheRow = self.__cache.get(longCacheKey, validSeconds=self.__validSeconds)
                if not cacheRow:
                    return S_ERROR(
                        'Cannot get extended %s (neither for VO = %s nor for "all" VOs)' % (str(cacheKey), vO))

            result.update({longCacheKey: cacheRow})

        return S_OK(result)

    # Cache refreshers

    def refreshCache(self):
        """
        Purges the cache and gets fresh data from the update function.

        :return: S_OK | S_ERROR. If the first, its content is the new cache.
        """
        self.log.verbose("refreshing...")

        self.__cache.purgeAll()

        newCache = self.__updateFunc()
        if not newCache["OK"]:
            self.log.error(newCache["Message"])
            return newCache

        newCache = self.__updateCache(newCache["Value"])

        self.log.verbose("refreshed")

        return newCache

    # Private methods

    def __updateCache(self, newCache):
        """
        Given the new cache dictionary, updates the internal cache with it. It sets
        a duration of <self.__lifeTime> seconds on the entries.

        :Parameters:
          **newCache** - `dict`
            dictionary containing a new cache

        :return: dictionary. It is the newCache argument.
        """
        for cacheKey, cacheValue in newCache.items():
            self.__cache.add(cacheKey, self.__lifeTime, value=cacheValue)

        # We are assuming nothing will fail while inserting in the cache. There is
        # no apparent reason to suspect that piece of code.
        return S_OK(newCache)