class StorageElementCache(object):
    """Callable cache that hands out ``StorageElementItem`` objects.

    Entries are keyed on (thread id, SE name, plugins, VO, proxy location).
    """

    def __init__(self):
        """Create the empty backing ``DictCache``."""
        self.seCache = DictCache()

    def __call__(self, name, plugins=None, vo=None, hideExceptions=False):
        """Return the cached storage element for the arguments, building it on a miss.

        :param name: storage element name, forwarded to ``StorageElementItem``
        :param plugins: forwarded to ``StorageElementItem``; also part of the cache key
        :param vo: VO name; when falsy it is read from ``getVOfromProxyGroup()``
        :param hideExceptions: forwarded to ``StorageElementItem``
        :returns: the storage element object, or ``None`` if the VO lookup fails
        """
        self.seCache.purgeExpired(expiredInSeconds=60)

        if not vo:
            voLookup = getVOfromProxyGroup()
            if not voLookup['OK']:
                return None
            vo = voLookup['Value']

        # The gfal2 context caches the proxy location, so the proxy location is
        # part of the key as well. In practice there should almost always be a
        # single one, except for the REA. If memory consumption explodes, this
        # might be a place to look.
        cacheKey = (threading.current_thread().ident, name, plugins, vo, getProxyLocation())

        cached = self.seCache.get(cacheKey)
        if cached:
            return cached

        created = StorageElementItem(name, plugins, vo, hideExceptions=hideExceptions)
        # Keep the new StorageElement in the cache for half an hour
        self.seCache.add(cacheKey, 1800, created)
        return created
def __init__(self, lifeTime, updateFunc):
    """
    Constructor

    :Parameters:
      **lifeTime** - `int`
        Lifetime of the elements in the cache, in seconds. The stored value is
        stretched by a random factor between 1.0 and 1.2.
      **updateFunc** - `function`
        This function MUST return a S_OK | S_ERROR object. In the S_OK case its
        value must be a dictionary.
    """
    # Up to 20% of random extra lifetime: when thousands of jobs start at the
    # same moment, their caches will not all expire at the same moment.
    stretch = 1 + 0.2 * random.random()

    self.log = gLogger.getSubLogger(self.__class__.__name__)
    self.__updateFunc = updateFunc
    self.__lifeTime = int(lifeTime * stretch)

    # Records returned from the cache must stay valid for at least 10 seconds.
    self.__validSeconds = 10

    # Cache storage and its lock
    self.__cache = DictCache()
    lockRing = LockRing()
    self.__cacheLock = lockRing
    lockRing.getLock(self.__class__.__name__)
class StorageElementCache(object):
    """Cache of ``StorageElementItem`` objects, used by calling the instance.

    Entries are keyed on (thread id, SE name, plugins, VO).
    """

    def __init__(self):
        """Set up the ``DictCache`` holding the storage element objects."""
        self.seCache = DictCache()

    def __call__(self, name, plugins=None, vo=None, hideExceptions=False):
        """Look up, or create and cache, the storage element for the arguments.

        :param name: storage element name, forwarded to ``StorageElementItem``
        :param plugins: forwarded to ``StorageElementItem``; also part of the cache key
        :param vo: VO name; when falsy it is read from ``getVOfromProxyGroup()``
        :param hideExceptions: forwarded to ``StorageElementItem``
        :returns: the storage element object, or ``None`` if the VO lookup fails
        """
        self.seCache.purgeExpired(expiredInSeconds=60)
        threadId = threading.current_thread().ident

        if not vo:
            voResult = getVOfromProxyGroup()
            if not voResult['OK']:
                return None
            vo = voResult['Value']

        key = (threadId, name, plugins, vo)
        element = self.seCache.get(key)
        if element:
            return element

        element = StorageElementItem(name, plugins, vo, hideExceptions=hideExceptions)
        # The new StorageElement stays cached for half an hour
        self.seCache.add(key, 1800, element)
        return element
def __init__(self, plotsLocation=False):
    """Create the graph cache and start the background purge thread.

    :param plotsLocation: stored as-is on the instance (``False`` by default)
    """
    self.plotsLocation = plotsLocation
    # Flag read by the purge loop; set it to False to let the thread finish
    self.alive = True
    # _deleteGraph is handed to the cache as its delete callback
    self.__graphCache = DictCache(deleteFunction=_deleteGraph)
    # Seconds
    self.__graphLifeTime = 600
    self.purgeThread = threading.Thread(target=self.purgeExpired)
    # Daemon thread, like the other purge threads in this code base: a
    # non-daemon thread stuck in its sleep loop would keep the interpreter
    # from exiting.
    self.purgeThread.daemon = True
    self.purgeThread.start()
def __init__(self, RPCFunctor=None):
    """Select the RPC functor and create the three internal caches.

    :param RPCFunctor: callable used for RPC access; ``RPCClient`` is used
                       when this is falsy
    """
    self.__RPCFunctor = RPCFunctor if RPCFunctor else RPCClient

    self.__tokens = DictCache()
    self.__requests = DictCache()
    # Consumer entries are passed to __cleanConsumerCache as delete callback
    self.__consumers = DictCache(deleteFunction=self.__cleanConsumerCache)
def __init__(self, dirName="accountingPlots"):
    """Set up the data/graph caches and start the background purge thread.

    :param str dirName: sub-directory of ``<rootPath>/data`` used as graphs location
    """
    self.graphsLocation = os.path.join(rootPath, "data", dirName)
    self.cachedGraphs = {}

    # All state must exist BEFORE the purge thread is started; otherwise the
    # thread could run against a half-initialised object.
    self.__dataCache = DictCache()
    self.__graphCache = DictCache(deleteFunction=self._deleteGraph)
    # Lifetimes in seconds
    self.__dataLifeTime = 600
    self.__graphLifeTime = 3600

    self.alive = True
    self.purgeThread = threading.Thread(target=self.purgeExpired)
    # Daemon thread: it must not keep the process alive on shutdown
    self.purgeThread.daemon = True
    self.purgeThread.start()
def __init__(self):
    """Set up the data/graph caches and start the background purge thread.

    The graphs location is ``<InstancePath>/data/accountingPlots``, where the
    instance path is read from ``/LocalSite/InstancePath`` (default ``rootPath``).
    """
    self.graphsLocation = os.path.join(gConfig.getValue('/LocalSite/InstancePath', rootPath),
                                       'data', 'accountingPlots')
    self.cachedGraphs = {}

    # All state must exist BEFORE the purge thread is started; otherwise the
    # thread could run against a half-initialised object.
    self.__dataCache = DictCache()
    self.__graphCache = DictCache(deleteFunction=self._deleteGraph)
    # Lifetimes in seconds
    self.__dataLifeTime = 600
    self.__graphLifeTime = 3600

    self.alive = True
    self.purgeThread = threading.Thread(target=self.purgeExpired)
    # Daemon thread: it must not keep the process alive on shutdown
    self.purgeThread.daemon = True
    self.purgeThread.start()
class PlotCache(object):
    """Cache of generated plots, stored as PNG files under ``plotsLocation``."""

    def __init__(self, plotsLocation=False):
        """Create the graph cache and start the background purge thread.

        :param plotsLocation: directory used for the plot files (``False`` until set)
        """
        self.plotsLocation = plotsLocation
        # Flag read by purgeExpired(); set it to False to let the thread finish
        self.alive = True
        self.__graphCache = DictCache(deleteFunction=_deleteGraph)
        # Seconds; used both as cache lifetime and as purge period
        self.__graphLifeTime = 600
        self.purgeThread = threading.Thread(target=self.purgeExpired)
        # Daemon thread: a non-daemon thread stuck in its sleep loop would keep
        # the interpreter from exiting.
        self.purgeThread.daemon = True
        self.purgeThread.start()

    def setPlotsLocation(self, plotsDir):
        """Set the plots directory and delete the PNG files already in it.

        :param str plotsDir: directory to use from now on
        """
        self.plotsLocation = plotsDir
        for plot in os.listdir(self.plotsLocation):
            # Matches any name containing ".png" after its first character
            if plot.find(".png") > 0:
                plotLocation = "%s/%s" % (self.plotsLocation, plot)
                gLogger.verbose("Purging %s" % plotLocation)
                os.unlink(plotLocation)

    def purgeExpired(self):
        """Thread body: purge expired cache entries once per graph lifetime."""
        while self.alive:
            time.sleep(self.__graphLifeTime)
            self.__graphCache.purgeExpired()

    def getPlot(self, plotHash, plotData, plotMetadata, subplotMetadata):
        """
        Get plot from the cache if exists, else generate it

        :param plotHash: cache key, also used as base name of the PNG file
        :param plotData: data handed to ``graph``
        :param plotMetadata: metadata handed to ``graph``
        :param subplotMetadata: when truthy, handed to ``graph`` as ``metadata``
        :return: S_OK(plot dictionary), or the failed result of ``graph``
        """
        plotDict = self.__graphCache.get(plotHash)
        if plotDict is None:
            basePlotFileName = "%s/%s.png" % (self.plotsLocation, plotHash)
            if subplotMetadata:
                retVal = graph(plotData, basePlotFileName, plotMetadata, metadata=subplotMetadata)
            else:
                retVal = graph(plotData, basePlotFileName, plotMetadata)
            if not retVal['OK']:
                return retVal
            plotDict = retVal['Value']
            if plotDict['plot']:
                # Store only the file name, not the full path
                plotDict['plot'] = os.path.basename(basePlotFileName)
            self.__graphCache.add(plotHash, self.__graphLifeTime, plotDict)
        return S_OK(plotDict)

    def getPlotData(self, plotFileName):
        """Read a plot file from the plots directory.

        :param str plotFileName: file name relative to ``plotsLocation``
        :return: S_OK(file content as bytes) / S_ERROR if the file cannot be read
        """
        filename = "%s/%s" % (self.plotsLocation, plotFileName)
        try:
            # open() instead of the Python-2-only file(); the context manager
            # closes the handle even when read() fails.
            with open(filename, "rb") as fd:
                data = fd.read()
        except Exception as v:
            return S_ERROR("Can't open file %s: %s" % (plotFileName, str(v)))
        return S_OK(data)
class PlotCache:
    """Cache of generated plots, stored as PNG files under ``plotsLocation``."""

    def __init__(self, plotsLocation=False):
        """Create the graph cache and start the daemon purge thread.

        :param plotsLocation: directory used for the plot files (``False`` until set)
        """
        self.plotsLocation = plotsLocation
        # Flag read by purgeExpired(); set it to False to let the thread finish
        self.alive = True
        self.__graphCache = DictCache(deleteFunction=_deleteGraph)
        # Seconds; used both as cache lifetime and as purge period
        self.__graphLifeTime = 600
        self.purgeThread = threading.Thread(target=self.purgeExpired)
        self.purgeThread.setDaemon(1)
        self.purgeThread.start()

    def setPlotsLocation(self, plotsDir):
        """Set the plots directory and delete the PNG files already in it.

        :param str plotsDir: directory to use from now on
        """
        self.plotsLocation = plotsDir
        for plot in os.listdir(self.plotsLocation):
            # Matches any name containing ".png" after its first character
            if plot.find(".png") > 0:
                plotLocation = "%s/%s" % (self.plotsLocation, plot)
                gLogger.verbose("Purging %s" % plotLocation)
                os.unlink(plotLocation)

    def purgeExpired(self):
        """Thread body: purge expired cache entries once per graph lifetime."""
        while self.alive:
            time.sleep(self.__graphLifeTime)
            self.__graphCache.purgeExpired()

    def getPlot(self, plotHash, plotData, plotMetadata, subplotMetadata):
        """
        Get plot from the cache if exists, else generate it

        :param plotHash: cache key, also used as base name of the PNG file
        :param plotData: data handed to ``graph``
        :param plotMetadata: metadata handed to ``graph``
        :param subplotMetadata: when truthy, handed to ``graph`` as ``metadata``
        :return: S_OK(plot dictionary), or the failed result of ``graph``
        """
        plotDict = self.__graphCache.get(plotHash)
        # A miss is reported with a falsy sentinel; testing truthiness works
        # whether the cache returns False or None for a missing key.
        if not plotDict:
            basePlotFileName = "%s/%s.png" % (self.plotsLocation, plotHash)
            if subplotMetadata:
                retVal = graph(plotData, basePlotFileName, plotMetadata, metadata=subplotMetadata)
            else:
                retVal = graph(plotData, basePlotFileName, plotMetadata)
            if not retVal['OK']:
                return retVal
            plotDict = retVal['Value']
            if plotDict['plot']:
                # Store only the file name, not the full path
                plotDict['plot'] = os.path.basename(basePlotFileName)
            self.__graphCache.add(plotHash, self.__graphLifeTime, plotDict)
        return S_OK(plotDict)

    def getPlotData(self, plotFileName):
        """Read a plot file from the plots directory.

        :param str plotFileName: file name relative to ``plotsLocation``
        :return: S_OK(file content) / S_ERROR if the file cannot be read
        """
        filename = "%s/%s" % (self.plotsLocation, plotFileName)
        try:
            # open() instead of the Python-2-only file(); the context manager
            # closes the handle even when read() fails.
            with open(filename, "rb") as fd:
                data = fd.read()
        except Exception as v:  # "as" form is valid on Python 2.6+ and 3
            return S_ERROR("Can't open file %s: %s" % (plotFileName, str(v)))
        return S_OK(data)
def __init__(self):
    """Set up the data/graph caches and start the background purge thread.

    The graphs location is ``<InstancePath>/data/accountingPlots``, where the
    instance path is read from ``/LocalSite/InstancePath`` (default ``rootPath``).
    """
    instancePath = gConfig.getValue('/LocalSite/InstancePath', rootPath)
    self.graphsLocation = os.path.join(instancePath, 'data', 'accountingPlots')
    self.cachedGraphs = {}

    # All state must exist BEFORE the purge thread is started; otherwise the
    # thread could run against a half-initialised object.
    self.__dataCache = DictCache()
    self.__graphCache = DictCache(deleteFunction=self._deleteGraph)
    # Lifetimes in seconds
    self.__dataLifeTime = 600
    self.__graphLifeTime = 3600

    self.alive = True
    self.purgeThread = threading.Thread(target=self.purgeExpired)
    # Daemon thread: it must not keep the process alive on shutdown
    self.purgeThread.daemon = True
    self.purgeThread.start()
class StorageElementCache(object):
    """Callable cache of ``StorageElementItem`` objects keyed on (name, protocols, vo)."""

    def __init__(self):
        """Create the empty backing ``DictCache``."""
        self.seCache = DictCache()

    def __call__(self, name, protocols=None, vo=None, hideExceptions=False):
        """Return the storage element for the arguments, creating it on a cache miss.

        :param name: storage element name, forwarded to ``StorageElementItem``
        :param protocols: forwarded to ``StorageElementItem``; part of the cache key
        :param vo: forwarded to ``StorageElementItem``; part of the cache key
        :param hideExceptions: forwarded to ``StorageElementItem`` (not part of the key)
        :returns: the cached or freshly created storage element object
        """
        self.seCache.purgeExpired(expiredInSeconds=60)

        key = (name, protocols, vo)
        cached = self.seCache.get(key)
        if cached:
            return cached

        fresh = StorageElementItem(name, protocols, vo, hideExceptions=hideExceptions)
        # Cached for half an hour
        self.seCache.add(key, 1800, fresh)
        return fresh
class StorageElementCache(object):
    """Callable cache of ``StorageElementItem`` objects keyed on (name, protocols, vo)."""

    def __init__(self):
        """Create the empty backing ``DictCache``."""
        self.seCache = DictCache()

    def __call__(self, name, protocols=None, vo=None):
        """Return the storage element for the arguments, creating it on a cache miss.

        :param name: storage element name, forwarded to ``StorageElementItem``
        :param protocols: forwarded to ``StorageElementItem``; part of the cache key
        :param vo: forwarded to ``StorageElementItem``; part of the cache key
        :returns: the cached or freshly created storage element object
        """
        self.seCache.purgeExpired(expiredInSeconds=60)

        lookupKey = (name, protocols, vo)
        storageElement = self.seCache.get(lookupKey)
        if storageElement:
            return storageElement

        storageElement = StorageElementItem(name, protocols, vo)
        # Cached for half an hour
        self.seCache.add(lookupKey, 1800, storageElement)
        return storageElement
def updateDelayCounters(self, siteName, jid):
    """Register matching delays for a job that was matched at a site.

    The delay configuration of the site is read from the CS; for every
    configured job attribute whose current value has a delay defined, an entry
    keyed ``(attribute name, attribute value)`` is added to the per-site
    ``DictCache`` in ``self.delayMem`` with that delay as lifetime.

    :param siteName: site name, appended to the matching delay CS section
    :param jid: job id handed to ``jobDB.getJobAttributes``
    :return: S_OK() on success, or the failed result of the CS / JobDB call
    """
    siteSection = "%s/%s" % (self.__matchingDelaySection, siteName)

    # Delay definitions from the CS,
    # something like { 'JobType' : { 'Merge' : 20, 'MCGen' : 1000 } }
    csResult = self.__extractCSData(siteSection)
    if not csResult["OK"]:
        return csResult
    delayDict = csResult["Value"]
    if not delayDict:
        return S_OK()

    # Only attributes that the JobDB knows about can be queried
    knownAttNames = []
    for attName in delayDict:
        if attName in self.jobDB.jobAttributeNames:
            knownAttNames.append(attName)
        else:
            self.log.error("Attribute does not exist in the JobDB. Please fix it!", "(%s)" % attName)

    attResult = self.jobDB.getJobAttributes(jid, knownAttNames)
    if not attResult["OK"]:
        self.log.error("Error while retrieving attributes", "coming from %s: %s" % (siteSection, attResult["Message"]))
        return attResult
    jobAtts = attResult["Value"]

    # One DictCache per site, created on first use
    if siteName not in self.delayMem:
        self.delayMem[siteName] = DictCache()
    siteDelays = self.delayMem[siteName]

    for attName in jobAtts:
        attValue = jobAtts[attName]
        if attValue not in delayDict[attName]:
            continue
        delayTime = delayDict[attName][attValue]
        self.log.notice("Adding delay for %s/%s=%s of %s secs" % (siteName, attName, attValue, delayTime))
        siteDelays.add((attName, attValue), delayTime)

    return S_OK()
def __init__(self, lifeTime, updateFunc):
    """
    Constructor

    :Parameters:
      **lifeTime** - `int`
        Lifetime of the elements in the cache, in seconds. The stored value is
        stretched by a random factor between 1.0 and 1.2.
      **updateFunc** - `function`
        This function MUST return a S_OK | S_ERROR object. In the S_OK case its
        value must be a dictionary.
    """
    # Random bias of up to 20% of the lifetime, so that thousands of jobs
    # starting together do not see all their caches expire together.
    lifeTimeFactor = 1 + 0.2 * random.random()

    className = self.__class__.__name__
    self.log = gLogger.getSubLogger(className)

    self.__lifeTime = int(lifeTime * lifeTimeFactor)
    self.__updateFunc = updateFunc

    # Records returned from the cache must stay valid for at least 10 seconds.
    self.__validSeconds = 10

    # Cache storage and its lock
    self.__cache = DictCache()
    self.__cacheLock = LockRing()
    self.__cacheLock.getLock(self.__class__.__name__)
def __init__(self, plotsLocation=False):
    """Create the graph cache and start the background purge thread.

    :param plotsLocation: stored as-is on the instance (``False`` by default)
    """
    self.plotsLocation = plotsLocation
    # Flag read by the purge loop; set it to False to let the thread finish
    self.alive = True
    # _deleteGraph is handed to the cache as its delete callback
    self.__graphCache = DictCache(deleteFunction=_deleteGraph)
    # Seconds
    self.__graphLifeTime = 600
    self.purgeThread = threading.Thread(target=self.purgeExpired)
    # Daemon thread, like the other purge threads in this code base: a
    # non-daemon thread stuck in its sleep loop would keep the interpreter
    # from exiting.
    self.purgeThread.daemon = True
    self.purgeThread.start()
def __init__(self, submitPool):
    """Set the director defaults, create the WMS caches and run the parent constructor.

    :param submitPool: forwarded unchanged to ``PilotDirector.__init__``
    """
    # Defaults come from the module-level constants
    self.gridEnv, self.cpuPowerRef = GRIDENV, CPU_POWER_REF
    self.requirements = REQUIREMENTS
    self.rank, self.fuzzyRank = RANK, FUZZY_RANK

    # One cache each for failing WMSs, tickets and list-match results
    self.__failingWMSCache = DictCache()
    self.__ticketsWMSCache = DictCache()
    self.__listMatchWMSCache = DictCache()

    # The parent constructor runs last, once the defaults above are in place
    PilotDirector.__init__(self, submitPool)
def __init__(self, submitPool):
    """Define the defaults and the WMS caches, then call the parent ``__init__``.

    :param submitPool: forwarded unchanged to ``PilotDirector.__init__``
    """
    # Grid environment and job description defaults (module-level constants)
    self.gridEnv = GRIDENV
    self.cpuPowerRef = CPU_POWER_REF
    self.requirements = REQUIREMENTS

    # Ranking defaults
    self.rank = RANK
    self.fuzzyRank = FUZZY_RANK

    # Caches, created in this order: failing WMSs, tickets, list-match results
    failingCache, ticketsCache, listMatchCache = DictCache(), DictCache(), DictCache()
    self.__failingWMSCache = failingCache
    self.__ticketsWMSCache = ticketsCache
    self.__listMatchWMSCache = listMatchCache

    PilotDirector.__init__(self, submitPool)
def __init__(self):
    """Initialize like a real service.

    Builds the service description expected by the parent constructor, then
    creates the delegated-credentials cache and the still unresolved members.
    """
    gatewayName = GatewayService.GATEWAY_NAME
    gatewayModule = sys.modules[DIRAC.Core.DISET.private.GatewayService.GatewayService.__module__]
    serviceData = {
        'modName': gatewayName,
        'loadName': gatewayName,
        'standalone': True,
        'moduleObj': gatewayModule,
        'classObj': self.__class__,
    }
    super(GatewayService, self).__init__(serviceData)

    self.__delegatedCredentials = DictCache()
    # 100 MiB
    self.__transferBytesLimit = 100 * 1024 * 1024

    # To be resolved later
    self._url = None
    self._handler = None
    self._threadPool = None
    self._msgBroker = None
    self._msgForwarder = None
def initializeHandler(cls, serviceInfo):
    """Handler initialization

    Creates the notification DB object and the mail cache on the class, and
    registers an hourly purge task for each of them.

    :param serviceInfo: not used by this method
    :return: S_OK()
    """
    cls.notDB = NotificationDB()
    cls.mailCache = DictCache()
    gThreadScheduler.addPeriodicTask(3600, cls.notDB.purgeExpiredNotifications)
    # Pass the bound method itself. Calling purgeExpired() here would run the
    # purge once right away and schedule its return value instead of the task.
    gThreadScheduler.addPeriodicTask(3600, cls.mailCache.purgeExpired)
    return S_OK()
def getFTS3Context(self, username, group, ftsServer, threadID):
    """ Returns an fts3 context for a given user, group and fts server

        The context pool is per thread, and there is one context
        per tuple (user, group, server).
        We dump the proxy of a user to a file (shared by all the threads),
        and use it to make the context.
        The proxy is requested with at least 2h of lifetime and cached for 1.5h.
        A context is stored for 1h and reused only while it remains valid for
        at least another 45mn.

        :param username: name of the user
        :param group: group of the user
        :param ftsServer: address of the server
        :param threadID: key selecting the per-thread context cache

        :returns: S_OK with the context object
    """
    log = gLogger.getSubLogger("getFTS3Context", child=True)

    threadContexts = self._globalContextCache.setdefault(threadID, DictCache())
    cacheKey = (username, group, ftsServer)
    log.debug("Getting context for %s" % (cacheKey, ))

    # Fast path: a context that stays valid for 45mn more is already cached
    if threadContexts.exists(cacheKey, 2700):
        return S_OK(threadContexts.get(cacheKey))

    dnResult = getDNForUsername(username)
    if not dnResult['OK']:
        return dnResult
    # We take the first DN returned
    userDN = dnResult['Value'][0]
    log.debug("UserDN %s" % userDN)

    # We dump the proxy to a file.
    # It has to have a lifetime of at least 2 hours
    # and we cache it for 1.5 hours
    proxyResult = gProxyManager.downloadVOMSProxyToFile(userDN, group,
                                                        requiredTimeLeft=7200,
                                                        cacheTime=5400)
    if not proxyResult['OK']:
        return proxyResult
    proxyFile = proxyResult['Value']
    log.debug("Proxy file %s" % proxyFile)

    # We generate the context
    contextResult = FTS3Job.generateContext(ftsServer, proxyFile)
    if not contextResult['OK']:
        return contextResult

    # Add it to the cache of this thread for 1h
    threadContexts.add(cacheKey, 3600, contextResult['Value'])
    return S_OK(threadContexts.get(cacheKey))
def __init__(self, submitPool):
    """Define some defaults and call parent ``__init__``.

    Exits the process when no site name is configured in ``/LocalSite/Site``.

    :param submitPool: forwarded unchanged to ``PilotDirector.__init__``
    """
    # Must be set before the parent constructor runs
    self.gridMiddleware = 'DIRAC'
    PilotDirector.__init__(self, submitPool)

    # Computing elements
    self.computingElementList = COMPUTING_ELEMENTS
    self.computingElementDict = {}
    self.addComputingElement(self.computingElementList)

    # A director cannot work without a site name
    siteName = gConfig.getValue('/LocalSite/Site', '')
    self.siteName = siteName
    if not siteName:
        self.log.error('Can not run a Director if Site Name is not defined')
        sys.exit()

    # Caches for failing CEs and for tickets
    self.__failingCECache = DictCache()
    self.__ticketsCECache = DictCache()
def __init__(self, lifeTime, updateFunc=None, cacheHistoryLifeTime=None):
    """Constructor

    :param lifeTime: stored as the cache life time
    :param updateFunc: stored as the update function (``None`` by default)
    :param cacheHistoryLifeTime: lifetime of the history in hours; a falsy
                                 value is replaced by 24
    """
    self.__lifeTime = lifeTime
    self.__updateFunc = updateFunc
    # Lifetime of the history in hours
    self.__cacheHistoryLifeTime = cacheHistoryLifeTime or 24

    # RSSCache storage, its status history and the lock
    self.__rssCache = DictCache()
    # List of ( updateTime, message )
    self.__rssCacheStatus = []
    self.__rssCacheLock = threading.Lock()

    # Refresh thread: created here as a daemon, but not started
    self.__refreshStop = False
    refreshThread = threading.Thread(target=self.__refreshCacheThreadRun)
    refreshThread.setDaemon(True)
    self.__refreshThread = refreshThread
class StorageElementCache(object):
    """Callable cache of ``StorageElementItem`` objects.

    Entries are keyed on (thread id, SE name, protocols, VO).
    """

    def __init__(self):
        """Create the empty backing ``DictCache``."""
        self.seCache = DictCache()

    def __call__(self, name, protocols=None, vo=None, hideExceptions=False):
        """Return the storage element for the arguments, creating it on a cache miss.

        :param name: storage element name, forwarded to ``StorageElementItem``
        :param protocols: forwarded to ``StorageElementItem``; part of the cache key
        :param vo: forwarded to ``StorageElementItem``; part of the cache key
        :param hideExceptions: forwarded to ``StorageElementItem`` (not part of the key)
        :returns: the cached or freshly created storage element object
        """
        self.seCache.purgeExpired(expiredInSeconds=60)

        # The id of the calling thread is part of the key
        key = (threading.current_thread().ident, name, protocols, vo)

        found = self.seCache.get(key)
        if found:
            return found

        built = StorageElementItem(name, protocols, vo, hideExceptions=hideExceptions)
        # Cached for half an hour
        self.seCache.add(key, 1800, built)
        return built
class HttpStorageAccessHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """HTTP handler that serves the cached files registered under an access key."""

    # Access key -> cache directory
    register = DictCache()
    basePath = ''

    def do_GET(self):
        """Serve a GET request.

        The request path (without the leading slash) is the access key. A
        directory holding a single file is served as that file; otherwise its
        content is packed into a tar archive first. The key is removed from
        the register once the data has been sent.
        """
        # Local import: this block cannot see the module-level imports
        import subprocess

        # Strip off leading slash
        key = self.path[1:]
        if not self.register.exists(key):
            self.send_error(401, "Invalid key provided, access denied")
            return None

        cache_path = self.register.get(key)
        fileList = os.listdir(cache_path)
        if len(fileList) == 1:
            path = os.path.join(cache_path, fileList[0])
        else:
            # multiple files, make archive
            unique = str(random.getrandbits(24))
            path = os.path.join(cache_path, 'dirac_data_%s.tar' % unique)
            # Argument list instead of a shell string: file names containing
            # spaces or shell metacharacters are passed verbatim, and "--"
            # prevents names starting with "-" from being read as tar options.
            tarCommand = ['tar', '-cf', path, '--remove-files', '-C', cache_path, '--'] + fileList
            try:
                subprocess.call(tarCommand)
            except OSError:
                # tar could not be started: send_head() below answers 404
                # because the archive does not exist.
                pass

        f = self.send_head(path)
        if f:
            try:
                shutil.copyfileobj(f, self.wfile)
            finally:
                # Close the file even when the client drops the connection
                f.close()
            self.register.delete(key)

    def send_head(self, path):
        """ Prepare headers for the file download

        :param str path: file to be sent
        :return: the file object opened for binary reading, or ``None`` after
                 a 404 has been sent because the file cannot be opened
        """
        try:
            # Always read in binary mode. Opening files in text mode may cause
            # newline translations, making the actual size of the content
            # transmitted *less* than the content-length!
            f = open(path, 'rb')
        except IOError:
            self.send_error(404, "File not found")
            return None

        fs = os.fstat(f.fileno())
        self.send_response(200)
        self.send_header("Content-type", 'application/octet-stream')
        self.send_header("Content-Length", str(fs.st_size))
        # Sent once only (the header used to be emitted twice)
        self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
        self.send_header("Content-Disposition", "filename=%s" % os.path.basename(path))
        self.end_headers()
        return f
def __init__(self, submitPool):
    """Define the logger and some defaults.

    ``self.gridMiddleware`` is read here but not set here, so it has to exist
    before this constructor runs.

    :param submitPool: submit pool name; used in the logger name and in the
                       pilot submit pool option
    """
    if submitPool == self.gridMiddleware:
        loggerName = '%sPilotDirector' % self.gridMiddleware
    else:
        loggerName = '%sPilotDirector/%s' % (self.gridMiddleware, submitPool)
    self.log = gLogger.getSubLogger(loggerName)

    # Pilot script and its options
    self.pilot = DIRAC_PILOT
    self.submitPoolOption = '-o /Resources/Computing/CEDefaults/SubmitPool=%s' % submitPool
    self.extraPilotOptions = []

    # Installation defaults
    self.installVersion = DIRAC_VERSION
    self.installProject = DIRAC_PROJECT
    self.installation = DIRAC_INSTALLATION
    self.pilotExtensionsList = []
    self.virtualOrganization = VIRTUAL_ORGANIZATION
    self.install = DIRAC_INSTALL
    self.extraModules = DIRAC_MODULES
    self.maxJobsInFillMode = MAX_JOBS_IN_FILLMODE

    self.targetGrids = [self.gridMiddleware]

    # List-match settings and cache
    self.enableListMatch = ENABLE_LISTMATCH
    self.listMatchDelay = LISTMATCH_DELAY
    self.listMatchCache = DictCache()

    self.privatePilotFraction = PRIVATE_PILOT_FRACTION

    # Error handling and mail settings
    self.errorClearTime = ERROR_CLEAR_TIME
    self.errorTicketTime = ERROR_TICKET_TIME
    self.errorMailAddress = DIRAC.errorMail
    self.alarmMailAddress = DIRAC.alarmMail
    self.mailFromAddress = FROM_MAIL

    self.siteClient = SiteStatus()

    # Fallback logger, in case none was stored on the instance above
    if 'log' not in self.__dict__:
        self.log = gLogger.getSubLogger('PilotDirector')
    self.log.info('Initialized')
def __init__(self, jobDB=None, opsHelper=None):
    """Constructor

    :param jobDB: job database object; a new ``JobDB()`` is created when falsy
    :param opsHelper: operations helper; a new ``Operations()`` is created when falsy
    """
    # CS sections holding the limits and the delays
    self.__runningLimitSection = "JobScheduling/RunningLimit"
    self.__matchingDelaySection = "JobScheduling/MatchingDelay"

    # Caches
    self.csDictCache = DictCache()
    self.condCache = DictCache()
    self.delayMem = {}

    self.jobDB = jobDB if jobDB else JobDB()
    self.log = gLogger.getSubLogger("Limiter")
    self.__opsHelper = opsHelper if opsHelper else Operations()
def __init__(self, *args, **kwargs):
    """Constructor: run the ``AgentModule`` constructor, then set up the agent state.

    All positional and keyword arguments are forwarded to ``AgentModule.__init__``.
    """
    AgentModule.__init__(self, *args, **kwargs)

    # Directory tree to explore
    self.__baseDir = '/lhcb'
    self.__baseDirLabel = "_".join(List.fromChar(self.__baseDir, "/"))
    self.__ignoreDirsList = []
    self.__keepDirLevels = 4

    # Execution bookkeeping
    self.__startExecutionTime = long(time.time())
    self.__dirExplorer = DirectoryExplorer(reverse=True)
    self.__processedDirs = 0
    self.__directoryOwners = {}

    self.catalog = FileCatalog()
    self.__maxToPublish = self.am_getOption('MaxDirectories', 5000)

    # Storage usage back-end: direct DB access or the RPC service
    if not self.am_getOption('DirectDB', False):
        # Set a timeout of 0.1 seconds per directory (factor 5 margin)
        defaultTimeout = int(self.__maxToPublish * 0.1)
        self.storageUsage = RPCClient('DataManagement/StorageUsage',
                                      timeout=self.am_getOption('Timeout', defaultTimeout))
    else:
        self.storageUsage = StorageUsageDB()

    # The current value of the attribute serves as default for the option
    self.activePeriod = self.am_getOption('ActivePeriod', self.activePeriod)

    # Locks
    self.dataLock = threading.Lock()
    self.replicaListLock = threading.Lock()

    # Proxy cache; removeProxy is handed to DictCache as first argument
    self.proxyCache = DictCache(removeProxy)
    self.__noProxy = set()

    self.__catalogType = None
    self.__recalculateUsage = Operations().getValue('DataManagement/RecalculateDirSize', False)
    self.enableStartupSleep = self.am_getOption('EnableStartupSleep', self.enableStartupSleep)

    # Publication queues and replica bookkeeping
    self.__publishDirQueue = {}
    self.__dirsToPublish = {}
    self.__replicaFilesUsed = set()
    self.__replicaListFilesDir = ""
class StorageElementCache(object):
    """Callable cache of ``StorageElementItem`` objects.

    Entries are keyed on (thread id, SE name, plugins, VO).
    """

    def __init__(self):
        """Create the empty backing ``DictCache``."""
        self.seCache = DictCache()

    def __call__(self, name, plugins=None, vo=None, hideExceptions=False):
        """Return the storage element for the arguments, creating it on a cache miss.

        :param name: storage element name, forwarded to ``StorageElementItem``
        :param plugins: forwarded to ``StorageElementItem``; part of the cache key
        :param vo: VO name; when falsy it is read from ``getVOfromProxyGroup()``
        :param hideExceptions: forwarded to ``StorageElementItem``
        :returns: the storage element object, or ``None`` if the VO lookup fails
        """
        self.seCache.purgeExpired(expiredInSeconds=60)
        currentThreadId = threading.current_thread().ident

        # Without an explicit VO, fall back to the one of the proxy group
        if not vo:
            proxyVO = getVOfromProxyGroup()
            if not proxyVO['OK']:
                return None
            vo = proxyVO['Value']

        cacheKey = (currentThreadId, name, plugins, vo)
        storageElement = self.seCache.get(cacheKey)
        if storageElement:
            return storageElement

        storageElement = StorageElementItem(name, plugins, vo, hideExceptions=hideExceptions)
        # Cached for half an hour
        self.seCache.add(cacheKey, 1800, storageElement)
        return storageElement
def __init__(self):
    """Initialize like a real service.

    The parent constructor receives a service description naming the gateway
    for both ``modName`` and ``loadName``.
    """
    name = GatewayService.GATEWAY_NAME
    serviceDescription = dict(
        modName=name,
        loadName=name,
        standalone=True,
        moduleObj=sys.modules[DIRAC.Core.DISET.private.GatewayService.GatewayService.__module__],
        classObj=self.__class__,
    )
    super(GatewayService, self).__init__(serviceDescription)

    self.__delegatedCredentials = DictCache()
    # 100 MiB
    self.__transferBytesLimit = 100 * 1024 * 1024

    # To be resolved later
    self._url = self._handler = self._threadPool = None
    self._msgBroker = self._msgForwarder = None
def initializeHandler(cls, *args):
    """Initialization: connect to the token database and create the helper objects.

    :param args: not used by this method

    :return: S_OK()/S_ERROR()
    """
    # Let's try to connect to the database
    try:
        tokenDB = TokenDB(parentLogger=cls.log)
    except Exception as e:
        cls.log.exception(e)
        return S_ERROR(f"Could not connect to the database {repr(e)}")
    cls.__tokenDB = tokenDB

    # Cache containing tokens from scope requested by the client
    cls.__tokensCache = DictCache()

    # The service plays an important OAuth 2.0 role, namely it is an Identity Provider client.
    # This allows tokens to be managed without the involvement of their owners.
    cls.idps = IdProviderFactory()

    return S_OK()
def __init__(self, submitPool):
    """Define the logger and some defaults.

    ``self.gridMiddleware`` is read here but not set here, so it has to exist
    before this constructor runs.

    :param submitPool: submit pool name; used in the logger name and in the
                       pilot submit pool option
    """
    middleware = self.gridMiddleware
    if submitPool != middleware:
        self.log = gLogger.getSubLogger('%sPilotDirector/%s' % (middleware, submitPool))
    else:
        self.log = gLogger.getSubLogger('%sPilotDirector' % middleware)

    # Pilot script and its options
    self.pilot = DIRAC_PILOT
    self.submitPoolOption = '-o /Resources/Computing/CEDefaults/SubmitPool=%s' % submitPool
    self.extraPilotOptions = []

    # Installation defaults
    self.installVersion = DIRAC_VERSION
    self.installProject = DIRAC_PROJECT
    self.installation = DIRAC_INSTALLATION
    self.pilotExtensionsList = []
    self.virtualOrganization = VIRTUAL_ORGANIZATION
    self.install = DIRAC_INSTALL
    self.extraModules = DIRAC_MODULES
    self.maxJobsInFillMode = MAX_JOBS_IN_FILLMODE

    self.targetGrids = [self.gridMiddleware]

    # List-match settings and cache
    self.enableListMatch = ENABLE_LISTMATCH
    self.listMatchDelay = LISTMATCH_DELAY
    self.listMatchCache = DictCache()

    self.privatePilotFraction = PRIVATE_PILOT_FRACTION

    # Error handling and mail settings
    self.errorClearTime = ERROR_CLEAR_TIME
    self.errorTicketTime = ERROR_TICKET_TIME
    self.errorMailAddress = DIRAC.errorMail
    self.alarmMailAddress = DIRAC.alarmMail
    self.mailFromAddress = FROM_MAIL

    # Fallback logger, in case none was stored on the instance above
    if 'log' not in self.__dict__:
        self.log = gLogger.getSubLogger('PilotDirector')
    self.log.info('Initialized')
class RSSCache:
    """
    Cache with purgeThread integrated
    """

    def __init__(self, lifeTime, updateFunc=None, cacheHistoryLifeTime=None):
        """
        Constructor

        :param lifeTime: life time of the cache entries; also the sleep time
                         between two refreshes of the refresh thread
        :param updateFunc: function returning S_OK( dict ) / S_ERROR, used to
                           fill the cache on refresh
        :param cacheHistoryLifeTime: lifetime of the history in hours; a falsy
                                     value is replaced by 24
        """
        self.__lifeTime = lifeTime
        # Lifetime of the history in hours
        self.__cacheHistoryLifeTime = cacheHistoryLifeTime or 24
        self.__updateFunc = updateFunc

        # RSSCache
        self.__rssCache = DictCache()
        self.__rssCacheStatus = []  # ( updateTime, message ), newest first
        self.__rssCacheLock = threading.Lock()

        # Create the refresh thread (daemon, not started yet)
        self.__refreshStop = False
        self.__refreshThread = threading.Thread(target=self.__refreshCacheThreadRun)
        self.__refreshThread.setDaemon(True)

    def startRefreshThread(self):
        """
        Run refresh thread.
        """
        self.__refreshThread.start()

    def stopRefreshThread(self):
        """
        Stop refresh thread. The thread notices the flag once its current
        sleep is over.
        """
        self.__refreshStop = True

    def isCacheAlive(self):
        """
        Returns status of the cache refreshing thread
        """
        return S_OK(self.__refreshThread.is_alive())

    def setLifeTime(self, lifeTime):
        """
        Set cache life time
        """
        self.__lifeTime = lifeTime

    def setCacheHistoryLifeTime(self, cacheHistoryLifeTime):
        """
        Set cache history life time
        """
        self.__cacheHistoryLifeTime = cacheHistoryLifeTime

    def getCacheKeys(self):
        """
        List all the keys stored in the cache.
        """
        with self.__rssCacheLock:
            keys = self.__rssCache.getKeys()
        return S_OK(keys)

    def acquireLock(self):
        """
        Acquires RSSCache lock
        """
        self.__rssCacheLock.acquire()

    def releaseLock(self):
        """
        Releases RSSCache lock
        """
        self.__rssCacheLock.release()

    def getCacheStatus(self):
        """
        Return the latest cache status
        """
        with self.__rssCacheLock:
            if self.__rssCacheStatus:
                res = dict([self.__rssCacheStatus[0]])
            else:
                res = {}
        return S_OK(res)

    def getCacheHistory(self):
        """
        Return the cache updates history
        """
        with self.__rssCacheLock:
            res = dict(self.__rssCacheStatus)
        return S_OK(res)

    def get(self, resourceKey):
        """
        Gets the resource(s) status(es).
        Every resource can have multiple statuses, so in order to speed up things,
        we store them on the cache as follows::

          { (<resourceName>,<resourceStatusType0>) : whatever0,
            (<resourceName>,<resourceStatusType1>) : whatever1,
          }

        :return: S_OK( { resourceKey : value } ) / S_ERROR when the key has no
                 (truthy) value in the cache
        """
        with self.__rssCacheLock:
            resourceStatus = self.__rssCache.get(resourceKey)

        if resourceStatus:
            return S_OK({resourceKey: resourceStatus})
        return S_ERROR("Cannot get %s" % resourceKey)

    def getBulk(self, resourceKeys):
        """
        Gets values for resourceKeys in one ATOMIC operation.

        :return: S_OK( { key : value } ) / S_ERROR for the first key without a
                 (truthy) value in the cache
        """
        result = {}
        # "with" releases the lock on every exit path, including the early
        # S_ERROR return, which used to leave the lock held forever.
        with self.__rssCacheLock:
            for resourceKey in resourceKeys:
                resourceRow = self.__rssCache.get(resourceKey)
                if not resourceRow:
                    return S_ERROR("Cannot get %s" % resourceKey)
                result[resourceKey] = resourceRow
        return S_OK(result)

    def resetCache(self):
        """
        Reset cache.
        """
        with self.__rssCacheLock:
            self.__rssCache.purgeAll()
        return S_OK()

    def refreshCache(self):
        """
        Clears the cache and gets its latest version, not Thread safe !
        Acquire a lock before using it ! ( and release it afterwards ! )

        :return: S_OK( number of items added ) / S_ERROR
        """
        self.__rssCache.purgeAll()

        if self.__updateFunc is None:
            return S_ERROR("RSSCache has no updateFunction")
        newCache = self.__updateFunc()
        if not newCache["OK"]:
            return newCache

        return self.__updateCache(newCache["Value"])

    def refreshCacheAndHistory(self):
        """
        Method that refreshes the cache and updates the history. Not thread safe,
        you must acquire a lock before using it, and release it right after !
        """
        refreshResult = self.refreshCache()

        now = datetime.datetime.utcnow()

        if self.__rssCacheStatus:
            # Check the oldest record, which is the last one in the list
            dateInserted, _message = self.__rssCacheStatus[-1]
            if dateInserted < now - datetime.timedelta(hours=self.__cacheHistoryLifeTime):
                self.__rssCacheStatus.pop()

        # Newest record goes first
        self.__rssCacheStatus.insert(0, (now, refreshResult))

    ################################################################################
    # Private methods

    def __updateCache(self, newCache):
        """
        The new cache must be a dictionary, which should look like::

          { ( <resourceName>,<resourceStatusType0>) : whatever0,
            ( <resourceName>,<resourceStatusType1>) : whatever1,
          }

        :return: S_OK( number of items added )
        """
        itemsCounter = 0
        for cacheKey, cacheValue in newCache.items():
            self.__rssCache.add(cacheKey, self.__lifeTime, value=cacheValue)
            itemsCounter += 1
        return S_OK(itemsCounter)

    def __refreshCacheThreadRun(self):
        """
        Method that refreshes periodically the cache.
        """
        while not self.__refreshStop:
            # The lock is released even when the refresh raises, so a failing
            # update function cannot block every other user of the cache.
            with self.__rssCacheLock:
                self.refreshCacheAndHistory()
            time.sleep(self.__lifeTime)

        # Re-arm the flag for a later run
        self.__refreshStop = False
class IdProviderFactory(object):
    """Factory creating Identity Provider client objects from their configuration."""

    def __init__(self):
        """Standard constructor"""
        self.log = gLogger.getSubLogger(self.__class__.__name__)
        self.cacheMetadata = DictCache()

    @gCacheMetadata
    def getMetadata(self, idP):
        """Return the cached metadata of an identity provider.

        :param idP: cache key of the identity provider
        :return: the cached value, or an empty dict when nothing (truthy) is cached
        """
        return self.cacheMetadata.get(idP) or {}

    @gCacheMetadata
    def addMetadata(self, idP, data, time=24 * 3600):
        """Cache the metadata of an identity provider; empty data is ignored.

        :param idP: cache key of the identity provider
        :param data: metadata to store
        :param int time: cache lifetime in seconds (one day by default)
        """
        if data:
            self.cacheMetadata.add(idP, time, data)

    def getIdProviderForToken(self, token):
        """This method returns a IdProvider instance corresponding to the supplied
        issuer in a token.

        :param token: access token or dict with access_token key

        :return: S_OK(IdProvider)/S_ERROR()
        """
        if isinstance(token, dict):
            token = token["access_token"]

        # Read token without verification to get issuer
        payload = jwt.decode(token, leeway=300, options=dict(verify_signature=False, verify_aud=False))
        issuer = payload["iss"].strip("/")

        result = getSettingsNamesForIdPIssuer(issuer)
        if not result["OK"]:
            return result
        return self.getIdProvider(result["Value"])

    def getIdProvider(self, name, **kwargs):
        """This method returns a IdProvider instance corresponding to the supplied
        name.

        :param str name: the name of the Identity Provider client
        :param kwargs: extra settings overriding the configured ones; ``issuer``
                       is also used to collect the Authorization Server metadata

        :return: S_OK(IdProvider)/S_ERROR()
        """
        if not name:
            return S_ERROR("Identity Provider client name must be not None.")

        # Get Authorization Server metadata
        try:
            asMetaDict = collectMetadata(kwargs.get("issuer"), ignoreErrors=True)
        except Exception as e:
            return S_ERROR(str(e))

        self.log.debug("Search configuration for", name)

        clients = getDIRACClients()
        if name in clients:
            # It is a DIRAC default pre-registered client
            pDict = asMetaDict
            pDict.update(clients[name])
        else:
            # It is an external identity provider client
            result = getProviderInfo(name)
            if not result["OK"]:
                self.log.error("Failed to read configuration", "%s: %s" % (name, result["Message"]))
                return result
            pDict = result["Value"]
            # Set the default redirect_uri. The metadata is consulted only when
            # the provider has none of its own, so metadata without a
            # redirect_uri no longer breaks providers that define one.
            if "redirect_uri" not in pDict:
                pDict["redirect_uri"] = asMetaDict["redirect_uri"]

        pDict.update(kwargs)
        pDict["ProviderName"] = name

        self.log.verbose("Creating IdProvider of %s type with the name %s" % (pDict["ProviderType"], name))
        subClassName = "%sIdProvider" % pDict["ProviderType"]

        objectLoader = ObjectLoader.ObjectLoader()
        result = objectLoader.loadObject("Resources.IdProvider.%s" % subClassName, subClassName)
        if not result["OK"]:
            self.log.error("Failed to load object", "%s: %s" % (subClassName, result["Message"]))
            return result

        pClass = result["Value"]
        try:
            provider = pClass(**pDict)
        except Exception as x:
            msg = "IdProviderFactory could not instantiate %s object: %s" % (subClassName, str(x))
            self.log.exception()
            self.log.warn(msg)
            return S_ERROR(msg)

        return S_OK(provider)
class DIRACPilotDirector(PilotDirector):
    """
      DIRAC PilotDirector class
    """

    def __init__(self, submitPool):
        """
         Define some defaults and call parent __init__
        """
        self.gridMiddleware = 'DIRAC'

        PilotDirector.__init__(self, submitPool)

        self.computingElementList = COMPUTING_ELEMENTS
        self.computingElementDict = {}
        self.addComputingElement(self.computingElementList)

        self.siteName = gConfig.getValue('/LocalSite/Site', '')
        if not self.siteName:
            self.log.error('Can not run a Director if Site Name is not defined')
            sys.exit()

        self.__failingCECache = DictCache()
        self.__ticketsCECache = DictCache()

    def configure(self, csSection, submitPool):
        """
         Here goes common configuration for DIRAC PilotDirector
        """
        PilotDirector.configure(self, csSection, submitPool)
        self.reloadConfiguration(csSection, submitPool)

        self.__failingCECache.purgeExpired()
        self.__ticketsCECache.purgeExpired()

        # CEs still listed in the failing cache are not used
        for ce in self.__failingCECache.getKeys():
            self.computingElementDict.pop(ce, None)

        if not self.computingElementDict:
            return
        self.log.info(' ComputingElements:', ', '.join(self.computingElementDict.keys()))

        # FIXME: this is to start testing
        # list() keeps the "first item" lookup valid when items() is a view
        _ceName, computingElementDict = list(self.computingElementDict.items())[0]

        self.computingElement = computingElementDict['CE']

        self.log.debug(self.computingElement.getCEStatus())

        self.log.info(' SiteName:', self.siteName)

    def configureFromSection(self, mySection):
        """
          reload from CS
        """
        PilotDirector.configureFromSection(self, mySection)

        self.computingElementList = gConfig.getValue(mySection + '/ComputingElements',
                                                     self.computingElementList)
        self.addComputingElement(self.computingElementList)

        self.siteName = gConfig.getValue(mySection + '/SiteName', self.siteName)

    def addComputingElement(self, ceList):
        """
          Check if a CE object for the current CE is available,
          instantiate one if necessary

          Processing stops at the first CE that can not be created.
        """
        for CE in ceList:
            if CE not in self.computingElementDict:
                ceFactory = ComputingElementFactory()
                ceInstance = ceFactory.getCE(ceName=CE)
                if not ceInstance['OK']:
                    self.log.error('Can not create CE object:', ceInstance['Message'])
                    return
                self.computingElementDict[CE] = ceInstance['Value'].ceConfigDict
                # add the 'CE' instance at the end to avoid being overwritten
                self.computingElementDict[CE]['CE'] = ceInstance['Value']

    def _submitPilots(self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit,
                      ceMask, submitPrivatePilot, privateTQ, proxy, pilotsPerJob):
        """
          This method does the actual pilot submission to the DIRAC CE
          The logic is as follows:
          - If there are no available CE it return error
          - If there is no queue available in the CE's, it returns error
          - It creates a temp directory
          - It prepare a PilotScript

          The temporary working directory is removed and the previous current
          directory restored on every exit path.

          :return: S_OK( number of submitted pilots ) / S_ERROR()
        """
        taskQueueID = taskQueueDict['TaskQueueID']
        submittedPilots = 0

        pilotRequirements = []
        pilotRequirements.append(('CPUTime', taskQueueDict['CPUTime']))
        # do we need to care about anything else?
        pilotRequirementsString = str(pilotRequirements)

        # Check that there are available queues for the Jobs:
        if self.enableListMatch:
            availableQueues = []
            cachedAvailableQueues = self.listMatchCache.get(pilotRequirementsString)
            if cachedAvailableQueues is None:
                availableQueues = self._listQueues(pilotRequirements)
                # _listQueues returns False on failure; failures are not cached
                if availableQueues != False:
                    self.listMatchCache.add(pilotRequirementsString, self.listMatchDelay, availableQueues)
                    self.log.verbose('Available Queues for TaskQueue ',
                                     "%s: %s" % (taskQueueID, str(availableQueues)))
            else:
                availableQueues = cachedAvailableQueues

            if not availableQueues:
                return S_ERROR(ERROR_CE + ' TQ: %d' % taskQueueID)

        baseDir = os.getcwd()
        workingDirectory = tempfile.mkdtemp(prefix='TQ_%s_' % taskQueueID, dir=workDir)
        self.log.verbose('Using working Directory:', workingDirectory)

        try:
            os.chdir(workingDirectory)

            # Work on a copy: the caller's list must not be modified
            commonOptions = list(pilotOptions)
            # set the Site Name
            commonOptions.append("-n '%s'" % self.siteName)

            # submit pilots for every CE available
            for ceName in self.computingElementDict.keys():
                ceConfigDict = self.computingElementDict[ceName]
                computingElement = ceConfigDict['CE']

                # Each CE gets its own option list, so that the settings of one
                # CE do not end up in the pilot script of the next one
                ceOptions = list(commonOptions)

                # add possible requirements from Site and CE
                for req, val in getResourceDict(ceName).items():
                    ceOptions.append("-o '/AgentJobRequirements/%s=%s'" % (req, val))

                if 'ClientPlatform' in ceConfigDict:
                    ceOptions.append("-p '%s'" % ceConfigDict['ClientPlatform'])

                if 'SharedArea' in ceConfigDict:
                    ceOptions.append("-o '/LocalSite/SharedArea=%s'" % ceConfigDict['SharedArea'])

                self.log.info("pilotOptions: ", ' '.join(ceOptions))

                httpProxy = ''
                if 'HttpProxy' in ceConfigDict:
                    httpProxy = ceConfigDict['HttpProxy']

                # An empty value lets the generated wrapper fall back to the
                # default temporary directory of the execution host
                pilotExecDir = ''
                if 'JobExecDir' in ceConfigDict:
                    pilotExecDir = ceConfigDict['JobExecDir']

                try:
                    pilotScript = self._writePilotScript(workingDirectory, ceOptions, proxy,
                                                         httpProxy, pilotExecDir)
                except Exception:
                    self.log.exception(ERROR_SCRIPT)
                    return S_ERROR(ERROR_SCRIPT)
                if isinstance(pilotScript, dict):
                    # _writePilotScript hands back an error structure instead of
                    # a file name when the compression step fails
                    self.log.error(ERROR_SCRIPT, pilotScript.get('Message', ''))
                    return S_ERROR(ERROR_SCRIPT)

                self.log.info("Pilots to submit: ", pilotsToSubmit)
                while submittedPilots < pilotsToSubmit:
                    # Find out how many pilots can be submitted
                    ret = computingElement.available()
                    if not ret['OK']:
                        self.log.error('Can not determine if pilot should be submitted: ', ret['Message'])
                        break
                    maxPilotsToSubmit = ret['Value']
                    self.log.info("Submit Pilots: ", maxPilotsToSubmit)
                    if not maxPilotsToSubmit:
                        break
                    # submit the pilots and then check again
                    for _i in range(min(maxPilotsToSubmit, pilotsToSubmit - submittedPilots)):
                        submission = computingElement.submitJob(pilotScript, '', '')
                        if not submission['OK']:
                            self.log.error('Pilot submission failed: ', submission['Message'])
                            return S_ERROR('Pilot submission failed after ' + str(submittedPilots) +
                                           ' pilots submitted successful')
                        submittedPilots += 1
                        # let the batch system some time to digest the submitted job
                        time.sleep(1)
                # next CE

            return S_OK(submittedPilots)
        finally:
            # Best-effort cleanup, it must never hide the submission result
            try:
                os.chdir(baseDir)
                shutil.rmtree(workingDirectory)
            except Exception:
                self.log.warn('Failed to clean up working directory', workingDirectory)

    def _listQueues(self, pilotRequirements):
        """
         For each defined CE return the list of Queues with available, running and waiting slots,
         matching the requirements of the pilots.
         Currently only CPU time is considered

         :return: the 'Value' reported by the computing element, or False on failure
        """
        result = self.computingElement.available(pilotRequirements)
        if not result['OK']:
            self.log.error('Can not determine available queues', result['Message'])
            return False
        return result['Value']

    def _writePilotScript(self, workingDirectory, pilotOptions, proxy, httpProxy, pilotExecDir):
        """
         Prepare the script to execute the pilot
         For the moment it will do like Grid Pilots, a full DIRAC installation

         It assumes that the pilot script will have access to the submit working directory

         :return: path of the wrapper written in workingDirectory, or S_ERROR
                  when the proxy or the scripts can not be compressed
        """
        try:
            compressedAndEncodedProxy = base64.encodestring(
                bz2.compress(proxy.dumpAllToString()['Value'])).replace('\n', '')
            with open(self.pilot, "rb") as pilotFile:
                compressedAndEncodedPilot = base64.encodestring(
                    bz2.compress(pilotFile.read(), 9)).replace('\n', '')
            with open(self.install, "rb") as installFile:
                compressedAndEncodedInstall = base64.encodestring(
                    bz2.compress(installFile.read(), 9)).replace('\n', '')
        except Exception:
            self.log.exception('Exception during file compression of proxy, dirac-pilot or dirac-install')
            return S_ERROR('Exception during file compression of proxy, dirac-pilot or dirac-install')

        # The wrapper is a bash script feeding a python program to the interpreter
        # of the execution host; it unpacks the proxy and the scripts and then
        # runs the pilot.
        localPilot = """#!/bin/bash
/usr/bin/env python << EOF
#
import os, stat, tempfile, sys, shutil, base64, bz2
try:
  pilotExecDir = '%(pilotExecDir)s'
  if not pilotExecDir:
    pilotExecDir = None
  pilotWorkingDirectory = tempfile.mkdtemp( suffix = 'pilot', prefix = 'DIRAC_', dir = pilotExecDir )
  os.chdir( pilotWorkingDirectory )
  open( 'proxy', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedProxy)s" ) ) )
  open( '%(pilotScript)s', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedPilot)s" ) ) )
  open( '%(installScript)s', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedInstall)s" ) ) )
  os.chmod("proxy", stat.S_IRUSR | stat.S_IWUSR)
  os.chmod("%(pilotScript)s", stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
  os.chmod("%(installScript)s", stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
  if "LD_LIBRARY_PATH" not in os.environ:
    os.environ["LD_LIBRARY_PATH"]=""
  os.environ["X509_USER_PROXY"]=os.path.join(pilotWorkingDirectory, 'proxy')
  if "%(httpProxy)s":
    os.environ["HTTP_PROXY"]="%(httpProxy)s"
  os.environ["X509_CERT_DIR"]=os.path.join(pilotWorkingDirectory, 'etc/grid-security/certificates')
  # TODO: structure the output
  print '==========================================================='
  print 'Environment of execution host'
  for key in os.environ.keys():
    print key + '=' + os.environ[key]
  print '==========================================================='
except Exception, x:
  print >> sys.stderr, x
  sys.exit(-1)
cmd = "python %(pilotScript)s %(pilotOptions)s"
print 'Executing: ', cmd
sys.stdout.flush()
os.system( cmd )

shutil.rmtree( pilotWorkingDirectory )

EOF
""" % {'compressedAndEncodedProxy': compressedAndEncodedProxy,
       'compressedAndEncodedPilot': compressedAndEncodedPilot,
       'compressedAndEncodedInstall': compressedAndEncodedInstall,
       'httpProxy': httpProxy,
       'pilotScript': os.path.basename(self.pilot),
       'installScript': os.path.basename(self.install),
       'pilotOptions': ' '.join(pilotOptions),
       'pilotExecDir': pilotExecDir}

        fd, name = tempfile.mkstemp(suffix='_pilotwrapper.py', prefix='DIRAC_', dir=workingDirectory)
        pilotWrapper = os.fdopen(fd, 'w')
        try:
            pilotWrapper.write(localPilot)
        finally:
            pilotWrapper.close()
        return name

    def _getPilotProxyFromDIRACGroup(self, ownerDN, ownerGroup, requiredTimeLeft):
        """
        Download a limited pilot proxy with VOMS extensions depending on the group
        """
        # Assign VOMS attribute
        vomsAttr = CS.getVOMSAttributeForGroup(ownerGroup)
        if not vomsAttr:
            self.log.info("Downloading a proxy without VOMS extensions for %s@%s" % (ownerDN, ownerGroup))
            return gProxyManager.downloadProxy(ownerDN, ownerGroup, limited=True,
                                               requiredTimeLeft=requiredTimeLeft)
        else:
            self.log.info("Downloading a proxy with '%s' VOMS extension for %s@%s" % (vomsAttr,
                                                                                      ownerDN,
                                                                                      ownerGroup))
            return gProxyManager.downloadVOMSProxy(ownerDN,
                                                   ownerGroup,
                                                   limited=True,
                                                   requiredTimeLeft=requiredTimeLeft,
                                                   requiredVOMSAttribute=vomsAttr)
class Limiter(object):
    """Builds "negative" matching conditions that throttle job matching per site.

    Two kinds of limits are read from the Operations configuration: running
    limits (maximum number of jobs per attribute value) and matching delays
    (time to wait before matching the same attribute value again).
    """

    def __init__(self, jobDB=None, opsHelper=None):
        """ Constructor

        :param jobDB: job database object, a JobDB() is created when not given
        :param opsHelper: Operations helper, an Operations() is created when not given
        """
        self.__runningLimitSection = "JobScheduling/RunningLimit"
        self.__matchingDelaySection = "JobScheduling/MatchingDelay"
        self.csDictCache = DictCache()
        self.condCache = DictCache()
        # One DictCache of pending delays per site, created on demand
        self.delayMem = {}

        self.jobDB = jobDB if jobDB else JobDB()

        self.log = gLogger.getSubLogger("Limiter")

        self.__opsHelper = opsHelper if opsHelper else Operations()

    def getNegativeCond(self):
        """ Get negative condition for ALL sites

        :return: list of condition dicts, each one tagged with its 'Site'
        """
        cachedCond = self.condCache.get("GLOBAL")
        if cachedCond:
            return cachedCond

        condBySite = {}

        # Run Limit
        sectionsResult = self.__opsHelper.getSections(self.__runningLimitSection)
        runningSites = sectionsResult['Value'] if sectionsResult['OK'] else []
        for siteName in runningSites:
            condResult = self.__getRunningCondition(siteName)
            if condResult['OK'] and condResult['Value']:
                condBySite[siteName] = condResult['Value']

        # Delay limit
        sectionsResult = self.__opsHelper.getSections(self.__matchingDelaySection)
        delaySites = sectionsResult['Value'] if sectionsResult['OK'] else []
        for siteName in delaySites:
            condResult = self.__getDelayCondition(siteName)
            if not condResult['OK']:
                continue
            delayData = condResult['Value']
            if not delayData:
                continue
            if siteName in condBySite:
                condBySite[siteName] = self.__mergeCond(condBySite[siteName], delayData)
            else:
                condBySite[siteName] = delayData

        orCond = []
        for siteName, siteCond in condBySite.items():
            siteCond['Site'] = siteName
            orCond.append(siteCond)

        # The global answer is kept for 10 seconds
        self.condCache.add("GLOBAL", 10, orCond)
        return orCond

    def getNegativeCondForSite(self, siteName):
        """ Generate a negative query based on the limits set on the site

        :param str siteName: name of the site

        :return: dict of attribute name -> list of values that must not be matched
        """
        # Check if Limits are imposed onto the site
        negativeCond = {}
        if self.__opsHelper.getValue("JobScheduling/CheckJobLimits", True):
            runningResult = self.__getRunningCondition(siteName)
            if runningResult['OK']:
                negativeCond = runningResult['Value']
            self.log.verbose('Negative conditions for site %s after checking limits are: %s' %
                             (siteName, str(negativeCond)))

        if self.__opsHelper.getValue("JobScheduling/CheckMatchingDelay", True):
            delayResult = self.__getDelayCondition(siteName)
            if delayResult['OK']:
                delayCond = delayResult['Value']
                self.log.verbose('Negative conditions for site %s after delay checking are: %s' %
                                 (siteName, str(delayCond)))
                negativeCond = self.__mergeCond(negativeCond, delayCond)

        if negativeCond:
            self.log.info('Negative conditions for site %s are: %s' %
                          (siteName, str(negativeCond)))

        return negativeCond

    def __mergeCond(self, negCond, addCond):
        """ Merge two negative dicts

        The values of addCond are added to negCond in place, skipping those
        already present; negCond itself is returned.
        """
        for attr, extraValues in addCond.items():
            knownValues = negCond.setdefault(attr, [])
            for value in extraValues:
                if value not in knownValues:
                    knownValues.append(value)
        return negCond

    def __extractCSData(self, section):
        """ Extract limiting information from the CS in the form:
            { 'JobType' : { 'Merge' : 20, 'MCGen' : 1000 } }

        :param str section: configuration section to read

        :return: S_OK(dict)/S_ERROR()
        """
        cachedDict = self.csDictCache.get(section)
        if cachedDict:
            return S_OK(cachedDict)

        sectionsResult = self.__opsHelper.getSections(section)
        if not sectionsResult['OK']:
            return sectionsResult

        limitsByAttribute = {}
        for attName in sectionsResult['Value']:
            optionsResult = self.__opsHelper.getOptionsDict("%s/%s" % (section, attName))
            if not optionsResult['OK']:
                return optionsResult
            rawLimits = optionsResult['Value']
            try:
                attLimits = {key: int(rawLimits[key]) for key in rawLimits}
            except Exception as excp:
                errMsg = "%s/%s has to contain numbers: %s" % (section, attName, str(excp))
                self.log.error(errMsg)
                return S_ERROR(errMsg)
            limitsByAttribute[attName] = attLimits

        # Configuration data is kept for 300 seconds
        self.csDictCache.add(section, 300, limitsByAttribute)
        return S_OK(limitsByAttribute)

    def __getRunningCondition(self, siteName):
        """ Get extra conditions allowing site throttling

        :param str siteName: name of the site

        :return: S_OK(dict)/S_ERROR() -- the dict is like {'JobType': ['Merge']}
        """
        siteSection = "%s/%s" % (self.__runningLimitSection, siteName)
        csResult = self.__extractCSData(siteSection)
        if not csResult['OK']:
            return csResult
        # limitsDict is something like { 'JobType' : { 'Merge' : 20, 'MCGen' : 1000 } }
        limitsDict = csResult['Value']
        if not limitsDict:
            return S_OK({})

        # Check if the site exceeding the given limits
        negCond = {}
        for attName in limitsDict:
            if attName not in self.jobDB.jobAttributeNames:
                self.log.error("Attribute %s does not exist. Check the job limits" % attName)
                continue

            cacheKey = "Running:%s:%s" % (siteName, attName)
            counters = self.condCache.get(cacheKey)
            if not counters:
                countResult = self.jobDB.getCounters(
                    'Jobs', [attName], {'Site': siteName, 'Status': ['Running', 'Matched', 'Stalled']})
                if not countResult['OK']:
                    return countResult
                counters = {record[0][attName]: record[1] for record in countResult['Value']}
                # Counters are kept for 10 seconds
                self.condCache.add(cacheKey, 10, counters)

            for attValue, limit in limitsDict[attName].items():
                running = counters.get(attValue, 0)
                if running < limit:
                    continue
                self.log.verbose('Job Limit imposed at %s on %s/%s=%d,'
                                 ' %d jobs already deployed' % (siteName, attName, attValue, limit, running))
                negCond.setdefault(attName, []).append(attValue)

        # negCond is something like : {'JobType': ['Merge']}
        return S_OK(negCond)

    def updateDelayCounters(self, siteName, jid):
        """ Register matching delays for the attribute values of a job

        :param str siteName: name of the site
        :param jid: job identifier passed to the job database

        :return: S_OK()/S_ERROR()
        """
        # Get the info from the CS
        siteSection = "%s/%s" % (self.__matchingDelaySection, siteName)
        csResult = self.__extractCSData(siteSection)
        if not csResult['OK']:
            return csResult
        # delayDict is something like { 'JobType' : { 'Merge' : 20, 'MCGen' : 1000 } }
        delayDict = csResult['Value']
        if not delayDict:
            return S_OK()

        attNames = []
        for attName in delayDict:
            if attName in self.jobDB.jobAttributeNames:
                attNames.append(attName)
            else:
                self.log.error("Attribute %s does not exist in the JobDB. Please fix it!" % attName)

        attResult = self.jobDB.getJobAttributes(jid, attNames)
        if not attResult['OK']:
            self.log.error("While retrieving attributes coming from %s: %s" % (siteSection, attResult['Message']))
            return attResult
        jobAttributes = attResult['Value']

        # Create the DictCache if not there
        if siteName not in self.delayMem:
            self.delayMem[siteName] = DictCache()

        # Update the counters
        delayCounter = self.delayMem[siteName]
        for attName, attValue in jobAttributes.items():
            if attValue not in delayDict[attName]:
                continue
            delayTime = delayDict[attName][attValue]
            self.log.notice("Adding delay for %s/%s=%s of %s secs" % (siteName, attName,
                                                                      attValue, delayTime))
            delayCounter.add((attName, attValue), delayTime)
        return S_OK()

    def __getDelayCondition(self, siteName):
        """ Get extra conditions allowing matching delay

        :param str siteName: name of the site

        :return: S_OK(dict) -- attribute name -> list of values still delayed
        """
        if siteName not in self.delayMem:
            return S_OK({})

        negCond = {}
        for attName, attValue in self.delayMem[siteName].getKeys():
            negCond.setdefault(attName, []).append(attValue)
        return S_OK(negCond)
class ProxyManagerClient(object):
    """Client of the Framework/ProxyManager service.

    Registered users, downloaded proxies and dumped proxy files are kept in
    local caches to minimize the number of queries to the service.
    """

    def __init__(self):
        self.__usersCache = DictCache()
        self.__proxiesCache = DictCache()
        self.__vomsProxiesCache = DictCache()
        self.__pilotProxiesCache = DictCache()
        # The callback is handed to the cache so that dumped proxy files can
        # be removed from disk
        self.__filesCache = DictCache(self.__deleteTemporalFile)

    def __deleteTemporalFile(self, filename):
        """ Delete temporal file

            :param str filename: path to file
        """
        try:
            os.remove(filename)
        except Exception:
            # Best effort: a file that is already gone is not a problem
            pass

    def clearCaches(self):
        """ Clear caches
        """
        self.__usersCache.purgeAll()
        self.__proxiesCache.purgeAll()
        self.__vomsProxiesCache.purgeAll()
        self.__pilotProxiesCache.purgeAll()

    def __getSecondsLeftToExpiration(self, expiration, utc=True):
        """ Get time left to expiration in seconds

            :param datetime expiration: expiration time
            :param boolean utc: whether the expiration time is expressed in UTC

            :return: int -- seconds left (microseconds are dropped)
        """
        if utc:
            td = expiration - datetime.datetime.utcnow()
        else:
            td = expiration - datetime.datetime.now()
        return td.days * 86400 + td.seconds

    def __refreshUserCache(self, validSeconds=0):
        """ Refresh user cache

            :param int validSeconds: required seconds the proxy is valid for

            :return: S_OK()/S_ERROR()
        """
        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        retVal = rpcClient.getRegisteredUsers(validSeconds)
        if not retVal['OK']:
            return retVal
        data = retVal['Value']
        # Update the cache
        for record in data:
            cacheKey = (record['DN'], record['group'])
            self.__usersCache.add(cacheKey,
                                  self.__getSecondsLeftToExpiration(record['expirationtime']),
                                  record)
        return S_OK()

    @gUsersSync
    def userHasProxy(self, userDN, userGroup, validSeconds=0):
        """ Check if a user(DN-group) has a proxy in the proxy management
            Updates internal cache if needed to minimize queries to the service

            :param str userDN: user DN
            :param str userGroup: user group
            :param int validSeconds: proxy valid time in seconds

            :return: S_OK(bool)/S_ERROR()
        """
        # For backward compatibility reasons with versions prior to v7r1
        # we need to check for proxy with a group
        # AND for groupless proxy even if not specified
        cacheKeys = ((userDN, userGroup), (userDN, ''))
        for cacheKey in cacheKeys:
            if self.__usersCache.exists(cacheKey, validSeconds):
                return S_OK(True)

        # Get the list of users with a proxy valid for at least validSeconds
        gLogger.verbose("Updating list of users in proxy management")
        retVal = self.__refreshUserCache(validSeconds)
        if not retVal['OK']:
            return retVal

        for cacheKey in cacheKeys:
            if self.__usersCache.exists(cacheKey, validSeconds):
                return S_OK(True)

        return S_OK(False)

    @gUsersSync
    def getUserPersistence(self, userDN, userGroup, validSeconds=0):
        """ Check if a user(DN-group) has a persistent proxy in the proxy management
            Updates internal cache if needed to minimize queries to the service

            :param str userDN: user DN
            :param str userGroup: user group
            :param int validSeconds: proxy valid time in seconds

            :return: S_OK(bool)/S_ERROR()
        """
        cacheKey = (userDN, userGroup)
        userData = self.__usersCache.get(cacheKey, validSeconds)
        if userData:
            if userData['persistent']:
                return S_OK(True)
        # Get the list of users with a proxy valid for at least validSeconds
        gLogger.verbose("Updating list of users in proxy management")
        retVal = self.__refreshUserCache(validSeconds)
        if not retVal['OK']:
            return retVal
        userData = self.__usersCache.get(cacheKey, validSeconds)
        if userData:
            return S_OK(userData['persistent'])
        return S_OK(False)

    def setPersistency(self, userDN, userGroup, persistent):
        """ Set the persistency for user/group

            :param str userDN: user DN
            :param str userGroup: user group
            :param boolean persistent: persistent flag

            :return: S_OK()/S_ERROR()
        """
        # Ensure a real bool is sent in the rpc call
        persistentFlag = bool(persistent)
        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        retVal = rpcClient.setPersistency(userDN, userGroup, persistentFlag)
        if not retVal['OK']:
            return retVal
        # Update internal persistency cache
        cacheKey = (userDN, userGroup)
        record = self.__usersCache.get(cacheKey, 0)
        if record:
            record['persistent'] = persistentFlag
            self.__usersCache.add(cacheKey,
                                  self.__getSecondsLeftToExpiration(record['expirationtime']),
                                  record)
        return retVal

    def uploadProxy(self, proxy=None, restrictLifeTime=0, rfcIfPossible=False):
        """ Upload a proxy to the proxy management service using delegation

            :param X509Chain proxy: proxy as a chain, or path to a proxy file;
                   the default proxy location is used when not given
            :param int restrictLifeTime: proxy live time in seconds
            :param boolean rfcIfPossible: make rfc proxy if possible

            :return: S_OK(dict)/S_ERROR() -- dict contain proxies
        """
        # Discover proxy location
        if isinstance(proxy, X509Chain):
            chain = proxy
            proxyLocation = ""
        else:
            if not proxy:
                proxyLocation = Locations.getProxyLocation()
                if not proxyLocation:
                    return S_ERROR("Can't find a valid proxy")
            elif isinstance(proxy, six.string_types):
                proxyLocation = proxy
            else:
                return S_ERROR("Can't find a valid proxy")
            chain = X509Chain()
            result = chain.loadProxyFromFile(proxyLocation)
            if not result['OK']:
                return S_ERROR("Can't load %s: %s " % (proxyLocation, result['Message']))

        # Make sure it's valid
        if chain.hasExpired().get('Value'):
            return S_ERROR("Proxy %s has expired" % proxyLocation)
        if chain.getDIRACGroup().get('Value') or chain.isVOMS().get('Value'):
            return S_ERROR("Cannot upload proxy with DIRAC group or VOMS extensions")

        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        # Get a delegation request
        result = rpcClient.requestDelegationUpload(chain.getRemainingSecs()['Value'])
        if not result['OK']:
            return result
        reqDict = result['Value']
        # Generate delegated chain, one minute shorter than the uploaded proxy
        chainLifeTime = chain.getRemainingSecs()['Value'] - 60
        if restrictLifeTime and restrictLifeTime < chainLifeTime:
            chainLifeTime = restrictLifeTime
        retVal = chain.generateChainFromRequestString(reqDict['request'],
                                                      lifetime=chainLifeTime,
                                                      rfc=rfcIfPossible)
        if not retVal['OK']:
            return retVal
        # Upload!
        result = rpcClient.completeDelegationUpload(reqDict['id'], retVal['Value'])
        if not result['OK']:
            return result
        return S_OK(result.get('proxies') or result['Value'])

    @gProxiesSync
    def downloadProxy(self, userDN, userGroup, limited=False, requiredTimeLeft=1200,
                      cacheTime=14400, proxyToConnect=None, token=None):
        """ Get a proxy Chain from the proxy management

            :param str userDN: user DN
            :param str userGroup: user group
            :param boolean limited: if need limited proxy
            :param int requiredTimeLeft: required proxy live time in seconds
            :param int cacheTime: store in a cache time in seconds
            :param X509Chain proxyToConnect: proxy as a chain
            :param str token: valid token to get a proxy

            :return: S_OK(X509Chain)/S_ERROR()
        """
        # "limited" is part of the key, as in downloadVOMSProxy: a cached full
        # proxy must never be handed out when a limited one is requested
        cacheKey = (userDN, userGroup, limited)
        if self.__proxiesCache.exists(cacheKey, requiredTimeLeft):
            return S_OK(self.__proxiesCache.get(cacheKey))
        req = X509Request()
        req.generateProxyRequest(limited=limited)
        if proxyToConnect:
            rpcClient = RPCClient("Framework/ProxyManager", proxyChain=proxyToConnect, timeout=120)
        else:
            rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        if token:
            retVal = rpcClient.getProxyWithToken(userDN, userGroup, req.dumpRequest()['Value'],
                                                 int(cacheTime + requiredTimeLeft), token)
        else:
            retVal = rpcClient.getProxy(userDN, userGroup, req.dumpRequest()['Value'],
                                        int(cacheTime + requiredTimeLeft))
        if not retVal['OK']:
            return retVal
        chain = X509Chain(keyObj=req.getPKey())
        retVal = chain.loadChainFromString(retVal['Value'])
        if not retVal['OK']:
            return retVal
        self.__proxiesCache.add(cacheKey, chain.getRemainingSecs()['Value'], chain)
        return S_OK(chain)

    def downloadProxyToFile(self, userDN, userGroup, limited=False, requiredTimeLeft=1200,
                            cacheTime=14400, filePath=None, proxyToConnect=None, token=None):
        """ Get a proxy Chain from the proxy management and write it to file

            :param str userDN: user DN
            :param str userGroup: user group
            :param boolean limited: if need limited proxy
            :param int requiredTimeLeft: required proxy live time in seconds
            :param int cacheTime: store in a cache time in seconds
            :param str filePath: path to save proxy
            :param X509Chain proxyToConnect: proxy as a chain
            :param str token: valid token to get a proxy

            :return: S_OK(str)/S_ERROR() -- file name; the chain is added to the
                     result under the 'chain' key
        """
        retVal = self.downloadProxy(userDN, userGroup, limited, requiredTimeLeft, cacheTime, proxyToConnect, token)
        if not retVal['OK']:
            return retVal
        chain = retVal['Value']
        retVal = self.dumpProxyToFile(chain, filePath)
        if not retVal['OK']:
            return retVal
        retVal['chain'] = chain
        return retVal

    @gVOMSProxiesSync
    def downloadVOMSProxy(self, userDN, userGroup, limited=False, requiredTimeLeft=1200,
                          cacheTime=14400, requiredVOMSAttribute=None,
                          proxyToConnect=None, token=None):
        """ Download a proxy if needed and transform it into a VOMS one

            :param str userDN: user DN
            :param str userGroup: user group
            :param boolean limited: if need limited proxy
            :param int requiredTimeLeft: required proxy live time in seconds
            :param int cacheTime: store in a cache time in seconds
            :param str requiredVOMSAttribute: VOMS attr to add to the proxy
            :param X509Chain proxyToConnect: proxy as a chain
            :param str token: valid token to get a proxy

            :return: S_OK(X509Chain)/S_ERROR()
        """
        cacheKey = (userDN, userGroup, requiredVOMSAttribute, limited)
        if self.__vomsProxiesCache.exists(cacheKey, requiredTimeLeft):
            return S_OK(self.__vomsProxiesCache.get(cacheKey))
        req = X509Request()
        req.generateProxyRequest(limited=limited)
        if proxyToConnect:
            rpcClient = RPCClient("Framework/ProxyManager", proxyChain=proxyToConnect, timeout=120)
        else:
            rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        if token:
            retVal = rpcClient.getVOMSProxyWithToken(userDN, userGroup, req.dumpRequest()['Value'],
                                                     int(cacheTime + requiredTimeLeft), token, requiredVOMSAttribute)
        else:
            retVal = rpcClient.getVOMSProxy(userDN, userGroup, req.dumpRequest()['Value'],
                                            int(cacheTime + requiredTimeLeft), requiredVOMSAttribute)
        if not retVal['OK']:
            return retVal
        chain = X509Chain(keyObj=req.getPKey())
        retVal = chain.loadChainFromString(retVal['Value'])
        if not retVal['OK']:
            return retVal
        self.__vomsProxiesCache.add(cacheKey, chain.getRemainingSecs()['Value'], chain)
        return S_OK(chain)

    def downloadVOMSProxyToFile(self, userDN, userGroup, limited=False, requiredTimeLeft=1200,
                                cacheTime=14400, requiredVOMSAttribute=None, filePath=None,
                                proxyToConnect=None, token=None):
        """ Download a proxy if needed, transform it into a VOMS one and write it to file

            :param str userDN: user DN
            :param str userGroup: user group
            :param boolean limited: if need limited proxy
            :param int requiredTimeLeft: required proxy live time in seconds
            :param int cacheTime: store in a cache time in seconds
            :param str requiredVOMSAttribute: VOMS attr to add to the proxy
            :param str filePath: path to save proxy
            :param X509Chain proxyToConnect: proxy as a chain
            :param str token: valid token to get a proxy

            :return: S_OK(str)/S_ERROR() -- file name; the chain is added to the
                     result under the 'chain' key
        """
        retVal = self.downloadVOMSProxy(userDN, userGroup, limited, requiredTimeLeft, cacheTime,
                                        requiredVOMSAttribute, proxyToConnect, token)
        if not retVal['OK']:
            return retVal
        chain = retVal['Value']
        retVal = self.dumpProxyToFile(chain, filePath)
        if not retVal['OK']:
            return retVal
        retVal['chain'] = chain
        return retVal

    def getPilotProxyFromDIRACGroup(self, userDN, userGroup, requiredTimeLeft=43200, proxyToConnect=None):
        """ Download a pilot proxy with VOMS extensions depending on the group

            :param str userDN: user DN
            :param str userGroup: user group
            :param int requiredTimeLeft: required proxy live time in seconds
            :param X509Chain proxyToConnect: proxy as a chain

            :return: S_OK(X509Chain)/S_ERROR()
        """
        # Assign VOMS attribute
        vomsAttr = Registry.getVOMSAttributeForGroup(userGroup)
        if not vomsAttr:
            gLogger.warn("No voms attribute assigned to group %s when requested pilot proxy" % userGroup)
            return self.downloadProxy(userDN, userGroup, limited=False, requiredTimeLeft=requiredTimeLeft,
                                      proxyToConnect=proxyToConnect)
        else:
            return self.downloadVOMSProxy(userDN, userGroup, limited=False, requiredTimeLeft=requiredTimeLeft,
                                          requiredVOMSAttribute=vomsAttr, proxyToConnect=proxyToConnect)

    def getPilotProxyFromVOMSGroup(self, userDN, vomsAttr, requiredTimeLeft=43200, proxyToConnect=None):
        """ Download a pilot proxy with VOMS extensions depending on the VOMS attribute

            :param str userDN: user DN
            :param str vomsAttr: VOMS attribute
            :param int requiredTimeLeft: required proxy live time in seconds
            :param X509Chain proxyToConnect: proxy as a chain

            :return: S_OK(X509Chain)/S_ERROR()
        """
        groups = Registry.getGroupsWithVOMSAttribute(vomsAttr)
        if not groups:
            return S_ERROR("No group found that has %s as voms attrs" % vomsAttr)

        # Try the groups in turn; the last error is returned when none works
        for userGroup in groups:
            result = self.downloadVOMSProxy(userDN, userGroup,
                                            limited=False,
                                            requiredTimeLeft=requiredTimeLeft,
                                            requiredVOMSAttribute=vomsAttr,
                                            proxyToConnect=proxyToConnect)
            if result['OK']:
                return result
        return result

    def getPayloadProxyFromDIRACGroup(self, userDN, userGroup, requiredTimeLeft, token=None, proxyToConnect=None):
        """ Download a payload proxy with VOMS extensions depending on the group

            :param str userDN: user DN
            :param str userGroup: user group
            :param int requiredTimeLeft: required proxy live time in seconds
            :param str token: valid token to get a proxy
            :param X509Chain proxyToConnect: proxy as a chain

            :return: S_OK(X509Chain)/S_ERROR()
        """
        # Assign VOMS attribute
        vomsAttr = Registry.getVOMSAttributeForGroup(userGroup)
        if not vomsAttr:
            gLogger.verbose("No voms attribute assigned to group %s when requested payload proxy" % userGroup)
            return self.downloadProxy(userDN, userGroup, limited=True, requiredTimeLeft=requiredTimeLeft,
                                      proxyToConnect=proxyToConnect, token=token)
        else:
            return self.downloadVOMSProxy(userDN, userGroup, limited=True, requiredTimeLeft=requiredTimeLeft,
                                          requiredVOMSAttribute=vomsAttr, proxyToConnect=proxyToConnect,
                                          token=token)

    def getPayloadProxyFromVOMSGroup(self, userDN, vomsAttr, token, requiredTimeLeft, proxyToConnect=None):
        """ Download a payload proxy with VOMS extensions depending on the VOMS attr

            :param str userDN: user DN
            :param str vomsAttr: VOMS attribute
            :param str token: valid token to get a proxy
            :param int requiredTimeLeft: required proxy live time in seconds
            :param X509Chain proxyToConnect: proxy as a chain

            :return: S_OK(X509Chain)/S_ERROR()
        """
        groups = Registry.getGroupsWithVOMSAttribute(vomsAttr)
        if not groups:
            return S_ERROR("No group found that has %s as voms attrs" % vomsAttr)
        userGroup = groups[0]

        return self.downloadVOMSProxy(userDN, userGroup,
                                      limited=True,
                                      requiredTimeLeft=requiredTimeLeft,
                                      requiredVOMSAttribute=vomsAttr,
                                      proxyToConnect=proxyToConnect,
                                      token=token)

    def dumpProxyToFile(self, chain, destinationFile=None, requiredTimeLeft=600):
        """ Dump a proxy to a file. It's cached so multiple calls won't generate extra files

            :param X509Chain chain: proxy as a chain
            :param str destinationFile: path to store proxy
            :param int requiredTimeLeft: required proxy live time in seconds

            :return: S_OK(str)/S_ERROR()
        """
        result = chain.hash()
        if not result['OK']:
            return result
        cHash = result['Value']
        if self.__filesCache.exists(cHash, requiredTimeLeft):
            filepath = self.__filesCache.get(cHash)
            if filepath and os.path.isfile(filepath):
                return S_OK(filepath)
            # The cached file is gone, forget it and dump the chain again
            self.__filesCache.delete(cHash)
        retVal = chain.dumpAllToFile(destinationFile)
        if not retVal['OK']:
            return retVal
        filename = retVal['Value']
        self.__filesCache.add(cHash, chain.getRemainingSecs()['Value'], filename)
        return S_OK(filename)

    def deleteGeneratedProxyFile(self, chain):
        """ Delete a file generated by a dump

            :param X509Chain chain: proxy as a chain

            :return: S_OK()
        """
        # dumpProxyToFile stores the files under the hash of the chain, so the
        # same key has to be used here
        result = chain.hash()
        if result['OK']:
            self.__filesCache.delete(result['Value'])
        return S_OK()

    def deleteProxyBundle(self, idList):
        """ delete a list of id's

            :param list,tuple idList: list of identity numbers

            :return: S_OK(int)/S_ERROR()
        """
        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        return rpcClient.deleteProxyBundle(idList)

    def requestToken(self, requesterDN, requesterGroup, numUses=1):
        """ Request a token that can be used numUses times

            :param str requesterDN: user DN
            :param str requesterGroup: user group
            :param int numUses: number of uses

            :return: S_OK(tuple)/S_ERROR() -- tuple contain token, number uses
        """
        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        return rpcClient.generateToken(requesterDN, requesterGroup, numUses)

    def renewProxy(self, proxyToBeRenewed=None, minLifeTime=3600, newProxyLifeTime=43200, proxyToConnect=None):
        """ Renew a proxy using the ProxyManager

            :param X509Chain proxyToBeRenewed: proxy to renew
            :param int minLifeTime: if proxy life time is less than this, renew. Skip otherwise
            :param int newProxyLifeTime: life time of new proxy
            :param X509Chain proxyToConnect: proxy to use for connecting to the service

            :return: S_OK(X509Chain)/S_ERROR()
        """
        retVal = multiProxyArgument(proxyToBeRenewed)
        # Test the status flag: an error result must be returned to the caller
        # as it is, not indexed for a value
        if not retVal['OK']:
            return retVal
        proxyToRenewDict = retVal['Value']

        secs = proxyToRenewDict['chain'].getRemainingSecs()['Value']
        if secs > minLifeTime:
            deleteMultiProxy(proxyToRenewDict)
            return S_OK()

        if not proxyToConnect:
            proxyToConnectDict = {'chain': False, 'tempFile': False}
        else:
            retVal = multiProxyArgument(proxyToConnect)
            if not retVal['OK']:
                deleteMultiProxy(proxyToRenewDict)
                return retVal
            proxyToConnectDict = retVal['Value']

        userDN = proxyToRenewDict['chain'].getIssuerCert()['Value'].getSubjectDN()['Value']
        retVal = proxyToRenewDict['chain'].getDIRACGroup()
        if not retVal['OK']:
            deleteMultiProxy(proxyToRenewDict)
            deleteMultiProxy(proxyToConnectDict)
            return retVal
        userGroup = retVal['Value']
        limited = proxyToRenewDict['chain'].isLimitedProxy()['Value']

        voms = VOMS()
        retVal = voms.getVOMSAttributes(proxyToRenewDict['chain'])
        if not retVal['OK']:
            deleteMultiProxy(proxyToRenewDict)
            deleteMultiProxy(proxyToConnectDict)
            return retVal
        vomsAttrs = retVal['Value']
        if vomsAttrs:
            retVal = self.downloadVOMSProxy(userDN, userGroup,
                                            limited=limited,
                                            requiredTimeLeft=newProxyLifeTime,
                                            requiredVOMSAttribute=vomsAttrs[0],
                                            proxyToConnect=proxyToConnectDict['chain'])
        else:
            retVal = self.downloadProxy(userDN, userGroup,
                                        limited=limited,
                                        requiredTimeLeft=newProxyLifeTime,
                                        proxyToConnect=proxyToConnectDict['chain'])

        deleteMultiProxy(proxyToRenewDict)
        deleteMultiProxy(proxyToConnectDict)

        if not retVal['OK']:
            return retVal

        chain = retVal['Value']

        # A proxy that came from a real file is renewed in place
        if not proxyToRenewDict['tempFile']:
            return chain.dumpAllToFile(proxyToRenewDict['file'])

        return S_OK(chain)

    def getDBContents(self, condDict={}, sorting=[['UserDN', 'DESC']], start=0, limit=0):
        """ Get the contents of the db

            :param dict condDict: search condition
            :param list sorting: sorting criteria
            :param int start: passed to the service, presumably the first record to return
            :param int limit: passed to the service, presumably the maximum number of records

            :return: S_OK(dict)/S_ERROR() -- dict contain fields, record list, total records
        """
        # The default arguments are only forwarded to the service, never modified
        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        return rpcClient.getContents(condDict, sorting, start, limit)

    def getVOMSAttributes(self, chain):
        """ Get the voms attributes for a chain

            :param X509Chain chain: proxy as a chain

            :return: S_OK(str)/S_ERROR()
        """
        return VOMS().getVOMSAttributes(chain)

    def getUploadedProxyLifeTime(self, DN, group):
        """ Get the remaining seconds for an uploaded proxy

            :param str DN: user DN
            :param str group: group

            :return: S_OK(int)/S_ERROR()
        """
        result = self.getDBContents({'UserDN': [DN], 'UserGroup': [group]})
        if not result['OK']:
            return result
        data = result['Value']
        if len(data['Records']) == 0:
            return S_OK(0)
        pNames = list(data['ParameterNames'])
        dnPos = pNames.index('UserDN')
        groupPos = pNames.index('UserGroup')
        expiryPos = pNames.index('ExpirationTime')
        for row in data['Records']:
            if DN == row[dnPos] and group == row[groupPos]:
                secondsLeft = self.__getSecondsLeftToExpiration(row[expiryPos])
                # An expired proxy has no time left, never a negative one
                return S_OK(max(0, secondsLeft))
        return S_OK(0)

    def getUserProxiesInfo(self):
        """ Get the user proxies uploaded info

            :return: S_OK(dict)/S_ERROR()
        """
        result = RPCClient("Framework/ProxyManager", timeout=120).getUserProxiesInfo()
        if 'rpcStub' in result:
            result.pop('rpcStub')
        return result
def __init__(self):
    """
    Create the caches held by this object.

    Four argument-less ``DictCache`` instances are created (users, proxies,
    VOMS proxies, pilot proxies). The files cache is the only one built with
    an argument: ``self.__deleteTemporalFile``.
    """
    # Files cache: constructed with the temporary-file removal callable
    self.__filesCache = DictCache(self.__deleteTemporalFile)

    # Plain caches
    self.__pilotProxiesCache = DictCache()
    self.__vomsProxiesCache = DictCache()
    self.__proxiesCache = DictCache()
    self.__usersCache = DictCache()
class CredentialsClient:
    """
    Client of the "WebAPI/Credentials" service with local caches placed in
    front of the remote calls.

    Cache layout, as used by the methods of this class:
      * consumers: keyed by the consumer key
      * requests:  keyed by the request identifier
      * tokens:    keyed by the tuple ( consumerKey, token )
    """

    # Seconds a consumer entry is kept in the local cache
    CONSUMER_GRACE_TIME = 3600
    REQUEST_GRACE_TIME = 900

    def __init__(self, RPCFunctor=None):
        """
        :param RPCFunctor: callable used to build the RPC client; it is called
                           with the service name. Defaults to RPCClient.
        """
        if not RPCFunctor:
            self.__RPCFunctor = RPCClient
        else:
            self.__RPCFunctor = RPCFunctor
        self.__tokens = DictCache()
        self.__requests = DictCache()
        self.__consumers = DictCache(deleteFunction=self.__cleanConsumerCache)

    def __getRPC(self):
        """ Build a client for the credentials service """
        return self.__RPCFunctor("WebAPI/Credentials")

    def __cleanReturn(self, result):
        """ Remove the 'rpcStub' entry (if any) from a result and return it """
        if 'rpcStub' in result:
            result.pop('rpcStub')
        return result

    ##
    # Consumer
    ##

    def generateConsumerPair(self, name, callback, icon, consumerKey=""):
        """ Ask the service to generate a consumer pair and cache the answer """
        result = self.__getRPC().generateConsumerPair(name, callback, icon, consumerKey)
        if not result['OK']:
            return self.__cleanReturn(result)
        # NOTE(review): the entry is cached under the consumerKey argument,
        # which defaults to ""; confirm whether a service-generated key should
        # be used instead.
        self.__consumers.add(consumerKey, self.CONSUMER_GRACE_TIME, result['Value'])
        return self.__cleanReturn(result)

    def getConsumerData(self, consumerKey):
        """ Get the consumer data, from the cache if present, else from the service """
        cData = self.__consumers.get(consumerKey)
        if cData:
            return S_OK(cData)
        result = self.__getRPC().getConsumerData(consumerKey)
        if not result['OK']:
            return self.__cleanReturn(result)
        self.__consumers.add(consumerKey, self.CONSUMER_GRACE_TIME, result['Value'])
        return self.__cleanReturn(result)

    def deleteConsumer(self, consumerKey):
        """ Delete a consumer locally and in the service """
        self.__consumers.delete(consumerKey)
        result = self.__getRPC().deleteConsumer(consumerKey)
        if result['OK']:
            self.__cleanConsumerCache({'key': consumerKey})
        return self.__cleanReturn(result)

    def getAllConsumers(self):
        """ Retrieve all the consumers from the service and cache each of them """
        result = self.__getRPC().getAllConsumers()
        if not result['OK']:
            return self.__cleanReturn(result)
        data = result['Value']
        # Position of each field in the records. The parameter names are a
        # sequence, so the position is obtained with index() (sequences have
        # no find() method).
        paramNames = list(data['Parameters'])
        fields = ('key', 'name', 'callback', 'secret', 'icon')
        consIndex = dict((field, paramNames.index(field)) for field in fields)
        for record in data['Records']:
            consData = dict((field, record[pos]) for field, pos in consIndex.items())
            self.__consumers.add(consData['key'], self.CONSUMER_GRACE_TIME, consData)
        return self.__cleanReturn(result)

    def __cleanConsumerCache(self, cData):
        """
        Remove the cached tokens and requests whose key starts with the
        consumer key found in cData[ 'key' ].
        """
        consumerKey = cData['key']
        # NOTE(review): request entries are keyed by the plain request
        # identifier, so cKey[0] is only meaningful for the token keys;
        # confirm whether requests should be purged here too.
        for dc in (self.__tokens, self.__requests):
            for cKey in dc.getKeys():
                if cKey[0] == consumerKey:
                    dc.delete(cKey)

    ##
    # Requests
    ##

    def generateRequest(self, consumerKey, callback=""):
        """ Generate a request in the service and cache its data """
        result = self.__getRPC().generateRequest(consumerKey, callback)
        if not result['OK']:
            return self.__cleanReturn(result)
        requestData = result['Value']
        # NOTE(review): 'lifeTime' is read from the top level of the result,
        # unlike the token methods that read it from the value; confirm.
        self.__requests.add(requestData['request'], result['lifeTime'] - 5, requestData)
        return self.__cleanReturn(result)

    def getRequestData(self, request):
        """ Get the request data, from the cache if present, else from the service """
        data = self.__requests.get(request)
        if data:
            return S_OK(data)
        result = self.__getRPC().getRequestData(request)
        if not result['OK']:
            return self.__cleanReturn(result)
        # Store in the requests cache (the one queried above), not in the
        # tokens cache.
        self.__requests.add(request, result['lifeTime'] - 5, result['Value'])
        return self.__cleanReturn(result)

    def deleteRequest(self, request):
        """ Delete a request in the service and drop it from the local cache """
        result = self.__getRPC().deleteRequest(request)
        if not result['OK']:
            return self.__cleanReturn(result)
        # Requests are cached under the request identifier itself
        self.__requests.delete(request)
        return self.__cleanReturn(result)

    ##
    # Verifiers
    ##

    def generateVerifier(self, consumerKey, request, userDN, userGroup, lifeTime=3600):
        """ Forward generateVerifier to the service """
        result = self.__getRPC().generateVerifier(consumerKey, request, userDN, userGroup, lifeTime)
        return self.__cleanReturn(result)

    def getVerifierData(self, verifier):
        """ Forward getVerifierData to the service """
        result = self.__getRPC().getVerifierData(verifier)
        return self.__cleanReturn(result)

    def deleteVerifier(self, verifier):
        """ Forward deleteVerifier to the service """
        result = self.__getRPC().deleteVerifier(verifier)
        return self.__cleanReturn(result)

    def findVerifier(self, consumerKey, request):
        """ Forward findVerifier to the service """
        result = self.__getRPC().findVerifier(consumerKey, request)
        return self.__cleanReturn(result)

    def setVerifierProperties(self, consumerKey, request, verifier, userDN, userGroup, lifeTime):
        """ Forward setVerifierProperties to the service """
        result = self.__getRPC().setVerifierProperties(consumerKey, request, verifier,
                                                       userDN, userGroup, lifeTime)
        return self.__cleanReturn(result)

    ##
    # Tokens
    ##

    def __dropCachedToken(self, token):
        """ Remove from the tokens cache every ( consumerKey, token ) entry of the given token """
        for cKey in self.__tokens.getKeys():
            if cKey[1] == token:
                self.__tokens.delete(cKey)

    def generateToken(self, consumerKey, request, verifier):
        """ Generate a token in the service and cache its data """
        result = self.__getRPC().generateToken(consumerKey, request, verifier)
        if not result['OK']:
            return self.__cleanReturn(result)
        tokenData = result['Value']
        cKey = (consumerKey, tokenData['token'])
        self.__tokens.add(cKey, tokenData['lifeTime'] - 5, tokenData)
        return S_OK(tokenData)

    def getTokenData(self, consumerKey, token):
        """ Get the token data, from the cache if present, else from the service """
        cKey = (consumerKey, token)
        tokenData = self.__tokens.get(cKey)
        if tokenData:
            return S_OK(tokenData)
        result = self.__getRPC().getTokenData(consumerKey, token)
        if not result['OK']:
            return self.__cleanReturn(result)
        tokenData = result['Value']
        self.__tokens.add(cKey, tokenData['lifeTime'] - 5, tokenData)
        return self.__cleanReturn(result)

    def revokeUserToken(self, userDN, userGroup, token):
        """ Revoke a user token in the service and drop it from the local cache """
        result = self.__getRPC().revokeUserToken(userDN, userGroup, token)
        if not result['OK']:
            return self.__cleanReturn(result)
        # Cache keys are ( consumerKey, token ): user DN and group are not
        # part of the key, so the entries are matched on the token only.
        self.__dropCachedToken(token)
        return self.__cleanReturn(result)

    def revokeToken(self, token):
        """ Revoke a token in the service and drop it from the local cache """
        result = self.__getRPC().revokeToken(token)
        if not result['OK']:
            return self.__cleanReturn(result)
        self.__dropCachedToken(token)
        return self.__cleanReturn(result)

    def cleanExpired(self):
        """ Forward cleanExpired to the service (the result is returned untouched) """
        return self.__getRPC().cleanExpired()

    def getTokens(self, condDict=None):
        """
        Retrieve the tokens matching condDict from the service and cache each
        record under ( consumerKey, token ).

        :param dict condDict: selection conditions, forwarded to the service
        """
        if condDict is None:
            condDict = {}
        result = self.__getRPC().getTokens(condDict)
        if not result['OK']:
            return self.__cleanReturn(result)
        params = result['Value']['Parameters']
        data = result['Value']['Records']
        # Record field name -> key under which it is stored in the token data
        fieldMap = {'UserDN': 'userDN',
                    'UserGroup': 'userGroup',
                    'Secret': 'secret',
                    'LifeTime': 'lifeTime'}
        for record in data:
            # Reset per record so that nothing leaks from the previous one
            consumerKey = "unknown"
            token = "unknown"
            lifeTime = 0
            tokenData = {}
            for param, value in zip(params, record):
                if param == "ConsumerKey":
                    consumerKey = value
                elif param == "Token":
                    token = value
                elif param in fieldMap:
                    tokenData[fieldMap[param]] = value
                    if param == "LifeTime":
                        lifeTime = value
            self.__tokens.add((consumerKey, token), lifeTime, tokenData)
        return self.__cleanReturn(result)
def __init__(self):
    """
    Instantiate the caches kept by this object.

    The users, proxies, VOMS proxies and pilot proxies caches are plain
    ``DictCache`` objects; the files cache receives
    ``self.__deleteTemporalFile`` as its constructor argument.
    """
    # Cache of dumped files, built with the file removal callable
    self.__filesCache = DictCache(self.__deleteTemporalFile)

    # Caches built without arguments
    self.__pilotProxiesCache = DictCache()
    self.__vomsProxiesCache = DictCache()
    self.__proxiesCache = DictCache()
    self.__usersCache = DictCache()
class Cache(object):
    """
    Basic cache class.

    WARNING: none of its methods is thread safe. Callers must acquire the
    lock before using them and release it afterwards !
    """

    def __init__(self, lifeTime, updateFunc):
        """
        Constructor

        :Parameters:
          **lifeTime** - `int`
            Lifetime of the elements in the cache ( seconds ! )
          **updateFunc** - `function`
            Function providing the fresh content. It MUST return a
            S_OK | S_ERROR object; on S_OK the value must be a dictionary.
        """
        self.log = gLogger.getSubLogger(self.__class__.__name__)
        self.__updateFunc = updateFunc

        # The lifetime is stretched by a random amount of up to 20%, so that
        # thousands of jobs starting together do not see all their caches
        # expiring at the same moment.
        stretch = 1 + 0.2 * random.random()
        self.__lifeTime = int(lifeTime * stretch)

        # Records handed out by the cache must stay valid for at least
        # 30 seconds.
        self.__validSeconds = 30

        # Cache and its lock
        self.__cache = DictCache()
        self.__cacheLock = LockRing()
        self.__cacheLock.getLock(self.__class__.__name__)

    # .........................................................................
    # internal cache object getter

    def cacheKeys(self):
        """
        Cache keys getter

        :returns: list with the keys of the cache that are still valid
        """
        stillValidFor = self.__validSeconds
        return self.__cache.getKeys(validSeconds=stillValidFor)

    # .........................................................................
    # acquire / release Locks

    def acquireLock(self):
        """ Acquires the Cache lock """
        lockName = self.__class__.__name__
        self.__cacheLock.acquire(lockName)

    def releaseLock(self):
        """ Releases the Cache lock """
        lockName = self.__class__.__name__
        self.__cacheLock.release(lockName)

    # .........................................................................
    # Cache getters

    def get(self, cacheKeys):
        """
        Looks up every key of cacheKeys. If all of them are present on the
        cache and valid, returns S_OK with a dictionary key -> value. As soon
        as one of them is missing or not valid, returns S_ERROR.

        :Parameters:
          **cacheKeys** - `list`
            list of keys to be extracted from the cache

        :return: S_OK | S_ERROR
        """
        found = {}
        for key in cacheKeys:
            row = self.__cache.get(key, validSeconds=self.__validSeconds)
            if not row:
                keyText = str(key)
                self.log.error(keyText)
                return S_ERROR('Cannot get %s' % keyText)
            found[key] = row
        return S_OK(found)

    # .........................................................................
    # Cache refreshers

    def refreshCache(self):
        """
        Empties the cache and fills it again with what the update function
        returns.

        :return: S_OK | S_ERROR. On S_OK, the value is the new cache content.
        """
        self.log.verbose('refreshing...')
        self.__cache.purgeAll()

        fetched = self.__updateFunc()
        if fetched['OK']:
            stored = self.__updateCache(fetched['Value'])
            self.log.verbose('refreshed')
            return stored

        self.log.error(fetched['Message'])
        return fetched

    # .........................................................................
    # Private methods

    def __updateCache(self, newCache):
        """
        Inserts every item of the given dictionary in the internal cache, each
        one with a duration of <self.__lifeTime> seconds.

        :Parameters:
          **newCache** - `dict`
            dictionary containing a new cache

        :return: S_OK wrapping the newCache argument.
        """
        lifeTime = self.__lifeTime
        for key, value in newCache.items():
            self.__cache.add(key, lifeTime, value=value)

        # Insertions in the cache are assumed not to fail; there is no
        # apparent reason to expect otherwise from that piece of code.
        return S_OK(newCache)
class ProxyManagerClient:
    """
    Client of the "Framework/ProxyManager" service.

    Keeps local caches of registered users, downloaded proxies, downloaded
    VOMS proxies and of the files proxies have been dumped to, in order to
    minimise the queries to the service.
    """
    __metaclass__ = DIRACSingleton.DIRACSingleton

    def __init__(self):
        self.__usersCache = DictCache()
        self.__proxiesCache = DictCache()
        self.__vomsProxiesCache = DictCache()
        self.__pilotProxiesCache = DictCache()
        self.__filesCache = DictCache(self.__deleteTemporalFile)

    def __deleteTemporalFile(self, filename):
        """
        Best-effort removal of a file; given to the files cache at
        construction time (presumably called when an entry is dropped).
        """
        try:
            os.unlink(filename)
        except (OSError, TypeError):
            # The file may already be gone, or no valid name was stored:
            # nothing to clean up in either case.
            pass

    def clearCaches(self):
        """ Purge the users and proxies caches (the files cache is left untouched) """
        self.__usersCache.purgeAll()
        self.__proxiesCache.purgeAll()
        self.__vomsProxiesCache.purgeAll()
        self.__pilotProxiesCache.purgeAll()

    def __getSecondsLeftToExpiration(self, expiration, utc=True):
        """
        Seconds between now and expiration (negative if already past).

        :param datetime.datetime expiration: expiration time
        :param bool utc: compare against utcnow() if True, now() otherwise
        """
        if utc:
            td = expiration - datetime.datetime.utcnow()
        else:
            td = expiration - datetime.datetime.now()
        return td.days * 86400 + td.seconds

    def __refreshUserCache(self, validSeconds=0):
        """ Fill the users cache with the registered users reported by the service """
        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        retVal = rpcClient.getRegisteredUsers(validSeconds)
        if not retVal['OK']:
            return retVal
        # Update the cache: each record lives until its proxy expires
        for record in retVal['Value']:
            cacheKey = (record['DN'], record['group'])
            self.__usersCache.add(cacheKey,
                                  self.__getSecondsLeftToExpiration(record['expirationtime']),
                                  record)
        return S_OK()

    @gUsersSync
    def userHasProxy(self, userDN, userGroup, validSeconds=0):
        """
        Check if a user(DN-group) has a proxy in the proxy management
          - Updates internal cache if needed to minimize queries to the
            service
        """
        cacheKey = (userDN, userGroup)
        if self.__usersCache.exists(cacheKey, validSeconds):
            return S_OK(True)
        # Get from the service the users with proxies valid for at least
        # validSeconds
        gLogger.verbose("Updating list of users in proxy management")
        retVal = self.__refreshUserCache(validSeconds)
        if not retVal['OK']:
            return retVal
        return S_OK(self.__usersCache.exists(cacheKey, validSeconds))

    @gUsersSync
    def getUserPersistence(self, userDN, userGroup, validSeconds=0):
        """
        Get the persistency flag of the proxy of a user(DN-group)
          - Updates internal cache if needed to minimize queries to the
            service
        """
        cacheKey = (userDN, userGroup)
        userData = self.__usersCache.get(cacheKey, validSeconds)
        if userData and userData['persistent']:
            return S_OK(True)
        # Not known as persistent: refresh from the service the users with
        # proxies valid for at least validSeconds
        gLogger.verbose("Updating list of users in proxy management")
        retVal = self.__refreshUserCache(validSeconds)
        if not retVal['OK']:
            return retVal
        userData = self.__usersCache.get(cacheKey, validSeconds)
        if userData:
            return S_OK(userData['persistent'])
        return S_OK(False)

    def setPersistency(self, userDN, userGroup, persistent):
        """ Set the persistency for user/group """
        # Ensure a real bool goes in the rpc call
        persistentFlag = bool(persistent)
        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        retVal = rpcClient.setPersistency(userDN, userGroup, persistentFlag)
        if not retVal['OK']:
            return retVal
        # Update internal persistency cache
        cacheKey = (userDN, userGroup)
        record = self.__usersCache.get(cacheKey, 0)
        if record:
            record['persistent'] = persistentFlag
            self.__usersCache.add(cacheKey,
                                  self.__getSecondsLeftToExpiration(record['expirationtime']),
                                  record)
        return retVal

    def uploadProxy(self, proxy=False, diracGroup=False, chainToConnect=False,
                    restrictLifeTime=0, rfcIfPossible=False):
        """
        Upload a proxy to the proxy management service using delegation

        :param proxy: chain object, path of a proxy file, or a false value to
                      use the default proxy location
        :param diracGroup: group forwarded to the delegation request
        :param chainToConnect: accepted for compatibility; not used to connect
        :param int restrictLifeTime: if set, upper bound in seconds for the
                                     life time of the delegated chain
        :param bool rfcIfPossible: forwarded as `rfc` to the chain generation
        """
        # Discover proxy location
        if type(proxy) == g_X509ChainType:
            chain = proxy
            proxyLocation = ""
        else:
            if not proxy:
                proxyLocation = Locations.getProxyLocation()
                if not proxyLocation:
                    return S_ERROR("Can't find a valid proxy")
            elif isinstance(proxy, basestring):
                proxyLocation = proxy
            else:
                return S_ERROR("Can't find a valid proxy")
            chain = X509Chain()
            result = chain.loadProxyFromFile(proxyLocation)
            if not result['OK']:
                return S_ERROR("Can't load %s: %s " % (proxyLocation, result['Message']))

        # Make sure it's valid
        if chain.hasExpired()['Value']:
            return S_ERROR("Proxy %s has expired" % proxyLocation)

        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        # Get a delegation request
        result = rpcClient.requestDelegationUpload(chain.getRemainingSecs()['Value'], diracGroup)
        if not result['OK']:
            return result
        # Check if the delegation has been granted
        if 'Value' not in result or not result['Value']:
            if 'proxies' in result:
                return S_OK(result['proxies'])
            return S_OK()
        reqDict = result['Value']
        # Generate delegated chain
        chainLifeTime = chain.getRemainingSecs()['Value'] - 60
        if restrictLifeTime and restrictLifeTime < chainLifeTime:
            chainLifeTime = restrictLifeTime
        retVal = chain.generateChainFromRequestString(reqDict['request'],
                                                      lifetime=chainLifeTime,
                                                      diracGroup=diracGroup,
                                                      rfc=rfcIfPossible)
        if not retVal['OK']:
            return retVal
        # Upload!
        result = rpcClient.completeDelegationUpload(reqDict['id'], retVal['Value'])
        if not result['OK']:
            return result
        if 'proxies' in result:
            return S_OK(result['proxies'])
        return S_OK()

    @gProxiesSync
    def downloadProxy(self, userDN, userGroup, limited=False, requiredTimeLeft=1200,
                      cacheTime=43200, proxyToConnect=False, token=False):
        """
        Get a proxy Chain from the proxy management

        :return: S_OK( chain ) / S_ERROR
        """
        # `limited` is part of the key (as in downloadVOMSProxy) so that a
        # cached full proxy is never handed out for a limited request, nor
        # the other way round.
        cacheKey = (userDN, userGroup, limited)
        if self.__proxiesCache.exists(cacheKey, requiredTimeLeft):
            return S_OK(self.__proxiesCache.get(cacheKey))
        req = X509Request()
        req.generateProxyRequest(limited=limited)
        if proxyToConnect:
            rpcClient = RPCClient("Framework/ProxyManager", proxyChain=proxyToConnect, timeout=120)
        else:
            rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        if token:
            retVal = rpcClient.getProxyWithToken(userDN, userGroup, req.dumpRequest()['Value'],
                                                 long(cacheTime + requiredTimeLeft), token)
        else:
            retVal = rpcClient.getProxy(userDN, userGroup, req.dumpRequest()['Value'],
                                        long(cacheTime + requiredTimeLeft))
        if not retVal['OK']:
            return retVal
        chain = X509Chain(keyObj=req.getPKey())
        retVal = chain.loadChainFromString(retVal['Value'])
        if not retVal['OK']:
            return retVal
        self.__proxiesCache.add(cacheKey, chain.getRemainingSecs()['Value'], chain)
        return S_OK(chain)

    def downloadProxyToFile(self, userDN, userGroup, limited=False, requiredTimeLeft=1200,
                            cacheTime=43200, filePath=False, proxyToConnect=False, token=False):
        """
        Get a proxy Chain from the proxy management and write it to file

        :return: S_OK( file name ) with an extra 'chain' entry / S_ERROR
        """
        retVal = self.downloadProxy(userDN, userGroup, limited, requiredTimeLeft,
                                    cacheTime, proxyToConnect, token)
        if not retVal['OK']:
            return retVal
        chain = retVal['Value']
        retVal = self.dumpProxyToFile(chain, filePath)
        if not retVal['OK']:
            return retVal
        retVal['chain'] = chain
        return retVal

    @gVOMSProxiesSync
    def downloadVOMSProxy(self, userDN, userGroup, limited=False, requiredTimeLeft=1200,
                          cacheTime=43200, requiredVOMSAttribute=False, proxyToConnect=False,
                          token=False):
        """
        Download a proxy if needed and transform it into a VOMS one

        :return: S_OK( chain ) / S_ERROR
        """
        cacheKey = (userDN, userGroup, requiredVOMSAttribute, limited)
        if self.__vomsProxiesCache.exists(cacheKey, requiredTimeLeft):
            return S_OK(self.__vomsProxiesCache.get(cacheKey))
        req = X509Request()
        req.generateProxyRequest(limited=limited)
        if proxyToConnect:
            rpcClient = RPCClient("Framework/ProxyManager", proxyChain=proxyToConnect, timeout=120)
        else:
            rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        if token:
            retVal = rpcClient.getVOMSProxyWithToken(userDN, userGroup, req.dumpRequest()['Value'],
                                                     long(cacheTime + requiredTimeLeft), token,
                                                     requiredVOMSAttribute)
        else:
            retVal = rpcClient.getVOMSProxy(userDN, userGroup, req.dumpRequest()['Value'],
                                            long(cacheTime + requiredTimeLeft),
                                            requiredVOMSAttribute)
        if not retVal['OK']:
            return retVal
        chain = X509Chain(keyObj=req.getPKey())
        retVal = chain.loadChainFromString(retVal['Value'])
        if not retVal['OK']:
            return retVal
        self.__vomsProxiesCache.add(cacheKey, chain.getRemainingSecs()['Value'], chain)
        return S_OK(chain)

    def downloadVOMSProxyToFile(self, userDN, userGroup, limited=False, requiredTimeLeft=1200,
                                cacheTime=43200, requiredVOMSAttribute=False, filePath=False,
                                proxyToConnect=False, token=False):
        """
        Download a proxy if needed, transform it into a VOMS one and write it
        to file

        :return: S_OK( file name ) with an extra 'chain' entry / S_ERROR
        """
        retVal = self.downloadVOMSProxy(userDN, userGroup, limited, requiredTimeLeft, cacheTime,
                                        requiredVOMSAttribute, proxyToConnect, token)
        if not retVal['OK']:
            return retVal
        chain = retVal['Value']
        retVal = self.dumpProxyToFile(chain, filePath)
        if not retVal['OK']:
            return retVal
        retVal['chain'] = chain
        return retVal

    def getPilotProxyFromDIRACGroup(self, userDN, userGroup, requiredTimeLeft=43200,
                                    proxyToConnect=False):
        """ Download a pilot proxy with VOMS extensions depending on the group """
        # Assign VOMS attribute
        vomsAttr = CS.getVOMSAttributeForGroup(userGroup)
        if not vomsAttr:
            gLogger.verbose("No voms attribute assigned to group %s when requested pilot proxy" % userGroup)
            return self.downloadProxy(userDN, userGroup, limited=False,
                                      requiredTimeLeft=requiredTimeLeft,
                                      proxyToConnect=proxyToConnect)
        return self.downloadVOMSProxy(userDN, userGroup, limited=False,
                                      requiredTimeLeft=requiredTimeLeft,
                                      requiredVOMSAttribute=vomsAttr,
                                      proxyToConnect=proxyToConnect)

    def getPilotProxyFromVOMSGroup(self, userDN, vomsAttr, requiredTimeLeft=43200,
                                   proxyToConnect=False):
        """
        Download a pilot proxy with VOMS extensions depending on the VOMS
        attribute. The groups having the attribute are tried in turn; the
        first successful download is returned, else the last error.
        """
        groups = CS.getGroupsWithVOMSAttribute(vomsAttr)
        if not groups:
            return S_ERROR("No group found that has %s as voms attrs" % vomsAttr)
        for userGroup in groups:
            result = self.downloadVOMSProxy(userDN, userGroup, limited=False,
                                            requiredTimeLeft=requiredTimeLeft,
                                            requiredVOMSAttribute=vomsAttr,
                                            proxyToConnect=proxyToConnect)
            if result['OK']:
                return result
        return result

    def getPayloadProxyFromDIRACGroup(self, userDN, userGroup, requiredTimeLeft, token=False,
                                      proxyToConnect=False):
        """ Download a payload (limited) proxy with VOMS extensions depending on the group """
        # Assign VOMS attribute
        vomsAttr = CS.getVOMSAttributeForGroup(userGroup)
        if not vomsAttr:
            gLogger.verbose("No voms attribute assigned to group %s when requested payload proxy" % userGroup)
            return self.downloadProxy(userDN, userGroup, limited=True,
                                      requiredTimeLeft=requiredTimeLeft,
                                      proxyToConnect=proxyToConnect, token=token)
        return self.downloadVOMSProxy(userDN, userGroup, limited=True,
                                      requiredTimeLeft=requiredTimeLeft,
                                      requiredVOMSAttribute=vomsAttr,
                                      proxyToConnect=proxyToConnect, token=token)

    def getPayloadProxyFromVOMSGroup(self, userDN, vomsAttr, token, requiredTimeLeft,
                                     proxyToConnect=False):
        """
        Download a payload (limited) proxy with VOMS extensions depending on
        the VOMS attr; the first group having the attribute is used.
        """
        groups = CS.getGroupsWithVOMSAttribute(vomsAttr)
        if not groups:
            return S_ERROR("No group found that has %s as voms attrs" % vomsAttr)
        userGroup = groups[0]
        return self.downloadVOMSProxy(userDN, userGroup, limited=True,
                                      requiredTimeLeft=requiredTimeLeft,
                                      requiredVOMSAttribute=vomsAttr,
                                      proxyToConnect=proxyToConnect, token=token)

    def dumpProxyToFile(self, chain, destinationFile=False, requiredTimeLeft=600):
        """
        Dump a proxy to a file. It's cached (by chain hash) so multiple calls
        won't generate extra files

        :return: S_OK( file name ) / S_ERROR
        """
        result = chain.hash()
        if not result['OK']:
            return result
        cHash = result['Value']
        if self.__filesCache.exists(cHash, requiredTimeLeft):
            filepath = self.__filesCache.get(cHash)
            if filepath and os.path.isfile(filepath):
                return S_OK(filepath)
            # Stale entry: the file is gone
            self.__filesCache.delete(cHash)
        retVal = chain.dumpAllToFile(destinationFile)
        if not retVal['OK']:
            return retVal
        filename = retVal['Value']
        self.__filesCache.add(cHash, chain.getRemainingSecs()['Value'], filename)
        return S_OK(filename)

    def deleteGeneratedProxyFile(self, chain):
        """
        Delete a file generated by a dump

        :return: S_OK() / S_ERROR if the hash of the chain cannot be computed
        """
        # Entries of the files cache are keyed by the chain hash (see
        # dumpProxyToFile), not by the chain object itself.
        result = chain.hash()
        if not result['OK']:
            return result
        self.__filesCache.delete(result['Value'])
        return S_OK()

    def requestToken(self, requesterDN, requesterGroup, numUses=1):
        """
        Request a token from the service.

        :param requesterDN: DN of the requester
        :param requesterGroup: group of the requester
        :param int numUses: number of uses the token must have
        """
        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        return rpcClient.generateToken(requesterDN, requesterGroup, numUses)

    def renewProxy(self, proxyToBeRenewed=False, minLifeTime=3600, newProxyLifeTime=43200,
                   proxyToConnect=False):
        """
        Renew a proxy using the ProxyManager
        Arguments:
          proxyToBeRenewed : proxy to renew
          minLifeTime : if proxy life time is less than this, renew. Skip otherwise
          newProxyLifeTime : life time of new proxy
          proxyToConnect : proxy to use for connecting to the service
        """
        retVal = multiProxyArgument(proxyToBeRenewed)
        # Test the 'OK' flag: an error result carries no 'Value' to look at
        if not retVal['OK']:
            return retVal
        proxyToRenewDict = retVal['Value']

        secs = proxyToRenewDict['chain'].getRemainingSecs()['Value']
        if secs > minLifeTime:
            deleteMultiProxy(proxyToRenewDict)
            return S_OK()

        if not proxyToConnect:
            proxyToConnectDict = {'chain': False, 'tempFile': False}
        else:
            retVal = multiProxyArgument(proxyToConnect)
            if not retVal['OK']:
                deleteMultiProxy(proxyToRenewDict)
                return retVal
            proxyToConnectDict = retVal['Value']

        userDN = proxyToRenewDict['chain'].getIssuerCert()['Value'].getSubjectDN()['Value']
        retVal = proxyToRenewDict['chain'].getDIRACGroup()
        if not retVal['OK']:
            deleteMultiProxy(proxyToRenewDict)
            deleteMultiProxy(proxyToConnectDict)
            return retVal
        userGroup = retVal['Value']
        limited = proxyToRenewDict['chain'].isLimitedProxy()['Value']

        voms = VOMS()
        retVal = voms.getVOMSAttributes(proxyToRenewDict['chain'])
        if not retVal['OK']:
            deleteMultiProxy(proxyToRenewDict)
            deleteMultiProxy(proxyToConnectDict)
            return retVal
        vomsAttrs = retVal['Value']
        if vomsAttrs:
            retVal = self.downloadVOMSProxy(userDN, userGroup, limited=limited,
                                            requiredTimeLeft=newProxyLifeTime,
                                            requiredVOMSAttribute=vomsAttrs[0],
                                            proxyToConnect=proxyToConnectDict['chain'])
        else:
            retVal = self.downloadProxy(userDN, userGroup, limited=limited,
                                        requiredTimeLeft=newProxyLifeTime,
                                        proxyToConnect=proxyToConnectDict['chain'])

        deleteMultiProxy(proxyToRenewDict)
        deleteMultiProxy(proxyToConnectDict)

        if not retVal['OK']:
            return retVal

        chain = retVal['Value']

        # The proxy to renew did not come from a temporary file: write the
        # renewed chain back to its file
        if not proxyToRenewDict['tempFile']:
            return chain.dumpAllToFile(proxyToRenewDict['file'])

        return S_OK(chain)

    def getDBContents(self, condDict=None):
        """
        Get the contents of the db

        :param dict condDict: selection conditions forwarded to the service
        """
        if condDict is None:
            condDict = {}
        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        return rpcClient.getContents(condDict, [['UserDN', 'DESC']], 0, 0)

    def getVOMSAttributes(self, chain):
        """ Get the voms attributes for a chain """
        return VOMS().getVOMSAttributes(chain)

    def getUploadedProxyLifeTime(self, DN, group):
        """
        Get the remaining seconds for an uploaded proxy

        :return: S_OK( seconds, 0 if no matching proxy or already expired ) / S_ERROR
        """
        result = self.getDBContents({'UserDN': [DN], 'UserGroup': [group]})
        if not result['OK']:
            return result
        data = result['Value']
        if not data['Records']:
            return S_OK(0)
        pNames = list(data['ParameterNames'])
        dnPos = pNames.index('UserDN')
        groupPos = pNames.index('UserGroup')
        expiryPos = pNames.index('ExpirationTime')
        for row in data['Records']:
            if DN == row[dnPos] and group == row[groupPos]:
                td = row[expiryPos] - datetime.datetime.utcnow()
                secondsLeft = td.days * 86400 + td.seconds
                return S_OK(max(0, secondsLeft))
        return S_OK(0)

    def getUserProxiesInfo(self):
        """ Get the user proxies uploaded info """
        result = RPCClient("Framework/ProxyManager", timeout=120).getUserProxiesInfo()
        if 'rpcStub' in result:
            result.pop('rpcStub')
        return result
class GatewayService(Service):
  """
  Service that relays client requests (RPC calls, file transfers and messaging
  connections) to their target services, using a proxy delegated by the client.
  """

  GATEWAY_NAME = "Framework/Gateway"

  def __init__(self):
    """ Initialise the base Service plus the delegation cache and the transfer limit """
    Service.__init__(self, GatewayService.GATEWAY_NAME)
    # Client init arguments (including the delegated proxy string), keyed by client identity
    self.__delegatedCredentials = DictCache()
    # Limit handed to the transfer relay and checked against the requested transfer size
    self.__transferBytesLimit = 1024 * 1024 * 100

  def initialize(self):
    """
    Build the service URL, the thread pool and the message broker/forwarder.

    :return: S_OK() or S_ERROR if the service URL cannot be built
    """
    # Build the URLs
    self._url = self._cfg.getURL()
    if not self._url:
      return S_ERROR("Could not build service URL for %s" % GatewayService.GATEWAY_NAME)
    gLogger.verbose("Service URL is %s" % self._url)
    # Discover Handler
    self._initMonitoring()
    self._threadPool = ThreadPool(1,
                                  max(0, self._cfg.getMaxThreads()),
                                  self._cfg.getMaxWaitingPetitions())
    self._threadPool.daemonize()
    self._msgBroker = MessageBroker("%sMSB" % GatewayService.GATEWAY_NAME,
                                    threadPool=self._threadPool)
    self._msgBroker.useMessageObjects(False)
    getGlobalMessageBroker().useMessageObjects(False)
    self._msgForwarder = MessageForwarder(self._msgBroker)
    return S_OK()

  # Threaded process function
  def _processInThread(self, clientTransport):
    """
    Handle one incoming connection: handshake, proposal check, credential
    delegation and execution of the proposed action.

    :param clientTransport: transport of the connecting client
    :return: result of the proposal processing, or None when the connection is dropped early
    """
    # Handshake
    try:
      clientTransport.handshake()
    except Exception:
      # A failed handshake just drops the connection. Only Exception is caught so
      # that KeyboardInterrupt/SystemExit are not silently swallowed.
      return
    # Add to the transport pool
    trid = self._transportPool.add(clientTransport)
    if not trid:
      return
    # Receive and check proposal
    result = self._receiveAndCheckProposal(trid)
    if not result['OK']:
      self._transportPool.sendAndClose(trid, result)
      return
    proposalTuple = result['Value']
    # Instantiate handler
    result = self.__getClientInitArgs(trid, proposalTuple)
    if not result['OK']:
      self._transportPool.sendAndClose(trid, result)
      return
    clientInitArgs = result['Value']
    # Execute the action
    result = self._processProposal(trid, proposalTuple, clientInitArgs)
    # Close the connection if required
    if result['closeTransport']:
      self._transportPool.close(trid)
    return result

  def _receiveAndCheckProposal(self, trid):
    """
    Read the action proposal sent by the client and register any extra credentials.

    :param trid: transport id in the transport pool
    :return: S_OK( proposalTuple ) or S_ERROR
    """
    clientTransport = self._transportPool.get(trid)
    # Get the peer credentials
    credDict = clientTransport.getConnectingCredentials()
    # Receive the action proposal
    retVal = clientTransport.receiveData(1024)
    if not retVal['OK']:
      gLogger.error("Invalid action proposal",
                    "%s %s" % (self._createIdentityString(credDict, clientTransport),
                               retVal['Message']))
      return S_ERROR("Invalid action proposal")
    proposalTuple = retVal['Value']
    gLogger.debug("Received action from client", "/".join(list(proposalTuple[1])))
    # Check if there are extra credentials
    if proposalTuple[2]:
      clientTransport.setExtraCredentials(proposalTuple[2])
    return S_OK(proposalTuple)

  def __getClientInitArgs(self, trid, proposalTuple):
    """
    Build (or fetch from the cache) the client arguments used to contact the
    target service on behalf of the connecting client.

    :param trid: transport id in the transport pool
    :param proposalTuple: action proposal; proposalTuple[0][1] is used as the setup
    :return: S_OK( dict of client init arguments ), S_OK() when the peer has no
             x509 chain, or S_ERROR when the delegation fails
    """
    clientTransport = self._transportPool.get(trid)
    # Get the peer credentials
    credDict = clientTransport.getConnectingCredentials()
    if 'x509Chain' not in credDict:
      return S_OK()
    cKey = (credDict['DN'],
            credDict.get('group', False),
            credDict.get('extraCredentials', False),
            credDict['isLimitedProxy'])
    # NOTE(review): 3600 is presumably the minimum validity (seconds) required
    # from the cached entry - confirm against DictCache.get
    dP = self.__delegatedCredentials.get(cKey, 3600)
    idString = self._createIdentityString(credDict, clientTransport)
    if dP:
      gLogger.verbose("Proxy for %s is cached" % idString)
      return S_OK(dP)
    result = self.__requestDelegation(clientTransport, credDict)
    if not result['OK']:
      gLogger.warn("Could not get proxy for %s: %s" % (idString, result['Message']))
      return result
    delChain = result['Value']
    # Check the chain operations explicitly: a failure is reported to the caller
    # as an error structure instead of surfacing as a KeyError on 'Value'
    result = delChain.dumpAllToString()
    if not result['OK']:
      gLogger.warn("Could not get proxy for %s: %s" % (idString, result['Message']))
      return result
    delegatedChain = result['Value']
    result = delChain.getRemainingSecs()
    if not result['OK']:
      gLogger.warn("Could not get proxy for %s: %s" % (idString, result['Message']))
      return result
    secsLeft = result['Value'] - 1
    clientInitArgs = {BaseClient.KW_SETUP: proposalTuple[0][1],
                      BaseClient.KW_TIMEOUT: 600,
                      BaseClient.KW_IGNORE_GATEWAYS: True,
                      BaseClient.KW_USE_CERTIFICATES: False,
                      BaseClient.KW_PROXY_STRING: delegatedChain}
    if BaseClient.KW_EXTRA_CREDENTIALS in credDict:
      clientInitArgs[BaseClient.KW_EXTRA_CREDENTIALS] = credDict[BaseClient.KW_EXTRA_CREDENTIALS]
    gLogger.warn("Got delegated proxy for %s: %s secs left" % (idString, secsLeft))
    # Cache the arguments for as long as the delegated proxy is valid
    self.__delegatedCredentials.add(cKey, secsLeft, clientInitArgs)
    return S_OK(clientInitArgs)

  def __requestDelegation(self, clientTransport, credDict):
    """
    Ask the client to delegate a proxy: send a delegation request and load the
    chain sent back by the client.

    :param clientTransport: transport of the connecting client
    :param credDict: peer credentials; must contain 'x509Chain'
    :return: S_OK( X509Chain ) or S_ERROR
    """
    peerChain = credDict['x509Chain']
    retVal = peerChain.getCertInChain()['Value'].generateProxyRequest()
    if not retVal['OK']:
      return retVal
    delegationRequest = retVal['Value']
    retVal = delegationRequest.dumpRequest()
    if not retVal['OK']:
      retVal = S_ERROR("Server Error: Can't generate delegation request")
      clientTransport.sendData(retVal)
      return retVal
    gLogger.info("Sending delegation request for %s" % delegationRequest.getSubjectDN()['Value'])
    clientTransport.sendData(S_OK({'delegate': retVal['Value']}))
    delegatedCertChain = clientTransport.receiveData()
    delegatedChain = X509Chain(keyObj=delegationRequest.getPKey())
    retVal = delegatedChain.loadChainFromString(delegatedCertChain)
    if not retVal['OK']:
      retVal = S_ERROR("Error in receiving delegated proxy: %s" % retVal['Message'])
      clientTransport.sendData(retVal)
      return retVal
    return S_OK(delegatedChain)

  # Msg

  def _mbConnect(self, trid, clientInitArgs):
    """ Accept any messaging connection """
    return S_OK()

  def _mbReceivedMsg(self, cliTrid, msgObj):
    """ Hand a message received from a client over to the message forwarder """
    return self._msgForwarder.msgFromClient(cliTrid, msgObj)

  def _mbDisconnect(self, cliTrid):
    """ Tell the message forwarder that a client has disconnected """
    self._msgForwarder.cliDisconnect(cliTrid)

  # Execute action

  def _executeAction(self, trid, proposalTuple, clientInitArgs):
    """
    Receive the action arguments from the client, forward the action to the
    target service and send the result back to the client.

    :param trid: transport id in the transport pool
    :param proposalTuple: ( ( targetService, setup ), ( actionType, actionMethod ), ... )
    :param clientInitArgs: client arguments built by __getClientInitArgs
    :return: result sent back to the client, or None if the arguments could not be received
    """
    clientTransport = self._transportPool.get(trid)
    credDict = clientTransport.getConnectingCredentials()
    targetService = proposalTuple[0][0]
    actionType = proposalTuple[1][0]
    actionMethod = proposalTuple[1][1]
    idString = self._createIdentityString(credDict, clientTransport)
    # OOkay! Lets do the magic!
    retVal = clientTransport.receiveData()
    if not retVal['OK']:
      gLogger.error("Error while receiving file description", retVal['Message'])
      clientTransport.sendData(S_ERROR("Error while receiving file description: %s" % retVal['Message']))
      return
    if actionType == "FileTransfer":
      gLogger.warn("Received a file transfer action from %s" % idString)
      clientTransport.sendData(S_OK("Accepted"))
      retVal = self.__forwardFileTransferCall(targetService, clientInitArgs,
                                              actionMethod, retVal['Value'], clientTransport)
    elif actionType == "RPC":
      gLogger.info("Forwarding %s/%s action to %s for %s" % (actionType, actionMethod,
                                                             targetService, idString))
      retVal = self.__forwardRPCCall(targetService, clientInitArgs, actionMethod, retVal['Value'])
    elif actionType == "Connection" and actionMethod == "new":
      gLogger.info("Initiating a messaging connection to %s for %s" % (targetService, idString))
      retVal = self._msgForwarder.addClient(trid, targetService, clientInitArgs, retVal['Value'])
    else:
      gLogger.warn("Received an invalid %s/%s action from %s" % (actionType, actionMethod, idString))
      retVal = S_ERROR("Unknown type of action (%s)" % actionType)
    # TODO: Send back the data?
    if 'rpcStub' in retVal:
      retVal.pop('rpcStub')
    clientTransport.sendData(retVal)
    return retVal

  def __forwardRPCCall(self, targetService, clientInitArgs, method, params):
    """
    Forward an RPC call to the target service.

    Requests for Configuration/Server.getCompressedDataIfNewer are answered
    directly from the local configuration data.

    :return: result of the remote (or locally answered) call
    """
    if targetService == "Configuration/Server":
      if method == "getCompressedDataIfNewer":
        # Relay CS data directly
        serviceVersion = gConfigurationData.getVersion()
        retDict = {'newestVersion': serviceVersion}
        clientVersion = params[0]
        if clientVersion < serviceVersion:
          retDict['data'] = gConfigurationData.getCompressedData()
        return S_OK(retDict)
    # Default
    rpcClient = RPCClient(targetService, **clientInitArgs)
    methodObj = getattr(rpcClient, method)
    return methodObj(*params)

  def __forwardFileTransferCall(self, targetService, clientInitArgs, method,
                                params, clientTransport):
    """
    Forward a file transfer between the client and the target service.

    :param method: transfer method name; the relay method called is 'forward<method>'
    :param params: transfer parameters; params[2] is checked as the requested size
                   for transfers coming from the client
    :return: result of the relay method or S_ERROR
    """
    transferRelay = TransferRelay(targetService, **clientInitArgs)
    transferRelay.setTransferLimit(self.__transferBytesLimit)
    cliFH = FileHelper(clientTransport)
    # Check file size
    if method.find("ToClient") > -1:
      cliFH.setDirection("send")
    elif method.find("FromClient") > -1:
      cliFH.setDirection("receive")
      if not self.__ftCheckMaxTransferSize(params[2]):
        cliFH.markAsTransferred()
        return S_ERROR("Transfer size is too big")
    # Forward queries
    try:
      relayMethodObject = getattr(transferRelay, 'forward%s' % method)
    except Exception:
      # The method name comes from the client: any lookup failure is an unknown method
      return S_ERROR("Cannot forward unknown method %s" % method)
    result = relayMethodObject(cliFH, params)
    return result

  def __ftCheckMaxTransferSize(self, requestedTransferSize):
    """
    Check a requested transfer size against the configured limit.

    :return: True if there is no limit, no size was given, or the size is within the limit
    """
    if not self.__transferBytesLimit:
      return True
    if not requestedTransferSize:
      return True
    if requestedTransferSize <= self.__transferBytesLimit:
      return True
    return False
def __init__(self):
  """Standard constructor: create the class sub-logger and an empty metadata cache"""
  # The sub-logger is named after the concrete class of the instance
  loggerName = self.__class__.__name__
  self.log = gLogger.getSubLogger(loggerName)
  self.cacheMetadata = DictCache()
class GridPilotDirector(PilotDirector):
  """
    Base Grid PilotDirector class
    Derived classes must declare:
      self.Middleware: It must correspond to the string before "PilotDirector".
        (For proper naming of the logger)
      self.ResourceBrokers: list of Brokers used by the Director.
        (For proper error reporting)
  """

  def __init__(self, submitPool):
    """
     Define some defaults and call parent __init__
    """
    self.gridEnv = GRIDENV

    self.cpuPowerRef = CPU_POWER_REF
    self.requirements = REQUIREMENTS
    self.rank = RANK
    self.fuzzyRank = FUZZY_RANK

    self.__failingWMSCache = DictCache()
    self.__ticketsWMSCache = DictCache()
    self.__listMatchWMSCache = DictCache()

    PilotDirector.__init__(self, submitPool)

  def configure(self, csSection, submitPool):
    """
     Here goes common configuration for all Grid PilotDirectors
    """
    PilotDirector.configure(self, csSection, submitPool)
    self.reloadConfiguration(csSection, submitPool)

    self.__failingWMSCache.purgeExpired()
    self.__ticketsWMSCache.purgeExpired()
    # Brokers still present in the failing cache are excluded from this cycle
    for rb in self.__failingWMSCache.getKeys():
      if rb in self.resourceBrokers:
        try:
          self.resourceBrokers.remove(rb)
        except ValueError:
          pass

    self.resourceBrokers = List.randomize(self.resourceBrokers)

    if self.gridEnv:
      self.log.info(' GridEnv: ', self.gridEnv)
    if self.resourceBrokers:
      self.log.info(' ResourceBrokers:', ', '.join(self.resourceBrokers))

  def configureFromSection(self, mySection):
    """
      reload from CS
    """
    PilotDirector.configureFromSection(self, mySection)

    self.gridEnv = gConfig.getValue(mySection + '/GridEnv', self.gridEnv)
    if not self.gridEnv:
      # No specific option found, try a general one
      setup = gConfig.getValue('/DIRAC/Setup', '')
      if setup:
        instance = gConfig.getValue('/DIRAC/Setups/%s/WorkloadManagement' % setup, '')
        if instance:
          self.gridEnv = gConfig.getValue('/Systems/WorkloadManagement/%s/GridEnv' % instance, '')

    self.resourceBrokers = gConfig.getValue(mySection + '/ResourceBrokers', self.resourceBrokers)

    self.cpuPowerRef = gConfig.getValue(mySection + '/CPUPowerRef', self.cpuPowerRef)
    self.requirements = gConfig.getValue(mySection + '/Requirements', self.requirements)
    self.rank = gConfig.getValue(mySection + '/Rank', self.rank)
    self.fuzzyRank = gConfig.getValue(mySection + '/FuzzyRank', self.fuzzyRank)

  def _submitPilots(self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit,
                    ceMask, submitPrivatePilot, privateTQ, proxy, pilotsPerJob):
    """
      This method does the actual pilot submission to the Grid RB
      The logic is as follows:
      - If there are no available RB it return error
      - If there is no VOMS extension in the proxy, return error
      - It creates a temp directory
      - Prepare a JDL
        it has some part common to gLite and LCG (the payload description)
        it has some part specific to each middleware

      When more pilots are requested than fit in one job, a new token is
      requested and the method calls itself for the remaining pilots.
      pilotOptions is updated in place with the new token option.

      :return: S_OK( number of submitted pilots ) or S_ERROR; on failure after a
               partial submission the error structure carries the count in 'Value'
    """
    taskQueueID = taskQueueDict['TaskQueueID']
    # ownerDN = taskQueueDict['OwnerDN']
    credDict = proxy.getCredentials()['Value']
    ownerDN = credDict['identity']
    ownerGroup = credDict['group']

    if not self.resourceBrokers:
      # Since we can exclude RBs from the list, it may become empty
      return S_ERROR(ERROR_RB)

    # Need to get VOMS extension for the later interactions with WMS
    ret = gProxyManager.getVOMSAttributes(proxy)
    if not ret['OK']:
      self.log.error(ERROR_VOMS, ret['Message'])
      return S_ERROR(ERROR_VOMS)
    if not ret['Value']:
      return S_ERROR(ERROR_VOMS)
    vomsGroup = ret['Value'][0]

    workingDirectory = tempfile.mkdtemp(prefix='TQ_%s_' % taskQueueID, dir=workDir)
    self.log.verbose('Using working Directory:', workingDirectory)

    # Write JDL
    retDict = self._prepareJDL(taskQueueDict, workingDirectory, pilotOptions, pilotsPerJob,
                               ceMask, submitPrivatePilot, privateTQ)
    jdl = retDict['JDL']
    pilotRequirements = retDict['Requirements']
    rb = retDict['RB']
    if not jdl:
      # Best-effort cleanup of the temporary directory
      shutil.rmtree(workingDirectory, ignore_errors=True)
      return S_ERROR(ERROR_JDL)

    # Check that there are available queues for the Job:
    if self.enableListMatch:
      now = Time.dateTime()
      availableCEs = self.listMatchCache.get(pilotRequirements)
      # False (as opposed to an empty list) is compared explicitly: it marks a
      # cache miss or a failed list match, so "== False" must not become "not"
      if availableCEs == False:
        availableCEs = self._listMatch(proxy, jdl, taskQueueID, rb)
        if availableCEs != False:
          self.log.verbose('LastListMatch', now)
          self.log.verbose('AvailableCEs ', availableCEs)
          # it is given in minutes
          self.listMatchCache.add(pilotRequirements, self.listMatchDelay * 60,
                                  value=availableCEs)
      if not availableCEs:
        shutil.rmtree(workingDirectory, ignore_errors=True)
        return S_ERROR(ERROR_CE + ' TQ: %d' % taskQueueID)

    # Now we are ready for the actual submission, so
    self.log.verbose('Submitting Pilots for TaskQueue', taskQueueID)
    submitRet = self._submitPilot(proxy, pilotsPerJob, jdl, taskQueueID, rb)
    shutil.rmtree(workingDirectory, ignore_errors=True)
    if not submitRet:
      return S_ERROR('Pilot Submission Failed for TQ %d ' % taskQueueID)

    # pilotReference, resourceBroker = submitRet
    submittedPilots = 0

    if pilotsPerJob != 1 and len(submitRet) != pilotsPerJob:
      # Parametric jobs are used
      for pilotReference, resourceBroker in submitRet:
        pilotReference = self._getChildrenReferences(proxy, pilotReference, taskQueueID)
        submittedPilots += len(pilotReference)
        pilotAgentsDB.addPilotTQReference(pilotReference, taskQueueID, ownerDN,
                                          ownerGroup, resourceBroker, self.gridMiddleware,
                                          pilotRequirements)
    else:
      for pilotReference, resourceBroker in submitRet:
        pilotReference = [pilotReference]
        submittedPilots += len(pilotReference)
        pilotAgentsDB.addPilotTQReference(pilotReference, taskQueueID, ownerDN,
                                          ownerGroup, resourceBroker, self.gridMiddleware,
                                          pilotRequirements)

    # add some sleep here
    time.sleep(0.1 * submittedPilots)

    if pilotsToSubmit > pilotsPerJob:
      # Additional submissions are necessary, need to get a new token and iterate.
      pilotsToSubmit -= pilotsPerJob
      result = gProxyManager.requestToken(ownerDN, ownerGroup,
                                          max(pilotsToSubmit, self.maxJobsInFillMode))
      if not result['OK']:
        self.log.error(ERROR_TOKEN, result['Message'])
        result = S_ERROR(ERROR_TOKEN)
        result['Value'] = submittedPilots
        return result
      (token, numberOfUses) = result['Value']
      # Drop every previous token option. The list is rebuilt in place (callers
      # keep seeing the update) instead of removing items while iterating,
      # which skips the element following each removed one.
      tokenOption = '-o /Security/ProxyToken='
      pilotOptions[:] = [option for option in pilotOptions
                         if not option.startswith(tokenOption)]
      pilotOptions.append('-o /Security/ProxyToken=%s' % token)
      pilotsPerJob = max(1, min(pilotsPerJob, int(numberOfUses / self.maxJobsInFillMode)))
      result = self._submitPilots(workDir, taskQueueDict, pilotOptions,
                                  pilotsToSubmit, ceMask,
                                  submitPrivatePilot, privateTQ,
                                  proxy, pilotsPerJob)
      if not result['OK']:
        if 'Value' not in result:
          result['Value'] = 0
        result['Value'] += submittedPilots
        return result
      submittedPilots += result['Value']

    return S_OK(submittedPilots)

  def _prepareJDL(self, taskQueueDict, workingDirectory, pilotOptions, pilotsToSubmit,
                  ceMask, submitPrivatePilot, privateTQ):
    """
      This method should be overridden in a subclass
    """
    self.log.error('_prepareJDL() method should be implemented in a subclass')
    sys.exit()

  def _JobJDL(self, taskQueueDict, pilotOptions, ceMask):
    """
     The Job JDL is the same for LCG and GLite

     :return: tuple ( pilotJDL, pilotRequirements )
    """
    pilotJDL = 'Executable = "%s";\n' % os.path.basename(self.pilot)
    executable = self.pilot

    pilotJDL += 'Arguments = "%s";\n' % ' '.join(pilotOptions)

    pilotJDL += 'CPUTimeRef = %s;\n' % taskQueueDict['CPUTime']

    pilotJDL += 'CPUPowerRef = %s;\n' % self.cpuPowerRef

    pilotJDL += """CPUWorkRef = real( CPUTimeRef * CPUPowerRef );
Lookup = "CPUScalingReferenceSI00=*";
cap = isList( other.GlueCECapability ) ? other.GlueCECapability : { "dummy" };
i0 = regexp( Lookup, cap[0] ) ? 0 : undefined;
i1 = isString( cap[1] ) && regexp( Lookup, cap[1] ) ? 1 : i0;
i2 = isString( cap[2] ) && regexp( Lookup, cap[2] ) ? 2 : i1;
i3 = isString( cap[3] ) && regexp( Lookup, cap[3] ) ? 3 : i2;
i4 = isString( cap[4] ) && regexp( Lookup, cap[4] ) ? 4 : i3;
i5 = isString( cap[5] ) && regexp( Lookup, cap[5] ) ? 5 : i4;
index = isString( cap[6] ) && regexp( Lookup, cap[6] ) ? 6 : i5;
i = isUndefined( index ) ? 0 : index;
QueuePowerRef = real( ! isUndefined( index ) ? int( substr( cap[i], size( Lookup ) - 1 ) ) : other.GlueHostBenchmarkSI00 );
QueueTimeRef = real( other.GlueCEPolicyMaxCPUTime * 60 );
QueueWorkRef = QueuePowerRef * QueueTimeRef;
"""

    requirements = list(self.requirements)
    if 'GridCEs' in taskQueueDict and taskQueueDict['GridCEs']:
      # if there an explicit Grig CE requested by the TQ, remove the Ranking requirement
      for req in self.requirements:
        if req.strip().lower()[:6] == 'rank >':
          requirements.remove(req)

    requirements.append('QueueWorkRef > CPUWorkRef')

    siteRequirements = '\n || '.join(['other.GlueCEInfoHostName == "%s"' % s for s in ceMask])
    requirements.append("( %s\n )" % siteRequirements)

    pilotRequirements = '\n && '.join(requirements)

    pilotJDL += 'pilotRequirements = %s;\n' % pilotRequirements

    pilotJDL += 'Rank = %s;\n' % self.rank
    pilotJDL += 'FuzzyRank = %s;\n' % self.fuzzyRank
    pilotJDL += 'StdOutput = "%s";\n' % outputSandboxFiles[0]
    pilotJDL += 'StdError = "%s";\n' % outputSandboxFiles[1]

    pilotJDL += 'InputSandbox = { "%s" };\n' % '", "'.join([self.install, executable])

    pilotJDL += 'OutputSandbox = { %s };\n' % ', '.join(['"%s"' % f for f in outputSandboxFiles])

    self.log.verbose(pilotJDL)

    return (pilotJDL, pilotRequirements)

  def parseListMatchStdout(self, proxy, cmd, taskQueueID, rb):
    """
      Parse List Match stdout to return list of matched CE's

      :return: list of output lines naming a CE, or False if the command failed
    """
    self.log.verbose('Executing List Match for TaskQueue', taskQueueID)

    start = time.time()
    ret = executeGridCommand(proxy, cmd, self.gridEnv)

    if not ret['OK']:
      self.log.error('Failed to execute List Match:', ret['Message'])
      self.__sendErrorMail(rb, 'List Match', cmd, ret, proxy)
      return False
    if ret['Value'][0] != 0:
      self.log.error('Error executing List Match:',
                     str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
      self.__sendErrorMail(rb, 'List Match', cmd, ret, proxy)
      return False
    self.log.info('List Match Execution Time: %.2f for TaskQueue %d' % ((time.time() - start),
                                                                        taskQueueID))

    stdout = ret['Value'][1]
    stderr = ret['Value'][2]
    availableCEs = []
    # Parse std.out
    for line in List.fromChar(stdout, '\n'):
      if '/jobmanager-' in line or '/cream-' in line:
        # TODO: the line has to be stripped from extra info
        availableCEs.append(line)

    if not availableCEs:
      self.log.info('List-Match failed to find CEs for TaskQueue', taskQueueID)
      self.log.info(stdout)
      self.log.info(stderr)
    else:
      self.log.debug('List-Match returns:',
                     str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
      self.log.info('List-Match found %s CEs for TaskQueue' % len(availableCEs), taskQueueID)
      self.log.verbose(', '.join(availableCEs))

    return availableCEs

  def parseJobSubmitStdout(self, proxy, cmd, taskQueueID, rb):
    """
      Parse Job Submit stdout to return pilot reference

      :return: tuple ( reference, broker host ) using the last "https:" reference
               found in the output and the host of the first one, or False if the
               command failed or printed no reference
    """
    start = time.time()
    self.log.verbose('Executing Job Submit for TaskQueue', taskQueueID)

    ret = executeGridCommand(proxy, cmd, self.gridEnv)

    if not ret['OK']:
      self.log.error('Failed to execute Job Submit:', ret['Message'])
      self.__sendErrorMail(rb, 'Job Submit', cmd, ret, proxy)
      return False
    if ret['Value'][0] != 0:
      self.log.error('Error executing Job Submit:',
                     str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
      self.__sendErrorMail(rb, 'Job Submit', cmd, ret, proxy)
      return False
    self.log.info('Job Submit Execution Time: %.2f for TaskQueue %d' % ((time.time() - start),
                                                                        taskQueueID))

    stdout = ret['Value'][1]

    glite_id = None
    # From here on rb is the broker host extracted from the returned reference
    rb = ''
    for line in List.fromChar(stdout, '\n'):
      refMatch = re.search(r"(https:\S+)", line)
      if not refMatch:
        continue
      glite_id = refMatch.group(1)
      if not rb:
        hostMatch = re.search("https://(.+):.+", glite_id)
        # A reference without an explicit port has no match: leave rb empty
        # rather than fail on a None match object
        if hostMatch:
          rb = hostMatch.group(1)

    if glite_id is None:
      self.log.error('Job Submit returns no Reference:',
                     str(ret['Value'][0]) + '\n'.join(ret['Value'][1:3]))
      return False

    self.log.info('Reference %s for TaskQueue %s' % (glite_id, taskQueueID))

    return glite_id, rb

  def _writeJDL(self, filename, jdlList):
    """
      Write the JDL lines, joined with newlines, to the given file

      :return: the file name, or '' if the file could not be written
    """
    try:
      # The context manager closes the file even if the write fails
      with open(filename, 'w') as jdlFile:
        jdlFile.write('\n'.join(jdlList))
    except Exception:
      self.log.exception()
      return ''

    return filename
class PilotDirector:
  """
    Base Pilot Director class.
    Derived classes must implement:
      * __init__( self, submitPool ):
          that must call the parent class __init__ method and then do its own initialization
      * configure( self, csSection, submitPool ):
          that must call the parent class configure method and the do its own configuration
      * _submitPilots( self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit, ceMask,
                       submitPrivatePilot, privateTQ, proxy, pilotsPerJob )
          actual method doing the submission to the backend once the submitPilots method
          has prepared the common part

    Derived classes might implement:
      * configureFromSection( self, mySection ):
          to reload from a CS section the additional datamembers they might have defined.

    If additional datamembers are defined, they must:
      - be declared in the __init__
      - be reconfigured in the configureFromSection method by executing
        self.reloadConfiguration( csSection, submitPool ) in their configure method
  """
  gridMiddleware = ''

  def __init__(self, submitPool):
    """
     Define the logger and some defaults
    """

    if submitPool == self.gridMiddleware:
      self.log = gLogger.getSubLogger('%sPilotDirector' % self.gridMiddleware)
    else:
      self.log = gLogger.getSubLogger('%sPilotDirector/%s' % (self.gridMiddleware, submitPool))

    self.pilot = DIRAC_PILOT
    self.submitPoolOption = '-o /Resources/Computing/CEDefaults/SubmitPool=%s' % submitPool
    self.extraPilotOptions = []
    self.installVersion = DIRAC_VERSION
    self.installProject = DIRAC_PROJECT
    self.installation = DIRAC_INSTALLATION
    self.pilotExtensionsList = []

    self.virtualOrganization = VIRTUAL_ORGANIZATION
    self.install = DIRAC_INSTALL
    self.maxJobsInFillMode = MAX_JOBS_IN_FILLMODE
    self.targetGrids = [self.gridMiddleware]

    self.enableListMatch = ENABLE_LISTMATCH
    self.listMatchDelay = LISTMATCH_DELAY
    self.listMatchCache = DictCache()

    self.privatePilotFraction = PRIVATE_PILOT_FRACTION

    self.errorClearTime = ERROR_CLEAR_TIME
    self.errorTicketTime = ERROR_TICKET_TIME
    self.errorMailAddress = DIRAC.errorMail
    self.alarmMailAddress = DIRAC.alarmMail
    self.mailFromAddress = FROM_MAIL

    # Fallback logger, kept for the case where no 'log' instance attribute exists
    if 'log' not in self.__dict__:
      self.log = gLogger.getSubLogger('PilotDirector')
    self.log.info('Initialized')

  def configure(self, csSection, submitPool):
    """
     Here goes common configuration for all PilotDirectors
    """
    self.configureFromSection(csSection)
    self.reloadConfiguration(csSection, submitPool)

    # Get the defaults for the Setup where the Director is running
    opsHelper = Operations()
    self.installVersion = opsHelper.getValue(cfgPath('Pilot', 'Version'), [self.installVersion])[0]
    self.installProject = opsHelper.getValue(cfgPath('Pilot', 'Project'), self.installProject)
    self.installation = opsHelper.getValue(cfgPath('Pilot', 'Installation'), self.installation)
    self.pilotExtensionsList = opsHelper.getValue("Pilot/Extensions", self.pilotExtensionsList)

    self.log.info('===============================================')
    self.log.info('Configuration:')
    self.log.info('')
    self.log.info(' Target Grids: ', ', '.join(self.targetGrids))
    self.log.info(' Install script: ', self.install)
    self.log.info(' Pilot script: ', self.pilot)
    self.log.info(' Install Ver: ', self.installVersion)
    if self.installProject:
      self.log.info(' Project: ', self.installProject)
    if self.installation:
      self.log.info(' Installation: ', self.installation)
    if self.extraPilotOptions:
      self.log.info(' Extra Options: ', ' '.join(self.extraPilotOptions))
    self.log.info(' ListMatch: ', self.enableListMatch)
    self.log.info(' Private %: ', self.privatePilotFraction * 100)
    if self.enableListMatch:
      self.log.info(' ListMatch Delay:', self.listMatchDelay)
    self.listMatchCache.purgeExpired()

  def reloadConfiguration(self, csSection, submitPool):
    """
     Common Configuration can be overwriten for each GridMiddleware
    """
    mySection = csSection + '/' + self.gridMiddleware
    self.configureFromSection(mySection)

    # And Again for each SubmitPool
    mySection = csSection + '/' + submitPool
    self.configureFromSection(mySection)

  def configureFromSection(self, mySection):
    """
      reload from CS: every option missing in the section keeps its current value
    """
    self.pilot = gConfig.getValue(mySection + '/PilotScript', self.pilot)
    # TODO: Remove this DIRACVersion after 06/2012
    self.installVersion = gConfig.getValue(mySection + '/DIRACVersion', self.installVersion)
    self.installVersion = gConfig.getValue(mySection + '/Version', self.installVersion)
    self.extraPilotOptions = gConfig.getValue(mySection + '/ExtraPilotOptions', self.extraPilotOptions)
    self.install = gConfig.getValue(mySection + '/InstallScript', self.install)
    self.installProject = gConfig.getValue(mySection + '/Project', self.installProject)
    self.installation = gConfig.getValue(mySection + '/Installation', self.installation)
    self.maxJobsInFillMode = gConfig.getValue(mySection + '/MaxJobsInFillMode', self.maxJobsInFillMode)
    self.targetGrids = gConfig.getValue(mySection + '/TargetGrids', self.targetGrids)

    self.enableListMatch = gConfig.getValue(mySection + '/EnableListMatch', self.enableListMatch)
    self.listMatchDelay = gConfig.getValue(mySection + '/ListMatchDelay', self.listMatchDelay)
    self.errorClearTime = gConfig.getValue(mySection + '/ErrorClearTime', self.errorClearTime)
    self.errorTicketTime = gConfig.getValue(mySection + '/ErrorTicketTime', self.errorTicketTime)
    self.errorMailAddress = gConfig.getValue(mySection + '/ErrorMailAddress', self.errorMailAddress)
    self.alarmMailAddress = gConfig.getValue(mySection + '/AlarmMailAddress', self.alarmMailAddress)
    self.mailFromAddress = gConfig.getValue(mySection + '/MailFromAddress', self.mailFromAddress)
    self.privatePilotFraction = gConfig.getValue(mySection + '/PrivatePilotFraction', self.privatePilotFraction)

    virtualOrganization = gConfig.getValue(mySection + '/VirtualOrganization', '')
    if not virtualOrganization:
      # NOTE(review): presumably resolves a default VO when the group is unknown -
      # confirm against getVOForGroup
      virtualOrganization = getVOForGroup('NonExistingGroup')
      if not virtualOrganization:
        virtualOrganization = self.virtualOrganization
    self.virtualOrganization = virtualOrganization

  def _resolveCECandidates(self, taskQueueDict):
    """
      Return a list of CEs for this TaskQueue

      An empty list is returned when the site mask cannot be retrieved, is empty,
      or has no site left after applying the TaskQueue restrictions.
    """
    # assume user knows what they're doing and avoid site mask e.g. sam jobs
    if 'GridCEs' in taskQueueDict and taskQueueDict['GridCEs']:
      self.log.info('CEs requested by TaskQueue %s:' % taskQueueDict['TaskQueueID'],
                    ', '.join(taskQueueDict['GridCEs']))
      return taskQueueDict['GridCEs']

    # Get the mask
    ret = jobDB.getSiteMask()
    if not ret['OK']:
      self.log.error('Can not retrieve site Mask from DB:', ret['Message'])
      return []

    siteMask = ret['Value']
    if not siteMask:
      self.log.error('Site mask is empty')
      return []

    self.log.verbose('Site Mask: %s' % ', '.join(siteMask))

    # remove banned sites from siteMask
    if 'BannedSites' in taskQueueDict:
      for site in taskQueueDict['BannedSites']:
        if site in siteMask:
          siteMask.remove(site)
          self.log.verbose('Removing banned site %s from site Mask' % site)

    # remove from the mask if a Site is given
    siteMask = [site for site in siteMask
                if 'Sites' not in taskQueueDict or site in taskQueueDict['Sites']]

    if not siteMask:
      # pilot can not be submitted
      self.log.info('No Valid Site Candidate in Mask for TaskQueue %s' % taskQueueDict['TaskQueueID'])
      return []

    self.log.info('Site Candidates for TaskQueue %s:' % taskQueueDict['TaskQueueID'],
                  ', '.join(siteMask))

    # Get CE's associates to the given site Names
    ceMask = []

    for grid in self.targetGrids:

      section = '/Resources/Sites/%s' % grid
      ret = gConfig.getSections(section)
      if not ret['OK']:
        # this is hack, maintained until LCG is added as TargetGrid for the gLite SubmitPool
        section = '/Resources/Sites/LCG'
        ret = gConfig.getSections(section)

      if not ret['OK']:
        self.log.error('Could not obtain CEs from CS', ret['Message'])
        continue

      gridSites = ret['Value']
      for siteName in gridSites:
        if siteName in siteMask:
          ret = gConfig.getValue('%s/%s/CE' % (section, siteName), [])
          for ce in ret:
            submissionMode = gConfig.getValue('%s/%s/CEs/%s/SubmissionMode' % (section, siteName, ce),
                                              'gLite')
            if submissionMode == self.gridMiddleware and ce not in ceMask:
              ceMask.append(ce)

    if not ceMask:
      self.log.info('No CE Candidate found for TaskQueue %s:' % taskQueueDict['TaskQueueID'],
                    ', '.join(siteMask))

    self.log.verbose('CE Candidates for TaskQueue %s:' % taskQueueDict['TaskQueueID'],
                     ', '.join(ceMask))

    return ceMask

  def _getPilotOptions(self, taskQueueDict, pilotsToSubmit):
    """
      Build the pilot command line options for the given TaskQueue

      :return: S_OK( ( pilotOptions, pilotsToSubmit, ownerDN, ownerGroup,
               submitPrivatePilot, privateTQ ) ) or S_ERROR
    """
    # Need to limit the maximum number of pilots to submit at once
    # For generic pilots this is limited by the number of use of the tokens and the
    # maximum number of jobs in Filling mode, but for private Jobs we need an extra limitation:
    pilotsToSubmit = max(min(pilotsToSubmit, int(50 / self.maxJobsInFillMode)), 1)
    pilotOptions = []
    privateIfGenericTQ = self.privatePilotFraction > random.random()
    privateTQ = ('PilotTypes' in taskQueueDict and
                 'private' in [t.lower() for t in taskQueueDict['PilotTypes']])
    forceGeneric = 'ForceGeneric' in taskQueueDict
    submitPrivatePilot = (privateIfGenericTQ or privateTQ) and not forceGeneric
    if submitPrivatePilot:
      self.log.verbose('Submitting private pilots for TaskQueue %s' % taskQueueDict['TaskQueueID'])
      ownerDN = taskQueueDict['OwnerDN']
      ownerGroup = taskQueueDict['OwnerGroup']
      # User Group requirement
      pilotOptions.append('-G %s' % taskQueueDict['OwnerGroup'])
      # check if group allows jobsharing
      ownerGroupProperties = getPropertiesForGroup(ownerGroup)
      if 'JobSharing' not in ownerGroupProperties:
        # Add Owner requirement to pilot
        pilotOptions.append("-O '%s'" % ownerDN)
      if privateTQ:
        pilotOptions.append('-o /Resources/Computing/CEDefaults/PilotType=private')
      maxJobsInFillMode = self.maxJobsInFillMode
    else:
      # For generic jobs we'll submit mixture of generic and private pilots
      self.log.verbose('Submitting generic pilots for TaskQueue %s' % taskQueueDict['TaskQueueID'])
      # ADRI: Find the generic group
      result = findGenericPilotCredentials(group=taskQueueDict['OwnerGroup'])
      if not result['OK']:
        self.log.error(ERROR_GENERIC_CREDENTIALS, result['Message'])
        return S_ERROR(ERROR_GENERIC_CREDENTIALS)
      ownerDN, ownerGroup = result['Value']

      result = gProxyManager.requestToken(ownerDN, ownerGroup,
                                          max(pilotsToSubmit, self.maxJobsInFillMode))
      if not result['OK']:
        self.log.error(ERROR_TOKEN, result['Message'])
        return S_ERROR(ERROR_TOKEN)
      (token, numberOfUses) = result['Value']
      pilotsToSubmit = min(numberOfUses, pilotsToSubmit)

      pilotOptions.append('-o /Security/ProxyToken=%s' % token)

      # Explicit floor division: the pilot count stays an integer even when "/"
      # means true division (identical to the historical result for integers)
      pilotsToSubmit = max(1, (pilotsToSubmit - 1) // self.maxJobsInFillMode + 1)

      maxJobsInFillMode = int(numberOfUses / pilotsToSubmit)
    # Use Filling mode
    pilotOptions.append('-M %s' % maxJobsInFillMode)

    # Debug
    pilotOptions.append('-d')
    # Setup.
    pilotOptions.append('-S %s' % taskQueueDict['Setup'])
    # CS Servers
    csServers = gConfig.getServersList()
    if len(csServers) > 3:
      # Remove the master
      master = gConfigurationData.getMasterServer()
      if master in csServers:
        csServers.remove(master)
    pilotOptions.append('-C %s' % ",".join(csServers))
    # DIRAC Extensions to be used in pilots
    # ubeda: I'm not entirely sure if we can use here the same opsHelper as in line
    # line +352
    pilotExtensionsList = Operations().getValue("Pilot/Extensions", [])
    extensionsList = []
    if pilotExtensionsList:
      # A first entry equal to 'None' means that no extension is to be used
      if pilotExtensionsList[0] != 'None':
        extensionsList = pilotExtensionsList
    else:
      extensionsList = getCSExtensions()
    if extensionsList:
      pilotOptions.append('-e %s' % ",".join(extensionsList))

    # Get DIRAC version and project, There might be global Setup defaults and per VO/Setup defaults (from configure)
    opsHelper = Operations(group=taskQueueDict['OwnerGroup'], setup=taskQueueDict['Setup'])
    # Requested version of DIRAC (it can be a list, so we take the fist one)
    version = opsHelper.getValue(cfgPath('Pilot', 'Version'), [self.installVersion])[0]
    pilotOptions.append('-r %s' % version)
    # Requested Project to install
    installProject = opsHelper.getValue(cfgPath('Pilot', 'Project'), self.installProject)
    if installProject:
      pilotOptions.append('-l %s' % installProject)
    installation = opsHelper.getValue(cfgPath('Pilot', 'Installation'), self.installation)
    if installation:
      pilotOptions.append("-V %s" % installation)
    # Requested CPU time
    pilotOptions.append('-T %s' % taskQueueDict['CPUTime'])

    if self.submitPoolOption not in self.extraPilotOptions:
      pilotOptions.append(self.submitPoolOption)

    if self.extraPilotOptions:
      pilotOptions.extend(self.extraPilotOptions)

    return S_OK((pilotOptions, pilotsToSubmit, ownerDN, ownerGroup, submitPrivatePilot, privateTQ))

  def _submitPilots(self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit,
                    ceMask, submitPrivatePilot, privateTQ, proxy, pilotsPerJob):
    """
      This method must be implemented on the Backend specific derived class.

      This base implementation only logs an error and returns S_OK(): a missing
      implementation is a problem with the Director, not with the Job.
    """
    self.log.error('_submitPilots method not implemented')
    return S_OK()

  def submitPilots(self, taskQueueDict, pilotsToSubmit, workDir=None):
    """
      Submit pilot for the given TaskQueue,
      this method just insert the request in the corresponding ThreadPool,
      the submission is done from the Thread Pool job

      :return: result of the backend _submitPilots, S_ERROR if no CE, pilot options
               or proxy are available, or S_OK( 0 ) if an exception was raised
    """
    try:

      taskQueueID = taskQueueDict['TaskQueueID']

      self.log.verbose('Submitting Pilot')
      ceMask = self._resolveCECandidates(taskQueueDict)
      if not ceMask:
        return S_ERROR('No CE available for TaskQueue %d' % int(taskQueueID))
      result = self._getPilotOptions(taskQueueDict, pilotsToSubmit)
      if not result['OK']:
        return result
      (pilotOptions, pilotsPerJob, ownerDN, ownerGroup, submitPrivatePilot, privateTQ) = result['Value']
      # get a valid proxy, submit with a long proxy to avoid renewal
      ret = self._getPilotProxyFromDIRACGroup(ownerDN, ownerGroup, requiredTimeLeft=86400 * 5)
      if not ret['OK']:
        self.log.error(ret['Message'])
        self.log.error('No proxy Available', 'User "%s", Group "%s"' % (ownerDN, ownerGroup))
        return S_ERROR(ERROR_PROXY)
      proxy = ret['Value']
      # Now call a Grid Specific method to handle the final submission of the pilots
      return self._submitPilots(workDir, taskQueueDict, pilotOptions,
                                pilotsToSubmit, ceMask,
                                submitPrivatePilot, privateTQ,
                                proxy, pilotsPerJob)

    except Exception:
      self.log.exception('Error in Pilot Submission')

    return S_OK(0)

  def _getPilotProxyFromDIRACGroup(self, ownerDN, ownerGroup, requiredTimeLeft):
    """
     To be overwritten if a given Pilot does not require a full proxy
    """
    self.log.info("Downloading %s@%s proxy" % (ownerDN, ownerGroup))
    return gProxyManager.getPilotProxyFromDIRACGroup(ownerDN, ownerGroup, requiredTimeLeft)

  def exceptionCallBack(self, threadedJob, exceptionInfo):
    """ Log the exception raised by a threaded submission job """
    self.log.exception('Error in Pilot Submission')
def __init__( self ):
    """ Register the service under GatewayService.GATEWAY_NAME and set up its private state """
    Service.__init__( self, GatewayService.GATEWAY_NAME )
    # 100 * 1024 * 1024; presumably a cap in bytes on transfers, going by the attribute name
    self.__transferBytesLimit = 100 * 1024 * 1024
    # Starts empty; presumably filled with delegated credentials elsewhere in the class
    self.__delegatedCredentials = DictCache()
class PilotDirector(object):
    """
    Base Pilot Director class.

    Derived classes must implement:
      * __init__( self, submitPool ):
          that must call the parent class __init__ method and then do its own initialization
      * configure( self, csSection, submitPool ):
          that must call the parent class configure method and then do its own configuration
      * _submitPilots( self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit, ceMask,
                       submitPrivatePilot, privateTQ, proxy, pilotsPerJob )
      * _listMatch( self, proxy, jdl, taskQueueID, rb )
      * _getChildrenReferences( self, proxy, parentReference, taskQueueID )

    Derived classes might implement:
      * configureFromSection( self, mySection ):
          to reload from a CS section the additional datamembers they might have defined.

    If additional datamembers are defined, they must:
      - be declared in the __init__
      - be reconfigured in the configureFromSection method by executing
        self.reloadConfiguration( csSection, submitPool ) in their configure method
    """
    gridMiddleware = ''

    def __init__(self, submitPool):
        """ Define the logger and some defaults

            :param submitPool: name of the submit pool; used for the logger name and
                               for the SubmitPool option passed to the pilots
        """
        if submitPool == self.gridMiddleware:
            self.log = gLogger.getSubLogger('%sPilotDirector' % self.gridMiddleware)
        else:
            self.log = gLogger.getSubLogger('%sPilotDirector/%s' % (self.gridMiddleware, submitPool))

        self.pilot = DIRAC_PILOT
        self.submitPoolOption = '-o /Resources/Computing/CEDefaults/SubmitPool=%s' % submitPool
        self.extraPilotOptions = []
        self.installVersion = DIRAC_VERSION
        self.installProject = DIRAC_PROJECT
        self.installation = DIRAC_INSTALLATION
        self.pilotExtensionsList = []

        self.virtualOrganization = VIRTUAL_ORGANIZATION
        self.install = DIRAC_INSTALL
        self.extraModules = DIRAC_MODULES
        self.maxJobsInFillMode = MAX_JOBS_IN_FILLMODE
        self.targetGrids = [self.gridMiddleware]

        self.enableListMatch = ENABLE_LISTMATCH
        self.listMatchDelay = LISTMATCH_DELAY
        self.listMatchCache = DictCache()

        self.privatePilotFraction = PRIVATE_PILOT_FRACTION

        self.errorClearTime = ERROR_CLEAR_TIME
        self.errorTicketTime = ERROR_TICKET_TIME
        self.errorMailAddress = DIRAC.errorMail
        self.alarmMailAddress = DIRAC.alarmMail
        self.mailFromAddress = FROM_MAIL

        self.log.info('Initialized')

    def configure(self, csSection, submitPool):
        """ Here goes common configuration for all PilotDirectors

            Reads the CS sections, applies the Operations defaults, logs the
            resulting configuration and purges expired list-match cache entries.
        """
        self.configureFromSection(csSection)
        self.reloadConfiguration(csSection, submitPool)

        # Get the defaults for the Setup where the Director is running
        opsHelper = Operations()
        # The version option can be a list, the first entry is used
        self.installVersion = opsHelper.getValue(cfgPath('Pilot', 'Version'), [self.installVersion])[0]
        self.installProject = opsHelper.getValue(cfgPath('Pilot', 'Project'), self.installProject)
        self.installation = opsHelper.getValue(cfgPath('Pilot', 'Installation'), self.installation)
        self.pilotExtensionsList = opsHelper.getValue("Pilot/Extensions", self.pilotExtensionsList)

        self.log.info('===============================================')
        self.log.info('Configuration:')
        self.log.info('')
        self.log.info(' Target Grids: ', ', '.join(self.targetGrids))
        self.log.info(' Install script: ', self.install)
        self.log.info(' Pilot script: ', self.pilot)
        self.log.info(' Pilot modules', self.extraModules)
        self.log.info(' Install Ver: ', self.installVersion)
        if self.installProject:
            self.log.info(' Project: ', self.installProject)
        if self.installation:
            self.log.info(' Installation: ', self.installation)
        if self.extraPilotOptions:
            self.log.info(' Extra Options: ', ' '.join(self.extraPilotOptions))
        self.log.info(' ListMatch: ', self.enableListMatch)
        self.log.info(' Private %: ', self.privatePilotFraction * 100)
        if self.enableListMatch:
            self.log.info(' ListMatch Delay:', self.listMatchDelay)
        self.listMatchCache.purgeExpired()

    def reloadConfiguration(self, csSection, submitPool):
        """ Common Configuration can be overwritten for each GridMiddleware
            and then again for each SubmitPool
        """
        mySection = csSection + '/' + self.gridMiddleware
        self.configureFromSection(mySection)
        # And Again for each SubmitPool
        mySection = csSection + '/' + submitPool
        self.configureFromSection(mySection)

    def configureFromSection(self, mySection):
        """ reload from CS

            Every option falls back to the current attribute value when it is
            not defined in the given section.
        """
        self.pilot = gConfig.getValue(mySection + '/PilotScript', self.pilot)
        self.installVersion = gConfig.getValue(mySection + '/Version', self.installVersion)
        self.extraPilotOptions = gConfig.getValue(mySection + '/ExtraPilotOptions', self.extraPilotOptions)
        self.install = gConfig.getValue(mySection + '/InstallScript', self.install)

        # This method is invoked for several sections per configure() call, so the
        # modules of this section are put first and entries that are already listed
        # are not repeated (otherwise the list grows with duplicates on every call).
        sectionModules = gConfig.getValue(mySection + '/ExtraPilotModules', [])
        self.extraModules = sectionModules + [
            module for module in self.extraModules if module not in sectionModules]

        self.installProject = gConfig.getValue(mySection + '/Project', self.installProject)
        self.installation = gConfig.getValue(mySection + '/Installation', self.installation)
        self.maxJobsInFillMode = gConfig.getValue(mySection + '/MaxJobsInFillMode', self.maxJobsInFillMode)
        self.targetGrids = gConfig.getValue(mySection + '/TargetGrids', self.targetGrids)

        self.enableListMatch = gConfig.getValue(mySection + '/EnableListMatch', self.enableListMatch)
        self.listMatchDelay = gConfig.getValue(mySection + '/ListMatchDelay', self.listMatchDelay)

        self.errorClearTime = gConfig.getValue(mySection + '/ErrorClearTime', self.errorClearTime)
        self.errorTicketTime = gConfig.getValue(mySection + '/ErrorTicketTime', self.errorTicketTime)
        self.errorMailAddress = gConfig.getValue(mySection + '/ErrorMailAddress', self.errorMailAddress)
        self.alarmMailAddress = gConfig.getValue(mySection + '/AlarmMailAddress', self.alarmMailAddress)
        self.mailFromAddress = gConfig.getValue(mySection + '/MailFromAddress', self.mailFromAddress)
        self.privatePilotFraction = gConfig.getValue(mySection + '/PrivatePilotFraction', self.privatePilotFraction)

        # Resolution order: section option, then getVOForGroup, then the current value
        virtualOrganization = gConfig.getValue(mySection + '/VirtualOrganization', '')
        if not virtualOrganization:
            # NOTE(review): presumably a non existing group makes getVOForGroup return
            # the default VO - confirm against the helper
            virtualOrganization = getVOForGroup('NonExistingGroup')
        if not virtualOrganization:
            virtualOrganization = self.virtualOrganization
        self.virtualOrganization = virtualOrganization

    def _resolveCECandidates(self, taskQueueDict):
        """ Return a list of CEs for this TaskQueue

            An empty list is returned when the site mask can not be retrieved or
            no site / CE matches the TaskQueue requirements.
        """
        # assume user knows what they're doing and avoid site mask e.g. sam jobs
        if 'GridCEs' in taskQueueDict and taskQueueDict['GridCEs']:
            self.log.info('CEs requested by TaskQueue %s:' % taskQueueDict['TaskQueueID'],
                          ', '.join(taskQueueDict['GridCEs']))
            return taskQueueDict['GridCEs']

        # Get the mask
        ret = jobDB.getSiteMask()
        if not ret['OK']:
            self.log.error('Can not retrieve site Mask from DB:', ret['Message'])
            return []

        siteMask = ret['Value']
        if not siteMask:
            self.log.error('Site mask is empty')
            return []

        self.log.verbose('Site Mask: %s' % ', '.join(siteMask))

        # remove banned sites from siteMask
        if 'BannedSites' in taskQueueDict:
            for site in taskQueueDict['BannedSites']:
                if site in siteMask:
                    siteMask.remove(site)
                    self.log.verbose('Removing banned site %s from site Mask' % site)

        # remove from the mask if a Site is given
        siteMask = [site for site in siteMask
                    if 'Sites' not in taskQueueDict or site in taskQueueDict['Sites']]

        if not siteMask:
            # pilot can not be submitted
            self.log.info('No Valid Site Candidate in Mask for TaskQueue %s' % taskQueueDict['TaskQueueID'])
            return []

        self.log.info('Site Candidates for TaskQueue %s:' % taskQueueDict['TaskQueueID'], ', '.join(siteMask))

        # Get CE's associates to the given site Names
        ceMask = []

        for grid in self.targetGrids:
            section = '/Resources/Sites/%s' % grid
            ret = gConfig.getSections(section)
            if not ret['OK']:
                # this is hack, maintained until LCG is added as TargetGrid for the gLite SubmitPool
                section = '/Resources/Sites/LCG'
                ret = gConfig.getSections(section)

            if not ret['OK']:
                self.log.error('Could not obtain CEs from CS', ret['Message'])
                continue

            gridSites = ret['Value']
            for siteName in gridSites:
                if siteName in siteMask:
                    ret = gConfig.getValue('%s/%s/CE' % (section, siteName), [])
                    for ce in ret:
                        submissionMode = gConfig.getValue(
                            '%s/%s/CEs/%s/SubmissionMode' % (section, siteName, ce), 'gLite')
                        # Only CEs handled by this middleware, each of them once
                        if submissionMode == self.gridMiddleware and ce not in ceMask:
                            ceMask.append(ce)

        if not ceMask:
            self.log.info('No CE Candidate found for TaskQueue %s:' % taskQueueDict['TaskQueueID'],
                          ', '.join(siteMask))

        self.log.verbose('CE Candidates for TaskQueue %s:' % taskQueueDict['TaskQueueID'], ', '.join(ceMask))

        return ceMask

    def _getPilotOptions(self, taskQueueDict, pilotsToSubmit):
        """ Build the list of options for the pilots of the given TaskQueue

            :return: S_OK( ( pilotOptions, pilotsToSubmit, ownerDN, ownerGroup,
                     submitPrivatePilot, privateTQ ) ), or S_ERROR if the generic pilot
                     credentials or the proxy token can not be obtained
        """
        # Need to limit the maximum number of pilots to submit at once
        # For generic pilots this is limited by the number of use of the tokens and the
        # maximum number of jobs in Filling mode, but for private Jobs we need an extra limitation:
        pilotsToSubmit = max(min(pilotsToSubmit, int(50 / self.maxJobsInFillMode)), 1)
        pilotOptions = []
        privateIfGenericTQ = self.privatePilotFraction > random.random()
        privateTQ = ('PilotTypes' in taskQueueDict and
                     'private' in [t.lower() for t in taskQueueDict['PilotTypes']])
        forceGeneric = 'ForceGeneric' in taskQueueDict
        submitPrivatePilot = (privateIfGenericTQ or privateTQ) and not forceGeneric
        if submitPrivatePilot:
            self.log.verbose('Submitting private pilots for TaskQueue %s' % taskQueueDict['TaskQueueID'])
            ownerDN = taskQueueDict['OwnerDN']
            ownerGroup = taskQueueDict['OwnerGroup']
            # User Group requirement
            pilotOptions.append('-G %s' % taskQueueDict['OwnerGroup'])
            # check if group allows jobsharing
            ownerGroupProperties = getPropertiesForGroup(ownerGroup)
            if 'JobSharing' not in ownerGroupProperties:
                # Add Owner requirement to pilot
                pilotOptions.append("-O '%s'" % ownerDN)
            if privateTQ:
                pilotOptions.append('-o /Resources/Computing/CEDefaults/PilotType=private')
            maxJobsInFillMode = self.maxJobsInFillMode
        else:
            # For generic jobs we'll submit mixture of generic and private pilots
            self.log.verbose('Submitting generic pilots for TaskQueue %s' % taskQueueDict['TaskQueueID'])
            # ADRI: Find the generic group
            result = findGenericPilotCredentials(group=taskQueueDict['OwnerGroup'])
            if not result['OK']:
                self.log.error(ERROR_GENERIC_CREDENTIALS, result['Message'])
                return S_ERROR(ERROR_GENERIC_CREDENTIALS)
            ownerDN, ownerGroup = result['Value']

            result = gProxyManager.requestToken(ownerDN, ownerGroup,
                                                max(pilotsToSubmit, self.maxJobsInFillMode))
            if not result['OK']:
                self.log.error(ERROR_TOKEN, result['Message'])
                return S_ERROR(ERROR_TOKEN)
            (token, numberOfUses) = result['Value']
            pilotsToSubmit = min(numberOfUses, pilotsToSubmit)

            pilotOptions.append('-o /Security/ProxyToken=%s' % token)

            # Ceiling of pilotsToSubmit / maxJobsInFillMode. Floor division keeps the
            # result an integer whatever the division semantics of the interpreter.
            pilotsToSubmit = max(1, (pilotsToSubmit - 1) // self.maxJobsInFillMode + 1)

            maxJobsInFillMode = int(numberOfUses / pilotsToSubmit)

        # Use Filling mode
        pilotOptions.append('-M %s' % maxJobsInFillMode)

        # Debug
        pilotOptions.append('-d')
        # Setup.
        pilotOptions.append('-S %s' % taskQueueDict['Setup'])
        # CS Servers; work on a copy so the list owned by gConfig is never modified
        csServers = list(gConfig.getServersList())
        if len(csServers) > 3:
            # Remove the master
            master = gConfigurationData.getMasterServer()
            if master in csServers:
                csServers.remove(master)
        pilotOptions.append('-C %s' % ",".join(csServers))
        # DIRAC Extensions to be used in pilots
        # ubeda: I'm not entirely sure if we can use here the same opsHelper as the one
        # created further down in this method
        pilotExtensionsList = Operations().getValue("Pilot/Extensions", [])
        extensionsList = []
        if pilotExtensionsList:
            # A first entry equal to 'None' disables the extensions
            if pilotExtensionsList[0] != 'None':
                extensionsList = pilotExtensionsList
        else:
            extensionsList = getCSExtensions()
        if extensionsList:
            pilotOptions.append('-e %s' % ",".join(extensionsList))

        # Get DIRAC version and project, There might be global Setup defaults and
        # per VO/Setup defaults (from configure)
        opsHelper = Operations(group=taskQueueDict['OwnerGroup'], setup=taskQueueDict['Setup'])
        # Requested version of DIRAC (it can be a list, so we take the first one)
        version = opsHelper.getValue(cfgPath('Pilot', 'Version'), [self.installVersion])[0]
        pilotOptions.append('-r %s' % version)
        # Requested Project to install
        installProject = opsHelper.getValue(cfgPath('Pilot', 'Project'), self.installProject)
        if installProject:
            pilotOptions.append('-l %s' % installProject)
        installation = opsHelper.getValue(cfgPath('Pilot', 'Installation'), self.installation)
        if installation:
            pilotOptions.append("-V %s" % installation)
        # Requested CPU time
        pilotOptions.append('-T %s' % taskQueueDict['CPUTime'])

        if self.submitPoolOption not in self.extraPilotOptions:
            pilotOptions.append(self.submitPoolOption)

        if self.extraPilotOptions:
            pilotOptions.extend(self.extraPilotOptions)

        return S_OK((pilotOptions, pilotsToSubmit, ownerDN, ownerGroup, submitPrivatePilot, privateTQ))

    def _submitPilots(self, workDir, taskQueueDict, pilotOptions, pilotsToSubmit,
                      ceMask, submitPrivatePilot, privateTQ, proxy, pilotsPerJob):
        """ This method must be implemented on the Backend specific derived class.

            The base implementation only logs an error. A missing implementation is a
            problem with the Director, not with the Job, so S_OK is returned.
        """
        self.log.error('_submitPilots method not implemented')
        return S_OK()

    def _submitPilot(self, proxy, pilotsToSubmit, jdl, taskQueueID, rb):
        """ Submit pilot and get back the reference

            The base implementation only logs an error and returns S_OK.
        """
        self.log.error('_submitPilot method not implemented')
        return S_OK()

    def _listMatch(self, proxy, jdl, taskQueueID, rb):
        """ This method must be implemented on the Backend specific derived class.

            The base implementation only logs an error and returns S_OK.
        """
        self.log.error('_listMatch method not implemented')
        return S_OK()

    def _getChildrenReferences(self, proxy, parentReference, taskQueueID):
        """ This method must be implemented on the Backend specific derived class.

            The base implementation only logs an error and returns S_OK.
        """
        self.log.error('_getChildrenReferences method not implemented')
        return S_OK()

    def submitPilots(self, taskQueueDict, pilotsToSubmit, workDir=None):
        """ Submit pilot for the given TaskQueue,
            this method just insert the request in the corresponding ThreadPool,
            the submission is done from the Thread Pool job

            :return: the result of _submitPilots, S_ERROR when no CE, pilot options or
                     proxy are available, S_OK( 0 ) if an unexpected exception is raised
        """
        try:
            taskQueueID = taskQueueDict['TaskQueueID']

            self.log.verbose('Submitting Pilot')
            ceMask = self._resolveCECandidates(taskQueueDict)
            if not ceMask:
                return S_ERROR('No CE available for TaskQueue %d' % int(taskQueueID))
            result = self._getPilotOptions(taskQueueDict, pilotsToSubmit)
            if not result['OK']:
                return result
            (pilotOptions, pilotsPerJob, ownerDN, ownerGroup, submitPrivatePilot, privateTQ) = result['Value']
            # get a valid proxy, submit with a long proxy to avoid renewal
            ret = self._getPilotProxyFromDIRACGroup(ownerDN, ownerGroup, requiredTimeLeft=86400 * 5)
            if not ret['OK']:
                self.log.error(ret['Message'])
                self.log.error('No proxy Available', 'User "%s", Group "%s"' % (ownerDN, ownerGroup))
                return S_ERROR(ERROR_PROXY)
            proxy = ret['Value']
            # Now call a Grid Specific method to handle the final submission of the pilots
            return self._submitPilots(workDir, taskQueueDict, pilotOptions,
                                      pilotsToSubmit, ceMask,
                                      submitPrivatePilot, privateTQ,
                                      proxy, pilotsPerJob)

        except Exception:
            # Deliberate best effort: the error is logged and zero pilots are reported
            self.log.exception('Error in Pilot Submission')

        return S_OK(0)

    def _getPilotProxyFromDIRACGroup(self, ownerDN, ownerGroup, requiredTimeLeft):
        """ To be overwritten if a given Pilot does not require a full proxy """
        self.log.info("Downloading %s@%s proxy" % (ownerDN, ownerGroup))
        return gProxyManager.getPilotProxyFromDIRACGroup(ownerDN, ownerGroup, requiredTimeLeft)

    def exceptionCallBack(self, threadedJob, exceptionInfo):
        """ Log the exception of a failed submission; both arguments are ignored """
        self.log.exception('Error in Pilot Submission')
class ProxyManagerClient:
    """ Client of the Framework/ProxyManager service.

        Keeps local caches of the registered users, of the downloaded (VOMS)
        proxies and of the proxy files dumped to disk.
    """
    __metaclass__ = DIRACSingleton.DIRACSingleton

    def __init__(self):
        self.__usersCache = DictCache()
        self.__proxiesCache = DictCache()
        self.__vomsProxiesCache = DictCache()
        self.__pilotProxiesCache = DictCache()
        # Entries removed from this cache get their file deleted from disk
        self.__filesCache = DictCache(self.__deleteTemporalFile)

    def __deleteTemporalFile(self, filename):
        """ Best effort removal of a dumped proxy file """
        try:
            os.unlink(filename)
        except Exception:
            # The file may already be gone; never let the cache clean up fail
            pass

    def clearCaches(self):
        """ Purge the users and proxies caches (the dumped files cache is left untouched) """
        self.__usersCache.purgeAll()
        self.__proxiesCache.purgeAll()
        self.__vomsProxiesCache.purgeAll()
        self.__pilotProxiesCache.purgeAll()

    def __getSecondsLeftToExpiration(self, expiration, utc=True):
        """ Seconds between now (UTC unless utc is False) and the expiration datetime """
        if utc:
            td = expiration - datetime.datetime.utcnow()
        else:
            td = expiration - datetime.datetime.now()
        return td.days * 86400 + td.seconds

    def __refreshUserCache(self, validSeconds=0):
        """ Reload the users cache with the users registered in the service """
        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        retVal = rpcClient.getRegisteredUsers(validSeconds)
        if not retVal['OK']:
            return retVal
        data = retVal['Value']
        # Update the cache
        for record in data:
            cacheKey = (record['DN'], record['group'])
            self.__usersCache.add(cacheKey,
                                  self.__getSecondsLeftToExpiration(record['expirationtime']),
                                  record)
        return S_OK()

    @gUsersSync
    def userHasProxy(self, userDN, userGroup, validSeconds=0):
        """ Check if a user(DN-group) has a proxy in the proxy management
            - Updates internal cache if needed to minimize queries to the service
        """
        cacheKey = (userDN, userGroup)
        if self.__usersCache.exists(cacheKey, validSeconds):
            return S_OK(True)
        # Get list of users from the DB with proxies valid at least validSeconds
        gLogger.verbose("Updating list of users in proxy management")
        retVal = self.__refreshUserCache(validSeconds)
        if not retVal['OK']:
            return retVal
        return S_OK(self.__usersCache.exists(cacheKey, validSeconds))

    @gUsersSync
    def getUserPersistence(self, userDN, userGroup, validSeconds=0):
        """ Check if a user(DN-group) has a persistent proxy in the proxy management
            - Updates internal cache if needed to minimize queries to the service
        """
        cacheKey = (userDN, userGroup)
        userData = self.__usersCache.get(cacheKey, validSeconds)
        if userData:
            if userData['persistent']:
                return S_OK(True)
        # Get list of users from the DB with proxies valid at least validSeconds
        gLogger.verbose("Updating list of users in proxy management")
        retVal = self.__refreshUserCache(validSeconds)
        if not retVal['OK']:
            return retVal
        userData = self.__usersCache.get(cacheKey, validSeconds)
        if userData:
            return S_OK(userData['persistent'])
        return S_OK(False)

    def setPersistency(self, userDN, userGroup, persistent):
        """ Set the persistency for user/group """
        # Ensure a real bool goes into the rpc call
        persistentFlag = bool(persistent)
        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        retVal = rpcClient.setPersistency(userDN, userGroup, persistentFlag)
        if not retVal['OK']:
            return retVal
        # Update internal persistency cache
        cacheKey = (userDN, userGroup)
        record = self.__usersCache.get(cacheKey, 0)
        if record:
            record['persistent'] = persistentFlag
            self.__usersCache.add(cacheKey,
                                  self.__getSecondsLeftToExpiration(record['expirationtime']),
                                  record)
        return retVal

    def uploadProxy(self, proxy=False, diracGroup=False, chainToConnect=False, restrictLifeTime=0):
        """ Upload a proxy to the proxy management service using delegation

            :param proxy: chain object, path of a proxy file, or False to use the default location
            :param diracGroup: group passed to the delegation request
            :param chainToConnect: currently not used to connect (see the commented client below)
            :param restrictLifeTime: if set and shorter, life time in seconds of the delegated chain
        """
        # Discover proxy location
        if type(proxy) == g_X509ChainType:
            chain = proxy
            proxyLocation = ""
        else:
            if not proxy:
                proxyLocation = Locations.getProxyLocation()
                if not proxyLocation:
                    return S_ERROR("Can't find a valid proxy")
            elif type(proxy) in (types.StringType, types.UnicodeType):
                proxyLocation = proxy
            else:
                return S_ERROR("Can't find a valid proxy")
            chain = X509Chain()
            result = chain.loadProxyFromFile(proxyLocation)
            if not result['OK']:
                return S_ERROR("Can't load %s: %s " % (proxyLocation, result['Message']))

        if not chainToConnect:
            chainToConnect = chain

        # Make sure it's valid
        if chain.hasExpired()['Value']:
            return S_ERROR("Proxy %s has expired" % proxyLocation)

        #rpcClient = RPCClient( "Framework/ProxyManager", proxyChain = chainToConnect )
        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        # Get a delegation request
        result = rpcClient.requestDelegationUpload(chain.getRemainingSecs()['Value'], diracGroup)
        if not result['OK']:
            return result
        # Check if the delegation has been granted
        if 'Value' not in result or not result['Value']:
            if 'proxies' in result:
                return S_OK(result['proxies'])
            else:
                return S_OK()
        reqDict = result['Value']
        # Generate delegated chain, 60 seconds shorter than the local one
        chainLifeTime = chain.getRemainingSecs()['Value'] - 60
        if restrictLifeTime and restrictLifeTime < chainLifeTime:
            chainLifeTime = restrictLifeTime
        retVal = chain.generateChainFromRequestString(reqDict['request'],
                                                      lifetime=chainLifeTime,
                                                      diracGroup=diracGroup)
        if not retVal['OK']:
            return retVal
        # Upload!
        result = rpcClient.completeDelegationUpload(reqDict['id'], retVal['Value'])
        if not result['OK']:
            return result
        if 'proxies' in result:
            return S_OK(result['proxies'])
        return S_OK()

    @gProxiesSync
    def downloadProxy(self, userDN, userGroup, limited=False, requiredTimeLeft=1200,
                      cacheTime=43200, proxyToConnect=False, token=False):
        """ Get a proxy Chain from the proxy management """
        # limited is part of the key (as in downloadVOMSProxy) so that a limited
        # request is never served a cached full proxy, nor the other way around
        cacheKey = (userDN, userGroup, limited)
        if self.__proxiesCache.exists(cacheKey, requiredTimeLeft):
            return S_OK(self.__proxiesCache.get(cacheKey))
        req = X509Request()
        req.generateProxyRequest(limited=limited)
        if proxyToConnect:
            rpcClient = RPCClient("Framework/ProxyManager", proxyChain=proxyToConnect, timeout=120)
        else:
            rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        if token:
            retVal = rpcClient.getProxyWithToken(userDN, userGroup, req.dumpRequest()['Value'],
                                                 long(cacheTime + requiredTimeLeft), token)
        else:
            retVal = rpcClient.getProxy(userDN, userGroup, req.dumpRequest()['Value'],
                                        long(cacheTime + requiredTimeLeft))
        if not retVal['OK']:
            return retVal
        chain = X509Chain(keyObj=req.getPKey())
        retVal = chain.loadChainFromString(retVal['Value'])
        if not retVal['OK']:
            return retVal
        self.__proxiesCache.add(cacheKey, chain.getRemainingSecs()['Value'], chain)
        return S_OK(chain)

    def downloadProxyToFile(self, userDN, userGroup, limited=False, requiredTimeLeft=1200,
                            cacheTime=43200, filePath=False, proxyToConnect=False, token=False):
        """ Get a proxy Chain from the proxy management and write it to file

            The returned structure also carries the chain under the 'chain' key.
        """
        retVal = self.downloadProxy(userDN, userGroup, limited, requiredTimeLeft,
                                    cacheTime, proxyToConnect, token)
        if not retVal['OK']:
            return retVal
        chain = retVal['Value']
        retVal = self.dumpProxyToFile(chain, filePath)
        if not retVal['OK']:
            return retVal
        retVal['chain'] = chain
        return retVal

    @gVOMSProxiesSync
    def downloadVOMSProxy(self, userDN, userGroup, limited=False, requiredTimeLeft=1200,
                          cacheTime=43200, requiredVOMSAttribute=False, proxyToConnect=False,
                          token=False):
        """ Download a proxy if needed and transform it into a VOMS one """
        cacheKey = (userDN, userGroup, requiredVOMSAttribute, limited)
        if self.__vomsProxiesCache.exists(cacheKey, requiredTimeLeft):
            return S_OK(self.__vomsProxiesCache.get(cacheKey))
        req = X509Request()
        req.generateProxyRequest(limited=limited)
        if proxyToConnect:
            rpcClient = RPCClient("Framework/ProxyManager", proxyChain=proxyToConnect, timeout=120)
        else:
            rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        if token:
            retVal = rpcClient.getVOMSProxyWithToken(userDN, userGroup, req.dumpRequest()['Value'],
                                                     long(cacheTime + requiredTimeLeft), token,
                                                     requiredVOMSAttribute)
        else:
            retVal = rpcClient.getVOMSProxy(userDN, userGroup, req.dumpRequest()['Value'],
                                            long(cacheTime + requiredTimeLeft),
                                            requiredVOMSAttribute)
        if not retVal['OK']:
            return retVal
        chain = X509Chain(keyObj=req.getPKey())
        retVal = chain.loadChainFromString(retVal['Value'])
        if not retVal['OK']:
            return retVal
        self.__vomsProxiesCache.add(cacheKey, chain.getRemainingSecs()['Value'], chain)
        return S_OK(chain)

    def downloadVOMSProxyToFile(self, userDN, userGroup, limited=False, requiredTimeLeft=1200,
                                cacheTime=43200, requiredVOMSAttribute=False, filePath=False,
                                proxyToConnect=False, token=False):
        """ Download a proxy if needed, transform it into a VOMS one and write it to file

            The returned structure also carries the chain under the 'chain' key.
        """
        retVal = self.downloadVOMSProxy(userDN, userGroup, limited, requiredTimeLeft, cacheTime,
                                        requiredVOMSAttribute, proxyToConnect, token)
        if not retVal['OK']:
            return retVal
        chain = retVal['Value']
        retVal = self.dumpProxyToFile(chain, filePath)
        if not retVal['OK']:
            return retVal
        retVal['chain'] = chain
        return retVal

    def getPilotProxyFromDIRACGroup(self, userDN, userGroup, requiredTimeLeft=43200,
                                    proxyToConnect=False):
        """ Download a pilot proxy with VOMS extensions depending on the group """
        # Assign VOMS attribute
        vomsAttr = CS.getVOMSAttributeForGroup(userGroup)
        if not vomsAttr:
            gLogger.verbose("No voms attribute assigned to group %s when requested pilot proxy" % userGroup)
            return self.downloadProxy(userDN, userGroup, limited=False,
                                      requiredTimeLeft=requiredTimeLeft,
                                      proxyToConnect=proxyToConnect)
        else:
            return self.downloadVOMSProxy(userDN, userGroup, limited=False,
                                          requiredTimeLeft=requiredTimeLeft,
                                          requiredVOMSAttribute=vomsAttr,
                                          proxyToConnect=proxyToConnect)

    def getPilotProxyFromVOMSGroup(self, userDN, vomsAttr, requiredTimeLeft=43200,
                                   proxyToConnect=False):
        """ Download a pilot proxy with VOMS extensions depending on the VOMS attr

            The groups having the attribute are tried in order; the first successful
            download is returned, otherwise the error of the last attempt.
        """
        groups = CS.getGroupsWithVOMSAttribute(vomsAttr)
        if not groups:
            return S_ERROR("No group found that has %s as voms attrs" % vomsAttr)

        for userGroup in groups:
            result = self.downloadVOMSProxy(userDN, userGroup, limited=False,
                                            requiredTimeLeft=requiredTimeLeft,
                                            requiredVOMSAttribute=vomsAttr,
                                            proxyToConnect=proxyToConnect)
            if result['OK']:
                return result
        return result

    def getPayloadProxyFromDIRACGroup(self, userDN, userGroup, requiredTimeLeft, token=False,
                                      proxyToConnect=False):
        """ Download a payload (limited) proxy with VOMS extensions depending on the group """
        # Assign VOMS attribute
        vomsAttr = CS.getVOMSAttributeForGroup(userGroup)
        if not vomsAttr:
            gLogger.verbose("No voms attribute assigned to group %s when requested payload proxy" % userGroup)
            return self.downloadProxy(userDN, userGroup, limited=True,
                                      requiredTimeLeft=requiredTimeLeft,
                                      proxyToConnect=proxyToConnect, token=token)
        else:
            return self.downloadVOMSProxy(userDN, userGroup, limited=True,
                                          requiredTimeLeft=requiredTimeLeft,
                                          requiredVOMSAttribute=vomsAttr,
                                          proxyToConnect=proxyToConnect, token=token)

    def getPayloadProxyFromVOMSGroup(self, userDN, vomsAttr, token, requiredTimeLeft,
                                     proxyToConnect=False):
        """ Download a payload (limited) proxy with VOMS extensions depending on the VOMS attr

            Only the first group having the attribute is used.
        """
        groups = CS.getGroupsWithVOMSAttribute(vomsAttr)
        if not groups:
            return S_ERROR("No group found that has %s as voms attrs" % vomsAttr)
        userGroup = groups[0]

        return self.downloadVOMSProxy(userDN, userGroup, limited=True,
                                      requiredTimeLeft=requiredTimeLeft,
                                      requiredVOMSAttribute=vomsAttr,
                                      proxyToConnect=proxyToConnect, token=token)

    def dumpProxyToFile(self, chain, destinationFile=False, requiredTimeLeft=600):
        """ Dump a proxy to a file. It's cached so multiple calls won't generate extra files

            :return: S_OK( file name ), or the error of the hash / dump operation
        """
        result = chain.hash()
        if not result['OK']:
            return result
        chainHash = result['Value']
        if self.__filesCache.exists(chainHash, requiredTimeLeft):
            filepath = self.__filesCache.get(chainHash)
            if os.path.isfile(filepath):
                return S_OK(filepath)
            # The cached file disappeared from disk, forget it and dump again
            self.__filesCache.delete(chainHash)
        retVal = chain.dumpAllToFile(destinationFile)
        if not retVal['OK']:
            return retVal
        filename = retVal['Value']
        self.__filesCache.add(chainHash, chain.getRemainingSecs()['Value'], filename)
        return S_OK(filename)

    def deleteGeneratedProxyFile(self, chain):
        """ Delete a file generated by a dump

            :return: S_OK, or the error of the hash operation
        """
        # The files cache is keyed by the chain hash (see dumpProxyToFile), so the
        # entry has to be looked up with the hash and not with the chain object
        result = chain.hash()
        if not result['OK']:
            return result
        self.__filesCache.delete(result['Value'])
        return S_OK()

    def requestToken(self, requesterDN, requesterGroup, numUses=1):
        """ Request a token to the proxy management service.

            :param numUses: passed to the service as the number of uses of the token
        """
        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        return rpcClient.generateToken(requesterDN, requesterGroup, numUses)

    def renewProxy(self, proxyToBeRenewed=False, minLifeTime=3600, newProxyLifeTime=43200,
                   proxyToConnect=False):
        """ Renew a proxy using the ProxyManager

            Arguments:
              proxyToBeRenewed : proxy to renew
              minLifeTime : if proxy life time is less than this, renew. Skip otherwise
              newProxyLifeTime : life time of new proxy
              proxyToConnect : proxy to use for connecting to the service
        """
        retVal = File.multiProxyArgument(proxyToBeRenewed)
        # An error structure carries no 'Value' key, so 'OK' has to be checked first
        if not retVal['OK'] or not retVal['Value']:
            return retVal
        proxyToRenewDict = retVal['Value']

        secs = proxyToRenewDict['chain'].getRemainingSecs()['Value']
        if secs > minLifeTime:
            # Still long enough, nothing to renew
            File.deleteMultiProxy(proxyToRenewDict)
            return S_OK()

        if not proxyToConnect:
            proxyToConnectDict = {'chain': False, 'tempFile': False}
        else:
            retVal = File.multiProxyArgument(proxyToConnect)
            if not retVal['OK'] or not retVal['Value']:
                File.deleteMultiProxy(proxyToRenewDict)
                return retVal
            proxyToConnectDict = retVal['Value']

        userDN = proxyToRenewDict['chain'].getIssuerCert()['Value'].getSubjectDN()['Value']
        retVal = proxyToRenewDict['chain'].getDIRACGroup()
        if not retVal['OK']:
            File.deleteMultiProxy(proxyToRenewDict)
            File.deleteMultiProxy(proxyToConnectDict)
            return retVal
        userGroup = retVal['Value']
        limited = proxyToRenewDict['chain'].isLimitedProxy()['Value']

        voms = VOMS()
        retVal = voms.getVOMSAttributes(proxyToRenewDict['chain'])
        if not retVal['OK']:
            File.deleteMultiProxy(proxyToRenewDict)
            File.deleteMultiProxy(proxyToConnectDict)
            return retVal
        vomsAttrs = retVal['Value']
        if vomsAttrs:
            # Keep the (first) VOMS attribute of the proxy being renewed
            retVal = self.downloadVOMSProxy(userDN, userGroup, limited=limited,
                                            requiredTimeLeft=newProxyLifeTime,
                                            requiredVOMSAttribute=vomsAttrs[0],
                                            proxyToConnect=proxyToConnectDict['chain'])
        else:
            retVal = self.downloadProxy(userDN, userGroup, limited=limited,
                                        requiredTimeLeft=newProxyLifeTime,
                                        proxyToConnect=proxyToConnectDict['chain'])

        File.deleteMultiProxy(proxyToRenewDict)
        File.deleteMultiProxy(proxyToConnectDict)

        if not retVal['OK']:
            return retVal

        chain = retVal['Value']

        # The renewed chain is written to the 'file' entry unless 'tempFile' is set
        if not proxyToRenewDict['tempFile']:
            return chain.dumpAllToFile(proxyToRenewDict['file'])

        return S_OK(chain)

    def getDBContents(self, condDict=None):
        """ Get the contents of the db

            :param condDict: selection conditions, no condition if not given
        """
        if condDict is None:
            condDict = {}
        rpcClient = RPCClient("Framework/ProxyManager", timeout=120)
        return rpcClient.getContents(condDict, [['UserDN', 'DESC']], 0, 0)

    def getVOMSAttributes(self, chain):
        """ Get the voms attributes for a chain """
        return VOMS().getVOMSAttributes(chain)

    def getUploadedProxyLifeTime(self, DN, group):
        """ Get the remaining seconds for an uploaded proxy

            :return: S_OK( seconds left ), S_OK( 0 ) if no matching proxy is found
        """
        result = self.getDBContents({'UserDN': [DN], 'UserGroup': [group]})
        if not result['OK']:
            return result
        data = result['Value']
        if len(data['Records']) == 0:
            return S_OK(0)
        pNames = list(data['ParameterNames'])
        dnPos = pNames.index('UserDN')
        groupPos = pNames.index('UserGroup')
        expiryPos = pNames.index('ExpirationTime')
        for row in data['Records']:
            if DN == row[dnPos] and group == row[groupPos]:
                td = row[expiryPos] - datetime.datetime.utcnow()
                secondsLeft = td.days * 86400 + td.seconds
                return S_OK(max(0, secondsLeft))
        return S_OK(0)

    def getUserProxiesInfo(self):
        """ Get the user proxies uploaded info """
        result = RPCClient("Framework/ProxyManager", timeout=120).getUserProxiesInfo()
        # The rpc stub is of no use for the caller
        if 'rpcStub' in result:
            result.pop('rpcStub')
        return result
def __init__(self):
    """ Create the (initially empty) DictCache kept in ``seCache`` """
    emptyCache = DictCache()
    self.seCache = emptyCache
class DIRACPilotDirector(PilotDirector):
    """
    DIRAC PilotDirector class
    """

    def __init__(self, submitPool):
        """
        Define some defaults and call parent __init__

        Exits the process (sys.exit) when '/LocalSite/Site' is not configured.
        """
        self.gridMiddleware = 'DIRAC'

        PilotDirector.__init__(self, submitPool)

        self.computingElementList = COMPUTING_ELEMENTS
        self.computingElementDict = {}
        self.addComputingElement(self.computingElementList)

        self.siteName = gConfig.getValue('/LocalSite/Site', '')
        if not self.siteName:
            self.log.error(
                'Can not run a Director if Site Name is not defined')
            sys.exit()

        self.__failingCECache = DictCache()
        self.__ticketsCECache = DictCache()

    def configure(self, csSection, submitPool):
        """
        Here goes common configuration for DIRAC PilotDirector

        CEs whose name is a key of the failing-CE cache are dropped from
        computingElementDict. If none is left the method returns early and
        self.computingElement is not (re)assigned.
        """
        PilotDirector.configure(self, csSection, submitPool)
        self.reloadConfiguration(csSection, submitPool)

        self.__failingCECache.purgeExpired()
        self.__ticketsCECache.purgeExpired()

        for ce in self.__failingCECache.getKeys():
            # pop() with a default is a no-op for names that are not present
            self.computingElementDict.pop(ce, None)

        if not self.computingElementDict:
            return
        self.log.info(' ComputingElements:',
                      ', '.join(self.computingElementDict.keys()))

        # FIXME: this is to start testing
        # list() keeps the "first item" lookup valid when items() is a view
        _ceName, computingElementDict = list(
            self.computingElementDict.items())[0]

        self.computingElement = computingElementDict['CE']

        self.log.debug(self.computingElement.getCEStatus())

        self.log.info(' SiteName:', self.siteName)

    def configureFromSection(self, mySection):
        """
        reload from CS
        """
        PilotDirector.configureFromSection(self, mySection)

        self.computingElementList = gConfig.getValue(
            mySection + '/ComputingElements', self.computingElementList)
        self.addComputingElement(self.computingElementList)

        self.siteName = gConfig.getValue(mySection + '/SiteName',
                                         self.siteName)

    def addComputingElement(self, ceList):
        """
        Check if a CE object for the current CE is available,
        instantiate one if necessary

        Stops at the first CE that cannot be created (after logging the
        error); CEs later in ceList are then not processed.
        """
        for CE in ceList:
            if CE not in self.computingElementDict:
                ceFactory = ComputingElementFactory()
                ceInstance = ceFactory.getCE(ceName=CE)
                if not ceInstance['OK']:
                    self.log.error('Can not create CE object:',
                                   ceInstance['Message'])
                    return
                self.computingElementDict[CE] = ceInstance[
                    'Value'].ceConfigDict
                # add the 'CE' instance at the end to avoid being overwritten
                self.computingElementDict[CE]['CE'] = ceInstance['Value']

    def _cleanWorkingDirectory(self, baseDir, workingDirectory):
        """
        Best-effort: go back to baseDir and remove workingDirectory.
        Failures are logged at verbose level and never propagated.
        """
        try:
            os.chdir(baseDir)
            shutil.rmtree(workingDirectory)
        except Exception as exc:
            self.log.verbose('Could not clean up working directory:',
                             str(exc))

    def _submitPilots(self, workDir, taskQueueDict, pilotOptions,
                      pilotsToSubmit, ceMask, submitPrivatePilot, privateTQ,
                      proxy, pilotsPerJob):
        """
        This method does the actual pilot submission to the DIRAC CE
        The logic is as follows:
        - If there are no available CE it return error
        - If there is no queue available in the CE's, it returns error
        - It creates a temp directory
        - It prepare a PilotScript

        Returns S_OK( number of submitted pilots ) or S_ERROR.
        The pilotOptions list received from the caller is not modified.
        """
        taskQueueID = taskQueueDict['TaskQueueID']
        # ownerDN = taskQueueDict['OwnerDN']

        submittedPilots = 0

        # if self.computingElement not in self.computingElementDict:
        #   # Since we can exclude CEs from the list, it may become empty
        #   return S_ERROR( ERROR_CE )

        pilotRequirements = []
        pilotRequirements.append(('CPUTime', taskQueueDict['CPUTime']))
        # do we need to care about anything else?
        pilotRequirementsString = str(pilotRequirements)

        # Check that there are available queues for the Jobs:
        if self.enableListMatch:
            availableQueues = []
            # now = Time.dateTime()
            cachedAvailableQueues = self.listMatchCache.get(
                pilotRequirementsString)
            if cachedAvailableQueues is None:
                availableQueues = self._listQueues(pilotRequirements)
                # _listQueues returns False on failure; the comparison is
                # kept as an equality test on purpose.
                if availableQueues != False:
                    self.listMatchCache.add(pilotRequirementsString,
                                            self.listMatchDelay,
                                            availableQueues)
                    self.log.verbose(
                        'Available Queues for TaskQueue ',
                        "%s: %s" % (taskQueueID, str(availableQueues)))
            else:
                availableQueues = cachedAvailableQueues

            if not availableQueues:
                return S_ERROR(ERROR_CE + ' TQ: %d' % taskQueueID)

        baseDir = os.getcwd()
        workingDirectory = tempfile.mkdtemp(prefix='TQ_%s_' % taskQueueID,
                                            dir=workDir)
        self.log.verbose('Using working Directory:', workingDirectory)
        os.chdir(workingDirectory)

        # Work on a copy so the caller's list is left untouched.
        siteOptions = list(pilotOptions)
        # set the Site Name
        siteOptions.append("-n '%s'" % self.siteName)

        # submit pilots for every CE available
        for ceName in list(self.computingElementDict.keys()):
            ceConfigDict = self.computingElementDict[ceName]
            computingElement = ceConfigDict['CE']

            # Each CE starts from the common options, so requirements of one
            # CE do not end up in the pilot script of the next one.
            ceOptions = list(siteOptions)
            # add possible requirements from Site and CE
            for req, val in getResourceDict(ceName).items():
                ceOptions.append("-o '/AgentJobRequirements/%s=%s'" %
                                 (req, val))

            # Both settings are optional; the empty string is what the
            # generated wrapper treats as "not set".
            httpProxy = ''
            if 'HttpProxy' in ceConfigDict:
                httpProxy = ceConfigDict['HttpProxy']
            pilotExecDir = ''
            if 'JobExecDir' in ceConfigDict:
                pilotExecDir = ceConfigDict['JobExecDir']

            try:
                pilotScript = self._writePilotScript(workingDirectory,
                                                     ceOptions, proxy,
                                                     httpProxy, pilotExecDir)
            except Exception:
                self.log.exception(ERROR_SCRIPT)
                self._cleanWorkingDirectory(baseDir, workingDirectory)
                return S_ERROR(ERROR_SCRIPT)

            if isinstance(pilotScript, dict):
                # _writePilotScript hands back an S_ERROR structure (not a
                # file name) when the payload could not be prepared.
                self.log.error(ERROR_SCRIPT, pilotScript.get('Message', ''))
                self._cleanWorkingDirectory(baseDir, workingDirectory)
                return S_ERROR(ERROR_SCRIPT)

            self.log.info("Pilots to submit: ", pilotsToSubmit)
            while submittedPilots < pilotsToSubmit:
                # Find out how many pilots can be submitted
                ret = computingElement.available()
                if not ret['OK']:
                    self.log.error(
                        'Can not determine if pilot should be submitted: ',
                        ret['Message'])
                    break
                maxPilotsToSubmit = ret['Value']
                self.log.info("Submit Pilots: ", maxPilotsToSubmit)
                if not maxPilotsToSubmit:
                    break
                # submit the pilots and then check again
                for _i in range(
                        min(maxPilotsToSubmit,
                            pilotsToSubmit - submittedPilots)):
                    submission = computingElement.submitJob(
                        pilotScript, '', '')
                    if not submission['OK']:
                        self.log.error('Pilot submission failed: ',
                                       submission['Message'])
                        # cleanup
                        self._cleanWorkingDirectory(baseDir,
                                                    workingDirectory)
                        return S_ERROR('Pilot submission failed after ' +
                                       str(submittedPilots) +
                                       ' pilots submitted successful')
                    submittedPilots += 1
                    # let the batch system some time to digest the submitted job
                    time.sleep(1)

            # next CE

        self._cleanWorkingDirectory(baseDir, workingDirectory)

        return S_OK(submittedPilots)

    def _listQueues(self, pilotRequirements):
        """
        For each defined CE return the list of Queues with available, running and waiting slots,
        matching the requirements of the pilots.
        Currently only CPU time is considered

        Returns the 'Value' of self.computingElement.available(), or False
        when that call reports an error.
        """
        result = self.computingElement.available(pilotRequirements)
        if not result['OK']:
            self.log.error('Can not determine available queues',
                           result['Message'])
            return False
        return result['Value']

    def _writePilotScript(self, workingDirectory, pilotOptions, proxy,
                          httpProxy, pilotExecDir):
        """
        Prepare the script to execute the pilot
        For the moment it will do like Grid Pilots, a full DIRAC installation

        It assumes that the pilot script will have access to the submit working directory

        Returns the path of the generated wrapper on success, or an S_ERROR
        structure when the proxy / pilot / install payloads cannot be
        compressed and encoded.
        """

        def _readBinary(path):
            # Read a whole file, making sure the handle is closed.
            with open(path, "rb") as inputFile:
                return inputFile.read()

        def _compressAndEncode(data):
            # bz2 at level 9 (the library default), base64, single line.
            return base64.encodestring(bz2.compress(data, 9)).replace(
                '\n', '')

        try:
            compressedAndEncodedProxy = _compressAndEncode(
                proxy.dumpAllToString()['Value'])
            compressedAndEncodedPilot = _compressAndEncode(
                _readBinary(self.pilot))
            compressedAndEncodedInstall = _compressAndEncode(
                _readBinary(self.install))
        except Exception:
            self.log.exception(
                'Exception during file compression of proxy, dirac-pilot or dirac-install'
            )
            return S_ERROR(
                'Exception during file compression of proxy, dirac-pilot or dirac-install'
            )

        # The wrapper below is text executed on the worker node, not code of
        # this module; it is written out as-is after substitution.
        localPilot = """#!/bin/bash
/usr/bin/env python << EOF
#
import os, stat, tempfile, sys, shutil, base64, bz2
try:
  pilotExecDir = '%(pilotExecDir)s'
  if not pilotExecDir:
    pilotExecDir = None
  pilotWorkingDirectory = tempfile.mkdtemp( suffix = 'pilot', prefix = 'DIRAC_', dir = pilotExecDir )
  os.chdir( pilotWorkingDirectory )
  open( 'proxy', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedProxy)s" ) ) )
  open( '%(pilotScript)s', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedPilot)s" ) ) )
  open( '%(installScript)s', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedInstall)s" ) ) )
  os.chmod("proxy", stat.S_IRUSR | stat.S_IWUSR)
  os.chmod("%(pilotScript)s", stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
  os.chmod("%(installScript)s", stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
  if "LD_LIBRARY_PATH" not in os.environ:
    os.environ["LD_LIBRARY_PATH"]=""
  os.environ["X509_USER_PROXY"]=os.path.join(pilotWorkingDirectory, 'proxy')
  if "%(httpProxy)s":
    os.environ["HTTP_PROXY"]="%(httpProxy)s"
  os.environ["X509_CERT_DIR"]=os.path.join(pilotWorkingDirectory, 'etc/grid-security/certificates')
  # TODO: structure the output
  print '==========================================================='
  print 'Environment of execution host'
  for key in os.environ.keys():
    print key + '=' + os.environ[key]
  print '==========================================================='
except Exception, x:
  print >> sys.stderr, x
  sys.exit(-1)
cmd = "python %(pilotScript)s %(pilotOptions)s"
print 'Executing: ', cmd
sys.stdout.flush()
os.system( cmd )

shutil.rmtree( pilotWorkingDirectory )

EOF
""" % {
            'compressedAndEncodedProxy': compressedAndEncodedProxy,
            'compressedAndEncodedPilot': compressedAndEncodedPilot,
            'compressedAndEncodedInstall': compressedAndEncodedInstall,
            'httpProxy': httpProxy,
            'pilotScript': os.path.basename(self.pilot),
            'installScript': os.path.basename(self.install),
            'pilotOptions': ' '.join(pilotOptions),
            'pilotExecDir': pilotExecDir
        }

        fd, name = tempfile.mkstemp(suffix='_pilotwrapper.py',
                                    prefix='DIRAC_',
                                    dir=workingDirectory)
        with os.fdopen(fd, 'w') as pilotWrapper:
            pilotWrapper.write(localPilot)
        return name

    def _getPilotProxyFromDIRACGroup(self, ownerDN, ownerGroup,
                                     requiredTimeLeft):
        """
        Download a limited pilot proxy with VOMS extensions depending on the group
        """
        # Assign VOMS attribute
        vomsAttr = CS.getVOMSAttributeForGroup(ownerGroup)
        if not vomsAttr:
            self.log.info(
                "Downloading a proxy without VOMS extensions for %s@%s" %
                (ownerDN, ownerGroup))
            return gProxyManager.downloadProxy(
                ownerDN,
                ownerGroup,
                limited=True,
                requiredTimeLeft=requiredTimeLeft)
        else:
            self.log.info(
                "Downloading a proxy with '%s' VOMS extension for %s@%s" %
                (vomsAttr, ownerDN, ownerGroup))
            return gProxyManager.downloadVOMSProxy(
                ownerDN,
                ownerGroup,
                limited=True,
                requiredTimeLeft=requiredTimeLeft,
                requiredVOMSAttribute=vomsAttr)
class RSSCache(object):
    '''
    Cache with purgeThread integrated
    '''

    def __init__(self, lifeTime, updateFunc=None, cacheHistoryLifeTime=None):
        '''
        Constructor

        lifeTime             : lifetime given to every cache entry; also the
                               sleep time between two refreshes of the thread
        updateFunc           : callable returning S_OK( dict ) | S_ERROR, used
                               to repopulate the cache
        cacheHistoryLifeTime : lifetime of the history in hours, 24 if not
                               given
        '''

        self.__lifeTime = lifeTime
        # lifetime of the history on hours
        self.__cacheHistoryLifeTime = cacheHistoryLifeTime or 24
        self.__updateFunc = updateFunc

        # RSSCache
        self.__rssCache = DictCache()
        self.__rssCacheStatus = []  # ( updateTime, message ), newest first
        self.__rssCacheLock = threading.Lock()

        # Create purgeThread ( started by startRefreshThread )
        self.__refreshStop = False
        self.__refreshThread = threading.Thread(
            target=self.__refreshCacheThreadRun)
        self.__refreshThread.daemon = True

    def startRefreshThread(self):
        '''
        Run refresh thread.
        '''
        self.__refreshThread.start()

    def stopRefreshThread(self):
        '''
        Stop refresh thread.
        '''
        self.__refreshStop = True

    def isCacheAlive(self):
        '''
        Returns status of the cache refreshing thread
        '''
        return S_OK(self.__refreshThread.is_alive())

    def setLifeTime(self, lifeTime):
        '''
        Set cache life time
        '''
        self.__lifeTime = lifeTime

    def setCacheHistoryLifeTime(self, cacheHistoryLifeTime):
        '''
        Set cache history life time
        '''
        self.__cacheHistoryLifeTime = cacheHistoryLifeTime

    def getCacheKeys(self):
        '''
        List all the keys stored in the cache.
        '''
        with self.__rssCacheLock:
            keys = self.__rssCache.getKeys()

        return S_OK(keys)

    def acquireLock(self):
        '''
        Acquires RSSCache lock
        '''
        self.__rssCacheLock.acquire()

    def releaseLock(self):
        '''
        Releases RSSCache lock
        '''
        self.__rssCacheLock.release()

    def getCacheStatus(self):
        '''
        Return the latest cache status
        '''
        with self.__rssCacheLock:
            if self.__rssCacheStatus:
                # Newest record sits at index 0
                res = dict([self.__rssCacheStatus[0]])
            else:
                res = {}

        return S_OK(res)

    def getCacheHistory(self):
        '''
        Return the cache updates history
        '''
        with self.__rssCacheLock:
            res = dict(self.__rssCacheStatus)

        return S_OK(res)

    def get(self, resourceKey):
        '''
        Gets the resource(s) status(es). Every resource can have multiple statuses,
        so in order to speed up things, we store them on the cache as follows::

          { (<resourceName>,<resourceStatusType0>) : whatever0,
            (<resourceName>,<resourceStatusType1>) : whatever1,
          }

        '''

        # cacheKey = '%s#%s' % ( resourceName, resourceStatusType )

        with self.__rssCacheLock:
            resourceStatus = self.__rssCache.get(resourceKey)

        if resourceStatus:
            return S_OK({resourceKey: resourceStatus})
        return S_ERROR('Cannot get %s' % resourceKey)

    def getBulk(self, resourceKeys):
        '''
        Gets values for resourceKeys in one ATOMIC operation.

        Returns S_ERROR as soon as one key has no ( truthy ) value in the
        cache; the lock is released on that path as well.
        '''

        result = {}
        with self.__rssCacheLock:

            for resourceKey in resourceKeys:

                resourceRow = self.__rssCache.get(resourceKey)
                if not resourceRow:
                    return S_ERROR('Cannot get %s' % resourceKey)
                result[resourceKey] = resourceRow

        return S_OK(result)

    def resetCache(self):
        '''
        Reset cache.
        '''
        with self.__rssCacheLock:
            self.__rssCache.purgeAll()

        return S_OK()

    def refreshCache(self):
        '''
        Clears the cache and gets its latest version, not Thread safe !
        Acquire a lock before using it ! ( and release it afterwards ! )

        Note the cache is purged even when no update function is set or the
        update function fails.
        '''

        self.__rssCache.purgeAll()

        if self.__updateFunc is None:
            return S_ERROR('RSSCache has no updateFunction')
        newCache = self.__updateFunc()
        if not newCache['OK']:
            return newCache

        itemsAdded = self.__updateCache(newCache['Value'])

        return itemsAdded

    def refreshCacheAndHistory(self):
        '''
        Method that refreshes the cache and updates the history. Not thread safe,
        you must acquire a lock before using it, and release it right after !
        '''

        refreshResult = self.refreshCache()

        now = datetime.datetime.utcnow()

        if self.__rssCacheStatus:
            # Check oldest record ( at most one is dropped per refresh )
            dateInserted, _message = self.__rssCacheStatus[-1]
            if dateInserted < now - datetime.timedelta(
                    hours=self.__cacheHistoryLifeTime):
                self.__rssCacheStatus.pop()

        self.__rssCacheStatus.insert(0, (now, refreshResult))

    ############################################################################
    # Private methods

    def __updateCache(self, newCache):
        '''
        The new cache must be a dictionary, which should look like::

          { ( <resourceName>,<resourceStatusType0>) : whatever0,
            ( <resourceName>,<resourceStatusType1>) : whatever1,
          }

        Returns S_OK( number of items added ).
        '''

        itemsCounter = 0

        for cacheKey, cacheValue in newCache.items():
            self.__rssCache.add(cacheKey, self.__lifeTime, value=cacheValue)
            itemsCounter += 1

        return S_OK(itemsCounter)

    def __refreshCacheThreadRun(self):
        '''
        Method that refreshes periodically the cache.
        '''

        while not self.__refreshStop:

            # The lock is released even if the refresh raises
            with self.__rssCacheLock:
                self.refreshCacheAndHistory()
            time.sleep(self.__lifeTime)

        self.__refreshStop = False
class DataCache:
    """
    Two DictCache instances ( report data and generated plots ) plus a
    background thread that purges expired entries from both.
    """

    def __init__(self):
        self.graphsLocation = os.path.join(
            gConfig.getValue('/LocalSite/InstancePath', rootPath), 'data',
            'accountingPlots')
        self.cachedGraphs = {}
        self.alive = True
        # Caches and lifetimes ( seconds ) are set up before the purge thread
        # is started, so the thread never sees a half-built object.
        self.__dataCache = DictCache()
        # NOTE(review): _deleteGraph is not defined in the part of the class
        # visible here; it is assumed to be provided elsewhere.
        self.__graphCache = DictCache(deleteFunction=self._deleteGraph)
        self.__dataLifeTime = 600
        self.__graphLifeTime = 3600
        self.purgeThread = threading.Thread(target=self.purgeExpired)
        self.purgeThread.daemon = True
        self.purgeThread.start()

    def setGraphsLocation(self, graphsDir):
        """
        Set the directory holding the plots and delete the files found there
        whose name contains ".png" ( not at the very start of the name ).
        """
        self.graphsLocation = graphsDir
        for graphName in os.listdir(self.graphsLocation):
            if graphName.find(".png") > 0:
                graphLocation = "%s/%s" % (self.graphsLocation, graphName)
                gLogger.verbose("Purging %s" % graphLocation)
                os.unlink(graphLocation)

    def purgeExpired(self):
        """
        Thread body: every 600 seconds purge expired entries of both caches,
        for as long as self.alive is true.
        """
        while self.alive:
            time.sleep(600)
            self.__graphCache.purgeExpired()
            self.__dataCache.purgeExpired()

    def getReportData(self, reportRequest, reportHash, dataFunc):
        """
        Get report data from cache if exists, else generate it
        """
        reportData = self.__dataCache.get(reportHash)
        # Equality ( not identity ) test kept on purpose: it is the existing
        # "not in cache" check.
        if reportData == False:
            retVal = dataFunc(reportRequest)
            if not retVal['OK']:
                return retVal
            reportData = retVal['Value']
            self.__dataCache.add(reportHash, self.__dataLifeTime, reportData)
        return S_OK(reportData)

    def getReportPlot(self, reportRequest, reportHash, reportData, plotFunc):
        """
        Get report plot from cache if exists, else generate it

        For a freshly generated plot, truthy 'plot' / 'thumbnail' entries are
        replaced by file names derived from reportHash before caching.
        """
        plotDict = self.__graphCache.get(reportHash)
        if plotDict == False:
            basePlotFileName = "%s/%s" % (self.graphsLocation, reportHash)
            retVal = plotFunc(reportRequest, reportData, basePlotFileName)
            if not retVal['OK']:
                return retVal
            plotDict = retVal['Value']
            if plotDict['plot']:
                plotDict['plot'] = "%s.png" % reportHash
            if plotDict['thumbnail']:
                plotDict['thumbnail'] = "%s.thb.png" % reportHash
            self.__graphCache.add(reportHash, self.__graphLifeTime, plotDict)
        return S_OK(plotDict)

    def getPlotData(self, plotFileName):
        """
        Return S_OK( content ) of the file plotFileName located under
        graphsLocation, read in binary mode, or S_ERROR if it cannot be read.
        """
        # NOTE(review): plotFileName is joined to the directory as given;
        # confirm callers never pass names containing path separators.
        filename = "%s/%s" % (self.graphsLocation, plotFileName)
        try:
            with open(filename, "rb") as fd:
                data = fd.read()
        except Exception as e:
            return S_ERROR("Can't open file %s: %s" % (plotFileName, str(e)))
        return S_OK(data)
from DIRAC.Core.Utilities.ReturnValues import returnSingleResult from DIRAC.ConfigurationSystem.Client.Helpers import Registry ## globals BASE_PATH = "" HTTP_FLAG = False HTTP_PORT = 9180 HTTP_PATH = "" def purgeCacheDirectory(path): """ del recursively :path: """ shutil.rmtree(path) gRegister = DictCache(purgeCacheDirectory) def initializeStorageElementProxyHandler(serviceInfo): """ handler initialisation """ global BASE_PATH, HTTP_FLAG, HTTP_PORT, HTTP_PATH cfgPath = serviceInfo['serviceSectionPath'] BASE_PATH = gConfig.getValue("%s/BasePath" % cfgPath, BASE_PATH) if not BASE_PATH: gLogger.error('Failed to get the base path') return S_ERROR('Failed to get the base path') BASE_PATH = os.path.abspath(BASE_PATH) gLogger.info('The base path obtained is %s. Checking its existence...' %
class Cache(object):
    """
    Basic cache: a DictCache filled from an update function, together with a
    named lock taken from a LockRing.

    WARNING: no method takes the lock by itself. Callers must wrap their calls
    with acquireLock / releaseLock.
    """

    def __init__(self, lifeTime, updateFunc):
        """
        Constructor

        :Parameters:
          **lifeTime** - `int`
            Lifetime of the elements in the cache ( seconds ! )
          **updateFunc** - `function`
            Must return an S_OK | S_ERROR object; on S_OK the value must be a
            dictionary.
        """

        # Stretch the lifetime by a random 0-20% so that many caches created
        # at the same moment do not all expire together.
        jitter = 0.2 * random.random()

        self.log = gLogger.getSubLogger(self.__class__.__name__)

        self.__lifeTime = int(lifeTime * (1 + jitter))
        self.__updateFunc = updateFunc
        # Records handed out must remain valid for at least 10 more seconds.
        self.__validSeconds = 10

        # Storage and its lock ( the lock is registered under the class name )
        self.__cache = DictCache()
        self.__cacheLock = LockRing()
        self.__cacheLock.getLock(self.__class__.__name__)

    # ..........................................................................
    # internal cache object getter

    def cacheKeys(self):
        """
        Cache keys getter

        :returns: list with the keys still valid for the configured margin
        """
        margin = self.__validSeconds
        return self.__cache.getKeys(validSeconds=margin)

    # ..........................................................................
    # acquire / release Locks

    def acquireLock(self):
        """
        Acquires Cache lock
        """
        lockName = self.__class__.__name__
        self.__cacheLock.acquire(lockName)

    def releaseLock(self):
        """
        Releases Cache lock
        """
        lockName = self.__class__.__name__
        self.__cacheLock.release(lockName)

    # ..........................................................................
    # Cache getters

    def get(self, cacheKeys):
        """
        Looks up every key of cacheKeys. If all of them are present and valid,
        returns S_OK with a { key : value } dictionary. The first key that is
        missing or no longer valid is logged and reported through S_ERROR.

        :Parameters:
          **cacheKeys** - `list`
            list of keys to be extracted from the cache

        :return: S_OK | S_ERROR
        """

        found = {}

        for key in cacheKeys:
            row = self.__cache.get(key, validSeconds=self.__validSeconds)
            if row:
                found[key] = row
                continue
            self.log.error(str(key))
            return S_ERROR('Cannot get %s' % str(key))

        return S_OK(found)

    # ..........................................................................
    # Cache refreshers

    def refreshCache(self):
        """
        Empties the cache, then asks the update function for fresh content.

        :return: S_OK | S_ERROR. On S_OK, its content is the new cache.
        """

        self.log.verbose('refreshing...')

        self.__cache.purgeAll()

        fetched = self.__updateFunc()
        if fetched['OK']:
            stored = self.__updateCache(fetched['Value'])
            self.log.verbose('refreshed')
            return stored

        self.log.error(fetched['Message'])
        return fetched

    # ..........................................................................
    # Private methods

    def __updateCache(self, newCache):
        """
        Inserts every item of newCache into the internal cache, each with a
        duration of <self.__lifeTime> seconds.

        :Parameters:
          **newCache** - `dict`
            dictionary containing a new cache

        :return: S_OK wrapping the newCache argument.
        """

        # Insertion is assumed not to fail; no error handling is done here.
        for key, value in newCache.items():
            self.__cache.add(key, self.__lifeTime, value=value)

        return S_OK(newCache)