def __init__(self):
    """
    Prepare an executor reactor with nothing loaded yet.

    Creates the AliveLock instance, three empty registries (executor
    modules, code modules and minds) and the ModuleLoader configured for the
    "Executor" section.
    """
    self.__aliveLock = self.AliveLock()
    # One independent empty dict per registry.
    self.__executorModules, self.__codeModules, self.__minds = {}, {}, {}
    executorLoader = ModuleLoader("Executor", PathFinder.getExecutorSection, ExecutorModule)
    self.__loader = executorLoader
def __init__(self, baseAgentName):
    """
    Prepare an agent reactor with no agent loaded yet.

    :param baseAgentName: stored as-is in ``self.__baseAgentName``
    """
    self.__baseAgentName = baseAgentName
    # Registries start empty: loaded agent modules and scheduled tasks.
    self.__agentModules, self.__tasks = {}, {}
    self.__loader = ModuleLoader("Agent", PathFinder.getAgentSection, AgentModule)
    schedulerOptions = {"enableReactorThread": False, "minPeriod": 30}
    self.__scheduler = ThreadScheduler.ThreadScheduler(**schedulerOptions)
    # State flags: alive from the start, not yet running.
    self.__alive, self.__running = True, False
def __init__(self):
    """
    Prepare a service reactor with no service loaded and no listener open.

    Builds the ModuleLoader for the "Service" section (modules are looked up
    with the "Handler" suffix) and a fresh ReactorStats object.
    """
    self.__alive = True
    self.__maxFD = 0
    # Registries start empty: services and listening connections.
    self.__services, self.__listeningConnections = {}, {}
    loaderOptions = {"moduleSuffix": "Handler"}
    self.__loader = ModuleLoader("Service", PathFinder.getServiceSection, RequestHandler, **loaderOptions)
    self.__stats = ReactorStats()
def __init__(self, autoDiscovery=True):
    """
    Set up an empty handler registry.

    Pass ``autoDiscovery=False`` to prevent the automatic discovery of
    handlers; in that case use loadHandlersByServiceName() or
    loadHandlerInHandlerManager() to load the handlers yourself.

    :param autoDiscovery: (default True) when False the automatic discovery
                          is disabled, so the caller chooses which services
                          get loaded.
    """
    self.__autoDiscovery = autoDiscovery
    self.__handlers = {}
    self.__objectLoader = ObjectLoader()
    loaderOptions = {"moduleSuffix": "Handler"}
    self.loader = ModuleLoader("Service", PathFinder.getServiceSection, RequestHandler, **loaderOptions)
def __init__(self):
    """
    Initialise the reactor state before any executor module is loaded.

    An AliveLock is created first, then the executor-module, code-module and
    mind registries are set to empty dicts, and finally the ModuleLoader for
    the "Executor" section is built.
    """
    self.__aliveLock = self.AliveLock()
    for registryName in ("__executorModules", "__codeModules", "__minds"):
        # Written explicitly below; this loop only documents the three names.
        pass
    self.__executorModules = dict()
    self.__codeModules = dict()
    self.__minds = dict()
    loaderArgs = ("Executor", PathFinder.getExecutorSection, ExecutorModule)
    self.__loader = ModuleLoader(*loaderArgs)
class AgentReactor(object):
    """
    Main interface to DIRAC Agents. It allows to:
      - define the Agent modules to be executed
      - define the number of cycles to execute
      - steer the execution

    Agents are declared via:
      - loadAgentModules(): for a list of Agents (pass a one-element list
        for a single Agent)

    The number of cycles to execute for a defined Agent can be set via:
      - setAgentModuleCyclesToExecute()

    The execution of the Agents is done with:
      - runNumCycles(): to execute an additional number of cycles
      - go(): to run until no task is left or the reactor is stopped

    During the execution of the cycles, each of the Agents can be signaled
    to stop by creating a file named "stop_agent" in its Control Directory.
    """

    def __init__(self, baseAgentName):
        """
        :param baseAgentName: passed as third constructor argument to every
                              agent class instantiated by loadAgentModules()
        """
        self.__agentModules = {}
        self.__loader = ModuleLoader("Agent", PathFinder.getAgentSection, AgentModule)
        self.__tasks = {}
        self.__baseAgentName = baseAgentName
        self.__scheduler = ThreadScheduler.ThreadScheduler(enableReactorThread=False, minPeriod=30)
        self.__alive = True
        self.__running = False

    def loadAgentModules(self, modulesList, hideExceptions=False):
        """
        Load all modules required in modulesList, instantiate and initialize
        each agent and register it as a periodic task in the scheduler.

        :param modulesList: list of agent names handed to the module loader
        :param hideExceptions: when True, exceptions raised while creating or
                               initializing an agent are not logged (an
                               S_ERROR is still returned)
        :returns: S_OK() or the first error encountered
        """
        result = self.__loader.loadModules(modulesList, hideExceptions=hideExceptions)
        if not result['OK']:
            return result
        self.__agentModules = self.__loader.getModules()
        for agentName in self.__agentModules:
            agentData = self.__agentModules[agentName]
            agentData['running'] = False
            try:
                instanceObj = agentData['classObj'](agentName, agentData['loadName'], self.__baseAgentName)
                result = instanceObj.am_initialize()
                if not result['OK']:
                    return S_ERROR("Error while calling initialize method of %s: %s" %
                                   (agentName, result['Message']))
                agentData['instanceObj'] = instanceObj
            except Exception as excp:
                if not hideExceptions:
                    gLogger.exception("Can't load agent %s" % agentName, lException=excp)
                return S_ERROR("Can't load agent %s: \n %s" % (agentName, excp))
            # The polling time is used both as the period and as the initial
            # elapsed time of the task.
            agentPeriod = instanceObj.am_getPollingTime()
            result = self.__scheduler.addPeriodicTask(agentPeriod,
                                                      instanceObj.am_go,
                                                      executions=instanceObj.am_getMaxCycles(),
                                                      elapsedTime=agentPeriod)
            if not result['OK']:
                return result
            taskId = result['Value']
            self.__tasks[taskId] = agentName
            agentData['taskId'] = taskId
            agentData['running'] = True

        if not self.__agentModules:
            return S_ERROR("No agent module loaded")

        return S_OK()

    def runNumCycles(self, agentName=None, numCycles=1):
        """
        Run all defined agents a given number of cycles.

        :param agentName: optional agent to load before running
        :param numCycles: number of additional cycles for every loaded agent
        :returns: S_OK() once go() has returned, or an error structure if the
                  agent could not be loaded or the cycles could not be set
        """
        if agentName:
            # A failed load used to be silently ignored, which made this
            # method report success without having run the requested agent.
            result = self.loadAgentModules([agentName])
            if not result['OK']:
                return result
        error = ''
        for aName in self.__agentModules:
            result = self.setAgentModuleCyclesToExecute(aName, numCycles)
            if not result['OK']:
                error = 'Failed to set cycles to execute'
                gLogger.error('%s:' % error, aName)
                break
        if error:
            return S_ERROR(error)
        self.go()
        return S_OK()

    def __finalize(self):
        """
        Execute the finalize method of all Agents. Failures are logged and do
        not prevent the remaining agents from being finalized.
        """
        for agentName in self.__agentModules:
            try:
                self.__agentModules[agentName]['instanceObj'].finalize()
            except Exception as excp:
                gLogger.exception('Failed to execute finalize for Agent: %s' % agentName, lException=excp)

    def go(self):
        """
        Main method to control the execution of all configured Agents.

        Returns immediately if the reactor is already running. Otherwise it
        loops, checking the control directories and executing the next
        scheduled task, until the scheduler has nothing left or the reactor
        is no longer alive; the agents are finalized afterwards.
        """
        if self.__running:
            return
        self.__running = True
        try:
            while self.__alive:
                self.__checkControlDir()
                timeToNext = self.__scheduler.executeNextTask()
                if timeToNext is None:
                    gLogger.info("No more agent modules to execute. Exiting")
                    break
                # Sleep between 0.5 and 5 seconds whatever the scheduler says.
                time.sleep(min(max(timeToNext, 0.5), 5))
        finally:
            self.__running = False
        self.__finalize()

    def setAgentModuleCyclesToExecute(self, agentName, maxCycles=1):
        """
        Set number of cycles to execute for a given agent (previously defined).

        :param agentName: name of an already loaded agent
        :param maxCycles: additional cycles; a non-zero value is added to the
                          cycles the agent reports as already done
        :returns: S_OK() or S_ERROR
        """
        if agentName not in self.__agentModules:
            return S_ERROR("%s has not been loaded" % agentName)
        agentData = self.__agentModules[agentName]
        if maxCycles:
            try:
                maxCycles += agentData['instanceObj'].am_getCyclesDone()
            except Exception as excp:
                error = 'Can not determine number of cycles to execute'
                gLogger.exception("%s: '%s'" % (error, maxCycles), lException=excp)
                return S_ERROR(error)
        agentData['instanceObj'].am_setOption('MaxCycles', maxCycles)
        self.__scheduler.setNumExecutionsForTask(agentData['taskId'], maxCycles)
        return S_OK()

    def __checkControlDir(self):
        """
        Check for the presence of stop_agent file to stop execution of the
        corresponding Agent. An agent whose 'alive' module parameter is false
        is stopped as well.
        """
        for agentName in self.__agentModules:
            agentData = self.__agentModules[agentName]
            if not agentData['running']:
                continue
            agent = agentData['instanceObj']
            alive = agent.am_getModuleParam('alive')
            if alive:
                if agent.am_checkStopAgentFile():
                    gLogger.info("Found StopAgent file for agent %s" % agentName)
                    alive = False
            if not alive:
                gLogger.info("Stopping agent module %s" % (agentName))
                self.__scheduler.removeTask(agentData['taskId'])
                del self.__tasks[agentData['taskId']]
                agentData['running'] = False
                agent.am_removeStopAgentFile()
class ServiceReactor(object):
    """
    Loads DIRAC services, opens one listening transport per service and
    hands every accepted client connection to the service it belongs to.
    Optionally starts clone processes that accept connections for a single
    service.
    """

    # Service option name -> transport keyword argument. A false value means
    # the option is passed to the transport under its own name.
    __transportExtraKeywords = {
        'SSLSessionTimeout': False,
        'IgnoreCRLs': False,
        'PacketTimeout': 'timeout',
        'SocketBacklog': False,
    }

    def __init__(self):
        self.__services = {}
        self.__alive = True
        self.__loader = ModuleLoader("Service",
                                     PathFinder.getServiceSection,
                                     RequestHandler,
                                     moduleSuffix="Handler")
        self.__maxFD = 0
        self.__listeningConnections = {}
        self.__stats = ReactorStats()
        # Clone processes started by serve()
        self.__processes = []

    def initialize(self, servicesList):
        """
        Load and initialize every service of servicesList.

        The gateway name, if present, is removed from servicesList (the list
        is modified in place) and served by a GatewayService instead of a
        loaded module.

        :param list servicesList: names of the services to load
        :returns: S_OK() or the first error encountered
        """
        try:
            servicesList.remove(GatewayService.GATEWAY_NAME)
            self.__services[GatewayService.GATEWAY_NAME] = GatewayService()
        except ValueError:
            # No GW in the service list
            pass

        result = self.__loader.loadModules(servicesList)
        if not result['OK']:
            return result
        self.__serviceModules = self.__loader.getModules()

        for serviceName in self.__serviceModules:
            self.__services[serviceName] = Service(self.__serviceModules[serviceName])

        # Loop again to include the GW in case there is one
        for serviceName in self.__services:
            gLogger.info("Initializing %s" % serviceName)
            result = self.__services[serviceName].initialize()
            if not result['OK']:
                return result
        return S_OK()

    def closeListeningConnections(self):
        """
        Close and forget the listening transport of every service. Errors
        raised while closing are logged and otherwise ignored.
        """
        gLogger.info("Closing listening connections...")
        for svcName in self.__listeningConnections:
            listener = self.__listeningConnections[svcName]
            if 'transport' in listener:
                try:
                    listener['transport'].close()
                except Exception as excp:
                    # Best effort: keep closing the others. SystemExit and
                    # KeyboardInterrupt are deliberately not swallowed.
                    gLogger.warn("Error while closing listening connection for %s" % svcName, str(excp))
                del listener['transport']
        gLogger.info("Connections closed")

    def __createListeners(self):
        """
        Create one server-mode transport per service.

        :returns: S_OK() or S_ERROR for the first service without a port,
                  with an unknown protocol, or whose transport cannot start
        """
        for serviceName in self.__services:
            svcCfg = self.__services[serviceName].getConfig()
            protocol = svcCfg.getProtocol()
            port = svcCfg.getPort()
            if not port:
                return S_ERROR("No port defined for service %s" % serviceName)
            if protocol not in gProtocolDict:
                return S_ERROR("Protocol %s is not known for service %s" % (protocol, serviceName))
            self.__listeningConnections[serviceName] = {'port': port, 'protocol': protocol}

            transportArgs = {}
            for optionName, transportKw in ServiceReactor.__transportExtraKeywords.items():
                value = svcCfg.getOption(optionName)
                if not value:
                    continue
                argName = transportKw if transportKw else optionName
                if argName == 'timeout':
                    value = int(value)
                transportArgs[argName] = value

            gLogger.verbose("Initializing %s transport" % protocol, svcCfg.getURL())
            transport = gProtocolDict[protocol]['transport'](("", port), bServerMode=True, **transportArgs)
            retVal = transport.initAsServer()
            if not retVal['OK']:
                return S_ERROR("Cannot start listening connection for service %s: %s" %
                               (serviceName, retVal['Message']))
            self.__listeningConnections[serviceName]['transport'] = transport
            self.__listeningConnections[serviceName]['socket'] = transport.getSocket()
        return S_OK()

    def stopChildProcesses(self, _sig, frame):
        """
        It is used to properly stop the service when more than one process
        are used. In principle this is doing the job of runsv, because runsv
        only sends a SIGTERM to the parent process...

        :param int _sig: the signal sent to the process
        :param object frame: execution frame which contains the child processes
        """
        handler = frame.f_locals.get('self')
        if handler and isinstance(handler, ServiceReactor):
            handler.stopAllProcess()

        for child in frame.f_locals.get('children', []):
            gLogger.info("Stopping child processes: %d" % child)
            os.kill(child, signal.SIGTERM)

        sys.exit(0)

    def serve(self):
        """
        Open the listeners, start the clone processes if any service asks
        for them and accept connections until the reactor stops being alive.

        :returns: the error of __createListeners() if the listeners could not
                  be created
        """
        result = self.__createListeners()
        if not result['OK']:
            self.__closeListeningConnections()
            return result
        for svcName in self.__listeningConnections:
            gLogger.always("Listening at %s" % self.__services[svcName].getConfig().getURL())

        isMultiProcessingAllowed = any(
            self.__services[svcName].getConfig().getCloneProcesses() > 0
            for svcName in self.__listeningConnections)
        if isMultiProcessingAllowed:
            signal.signal(signal.SIGTERM, self.stopChildProcesses)
            signal.signal(signal.SIGINT, self.stopChildProcesses)
            for svcName in self.__listeningConnections:
                clones = self.__services[svcName].getConfig().getCloneProcesses()
                # Clone ids start at 1, so `clones` yields clones - 1 processes
                for i in range(1, clones):
                    p = multiprocessing.Process(target=self.__startCloneProcess, args=(svcName, i))
                    self.__processes.append(p)
                    p.start()
                    gLogger.always("Started clone process %s for %s" % (i, svcName))

        while self.__alive:
            self.__acceptIncomingConnection()

    def stopAllProcess(self):
        """
        It stops all the running processes. Every process that is still
        alive is terminated and dropped from the list of clone processes.
        """
        # Iterate over a copy: removing from the list being iterated would
        # skip the process following each removed one.
        for process in list(self.__processes):
            gLogger.info("Stopping: PID=%d, name=%s, parentPid=%d" %
                         (process.pid, process.name, process._parent_pid))
            if process.is_alive():
                process.terminate()
                self.__processes.remove(process)

    # This function runs in a different process
    def __startCloneProcess(self, svcName, i):
        """
        Entry point of a clone process: accept connections for svcName only.

        :param str svcName: service served by this clone
        :param int i: clone id (also stored as the alive flag)
        """
        self.__services[svcName].setCloneProcessId(i)
        self.__alive = i
        while self.__alive:
            self.__acceptIncomingConnection(svcName)

    def __getListeningSocketsList(self, svcName=False):
        """
        :param svcName: service name, or a false value for all services
        :returns: list with the listening socket of svcName, or the sockets
                  of all services
        """
        if svcName:
            return [self.__listeningConnections[svcName]['socket']]
        return [self.__listeningConnections[name]['socket'] for name in self.__listeningConnections]

    def __acceptReadyConnections(self, readySockets):
        """
        Accept one connection on each ready listening socket.

        :param readySockets: sockets reported readable by select()
        :returns: (accepted, ok) where accepted is a list of
                  (serviceName, clientTransport) and ok is False as soon as
                  an accept fails (the remaining sockets are not tried)
        """
        accepted = []
        for inSocket in readySockets:
            for name in self.__listeningConnections:
                listener = self.__listeningConnections[name]
                if inSocket == listener['socket']:
                    try:
                        retVal = listener['transport'].acceptConnection()
                    except socket.error:
                        return accepted, False
                    if not retVal['OK']:
                        gLogger.warn("Error while accepting a connection: ", retVal['Message'])
                        return accepted, False
                    accepted.append((name, retVal['Value']))
        return accepted, True

    def __dispatchConnection(self, serviceName, clientTransport):
        """
        Reject the client if its IP is banned, otherwise hand the connection
        to the service it was accepted for.
        """
        self.__maxFD = max(self.__maxFD, clientTransport.oSocket.fileno())
        # Is it banned?
        clientIP = clientTransport.getRemoteAddress()[0]
        if clientIP in Registry.getBannedIPs():
            gLogger.warn("Client connected from banned ip %s" % clientIP)
            clientTransport.close()
            return
        # Handle connection
        self.__stats.connectionStablished()
        self.__services[serviceName].handleConnection(clientTransport)

    def __renewServerContexts(self):
        """
        Renew the server context of every transport older than the context
        life time of its service.

        :returns: True if at least one context was renewed
        """
        now = time.time()
        renewed = False
        for name in self.__listeningConnections:
            tr = self.__listeningConnections[name]['transport']
            if now - tr.latestServerRenewTime() > self.__services[name].getConfig().getContextLifeTime():
                result = tr.renewServerContext()
                if result['OK']:
                    renewed = True
        return renewed

    def __acceptIncomingConnection(self, svcName=False):
        """
        This method just gets the incoming connection, checks IP address
        and generates job. SSL/TLS handshake and execution of the remote call
        are made by Service._processInThread() (in another thread) so the
        service can accept other clients while another thread is handling a
        remote call.

        Returns when select() times out (10 seconds) without any ready
        socket or when a connection cannot be accepted; the caller is
        expected to call it again.

        :param str svcName=False: Name of a service if you use multiple
                                  services at the same time
        """
        sockets = self.__getListeningSocketsList(svcName)
        while self.__alive:
            try:
                inList, _outList, _exList = select.select(sockets, [], [], 10)
            except socket.error:
                return
            if not inList:
                return
            accepted, allAccepted = self.__acceptReadyConnections(inList)
            # Every accepted connection goes to the service whose socket it
            # arrived on. Previously only the last accepted connection was
            # handled, and always by the last service of the dictionary.
            for serviceName, clientTransport in accepted:
                self.__dispatchConnection(serviceName, clientTransport)
            if not allAccepted:
                return
            # Renew context?
            if self.__renewServerContexts():
                # Keep the restriction to svcName, as in the initial list.
                sockets = self.__getListeningSocketsList(svcName)

    def __closeListeningConnections(self):
        """Close the listening transport of every service, if it has one."""
        for svcName in self.__listeningConnections:
            lc = self.__listeningConnections[svcName]
            if 'transport' in lc and lc['transport']:
                lc['transport'].close()
class AgentReactor:
    """
    Main interface to DIRAC Agents. It allows to :
    - define a Agents modules to be executed
    - define the number of cycles to execute
    - steer the execution

    Agents are declared via:
    - loadAgentModule(): for a single Agent
    - loadAgentModules(): for a list of Agents

    The number of cycles to execute for a defined Agent can be set via:
    - setAgentModuleCyclesToExecute()

    The execution of the Agents is done with:
    - runNumCycles(): to execute an additional number of cycles
    - go():

    During the execution of the cycles, each of the Agents can be signaled to stop
    by creating a file named "stop_agent" in its Control Directory.
    """

    def __init__(self, baseAgentName):
        """
        :param baseAgentName: passed as third constructor argument to every
                              agent class instantiated by loadAgentModules()
        """
        self.__agentModules = {}
        self.__loader = ModuleLoader("Agent", PathFinder.getAgentSection, AgentModule)
        self.__tasks = {}
        self.__baseAgentName = baseAgentName
        self.__scheduler = ThreadScheduler.ThreadScheduler(enableReactorThread=False, minPeriod=30)
        self.__alive = True
        self.__running = False

    def loadAgentModules(self, modulesList, hideExceptions=False):
        """
        Load all modules required in modulesList, instantiate and initialize
        each agent and register it as a periodic task in the scheduler.

        :param modulesList: list of agent names handed to the module loader
        :param hideExceptions: when True, exceptions raised while creating or
                               initializing an agent are not logged (an
                               S_ERROR is still returned)
        :returns: S_OK() or the first error encountered
        """
        result = self.__loader.loadModules(modulesList, hideExceptions=hideExceptions)
        if not result['OK']:
            return result
        self.__agentModules = self.__loader.getModules()
        for agentName in self.__agentModules:
            agentData = self.__agentModules[agentName]
            agentData['running'] = False
            try:
                instanceObj = agentData['classObj'](agentName, agentData['loadName'], self.__baseAgentName)
                result = instanceObj.am_initialize()
                if not result['OK']:
                    return S_ERROR("Error while calling initialize method of %s: %s" %
                                   (agentName, result['Message']))
                agentData['instanceObj'] = instanceObj
            # "except X as name" is valid on Python 2.6+ and on Python 3,
            # unlike the former "except X, name" form.
            except Exception as excp:
                if not hideExceptions:
                    gLogger.exception("Can't load agent %s" % agentName)
                return S_ERROR("Can't load agent %s: \n %s" % (agentName, excp))
            # The polling time is used both as the period and as the initial
            # elapsed time of the task.
            agentPeriod = instanceObj.am_getPollingTime()
            result = self.__scheduler.addPeriodicTask(agentPeriod,
                                                      instanceObj.am_go,
                                                      executions=instanceObj.am_getMaxCycles(),
                                                      elapsedTime=agentPeriod)
            if not result['OK']:
                return result
            taskId = result['Value']
            self.__tasks[taskId] = agentName
            agentData['taskId'] = taskId
            agentData['running'] = True

        if not self.__agentModules:
            return S_ERROR("No agent module loaded")

        return S_OK()
class ServiceReactor(object):
    """
    Loads DIRAC services, opens one listening transport per service and
    hands every accepted client connection to the service it belongs to.
    """

    # Service option name -> transport keyword argument. A false value means
    # the option is passed to the transport under its own name.
    __transportExtraKeywords = {
        'SSLSessionTimeout': False,
        'IgnoreCRLs': False,
        'PacketTimeout': 'timeout',
        'SocketBacklog': False,
    }

    def __init__(self):
        self.__services = {}
        self.__alive = True
        self.__loader = ModuleLoader("Service",
                                     PathFinder.getServiceSection,
                                     RequestHandler,
                                     moduleSuffix="Handler")
        self.__maxFD = 0
        self.__listeningConnections = {}
        self.__stats = ReactorStats()

    def initialize(self, servicesList):
        """
        Load and initialize every service of servicesList.

        The gateway name, if present, is removed from servicesList (the list
        is modified in place) and served by a GatewayService instead of a
        loaded module.

        :param list servicesList: names of the services to load
        :returns: S_OK() or the first error encountered
        """
        try:
            servicesList.remove(GatewayService.GATEWAY_NAME)
            self.__services[GatewayService.GATEWAY_NAME] = GatewayService()
        except ValueError:
            # No GW in the service list
            pass

        result = self.__loader.loadModules(servicesList)
        if not result['OK']:
            return result
        self.__serviceModules = self.__loader.getModules()

        for serviceName in self.__serviceModules:
            self.__services[serviceName] = Service(self.__serviceModules[serviceName])

        # Loop again to include the GW in case there is one
        for serviceName in self.__services:
            gLogger.info("Initializing %s" % serviceName)
            result = self.__services[serviceName].initialize()
            if not result['OK']:
                return result
        return S_OK()

    def closeListeningConnections(self):
        """
        Close and forget the listening transport of every service. Errors
        raised while closing are logged and otherwise ignored.
        """
        gLogger.info("Closing listening connections...")
        for svcName in self.__listeningConnections:
            listener = self.__listeningConnections[svcName]
            if 'transport' in listener:
                try:
                    listener['transport'].close()
                except Exception as excp:
                    # Best effort: keep closing the others. A bare except
                    # would also swallow SystemExit and KeyboardInterrupt.
                    gLogger.warn("Error while closing listening connection for %s" % svcName, str(excp))
                del listener['transport']
        gLogger.info("Connections closed")

    def __createListeners(self):
        """
        Create one server-mode transport per service.

        :returns: S_OK() or S_ERROR for the first service without a port,
                  with an unknown protocol, or whose transport cannot start
        """
        for serviceName in self.__services:
            svcCfg = self.__services[serviceName].getConfig()
            protocol = svcCfg.getProtocol()
            port = svcCfg.getPort()
            if not port:
                return S_ERROR("No port defined for service %s" % serviceName)
            if protocol not in gProtocolDict:
                return S_ERROR("Protocol %s is not known for service %s" % (protocol, serviceName))
            self.__listeningConnections[serviceName] = {'port': port, 'protocol': protocol}

            transportArgs = {}
            for optionName, transportKw in ServiceReactor.__transportExtraKeywords.items():
                value = svcCfg.getOption(optionName)
                if not value:
                    continue
                argName = transportKw if transportKw else optionName
                if argName == 'timeout':
                    value = int(value)
                transportArgs[argName] = value

            gLogger.verbose("Initializing %s transport" % protocol, svcCfg.getURL())
            transport = gProtocolDict[protocol]['transport'](("", port), bServerMode=True, **transportArgs)
            retVal = transport.initAsServer()
            if not retVal['OK']:
                return S_ERROR("Cannot start listening connection for service %s: %s" %
                               (serviceName, retVal['Message']))
            self.__listeningConnections[serviceName]['transport'] = transport
            self.__listeningConnections[serviceName]['socket'] = transport.getSocket()
        return S_OK()

    def serve(self):
        """
        Open the listeners and accept connections until the reactor stops
        being alive.

        :returns: the error of __createListeners() if the listeners could not
                  be created
        """
        result = self.__createListeners()
        if not result['OK']:
            self.__closeListeningConnections()
            return result
        for svcName in self.__listeningConnections:
            gLogger.always("Listening at %s" % self.__services[svcName].getConfig().getURL())

        # Multiple clones not yet working. Disabled by default
        if False and multiprocessing:
            for svcName in self.__listeningConnections:
                clones = self.__services[svcName].getConfig().getCloneProcesses()
                for i in range(1, clones):
                    p = multiprocessing.Process(target=self.__startCloneProcess, args=(svcName, i))
                    p.start()
                    gLogger.always("Started clone process %s for %s" % (i, svcName))

        while self.__alive:
            self.__acceptIncomingConnection()

    # This function runs in a different process
    def __startCloneProcess(self, svcName, i):
        """
        Entry point of a clone process: accept connections for svcName only.

        :param str svcName: service served by this clone
        :param int i: clone id (also stored as the alive flag)
        """
        self.__services[svcName].setCloneProcessId(i)
        self.__alive = i
        while self.__alive:
            self.__acceptIncomingConnection(svcName)

    def __getListeningSocketsList(self, svcName=False):
        """
        :param svcName: service name, or a false value for all services
        :returns: list with the listening socket of svcName, or the sockets
                  of all services
        """
        if svcName:
            return [self.__listeningConnections[svcName]['socket']]
        return [self.__listeningConnections[name]['socket'] for name in self.__listeningConnections]

    def __acceptReadyConnections(self, readySockets):
        """
        Accept one connection on each ready listening socket.

        :param readySockets: sockets reported readable by select()
        :returns: (accepted, ok) where accepted is a list of
                  (serviceName, clientTransport) and ok is False as soon as
                  an accept fails (the remaining sockets are not tried)
        """
        accepted = []
        for inSocket in readySockets:
            for name in self.__listeningConnections:
                listener = self.__listeningConnections[name]
                if inSocket == listener['socket']:
                    try:
                        retVal = listener['transport'].acceptConnection()
                    except socket.error:
                        return accepted, False
                    if not retVal['OK']:
                        gLogger.warn("Error while accepting a connection: ", retVal['Message'])
                        return accepted, False
                    accepted.append((name, retVal['Value']))
        return accepted, True

    def __dispatchConnection(self, serviceName, clientTransport):
        """
        Reject the client if its IP is banned, otherwise hand the connection
        to the service it was accepted for.
        """
        self.__maxFD = max(self.__maxFD, clientTransport.oSocket.fileno())
        # Is it banned?
        clientIP = clientTransport.getRemoteAddress()[0]
        if clientIP in Registry.getBannedIPs():
            gLogger.warn("Client connected from banned ip %s" % clientIP)
            clientTransport.close()
            return
        # Handle connection
        self.__stats.connectionStablished()
        self.__services[serviceName].handleConnection(clientTransport)

    def __renewServerContexts(self):
        """
        Renew the server context of every transport older than the context
        life time of its service.

        :returns: True if at least one context was renewed
        """
        now = time.time()
        renewed = False
        for name in self.__listeningConnections:
            tr = self.__listeningConnections[name]['transport']
            if now - tr.latestServerRenewTime() > self.__services[name].getConfig().getContextLifeTime():
                result = tr.renewServerContext()
                if result['OK']:
                    renewed = True
        return renewed

    def __acceptIncomingConnection(self, svcName=False):
        """
        This method just gets the incoming connection, checks IP address
        and generates job. SSL/TLS handshake and execution of the remote call
        are made by Service._processInThread() (in another thread) so the
        service can accept other clients while another thread is handling a
        remote call.

        Returns when select() times out (10 seconds) without any ready
        socket or when a connection cannot be accepted; the caller is
        expected to call it again.

        :param str svcName=False: Name of a service if you use multiple
                                  services at the same time
        """
        sockets = self.__getListeningSocketsList(svcName)
        while self.__alive:
            try:
                inList, _outList, _exList = select.select(sockets, [], [], 10)
            except socket.error:
                return
            if not inList:
                return
            accepted, allAccepted = self.__acceptReadyConnections(inList)
            # Every accepted connection goes to the service whose socket it
            # arrived on. Previously only the last accepted connection was
            # handled, and always by the last service of the dictionary.
            for serviceName, clientTransport in accepted:
                self.__dispatchConnection(serviceName, clientTransport)
            if not allAccepted:
                return
            # Renew context?
            if self.__renewServerContexts():
                # Keep the restriction to svcName, as in the initial list.
                sockets = self.__getListeningSocketsList(svcName)

    def __closeListeningConnections(self):
        """Close the listening transport of every service, if it has one."""
        for svcName in self.__listeningConnections:
            lc = self.__listeningConnections[svcName]
            if 'transport' in lc and lc['transport']:
                lc['transport'].close()
class ExecutorReactor(object):
    """
    Loads executor modules, groups them by the mind they report through
    ex_getMind(), connects one MindCluster per mind and then blocks until
    the alive counter drops below one.
    """

    class AliveLock(object):
        """Counter of live connections protected by a condition variable."""

        def __init__(self):
            self.__alive = 0
            self.__cond = threading.Condition(threading.Lock())

        def alive(self):
            """Increment the counter."""
            with self.__cond:
                self.__alive += 1

        def dead(self):
            """Decrement the counter and wake up one waiter."""
            with self.__cond:
                self.__alive -= 1
                self.__cond.notify()

        def lockUntilAllDead(self):
            """Block until the counter is below one, re-checking every second."""
            with self.__cond:
                while self.__alive >= 1:
                    self.__cond.wait(1)

    class MindCluster(object):
        """
        Connection to one mind on behalf of all the executor modules that
        work for it. Tasks received from the mind are processed by pooled
        instances of the module classes.
        """

        def __init__(self, mindName, aliveLock):
            """
            :param mindName: name given to the MessageClient
            :param aliveLock: AliveLock shared with the reactor
            """
            self.__mindName = mindName
            self.__modules = {}
            self.__maxTasks = 1
            self.__reconnectSleep = 1
            self.__reconnectRetries = 10
            self.__extraArgs = {}
            # Module name -> list of idle instances ready for reuse
            self.__instances = {}
            self.__instanceLock = threading.Lock()
            self.__aliveLock = aliveLock

        def updateMaxTasks(self, mt):
            """Raise the maximum number of tasks to mt if mt is larger."""
            self.__maxTasks = max(self.__maxTasks, mt)

        def addModule(self, name, exeClass):
            """
            Register an executor class and merge its options: the largest
            MaxTasks, ReconnectSleep and ReconnectRetries seen so far win.
            """
            self.__modules[name] = exeClass
            self.__maxTasks = max(self.__maxTasks, exeClass.ex_getOption("MaxTasks"))
            self.__reconnectSleep = max(self.__reconnectSleep, exeClass.ex_getOption("ReconnectSleep"))
            self.__reconnectRetries = max(self.__reconnectRetries, exeClass.ex_getOption("ReconnectRetries"))
            self.__extraArgs[name] = exeClass.ex_getExtraArguments()

        def connect(self):
            """
            Create the message client, subscribe to the 'ProcessTask' message
            and to disconnections, and connect to the mind.

            :returns: the result of the connection; on success the alive
                      counter is incremented
            """
            self.__msgClient = MessageClient(self.__mindName)
            self.__msgClient.subscribeToMessage('ProcessTask', self.__processTask)
            self.__msgClient.subscribeToDisconnect(self.__disconnected)
            result = self.__msgClient.connect(executorTypes=list(self.__modules.keys()),
                                              maxTasks=self.__maxTasks,
                                              extraArgs=self.__extraArgs)
            if result['OK']:
                self.__aliveLock.alive()
                gLogger.info("Connected to %s" % self.__mindName)
            return result

        def __disconnected(self, msgClient):
            """
            Disconnection callback: retry connecting forever, sleeping
            ReconnectSleep seconds between attempts.

            :param msgClient: unused
            :returns: S_OK() once reconnected
            """
            retryCount = 0
            while True:
                gLogger.notice("Trying to reconnect to %s" % self.__mindName)
                result = self.__msgClient.connect(executorTypes=list(self.__modules.keys()),
                                                  maxTasks=self.__maxTasks,
                                                  extraArgs=self.__extraArgs)
                if result['OK']:
                    if retryCount >= self.__reconnectRetries:
                        self.__aliveLock.alive()
                    gLogger.notice("Reconnected to %s" % self.__mindName)
                    return S_OK()
                retryCount += 1
                if retryCount == self.__reconnectRetries:
                    # NOTE(review): this looks like it was meant to be dead(),
                    # so that the alive() above restores the counter after a
                    # late reconnection; dead() is never called in this class.
                    # Behaviour kept as is -- confirm before changing.
                    self.__aliveLock.alive()
                gLogger.info("Connect error failed: %s" % result['Message'])
                gLogger.notice("Failed to reconnect. Sleeping for %d seconds" % self.__reconnectSleep)
                time.sleep(self.__reconnectSleep)

        def __storeInstance(self, modName, modObj):
            """Put an idle module instance back into the pool of modName."""
            with self.__instanceLock:
                self.__instances[modName].append(modObj)

        def __getInstance(self, moduleName):
            """
            Get an instance of the executor moduleName, reusing an idle one
            when available.

            :returns: S_OK(instance) or S_ERROR if the module is unknown
            """
            with self.__instanceLock:
                if moduleName not in self.__instances:
                    self.__instances[moduleName] = []
                try:
                    return S_OK(self.__instances[moduleName].pop(0))
                except IndexError:
                    # Pool is empty: create a new instance below
                    pass
            try:
                modObj = self.__modules[moduleName]
            except KeyError:
                # The module name used to be missing from the message
                return S_ERROR("Unknown %s executor" % moduleName)
            modInstance = modObj()
            return S_OK(modInstance)

        def __sendExecutorError(self, eType, taskId, errMsg):
            """Send an 'ExecutorError' message for the task to the mind."""
            result = self.__msgClient.createMessage("ExecutorError")
            if not result['OK']:
                return result
            msgObj = result['Value']
            msgObj.taskId = taskId
            msgObj.errorMsg = errMsg
            msgObj.eType = eType
            return self.__msgClient.sendMessage(msgObj)

        def __processTask(self, msgObj):
            """
            'ProcessTask' callback: process the task and send the outcome
            (TaskDone, TaskFreeze, TaskError or ExecutorError) to the mind.
            """
            eType = msgObj.eType
            taskId = msgObj.taskId
            taskStub = msgObj.taskStub

            result = self.__moduleProcess(eType, taskId, taskStub)
            if not result['OK']:
                return self.__sendExecutorError(eType, taskId, result['Message'])
            msgName, taskStub, extra = result['Value']

            result = self.__msgClient.createMessage(msgName)
            if not result['OK']:
                return self.__sendExecutorError(eType, taskId,
                                                "Can't generate %s message: %s" % (msgName, result['Message']))
            gLogger.verbose("Task %s: Sending %s" % (str(taskId), msgName))
            msgObj = result['Value']
            msgObj.taskId = taskId
            msgObj.taskStub = taskStub
            if msgName == "TaskError":
                msgObj.errorMsg = extra
                msgObj.eType = eType
            elif msgName == "TaskFreeze":
                msgObj.freezeTime = extra
            return self.__msgClient.sendMessage(msgObj)

        def __moduleProcess(self, eType, taskId, taskStub, fastTrackLevel=0):
            """
            Process a task with an instance of the executor eType.

            When the executor asks for a fast track to another executor known
            to this cluster, that one is run directly, up to 10 levels deep.

            :returns: S_OK((messageName, taskStub, extra)) or S_ERROR. The
                      instance is not returned to the pool when processing
                      raises an exception.
            """
            result = self.__getInstance(eType)
            if not result['OK']:
                return result
            modInstance = result['Value']
            try:
                result = modInstance._ex_processTask(taskId, taskStub)
            except Exception as excp:
                gLogger.exception("Error while processing task %s" % taskId, lException=excp)
                return S_ERROR("Error processing task %s: %s" % (taskId, excp))

            self.__storeInstance(eType, modInstance)

            if not result['OK']:
                return S_OK(('TaskError', taskStub, "Error: %s" % result['Message']))
            taskStub, freezeTime, fastTrackType = result['Value']
            if freezeTime:
                return S_OK(("TaskFreeze", taskStub, freezeTime))
            if fastTrackType:
                if fastTrackLevel < 10 and fastTrackType in self.__modules:
                    gLogger.notice("Fast tracking task %s to %s" % (taskId, fastTrackType))
                    return self.__moduleProcess(fastTrackType, taskId, taskStub, fastTrackLevel + 1)
                else:
                    gLogger.notice("Stopping %s fast track. Sending back to the mind" % (taskId))

            return S_OK(("TaskDone", taskStub, True))

    #####
    # Start of ExecutorReactor
    #####

    def __init__(self):
        self.__aliveLock = self.AliveLock()
        self.__executorModules = {}
        self.__codeModules = {}
        self.__minds = {}
        self.__loader = ModuleLoader("Executor",
                                     PathFinder.getExecutorSection,
                                     ExecutorModule)

    def loadModules(self, modulesList, hideExceptions=False):
        """
        Load all modules required in modulesList.

        :returns: S_OK() or the error of the module loader
        """
        result = self.__loader.loadModules(modulesList, hideExceptions=hideExceptions)
        if not result['OK']:
            return result
        self.__executorModules = self.__loader.getModules()
        return S_OK()

    # Go!
    def go(self):
        """
        Initialize every loaded executor class, attach it to the cluster of
        its mind, connect all clusters and block until the alive counter
        drops below one.

        :returns: S_OK() or the first initialization/connection error
        """
        for name in self.__executorModules:
            exeClass = self.__executorModules[name]['classObj']
            result = exeClass._ex_initialize(name, self.__executorModules[name]['loadName'])
            if not result['OK']:
                return result
            mind = exeClass.ex_getMind()
            if mind not in self.__minds:
                self.__minds[mind] = self.MindCluster(mind, self.__aliveLock)
            mc = self.__minds[mind]
            mc.addModule(name, exeClass)
        for mindName in self.__minds:
            gLogger.info("Trying to connect to %s" % mindName)
            result = self.__minds[mindName].connect()
            if not result['OK']:
                return result
        self.__aliveLock.lockUntilAllDead()
        return S_OK()
class HandlerManager(object):
    """
    This utility class allows to load the handlers, generate the appropriate
    route, and discover the handlers based on the CS.
    In order for a service to be considered as using HTTPS, it must have
    ``protocol = https`` as an option.

    Each of the Handler will have one associated route to it:

    * Directly specified as ``LOCATION`` in the handler module
    * automatically deduced from the module name, of the form
      ``System/Component`` (e.g. ``DataManagement/FileCatalog``)
    """

    def __init__(self, autoDiscovery=True):
        """
        Initialization function, you can set autoDiscovery=False to prevent
        automatic discovery of handler. If disabled you can use
        loadHandlersByServiceName() to load your handlers or
        loadHandlerInHandlerManager()

        :param autoDiscovery: (default True) Disable the automatic discovery,
                              can be used to choose service we want to load.
        """
        self.__handlers = {}
        self.__objectLoader = ObjectLoader()
        self.__autoDiscovery = autoDiscovery
        self.loader = ModuleLoader("Service", PathFinder.getServiceSection, RequestHandler, moduleSuffix="Handler")

    def __addHandler(self, handlerTuple, url=None):
        """
        Function which add handler to list of known handlers

        :param handlerTuple: (path, class)
        :param url: route of the handler; when None it is taken from the
                    ``LOCATION`` attribute of the class or deduced from path
        :returns: S_OK() or S_ERROR if no URL could be found
        """
        # Check if handler not already loaded
        if not url or url not in self.__handlers:
            gLogger.debug("Find new handler %s" % (handlerTuple[0]))

            # If url is not given, try to discover it
            if url is None:
                # FIRST TRY: Url is hardcoded
                try:
                    url = handlerTuple[1].LOCATION
                # SECOND TRY: URL can be deduced from path
                except AttributeError:
                    gLogger.debug("No location defined for %s try to get it from path" % handlerTuple[0])
                    url = urlFinder(handlerTuple[0])

            # We add "/" if missing at begin, e.g. we found "Framework/Service"
            # URL can't be relative in Tornado
            if url and not url.startswith("/"):
                url = "/%s" % url
            elif not url:
                gLogger.warn("URL not found for %s" % (handlerTuple[0]))
                return S_ERROR("URL not found for %s" % (handlerTuple[0]))

            # Finally add the URL to handlers
            if url not in self.__handlers:
                self.__handlers[url] = handlerTuple[1]
                gLogger.info("New handler: %s with URL %s" % (handlerTuple[0], url))
        else:
            gLogger.debug("Handler already loaded %s" % (handlerTuple[0]))
        return S_OK()

    def discoverHandlers(self):
        """
        Force the discovery of URL, automatic call when we try to get handlers
        for the first time. You can disable the automatic call with
        autoDiscovery=False at initialization

        :returns: the result of loadHandlersByServiceName() for the services
                  whose Protocol option is "https"
        """
        gLogger.debug("Trying to auto-discover the handlers for Tornado")

        # Look in config
        diracSystems = gConfig.getSections("/Systems")
        serviceList = []
        if diracSystems["OK"]:
            for system in diracSystems["Value"]:
                try:
                    instance = PathFinder.getSystemInstance(system)
                    services = gConfig.getSections("/Systems/%s/%s/Services" % (system, instance))
                    if services["OK"]:
                        for service in services["Value"]:
                            newservice = "%s/%s" % (system, service)

                            # We search in the CS all handlers which used HTTPS as protocol
                            isHTTPS = gConfig.getValue(
                                "/Systems/%s/%s/Services/%s/Protocol" % (system, instance, service)
                            )
                            if isHTTPS and isHTTPS.lower() == "https":
                                serviceList.append(newservice)
                # On systems sometime you have things not related to services...
                except RuntimeError:
                    pass
        return self.loadHandlersByServiceName(serviceList)

    def loadHandlersByServiceName(self, servicesNames):
        """
        Load a list of handler from list of service using DIRAC moduleLoader
        Use :py:class:`DIRAC.Core.Base.private.ModuleLoader`

        :param servicesNames: list of service, e.g. ['Framework/Hello', 'Configuration/Server']
                              (a single name is accepted too)
        :returns: S_OK() or the error of the module loader
        """
        # Use DIRAC system to load: search in CS if path is given and if not defined
        # it search in place it should be (e.g. in DIRAC/FrameworkSystem/Service)
        if not isinstance(servicesNames, list):
            servicesNames = [servicesNames]

        load = self.loader.loadModules(servicesNames)
        if not load["OK"]:
            return load
        for module in self.loader.getModules().values():
            url = module["loadName"]

            # URL can be like https://domain:port/service/name or just service/name
            # Here we just want the service name, for tornado
            serviceTuple = url.replace("https://", "").split("/")[-2:]
            url = "%s/%s" % (serviceTuple[0], serviceTuple[1])
            self.__addHandler((module["loadName"], module["classObj"]), url)
        return S_OK()

    def __discoverIfNeeded(self):
        """
        Run discoverHandlers() the first time handlers are requested, if none
        is loaded yet and auto-discovery is enabled. A failed discovery is
        logged; it is not retried.
        """
        if self.__handlers or not self.__autoDiscovery:
            return
        self.__autoDiscovery = False
        res = self.discoverHandlers()
        if not res["OK"]:
            gLogger.error("Could not load handlers", res)

    def getHandlersURLs(self):
        """
        Get all handler for usage in Tornado, as a list of tornado.web.url
        If there is no handler found before, it try to find them

        :returns: a list of URL (not the string with "https://..." but the tornado object)
                  see http://www.tornadoweb.org/en/stable/web.html#tornado.web.URLSpec
        """
        # Shared with getHandlersDict() so that a failed discovery is logged
        # here as well instead of being silently ignored.
        self.__discoverIfNeeded()
        return [TornadoURL(key, self.__handlers[key]) for key in self.__handlers]

    def getHandlersDict(self):
        """
        Return all handler dictionary

        :returns: dictionary with absolute url as key ("/System/Service")
                  and the handler class as value
        """
        self.__discoverIfNeeded()
        return self.__handlers
class ExecutorReactor(object):
    """
    Load executor modules, group them by the mind they declare and connect
    one MindCluster per mind.

    go() blocks until the shared AliveLock counter reports that no
    connection is accounted as alive any more.
    """

    class AliveLock(object):
        """Counter of live connections protected by a condition variable."""

        def __init__(self):
            self.__alive = 0
            self.__cond = threading.Condition(threading.Lock())

        def alive(self):
            """Increment the number of live connections."""
            with self.__cond:
                self.__alive += 1

        def dead(self):
            """Decrement the number of live connections and wake up one waiter."""
            with self.__cond:
                self.__alive -= 1
                self.__cond.notify()

        def lockUntilAllDead(self):
            """Block until the counter drops below 1, re-checking at least every second."""
            with self.__cond:
                while self.__alive >= 1:
                    self.__cond.wait(1)

    class MindCluster(object):
        """
        Set of executor modules attached to one mind through a single
        MessageClient connection.
        """

        def __init__(self, mindName, aliveLock):
            """
            :param mindName: name handed to MessageClient to reach the mind
            :param aliveLock: AliveLock shared with the owning ExecutorReactor
            """
            self.__mindName = mindName
            self.__modules = {}
            self.__maxTasks = 1
            self.__reconnectSleep = 1
            self.__reconnectRetries = 10
            self.__extraArgs = {}
            # Idle executor instances, keyed by module name, available for reuse
            self.__instances = {}
            self.__instanceLock = threading.Lock()
            self.__aliveLock = aliveLock

        def updateMaxTasks(self, mt):
            """Raise the maximum number of tasks to mt if mt is larger than the current value."""
            self.__maxTasks = max(self.__maxTasks, mt)

        def addModule(self, name, exeClass):
            """
            Register an executor class under the given name.

            The cluster-wide MaxTasks, ReconnectSleep and ReconnectRetries are
            only ever raised: each becomes the maximum of the current value and
            the option read from the class.

            :param name: executor module name
            :param exeClass: executor class
            """
            self.__modules[name] = exeClass
            self.__maxTasks = max(self.__maxTasks, exeClass.ex_getOption("MaxTasks", 0))
            self.__reconnectSleep = max(self.__reconnectSleep, exeClass.ex_getOption("ReconnectSleep", 0))
            self.__reconnectRetries = max(self.__reconnectRetries, exeClass.ex_getOption("ReconnectRetries", 0))
            self.__extraArgs[name] = exeClass.ex_getExtraArguments()

        def __connectMsgClient(self):
            """Issue the connect call with the parameters of this cluster."""
            return self.__msgClient.connect(
                executorTypes=list(self.__modules), maxTasks=self.__maxTasks, extraArgs=self.__extraArgs
            )

        def connect(self):
            """
            Create the MessageClient, subscribe the callbacks and connect.

            On success the shared AliveLock is incremented.

            :returns: the result of the connect call
            """
            self.__msgClient = MessageClient(self.__mindName)
            self.__msgClient.subscribeToMessage("ProcessTask", self.__processTask)
            self.__msgClient.subscribeToDisconnect(self.__disconnected)
            result = self.__connectMsgClient()
            if result["OK"]:
                self.__aliveLock.alive()
                gLogger.info("Connected to %s" % self.__mindName)
            return result

        def __disconnected(self, msgClient):
            """
            Disconnect callback: retry the connection until it succeeds.

            :param msgClient: unused, the client stored by connect() is reused
            :returns: S_OK() once reconnected
            """
            retryCount = 0
            while True:
                gLogger.notice("Trying to reconnect to %s" % self.__mindName)
                result = self.__connectMsgClient()
                if result["OK"]:
                    if retryCount >= self.__reconnectRetries:
                        self.__aliveLock.alive()
                    gLogger.notice("Reconnected to %s" % self.__mindName)
                    return S_OK()
                retryCount += 1
                if retryCount == self.__reconnectRetries:
                    # NOTE(review): nothing in this class ever calls dead(), so the
                    # counter only grows. Since a later successful reconnection calls
                    # alive() again, this branch possibly meant dead() - confirm
                    # before changing, it decides when go() returns.
                    self.__aliveLock.alive()
                gLogger.info("Connect error failed: %s" % result["Message"])
                gLogger.notice("Failed to reconnect. Sleeping for %d seconds" % self.__reconnectSleep)
                time.sleep(self.__reconnectSleep)

        def __storeInstance(self, modName, modObj):
            """Give an executor instance back to the pool of its module."""
            with self.__instanceLock:
                self.__instances[modName].append(modObj)

        def __getInstance(self, moduleName):
            """
            Get an executor instance, reusing a pooled one when available.

            :param moduleName: executor module name
            :returns: S_OK(instance), or S_ERROR if the module is not registered
            """
            with self.__instanceLock:
                pool = self.__instances.setdefault(moduleName, [])
                if pool:
                    return S_OK(pool.pop(0))
            try:
                modObj = self.__modules[moduleName]
            except KeyError:
                # The module name was missing from the message (literal "%s" was sent)
                return S_ERROR("Unknown %s executor" % moduleName)
            return S_OK(modObj())

        def __sendExecutorError(self, eType, taskId, errMsg):
            """Send an ExecutorError message for the given task."""
            result = self.__msgClient.createMessage("ExecutorError")
            if not result["OK"]:
                return result
            msgObj = result["Value"]
            msgObj.taskId = taskId
            msgObj.errorMsg = errMsg
            msgObj.eType = eType
            return self.__msgClient.sendMessage(msgObj)

        def __processTask(self, msgObj):
            """
            ProcessTask callback: run the task and send back the outcome message.

            :param msgObj: message carrying eType, taskId and taskStub
            :returns: the result of sending the answer (or the executor error)
            """
            eType = msgObj.eType
            taskId = msgObj.taskId
            taskStub = msgObj.taskStub

            result = self.__moduleProcess(eType, taskId, taskStub)
            if not result["OK"]:
                return self.__sendExecutorError(eType, taskId, result["Message"])
            msgName, taskStub, extra = result["Value"]

            result = self.__msgClient.createMessage(msgName)
            if not result["OK"]:
                return self.__sendExecutorError(
                    eType, taskId, "Can't generate %s message: %s" % (msgName, result["Message"])
                )
            gLogger.verbose("Task %s: Sending %s" % (str(taskId), msgName))
            msgObj = result["Value"]
            msgObj.taskId = taskId
            msgObj.taskStub = taskStub
            # "extra" is the error text for TaskError and the freeze time for TaskFreeze
            if msgName == "TaskError":
                msgObj.errorMsg = extra
                msgObj.eType = eType
            elif msgName == "TaskFreeze":
                msgObj.freezeTime = extra
            return self.__msgClient.sendMessage(msgObj)

        def __moduleProcess(self, eType, taskId, taskStub, fastTrackLevel=0):
            """
            Process a task with an executor of type eType.

            :param eType: executor module name
            :param taskId: task identifier
            :param taskStub: task payload passed to the executor
            :param fastTrackLevel: number of fast-track hops already done
            :returns: S_OK((messageName, taskStub, extra)) with messageName one of
                      TaskError, TaskFreeze or TaskDone; S_ERROR if no instance
                      could be obtained or the executor raised
            """
            result = self.__getInstance(eType)
            if not result["OK"]:
                return result
            modInstance = result["Value"]
            try:
                result = modInstance._ex_processTask(taskId, taskStub)
            except Exception as excp:
                # The instance that raised is not put back in the pool
                gLogger.exception("Error while processing task %s" % taskId, lException=excp)
                return S_ERROR("Error processing task %s: %s" % (taskId, excp))

            self.__storeInstance(eType, modInstance)

            if not result["OK"]:
                return S_OK(("TaskError", taskStub, "Error: %s" % result["Message"]))
            taskStub, freezeTime, fastTrackType = result["Value"]
            if freezeTime:
                return S_OK(("TaskFreeze", taskStub, freezeTime))
            if fastTrackType:
                # Fast track only to locally registered modules, at most 10 hops deep
                if fastTrackLevel < 10 and fastTrackType in self.__modules:
                    gLogger.notice("Fast tracking task %s to %s" % (taskId, fastTrackType))
                    return self.__moduleProcess(fastTrackType, taskId, taskStub, fastTrackLevel + 1)
                else:
                    gLogger.notice("Stopping %s fast track. Sending back to the mind" % (taskId))

            return S_OK(("TaskDone", taskStub, True))

    #####
    # Start of ExecutorReactor
    #####

    def __init__(self):
        self.__aliveLock = self.AliveLock()
        self.__executorModules = {}
        self.__codeModules = {}
        self.__minds = {}
        self.__loader = ModuleLoader("Executor", PathFinder.getExecutorSection, ExecutorModule)

    def loadModules(self, modulesList, hideExceptions=False):
        """
        Load all modules required in moduleList

        :param modulesList: modules to load, passed to the module loader
        :param hideExceptions: forwarded to the module loader
        :returns: the loader error if loading failed, S_OK() otherwise
        """
        result = self.__loader.loadModules(modulesList, hideExceptions=hideExceptions)
        if not result["OK"]:
            return result
        self.__executorModules = self.__loader.getModules()
        return S_OK()

    # Go!
    def go(self):
        """
        Initialize every loaded executor, connect one MindCluster per mind and
        block until the AliveLock counter drops below 1.

        :returns: the first initialization/connection error, S_OK() otherwise
        """
        for name in self.__executorModules:
            exeClass = self.__executorModules[name]["classObj"]
            result = exeClass._ex_initialize(name, self.__executorModules[name]["loadName"])
            if not result["OK"]:
                return result
            mind = exeClass.ex_getMind()
            if mind not in self.__minds:
                self.__minds[mind] = self.MindCluster(mind, self.__aliveLock)
            mc = self.__minds[mind]
            mc.addModule(name, exeClass)
        for mindName in self.__minds:
            gLogger.info("Trying to connect to %s" % mindName)
            result = self.__minds[mindName].connect()
            if not result["OK"]:
                return result
        self.__aliveLock.lockUntilAllDead()
        return S_OK()
class AgentReactor(object):
    """
    Main interface to DIRAC Agents. It allows to :
    - define a Agents modules to be executed
    - define the number of cycles to execute
    - steer the execution

    Agents are declared via:
    - loadAgentModules(): for a list of Agents

    The number of cycles to execute for a defined Agent can be set via:
    - setAgentModuleCyclesToExecute()

    The execution of the Agents is done with:
    - runNumCycles(): to execute an additional number of cycles
    - go():

    During the execution of the cycles, each of the Agents can be signaled to stop
    by creating a file named "stop_agent" in its Control Directory.
    """

    def __init__(self, baseAgentName):
        """
        :param baseAgentName: name handed to every agent instance at creation
        """
        self.__agentModules = {}
        self.__loader = ModuleLoader("Agent", PathFinder.getAgentSection, AgentModule)
        # Scheduler task id -> agent name
        self.__tasks = {}
        self.__baseAgentName = baseAgentName
        self.__scheduler = ThreadScheduler.ThreadScheduler(enableReactorThread=False,
                                                           minPeriod=30)
        self.__alive = True
        self.__running = False

    def loadAgentModules(self, modulesList, hideExceptions=False):
        """
        Load all modules required in moduleList

        Every loaded module is instantiated, initialized with am_initialize()
        and registered in the scheduler as a periodic task.

        :param modulesList: agent modules to load, passed to the module loader
        :param hideExceptions: if True, an exception raised while creating or
                               initializing an agent is not logged
        :returns: S_OK(), or S_ERROR / the failing result on the first problem
        """
        result = self.__loader.loadModules(modulesList, hideExceptions=hideExceptions)
        if not result['OK']:
            return result
        self.__agentModules = self.__loader.getModules()
        for agentName in self.__agentModules:
            agentData = self.__agentModules[agentName]
            agentData['running'] = False
            try:
                instanceObj = agentData['classObj'](agentName, agentData['loadName'], self.__baseAgentName)
                result = instanceObj.am_initialize()
                if not result['OK']:
                    return S_ERROR("Error while calling initialize method of %s: %s" % (agentName,
                                                                                        result['Message']))
                agentData['instanceObj'] = instanceObj
            except Exception as excp:
                if not hideExceptions:
                    gLogger.exception("Can't load agent %s" % agentName, lException=excp)
                return S_ERROR("Can't load agent %s: \n %s" % (agentName, excp))
            agentPeriod = instanceObj.am_getPollingTime()
            result = self.__scheduler.addPeriodicTask(agentPeriod,
                                                      instanceObj.am_go,
                                                      executions=instanceObj.am_getMaxCycles(),
                                                      elapsedTime=agentPeriod)
            if not result['OK']:
                return result

            taskId = result['Value']
            self.__tasks[taskId] = agentName
            agentData['taskId'] = taskId
            agentData['running'] = True

        if not self.__agentModules:
            return S_ERROR("No agent module loaded")

        return S_OK()

    def runNumCycles(self, agentName=None, numCycles=1):
        """
        Run all defined agents a given number of cycles

        :param agentName: if given, this agent module is loaded first
        :param numCycles: number of additional cycles to run for every agent
        :returns: S_OK() once go() has returned, S_ERROR if the agent could not
                  be loaded or the cycles could not be set
        """
        if agentName:
            # Do not run anything if the requested agent could not be loaded
            result = self.loadAgentModules([agentName])
            if not result['OK']:
                return result
        for aName in self.__agentModules:
            result = self.setAgentModuleCyclesToExecute(aName, numCycles)
            if not result['OK']:
                error = 'Failed to set cycles to execute'
                gLogger.error('%s:' % error, aName)
                return S_ERROR(error)
        self.go()
        return S_OK()

    def __finalize(self):
        """
        Execute the finalize method of all Agents

        A failure for one agent is logged and does not prevent the others
        from being finalized.
        """
        for agentName in self.__agentModules:
            try:
                self.__agentModules[agentName]['instanceObj'].finalize()
            except Exception as excp:
                gLogger.exception('Failed to execute finalize for Agent: %s' % agentName, lException=excp)

    def go(self):
        """
        Main method to control the execution of all configured Agents

        Returns immediately if the loop is already running. Otherwise it loops
        until the scheduler has no more task to execute, then finalizes the agents.
        """
        if self.__running:
            return
        self.__running = True
        try:
            while self.__alive:
                self.__checkControlDir()
                timeToNext = self.__scheduler.executeNextTask()
                if timeToNext is None:
                    gLogger.info("No more agent modules to execute. Exiting")
                    break
                # Sleep until the next task is due, but between 0.5 and 5 seconds
                time.sleep(min(max(timeToNext, 0.5), 5))
        finally:
            self.__running = False
        self.__finalize()

    def setAgentModuleCyclesToExecute(self, agentName, maxCycles=1):
        """
        Set number of cycles to execute for a given agent (previously defined)

        :param agentName: name of an already loaded agent
        :param maxCycles: number of cycles to execute; a non-zero value is added
                          to the cycles the agent reports as already done
        :returns: S_OK(), or S_ERROR if the agent is not loaded or the number of
                  cycles can not be determined
        """
        if agentName not in self.__agentModules:
            return S_ERROR("%s has not been loaded" % agentName)
        if maxCycles:
            try:
                maxCycles += self.__agentModules[agentName]['instanceObj'].am_getCyclesDone()
            except Exception as excp:
                error = 'Can not determine number of cycles to execute'
                gLogger.exception("%s: '%s'" % (error, maxCycles), lException=excp)
                return S_ERROR(error)
        self.__agentModules[agentName]['instanceObj'].am_setOption('MaxCycles', maxCycles)
        self.__scheduler.setNumExecutionsForTask(self.__agentModules[agentName]['taskId'],
                                                 maxCycles)
        return S_OK()

    def __checkControlDir(self):
        """
        Check for the presence of stop_agent file to stop execution of the corresponding Agent

        Agents not marked as running are skipped. An agent whose 'alive' module
        parameter is false, or for which the stop file check succeeds, is removed
        from the scheduler and marked as not running.
        """
        for agentName in self.__agentModules:
            agentData = self.__agentModules[agentName]
            if not agentData['running']:
                continue
            agent = agentData['instanceObj']

            alive = agent.am_getModuleParam('alive')
            if alive:
                if agent.am_checkStopAgentFile():
                    gLogger.info("Found StopAgent file for agent %s" % agentName)
                    alive = False

            if not alive:
                gLogger.info("Stopping agent module %s" % (agentName))
                self.__scheduler.removeTask(agentData['taskId'])
                del self.__tasks[agentData['taskId']]
                agentData['running'] = False
                agent.am_removeStopAgentFile()