def __init__(self, serviceData):
    """Store the service description and set up its runtime helpers.

    :param serviceData: dict with modName, standalone, loadName, moduleObj, classObj.
        e.g.: {'modName': 'Framework/serviceName',
               'standalone': True,
               'loadName': 'Framework/serviceName',
               'moduleObj': <module 'serviceNameHandler' from '...'>,
               'classObj': <class 'serviceNameHandler.serviceHandler'>}

    ``standalone`` is True when only one service is started in this process;
    in that case the shared gMonitor is reused, otherwise each service gets
    its own MonitoringClient.
    """
    self._svcData = serviceData
    modName = serviceData['modName']
    loadName = serviceData['loadName']
    self._name = modName
    self._startTime = Time.dateTime()
    # The service answers under both its module name and its load name
    names = [modName]
    if loadName not in names:
        names.append(loadName)
    self._validNames = names
    self._cfg = ServiceConfiguration(list(names))
    self._monitor = gMonitor if serviceData['standalone'] else MonitoringClient()
    self.__monitorLastStatsUpdate = time.time()
    self._stats = {'queries': 0, 'connections': 0}
    self._authMgr = AuthManager(
        "%s/Authorization" % PathFinder.getServiceSection(loadName))
    self._transportPool = getGlobalTransportPool()
    self.__cloneId = 0
    self.__maxFD = 0
def __init__(self, serviceName):
    """Prepare the state for the service named *serviceName*.

    Sets up configuration, monitoring, statistics counters, the
    authorization manager and the shared transport pool.
    """
    self._name = serviceName
    self._validNames = [serviceName]
    self._startTime = Time.dateTime()
    self._cfg = ServiceConfiguration(serviceName)
    self._monitor = MonitoringClient()
    self.__monitorLastStatsUpdate = time.time()
    self._stats = {'queries': 0, 'connections': 0}
    self._authMgr = AuthManager("%s/Authorization" % self._cfg.getServicePath())
    self._transportPool = getGlobalTransportPool()
    self.__cloneId = 0
def __init__(self, services=None, port=None):
    """Resolve the listening port, load the handlers and build the routes.

    :param list services: (default None) list of service handlers to load;
        if ``None``, every handler found in the configuration is loaded
    :param int port: port to listen to; if ``None`` it is resolved from
        ``/Systems/Tornado/<instance>/Port`` (falling back to 8443)
    """
    if port is None:
        port = gConfig.getValue(
            "/Systems/Tornado/%s/Port" % PathFinder.getSystemInstance('Tornado'), 8443)
    # Accept a single handler name as a convenience
    if services and not isinstance(services, list):
        services = [services]
    # Routes for the services: tornado.web.url objects
    self.urls = []
    self.port = port
    self.handlerManager = HandlerManager()
    # Monitoring attributes
    self._monitor = MonitoringClient()
    self.__report = None  # temporary value used by the monitoring computation
    self.__monitorLastStatsUpdate = None  # last stats update time stamp
    self.__monitoringLoopDelay = 60  # in seconds
    # When an explicit list is given, load only those services (useful for
    # debugging or for running a specific subset)
    if services:
        retVal = self.handlerManager.loadHandlersByServiceName(services)
        if not retVal['OK']:
            sLog.error(retVal['Message'])
            raise ImportError(
                "Some services can't be loaded, check the service names and configuration.")
    # If no service list was given, handlers come from the configuration
    for route, handler in self.handlerManager.getHandlersDict().items():
        self.urls.append(url(route, handler))
    if not self.urls:
        raise ImportError(
            "There is no services loaded, please check your configuration")
def _initMonitoring(cls, serviceName, fullUrl):
    """Initialize the monitoring specific to this handler.

    This has to be called only by :py:meth:`.__initializeService` to ensure
    thread safety and unicity of the call.

    :param serviceName: relative URL ``/<System>/<Component>``
    :param fullUrl: full URl like ``https://<host>:<port>/<System>/<Component>``
    """
    monitor = MonitoringClient()
    cls._monitor = monitor
    monitor.setComponentType(MonitoringClient.COMPONENT_WEB)
    monitor.initialize()
    taskId = tornado.process.task_id()
    if taskId is None:
        # Single process mode
        componentName = 'Tornado/%s' % serviceName
    else:
        componentName = 'Tornado/CPU%d/%s' % (taskId, serviceName)
    monitor.setComponentName(componentName)
    monitor.setComponentLocation(fullUrl)
    monitor.registerActivity("Queries", "Queries served", "Framework",
                             "queries", MonitoringClient.OP_RATE)
    monitor.setComponentExtraParam('DIRACVersion', DIRAC.version)
    monitor.setComponentExtraParam('platform', DIRAC.getPlatform())
    monitor.setComponentExtraParam('startTime', datetime.utcnow())
    cls._stats = {'requests': 0, 'monitorLastStatsUpdate': time.time()}
    return S_OK()
def __init__(self, serviceData):
    """Initialize the service from its *serviceData* description dict.

    Expected keys: modName, loadName, standalone, moduleObj, classObj.
    Note: this variant registers the service under its *loadName*.
    """
    self._svcData = serviceData
    self._name = serviceData['loadName']
    self._startTime = Time.dateTime()
    validNames = [serviceData['modName']]
    if serviceData['loadName'] not in validNames:
        validNames.append(serviceData['loadName'])
    self._validNames = validNames
    self._cfg = ServiceConfiguration(list(validNames))
    # Standalone services share the global monitor; otherwise one per service
    if serviceData['standalone']:
        self._monitor = gMonitor
    else:
        self._monitor = MonitoringClient()
    self.__monitorLastStatsUpdate = time.time()
    self._stats = {'queries': 0, 'connections': 0}
    authSection = "%s/Authorization" % PathFinder.getServiceSection(serviceData['loadName'])
    self._authMgr = AuthManager(authSection)
    self._transportPool = getGlobalTransportPool()
    self.__cloneId = 0
    self.__maxFD = 0
def __initializeMonitor(self):
    """Initialize the system monitoring.

    ES-based activity monitoring is activated when the
    "EnableActivityMonitoring" flag is "yes"/"true" either in the Operations
    section or in the agent's own options; otherwise the classic
    gMonitor/MonitoringClient registration is performed.
    """
    self.activityMonitoring = (
        Operations().getValue("EnableActivityMonitoring", False)
        or self.am_getOption("EnableActivityMonitoring", False))
    if self.activityMonitoring:
        # Imported lazily: the CS must be initialized before importing this
        # class (see https://github.com/DIRACGrid/DIRAC/issues/4793)
        from DIRAC.MonitoringSystem.Client.MonitoringReporter import MonitoringReporter
        self.activityMonitoringReporter = MonitoringReporter(
            monitoringType="ComponentMonitoring")
        # Commit the collected data to ES every 100 seconds
        gThreadScheduler.addPeriodicTask(100, self.__activityMonitoringReporting)
    else:
        monitor = gMonitor if self.__moduleProperties['standalone'] else MonitoringClient()
        self.monitor = monitor
        monitor.setComponentType(monitor.COMPONENT_AGENT)
        monitor.setComponentName(self.__moduleProperties['fullName'])
        monitor.initialize()
        monitor.registerActivity('CPU', "CPU Usage", 'Framework', "CPU,%",
                                 monitor.OP_MEAN, 600)
        monitor.registerActivity('MEM', "Memory Usage", 'Framework', 'Memory,MB',
                                 monitor.OP_MEAN, 600)
        # Component monitor
        for field in ('version', 'DIRACVersion', 'description', 'platform'):
            monitor.setComponentExtraParam(field, self.__codeProperties[field])
        monitor.setComponentExtraParam('startTime', Time.dateTime())
        monitor.setComponentExtraParam('cycles', 0)
        monitor.disable()
    self.__monitorLastStatsUpdate = time.time()
def __initializeMonitor(self):
    """Initialize the system monitor client for this agent module."""
    # Standalone modules reuse the global monitor; otherwise a private client
    if self.__moduleProperties['standalone']:
        mon = gMonitor
    else:
        mon = MonitoringClient()
    self.monitor = mon
    mon.setComponentType(mon.COMPONENT_AGENT)
    mon.setComponentName(self.__moduleProperties['fullName'])
    mon.initialize()
    mon.registerActivity('CPU', "CPU Usage", 'Framework', "CPU,%",
                         mon.OP_MEAN, 600)
    mon.registerActivity('MEM', "Memory Usage", 'Framework', 'Memory,MB',
                         mon.OP_MEAN, 600)
    # Component monitor: static code/environment properties
    for field in ('version', 'DIRACVersion', 'description', 'platform'):
        mon.setComponentExtraParam(field, self.__codeProperties[field])
    mon.setComponentExtraParam('startTime', Time.dateTime())
    mon.setComponentExtraParam('cycles', 0)
    # Disabled until am_initialize() decides monitoring is wanted
    mon.disable()
    self.__monitorLastStatsUpdate = time.time()
def __init__(self, serviceData):
    """Build the service state from its *serviceData* description dict.

    Expected keys: modName, loadName, standalone, moduleObj, classObj.
    The service is registered under its *modName*.
    """
    self._svcData = serviceData
    self._name = serviceData['modName']
    self._startTime = Time.dateTime()
    self._validNames = [serviceData['modName']]
    if serviceData['loadName'] not in self._validNames:
        self._validNames.append(serviceData['loadName'])
    self._cfg = ServiceConfiguration(list(self._validNames))
    # One shared monitor in standalone mode, a dedicated client otherwise
    self._monitor = gMonitor if serviceData['standalone'] else MonitoringClient()
    self.__monitorLastStatsUpdate = time.time()
    self._stats = {'queries': 0, 'connections': 0}
    self._authMgr = AuthManager("%s/Authorization"
                                % PathFinder.getServiceSection(serviceData['loadName']))
    self._transportPool = getGlobalTransportPool()
    self.__cloneId = 0
    self.__maxFD = 0
def __initializeMonitor(self):
    """Set up the monitor client (gMonitor or a private MonitoringClient)."""
    self.monitor = gMonitor if self.__moduleProperties['standalone'] else MonitoringClient()
    self.monitor.setComponentType(self.monitor.COMPONENT_AGENT)
    self.monitor.setComponentName(self.__moduleProperties['fullName'])
    self.monitor.initialize()
    # Register the standard resource-usage activities
    for activity, description, unit in (('CPU', "CPU Usage", "CPU,%"),
                                        ('MEM', "Memory Usage", 'Memory,MB')):
        self.monitor.registerActivity(activity, description, 'Framework', unit,
                                      self.monitor.OP_MEAN, 600)
    # Component monitor: static code/environment properties
    for field in ('version', 'DIRACVersion', 'description', 'platform'):
        self.monitor.setComponentExtraParam(field, self.__codeProperties[field])
    self.monitor.setComponentExtraParam('startTime', Time.dateTime())
    self.monitor.setComponentExtraParam('cycles', 0)
    self.monitor.disable()
    self.__monitorLastStatsUpdate = time.time()
class AgentModule( object ):
  """ Base class for all agent modules

      This class is used by the AgentReactor Class to steer the execution of
      DIRAC Agents.

      For this purpose the following methods are used:
      - am_initialize()      just after instantiated
      - am_getPollingTime()  to set the execution frequency
      - am_getMaxCycles()    to determine the number of cycles
      - am_go()              for the actual execution of one cycle

      Before each iteration, the following methods are used to determine if
      the new cycle is to be started.
      - am_getModuleParam( 'alive' )
      - am_checkStopAgentFile()
      - am_removeStopAgentFile()

      To start new execution cycle the following methods are used
      - am_getCyclesDone()
      - am_setOption( 'MaxCycles', maxCycles )

      At the same time it provides all Agents with common interface.
      All Agent class must inherit from this base class and must implement
      at least the following method:
      - execute()            main method called in the agent cycle

      Additionally they may provide:
      - initialize()         for initial settings
      - finalize()           the graceful exit
      - beginExecution()     before each execution cycle
      - endExecution()       at the end of each execution cycle

      The agent can be stopped either by a signal or by creating a
      'stop_agent' file in the controlDirectory defined in the agent
      configuration
  """

  # NOTE(review): mutable default argument `properties = {}` — benign here
  # since it is only read, but `properties = None` would be safer.
  def __init__( self, agentName, loadName, baseAgentName = False, properties = {} ):
    """ Common __init__ method for all Agents.

        All Agent modules must define: __doc__, __RCSID__ — they are used to
        populate __codeProperties.

        The following Options are used from the Configuration:
        - /LocalSite/InstancePath
        - /DIRAC/Setup
        - Status
        - Enabled
        - PollingTime            default = 120
        - MaxCycles              default = 500
        - ControlDirectory       control/SystemName/AgentName
        - WorkDirectory          work/SystemName/AgentName
        - shifterProxy           ''
        - shifterProxyLocation   WorkDirectory/SystemName/AgentName/.shifterCred

        It defines the following default Options that can be set via
        Configuration (above):
        - MonitoringEnabled      True
        - Enabled                True if Status == Active
        - PollingTime            120
        - MaxCycles              500
        - ControlDirectory       control/SystemName/AgentName
        - WorkDirectory          work/SystemName/AgentName
        - shifterProxy           False
        - shifterProxyLocation   work/SystemName/AgentName/.shifterCred

        Different defaults can be set in the initialize() method of the Agent
        using am_setOption(). In order to get a shifter proxy in the
        environment during the execute() the configuration Option
        'shifterProxy' must be set, a default may be given in the
        initialize() method.
    """
    # The "base" agent (the module itself, not a derived instance) logs
    # through the root logger and counts as standalone
    if baseAgentName and agentName == baseAgentName:
      self.log = gLogger
      standaloneModule = True
    else:
      self.log = gLogger.getSubLogger( agentName, child = False )
      standaloneModule = False
    self.__basePath = gConfig.getValue( '/LocalSite/InstancePath', rootPath )
    self.__agentModule = None
    self.__codeProperties = {}
    self.__getCodeInfo()
    self.__moduleProperties = { 'fullName' : agentName,
                                'loadName' : loadName,
                                'section' : PathFinder.getAgentSection( agentName ),
                                'loadSection' : PathFinder.getAgentSection( loadName ),
                                'standalone' : standaloneModule,
                                'cyclesDone' : 0,
                                'totalElapsedTime' : 0,
                                'setup' : gConfig.getValue( "/DIRAC/Setup", "Unknown" ),
                                'alive' : True }
    # agentName is "<System>/<AgentName>"
    self.__moduleProperties[ 'system' ], self.__moduleProperties[ 'agentName' ] = agentName.split( "/" )
    self.__configDefaults = {}
    self.__configDefaults[ 'MonitoringEnabled'] = True
    # NOTE(review): `in ( 'active' )` is a SUBSTRING test — `('active')` is a
    # string, not a tuple, so e.g. Status == "act" would also pass. Likely
    # intended `== 'active'` or `in ( 'active', )` — confirm before changing.
    self.__configDefaults[ 'Enabled'] = self.am_getOption( "Status", "Active" ).lower() in ( 'active' )
    self.__configDefaults[ 'PollingTime'] = self.am_getOption( "PollingTime", 120 )
    self.__configDefaults[ 'MaxCycles'] = self.am_getOption( "MaxCycles", 500 )
    self.__configDefaults[ 'ControlDirectory' ] = os.path.join( self.__basePath, 'control', *agentName.split( "/" ) )
    self.__configDefaults[ 'WorkDirectory' ] = os.path.join( self.__basePath, 'work', *agentName.split( "/" ) )
    self.__configDefaults[ 'shifterProxy' ] = ''
    self.__configDefaults[ 'shifterProxyLocation' ] = os.path.join( self.__configDefaults[ 'WorkDirectory' ], '.shifterCred' )
    # Caller-supplied properties override the computed module properties
    if isinstance( properties, dict):
      for key in properties:
        self.__moduleProperties[ key ] = properties[ key ]
    self.__moduleProperties[ 'executors' ] = [ ( self.execute, () ) ]
    self.__moduleProperties[ 'shifterProxy' ] = False
    self.__monitorLastStatsUpdate = -1
    self.monitor = None
    self.__initializeMonitor()
    self.__initialized = False

  def __getCodeInfo( self ):
    """ Populate __codeProperties (version/description/DIRACVersion/platform)
        from the concrete agent module's __RCSID__ and __doc__.
    """
    versionVar = "__RCSID__"
    docVar = "__doc__"
    try:
      self.__agentModule = __import__( self.__class__.__module__, globals(), locals(), versionVar )
    except Exception:
      self.log.exception( "Cannot load agent module" )
    for prop in ( ( versionVar, "version" ), ( docVar, "description" ) ):
      try:
        self.__codeProperties[ prop[1] ] = getattr( self.__agentModule, prop[0] )
      except Exception:
        self.log.error( "Missing property", prop[0] )
        self.__codeProperties[ prop[1] ] = 'unset'
    self.__codeProperties[ 'DIRACVersion' ] = DIRAC.version
    self.__codeProperties[ 'platform' ] = DIRAC.getPlatform()

  def am_initialize( self, *initArgs ):
    """ Run the agent's initialize(), prepare directories, enable monitoring
        and print the startup banner. Returns S_OK/S_ERROR.
    """
    agentName = self.am_getModuleParam( 'fullName' )
    result = self.initialize( *initArgs )
    if not isReturnStructure( result ):
      return S_ERROR( "initialize must return S_OK/S_ERROR" )
    if not result[ 'OK' ]:
      return S_ERROR( "Error while initializing %s: %s" % ( agentName, result[ 'Message' ] ) )
    mkDir( self.am_getControlDirectory() )
    workDirectory = self.am_getWorkDirectory()
    mkDir( workDirectory )
    # Set the work directory in an environment variable available to
    # subprocesses if needed
    os.environ['AGENT_WORKDIRECTORY'] = workDirectory
    self.__moduleProperties[ 'shifterProxy' ] = self.am_getOption( 'shifterProxy' )
    if self.am_monitoringEnabled():
      self.monitor.enable()
    if len( self.__moduleProperties[ 'executors' ] ) < 1:
      return S_ERROR( "At least one executor method has to be defined" )
    if not self.am_Enabled():
      return S_ERROR( "Agent is disabled via the configuration" )
    self.log.notice( "="*40 )
    self.log.notice( "Loaded agent module %s" % self.__moduleProperties[ 'fullName' ] )
    self.log.notice( " Site: %s" % DIRAC.siteName() )
    self.log.notice( " Setup: %s" % gConfig.getValue( "/DIRAC/Setup" ) )
    self.log.notice( " Base Module version: %s " % __RCSID__ )
    self.log.notice( " Agent version: %s" % self.__codeProperties[ 'version' ] )
    self.log.notice( " DIRAC version: %s" % DIRAC.version )
    self.log.notice( " DIRAC platform: %s" % DIRAC.getPlatform() )
    pollingTime = int( self.am_getOption( 'PollingTime' ) )
    if pollingTime > 3600:
      self.log.notice( " Polling time: %s hours" % ( pollingTime / 3600. ) )
    else:
      self.log.notice( " Polling time: %s seconds" % self.am_getOption( 'PollingTime' ) )
    self.log.notice( " Control dir: %s" % self.am_getControlDirectory() )
    self.log.notice( " Work dir: %s" % self.am_getWorkDirectory() )
    if self.am_getOption( 'MaxCycles' ) > 0:
      self.log.notice( " Cycles: %s" % self.am_getMaxCycles() )
    else:
      self.log.notice( " Cycles: unlimited" )
    self.log.notice( "="*40 )
    self.__initialized = True
    return S_OK()

  def am_getControlDirectory( self ):
    """ Absolute path of the agent's control directory. """
    return os.path.join( self.__basePath, str( self.am_getOption( 'ControlDirectory' ) ) )

  def am_getStopAgentFile( self ):
    """ Path of the 'stop_agent' flag file. """
    return os.path.join( self.am_getControlDirectory(), 'stop_agent' )

  def am_checkStopAgentFile( self ):
    """ True when the 'stop_agent' flag file exists. """
    return os.path.isfile( self.am_getStopAgentFile() )

  def am_createStopAgentFile( self ):
    """ Create the 'stop_agent' flag file (best effort — errors ignored). """
    try:
      with open( self.am_getStopAgentFile(), 'w' ) as fd:
        fd.write( 'Dirac site agent Stopped at %s' % Time.toString() )
    except Exception:
      pass

  def am_removeStopAgentFile( self ):
    """ Remove the 'stop_agent' flag file (best effort — errors ignored). """
    try:
      os.unlink( self.am_getStopAgentFile() )
    except Exception:
      pass

  def am_getBasePath( self ):
    """ Instance base path (/LocalSite/InstancePath or rootPath). """
    return self.__basePath

  def am_getWorkDirectory( self ):
    """ Absolute path of the agent's work directory. """
    return os.path.join( self.__basePath, str( self.am_getOption( 'WorkDirectory' ) ) )

  def am_getShifterProxyLocation( self ):
    """ Absolute path where the shifter proxy credential is stored. """
    return os.path.join( self.__basePath, str( self.am_getOption( 'shifterProxyLocation' ) ) )

  def am_getOption( self, optionName, defaultValue = None ):
    """ Resolve an option: absolute CS path, else the agent's own section,
        else its load section, else the default (from __configDefaults when
        no explicit default is given).
    """
    # NOTE(review): `== None` should be `is None`; also means an explicit
    # defaultValue of 0/''/False bypasses __configDefaults lookup — this may
    # be relied upon, confirm before changing.
    if defaultValue == None:
      if optionName in self.__configDefaults:
        defaultValue = self.__configDefaults[ optionName ]
    if optionName and optionName[0] == "/":
      return gConfig.getValue( optionName, defaultValue )
    for section in ( self.__moduleProperties[ 'section' ], self.__moduleProperties[ 'loadSection' ] ):
      result = gConfig.getOption( "%s/%s" % ( section, optionName ), defaultValue )
      if result[ 'OK' ]:
        return result[ 'Value' ]
    return defaultValue

  def am_setOption( self, optionName, value ):
    """ Override an option default for this agent instance. """
    self.__configDefaults[ optionName ] = value

  def am_getModuleParam( self, optionName ):
    """ Read a module property (fullName, cyclesDone, alive, ...). """
    return self.__moduleProperties[ optionName ]

  def am_setModuleParam( self, optionName, value ):
    """ Set a module property. """
    self.__moduleProperties[ optionName ] = value

  def am_getPollingTime( self ):
    """ Configured polling time in seconds. """
    return self.am_getOption( "PollingTime" )

  def am_getMaxCycles( self ):
    """ Configured maximum number of cycles (<= 0 means unlimited). """
    return self.am_getOption( "MaxCycles" )

  def am_getCyclesDone( self ):
    """ Number of cycles executed so far. """
    return self.am_getModuleParam( 'cyclesDone' )

  def am_Enabled( self ):
    """ Whether the agent is enabled via configuration. """
    return self.am_getOption( "Enabled" )

  def am_disableMonitoring( self ):
    """ Turn off activity monitoring for this agent. """
    self.am_setOption( 'MonitoringEnabled' , False )

  def am_monitoringEnabled( self ):
    """ Whether activity monitoring is enabled. """
    return self.am_getOption( "MonitoringEnabled" )

  def am_stopExecution( self ):
    """ Ask the reactor to stop scheduling new cycles. """
    self.am_setModuleParam( 'alive', False )

  def __initializeMonitor( self ):
    """
    Initialize the system monitor client
    """
    # Standalone modules reuse the shared gMonitor; otherwise a private client
    if self.__moduleProperties[ 'standalone' ]:
      self.monitor = gMonitor
    else:
      self.monitor = MonitoringClient()
    self.monitor.setComponentType( self.monitor.COMPONENT_AGENT )
    self.monitor.setComponentName( self.__moduleProperties[ 'fullName' ] )
    self.monitor.initialize()
    self.monitor.registerActivity( 'CPU', "CPU Usage", 'Framework', "CPU,%",
                                   self.monitor.OP_MEAN, 600 )
    self.monitor.registerActivity( 'MEM', "Memory Usage", 'Framework', 'Memory,MB',
                                   self.monitor.OP_MEAN, 600 )
    # Component monitor
    for field in ( 'version', 'DIRACVersion', 'description', 'platform' ):
      self.monitor.setComponentExtraParam( field, self.__codeProperties[ field ] )
    self.monitor.setComponentExtraParam( 'startTime', Time.dateTime() )
    self.monitor.setComponentExtraParam( 'cycles', 0 )
    # Disabled until am_initialize() checks am_monitoringEnabled()
    self.monitor.disable()
    self.__monitorLastStatsUpdate = time.time()

  def am_secureCall( self, functor, args = (), name = False ):
    """ Call *functor* catching any exception and enforcing the S_OK/S_ERROR
        return convention. Returns the functor result or S_ERROR.
    """
    if not name:
      name = str( functor )
    try:
      result = functor( *args )
      if not isReturnStructure( result ):
        raise Exception( "%s method for %s module has to return S_OK/S_ERROR" % ( name, self.__moduleProperties[ 'fullName' ] ) )
      return result
    except Exception as e:
      self.log.exception( "Agent exception while calling method", name )
      return S_ERROR( "Exception while calling %s method: %s" % ( name, str( e ) ) )

  def _setShifterProxy( self ):
    """ Install the shifter proxy in the environment when 'shifterProxy' is
        configured. Returns S_OK/S_ERROR.
    """
    if self.__moduleProperties[ "shifterProxy" ]:
      result = setupShifterProxyInEnv( self.__moduleProperties[ "shifterProxy" ],
                                       self.am_getShifterProxyLocation() )
      if not result[ 'OK' ]:
        self.log.error( "Failed to set shifter proxy", result['Message'] )
        return result
    return S_OK()

  def am_go( self ):
    """ Execute one full agent cycle: shifter proxy, monitoring marks,
        module cycle, counters and the summary banner. Returns the cycle
        result (S_OK/S_ERROR).
    """
    # Set the shifter proxy if required
    result = self._setShifterProxy()
    if not result[ 'OK' ]:
      return result
    self.log.notice( "-"*40 )
    self.log.notice( "Starting cycle for module %s" % self.__moduleProperties[ 'fullName' ] )
    mD = self.am_getMaxCycles()
    if mD > 0:
      cD = self.__moduleProperties[ 'cyclesDone' ]
      self.log.notice( "Remaining %s of %s cycles" % ( mD - cD, mD ) )
    self.log.notice( "-"*40 )
    elapsedTime = time.time()
    cpuStats = self._startReportToMonitoring()
    cycleResult = self.__executeModuleCycle()
    if cpuStats:
      self._endReportToMonitoring( *cpuStats )
    # Increment counters
    self.__moduleProperties[ 'cyclesDone' ] += 1
    # Show status
    elapsedTime = time.time() - elapsedTime
    self.__moduleProperties[ 'totalElapsedTime' ] += elapsedTime
    self.log.notice( "-"*40 )
    self.log.notice( "Agent module %s run summary" % self.__moduleProperties[ 'fullName' ] )
    self.log.notice( " Executed %s times previously" % self.__moduleProperties[ 'cyclesDone' ] )
    self.log.notice( " Cycle took %.2f seconds" % elapsedTime )
    averageElapsedTime = self.__moduleProperties[ 'totalElapsedTime' ] / self.__moduleProperties[ 'cyclesDone' ]
    self.log.notice( " Average execution time: %.2f seconds" % ( averageElapsedTime ) )
    elapsedPollingRate = averageElapsedTime * 100 / self.am_getOption( 'PollingTime' )
    self.log.notice( " Polling time: %s seconds" % self.am_getOption( 'PollingTime' ) )
    self.log.notice( " Average execution/polling time: %.2f%%" % elapsedPollingRate )
    if cycleResult[ 'OK' ]:
      self.log.notice( " Cycle was successful" )
    else:
      self.log.warn( " Cycle had an error:", cycleResult[ 'Message' ] )
    self.log.notice( "-"*40 )
    # Update number of cycles
    self.monitor.setComponentExtraParam( 'cycles', self.__moduleProperties[ 'cyclesDone' ] )
    return cycleResult

  def _startReportToMonitoring( self ):
    """ Snapshot wall/CPU time and, at most every 10 s, send the MEM mark.
        Returns (now, cpuTime) or False on any error.
    """
    try:
      now = time.time()
      stats = os.times()
      cpuTime = stats[0] + stats[2]
      if now - self.__monitorLastStatsUpdate < 10:
        return ( now, cpuTime )
      # Send CPU consumption mark
      self.__monitorLastStatsUpdate = now
      # Send Memory consumption mark
      membytes = MemStat.VmB( 'VmRSS:' )
      if membytes:
        mem = membytes / ( 1024. * 1024. )
        gMonitor.addMark( 'MEM', mem )
      return( now, cpuTime )
    except Exception:
      return False

  def _endReportToMonitoring( self, initialWallTime, initialCPUTime ):
    """ Send the CPU-usage percentage mark for the elapsed interval. """
    wallTime = time.time() - initialWallTime
    stats = os.times()
    cpuTime = stats[0] + stats[2] - initialCPUTime
    percentage = 0
    if wallTime:
      percentage = cpuTime / wallTime * 100.
    if percentage > 0:
      gMonitor.addMark( 'CPU', percentage )

  def __executeModuleCycle( self ):
    """ Run beginExecution, the executor(s) (threaded when more than one),
        then endExecution. Returns S_OK/S_ERROR.
    """
    # Execute the beginExecution function
    result = self.am_secureCall( self.beginExecution, name = "beginExecution" )
    if not result[ 'OK' ]:
      return result
    # Launch executor functions
    executors = self.__moduleProperties[ 'executors' ]
    if len( executors ) == 1:
      result = self.am_secureCall( executors[0][0], executors[0][1] )
      if not result[ 'OK' ]:
        return result
    else:
      exeThreads = [ threading.Thread( target = executor[0], args = executor[1] ) for executor in executors ]
      for thread in exeThreads:
        # NOTE(review): setDaemon() is deprecated in modern Python; the
        # replacement is `thread.daemon = True`.
        thread.setDaemon( 1 )
        thread.start()
      for thread in exeThreads:
        thread.join()
    # Execute the endExecution function
    return self.am_secureCall( self.endExecution, name = "endExecution" )

  def initialize( self, *args, **kwargs ):
    """ Hook for derived agents: one-time setup. """
    return S_OK()

  def beginExecution( self ):
    """ Hook for derived agents: called before each cycle. """
    return S_OK()

  def endExecution( self ):
    """ Hook for derived agents: called after each cycle. """
    return S_OK()

  def finalize( self ):
    """ Hook for derived agents: graceful exit. """
    return S_OK()

  def execute( self ):
    """ Main cycle body — MUST be overridden by the concrete agent. """
    return S_ERROR( "Execute method has to be overwritten by agent module" )
# NOTE(review): this is a legacy Python-2 version of AgentModule
# (`except Exception, e`, `types.DictType`) — it will not parse under Python 3.
class AgentModule:
  """ Base class for all agent modules

      This class is used by the AgentReactor Class to steer the execution of
      DIRAC Agents.

      For this purpose the following methods are used:
      - am_initialize()      just after instantiated
      - am_getPollingTime()  to set the execution frequency
      - am_getMaxCycles()    to determine the number of cycles
      - am_go()              for the actual execution of one cycle

      Before each iteration, the following methods are used to determine if
      the new cycle is to be started.
      - am_getModuleParam( 'alive' )
      - am_checkStopAgentFile()
      - am_removeStopAgentFile()

      To start new execution cycle the following methods are used
      - am_getCyclesDone()
      - am_setOption( 'MaxCycles', maxCycles )

      At the same time it provides all Agents with common interface.
      All Agent class must inherit from this base class and must implement
      at least the following method:
      - execute()            main method called in the agent cycle

      Additionally they may provide:
      - initialize()         for initial settings
      - finalize()           the graceful exit
      - beginExecution()     before each execution cycle
      - endExecution()       at the end of each execution cycle

      The agent can be stopped either by a signal or by creating a
      'stop_agent' file in the controlDirectory defined in the agent
      configuration
  """

  # NOTE(review): mutable default argument `properties = {}` — benign here
  # since it is only read, but `properties = None` would be safer.
  def __init__( self, agentName, loadName, baseAgentName = False, properties = {} ):
    """ Common __init__ method for all Agents.

        All Agent modules must define: __doc__, __RCSID__ — they are used to
        populate __codeProperties.

        The following Options are used from the Configuration:
        - /LocalSite/InstancePath
        - /DIRAC/Setup
        - Status
        - Enabled
        - PollingTime            default = 120
        - MaxCycles              default = 500
        - ControlDirectory       control/SystemName/AgentName
        - WorkDirectory          work/SystemName/AgentName
        - shifterProxy           ''
        - shifterProxyLocation   WorkDirectory/SystemName/AgentName/.shifterCred

        It defines the following default Options that can be set via
        Configuration (above):
        - MonitoringEnabled      True
        - Enabled                True if Status == Active
        - PollingTime            120
        - MaxCycles              500
        - ControlDirectory       control/SystemName/AgentName
        - WorkDirectory          work/SystemName/AgentName
        - shifterProxy           False
        - shifterProxyLocation   work/SystemName/AgentName/.shifterCred

        Different defaults can be set in the initialize() method of the Agent
        using am_setOption(). In order to get a shifter proxy in the
        environment during the execute() the configuration Option
        'shifterProxy' must be set, a default may be given in the
        initialize() method.
    """
    # The "base" agent logs through the root logger and counts as standalone
    if baseAgentName and agentName == baseAgentName:
      self.log = gLogger
      standaloneModule = True
    else:
      self.log = gLogger.getSubLogger( agentName, child = False )
      standaloneModule = False
    self.__basePath = gConfig.getValue( '/LocalSite/InstancePath', rootPath )
    self.__agentModule = None
    self.__codeProperties = {}
    self.__getCodeInfo()
    self.__moduleProperties = { 'fullName' : agentName,
                                'loadName' : loadName,
                                'section' : PathFinder.getAgentSection( agentName ),
                                'loadSection' : PathFinder.getAgentSection( loadName ),
                                'standalone' : standaloneModule,
                                'cyclesDone' : 0,
                                'totalElapsedTime' : 0,
                                'setup' : gConfig.getValue( "/DIRAC/Setup", "Unknown" ),
                                'alive' : True }
    # agentName is "<System>/<AgentName>"
    self.__moduleProperties[ 'system' ], self.__moduleProperties[ 'agentName' ] = agentName.split( "/" )
    self.__configDefaults = {}
    self.__configDefaults[ 'MonitoringEnabled'] = True
    # NOTE(review): `in ( 'active' )` is a SUBSTRING test — `('active')` is a
    # string, not a tuple. Likely intended `== 'active'` — confirm.
    self.__configDefaults[ 'Enabled'] = self.am_getOption( "Status", "Active" ).lower() in ( 'active' )
    self.__configDefaults[ 'PollingTime'] = self.am_getOption( "PollingTime", 120 )
    self.__configDefaults[ 'MaxCycles'] = self.am_getOption( "MaxCycles", 500 )
    self.__configDefaults[ 'ControlDirectory' ] = os.path.join( self.__basePath, 'control', *agentName.split( "/" ) )
    self.__configDefaults[ 'WorkDirectory' ] = os.path.join( self.__basePath, 'work', *agentName.split( "/" ) )
    self.__configDefaults[ 'shifterProxy' ] = ''
    self.__configDefaults[ 'shifterProxyLocation' ] = os.path.join( self.__configDefaults[ 'WorkDirectory' ], '.shifterCred' )
    # Caller-supplied properties override the computed module properties
    if type( properties ) == types.DictType:
      for key in properties:
        self.__moduleProperties[ key ] = properties[ key ]
    self.__moduleProperties[ 'executors' ] = [ ( self.execute, () ) ]
    self.__moduleProperties[ 'shifterProxy' ] = False
    self.__monitorLastStatsUpdate = -1
    self.monitor = None
    self.__initializeMonitor()
    self.__initialized = False

  def __getCodeInfo( self ):
    """ Populate __codeProperties (version/description/DIRACVersion/platform)
        from the concrete agent module's __RCSID__ and __doc__.
    """
    versionVar = "__RCSID__"
    docVar = "__doc__"
    try:
      self.__agentModule = __import__( self.__class__.__module__, globals(), locals(), versionVar )
    except Exception:
      self.log.exception( "Cannot load agent module" )
    for prop in ( ( versionVar, "version" ), ( docVar, "description" ) ):
      try:
        self.__codeProperties[ prop[1] ] = getattr( self.__agentModule, prop[0] )
      except Exception:
        self.log.error( "Missing %s" % prop[0] )
        self.__codeProperties[ prop[1] ] = 'unset'
    self.__codeProperties[ 'DIRACVersion' ] = DIRAC.version
    self.__codeProperties[ 'platform' ] = DIRAC.platform

  def am_initialize( self, *initArgs ):
    """ Run the agent's initialize(), prepare directories, enable monitoring
        and print the startup banner. Returns S_OK/S_ERROR.
    """
    agentName = self.am_getModuleParam( 'fullName' )
    result = self.initialize( *initArgs )
    if not isReturnStructure( result ):
      return S_ERROR( "initialize must return S_OK/S_ERROR" )
    if not result[ 'OK' ]:
      return S_ERROR( "Error while initializing %s: %s" % ( agentName, result[ 'Message' ] ) )
    _checkDir( self.am_getControlDirectory() )
    workDirectory = self.am_getWorkDirectory()
    _checkDir( workDirectory )
    # Set the work directory in an environment variable available to
    # subprocesses if needed
    os.environ['AGENT_WORKDIRECTORY'] = workDirectory
    self.__moduleProperties[ 'shifterProxy' ] = self.am_getOption( 'shifterProxy' )
    if self.am_monitoringEnabled():
      self.monitor.enable()
    if len( self.__moduleProperties[ 'executors' ] ) < 1:
      return S_ERROR( "At least one executor method has to be defined" )
    if not self.am_Enabled():
      return S_ERROR( "Agent is disabled via the configuration" )
    self.log.notice( "="*40 )
    self.log.notice( "Loaded agent module %s" % self.__moduleProperties[ 'fullName' ] )
    self.log.notice( " Site: %s" % DIRAC.siteName() )
    self.log.notice( " Setup: %s" % gConfig.getValue( "/DIRAC/Setup" ) )
    self.log.notice( " Base Module version: %s " % __RCSID__ )
    self.log.notice( " Agent version: %s" % self.__codeProperties[ 'version' ] )
    self.log.notice( " DIRAC version: %s" % DIRAC.version )
    self.log.notice( " DIRAC platform: %s" % DIRAC.platform )
    pollingTime = int( self.am_getOption( 'PollingTime' ) )
    if pollingTime > 3600:
      self.log.notice( " Polling time: %s hours" % ( pollingTime / 3600. ) )
    else:
      self.log.notice( " Polling time: %s seconds" % self.am_getOption( 'PollingTime' ) )
    self.log.notice( " Control dir: %s" % self.am_getControlDirectory() )
    self.log.notice( " Work dir: %s" % self.am_getWorkDirectory() )
    if self.am_getOption( 'MaxCycles' ) > 0:
      self.log.notice( " Cycles: %s" % self.am_getMaxCycles() )
    else:
      self.log.notice( " Cycles: unlimited" )
    self.log.notice( "="*40 )
    self.__initialized = True
    return S_OK()

  def am_getControlDirectory( self ):
    """ Absolute path of the agent's control directory. """
    return os.path.join( self.__basePath, str( self.am_getOption( 'ControlDirectory' ) ) )

  def am_getStopAgentFile( self ):
    """ Path of the 'stop_agent' flag file. """
    return os.path.join( self.am_getControlDirectory(), 'stop_agent' )

  def am_checkStopAgentFile( self ):
    """ True when the 'stop_agent' flag file exists. """
    return os.path.isfile( self.am_getStopAgentFile() )

  def am_createStopAgentFile( self ):
    """ Create the 'stop_agent' flag file (best effort — errors ignored). """
    # NOTE(review): file handle not closed on write failure; a `with` block
    # would be safer.
    try:
      fd = open( self.am_getStopAgentFile(), 'w' )
      fd.write( 'Dirac site agent Stopped at %s' % Time.toString() )
      fd.close()
    except Exception:
      pass

  def am_removeStopAgentFile( self ):
    """ Remove the 'stop_agent' flag file (best effort — errors ignored). """
    try:
      os.unlink( self.am_getStopAgentFile() )
    except Exception:
      pass

  def am_getBasePath( self ):
    """ Instance base path (/LocalSite/InstancePath or rootPath). """
    return self.__basePath

  def am_getWorkDirectory( self ):
    """ Absolute path of the agent's work directory. """
    return os.path.join( self.__basePath, str( self.am_getOption( 'WorkDirectory' ) ) )

  def am_getShifterProxyLocation( self ):
    """ Absolute path where the shifter proxy credential is stored. """
    return os.path.join( self.__basePath, str( self.am_getOption( 'shifterProxyLocation' ) ) )

  def am_getOption( self, optionName, defaultValue = None ):
    """ Resolve an option: absolute CS path, else the agent's own section,
        else its load section, else the default (from __configDefaults when
        no explicit default is given).
    """
    # NOTE(review): `== None` should be `is None`; an explicit default of
    # 0/''/False bypasses the __configDefaults lookup — confirm intent.
    if defaultValue == None:
      if optionName in self.__configDefaults:
        defaultValue = self.__configDefaults[ optionName ]
    if optionName and optionName[0] == "/":
      return gConfig.getValue( optionName, defaultValue )
    for section in ( self.__moduleProperties[ 'section' ], self.__moduleProperties[ 'loadSection' ] ):
      result = gConfig.getOption( "%s/%s" % ( section, optionName ), defaultValue )
      if result[ 'OK' ]:
        return result[ 'Value' ]
    return defaultValue

  def am_setOption( self, optionName, value ):
    """ Override an option default for this agent instance. """
    self.__configDefaults[ optionName ] = value

  def am_getModuleParam( self, optionName ):
    """ Read a module property (fullName, cyclesDone, alive, ...). """
    return self.__moduleProperties[ optionName ]

  def am_setModuleParam( self, optionName, value ):
    """ Set a module property. """
    self.__moduleProperties[ optionName ] = value

  def am_getPollingTime( self ):
    """ Configured polling time in seconds. """
    return self.am_getOption( "PollingTime" )

  def am_getMaxCycles( self ):
    """ Configured maximum number of cycles (<= 0 means unlimited). """
    return self.am_getOption( "MaxCycles" )

  def am_getCyclesDone( self ):
    """ Number of cycles executed so far. """
    return self.am_getModuleParam( 'cyclesDone' )

  def am_Enabled( self ):
    """ Whether the agent is enabled via configuration. """
    return self.am_getOption( "Enabled" )

  def am_disableMonitoring( self ):
    """ Turn off activity monitoring for this agent. """
    self.am_setOption( 'MonitoringEnabled' , False )

  def am_monitoringEnabled( self ):
    """ Whether activity monitoring is enabled. """
    return self.am_getOption( "MonitoringEnabled" )

  def am_stopExecution( self ):
    """ Ask the reactor to stop scheduling new cycles. """
    self.am_setModuleParam( 'alive', False )

  def __initializeMonitor( self ):
    """
    Initialize the system monitor client
    """
    # Standalone modules reuse the shared gMonitor; otherwise a private client
    if self.__moduleProperties[ 'standalone' ]:
      self.monitor = gMonitor
    else:
      self.monitor = MonitoringClient()
    self.monitor.setComponentType( self.monitor.COMPONENT_AGENT )
    self.monitor.setComponentName( self.__moduleProperties[ 'fullName' ] )
    self.monitor.initialize()
    self.monitor.registerActivity( 'CPU', "CPU Usage", 'Framework', "CPU,%",
                                   self.monitor.OP_MEAN, 600 )
    self.monitor.registerActivity( 'MEM', "Memory Usage", 'Framework', 'Memory,MB',
                                   self.monitor.OP_MEAN, 600 )
    # Component monitor
    for field in ( 'version', 'DIRACVersion', 'description', 'platform' ):
      self.monitor.setComponentExtraParam( field, self.__codeProperties[ field ] )
    self.monitor.setComponentExtraParam( 'startTime', Time.dateTime() )
    self.monitor.setComponentExtraParam( 'cycles', 0 )
    # Disabled until am_initialize() checks am_monitoringEnabled()
    self.monitor.disable()
    self.__monitorLastStatsUpdate = time.time()

  def am_secureCall( self, functor, args = (), name = False ):
    """ Call *functor* catching any exception and enforcing the S_OK/S_ERROR
        return convention. Returns the functor result or S_ERROR.
    """
    if not name:
      name = str( functor )
    try:
      result = functor( *args )
      if not isReturnStructure( result ):
        raise Exception( "%s method for %s module has to return S_OK/S_ERROR" % ( name, self.__moduleProperties[ 'fullName' ] ) )
      return result
    # NOTE(review): Python-2-only except syntax
    except Exception, e:
      self.log.exception( "Exception while calling %s method" % name )
      return S_ERROR( "Exception while calling %s method: %s" % ( name, str( e ) ) )
class Service(object):
    """
    A DISET service: loads a request-handler class, validates and authorizes
    incoming proposals (RPC / FileTransfer / Message / Connection) and
    dispatches them to handler instances on a thread pool.
    """

    # Maps an action type to the handler-method prefix exporting it.
    # 'Connection' maps to another action type ('Message'), which makes it a
    # *meta* action — see _isMetaAction.
    SVC_VALID_ACTIONS = {'RPC': 'export',
                         'FileTransfer': 'transfer',
                         'Message': 'msg',
                         'Connection': 'Message'}
    # Shared security-log client used by every Service instance
    SVC_SECLOG_CLIENT = SecurityLogClient()

    def __init__(self, serviceData):
        """
        Init the variables for the service

        :param serviceData: dict with modName, standalone, loadName, moduleObj, classObj. e.g.:
          {'modName': 'Framework/serviceName',
           'standalone': True,
           'loadName': 'Framework/serviceName',
           'moduleObj': <module 'serviceNameHandler' from '/home/DIRAC/FrameworkSystem/Service/serviceNameHandler.pyo'>,
           'classObj': <class 'serviceNameHandler.serviceHandler'>}

          Standalone is true if there is only one service started.
          If it's false, every service is linked to a different MonitoringClient.
        """
        self._svcData = serviceData
        self._name = serviceData['modName']
        self._startTime = Time.dateTime()
        # The service answers to its module name and, when different, its load name
        self._validNames = [serviceData['modName']]
        if serviceData['loadName'] not in self._validNames:
            self._validNames.append(serviceData['loadName'])
        self._cfg = ServiceConfiguration(list(self._validNames))
        if serviceData['standalone']:
            self._monitor = gMonitor
        else:
            self._monitor = MonitoringClient()
        self.__monitorLastStatsUpdate = time.time()
        self._stats = {'queries': 0, 'connections': 0}
        self._authMgr = AuthManager(
            "%s/Authorization" % PathFinder.getServiceSection(serviceData['loadName']))
        self._transportPool = getGlobalTransportPool()
        self.__cloneId = 0
        # Highest client socket fd seen since the last monitoring report
        self.__maxFD = 0

    def setCloneProcessId(self, cloneId):
        """ Record the clone id and rename the monitoring component so each
            clone process reports under a distinct name """
        self.__cloneId = cloneId
        self._monitor.setComponentName("%s-Clone:%s" % (self._name, cloneId))

    def _isMetaAction(self, action):
        """ If *action* is a meta action (its SVC_VALID_ACTIONS value names
            another action type), return that referred action type; return
            False for a concrete action """
        referedAction = Service.SVC_VALID_ACTIONS[action]
        if referedAction in Service.SVC_VALID_ACTIONS:
            return referedAction
        return False

    def initialize(self):
        """ Build the service URL, load the handler, set up monitoring, the
            thread pool, the message broker and the exported actions.

            :return: S_OK / S_ERROR
        """
        # Build the URLs
        self._url = self._cfg.getURL()
        if not self._url:
            return S_ERROR("Could not build service URL for %s" % self._name)
        gLogger.verbose("Service URL is %s" % self._url)
        # Load handler
        result = self._loadHandlerInit()
        if not result['OK']:
            return result
        self._handler = result['Value']
        # Initialize lock manager
        self._lockManager = LockManager(self._cfg.getMaxWaitingPetitions())
        self._initMonitoring()
        # TODO: remove ThreadPool
        if useThreadPoolExecutor:
            self._threadPool = ThreadPoolExecutor(max(0, self._cfg.getMaxThreads()))
        else:
            self._threadPool = ThreadPool(max(1, self._cfg.getMinThreads()),
                                          max(0, self._cfg.getMaxThreads()),
                                          self._cfg.getMaxWaitingPetitions())
            self._threadPool.daemonize()
        self._msgBroker = MessageBroker("%sMSB" % self._name, threadPool=self._threadPool)
        # Create static dict
        self._serviceInfoDict = {
            'serviceName': self._name,
            'serviceSectionPath': PathFinder.getServiceSection(self._name),
            'URL': self._cfg.getURL(),
            'messageSender': MessageSender(self._name, self._msgBroker),
            'validNames': self._validNames,
            'csPaths': [PathFinder.getServiceSection(svcName) for svcName in self._validNames]
        }
        # Call static initialization function
        try:
            self._handler['class']._rh__initializeClass(dict(self._serviceInfoDict),
                                                        self._lockManager,
                                                        self._msgBroker,
                                                        self._monitor)
            if self._handler['init']:
                for initFunc in self._handler['init']:
                    gLogger.verbose("Executing initialization function")
                    try:
                        result = initFunc(dict(self._serviceInfoDict))
                    except Exception as excp:
                        gLogger.exception("Exception while calling initialization function",
                                          lException=excp)
                        return S_ERROR("Exception while calling initialization function: %s" % str(excp))
                    # Each init function must honour the S_OK/S_ERROR convention
                    if not isReturnStructure(result):
                        return S_ERROR("Service initialization function %s must return S_OK/S_ERROR" % initFunc)
                    if not result['OK']:
                        return S_ERROR("Error while initializing %s: %s" % (self._name, result['Message']))
        except Exception as e:
            errMsg = "Exception while initializing %s" % self._name
            gLogger.exception(e)
            gLogger.exception(errMsg)
            return S_ERROR(errMsg)
        # Load actions after the handler has initialized itself
        result = self._loadActions()
        if not result['OK']:
            return result
        self._actions = result['Value']
        # Push thread-pool occupancy to monitoring every 30 seconds
        gThreadScheduler.addPeriodicTask(30, self.__reportThreadPoolContents)
        return S_OK()

    def __searchInitFunctions(self, handlerClass, currentClass=None):
        """ Recursively collect the 'initializeHandler' callables defined along
            handlerClass's inheritance chain, ancestors first """
        if not currentClass:
            currentClass = handlerClass
        initFuncs = []
        ancestorHasInit = False
        for ancestor in currentClass.__bases__:
            initFuncs += self.__searchInitFunctions(handlerClass, ancestor)
            if 'initializeHandler' in dir(ancestor):
                ancestorHasInit = True
        if ancestorHasInit:
            # Resolve the ancestor's implementation through super()
            initFuncs.append(super(currentClass, handlerClass).initializeHandler)
        if currentClass == handlerClass and 'initializeHandler' in dir(handlerClass):
            initFuncs.append(handlerClass.initializeHandler)
        return initFuncs

    def _loadHandlerInit(self):
        """ Gather the handler class and its initialization callables:
            inherited initializeHandler methods plus the optional module-level
            initialize<HandlerName> function.

            :return: S_OK(dict with name/module/class/init)
        """
        handlerClass = self._svcData['classObj']
        handlerName = handlerClass.__name__
        handlerInitMethods = self.__searchInitFunctions(handlerClass)
        try:
            handlerInitMethods.append(getattr(self._svcData['moduleObj'], "initialize%s" % handlerName))
        except AttributeError:
            gLogger.verbose("Not found global initialization function for service")
        if handlerInitMethods:
            gLogger.info("Found %s initialization methods" % len(handlerInitMethods))
        handlerInfo = {}
        handlerInfo["name"] = handlerName
        handlerInfo["module"] = self._svcData['moduleObj']
        handlerInfo["class"] = handlerClass
        handlerInfo["init"] = handlerInitMethods
        return S_OK(handlerInfo)

    def _loadActions(self):
        """ Scan the handler class for exported methods (prefixes from
            SVC_VALID_ACTIONS, e.g. export_*, transfer_*, msg_*) and collect
            their type-check and auth definitions.

            :return: S_OK({'methods': ..., 'auth': ..., 'types': ...})
        """
        handlerClass = self._handler['class']
        authRules = {}
        typeCheck = {}
        methodsList = {}
        for actionType in Service.SVC_VALID_ACTIONS:
            if self._isMetaAction(actionType):
                continue
            authRules[actionType] = {}
            typeCheck[actionType] = {}
            methodsList[actionType] = []
        handlerAttributeList = dir(handlerClass)
        for actionType in Service.SVC_VALID_ACTIONS:
            if self._isMetaAction(actionType):
                continue
            methodPrefix = '%s_' % Service.SVC_VALID_ACTIONS[actionType]
            for attribute in handlerAttributeList:
                if attribute.find(methodPrefix) != 0:
                    continue
                exportedName = attribute[len(methodPrefix):]
                methodsList[actionType].append(exportedName)
                gLogger.verbose("+ Found %s method %s" % (actionType, exportedName))
                # Create lock for method
                self._lockManager.createLock("%s/%s" % (actionType, exportedName),
                                             self._cfg.getMaxThreadsForMethod(actionType, exportedName))
                # Look for type and auth rules
                if actionType == 'RPC':
                    typeAttr = "types_%s" % exportedName
                    authAttr = "auth_%s" % exportedName
                else:
                    typeAttr = "types_%s_%s" % (Service.SVC_VALID_ACTIONS[actionType], exportedName)
                    authAttr = "auth_%s_%s" % (Service.SVC_VALID_ACTIONS[actionType], exportedName)
                if typeAttr in handlerAttributeList:
                    obj = getattr(handlerClass, typeAttr)
                    gLogger.verbose("|- Found type definition %s: %s" % (typeAttr, str(obj)))
                    typeCheck[actionType][exportedName] = obj
                if authAttr in handlerAttributeList:
                    obj = getattr(handlerClass, authAttr)
                    gLogger.verbose("|- Found auth rules %s: %s" % (authAttr, str(obj)))
                    authRules[actionType][exportedName] = obj
        # Meta actions get the union of their referred action's auth properties
        for actionType in Service.SVC_VALID_ACTIONS:
            referedAction = self._isMetaAction(actionType)
            if not referedAction:
                continue
            gLogger.verbose("Action %s is a meta action for %s" % (actionType, referedAction))
            authRules[actionType] = []
            for method in authRules[referedAction]:
                for prop in authRules[referedAction][method]:
                    if prop not in authRules[actionType]:
                        authRules[actionType].append(prop)
            gLogger.verbose("Meta action %s props are %s" % (actionType, authRules[actionType]))
        return S_OK({'methods': methodsList, 'auth': authRules, 'types': typeCheck})

    def _initMonitoring(self):
        """ Register this component and its activities with the monitoring
            system and attach static metadata (version, platform, start time).

            :return: S_OK
        """
        # Init extra bits of monitoring
        self._monitor.setComponentType(MonitoringClient.COMPONENT_SERVICE)
        self._monitor.setComponentName(self._name)
        self._monitor.setComponentLocation(self._cfg.getURL())
        self._monitor.initialize()
        self._monitor.registerActivity("Connections", "Connections received", "Framework",
                                       "connections", MonitoringClient.OP_RATE)
        self._monitor.registerActivity("Queries", "Queries served", "Framework",
                                       "queries", MonitoringClient.OP_RATE)
        self._monitor.registerActivity('CPU', "CPU Usage", 'Framework', "CPU,%",
                                       MonitoringClient.OP_MEAN, 600)
        self._monitor.registerActivity('MEM', "Memory Usage", 'Framework', 'Memory,MB',
                                       MonitoringClient.OP_MEAN, 600)
        self._monitor.registerActivity('PendingQueries', "Pending queries", 'Framework',
                                       'queries', MonitoringClient.OP_MEAN)
        self._monitor.registerActivity('ActiveQueries', "Active queries", 'Framework',
                                       'threads', MonitoringClient.OP_MEAN)
        self._monitor.registerActivity('RunningThreads', "Running threads", 'Framework',
                                       'threads', MonitoringClient.OP_MEAN)
        self._monitor.registerActivity('MaxFD', "Max File Descriptors", 'Framework',
                                       'fd', MonitoringClient.OP_MEAN)
        self._monitor.setComponentExtraParam('DIRACVersion', DIRAC.version)
        self._monitor.setComponentExtraParam('platform', DIRAC.getPlatform())
        self._monitor.setComponentExtraParam('startTime', Time.dateTime())
        # Expose the handler module's RCS id and docstring as version/description
        for prop in (("__RCSID__", "version"), ("__doc__", "description")):
            try:
                value = getattr(self._handler['module'], prop[0])
            except Exception as e:
                gLogger.exception(e)
                gLogger.error("Missing property", prop[0])
                value = 'unset'
            self._monitor.setComponentExtraParam(prop[1], value)
        # Secondary names this service should also answer to
        for secondaryName in self._cfg.registerAlsoAs():
            gLogger.info("Registering %s also as %s" % (self._name, secondaryName))
            self._validNames.append(secondaryName)
        return S_OK()

    def __reportThreadPoolContents(self):
        """ Periodic task: report thread-pool occupancy and the max fd seen """
        # TODO: remove later
        if useThreadPoolExecutor:
            # NOTE(review): reads private ThreadPoolExecutor attributes
            # (_work_queue, _threads) — fragile across Python versions
            pendingQueries = self._threadPool._work_queue.qsize()
            activeQuereies = len(self._threadPool._threads)
        else:
            pendingQueries = self._threadPool.pendingJobs()
            activeQuereies = self._threadPool.numWorkingThreads()
        self._monitor.addMark('PendingQueries', pendingQueries)
        self._monitor.addMark('ActiveQueries', activeQuereies)
        self._monitor.addMark('RunningThreads', threading.activeCount())
        self._monitor.addMark('MaxFD', self.__maxFD)
        # Reset so the next interval reports its own maximum
        self.__maxFD = 0

    def getConfig(self):
        """ Return the ServiceConfiguration object """
        return self._cfg

    # End of initialization functions

    def handleConnection(self, clientTransport):
        """
        This method may be called by ServiceReactor.
        The method stacks the opened connection in a queue; another thread
        reads this queue and handles the connection.

        :param clientTransport: Object which describes the opened connection
                                (PlainTransport or SSLTransport)
        """
        self._stats['connections'] += 1
        # NOTE(review): the 'queries' extra param is fed the *connections*
        # counter here — confirm this mismatch is intentional
        self._monitor.setComponentExtraParam('queries', self._stats['connections'])
        # TODO: remove later
        if useThreadPoolExecutor:
            self._threadPool.submit(self._processInThread, clientTransport)
        else:
            self._threadPool.generateJobAndQueueIt(self._processInThread,
                                                   args=(clientTransport, ))

    # Threaded process function
    def _processInThread(self, clientTransport):
        """
        This method handles a RPC, FileTransfer or Connection.
        Connection may be opened via ServiceReactor.__acceptIncomingConnection

        - Do the SSL/TLS Handshake (if dips is used) and extract credentials
        - Get the action called by the client
        - Check if the client is authorized to perform the action
          - If not, connection is closed
        - Instantiate the RequestHandler (RequestHandler contains all callable methods)

        (Following is not directly in this method but it describes what happens at
        #Execute the action)

        - Notify the client we're ready to execute the action (via _processProposal)
          and call RequestHandler._rh_executeAction()
        - Receive arguments/file/something else (depending on action) in the RequestHandler
        - Execute the action asked by the client

        :param clientTransport: Object describing the opened connection
                                (SSLTransport or PlainTransport)
        :return: S_OK with "closeTransport" a boolean indicating if the
                 connection has to be closed, e.g. after RPC, closeTransport=True
        """
        # Track the largest client fd seen; reported via the 'MaxFD' activity
        self.__maxFD = max(self.__maxFD, clientTransport.oSocket.fileno())
        self._lockManager.lockGlobal()
        try:
            monReport = self.__startReportToMonitoring()
        except Exception:
            # Best-effort monitoring: failures here must not kill the request
            monReport = False
        try:
            # Handshake
            try:
                result = clientTransport.handshake()
                if not result['OK']:
                    clientTransport.close()
                    return
            except BaseException:
                return
            # Add to the transport pool
            trid = self._transportPool.add(clientTransport)
            if not trid:
                return
            # Receive and check proposal
            result = self._receiveAndCheckProposal(trid)
            if not result['OK']:
                self._transportPool.sendAndClose(trid, result)
                return
            proposalTuple = result['Value']
            # Instantiate handler
            result = self._instantiateHandler(trid, proposalTuple)
            if not result['OK']:
                self._transportPool.sendAndClose(trid, result)
                return
            handlerObj = result['Value']
            # Execute the action
            result = self._processProposal(trid, proposalTuple, handlerObj)
            # Close the connection if required
            if result['closeTransport'] or not result['OK']:
                if not result['OK']:
                    gLogger.error("Error processing proposal", result['Message'])
                self._transportPool.close(trid)
            return result
        finally:
            self._lockManager.unlockGlobal()
            if monReport:
                self.__endReportToMonitoring(*monReport)

    def _createIdentityString(self, credDict, clientTransport=None):
        """ Build a human-readable "[user:group](DN)" identity string from the
            credentials dict, for logging """
        if 'username' in credDict:
            if 'group' in credDict:
                identity = "[%s:%s]" % (credDict['username'], credDict['group'])
            else:
                identity = "[%s:unknown]" % credDict['username']
        else:
            identity = 'unknown'
        if clientTransport:
            addr = clientTransport.getRemoteAddress()
            if addr:
                # NOTE(review): addr is formatted but never appended to the
                # returned identity — confirm whether this is intended
                addr = "{%s:%s}" % (addr[0], addr[1])
        if 'DN' in credDict:
            identity += "(%s)" % credDict['DN']
        return identity

    @staticmethod
    def _deserializeProposalTuple(serializedProposal):
        """ We receive the proposalTuple as a list.
            Turn it (and its list elements) into tuples again
        """
        proposalTuple = tuple(
            tuple(x) if isinstance(x, list) else x for x in serializedProposal)
        return proposalTuple

    def _receiveAndCheckProposal(self, trid):
        """ Receive the serialized proposal on transport *trid*, deserialize it
            and validate service name, action type and authorization.

            :return: S_OK(proposalTuple) / S_ERROR
        """
        clientTransport = self._transportPool.get(trid)
        # Get the peer credentials
        credDict = clientTransport.getConnectingCredentials()
        # Receive the action proposal
        retVal = clientTransport.receiveData(1024)
        if not retVal['OK']:
            gLogger.error(
                "Invalid action proposal",
                "%s %s" % (self._createIdentityString(credDict, clientTransport),
                           retVal['Message']))
            return S_ERROR("Invalid action proposal")
        proposalTuple = Service._deserializeProposalTuple(retVal['Value'])
        gLogger.debug("Received action from client",
                      "/".join(list(proposalTuple[1])))
        # Check if there are extra credentials
        if proposalTuple[2]:
            clientTransport.setExtraCredentials(proposalTuple[2])
        # Check if this is the requested service
        requestedService = proposalTuple[0][0]
        if requestedService not in self._validNames:
            return S_ERROR("%s is not up in this server" % requestedService)
        # Check if the action is valid
        requestedActionType = proposalTuple[1][0]
        if requestedActionType not in Service.SVC_VALID_ACTIONS:
            return S_ERROR("%s is not a known action type" % requestedActionType)
        # Check if it's authorized
        result = self._authorizeProposal(proposalTuple[1], trid, credDict)
        if not result['OK']:
            return result
        # Proposal is OK
        return S_OK(proposalTuple)

    def _authorizeProposal(self, actionTuple, trid, credDict):
        """ Check the client credentials against the CS and hardcoded auth
            rules for (actionType, method), and record the outcome in the
            security log.

            :return: S_OK / S_ERROR(ENOAUTH, ...)
        """
        # Find CS path for the Auth rules
        referedAction = self._isMetaAction(actionTuple[0])
        if referedAction:
            csAuthPath = "%s/Default" % actionTuple[0]
            hardcodedMethodAuth = self._actions['auth'][actionTuple[0]]
        else:
            if actionTuple[0] == 'RPC':
                csAuthPath = actionTuple[1]
            else:
                csAuthPath = "/".join(actionTuple)
            # Find if there are hardcoded auth rules in the code
            hardcodedMethodAuth = False
            if actionTuple[0] in self._actions['auth']:
                hardcodedRulesByType = self._actions['auth'][actionTuple[0]]
                if actionTuple[0] == "FileTransfer":
                    # First letter is lower-cased — presumably FileTransfer
                    # actions arrive capitalized; verify against the client side
                    methodName = actionTuple[1][0].lower() + actionTuple[1][1:]
                else:
                    methodName = actionTuple[1]
                if methodName in hardcodedRulesByType:
                    hardcodedMethodAuth = hardcodedRulesByType[methodName]
        # Auth time!
        if not self._authMgr.authQuery(csAuthPath, credDict, hardcodedMethodAuth):
            # Get the identity string
            identity = self._createIdentityString(credDict)
            fromHost = "unknown host"
            tr = self._transportPool.get(trid)
            if tr:
                fromHost = '/'.join([str(item) for item in tr.getRemoteAddress()])
            gLogger.warn(
                "Unauthorized query", "to %s:%s by %s from %s" %
                (self._name, "/".join(actionTuple), identity, fromHost))
            result = S_ERROR(ENOAUTH, "Unauthorized query")
        else:
            result = S_OK()
        # Security log
        tr = self._transportPool.get(trid)
        if not tr:
            return S_ERROR("Client disconnected")
        sourceAddress = tr.getRemoteAddress()
        identity = self._createIdentityString(credDict)
        Service.SVC_SECLOG_CLIENT.addMessage(result['OK'], sourceAddress[0],
                                             sourceAddress[1], identity,
                                             self._cfg.getHostname(),
                                             self._cfg.getPort(), self._name,
                                             "/".join(actionTuple))
        return result

    def _instantiateHandler(self, trid, proposalTuple=None):
        """ Generate an instance of the handler for a given service

        :param int trid: transport ID
        :param tuple proposalTuple: tuple describing the proposed action

        :return: S_OK/S_ERROR, Value is the handler object
        """
        # Generate the client params
        clientParams = {'serviceStartTime': self._startTime}
        if proposalTuple:
            # The 4th element is the client version
            clientParams['clientVersion'] = proposalTuple[3] if len(
                proposalTuple) > 3 else None
            clientParams['clientSetup'] = proposalTuple[0][1]
            # VO defaults to the server configuration when the client omits it
            if len(proposalTuple[0]) < 3:
                clientParams['clientVO'] = gConfig.getValue(
                    "/DIRAC/VirtualOrganization", "unknown")
            else:
                clientParams['clientVO'] = proposalTuple[0][2]
        clientTransport = self._transportPool.get(trid)
        if clientTransport:
            clientParams['clientAddress'] = clientTransport.getRemoteAddress()
        # Generate handler dict with per client info
        handlerInitDict = dict(self._serviceInfoDict)
        for key in clientParams:
            handlerInitDict[key] = clientParams[key]
        # Instantiate and initialize
        try:
            handlerInstance = self._handler['class'](handlerInitDict, trid)
            handlerInstance.initialize()
        except Exception as e:
            gLogger.exception("Server error while loading handler: %s" % str(e))
            return S_ERROR("Server error while loading handler")
        return S_OK(handlerInstance)

    def _processProposal(self, trid, proposalTuple, handlerObj):
        """ Ack the proposal to the client and execute the requested action.
            ('Connection', 'new') proposals register the transport with the
            message broker as a stable connection that stays open on success """
        # Notify the client we're ready to execute the action
        retVal = self._transportPool.send(trid, S_OK())
        if not retVal['OK']:
            return retVal
        messageConnection = False
        if proposalTuple[1] == ('Connection', 'new'):
            messageConnection = True
        if messageConnection:
            if self._msgBroker.getNumConnections() > self._cfg.getMaxMessagingConnections():
                result = S_ERROR("Maximum number of connections reached. Try later")
                result['closeTransport'] = True
                return result
            # This is a stable connection
            self._msgBroker.addTransportId(trid, self._name,
                                           receiveMessageCallback=self._mbReceivedMsg,
                                           disconnectCallback=self._mbDisconnect,
                                           listenToConnection=False)
        result = self._executeAction(trid, proposalTuple, handlerObj)
        if result['OK'] and messageConnection:
            self._msgBroker.listenToTransport(trid)
            result = self._mbConnect(trid, handlerObj)
            if not result['OK']:
                self._msgBroker.removeTransport(trid)
        # Only successful message connections keep the transport open
        result['closeTransport'] = not messageConnection or not result['OK']
        return result

    def _mbConnect(self, trid, handlerObj=None):
        """ Fire the 'connected' connection callback on the handler,
            instantiating one for the transport if needed """
        if not handlerObj:
            result = self._instantiateHandler(trid)
            if not result['OK']:
                return result
            handlerObj = result['Value']
        return handlerObj._rh_executeConnectionCallback('connected')

    def _executeAction(self, trid, proposalTuple, handlerObj):
        """ Delegate execution of the proposal to the handler, converting any
            exception into S_ERROR """
        try:
            return handlerObj._rh_executeAction(proposalTuple)
        except Exception as e:
            gLogger.exception("Exception while executing handler action")
            return S_ERROR("Server error while executing action: %s" % str(e))

    def _mbReceivedMsg(self, trid, msgObj):
        """ Message-broker callback: authorize and dispatch a received message """
        result = self._authorizeProposal(
            ('Message', msgObj.getName()), trid,
            self._transportPool.get(trid).getConnectingCredentials())
        if not result['OK']:
            return result
        result = self._instantiateHandler(trid)
        if not result['OK']:
            return result
        handlerObj = result['Value']
        return handlerObj._rh_executeMessageCallback(msgObj)

    def _mbDisconnect(self, trid):
        """ Message-broker callback: fire the 'drop' connection callback """
        result = self._instantiateHandler(trid)
        if not result['OK']:
            return result
        handlerObj = result['Value']
        return handlerObj._rh_executeConnectionCallback('drop')

    def __startReportToMonitoring(self):
        """ Mark a query and snapshot wall-clock/CPU time.

            :return: (now, cpuTime) tuple consumed by __endReportToMonitoring
        """
        self._monitor.addMark("Queries")
        now = time.time()
        stats = os.times()
        # user + system CPU time of this process
        cpuTime = stats[0] + stats[2]
        # Only skips the stats update when the clock went backwards
        if now - self.__monitorLastStatsUpdate < 0:
            return (now, cpuTime)
        # Send CPU consumption mark
        wallClock = now - self.__monitorLastStatsUpdate  # NOTE(review): computed but unused
        self.__monitorLastStatsUpdate = now
        # Send Memory consumption mark
        membytes = MemStat.VmB('VmRSS:')
        if membytes:
            mem = membytes / (1024. * 1024.)
            self._monitor.addMark('MEM', mem)
        return (now, cpuTime)

    def __endReportToMonitoring(self, initialWallTime, initialCPUTime):
        """ Report CPU usage (%) over the interval opened by
            __startReportToMonitoring """
        wallTime = time.time() - initialWallTime
        stats = os.times()
        cpuTime = stats[0] + stats[2] - initialCPUTime
        # NOTE(review): divides by wallTime — ZeroDivisionError if the
        # interval is exactly 0; confirm this is acceptable
        percentage = cpuTime / wallTime * 100.
        if percentage > 0:
            self._monitor.addMark('CPU', percentage)
class Service( object ): SVC_VALID_ACTIONS = { 'RPC' : 'export', 'FileTransfer': 'transfer', 'Message' : 'msg', 'Connection' : 'Message' } SVC_SECLOG_CLIENT = SecurityLogClient() def __init__( self, serviceData ): self._svcData = serviceData self._name = serviceData[ 'modName' ] self._startTime = Time.dateTime() self._validNames = [ serviceData[ 'modName' ] ] if serviceData[ 'loadName' ] not in self._validNames: self._validNames.append( serviceData[ 'loadName' ] ) self._cfg = ServiceConfiguration( list( self._validNames ) ) if serviceData[ 'standalone' ]: self._monitor = gMonitor else: self._monitor = MonitoringClient() self.__monitorLastStatsUpdate = time.time() self._stats = { 'queries' : 0, 'connections' : 0 } self._authMgr = AuthManager( "%s/Authorization" % PathFinder.getServiceSection( serviceData[ 'loadName' ] ) ) self._transportPool = getGlobalTransportPool() self.__cloneId = 0 self.__maxFD = 0 def setCloneProcessId( self, cloneId ): self.__cloneId = cloneId self._monitor.setComponentName( "%s-Clone:%s" % ( self._name, cloneId ) ) def _isMetaAction( self, action ): referedAction = Service.SVC_VALID_ACTIONS[ action ] if referedAction in Service.SVC_VALID_ACTIONS: return referedAction return False def initialize( self ): #Build the URLs self._url = self._cfg.getURL() if not self._url: return S_ERROR( "Could not build service URL for %s" % self._name ) gLogger.verbose( "Service URL is %s" % self._url ) #Load handler result = self._loadHandlerInit() if not result[ 'OK' ]: return result self._handler = result[ 'Value' ] #Initialize lock manager self._lockManager = LockManager( self._cfg.getMaxWaitingPetitions() ) self._initMonitoring() self._threadPool = ThreadPool( max( 1, self._cfg.getMinThreads() ), max( 0, self._cfg.getMaxThreads() ), self._cfg.getMaxWaitingPetitions() ) self._threadPool.daemonize() self._msgBroker = MessageBroker( "%sMSB" % self._name, threadPool = self._threadPool ) #Create static dict self._serviceInfoDict = { 'serviceName' : self._name, 
'serviceSectionPath' : PathFinder.getServiceSection( self._name ), 'URL' : self._cfg.getURL(), 'messageSender' : MessageSender( self._name, self._msgBroker ), 'validNames' : self._validNames, 'csPaths' : [ PathFinder.getServiceSection( svcName ) for svcName in self._validNames ] } #Call static initialization function try: self._handler[ 'class' ]._rh__initializeClass( dict( self._serviceInfoDict ), self._lockManager, self._msgBroker, self._monitor ) if self._handler[ 'init' ]: for initFunc in self._handler[ 'init' ]: gLogger.verbose( "Executing initialization function" ) try: result = initFunc( dict( self._serviceInfoDict ) ) except Exception as excp: gLogger.exception( "Exception while calling initialization function", lException = excp ) return S_ERROR( "Exception while calling initialization function: %s" % str( excp ) ) if not isReturnStructure( result ): return S_ERROR( "Service initialization function %s must return S_OK/S_ERROR" % initFunc ) if not result[ 'OK' ]: return S_ERROR( "Error while initializing %s: %s" % ( self._name, result[ 'Message' ] ) ) except Exception as e: errMsg = "Exception while initializing %s" % self._name gLogger.exception( e ) gLogger.exception( errMsg ) return S_ERROR( errMsg ) #Load actions after the handler has initialized itself result = self._loadActions() if not result[ 'OK' ]: return result self._actions = result[ 'Value' ] gThreadScheduler.addPeriodicTask( 30, self.__reportThreadPoolContents ) return S_OK() def __searchInitFunctions( self, handlerClass, currentClass = None ): if not currentClass: currentClass = handlerClass initFuncs = [] ancestorHasInit = False for ancestor in currentClass.__bases__: initFuncs += self.__searchInitFunctions( handlerClass, ancestor ) if 'initializeHandler' in dir( ancestor ): ancestorHasInit = True if ancestorHasInit: initFuncs.append( super( currentClass, handlerClass ).initializeHandler ) if currentClass == handlerClass and 'initializeHandler' in dir( handlerClass ): initFuncs.append( 
handlerClass.initializeHandler ) return initFuncs def _loadHandlerInit( self ): handlerClass = self._svcData[ 'classObj' ] handlerName = handlerClass.__name__ handlerInitMethods = self.__searchInitFunctions( handlerClass ) try: handlerInitMethods.append( getattr( self._svcData[ 'moduleObj' ], "initialize%s" % handlerName ) ) except AttributeError: gLogger.verbose( "Not found global initialization function for service" ) if handlerInitMethods: gLogger.info( "Found %s initialization methods" % len( handlerInitMethods ) ) handlerInfo = {} handlerInfo[ "name" ] = handlerName handlerInfo[ "module" ] = self._svcData[ 'moduleObj' ] handlerInfo[ "class" ] = handlerClass handlerInfo[ "init" ] = handlerInitMethods return S_OK( handlerInfo ) def _loadActions( self ): handlerClass = self._handler[ 'class' ] authRules = {} typeCheck = {} methodsList = {} for actionType in Service.SVC_VALID_ACTIONS: if self._isMetaAction( actionType ): continue authRules[ actionType ] = {} typeCheck[ actionType ] = {} methodsList[ actionType ] = [] handlerAttributeList = dir( handlerClass ) for actionType in Service.SVC_VALID_ACTIONS: if self._isMetaAction( actionType ): continue methodPrefix = '%s_' % Service.SVC_VALID_ACTIONS[ actionType ] for attribute in handlerAttributeList: if attribute.find( methodPrefix ) != 0: continue exportedName = attribute[ len( methodPrefix ) : ] methodsList[ actionType ].append( exportedName ) gLogger.verbose( "+ Found %s method %s" % ( actionType, exportedName ) ) #Create lock for method self._lockManager.createLock( "%s/%s" % ( actionType, exportedName ), self._cfg.getMaxThreadsForMethod( actionType, exportedName ) ) #Look for type and auth rules if actionType == 'RPC': typeAttr = "types_%s" % exportedName authAttr = "auth_%s" % exportedName else: typeAttr = "types_%s_%s" % ( Service.SVC_VALID_ACTIONS[ actionType ], exportedName ) authAttr = "auth_%s_%s" % ( Service.SVC_VALID_ACTIONS[ actionType ], exportedName ) if typeAttr in handlerAttributeList: obj = 
getattr( handlerClass, typeAttr ) gLogger.verbose( "|- Found type definition %s: %s" % ( typeAttr, str( obj ) ) ) typeCheck[ actionType ][ exportedName ] = obj if authAttr in handlerAttributeList: obj = getattr( handlerClass, authAttr ) gLogger.verbose( "|- Found auth rules %s: %s" % ( authAttr, str( obj ) ) ) authRules[ actionType ][ exportedName ] = obj for actionType in Service.SVC_VALID_ACTIONS: referedAction = self._isMetaAction( actionType ) if not referedAction: continue gLogger.verbose( "Action %s is a meta action for %s" % ( actionType, referedAction ) ) authRules[ actionType ] = [] for method in authRules[ referedAction ]: for prop in authRules[ referedAction ][ method ]: if prop not in authRules[ actionType ]: authRules[ actionType ].append( prop ) gLogger.verbose( "Meta action %s props are %s" % ( actionType, authRules[ actionType ] ) ) return S_OK( { 'methods' : methodsList, 'auth' : authRules, 'types' : typeCheck } ) def _initMonitoring( self ): #Init extra bits of monitoring self._monitor.setComponentType( MonitoringClient.COMPONENT_SERVICE ) self._monitor.setComponentName( self._name ) self._monitor.setComponentLocation( self._cfg.getURL() ) self._monitor.initialize() self._monitor.registerActivity( "Connections", "Connections received", "Framework", "connections", MonitoringClient.OP_RATE ) self._monitor.registerActivity( "Queries", "Queries served", "Framework", "queries", MonitoringClient.OP_RATE ) self._monitor.registerActivity( 'CPU', "CPU Usage", 'Framework', "CPU,%", MonitoringClient.OP_MEAN, 600 ) self._monitor.registerActivity( 'MEM', "Memory Usage", 'Framework', 'Memory,MB', MonitoringClient.OP_MEAN, 600 ) self._monitor.registerActivity( 'PendingQueries', "Pending queries", 'Framework', 'queries', MonitoringClient.OP_MEAN ) self._monitor.registerActivity( 'ActiveQueries', "Active queries", 'Framework', 'threads', MonitoringClient.OP_MEAN ) self._monitor.registerActivity( 'RunningThreads', "Running threads", 'Framework', 'threads', 
MonitoringClient.OP_MEAN ) self._monitor.registerActivity( 'MaxFD', "Max File Descriptors", 'Framework', 'fd', MonitoringClient.OP_MEAN ) self._monitor.setComponentExtraParam( 'DIRACVersion', DIRAC.version ) self._monitor.setComponentExtraParam( 'platform', DIRAC.getPlatform() ) self._monitor.setComponentExtraParam( 'startTime', Time.dateTime() ) for prop in ( ( "__RCSID__", "version" ), ( "__doc__", "description" ) ): try: value = getattr( self._handler[ 'module' ], prop[0] ) except Exception as e: gLogger.exception( e ) gLogger.error( "Missing property", prop[0] ) value = 'unset' self._monitor.setComponentExtraParam( prop[1], value ) for secondaryName in self._cfg.registerAlsoAs(): gLogger.info( "Registering %s also as %s" % ( self._name, secondaryName ) ) self._validNames.append( secondaryName ) return S_OK() def __reportThreadPoolContents( self ): self._monitor.addMark( 'PendingQueries', self._threadPool.pendingJobs() ) self._monitor.addMark( 'ActiveQueries', self._threadPool.numWorkingThreads() ) self._monitor.addMark( 'RunningThreads', threading.activeCount() ) self._monitor.addMark( 'MaxFD', self.__maxFD ) self.__maxFD = 0 def getConfig( self ): return self._cfg #End of initialization functions def handleConnection( self, clientTransport ): self._stats[ 'connections' ] += 1 self._monitor.setComponentExtraParam( 'queries', self._stats[ 'connections' ] ) self._threadPool.generateJobAndQueueIt( self._processInThread, args = ( clientTransport, ) ) #Threaded process function def _processInThread( self, clientTransport ): self.__maxFD = max( self.__maxFD, clientTransport.oSocket.fileno() ) self._lockManager.lockGlobal() try: monReport = self.__startReportToMonitoring() except Exception: monReport = False try: #Handshake try: result = clientTransport.handshake() if not result[ 'OK' ]: clientTransport.close() return except: return #Add to the transport pool trid = self._transportPool.add( clientTransport ) if not trid: return #Receive and check proposal result = 
# --- tail of a connection-handling method whose opening (including "result =") lies before this chunk ---
      self._receiveAndCheckProposal( trid )
      if not result[ 'OK' ]:
        # Proposal was rejected: reply with the error and drop the connection
        self._transportPool.sendAndClose( trid, result )
        return
      proposalTuple = result[ 'Value' ]
      #Instantiate handler
      result = self._instantiateHandler( trid, proposalTuple )
      if not result[ 'OK' ]:
        self._transportPool.sendAndClose( trid, result )
        return
      handlerObj = result[ 'Value' ]
      #Execute the action
      result = self._processProposal( trid, proposalTuple, handlerObj )
      #Close the connection if required
      if result[ 'closeTransport' ] or not result[ 'OK' ]:
        if not result[ 'OK' ]:
          gLogger.error( "Error processing proposal", result[ 'Message' ] )
        self._transportPool.close( trid )
      return result
    finally:
      # Always release the global lock and close the monitoring interval opened before the try
      self._lockManager.unlockGlobal()
      if monReport:
        self.__endReportToMonitoring( *monReport )

  def _createIdentityString( self, credDict, clientTransport = None ):
    """ Build a human-readable identity string from a credentials dict, for log messages.

        :param dict credDict: may contain 'username', 'group' and 'DN' keys
        :param clientTransport: optional transport, used to look up the remote address
        :return: str like "[user:group](DN)" or 'unknown'
    """
    if 'username' in credDict:
      if 'group' in credDict:
        identity = "[%s:%s]" % ( credDict[ 'username' ], credDict[ 'group' ] )
      else:
        identity = "[%s:unknown]" % credDict[ 'username' ]
    else:
      identity = 'unknown'
    if clientTransport:
      addr = clientTransport.getRemoteAddress()
      if addr:
        # NOTE(review): addr is formatted here but never appended to identity —
        # looks like dead code or a lost concatenation; confirm intent
        addr = "{%s:%s}" % ( addr[0], addr[1] )
    if 'DN' in credDict:
      identity += "(%s)" % credDict[ 'DN' ]
    return identity

  def _receiveAndCheckProposal( self, trid ):
    """ Receive the action proposal from the client transport and validate it.

        Validates: requested service name, action type, and authorization.

        :param trid: transport id in the global transport pool
        :return: S_OK( proposalTuple ) / S_ERROR
    """
    clientTransport = self._transportPool.get( trid )
    #Get the peer credentials
    credDict = clientTransport.getConnectingCredentials()
    #Receive the action proposal
    retVal = clientTransport.receiveData( 1024 )
    if not retVal[ 'OK' ]:
      gLogger.error( "Invalid action proposal", "%s %s" % ( self._createIdentityString( credDict,
                                                                                        clientTransport ),
                                                            retVal[ 'Message' ] ) )
      return S_ERROR( "Invalid action proposal" )
    proposalTuple = retVal[ 'Value' ]
    gLogger.debug( "Received action from client", "/".join( list( proposalTuple[1] ) ) )
    #Check if there are extra credentials
    if proposalTuple[2]:
      clientTransport.setExtraCredentials( proposalTuple[2] )
    #Check if this is the requested service
    requestedService = proposalTuple[0][0]
    if requestedService not in self._validNames:
      return S_ERROR( "%s is not up in this server" % requestedService )
    #Check if the action is valid
    requestedActionType = proposalTuple[1][0]
    if requestedActionType not in Service.SVC_VALID_ACTIONS:
      return S_ERROR( "%s is not a known action type" % requestedActionType )
    #Check if it's authorized
    result = self._authorizeProposal( proposalTuple[1], trid, credDict )
    if not result[ 'OK' ]:
      return result
    #Proposal is OK
    return S_OK( proposalTuple )

  def _authorizeProposal( self, actionTuple, trid, credDict ):
    """ Authorize one action against the CS auth rules and any rules hardcoded
        in the handler, and record the outcome in the security log.

        :param tuple actionTuple: ( actionType, actionName )
        :param trid: transport id, used to resolve the remote address for logging
        :param dict credDict: credentials of the requesting client
        :return: S_OK() / S_ERROR
    """
    #Find CS path for the Auth rules
    referedAction = self._isMetaAction( actionTuple[0] )
    if referedAction:
      # Meta actions (e.g. Connection) authorize against a single Default rule
      csAuthPath = "%s/Default" % actionTuple[0]
      hardcodedMethodAuth = self._actions[ 'auth' ][ actionTuple[0] ]
    else:
      if actionTuple[0] == 'RPC':
        csAuthPath = actionTuple[1]
      else:
        csAuthPath = "/".join( actionTuple )
      #Find if there are hardcoded auth rules in the code
      hardcodedMethodAuth = False
      if actionTuple[0] in self._actions[ 'auth' ]:
        hardcodedRulesByType = self._actions[ 'auth' ][ actionTuple[0] ]
        if actionTuple[0] == "FileTransfer":
          # FileTransfer method names are looked up with a lowercased first letter
          methodName = actionTuple[1][0].lower() + actionTuple[1][1:]
        else:
          methodName = actionTuple[1]
        if methodName in hardcodedRulesByType:
          hardcodedMethodAuth = hardcodedRulesByType[ methodName ]
    #Auth time!
    if not self._authMgr.authQuery( csAuthPath, credDict, hardcodedMethodAuth ):
      #Get the identity string
      identity = self._createIdentityString( credDict )
      fromHost = "unknown host"
      tr = self._transportPool.get( trid )
      if tr:
        fromHost = '/'.join( [ str( item ) for item in tr.getRemoteAddress() ] )
      gLogger.warn( "Unauthorized query", "to %s:%s by %s from %s" % ( self._name,
                                                                      "/".join( actionTuple ),
                                                                      identity, fromHost ) )
      result = S_ERROR( "Unauthorized query" )
    else:
      result = S_OK()
    #Security log
    tr = self._transportPool.get( trid )
    if not tr:
      return S_ERROR( "Client disconnected" )
    sourceAddress = tr.getRemoteAddress()
    identity = self._createIdentityString( credDict )
    Service.SVC_SECLOG_CLIENT.addMessage( result[ 'OK' ], sourceAddress[0], sourceAddress[1], identity,
                                          self._cfg.getHostname(),
                                          self._cfg.getPort(),
                                          self._name, "/".join( actionTuple ) )
    return result

  def _instantiateHandler( self, trid, proposalTuple = None ):
    """ Generate an instance of the handler for a given service

        :param trid: transport id of the client connection
        :param tuple proposalTuple: optional proposal; when given, client setup/VO
            are extracted from it and passed to the handler
        :return: S_OK( handlerInstance ) / S_ERROR
    """
    #Generate the client params
    clientParams = { 'serviceStartTime' : self._startTime }
    if proposalTuple:
      clientParams[ 'clientSetup' ] = proposalTuple[0][1]
      if len( proposalTuple[0] ) < 3:
        # Old clients do not send their VO: fall back to the configured default
        clientParams[ 'clientVO' ] = gConfig.getValue( "/DIRAC/VirtualOrganization", "unknown" )
      else:
        clientParams[ 'clientVO' ] = proposalTuple[0][2]
    clientTransport = self._transportPool.get( trid )
    if clientTransport:
      clientParams[ 'clientAddress' ] = clientTransport.getRemoteAddress()
    #Generate handler dict with per client info
    handlerInitDict = dict( self._serviceInfoDict )
    for key in clientParams:
      handlerInitDict[ key ] = clientParams[ key ]
    #Instantiate and initialize
    try:
      handlerInstance = self._handler[ 'class' ]( handlerInitDict, trid )
      handlerInstance.initialize()
    except Exception as e:
      gLogger.exception( "Server error while loading handler: %s" % str( e ) )
      return S_ERROR( "Server error while loading handler" )
    return S_OK( handlerInstance )

  def _processProposal( self, trid, proposalTuple, handlerObj ):
    """ Execute an accepted proposal, handling the special ('Connection', 'new')
        case that turns the transport into a persistent messaging connection.

        :return: S_OK/S_ERROR with an extra 'closeTransport' boolean key
    """
    #Notify the client we're ready to execute the action
    retVal = self._transportPool.send( trid, S_OK() )
    if not retVal[ 'OK' ]:
      return retVal
    messageConnection = False
    if proposalTuple[1] == ( 'Connection', 'new' ):
      messageConnection = True
    if messageConnection:
      if self._msgBroker.getNumConnections() > self._cfg.getMaxMessagingConnections():
        result = S_ERROR( "Maximum number of connections reached. Try later" )
        result[ 'closeTransport' ] = True
        return result
      #This is a stable connection
      self._msgBroker.addTransportId( trid, self._name,
                                      receiveMessageCallback = self._mbReceivedMsg,
                                      disconnectCallback = self._mbDisconnect,
                                      listenToConnection = False )
    result = self._executeAction( trid, proposalTuple, handlerObj )
    if result[ 'OK' ] and messageConnection:
      self._msgBroker.listenToTransport( trid )
      result = self._mbConnect( trid, handlerObj )
      if not result[ 'OK' ]:
        self._msgBroker.removeTransport( trid )
    # Message connections stay open unless the action failed
    result[ 'closeTransport' ] = not messageConnection or not result[ 'OK' ]
    return result

  def _mbConnect( self, trid, handlerObj = None ):
    """ Fire the handler's 'connected' callback for a new messaging connection,
        instantiating a handler if one was not provided.
    """
    if not handlerObj:
      result = self._instantiateHandler( trid )
      if not result[ 'OK' ]:
        return result
      handlerObj = result[ 'Value' ]
    return handlerObj._rh_executeConnectionCallback( 'connected' )

  def _executeAction( self, trid, proposalTuple, handlerObj ):
    """ Delegate the proposal to the handler, shielding the server from handler exceptions. """
    try:
      return handlerObj._rh_executeAction( proposalTuple )
    except Exception as e:
      gLogger.exception( "Exception while executing handler action" )
      return S_ERROR( "Server error while executing action: %s" % str( e ) )

  def _mbReceivedMsg( self, trid, msgObj ):
    """ Message-broker callback: authorize and dispatch an incoming message to a fresh handler. """
    result = self._authorizeProposal( ( 'Message', msgObj.getName() ),
                                      trid,
                                      self._transportPool.get( trid ).getConnectingCredentials() )
    if not result[ 'OK' ]:
      return result
    result = self._instantiateHandler( trid )
    if not result[ 'OK' ]:
      return result
    handlerObj = result[ 'Value' ]
    return handlerObj._rh_executeMessageCallback( msgObj )

  def _mbDisconnect( self, trid ):
    """ Message-broker callback: fire the handler's 'drop' callback on disconnect. """
    result = self._instantiateHandler( trid )
    if not result[ 'OK' ]:
      return result
    handlerObj = result[ 'Value' ]
    return handlerObj._rh_executeConnectionCallback( 'drop' )

  def __startReportToMonitoring( self ):
    """ Open a monitoring interval: count the query and snapshot wall/CPU time.

        :return: tuple ( now, cpuTime ) to be passed to __endReportToMonitoring
    """
    self._monitor.addMark( "Queries" )
    now = time.time()
    stats = os.times()
    cpuTime = stats[0] + stats[2]
    if now - self.__monitorLastStatsUpdate < 0:
      # Clock went backwards: skip updating the stats this round
      return ( now, cpuTime )
    # Send CPU consumption mark
    # NOTE(review): wallClock is computed but never used below — confirm whether
    # a CPU mark was meant to be sent here
    wallClock = now - self.__monitorLastStatsUpdate
    self.__monitorLastStatsUpdate = now
    # Send Memory consumption mark
    membytes = MemStat.VmB( 'VmRSS:' )
    if membytes:
      mem = membytes / ( 1024. * 1024. )
      self._monitor.addMark( 'MEM', mem )
    return ( now, cpuTime )

  def __endReportToMonitoring( self, initialWallTime, initialCPUTime ):
    """ Close a monitoring interval: send the CPU usage percentage over the interval.

        :param float initialWallTime: time.time() at interval start
        :param float initialCPUTime: process CPU time at interval start
    """
    wallTime = time.time() - initialWallTime
    stats = os.times()
    cpuTime = stats[0] + stats[2] - initialCPUTime
    # NOTE(review): divides by wallTime — a zero wall-clock delta would raise
    # ZeroDivisionError; presumably never happens in practice, confirm
    percentage = cpuTime / wallTime * 100.
    if percentage > 0:
      self._monitor.addMark( 'CPU', percentage )
class TornadoServer(object):
  """ Tornado webserver

  Initialize and run an HTTPS Server for DIRAC services.
  By default it load all https services defined in the CS,
  but you can also give an explicit list.

  The listening port is either:

  * Given as parameter
  * Loaded from the CS ``/Systems/Tornado/<instance>/Port``
  * Default to 8443


  Example 1: Easy way to start tornado::

    # Initialize server and load services
    serverToLaunch = TornadoServer()

    # Start listening when ready
    serverToLaunch.startTornado()

  Example 2:We want to debug service1 and service2 only, and use another port for that ::

    services = ['component/service1', 'component/service2']
    serverToLaunch = TornadoServer(services=services, port=1234)
    serverToLaunch.startTornado()

  """

  def __init__(self, services=None, port=None):
    """
    :param list services: (default None) List of service handlers to load.
        If ``None``, loads all
    :param int port: Port to listen to.
        If None, the port is resolved following the logic described in the class documentation
    """
    if port is None:
      port = gConfig.getValue(
          "/Systems/Tornado/%s/Port" %
          PathFinder.getSystemInstance('Tornado'), 8443)

    # Accept a single service name as well as a list
    if services and not isinstance(services, list):
      services = [services]
    # URLs for services.
    # Contains Tornado :py:class:`tornado.web.url` object
    self.urls = []
    # Other infos
    self.port = port
    self.handlerManager = HandlerManager()

    # Monitoring attributes
    self._monitor = MonitoringClient()
    # temp value for computation, used by the monitoring
    self.__report = None
    # Last update time stamp
    self.__monitorLastStatsUpdate = None
    self.__monitoringLoopDelay = 60  # In secs

    # If services are defined, load only these ones (useful for debug purpose or specific services)
    if services:
      retVal = self.handlerManager.loadHandlersByServiceName(services)
      if not retVal['OK']:
        sLog.error(retVal['Message'])
        raise ImportError(
            "Some services can't be loaded, check the service names and configuration."
        )
    # if no service list is given, load services from configuration
    handlerDict = self.handlerManager.getHandlersDict()
    for item in handlerDict.items():
      # handlerDict[key].initializeService(key)
      self.urls.append(url(item[0], item[1]))
    # If there is no services loaded:
    if not self.urls:
      raise ImportError(
          "There is no services loaded, please check your configuration")

  def startTornado(self):
    """
      Starts the tornado server when ready.
      This method never returns.
    """

    sLog.debug("Starting Tornado")
    self._initMonitoring()

    router = Application(self.urls, debug=False, compress_response=True)

    certs = Locations.getHostCertificateAndKeyLocation()
    if certs is False:
      sLog.fatal("Host certificates not found ! Can't start the Server")
      raise ImportError("Unable to load certificates")
    ca = Locations.getCAsLocation()
    ssl_options = {
        'certfile': certs[0],
        'keyfile': certs[1],
        'cert_reqs': M2Crypto.SSL.verify_peer,
        'ca_certs': ca,
        'sslDebug': False,  # Set to true if you want to see the TLS debug messages
    }

    self.__monitorLastStatsUpdate = time.time()
    self.__report = self.__startReportToMonitoringLoop()

    # Starting monitoring, IOLoop waiting time in ms, __monitoringLoopDelay is defined in seconds
    tornado.ioloop.PeriodicCallback(self.__reportToMonitoring,
                                    self.__monitoringLoopDelay * 1000).start()

    # Start server
    server = HTTPServer(router, ssl_options=ssl_options, decompress_request=True)
    try:
      server.listen(self.port)
    except Exception as e:  # pylint: disable=broad-except
      sLog.exception("Exception starting HTTPServer", e)
      raise
    sLog.always("Listening on port %s" % self.port)
    for service in self.urls:
      sLog.debug("Available service: %s" % service)

    # Hand control to the Tornado event loop; does not return
    IOLoop.current().start()

  def _initMonitoring(self):
    """ Register this process and its CPU/MEM activities with the monitoring system. """
    self._monitor.setComponentType(MonitoringClient.COMPONENT_TORNADO)
    self._monitor.initialize()
    self._monitor.setComponentName('Tornado')

    self._monitor.registerActivity('CPU', "CPU Usage", 'Framework', "CPU,%",
                                   MonitoringClient.OP_MEAN, 600)
    self._monitor.registerActivity('MEM', "Memory Usage", 'Framework',
                                   'Memory,MB', MonitoringClient.OP_MEAN, 600)

    self._monitor.setComponentExtraParam('DIRACVersion', DIRAC.version)
    self._monitor.setComponentExtraParam('platform', DIRAC.getPlatform())
    self._monitor.setComponentExtraParam('startTime', datetime.datetime.utcnow())

  def __reportToMonitoring(self):
    """
      Periodically report to the monitoring of the CPU and MEM
    """

    # Calculate CPU usage by comparing realtime and cpu time since last report
    self.__endReportToMonitoringLoop(*self.__report)

    # Save memory usage and save realtime/CPU time for next call
    self.__report = self.__startReportToMonitoringLoop()

  def __startReportToMonitoringLoop(self):
    """
      Snapshot of resources to be taken at the beginning
      of a monitoring cycle.
      Also sends memory snapshot to the monitoring.

      This is basically copy/paste of Service.py

      :returns: tuple (<time.time(), cpuTime )
    """
    now = time.time()  # Used to calulate a delta
    stats = os.times()
    cpuTime = stats[0] + stats[2]
    if now - self.__monitorLastStatsUpdate < 0:
      # Clock went backwards: skip the stats update this cycle
      return (now, cpuTime)
    # Send CPU consumption mark
    self.__monitorLastStatsUpdate = now
    # Send Memory consumption mark
    membytes = MemStat.VmB('VmRSS:')
    if membytes:
      mem = membytes / (1024. * 1024.)
      self._monitor.addMark('MEM', mem)
    return (now, cpuTime)

  def __endReportToMonitoringLoop(self, initialWallTime, initialCPUTime):
    """
      Snapshot of resources to be taken at the end
      of a monitoring cycle.

      This is basically copy/paste of Service.py

      Determines CPU usage by comparing walltime
      and cputime and send it to monitor
    """
    wallTime = time.time() - initialWallTime
    stats = os.times()
    cpuTime = stats[0] + stats[2] - initialCPUTime
    percentage = cpuTime / wallTime * 100.
    if percentage > 0:
      self._monitor.addMark('CPU', percentage)
class AgentModule:
  """ Base class for all agent modules

      This class is used by the AgentReactor Class to steer the execution of
      DIRAC Agents.

      For this purpose the following methods are used:
      - am_initialize()      just after instantiated
      - am_getPollingTime()  to set the execution frequency
      - am_getMaxCycles()    to determine the number of cycles
      - am_go()              for the actual execution of one cycle

      Before each iteration, the following methods are used to determine
      if the new cycle is to be started.
      - am_getModuleParam( 'alive' )
      - am_checkStopAgentFile()
      - am_removeStopAgentFile()

      To start new execution cycle the following methods are used
      - am_getCyclesDone()
      - am_setOption( 'MaxCycles', maxCycles )

      At the same time it provides all Agents with common interface.
      All Agent class must inherit from this base class and must implement
      at least the following method:
      - execute()            main method called in the agent cycle

      Additionally they may provide:
      - initialize()         for initial settings
      - finalize()           the graceful exit
      - beginExecution()     before each execution cycle
      - endExecution()       at the end of each execution cycle

      The agent can be stopped either by a signal or by creating a 'stop_agent' file
      in the controlDirectory defined in the agent configuration
  """

  def __init__(self, agentName, baseAgentName=False, properties=None):
    """ Common __init__ method for all Agents.

        All Agent modules must define: __doc__, __RCSID__ — they are used to
        populate __codeProperties.

        The following Options are used from the Configuration:
        - /LocalSite/InstancePath
        - /DIRAC/Setup
        - Status, Enabled, PollingTime (default 120), MaxCycles (default 500)
        - ControlDirectory, WorkDirectory, shifterProxy, shifterProxyLocation

        Different defaults can be set in the initialize() method of the Agent
        using am_setOption().

        :param str agentName: full agent name System/AgentName
        :param baseAgentName: when equal to agentName, the agent runs standalone
        :param dict properties: extra module properties merged into the defaults
            (default None; a fresh dict is used to avoid the shared mutable default)
    """
    if baseAgentName and agentName == baseAgentName:
      self.log = gLogger
      standaloneModule = True
    else:
      self.log = gLogger.getSubLogger(agentName, child=False)
      standaloneModule = False

    self.__basePath = gConfig.getValue('/LocalSite/InstancePath', rootPath)
    self.__agentModule = None
    self.__codeProperties = {}
    self.__getCodeInfo()

    self.__moduleProperties = {
        'fullName': agentName,
        'section': PathFinder.getAgentSection(agentName),
        'standalone': standaloneModule,
        'cyclesDone': 0,
        'totalElapsedTime': 0,
        'setup': gConfig.getValue("/DIRAC/Setup", "Unknown")
    }
    self.__moduleProperties['system'], self.__moduleProperties['agentName'] = agentName.split("/")

    self.__configDefaults = {}
    self.__configDefaults['MonitoringEnabled'] = True
    # FIX: original tested ``.lower() in ('active')`` — ('active') is a plain
    # string, so that was a substring test (e.g. 'act' matched). Use equality.
    self.__configDefaults['Enabled'] = self.am_getOption("Status", "Active").lower() == 'active'
    self.__configDefaults['PollingTime'] = self.am_getOption("PollingTime", 120)
    self.__configDefaults['MaxCycles'] = self.am_getOption("MaxCycles", 500)
    self.__configDefaults['ControlDirectory'] = os.path.join(self.__basePath, 'control',
                                                             *agentName.split("/"))
    self.__configDefaults['WorkDirectory'] = os.path.join(self.__basePath, 'work',
                                                          *agentName.split("/"))
    self.__configDefaults['shifterProxy'] = ''
    self.__configDefaults['shifterProxyLocation'] = os.path.join(
        self.__configDefaults['WorkDirectory'], '.shifterCred')

    # Merge caller-supplied properties over the defaults
    if isinstance(properties, dict):
      for key in properties:
        self.__moduleProperties[key] = properties[key]
    self.__moduleProperties['executors'] = [(self.execute, ())]
    self.__moduleProperties['alive'] = True
    self.__moduleProperties['shifterProxy'] = False

    self.__monitorLastStatsUpdate = -1
    self.monitor = None
    self.__initializeMonitor()
    self.__initialized = False

  def __getCodeInfo(self):
    """ Extract __RCSID__ / __doc__ / DIRAC version / platform from the agent module
        into __codeProperties (best effort; missing attributes become 'unset').
    """
    versionVar = "__RCSID__"
    docVar = "__doc__"
    try:
      self.__agentModule = __import__(self.__class__.__module__, globals(), locals(), versionVar)
    except Exception:
      self.log.exception("Cannot load agent module")
    for prop in ((versionVar, "version"), (docVar, "description")):
      try:
        self.__codeProperties[prop[1]] = getattr(self.__agentModule, prop[0])
      except Exception:
        self.log.error("Missing %s" % prop[0])
        self.__codeProperties[prop[1]] = 'unset'
    self.__codeProperties['DIRACVersion'] = DIRAC.version
    self.__codeProperties['platform'] = DIRAC.platform

  def am_initialize(self, *initArgs):
    """ Common initialization for all agents; calls the subclass initialize(),
        creates the control/work directories and logs the agent banner.

        :return: S_OK / S_ERROR
    """
    agentName = self.am_getModuleParam('fullName')
    result = self.initialize(*initArgs)
    if result is None:
      return S_ERROR(
          "Error while initializing %s module: initialize must return S_OK/S_ERROR" % agentName)
    if not result['OK']:
      return S_ERROR("Error while initializing %s: %s" % (agentName, result['Message']))
    _checkDir(self.am_getControlDirectory())
    _checkDir(self.am_getWorkDirectory())

    self.__moduleProperties['shifterProxy'] = self.am_getOption('shifterProxy')
    if self.am_monitoringEnabled():
      self.monitor.enable()
    if len(self.__moduleProperties['executors']) < 1:
      return S_ERROR("At least one executor method has to be defined")
    if not self.am_Enabled():
      return S_ERROR("Agent is disabled via the configuration")
    self.log.notice("=" * 40)
    self.log.notice("Loaded agent module %s" % self.__moduleProperties['fullName'])
    self.log.notice(" Site: %s" % DIRAC.siteName())
    self.log.notice(" Setup: %s" % gConfig.getValue("/DIRAC/Setup"))
    self.log.notice(" Base Module version: %s " % __RCSID__)
    self.log.notice(" Agent version: %s" % self.__codeProperties['version'])
    self.log.notice(" DIRAC version: %s" % DIRAC.version)
    self.log.notice(" DIRAC platform: %s" % DIRAC.platform)
    pollingTime = int(self.am_getOption('PollingTime'))
    if pollingTime > 3600:
      self.log.notice(" Polling time: %s hours" % (pollingTime / 3600.))
    else:
      self.log.notice(" Polling time: %s seconds" % self.am_getOption('PollingTime'))
    self.log.notice(" Control dir: %s" % self.am_getControlDirectory())
    self.log.notice(" Work dir: %s" % self.am_getWorkDirectory())
    if self.am_getOption('MaxCycles') > 0:
      self.log.notice(" Cycles: %s" % self.am_getMaxCycles())
    else:
      self.log.notice(" Cycles: unlimited")
    self.log.notice("=" * 40)
    self.__initialized = True
    return S_OK()

  def am_getControlDirectory(self):
    """ :return: absolute path of the agent control directory """
    return os.path.join(self.__basePath, str(self.am_getOption('ControlDirectory')))

  def am_getStopAgentFile(self):
    """ :return: path of the 'stop_agent' sentinel file """
    return os.path.join(self.am_getControlDirectory(), 'stop_agent')

  def am_checkStopAgentFile(self):
    """ :return: True if the stop_agent sentinel file exists """
    return os.path.isfile(self.am_getStopAgentFile())

  def am_createStopAgentFile(self):
    """ Create the stop_agent sentinel file (best effort, errors ignored). """
    try:
      # 'with' guarantees the file handle is closed even if write() fails
      with open(self.am_getStopAgentFile(), 'w') as fd:
        fd.write('Dirac site agent Stopped at %s' % Time.toString())
    except Exception:
      pass

  def am_removeStopAgentFile(self):
    """ Remove the stop_agent sentinel file (best effort, errors ignored). """
    try:
      os.unlink(self.am_getStopAgentFile())
    except Exception:
      pass

  def am_getBasePath(self):
    """ :return: instance base path """
    return self.__basePath

  def am_getWorkDirectory(self):
    """ :return: absolute path of the agent work directory """
    return os.path.join(self.__basePath, str(self.am_getOption('WorkDirectory')))

  def am_getShifterProxyLocation(self):
    """ :return: absolute path of the shifter proxy file """
    return os.path.join(self.__basePath, str(self.am_getOption('shifterProxyLocation')))

  def am_getOption(self, optionName, defaultValue=None):
    """ Get an option from the agent's configuration section.

        :param str optionName: option name; absolute when starting with '/'
        :param defaultValue: fallback; when None, the registered config default is used
    """
    if defaultValue is None:
      if optionName in self.__configDefaults:
        defaultValue = self.__configDefaults[optionName]
    if optionName and optionName[0] == "/":
      return gConfig.getValue(optionName, defaultValue)
    return gConfig.getValue("%s/%s" % (self.__moduleProperties['section'], optionName),
                            defaultValue)

  def am_setOption(self, optionName, value):
    """ Register a default value for a configuration option. """
    self.__configDefaults[optionName] = value

  def am_getModuleParam(self, optionName):
    """ :return: a module property value """
    return self.__moduleProperties[optionName]

  def am_setModuleParam(self, optionName, value):
    """ Set a module property value. """
    self.__moduleProperties[optionName] = value

  def am_getPollingTime(self):
    """ :return: the agent polling time in seconds """
    return self.am_getOption("PollingTime")

  def am_getMaxCycles(self):
    """ :return: the maximum number of execution cycles """
    return self.am_getOption("MaxCycles")

  def am_getCyclesDone(self):
    """ :return: the number of cycles executed so far """
    return self.am_getModuleParam('cyclesDone')

  def am_Enabled(self):
    """ :return: True if the agent is enabled via the configuration """
    return self.am_getOption("Enabled")

  def am_disableMonitoring(self):
    """ Disable the activity monitoring for this agent. """
    self.am_setOption('MonitoringEnabled', False)

  def am_monitoringEnabled(self):
    """ :return: True if monitoring is enabled """
    return self.am_getOption("MonitoringEnabled")

  def am_stopExecution(self):
    """ Flag the agent to stop after the current cycle. """
    self.am_setModuleParam('alive', False)

  def __initializeMonitor(self):
    """
      Initialize the system monitor client
    """
    if self.__moduleProperties['standalone']:
      self.monitor = gMonitor
    else:
      self.monitor = MonitoringClient()
    self.monitor.setComponentType(self.monitor.COMPONENT_AGENT)
    self.monitor.setComponentName(self.__moduleProperties['fullName'])
    self.monitor.initialize()
    self.monitor.registerActivity('CPU', "CPU Usage", 'Framework', "CPU,%",
                                  self.monitor.OP_MEAN, 600)
    self.monitor.registerActivity('MEM', "Memory Usage", 'Framework', 'Memory,MB',
                                  self.monitor.OP_MEAN, 600)
    #Component monitor
    for field in ('version', 'DIRACVersion', 'description', 'platform'):
      self.monitor.setComponentExtraParam(field, self.__codeProperties[field])
    self.monitor.setComponentExtraParam('startTime', Time.dateTime())
    self.monitor.setComponentExtraParam('cycles', 0)
    # Monitoring stays disabled until am_initialize() enables it
    self.monitor.disable()
    self.__monitorLastStatsUpdate = time.time()

  def am_secureCall(self, functor, args=(), name=False):
    """ Call a functor, converting a None result or an exception into S_ERROR.

        :param functor: callable to invoke
        :param tuple args: positional arguments for the callable
        :param name: label used in error messages (defaults to str(functor))
        :return: the functor's S_OK/S_ERROR, or S_ERROR on None/exception
    """
    if not name:
      name = str(functor)
    try:
      result = functor(*args)
      if result is None:
        return S_ERROR("%s method for %s module has to return S_OK/S_ERROR" %
                       (name, self.__moduleProperties['fullName']))
      return result
    except Exception as e:
      self.log.exception("Exception while calling %s method" % name)
      return S_ERROR("Exception while calling %s method: %s" % (name, str(e)))
class Service:
  """ Container running a single DIRAC service: loads the handler, sets up the
      thread pool, message broker and monitoring, and validates client actions.
  """

  # Maps an action type to the handler-method prefix implementing it;
  # 'Connection' is a meta action refering to 'Message'
  SVC_VALID_ACTIONS = { 'RPC' : 'export',
                        'FileTransfer': 'transfer',
                        'Message' : 'msg',
                        'Connection' : 'Message' }
  # Shared client used to record every authorization decision in the security log
  SVC_SECLOG_CLIENT = SecurityLogClient()

  def __init__( self, serviceName ):
    """
    :param str serviceName: name of the service (System/Service)
    """
    self._name = serviceName
    self._startTime = Time.dateTime()
    self._cfg = ServiceConfiguration( serviceName )
    # Names this server answers for (checked against client proposals)
    self._validNames = [ self._name ]
    self._monitor = MonitoringClient()
    self.__monitorLastStatsUpdate = time.time()
    self._stats = { 'queries' : 0, 'connections' : 0 }
    self._authMgr = AuthManager( "%s/Authorization" % self._cfg.getServicePath() )
    self._transportPool = getGlobalTransportPool()
    self.__cloneId = 0

  def setCloneProcessId( self, cloneId ):
    """ Record the clone id and rename the monitoring component accordingly.

    :param int cloneId: index of this clone process
    """
    self.__cloneId = cloneId
    self._monitor.setComponentName( "%s-Clone:%s" % ( self._name, cloneId ) )

  def _isMetaAction( self, action ):
    """ Resolve a meta action to the action it refers to.

    :param str action: action type
    :return: the refered action type, or False if `action` is not a meta action
    """
    referedAction = Service.SVC_VALID_ACTIONS[ action ]
    if referedAction in Service.SVC_VALID_ACTIONS:
      return referedAction
    return False

  def initialize( self ):
    """ Build the service URL, discover and load the handler, set up locks,
        actions, monitoring, thread pool and message broker, and run the
        handler's static initialization function.

    :return: S_OK / S_ERROR
    """
    #Build the URLs
    self._url = self._cfg.getURL()
    if not self._url:
      return S_ERROR( "Could not build service URL for %s" % self._name )
    gLogger.verbose( "Service URL is %s" % self._url )
    #Discover Handler
    self._handlerLocation = self._discoverHandlerLocation()
    if not self._handlerLocation:
      return S_ERROR( "Could not find handler location for %s" % self._name )
    gLogger.verbose( "Handler found at %s" % self._handlerLocation )
    #Load handler
    result = self._loadHandler()
    if not result[ 'OK' ]:
      return result
    self._handler = result[ 'Value' ]
    #Initialize lock manager
    self._lockManager = LockManager( self._cfg.getMaxWaitingPetitions() )
    #Load actions
    result = self._loadActions()
    if not result[ 'OK' ]:
      return result
    self._actions = result[ 'Value' ]
    self._initMonitoring()
    self._threadPool = ThreadPool( 1,
                                   max( 0, self._cfg.getMaxThreads() ),
                                   self._cfg.getMaxWaitingPetitions() )
    self._threadPool.daemonize()
    self._msgBroker = MessageBroker( "%sMSB" % self._name, threadPool = self._threadPool )
    #Create static dict
    self._serviceInfoDict = { 'serviceName' : self._name,
                              'URL' : self._cfg.getURL(),
                              'systemSectionPath' : self._cfg.getSystemPath(),
                              'serviceSectionPath' : self._cfg.getServicePath(),
                              'messageSender' : MessageSender( self._msgBroker )
                            }
    #Call static initialization function
    try:
      if self._handler[ 'init' ]:
        result = self._handler[ 'init' ]( dict( self._serviceInfoDict ) )
        if not isReturnStructure( result ):
          return S_ERROR( "Service initialization function must return S_OK/S_ERROR" )
        if not result[ 'OK' ]:
          return S_ERROR( "Error while initializing %s: %s" % ( self._name, result[ 'Message' ] ) )
    except Exception:
      # FIX: modern `except` syntax (was the removed `except Exception, e` form,
      # with `e` unused) and corrected "intializing" typo in the error message
      errMsg = "Exception while initializing %s" % self._name
      gLogger.exception( errMsg )
      return S_ERROR( errMsg )
    #Everything is OK
    gThreadScheduler.addPeriodicTask( 30, self.__reportThreadPoolContents )
    return S_OK()
class AgentModule(object):
    """ Base class for all agent modules

        This class is used by the AgentReactor Class to steer the execution of
        DIRAC Agents.

        For this purpose the following methods are used:
        - am_initialize()      just after instantiated
        - am_getPollingTime()  to set the execution frequency
        - am_getMaxCycles()    to determine the number of cycles
        - am_go()              for the actual execution of one cycle

        Before each iteration, the following methods are used to determine
        if the new cycle is to be started.
        - am_getModuleParam( 'alive' )
        - am_checkStopAgentFile()
        - am_removeStopAgentFile()

        To start new execution cycle the following methods are used
        - am_getCyclesDone()
        - am_setOption( 'MaxCycles', maxCycles )

        At the same time it provides all Agents with common interface.
        All Agent class must inherit from this base class and must implement
        at least the following method:
        - execute()            main method called in the agent cycle

        Additionally they may provide:
        - initialize()         for initial settings
        - finalize()           the graceful exit
        - beginExecution()     before each execution cycle
        - endExecution()       at the end of each execution cycle

        The agent can be stopped either by a signal or by creating a 'stop_agent' file
        in the controlDirectory defined in the agent configuration
    """

    # NOTE(review): properties={} is a shared mutable default argument; it is only
    # read here, but consider properties=None
    def __init__(self, agentName, loadName, baseAgentName=False, properties={}):
        """ Common __init__ method for all Agents.

            All Agent modules must define: __doc__, __RCSID__ —
            they are used to populate __codeProperties.

            The following Options are used from the Configuration:
            - /LocalSite/InstancePath
            - /DIRAC/Setup
            - Status, Enabled
            - PollingTime            default = 120
            - MaxCycles              default = 500
            - WatchdogTime           default = 0 (disabled)
            - ControlDirectory       control/SystemName/AgentName
            - WorkDirectory          work/SystemName/AgentName
            - shifterProxy           ''
            - shifterProxyLocation   WorkDirectory/SystemName/AgentName/.shifterCred

            Different defaults can be set in the initialize() method of the Agent
            using am_setOption(). In order to get a shifter proxy in the environment
            during the execute() the configuration Option 'shifterProxy' must be set,
            a default may be given in the initialize() method.

            :param str agentName: full agent name System/AgentName
            :param str loadName: name under which the module was loaded
            :param baseAgentName: when equal to agentName, the agent runs standalone
            :param dict properties: extra module properties merged over the defaults
        """
        if baseAgentName and agentName == baseAgentName:
            self.log = gLogger
            standaloneModule = True
        else:
            self.log = gLogger.getSubLogger(agentName, child=False)
            standaloneModule = False

        self.__basePath = gConfig.getValue('/LocalSite/InstancePath', rootPath)
        self.__agentModule = None
        self.__codeProperties = {}
        self.__getCodeInfo()

        self.__moduleProperties = {
            'fullName': agentName,
            'loadName': loadName,
            'section': PathFinder.getAgentSection(agentName),
            'loadSection': PathFinder.getAgentSection(loadName),
            'standalone': standaloneModule,
            'cyclesDone': 0,
            'totalElapsedTime': 0,
            'setup': gConfig.getValue("/DIRAC/Setup", "Unknown"),
            'alive': True
        }
        self.__moduleProperties['system'], self.__moduleProperties[
            'agentName'] = agentName.split("/")
        self.__configDefaults = {}
        self.__configDefaults['MonitoringEnabled'] = True
        # NOTE(review): ('active') is a plain string, not a tuple, so this is a
        # substring test (e.g. 'act' would match) — likely meant ('active',)
        self.__configDefaults['Enabled'] = self.am_getOption(
            "Status", "Active").lower() in ('active')
        self.__configDefaults['PollingTime'] = self.am_getOption(
            "PollingTime", 120)
        self.__configDefaults['MaxCycles'] = self.am_getOption(
            "MaxCycles", 500)
        self.__configDefaults['WatchdogTime'] = self.am_getOption(
            "WatchdogTime", 0)
        self.__configDefaults['ControlDirectory'] = os.path.join(
            self.__basePath, 'control', *agentName.split("/"))
        self.__configDefaults['WorkDirectory'] = os.path.join(
            self.__basePath, 'work', *agentName.split("/"))
        self.__configDefaults['shifterProxy'] = ''
        self.__configDefaults['shifterProxyLocation'] = os.path.join(
            self.__configDefaults['WorkDirectory'], '.shifterCred')

        # Merge caller-supplied properties over the defaults
        if isinstance(properties, dict):
            for key in properties:
                self.__moduleProperties[key] = properties[key]
        self.__moduleProperties['executors'] = [(self.execute, ())]
        self.__moduleProperties['shifterProxy'] = False

        self.__monitorLastStatsUpdate = -1
        self.monitor = None
        self.__initializeMonitor()
        self.__initialized = False

    def __getCodeInfo(self):
        """ Extract __RCSID__ / __doc__ / DIRAC version / platform from the agent
            module into __codeProperties (best effort; missing values become 'unset').
        """
        versionVar = "__RCSID__"
        docVar = "__doc__"
        try:
            self.__agentModule = __import__(self.__class__.__module__,
                                            globals(), locals(),
                                            versionVar)
        except Exception as excp:
            self.log.exception("Cannot load agent module", lException=excp)
        for prop in ((versionVar, "version"), (docVar, "description")):
            try:
                self.__codeProperties[prop[1]] = getattr(
                    self.__agentModule, prop[0])
            except Exception:
                self.log.error("Missing property", prop[0])
                self.__codeProperties[prop[1]] = 'unset'
        self.__codeProperties['DIRACVersion'] = DIRAC.version
        self.__codeProperties['platform'] = DIRAC.getPlatform()

    def am_initialize(self, *initArgs):
        """ Common initialization for all the agents.

        This is executed every time an agent (re)starts.
        This is called by the AgentReactor, should not be overridden.
        """
        agentName = self.am_getModuleParam('fullName')
        result = self.initialize(*initArgs)
        if not isReturnStructure(result):
            return S_ERROR("initialize must return S_OK/S_ERROR")
        if not result['OK']:
            return S_ERROR("Error while initializing %s: %s" %
                           (agentName, result['Message']))
        mkDir(self.am_getControlDirectory())
        workDirectory = self.am_getWorkDirectory()
        mkDir(workDirectory)
        # Set the work directory in an environment variable available to subprocesses if needed
        os.environ['AGENT_WORKDIRECTORY'] = workDirectory

        self.__moduleProperties['shifterProxy'] = self.am_getOption(
            'shifterProxy')
        # Legacy gMonitor-based monitoring is only enabled when ES-based
        # activity monitoring is not active (see __initializeMonitor)
        if self.am_monitoringEnabled() and not self.activityMonitoring:
            self.monitor.enable()
        if len(self.__moduleProperties['executors']) < 1:
            return S_ERROR("At least one executor method has to be defined")
        if not self.am_Enabled():
            return S_ERROR("Agent is disabled via the configuration")
        self.log.notice("=" * 40)
        self.log.notice("Loaded agent module %s" %
                        self.__moduleProperties['fullName'])
        self.log.notice(" Site: %s" % DIRAC.siteName())
        self.log.notice(" Setup: %s" % gConfig.getValue("/DIRAC/Setup"))
        self.log.notice(" Base Module version: %s " % __RCSID__)
        self.log.notice(" Agent version: %s" %
                        self.__codeProperties['version'])
        self.log.notice(" DIRAC version: %s" % DIRAC.version)
        self.log.notice(" DIRAC platform: %s" % DIRAC.getPlatform())
        pollingTime = int(self.am_getOption('PollingTime'))
        if pollingTime > 3600:
            self.log.notice(" Polling time: %s hours" % (pollingTime / 3600.))
        else:
            self.log.notice(" Polling time: %s seconds" %
                            self.am_getOption('PollingTime'))
        self.log.notice(" Control dir: %s" % self.am_getControlDirectory())
        self.log.notice(" Work dir: %s" % self.am_getWorkDirectory())
        if self.am_getOption('MaxCycles') > 0:
            self.log.notice(" Cycles: %s" % self.am_getMaxCycles())
        else:
            self.log.notice(" Cycles: unlimited")
        if self.am_getWatchdogTime() > 0:
            self.log.notice(" Watchdog interval: %s" %
                            self.am_getWatchdogTime())
        else:
            self.log.notice(" Watchdog interval: disabled ")
        self.log.notice("=" * 40)
        self.__initialized = True
        return S_OK()

    def am_getControlDirectory(self):
        """ :return: absolute path of the agent control directory """
        return os.path.join(self.__basePath,
                            str(self.am_getOption('ControlDirectory')))

    def am_getStopAgentFile(self):
        """ :return: path of the 'stop_agent' sentinel file """
        return os.path.join(self.am_getControlDirectory(), 'stop_agent')

    def am_checkStopAgentFile(self):
        """ :return: True if the stop_agent sentinel file exists """
        return os.path.isfile(self.am_getStopAgentFile())

    def am_createStopAgentFile(self):
        """ Create the stop_agent sentinel file (best effort, errors ignored). """
        try:
            with open(self.am_getStopAgentFile(), 'w') as fd:
                fd.write('Dirac site agent Stopped at %s' % Time.toString())
        except Exception:
            pass

    def am_removeStopAgentFile(self):
        """ Remove the stop_agent sentinel file (best effort, errors ignored). """
        try:
            os.unlink(self.am_getStopAgentFile())
        except Exception:
            pass

    def am_getBasePath(self):
        """ :return: instance base path """
        return self.__basePath

    def am_getWorkDirectory(self):
        """ :return: absolute path of the agent work directory """
        return os.path.join(self.__basePath,
                            str(self.am_getOption('WorkDirectory')))

    def am_getShifterProxyLocation(self):
        """ :return: absolute path of the shifter proxy file """
        return os.path.join(self.__basePath,
                            str(self.am_getOption('shifterProxyLocation')))

    def am_getOption(self, optionName, defaultValue=None):
        """ Gets an option from the agent's configuration section.
            The section will be a subsection of the /Systems section in the CS.
        """
        if defaultValue is None:
            if optionName in self.__configDefaults:
                defaultValue = self.__configDefaults[optionName]
        if optionName and optionName[0] == "/":
            return gConfig.getValue(optionName, defaultValue)
        # Try the agent section first, then the load-name section
        for section in (self.__moduleProperties['section'],
                        self.__moduleProperties['loadSection']):
            result = gConfig.getOption("%s/%s" % (section, optionName),
                                       defaultValue)
            if result['OK']:
                return result['Value']
        return defaultValue

    def am_setOption(self, optionName, value):
        """ Register a default value for a configuration option. """
        self.__configDefaults[optionName] = value

    def am_getModuleParam(self, optionName):
        """ :return: a module property value """
        return self.__moduleProperties[optionName]

    def am_setModuleParam(self, optionName, value):
        """ Set a module property value. """
        self.__moduleProperties[optionName] = value

    def am_getPollingTime(self):
        """ :return: the agent polling time in seconds """
        return self.am_getOption("PollingTime")

    def am_getMaxCycles(self):
        """ :return: the maximum number of execution cycles """
        return self.am_getOption("MaxCycles")

    def am_getWatchdogTime(self):
        """ :return: the watchdog interval in seconds (0 = disabled) """
        return int(self.am_getOption("WatchdogTime"))

    def am_getCyclesDone(self):
        """ :return: the number of cycles executed so far """
        return self.am_getModuleParam('cyclesDone')

    def am_Enabled(self):
        """ :return: True if the agent is enabled via the configuration """
        return self.am_getOption("Enabled")

    def am_disableMonitoring(self):
        """ Disable the activity monitoring for this agent. """
        self.am_setOption('MonitoringEnabled', False)

    def am_monitoringEnabled(self):
        """ :return: True if monitoring is enabled """
        return self.am_getOption("MonitoringEnabled")

    def am_stopExecution(self):
        """ Flag the agent to stop after the current cycle. """
        self.am_setModuleParam('alive', False)

    def __initializeMonitor(self):
        """
          Initialize the system monitoring.
        """
        # This flag is used to activate ES based monitoring
        # if the "EnableActivityMonitoring" flag in "yes" or "true" in the cfg file.
        self.activityMonitoring = (
            Operations().getValue("EnableActivityMonitoring", False) or
            self.am_getOption("EnableActivityMonitoring", False))
        if self.activityMonitoring:
            # The import needs to be here because of the CS must be initialized before importing
            # this class (see https://github.com/DIRACGrid/DIRAC/issues/4793)
            from DIRAC.MonitoringSystem.Client.MonitoringReporter import MonitoringReporter
            self.activityMonitoringReporter = MonitoringReporter(
                monitoringType="ComponentMonitoring")
            # With the help of this periodic task we commit the data to ES at an interval of 100 seconds.
            gThreadScheduler.addPeriodicTask(
                100, self.__activityMonitoringReporting)
        else:
            # Legacy gMonitor-based monitoring
            if self.__moduleProperties['standalone']:
                self.monitor = gMonitor
            else:
                self.monitor = MonitoringClient()
            self.monitor.setComponentType(self.monitor.COMPONENT_AGENT)
            self.monitor.setComponentName(self.__moduleProperties['fullName'])
            self.monitor.initialize()
            self.monitor.registerActivity('CPU', "CPU Usage", 'Framework',
                                          "CPU,%", self.monitor.OP_MEAN, 600)
            self.monitor.registerActivity('MEM', "Memory Usage", 'Framework',
                                          'Memory,MB', self.monitor.OP_MEAN,
                                          600)
            # Component monitor
            for field in ('version', 'DIRACVersion', 'description',
                          'platform'):
                self.monitor.setComponentExtraParam(
                    field, self.__codeProperties[field])
            self.monitor.setComponentExtraParam('startTime', Time.dateTime())
            self.monitor.setComponentExtraParam('cycles', 0)
            # Monitoring stays disabled until am_initialize() enables it
            self.monitor.disable()
            self.__monitorLastStatsUpdate = time.time()

    def am_secureCall(self, functor, args=(), name=False):
        """ Call a functor, converting a bad result or an exception into S_ERROR.

            :param functor: callable to invoke
            :param tuple args: positional arguments for the callable
            :param name: label used in error messages (defaults to str(functor))
        """
        if not name:
            name = str(functor)
        try:
            result = functor(*args)
            if not isReturnStructure(result):
                raise Exception(
                    "%s method for %s module has to return S_OK/S_ERROR" %
                    (name, self.__moduleProperties['fullName']))
            return result
        except Exception as e:
            self.log.exception("Agent exception while calling method %s" %
                               name,
                               lException=e)
            return S_ERROR("Exception while calling %s method: %s" %
                           (name, str(e)))

    # NOTE: definition truncated at the end of this chunk — body continues
    # beyond the visible source
    def _setShifterProxy(self):
        if
self.__moduleProperties["shifterProxy"]: result = setupShifterProxyInEnv( self.__moduleProperties["shifterProxy"], self.am_getShifterProxyLocation()) if not result['OK']: self.log.error("Failed to set shifter proxy", result['Message']) return result return S_OK() def am_go(self): # Set the shifter proxy if required result = self._setShifterProxy() if not result['OK']: return result self.log.notice("-" * 40) self.log.notice("Starting cycle for module %s" % self.__moduleProperties['fullName']) mD = self.am_getMaxCycles() if mD > 0: cD = self.__moduleProperties['cyclesDone'] self.log.notice("Remaining %s of %s cycles" % (mD - cD, mD)) self.log.notice("-" * 40) # use SIGALARM as a watchdog interrupt if enabled watchdogInt = self.am_getWatchdogTime() if watchdogInt > 0: signal.signal(signal.SIGALRM, signal.SIG_DFL) signal.alarm(watchdogInt) elapsedTime = time.time() cpuStats = self._startReportToMonitoring() cycleResult = self.__executeModuleCycle() if cpuStats: self._endReportToMonitoring(*cpuStats) # Increment counters self.__moduleProperties['cyclesDone'] += 1 # Show status elapsedTime = time.time() - elapsedTime self.__moduleProperties['totalElapsedTime'] += elapsedTime self.log.notice("-" * 40) self.log.notice("Agent module %s run summary" % self.__moduleProperties['fullName']) self.log.notice(" Executed %s times previously" % self.__moduleProperties['cyclesDone']) self.log.notice(" Cycle took %.2f seconds" % elapsedTime) averageElapsedTime = self.__moduleProperties[ 'totalElapsedTime'] / self.__moduleProperties['cyclesDone'] self.log.notice(" Average execution time: %.2f seconds" % (averageElapsedTime)) elapsedPollingRate = averageElapsedTime * 100 / self.am_getOption( 'PollingTime') self.log.notice(" Polling time: %s seconds" % self.am_getOption('PollingTime')) self.log.notice(" Average execution/polling time: %.2f%%" % elapsedPollingRate) if cycleResult['OK']: self.log.notice(" Cycle was successful") if self.activityMonitoring: # Here we record the data about 
the cycle duration along with some basic details about the # component and right now it isn't committed to the ES backend. self.activityMonitoringReporter.addRecord({ 'timestamp': int(Time.toEpoch()), 'host': Network.getFQDN(), 'componentType': "agent", 'component': "_".join(self.__moduleProperties['fullName'].split("/")), 'cycleDuration': elapsedTime, 'cycles': 1 }) else: self.log.warn(" Cycle had an error:", cycleResult['Message']) self.log.notice("-" * 40) # Update number of cycles if not self.activityMonitoring: self.monitor.setComponentExtraParam( 'cycles', self.__moduleProperties['cyclesDone']) # cycle finished successfully, cancel watchdog if watchdogInt > 0: signal.alarm(0) return cycleResult def _startReportToMonitoring(self): try: if not self.activityMonitoring: now = time.time() stats = os.times() cpuTime = stats[0] + stats[2] if now - self.__monitorLastStatsUpdate < 10: return (now, cpuTime) # Send CPU consumption mark self.__monitorLastStatsUpdate = now # Send Memory consumption mark membytes = MemStat.VmB('VmRSS:') if membytes: mem = membytes / (1024. * 1024.) gMonitor.addMark('MEM', mem) return (now, cpuTime) else: return False except Exception: return False def _endReportToMonitoring(self, initialWallTime, initialCPUTime): wallTime = time.time() - initialWallTime stats = os.times() cpuTime = stats[0] + stats[2] - initialCPUTime percentage = 0 if wallTime: percentage = cpuTime / wallTime * 100. 
if percentage > 0: gMonitor.addMark('CPU', percentage) def __executeModuleCycle(self): # Execute the beginExecution function result = self.am_secureCall(self.beginExecution, name="beginExecution") if not result['OK']: return result # Launch executor functions executors = self.__moduleProperties['executors'] if len(executors) == 1: result = self.am_secureCall(executors[0][0], executors[0][1]) if not result['OK']: return result else: exeThreads = [ threading.Thread(target=executor[0], args=executor[1]) for executor in executors ] for thread in exeThreads: thread.setDaemon(1) thread.start() for thread in exeThreads: thread.join() # Execute the endExecution function return self.am_secureCall(self.endExecution, name="endExecution") def initialize(self, *args, **kwargs): """ Agents should override this method for specific initialization. Executed at every agent (re)start. """ return S_OK() def beginExecution(self): return S_OK() def endExecution(self): return S_OK() def finalize(self): return S_OK() def execute(self): return S_ERROR("Execute method has to be overwritten by agent module") def __activityMonitoringReporting(self): """ This method is called by the ThreadScheduler as a periodic task in order to commit the collected data which is done by the MonitoringReporter and is send to the 'ComponentMonitoring' type. :return: True / False """ result = self.activityMonitoringReporter.commit() return result['OK']
    def initialize(self):
        """ Set the service up: build its URL, load the handler, create the
            lock manager / thread pool / message broker, configure monitoring,
            run the handler's static initialization and load the actions.

            :return: S_OK / S_ERROR
        """
        # Build the URLs
        self._url = self._cfg.getURL()
        if not self._url:
            return S_ERROR("Could not build service URL for %s" % self._name)
        gLogger.verbose("Service URL is %s" % self._url)
        # Load handler
        result = self._loadHandlerInit()
        if not result["OK"]:
            return result
        self._handler = result["Value"]
        # Initialize lock manager
        self._lockManager = LockManager(self._cfg.getMaxWaitingPetitions())
        self._threadPool = ThreadPoolExecutor(max(0, self._cfg.getMaxThreads()))
        self._msgBroker = MessageBroker("%sMSB" % self._name,
                                        threadPool=self._threadPool)
        # Create static dict
        self._serviceInfoDict = {
            "serviceName": self._name,
            "serviceSectionPath": PathFinder.getServiceSection(self._name),
            "URL": self._cfg.getURL(),
            "messageSender": MessageSender(self._name, self._msgBroker),
            "validNames": self._validNames,
            "csPaths": [
                PathFinder.getServiceSection(svcName)
                for svcName in self._validNames
            ],
        }
        # Security logging is on unless disabled both globally and per-service.
        self.securityLogging = Operations().getValue(
            "EnableSecurityLogging", True) and getServiceOption(
                self._serviceInfoDict, "EnableSecurityLogging", True)
        # Initialize Monitoring
        # This is a flag used to check whether "EnableActivityMonitoring" is enabled or not from the config file.
        self.activityMonitoring = Operations().getValue(
            "EnableActivityMonitoring", False) or getServiceOption(
                self._serviceInfoDict, "EnableActivityMonitoring", False)
        if self.activityMonitoring:
            # The import needs to be here because of the CS must be initialized before importing
            # this class (see https://github.com/DIRACGrid/DIRAC/issues/4793)
            from DIRAC.MonitoringSystem.Client.MonitoringReporter import MonitoringReporter
            self.activityMonitoringReporter = MonitoringReporter(
                monitoringType="ComponentMonitoring")
            # Commit the collected monitoring data to ES every 100 seconds.
            gThreadScheduler.addPeriodicTask(
                100, self.__activityMonitoringReporting)
        elif self._standalone:
            self._monitor = gMonitor
        else:
            self._monitor = MonitoringClient()
        self._initMonitoring()
        # Call static initialization function
        try:
            # The handler class receives either the ES reporter or the legacy
            # monitor, depending on the activityMonitoring flag.
            if self.activityMonitoring:
                self._handler["class"]._rh__initializeClass(
                    dict(self._serviceInfoDict), self._lockManager,
                    self._msgBroker, self.activityMonitoringReporter)
            else:
                self._handler["class"]._rh__initializeClass(
                    dict(self._serviceInfoDict), self._lockManager,
                    self._msgBroker, self._monitor)
            if self._handler["init"]:
                for initFunc in self._handler["init"]:
                    gLogger.verbose("Executing initialization function")
                    try:
                        # Each init function gets its own copy of the info dict.
                        result = initFunc(dict(self._serviceInfoDict))
                    except Exception as excp:
                        gLogger.exception(
                            "Exception while calling initialization function",
                            lException=excp)
                        return S_ERROR(
                            "Exception while calling initialization function: %s"
                            % str(excp))
                    if not isReturnStructure(result):
                        return S_ERROR(
                            "Service initialization function %s must return S_OK/S_ERROR"
                            % initFunc)
                    if not result["OK"]:
                        return S_ERROR("Error while initializing %s: %s" %
                                       (self._name, result["Message"]))
        except Exception as e:
            errMsg = "Exception while initializing %s" % self._name
            gLogger.exception(e)
            gLogger.exception(errMsg)
            return S_ERROR(errMsg)
        # Load actions after the handler has initialized itself
        result = self._loadActions()
        if not result["OK"]:
            return result
        self._actions = result["Value"]
        # Legacy monitoring only: periodically report thread-pool occupancy.
        if not self.activityMonitoring:
            gThreadScheduler.addPeriodicTask(30,
                                             self.__reportThreadPoolContents)
        return S_OK()
class Service:
    """ Instruments a DIRAC DISET service: builds its URL, loads the request
        handler, sets up locking, threading, messaging and monitoring, and
        runs the handler's static initialization.
    """

    # Maps an exposed action type to the handler-method prefix implementing it.
    # 'Connection' is a meta-action that resolves to 'Message' (see _isMetaAction).
    SVC_VALID_ACTIONS = {
        'RPC': 'export',
        'FileTransfer': 'transfer',
        'Message': 'msg',
        'Connection': 'Message'
    }
    # Security-log client shared by all Service instances.
    SVC_SECLOG_CLIENT = SecurityLogClient()

    def __init__(self, serviceData):
        """
        :param dict serviceData: dict with modName, standalone, loadName,
            moduleObj, classObj. ``standalone`` is True when only one service
            is started; then the global gMonitor is used, otherwise the
            service gets its own MonitoringClient.
        """
        self._svcData = serviceData
        self._name = serviceData['loadName']
        self._startTime = Time.dateTime()
        # The service answers to both its module name and its load name.
        self._validNames = [serviceData['modName']]
        if serviceData['loadName'] not in self._validNames:
            self._validNames.append(serviceData['loadName'])
        self._cfg = ServiceConfiguration(list(self._validNames))
        if serviceData['standalone']:
            self._monitor = gMonitor
        else:
            self._monitor = MonitoringClient()
        self.__monitorLastStatsUpdate = time.time()
        self._stats = {'queries': 0, 'connections': 0}
        self._authMgr = AuthManager(
            "%s/Authorization" %
            PathFinder.getServiceSection(serviceData['loadName']))
        self._transportPool = getGlobalTransportPool()
        self.__cloneId = 0
        self.__maxFD = 0

    def setCloneProcessId(self, cloneId):
        """ Record the clone id and tag the monitoring component name with it. """
        self.__cloneId = cloneId
        self._monitor.setComponentName("%s-Clone:%s" % (self._name, cloneId))

    def _isMetaAction(self, action):
        """ Resolve a meta-action (e.g. 'Connection' -> 'Message').

            :return: the referred action name, or False when *action* is a
                     plain (non-meta) action
        """
        referedAction = Service.SVC_VALID_ACTIONS[action]
        if referedAction in Service.SVC_VALID_ACTIONS:
            return referedAction
        return False

    def initialize(self):
        """ Set the service up: build its URL, load the handler, create the
            lock manager / thread pool / message broker, run the handler's
            static initialization and load the actions.

            :return: S_OK / S_ERROR
        """
        # Build the URLs
        self._url = self._cfg.getURL()
        if not self._url:
            return S_ERROR("Could not build service URL for %s" % self._name)
        gLogger.verbose("Service URL is %s" % self._url)
        # Load handler
        result = self._loadHandlerInit()
        if not result['OK']:
            return result
        self._handler = result['Value']
        # Initialize lock manager
        self._lockManager = LockManager(self._cfg.getMaxWaitingPetitions())
        self._initMonitoring()
        self._threadPool = ThreadPool(1, max(0, self._cfg.getMaxThreads()),
                                      self._cfg.getMaxWaitingPetitions())
        self._threadPool.daemonize()
        self._msgBroker = MessageBroker("%sMSB" % self._name,
                                        threadPool=self._threadPool)
        # Create static dict
        self._serviceInfoDict = {
            'serviceName': self._name,
            'serviceSectionPath': PathFinder.getServiceSection(self._name),
            'URL': self._cfg.getURL(),
            'messageSender': MessageSender(self._name, self._msgBroker),
            'validNames': self._validNames,
            'csPaths': [
                PathFinder.getServiceSection(svcName)
                for svcName in self._validNames
            ]
        }
        # Call static initialization function
        try:
            self._handler['class']._rh__initializeClass(
                dict(self._serviceInfoDict), self._lockManager,
                self._msgBroker, self._monitor)
            if self._handler['init']:
                for initFunc in self._handler['init']:
                    gLogger.verbose("Executing initialization function")
                    try:
                        # Each init function gets its own copy of the info dict.
                        result = initFunc(dict(self._serviceInfoDict))
                    # FIX: was Python 2 'except Exception, excp' syntax — a
                    # SyntaxError under Python 3 and inconsistent with the
                    # 'as' form used elsewhere in this file.
                    except Exception as excp:
                        gLogger.exception(
                            "Exception while calling initialization function",
                            lException=excp)
                        return S_ERROR(
                            "Exception while calling initialization function: %s"
                            % str(excp))
                    if not isReturnStructure(result):
                        return S_ERROR(
                            "Service initialization function %s must return S_OK/S_ERROR"
                            % initFunc)
                    if not result['OK']:
                        return S_ERROR("Error while initializing %s: %s" %
                                       (self._name, result['Message']))
        except Exception as e:
            errMsg = "Exception while initializing %s" % self._name
            # Attach the exception so the traceback is not lost.
            gLogger.exception(errMsg, lException=e)
            return S_ERROR(errMsg)
        # Load actions after the handler has initialized itself
        result = self._loadActions()
        if not result['OK']:
            return result
        self._actions = result['Value']
        gThreadScheduler.addPeriodicTask(30, self.__reportThreadPoolContents)
        return S_OK()
class Service: SVC_VALID_ACTIONS = { 'RPC' : 'export', 'FileTransfer': 'transfer', 'Message' : 'msg', 'Connection' : 'Message' } SVC_SECLOG_CLIENT = SecurityLogClient() def __init__( self, serviceData ): self._svcData = serviceData self._name = serviceData[ 'loadName' ] self._startTime = Time.dateTime() self._validNames = [ serviceData[ 'modName' ] ] if serviceData[ 'loadName' ] not in self._validNames: self._validNames.append( serviceData[ 'loadName' ] ) self._cfg = ServiceConfiguration( list( self._validNames ) ) if serviceData[ 'standalone' ]: self._monitor = gMonitor else: self._monitor = MonitoringClient() self.__monitorLastStatsUpdate = time.time() self._stats = { 'queries' : 0, 'connections' : 0 } self._authMgr = AuthManager( "%s/Authorization" % PathFinder.getServiceSection( serviceData[ 'loadName' ] ) ) self._transportPool = getGlobalTransportPool() self.__cloneId = 0 self.__maxFD = 0 def setCloneProcessId( self, cloneId ): self.__cloneId = cloneId self._monitor.setComponentName( "%s-Clone:%s" % ( self._name, cloneId ) ) def _isMetaAction( self, action ): referedAction = Service.SVC_VALID_ACTIONS[ action ] if referedAction in Service.SVC_VALID_ACTIONS: return referedAction return False def initialize( self ): #Build the URLs self._url = self._cfg.getURL() if not self._url: return S_ERROR( "Could not build service URL for %s" % self._name ) gLogger.verbose( "Service URL is %s" % self._url ) #Load handler result = self._loadHandlerInit() if not result[ 'OK' ]: return result self._handler = result[ 'Value' ] #Initialize lock manager self._lockManager = LockManager( self._cfg.getMaxWaitingPetitions() ) self._initMonitoring() self._threadPool = ThreadPool( 1, max( 0, self._cfg.getMaxThreads() ), self._cfg.getMaxWaitingPetitions() ) self._threadPool.daemonize() self._msgBroker = MessageBroker( "%sMSB" % self._name, threadPool = self._threadPool ) #Create static dict self._serviceInfoDict = { 'serviceName' : self._name, 'serviceSectionPath' : 
PathFinder.getServiceSection( self._name ), 'URL' : self._cfg.getURL(), 'messageSender' : MessageSender( self._name, self._msgBroker ), 'validNames' : self._validNames, 'csPaths' : [ PathFinder.getServiceSection( svcName ) for svcName in self._validNames ] } #Call static initialization function try: self._handler[ 'class' ]._rh__initializeClass( dict( self._serviceInfoDict ), self._lockManager, self._msgBroker, self._monitor ) if self._handler[ 'init' ]: for initFunc in self._handler[ 'init' ]: gLogger.verbose( "Executing initialization function" ) try: result = initFunc( dict( self._serviceInfoDict ) ) except Exception, excp: gLogger.exception( "Exception while calling initialization function" ) return S_ERROR( "Exception while calling initialization function: %s" % str( excp ) ) if not isReturnStructure( result ): return S_ERROR( "Service initialization function %s must return S_OK/S_ERROR" % initFunc ) if not result[ 'OK' ]: return S_ERROR( "Error while initializing %s: %s" % ( self._name, result[ 'Message' ] ) ) except Exception, e: errMsg = "Exception while initializing %s" % self._name gLogger.exception( errMsg ) return S_ERROR( errMsg ) #Load actions after the handler has initialized itself result = self._loadActions() if not result[ 'OK' ]: return result self._actions = result[ 'Value' ] gThreadScheduler.addPeriodicTask( 30, self.__reportThreadPoolContents ) return S_OK()