import time
import datetime

from pandacommon.pandalogger import logger_utils

from pandaatm.atmconfig import atm_config
from pandaatm.atmcore import core_utils
from pandaatm.atmbody.agent_base import AgentBase


# module-level logger shared by every TestingAgent method
base_logger = logger_utils.setup_logger('testing_agent')


class TestingAgent(AgentBase):
    """
    Agent that does nothing but write a debug record and sleep, repeatedly.
    """

    def __init__(self):
        super().__init__()
        # pause between two iterations of run(), in seconds (passed to time.sleep)
        self.sleepPeriod = 10

    def run(self):
        """
        Loop forever: log 'start' at debug level, then sleep for sleepPeriod.
        This method never returns on its own.
        """
        run_log = logger_utils.make_logger(base_logger, method_name='TestingAgent.run')
        while True:
            # the only "work" of this agent is the log record
            run_log.debug('start')
            # wait before the next iteration
            time.sleep(self.sleepPeriod)
import json from pandajedi.jedimsgprocessor.base_msg_processor import BaseMsgProcPlugin from pandacommon.pandalogger import logger_utils # logger base_logger = logger_utils.setup_logger(__name__.split('.')[-1]) # processing message processing plugin class ProcessingMsgProcPlugin(BaseMsgProcPlugin): def process(self, msg_obj, decoded_data=None): # logger tmp_log = logger_utils.make_logger(base_logger, method_name='process') # start tmp_log.info('start') tmp_log.debug('sub_id={0} ; msg_id={1}'.format(msg_obj.sub_id, msg_obj.msg_id)) # parse if decoded_data is None: # json decode try: msg_dict = json.loads(msg_obj.data) except Exception as e: err_str = 'failed to parse message json {2} , skipped. {0} : {1}'.format( e.__class__.__name__, e, msg_obj.data) tmp_log.error(err_str) raise else: msg_dict = decoded_data
import collections import time import copy import traceback try: from queue import Queue, Empty except ImportError: from Queue import Queue, Empty import stomp from pandacommon.pandalogger import logger_utils # logger base_logger = logger_utils.setup_logger('msg_bkr_utils') # global lock _GLOBAL_LOCK = threading.Lock() # global map of message buffers _BUFFER_MAP = dict() # get connection dict def _get_connection_dict(host_port_list, use_ssl=False, cert_file=None, key_file=None, vhost=None, force=False):
import os import re import time import socket import json import logging from .msg_bkr_utils import MsgBuffer, MBListenerProxy, MBSenderProxy from pandacommon.pandautils.thread_utils import GenericThread from pandacommon.pandautils.plugin_factory import PluginFactory from pandacommon.pandalogger import logger_utils # logger base_logger = logger_utils.setup_logger('msg_processor') # get mb proxy instance def get_mb_proxy(name, sconf, qconf, mode='listener', **kwargs): """ get MBListenerProxy or MBSenderProxy instance according to config dict """ # class of mb proxy the_class = MBListenerProxy if mode == 'sender': the_class = MBSenderProxy # resolve env variables if any host_port_list = sconf['host_port_list'] if host_port_list: new_list = [] for host_port in host_port_list: m = re.search(r'^\${(\w+)\}$', host_port)
from pandacommon.pandamsgbkr import msg_processor from pandacommon.pandalogger import logger_utils from pandajedi.jediconfig import jedi_config # logger msg_processor.base_logger = logger_utils.setup_logger('JediMsgProcessor') # Main message processing agent class MsgProcAgent(msg_processor.MsgProcAgentBase): pass # launch def launcher(stop_event): tmp_log = logger_utils.make_logger(msg_processor.base_logger, method_name='launcher') tmp_log.debug('start') try: config_file = jedi_config.msgprocessor.configFile except Exception as e: tmp_log.error( 'failed to read config json file; should not happen... {0}: {1}'. format(e.__class__.__name__, e)) raise e # start agent = MsgProcAgent(config_file) agent.start() tmp_log.debug('started') # wait for stop event
from pandacommon.pandamsgbkr import msg_processor
from pandacommon.pandalogger import logger_utils
from pandaserver.config import panda_config


# logger: rebind the base logger of the shared msg_processor module
# to one named 'PanDAMsgProcessor'
msg_processor.base_logger = logger_utils.setup_logger('PanDAMsgProcessor')


# Main message processing agent
class MsgProcAgent(msg_processor.MsgProcAgentBase):
    """
    Message processing agent; adds nothing on top of MsgProcAgentBase.
    """
import os import time import datetime import copy import json from pandacommon.pandalogger import logger_utils from pandaatm.atmconfig import atm_config from pandaatm.atmcore import core_utils from pandaatm.atmbody.agent_base import AgentBase from pandaatm.atmutils.slow_task_analyzer_utils import get_job_durations, get_jobs_time_consumption_statistics, bad_job_test_main base_logger = logger_utils.setup_logger('slow_task_analyzer') # agent class class SlowTaskAnalyzer(AgentBase): def __init__(self): super().__init__() # parameters self.sleepPeriod = 300 self.sinceHours = 336 self.taskDurationMaxHours = 168 self.taskSuccefulRunTimeMinPercent = 80 self.taskEachStatusMaxHours = 12 self.joblessIntervalMaxHours = 16 self.jobBadTimeMaxPercent = 10 self.jobMaxHoursMap = {
import threading from pandacommon.pandalogger import logger_utils # logger base_logger = logger_utils.setup_logger('plugin_factory') # plugin factory class PluginFactory(object): # class lock __lock = threading.Lock() # constructor def __init__(self): self.classMap = dict() # get plugin def get_plugin(self, plugin_conf): # logger tmpLog = logger_utils.make_logger(base_logger, method_name='get_plugin') # use module + class as key moduleName = plugin_conf['module'] className = plugin_conf['name'] if moduleName is None or className is None: tmpLog.warning( 'Invalid plugin; either module or name is missing '.format( moduleName)) return None pluginKey = '{0}.{1}'.format(moduleName, className)
def daemon_loop(dem_config, msg_queue, pipe_conn, worker_lifetime, tbuf=None):
    """
    Main loop of one daemon worker process.

    The worker imports the script module of every configured daemon, then
    repeatedly takes a daemon name from the message queue, runs the main()
    function of the corresponding module and reports the outcome to the
    master through the pipe. The loop ends when the worker lifetime is
    exceeded, when CMD_STOP is received from the pipe, or when a daemon
    script raises an exception.

    :param dem_config: dict mapping daemon name to its attributes; each
                       attribute dict is read for the keys 'module',
                       'arguments' (a list), 'period' (seconds), 'sync'
                       and 'loop'
    :param msg_queue: queue from which daemon names are taken with
                      get(timeout=5); queue.Empty is handled as timeout
    :param pipe_conn: connection to the master; polled for commands and
                      used to send back status tuples
                      (dem_name, has_run, last_run_start_ts, last_run_end_ts)
    :param worker_lifetime: seconds after which the worker stops itself
    :param tbuf: taskBuffer object handed to the daemon scripts; when None
                 a taskBuffer is initialized here with one DB connection
    :return: None
    """
    # pid of the worker
    my_pid = os.getpid()
    my_full_pid = '{0}-{1}-{2}'.format(socket.getfqdn().split('.')[0], os.getpgrp(), my_pid)
    # logger to log in file
    base_logger = logger_utils.setup_logger('daemons')
    tmp_log = logger_utils.make_logger(base_logger, 'worker_pid={pid}'.format(pid=my_pid))
    tmp_log.info('daemon worker start')

    # signal handler; it only logs the signal and does not stop the worker by itself
    def got_end_sig(sig, frame):
        tmp_log.warning('(got signal {sig})'.format(sig=sig))

    for sig in END_SIGNALS:
        signal.signal(sig, got_end_sig)
    # dict of all daemons and their script module object
    module_map = {}
    # package of daemon scripts
    mod_package = getattr(daemon_config, 'package')
    # start timestamp
    start_ts = time.time()
    # expiry time
    expiry_ts = start_ts + worker_lifetime
    # create taskBuffer object if not given
    if tbuf is None:
        # initialize cx_Oracle using dummy connection
        try:
            from pandaserver.taskbuffer.Initializer import initializer
            initializer.init()
        except Exception as e:
            tmp_log.error('failed to launch initializer with {err} ; terminated'.format(
                                err='{0}: {1}'.format(e.__class__.__name__, e)))
            return
        # taskBuffer object
        try:
            from pandaserver.taskbuffer.TaskBuffer import taskBuffer as tbuf
            tbuf.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
            tmp_log.debug('taskBuffer initialized')
        except Exception as e:
            tmp_log.error('failed to initialize taskBuffer with {err} ; terminated'.format(
                                err='{0}: {1}'.format(e.__class__.__name__, e)))
            return
    # import module of all daemons
    for dem_name, attrs in dem_config.items():
        mod_name = attrs['module']
        try:
            the_module = importlib.import_module('.{mod}'.format(mod=mod_name), mod_package)
        except Exception as e:
            # a daemon whose module cannot be imported is left out of module_map,
            # so later messages naming it are reported as invalid
            tmp_log.warning('for daemon {dem}, failed to import {mod} with {err} ; skipped it'.format(
                                dem=dem_name, mod=mod_name, err='{0}: {1}'.format(e.__class__.__name__, e)))
        else:
            module_map[dem_name] = the_module
    tmp_log.debug('initialized, running')
    # loop
    while True:
        # stop the worker since when reaches its lifetime
        if time.time() > expiry_ts:
            tmp_log.info('worker reached its lifetime, stop this worker')
            break
        # get command from pipe
        if pipe_conn.poll():
            cmd = pipe_conn.recv()
            if cmd == CMD_STOP:
                # got stop command, stop the process
                tmp_log.info('got stop command, stop this worker')
                break
            else:
                tmp_log.debug('got invalid command "{cmd}" ; skipped it'.format(cmd=cmd))
        # clean up memory
        gc.collect()
        # get a message from queue
        tmp_log.debug('waiting for message...')
        keep_going = True
        one_msg = None
        while True:
            try:
                one_msg = msg_queue.get(timeout=5)
                break
            except queue.Empty:
                # timeout to get from queue, check whether to keep going
                if time.time() > expiry_ts:
                    # worker expired, do not keep going
                    keep_going = False
                    break
        # keep going
        if not keep_going:
            # back to the top of the loop, where the lifetime check logs and breaks
            continue
        # process message
        try:
            is_daemon_name = one_msg is not None and one_msg in module_map
        except TypeError:
            # an unhashable message cannot be a key of module_map; treat it as
            # invalid instead of letting the lookup kill the worker
            is_daemon_name = False
        if is_daemon_name:
            # got a daemon name, get the module object and corresponding attributes
            dem_name = one_msg
            tmp_log.debug('got message of {dem}'.format(dem=dem_name))
            the_module = module_map[dem_name]
            attrs = dem_config[dem_name]
            mod_args = attrs['arguments']
            mod_argv = tuple([__file__] + mod_args)
            dem_period = attrs['period']
            # the process lock calls take the time limit in minutes
            dem_period_in_minute = dem_period/60.
            is_sync = attrs['sync']
            is_loop = attrs['loop']
            # initialize variables
            to_run_daemon = False
            has_run = False
            run_failed = False
            last_run_start_ts = 0
            last_run_end_ts = 0
            # component name in lock table
            component = 'pandaD.{dem}'.format(dem=dem_name)
            # whether the daemon should be synchronized among nodes
            if is_sync:
                # synchronized daemon, check process lock in DB
                ret_val, locked_time = tbuf.checkProcessLock_PANDA(component=component, pid=my_full_pid, time_limit=dem_period_in_minute)
                if ret_val:
                    # locked by some process on other nodes
                    last_run_start_ts = int((locked_time - EPOCH).total_seconds())
                    tmp_log.debug('found {dem} is locked by other process ; skipped it'.format(dem=dem_name))
                else:
                    # try to get the lock
                    got_lock = tbuf.lockProcess_PANDA(component=component, pid=my_full_pid, time_limit=dem_period_in_minute)
                    if got_lock:
                        # got the lock
                        to_run_daemon = True
                        tmp_log.debug('got lock of {dem}'.format(dem=dem_name))
                    else:
                        # did not get lock, skip
                        last_run_start_ts = int(time.time())
                        tmp_log.debug('did not get lock of {dem} ; skipped it'.format(dem=dem_name))
            else:
                to_run_daemon = True
            # run daemon
            if to_run_daemon:
                last_run_start_ts = int(time.time())
                try:
                    if is_loop:
                        # go looping the script until reaching daemon period
                        tmp_log.info('{dem} start looping'.format(dem=dem_name))
                        # separate name so that the worker start timestamp is not overwritten
                        loop_start_ts = time.time()
                        while True:
                            ret_val = the_module.main(argv=mod_argv, tbuf=tbuf)
                            now_ts = time.time()
                            if not ret_val:
                                # daemon main function says stop the loop
                                break
                            if now_ts > loop_start_ts + dem_period:
                                # longer than the period, stop the loop
                                break
                        tmp_log.info('{dem} finish looping'.format(dem=dem_name))
                    else:
                        # execute the module script with arguments
                        tmp_log.info('{dem} start'.format(dem=dem_name))
                        the_module.main(argv=mod_argv, tbuf=tbuf)
                        tmp_log.info('{dem} finish'.format(dem=dem_name))
                except Exception as e:
                    # with error
                    tb = traceback.format_exc()
                    tmp_log.error('failed to run daemon {dem} with {err} ; stop this worker'.format(
                                    dem=dem_name, err='{0}: {1}\n{2}\n'.format(e.__class__.__name__, e, tb)))
                    # remember the failure; the worker stops after reporting the status
                    run_failed = True
                # daemon has run, whether it finished normally or failed
                last_run_end_ts = int(time.time())
                has_run = True
            # send daemon status back to master; also sent when the daemon was
            # skipped, in which case has_run is False
            status_tuple = (dem_name, has_run, last_run_start_ts, last_run_end_ts)
            pipe_conn.send(status_tuple)
            if run_failed:
                # stop the worker
                break
            # FIXME: stop and spawn worker in every run for now since some script breaks the worker without exception
            # tmp_log.info('as script done, stop this worker')
            # break
        else:
            # got invalid message
            tmp_log.warning('got invalid message "{msg}", skipped it'.format(msg=one_msg))
        # sleep
        time.sleep(2**-5)