Example #1
0
import time
import datetime

from pandacommon.pandalogger import logger_utils

from pandaatm.atmconfig import atm_config
from pandaatm.atmcore import core_utils
from pandaatm.atmbody.agent_base import AgentBase





# module-level base logger named 'testing_agent'; method-level loggers are derived from it with make_logger
base_logger = logger_utils.setup_logger('testing_agent')


class TestingAgent(AgentBase):
    """Agent that does nothing but write a debug record and sleep, forever."""

    def __init__(self):
        super().__init__()
        # pause between two iterations of run(), in seconds (passed to time.sleep)
        self.sleepPeriod = 10

    def run(self):
        """Endless loop: log 'start' at debug level, then sleep for sleepPeriod seconds."""
        run_logger = logger_utils.make_logger(
            base_logger,
            method_name='TestingAgent.run',
        )
        while True:
            # placeholder for real work
            run_logger.debug('start')
            # wait before the next iteration; sleepPeriod is re-read every time
            time.sleep(self.sleepPeriod)

Example #2
0
import json

from pandajedi.jedimsgprocessor.base_msg_processor import BaseMsgProcPlugin

from pandacommon.pandalogger import logger_utils

# module-level base logger, named after the last component of this module's dotted name
base_logger = logger_utils.setup_logger(__name__.split('.')[-1])


# processing message processing plugin
class ProcessingMsgProcPlugin(BaseMsgProcPlugin):
    """
    Message processing plugin for processing messages
    """
    def process(self, msg_obj, decoded_data=None):
        """
        Process one message

        The message dict is taken from decoded_data when it is given; otherwise it is
        obtained by JSON-decoding msg_obj.data

        :param msg_obj: message object; its sub_id, msg_id and data attributes are read here
        :param decoded_data: already decoded message content, or None to decode msg_obj.data as JSON
        :raises Exception: whatever json.loads raised when msg_obj.data cannot be decoded
        """
        # logger
        tmp_log = logger_utils.make_logger(base_logger, method_name='process')
        # start
        tmp_log.info('start')
        tmp_log.debug('sub_id={0} ; msg_id={1}'.format(msg_obj.sub_id,
                                                       msg_obj.msg_id))
        # parse
        if decoded_data is None:
            # json decode
            try:
                msg_dict = json.loads(msg_obj.data)
            except Exception as e:
                # {2} is the raw message body, {0} the exception class name, {1} the exception itself
                err_str = 'failed to parse message json {2} , skipped. {0} : {1}'.format(
                    e.__class__.__name__, e, msg_obj.data)
                tmp_log.error(err_str)
                # NOTE: although the log says "skipped", the exception is re-raised to the caller
                raise
        else:
            # caller already decoded the message, use it as is
            msg_dict = decoded_data
Example #3
0
import collections
import copy
import threading
import time
import traceback

try:
    from queue import Queue, Empty
except ImportError:
    from Queue import Queue, Empty

import stomp

from pandacommon.pandalogger import logger_utils

# module-level base logger named 'msg_bkr_utils'
base_logger = logger_utils.setup_logger('msg_bkr_utils')

# global lock shared by the whole module; needs the stdlib threading module to be imported
_GLOBAL_LOCK = threading.Lock()

# global map of message buffers, empty at import time
_BUFFER_MAP = {}


# get connection dict
def _get_connection_dict(host_port_list,
                         use_ssl=False,
                         cert_file=None,
                         key_file=None,
                         vhost=None,
                         force=False):
Example #4
0
import os
import re
import time
import socket
import json
import logging

from .msg_bkr_utils import MsgBuffer, MBListenerProxy, MBSenderProxy
from pandacommon.pandautils.thread_utils import GenericThread
from pandacommon.pandautils.plugin_factory import PluginFactory
from pandacommon.pandalogger import logger_utils

# module-level base logger named 'msg_processor'
base_logger = logger_utils.setup_logger('msg_processor')


# get mb proxy instance
def get_mb_proxy(name, sconf, qconf, mode='listener', **kwargs):
    """
    get MBListenerProxy or MBSenderProxy instance according to config dict

    :param name: name of the proxy (NOTE(review): how it is used is not shown in this part - confirm)
    :param sconf: config dict; its 'host_port_list' entry is read here
    :param qconf: config dict (NOTE(review): presumably the queue config - confirm against callers)
    :param mode: 'sender' selects MBSenderProxy; any other value selects MBListenerProxy
    :param kwargs: extra keyword arguments
    """
    # class of mb proxy
    the_class = MBListenerProxy
    if mode == 'sender':
        the_class = MBSenderProxy
    # resolve env variables if any
    host_port_list = sconf['host_port_list']
    if host_port_list:
        new_list = []
        for host_port in host_port_list:
            # matches an entry that consists solely of ${NAME}, capturing NAME (word characters only)
            m = re.search(r'^\${(\w+)\}$', host_port)
Example #5
0
from pandacommon.pandamsgbkr import msg_processor
from pandacommon.pandalogger import logger_utils

from pandajedi.jediconfig import jedi_config

# logger: replace base_logger of the msg_processor module with one named 'JediMsgProcessor'
msg_processor.base_logger = logger_utils.setup_logger('JediMsgProcessor')


# Main message processing agent
class MsgProcAgent(msg_processor.MsgProcAgentBase):
    """Main message processing agent; adds nothing on top of MsgProcAgentBase."""


# launch
def launcher(stop_event):
    """
    Launch the message processing agent

    Reads the config file location from jedi_config.msgprocessor.configFile, creates a
    MsgProcAgent with it and starts the agent

    :param stop_event: NOTE(review): presumably an event object the launcher waits on - confirm
    :raises Exception: re-raised when the config file attribute cannot be read from jedi_config
    """
    # logger derived from the base logger of the msg_processor module
    tmp_log = logger_utils.make_logger(msg_processor.base_logger,
                                       method_name='launcher')
    tmp_log.debug('start')
    try:
        config_file = jedi_config.msgprocessor.configFile
    except Exception as e:
        # log with exception class name and message, then propagate to the caller
        tmp_log.error(
            'failed to read config json file; should not happen... {0}: {1}'.
            format(e.__class__.__name__, e))
        raise e
    # start
    agent = MsgProcAgent(config_file)
    agent.start()
    tmp_log.debug('started')
    # wait for stop event
from pandacommon.pandamsgbkr import msg_processor
from pandacommon.pandalogger import logger_utils

from pandaserver.config import panda_config


# logger: replace base_logger of the msg_processor module with one named 'PanDAMsgProcessor'
msg_processor.base_logger = logger_utils.setup_logger('PanDAMsgProcessor')


# Main message processing agent
class MsgProcAgent(msg_processor.MsgProcAgentBase):
    """Main message processing agent; adds nothing on top of MsgProcAgentBase."""
Example #7
0
import os
import time
import datetime
import copy
import json

from pandacommon.pandalogger import logger_utils

from pandaatm.atmconfig import atm_config
from pandaatm.atmcore import core_utils
from pandaatm.atmbody.agent_base import AgentBase
from pandaatm.atmutils.slow_task_analyzer_utils import get_job_durations, get_jobs_time_consumption_statistics, bad_job_test_main


# module-level base logger named 'slow_task_analyzer'
base_logger = logger_utils.setup_logger('slow_task_analyzer')


# agent class
class SlowTaskAnalyzer(AgentBase):

    def __init__(self):
        super().__init__()
        # parameters
        self.sleepPeriod = 300
        self.sinceHours = 336
        self.taskDurationMaxHours = 168
        self.taskSuccefulRunTimeMinPercent = 80
        self.taskEachStatusMaxHours = 12
        self.joblessIntervalMaxHours = 16
        self.jobBadTimeMaxPercent = 10
        self.jobMaxHoursMap = {
Example #8
0
import threading

from pandacommon.pandalogger import logger_utils

# module-level base logger named 'plugin_factory'
base_logger = logger_utils.setup_logger('plugin_factory')


# plugin factory
class PluginFactory(object):
    """
    Factory of plugins described by a config dict with 'module' and 'name' entries
    """
    # class lock
    __lock = threading.Lock()

    # constructor
    def __init__(self):
        # map filled by the factory; presumably keyed by '<module>.<name>' (see pluginKey in get_plugin)
        self.classMap = dict()

    # get plugin
    def get_plugin(self, plugin_conf):
        """
        Get the plugin described by plugin_conf

        :param plugin_conf: plugin config; plugin_conf['module'] and plugin_conf['name'] are read
        :return: None when module or name is None
        :raises KeyError: when plugin_conf has no 'module' or 'name' entry
        """
        # logger
        tmpLog = logger_utils.make_logger(base_logger,
                                          method_name='get_plugin')
        # use module + class as key
        moduleName = plugin_conf['module']
        className = plugin_conf['name']
        if moduleName is None or className is None:
            # report both values so that the broken config entry can be identified from the log
            tmpLog.warning(
                'Invalid plugin; either module or name is missing '
                '(module={0}, name={1})'.format(moduleName, className))
            return None
        pluginKey = '{0}.{1}'.format(moduleName, className)
Example #9
0
def daemon_loop(dem_config, msg_queue, pipe_conn, worker_lifetime, tbuf=None):
    """
    Main loop of one daemon worker process

    The worker imports the script module of every configured daemon, then repeatedly takes
    a daemon name from msg_queue, runs main() of the corresponding module (guarded by a
    process lock in DB when the daemon is synchronized) and reports the status of the run
    to the master through pipe_conn. The loop ends when the worker lifetime is exceeded,
    when CMD_STOP is received from pipe_conn, or when a daemon script raises an exception.

    :param dem_config: dict of daemon name -> attributes dict; the entries 'module',
                       'arguments', 'period' (seconds), 'sync' and 'loop' are read here
    :param msg_queue: queue from which daemon names are taken with get(timeout=5)
    :param pipe_conn: connection to the master; polled for commands and used to send back
                      (dem_name, has_run, last_run_start_ts, last_run_end_ts) tuples
    :param worker_lifetime: lifetime of the worker in seconds, counted from the start of this function
    :param tbuf: taskBuffer object; when None, one is initialized here with a single DB connection
    :return: None
    """
    # pid of the worker
    my_pid = os.getpid()
    my_full_pid = '{0}-{1}-{2}'.format(socket.getfqdn().split('.')[0], os.getpgrp(), my_pid)
    # logger to log in file
    base_logger = logger_utils.setup_logger('daemons')
    tmp_log = logger_utils.make_logger(base_logger, 'worker_pid={pid}'.format(pid=my_pid))
    tmp_log.info('daemon worker start')
    # signal handler; it only logs the signal, the worker keeps running
    def got_end_sig(sig, frame):
        tmp_log.warning('(got signal {sig})'.format(sig=sig))
    for sig in END_SIGNALS:
        signal.signal(sig, got_end_sig)
    # dict of all daemons and their script module object
    module_map = {}
    # package of daemon scripts
    mod_package = getattr(daemon_config, 'package')
    # start timestamp
    start_ts = time.time()
    # expiry time
    expiry_ts = start_ts + worker_lifetime
    # create taskBuffer object if not given
    if tbuf is None:
        # initialize cx_Oracle using dummy connection
        try:
            from pandaserver.taskbuffer.Initializer import initializer
            initializer.init()
        except Exception as e:
            tmp_log.error('failed to launch initializer with {err} ; terminated'.format(
                                err='{0}: {1}'.format(e.__class__.__name__, e)))
            return
        # taskBuffer object
        try:
            from pandaserver.taskbuffer.TaskBuffer import taskBuffer as tbuf
            tbuf.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
            tmp_log.debug('taskBuffer initialized')
        except Exception as e:
            tmp_log.error('failed to initialize taskBuffer with {err} ; terminated'.format(
                                err='{0}: {1}'.format(e.__class__.__name__, e)))
            return
    # import module of all daemons; a daemon whose module cannot be imported is left out of module_map
    for dem_name, attrs in dem_config.items():
        mod_name = attrs['module']
        try:
            the_module = importlib.import_module('.{mod}'.format(mod=mod_name), mod_package)
        except Exception as e:
            tmp_log.warning('for daemon {dem}, failed to import {mod} with {err} ; skipped it'.format(
                                dem=dem_name, mod=mod_name, err='{0}: {1}'.format(e.__class__.__name__, e)))
        else:
            module_map[dem_name] = the_module
    tmp_log.debug('initialized, running')
    # loop
    while True:
        # stop the worker when it reaches its lifetime
        if time.time() > expiry_ts:
            tmp_log.info('worker reached its lifetime, stop this worker')
            break
        # get command from pipe
        if pipe_conn.poll():
            cmd = pipe_conn.recv()
            if cmd == CMD_STOP:
                # got stop command, stop the process
                tmp_log.info('got stop command, stop this worker')
                break
            else:
                tmp_log.debug('got invalid command "{cmd}" ; skipped it'.format(cmd=cmd))
        # clean up memory
        gc.collect()
        # get a message from queue
        tmp_log.debug('waiting for message...')
        keep_going = True
        one_msg = None
        while True:
            try:
                one_msg = msg_queue.get(timeout=5)
                break
            except queue.Empty:
                # timeout to get from queue, check whether to keep going
                if time.time() > expiry_ts:
                    # worker expired, do not keep going
                    keep_going = False
                    break
        # worker expired while waiting; go back to the top of the loop where the lifetime check stops it
        if not keep_going:
            continue
        # process message
        if one_msg is not None and one_msg in module_map:
            # got a daemon name, get the module object and corresponding attributes
            dem_name = one_msg
            tmp_log.debug('got message of {dem}'.format(dem=dem_name))
            the_module = module_map[dem_name]
            attrs = dem_config[dem_name]
            mod_args = attrs['arguments']
            # argv passed to the script: this file followed by the configured arguments
            mod_argv = tuple([__file__] + mod_args)
            dem_period = attrs['period']
            dem_period_in_minute = dem_period/60.
            is_sync = attrs['sync']
            is_loop = attrs['loop']
            # initialize variables
            to_run_daemon = False
            has_run = False
            last_run_start_ts = 0
            last_run_end_ts = 0
            # component name in lock table
            component = 'pandaD.{dem}'.format(dem=dem_name)
            # whether the daemon should be synchronized among nodes
            if is_sync:
                # synchronized daemon, check process lock in DB
                ret_val, locked_time = tbuf.checkProcessLock_PANDA(component=component, pid=my_full_pid, time_limit=dem_period_in_minute)
                if ret_val:
                    # locked by some process on other nodes
                    last_run_start_ts = int((locked_time - EPOCH).total_seconds())
                    tmp_log.debug('found {dem} is locked by other process ; skipped it'.format(dem=dem_name))
                else:
                    # try to get the lock
                    got_lock = tbuf.lockProcess_PANDA(component=component, pid=my_full_pid, time_limit=dem_period_in_minute)
                    if got_lock:
                        # got the lock
                        to_run_daemon = True
                        tmp_log.debug('got lock of {dem}'.format(dem=dem_name))
                    else:
                        # did not get lock, skip
                        last_run_start_ts = int(time.time())
                        tmp_log.debug('did not get lock of {dem} ; skipped it'.format(dem=dem_name))
            else:
                to_run_daemon = True
            # run daemon
            if to_run_daemon:
                last_run_start_ts = int(time.time())
                try:
                    if is_loop:
                        # go looping the script until reaching daemon period
                        tmp_log.info('{dem} start looping'.format(dem=dem_name))
                        # separate name so that the worker-level start_ts is not overwritten
                        loop_start_ts = time.time()
                        while True:
                            ret_val = the_module.main(argv=mod_argv, tbuf=tbuf)
                            now_ts = time.time()
                            if not ret_val:
                                # daemon main function says stop the loop
                                break
                            if now_ts > loop_start_ts + dem_period:
                                # longer than the period, stop the loop
                                break
                        tmp_log.info('{dem} finish looping'.format(dem=dem_name))
                    else:
                        # execute the module script with arguments
                        tmp_log.info('{dem} start'.format(dem=dem_name))
                        the_module.main(argv=mod_argv, tbuf=tbuf)
                        tmp_log.info('{dem} finish'.format(dem=dem_name))
                except Exception as e:
                    # with error
                    tb = traceback.format_exc()
                    tmp_log.error('failed to run daemon {dem} with {err} ; stop this worker'.format(
                                    dem=dem_name, err='{0}: {1}\n{2}\n'.format(e.__class__.__name__, e, tb)))
                    # daemon has run but failed
                    last_run_end_ts = int(time.time())
                    has_run = True
                    # send daemon status back to master
                    status_tuple = (dem_name, has_run, last_run_start_ts, last_run_end_ts)
                    pipe_conn.send(status_tuple)
                    # stop the worker
                    break
                else:
                    # daemon has run
                    last_run_end_ts = int(time.time())
                    has_run = True
            # send daemon status back to master
            status_tuple = (dem_name, has_run, last_run_start_ts, last_run_end_ts)
            pipe_conn.send(status_tuple)
            # FIXME: stop and spawn worker in every run for now since some script breaks the worker without exception
            # tmp_log.info('as script done, stop this worker')
            # break
        else:
            # got invalid message
            tmp_log.warning('got invalid message "{msg}", skipped it'.format(msg=one_msg))
        # short sleep (1/32 second) before the next iteration
        time.sleep(2**-5)