class ExceptionContextManager:
    """Context manager that captures exceptions raised inside a ``with`` block.

    Finer-grained than a decorator: only the wrapped code fragment is guarded.
    """

    def __init__(self, logger_name='ExceptionContextManager', verbose=100, donot_raise__exception=True, ):
        """
        :param verbose: traceback depth to log, i.e. the ``limit`` passed to traceback; a positive int
        :param donot_raise__exception: True suppresses re-raising the exception, False propagates it
        """
        self._verbose = verbose
        self._donot_raise__exception = donot_raise__exception
        self.logger = LogManager(logger_name).get_logger_and_add_handlers()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self._donot_raise__exception and exc_tb is not None:
            summary = f'{exc_type} : {exc_val}'
            colored_summary = f'\033[0;30;45m{summary}\033[0m'  # magenta background for visibility
            frames = traceback.format_tb(exc_tb)[:self._verbose]
            self.logger.error('\n'.join(frames) + colored_summary)
        # A truthy return from __exit__ tells Python to suppress the exception.
        return self._donot_raise__exception
def __init__(self, queue_name, log_level_int=10, logger_prefix='', is_add_file_handler=True,
             clear_queue_within_init=False, is_add_publish_time=True, consuming_function: callable = None):
    """Set up a publisher bound to one queue: logger, publish counters and an atexit summary hook.

    :param queue_name: name of the queue this publisher pushes to
    :param log_level_int: logging level of this publisher's logger
    :param logger_prefix: prefix so different publishers produce distinct loggers
    :param is_add_file_handler: also log to a file named after the logger
    :param clear_queue_within_init: purge the queue right after construction
    :param is_add_publish_time: whether to add the publish time; to be deprecated — always added.
    :param consuming_function: the consuming function, used only to validate publish-time
           arguments against its signature; no validation when omitted.  E.g. if ``add``
           accepts x and y, publishing {"x":1,"z":3} is invalid because z is not accepted.
    """
    self._queue_name = queue_name
    if logger_prefix != '':
        logger_prefix += '--'  # separator so prefix and class name read cleanly in the logger name
    logger_name = f'{logger_prefix}{self.__class__.__name__}--{queue_name}'
    self.logger = LogManager(logger_name).get_logger_and_add_handlers(log_level_int, log_filename=f'{logger_name}.log' if is_add_file_handler else None)
    # NOTE(review): the assignment below is commented out, but other code may still read
    # self.publish_params_checker — confirm whether it should be restored.
    # self.publish_params_checker = PublishParamsChecker(consuming_function) if consuming_function else None
    # self.rabbit_client = RabbitMqFactory(is_use_rabbitpy=is_use_rabbitpy).get_rabbit_cleint()
    # self.channel = self.rabbit_client.creat_a_channel()
    # self.queue = self.channel.queue_declare(queue=queue_name, durable=True)
    self._lock_for_count = Lock()  # guards the rolling publish counter
    self._current_time = None
    self.count_per_minute = None
    self._init_count()
    self.custom_init()  # subclass hook
    self.logger.info(f'{self.__class__} 被实例化了')
    self.publish_msg_num_total = 0
    self._is_add_publish_time = is_add_publish_time
    self.__init_time = time.time()
    atexit.register(self.__at_exit)  # log a publish summary when the process exits
    if clear_queue_within_init:
        self.clear()
def __init__(self, logger_name='ExceptionContextManager', verbose=100, donot_raise__exception=True, ):
    """Remember the capture behaviour and build the logger.

    :param verbose: traceback depth to log, i.e. the ``limit`` passed to traceback; a positive int
    :param donot_raise__exception: True suppresses re-raising the exception, False propagates it
    """
    self._donot_raise__exception = donot_raise__exception
    self._verbose = verbose
    self.logger = LogManager(logger_name).get_logger_and_add_handlers()
class TimerContextManager(object):
    """Context manager that times a code fragment and logs where it was started."""

    log = LogManager('TimerContext').get_logger_and_add_handlers()

    def __init__(self, is_print_log=True):
        self._is_print_log = is_print_log
        self.t_spend = None
        self._line = None
        self._file_name = None
        self.time_start = None

    def __enter__(self):
        caller = sys._getframe(1)
        self._line = caller.f_lineno  # line in the caller that opened the with-block
        self._file_name = caller.f_code.co_filename  # file that opened the with-block
        self.time_start = time.time()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.t_spend = time.time() - self.time_start
        if self._is_print_log:
            self.log.debug(
                f'对下面代码片段进行计时: \n执行"{self._file_name}:{self._line}" 用时 {round(self.t_spend, 2)} 秒'
            )
def keep_circulating(time_sleep=0.001, exit_if_function_run_sucsess=False, is_display_detail_exception=True):
    """Decorator factory: keep re-running the wrapped function forever with a pause between runs.

    :param time_sleep: seconds to sleep between two consecutive runs
    :param exit_if_function_run_sucsess: return the result and stop looping once the function succeeds
    :param is_display_detail_exception: log the full traceback (limit 10) instead of just str(e)
    """
    # Bug fix: the attribute assigned below is ``log``, but the guard used to test for
    # ``keep_circulating_log`` and therefore rebuilt the logger on every decoration.
    if not hasattr(keep_circulating, 'log'):
        keep_circulating.log = LogManager('keep_circulating').get_logger_and_add_handlers()

    def _keep_circulating(func):
        # noinspection PyBroadException
        @wraps(func)
        def __keep_circulating(*args, **kwargs):
            while 1:
                try:
                    result = func(*args, **kwargs)
                    if exit_if_function_run_sucsess:
                        return result
                except Exception as e:
                    msg = func.__name__ + ' 运行出错\n ' + traceback.format_exc(limit=10) if is_display_detail_exception else str(e)
                    keep_circulating.log.error(msg)
                finally:
                    time.sleep(time_sleep)

        return __keep_circulating

    return _keep_circulating
def __init__(
        self, queue_name, log_level_int=10, logger_prefix='',
        is_add_file_handler=True, clear_queue_within_init=False,
        is_add_publish_time=True, ):
    """Set up a publisher bound to one queue: logger, publish counters and an atexit summary hook.

    :param queue_name: name of the queue this publisher pushes to
    :param log_level_int: logging level of this publisher's logger
    :param logger_prefix: prefix so different publishers produce distinct loggers
    :param is_add_file_handler: also log to a file named after the logger
    :param clear_queue_within_init: purge the queue right after construction
    :param is_add_publish_time: whether to stamp messages with their publish time
    """
    self._queue_name = queue_name
    if logger_prefix != '':
        logger_prefix += '--'  # separator so prefix and class name read cleanly in the logger name
    logger_name = f'{logger_prefix}{self.__class__.__name__}--{queue_name}'
    self.logger = LogManager(logger_name).get_logger_and_add_handlers(
        log_level_int,
        log_filename=f'{logger_name}.log' if is_add_file_handler else None)
    #
    # self.rabbit_client = RabbitMqFactory(is_use_rabbitpy=is_use_rabbitpy).get_rabbit_cleint()
    # self.channel = self.rabbit_client.creat_a_channel()
    # self.queue = self.channel.queue_declare(queue=queue_name, durable=True)
    self._lock_for_pika = Lock()  # pika channels are not thread-safe; serialize access
    self._lock_for_count = Lock()  # guards the rolling publish counter
    self._current_time = None
    self.count_per_minute = None
    self._init_count()
    self.custom_init()  # subclass hook
    self.logger.info(f'{self.__class__} 被实例化了')
    self.publish_msg_num_total = 0
    self._is_add_publish_time = is_add_publish_time
    self.__init_time = time.time()
    atexit.register(self.__at_exit)  # log a publish summary when the process exits
    if clear_queue_within_init:
        self.clear()
def timer(func):
    """Timing decorator: logs how many seconds each call of ``func`` takes.

    Only suitable for measuring whole-function run time.
    """
    # Bug fix: the logger used to be cached once on ``timer`` itself, so every decorated
    # function logged through (and into the file of) whichever function happened to be
    # decorated first.  Each decorated function now gets its own logger and log file.
    log = LogManager(f'timer_{func.__name__}').get_logger_and_add_handlers(log_filename=f'timer_{func.__name__}.log')

    @wraps(func)
    def _timer(*args, **kwargs):
        t1 = time.time()
        result = func(*args, **kwargs)
        t_spend = round(time.time() - t1, 2)
        log.debug('执行[ {} ]方法用时 {} 秒'.format(func.__name__, t_spend))
        return result

    return _timer
class FunctionResultCacher:
    """Memoize function results for a limited time, keyed by the call arguments."""

    logger = LogManager('FunctionResultChche').get_logger_and_add_handlers()
    # Maps (function, args_key) -> (result, timestamp_when_cached), e.g.
    # {(f1, (1, 2, 3, 4)): (10, 1532066199.739), (f2, (5, 6, 7, 8)): (26, 1532066211.645)}
    func_result_dict = {}

    @classmethod
    def cached_function_result_for_a_time(cls, cache_time: float):
        """Decorator that caches a function's return value for ``cache_time`` seconds.

        Do not use it on functions whose results are huge strings or other
        memory-hungry data structures.

        :param cache_time: how long a cached result stays valid, in seconds
        :type cache_time: float
        """

        def _cached_function_result_for_a_time(fun):
            @wraps(fun)
            def __cached_function_result_for_a_time(*args, **kwargs):
                # Crude memory guard: reset the cache when the dict structure itself grows
                # large.  (sys.getsizeof is shallow — it does not count the cached values.)
                if sys.getsizeof(cls.func_result_dict) > 100 * 1000 * 1000:
                    cls.func_result_dict.clear()
                key = cls._make_arguments_to_key(args, kwargs)
                cached = cls.func_result_dict.get((fun, key))
                if cached is not None and time.time() - cached[1] < cache_time:
                    return cached[0]
                cls.logger.debug('函数 [{}] 此次不能使用缓存'.format(fun.__name__))
                fresh = fun(*args, **kwargs)
                cls.func_result_dict[(fun, key)] = (fresh, time.time())
                return fresh

            return __cached_function_result_for_a_time

        return _cached_function_result_for_a_time

    @staticmethod
    def _make_arguments_to_key(args, kwds):
        # Tuples support "+", so append the sorted kwarg items onto the positional args.
        key = args
        if kwds:
            for pair in sorted(kwds.items()):
                key += pair
        return key
def where_is_it_called(func):
    """Decorator that logs, on every call, which file and line invoked the wrapped function."""
    if not hasattr(where_is_it_called, 'log'):
        where_is_it_called.log = LogManager('where_is_it_called').get_logger_and_add_handlers()

    # noinspection PyProtectedMember
    @wraps(func)
    def _where_is_it_called(*args, **kwargs):
        func_name = func.__name__
        caller_frame = sys._getframe(1)
        which_fun_call_this = caller_frame.f_code.co_name  # NOQA
        line = caller_frame.f_lineno  # line at which the call happens
        file_name = caller_frame.f_code.co_filename  # file that made the call
        # noinspection PyPep8
        where_is_it_called.log.debug(
            f'文件[{func.__code__.co_filename}]的第[{func.__code__.co_firstlineno}]行即模块 [{func.__module__}] 中的方法 [{func_name}] 正在被文件 [{file_name}] 中的'
            f'方法 [{which_fun_call_this}] 中的第 [{line}] 行处调用,传入的参数为[{args},{kwargs}]'
        )
        try:
            t0 = time.time()
            result_raw = func(*args, **kwargs)
            t_spend = round(time.time() - t0, 2)
            shown = json.dumps(result_raw) if isinstance(result_raw, dict) else result_raw
            if len(str(shown)) > 200:
                shown = str(shown)[0:200] + ' 。。。。。。 '
            where_is_it_called.log.debug(
                '执行函数[{}]消耗的时间是{}秒,返回的结果是 --> '.format(func_name, t_spend) + str(shown))
            return result_raw
        except Exception as e:
            where_is_it_called.log.debug('执行函数{},发生错误'.format(func_name))
            where_is_it_called.log.exception(e)
            raise e

    return _where_is_it_called
# -*- coding: utf-8 -*- # @Author : ydf # @Time : 2019/8/8 0008 14:57 import time from function_scheduling_distributed_framework import get_consumer from function_scheduling_distributed_framework.utils import LogManager from test_frame.my_patch_frame_config import do_patch_frame_config do_patch_frame_config() logger = LogManager('test.f2').get_logger_and_add_handlers( formatter_template=7) def f2(a, b): logger.info(f'消费此消息 {a} + {b} 中。。。。。') time.sleep(10) # 模拟做某事需要阻塞10秒种,必须用并发绕过此阻塞。 logger.info(f'计算 {a} + {b} 得到的结果是 {a + b}') # 把消费的函数名传给consuming_function,就这么简单。 consumer = get_consumer( 'queue_test2', consuming_function=f2, broker_kind=6) # 通过设置broker_kind,一键切换中间件为mq或redis等7种中间件或包。 if __name__ == '__main__': consumer.start_consuming_message()
# -*- coding: utf-8 -*- # @Author : ydf # @Time : 2019/8/8 0008 13:05 import json import sqlite3 import persistqueue from function_scheduling_distributed_framework import frame_config from function_scheduling_distributed_framework.publishers.base_publisher import AbstractPublisher from function_scheduling_distributed_framework.utils import LogManager LogManager('persistqueue').get_logger_and_add_handlers(10) # noinspection PyProtectedMember class PersistQueuePublisher(AbstractPublisher): """ 使用persistqueue实现的本地持久化队列。 这个是本地持久化,支持本地多个启动的python脚本共享队列任务。与LocalPythonQueuePublisher相比,不会随着python解释器退出,导致任务丢失。 """ # noinspection PyAttributeOutsideInit def custom_init(self): # noinspection PyShadowingNames def _my_new_db_connection(self, path, multithreading, timeout): # 主要是改了sqlite文件后缀,方便pycharm识别和打开。 # noinspection PyUnusedLocal conn = None if path == self._MEMORY: conn = sqlite3.connect(path, check_same_thread=not multithreading)
class AbstractPublisher(LoggerLevelSetterMixin, metaclass=abc.ABCMeta, ):
    """Abstract base for queue publishers.

    Wraps a concrete broker with logging, publish counting / throughput reporting,
    optional queue clearing, and an atexit summary.  Subclasses implement the four
    abstract broker operations (publish, clear, count, close).
    """

    # One-time broker initialisation flag (not used in the code visible here).
    has_init_broker = 0

    def __init__(self, queue_name, log_level_int=10, logger_prefix='', is_add_file_handler=True, clear_queue_within_init=False, is_add_publish_time=True, consuming_function: callable = None):
        """
        :param queue_name: name of the queue this publisher pushes to
        :param log_level_int: logging level of this publisher's logger
        :param logger_prefix: prefix so different publishers produce distinct loggers
        :param is_add_file_handler: also log to a file named after the logger
        :param clear_queue_within_init: purge the queue right after construction
        :param is_add_publish_time: whether to add the publish time; to be deprecated — always added.
        :param consuming_function: the consuming function, used only to validate publish-time
               arguments against its signature; no validation when omitted.  E.g. if ``add``
               accepts x and y, publishing {"x":1,"z":3} is invalid because z is not accepted.
        """
        self._queue_name = queue_name
        if logger_prefix != '':
            logger_prefix += '--'
        logger_name = f'{logger_prefix}{self.__class__.__name__}--{queue_name}'
        self.logger = LogManager(logger_name).get_logger_and_add_handlers(log_level_int, log_filename=f'{logger_name}.log' if is_add_file_handler else None)
        # NOTE(review): this assignment is commented out, yet publish() reads
        # self.publish_params_checker — as written, calling publish() raises
        # AttributeError.  Confirm whether the line below should be restored.
        # self.publish_params_checker = PublishParamsChecker(consuming_function) if consuming_function else None
        # self.rabbit_client = RabbitMqFactory(is_use_rabbitpy=is_use_rabbitpy).get_rabbit_cleint()
        # self.channel = self.rabbit_client.creat_a_channel()
        # self.queue = self.channel.queue_declare(queue=queue_name, durable=True)
        self._lock_for_count = Lock()  # guards the rolling publish counter
        self._current_time = None
        self.count_per_minute = None
        self._init_count()
        self.custom_init()  # subclass hook
        self.logger.info(f'{self.__class__} 被实例化了')
        self.publish_msg_num_total = 0
        self._is_add_publish_time = is_add_publish_time
        self.__init_time = time.time()
        atexit.register(self.__at_exit)  # log a publish summary when the process exits
        if clear_queue_within_init:
            self.clear()

    def set_is_add_publish_time(self, is_add_publish_time=True):
        """Toggle publish-time stamping; returns self for chaining."""
        self._is_add_publish_time = is_add_publish_time
        return self

    def _init_count(self):
        # Reset the rolling throughput window.
        with self._lock_for_count:
            self._current_time = time.time()
            self.count_per_minute = 0

    def custom_init(self):
        # Subclass hook, called at the end of __init__.
        pass

    def publish(self, msg: typing.Union[str, dict], priority_control_config: PriorityConsumingControlConfig = None):
        """Validate, stamp and push one message to the broker.

        Adds an ``extra`` dict (task_id + publish time) to the message, retries the
        concrete publish up to 10 times, and logs a throughput summary every 10 seconds.

        :param msg: the message, a dict or a JSON string of one
        :param priority_control_config: optional per-message consuming control whose
               dict form is merged into ``extra``
        :return: an async result handle keyed by the generated task_id
        """
        if isinstance(msg, str):
            msg = json.loads(msg)
        if self.publish_params_checker:
            self.publish_params_checker.check_params(msg)
        task_id = f'{self._queue_name}_result:{uuid.uuid4()}'
        msg['extra'] = extra_params = {'task_id': task_id, 'publish_time': round(time.time(), 4),
                                       'publish_time_format': time.strftime('%Y-%m-%d %H:%M:%S')}
        if priority_control_config:
            extra_params.update(priority_control_config.to_dict())
        t_start = time.time()
        decorators.handle_exception(retry_times=10, is_throw_error=True, time_sleep=0.1)(self.concrete_realization_of_publish)(json.dumps(msg, ensure_ascii=False))
        self.logger.debug(f'向{self._queue_name} 队列,推送消息 耗时{round(time.time() - t_start, 4)}秒 {msg}')
        with self._lock_for_count:
            self.count_per_minute += 1
            self.publish_msg_num_total += 1
            if time.time() - self._current_time > 10:
                self.logger.info(f'10秒内推送了 {self.count_per_minute} 条消息,累计推送了 {self.publish_msg_num_total} 条消息到 {self._queue_name} 中')
                # NOTE(review): _init_count() re-acquires _lock_for_count, which is already
                # held here; threading.Lock is not reentrant, so this would deadlock —
                # confirm the original indentation (the call may sit outside the with-block).
                self._init_count()
        return RedisAsyncResult(task_id)

    @abc.abstractmethod
    def concrete_realization_of_publish(self, msg):
        """Broker-specific push of the serialized message."""
        raise NotImplementedError

    @abc.abstractmethod
    def clear(self):
        """Broker-specific purge of the queue."""
        raise NotImplementedError

    @abc.abstractmethod
    def get_message_count(self):
        """Broker-specific count of pending messages."""
        raise NotImplementedError

    @abc.abstractmethod
    def close(self):
        """Broker-specific release of the connection."""
        raise NotImplementedError

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        self.logger.warning(f'with中自动关闭publisher连接,累计推送了 {self.publish_msg_num_total} 条消息 ')

    def __at_exit(self):
        # Registered via atexit in __init__: final publish summary at interpreter shutdown.
        self.logger.warning(
            f'程序关闭前,{round(time.time() - self.__init_time)} 秒内,累计推送了 {self.publish_msg_num_total} 条消息 到 {self._queue_name} 中')
# @Time : 2019/8/8 0008 14:57 import gevent.monkey; # gevent.monkey.patch_all() import time import random from function_scheduling_distributed_framework import get_consumer, AbstractConsumer from function_scheduling_distributed_framework.consumers.base_consumer import ConsumersManager, FunctionResultStatusPersistanceConfig from function_scheduling_distributed_framework.utils import LogManager from test_frame.my_patch_frame_config import do_patch_frame_config do_patch_frame_config() logger = LogManager('test_consume').get_logger_and_add_handlers() class RandomError(Exception): pass def add(a, b): logger.info(f'消费此消息 {a} + {b} 中。。。。。') time.sleep(random.randint(3, 5)) # 模拟做某事需要阻塞10秒种,必须用并发绕过此阻塞。 if random.randint(4, 6) == 5: raise RandomError('演示随机出错') logger.info(f'计算 {a} + {b} 得到的结果是 {a + b}') return a + b
def check_gevent_monkey_patch(raise_exc=True):
    """Verify that gevent's monkey patch has been applied.

    :param raise_exc: warn and raise when the patch is missing; when False,
                      a missing patch is silently tolerated
    :return: 1 when the patch is present
    """
    if not monkey.is_module_patched('socket'):  # any patched module works as the detection marker
        if raise_exc:
            warnings.warn(
                f'检测到 你还没有打gevent包的猴子补丁,请在所运行的起始脚本第一行写上 【import gevent.monkey;gevent.monkey.patch_all()】 这句话。'
            )
            raise Exception(
                f'检测到 你还没有打gevent包的猴子补丁,请在所运行的起始脚本第一行写上 【import gevent.monkey;gevent.monkey.patch_all()】 这句话。'
            )
    else:
        return 1


logger_gevent_timeout_deco = LogManager(
    'gevent_timeout_deco').get_logger_and_add_handlers()


def gevent_timeout_deco(timeout_t):
    """Decorator factory: abort the wrapped call with gevent.Timeout after ``timeout_t`` seconds."""

    def _gevent_timeout_deco(f):
        def __gevent_timeout_deceo(*args, **kwargs):
            timeout = gevent.Timeout(timeout_t, )
            timeout.start()
            result = None
            try:
                result = f(*args, **kwargs)
            except gevent.Timeout as t:
                logger_gevent_timeout_deco.error(f'函数 {f} 运行超过了 {timeout_t} 秒')
                if t is not timeout:
                    # A different (nested) timeout fired — not the one started here.
                    nb_print(t)
                    # raise  # not my timeout
def __init__(
        self, queue_name, *, consuming_function: Callable = None,
        function_timeout=0, threads_num=50, specify_threadpool=None,
        concurrent_mode=1, max_retry_times=3, log_level=10,
        is_print_detail_exception=True, msg_schedule_time_intercal=0.0,
        msg_expire_senconds=0, logger_prefix='', create_logger_file=True,
        do_task_filtering=False, is_consuming_function_use_multi_params=True,
        is_do_not_run_by_specify_time_effect=False,
        do_not_run_by_specify_time=('10:00:00', '22:00:00'),
        schedule_tasks_on_main_thread=False,
        function_result_status_persistance_conf=FunctionResultStatusPersistanceConfig(
            False, False, 7 * 24 * 3600)):
    """
    :param queue_name:
    :param consuming_function: the function that handles each message.
    :param function_timeout: seconds after which the running function is killed; 0 means no limit.
    :param threads_num:
    :param specify_threadpool: use the given thread pool (several consumers may share one); when not None, threads_num is ignored
    :param concurrent_mode: concurrency mode; thread / gevent / eventlet supported for now.  1 thread, 2 gevent, 3 eventlet
    :param max_retry_times:
    :param log_level:
    :param is_print_detail_exception:
    :param msg_schedule_time_intercal: interval between scheduling two messages, for rate control
    :param logger_prefix: logger prefix so different consumers produce distinct logs
    :param create_logger_file: whether to create a file log
    :param do_task_filtering: whether to filter tasks based on the function arguments
    :is_consuming_function_use_multi_params whether the function takes traditional multiple params, rather than one body dict carrying them
    :param is_do_not_run_by_specify_time_effect: whether the do-not-run time window is in effect
    :param do_not_run_by_specify_time: the time window in which not to run
    :param schedule_tasks_on_main_thread: schedule tasks directly on the main thread, which means two consumers cannot be started on the current main thread at once.
    :function_result_status_persistance_conf configuration: whether to save the function's arguments, result and status to mongodb; used for later tracing, statistics and web display; requires mongo.
    """
    # Snapshot this consumer's configuration for the statistics / web view.
    ConsumersManager.consumers_queue__info_map[
        queue_name] = current_queue__info_dict = copy.copy(locals())
    current_queue__info_dict['consuming_function'] = str(
        consuming_function)  # consuming_function.__name__
    current_queue__info_dict[
        'function_result_status_persistance_conf'] = function_result_status_persistance_conf.to_dict(
    )
    current_queue__info_dict.pop('self')  # locals() includes self, which is not serializable info
    current_queue__info_dict['broker_kind'] = self.__class__.BROKER_KIND
    current_queue__info_dict['class_name'] = self.__class__.__name__
    concurrent_name = ConsumersManager.get_concurrent_name_by_concurrent_mode(
        concurrent_mode)
    current_queue__info_dict['concurrent_mode_name'] = concurrent_name

    self._queue_name = queue_name
    self.queue_name = queue_name  # public alias so external access avoids the protected-member warning
    self.consuming_function = consuming_function
    self._function_timeout = function_timeout
    self._threads_num = threads_num
    self._specify_threadpool = specify_threadpool
    self._threadpool = None  # an extra thread is added separately to check message count and heartbeat
    self._concurrent_mode = concurrent_mode
    self._max_retry_times = max_retry_times
    self._is_print_detail_exception = is_print_detail_exception
    # Clamp to at least 1ms so the scheduling loop never busy-spins.
    self._msg_schedule_time_intercal = msg_schedule_time_intercal if msg_schedule_time_intercal > 0.001 else 0.001
    self._msg_expire_senconds = msg_expire_senconds
    if self._concurrent_mode not in (1, 2, 3):
        raise ValueError('设置的并发模式不正确')
    self._concurrent_mode_dispatcher = ConcurrentModeDispatcher(self)
    self._logger_prefix = logger_prefix
    self._log_level = log_level
    if logger_prefix != '':
        logger_prefix += '--'
    logger_name = f'{logger_prefix}{self.__class__.__name__}--{concurrent_name}--{queue_name}'
    # nb_print(logger_name)
    self.logger = LogManager(logger_name).get_logger_and_add_handlers(
        log_level,
        log_filename=f'{logger_name}.log' if create_logger_file else None)
    self.logger.info(f'{self.__class__} 被实例化')
    self._do_task_filtering = do_task_filtering
    self._redis_filter_key_name = f'filter:{queue_name}'
    self._redis_filter = RedisFilter(self._redis_filter_key_name)
    self._is_consuming_function_use_multi_params = is_consuming_function_use_multi_params
    self._lock_for_pika = Lock()  # pika channels are not thread-safe; serialize access
    self._execute_task_times_every_minute = 0  # how many tasks ran in the last minute
    self._lock_for_count_execute_task_times_every_minute = Lock()
    self._current_time_for_execute_task_times_every_minute = time.time()
    self._msg_num_in_broker = 0
    self._last_timestamp_when_has_task_in_queue = 0
    self._last_timestamp_print_msg_num = 0
    self._is_do_not_run_by_specify_time_effect = is_do_not_run_by_specify_time_effect
    self._do_not_run_by_specify_time = do_not_run_by_specify_time  # a time window during which the consumer does not run can be configured
    self._schedule_tasks_on_main_thread = schedule_tasks_on_main_thread
    self._result_persistence_helper = ResultPersistenceHelper(
        function_result_status_persistance_conf, queue_name)
    self.stop_flag = False
    self._publisher_of_same_queue = None
    self.custom_init()
class AbstractConsumer(
        LoggerLevelSetterMixin,
        metaclass=abc.ABCMeta,
):
    """Framework skeleton of a queue consumer.

    Pulls messages from a broker (subclass-specific ``_shedual_task``), schedules
    them onto a concurrent pool (threads / gevent / eventlet), runs the consuming
    function with filtering, timeout, retry and result persistence, and confirms
    or requeues each message via the subclass hooks.
    """

    # Seconds to sleep when inside the configured "do not run" time window.
    time_interval_for_check_do_not_run_time = 60
    # Broker identifier; each concrete subclass sets its own constant.
    BROKER_KIND = None

    @property
    @decorators.synchronized
    def publisher_of_same_queue(self):
        # Lazily build (once) a publisher for this same queue; it is used for
        # requeueing and for polling the broker's message count.
        if not self._publisher_of_same_queue:
            self._publisher_of_same_queue = get_publisher(
                self._queue_name, broker_kind=self.BROKER_KIND)
            if self._msg_expire_senconds:
                self._publisher_of_same_queue.set_is_add_publish_time()
        return self._publisher_of_same_queue

    @classmethod
    def join_shedual_task_thread(cls):
        """Block the main thread on every consumer's scheduling thread (needed with multiprocessing on linux).

        :return:
        """
        """
        def ff():
            RabbitmqConsumer('queue_test', consuming_function=f3, threads_num=20, msg_schedule_time_intercal=2, log_level=10, logger_prefix='yy平台消费', is_consuming_function_use_multi_params=True).start_consuming_message()
            RabbitmqConsumer('queue_test2', consuming_function=f4, threads_num=20, msg_schedule_time_intercal=4, log_level=10, logger_prefix='zz平台消费', is_consuming_function_use_multi_params=True).start_consuming_message()
            AbstractConsumer.join_shedual_task_thread()  # 如果开多进程启动消费者,在linux上需要这样写下这一行。

        if __name__ == '__main__':
            [Process(target=ff).start() for _ in range(4)]
        """
        ConsumersManager.join_all_consumer_shedual_task_thread()

    def __init__(
            self, queue_name, *, consuming_function: Callable = None,
            function_timeout=0, threads_num=50, specify_threadpool=None,
            concurrent_mode=1, max_retry_times=3, log_level=10,
            is_print_detail_exception=True, msg_schedule_time_intercal=0.0,
            msg_expire_senconds=0, logger_prefix='', create_logger_file=True,
            do_task_filtering=False, is_consuming_function_use_multi_params=True,
            is_do_not_run_by_specify_time_effect=False,
            do_not_run_by_specify_time=('10:00:00', '22:00:00'),
            schedule_tasks_on_main_thread=False,
            function_result_status_persistance_conf=FunctionResultStatusPersistanceConfig(
                False, False, 7 * 24 * 3600)):
        """
        :param queue_name:
        :param consuming_function: the function that handles each message.
        :param function_timeout: seconds after which the running function is killed; 0 means no limit.
        :param threads_num:
        :param specify_threadpool: use the given thread pool (several consumers may share one); when not None, threads_num is ignored
        :param concurrent_mode: concurrency mode; thread / gevent / eventlet supported for now.  1 thread, 2 gevent, 3 eventlet
        :param max_retry_times:
        :param log_level:
        :param is_print_detail_exception:
        :param msg_schedule_time_intercal: interval between scheduling two messages, for rate control
        :param logger_prefix: logger prefix so different consumers produce distinct logs
        :param create_logger_file: whether to create a file log
        :param do_task_filtering: whether to filter tasks based on the function arguments
        :is_consuming_function_use_multi_params whether the function takes traditional multiple params, rather than one body dict carrying them
        :param is_do_not_run_by_specify_time_effect: whether the do-not-run time window is in effect
        :param do_not_run_by_specify_time: the time window in which not to run
        :param schedule_tasks_on_main_thread: schedule tasks directly on the main thread, which means two consumers cannot be started on the current main thread at once.
        :function_result_status_persistance_conf configuration: whether to save the function's arguments, result and status to mongodb; used for later tracing, statistics and web display; requires mongo.
        """
        # Snapshot this consumer's configuration for the statistics / web view.
        ConsumersManager.consumers_queue__info_map[
            queue_name] = current_queue__info_dict = copy.copy(locals())
        current_queue__info_dict['consuming_function'] = str(
            consuming_function)  # consuming_function.__name__
        current_queue__info_dict[
            'function_result_status_persistance_conf'] = function_result_status_persistance_conf.to_dict(
        )
        current_queue__info_dict.pop('self')
        current_queue__info_dict['broker_kind'] = self.__class__.BROKER_KIND
        current_queue__info_dict['class_name'] = self.__class__.__name__
        concurrent_name = ConsumersManager.get_concurrent_name_by_concurrent_mode(
            concurrent_mode)
        current_queue__info_dict['concurrent_mode_name'] = concurrent_name

        self._queue_name = queue_name
        self.queue_name = queue_name  # public alias so external access avoids the protected-member warning
        self.consuming_function = consuming_function
        self._function_timeout = function_timeout
        self._threads_num = threads_num
        self._specify_threadpool = specify_threadpool
        self._threadpool = None  # an extra thread is added separately to check message count and heartbeat
        self._concurrent_mode = concurrent_mode
        self._max_retry_times = max_retry_times
        self._is_print_detail_exception = is_print_detail_exception
        # Clamp to at least 1ms so the scheduling loop never busy-spins.
        self._msg_schedule_time_intercal = msg_schedule_time_intercal if msg_schedule_time_intercal > 0.001 else 0.001
        self._msg_expire_senconds = msg_expire_senconds
        if self._concurrent_mode not in (1, 2, 3):
            raise ValueError('设置的并发模式不正确')
        self._concurrent_mode_dispatcher = ConcurrentModeDispatcher(self)
        self._logger_prefix = logger_prefix
        self._log_level = log_level
        if logger_prefix != '':
            logger_prefix += '--'
        logger_name = f'{logger_prefix}{self.__class__.__name__}--{concurrent_name}--{queue_name}'
        # nb_print(logger_name)
        self.logger = LogManager(logger_name).get_logger_and_add_handlers(
            log_level,
            log_filename=f'{logger_name}.log' if create_logger_file else None)
        self.logger.info(f'{self.__class__} 被实例化')
        self._do_task_filtering = do_task_filtering
        self._redis_filter_key_name = f'filter:{queue_name}'
        self._redis_filter = RedisFilter(self._redis_filter_key_name)
        self._is_consuming_function_use_multi_params = is_consuming_function_use_multi_params
        self._lock_for_pika = Lock()  # pika channels are not thread-safe; serialize access
        self._execute_task_times_every_minute = 0  # how many tasks ran in the last minute
        self._lock_for_count_execute_task_times_every_minute = Lock()
        self._current_time_for_execute_task_times_every_minute = time.time()
        self._msg_num_in_broker = 0
        self._last_timestamp_when_has_task_in_queue = 0
        self._last_timestamp_print_msg_num = 0
        self._is_do_not_run_by_specify_time_effect = is_do_not_run_by_specify_time_effect
        self._do_not_run_by_specify_time = do_not_run_by_specify_time  # a time window during which the consumer does not run can be configured
        self._schedule_tasks_on_main_thread = schedule_tasks_on_main_thread
        self._result_persistence_helper = ResultPersistenceHelper(
            function_result_status_persistance_conf, queue_name)
        self.stop_flag = False
        self._publisher_of_same_queue = None
        self.custom_init()

    @property
    @decorators.synchronized
    def threadpool(self):
        # Built lazily by the dispatcher appropriate to the selected concurrency mode.
        return self._concurrent_mode_dispatcher.build_pool()

    def custom_init(self):
        # Subclass hook, called at the end of __init__.
        pass

    def keep_circulating(self, time_sleep=0.001, exit_if_function_run_sucsess=False, is_display_detail_exception=True):
        """Decorator: keep re-running a method at a fixed interval, until stop_flag is set.

        :param time_sleep: interval between two consecutive runs
        :param is_display_detail_exception: log the full traceback (limit 10) instead of just str(e)
        :param exit_if_function_run_sucsess: stop looping and return the result once the call succeeds
        """

        def _keep_circulating(func):
            # noinspection PyBroadException
            @wraps(func)
            def __keep_circulating(*args, **kwargs):
                while 1:
                    if self.stop_flag:
                        break
                    try:
                        result = func(*args, **kwargs)
                        if exit_if_function_run_sucsess:
                            return result
                    except Exception as e:
                        msg = func.__name__ + ' 运行出错\n ' + traceback.format_exc(
                            limit=10) if is_display_detail_exception else str(
                            e)
                        self.logger.error(msg)
                    finally:
                        time.sleep(time_sleep)

            return __keep_circulating

        return _keep_circulating

    def start_consuming_message(self):
        """Start the heartbeat/count checker and the broker scheduling loop."""
        self.logger.warning(f'开始消费 {self._queue_name} 中的消息')
        # self.threadpool.submit(decorators.keep_circulating(20)(self.check_heartbeat_and_message_count))
        self.threadpool.submit(
            self.keep_circulating(20)(self.check_heartbeat_and_message_count))
        if self._schedule_tasks_on_main_thread:
            # decorators.keep_circulating(1)(self._shedual_task)()
            self.keep_circulating(1)(self._shedual_task)()
        else:
            # t = Thread(target=decorators.keep_circulating(1)(self._shedual_task))
            self._concurrent_mode_dispatcher.schedulal_task_with_no_block()

    @abc.abstractmethod
    def _shedual_task(self):
        """Broker-specific pull loop: fetch raw messages and hand them to _submit_task."""
        raise NotImplementedError

    def _run(
            self,
            kw: dict,
    ):
        # Runs inside a pool worker: filter, count throughput, then execute with confirm/retry.
        if self._do_task_filtering and self._redis_filter.check_value_exists(
                kw['body']):  # check the function arguments and skip tasks that already ran successfully.
            self.logger.info(
                f'redis的 [{self._redis_filter_key_name}] 键 中 过滤任务 {kw["body"]}'
            )
            self._confirm_consume(kw)
            return
        with self._lock_for_count_execute_task_times_every_minute:
            self._execute_task_times_every_minute += 1
            if time.time(
            ) - self._current_time_for_execute_task_times_every_minute > 60:
                self.logger.info(
                    f'一分钟内执行了 {self._execute_task_times_every_minute} 次函数 [ {self.consuming_function.__name__} ] ,预计'
                    f'还需要 {time_util.seconds_to_hour_minute_second(self._msg_num_in_broker / self._execute_task_times_every_minute * 60)} 时间'
                    f'才能执行完成 {self._msg_num_in_broker}个剩余的任务 ')
                self._current_time_for_execute_task_times_every_minute = time.time(
                )
                self._execute_task_times_every_minute = 0

        self._run_consuming_function_with_confirm_and_retry(
            kw,
            current_retry_times=0,
            function_result_status=FunctionResultStatus(
                self.queue_name, self.consuming_function.__name__,
                kw['body']))

    def _run_consuming_function_with_confirm_and_retry(
            self, kw: dict, current_retry_times,
            function_result_status: FunctionResultStatus):
        # Execute the consuming function; on error retry (recursively) up to
        # _max_retry_times, then confirm anyway.  PyMongoError/ExceptionForRequeue
        # bypass the retry limit and requeue the message instead.
        if current_retry_times < self._max_retry_times:
            function_result_status.run_times += 1
            # noinspection PyBroadException
            t_start = time.time()
            try:
                function_run = self.consuming_function if self._function_timeout == 0 else self._concurrent_mode_dispatcher.timeout_deco(
                    self._function_timeout)(self.consuming_function)
                if self._is_consuming_function_use_multi_params:  # the consuming function takes traditional multiple parameters
                    function_result_status.result = function_run(
                        **delete_keys_and_return_new_dict(
                            kw['body'],
                            ['publish_time', 'publish_time_format']))
                else:
                    function_result_status.result = function_run(
                        delete_keys_and_return_new_dict(
                            kw['body'], ['publish_time', 'publish_time_format'
                                         ]))  # the consuming function takes a single dict whose key/value pairs carry the parameters.
                function_result_status.success = True
                self._confirm_consume(kw)
                if self._do_task_filtering:
                    self._redis_filter.add_a_value(
                        kw['body'])  # after success, add the sorted key/value string of the arguments to the filter set.
                self.logger.debug(
                    f' 函数 {self.consuming_function.__name__} '
                    f'第{current_retry_times + 1}次 运行, 正确了,函数运行时间是 {round(time.time() - t_start, 4)} 秒,入参是 【 {kw["body"]} 】。 {ConsumersManager.get_concurrent_info()}'
                )
            except Exception as e:
                if isinstance(
                        e, (PyMongoError, ExceptionForRequeue)
                ):  # mongo is often unavailable during maintenance/backup, or a deliberately raised ExceptionForRequeue: requeue, not bound by the retry limit.
                    self.logger.critical(
                        f'函数 [{self.consuming_function.__name__}] 中发生错误 {type(e)} {e}'
                    )
                    return self._requeue(kw)
                self.logger.error(
                    f'函数 {self.consuming_function.__name__} 第{current_retry_times + 1}次发生错误,'
                    f'函数运行时间是 {round(time.time() - t_start, 4)} 秒,\n 入参是 【 {kw["body"]} 】 \n 原因是 {type(e)} {e} ',
                    exc_info=self._is_print_detail_exception)
                function_result_status.exception = f'{e.__class__.__name__} {str(e)}'
                self._run_consuming_function_with_confirm_and_retry(
                    kw, current_retry_times + 1, function_result_status)
        else:
            self.logger.critical(
                f'函数 {self.consuming_function.__name__} 达到最大重试次数 {self._max_retry_times} 后,仍然失败, 入参是 【 {kw["body"]} 】'
            )
            self._confirm_consume(kw)  # after exceeding the retry limit, confirm consumption anyway.
        # NOTE(review): placement reconstructed from whitespace-collapsed source — this
        # persists the status after both the success and the exhausted-retries paths;
        # confirm against the original indentation.
        self._result_persistence_helper.save_function_result_to_mongo(
            function_result_status)

    @abc.abstractmethod
    def _confirm_consume(self, kw):
        """确认消费"""
        raise NotImplementedError

    # noinspection PyUnusedLocal
    def check_heartbeat_and_message_count(self):
        # Poll the broker's pending message count; log it at most once a minute.
        self._msg_num_in_broker = self.publisher_of_same_queue.get_message_count(
        )
        if time.time() - self._last_timestamp_print_msg_num > 60:
            self.logger.info(
                f'[{self._queue_name}] 队列中还有 [{self._msg_num_in_broker}] 个任务')
            self._last_timestamp_print_msg_num = time.time()
        if self._msg_num_in_broker != 0:
            self._last_timestamp_when_has_task_in_queue = time.time()
        return self._msg_num_in_broker

    @abc.abstractmethod
    def _requeue(self, kw):
        """重新入队"""
        raise NotImplementedError

    def _submit_task(self, kw):
        # Gatekeeper before the pool: honour the do-not-run window, drop expired
        # messages, then submit to the pool with rate control.
        if self._judge_is_daylight():
            self._requeue(kw)
            time.sleep(self.time_interval_for_check_do_not_run_time)
            return
        if self._msg_expire_senconds != 0 and time.time(
        ) - self._msg_expire_senconds > kw['body']['publish_time']:
            self.logger.warning(
                f'消息发布时戳是 {kw["body"]["publish_time"]} {kw["body"].get("publish_time_format", "")},距离现在 {round(time.time() - kw["body"]["publish_time"], 4)} 秒 ,'
                f'超过了指定的 {self._msg_expire_senconds} 秒,丢弃任务')
            self._confirm_consume(kw)
            return 0
        self.threadpool.submit(self._run, kw)
        time.sleep(self._msg_schedule_time_intercal)

    @decorators.FunctionResultCacher.cached_function_result_for_a_time(120)
    def _judge_is_daylight(self):
        # Cached for 120s: True when "now" falls inside the configured do-not-run window.
        if self._is_do_not_run_by_specify_time_effect and self._do_not_run_by_specify_time[
                0] < time_util.DatetimeConverter(
        ).time_str < self._do_not_run_by_specify_time[1]:
            self.logger.warning(
                f'现在时间是 {time_util.DatetimeConverter()} ,现在时间是在 {self._do_not_run_by_specify_time} 之间,不运行'
            )
            return True

    def __str__(self):
        return f'队列为 {self.queue_name} 函数为 {self.consuming_function} 的消费者'
# -*- coding: utf-8 -*- # @Author : ydf # @Time : 2019/8/8 0008 13:32 import json from gnsq import Consumer, Message from function_scheduling_distributed_framework import frame_config from function_scheduling_distributed_framework.consumers.base_consumer import AbstractConsumer from function_scheduling_distributed_framework.utils import LogManager LogManager('gnsq').get_logger_and_add_handlers(20) class NsqConsumer(AbstractConsumer): """ nsq作为中间件实现的。 """ BROKER_KIND = 7 def _shedual_task(self): consumer = Consumer( self._queue_name, 'frame_channel', frame_config.NSQD_TCP_ADDRESSES, max_in_flight=self._threads_num, heartbeat_interval=600, timeout=600, ) @consumer.on_message.connect def handler(consumerx: Consumer, message: Message):
"""Smoke-test script: clear the queue, push 100 celery `add` tasks and time the publishing loop."""
import time

from function_scheduling_distributed_framework.utils import RedisMixin, LogManager
from test_frame.test_frame_using_thread.test_celery.test_celery_app import add

LogManager().get_logger_and_add_handlers()

# Start from an empty queue so the timing run is reproducible.
RedisMixin().redis_db_frame.delete('queue_add')

start_time = time.time()
for task_index in range(100):
    print(task_index)
    # add.delay returns a celery.result.AsyncResult; calling .get() would block on the worker.
    async_result = add.delay(task_index, task_index * 2)
    print(type(async_result))
print(time.time() - start_time)
print('任务添加完成')
# -*- coding: utf-8 -*- # @Author : ydf # @Time : 2019/8/8 0008 13:27 import json from pika.exceptions import AMQPError from function_scheduling_distributed_framework.consumers.base_consumer import AbstractConsumer from function_scheduling_distributed_framework.utils import LogManager from function_scheduling_distributed_framework.utils.rabbitmq_factory import RabbitMqFactory LogManager('pika').get_logger_and_add_handlers(20) class RabbitmqConsumer(AbstractConsumer): """ 使用pika包实现的。 """ BROKER_KIND = 0 def _shedual_task(self): channel = RabbitMqFactory(is_use_rabbitpy=0).get_rabbit_cleint().creat_a_channel() channel.queue_declare(queue=self._queue_name, durable=True) channel.basic_qos(prefetch_count=self._threads_num) def callback(ch, method, properties, body): body = body.decode() self.logger.debug(f'从rabbitmq的 [{self._queue_name}] 队列中 取出的消息是: {body}') body = json.loads(body) kw = {'ch': ch, 'method': method, 'properties': properties, 'body': body} self._submit_task(kw)
class AbstractConsumer(LoggerLevelSetterMixin, metaclass=abc.ABCMeta, ):
    """Abstract base class for all broker-specific consumers.

    Subclasses implement how messages are fetched (`_shedual_task`), acknowledged
    (`_confirm_consume`) and requeued (`_requeue`); this class supplies retry,
    parameter-based task filtering, rate limiting, result persistence and
    rpc-result plumbing.
    """
    time_interval_for_check_do_not_run_time = 60  # seconds to back off when inside the configured no-run window
    BROKER_KIND = None  # each concrete subclass sets its broker-kind id

    @property
    @decorators.synchronized
    def publisher_of_same_queue(self):
        """Lazily build (once, thread-safely) a publisher bound to this consumer's queue."""
        if not self._publisher_of_same_queue:
            self._publisher_of_same_queue = get_publisher(self._queue_name, consuming_function=self.consuming_function, broker_kind=self.BROKER_KIND)
            if self._msg_expire_senconds:
                # Expiry checks need a publish timestamp stamped on every message.
                self._publisher_of_same_queue.set_is_add_publish_time()
        return self._publisher_of_same_queue

    def bulid_a_new_publisher_of_same_queue(self):
        """Return a fresh (non-cached) publisher for this queue."""
        return get_publisher(self._queue_name, broker_kind=self.BROKER_KIND)

    @classmethod
    def join_shedual_task_thread(cls):
        """Block the main thread on every consumer's scheduling thread (see usage example string below).

        :return:
        """
        """
        def ff():
            RabbitmqConsumer('queue_test', consuming_function=f3, threads_num=20, msg_schedule_time_intercal=2, log_level=10, logger_prefix='yy平台消费', is_consuming_function_use_multi_params=True).start_consuming_message()
            RabbitmqConsumer('queue_test2', consuming_function=f4, threads_num=20, msg_schedule_time_intercal=4, log_level=10, logger_prefix='zz平台消费', is_consuming_function_use_multi_params=True).start_consuming_message()
            AbstractConsumer.join_shedual_task_thread()            # 如果开多进程启动消费者,在linux上需要这样写下这一行。

        if __name__ == '__main__':
            [Process(target=ff).start() for _ in range(4)]
        """
        ConsumersManager.join_all_consumer_shedual_task_thread()

    # noinspection PyProtectedMember
    def __init__(self, queue_name, *, consuming_function: Callable = None, function_timeout=0, threads_num=50,
                 concurrent_num=50, specify_threadpool=None, concurrent_mode=1, max_retry_times=3, log_level=10,
                 is_print_detail_exception=True, msg_schedule_time_intercal=0.0, qps=0, msg_expire_senconds=0,
                 logger_prefix='', create_logger_file=True, do_task_filtering=False, task_filtering_expire_seconds=0,
                 is_consuming_function_use_multi_params=True, is_do_not_run_by_specify_time_effect=False,
                 do_not_run_by_specify_time=('10:00:00', '22:00:00'), schedule_tasks_on_main_thread=False,
                 function_result_status_persistance_conf=FunctionResultStatusPersistanceConfig(False, False, 7 * 24 * 3600),
                 is_using_rpc_mode=False):
        """
        :param queue_name: name of the broker queue to consume from.
        :param consuming_function: the function that processes each message.
        :param function_timeout: seconds after which a running function is killed; 0 means no limit.
        :param threads_num: thread/coroutine concurrency (deprecated in favour of concurrent_num).
        :param concurrent_num: concurrency; overrides threads_num, which will be removed later because
               its name is inaccurate (concurrency is not necessarily thread-based).
        :param specify_threadpool: use the given thread/coroutine pool (may be shared between several
               consumers); when not None, threads_num is ignored.
        :param concurrent_mode: concurrency mode — 1 thread, 2 gevent, 3 eventlet.
        :param max_retry_times: max retries before the message is acknowledged anyway.
        :param log_level: logging level for this consumer's logger.
        :param is_print_detail_exception: log full tracebacks for task errors.
        :param msg_schedule_time_intercal: interval between message submissions, for rate control.
        :param qps: target function executions per second; overrides msg_schedule_time_intercal
               (which will be removed later).
        :param logger_prefix: logger-name prefix so different consumers log separately.
        :param create_logger_file: also write this consumer's log to a file.
        :param do_task_filtering: skip tasks whose parameters were already executed successfully.
        :param task_filtering_expire_seconds: TTL of the task filter; 0 filters forever. E.g. with 1800,
               a `1 + 2` task published more than 30 minutes ago still runs, while one published within
               30 minutes is skipped. Typically used for interface result caching.
        :param is_consuming_function_use_multi_params: whether the function takes classic multiple
               parameters instead of one single body dict.
        :param is_do_not_run_by_specify_time_effect: enable the no-run time window.
        :param do_not_run_by_specify_time: the no-run time window.
        :param schedule_tasks_on_main_thread: schedule directly on the main thread, which means two
               consumers cannot be started on the same main thread.
        :param function_result_status_persistance_conf: whether/how to persist function params, results
               and status to mongodb, for tracing, statistics and the web display (requires mongo).
        :param is_using_rpc_mode: lets the publisher side fetch the consumer's result via a callback;
               costs some performance, and `async_result.result` blocks the calling thread.
        """
        # Snapshot all constructor arguments for the consumers registry / web display.
        ConsumersManager.consumers_queue__info_map[queue_name] = current_queue__info_dict = copy.copy(locals())
        current_queue__info_dict['consuming_function'] = str(consuming_function)  # consuming_function.__name__
        current_queue__info_dict['function_result_status_persistance_conf'] = function_result_status_persistance_conf.to_dict()
        current_queue__info_dict.pop('self')
        current_queue__info_dict['broker_kind'] = self.__class__.BROKER_KIND
        current_queue__info_dict['class_name'] = self.__class__.__name__
        concurrent_name = ConsumersManager.get_concurrent_name_by_concurrent_mode(concurrent_mode)
        current_queue__info_dict['concurrent_mode_name'] = concurrent_name
        # Record a clickable "file:line" of where the consumer was instantiated, so IDE output jumps
        # to the user's code. When going through the factory, walk one frame further so the location
        # points at the caller of consumer_factory.py rather than the factory itself.
        line = sys._getframe(0).f_back.f_lineno
        file_name = sys._getframe(1).f_code.co_filename
        if 'consumer_factory.py' in file_name:
            line = sys._getframe(1).f_back.f_lineno
            file_name = sys._getframe(2).f_code.co_filename
        current_queue__info_dict['where_to_instantiate'] = f'{file_name}:{line}'
        self._queue_name = queue_name
        self.queue_name = queue_name  # public alias so外部 access — public alias to avoid protected-access warnings.
        self.consuming_function = consuming_function
        self._function_timeout = function_timeout
        self._threads_num = concurrent_num if threads_num == 50 else threads_num  # concurrent_num takes priority; threads_num is deprecated.
        self._specify_threadpool = specify_threadpool
        self._threadpool = None  # plus a separate thread for message-count/heartbeat checking
        self._concurrent_mode = concurrent_mode
        self._max_retry_times = max_retry_times
        self._is_print_detail_exception = is_print_detail_exception
        if qps != 0:
            msg_schedule_time_intercal = 1.0 / qps  # qps overrides the scheduling interval; msg_schedule_time_intercal is deprecated.
        self._msg_schedule_time_intercal = msg_schedule_time_intercal if msg_schedule_time_intercal > 0.001 else 0.001
        self._msg_expire_senconds = msg_expire_senconds
        if self._concurrent_mode not in (1, 2, 3):
            raise ValueError('设置的并发模式不正确')
        self._concurrent_mode_dispatcher = ConcurrentModeDispatcher(self)
        self._logger_prefix = logger_prefix
        self._log_level = log_level
        if logger_prefix != '':
            logger_prefix += '--'
        logger_name = f'{logger_prefix}{self.__class__.__name__}--{concurrent_name}--{queue_name}'
        # nb_print(logger_name)
        self.logger = LogManager(logger_name).get_logger_and_add_handlers(log_level, log_filename=f'{logger_name}.log' if create_logger_file else None)
        # self.logger.info(f'{self.__class__} 在 {current_queue__info_dict["where_to_instantiate"]} 被实例化')
        sys.stdout.write(f'{time.strftime("%H:%M:%S")} "{current_queue__info_dict["where_to_instantiate"]}"  \033[0;30;44m此行 实例化队列名 {current_queue__info_dict["queue_name"]} 的消费者, 类型为 {self.__class__}\033[0m\n')
        self._do_task_filtering = do_task_filtering
        self._redis_filter_key_name = f'filter_zset:{queue_name}' if task_filtering_expire_seconds else f'filter_set:{queue_name}'
        filter_class = RedisFilter if task_filtering_expire_seconds == 0 else RedisImpermanencyFilter
        self._redis_filter = filter_class(self._redis_filter_key_name, task_filtering_expire_seconds)
        self._is_consuming_function_use_multi_params = is_consuming_function_use_multi_params
        self._execute_task_times_every_minute = 0  # how many tasks were executed this minute
        self._lock_for_count_execute_task_times_every_minute = Lock()
        self._current_time_for_execute_task_times_every_minute = time.time()
        self._msg_num_in_broker = 0
        self._last_timestamp_when_has_task_in_queue = 0
        self._last_timestamp_print_msg_num = 0
        self._is_do_not_run_by_specify_time_effect = is_do_not_run_by_specify_time_effect
        self._do_not_run_by_specify_time = do_not_run_by_specify_time  # an optional time window in which not to run
        self._schedule_tasks_on_main_thread = schedule_tasks_on_main_thread
        self._result_persistence_helper = ResultPersistenceHelper(function_result_status_persistance_conf, queue_name)
        self._is_using_rpc_mode = is_using_rpc_mode
        self.stop_flag = False
        self._publisher_of_same_queue = None
        self.custom_init()

    @property
    @decorators.synchronized
    def threadpool(self):
        # Built lazily by the dispatcher according to the concurrency mode.
        return self._concurrent_mode_dispatcher.build_pool()

    def custom_init(self):
        """Hook for subclasses needing extra per-instance setup; default is a no-op."""
        pass

    def keep_circulating(self, time_sleep=0.001, exit_if_function_run_sucsess=False, is_display_detail_exception=True, block=True):
        """Decorator factory: keep running a function in a loop with a sleep between iterations.

        :param time_sleep: interval between iterations.
        :param is_display_detail_exception: log the full traceback when the function raises.
        :param exit_if_function_run_sucsess: stop looping once the function returns without raising.
        :param block: run the loop in the calling thread (blocking) instead of a background thread.
        """
        def _keep_circulating(func):
            @wraps(func)
            def __keep_circulating(*args, **kwargs):
                # noinspection PyBroadException
                def ___keep_circulating():
                    while 1:
                        try:
                            result = func(*args, **kwargs)
                            if exit_if_function_run_sucsess:
                                return result
                        except Exception as e:
                            msg = func.__name__ + '   运行出错\n ' + traceback.format_exc(limit=10) if is_display_detail_exception else str(e)
                            self.logger.error(msg)
                        finally:
                            time.sleep(time_sleep)

                if block:
                    return ___keep_circulating()
                else:
                    threading.Thread(target=___keep_circulating, ).start()

            return __keep_circulating

        return _keep_circulating

    def start_consuming_message(self):
        """Start the heartbeat/count checker, the filter expiry cycle, and the scheduling loop."""
        self.logger.warning(f'开始消费 {self._queue_name} 中的消息')
        self.keep_circulating(20, block=False)(self.check_heartbeat_and_message_count)()
        self._redis_filter.delete_expire_filter_task_cycle()
        if self._schedule_tasks_on_main_thread:
            self.keep_circulating(1)(self._shedual_task)()
        else:
            self._concurrent_mode_dispatcher.schedulal_task_with_no_block()

    @abc.abstractmethod
    def _shedual_task(self):
        """Each subclass must implement how to take messages off the broker and submit
        the function plus its run params to the work pool.
        :return:
        """
        raise NotImplementedError

    def _run(self, kw: dict, ):
        """Run one message: apply task filtering, update per-minute stats, then execute with retry."""
        do_task_filtering_priority = self.__get_priority_conf(kw, 'do_task_filtering')
        function_only_params = delete_keys_and_return_new_dict(kw['body'], )
        if do_task_filtering_priority and self._redis_filter.check_value_exists(function_only_params):
            # Filter out tasks whose params were already executed successfully.
            self.logger.info(f'redis的 [{self._redis_filter_key_name}] 键 中 过滤任务 {kw["body"]}')
            self._confirm_consume(kw)
            return
        with self._lock_for_count_execute_task_times_every_minute:
            self._execute_task_times_every_minute += 1
            if time.time() - self._current_time_for_execute_task_times_every_minute > 60:
                self.logger.info(
                    f'一分钟内执行了 {self._execute_task_times_every_minute} 次函数 [ {self.consuming_function.__name__} ] ,预计'
                    f'还需要 {time_util.seconds_to_hour_minute_second(self._msg_num_in_broker / self._execute_task_times_every_minute * 60)} 时间'
                    f'才能执行完成 {self._msg_num_in_broker}个剩余的任务 ')
                self._current_time_for_execute_task_times_every_minute = time.time()
                self._execute_task_times_every_minute = 0
        self._run_consuming_function_with_confirm_and_retry(kw, current_retry_times=0,
                                                            function_result_status=FunctionResultStatus(self.queue_name, self.consuming_function.__name__, kw['body']),
                                                            do_task_filtering_priority=do_task_filtering_priority)

    def __get_priority_conf(self, kw: dict, broker_task_config_key: str):
        """Per-message config from body['extra'] wins over the consumer-level attribute of the same name."""
        broker_task_config = kw['body'].get('extra', {}).get(broker_task_config_key, None)
        if broker_task_config is None:
            return getattr(self, f'_{broker_task_config_key}')
        else:
            return broker_task_config

    def _run_consuming_function_with_confirm_and_retry(self, kw: dict, current_retry_times,
                                                       function_result_status: FunctionResultStatus,
                                                       do_task_filtering_priority):
        """Execute the consuming function, acking on success and retrying (recursively) on failure.

        PyMongoError / ExceptionForRequeue cause an immediate requeue that is not counted
        against the retry limit.
        """
        function_only_params = delete_keys_and_return_new_dict(kw['body'])
        if current_retry_times < self.__get_priority_conf(kw, 'max_retry_times'):
            function_result_status.run_times += 1
            # noinspection PyBroadException
            t_start = time.time()
            try:
                function_run = self.consuming_function if self._function_timeout == 0 else self._concurrent_mode_dispatcher.timeout_deco(self.__get_priority_conf(kw, 'function_timeout'))(self.consuming_function)
                if self._is_consuming_function_use_multi_params:  # 消费函数使用传统的多参数形式
                    function_result_status.result = function_run(**function_only_params)
                else:
                    function_result_status.result = function_run(function_only_params)  # 消费函数使用单个参数,参数自身是一个字典,由键值对表示各个参数。
                function_result_status.success = True
                self._confirm_consume(kw)
                if do_task_filtering_priority:
                    self._redis_filter.add_a_value(function_only_params)  # after success, record the sorted-params key in the filter set.
                self.logger.debug(f' 函数 {self.consuming_function.__name__}  '
                                  f'第{current_retry_times + 1}次 运行, 正确了,函数运行时间是 {round(time.time() - t_start, 4)} 秒,入参是 【 {function_only_params} 】。  {ConsumersManager.get_concurrent_info()}')
            except Exception as e:
                if isinstance(e, (PyMongoError, ExceptionForRequeue)):  # mongo is often down during maintenance/backup, or the user deliberately raised ExceptionForRequeue: requeue, not bounded by the retry limit.
                    self.logger.critical(f'函数 [{self.consuming_function.__name__}] 中发生错误 {type(e)}  {e}')
                    return self._requeue(kw)
                self.logger.error(f'函数 {self.consuming_function.__name__}  第{current_retry_times + 1}次发生错误,'
                                  f'函数运行时间是 {round(time.time() - t_start, 4)} 秒,\n  入参是 【 {function_only_params} 】   \n 原因是 {type(e)} {e} ',
                                  exc_info=self.__get_priority_conf(kw, 'is_print_detail_exception'))
                function_result_status.exception = f'{e.__class__.__name__}    {str(e)}'
                self._run_consuming_function_with_confirm_and_retry(kw, current_retry_times + 1, function_result_status, do_task_filtering_priority)
        else:
            self.logger.critical(f'函数 {self.consuming_function.__name__} 达到最大重试次数 {self.__get_priority_conf(kw, "max_retry_times")} 后,仍然失败, 入参是 【 {function_only_params} 】')
            self._confirm_consume(kw)  # 错得超过指定的次数了,就确认消费了。
        # NOTE(review): persistence + rpc push run after both branches; because retries recurse,
        # they may execute once per attempt — confirm this is intended.
        self._result_persistence_helper.save_function_result_to_mongo(function_result_status)
        if self.__get_priority_conf(kw, 'is_using_rpc_mode'):
            # print(function_result_status.get_status_dict(without_datetime_obj=True))
            with RedisMixin().redis_db_frame.pipeline() as p:
                # RedisMixin().redis_db_frame.lpush(kw['body']['extra']['task_id'], json.dumps(function_result_status.get_status_dict(without_datetime_obj=True)))
                # RedisMixin().redis_db_frame.expire(kw['body']['extra']['task_id'], 600)
                p.lpush(kw['body']['extra']['task_id'], json.dumps(function_result_status.get_status_dict(without_datetime_obj=True)))
                p.expire(kw['body']['extra']['task_id'], 600)
                p.execute()

    @abc.abstractmethod
    def _confirm_consume(self, kw):
        """Acknowledge consumption of the message (broker-specific)."""
        raise NotImplementedError

    def check_heartbeat_and_message_count(self):
        """Refresh the cached broker queue size; log it at most once a minute; return the count."""
        self._msg_num_in_broker = self.publisher_of_same_queue.get_message_count()
        if time.time() - self._last_timestamp_print_msg_num > 60:
            self.logger.info(f'[{self._queue_name}] 队列中还有 [{self._msg_num_in_broker}] 个任务')
            self._last_timestamp_print_msg_num = time.time()
        if self._msg_num_in_broker != 0:
            self._last_timestamp_when_has_task_in_queue = time.time()
        return self._msg_num_in_broker

    @abc.abstractmethod
    def _requeue(self, kw):
        """Put the message back onto the broker queue (broker-specific)."""
        raise NotImplementedError

    def _submit_task(self, kw):
        """Gatekeeper before the work pool: honour the no-run window, drop expired messages, then submit and throttle."""
        if self._judge_is_daylight():
            self._requeue(kw)
            time.sleep(self.time_interval_for_check_do_not_run_time)
            return
        publish_time = _get_publish_time(kw['body'])
        msg_expire_senconds_priority = self.__get_priority_conf(kw, 'msg_expire_senconds')
        if msg_expire_senconds_priority != 0 and time.time() - msg_expire_senconds_priority > publish_time:
            # Message older than the expiry threshold: ack and drop.
            self.logger.warning(f'消息发布时戳是 {publish_time} {kw["body"].get("publish_time_format", "")},距离现在 {round(time.time() - publish_time, 4)} 秒 ,'
                                f'超过了指定的 {msg_expire_senconds_priority} 秒,丢弃任务')
            self._confirm_consume(kw)
            return 0
        self.threadpool.submit(self._run, kw)
        time.sleep(self._msg_schedule_time_intercal)  # crude rate limiting between submissions

    @decorators.FunctionResultCacher.cached_function_result_for_a_time(120)
    def _judge_is_daylight(self):
        """Return True when "now" falls inside the configured no-run window (result cached for 120s)."""
        if self._is_do_not_run_by_specify_time_effect and self._do_not_run_by_specify_time[0] < time_util.DatetimeConverter().time_str < self._do_not_run_by_specify_time[1]:
            self.logger.warning(f'现在时间是 {time_util.DatetimeConverter()} ,现在时间是在 {self._do_not_run_by_specify_time} 之间,不运行')
            return True

    def __str__(self):
        return f'队列为 {self.queue_name} 函数为 {self.consuming_function} 的消费者'
    # noinspection PyProtectedMember
    def __init__(self, queue_name, *, consuming_function: Callable = None, function_timeout=0, threads_num=50,
                 concurrent_num=50, specify_threadpool=None, concurrent_mode=1, max_retry_times=3, log_level=10,
                 is_print_detail_exception=True, msg_schedule_time_intercal=0.0, qps=0, msg_expire_senconds=0,
                 logger_prefix='', create_logger_file=True, do_task_filtering=False, task_filtering_expire_seconds=0,
                 is_consuming_function_use_multi_params=True, is_do_not_run_by_specify_time_effect=False,
                 do_not_run_by_specify_time=('10:00:00', '22:00:00'), schedule_tasks_on_main_thread=False,
                 function_result_status_persistance_conf=FunctionResultStatusPersistanceConfig(False, False, 7 * 24 * 3600),
                 is_using_rpc_mode=False):
        """Consumer constructor (duplicate copy of the base-class `__init__` in this same source).

        :param queue_name: name of the broker queue to consume from.
        :param consuming_function: the function that processes each message.
        :param function_timeout: seconds after which a running function is killed; 0 means no limit.
        :param threads_num: thread/coroutine concurrency (deprecated in favour of concurrent_num).
        :param concurrent_num: concurrency; overrides threads_num, which will be removed later.
        :param specify_threadpool: use the given thread/coroutine pool (may be shared between
               consumers); when not None, threads_num is ignored.
        :param concurrent_mode: concurrency mode — 1 thread, 2 gevent, 3 eventlet.
        :param max_retry_times: max retries before the message is acknowledged anyway.
        :param log_level: logging level for this consumer's logger.
        :param is_print_detail_exception: log full tracebacks for task errors.
        :param msg_schedule_time_intercal: interval between message submissions, for rate control.
        :param qps: target executions per second; overrides msg_schedule_time_intercal (deprecated).
        :param logger_prefix: logger-name prefix so different consumers log separately.
        :param create_logger_file: also write this consumer's log to a file.
        :param do_task_filtering: skip tasks whose parameters were already executed successfully.
        :param task_filtering_expire_seconds: TTL of the task filter; 0 filters forever.
        :param is_consuming_function_use_multi_params: classic multi params vs one body dict.
        :param is_do_not_run_by_specify_time_effect: enable the no-run time window.
        :param do_not_run_by_specify_time: the no-run time window.
        :param schedule_tasks_on_main_thread: schedule directly on the main thread.
        :param function_result_status_persistance_conf: whether/how to persist params, results and
               status to mongodb (for tracing, statistics and web display).
        :param is_using_rpc_mode: lets the publisher fetch the consumer's result; costs performance,
               and `async_result.result` blocks the calling thread.
        """
        # Snapshot all constructor arguments for the consumers registry / web display.
        ConsumersManager.consumers_queue__info_map[queue_name] = current_queue__info_dict = copy.copy(locals())
        current_queue__info_dict['consuming_function'] = str(consuming_function)  # consuming_function.__name__
        current_queue__info_dict['function_result_status_persistance_conf'] = function_result_status_persistance_conf.to_dict()
        current_queue__info_dict.pop('self')
        current_queue__info_dict['broker_kind'] = self.__class__.BROKER_KIND
        current_queue__info_dict['class_name'] = self.__class__.__name__
        concurrent_name = ConsumersManager.get_concurrent_name_by_concurrent_mode(concurrent_mode)
        current_queue__info_dict['concurrent_mode_name'] = concurrent_name
        # Record a clickable "file:line" of where the consumer was instantiated, pointing at the
        # user's code rather than at the factory when instantiation went through consumer_factory.py.
        line = sys._getframe(0).f_back.f_lineno
        file_name = sys._getframe(1).f_code.co_filename
        if 'consumer_factory.py' in file_name:
            line = sys._getframe(1).f_back.f_lineno
            file_name = sys._getframe(2).f_code.co_filename
        current_queue__info_dict['where_to_instantiate'] = f'{file_name}:{line}'
        self._queue_name = queue_name
        self.queue_name = queue_name  # public alias to avoid protected-access warnings outside.
        self.consuming_function = consuming_function
        self._function_timeout = function_timeout
        self._threads_num = concurrent_num if threads_num == 50 else threads_num  # concurrent_num takes priority; threads_num is deprecated.
        self._specify_threadpool = specify_threadpool
        self._threadpool = None  # plus a separate thread for message-count/heartbeat checking
        self._concurrent_mode = concurrent_mode
        self._max_retry_times = max_retry_times
        self._is_print_detail_exception = is_print_detail_exception
        if qps != 0:
            msg_schedule_time_intercal = 1.0 / qps  # qps overrides the scheduling interval; msg_schedule_time_intercal is deprecated.
        self._msg_schedule_time_intercal = msg_schedule_time_intercal if msg_schedule_time_intercal > 0.001 else 0.001
        self._msg_expire_senconds = msg_expire_senconds
        if self._concurrent_mode not in (1, 2, 3):
            raise ValueError('设置的并发模式不正确')
        self._concurrent_mode_dispatcher = ConcurrentModeDispatcher(self)
        self._logger_prefix = logger_prefix
        self._log_level = log_level
        if logger_prefix != '':
            logger_prefix += '--'
        logger_name = f'{logger_prefix}{self.__class__.__name__}--{concurrent_name}--{queue_name}'
        # nb_print(logger_name)
        self.logger = LogManager(logger_name).get_logger_and_add_handlers(log_level, log_filename=f'{logger_name}.log' if create_logger_file else None)
        # self.logger.info(f'{self.__class__} 在 {current_queue__info_dict["where_to_instantiate"]} 被实例化')
        sys.stdout.write(f'{time.strftime("%H:%M:%S")} "{current_queue__info_dict["where_to_instantiate"]}"  \033[0;30;44m此行 实例化队列名 {current_queue__info_dict["queue_name"]} 的消费者, 类型为 {self.__class__}\033[0m\n')
        self._do_task_filtering = do_task_filtering
        self._redis_filter_key_name = f'filter_zset:{queue_name}' if task_filtering_expire_seconds else f'filter_set:{queue_name}'
        filter_class = RedisFilter if task_filtering_expire_seconds == 0 else RedisImpermanencyFilter
        self._redis_filter = filter_class(self._redis_filter_key_name, task_filtering_expire_seconds)
        self._is_consuming_function_use_multi_params = is_consuming_function_use_multi_params
        self._execute_task_times_every_minute = 0  # how many tasks were executed this minute
        self._lock_for_count_execute_task_times_every_minute = Lock()
        self._current_time_for_execute_task_times_every_minute = time.time()
        self._msg_num_in_broker = 0
        self._last_timestamp_when_has_task_in_queue = 0
        self._last_timestamp_print_msg_num = 0
        self._is_do_not_run_by_specify_time_effect = is_do_not_run_by_specify_time_effect
        self._do_not_run_by_specify_time = do_not_run_by_specify_time  # an optional time window in which not to run
        self._schedule_tasks_on_main_thread = schedule_tasks_on_main_thread
        self._result_persistence_helper = ResultPersistenceHelper(function_result_status_persistance_conf, queue_name)
        self._is_using_rpc_mode = is_using_rpc_mode
        self.stop_flag = False
        self._publisher_of_same_queue = None
        self.custom_init()
# -*- coding: utf-8 -*- # @Author : ydf # @Time : 2019/8/8 0008 14:57 from multiprocessing import Process import time from function_scheduling_distributed_framework import get_consumer, get_publisher, AbstractConsumer from function_scheduling_distributed_framework.consumers.redis_consumer import RedisConsumer from function_scheduling_distributed_framework.utils import LogManager from test_frame.my_patch_frame_config import do_patch_frame_config do_patch_frame_config() logger = LogManager('complex_example').get_logger_and_add_handlers() pb2 = get_publisher('task2_queue', broker_kind=2) def task1(x, y): logger.info(f'消费此消息 {x} - {y} ,结果是 {x - y}') for i in range(10): pb2.publish({'n': x * 100 + i}) # 消费时候发布任务到别的队列或自己的队列。可以边消费边推送。 time.sleep(10) # 模拟做某事需要阻塞10秒种,必须用并发绕过此阻塞。 def task2(n): logger.info(n) time.sleep(3) def multi_processing_consume():
do_patch_frame_config()


def foo():
    """Always raises, to exercise the snooped traceback path."""
    raise TypeError('bad')


def bar():
    """Call foo, do a trivial statement inside the handler, then re-raise."""
    try:
        foo()
    except Exception:
        str(1)
        raise


logger = LogManager('test_pysnoop').get_logger_and_add_handlers(log_filename='test_pysnoop.log')


@pysnooper_ydf.snoop(depth=300)
def main():
    """Exercise pysnooper tracing over loops, deep library calls and raised exceptions."""
    try:
        # logger.info('测试pysnoop')
        # NOTE(review): loop-body extent below is reconstructed from a collapsed chunk — confirm
        # against the original file which statements belong inside the for loop.
        for i in range(5):
            print(i)
            j = 333
        resp = requests.get('https://www.baidu.com')  # test automatic trace tracking through deeply nested calls.
        logger.debug(resp.text)
        print(RedisMixin().redis_db_frame.set('key_test', '1'))
        bar()
    except:
        # NOTE(review): chunk truncated here — the handler body continues outside this view.
# @Time : 2019/6/14 17:33 import os from pathlib import Path from flask import Flask, send_from_directory, url_for, jsonify, request, render_template, current_app, abort, g, send_file, redirect from flask_httpauth import HTTPBasicAuth from flask_bootstrap import Bootstrap from function_scheduling_distributed_framework.utils import LogManager, nb_print, time_util print(str((Path(__file__).parent / Path('ydf_dir')).absolute())) app = Flask(__name__, template_folder=str( (Path(__file__).parent / Path('ydf_dir')).absolute())) app.config['JSON_AS_ASCII'] = False app.config['REFRESH_MSEC'] = 1000 auth = HTTPBasicAuth() LogManager(app.logger.name).get_logger_and_add_handlers() bootstrap = Bootstrap(app) @app.route('/favicon.ico') def favicon(): print(Path(__file__).parent / Path('ydf_dir/').absolute()) return send_from_directory(str( Path(__file__).parent / Path('ydf_dir/').absolute()), 'log_favicon.ico', mimetype='image/vnd.microsoft.icon') @app.route("/ajax0/<path:fullname>/") def info0(fullname): fullname = f'/{fullname}'
# -*- coding: utf-8 -*-
# @Author : ydf
# @Time : 2019/8/8 0008 14:57
"""Minimal consumer example: hand a plain function to the framework and start consuming."""
import time
from function_scheduling_distributed_framework import get_consumer
from function_scheduling_distributed_framework.utils import LogManager
from test_frame.my_patch_frame_config import do_patch_frame_config

do_patch_frame_config()

logger = LogManager('f2').get_logger_and_add_handlers()


def f2(a, b):
    """Example consuming function: log, block for 10s, log the sum."""
    logger.info(f'消费此消息 {a} + {b} 中。。。。。')
    # Simulate a 10-second blocking step; concurrency is what works around the block.
    time.sleep(10)
    logger.info(f'计算 {a} + {b} 得到的结果是  {a + b}')


# Pass the consuming function via consuming_function — that is all it takes.
# broker_kind=6 selects one of the seven supported middleware/packages (mq, redis, ...).
consumer = get_consumer('queue_test2', consuming_function=f2, broker_kind=6)

if __name__ == '__main__':
    consumer.start_consuming_message()
class AbstractPublisher(LoggerLevelSetterMixin, metaclass=abc.ABCMeta, ):
    """Abstract base for broker-specific publishers.

    Handles logging, publish counting, optional publish-time stamping and
    retrying; subclasses implement the broker protocol (publish/clear/count/close).
    """
    has_init_broker = 0  # flag for lazy, one-time broker initialisation

    def __init__(self, queue_name, log_level_int=10, logger_prefix='', is_add_file_handler=True, clear_queue_within_init=False, is_add_publish_time=True, ):
        """
        :param queue_name: name of the broker queue to publish to.
        :param log_level_int: logging level for this publisher's logger.
        :param logger_prefix: logger-name prefix so different publishers log separately.
        :param is_add_file_handler: also write this publisher's log to a file.
        :param clear_queue_within_init: purge the queue when the publisher is created.
        :param is_add_publish_time: stamp each message with its publish time.
        """
        self._queue_name = queue_name
        if logger_prefix != '':
            logger_prefix += '--'
        logger_name = f'{logger_prefix}{self.__class__.__name__}--{queue_name}'
        self.logger = LogManager(logger_name).get_logger_and_add_handlers(log_level_int, log_filename=f'{logger_name}.log' if is_add_file_handler else None)  #
        # self.rabbit_client = RabbitMqFactory(is_use_rabbitpy=is_use_rabbitpy).get_rabbit_cleint()
        # self.channel = self.rabbit_client.creat_a_channel()
        # self.queue = self.channel.queue_declare(queue=queue_name, durable=True)
        self._lock_for_pika = Lock()   # pika channels are not thread-safe; serialise access
        self._lock_for_count = Lock()  # guards the per-interval publish counters
        self._current_time = None
        self.count_per_minute = None
        self._init_count()
        self.custom_init()
        self.logger.info(f'{self.__class__} 被实例化了')
        self.publish_msg_num_total = 0
        self._is_add_publish_time = is_add_publish_time
        self.__init_time = time.time()
        atexit.register(self.__at_exit)  # log a publish summary when the process exits
        if clear_queue_within_init:
            self.clear()

    def set_is_add_publish_time(self, is_add_publish_time=True):
        """Toggle publish-time stamping; returns self for chaining."""
        self._is_add_publish_time = is_add_publish_time
        return self

    def _init_count(self):
        """Reset the per-interval publish counter and its reference timestamp."""
        with self._lock_for_count:
            self._current_time = time.time()
            self.count_per_minute = 0

    def custom_init(self):
        """Hook for subclasses needing extra per-instance setup; default is a no-op."""
        pass

    def publish(self, msg: typing.Union[str, dict]):
        """Serialise and publish one message, retrying up to 10 times, and update counters.

        Accepts either a dict or a JSON string (which is parsed first).
        """
        if isinstance(msg, str):
            msg = json.loads(msg)
        if self._is_add_publish_time:
            # NOTE(review): this mutates the caller's dict in place — confirm acceptable.
            # msg.update({'publish_time': time.time(), 'publish_time_format': time_util.DatetimeConverter().datetime_str})
            msg.update({'publish_time': round(time.time(), 4), })
        t_start = time.time()
        # Retry transient broker failures up to 10 times before propagating the error.
        decorators.handle_exception(retry_times=10, is_throw_error=True, time_sleep=0.1)(self.concrete_realization_of_publish)(json.dumps(msg))
        self.logger.debug(f'向{self._queue_name} 队列,推送消息 耗时{round(time.time() - t_start, 4)}秒  {msg}')
        with self._lock_for_count:
            self.count_per_minute += 1
            self.publish_msg_num_total += 1
            if time.time() - self._current_time > 10:
                self.logger.info(f'10秒内推送了 {self.count_per_minute} 条消息,累计推送了 {self.publish_msg_num_total} 条消息到 {self._queue_name} 中')
                self._init_count()

    @abc.abstractmethod
    def concrete_realization_of_publish(self, msg):
        """Broker-specific delivery of one already-serialised message."""
        raise NotImplementedError

    @abc.abstractmethod
    def clear(self):
        """Purge the queue (broker-specific)."""
        raise NotImplementedError

    @abc.abstractmethod
    def get_message_count(self):
        """Return the number of messages currently in the queue (broker-specific)."""
        raise NotImplementedError

    @abc.abstractmethod
    def close(self):
        """Close the broker connection (broker-specific)."""
        raise NotImplementedError

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Context-manager exit: close the connection and log the running total.
        self.close()
        self.logger.warning(f'with中自动关闭publisher连接,累计推送了 {self.publish_msg_num_total} 条消息 ')

    def __at_exit(self):
        # atexit hook: summarise how many messages were pushed over the publisher's lifetime.
        self.logger.warning(f'程序关闭前,{round(time.time() - self.__init_time)} 秒内,累计推送了 {self.publish_msg_num_total} 条消息 到 {self._queue_name} 中')
                    # NOTE(review): this chunk begins mid-method (a worker-thread run loop) — the
                    # enclosing `def`/`while` and the indentation below are reconstructed; confirm
                    # against the original file.
                    del work_item
                    self._executorx.change_threads_free_count(1)
                    self._run_times += 1
                    if self._run_times == 50:
                        # Recycle long-lived worker threads after 50 task runs.
                        self._remove_thread(f'运行超过了50次,销毁线程')
                        break
                    continue
                if _shutdown or self._executorx._shutdown:
                    # Propagate the shutdown sentinel so sibling workers also exit.
                    self._executorx.work_queue.put(None)
                    break


process_name_set = set()  # processes for which the thread-count reporter was already started
logger_show_current_threads_num = LogManager('show_current_threads_num').get_logger_and_add_handlers(formatter_template=5, log_filename='show_current_threads_num.log', do_not_use_color_handler=True)


def show_current_threads_num(sleep_time=60, process_name='', block=False):
    """Periodically print the current process's thread count.

    :param sleep_time: seconds between reports.
    :param process_name: label for the report; defaults to the script name (sys.argv[0]).
    :param block: run the reporting loop in the calling thread instead of a background one.
    """
    process_name = sys.argv[0] if process_name == '' else process_name

    def _show_current_threads_num():
        while True:
            # logger_show_current_threads_num.info(f'{process_name} 进程 的 并发数量是 --> {threading.active_count()}')
            nb_print(f'{process_name} 进程 的 线程数量是 --> {threading.active_count()}')
            time.sleep(sleep_time)

    if process_name not in process_name_set:
        # NOTE(review): chunk truncated here — the guarded startup logic continues outside this view.
import threading
import time
import traceback
import unittest
from functools import wraps
# noinspection PyUnresolvedReferences
import pysnooper
from tomorrow3 import threads as tomorrow_threads
from function_scheduling_distributed_framework.utils import LogManager, nb_print
# noinspection PyUnresolvedReferences
from function_scheduling_distributed_framework.utils.custom_pysnooper import _snoop_can_click, snoop_deco, patch_snooper_max_variable_length

# NOTE(review): `os` is referenced here but its import is not visible in this chunk —
# presumably imported just above; verify.
os_name = os.name
nb_print(f'  操作系统类型是 {os_name}')

# Shared loggers for the decorators defined in this module.
handle_exception_log = LogManager('function_error').get_logger_and_add_handlers()
run_times_log = LogManager('run_many_times').get_logger_and_add_handlers(20)


class CustomException(Exception):
    """Exception whose message is always prefixed with 'fatal exception'."""

    def __init__(self, err=''):
        err0 = 'fatal exception\n'
        Exception.__init__(self, err0 + err)


def run_many_times(times=1):
    """Decorator that runs the wrapped function `times` times.

    :param times: number of runs.

    Errors are not caught — an exception aborts the remaining runs; combine with the
    handle_exception decorator to run n times regardless of errors.
    """
    # NOTE(review): chunk truncated here — the decorator body continues outside this view.