def dataflux_func_auto_run(self, *args, **kwargs):
    lock_key   = toolkit.get_cache_key('lock', 'autoRun')
    lock_value = toolkit.gen_uuid()
    if not self.cache_db.lock(lock_key, lock_value, 30):
        self.logger.warning('DataFluxFunc AutoRun Task already launched.')
        return

    self.logger.info('DataFluxFunc AutoRun Task launched.')

    # Fetch auto-run functions from Func integrations
    integrated_auto_run_funcs = self.get_integrated_auto_run_funcs()
    for f in integrated_auto_run_funcs:
        # Task ID
        task_id = gen_task_id()

        # Task kwargs
        task_kwargs = {
            'funcId'  : f['id'],
            'origin'  : 'integration',
            'execMode': 'async',
            'queue'   : CONFIG['_FUNC_TASK_DEFAULT_QUEUE'],
        }

        # Auto-run always uses the default queue
        queue = toolkit.get_worker_queue(CONFIG['_FUNC_TASK_DEFAULT_QUEUE'])

        dataflux_func_runner.apply_async(task_id=task_id, kwargs=task_kwargs, queue=queue)
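# A minimal sketch of the lock primitive used above, assuming `cache_db.lock`
# wraps Redis `SET key value NX EX max_age` (the project's actual helper lives
# in its cache layer; the names below are illustrative, not the real API):
import redis

def lock(client: redis.Redis, lock_key: str, lock_value: str, max_age: int) -> bool:
    # NX: only set if the key does not exist yet (someone else holds the lock otherwise);
    # EX: auto-expire after max_age seconds so a crashed holder cannot block forever
    return bool(client.set(lock_key, lock_value, nx=True, ex=max_age))

def unlock(client: redis.Redis, lock_key: str, lock_value: str) -> None:
    # Only the holder (matching lock_value) may release; this check-then-delete
    # is not atomic, so production code would use a Lua script instead
    if client.get(lock_key) == lock_value.encode():
        client.delete(lock_key)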
def dataflux_func_worker_queue_pressure_recover(self, *args, **kwargs):
    self.logger.info('DataFluxFunc Worker Queue Pressure Recover Task launched.')

    for i in range(CONFIG['_WORKER_QUEUE_COUNT']):
        queue_key    = toolkit.get_worker_queue(i)
        queue_length = self.cache_db.run('llen', queue_key)

        # Reset the pressure counter for any drained queue
        if not queue_length or int(queue_length) <= 0:
            cache_key = toolkit.get_cache_key('cache', 'workerQueuePressure', tags=['workerQueue', i])
            self.cache_db.run('set', cache_key, 0)
def reset_worker_queue_pressure(self, *args, **kwargs):
    # Acquire the lock
    self.lock(max_age=30)

    for i in range(CONFIG['_WORKER_QUEUE_COUNT']):
        queue_key    = toolkit.get_worker_queue(i)
        queue_length = self.cache_db.run('llen', queue_key)

        # Reset the pressure counter for any drained queue
        if not queue_length or int(queue_length) <= 0:
            cache_key = toolkit.get_server_cache_key('cache', 'workerQueuePressure', tags=['workerQueue', i])
            self.cache_db.run('set', cache_key, 0)
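# Both resetters above share one pattern: when a worker queue's Redis list is
# drained, its pressure counter is set back to 0. A standalone sketch with
# redis-py under that assumption (the key layouts below are illustrative, not
# the toolkit's real ones):
import redis

def reset_pressure_for_empty_queues(client: redis.Redis, queue_count: int) -> None:
    for i in range(queue_count):
        queue_key = 'workerQueue@{}'.format(i)                 # assumed queue key layout
        if client.llen(queue_key) <= 0:                        # queue is empty
            client.set('workerQueuePressure@{}'.format(i), 0)  # assumed counter key layout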
def auto_run(self, *args, **kwargs):
    # Acquire the lock
    self.lock(max_age=30)

    # Fetch auto-run functions from Func integrations
    integrated_auto_run_funcs = self.get_integrated_auto_run_funcs()
    for f in integrated_auto_run_funcs:
        # Task ID
        task_id = gen_task_id()

        # Task kwargs
        task_kwargs = {
            'funcId'  : f['id'],
            'origin'  : 'integration',
            'execMode': 'async',
            'queue'   : CONFIG['_FUNC_TASK_DEFAULT_QUEUE'],
        }

        # Auto-run always uses the default queue
        queue = toolkit.get_worker_queue(CONFIG['_FUNC_TASK_DEFAULT_QUEUE'])

        func_runner.apply_async(task_id=task_id, kwargs=task_kwargs, queue=queue)
    worker_concurrency = 3
elif memory_gb > 10:
    worker_concurrency = 10
else:
    worker_concurrency = memory_gb

worker_prefetch_multiplier = CONFIG['_WORKER_PREFETCH_MULTIPLIER']
worker_max_tasks_per_child = CONFIG['_WORKER_MAX_TASKS_PER_CHILD']

# Worker log
worker_hijack_root_logger = False
worker_log_color          = False
worker_redirect_stdouts   = False

# Queue
task_default_queue       = toolkit.get_worker_queue(CONFIG['_WORKER_DEFAULT_QUEUE'])
task_default_routing_key = task_default_queue
task_queues = [
    create_queue(task_default_queue),
]

# Task
task_routes = {
    # '<Task Name>': {'queue': toolkit.get_worker_queue('<Queue Name>')},
}

imports = [
    'worker.tasks.webhook',
    'worker.tasks.internal',
    'worker.tasks.example',
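# `create_queue` above is presumably a small kombu.Queue factory; a sketch under
# that assumption, with the routing key equal to the queue name as the
# `task_default_routing_key = task_default_queue` line suggests:
from kombu import Exchange, Queue

def create_queue(queue_name):
    # Direct exchange: messages route to the queue whose routing key matches exactly
    return Queue(queue_name,
                 exchange=Exchange(queue_name, type='direct'),
                 routing_key=queue_name)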
def dataflux_func_starter_crontab(self, *args, **kwargs):
    self.logger.info('DataFluxFunc Crontab Starter Task launched.')

    # Note: wait 1 second so the task never runs exactly on a tick,
    # which would jump the computation back to the previous trigger point
    time.sleep(1)

    # Compute the current trigger point
    now = arrow.get().to('Asia/Shanghai').datetime
    starter_crontab = crontab_parser.CronTab(CONFIG['_CRONTAB_STARTER'])

    trigger_time = int(starter_crontab.previous(delta=False, now=now))
    current_time = int(time.time())

    # Fetch auto-trigger configs from Func integrations
    integrated_crontab_configs = self.get_integrated_func_crontab_configs()

    # Page through the crontab configs that need to run
    next_seq = 0
    while next_seq is not None:
        crontab_configs, next_seq = self.fetch_crontab_configs(next_seq)

        # On the first page, prepend the auto-run functions from integrations
        if integrated_crontab_configs:
            crontab_configs = integrated_crontab_configs + crontab_configs
            integrated_crontab_configs = None

        # Dispatch tasks
        for c in crontab_configs:
            # Skip configs whose trigger time has not arrived yet
            if not self.crontab_config_filter(trigger_time, c):
                continue

            # Determine the execution queue
            specified_queue = None
            try:
                specified_queue = c['funcExtraConfig']['queue']
            except Exception as e:
                pass

            queue = None
            if specified_queue is None:
                queue = toolkit.get_worker_queue(CONFIG['_FUNC_TASK_DEFAULT_CRONTAB_QUEUE'])

            else:
                if isinstance(specified_queue, int) and 0 <= specified_queue < CONFIG['_WORKER_QUEUE_COUNT']:
                    # Queue number specified directly
                    queue = toolkit.get_worker_queue(specified_queue)

                else:
                    # Queue alias specified
                    try:
                        queue_number = int(CONFIG['WORKER_QUEUE_ALIAS_MAP'][specified_queue])
                    except Exception as e:
                        # Bad config: the alias cannot be resolved to a queue number, or the
                        # number is out of range. Fall back to the default Func queue so some
                        # worker always picks the task up (the actual run will then report the error)
                        queue = toolkit.get_worker_queue(CONFIG['_FUNC_TASK_DEFAULT_CRONTAB_QUEUE'])
                    else:
                        # Convert the queue alias to a queue number
                        queue = toolkit.get_worker_queue(queue_number)

            # Determine the timeout
            soft_time_limit = CONFIG['_FUNC_TASK_DEFAULT_TIMEOUT']
            time_limit      = CONFIG['_FUNC_TASK_DEFAULT_TIMEOUT'] + CONFIG['_FUNC_TASK_EXTRA_TIMEOUT_TO_KILL']

            func_timeout = None
            try:
                func_timeout = c['funcExtraConfig']['timeout']
            except Exception as e:
                pass

            # If present and correctly configured, update the timeout
            if isinstance(func_timeout, (six.integer_types, float)) and func_timeout > 0:
                soft_time_limit = func_timeout
                time_limit      = func_timeout + CONFIG['_FUNC_TASK_EXTRA_TIMEOUT_TO_KILL']

            # Compute the task expiry
            _shift_seconds = int(soft_time_limit * CONFIG['_FUNC_TASK_TIMEOUT_TO_EXPIRE_SCALE'])
            expires = arrow.get().shift(seconds=_shift_seconds).datetime

            # Acquire the lock
            lock_key   = toolkit.get_cache_key('lock', 'CrontabConfig', ['crontabConfigId', c['id']])
            lock_value = toolkit.gen_uuid()
            if not self.cache_db.lock(lock_key, lock_value, time_limit):
                # Lock before triggering the task; skip on failure
                continue

            # Task ID
            task_id = gen_task_id()

            # Record task info (enqueued)
            self.cache_task_status(c['id'], task_id, func_id=c['funcId'])

            # Enqueue the task
            task_headers = {
                'origin': '{}-{}'.format(c['id'], current_time) # Origin marked as "<crontab config ID>-<timestamp>"
            }
            task_kwargs = {
                'funcId'        : c['funcId'],
                'funcCallKwargs': c['funcCallKwargs'],
                'origin'        : c.get('execMode') or 'crontab',
                'originId'      : c['id'],
                'saveResult'    : c['saveResult'],
                'execMode'      : 'crontab',
                'triggerTime'   : trigger_time,
                'crontab'       : c['crontab'],
                'queue'         : specified_queue,
                'lockKey'       : lock_key,
                'lockValue'     : lock_value,
            }
            dataflux_func_runner.apply_async(
                task_id=task_id,
                kwargs=task_kwargs,
                headers=task_headers,
                queue=queue,
                soft_time_limit=soft_time_limit,
                time_limit=time_limit,
                expires=expires)
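# Behaviour the 1-second sleep above guards against, assuming `crontab_parser`
# is the PyPI `crontab` package: `previous(delta=False)` returns the epoch of
# the trigger point before `now`, so evaluating exactly on a tick could jump
# back to the prior tick. A minimal sketch:
from crontab import CronTab
import arrow

now = arrow.get().to('Asia/Shanghai').datetime
every_minute = CronTab('* * * * *')                              # illustrative schedule
trigger_time = int(every_minute.previous(delta=False, now=now))
print(trigger_time)                                              # epoch of the last whole minute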
def send_task(self, crontab_config, current_time, trigger_time):
    # Determine the timeout
    soft_time_limit, time_limit = self._get_time_limit(crontab_config)

    # Determine the execution queue
    queue = self._get_queue(crontab_config)

    # Delayed-run support
    delayed_crontab = None
    try:
        delayed_crontab = crontab_config['funcExtraConfig'].get('delayedCrontab') or [0]
    except Exception as e:
        delayed_crontab = [0]

    for delay in delayed_crontab:
        # Acquire the lock
        lock_key = toolkit.get_cache_key('lock', 'CrontabConfig', tags=[
            'crontabConfigId', crontab_config['id'],
            'funcId',          crontab_config['funcId'],
            'crontabDelay',    delay])
        lock_value = toolkit.gen_uuid()
        if not self.cache_db.lock(lock_key, lock_value, time_limit):
            # Lock before triggering the task; skip on failure
            continue

        # Task ID
        task_id = gen_task_id()

        # Compute the task expiry
        _shift_seconds = int(soft_time_limit * CONFIG['_FUNC_TASK_TIMEOUT_TO_EXPIRE_SCALE'] + delay)
        expires = arrow.get().shift(seconds=_shift_seconds).datetime

        # Enqueue the task
        task_headers = {
            'origin': '{}-{}'.format(crontab_config['id'], current_time) # Origin marked as "<crontab config ID>-<timestamp>"
        }

        # Note:
        #   The task's `origin` here is not the same thing as the crontab config's `origin`:
        #   - the task's `origin` marks where the task came from (authLink, crontab, batch,
        #     integration) and, together with `originId`, identifies the business entity
        #   - the crontab config's `origin` marks where the config came from (API, UI, INTEGRATION)
        task_kwargs = {
            'funcId'        : crontab_config['funcId'],
            'funcCallKwargs': crontab_config['funcCallKwargs'],
            'origin'        : crontab_config['taskOrigin'],
            'originId'      : crontab_config['id'],
            'saveResult'    : crontab_config['saveResult'],
            'execMode'      : crontab_config['execMode'],
            'triggerTime'   : (trigger_time + delay),
            'triggerTimeMs' : (trigger_time + delay) * 1000,
            'crontab'       : crontab_config['crontab'],
            'crontabDelay'  : delay,
            'queue'         : queue,
            'taskInfoLimit' : crontab_config['taskInfoLimit'],
            'lockKey'       : lock_key,
            'lockValue'     : lock_value,
        }
        func_runner.apply_async(
            task_id=task_id,
            kwargs=task_kwargs,
            headers=task_headers,
            queue=toolkit.get_worker_queue(queue),
            soft_time_limit=soft_time_limit,
            time_limit=time_limit,
            expires=expires,
            countdown=delay or None)
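# Effect of `delayedCrontab` above, in isolation: one crontab tick fans out into
# one task per configured delay, each enqueued immediately but executed `delay`
# seconds later via Celery's `countdown`, and each with its own lock key so the
# copies do not suppress one another. A self-contained sketch (values are
# illustrative):
trigger_time    = 1700000000   # hypothetical tick, Unix seconds
delayed_crontab = [0, 30, 60]  # hypothetical funcExtraConfig['delayedCrontab']

for delay in delayed_crontab:
    print({
        'triggerTime': trigger_time + delay,  # logical trigger time the copy reports
        'countdown'  : delay or None,         # 0 becomes None: run immediately
    })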