def ts_add(self, key, value, timestamp=None):
    if not self.skip_log:
        self.logger.debug('[REDIS TS] ADD {}'.format(key))

    if key not in self.checked_keys:
        cache_res = self.client.type(key)
        if six.ensure_str(cache_res) != 'zset':
            self.client.delete(key)

        self.checked_keys.add(key)

    timestamp = timestamp or int(time.time())
    value = toolkit.json_safe_dumps(value, indent=0)

    data = ','.join([str(timestamp), value])
    self.client.zadd(key, {data: timestamp})

    self.client.expire(key, self.config['tsMaxAge'])

    if self.config['tsMaxPeriod']:
        min_timestamp = int(time.time()) - self.config['tsMaxPeriod']
        self.client.zremrangebyscore(key, '-inf', min_timestamp)

    if self.config['tsMaxLength']:
        self.client.zremrangebyrank(key, 0, -1 * self.config['tsMaxLength'] - 1)
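# A minimal read-back sketch for entries written by ts_add() above: each
# member is a '<timestamp>,<json>' string scored by the timestamp, so points
# can be range-scanned and split on the first comma. NOTE: `ts_get` is a
# hypothetical helper (not part of this module), assuming a redis-py client.
def ts_get(client, key, start_timestamp='-inf', end_timestamp='+inf'):
    import json

    points = []
    for member in client.zrangebyscore(key, start_timestamp, end_timestamp):
        # Split only on the first comma: the JSON payload may itself contain commas
        timestamp, _, value = six.ensure_str(member).partition(',')
        points.append((int(timestamp), json.loads(value)))

    return points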
def cache_task_status(self, origin, origin_id, exec_mode, status,
        func_id=None, script_publish_version=None, log_messages=None, einfo_text=None):
    if not all([origin, origin_id]):
        return

    if origin not in ('crontab', 'batch') and exec_mode != 'crontab':
        return

    cache_key = toolkit.get_cache_key('syncCache', 'taskInfo')

    data = {
        'taskId'              : self.request.id,
        'origin'              : origin,
        'originId'            : origin_id,
        'funcId'              : func_id,
        'scriptPublishVersion': script_publish_version,
        'execMode'            : exec_mode,
        'status'              : status,
        'logMessages'         : log_messages,
        'einfoTEXT'           : einfo_text,
        'timestamp'           : int(time.time()),
    }
    data = toolkit.json_safe_dumps(data, indent=0)

    self.cache_db.run('lpush', cache_key, data)
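# The 'syncCache' entries above are produced with LPUSH; a consumer on the
# other side would drain them with RPOP so entries come out in FIFO order.
# A minimal sketch under that assumption -- `drain_sync_cache` is a
# hypothetical consumer, assuming a redis-py client:
def drain_sync_cache(client, cache_key, batch_size=100):
    import json

    entries = []
    for _ in range(batch_size):
        raw = client.rpop(cache_key)
        if raw is None:
            break  # list drained

        entries.append(json.loads(raw))

    return entries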
def publish(self, topic, message, timeout=None):
    if time.time() - self.producers_update_timestamp > self.PRODUCERS_UPDATE_INTERVAL:
        self.update_producers()

    nsq_node = random.choice(self.nsq_nodes)
    url = '{}://{}/pub'.format(self.config['protocol'], nsq_node)
    query = {
        'topic': topic,
    }

    if isinstance(message, (dict, list, tuple)):
        message = toolkit.json_safe_dumps(message, indent=None, separators=(',', ':'))

    message = six.ensure_binary(message)

    timeout = timeout or self.config['timeout']

    message_dump = six.ensure_str(message)
    if len(message_dump) > LIMIT_MESSAGE_DUMP:
        message_dump = message_dump[0:LIMIT_MESSAGE_DUMP - 3] + '...'

    self.logger.debug('[NSQLOOKUP] PUB `{}`'.format(message_dump))

    r = requests.post(url, params=query, data=message, timeout=timeout)
    r.raise_for_status()
def cache_script_failure(self, func_id, script_publish_version,
        exec_mode=None, einfo_text=None, trace_info=None):
    if not CONFIG['_INTERNAL_KEEP_SCRIPT_FAILURE']:
        return

    if not einfo_text:
        return

    cache_key = toolkit.get_cache_key('syncCache', 'scriptFailure')

    data = {
        'funcId'              : func_id,
        'scriptPublishVersion': script_publish_version,
        'execMode'            : exec_mode,
        'einfoTEXT'           : einfo_text,
        'traceInfo'           : trace_info,
        'timestamp'           : int(time.time()),
    }
    data = toolkit.json_safe_dumps(data, indent=0)

    self.cache_db.run('lpush', cache_key, data)
def _cache_scripts(self):
    scripts = sorted(SCRIPT_MAP.values(), key=lambda x: x['seq'])
    scripts_dump = toolkit.json_safe_dumps(scripts, sort_keys=True)

    cache_key = toolkit.get_cache_key('fixedCache', 'scriptsMD5')
    self.cache_db.set(cache_key, toolkit.get_md5(scripts_dump))

    cache_key = toolkit.get_cache_key('fixedCache', 'scriptsDump')
    self.cache_db.set(cache_key, scripts_dump)
def cache_func_result(self, func_id, script_code_md5, script_publish_version,
        func_call_kwargs_md5, result, cache_result_expires):
    if not all([func_id, script_code_md5, script_publish_version,
            func_call_kwargs_md5, cache_result_expires]):
        return

    cache_key = toolkit.get_cache_key('cache', 'funcResult', tags=[
        'funcId'              , func_id,
        'scriptCodeMD5'       , script_code_md5,
        'scriptPublishVersion', script_publish_version,
        'funcCallKwargsMD5'   , func_call_kwargs_md5])

    result_dumps = toolkit.json_safe_dumps(result)
    self.cache_db.setex(cache_key, cache_result_expires, result_dumps)
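# The read side would rebuild the same tagged cache key and try a GET before
# enqueuing the function. A minimal sketch mirroring cache_func_result()
# above -- `load_cached_func_result` is a hypothetical helper, reusing the
# project's toolkit/ujson/six imports:
def load_cached_func_result(cache_db, func_id, script_code_md5,
        script_publish_version, func_call_kwargs_md5):
    # The key embeds everything that should invalidate the cached result:
    # script code MD5, publish version and the call kwargs MD5
    cache_key = toolkit.get_cache_key('cache', 'funcResult', tags=[
        'funcId'              , func_id,
        'scriptCodeMD5'       , script_code_md5,
        'scriptPublishVersion', script_publish_version,
        'funcCallKwargsMD5'   , func_call_kwargs_md5])

    cached = cache_db.get(cache_key)
    if cached is None:
        return None

    return ujson.loads(six.ensure_str(cached))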
def cache_script_running_info(self, func_id, script_publish_version,
        exec_mode=None, is_failed=False, cost=None):
    cache_key = toolkit.get_cache_key('syncCache', 'scriptRunningInfo')

    data = {
        'funcId'              : func_id,
        'scriptPublishVersion': script_publish_version,
        'execMode'            : exec_mode,
        'isFailed'            : is_failed,
        'cost'                : cost,
        'timestamp'           : int(time.time()),
    }
    data = toolkit.json_safe_dumps(data, indent=0)

    self.cache_db.run('lpush', cache_key, data)
def cache_task_status(self, crontab_id, task_id, func_id):
    if not crontab_id:
        return

    cache_key = toolkit.get_cache_key('syncCache', 'taskInfo')

    data = {
        'taskId'   : task_id,
        'origin'   : 'crontab',
        'originId' : crontab_id,
        'funcId'   : func_id,
        'status'   : 'queued',
        'timestamp': int(time.time()),
    }
    data = toolkit.json_safe_dumps(data, indent=0)

    self.cache_db.run('lpush', cache_key, data)
def cache_script_log(self, func_id, script_publish_version, log_messages, exec_mode=None):
    if not CONFIG['_INTERNAL_KEEP_SCRIPT_LOG']:
        return

    if not log_messages:
        return

    cache_key = toolkit.get_cache_key('syncCache', 'scriptLog')

    data = {
        'funcId'              : func_id,
        'scriptPublishVersion': script_publish_version,
        'execMode'            : exec_mode,
        'logMessages'         : log_messages,
        'timestamp'           : int(time.time()),
    }
    data = toolkit.json_safe_dumps(data, indent=0)

    self.cache_db.run('lpush', cache_key, data)
def _set_task_status(self, status, **next_context):
    '''
    Set task result for WAT's monitor.
    '''
    # Fixed in Celery for saving/publishing task result.
    # See [https://github.com/celery/celery/blob/v4.1.0/celery/backends/base.py#L518]
    if self.request.called_directly:
        return

    self.request.update(**next_context)

    task_key_prefix = 'celery-task-meta-'
    key = task_key_prefix + self.request.id
    content = {
        'task'            : self.name,
        'id'              : self.request.id,
        'args'            : self.request.args,
        'kwargs'          : self.request.kwargs,
        'queue'           : self.request.delivery_info['routing_key'],
        'origin'          : self.request.origin,
        'status'          : status,
        'startTime'       : self.request.x_start_time,
        'endTime'         : self.request.x_end_time,
        'retval'          : self.request.x_retval,
        'einfoTEXT'       : self.request.x_einfo_text,
        'exceptionMessage': self.request.x_exception_message,
        'exceptionDump'   : self.request.x_exception_dump,
    }
    if hasattr(self.request, 'extra'):
        content['extra'] = self.request.extra

    content = toolkit.json_safe_dumps(content, indent=None)

    # self.backend.client.setex(key, CONFIG['_WORKER_RESULT_EXPIRES'], content)
    if status in (celery_status.SUCCESS, celery_status.FAILURE):
        self.backend.client.publish(key, content)
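# Because results are only published on the 'celery-task-meta-<task_id>'
# channel (the SETEX line is commented out above), a monitor must subscribe
# before the task finishes. A minimal listener sketch -- hypothetical,
# assuming a raw redis-py client and the module's ujson/six imports:
def watch_task_results(redis_client):
    pubsub = redis_client.pubsub()
    pubsub.psubscribe('celery-task-meta-*')

    for msg in pubsub.listen():
        if msg['type'] != 'pmessage':
            continue  # skip subscribe confirmations

        content = ujson.loads(six.ensure_str(msg['data']))
        print(content['id'], content['status'])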
def __call__(self, *args, **kwargs):
    # Add logger
    self.logger = LogHelper(self)

    # Add DB Helper
    self.db = MySQLHelper(self.logger)

    # Add Cache Helper
    self.cache_db = RedisHelper(self.logger)

    # Add File Storage Helper
    self.file_storage = FileSystemHelper(self.logger)

    if CONFIG['MODE'] == 'prod':
        self.db.skip_log       = True
        self.cache_db.skip_log = True

    # Add extra information
    if not self.request.called_directly:
        self._set_task_status(celery_status.PENDING,
                x_start_time=int(time.time()),
                x_end_time=None,
                x_retval=None,
                x_einfo_text=None,
                x_exception_message=None,
                x_exception_dump=None)

    # Sleep delay
    if 'sleepDelay' in kwargs:
        sleep_delay = 0
        try:
            sleep_delay = float(kwargs['sleepDelay'])
            self.logger.debug('[SLEEP DELAY] {} seconds...'.format(sleep_delay))
        except Exception as e:
            for line in traceback.format_exc().splitlines():
                self.logger.error(line)
        else:
            time.sleep(sleep_delay)

    # Run
    try:
        if LOG_CALL_ARGS:
            args_dumps = toolkit.json_safe_dumps(args, indent=None)
            if len(args_dumps) > LIMIT_ARGS_DUMP:
                args_dumps = args_dumps[0:LIMIT_ARGS_DUMP - 3] + '...'

            kwargs_dumps = toolkit.json_safe_dumps(kwargs, indent=None)
            if len(kwargs_dumps) > LIMIT_ARGS_DUMP:
                kwargs_dumps = kwargs_dumps[0:LIMIT_ARGS_DUMP - 3] + '...'

            self.logger.debug('[CALL] args: `{}`; kwargs: `{}`'.format(args_dumps, kwargs_dumps))

        return super(BaseTask, self).__call__(*args, **kwargs)

    except (SoftTimeLimitExceeded, TimeLimitExceeded) as e:
        raise

    except Exception as e:
        for line in traceback.format_exc().splitlines():
            self.logger.error(line)

        raise
def dump_for_json(self, val):
    '''
    Dump a value to a JSON string
    '''
    return toolkit.json_safe_dumps(val)
def dataflux_func_debugger(self, *args, **kwargs):
    # Target function and arguments
    func_id          = kwargs.get('funcId')
    func_call_kwargs = kwargs.get('funcCallKwargs') or {}

    script_set_id = func_id.split('__')[0]
    script_id     = func_id.split('.')[0]
    func_name     = func_id[len(script_id) + 1:]

    self.logger.info('DataFluxFunc Debugger Task launched: `{}`'.format(func_id))

    # Origin
    origin    = kwargs.get('origin')
    origin_id = kwargs.get('originId')

    # Root task ID
    root_task_id = kwargs.get('rootTaskId') or self.request.id

    # Function call chain
    func_chain = kwargs.get('funcChain') or []
    func_chain.append(func_id)

    # Execution mode: UI execution only supports sync mode
    exec_mode = 'sync'

    # Start time
    start_time    = int(time.time())
    start_time_ms = int(time.time() * 1000)

    # Queue
    queue = kwargs.get('queue')

    # HTTP request
    http_request = kwargs.get('httpRequest') or {}

    # Function result, scope, trace info and error stack
    func_result  = None
    script_scope = None
    trace_info   = None
    error_stack  = None

    try:
        # Get the code object
        script_dict = self.get_script_dict_for_debugger(script_id)

        target_script = script_dict.get(script_id)
        if not target_script:
            e = NotFoundException('Script `{}` not found'.format(script_id))
            raise e

        extra_vars = {
            '_DFF_DEBUG'        : True,
            '_DFF_ROOT_TASK_ID' : root_task_id,
            '_DFF_SCRIPT_SET_ID': script_set_id,
            '_DFF_SCRIPT_ID'    : script_id,
            '_DFF_FUNC_ID'      : func_id,
            '_DFF_FUNC_NAME'    : func_name,
            '_DFF_FUNC_CHAIN'   : func_chain,
            '_DFF_ORIGIN'       : origin,
            '_DFF_ORIGIN_ID'    : origin_id,
            '_DFF_EXEC_MODE'    : exec_mode,
            '_DFF_START_TIME'   : start_time,
            '_DFF_START_TIME_MS': start_time_ms,
            '_DFF_TRIGGER_TIME' : kwargs.get('triggerTime') or start_time,
            '_DFF_CRONTAB'      : kwargs.get('crontab'),
            '_DFF_QUEUE'        : queue,
            '_DFF_HTTP_REQUEST' : http_request,
        }
        self.logger.info('[CREATE SAFE SCOPE] `{}`'.format(script_id))
        script_scope = self.create_safe_scope(
                script_name=script_id,
                script_dict=script_dict,
                extra_vars=extra_vars)

        # Load the script
        self.logger.info('[LOAD SCRIPT] `{}`'.format(script_id))
        script_scope = self.safe_exec(target_script['codeObj'], globals=script_scope)

        # Run the script
        if func_name:
            entry_func = script_scope.get(func_name)
            if not entry_func:
                e = NotFoundException('Function `{}` not found in `{}`'.format(func_name, script_id))
                raise e

            # Run the function
            self.logger.info('[RUN FUNC] `{}`'.format(func_id))
            func_result = entry_func(**func_call_kwargs)

    except Exception as e:
        # Function errors are only logged at WARNING level
        for line in traceback.format_exc().splitlines():
            self.logger.warning(line)

        trace_info  = self.get_trace_info()
        error_stack = self.get_formated_einfo(trace_info, only_in_script=True)

        # The pre-check task returns both check results and errors to the caller,
        # so the task itself never fails.
        # The API side decides whether the pre-check passed and re-wraps errors
        # before returning them to the caller.

    finally:
        result = {}

        if script_scope:
            # Script parsing result (exported API functions)
            exported_api_func = script_scope['DFF'].exported_api_funcs or []
            result['exportedAPIFuncs'] = exported_api_func

            # Script log output
            log_messages = script_scope['DFF'].log_messages or []
            result['logMessages'] = log_messages

            # [To be deprecated] Script chart output
            plot_charts = script_scope['DFF'].plot_charts or []
            result['plotCharts'] = plot_charts

        if func_name:
            # Prepare the function result
            func_result_raw        = None
            func_result_repr       = None
            func_result_json_dumps = None

            try:
                func_result_raw = toolkit.json_safe_copy(func_result)
            except Exception as e:
                for line in traceback.format_exc().splitlines():
                    self.logger.error(line)

            try:
                func_result_repr = pprint.saferepr(func_result)
            except Exception as e:
                for line in traceback.format_exc().splitlines():
                    self.logger.error(line)

            try:
                func_result_json_dumps = toolkit.json_safe_dumps(func_result)
            except Exception as e:
                for line in traceback.format_exc().splitlines():
                    self.logger.error(line)

            result['funcResult'] = {
                'raw'      : func_result_raw,
                'repr'     : func_result_repr,
                'jsonDumps': func_result_json_dumps,
                'cost'     : time.time() - start_time,
            }

    # Prepare the return value
    retval = {
        'result'   : result,
        'traceInfo': trace_info,
        'stack'    : error_stack,
    }

    # Clean up resources
    self.clean_up()

    # Return the function result
    return retval
def update_script_dict_cache(self):
    '''
    Update the script dict cache

    Works together with DataFluxFuncReloadScriptsTask for fast script loading:
    1. Check the cache timestamp in local memory; if not expired, return immediately
    2. Check the current script cache MD5 in Redis
        2.1. If unchanged, extend the local cache and return
        2.2. If changed, fetch the script cache dump from Redis
    3. If there is no script cache dump in Redis, load the data from the database
        (This should not normally happen: DataFluxFuncReloadScriptsTask refreshes
        the Redis cache periodically)
    '''
    global SCRIPTS_CACHE_MD5
    global SCRIPTS_CACHE_TIMESTAMP
    global SCRIPT_DICT_CACHE

    current_timestamp = time.time()

    cache_key_script_md5  = toolkit.get_cache_key('fixedCache', 'scriptsMD5')
    cache_key_script_dump = toolkit.get_cache_key('fixedCache', 'scriptsDump')

    # 1. Try the local cache without checking for updates
    if current_timestamp - SCRIPTS_CACHE_TIMESTAMP < CONFIG['_FUNC_TASK_LOCAL_CACHE_EXPIRES']:
        # Still within the retention period, skip
        self.logger.debug('[SCRIPT CACHE] Use local cache')
        return

    # 2. Check the Redis cache
    scripts_md5 = self.cache_db.get(cache_key_script_md5)
    if scripts_md5:
        scripts_md5 = six.ensure_str(scripts_md5)

    scripts_dump_exists = self.cache_db.exists(cache_key_script_dump)

    if scripts_md5 and scripts_md5 == SCRIPTS_CACHE_MD5 and scripts_dump_exists:
        # Cache exists and the MD5 is unchanged: extend the local cache
        SCRIPTS_CACHE_TIMESTAMP = current_timestamp

        self.logger.debug('[SCRIPT CACHE] Not Modified, extend local cache')
        return

    # 3. No cache / cache MD5 changed: read the dump from Redis
    scripts = None

    scripts_dump = self.cache_db.get(cache_key_script_dump)
    if scripts_dump:
        self.logger.debug('[SCRIPT CACHE] Modified, Use Redis cache')

        scripts_dump = six.ensure_str(scripts_dump)
        try:
            scripts = ujson.loads(scripts_dump)
        except Exception as e:
            pass

        if not scripts_md5:
            # No cached MD5: compute it ourselves (rare)
            scripts_md5 = toolkit.get_md5(scripts_dump)

        # Record the cache MD5
        SCRIPTS_CACHE_MD5 = scripts_md5

    # 4. Could not read the dump from Redis: load the full user scripts from the database
    if not scripts or not scripts_dump:
        self.logger.warning('[SCRIPT CACHE] Cache failed! Use DB data')

        scripts = self.get_scripts()

        # Compute and record the cache MD5 ourselves
        scripts_dump = toolkit.json_safe_dumps(scripts, sort_keys=True)
        SCRIPTS_CACHE_MD5 = toolkit.get_md5(scripts_dump)

    # Save to the local cache
    SCRIPTS_CACHE_TIMESTAMP = current_timestamp

    SCRIPT_DICT_CACHE = self.create_script_dict(scripts)
def dataflux_func_runner(self, *args, **kwargs):
    # Target function and arguments
    func_id              = kwargs.get('funcId')
    func_call_kwargs     = kwargs.get('funcCallKwargs') or {}
    func_call_kwargs_md5 = kwargs.get('funcCallKwargsMD5')

    script_set_id = func_id.split('__')[0]
    script_id     = func_id.split('.')[0]
    func_name     = func_id[len(script_id) + 1:]

    self.logger.info('DataFluxFunc Runner Task launched: `{}`'.format(func_id))

    # Origin
    origin    = kwargs.get('origin')
    origin_id = kwargs.get('originId')

    # Root task ID
    root_task_id = kwargs.get('rootTaskId') or self.request.id

    # Function call chain
    func_chain = kwargs.get('funcChain') or []
    func_chain.append(func_id)

    # Execution mode
    exec_mode = kwargs.get('execMode') or 'sync'

    # Start time
    start_time    = int(time.time())
    start_time_ms = int(time.time() * 1000)

    # Queue
    queue = kwargs.get('queue')

    # HTTP request
    http_request = kwargs.get('httpRequest') or {}

    # Function result and scope
    save_result   = kwargs.get('saveResult') or False
    func_resp     = None
    script_scope  = None
    is_succeeded  = False
    trace_info    = None
    log_messages  = None
    einfo_text    = None
    target_script = None

    try:
        # Record task info (running)
        self.cache_task_status(origin=origin, origin_id=origin_id,
                exec_mode=exec_mode, status='pending', func_id=func_id)

        global SCRIPT_DICT_CACHE

        # Update the script cache
        self.update_script_dict_cache()

        target_script = SCRIPT_DICT_CACHE.get(script_id)
        if not target_script:
            e = NotFoundException('Script `{}` not found'.format(script_id))
            raise e

        extra_vars = {
            '_DFF_DEBUG'          : False,
            '_DFF_ROOT_TASK_ID'   : root_task_id,
            '_DFF_SCRIPT_SET_ID'  : script_set_id,
            '_DFF_SCRIPT_ID'      : script_id,
            '_DFF_FUNC_ID'        : func_id,
            '_DFF_FUNC_NAME'      : func_name,
            '_DFF_FUNC_CHAIN'     : func_chain,
            '_DFF_ORIGIN'         : origin,
            '_DFF_ORIGIN_ID'      : origin_id,
            '_DFF_EXEC_MODE'      : exec_mode,
            '_DFF_START_TIME'     : start_time,
            '_DFF_START_TIME_MS'  : start_time_ms,
            '_DFF_TRIGGER_TIME'   : kwargs.get('triggerTime') or start_time,
            '_DFF_TRIGGER_TIME_MS': kwargs.get('triggerTimeMs') or start_time_ms,
            '_DFF_CRONTAB'        : kwargs.get('crontab'),
            '_DFF_QUEUE'          : queue,
            '_DFF_HTTP_REQUEST'   : http_request,
        }
        self.logger.info('[CREATE SAFE SCOPE] `{}`'.format(script_id))
        script_scope = self.create_safe_scope(
                script_name=script_id,
                script_dict=SCRIPT_DICT_CACHE,
                extra_vars=extra_vars)

        # Load the script
        self.logger.info('[LOAD SCRIPT] `{}`'.format(script_id))
        script_scope = self.safe_exec(target_script['codeObj'], globals=script_scope)

        # Run the script
        entry_func = script_scope.get(func_name)
        if not entry_func:
            e = NotFoundException('Function `{}` not found in `{}`'.format(func_name, script_id))
            raise e

        # Run the function
        self.logger.info('[RUN FUNC] `{}`'.format(func_id))
        func_resp = entry_func(**func_call_kwargs)
        if not isinstance(func_resp, BaseFuncResponse):
            func_resp = FuncResponse(func_resp)

        if isinstance(func_resp.data, Exception):
            raise func_resp.data

    except Exception as e:
        is_succeeded = False

        self.logger.error('Error occurred in script. `{}`'.format(func_id))

        # NOTE: target_script may still be None here (e.g. script not found)
        script_publish_version = target_script['publishVersion'] if target_script else None

        # Record function running info
        self.cache_script_running_info(
                func_id=func_id,
                script_publish_version=script_publish_version,
                exec_mode=exec_mode,
                is_failed=True,
                cost=time.time() - start_time)

        # Record function failure
        trace_info = self.get_trace_info()
        einfo_text = self.get_formated_einfo(trace_info, only_in_script=True)

        self.cache_script_failure(
                func_id=func_id,
                script_publish_version=script_publish_version,
                exec_mode=exec_mode,
                einfo_text=einfo_text,
                trace_info=trace_info)

        raise

    else:
        is_succeeded = True

        # Record function running info
        self.cache_script_running_info(
                func_id=func_id,
                script_publish_version=target_script['publishVersion'],
                exec_mode=exec_mode,
                cost=time.time() - start_time)

        # Prepare the function result
        func_result_raw        = None
        func_result_repr       = None
        func_result_json_dumps = None

        if func_resp.data:
            try:
                func_result_raw = func_resp.data
            except Exception as e:
                for line in traceback.format_exc().splitlines():
                    self.logger.error(line)

            try:
                func_result_repr = pprint.saferepr(func_resp.data)
            except Exception as e:
                for line in traceback.format_exc().splitlines():
                    self.logger.error(line)

            try:
                func_result_json_dumps = toolkit.json_safe_dumps(func_resp.data)
            except Exception as e:
                for line in traceback.format_exc().splitlines():
                    self.logger.error(line)

        result = {
            'raw'             : func_result_raw,
            'repr'            : func_result_repr,
            'jsonDumps'       : func_result_json_dumps,
            '_responseControl': func_resp._create_response_control(),
        }

        # Save the function result
        if save_result:
            args = (
                self.request.id,
                self.name,
                self.request.origin,
                self.request.x_start_time,
                int(time.time()),
                self.request.args,
                self.request.kwargs,
                result,
                celery_status.SUCCESS,
                None)
            result_task_id = '{}-RESULT'.format(self.request.id)
            result_saving_task.apply_async(task_id=result_task_id, args=args)

        # Cache the function result
        cache_result_expires = None
        try:
            cache_result_expires = target_script['funcExtraConfig'][func_id]['cacheResult']
        except (KeyError, TypeError) as e:
            pass
        else:
            self.cache_func_result(
                    func_id=func_id,
                    script_code_md5=target_script['codeMD5'],
                    script_publish_version=target_script['publishVersion'],
                    func_call_kwargs_md5=func_call_kwargs_md5,
                    result=result,
                    cache_result_expires=cache_result_expires)

        # Return the function result
        return result

    finally:
        if script_scope:
            # Extract log output
            log_messages = script_scope['DFF'].log_messages or None

        # Release the crontab lock
        lock_key   = kwargs.get('lockKey')
        lock_value = kwargs.get('lockValue')
        if lock_key and lock_value:
            self.cache_db.unlock(lock_key, lock_value)

        # NOTE: target_script may still be None here (e.g. script not found)
        script_publish_version = target_script['publishVersion'] if target_script else None

        # Record the script log
        self.cache_script_log(
                func_id=func_id,
                script_publish_version=script_publish_version,
                log_messages=log_messages,
                exec_mode=exec_mode)

        # Record task info (finished)
        end_status = 'success' if is_succeeded else 'failure'

        self.cache_task_status(
                origin=origin,
                origin_id=origin_id,
                exec_mode=exec_mode,
                status=end_status,
                func_id=func_id,
                script_publish_version=script_publish_version,
                log_messages=log_messages,
                einfo_text=einfo_text)

        # Record pressure value (sync mode only)
        if exec_mode == 'sync':
            func_pressure = kwargs.get('funcPressure') or CONFIG['_WORKER_LIMIT_FUNC_PRESSURE_BASE']
            func_cost     = abs(time.time() * 1000 - start_time_ms)
            self.cache_func_pressure(
                    func_id=func_id,
                    func_call_kwargs_md5=func_call_kwargs_md5,
                    func_pressure=func_pressure,
                    func_cost=func_cost,
                    queue=queue)

        # Clean up resources
        self.clean_up()
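# A hedged example of enqueuing the runner task above. The funcId format
# ('<script_set>__<script>.<func>') follows how dataflux_func_runner() splits
# it; all concrete values here are hypothetical:
#
#   dataflux_func_runner.apply_async(
#       kwargs={
#           'funcId'        : 'demo__hello.say_hello',
#           'funcCallKwargs': {'name': 'world'},
#           'origin'        : 'crontab',
#           'originId'      : 'cron-000000000000',
#           'execMode'      : 'async',
#           'saveResult'    : True,
#       })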