    def cache_running_info(self,
                           func_id,
                           script_publish_version,
                           exec_mode=None,
                           is_failed=False,
                           cost=None):
        timestamp = int(time.time())

        # Global count
        data = {
            'funcId': func_id,
            'scriptPublishVersion': script_publish_version,
            'execMode': exec_mode,
            'isFailed': is_failed,
            'cost': cost,
            'timestamp': timestamp,
        }
        data = toolkit.json_dumps(data, indent=0)

        cache_key = toolkit.get_cache_key('syncCache', 'scriptRunningInfo')
        self.cache_db.lpush(cache_key, data)

        # Function call count
        data = {
            'funcId': func_id,
            'timestamp': timestamp,
        }
        data = toolkit.json_dumps(data, indent=0)

        cache_key = toolkit.get_cache_key('syncCache', 'funcCallInfo')
        self.cache_db.lpush(cache_key, data)
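
Note: cache_running_info and the other cache_* helpers below only produce data: each call LPUSHes one JSON record onto a Redis list under a "syncCache" key, and the periodic sync tasks later drain that list with RPOP (see the sync_* examples further down). A minimal sketch of that round trip, using redis-py directly and a hard-coded placeholder key instead of toolkit.get_cache_key():

    import json
    import time

    import redis  # assumes a reachable local Redis instance

    r = redis.Redis()
    cache_key = 'syncCache:scriptRunningInfo'  # placeholder; real keys come from toolkit.get_cache_key()

    # Producer side (what cache_running_info does): push one JSON record per run
    record = {'funcId': 'demo.func', 'isFailed': False, 'cost': 0.12, 'timestamp': int(time.time())}
    r.lpush(cache_key, json.dumps(record))

    # Consumer side (what the sync_* tasks do): pop records in batches from the other end
    while True:
        raw = r.rpop(cache_key)
        if raw is None:
            break
        print(json.loads(raw))
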
Example #2
    def clear_deprecated_data(self):
        self.clear_table('biz_main_script_log')
        self.clear_table('biz_main_script_failure')
        self.clear_table('biz_main_batch_task_info')
        self.clear_table('biz_main_crontab_task_info')
        self.clear_cache_key(toolkit.get_cache_key('syncCache', 'scriptFailure'))
        self.clear_cache_key(toolkit.get_cache_key('syncCache', 'scriptLog'))
        self.clear_cache_key(toolkit.get_cache_key('syncCache', 'taskInfo'))
Example #3
    def _cache_scripts(self):
        scripts = sorted(SCRIPT_MAP.values(), key=lambda x: x['seq'])
        scripts_dump = toolkit.json_dumps(scripts, sort_keys=True)

        cache_key = toolkit.get_cache_key('fixedCache', 'scriptsMD5')
        self.cache_db.set(cache_key, toolkit.get_md5(scripts_dump))

        cache_key = toolkit.get_cache_key('fixedCache', 'scriptsDump')
        self.cache_db.set(cache_key, scripts_dump)
Example #4
    def clear_outdated_task_info(self):
        origin_ids = set()

        # Crontab configs from function integrations never expire
        origin_ids.add(CONFIG['_INTEGRATION_CRONTAB_CONFIG_ID'])

        # Crontab config IDs
        sql = '''
            SELECT id FROM biz_main_crontab_config
            '''
        db_res = self.db.query(sql)
        for d in db_res:
            origin_ids.add(d['id'])

        # Batch IDs
        sql = '''
            SELECT id FROM biz_main_batch
            '''
        db_res = self.db.query(sql)
        for d in db_res:
            origin_ids.add(d['id'])

        # Fetch all task info keys
        cache_pattern = toolkit.get_cache_key('syncCache', 'taskInfo', tags=[ 'originId', '*' ])
        cache_res = self.cache_db.keys(cache_pattern)
        for cache_key in cache_res:
            cache_key_info = toolkit.parse_cache_key(cache_key)

            if cache_key_info['tags']['originId'] not in origin_ids:
                self.cache_db.delete(cache_key)
Example #5
    def cache_task_status(self,
                          origin,
                          origin_id,
                          exec_mode,
                          status,
                          func_id=None,
                          script_publish_version=None,
                          log_messages=None,
                          einfo_text=None):
        if not all([origin, origin_id]):
            return

        if origin not in ('crontab', 'batch') and exec_mode != 'crontab':
            return

        cache_key = toolkit.get_cache_key('syncCache', 'taskInfo')

        data = {
            'taskId': self.request.id,
            'origin': origin,
            'originId': origin_id,
            'funcId': func_id,
            'scriptPublishVersion': script_publish_version,
            'execMode': exec_mode,
            'status': status,
            'logMessages': log_messages,
            'einfoTEXT': einfo_text,
            'timestamp': int(time.time()),
        }
        data = toolkit.json_safe_dumps(data, indent=0)

        self.cache_db.run('lpush', cache_key, data)
Example #6
    def cache_script_failure(self,
                             func_id,
                             script_publish_version,
                             exec_mode=None,
                             einfo_text=None,
                             trace_info=None):
        if not CONFIG['_INTERNAL_KEEP_SCRIPT_FAILURE']:
            return

        if not einfo_text:
            return

        cache_key = toolkit.get_cache_key('syncCache', 'scriptFailure')

        data = {
            'funcId': func_id,
            'scriptPublishVersion': script_publish_version,
            'execMode': exec_mode,
            'einfoTEXT': einfo_text,
            'traceInfo': trace_info,
            'timestamp': int(time.time()),
        }
        data = toolkit.json_safe_dumps(data, indent=0)

        self.cache_db.run('lpush', cache_key, data)
Example #7
def dataflux_func_auto_run(self, *args, **kwargs):
    lock_key   = toolkit.get_cache_key('lock', 'autoRun')
    lock_value = toolkit.gen_uuid()
    if not self.cache_db.lock(lock_key, lock_value, 30):
        self.logger.warning('DataFluxFunc AutoRun Task already launched.')
        return

    self.logger.info('DataFluxFunc AutoRun Task launched.')

    # Fetch auto-run functions from function integrations
    integrated_auto_run_funcs = self.get_integrated_auto_run_funcs()
    for f in integrated_auto_run_funcs:
        # Task ID
        task_id = gen_task_id()

        # Task kwargs
        task_kwargs = {
            'funcId'  : f['id'],
            'origin'  : 'integration',
            'execMode': 'async',
            'queue'   : CONFIG['_FUNC_TASK_DEFAULT_QUEUE'],
        }

        # Auto-run always uses the default queue
        queue = toolkit.get_worker_queue(CONFIG['_FUNC_TASK_DEFAULT_QUEUE'])

        dataflux_func_runner.apply_async(task_id=task_id, kwargs=task_kwargs, queue=queue)
Example #8
def dataflux_func_auto_cleaner(self, *args, **kwargs):
    lock_key   = toolkit.get_cache_key('lock', 'autoCleaner')
    lock_value = toolkit.gen_uuid()
    if not self.cache_db.lock(lock_key, lock_value, 30):
        self.logger.warning('DataFluxFunc AutoCleaner Task already launched.')
        return

    self.logger.info('DataFluxFunc AutoCleaner Task launched.')

    # Clear database tables
    if not CONFIG['_INTERNAL_KEEP_SCRIPT_LOG']:
        self.clear_table('biz_main_script_log')

    if not CONFIG['_INTERNAL_KEEP_SCRIPT_FAILURE']:
        self.clear_table('biz_main_script_failure')

    # Trim database tables to their size limits
    table_limit_map = CONFIG['_DBDATA_TABLE_LIMIT_MAP']
    for table, limit in table_limit_map.items():
        try:
            self.clear_table_by_limit(table=table, limit=int(limit))
        except Exception as e:
            for line in traceback.format_exc().splitlines():
                self.logger.error(line)

    # Clean expired files from the upload directory
    upload_file_expires = CONFIG['_UPLOAD_FILE_EXPIRES']
    self.clear_upload_file_by_expires(expires=upload_file_expires)
Example #9
def dataflux_func_worker_queue_pressure_recover(self, *args, **kwargs):
    self.logger.info('DataFluxFunc Worker Queue Pressure Recover Task launched.')

    for i in range(CONFIG['_WORKER_QUEUE_COUNT']):
        queue_key = toolkit.get_worker_queue(i)
        queue_length = self.cache_db.run('llen', queue_key)

        if not queue_length or int(queue_length) <= 0:
            cache_key = toolkit.get_cache_key('cache', 'workerQueuePressure', tags=['workerQueue', i])
            self.cache_db.run('set', cache_key, 0)
Example #10
    def lock(self, max_age=60):
        lock_key = toolkit.get_cache_key('lock', self.name)
        lock_value = toolkit.gen_uuid()
        if not self.cache_db.lock(lock_key, lock_value, max_age):
            self.logger.warning(f"`{self.name}` Task already launched.")
            return

        self.launch_log()

        return lock_key, lock_value
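
Note: lock() and the cache_db.lock() calls throughout these examples follow the usual Redis advisory-lock pattern: SET with NX plus an expiry, and a unique value so that only the owner releases the lock. A minimal self-contained sketch of what cache_db.lock() presumably wraps, together with a matching release step (the key name here is a placeholder):

    import uuid

    import redis  # assumes a reachable local Redis instance

    r = redis.Redis()
    lock_key   = 'lock:demoTask'  # placeholder; real keys come from toolkit.get_cache_key('lock', ...)
    lock_value = uuid.uuid4().hex

    # Acquire: SET key value NX EX max_age -- succeeds only if nobody else holds the lock
    if not r.set(lock_key, lock_value, nx=True, ex=60):
        print('Task already launched.')
    else:
        try:
            pass  # ... task body ...
        finally:
            # Release only if we still own the lock
            if r.get(lock_key) == lock_value.encode():
                r.delete(lock_key)
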
Example #11
def dataflux_func_reload_scripts(self, *args, **kwargs):
    is_startup = kwargs.get('isOnLaunch') or False
    force = kwargs.get('force') or False

    # When executed at startup, a lock is required
    if is_startup:
        lock_key = toolkit.get_cache_key('lock', 'reloadScripts')
        lock_value = toolkit.gen_uuid()
        if not self.cache_db.lock(lock_key, lock_value, 10):
            self.logger.warning(
                'DataFluxFunc ReloadScriptDict Task already launched.')
            return

    self.logger.info('DataFluxFunc ReloadScriptDict Task launched.')

    cache_key = toolkit.get_cache_key('fixedCache', 'prevDBUpdateTimestamp')

    # Timestamp of the previous script update
    prev_publish_timestamp = float(self.cache_db.get(cache_key) or 0.0)
    if not prev_publish_timestamp:
        force = True

    # Timestamp of the latest script update
    latest_publish_timestamp = self.get_latest_publish_timestamp()

    is_script_reloaded = False
    if force:
        self.force_reload_script()
        is_script_reloaded = True

    elif latest_publish_timestamp != prev_publish_timestamp:
        self.reload_script()
        is_script_reloaded = True

    if is_script_reloaded:
        self.logger.debug('[SCRIPT CACHE] Reload script {} -> {} {}'.format(
            arrow.get(prev_publish_timestamp).to('Asia/Shanghai').format(
                'YYYY-MM-DD HH:mm:ss'),
            arrow.get(latest_publish_timestamp).to('Asia/Shanghai').format(
                'YYYY-MM-DD HH:mm:ss'), '[FORCE]' if force else ''))

        self.cache_db.set(cache_key, str(latest_publish_timestamp))
Example #12
    def cache_func_pressure(self, func_id, func_call_kwargs_md5, func_pressure, func_cost, queue):
        if not all([func_id, func_call_kwargs_md5, func_pressure, func_cost, queue]):
            return

        # Get the queue's max pressure
        worker_queue_max_pressure = CONFIG['_WORKER_LIMIT_WORKER_QUEUE_PRESSURE_BASE']

        cache_key = toolkit.get_cache_key('heartbeat', 'workerOnQueueCount', tags=['workerQueue', queue])
        worker_count = self.cache_db.get(cache_key)

        if not worker_count:
            worker_count = 1
        else:
            worker_count = int(worker_count) or 1

        worker_queue_max_pressure = worker_count * CONFIG['_WORKER_LIMIT_WORKER_QUEUE_PRESSURE_BASE']

        # Compute and record the new function pressure
        cache_key = toolkit.get_cache_key('cache', 'funcPressure', tags=[
            'funcId'           , func_id,
            'funcCallKwargsMD5', func_call_kwargs_md5])

        prev_func_pressure = self.cache_db.get(cache_key)
        if prev_func_pressure:
            prev_func_pressure = int(prev_func_pressure)
        else:
            prev_func_pressure = CONFIG['_WORKER_LIMIT_FUNC_PRESSURE_BASE']

        next_func_pressure = int((prev_func_pressure + func_cost) / 2)

        self.cache_db.setex(cache_key, CONFIG['_WORKER_LIMIT_FUNC_PRESSURE_EXPIRES'], next_func_pressure)

        # Task finished; reduce the queue pressure
        cache_key = toolkit.get_cache_key('cache', 'workerQueuePressure', tags=['workerQueue', queue])
        current_worker_queue_pressure = self.cache_db.run('decrby', cache_key, func_pressure)

        self.cache_db.run('expire', cache_key, CONFIG['_WORKER_LIMIT_WORKER_QUEUE_PRESSURE_EXPIRES'])

        self.logger.debug('<<< FUNC PRESSURE >>> {}: {}, Cost: {}'.format(func_id, func_pressure, func_cost))
        self.logger.debug('<<< QUEUE PRESSURE >>> WorkerQueue#{}: {} (-{}, {}%)'.format(
                queue, current_worker_queue_pressure, abs(func_pressure),
                int(current_worker_queue_pressure / worker_queue_max_pressure * 100)))
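
Note: the pressure update above is a simple halving average between the previous pressure and the latest cost, so repeated calls drift toward recent costs. A quick worked example with assumed values:

    prev_func_pressure = 100  # assumed previous pressure
    func_cost          = 40   # assumed latest cost

    next_func_pressure = int((prev_func_pressure + func_cost) / 2)
    print(next_func_pressure)  # 70; another run with cost 40 would give 55, then 47, ...
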
Example #13
    def cache_func_result(self, func_id, script_code_md5, script_publish_version, func_call_kwargs_md5, result, cache_result_expires):
        if not all([func_id, script_code_md5, script_publish_version, func_call_kwargs_md5, cache_result_expires]):
            return

        cache_key = toolkit.get_cache_key('cache', 'funcResult', tags=[
            'funcId'              , func_id,
            'scriptCodeMD5'       , script_code_md5,
            'scriptPublishVersion', script_publish_version,
            'funcCallKwargsMD5'   , func_call_kwargs_md5])

        result_dumps = toolkit.json_safe_dumps(result)
        self.cache_db.setex(cache_key, cache_result_expires, result_dumps)
Example #14
    def cache_script_running_info(self, func_id, script_publish_version, exec_mode=None, is_failed=False, cost=None):
        cache_key = toolkit.get_cache_key('syncCache', 'scriptRunningInfo')

        data = {
            'funcId'              : func_id,
            'scriptPublishVersion': script_publish_version,
            'execMode'            : exec_mode,
            'isFailed'            : is_failed,
            'cost'                : cost,
            'timestamp'           : int(time.time()),
        }
        data = toolkit.json_safe_dumps(data, indent=0)

        self.cache_db.run('lpush', cache_key, data)
Example #15
def dataflux_func_auto_cleaner(self, *args, **kwargs):
    lock_key = toolkit.get_cache_key('lock', 'autoCleaner')
    lock_value = toolkit.gen_uuid()
    if not self.cache_db.lock(lock_key, lock_value, 30):
        self.logger.warning('DataFluxFunc AutoCleaner Task already launched.')
        return

    self.logger.info('DataFluxFunc AutoCleaner Task launched.')

    # Clear data
    table_limit_map = CONFIG['_DBDATA_TABLE_LIMIT_MAP']
    for table, limit in table_limit_map.items():
        try:
            self.clear_table_by_limit(table=table, limit=int(limit))
        except Exception as e:
            for line in traceback.format_exc().splitlines():
                self.logger.error(line)
Example #16
    def cache_task_status(self, crontab_id, task_id, func_id):
        if not crontab_id:
            return

        cache_key = toolkit.get_cache_key('syncCache', 'taskInfo')

        data = {
            'taskId': task_id,
            'origin': 'crontab',
            'originId': crontab_id,
            'funcId': func_id,
            'status': 'queued',
            'timestamp': int(time.time()),
        }
        data = toolkit.json_safe_dumps(data, indent=0)

        self.cache_db.run('lpush', cache_key, data)

    def trim_task_info(self,
                       origin,
                       origin_id,
                       exec_mode,
                       task_info_limit=None):
        if not self.is_support_task_info(origin, origin_id, exec_mode):
            return

        task_info_limit = task_info_limit or CONFIG['_TASK_INFO_DEFAULT_LIMIT']
        task_info_limit = task_info_limit - 1
        if task_info_limit < 0:
            task_info_limit = 0

        _start = 0
        _stop = task_info_limit - 1
        cache_key = toolkit.get_cache_key('syncCache',
                                          'taskInfo',
                                          tags=['originId', origin_id])
        self.cache_db.run('ltrim', cache_key, _start, _stop)
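
Note: Redis LTRIM keeps the elements between the start and stop indexes inclusive, so the call above retains at most task_info_limit - 1 of the most recently LPUSHed records, presumably leaving room for the record about to be pushed. A tiny sketch with redis-py and a placeholder key:

    import redis  # assumes a reachable local Redis instance

    r = redis.Redis()
    cache_key = 'syncCache:taskInfo:originId:demo'  # placeholder; real keys come from toolkit.get_cache_key()

    task_info_limit = 10
    # Keep indexes 0 .. task_info_limit - 2, i.e. the 9 newest entries at the head of the list
    r.ltrim(cache_key, 0, task_info_limit - 2)
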
Example #18
    def cache_script_log(self, func_id, script_publish_version, log_messages, exec_mode=None):
        if not CONFIG['_INTERNAL_KEEP_SCRIPT_LOG']:
            return

        if not log_messages:
            return

        cache_key = toolkit.get_cache_key('syncCache', 'scriptLog')

        data = {
            'funcId'              : func_id,
            'scriptPublishVersion': script_publish_version,
            'execMode'            : exec_mode,
            'logMessages'         : log_messages,
            'timestamp'           : int(time.time()),
        }
        data = toolkit.json_safe_dumps(data, indent=0)

        self.cache_db.run('lpush', cache_key, data)
Example #19
    def sync_func_call_count(self):
        data = []

        # Collect data
        cache_key = toolkit.get_cache_key('syncCache', 'funcCallInfo')
        for i in range(CONFIG['_SYNC_CACHE_BATCH_COUNT']):
            cache_res = self.cache_db.run('rpop', cache_key)
            if not cache_res:
                break

            try:
                cache_res = toolkit.json_loads(cache_res)
            except Exception as e:
                for line in traceback.format_exc().splitlines():
                    self.logger.error(line)
            else:
                data.append(cache_res)

        # Group and count
        count_map = {}
        for d in data:
            func_id   = d['funcId']
            timestamp = d.get('timestamp')

            # Align timestamps to the minute (reduces internal time-series storage pressure)
            timestamp = int(int(timestamp) / 60) * 60

            pk = '~'.join([func_id, str(timestamp)])
            if pk not in count_map:
                count_map[pk] = {
                    'funcId'   : func_id,
                    'timestamp': timestamp,
                    'count'    : 0
                }

            count_map[pk]['count'] += 1

        # Write time-series data
        for pk, c in count_map.items():
            cache_key = toolkit.get_server_cache_key('monitor', 'sysStats', ['metric', 'funcCallCount', 'funcId', c['funcId']])

            self.cache_db.ts_add(cache_key, c['count'], timestamp=c['timestamp'], mode='addUp')
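
Note: the minute alignment above is plain integer arithmetic; for example:

    timestamp = 1700000123
    aligned   = int(int(timestamp) / 60) * 60
    print(aligned)  # 1700000100 -- every call within the same minute lands in the same bucket
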
Example #20
def reload_scripts(self, *args, **kwargs):
    is_startup = kwargs.get('isOnLaunch')  or False
    is_crontab = kwargs.get('isOnCrontab') or False
    force      = kwargs.get('force')       or False

    # When executed at startup or via crontab, a lock is required
    if is_startup or is_crontab:
        self.lock(max_age=10)
    else:
        self.launch_log()

    cache_key = toolkit.get_cache_key('fixedCache', 'prevScriptDataHash')

    # Previous script data hash
    prev_script_data_hash = self.cache_db.get(cache_key)
    if not prev_script_data_hash:
        force = True
    else:
        prev_script_data_hash = six.ensure_str(prev_script_data_hash)

    # Latest script data hash
    latest_script_data_hash = self.get_latest_script_data_hash()

    is_script_reloaded = False
    if force:
        self.force_reload_script()
        is_script_reloaded = True

    elif latest_script_data_hash != prev_script_data_hash:
        self.reload_script()
        is_script_reloaded = True

    if is_script_reloaded:
        self.logger.info('[SCRIPT CACHE] Reload script {} -> {} {}'.format(
            prev_script_data_hash, latest_script_data_hash, '[FORCE]' if force else ''))

        self.cache_db.set(cache_key, latest_script_data_hash)
Example #21
def dataflux_func_sync_cache(self, *args, **kwargs):
    lock_key   = toolkit.get_cache_key('lock', 'syncCache')
    lock_value = toolkit.gen_uuid()
    if not self.cache_db.lock(lock_key, lock_value, 30):
        self.logger.warning('DataFluxFunc SyncCache Task already launched.')
        return

    self.logger.info('DataFluxFunc SyncCache Task launched.')

    # Flush script running info to the database
    try:
        self.sync_script_running_info()
    except Exception as e:
        for line in traceback.format_exc().splitlines():
            self.logger.error(line)

    # Flush script failure info to the database
    try:
        self.sync_script_failure()
    except Exception as e:
        for line in traceback.format_exc().splitlines():
            self.logger.error(line)

    # Flush script logs to the database
    try:
        self.sync_script_log()
    except Exception as e:
        for line in traceback.format_exc().splitlines():
            self.logger.error(line)

    # Flush task info to the database
    try:
        self.sync_task_info()
    except Exception as e:
        for line in traceback.format_exc().splitlines():
            self.logger.error(line)
Example #22
def dataflux_func_starter_crontab(self, *args, **kwargs):
    self.logger.info('DataFluxFunc Crontab Starter Task launched.')

    # Note: wait 1 second to make sure we don't run exactly on the tick, which would jump back to the previous trigger point
    time.sleep(1)

    # Compute the current trigger point
    now = arrow.get().to('Asia/Shanghai').datetime
    starter_crontab = crontab_parser.CronTab(CONFIG['_CRONTAB_STARTER'])
    trigger_time = int(starter_crontab.previous(delta=False, now=now))
    current_time = int(time.time())

    # Fetch crontab configs from function integrations
    integrated_crontab_configs = self.get_integrated_func_crontab_configs()

    # Loop and fetch the crontab configs that need to run
    next_seq = 0
    while next_seq is not None:
        crontab_configs, next_seq = self.fetch_crontab_configs(next_seq)

        # On the first round, include the auto-run functions from integrations
        if integrated_crontab_configs:
            crontab_configs = integrated_crontab_configs + crontab_configs
            integrated_crontab_configs = None

        # Dispatch tasks
        for c in crontab_configs:
            # Skip tasks whose trigger time has not arrived yet
            if not self.crontab_config_filter(trigger_time, c):
                continue

            # Determine the execution queue
            specified_queue = None
            try:
                specified_queue = c['funcExtraConfig']['queue']
            except Exception as e:
                pass

            queue = None
            if specified_queue is None:
                queue = toolkit.get_worker_queue(
                    CONFIG['_FUNC_TASK_DEFAULT_CRONTAB_QUEUE'])

            else:
                if isinstance(
                        specified_queue, int
                ) and 0 <= specified_queue < CONFIG['_WORKER_QUEUE_COUNT']:
                    # Queue number specified directly
                    queue = toolkit.get_worker_queue(specified_queue)

                else:
                    # Queue specified by alias
                    try:
                        queue_number = int(
                            CONFIG['WORKER_QUEUE_ALIAS_MAP'][specified_queue])
                    except Exception as e:
                        # Config error: the value cannot be resolved to a queue number, or the number
                        # is out of range; fall back to the default function queue so that some worker
                        # always handles the task (it will raise an error when it actually runs)
                        queue = toolkit.get_worker_queue(
                            CONFIG['_FUNC_TASK_DEFAULT_CRONTAB_QUEUE'])
                    else:
                        # Convert the queue alias to a queue number
                        queue = toolkit.get_worker_queue(queue_number)

            # Determine the timeout
            soft_time_limit = CONFIG['_FUNC_TASK_DEFAULT_TIMEOUT']
            time_limit = CONFIG['_FUNC_TASK_DEFAULT_TIMEOUT'] + CONFIG[
                '_FUNC_TASK_EXTRA_TIMEOUT_TO_KILL']

            func_timeout = None
            try:
                func_timeout = c['funcExtraConfig']['timeout']
            except Exception as e:
                pass

            # If present and valid, use it as the timeout
            if isinstance(func_timeout,
                          (six.integer_types, float)) and func_timeout > 0:
                soft_time_limit = func_timeout
                time_limit = func_timeout + CONFIG[
                    '_FUNC_TASK_EXTRA_TIMEOUT_TO_KILL']

            # Compute the task expiration time
            _shift_seconds = int(soft_time_limit *
                                 CONFIG['_FUNC_TASK_TIMEOUT_TO_EXPIRE_SCALE'])
            expires = arrow.get().shift(seconds=_shift_seconds).datetime

            # Acquire the lock
            lock_key = toolkit.get_cache_key('lock', 'CrontabConfig',
                                             ['crontabConfigId', c['id']])
            lock_value = toolkit.gen_uuid()
            if not self.cache_db.lock(lock_key, lock_value, time_limit):
                # Lock before dispatching the task; skip on failure
                continue

            # Task ID
            task_id = gen_task_id()

            # Record task info (queued)
            self.cache_task_status(c['id'], task_id, func_id=c['funcId'])

            # Enqueue the task
            task_headers = {
                'origin': '{}-{}'.format(c['id'], current_time)  # Origin marked as "<crontab config ID>-<timestamp>"
            }
            task_kwargs = {
                'funcId': c['funcId'],
                'funcCallKwargs': c['funcCallKwargs'],
                'origin': c.get('execMode') or 'crontab',
                'originId': c['id'],
                'saveResult': c['saveResult'],
                'execMode': 'crontab',
                'triggerTime': trigger_time,
                'crontab': c['crontab'],
                'queue': specified_queue,
                'lockKey': lock_key,
                'lockValue': lock_value,
            }
            dataflux_func_runner.apply_async(task_id=task_id,
                                             kwargs=task_kwargs,
                                             headers=task_headers,
                                             queue=queue,
                                             soft_time_limit=soft_time_limit,
                                             time_limit=time_limit,
                                             expires=expires)
Example #23
def get_server_cache_key(topic, name, tags=None):
    return toolkit.get_cache_key(topic, name, tags, APP_NAME_SERVER)
Example #24
    def update_script_dict_cache(self):
        '''
        Update the script dict cache.
        Works together with DataFluxFuncReloadScriptsTask to achieve fast script loading.
        Steps:
            1. Read the cache timestamp from local memory; if it has not expired, return immediately.
            2. Check the current script cache MD5 in Redis.
            2.1. If unchanged, extend the local cache lifetime and return.
            2.2. If changed, fetch the script cache dump from Redis.
            3. If Redis holds no script cache dump, load the data directly from the database
               (normally this never happens, since DataFluxFuncReloadScriptsTask refreshes the Redis cache on a schedule)
        '''
        global SCRIPTS_CACHE_MD5
        global SCRIPTS_CACHE_TIMESTAMP
        global SCRIPT_DICT_CACHE

        current_timestamp = time.time()

        cache_key_script_md5 = toolkit.get_cache_key('fixedCache',
                                                     'scriptsMD5')
        cache_key_script_dump = toolkit.get_cache_key('fixedCache',
                                                      'scriptsDump')

        # 1. Try the local cache first, without checking for data updates
        if current_timestamp - SCRIPTS_CACHE_TIMESTAMP < CONFIG[
                '_FUNC_TASK_LOCAL_CACHE_EXPIRES']:
            # Still within the retention window; skip
            self.logger.debug('[SCRIPT CACHE] Use local cache')
            return

        # 2. Check the Redis cache
        scripts_md5 = self.cache_db.get(cache_key_script_md5)
        if scripts_md5:
            scripts_md5 = six.ensure_str(scripts_md5)

        scripts_dump_exists = self.cache_db.exists(cache_key_script_dump)

        if scripts_md5 and scripts_md5 == SCRIPTS_CACHE_MD5 and scripts_dump_exists:
            # Cache exists and the MD5 is unchanged; extend the local cache
            SCRIPTS_CACHE_TIMESTAMP = current_timestamp

            self.logger.debug(
                '[SCRIPT CACHE] Not Modified, extend local cache')
            return

        # 3. No cache, or the cache MD5 changed; read the dump from Redis
        scripts = None

        scripts_dump = self.cache_db.get(cache_key_script_dump)
        if scripts_dump:
            self.logger.debug('[SCRIPT CACHE] Modified, Use Redis cache')

            scripts_dump = six.ensure_str(scripts_dump)

            try:
                scripts = ujson.loads(scripts_dump)
            except Exception as e:
                pass

            if not scripts_md5:
                # No cached MD5; compute it ourselves (rare)
                scripts_md5 = toolkit.get_md5(scripts_dump)

            # Record the cache MD5
            SCRIPTS_CACHE_MD5 = scripts_md5

        # 4. Could not read the dump from Redis; load the full user scripts from the database
        if not scripts or not scripts_dump:
            self.logger.warning('[SCRIPT CACHE] Cache failed! Use DB data')

            scripts = self.get_scripts()

            # Compute and record the cache MD5 ourselves
            scripts_dump = toolkit.json_safe_dumps(scripts, sort_keys=True)
            SCRIPTS_CACHE_MD5 = toolkit.get_md5(scripts_dump)

        # Save to the local cache
        SCRIPTS_CACHE_TIMESTAMP = current_timestamp
        SCRIPT_DICT_CACHE = self.create_script_dict(scripts)
Example #25
    def sync_script_running_info(self):
        data = []

        # Collect data
        cache_key = toolkit.get_cache_key('syncCache', 'scriptRunningInfo')
        for i in range(CONFIG['_SYNC_CACHE_BATCH_COUNT']):
            cache_res = self.cache_db.run('rpop', cache_key)
            if not cache_res:
                break

            try:
                cache_res = toolkit.json_loads(cache_res)
            except Exception as e:
                for line in traceback.format_exc().splitlines():
                    self.logger.error(line)
            else:
                data.append(cache_res)

        # Compute the latest publish version of each function
        func_latest_version_map = {}
        for d in data:
            func_id                = d['funcId']
            script_publish_version = d['scriptPublishVersion']

            if func_id not in func_latest_version_map:
                func_latest_version_map[func_id] = script_publish_version
            else:
                func_latest_version_map[func_id] = max(script_publish_version, func_latest_version_map[func_id])

        # Group and aggregate
        data_map = {}
        for d in data:
            func_id                = d['funcId']
            script_publish_version = d['scriptPublishVersion']
            exec_mode              = d['execMode']
            is_failed              = d['isFailed']
            cost                   = d['cost']
            timestamp              = d.get('timestamp')

            if not timestamp:
                continue

            latest_version = func_latest_version_map.get(func_id)
            if latest_version and script_publish_version < latest_version:
                continue

            if exec_mode is None:
                exec_mode = 'sync'

            pk = '~'.join([func_id, str(script_publish_version), exec_mode])
            if pk not in data_map:
                data_map[pk] = {
                    'funcId'              : func_id,
                    'scriptPublishVersion': script_publish_version,
                    'execMode'            : exec_mode,
                }

            if 'succeedCount' not in data_map[pk]:
                data_map[pk]['succeedCount'] = 0

            if 'failCount' not in data_map[pk]:
                data_map[pk]['failCount'] = 0

            data_map[pk]['latestFailTimestamp']    = None
            data_map[pk]['latestSucceedTimestamp'] = None

            if is_failed:
                data_map[pk]['failCount']           += 1
                data_map[pk]['latestFailTimestamp'] = timestamp
                data_map[pk]['status']              = 'failed'
            else:
                data_map[pk]['succeedCount']           += 1
                data_map[pk]['latestSucceedTimestamp'] = timestamp
                data_map[pk]['status']                 = 'succeeded'

            if 'minCost' not in data_map[pk]:
                data_map[pk]['minCost'] = cost
            else:
                data_map[pk]['minCost'] = min(data_map[pk]['minCost'], cost)

            if 'maxCost' not in data_map[pk]:
                data_map[pk]['maxCost'] = cost
            else:
                data_map[pk]['maxCost'] = max(data_map[pk]['maxCost'], cost)

            if 'totalCost' not in data_map[pk]:
                data_map[pk]['totalCost'] = cost
            else:
                data_map[pk]['totalCost'] += cost

            data_map[pk]['latestCost'] = cost

        # Write each group to the database
        for pk, d in data_map.items():
            func_id                = d['funcId']
            script_publish_version = d['scriptPublishVersion']
            exec_mode              = d['execMode']

            sql = '''
                SELECT
                     `succeedCount`
                    ,`failCount`
                    ,`minCost`
                    ,`maxCost`
                    ,`totalCost`
                    ,`latestCost`
                    ,UNIX_TIMESTAMP(`latestSucceedTime`) AS `latestSucceedTimestamp`
                    ,UNIX_TIMESTAMP(`latestFailTime`)    AS `latestFailTimestamp`
                    ,`status`
                FROM biz_rel_func_running_info
                WHERE
                        `funcId`               = ?
                    AND `scriptPublishVersion` = ?
                    AND `execMode`             = ?
                LIMIT 1
                '''
            sql_params = [
                func_id,
                script_publish_version,
                exec_mode,
            ]
            prev_info = self.db.query(sql, sql_params)

            # Delete outdated records

            if not prev_info:
                # No existing record; insert one
                sql = '''
                    INSERT IGNORE INTO biz_rel_func_running_info
                    SET
                       `funcId`               = ?
                      ,`scriptPublishVersion` = ?
                      ,`execMode`             = ?

                      ,`succeedCount`      = ?
                      ,`failCount`         = ?
                      ,`minCost`           = ?
                      ,`maxCost`           = ?
                      ,`totalCost`         = ?
                      ,`latestCost`        = ?
                      ,`latestSucceedTime` = FROM_UNIXTIME(?)
                      ,`latestFailTime`    = FROM_UNIXTIME(?)
                      ,`status`            = ?
                '''
                sql_params = [
                    func_id,
                    script_publish_version,
                    exec_mode,

                    d['succeedCount'],
                    d['failCount'],
                    d['minCost'],
                    d['maxCost'],
                    d['totalCost'],
                    d['latestCost'],
                    d['latestSucceedTimestamp'],
                    d['latestFailTimestamp'],
                    d['status'],
                ]
                self.db.query(sql, sql_params)

            else:
                prev_info = prev_info[0]

                # Existing record; merge with it
                sql = '''
                    UPDATE biz_rel_func_running_info
                    SET
                         `succeedCount`      = ?
                        ,`failCount`         = ?
                        ,`minCost`           = ?
                        ,`maxCost`           = ?
                        ,`totalCost`         = ?
                        ,`latestCost`        = ?
                        ,`latestSucceedTime` = FROM_UNIXTIME(?)
                        ,`latestFailTime`    = FROM_UNIXTIME(?)
                        ,`status`            = ?

                    WHERE
                            `funcId`               = ?
                        AND `scriptPublishVersion` = ?
                        AND `execMode`             = ?
                    LIMIT 1
                '''
                sql_params = [
                    d['succeedCount'] + (prev_info['succeedCount'] or 0),
                    d['failCount']    + (prev_info['failCount']    or 0),
                    min(filter(lambda x: x is not None, (d['minCost'], prev_info['minCost']))),
                    max(filter(lambda x: x is not None, (d['maxCost'], prev_info['maxCost']))),
                    d['totalCost'] + (prev_info['totalCost'] or 0),
                    d['latestCost'],
                    d['latestSucceedTimestamp'] or prev_info['latestSucceedTimestamp'],
                    d['latestFailTimestamp']    or prev_info['latestFailTimestamp'],
                    d['status'],

                    func_id,
                    script_publish_version,
                    exec_mode,
                ]
                self.db.query(sql, sql_params)

        # Delete outdated data
        for func_id, latest_version in func_latest_version_map.items():
            sql = '''
                DELETE FROM biz_rel_func_running_info
                WHERE
                        `funcId`                                      =  ?
                    AND `scriptPublishVersion`                        != ?
                    OR  UNIX_TIMESTAMP() - UNIX_TIMESTAMP(updateTime) >  ?
                '''
            sql_params = [
                func_id,
                latest_version,
                3600 * 24 * 30,
            ]
            self.db.query(sql, sql_params)
Example #26
    def reload_script(self):
        global SCRIPT_MAP

        # 1. Get all current script IDs and MD5s
        sql = '''
            SELECT
                 `scpt`.`id`
                ,`scpt`.`codeMD5`
                ,`scpt`.`publishVersion`
                ,`sset`.`id` AS `scriptSetId`
            FROM biz_main_script AS scpt

            JOIN biz_main_script_set AS sset
                ON `scpt`.`scriptSetId` = `sset`.`id`
            '''
        db_res = self.db.query(sql)

        current_script_ids = set()
        reload_script_ids  = set()
        for d in db_res:
            script_id  = d['id']

            current_script_ids.add(script_id)
            cached_script = SCRIPT_MAP.get(script_id)

            if not cached_script:
                # New script
                reload_script_ids.add(script_id)

            elif cached_script['codeMD5'] != d['codeMD5'] or cached_script['publishVersion'] != d['publishVersion']:
                # Updated script
                reload_script_ids.add(script_id)

        # Remove scripts that no longer exist
        script_ids_to_pop = []
        for script_id in SCRIPT_MAP.keys():
            if script_id not in current_script_ids:
                self.logger.debug('[SCRIPT CACHE] Remove {}'.format(script_id))
                script_ids_to_pop.append(script_id)

        for script_id in script_ids_to_pop:
            SCRIPT_MAP.pop(script_id, None)

        if reload_script_ids:
            # 2. Fetch the updated scripts from the database
            scripts = self.get_scripts(script_ids=reload_script_ids)
            for s in scripts:
                self.logger.debug('[SCRIPT CACHE] Load {}'.format(s['id']))

            # Merge the reloaded scripts
            reloaded_script_map = dict([(s['id'], s) for s in scripts])
            SCRIPT_MAP.update(reloaded_script_map)

            # 3. Write the dump and MD5 to the cache
            self._cache_scripts()

            # 4. Delete cached function results
            for script_id in reload_script_ids:
                func_id_pattern = '{0}.*'.format(script_id)
                cache_key = toolkit.get_cache_key('cache', 'funcResult', tags=[
                    'funcId', func_id_pattern,
                    'scriptCodeMD5', '*',
                    'funcKwargsMD5', '*'])
                for k in self.cache_db.client.scan_iter(cache_key):
                    self.cache_db.delete(six.ensure_str(k))
Example #27
def on_heartbeat_sent(*args, **kwargs):
    global MAIN_PROCESS
    global CHILD_PROCESS_MAP
    global HEARTBEAT_EXEC_TIMESTAMP

    # Limit run interval
    current_timestamp = int(time.time())
    if current_timestamp - HEARTBEAT_EXEC_TIMESTAMP < CONFIG[
            '_MONITOR_SYS_STATS_CHECK_INTERVAL']:
        return

    HEARTBEAT_EXEC_TIMESTAMP = current_timestamp

    # Get queue list
    _Q_flag = '-Q'

    # Record worker count
    worker_queues = []
    if _Q_flag in sys.argv:
        worker_queues = sys.argv[sys.argv.index(_Q_flag) + 1].split(',')
        worker_queues = list(map(lambda x: x.split('@').pop(), worker_queues))
        worker_queues.sort()
    else:
        worker_queues = [str(i) for i in range(CONFIG['_WORKER_QUEUE_COUNT'])]

    _expires = 30
    for q in worker_queues:
        cache_key = toolkit.get_cache_key(
            'heartbeat',
            'workerOnQueue',
            tags=['workerId', WORKER_ID, 'workerQueue', q])
        REDIS_HELPER.setex(cache_key, _expires, 'x')

        cache_pattern = toolkit.get_cache_key(
            'heartbeat',
            'workerOnQueue',
            tags=['workerId', '*', 'workerQueue', q])
        found_workers = REDIS_HELPER.keys(cache_pattern)

        cache_key = toolkit.get_cache_key('heartbeat',
                                          'workerOnQueueCount',
                                          tags=['workerQueue', q])
        REDIS_HELPER.setex(cache_key, _expires, len(found_workers))

    # Record CPU/Memory
    if MAIN_PROCESS:
        total_cpu_percent = MAIN_PROCESS.cpu_percent()

        main_memory_info = MAIN_PROCESS.memory_full_info()
        total_memory_pss = main_memory_info.pss

        # Update child process map
        next_child_process_map = dict([(p.pid, p)
                                       for p in MAIN_PROCESS.children()])

        prev_child_pids = set(CHILD_PROCESS_MAP.keys())
        next_child_pids = set(next_child_process_map.keys())

        exited_pids = prev_child_pids - next_child_pids
        for pid in exited_pids:
            CHILD_PROCESS_MAP.pop(pid, None)

        new_pids = next_child_pids - prev_child_pids
        for pid in new_pids:
            new_child_process = next_child_process_map[pid]
            new_child_process.cpu_percent(interval=1)
            CHILD_PROCESS_MAP[pid] = new_child_process

        # Count up
        for p in CHILD_PROCESS_MAP.values():
            child_cpu_percent = p.cpu_percent()
            child_memory_info = p.memory_full_info()

            total_cpu_percent += child_cpu_percent
            total_memory_pss += child_memory_info.pss

        total_cpu_percent = round(total_cpu_percent, 2)

        hostname = socket.gethostname()

        cache_key = toolkit.get_server_cache_key(
            'monitor', 'sysStats',
            ['metric', 'workerCPUPercent', 'hostname', hostname])
        REDIS_HELPER.ts_add(cache_key,
                            total_cpu_percent,
                            timestamp=current_timestamp)

        cache_key = toolkit.get_server_cache_key(
            'monitor', 'sysStats',
            ['metric', 'workerMemoryPSS', 'hostname', hostname])
        REDIS_HELPER.ts_add(cache_key,
                            total_memory_pss,
                            timestamp=current_timestamp)
Example #28
    def sync_script_failure(self):
        if not CONFIG['_INTERNAL_KEEP_SCRIPT_FAILURE']:
            return

        cache_key = toolkit.get_cache_key('syncCache', 'scriptFailure')

        for i in range(CONFIG['_BUILTIN_TASK_SYNC_CACHE_BATCH_COUNT']):
            cache_res = self.cache_db.run('rpop', cache_key)
            if not cache_res:
                break

            try:
                cache_res = ujson.loads(cache_res)
            except Exception as e:
                for line in traceback.format_exc().splitlines():
                    self.logger.error(line)

                continue

            func_id                = cache_res['funcId']
            script_publish_version = cache_res['scriptPublishVersion']
            exec_mode              = cache_res['execMode']
            einfo_text             = cache_res.get('einfoTEXT')
            trace_info             = cache_res.get('traceInfo')
            timestamp              = cache_res.get('timestamp')

            if not all([einfo_text, timestamp]):
                continue

            if exec_mode is None:
                exec_mode = 'sync'

            # Record the script failure
            failure_id = gen_script_failure_id()

            exception = None
            if trace_info:
                exception = trace_info.get('exceptionDump') or ''
                if isinstance(exception, six.string_types):
                    exception = exception.split(':')[0]
                else:
                    exception = None

                trace_info = simplejson.dumps(trace_info, default=toolkit.json_dump_default)

            sql = '''
                INSERT INTO biz_main_script_failure
                SET
                   `id`                   = ?
                  ,`funcId`               = ?
                  ,`scriptPublishVersion` = ?
                  ,`execMode`             = ?
                  ,`einfoTEXT`            = ?
                  ,`exception`            = ?
                  ,`traceInfoJSON`        = ?
                  ,`createTime`           = FROM_UNIXTIME(?)
                  ,`updateTime`           = FROM_UNIXTIME(?)
            '''
            sql_params = [
                failure_id,
                func_id,
                script_publish_version,
                exec_mode,
                einfo_text,
                exception,
                trace_info,
                timestamp, timestamp,
            ]
            self.db.query(sql, sql_params)
Example #29
    def sync_script_log(self):
        if not CONFIG['_INTERNAL_KEEP_SCRIPT_LOG']:
            return

        cache_key = toolkit.get_cache_key('syncCache', 'scriptLog')

        # When the queue grows too long, stop recording some entries
        queue_length = 0
        cache_res = self.cache_db.run('llen', cache_key)
        if cache_res:
            queue_length = int(cache_res)

        is_service_degraded = queue_length > CONFIG['_BUILTIN_TASK_SYNC_CACHE_SERVICE_DEGRADE_QUEUE_LENGTH']

        for i in range(CONFIG['_BUILTIN_TASK_SYNC_CACHE_BATCH_COUNT']):
            cache_res = self.cache_db.run('rpop', cache_key)
            if not cache_res:
                break

            # Under service degradation, drop entries at random
            if is_service_degraded:
                if random.randint(0, queue_length) * 2 > CONFIG['_BUILTIN_TASK_SYNC_CACHE_SERVICE_DEGRADE_QUEUE_LENGTH']:
                    continue

            try:
                cache_res = ujson.loads(cache_res)
            except Exception as e:
                for line in traceback.format_exc().splitlines():
                    self.logger.error(line)

                continue

            func_id                = cache_res['funcId']
            script_publish_version = cache_res['scriptPublishVersion']
            exec_mode              = cache_res['execMode']
            log_messages           = cache_res.get('logMessages')
            timestamp              = cache_res.get('timestamp')

            if not all([log_messages, timestamp]):
                continue

            if exec_mode is None:
                exec_mode = 'sync'

            # Record the script log
            log_id = gen_script_log_id()

            message_text = '\n'.join(log_messages).strip()

            sql = '''
                INSERT INTO biz_main_script_log
                SET
                   `id`                   = ?
                  ,`funcId`               = ?
                  ,`scriptPublishVersion` = ?
                  ,`execMode`             = ?
                  ,`messageTEXT`          = ?
                  ,`createTime`           = FROM_UNIXTIME(?)
                  ,`updateTime`           = FROM_UNIXTIME(?)
            '''
            sql_params = [
                log_id,
                func_id,
                script_publish_version,
                exec_mode,
                message_text,
                timestamp, timestamp,
            ]
            self.db.query(sql, sql_params)
Example #30
    def sync_task_info(self):
        cache_key = toolkit.get_cache_key('syncCache', 'taskInfo')

        # When the queue grows too long, stop recording some entries
        queue_length = 0
        cache_res = self.cache_db.run('llen', cache_key)
        if cache_res:
            queue_length = int(cache_res)

        is_service_degraded = queue_length > CONFIG['_BUILTIN_TASK_SYNC_CACHE_SERVICE_DEGRADE_QUEUE_LENGTH']

        for i in range(CONFIG['_BUILTIN_TASK_SYNC_CACHE_BATCH_COUNT']):
            cache_res = self.cache_db.run('rpop', cache_key)
            if not cache_res:
                break

            try:
                cache_res = ujson.loads(cache_res)
            except Exception as e:
                for line in traceback.format_exc().splitlines():
                    self.logger.error(line)
                continue

            task_id                = cache_res['taskId']
            origin                 = cache_res['origin']
            origin_id              = cache_res['originId']
            func_id                = cache_res.get('funcId')
            script_publish_version = cache_res.get('scriptPublishVersion')
            exec_mode              = cache_res.get('execMode')
            status                 = cache_res['status']
            log_messages           = cache_res.get('logMessages') or []
            einfo_text             = cache_res.get('einfoTEXT')   or ''
            timestamp              = cache_res.get('timestamp')

            if not all([origin, exec_mode, origin_id, timestamp]):
                continue

            if origin not in ('crontab', 'batch') and exec_mode != 'crontab':
                return

            message_text = '\n'.join(log_messages).strip()

            # Record task info
            table_name      = None
            origin_id_field = None
            if origin == 'crontab' or exec_mode == 'crontab':
                table_name      = 'biz_main_crontab_task_info'
                origin_id_field = 'crontabConfigId'

            elif origin == 'batch':
                table_name      = 'biz_main_batch_task_info'
                origin_id_field = 'batchId'

            sql        = None
            sql_params = None

            # Handle differently depending on whether service is degraded
            if not is_service_degraded:
                # No service degradation; handle normally
                if status == 'queued':
                    sql = '''
                        INSERT INTO ??
                        SET
                             `id`                   = ?
                            ,`??`                   = ?
                            ,`funcId`               = ?
                            ,`scriptPublishVersion` = ?
                            ,`queueTime`            = FROM_UNIXTIME(?)
                            ,`createTime`           = FROM_UNIXTIME(?)
                            ,`updateTime`           = FROM_UNIXTIME(?)
                        '''
                    sql_params = [
                        table_name,
                        task_id,
                        origin_id_field, origin_id,
                        func_id,
                        script_publish_version,
                        timestamp, timestamp, timestamp,
                    ]

                elif status == 'pending':
                    sql = '''
                        UPDATE ??
                        SET
                             `funcId`               = IFNULL(?, `funcId`)
                            ,`scriptPublishVersion` = IFNULL(?, `scriptPublishVersion`)
                            ,`startTime`  = FROM_UNIXTIME(?)
                            ,`status`     = ?
                            ,`updateTime` = FROM_UNIXTIME(?)
                        WHERE
                            `id` = ?
                        '''
                    sql_params = [
                        table_name,

                        func_id,
                        script_publish_version,
                        timestamp,
                        status,
                        timestamp,
                        task_id
                    ]

                else:
                    sql = '''
                        UPDATE ??
                        SET
                             `funcId`               = IFNULL(?, `funcId`)
                            ,`scriptPublishVersion` = IFNULL(?, `scriptPublishVersion`)
                            ,`endTime`              = FROM_UNIXTIME(?)
                            ,`status`               = ?
                            ,`logMessageTEXT`       = ?
                            ,`einfoTEXT`            = ?
                            ,`updateTime`           = FROM_UNIXTIME(?)
                        WHERE
                            `id` = ?
                        '''
                    sql_params = [
                        table_name,

                        func_id,
                        script_publish_version,
                        timestamp,
                        status,
                        message_text,
                        einfo_text,
                        timestamp,
                        task_id,
                    ]

            else:
                # Service degraded; only record final results
                if status in ('success', 'failure'):
                    sql = '''
                        REPLACE INTO ??
                        SET
                             `id`                   = ?
                            ,`??`                   = ?
                            ,`funcId`               = ?
                            ,`scriptPublishVersion` = ?
                            ,`endTime`              = FROM_UNIXTIME(?)
                            ,`status`               = ?
                            ,`logMessageTEXT`       = ?
                            ,`einfoTEXT`            = ?
                            ,`createTime`           = FROM_UNIXTIME(?)
                            ,`updateTime`           = FROM_UNIXTIME(?)
                        '''
                    sql_params = [
                        table_name,
                        task_id,
                        origin_id_field, origin_id,
                        func_id,
                        script_publish_version,
                        timestamp,
                        status,
                        message_text,
                        einfo_text,
                        timestamp, timestamp,
                    ]

                else:
                    continue

            self.db.query(sql, sql_params)