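# NOTE: the modules below appear to assume roughly the following imports.
# The standard-library and asyncio_redis imports are certain from usage;
# the project-local module paths are assumptions based on the names used.
import asyncio
import datetime
import logging
import os
import pickle
import re
import signal
import time
from collections import OrderedDict, defaultdict
from functools import partial

import asyncio_redis
import ujson

# Project-local names referenced below but defined elsewhere (paths assumed):
# from settings import SETTINGS
# from storage.redis import ConfigStorage, MetricsStorage, LoggingStorage, TaskStorage
# from tasks import Task, SchedulerTaskHistory
# from utils import now, parse_timetable, datetime_to_timestamp, timestamp_to_datetime
# Also assumed: ComPortState, and the word-splitting regexps SPLIT_RE,
# SPLIT_NEG_RE, SKIP_RE used by MetricsCollector.parse_value().

log = logging.getLogger(__name__)  # assumed: modules use a module-level `log`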
# Fragment: bootstrap() of the worker process. The enclosing class is not part
# of this section; compare with Scheduler.bootstrap() below.
@asyncio.coroutine
def bootstrap(self):
    log.info("Running worker loop")
    self.connection = yield from asyncio_redis.Pool.create(
        host='localhost', port=6379,
        encoder=asyncio_redis.encoders.BytesEncoder(), poolsize=3)
    self.config = ConfigStorage()
    self.comport_state = ComPortState()
    self.db_log = LoggingStorage()
class Scheduler(object):
    def __init__(self):
        self.current_loop = None
        self.connection = None
        self.run = True
        self.subscription = None
        self.sleep_task = None
        self.config_version = 0
        self.scheduler_tasks = dict()
        self.scheduler_tasks_history = defaultdict(dict)
        self._ttl_check_last_run = 0
        self._ttl_reload_config_last_run = 0
        self.config = ConfigStorage()
        self.tq_storage = None

    def install_bootstrap(self):
        try:
            # Check whether it is already installed
            objects = OrderedDict()
            for key in reversed(SETTINGS.BOOTSTRAP_TYPES):
                objects[key] = self.config.get_config(key)

            # Collect all bootstrap objects
            any_objects = False
            bootstrap_objects = []
            for key, items in objects.items():
                for item in items:
                    any_objects = True
                    if item.get('bootstrap'):
                        bootstrap_objects.append((key, item))

            if any_objects:
                if not bootstrap_objects:
                    # There are no bootstrap objects -- clean the db, it's a very old db
                    log.info("Bootstrap install: remove all objects in db, it's a very old db")
                    for key in reversed(SETTINGS.BOOTSTRAP_TYPES):
                        self.config.del_config(key)
                else:
                    # We should remove only the bootstrap objects
                    log.info('Bootstrap install: remove {} old bootstrap objects in db'.format(len(bootstrap_objects)))
                    for key, obj in bootstrap_objects:
                        self.config.del_object(key, obj['_id'])

            # Install the bootstrap update
            filename = os.path.join(SETTINGS.BASE_DIR, SETTINGS.BOOTSTRAP_FILE)
            bootstrap = ujson.load(open(filename, 'br'))
            log.info("Install new bootstrap objects")
            for key, objects in ((key, bootstrap.get(key.capitalize(), []))
                                 for key in SETTINGS.BOOTSTRAP_TYPES):
                for obj in objects:
                    obj['bootstrap'] = True
                    self.config.add_object(key, obj)
            if not self.config.connection.get(SETTINGS.DEVCONFIG):
                self.config.connection.set(SETTINGS.DEVCONFIG,
                                           ujson.dumps(SETTINGS.DEVCONFIG_DATA))
            log.info("Install bootstrap objects finished")
        except Exception:
            log.error("Failed to install bootstrap", exc_info=True)

    @asyncio.coroutine
    def schedule_task(self, name, task_type, run_at, ttl, kwargs):
        """ Create a Task object and add it to the Scheduled queue """
        # Create and store the Task object
        task = yield from self.tq_storage.create_task(
            name, task_type, run_at, ttl, kwargs,
            store_to=Task.STORE_TO_METRICS)
        yield from self.tq_storage.schedule_task(task)
        # Store next_run and scheduled_task_id in the TaskHistory
        task = yield from self.tq_storage.create_scheduler_task_history(
            task,
            last_run=self.scheduler_tasks_history[task.name].get('last_run', 0),
            next_run=datetime_to_timestamp(run_at),
            scheduled_task_id=task.id)

    @asyncio.coroutine
    def _reload_config_tasks_list(self):
        """ Load the list of tasks and their details """
        time_now = int(now())
        if time_now - self._ttl_reload_config_last_run < 1000:  # 1000 = 1 sec
            return
        self._ttl_reload_config_last_run = time_now

        config_version = self.config.get_config_version()
        if config_version != self.config_version:
            log.info('Changes in actions list, update.')
            new_scheduler_tasks = self.config.get_scheduled_actions()
            new_keys = set(new_scheduler_tasks.keys()) - set(self.scheduler_tasks.keys())
            deleted_keys = set(self.scheduler_tasks.keys()) - set(new_scheduler_tasks.keys())
            if new_keys or deleted_keys:
                log.info('New actions list, new_keys={}, deleted_keys={}'.format(new_keys, deleted_keys))
            self.scheduler_tasks = new_scheduler_tasks
            yield from self._load_scheduler_tasks_history()

            # Re-check scheduler_tasks_history here: the run interval may have
            # changed from a long one to a short one while next_run is already
            # set far in the future.
            for scheduled_task_name, scheduled_task_history in self.scheduler_tasks_history.items():
                # Look at every task for which scheduling info is stored
                if scheduled_task_history.get('next_run', 0):
                    # There is a scheduled task
                    if scheduled_task_name in self.scheduler_tasks:
                        # The task still has a schedule
                        possible_next_run = datetime_to_timestamp(
                            self._get_next_run_time(
                                scheduled_task_name,
                                self.scheduler_tasks[scheduled_task_name],
                                int(now())))
                    else:
                        # The task has no schedule left: reset next_run to 0 and do nothing else
                        possible_next_run = 0
                    if scheduled_task_history.get('next_run', 0) != possible_next_run:
                        # Cancel the scheduled task and reset next_run
                        task_id = scheduled_task_history.get('scheduled_task_id')
                        log.info('Schedule changed for task with id={}, name={}, reschedule next_task'.format(task_id, scheduled_task_name))
                        key = SETTINGS.TASK_STORAGE_KEY.format(task_id).encode('utf-8')
                        yield from self.connection.delete([key])
                        scheduled_task_history['next_run'] = 0
                        scheduled_task_history['scheduled_task_id'] = 0
                        try:
                            task_scheduler_obj = yield from self.connection.hget(
                                SETTINGS.SCHEDULER_HISTORY_HASH,
                                scheduled_task_name.encode('utf-8'))
                            task_scheduler = SchedulerTaskHistory.deserialize(task_scheduler_obj)
                            task_scheduler = task_scheduler._replace(next_run=0, scheduled_task_id=None)
                            yield from self.connection.hset(
                                SETTINGS.SCHEDULER_HISTORY_HASH,
                                task_scheduler.name.encode('utf-8'),
                                task_scheduler.serialize())
                        except Exception:
                            log.error('Broken SchedulerTaskHistory object for task id={}, delete it'.format(scheduled_task_name))
                            yield from self.connection.hdel(
                                SETTINGS.SCHEDULER_HISTORY_HASH,
                                [scheduled_task_name.encode('utf-8')])

            # Was some task deleted? Remove it from execution monitoring too
            for key in deleted_keys:
                if key in self.scheduler_tasks_history:
                    del self.scheduler_tasks_history[key]
            self.config_version = config_version

    @asyncio.coroutine
    def _load_scheduler_tasks_history(self):
        """ Load the run times of the scheduled tasks """
        # Load the run history for scheduled tasks
        tasks_history = yield from self.connection.hgetall(SETTINGS.SCHEDULER_HISTORY_HASH)
        new_keys = set()
        for f in tasks_history:
            key, value = yield from f
            key = key.decode('utf-8')
            new_keys.add(key)
            # Iterate over all tasks in the history
            if key in self.scheduler_tasks:  # Is the task still in the crontab?
                # Deserialize
                try:
                    task_history = SchedulerTaskHistory.deserialize(value)
                except (pickle.UnpicklingError, EOFError, TypeError, ImportError):
                    log.error('Cannot deserialize SchedulerTaskHistory for {}'.format(key), exc_info=True)
                    continue
                self.scheduler_tasks_history[key].update(
                    dict(last_run=task_history.last_run,
                         next_run=task_history.next_run,
                         scheduled_task_id=task_history.scheduled_task_id))
        for key in set(self.scheduler_tasks_history.keys()) - new_keys:
            del self.scheduler_tasks_history[key]

    def _get_next_run_time(self, scheduler_task_name, scheduler_task, current_time):
        interval = parse_timetable(scheduler_task['schedule'])
        if not interval:
            return timestamp_to_datetime(0)
        scheduled_task_history = self.scheduler_tasks_history[scheduler_task_name]
        next_run = scheduled_task_history.get('last_run', 0) + interval
        return timestamp_to_datetime(next_run if next_run > current_time else current_time)

    @asyncio.coroutine
    def _check_expired_tasks(self):
        time_now = int(now())
        if time_now - self._ttl_check_last_run < 1000:  # 1000 = 1 sec
            return
        self._ttl_check_last_run = time_now

        for scheduled_task_name, scheduled_task_history in self.scheduler_tasks_history.items():
            scheduled_task = self.scheduler_tasks.get(scheduled_task_name)
            if (scheduled_task_history.get('next_run')
                    and scheduled_task_history.get('scheduled_task_id')
                    and (time_now - scheduled_task_history.get('next_run')) >
                    ((scheduled_task or {}).get('ttl') or SETTINGS.WORKER_TASK_TIMEOUT) * 1000):
                task_id = scheduled_task_history.get('scheduled_task_id')
                log.info('Fix broken task id={}, name={}'.format(task_id, scheduled_task_name))

                # Get the task object from its redis key
                key = SETTINGS.TASK_STORAGE_KEY.format(task_id).encode('utf-8')
                task_obj = yield from self.connection.get(key)
                # Deserialize the task object
                try:
                    if not task_obj:
                        raise TypeError()
                    task = Task.deserialize(task_obj)
                    if task.status != Task.SUCCESSFUL:
                        # Update the task object status
                        task = task._replace(status=Task.FAILED)
                        # Write the new status to redis
                        yield from self.connection.set(key, task.serialize(),
                                                       expire=SETTINGS.TASK_STORAGE_EXPIRE)
                except TypeError:
                    task = None
                    log.error("Wrong task id={}".format(task_id), exc_info=True)
                    yield from self.connection.delete([key])

                # Publish a message about the finish (FAILED)
                if task:
                    yield from self.connection.publish(
                        SETTINGS.TASK_CHANNEL.format(task_id).encode('utf-8'),
                        task.status.encode('utf-8'))
                else:
                    yield from self.connection.publish(
                        SETTINGS.TASK_CHANNEL.format(task_id).encode('utf-8'),
                        Task.FAILED.encode('utf-8'))

                # Update the scheduler information: store next_run in the history
                try:
                    task_scheduler_obj = yield from self.connection.hget(
                        SETTINGS.SCHEDULER_HISTORY_HASH,
                        scheduled_task_name.encode('utf-8'))
                    task_scheduler = SchedulerTaskHistory.deserialize(task_scheduler_obj)
                    if task and task.status == Task.SUCCESSFUL:
                        scheduled_task_history['last_run'] = scheduled_task_history.get('next_run', 0)
                        scheduled_task_history['next_run'] = 0
                        task_scheduler = task_scheduler._replace(
                            last_run=task_scheduler.next_run, next_run=0, scheduled_task_id=None)
                    else:
                        scheduled_task_history['next_run'] = 0
                        scheduled_task_history['scheduled_task_id'] = None
                        task_scheduler = task_scheduler._replace(next_run=0, scheduled_task_id=None)
                    yield from self.connection.hset(
                        SETTINGS.SCHEDULER_HISTORY_HASH,
                        task_scheduler.name.encode('utf-8'),
                        task_scheduler.serialize())
                except Exception:
                    # We lost SCHEDULER_HISTORY_HASH in the db
                    if task and task.status == Task.SUCCESSFUL:
                        scheduled_task_history['last_run'] = scheduled_task_history.get('next_run', 0)
                        scheduled_task_history['next_run'] = 0
                    else:
                        scheduled_task_history['next_run'] = 0
                        scheduled_task_history['scheduled_task_id'] = None

    @asyncio.coroutine
    def _ping_dispatcher(self):
        # Publish a message about the new scheduled task
        yield from self.connection.publish(SETTINGS.SCHEDULER_TO_DISPATCHER_CHANNEL, b'')

    @asyncio.coroutine
    def sleep(self):
        try:
            yield from self.subscription.next_published()
        except GeneratorExit:
            log.info('Stop subscription')
            return
        except Exception:
            log.error("Broker sleep timer, problems with read from subscription", exc_info=True)
        self.sleep_task = asyncio.Task(self.sleep())

    @asyncio.coroutine
    def _cleanup_scheduled_history(self):
        # Clean the redis hash of tasks with a very old last_run and no next_run
        log.info("Run cleanup task for table Scheduled History")
        tasks_history = yield from self.connection.hgetall(SETTINGS.SCHEDULER_HISTORY_HASH)
        for f in tasks_history:
            key, value = yield from f
            # Iterate over all tasks in the history and deserialize
            try:
                task_history = SchedulerTaskHistory.deserialize(value)
            except (pickle.UnpicklingError, EOFError, TypeError, ImportError):
                log.error('Cannot deserialize SchedulerTaskHistory for {}'.format(key), exc_info=True)
                continue
            if not task_history.next_run and (int(now()) - task_history.last_run) > (SETTINGS.SCHEDULED_HISTORY_CLEANUP_MAX_TTL * 1000):
                # The task is too old, remove it
                log.info('Cleanup for Scheduled History table. Remove task, name={}'.format(task_history.name))
                yield from self.connection.hdel(SETTINGS.SCHEDULER_HISTORY_HASH, [key])
        self.current_loop.call_later(SETTINGS.SCHEDULED_HISTORY_CLEANUP_PERIOD,
                                     self._create_asyncio_task,
                                     self._cleanup_scheduled_history)

    def _create_asyncio_task(self, f, args=None, kwargs=None):
        # XXX Should live on BaseEventLoop, but I can't find it there!
        args = args or ()
        kwargs = kwargs or {}
        asyncio.Task(f(*args, **kwargs))

    @asyncio.coroutine
    def bootstrap(self):
        log.info("Running scheduler loop")
        self.connection = yield from asyncio_redis.Pool.create(
            host='localhost', port=6379,
            encoder=asyncio_redis.encoders.BytesEncoder(), poolsize=5)
        self.tq_storage = TaskStorage(self.current_loop, self.connection)

        # Update objects in storage
        self.install_bootstrap()

        # Initialize the worker-to-scheduler feedback subscription
        self.subscription = yield from self.connection.start_subscribe()
        yield from self.subscription.subscribe([SETTINGS.WORKER_TO_SCHEDULER_CHANNEL])
        self.sleep_task = asyncio.Task(self.sleep())

        # Run the scheduled history cleanup
        yield from self._cleanup_scheduled_history()

    def start(self, loop):
        self.current_loop = loop
        loop.add_signal_handler(signal.SIGINT, partial(self.stop, 'SIGINT'))
        loop.add_signal_handler(signal.SIGTERM, partial(self.stop, 'SIGTERM'))
        asyncio.Task(self.loop())

    def stop(self, sig):
        log.info("Got {} signal, we should finish all tasks and stop daemon".format(sig))
        self.run = False
        self.current_loop.stop()

    @asyncio.coroutine
    def loop(self):
        yield from self.bootstrap()
        while self.run:
            try:
                # Inside the while loop, fetch the scheduled tasks
                t_start = time.time()
                # Maybe reload the config (limited to once per second)
                yield from self._reload_config_tasks_list()
                # Refresh the scheduler run history
                yield from self._load_scheduler_tasks_history()
                # Kill expired tasks (broken worker)
                yield from self._check_expired_tasks()

                current_time = now()
                for scheduler_task_name, scheduler_task in self.scheduler_tasks.items():
                    scheduled_task_history = self.scheduler_tasks_history[scheduler_task_name]
                    # Iterate over all recurrent tasks
                    if (scheduled_task_history.get('next_run', 0) <=
                            scheduled_task_history.get('last_run', 0)):
                        log.debug('Got unscheduled task {}'.format(scheduler_task_name))
                        # The task is not scheduled/executing now, so schedule it
                        next_run_dt = self._get_next_run_time(
                            scheduler_task_name, scheduler_task, int(current_time))
                        log.debug('Next run {} for task {}'.format(next_run_dt, scheduler_task_name))
                        yield from self.schedule_task(
                            name=scheduler_task_name,
                            task_type=Task.TYPE_REGULAR,
                            run_at=next_run_dt,
                            ttl=scheduler_task.get('ttl') or SETTINGS.WORKER_TASK_TIMEOUT,
                            kwargs=scheduler_task)
                yield from self._ping_dispatcher()

                t_end = time.time()
                delay = SETTINGS.SCHEDULER_PULL_TIMEOUT - (t_end - t_start)
                if delay > 0:
                    # Sleep until the timeout or until a new push from a worker
                    try:
                        yield from asyncio.wait([self.sleep_task], timeout=delay)
                    except GeneratorExit:
                        break
            except Exception:
                log.error("Unexpected error in scheduler loop!", exc_info=True)
        self.current_loop.stop()
        self.connection.close()
        log.info('Bye-bye!')
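# A minimal run sketch, inferred from start()/stop() above; the project's real
# entry point is not part of this section, so treat this as an assumption.
def run_scheduler():
    loop = asyncio.get_event_loop()
    Scheduler().start(loop)  # registers signal handlers and schedules loop()
    try:
        loop.run_forever()   # stop() calls loop.stop() on SIGINT/SIGTERM
    finally:
        loop.close()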
class MetricsCollector(object):
    def __init__(self):
        self.connection = None
        self.subscription = None
        self.current_loop = None
        self.config_version = 0
        self.run = True
        self.storage = ConfigStorage()
        self.metrics_storage = MetricsStorage()
        self.db_log = LoggingStorage()
        self._lcache = {}
        self.metrics = {}
        self.actions_id_to_metrics = defaultdict(list)
        self.connections_id_to_metrics = defaultdict(list)
        self._reload_metrics_config_last_run = 0

    @asyncio.coroutine
    def bootstrap(self):
        log.info("Running metrics collector loop")
        self.connection = yield from asyncio_redis.Pool.create(
            host='localhost', port=6379,
            encoder=asyncio_redis.encoders.BytesEncoder(), poolsize=3)
        # Set up the subscription to action and connection results
        self.subscription = yield from self.connection.start_subscribe()
        yield from self.subscription.psubscribe([
            SETTINGS.ACTION_RESULTS_CHANNEL.format("*").encode('utf-8'),
            SETTINGS.CONNECTION_RESULTS_CHANNEL.format("*").encode('utf-8')
        ])
        yield from self._reload_config()

    def start(self, loop):
        self.current_loop = loop
        loop.add_signal_handler(signal.SIGINT, partial(self.stop, 'SIGINT'))
        loop.add_signal_handler(signal.SIGTERM, partial(self.stop, 'SIGTERM'))
        asyncio.Task(self.loop())

    def stop(self, sig):
        log.info("Got {} signal, we should finish all tasks and stop daemon".format(sig))
        self.run = False
        self.current_loop.stop()

    @asyncio.coroutine
    def loop(self):
        yield from self.bootstrap()
        while self.run:
            metrics = []
            # Wait for a new message
            try:
                reply = yield from self.subscription.next_published()
            except GeneratorExit:
                break
            log.debug('Got new message, channel={}'.format(reply.channel))
            # Reload the metrics list if needed
            yield from self._reload_config()
            # Decode the new message
            try:
                channel_type, object_id = yield from self._decode_message(reply)
                results = pickle.loads(reply.value)
                task = results['task']
                values = results['result']
            except Exception:
                log.error("Cannot load data from message in channel={}, data={}".format(reply.channel, reply.value), exc_info=True)
                continue
            # Process the metrics
            if channel_type == 'actions-results':
                metrics = self.actions_id_to_metrics.get(object_id, [])
            elif channel_type == 'connections-results':
                # Skip empty lines for connection grep
                if not values.get('stdout'):
                    continue
                metrics = self.connections_id_to_metrics.get(object_id, [])
            else:
                log.error('Unexpected metric-channel type={}'.format(channel_type))
                continue
            for metric_id in metrics:
                asyncio.Task(self.store_metric_value(metric_id, object_id, task, values))
        self.current_loop.stop()
        self.connection.close()
        log.info('Bye-bye!')

    @asyncio.coroutine
    def _decode_message(self, msg):
        action_mask = SETTINGS.ACTION_RESULTS_CHANNEL.replace("{}", "")
        connection_mask = SETTINGS.CONNECTION_RESULTS_CHANNEL.replace("{}", "")
        channel = msg.channel.decode('utf-8')
        if channel.startswith(action_mask):
            return 'actions-results', channel[len(action_mask):]
        elif channel.startswith(connection_mask):
            return 'connections-results', channel[len(connection_mask):]
        else:
            return '', channel

    @asyncio.coroutine
    def _reload_config(self):
        time_now = int(now())
        if time_now - self._reload_metrics_config_last_run < 1000:  # 1000 = 1 sec
            return
        self._reload_metrics_config_last_run = time_now
        config_version = self.storage.get_config_version()
        if config_version != self.config_version:
            yield from self._reload_metrics()
            self.config_version = config_version

    @asyncio.coroutine
    def _reload_metrics(self):
        new_metrics = self.storage.list_metrics()
        self.metrics = new_metrics
        self.actions_id_to_metrics = defaultdict(list)
        self.connections_id_to_metrics = defaultdict(list)
        for metric_id, metric in new_metrics.items():
            if 'action_id' in metric:
                self.actions_id_to_metrics[metric.get('action_id')].append(metric_id)
            elif 'connection_id' in metric:
                self.connections_id_to_metrics[metric.get('connection_id')].append(metric_id)
        self._lcache = {}
        log.info('Loaded {} metrics'.format(len(new_metrics)))

    """ TASKS """

    @asyncio.coroutine
    def store_metric_value(self, metric_id, object_id, task, values):
        log.debug('store_metric_value {} for action/connection {} by task {}'.format(metric_id, object_id, task['id']))
        exit_codes = values.get('exit_codes')  # available in the payload, unused here
        stdout = values.get('stdout')

        metric = self.metrics.get(metric_id)
        value = self.parse_value(metric, stdout)
        log.debug('Metric (id={}) parsed value: {}'.format(metric_id, value))
        if value is None:
            log.error("No parser match for metric {}, nothing to store".format(metric_id))
            self.db_log.error("Empty value after filtering", stdout, "metric", metric_id)
            return

        converter = lambda x: x
        # Convert the metric type
        if metric['type'] == 'boolean':
            value = self.cast_to_boolean(metric_id, metric, value)
        else:
            converter = SETTINGS.METRICS_TYPES_MAP[metric['type']]
            try:
                value = converter(value)
            except ValueError:
                log.error("Wrong value for metric '{}', cannot convert to {}".format(metric_id, metric['type']), exc_info=True)
                self.db_log.error("Cannot convert the value to type {}".format(metric['type']), str(value), "metric", metric_id)
                return

        # Trim strings
        if isinstance(value, str):
            value = value[:SETTINGS.METRIC_STRING_LIMIT]

        # Apply the multiplier
        multiplier = metric.get('multiplier', None)
        try:
            if multiplier and metric['type'] in SETTINGS.METRIC_NUMERICAL_TYPES:
                multiplier = float(multiplier)
                value = value * multiplier
                # If the metric type is int, convert back to int
                value = converter(value)
        except Exception:
            log.error('Cannot apply multiplier', exc_info=True)
            self.db_log.error("Cannot apply the multiplier", str(value), "metric", metric_id)
            return

        timestamp = datetime_to_timestamp(task['run_at'])
        skip_interval = parse_timetable(metric.get('limit_duplicate_save', ''))
        if skip_interval:
            prev_val, prev_timestamp = self._lcache.get(metric_id, (None, 0))
            if (prev_val == value) and (timestamp - prev_timestamp) < skip_interval:
                return True
            else:
                self._lcache[metric_id] = (value, timestamp)

        log.info('Store value="{}" for metric {}'.format(value, metric_id))
        try:
            self.metrics_storage.store_metric(metric_id, value, time=task['run_at'])
            yield from self.connection.hset(
                SETTINGS.LAST_VALUES_HASH,
                metric_id.encode('utf-8'),
                ujson.dumps({'value': value, 'timestamp': timestamp}).encode('utf-8'))
        except Exception:
            log.error('Cannot store metric value, storage exception', exc_info=True)
            return
        # Publish a message about the finish
        yield from self.connection.publish(
            SETTINGS.METRICS_CHANNEL.format(metric_id).encode('utf-8'), b'')
        return True

    def parse_value(self, metric, stdout):
        stdout_lines = stdout.split('\n')

        line_regexp = metric.get('line_regexp')
        line_numbers = str(metric.get('line_numbers', ''))
        word_regexp = metric.get('word_regexp')
        word_numbers = str(metric.get('word_numbers', ''))

        # Select the lines: by regexp and/or by 1-based line numbers/ranges
        lines_str = None
        lines_no = set()
        if line_regexp:
            regexp = re.compile(line_regexp)
            for i, stdout_line in enumerate(stdout_lines, 1):
                if regexp.search(stdout_line):
                    lines_no.add(i)
        if line_numbers:
            line_values = line_numbers.split(',')
            for line_value in line_values:
                if ':' in line_value:
                    start, finish = map(int, line_value.split(':'))
                    for i in range(start, finish + 1):
                        lines_no.add(i)
                else:
                    lines_no.add(int(line_value))
        if line_regexp or line_numbers:
            if lines_no:
                lines_no = sorted(lines_no)
                lines = []
                total_lines = len(stdout_lines)
                for line_no in lines_no:
                    if line_no > total_lines:
                        continue
                    lines.append(stdout_lines[line_no - 1])
                lines_str = '\n'.join(lines)
        else:
            lines_str = stdout

        if not lines_str:
            return None

        # Select the words: by regexp or by 1-based word numbers/range
        if word_regexp:
            match = re.findall(word_regexp, lines_str)
            if not match:
                return None
            return match[0]
        elif word_numbers:
            if ':' in word_numbers:
                start, finish = map(int, word_numbers.split(':'))
                words_range = start - 1, finish - 1
            else:
                words_range = int(word_numbers) - 1, int(word_numbers) - 1
        else:
            return lines_str

        stdout_words = list(filter(lambda x: x is not None, SPLIT_RE.split(lines_str)))
        stdout_words = [x for sublist in map(lambda word: SPLIT_NEG_RE.split(word), stdout_words)
                        for x in sublist]
        # The frontend does not count \t, ' ' and '' words :(
        skip_cnt = 0
        words_no_map = {}
        for i, word in enumerate(stdout_words):
            if word == '' or SKIP_RE.match(word):
                skip_cnt += 1
                continue
            words_no_map[i - skip_cnt] = i
        start = words_no_map.get(words_range[0], 0)
        finish = words_no_map.get(words_range[1], len(stdout_words) - 1) + 1
        result_words = stdout_words[start:finish]
        words_str = ''.join(result_words)
        return words_str

    def cast_to_boolean(self, metric_id, metric, value):
        try:
            condition = metric['function']
            cmp_value = metric['value']
        except Exception:
            log.error('Boolean metric (id={}) without condition!'.format(metric_id))
            return

        if condition not in SETTINGS.CONDITIONS_CMP_FUNCTIONS.keys():
            log.error("Cannot convert value for metric '{}' to bool: wrong function '{}'".format(metric_id, condition))
            self.db_log.error("Cannot cast the value to a boolean metric, wrong function '{}'".format(condition), str(value), "metric", metric_id)
            return

        if condition in SETTINGS.CONDITIONS_NUMBERIC:
            # Cast both sides to float
            try:
                value = float(value)
            except (ValueError, TypeError):
                log.error("Wrong value for metric '{}', cannot convert '{}' to float before comparison".format(metric_id, value), exc_info=True)
                self.db_log.error("Cannot convert the metric value to float before comparison", str(value), "metric", metric_id)
                return
            try:
                cmp_value = float(cmp_value)
            except (ValueError, TypeError):
                log.error("Wrong value for metric '{}', cannot convert comparison value '{}' to float before comparison".format(metric_id, cmp_value), exc_info=True)
                self.db_log.error("Cannot convert comparison value to float before comparison", str(cmp_value), "metric", metric_id)
                return
        elif condition in SETTINGS.CONDITIONS_BOOLEAN and not isinstance(value, bool):
            log.error("Wrong value for metric '{}', for boolean comparison it should be boolean, not '{}'".format(metric_id, value))
            self.db_log.error("For boolean comparison the value should be boolean, not '{}'".format(value), str(value), "metric", metric_id)
            return
        elif condition in SETTINGS.CONDITIONS_STRINGS and not isinstance(value, str):
            log.error("Wrong value for metric '{}', for string comparison it should be a string, not '{}'".format(metric_id, value))
            self.db_log.error("For string comparison the value should be a string, not '{}'".format(value), str(value), "metric", metric_id)
            return

        try:
            result = SETTINGS.CONDITIONS_CMP_FUNCTIONS[condition](value, cmp_value)
        except Exception:
            log.error("Cannot compare values: '{}' {} '{}'".format(value, condition, cmp_value))
            self.db_log.error("Cannot compare values: '{}' {} '{}'".format(value, condition, cmp_value), None, "metric", metric_id)
            return None
        return 1 if result else 0
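# A hypothetical metric definition, to illustrate what parse_value() extracts.
# The field names match the metric dicts used above; the id and values are
# invented, and the word arithmetic assumes SPLIT_RE splits on whitespace
# while keeping the separators (as the skip-counting above suggests).
example_metric = {
    'type': 'float',
    'line_regexp': r'^MemFree:',  # keep only lines starting with "MemFree:"
    'word_numbers': '2',          # 1-based index of the word to extract
}
# With stdout = "MemTotal: 16 GB\nMemFree: 4 GB\n", parse_value() would keep
# the "MemFree: 4 GB" line and return the word "4"; store_metric_value() then
# converts it with SETTINGS.METRICS_TYPES_MAP['float'].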
class Trigger(object):
    def __init__(self):
        self.connection = None
        self.subscription = None
        self.current_loop = None
        self.config_version = 0
        self.run = True
        self.config = ConfigStorage()
        self.db_log = LoggingStorage()
        self.tq_storage = None
        self.triggers = {}
        self.metrics_id_to_triggers = defaultdict(list)
        self._reload_triggers_config_last_run = 0

    @asyncio.coroutine
    def bootstrap(self):
        log.info("Running trigger loop")
        self.connection = yield from asyncio_redis.Pool.create(
            host='localhost', port=6379,
            encoder=asyncio_redis.encoders.BytesEncoder(), poolsize=3)
        self.tq_storage = TaskStorage(self.current_loop, self.connection)
        # Set up the subscription to metric updates
        self.subscription = yield from self.connection.start_subscribe()
        yield from self.subscription.psubscribe(
            [SETTINGS.METRICS_CHANNEL.format('*').encode('utf-8')])

    def start(self, loop):
        self.current_loop = loop
        loop.add_signal_handler(signal.SIGINT, partial(self.stop, 'SIGINT'))
        loop.add_signal_handler(signal.SIGTERM, partial(self.stop, 'SIGTERM'))
        asyncio.Task(self.loop())

    def stop(self, sig):
        log.info("Got {} signal, we should finish all tasks and stop daemon".format(sig))
        self.run = False
        self.current_loop.stop()

    @asyncio.coroutine
    def get_new_message(self):
        return (yield from self.subscription.next_published())

    @asyncio.coroutine
    def loop(self):
        yield from self.bootstrap()
        while self.run:
            # Reload the triggers list if needed
            yield from self._reload_config()
            # Wait for a new message
            try:
                reply = yield from self.subscription.next_published()
            except GeneratorExit:
                log.info('Stop subscription')
                break
            log.debug('Got new message, channel={}'.format(reply.channel))
            # Decode the new message
            try:
                _, metric_id = yield from self._decode_message(reply)
            except Exception:
                log.error("Cannot load data from message in channel={}".format(reply.channel), exc_info=True)
                continue
            # Process the triggers that depend on this metric
            triggers = self.metrics_id_to_triggers.get(metric_id, [])
            for trigger_id in triggers:
                asyncio.Task(self.check_trigger(trigger_id, metric_id))
        self.current_loop.stop()
        self.connection.close()
        log.info('Bye-bye!')

    @asyncio.coroutine
    def _decode_message(self, msg):
        metrics_mask = SETTINGS.METRICS_CHANNEL.replace("{}", "")
        channel = msg.channel.decode('utf-8')
        if channel.startswith(metrics_mask):
            return 'metrics-results', channel[len(metrics_mask):]
        else:
            raise Exception('Unexpected channel {}'.format(channel))

    @asyncio.coroutine
    def _reload_config(self):
        time_now = int(now())
        if time_now - self._reload_triggers_config_last_run < 1000:  # 1000 = 1 sec
            return
        self._reload_triggers_config_last_run = time_now
        config_version = self.config.get_config_version()
        if config_version != self.config_version:
            yield from self._reload_triggers()
            self.config_version = config_version

    @asyncio.coroutine
    def _reload_triggers(self):
        new_triggers = self.config.list_triggers()
        self.triggers = new_triggers
        self.metrics_id_to_triggers = defaultdict(list)
        for trigger_id, trigger in new_triggers.items():
            for condition in trigger.get('conditions', []):
                self.metrics_id_to_triggers[condition.get('metric_id')].append(trigger_id)
        log.info('Loaded {} triggers'.format(len(new_triggers)))

    @asyncio.coroutine
    def _activate_trigger(self, trigger):
        self.db_log.info("The trigger was activated", None, "trigger", trigger['_id'])
        log.debug("_activate_trigger for {}".format(trigger['_id']))
        for action_obj in trigger['scenario']:
            # Extract values from the trigger into a dict (param_name -> param_value)
            params_values = {param.get('param'): param.get('value')
                             for param in action_obj['params']}
            # Get the action with its params bound
            action = self.config.get_action(
                action_obj['action_id'],
                initial_param_values=params_values,
                connection_id=trigger.get('connection_id'))
            log.debug('Trigger {}: create task for action {} with params {}'.format(trigger['_id'], action['_id'], params_values))
            task = yield from self.tq_storage.create_task(
                name=action['_id'],
                task_type=Task.TYPE_TRIGGERED,
                run_at=datetime.datetime.now(),
                ttl=action.get('ttl') or SETTINGS.WORKER_TASK_TIMEOUT,
                kwargs=action,
                store_to=Task.STORE_TO_METRICS)
            yield from self.tq_storage.schedule_task(task)

    """ TASKS """

    @asyncio.coroutine
    def check_trigger(self, trigger_id, metric_id):
        log.info('Check trigger {} for metric {}'.format(trigger_id, metric_id))
        trigger = self.triggers[trigger_id]
        last_values = yield from self.tq_storage.get_metric_last_values(trigger['depends_on'])
        log.debug('Trigger {}: last values is {}'.format(trigger_id, last_values))
        checks = ((trigger_id, cond.get('value', ''), cond['function'],
                   last_values.get(cond['metric_id']))
                  for cond in trigger['conditions'])
        is_triggered = all(self.check_condition(*check) for check in checks)
        if is_triggered:
            # Check the lock
            log.info('Trigger {} is activated!'.format(trigger_id))
            locked = yield from self.tq_storage.lock_trigger(trigger_id)
            if locked > 1:
                log.info('Trigger {} is locked {} times, do not perform action'.format(trigger_id, locked))
                return
            # Perform the trigger's action, it's not locked
            yield from self._activate_trigger(trigger)
        else:
            # Unlock the trigger here
            log.debug('Trigger {} is NOT activated, try to unlock!'.format(trigger_id))
            yield from self.tq_storage.unlock_trigger(trigger_id)

    def check_condition(self, trigger_id, cmp_value, condition, value):
        if condition not in SETTINGS.CONDITIONS_CMP_FUNCTIONS.keys():
            log.error("Cannot determine condition for trigger '{}': wrong function '{}'".format(trigger_id, condition))
            return False

        if condition in SETTINGS.CONDITIONS_NUMBERIC:
            try:
                value = float(value)
            except (ValueError, TypeError):
                log.error("Wrong value for trigger '{}', cannot convert metric value '{}' to float before comparison".format(trigger_id, value), exc_info=True)
                self.db_log.error("Cannot convert metric value to float before comparison", str(value), "trigger", trigger_id)
                return False
            try:
                cmp_value = float(cmp_value)
            except (ValueError, TypeError):
                log.error("Wrong value for trigger '{}', cannot convert comparison value '{}' to float before comparison".format(trigger_id, cmp_value), exc_info=True)
                self.db_log.error("Cannot convert comparison value to float before comparison", str(cmp_value), "trigger", trigger_id)
                return False
        elif condition in SETTINGS.CONDITIONS_BOOLEAN:
            try:
                value = bool(int(value))
            except Exception:
                log.error("Wrong value for trigger '{}', can't cast value to boolean '{}'".format(trigger_id, value))
                self.db_log.error("Can't cast value to boolean '{}'".format(value), str(value), "trigger", trigger_id)
                return False
        elif condition in SETTINGS.CONDITIONS_STRINGS and not isinstance(value, str):
            log.error("Wrong value for trigger '{}', for string comparison it should be a string, not '{}'".format(trigger_id, value))
            self.db_log.error("For string comparison the value should be a string, not '{}'".format(value), str(value), "trigger", trigger_id)
            return False

        try:
            result = SETTINGS.CONDITIONS_CMP_FUNCTIONS[condition](value, cmp_value)
            log.debug("Compare values: '{}' {} '{}'".format(value, condition, cmp_value))
        except Exception:
            log.error("Cannot compare values: '{}' {} '{}'".format(value, condition, cmp_value))
            self.db_log.error("Cannot compare values: '{}' {} '{}'".format(value, condition, cmp_value), None, "trigger", trigger_id)
            return False
        return result
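# A hypothetical trigger document, to illustrate check_trigger()/check_condition().
# The field names match the trigger dicts used above; the ids, the values, and
# the assumption that '>' appears in SETTINGS.CONDITIONS_NUMBERIC are invented.
example_trigger = {
    '_id': 'cpu-alert',
    'depends_on': ['cpu-load'],
    'conditions': [
        # fires when the last stored value of metric 'cpu-load' exceeds 0.9
        {'metric_id': 'cpu-load', 'function': '>', 'value': '0.9'},
    ],
    'scenario': [
        {'action_id': 'send-alert',
         'params': [{'param': 'text', 'value': 'CPU overload'}]},
    ],
}
# check_condition('cpu-alert', '0.9', '>', '0.95') would cast both sides to
# float and return True, so all() over the conditions holds, the trigger takes
# its lock, and _activate_trigger() schedules the 'send-alert' action.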
# Test module (a separate file); assumed imports for the names it references:
# import unittest
# from unittest import mock
# from storage.redis import ConfigStorage
# from storage.exceptions import ActionResursion  # path assumed
# from .fixtures import (actions_example1, actions_example2, actions_example3,
#                        actions_recursion, actions_wrong_ref, send_sms_actions)  # paths assumed
class ConfigStorageTestCase(unittest.TestCase):
    def setUp(self):
        self.storage = ConfigStorage()

    def tearDown(self):
        del self.storage

    def test_parse_action(self):
        """ Test the _parse_action method """
        # Example 1:
        #      o---> B1 ---o
        #     /             \
        #   C1               +--> EXEC
        #     \             /
        #      o---> B2 ---o
        actions_dict = {action.get('_id'): action for action in actions_example1}
        result = self.storage._parse_action(actions_dict, actions_dict.get('c1'))
        self.assertEqual(len(result.get('scenario')), 2)
        for (test, action) in zip(('b1', 'b2'), result.get('scenario')):
            # The id was resolved into an action object
            self.assertTrue(action.get('action'))
            self.assertEqual(action.get('action').get('_id'), test)

        b1_inner = result.get('scenario')[0].get('action')
        self.assertEqual(len(b1_inner.get('scenario')), 1)
        for action in b1_inner.get('scenario'):
            # The id was resolved into an action object
            self.assertTrue(action.get('action'))
            self.assertEqual(action.get('action').get('_id'), 'exec')
            self.assertEqual(action.get('action').get('connection_id'), 'B1C')
        self.assertEqual(b1_inner.get('connection_id'),
                         b1_inner.get('scenario')[0]['action']['connection_id'])

        b2_inner = result.get('scenario')[1].get('action')
        self.assertEqual(len(b2_inner.get('scenario')), 1)
        for action in b2_inner.get('scenario'):
            # The id was resolved into an action object
            self.assertTrue(action.get('action'))
            self.assertEqual(action.get('action').get('_id'), 'exec')
            self.assertEqual(action.get('action').get('connection_id'), 'B2C')
        self.assertEqual(b1_inner.get('connection_id'),
                         b1_inner.get('scenario')[0]['action']['connection_id'])

        self.assertEqual(
            result['scenario'][1]['action']['scenario'][0]['action']['params'][0]['value'],
            'xxx B2_PARAM_VAL yyy')

        # Example 2:
        actions_dict = {action.get('_id'): action for action in actions_example2}
        result = self.storage._parse_action(actions_dict, actions_dict.get('ping-ya-ru'))
        self.assertEqual(
            result['scenario'][0]['action']['scenario'][0]['action']['params'][0]['value'],
            'ping -c 2 -i 2 ya.ru')

        # Example 3, empty connection:
        actions_dict = {action.get('_id'): action for action in actions_example3}
        result = self.storage._parse_action(actions_dict, actions_dict.get('b1'))
        self.assertIsNone(result['scenario'][0]['action'].get('connection_id'))

    def test_parse_action_recursion(self):
        actions_dict = {action.get('_id'): action for action in actions_recursion}
        with self.assertRaises(ActionResursion):
            self.storage._parse_action(actions_dict, actions_dict.get('b1'))

    def test_parse_action_wrong_ref(self):
        actions_dict = {action.get('_id'): action for action in actions_wrong_ref}
        with mock.patch.object(self.storage.log, 'error') as m:
            result = self.storage._parse_action(actions_dict, actions_dict.get('b1'))
            self.assertEqual(m.call_count, 1)
            self.assertFalse('action' in result['scenario'][0])

    def test_get_scheduled_actions(self):
        """ Test the get_scheduled_actions method """
        # For the example1 actions
        with mock.patch('storage.redis.ConfigStorage.get_config',
                        return_value=actions_example1) as m:
            result = self.storage.get_scheduled_actions()
            self.assertEqual(m.call_count, 1)
            self.assertEqual(len(result.items()), 1)
            self.assertTrue('c1' in result)
            self.assertEqual(result['c1']['schedule'], '1u')

        # For actions with recursion
        with mock.patch('storage.redis.ConfigStorage.get_config',
                        return_value=actions_recursion) as m:
            result = self.storage.get_scheduled_actions()
            self.assertEqual(m.call_count, 1)
            self.assertEqual(len(result.items()), 0)

    def test_get_action(self):
        """ Test the get_action method """
        with mock.patch('storage.redis.ConfigStorage.get_config',
                        return_value=actions_example1) as m:
            result = self.storage.get_action('c1')
            self.assertEqual(m.call_count, 1)
            self.assertEqual(result['_id'], 'c1')

        with mock.patch('storage.redis.ConfigStorage.get_config',
                        return_value=actions_example1) as m:
            result = self.storage.get_action('no-action')
            self.assertEqual(m.call_count, 1)
            self.assertEqual(result, None)

        with mock.patch('storage.redis.ConfigStorage.get_config',
                        return_value=actions_example1) as m:
            result = self.storage.get_action('exec',
                                             initial_param_values={"Команда": "ps -aux"},
                                             connection_id="C1")
            self.assertEqual(m.call_count, 1)
            self.assertEqual(result['params'][0]['param'], 'Команда')
            self.assertEqual(result['params'][0]['value'], 'ps -aux')
            self.assertEqual(result['connection_id'], 'C1')

    def test_regression_sms(self):
        # action = send_sms_actions['WRkMC5vPiWPjoT7LJ']
        with mock.patch('storage.redis.ConfigStorage.get_config',
                        return_value=send_sms_actions) as m:
            result = self.storage.get_action('WRkMC5vPiWPjoT7LJ')
            self.assertEqual(m.call_count, 1)
            self.assertEqual(
                result['scenario'][0]['action']['scenario'][0]['action']['params'][0]['value'],
                'AT+CMGF=1')
            self.assertEqual(
                result['scenario'][0]['action']['scenario'][1]['action']['params'][0]['value'],
                'AT+CMGS="+79265225983"')
            self.assertEqual(
                result['scenario'][0]['action']['scenario'][2]['action']['params'][0]['value'],
                'test sms<CTRL+Z>')
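# Standard unittest entry point so the test file can be run directly; the
# original module may rely on an external test runner instead (this block is
# an addition).
if __name__ == '__main__':
    unittest.main()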
class Trigger(object): def __init__(self): self.connection = None self.subscription = None self.current_loop = None self.config_version = 0 self.run = True self.config = ConfigStorage() self.db_log = LoggingStorage() self.tq_storage = None self.triggers = {} self.metrics_id_to_triggers = defaultdict(list) self._reload_triggers_config_last_run = 0 def bootstrap(self): log.info("Running trigger loop") self.connection = yield from asyncio_redis.Pool.create( host='localhost', port=6379, encoder=asyncio_redis.encoders.BytesEncoder(), poolsize=3) self.tq_storage = TaskStorage(self.current_loop, self.connection) # Setup subscription to action results self.subscription = yield from self.connection.start_subscribe() yield from self.subscription.psubscribe( [SETTINGS.METRICS_CHANNEL.format('*').encode('utf-8')]) def start(self, loop): self.current_loop = loop loop.add_signal_handler(signal.SIGINT, partial(self.stop, 'SIGINT')) loop.add_signal_handler(signal.SIGTERM, partial(self.stop, 'SIGTERM')) asyncio.Task(self.loop()) def stop(self, sig): log.info( "Got {} signal, we should finish all tasks and stop daemon".format( sig)) self.run = False self.current_loop.stop() @asyncio.coroutine def get_new_message(self): reply = yield from self.subscription.next_published() @asyncio.coroutine def loop(self): yield from self.bootstrap() while self.run: # Load triggers list yield from self._reload_config() # Wait for new message try: reply = yield from self.subscription.next_published() except GeneratorExit: log.info('Stop subscription') break log.debug('Got new message, channel={}'.format(reply.channel)) # Decode new message try: _, metric_id = yield from self._decode_message(reply) except Exception: log.error("Cannon load data from message in channel={}".format( reply.channel), exc_info=True) # Process triggers triggers = self.metrics_id_to_triggers.get(metric_id, []) for trigger_id in triggers: asyncio.Task(self.check_trigger(trigger_id, metric_id)) self.current_loop.stop() self.connection.close() log.info('Bye-bye!') @asyncio.coroutine def _decode_message(self, msg): metrics_mask = SETTINGS.METRICS_CHANNEL.replace("{}", "") channel = msg.channel.decode('utf-8') if channel.startswith(metrics_mask): return 'metrics-results', channel[len(metrics_mask):] else: raise Exception() # return '', channel @asyncio.coroutine def _reload_config(self): time_now = int(now()) if time_now - self._reload_triggers_config_last_run < 1000: # 1000 = 1sec return self._reload_triggers_config_last_run = time_now config_version = self.config.get_config_version() if config_version != self.config_version: yield from self._reload_triggers() self.config_version = config_version @asyncio.coroutine def _reload_triggers(self): new_triggers = self.config.list_triggers() self.triggers = new_triggers self.metrics_id_to_triggers = defaultdict(list) for trigger_id, trigger in new_triggers.items(): for condition in trigger.get('conditions', []): self.metrics_id_to_triggers[condition.get('metric_id')].append( trigger_id) log.info('Loaded {} triggers'.format(len(new_triggers))) @asyncio.coroutine def _activate_trigger(self, trigger): self.db_log.info("Триггер был запущен", None, "trigger", trigger['_id']) log.debug("_activare_trigger for {}".format(trigger['_id'])) for action_obj in trigger['scenario']: # Extract values from trigger into dict (param_name -> param_value) params_values = { param.get('param'): param.get('value') for param in action_obj['params'] } # Get action with binded params action = self.config.get_action( 
action_obj['action_id'], initial_param_values=params_values, connection_id=trigger.get('connection_id')) log.debug( 'Trigger {}: create task for action {} with params {}'.format( trigger['_id'], action['_id'], params_values)) task = yield from self.tq_storage.create_task( name=action['_id'], task_type=Task.TYPE_TRIGGERED, run_at=datetime.datetime.now(), ttl=action.get('ttl') or SETTINGS.WORKER_TASK_TIMEOUT, kwargs=action, store_to=Task.STORE_TO_METRICS) yield from self.tq_storage.schedule_task(task) """ TASKS """ @asyncio.coroutine def check_trigger(self, trigger_id, metric_id): log.info('Check trigger {} for metric {}'.format( trigger_id, metric_id)) trigger = self.triggers[trigger_id] last_values = yield from self.tq_storage.get_metric_last_values( trigger['depends_on']) log.debug('Trigger {}: last values is {}'.format( trigger_id, last_values)) checks = ((trigger_id, cond.get('value', ''), cond['function'], last_values.get(cond['metric_id'])) for cond in trigger['conditions']) is_triggered = all( map(lambda check: self.check_condition(*check), checks)) if is_triggered: # Check lock log.info('Trigger {} is activated!'.format(trigger_id)) locked = yield from self.tq_storage.lock_trigger(trigger_id) if locked > 1: log.info( 'Trigger {} is locked {} times, do not perform action'. format(trigger_id, locked)) return # Perform action on trigger, it's not locked yield from self._activate_trigger(trigger) else: # Unlock trigger here log.debug('Trigger {} is NOT activated, try to unlock!'.format( trigger_id)) yield from self.tq_storage.unlock_trigger(trigger_id) def check_condition(self, trigger_id, cmp_value, condition, value): if condition not in SETTINGS.CONDITIONS_CMP_FUNCTIONS.keys(): log.error( "Cannot determine condition for trigger '{}': wrong function '{}'" .format(trigger_id, condition)) return False if condition in SETTINGS.CONDITIONS_NUMBERIC: try: value = float(value) except (ValueError, TypeError): log.error( "Wrong value for trigger '{}', cannot convert metric value '{}' to float before comparasion" .format(trigger_id, cmp_value), exc_info=True) self.db_log.error( "Cannot convert metric value to float before comparasion", str(value), "trigger", trigger_id) return False try: cmp_value = float(cmp_value) except (ValueError, TypeError): log.error( "Wrong value for trigger '{}', cannot convert comparasion value '{}' to float before comparasion" .format(trigger_id, cmp_value), exc_info=True) self.db_log.error( "Cannot convert comparasion value to float before comparasion", str(cmp_value), "trigger", trigger_id) return False elif condition in SETTINGS.CONDITIONS_BOOLEAN: try: value = bool(int(value)) except: log.error( "Wrong value for trigger '{}', can't cast value to boolean '{}'" .format(trigger_id, value)) self.db_log.error( "Can't cast value to boolean '{}'".format(value), str(value), "trigger", trigger_id) return False elif condition in SETTINGS.CONDITIONS_STRINGS and not isinstance( value, str): log.error( "Wrong value for trigger '{}', for strings comparasion it should be string, not '{}'" .format(trigger_id, value)) self.db_log.error( "For strings comparasion value should be strings, not '{}'". 
    def check_condition(self, trigger_id, cmp_value, condition, value):
        if condition not in SETTINGS.CONDITIONS_CMP_FUNCTIONS:
            log.error("Cannot determine condition for trigger '{}': wrong function '{}'".format(
                trigger_id, condition))
            return False

        if condition in SETTINGS.CONDITIONS_NUMBERIC:
            # Cast both sides to float before a numeric comparison
            try:
                value = float(value)
            except (ValueError, TypeError):
                log.error("Wrong value for trigger '{}', cannot convert metric value '{}' "
                          "to float before comparison".format(trigger_id, value), exc_info=True)
                self.db_log.error("Cannot convert metric value to float before comparison",
                                  str(value), "trigger", trigger_id)
                return False
            try:
                cmp_value = float(cmp_value)
            except (ValueError, TypeError):
                log.error("Wrong value for trigger '{}', cannot convert comparison value '{}' "
                          "to float before comparison".format(trigger_id, cmp_value), exc_info=True)
                self.db_log.error("Cannot convert comparison value to float before comparison",
                                  str(cmp_value), "trigger", trigger_id)
                return False
        elif condition in SETTINGS.CONDITIONS_BOOLEAN:
            try:
                value = bool(int(value))
            except (ValueError, TypeError):
                log.error("Wrong value for trigger '{}', can't cast value to boolean '{}'".format(
                    trigger_id, value))
                self.db_log.error("Can't cast value to boolean '{}'".format(value),
                                  str(value), "trigger", trigger_id)
                return False
        elif condition in SETTINGS.CONDITIONS_STRINGS and not isinstance(value, str):
            log.error("Wrong value for trigger '{}', for string comparison it should be "
                      "a string, not '{}'".format(trigger_id, value))
            self.db_log.error("For string comparison the value should be a string, not '{}'".format(value),
                              str(value), "trigger", trigger_id)
            return False

        try:
            result = SETTINGS.CONDITIONS_CMP_FUNCTIONS[condition](value, cmp_value)
            log.debug("Compare values: '{}' {} '{}'".format(value, condition, cmp_value))
        except Exception:
            log.error("Cannot compare values: '{}' {} '{}'".format(value, condition, cmp_value))
            self.db_log.error("Cannot compare values: '{}' {} '{}'".format(value, condition, cmp_value),
                              None, "trigger", trigger_id)
            return False

        return result
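# --- Illustrative sketch (assumption, not part of the original module) ---
# check_condition() and cast_to_boolean() resolve the condition name through
# SETTINGS.CONDITIONS_CMP_FUNCTIONS: a mapping from condition name to a
# two-argument callable, with CONDITIONS_NUMBERIC / CONDITIONS_BOOLEAN /
# CONDITIONS_STRINGS grouping the names that select each casting branch.
# The operator-based mapping below only demonstrates that assumed shape;
# the concrete names and functions live in SETTINGS and are not shown here.
import operator

EXAMPLE_CONDITIONS_CMP_FUNCTIONS = {
    'eq': operator.eq,                              # value == cmp_value
    'ne': operator.ne,                              # value != cmp_value
    'lt': operator.lt,                              # numeric less-than
    'gt': operator.gt,                              # numeric greater-than
    'contains': lambda value, needle: needle in value,  # substring match
}
EXAMPLE_CONDITIONS_NUMBERIC = {'lt', 'gt'}          # both sides cast to float
EXAMPLE_CONDITIONS_BOOLEAN = {'eq', 'ne'}           # value cast via bool(int(value))
EXAMPLE_CONDITIONS_STRINGS = {'contains'}           # value must already be str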
class MetricsCollector(object):

    def __init__(self):
        self.connection = None
        self.subscription = None
        self.current_loop = None
        self.config_version = 0
        self.run = True
        self.storage = ConfigStorage()
        self.metrics_storage = MetricsStorage()
        self.db_log = LoggingStorage()
        self._lcache = {}
        self.metrics = {}
        self.actions_id_to_metrics = defaultdict(list)
        self.connections_id_to_metrics = defaultdict(list)
        self._reload_metrics_config_last_run = 0

    @asyncio.coroutine
    def bootstrap(self):
        log.info("Running metrics collector loop")
        self.connection = yield from asyncio_redis.Pool.create(
            host='localhost', port=6379,
            encoder=asyncio_redis.encoders.BytesEncoder(), poolsize=3)

        # Setup subscription to action and connection results
        self.subscription = yield from self.connection.start_subscribe()
        yield from self.subscription.psubscribe(
            [SETTINGS.ACTION_RESULTS_CHANNEL.format("*").encode('utf-8'),
             SETTINGS.CONNECTION_RESULTS_CHANNEL.format("*").encode('utf-8')])

        yield from self._reload_config()

    def start(self, loop):
        self.current_loop = loop
        loop.add_signal_handler(signal.SIGINT, partial(self.stop, 'SIGINT'))
        loop.add_signal_handler(signal.SIGTERM, partial(self.stop, 'SIGTERM'))
        asyncio.Task(self.loop())

    def stop(self, sig):
        log.info("Got {} signal, we should finish all tasks and stop daemon".format(sig))
        self.run = False
        self.current_loop.stop()

    @asyncio.coroutine
    def loop(self):
        yield from self.bootstrap()
        while self.run:
            metrics = []

            # Wait for a new message
            try:
                reply = yield from self.subscription.next_published()
            except GeneratorExit:
                break
            log.debug('Got new message, channel={}'.format(reply.channel))

            # Load metrics list
            yield from self._reload_config()

            # Decode the new message
            try:
                channel_type, object_id = yield from self._decode_message(reply)
                results = pickle.loads(reply.value)
                task = results['task']
                values = results['result']
            except Exception:
                log.error("Cannot load data from message in channel={}, data={}".format(
                    reply.channel, reply.value), exc_info=True)
                continue

            # Map the result to the metrics that consume it
            if channel_type == 'actions-results':
                metrics = self.actions_id_to_metrics.get(object_id, [])
            elif channel_type == 'connections-results':
                # Skip empty lines for connection grep
                if not values.get('stdout'):
                    continue
                metrics = self.connections_id_to_metrics.get(object_id, [])
            else:
                log.error('Unexpected metric-channel type={}'.format(channel_type))
                continue

            for metric_id in metrics:
                asyncio.Task(self.store_metric_value(metric_id, object_id, task, values))

        self.current_loop.stop()
        self.connection.close()
        log.info('Bye-bye!')

    @asyncio.coroutine
    def _decode_message(self, msg):
        action_mask = SETTINGS.ACTION_RESULTS_CHANNEL.replace("{}", "")
        connection_mask = SETTINGS.CONNECTION_RESULTS_CHANNEL.replace("{}", "")
        channel = msg.channel.decode('utf-8')
        if channel.startswith(action_mask):
            return 'actions-results', channel[len(action_mask):]
        elif channel.startswith(connection_mask):
            return 'connections-results', channel[len(connection_mask):]
        else:
            return '', channel

    @asyncio.coroutine
    def _reload_config(self):
        time_now = int(now())
        if time_now - self._reload_metrics_config_last_run < 1000:  # 1000 = 1 sec
            return
        self._reload_metrics_config_last_run = time_now

        config_version = self.storage.get_config_version()
        if config_version != self.config_version:
            yield from self._reload_metrics()
            self.config_version = config_version
    @asyncio.coroutine
    def _reload_metrics(self):
        new_metrics = self.storage.list_metrics()
        self.metrics = new_metrics
        self.actions_id_to_metrics = defaultdict(list)
        self.connections_id_to_metrics = defaultdict(list)
        for metric_id, metric in new_metrics.items():
            if 'action_id' in metric:
                self.actions_id_to_metrics[metric.get('action_id')].append(metric_id)
            elif 'connection_id' in metric:
                self.connections_id_to_metrics[metric.get('connection_id')].append(metric_id)
        self._lcache = {}
        log.info('Loaded {} metrics'.format(len(new_metrics)))

    """ TASKS """

    @asyncio.coroutine
    def store_metric_value(self, metric_id, object_id, task, values):
        log.debug('store_metric_value {} for action/connection {} by task {}'.format(
            metric_id, object_id, task['id']))
        exit_codes = values.get('exit_codes')
        stdout = values.get('stdout')

        metric = self.metrics.get(metric_id)
        value = self.parse_value(metric, stdout)
        log.debug('Metric (id={}) parsed value: {}'.format(metric_id, value))
        if value is None:
            log.error("No parser match for metric {}, nothing to store".format(metric_id))
            self.db_log.error("Empty value after filtering", stdout, "metric", metric_id)
            return

        converter = lambda x: x
        # Convert metric type
        if metric['type'] == 'boolean':
            value = self.cast_to_boolean(metric_id, metric, value)
            if value is None:
                # cast_to_boolean() has already logged the failure
                return
        else:
            converter = SETTINGS.METRICS_TYPES_MAP[metric['type']]
            try:
                value = converter(value)
            except ValueError:
                log.error("Wrong value for metric '{}', cannot convert to {}".format(
                    metric_id, metric['type']), exc_info=True)
                self.db_log.error("Cannot convert value to type {}".format(metric['type']),
                                  str(value), "metric", metric_id)
                return

        # Trim strings
        if isinstance(value, str):
            value = value[:SETTINGS.METRIC_STRING_LIMIT]

        # Apply multiplier
        multiplier = metric.get('multiplier', None)
        try:
            if multiplier and metric['type'] in SETTINGS.METRIC_NUMERICAL_TYPES:
                multiplier = float(multiplier)
                value = value * multiplier
                # If the metric type is int, convert back to int
                value = converter(value)
        except Exception:
            log.error('Cannot apply multiplier', exc_info=True)
            self.db_log.error("Cannot apply multiplier", str(value), "metric", metric_id)
            return

        timestamp = datetime_to_timestamp(task['run_at'])

        # Optionally skip duplicate values saved within a short interval
        skip_interval = parse_timetable(metric.get('limit_duplicate_save', ''))
        if skip_interval:
            prev_val, prev_timestamp = self._lcache.get(metric_id, (None, 0))
            if prev_val == value and (timestamp - prev_timestamp) < skip_interval:
                return True
            else:
                self._lcache[metric_id] = (value, timestamp)

        log.info('Store value="{}" for metric {}'.format(value, metric_id))
        try:
            self.metrics_storage.store_metric(metric_id, value, time=task['run_at'])
            yield from self.connection.hset(
                SETTINGS.LAST_VALUES_HASH,
                metric_id.encode('utf-8'),
                ujson.dumps({'value': value, 'timestamp': timestamp}).encode('utf-8'))
        except Exception:
            log.error('Cannot store metric value, storage exception', exc_info=True)
            return

        # Publish message about finish
        yield from self.connection.publish(
            SETTINGS.METRICS_CHANNEL.format(metric_id).encode('utf-8'), b'')
        return True
    def parse_value(self, metric, stdout):
        stdout_lines = stdout.split('\n')
        line_regexp = metric.get('line_regexp')
        line_numbers = str(metric.get('line_numbers', ''))
        word_regexp = metric.get('word_regexp')
        word_numbers = str(metric.get('word_numbers', ''))

        lines_str = None
        lines_no = set()

        # Select lines matching the regexp
        if line_regexp:
            regexp = re.compile(line_regexp)
            for i, stdout_line in enumerate(stdout_lines, 1):
                if regexp.search(stdout_line):
                    lines_no.add(i)

        # Select lines by explicit numbers or ranges, e.g. "1,3:5"
        if line_numbers:
            line_values = line_numbers.split(',')
            for line_value in line_values:
                if ':' in line_value:
                    start, finish = map(int, line_value.split(':'))
                    for i in range(start, finish + 1):
                        lines_no.add(i)
                else:
                    lines_no.add(int(line_value))

        if line_regexp or line_numbers:
            if lines_no:
                lines_no = sorted(lines_no)
                lines = []
                total_lines = len(stdout_lines)
                for line_no in lines_no:
                    if line_no > total_lines:
                        continue
                    lines.append(stdout_lines[line_no - 1])
                lines_str = '\n'.join(lines)
        else:
            lines_str = stdout

        if not lines_str:
            return None

        # Extract a word by regexp, or by number/range, e.g. "2" or "2:4"
        if word_regexp:
            match = re.findall(word_regexp, lines_str)
            if not match:
                return None
            return match[0]
        elif word_numbers:
            if ':' in word_numbers:
                start, finish = map(int, word_numbers.split(':'))
                words_range = start - 1, finish - 1
            else:
                words_range = int(word_numbers) - 1, int(word_numbers) - 1
        else:
            return lines_str

        stdout_words = [x for x in SPLIT_RE.split(lines_str) if x is not None]
        stdout_words = [x for word in stdout_words for x in SPLIT_NEG_RE.split(word)]

        # Frontend does not count \t, ' ' and '' words :(
        skip_cnt = 0
        words_no_map = {}
        for i, word in enumerate(stdout_words):
            if word == '' or SKIP_RE.match(word):
                skip_cnt += 1
                continue
            words_no_map[i - skip_cnt] = i

        start = words_no_map.get(words_range[0], 0)
        finish = words_no_map.get(words_range[1], len(stdout_words) - 1) + 1
        result_words = stdout_words[start:finish]
        return ''.join(result_words)

    def cast_to_boolean(self, metric_id, metric, value):
        try:
            condition = metric['function']
            cmp_value = metric['value']
        except Exception:
            log.error('Boolean metric (id={}) without condition!'.format(metric_id))
            return

        if condition not in SETTINGS.CONDITIONS_CMP_FUNCTIONS:
            log.error("Cannot convert value for metric '{}' to bool: wrong function '{}'".format(
                metric_id, condition))
            self.db_log.error("Cannot cast value for boolean metric, wrong function '{}'".format(condition),
                              str(value), "metric", metric_id)
            return

        if condition in SETTINGS.CONDITIONS_NUMBERIC:
            # Cast both sides to float before a numeric comparison
            try:
                value = float(value)
            except (ValueError, TypeError):
                log.error("Wrong value for metric '{}', cannot convert '{}' to float "
                          "before comparison".format(metric_id, value), exc_info=True)
                self.db_log.error("Cannot convert metric value to float before comparison",
                                  str(value), "metric", metric_id)
                return
            try:
                cmp_value = float(cmp_value)
            except (ValueError, TypeError):
                log.error("Wrong value for metric '{}', cannot convert comparison value '{}' "
                          "to float before comparison".format(metric_id, cmp_value), exc_info=True)
                self.db_log.error("Cannot convert comparison value to float before comparison",
                                  str(cmp_value), "metric", metric_id)
                return
        elif condition in SETTINGS.CONDITIONS_BOOLEAN and not isinstance(value, bool):
            log.error("Wrong value for metric '{}', for boolean comparison it should be "
                      "a boolean, not '{}'".format(metric_id, value))
            self.db_log.error("For boolean comparison the value should be a boolean, not '{}'".format(value),
                              str(value), "metric", metric_id)
            return
        elif condition in SETTINGS.CONDITIONS_STRINGS and not isinstance(value, str):
            log.error("Wrong value for metric '{}', for string comparison it should be "
                      "a string, not '{}'".format(metric_id, value))
            self.db_log.error("For string comparison the value should be a string, not '{}'".format(value),
                              str(value), "metric", metric_id)
            return

        try:
            result = SETTINGS.CONDITIONS_CMP_FUNCTIONS[condition](value, cmp_value)
        except Exception:
            log.error("Cannot compare values: '{}' {} '{}'".format(value, condition, cmp_value))
            self.db_log.error("Cannot compare values: '{}' {} '{}'".format(value, condition, cmp_value),
                              None, "metric", metric_id)
            return None

        return 1 if result else 0
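# --- Usage sketch (assumption: the module-level wiring is not shown in this
# listing). Both daemons follow the same start()/loop() contract: start()
# registers SIGINT/SIGTERM handlers and schedules loop() as a Task, so one
# process per daemon drives a plain asyncio event loop as below.
def run_daemon(daemon_class):
    loop = asyncio.get_event_loop()
    daemon = daemon_class()           # e.g. Trigger or MetricsCollector
    daemon.start(loop)                # registers signals, schedules loop()
    try:
        loop.run_forever()            # stop() calls loop.stop() on SIGTERM
    finally:
        loop.close()

if __name__ == '__main__':
    run_daemon(MetricsCollector)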