Example #1
0
 def bootstrap(self):
     log.info("Running worker loop")
     self.connection = yield from asyncio_redis.Pool.create(
         host='localhost',
         port=6379,
         encoder=asyncio_redis.encoders.BytesEncoder(),
         poolsize=3)
     self.config = ConfigStorage()
     self.comport_state = ComPortState()
     self.db_log = LoggingStorage()
Example #2
0
    def __init__(self):
        self.connection = None
        self.subscription = None
        self.current_loop = None
        self.config_version = 0

        self.run = True

        self.config = ConfigStorage()
        self.db_log = LoggingStorage()
        self.tq_storage = None

        self.triggers = {}
        self.metrics_id_to_triggers = defaultdict(list)

        self._reload_triggers_config_last_run = 0
Example #3
0
    def __init__(self):
        self.connection = None
        self.subscription = None
        self.current_loop = None
        self.config_version = 0
        self.run = True

        self.storage = ConfigStorage()
        self.metrics_storage = MetricsStorage()
        self.db_log = LoggingStorage()

        self._lcache = {}

        self.metrics = {}
        self.actions_id_to_metrics = defaultdict(list)

        self._reload_metrics_config_last_run = 0
Example #4
0
    def __init__(self):
        self.connection = None
        self.subscription = None
        self.current_loop = None
        self.config_version = 0

        self.run = True

        self.config = ConfigStorage()
        self.db_log = LoggingStorage()
        self.tq_storage = None

        self.triggers = {}
        self.metrics_id_to_triggers = defaultdict(list)

        self._reload_triggers_config_last_run = 0
Example #5
0
    def __init__(self):
        self.connection = None
        self.subscription = None
        self.current_loop = None
        self.config_version = 0
        self.run = True

        self.storage = ConfigStorage()
        self.metrics_storage = MetricsStorage()
        self.db_log = LoggingStorage()

        self._lcache = {}

        self.metrics = {}
        self.actions_id_to_metrics = defaultdict(list)

        self._reload_metrics_config_last_run = 0
Example #6
0
class MetricsCollector(object):
    def __init__(self):
        self.connection = None
        self.subscription = None
        self.current_loop = None
        self.config_version = 0
        self.run = True

        self.storage = ConfigStorage()
        self.metrics_storage = MetricsStorage()
        self.db_log = LoggingStorage()

        self._lcache = {}

        self.metrics = {}
        self.actions_id_to_metrics = defaultdict(list)

        self._reload_metrics_config_last_run = 0

    @asyncio.coroutine
    def bootstrap(self):
        log.info("Running metrics collector loop")
        self.connection = yield from asyncio_redis.Pool.create(
            host='localhost',
            port=6379,
            encoder=asyncio_redis.encoders.BytesEncoder(),
            poolsize=3)

        # Setup subscription to action results
        self.subscription = yield from self.connection.start_subscribe()
        yield from self.subscription.psubscribe([
            SETTINGS.ACTION_RESULTS_CHANNEL.format("*").encode('utf-8'),
            SETTINGS.CONNECTION_RESULTS_CHANNEL.format("*").encode('utf-8')
        ])
        yield from self._reload_config()

    def start(self, loop):
        self.current_loop = loop
        loop.add_signal_handler(signal.SIGINT, partial(self.stop, 'SIGINT'))
        loop.add_signal_handler(signal.SIGTERM, partial(self.stop, 'SIGTERM'))
        asyncio.Task(self.loop())

    def stop(self, sig):
        log.info(
            "Got {} signal, we should finish all tasks and stop daemon".format(
                sig))
        self.run = False
        self.current_loop.stop()

    @asyncio.coroutine
    def loop(self):
        yield from self.bootstrap()
        while self.run:
            metrics = []
            # Wait for new message
            try:
                reply = yield from self.subscription.next_published()
            except GeneratorExit:
                break
            log.debug('Got new message, channel={}'.format(reply.channel))

            # Load metrics list
            yield from self._reload_config()

            # Decode new message
            try:
                channel_type, object_id = yield from self._decode_message(
                    reply)
                results = pickle.loads(reply.value)
                task = results['task']
                values = results['result']
            except Exception:
                log.error(
                    "Cannon load data from message in channel={}, data={}".
                    format(reply.channel, reply.value),
                    exc_info=True)
                continue
            # Process metrics
            if channel_type == 'actions-results':
                metrics = self.actions_id_to_metrics.get(object_id, [])
            elif channel_type == 'connections-results':
                # Skip empty lines for connection grep
                if not values.get('stdout'):
                    continue
                metrics = self.connections_id_to_metrics.get(object_id, [])
            else:
                log.error(
                    'Unexpected metric-channel type={}'.format(channel_type))
                continue

            for metric_id in metrics:
                asyncio.Task(
                    self.store_metric_value(metric_id, object_id, task,
                                            values))

        self.current_loop.stop()
        self.connection.close()
        log.info('Bye-bye!')

    @asyncio.coroutine
    def _decode_message(self, msg):
        action_mask = SETTINGS.ACTION_RESULTS_CHANNEL.replace("{}", "")
        connection_mask = SETTINGS.CONNECTION_RESULTS_CHANNEL.replace("{}", "")

        channel = msg.channel.decode('utf-8')
        if channel.startswith(action_mask):
            return 'actions-results', channel[len(action_mask):]
        elif channel.startswith(connection_mask):
            return 'connections-results', channel[len(connection_mask):]
        else:
            return '', channel

    @asyncio.coroutine
    def _reload_config(self):
        time_now = int(now())
        if time_now - self._reload_metrics_config_last_run < 1000:  # 1000 = 1sec
            return
        self._reload_metrics_config_last_run = time_now
        config_version = self.storage.get_config_version()
        if config_version != self.config_version:
            yield from self._reload_metrics()
            self.config_version = config_version

    @asyncio.coroutine
    def _reload_metrics(self):
        new_metrics = self.storage.list_metrics()
        self.metrics = new_metrics
        self.actions_id_to_metrics = defaultdict(list)
        self.connections_id_to_metrics = defaultdict(list)
        for metric_id, metric in new_metrics.items():
            if 'action_id' in metric:
                self.actions_id_to_metrics[metric.get('action_id')].append(
                    metric_id)
            elif 'connection_id' in metric:
                self.connections_id_to_metrics[metric.get(
                    'connection_id')].append(metric_id)
        self._lcache = {}
        log.info('Loaded {} metrics'.format(len(new_metrics)))

    """ TASKS """

    @asyncio.coroutine
    def store_metric_value(self, metric_id, object_id, task, values):
        log.debug(
            'store_metric_value {} for action/connection {} by task {}'.format(
                metric_id, object_id, task['id']))
        exit_codes = values.get('exit_codes')
        stdout = values.get('stdout')

        metric = self.metrics.get(metric_id)
        value = self.parse_value(metric, stdout)
        log.debug('Metric (id={}) parsed value: {}'.format(metric_id, value))
        if value is None:
            logging.error(
                "No parser match for metric {}, nothing to store".format(
                    metric_id))
            self.db_log.error("Пустое значение после фильтрации", stdout,
                              "metric", metric_id)
            return

        converter = lambda x: x
        # Convert metric type
        if metric['type'] == 'boolean':
            value = self.cast_to_boolean(metric_id, metric, value)
        else:
            converter = SETTINGS.METRICS_TYPES_MAP[metric['type']]
            try:
                value = converter(value)
            except ValueError:
                log.error(
                    "Wrong value for metric '{}', cannot convert to {}".format(
                        metric_id, metric['type']),
                    exc_info=True)
                self.db_log.error(
                    "Не удалось привести тип значения к {}".format(
                        metric['type']), str(value), "metric", metric_id)
                return

        # Trim strings
        if isinstance(value, str):
            value = value[:SETTINGS.METRIC_STRING_LIMIT]

        # Apply multiplier
        multiplier = metric.get('multiplier', None)
        try:
            if multiplier and metric['type'] in SETTINGS.METRIC_NUMERICAL_TYPES:
                multiplier = float(multiplier)
                value = value * multiplier

                # If it is int, convert to int
                value = converter(value)
        except:
            log.error('Cannot apply multiplier', exc_info=True)
            self.db_log.error("Не удалось применить множитель", str(value),
                              "metric", metric_id)
            return

        timestamp = datetime_to_timestamp(task['run_at'])
        skip_interval = parse_timetable(metric.get('limit_duplicate_save', ''))
        if skip_interval:
            prev_val, prev_timestamp = self._lcache.get(metric_id, (None, 0))
            if (prev_val
                    == value) and (timestamp - prev_timestamp) < skip_interval:
                return True
            else:
                self._lcache[metric_id] = (value,
                                           datetime_to_timestamp(
                                               task['run_at']))

        log.info('Store value="{}" for metric {}'.format(value, metric_id))
        try:
            self.metrics_storage.store_metric(metric_id,
                                              value,
                                              time=task['run_at'])
            yield from self.connection.hset(
                SETTINGS.LAST_VALUES_HASH, metric_id.encode('utf-8'),
                ujson.dumps({
                    'value': value,
                    'timestamp': timestamp
                }).encode('utf-8'))
        except:
            log.error('Cannot store metric value, storage exception',
                      exc_info=True)
            return

        # Publish message about finish
        yield from self.connection.publish(
            SETTINGS.METRICS_CHANNEL.format(metric_id).encode('utf-8'), b'')
        return True

    def parse_value(self, metric, stdout):
        stdout_lines = stdout.split('\n')
        line_regexp = metric.get('line_regexp')
        line_numbers = str(metric.get('line_numbers', ''))
        word_regexp = metric.get('word_regexp')
        word_numbers = str(metric.get('word_numbers', ''))

        lines_str = None
        lines_no = set()
        if line_regexp:
            regexp = re.compile(line_regexp)
            for i, stdout_line in enumerate(stdout_lines, 1):
                if regexp.search(stdout_line):
                    lines_no.add(i)
        if line_numbers:
            line_values = line_numbers.split(',')
            for line_value in line_values:
                if ':' in line_value:
                    start, finish = map(int, line_value.split(':'))
                    for i in range(start, finish + 1):
                        lines_no.add(i)
                else:
                    lines_no.add(int(line_value))

        if (line_regexp or line_numbers):
            if lines_no:
                lines_no = sorted(list(lines_no))
                lines = []
                total_lines = len(stdout_lines)
                for line_no in lines_no:
                    if line_no > total_lines:
                        continue
                    lines.append(stdout_lines[line_no - 1])
                lines_str = '\n'.join(lines)
        else:
            lines_str = stdout

        if not lines_str:
            return None

        if word_regexp:
            match = re.findall(word_regexp, lines_str)
            if not match:
                return None
            return match[0]
        elif word_numbers:
            words_range = None
            if ':' in word_numbers:
                start, finish = map(int, word_numbers.split(':'))
                words_range = int(start) - 1, int(finish) - 1
            else:
                words_range = int(word_numbers) - 1, int(word_numbers) - 1
        else:
            return lines_str

        stdout_words = list(
            filter(lambda x: x is not None, SPLIT_RE.split(lines_str)))
        stdout_words = [
            x for sublist in map(lambda word: SPLIT_NEG_RE.split(word),
                                 stdout_words) for x in sublist
        ]
        # Frontend do not count \t, ' ' and '' words :(
        skip_cnt = 0
        words_no_map = {}
        for i, word in enumerate(stdout_words):
            if word == '' or SKIP_RE.match(word):
                skip_cnt += 1
                continue
            words_no_map[i - skip_cnt] = i

        start = words_no_map.get(words_range[0], 0)
        finish = words_no_map.get(words_range[1], len(stdout_words) - 1) + 1

        result_words = stdout_words[start:finish]
        words_str = ''.join(result_words)

        return words_str

    def cast_to_boolean(self, metric_id, metric, value):
        try:
            condition = metric['function']
            cmp_value = metric['value']
        except Exception:
            log.error(
                'Boolean metric (id={}) without condition!'.format(metric_id))
            return

        if condition not in SETTINGS.CONDITIONS_CMP_FUNCTIONS.keys():
            log.error(
                "Cannot convert value for metric '{}' to bool: wrong function '{}'"
                .format(metric_id, condition))
            self.db_log.error(
                "Не удалось привести значение к булевой метрике, невреная функция '{}'"
                .format(condition), str(value), "metric", metric_id)
            return

        if condition in SETTINGS.CONDITIONS_NUMBERIC:
            # Cast values to float
            try:
                value = float(value)
            except (ValueError, TypeError):
                log.error(
                    "Wrong value for metric '{}', cannot convert '{}' to float before comparasion"
                    .format(metric_id, value),
                    exc_info=True)
                self.db_log.error(
                    "Не удалось привести значение метрики к дробному типу для проведения сравнения",
                    str(value), "metric", metric_id)
                return
            try:
                cmp_value = float(cmp_value)
            except (ValueError, TypeError):
                log.error(
                    "Wrong value for metric '{}', cannot convert comparasion value '{}' to float before comparasion"
                    .format(metric_id, cmp_value),
                    exc_info=True)
                self.db_log.error(
                    "Cannot convert comparasion value to float before comparasion",
                    str(cmp_value), "metric", metric_id)
                return
        elif condition in SETTINGS.CONDITIONS_BOOLEAN and not isinstance(
                value, bool):
            log.error(
                "Wrong value for metric '{}', for booleans comparasion it should be boolean, not '{}'"
                .format(metric_id, value))
            self.db_log.error(
                "For boolean comparasion value should be boolean, not '{}'".
                format(value), str(value), "metric", metric_id)
            return
        elif condition in SETTINGS.CONDITIONS_STRINGS and not isinstance(
                value, str):
            log.error(
                "Wrong value for metric '{}', for strings comparasion it should be string, not '{}'"
                .format(metric_id, value))
            self.db_log.error(
                "For strings comparasion value should be strings, not '{}'".
                format(value), str(value), "metric", metric_id)
            return

        try:
            result = SETTINGS.CONDITIONS_CMP_FUNCTIONS[condition](value,
                                                                  cmp_value)
        except:
            log.error("Cannot compare values: '{}' {} '{}'".format(
                value, condition, cmp_value))
            self.db_log.error(
                "Не удалось сравнить значения: '{}' {} '{}'".format(
                    value, condition, cmp_value), None, "metric", metric_id)
            return None
        return (1 if result else 0)
Example #7
0
class Trigger(object):

    def __init__(self):
        self.connection = None
        self.subscription = None
        self.current_loop = None
        self.config_version = 0

        self.run = True

        self.config = ConfigStorage()
        self.db_log = LoggingStorage()
        self.tq_storage = None

        self.triggers = {}
        self.metrics_id_to_triggers = defaultdict(list)

        self._reload_triggers_config_last_run = 0

    def bootstrap(self):
        log.info("Running trigger loop")
        self.connection = yield from asyncio_redis.Pool.create(host='localhost', port=6379, encoder=asyncio_redis.encoders.BytesEncoder(), poolsize=3)
        self.tq_storage = TaskStorage(self.current_loop, self.connection)

        # Setup subscription to action results
        self.subscription = yield from self.connection.start_subscribe()
        yield from self.subscription.psubscribe([ SETTINGS.METRICS_CHANNEL.format('*').encode('utf-8') ])

    def start(self, loop):
        self.current_loop = loop
        loop.add_signal_handler(signal.SIGINT, partial(self.stop, 'SIGINT'))
        loop.add_signal_handler(signal.SIGTERM, partial(self.stop, 'SIGTERM'))
        asyncio.Task(self.loop())

    def stop(self, sig):
        log.info("Got {} signal, we should finish all tasks and stop daemon".format(sig))
        self.run = False
        self.current_loop.stop()

    @asyncio.coroutine
    def get_new_message(self):
        reply = yield from self.subscription.next_published()

    @asyncio.coroutine
    def loop(self):
        yield from self.bootstrap()
        while self.run:
            # Load triggers list
            yield from self._reload_config()

            # Wait for new message
            try:
                reply = yield from self.subscription.next_published()
            except GeneratorExit:
                log.info('Stop subscription')
                break
            log.debug('Got new message, channel={}'.format(reply.channel))
            # Decode new message
            try:
                _, metric_id = yield from self._decode_message(reply)
            except Exception:
                log.error("Cannon load data from message in channel={}".format(reply.channel), exc_info=True)

            # Process triggers
            triggers = self.metrics_id_to_triggers.get(metric_id, [])
            for trigger_id in triggers:
                asyncio.Task(self.check_trigger(trigger_id, metric_id))

        self.current_loop.stop()
        self.connection.close()
        log.info('Bye-bye!')

    @asyncio.coroutine
    def _decode_message(self, msg):
        metrics_mask = SETTINGS.METRICS_CHANNEL.replace("{}", "")

        channel = msg.channel.decode('utf-8')
        if channel.startswith(metrics_mask):
            return 'metrics-results', channel[len(metrics_mask):]
        else:
            raise Exception()
            # return '', channel

    @asyncio.coroutine
    def _reload_config(self):
        time_now = int(now())
        if time_now - self._reload_triggers_config_last_run < 1000:  # 1000 = 1sec
            return
        self._reload_triggers_config_last_run = time_now
        config_version = self.config.get_config_version()
        if config_version != self.config_version:
            yield from self._reload_triggers()
            self.config_version = config_version

    @asyncio.coroutine
    def _reload_triggers(self):
        new_triggers = self.config.list_triggers()
        self.triggers = new_triggers
        self.metrics_id_to_triggers = defaultdict(list)
        for trigger_id, trigger in new_triggers.items():
            for condition in trigger.get('conditions', []):
                self.metrics_id_to_triggers[condition.get('metric_id')].append(trigger_id)
        log.info('Loaded {} triggers'.format(len(new_triggers)))

    @asyncio.coroutine
    def _activate_trigger(self, trigger):
        self.db_log.info("Триггер был запущен", None, "trigger", trigger['_id'])
        log.debug("_activare_trigger for {}".format(trigger['_id']))
        for action_obj in trigger['scenario']:
            # Extract values from trigger into dict (param_name -> param_value)
            params_values = {param.get('param'): param.get('value') for param in action_obj['params']}
            # Get action with binded params
            action = self.config.get_action(action_obj['action_id'],
                                            initial_param_values=params_values,
                                            connection_id=trigger.get('connection_id'))

            log.debug('Trigger {}: create task for action {} with params {}'.format(trigger['_id'], action['_id'], params_values))
            task = yield from self.tq_storage.create_task(name=action['_id'],
                                                          task_type=Task.TYPE_TRIGGERED,
                                                          run_at=datetime.datetime.now(),
                                                          ttl=action.get('ttl') or SETTINGS.WORKER_TASK_TIMEOUT,
                                                          kwargs=action,
                                                          store_to=Task.STORE_TO_METRICS)
            yield from self.tq_storage.schedule_task(task)

    """ TASKS """

    @asyncio.coroutine
    def check_trigger(self, trigger_id, metric_id):
        log.info('Check trigger {} for metric {}'.format(trigger_id, metric_id))
        trigger = self.triggers[trigger_id]

        last_values = yield from self.tq_storage.get_metric_last_values(trigger['depends_on'])
        log.debug('Trigger {}: last values is {}'.format(trigger_id, last_values))

        checks = ((trigger_id, cond.get('value', ''), cond['function'], last_values.get(cond['metric_id'])) for cond in trigger['conditions'])
        is_triggered = all(map(lambda check: self.check_condition(*check), checks))

        if is_triggered:
            # Check lock
            log.info('Trigger {} is activated!'.format(trigger_id))
            locked = yield from self.tq_storage.lock_trigger(trigger_id)
            if locked > 1:
                log.info('Trigger {} is locked {} times, do not perform action'.format(trigger_id, locked))
                return
            # Perform action on trigger, it's not locked
            yield from self._activate_trigger(trigger)
        else:
            # Unlock trigger here
            log.debug('Trigger {} is NOT activated, try to unlock!'.format(trigger_id))
            yield from self.tq_storage.unlock_trigger(trigger_id)

    def check_condition(self, trigger_id, cmp_value, condition, value):
        if condition not in SETTINGS.CONDITIONS_CMP_FUNCTIONS.keys():
            log.error("Cannot determine condition for trigger '{}': wrong function '{}'".format(trigger_id, condition))
            return False

        if condition in SETTINGS.CONDITIONS_NUMBERIC:
            try:
                value = float(value)
            except (ValueError, TypeError):
                log.error("Wrong value for trigger '{}', cannot convert metric value '{}' to float before comparasion".format(trigger_id, cmp_value), exc_info=True)
                self.db_log.error("Cannot convert metric value to float before comparasion", str(value), "trigger", trigger_id)
                return False
            try:
                cmp_value = float(cmp_value)
            except (ValueError, TypeError):
                log.error("Wrong value for trigger '{}', cannot convert comparasion value '{}' to float before comparasion".format(trigger_id, cmp_value), exc_info=True)
                self.db_log.error("Cannot convert comparasion value to float before comparasion", str(cmp_value), "trigger", trigger_id)
                return False
        elif condition in SETTINGS.CONDITIONS_BOOLEAN:
            try:
                value = bool(int(value))
            except:
                log.error("Wrong value for trigger '{}', can't cast value to boolean '{}'".format(trigger_id, value))
                self.db_log.error("Can't cast value to boolean '{}'".format(value), str(value), "trigger", trigger_id)
                return False
        elif condition in SETTINGS.CONDITIONS_STRINGS and not isinstance(value, str):
            log.error("Wrong value for trigger '{}', for strings comparasion it should be string, not '{}'".format(trigger_id, value))
            self.db_log.error("For strings comparasion value should be strings, not '{}'".format(value), str(value), "trigger", trigger_id)
            return False

        try:
            result = SETTINGS.CONDITIONS_CMP_FUNCTIONS[condition](value, cmp_value)
            log.debug("Compare values: '{}' {} '{}'".format(value, condition, cmp_value))
        except:
            log.error("Cannot compare values: '{}' {} '{}'".format(value, condition, cmp_value))
            self.db_log.error("Cannot compare values: '{}' {} '{}'".format(value, condition, cmp_value), None, "trigger", trigger_id)
            return False
        return result
Example #8
0
 def bootstrap(self):
     log.info("Running worker loop")
     self.connection = yield from asyncio_redis.Pool.create(host='localhost', port=6379, encoder=asyncio_redis.encoders.BytesEncoder(), poolsize=3)
     self.config = ConfigStorage()
     self.comport_state = ComPortState()
     self.db_log = LoggingStorage()
Example #9
0
class BaseWorker(metaclass=ABCMeta):

    def __init__(self):
        self.current_loop = None
        self.connection = None
        self.comport_state = None
        self.config = None
        self.db_log = None

        self.run = True

        # List of current worker tasks; we use it for tasks per worker limitation
        self.TASKS = TasksList()

        # List for temporary storage completed task for clenup
        self.COMPLETED_TASKS = TasksList()

    @asyncio.coroutine
    def bootstrap(self):
        log.info("Running worker loop")
        self.connection = yield from asyncio_redis.Pool.create(host='localhost', port=6379, encoder=asyncio_redis.encoders.BytesEncoder(), poolsize=3)
        self.config = ConfigStorage()
        self.comport_state = ComPortState()
        self.db_log = LoggingStorage()

    def start(self, loop):
        self.current_loop = loop
        loop.add_signal_handler(signal.SIGINT, partial(self.stop, 'SIGINT'))
        loop.add_signal_handler(signal.SIGTERM, partial(self.stop, 'SIGTERM'))
        asyncio.Task(self.loop())

    def stop(self, sig):
        log.info("Got {} signal, we should finish all tasks and stop daemon".format(sig))
        self.run = False
        self.current_loop.stop()

    @asyncio.coroutine
    def loop(self):
        """ Main event loop of worker

        :param loop: current event loop
        :return: None
        """
        yield from self.bootstrap()
        # Inside a while loop, wait for incoming events.
        while self.run:
            # Limit tasks per worker, wait for task complete, do not fetch new
            if len(self.TASKS) > SETTINGS.WORKER_TASKS_LIMIT:
                log.debug("Too much tasks in local queue, wait for complete, timeout {}s".format(SETTINGS.WORKER_TASK_TIMEOUT))
                try:
                    done, pending = yield from asyncio.wait(self.TASKS, return_when=asyncio.FIRST_COMPLETED, timeout=SETTINGS.WORKER_TASK_TIMEOUT)
                    continue
                except GeneratorExit:
                    break

            # Pop new task from dispatched queue
            try:
                raw_task = yield from self._pop_task()
            except GeneratorExit:
                break
            if not raw_task:
                continue

            # Deserialize
            task = yield from self._deserialize_task(raw_task)
            if not task:
                continue

            # Set new status
            task = yield from self._move_to_inprogress(task)

            # Run task
            try:
                task_future = yield from self._run_task(task)
            except Exception as ex:
                log.error(ex, exc_info=True)
                pass
        # When finished, close the connection.
        self.current_loop.stop()
        self.connection.close()
        log.info('Bye-bye!')

    @asyncio.coroutine
    def _pop_task(self):
        # Blocking pop task from dispatched list (and atomic push to inprogress list)
        try:
            raw_task = yield from self.connection.brpoplpush(SETTINGS.DISPATCHED_QUEUE, SETTINGS.INPROGRESS_QUEUE, SETTINGS.WORKER_BPOP_TIMEOUT)
            log.debug("Got new tasks from queue {}".format(raw_task))
            return raw_task
        except asyncio_redis.TimeoutError:
            # No new tasks? Sleep for a while
            yield from asyncio.sleep(SETTINGS.WORKER_PULL_SLEEP)
            return
        except Exception:
            log.error('Unexpected error', exc_info=True)

    @asyncio.coroutine
    def _deserialize_task(self, raw_task):
        try:
            task_obj = yield from self.connection.get(SETTINGS.TASK_STORAGE_KEY.format(raw_task.decode('utf-8')).encode('utf-8'))
            if not task_obj:
                raise TypeError()
            task = Task.deserialize(task_obj)
            log.info("Got new task id={}, type={}, status={}".format(task.id, task.type, task.status))
            if not task.status == Task.SCHEDULED:
                log.error("Wrong status={} for task id={}, type={}; Should be SCHEDULED".format(task.status, task.id, task.type))
                return
            return task
        except TypeError as ex:
            log.error("Wrong task_id {}".format(raw_task))
            return
        except (pickle.UnpicklingError, EOFError, TypeError, ImportError):
            yield from self.connection.lrem(SETTINGS.INPROGRESS_QUEUE, value=raw_task)
            log.error("Wrong message in queue", exc_info=True)
            return

    @asyncio.coroutine
    def _move_to_inprogress(self, task):
        """ move_to_inprogress -- Change task status to 'in progress'
        Store task in sorted set with TTL

        :param task: `sensors.models.Task` instance
        """
        ttl = task.ttl or SETTINGS.WORKER_TASK_TIMEOUT
        expires_at = datetime.datetime.now() + datetime.timedelta(seconds=ttl)
        new_task = task._replace(status=Task.INPROGRESS)

        # XXX may be transaction here?
        yield from self.connection.zadd(SETTINGS.INPROGRESS_TASKS_SET, {task.bid(): datetime_to_timestamp(expires_at)})
        yield from self.connection.set(SETTINGS.TASK_STORAGE_KEY.format(new_task.id).encode('utf-8'), new_task.serialize(), expire=SETTINGS.TASK_STORAGE_EXPIRE)

        return new_task

    @asyncio.coroutine
    def _throttle(self, task):
        try:
            freq = int(task.kwargs.get('execution_limit', 0))
        except:
            freq = 0

        if freq == 0:
            return False, 0
        key = SETTINGS.WORKER_THROTTLE_LOCKS.format(task.name).encode('utf-8')
        ttl = yield from self.connection.ttl(key)
        if ttl <= 0:
            # No throttle, no key
            yield from self.connection.set(key, b'', expire=freq)
            return False, 0
        else:
            # Throttled
            return True, ttl

    @asyncio.coroutine
    def _run_task(self, task):
        """ run_task -- Runs tasks

        :param task: `sensors.models.Task` instance
        """
        log.debug("_run_task {}".format(task.id))

        if task.type == Task.TYPE_TRIGGERED:
            is_throttled, expiration = yield from self._throttle(task)
            if is_throttled:
                task_future = asyncio.Future()
                self.TASKS.append(task_future)
                task_future.set_result(None)
                log.info("Task id={}, name={} throttled for {} seconds.".format(task.id, task.name, expiration))
                self.db_log.info("Сценарий не был запущен из-за превышения частоты запуска", None, 'action', task.id)
                self._mark_as_completed_callback(task, task_future)
                return
        # Create future to store Process
        pid = asyncio.Future()
        stopper = asyncio.Future()
        ttl = task.ttl or SETTINGS.WORKER_TASK_TIMEOUT
        # Call runner, pass pid there (need to fill in process!)
        task_future = asyncio.Task(self.runner(task, _pid=pid, stopper=stopper, ttl=ttl, kwargs=task.kwargs))
        # Run expire timer task, pass pid there for ability to kill process
        expire_timer_future = asyncio.Task(self._expire_timer_task(task=task, task_future=task_future, stopper=stopper, _pid=pid, timeout=ttl))

        # Store task in local worker cache list (need to limit tasks per worker)
        self.TASKS.append(task_future)

        # Add callbacks for completed tasks cleanup and remove expire timer
        task_future.add_done_callback(partial(self._mark_as_completed_callback, task))
        task_future.add_done_callback(partial(self._remove_expire_timer_callback, task, expire_timer_future))

    ### TASKS

    @abstractmethod
    @asyncio.coroutine
    def runner(self, task, _pid, stopper, ttl, kwargs):
        """ runner -- Abstarct method, you should implement it in concrete
        Worker class.

        Runner should be a coroutine. System should run it as Task.

        Runner should accept only keyword arguments.

        Runner should accept `_pid` argument (Future of Process) and set_result
        for the _pid variable -- asyncio.subprocess.Process instance.

        Runner should accept kwargs argument.
        """
        pass

    @abstractmethod
    @asyncio.coroutine
    def _store_results(self, task, task_results):
        """ _store_results -- Abstarct method, you should implement it in concrete
        Worker class.

        Runner should be a coroutine.
        """
        pass

    @asyncio.coroutine
    def _cleanup_task(self, task):
        """ clenaup_task -- Task for cleanup completed tasks from redis queue.

        :param task: `sensors.models.Task` instance
        :return: None
        """
        log.debug("_cleanup_task task_id={}".format(task.id))
        # Remove task from inprogress queue
        # XXX may be transaction here?
        cnt1 = yield from self.connection.lrem(SETTINGS.INPROGRESS_QUEUE, value=task.bid())
        # Remove task from sorted set
        cnt2 = yield from self.connection.zrem(SETTINGS.INPROGRESS_TASKS_SET, [task.bid()])
        # Update scheduler information
        # Store next_run in scheduled
        task_scheduler_obj = yield from self.connection.hget(SETTINGS.SCHEDULER_HISTORY_HASH, task.name.encode('utf-8'))
        try:
            task_scheduler = SchedulerTaskHistory.deserialize(task_scheduler_obj)
        except (pickle.UnpicklingError, EOFError, TypeError, ImportError):
            task_scheduler = None
        if task_scheduler and task_scheduler.scheduled_task_id == task.id:
            #if task.status == Task.SUCCESSFUL:
            #    # Update last_run only on success
            #    last_run = datetime_to_timestamp(task.run_at)
            #else:
            #    # If task failed, do not update last_run (last_run is about SUCCESSFUL task exectuion)
            #    last_run = task_scheduler.last_run
            last_run = datetime_to_timestamp(task.run_at)
            task_scheduler = task_scheduler._replace(last_run=last_run, next_run=0, scheduled_task_id=None)
            yield from self.connection.hset(SETTINGS.SCHEDULER_HISTORY_HASH, task.name.encode('utf-8'), task_scheduler.serialize())

        # Publish message about finish
        yield from self.connection.publish(SETTINGS.TASK_CHANNEL.format(task.id).encode('utf-8'), task.status.encode('utf-8'))
        log.debug('Publish message about task {} to {}'.format(task.id, SETTINGS.TASK_CHANNEL.format(task.id)))
        log.debug("_cleanup_task lrem result {}".format(cnt1))
        log.debug("_cleanup_task zrem result {}".format(cnt2))

        # Ping scheduler
        yield from self._ping_scheduler(task)

    @asyncio.coroutine
    def _ping_scheduler(self, task):
        # Publish message about new finished task
        if task.type == Task.TYPE_REGULAR:
            yield from self.connection.publish(SETTINGS.WORKER_TO_SCHEDULER_CHANNEL, b'')

    @asyncio.coroutine
    def _expire_timer_task(self, task, task_future, _pid, stopper, timeout):
        """ expire_timer -- Task for check timeouted processes and kill it.

        :param task: `sensors.models.Task` instance
        :param task_future: `asyncio.Future instance` -- (Future of) runned
                            task we should cancel() after `timeout`
        :param _pid: `asyncio.Future instance` -- (Future of) instance of
                     asyncio.subprocess.Process -- proces we should kill() after
                     `timeout`
        :param timeout: int, timeout in seconds
        :return: None
        """
        log.debug("Run expire timer for task {}".format(task.id))
        yield from asyncio.sleep(timeout)
        try:
            stopper.set_result(True)
            task_future.cancel()
            killers = _pid.result()
            for killer in killers:
                try:
                    killer()
                except ProcessLookupError:
                    pass
                except:
                    log.error("What is this? I try to kill my action process", exc_info=True)
        except:
            log.error("Unexpected error in _expire_timer", exc_info=True)
        log.debug('EXPIRE TIMER for task {}, killing process {}...'.format(task.id, _pid))
        self.db_log.error("Сценарий был остановлен по превышению лимита времени исполнения", None, 'action', task.kwargs.get('_id'))

    ### CALLBACKS

    def _mark_as_completed_callback(self, task, task_future):
        # Store results, change status, remove task from local list
        log.debug('Mark as completed callback is here!')
        try:
            if task_future.result() is not None:
                asyncio.Task(self._store_results(task, task_future.result()))
                new_task = task._replace(status=Task.SUCCESSFUL)
            else:
                new_task = task._replace(status=Task.FAILED)
        except asyncio.CancelledError:
            new_task = task._replace(status=Task.FAILED)
        finally:
            del task
        log.info("Finish task id={}, status={}".format(new_task.id, new_task.status))

        log.debug("Update task status as COMPLETED <id={}> status={}".format(new_task.id, new_task.status))
        asyncio.Task(self.connection.set(SETTINGS.TASK_STORAGE_KEY.format(new_task.id).encode('utf-8'), new_task.serialize(), expire=SETTINGS.TASK_STORAGE_EXPIRE))
        asyncio.Task(self._cleanup_task(new_task))

        # Callback вызывается thread-safe — можно выпилить локи и юзать просто лист
        self.TASKS.remove(task_future)

    def _remove_expire_timer_callback(self, task, expire_timer_future, task_future):
        # Remove exire time checker if task is successfully completed
        log.debug("Cancel expire timer Task for task {}".format(task.id))
        expire_timer_future.cancel()

    ## Helpers
    @staticmethod
    def _get_esc_char(matchobj):
        char = matchobj.group(1).upper()
        if ord(char) in range(65, 96):
            code = ord(char) - 64
            return chr(code)
        return matchobj.group(0)

    @classmethod
    def _parse_command(cls, command):
        # Convert command like <CTRL+X> to escape character
        tmp = command
        tmp = PARSE_COMMAND_RE.sub(cls._get_esc_char, tmp)
        tmp = PARSE_COMMAND_NEWLINE_RE.sub(lambda x: '\r', tmp)
        return tmp
Example #10
0
class Trigger(object):
    def __init__(self):
        self.connection = None
        self.subscription = None
        self.current_loop = None
        self.config_version = 0

        self.run = True

        self.config = ConfigStorage()
        self.db_log = LoggingStorage()
        self.tq_storage = None

        self.triggers = {}
        self.metrics_id_to_triggers = defaultdict(list)

        self._reload_triggers_config_last_run = 0

    def bootstrap(self):
        log.info("Running trigger loop")
        self.connection = yield from asyncio_redis.Pool.create(
            host='localhost',
            port=6379,
            encoder=asyncio_redis.encoders.BytesEncoder(),
            poolsize=3)
        self.tq_storage = TaskStorage(self.current_loop, self.connection)

        # Setup subscription to action results
        self.subscription = yield from self.connection.start_subscribe()
        yield from self.subscription.psubscribe(
            [SETTINGS.METRICS_CHANNEL.format('*').encode('utf-8')])

    def start(self, loop):
        self.current_loop = loop
        loop.add_signal_handler(signal.SIGINT, partial(self.stop, 'SIGINT'))
        loop.add_signal_handler(signal.SIGTERM, partial(self.stop, 'SIGTERM'))
        asyncio.Task(self.loop())

    def stop(self, sig):
        log.info(
            "Got {} signal, we should finish all tasks and stop daemon".format(
                sig))
        self.run = False
        self.current_loop.stop()

    @asyncio.coroutine
    def get_new_message(self):
        reply = yield from self.subscription.next_published()

    @asyncio.coroutine
    def loop(self):
        yield from self.bootstrap()
        while self.run:
            # Load triggers list
            yield from self._reload_config()

            # Wait for new message
            try:
                reply = yield from self.subscription.next_published()
            except GeneratorExit:
                log.info('Stop subscription')
                break
            log.debug('Got new message, channel={}'.format(reply.channel))
            # Decode new message
            try:
                _, metric_id = yield from self._decode_message(reply)
            except Exception:
                log.error("Cannon load data from message in channel={}".format(
                    reply.channel),
                          exc_info=True)

            # Process triggers
            triggers = self.metrics_id_to_triggers.get(metric_id, [])
            for trigger_id in triggers:
                asyncio.Task(self.check_trigger(trigger_id, metric_id))

        self.current_loop.stop()
        self.connection.close()
        log.info('Bye-bye!')

    @asyncio.coroutine
    def _decode_message(self, msg):
        metrics_mask = SETTINGS.METRICS_CHANNEL.replace("{}", "")

        channel = msg.channel.decode('utf-8')
        if channel.startswith(metrics_mask):
            return 'metrics-results', channel[len(metrics_mask):]
        else:
            raise Exception()
            # return '', channel

    @asyncio.coroutine
    def _reload_config(self):
        time_now = int(now())
        if time_now - self._reload_triggers_config_last_run < 1000:  # 1000 = 1sec
            return
        self._reload_triggers_config_last_run = time_now
        config_version = self.config.get_config_version()
        if config_version != self.config_version:
            yield from self._reload_triggers()
            self.config_version = config_version

    @asyncio.coroutine
    def _reload_triggers(self):
        new_triggers = self.config.list_triggers()
        self.triggers = new_triggers
        self.metrics_id_to_triggers = defaultdict(list)
        for trigger_id, trigger in new_triggers.items():
            for condition in trigger.get('conditions', []):
                self.metrics_id_to_triggers[condition.get('metric_id')].append(
                    trigger_id)
        log.info('Loaded {} triggers'.format(len(new_triggers)))

    @asyncio.coroutine
    def _activate_trigger(self, trigger):
        self.db_log.info("Триггер был запущен", None, "trigger",
                         trigger['_id'])
        log.debug("_activare_trigger for {}".format(trigger['_id']))
        for action_obj in trigger['scenario']:
            # Extract values from trigger into dict (param_name -> param_value)
            params_values = {
                param.get('param'): param.get('value')
                for param in action_obj['params']
            }
            # Get action with binded params
            action = self.config.get_action(
                action_obj['action_id'],
                initial_param_values=params_values,
                connection_id=trigger.get('connection_id'))

            log.debug(
                'Trigger {}: create task for action {} with params {}'.format(
                    trigger['_id'], action['_id'], params_values))
            task = yield from self.tq_storage.create_task(
                name=action['_id'],
                task_type=Task.TYPE_TRIGGERED,
                run_at=datetime.datetime.now(),
                ttl=action.get('ttl') or SETTINGS.WORKER_TASK_TIMEOUT,
                kwargs=action,
                store_to=Task.STORE_TO_METRICS)
            yield from self.tq_storage.schedule_task(task)

    """ TASKS """

    @asyncio.coroutine
    def check_trigger(self, trigger_id, metric_id):
        log.info('Check trigger {} for metric {}'.format(
            trigger_id, metric_id))
        trigger = self.triggers[trigger_id]

        last_values = yield from self.tq_storage.get_metric_last_values(
            trigger['depends_on'])
        log.debug('Trigger {}: last values is {}'.format(
            trigger_id, last_values))

        checks = ((trigger_id, cond.get('value', ''), cond['function'],
                   last_values.get(cond['metric_id']))
                  for cond in trigger['conditions'])
        is_triggered = all(
            map(lambda check: self.check_condition(*check), checks))

        if is_triggered:
            # Check lock
            log.info('Trigger {} is activated!'.format(trigger_id))
            locked = yield from self.tq_storage.lock_trigger(trigger_id)
            if locked > 1:
                log.info(
                    'Trigger {} is locked {} times, do not perform action'.
                    format(trigger_id, locked))
                return
            # Perform action on trigger, it's not locked
            yield from self._activate_trigger(trigger)
        else:
            # Unlock trigger here
            log.debug('Trigger {} is NOT activated, try to unlock!'.format(
                trigger_id))
            yield from self.tq_storage.unlock_trigger(trigger_id)

    def check_condition(self, trigger_id, cmp_value, condition, value):
        if condition not in SETTINGS.CONDITIONS_CMP_FUNCTIONS.keys():
            log.error(
                "Cannot determine condition for trigger '{}': wrong function '{}'"
                .format(trigger_id, condition))
            return False

        if condition in SETTINGS.CONDITIONS_NUMBERIC:
            try:
                value = float(value)
            except (ValueError, TypeError):
                log.error(
                    "Wrong value for trigger '{}', cannot convert metric value '{}' to float before comparasion"
                    .format(trigger_id, cmp_value),
                    exc_info=True)
                self.db_log.error(
                    "Cannot convert metric value to float before comparasion",
                    str(value), "trigger", trigger_id)
                return False
            try:
                cmp_value = float(cmp_value)
            except (ValueError, TypeError):
                log.error(
                    "Wrong value for trigger '{}', cannot convert comparasion value '{}' to float before comparasion"
                    .format(trigger_id, cmp_value),
                    exc_info=True)
                self.db_log.error(
                    "Cannot convert comparasion value to float before comparasion",
                    str(cmp_value), "trigger", trigger_id)
                return False
        elif condition in SETTINGS.CONDITIONS_BOOLEAN:
            try:
                value = bool(int(value))
            except:
                log.error(
                    "Wrong value for trigger '{}', can't cast value to boolean '{}'"
                    .format(trigger_id, value))
                self.db_log.error(
                    "Can't cast value to boolean '{}'".format(value),
                    str(value), "trigger", trigger_id)
                return False
        elif condition in SETTINGS.CONDITIONS_STRINGS and not isinstance(
                value, str):
            log.error(
                "Wrong value for trigger '{}', for strings comparasion it should be string, not '{}'"
                .format(trigger_id, value))
            self.db_log.error(
                "For strings comparasion value should be strings, not '{}'".
                format(value), str(value), "trigger", trigger_id)
            return False

        try:
            result = SETTINGS.CONDITIONS_CMP_FUNCTIONS[condition](value,
                                                                  cmp_value)
            log.debug("Compare values: '{}' {} '{}'".format(
                value, condition, cmp_value))
        except:
            log.error("Cannot compare values: '{}' {} '{}'".format(
                value, condition, cmp_value))
            self.db_log.error(
                "Cannot compare values: '{}' {} '{}'".format(
                    value, condition, cmp_value), None, "trigger", trigger_id)
            return False
        return result
Example #11
0
class MetricsCollector(object):

    def __init__(self):
        self.connection = None
        self.subscription = None
        self.current_loop = None
        self.config_version = 0
        self.run = True

        self.storage = ConfigStorage()
        self.metrics_storage = MetricsStorage()
        self.db_log = LoggingStorage()

        self._lcache = {}

        self.metrics = {}
        self.actions_id_to_metrics = defaultdict(list)

        self._reload_metrics_config_last_run = 0

    @asyncio.coroutine
    def bootstrap(self):
        log.info("Running metrics collector loop")
        self.connection = yield from asyncio_redis.Pool.create(host='localhost', port=6379, encoder=asyncio_redis.encoders.BytesEncoder(), poolsize=3)

        # Setup subscription to action results
        self.subscription = yield from self.connection.start_subscribe()
        yield from self.subscription.psubscribe([SETTINGS.ACTION_RESULTS_CHANNEL.format("*").encode('utf-8'),
                                                 SETTINGS.CONNECTION_RESULTS_CHANNEL.format("*").encode('utf-8')])
        yield from self._reload_config()

    def start(self, loop):
        self.current_loop = loop
        loop.add_signal_handler(signal.SIGINT, partial(self.stop, 'SIGINT'))
        loop.add_signal_handler(signal.SIGTERM, partial(self.stop, 'SIGTERM'))
        asyncio.Task(self.loop())

    def stop(self, sig):
        log.info("Got {} signal, we should finish all tasks and stop daemon".format(sig))
        self.run = False
        self.current_loop.stop()

    @asyncio.coroutine
    def loop(self):
        yield from self.bootstrap()
        while self.run:
            metrics = []
            # Wait for new message
            try:
                reply = yield from self.subscription.next_published()
            except GeneratorExit:
                break
            log.debug('Got new message, channel={}'.format(reply.channel))

            # Load metrics list
            yield from self._reload_config()

            # Decode new message
            try:
                channel_type, object_id = yield from self._decode_message(reply)
                results = pickle.loads(reply.value)
                task = results['task']
                values = results['result']
            except Exception:
                log.error("Cannon load data from message in channel={}, data={}".format(reply.channel, reply.value), exc_info=True)
                continue
            # Process metrics
            if channel_type == 'actions-results':
                metrics = self.actions_id_to_metrics.get(object_id, [])
            elif channel_type == 'connections-results':
                # Skip empty lines for connection grep
                if not values.get('stdout'):
                    continue
                metrics = self.connections_id_to_metrics.get(object_id, [])
            else:
                log.error('Unexpected metric-channel type={}'.format(channel_type))
                continue

            for metric_id in metrics:
                asyncio.Task(self.store_metric_value(metric_id, object_id, task, values))

        self.current_loop.stop()
        self.connection.close()
        log.info('Bye-bye!')

    @asyncio.coroutine
    def _decode_message(self, msg):
        action_mask = SETTINGS.ACTION_RESULTS_CHANNEL.replace("{}", "")
        connection_mask = SETTINGS.CONNECTION_RESULTS_CHANNEL.replace("{}", "")

        channel = msg.channel.decode('utf-8')
        if channel.startswith(action_mask):
            return 'actions-results', channel[len(action_mask):]
        elif channel.startswith(connection_mask):
            return 'connections-results', channel[len(connection_mask):]
        else:
            return '', channel

    @asyncio.coroutine
    def _reload_config(self):
        time_now = int(now())
        if time_now - self._reload_metrics_config_last_run < 1000:  # 1000 = 1sec
            return
        self._reload_metrics_config_last_run = time_now
        config_version = self.storage.get_config_version()
        if config_version != self.config_version:
            yield from self._reload_metrics()
            self.config_version = config_version

    @asyncio.coroutine
    def _reload_metrics(self):
        new_metrics = self.storage.list_metrics()
        self.metrics = new_metrics
        self.actions_id_to_metrics = defaultdict(list)
        self.connections_id_to_metrics = defaultdict(list)
        for metric_id, metric in new_metrics.items():
            if 'action_id' in metric:
                self.actions_id_to_metrics[metric.get('action_id')].append(metric_id)
            elif 'connection_id' in metric:
                self.connections_id_to_metrics[metric.get('connection_id')].append(metric_id)
        self._lcache = {}
        log.info('Loaded {} metrics'.format(len(new_metrics)))


    """ TASKS """

    @asyncio.coroutine
    def store_metric_value(self, metric_id, object_id, task, values):
        log.debug('store_metric_value {} for action/connection {} by task {}'.format(metric_id, object_id, task['id']))
        exit_codes = values.get('exit_codes')
        stdout = values.get('stdout')

        metric = self.metrics.get(metric_id)
        value = self.parse_value(metric, stdout)
        log.debug('Metric (id={}) parsed value: {}'.format(metric_id, value))
        if value is None:
            logging.error("No parser match for metric {}, nothing to store".format(metric_id))
            self.db_log.error("Пустое значение после фильтрации", stdout, "metric", metric_id)
            return

        converter = lambda x: x
        # Convert metric type
        if metric['type'] == 'boolean':
            value = self.cast_to_boolean(metric_id, metric, value)
        else:
            converter = SETTINGS.METRICS_TYPES_MAP[metric['type']]
            try:
                value = converter(value)
            except ValueError:
                log.error("Wrong value for metric '{}', cannot convert to {}".format(metric_id, metric['type']), exc_info=True)
                self.db_log.error("Не удалось привести тип значения к {}".format(metric['type']), str(value), "metric", metric_id)
                return

        # Trim strings
        if isinstance(value, str):
            value = value[:SETTINGS.METRIC_STRING_LIMIT]

        # Apply multiplier
        multiplier = metric.get('multiplier', None)
        try:
            if multiplier and metric['type'] in SETTINGS.METRIC_NUMERICAL_TYPES:
                multiplier = float(multiplier)
                value = value * multiplier

                # If it is int, convert to int
                value = converter(value)
        except:
            log.error('Cannot apply multiplier', exc_info=True)
            self.db_log.error("Не удалось применить множитель", str(value), "metric", metric_id)
            return

        timestamp = datetime_to_timestamp(task['run_at'])
        skip_interval = parse_timetable(metric.get('limit_duplicate_save', ''))
        if skip_interval:
            prev_val, prev_timestamp = self._lcache.get(metric_id, (None, 0))
            if (prev_val == value) and (timestamp - prev_timestamp) < skip_interval:
                return True
            else:
                self._lcache[metric_id] = (value, datetime_to_timestamp(task['run_at']))

        log.info('Store value="{}" for metric {}'.format(value, metric_id))
        try:
            self.metrics_storage.store_metric(metric_id, value, time=task['run_at'])
            yield from self.connection.hset(SETTINGS.LAST_VALUES_HASH, metric_id.encode('utf-8'), ujson.dumps({'value': value, 'timestamp': timestamp}).encode('utf-8'))
        except:
            log.error('Cannot store metric value, storage exception', exc_info=True)
            return

        # Publish message about finish
        yield from self.connection.publish(SETTINGS.METRICS_CHANNEL.format(metric_id).encode('utf-8'), b'')
        return True

    def parse_value(self, metric, stdout):
        stdout_lines = stdout.split('\n')
        line_regexp = metric.get('line_regexp')
        line_numbers = str(metric.get('line_numbers', ''))
        word_regexp = metric.get('word_regexp')
        word_numbers = str(metric.get('word_numbers', ''))

        lines_str = None
        lines_no = set()
        if line_regexp:
            regexp = re.compile(line_regexp)
            for i, stdout_line in enumerate(stdout_lines, 1):
                if regexp.search(stdout_line):
                    lines_no.add(i)
        if line_numbers:
            line_values = line_numbers.split(',')
            for line_value in line_values:
                if ':' in line_value:
                    start, finish = map(int, line_value.split(':'))
                    for i in range(start, finish+1):
                        lines_no.add(i)
                else:
                    lines_no.add(int(line_value))

        if (line_regexp or line_numbers):
            if lines_no:
                lines_no = sorted(list(lines_no))
                lines = []
                total_lines = len(stdout_lines)
                for line_no in lines_no:
                    if line_no > total_lines:
                        continue
                    lines.append(stdout_lines[line_no-1])
                lines_str = '\n'.join(lines)
        else:
            lines_str = stdout

        if not lines_str:
            return None

        if word_regexp:
            match = re.findall(word_regexp, lines_str)
            if not match:
                return None
            return match[0]
        elif word_numbers:
            words_range = None
            if ':' in word_numbers:
                start, finish = map(int, word_numbers.split(':'))
                words_range = int(start)-1, int(finish)-1
            else:
                words_range = int(word_numbers)-1, int(word_numbers)-1
        else:
            return lines_str

        stdout_words = list(filter(lambda x: x is not None, SPLIT_RE.split(lines_str)))
        stdout_words = [x for sublist in map(lambda word: SPLIT_NEG_RE.split(word), stdout_words) for x in sublist]
        # Frontend do not count \t, ' ' and '' words :(
        skip_cnt = 0
        words_no_map = {}
        for i, word in enumerate(stdout_words):
            if word == '' or SKIP_RE.match(word):
                skip_cnt += 1
                continue
            words_no_map[i-skip_cnt] = i

        start = words_no_map.get(words_range[0], 0)
        finish = words_no_map.get(words_range[1], len(stdout_words)-1) + 1

        result_words = stdout_words[start:finish]
        words_str = ''.join(result_words)

        return words_str

    def cast_to_boolean(self, metric_id, metric, value):
        try:
            condition = metric['function']
            cmp_value = metric['value']
        except Exception:
            log.error('Boolean metric (id={}) without condition!'.format(metric_id))
            return

        if condition not in SETTINGS.CONDITIONS_CMP_FUNCTIONS.keys():
            log.error("Cannot convert value for metric '{}' to bool: wrong function '{}'".format(metric_id, condition))
            self.db_log.error("Не удалось привести значение к булевой метрике, невреная функция '{}'".format(condition), str(value), "metric", metric_id)
            return

        if condition in SETTINGS.CONDITIONS_NUMBERIC:
            # Cast values to float
            try:
                value = float(value)
            except (ValueError, TypeError):
                log.error("Wrong value for metric '{}', cannot convert '{}' to float before comparasion".format(metric_id, value), exc_info=True)
                self.db_log.error("Не удалось привести значение метрики к дробному типу для проведения сравнения", str(value), "metric", metric_id)
                return
            try:
                cmp_value = float(cmp_value)
            except (ValueError, TypeError):
                log.error("Wrong value for metric '{}', cannot convert comparasion value '{}' to float before comparasion".format(metric_id, cmp_value), exc_info=True)
                self.db_log.error("Cannot convert comparasion value to float before comparasion", str(cmp_value), "metric", metric_id)
                return
        elif condition in SETTINGS.CONDITIONS_BOOLEAN and not isinstance(value, bool):
            log.error("Wrong value for metric '{}', for booleans comparasion it should be boolean, not '{}'".format(metric_id, value))
            self.db_log.error("For boolean comparasion value should be boolean, not '{}'".format(value), str(value), "metric", metric_id)
            return
        elif condition in SETTINGS.CONDITIONS_STRINGS and not isinstance(value, str):
            log.error("Wrong value for metric '{}', for strings comparasion it should be string, not '{}'".format(metric_id, value))
            self.db_log.error("For strings comparasion value should be strings, not '{}'".format(value), str(value), "metric", metric_id)
            return

        try:
            result = SETTINGS.CONDITIONS_CMP_FUNCTIONS[condition](value, cmp_value)
        except:
            log.error("Cannot compare values: '{}' {} '{}'".format(value, condition, cmp_value))
            self.db_log.error("Не удалось сравнить значения: '{}' {} '{}'".format(value, condition, cmp_value), None, "metric", metric_id)
            return None
        return (1 if result else 0)
Example #12
0
class BaseWorker(metaclass=ABCMeta):
    def __init__(self):
        self.current_loop = None
        self.connection = None
        self.comport_state = None
        self.config = None
        self.db_log = None

        self.run = True

        # List of current worker tasks; we use it for tasks per worker limitation
        self.TASKS = TasksList()

        # List for temporary storage completed task for clenup
        self.COMPLETED_TASKS = TasksList()

    @asyncio.coroutine
    def bootstrap(self):
        log.info("Running worker loop")
        self.connection = yield from asyncio_redis.Pool.create(
            host='localhost',
            port=6379,
            encoder=asyncio_redis.encoders.BytesEncoder(),
            poolsize=3)
        self.config = ConfigStorage()
        self.comport_state = ComPortState()
        self.db_log = LoggingStorage()

    def start(self, loop):
        self.current_loop = loop
        loop.add_signal_handler(signal.SIGINT, partial(self.stop, 'SIGINT'))
        loop.add_signal_handler(signal.SIGTERM, partial(self.stop, 'SIGTERM'))
        asyncio.Task(self.loop())

    def stop(self, sig):
        log.info(
            "Got {} signal, we should finish all tasks and stop daemon".format(
                sig))
        self.run = False
        self.current_loop.stop()

    @asyncio.coroutine
    def loop(self):
        """ Main event loop of worker

        :param loop: current event loop
        :return: None
        """
        yield from self.bootstrap()
        # Inside a while loop, wait for incoming events.
        while self.run:
            # Limit tasks per worker, wait for task complete, do not fetch new
            if len(self.TASKS) > SETTINGS.WORKER_TASKS_LIMIT:
                log.debug(
                    "Too much tasks in local queue, wait for complete, timeout {}s"
                    .format(SETTINGS.WORKER_TASK_TIMEOUT))
                try:
                    done, pending = yield from asyncio.wait(
                        self.TASKS,
                        return_when=asyncio.FIRST_COMPLETED,
                        timeout=SETTINGS.WORKER_TASK_TIMEOUT)
                    continue
                except GeneratorExit:
                    break

            # Pop new task from dispatched queue
            try:
                raw_task = yield from self._pop_task()
            except GeneratorExit:
                break
            if not raw_task:
                continue

            # Deserialize
            task = yield from self._deserialize_task(raw_task)
            if not task:
                continue

            # Set new status
            task = yield from self._move_to_inprogress(task)

            # Run task
            try:
                task_future = yield from self._run_task(task)
            except Exception as ex:
                log.error(ex, exc_info=True)
                pass
        # When finished, close the connection.
        self.current_loop.stop()
        self.connection.close()
        log.info('Bye-bye!')

    @asyncio.coroutine
    def _pop_task(self):
        # Blocking pop task from dispatched list (and atomic push to inprogress list)
        try:
            raw_task = yield from self.connection.brpoplpush(
                SETTINGS.DISPATCHED_QUEUE, SETTINGS.INPROGRESS_QUEUE,
                SETTINGS.WORKER_BPOP_TIMEOUT)
            log.debug("Got new tasks from queue {}".format(raw_task))
            return raw_task
        except asyncio_redis.TimeoutError:
            # No new tasks? Sleep for a while
            yield from asyncio.sleep(SETTINGS.WORKER_PULL_SLEEP)
            return
        except Exception:
            log.error('Unexpected error', exc_info=True)

    @asyncio.coroutine
    def _deserialize_task(self, raw_task):
        try:
            task_obj = yield from self.connection.get(
                SETTINGS.TASK_STORAGE_KEY.format(
                    raw_task.decode('utf-8')).encode('utf-8'))
            if not task_obj:
                raise TypeError()
            task = Task.deserialize(task_obj)
            log.info("Got new task id={}, type={}, status={}".format(
                task.id, task.type, task.status))
            if not task.status == Task.SCHEDULED:
                log.error(
                    "Wrong status={} for task id={}, type={}; Should be SCHEDULED"
                    .format(task.status, task.id, task.type))
                return
            return task
        except TypeError as ex:
            log.error("Wrong task_id {}".format(raw_task))
            return
        except (pickle.UnpicklingError, EOFError, TypeError, ImportError):
            yield from self.connection.lrem(SETTINGS.INPROGRESS_QUEUE,
                                            value=raw_task)
            log.error("Wrong message in queue", exc_info=True)
            return

    @asyncio.coroutine
    def _move_to_inprogress(self, task):
        """ move_to_inprogress -- Change task status to 'in progress'
        Store task in sorted set with TTL

        :param task: `sensors.models.Task` instance
        """
        ttl = task.ttl or SETTINGS.WORKER_TASK_TIMEOUT
        expires_at = datetime.datetime.now() + datetime.timedelta(seconds=ttl)
        new_task = task._replace(status=Task.INPROGRESS)

        # XXX may be transaction here?
        yield from self.connection.zadd(
            SETTINGS.INPROGRESS_TASKS_SET,
            {task.bid(): datetime_to_timestamp(expires_at)})
        yield from self.connection.set(SETTINGS.TASK_STORAGE_KEY.format(
            new_task.id).encode('utf-8'),
                                       new_task.serialize(),
                                       expire=SETTINGS.TASK_STORAGE_EXPIRE)

        return new_task

    @asyncio.coroutine
    def _throttle(self, task):
        try:
            freq = int(task.kwargs.get('execution_limit', 0))
        except:
            freq = 0

        if freq == 0:
            return False, 0
        key = SETTINGS.WORKER_THROTTLE_LOCKS.format(task.name).encode('utf-8')
        ttl = yield from self.connection.ttl(key)
        if ttl <= 0:
            # No throttle, no key
            yield from self.connection.set(key, b'', expire=freq)
            return False, 0
        else:
            # Throttled
            return True, ttl

    @asyncio.coroutine
    def _run_task(self, task):
        """ run_task -- Runs tasks

        :param task: `sensors.models.Task` instance
        """
        log.debug("_run_task {}".format(task.id))

        if task.type == Task.TYPE_TRIGGERED:
            is_throttled, expiration = yield from self._throttle(task)
            if is_throttled:
                task_future = asyncio.Future()
                self.TASKS.append(task_future)
                task_future.set_result(None)
                log.info(
                    "Task id={}, name={} throttled for {} seconds.".format(
                        task.id, task.name, expiration))
                self.db_log.info(
                    "Сценарий не был запущен из-за превышения частоты запуска",
                    None, 'action', task.id)
                self._mark_as_completed_callback(task, task_future)
                return
        # Create future to store Process
        pid = asyncio.Future()
        stopper = asyncio.Future()
        ttl = task.ttl or SETTINGS.WORKER_TASK_TIMEOUT
        # Call runner, pass pid there (need to fill in process!)
        task_future = asyncio.Task(
            self.runner(task,
                        _pid=pid,
                        stopper=stopper,
                        ttl=ttl,
                        kwargs=task.kwargs))
        # Run expire timer task, pass pid there for ability to kill process
        expire_timer_future = asyncio.Task(
            self._expire_timer_task(task=task,
                                    task_future=task_future,
                                    stopper=stopper,
                                    _pid=pid,
                                    timeout=ttl))

        # Store task in local worker cache list (need to limit tasks per worker)
        self.TASKS.append(task_future)

        # Add callbacks for completed tasks cleanup and remove expire timer
        task_future.add_done_callback(
            partial(self._mark_as_completed_callback, task))
        task_future.add_done_callback(
            partial(self._remove_expire_timer_callback, task,
                    expire_timer_future))

    ### TASKS

    @abstractmethod
    @asyncio.coroutine
    def runner(self, task, _pid, stopper, ttl, kwargs):
        """ runner -- Abstarct method, you should implement it in concrete
        Worker class.

        Runner should be a coroutine. System should run it as Task.

        Runner should accept only keyword arguments.

        Runner should accept `_pid` argument (Future of Process) and set_result
        for the _pid variable -- asyncio.subprocess.Process instance.

        Runner should accept kwargs argument.
        """
        pass

    @abstractmethod
    @asyncio.coroutine
    def _store_results(self, task, task_results):
        """ _store_results -- Abstarct method, you should implement it in concrete
        Worker class.

        Runner should be a coroutine.
        """
        pass

    @asyncio.coroutine
    def _cleanup_task(self, task):
        """ clenaup_task -- Task for cleanup completed tasks from redis queue.

        :param task: `sensors.models.Task` instance
        :return: None
        """
        log.debug("_cleanup_task task_id={}".format(task.id))
        # Remove task from inprogress queue
        # XXX may be transaction here?
        cnt1 = yield from self.connection.lrem(SETTINGS.INPROGRESS_QUEUE,
                                               value=task.bid())
        # Remove task from sorted set
        cnt2 = yield from self.connection.zrem(SETTINGS.INPROGRESS_TASKS_SET,
                                               [task.bid()])
        # Update scheduler information
        # Store next_run in scheduled
        task_scheduler_obj = yield from self.connection.hget(
            SETTINGS.SCHEDULER_HISTORY_HASH, task.name.encode('utf-8'))
        try:
            task_scheduler = SchedulerTaskHistory.deserialize(
                task_scheduler_obj)
        except (pickle.UnpicklingError, EOFError, TypeError, ImportError):
            task_scheduler = None
        if task_scheduler and task_scheduler.scheduled_task_id == task.id:
            #if task.status == Task.SUCCESSFUL:
            #    # Update last_run only on success
            #    last_run = datetime_to_timestamp(task.run_at)
            #else:
            #    # If task failed, do not update last_run (last_run is about SUCCESSFUL task exectuion)
            #    last_run = task_scheduler.last_run
            last_run = datetime_to_timestamp(task.run_at)
            task_scheduler = task_scheduler._replace(last_run=last_run,
                                                     next_run=0,
                                                     scheduled_task_id=None)
            yield from self.connection.hset(SETTINGS.SCHEDULER_HISTORY_HASH,
                                            task.name.encode('utf-8'),
                                            task_scheduler.serialize())

        # Publish message about finish
        yield from self.connection.publish(
            SETTINGS.TASK_CHANNEL.format(task.id).encode('utf-8'),
            task.status.encode('utf-8'))
        log.debug('Publish message about task {} to {}'.format(
            task.id, SETTINGS.TASK_CHANNEL.format(task.id)))
        log.debug("_cleanup_task lrem result {}".format(cnt1))
        log.debug("_cleanup_task zrem result {}".format(cnt2))

        # Ping scheduler
        yield from self._ping_scheduler(task)

    @asyncio.coroutine
    def _ping_scheduler(self, task):
        # Publish message about new finished task
        if task.type == Task.TYPE_REGULAR:
            yield from self.connection.publish(
                SETTINGS.WORKER_TO_SCHEDULER_CHANNEL, b'')

    @asyncio.coroutine
    def _expire_timer_task(self, task, task_future, _pid, stopper, timeout):
        """ expire_timer -- Task for check timeouted processes and kill it.

        :param task: `sensors.models.Task` instance
        :param task_future: `asyncio.Future instance` -- (Future of) runned
                            task we should cancel() after `timeout`
        :param _pid: `asyncio.Future instance` -- (Future of) instance of
                     asyncio.subprocess.Process -- proces we should kill() after
                     `timeout`
        :param timeout: int, timeout in seconds
        :return: None
        """
        log.debug("Run expire timer for task {}".format(task.id))
        yield from asyncio.sleep(timeout)
        try:
            stopper.set_result(True)
            task_future.cancel()
            killers = _pid.result()
            for killer in killers:
                try:
                    killer()
                except ProcessLookupError:
                    pass
                except:
                    log.error("What is this? I try to kill my action process",
                              exc_info=True)
        except:
            log.error("Unexpected error in _expire_timer", exc_info=True)
        log.debug('EXPIRE TIMER for task {}, killing process {}...'.format(
            task.id, _pid))
        self.db_log.error(
            "Сценарий был остановлен по превышению лимита времени исполнения",
            None, 'action', task.kwargs.get('_id'))

    ### CALLBACKS

    def _mark_as_completed_callback(self, task, task_future):
        # Store results, change status, remove task from local list
        log.debug('Mark as completed callback is here!')
        try:
            if task_future.result() is not None:
                asyncio.Task(self._store_results(task, task_future.result()))
                new_task = task._replace(status=Task.SUCCESSFUL)
            else:
                new_task = task._replace(status=Task.FAILED)
        except asyncio.CancelledError:
            new_task = task._replace(status=Task.FAILED)
        finally:
            del task
        log.info("Finish task id={}, status={}".format(new_task.id,
                                                       new_task.status))

        log.debug("Update task status as COMPLETED <id={}> status={}".format(
            new_task.id, new_task.status))
        asyncio.Task(
            self.connection.set(SETTINGS.TASK_STORAGE_KEY.format(
                new_task.id).encode('utf-8'),
                                new_task.serialize(),
                                expire=SETTINGS.TASK_STORAGE_EXPIRE))
        asyncio.Task(self._cleanup_task(new_task))

        # Callback вызывается thread-safe — можно выпилить локи и юзать просто лист
        self.TASKS.remove(task_future)

    def _remove_expire_timer_callback(self, task, expire_timer_future,
                                      task_future):
        # Remove exire time checker if task is successfully completed
        log.debug("Cancel expire timer Task for task {}".format(task.id))
        expire_timer_future.cancel()

    ## Helpers
    @staticmethod
    def _get_esc_char(matchobj):
        char = matchobj.group(1).upper()
        if ord(char) in range(65, 96):
            code = ord(char) - 64
            return chr(code)
        return matchobj.group(0)

    @classmethod
    def _parse_command(cls, command):
        # Convert command like <CTRL+X> to escape character
        tmp = command
        tmp = PARSE_COMMAND_RE.sub(cls._get_esc_char, tmp)
        tmp = PARSE_COMMAND_NEWLINE_RE.sub(lambda x: '\r', tmp)
        return tmp