def resolve_patterns(request, expression_values):
    """Resolve graphite patterns for the trigger carried in *request*.

    Evaluates every target over the last 10 minutes, records the resulting
    series names in the request context, stores a placeholder datapoint per
    target ("t1", "t2", ...) into *expression_values*, and writes the root
    patterns plus the is_simple_trigger flag back into request.body_json.
    """
    now = int(time())
    context = createRequestContext(str(now - 600), str(now), allowRealTimeAlerting=True)
    context['time_series_names'] = set()
    targets = request.body_json["targets"]
    # A trigger with more than one target can never be simple.
    simple = len(targets) <= 1
    for index, target in enumerate(targets, start=1):
        series_list = yield evaluateTarget(context, target)
        # Short-circuit: stop consulting is_simple_target once complexity is known.
        if simple and not is_simple_target(context):
            simple = False
        for series in series_list:
            context['time_series_names'].add(series.name)
        expression_values["t%s" % index] = 42
    # Patterns that appear among another pattern's resolutions are derived;
    # keep only the root patterns.
    derived = set()
    for pattern, resolutions in context['graphite_patterns'].iteritems():
        derived.update(r for r in resolutions if r != pattern)
    request.body_json["patterns"] = [
        p for p in context['graphite_patterns'] if p not in derived
    ]
    request.body_json["is_simple_trigger"] = simple
    request.context = context
def resolve_patterns(request, expression_values):
    """Resolve graphite patterns for the trigger carried in *request*.

    Side effects: fills *expression_values* with one placeholder datapoint
    per target ("t1", "t2", ...), stores the evaluated series names and the
    request context on *request*, and writes the root pattern list plus the
    "is_simple_trigger" flag into request.body_json.
    """
    now = int(time())
    # 10-minute evaluation window ending now; allowRealTimeAlerting=True —
    # presumably includes not-yet-persisted datapoints (confirm in datalib).
    context = createRequestContext(str(now - 600), str(now), allowRealTimeAlerting=True)
    resolved = set()
    target_num = 1
    context['time_series_names'] = set()
    is_simple_trigger = True
    # More than one target immediately disqualifies the trigger from "simple".
    if len(request.body_json["targets"]) > 1:
        is_simple_trigger = False
    for target in request.body_json["targets"]:
        time_series = yield evaluateTarget(context, target)
        # Only consult is_simple_target while still simple (sticky False).
        if is_simple_trigger and not is_simple_target(context):
            is_simple_trigger = False
        target_name = "t%s" % target_num
        for ts in time_series:
            context['time_series_names'].add(ts.name)
        # Fixed placeholder datapoint — used for expression validation only,
        # not a real metric value.
        expression_values[target_name] = 42
        target_num += 1
    # A pattern listed among another pattern's resolutions is derived; the
    # final list keeps only patterns that were never produced by resolution.
    for pattern, resolve in context['graphite_patterns'].iteritems():
        for r in resolve:
            if r != pattern:
                resolved.add(r)
    request.body_json["patterns"] = [pattern for pattern in context['graphite_patterns'] if pattern not in resolved]
    request.body_json["is_simple_trigger"] = is_simple_trigger
    request.context = context
def movingAverageBootstrap(self, allowRealTimeAlerting):
    """Shared fixture: seed a movingAverage trigger and check its first value.

    Creates a trigger over movingAverage(m, 2), sends ten datapoints (values
    0..9, one per minute), runs a check, then fetches the series for the
    window [now-180, now-60] and asserts the first returned value is 6.5
    (the 2-point moving average at that offset).
    """
    yield self.sendTrigger(
        '{"name": "t", "targets": ["movingAverage(m, 2)"], "warn_value": 1, "error_value": 90, "ttl":"600" }')
    for n in range(0, 10):
        # Timestamps walk forward minute by minute up to now-60.
        yield self.db.sendMetric('m', 'm', self.now - 60 * (10 - n), n)
    yield self.trigger.check(fromTime=self.now - 300, now=self.now)
    fromTime = str(self.now - 180)
    endTime = str(self.now - 60)
    rc = datalib.createRequestContext(fromTime, endTime, allowRealTimeAlerting)
    result = yield self.trigger.get_timeseries(rc)
    # result[1] — series for target t1; take the first (only) series.
    ts = result[1][0]
    self.assertEqual(ts[0], 6.5)
def movingAverageBootstrap(self, allowRealTimeAlerting):
    """Seed a movingAverage(m, 2) trigger with ten datapoints and verify the
    first value of the series fetched for the window [now-180, now-60]."""
    yield self.sendTrigger(
        '{"name": "t", "targets": ["movingAverage(m, 2)"], "warn_value": 1, "error_value": 90, "ttl":"600" }'
    )
    for offset in range(10):
        yield self.db.sendMetric('m', 'm', self.now - 60 * (10 - offset), offset)
    yield self.trigger.check(fromTime=self.now - 300, now=self.now)
    window_start = str(self.now - 180)
    window_end = str(self.now - 60)
    request_context = datalib.createRequestContext(window_start, window_end, allowRealTimeAlerting)
    series_by_target = yield self.trigger.get_timeseries(request_context)
    first_series = series_by_target[1][0]
    self.assertEqual(first_series[0], 6.5)
def render_GET(self, request):
    """Render the trigger's metric values for the requested range as JSON.

    Query args: 'from' and 'to' (graphite-style timestamps). Responds 400
    when the trigger is unknown or either argument is missing; otherwise
    writes {series_name: [{"ts": ..., "value": ...}, ...]} skipping None
    datapoints.
    """
    json, trigger = yield self.db.getTrigger(self.trigger_id)
    if json is None:
        defer.returnValue(bad_request(request, "Trigger not found"))
        raise StopIteration
    from_arg = request.args.get('from')
    to_arg = request.args.get('to')
    # Fix: .get() returns None for an absent query parameter; indexing None
    # raised TypeError and produced a 500 instead of a clean 400.
    if not from_arg or not to_arg:
        defer.returnValue(bad_request(request, "Invalid parameters"))
        raise StopIteration
    context = createRequestContext(from_arg[0], to_arg[0])
    result = {}
    for target in trigger.get("targets", [trigger.get("target")]):
        time_series = yield evaluateTarget(context, target)
        for time_serie in time_series:
            # Reconstruct (timestamp, value) pairs from start/step indexing.
            values = [(time_serie.start + time_serie.step * i, time_serie[i])
                      for i in range(0, len(time_serie))]
            result[time_serie.name] = [{"ts": ts, "value": value}
                                       for ts, value in values if value is not None]
    self.write_json(request, result)
def render_GET(self, request):
    """Write JSON time-series values for this trigger over ['from', 'to'].

    Output shape: {series_name: [{"ts": timestamp, "value": value}, ...]}
    with None datapoints omitted. Responds 400 when the trigger is unknown.
    """
    json, trigger = yield self.db.getTrigger(self.trigger_id)
    if json is None:
        defer.returnValue(bad_request(request, "Trigger not found"))
        raise StopIteration
    fromTime = request.args.get('from')[0]
    endTime = request.args.get('to')[0]
    context = createRequestContext(fromTime, endTime, allowRealTimeAlerting=True)
    result = {}
    for target in trigger.get("targets", [trigger.get("target")]):
        series_list = yield evaluateTarget(context, target)
        for series in series_list:
            points = []
            for index in range(len(series)):
                value = series[index]
                if value is not None:
                    points.append({"ts": series.start + series.step * index,
                                   "value": value})
            result[series.name] = points
    self.write_json(request, result)
def resolve_patterns(request, expression_values):
    """Resolve graphite patterns for the trigger targets in *request*.

    Evaluates each target over the last 10 seconds, records one sample
    datapoint per target into *expression_values* under "t1", "t2", ...
    and writes the list of root (unresolved) patterns back into
    request.body_json["patterns"].
    """
    now = int(time())
    context = createRequestContext(str(now - 10), str(now))
    resolved = set()
    target_num = 1
    for target in request.body_json["targets"]:
        # Fix: the evaluated series were discarded and a hard-coded 42 was
        # always stored. Keep 42 only as a fallback for targets that
        # returned no data, otherwise use the last real datapoint so the
        # expression is validated against a realistic value.
        target_time_series = yield evaluateTarget(context, target)
        target_name = "t%s" % target_num
        expression_values[target_name] = 42
        if len(target_time_series) > 0 and len(target_time_series[0]) > 0:
            expression_values[target_name] = target_time_series[0][-1]
        target_num += 1
    # Patterns that show up among another pattern's resolutions are derived;
    # keep only the root patterns.
    for pattern, resolve in context['graphite_patterns'].iteritems():
        for r in resolve:
            if r != pattern:
                resolved.add(r)
    request.body_json["patterns"] = [pattern for pattern in context['graphite_patterns'] if pattern not in resolved]
def resolve_patterns(request, expression_values):
    """Resolve graphite patterns for the trigger targets in *request*.

    For each target, stores a sample datapoint (the last real value, or 42
    when the target produced no data) into *expression_values* under
    "t1", "t2", ..., then writes the root patterns back into
    request.body_json["patterns"].
    """
    now = int(time())
    requestContext = createRequestContext(str(now - 10), str(now))
    for index, target in enumerate(request.body_json["targets"], start=1):
        series_list = yield evaluateTarget(requestContext, target)
        name = "t%s" % index
        # Fallback sample when the target returned no datapoints.
        expression_values[name] = 42
        if len(series_list) > 0 and len(series_list[0]) > 0:
            expression_values[name] = series_list[0][-1]
    # Resolutions that differ from their source pattern are derived names;
    # only patterns never produced by resolution survive.
    derived = set()
    for pattern, resolutions in requestContext['graphite_patterns'].iteritems():
        derived.update(r for r in resolutions if r != pattern)
    request.body_json["patterns"] = [
        p for p in requestContext['graphite_patterns'] if p not in derived
    ]
def migrate_triggers(db):
    """One-off migration: compute and persist "is_simple_trigger" per trigger.

    Skips triggers that already carry the flag. A trigger is simple when it
    has exactly one target and that target resolves to a simple graphite
    expression. Per-trigger failures are logged and counted, never fatal.
    """
    now = int(time())
    trigger_ids = yield db.getTriggers()
    logs.log.info("triggers count: %d" % len(trigger_ids))
    converted_triggers_count = 0
    simple_triggers_count = 0
    complex_triggers_count = 0
    failed_triggers_count = 0
    for trigger_id in trigger_ids:
        try:
            json, _ = yield db.getTrigger(trigger_id)
            if json is None:
                continue
            trigger = anyjson.deserialize(json)
            if "is_simple_trigger" in trigger:
                continue  # already migrated
            logs.log.info("recalculating for trigger %s (%s)" % (trigger_id, trigger.get("name")))
            # Evaluate over the last 10 minutes just to populate the context
            # that is_simple_target inspects.
            context = createRequestContext(str(now - 600), str(now), allowRealTimeAlerting=True)
            if len(trigger["targets"]) != 1:
                is_simple_trigger = False
            else:
                yield evaluateTarget(context, trigger["targets"][0])
                is_simple_trigger = is_simple_target(context)
            trigger["is_simple_trigger"] = is_simple_trigger
            logs.log.info(str(trigger["is_simple_trigger"]))
            yield db.saveTrigger(trigger_id, trigger)
            converted_triggers_count += 1
            if is_simple_trigger:
                simple_triggers_count += 1
            else:
                complex_triggers_count += 1
        # Fix: was "except Exception, e" — removed Python 2-only comma syntax,
        # matching the "except Exception as e" style used elsewhere in this file.
        except Exception as e:
            failed_triggers_count += 1
            logs.log.error("conversion failed for trigger: %s" % e)
    # Fix: the counters were accumulated but never reported; emit a summary.
    logs.log.info("converted: %d (simple: %d, complex: %d), failed: %d" % (
        converted_triggers_count, simple_triggers_count,
        complex_triggers_count, failed_triggers_count))
def trigger(trigger, fromTime, now, cache_ttl):
    """Run one check cycle for *trigger* and persist the resulting last_check.

    Fetches the trigger's time series over [fromTime - ttl, now], evaluates
    the trigger expression for every new datapoint of each t1 series,
    compares states to emit events, handles metric-TTL expiry, and finally
    recomputes the trigger score and stores the check via setTriggerLastCheck.
    Any evaluation failure marks the whole check as EXCEPTION instead of
    propagating.
    """
    now = now or int(time())
    log.info("Checking trigger {id}", id=trigger.id)
    initialized = yield trigger.init(now, fromTime=fromTime)
    if not initialized:
        raise StopIteration
    if fromTime is None:
        # Resume from the previous check's timestamp when no explicit start.
        fromTime = trigger.last_check.get("timestamp", now)
    requestContext = datalib.createRequestContext(
        str(fromTime - (trigger.ttl or 600)), str(now))
    check = {
        "metrics": trigger.last_check["metrics"].copy(),
        "state": state.OK,
        "timestamp": now,
        "score": trigger.last_check.get("score")
    }
    try:
        time_series = yield trigger.get_timeseries(requestContext)
        # Drop datapoints older than the retention horizon for every metric
        # that participated in this evaluation.
        for metric in requestContext['metrics']:
            yield trigger.db.cleanupMetricValues(metric, now - config.METRICS_TTL,
                                                 cache_key=metric, cache_ttl=cache_ttl)
        if not time_series:
            if trigger.ttl:
                # No data at all: fall to the configured ttl_state.
                check["state"] = trigger.ttl_state
                check["msg"] = "Trigger has no metrics"
                yield event.compare_states(trigger, check, trigger.last_check, now)
        else:
            # Seed per-metric state from each series' last known state,
            # skipping stub (synthetic placeholder) series.
            for t_series in time_series.values():
                for tN in t_series:
                    if not tN.stub:
                        check["metrics"][tN.name] = tN.last_state.copy()
            # Only the t1 series drive the evaluation loop; tN values are
            # joined in via get_expression_values below.
            for t1 in time_series[1]:
                log.debug("Checking timeserie {name}: {values}",
                          name=t1.name, values=list(t1))
                log.debug(
                    "Checking interval: {start} - {end} ({duration}s), step: {step}",
                    start=t1.start, end=t1.end, step=t1.step,
                    duration=t1.end - t1.start)
                metric_state = check["metrics"].get(t1.name)
                if not metric_state:
                    log.debug("No metric state for {name}.", name=t1.name)
                    continue
                # Re-evaluate a small window behind the last seen timestamp
                # (CHECKPOINT_GAP), but never before the last emitted event.
                checkpoint = max(
                    t1.last_state["timestamp"] - config.CHECKPOINT_GAP,
                    metric_state.get("event_timestamp", 0))
                log.debug("Checkpoint for {name}: {checkpoint}",
                          name=t1.name, checkpoint=checkpoint)
                for value_timestamp in xrange(t1.start, now + t1.step, t1.step):
                    if value_timestamp <= checkpoint:
                        continue
                    expression_values = time_series.get_expression_values(
                        t1, value_timestamp)
                    t1_value = expression_values["t1"]
                    log.debug("values for ts {timestamp}: {values}",
                              timestamp=value_timestamp, values=expression_values)
                    # Skip timestamps where any referenced series has a gap.
                    if None in expression_values.values():
                        continue
                    expression_values.update({
                        'warn_value': trigger.struct.get('warn_value'),
                        'error_value': trigger.struct.get('error_value'),
                        'PREV_STATE': metric_state['state']
                    })
                    expression_state = expression.getExpression(
                        trigger.struct.get('expression'), **expression_values)
                    time_series.update_state(t1, check, expression_state,
                                             expression_values, value_timestamp)
                    yield event.compare_states(trigger, metric_state, t1.last_state,
                                               value_timestamp, value=t1_value,
                                               metric=t1.name)
                # compare with last_check timestamp in case if we have not run checker for a long time
                if trigger.ttl and metric_state[
                        "timestamp"] + trigger.ttl < trigger.last_check[
                        "timestamp"]:
                    log.info("Metric {name} TTL expired for state {state}",
                             name=t1.name, state=metric_state)
                    if trigger.ttl_state == state.DEL and metric_state.get(
                            "event_timestamp") is not None:
                        # DEL policy: forget this metric (and its companion
                        # tN metrics) entirely and drop its pattern bindings.
                        log.info("Remove metric {name}", name=t1.name)
                        del check["metrics"][t1.name]
                        for tN, tName in time_series.other_targets_names.iteritems():
                            log.info("Remove metric {name}", name=tName)
                            del check["metrics"][tName]
                        for pattern in trigger.struct.get("patterns"):
                            yield trigger.db.delPatternMetrics(pattern)
                        continue
                    # Otherwise degrade the metric to the configured ttl_state.
                    time_series.update_state(
                        t1, check, state.to_metric_state(trigger.ttl_state), None,
                        trigger.last_check["timestamp"] - trigger.ttl)
                    yield event.compare_states(trigger, metric_state, t1.last_state,
                                               metric_state["timestamp"],
                                               metric=t1.name)
    except StopIteration:
        raise
    except Exception as e:
        log.error("Trigger check failed: {e}", e=e)
        check["state"] = state.EXCEPTION
        check["msg"] = "Trigger evaluation exception"
        yield event.compare_states(trigger, check, trigger.last_check, now)
    # Score: sum of per-metric state scores plus the trigger state's own score.
    scores = sum(
        map(lambda m: state.SCORES[m["state"]], check["metrics"].itervalues()))
    check["score"] = scores + state.SCORES[check["state"]]
    yield trigger.db.setTriggerLastCheck(trigger.id, check)
def trigger(trigger, fromTime, now, cache_ttl):
    """Run one check cycle for *trigger* and persist the resulting last_check.

    Evaluates the trigger expression for every new datapoint of each t1
    series over [fromTime - ttl, now], emits state-change events, applies
    metric-TTL expiry, optionally recomputes the score, and stores the
    check. Evaluation failures mark the check EXCEPTION instead of raising.
    """
    now = now or int(time())
    log.msg("Checking trigger %s" % trigger.id)
    initialized = yield trigger.init(now, fromTime=fromTime)
    if not initialized:
        raise StopIteration
    if fromTime is None:
        # Resume from the previous check's timestamp when no explicit start.
        fromTime = trigger.last_check.get("timestamp", now)
    requestContext = datalib.createRequestContext(
        str(fromTime - (trigger.ttl or 600)), str(now))
    check = {
        "metrics": trigger.last_check["metrics"].copy(),
        "state": state.OK,
        "timestamp": now,
        "score": trigger.last_check.get("score")
    }
    try:
        time_series = yield trigger.get_timeseries(requestContext)
        # Purge datapoints older than the retention horizon for every metric
        # touched by this evaluation.
        for metric in requestContext['metrics']:
            yield trigger.db.cleanupMetricValues(metric, now - config.METRICS_TTL,
                                                 cache_key=metric, cache_ttl=cache_ttl)
        if not time_series:
            if trigger.ttl:
                check["state"] = trigger.ttl_state
                check["msg"] = "Trigger has no metrics"
                yield event.compare_states(trigger, check, trigger.last_check, now)
        else:
            # Seed per-metric state from each non-stub series' last state.
            for t_series in time_series.values():
                for tN in t_series:
                    if not tN.stub:
                        check["metrics"][tN.name] = tN.last_state.copy()
            for t1 in time_series[1]:
                metric_state = check["metrics"].get(t1.name)
                if not metric_state:
                    continue
                # Re-evaluate a short window behind the last seen timestamp,
                # but never before the last emitted event.
                checkpoint = max(
                    t1.last_state["timestamp"] - config.CHECKPOINT_GAP,
                    metric_state.get("event_timestamp", 0))
                for value_timestamp in xrange(t1.start, now + t1.step, t1.step):
                    if value_timestamp <= checkpoint:
                        continue
                    expression_values = time_series.get_expression_values(
                        t1, value_timestamp)
                    t1_value = expression_values["t1"]
                    # Skip timestamps where any referenced series has a gap.
                    if None in expression_values.values():
                        continue
                    expression_values.update({
                        'warn_value': trigger.struct.get('warn_value'),
                        'error_value': trigger.struct.get('error_value'),
                        'PREV_STATE': metric_state['state']
                    })
                    expression_state = expression.getExpression(
                        trigger.struct.get('expression'), **expression_values)
                    time_series.update_state(t1, check, expression_state,
                                             expression_values, value_timestamp)
                    yield event.compare_states(trigger, metric_state, t1.last_state,
                                               value_timestamp, value=t1_value,
                                               metric=t1.name)
                # compare with last_check timestamp in case if we have not run checker for a long time
                if trigger.ttl and metric_state[
                        "timestamp"] + trigger.ttl < trigger.last_check[
                        "timestamp"]:
                    log.msg("Metric %s TTL expired for state %s" % (t1.name, metric_state))
                    if trigger.ttl_state == state.DEL and metric_state.get(
                            "event_timestamp") is not None:
                        # DEL policy: drop this metric, its companion tN
                        # metrics, and the pattern bindings.
                        log.msg("Remove metric %s" % t1.name)
                        del check["metrics"][t1.name]
                        for tN, tName in time_series.other_targets_names.iteritems():
                            log.msg("Remove metric %s" % tName)
                            del check["metrics"][tName]
                        for pattern in trigger.struct.get("patterns"):
                            yield trigger.db.delPatternMetrics(pattern)
                        continue
                    # Otherwise degrade the metric to the configured ttl_state.
                    time_series.update_state(
                        t1, check, state.to_metric_state(trigger.ttl_state), None,
                        trigger.last_check["timestamp"] - trigger.ttl)
                    yield event.compare_states(trigger, metric_state, t1.last_state,
                                               metric_state["timestamp"],
                                               metric=t1.name)
    except StopIteration:
        raise
    except Exception:
        log.err()
        check["state"] = state.EXCEPTION
        check["msg"] = "Trigger evaluation exception"
        yield event.compare_states(trigger, check, trigger.last_check, now)
    if trigger.update_score:
        update_score(check)
    yield trigger.db.setTriggerLastCheck(trigger.id, check)
def check(self, fromTime=None, now=None, cache_ttl=60):
    """Run one check cycle for this trigger and persist the last_check.

    Evaluates the trigger expression for every datapoint of each t1 series
    newer than that series' last seen timestamp, compares states to emit
    events, applies metric-TTL expiry, and stores the resulting check.
    Evaluation failures mark the check EXCEPTION instead of propagating.
    """
    now = now or int(time())
    log.msg("Checking trigger %s" % self.id)
    initialized = yield self.init(now, fromTime=fromTime)
    if not initialized:
        raise StopIteration
    if fromTime is None:
        # Resume from the previous check's timestamp when no explicit start.
        fromTime = self.last_check.get("timestamp", now)
    requestContext = datalib.createRequestContext(str(fromTime - (self.ttl or 600)), str(now))
    check = {"metrics": self.last_check["metrics"].copy(), "state": state.OK, "timestamp": now}
    try:
        time_series = yield self.get_timeseries(requestContext)
        # Purge datapoints older than the retention horizon for every metric
        # touched by this evaluation.
        for metric in requestContext['metrics']:
            yield self.db.cleanupMetricValues(metric, now - config.METRICS_TTL,
                                              cache_key=metric, cache_ttl=cache_ttl)
        if len(time_series) == 0:
            if self.ttl:
                check["state"] = self.ttl_state
                check["msg"] = "Trigger has no metrics"
                yield self.compare_state(check, self.last_check, now)
        else:
            # Seed per-metric state from each series' last known state.
            for t_series in time_series.values():
                for tN in t_series:
                    check["metrics"][tN.name] = tN.last_state.copy()
            for t1 in time_series[1]:
                metric_state = check["metrics"][t1.name]
                for value_timestamp in xrange(t1.start, now + t1.step, t1.step):
                    # Only datapoints newer than the series' last seen
                    # timestamp are (re)evaluated.
                    if value_timestamp <= t1.last_state["timestamp"]:
                        continue
                    expression_values = time_series.get_expression_values(t1, value_timestamp)
                    t1_value = expression_values["t1"]
                    # Skip timestamps where any referenced series has a gap.
                    if None in expression_values.values():
                        continue
                    expression_values.update({'warn_value': self.struct.get('warn_value'),
                                              'error_value': self.struct.get('error_value'),
                                              'PREV_STATE': metric_state['state']})
                    expression_state = expression.getExpression(self.struct.get('expression'),
                                                                **expression_values)
                    time_series.update_state(t1, check, expression_state, expression_values,
                                             value_timestamp)
                    yield self.compare_state(metric_state, t1.last_state, value_timestamp,
                                             value=t1_value, metric=t1.name)
                # compare with last_check timestamp in case if we have not run checker for a long time
                if self.ttl and metric_state["timestamp"] + self.ttl < self.last_check["timestamp"]:
                    log.msg("Metric %s TTL expired for state %s" % (t1.name, metric_state))
                    if self.ttl_state == state.DEL and metric_state.get("event_timestamp") is not None:
                        # DEL policy: drop this metric, its companion tN
                        # metrics, and the pattern bindings.
                        log.msg("Remove metric %s" % t1.name)
                        del check["metrics"][t1.name]
                        for tN, tName in time_series.other_targets_names.iteritems():
                            log.msg("Remove metric %s" % tName)
                            del check["metrics"][tName]
                        for pattern in self.struct.get("patterns"):
                            yield self.db.delPatternMetrics(pattern)
                        continue
                    # Otherwise degrade the metric to the configured ttl_state.
                    time_series.update_state(t1, check, state.toMetricState(self.ttl_state), None,
                                             self.last_check["timestamp"] - self.ttl)
                    yield self.compare_state(metric_state, t1.last_state, metric_state["timestamp"],
                                             metric=t1.name)
    except StopIteration:
        raise
    # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit;
    # a sibling implementation in this file narrows it to Exception.
    except:
        log.err()
        check["state"] = state.EXCEPTION
        check["msg"] = "Trigger evaluation exception"
        yield self.compare_state(check, self.last_check, now)
    yield self.db.setTriggerLastCheck(self.id, check)