def wrapper():
    # inlineCallbacks-style wrapper around the decorated handler.
    # `f`, `resource` and `request` come from the enclosing decorator's
    # closure (not visible in this chunk).
    try:
        yield f(resource, request)
    except Exception as e:
        # Log and answer 500 explicitly so the Twisted request is finished
        # and the client is not left hanging on an open connection.
        log.error("Error in delayed decorator wrapped function: {e}", e=e)
        request.setResponseCode(http.INTERNAL_SERVER_ERROR)
        request.finish()
def decorator(*args, **kwargs):
    """Read and JSON-decode the request body before calling the wrapped
    handler ``f``.

    Side effects on ``request`` (assumed to be ``args[1]``):
      - ``request.body`` is set to the raw body bytes;
      - ``request.body_json`` is set to the deserialized payload.

    Answers 400 ("Content is not json") via ``bad_request`` when the body
    cannot be read or parsed.
    """
    request = args[1]
    # Fix: pre-set body so the error log below cannot raise AttributeError
    # when request.content.getvalue() itself is what failed — previously
    # the handler referenced request.body before it was ever assigned.
    request.body = None
    try:
        request.body = request.content.getvalue()
        request.body_json = anyjson.deserialize(request.body)
    except Exception as e:
        log.error("Invalid trigger json [{json}]: {e}", json=request.body, e=e)
        defer.returnValue(bad_request(request, "Content is not json"))
    yield f(*args, **kwargs)
def checkNoData(self):
    """Queue a nodata check for every trigger.

    Skipped entirely (with an informational log line) when no metrics have
    arrived for longer than config.STOP_CHECKING_INTERVAL seconds.
    Any failure is logged and swallowed so the periodic task keeps running.
    """
    try:
        current = reactor.seconds()
        if self.db.last_data + config.STOP_CHECKING_INTERVAL < current:
            log.info("Checking nodata disabled. No metrics for {seconds} seconds",
                     seconds=int(current - self.db.last_data))
        else:
            log.info("Checking nodata")
            trigger_ids = yield self.db.getTriggers()
            # Short per-trigger cache TTL keeps duplicate enqueues cheap.
            for tid in trigger_ids:
                yield self.db.addTriggerCheck(tid, cache_key=tid, cache_ttl=60)
    except Exception as e:
        log.error("NoData check failed: {e}", e=e)
def send(self, get_metrics):
    """Send metrics to the first connected graphite replica.

    Walks the replica ring starting at ``self.index``: each disconnected
    candidate gets a ``connect()`` kick, then the ring advances. If the
    index wraps all the way around without finding a connected replica,
    logs an error and gives up (metrics are dropped for this round).
    """
    start_index = self.index
    replica = self.replicas[self.index]
    while not replica.connected():
        replica.connect()
        self.next()
        if self.index == start_index:
            log.error("No graphite connection")
            return
        # Bug fix: probe the NEXT candidate. The original never reassigned
        # `replica` inside the loop, so it kept reconnecting the same first
        # replica while only the index advanced — failover never happened.
        replica = self.replicas[self.index]
    replica.send(get_metrics)
    # Rotate so consecutive sends spread across replicas.
    self.next()
    log.info("Sent metrics to {replica}", replica=replica)
def perform(self):
    """Drain the trigger-check queue, then sleep a jittered interval.

    For each queued trigger id: try to take the per-trigger check lock;
    only the lock holder runs the actual check (other workers skip and
    refetch). The refetch happens at loop level so an unacquired lock
    still advances to the next queued trigger.
    """
    try:
        trigger_id = yield self.db.getTriggerToCheck()
        while trigger_id is not None:
            acquired = yield self.db.setTriggerCheckLock(trigger_id)
            if acquired is not None:
                start = reactor.seconds()
                trigger = Trigger(trigger_id, self.db)
                yield trigger.check()
                end = reactor.seconds()
                yield self.db.delTriggerCheckLock(trigger_id)
                # Report wall-clock check duration to the stats collector.
                spy.TRIGGER_CHECK.report(end - start)
            trigger_id = yield self.db.getTriggerToCheck()
        # Queue empty: randomized back-off to de-synchronize workers.
        yield task.deferLater(reactor, random.uniform(PERFORM_INTERVAL * 10, PERFORM_INTERVAL * 20), lambda: None)
    except GeneratorExit:
        # Normal generator shutdown (service stopping) — not an error.
        pass
    except Exception as e:
        spy.TRIGGER_CHECK_ERRORS.report(0)
        log.error("Failed to perform triggers check: {e}", e=e)
        yield task.deferLater(reactor, ERROR_TIMEOUT, lambda: None)
def convert(db):
    """One-off migration: rewrite metric sorted-sets from the old key/value
    layout to the new one, then stop the service and the reactor."""
    log.info(db.rc)
    log.info("Reading metrics keys")
    keys = yield db.rc.keys(METRIC_OLD_PREFIX.format("*"))
    log.info("Converting ...")
    for key in keys:
        # Assumes old keys contain exactly one ':' separator — TODO confirm.
        _, name = key.split(':')
        try:
            pipe = yield db.rc.pipeline()
            metrics = yield db.rc.zrange(key)
            for metric in metrics:
                # NOTE(review): old member format is presumed "value timestamp";
                # the new member is "timestamp value" scored by timestamp —
                # verify against the writer of METRIC_OLD_PREFIX entries.
                value, timestamp = metric.split()
                pipe.zadd(METRIC_PREFIX.format(name), timestamp, "{0} {1}".format(timestamp, value))
            yield pipe.execute_pipeline()
        except txredisapi.ResponseError as e:
            # Best-effort: a failed key is logged and skipped, not fatal.
            log.error("Can not convert {key}: {e}", key=key, e=e)
        log.info("Metric {name} converted", name=name)
    yield db.stopService()
    reactor.stop()
def decorator(*args, **kwargs):
    """Validate the trigger JSON already parsed onto ``request.body_json``,
    reformat it in place, resolve graphite patterns, and check the
    expression — answering 400 via ``bad_request`` on any failure before
    finally delegating to the wrapped handler ``f``.
    """
    request = args[1]
    json = request.body_json
    request.graphite_patterns = []
    # "targets" is mandatory; warn/error values may be replaced by an expression.
    for field, alt in [("targets", None), ("warn_value", "expression"), ("error_value", "expression")]:
        if json.get(field) is None and json.get(alt) is None:
            defer.returnValue(
                bad_request(request, "%s is required" % field))
    # Consistency with the sibling validator: reject a non-list "targets"
    # early, before pattern resolution tries to iterate over it.
    if not isinstance(json["targets"], list):
        defer.returnValue(bad_request(request, "Invalid trigger targets"))
    try:
        request.body_json = trigger_reformat(json, json.get("id"), json.get("tags", []))
    except Exception as e:
        log.error("Invalid trigger format [{json}]: {e}", json=json, e=e)
        defer.returnValue(bad_request(request, "Invalid trigger format"))
    expression_values = {
        'warn_value': json.get('warn_value'),
        'error_value': json.get('error_value')
    }
    try:
        yield resolve_patterns(request, expression_values)
    except Exception as e:
        log.error("Invalid graphite targets [{targets}]: {e}",
                  targets=request.body_json["targets"], e=e)
        defer.returnValue(bad_request(request, "Invalid graphite targets"))
    try:
        # Dry-run the expression against the collected values so syntax
        # errors are reported at save time, not at check time.
        getExpression(json.get("expression"), **expression_values)
    except Exception as e:
        log.error("Invalid expression [{expression}]: {e}",
                  expression=json.get("expression"), e=e)
        defer.returnValue(bad_request(request, "Invalid expression"))
    yield f(*args, **kwargs)
def decorator(*args, **kwargs):
    """Validate and reformat the trigger JSON on ``request.body_json``.

    Each validation failure short-circuits with a 400 response through
    ``bad_request``; on success control passes to the wrapped handler ``f``.
    """
    request = args[1]
    json = request.body_json
    request.graphite_patterns = []

    required_fields = (
        ("targets", None),
        ("warn_value", "expression"),
        ("error_value", "expression"),
    )
    for required, fallback in required_fields:
        if json.get(required) is None and json.get(fallback) is None:
            defer.returnValue(bad_request(request, "%s is required" % required))

    if type(json["targets"]) is not list:
        defer.returnValue(bad_request(request, "Invalid trigger targets"))

    try:
        request.body_json = trigger_reformat(json, json.get("id"), json.get("tags", []))
    except Exception as e:
        log.error("Invalid trigger format [{json}]: {e}", json=json, e=e)
        defer.returnValue(bad_request(request, "Invalid trigger format"))

    expression_values = {
        'warn_value': json.get('warn_value'),
        'error_value': json.get('error_value'),
    }

    try:
        yield resolve_patterns(request, expression_values)
    except Exception as e:
        log.error("Invalid graphite targets [{targets}]: {e}",
                  targets=request.body_json["targets"], e=e)
        defer.returnValue(bad_request(request, "Invalid graphite targets"))

    try:
        getExpression(json.get("expression"), **expression_values)
    except Exception as e:
        log.error("Invalid expression [{expression}]: {e}",
                  expression=json.get("expression"), e=e)
        defer.returnValue(bad_request(request, "Invalid expression"))

    yield f(*args, **kwargs)
def messageReceived(self, ignored, channel, message, nocache=False):
    """Handle one pub/sub metric event.

    Records the metric under its pattern and enqueues a check for every
    trigger subscribed to that pattern. When no trigger subscribes any
    more, the pattern and all metrics accumulated under it are removed.
    Errors are logged and swallowed so the subscriber keeps running.
    """
    try:
        payload = anyjson.deserialize(message)
        db = self.factory.db
        db.last_data = reactor.seconds()
        pattern = payload["pattern"]
        metric = payload["metric"]
        yield db.addPatternMetric(pattern, metric)
        triggers = yield db.getPatternTriggers(pattern)
        if not triggers:
            # Orphaned pattern: drop it with everything stored under it.
            yield db.removePattern(pattern)
            stale_metrics = yield db.getPatternMetrics(pattern)
            for stale in stale_metrics:
                yield db.delMetric(stale)
            yield db.delPatternMetrics(pattern)
        for trigger_id in triggers:
            if nocache:
                yield db.addTriggerCheck(trigger_id)
            else:
                # Cached enqueue throttles duplicate checks per interval.
                yield db.addTriggerCheck(trigger_id, cache_key=trigger_id,
                                         cache_ttl=config.CHECK_INTERVAL)
    except Exception as e:
        log.error("Failed to receive metric: {e}", e=e)
def perform(self):
    """Process queued trigger checks until the queue empties, then back off.

    Only the worker that wins setTriggerCheckLock runs the check for a
    given trigger; losers just fetch the next queued id. NOTE: this block
    duplicates another perform() in this file — keep them in sync.
    """
    try:
        trigger_id = yield self.db.getTriggerToCheck()
        while trigger_id is not None:
            acquired = yield self.db.setTriggerCheckLock(trigger_id)
            if acquired is not None:
                start = reactor.seconds()
                trigger = Trigger(trigger_id, self.db)
                yield trigger.check()
                end = reactor.seconds()
                yield self.db.delTriggerCheckLock(trigger_id)
                # Duration metric for monitoring the checker itself.
                spy.TRIGGER_CHECK.report(end - start)
            trigger_id = yield self.db.getTriggerToCheck()
        # Random jitter prevents workers from polling in lock-step.
        yield task.deferLater(
            reactor,
            random.uniform(PERFORM_INTERVAL * 10, PERFORM_INTERVAL * 20),
            lambda: None)
    except GeneratorExit:
        # Generator closed on service shutdown — intentional no-op.
        pass
    except Exception as e:
        spy.TRIGGER_CHECK_ERRORS.report(0)
        log.error("Failed to perform triggers check: {e}", e=e)
        yield task.deferLater(reactor, ERROR_TIMEOUT, lambda: None)
def trigger(trigger, fromTime, now, cache_ttl):
    """Run one full check of ``trigger`` over [fromTime, now].

    Builds a fresh ``check`` dict from the previous check, evaluates the
    trigger expression for every new point of every time series, emits
    state-change events, applies TTL/nodata handling per metric, and
    finally persists the check with an aggregate score.
    """
    now = now or int(time())
    log.info("Checking trigger {id}", id=trigger.id)
    initialized = yield trigger.init(now, fromTime=fromTime)
    if not initialized:
        # Py2 inlineCallbacks idiom for returning early from a generator.
        raise StopIteration
    if fromTime is None:
        fromTime = trigger.last_check.get("timestamp", now)
    # Fetch a window padded by the trigger TTL (600s default) so boundary
    # points have enough history.
    requestContext = datalib.createRequestContext(
        str(fromTime - (trigger.ttl or 600)), str(now))
    check = {
        "metrics": trigger.last_check["metrics"].copy(),
        "state": state.OK,
        "timestamp": now,
        "score": trigger.last_check.get("score")
    }
    try:
        time_series = yield trigger.get_timeseries(requestContext)
        # Trim stored values past the retention horizon for every metric
        # the request touched.
        for metric in requestContext['metrics']:
            yield trigger.db.cleanupMetricValues(metric, now - config.METRICS_TTL,
                                                 cache_key=metric, cache_ttl=cache_ttl)
        if not time_series:
            if trigger.ttl:
                # No data at all: degrade to the configured ttl_state.
                check["state"] = trigger.ttl_state
                check["msg"] = "Trigger has no metrics"
                yield event.compare_states(trigger, check, trigger.last_check, now)
        else:
            # Seed per-metric state from the last known state of every
            # real (non-stub) series across all targets.
            for t_series in time_series.values():
                for tN in t_series:
                    if not tN.stub:
                        check["metrics"][tN.name] = tN.last_state.copy()
            # time_series is keyed by target number; target 1 drives the
            # evaluation loop — presumably the primary target (t1).
            for t1 in time_series[1]:
                log.debug("Checking timeserie {name}: {values}",
                          name=t1.name, values=list(t1))
                log.debug("Checking interval: {start} - {end} ({duration}s), step: {step}",
                          start=t1.start, end=t1.end, step=t1.step,
                          duration=t1.end - t1.start)
                metric_state = check["metrics"].get(t1.name)
                if not metric_state:
                    log.debug("No metric state for {name}.", name=t1.name)
                    continue
                # Never re-evaluate points older than the checkpoint: the
                # last processed timestamp (minus a safety gap) or the last
                # event, whichever is later.
                checkpoint = max(
                    t1.last_state["timestamp"] - config.CHECKPOINT_GAP,
                    metric_state.get("event_timestamp", 0))
                log.debug("Checkpoint for {name}: {checkpoint}",
                          name=t1.name, checkpoint=checkpoint)
                for value_timestamp in xrange(t1.start, now + t1.step, t1.step):
                    if value_timestamp <= checkpoint:
                        continue
                    expression_values = time_series.get_expression_values(
                        t1, value_timestamp)
                    t1_value = expression_values["t1"]
                    log.debug("values for ts {timestamp}: {values}",
                              timestamp=value_timestamp, values=expression_values)
                    # Skip timestamps where any target has no value yet.
                    if None in expression_values.values():
                        continue
                    expression_values.update({
                        'warn_value': trigger.struct.get('warn_value'),
                        'error_value': trigger.struct.get('error_value'),
                        'PREV_STATE': metric_state['state']
                    })
                    expression_state = expression.getExpression(
                        trigger.struct.get('expression'), **expression_values)
                    time_series.update_state(t1, check, expression_state,
                                             expression_values, value_timestamp)
                    yield event.compare_states(trigger, metric_state, t1.last_state,
                                               value_timestamp, value=t1_value,
                                               metric=t1.name)
                # compare with last_check timestamp in case if we have not run checker for a long time
                if trigger.ttl and metric_state["timestamp"] + trigger.ttl < trigger.last_check["timestamp"]:
                    log.info("Metric {name} TTL expired for state {state}",
                             name=t1.name, state=metric_state)
                    if trigger.ttl_state == state.DEL and metric_state.get("event_timestamp") is not None:
                        # DEL policy: forget this metric (and the matching
                        # metrics of the other targets) entirely.
                        log.info("Remove metric {name}", name=t1.name)
                        del check["metrics"][t1.name]
                        for tN, tName in time_series.other_targets_names.iteritems():
                            log.info("Remove metric {name}", name=tName)
                            del check["metrics"][tName]
                        for pattern in trigger.struct.get("patterns"):
                            yield trigger.db.delPatternMetrics(pattern)
                        continue
                    # Otherwise degrade the metric to the ttl_state.
                    time_series.update_state(
                        t1, check, state.to_metric_state(trigger.ttl_state),
                        None, trigger.last_check["timestamp"] - trigger.ttl)
                    yield event.compare_states(trigger, metric_state, t1.last_state,
                                               metric_state["timestamp"],
                                               metric=t1.name)
    except StopIteration:
        # Re-raise: StopIteration is flow control, not a failure.
        raise
    except Exception as e:
        log.error("Trigger check failed: {e}", e=e)
        check["state"] = state.EXCEPTION
        check["msg"] = "Trigger evaluation exception"
        yield event.compare_states(trigger, check, trigger.last_check, now)
    # Aggregate score = sum of per-metric state scores + trigger state score.
    scores = sum(
        map(lambda m: state.SCORES[m["state"]], check["metrics"].itervalues()))
    check["score"] = scores + state.SCORES[check["state"]]
    yield trigger.db.setTriggerLastCheck(trigger.id, check)
def connectionLost(self, reason):
    # Twisted callback: log why the connection dropped and flag this
    # protocol as disconnected (note: `connected` is an int flag here,
    # while other code in this file calls a connected() method on replica
    # objects — presumably a different class; verify before unifying).
    log.error(str(reason))
    self.connected = 0
def failed(error):
    # Errback for a connection attempt (`self` is the replica from the
    # enclosing scope): log the failure and retry the connect in 10s.
    log.error('Connect to {replica} failed: {error}', replica=self, error=error)
    reactor.callLater(10, self.connect, True)