Example #1
 def wrapper():
     try:
         yield f(resource, request)
     except Exception as e:
         log.error("Error in delayed decorator wrapped function: {e}", e=e)
         request.setResponseCode(http.INTERNAL_SERVER_ERROR)
         request.finish()
Example #2
 def wrapper():
     try:
         yield f(resource, request)
     except Exception as e:
         log.error("Error in delayed decorator wrapped function: {e}",
                   e=e)
         request.setResponseCode(http.INTERNAL_SERVER_ERROR)
         request.finish()
Example #3
 def decorator(*args, **kwargs):
     request = args[1]
     try:
         request.body = request.content.getvalue()
         request.body_json = anyjson.deserialize(request.body)
     except Exception as e:
         log.error("Invalid trigger json [{json}]: {e}", json=request.body, e=e)
         defer.returnValue(bad_request(request, "Content is not json"))
     yield f(*args, **kwargs)
Example #4
 def decorator(*args, **kwargs):
     request = args[1]
     try:
         request.body = request.content.getvalue()
         request.body_json = anyjson.deserialize(request.body)
     except Exception as e:
         log.error("Invalid trigger json [{json}]: {e}",
                   json=request.body,
                   e=e)
         defer.returnValue(bad_request(request, "Content is not json"))
     yield f(*args, **kwargs)
Example #5
 def checkNoData(self):
     try:
         now = reactor.seconds()
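         # Skip the nodata check while no metrics have arrived for STOP_CHECKING_INTERVAL seconds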
         if self.db.last_data + config.STOP_CHECKING_INTERVAL < now:
             log.info("Checking nodata disabled. No metrics for {seconds} seconds",
                      seconds=int(now - self.db.last_data))
         else:
             log.info("Checking nodata")
             triggers = yield self.db.getTriggers()
             for trigger_id in triggers:
                 yield self.db.addTriggerCheck(trigger_id, cache_key=trigger_id, cache_ttl=60)
     except Exception as e:
         log.error("NoData check failed: {e}", e=e)
Example #6
 def send(self, get_metrics):
     index = self.index
     replica = self.replicas[self.index]
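     # Walk the replica list until a connected one is found; give up after a full cycle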
     while not replica.connected():
         replica.connect()
         self.next()
         if self.index == index:
             log.error("No graphite connection")
             return
         replica = self.replicas[self.index]
     replica.send(get_metrics)
     self.next()
     log.info("Sent metrics to {replica}", replica=replica)
Example #7
 def perform(self):
     try:
         trigger_id = yield self.db.getTriggerToCheck()
         while trigger_id is not None:
             acquired = yield self.db.setTriggerCheckLock(trigger_id)
             if acquired is not None:
                 start = reactor.seconds()
                 trigger = Trigger(trigger_id, self.db)
                 yield trigger.check()
                 end = reactor.seconds()
                 yield self.db.delTriggerCheckLock(trigger_id)
                 spy.TRIGGER_CHECK.report(end - start)
             trigger_id = yield self.db.getTriggerToCheck()
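         # No triggers left to check: sleep for a randomized interval before the next pass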
         yield task.deferLater(reactor, random.uniform(PERFORM_INTERVAL * 10, PERFORM_INTERVAL * 20), lambda: None)
     except GeneratorExit:
         pass
     except Exception as e:
         spy.TRIGGER_CHECK_ERRORS.report(0)
         log.error("Failed to perform triggers check: {e}", e=e)
         yield task.deferLater(reactor, ERROR_TIMEOUT, lambda: None)
Example #8
def convert(db):

    log.info(db.rc)
    log.info("Reading metrics keys")
    keys = yield db.rc.keys(METRIC_OLD_PREFIX.format("*"))
    log.info("Converting ...")
    for key in keys:
        _, name = key.split(':')
        try:
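            # Copy every stored value of this metric to the new key layout in a single Redis pipeline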
            pipe = yield db.rc.pipeline()
            metrics = yield db.rc.zrange(key)
            for metric in metrics:
                value, timestamp = metric.split()
                pipe.zadd(METRIC_PREFIX.format(name), timestamp, "{0} {1}".format(timestamp, value))
            yield pipe.execute_pipeline()
        except txredisapi.ResponseError as e:
            log.error("Can not convert {key}: {e}", key=key, e=e)
        log.info("Metric {name} converted", name=name)

    yield db.stopService()
    reactor.stop()
Example #9
 def decorator(*args, **kwargs):
     request = args[1]
     json = request.body_json
     request.graphite_patterns = []
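     # Require each field unless its alternative key (if any) is present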
     for field, alt in [("targets", None), ("warn_value", "expression"),
                        ("error_value", "expression")]:
         if json.get(field) is None and json.get(alt) is None:
             defer.returnValue(
                 bad_request(request, "%s is required" % field))
     try:
         request.body_json = trigger_reformat(json, json.get("id"),
                                              json.get("tags", []))
     except Exception as e:
         log.error("Invalid trigger format [{json}]: {e}", json=json, e=e)
         defer.returnValue(bad_request(request, "Invalid trigger format"))
     expression_values = {
         'warn_value': json.get('warn_value'),
         'error_value': json.get('error_value')
     }
     try:
         yield resolve_patterns(request, expression_values)
     except Exception as e:
         log.error("Invalid graphite targets [{targets}]: {e}",
                   targets=request.body_json["targets"],
                   e=e)
         defer.returnValue(bad_request(request, "Invalid graphite targets"))
     try:
         getExpression(json.get("expression"), **expression_values)
     except Exception as e:
         log.error("Invalid expression [{expression}]: {e}",
                   expression=json.get("expression"),
                   e=e)
         defer.returnValue(bad_request(request, "Invalid expression"))
     yield f(*args, **kwargs)
Example #10
 def decorator(*args, **kwargs):
     request = args[1]
     json = request.body_json
     request.graphite_patterns = []
     for field, alt in [("targets", None), ("warn_value", "expression"), ("error_value", "expression")]:
         if json.get(field) is None and json.get(alt) is None:
             defer.returnValue(bad_request(request, "%s is required" % field))
     if type(json["targets"]) is not list:
         defer.returnValue(bad_request(request, "Invalid trigger targets"))
     try:
         request.body_json = trigger_reformat(json, json.get("id"), json.get("tags", []))
     except Exception as e:
         log.error("Invalid trigger format [{json}]: {e}", json=json, e=e)
         defer.returnValue(bad_request(request, "Invalid trigger format"))
     expression_values = {'warn_value': json.get('warn_value'),
                          'error_value': json.get('error_value')}
     try:
         yield resolve_patterns(request, expression_values)
     except Exception as e:
         log.error("Invalid graphite targets [{targets}]: {e}", targets=request.body_json["targets"], e=e)
         defer.returnValue(bad_request(request, "Invalid graphite targets"))
     try:
         getExpression(json.get("expression"), **expression_values)
     except Exception as e:
         log.error("Invalid expression [{expression}]: {e}", expression=json.get("expression"), e=e)
         defer.returnValue(bad_request(request, "Invalid expression"))
     yield f(*args, **kwargs)
Example #11
    def messageReceived(self, ignored, channel, message, nocache=False):
        try:
            json = anyjson.deserialize(message)
            db = self.factory.db
            db.last_data = reactor.seconds()
            pattern = json["pattern"]
            metric = json["metric"]
            yield db.addPatternMetric(pattern, metric)
            triggers = yield db.getPatternTriggers(pattern)
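            # No trigger references this pattern any more: drop the pattern and its metrics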
            if not triggers:
                yield db.removePattern(pattern)
                metrics = yield db.getPatternMetrics(pattern)
                for metric in metrics:
                    yield db.delMetric(metric)
                yield db.delPatternMetrics(pattern)

            for trigger_id in triggers:
                if nocache:
                    yield db.addTriggerCheck(trigger_id)
                else:
                    yield db.addTriggerCheck(trigger_id, cache_key=trigger_id, cache_ttl=config.CHECK_INTERVAL)
        except Exception as e:
            log.error("Failed to receive metric: {e}", e=e)
Example #12
 def perform(self):
     try:
         trigger_id = yield self.db.getTriggerToCheck()
         while trigger_id is not None:
             acquired = yield self.db.setTriggerCheckLock(trigger_id)
             if acquired is not None:
                 start = reactor.seconds()
                 trigger = Trigger(trigger_id, self.db)
                 yield trigger.check()
                 end = reactor.seconds()
                 yield self.db.delTriggerCheckLock(trigger_id)
                 spy.TRIGGER_CHECK.report(end - start)
             trigger_id = yield self.db.getTriggerToCheck()
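         # No triggers left to check: sleep for a randomized interval before the next pass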
         yield task.deferLater(
             reactor,
             random.uniform(PERFORM_INTERVAL * 10, PERFORM_INTERVAL * 20),
             lambda: None)
     except GeneratorExit:
         pass
     except Exception as e:
         spy.TRIGGER_CHECK_ERRORS.report(0)
         log.error("Failed to perform triggers check: {e}", e=e)
         yield task.deferLater(reactor, ERROR_TIMEOUT, lambda: None)
Example #13
def trigger(trigger, fromTime, now, cache_ttl):
    now = now or int(time())

    log.info("Checking trigger {id}", id=trigger.id)
    initialized = yield trigger.init(now, fromTime=fromTime)
    if not initialized:
        raise StopIteration

    if fromTime is None:
        fromTime = trigger.last_check.get("timestamp", now)

    requestContext = datalib.createRequestContext(
        str(fromTime - (trigger.ttl or 600)), str(now))

    check = {
        "metrics": trigger.last_check["metrics"].copy(),
        "state": state.OK,
        "timestamp": now,
        "score": trigger.last_check.get("score")
    }

    try:
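        # Fetch the trigger's time series and drop stored metric values older than METRICS_TTL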
        time_series = yield trigger.get_timeseries(requestContext)

        for metric in requestContext['metrics']:
            yield trigger.db.cleanupMetricValues(metric,
                                                 now - config.METRICS_TTL,
                                                 cache_key=metric,
                                                 cache_ttl=cache_ttl)

        if not time_series:
            if trigger.ttl:
                check["state"] = trigger.ttl_state
                check["msg"] = "Trigger has no metrics"
                yield event.compare_states(trigger, check, trigger.last_check,
                                           now)
        else:

            for t_series in time_series.values():
                for tN in t_series:
                    if not tN.stub:
                        check["metrics"][tN.name] = tN.last_state.copy()

            for t1 in time_series[1]:

                log.debug("Checking timeserie {name}: {values}",
                          name=t1.name,
                          values=list(t1))
                log.debug(
                    "Checking interval: {start} - {end} ({duration}s), step: {step}",
                    start=t1.start,
                    end=t1.end,
                    step=t1.step,
                    duration=t1.end - t1.start)
                metric_state = check["metrics"].get(t1.name)
                if not metric_state:
                    log.debug("No metric state for {name}.", name=t1.name)
                    continue

                checkpoint = max(
                    t1.last_state["timestamp"] - config.CHECKPOINT_GAP,
                    metric_state.get("event_timestamp", 0))
                log.debug("Checkpoint for {name}: {checkpoint}",
                          name=t1.name,
                          checkpoint=checkpoint)

                for value_timestamp in xrange(t1.start, now + t1.step,
                                              t1.step):

                    if value_timestamp <= checkpoint:
                        continue

                    expression_values = time_series.get_expression_values(
                        t1, value_timestamp)

                    t1_value = expression_values["t1"]

                    log.debug("values for ts {timestamp}: {values}",
                              timestamp=value_timestamp,
                              values=expression_values)
                    if None in expression_values.values():
                        continue

                    expression_values.update({
                        'warn_value':
                        trigger.struct.get('warn_value'),
                        'error_value':
                        trigger.struct.get('error_value'),
                        'PREV_STATE':
                        metric_state['state']
                    })

                    expression_state = expression.getExpression(
                        trigger.struct.get('expression'), **expression_values)

                    time_series.update_state(t1, check, expression_state,
                                             expression_values,
                                             value_timestamp)

                    yield event.compare_states(trigger,
                                               metric_state,
                                               t1.last_state,
                                               value_timestamp,
                                               value=t1_value,
                                               metric=t1.name)

                # compare with last_check timestamp in case if we have not run checker for a long time
                if trigger.ttl and metric_state[
                        "timestamp"] + trigger.ttl < trigger.last_check[
                            "timestamp"]:
                    log.info("Metric {name} TTL expired for state {state}",
                             name=t1.name,
                             state=metric_state)
                    if trigger.ttl_state == state.DEL and metric_state.get(
                            "event_timestamp") is not None:
                        log.info("Remove metric {name}", name=t1.name)
                        del check["metrics"][t1.name]
                        for tN, tName in time_series.other_targets_names.iteritems(
                        ):
                            log.info("Remove metric {name}", name=tName)
                            del check["metrics"][tName]
                        for pattern in trigger.struct.get("patterns"):
                            yield trigger.db.delPatternMetrics(pattern)
                        continue
                    time_series.update_state(
                        t1, check, state.to_metric_state(trigger.ttl_state),
                        None, trigger.last_check["timestamp"] - trigger.ttl)
                    yield event.compare_states(trigger,
                                               metric_state,
                                               t1.last_state,
                                               metric_state["timestamp"],
                                               metric=t1.name)

    except StopIteration:
        raise
    except Exception as e:
        log.error("Trigger check failed: {e}", e=e)
        check["state"] = state.EXCEPTION
        check["msg"] = "Trigger evaluation exception"
        yield event.compare_states(trigger, check, trigger.last_check, now)
    scores = sum(
        map(lambda m: state.SCORES[m["state"]], check["metrics"].itervalues()))
    check["score"] = scores + state.SCORES[check["state"]]
    yield trigger.db.setTriggerLastCheck(trigger.id, check)
Example #14
 def connectionLost(self, reason):
     log.error(str(reason))
     self.connected = 0
Example #15
 def failed(error):
     log.error('Connect to {replica} failed: {error}',
               replica=self,
               error=error)
     reactor.callLater(10, self.connect, True)
Example #16
 def failed(error):
     log.error('Connect to {replica} failed: {error}', replica=self, error=error)
     reactor.callLater(10, self.connect, True)