Example #1
0
  def unmonitorMetric(self, metricId):
    """ Unmonitor a metric

    :param metricId: unique identifier of the metric row

    :raises htm-it.app.exceptions.ObjectNotFoundError: if metric with the
      referenced metric uid doesn't exist
    """
    # Delete the metric from the database
    with self.connectionFactory() as conn:
      repository.retryOnTransientErrors(repository.deleteMetric)(
        conn,
        metricId)

    # Send request to delete HTM model
    model_swapper_utils.deleteHTMModel(metricId)

    self._log.info("Cloudwatch Metric unmonitored: metric=%r", metricId)
Example #2
0
  def exportModel(self, metricId):
    """ Export the given model.

    :param metricId: datasource-specific unique metric identifier

    :returns: Model-export specification for Cloudwatch model
    :rtype: dict

    ::
        {
          "datasource": "cloudwatch",

          "metricSpec": {
            "region": "us-west-2",
            "namespace": "AWS/EC2",
            "metric": "CPUUtilization",
            "dimensions": {
              "InstanceId": "i-12d67826"
            }
          },

          # Same modelParams with which model was created, if any
          "modelParams": {
            "min": 0,  # optional
            "max": 100  # optional
          }
        }

    :raises htm-it.app.exceptions.ObjectNotFoundError: if referenced metric
      doesn't exist
    """
    with self.connectionFactory() as conn:
      metricObj = repository.retryOnTransientErrors(repository.getMetric)(
        conn, metricId, fields=[schema.metric.c.parameters])

    parameters = htmengine.utils.jsonDecode(metricObj.parameters)

    return parameters
  def run(self):
    with ModelSwapperInterface() as modelSwapper:
      engine = repository.engineFactory()
      while True:
        with engine.connect() as conn:
          pendingStacks = repository.retryOnTransientErrors(
            repository.getAutostackMetricsPendingDataCollection)(conn)

        if not pendingStacks:
          time.sleep(self._NOTHING_READY_SLEEP_TIME_SEC)
          continue

        # Build a sequence of autostack metric requests
        requests = []
        for autostack, metrics in pendingStacks:
          refBase = len(requests)
          requests.extend(
            AutostackMetricRequest(refID=refBase + i,
                                   autostack=autostack,
                                   metric=metric)
            for i, metric in enumerate(metrics))

        # Collect, aggregate, and stream metric data
        self._processAutostackMetricRequests(engine, requests, modelSwapper)
  def messageHandler(self, message):
    """ Inspect all inbound model results in a batch for anomaly thresholds and
        trigger notifications where applicable.

        :param amqp.messages.ConsumerMessage message: ``message.body`` is a
          serialized batch of model inference results generated in
          ``AnomalyService`` and must be deserialized using
          ``AnomalyService.deserializeModelResult()``. The message conforms to
          htmengine/runtime/json_schema/model_inference_results_msg_schema.json
    """
    if message.properties.headers and "dataType" in message.properties.headers:
      # Not a model inference result
      message.ack()
      return

    htm-it.app.config.loadConfig() # reload config on every batch
    engine = repository.engineFactory()
    # Cache minimum threshold to trigger any notification to avoid permuting
    # settings x metricDataRows
    try:
      try:
        batch = AnomalyService.deserializeModelResult(message.body)
      except Exception:
        self._log.exception("Error deserializing model result")
        raise

      # Load all settings for all users (once per incoming batch)
      with engine.connect() as conn:
        settings = repository.retryOnTransientErrors(
            repository.getAllNotificationSettings)(conn)

      self._log.debug("settings: %r" % settings)

      if settings:
        minThreshold = min(setting.sensitivity for setting in settings)
      else:
        minThreshold = 0.99999

      metricInfo = batch["metric"]
      metricId = metricInfo["uid"]
      resource = metricInfo["resource"]


      for row in batch["results"]:

        if row["anomaly"] >= minThreshold:
          rowDatetime = datetime.utcfromtimestamp(row["ts"])

          if not settings:
            # There are no device notification settings stored on this server,
            # no notifications will be generated.  However, log that a
            # an anomaly was detected and notification would be sent if there
            # were any configured devices
            self._log.info("<%r>" % (metricInfo) + (
                                          "{TAG:APP.NOTIFICATION} Anomaly "
                                          "detected at %s, but no devices are "
                                          "configured.") % rowDatetime)
            continue

          for settingObj in settings:
            if row["rowid"] <= 1000:
              continue # Not enough data


            if rowDatetime < datetime.utcnow() - timedelta(seconds=3600):
              continue # Skip old

            if row["anomaly"] >= settingObj.sensitivity:
              # First let's clear any old users out of the database.
              with engine.connect() as conn:
                repository.retryOnTransientErrors(
                    repository.deleteStaleNotificationDevices)(
                        conn, _NOTIFICATION_DEVICE_STALE_DAYS)

              # If anomaly_score meets or exceeds any of the device
              # notification sensitivity settings, trigger notification.
              # repository.addNotification() will handle throttling.
              notificationId = str(uuid.uuid4())

              with engine.connect() as conn:
                result = repository.retryOnTransientErrors(
                    repository.addNotification)(conn,
                                                uid=notificationId,
                                                server=resource,
                                                metric=metricId,
                                                rowid=row["rowid"],
                                                device=settingObj.uid,
                                                windowsize=(
                                                  settingObj.windowsize),
                                                timestamp=rowDatetime,
                                                acknowledged=0,
                                                seen=0)

              self._log.info("NOTIFICATION=%s SERVER=%s METRICID=%s DEVICE=%s "
                             "Notification generated. " % (notificationId,
                             resource, metricId,
                             settingObj.uid))

              if (result is not None and
                  result.rowcount > 0 and
                  settingObj.email_addr):
                # Notification was generated.  Attempt to send email
                with engine.connect() as conn:
                  notificationObj = repository.getNotification(conn,
                                                               notificationId)

                self.sendNotificationEmail(engine,
                                           settingObj,
                                           notificationObj)
    finally:
      message.ack()

    # Do cleanup
    with engine.connect() as conn:
      repository.clearOldNotifications(conn) # Delete all notifications outside
  def _processAutostackMetricRequests(self, engine, requests, modelSwapper):
    """ Execute autostack metric requests, aggregate and stream
    collected metric data

    :param engine: SQLAlchemy engine object
    :type engine: sqlalchemy.engine.Engine
    :param requests: sequence of AutostackMetricRequest objects
    :param modelSwapper: Model Swapper
    """
    # Start collecting requested metric data
    collectionIter = self._metricGetter.collectMetricData(requests)

    # Aggregate each collection and dispatch to app MetricStreamer
    for metricCollection in collectionIter:
      request = requests[metricCollection.refID]

      metricObj = request.metric
      data = None

      if metricCollection.slices:
        aggregationFn = getAggregationFn(metricObj)
        if aggregationFn:
          data = aggregate(metricCollection.slices,
                           aggregationFn=aggregationFn)
        else:
          data = aggregate(metricCollection.slices)

      try:
        with engine.connect() as conn:
          repository.retryOnTransientErrors(repository.setMetricLastTimestamp)(
            conn, metricObj.uid, metricCollection.nextMetricTime)
      except ObjectNotFoundError:
        self._log.warning("Processing autostack data collection results for "
                          "unknown model=%s (model deleted?)", metricObj.uid)
        continue

      if data:
        try:
          self.metricStreamer.streamMetricData(data,
                                               metricID=metricObj.uid,
                                               modelSwapper=modelSwapper)
        except ObjectNotFoundError:
          # We expect that the model exists but in the odd case that it has
          # already been deleted we don't want to crash the process.
          self._log.info("Metric not found when adding data. metric=%s" %
                         metricObj.uid)

        self._log.debug(
          "{TAG:APP.AGG.DATA.PUB} Published numItems=%d for metric=%s;"
          "timeRange=[%sZ-%sZ]; headTS=%sZ; tailTS=%sZ",
          len(data), getMetricLogPrefix(metricObj),
          metricCollection.timeRange.start.isoformat(),
          metricCollection.timeRange.end.isoformat(),
          data[0][0].isoformat(), data[-1][0].isoformat())

      else:
        self._log.info(
          "{TAG:APP.AGG.DATA.NONE} No data for metric=%s;"
          "timeRange=[%sZ-%sZ]", getMetricLogPrefix(metricObj),
          metricCollection.timeRange.start.isoformat(),
          metricCollection.timeRange.end.isoformat())