def unmonitorMetric(self, metricId):
    """Stop monitoring a metric.

    The metric row is deleted from the database first; afterwards a request
    to delete the corresponding HTM model is sent and the event is logged.

    :param metricId: unique identifier of the metric row

    :raises htm-it.app.exceptions.ObjectNotFoundError: if metric with the
      referenced metric uid doesn't exist
    """
    with self.connectionFactory() as dbConn:
        # Route the delete through repository.retryOnTransientErrors
        removeMetricRow = repository.retryOnTransientErrors(
            repository.deleteMetric)
        removeMetricRow(dbConn, metricId)

    # With the row removed, request deletion of the HTM model as well
    model_swapper_utils.deleteHTMModel(metricId)

    self._log.info("Cloudwatch Metric unmonitored: metric=%r", metricId)
def exportModel(self, metricId):
    """Export the given model.

    Only the ``parameters`` column of the metric row is fetched; its
    JSON-decoded value is returned as the export specification.

    :param metricId: datasource-specific unique metric identifier

    :returns: Model-export specification for Cloudwatch model
    :rtype: dict

    ::

        {
          "datasource": "cloudwatch",

          "metricSpec": {
            "region": "us-west-2",
            "namespace": "AWS/EC2",
            "metric": "CPUUtilization",
            "dimensions": {
              "InstanceId": "i-12d67826"
            }
          },

          # Same modelParams with which model was created, if any
          "modelParams": {
            "min": 0,  # optional
            "max": 100  # optional
          }
        }

    :raises htm-it.app.exceptions.ObjectNotFoundError: if referenced metric
      doesn't exist
    """
    with self.connectionFactory() as dbConn:
        fetchMetric = repository.retryOnTransientErrors(repository.getMetric)
        metricRow = fetchMetric(dbConn,
                                metricId,
                                fields=[schema.metric.c.parameters])

    # The stored parameters are JSON text; hand back the decoded object
    return htmengine.utils.jsonDecode(metricRow.parameters)
def run(self):
    """Poll for autostacks pending data collection and process them.

    Loops forever inside a single ``ModelSwapperInterface`` context. On each
    pass the pending autostack metrics are fetched from the database; when
    there are none the loop sleeps for ``self._NOTHING_READY_SLEEP_TIME_SEC``
    seconds, otherwise one ``AutostackMetricRequest`` is built per pending
    metric and the batch is handed to
    ``self._processAutostackMetricRequests``.
    """
    with ModelSwapperInterface() as modelSwapper:
        engine = repository.engineFactory()

        while True:
            with engine.connect() as dbConn:
                fetchPending = repository.retryOnTransientErrors(
                    repository.getAutostackMetricsPendingDataCollection)
                pendingStacks = fetchPending(dbConn)

            if pendingStacks:
                # Build a sequence of autostack metric requests. Each
                # request's refID equals its position in ``requests``.
                requests = []
                for autostack, metrics in pendingStacks:
                    for metric in metrics:
                        requests.append(
                            AutostackMetricRequest(refID=len(requests),
                                                   autostack=autostack,
                                                   metric=metric))

                # Collect, aggregate, and stream metric data
                self._processAutostackMetricRequests(engine,
                                                     requests,
                                                     modelSwapper)
            else:
                time.sleep(self._NOTHING_READY_SLEEP_TIME_SEC)
def messageHandler(self, message):
    """ Inspect all inbound model results in a batch for anomaly thresholds and
    trigger notifications where applicable.

    Messages whose headers contain a ``dataType`` key are acknowledged and
    ignored. Any other message is deserialized, and every result row whose
    anomaly value reaches the lowest configured sensitivity is checked against
    each notification setting. A notification is added for each setting whose
    sensitivity the row meets or exceeds, and an email is attempted when the
    insert result reports at least one row and the setting has an email
    address. The message is acknowledged in all cases, including when
    processing raises.

    :param amqp.messages.ConsumerMessage message: ``message.body`` is a
      serialized batch of model inference results generated in
      ``AnomalyService`` and must be deserialized using
      ``AnomalyService.deserializeModelResult()``. The message conforms to
      htmengine/runtime/json_schema/model_inference_results_msg_schema.json

    :raises Exception: an error raised while deserializing ``message.body``
      is logged and re-raised; the message is still acknowledged
    """
    if message.properties.headers and "dataType" in message.properties.headers:
        # Not a model inference result
        message.ack()
        return

    # NOTE(review): ``htm-it`` is not a valid Python identifier, so this line
    # parses as the subtraction ``htm - it.app.config.loadConfig()``.
    # Presumably a dotted package path was intended -- confirm against the
    # module's imports.
    htm-it.app.config.loadConfig() # reload config on every batch
    engine = repository.engineFactory()
    # Cache minimum threshold to trigger any notification to avoid permuting
    # settings x metricDataRows
    try:
        try:
            batch = AnomalyService.deserializeModelResult(message.body)
        except Exception:
            # Log with traceback, then let the error propagate; the outer
            # ``finally`` still acknowledges the message.
            self._log.exception("Error deserializing model result")
            raise

        # Load all settings for all users (once per incoming batch)
        with engine.connect() as conn:
            settings = repository.retryOnTransientErrors(
                repository.getAllNotificationSettings)(conn)

        self._log.debug("settings: %r" % settings)

        if settings:
            # A row below the lowest sensitivity cannot match any setting
            minThreshold = min(setting.sensitivity for setting in settings)
        else:
            # No settings stored: fall back to a fixed threshold so that rows
            # at or above it are still reported in the log below
            minThreshold = 0.99999

        metricInfo = batch["metric"]
        metricId = metricInfo["uid"]
        resource = metricInfo["resource"]

        for row in batch["results"]:
            if row["anomaly"] >= minThreshold:
                # NOTE(review): row["ts"] is treated as a POSIX timestamp and
                # converted to a naive UTC datetime -- confirm against the
                # message schema.
                rowDatetime = datetime.utcfromtimestamp(row["ts"])

                if not settings:
                    # There are no device notification settings stored on this
                    # server, no notifications will be generated. However, log
                    # that an anomaly was detected and notification would be
                    # sent if there were any configured devices
                    self._log.info("<%r>" % (metricInfo) + (
                        "{TAG:APP.NOTIFICATION} Anomaly "
                        "detected at %s, but no devices are "
                        "configured.") % rowDatetime)
                    continue

                for settingObj in settings:
                    if row["rowid"] <= 1000:
                        continue # Not enough data

                    if rowDatetime < datetime.utcnow() - timedelta(seconds=3600):
                        continue # Skip old

                    if row["anomaly"] >= settingObj.sensitivity:
                        # First let's clear any old users out of the database.
                        with engine.connect() as conn:
                            repository.retryOnTransientErrors(
                                repository.deleteStaleNotificationDevices)(
                                    conn, _NOTIFICATION_DEVICE_STALE_DAYS)

                        # If anomaly_score meets or exceeds any of the device
                        # notification sensitivity settings, trigger
                        # notification. repository.addNotification() will
                        # handle throttling.
                        notificationId = str(uuid.uuid4())

                        with engine.connect() as conn:
                            result = repository.retryOnTransientErrors(
                                repository.addNotification)(conn,
                                                            uid=notificationId,
                                                            server=resource,
                                                            metric=metricId,
                                                            rowid=row["rowid"],
                                                            device=settingObj.uid,
                                                            windowsize=(
                                                                settingObj.windowsize),
                                                            timestamp=rowDatetime,
                                                            acknowledged=0,
                                                            seen=0)

                        # NOTE(review): this is logged unconditionally, before
                        # ``result`` is inspected below.
                        self._log.info("NOTIFICATION=%s SERVER=%s METRICID=%s DEVICE=%s "
                                       "Notification generated. " % (notificationId,
                                                                      resource,
                                                                      metricId,
                                                                      settingObj.uid))

                        if (result is not None and
                                result.rowcount > 0 and
                                settingObj.email_addr):
                            # Notification was generated. Attempt to send email
                            with engine.connect() as conn:
                                notificationObj = repository.getNotification(conn,
                                                                             notificationId)

                            self.sendNotificationEmail(engine,
                                                       settingObj,
                                                       notificationObj)
    finally:
        # Acknowledge the message whether or not processing succeeded
        message.ack()

    # Do cleanup.
    # NOTE(review): reached only when the block above did not raise -- confirm
    # that skipping cleanup on error is intended. Presumably this deletes
    # notifications that fall outside a retention window; verify against
    # repository.clearOldNotifications.
    with engine.connect() as conn:
        repository.clearOldNotifications(conn)
def _processAutostackMetricRequests(self, engine, requests, modelSwapper):
    """ Execute autostack metric requests, aggregate and stream
    collected metric data

    For every collection result yielded by the metric getter: the slices (if
    any) are aggregated, the metric's last timestamp is updated in the
    database, and the aggregated data (if any) is passed to the metric
    streamer. A metric that no longer exists is logged and skipped rather
    than aborting the remaining collections.

    :param engine: SQLAlchemy engine object
    :type engine: sqlalchemy.engine.Engine
    :param requests: sequence of AutostackMetricRequest objects; each
      collection result's ``refID`` is used as an index into this sequence
    :param modelSwapper: Model Swapper
    """
    # Start collecting requested metric data
    collectionIter = self._metricGetter.collectMetricData(requests)

    # Aggregate each collection and dispatch to app MetricStreamer
    for metricCollection in collectionIter:
        request = requests[metricCollection.refID]
        metricObj = request.metric
        data = None

        if metricCollection.slices:
            aggregationFn = getAggregationFn(metricObj)
            if aggregationFn:
                data = aggregate(metricCollection.slices,
                                 aggregationFn=aggregationFn)
            else:
                # No metric-specific function: use aggregate()'s default
                data = aggregate(metricCollection.slices)

        # Record the next metric time even when there is no data to stream
        try:
            with engine.connect() as conn:
                repository.retryOnTransientErrors(
                    repository.setMetricLastTimestamp)(
                        conn, metricObj.uid, metricCollection.nextMetricTime)
        except ObjectNotFoundError:
            self._log.warning(
                "Processing autostack data collection results for "
                "unknown model=%s (model deleted?)", metricObj.uid)
            continue

        if not data:
            self._log.info(
                "{TAG:APP.AGG.DATA.NONE} No data for metric=%s;"
                "timeRange=[%sZ-%sZ]",
                getMetricLogPrefix(metricObj),
                metricCollection.timeRange.start.isoformat(),
                metricCollection.timeRange.end.isoformat())
            continue

        try:
            self.metricStreamer.streamMetricData(data,
                                                 metricID=metricObj.uid,
                                                 modelSwapper=modelSwapper)
        except ObjectNotFoundError:
            # We expect that the model exists but in the odd case that it has
            # already been deleted we don't want to crash the process.
            self._log.info("Metric not found when adding data. metric=%s",
                           metricObj.uid)
        else:
            # Only report the publication when streaming actually succeeded;
            # previously this was logged even after ObjectNotFoundError.
            self._log.debug(
                "{TAG:APP.AGG.DATA.PUB} Published numItems=%d for metric=%s;"
                "timeRange=[%sZ-%sZ]; headTS=%sZ; tailTS=%sZ",
                len(data), getMetricLogPrefix(metricObj),
                metricCollection.timeRange.start.isoformat(),
                metricCollection.timeRange.end.isoformat(),
                data[0][0].isoformat(), data[-1][0].isoformat())