def start(self):
  # Removes possible left over cached engine
  # (needed if non-patched engine is run prior)
  repository.engineFactory(config=htmengine.APP_CONFIG, reset=True)

  # Override the Repository database name
  try:
    self._configPatch.start()
    self._configPatchApplied = True

    # Now create the temporary repository database
    self._attemptedToCreateDatabase = True
    self.initTempDatabase()

    # Verify that the temporary repository database got created
    numDbFound = self._unaffiliatedEngine.execute(
      "SELECT COUNT(*) FROM INFORMATION_SCHEMA.SCHEMATA WHERE "
      "`SCHEMA_NAME` = '{db}'".format(db=self.tempDatabaseName)).scalar()
    assert numDbFound == 1, (
      "Temp repo db={db} not found (numFound={numFound})".format(
        db=self.tempDatabaseName, numFound=numDbFound))
  except:
    # Attempt to clean up
    self.stop()
    raise
def stop(self):
  try:
    if self._attemptedToCreateDatabase:
      self._attemptedToCreateDatabase = False
      # Drop the temporary repository database, if any
      self._unaffiliatedEngine.execute(
        "DROP DATABASE IF EXISTS {db}".format(db=self.tempDatabaseName))
  finally:
    if self._configPatchApplied:
      self._configPatch.stop()
      repository.engineFactory(config=htmengine.APP_CONFIG, reset=True)

    # Dispose of the unaffiliated engine's connection pool
    self._unaffiliatedEngine.dispose()
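# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of how the start()/stop() pair above might be driven from
# a test fixture so the temporary repository exists for the duration of each
# test. The class name "TempRepositoryPatch" is a placeholder assumption for
# whatever object owns start()/stop() in this codebase.

import unittest


class TempRepositoryUsageExample(unittest.TestCase):

  def setUp(self):
    self._repoPatch = TempRepositoryPatch()  # placeholder class name
    self._repoPatch.start()
    # Guarantee the temporary database is dropped even if the test errors out
    self.addCleanup(self._repoPatch.stop)

  def testRepositoryIsAvailable(self):
    # Repository calls made here would hit the temporary database that
    # start() created and verified via INFORMATION_SCHEMA
    pass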
def checkEncoderResolution(self, uid, minVal, maxVal, minResolution=None):
  """Check that encoder resolution is computed correctly."""
  engine = repository.engineFactory(config=self.__config)
  with engine.begin() as conn:
    metricObj = repository.getMetric(
      conn,
      uid,
      fields=[schema.metric.c.name, schema.metric.c.model_params])

  modelParams = json.loads(metricObj.model_params)
  self.assertNotEqual(modelParams, None,
                      "No model exists for metric %s" % metricObj.name)
  sensorParams = modelParams["modelConfig"]["modelParams"]["sensorParams"]
  encoderParams = sensorParams["encoders"]["c1"]

  # Estimate and check the bounds for the resolution based on min and max
  lower = (maxVal - minVal) / 300.0
  upper = (maxVal - minVal) / 80.0

  if minResolution is not None:
    lower = max(minResolution, lower)
    upper = float("inf")

  resolution = encoderParams["resolution"]

  self.assertGreaterEqual(resolution, lower)
  self.assertLessEqual(resolution, upper)
def storeDataWithRetries():
  """
  :returns: a three-tuple <modelInputRows, datasource, metricStatus>;
    modelInputRows: None if model was in state not suitable for streaming;
      otherwise a (possibly empty) tuple of ModelInputRow objects
      corresponding to the samples that were stored; ordered by rowid
  """
  with repository.engineFactory(config).connect() as conn:
    with conn.begin():
      # Synchronize with adapter's monitorMetric
      metricObj = repository.getMetricWithUpdateLock(
        conn,
        metricID,
        fields=[schema.metric.c.status,
                schema.metric.c.last_rowid,
                schema.metric.c.datasource])

      if (metricObj.status != MetricStatus.UNMONITORED and
          metricObj.status != MetricStatus.ACTIVE and
          metricObj.status != MetricStatus.PENDING_DATA and
          metricObj.status != MetricStatus.CREATE_PENDING):
        self._log.error("Can't stream: metric=%s has unexpected status=%s",
                        metricID, metricObj.status)
        modelInputRows = None
      else:
        # TODO: unit-test
        passingSamples = self._scrubDataSamples(data, metricID, conn,
                                                metricObj.last_rowid)
        if passingSamples:
          modelInputRows = self._storeDataSamples(passingSamples, metricID,
                                                  conn)
        else:
          modelInputRows = tuple()

  return (modelInputRows, metricObj.datasource, metricObj.status)
def runServer():
  # Get the current list of custom metrics
  appConfig = Config("application.conf",
                     os.environ.get("APPLICATION_CONFIG_PATH"))

  engine = repository.engineFactory(appConfig)
  global gCustomMetrics
  now = datetime.datetime.utcnow()

  with engine.connect() as conn:
    gCustomMetrics = dict(
      (m.name, [m, now]) for m in repository.getCustomMetrics(conn))

  queueName = appConfig.get("metric_listener", "queue_name")

  global gProfiling
  gProfiling = (appConfig.getboolean("debugging", "profiling") or
                LOGGER.isEnabledFor(logging.DEBUG))
  del appConfig

  metricStreamer = MetricStreamer()
  modelSwapper = ModelSwapperInterface()

  with MessageBusConnector() as bus:
    if not bus.isMessageQeueuePresent(queueName):
      bus.createMessageQueue(mqName=queueName, durable=True)

    LOGGER.info("Waiting for messages. To exit, press CTRL+C")

    with bus.consume(queueName) as consumer:
      messages = []
      messageRxTimes = []
      while True:
        message = consumer.pollOneMessage()

        if message is not None:
          messages.append(message)
          if gProfiling:
            messageRxTimes.append(time.time())

        if message is None or len(messages) >= MAX_MESSAGES_PER_BATCH:
          if messages:
            # Process the batch
            try:
              _handleBatch(engine,
                           messages,
                           messageRxTimes,
                           metricStreamer,
                           modelSwapper)
            except Exception:  # pylint: disable=W0703
              LOGGER.exception("Unknown failure in processing messages.")
              # Make sure that we ack messages when there is an unexpected
              # error to avoid getting hung forever on one bad record.

            # Ack all the messages
            messages[-1].ack(multiple=True)
            # Clear the message buffer
            messages = []
            messageRxTimes = []
          else:
            # Queue is empty, wait before retrying
            time.sleep(POLL_DELAY_SEC)
def checkModelIsActive(self, uid):
  engine = repository.engineFactory(config=self.__config)
  with engine.begin() as conn:
    metricObj = repository.getMetric(conn,
                                     uid,
                                     fields=[schema.metric.c.status])

  self.assertEqual(metricObj.status, MetricStatus.ACTIVE)
def initTempDatabase(self):
  """Initialize the temporary repository database with default schema and
  contents
  """
  # Create the database
  dbName = htmengine.APP_CONFIG.get("repository", "db")
  self._unaffiliatedEngine.execute("CREATE DATABASE {}".format(dbName))

  # Instantiate the schema in the database
  htmengine.repository.schema.metadata.create_all(
    repository.engineFactory(config=htmengine.APP_CONFIG))
def _composeModelCommandResultMessage(cls, modelID, cmdResult):
  """ Compose message corresponding to the completion of a model command
  for publishing to downstream services.

  :param modelID: model identifier
  :param model_swapper_interface.ModelCommandResult cmdResult: model command
    result
  :returns: JSON-ifiable message contents object per
    model_command_result_amqp_message.json
  :rtype: dict

  :raises ObjectNotFoundError: when attempted to request additional info about
    a model that is not in the repository
  :raises MetricNotMonitoredError: when required info about a model is not
    available, because it's no longer monitored
  """
  commandResultMessage = dict(
    method=cmdResult.method,
    modelId=modelID,
    commandId=cmdResult.commandID,
    status=cmdResult.status,
    errorMessage=cmdResult.errorMessage,
  )

  if (cmdResult.method == "defineModel" and
      cmdResult.status == htmengineerrno.SUCCESS):
    # Add modelInfo for successfully-completed "defineModel" commands
    engine = repository.engineFactory(config)

    fields = [
      schema.metric.c.name,
      schema.metric.c.server,
      schema.metric.c.parameters
    ]

    try:
      with engine.connect() as conn:
        metricObj = repository.getMetric(conn, modelID, fields=fields)
    except ObjectNotFoundError:
      g_log.warning("_composeModelCommandResultMessage: method=%s; "
                    "model=%s not found", cmdResult.method, modelID)
      raise

    if not metricObj.parameters:
      g_log.warning("_composeModelCommandResultMessage: method=%s; "
                    "model=%s not monitored", cmdResult.method, modelID)
      raise MetricNotMonitoredError

    commandResultMessage["modelInfo"] = dict(
      metricName=metricObj.name,
      resource=metricObj.server,
      modelSpec=json.loads(metricObj.parameters))

  return commandResultMessage
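# --- Example message shape (illustrative only) ---
# Roughly what _composeModelCommandResultMessage() above returns for a
# successfully-completed "defineModel" command; every value below is a
# made-up placeholder, and status 0 stands in for htmengineerrno.SUCCESS.

exampleCommandResultMessage = {
  "method": "defineModel",
  "modelId": "0123456789abcdef0123456789abcdef",  # placeholder metric uid
  "commandId": "cmd-0001",                        # placeholder command id
  "status": 0,
  "errorMessage": None,
  "modelInfo": {
    "metricName": "example.metric",
    "resource": "example-server",
    "modelSpec": {"datasource": "custom",
                  "metricSpec": {"metric": "example.metric"}},
  },
}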
def _createModel(self, nativeMetric):
  adapter = createDatasourceAdapter("custom")
  try:
    metricId = adapter.monitorMetric(nativeMetric)
  except MetricAlreadyMonitored as e:
    metricId = e.uid

  engine = repository.engineFactory(config=self.config)

  with engine.begin() as conn:
    return repository.getMetric(conn, metricId)
def createDatasourceAdapter(cls, datasource):
  """ Factory for Datasource adapters

  :param datasource: datasource (e.g., "cloudwatch")

  :returns: DatasourceAdapterIface-based adapter object corresponding to the
    given datasource value
  """
  config = Config("application.conf",
                  os.environ.get("APPLICATION_CONFIG_PATH"))
  return cls._adapterRegistry[datasource](
    repository.engineFactory(config).connect)
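# --- Usage sketch (illustrative only) ---
# How the factory above is typically exercised: look up the adapter registered
# for a datasource name, then ask it to monitor a metric. The bare-function
# call form and the MetricAlreadyMonitored handling mirror _createModel()
# above; the metric name in the spec is a placeholder.

modelSpec = {
  "datasource": "custom",
  "metricSpec": {"metric": "example.metric.name"},  # placeholder metric name
  "modelParams": {},
}

adapter = createDatasourceAdapter(modelSpec["datasource"])
try:
  modelId = adapter.monitorMetric(modelSpec)
except MetricAlreadyMonitored as e:
  # Monitoring an already-monitored metric surfaces the existing uid instead
  modelId = e.uid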
def checkStats(self, metricName, mn, mx):
  """Check that stats are computed correctly from the database"""
  engine = repository.engineFactory(config=self.__config)
  with engine.begin() as conn:
    metricObj = repository.getCustomMetricByName(
      conn,
      metricName,
      fields=[schema.metric.c.uid, schema.metric.c.parameters])
    stats = repository.getMetricStats(conn, metricObj.uid)

  self.assertSetEqual(set(stats.keys()), set(("min", "max")))
  self.assertAlmostEqual(stats["min"], mn)
  self.assertAlmostEqual(stats["max"], mx)
def checkMetricUnmonitoredById(self, uid):
  engine = repository.engineFactory(config=self.__config)
  with engine.begin() as conn:
    metricObj = repository.getMetric(
      conn,
      uid,
      fields=[schema.metric.c.status, schema.metric.c.parameters])

  self.assertEqual(metricObj.status, MetricStatus.UNMONITORED)
  self.assertIsNone(metricObj.parameters)

  with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
    model_checkpoint_mgr.ModelCheckpointMgr().loadModelDefinition(uid)
def checkModelResultsDeleted(self, uid):
  """Check that the model results have been deleted"""
  engine = repository.engineFactory(config=self.__config)
  with engine.begin() as conn:
    result = repository.getMetricData(
      conn,
      metricId=uid,
      sort=schema.metric_data.c.timestamp.desc())

    for row in result:
      self.assertIsNone(row.raw_anomaly_score)
      self.assertIsNone(row.anomaly_score)
      self.assertIsNone(row.display_value)
def start():
  with repository.engineFactory(config).begin() as conn:
    metricObj = repository.getMetric(conn, metricId)

    modelStarted = _startModelHelper(conn=conn,
                                     metricObj=metricObj,
                                     swarmParams=swarmParams,
                                     logger=logger)

    if modelStarted:
      sendBacklogDataToModel(conn=conn,
                             metricId=metricId,
                             logger=logger)

  return modelStarted
def checkMetricDeleted(self, uid):
  engine = repository.engineFactory(config=self.__config)
  with engine.begin() as conn:
    with self.assertRaises(Exception) as e:
      metric = repository.getMetric(conn, uid)

    models = repository.getAllModels(conn)
    for model in models:
      self.assertNotEqual(model.uid, uid,
                          "Model showing up after deletion.")

  with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
    model_checkpoint_mgr.ModelCheckpointMgr().loadModelDefinition(uid)
def checkModelResults(self, uid, expectedResults):
  """Check that the results for metric uid match expectedResults."""
  engine = repository.engineFactory(config=self.__config)
  with engine.begin() as conn:
    result = repository.getMetricData(
      conn,
      metricId=uid,
      sort=schema.metric_data.c.timestamp.desc())

    self.assertEqual(result.rowcount, len(expectedResults))

    for result, expected in zip(result, expectedResults):
      self.assertSequenceEqual(
        [result.timestamp.strftime("%Y-%m-%d %H:%M:%S"),
         result.metric_value,
         result.anomaly_score,
         result.rowid],
        expected)
def checkMetricCreated(self, metricName, numRecords=None):
  """Check that the new metrics show up in custom metrics list.

  :param metricName: metric name to check
  :param numRecords: optional number of records to wait for
  """
  engine = repository.engineFactory(config=self.__config)
  with engine.begin() as conn:
    metrics = repository.getCustomMetrics(conn)

    for metric in metrics:
      if metric.name == metricName:
        if numRecords:
          self.assertGreaterEqual(metric.last_rowid, numRecords)
        return metric.uid

  raise AssertionError("Metric not created!")
def checkModelDeleted(self, uid):
  """Check that the model has been deleted"""
  engine = repository.engineFactory(config=self.__config)
  with engine.begin() as conn:
    try:
      metric = repository.getMetric(conn, uid)
      raise Exception("Metric not deleted as expected")
    except app_exceptions.ObjectNotFoundError:
      pass

    models = repository.getAllModels(conn)
    for model in models:
      self.assertNotEqual(model.uid, uid,
                          "Model showing up after deletion.")

  with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
    model_checkpoint_mgr.ModelCheckpointMgr().loadModelDefinition(uid)
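# --- Helper sketch (illustrative only) ---
# The check* helpers above raise as long as the expected state has not been
# reached yet, so integration tests usually invoke them from a retry loop (the
# same pattern testStats uses below). A generic wrapper might look like this;
# the name and the attempt/delay defaults are assumptions, not part of the
# original test utilities.

import time


def repeatUntilPasses(checkFn, attempts=6, delaySec=10):
  """Call checkFn() until it stops raising; re-raise after the last attempt."""
  for attempt in xrange(attempts):
    try:
      return checkFn()
    except Exception:
      if attempt == attempts - 1:
        raise
      time.sleep(delaySec)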
def testStats(self):
  """Tests that stats are computed correctly."""
  metricName = "testStats.%i" % int(time.time())
  LOGGER.info("Running test with metric name: %s", metricName)

  self.addCleanup(self._deleteMetric, metricName)

  # Add custom metric data
  sock = socket.socket()
  sock.connect(("localhost", self.plaintextPort))
  sock.sendall("%s 5.0 1386201600\n" % metricName)
  sock.sendall("%s 6.0 1386288000\n" % metricName)
  sock.sendall("%s 7.0 1386374400\n" % metricName)
  self.gracefullyCloseSocket(sock)

  time.sleep(5)

  for _attempt in xrange(6):
    try:
      uid = self.checkMetricCreated(metricName, numRecords=3)
      LOGGER.info("Metric %s has uid: %s", metricName, uid)
      break
    except:
      time.sleep(10)
  else:
    self.fail("Metric not created within a reasonable amount of time.")

  # Check that stats are computed correctly from the database
  for _attempt in xrange(6):
    try:
      with repository.engineFactory(self.config).connect() as conn:
        stats = repository.getMetricStats(conn, uid)
        self.assertSetEqual(set(stats.keys()), set(("min", "max")))
        self.assertAlmostEqual(stats["min"], 5.0)
        self.assertAlmostEqual(stats["max"], 7.0)
      break
    except MetricStatisticsNotReadyError:
      time.sleep(10)
  else:
    self.fail("Metric created, but statistics not ready within a reasonable"
              " amount of time.")
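# --- Wire-format sketch (illustrative only) ---
# testStats above feeds samples over a plain TCP socket using a graphite-style
# line protocol: "<metric name> <value> <epoch seconds>\n" per sample. A small
# helper that does the same thing might look like this; the host/port
# arguments are assumptions made for illustration.

import socket


def sendPlaintextSamples(host, port, metricName, samples):
  """Send (value, epochTimestamp) pairs to the plaintext metric listener."""
  sock = socket.socket()
  try:
    sock.connect((host, port))
    for value, epochTimestamp in samples:
      sock.sendall("%s %s %d\n" % (metricName, value, epochTimestamp))
  finally:
    sock.close()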
def checkModelResultsSize(self, uid, size, atLeast=False):
  """Check that the number of results for metric uid matches size.

  This is not compatible with ManagedTempRepository since it makes an HTTP
  request that may be outside the temp repository process tree.

  :param uid: the uid of the metric to check results for
  :param size: the expected number of results
  :param atLeast: if True, checks for at least that many results; if False,
    checks for exact match of the result count; defaults to False
  """
  engine = repository.engineFactory(config=self.__config)
  with engine.begin() as conn:
    result = repository.getMetricData(conn, metricId=uid)

    if atLeast:
      self.assertGreaterEqual(result.rowcount, size)
    else:
      self.assertEqual(result.rowcount, size)

    for row in result:
      self.assertIsNotNone(row)
def getModelResults(self, uid, resultCount):
  """Queries MySQL db and returns rows with anomaly results

  :param uid: uid of metric
  :param resultCount: number of rows expected
  :return: List of tuples containing timestamp, metric_value, anomaly_score,
    and rowid
  """
  engine = repository.engineFactory(config=self.__config)
  fields = (schema.metric_data.c.timestamp,
            schema.metric_data.c.metric_value,
            schema.metric_data.c.anomaly_score,
            schema.metric_data.c.rowid)

  with engine.begin() as conn:
    result = repository.getMetricData(
      conn,
      metricId=uid,
      fields=fields,
      sort=schema.metric_data.c.timestamp.desc(),
      score=0.0)

    self.assertEqual(result.rowcount, resultCount)
    return result.fetchall()
def _processModelInferenceResults(self, inferenceResults, metricID):
  """ Process a batch of model inference results

  Store the updated MetricData and anomaly likelihood parameters in the
  database.

  A row's anomaly_score value will be set to and remain at 0 in the first
  self._statisticsMinSampleSize rows; once we get enough inference results to
  create an anomaly likelihood model, anomaly_score will be computed on the
  subsequent rows.

  :param inferenceResults: a sequence of ModelInferenceResult instances in the
    processed order (ascending by timestamp)

  :param metricID: metric/model ID of the model that emitted the results

  :returns: None if the batch was rejected; otherwise a pair:
    (metric, metricDataRows)
      metric: Metric RowProxy instance corresponding to the given metricID
      metricDataRows: a sequence of MutableMetricDataRow instances
        corresponding to the updated metric_data rows.
    TODO: unit-test return value
  :rtype: None or tuple

  *NOTE:* the processing must be idempotent due to the "at least once"
  delivery semantics of the message bus

  *NOTE:* the performance goal is to minimize costly database access and avoid
  falling behind while processing model results, especially during the
  model's initial "catch-up" phase when large inference result batches are
  prevalent.
  """
  engine = repository.engineFactory(config)

  # Validate model ID
  try:
    with engine.connect() as conn:
      metricObj = repository.getMetric(conn, metricID)
  except ObjectNotFoundError:
    # Ignore inferences for unknown models. Typically, this is the result
    # of a deleted model. Another scenario where this might occur is when a
    # developer resets the db while there are result messages still on the
    # message bus. It would be an error if this were to occur in production
    # environment.
    self._log.warning("Received inference results for unknown model=%s; "
                      "(model deleted?)", metricID, exc_info=True)
    return None

  # Reject the results if model is in non-ACTIVE state (e.g., if HTM Metric
  # was unmonitored after the results were generated)
  if metricObj.status != MetricStatus.ACTIVE:
    self._log.warning("Received inference results for a non-ACTIVE "
                      "model=%s; metric=<%s>; (metric unmonitored?)",
                      metricID, getMetricLogPrefix(metricObj))
    return None

  # Load the MetricData instances corresponding to the results
  with engine.connect() as conn:
    metricDataRows = repository.getMetricData(
      conn,
      metricID,
      start=inferenceResults[0].rowID,
      stop=inferenceResults[-1].rowID)

  # metricDataRows must be mutable, as the data is massaged in
  # _scrubInferenceResultsAndInitMetricData()
  metricDataRows = list(metricDataRows)

  if not metricDataRows:
    self._log.error("Rejected inference result batch=[%s..%s] of model=%s "
                    "due to no matching metric_data rows",
                    inferenceResults[0].rowID, inferenceResults[-1].rowID,
                    metricID)
    return None

  try:
    self._scrubInferenceResultsAndInitMetricData(
      engine=engine,
      inferenceResults=inferenceResults,
      metricDataRows=metricDataRows,
      metricObj=metricObj)
  except RejectedInferenceResultBatch as e:
    # TODO: unit-test
    self._log.error(
      "Rejected inference result batch=[%s..%s] corresponding to "
      "rows=[%s..%s] of model=%s due to error=%r",
      inferenceResults[0].rowID, inferenceResults[-1].rowID,
      metricDataRows[0].rowid, metricDataRows[-1].rowid, metricID, e)
    return None

  # Update anomaly scores based on the new results
  anomalyLikelihoodParams = (
    self.likelihoodHelper.updateModelAnomalyScores(
      engine=engine,
      metricObj=metricObj,
      metricDataRows=metricDataRows))

  # Update metric data rows with rescaled display values
  # NOTE: doing this outside the updateColumns loop to avoid holding row locks
  #  any longer than necessary
  for metricData in metricDataRows:
    metricData.display_value = rescaleForDisplay(
      metricData.anomaly_score,
      active=(metricObj.status == MetricStatus.ACTIVE))

  # Update database once via transaction!
  startTime = time.time()
  try:
    @retryOnTransientErrors
    def runSQL(engine):
      with engine.begin() as conn:
        for metricData in metricDataRows:
          fields = {"raw_anomaly_score": metricData.raw_anomaly_score,
                    "anomaly_score": metricData.anomaly_score,
                    "display_value": metricData.display_value}
          repository.updateMetricDataColumns(conn, metricData, fields)

        self._updateAnomalyLikelihoodParams(
          conn,
          metricObj.uid,
          metricObj.model_params,
          anomalyLikelihoodParams)

    runSQL(engine)
  except (ObjectNotFoundError, MetricNotActiveError):
    self._log.warning("Rejected inference result batch=[%s..%s] of model=%s",
                      inferenceResults[0].rowID, inferenceResults[-1].rowID,
                      metricID, exc_info=True)
    return None

  self._log.debug("Updated HTM metric_data rows=[%s..%s] "
                  "of model=%s: duration=%ss",
                  metricDataRows[0].rowid, metricDataRows[-1].rowid,
                  metricID, time.time() - startTime)

  return (metricObj, metricDataRows,)
def handler(environ, start_response):
  metricName = environ["PATH_INFO"]

  if environ["REQUEST_METHOD"] == "PUT":
    # Trigger model creation...
    modelSpec = {
      "datasource": "custom",
      "metricSpec": {
        "metric": metricName
      },
      "modelParams": {}
    }

    try:
      modelSpec["modelParams"].update(json.load(environ["wsgi.input"]))
    except Exception as e:
      start_response("400 Bad Request", [("Content-Type", "text/html")])
      yield "Unable to parse request"

    adapter = createDatasourceAdapter(modelSpec["datasource"])

    try:
      modelId = adapter.monitorMetric(modelSpec)
      start_response("201 Created", [("Content-Type", "text/html")])
      yield "Created %s\n" % modelId
    except MetricAlreadyMonitored:
      start_response("400 Bad Request", [("Content-Type", "text/html")])
      yield "Model already exists for %s" % metricName

  elif environ["REQUEST_METHOD"] == "POST":
    # Send data...
    start_response("200 OK", [("Content-Type", "text/html")])

    for sample in environ["wsgi.input"]:
      value, ts = sample.split(" ")
      sendSample(bus,
                 metricName=metricName,
                 value=float(value),
                 epochTimestamp=int(ts))
      yield "Saved %s %f @ %d\n" % (metricName, float(value), int(ts))

  elif environ["REQUEST_METHOD"] == "GET":
    with repository.engineFactory(appConfig).connect() as conn:
      fields = (schema.metric_data.c.metric_value,
                schema.metric_data.c.timestamp,
                schema.metric_data.c.rowid,
                schema.metric_data.c.anomaly_score)
      sort = schema.metric_data.c.timestamp.asc()

      metricObj = repository.getCustomMetricByName(
        conn, metricName, fields=[schema.metric.c.uid])

      result = repository.getMetricData(conn,
                                        metricId=metricObj.uid,
                                        fields=fields,
                                        sort=sort)

      start_response("200 OK", [("Content-Type", "text/html")])

      for row in result:
        yield " ".join((metricName,
                        str(row.metric_value),
                        str(calendar.timegm(row.timestamp.timetuple())),
                        str(row.anomaly_score))) + "\n"
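# --- Serving sketch (illustrative only) ---
# The handler above is a plain WSGI application (PUT creates a model, POST
# appends samples, GET streams stored rows), so it can be mounted on any WSGI
# server. A minimal local setup with the standard library might look like
# this; the address and port are arbitrary choices made for illustration.

from wsgiref.simple_server import make_server

if __name__ == "__main__":
  # Listen on an arbitrary local port; any WSGI-compliant server would do
  httpd = make_server("127.0.0.1", 8080, handler)
  httpd.serve_forever()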
def setUpClass(cls):
  cls.engine = repository.engineFactory(taurus.engine.config)
def setUpClass(cls):
  cls.engine = repository.engineFactory(g_config)
def _processModelCommandResult(self, metricID, result):
  """ Process a single model command result """
  engine = repository.engineFactory(config)

  # Check if deleting model
  if result.method == "deleteModel":
    self._log.info("Model=%s was deleted", metricID)
    return

  # Validate model ID
  try:
    # NOTE: use shared lock to prevent race condition with adapter's
    # monitorMetric, whereby adapter creates and/or activates a metric inside
    # a transaction, and we might get the defineModel command before the
    # metric row updates are committed
    with engine.connect() as conn:
      metricObj = repository.getMetricWithSharedLock(conn, metricID)
  except ObjectNotFoundError:
    # This may occur if the user deletes the model before the result was
    # delivered while there are result messages still on the message bus.
    self._log.warn("Received command result=%r for unknown model=%s "
                   "(model deleted?)", result, metricID)
    return

  if result.status != 0:
    self._log.error(result.errorMessage)
    if metricObj.status != MetricStatus.ERROR:
      self._log.error("Placing model=<%s> in ERROR state due to "
                      "commandResult=%s",
                      getMetricLogPrefix(metricObj), result)
      with engine.connect() as conn:
        repository.setMetricStatus(conn, metricID, MetricStatus.ERROR,
                                   result.errorMessage)
    else:
      # NOTE: could be a race condition between app-layer and Model Swapper
      # or a side-effect of the at-least-once delivery guarantee
      self._log.warn("Received command result=%r for metricID=%s of "
                     "metric=<%s> that was already in ERROR state",
                     result, metricID, getMetricLogPrefix(metricObj))
    return

  # Create Model
  if result.method == "defineModel":
    self._log.info("Model was created for <%s>",
                   getMetricLogPrefix(metricObj))
    if metricObj.status == MetricStatus.CREATE_PENDING:
      with engine.connect() as conn:
        repository.setMetricStatus(conn, metricID, MetricStatus.ACTIVE)
    else:
      # NOTE: could be a race condition between app-layer and Model Swapper
      # or a side-effect of the at-least-once delivery guarantee
      self._log.warn("Received command result=%r for model=%s of metric=<%s> "
                     "that was not in CREATE_PENDING state",
                     result, metricID, getMetricLogPrefix(metricObj))
    return

  self._log.error("Unexpected model result=%r", result)
def _processModelInferenceResults(self, inferenceResults, metricID):
  """ Process a batch of model inference results

  Store the updated MetricData and anomaly likelihood parameters in the
  database.

  A row's anomaly_score value will be set to and remain at 0 in the first
  self._statisticsMinSampleSize rows; once we get enough inference results to
  create an anomaly likelihood model, anomaly_score will be computed on the
  subsequent rows.

  :param inferenceResults: a sequence of ModelInferenceResult instances in the
    processed order (ascending by timestamp)

  :param metricID: metric/model ID of the model that emitted the results

  :returns: None if the batch was rejected; otherwise a pair:
    (metric, metricDataRows)
      metric: Metric RowProxy instance corresponding to the given metricID
      metricDataRows: a sequence of MutableMetricDataRow instances
        corresponding to the updated metric_data rows.
    TODO: unit-test return value
  :rtype: None or tuple

  *NOTE:* the processing must be idempotent due to the "at least once"
  delivery semantics of the message bus

  *NOTE:* the performance goal is to minimize costly database access and avoid
  falling behind while processing model results, especially during the
  model's initial "catch-up" phase when large inference result batches are
  prevalent.
  """
  engine = repository.engineFactory(config)

  # Validate model ID
  try:
    with engine.connect() as conn:
      metricObj = repository.getMetric(conn, metricID)
  except ObjectNotFoundError:
    # Ignore inferences for unknown models. Typically, this is the result
    # of a deleted model. Another scenario where this might occur is when a
    # developer resets the db while there are result messages still on the
    # message bus. It would be an error if this were to occur in production
    # environment.
    self._log.warning("Received inference results for unknown model=%s; "
                      "(model deleted?)", metricID, exc_info=True)
    return None

  # Reject the results if model is in non-ACTIVE state (e.g., if HTM Metric
  # was unmonitored after the results were generated)
  if metricObj.status != MetricStatus.ACTIVE:
    self._log.warning("Received inference results for a non-ACTIVE "
                      "model=%s; metric=<%s>; (metric unmonitored?)",
                      metricID, getMetricLogPrefix(metricObj))
    return None

  # Load the MetricData instances corresponding to the results
  with engine.connect() as conn:
    metricDataRows = repository.getMetricData(
      conn,
      metricID,
      start=inferenceResults[0].rowID,
      stop=inferenceResults[-1].rowID)

  # metricDataRows must be mutable, as the data is massaged in
  # _scrubInferenceResultsAndInitMetricData()
  metricDataRows = list(metricDataRows)

  if not metricDataRows:
    self._log.error("Rejected inference result batch=[%s..%s] of model=%s "
                    "due to no matching metric_data rows",
                    inferenceResults[0].rowID, inferenceResults[-1].rowID,
                    metricID)
    return None

  try:
    self._scrubInferenceResultsAndInitMetricData(
      engine=engine,
      inferenceResults=inferenceResults,
      metricDataRows=metricDataRows,
      metricObj=metricObj)
  except RejectedInferenceResultBatch as e:
    # TODO: unit-test
    self._log.error(
      "Rejected inference result batch=[%s..%s] corresponding to "
      "rows=[%s..%s] of model=%s due to error=%r",
      inferenceResults[0].rowID, inferenceResults[-1].rowID,
      metricDataRows[0].rowid, metricDataRows[-1].rowid, metricID, e)
    return None

  # Update anomaly scores based on the new results
  anomalyLikelihoodParams = (
    self.likelihoodHelper.updateModelAnomalyScores(
      engine=engine,
      metricObj=metricObj,
      metricDataRows=metricDataRows))

  # Update metric data rows with rescaled display values
  # NOTE: doing this outside the updateColumns loop to avoid holding row locks
  #  any longer than necessary
  for metricData in metricDataRows:
    metricData.display_value = rescaleForDisplay(
      metricData.anomaly_score,
      active=(metricObj.status == MetricStatus.ACTIVE))

  # Update database once via transaction!
  startTime = time.time()
  try:
    @retryOnTransientErrors
    def runSQL(engine):
      with engine.begin() as conn:
        for metricData in metricDataRows:
          fields = {"raw_anomaly_score": metricData.raw_anomaly_score,
                    "anomaly_score": metricData.anomaly_score,
                    "display_value": metricData.display_value,
                    "multi_step_best_predictions":
                      json.dumps(metricData.multi_step_best_predictions)}
          repository.updateMetricDataColumns(conn, metricData, fields)

        self._updateAnomalyLikelihoodParams(
          conn,
          metricObj.uid,
          metricObj.model_params,
          anomalyLikelihoodParams)

    runSQL(engine)
  except (ObjectNotFoundError, MetricNotActiveError):
    self._log.warning("Rejected inference result batch=[%s..%s] of model=%s",
                      inferenceResults[0].rowID, inferenceResults[-1].rowID,
                      metricID, exc_info=True)
    return None

  self._log.debug("Updated HTM metric_data rows=[%s..%s] "
                  "of model=%s: duration=%ss",
                  metricDataRows[0].rowid, metricDataRows[-1].rowid,
                  metricID, time.time() - startTime)

  return (metricObj, metricDataRows,)