def _addMetric(engine, metricName): """Add the new metric to the database.""" if metricName in gCustomMetrics: try: # Attempt to reload the metric metricId = gCustomMetrics[metricName][0].uid with engine.connect() as conn: gCustomMetrics[metricName][0] = repository.getMetric(conn, metricId) return except htmengine.exceptions.ObjectNotFoundError: # Do nothing, we will create new metric and update cache below pass # Use the adapter to create the metric try: metricId = createCustomDatasourceAdapter().createMetric(metricName) except htmengine.exceptions.MetricAlreadyExists as e: metricId = e.uid with engine.connect() as conn: metric = repository.getMetric(conn, metricId) # Add it to our cache gCustomMetrics[metricName] = [metric, datetime.datetime.utcnow()] _trimMetricCache()
def testActivateModelClassifierEnabled(self): """ Test activateModel with classifier enabled in model spec. """ metricName = "test-" + uuid.uuid1().hex adapter = datasource_adapter_factory.createCustomDatasourceAdapter() g_log.info("Creating htmengine custom metric; name=%s", metricName) metricId = adapter.createMetric(metricName) self.addCleanup(adapter.deleteMetricByName, metricName) # Turn on monitoring modelSpec = { "datasource": "custom", "metricSpec": { "metric": metricName }, "modelParams": { "enableClassifier": True } } adapter.monitorMetric(modelSpec) with self.engine.connect() as conn: metricObj = repository.getMetric(conn, metricId, fields=[schema.metric.c.status]) self.assertEqual(metricObj.status, MetricStatus.PENDING_DATA) # Add some data data = [ (0, datetime.datetime.utcnow() - datetime.timedelta(minutes=5)), (100, datetime.datetime.utcnow()) ] with self.engine.connect() as conn: repository.addMetricData(conn, metricId, data) # Activate model adapter.activateModel(metricId) with self.engine.connect() as conn: metricObj = repository.getMetric(conn, metricId, fields=[schema.metric.c.status, schema.metric.c.model_params]) self.assertIn(metricObj.status, (MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE)) self._assertClassifierStatusInModelParams(metricObj.model_params, classifierEnabled=True) g_log.info("Waiting for model to become active") self.checkModelIsActive(metricId) g_log.info("Waiting at least one model result") self.checkModelResultsSize(metricId, 1, atLeast=True)
def testMonitorMetricWithResource(self): """Test monitorMetric that includes an explicit resource string.""" metricName = "test-" + uuid.uuid1().hex resource = "Test Resource" adapter = datasource_adapter_factory.createCustomDatasourceAdapter() g_log.info("Creating htmengine custom metric; name=%s", metricName) metricId = adapter.createMetric(metricName) self.addCleanup(adapter.deleteMetricByName, metricName) # Turn on monitoring modelSpec = { "datasource": "custom", "metricSpec": { "metric": metricName, "resource": resource, } } adapter.monitorMetric(modelSpec) with self.engine.connect() as conn: metricObj = repository.getMetric(conn, metricId, fields=[schema.metric.c.parameters, schema.metric.c.status, schema.metric.c.server]) self.assertEqual(metricObj.status, MetricStatus.PENDING_DATA) self.assertEqual(json.loads(metricObj.parameters), modelSpec) self.assertEqual(metricObj.server, resource) self._validateModelSpec(json.loads(metricObj.parameters))
def checkEncoderResolution(self, uid, minVal, maxVal, minResolution=None): """Check that encoder resolution is computed correctly.""" engine = repository.engineFactory(config=self.__config) with engine.begin() as conn: metricObj = repository.getMetric(conn, uid, fields=[schema.metric.c.name, schema.metric.c.model_params]) modelParams = json.loads(metricObj.model_params) self.assertNotEqual(modelParams, None, "No model exists for metric %s" % metricObj.name) sensorParams = modelParams["modelConfig"]["modelParams"]["sensorParams"] encoderParams = sensorParams["encoders"]["c1"] # Estimate and check the bounds for the resolution based on min and max lower = (maxVal - minVal) / 300.0 upper = (maxVal - minVal) / 80.0 if minResolution is not None: lower = max(minResolution, lower) upper = float("inf") resolution = encoderParams["resolution"] self.assertGreaterEqual(resolution, lower) self.assertLessEqual(resolution, upper)
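# --- Illustrative sketch; not part of the original source ---
# checkEncoderResolution above implies that, when no minResolution is given, an
# acceptable encoder resolution falls between (maxVal - minVal) / 300.0 and
# (maxVal - minVal) / 80.0, and that a minResolution raises the lower bound and
# removes the upper bound. The hypothetical helper below restates that
# arithmetic as a standalone, runnable function.
def expectedResolutionBounds(minVal, maxVal, minResolution=None):
  """Return the (lower, upper) bounds used when validating encoder resolution."""
  lower = (maxVal - minVal) / 300.0
  upper = (maxVal - minVal) / 80.0
  if minResolution is not None:
    lower = max(minResolution, lower)
    upper = float("inf")
  return lower, upper
# For example, expectedResolutionBounds(0, 100) is roughly (0.333, 1.25), and
# expectedResolutionBounds(0, 1, minResolution=0.5) is (0.5, inf).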
def testUnmonitorMetricPendingData(self): """ Test unmonitorMetric on metric in PENDING_DATA state """ metricName = "test-" + uuid.uuid1().hex adapter = datasource_adapter_factory.createCustomDatasourceAdapter() g_log.info("Creating htmengine custom metric; name=%s", metricName) metricId = adapter.createMetric(metricName) self.addCleanup(adapter.deleteMetricByName, metricName) # Turn on monitoring modelSpec = { "datasource": "custom", "metricSpec": { "metric": metricName } } adapter.monitorMetric(modelSpec) with self.engine.connect() as conn: metricObj = repository.getMetric(conn, metricId, fields=[schema.metric.c.parameters, schema.metric.c.status]) self.assertEqual(metricObj.status, MetricStatus.PENDING_DATA) self.assertEqual(json.loads(metricObj.parameters), modelSpec) self._validateModelSpec(json.loads(metricObj.parameters)) # Turn off monitoring adapter.unmonitorMetric(metricId) self.checkMetricUnmonitoredById(metricId)
def activateModel(self, metricId): """ Start a model that is PENDING_DATA, creating the OPF/CLA model NOTE: used by MetricStreamer when model is in PENDING_DATA state and sufficient data samples are available to get statistics and complete model creation. :param metricId: unique identifier of the metric row :raises htmengine.exceptions.ObjectNotFoundError: if metric with the referenced metric uid doesn't exist :raises htmengine.exceptions.MetricStatisticsNotReadyError: """ # Load the existing metric with self.connectionFactory() as conn: metricObj = repository.getMetric(conn, metricId, fields=[schema.metric.c.datasource]) if metricObj.datasource != self._DATASOURCE: raise TypeError( "activateModel: not an HTM metric=%s; datasource=%s" % (metricId, metricObj.datasource)) stats = self._getMetricStatistics(metricId) swarmParams = scalar_metric_utils.generateSwarmParams(stats) scalar_metric_utils.startModel(metricId, swarmParams=swarmParams, logger=self._log)
def test_MinMaxDelayedCreationNoMetricIntegrityErrorMER2190(self): """Tests that delayed creation doesn't cause integrity error in custom-metric model. It sends more than MODEL_CREATION_RECORD_THRESHOLD rows """ metricName = ( "testMinMaxDelayedCreationNoMetricIntegrityErrorMER2190.%i" % int(time.time())) LOGGER.info("Running test with metric name: %s", metricName) totalRowsToSend = MODEL_CREATION_RECORD_THRESHOLD + 700 self.addCleanup(self._deleteMetric, metricName) def timeGenerator(): """Generator for unix timestamps.""" backoff = datetime.timedelta(minutes=5 * (totalRowsToSend + 1)) dt = datetime.datetime.utcnow() - backoff td = datetime.timedelta(minutes=5) while True: dt += td yield int(calendar.timegm(dt.utctimetuple())) nextTime = timeGenerator() # Add custom metric data sock = socket.socket() sock.connect(("localhost", self.plaintextPort)) sock.sendall("%s 0.0 %i\n" % (metricName, nextTime.next())) self.gracefullyCloseSocket(sock) uid = self.checkMetricCreated(metricName) LOGGER.info("Metric %s has uid: %s", metricName, uid) # Send model creation request nativeMetric = {"datasource": "custom", "metricSpec": {"uid": uid}} model = self._createModel(nativeMetric) self.assertEqual(model.status, MetricStatus.PENDING_DATA) # Add more data sock = socket.socket() sock.connect(("localhost", self.plaintextPort)) for _ in xrange(totalRowsToSend - 1): sock.sendall("%s 7000.0 %i\n" % (metricName, nextTime.next())) self.gracefullyCloseSocket(sock) for _ in xrange(60): with self.engine.begin() as conn: metric = repository.getMetric(conn, uid) if metric.status == MetricStatus.ACTIVE: break LOGGER.info("Model=%s not ready. Sleeping 5 seconds...", uid) time.sleep(5) else: self.fail("Model results not available within 5 minutes") # Check that the data all got processed self.checkModelResultsSize(uid, totalRowsToSend)
def testExportImport(self): metricName = "test-" + uuid.uuid1().hex adapter = datasource_adapter_factory.createCustomDatasourceAdapter() g_log.info("Creating htmengine custom metric; name=%s", metricName) metricId = adapter.createMetric(metricName) self.addCleanup(adapter.deleteMetricByName, metricName) # Add some data # NOTE: we discard the fractional part because it gets eliminated # in the database, and we will want to compare against retrieved # items later. now = datetime.datetime.utcnow().replace(microsecond=0) data = [(0, now - datetime.timedelta(minutes=5)), (100, now)] with self.engine.connect() as conn: repository.addMetricData(conn, metricId, data) # Turn on monitoring modelSpec = { "datasource": "custom", "metricSpec": { "metric": metricName }, } adapter.monitorMetric(modelSpec) def checkExportSpec(exportSpec): self.assertEqual(exportSpec["datasource"], modelSpec["datasource"]) self.assertEqual(exportSpec["metricSpec"], modelSpec["metricSpec"]) self.assertSequenceEqual(exportSpec["data"], data) # Export exportSpec = adapter.exportModel(metricId) checkExportSpec(exportSpec) # Delete metric adapter.deleteMetricByName(metricName) self.checkModelDeleted(metricId) # Import metricId = adapter.importModel( htmengine.utils.jsonDecode(htmengine.utils.jsonEncode(exportSpec))) with self.engine.connect() as conn: metricObj = repository.getMetric( conn, metricId, fields=[schema.metric.c.parameters, schema.metric.c.status]) self.assertEqual(metricObj.status, MetricStatus.PENDING_DATA) self.assertEqual(json.loads(metricObj.parameters), modelSpec) self._validateModelSpec(json.loads(metricObj.parameters)) # Export again exportSpec = adapter.exportModel(metricId) checkExportSpec(exportSpec)
def checkModelIsActive(self, uid): engine = repository.engineFactory(config=self.__config) with engine.begin() as conn: metricObj = repository.getMetric(conn, uid, fields=[schema.metric.c.status]) self.assertEqual(metricObj.status, MetricStatus.ACTIVE)
def test_MinMaxDelayedCreation(self): """Tests that the min and max are set correctly when not specified.""" metricName = "testMinMaxDelayedCreation.%i" % int(time.time()) LOGGER.info("Running test with metric name: %s", metricName) self.addCleanup(self._deleteMetric, metricName) def timeGenerator(): """Generator for unix timestamps.""" dt = datetime.datetime.utcnow() - datetime.timedelta(hours=25) td = datetime.timedelta(minutes=5) while True: dt += td yield int(calendar.timegm(dt.utctimetuple())) nextTime = timeGenerator() # Add custom metric data sock = socket.socket() sock.connect(("localhost", self.plaintextPort)) sock.sendall("%s 0.0 %i\n" % (metricName, nextTime.next())) sock.sendall("%s 100.0 %i\n" % (metricName, nextTime.next())) self.gracefullyCloseSocket(sock) uid = self.checkMetricCreated(metricName) # Save the uid for later LOGGER.info("Metric %s has uid: %s", metricName, uid) # Send model creation request nativeMetric = {"datasource": "custom", "metricSpec": {"uid": uid}} model = self._createModel(nativeMetric) self.assertEqual(model.status, MetricStatus.PENDING_DATA) # Add more data sock = socket.socket() sock.connect(("localhost", self.plaintextPort)) for _ in xrange(MODEL_CREATION_RECORD_THRESHOLD - 2): sock.sendall("%s 7000.0 %i\n" % (metricName, nextTime.next())) self.gracefullyCloseSocket(sock) for _ in xrange(60): with self.engine.begin() as conn: metric = repository.getMetric(conn, uid) if metric.status == MetricStatus.ACTIVE: break LOGGER.info("Model=%s not ready. Sleeping 5 seconds...", uid) time.sleep(5) else: self.fail("Model results not available within 5 minutes") # Check the min and max for the model self.checkEncoderResolution(uid, 0.0, 7000.0) # Check that the data all got processed self.checkModelResultsSize(uid, MODEL_CREATION_RECORD_THRESHOLD)
def _createModel(self, nativeMetric): adapter = createDatasourceAdapter("custom") try: metricId = adapter.monitorMetric(nativeMetric) except MetricAlreadyMonitored as e: metricId = e.uid engine = repository.engineFactory(config=self.config) with engine.begin() as conn: return repository.getMetric(conn, metricId)
def _composeModelCommandResultMessage(cls, modelID, cmdResult): """ Compose message corresponding to the completion of a model command for publishing to downstream services. :param modelID: model identifier :param model_swapper_interface.ModelCommandResult cmdResult: model command result :returns: JSON-ifiable message contents object per model_command_result_amqp_message.json :rtype: dict :raises ObjectNotFoundError: when attempted to request additional info about a model that is not in the repository :raises MetricNotMonitoredError: when required info about a model is not available, because it's no longer monitored """ commandResultMessage = dict( method=cmdResult.method, modelId=modelID, commandId=cmdResult.commandID, status=cmdResult.status, errorMessage=cmdResult.errorMessage, ) if (cmdResult.method == "defineModel" and cmdResult.status == htmengineerrno.SUCCESS): # Add modelInfo for successfully-completed "defineModel" commands engine = repository.engineFactory(config) fields = [ schema.metric.c.name, schema.metric.c.server, schema.metric.c.parameters ] try: with engine.connect() as conn: metricObj = repository.getMetric( conn, modelID, fields=fields) except ObjectNotFoundError: g_log.warning("_composeModelCommandResultMessage: method=%s; " "model=%s not found", cmdResult.method, modelID) raise if not metricObj.parameters: g_log.warning("_composeModelCommandResultMessage: method=%s; " "model=%s not monitored", cmdResult.method, modelID) raise MetricNotMonitoredError commandResultMessage["modelInfo"] = dict( metricName=metricObj.name, resource=metricObj.server, modelSpec=json.loads(metricObj.parameters)) return commandResultMessage
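# --- Illustrative sketch; not part of the original source ---
# Shape of the message composed by _composeModelCommandResultMessage for a
# successfully-completed "defineModel" command. The keys mirror the code above;
# the concrete values are hypothetical placeholders.
exampleCommandResultMessage = {
  "method": "defineModel",
  "modelId": "0123456789abcdef0123456789abcdef",  # hypothetical metric/model uid
  "commandId": "a7c3e5d0f00d4b1c9d2e7f6a8b5c4d3e",  # hypothetical command id
  "status": htmengineerrno.SUCCESS,
  "errorMessage": None,  # hypothetical; no error on success
  "modelInfo": {
    "metricName": "my.custom.metric",  # hypothetical
    "resource": "my-server",           # hypothetical
    "modelSpec": {"datasource": "custom",
                  "metricSpec": {"metric": "my.custom.metric"}},
  },
}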
def start(): with repository.engineFactory(config).begin() as conn: metricObj = repository.getMetric(conn, metricId) modelStarted = (_startModelHelper(conn=conn, metricObj=metricObj, swarmParams=swarmParams, logger=logger)) if modelStarted: sendBacklogDataToModel(conn=conn, metricId=metricId, logger=logger) return modelStarted
def checkMetricUnmonitoredById(self, uid): engine = repository.engineFactory(config=self.__config) with engine.begin() as conn: metricObj = repository.getMetric(conn, uid, fields=[schema.metric.c.status, schema.metric.c.parameters]) self.assertEqual(metricObj.status, MetricStatus.UNMONITORED) self.assertIsNone(metricObj.parameters) with self.assertRaises(model_checkpoint_mgr.ModelNotFound): model_checkpoint_mgr.ModelCheckpointMgr().loadModelDefinition(uid)
def testMonitorMetricWithCompleteModelParams(self): """ Test monitorMetric with complete set of user-provided model parameters that activates a model """ metricName = "test-" + uuid.uuid1().hex adapter = datasource_adapter_factory.createCustomDatasourceAdapter() g_log.info("Creating htmengine custom metric; name=%s", metricName) metricId = adapter.createMetric(metricName) self.addCleanup(adapter.deleteMetricByName, metricName) fileName = "custom_datasource_adapter_test_model_config.json" with self._openTestDataFile(fileName) as modelConfigFile: modelConfig = json.load(modelConfigFile) # Turn on monitoring modelSpec = { "datasource": "custom", "metricSpec": { "metric": metricName }, "completeModelParams": { "modelConfig": modelConfig, "inferenceArgs": { "predictionSteps": [1], "predictedField": "bar", "inputPredictedField": "auto" }, "timestampFieldName": "foo", "valueFieldName": "bar" } } adapter.monitorMetric(modelSpec) with self.engine.connect() as conn: metricObj = repository.getMetric( conn, metricId, fields=[schema.metric.c.status, schema.metric.c.parameters]) self._validateModelSpec(json.loads(metricObj.parameters)) self.assertIn(metricObj.status, (MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE)) self.assertEqual(json.loads(metricObj.parameters), modelSpec) g_log.info("Waiting for model to become active") self.checkModelIsActive(metricId)
def checkMetricDeleted(self, uid): engine = repository.engineFactory(config=self.__config) with engine.begin() as conn: with self.assertRaises(Exception) as e: metric = repository.getMetric(conn, uid) models = repository.getAllModels(conn) for model in models: self.assertNotEqual(model.uid, uid, "Model showing up after deletion.") with self.assertRaises(model_checkpoint_mgr.ModelNotFound): model_checkpoint_mgr.ModelCheckpointMgr().loadModelDefinition(uid)
def testMonitorMetricWithEnoughDataForStats(self): """ monitorMetric should create a model when there is enough data rows """ metricName = "test-" + uuid.uuid1().hex adapter = datasource_adapter_factory.createCustomDatasourceAdapter() g_log.info("Creating htmengine custom metric; name=%s", metricName) metricId = adapter.createMetric(metricName) self.addCleanup(adapter.deleteMetricByName, metricName) # Add enough data to force activation of model data = [ (offset, datetime.datetime.utcnow() + datetime.timedelta(minutes=offset)) for offset in xrange( 0, scalar_metric_utils.MODEL_CREATION_RECORD_THRESHOLD * 5, 5) ] self.assertEqual(len(data), scalar_metric_utils.MODEL_CREATION_RECORD_THRESHOLD) with self.engine.connect() as conn: repository.addMetricData(conn, metricId, data) # Turn on monitoring modelSpec = { "datasource": "custom", "metricSpec": { "metric": metricName }, } adapter.monitorMetric(modelSpec) with self.engine.connect() as conn: metricObj = repository.getMetric(conn, metricId, fields=[schema.metric.c.status]) self.assertIn(metricObj.status, (MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE)) g_log.info("Waiting for model to become active") self.checkModelIsActive(metricId) g_log.info("Waiting at least one model result") self.checkModelResultsSize(metricId, 1, atLeast=True)
def testMonitorMetricWithMinResolution(self): """ Test monitorMetric with user-provided min/max and minResolution that activates a model. Make sure resolution doesn't drop below minResolution. """ metricName = "test-" + uuid.uuid1().hex adapter = datasource_adapter_factory.createCustomDatasourceAdapter() g_log.info("Creating htmengine custom metric; name=%s", metricName) metricId = adapter.createMetric(metricName) self.addCleanup(adapter.deleteMetricByName, metricName) # Turn on monitoring modelSpec = { "datasource": "custom", "metricSpec": { "metric": metricName }, "modelParams": { "min": 0, # optional "max": 1, # optional "minResolution": 0.5 # optional } } adapter.monitorMetric(modelSpec) with self.engine.connect() as conn: metricObj = repository.getMetric(conn, metricId, fields=[schema.metric.c.status, schema.metric.c.parameters]) self.assertIn(metricObj.status, (MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE)) self.assertEqual(json.loads(metricObj.parameters), modelSpec) #print metricObj.parameters self._validateModelSpec(json.loads(metricObj.parameters)) g_log.info("Waiting for model to become active") self.checkModelIsActive(metricId) self.checkEncoderResolution(metricId, 0, 1, minResolution=0.5)
def testMonitorMetricClassifierEnabled(self): """ Test monitorMetric with request for enabled classifier in model params """ metricName = "test-" + uuid.uuid1().hex adapter = datasource_adapter_factory.createCustomDatasourceAdapter() g_log.info("Creating htmengine custom metric; name=%s", metricName) metricId = adapter.createMetric(metricName) self.addCleanup(adapter.deleteMetricByName, metricName) # Turn on monitoring modelSpec = { "datasource": "custom", "metricSpec": { "metric": metricName }, "modelParams": { "min": 0, # optional "max": 100, # optional "enableClassifier": True } } adapter.monitorMetric(modelSpec) with self.engine.connect() as conn: metricObj = repository.getMetric(conn, metricId, fields=[ schema.metric.c.status, schema.metric.c.parameters, schema.metric.c.model_params ]) self.assertEqual(metricObj.status, MetricStatus.CREATE_PENDING) self.assertEqual(json.loads(metricObj.parameters), modelSpec) self._assertClassifierStatusInModelParams(metricObj.model_params, classifierEnabled=True) self._validateModelSpec(json.loads(metricObj.parameters)) g_log.info("Waiting for model to become active") self.checkModelIsActive(metricId) self.checkEncoderResolution(metricId, 0, 100)
def checkModelDeleted(self, uid): """Check that the model has been deleted""" engine = repository.engineFactory(config=self.__config) with engine.begin() as conn: try: metric = repository.getMetric(conn, uid) raise Exception("Metric not deleted as expected") except app_exceptions.ObjectNotFoundError: pass models = repository.getAllModels(conn) for model in models: self.assertNotEqual(model.uid, uid, "Model showing up after deletion.") with self.assertRaises(model_checkpoint_mgr.ModelNotFound): model_checkpoint_mgr.ModelCheckpointMgr().loadModelDefinition(uid)
def testCreateMetric(self): """ Test creation of custom metric """ metricName = "test-" + uuid.uuid1().hex adapter = datasource_adapter_factory.createCustomDatasourceAdapter() g_log.info("Creating htmengine custom metric; name=%s", metricName) metricId = adapter.createMetric(metricName) self.addCleanup(adapter.deleteMetricByName, metricName) with self.engine.connect() as conn: metricObj = repository.getMetric(conn, metricId, fields=[schema.metric.c.name, schema.metric.c.datasource, schema.metric.c.status]) self.assertEqual(metricObj.name, metricName) self.assertEqual(metricObj.datasource, "custom") self.assertEqual(metricObj.status, MetricStatus.UNMONITORED)
def testMonitorMetricWithUserInfo(self): """Test monitorMetric that includes an explicit userInfo property in metricSpec. """ metricName = "test-" + uuid.uuid1().hex userInfo = {"symbol": "test-user-info"} adapter = datasource_adapter_factory.createCustomDatasourceAdapter() g_log.info("Creating htmengine custom metric; name=%s", metricName) metricId = adapter.createMetric(metricName) self.addCleanup(adapter.deleteMetricByName, metricName) # Turn on monitoring modelSpec = { "datasource": "custom", "metricSpec": { "metric": metricName, "userInfo": userInfo } } adapter.monitorMetric(modelSpec) with self.engine.connect() as conn: metricObj = repository.getMetric(conn, metricId, fields=[ schema.metric.c.parameters, schema.metric.c.status, schema.metric.c.server ]) self.assertEqual(metricObj.status, MetricStatus.PENDING_DATA) self.assertEqual(json.loads(metricObj.parameters), modelSpec) self._validateModelSpec(json.loads(metricObj.parameters))
def testExportImportCompleteModelParams(self): metricName = "test-" + uuid.uuid1().hex adapter = datasource_adapter_factory.createCustomDatasourceAdapter() g_log.info("Creating htmengine custom metric; name=%s", metricName) metricId = adapter.createMetric(metricName) self.addCleanup(adapter.deleteMetricByName, metricName) # Add some data # NOTE: we discard the fractional part because it gets eliminated # in the database, and we will want to compare against retrieved # items later. now = datetime.datetime.utcnow().replace(microsecond=0) data = [(0, now - datetime.timedelta(minutes=5)), (100, now)] with self.engine.connect() as conn: repository.addMetricData(conn, metricId, data) fileName = "custom_datasource_adapter_test_model_config.json" with self._openTestDataFile(fileName) as modelConfigFile: modelConfig = json.load(modelConfigFile) # Turn on monitoring modelSpec = { "datasource": "custom", "metricSpec": { "metric": metricName }, "completeModelParams": { "modelConfig": modelConfig, "inferenceArgs": { "predictionSteps": [1], "predictedField": "bar", "inputPredictedField": "auto" }, "timestampFieldName": "foo", "valueFieldName": "bar" } } adapter.monitorMetric(modelSpec) def checkExportSpec(exportSpec): self.assertEqual(exportSpec["datasource"], modelSpec["datasource"]) self.assertEqual(exportSpec["metricSpec"], modelSpec["metricSpec"]) self.assertSequenceEqual(exportSpec["data"], data) # Export exportSpec = adapter.exportModel(metricId) checkExportSpec(exportSpec) # Delete metric adapter.deleteMetricByName(metricName) self.checkModelDeleted(metricId) # Import metricId = adapter.importModel( htmengine.utils.jsonDecode(htmengine.utils.jsonEncode(exportSpec))) with self.engine.connect() as conn: metricObj = repository.getMetric( conn, metricId, fields=[schema.metric.c.parameters, schema.metric.c.status]) self.assertIn(metricObj.status, (MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE)) self.assertEqual(json.loads(metricObj.parameters), modelSpec) self._validateModelSpec(json.loads(metricObj.parameters)) # Export again exportSpec = adapter.exportModel(metricId) checkExportSpec(exportSpec)
def testPathwayToDynamoDB(self): """ Test metric data pathway to dynamodb """ metricName = "TEST." + "".join(random.sample(string.ascii_letters, 16)) nativeMetric = { "modelParams": { "minResolution": 0.2, "min": 0.0, "max": 10000.0, }, "datasource": "custom", "metricSpec": { "metric": metricName, "resource": "Test", "userInfo": { "symbol": "TEST", "metricType": "TwitterVolume", "metricTypeName": "Twitter Volume", } } } metricName = nativeMetric["metricSpec"]["metric"] instanceName = nativeMetric["metricSpec"]["resource"] userInfo = nativeMetric["metricSpec"]["userInfo"] now = datetime.datetime.utcnow().replace(minute=0, second=0, microsecond=0) data = [ (5000.0, now - datetime.timedelta(minutes=10)), (6000.0, now - datetime.timedelta(minutes=5)), (7000.0, now), ] # We'll be explicitly deleting the metric below, but we need to add a # cleanup step that runs in case there is some other failure that prevents # that part of the test from being reached. def gracefulDelete(): try: self._deleteMetric(metricName) except ObjectNotFoundError: pass self.addCleanup(gracefulDelete) # Add custom metric data sock = socket.socket() sock.connect(("localhost", self.plaintextPort)) for metricValue, ts in data: sock.sendall( "%s %r %s\n" % (metricName, metricValue, epochFromNaiveUTCDatetime(ts))) self.gracefullyCloseSocket(sock) uid = self.checkMetricCreated(metricName) # Save the uid for later LOGGER.info("Metric %s has uid: %s", metricName, uid) # Send model creation request model = self._createModel(nativeMetric) parameters = json.loads(model.parameters) self.assertEqual(parameters["metricSpec"]["userInfo"], userInfo) for _ in xrange(60): with self.engine.begin() as conn: metric = repository.getMetric(conn, uid) if metric.status == MetricStatus.ACTIVE: break LOGGER.info("Model=%s not ready. Sleeping 1 second...", uid) time.sleep(1) else: self.fail("Model results not available within 5 minutes") # Check that the data all got processed self.checkModelResultsSize(uid, 3) # Now check that the data was published to dynamodb... 
dynamodb = DynamoDBService.connectDynamoDB() metricTable = Table(MetricDynamoDBDefinition().tableName, connection=dynamodb) metricItem = metricTable.lookup(uid) self.assertEqual(metricItem["uid"], uid) self.assertEqual(metricItem["name"], metricName) self.assertEqual(metricItem["metricType"], "TwitterVolume") self.assertEqual(metricItem["metricTypeName"], "Twitter Volume") self.assertEqual(metricItem["symbol"], "TEST") metricDataTable = Table(MetricDataDynamoDBDefinition().tableName, connection=dynamodb) instanceDataAnomalyScores = {} for metricValue, ts in data: metricDataItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR( metricDataTable.lookup)(uid, ts.isoformat()) # There is no server-side cleanup for metric data, so remove it here for # now to avoid accumulating test data self.addCleanup(metricDataItem.delete) self.assertEqual(metricValue, metricDataItem["metric_value"]) dt = datetime.datetime.strptime(metricDataItem["timestamp"], "%Y-%m-%dT%H:%M:%S") self.assertEqual(ts, dt) ts = ts.replace(minute=0, second=0, microsecond=0) date = ts.strftime("%Y-%m-%d") hour = ts.strftime("%H") key = (date, hour) maxVal = instanceDataAnomalyScores.get(key, 0.0) instanceDataAnomalyScores[key] = max( maxVal, metricDataItem["anomaly_score"]) # And check that the aggregated instance data is updated instanceDataHourlyTable = Table( InstanceDataHourlyDynamoDBDefinition().tableName, connection=dynamodb) for key, anomalyScore in instanceDataAnomalyScores.iteritems(): date, hour = key instanceDataHourlyItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR( instanceDataHourlyTable.lookup)(instanceName, "%sT%s" % (date, hour)) self.addCleanup(instanceDataHourlyItem.delete) self.assertAlmostEqual( anomalyScore, float( instanceDataHourlyItem["anomaly_score"]["TwitterVolume"])) self.assertEqual(date, instanceDataHourlyItem["date"]) self.assertEqual(hour, instanceDataHourlyItem["hour"]) # Now send some twitter data and validate that it made it to dynamodb twitterData = [{ "metric_name": metricName, "tweet_uid": uid, "created_at": "2015-02-19T19:43:24.870109", "agg_ts": "2015-02-19T19:43:24.870118", "text": "Tweet text", "userid": "10", "username": "******", "retweet_count": "0" }] with MessageBusConnector() as messageBus: messageBus.publishExg( exchange=self.config.get("non_metric_data", "exchange_name"), routingKey=( self.config.get("non_metric_data", "exchange_name") + ".twitter"), body=json.dumps(twitterData)) metricTweetsTable = Table(MetricTweetsDynamoDBDefinition().tableName, connection=dynamodb) metricTweetItem = metricTweetsTable.lookup( "-".join((metricName, uid)), "2015-02-19T19:43:24.870118") # There is no server-side cleanup for tweet data, so remove it here for # now to avoid accumulating test data self.addCleanup(metricTweetItem.delete) self.assertEqual(metricTweetItem["username"], twitterData[0]["username"]) self.assertEqual(metricTweetItem["tweet_uid"], twitterData[0]["tweet_uid"]) self.assertEqual(metricTweetItem["created_at"], twitterData[0]["created_at"]) self.assertEqual(metricTweetItem["agg_ts"], twitterData[0]["agg_ts"]) self.assertEqual(metricTweetItem["text"], twitterData[0]["text"]) self.assertEqual(metricTweetItem["userid"], twitterData[0]["userid"]) self.assertEqual(metricTweetItem["username"], twitterData[0]["username"]) self.assertEqual(metricTweetItem["retweet_count"], twitterData[0]["retweet_count"]) queryResult = metricTweetsTable.query_2( metric_name__eq=metricName, agg_ts__eq=twitterData[0]["agg_ts"], index="taurus.metric_data-metric_name_index") queriedMetricTweetItem = next(queryResult) 
self.assertEqual(queriedMetricTweetItem["username"], twitterData[0]["username"]) self.assertEqual(queriedMetricTweetItem["tweet_uid"], twitterData[0]["tweet_uid"]) self.assertEqual(queriedMetricTweetItem["created_at"], twitterData[0]["created_at"]) self.assertEqual(queriedMetricTweetItem["agg_ts"], twitterData[0]["agg_ts"]) self.assertEqual(queriedMetricTweetItem["text"], twitterData[0]["text"]) self.assertEqual(queriedMetricTweetItem["userid"], twitterData[0]["userid"]) self.assertEqual(queriedMetricTweetItem["username"], twitterData[0]["username"]) self.assertEqual(queriedMetricTweetItem["retweet_count"], twitterData[0]["retweet_count"]) # Delete metric and ensure metric is deleted from dynamodb, too self._deleteMetric(metricName) for _ in xrange(60): time.sleep(1) try: metricItem = metricTable.lookup(uid) except ItemNotFound as err: break else: self.fail("Metric not deleted from dynamodb")
def _startModelHelper(conn, metricObj, swarmParams, logger): """ Start the model :param conn: SQLAlchemy Connection object for executing SQL :type conn: sqlalchemy.engine.Connection :param metricObj: metric, freshly-loaded :type metricObj: sqlalchemy.engine.RowProxy (see repository.getMetric()) :param swarmParams: non-None swarmParams generated via scalar_metric_utils.generateSwarmParams(). :param logger: logger object :returns: True if model was started; False if not :raises htmengine.exceptions.ObjectNotFoundError: if the metric doesn't exist; this may happen if it got deleted by another process in the meantime. :raises htmengine.exceptions.MetricStatusChangedError: if Metric status was changed by someone else (most likely another process) before this operation could complete """ if swarmParams is None: raise ValueError( "startModel: 'swarmParams' must be non-None: metric=%s" % (metricObj.uid,)) if metricObj.status not in (MetricStatus.UNMONITORED, MetricStatus.PENDING_DATA): if metricObj.status in (MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE): return False logger.error("Unexpected metric status; metric=%r", metricObj) raise ValueError("startModel: unexpected metric status; metric=%r" % (metricObj,)) startTime = time.time() # Save swarm parameters and update metric status refStatus = metricObj.status repository.updateMetricColumnsForRefStatus( conn, metricObj.uid, refStatus, {"status": MetricStatus.CREATE_PENDING, "model_params": htmengine.utils.jsonEncode(swarmParams)}) metricObj = repository.getMetric(conn, metricObj.uid, fields=[schema.metric.c.uid, schema.metric.c.status]) # refresh if metricObj.status != MetricStatus.CREATE_PENDING: raise app_exceptions.MetricStatusChangedError( "startModel: unable to start model=%s; " "metric status morphed from %s to %s" % (metricObj.uid, refStatus, metricObj.status,)) # Request to create the CLA model try: model_swapper_utils.createHTMModel(metricObj.uid, swarmParams) except Exception: logger.exception("startModel: createHTMModel failed.") repository.setMetricStatus(conn, metricObj.uid, status=MetricStatus.ERROR, message=repr(sys.exc_info()[1])) raise logger.info("startModel: started model=%r; duration=%.4fs", metricObj, time.time() - startTime) return True
def startMonitoring(conn, metricId, swarmParams, logger): """ Start monitoring an UNMONITORED metric. NOTE: typically called either inside a transaction and/or with locked tables Starts the CLA model if provided non-None swarmParams; otherwise defers model creation to a later time and places the metric in MetricStatus.PENDING_DATA state. :param conn: SQLAlchemy Connection object for executing SQL :type conn: sqlalchemy.engine.Connection :param metricId: unique identifier of the metric row :param swarmParams: swarmParams generated via scalar_metric_utils.generateSwarmParams() or None. :param logger: logger object :returns: True if model was started; False if not :raises htmengine.exceptions.ObjectNotFoundError: if metric with the referenced metric uid doesn't exist :raises htmengine.exceptions.MetricStatusChangedError: if Metric status was changed by someone else (most likely another process) before this operation could complete """ modelStarted = False startTime = time.time() metricObj = repository.getMetric(conn, metricId) assert metricObj.status == MetricStatus.UNMONITORED, ( "startMonitoring: metric=%s is already monitored; status=%s" % ( metricId, metricObj.status,)) if swarmParams is not None: # We have swarmParams, so start the model modelStarted = _startModelHelper(conn=conn, metricObj=metricObj, swarmParams=swarmParams, logger=logger) else: # Put the metric into the PENDING_DATA state until enough data arrives for # stats refStatus = metricObj.status repository.setMetricStatus(conn, metricId, MetricStatus.PENDING_DATA, refStatus=refStatus) # refresh metricStatus = repository.getMetric(conn, metricId, fields=[schema.metric.c.status]).status if metricStatus == MetricStatus.PENDING_DATA: logger.info("startMonitoring: promoted metric to model in PENDING_DATA; " "metric=%s; duration=%.4fs", metricId, time.time() - startTime) else: raise app_exceptions.MetricStatusChangedError( "startMonitoring: unable to promote metric=%s to model as " "PENDING_DATA; metric status morphed from %s to %s" % (metricId, refStatus, metricStatus,)) return modelStarted
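# --- Illustrative sketch; not part of the original source ---
# One way the pieces above are meant to fit together (names such as `engine`,
# `metricId`, `stats`, and `logger` are assumed to exist in the caller's scope;
# the exact call sites in the datasource adapters may differ):
swarmParams = scalar_metric_utils.generateSwarmParams(stats)
with engine.begin() as conn:
  # If swarmParams is None, startMonitoring() defers model creation and leaves
  # the metric in MetricStatus.PENDING_DATA, as described in its docstring.
  modelStarted = scalar_metric_utils.startMonitoring(conn=conn,
                                                     metricId=metricId,
                                                     swarmParams=swarmParams,
                                                     logger=logger)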
def testPathwayToDynamoDB(self):
  """ Test metric data pathway to dynamodb
  """
  metricName = "TEST." + "".join(random.sample(string.ascii_letters, 16))

  nativeMetric = {
    "modelParams": {
      "minResolution": 0.2,
      "min": 0.0,
      "max": 10000.0,
    },
    "datasource": "custom",
    "metricSpec": {
      "metric": metricName,
      "resource": "Test",
      "userInfo": {
        "symbol": "TEST",
        "metricType": "TwitterVolume",
        "metricTypeName": "Twitter Volume",
      }
    }
  }
  metricName = nativeMetric["metricSpec"]["metric"]
  instanceName = nativeMetric["metricSpec"]["resource"]
  userInfo = nativeMetric["metricSpec"]["userInfo"]

  now = datetime.datetime.utcnow().replace(minute=0, second=0, microsecond=0)

  data = [
    (5000.0, now - datetime.timedelta(minutes=10)),
    (6000.0, now - datetime.timedelta(minutes=5)),
    (7000.0, now),
  ]

  # We'll be explicitly deleting the metric below, but we need to add a
  # cleanup step that runs in case there is some other failure that prevents
  # that part of the test from being reached.
  def gracefulDelete():
    try:
      self._deleteMetric(metricName)
    except ObjectNotFoundError:
      pass

  self.addCleanup(gracefulDelete)

  # Add custom metric data
  sock = socket.socket()
  sock.connect(("localhost", self.plaintextPort))
  for metricValue, ts in data:
    sock.sendall("%s %r %s\n" % (metricName,
                                 metricValue,
                                 epochFromNaiveUTCDatetime(ts)))

  self.gracefullyCloseSocket(sock)

  uid = self.checkMetricCreated(metricName)

  # Save the uid for later
  LOGGER.info("Metric %s has uid: %s", metricName, uid)

  # Send model creation request
  model = self._createModel(nativeMetric)
  parameters = json.loads(model.parameters)
  self.assertEqual(parameters["metricSpec"]["userInfo"], userInfo)

  for _ in xrange(60):
    with self.engine.begin() as conn:
      metric = repository.getMetric(conn, uid)

    if metric.status == MetricStatus.ACTIVE:
      break

    LOGGER.info("Model=%s not ready. Sleeping 1 second...", uid)
    time.sleep(1)
  else:
    self.fail("Model did not become active within 60 seconds")

  # Check that the data all got processed
  self.checkModelResultsSize(uid, 3)

  # Now check that the data was published to dynamodb...
  dynamodb = DynamoDBService.connectDynamoDB()

  metricTable = Table(MetricDynamoDBDefinition().tableName,
                      connection=dynamodb)
  metricItem = metricTable.lookup(uid)
  self.assertEqual(metricItem["uid"], uid)
  self.assertEqual(metricItem["name"], metricName)
  self.assertEqual(metricItem["metricType"], "TwitterVolume")
  self.assertEqual(metricItem["metricTypeName"], "Twitter Volume")
  self.assertEqual(metricItem["symbol"], "TEST")

  metricDataTable = Table(MetricDataDynamoDBDefinition().tableName,
                          connection=dynamodb)
  instanceDataAnomalyScores = {}
  for metricValue, ts in data:
    metricDataItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR(
      metricDataTable.lookup
    )(uid, ts.isoformat())
    # There is no server-side cleanup for metric data, so remove it here for
    # now to avoid accumulating test data
    self.addCleanup(metricDataItem.delete)
    self.assertEqual(metricValue, metricDataItem["metric_value"])
    dt = datetime.datetime.strptime(metricDataItem["timestamp"],
                                    "%Y-%m-%dT%H:%M:%S")
    self.assertEqual(ts, dt)
    ts = ts.replace(minute=0, second=0, microsecond=0)
    date = ts.strftime("%Y-%m-%d")
    hour = ts.strftime("%H")
    key = (date, hour)
    maxVal = instanceDataAnomalyScores.get(key, 0.0)
    instanceDataAnomalyScores[key] = max(
      maxVal, metricDataItem["anomaly_score"])

  # And check that the aggregated instance data is updated
  instanceDataHourlyTable = Table(
    InstanceDataHourlyDynamoDBDefinition().tableName, connection=dynamodb)
  for key, anomalyScore in instanceDataAnomalyScores.iteritems():
    date, hour = key
    instanceDataHourlyItem = _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR(
      instanceDataHourlyTable.lookup
    )(instanceName, "%sT%s" % (date, hour))
    self.addCleanup(instanceDataHourlyItem.delete)
    self.assertAlmostEqual(
      anomalyScore,
      float(instanceDataHourlyItem["anomaly_score"]["TwitterVolume"]))
    self.assertEqual(date, instanceDataHourlyItem["date"])
    self.assertEqual(hour, instanceDataHourlyItem["hour"])

  # Now send some twitter data and validate that it made it to dynamodb
  twitterData = [
    {
      "metric_name": metricName,
      "tweet_uid": uid,
      "created_at": "2015-02-19T19:43:24.870109",
      "agg_ts": "2015-02-19T19:43:24.870118",
      "text": "Tweet text",
      "userid": "10",
      "username": "******",
      "retweet_count": "0"
    }
  ]

  with MessageBusConnector() as messageBus:
    messageBus.publishExg(
      exchange=self.config.get("non_metric_data", "exchange_name"),
      routingKey=(
        self.config.get("non_metric_data", "exchange_name") + ".twitter"),
      body=json.dumps(twitterData)
    )

  metricTweetsTable = Table(MetricTweetsDynamoDBDefinition().tableName,
                            connection=dynamodb)
  for _ in range(30):
    try:
      metricTweetItem = metricTweetsTable.lookup(
        twitterData[0]["text"], twitterData[0]["agg_ts"]
      )
      break
    except ItemNotFound:
      # DynamoDB reads are eventually consistent; wait and retry
      time.sleep(1)
      continue

  # There is no server-side cleanup for tweet data, so remove it here for
  # now to avoid accumulating test data
  self.addCleanup(metricTweetItem.delete)

  self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
  self.assertEqual(metricTweetItem["tweet_uid"], twitterData[0]["tweet_uid"])
  self.assertEqual(metricTweetItem["created_at"],
                   twitterData[0]["created_at"])
  self.assertEqual(metricTweetItem["agg_ts"], twitterData[0]["agg_ts"])
  self.assertEqual(metricTweetItem["text"], twitterData[0]["text"])
  self.assertEqual(metricTweetItem["userid"], twitterData[0]["userid"])
  self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
  self.assertEqual(metricTweetItem["retweet_count"],
                   twitterData[0]["retweet_count"])
  self.assertEqual(metricTweetItem["copy_count"], 0)

  sort_key = twitterData[0]["agg_ts"]

  ts = (epochFromNaiveUTCDatetime(
    datetime.datetime.strptime(twitterData[0]["agg_ts"].partition(".")[0],
                               "%Y-%m-%dT%H:%M:%S")) * 1e5)

  queryResult = metricTweetsTable.query_2(
    metric_name__eq=metricName,
    sort_key__gte=ts,
    index="taurus.metric_data-metric_name_index")

  queriedMetricTweetItem = next(queryResult)

  self.assertEqual(queriedMetricTweetItem["username"],
                   twitterData[0]["username"])
  self.assertEqual(queriedMetricTweetItem["tweet_uid"],
                   twitterData[0]["tweet_uid"])
  self.assertEqual(queriedMetricTweetItem["created_at"],
                   twitterData[0]["created_at"])
  self.assertEqual(queriedMetricTweetItem["agg_ts"], twitterData[0]["agg_ts"])
  self.assertEqual(queriedMetricTweetItem["text"], twitterData[0]["text"])
  self.assertEqual(queriedMetricTweetItem["userid"], twitterData[0]["userid"])
  self.assertEqual(queriedMetricTweetItem["username"],
                   twitterData[0]["username"])
  self.assertEqual(queriedMetricTweetItem["retweet_count"],
                   twitterData[0]["retweet_count"])
  self.assertEqual(queriedMetricTweetItem["copy_count"], 0)
  self.assertEqual(queriedMetricTweetItem["sort_key"], ts)

  duplicatedTwitterData = [
    {
      "metric_name": "copy of " + metricName,
      "tweet_uid": "copy of " + uid,
      "created_at": "2015-02-19T19:45:24.870109",
      "agg_ts": "2015-02-19T19:43:24.870118",  # Same agg_ts!
      "text": "Tweet text",  # Same text!
      "userid": "20",
      "username": "******",
      "retweet_count": "0"
    }
  ]

  with MessageBusConnector() as messageBus:
    messageBus.publishExg(
      exchange=self.config.get("non_metric_data", "exchange_name"),
      routingKey=(
        self.config.get("non_metric_data", "exchange_name") + ".twitter"),
      body=json.dumps(duplicatedTwitterData)
    )

  for _ in range(30):
    metricTweetItem = metricTweetsTable.lookup(
      twitterData[0]["text"], twitterData[0]["agg_ts"]
    )

    if metricTweetItem["copy_count"] != 1:
      time.sleep(1)
      continue

    # Assert same as original, except for copy_count, which should be 1
    self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
    self.assertEqual(metricTweetItem["tweet_uid"],
                     twitterData[0]["tweet_uid"])
    self.assertEqual(metricTweetItem["created_at"],
                     twitterData[0]["created_at"])
    self.assertEqual(metricTweetItem["agg_ts"], twitterData[0]["agg_ts"])
    self.assertEqual(metricTweetItem["text"], twitterData[0]["text"])
    self.assertEqual(metricTweetItem["userid"], twitterData[0]["userid"])
    self.assertEqual(metricTweetItem["username"], twitterData[0]["username"])
    self.assertEqual(metricTweetItem["retweet_count"],
                     twitterData[0]["retweet_count"])
    self.assertEqual(metricTweetItem["sort_key"], ts + 1)

    break
  else:
    self.fail("copy_count of original tweet not updated within reasonable"
              " amount of time (~30s) for duplicated tweet.")

  # Delete metric and ensure metric is deleted from dynamodb, too
  self._deleteMetric(metricName)

  for _ in xrange(60):
    time.sleep(1)
    try:
      metricItem = metricTable.lookup(uid)
    except ItemNotFound:
      break
  else:
    self.fail("Metric not deleted from dynamodb")
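# The DynamoDB lookups above are wrapped in _RETRY_ON_ITEM_NOT_FOUND_DYNAMODB_ERROR,
# whose definition is not part of this snippet. Below is a minimal sketch of what
# such a wrapper could look like, assuming it only needs to re-invoke the wrapped
# callable while boto's ItemNotFound is raised; the name, attempt count, and delay
# are illustrative and not the actual implementation.
import functools

from boto.dynamodb2.exceptions import ItemNotFound


def _retryOnItemNotFound(func, attempts=30, delaySec=1.0):
  """Retry func while DynamoDB raises ItemNotFound (eventual consistency)."""
  @functools.wraps(func)
  def wrapper(*args, **kwargs):
    for attempt in xrange(attempts):
      try:
        return func(*args, **kwargs)
      except ItemNotFound:
        if attempt == attempts - 1:
          raise
        time.sleep(delaySec)
  return wrapper

# Hypothetical usage, mirroring the lookups in the test above:
#   metricDataItem = _retryOnItemNotFound(metricDataTable.lookup)(
#     uid, ts.isoformat())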
def _processModelInferenceResults(self, inferenceResults, metricID):
  """
  Process a batch of model inference results

  Store the updated MetricData and anomaly likelihood parameters in the
  database.

  A row's anomaly_score value will be set to and remain at 0 in the first
  self._statisticsMinSampleSize rows; once we get enough inference results
  to create an anomaly likelihood model, anomaly_score will be computed on
  the subsequent rows.

  :param inferenceResults: a sequence of ModelInferenceResult instances in
    the processed order (ascending by timestamp)

  :param metricID: metric/model ID of the model that emitted the results

  :returns: None if the batch was rejected; otherwise a pair:
    (metric, metricDataRows)
      metric: Metric RowProxy instance corresponding to the given metricID
      metricDataRows: a sequence of MutableMetricDataRow instances
        corresponding to the updated metric_data rows.
    TODO: unit-test return value
  :rtype: None or tuple

  *NOTE:* the processing must be idempotent due to the "at least once"
  delivery semantics of the message bus

  *NOTE:* the performance goal is to minimize costly database access and
  avoid falling behind while processing model results, especially during
  the model's initial "catch-up" phase when large inference result batches
  are prevalent.
  """
  engine = repository.engineFactory(config)

  # Validate model ID
  try:
    with engine.connect() as conn:
      metricObj = repository.getMetric(conn, metricID)
  except ObjectNotFoundError:
    # Ignore inferences for unknown models. Typically, this is the result
    # of a deleted model. Another scenario where this might occur is when a
    # developer resets the db while there are result messages still on the
    # message bus. It would be an error if this were to occur in a
    # production environment.
    self._log.warning("Received inference results for unknown model=%s; "
                      "(model deleted?)", metricID, exc_info=True)
    return None

  # Reject the results if model is in non-ACTIVE state (e.g., if HTM Metric
  # was unmonitored after the results were generated)
  if metricObj.status != MetricStatus.ACTIVE:
    self._log.warning("Received inference results for a non-ACTIVE "
                      "model=%s; metric=<%s>; (metric unmonitored?)",
                      metricID, getMetricLogPrefix(metricObj))
    return None

  # Load the MetricData instances corresponding to the results
  with engine.connect() as conn:
    metricDataRows = repository.getMetricData(conn,
                                              metricID,
                                              start=inferenceResults[0].rowID,
                                              stop=inferenceResults[-1].rowID)

  # metricDataRows must be mutable, as the data is massaged in
  # _scrubInferenceResultsAndInitMetricData()
  metricDataRows = list(metricDataRows)

  if not metricDataRows:
    self._log.error("Rejected inference result batch=[%s..%s] of model=%s "
                    "due to no matching metric_data rows",
                    inferenceResults[0].rowID, inferenceResults[-1].rowID,
                    metricID)
    return None

  try:
    self._scrubInferenceResultsAndInitMetricData(
      engine=engine,
      inferenceResults=inferenceResults,
      metricDataRows=metricDataRows,
      metricObj=metricObj)
  except RejectedInferenceResultBatch as e:
    # TODO: unit-test
    self._log.error(
      "Rejected inference result batch=[%s..%s] corresponding to "
      "rows=[%s..%s] of model=%s due to error=%r",
      inferenceResults[0].rowID, inferenceResults[-1].rowID,
      metricDataRows[0].rowid, metricDataRows[-1].rowid, metricID, e)
    return None

  # Update anomaly scores based on the new results
  anomalyLikelihoodParams = (
    self.likelihoodHelper.updateModelAnomalyScores(
      engine=engine,
      metricObj=metricObj,
      metricDataRows=metricDataRows))

  # Update metric data rows with rescaled display values
  # NOTE: doing this outside the updateColumns loop to avoid holding row
  # locks any longer than necessary
  for metricData in metricDataRows:
    metricData.display_value = rescaleForDisplay(
      metricData.anomaly_score,
      active=(metricObj.status == MetricStatus.ACTIVE))

  # Update database once via transaction!
  startTime = time.time()
  try:
    @retryOnTransientErrors
    def runSQL(engine):
      with engine.begin() as conn:
        for metricData in metricDataRows:
          fields = {"raw_anomaly_score": metricData.raw_anomaly_score,
                    "anomaly_score": metricData.anomaly_score,
                    "display_value": metricData.display_value,
                    "multi_step_best_predictions":
                      json.dumps(metricData.multi_step_best_predictions)}
          repository.updateMetricDataColumns(conn, metricData, fields)

        self._updateAnomalyLikelihoodParams(
          conn,
          metricObj.uid,
          metricObj.model_params,
          anomalyLikelihoodParams)

    runSQL(engine)
  except (ObjectNotFoundError, MetricNotActiveError):
    self._log.warning("Rejected inference result batch=[%s..%s] of model=%s",
                      inferenceResults[0].rowID, inferenceResults[-1].rowID,
                      metricID, exc_info=True)
    return None

  self._log.debug("Updated HTM metric_data rows=[%s..%s] "
                  "of model=%s: duration=%ss",
                  metricDataRows[0].rowid, metricDataRows[-1].rowid,
                  metricID, time.time() - startTime)

  return (metricObj, metricDataRows,)
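# A hedged sketch of how a caller might drive _processModelInferenceResults for
# one batch pulled off the message bus. The handler and publisher names below
# are hypothetical and not part of the code above; they only illustrate the
# None-means-rejected contract described in the docstring.
def _handleInferenceResultBatch(self, metricID, inferenceResults):
  result = self._processModelInferenceResults(inferenceResults, metricID)
  if result is None:
    # Batch was rejected (unknown/inactive metric, no matching rows, etc.);
    # nothing to publish for it.
    return

  metricObj, metricDataRows = result
  # Forward the scored rows downstream (hypothetical publisher).
  self._publishScoredMetricData(metricObj, metricDataRows)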
def _processModelInferenceResults(self, inferenceResults, metricID):
  """
  Process a batch of model inference results

  Store the updated MetricData and anomaly likelihood parameters in the
  database.

  A row's anomaly_score value will be set to and remain at 0 in the first
  self._statisticsMinSampleSize rows; once we get enough inference results
  to create an anomaly likelihood model, anomaly_score will be computed on
  the subsequent rows.

  :param inferenceResults: a sequence of ModelInferenceResult instances in
    the processed order (ascending by timestamp)

  :param metricID: metric/model ID of the model that emitted the results

  :returns: None if the batch was rejected; otherwise a pair:
    (metric, metricDataRows)
      metric: Metric RowProxy instance corresponding to the given metricID
      metricDataRows: a sequence of MutableMetricDataRow instances
        corresponding to the updated metric_data rows.
    TODO: unit-test return value
  :rtype: None or tuple

  *NOTE:* the processing must be idempotent due to the "at least once"
  delivery semantics of the message bus

  *NOTE:* the performance goal is to minimize costly database access and
  avoid falling behind while processing model results, especially during
  the model's initial "catch-up" phase when large inference result batches
  are prevalent.
  """
  engine = repository.engineFactory(config)

  # Validate model ID
  try:
    with engine.connect() as conn:
      metricObj = repository.getMetric(conn, metricID)
  except ObjectNotFoundError:
    # Ignore inferences for unknown models. Typically, this is the result
    # of a deleted model. Another scenario where this might occur is when a
    # developer resets the db while there are result messages still on the
    # message bus. It would be an error if this were to occur in a
    # production environment.
    self._log.warning(
      "Received inference results for unknown model=%s; "
      "(model deleted?)", metricID, exc_info=True)
    return None

  # Reject the results if model is in non-ACTIVE state (e.g., if HTM Metric
  # was unmonitored after the results were generated)
  if metricObj.status != MetricStatus.ACTIVE:
    self._log.warning(
      "Received inference results for a non-ACTIVE "
      "model=%s; metric=<%s>; (metric unmonitored?)",
      metricID, getMetricLogPrefix(metricObj))
    return None

  # Load the MetricData instances corresponding to the results
  with engine.connect() as conn:
    metricDataRows = repository.getMetricData(
      conn,
      metricID,
      start=inferenceResults[0].rowID,
      stop=inferenceResults[-1].rowID)

  # metricDataRows must be mutable, as the data is massaged in
  # _scrubInferenceResultsAndInitMetricData()
  metricDataRows = list(metricDataRows)

  if not metricDataRows:
    self._log.error(
      "Rejected inference result batch=[%s..%s] of model=%s "
      "due to no matching metric_data rows",
      inferenceResults[0].rowID, inferenceResults[-1].rowID, metricID)
    return None

  try:
    self._scrubInferenceResultsAndInitMetricData(
      engine=engine,
      inferenceResults=inferenceResults,
      metricDataRows=metricDataRows,
      metricObj=metricObj)
  except RejectedInferenceResultBatch as e:
    # TODO: unit-test
    self._log.error(
      "Rejected inference result batch=[%s..%s] corresponding to "
      "rows=[%s..%s] of model=%s due to error=%r",
      inferenceResults[0].rowID, inferenceResults[-1].rowID,
      metricDataRows[0].rowid, metricDataRows[-1].rowid, metricID, e)
    return None

  # Update anomaly scores based on the new results
  anomalyLikelihoodParams = (
    self.likelihoodHelper.updateModelAnomalyScores(
      engine=engine,
      metricObj=metricObj,
      metricDataRows=metricDataRows))

  # Update metric data rows with rescaled display values
  # NOTE: doing this outside the updateColumns loop to avoid holding row
  # locks any longer than necessary
  for metricData in metricDataRows:
    metricData.display_value = rescaleForDisplay(
      metricData.anomaly_score,
      active=(metricObj.status == MetricStatus.ACTIVE))

  # Update database once via transaction!
  startTime = time.time()
  try:
    @retryOnTransientErrors
    def runSQL(engine):
      with engine.begin() as conn:
        for metricData in metricDataRows:
          fields = {
            "raw_anomaly_score": metricData.raw_anomaly_score,
            "anomaly_score": metricData.anomaly_score,
            "display_value": metricData.display_value
          }
          repository.updateMetricDataColumns(
            conn, metricData, fields)

        self._updateAnomalyLikelihoodParams(
          conn,
          metricObj.uid,
          metricObj.model_params,
          anomalyLikelihoodParams)

    runSQL(engine)
  except (ObjectNotFoundError, MetricNotActiveError):
    self._log.warning(
      "Rejected inference result batch=[%s..%s] of model=%s",
      inferenceResults[0].rowID, inferenceResults[-1].rowID, metricID,
      exc_info=True)
    return None

  self._log.debug(
    "Updated HTM metric_data rows=[%s..%s] "
    "of model=%s: duration=%ss",
    metricDataRows[0].rowid, metricDataRows[-1].rowid, metricID,
    time.time() - startTime)

  return (
    metricObj,
    metricDataRows,
  )
def startMonitoring(conn, metricId, swarmParams, logger):
  """ Start monitoring an UNMONITORED metric.

  NOTE: typically called inside a transaction and/or with locked tables

  Starts the CLA model if provided non-None swarmParams; otherwise defers
  model creation to a later time and places the metric in
  MetricStatus.PENDING_DATA state.

  :param conn: SQLAlchemy Connection object for executing SQL
  :type conn: sqlalchemy.engine.Connection

  :param metricId: unique identifier of the metric row

  :param swarmParams: swarmParams generated via
    scalar_metric_utils.generateSwarmParams() or None.

  :param logger: logger object

  :returns: True if model was started; False if not

  :raises htmengine.exceptions.ObjectNotFoundError: if metric with the
    referenced metric uid doesn't exist

  :raises htmengine.exceptions.MetricStatusChangedError: if Metric status was
    changed by someone else (most likely another process) before this
    operation could complete
  """
  modelStarted = False

  startTime = time.time()

  metricObj = repository.getMetric(conn, metricId)

  assert metricObj.status == MetricStatus.UNMONITORED, (
    "startMonitoring: metric=%s is already monitored; status=%s" % (
      metricId, metricObj.status,))

  if swarmParams is not None:
    # We have swarmParams, so start the model
    modelStarted = _startModelHelper(conn=conn,
                                     metricObj=metricObj,
                                     swarmParams=swarmParams,
                                     logger=logger)
  else:
    # Put the metric into the PENDING_DATA state until enough data arrives for
    # stats
    refStatus = metricObj.status

    repository.setMetricStatus(conn,
                               metricId,
                               MetricStatus.PENDING_DATA,
                               refStatus=refStatus)
    # refresh
    metricStatus = repository.getMetric(conn,
                                        metricId,
                                        fields=[schema.metric.c.status]).status

    if metricStatus == MetricStatus.PENDING_DATA:
      logger.info(
        "startMonitoring: promoted metric to model in PENDING_DATA; "
        "metric=%s; duration=%.4fs", metricId, time.time() - startTime)
    else:
      raise app_exceptions.MetricStatusChangedError(
        "startMonitoring: unable to promote metric=%s to model as "
        "PENDING_DATA; metric status morphed from %s to %s" % (
          metricId, refStatus, metricStatus,))

  return modelStarted
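# A minimal usage sketch for startMonitoring, assuming the caller has already
# generated swarmParams (or None) for an existing metricId and owns the
# surrounding transaction; the engine/config/logger wiring here is
# illustrative only, not part of the function above.
import logging

with repository.engineFactory(config).begin() as conn:
  modelStarted = startMonitoring(conn,
                                 metricId=metricId,
                                 swarmParams=swarmParams,
                                 logger=logging.getLogger(__name__))

if not modelStarted:
  # Metric is now in PENDING_DATA; the model will be started later, once
  # enough data has accumulated to compute stats.
  pass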
def testExportImport(self):
  metricName = "test-" + uuid.uuid1().hex

  adapter = datasource_adapter_factory.createCustomDatasourceAdapter()

  g_log.info("Creating htmengine custom metric; name=%s", metricName)
  metricId = adapter.createMetric(metricName)
  self.addCleanup(adapter.deleteMetricByName, metricName)

  # Add some data
  # NOTE: we discard the fractional part because it gets eliminated
  # in the database, and we will want to compare against retrieved
  # items later.
  now = datetime.datetime.utcnow().replace(microsecond=0)
  data = [
    (0, now - datetime.timedelta(minutes=5)),
    (100, now)
  ]

  with self.engine.connect() as conn:
    repository.addMetricData(conn, metricId, data)

  # Turn on monitoring
  modelSpec = {
    "datasource": "custom",
    "metricSpec": {
      "metric": metricName
    },
  }

  adapter.monitorMetric(modelSpec)

  def checkExportSpec(exportSpec):
    self.assertEqual(exportSpec["datasource"], modelSpec["datasource"])
    self.assertEqual(exportSpec["metricSpec"], modelSpec["metricSpec"])
    self.assertSequenceEqual(exportSpec["data"], data)

  # Export
  exportSpec = adapter.exportModel(metricId)
  checkExportSpec(exportSpec)

  # Delete metric
  adapter.deleteMetricByName(metricName)
  self.checkModelDeleted(metricId)

  # Import
  metricId = adapter.importModel(
    htmengine.utils.jsonDecode(htmengine.utils.jsonEncode(exportSpec)))

  with self.engine.connect() as conn:
    metricObj = repository.getMetric(conn,
                                     metricId,
                                     fields=[schema.metric.c.parameters,
                                             schema.metric.c.status])

  self.assertEqual(metricObj.status, MetricStatus.PENDING_DATA)
  self.assertEqual(json.loads(metricObj.parameters), modelSpec)
  self._validateModelSpec(json.loads(metricObj.parameters))

  # Export again
  exportSpec = adapter.exportModel(metricId)
  checkExportSpec(exportSpec)
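# For reference, a hedged sketch of the export spec shape that checkExportSpec
# validates above, inferred from the assertions in the test; the metric name,
# timestamps, and values are illustrative only.
exampleExportSpec = {
  "datasource": "custom",
  "metricSpec": {
    "metric": "test-0123456789abcdef"  # whatever metricName was generated
  },
  "data": [
    (0, datetime.datetime(2015, 2, 19, 19, 38)),
    (100, datetime.datetime(2015, 2, 19, 19, 43))
  ]
}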