def testImportModel(self):
  adapter = datasource_adapter_factory.createAutostackDatasourceAdapter()
  autostack = adapter.createAutostack(self.stackSpec)
  modelSpec = self.getModelSpec("cloudwatch", "CPUUtilization", autostack)
  modelId = adapter.monitorMetric(modelSpec)

  spec = adapter.exportModel(modelId)
  adapter.unmonitorMetric(modelId)

  modelId = adapter.importModel(spec)
  self.validateModel(modelId, modelSpec, autostack)

  with self.engine.connect() as conn:
    metrics = repository.getAutostackMetrics(conn, autostack.uid)
    self.assertEqual(len([metricObj for metricObj in metrics]), 1)

    # Ensure that import can create an autostack if it doesn't exist
    repository.deleteAutostack(conn, autostack.uid)

  adapter = datasource_adapter_factory.createAutostackDatasourceAdapter()
  modelId = adapter.importModel(spec)

  newModelSpec = dict(modelSpec)
  with self.engine.connect() as conn:
    repository.getMetric(conn, modelId)
    autostack = repository.getAutostackFromMetric(conn, modelId)

  self.addCleanup(self._deleteAutostack, autostack.uid)
  newModelSpec["metricSpec"]["autostackId"] = autostack.uid
  self.validateModel(modelId, modelSpec, autostack)
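# self.stackSpec is not defined in this snippet; a minimal illustrative
# example, assuming the stackSpec layout documented in exportModel() further
# below (the name and filter values here are placeholders, not taken from the
# test fixture):
EXAMPLE_STACK_SPEC = {
  "name": "all_web_servers",
  "aggSpec": {
    "datasource": "cloudwatch",
    "region": "us-west-2",
    "resourceType": "AWS::EC2::Instance",
    "filters": {"tag:Name": ["*test*"]}
  }
}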
def _addAutostackMetric(self, conn, autostackObj, name=None, **kwargs):
  name = name or "AWS/EC2/CPUUtilization"
  modelSpec = {
    "modelParams": {},
    "datasource": "autostack",
    "metricSpec": {
      "slaveDatasource": ("cloudwatch" if name.startswith("AWS/EC2")
                          else "autostack"),
      "slaveMetric": {"metric": name, "namespace": "AWS/EC2"},
      "autostackId": autostackObj.uid
    }
  }

  metricDict = repository.addMetric(
    conn,
    datasource="autostack",
    name=name,
    description=("{0} on YOMP Autostack {1} in {2} "
                 "region").format(name, autostackObj.name,
                                  autostackObj.region),
    server="Autostacks/{0}".format(autostackObj.uid),
    location=autostackObj.region,
    tag_name=name,
    parameters=htmengine.utils.jsonEncode(modelSpec),
    poll_interval=300,
    status=MetricStatus.UNMONITORED)

  metricObj = repository.getMetric(conn, metricDict["uid"])

  repository.addMetricToAutostack(conn, autostackObj.uid, metricObj.uid)

  # Wrap the immutable row in a plain object so tests can tweak attributes
  metricObj = type("MutableMetric", (object,), dict(metricObj.items()))()

  return metricObj
def deleteModel(metricId):
  try:
    with web.ctx.connFactory() as conn:
      metricRow = repository.getMetric(conn, metricId)
  except app_exceptions.ObjectNotFoundError:
    raise web.notfound("ObjectNotFoundError Metric not found: Metric ID: %s"
                       % metricId)

  if metricRow.datasource == "autostack":
    raise NotAllowedResponse(
      {"result": ("Not a standalone model=%s; datasource=%s. Unable"
                  " to DELETE from this endpoint")
                 % (metricId, metricRow.datasource,)
      })

  log.debug("Deleting model for %s metric=%s", metricRow.datasource, metricId)

  with web.ctx.connFactory() as conn:
    repository.deleteModel(conn, metricId)

  # NOTE: this is the new way using datasource adapters
  try:
    createDatasourceAdapter(metricRow.datasource).unmonitorMetric(metricId)
  except app_exceptions.ObjectNotFoundError:
    raise web.notfound("ObjectNotFoundError Metric not found: Metric ID: %s"
                       % (metricId,))

  return utils.jsonEncode({'result': 'success'})
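# A minimal client-side sketch of exercising this handler over HTTP, assuming
# the models API is served at http://localhost:8080/_models/<metricId> with
# API-key basic auth (host, path prefix, and auth scheme are assumptions, not
# taken from this code):
import requests

def deleteModelExample(metricId, apiKey, base="http://localhost:8080/_models"):
  response = requests.delete("%s/%s" % (base, metricId), auth=(apiKey, ""))
  response.raise_for_status()
  return response.json()  # expected body: {"result": "success"}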
def testMetricDataForRandomRowID(uid):
  '''
  This tests if the metric data returned by the GET call:
    _models/<uid>/data
  has anomaly_score consistent with what is in the actual database, by
  asserting it against a dao.MetricData.get() call.
  It repeats the process for 5 random sample rows for each uid in the
  database.

  Algorithm:
    - Query the MetricDataHandler GET call for a certain uid
    - Check if response is OK
    - Find the last row id for the uid
    - Select a random row between 1 and last row id
    - Find the anomaly score for that row id
    - Assert on the anomaly score
  '''
  response = self.app.get("/%s/data" % uid, headers=self.headers)
  assertions.assertSuccess(self, response)
  getAllModelsResult = utils.jsonDecode(response.body)
  with repository.engineFactory().connect() as conn:
    lastRowID = repository.getMetric(conn, uid).last_rowid
  for _ in range(5):
    randomRowID = randrange(1, lastRowID)
    with repository.engineFactory().connect() as conn:
      singleMetricData = repository.getMetricData(
        conn, uid, rowid=randomRowID).first()
    metricData = getMetricDataWithRowID(getAllModelsResult['data'],
                                        randomRowID)
    self.assertEqual(metricData[2], singleMetricData.anomaly_score)
    self.assertEqual(datetime.strptime(metricData[0], '%Y-%m-%d %H:%M:%S'),
                     singleMetricData.timestamp)
def checkModelIsActive(self, uid):
  engine = repository.engineFactory()
  with engine.begin() as conn:
    metricObj = repository.getMetric(conn, uid,
                                     fields=[schema.metric.c.status])
  self.assertEqual(metricObj.status, MetricStatus.ACTIVE)
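# Elsewhere in these tests, callers wait for a model to reach ACTIVE before
# checking results. A minimal polling sketch of that idea, assuming a
# checkStatus() callable that returns the current MetricStatus (the names and
# defaults here are illustrative, not part of this code base):
import time

def waitForStatus(checkStatus, wanted, timeout=600, interval=5):
  deadline = time.time() + timeout
  while time.time() < deadline:
    if checkStatus() == wanted:
      return True
    time.sleep(interval)  # not ready yet; poll again
  return False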
def getModel(metricId):
  try:
    with web.ctx.connFactory() as conn:
      metric = repository.getMetric(conn, metricId,
                                    getMetricDisplayFields(conn))
      return metric
  except app_exceptions.ObjectNotFoundError:
    raise web.notfound("ObjectNotFoundError Metric not found: Metric ID: %s"
                       % metricId)
def tearDownClass(cls):
  try:
    engine = repository.engineFactory()
    with engine.connect() as conn:
      repository.deleteMetric(conn, cls.uid)
    with engine.connect() as conn:
      _ = repository.getMetric(conn, cls.uid)
  except ObjectNotFoundError:
    g_logger.info("Successful clean-up")
  else:
    g_logger.error("Test failed to delete metric=%s", cls.uid)
def checkMetricUnmonitoredById(self, uid):
  engine = repository.engineFactory()
  with engine.begin() as conn:
    metricObj = repository.getMetric(conn, uid,
                                     fields=[schema.metric.c.status,
                                             schema.metric.c.parameters])
  self.assertEqual(metricObj.status, MetricStatus.UNMONITORED)
  self.assertIsNone(metricObj.parameters)

  with self.assertRaises(model_checkpoint_mgr.ModelNotFound):
    model_checkpoint_mgr.ModelCheckpointMgr().loadModelDefinition(uid)
def validateModel(self, modelId, modelSpec, autostack):
  self.assertIsNotNone(modelId)
  with self.engine.connect() as conn:
    metricObj = repository.getMetric(conn, modelId,
                                     fields=[schema.metric.c.status,
                                             schema.metric.c.parameters])
    self.assertIn(metricObj.status,
                  [MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE])
    self.assertEqual(json.loads(metricObj.parameters), modelSpec)
    self.assertEqual(repository.getAutostackFromMetric(conn, modelId).uid,
                     autostack.uid)
def GET(self, metricId=None):
  """
  Returns a dict sufficient for importing a new model from scratch
  """
  try:
    if metricId is not None:
      try:
        with web.ctx.connFactory() as conn:
          metricRow = repository.getMetric(
            conn, metricId,
            fields=[schema.metric.c.uid, schema.metric.c.datasource])
        nativeMetrics = [self._exportNativeMetric(metricRow)]
      except app_exceptions.ObjectNotFoundError:
        raise web.notfound("ObjectNotFoundError Metric not found: "
                           "Metric ID: %s" % metricId)
    else:
      with web.ctx.connFactory() as conn:
        metricRowList = repository.getAllModels(conn)
      if metricRowList:
        nativeMetrics = [self._exportNativeMetric(metricRow)
                         for metricRow in metricRowList]
      else:
        nativeMetrics = []

    self.addStandardHeaders()
    web.header("Content-Description", "YOMP Export")
    web.header("Expires", "0")
    web.header("Cache-Control", "must-revalidate, post-check=0, pre-check=0")

    data = web.input(filename=None)
    if data.filename:
      web.header("Content-Disposition",
                 "attachment;filename=%s" % (data.filename))

    returned = utils.jsonEncode(nativeMetrics)
    web.header("Content-length", len(returned))
    return returned
  except web.HTTPError as ex:
    log.info(str(ex) or repr(ex))
    raise ex
  except Exception as ex:
    log.exception("GET Failed")
    raise web.internalerror(str(ex) or repr(ex))
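# A minimal client-side sketch of pulling an export over HTTP and saving it
# to disk, assuming the handler above is mounted at
# http://localhost:8080/_models/export with API-key basic auth (URL and auth
# scheme are assumptions, not taken from this code):
import requests

def downloadExport(apiKey, base="http://localhost:8080/_models/export"):
  response = requests.get(base,
                          params={"filename": "models.json"},
                          auth=(apiKey, ""))
  response.raise_for_status()
  with open("models.json", "wb") as f:
    f.write(response.content)  # JSON list of native metric export specs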
def checkEncoderResolution(self, uid, minVal, maxVal):
  """Check that encoder resolution is computed correctly."""
  engine = repository.engineFactory()
  with engine.begin() as conn:
    metricObj = repository.getMetric(conn, uid,
                                     fields=[schema.metric.c.name,
                                             schema.metric.c.model_params])
  modelParams = json.loads(metricObj.model_params)
  self.assertNotEqual(modelParams, None,
                      "No model exists for metric %s" % metricObj.name)
  sensorParams = modelParams["modelConfig"]["modelParams"]["sensorParams"]
  encoderParams = sensorParams["encoders"]["c1"]

  # Estimate and check the bounds for the resolution based on min and max
  lower = (maxVal - minVal) / 300.0
  upper = (maxVal - minVal) / 80.0
  self.assertGreater(encoderParams["resolution"], lower)
  self.assertLess(encoderParams["resolution"], upper)
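# A worked example of the resolution bounds checked above: for a metric
# expected to range over minVal=0.0 and maxVal=100.0 (e.g. CPUUtilization
# percent), the accepted encoder resolution must fall strictly between
# 100/300 ~= 0.33 and 100/80 = 1.25. The helper below simply restates that
# arithmetic; it is not part of the test suite.
def resolutionBounds(minVal, maxVal):
  span = maxVal - minVal
  return span / 300.0, span / 80.0  # (exclusive lower, exclusive upper)

assert resolutionBounds(0.0, 100.0) == (100 / 300.0, 1.25)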
def unmonitorMetric(self, metricId):
  """ Unmonitor a metric

  :param metricId: unique identifier of the metric row

  :raises YOMP.app.exceptions.ObjectNotFoundError: if metric with the
    referenced metric uid doesn't exist
  """
  with self.connectionFactory() as conn:
    metricObj = repository.getMetric(conn, metricId)

    # Delete the metric from the database
    repository.retryOnTransientErrors(repository.deleteMetric)(conn, metricId)

  # Send request to delete CLA model
  model_swapper_utils.deleteHTMModel(metricId)

  self._log.info("Autostack Metric unmonitored: metric=%r", metricObj)
def _runBasicChecksOnModel(self, modelId, _adapter, modelSpec):
  with repository.engineFactory().connect() as conn:
    metricObj = repository.getMetric(conn, modelId)

  _LOG.info("Making sure metric is CREATE_PENDING or ACTIVE or PENDING_DATA")
  self.assertIn(metricObj.status,
                [MetricStatus.CREATE_PENDING,
                 MetricStatus.ACTIVE,
                 MetricStatus.PENDING_DATA])

  _LOG.info("Checking modelSpec")
  self.assertEqual(jsonDecode(metricObj.parameters), modelSpec)

  _LOG.info("Waiting for model to become active")
  self.checkModelIsActive(modelId)

  _LOG.info("Waiting for at least one model result")
  self.checkModelResultsSize(modelId, 1, atLeast=True)
def _runBasicChecksOnModel(self, modelId, _adapter, modelSpec):
  with self.connFactory() as conn:
    metricObj = repository.getMetric(
      conn, modelId,
      fields=[schema.metric.c.status, schema.metric.c.parameters])

  _LOG.info("Making sure metric is CREATE_PENDING or ACTIVE")
  self.assertIn(metricObj.status,
                [MetricStatus.CREATE_PENDING, MetricStatus.ACTIVE])

  _LOG.info("Checking modelSpec")
  self.assertEqual(json.loads(metricObj.parameters), modelSpec)

  _LOG.info("Waiting for model to become active")
  self.checkModelIsActive(modelId)

  _LOG.info("Waiting for at least one model result")
  self.checkModelResultsSize(modelId, 1, atLeast=True)
def activateModel(self, metricId):
  """ Start a model that is PENDING_DATA, creating the OPF/CLA model

  NOTE: used by MetricStreamer when model is in PENDING_DATA state and
    sufficient data samples are available to get statistics and complete
    model creation.

  :param metricId: unique identifier of the metric row

  :raises YOMP.app.exceptions.ObjectNotFoundError: if metric with the
    referenced metric uid doesn't exist

  :raises YOMP.app.exceptions.MetricStatisticsNotReadyError:
  """
  with self.connectionFactory() as conn:
    # TODO: This function is identical to custom metric activateModel()
    metricObj = repository.getMetric(conn, metricId,
                                     fields=[schema.metric.c.datasource,
                                             schema.metric.c.parameters])

    if metricObj.datasource != self._DATASOURCE:
      raise TypeError("activateModel: not a cloudwatch metric=%r"
                      % (metricObj,))

    if metricObj.parameters:
      parameters = htmengine.utils.jsonDecode(metricObj.parameters)
    else:
      parameters = {}

    stats = self._getMetricStatistics(parameters["metricSpec"])

    self._log.info("activateModel: metric=%s, stats=%r", metricId, stats)

    swarmParams = scalar_metric_utils.generateSwarmParams(stats)

    scalar_metric_utils.startModel(metricId,
                                   swarmParams=swarmParams,
                                   logger=self._log)
def setUpClass(cls):
  """
  Setup steps for all test cases.
  Focus for these is to cover all API checks for ModelDataHandler.
  Hence, this does all setup (creating the metric, waiting for metricData)
  across all testcases; all API calls for querying metricData will be
  against the single metric created in setup.

  Setup Process
  1) Update conf with AWS credentials; ManagedTempRepository will not work
     in this test
  2) Select a test instance that has been running for a long time; we use
     an instance older than 15 days
  3) Create the metric, wait for the minimum number of metricData rows to
     become available. Set to 100, configurable.
  4) Pick testRowId and set it to a low value; this makes sure the
     anomaly_score field is non-NULL for the given row when invoking GET
     with conditions. Set to 5.
  5) Decide queryParams for anomalyScore, to and from timestamp
  """
  cls.headers = getDefaultHTTPHeaders(YOMP.app.config)

  # All other services need AWS credentials to work
  # Set AWS credentials
  YOMP.app.config.loadConfig()

  # Select a test instance that has been running for a long time
  g_logger.info("Getting long-running EC2 Instances")
  instances = aws_utils.getLongRunningEC2Instances(
    "us-west-2",
    YOMP.app.config.get("aws", "aws_access_key_id"),
    YOMP.app.config.get("aws", "aws_secret_access_key"),
    15)
  testInstance = instances[randrange(1, len(instances))]

  createModelData = {
    "region": "us-west-2",
    "namespace": "AWS/EC2",
    "datasource": "cloudwatch",
    "metric": "CPUUtilization",
    "dimensions": {
      "InstanceId": testInstance.id
    }
  }

  # Number of minimum rows
  cls.minDataRows = 100

  cls.app = TestApp(models_api.app.wsgifunc())

  # Create test metric
  g_logger.info("Creating test metric; modelSpec=%s", createModelData)
  response = cls.app.put("/", utils.jsonEncode(createModelData),
                         headers=cls.headers)
  postResult = utils.jsonDecode(response.body)

  maxWaitTime = 600
  waitTimeMetricData = 0
  waitAnomalyScore = 0

  # Wait for enough metric data to be available
  cls.uid = postResult[0]["uid"]
  engine = repository.engineFactory()
  with engine.connect() as conn:
    cls.metricData = [row for row
                      in repository.getMetricData(conn, cls.uid)]
  with engine.connect() as conn:
    cls.testMetric = repository.getMetric(conn, cls.uid)

  # Confirm that we have enough metricData
  g_logger.info("Waiting for metric data")
  while (len(cls.metricData) < cls.minDataRows and
         waitTimeMetricData < maxWaitTime):
    g_logger.info("not ready, waiting for metric data: got %d of %d ...",
                  len(cls.metricData), cls.minDataRows)
    time.sleep(5)
    waitTimeMetricData += 5
    with engine.connect() as conn:
      cls.metricData = [row for row
                        in repository.getMetricData(conn, cls.uid)]

  # Take a low value for testRowId; this will make sure the anomaly_score
  # field is non-NULL for the given row
  cls.testRowId = 5

  with engine.connect() as conn:
    cls.testMetricRow = (repository.getMetricData(conn,
                                                  cls.uid,
                                                  rowid=cls.testRowId)
                         .fetchone())

  # Make sure we did not receive None etc. for the anomaly score
  g_logger.info("cls.testMetricRow.anomaly_score=%r",
                cls.testMetricRow.anomaly_score)
  g_logger.info("waitAnomalyScore=%r", waitAnomalyScore)
  while (cls.testMetricRow.anomaly_score is None and
         waitAnomalyScore < maxWaitTime):
    g_logger.info("anomaly_score not ready, sleeping...")
    time.sleep(5)
    waitAnomalyScore += 5
    with engine.connect() as conn:
      cls.testMetricRow = (repository.getMetricData(conn,
                                                    cls.uid,
                                                    rowid=cls.testRowId)
                           .fetchone())

  # Decide queryParams for anomalyScore, to and from timestamp
  cls.testAnomalyScore = cls.testMetricRow.anomaly_score
  cls.testTimeStamp = cls.testMetricRow.timestamp
def exportModel(self, metricId):
  """ Export the given model.

  :param metricId: datasource-specific unique metric identifier

  :returns: Model-export specification for the Autostack model
  :rtype: dict

  ::

      {
        "datasource": "autostack",

        "stackSpec": {
          "name": "all_web_servers",  # Autostack name
          "aggSpec": {  # aggregation spec
            "datasource": "cloudwatch",
            "region": "us-west-2",
            "resourceType": "AWS::EC2::Instance",
            "filters": {  # resourceType-specific filter
              "tag:Name": ["*test*", "*YOMP*"],
              "tag:Description": ["Blah", "foo"]
            }
          }
        },

        "modelSpec": {
          "datasource": "autostack",
          "metricSpec": {
            "slaveDatasource": "cloudwatch",
            "slaveMetric": {  # specific to slaveDatasource
              "namespace": "AWS/EC2",
              "metric": "CPUUtilization"
            },
            "period": 300  # aggregation period; seconds
          },
          "modelParams": {  # optional; specific to slave metric
            "min": 0,  # optional
            "max": 100  # optional
          }
        }
      }
  """
  with self.connectionFactory() as conn:
    spec = {}
    spec["datasource"] = self._DATASOURCE

    metricObj = repository.getMetric(conn,
                                     metricId,
                                     fields=[schema.metric.c.parameters])
    autostackObj = repository.getAutostackFromMetric(conn, metricId)

    parameters = htmengine.utils.jsonDecode(metricObj.parameters)
    spec["modelSpec"] = parameters
    modelSpec = spec["modelSpec"]
    metricSpec = modelSpec["metricSpec"]
    del metricSpec["autostackId"]

    spec["stackSpec"] = {}
    stackSpec = spec["stackSpec"]
    stackSpec["name"] = autostackObj.name
    # Only supporting cloudwatch / EC2 for now
    stackSpec["aggSpec"] = {}
    aggSpec = stackSpec["aggSpec"]
    aggSpec["datasource"] = "cloudwatch"
    aggSpec["region"] = autostackObj.region
    aggSpec["resourceType"] = "AWS::EC2::Instance"
    aggSpec["filters"] = htmengine.utils.jsonDecode(autostackObj.filters)

  return spec
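# A minimal round-trip sketch of how exportModel() pairs with importModel(),
# following the flow exercised in testImportModel() earlier in this file
# (adapter construction and error handling are omitted; this is not part of
# the adapter itself):
def reimportAutostackModel(adapter, modelId):
  spec = adapter.exportModel(modelId)  # capture the export specification
  adapter.unmonitorMetric(modelId)     # drop the existing model
  return adapter.importModel(spec)     # recreate it (and its autostack if needed)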
def POST(self, autostackId, data=None):  # pylint: disable=C0103,R0201
  """
  Create one or more Autostack Metric(s)

  ::

      POST /_autostacks/{autostackId}/metrics

      [
        {
          "namespace": "AWS/EC2",
          "metric": "CPUUtilization"
        },
        ...
      ]

  Request body is a list of items, each of which is a subset of the standard
  cloudwatch native metric, specifying only:

  :param namespace: AWS Namespace
  :type namespace: str
  :param metric: AWS Metric name
  :type metric: str

  `datasource`, `region`, and `dimensions`, normally required when creating
  models, are not necessary.
  """
  try:
    self.addStandardHeaders()
    with web.ctx.connFactory() as conn:
      autostackRow = repository.getAutostack(conn, autostackId)
    data = data or utils.jsonDecode(web.data())

    for nativeMetric in data:
      try:
        if nativeMetric["namespace"] == "Autostacks":
          slaveDatasource = "autostack"
        else:
          slaveDatasource = "cloudwatch"  # only support cloudwatch for now

        modelParams = {}
        if "min" in nativeMetric and "max" in nativeMetric:
          modelParams["min"] = nativeMetric["min"]
          modelParams["max"] = nativeMetric["max"]

        modelSpec = {
          "datasource": "autostack",
          "metricSpec": {
            "autostackId": autostackRow.uid,
            "slaveDatasource": slaveDatasource,
            "slaveMetric": nativeMetric
          },
          "modelParams": modelParams
        }

        metricId = (createAutostackDatasourceAdapter()
                    .monitorMetric(modelSpec))
        with web.ctx.connFactory() as conn:
          metricRow = repository.getMetric(conn, metricId)
        metricDict = convertMetricRowToMetricDict(metricRow)

      except KeyError:
        raise web.badrequest("Missing details in request")

      except ValueError:
        response = {"result": "failure"}
        raise web.badrequest(utils.jsonEncode(response))

    response = {"result": "success", "metric": metricDict}
    raise web.created(utils.jsonEncode(response))

  except ObjectNotFoundError:
    raise web.notfound("Autostack not found: Autostack ID: %s" % autostackId)
  except web.HTTPError as ex:
    if bool(re.match(r"([45][0-9][0-9])\s?", web.ctx.status)):
      # Log 400-599 status codes as errors, ignoring 200-399
      log.error(str(ex) or repr(ex))
    raise
  except Exception as ex:
    log.exception("POST Failed")
    raise web.internalerror(str(ex) or repr(ex))
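# A minimal client-side sketch of the request documented above, assuming the
# API is reachable at http://localhost:8080/_autostacks/<autostackId>/metrics
# with API-key basic auth (host, port, and auth scheme are assumptions, not
# taken from this handler):
import json
import requests

def addAutostackMetrics(autostackId, apiKey,
                        base="http://localhost:8080/_autostacks"):
  body = [{"namespace": "AWS/EC2", "metric": "CPUUtilization"},
          {"namespace": "AWS/EC2", "metric": "NetworkIn"}]
  response = requests.post("%s/%s/metrics" % (base, autostackId),
                           data=json.dumps(body),
                           auth=(apiKey, ""))
  response.raise_for_status()
  return response.json()  # {"result": "success", "metric": {...}}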
def testModelInferencesLifeCycle(self):
  startTime = time()
  for model in sorted(self.data):
    # Create a model; POST is forwarded to PUT
    print "Creating metric for %s : " % model
    response = self.app.put("/", json.dumps(model), headers=self.headers)
    assertions.assertSuccess(self, response, code=201)

  response = self.app.get("/", headers=self.headers)
  assertions.assertSuccess(self, response)
  getAllModelsResult = utils.jsonDecode(response.body)

  totalMetricCount = len(getAllModelsResult)
  self.assertEqual(totalMetricCount, len(self.data))

  # Get the uids of all the metrics created.
  uids = [metric['uid'] for metric in getAllModelsResult]

  while True:
    with repository.engineFactory().connect() as conn:
      initialModelCount = conn.execute(
        sql.select([sql.func.count()], from_obj=schema.metric_data)
        .where(schema.metric_data.c.rowid == 1)).scalar()
    if initialModelCount == totalMetricCount:
      print "Done creating all the initial models."
      break

    # Exit the test with some non-zero status if the test has run for more
    # than 20 minutes just to create the initial models.
    # Should not take more than that.
    currentElapsedTime = (time() - startTime) / 60
    print "Current elapsed time %s" % currentElapsedTime
    if currentElapsedTime > 20:
      print "More than 20 minutes have elapsed. Timing out."
      sys.exit(42)

    print "%s initial models created." % initialModelCount
    print "Creating initial models for rest of the %s metrics..." \
      % (totalMetricCount - initialModelCount)
    sleep(60)

  # Sleep for a long time.
  minutes = 15
  print "Sleeping for %s minutes to let things settle down." % minutes
  while minutes > 0:
    print "Resume in %s minutes." % minutes
    minutes -= 1
    sleep(60)

  modelCreationDuration = (time() - startTime) / 60

  with repository.engineFactory().connect() as conn:
    lastRowIds = {uid: repository.getMetric(conn, uid).last_rowid
                  for uid in uids}

  modelInferenceWithNonNullAnomalyScore = []
  modelIds = lastRowIds.keys()
  while True:
    print set(modelInferenceWithNonNullAnomalyScore)
    if len(modelIds) == len(set(modelInferenceWithNonNullAnomalyScore)):
      print "Model inferences created for last_rowids for all the models."
      break
    for uid in modelIds:
      with repository.engineFactory().connect() as conn:
        anomalyNullCount = conn.execute(
          sql.select([sql.func.count()], from_obj=schema.metric_data)
          .where(schema.metric_data.c.rowid == lastRowIds[uid])
          .where(schema.metric_data.c.uid == uid)
          .where(schema.metric_data.c.anomaly_score == None)).scalar()
      print "Model (%s) - Last Row ID (%s) : %s" \
        % (uid, lastRowIds[uid], anomalyNullCount)
      if anomalyNullCount == 0:
        modelInferenceWithNonNullAnomalyScore.append(uid)

    # Exit the test with some non-zero status if the test has run for more
    # than 2 hours
    currentElapsedTime = (time() - startTime) / 60
    print "Current elapsed time %s" % currentElapsedTime
    if currentElapsedTime > 120:
      print "More than 2 hours have elapsed. Timing out."
      sys.exit(42)
    print "Going back to sleep for 60s..."
    sleep(60)

  self.assertEqual(anomalyNullCount, 0)
  timeToCalculateAllInferences = time()


  def getMetricDataWithRowID(metricDataList, rowid):
    '''
    Helper method to get the metric data of the nth row for a certain uid
    '''
    for metricData in metricDataList:
      if metricData[3] == rowid:
        return metricData


  def testMetricDataForRandomRowID(uid):
    '''
    This tests if the metric data returned by the GET call:
      _models/<uid>/data
    has anomaly_score consistent with what is in the actual database, by
    asserting it against a dao.MetricData.get() call.
    It repeats the process for 5 random sample rows for each uid in the
    database.

    Algorithm:
      - Query the MetricDataHandler GET call for a certain uid
      - Check if response is OK
      - Find the last row id for the uid
      - Select a random row between 1 and last row id
      - Find the anomaly score for that row id
      - Assert on the anomaly score
    '''
    response = self.app.get("/%s/data" % uid, headers=self.headers)
    assertions.assertSuccess(self, response)
    getAllModelsResult = utils.jsonDecode(response.body)
    with repository.engineFactory().connect() as conn:
      lastRowID = repository.getMetric(conn, uid).last_rowid
    for _ in range(5):
      randomRowID = randrange(1, lastRowID)
      with repository.engineFactory().connect() as conn:
        singleMetricData = repository.getMetricData(
          conn, uid, rowid=randomRowID).first()
      metricData = getMetricDataWithRowID(getAllModelsResult['data'],
                                          randomRowID)
      self.assertEqual(metricData[2], singleMetricData.anomaly_score)
      self.assertEqual(datetime.strptime(metricData[0], '%Y-%m-%d %H:%M:%S'),
                       singleMetricData.timestamp)

  map(testMetricDataForRandomRowID, uids)


  def testMetricDataAnomalyAsQueryParams(uid):
    '''
    This test makes MetricDataHandler GET calls with anomaly param:
      _models/<uid>/data?anomaly=<>
    '''
    queryString = ("SELECT * FROM metric_data WHERE uid='%s' "
                   " and abs(anomaly_score - 0) > 1e-5 LIMIT 1") % uid
    with repository.engineFactory().connect() as conn:
      sampleMetricData = conn.execute(queryString).first()
    anomalyScore = sampleMetricData.anomaly_score
    response = self.app.get("/%s/data?anomaly=%s" % (uid, anomalyScore),
                            headers=self.headers)
    assertions.assertSuccess(self, response)
    getAllModelsResult = utils.jsonDecode(response.body)
    for metricData in getAllModelsResult['data']:
      self.assertGreaterEqual(metricData[2], anomalyScore)

  map(testMetricDataAnomalyAsQueryParams, uids)


  def testMetricDataTimeStampQueryParams(uid):
    '''
    This test makes MetricDataHandler GET calls with from and to params:
      _models/<uid>/data?from=<>&to=<>
    '''
    with repository.engineFactory().connect() as conn:
      firstMetricData = conn.execute(
        sql.select([schema.metric_data])
        .where(schema.metric_data.c.uid == uid)
        .order_by(sql.expression.asc(schema.metric_data.c.timestamp))
        .limit(1)).fetchall()
      lastMetricData = conn.execute(
        sql.select([schema.metric_data])
        .where(schema.metric_data.c.uid == uid)
        .order_by(sql.expression.desc(schema.metric_data.c.timestamp))
        .limit(1)).fetchall()
    firstTimeStamp = firstMetricData[0].timestamp
    lastTimeStamp = lastMetricData[0].timestamp
    response = self.app.get("/%s/data?from=%s&to=%s"
                            % (uid, firstTimeStamp, lastTimeStamp),
                            headers=self.headers)
    assertions.assertSuccess(self, response)
    getAllModelsResult = utils.jsonDecode(response.body)
    for metricData in getAllModelsResult['data']:
      self.assertGreaterEqual(datetime.strptime(metricData[0],
                                                '%Y-%m-%d %H:%M:%S'),
                              firstTimeStamp)
      self.assertLessEqual(datetime.strptime(metricData[0],
                                             '%Y-%m-%d %H:%M:%S'),
                           lastTimeStamp)

  map(testMetricDataTimeStampQueryParams, uids)


  def testMetricDataQueryParams(uid):
    '''
    This test makes MetricDataHandler GET calls with various params:
      _models/<uid>/data?from=<>&to=<>&anomaly=<>
    '''
    with repository.engineFactory().connect() as conn:
      firstMetricData = conn.execute(
        "SELECT * FROM `metric_data` WHERE `uid`='%s' "
        "and abs(`anomaly_score` - 0) > 1e-5 "
        "ORDER BY `timestamp` ASC LIMIT 1" % uid).fetchall()
      lastMetricData = conn.execute(
        "SELECT * FROM `metric_data` WHERE `uid`='%s' "
        "and abs(`anomaly_score` - 0) > 1e-5 "
        "ORDER BY `timestamp` DESC LIMIT 1" % uid).fetchall()
    firstTimeStamp = firstMetricData[0].timestamp
    lastTimeStamp = lastMetricData[0].timestamp
    anomalyScore = firstMetricData[0].anomaly_score
    response = self.app.get("/%s/data?from=%s&to=%s&anomaly=%s"
                            % (uid, firstTimeStamp, lastTimeStamp,
                               anomalyScore),
                            headers=self.headers)
    assertions.assertSuccess(self, response)
    getAllModelsResult = utils.jsonDecode(response.body)
    for metricData in getAllModelsResult['data']:
      self.assertGreaterEqual(metricData[2], anomalyScore)
      self.assertGreaterEqual(datetime.strptime(metricData[0],
                                                '%Y-%m-%d %H:%M:%S'),
                              firstTimeStamp)
      self.assertLessEqual(datetime.strptime(metricData[0],
                                             '%Y-%m-%d %H:%M:%S'),
                           lastTimeStamp)

  map(testMetricDataQueryParams, uids)

  endTime = time()

  print "Test started at : %s" % \
    strftime('%Y-%m-%d %H:%M:%S', localtime(startTime))
  print "Test finished at : %s" % \
    strftime('%Y-%m-%d %H:%M:%S', localtime(endTime))
  print "Total metric count : %s" % totalMetricCount
  print "Initial models created : %s" % initialModelCount
  print "Approximate time taken to create initial models : %s minutes" \
    % modelCreationDuration
  print "Approximate time taken to calculate all inferences : %s minutes" \
    % ((timeToCalculateAllInferences - startTime) / 60)
  print "Approximate time taken for all the tests to finish : %s minutes" \
    % ((endTime - startTime) / 60)
def createModel(cls, modelSpec=None):
  """
  NOTE MER-3479: this code path is presently incorrectly used for two
  purposes:
    * Creating CloudWatch models (correct)
    * Importing of all types of metrics (not desirable; there should be a
      separate endpoint or an import-specific flag in this endpoint for
      importing that facilitates slightly different behavior, such as
      suppressing certain errors to allow for re-import in case of transient
      error part way through the prior import)
  """
  if not modelSpec:
    # Metric data is missing
    log.error("Data is missing in request, raising BadRequest exception")
    raise InvalidRequestResponse({"result": "Metric data is missing"})

  # TODO MER-3479: import using import-specific endpoint
  # NOTE: pending MER-3479, this is presently a hack for exercising
  #   the adapter import API
  importing = False

  if modelSpec.get("datasource") == "custom":
    # Convert to new YOMP-custom metric modelSpec format
    # NOTE: backward compatibility during first phase refactoring
    modelSpec = cls.upgradeCustomModelSpec(modelSpec)

    if "data" in modelSpec:
      importing = True
  elif (modelSpec.get("datasource") == "cloudwatch" and
        "filters" not in modelSpec):
    if "type" in modelSpec:
      # The legacy cloudwatch import modelSpec had the "type" property
      assert modelSpec["type"] == "metric", repr(modelSpec)
      importing = True

      # Convert to new YOMP-custom metric modelSpec format
      # NOTE: backward compatibility during first phase refactoring
      modelSpec = cls.upgradeCloudwatchModelSpec(modelSpec)
  elif (modelSpec.get("datasource") == "autostack" or
        modelSpec.get("type") == "autostack"):
    importing = True

    # Convert to new autostack metric modelSpec format
    # NOTE: backward compatibility during first phase refactoring
    modelSpec = cls.upgradeAutostackModelSpec(modelSpec)

  try:
    with web.ctx.connFactory() as conn:
      with conn.begin():
        adapter = createDatasourceAdapter(modelSpec["datasource"])

        if modelSpec["datasource"] == "custom":
          checkQuotaForCustomMetricAndRaise(conn)
        else:
          checkQuotaForInstanceAndRaise(
            conn,
            adapter.getInstanceNameForModelSpec(modelSpec))

        try:
          if importing:
            # TODO MER-3479: import using import-specific endpoint
            # NOTE: pending MER-3479, this is presently a hack for
            #   exercising the adapter import API
            metricId = adapter.importModel(modelSpec)
          else:
            metricId = adapter.monitorMetric(modelSpec)
        except app_exceptions.MetricAlreadyMonitored as e:
          metricId = e.uid

        return repository.getMetric(conn, metricId)
  except (ValueError, app_exceptions.MetricNotSupportedError) as e:
    raise InvalidRequestResponse({"result": repr(e)})
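# Two example model specs that this endpoint accepts. The CloudWatch spec
# mirrors createModelData in setUpClass() earlier in this file; the autostack
# spec mirrors the dict assembled in the autostack metrics POST handler.
# Field values (instance id, autostack uid) are placeholders.
CLOUDWATCH_MODEL_SPEC = {
  "region": "us-west-2",
  "namespace": "AWS/EC2",
  "datasource": "cloudwatch",
  "metric": "CPUUtilization",
  "dimensions": {"InstanceId": "i-0123456789abcdef0"}
}

AUTOSTACK_MODEL_SPEC = {
  "datasource": "autostack",
  "metricSpec": {
    "autostackId": "<autostack-uid>",
    "slaveDatasource": "cloudwatch",
    "slaveMetric": {"namespace": "AWS/EC2", "metric": "CPUUtilization"}
  },
  "modelParams": {"min": 0, "max": 100}
}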
def sendNotificationEmail(self, engine, settingObj, notificationObj):
  """ Send notification email through Amazon SES

  :param engine: SQLAlchemy engine object
  :type engine: sqlalchemy.engine.Engine

  :param settingObj: Device settings
  :type settingObj: NotificationSettings

  :param notificationObj: Notification
  :type notificationObj: Notification

  See conf/notification-body.tpl (or relevant notification body
  configuration value) for template value.  Values are substituted using
  python's `str.format(**data)` function where `data` is a dict containing
  the following keys:

  ============ ===========
  Key          Description
  ============ ===========
  notification Notification instance
  data         MetricData row that triggered notification
  date         Formatted date (%A, %B %d, %Y)
  time         Formatted time (%I:%M %p (%Z))
  unit         Canonical unit for metric value
  ============ ===========
  """
  subject = YOMP.app.config.get("notifications", "subject")

  bodyType = "default"
  with engine.connect() as conn:
    metricObj = repository.getMetric(conn, notificationObj.metric)
  if metricObj.datasource == "custom":
    bodyType = "custom"

  body = open(resource_filename(YOMP.__name__,
                                os.path.join("../conf",
                                             YOMP.app.config.get(
                                               "notifications",
                                               "body_" + bodyType)))).read()
  body = body.replace("\n", "\r\n")  # Ensure windows newlines

  # Template variable storage (to be expanded in call to str.format())
  templated = dict(notification=notificationObj)

  # Metric
  templated["metric"] = metricObj

  # Instance
  templated["instance"] = metricObj.tag_name or metricObj.server

  # Date/time
  templated["timestampUTC"] = notificationObj.timestamp.strftime(
    "%A, %B %d, %Y %I:%M %p")
  localtime = localizedTimestamp(notificationObj.timestamp)
  templated["timestampLocal"] = localtime.strftime("%A, %B %d, %Y %I:%M %p")
  templated["timezoneLocal"] = localtime.strftime("%Z")

  # Region
  templated["region"] = _getCurrentRegion()

  self._log.info(
    "NOTIFICATION=%s SERVER=%s METRICID=%s METRIC=%s DEVICE=%s "
    "RECIPIENT=%s Sending email. " % (notificationObj.uid,
                                      metricObj.server,
                                      metricObj.uid,
                                      metricObj.name,
                                      settingObj.uid,
                                      settingObj.email_addr))

  try:
    # Send through SES
    messageId = ses_utils.sendEmail(subject=subject.format(**templated),
                                    body=body.format(**templated),
                                    toAddresses=settingObj.email_addr)

    if messageId is not None:
      # Record AWS SES Message ID
      with engine.connect() as conn:
        repository.updateNotificationMessageId(conn,
                                               notificationObj.uid,
                                               messageId)

      self._log.info("NOTIFICATION=%s SESMESSAGEID=%s Email sent. "
                     % (notificationObj.uid, messageId))
  except BotoServerError:
    self._log.exception("Unable to send email.")
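# A small, self-contained illustration of the `str.format(**data)`
# substitution described in the docstring above. The template string here is
# made up; the real one lives in conf/notification-body.tpl. The keys match
# entries placed into `templated` by the method.
template = "Anomaly on {instance} at {timestampLocal} ({timezoneLocal})"
data = {"instance": "web-server-1",
        "timestampLocal": "Monday, January 05, 2015 10:30 AM",
        "timezoneLocal": "PST"}
print(template.format(**data))
# -> Anomaly on web-server-1 at Monday, January 05, 2015 10:30 AM (PST)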
def POST(cls):
  """Upload the metric info and metric data as a compressed tarfile to S3.

  The request must include the uid of the metric and may include other JSON
  keys as well. For instance, it is likely that a request from the mobile
  application will include information about the current view and data
  being displayed when the feedback request is sent. Any fields in addition
  to uid will be stored with the feedback archive file that is uploaded to
  S3.
  """
  inputData = json.loads(web.data())

  # Get the metric uid
  uid = inputData["uid"]
  del inputData["uid"]

  inputData["server_id"] = _MACHINE_ID

  # Data is written to a temporary directory before uploading
  path = tempfile.mkdtemp()

  try:
    # Retrieve the metric table record and add it to the other input
    # parameters
    metricFields = [schema.metric.c.uid,
                    schema.metric.c.datasource,
                    schema.metric.c.name,
                    schema.metric.c.description,
                    schema.metric.c.server,
                    schema.metric.c.location,
                    schema.metric.c.parameters,
                    schema.metric.c.status,
                    schema.metric.c.message,
                    schema.metric.c.last_timestamp,
                    schema.metric.c.poll_interval,
                    schema.metric.c.tag_name,
                    schema.metric.c.last_rowid]
    with repository.engineFactory().connect() as conn:
      metricRow = repository.getMetric(conn, uid, metricFields)
    metric = dict([(col.name,
                    utils.jsonDecode(getattr(metricRow, col.name))
                    if col.name == "parameters"
                    else getattr(metricRow, col.name))
                   for col in metricFields])
    if metric["tag_name"]:
      metric["display_name"] = "%s (%s)" % (metric["tag_name"],
                                            metric["server"])
    else:
      metric["display_name"] = metric["server"]

    inputData["metric"] = utils.jsonEncode(metric)

    metricPath = os.path.join(path, "metric.json")
    with open(metricPath, "w") as f:
      json.dump(inputData, f)

    # Retrieve the metric data
    with repository.engineFactory().connect() as conn:
      metricDataRows = repository.getMetricData(conn, uid)
    metricData = [dict([(col.name, getattr(metricData, col.name))
                        for col in schema.metric_data.columns])
                  for metricData in metricDataRows]

    metricDataPath = os.path.join(path, "metric_data.csv")
    with open(metricDataPath, "w") as f:
      writer = csv.writer(f)
      if len(metricData) > 0:
        header = metricData[0].keys()
        # Write the field names first
        writer.writerow(header)
        # Then write out the data for each row
        for dataDict in metricData:
          row = [dataDict[h] for h in header]
          writer.writerow(row)

    # Create a tarfile to upload
    ts = datetime.datetime.utcnow().strftime("%Y%m%d-%H%M%S")
    filename = "metric_dump_%s_%s.tar.gz" % (uid, ts)
    tfPath = os.path.join(path, filename)
    with tarfile.open(tfPath, "w:gz") as tf:
      tf.add(metricPath, arcname=os.path.basename(metricPath))
      tf.add(metricDataPath, arcname=os.path.basename(metricDataPath))

    # Upload the tarfile
    return cls._uploadTarfile(filename, tfPath)
  finally:
    shutil.rmtree(path)
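# A quick, self-contained way to inspect one of the archives produced above,
# using only the standard library (the archive path is a placeholder; this
# helper is not part of the handler):
import json
import tarfile

def inspectFeedbackArchive(tfPath):
  with tarfile.open(tfPath, "r:gz") as tf:
    print(tf.getnames())  # expect ['metric.json', 'metric_data.csv']
    meta = json.load(tf.extractfile("metric.json"))
    print(meta["server_id"])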