def testMetricDataQueryParams(uid):
  '''
    This test makes MetricDataHandler GET calls with various params :
      _models/<uid>/data?from=<>&to=<>&anomaly=<>

    Asserts that every row returned has anomaly_score >= the requested
    threshold and a timestamp within the requested [from, to] range.
  '''
  # Rows for this uid with a non-zero anomaly score; the epsilon comparison
  # is a float-safe replacement for a direct != 0 test. Built with the
  # SQLAlchemy expression API (bound parameters) instead of %-interpolated
  # raw SQL, matching the sibling tests and avoiding quoting/injection
  # pitfalls.
  nonZeroAnomalyQuery = (
      sql.select([schema.metric_data])
      .where(schema.metric_data.c.uid == uid)
      .where(sql.func.abs(schema.metric_data.c.anomaly_score) > 1e-5))

  with repository.engineFactory().connect() as conn:
    # Earliest and latest qualifying rows bound the query window.
    firstMetricData = conn.execute(
        nonZeroAnomalyQuery
        .order_by(sql.expression.asc(schema.metric_data.c.timestamp))
        .limit(1)).fetchall()
    lastMetricData = conn.execute(
        nonZeroAnomalyQuery
        .order_by(sql.expression.desc(schema.metric_data.c.timestamp))
        .limit(1)).fetchall()

  firstTimeStamp = firstMetricData[0].timestamp
  lastTimeStamp = lastMetricData[0].timestamp
  anomalyScore = firstMetricData[0].anomaly_score

  response = self.app.get("/%s/data?from=%s&to=%s&anomaly=%s"
                          % (uid, firstTimeStamp, lastTimeStamp,
                             anomalyScore),
                          headers=self.headers)
  assertions.assertSuccess(self, response)
  getAllModelsResult = utils.jsonDecode(response.body)
  # Each returned row: [timestamp, value, anomaly_score, rowid].
  for metricData in getAllModelsResult['data']:
    self.assertGreaterEqual(metricData[2], anomalyScore)
    self.assertGreaterEqual(datetime.strptime(metricData[0],
                                              '%Y-%m-%d %H:%M:%S'),
                            firstTimeStamp)
    self.assertLessEqual(datetime.strptime(metricData[0],
                                           '%Y-%m-%d %H:%M:%S'),
                         lastTimeStamp)
def testMetricDataTimeStampQueryParams(uid):
  '''
    This test makes MetricDataHandler GET calls with from and to params :
      _models/<uid>/data?from=<>&to=<>

    Every row the handler returns must have a timestamp inside the
    [from, to] window, which is taken from the uid's earliest and latest
    metric_data rows.
  '''
  with repository.engineFactory().connect() as conn:

    def fetchBoundaryRows(direction):
      # Single earliest (asc) or latest (desc) metric_data row for uid.
      return conn.execute(
          sql.select([schema.metric_data])
          .where(schema.metric_data.c.uid == uid)
          .order_by(direction(schema.metric_data.c.timestamp))
          .limit(1)).fetchall()

    earliestRows = fetchBoundaryRows(sql.expression.asc)
    latestRows = fetchBoundaryRows(sql.expression.desc)

  firstTimeStamp = earliestRows[0].timestamp
  lastTimeStamp = latestRows[0].timestamp

  response = self.app.get("/%s/data?from=%s&to=%s"
                          % (uid, firstTimeStamp, lastTimeStamp),
                          headers=self.headers)
  assertions.assertSuccess(self, response)

  decodedResult = utils.jsonDecode(response.body)
  timestampFormat = '%Y-%m-%d %H:%M:%S'
  for row in decodedResult['data']:
    rowTimestamp = datetime.strptime(row[0], timestampFormat)
    self.assertGreaterEqual(rowTimestamp, firstTimeStamp)
    self.assertLessEqual(rowTimestamp, lastTimeStamp)
def testMetricDataForRandomRowID(uid):
  '''
    Checks that the anomaly scores returned by the GET call :
      _models/<uid>/data
    agree with the database, by comparing 5 randomly-chosen rows against
    direct repository.getMetricData() lookups.

    Steps:
      - GET all metric data for the uid and check the response is OK
      - Look up the uid's last row id
      - 5 times: pick a random rowid in [1, last), fetch that row from the
        database, and assert its anomaly score and timestamp match the
        corresponding row in the API response
  '''
  response = self.app.get("/%s/data" % uid, headers=self.headers)
  assertions.assertSuccess(self, response)
  decodedResult = utils.jsonDecode(response.body)

  with repository.engineFactory().connect() as conn:
    lastRowID = repository.getMetric(conn, uid).last_rowid

  for _sample in range(5):
    rowID = randrange(1, lastRowID)
    with repository.engineFactory().connect() as conn:
      dbRow = repository.getMetricData(conn, uid, rowid=rowID).first()
    apiRow = getMetricDataWithRowID(decodedResult['data'], rowID)
    self.assertEqual(apiRow[2], dbRow.anomaly_score)
    self.assertEqual(datetime.strptime(apiRow[0], '%Y-%m-%d %H:%M:%S'),
                     dbRow.timestamp)
def testMetricDataAnomalyAsQueryParams(uid):
  '''
    This test makes MetricDataHandler GET calls with anomaly param :
      _models/<uid>/data?anomaly=<>

    Asserts every row returned has anomaly_score >= the requested value.
  '''
  # Pick any single row with a non-zero anomaly score for this uid. Built
  # with the SQLAlchemy expression API (bound parameters) instead of a
  # %-interpolated raw SQL string, matching the sibling tests and avoiding
  # quoting/injection pitfalls; the epsilon comparison is a float-safe
  # replacement for != 0.
  with repository.engineFactory().connect() as conn:
    sampleMetricData = conn.execute(
        sql.select([schema.metric_data])
        .where(schema.metric_data.c.uid == uid)
        .where(sql.func.abs(schema.metric_data.c.anomaly_score) > 1e-5)
        .limit(1)).first()
  anomalyScore = sampleMetricData.anomaly_score

  response = self.app.get("/%s/data?anomaly=%s" % (uid, anomalyScore),
                          headers=self.headers)
  assertions.assertSuccess(self, response)
  getAllModelsResult = utils.jsonDecode(response.body)
  # Each returned row: [timestamp, value, anomaly_score, rowid].
  for metricData in getAllModelsResult['data']:
    self.assertGreaterEqual(metricData[2], anomalyScore)
def testModelInferencesLifeCycle(self):
  '''
  End-to-end model lifecycle check: creates a metric/model for every entry
  in self.data, polls the metric_data table until every initial model and
  then every inference exists, then exercises the MetricDataHandler GET
  endpoints through the nested test helpers below. Calls sys.exit(42) if
  either polling phase exceeds its time budget (20 minutes / 2 hours).
  '''
  startTime = time()
  for model in sorted(self.data):
    #create a model; post is forwarded to put
    print "Creating metric for %s : " % model
    response = self.app.put("/", json.dumps(model), headers=self.headers)
    assertions.assertSuccess(self, response, code=201)

  response = self.app.get("/", headers=self.headers)
  assertions.assertSuccess(self, response)
  getAllModelsResult = utils.jsonDecode(response.body)

  totalMetricCount = len(getAllModelsResult)
  self.assertEqual(totalMetricCount, len(self.data))

  #Get the uids of all the metrics created.
  uids = [metric['uid'] for metric in getAllModelsResult]

  # Poll until every metric has a first data row (rowid == 1), i.e. an
  # initial model has been created for each metric.
  while True:
    with repository.engineFactory().connect() as conn:
      initialModelCount = conn.execute(
        sql.select([sql.func.count()], from_obj=schema.metric_data)
        .where(schema.metric_data.c.rowid == 1)).scalar()
    if initialModelCount == totalMetricCount:
      print "Done creating all the initial models."
      break

    # Exit the test with some non-zero status if the test has run for more
    # than 20 minutes to just create the initial models.
    # Should not take more than that.
    currentElapsedTime = (time() - startTime) / 60
    print "Current elapsed time %s" % currentElapsedTime
    if currentElapsedTime > 20:
      print "More than 20 minutes has elapsed. Timing out."
      sys.exit(42)
    print "%s initial models created." % initialModelCount
    print "Creating initial models for rest of the %s metrics" \
          "..." % (totalMetricCount - initialModelCount)
    sleep(60)

  #Sleep for a long time.
  # Fixed settling period (15 x 60s) before checking inference progress.
  minutes = 15
  print "Sleeping for %s minutes to let things settled down." % minutes
  while minutes > 0:
    print "Resume in %s minutes." % minutes
    minutes -= 1
    sleep(60)

  modelCreationDuration = (time() - startTime) / 60

  with repository.engineFactory().connect() as conn:
    lastRowIds = {uid: repository.getMetric(conn, uid).last_rowid
                  for uid in uids}

  # Poll until, for every model, the anomaly_score of its last row is
  # non-NULL — i.e. inferences have been computed through to last_rowid.
  modelInferenceWithNonNullAnomalyScore = []
  modelIds = lastRowIds.keys()
  while True:
    print set(modelInferenceWithNonNullAnomalyScore)
    if len(modelIds) == len(set(modelInferenceWithNonNullAnomalyScore)):
      print "Model inferences created for last_rowids for all the models."
      break
    for uid in modelIds:
      with repository.engineFactory().connect() as conn:
        # Count is 1 while the last row's anomaly_score is still NULL,
        # 0 once the inference for that row has landed.
        anomalyNullCount = conn.execute(
          sql.select([sql.func.count()], from_obj=schema.metric_data)
          .where(schema.metric_data.c.rowid == lastRowIds[uid])
          .where(schema.metric_data.c.uid == uid)
          .where(schema.metric_data.c.anomaly_score == None)).scalar()
      print "Model (%s) - Last Row ID (%s) : %s" \
        % (uid, lastRowIds[uid], anomalyNullCount)
      if anomalyNullCount == 0:
        modelInferenceWithNonNullAnomalyScore.append(uid)

    # Exit the test with some non-zero status if the test has run for more
    # than 2 hours
    currentElapsedTime = (time() - startTime) / 60
    print "Current elapsed time %s" % currentElapsedTime
    if currentElapsedTime > 120:
      print "More than 2 hours has elapsed. Timing out."
      sys.exit(42)
    print "Going back to sleep for 60s..."
    sleep(60)

  self.assertEqual(anomalyNullCount, 0)
  timeToCalculateAllInferences = time()

  def getMetricDataWithRowID(metricDataList, rowid):
    '''
      Helper method to get the metric data of the nth row for a certain uid
    '''
    # Linear scan over the decoded rows; metricData[3] is the rowid column.
    # Falls through (returns None) when no row matches.
    for metricData in metricDataList:
      if metricData[3] == rowid:
        return metricData

  def testMetricDataForRandomRowID(uid):
    '''
      This tests if the metric data returned by the GET call :
        _models/<uid>/data
      has anomaly_score consistent with what is there in the actual
      database by asserting it against a dao.MetricData.get() call
      It repeats the process for 5 random sample rows for each uid in
      the database.

      Algorithm :
        - Query the MetricDataHandler GET call for a certain uid
        - Check if response is OK
        - Find the last row id for the uid
        - Select a random row between 1 and last row id
        - Find the anomaly score for that row id
        - Assert on the anomaly score
    '''
    response = self.app.get("/%s/data" %uid, headers=self.headers)
    assertions.assertSuccess(self, response)
    getAllModelsResult = utils.jsonDecode(response.body)
    with repository.engineFactory().connect() as conn:
      lastRowID = repository.getMetric(conn, uid).last_rowid
    for _ in range(5):
      randomRowID = randrange(1, lastRowID)
      with repository.engineFactory().connect() as conn:
        singleMetricData = repository.getMetricData(
          conn, uid, rowid=randomRowID).first()
      metricData = getMetricDataWithRowID(getAllModelsResult['data'],
                                          randomRowID)
      self.assertEqual(metricData[2], singleMetricData.anomaly_score)
      self.assertEqual(datetime.strptime(metricData[0],
                                         '%Y-%m-%d %H:%M:%S'),
                       singleMetricData.timestamp)

  # map() used for side effects (Python 2): run the check for every uid.
  map(testMetricDataForRandomRowID, uids)

  def testMetricDataAnomalyAsQueryParams(uid):
    '''
      This test makes MetricDataHandler GET calls with anomaly param :
        _models/<uid>/data?anomaly=<>
    '''
    # Any one row with a non-zero anomaly score serves as the threshold.
    queryString = ("SELECT * FROM metric_data WHERE uid='%s' "
                   " and abs(anomaly_score - 0) > 1e-5 LIMIT 1") % uid
    with repository.engineFactory().connect() as conn:
      sampleMetricData = conn.execute(queryString).first()
    anomalyScore = sampleMetricData.anomaly_score
    response = self.app.get("/%s/data?anomaly=%s" % (uid, anomalyScore),
                            headers=self.headers)
    assertions.assertSuccess(self, response)
    getAllModelsResult = utils.jsonDecode(response.body)
    for metricData in getAllModelsResult['data']:
      self.assertGreaterEqual(metricData[2], anomalyScore)

  map(testMetricDataAnomalyAsQueryParams, uids)

  def testMetricDataTimeStampQueryParams(uid):
    '''
      This test makes MetricDataHandler GET calls with from and to params :
        _models/<uid>/data?from=<>&to=<>
    '''
    # Window bounds come from the uid's earliest and latest rows.
    with repository.engineFactory().connect() as conn:
      firstMetricData = conn.execute(
        sql.select([schema.metric_data])
        .where(schema.metric_data.c.uid == uid)
        .order_by(sql.expression.asc(schema.metric_data.c.timestamp))
        .limit(1)).fetchall()
      lastMetricData = conn.execute(
        sql.select([schema.metric_data])
        .where(schema.metric_data.c.uid == uid)
        .order_by(sql.expression.desc(schema.metric_data.c.timestamp))
        .limit(1)).fetchall()
    firstTimeStamp = firstMetricData[0].timestamp
    lastTimeStamp = lastMetricData[0].timestamp
    response = self.app.get("/%s/data?from=%s&to=%s"
                            % (uid, firstTimeStamp, lastTimeStamp),
                            headers=self.headers)
    assertions.assertSuccess(self, response)
    getAllModelsResult = utils.jsonDecode(response.body)
    for metricData in getAllModelsResult['data']:
      self.assertGreaterEqual(datetime.strptime(metricData[0],
                                                '%Y-%m-%d %H:%M:%S'),
                              firstTimeStamp)
      self.assertLessEqual(datetime.strptime(metricData[0],
                                             '%Y-%m-%d %H:%M:%S'),
                           lastTimeStamp)

  map(testMetricDataTimeStampQueryParams, uids)

  def testMetricDataQueryParams(uid):
    '''
      This test makes MetricDataHandler GET calls with various params :
        _models/<uid>/data?from=<>&to=<>&anomaly=<>
    '''
    # Earliest/latest rows with a non-zero anomaly score bound the window;
    # the first row's score doubles as the anomaly threshold.
    with repository.engineFactory().connect() as conn:
      firstMetricData = conn.execute(
        "SELECT * FROM `metric_data` WHERE `uid`='%s' "
        "and abs(`anomaly_score` - 0) > 1e-5 "
        "ORDER BY `timestamp` ASC LIMIT 1" % uid).fetchall()
      lastMetricData = conn.execute(
        "SELECT * FROM `metric_data` WHERE `uid`='%s' "
        "and abs(`anomaly_score` - 0) > 1e-5 "
        "ORDER BY `timestamp` DESC LIMIT 1" % uid).fetchall()
    firstTimeStamp = firstMetricData[0].timestamp
    lastTimeStamp = lastMetricData[0].timestamp
    anomalyScore = firstMetricData[0].anomaly_score
    response = self.app.get("/%s/data?from=%s&to=%s&anomaly=%s"
                            % (uid, firstTimeStamp, lastTimeStamp,
                               anomalyScore),
                            headers=self.headers)
    assertions.assertSuccess(self, response)
    getAllModelsResult = utils.jsonDecode(response.body)
    for metricData in getAllModelsResult['data']:
      self.assertGreaterEqual(metricData[2], anomalyScore)
      self.assertGreaterEqual(datetime.strptime(metricData[0],
                                                '%Y-%m-%d %H:%M:%S'),
                              firstTimeStamp)
      self.assertLessEqual(datetime.strptime(metricData[0],
                                             '%Y-%m-%d %H:%M:%S'),
                           lastTimeStamp)

  map(testMetricDataQueryParams, uids)

  endTime = (time() - startTime) / 60

  # NOTE(review): endTime holds elapsed *minutes*, not an epoch timestamp,
  # so localtime(endTime) below does not print the actual finish time —
  # presumably localtime(time()) was intended; confirm before relying on
  # this output.
  print "Test started at : %s" % \
    strftime('%Y-%m-%d %H:%M:%S', localtime(startTime))
  print "Test finished at : %s" % \
    strftime('%Y-%m-%d %H:%M:%S', localtime(endTime))
  print "Total metric count : %s" % totalMetricCount
  print "Initial models created : %s" % initialModelCount
  print "Approximate time taken to create inital models : %s minutes" \
    % modelCreationDuration
  print "Approximate time taken to calculate all inferences : %s minutes" \
    % ((timeToCalculateAllInferences - startTime) / 60)
  print "Approximate time taken for all the tests to finish : %s minutes" \
    % ((time() - startTime) / 60)