def setUp(self):
    self.db = "test-{}".format(int(datetime.datetime.now().timestamp()))

    self.source = InfluxDataSource({
        'name': 'test',
        'addr': ADDR,
        'database': self.db,
    })
    self.source.drop()
    self.source.init()

    self.storage = TempStorage()

    generator = SinEventGenerator(base=3, sigma=0.05)

    self.to_date = datetime.datetime.now().timestamp()
    self.from_date = self.to_date - 3600 * 24 * 7

    for ts in generator.generate_ts(
        self.from_date,
        self.to_date,
        step_ms=60000,
    ):
        self.source.insert_times_data(
            measurement='measure1',
            ts=ts,
            data={'foo': random.lognormvariate(10, 1)},
        )

    self.source.commit()
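# Note: the database name embeds the current epoch second, so repeated test
# runs get a fresh database instead of colliding; drop() + init() then
# guarantee a clean schema. Judging from the usage pattern here
# (insert_times_data() in a loop, a single commit() at the end),
# insert_times_data() buffers points and commit() flushes the batch to
# InfluxDB.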
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)

    for env_var in ['RANDOM_SEED', 'PYTHONHASHSEED']:
        if not os.environ.get(env_var):
            raise Exception('{} environment variable not set'.format(
                env_var))

    np.random.seed(int(os.environ['RANDOM_SEED']))
    random.seed(int(os.environ['RANDOM_SEED']))

    self.source = MemDataSource()
    self.storage = TempStorage()

    self.model = DonutModel(dict(
        name='test',
        offset=30,
        span=24 * 3,
        bucket_interval=20 * 60,
        interval=60,
        features=FEATURES,
        grace_period="140m",  # = 7 points
        max_threshold=99.7,
        min_threshold=68,
        max_evals=3,
    ))
    self.generator = SinEventGenerator(base=3, amplitude=3, sigma=0.01)

    to_date = datetime.datetime.now().timestamp()

    # Be sure that date range is aligned
    self.to_date = math.floor(
        to_date / self.model.bucket_interval) * self.model.bucket_interval
    self.from_date = self.to_date - 3600 * 24 * 7 * 3

    for ts in self.generator.generate_ts(self.from_date, self.to_date,
                                         step_ms=600000):
        self.source.insert_times_data({
            'timestamp': ts,
            'foo': random.normalvariate(10, 1)
        })
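# Notes on the setup above:
# - np.random.seed() and random.seed() make the NumPy and stdlib RNGs
#   deterministic. PYTHONHASHSEED, however, only takes effect if it is set
#   before the interpreter starts, which is why the test requires it in the
#   environment instead of setting it here.
# - The floor-to-multiple arithmetic aligns the date range on a bucket
#   boundary. For example, with bucket_interval = 1200 seconds:
#       math.floor(1234567 / 1200) * 1200 == 1233600
#   and 1233600 % 1200 == 0, so the range starts on a whole bucket.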
class TestTimes(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.source = MemDataSource()
        self.storage = TempStorage()

        self.model = DonutModel(dict(
            name='test',
            offset=30,
            span=24 * 3,
            bucket_interval=20 * 60,
            interval=60,
            features=FEATURES,
            grace_period="140m",  # = 7 points
            max_threshold=99.7,
            min_threshold=68,
            max_evals=10,
        ))
        self.generator = SinEventGenerator(base=3, amplitude=3, sigma=0.01)

        to_date = datetime.datetime.now().timestamp()

        # Be sure that date range is aligned
        self.to_date = math.floor(
            to_date / self.model.bucket_interval) * self.model.bucket_interval
        self.from_date = self.to_date - 3600 * 24 * 7 * 3

        for ts in self.generator.generate_ts(self.from_date, self.to_date,
                                             step_ms=600000):
            self.source.insert_times_data({
                'timestamp': ts,
                'foo': random.normalvariate(10, 1)
            })

    def _require_training(self):
        if self.model.is_trained:
            return

        self.model.train(
            self.source,
            self.from_date,
            self.to_date,
            batch_size=32,
        )

    def test_train(self):
        self._require_training()
        self.assertTrue(self.model.is_trained)

    def test_format_windows(self):
        from_date = 100
        to_date = 200
        step = 10

        abnormal = _format_windows(from_date, to_date, step, [])
        self.assertEqual(np.all(abnormal == False), True)

        abnormal = _format_windows(from_date, to_date, step, [
            [50, 90],
            [200, 220],
        ])
        self.assertEqual(np.all(abnormal == False), True)

        abnormal = _format_windows(from_date, to_date, step, [
            [100, 200],
        ])
        self.assertEqual(np.all(abnormal == True), True)

        abnormal = _format_windows(from_date, to_date, step, [
            [150, 160],
        ])
        self.assertEqual(abnormal.tolist(), [
            False, False, False, False, False,
            True, False, False, False, False,
        ])

        abnormal = _format_windows(from_date, to_date, step, [
            [50, 110],
            [190, 240],
        ])
        self.assertEqual(abnormal.tolist(), [
            True, False, False, False, False,
            False, False, False, False, True,
        ])

    def test_format(self):
        dataset = np.array([0, np.nan, 4, 6, 8, 10, 12, 14])
        abnormal = np.array([
            False, False, True, False,
            False, False, False, True,
        ])

        model = DonutModel(dict(
            name='test_fmt',
            offset=30,
            span=3,
            bucket_interval=20 * 60,
            interval=60,
            features=[
                FEATURE_COUNT_FOO,
            ],
            max_evals=1,
        ))

        missing, x = model._format_dataset(dataset)
        self.assertEqual(missing.tolist(), [
            [False, True, False],
            [True, False, False],
            [False, False, False],
            [False, False, False],
            [False, False, False],
            [False, False, False],
        ])
        self.assertEqual(x.tolist(), [
            [0.0, 0.0, 4.0],
            [0.0, 4.0, 6.0],
            [4.0, 6.0, 8.0],
            [6.0, 8.0, 10.0],
            [8.0, 10.0, 12.0],
            [10.0, 12.0, 14.0],
        ])

        missing, x = model._format_dataset(dataset, accept_missing=False)
        self.assertEqual(missing.tolist(), [
            [False, False, False],
            [False, False, False],
            [False, False, False],
            [False, False, False],
        ])
        self.assertEqual(x.tolist(), [
            [4.0, 6.0, 8.0],
            [6.0, 8.0, 10.0],
            [8.0, 10.0, 12.0],
            [10.0, 12.0, 14.0],
        ])

        missing, x = model._format_dataset(dataset, abnormal=abnormal)
        self.assertEqual(missing.tolist(), [
            [False, True, True],
            [True, True, False],
            [True, False, False],
            [False, False, False],
            [False, False, False],
            [False, False, True],
        ])
        self.assertEqual(x.tolist(), [
            [0.0, 0.0, 0.0],
            [0.0, 0.0, 6.0],
            [0.0, 6.0, 8.0],
            [6.0, 8.0, 10.0],
            [8.0, 10.0, 12.0],
            [10.0, 12.0, 0.0],
        ])

    def test_train_abnormal(self):
        source = MemDataSource()
        from_date = '1970-01-01T00:00:00.000Z'
        to_date = '1970-01-01T00:10:00.000Z'

        for i in range(100):
            for j in range(3):
                source.insert_times_data({
                    'timestamp': i * 6 + j,
                    'foo': 1.0 if (i >= 10 and i < 20) else math.sin(j),
                })
            for j in range(3):
                source.insert_times_data({
                    'timestamp': i * 6 + j + 3,
                    'foo': 1.0 if (i >= 10 and i < 20) else math.sin(-j),
                })

        abnormal = [
            # list windows containing abnormal data
            # date --date=@$((6*10)) --utc
            # date --date=@$((6*20)) --utc
            ['1970-01-01T00:01:00.000Z', '1970-01-01T00:02:00.000Z'],
            # [6*10, 6*20],
        ]

        model = DonutModel(dict(
            name='test',
            offset=30,
            span=10,
            bucket_interval=1,
            interval=60,
            features=[FEATURE_AVG_FOO],
            max_evals=1,
        ))

        result = model.train(source, from_date, to_date)
        loss1 = result['loss']
        print("loss: %f" % result['loss'])
        # prediction = model.predict(source, from_date, to_date)
        # prediction.plot('avg_foo')

        result = model.train(source, from_date, to_date, windows=abnormal)
        loss2 = result['loss']
        print("loss: %f" % result['loss'])
        # prediction = model.predict(source, from_date, to_date)
        # prediction.plot('avg_foo')

        self.assertTrue(loss2 < loss1)
        self.assertTrue(loss2 > 0)

    def test_span_auto(self):
        model = DonutModel(dict(
            name='test',
            offset=30,
            span='auto',
            bucket_interval=20 * 60,
            interval=60,
            features=FEATURES,
            max_evals=40,
        ))

        self.assertEqual(model.span, "auto")
        model.train(self.source, self.from_date, self.to_date)
        # print(model._span)
        self.assertTrue(10 <= model._span <= 15)

    def test_forecast(self):
        model = DonutModel(dict(
            name='test',
            offset=30,
            span=100,
            forecast=1,
            bucket_interval=20 * 60,
            interval=60,
            features=[
                FEATURE_COUNT_FOO,
            ],
            max_evals=21,
        ))

        source = MemDataSource()
        generator = SinEventGenerator(base=3, amplitude=3, sigma=0.01)

        # Align date range to day interval
        to_date = make_ts('1970-12-01T00:00:00.000Z')
        to_date = math.floor(to_date / (3600 * 24)) * (3600 * 24)
        from_date = to_date - 3600 * 24 * 7 * 3

        for ts in generator.generate_ts(from_date, to_date, step_ms=600000):
            source.insert_times_data({
                'timestamp': ts,
                'foo': random.normalvariate(10, 1)
            })

        model.train(source, from_date, to_date)
        prediction = model.predict(source, from_date, to_date)

        from_date = to_date
        to_date = from_date + 48 * 3600
        forecast = model.forecast(source, from_date, to_date)

        expected = math.ceil(
            (to_date - from_date) / model.bucket_interval
        )
        self.assertEqual(len(forecast.timestamps), expected)
        self.assertEqual(forecast.observed.shape, (expected,))
        self.assertEqual(forecast.predicted.shape, (expected,))

        all_default = np.full(
            (expected,),
            model.features[0].default,
            dtype=float,
        )
        np.testing.assert_allclose(
            forecast.observed,
            all_default,
        )

        forecast_head = np.array([0.35, 0.67, 0.73, 0.70, 1.35])
        forecast_tail = np.array([-0.09, -0.02, -0.05, 0.06, 0.08])
        # print(forecast.predicted)

        delta = 1.0
        forecast_good = np.abs(
            forecast.predicted[:len(forecast_head)] - forecast_head
        ) <= delta
        # print(forecast_head)
        # print(forecast.predicted[:len(forecast_head)])
        # print(forecast_good)
        self.assertEqual(np.all(forecast_good), True)

        forecast_good = np.abs(
            forecast.predicted[-len(forecast_tail):] - forecast_tail
        ) <= delta
        # print(forecast_tail)
        # print(forecast.predicted[-len(forecast_tail):])
        # print(forecast_good)
        self.assertEqual(np.all(forecast_good), True)
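    # The two checks above are elementwise tolerance tests: every entry of
    # forecast.predicted must lie within +/- delta of the reference values.
    # An equivalent, more idiomatic NumPy formulation (a sketch, not what
    # the test currently uses) would be:
    #
    #   np.testing.assert_allclose(
    #       forecast.predicted[:len(forecast_head)],
    #       forecast_head,
    #       atol=delta,
    #   )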
    def test_predict_aligned(self):
        self._require_training()

        to_date = self.to_date
        from_date = to_date - 24 * 3600

        prediction = self.model.predict(self.source, from_date, to_date)

        expected = math.ceil(
            (to_date - from_date) / self.model.bucket_interval
        )

        # prediction.plot('count_foo')

        self.assertEqual(len(prediction.timestamps), expected)
        self.assertEqual(prediction.observed.shape, (expected,))
        self.assertEqual(prediction.predicted.shape, (expected,))

        for i in range(expected):
            self.assertAlmostEqual(
                prediction.observed[i],
                prediction.predicted[i],
                delta=2,
            )

    def test_predict_with_nan(self):
        source = MemDataSource()
        storage = TempStorage()

        to_date = datetime.datetime.now().replace(
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
        ).timestamp()

        # Generate 3 days of data
        nb_days = 3
        hist_to = to_date
        hist_from = to_date - 3600 * 24 * nb_days
        ts = hist_from
        for i in range(nb_days):
            # [0h-12h[
            for j in range(12):
                source.insert_times_data({
                    'timestamp': ts,
                    'foo': j,
                })
                ts += 3600

            # No data for [12h, 13h[
            ts += 3600

            # [13h-0h[
            for j in range(11):
                source.insert_times_data({
                    'timestamp': ts,
                    'foo': j,
                })
                ts += 3600

        model = DonutModel(dict(
            name='test',
            offset=30,
            span=24,
            bucket_interval=3600,
            interval=60,
            features=[
                {
                    'name': 'count_foo',
                    'metric': 'count',
                    'field': 'foo',
                },
            ],
            max_threshold=30,
            min_threshold=25,
            max_evals=10,
        ))

        # train on all dataset
        model.train(source, hist_from, hist_to)
        self.assertTrue(model.is_trained)

        # predict on last 24h
        to_date = hist_to
        from_date = to_date - 3600 * 24
        prediction = model.predict(source, from_date, to_date)
        # prediction.plot('count_foo')

        self.assertEqual(len(prediction.timestamps), 24)
        self.assertEqual(prediction.observed.shape, (24,))
        self.assertEqual(prediction.predicted.shape, (24,))

        # Adding this call to ensure detect_anomalies() can deal with nan
        model.detect_anomalies(prediction)

        # Donut does missing data insertion and can fill the gap in the data
        for i in range(24):
            self.assertAlmostEqual(
                1.0,
                prediction.predicted[i],
                delta=0.22,
            )

    def test_detect_anomalies(self):
        self._require_training()

        source = MemDataSource()
        bucket_interval = self.model.bucket_interval

        # Insert 1000 buckets of normal data
        to_date = datetime.datetime.now().replace(
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
        ).timestamp()
        from_date = to_date - 1000 * bucket_interval
        for ts in self.generator.generate_ts(from_date, to_date,
                                             step_ms=600000):
            source.insert_times_data({
                'timestamp': ts,
                'foo': random.normalvariate(10, 1)
            })

        # Add abnormal data
        generator = FlatEventGenerator(base=5, sigma=0.01)

        from_date = to_date - 20 * bucket_interval
        for i in [5, 6, 7, 17, 18, 19]:
            ano_from = from_date + i * bucket_interval
            ano_to = ano_from + 1 * bucket_interval
            for ts in generator.generate_ts(ano_from, ano_to, step_ms=600000):
                source.insert_times_data({
                    'timestamp': ts,
                    'foo': random.normalvariate(10, 1)
                })

        # Make prediction on buckets [0-20[
        prediction = self.model.predict2(
            source,
            from_date,
            to_date,
            mse_rtol=0,  # unused
        )
        self.model.detect_anomalies(prediction)
        buckets = prediction.format_buckets()
        assert len(buckets) == 20

        # import json
        # print(json.dumps(buckets, indent=4))
        # prediction.plot('count_foo')

        # Buckets [0-4] are normal
        for i in range(0, 5):
            self.assertFalse(buckets[i]['stats']['anomaly'])

        # Bucket 5 is abnormal
        self.assertTrue(buckets[5]['stats']['anomaly'])
        # Bucket 6 is abnormal
        self.assertTrue(buckets[6]['stats']['anomaly'])
        # Bucket 7 is abnormal
        self.assertTrue(buckets[7]['stats']['anomaly'])

        # lag: 8 and 9 for cool down time

        # Buckets [10-16] are in grace period and expected to be normal
        for i in range(10, 17):
            self.assertFalse(buckets[i]['stats']['anomaly'])

        # Buckets 17, 18 and 19 are abnormal
        self.assertTrue(buckets[17]['stats']['anomaly'])
        self.assertTrue(buckets[18]['stats']['anomaly'])
        self.assertTrue(buckets[19]['stats']['anomaly'])

        anomalies = prediction.get_anomalies()
        self.assertEqual(
            anomalies[0:3],
            [buckets[i] for i in [5, 6, 7]],
        )
        self.assertEqual(
            anomalies[-3:],
            [buckets[i] for i in [17, 18, 19]],
        )
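    # Note on the expectations above: grace_period="140m" in __init__ equals
    # 7 buckets of 20 minutes (140 / 20 = 7). After the anomaly ends at
    # bucket 7, buckets 8-9 are left out as cool-down lag, and the 7 buckets
    # 10-16 fall inside the grace period, so they must not be flagged even
    # though they directly follow an anomalous window.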
    def test_thresholds(self):
        source = MemDataSource()

        to_date = datetime.datetime.now().replace(
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
            tzinfo=datetime.timezone.utc,
        ).timestamp()

        # Generate 3 weeks of data
        nb_days = 3 * 7
        hist_to = to_date
        hist_from = to_date - 3600 * 24 * nb_days
        ts = hist_from
        value = 5

        for i in range(nb_days):
            for j in range(0, 24):
                source.insert_times_data({
                    'timestamp': ts,
                    'foo': value,
                })
                ts += 3600

        model = DonutModel(dict(
            name='test',
            offset=30,
            span=24 * 3,
            bucket_interval=3600,
            interval=60,
            features=[
                {
                    'name': 'avg_foo',
                    'metric': 'avg',
                    'field': 'foo',
                    'default': 0,
                },
            ],
            max_threshold=99.7,
            min_threshold=68,
            max_evals=5,
        ))

        model.train(source, hist_from, hist_to)
        self.assertTrue(model.is_trained)

        # Add an extra day
        ts = hist_to
        values = []

        # Normal value on [00:00-06:00[
        values += [value] * 6

        # Increase on [06:00-12:00[
        values += list(range(value, value + 6))

        # Decrease on [12:00-18:00[
        values += list(range(value + 6, value, -1))

        # Normal value on [18:00-24:00[
        values += [value] * 6

        for value in values:
            source.insert_times_data({
                'timestamp': ts,
                'foo': value,
            })
            ts += 3600

        prediction = model.predict(source, hist_to, ts)
        self.assertEqual(len(prediction.timestamps), 24)

        hook = TestHook(model.settings, self.storage)

        model.detect_anomalies(prediction, hooks=[hook])
        self.assertEqual(len(hook.events), 2)
        event0, event1 = hook.events
        self.assertEqual(event0['type'], 'start')
        self.assertEqual(event1['type'], 'end')
        self.assertGreaterEqual(
            (event1['dt'] - event0['dt']).seconds,
            6 * 3600,
        )
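    # test_thresholds2 below cites the 68-95-99.7 rule, which is also where
    # min_threshold=68 and max_threshold=99.7 come from: for a normal
    # distribution, about 68% of values lie within 1 sigma of the mean, 95%
    # within 2 sigma, and 99.7% within 3 sigma. A quick way to reproduce
    # those percentages (illustration only, not used by the tests; scores
    # being percentile-based is an inference from the assertions below):
    #
    #   from scipy.stats import norm
    #   round(100 * (2 * norm.cdf(1) - 1), 1)  # -> 68.3
    #   round(100 * (2 * norm.cdf(2) - 1), 1)  # -> 95.4
    #   round(100 * (2 * norm.cdf(3) - 1), 1)  # -> 99.7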
    def test_thresholds2(self):
        source = MemDataSource()

        to_date = datetime.datetime.now().replace(
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
            tzinfo=datetime.timezone.utc,
        ).timestamp()

        # Generate 3 weeks of data
        nb_days = 3 * 7
        hist_to = to_date
        hist_from = to_date - 3600 * 24 * nb_days
        ts = hist_from
        value = 5

        for i in range(nb_days):
            for j in range(0, 24):
                source.insert_times_data({
                    'timestamp': ts,
                    'foo': value + random.normalvariate(0, 1),
                })
                ts += 3600

        model = DonutModel(dict(
            name='test',
            offset=30,
            span=24 * 3,
            bucket_interval=3600,
            interval=60,
            features=[
                {
                    'name': 'avg_foo',
                    'metric': 'avg',
                    'field': 'foo',
                    'default': 0,
                    'anomaly_type': 'low',
                },
            ],
            max_threshold=99.7,
            min_threshold=68,
            max_evals=5,
        ))

        model.train(source, hist_from, hist_to)
        self.assertTrue(model.is_trained)

        # Add an extra day
        ts = hist_to
        values = []

        # Normal value on [00:00-06:00[
        values += [value] * 6

        # Decrease on [06:00-12:00[
        values += list(range(value, value - 6, -1))

        # Increase on [12:00-18:00[
        values += list(range(value - 6, value, 1))

        # Normal value on [18:00-24:00[
        values += [value] * 6

        for value in values:
            source.insert_times_data({
                'timestamp': ts,
                'foo': value,
            })
            ts += 3600

        prediction = model.predict(source, hist_to, ts)
        self.assertEqual(len(prediction.timestamps), 24)

        hook = TestHook(model.settings, self.storage)

        model.detect_anomalies(prediction, hooks=[hook])
        buckets = prediction.format_buckets()

        # 68–95–99.7 rule
        self.assertEqual(
            buckets[7]['stats']['anomalies']['avg_foo']['type'], 'low')
        self.assertAlmostEqual(
            buckets[7]['stats']['anomalies']['avg_foo']['score'],
            100, delta=35)
        self.assertEqual(
            buckets[8]['stats']['anomalies']['avg_foo']['type'], 'low')
        self.assertAlmostEqual(
            buckets[8]['stats']['anomalies']['avg_foo']['score'],
            100, delta=5)
        self.assertEqual(
            buckets[9]['stats']['anomalies']['avg_foo']['type'], 'low')
        self.assertAlmostEqual(
            buckets[9]['stats']['anomalies']['avg_foo']['score'],
            100, delta=2)

        self.assertEqual(len(hook.events), 2)
        event0, event1 = hook.events
        self.assertEqual(event0['type'], 'start')
        self.assertEqual(event1['type'], 'end')
        self.assertGreaterEqual(
            (event1['dt'] - event0['dt']).seconds,
            6 * 3600,
        )

    def test_low(self):
        source = MemDataSource()

        to_date = datetime.datetime.now().replace(
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
            tzinfo=datetime.timezone.utc,
        ).timestamp()

        # Generate 1 week of data
        nb_days = 7
        hist_to = to_date
        hist_from = to_date - 3600 * 24 * nb_days
        ts = hist_from

        for i in range(nb_days):
            for j in range(0, 24):
                source.insert_times_data({
                    'timestamp': ts,
                    'foo': random.randrange(45, 55),
                    # 'bar': random.randrange(45, 55),
                    # 'baz': random.randrange(45, 55),
                })
                ts += 3600

        model = DonutModel(dict(
            name='test',
            offset=30,
            span=24,
            bucket_interval=3600,
            interval=60,
            features=[
                {
                    'name': 'avg_foo',
                    'metric': 'avg',
                    'field': 'foo',
                    'anomaly_type': 'low',
                },
            ],
            max_threshold=99.7,
            min_threshold=65,
            max_evals=5,
        ))

        model.train(source, hist_from, hist_to)
        self.assertTrue(model.is_trained)

        ts = hist_to
        data = [
            [20.0, 50.0, 80.0],
            [50.0, 80.0, 50.0],
            [50.0, 50.0, 20.0],
        ]
        for values in data:
            source.insert_times_data({
                'timestamp': ts,
                'foo': values[0],
                # 'bar': values[1],
                # 'baz': values[2],
            })
            ts += 3600

        prediction = model.predict(source, hist_to, ts)
        self.assertEqual(len(prediction.timestamps), 3)
        model.detect_anomalies(prediction)
        buckets = prediction.format_buckets()

        anomalies = buckets[0]['stats']['anomalies']
        self.assertEqual(len(anomalies), 1)
        self.assertEqual(anomalies['avg_foo']['type'], 'low')
        anomalies = buckets[1]['stats']['anomalies']
        self.assertEqual(len(anomalies), 0)
        anomalies = buckets[2]['stats']['anomalies']
        self.assertEqual(len(anomalies), 0)

    def test_high(self):
        source = MemDataSource()

        to_date = datetime.datetime.now().replace(
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
            tzinfo=datetime.timezone.utc,
        ).timestamp()

        # Generate 1 week of data
        nb_days = 7
        hist_to = to_date
        hist_from = to_date - 3600 * 24 * nb_days
        ts = hist_from

        for i in range(nb_days):
            for j in range(0, 24):
                source.insert_times_data({
                    'timestamp': ts,
                    # 'foo': random.randrange(45, 55),
                    'bar': random.randrange(45, 55),
                    # 'baz': random.randrange(45, 55),
                })
                ts += 3600

        model = DonutModel(dict(
            name='test',
            offset=30,
            span=24,
            bucket_interval=3600,
            interval=60,
            features=[
                {
                    'name': 'avg_bar',
                    'metric': 'avg',
                    'field': 'bar',
                    'anomaly_type': 'high',
                },
            ],
            max_threshold=99.7,
            min_threshold=65,
            max_evals=5,
        ))

        model.train(source, hist_from, hist_to)
        self.assertTrue(model.is_trained)

        ts = hist_to
        data = [
            [20.0, 50.0, 80.0],
            [50.0, 80.0, 50.0],
            [50.0, 50.0, 20.0],
        ]
        for values in data:
            source.insert_times_data({
                'timestamp': ts,
                # 'foo': values[0],
                'bar': values[1],
                # 'baz': values[2],
            })
            ts += 3600

        prediction = model.predict(source, hist_to, ts)
        self.assertEqual(len(prediction.timestamps), 3)
        model.detect_anomalies(prediction)
        buckets = prediction.format_buckets()

        anomalies = buckets[0]['stats']['anomalies']
        self.assertEqual(len(anomalies), 0)
        anomalies = buckets[1]['stats']['anomalies']
        self.assertEqual(len(anomalies), 1)
        self.assertEqual(anomalies['avg_bar']['type'], 'high')
        anomalies = buckets[2]['stats']['anomalies']
        self.assertEqual(len(anomalies), 0)
    def test_low_high(self):
        source = MemDataSource()

        to_date = datetime.datetime.now().replace(
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
            tzinfo=datetime.timezone.utc,
        ).timestamp()

        # Generate 1 week of data
        nb_days = 7
        hist_to = to_date
        hist_from = to_date - 3600 * 24 * nb_days
        ts = hist_from

        for i in range(nb_days):
            for j in range(0, 24):
                source.insert_times_data({
                    'timestamp': ts,
                    # 'foo': random.randrange(45, 55),
                    # 'bar': random.randrange(45, 55),
                    'baz': random.randrange(45, 55),
                })
                ts += 3600

        model = DonutModel(dict(
            name='test',
            offset=30,
            span=24,
            bucket_interval=3600,
            interval=60,
            features=[
                {
                    'name': 'avg_baz',
                    'metric': 'avg',
                    'field': 'baz',
                    'anomaly_type': 'low_high',
                },
            ],
            max_threshold=99.7,
            min_threshold=65,
            max_evals=5,
        ))

        model.train(source, hist_from, hist_to)
        self.assertTrue(model.is_trained)

        ts = hist_to
        data = [
            [20.0, 50.0, 80.0],
            [50.0, 80.0, 50.0],
            [50.0, 50.0, 20.0],
        ]
        for values in data:
            source.insert_times_data({
                'timestamp': ts,
                # 'foo': values[0],
                # 'bar': values[1],
                'baz': values[2],
            })
            ts += 3600

        prediction = model.predict(source, hist_to, ts)
        self.assertEqual(len(prediction.timestamps), 3)
        model.detect_anomalies(prediction)
        buckets = prediction.format_buckets()

        anomalies = buckets[0]['stats']['anomalies']
        self.assertEqual(len(anomalies), 1)
        self.assertEqual(anomalies['avg_baz']['type'], 'high')
        anomalies = buckets[1]['stats']['anomalies']
        self.assertEqual(len(anomalies), 0)
        anomalies = buckets[2]['stats']['anomalies']
        self.assertEqual(len(anomalies), 1)
        self.assertEqual(anomalies['avg_baz']['type'], 'low')
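# The three tests above exercise the anomaly_type feature setting: 'low'
# only flags drops below the expected range (the 20.0 in bucket 0 of
# test_low), 'high' only flags spikes above it (the 80.0 in bucket 1 of
# test_high), and 'low_high' flags both directions (the 80.0 in bucket 0
# and the 20.0 in bucket 2 of test_low_high).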
def test_train_predict(self):
    model = DonutModel(dict(
        name='test',
        offset=30,
        span=5,
        bucket_interval=60 * 60,
        interval=60,
        features=[
            {
                'name': 'count_foo',
                'metric': 'count',
                'collection': 'coll',
                'field': 'foo',
                'default': 0,
            },
            {
                'name': 'avg_foo',
                'metric': 'avg',
                'collection': 'coll',
                'field': 'foo',
                'default': 5,
            },
        ],
        max_evals=1,
    ))

    generator = SinEventGenerator(base=3, sigma=0.05)

    to_date = datetime.datetime.now(datetime.timezone.utc).replace(
        hour=0,
        minute=0,
        second=0,
        microsecond=0,
    ).timestamp()
    from_date = to_date - 3600 * 24

    for ts in generator.generate_ts(from_date, to_date, step_ms=60000):
        self.source.insert_times_data(
            collection="coll",
            ts=ts,
            data={'foo': random.lognormvariate(10, 1)},
        )

    self.source.commit()

    # Train
    model.train(self.source, from_date=from_date, to_date=to_date)

    # Check
    self.assertTrue(model.is_trained)

    # Predict
    pred_from = to_date - 3 * model.bucket_interval
    pred_to = to_date
    prediction = model.predict(
        datasource=self.source,
        from_date=pred_from,
        to_date=pred_to,
    )
    self.source.save_timeseries_prediction(prediction, model)

    boundaries = list(range(
        int(pred_from),
        int(pred_to + model.bucket_interval),
        int(model.bucket_interval),
    ))
    res = self.source.db['prediction_test'].aggregate([{
        '$bucket': {
            'groupBy': '$timestamp',
            'boundaries': boundaries,
            'default': None,
            'output': {
                'count_foo': {'$avg': '$count_foo'},
                'avg_foo': {'$avg': '$avg_foo'},
            }
        }
    }])

    pred_buckets = prediction.format_buckets()
    for i, entry in enumerate(res):
        predicted = pred_buckets[i]['predicted']
        self.assertEqual(predicted['count_foo'], entry['count_foo'])
        self.assertEqual(predicted['avg_foo'], entry['avg_foo'])
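# Readback note: save_timeseries_prediction() stores the prediction in a
# MongoDB collection named 'prediction_<model_name>' (inferred from the
# 'prediction_test' lookup above, with model name 'test'). The $bucket
# aggregation stage then groups the saved documents by '$timestamp' into
# the same bucket_interval-wide boundaries the model predicted on, so each
# output document can be compared 1:1 with prediction.format_buckets().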
def setUp(self):
    this_day = int(
        datetime.datetime.now(tz=datetime.timezone.utc).replace(
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
        ).timestamp())

    self.database = 'test-times-%d' % this_day
    logging.info("creating database %s", self.database)

    self.source = InfluxDataSource({
        'name': 'test',
        'type': 'influx',
        'addr': ADDR,
        'database': self.database,
    })
    self.source.drop()
    self.source.init()

    # Sin wave. 600s period.
    generator = SinEventGenerator(base=50, amplitude=50, period=600,
                                  sigma=0.01)

    # Normal data in range 06-12
    dt = datetime.datetime(2018, 8, 1, 6, 0)
    from_date = dt.replace(tzinfo=timezone.utc).timestamp()
    dt = datetime.datetime(2018, 8, 1, 12, 0)
    to_date = dt.replace(tzinfo=timezone.utc).timestamp()

    for ts, data in self.generate_data(generator, from_date, to_date,
                                       step_ms=1000, errors=0):
        self.source.insert_times_data(
            measurement='test_auto',
            ts=ts,
            data=data,
        )

    # Random 20s drops in range 12-13
    dt = datetime.datetime(2018, 8, 1, 12, 0)
    from_date = dt.replace(tzinfo=timezone.utc).timestamp()
    dt = datetime.datetime(2018, 8, 1, 13, 0)
    to_date = dt.replace(tzinfo=timezone.utc).timestamp()

    for ts, data in self.generate_data(generator, from_date, to_date,
                                       step_ms=1000, errors=0.0001,
                                       burst_ms=20000):
        self.source.insert_times_data(
            measurement='test_auto',
            ts=ts,
            data=data,
        )

    # Again normal data in range 13-14
    dt = datetime.datetime(2018, 8, 1, 13, 0)
    from_date = dt.replace(tzinfo=timezone.utc).timestamp()
    dt = datetime.datetime(2018, 8, 1, 14, 0)
    to_date = dt.replace(tzinfo=timezone.utc).timestamp()

    for ts, data in self.generate_data(generator, from_date, to_date,
                                       step_ms=1000, errors=0):
        self.source.insert_times_data(
            measurement='test_auto',
            ts=ts,
            data=data,
        )

    # Duplicate the normal data over range 06-14 into measurement='normal'
    dt = datetime.datetime(2018, 8, 1, 6, 0)
    from_date = dt.replace(tzinfo=timezone.utc).timestamp()
    dt = datetime.datetime(2018, 8, 1, 14, 0)
    to_date = dt.replace(tzinfo=timezone.utc).timestamp()

    for ts, data in self.generate_data(generator, from_date, to_date,
                                       step_ms=1000, errors=0):
        self.source.insert_times_data(
            measurement='normal',
            ts=ts,
            data=data,
        )

    self.source.commit()
class AppTests(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        pass

    @classmethod
    def tearDownClass(cls):
        pass

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def setUp(self):
        self.app = app.test_client()
        self.app.testing = True

        self.bucket_interval = 20 * 60

        t0 = int(datetime.datetime.now().timestamp())
        t0 -= t0 % self.bucket_interval
        self.t0 = t0

        self.db = 'test-{}'.format(t0)
        logging.info("creating database %s", self.db)

        self.source = InfluxDataSource({
            'name': 'nose',
            'addr': ADDR,
            'database': self.db,
        })
        self.source.drop()
        self.source.init()

        self.generator = SinEventGenerator(base=3, amplitude=3, sigma=0.01)

        to_date = datetime.datetime.now().timestamp()

        # Be sure that date range is aligned
        self.to_date = math.floor(
            to_date / self.bucket_interval) * self.bucket_interval
        self.from_date = self.to_date - 3600 * 24 * 7 * 3

        for ts in self.generator.generate_ts(self.from_date, self.to_date,
                                             step_ms=600000):
            self.source.insert_times_data(measurement='bar', ts=ts, data={
                'foo': random.normalvariate(10, 1),
            })

        self.source.commit()

        self.dirpath = tempfile.mkdtemp()
        configyml = os.path.join(self.dirpath, 'config.yml')
        with open(configyml, 'w') as cfg:
            cfg.write(CONFIG.format(ADDR, self.db, self.dirpath))

        g_app_init(configyml)

    def tearDown(self):
        g_app_stop()
        self.source.drop()
        shutil.rmtree(self.dirpath)

    def _wait_job(self, job_id):
        state = None
        while not state_is_done(state):
            time.sleep(5)
            result = self.app.get('/jobs/{}'.format(job_id))
            res = json.loads(result.data.decode('utf-8'))
            print(res)
            state = res['state']
        return state

    def _get_models(self):
        result = self.app.get('/models')
        self.assertEqual(result.status_code, 200)
        d = json.loads(result.data.decode('utf-8'))
        return d

    def _require_model(self):
        d = self._get_models()
        if len(d) > 0 and d[0]['name'] == 'test-model':
            # model already exists
            return d[0]

        model = dict(
            name='test-model',
            default_datasource='nose',
            offset=30,
            span=24 * 3,
            bucket_interval=self.bucket_interval,
            interval=60,
            features=FEATURES,
            grace_period="140m",  # = 7 points
            max_threshold=99.7,
            min_threshold=68,
            max_evals=1,
        )
        model['type'] = 'donut'

        result = self.app.put(
            '/models',
            follow_redirects=True,
            content_type='application/json',
            data=json.dumps(model),
        )
        self.assertEqual(result.status_code, 201)

        d = self._get_models()
        self.assertEqual(len(d), 1)
        self.assertEqual(d[0]['settings']['name'], 'test-model')
        return d[0]

    def _require_training(self):
        model = self._require_model()
        if model['state']['trained']:
            return

        result = self.app.post(
            '/models/{}/_train?from={}&to={}'.format(
                'test-model',
                str(self.from_date),
                str(self.to_date),
            ),
        )
        job_id = read_job_id(result)
        status = self._wait_job(job_id)
        self.assertEqual(status, 'done')

    def test_train(self):
        self._require_training()

    def test_home(self):
        result = self.app.get('/')
        self.assertEqual(result.status_code, 200)
        d = json.loads(result.data.decode('utf-8'))
        self.assertIsNotNone(d.get('host_id'))
        self.assertIsNotNone(d.get('version'))
def test_train_predict(self):
    model = DonutModel(dict(
        name='test',
        offset=30,
        span=5,
        bucket_interval=60 * 60,
        interval=60,
        features=[
            {
                'name': 'count_foo',
                'metric': 'count',
                'field': 'prefix.foo',
                'default': 0,
            },
            {
                'name': 'avg_foo',
                'metric': 'avg',
                'field': 'prefix.foo',
                'default': 5,
            },
        ],
        max_evals=1,
    ))

    generator = SinEventGenerator(base=3, sigma=0.05)

    to_date = datetime.datetime.now(datetime.timezone.utc).replace(
        hour=0,
        minute=0,
        second=0,
        microsecond=0,
    ).timestamp()
    from_date = to_date - 3600 * 24

    for ts in generator.generate_ts(from_date, to_date, step_ms=60000):
        self.source.insert_times_data(
            ts=ts,
            data={'prefix.foo': random.lognormvariate(10, 1)},
        )

    self.source.commit()

    # Train
    model.train(self.source, from_date=from_date, to_date=to_date)

    # Check
    self.assertTrue(model.is_trained)

    # Predict
    pred_from = to_date - 3 * model.bucket_interval
    pred_to = to_date
    prediction = model.predict(
        datasource=self.source,
        from_date=pred_from,
        to_date=pred_to,
    )
    self.source.save_timeseries_prediction(prediction, model, tags=self.tag)

    # Fake model just for extracting the saved prediction
    model2 = Model(dict(
        name='test-prediction',
        offset=30,
        span=5,
        bucket_interval=60 * 60,
        interval=60,
        features=[
            {
                'name': 'count_foo',
                'metric': 'avg',
                'field': "prediction.{}.count_foo".format(model.name),
            },
            {
                'name': 'avg_foo',
                'metric': 'avg',
                'field': "prediction.{}.avg_foo".format(model.name),
            },
        ],
        max_evals=1,
    ))

    res = self.source.get_times_data(
        model2,
        pred_from,
        pred_to,
        tags=self.tag,
    )

    for i, pred_ts in enumerate(prediction.timestamps):
        values, ts = res[i][1:]
        self.assertEqual(ts, pred_ts)
        np.testing.assert_allclose(
            np.array(values),
            prediction.predicted[i],
        )
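# Readback note: here the prediction is written back into the same source
# under fields named "prediction.<model_name>.<feature_name>" (see the
# field strings above), so model2 is a throwaway Model whose only purpose
# is to query those fields through get_times_data() and compare them
# against prediction.predicted, bucket by bucket.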
class AppTests(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        pass

    @classmethod
    def tearDownClass(cls):
        pass

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._jwt = None

    def setUp(self):
        if 'LOUDML_ADDR' in os.environ:
            self.loudml_addr = os.environ['LOUDML_ADDR']
        else:
            self.loudml_addr = 'localhost:8077'

        self.bucket_interval = 20 * 60

        t0 = int(datetime.datetime.now().timestamp())
        t0 -= t0 % self.bucket_interval
        self.t0 = t0

        self.db = 'test-{}'.format(t0)
        logging.info("creating database %s", self.db)

        if 'INFLUXDB_ADDR' in os.environ:
            addr = os.environ['INFLUXDB_ADDR']
        else:
            addr = 'localhost'

        self.source = InfluxDataSource({
            'name': 'nosetests',
            'addr': addr,
            'database': self.db,
        })
        self.source.drop()
        self.source.init()

        self.generator = SinEventGenerator(base=3, amplitude=3, sigma=0.01)

        to_date = datetime.datetime.now().timestamp()

        # Be sure that date range is aligned
        self.to_date = math.floor(
            to_date / self.bucket_interval) * self.bucket_interval
        self.from_date = self.to_date - 3600 * 24 * 7 * 3

        for ts in self.generator.generate_ts(self.from_date, self.to_date,
                                             step_ms=600000):
            self.source.insert_times_data(
                measurement='bar',
                ts=ts,
                data={
                    'foo': random.normalvariate(10, 1),
                }
            )

        self.source.commit()

    def tearDown(self):
        self.source.drop()

    def get(self, url, data=None, **kwargs):
        headers = kwargs.pop('headers', {})
        if self._jwt:
            headers['Authorization'] = 'Bearer {}'.format(self._jwt)
        return requests.get(
            self.get_url(url),
            data=data,
            headers=headers,
            **kwargs
        )

    def post(self, url, data=None, **kwargs):
        headers = kwargs.pop('headers', {})
        if self._jwt:
            headers['Authorization'] = 'Bearer {}'.format(self._jwt)
        return requests.post(
            self.get_url(url),
            data=data,
            headers=headers,
            **kwargs
        )

    def patch(self, url, data=None, **kwargs):
        headers = kwargs.pop('headers', {})
        if self._jwt:
            headers['Authorization'] = 'Bearer {}'.format(self._jwt)
        return requests.patch(
            self.get_url(url),
            data=data,
            headers=headers,
            **kwargs
        )

    def delete(self, url, data=None, **kwargs):
        headers = kwargs.pop('headers', {})
        if self._jwt:
            headers['Authorization'] = 'Bearer {}'.format(self._jwt)
        return requests.delete(
            self.get_url(url),
            data=data,
            headers=headers,
            **kwargs
        )

    def put(self, url, data=None, content_type=None, **kwargs):
        headers = kwargs.pop('headers', {})
        if self._jwt:
            headers['Authorization'] = 'Bearer {}'.format(self._jwt)
        if content_type:
            headers['Content-Type'] = content_type
        return requests.put(
            self.get_url(url),
            data=data,
            headers=headers,
            **kwargs
        )

    def get_url(self, url):
        if 'USE_SSL' in os.environ:
            scheme = 'https://'
        else:
            scheme = 'http://'
        return scheme + self.loudml_addr + url

    def _wait_job(self, job_id):
        state = None
        while not state_is_done(state):
            time.sleep(5)
            response = self.get('/jobs/{}'.format(job_id))
            res = response.json()
            # print(res)
            state = res['state']
        return state

    def _get_models(self):
        response = self.get('/models')
        self.assertEqual(response.status_code, 200)
        return response.json()

    def _get_sources(self):
        response = self.get('/datasources')
        self.assertEqual(response.status_code, 200)
        return response.json()

    def _require_source(self):
        sources = {
            source['name']: source
            for source in self._get_sources()
        }
        if self.db in sources:
            return

        if 'INFLUXDB_ADDR' in os.environ:
            addr = os.environ['INFLUXDB_ADDR']
        else:
            addr = 'localhost:8086'

        source = {
            'name': self.db,
            'type': 'influxdb',
            'addr': addr,
            'database': self.db,
            'create_database': 'true',
            'retention_policy': 'autogen',
            'max_series_per_request': 2000,
        }
        response = self.put(
            '/datasources',
            content_type='application/json',
            data=json.dumps(source),
        )
        self.assertEqual(response.status_code, 201)

        sources = {
            source['name']: source
            for source in self._get_sources()
        }
        self.assertTrue(self.db in sources)

    def _del_model(self, model_name):
        response = self.delete(
            '/models/{}'.format(model_name),
        )
        self.assertTrue(response.status_code in [200, 404])

    def _require_model(self):
        self._require_source()

        models = {
            model['settings']['name']: model
            for model in self._get_models()
        }
        if 'test-model' in models:
            return models['test-model']

        model = dict(
            name='test-model',
            default_datasource=self.db,
            offset=30,
            span=24 * 3,
            bucket_interval=self.bucket_interval,
            interval=60,
            features=FEATURES,
            grace_period="140m",  # = 7 points
            max_threshold=99.7,
            min_threshold=68,
            max_evals=1,
        )
        model['type'] = 'donut'

        response = self.put(
            '/models',
            content_type='application/json',
            data=json.dumps(model),
        )
        self.assertEqual(response.status_code, 201)

        models = {
            model['settings']['name']: model
            for model in self._get_models()
        }
        self.assertTrue('test-model' in models)
        return models['test-model']

    def _require_training(self):
        model = self._require_model()
        if model['state']['trained']:
            return

        response = self.post(
            '/models/{}/_train?from={}&to={}'.format(
                'test-model',
                str(self.from_date),
                str(self.to_date),
            ),
        )
        job_id = read_job_id(response.text)
        status = self._wait_job(job_id)
        self.assertEqual(status, 'done')

    def test_training(self):
        self._del_model('test-model')
        self._require_training()

    def test_home(self):
        response = self.get('/')
        self.assertEqual(response.status_code, 200)
        home = response.json()
        self.assertIsNotNone(home.get('host_id'))
        self.assertIsNotNone(home.get('version'))