def insert_annotation(
    self,
    dt,
    desc,
    _type,
    _id,
    measurement='annotations',
    tags=None,
):
    """Write one annotation point into the annotation database.

    :param dt: datetime of the annotation start
    :param desc: annotation text
    :param _type: annotation type, stored as a tag
    :param _id: annotation identifier, stored as a field
    :param measurement: target measurement name
    :param tags: optional extra tags merged into the point
    :return: the list of points that were written
    """
    ts = make_ts(dt.timestamp())
    fields = {
        'deleted': False,
        'modified_time_ns': ts_to_ns(ts),
        'start_time': ts_to_ns(ts),
        'text': desc,
        'id': _id,
    }
    # Tagging the type makes annotations easier to view with TICK 1.7.x
    point_tags = {'type': _type}
    if tags is not None:
        point_tags.update(tags)
    points = [{
        'measurement': measurement,
        'time': ts_to_ns(ts),
        'fields': fields,
        'tags': point_tags,
    }]
    self.annotationdb.write_points(points)
    return points
def insert_times_data(
    self,
    ts,
    data,
    measurement='generic',
    tags=None,
    timestamp_field=None,
    *args,
    **kwargs
):
    """Queue one time-indexed entry for a later batched write."""
    ts = make_ts(ts)
    # Drop None values: there is nothing to save for those fields
    fields = {key: value for key, value in data.items() if value is not None}
    entry = {
        'measurement': measurement,
        'time': ts_to_ns(ts),
        'fields': fields,
    }
    if tags:
        entry['tags'] = tags
    # Skip entries that end up with no field at all
    if fields:
        self.enqueue(entry)
def test_get_times_data2(self):
    """Aggregated data written into a MemDataSource reads back unchanged.

    Reads aggregates from ``self.source``, replays them into an
    in-memory source, queries them again with an 'avg' metric and
    checks both result sets match.
    """
    res = self.source.get_times_data(
        self.model,
        from_date=self.t0,
        to_date=self.t0 + 8,
    )

    # _source to write aggregate data to RAM
    _source = MemDataSource()
    _features = copy.deepcopy(self.model.features)
    # Re-reading pre-aggregated buckets: 'avg' of a single value is
    # the value itself
    # FIX: plain iteration — the enumerate index was discarded
    for feature in self.model.features:
        feature.metric = 'avg'

    # FIX: removed dead "i = None" pre-assignment (never read afterwards)
    for (_, val, timeval) in res:
        bucket = {
            feature.field: val[j]
            for j, feature in enumerate(self.model.features)
        }
        bucket.update({'timestamp': make_ts(timeval)})
        _source.insert_times_data(bucket)

    res2 = _source.get_times_data(
        self.model,
        from_date=self.t0,
        to_date=self.t0 + 8,
    )
    # Restore the original metrics so other tests are unaffected
    self.model.features = _features

    for i, (_, val2, timeval2) in enumerate(res2):
        (_, val, timeval) = res[i]
        np.testing.assert_allclose(val, val2)
def read_from_bucket(
    self,
    bucket_name,
    from_date,
    to_date,
    bucket_interval,
    features,
):
    """Run a query in the bucket TSDB and return the observed data.

    :return: dict with 'timestamps' (list of float) and 'observed'
             (feature name -> list of float values)
    """
    bucket_settings = self.config.get_bucket(bucket_name)
    bucket = loudml.bucket.load_bucket(bucket_settings)
    data = bucket.get_times_data(
        bucket_interval=bucket_interval,
        features=features,
        from_date=from_date,
        to_date=to_date,
    )

    timestamps = []
    observed = {feature.name: [] for feature in features}
    for _, values, timeval in data:
        timestamps.append(make_ts(timeval))
        # values are positionally aligned with the requested features
        for feature, value in zip(features, values):
            observed[feature.name].append(float(value))

    return {
        'timestamps': timestamps,
        'observed': observed,
    }
def insert_times_data(
    self,
    ts,
    data,
    tags=None,
    index=None,
    doc_type=None,
    doc_id=None,
    timestamp_field='timestamp',
    *args,
    **kwargs
):
    """Insert a time-indexed entry.

    The timestamp is stored in milliseconds under ``timestamp_field``
    and tags are flattened into the document body.
    """
    ts = make_ts(ts)
    data[timestamp_field] = ts_to_ms(ts)
    if tags is not None:
        # Flatten tags into the document itself
        data.update(tags)
    self.insert_data(
        data,
        index=index,
        doc_type=doc_type or self.doc_type,
        doc_id=doc_id,
        timestamp=int(ts),
    )
def _build_times_queries(
    self,
    bucket_interval,
    features,
    from_date=None,
    to_date=None,
):
    """Build queries according to the requested features."""
    # The time window and step are identical for every feature
    start = int(make_ts(from_date))
    end = int(make_ts(to_date))
    step = int(bucket_interval)
    return [
        {
            "start": start,
            "end": end,
            "aggregator": feature.metric,
            "step": step,
            "metric_name": feature.field,
            "tags": _build_tags_predicates(feature.match_all),
        }
        for feature in features
    ]
def build_url_params(self, q):
    """Form query URL params from the bits of a query dict.

    :param q: query description with 'start', 'end', 'step',
              'aggregator', 'metric_name' and 'tags' keys
    :return: dict of URL parameters including the rendered 'query'

    TODO: add better aggregator functions handling
    """
    params = {
        'start': int(make_ts(q['start'])),
        'end': int(make_ts(q['end'])),
        'step': q["step"],
        'timeout': DEFAULT_REQUEST_TIMEOUT,
    }
    aggregator = AGGREGATORS.get(q["aggregator"])
    if aggregator:
        params['query'] = aggregator.format(q["metric_name"], q["tags"])
    else:
        # FIX: the implicit string concatenation was missing a space,
        # logging "operator.Please" as one word.
        logging.warning('Unsupported aggregation operator. '
                        'Please submit a ticket on GitHub :)')
        # Fall back to a raw (non-aggregated) query expression
        params['query'] = "{}{}".format(q["metric_name"], q["tags"])
    return params
def write_to_bucket(
    self,
    bucket_name,
    points,
    **kwargs
):
    """Writes data points to the bucket TSDB.

    Builds a schema from the union of all field and tag names seen in
    ``points`` ('timestamp' and 'tags' keys are reserved), initializes
    the bucket with it, then inserts every point and commits.
    """
    bucket_settings = self.config.get_bucket(bucket_name)
    bucket = loudml.bucket.load_bucket(bucket_settings)

    # Collect every field name across all points, minus reserved keys
    field_names = {
        key
        for point in points
        for key in point.keys()
    } - {'timestamp', 'tags'}
    # Collect every tag name across all points that carry tags
    tag_names = {
        tag
        for point in points if 'tags' in point
        for tag in point['tags'].keys()
    }

    data_schema = {tag: {"type": "keyword"} for tag in tag_names}
    data_schema.update({field: {"type": "float"} for field in field_names})
    bucket.init(data_schema=data_schema)

    for point in points:
        ts = make_ts(point.pop('timestamp'))
        point_tags = point.pop('tags', None)
        bucket.insert_times_data(
            ts=ts,
            data=point,
            tags=point_tags,
            **kwargs
        )
    bucket.commit()
def update_annotation(
    self,
    dt,
    points,
):
    """Move an existing annotation to a new start time.

    Performs two writes: first the point is rewritten at its current
    timestamp with ``deleted`` set to True (tombstoning the old entry),
    then it is written again at the new timestamp with ``deleted``
    reset to False. The write order matters; do not reorder.

    :param dt: datetime giving the new annotation start time
    :param points: annotation points as returned by insert_annotation;
                   only points[0] is updated (mutated in place)
    :return: the (mutated) points list
    """
    ts = make_ts(dt.timestamp())
    # Tombstone the existing entry at its original timestamp
    points[0]['fields']['deleted'] = True
    self.annotationdb.write_points(points)
    # Re-insert the annotation at the new timestamp, un-deleted
    points[0]['time'] = ts_to_ns(ts)
    points[0]['fields']['deleted'] = False
    self.annotationdb.write_points(points)
    return points
def test_datetime(self):
    """Round-trip conversions between timestamps, strings and datetimes."""
    expected = datetime.datetime(
        year=2018,
        month=1,
        day=8,
        hour=9,
        minute=39,
        second=26,
        microsecond=123000,
        tzinfo=datetime.timezone.utc,
    )

    # Both numeric and ISO-8601 inputs yield the same aware datetime
    self.assertEqual(make_datetime(1515404366.123), expected)
    self.assertEqual(make_datetime("2018-01-08T09:39:26.123Z"), expected)

    # make_ts is the identity on numeric timestamps
    self.assertEqual(make_ts(1515404366.123), 1515404366.123)
    self.assertEqual(make_ts("2018-01-08T09:39:26.123Z"), 1515404366.123)

    # String conversions are inverses of each other
    self.assertEqual(ts_to_str(1515404366.123), "2018-01-08T09:39:26.123Z")
    self.assertEqual(str_to_ts("2018-01-08T09:39:26.123Z"), 1515404366.123)

    # Out-of-range timestamps are rejected
    with self.assertRaises(ValueError):
        make_datetime(253536624000.0)
def _build_times_queries(
    self,
    bucket_interval,
    features,
    from_date=None,
    to_date=None,
):
    """Build OpenTSDB queries according to the requested features.

    Notes:
    * OpenTSDB requires int timestamps
    * tags is required param
    http://opentsdb.net/docs/build/html/api_http/put.html
    """
    # Align the window on bucket boundaries; the end is pulled back one
    # interval so the last (possibly partial) bucket is excluded.
    start = int(floor(make_ts(from_date), bucket_interval))
    end = int(floor(make_ts(to_date), bucket_interval) - bucket_interval)
    interval_s = int(bucket_interval)
    return [
        {
            "start": start,
            "end": end,
            "metric": feature.metric,
            "down_sampler": "{}s-{}-nan".format(
                interval_s,
                DOWNSAMPLE.get(feature.metric, 'avg'),
            ),
            "field": feature.field,
            "tags": _build_tags_predicates(feature.match_all),
        }
        for feature in features
    ]
def insert_times_data(
    self,
    ts,
    data,
    measurement=None,
    tags=None,
    *args,
    **kwargs
):
    """Queue one entry per non-None metric value.

    :param ts: mixed timestamp (seconds or string), truncated to int
    :param data: metric name -> value mapping; None values are skipped
    :param measurement: unused, kept for interface compatibility
    :param tags: optional tags attached to every enqueued entry
    """
    # FIX: default was a shared mutable dict (tags={}); every enqueued
    # entry referenced the same object across calls. Use None sentinel.
    if tags is None:
        tags = {}
    ts = int(make_ts(ts))
    for metric, value in data.items():
        if value is None:
            # Nothing to save for this metric
            continue
        self.enqueue({
            'metric': metric,
            'timestamp': ts,
            'value': value,
            'tags': tags,
        })
def insert_times_data(
    self,
    ts,
    data,
    measurement=None,
    tags=None,
    sync=False,
    *args,
    **kwargs
):
    """Queue one entry per non-None metric value, timestamped in ms.

    A 'loudml' tag carrying the global tag is always added.

    :param ts: mixed timestamp (seconds or string)
    :param data: metric name -> value mapping; None values are skipped
    :param measurement: unused, kept for interface compatibility
    :param tags: optional tags copied and attached to every entry
    :param sync: unused, kept for interface compatibility
    """
    # FIX: default was a shared mutable dict (tags={}); use None
    # sentinel and copy so the caller's dict is never mutated either.
    tags = dict(tags) if tags else {}
    tags['loudml'] = self.global_tag
    millis = int(make_ts(ts) * 1e3)
    for metric, value in data.items():
        if value is None:
            # Nothing to save for this metric
            continue
        self.enqueue({
            'metric': metric,
            'timestamp': millis,
            'value': value,
            'tags': tags,
        })
def test_forecast(self):
    """Train a Donut model on synthetic sine data, then check that a
    48h forecast has the expected shape, default observed values, and
    head/tail predictions close to recorded reference values.
    """
    model = DonutModel(dict(
        name='test',
        offset=30,
        span=100,
        forecast=1,
        bucket_interval=20 * 60,
        interval=60,
        features=[
            FEATURE_COUNT_FOO,
        ],
        max_evals=21,
    ))
    source = MemDataSource()
    generator = SinEventGenerator(base=3, amplitude=3, sigma=0.01)

    # Align date range to day interval
    to_date = make_ts('1970-12-01T00:00:00.000Z')
    to_date = math.floor(to_date / (3600*24)) * (3600*24)
    # Three weeks of training data, one point every 10 minutes
    from_date = to_date - 3600 * 24 * 7 * 3

    for ts in generator.generate_ts(from_date, to_date, step_ms=600000):
        source.insert_times_data({
            'timestamp': ts,
            'foo': random.normalvariate(10, 1)
        })

    model.train(source, from_date, to_date)
    prediction = model.predict(source, from_date, to_date)

    # Forecast the 48 hours immediately following the training range
    from_date = to_date
    to_date = from_date + 48 * 3600
    forecast = model.forecast(source, from_date, to_date)

    # One forecast bucket per bucket_interval in the window
    expected = math.ceil(
        (to_date - from_date) / model.bucket_interval
    )
    self.assertEqual(len(forecast.timestamps), expected)
    self.assertEqual(forecast.observed.shape, (expected,))
    self.assertEqual(forecast.predicted.shape, (expected,))

    # No data was inserted in the forecast window, so every observed
    # value must equal the feature's default
    all_default = np.full(
        (expected,),
        model.features[0].default,
        dtype=float,
    )
    np.testing.assert_allclose(
        forecast.observed,
        all_default,
    )

    # Reference values for the first and last five predictions
    forecast_head = np.array([0.35, 0.67, 0.73, 0.70, 1.35])
    forecast_tail = np.array([-0.09, -0.02, -0.05, 0.06, 0.08])
    # print(forecast.predicted)

    # Absolute tolerance for comparing against the references
    delta = 1.0
    forecast_good = np.abs(forecast.predicted[:len(forecast_head)]
                           - forecast_head) <= delta
    # print(forecast_head)
    # print(forecast.predicted[:len(forecast_head)])
    # print(forecast_good)
    self.assertEqual(np.all(forecast_good), True)

    forecast_good = np.abs(forecast.predicted[-len(forecast_tail):]
                           - forecast_tail) <= delta
    # print(forecast_tail)
    # print(forecast.predicted[-len(forecast_tail):])
    # print(forecast_good)
    self.assertEqual(np.all(forecast_good), True)
def make_ts_ns(mixed):
    """Build a nanosecond timestamp from a mixed input
    (second timestamp or string)
    """
    seconds = make_ts(mixed)
    return ts_to_ns(seconds)
def make_ts_ms(mixed):
    """Build a millisecond timestamp from a mixed input
    (second timestamp or string)
    """
    seconds = make_ts(mixed)
    return ts_to_ms(seconds)