def test_delete_old_measures(self):
    # Scenario: store four points dated 2014-01-01, verify the aggregates,
    # then store one point a year later and verify both the remaining
    # aggregates and the remaining split keys.
    one_day = numpy.timedelta64(1, 'D')
    one_hour = numpy.timedelta64(1, 'h')
    five_minutes = numpy.timedelta64(5, 'm')

    first_batch = [
        incoming.Measure(datetime64(2014, 1, 1, 12, 0, 1), 69),
        incoming.Measure(datetime64(2014, 1, 1, 12, 7, 31), 42),
        incoming.Measure(datetime64(2014, 1, 1, 12, 9, 31), 4),
        incoming.Measure(datetime64(2014, 1, 1, 12, 12, 45), 44),
    ]
    self.incoming.add_measures(self.metric.id, first_batch)
    self.trigger_processing()

    granularities = [one_day, one_hour, five_minutes]

    expected_2014 = [
        (datetime64(2014, 1, 1), one_day, 39.75),
        (datetime64(2014, 1, 1, 12), one_hour, 39.75),
        (datetime64(2014, 1, 1, 12), five_minutes, 69.0),
        (datetime64(2014, 1, 1, 12, 5), five_minutes, 23.0),
        (datetime64(2014, 1, 1, 12, 10), five_minutes, 44.0),
    ]
    self.assertEqual(
        expected_2014,
        self.storage.get_measures(self.metric, granularities))

    # One year later: a single new point is pushed.
    second_batch = [
        incoming.Measure(datetime64(2015, 1, 1, 12, 0, 1), 69),
    ]
    self.incoming.add_measures(self.metric.id, second_batch)
    self.trigger_processing()

    # Only the 2015 aggregates are expected to be returned now.
    expected_2015 = [
        (datetime64(2015, 1, 1), one_day, 69),
        (datetime64(2015, 1, 1, 12), one_hour, 69),
        (datetime64(2015, 1, 1, 12), five_minutes, 69),
    ]
    self.assertEqual(
        expected_2015,
        self.storage.get_measures(self.metric, granularities))

    # Exactly one split key is expected per granularity.
    expected_split_starts = (
        (1244160000, one_day),
        (1412640000, one_hour),
        (1419120000, five_minutes),
    )
    for epoch, sampling in expected_split_starts:
        expected_keys = {
            carbonara.SplitKey(numpy.datetime64(epoch, 's'), sampling),
        }
        self.assertEqual(
            expected_keys,
            self.storage._list_split_keys_for_metric(
                self.metric, "mean", sampling))
def _list_split_keys(self, metric, aggregations, version=3):
    """List the split keys stored for each aggregation of one metric.

    A single non-transactional pipeline carries one ``exists`` call for
    the metric key followed by one ``list_split_keys`` script call per
    aggregation.

    :param metric: The metric whose splits are listed.
    :param aggregations: Iterable of objects exposing ``method`` and
                         ``granularity``.
    :param version: Storage format version passed to the field pattern.
    :return: A dict mapping each aggregation to a set of
             ``carbonara.SplitKey``.
    :raises storage.MetricDoesNotExist: If the ``exists`` reply is falsy.
    """
    metric_key = self._metric_key(metric)
    pipeline = self._client.pipeline(transaction=False)
    pipeline.exists(metric_key)
    for agg in aggregations:
        field_pattern = self._aggregated_field_for_split(
            agg.method, "*", version, agg.granularity)
        self._scripts["list_split_keys"](
            keys=[metric_key],
            args=[field_pattern],
            client=pipeline,
        )
    replies = pipeline.execute()

    # The first reply answers the ``exists`` call; the remaining ones line
    # up with the aggregations, in order.
    if not replies.pop(0):
        raise storage.MetricDoesNotExist(metric)

    split_keys = {}
    for agg, entries in six.moves.zip(aggregations, replies):
        if entries:
            # Each entry is unpacked as (timestamp, method, granularity);
            # the method column is not needed here.
            raw_timestamps, _methods, raw_granularities = list(
                zip(*entries))
            parsed_timestamps = utils.to_timestamps(raw_timestamps)
            parsed_granularities = map(utils.to_timespan,
                                       raw_granularities)
            split_keys[agg] = {
                carbonara.SplitKey(ts, sampling=sampling)
                for ts, sampling in six.moves.zip(parsed_timestamps,
                                                  parsed_granularities)
            }
        else:
            split_keys[agg] = set()
    return split_keys
def _list_split_keys(self, metric, aggregations, version=3):
    """List split keys by reading one directory per aggregation method.

    Aggregations are grouped by method; for each method the directory
    returned by ``self._build_metric_path(metric, method)`` is listed and
    every entry accepted by ``self._version_check`` is split on ``"_"``
    into a timestamp (first part) and a granularity (second part).

    :param metric: The metric whose splits are listed.
    :param aggregations: Iterable of objects exposing ``method`` and
                         ``granularity``.
    :param version: Version handed to ``self._version_check``.
    :return: A ``defaultdict(set)`` mapping aggregations to sets of
             ``carbonara.SplitKey``.
    :raises storage.MetricDoesNotExist: If a method directory is missing
                                        (``ENOENT``).
    """
    result = collections.defaultdict(set)
    by_method = itertools.groupby(
        sorted(aggregations, key=ATTRGETTER_METHOD), ATTRGETTER_METHOD)
    for method, method_aggregations in by_method:
        try:
            entries = os.listdir(self._build_metric_path(metric, method))
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
            raise storage.MetricDoesNotExist(metric)

        name_parts = [
            entry.split("_")
            for entry in entries
            if self._version_check(entry, version)
        ]
        if not name_parts:
            continue

        # Transpose [[timestamp, granularity, ...], ...] into columns.
        columns = list(zip(*name_parts))
        split_timestamps = utils.to_timestamps(columns[0])
        split_granularities = list(map(utils.to_timespan, columns[1]))

        candidates = list(method_aggregations)
        for split_ts, split_granularity in six.moves.zip(
                split_timestamps, split_granularities):
            # Only the first aggregation with a matching granularity
            # receives the key.
            owner = next(
                (candidate for candidate in candidates
                 if split_granularity == candidate.granularity),
                None)
            if owner is not None:
                result[owner].add(
                    carbonara.SplitKey(split_ts,
                                       sampling=split_granularity))
    return result
def _list_split_keys(self, metric, aggregations, version=3):
    """List split keys from the full listing of the metric's container.

    Object names accepted by ``self._version_check`` and not starting
    with ``'none'`` are split on ``"_"`` into timestamp, method and
    granularity (in that order).

    :param metric: The metric whose splits are listed.
    :param aggregations: Iterable of objects exposing ``method`` and
                         ``granularity``.
    :param version: Version handed to ``self._version_check``.
    :return: A ``defaultdict(set)`` mapping aggregations to sets of
             ``carbonara.SplitKey``.
    :raises storage.MetricDoesNotExist: If the container listing fails
                                        with HTTP status 404.
    """
    container_name = self._container_name(metric)
    try:
        _headers, objects = self.swift.get_container(
            container_name, full_listing=True)
    except swclient.ClientException as e:
        if e.http_status != 404:
            raise
        raise storage.MetricDoesNotExist(metric)

    name_parts = [
        obj['name'].split("_")
        for obj in objects
        if (self._version_check(obj['name'], version)
            and not obj['name'].startswith('none'))
    ]

    result = collections.defaultdict(set)
    if not name_parts:
        return result

    # Transpose [[timestamp, method, granularity, ...], ...] into columns.
    columns = list(zip(*name_parts))
    split_timestamps = utils.to_timestamps(columns[0])
    split_methods = columns[1]
    split_granularities = list(map(utils.to_timespan, columns[2]))

    for split_ts, split_method, split_granularity in six.moves.zip(
            split_timestamps, split_methods, split_granularities):
        # Only the first aggregation matching both method and granularity
        # receives the key.
        owner = next(
            (candidate for candidate in aggregations
             if (candidate.method == split_method
                 and candidate.granularity == split_granularity)),
            None)
        if owner is not None:
            result[owner].add(
                carbonara.SplitKey(split_ts, sampling=split_granularity))
    return result
def test_delete_old_measures(self):
    # Scenario: store four points dated 2014-01-01, verify the aggregates,
    # then store one point a year later and verify the remaining
    # aggregates and split keys.
    first_batch = [
        storage.Measure(utils.dt_to_unix_ns(2014, 1, 1, 12, 0, 1), 69),
        storage.Measure(utils.dt_to_unix_ns(2014, 1, 1, 12, 7, 31), 42),
        storage.Measure(utils.dt_to_unix_ns(2014, 1, 1, 12, 9, 31), 4),
        storage.Measure(utils.dt_to_unix_ns(2014, 1, 1, 12, 12, 45), 44),
    ]
    self.incoming.add_measures(self.metric, first_batch)
    self.trigger_processing()

    expected_2014 = [
        (utils.datetime_utc(2014, 1, 1), 86400.0, 39.75),
        (utils.datetime_utc(2014, 1, 1, 12), 3600.0, 39.75),
        (utils.datetime_utc(2014, 1, 1, 12), 300.0, 69.0),
        (utils.datetime_utc(2014, 1, 1, 12, 5), 300.0, 23.0),
        (utils.datetime_utc(2014, 1, 1, 12, 10), 300.0, 44.0),
    ]
    self.assertEqual(expected_2014, self.storage.get_measures(self.metric))

    # One year later: a single new point is pushed.
    second_batch = [
        storage.Measure(utils.dt_to_unix_ns(2015, 1, 1, 12, 0, 1), 69),
    ]
    self.incoming.add_measures(self.metric, second_batch)
    self.trigger_processing()

    # The daily 2014 aggregate is still expected; finer ones are not.
    expected_after = [
        (utils.datetime_utc(2014, 1, 1), 86400.0, 39.75),
        (utils.datetime_utc(2015, 1, 1), 86400.0, 69),
        (utils.datetime_utc(2015, 1, 1, 12), 3600.0, 69),
        (utils.datetime_utc(2015, 1, 1, 12), 300.0, 69),
    ]
    self.assertEqual(expected_after,
                     self.storage.get_measures(self.metric))

    # Exactly one split key is expected per granularity.
    expected_splits = (
        ("1244160000.0", 86400, 86400.0),
        ("1412640000.0", 3600, 3600.0),
        ("1419120000.0", 300, 300.0),
    )
    for split_start, sampling, granularity in expected_splits:
        self.assertEqual(
            {carbonara.SplitKey(split_start, sampling)},
            self.storage._list_split_keys_for_metric(
                self.metric, "mean", granularity))
def _list_split_keys(self, metrics_and_aggregations, version=3):
    """List split keys for several metrics through one pipeline.

    For every metric the pipeline receives one ``exists`` call followed
    by one ``list_split_keys`` script call per aggregation, so the reply
    list is laid out as ``[exists, agg, agg, ..., exists, agg, ...]``.

    :param metrics_and_aggregations: Mapping of metric to a sized
                                     collection of aggregations exposing
                                     ``method`` and ``granularity``.
    :param version: Storage format version passed to the field pattern.
    :return: A ``defaultdict(dict)``: metric -> aggregation -> set of
             ``carbonara.SplitKey``.
    :raises storage.MetricDoesNotExist: If the ``exists`` reply of a
                                        metric is falsy.
    """
    pipeline = self._client.pipeline(transaction=False)
    # Freeze the metric order: replies are decoded in the same order.
    ordered_metrics = list(metrics_and_aggregations.keys())
    for metric in ordered_metrics:
        metric_key = self._metric_key(metric)
        pipeline.exists(metric_key)
        for agg in metrics_and_aggregations[metric]:
            field_pattern = self._aggregated_field_for_split(
                agg.method, "*", version, agg.granularity)
            self._scripts["list_split_keys"](
                keys=[metric_key],
                args=[field_pattern],
                client=pipeline,
            )
    replies = pipeline.execute()

    split_keys = collections.defaultdict(dict)
    offset = 0
    for metric in ordered_metrics:
        if not replies[offset]:
            raise storage.MetricDoesNotExist(metric)
        metric_aggregations = metrics_and_aggregations[metric]
        first = offset + 1  # skip the ``exists`` reply
        last = first + len(metric_aggregations)
        aggregation_replies = replies[first:last]
        offset = last
        for agg, entries in six.moves.zip(metric_aggregations,
                                          aggregation_replies):
            if entries:
                # Each entry is unpacked as
                # (timestamp, method, granularity); the method column is
                # not needed here.
                raw_timestamps, _methods, raw_granularities = list(
                    zip(*entries))
                parsed_timestamps = utils.to_timestamps(raw_timestamps)
                parsed_granularities = map(utils.to_timespan,
                                           raw_granularities)
                split_keys[metric][agg] = {
                    carbonara.SplitKey(ts, sampling=sampling)
                    for ts, sampling in six.moves.zip(
                        parsed_timestamps, parsed_granularities)
                }
            else:
                split_keys[metric][agg] = set()
    return split_keys
def _list_split_keys(self, metric, aggregations, version=3):
    """List split keys from the omap entries of the metric's object.

    The omap of the object at
    ``self._build_unaggregated_timeserie_path(metric, 3)`` is read; names
    accepted by ``self._version_check`` are split on ``"_"`` and parts 2,
    3 and 4 are used as timestamp, method and granularity.

    :param metric: The metric whose splits are listed.
    :param aggregations: Iterable of objects exposing ``method`` and
                         ``granularity``.
    :param version: Version handed to ``self._version_check``.
    :return: A ``defaultdict(set)`` mapping aggregations to sets of
             ``carbonara.SplitKey``.
    :raises storage.MetricDoesNotExist: If rados reports
                                        ``ObjectNotFound``.
    """
    with rados.ReadOpCtx() as read_op:
        omap_entries, status = self.ioctx.get_omap_vals(
            read_op, "", "", -1)
        object_name = self._build_unaggregated_timeserie_path(metric, 3)
        try:
            self.ioctx.operate_read_op(read_op, object_name)
        except rados.ObjectNotFound:
            raise storage.MetricDoesNotExist(metric)

        # NOTE(sileht): after reading the libradospy code it is unclear
        # whether ``status`` holds the right value: get_omap_vals turns
        # the C int into a Python int before operate_read_op runs, and it
        # is not known whether that conversion copies the value or keeps
        # pointing at the C int (a copy seems more likely).
        try:
            ceph.errno_to_exception(status)
        except rados.ObjectNotFound:
            raise storage.MetricDoesNotExist(metric)

        name_parts = [
            omap_name.split("_")
            for omap_name, _value in omap_entries
            if self._version_check(omap_name, version)
        ]

        result = collections.defaultdict(set)
        if not name_parts:
            return result

        # Transpose the split names into columns.
        columns = list(zip(*name_parts))
        split_timestamps = utils.to_timestamps(columns[2])
        split_methods = columns[3]
        split_granularities = list(map(utils.to_timespan, columns[4]))

        for split_ts, split_method, split_granularity in six.moves.zip(
                split_timestamps, split_methods, split_granularities):
            # Only the first aggregation matching both method and
            # granularity receives the key.
            owner = next(
                (candidate for candidate in aggregations
                 if (candidate.method == split_method
                     and candidate.granularity == split_granularity)),
                None)
            if owner is not None:
                result[owner].add(
                    carbonara.SplitKey(split_ts,
                                       sampling=split_granularity))
        return result
def test_split(self):
    # Build a serie with one point per second starting at the epoch,
    # resample it to 5 seconds and check how it is cut into splits.
    sampling = numpy.timedelta64(5, 's')
    points = 100000
    second_offsets = six.moves.range(points)
    serie = carbonara.TimeSerie.from_data(
        timestamps=[
            datetime.datetime.utcfromtimestamp(offset)
            for offset in second_offsets
        ],
        values=list(six.moves.range(points)))
    resampled = self._resample(serie, sampling, 'mean')

    splits = list(resampled.split())

    # Number of splits = ceil(number of resampled points / split size).
    expected_split_count = math.ceil(
        (points / sampling.astype(float))
        / carbonara.SplitKey.POINTS_PER_SPLIT)
    self.assertEqual(expected_split_count, len(splits))

    first_key = carbonara.SplitKey(splits[0][0], 0)
    self.assertEqual("0.0", str(first_key))

    # The second split is expected to start at 05:00 on 1970-01-01
    # (original note: 3600 x 5s = 5 hours).
    self.assertEqual(datetime64(1970, 1, 1, 5), splits[1][0])

    # The first split is expected to be full.
    self.assertEqual(carbonara.SplitKey.POINTS_PER_SPLIT,
                     len(splits[0][1]))
def _list_split_keys_unbatched(self, metric, aggregations, version=3):
    """List split keys by paging through ``list_objects_v2`` results.

    For each aggregation, objects are listed under the prefix
    ``self._prefix(metric) + '<method>_<granularity in seconds>'`` until
    the response is no longer marked ``IsTruncated``.

    :param metric: The metric whose splits are listed.
    :param aggregations: Iterable of objects exposing ``method`` and
                         ``granularity``.
    :param version: Version handed to ``self._version_check`` and
                    ``self._metric_exists_p``.
    :return: A dict mapping each aggregation to a set of
             ``carbonara.SplitKey``.
    :raises storage.MetricDoesNotExist: If a page is empty and
                                        ``self._metric_exists_p`` is
                                        falsy.
    """
    bucket_name = self._bucket_name
    found = {}
    for agg in aggregations:
        found[agg] = set()
        page = {}
        # An empty ``page`` has no 'IsTruncated' entry, so the default of
        # True guarantees at least one request.
        while page.get('IsTruncated', True):
            paging = {}
            if 'NextContinuationToken' in page:
                paging['ContinuationToken'] = page['NextContinuationToken']
            object_prefix = self._prefix(metric) + '%s_%s' % (
                agg.method,
                utils.timespan_total_seconds(agg.granularity),
            )
            page = self.s3.list_objects_v2(
                Bucket=bucket_name, Prefix=object_prefix, **paging)

            objects = page.get('Contents', ())
            # Nothing listed: make sure the metric itself exists.
            if not objects and not self._metric_exists_p(metric, version):
                raise storage.MetricDoesNotExist(metric)

            for obj in objects:
                try:
                    if not self._version_check(obj['Key'], version):
                        continue
                    name_parts = obj['Key'].split('_')
                    found[agg].add(
                        carbonara.SplitKey(
                            utils.to_timestamp(name_parts[2]),
                            sampling=agg.granularity))
                except (ValueError, IndexError):
                    # The object may be "none" or some unrelated file:
                    # skip it instead of failing.
                    continue
    return found
def test_rewrite_measures_corruption_bad_data(self):
    # The archive policy keeps 36000 points at a 60 second granularity.
    # Per the original author's note a split holds 3600 points, so the
    # serie spans ten splits.
    policy_name = str(uuid.uuid4())
    policy = archive_policy.ArchivePolicy(policy_name, 0, [(36000, 60)])
    self.index.create_archive_policy(policy)
    self.metric = storage.Metric(uuid.uuid4(), policy)
    self.index.create_metric(self.metric.id, str(uuid.uuid4()),
                             policy_name)

    # Store a few points scattered across different splits.
    scattered_points = [
        storage.Measure(utils.dt_to_unix_ns(2016, 1, 1, 12, 0, 1), 69),
        storage.Measure(utils.dt_to_unix_ns(2016, 1, 2, 13, 7, 31), 42),
        storage.Measure(utils.dt_to_unix_ns(2016, 1, 4, 14, 9, 31), 4),
        storage.Measure(utils.dt_to_unix_ns(2016, 1, 6, 15, 12, 45), 44),
    ]
    self.incoming.add_measures(self.metric, scattered_points)
    self.trigger_processing()

    expected_keys = {
        carbonara.SplitKey(1451520000.0, 60),
        carbonara.SplitKey(1451736000.0, 60),
        carbonara.SplitKey(1451952000.0, 60),
    }
    self.assertEqual(
        expected_keys,
        self.storage._list_split_keys_for_metric(
            self.metric, "mean", 60.0))

    # The most recent split is expected to be compressed only when the
    # driver is in WRITE_FULL mode.
    assertCompressedIfWriteFull = (
        self.assertTrue if self.storage.WRITE_FULL else self.assertFalse)

    compression_checks = (
        (1451520000.0, self.assertTrue),
        (1451736000.0, self.assertTrue),
        (1451952000.0, assertCompressedIfWriteFull),
    )
    for split_start, check in compression_checks:
        blob = self.storage._get_measures(
            self.metric, carbonara.SplitKey(split_start, 60.0), "mean")
        check(carbonara.AggregatedTimeSerie.is_compressed(blob))

    expected_measures = [
        (utils.datetime_utc(2016, 1, 1, 12), 60.0, 69),
        (utils.datetime_utc(2016, 1, 2, 13, 7), 60.0, 42),
        (utils.datetime_utc(2016, 1, 4, 14, 9), 60.0, 4),
        (utils.datetime_utc(2016, 1, 6, 15, 12), 60.0, 44),
    ]
    self.assertEqual(
        expected_measures,
        self.storage.get_measures(self.metric, granularity=60.0))

    # Overwrite the latest split with garbage to simulate corruption.
    corrupted_key = carbonara.SplitKey(1451952000.0, 60.0)
    self.storage._store_metric_measures(
        self.metric, corrupted_key, "mean", b"oh really?")

    # Store brand new points that should force a rewrite of one of the
    # splits (the original note says the back window is one hour): the
    # processing timeserie is moved far away from its current range.
    # The test passes as long as processing does not raise.
    far_points = [
        storage.Measure(utils.dt_to_unix_ns(2016, 1, 10, 16, 18, 45), 45),
        storage.Measure(utils.dt_to_unix_ns(2016, 1, 10, 17, 12, 45), 46),
    ]
    self.incoming.add_measures(self.metric, far_points)
    self.trigger_processing()
def test_rewrite_measures_oldest_mutable_timestamp_eq_next_key(self):
    """See LP#1655422"""
    # The archive policy keeps 36000 points at a 60 second granularity.
    # Per the original author's note a split holds 3600 points, so the
    # serie spans ten splits.
    policy_name = str(uuid.uuid4())
    policy = archive_policy.ArchivePolicy(policy_name, 0, [(36000, 60)])
    self.index.create_archive_policy(policy)
    self.metric = storage.Metric(uuid.uuid4(), policy)
    self.index.create_metric(self.metric.id, str(uuid.uuid4()),
                             policy_name)

    def check_compression(split_start, check):
        # Fetch the raw "mean" split starting at ``split_start`` and run
        # ``check`` on its compression flag.
        blob = self.storage._get_measures(
            self.metric, carbonara.SplitKey(split_start, 60.0), "mean")
        check(carbonara.AggregatedTimeSerie.is_compressed(blob))

    # Store a few points scattered across different splits.
    scattered_points = [
        storage.Measure(utils.dt_to_unix_ns(2016, 1, 1, 12, 0, 1), 69),
        storage.Measure(utils.dt_to_unix_ns(2016, 1, 2, 13, 7, 31), 42),
        storage.Measure(utils.dt_to_unix_ns(2016, 1, 4, 14, 9, 31), 4),
        storage.Measure(utils.dt_to_unix_ns(2016, 1, 6, 15, 12, 45), 44),
    ]
    self.incoming.add_measures(self.metric, scattered_points)
    self.trigger_processing()

    keys_before = {
        carbonara.SplitKey(1451520000.0, 60),
        carbonara.SplitKey(1451736000.0, 60),
        carbonara.SplitKey(1451952000.0, 60),
    }
    self.assertEqual(
        keys_before,
        self.storage._list_split_keys_for_metric(
            self.metric, "mean", 60.0))

    # The most recent split is expected to be compressed only when the
    # driver is in WRITE_FULL mode.
    assertCompressedIfWriteFull = (
        self.assertTrue if self.storage.WRITE_FULL else self.assertFalse)

    check_compression(1451520000.0, self.assertTrue)
    check_compression(1451736000.0, self.assertTrue)
    check_compression(1451952000.0, assertCompressedIfWriteFull)

    measures_before = [
        (utils.datetime_utc(2016, 1, 1, 12), 60.0, 69),
        (utils.datetime_utc(2016, 1, 2, 13, 7), 60.0, 42),
        (utils.datetime_utc(2016, 1, 4, 14, 9), 60.0, 4),
        (utils.datetime_utc(2016, 1, 6, 15, 12), 60.0, 44),
    ]
    self.assertEqual(
        measures_before,
        self.storage.get_measures(self.metric, granularity=60.0))

    # Store a brand new point that should force a rewrite of one of the
    # splits (the original note says the back window is one hour): the
    # processing timeserie is moved far away from its current range.
    # Special case under test: the oldest mutable timestamp will be
    # 2016-01-10T00:00:00 = 1452384000.0, which is also the new split key.
    self.incoming.add_measures(self.metric, [
        storage.Measure(utils.dt_to_unix_ns(2016, 1, 10, 0, 12), 45),
    ])
    self.trigger_processing()

    keys_after = {
        carbonara.SplitKey(1452384000.0, 60),
        carbonara.SplitKey(1451736000.0, 60),
        carbonara.SplitKey(1451520000.0, 60),
        carbonara.SplitKey(1451952000.0, 60),
    }
    self.assertEqual(
        keys_after,
        self.storage._list_split_keys_for_metric(
            self.metric, "mean", 60.0))

    check_compression(1451520000.0, self.assertTrue)
    check_compression(1451736000.0, self.assertTrue)
    # This split is now compressed because it has been rewritten.
    check_compression(1451952000.0, self.assertTrue)
    check_compression(1452384000.0, assertCompressedIfWriteFull)

    measures_after = [
        (utils.datetime_utc(2016, 1, 1, 12), 60.0, 69),
        (utils.datetime_utc(2016, 1, 2, 13, 7), 60.0, 42),
        (utils.datetime_utc(2016, 1, 4, 14, 9), 60.0, 4),
        (utils.datetime_utc(2016, 1, 6, 15, 12), 60.0, 44),
        (utils.datetime_utc(2016, 1, 10, 0, 12), 60.0, 45),
    ]
    self.assertEqual(
        measures_after,
        self.storage.get_measures(self.metric, granularity=60.0))
def _add_measures(self, aggregation, archive_policy_def, metric,
                  grouped_serie, previous_oldest_mutable_timestamp,
                  oldest_mutable_timestamp):
    """Aggregate a grouped serie and store the resulting splits.

    Steps, in order: build the aggregated timeserie, delete splits older
    than the archive policy timespan (if any), rewrite splits that just
    became read-only (unless the driver is in WRITE_FULL mode), and
    finally store every new split that is not older than the oldest key
    to keep.

    :param aggregation: Aggregation passed to carbonara and to the
                        storage helpers.
    :param archive_policy_def: Object exposing ``granularity``,
                               ``points`` and ``timespan``.
    :param metric: The metric being processed.
    :param grouped_serie: Input for
                          ``AggregatedTimeSerie.from_grouped_serie``.
    :param previous_oldest_mutable_timestamp: Oldest mutable timestamp of
        the previous run, or None if nothing was stored before.
    :param oldest_mutable_timestamp: Oldest mutable timestamp of this run.
    """
    granularity = archive_policy_def.granularity
    ts = carbonara.AggregatedTimeSerie.from_grouped_serie(
        grouped_serie, granularity, aggregation,
        max_size=archive_policy_def.points)

    # An empty timeserie leaves nothing to do.
    if not ts:
        return

    # A rewrite is only relevant when the driver is not in WRITE_FULL mode
    # and splits have already been stored once.
    need_rewrite = (
        previous_oldest_mutable_timestamp is not None
        and not self.WRITE_FULL
    )

    timespan = archive_policy_def.timespan
    if timespan or need_rewrite:
        existing_keys = self._list_split_keys_for_metric(
            metric, aggregation, granularity)

    # Step 1: delete the splits that fell out of the retention window.
    if not timespan:
        oldest_key_to_keep = carbonara.SplitKey(0, 0)
    else:
        oldest_point_to_keep = ts.last - datetime.timedelta(
            seconds=timespan)
        oldest_key_to_keep = ts.get_split_key(oldest_point_to_keep)
        oldest_key_to_keep_s = str(oldest_key_to_keep)
        # NOTE(jd) Only keys strictly below the limit are deleted: a
        # split containing the limit timestamp is kept, so a bit more
        # data than necessary is retained rather than too little.
        # NOTE(review): the limit is a str here, so this presumably is a
        # lexicographic comparison - confirm the type of the listed keys.
        expired_keys = [
            candidate for candidate in list(existing_keys)
            if candidate < oldest_key_to_keep_s
        ]
        for expired in expired_keys:
            self._delete_metric_measures(
                metric, expired, aggregation, granularity)
            existing_keys.remove(expired)

    # Step 2: rewrite every split that became read-only (for
    # compression). This only happens when
    # `previous_oldest_mutable_timestamp' exists, i.e. when this is not
    # the first time this timeserie is processed.
    if need_rewrite:
        previous_oldest_mutable_key = str(
            ts.get_split_key(previous_oldest_mutable_timestamp))
        oldest_mutable_key = str(
            ts.get_split_key(oldest_mutable_timestamp))

        if previous_oldest_mutable_key != oldest_mutable_key:
            for existing in existing_keys:
                in_window = (
                    previous_oldest_mutable_key
                    <= existing
                    < oldest_mutable_key
                )
                if not in_window:
                    continue
                LOG.debug(
                    "Compressing previous split %s (%s) for metric %s",
                    existing, aggregation, metric)
                # NOTE(jd) Passing None as the split asks for a complete
                # rewrite of the stored split.
                readonly_key = carbonara.SplitKey(float(existing),
                                                  granularity)
                self._store_timeserie_split(
                    metric, readonly_key, None, aggregation,
                    archive_policy_def, oldest_mutable_timestamp)

    # Step 3: store the new splits that are still within retention.
    for split_key, split in ts.split():
        if split_key >= oldest_key_to_keep:
            LOG.debug("Storing split %s (%s) for metric %s",
                      split_key, aggregation, metric)
            self._store_timeserie_split(
                metric, split_key, split, aggregation,
                archive_policy_def, oldest_mutable_timestamp)
def test_rewrite_measures_corruption_missing_file(self):
    # The archive policy keeps 36000 points at a 60 second granularity.
    # Per the original author's note a split holds 3600 points, so the
    # serie spans ten splits.
    policy_name = str(uuid.uuid4())
    policy = archive_policy.ArchivePolicy(policy_name, 0, [(36000, 60)])
    self.index.create_archive_policy(policy)
    self.metric = indexer.Metric(uuid.uuid4(), policy)
    self.index.create_metric(self.metric.id, str(uuid.uuid4()),
                             policy_name)

    one_minute = numpy.timedelta64(1, 'm')

    def check_compression(epoch, check):
        # Fetch the raw "mean" split starting at ``epoch`` (seconds) and
        # run ``check`` on its compression flag.
        split_key = carbonara.SplitKey(
            numpy.datetime64(epoch, 's'), one_minute)
        blob = self.storage._get_measures(
            self.metric, [split_key], "mean")[0]
        check(carbonara.AggregatedTimeSerie.is_compressed(blob))

    # Store a few points scattered across different splits.
    scattered_points = [
        incoming.Measure(datetime64(2016, 1, 1, 12, 0, 1), 69),
        incoming.Measure(datetime64(2016, 1, 2, 13, 7, 31), 42),
        incoming.Measure(datetime64(2016, 1, 4, 14, 9, 31), 4),
        incoming.Measure(datetime64(2016, 1, 6, 15, 12, 45), 44),
    ]
    self.incoming.add_measures(self.metric.id, scattered_points)
    self.trigger_processing()

    expected_keys = {
        carbonara.SplitKey(numpy.datetime64(split_start), one_minute)
        for split_start in ('2015-12-31T00:00:00',
                            '2016-01-02T12:00:00',
                            '2016-01-05T00:00:00')
    }
    self.assertEqual(
        expected_keys,
        self.storage._list_split_keys_for_metric(
            self.metric, "mean", one_minute))

    # The most recent split is expected to be compressed only when the
    # driver is in WRITE_FULL mode.
    assertCompressedIfWriteFull = (
        self.assertTrue if self.storage.WRITE_FULL else self.assertFalse)

    check_compression(1451520000, self.assertTrue)
    check_compression(1451736000, self.assertTrue)
    check_compression(1451952000, assertCompressedIfWriteFull)

    expected_measures = [
        (datetime64(2016, 1, 1, 12), one_minute, 69),
        (datetime64(2016, 1, 2, 13, 7), one_minute, 42),
        (datetime64(2016, 1, 4, 14, 9), one_minute, 4),
        (datetime64(2016, 1, 6, 15, 12), one_minute, 44),
    ]
    self.assertEqual(
        expected_measures,
        self.storage.get_measures(
            self.metric, granularities=[numpy.timedelta64(60, 's')]))

    # Delete the latest split so that the next processing run needs to
    # compress a split that is no longer there.
    missing_key = carbonara.SplitKey(
        numpy.datetime64(1451952000, 's'), one_minute)
    self.storage._delete_metric_measures(self.metric, missing_key, 'mean')

    # Store brand new points that should force a rewrite of one of the
    # splits (the original note says the back window is one hour): the
    # processing timeserie is moved far away from its current range.
    # The test passes as long as processing does not raise.
    far_points = [
        incoming.Measure(datetime64(2016, 1, 10, 16, 18, 45), 45),
        incoming.Measure(datetime64(2016, 1, 10, 17, 12, 45), 46),
    ]
    self.incoming.add_measures(self.metric.id, far_points)
    self.trigger_processing()
def test_rewrite_measures(self):
    # The archive policy keeps 36000 points at a 60 second granularity.
    # Per the original author's note a split holds 3600 points, so the
    # serie spans ten splits.
    policy_name = str(uuid.uuid4())
    policy = archive_policy.ArchivePolicy(policy_name, 0, [(36000, 60)])
    self.index.create_archive_policy(policy)
    self.metric = storage.Metric(uuid.uuid4(), policy)
    self.index.create_metric(self.metric.id, str(uuid.uuid4()),
                             policy_name)

    # Both spellings of the sampling are exercised by this test.
    one_minute = numpy.timedelta64(1, 'm')
    sixty_seconds = numpy.timedelta64(60, 's')

    def check_compression(epoch, sampling, check):
        # Fetch the raw "mean" split starting at ``epoch`` (seconds) and
        # run ``check`` on its compression flag.
        split_key = carbonara.SplitKey(
            numpy.datetime64(epoch, 's'), sampling)
        blob = self.storage._get_measures(self.metric, split_key, "mean")
        check(carbonara.AggregatedTimeSerie.is_compressed(blob))

    # Store a few points scattered across different splits.
    scattered_points = [
        storage.Measure(datetime64(2016, 1, 1, 12, 0, 1), 69),
        storage.Measure(datetime64(2016, 1, 2, 13, 7, 31), 42),
        storage.Measure(datetime64(2016, 1, 4, 14, 9, 31), 4),
        storage.Measure(datetime64(2016, 1, 6, 15, 12, 45), 44),
    ]
    self.incoming.add_measures(self.metric, scattered_points)
    self.trigger_processing()

    keys_before = {
        carbonara.SplitKey(numpy.datetime64(epoch, 's'), one_minute)
        for epoch in (1451520000, 1451736000, 1451952000)
    }
    self.assertEqual(
        keys_before,
        self.storage._list_split_keys_for_metric(
            self.metric, "mean", one_minute))

    # The most recent split is expected to be compressed only when the
    # driver is in WRITE_FULL mode.
    assertCompressedIfWriteFull = (
        self.assertTrue if self.storage.WRITE_FULL else self.assertFalse)

    check_compression(1451520000, one_minute, self.assertTrue)
    check_compression(1451736000, sixty_seconds, self.assertTrue)
    check_compression(1451952000, sixty_seconds,
                      assertCompressedIfWriteFull)

    measures_before = [
        (datetime64(2016, 1, 1, 12), one_minute, 69),
        (datetime64(2016, 1, 2, 13, 7), one_minute, 42),
        (datetime64(2016, 1, 4, 14, 9), one_minute, 4),
        (datetime64(2016, 1, 6, 15, 12), one_minute, 44),
    ]
    self.assertEqual(
        measures_before,
        self.storage.get_measures(self.metric, granularity=one_minute))

    # Store brand new points that should force a rewrite of one of the
    # splits (the original note says the back window is one hour): the
    # processing timeserie is moved far away from its current range.
    far_points = [
        storage.Measure(datetime64(2016, 1, 10, 16, 18, 45), 45),
        storage.Measure(datetime64(2016, 1, 10, 17, 12, 45), 46),
    ]
    self.incoming.add_measures(self.metric, far_points)
    self.trigger_processing()

    keys_after = {
        carbonara.SplitKey(numpy.datetime64(epoch, 's'), one_minute)
        for epoch in (1452384000, 1451736000, 1451520000, 1451952000)
    }
    self.assertEqual(
        keys_after,
        self.storage._list_split_keys_for_metric(
            self.metric, "mean", one_minute))

    check_compression(1451520000, sixty_seconds, self.assertTrue)
    check_compression(1451736000, sixty_seconds, self.assertTrue)
    # This split is now compressed because it has been rewritten.
    check_compression(1451952000, one_minute, self.assertTrue)
    check_compression(1452384000, sixty_seconds,
                      assertCompressedIfWriteFull)

    measures_after = [
        (datetime64(2016, 1, 1, 12), one_minute, 69),
        (datetime64(2016, 1, 2, 13, 7), one_minute, 42),
        (datetime64(2016, 1, 4, 14, 9), one_minute, 4),
        (datetime64(2016, 1, 6, 15, 12), one_minute, 44),
        (datetime64(2016, 1, 10, 16, 18), one_minute, 45),
        (datetime64(2016, 1, 10, 17, 12), one_minute, 46),
    ]
    self.assertEqual(
        measures_after,
        self.storage.get_measures(self.metric, granularity=one_minute))