Example 1
    def test_delete_old_measures(self):
        self.incoming.add_measures(self.metric.id, [
            incoming.Measure(datetime64(2014, 1, 1, 12, 0, 1), 69),
            incoming.Measure(datetime64(2014, 1, 1, 12, 7, 31), 42),
            incoming.Measure(datetime64(2014, 1, 1, 12, 9, 31), 4),
            incoming.Measure(datetime64(2014, 1, 1, 12, 12, 45), 44),
        ])
        self.trigger_processing()

        granularities = [
            numpy.timedelta64(1, 'D'),
            numpy.timedelta64(1, 'h'),
            numpy.timedelta64(5, 'm'),
        ]

        self.assertEqual([
            (datetime64(2014, 1, 1), numpy.timedelta64(1, 'D'), 39.75),
            (datetime64(2014, 1, 1, 12), numpy.timedelta64(1, 'h'), 39.75),
            (datetime64(2014, 1, 1, 12), numpy.timedelta64(5, 'm'), 69.0),
            (datetime64(2014, 1, 1, 12, 5), numpy.timedelta64(5, 'm'), 23.0),
            (datetime64(2014, 1, 1, 12, 10), numpy.timedelta64(5, 'm'), 44.0),
        ], self.storage.get_measures(self.metric, granularities))

        # One year later…
        self.incoming.add_measures(self.metric.id, [
            incoming.Measure(datetime64(2015, 1, 1, 12, 0, 1), 69),
        ])
        self.trigger_processing()

        self.assertEqual([
            (datetime64(2015, 1, 1), numpy.timedelta64(1, 'D'), 69),
            (datetime64(2015, 1, 1, 12), numpy.timedelta64(1, 'h'), 69),
            (datetime64(2015, 1, 1, 12), numpy.timedelta64(5, 'm'), 69),
        ], self.storage.get_measures(self.metric, granularities))

        self.assertEqual(
            {
                carbonara.SplitKey(numpy.datetime64(1244160000, 's'),
                                   numpy.timedelta64(1, 'D')),
            },
            self.storage._list_split_keys_for_metric(self.metric, "mean",
                                                     numpy.timedelta64(1,
                                                                       'D')))
        self.assertEqual(
            {
                carbonara.SplitKey(numpy.datetime64(1412640000, 's'),
                                   numpy.timedelta64(1, 'h')),
            },
            self.storage._list_split_keys_for_metric(self.metric, "mean",
                                                     numpy.timedelta64(1,
                                                                       'h')))
        self.assertEqual(
            {
                carbonara.SplitKey(numpy.datetime64(1419120000, 's'),
                                   numpy.timedelta64(5, 'm')),
            },
            self.storage._list_split_keys_for_metric(self.metric, "mean",
                                                     numpy.timedelta64(5,
                                                                       'm')))
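
The three split keys asserted here can be reproduced by hand. Assuming carbonara packs SplitKey.POINTS_PER_SPLIT = 3600 aggregated points into each split (the value Example 8 and the "3600 points" comments in the later tests use), a split covers 3600 × granularity seconds and its key is the measure timestamp floored to that span. A minimal sketch of that arithmetic, written against plain epoch seconds rather than the gnocchi API (the helper names are mine):

import datetime

# Assumption: 3600 aggregated points per split, as the other examples suggest.
POINTS_PER_SPLIT = 3600

def utc_epoch(*args):
    # Epoch seconds for a naive UTC datetime.
    return int((datetime.datetime(*args) -
                datetime.datetime(1970, 1, 1)).total_seconds())

def split_key_start(timestamp_s, granularity_s):
    # Assumed behaviour: the split key is the timestamp floored to a span of
    # POINTS_PER_SPLIT * granularity seconds.
    span = POINTS_PER_SPLIT * granularity_s
    return timestamp_s - timestamp_s % span

ts = utc_epoch(2015, 1, 1, 12, 0, 1)   # the 2015 measure above

print(split_key_start(ts, 86400))  # 1244160000 -> the 1-day split key
print(split_key_start(ts, 3600))   # 1412640000 -> the 1-hour split key
print(split_key_start(ts, 300))    # 1419120000 -> the 5-minute split key
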
Example 2
 def _list_split_keys(self, metric, aggregations, version=3):
     key = self._metric_key(metric)
     pipe = self._client.pipeline(transaction=False)
     pipe.exists(key)
     for aggregation in aggregations:
         self._scripts["list_split_keys"](
             keys=[key], args=[self._aggregated_field_for_split(
                 aggregation.method, "*",
                 version, aggregation.granularity)],
             client=pipe,
         )
     results = pipe.execute()
     metric_exists_p = results.pop(0)
     if not metric_exists_p:
         raise storage.MetricDoesNotExist(metric)
     keys = {}
     for aggregation, k in six.moves.zip(aggregations, results):
         if not k:
             keys[aggregation] = set()
             continue
         timestamps, methods, granularities = list(zip(*k))
         timestamps = utils.to_timestamps(timestamps)
         granularities = map(utils.to_timespan, granularities)
         keys[aggregation] = {
             carbonara.SplitKey(timestamp,
                                sampling=granularity)
             for timestamp, granularity
             in six.moves.zip(timestamps, granularities)
         }
     return keys
Example 3
 def _list_split_keys(self, metric, aggregations, version=3):
     keys = collections.defaultdict(set)
     for method, grouped_aggregations in itertools.groupby(
             sorted(aggregations, key=ATTRGETTER_METHOD),
             ATTRGETTER_METHOD):
         try:
             files = os.listdir(self._build_metric_path(metric, method))
         except OSError as e:
             if e.errno == errno.ENOENT:
                 raise storage.MetricDoesNotExist(metric)
             raise
         raw_keys = list(
             map(lambda k: k.split("_"),
                 filter(lambda f: self._version_check(f, version), files)))
         if not raw_keys:
             continue
         zipped = list(zip(*raw_keys))
         k_timestamps = utils.to_timestamps(zipped[0])
         k_granularities = list(map(utils.to_timespan, zipped[1]))
         grouped_aggregations = list(grouped_aggregations)
         for timestamp, granularity in six.moves.zip(
                 k_timestamps, k_granularities):
             for agg in grouped_aggregations:
                 if granularity == agg.granularity:
                     keys[agg].add(
                         carbonara.SplitKey(timestamp,
                                            sampling=granularity))
                     break
     return keys
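
In this driver the split keys are recovered from the on-disk file names. Assuming a name shaped like "<timestamp>_<granularity>_v<version>" (a hypothetical layout inferred from the split("_") and the version check above, not something this snippet alone guarantees), the recovery amounts to:

# Hypothetical file name for one split; the real naming may differ in detail.
fname = "1451520000.0_60.0_v3"

timestamp_part, granularity_part = fname.split("_")[:2]
print(float(timestamp_part))    # 1451520000.0 -> start of the split (its SplitKey)
print(float(granularity_part))  # 60.0         -> sampling in seconds
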
Example 4
    def _list_split_keys(self, metric, aggregations, version=3):
        container = self._container_name(metric)
        try:
            headers, files = self.swift.get_container(container,
                                                      full_listing=True)
        except swclient.ClientException as e:
            if e.http_status == 404:
                raise storage.MetricDoesNotExist(metric)
            raise

        raw_keys = list(
            map(lambda k: k.split("_"),
                (f['name']
                 for f in files if self._version_check(f['name'], version)
                 and not f['name'].startswith('none'))))
        keys = collections.defaultdict(set)
        if not raw_keys:
            return keys
        zipped = list(zip(*raw_keys))
        k_timestamps = utils.to_timestamps(zipped[0])
        k_methods = zipped[1]
        k_granularities = list(map(utils.to_timespan, zipped[2]))

        for timestamp, method, granularity in six.moves.zip(
                k_timestamps, k_methods, k_granularities):
            for aggregation in aggregations:
                if (aggregation.method == method
                        and aggregation.granularity == granularity):
                    keys[aggregation].add(
                        carbonara.SplitKey(timestamp, sampling=granularity))
                    break
        return keys
Example 5
    def test_delete_old_measures(self):
        self.incoming.add_measures(self.metric, [
            storage.Measure(utils.dt_to_unix_ns(2014, 1, 1, 12, 0, 1), 69),
            storage.Measure(utils.dt_to_unix_ns(2014, 1, 1, 12, 7, 31), 42),
            storage.Measure(utils.dt_to_unix_ns(2014, 1, 1, 12, 9, 31), 4),
            storage.Measure(utils.dt_to_unix_ns(2014, 1, 1, 12, 12, 45), 44),
        ])
        self.trigger_processing()

        self.assertEqual([
            (utils.datetime_utc(2014, 1, 1), 86400.0, 39.75),
            (utils.datetime_utc(2014, 1, 1, 12), 3600.0, 39.75),
            (utils.datetime_utc(2014, 1, 1, 12), 300.0, 69.0),
            (utils.datetime_utc(2014, 1, 1, 12, 5), 300.0, 23.0),
            (utils.datetime_utc(2014, 1, 1, 12, 10), 300.0, 44.0),
        ], self.storage.get_measures(self.metric))

        # One year later…
        self.incoming.add_measures(self.metric, [
            storage.Measure(utils.dt_to_unix_ns(2015, 1, 1, 12, 0, 1), 69),
        ])
        self.trigger_processing()

        self.assertEqual([
            (utils.datetime_utc(2014, 1, 1), 86400.0, 39.75),
            (utils.datetime_utc(2015, 1, 1), 86400.0, 69),
            (utils.datetime_utc(2015, 1, 1, 12), 3600.0, 69),
            (utils.datetime_utc(2015, 1, 1, 12), 300.0, 69),
        ], self.storage.get_measures(self.metric))

        self.assertEqual({carbonara.SplitKey("1244160000.0", 86400)},
                         self.storage._list_split_keys_for_metric(
                             self.metric, "mean", 86400.0))
        self.assertEqual({carbonara.SplitKey("1412640000.0", 3600)},
                         self.storage._list_split_keys_for_metric(
                             self.metric, "mean", 3600.0))
        self.assertEqual({carbonara.SplitKey("1419120000.0", 300)},
                         self.storage._list_split_keys_for_metric(
                             self.metric, "mean", 300.0))
Example 6
 def _list_split_keys(self, metrics_and_aggregations, version=3):
     pipe = self._client.pipeline(transaction=False)
     # Keep an ordered list of metrics
     metrics = list(metrics_and_aggregations.keys())
     for metric in metrics:
         key = self._metric_key(metric)
         pipe.exists(key)
         aggregations = metrics_and_aggregations[metric]
         for aggregation in aggregations:
             self._scripts["list_split_keys"](
                 keys=[key],
                 args=[
                     self._aggregated_field_for_split(
                         aggregation.method, "*", version,
                         aggregation.granularity)
                 ],
                 client=pipe,
             )
     results = pipe.execute()
     keys = collections.defaultdict(dict)
     start = 0
     for metric in metrics:
         metric_exists_p = results[start]
         if not metric_exists_p:
             raise storage.MetricDoesNotExist(metric)
         aggregations = metrics_and_aggregations[metric]
         number_of_aggregations = len(aggregations)
         keys_for_aggregations = results[start + 1:start + 1 +
                                         number_of_aggregations]
         start += 1 + number_of_aggregations  # 1 for metric_exists_p
         for aggregation, k in six.moves.zip(aggregations,
                                             keys_for_aggregations):
             if not k:
                 keys[metric][aggregation] = set()
                 continue
             timestamps, methods, granularities = list(zip(*k))
             timestamps = utils.to_timestamps(timestamps)
             granularities = map(utils.to_timespan, granularities)
             keys[metric][aggregation] = {
                 carbonara.SplitKey(timestamp, sampling=granularity)
                 for timestamp, granularity in six.moves.zip(
                     timestamps, granularities)
             }
     return keys
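
The pipeline replies come back as a single flat list: for each metric, first the EXISTS reply, then one reply per aggregation, in the order the commands were queued, which is what the start index arithmetic above walks through. An illustrative sketch with made-up reply values:

# Suppose two metrics, the first with 2 aggregations and the second with 1.
# The queued commands then produce a flat reply list shaped like:
results = [
    1, ["key-a", "key-b"], ["key-c"],   # metric 1: EXISTS + 2 script replies
    1, ["key-d"],                       # metric 2: EXISTS + 1 script reply
]

aggregation_counts = [2, 1]
start = 0
for n in aggregation_counts:
    exists = results[start]
    per_aggregation = results[start + 1:start + 1 + n]
    print(exists, per_aggregation)
    start += 1 + n  # 1 for the EXISTS reply
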
Example 7
    def _list_split_keys(self, metric, aggregations, version=3):
        with rados.ReadOpCtx() as op:
            omaps, ret = self.ioctx.get_omap_vals(op, "", "", -1)
            try:
                self.ioctx.operate_read_op(
                    op, self._build_unaggregated_timeserie_path(metric, 3))
            except rados.ObjectNotFound:
                raise storage.MetricDoesNotExist(metric)

            # NOTE(sileht): after reading the libradospy, I'm
            # not sure that ret will have the correct value
            # get_omap_vals transforms the C int to python int
            # before operate_read_op is called, I dunno if the int
            # content is copied during this transformation or if
            # this is a pointer to the C int, I think it's copied...
            try:
                ceph.errno_to_exception(ret)
            except rados.ObjectNotFound:
                raise storage.MetricDoesNotExist(metric)

            raw_keys = [
                name.split("_") for name, value in omaps
                if self._version_check(name, version)
            ]
            keys = collections.defaultdict(set)
            if not raw_keys:
                return keys
            zipped = list(zip(*raw_keys))
            k_timestamps = utils.to_timestamps(zipped[2])
            k_methods = zipped[3]
            k_granularities = list(map(utils.to_timespan, zipped[4]))

            for timestamp, method, granularity in six.moves.zip(
                    k_timestamps, k_methods, k_granularities):
                for aggregation in aggregations:
                    if (aggregation.method == method
                            and aggregation.granularity == granularity):
                        keys[aggregation].add(
                            carbonara.SplitKey(timestamp,
                                               sampling=granularity))
                        break
            return keys
Example 8
    def test_split(self):
        sampling = numpy.timedelta64(5, 's')
        points = 100000
        ts = carbonara.TimeSerie.from_data(timestamps=list(
            map(datetime.datetime.utcfromtimestamp, six.moves.range(points))),
                                           values=list(
                                               six.moves.range(points)))
        agg = self._resample(ts, sampling, 'mean')

        grouped_points = list(agg.split())

        self.assertEqual(
            math.ceil((points / sampling.astype(float)) /
                      carbonara.SplitKey.POINTS_PER_SPLIT),
            len(grouped_points))
        self.assertEqual("0.0", str(carbonara.SplitKey(grouped_points[0][0],
                                                       0)))
        # 3600 × 5s = 5 hours
        self.assertEqual(datetime64(1970, 1, 1, 5), grouped_points[1][0])
        self.assertEqual(carbonara.SplitKey.POINTS_PER_SPLIT,
                         len(grouped_points[0][1]))
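
The expected figures in this test are pure arithmetic: 100000 one-second points resampled at a 5-second granularity yield 20000 aggregated points, and with 3600 points per split that is ceil(20000 / 3600) = 6 splits, each spanning 3600 × 5 s = 18000 s = 5 hours (hence the second split starting at 1970-01-01T05:00). A quick check:

import math

points = 100000          # raw points, one per second
sampling_s = 5
points_per_split = 3600  # carbonara.SplitKey.POINTS_PER_SPLIT in this test

aggregated = points / float(sampling_s)             # 20000.0 aggregated points
splits = math.ceil(aggregated / points_per_split)   # number of splits
split_span_s = points_per_split * sampling_s        # seconds covered per split

print(int(splits))            # 6
print(split_span_s // 3600)   # 5 -> the second split starts at 05:00
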
Example 9
 def _list_split_keys_unbatched(self, metric, aggregations, version=3):
     bucket = self._bucket_name
     keys = {}
     for aggregation in aggregations:
         keys[aggregation] = set()
         response = {}
         while response.get('IsTruncated', True):
             if 'NextContinuationToken' in response:
                 kwargs = {
                     'ContinuationToken': response['NextContinuationToken']
                 }
             else:
                 kwargs = {}
             response = self.s3.list_objects_v2(
                 Bucket=bucket,
                 Prefix=self._prefix(metric) + '%s_%s' % (
                     aggregation.method,
                     utils.timespan_total_seconds(aggregation.granularity),
                 ),
                 **kwargs)
             # If response is empty then check that the metric exists
             contents = response.get('Contents', ())
             if not contents and not self._metric_exists_p(metric, version):
                 raise storage.MetricDoesNotExist(metric)
             for f in contents:
                 try:
                     if (self._version_check(f['Key'], version)):
                         meta = f['Key'].split('_')
                         keys[aggregation].add(
                             carbonara.SplitKey(
                                 utils.to_timestamp(meta[2]),
                                 sampling=aggregation.granularity))
                 except (ValueError, IndexError):
                     # Might be "none", or any other file. Be resilient.
                     continue
     return keys
Example 10
    def test_rewrite_measures_corruption_bad_data(self):
        # Create an archive policy that spans on several splits. Each split
        # being 3600 points, let's go for 36k points so we have 10 splits.
        apname = str(uuid.uuid4())
        ap = archive_policy.ArchivePolicy(apname, 0, [(36000, 60)])
        self.index.create_archive_policy(ap)
        self.metric = storage.Metric(uuid.uuid4(), ap)
        self.index.create_metric(self.metric.id, str(uuid.uuid4()),
                                 apname)

        # First store some points scattered across different splits
        self.incoming.add_measures(self.metric, [
            storage.Measure(utils.dt_to_unix_ns(2016, 1, 1, 12, 0, 1), 69),
            storage.Measure(utils.dt_to_unix_ns(2016, 1, 2, 13, 7, 31), 42),
            storage.Measure(utils.dt_to_unix_ns(2016, 1, 4, 14, 9, 31), 4),
            storage.Measure(utils.dt_to_unix_ns(2016, 1, 6, 15, 12, 45), 44),
        ])
        self.trigger_processing()

        self.assertEqual({
            carbonara.SplitKey(1451520000.0, 60),
            carbonara.SplitKey(1451736000.0, 60),
            carbonara.SplitKey(1451952000.0, 60),
        }, self.storage._list_split_keys_for_metric(self.metric, "mean", 60.0))

        if self.storage.WRITE_FULL:
            assertCompressedIfWriteFull = self.assertTrue
        else:
            assertCompressedIfWriteFull = self.assertFalse

        data = self.storage._get_measures(
            self.metric, carbonara.SplitKey(1451520000.0, 60.0), "mean")
        self.assertTrue(carbonara.AggregatedTimeSerie.is_compressed(data))
        data = self.storage._get_measures(
            self.metric, carbonara.SplitKey(1451736000.0, 60.0), "mean")
        self.assertTrue(carbonara.AggregatedTimeSerie.is_compressed(data))
        data = self.storage._get_measures(
            self.metric, carbonara.SplitKey(1451952000.0, 60.0), "mean")
        assertCompressedIfWriteFull(
            carbonara.AggregatedTimeSerie.is_compressed(data))

        self.assertEqual([
            (utils.datetime_utc(2016, 1, 1, 12), 60.0, 69),
            (utils.datetime_utc(2016, 1, 2, 13, 7), 60.0, 42),
            (utils.datetime_utc(2016, 1, 4, 14, 9), 60.0, 4),
            (utils.datetime_utc(2016, 1, 6, 15, 12), 60.0, 44),
        ], self.storage.get_measures(self.metric, granularity=60.0))

        # Test what happens if we write garbage
        self.storage._store_metric_measures(
            self.metric, carbonara.SplitKey(1451952000.0, 60.0), "mean",
            b"oh really?")

        # Now store brand new points that should force a rewrite of one of the
        # splits (keep in mind the back window size is one hour here). We move
        # the BoundTimeSerie processing timeserie far away from its current
        # range.
        self.incoming.add_measures(self.metric, [
            storage.Measure(utils.dt_to_unix_ns(2016, 1, 10, 16, 18, 45), 45),
            storage.Measure(utils.dt_to_unix_ns(2016, 1, 10, 17, 12, 45), 46),
        ])
        self.trigger_processing()
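
The three split keys asserted above follow from the archive policy: at a 60-second granularity with 3600 points per split, each split covers 3600 × 60 s = 216000 s (2.5 days), and each measure lands on the 216000-second boundary containing it. A small check, independent of gnocchi (the utc_epoch helper is mine):

import datetime

SPLIT_SPAN = 3600 * 60  # 216000 s = 2.5 days per split at 60 s granularity

def utc_epoch(*args):
    # Epoch seconds for a naive UTC datetime.
    return int((datetime.datetime(*args) -
                datetime.datetime(1970, 1, 1)).total_seconds())

for ts in (utc_epoch(2016, 1, 1, 12, 0, 1),
           utc_epoch(2016, 1, 2, 13, 7, 31),
           utc_epoch(2016, 1, 4, 14, 9, 31),
           utc_epoch(2016, 1, 6, 15, 12, 45)):
    print(ts - ts % SPLIT_SPAN)
# Prints 1451520000, 1451736000, 1451736000, 1451952000:
# three distinct split keys, matching the assertion above.
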
Example 11
    def test_rewrite_measures_oldest_mutable_timestamp_eq_next_key(self):
        """See LP#1655422"""
        # Create an archive policy that spans on several splits. Each split
        # being 3600 points, let's go for 36k points so we have 10 splits.
        apname = str(uuid.uuid4())
        ap = archive_policy.ArchivePolicy(apname, 0, [(36000, 60)])
        self.index.create_archive_policy(ap)
        self.metric = storage.Metric(uuid.uuid4(), ap)
        self.index.create_metric(self.metric.id, str(uuid.uuid4()),
                                 apname)

        # First store some points scattered across different splits
        self.incoming.add_measures(self.metric, [
            storage.Measure(utils.dt_to_unix_ns(2016, 1, 1, 12, 0, 1), 69),
            storage.Measure(utils.dt_to_unix_ns(2016, 1, 2, 13, 7, 31), 42),
            storage.Measure(utils.dt_to_unix_ns(2016, 1, 4, 14, 9, 31), 4),
            storage.Measure(utils.dt_to_unix_ns(2016, 1, 6, 15, 12, 45), 44),
        ])
        self.trigger_processing()

        self.assertEqual({
            carbonara.SplitKey(1451520000.0, 60),
            carbonara.SplitKey(1451736000.0, 60),
            carbonara.SplitKey(1451952000.0, 60),
        }, self.storage._list_split_keys_for_metric(self.metric, "mean", 60.0))

        if self.storage.WRITE_FULL:
            assertCompressedIfWriteFull = self.assertTrue
        else:
            assertCompressedIfWriteFull = self.assertFalse

        data = self.storage._get_measures(
            self.metric, carbonara.SplitKey(1451520000.0, 60.0), "mean")
        self.assertTrue(carbonara.AggregatedTimeSerie.is_compressed(data))
        data = self.storage._get_measures(
            self.metric, carbonara.SplitKey(1451736000.0, 60.0), "mean")
        self.assertTrue(carbonara.AggregatedTimeSerie.is_compressed(data))
        data = self.storage._get_measures(
            self.metric, carbonara.SplitKey(1451952000.0, 60.0), "mean")
        assertCompressedIfWriteFull(
            carbonara.AggregatedTimeSerie.is_compressed(data))

        self.assertEqual([
            (utils.datetime_utc(2016, 1, 1, 12), 60.0, 69),
            (utils.datetime_utc(2016, 1, 2, 13, 7), 60.0, 42),
            (utils.datetime_utc(2016, 1, 4, 14, 9), 60.0, 4),
            (utils.datetime_utc(2016, 1, 6, 15, 12), 60.0, 44),
        ], self.storage.get_measures(self.metric, granularity=60.0))

        # Now store brand new points that should force a rewrite of one of the
        # splits (keep in mind the back window size is one hour here). We move
        # the BoundTimeSerie processing timeserie far away from its current
        # range.

        # Here we test a special case where the oldest_mutable_timestamp will
        # be 2016-01-10T00:00:00 = 1452384000.0, our new split key.
        self.incoming.add_measures(self.metric, [
            storage.Measure(utils.dt_to_unix_ns(2016, 1, 10, 0, 12), 45),
        ])
        self.trigger_processing()

        self.assertEqual({
            carbonara.SplitKey(1452384000.0, 60),
            carbonara.SplitKey(1451736000.0, 60),
            carbonara.SplitKey(1451520000.0, 60),
            carbonara.SplitKey(1451952000.0, 60),
        }, self.storage._list_split_keys_for_metric(self.metric, "mean", 60.0))
        data = self.storage._get_measures(
            self.metric, carbonara.SplitKey(1451520000.0, 60.0), "mean")
        self.assertTrue(carbonara.AggregatedTimeSerie.is_compressed(data))
        data = self.storage._get_measures(
            self.metric, carbonara.SplitKey(1451736000.0, 60.0), "mean")
        self.assertTrue(carbonara.AggregatedTimeSerie.is_compressed(data))
        data = self.storage._get_measures(
            self.metric, carbonara.SplitKey(1451952000.0, 60.0), "mean")
        # Now this one is compressed because it has been rewritten!
        self.assertTrue(carbonara.AggregatedTimeSerie.is_compressed(data))
        data = self.storage._get_measures(
            self.metric, carbonara.SplitKey(1452384000.0, 60.0), "mean")
        assertCompressedIfWriteFull(
            carbonara.AggregatedTimeSerie.is_compressed(data))

        self.assertEqual([
            (utils.datetime_utc(2016, 1, 1, 12), 60.0, 69),
            (utils.datetime_utc(2016, 1, 2, 13, 7), 60.0, 42),
            (utils.datetime_utc(2016, 1, 4, 14, 9), 60.0, 4),
            (utils.datetime_utc(2016, 1, 6, 15, 12), 60.0, 44),
            (utils.datetime_utc(2016, 1, 10, 0, 12), 60.0, 45),
        ], self.storage.get_measures(self.metric, granularity=60.0))
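
The "special case" this test exercises is that the new measure's split boundary coincides exactly with the oldest mutable timestamp: 2016-01-10T00:00:00 is 1452384000 seconds since the epoch and an exact multiple of the 216000-second split span. A quick check:

import datetime

boundary = int((datetime.datetime(2016, 1, 10) -
                datetime.datetime(1970, 1, 1)).total_seconds())

print(boundary)                # 1452384000
print(boundary % (3600 * 60))  # 0 -> it falls exactly on a split boundary
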
Example 12
    def _add_measures(self, aggregation, archive_policy_def, metric,
                      grouped_serie, previous_oldest_mutable_timestamp,
                      oldest_mutable_timestamp):
        ts = carbonara.AggregatedTimeSerie.from_grouped_serie(
            grouped_serie,
            archive_policy_def.granularity,
            aggregation,
            max_size=archive_policy_def.points)

        # Don't do anything if the timeserie is empty
        if not ts:
            return

        # We only need to check for rewrite if driver is not in WRITE_FULL mode
        # and if we already stored splits once
        need_rewrite = (not self.WRITE_FULL
                        and previous_oldest_mutable_timestamp is not None)

        if archive_policy_def.timespan or need_rewrite:
            existing_keys = self._list_split_keys_for_metric(
                metric, aggregation, archive_policy_def.granularity)

        # First delete old splits
        if archive_policy_def.timespan:
            oldest_point_to_keep = ts.last - datetime.timedelta(
                seconds=archive_policy_def.timespan)
            oldest_key_to_keep = ts.get_split_key(oldest_point_to_keep)
            oldest_key_to_keep_s = str(oldest_key_to_keep)
            for key in list(existing_keys):
                # NOTE(jd) Only delete if the key is strictly inferior to
                # the timestamp; we don't delete any timeserie split that
                # contains our timestamp, so we prefer to keep a bit more
                # than deleting too much
                if key < oldest_key_to_keep_s:
                    self._delete_metric_measures(
                        metric, key, aggregation,
                        archive_policy_def.granularity)
                    existing_keys.remove(key)
        else:
            oldest_key_to_keep = carbonara.SplitKey(0, 0)

        # Rewrite all read-only splits just for fun (and compression). This
        # only happens if `previous_oldest_mutable_timestamp' exists, which
        # means we already wrote some splits at some point – so this is not the
        # first time we treat this timeserie.
        if need_rewrite:
            previous_oldest_mutable_key = str(
                ts.get_split_key(previous_oldest_mutable_timestamp))
            oldest_mutable_key = str(
                ts.get_split_key(oldest_mutable_timestamp))

            if previous_oldest_mutable_key != oldest_mutable_key:
                for key in existing_keys:
                    if previous_oldest_mutable_key <= key < oldest_mutable_key:
                        LOG.debug(
                            "Compressing previous split %s (%s) for metric %s",
                            key, aggregation, metric)
                        # NOTE(jd) Rewrite it entirely for fun (and later for
                        # compression). For that, we just pass None as split.
                        self._store_timeserie_split(
                            metric,
                            carbonara.SplitKey(float(key),
                                               archive_policy_def.granularity),
                            None, aggregation, archive_policy_def,
                            oldest_mutable_timestamp)

        for key, split in ts.split():
            if key >= oldest_key_to_keep:
                LOG.debug("Storing split %s (%s) for metric %s", key,
                          aggregation, metric)
                self._store_timeserie_split(metric, key, split, aggregation,
                                            archive_policy_def,
                                            oldest_mutable_timestamp)
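
Two different key selections happen in this method: splits strictly older than the retention boundary are deleted, and splits that just left the mutable window are rewritten so they end up compressed. A minimal sketch of that selection on plain numeric keys, assuming ordering comparisons analogous to the ones above (an illustration, not the actual implementation):

existing_keys = [1451520000.0, 1451736000.0, 1451952000.0]

oldest_key_to_keep = 1451736000.0           # derived from the archive policy timespan
previous_oldest_mutable_key = 1451736000.0  # mutable boundary on the previous run
oldest_mutable_key = 1451952000.0           # mutable boundary on this run

# Splits entirely older than the retention boundary are deleted.
to_delete = [k for k in existing_keys if k < oldest_key_to_keep]

# Splits that just became read-only are rewritten for compression.
to_rewrite = [k for k in existing_keys
              if previous_oldest_mutable_key <= k < oldest_mutable_key
              and k not in to_delete]

print(to_delete)   # [1451520000.0]
print(to_rewrite)  # [1451736000.0]
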
Example 13
    def test_rewrite_measures_corruption_missing_file(self):
        # Create an archive policy that spans on several splits. Each split
        # being 3600 points, let's go for 36k points so we have 10 splits.
        apname = str(uuid.uuid4())
        ap = archive_policy.ArchivePolicy(apname, 0, [(36000, 60)])
        self.index.create_archive_policy(ap)
        self.metric = indexer.Metric(uuid.uuid4(), ap)
        self.index.create_metric(self.metric.id, str(uuid.uuid4()), apname)

        # First store some points scattered across different splits
        self.incoming.add_measures(self.metric.id, [
            incoming.Measure(datetime64(2016, 1, 1, 12, 0, 1), 69),
            incoming.Measure(datetime64(2016, 1, 2, 13, 7, 31), 42),
            incoming.Measure(datetime64(2016, 1, 4, 14, 9, 31), 4),
            incoming.Measure(datetime64(2016, 1, 6, 15, 12, 45), 44),
        ])
        self.trigger_processing()

        self.assertEqual(
            {
                carbonara.SplitKey(numpy.datetime64('2015-12-31T00:00:00'),
                                   numpy.timedelta64(1, 'm')),
                carbonara.SplitKey(numpy.datetime64('2016-01-02T12:00:00'),
                                   numpy.timedelta64(1, 'm')),
                carbonara.SplitKey(numpy.datetime64('2016-01-05T00:00:00'),
                                   numpy.timedelta64(1, 'm')),
            },
            self.storage._list_split_keys_for_metric(self.metric, "mean",
                                                     numpy.timedelta64(1,
                                                                       'm')))

        if self.storage.WRITE_FULL:
            assertCompressedIfWriteFull = self.assertTrue
        else:
            assertCompressedIfWriteFull = self.assertFalse

        data = self.storage._get_measures(self.metric, [
            carbonara.SplitKey(
                numpy.datetime64(1451520000, 's'),
                numpy.timedelta64(1, 'm'),
            )
        ], "mean")[0]
        self.assertTrue(carbonara.AggregatedTimeSerie.is_compressed(data))
        data = self.storage._get_measures(self.metric, [
            carbonara.SplitKey(numpy.datetime64(1451736000, 's'),
                               numpy.timedelta64(1, 'm'))
        ], "mean")[0]
        self.assertTrue(carbonara.AggregatedTimeSerie.is_compressed(data))
        data = self.storage._get_measures(self.metric, [
            carbonara.SplitKey(
                numpy.datetime64(1451952000, 's'),
                numpy.timedelta64(1, 'm'),
            )
        ], "mean")[0]
        assertCompressedIfWriteFull(
            carbonara.AggregatedTimeSerie.is_compressed(data))

        self.assertEqual([
            (datetime64(2016, 1, 1, 12), numpy.timedelta64(1, 'm'), 69),
            (datetime64(2016, 1, 2, 13, 7), numpy.timedelta64(1, 'm'), 42),
            (datetime64(2016, 1, 4, 14, 9), numpy.timedelta64(1, 'm'), 4),
            (datetime64(2016, 1, 6, 15, 12), numpy.timedelta64(1, 'm'), 44),
        ],
                         self.storage.get_measures(
                             self.metric,
                             granularities=[numpy.timedelta64(60, 's')]))

        # Test what happens if we delete the latest split and then need to
        # compress it!
        self.storage._delete_metric_measures(
            self.metric,
            carbonara.SplitKey(
                numpy.datetime64(1451952000, 's'),
                numpy.timedelta64(1, 'm'),
            ), 'mean')

        # Now store brand new points that should force a rewrite of one of the
        # splits (keep in mind the back window size is one hour here). We move
        # the BoundTimeSerie processing timeserie far away from its current
        # range.
        self.incoming.add_measures(self.metric.id, [
            incoming.Measure(datetime64(2016, 1, 10, 16, 18, 45), 45),
            incoming.Measure(datetime64(2016, 1, 10, 17, 12, 45), 46),
        ])
        self.trigger_processing()
Example 14
    def test_rewrite_measures(self):
        # Create an archive policy that spans on several splits. Each split
        # being 3600 points, let's go for 36k points so we have 10 splits.
        apname = str(uuid.uuid4())
        ap = archive_policy.ArchivePolicy(apname, 0, [(36000, 60)])
        self.index.create_archive_policy(ap)
        self.metric = storage.Metric(uuid.uuid4(), ap)
        self.index.create_metric(self.metric.id, str(uuid.uuid4()), apname)

        # First store some points scattered across different splits
        self.incoming.add_measures(self.metric, [
            storage.Measure(datetime64(2016, 1, 1, 12, 0, 1), 69),
            storage.Measure(datetime64(2016, 1, 2, 13, 7, 31), 42),
            storage.Measure(datetime64(2016, 1, 4, 14, 9, 31), 4),
            storage.Measure(datetime64(2016, 1, 6, 15, 12, 45), 44),
        ])
        self.trigger_processing()

        self.assertEqual(
            {
                carbonara.SplitKey(numpy.datetime64(1451520000, 's'),
                                   numpy.timedelta64(1, 'm')),
                carbonara.SplitKey(numpy.datetime64(1451736000, 's'),
                                   numpy.timedelta64(1, 'm')),
                carbonara.SplitKey(numpy.datetime64(1451952000, 's'),
                                   numpy.timedelta64(1, 'm')),
            },
            self.storage._list_split_keys_for_metric(self.metric, "mean",
                                                     numpy.timedelta64(1,
                                                                       'm')))

        if self.storage.WRITE_FULL:
            assertCompressedIfWriteFull = self.assertTrue
        else:
            assertCompressedIfWriteFull = self.assertFalse

        data = self.storage._get_measures(
            self.metric,
            carbonara.SplitKey(
                numpy.datetime64(1451520000, 's'),
                numpy.timedelta64(1, 'm'),
            ), "mean")
        self.assertTrue(carbonara.AggregatedTimeSerie.is_compressed(data))
        data = self.storage._get_measures(
            self.metric,
            carbonara.SplitKey(
                numpy.datetime64(1451736000, 's'),
                numpy.timedelta64(60, 's'),
            ), "mean")
        self.assertTrue(carbonara.AggregatedTimeSerie.is_compressed(data))
        data = self.storage._get_measures(
            self.metric,
            carbonara.SplitKey(
                numpy.datetime64(1451952000, 's'),
                numpy.timedelta64(60, 's'),
            ), "mean")
        assertCompressedIfWriteFull(
            carbonara.AggregatedTimeSerie.is_compressed(data))

        self.assertEqual([
            (datetime64(2016, 1, 1, 12), numpy.timedelta64(1, 'm'), 69),
            (datetime64(2016, 1, 2, 13, 7), numpy.timedelta64(1, 'm'), 42),
            (datetime64(2016, 1, 4, 14, 9), numpy.timedelta64(1, 'm'), 4),
            (datetime64(2016, 1, 6, 15, 12), numpy.timedelta64(1, 'm'), 44),
        ],
                         self.storage.get_measures(
                             self.metric,
                             granularity=numpy.timedelta64(1, 'm')))

        # Now store brand new points that should force a rewrite of one of the
        # splits (keep in mind the back window size is one hour here). We move
        # the BoundTimeSerie processing timeserie far away from its current
        # range.
        self.incoming.add_measures(self.metric, [
            storage.Measure(datetime64(2016, 1, 10, 16, 18, 45), 45),
            storage.Measure(datetime64(2016, 1, 10, 17, 12, 45), 46),
        ])
        self.trigger_processing()

        self.assertEqual(
            {
                carbonara.SplitKey(numpy.datetime64(1452384000, 's'),
                                   numpy.timedelta64(1, 'm')),
                carbonara.SplitKey(numpy.datetime64(1451736000, 's'),
                                   numpy.timedelta64(1, 'm')),
                carbonara.SplitKey(numpy.datetime64(1451520000, 's'),
                                   numpy.timedelta64(1, 'm')),
                carbonara.SplitKey(numpy.datetime64(1451952000, 's'),
                                   numpy.timedelta64(1, 'm')),
            },
            self.storage._list_split_keys_for_metric(self.metric, "mean",
                                                     numpy.timedelta64(1,
                                                                       'm')))
        data = self.storage._get_measures(
            self.metric,
            carbonara.SplitKey(
                numpy.datetime64(1451520000, 's'),
                numpy.timedelta64(60, 's'),
            ), "mean")
        self.assertTrue(carbonara.AggregatedTimeSerie.is_compressed(data))
        data = self.storage._get_measures(
            self.metric,
            carbonara.SplitKey(
                numpy.datetime64(1451736000, 's'),
                numpy.timedelta64(60, 's'),
            ), "mean")
        self.assertTrue(carbonara.AggregatedTimeSerie.is_compressed(data))
        data = self.storage._get_measures(
            self.metric,
            carbonara.SplitKey(
                numpy.datetime64(1451952000, 's'),
                numpy.timedelta64(1, 'm'),
            ), "mean")
        # Now this one is compressed because it has been rewritten!
        self.assertTrue(carbonara.AggregatedTimeSerie.is_compressed(data))
        data = self.storage._get_measures(
            self.metric,
            carbonara.SplitKey(
                numpy.datetime64(1452384000, 's'),
                numpy.timedelta64(60, 's'),
            ), "mean")
        assertCompressedIfWriteFull(
            carbonara.AggregatedTimeSerie.is_compressed(data))

        self.assertEqual([
            (datetime64(2016, 1, 1, 12), numpy.timedelta64(1, 'm'), 69),
            (datetime64(2016, 1, 2, 13, 7), numpy.timedelta64(1, 'm'), 42),
            (datetime64(2016, 1, 4, 14, 9), numpy.timedelta64(1, 'm'), 4),
            (datetime64(2016, 1, 6, 15, 12), numpy.timedelta64(1, 'm'), 44),
            (datetime64(2016, 1, 10, 16, 18), numpy.timedelta64(1, 'm'), 45),
            (datetime64(2016, 1, 10, 17, 12), numpy.timedelta64(1, 'm'), 46),
        ],
                         self.storage.get_measures(
                             self.metric,
                             granularity=numpy.timedelta64(1, 'm')))