Example #1
    def test_aggregated_different_archive_overlap_edge_missing2(self):
        tsc1 = {
            'sampling': numpy.timedelta64(60, 's'),
            'size': 10,
            'agg': 'mean'
        }
        tsb1 = carbonara.BoundTimeSerie(block_size=tsc1['sampling'])
        tsc2 = {
            'sampling': numpy.timedelta64(60, 's'),
            'size': 10,
            'agg': 'mean'
        }
        tsb2 = carbonara.BoundTimeSerie(block_size=tsc2['sampling'])

        tsb1.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 3, 0), 4)],
                                    dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                        before_truncate_callback=functools.partial(
                            self._resample_and_merge, agg_dict=tsc1))

        tsb2.set_values(numpy.array([(datetime64(2014, 1, 1, 11, 0, 0), 4),
                                     (datetime64(2014, 1, 1, 12, 3, 0), 4)],
                                    dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                        before_truncate_callback=functools.partial(
                            self._resample_and_merge, agg_dict=tsc2))

        output = cross_metric.aggregated([tsc1['return'], tsc2['return']],
                                         aggregation='mean')
        self.assertEqual([
            (datetime64(2014, 1, 1, 12, 3, 0),
             numpy.timedelta64(60000000000, 'ns'), 4.0),
        ], list(output))
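The numpy-based tests in this listing lean on two helpers defined elsewhere in the test class. The sketch below is an assumption inferred from how they are called here (an agg_dict carrying 'sampling'/'size'/'agg' and exposing the result under 'return'); it is a plausible reconstruction, not the project's exact code:

    import datetime
    import functools

    import numpy
    from gnocchi import carbonara

    def datetime64(*args):
        # Assumed shim: build a numpy datetime64 from calendar components.
        return numpy.datetime64(datetime.datetime(*args))

    # Written as it would sit inside the test class.
    def _resample_and_merge(self, ts, agg_dict):
        """Resample the bound serie and merge it into agg_dict['return'].

        A plausible sketch: group the raw points by the sampling period,
        aggregate them, merge with any previously computed serie, then
        truncate to the configured size.
        """
        grouped = ts.group_serie(agg_dict['sampling'])
        existing = agg_dict.get('return')
        agg_dict['return'] = carbonara.AggregatedTimeSerie.from_grouped_serie(
            grouped, carbonara.Aggregation(
                agg_dict['agg'], agg_dict['sampling'], None))
        if existing is not None:
            agg_dict['return'].merge(existing)
        agg_dict['return'].truncate(agg_dict.get('size'))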
Example #2
    def test_aggregated_different_archive_no_overlap(self):
        tsc1 = {
            'sampling': numpy.timedelta64(60, 's'),
            'size': 50,
            'agg': 'mean'
        }
        tsb1 = carbonara.BoundTimeSerie(block_size=tsc1['sampling'])
        tsc2 = {
            'sampling': numpy.timedelta64(60, 's'),
            'size': 50,
            'agg': 'mean'
        }
        tsb2 = carbonara.BoundTimeSerie(block_size=tsc2['sampling'])

        tsb1.set_values(numpy.array([(datetime64(2014, 1, 1, 11, 46, 4), 4)],
                                    dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                        before_truncate_callback=functools.partial(
                            self._resample_and_merge, agg_dict=tsc1))
        tsb2.set_values(numpy.array([(datetime64(2014, 1, 1, 9, 1, 4), 4)],
                                    dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                        before_truncate_callback=functools.partial(
                            self._resample_and_merge, agg_dict=tsc2))

        dtfrom = datetime64(2014, 1, 1, 11, 0, 0)
        self.assertRaises(cross_metric.UnAggregableTimeseries,
                          cross_metric.aggregated,
                          [tsc1['return'], tsc2['return']],
                          from_timestamp=dtfrom,
                          aggregation='mean')
Example #3
    def test_aggregated_different_archive_overlap_edge_missing2(self):
        tsc1 = carbonara.AggregatedTimeSerie(sampling=60,
                                             max_size=10,
                                             aggregation_method='mean')
        tsb1 = carbonara.BoundTimeSerie(block_size=tsc1.sampling)
        tsc2 = carbonara.AggregatedTimeSerie(sampling=60,
                                             max_size=10,
                                             aggregation_method='mean')
        tsb2 = carbonara.BoundTimeSerie(block_size=tsc2.sampling)

        tsb1.set_values([
            (datetime.datetime(2014, 1, 1, 12, 3, 0), 4),
        ],
                        before_truncate_callback=tsc1.update)

        tsb2.set_values([
            (datetime.datetime(2014, 1, 1, 11, 0, 0), 4),
            (datetime.datetime(2014, 1, 1, 12, 3, 0), 4),
        ],
                        before_truncate_callback=tsc2.update)

        output = carbonara.AggregatedTimeSerie.aggregated([tsc1, tsc2],
                                                          aggregation='mean')
        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:03:00'), 60.0, 4.0),
        ], output)
Example #4
    def test_aggregated_some_overlap_with_fill_null(self):
        tsc1 = {
            'sampling': numpy.timedelta64(60, 's'),
            'size': 10,
            'agg': 'mean'
        }
        tsb1 = carbonara.BoundTimeSerie(block_size=tsc1['sampling'])
        tsc2 = {
            'sampling': numpy.timedelta64(60, 's'),
            'size': 10,
            'agg': 'mean'
        }
        tsb2 = carbonara.BoundTimeSerie(block_size=tsc2['sampling'])

        tsb1.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 3, 0), 9),
                                     (datetime64(2014, 1, 1, 12, 4, 0), 1),
                                     (datetime64(2014, 1, 1, 12, 5, 0), 2),
                                     (datetime64(2014, 1, 1, 12, 6, 0), 7),
                                     (datetime64(2014, 1, 1, 12, 7, 0), 5),
                                     (datetime64(2014, 1, 1, 12, 8, 0), 3)],
                                    dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                        before_truncate_callback=functools.partial(
                            self._resample_and_merge, agg_dict=tsc1))

        tsb2.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 0, 0), 6),
                                     (datetime64(2014, 1, 1, 12, 1, 0), 2),
                                     (datetime64(2014, 1, 1, 12, 2, 0), 13),
                                     (datetime64(2014, 1, 1, 12, 3, 0), 24),
                                     (datetime64(2014, 1, 1, 12, 4, 0), 4),
                                     (datetime64(2014, 1, 1, 12, 5, 0), 16),
                                     (datetime64(2014, 1, 1, 12, 6, 0), 12)],
                                    dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                        before_truncate_callback=functools.partial(
                            self._resample_and_merge, agg_dict=tsc2))

        output = cross_metric.aggregated([tsc1['return'], tsc2['return']],
                                         aggregation='mean',
                                         fill='null')

        self.assertEqual([
            (datetime64(2014, 1, 1, 12, 0, 0),
             numpy.timedelta64(60000000000, 'ns'), 6.0),
            (datetime64(2014, 1, 1, 12, 1, 0),
             numpy.timedelta64(60000000000, 'ns'), 2.0),
            (datetime64(2014, 1, 1, 12, 2, 0),
             numpy.timedelta64(60000000000, 'ns'), 13.0),
            (datetime64(2014, 1, 1, 12, 3, 0),
             numpy.timedelta64(60000000000, 'ns'), 16.5),
            (datetime64(2014, 1, 1, 12, 4, 0),
             numpy.timedelta64(60000000000, 'ns'), 2.5),
            (datetime64(2014, 1, 1, 12, 5, 0),
             numpy.timedelta64(60000000000, 'ns'), 9.0),
            (datetime64(2014, 1, 1, 12, 6, 0),
             numpy.timedelta64(60000000000, 'ns'), 9.5),
            (datetime64(2014, 1, 1, 12, 7, 0),
             numpy.timedelta64(60000000000, 'ns'), 5.0),
            (datetime64(2014, 1, 1, 12, 8, 0),
             numpy.timedelta64(60000000000, 'ns'), 3.0),
        ], list(output))
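With fill='null', a hole in one series is treated as NaN and each bucket is effectively a NaN-ignoring mean of whatever values exist. A quick spot check against the data above, using plain numpy rather than anything carbonara-specific:

    import numpy

    # 12:00 exists only in tsc2 (6); 12:03 exists in both (9 and 24).
    print(numpy.nanmean([numpy.nan, 6.0]))  # 6.0
    print(numpy.nanmean([9.0, 24.0]))       # 16.5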
Example #5
    def test_aggregated_different_archive_overlap_edge_missing1(self):
        tsc1 = {
            'sampling': numpy.timedelta64(60, 's'),
            'size': 10,
            'agg': 'mean'
        }
        tsb1 = carbonara.BoundTimeSerie(block_size=tsc1['sampling'])
        tsc2 = {
            'sampling': numpy.timedelta64(60, 's'),
            'size': 10,
            'agg': 'mean'
        }
        tsb2 = carbonara.BoundTimeSerie(block_size=tsc2['sampling'])

        tsb1.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 3, 0), 9),
                                     (datetime64(2014, 1, 1, 12, 4, 0), 1),
                                     (datetime64(2014, 1, 1, 12, 5, 0), 2),
                                     (datetime64(2014, 1, 1, 12, 6, 0), 7),
                                     (datetime64(2014, 1, 1, 12, 7, 0), 5),
                                     (datetime64(2014, 1, 1, 12, 8, 0), 3)],
                                    dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                        before_truncate_callback=functools.partial(
                            self._resample_and_merge, agg_dict=tsc1))

        tsb2.set_values(numpy.array([(datetime64(2014, 1, 1, 11, 0, 0), 6),
                                     (datetime64(2014, 1, 1, 12, 1, 0), 2),
                                     (datetime64(2014, 1, 1, 12, 2, 0), 13),
                                     (datetime64(2014, 1, 1, 12, 3, 0), 24),
                                     (datetime64(2014, 1, 1, 12, 4, 0), 4),
                                     (datetime64(2014, 1, 1, 12, 5, 0), 16),
                                     (datetime64(2014, 1, 1, 12, 6, 0), 12)],
                                    dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                        before_truncate_callback=functools.partial(
                            self._resample_and_merge, agg_dict=tsc2))

        # By default we require 100% of the points to overlap,
        # but we allow the last datapoint of the finest
        # granularity to be missing
        output = cross_metric.aggregated([tsc1['return'], tsc2['return']],
                                         aggregation='sum')

        self.assertEqual([
            (datetime64(2014, 1, 1, 12, 3, 0),
             numpy.timedelta64(60, 's'), 33.0),
            (datetime64(2014, 1, 1, 12, 4, 0),
             numpy.timedelta64(60, 's'), 5.0),
            (datetime64(2014, 1, 1, 12, 5, 0),
             numpy.timedelta64(60, 's'), 18.0),
            (datetime64(2014, 1, 1, 12, 6, 0),
             numpy.timedelta64(60, 's'), 19.0),
        ], list(output))
Example #6
    def test_fetch_nano(self):
        ts = {'sampling': numpy.timedelta64(200, 'ms'),
              'size': 10, 'agg': 'mean'}
        tsb = carbonara.BoundTimeSerie(block_size=ts['sampling'])

        tsb.set_values(numpy.array([
            (datetime64(2014, 1, 1, 11, 46, 0, 200123), 4),
            (datetime64(2014, 1, 1, 11, 46, 0, 340000), 8),
            (datetime64(2014, 1, 1, 11, 47, 0, 323154), 50),
            (datetime64(2014, 1, 1, 11, 48, 0, 590903), 4),
            (datetime64(2014, 1, 1, 11, 48, 0, 903291), 4)],
            dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
            before_truncate_callback=functools.partial(
                self._resample_and_merge, agg_dict=ts))

        tsb.set_values(numpy.array([
            (datetime64(2014, 1, 1, 11, 48, 0, 821312), 5)],
            dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
            before_truncate_callback=functools.partial(
                self._resample_and_merge, agg_dict=ts))

        self.assertEqual([
            (datetime64(2014, 1, 1, 11, 46, 0, 200000), 6.0),
            (datetime64(2014, 1, 1, 11, 47, 0, 200000), 50.0),
            (datetime64(2014, 1, 1, 11, 48, 0, 400000), 4.0),
            (datetime64(2014, 1, 1, 11, 48, 0, 800000), 4.5)
        ], list(ts['return'].fetch()))
        self.assertEqual(numpy.timedelta64(200000000, 'ns'),
                         ts['return'].aggregation.granularity)
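The bucket timestamps asserted above (for instance 11:46:00.200000 for a point at 11:46:00.200123) follow from flooring each timestamp to a multiple of the 200 ms sampling period. A minimal sketch of that rounding using plain epoch arithmetic, as an illustration rather than carbonara's actual implementation:

    import numpy

    def floor_to_sampling(ts, sampling):
        # Floor an epoch-nanosecond timestamp to the sampling period.
        ns = sampling.astype('timedelta64[ns]').astype('int64')
        epoch = ts.astype('datetime64[ns]').astype('int64')
        return numpy.datetime64(int(epoch // ns * ns), 'ns')

    ts = numpy.datetime64('2014-01-01T11:46:00.200123')
    print(floor_to_sampling(ts, numpy.timedelta64(200, 'ms')))
    # 2014-01-01T11:46:00.200000000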
Example #7
    def test_fetch_agg_std(self):
        # NOTE (gordc): this is a good test to ensure we drop NaN entries
        # 2014-01-01 12:00:00 will appear if we don't dropna()
        ts = {'sampling': numpy.timedelta64(60, 's'),
              'size': 60, 'agg': 'std'}
        tsb = carbonara.BoundTimeSerie(block_size=ts['sampling'])

        tsb.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 0, 0), 3),
                                    (datetime64(2014, 1, 1, 12, 1, 4), 4),
                                    (datetime64(2014, 1, 1, 12, 1, 9), 7),
                                    (datetime64(2014, 1, 1, 12, 2, 1), 15),
                                    (datetime64(2014, 1, 1, 12, 2, 12), 1)],
                                   dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                       before_truncate_callback=functools.partial(
                           self._resample_and_merge, agg_dict=ts))

        self.assertEqual([
            (datetime64(2014, 1, 1, 12, 1, 0), 2.1213203435596424),
            (datetime64(2014, 1, 1, 12, 2, 0), 9.8994949366116654),
        ], list(ts['return'].fetch(datetime64(2014, 1, 1, 12, 0, 0))))

        tsb.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 2, 13), 110)],
                                   dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                       before_truncate_callback=functools.partial(
                           self._resample_and_merge, agg_dict=ts))

        self.assertEqual([
            (datetime64(2014, 1, 1, 12, 1, 0), 2.1213203435596424),
            (datetime64(2014, 1, 1, 12, 2, 0), 59.304300012730948),
        ], list(ts['return'].fetch(datetime64(2014, 1, 1, 12, 0, 0))))
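The expected figures match the sample standard deviation (ddof=1) of each bucket's raw values, which plain numpy reproduces:

    import numpy

    # Minute 12:01 holds [4, 7]; minute 12:02 holds [15, 1], then
    # [15, 1, 110] after the second batch is added.
    print(numpy.std([4, 7], ddof=1))        # ~2.1213203435596424
    print(numpy.std([15, 1], ddof=1))       # ~9.8994949366116654
    print(numpy.std([15, 1, 110], ddof=1))  # ~59.304300012730948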
Example #8
    def test_fetch_agg_max(self):
        ts = carbonara.AggregatedTimeSerie(sampling=60,
                                           max_size=60,
                                           aggregation_method='max')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
                        (datetime.datetime(2014, 1, 1, 12, 1, 4), 4),
                        (datetime.datetime(2014, 1, 1, 12, 1, 9), 7),
                        (datetime.datetime(2014, 1, 1, 12, 2, 1), 15),
                        (datetime.datetime(2014, 1, 1, 12, 2, 12), 1)],
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:00:00'), 60.0, 3),
            (pandas.Timestamp('2014-01-01 12:01:00'), 60.0, 7),
            (pandas.Timestamp('2014-01-01 12:02:00'), 60.0, 15),
        ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))

        tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 2, 13), 110)],
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:00:00'), 60.0, 3),
            (pandas.Timestamp('2014-01-01 12:01:00'), 60.0, 7),
            (pandas.Timestamp('2014-01-01 12:02:00'), 60.0, 110),
        ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))
Example #9
    def test_fetch_agg_std(self):
        ts = carbonara.AggregatedTimeSerie(sampling=60,
                                           max_size=60,
                                           aggregation_method='std')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
                        (datetime.datetime(2014, 1, 1, 12, 1, 4), 4),
                        (datetime.datetime(2014, 1, 1, 12, 1, 9), 7),
                        (datetime.datetime(2014, 1, 1, 12, 2, 1), 15),
                        (datetime.datetime(2014, 1, 1, 12, 2, 12), 1)],
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:01:00'), 60.0,
             2.1213203435596424),
            (pandas.Timestamp('2014-01-01 12:02:00'), 60.0,
             9.8994949366116654),
        ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))

        tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 2, 13), 110)],
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:01:00'), 60.0,
             2.1213203435596424),
            (pandas.Timestamp('2014-01-01 12:02:00'), 60.0,
             59.304300012730948),
        ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))
Example #10
    def test_fetch_nano(self):
        ts = carbonara.AggregatedTimeSerie(sampling=0.2,
                                           max_size=10,
                                           aggregation_method='mean')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 11, 46, 0, 200123), 4),
            (datetime.datetime(2014, 1, 1, 11, 46, 0, 340000), 8),
            (datetime.datetime(2014, 1, 1, 11, 47, 0, 323154), 50),
            (datetime.datetime(2014, 1, 1, 11, 48, 0, 590903), 4),
            (datetime.datetime(2014, 1, 1, 11, 48, 0, 903291), 4),
        ],
                       before_truncate_callback=ts.update)

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 11, 48, 0, 821312), 5),
        ],
                       before_truncate_callback=ts.update)

        self.assertEqual(
            [(datetime.datetime(2014, 1, 1, 11, 46, 0, 200000), 0.2, 6.0),
             (datetime.datetime(2014, 1, 1, 11, 47, 0, 200000), 0.2, 50.0),
             (datetime.datetime(2014, 1, 1, 11, 48, 0, 400000), 0.2, 4.0),
             (datetime.datetime(2014, 1, 1, 11, 48, 0, 800000), 0.2, 4.5)],
            ts.fetch())
Example #11
    def test_fetch_agg_max(self):
        ts = {'sampling': numpy.timedelta64(60, 's'),
              'size': 60, 'agg': 'max'}
        tsb = carbonara.BoundTimeSerie(block_size=ts['sampling'])

        tsb.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 0, 0), 3),
                                    (datetime64(2014, 1, 1, 12, 1, 4), 4),
                                    (datetime64(2014, 1, 1, 12, 1, 9), 7),
                                    (datetime64(2014, 1, 1, 12, 2, 1), 15),
                                    (datetime64(2014, 1, 1, 12, 2, 12), 1)],
                                   dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                       before_truncate_callback=functools.partial(
                           self._resample_and_merge, agg_dict=ts))

        self.assertEqual([
            (datetime64(2014, 1, 1, 12, 0, 0), 3),
            (datetime64(2014, 1, 1, 12, 1, 0), 7),
            (datetime64(2014, 1, 1, 12, 2, 0), 15),
        ], list(ts['return'].fetch(datetime64(2014, 1, 1, 12, 0, 0))))

        tsb.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 2, 13), 110)],
                                   dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                       before_truncate_callback=functools.partial(
                           self._resample_and_merge, agg_dict=ts))

        self.assertEqual([
            (datetime64(2014, 1, 1, 12, 0, 0), 3),
            (datetime64(2014, 1, 1, 12, 1, 0), 7),
            (datetime64(2014, 1, 1, 12, 2, 0), 110),
        ], list(ts['return'].fetch(datetime64(2014, 1, 1, 12, 0, 0))))
Example #12
    def test_back_window(self):
        """Back window testing.

        Test the back window on an archive is not longer than the window we
        aggregate on.
        """
        ts = {'sampling': numpy.timedelta64(1, 's'), 'size': 60, 'agg': 'mean'}
        tsb = carbonara.BoundTimeSerie(block_size=ts['sampling'])

        tsb.set_values(numpy.array([
            (datetime64(2014, 1, 1, 12, 0, 1, 2300), 1),
            (datetime64(2014, 1, 1, 12, 0, 1, 4600), 2),
            (datetime64(2014, 1, 1, 12, 0, 2, 4500), 3),
            (datetime64(2014, 1, 1, 12, 0, 2, 7800), 4),
            (datetime64(2014, 1, 1, 12, 0, 3, 8), 2.5)],
            dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
            before_truncate_callback=functools.partial(
                self._resample_and_merge, agg_dict=ts))

        self.assertEqual(
            [
                (datetime64(2014, 1, 1, 12, 0, 1), 1.5),
                (datetime64(2014, 1, 1, 12, 0, 2), 3.5),
                (datetime64(2014, 1, 1, 12, 0, 3), 2.5),
            ],
            list(ts['return'].fetch()))
Example #13
    def test_cross_metric_with_random_holes_derived_boundaries(self):
        tsc1 = {
            'sampling': numpy.timedelta64(60, 's'),
            'size': 10,
            'agg': 'mean'
        }
        tsb1 = carbonara.BoundTimeSerie(block_size=tsc1['sampling'])
        tsc2 = {
            'sampling': numpy.timedelta64(60, 's'),
            'size': 10,
            'agg': 'mean'
        }
        tsb2 = carbonara.BoundTimeSerie(block_size=tsc2['sampling'])

        tsb1.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 1, 0), 9),
                                     (datetime64(2014, 1, 1, 12, 2, 0), 1),
                                     (datetime64(2014, 1, 1, 12, 4, 0), 5),
                                     (datetime64(2014, 1, 1, 12, 6, 0), 3)],
                                    dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                        before_truncate_callback=functools.partial(
                            self._resample_and_merge, agg_dict=tsc1))

        tsb2.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 0, 0), 6),
                                     (datetime64(2014, 1, 1, 12, 1, 0), 2),
                                     (datetime64(2014, 1, 1, 12, 2, 0), 13),
                                     (datetime64(2014, 1, 1, 12, 3, 0), 24),
                                     (datetime64(2014, 1, 1, 12, 4, 0), 4)],
                                    dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                        before_truncate_callback=functools.partial(
                            self._resample_and_merge, agg_dict=tsc2))

        output = cross_metric.aggregated([tsc1['return'], tsc2['return']],
                                         aggregation='mean',
                                         needed_percent_of_overlap=50.0)

        self.assertEqual([
            (datetime64(2014, 1, 1, 12, 1, 0),
             numpy.timedelta64(60000000000, 'ns'), 5.5),
            (datetime64(2014, 1, 1, 12, 2, 0),
             numpy.timedelta64(60000000000, 'ns'), 7.0),
            (datetime64(2014, 1, 1, 12, 3, 0),
             numpy.timedelta64(60000000000, 'ns'), 24.0),
            (datetime64(2014, 1, 1, 12, 4, 0),
             numpy.timedelta64(60000000000, 'ns'), 4.5),
        ], list(output))
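With needed_percent_of_overlap=50.0 the random holes are tolerated, and each surviving bucket is the mean of whichever values are present. Checking against the inputs above with plain numpy:

    import numpy

    print(numpy.nanmean([9, 2]))           # 12:01 -> 5.5
    print(numpy.nanmean([1, 13]))          # 12:02 -> 7.0
    print(numpy.nanmean([numpy.nan, 24]))  # 12:03 -> 24.0 (tsc2 only)
    print(numpy.nanmean([5, 4]))           # 12:04 -> 4.5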
Example #14
    def test_aggregated_different_archive_overlap_edge_missing1(self):
        tsc1 = carbonara.AggregatedTimeSerie(sampling=60,
                                             max_size=10,
                                             aggregation_method='mean')
        tsb1 = carbonara.BoundTimeSerie(block_size=tsc1.sampling)
        tsc2 = carbonara.AggregatedTimeSerie(sampling=60,
                                             max_size=10,
                                             aggregation_method='mean')
        tsb2 = carbonara.BoundTimeSerie(block_size=tsc2.sampling)

        tsb1.set_values([
            (datetime.datetime(2014, 1, 1, 12, 3, 0), 9),
            (datetime.datetime(2014, 1, 1, 12, 4, 0), 1),
            (datetime.datetime(2014, 1, 1, 12, 5, 0), 2),
            (datetime.datetime(2014, 1, 1, 12, 6, 0), 7),
            (datetime.datetime(2014, 1, 1, 12, 7, 0), 5),
            (datetime.datetime(2014, 1, 1, 12, 8, 0), 3),
        ],
                        before_truncate_callback=tsc1.update)

        tsb2.set_values([
            (datetime.datetime(2014, 1, 1, 11, 0, 0), 6),
            (datetime.datetime(2014, 1, 1, 12, 1, 0), 2),
            (datetime.datetime(2014, 1, 1, 12, 2, 0), 13),
            (datetime.datetime(2014, 1, 1, 12, 3, 0), 24),
            (datetime.datetime(2014, 1, 1, 12, 4, 0), 4),
            (datetime.datetime(2014, 1, 1, 12, 5, 0), 16),
            (datetime.datetime(2014, 1, 1, 12, 6, 0), 12),
        ],
                        before_truncate_callback=tsc2.update)

        # By default we require 100% of the points to overlap,
        # but we allow the last datapoint of the finest
        # granularity to be missing
        output = carbonara.AggregatedTimeSerie.aggregated([tsc1, tsc2],
                                                          aggregation='sum')

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:03:00'), 60.0, 33.0),
            (pandas.Timestamp('2014-01-01 12:04:00'), 60.0, 5.0),
            (pandas.Timestamp('2014-01-01 12:05:00'), 60.0, 18.0),
            (pandas.Timestamp('2014-01-01 12:06:00'), 60.0, 19.0),
        ], output)
Example #15
    def test_fetch(self):
        ts = {'sampling': numpy.timedelta64(60, 's'),
              'size': 10, 'agg': 'mean'}
        tsb = carbonara.BoundTimeSerie(block_size=ts['sampling'])

        tsb.set_values(numpy.array([
            (datetime64(2014, 1, 1, 11, 46, 4), 4),
            (datetime64(2014, 1, 1, 11, 47, 34), 8),
            (datetime64(2014, 1, 1, 11, 50, 54), 50),
            (datetime64(2014, 1, 1, 11, 54, 45), 4),
            (datetime64(2014, 1, 1, 11, 56, 49), 4),
            (datetime64(2014, 1, 1, 11, 57, 22), 6),
            (datetime64(2014, 1, 1, 11, 58, 22), 5),
            (datetime64(2014, 1, 1, 12, 1, 4), 4),
            (datetime64(2014, 1, 1, 12, 1, 9), 7),
            (datetime64(2014, 1, 1, 12, 2, 1), 15),
            (datetime64(2014, 1, 1, 12, 2, 12), 1),
            (datetime64(2014, 1, 1, 12, 3, 0), 3),
            (datetime64(2014, 1, 1, 12, 4, 9), 7),
            (datetime64(2014, 1, 1, 12, 5, 1), 15),
            (datetime64(2014, 1, 1, 12, 5, 12), 1),
            (datetime64(2014, 1, 1, 12, 6, 0, 2), 3)],
            dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
            before_truncate_callback=functools.partial(
                self._resample_and_merge, agg_dict=ts))

        tsb.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 6), 5)],
                                   dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                       before_truncate_callback=functools.partial(
                           self._resample_and_merge, agg_dict=ts))

        self.assertEqual([
            (datetime64(2014, 1, 1, 11, 46), 4.0),
            (datetime64(2014, 1, 1, 11, 47), 8.0),
            (datetime64(2014, 1, 1, 11, 50), 50.0),
            (datetime64(2014, 1, 1, 11, 54), 4.0),
            (datetime64(2014, 1, 1, 11, 56), 4.0),
            (datetime64(2014, 1, 1, 11, 57), 6.0),
            (datetime64(2014, 1, 1, 11, 58), 5.0),
            (datetime64(2014, 1, 1, 12, 1), 5.5),
            (datetime64(2014, 1, 1, 12, 2), 8.0),
            (datetime64(2014, 1, 1, 12, 3), 3.0),
            (datetime64(2014, 1, 1, 12, 4), 7.0),
            (datetime64(2014, 1, 1, 12, 5), 8.0),
            (datetime64(2014, 1, 1, 12, 6), 4.0)
        ], list(ts['return'].fetch()))

        self.assertEqual([
            (datetime64(2014, 1, 1, 12, 1), 5.5),
            (datetime64(2014, 1, 1, 12, 2), 8.0),
            (datetime64(2014, 1, 1, 12, 3), 3.0),
            (datetime64(2014, 1, 1, 12, 4), 7.0),
            (datetime64(2014, 1, 1, 12, 5), 8.0),
            (datetime64(2014, 1, 1, 12, 6), 4.0)
        ], list(ts['return'].fetch(datetime64(2014, 1, 1, 12, 0, 0))))
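Two of the asserted buckets, spot-checked by hand: minute 12:01 receives the raw points 4 and 7, and minute 12:02 receives 15 and 1:

    import numpy

    print(numpy.mean([4, 7]))   # 12:01 -> 5.5
    print(numpy.mean([15, 1]))  # 12:02 -> 8.0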
Example #16
    def test_aggregated_different_archive_no_overlap(self):
        tsc1 = carbonara.AggregatedTimeSerie(sampling=60,
                                             max_size=50,
                                             aggregation_method='mean')
        tsb1 = carbonara.BoundTimeSerie(block_size=tsc1.sampling)
        tsc2 = carbonara.AggregatedTimeSerie(sampling=60,
                                             max_size=50,
                                             aggregation_method='mean')
        tsb2 = carbonara.BoundTimeSerie(block_size=tsc2.sampling)

        tsb1.set_values([(datetime.datetime(2014, 1, 1, 11, 46, 4), 4)],
                        before_truncate_callback=tsc1.update)
        tsb2.set_values([(datetime.datetime(2014, 1, 1, 9, 1, 4), 4)],
                        before_truncate_callback=tsc2.update)

        dtfrom = datetime.datetime(2014, 1, 1, 11, 0, 0)
        self.assertRaises(carbonara.UnAggregableTimeseries,
                          carbonara.AggregatedTimeSerie.aggregated,
                          [tsc1, tsc2],
                          from_timestamp=dtfrom,
                          aggregation='mean')
Example #17
    def test_no_truncation(self):
        ts = carbonara.AggregatedTimeSerie(sampling=60,
                                           aggregation_method='mean')
        tsb = carbonara.BoundTimeSerie()

        for i in six.moves.range(1, 11):
            tsb.set_values(
                [(datetime.datetime(2014, 1, 1, 12, i, i), float(i))],
                before_truncate_callback=ts.update)
            tsb.set_values(
                [(datetime.datetime(2014, 1, 1, 12, i, i + 1), float(i + 1))],
                before_truncate_callback=ts.update)
            self.assertEqual(i, len(ts.fetch()))
Example #18
    def test_fetch_agg_pct(self):
        ts = {'sampling': numpy.timedelta64(1, 's'),
              'size': 3600 * 24, 'agg': '90pct'}
        tsb = carbonara.BoundTimeSerie(block_size=ts['sampling'])

        tsb.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 0, 0), 3),
                                    (datetime64(2014, 1, 1, 12, 0, 0, 123), 4),
                                    (datetime64(2014, 1, 1, 12, 0, 2), 4)],
                                   dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                       before_truncate_callback=functools.partial(
                           self._resample_and_merge, agg_dict=ts))

        result = list(ts['return'].fetch(datetime64(2014, 1, 1, 12, 0, 0)))
        reference = [
            (datetime64(2014, 1, 1, 12, 0, 0), 3.9),
            (datetime64(2014, 1, 1, 12, 0, 2), 4),
        ]

        self.assertEqual(len(reference), len(result))

        for ref, res in zip(reference, result):
            self.assertEqual(ref[0], res[0])
            # Rounding \o/
            self.assertAlmostEqual(ref[1], res[1])

        tsb.set_values(numpy.array([
            (datetime64(2014, 1, 1, 12, 0, 2, 113), 110)],
            dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
            before_truncate_callback=functools.partial(
                self._resample_and_merge, agg_dict=ts))

        result = list(ts['return'].fetch(datetime64(2014, 1, 1, 12, 0, 0)))
        reference = [
            (datetime64(2014, 1, 1, 12, 0, 0), 3.9),
            (datetime64(2014, 1, 1, 12, 0, 2), 99.4),
        ]

        self.assertEqual(len(reference), len(result))

        for ref, res in zip(reference, result):
            self.assertEqual(ref[0], res[0])
            # Rounding \o/
            self.assertAlmostEqual(ref[1], res[1])
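The 90pct expectations follow from the 90th percentile with linear interpolation: the 12:00:00 bucket holds [3, 4] and, after the second batch, the 12:00:02 bucket holds [4, 110]. Plain numpy agrees up to the rounding the test hedges against with assertAlmostEqual:

    import numpy

    print(numpy.percentile([3, 4], 90))    # ~3.9
    print(numpy.percentile([4, 110], 90))  # ~99.4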
Example #19
    def test_fetch(self):
        ts = carbonara.AggregatedTimeSerie(sampling=60,
                                           max_size=10,
                                           aggregation_method='mean')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 11, 46, 4), 4),
            (datetime.datetime(2014, 1, 1, 11, 47, 34), 8),
            (datetime.datetime(2014, 1, 1, 11, 50, 54), 50),
            (datetime.datetime(2014, 1, 1, 11, 54, 45), 4),
            (datetime.datetime(2014, 1, 1, 11, 56, 49), 4),
            (datetime.datetime(2014, 1, 1, 11, 57, 22), 6),
            (datetime.datetime(2014, 1, 1, 11, 58, 22), 5),
            (datetime.datetime(2014, 1, 1, 12, 1, 4), 4),
            (datetime.datetime(2014, 1, 1, 12, 1, 9), 7),
            (datetime.datetime(2014, 1, 1, 12, 2, 1), 15),
            (datetime.datetime(2014, 1, 1, 12, 2, 12), 1),
            (datetime.datetime(2014, 1, 1, 12, 3, 0), 3),
            (datetime.datetime(2014, 1, 1, 12, 4, 9), 7),
            (datetime.datetime(2014, 1, 1, 12, 5, 1), 15),
            (datetime.datetime(2014, 1, 1, 12, 5, 12), 1),
            (datetime.datetime(2014, 1, 1, 12, 6, 0, 2), 3),
        ],
                       before_truncate_callback=ts.update)

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 12, 6), 5),
        ],
                       before_truncate_callback=ts.update)

        self.assertEqual([(datetime.datetime(2014, 1, 1, 11, 54), 60.0, 4.0),
                          (datetime.datetime(2014, 1, 1, 11, 56), 60.0, 4.0),
                          (datetime.datetime(2014, 1, 1, 11, 57), 60.0, 6.0),
                          (datetime.datetime(2014, 1, 1, 11, 58), 60.0, 5.0),
                          (datetime.datetime(2014, 1, 1, 12, 1), 60.0, 5.5),
                          (datetime.datetime(2014, 1, 1, 12, 2), 60.0, 8.0),
                          (datetime.datetime(2014, 1, 1, 12, 3), 60.0, 3.0),
                          (datetime.datetime(2014, 1, 1, 12, 4), 60.0, 7.0),
                          (datetime.datetime(2014, 1, 1, 12, 5), 60.0, 8.0),
                          (datetime.datetime(2014, 1, 1, 12, 6), 60.0, 4.0)],
                         ts.fetch())

        self.assertEqual([(datetime.datetime(2014, 1, 1, 12, 1), 60.0, 5.5),
                          (datetime.datetime(2014, 1, 1, 12, 2), 60.0, 8.0),
                          (datetime.datetime(2014, 1, 1, 12, 3), 60.0, 3.0),
                          (datetime.datetime(2014, 1, 1, 12, 4), 60.0, 7.0),
                          (datetime.datetime(2014, 1, 1, 12, 5), 60.0, 8.0),
                          (datetime.datetime(2014, 1, 1, 12, 6), 60.0, 4.0)],
                         ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))
Example #20
    def test_back_window_ignore(self):
        """Back window testing.

        Test the back window on an archive is not longer than the window we
        aggregate on.
        """
        ts = carbonara.AggregatedTimeSerie(sampling=1,
                                           max_size=60,
                                           aggregation_method='mean')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 12, 0, 1, 2300), 1),
            (datetime.datetime(2014, 1, 1, 12, 0, 1, 4600), 2),
            (datetime.datetime(2014, 1, 1, 12, 0, 2, 4500), 3),
            (datetime.datetime(2014, 1, 1, 12, 0, 2, 7800), 4),
            (datetime.datetime(2014, 1, 1, 12, 0, 3, 8), 2.5),
        ],
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
            (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
            (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 2.5),
        ], ts.fetch())

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 12, 0, 2, 99), 9),
        ],
                       ignore_too_old_timestamps=True,
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
            (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
            (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 2.5),
        ], ts.fetch())

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 12, 0, 2, 99), 9),
            (datetime.datetime(2014, 1, 1, 12, 0, 3, 9), 4.5),
        ],
                       ignore_too_old_timestamps=True,
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
            (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
            (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 3.5),
        ], ts.fetch())
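The last assertion works out because the too-old 12:00:02.000099 point is silently dropped while 12:00:03.000009 joins the still-open 12:00:03 bucket, whose mean becomes:

    import numpy

    print(numpy.mean([2.5, 4.5]))  # 3.5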
Example #21
    def test_no_truncation(self):
        ts = {'sampling': numpy.timedelta64(60, 's'), 'agg': 'mean'}
        tsb = carbonara.BoundTimeSerie()

        for i in six.moves.range(1, 11):
            tsb.set_values(numpy.array([
                (datetime64(2014, 1, 1, 12, i, i), float(i))],
                dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                before_truncate_callback=functools.partial(
                    self._resample_and_merge, agg_dict=ts))
            tsb.set_values(numpy.array([
                (datetime64(2014, 1, 1, 12, i, i + 1), float(i + 1))],
                dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                before_truncate_callback=functools.partial(
                    self._resample_and_merge, agg_dict=ts))
            self.assertEqual(i, len(list(ts['return'].fetch())))
Example #22
    def test_serialize(self):
        ts = carbonara.AggregatedTimeSerie(sampling=0.5,
                                           aggregation_method='mean')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 12, 0, 0, 1234), 3),
            (datetime.datetime(2014, 1, 1, 12, 0, 0, 321), 6),
            (datetime.datetime(2014, 1, 1, 12, 1, 4, 234), 5),
            (datetime.datetime(2014, 1, 1, 12, 1, 9, 32), 7),
            (datetime.datetime(2014, 1, 1, 12, 2, 12, 532), 1),
        ],
                       before_truncate_callback=ts.update)

        self.assertEqual(
            ts, carbonara.AggregatedTimeSerie.unserialize(ts.serialize()))
Example #23
    def test_serialize(self):
        ts = {'sampling': numpy.timedelta64(500, 'ms'), 'agg': 'mean'}
        tsb = carbonara.BoundTimeSerie(block_size=ts['sampling'])

        tsb.set_values(numpy.array([
            (datetime64(2014, 1, 1, 12, 0, 0, 1234), 3),
            (datetime64(2014, 1, 1, 12, 0, 0, 321), 6),
            (datetime64(2014, 1, 1, 12, 1, 4, 234), 5),
            (datetime64(2014, 1, 1, 12, 1, 9, 32), 7),
            (datetime64(2014, 1, 1, 12, 2, 12, 532), 1)],
            dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
            before_truncate_callback=functools.partial(
                self._resample_and_merge, agg_dict=ts))

        key = ts['return'].get_split_key()
        o, s = ts['return'].serialize(key)
        self.assertEqual(ts['return'],
                         carbonara.AggregatedTimeSerie.unserialize(
                             s, key, ts['return'].aggregation))
Example #24
    def test_back_window(self):
        """Back window testing.

        Test the back window on an archive is not longer than the window we
        aggregate on.
        """
        ts = carbonara.AggregatedTimeSerie(sampling=1,
                                           max_size=60,
                                           aggregation_method='mean')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 12, 0, 1, 2300), 1),
            (datetime.datetime(2014, 1, 1, 12, 0, 1, 4600), 2),
            (datetime.datetime(2014, 1, 1, 12, 0, 2, 4500), 3),
            (datetime.datetime(2014, 1, 1, 12, 0, 2, 7800), 4),
            (datetime.datetime(2014, 1, 1, 12, 0, 3, 8), 2.5),
        ],
                       before_truncate_callback=ts.update)

        self.assertEqual([
            (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
            (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
            (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 2.5),
        ], ts.fetch())

        try:
            tsb.set_values([
                (datetime.datetime(2014, 1, 1, 12, 0, 2, 99), 9),
            ])
        except carbonara.NoDeloreanAvailable as e:
            self.assertEqual(
                six.text_type(e),
                u"2014-01-01 12:00:02.000099 is before 2014-01-01 12:00:03")
            self.assertEqual(datetime.datetime(2014, 1, 1, 12, 0, 2, 99),
                             e.bad_timestamp)
            self.assertEqual(datetime.datetime(2014, 1, 1, 12, 0, 3),
                             e.first_timestamp)
        else:
            self.fail("No exception raised")
Example #25
    def test_fetch_agg_pct(self):
        ts = carbonara.AggregatedTimeSerie(sampling=1,
                                           max_size=3600 * 24,
                                           aggregation_method='90pct')
        tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

        tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
                        (datetime.datetime(2014, 1, 1, 12, 0, 0, 123), 4),
                        (datetime.datetime(2014, 1, 1, 12, 0, 2), 4)],
                       before_truncate_callback=ts.update)

        result = ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0))
        reference = [(pandas.Timestamp('2014-01-01 12:00:00'), 1.0, 3.9),
                     (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 4)]

        self.assertEqual(len(reference), len(result))

        for ref, res in zip(reference, result):
            self.assertEqual(ref[0], res[0])
            self.assertEqual(ref[1], res[1])
            # Rounding \o/
            self.assertAlmostEqual(ref[2], res[2])

        tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 2, 113), 110)],
                       before_truncate_callback=ts.update)

        result = ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0))
        reference = [(pandas.Timestamp('2014-01-01 12:00:00'), 1.0, 3.9),
                     (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 99.4)]

        self.assertEqual(len(reference), len(result))

        for ref, res in zip(reference, result):
            self.assertEqual(ref[0], res[0])
            self.assertEqual(ref[1], res[1])
            # Rounding \o/
            self.assertAlmostEqual(ref[2], res[2])
Example #26
    def add_measures_to_metrics(self, metrics_and_measures):
        """Update a metric with a new measures, computing new aggregations.

        :param metrics_and_measures: A dict there keys are `storage.Metric`
                                     objects and values are timeseries array of
                                     the new measures.
        """
        with self.statistics.time("raw measures fetch"):
            raw_measures = self._get_or_create_unaggregated_timeseries(
                metrics_and_measures.keys())
        self.statistics["raw measures fetch"] += len(metrics_and_measures)
        self.statistics["processed measures"] += sum(
            map(len, metrics_and_measures.values()))

        new_boundts = []
        splits_to_delete = {}
        splits_to_update = {}

        for metric, measures in six.iteritems(metrics_and_measures):
            measures = numpy.sort(measures, order='timestamps')

            agg_methods = list(metric.archive_policy.aggregation_methods)
            block_size = metric.archive_policy.max_block_size
            back_window = metric.archive_policy.back_window
            # NOTE(sileht): We keep one more block to calculate the rate of
            # change correctly
            if any(filter(lambda x: x.startswith("rate:"), agg_methods)):
                back_window += 1

            if raw_measures[metric] is None:
                ts = None
            else:
                try:
                    ts = carbonara.BoundTimeSerie.unserialize(
                        raw_measures[metric], block_size, back_window)
                except carbonara.InvalidData:
                    LOG.error(
                        "Data corruption detected for %s "
                        "unaggregated timeserie, creating a new one",
                        metric.id)
                    ts = None

            if ts is None:
                # This is the first time we process measures for this
                # metric, or the data is corrupted; create a new one
                ts = carbonara.BoundTimeSerie(block_size=block_size,
                                              back_window=back_window)
                current_first_block_timestamp = None
            else:
                current_first_block_timestamp = ts.first_block_timestamp()

            # NOTE(jd) This is Python where you need such
            # hack to pass a variable around a closure,
            # sorry.
            computed_points = {"number": 0}

            def _map_compute_splits_operations(bound_timeserie):
                # NOTE(gordc): bound_timeserie is the entire set of
                # unaggregated measures matching the largest
                # granularity. The following takes only the points
                # affected by new measures for a specific granularity
                tstamp = max(bound_timeserie.first, measures['timestamps'][0])
                new_first_block_timestamp = (
                    bound_timeserie.first_block_timestamp())
                computed_points['number'] = len(bound_timeserie)

                aggregations = metric.archive_policy.aggregations

                grouped_timeseries = {
                    granularity: bound_timeserie.group_serie(
                        granularity,
                        carbonara.round_timestamp(tstamp, granularity))
                    for granularity, aggregations
                    # No need to sort the aggregations, they are already sorted
                    in itertools.groupby(aggregations, ATTRGETTER_GRANULARITY)
                }

                aggregations_and_timeseries = {
                    aggregation:
                    carbonara.AggregatedTimeSerie.from_grouped_serie(
                        grouped_timeseries[aggregation.granularity],
                        aggregation)
                    for aggregation in aggregations
                }

                deleted_keys, keys_and_split_to_store = (
                    self._compute_split_operations(
                        metric, aggregations_and_timeseries,
                        current_first_block_timestamp,
                        new_first_block_timestamp))

                return (new_first_block_timestamp, deleted_keys,
                        keys_and_split_to_store)

            with self.statistics.time("aggregated measures compute"):
                (new_first_block_timestamp, deleted_keys,
                 keys_and_splits_to_store) = ts.set_values(
                     measures,
                     before_truncate_callback=_map_compute_splits_operations,
                 )

            splits_to_delete[metric] = deleted_keys
            splits_to_update[metric] = (keys_and_splits_to_store,
                                        new_first_block_timestamp)

            new_boundts.append((metric, ts.serialize()))

        with self.statistics.time("splits delete"):
            self._delete_metric_splits(splits_to_delete)
        self.statistics["splits delete"] += len(splits_to_delete)
        with self.statistics.time("splits update"):
            self._update_metric_splits(splits_to_update)
        self.statistics["splits update"] += len(splits_to_update)
        with self.statistics.time("raw measures store"):
            self._store_unaggregated_timeseries(new_boundts)
        self.statistics["raw measures store"] += len(new_boundts)
Example #27
    def process_measures(self, indexer, block_size, sync=False):
        metrics_to_process = self._list_metric_with_measures_to_process(
            block_size, full=sync)
        metrics = indexer.list_metrics(ids=metrics_to_process)
        # This builds the list of deleted metrics, i.e. the metrics we have
        # measures to process for but that are not in the indexer anymore.
        deleted_metrics_id = (set(map(uuid.UUID, metrics_to_process)) -
                              set(m.id for m in metrics))
        for metric_id in deleted_metrics_id:
            # NOTE(jd): We need to lock the metric otherwise we might delete
            # measures that another worker might be processing. Deleting
            # measurement files under its feet is not nice!
            with self._lock(metric_id)(blocking=sync):
                self._delete_unprocessed_measures_for_metric_id(metric_id)
        for metric in metrics:
            lock = self._lock(metric.id)
            agg_methods = list(metric.archive_policy.aggregation_methods)
            # Do not block if we cannot acquire the lock, that means some other
            # worker is doing the job. We'll just ignore this metric and may
            # get back later to it if needed.
            if lock.acquire(blocking=sync):
                try:
                    LOG.debug("Processing measures for %s" % metric)
                    with self._process_measure_for_metric(metric) as measures:
                        # NOTE(mnaser): The metric could have been handled by
                        #               another worker, ignore if no measures.
                        if len(measures) == 0:
                            LOG.debug("Skipping %s (already processed)" %
                                      metric)
                            continue

                        try:
                            with timeutils.StopWatch() as sw:
                                raw_measures = (
                                    self._get_unaggregated_timeserie(metric))
                                LOG.debug("Retrieve unaggregated measures "
                                          "for %s in %.2fs" %
                                          (metric.id, sw.elapsed()))
                        except storage.MetricDoesNotExist:
                            try:
                                self._create_metric(metric)
                            except storage.MetricAlreadyExists:
                                # Created in the meantime, do not worry
                                pass
                            ts = None
                        else:
                            try:
                                ts = carbonara.BoundTimeSerie.unserialize(
                                    raw_measures)
                            except ValueError:
                                ts = None
                                LOG.error("Data corruption detected for %s "
                                          "unaggregated timeserie, "
                                          "recreating an empty one." %
                                          metric.id)

                        if ts is None:
                            # This is the first time we process measures for
                            # this metric, or the data is corrupted; create a
                            # new one
                            mbs = metric.archive_policy.max_block_size
                            ts = carbonara.BoundTimeSerie(
                                block_size=mbs,
                                back_window=metric.archive_policy.back_window)

                        def _map_add_measures(bound_timeserie):
                            self._map_in_thread(
                                self._add_measures,
                                ((aggregation, d, metric, bound_timeserie)
                                 for aggregation in agg_methods
                                 for d in metric.archive_policy.definition))

                        with timeutils.StopWatch() as sw:
                            ts.set_values(
                                measures,
                                before_truncate_callback=_map_add_measures,
                                ignore_too_old_timestamps=True)
                            LOG.debug(
                                "Computed new metric %s with %d new measures "
                                "in %.2f seconds" %
                                (metric.id, len(measures), sw.elapsed()))

                        self._store_unaggregated_timeserie(
                            metric, ts.serialize())
                except Exception:
                    if sync:
                        raise
                    LOG.error("Error processing new measures", exc_info=True)
                finally:
                    lock.release()
Example #28
    def _compute_and_store_timeseries(self, metric, measures):
        # NOTE(mnaser): The metric could have been handled by
        #               another worker, ignore if no measures.
        if len(measures) == 0:
            LOG.debug("Skipping %s (already processed)", metric)
            return

        measures.sort(order='timestamps')

        agg_methods = list(metric.archive_policy.aggregation_methods)
        block_size = metric.archive_policy.max_block_size
        back_window = metric.archive_policy.back_window
        definition = metric.archive_policy.definition
        # NOTE(sileht): We keep one more block to calculate the rate of
        # change correctly
        if any(filter(lambda x: x.startswith("rate:"), agg_methods)):
            back_window += 1

        try:
            ts = self._get_unaggregated_timeserie_and_unserialize(
                metric, block_size=block_size, back_window=back_window)
        except MetricDoesNotExist:
            try:
                self._create_metric(metric)
            except MetricAlreadyExists:
                # Created in the meantime, do not worry
                pass
            ts = None
        except CorruptionError as e:
            LOG.error(e)
            ts = None

        if ts is None:
            # This is the first time we process measures for this
            # metric, or the data is corrupted; create a new one
            ts = carbonara.BoundTimeSerie(block_size=block_size,
                                          back_window=back_window)
            current_first_block_timestamp = None
        else:
            current_first_block_timestamp = ts.first_block_timestamp()

        # NOTE(jd) This is Python where you need such
        # hack to pass a variable around a closure,
        # sorry.
        computed_points = {"number": 0}

        def _map_add_measures(bound_timeserie):
            # NOTE(gordc): bound_timeserie is the entire set of
            # unaggregated measures matching the largest
            # granularity. The following takes only the points
            # affected by new measures for a specific granularity
            tstamp = max(bound_timeserie.first, measures['timestamps'][0])
            new_first_block_timestamp = bound_timeserie.first_block_timestamp()
            computed_points['number'] = len(bound_timeserie)
            for d in definition:
                ts = bound_timeserie.group_serie(
                    d.granularity, carbonara.round_timestamp(
                        tstamp, d.granularity))

                self._map_in_thread(
                    self._add_measures,
                    ((aggregation, d, metric, ts,
                        current_first_block_timestamp,
                        new_first_block_timestamp)
                        for aggregation in agg_methods))

        with utils.StopWatch() as sw:
            ts.set_values(measures,
                          before_truncate_callback=_map_add_measures)

        number_of_operations = (len(agg_methods) * len(definition))
        perf = ""
        elapsed = sw.elapsed()
        if elapsed > 0:
            perf = " (%d points/s, %d measures/s)" % (
                ((number_of_operations * computed_points['number']) /
                    elapsed),
                ((number_of_operations * len(measures)) / elapsed)
            )
        LOG.debug("Computed new metric %s with %d new measures "
                  "in %.2f seconds%s",
                  metric.id, len(measures), elapsed, perf)

        self._store_unaggregated_timeserie(metric, ts.serialize())
Example #29
    def compute_and_store_timeseries(self, metric, measures):
        # NOTE(mnaser): The metric could have been handled by
        #               another worker, ignore if no measures.
        if len(measures) == 0:
            LOG.debug("Skipping %s (already processed)", metric)
            return

        measures = numpy.sort(measures, order='timestamps')

        agg_methods = list(metric.archive_policy.aggregation_methods)
        block_size = metric.archive_policy.max_block_size
        back_window = metric.archive_policy.back_window
        definition = metric.archive_policy.definition
        # NOTE(sileht): We keep one more block to calculate the rate of
        # change correctly
        if any(filter(lambda x: x.startswith("rate:"), agg_methods)):
            back_window += 1

        with utils.StopWatch() as sw:
            raw_measures = (
                self._get_or_create_unaggregated_timeseries(
                    [metric])[metric]
            )
        LOG.debug("Retrieve unaggregated measures for %s in %.2fs",
                  metric.id, sw.elapsed())

        if raw_measures is None:
            ts = None
        else:
            try:
                ts = carbonara.BoundTimeSerie.unserialize(
                    raw_measures, block_size, back_window)
            except carbonara.InvalidData:
                LOG.error("Data corruption detected for %s "
                          "unaggregated timeserie, creating a new one",
                          metric.id)
                ts = None

        if ts is None:
            # This is the first time we process measures for this
            # metric, or the data is corrupted; create a new one
            ts = carbonara.BoundTimeSerie(block_size=block_size,
                                          back_window=back_window)
            current_first_block_timestamp = None
        else:
            current_first_block_timestamp = ts.first_block_timestamp()

        # NOTE(jd) This is Python where you need such
        # hack to pass a variable around a closure,
        # sorry.
        computed_points = {"number": 0}

        def _map_add_measures(bound_timeserie):
            # NOTE(gordc): bound_timeserie is the entire set of
            # unaggregated measures matching the largest
            # granularity. The following takes only the points
            # affected by new measures for a specific granularity
            tstamp = max(bound_timeserie.first, measures['timestamps'][0])
            new_first_block_timestamp = bound_timeserie.first_block_timestamp()
            computed_points['number'] = len(bound_timeserie)

            for granularity, aggregations in itertools.groupby(
                    # No need to sort the aggregations, they are already sorted
                    metric.archive_policy.aggregations,
                    ATTRGETTER_GRANULARITY):
                ts = bound_timeserie.group_serie(
                    granularity, carbonara.round_timestamp(
                        tstamp, granularity))

                self._add_measures(metric, aggregations, ts,
                                   current_first_block_timestamp,
                                   new_first_block_timestamp)

        with utils.StopWatch() as sw:
            ts.set_values(measures,
                          before_truncate_callback=_map_add_measures)

        number_of_operations = (len(agg_methods) * len(definition))
        perf = ""
        elapsed = sw.elapsed()
        if elapsed > 0:
            perf = " (%d points/s, %d measures/s)" % (
                ((number_of_operations * computed_points['number']) /
                    elapsed),
                ((number_of_operations * len(measures)) / elapsed)
            )
        LOG.debug("Computed new metric %s with %d new measures "
                  "in %.2f seconds%s",
                  metric.id, len(measures), elapsed, perf)

        self._store_unaggregated_timeseries([(metric, ts.serialize())])
Example #30
    def test_aggregated_different_archive_overlap(self):
        tsc1 = {
            'sampling': numpy.timedelta64(60, 's'),
            'size': 10,
            'agg': 'mean'
        }
        tsb1 = carbonara.BoundTimeSerie(block_size=tsc1['sampling'])
        tsc2 = {
            'sampling': numpy.timedelta64(60, 's'),
            'size': 10,
            'agg': 'mean'
        }
        tsb2 = carbonara.BoundTimeSerie(block_size=tsc2['sampling'])

        # NOTE(sileht): minute 8 is missing in both series and
        # minute 7 in tsc2 too, but it looks like we have
        # enough points to do the aggregation
        tsb1.set_values(numpy.array([(datetime64(2014, 1, 1, 11, 0, 0), 4),
                                     (datetime64(2014, 1, 1, 12, 1, 0), 3),
                                     (datetime64(2014, 1, 1, 12, 2, 0), 2),
                                     (datetime64(2014, 1, 1, 12, 3, 0), 4),
                                     (datetime64(2014, 1, 1, 12, 4, 0), 2),
                                     (datetime64(2014, 1, 1, 12, 5, 0), 3),
                                     (datetime64(2014, 1, 1, 12, 6, 0), 4),
                                     (datetime64(2014, 1, 1, 12, 7, 0), 10),
                                     (datetime64(2014, 1, 1, 12, 9, 0), 2)],
                                    dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                        before_truncate_callback=functools.partial(
                            self._resample_and_merge, agg_dict=tsc1))

        tsb2.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 1, 0), 3),
                                     (datetime64(2014, 1, 1, 12, 2, 0), 4),
                                     (datetime64(2014, 1, 1, 12, 3, 0), 4),
                                     (datetime64(2014, 1, 1, 12, 4, 0), 6),
                                     (datetime64(2014, 1, 1, 12, 5, 0), 3),
                                     (datetime64(2014, 1, 1, 12, 6, 0), 6),
                                     (datetime64(2014, 1, 1, 12, 9, 0), 2),
                                     (datetime64(2014, 1, 1, 12, 11, 0), 2),
                                     (datetime64(2014, 1, 1, 12, 12, 0), 2)],
                                    dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                        before_truncate_callback=functools.partial(
                            self._resample_and_merge, agg_dict=tsc2))

        dtfrom = datetime64(2014, 1, 1, 12, 0, 0)
        dtto = datetime64(2014, 1, 1, 12, 10, 0)

        # By default we require 100% of the points to overlap,
        # so this fails
        self.assertRaises(cross_metric.UnAggregableTimeseries,
                          cross_metric.aggregated,
                          [tsc1['return'], tsc2['return']],
                          from_timestamp=dtfrom,
                          to_timestamp=dtto,
                          aggregation='mean')

        # Retry with 80% and it works
        output = cross_metric.aggregated([tsc1['return'], tsc2['return']],
                                         from_timestamp=dtfrom,
                                         to_timestamp=dtto,
                                         aggregation='mean',
                                         needed_percent_of_overlap=80.0)

        self.assertEqual([
            (datetime64(2014, 1, 1, 12, 1, 0),
             numpy.timedelta64(60, 's'), 3.0),
            (datetime64(2014, 1, 1, 12, 2, 0),
             numpy.timedelta64(60, 's'), 3.0),
            (datetime64(2014, 1, 1, 12, 3, 0),
             numpy.timedelta64(60, 's'), 4.0),
            (datetime64(2014, 1, 1, 12, 4, 0),
             numpy.timedelta64(60, 's'), 4.0),
            (datetime64(2014, 1, 1, 12, 5, 0),
             numpy.timedelta64(60, 's'), 3.0),
            (datetime64(2014, 1, 1, 12, 6, 0),
             numpy.timedelta64(60, 's'), 5.0),
            (datetime64(2014, 1, 1, 12, 7, 0),
             numpy.timedelta64(60, 's'), 10.0),
            (datetime64(2014, 1, 1, 12, 9, 0),
             numpy.timedelta64(60, 's'), 2.0),
        ], list(output))
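Once 80% overlap is accepted, each surviving bucket is again the mean of the values present in the two series; for instance:

    import numpy

    print(numpy.nanmean([2, 6]))           # 12:04 -> 4.0
    print(numpy.nanmean([10, numpy.nan]))  # 12:07 -> 10.0 (tsc1 only)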