def test_aggregated_different_archive_overlap_edge_missing2(self):
    tsc1 = {'sampling': numpy.timedelta64(60, 's'),
            'size': 10, 'agg': 'mean'}
    tsb1 = carbonara.BoundTimeSerie(block_size=tsc1['sampling'])
    tsc2 = {'sampling': numpy.timedelta64(60, 's'),
            'size': 10, 'agg': 'mean'}
    tsb2 = carbonara.BoundTimeSerie(block_size=tsc2['sampling'])

    tsb1.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 3, 0), 4)],
                                dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                    before_truncate_callback=functools.partial(
                        self._resample_and_merge, agg_dict=tsc1))
    tsb2.set_values(numpy.array([(datetime64(2014, 1, 1, 11, 0, 0), 4),
                                 (datetime64(2014, 1, 1, 12, 3, 0), 4)],
                                dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                    before_truncate_callback=functools.partial(
                        self._resample_and_merge, agg_dict=tsc2))

    output = cross_metric.aggregated([tsc1['return'], tsc2['return']],
                                     aggregation='mean')
    self.assertEqual([
        (datetime64(2014, 1, 1, 12, 3, 0),
         numpy.timedelta64(60000000000, 'ns'), 4.0),
    ], list(output))
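# The numpy-based tests in this file rely on a self._resample_and_merge
# helper that is not shown in this section. Below is a minimal sketch of
# what such a helper presumably looks like, inferred from how it is called
# with functools.partial(..., agg_dict=...) and how the tests read the
# result back via agg_dict['return']. The carbonara.Aggregation arguments
# and the omission of max_size truncation are assumptions, not the
# project's confirmed implementation.
def _resample_and_merge(self, ts, agg_dict):
    """Resample a bound serie and merge it into agg_dict['return'].

    A sketch only: mimics the aggregation workflow the storage driver
    performs in its before_truncate_callback.
    """
    grouped = ts.group_serie(agg_dict['sampling'])
    existing = agg_dict.get('return')
    # Assumed signature: Aggregation(method, granularity, timespan)
    agg_dict['return'] = carbonara.AggregatedTimeSerie.from_grouped_serie(
        grouped, carbonara.Aggregation(
            agg_dict['agg'], agg_dict['sampling'], None))
    if existing is not None:
        existing.merge(agg_dict['return'])
        agg_dict['return'] = existing
    # (truncation to agg_dict['size'] omitted for brevity)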
def test_aggregated_different_archive_no_overlap(self):
    tsc1 = {'sampling': numpy.timedelta64(60, 's'),
            'size': 50, 'agg': 'mean'}
    tsb1 = carbonara.BoundTimeSerie(block_size=tsc1['sampling'])
    tsc2 = {'sampling': numpy.timedelta64(60, 's'),
            'size': 50, 'agg': 'mean'}
    tsb2 = carbonara.BoundTimeSerie(block_size=tsc2['sampling'])

    tsb1.set_values(numpy.array([(datetime64(2014, 1, 1, 11, 46, 4), 4)],
                                dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                    before_truncate_callback=functools.partial(
                        self._resample_and_merge, agg_dict=tsc1))
    tsb2.set_values(numpy.array([(datetime64(2014, 1, 1, 9, 1, 4), 4)],
                                dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                    before_truncate_callback=functools.partial(
                        self._resample_and_merge, agg_dict=tsc2))

    dtfrom = datetime64(2014, 1, 1, 11, 0, 0)
    self.assertRaises(cross_metric.UnAggregableTimeseries,
                      cross_metric.aggregated,
                      [tsc1['return'], tsc2['return']],
                      from_timestamp=dtfrom, aggregation='mean')
def test_aggregated_different_archive_overlap_edge_missing2(self):
    tsc1 = carbonara.AggregatedTimeSerie(sampling=60, max_size=10,
                                         aggregation_method='mean')
    tsb1 = carbonara.BoundTimeSerie(block_size=tsc1.sampling)
    tsc2 = carbonara.AggregatedTimeSerie(sampling=60, max_size=10,
                                         aggregation_method='mean')
    tsb2 = carbonara.BoundTimeSerie(block_size=tsc2.sampling)

    tsb1.set_values([
        (datetime.datetime(2014, 1, 1, 12, 3, 0), 4),
    ], before_truncate_callback=tsc1.update)
    tsb2.set_values([
        (datetime.datetime(2014, 1, 1, 11, 0, 0), 4),
        (datetime.datetime(2014, 1, 1, 12, 3, 0), 4),
    ], before_truncate_callback=tsc2.update)

    output = carbonara.AggregatedTimeSerie.aggregated(
        [tsc1, tsc2], aggregation='mean')
    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:03:00'), 60.0, 4.0),
    ], output)
def test_aggregated_some_overlap_with_fill_null(self):
    tsc1 = {'sampling': numpy.timedelta64(60, 's'),
            'size': 10, 'agg': 'mean'}
    tsb1 = carbonara.BoundTimeSerie(block_size=tsc1['sampling'])
    tsc2 = {'sampling': numpy.timedelta64(60, 's'),
            'size': 10, 'agg': 'mean'}
    tsb2 = carbonara.BoundTimeSerie(block_size=tsc2['sampling'])

    tsb1.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 3, 0), 9),
                                 (datetime64(2014, 1, 1, 12, 4, 0), 1),
                                 (datetime64(2014, 1, 1, 12, 5, 0), 2),
                                 (datetime64(2014, 1, 1, 12, 6, 0), 7),
                                 (datetime64(2014, 1, 1, 12, 7, 0), 5),
                                 (datetime64(2014, 1, 1, 12, 8, 0), 3)],
                                dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                    before_truncate_callback=functools.partial(
                        self._resample_and_merge, agg_dict=tsc1))
    tsb2.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 0, 0), 6),
                                 (datetime64(2014, 1, 1, 12, 1, 0), 2),
                                 (datetime64(2014, 1, 1, 12, 2, 0), 13),
                                 (datetime64(2014, 1, 1, 12, 3, 0), 24),
                                 (datetime64(2014, 1, 1, 12, 4, 0), 4),
                                 (datetime64(2014, 1, 1, 12, 5, 0), 16),
                                 (datetime64(2014, 1, 1, 12, 6, 0), 12)],
                                dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                    before_truncate_callback=functools.partial(
                        self._resample_and_merge, agg_dict=tsc2))

    output = cross_metric.aggregated([tsc1['return'], tsc2['return']],
                                     aggregation='mean', fill='null')
    self.assertEqual([
        (datetime64(2014, 1, 1, 12, 0, 0),
         numpy.timedelta64(60000000000, 'ns'), 6.0),
        (datetime64(2014, 1, 1, 12, 1, 0),
         numpy.timedelta64(60000000000, 'ns'), 2.0),
        (datetime64(2014, 1, 1, 12, 2, 0),
         numpy.timedelta64(60000000000, 'ns'), 13.0),
        (datetime64(2014, 1, 1, 12, 3, 0),
         numpy.timedelta64(60000000000, 'ns'), 16.5),
        (datetime64(2014, 1, 1, 12, 4, 0),
         numpy.timedelta64(60000000000, 'ns'), 2.5),
        (datetime64(2014, 1, 1, 12, 5, 0),
         numpy.timedelta64(60000000000, 'ns'), 9.0),
        (datetime64(2014, 1, 1, 12, 6, 0),
         numpy.timedelta64(60000000000, 'ns'), 9.5),
        (datetime64(2014, 1, 1, 12, 7, 0),
         numpy.timedelta64(60000000000, 'ns'), 5.0),
        (datetime64(2014, 1, 1, 12, 8, 0),
         numpy.timedelta64(60000000000, 'ns'), 3.0),
    ], list(output))
def test_aggregated_different_archive_overlap_edge_missing1(self):
    tsc1 = {'sampling': numpy.timedelta64(60, 's'),
            'size': 10, 'agg': 'mean'}
    tsb1 = carbonara.BoundTimeSerie(block_size=tsc1['sampling'])
    tsc2 = {'sampling': numpy.timedelta64(60, 's'),
            'size': 10, 'agg': 'mean'}
    tsb2 = carbonara.BoundTimeSerie(block_size=tsc2['sampling'])

    tsb1.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 3, 0), 9),
                                 (datetime64(2014, 1, 1, 12, 4, 0), 1),
                                 (datetime64(2014, 1, 1, 12, 5, 0), 2),
                                 (datetime64(2014, 1, 1, 12, 6, 0), 7),
                                 (datetime64(2014, 1, 1, 12, 7, 0), 5),
                                 (datetime64(2014, 1, 1, 12, 8, 0), 3)],
                                dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                    before_truncate_callback=functools.partial(
                        self._resample_and_merge, agg_dict=tsc1))
    tsb2.set_values(numpy.array([(datetime64(2014, 1, 1, 11, 0, 0), 6),
                                 (datetime64(2014, 1, 1, 12, 1, 0), 2),
                                 (datetime64(2014, 1, 1, 12, 2, 0), 13),
                                 (datetime64(2014, 1, 1, 12, 3, 0), 24),
                                 (datetime64(2014, 1, 1, 12, 4, 0), 4),
                                 (datetime64(2014, 1, 1, 12, 5, 0), 16),
                                 (datetime64(2014, 1, 1, 12, 6, 0), 12)],
                                dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                    before_truncate_callback=functools.partial(
                        self._resample_and_merge, agg_dict=tsc2))

    # By default we require 100% of the points to overlap, but we allow
    # the last datapoint of the finest granularity to be missing.
    output = cross_metric.aggregated([tsc1['return'], tsc2['return']],
                                     aggregation='sum')
    self.assertEqual([
        (datetime64(2014, 1, 1, 12, 3, 0),
         numpy.timedelta64(60, 's'), 33.0),
        (datetime64(2014, 1, 1, 12, 4, 0),
         numpy.timedelta64(60, 's'), 5.0),
        (datetime64(2014, 1, 1, 12, 5, 0),
         numpy.timedelta64(60, 's'), 18.0),
        (datetime64(2014, 1, 1, 12, 6, 0),
         numpy.timedelta64(60, 's'), 19.0),
    ], list(output))
def test_fetch_nano(self):
    ts = {'sampling': numpy.timedelta64(200, 'ms'),
          'size': 10, 'agg': 'mean'}
    tsb = carbonara.BoundTimeSerie(block_size=ts['sampling'])

    tsb.set_values(numpy.array([
        (datetime64(2014, 1, 1, 11, 46, 0, 200123), 4),
        (datetime64(2014, 1, 1, 11, 46, 0, 340000), 8),
        (datetime64(2014, 1, 1, 11, 47, 0, 323154), 50),
        (datetime64(2014, 1, 1, 11, 48, 0, 590903), 4),
        (datetime64(2014, 1, 1, 11, 48, 0, 903291), 4)],
        dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
        before_truncate_callback=functools.partial(
            self._resample_and_merge, agg_dict=ts))

    tsb.set_values(numpy.array([
        (datetime64(2014, 1, 1, 11, 48, 0, 821312), 5)],
        dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
        before_truncate_callback=functools.partial(
            self._resample_and_merge, agg_dict=ts))

    self.assertEqual([
        (datetime64(2014, 1, 1, 11, 46, 0, 200000), 6.0),
        (datetime64(2014, 1, 1, 11, 47, 0, 200000), 50.0),
        (datetime64(2014, 1, 1, 11, 48, 0, 400000), 4.0),
        (datetime64(2014, 1, 1, 11, 48, 0, 800000), 4.5),
    ], list(ts['return'].fetch()))
    self.assertEqual(numpy.timedelta64(200000000, 'ns'),
                     ts['return'].aggregation.granularity)
def test_fetch_agg_std(self):
    # NOTE(gordc): this is a good test to ensure we drop NaN entries;
    # 2014-01-01 12:00:00 will appear if we don't dropna()
    ts = {'sampling': numpy.timedelta64(60, 's'),
          'size': 60, 'agg': 'std'}
    tsb = carbonara.BoundTimeSerie(block_size=ts['sampling'])

    tsb.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 0, 0), 3),
                                (datetime64(2014, 1, 1, 12, 1, 4), 4),
                                (datetime64(2014, 1, 1, 12, 1, 9), 7),
                                (datetime64(2014, 1, 1, 12, 2, 1), 15),
                                (datetime64(2014, 1, 1, 12, 2, 12), 1)],
                               dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                   before_truncate_callback=functools.partial(
                       self._resample_and_merge, agg_dict=ts))

    self.assertEqual([
        (datetime64(2014, 1, 1, 12, 1, 0), 2.1213203435596424),
        (datetime64(2014, 1, 1, 12, 2, 0), 9.8994949366116654),
    ], list(ts['return'].fetch(datetime64(2014, 1, 1, 12, 0, 0))))

    tsb.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 2, 13), 110)],
                               dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                   before_truncate_callback=functools.partial(
                       self._resample_and_merge, agg_dict=ts))

    self.assertEqual([
        (datetime64(2014, 1, 1, 12, 1, 0), 2.1213203435596424),
        (datetime64(2014, 1, 1, 12, 2, 0), 59.304300012730948),
    ], list(ts['return'].fetch(datetime64(2014, 1, 1, 12, 0, 0))))
def test_fetch_agg_max(self):
    ts = carbonara.AggregatedTimeSerie(sampling=60, max_size=60,
                                       aggregation_method='max')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
                    (datetime.datetime(2014, 1, 1, 12, 1, 4), 4),
                    (datetime.datetime(2014, 1, 1, 12, 1, 9), 7),
                    (datetime.datetime(2014, 1, 1, 12, 2, 1), 15),
                    (datetime.datetime(2014, 1, 1, 12, 2, 12), 1)],
                   before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:00:00'), 60.0, 3),
        (pandas.Timestamp('2014-01-01 12:01:00'), 60.0, 7),
        (pandas.Timestamp('2014-01-01 12:02:00'), 60.0, 15),
    ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))

    tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 2, 13), 110)],
                   before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:00:00'), 60.0, 3),
        (pandas.Timestamp('2014-01-01 12:01:00'), 60.0, 7),
        (pandas.Timestamp('2014-01-01 12:02:00'), 60.0, 110),
    ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))
def test_fetch_agg_std(self):
    ts = carbonara.AggregatedTimeSerie(sampling=60, max_size=60,
                                       aggregation_method='std')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
                    (datetime.datetime(2014, 1, 1, 12, 1, 4), 4),
                    (datetime.datetime(2014, 1, 1, 12, 1, 9), 7),
                    (datetime.datetime(2014, 1, 1, 12, 2, 1), 15),
                    (datetime.datetime(2014, 1, 1, 12, 2, 12), 1)],
                   before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:01:00'), 60.0,
         2.1213203435596424),
        (pandas.Timestamp('2014-01-01 12:02:00'), 60.0,
         9.8994949366116654),
    ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))

    tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 2, 13), 110)],
                   before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:01:00'), 60.0,
         2.1213203435596424),
        (pandas.Timestamp('2014-01-01 12:02:00'), 60.0,
         59.304300012730948),
    ], ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))
def test_fetch_nano(self):
    ts = carbonara.AggregatedTimeSerie(sampling=0.2, max_size=10,
                                       aggregation_method='mean')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 11, 46, 0, 200123), 4),
        (datetime.datetime(2014, 1, 1, 11, 46, 0, 340000), 8),
        (datetime.datetime(2014, 1, 1, 11, 47, 0, 323154), 50),
        (datetime.datetime(2014, 1, 1, 11, 48, 0, 590903), 4),
        (datetime.datetime(2014, 1, 1, 11, 48, 0, 903291), 4),
    ], before_truncate_callback=ts.update)

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 11, 48, 0, 821312), 5),
    ], before_truncate_callback=ts.update)

    self.assertEqual(
        [(datetime.datetime(2014, 1, 1, 11, 46, 0, 200000), 0.2, 6.0),
         (datetime.datetime(2014, 1, 1, 11, 47, 0, 200000), 0.2, 50.0),
         (datetime.datetime(2014, 1, 1, 11, 48, 0, 400000), 0.2, 4.0),
         (datetime.datetime(2014, 1, 1, 11, 48, 0, 800000), 0.2, 4.5)],
        ts.fetch())
def test_fetch_agg_max(self):
    ts = {'sampling': numpy.timedelta64(60, 's'),
          'size': 60, 'agg': 'max'}
    tsb = carbonara.BoundTimeSerie(block_size=ts['sampling'])

    tsb.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 0, 0), 3),
                                (datetime64(2014, 1, 1, 12, 1, 4), 4),
                                (datetime64(2014, 1, 1, 12, 1, 9), 7),
                                (datetime64(2014, 1, 1, 12, 2, 1), 15),
                                (datetime64(2014, 1, 1, 12, 2, 12), 1)],
                               dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                   before_truncate_callback=functools.partial(
                       self._resample_and_merge, agg_dict=ts))

    self.assertEqual([
        (datetime64(2014, 1, 1, 12, 0, 0), 3),
        (datetime64(2014, 1, 1, 12, 1, 0), 7),
        (datetime64(2014, 1, 1, 12, 2, 0), 15),
    ], list(ts['return'].fetch(datetime64(2014, 1, 1, 12, 0, 0))))

    tsb.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 2, 13), 110)],
                               dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                   before_truncate_callback=functools.partial(
                       self._resample_and_merge, agg_dict=ts))

    self.assertEqual([
        (datetime64(2014, 1, 1, 12, 0, 0), 3),
        (datetime64(2014, 1, 1, 12, 1, 0), 7),
        (datetime64(2014, 1, 1, 12, 2, 0), 110),
    ], list(ts['return'].fetch(datetime64(2014, 1, 1, 12, 0, 0))))
def test_back_window(self):
    """Back window testing.

    Test that the back window of an archive is not longer than the
    window we aggregate on.
    """
    ts = {'sampling': numpy.timedelta64(1, 's'),
          'size': 60, 'agg': 'mean'}
    tsb = carbonara.BoundTimeSerie(block_size=ts['sampling'])

    tsb.set_values(numpy.array([
        (datetime64(2014, 1, 1, 12, 0, 1, 2300), 1),
        (datetime64(2014, 1, 1, 12, 0, 1, 4600), 2),
        (datetime64(2014, 1, 1, 12, 0, 2, 4500), 3),
        (datetime64(2014, 1, 1, 12, 0, 2, 7800), 4),
        (datetime64(2014, 1, 1, 12, 0, 3, 8), 2.5)],
        dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
        before_truncate_callback=functools.partial(
            self._resample_and_merge, agg_dict=ts))

    self.assertEqual(
        [
            (datetime64(2014, 1, 1, 12, 0, 1), 1.5),
            (datetime64(2014, 1, 1, 12, 0, 2), 3.5),
            (datetime64(2014, 1, 1, 12, 0, 3), 2.5),
        ],
        list(ts['return'].fetch()))
def test_cross_metric_with_random_holes_derived_boundaries(self):
    tsc1 = {'sampling': numpy.timedelta64(60, 's'),
            'size': 10, 'agg': 'mean'}
    tsb1 = carbonara.BoundTimeSerie(block_size=tsc1['sampling'])
    tsc2 = {'sampling': numpy.timedelta64(60, 's'),
            'size': 10, 'agg': 'mean'}
    tsb2 = carbonara.BoundTimeSerie(block_size=tsc2['sampling'])

    tsb1.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 1, 0), 9),
                                 (datetime64(2014, 1, 1, 12, 2, 0), 1),
                                 (datetime64(2014, 1, 1, 12, 4, 0), 5),
                                 (datetime64(2014, 1, 1, 12, 6, 0), 3)],
                                dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                    before_truncate_callback=functools.partial(
                        self._resample_and_merge, agg_dict=tsc1))
    tsb2.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 0, 0), 6),
                                 (datetime64(2014, 1, 1, 12, 1, 0), 2),
                                 (datetime64(2014, 1, 1, 12, 2, 0), 13),
                                 (datetime64(2014, 1, 1, 12, 3, 0), 24),
                                 (datetime64(2014, 1, 1, 12, 4, 0), 4)],
                                dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                    before_truncate_callback=functools.partial(
                        self._resample_and_merge, agg_dict=tsc2))

    output = cross_metric.aggregated([tsc1['return'], tsc2['return']],
                                     aggregation='mean',
                                     needed_percent_of_overlap=50.0)
    self.assertEqual([
        (datetime64(2014, 1, 1, 12, 1, 0),
         numpy.timedelta64(60000000000, 'ns'), 5.5),
        (datetime64(2014, 1, 1, 12, 2, 0),
         numpy.timedelta64(60000000000, 'ns'), 7.0),
        (datetime64(2014, 1, 1, 12, 3, 0),
         numpy.timedelta64(60000000000, 'ns'), 24.0),
        (datetime64(2014, 1, 1, 12, 4, 0),
         numpy.timedelta64(60000000000, 'ns'), 4.5),
    ], list(output))
def test_aggregated_different_archive_overlap_edge_missing1(self):
    tsc1 = carbonara.AggregatedTimeSerie(sampling=60, max_size=10,
                                         aggregation_method='mean')
    tsb1 = carbonara.BoundTimeSerie(block_size=tsc1.sampling)
    tsc2 = carbonara.AggregatedTimeSerie(sampling=60, max_size=10,
                                         aggregation_method='mean')
    tsb2 = carbonara.BoundTimeSerie(block_size=tsc2.sampling)

    tsb1.set_values([
        (datetime.datetime(2014, 1, 1, 12, 3, 0), 9),
        (datetime.datetime(2014, 1, 1, 12, 4, 0), 1),
        (datetime.datetime(2014, 1, 1, 12, 5, 0), 2),
        (datetime.datetime(2014, 1, 1, 12, 6, 0), 7),
        (datetime.datetime(2014, 1, 1, 12, 7, 0), 5),
        (datetime.datetime(2014, 1, 1, 12, 8, 0), 3),
    ], before_truncate_callback=tsc1.update)
    tsb2.set_values([
        (datetime.datetime(2014, 1, 1, 11, 0, 0), 6),
        (datetime.datetime(2014, 1, 1, 12, 1, 0), 2),
        (datetime.datetime(2014, 1, 1, 12, 2, 0), 13),
        (datetime.datetime(2014, 1, 1, 12, 3, 0), 24),
        (datetime.datetime(2014, 1, 1, 12, 4, 0), 4),
        (datetime.datetime(2014, 1, 1, 12, 5, 0), 16),
        (datetime.datetime(2014, 1, 1, 12, 6, 0), 12),
    ], before_truncate_callback=tsc2.update)

    # By default we require 100% of the points to overlap, but we allow
    # the last datapoint of the finest granularity to be missing.
    output = carbonara.AggregatedTimeSerie.aggregated(
        [tsc1, tsc2], aggregation='sum')
    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:03:00'), 60.0, 33.0),
        (pandas.Timestamp('2014-01-01 12:04:00'), 60.0, 5.0),
        (pandas.Timestamp('2014-01-01 12:05:00'), 60.0, 18.0),
        (pandas.Timestamp('2014-01-01 12:06:00'), 60.0, 19.0),
    ], output)
def test_fetch(self):
    ts = {'sampling': numpy.timedelta64(60, 's'),
          'size': 10, 'agg': 'mean'}
    tsb = carbonara.BoundTimeSerie(block_size=ts['sampling'])

    tsb.set_values(numpy.array([
        (datetime64(2014, 1, 1, 11, 46, 4), 4),
        (datetime64(2014, 1, 1, 11, 47, 34), 8),
        (datetime64(2014, 1, 1, 11, 50, 54), 50),
        (datetime64(2014, 1, 1, 11, 54, 45), 4),
        (datetime64(2014, 1, 1, 11, 56, 49), 4),
        (datetime64(2014, 1, 1, 11, 57, 22), 6),
        (datetime64(2014, 1, 1, 11, 58, 22), 5),
        (datetime64(2014, 1, 1, 12, 1, 4), 4),
        (datetime64(2014, 1, 1, 12, 1, 9), 7),
        (datetime64(2014, 1, 1, 12, 2, 1), 15),
        (datetime64(2014, 1, 1, 12, 2, 12), 1),
        (datetime64(2014, 1, 1, 12, 3, 0), 3),
        (datetime64(2014, 1, 1, 12, 4, 9), 7),
        (datetime64(2014, 1, 1, 12, 5, 1), 15),
        (datetime64(2014, 1, 1, 12, 5, 12), 1),
        (datetime64(2014, 1, 1, 12, 6, 0, 2), 3)],
        dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
        before_truncate_callback=functools.partial(
            self._resample_and_merge, agg_dict=ts))

    tsb.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 6), 5)],
                               dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                   before_truncate_callback=functools.partial(
                       self._resample_and_merge, agg_dict=ts))

    self.assertEqual([
        (datetime64(2014, 1, 1, 11, 46), 4.0),
        (datetime64(2014, 1, 1, 11, 47), 8.0),
        (datetime64(2014, 1, 1, 11, 50), 50.0),
        (datetime64(2014, 1, 1, 11, 54), 4.0),
        (datetime64(2014, 1, 1, 11, 56), 4.0),
        (datetime64(2014, 1, 1, 11, 57), 6.0),
        (datetime64(2014, 1, 1, 11, 58), 5.0),
        (datetime64(2014, 1, 1, 12, 1), 5.5),
        (datetime64(2014, 1, 1, 12, 2), 8.0),
        (datetime64(2014, 1, 1, 12, 3), 3.0),
        (datetime64(2014, 1, 1, 12, 4), 7.0),
        (datetime64(2014, 1, 1, 12, 5), 8.0),
        (datetime64(2014, 1, 1, 12, 6), 4.0),
    ], list(ts['return'].fetch()))

    self.assertEqual([
        (datetime64(2014, 1, 1, 12, 1), 5.5),
        (datetime64(2014, 1, 1, 12, 2), 8.0),
        (datetime64(2014, 1, 1, 12, 3), 3.0),
        (datetime64(2014, 1, 1, 12, 4), 7.0),
        (datetime64(2014, 1, 1, 12, 5), 8.0),
        (datetime64(2014, 1, 1, 12, 6), 4.0),
    ], list(ts['return'].fetch(datetime64(2014, 1, 1, 12, 0, 0))))
def test_aggregated_different_archive_no_overlap(self):
    tsc1 = carbonara.AggregatedTimeSerie(sampling=60, max_size=50,
                                         aggregation_method='mean')
    tsb1 = carbonara.BoundTimeSerie(block_size=tsc1.sampling)
    tsc2 = carbonara.AggregatedTimeSerie(sampling=60, max_size=50,
                                         aggregation_method='mean')
    tsb2 = carbonara.BoundTimeSerie(block_size=tsc2.sampling)

    tsb1.set_values([(datetime.datetime(2014, 1, 1, 11, 46, 4), 4)],
                    before_truncate_callback=tsc1.update)
    tsb2.set_values([(datetime.datetime(2014, 1, 1, 9, 1, 4), 4)],
                    before_truncate_callback=tsc2.update)

    dtfrom = datetime.datetime(2014, 1, 1, 11, 0, 0)
    self.assertRaises(carbonara.UnAggregableTimeseries,
                      carbonara.AggregatedTimeSerie.aggregated,
                      [tsc1, tsc2], from_timestamp=dtfrom,
                      aggregation='mean')
def test_no_truncation(self):
    ts = carbonara.AggregatedTimeSerie(sampling=60,
                                       aggregation_method='mean')
    tsb = carbonara.BoundTimeSerie()

    for i in six.moves.range(1, 11):
        tsb.set_values(
            [(datetime.datetime(2014, 1, 1, 12, i, i), float(i))],
            before_truncate_callback=ts.update)
        tsb.set_values(
            [(datetime.datetime(2014, 1, 1, 12, i, i + 1), float(i + 1))],
            before_truncate_callback=ts.update)
        self.assertEqual(i, len(ts.fetch()))
def test_fetch_agg_pct(self):
    ts = {'sampling': numpy.timedelta64(1, 's'),
          'size': 3600 * 24, 'agg': '90pct'}
    tsb = carbonara.BoundTimeSerie(block_size=ts['sampling'])

    tsb.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 0, 0), 3),
                                (datetime64(2014, 1, 1, 12, 0, 0, 123), 4),
                                (datetime64(2014, 1, 1, 12, 0, 2), 4)],
                               dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                   before_truncate_callback=functools.partial(
                       self._resample_and_merge, agg_dict=ts))

    result = ts['return'].fetch(datetime64(2014, 1, 1, 12, 0, 0))
    reference = [
        (datetime64(2014, 1, 1, 12, 0, 0), 3.9),
        (datetime64(2014, 1, 1, 12, 0, 2), 4),
    ]

    self.assertEqual(len(reference), len(list(result)))

    for ref, res in zip(reference, result):
        self.assertEqual(ref[0], res[0])
        # Rounding \o/
        self.assertAlmostEqual(ref[1], res[1])

    tsb.set_values(numpy.array([
        (datetime64(2014, 1, 1, 12, 0, 2, 113), 110)],
        dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
        before_truncate_callback=functools.partial(
            self._resample_and_merge, agg_dict=ts))

    result = ts['return'].fetch(datetime64(2014, 1, 1, 12, 0, 0))
    reference = [
        (datetime64(2014, 1, 1, 12, 0, 0), 3.9),
        (datetime64(2014, 1, 1, 12, 0, 2), 99.4),
    ]

    self.assertEqual(len(reference), len(list(result)))

    for ref, res in zip(reference, result):
        self.assertEqual(ref[0], res[0])
        # Rounding \o/
        self.assertAlmostEqual(ref[1], res[1])
def test_fetch(self):
    ts = carbonara.AggregatedTimeSerie(sampling=60, max_size=10,
                                       aggregation_method='mean')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 11, 46, 4), 4),
        (datetime.datetime(2014, 1, 1, 11, 47, 34), 8),
        (datetime.datetime(2014, 1, 1, 11, 50, 54), 50),
        (datetime.datetime(2014, 1, 1, 11, 54, 45), 4),
        (datetime.datetime(2014, 1, 1, 11, 56, 49), 4),
        (datetime.datetime(2014, 1, 1, 11, 57, 22), 6),
        (datetime.datetime(2014, 1, 1, 11, 58, 22), 5),
        (datetime.datetime(2014, 1, 1, 12, 1, 4), 4),
        (datetime.datetime(2014, 1, 1, 12, 1, 9), 7),
        (datetime.datetime(2014, 1, 1, 12, 2, 1), 15),
        (datetime.datetime(2014, 1, 1, 12, 2, 12), 1),
        (datetime.datetime(2014, 1, 1, 12, 3, 0), 3),
        (datetime.datetime(2014, 1, 1, 12, 4, 9), 7),
        (datetime.datetime(2014, 1, 1, 12, 5, 1), 15),
        (datetime.datetime(2014, 1, 1, 12, 5, 12), 1),
        (datetime.datetime(2014, 1, 1, 12, 6, 0, 2), 3),
    ], before_truncate_callback=ts.update)

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 12, 6), 5),
    ], before_truncate_callback=ts.update)

    self.assertEqual([(datetime.datetime(2014, 1, 1, 11, 54), 60.0, 4.0),
                      (datetime.datetime(2014, 1, 1, 11, 56), 60.0, 4.0),
                      (datetime.datetime(2014, 1, 1, 11, 57), 60.0, 6.0),
                      (datetime.datetime(2014, 1, 1, 11, 58), 60.0, 5.0),
                      (datetime.datetime(2014, 1, 1, 12, 1), 60.0, 5.5),
                      (datetime.datetime(2014, 1, 1, 12, 2), 60.0, 8.0),
                      (datetime.datetime(2014, 1, 1, 12, 3), 60.0, 3.0),
                      (datetime.datetime(2014, 1, 1, 12, 4), 60.0, 7.0),
                      (datetime.datetime(2014, 1, 1, 12, 5), 60.0, 8.0),
                      (datetime.datetime(2014, 1, 1, 12, 6), 60.0, 4.0)],
                     ts.fetch())

    self.assertEqual([(datetime.datetime(2014, 1, 1, 12, 1), 60.0, 5.5),
                      (datetime.datetime(2014, 1, 1, 12, 2), 60.0, 8.0),
                      (datetime.datetime(2014, 1, 1, 12, 3), 60.0, 3.0),
                      (datetime.datetime(2014, 1, 1, 12, 4), 60.0, 7.0),
                      (datetime.datetime(2014, 1, 1, 12, 5), 60.0, 8.0),
                      (datetime.datetime(2014, 1, 1, 12, 6), 60.0, 4.0)],
                     ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0)))
def test_back_window_ignore(self):
    """Back window testing.

    Test that measures older than the back window are ignored when
    ignore_too_old_timestamps is set.
    """
    ts = carbonara.AggregatedTimeSerie(sampling=1, max_size=60,
                                       aggregation_method='mean')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 12, 0, 1, 2300), 1),
        (datetime.datetime(2014, 1, 1, 12, 0, 1, 4600), 2),
        (datetime.datetime(2014, 1, 1, 12, 0, 2, 4500), 3),
        (datetime.datetime(2014, 1, 1, 12, 0, 2, 7800), 4),
        (datetime.datetime(2014, 1, 1, 12, 0, 3, 8), 2.5),
    ], before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
        (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
        (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 2.5),
    ], ts.fetch())

    tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 2, 99), 9)],
                   ignore_too_old_timestamps=True,
                   before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
        (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
        (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 2.5),
    ], ts.fetch())

    tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 2, 99), 9),
                    (datetime.datetime(2014, 1, 1, 12, 0, 3, 9), 4.5)],
                   ignore_too_old_timestamps=True,
                   before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
        (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
        (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 3.5),
    ], ts.fetch())
def test_no_truncation(self):
    ts = {'sampling': numpy.timedelta64(60, 's'), 'agg': 'mean'}
    tsb = carbonara.BoundTimeSerie()

    for i in six.moves.range(1, 11):
        tsb.set_values(numpy.array([
            (datetime64(2014, 1, 1, 12, i, i), float(i))],
            dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
            before_truncate_callback=functools.partial(
                self._resample_and_merge, agg_dict=ts))
        tsb.set_values(numpy.array([
            (datetime64(2014, 1, 1, 12, i, i + 1), float(i + 1))],
            dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
            before_truncate_callback=functools.partial(
                self._resample_and_merge, agg_dict=ts))
        self.assertEqual(i, len(list(ts['return'].fetch())))
def test_serialize(self):
    ts = carbonara.AggregatedTimeSerie(sampling=0.5,
                                       aggregation_method='mean')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 12, 0, 0, 1234), 3),
        (datetime.datetime(2014, 1, 1, 12, 0, 0, 321), 6),
        (datetime.datetime(2014, 1, 1, 12, 1, 4, 234), 5),
        (datetime.datetime(2014, 1, 1, 12, 1, 9, 32), 7),
        (datetime.datetime(2014, 1, 1, 12, 2, 12, 532), 1),
    ], before_truncate_callback=ts.update)

    self.assertEqual(
        ts, carbonara.AggregatedTimeSerie.unserialize(ts.serialize()))
def test_serialize(self):
    ts = {'sampling': numpy.timedelta64(500, 'ms'), 'agg': 'mean'}
    tsb = carbonara.BoundTimeSerie(block_size=ts['sampling'])

    tsb.set_values(numpy.array([
        (datetime64(2014, 1, 1, 12, 0, 0, 1234), 3),
        (datetime64(2014, 1, 1, 12, 0, 0, 321), 6),
        (datetime64(2014, 1, 1, 12, 1, 4, 234), 5),
        (datetime64(2014, 1, 1, 12, 1, 9, 32), 7),
        (datetime64(2014, 1, 1, 12, 2, 12, 532), 1)],
        dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
        before_truncate_callback=functools.partial(
            self._resample_and_merge, agg_dict=ts))

    key = ts['return'].get_split_key()
    o, s = ts['return'].serialize(key)
    self.assertEqual(ts['return'],
                     carbonara.AggregatedTimeSerie.unserialize(
                         s, key, ts['return'].aggregation))
def test_back_window(self):
    """Back window testing.

    Test that the back window of an archive is not longer than the
    window we aggregate on.
    """
    ts = carbonara.AggregatedTimeSerie(sampling=1, max_size=60,
                                       aggregation_method='mean')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([
        (datetime.datetime(2014, 1, 1, 12, 0, 1, 2300), 1),
        (datetime.datetime(2014, 1, 1, 12, 0, 1, 4600), 2),
        (datetime.datetime(2014, 1, 1, 12, 0, 2, 4500), 3),
        (datetime.datetime(2014, 1, 1, 12, 0, 2, 7800), 4),
        (datetime.datetime(2014, 1, 1, 12, 0, 3, 8), 2.5),
    ], before_truncate_callback=ts.update)

    self.assertEqual([
        (pandas.Timestamp('2014-01-01 12:00:01'), 1.0, 1.5),
        (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 3.5),
        (pandas.Timestamp('2014-01-01 12:00:03'), 1.0, 2.5),
    ], ts.fetch())

    try:
        tsb.set_values([
            (datetime.datetime(2014, 1, 1, 12, 0, 2, 99), 9),
        ])
    except carbonara.NoDeloreanAvailable as e:
        self.assertEqual(
            six.text_type(e),
            u"2014-01-01 12:00:02.000099 is before 2014-01-01 12:00:03")
        self.assertEqual(datetime.datetime(2014, 1, 1, 12, 0, 2, 99),
                         e.bad_timestamp)
        self.assertEqual(datetime.datetime(2014, 1, 1, 12, 0, 3),
                         e.first_timestamp)
    else:
        self.fail("No exception raised")
def test_fetch_agg_pct(self):
    ts = carbonara.AggregatedTimeSerie(sampling=1, max_size=3600 * 24,
                                       aggregation_method='90pct')
    tsb = carbonara.BoundTimeSerie(block_size=ts.sampling)

    tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 0), 3),
                    (datetime.datetime(2014, 1, 1, 12, 0, 0, 123), 4),
                    (datetime.datetime(2014, 1, 1, 12, 0, 2), 4)],
                   before_truncate_callback=ts.update)

    result = ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0))
    reference = [(pandas.Timestamp('2014-01-01 12:00:00'), 1.0, 3.9),
                 (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 4)]

    self.assertEqual(len(reference), len(result))

    for ref, res in zip(reference, result):
        self.assertEqual(ref[0], res[0])
        self.assertEqual(ref[1], res[1])
        # Rounding \o/
        self.assertAlmostEqual(ref[2], res[2])

    tsb.set_values([(datetime.datetime(2014, 1, 1, 12, 0, 2, 113), 110)],
                   before_truncate_callback=ts.update)

    result = ts.fetch(datetime.datetime(2014, 1, 1, 12, 0, 0))
    reference = [(pandas.Timestamp('2014-01-01 12:00:00'), 1.0, 3.9),
                 (pandas.Timestamp('2014-01-01 12:00:02'), 1.0, 99.4)]

    self.assertEqual(len(reference), len(result))

    for ref, res in zip(reference, result):
        self.assertEqual(ref[0], res[0])
        self.assertEqual(ref[1], res[1])
        # Rounding \o/
        self.assertAlmostEqual(ref[2], res[2])
def add_measures_to_metrics(self, metrics_and_measures):
    """Update metrics with new measures, computing new aggregations.

    :param metrics_and_measures: A dict whose keys are `storage.Metric`
                                 objects and whose values are timeseries
                                 arrays of the new measures.
    """
    with self.statistics.time("raw measures fetch"):
        raw_measures = self._get_or_create_unaggregated_timeseries(
            metrics_and_measures.keys())
    self.statistics["raw measures fetch"] += len(metrics_and_measures)
    self.statistics["processed measures"] += sum(
        map(len, metrics_and_measures.values()))

    new_boundts = []
    splits_to_delete = {}
    splits_to_update = {}

    for metric, measures in six.iteritems(metrics_and_measures):
        measures = numpy.sort(measures, order='timestamps')

        agg_methods = list(metric.archive_policy.aggregation_methods)
        block_size = metric.archive_policy.max_block_size
        back_window = metric.archive_policy.back_window
        # NOTE(sileht): We keep one more block to calculate the rate of
        # change correctly
        if any(filter(lambda x: x.startswith("rate:"), agg_methods)):
            back_window += 1

        if raw_measures[metric] is None:
            ts = None
        else:
            try:
                ts = carbonara.BoundTimeSerie.unserialize(
                    raw_measures[metric], block_size, back_window)
            except carbonara.InvalidData:
                LOG.error("Data corruption detected for %s "
                          "unaggregated timeserie, creating a new one",
                          metric.id)
                ts = None

        if ts is None:
            # This is the first time we treat measures for this
            # metric, or data are corrupted, create a new one
            ts = carbonara.BoundTimeSerie(block_size=block_size,
                                          back_window=back_window)
            current_first_block_timestamp = None
        else:
            current_first_block_timestamp = ts.first_block_timestamp()

        # NOTE(jd) This is Python where you need such
        # hack to pass a variable around a closure,
        # sorry.
        computed_points = {"number": 0}

        def _map_compute_splits_operations(bound_timeserie):
            # NOTE(gordc): bound_timeserie is the entire set of
            # unaggregated measures matching the largest granularity;
            # the following takes only the points affected by the new
            # measures for each specific granularity
            tstamp = max(bound_timeserie.first, measures['timestamps'][0])
            new_first_block_timestamp = (
                bound_timeserie.first_block_timestamp())
            computed_points['number'] = len(bound_timeserie)

            aggregations = metric.archive_policy.aggregations

            grouped_timeseries = {
                granularity: bound_timeserie.group_serie(
                    granularity,
                    carbonara.round_timestamp(tstamp, granularity))
                for granularity, aggregations
                # No need to sort the aggregations, they are already sorted
                in itertools.groupby(aggregations, ATTRGETTER_GRANULARITY)
            }

            aggregations_and_timeseries = {
                aggregation:
                carbonara.AggregatedTimeSerie.from_grouped_serie(
                    grouped_timeseries[aggregation.granularity],
                    aggregation)
                for aggregation in aggregations
            }

            deleted_keys, keys_and_split_to_store = (
                self._compute_split_operations(
                    metric, aggregations_and_timeseries,
                    current_first_block_timestamp,
                    new_first_block_timestamp)
            )

            return (new_first_block_timestamp,
                    deleted_keys,
                    keys_and_split_to_store)

        with self.statistics.time("aggregated measures compute"):
            (new_first_block_timestamp,
             deleted_keys,
             keys_and_splits_to_store) = ts.set_values(
                measures,
                before_truncate_callback=_map_compute_splits_operations,
            )

        splits_to_delete[metric] = deleted_keys
        splits_to_update[metric] = (keys_and_splits_to_store,
                                    new_first_block_timestamp)

        new_boundts.append((metric, ts.serialize()))

    with self.statistics.time("splits delete"):
        self._delete_metric_splits(splits_to_delete)
    self.statistics["splits delete"] += len(splits_to_delete)
    with self.statistics.time("splits update"):
        self._update_metric_splits(splits_to_update)
    self.statistics["splits update"] += len(splits_to_update)
    with self.statistics.time("raw measures store"):
        self._store_unaggregated_timeseries(new_boundts)
    self.statistics["raw measures store"] += len(new_boundts)
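# A hypothetical usage sketch for add_measures_to_metrics, not taken from
# the source: `storage` stands in for an incoming-storage driver instance
# exposing the method above, and `metric` for an indexed storage.Metric.
# The measures array uses the same dtype convention as the tests above.
measures = numpy.array([(numpy.datetime64('2014-01-01T12:00:00'), 42.0),
                        (numpy.datetime64('2014-01-01T12:01:00'), 43.0)],
                       dtype=carbonara.TIMESERIES_ARRAY_DTYPE)
storage.add_measures_to_metrics({metric: measures})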
def process_measures(self, indexer, block_size, sync=False):
    metrics_to_process = self._list_metric_with_measures_to_process(
        block_size, full=sync)
    metrics = indexer.list_metrics(ids=metrics_to_process)
    # This builds the list of deleted metrics, i.e. the metrics we have
    # measures to process for but that are not in the indexer anymore.
    deleted_metrics_id = (set(map(uuid.UUID, metrics_to_process))
                          - set(m.id for m in metrics))
    for metric_id in deleted_metrics_id:
        # NOTE(jd): We need to lock the metric otherwise we might delete
        # measures that another worker might be processing. Deleting
        # measurement files under its feet is not nice!
        with self._lock(metric_id)(blocking=sync):
            self._delete_unprocessed_measures_for_metric_id(metric_id)
    for metric in metrics:
        lock = self._lock(metric.id)
        agg_methods = list(metric.archive_policy.aggregation_methods)
        # Do not block if we cannot acquire the lock; that means some
        # other worker is doing the job. We'll just ignore this metric
        # and may get back to it later if needed.
        if lock.acquire(blocking=sync):
            try:
                LOG.debug("Processing measures for %s" % metric)
                with self._process_measure_for_metric(metric) as measures:
                    # NOTE(mnaser): The metric could have been handled by
                    # another worker, ignore if no measures.
                    if len(measures) == 0:
                        LOG.debug("Skipping %s (already processed)"
                                  % metric)
                        continue
                    try:
                        with timeutils.StopWatch() as sw:
                            raw_measures = (
                                self._get_unaggregated_timeserie(metric))
                            LOG.debug("Retrieve unaggregated measures "
                                      "for %s in %.2fs"
                                      % (metric.id, sw.elapsed()))
                    except storage.MetricDoesNotExist:
                        try:
                            self._create_metric(metric)
                        except storage.MetricAlreadyExists:
                            # Created in the meantime, do not worry
                            pass
                        ts = None
                    else:
                        try:
                            ts = carbonara.BoundTimeSerie.unserialize(
                                raw_measures)
                        except ValueError:
                            ts = None
                            LOG.error("Data corruption detected for %s "
                                      "unaggregated timeserie, "
                                      "recreating an empty one."
                                      % metric.id)

                    if ts is None:
                        # This is the first time we treat measures for
                        # this metric, or data are corrupted; create a
                        # new one
                        mbs = metric.archive_policy.max_block_size
                        ts = carbonara.BoundTimeSerie(
                            block_size=mbs,
                            back_window=metric.archive_policy.back_window)

                    def _map_add_measures(bound_timeserie):
                        self._map_in_thread(
                            self._add_measures,
                            ((aggregation, d, metric, bound_timeserie)
                             for aggregation in agg_methods
                             for d in metric.archive_policy.definition))

                    with timeutils.StopWatch() as sw:
                        ts.set_values(
                            measures,
                            before_truncate_callback=_map_add_measures,
                            ignore_too_old_timestamps=True)
                        LOG.debug(
                            "Computed new metric %s with %d new measures "
                            "in %.2f seconds"
                            % (metric.id, len(measures), sw.elapsed()))

                    self._store_unaggregated_timeserie(
                        metric, ts.serialize())
            except Exception:
                if sync:
                    raise
                LOG.error("Error processing new measures", exc_info=True)
            finally:
                lock.release()
def _compute_and_store_timeseries(self, metric, measures):
    # NOTE(mnaser): The metric could have been handled by
    # another worker, ignore if no measures.
    if len(measures) == 0:
        LOG.debug("Skipping %s (already processed)", metric)
        return

    measures.sort(order='timestamps')

    agg_methods = list(metric.archive_policy.aggregation_methods)
    block_size = metric.archive_policy.max_block_size
    back_window = metric.archive_policy.back_window
    definition = metric.archive_policy.definition
    # NOTE(sileht): We keep one more block to calculate the rate of
    # change correctly
    if any(filter(lambda x: x.startswith("rate:"), agg_methods)):
        back_window += 1

    try:
        ts = self._get_unaggregated_timeserie_and_unserialize(
            metric, block_size=block_size, back_window=back_window)
    except MetricDoesNotExist:
        try:
            self._create_metric(metric)
        except MetricAlreadyExists:
            # Created in the meantime, do not worry
            pass
        ts = None
    except CorruptionError as e:
        LOG.error(e)
        ts = None

    if ts is None:
        # This is the first time we treat measures for this
        # metric, or data are corrupted, create a new one
        ts = carbonara.BoundTimeSerie(block_size=block_size,
                                      back_window=back_window)
        current_first_block_timestamp = None
    else:
        current_first_block_timestamp = ts.first_block_timestamp()

    # NOTE(jd) This is Python where you need such
    # hack to pass a variable around a closure,
    # sorry.
    computed_points = {"number": 0}

    def _map_add_measures(bound_timeserie):
        # NOTE(gordc): bound_timeserie is the entire set of
        # unaggregated measures matching the largest granularity;
        # the following takes only the points affected by the new
        # measures for each specific granularity
        tstamp = max(bound_timeserie.first, measures['timestamps'][0])
        new_first_block_timestamp = bound_timeserie.first_block_timestamp()
        computed_points['number'] = len(bound_timeserie)
        for d in definition:
            ts = bound_timeserie.group_serie(
                d.granularity, carbonara.round_timestamp(
                    tstamp, d.granularity))

            self._map_in_thread(
                self._add_measures,
                ((aggregation, d, metric, ts,
                  current_first_block_timestamp,
                  new_first_block_timestamp)
                 for aggregation in agg_methods))

    with utils.StopWatch() as sw:
        ts.set_values(measures,
                      before_truncate_callback=_map_add_measures)

    number_of_operations = (len(agg_methods) * len(definition))
    perf = ""
    elapsed = sw.elapsed()
    if elapsed > 0:
        perf = " (%d points/s, %d measures/s)" % (
            ((number_of_operations * computed_points['number']) /
             elapsed),
            ((number_of_operations * len(measures)) / elapsed)
        )
    LOG.debug("Computed new metric %s with %d new measures "
              "in %.2f seconds%s",
              metric.id, len(measures), elapsed, perf)

    self._store_unaggregated_timeserie(metric, ts.serialize())
def compute_and_store_timeseries(self, metric, measures):
    # NOTE(mnaser): The metric could have been handled by
    # another worker, ignore if no measures.
    if len(measures) == 0:
        LOG.debug("Skipping %s (already processed)", metric)
        return

    measures = numpy.sort(measures, order='timestamps')

    agg_methods = list(metric.archive_policy.aggregation_methods)
    block_size = metric.archive_policy.max_block_size
    back_window = metric.archive_policy.back_window
    definition = metric.archive_policy.definition
    # NOTE(sileht): We keep one more block to calculate the rate of
    # change correctly
    if any(filter(lambda x: x.startswith("rate:"), agg_methods)):
        back_window += 1

    with utils.StopWatch() as sw:
        raw_measures = (
            self._get_or_create_unaggregated_timeseries(
                [metric])[metric]
        )
    LOG.debug("Retrieve unaggregated measures for %s in %.2fs",
              metric.id, sw.elapsed())

    if raw_measures is None:
        ts = None
    else:
        try:
            ts = carbonara.BoundTimeSerie.unserialize(
                raw_measures, block_size, back_window)
        except carbonara.InvalidData:
            LOG.error("Data corruption detected for %s "
                      "unaggregated timeserie, creating a new one",
                      metric.id)
            ts = None

    if ts is None:
        # This is the first time we treat measures for this
        # metric, or data are corrupted, create a new one
        ts = carbonara.BoundTimeSerie(block_size=block_size,
                                      back_window=back_window)
        current_first_block_timestamp = None
    else:
        current_first_block_timestamp = ts.first_block_timestamp()

    # NOTE(jd) This is Python where you need such
    # hack to pass a variable around a closure,
    # sorry.
    computed_points = {"number": 0}

    def _map_add_measures(bound_timeserie):
        # NOTE(gordc): bound_timeserie is the entire set of
        # unaggregated measures matching the largest granularity;
        # the following takes only the points affected by the new
        # measures for each specific granularity
        tstamp = max(bound_timeserie.first, measures['timestamps'][0])
        new_first_block_timestamp = bound_timeserie.first_block_timestamp()
        computed_points['number'] = len(bound_timeserie)
        for granularity, aggregations in itertools.groupby(
                # No need to sort the aggregations, they are already sorted
                metric.archive_policy.aggregations,
                ATTRGETTER_GRANULARITY):
            ts = bound_timeserie.group_serie(
                granularity, carbonara.round_timestamp(
                    tstamp, granularity))

            self._add_measures(metric, aggregations, ts,
                               current_first_block_timestamp,
                               new_first_block_timestamp)

    with utils.StopWatch() as sw:
        ts.set_values(measures,
                      before_truncate_callback=_map_add_measures)

    number_of_operations = (len(agg_methods) * len(definition))
    perf = ""
    elapsed = sw.elapsed()
    if elapsed > 0:
        perf = " (%d points/s, %d measures/s)" % (
            ((number_of_operations * computed_points['number']) /
             elapsed),
            ((number_of_operations * len(measures)) / elapsed)
        )
    LOG.debug("Computed new metric %s with %d new measures "
              "in %.2f seconds%s",
              metric.id, len(measures), elapsed, perf)

    self._store_unaggregated_timeseries([(metric, ts.serialize())])
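# The storage methods above reference a module-level helper that is not
# defined in this section. Presumably (an assumption inferred from how it
# is passed as the itertools.groupby key) it is a simple attribute getter:
# (requires `import operator` among the module imports)
ATTRGETTER_GRANULARITY = operator.attrgetter("granularity")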
def test_aggregated_different_archive_overlap(self):
    tsc1 = {'sampling': numpy.timedelta64(60, 's'),
            'size': 10, 'agg': 'mean'}
    tsb1 = carbonara.BoundTimeSerie(block_size=tsc1['sampling'])
    tsc2 = {'sampling': numpy.timedelta64(60, 's'),
            'size': 10, 'agg': 'mean'}
    tsb2 = carbonara.BoundTimeSerie(block_size=tsc2['sampling'])

    # NOTE(sileht): minute 8 is missing in both and
    # minute 7 in tsc2 too, but it looks like we have
    # enough points to do the aggregation
    tsb1.set_values(numpy.array([(datetime64(2014, 1, 1, 11, 0, 0), 4),
                                 (datetime64(2014, 1, 1, 12, 1, 0), 3),
                                 (datetime64(2014, 1, 1, 12, 2, 0), 2),
                                 (datetime64(2014, 1, 1, 12, 3, 0), 4),
                                 (datetime64(2014, 1, 1, 12, 4, 0), 2),
                                 (datetime64(2014, 1, 1, 12, 5, 0), 3),
                                 (datetime64(2014, 1, 1, 12, 6, 0), 4),
                                 (datetime64(2014, 1, 1, 12, 7, 0), 10),
                                 (datetime64(2014, 1, 1, 12, 9, 0), 2)],
                                dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                    before_truncate_callback=functools.partial(
                        self._resample_and_merge, agg_dict=tsc1))
    tsb2.set_values(numpy.array([(datetime64(2014, 1, 1, 12, 1, 0), 3),
                                 (datetime64(2014, 1, 1, 12, 2, 0), 4),
                                 (datetime64(2014, 1, 1, 12, 3, 0), 4),
                                 (datetime64(2014, 1, 1, 12, 4, 0), 6),
                                 (datetime64(2014, 1, 1, 12, 5, 0), 3),
                                 (datetime64(2014, 1, 1, 12, 6, 0), 6),
                                 (datetime64(2014, 1, 1, 12, 9, 0), 2),
                                 (datetime64(2014, 1, 1, 12, 11, 0), 2),
                                 (datetime64(2014, 1, 1, 12, 12, 0), 2)],
                                dtype=carbonara.TIMESERIES_ARRAY_DTYPE),
                    before_truncate_callback=functools.partial(
                        self._resample_and_merge, agg_dict=tsc2))

    dtfrom = datetime64(2014, 1, 1, 12, 0, 0)
    dtto = datetime64(2014, 1, 1, 12, 10, 0)

    # By default we require 100% of the points to overlap,
    # so this fails
    self.assertRaises(cross_metric.UnAggregableTimeseries,
                      cross_metric.aggregated,
                      [tsc1['return'], tsc2['return']],
                      from_timestamp=dtfrom, to_timestamp=dtto,
                      aggregation='mean')

    # Retry with 80% and it works
    output = cross_metric.aggregated([tsc1['return'], tsc2['return']],
                                     from_timestamp=dtfrom,
                                     to_timestamp=dtto,
                                     aggregation='mean',
                                     needed_percent_of_overlap=80.0)

    self.assertEqual([
        (datetime64(2014, 1, 1, 12, 1, 0),
         numpy.timedelta64(60, 's'), 3.0),
        (datetime64(2014, 1, 1, 12, 2, 0),
         numpy.timedelta64(60, 's'), 3.0),
        (datetime64(2014, 1, 1, 12, 3, 0),
         numpy.timedelta64(60, 's'), 4.0),
        (datetime64(2014, 1, 1, 12, 4, 0),
         numpy.timedelta64(60, 's'), 4.0),
        (datetime64(2014, 1, 1, 12, 5, 0),
         numpy.timedelta64(60, 's'), 3.0),
        (datetime64(2014, 1, 1, 12, 6, 0),
         numpy.timedelta64(60, 's'), 5.0),
        (datetime64(2014, 1, 1, 12, 7, 0),
         numpy.timedelta64(60, 's'), 10.0),
        (datetime64(2014, 1, 1, 12, 9, 0),
         numpy.timedelta64(60, 's'), 2.0),
    ], list(output))