def _map_compute_splits_operations(bound_timeserie):
    # NOTE (gordc): bound_timeserie is entire set of
    # unaggregated measures matching largest
    # granularity. the following takes only the points
    # affected by new measures for specific granularity
    tstamp = max(bound_timeserie.first, measures['timestamps'][0])
    new_first_block_timestamp = (
        bound_timeserie.first_block_timestamp())
    aggregations = metric.archive_policy.aggregations

    grouped_timeseries = {
        granularity: bound_timeserie.group_serie(
            granularity,
            carbonara.round_timestamp(tstamp, granularity))
        for granularity, aggregations
        # No need to sort the aggregations, they are already sorted
        in itertools.groupby(aggregations, ATTRGETTER_GRANULARITY)
    }

    aggregations_and_timeseries = {
        aggregation: carbonara.AggregatedTimeSerie.from_grouped_serie(
            grouped_timeseries[aggregation.granularity], aggregation)
        for aggregation in aggregations
    }

    deleted_keys, keys_and_split_to_store = (
        self._compute_split_operations(
            metric, aggregations_and_timeseries,
            current_first_block_timestamp,
            new_first_block_timestamp))

    return (new_first_block_timestamp,
            deleted_keys, keys_and_split_to_store)
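
# Illustrative sketch only, not part of the code above: it shows how a dict
# comprehension over itertools.groupby keyed on granularity behaves, assuming
# ATTRGETTER_GRANULARITY is operator.attrgetter('granularity') and using a
# hypothetical Aggregation namedtuple in place of the real archive-policy
# aggregation objects. groupby only merges *adjacent* items, which is why the
# input has to be sorted by granularity already.
import collections
import itertools
import operator

Aggregation = collections.namedtuple('Aggregation', ['method', 'granularity'])
ATTRGETTER_GRANULARITY = operator.attrgetter('granularity')

example_aggregations = [
    Aggregation('mean', 60.0), Aggregation('max', 60.0),
    Aggregation('mean', 300.0), Aggregation('max', 300.0),
]

grouped = {
    granularity: [a.method for a in aggs]
    for granularity, aggs
    in itertools.groupby(example_aggregations, ATTRGETTER_GRANULARITY)
}
print(grouped)  # {60.0: ['mean', 'max'], 300.0: ['mean', 'max']}
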
def _map_add_measures(bound_timeserie):
    # NOTE (gordc): bound_timeserie is entire set of
    # unaggregated measures matching largest
    # granularity. the following takes only the points
    # affected by new measures for specific granularity
    tstamp = max(bound_timeserie.first, measures['timestamps'][0])
    new_first_block_timestamp = bound_timeserie.first_block_timestamp()
    computed_points['number'] = len(bound_timeserie)
    for d in definition:
        ts = bound_timeserie.group_serie(
            d.granularity, carbonara.round_timestamp(
                tstamp, d.granularity))

        self._map_in_thread(
            self._add_measures,
            ((aggregation, d, metric, ts,
              current_first_block_timestamp,
              new_first_block_timestamp)
             for aggregation in agg_methods))
def test_add_measures_update_subset(self):
    m, m_sql = self._create_metric('medium')
    measures = [
        storage.Measure(utils.dt_to_unix_ns(2014, 1, 6, i, j, 0), 100)
        for i in six.moves.range(2)
        for j in six.moves.range(0, 60, 2)]
    self.incoming.add_measures(m, measures)
    self.trigger_processing([str(m.id)])

    # add measure to end, in same aggregate time as last point.
    new_point = utils.dt_to_unix_ns(2014, 1, 6, 1, 58, 1)
    self.incoming.add_measures(m, [storage.Measure(new_point, 100)])

    with mock.patch.object(self.storage, '_add_measures') as c:
        self.trigger_processing([str(m.id)])

    for __, args, __ in c.mock_calls:
        self.assertEqual(
            list(args[3])[0][0],
            carbonara.round_timestamp(
                new_point, args[1].granularity * 10e8))
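
# Hedged sketch of the arithmetic the assertion above relies on; round_down
# and dt_to_unix_ns below are hypothetical stand-ins for
# carbonara.round_timestamp and utils.dt_to_unix_ns, operating on unix
# nanoseconds. The 10e8 factor (== 1e9) converts a granularity in seconds to
# nanoseconds, and the late point at 01:58:01 floors to the 01:58:00 window,
# so only that aggregate window needs to be recomputed.
import datetime


def dt_to_unix_ns(*args):
    epoch = datetime.datetime(1970, 1, 1)
    return int((datetime.datetime(*args) - epoch).total_seconds() * 10e8)


def round_down(timestamp_ns, span_ns):
    # floor the timestamp to the start of its aggregation window
    return timestamp_ns - (timestamp_ns % int(span_ns))


new_point = dt_to_unix_ns(2014, 1, 6, 1, 58, 1)
assert (round_down(new_point, 60.0 * 10e8)
        == dt_to_unix_ns(2014, 1, 6, 1, 58, 0))
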
def _map_add_measures(bound_timeserie):
    # NOTE (gordc): bound_timeserie is entire set of
    # unaggregated measures matching largest
    # granularity. the following takes only the points
    # affected by new measures for specific granularity
    tstamp = max(bound_timeserie.first, measures['timestamps'][0])
    new_first_block_timestamp = bound_timeserie.first_block_timestamp()
    computed_points['number'] = len(bound_timeserie)
    for granularity, aggregations in itertools.groupby(
            # No need to sort the aggregations, they are already sorted
            metric.archive_policy.aggregations,
            ATTRGETTER_GRANULARITY):
        ts = bound_timeserie.group_serie(
            granularity, carbonara.round_timestamp(
                tstamp, granularity))

        self._add_measures(metric, aggregations, ts,
                           current_first_block_timestamp,
                           new_first_block_timestamp)
def aggregated(refs_and_timeseries, operations, from_timestamp=None,
               to_timestamp=None, needed_percent_of_overlap=100.0,
               fill=None):

    series = collections.defaultdict(list)
    references = collections.defaultdict(list)
    lookup_keys = collections.defaultdict(list)
    for (ref, timeserie) in refs_and_timeseries:
        from_ = (None if from_timestamp is None else
                 carbonara.round_timestamp(
                     from_timestamp, timeserie.aggregation.granularity))
        references[timeserie.aggregation.granularity].append(ref)
        lookup_keys[timeserie.aggregation.granularity].append(ref.lookup_key)
        series[timeserie.aggregation.granularity].append(
            timeserie[from_:to_timestamp])

    result = []
    is_aggregated = False
    result = {}
    for sampling in sorted(series, reverse=True):
        # np.unique sorts results for us
        times, indices = numpy.unique(
            numpy.concatenate(
                [i['timestamps'] for i in series[sampling]]),
            return_inverse=True)

        # create nd-array (unique series x unique times) and fill
        filler = (numpy.NaN if fill in [None, 'null', 'dropna'] else fill)
        val_grid = numpy.full((len(series[sampling]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[sampling]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            overlap = numpy.flatnonzero(
                ~numpy.any(numpy.isnan(values), axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(
                    lookup_keys[sampling], 'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        lookup_keys[sampling],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' % (needed_percent_of_overlap,
                                               percent_of_overlap))

        granularity, times, values, is_aggregated = (
            agg_operations.evaluate(operations, sampling, times, values,
                                    False, lookup_keys[sampling]))

        values = values.T
        result[sampling] = (granularity, times, values, references[sampling])

    if is_aggregated:
        output = {"aggregated": []}
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            if fill == "dropna":
                pos = ~numpy.logical_or(numpy.isnan(values[0]),
                                        numpy.isinf(values[0]))
                v = values[0][pos]
                t = times[pos]
            else:
                v = values[0]
                t = times
            g = [granularity] * len(t)
            output["aggregated"].extend(six.moves.zip(t, g, v))
        return output
    else:
        r_output = collections.defaultdict(
            lambda: collections.defaultdict(
                lambda: collections.defaultdict(list)))
        m_output = collections.defaultdict(
            lambda: collections.defaultdict(list))
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            for i, ref in enumerate(references):
                if fill == "dropna":
                    pos = ~numpy.logical_or(numpy.isnan(values[i]),
                                            numpy.isinf(values[i]))
                    v = values[i][pos]
                    t = times[pos]
                else:
                    v = values[i]
                    t = times
                g = [granularity] * len(t)
                measures = six.moves.zip(t, g, v)
                if ref.resource is None:
                    m_output[ref.name][ref.aggregation].extend(measures)
                else:
                    r_output[str(ref.resource.id)][ref.metric.name][
                        ref.aggregation].extend(measures)
        return r_output if r_output else m_output
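
# Toy illustration (separate from the function above) of the alignment trick
# it uses: numpy.unique(..., return_inverse=True) returns both the sorted
# union of timestamps and, for each original point, its column in the
# (series x unique-times) grid, so every series can be scattered into the
# grid with NaN wherever it has no point.
import numpy

toy_series = [
    numpy.array([(1, 10.0), (2, 11.0), (4, 12.0)],
                dtype=[('timestamps', 'i8'), ('values', 'f8')]),
    numpy.array([(2, 20.0), (3, 21.0), (4, 22.0)],
                dtype=[('timestamps', 'i8'), ('values', 'f8')]),
]

times, indices = numpy.unique(
    numpy.concatenate([s['timestamps'] for s in toy_series]),
    return_inverse=True)

val_grid = numpy.full((len(toy_series), len(times)), numpy.nan)
start = 0
for i, split in enumerate(toy_series):
    size = len(split)
    val_grid[i][indices[start:start + size]] = split['values']
    start += size

print(times)     # [1 2 3 4]
print(val_grid)  # [[10. 11. nan 12.]
                 #  [nan 20. 21. 22.]]
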
def aggregated(timeseries, aggregation, from_timestamp=None,
               to_timestamp=None, needed_percent_of_overlap=100.0,
               fill=None):

    series = collections.defaultdict(list)
    for timeserie in timeseries:
        from_ = (None if from_timestamp is None else
                 carbonara.round_timestamp(
                     from_timestamp, timeserie.sampling))
        series[timeserie.sampling].append(timeserie[from_:to_timestamp])

    result = {'timestamps': [], 'granularity': [], 'values': []}
    for key in sorted(series, reverse=True):
        combine = numpy.concatenate(series[key])
        # np.unique sorts results for us
        times, indices = numpy.unique(combine['timestamps'],
                                      return_inverse=True)

        # create nd-array (unique series x unique times) and fill
        filler = fill if fill is not None and fill != 'null' else numpy.NaN
        val_grid = numpy.full((len(series[key]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[key]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            overlap = numpy.flatnonzero(
                ~numpy.any(numpy.isnan(values), axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise UnAggregableTimeseries('No overlap')
            # if no boundary set, use first/last timestamp which overlap
            if to_timestamp is None and overlap.size:
                times = times[:overlap[-1] + 1]
                values = values[:overlap[-1] + 1]
            if from_timestamp is None and overlap.size:
                times = times[overlap[0]:]
                values = values[overlap[0]:]
            percent_of_overlap = overlap.size * 100.0 / times.size
            if percent_of_overlap < needed_percent_of_overlap:
                raise UnAggregableTimeseries(
                    'Less than %f%% of datapoints overlap in this '
                    'timespan (%.2f%%)' % (needed_percent_of_overlap,
                                           percent_of_overlap))

        if aggregation in AGG_MAP:
            values = AGG_MAP[aggregation](values, axis=1)
        elif aggregation == 'count':
            values = numpy.count_nonzero(~numpy.isnan(values), axis=1)
        else:
            raise carbonara.UnknownAggregationMethod(aggregation)

        result['timestamps'].extend(times)
        result['granularity'].extend([key] * len(times))
        result['values'].extend(values)

    return six.moves.zip(result['timestamps'],
                         result['granularity'],
                         result['values'])
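
# Small, self-contained sketch of the reduction step above, assuming AGG_MAP
# maps method names to NaN-aware numpy reducers (e.g. 'mean' -> numpy.nanmean);
# the 'count' branch counts the non-NaN series per timestamp. Rows are
# timestamps, columns are the individual series.
import numpy

grid = numpy.array([[10.0, numpy.nan],
                    [11.0, 20.0],
                    [numpy.nan, 21.0]])

print(numpy.nanmean(grid, axis=1))                      # [10.  15.5 21. ]
print(numpy.count_nonzero(~numpy.isnan(grid), axis=1))  # [1 2 1]
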
def aggregated(refs_and_timeseries, operations, from_timestamp=None,
               to_timestamp=None, needed_percent_of_overlap=100.0,
               fill=None):

    series = collections.defaultdict(list)
    references = collections.defaultdict(list)
    for (reference, timeserie) in refs_and_timeseries:
        from_ = (None if from_timestamp is None else
                 carbonara.round_timestamp(
                     from_timestamp, timeserie.sampling))
        references[timeserie.sampling].append(reference)
        series[timeserie.sampling].append(timeserie[from_:to_timestamp])

    result = collections.defaultdict(lambda: {'timestamps': [],
                                              'granularity': [],
                                              'values': []})
    for key in sorted(series, reverse=True):
        combine = numpy.concatenate(series[key])
        # np.unique sorts results for us
        times, indices = numpy.unique(combine['timestamps'],
                                      return_inverse=True)

        # create nd-array (unique series x unique times) and fill
        filler = (numpy.NaN if fill in [None, 'null', 'dropna'] else fill)
        val_grid = numpy.full((len(series[key]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[key]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            overlap = numpy.flatnonzero(
                ~numpy.any(numpy.isnan(values), axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(
                    references[key], 'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        references[key],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' % (needed_percent_of_overlap,
                                               percent_of_overlap))

        granularity, times, values, is_aggregated = (
            agg_operations.evaluate(operations, key, times, values,
                                    False, references[key]))

        values = values.T
        if is_aggregated:
            idents = ["aggregated"]
        else:
            idents = ["%s_%s" % tuple(ref) for ref in references[key]]

        for i, ident in enumerate(idents):
            if fill == "dropna":
                pos = ~numpy.isnan(values[i])
                v = values[i][pos]
                t = times[pos]
            else:
                v = values[i]
                t = times
            result[ident]["timestamps"].extend(t)
            result[ident]['granularity'].extend([granularity] * len(t))
            result[ident]['values'].extend(v)

    return dict(((ident, list(
        six.moves.zip(result[ident]['timestamps'],
                      result[ident]['granularity'],
                      result[ident]['values'])))
        for ident in result))
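
# Toy demo (not the function above) of the overlap check applied when fill is
# None: a timestamp "overlaps" when no series is NaN on that row, and the
# percentage of such rows is compared against needed_percent_of_overlap.
import numpy

grid = numpy.array([[numpy.nan, 1.0],
                    [2.0, 3.0],
                    [4.0, 5.0],
                    [6.0, numpy.nan]])  # rows = timestamps, cols = series

overlap = numpy.flatnonzero(~numpy.any(numpy.isnan(grid), axis=1))
print(overlap)                               # [1 2]
print(overlap.size * 100.0 / grid.shape[0])  # 50.0 (% of rows overlapping)
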
def aggregated(refs_and_timeseries, operations, from_timestamp=None,
               to_timestamp=None, needed_percent_of_overlap=100.0,
               fill=None):

    series = collections.defaultdict(list)
    references = collections.defaultdict(list)
    lookup_keys = collections.defaultdict(list)
    for (ref, timeserie) in refs_and_timeseries:
        from_ = (None if from_timestamp is None else
                 carbonara.round_timestamp(
                     from_timestamp, timeserie.aggregation.granularity))
        references[timeserie.aggregation.granularity].append(ref)
        lookup_keys[timeserie.aggregation.granularity].append(ref.lookup_key)
        series[timeserie.aggregation.granularity].append(
            timeserie[from_:to_timestamp])

    result = []
    is_aggregated = False
    result = {}
    for sampling in sorted(series, reverse=True):
        combine = numpy.concatenate(series[sampling])
        # np.unique sorts results for us
        times, indices = numpy.unique(combine['timestamps'],
                                      return_inverse=True)

        # create nd-array (unique series x unique times) and fill
        filler = (numpy.NaN if fill in [None, 'null', 'dropna'] else fill)
        val_grid = numpy.full((len(series[sampling]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[sampling]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            overlap = numpy.flatnonzero(
                ~numpy.any(numpy.isnan(values), axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(
                    lookup_keys[sampling], 'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        lookup_keys[sampling],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' % (needed_percent_of_overlap,
                                               percent_of_overlap))

        granularity, times, values, is_aggregated = (
            agg_operations.evaluate(operations, sampling, times, values,
                                    False, lookup_keys[sampling]))

        values = values.T
        result[sampling] = (granularity, times, values, references[sampling])

    if is_aggregated:
        output = {"aggregated": []}
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            if fill == "dropna":
                pos = ~numpy.isnan(values[0])
                v = values[0][pos]
                t = times[pos]
            else:
                v = values[0]
                t = times
            g = [granularity] * len(t)
            output["aggregated"].extend(six.moves.zip(t, g, v))
        return output
    else:
        r_output = collections.defaultdict(
            lambda: collections.defaultdict(
                lambda: collections.defaultdict(list)))
        m_output = collections.defaultdict(
            lambda: collections.defaultdict(list))
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            for i, ref in enumerate(references):
                if fill == "dropna":
                    pos = ~numpy.isnan(values[i])
                    v = values[i][pos]
                    t = times[pos]
                else:
                    v = values[i]
                    t = times
                g = [granularity] * len(t)
                measures = six.moves.zip(t, g, v)
                if ref.resource is None:
                    m_output[ref.name][ref.aggregation].extend(measures)
                else:
                    r_output[str(ref.resource.id)][
                        ref.metric.name][ref.aggregation].extend(measures)
        return r_output if r_output else m_output
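
# Minimal sketch of the fill == "dropna" masking used by the variants of this
# function above: NaN points are removed from the output, and one variant
# additionally drops +/-inf via numpy.logical_or with numpy.isinf.
import numpy

t = numpy.array([0, 60, 120, 180])
v = numpy.array([1.0, numpy.nan, 3.0, numpy.inf])

pos = ~numpy.logical_or(numpy.isnan(v), numpy.isinf(v))
print(t[pos], v[pos])  # [  0 120] [1. 3.]
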