def handle_binary_operator(nodes, granularity, timestamps, initial_values,
                           is_aggregated, references):
    op = nodes[0]
    g1, t1, v1, is_a1 = evaluate(nodes[1], granularity, timestamps,
                                 initial_values, is_aggregated, references)
    g2, t2, v2, is_a2 = evaluate(nodes[2], granularity, timestamps,
                                 initial_values, is_aggregated, references)

    is_aggregated = is_a1 or is_a2
    # We keep the computed timeseries
    if isinstance(v1, numpy.ndarray) and isinstance(v2, numpy.ndarray):
        if not numpy.array_equal(t1, t2) or g1 != g2:
            raise exceptions.UnAggregableTimeseries(
                references,
                "Can't compute timeseries with different "
                "granularity %s <> %s" % (nodes[1], nodes[2]))
        timestamps = t1
        granularity = g1
        is_aggregated = True
    elif isinstance(v2, numpy.ndarray):
        timestamps = t2
        granularity = g2
    else:
        timestamps = t1
        granularity = g1

    values = binary_operators[op](v1, v2)
    return granularity, timestamps, values, is_aggregated
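# Hedged sketch of the dispatch above, not the project's actual table: it
# assumes `binary_operators` maps operator tokens to NumPy ufuncs. Ufuncs
# broadcast, so a scalar operand combines naturally with a series while two
# aligned series combine element-wise.
def _example_binary_operator_dispatch():
    import numpy

    example_operators = {"+": numpy.add, "-": numpy.subtract,
                         "*": numpy.multiply, "/": numpy.true_divide}
    series = numpy.array([1., 2., 3.])
    assert list(example_operators["*"](series, 2.0)) == [2., 4., 6.]
    assert list(example_operators["+"](series, series)) == [2., 4., 6.]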
def handle_rolling(agg, granularity, timestamps, values, is_aggregated,
                   references, window):
    if window > len(values):
        raise exceptions.UnAggregableTimeseries(
            references,
            "Rolling window '%d' is greater than series length '%d'" %
            (window, len(values)))

    timestamps = timestamps[window - 1:]
    values = values.T
    # rigtorp.se/2011/01/01/rolling-statistics-numpy.html
    shape = values.shape[:-1] + (values.shape[-1] - window + 1, window)
    strides = values.strides + (values.strides[-1],)
    new_values = AGG_MAP[agg](as_strided(values, shape=shape,
                                         strides=strides), axis=-1)
    return granularity, timestamps, new_values.T, is_aggregated
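# Standalone sketch of the striding trick used in handle_rolling (after
# rigtorp.se/2011/01/01/rolling-statistics-numpy.html). The data and window
# size here are illustrative. as_strided builds a zero-copy view of shape
# (n - window + 1, window) where each row is one rolling window over the
# original buffer, so any reduction along axis=-1 becomes a rolling statistic.
def _example_rolling_mean():
    import numpy
    from numpy.lib.stride_tricks import as_strided

    values = numpy.array([1., 2., 3., 4., 5.])
    window = 3
    shape = values.shape[:-1] + (values.shape[-1] - window + 1, window)
    strides = values.strides + (values.strides[-1],)
    windows = as_strided(values, shape=shape, strides=strides)
    assert list(numpy.mean(windows, axis=-1)) == [2., 3., 4.]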
def get_measures(storage, references, operations,
                 from_timestamp=None, to_timestamp=None,
                 granularities=None, needed_overlap=100.0,
                 fill=None):
    """Get aggregated measures of multiple entities.

    :param storage: The storage driver.
    :param references: List of metric references (metric + aggregation
                       method) to aggregate.
    :param operations: The operations to apply to the timeseries.
    :param from_timestamp: The timestamp to get the measure from.
    :param to_timestamp: The timestamp to get the measure to.
    :param granularities: The granularities to retrieve.
    :param needed_overlap: The percentage of overlap needed between series.
    :param fill: The value to use to fill in missing data in series.
    """
    if granularities is None:
        all_granularities = (
            definition.granularity
            for ref in references
            for definition in ref.metric.archive_policy.definition
        )
        # granularities_in_common
        granularities = [
            g
            for g, occurrence in six.iteritems(
                collections.Counter(all_granularities))
            if occurrence == len(references)
        ]

        if not granularities:
            raise exceptions.UnAggregableTimeseries(
                list((ref.name, ref.aggregation)
                     for ref in references),
                'No granularity match')

    references_with_missing_granularity = []
    for ref in references:
        if (ref.aggregation not in
                ref.metric.archive_policy.aggregation_methods):
            raise gnocchi_storage.AggregationDoesNotExist(
                ref.metric, ref.aggregation,
                # Use the first granularity, that should be good enough
                # since they are all missing anyway
                ref.metric.archive_policy.definition[0].granularity)

        available_granularities = [
            d.granularity
            for d in ref.metric.archive_policy.definition
        ]
        for g in granularities:
            if g not in available_granularities:
                references_with_missing_granularity.append(
                    (ref.name, ref.aggregation, g))
                break

    if references_with_missing_granularity:
        raise exceptions.UnAggregableTimeseries(
            references_with_missing_granularity,
            "Granularities are missing")

    tss = utils.parallel_map(_get_measures_timeserie,
                             [(storage, ref, g, from_timestamp, to_timestamp)
                              for ref in references
                              for g in granularities])

    return aggregated(tss, operations, from_timestamp, to_timestamp,
                      needed_overlap, fill)
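# Hedged sketch of the "granularities in common" selection above, with
# made-up per-metric granularity lists (in seconds). Counting every
# granularity across all archive policies and keeping those seen once per
# reference yields the intersection, since a single policy never repeats a
# granularity.
def _example_common_granularities():
    import collections

    per_metric = [[60, 300, 3600], [300, 3600], [300, 86400]]
    counter = collections.Counter(g for gs in per_metric for g in gs)
    common = [g for g, occurrence in counter.items()
              if occurrence == len(per_metric)]
    assert common == [300]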
def aggregated(refs_and_timeseries, operations, from_timestamp=None,
               to_timestamp=None, needed_percent_of_overlap=100.0,
               fill=None):

    series = collections.defaultdict(list)
    references = collections.defaultdict(list)
    lookup_keys = collections.defaultdict(list)
    for (ref, timeserie) in refs_and_timeseries:
        from_ = (None if from_timestamp is None else
                 carbonara.round_timestamp(
                     from_timestamp, timeserie.aggregation.granularity))
        references[timeserie.aggregation.granularity].append(ref)
        lookup_keys[timeserie.aggregation.granularity].append(ref.lookup_key)
        series[timeserie.aggregation.granularity].append(
            timeserie[from_:to_timestamp])

    result = {}
    is_aggregated = False
    for sampling in sorted(series, reverse=True):
        # np.unique sorts results for us
        times, indices = numpy.unique(numpy.concatenate(
            [i['timestamps'] for i in series[sampling]]),
            return_inverse=True)

        # create nd-array (unique series x unique times) and fill
        filler = (numpy.NaN if fill in [None, 'null', 'dropna']
                  else fill)
        val_grid = numpy.full((len(series[sampling]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[sampling]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            overlap = numpy.flatnonzero(~numpy.any(numpy.isnan(values),
                                                   axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(
                    lookup_keys[sampling], 'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        lookup_keys[sampling],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' % (needed_percent_of_overlap,
                                               percent_of_overlap))

        granularity, times, values, is_aggregated = (
            agg_operations.evaluate(operations, sampling, times, values,
                                    False, lookup_keys[sampling]))

        values = values.T
        result[sampling] = (granularity, times, values, references[sampling])

    if is_aggregated:
        output = {"aggregated": []}
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            if fill == "dropna":
                pos = ~numpy.logical_or(numpy.isnan(values[0]),
                                        numpy.isinf(values[0]))
                v = values[0][pos]
                t = times[pos]
            else:
                v = values[0]
                t = times
            g = [granularity] * len(t)
            output["aggregated"].extend(six.moves.zip(t, g, v))
        return output
    else:
        r_output = collections.defaultdict(
            lambda: collections.defaultdict(
                lambda: collections.defaultdict(list)))
        m_output = collections.defaultdict(
            lambda: collections.defaultdict(list))
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            for i, ref in enumerate(references):
                if fill == "dropna":
                    pos = ~numpy.logical_or(numpy.isnan(values[i]),
                                            numpy.isinf(values[i]))
                    v = values[i][pos]
                    t = times[pos]
                else:
                    v = values[i]
                    t = times
                g = [granularity] * len(t)
                measures = six.moves.zip(t, g, v)
                if ref.resource is None:
                    m_output[ref.name][ref.aggregation].extend(measures)
                else:
                    r_output[str(ref.resource.id)][
                        ref.metric.name][ref.aggregation].extend(measures)
        return r_output if r_output else m_output
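# Minimal sketch of the alignment trick above, with made-up timestamps.
# numpy.unique(..., return_inverse=True) gives, for each concatenated
# timestamp, its column in the (series x unique-times) grid, so each
# series' values land in the right slots and missing points stay NaN.
def _example_grid_alignment():
    import numpy

    ts_a = numpy.array([1, 2, 4])
    ts_b = numpy.array([2, 3, 4])
    times, indices = numpy.unique(numpy.concatenate([ts_a, ts_b]),
                                  return_inverse=True)
    val_grid = numpy.full((2, len(times)), numpy.nan)
    val_grid[0][indices[:len(ts_a)]] = [10., 20., 40.]
    val_grid[1][indices[len(ts_a):]] = [21., 31., 41.]
    assert list(times) == [1, 2, 3, 4]
    # Row 0 has a hole at t=3, row 1 at t=1:
    assert numpy.isnan(val_grid[0][2]) and numpy.isnan(val_grid[1][0])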
def get_measures(storage, metrics_and_aggregations, operations,
                 from_timestamp=None, to_timestamp=None,
                 granularity=None, needed_overlap=100.0,
                 fill=None, ref_identifier="id"):
    """Get aggregated measures of multiple entities.

    :param storage: The storage driver.
    :param metrics_and_aggregations: List of metric+agg_method tuples
                                     to aggregate.
    :param operations: The operations to apply to the timeseries.
    :param from_timestamp: The timestamp to get the measure from.
    :param to_timestamp: The timestamp to get the measure to.
    :param granularity: The granularity to retrieve.
    :param needed_overlap: The percentage of overlap needed between series.
    :param fill: The value to use to fill in missing data in series.
    :param ref_identifier: The metric attribute used to identify references.
    """
    references_with_missing_granularity = []
    for (metric, aggregation) in metrics_and_aggregations:
        if aggregation not in metric.archive_policy.aggregation_methods:
            raise gnocchi_storage.AggregationDoesNotExist(metric,
                                                          aggregation)
        if granularity is not None:
            for d in metric.archive_policy.definition:
                if d.granularity == granularity:
                    break
            else:
                references_with_missing_granularity.append(
                    (getattr(metric, ref_identifier), aggregation))

    if references_with_missing_granularity:
        raise exceptions.UnAggregableTimeseries(
            references_with_missing_granularity,
            "granularity '%d' is missing" %
            utils.timespan_total_seconds(granularity))

    if granularity is None:
        granularities = (
            definition.granularity
            for (metric, aggregation) in metrics_and_aggregations
            for definition in metric.archive_policy.definition
        )
        granularities_in_common = [
            g
            for g, occurrence in six.iteritems(
                collections.Counter(granularities))
            if occurrence == len(metrics_and_aggregations)
        ]

        if not granularities_in_common:
            raise exceptions.UnAggregableTimeseries(
                list((str(getattr(m, ref_identifier)), a)
                     for (m, a) in metrics_and_aggregations),
                'No granularity match')
    else:
        granularities_in_common = [granularity]

    tss = utils.parallel_map(
        _get_measures_timeserie,
        [(storage, metric, aggregation, ref_identifier, g,
          from_timestamp, to_timestamp)
         for (metric, aggregation) in metrics_and_aggregations
         for g in granularities_in_common])

    return aggregated(tss, operations, from_timestamp, to_timestamp,
                      needed_overlap, fill)
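# Illustration of the for/else idiom used above, with made-up timedeltas:
# the else branch runs only when the loop finishes without hitting break,
# i.e. when no archive policy definition carries the requested granularity.
def _example_granularity_check():
    import datetime

    wanted = datetime.timedelta(seconds=300)
    definitions = [datetime.timedelta(seconds=60),
                   datetime.timedelta(seconds=3600)]
    missing = False
    for d in definitions:
        if d == wanted:
            break
    else:
        missing = True
    assert missing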
def aggregated(refs_and_timeseries, operations, from_timestamp=None,
               to_timestamp=None, needed_percent_of_overlap=100.0,
               fill=None):

    series = collections.defaultdict(list)
    references = collections.defaultdict(list)
    for (reference, timeserie) in refs_and_timeseries:
        from_ = (None if from_timestamp is None else
                 carbonara.round_timestamp(
                     from_timestamp, timeserie.sampling))
        references[timeserie.sampling].append(reference)
        series[timeserie.sampling].append(timeserie[from_:to_timestamp])

    result = collections.defaultdict(lambda: {'timestamps': [],
                                              'granularity': [],
                                              'values': []})
    for key in sorted(series, reverse=True):
        combine = numpy.concatenate(series[key])
        # np.unique sorts results for us
        times, indices = numpy.unique(combine['timestamps'],
                                      return_inverse=True)

        # create nd-array (unique series x unique times) and fill
        filler = (numpy.NaN if fill in [None, 'null', 'dropna']
                  else fill)
        val_grid = numpy.full((len(series[key]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[key]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            overlap = numpy.flatnonzero(~numpy.any(numpy.isnan(values),
                                                   axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(references[key],
                                                        'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        references[key],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' % (needed_percent_of_overlap,
                                               percent_of_overlap))

        granularity, times, values, is_aggregated = (
            agg_operations.evaluate(operations, key, times, values,
                                    False, references[key]))

        values = values.T
        if is_aggregated:
            idents = ["aggregated"]
        else:
            idents = ["%s_%s" % tuple(ref) for ref in references[key]]
        for i, ident in enumerate(idents):
            if fill == "dropna":
                pos = ~numpy.isnan(values[i])
                v = values[i][pos]
                t = times[pos]
            else:
                v = values[i]
                t = times
            result[ident]["timestamps"].extend(t)
            result[ident]['granularity'].extend([granularity] * len(t))
            result[ident]['values'].extend(v)

    return dict(
        (ident, list(six.moves.zip(result[ident]['timestamps'],
                                   result[ident]['granularity'],
                                   result[ident]['values'])))
        for ident in result)
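# Sketch of the overlap computation above with made-up data: rows of the
# (times x series) grid with no NaN in any series count as overlapping,
# and their share of all rows must reach needed_percent_of_overlap.
def _example_overlap_percentage():
    import numpy

    values = numpy.array([[10., numpy.nan],
                          [20., 21.],
                          [40., 41.]])
    overlap = numpy.flatnonzero(~numpy.any(numpy.isnan(values), axis=1))
    percent = overlap.size * 100.0 / values.shape[0]
    assert round(percent, 2) == 66.67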