Example #1
def handle_binary_operator(nodes, granularity, timestamps, initial_values,
                           is_aggregated, references):
    op = nodes[0]
    g1, t1, v1, is_a1 = evaluate(nodes[1], granularity, timestamps,
                                 initial_values, is_aggregated, references)
    g2, t2, v2, is_a2 = evaluate(nodes[2], granularity, timestamps,
                                 initial_values, is_aggregated, references)

    is_aggregated = is_a1 or is_a2
    # We keep the computed timeseries
    if isinstance(v1, numpy.ndarray) and isinstance(v2, numpy.ndarray):
        if not numpy.array_equal(t1, t2) or g1 != g2:
            raise exceptions.UnAggregableTimeseries(
                references, "Can't compute timeseries with different "
                "granularity %s <> %s" % (nodes[1], nodes[2]))
        timestamps = t1
        granularity = g1
        is_aggregated = True

    elif isinstance(v2, numpy.ndarray):
        timestamps = t2
        granularity = g2
    else:
        timestamps = t1
        granularity = g1

    values = binary_operators[op](v1, v2)
    return granularity, timestamps, values, is_aggregated
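
A minimal standalone sketch of the dispatch this handler relies on, assuming
binary_operators maps operator tokens to numpy ufuncs (the token names below
are illustrative, not taken from the example above):

import numpy

binary_operators = {
    "+": numpy.add,
    "-": numpy.subtract,
    "*": numpy.multiply,
    "/": numpy.true_divide,
}

# Two series already aligned on the same timestamps and granularity.
v1 = numpy.array([1.0, 2.0, 3.0])
v2 = numpy.array([10.0, 20.0, 30.0])
print(binary_operators["+"](v1, v2))  # [11. 22. 33.]
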
Example #2
def handle_rolling(agg, granularity, timestamps, values, is_aggregated,
                   references, window):
    if window > len(values):
        raise exceptions.UnAggregableTimeseries(
            references,
            "Rolling window '%d' is greater than serie length '%d'" %
            (window, len(values)))

    timestamps = timestamps[window - 1:]
    values = values.T
    # rigtorp.se/2011/01/01/rolling-statistics-numpy.html
    shape = values.shape[:-1] + (values.shape[-1] - window + 1, window)
    strides = values.strides + (values.strides[-1], )
    new_values = AGG_MAP[agg](as_strided(values, shape=shape, strides=strides),
                              axis=-1)
    return granularity, timestamps, new_values.T, is_aggregated
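
The strided rolling window above can be tried on its own; a minimal sketch of
the same trick from the linked article, using only numpy:

import numpy
from numpy.lib.stride_tricks import as_strided

values = numpy.array([1.0, 2.0, 3.0, 4.0, 5.0])
window = 3
# Build a (len(values) - window + 1, window) view without copying data.
shape = values.shape[:-1] + (values.shape[-1] - window + 1, window)
strides = values.strides + (values.strides[-1],)
windows = as_strided(values, shape=shape, strides=strides)
print(numpy.mean(windows, axis=-1))  # [2. 3. 4.] -- rolling mean
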
Example #3
def get_measures(storage,
                 references,
                 operations,
                 from_timestamp=None,
                 to_timestamp=None,
                 granularities=None,
                 needed_overlap=100.0,
                 fill=None):
    """Get aggregated measures of multiple entities.

    :param storage: The storage driver.
    :param metrics_and_aggregations: List of metric+agg_method tuple
                                     measured to aggregate.
    :param from timestamp: The timestamp to get the measure from.
    :param to timestamp: The timestamp to get the measure to.
    :param granularities: The granularities to retrieve.
    :param fill: The value to use to fill in missing data in series.
    """

    if granularities is None:
        all_granularities = (
            definition.granularity for ref in references
            for definition in ref.metric.archive_policy.definition)
        # granularities_in_common
        granularities = [
            g for g, occurrence in six.iteritems(
                collections.Counter(all_granularities))
            if occurrence == len(references)
        ]

        if not granularities:
            raise exceptions.UnAggregableTimeseries(
                list((ref.name, ref.aggregation) for ref in references),
                'No granularity match')

    references_with_missing_granularity = []
    for ref in references:
        if (ref.aggregation
                not in ref.metric.archive_policy.aggregation_methods):
            raise gnocchi_storage.AggregationDoesNotExist(
                ref.metric,
                ref.aggregation,
                # Use the first granularity, that should be good enough since
                # they are all missing anyway
                ref.metric.archive_policy.definition[0].granularity)

        available_granularities = [
            d.granularity for d in ref.metric.archive_policy.definition
        ]
        for g in granularities:
            if g not in available_granularities:
                references_with_missing_granularity.append(
                    (ref.name, ref.aggregation, g))
                break

    if references_with_missing_granularity:
        raise exceptions.UnAggregableTimeseries(
            references_with_missing_granularity, "Granularities are missing")

    tss = utils.parallel_map(_get_measures_timeserie,
                             [(storage, ref, g, from_timestamp, to_timestamp)
                              for ref in references for g in granularities])

    return aggregated(tss, operations, from_timestamp, to_timestamp,
                      needed_overlap, fill)
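
The "granularities in common" computation reduces to keeping the granularities
that occur once per reference; a minimal sketch with made-up archive-policy
values (in seconds):

import collections

# One list of available granularities per metric.
per_metric = [[60, 300, 3600], [300, 3600], [300, 3600, 86400]]
all_granularities = [g for availables in per_metric for g in availables]
counter = collections.Counter(all_granularities)
in_common = [g for g, occurrence in counter.items()
             if occurrence == len(per_metric)]
print(in_common)  # [300, 3600]
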
Example #4
def aggregated(refs_and_timeseries,
               operations,
               from_timestamp=None,
               to_timestamp=None,
               needed_percent_of_overlap=100.0,
               fill=None):

    series = collections.defaultdict(list)
    references = collections.defaultdict(list)
    lookup_keys = collections.defaultdict(list)
    for (ref, timeserie) in refs_and_timeseries:
        from_ = (None if from_timestamp is None else carbonara.round_timestamp(
            from_timestamp, timeserie.aggregation.granularity))
        references[timeserie.aggregation.granularity].append(ref)
        lookup_keys[timeserie.aggregation.granularity].append(ref.lookup_key)
        series[timeserie.aggregation.granularity].append(
            timeserie[from_:to_timestamp])

    is_aggregated = False
    result = {}
    for sampling in sorted(series, reverse=True):
        # np.unique sorts results for us
        times, indices = numpy.unique(numpy.concatenate(
            [i['timestamps'] for i in series[sampling]]),
                                      return_inverse=True)

        # create nd-array (unique series x unique times) and fill
        filler = (numpy.NaN if fill in [None, 'null', 'dropna'] else fill)
        val_grid = numpy.full((len(series[sampling]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[sampling]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            overlap = numpy.flatnonzero(
                ~numpy.any(numpy.isnan(values), axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(
                    lookup_keys[sampling], 'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        lookup_keys[sampling],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' %
                        (needed_percent_of_overlap, percent_of_overlap))

        granularity, times, values, is_aggregated = (agg_operations.evaluate(
            operations, sampling, times, values, False, lookup_keys[sampling]))

        values = values.T
        result[sampling] = (granularity, times, values, references[sampling])

    if is_aggregated:
        output = {"aggregated": []}
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            if fill == "dropna":
                pos = ~numpy.logical_or(numpy.isnan(values[0]),
                                        numpy.isinf(values[0]))
                v = values[0][pos]
                t = times[pos]
            else:
                v = values[0]
                t = times
            g = [granularity] * len(t)
            output["aggregated"].extend(six.moves.zip(t, g, v))
        return output
    else:
        r_output = collections.defaultdict(lambda: collections.defaultdict(
            lambda: collections.defaultdict(list)))
        m_output = collections.defaultdict(
            lambda: collections.defaultdict(list))
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            for i, ref in enumerate(references):
                if fill == "dropna":
                    pos = ~numpy.logical_or(numpy.isnan(values[i]),
                                            numpy.isinf(values[i]))
                    v = values[i][pos]
                    t = times[pos]
                else:
                    v = values[i]
                    t = times
                g = [granularity] * len(t)
                measures = six.moves.zip(t, g, v)
                if ref.resource is None:
                    m_output[ref.name][ref.aggregation].extend(measures)
                else:
                    r_output[str(ref.resource.id)][ref.metric.name][
                        ref.aggregation].extend(measures)
        return r_output if r_output else m_output
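
The grid-building step in aggregated() relies on numpy.unique with
return_inverse=True to map every serie's timestamps onto the columns of a
shared grid; a minimal sketch with two hand-made series:

import numpy

ts1 = numpy.array([0, 60, 120])
ts2 = numpy.array([60, 120, 180])
times, indices = numpy.unique(numpy.concatenate([ts1, ts2]),
                              return_inverse=True)
val_grid = numpy.full((2, len(times)), numpy.nan)
start = 0
for i, (ts, vals) in enumerate([(ts1, [1.0, 2.0, 3.0]),
                                (ts2, [20.0, 30.0, 40.0])]):
    size = len(ts)
    val_grid[i][indices[start:start + size]] = vals
    start += size
print(times)     # [  0  60 120 180]
print(val_grid)  # rows aligned on the shared timestamps, NaN where a serie has no point
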
Example #5
def get_measures(storage,
                 metrics_and_aggregations,
                 operations,
                 from_timestamp=None,
                 to_timestamp=None,
                 granularity=None,
                 needed_overlap=100.0,
                 fill=None,
                 ref_identifier="id"):
    """Get aggregated measures of multiple entities.

    :param storage: The storage driver.
    :param metrics_and_aggregations: List of metric+agg_method tuple
                                     measured to aggregate.
    :param from timestamp: The timestamp to get the measure from.
    :param to timestamp: The timestamp to get the measure to.
    :param granularity: The granularity to retrieve.
    :param fill: The value to use to fill in missing data in series.
    """

    references_with_missing_granularity = []
    for (metric, aggregation) in metrics_and_aggregations:
        if aggregation not in metric.archive_policy.aggregation_methods:
            raise gnocchi_storage.AggregationDoesNotExist(metric, aggregation)
        if granularity is not None:
            for d in metric.archive_policy.definition:
                if d.granularity == granularity:
                    break
            else:
                references_with_missing_granularity.append(
                    (getattr(metric, ref_identifier), aggregation))

    if references_with_missing_granularity:
        raise exceptions.UnAggregableTimeseries(
            references_with_missing_granularity,
            "granularity '%d' is missing" %
            utils.timespan_total_seconds(granularity))

    if granularity is None:
        granularities = (definition.granularity
                         for (metric, aggregation) in metrics_and_aggregations
                         for definition in metric.archive_policy.definition)
        granularities_in_common = [
            g for g, occurrence in six.iteritems(
                collections.Counter(granularities))
            if occurrence == len(metrics_and_aggregations)
        ]

        if not granularities_in_common:
            raise exceptions.UnAggregableTimeseries(
                list((str(getattr(m, ref_identifier)), a)
                     for (m, a) in metrics_and_aggregations),
                'No granularity match')
    else:
        granularities_in_common = [granularity]

    tss = utils.parallel_map(
        _get_measures_timeserie,
        [(storage, metric, aggregation, ref_identifier, g, from_timestamp,
          to_timestamp) for (metric, aggregation) in metrics_and_aggregations
         for g in granularities_in_common])

    return aggregated(tss, operations, from_timestamp, to_timestamp,
                      needed_overlap, fill)
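
Example #5 uses Python's for/else to detect a metric whose archive policy does
not provide the requested granularity; a tiny standalone sketch of the idiom
(the values are made up):

available = [60, 300]
requested = 3600
for g in available:
    if g == requested:
        break
else:
    # Runs only when the loop finished without hitting `break`.
    print("granularity %d is missing" % requested)
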
Example #6
def aggregated(refs_and_timeseries,
               operations,
               from_timestamp=None,
               to_timestamp=None,
               needed_percent_of_overlap=100.0,
               fill=None):

    series = collections.defaultdict(list)
    references = collections.defaultdict(list)
    for (reference, timeserie) in refs_and_timeseries:
        from_ = (None if from_timestamp is None else carbonara.round_timestamp(
            from_timestamp, timeserie.sampling))
        references[timeserie.sampling].append(reference)
        series[timeserie.sampling].append(timeserie[from_:to_timestamp])

    result = collections.defaultdict(lambda: {
        'timestamps': [],
        'granularity': [],
        'values': []
    })
    for key in sorted(series, reverse=True):
        combine = numpy.concatenate(series[key])
        # np.unique sorts results for us
        times, indices = numpy.unique(combine['timestamps'],
                                      return_inverse=True)

        # create nd-array (unique series x unique times) and fill
        filler = (numpy.NaN if fill in [None, 'null', 'dropna'] else fill)
        val_grid = numpy.full((len(series[key]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[key]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            overlap = numpy.flatnonzero(
                ~numpy.any(numpy.isnan(values), axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(
                    references[key], 'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        references[key],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' %
                        (needed_percent_of_overlap, percent_of_overlap))

        granularity, times, values, is_aggregated = (agg_operations.evaluate(
            operations, key, times, values, False, references[key]))

        values = values.T
        if is_aggregated:
            idents = ["aggregated"]
        else:
            idents = ["%s_%s" % tuple(ref) for ref in references[key]]
        for i, ident in enumerate(idents):
            if fill == "dropna":
                pos = ~numpy.isnan(values[i])
                v = values[i][pos]
                t = times[pos]
            else:
                v = values[i]
                t = times
            result[ident]["timestamps"].extend(t)
            result[ident]['granularity'].extend([granularity] * len(t))
            result[ident]['values'].extend(v)

    return dict(((ident,
                  list(
                      six.moves.zip(result[ident]['timestamps'],
                                    result[ident]['granularity'],
                                    result[ident]['values'])))
                 for ident in result))
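
The overlap check shared by both aggregated() variants counts the rows of the
transposed grid that contain no NaN; a minimal sketch of that computation on a
hand-made grid:

import numpy

values = numpy.array([[1.0, numpy.nan],
                      [2.0, 20.0],
                      [3.0, 30.0],
                      [numpy.nan, 40.0]])
overlap = numpy.flatnonzero(~numpy.any(numpy.isnan(values), axis=1))
percent_of_overlap = overlap.size * 100.0 / values.shape[0]
print(overlap)             # [1 2] -- timestamps where every serie has a point
print(percent_of_overlap)  # 50.0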