Code example #1
import collections

import numpy
import six

# Imports are not shown in the original snippet; the module paths below are
# assumed from gnocchi's layout, which this function appears to come from.
from gnocchi import carbonara
from gnocchi.rest.aggregates import exceptions
from gnocchi.rest.aggregates import operations as agg_operations


def aggregated(refs_and_timeseries,
               operations,
               from_timestamp=None,
               to_timestamp=None,
               needed_percent_of_overlap=100.0,
               fill=None):

    series = collections.defaultdict(list)
    references = collections.defaultdict(list)
    for (reference, timeserie) in refs_and_timeseries:
        from_ = (None if from_timestamp is None else carbonara.round_timestamp(
            from_timestamp, timeserie.sampling))
        references[timeserie.sampling].append(reference)
        series[timeserie.sampling].append(timeserie[from_:to_timestamp])

    result = collections.defaultdict(lambda: {
        'timestamps': [],
        'granularity': [],
        'values': []
    })
    for key in sorted(series, reverse=True):
        combine = numpy.concatenate(series[key])
        # np.unique sorts results for us
        times, indices = numpy.unique(combine['timestamps'],
                                      return_inverse=True)

        # create nd-array (unique series x unique times) and fill
        filler = (numpy.NaN if fill in [None, 'null', 'dropna'] else fill)
        val_grid = numpy.full((len(series[key]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[key]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            overlap = numpy.flatnonzero(
                ~numpy.any(numpy.isnan(values), axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(
                    references[key], 'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        references[key],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' %
                        (needed_percent_of_overlap, percent_of_overlap))

        granularity, times, values, is_aggregated = (agg_operations.evaluate(
            operations, key, times, values, False, references[key]))

        values = values.T
        if is_aggregated:
            idents = ["aggregated"]
        else:
            idents = ["%s_%s" % tuple(ref) for ref in references[key]]
        for i, ident in enumerate(idents):
            if fill == "dropna":
                pos = ~numpy.isnan(values[i])
                v = values[i][pos]
                t = times[pos]
            else:
                v = values[i]
                t = times
            result[ident]["timestamps"].extend(t)
            result[ident]['granularity'].extend([granularity] * len(t))
            result[ident]['values'].extend(v)

    return {ident: list(six.moves.zip(result[ident]['timestamps'],
                                      result[ident]['granularity'],
                                      result[ident]['values']))
            for ident in result}
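All three examples align their input series the same way: the timestamps of every series are concatenated, numpy.unique builds the sorted common time axis, and return_inverse scatters each series' values into a NaN-filled (series x times) grid. A minimal standalone sketch of that step, using plain dicts of numpy arrays in place of carbonara timeseries (the toy timestamps and values are illustrative):

import numpy

# Two toy series with partially overlapping timestamps (illustrative data).
series = [
    {'timestamps': numpy.array([0, 60, 120]),
     'values': numpy.array([1.0, 2.0, 3.0])},
    {'timestamps': numpy.array([60, 120, 180]),
     'values': numpy.array([4.0, 5.0, 6.0])},
]

# np.unique sorts the combined timestamps; return_inverse maps every
# original timestamp to its column on the unified axis.
times, indices = numpy.unique(
    numpy.concatenate([s['timestamps'] for s in series]),
    return_inverse=True)

# One row per series, one column per unique timestamp; NaN marks the
# timestamps where a series has no datapoint.
val_grid = numpy.full((len(series), len(times)), numpy.nan)
start = 0
for i, s in enumerate(series):
    size = len(s['values'])
    val_grid[i][indices[start:start + size]] = s['values']
    start += size

print(times)     # [  0  60 120 180]
print(val_grid)  # [[ 1.  2.  3. nan]
                 #  [nan  4.  5.  6.]]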
Code example #2
File: processor.py Project: vishalbelsare/gnocchi
import collections

import numpy
import six

# Imports are not shown in the snippet; module paths assumed from gnocchi.
from gnocchi import carbonara
from gnocchi.rest.aggregates import exceptions
from gnocchi.rest.aggregates import operations as agg_operations


def aggregated(refs_and_timeseries,
               operations,
               from_timestamp=None,
               to_timestamp=None,
               needed_percent_of_overlap=100.0,
               fill=None):

    series = collections.defaultdict(list)
    references = collections.defaultdict(list)
    lookup_keys = collections.defaultdict(list)
    for (ref, timeserie) in refs_and_timeseries:
        from_ = (None if from_timestamp is None else carbonara.round_timestamp(
            from_timestamp, timeserie.aggregation.granularity))
        references[timeserie.aggregation.granularity].append(ref)
        lookup_keys[timeserie.aggregation.granularity].append(ref.lookup_key)
        series[timeserie.aggregation.granularity].append(
            timeserie[from_:to_timestamp])

    is_aggregated = False
    result = {}
    for sampling in sorted(series, reverse=True):
        # np.unique sorts results for us
        times, indices = numpy.unique(
            numpy.concatenate([i['timestamps'] for i in series[sampling]]),
            return_inverse=True)

        # create nd-array (unique series x unique times) and fill
        filler = (numpy.NaN if fill in [None, 'null', 'dropna'] else fill)
        val_grid = numpy.full((len(series[sampling]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[sampling]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            overlap = numpy.flatnonzero(
                ~numpy.any(numpy.isnan(values), axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(
                    lookup_keys[sampling], 'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        lookup_keys[sampling],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' %
                        (needed_percent_of_overlap, percent_of_overlap))

        granularity, times, values, is_aggregated = (agg_operations.evaluate(
            operations, sampling, times, values, False, lookup_keys[sampling]))

        values = values.T
        result[sampling] = (granularity, times, values, references[sampling])

    if is_aggregated:
        output = {"aggregated": []}
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            if fill == "dropna":
                pos = ~numpy.logical_or(numpy.isnan(values[0]),
                                        numpy.isinf(values[0]))
                v = values[0][pos]
                t = times[pos]
            else:
                v = values[0]
                t = times
            g = [granularity] * len(t)
            output["aggregated"].extend(six.moves.zip(t, g, v))
        return output
    else:
        r_output = collections.defaultdict(lambda: collections.defaultdict(
            lambda: collections.defaultdict(list)))
        m_output = collections.defaultdict(
            lambda: collections.defaultdict(list))
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            for i, ref in enumerate(references):
                if fill == "dropna":
                    pos = ~numpy.logical_or(numpy.isnan(values[i]),
                                            numpy.isinf(values[i]))
                    v = values[i][pos]
                    t = times[pos]
                else:
                    v = values[i]
                    t = times
                g = [granularity] * len(t)
                measures = six.moves.zip(t, g, v)
                if ref.resource is None:
                    m_output[ref.name][ref.aggregation].extend(measures)
                else:
                    r_output[str(ref.resource.id)][ref.metric.name][
                        ref.aggregation].extend(measures)
        return r_output if r_output else m_output
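With fill left as None, each version only aggregates timestamps covered by every series and raises UnAggregableTimeseries when coverage falls below needed_percent_of_overlap. A standalone sketch of that check on the transposed (times x series) grid, continuing the toy data above; the combined slice mirrors the examples' trimming when both from_timestamp and to_timestamp are None:

import numpy

# (times x series) grid: a row is fully populated only where every
# series has a datapoint at that timestamp.
values = numpy.array([[1.0, numpy.nan],
                      [2.0, 4.0],
                      [3.0, 5.0],
                      [numpy.nan, 6.0]])
times = numpy.array([0, 60, 120, 180])
needed_percent_of_overlap = 100.0

# Row indices with no NaN, i.e. timestamps present in every series.
overlap = numpy.flatnonzero(~numpy.any(numpy.isnan(values), axis=1))

# With no explicit boundaries set, trim to the first and last
# overlapping timestamps before computing the coverage percentage.
times = times[overlap[0]:overlap[-1] + 1]
values = values[overlap[0]:overlap[-1] + 1]

percent_of_overlap = overlap.size * 100.0 / times.size
print(percent_of_overlap)  # 100.0 after trimming; below the threshold
                           # the examples raise UnAggregableTimeseries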
Code example #3
File: processor.py Project: luo-zn/gnocchi
import collections

import numpy
import six

# Imports are not shown in the snippet; module paths assumed from gnocchi.
from gnocchi import carbonara
from gnocchi.rest.aggregates import exceptions
from gnocchi.rest.aggregates import operations as agg_operations


def aggregated(refs_and_timeseries, operations, from_timestamp=None,
               to_timestamp=None, needed_percent_of_overlap=100.0, fill=None):

    series = collections.defaultdict(list)
    references = collections.defaultdict(list)
    lookup_keys = collections.defaultdict(list)
    for (ref, timeserie) in refs_and_timeseries:
        from_ = (None if from_timestamp is None else
                 carbonara.round_timestamp(
                     from_timestamp, timeserie.aggregation.granularity))
        references[timeserie.aggregation.granularity].append(ref)
        lookup_keys[timeserie.aggregation.granularity].append(ref.lookup_key)
        series[timeserie.aggregation.granularity].append(
            timeserie[from_:to_timestamp])

    is_aggregated = False
    result = {}
    for sampling in sorted(series, reverse=True):
        combine = numpy.concatenate(series[sampling])
        # np.unique sorts results for us
        times, indices = numpy.unique(combine['timestamps'],
                                      return_inverse=True)

        # create nd-array (unique series x unique times) and fill
        filler = (numpy.NaN if fill in [None, 'null', 'dropna']
                  else fill)
        val_grid = numpy.full((len(series[sampling]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[sampling]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            overlap = numpy.flatnonzero(~numpy.any(numpy.isnan(values),
                                                   axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(lookup_keys[sampling],
                                                        'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        lookup_keys[sampling],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' % (needed_percent_of_overlap,
                                               percent_of_overlap))

        granularity, times, values, is_aggregated = (
            agg_operations.evaluate(operations, sampling, times, values,
                                    False, lookup_keys[sampling]))

        values = values.T
        result[sampling] = (granularity, times, values, references[sampling])

    if is_aggregated:
        output = {"aggregated": []}
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            if fill == "dropna":
                pos = ~numpy.isnan(values[0])
                v = values[0][pos]
                t = times[pos]
            else:
                v = values[0]
                t = times
            g = [granularity] * len(t)
            output["aggregated"].extend(six.moves.zip(t, g, v))
        return output
    else:
        r_output = collections.defaultdict(
            lambda: collections.defaultdict(
                lambda: collections.defaultdict(list)))
        m_output = collections.defaultdict(
            lambda: collections.defaultdict(list))
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            for i, ref in enumerate(references):
                if fill == "dropna":
                    pos = ~numpy.isnan(values[i])
                    v = values[i][pos]
                    t = times[pos]
                else:
                    v = values[i]
                    t = times
                g = [granularity] * len(t)
                measures = six.moves.zip(t, g, v)
                if ref.resource is None:
                    m_output[ref.name][ref.aggregation].extend(measures)
                else:
                    r_output[str(ref.resource.id)][
                        ref.metric.name][ref.aggregation].extend(measures)
        return r_output if r_output else m_output
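One behavioural difference between the versions: with fill="dropna", example #3 (like example #1) drops only NaN values, while example #2 also drops infinities, which aggregation operations such as division can produce. A minimal sketch of the stricter filter from example #2, on illustrative data:

import numpy

values = numpy.array([1.0, numpy.nan, numpy.inf, 4.0])
times = numpy.array([0, 60, 120, 180])

# Keep positions that are neither NaN nor +/-inf, as example #2 does;
# example #3 would keep the inf at timestamp 120.
pos = ~numpy.logical_or(numpy.isnan(values), numpy.isinf(values))
print(list(zip(times[pos], values[pos])))
# only the points at timestamps 0 and 180 survive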