Example #1
            def _map_compute_splits_operations(bound_timeserie):
                # NOTE(gordc): bound_timeserie is the entire set of
                # unaggregated measures matching the largest
                # granularity. The following takes only the points
                # affected by new measures for a specific granularity.
                tstamp = max(bound_timeserie.first, measures['timestamps'][0])
                new_first_block_timestamp = (
                    bound_timeserie.first_block_timestamp())
                aggregations = metric.archive_policy.aggregations

                grouped_timeseries = {
                    granularity: bound_timeserie.group_serie(
                        granularity,
                        carbonara.round_timestamp(tstamp, granularity))
                    # No need to sort the aggregations, they are already
                    # sorted by granularity.
                    for granularity, aggregations
                    in itertools.groupby(aggregations, ATTRGETTER_GRANULARITY)
                }

                aggregations_and_timeseries = {
                    aggregation:
                    carbonara.AggregatedTimeSerie.from_grouped_serie(
                        grouped_timeseries[aggregation.granularity],
                        aggregation)
                    for aggregation in aggregations
                }

                deleted_keys, keys_and_split_to_store = (
                    self._compute_split_operations(
                        metric, aggregations_and_timeseries,
                        current_first_block_timestamp,
                        new_first_block_timestamp))

                return (new_first_block_timestamp, deleted_keys,
                        keys_and_split_to_store)
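
Both this helper and Example #4 below lean on itertools.groupby, which only groups correctly when its input is already ordered by the grouping key; archive_policy.aggregations is expected to arrive sorted by granularity. A standalone sketch of that pattern, using a made-up Agg stand-in rather than Gnocchi's real aggregation objects:

import collections
import itertools
import operator

# Hypothetical stand-in for an aggregation object; the real class has more
# fields, but only `granularity` matters for the grouping.
Agg = collections.namedtuple('Agg', ['method', 'granularity'])

ATTRGETTER_GRANULARITY = operator.attrgetter('granularity')

# Already sorted by granularity, as the code above assumes; an unsorted
# list would be split into more groups than intended.
aggs = [Agg('mean', 60), Agg('max', 60), Agg('mean', 300), Agg('max', 300)]

for granularity, group in itertools.groupby(aggs, ATTRGETTER_GRANULARITY):
    print(granularity, [a.method for a in group])
# 60 ['mean', 'max']
# 300 ['mean', 'max']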
Example #2
        def _map_add_measures(bound_timeserie):
            # NOTE(gordc): bound_timeserie is the entire set of
            # unaggregated measures matching the largest
            # granularity. The following takes only the points
            # affected by new measures for a specific granularity.
            tstamp = max(bound_timeserie.first, measures['timestamps'][0])
            new_first_block_timestamp = bound_timeserie.first_block_timestamp()
            computed_points['number'] = len(bound_timeserie)
            for d in definition:
                ts = bound_timeserie.group_serie(
                    d.granularity,
                    carbonara.round_timestamp(tstamp, d.granularity))

                self._map_in_thread(
                    self._add_measures,
                    ((aggregation, d, metric, ts,
                      current_first_block_timestamp, new_first_block_timestamp)
                     for aggregation in agg_methods))
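
Example #2 fans the per-granularity work out through a self._map_in_thread helper that is not shown here. A generic sketch of that pattern built on concurrent.futures (an assumption about what such a helper does, not Gnocchi's actual implementation):

from concurrent import futures

def map_in_thread(method, list_of_args, max_workers=4):
    # Run method(*args) for each argument tuple in a thread pool and
    # collect the results; illustrative only.
    with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        return list(executor.map(lambda args: method(*args), list_of_args))

# map_in_thread(print, [(1, 2), (3, 4)]) runs both calls concurrently
# and returns [None, None].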
Example #3
    def test_add_measures_update_subset(self):
        m, m_sql = self._create_metric('medium')
        measures = [
            storage.Measure(utils.dt_to_unix_ns(2014, 1, 6, i, j, 0), 100)
            for i in six.moves.range(2) for j in six.moves.range(0, 60, 2)]
        self.incoming.add_measures(m, measures)
        self.trigger_processing([str(m.id)])

        # add measure to end, in same aggregate time as last point.
        new_point = utils.dt_to_unix_ns(2014, 1, 6, 1, 58, 1)
        self.incoming.add_measures(
            m, [storage.Measure(new_point, 100)])

        with mock.patch.object(self.storage, '_add_measures') as c:
            self.trigger_processing([str(m.id)])
        for __, args, __ in c.mock_calls:
            self.assertEqual(
                list(args[3])[0][0], carbonara.round_timestamp(
                    new_point, args[1].granularity * 10e8))
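
The assertion checks that only the bucket containing the new point is recomputed: for every _add_measures call, the first grouped timestamp must equal the new point floored to that definition's granularity (the * 10e8 factor converts seconds to nanoseconds, 10e8 == 1e9). A plain-numpy sketch of what carbonara.round_timestamp does conceptually, not its actual implementation:

import numpy

def round_timestamp_sketch(timestamp_ns, granularity_ns):
    # Floor a nanosecond timestamp to the start of its granularity bucket.
    return (numpy.asarray(timestamp_ns, dtype='int64')
            // int(granularity_ns)) * int(granularity_ns)

# 125 s past the epoch with a 60 s granularity floors to 120 s:
print(round_timestamp_sketch(125 * 10**9, 60 * 10**9))   # 120000000000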
Example #4
        def _map_add_measures(bound_timeserie):
            # NOTE(gordc): bound_timeserie is the entire set of
            # unaggregated measures matching the largest
            # granularity. The following takes only the points
            # affected by new measures for a specific granularity.
            tstamp = max(bound_timeserie.first, measures['timestamps'][0])
            new_first_block_timestamp = bound_timeserie.first_block_timestamp()
            computed_points['number'] = len(bound_timeserie)

            # No need to sort the aggregations, they are already
            # sorted by granularity.
            for granularity, aggregations in itertools.groupby(
                    metric.archive_policy.aggregations,
                    ATTRGETTER_GRANULARITY):
                ts = bound_timeserie.group_serie(
                    granularity, carbonara.round_timestamp(
                        tstamp, granularity))

                self._add_measures(metric, aggregations, ts,
                                   current_first_block_timestamp,
                                   new_first_block_timestamp)
Example #5
def aggregated(refs_and_timeseries,
               operations,
               from_timestamp=None,
               to_timestamp=None,
               needed_percent_of_overlap=100.0,
               fill=None):

    series = collections.defaultdict(list)
    references = collections.defaultdict(list)
    lookup_keys = collections.defaultdict(list)
    for (ref, timeserie) in refs_and_timeseries:
        from_ = (None if from_timestamp is None else carbonara.round_timestamp(
            from_timestamp, timeserie.aggregation.granularity))
        references[timeserie.aggregation.granularity].append(ref)
        lookup_keys[timeserie.aggregation.granularity].append(ref.lookup_key)
        series[timeserie.aggregation.granularity].append(
            timeserie[from_:to_timestamp])

    is_aggregated = False
    result = {}
    for sampling in sorted(series, reverse=True):
        # np.unique sorts results for us
        times, indices = numpy.unique(numpy.concatenate(
            [i['timestamps'] for i in series[sampling]]),
                                      return_inverse=True)

        # create nd-array (unique series x unique times) and fill
        filler = (numpy.NaN if fill in [None, 'null', 'dropna'] else fill)
        val_grid = numpy.full((len(series[sampling]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[sampling]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            overlap = numpy.flatnonzero(
                ~numpy.any(numpy.isnan(values), axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(
                    lookup_keys[sampling], 'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        lookup_keys[sampling],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' %
                        (needed_percent_of_overlap, percent_of_overlap))

        granularity, times, values, is_aggregated = (agg_operations.evaluate(
            operations, sampling, times, values, False, lookup_keys[sampling]))

        values = values.T
        result[sampling] = (granularity, times, values, references[sampling])

    if is_aggregated:
        output = {"aggregated": []}
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            if fill == "dropna":
                pos = ~numpy.logical_or(numpy.isnan(values[0]),
                                        numpy.isinf(values[0]))
                v = values[0][pos]
                t = times[pos]
            else:
                v = values[0]
                t = times
            g = [granularity] * len(t)
            output["aggregated"].extend(six.moves.zip(t, g, v))
        return output
    else:
        r_output = collections.defaultdict(lambda: collections.defaultdict(
            lambda: collections.defaultdict(list)))
        m_output = collections.defaultdict(
            lambda: collections.defaultdict(list))
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            for i, ref in enumerate(references):
                if fill == "dropna":
                    pos = ~numpy.logical_or(numpy.isnan(values[i]),
                                            numpy.isinf(values[i]))
                    v = values[i][pos]
                    t = times[pos]
                else:
                    v = values[i]
                    t = times
                g = [granularity] * len(t)
                measures = six.moves.zip(t, g, v)
                if ref.resource is None:
                    m_output[ref.name][ref.aggregation].extend(measures)
                else:
                    r_output[str(ref.resource.id)][ref.metric.name][
                        ref.aggregation].extend(measures)
        return r_output if r_output else m_output
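
The fill == "dropna" branches above drop NaN and infinite values before the points are zipped into the output. On plain numpy arrays (toy data, not Gnocchi objects) that masking step looks like this:

import numpy

values = numpy.array([1.0, numpy.nan, 3.0, numpy.inf])
times = numpy.array([10, 20, 30, 40])

# Keep only positions that are neither NaN nor +/-inf, mirroring the
# fill == "dropna" handling above.
pos = ~numpy.logical_or(numpy.isnan(values), numpy.isinf(values))
print(values[pos], times[pos])   # [1. 3.] [10 30]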
Example #6
def aggregated(timeseries,
               aggregation,
               from_timestamp=None,
               to_timestamp=None,
               needed_percent_of_overlap=100.0,
               fill=None):

    series = collections.defaultdict(list)
    for timeserie in timeseries:
        from_ = (None if from_timestamp is None else carbonara.round_timestamp(
            from_timestamp, timeserie.sampling))
        series[timeserie.sampling].append(timeserie[from_:to_timestamp])

    result = {'timestamps': [], 'granularity': [], 'values': []}
    for key in sorted(series, reverse=True):
        combine = numpy.concatenate(series[key])
        # np.unique sorts results for us
        times, indices = numpy.unique(combine['timestamps'],
                                      return_inverse=True)

        # create nd-array (unique series x unique times) and fill
        filler = fill if fill is not None and fill != 'null' else numpy.NaN
        val_grid = numpy.full((len(series[key]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[key]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            overlap = numpy.flatnonzero(
                ~numpy.any(numpy.isnan(values), axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise UnAggregableTimeseries('No overlap')
            # if no boundary set, use first/last timestamp which overlap
            if to_timestamp is None and overlap.size:
                times = times[:overlap[-1] + 1]
                values = values[:overlap[-1] + 1]
            if from_timestamp is None and overlap.size:
                times = times[overlap[0]:]
                values = values[overlap[0]:]
            percent_of_overlap = overlap.size * 100.0 / times.size
            if percent_of_overlap < needed_percent_of_overlap:
                raise UnAggregableTimeseries(
                    'Less than %f%% of datapoints overlap in this '
                    'timespan (%.2f%%)' %
                    (needed_percent_of_overlap, percent_of_overlap))

        if aggregation in AGG_MAP:
            values = AGG_MAP[aggregation](values, axis=1)
        elif aggregation == 'count':
            values = numpy.count_nonzero(~numpy.isnan(values), axis=1)
        else:
            raise carbonara.UnknownAggregationMethod(aggregation)

        result['timestamps'].extend(times)
        result['granularity'].extend([key] * len(times))
        result['values'].extend(values)

    return six.moves.zip(result['timestamps'], result['granularity'],
                         result['values'])
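
Every variant of aggregated() above shares the same core step: collect the unique timestamps across all series, build a (series x timestamps) grid pre-filled with NaN, scatter each series' values into it, and measure how many time rows are fully populated. A toy reproduction with plain numpy structured arrays (illustrative values only):

import numpy

# Two toy series sharing one granularity, in the ('timestamps', 'values')
# layout the code above slices.
dtype = [('timestamps', 'int64'), ('values', 'float64')]
s1 = numpy.array([(10, 1.0), (20, 2.0), (30, 3.0)], dtype=dtype)
s2 = numpy.array([(20, 5.0), (30, 6.0)], dtype=dtype)
splits = [s1, s2]

combine = numpy.concatenate(splits)
times, indices = numpy.unique(combine['timestamps'], return_inverse=True)

val_grid = numpy.full((len(splits), len(times)), numpy.nan)
start = 0
for i, split in enumerate(splits):
    size = len(split)
    val_grid[i][indices[start:start + size]] = split['values']
    start += size
values = val_grid.T

# Time rows where no series is missing; here 20 and 30 overlap, so
# 2 of 3 rows (66.7%) would be compared against needed_percent_of_overlap.
overlap = numpy.flatnonzero(~numpy.any(numpy.isnan(values), axis=1))
print(overlap.size * 100.0 / times.size)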
Example #7
def aggregated(refs_and_timeseries,
               operations,
               from_timestamp=None,
               to_timestamp=None,
               needed_percent_of_overlap=100.0,
               fill=None):

    series = collections.defaultdict(list)
    references = collections.defaultdict(list)
    for (reference, timeserie) in refs_and_timeseries:
        from_ = (None if from_timestamp is None else carbonara.round_timestamp(
            from_timestamp, timeserie.sampling))
        references[timeserie.sampling].append(reference)
        series[timeserie.sampling].append(timeserie[from_:to_timestamp])

    result = collections.defaultdict(lambda: {
        'timestamps': [],
        'granularity': [],
        'values': []
    })
    for key in sorted(series, reverse=True):
        combine = numpy.concatenate(series[key])
        # np.unique sorts results for us
        times, indices = numpy.unique(combine['timestamps'],
                                      return_inverse=True)

        # create nd-array (unique series x unique times) and fill
        filler = (numpy.NaN if fill in [None, 'null', 'dropna'] else fill)
        val_grid = numpy.full((len(series[key]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[key]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            overlap = numpy.flatnonzero(
                ~numpy.any(numpy.isnan(values), axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(
                    references[key], 'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        references[key],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' %
                        (needed_percent_of_overlap, percent_of_overlap))

        granularity, times, values, is_aggregated = (agg_operations.evaluate(
            operations, key, times, values, False, references[key]))

        values = values.T
        if is_aggregated:
            idents = ["aggregated"]
        else:
            idents = ["%s_%s" % tuple(ref) for ref in references[key]]
        for i, ident in enumerate(idents):
            if fill == "dropna":
                pos = ~numpy.isnan(values[i])
                v = values[i][pos]
                t = times[pos]
            else:
                v = values[i]
                t = times
            result[ident]["timestamps"].extend(t)
            result[ident]['granularity'].extend([granularity] * len(t))
            result[ident]['values'].extend(v)

    return dict(((ident,
                  list(
                      six.moves.zip(result[ident]['timestamps'],
                                    result[ident]['granularity'],
                                    result[ident]['values'])))
                 for ident in result))
Example #8
def aggregated(refs_and_timeseries, operations, from_timestamp=None,
               to_timestamp=None, needed_percent_of_overlap=100.0, fill=None):

    series = collections.defaultdict(list)
    references = collections.defaultdict(list)
    lookup_keys = collections.defaultdict(list)
    for (ref, timeserie) in refs_and_timeseries:
        from_ = (None if from_timestamp is None else
                 carbonara.round_timestamp(
                     from_timestamp, timeserie.aggregation.granularity))
        references[timeserie.aggregation.granularity].append(ref)
        lookup_keys[timeserie.aggregation.granularity].append(ref.lookup_key)
        series[timeserie.aggregation.granularity].append(
            timeserie[from_:to_timestamp])

    is_aggregated = False
    result = {}
    for sampling in sorted(series, reverse=True):
        combine = numpy.concatenate(series[sampling])
        # np.unique sorts results for us
        times, indices = numpy.unique(combine['timestamps'],
                                      return_inverse=True)

        # create nd-array (unique series x unique times) and fill
        filler = (numpy.NaN if fill in [None, 'null', 'dropna']
                  else fill)
        val_grid = numpy.full((len(series[sampling]), len(times)), filler)
        start = 0
        for i, split in enumerate(series[sampling]):
            size = len(split)
            val_grid[i][indices[start:start + size]] = split['values']
            start += size
        values = val_grid.T

        if fill is None:
            overlap = numpy.flatnonzero(~numpy.any(numpy.isnan(values),
                                                   axis=1))
            if overlap.size == 0 and needed_percent_of_overlap > 0:
                raise exceptions.UnAggregableTimeseries(lookup_keys[sampling],
                                                        'No overlap')
            if times.size:
                # if no boundary set, use first/last timestamp which overlap
                if to_timestamp is None and overlap.size:
                    times = times[:overlap[-1] + 1]
                    values = values[:overlap[-1] + 1]
                if from_timestamp is None and overlap.size:
                    times = times[overlap[0]:]
                    values = values[overlap[0]:]
                percent_of_overlap = overlap.size * 100.0 / times.size
                if percent_of_overlap < needed_percent_of_overlap:
                    raise exceptions.UnAggregableTimeseries(
                        lookup_keys[sampling],
                        'Less than %f%% of datapoints overlap in this '
                        'timespan (%.2f%%)' % (needed_percent_of_overlap,
                                               percent_of_overlap))

        granularity, times, values, is_aggregated = (
            agg_operations.evaluate(operations, sampling, times, values,
                                    False, lookup_keys[sampling]))

        values = values.T
        result[sampling] = (granularity, times, values, references[sampling])

    if is_aggregated:
        output = {"aggregated": []}
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            if fill == "dropna":
                pos = ~numpy.isnan(values[0])
                v = values[0][pos]
                t = times[pos]
            else:
                v = values[0]
                t = times
            g = [granularity] * len(t)
            output["aggregated"].extend(six.moves.zip(t, g, v))
        return output
    else:
        r_output = collections.defaultdict(
            lambda: collections.defaultdict(
                lambda: collections.defaultdict(list)))
        m_output = collections.defaultdict(
            lambda: collections.defaultdict(list))
        for sampling in sorted(result, reverse=True):
            granularity, times, values, references = result[sampling]
            for i, ref in enumerate(references):
                if fill == "dropna":
                    pos = ~numpy.isnan(values[i])
                    v = values[i][pos]
                    t = times[pos]
                else:
                    v = values[i]
                    t = times
                g = [granularity] * len(t)
                measures = six.moves.zip(t, g, v)
                if ref.resource is None:
                    m_output[ref.name][ref.aggregation].extend(measures)
                else:
                    r_output[str(ref.resource.id)][
                        ref.metric.name][ref.aggregation].extend(measures)
        return r_output if r_output else m_output
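
When the operations do not collapse everything into a single "aggregated" series, the result is grouped into nested dictionaries keyed by resource id, metric name, and aggregation method. A minimal illustration of how those nested defaultdicts behave (toy keys, not real Gnocchi references):

import collections

r_output = collections.defaultdict(
    lambda: collections.defaultdict(lambda: collections.defaultdict(list)))

# Appending through missing keys creates the intermediate dicts on the fly,
# so no explicit initialisation per resource/metric/aggregation is needed.
r_output['resource-1']['cpu.util']['mean'].extend([(10, 60, 1.0)])
print(r_output['resource-1']['cpu.util']['mean'])   # [(10, 60, 1.0)]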