Example #1
0
def test_aggregation():
    """Run build_graphs_from_targets on five targets with avg_by/sum_by set.

    The query averages by ``server`` and sums by ``type``.  The resulting
    graphs are dumped to stdout and then compared against an empty dict,
    which (per the TODO below) is a placeholder expectation.
    """
    from pprint import pprint

    # note: uneven aggregation: we only want 1 resulting metric,
    query = Query("")
    query["avg_by"] = {"server": [""]}
    query["sum_by"] = {"type": [""]}

    # (server, type) pairs; every target additionally carries n3=foo
    fixture = (
        ("web1", "db"),
        ("web1", "php"),
        ("web2", "db"),
        ("web2", "php"),
        ("web2", "memcache"),
    )
    targets = {}
    for server, kind in fixture:
        metric_id = "%s.%s" % (server, kind)
        targets[metric_id] = {
            "id": metric_id,
            "tags": {"server": server, "type": kind, "n3": "foo"},
        }

    # replace each raw dict by a Target on which get_graph_info has been called
    for metric_id in list(targets):
        wrapped = Target(targets[metric_id])
        wrapped.get_graph_info(group_by={})
        targets[metric_id] = wrapped

    graphs, _query = build_graphs_from_targets(targets, query)
    # TODO: there should be only 1 graph, containing all 5 items
    print("Graphs:")
    for graph_key, graph in graphs.items():
        print("graph key")
        pprint(graph_key)
        print("val:")
        pprint(graph)
    assert {} == graphs
Example #2
0
def build_graphs_from_targets(targets, query):
    """Turn a set of targets into a dict of configured graphs.

    Steps, in order:

    1. if ``query['avg_over']`` uses a known unit, append a
       ``movingAverage`` modifier to ``query['target_modifiers']``
       (this mutates ``query``);
    2. bucket every target into a graph according to ``query['group_by']``;
    3. per graph, collapse targets with ``sumSeries`` and then
       ``averageSeries`` according to ``query['sum_by']`` / ``query['avg_by']``;
    4. apply ``query['limit_targets']`` through ``graphs_limit_targets``;
    5. run every ``query['target_modifiers']`` entry on every target;
    6. promote variables that have the same value on all targets of a graph
       to ``promoted_constants``, merge matching options from the
       module-level ``preferences.graph_options`` and apply the query's
       ``statement`` / ``min`` / ``max`` overrides;
    7. re-key every graph as ``<old key>__<promoted constants>``.

    :param targets: mapping of target id to target data; each value is
        passed to ``Target(...)``.  Falsy input yields no graphs.
    :param query: mapping-like object providing the keys read above.
    :returns: tuple ``(graphs, query)`` where ``graphs`` maps graph key to
        graph config dict.
    """
    graphs = {}
    if not targets:
        return (graphs, query)
    group_by = query['group_by']
    sum_by = query['sum_by']
    avg_by = query['avg_by']
    avg_over = query['avg_over']
    # i'm gonna assume you never use second and your datapoints are stored with
    # minutely resolution. later on we can use config options for this (or
    # better: somehow query graphite about it)
    # note, the day/week/month numbers are not technically accurate, but
    # since we're doing movingAvg that's ok
    averaging = {
        'M': 1,
        'h': 60,
        'd': 60 * 24,
        'w': 60 * 24 * 7,
        'mo': 60 * 24 * 30
    }
    if avg_over is not None:
        avg_over_amount = avg_over[0]
        avg_over_unit = avg_over[1]
        # unknown units are silently ignored
        if avg_over_unit in averaging:
            multiplier = averaging[avg_over_unit]
            query['target_modifiers'].append(
                Query.graphite_function_applier('movingAverage', avg_over_amount * multiplier))

    # for each group_by bucket, make 1 graph.
    # so for each graph, we have:
    # the "constants": tags in the group_by
    # the "variables": tags not in the group_by, which can have arbitrary
    # values, or different values from a group_by tag that match the same
    # bucket pattern
    # go through all targets and group them into graphs:
    for _target_id, target_data in sorted(targets.items()):
        # FWIW. has an 'id' which timeserieswidget doesn't care about
        target = Target(target_data)
        target['target'] = target['id']

        (graph_key, constants) = target.get_graph_info(group_by)
        if graph_key not in graphs:
            graph = {'from': query['from'], 'until': query['to']}
            graph.update({'constants': constants, 'targets': []})
            graphs[graph_key] = graph
        graphs[graph_key]['targets'].append(target)

    # ok so now we have a graphs dictionary with a graph for every appropriate
    # combination of group_by tags, and each graph contains all targets that
    # should be shown on it.  but the user may have asked to aggregate certain
    # targets together, by summing and/or averaging across different values of
    # (a) certain tag(s). let's process the aggregations now.
    if sum_by or avg_by:
        for graph_config in graphs.values():
            sum_candidates = graph_config['targets_sum_candidates'] = {}
            avg_candidates = graph_config['targets_avg_candidates'] = {}
            graph_config['normal_targets'] = []

            for target in graph_config['targets']:
                sum_id = target.get_agg_key(sum_by)
                if sum_id:
                    sum_candidates.setdefault(sum_id, []).append(target)

            # a group of one stays as-is; larger groups are replaced by a
            # single aggregated target appended at the end of the list.
            # (the loop name must not be `targets`: that is our parameter)
            for members in sum_candidates.values():
                if len(members) > 1:
                    for member in members:
                        graph_config['targets'].remove(member)
                    graph_config['targets'].append(
                        graphite_func_aggregate(members, sum_by, "sumSeries"))

            for target in graph_config['targets']:
                # Now that any summing is done, we look at aggregating by
                # averaging because avg(foo+bar+baz) is more efficient
                # than avg(foo)+avg(bar)+avg(baz)
                # aggregate targets (whether those are sums or regular ones)
                avg_id = target.get_agg_key(avg_by)
                if avg_id:
                    avg_candidates.setdefault(avg_id, []).append(target)

            for members in avg_candidates.values():
                if len(members) > 1:
                    for member in members:
                        graph_config['targets'].remove(member)
                    graph_config['targets'].append(
                        graphite_func_aggregate(members, avg_by, "averageSeries"))

    # remove targets/graphs over the limit
    graphs = graphs_limit_targets(graphs, query['limit_targets'])

    # Apply target modifiers (like movingAverage, summarize, ...)
    for graph_config in graphs.values():
        for target in graph_config['targets']:
            for target_modifier in query['target_modifiers']:
                target_modifier(target, graph_config)

    # if in a graph all targets have a tag with the same value, they are
    # effectively constants, so promote them.  this makes the display of the
    # graphs less redundant and makes it easier to do config/preferences
    # on a per-graph basis.
    # iterate over a snapshot: graphs[graph_key] may be rebound below.
    for (graph_key, graph_config) in list(graphs.items()):
        # get all variable tags throughout all targets in this graph
        tags_seen = set()
        for target in graph_config['targets']:
            tags_seen.update(target['variables'].keys())

        # find effective constants from those variables,
        # and effective variables. (unset tag is a value too)
        first_values_seen = {}
        effective_variables = set()  # tags for which we've seen >1 values
        for target in graph_config['targets']:
            for tag_name in tags_seen:
                # already known that we can't promote, continue
                if tag_name in effective_variables:
                    continue
                tag_value = target['variables'].get(tag_name, None)
                if tag_name not in first_values_seen:
                    first_values_seen[tag_name] = tag_value
                elif tag_value != first_values_seen[tag_name]:
                    effective_variables.add(tag_name)
        effective_constants = tags_seen - effective_variables

        # promote the effective_constants by adjusting graph and targets:
        promoted = graph_config['promoted_constants'] = {}
        for tag_name in effective_constants:
            promoted[tag_name] = first_values_seen[tag_name]
            for target in graph_config['targets']:
                target['variables'].pop(tag_name, None)

        # now that graph config is "rich", merge in settings from preferences.
        # promoted constants win over original ones on key clashes.
        # (list(...) keeps this working where items() returns a view)
        constants = dict(list(graph_config['constants'].items()) +
                         list(promoted.items()))
        for graph_option in get_action_on_rules_match(preferences.graph_options, constants):
            if isinstance(graph_option, dict):
                graph_config.update(graph_option)
            else:
                # callable option: it returns the graph config to use from now on
                graph_config = graphs[graph_key] = graph_option(graph_config)

        # but, the query may override some preferences.
        # built per graph so graphs never share the same 'yaxis' dict.
        override = {}
        if query['statement'] == 'lines':
            override['state'] = 'lines'
        if query['statement'] == 'stack':
            override['state'] = 'stacked'
        if query['min'] is not None:
            override.setdefault('yaxis', {})['min'] = convert.parse_str(query['min'])
        if query['max'] is not None:
            override.setdefault('yaxis', {})['max'] = convert.parse_str(query['max'])

        graph_config.update(override)

    # now that some constants are promoted, we can give the graph more
    # unique keys based on all (original + promoted) constants. this is in
    # line with the meaning of the graph ("all targets with those constant
    # tags"), but more importantly: this fixes cases where some graphs
    # would otherwise have the same key, even though they have a different
    # set of constants, this can manifest itself on dashboard pages where
    # graphs for different queries are shown.
    # note that we can't just compile constants + promoted_constants,
    # part of the original graph key is also set by the group by (which, by
    # means of the bucket patterns doesn't always translate into constants),
    # we solve this by just including the old key.
    new_graphs = {}
    for (graph_key, graph_config) in graphs.items():
        suffix = ','.join('%s=%s' % pair
                          for pair in graph_config['promoted_constants'].items())
        new_graphs['%s__%s' % (graph_key, suffix)] = graph_config
    graphs = new_graphs

    return (graphs, query)
Example #3
0
def test_nontrivial_implicit_aggregation():
    """Sum by n1 and n2 over six targets spread across three servers.

    The targets differ in whether they carry the ``n2`` tag at all and in
    the values of ``n1`` / ``n2``.  The expectation below is a sketch of the
    desired outcome (see the TODO), so the final comparison is expected to
    fail on the assertion until the real graph structure is filled in.
    """
    # we ultimately want 1 graph with 1 line for each server,
    # irrespective of the values of the other tags (n1 and n2)
    # and even whether or not the metrics have those tags at all.
    query = Query("")
    query['group_by'] = {}
    query['sum_by'] = {'n1': [''], 'n2': ['']}

    targets = {
        # web1 : one with and without n2
        'web1.a.a': {
            'id': 'web1.a.a',
            'tags': {
                'server': 'web1',
                'n1': 'a',
                'n2': 'a'
            }
        },
        'web1.a': {
            'id': 'web1.a',
            'tags': {
                'server': 'web1',
                'n1': 'a',
            }
        },
        # web 2: 2 different values of n2
        'web2.a.a': {
            'id': 'web2.a.a',
            'tags': {
                'server': 'web2',
                'n1': 'a',
                'n2': 'a'
            }
        },
        'web2.a.b': {
            'id': 'web2.a.b',
            'tags': {
                'server': 'web2',
                'n1': 'a',
                'n2': 'b'
            }
        },
        # web3: with and without n2, diff value for n1
        'web3.a.a': {
            'id': 'web3.a.a',
            'tags': {
                'server': 'web3',
                'n1': 'a',
                'n2': 'a'
            }
        },
        'web3.b': {
            'id': 'web3.b',
            'tags': {
                'server': 'web3',
                'n1': 'b'
            }
        }
    }
    from pprint import pprint
    # iterate over a snapshot because the values are replaced in the loop
    for (k, v) in list(targets.items()):
        v = Target(v)
        v.get_graph_info(group_by={})
        targets[k] = v
    graphs, _query = build_graphs_from_targets(targets, query)
    # TODO: there should be only 1 graph, containing 3 lines, with each 2 targets per server
    # i.e. something like this:
    # (a list, not a set: dicts are unhashable, so a set display of dicts
    # raises TypeError before the assertion is ever reached)
    expected = {
        'targets': [
            {'id': ['web1.a.a', 'web1.a']},
            {'id': ['web2.a.a', 'web2.a.b']},
            {'id': ['web3.a.a', 'web3.b']}
        ]
    }

    print("Graphs:")
    for (k, v) in graphs.items():
        print("graph key")
        pprint(k)
        print("val:")
        pprint(v)
    assert expected == graphs
Example #4
0
def test_aggregation():
    """Run g.build_from_targets on five targets with avg_by/sum_by set.

    Uses a ``DummyPrefs`` instance as preferences, averages by ``server``
    and sums by ``type``.  The resulting graphs are dumped to stdout and
    then compared against an empty dict, which (per the TODO below) is a
    placeholder expectation.
    """
    from pprint import pprint

    preferences = DummyPrefs()
    # note: uneven aggregation: we only want 1 resulting metric,
    query = Query("")
    query['avg_by'] = {'server': ['']}
    query['sum_by'] = {'type': ['']}

    # (server, type) pairs; every target additionally carries n3=foo
    fixture = (
        ('web1', 'db'),
        ('web1', 'php'),
        ('web2', 'db'),
        ('web2', 'php'),
        ('web2', 'memcache'),
    )
    targets = {}
    for server, kind in fixture:
        metric_id = '%s.%s' % (server, kind)
        targets[metric_id] = {
            'id': metric_id,
            'tags': {'server': server, 'type': kind, 'n3': 'foo'},
        }

    # replace each raw dict by a Target on which get_graph_info has been called
    for metric_id in list(targets):
        wrapped = Target(targets[metric_id])
        wrapped.get_graph_info(group_by={})
        targets[metric_id] = wrapped

    graphs, _query = g.build_from_targets(targets, query, preferences)
    # TODO: there should be only 1 graph, containing all 5 items
    print("Graphs:")
    for graph_key, graph in graphs.items():
        print("graph key")
        pprint(graph_key)
        print("val:")
        pprint(graph)
    assert {} == graphs
Example #5
0
def build_from_targets(targets, query, preferences):
    """Turn a set of targets into a dict of configured graphs.

    Steps, in order:

    1. if ``query['avg_over']`` uses a known unit, append a
       ``movingAverage`` modifier to ``query['target_modifiers']``
       (this mutates ``query``);
    2. bucket every target into a graph according to ``query['group_by']``;
    3. per graph, drop targets made redundant by ``_sum_`` targets, then
       collapse targets with ``sumSeries`` and ``averageSeries`` according
       to ``query['sum_by']`` / ``query['avg_by']``;
    4. apply ``query['limit_targets']`` through ``limit_targets``;
    5. run every ``query['target_modifiers']`` entry on every target;
    6. promote variables that have the same value on all targets of a graph
       to ``promoted_constants``, merge matching options from
       ``preferences.graph_options`` and apply the query's
       ``statement`` / ``min`` / ``max`` overrides;
    7. re-key every graph as ``<old key>__<promoted constants>``.

    :param targets: mapping of target id to target data; each value is
        passed to ``Target(...)``.  Falsy input yields no graphs.
    :param query: mapping-like object providing the keys read above.
    :param preferences: object exposing ``graph_options``.
    :returns: tuple ``(graphs, query)`` where ``graphs`` maps graph key to
        graph config dict.
    """
    graphs = {}
    if not targets:
        return (graphs, query)
    group_by = query['group_by']
    sum_by = query['sum_by']
    avg_by = query['avg_by']
    avg_over = query['avg_over']
    # i'm gonna assume you never use second and your datapoints are stored with
    # minutely resolution. later on we can use config options for this (or
    # better: somehow query graphite about it)
    # note, the day/week/month numbers are not technically accurate, but
    # since we're doing movingAvg that's ok
    averaging = {
        'M': 1,
        'h': 60,
        'd': 60 * 24,
        'w': 60 * 24 * 7,
        'mo': 60 * 24 * 30
    }
    if avg_over is not None:
        avg_over_amount = avg_over[0]
        avg_over_unit = avg_over[1]
        # unknown units are silently ignored
        if avg_over_unit in averaging:
            multiplier = averaging[avg_over_unit]
            query['target_modifiers'].append(
                Query.graphite_function_applier('movingAverage',
                                                avg_over_amount * multiplier))

    # for each group_by bucket, make 1 graph.
    # so for each graph, we have:
    # the "constants": tags in the group_by
    # the "variables": tags not in the group_by, which can have arbitrary
    # values, or different values from a group_by tag that match the same
    # bucket pattern
    # go through all targets and group them into graphs:
    for _target_id, target_data in sorted(targets.items()):
        # FWIW. has an 'id' which timeserieswidget doesn't care about
        target = Target(target_data)
        target['target'] = target['id']

        (graph_key, constants) = target.get_graph_info(group_by)
        if graph_key not in graphs:
            graph = {'from': query['from'], 'until': query['to']}
            graph.update({'constants': constants, 'targets': []})
            graphs[graph_key] = graph
        graphs[graph_key]['targets'].append(target)

    # ok so now we have a graphs dictionary with a graph for every appropriate
    # combination of group_by tags, and each graph contains all targets that
    # should be shown on it.  but the user may have asked to aggregate certain
    # targets together, by summing and/or averaging across different values of
    # (a) certain tag(s). let's process the aggregations now.
    if sum_by or avg_by:
        for graph_config in graphs.values():
            sum_candidates = graph_config['targets_sum_candidates'] = {}
            avg_candidates = graph_config['targets_avg_candidates'] = {}
            graph_config['normal_targets'] = []

            # process equivalence rules, see further down.
            for tag, buckets in sum_by.items():
                # first separate the individuals from the _sum_
                # we can use agg_key to find out if they all have the same
                # values other than this one particular key
                by_agg_key = {}
                for target in graph_config['targets']:
                    key = target.get_agg_key({tag: buckets})
                    entry = by_agg_key.setdefault(key, {'individuals': []})
                    if target['tags'].get(tag, '') == '_sum_':
                        entry['_sum_'] = target
                    else:
                        entry['individuals'].append(target)

                # NOTE(review): evaluated once per tag on the assumption that
                # Query.filtered_on is a pure predicate -- confirm.
                if Query.filtered_on(query, tag):
                    # if we are summing, and we have a filter, and we have
                    # individual ones and a _sum_, remove the _sum_
                    # irrespective of buckets.  note that this removes the
                    # _sum_ target without the user needing to filter it out
                    # explicitly.  this is the only place we do that, but it
                    # makes sense.  we wouldn't want users to specify the
                    # _sum_ removal explicitly all the time, esp for multiple
                    # tag keys
                    for entry in by_agg_key.values():
                        if '_sum_' in entry:
                            graph_config['targets'].remove(entry['_sum_'])
                elif len(buckets) == 1 and buckets[0] == '':
                    # for all agg keys that only have the '' bucket,
                    # if targets are identical except that some have tag
                    # foo={bar,baz,0,quux, ...} and one of them has foo=_sum_
                    # and we're summing by that tag, and we didn't filter on
                    # foo, remove all the ones except the sum one
                    for entry in by_agg_key.values():
                        if '_sum_' in entry:
                            for individual in entry['individuals']:
                                graph_config['targets'].remove(individual)

            for target in graph_config['targets']:
                sum_id = target.get_agg_key(sum_by)
                if sum_id:
                    sum_candidates.setdefault(sum_id, []).append(target)

            # a group of one stays as-is; larger groups are replaced by a
            # single aggregated target appended at the end of the list.
            # (the loop name must not be `targets`: that is our parameter)
            for members in sum_candidates.values():
                if len(members) > 1:
                    for candidate in members:
                        graph_config['targets'].remove(candidate)
                    graph_config['targets'].append(
                        t.graphite_func_aggregate(members, sum_by,
                                                  "sumSeries"))

            for target in graph_config['targets']:
                # Now that any summing is done, we look at aggregating by
                # averaging because avg(foo+bar+baz) is more efficient
                # than avg(foo)+avg(bar)+avg(baz)
                # aggregate targets (whether those are sums or regular ones)
                avg_id = target.get_agg_key(avg_by)
                if avg_id:
                    avg_candidates.setdefault(avg_id, []).append(target)

            for members in avg_candidates.values():
                if len(members) > 1:
                    for candidate in members:
                        graph_config['targets'].remove(candidate)
                    graph_config['targets'].append(
                        t.graphite_func_aggregate(members, avg_by,
                                                  "averageSeries"))

    # remove targets/graphs over the limit
    graphs = limit_targets(graphs, query['limit_targets'])

    # Apply target modifiers (like movingAverage, summarize, ...)
    for graph_config in graphs.values():
        for target in graph_config['targets']:
            for target_modifier in query['target_modifiers']:
                target_modifier(target, graph_config)

    # if in a graph all targets have a tag with the same value, they are
    # effectively constants, so promote them.  this makes the display of the
    # graphs less redundant and makes it easier to do config/preferences
    # on a per-graph basis.
    # iterate over a snapshot: graphs[graph_key] may be rebound below.
    for (graph_key, graph_config) in list(graphs.items()):
        # get all variable tags throughout all targets in this graph
        tags_seen = set()
        for target in graph_config['targets']:
            tags_seen.update(target['variables'].keys())

        # find effective constants from those variables,
        # and effective variables. (unset tag is a value too)
        first_values_seen = {}
        effective_variables = set()  # tags for which we've seen >1 values
        for target in graph_config['targets']:
            for tag_name in tags_seen:
                # already known that we can't promote, continue
                if tag_name in effective_variables:
                    continue
                tag_value = target['variables'].get(tag_name, None)
                if tag_name not in first_values_seen:
                    first_values_seen[tag_name] = tag_value
                elif tag_value != first_values_seen[tag_name]:
                    effective_variables.add(tag_name)
        effective_constants = tags_seen - effective_variables

        # promote the effective_constants by adjusting graph and targets:
        promoted = graph_config['promoted_constants'] = {}
        for tag_name in effective_constants:
            promoted[tag_name] = first_values_seen[tag_name]
            for target in graph_config['targets']:
                target['variables'].pop(tag_name, None)

        # now that graph config is "rich", merge in settings from preferences.
        # promoted constants win over original ones on key clashes.
        # (list(...) keeps this working where items() returns a view)
        constants = dict(list(graph_config['constants'].items()) +
                         list(promoted.items()))
        for graph_option in get_action_on_rules_match(
                preferences.graph_options, constants):
            if isinstance(graph_option, dict):
                graph_config.update(graph_option)
            else:
                # callable option: it returns the graph config to use from now on
                graph_config = graphs[graph_key] = graph_option(graph_config)

        # but, the query may override some preferences.
        # built per graph so graphs never share the same 'yaxis' dict.
        override = {}
        if query['statement'] == 'lines':
            override['state'] = 'lines'
        if query['statement'] == 'stack':
            override['state'] = 'stacked'
        if query['min'] is not None:
            override.setdefault('yaxis', {})['min'] = convert.parse_str(query['min'])
        if query['max'] is not None:
            override.setdefault('yaxis', {})['max'] = convert.parse_str(query['max'])

        graph_config.update(override)

    # now that some constants are promoted, we can give the graph more
    # unique keys based on all (original + promoted) constants. this is in
    # line with the meaning of the graph ("all targets with those constant
    # tags"), but more importantly: this fixes cases where some graphs
    # would otherwise have the same key, even though they have a different
    # set of constants, this can manifest itself on dashboard pages where
    # graphs for different queries are shown.
    # note that we can't just compile constants + promoted_constants,
    # part of the original graph key is also set by the group by (which, by
    # means of the bucket patterns doesn't always translate into constants),
    # we solve this by just including the old key.
    new_graphs = {}
    for (graph_key, graph_config) in graphs.items():
        suffix = ','.join('%s=%s' % pair
                          for pair in graph_config['promoted_constants'].items())
        new_graphs['%s__%s' % (graph_key, suffix)] = graph_config
    graphs = new_graphs

    return (graphs, query)