def parse(self, query_str):
    avg_over_match = '^([0-9]*)(s|M|h|d|w|mo)$'

    # for a call like ('foo bar baz quux', 'bar ', 'baz', 'def')
    # returns ('foo quux', 'baz') or the original query and the default val if no match
    def parse_val(query_str, predicate_match, value_match, value_default=None):
        match = re.search('\\b(%s%s)' % (predicate_match, value_match), query_str)
        value = value_default
        if match and len(match.groups()) > 0:
            value = match.groups(1)[0].replace(predicate_match, '')
            query_str = query_str[:match.start(1)] + query_str[match.end(1):]
        return (query_str, value)

    (query_str, self['statement']) = parse_val(query_str, '^', '(graph|list|stack|lines)\\b', self['statement'])
    self['statement'] = self['statement'].rstrip()
    (query_str, self['to']) = parse_val(query_str, 'to ', '[^ ]+', self['to'])
    (query_str, self['from']) = parse_val(query_str, 'from ', '[^ ]+', self['from'])
    (query_str, group_by_str) = parse_val(query_str, 'GROUP BY ', '[^ ]+')
    (query_str, extra_group_by_str) = parse_val(query_str, 'group by ', '[^ ]+')
    (query_str, sum_by_str) = parse_val(query_str, 'sum by ', '[^ ]+')
    (query_str, avg_by_str) = parse_val(query_str, 'avg by ', '[^ ]+')
    (query_str, avg_over_str) = parse_val(query_str, 'avg over ', '[^ ]+')
    (query_str, min_str) = parse_val(query_str, 'min ', '[^ ]+')
    (query_str, max_str) = parse_val(query_str, 'max ', '[^ ]+')

    explicit_group_by = {}
    if group_by_str is not None:
        explicit_group_by = Query.build_buckets(group_by_str)
        self['group_by'] = explicit_group_by
    elif extra_group_by_str is not None:
        for k in self['group_by'].keys():
            if not k.endswith('='):
                del self['group_by'][k]
        explicit_group_by = Query.build_buckets(extra_group_by_str)
        self['group_by'].update(explicit_group_by)

    if sum_by_str is not None:
        self['sum_by'] = Query.build_buckets(sum_by_str)
    if avg_by_str is not None:
        self['avg_by'] = Query.build_buckets(avg_by_str)

    if min_str is not None:
        # check if we can parse the values, but don't actually replace yet
        # because we want to keep the 'pretty' value for now so we can display
        # it in the query details section
        convert.parse_str(min_str)
        self['min'] = min_str
    if max_str is not None:
        convert.parse_str(max_str)
        self['max'] = max_str

    # if you specified a tag in avg_by or sum_by that is included in the
    # default group_by (and you didn't explicitly ask to group by that tag), we
    # remove it from group by, so that the avg/sum can work properly.
    for tag in self['sum_by'].keys() + self['avg_by'].keys():
        for tag_check in (tag, "%s=" % tag):
            if tag_check in self['group_by'] and tag_check not in explicit_group_by.keys():
                del self['group_by'][tag_check]

    # doing this sanity check would now be tricky: basically you can have the same keys in more than 1 of sum/avg/group by,
    # it now depends on the bucket configuration. since i can't wrap my head around it anymore, let's just leave it be for now.
    # it's up to people to construct sane queries, and if they do a stupid query, then at least GE shouldn't crash or anything.
    # sum_individual_keys = len(self['group_by']) + len(self['sum_by']) + len(self['avg_by'])
    # sum_unique_keys = len(set(self['group_by'].keys() + self['sum_by'].keys() + self['avg_by'].keys()))
    # if sum_individual_keys != sum_unique_keys:
    #     raise Exception("'group by' (%s), 'sum by (%s)' and 'avg by (%s)' "
    #                     "cannot list the same tag keys" %
    #                     (', '.join(self['group_by'].keys()),
    #                      ', '.join(self['sum_by'].keys()),
    #                      ', '.join(self['avg_by'].keys())))

    if avg_over_str is not None:
        # avg_over_str should be something like 'h', '10M', etc
        avg_over = re.match(avg_over_match, avg_over_str)
        if avg_over is not None:  # if None, that's an invalid request. ignore it. TODO error to user
            avg_over = avg_over.groups()
            # a bare unit (e.g. plain 'h') has an empty amount; treat that as 1
            self['avg_over'] = (int(avg_over[0] or 1), avg_over[1])

    (query_str, self['limit_targets']) = parse_val(query_str, 'limit ', '[^ ]+', self['limit_targets'])
    self['limit_targets'] = int(self['limit_targets'])

    # split query_str into multiple patterns which are all matched independently
    # this allows you to write patterns in any order, and also makes it easy to use negations
    self['patterns'] += query_str.split()
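
# --- hedged usage sketch (editor's addition, not part of the original module) ---
# Shows how the avg_over_match regex above decomposes an 'avg over' window spec
# into an (amount, unit) tuple: a bare unit like 'h' gets an implied amount of 1,
# and anything that doesn't match the regex is ignored, mirroring parse().
if __name__ == '__main__':
    import re
    avg_over_match = '^([0-9]*)(s|M|h|d|w|mo)$'
    for spec in ('h', '10M', '2w', 'bogus'):
        m = re.match(avg_over_match, spec)
        if m is None:
            print('%s -> invalid, ignored' % spec)
        else:
            (amount, unit) = m.groups()
            print('%s -> %r' % (spec, (int(amount or 1), unit)))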
def build_graphs_from_targets(targets, query={}, target_modifiers=[]):
    # merge default options..
    defaults = {
        'group_by': [],
        'sum_by': [],
        'avg_over': None,
        'avg_by': [],
        'from': '-24hours',
        'to': 'now',
        'statement': 'graph',
        'limit_targets': 500
    }
    query = dict(defaults.items() + query.items())
    graphs = {}
    if not targets:
        return (graphs, query)
    group_by = query['group_by']
    sum_by = query['sum_by']
    avg_by = query['avg_by']
    avg_over = query['avg_over']
    # i'm gonna assume you never use seconds and your datapoints are stored with
    # minutely resolution. later on we can use config options for this (or
    # better: somehow query graphite about it)
    # note, the day/week/month numbers are not technically accurate, but
    # since we're doing movingAvg that's ok
    averaging = {
        'M': 1,
        'h': 60,
        'd': 60 * 24,
        'w': 60 * 24 * 7,
        'mo': 60 * 24 * 30
    }
    if avg_over is not None:
        avg_over_amount = avg_over[0]
        avg_over_unit = avg_over[1]
        if avg_over_unit in averaging.keys():
            multiplier = averaging[avg_over_unit]
            target_modifier = {'target': ['movingAverage', str(avg_over_amount * multiplier)]}
            target_modifiers.append(target_modifier)

    # for each combination of values of tags from group_by, make 1 graph with
    # all targets that have these values. so for each graph, we have:
    # the "constants": tags in the group_by
    # the "variables": tags not in the group_by, which can have arbitrary values
    # go through all targets and group them into graphs:
    for (i, target_id) in enumerate(sorted(targets.iterkeys())):
        constants = {}
        variables = {}
        target_data = targets[target_id]
        for (tag_name, tag_value) in target_data['tags'].items():
            if tag_name in group_by or '%s=' % tag_name in group_by:
                constants[tag_name] = tag_value
            else:
                variables[tag_name] = tag_value
        graph_key = '__'.join([target_data['tags'][tag_name] for tag_name in constants])
        if graph_key not in graphs:
            graph = {'from': query['from'], 'until': query['to']}
            graph.update({'constants': constants, 'targets': []})
            graphs[graph_key] = graph
        target = target_data['id']
        # set all options needed for timeserieswidget/flot:
        t = {
            'variables': variables,
            'id': target_data['id'],  # timeserieswidget doesn't care about this
            'target': target
        }
        if 'color' in target_data:
            t['color'] = target_data['color']
        graphs[graph_key]['targets'].append(t)

    # ok so now we have a graphs dictionary with a graph for every appropriate
    # combination of group_by tags, and each graph contains all targets that
    # should be shown on it. but the user may have asked to aggregate certain
    # targets together, by summing and/or averaging across different values of
    # (a) certain tag(s). let's process the aggregations now.
    if (sum_by or avg_by):
        for (graph_key, graph_config) in graphs.items():
            graph_config['targets_sum_candidates'] = {}
            graph_config['targets_avg_candidates'] = {}
            graph_config['normal_targets'] = []
            all_targets = graph_config['targets'][:]  # Get a copy.

            for target in all_targets:
                # targets that can get summed together with other tags, must
                # have at least 1 'sum_by' tag in the variables list.
                # targets that can get summed together must have:
                # * the same 'sum_by' tag keys (not values, because we
                #   aggregate across different values for these tags)
                # * the same variables (key and val), except those vals that
                #   are being summed by.
                # so for every group of sum_by tags and variables we build a
                # list of targets that can be summed together
                # of course it only makes sense to sum by tags that the target
                # actually has, and that are not already constants (meaning
                # every target in the graph has the same value)
                variables = target['variables'].keys()
                sum_constants = set(sum_by).intersection(set(variables))
                if sum_constants:
                    sum_constants_str = '_'.join(sorted(sum_constants))
                    variables_str = '_'.join(
                        ['%s_%s' % (k, target['variables'][k])
                         for k in sorted(variables) if k not in sum_constants])
                    sum_id = '%s__%s' % (sum_constants_str, variables_str)
                    if sum_id not in graph_config['targets_sum_candidates']:
                        graphs[graph_key]['targets_sum_candidates'][sum_id] = []
                    graph_config['targets_sum_candidates'][sum_id].append(target)

            for (sum_id, targets) in graph_config['targets_sum_candidates'].items():
                if len(targets) > 1:
                    for t in targets:
                        all_targets.remove(t)
                    all_targets.append(
                        graphite_func_aggregate(targets, sum_by, "sumSeries"))

            for target in all_targets:
                # Now that any summing is done, we look at aggregating by
                # averaging, because avg(foo+bar+baz) is more efficient
                # than avg(foo)+avg(bar)+avg(baz)
                # It's pretty similar to what happened above and aggregates
                # targets (whether those are sums or regular ones)
                variables = target['variables'].keys()
                avg_constants = set(avg_by).intersection(set(variables))
                if avg_constants:
                    avg_constants_str = '_'.join(sorted(avg_constants))
                    variables_str = '_'.join(
                        ['%s_%s' % (k, target['variables'][k])
                         for k in sorted(variables) if k not in avg_constants])
                    # some values can be like 'sumSeries (8 values)' due to an
                    # earlier aggregation. if targets now have a different number
                    # of values matched, that doesn't matter and they should still
                    # be aggregated together if the rest of the conditions are met
                    variables_str = re.sub(r'\([0-9]+ values\)', '(Xvalues)', variables_str)
                    avg_id = '%s__%s' % (avg_constants_str, variables_str)
                    if avg_id not in graph_config['targets_avg_candidates']:
                        graph_config['targets_avg_candidates'][avg_id] = []
                    graph_config['targets_avg_candidates'][avg_id].append(target)

            for (avg_id, targets) in graph_config['targets_avg_candidates'].items():
                if len(targets) > 1:
                    for t in targets:
                        all_targets.remove(t)
                    all_targets.append(
                        graphite_func_aggregate(targets, avg_by, "averageSeries"))

            graph_config["targets"] = all_targets

    # remove targets/graphs over the limit
    graphs = graphs_limit_targets(graphs, query['limit_targets'])

    # Apply target modifiers (like movingAverage, summarize, ...)
    for (graph_key, graph_config) in graphs.items():
        for target in graph_config['targets']:
            for target_modifier in target_modifiers:
                target['target'] = "%s(%s,%s)" % (
                    target_modifier['target'][0],
                    target['target'],
                    ','.join(target_modifier['target'][1:]))
                if 'tags' in target_modifier:
                    for (new_k, new_v) in target_modifier['tags'].items():
                        if new_k in graph_config['constants']:
                            graph_config['constants'][new_k] = new_v
                        else:
                            target['variables'][new_k] = new_v

    # if in a graph all targets have a tag with the same value, they are
    # effectively constants, so promote them. this makes the display of the
    # graphs less redundant and makes it easier to do config/preferences
    # on a per-graph basis.
    for (graph_key, graph_config) in graphs.items():
        # get all variable tags throughout all targets in this graph
        tags_seen = set()
        for target in graph_config['targets']:
            for tag_name in target['variables'].keys():
                tags_seen.add(tag_name)

        # find effective constants from those variables,
        # and effective variables. (unset tag is a value too)
        first_values_seen = {}
        effective_variables = set()  # tags for which we've seen >1 values
        for target in graph_config['targets']:
            for tag_name in tags_seen:
                # already known that we can't promote, continue
                if tag_name in effective_variables:
                    continue
                tag_value = target['variables'].get(tag_name, None)
                if tag_name not in first_values_seen:
                    first_values_seen[tag_name] = tag_value
                elif tag_value != first_values_seen[tag_name]:
                    effective_variables.add(tag_name)
        effective_constants = tags_seen - effective_variables

        # promote the effective_constants by adjusting graph and targets:
        graphs[graph_key]['promoted_constants'] = {}
        for tag_name in effective_constants:
            graphs[graph_key]['promoted_constants'][tag_name] = first_values_seen[tag_name]
            for (i, target) in enumerate(graph_config['targets']):
                if tag_name in graphs[graph_key]['targets'][i]['variables']:
                    del graphs[graph_key]['targets'][i]['variables'][tag_name]

        # now that graph config is "rich", merge in settings from preferences
        constants = dict(graphs[graph_key]['constants'].items() +
                         graphs[graph_key]['promoted_constants'].items())
        for graph_option in get_action_on_rules_match(preferences.graph_options, constants):
            if isinstance(graph_option, dict):
                graphs[graph_key].update(graph_option)
            else:
                graphs[graph_key] = graph_option(graphs[graph_key])

        # but, the query may override some preferences:
        override = {}
        if query['statement'] == 'lines':
            override['state'] = 'lines'
        if query['statement'] == 'stack':
            override['state'] = 'stacked'
        if query['min'] is not None:
            override['yaxis'] = override.get('yaxis', {})
            override['yaxis'].update({'min': convert.parse_str(query['min'])})
        if query['max'] is not None:
            override['yaxis'] = override.get('yaxis', {})
            override['yaxis'].update({'max': convert.parse_str(query['max'])})
        graphs[graph_key].update(override)

    # now that some constants are promoted, we can give the graph more
    # unique keys based on all (original + promoted) constants. this is in
    # line with the meaning of the graph ("all targets with those constant
    # tags"), but more importantly: this fixes cases where some graphs
    # would otherwise have the same key, even though they have a different
    # set of constants. this can manifest itself on dashboard pages where
    # graphs for different queries are shown.
    new_graphs = {}
    for (graph_key, graph_config) in graphs.items():
        better_graph_key_1 = '__'.join('%s_%s' % i for i in graph_config['constants'].items())
        better_graph_key_2 = '__'.join('%s_%s' % i for i in graph_config['promoted_constants'].items())
        better_graph_key = '%s___%s' % (better_graph_key_1, better_graph_key_2)
        new_graphs[better_graph_key] = graph_config
    graphs = new_graphs
    return (graphs, query)
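
# --- hedged sketch (editor's addition): the 'avg over' -> movingAverage step ---
# Given the minutes-per-unit table in build_graphs_from_targets (which assumes
# minutely storage), an avg_over of (2, 'h') becomes a movingAverage over 120
# datapoints, encoded in the same target_modifier dict format used above.
if __name__ == '__main__':
    averaging = {'M': 1, 'h': 60, 'd': 60 * 24, 'w': 60 * 24 * 7, 'mo': 60 * 24 * 30}
    avg_over = (2, 'h')  # as parsed from "avg over 2h"
    target_modifier = {'target': ['movingAverage', str(avg_over[0] * averaging[avg_over[1]])]}
    print(target_modifier)  # {'target': ['movingAverage', '120']}
    # applying it the way the modifier loop above does:
    target = 'stats.web1.requests'
    print("%s(%s,%s)" % (target_modifier['target'][0], target,
                         ','.join(target_modifier['target'][1:])))
    # -> movingAverage(stats.web1.requests,120)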
def build_graphs_from_targets(targets, query):
    graphs = {}
    if not targets:
        return (graphs, query)
    group_by = query['group_by']
    sum_by = query['sum_by']
    avg_by = query['avg_by']
    avg_over = query['avg_over']
    # i'm gonna assume you never use seconds and your datapoints are stored with
    # minutely resolution. later on we can use config options for this (or
    # better: somehow query graphite about it)
    # note, the day/week/month numbers are not technically accurate, but
    # since we're doing movingAvg that's ok
    averaging = {
        'M': 1,
        'h': 60,
        'd': 60 * 24,
        'w': 60 * 24 * 7,
        'mo': 60 * 24 * 30
    }
    if avg_over is not None:
        avg_over_amount = avg_over[0]
        avg_over_unit = avg_over[1]
        if avg_over_unit in averaging.keys():
            multiplier = averaging[avg_over_unit]
            query['target_modifiers'].append(
                Query.graphite_function_applier('movingAverage',
                                                avg_over_amount * multiplier))

    # for each group_by bucket, make 1 graph.
    # so for each graph, we have:
    # the "constants": tags in the group_by
    # the "variables": tags not in the group_by, which can have arbitrary
    # values, or different values from a group_by tag that match the same
    # bucket pattern
    # go through all targets and group them into graphs:
    for _target_id, target_data in sorted(targets.items()):
        # FWIW. has an 'id' which timeserieswidget doesn't care about
        target = Target(target_data)
        target['target'] = target['id']
        (graph_key, constants) = target.get_graph_info(group_by)
        if graph_key not in graphs:
            graph = {'from': query['from'], 'until': query['to']}
            graph.update({'constants': constants, 'targets': []})
            graphs[graph_key] = graph
        graphs[graph_key]['targets'].append(target)

    # ok so now we have a graphs dictionary with a graph for every appropriate
    # combination of group_by tags, and each graph contains all targets that
    # should be shown on it. but the user may have asked to aggregate certain
    # targets together, by summing and/or averaging across different values of
    # (a) certain tag(s). let's process the aggregations now.
    if (sum_by or avg_by):
        for (graph_key, graph_config) in graphs.items():
            graph_config['targets_sum_candidates'] = {}
            graph_config['targets_avg_candidates'] = {}
            graph_config['normal_targets'] = []

            for target in graph_config['targets']:
                sum_id = target.get_agg_key(sum_by)
                if sum_id:
                    if sum_id not in graph_config['targets_sum_candidates']:
                        graphs[graph_key]['targets_sum_candidates'][sum_id] = []
                    graph_config['targets_sum_candidates'][sum_id].append(target)

            for (sum_id, targets) in graph_config['targets_sum_candidates'].items():
                if len(targets) > 1:
                    for t in targets:
                        graph_config['targets'].remove(t)
                    graph_config['targets'].append(
                        graphite_func_aggregate(targets, sum_by, "sumSeries"))

            for target in graph_config['targets']:
                # Now that any summing is done, we look at aggregating by
                # averaging, because avg(foo+bar+baz) is more efficient
                # than avg(foo)+avg(bar)+avg(baz)
                # aggregate targets (whether those are sums or regular ones)
                avg_id = target.get_agg_key(avg_by)
                if avg_id:
                    if avg_id not in graph_config['targets_avg_candidates']:
                        graph_config['targets_avg_candidates'][avg_id] = []
                    graph_config['targets_avg_candidates'][avg_id].append(target)

            for (avg_id, targets) in graph_config['targets_avg_candidates'].items():
                if len(targets) > 1:
                    for t in targets:
                        graph_config['targets'].remove(t)
                    graph_config['targets'].append(
                        graphite_func_aggregate(targets, avg_by, "averageSeries"))

    # remove targets/graphs over the limit
    graphs = graphs_limit_targets(graphs, query['limit_targets'])

    # Apply target modifiers (like movingAverage, summarize, ...)
    for (graph_key, graph_config) in graphs.items():
        for target in graph_config['targets']:
            for target_modifier in query['target_modifiers']:
                target_modifier(target, graph_config)

    # if in a graph all targets have a tag with the same value, they are
    # effectively constants, so promote them. this makes the display of the
    # graphs less redundant and makes it easier to do config/preferences
    # on a per-graph basis.
    for (graph_key, graph_config) in graphs.items():
        # get all variable tags throughout all targets in this graph
        tags_seen = set()
        for target in graph_config['targets']:
            for tag_name in target['variables'].keys():
                tags_seen.add(tag_name)

        # find effective constants from those variables,
        # and effective variables. (unset tag is a value too)
        first_values_seen = {}
        effective_variables = set()  # tags for which we've seen >1 values
        for target in graph_config['targets']:
            for tag_name in tags_seen:
                # already known that we can't promote, continue
                if tag_name in effective_variables:
                    continue
                tag_value = target['variables'].get(tag_name, None)
                if tag_name not in first_values_seen:
                    first_values_seen[tag_name] = tag_value
                elif tag_value != first_values_seen[tag_name]:
                    effective_variables.add(tag_name)
        effective_constants = tags_seen - effective_variables

        # promote the effective_constants by adjusting graph and targets:
        graph_config['promoted_constants'] = {}
        for tag_name in effective_constants:
            graph_config['promoted_constants'][tag_name] = first_values_seen[tag_name]
            for target in graph_config['targets']:
                target['variables'].pop(tag_name, None)

        # now that graph config is "rich", merge in settings from preferences
        constants = dict(graph_config['constants'].items() +
                         graph_config['promoted_constants'].items())
        for graph_option in get_action_on_rules_match(preferences.graph_options, constants):
            if isinstance(graph_option, dict):
                graph_config.update(graph_option)
            else:
                graph_config = graphs[graph_key] = graph_option(graph_config)

        # but, the query may override some preferences:
        override = {}
        if query['statement'] == 'lines':
            override['state'] = 'lines'
        if query['statement'] == 'stack':
            override['state'] = 'stacked'
        if query['min'] is not None:
            override['yaxis'] = override.get('yaxis', {})
            override['yaxis'].update({'min': convert.parse_str(query['min'])})
        if query['max'] is not None:
            override['yaxis'] = override.get('yaxis', {})
            override['yaxis'].update({'max': convert.parse_str(query['max'])})
        graphs[graph_key].update(override)

    # now that some constants are promoted, we can give the graph more
    # unique keys based on all (original + promoted) constants. this is in
    # line with the meaning of the graph ("all targets with those constant
    # tags"), but more importantly: this fixes cases where some graphs
    # would otherwise have the same key, even though they have a different
    # set of constants. this can manifest itself on dashboard pages where
    # graphs for different queries are shown.
    # note that we can't just compile constants + promoted_constants,
    # part of the original graph key is also set by the group by (which, by
    # means of the bucket patterns, doesn't always translate into constants);
    # we solve this by just including the old key.
    new_graphs = {}
    for (graph_key, graph_config) in graphs.items():
        new_key = ','.join('%s=%s' % i for i in graph_config['promoted_constants'].items())
        new_key = '%s__%s' % (graph_key, new_key)
        new_graphs[new_key] = graph_config
    graphs = new_graphs
    return (graphs, query)
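
# --- hedged sketch (editor's addition): the constant-promotion pass above ---
# Simplified: assumes every target carries every tag; the real pass also treats
# an unset tag as a distinct value via .get(tag_name, None). A tag whose value
# is identical across all targets of a graph gets promoted to a constant.
if __name__ == '__main__':
    targets = [
        {'variables': {'server': 'web1', 'type': 'update'}},
        {'variables': {'server': 'web2', 'type': 'update'}},
    ]
    first_values_seen = {}
    effective_variables = set()
    for target in targets:
        for (tag, val) in target['variables'].items():
            if tag in first_values_seen and first_values_seen[tag] != val:
                effective_variables.add(tag)
            first_values_seen.setdefault(tag, val)
    promoted = dict((tag, val) for (tag, val) in first_values_seen.items()
                    if tag not in effective_variables)
    print(promoted)  # {'type': 'update'}: 'server' differs, so it stays variable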
def parse_query(query_str):
    avg_over_match = '^([0-9]*)(s|M|h|d|w|mo)$'
    query = {
        'patterns': [],
        'group_by': ['target_type=', 'unit=', 'server'],
        'sum_by': [],
        'avg_by': [],
        'avg_over': None,
        'min': None,
        'max': None
    }

    # for a call like ('foo bar baz quux', 'bar ', 'baz', 'def')
    # returns ('foo quux', 'baz') or the original query and the default val if no match
    def parse_out_value(query_str, predicate_match, value_match, value_default):
        match = re.search('(%s%s)' % (predicate_match, value_match), query_str)
        value = value_default
        if match and len(match.groups()) > 0:
            value = match.groups(1)[0].replace(predicate_match, '')
            query_str = query_str[:match.start(1)] + query_str[match.end(1):]
        return (query_str, value)

    (query_str, query['statement']) = parse_out_value(query_str, '^', '(graph|list|stack|lines) ', 'graph')
    query['statement'] = query['statement'].rstrip()
    (query_str, query['to']) = parse_out_value(query_str, 'to ', '[^ ]+', 'now')
    (query_str, query['from']) = parse_out_value(query_str, 'from ', '[^ ]+', '-24hours')
    (query_str, group_by_str) = parse_out_value(query_str, 'GROUP BY ', '[^ ]+', None)
    (query_str, extra_group_by_str) = parse_out_value(query_str, 'group by ', '[^ ]+', None)
    (query_str, sum_by_str) = parse_out_value(query_str, 'sum by ', '[^ ]+', None)
    (query_str, avg_by_str) = parse_out_value(query_str, 'avg by ', '[^ ]+', None)
    (query_str, avg_over_str) = parse_out_value(query_str, 'avg over ', '[^ ]+', None)
    (query_str, min_str) = parse_out_value(query_str, 'min ', '[^ ]+', None)
    (query_str, max_str) = parse_out_value(query_str, 'max ', '[^ ]+', None)

    explicit_group_by = []
    if group_by_str is not None:
        explicit_group_by = group_by_str.split(',')
        query['group_by'] = explicit_group_by
    elif extra_group_by_str is not None:
        explicit_group_by = extra_group_by_str.split(',')
        query['group_by'] = [tag for tag in query['group_by'] if tag.endswith('=')]
        query['group_by'].extend(explicit_group_by)
    if sum_by_str is not None:
        query['sum_by'] = sum_by_str.split(',')
    if avg_by_str is not None:
        query['avg_by'] = avg_by_str.split(',')
    if min_str is not None:
        # check if we can parse the values, but don't actually replace yet
        # because we want to keep the 'pretty' value for now so we can display
        # it in the query details section
        convert.parse_str(min_str)
        query['min'] = min_str
    if max_str is not None:
        convert.parse_str(max_str)
        query['max'] = max_str

    # if you specified a tag in avg_by or sum_by that is included in the
    # default group_by (and you didn't explicitly ask to group by that tag), we
    # remove it from group by, so that the avg/sum can work properly.
    for tag in query['sum_by'] + query['avg_by']:
        for tag_check in (tag, "%s=" % tag):
            if tag_check in query['group_by'] and tag_check not in explicit_group_by:
                query['group_by'].remove(tag_check)

    if len(query['group_by']) + len(query['sum_by']) + len(query['avg_by']) != \
            len(set(query['group_by'] + query['sum_by'] + query['avg_by'])):
        raise Exception("'group by' (%s), 'sum by (%s)' and 'avg by (%s)' "
                        "cannot list the same tag keys" %
                        (', '.join(query['group_by']),
                         ', '.join(query['sum_by']),
                         ', '.join(query['avg_by'])))

    if avg_over_str is not None:
        # avg_over_str should be something like 'h', '10M', etc
        avg_over = re.match(avg_over_match, avg_over_str)
        if avg_over is not None:  # if None, that's an invalid request. ignore it. TODO error to user
            avg_over = avg_over.groups()
            # a bare unit (e.g. plain 'h') has an empty amount; treat that as 1
            query['avg_over'] = (int(avg_over[0] or 1), avg_over[1])

    for tag in query['group_by']:
        if tag.endswith('='):
            query['patterns'].append(tag)

    (query_str, query['limit_targets']) = parse_out_value(query_str, 'limit ', '[^ ]+', 500)

    # split query_str into multiple patterns which are all matched independently
    # this allows you to write patterns in any order, and also makes it easy to use negations
    query['patterns'] += query_str.split()
    return query
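
# --- hedged sketch (editor's addition): what parse_out_value does to a query ---
# Mirrors the helper above: the matched 'predicate value' span is cut out of the
# string and the value (minus the predicate) is returned. The cut leaves a double
# space behind, which is harmless because patterns are later split on whitespace.
if __name__ == '__main__':
    import re

    def parse_out_value(query_str, predicate_match, value_match, value_default):
        match = re.search('(%s%s)' % (predicate_match, value_match), query_str)
        value = value_default
        if match and len(match.groups()) > 0:
            value = match.groups(1)[0].replace(predicate_match, '')
            query_str = query_str[:match.start(1)] + query_str[match.end(1):]
        return (query_str, value)

    print(parse_out_value('foo bar baz quux', 'bar ', '[^ ]+', None))
    # -> ('foo  quux', 'baz')
    print(parse_out_value('foo quux', 'bar ', '[^ ]+', 'def'))
    # -> ('foo quux', 'def'): no match, so the default comes back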
def parse(self, query_str):
    avg_over_match = '^([0-9]*)(s|M|h|d|w|mo)$'

    # for a call like ('foo bar baz quux', 'bar ', 'baz', 'def')
    # returns ('foo quux', 'baz') or the original query and the default val if no match
    def parse_val(query_str, predicate_match, value_match, value_default=None):
        match = re.search('\\b(%s%s)' % (predicate_match, value_match), query_str)
        value = value_default
        if match and len(match.groups()) > 0:
            value = match.groups(1)[0].replace(predicate_match, '')
            query_str = query_str[:match.start(1)] + query_str[match.end(1):]
        return (query_str, value)

    if '||' in query_str:
        (query_str, _, self['events_query']) = query_str.partition('||')

    (query_str, self['statement']) = parse_val(query_str, '^', '(graph|list|stack|lines)\\b', self['statement'])
    self['statement'] = self['statement'].rstrip()
    (query_str, self['to']) = parse_val(query_str, 'to ', '[^ ]+', self['to'])
    (query_str, self['from']) = parse_val(query_str, 'from ', '[^ ]+', self['from'])
    (query_str, group_by_str) = parse_val(query_str, 'GROUP BY ', '[^ ]+')
    (query_str, extra_group_by_str) = parse_val(query_str, 'group by ', '[^ ]+')
    (query_str, sum_by_str) = parse_val(query_str, 'sum by ', '[^ ]+')
    (query_str, avg_by_str) = parse_val(query_str, 'avg by ', '[^ ]+')
    (query_str, avg_over_str) = parse_val(query_str, 'avg over ', '[^ ]+')
    (query_str, min_str) = parse_val(query_str, 'min ', '[^ ]+')
    (query_str, max_str) = parse_val(query_str, 'max ', '[^ ]+')

    explicit_group_by = {}
    if group_by_str is not None:
        explicit_group_by = Query.build_buckets(group_by_str)
        self['group_by'] = explicit_group_by
    elif extra_group_by_str is not None:
        for k in self['group_by'].keys():
            if not k.endswith('='):
                del self['group_by'][k]
        explicit_group_by = Query.build_buckets(extra_group_by_str)
        self['group_by'].update(explicit_group_by)

    if sum_by_str is not None:
        self['sum_by'] = Query.build_buckets(sum_by_str)
    if avg_by_str is not None:
        self['avg_by'] = Query.build_buckets(avg_by_str)

    if min_str is not None:
        # check if we can parse the values, but don't actually replace yet
        # because we want to keep the 'pretty' value for now so we can display
        # it in the query details section
        convert.parse_str(min_str)
        self['min'] = min_str
    if max_str is not None:
        convert.parse_str(max_str)
        self['max'] = max_str

    # if you specified a tag in avg_by or sum_by that is included in the
    # default group_by (and you didn't explicitly ask to group by that tag), we
    # remove it from group by, so that the avg/sum can work properly.
    for tag in self['sum_by'].keys() + self['avg_by'].keys():
        for tag_check in (tag, "%s=" % tag):
            if tag_check in self['group_by'] and tag_check not in explicit_group_by.keys():
                del self['group_by'][tag_check]

    # doing this sanity check would now be tricky: basically you can have the same keys in more than 1 of sum/avg/group by,
    # it now depends on the bucket configuration. since i can't wrap my head around it anymore, let's just leave it be for now.
    # it's up to people to construct sane queries, and if they do a stupid query, then at least GE shouldn't crash or anything.
    # sum_individual_keys = len(self['group_by']) + len(self['sum_by']) + len(self['avg_by'])
    # sum_unique_keys = len(set(self['group_by'].keys() + self['sum_by'].keys() + self['avg_by'].keys()))
    # if sum_individual_keys != sum_unique_keys:
    #     raise Exception("'group by' (%s), 'sum by (%s)' and 'avg by (%s)' "
    #                     "cannot list the same tag keys" %
    #                     (', '.join(self['group_by'].keys()),
    #                      ', '.join(self['sum_by'].keys()),
    #                      ', '.join(self['avg_by'].keys())))

    if avg_over_str is not None:
        # avg_over_str should be something like 'h', '10M', etc
        avg_over = re.match(avg_over_match, avg_over_str)
        if avg_over is not None:  # if None, that's an invalid request. ignore it. TODO error to user
            avg_over = avg_over.groups()
            # a bare unit (e.g. plain 'h') has an empty amount; treat that as 1
            self['avg_over'] = (int(avg_over[0] or 1), avg_over[1])

    (query_str, self['limit_targets']) = parse_val(query_str, 'limit ', '[^ ]+', self['limit_targets'])
    self['limit_targets'] = int(self['limit_targets'])

    # split query_str into multiple patterns which are all matched independently
    # this allows you to write patterns in any order, and also makes it easy to use negations
    self['patterns'] += query_str.split()
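
# --- hedged sketch (editor's addition): the '||' events-query separator ---
# str.partition leaves the string untouched when the separator is absent, so
# self['events_query'] is only set when the user actually supplied one. The
# stray whitespace around '||' is harmless: patterns are split on whitespace.
if __name__ == '__main__':
    for query_str in ('stack web server* || type=deploy', 'stack web server*'):
        (metrics_part, _, events_part) = query_str.partition('||')
        print((metrics_part, events_part))
    # -> ('stack web server* ', ' type=deploy')
    # -> ('stack web server*', '')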
def build_from_targets(targets, query, preferences):
    graphs = {}
    if not targets:
        return (graphs, query)
    group_by = query['group_by']
    sum_by = query['sum_by']
    avg_by = query['avg_by']
    avg_over = query['avg_over']
    # i'm gonna assume you never use seconds and your datapoints are stored with
    # minutely resolution. later on we can use config options for this (or
    # better: somehow query graphite about it)
    # note, the day/week/month numbers are not technically accurate, but
    # since we're doing movingAvg that's ok
    averaging = {
        'M': 1,
        'h': 60,
        'd': 60 * 24,
        'w': 60 * 24 * 7,
        'mo': 60 * 24 * 30
    }
    if avg_over is not None:
        avg_over_amount = avg_over[0]
        avg_over_unit = avg_over[1]
        if avg_over_unit in averaging.keys():
            multiplier = averaging[avg_over_unit]
            query['target_modifiers'].append(
                Query.graphite_function_applier('movingAverage',
                                                avg_over_amount * multiplier))

    # for each group_by bucket, make 1 graph.
    # so for each graph, we have:
    # the "constants": tags in the group_by
    # the "variables": tags not in the group_by, which can have arbitrary
    # values, or different values from a group_by tag that match the same
    # bucket pattern
    # go through all targets and group them into graphs:
    for _target_id, target_data in sorted(targets.items()):
        # FWIW. has an 'id' which timeserieswidget doesn't care about
        target = Target(target_data)
        target['target'] = target['id']
        (graph_key, constants) = target.get_graph_info(group_by)
        if graph_key not in graphs:
            graph = {'from': query['from'], 'until': query['to']}
            graph.update({'constants': constants, 'targets': []})
            graphs[graph_key] = graph
        graphs[graph_key]['targets'].append(target)

    # ok so now we have a graphs dictionary with a graph for every appropriate
    # combination of group_by tags, and each graph contains all targets that
    # should be shown on it. but the user may have asked to aggregate certain
    # targets together, by summing and/or averaging across different values of
    # (a) certain tag(s). let's process the aggregations now.
    if (sum_by or avg_by):
        for (graph_key, graph_config) in graphs.items():
            graph_config['targets_sum_candidates'] = {}
            graph_config['targets_avg_candidates'] = {}
            graph_config['normal_targets'] = []

            # process equivalence rules, see further down.
            filter_candidates = {}
            for tag, buckets in sum_by.items():
                # first separate the individuals from the _sum_
                filter_candidates[tag] = {}
                for target in graph_config['targets']:
                    # we can use agg_key to find out if they all have the same values
                    # other than this one particular key
                    key = target.get_agg_key({tag: buckets})
                    if key not in filter_candidates[tag]:
                        filter_candidates[tag][key] = {'individuals': []}
                    if target['tags'].get(tag, '') == '_sum_':
                        filter_candidates[tag][key]['_sum_'] = target
                    else:
                        filter_candidates[tag][key]['individuals'].append(target)

                # for all agg keys that only have the '' bucket,
                # if targets are identical except that some have tag
                # foo={bar,baz,0,quux, ...} and one of them has foo=_sum_ and we're
                # summing by that tag, and we didn't filter on foo,
                # remove all the ones except the sum one
                if len(buckets) == 1 and buckets[0] == '':
                    if not Query.filtered_on(query, tag):
                        for key in filter_candidates[tag].keys():
                            if '_sum_' in filter_candidates[tag][key]:
                                for i in filter_candidates[tag][key]['individuals']:
                                    graph_config['targets'].remove(i)

                # if we are summing, and we have a filter, and we have individual ones and a _sum_,
                # remove the _sum_ irrespective of buckets. note that this removes the _sum_ target
                # without the user needing to filter it out explicitly.
                # this is the only place we do that, but it makes sense. we wouldn't want users to
                # specify the _sum_ removal explicitly all the time, esp for multiple tag keys
                if Query.filtered_on(query, tag):
                    for key in filter_candidates[tag].keys():
                        if '_sum_' in filter_candidates[tag][key]:
                            graph_config['targets'].remove(filter_candidates[tag][key]['_sum_'])

            for target in graph_config['targets']:
                sum_id = target.get_agg_key(sum_by)
                if sum_id:
                    if sum_id not in graph_config['targets_sum_candidates']:
                        graphs[graph_key]['targets_sum_candidates'][sum_id] = []
                    graph_config['targets_sum_candidates'][sum_id].append(target)

            for (sum_id, targets) in graph_config['targets_sum_candidates'].items():
                if len(targets) > 1:
                    for candidate in targets:
                        graph_config['targets'].remove(candidate)
                    graph_config['targets'].append(
                        t.graphite_func_aggregate(targets, sum_by, "sumSeries"))

            for target in graph_config['targets']:
                # Now that any summing is done, we look at aggregating by
                # averaging, because avg(foo+bar+baz) is more efficient
                # than avg(foo)+avg(bar)+avg(baz)
                # aggregate targets (whether those are sums or regular ones)
                avg_id = target.get_agg_key(avg_by)
                if avg_id:
                    if avg_id not in graph_config['targets_avg_candidates']:
                        graph_config['targets_avg_candidates'][avg_id] = []
                    graph_config['targets_avg_candidates'][avg_id].append(target)

            for (avg_id, targets) in graph_config['targets_avg_candidates'].items():
                if len(targets) > 1:
                    for candidate in targets:
                        graph_config['targets'].remove(candidate)
                    graph_config['targets'].append(
                        t.graphite_func_aggregate(targets, avg_by, "averageSeries"))

    # remove targets/graphs over the limit
    graphs = limit_targets(graphs, query['limit_targets'])

    # Apply target modifiers (like movingAverage, summarize, ...)
    for (graph_key, graph_config) in graphs.items():
        for target in graph_config['targets']:
            for target_modifier in query['target_modifiers']:
                target_modifier(target, graph_config)

    # if in a graph all targets have a tag with the same value, they are
    # effectively constants, so promote them. this makes the display of the
    # graphs less redundant and makes it easier to do config/preferences
    # on a per-graph basis.
    for (graph_key, graph_config) in graphs.items():
        # get all variable tags throughout all targets in this graph
        tags_seen = set()
        for target in graph_config['targets']:
            for tag_name in target['variables'].keys():
                tags_seen.add(tag_name)

        # find effective constants from those variables,
        # and effective variables. (unset tag is a value too)
        first_values_seen = {}
        effective_variables = set()  # tags for which we've seen >1 values
        for target in graph_config['targets']:
            for tag_name in tags_seen:
                # already known that we can't promote, continue
                if tag_name in effective_variables:
                    continue
                tag_value = target['variables'].get(tag_name, None)
                if tag_name not in first_values_seen:
                    first_values_seen[tag_name] = tag_value
                elif tag_value != first_values_seen[tag_name]:
                    effective_variables.add(tag_name)
        effective_constants = tags_seen - effective_variables

        # promote the effective_constants by adjusting graph and targets:
        graph_config['promoted_constants'] = {}
        for tag_name in effective_constants:
            graph_config['promoted_constants'][tag_name] = first_values_seen[tag_name]
            for target in graph_config['targets']:
                target['variables'].pop(tag_name, None)

        # now that graph config is "rich", merge in settings from preferences
        constants = dict(graph_config['constants'].items() +
                         graph_config['promoted_constants'].items())
        for graph_option in get_action_on_rules_match(preferences.graph_options, constants):
            if isinstance(graph_option, dict):
                graph_config.update(graph_option)
            else:
                graph_config = graphs[graph_key] = graph_option(graph_config)

        # but, the query may override some preferences:
        override = {}
        if query['statement'] == 'lines':
            override['state'] = 'lines'
        if query['statement'] == 'stack':
            override['state'] = 'stacked'
        if query['min'] is not None:
            override['yaxis'] = override.get('yaxis', {})
            override['yaxis'].update({'min': convert.parse_str(query['min'])})
        if query['max'] is not None:
            override['yaxis'] = override.get('yaxis', {})
            override['yaxis'].update({'max': convert.parse_str(query['max'])})
        graphs[graph_key].update(override)

    # now that some constants are promoted, we can give the graph more
    # unique keys based on all (original + promoted) constants. this is in
    # line with the meaning of the graph ("all targets with those constant
    # tags"), but more importantly: this fixes cases where some graphs
    # would otherwise have the same key, even though they have a different
    # set of constants. this can manifest itself on dashboard pages where
    # graphs for different queries are shown.
    # note that we can't just compile constants + promoted_constants,
    # part of the original graph key is also set by the group by (which, by
    # means of the bucket patterns, doesn't always translate into constants);
    # we solve this by just including the old key.
    new_graphs = {}
    for (graph_key, graph_config) in graphs.items():
        new_key = ','.join('%s=%s' % i for i in graph_config['promoted_constants'].items())
        new_key = '%s__%s' % (graph_key, new_key)
        new_graphs[new_key] = graph_config
    graphs = new_graphs
    return (graphs, query)
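
# --- hedged, simplified sketch (editor's addition): the '_sum_' equivalence rules ---
# The real code groups targets via Target.get_agg_key and Query.filtered_on; this
# toy version only shows the two outcomes. Without a filter on the tag, the
# precomputed '_sum_' series replaces the individual series; with a filter, the
# individuals win and the '_sum_' series is dropped.
if __name__ == '__main__':
    targets = [{'foo': 'bar'}, {'foo': 'baz'}, {'foo': '_sum_'}]

    def resolve(targets, filtered_on_tag):
        if filtered_on_tag:
            return [t for t in targets if t['foo'] != '_sum_']
        return [t for t in targets if t['foo'] == '_sum_']

    print(resolve(targets, filtered_on_tag=False))  # [{'foo': '_sum_'}]
    print(resolve(targets, filtered_on_tag=True))   # the two individual series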