def get_group_key(self, field_groups, alias_groups, select_context,
                  alias_group_result_context, index):
    """Assemble the one-row context that serves as a row's group key.

    Nothing is evaluated here: the values were computed earlier, and this
    method only copies the value at ``index`` out of the relevant columns.

    Arguments:
        field_groups: A list of ColumnRefs for the field groups to use.
        alias_groups: A list of strings of alias groups to use.
        select_context: A context with the data for the table expression
            being selected from.
        alias_group_result_context: A context with the data for the
            grouped-by select fields.
        index: The row index to use from each context.

    Returns:
        A context built with a row count of 1 and no aggregate context.
        Its columns are the field-group columns, keyed by
        (table, column), followed by the alias-group columns, keyed by
        (None, alias); each holds the single value found at ``index``.
    """
    def single_value_column(source):
        # TODO(Samantha): This shouldn't just be nullable.
        return context.Column(
            type=source.type,
            mode=tq_modes.NULLABLE,
            values=[source.values[index]])

    key_columns = collections.OrderedDict()

    # Field groups are read straight from the table being selected from.
    for group_ref in field_groups:
        field_key = (group_ref.table, group_ref.column)
        key_columns[field_key] = single_value_column(
            select_context.columns[field_key])

    # Alias groups are select fields, which always have the None table.
    for alias in alias_groups:
        alias_key = (None, alias)
        key_columns[alias_key] = single_value_column(
            alias_group_result_context.columns[alias_key])

    return context.Context(1, key_columns, None)
def empty_context_from_select_fields(self, select_fields):
    """Build a zero-row context shaped like the given select fields.

    Arguments:
        select_fields: The select fields to create columns for. Each one
            contributes a column keyed by (None, alias) whose type is
            taken from the field's expression.

    Returns:
        A context with a row count of 0, one empty column per select
        field (in the order given) and no aggregate context.
    """
    empty_columns = collections.OrderedDict()
    for field in select_fields:
        column_key = (None, field.alias)
        # TODO(Samantha): This shouldn't just be nullable
        empty_columns[column_key] = context.Column(
            type=field.expr.type,
            mode=tq_modes.NULLABLE,
            values=[])
    return context.Context(0, empty_columns, None)
def evaluate_select_fields(self, select_fields, ctx):
    """Evaluate a table result given the data the fields have access to.

    Arguments:
        select_fields: A list of typed_ast.SelectField values to evaluate.
        ctx: The "source" context that the expressions can access when
            being evaluated.

    Returns:
        A context with the same row count as ``ctx`` and no aggregate
        context. Its columns are the (key, column) pairs produced by
        self.evaluate_select_field for each select field, in order.
    """
    row_count = ctx.num_rows
    evaluated_pairs = [
        self.evaluate_select_field(field, ctx) for field in select_fields
    ]
    result_columns = collections.OrderedDict(evaluated_pairs)
    return context.Context(row_count, result_columns, None)
def merge_contexts_for_select_fields(self, col_names, context1, context2):
    """Build a context that combines columns of two contexts.

    The col_names argument is a list of strings that specifies the order
    of the columns in the result. Note that not every column must be
    used, and columns in context1 take precedence over context2 (this
    happens in practice with non-alias groups that are part of the group
    key).

    Arguments:
        col_names: Names of the columns to include, in result order.
        context1: The context whose columns win when both contexts have
            a column of the same name.
        context2: The context consulted for any name context1 lacks.

    Returns:
        A context with context1's row count, the selected columns keyed
        by (None, name) and no aggregate context.

    Raises:
        AssertionError: If the row counts differ or either context has
            an aggregate context.
        KeyError: Presumably raised when a name is in neither context;
            this assumes the columns containers behave like dicts.
    """
    assert context1.num_rows == context2.num_rows
    assert context1.aggregate_context is None
    assert context2.aggregate_context is None

    columns1, columns2 = context1.columns, context2.columns
    merged_columns = collections.OrderedDict()
    for col_name in col_names:
        # Select fields always have the None table.
        col_key = (None, col_name)
        column = columns1.get(col_key)
        # Decide on presence, not truthiness: a column object that
        # happens to evaluate as false must still take precedence over
        # the one in context2.
        if column is None:
            column = columns2[col_key]
        merged_columns[col_key] = column
    return context.Context(context1.num_rows, merged_columns, None)
def evaluate_groups(self, select_fields, group_set, select_context):
    """Evaluate a list of select fields, grouping by some of the values.

    Rows of select_context are partitioned by group key, the aggregate
    select fields are evaluated once per group, and one result row is
    emitted per group in the order the groups were first seen.

    Arguments:
        select_fields: A list of SelectField instances to evaluate.
        group_set: The groups (either fields in select_context or
            aliases referring to an element of select_fields) to group
            by.
        select_context: A context with the data that the select
            statement has access to.

    Returns:
        A context with the results.
    """
    # TODO: Implement GROUP BY for repeated fields.
    field_groups = group_set.field_groups
    alias_groups = group_set.alias_groups
    alias_group_list = sorted(alias_groups)

    # Select fields named by an alias group form part of the group key;
    # every other select field is evaluated per group.
    group_key_select_fields = [
        f for f in select_fields if f.alias in alias_groups]
    aggregate_select_fields = [
        f for f in select_fields if f.alias not in alias_groups]

    alias_group_result_context = self.evaluate_select_fields(
        group_key_select_fields, select_context)

    # Dictionary mapping (singleton) group key context to the context of
    # values for that key. Ordered so that result rows come out in the
    # order in which the groups were first encountered.
    group_contexts = collections.OrderedDict()

    # As a special case, we check if we are grouping by nothing (in other
    # words, if the query had an aggregate without any explicit GROUP BY).
    # Normally, it's fine to just use the trivial group set: every row
    # maps to the empty tuple, so we have a single aggregation over the
    # entire table. However, if the table is empty, we still want to
    # aggregate over the empty table and return a single row, so this is
    # the one case where it's possible to have a group with no rows in
    # it. To make this case work, we ensure that the trivial group key
    # (the empty tuple) always shows up for the TRIVIAL_GROUP_SET case.
    # In the long run, it might be cleaner to view TRIVIAL_GROUP_SET as a
    # completely separate case, but this approach should work.
    if group_set == typed_ast.TRIVIAL_GROUP_SET:
        trivial_ctx = context.Context(1, collections.OrderedDict(), None)
        group_contexts[trivial_ctx] = (
            context.empty_context_from_template(select_context))

    # TODO: Seems pretty ugly and wasteful to use a whole context as a
    # group key.
    for i in six.moves.xrange(select_context.num_rows):
        key = self.get_group_key(
            field_groups, alias_group_list, select_context,
            alias_group_result_context, i)
        # Look the key up once per row; a new dictionary entry is only
        # created the first time a key is seen.
        group_context = group_contexts.get(key)
        if group_context is None:
            group_context = context.empty_context_from_template(
                select_context)
            group_contexts[key] = group_context
        context.append_row_to_context(src_context=select_context, index=i,
                                      dest_context=group_context)

    result_context = self.empty_context_from_select_fields(select_fields)
    result_col_names = [field.alias for field in select_fields]
    for context_key, group_context in group_contexts.items():
        # The key's columns are the visible row; the group's rows are
        # attached as the aggregate context.
        group_eval_context = context.Context(
            1, context_key.columns, group_context)
        group_aggregate_result_context = self.evaluate_select_fields(
            aggregate_select_fields, group_eval_context)
        # Restore the original select-field order by merging the
        # aggregate results with the group key columns.
        full_result_row_context = self.merge_contexts_for_select_fields(
            result_col_names, group_aggregate_result_context,
            context_key)
        context.append_row_to_context(full_result_row_context, 0,
                                      result_context)
    return result_context
def eval_table_NoTable(self, table_expr):
    """Return the context used when the query selects from no table.

    Arguments:
        table_expr: The table expression; it is not inspected.

    Returns:
        A context with a row count of 1, no columns and no aggregate
        context.
    """
    # With no table there are no columns to expose, but the context
    # still reports a single row (presumably so that a select without a
    # table produces exactly one result row -- confirm against callers).
    no_columns = collections.OrderedDict()
    return context.Context(1, no_columns, None)