def eval_table_Join(self, table_expr):
    """Evaluate a join of a base table with one or more other tables.

    Arguments:
        table_expr: The join expression. Its `base` is the leftmost table,
            `tables` is a sequence of (table expression, join type) pairs,
            and `conditions` holds one list of join conditions per joined
            table.

    Returns:
        The context that results from joining each table, in order, onto
        the rows accumulated so far.
    """
    joined_context = self.evaluate_table_expr(table_expr.base)
    right_exprs, kinds = zip(*table_expr.tables)
    right_contexts = map(self.evaluate_table_expr, right_exprs)

    join_steps = zip(right_contexts, kinds, table_expr.conditions)
    for right_context, kind, join_conds in join_steps:
        if kind is tq_ast.JoinType.CROSS:
            joined_context = context.cross_join_contexts(
                joined_context, right_context)
        else:
            # The compilation step reordered the join conditions, so
            # column1 always refers to the left side of the current join.
            left_refs = [cond.column1 for cond in join_conds]
            right_refs = [cond.column2 for cond in join_conds]

            # Bucket the right-hand rows by their join key.
            right_rows_by_key = {}
            for right_index in xrange(right_context.num_rows):
                right_key = self.get_join_key(
                    right_context, right_refs, right_index)
                try:
                    bucket = right_rows_by_key[right_key]
                except KeyError:
                    bucket = context.empty_context_from_template(
                        right_context)
                    right_rows_by_key[right_key] = bucket
                context.append_row_to_context(
                    src_context=right_context,
                    index=right_index,
                    dest_context=bucket)

            # Start from an empty context carrying the columns of both
            # sides, then fill it one left-hand row at a time.
            step_result = context.cross_join_contexts(
                context.empty_context_from_template(joined_context),
                context.empty_context_from_template(right_context))
            for left_index in xrange(joined_context.num_rows):
                left_key = self.get_join_key(
                    joined_context, left_refs, left_index)
                left_row = context.row_context_from_context(
                    joined_context, left_index)
                if left_key in right_rows_by_key:
                    matched_rows = context.cross_join_contexts(
                        left_row, right_rows_by_key[left_key])
                    context.append_context_to_context(
                        matched_rows, step_result)
                elif kind is tq_ast.JoinType.LEFT_OUTER:
                    # For a left outer join, we still want to put in a
                    # row with nulls on the right.
                    context.append_context_to_context(
                        left_row, step_result)
            joined_context = step_result
    return joined_context
def eval_table_Join(self, table_expr):
    """Evaluate a join between two tables.

    Arguments:
        table_expr: The join expression. `table1` and `table2` are the
            left and right table expressions, `conditions` lists the join
            conditions (column1 is read from table 1, column2 from
            table 2), and `is_left_outer` selects left outer join
            behavior.

    Returns:
        A context holding the joined rows.
    """
    left_context = self.evaluate_table_expr(table_expr.table1)
    right_context = self.evaluate_table_expr(table_expr.table2)

    join_conds = table_expr.conditions
    left_refs = [cond.column1 for cond in join_conds]
    right_refs = [cond.column2 for cond in join_conds]

    # Map each table 2 join key to a context holding the rows with that
    # key.
    right_rows_by_key = {}
    for right_index in xrange(right_context.num_rows):
        right_key = self.get_join_key(
            right_context, right_refs, right_index)
        try:
            bucket = right_rows_by_key[right_key]
        except KeyError:
            bucket = context.empty_context_from_template(right_context)
            right_rows_by_key[right_key] = bucket
        context.append_row_to_context(
            src_context=right_context,
            index=right_index,
            dest_context=bucket)

    joined_context = context.cross_join_contexts(
        context.empty_context_from_template(left_context),
        context.empty_context_from_template(right_context),
    )

    for left_index in xrange(left_context.num_rows):
        left_key = self.get_join_key(left_context, left_refs, left_index)
        if left_key in right_rows_by_key:
            left_row = context.row_context_from_context(
                left_context, left_index)
            matched_rows = context.cross_join_contexts(
                left_row, right_rows_by_key[left_key])
            context.append_context_to_context(matched_rows, joined_context)
        elif table_expr.is_left_outer:
            # Left outer join means that if we didn't find something, we
            # still put in a row with nulls on the right.
            left_row = context.row_context_from_context(
                left_context, left_index)
            context.append_context_to_context(left_row, joined_context)
    return joined_context
def evaluate_groups(self, select_fields, group_set, select_context):
    """Evaluate a list of select fields, grouping by some of the values.

    Groups are emitted in the order in which their keys are first seen
    while scanning select_context.

    Arguments:
        select_fields: A list of SelectField instances to evaluate.
        group_set: The groups (either fields in select_context or aliases
            referring to an element of select_fields) to group by.
        select_context: A context with the data that the select statement
            has access to.

    Returns:
        A context with the results.
    """
    # TODO: Implement GROUP BY for repeated fields.
    field_groups = group_set.field_groups
    alias_groups = group_set.alias_groups
    alias_group_list = sorted(alias_groups)

    # Select fields named by an alias group are part of the group key;
    # everything else is evaluated once per group.
    group_key_select_fields = [
        f for f in select_fields if f.alias in alias_groups
    ]
    aggregate_select_fields = [
        f for f in select_fields if f.alias not in alias_groups
    ]
    alias_group_result_context = self.evaluate_select_fields(
        group_key_select_fields, select_context)

    # Dictionary mapping (singleton) group key context to the context of
    # values for that key. An ordered dictionary keeps the groups in
    # first-seen order.
    group_contexts = collections.OrderedDict()

    # As a special case, we check if we are grouping by nothing (in other
    # words, if the query had an aggregate without any explicit GROUP BY).
    # Normally, it's fine to just use the trivial group set: every row maps
    # to the empty tuple, so we have a single aggregation over the entire
    # table. However, if the table is empty, we still want to aggregate
    # over the empty table and return a single row, so this is the one case
    # where it's possible to have a group with no rows in it. To make this
    # case work, we ensure that the trivial group key (the empty tuple)
    # always shows up for the TRIVIAL_GROUP_SET case.
    # In the long run, it might be cleaner to view TRIVIAL_GROUP_SET as a
    # completely separate case, but this approach should work.
    if group_set == typed_ast.TRIVIAL_GROUP_SET:
        trivial_ctx = context.Context(1, collections.OrderedDict(), None)
        group_contexts[trivial_ctx] = (
            context.empty_context_from_template(select_context))

    # TODO: Seems pretty ugly and wasteful to use a whole context as a
    # group key.
    for i in xrange(select_context.num_rows):
        key = self.get_group_key(
            field_groups, alias_group_list, select_context,
            alias_group_result_context, i)
        if key not in group_contexts:
            new_group_context = context.empty_context_from_template(
                select_context)
            group_contexts[key] = new_group_context
        group_context = group_contexts[key]
        context.append_row_to_context(src_context=select_context, index=i,
                                      dest_context=group_context)

    result_context = self.empty_context_from_select_fields(select_fields)
    result_col_names = [field.alias for field in select_fields]
    for context_key, group_context in group_contexts.iteritems():
        # Evaluate the non-key fields in a one-row context built from the
        # key's columns with the group's rows attached.
        group_eval_context = context.Context(
            1, context_key.columns, group_context)
        group_aggregate_result_context = self.evaluate_select_fields(
            aggregate_select_fields, group_eval_context)
        full_result_row_context = self.merge_contexts_for_select_fields(
            result_col_names, group_aggregate_result_context, context_key)
        context.append_row_to_context(full_result_row_context, 0,
                                      result_context)
    return result_context
def evaluate_groups(self, select_fields, group_set, select_context):
    """Evaluate a list of select fields, grouping by some of the values.

    Groups are emitted in the order in which their keys are first seen
    while scanning select_context, so the result row order is
    deterministic.

    Arguments:
        select_fields: A list of SelectField instances to evaluate.
        group_set: The groups (either fields in select_context or aliases
            referring to an element of select_fields) to group by.
        select_context: A context with the data that the select statement
            has access to.

    Returns:
        A context with the results.
    """
    field_groups = group_set.field_groups
    alias_groups = group_set.alias_groups
    alias_group_list = sorted(alias_groups)

    # Select fields named by an alias group are part of the group key;
    # everything else is evaluated once per group.
    group_key_select_fields = [
        f for f in select_fields if f.alias in alias_groups]
    aggregate_select_fields = [
        f for f in select_fields if f.alias not in alias_groups]
    alias_group_result_context = self.evaluate_select_fields(
        group_key_select_fields, select_context)

    # Dictionary mapping (singleton) group key context to the context of
    # values for that key. A plain dict would hand the groups back in
    # arbitrary hash order, so an ordered dictionary is used to keep them
    # in first-seen order.
    group_contexts = collections.OrderedDict()

    # As a special case, we check if we are grouping by nothing (in other
    # words, if the query had an aggregate without any explicit GROUP BY).
    # Normally, it's fine to just use the trivial group set: every row maps
    # to the empty tuple, so we have a single aggregation over the entire
    # table. However, if the table is empty, we still want to aggregate
    # over the empty table and return a single row, so this is the one case
    # where it's possible to have a group with no rows in it. To make this
    # case work, we ensure that the trivial group key (the empty tuple)
    # always shows up for the TRIVIAL_GROUP_SET case.
    # In the long run, it might be cleaner to view TRIVIAL_GROUP_SET as a
    # completely separate case, but this approach should work.
    if group_set == typed_ast.TRIVIAL_GROUP_SET:
        trivial_ctx = context.Context(1, collections.OrderedDict(), None)
        group_contexts[trivial_ctx] = (
            context.empty_context_from_template(select_context))

    # TODO: Seems pretty ugly and wasteful to use a whole context as a
    # group key.
    for i in xrange(select_context.num_rows):
        key = self.get_group_key(
            field_groups, alias_group_list, select_context,
            alias_group_result_context, i)
        if key not in group_contexts:
            new_group_context = context.empty_context_from_template(
                select_context)
            group_contexts[key] = new_group_context
        group_context = group_contexts[key]
        context.append_row_to_context(src_context=select_context, index=i,
                                      dest_context=group_context)

    result_context = self.empty_context_from_select_fields(select_fields)
    result_col_names = [field.alias for field in select_fields]
    for context_key, group_context in group_contexts.iteritems():
        # Evaluate the non-key fields in a one-row context built from the
        # key's columns with the group's rows attached.
        group_eval_context = context.Context(
            1, context_key.columns, group_context)
        group_aggregate_result_context = self.evaluate_select_fields(
            aggregate_select_fields, group_eval_context)
        full_result_row_context = self.merge_contexts_for_select_fields(
            result_col_names, group_aggregate_result_context, context_key)
        context.append_row_to_context(full_result_row_context, 0,
                                      result_context)
    return result_context