def eval_table_Join(self, table_expr):
    """Evaluate a chain of joins, folding each table into the result.

    The base table is evaluated first and is then joined, left to right,
    with every entry of ``table_expr.tables`` using the matching entry of
    ``table_expr.conditions``.

    Arguments:
        table_expr: The join expression. This method reads its ``base``,
            ``tables`` (pairs of table expression and join type) and
            ``conditions`` attributes.

    Returns:
        The context produced once the last join has been applied.
    """
    joined = self.evaluate_table_expr(table_expr.base)
    right_exprs, kinds = zip(*table_expr.tables)
    right_contexts = map(self.evaluate_table_expr, right_exprs)

    steps = zip(right_contexts, kinds, table_expr.conditions)
    for right, kind, conds in steps:
        if kind is tq_ast.JoinType.CROSS:
            # A cross join has no keys to match on.
            joined = context.cross_join_contexts(joined, right)
        else:
            # The compilation step reordered the join conditions, so
            # column1 always belongs to the left side of this join.
            left_refs = [cond.column1 for cond in conds]
            right_refs = [cond.column2 for cond in conds]

            # Bucket the right-hand rows by their join key.
            buckets = {}
            for row in xrange(right.num_rows):
                key = self.get_join_key(right, right_refs, row)
                try:
                    bucket = buckets[key]
                except KeyError:
                    bucket = context.empty_context_from_template(right)
                    buckets[key] = bucket
                context.append_row_to_context(
                    src_context=right, index=row, dest_context=bucket)

            # Start from an empty context shaped like (left x right).
            output = context.cross_join_contexts(
                context.empty_context_from_template(joined),
                context.empty_context_from_template(right))

            for row in xrange(joined.num_rows):
                key = self.get_join_key(joined, left_refs, row)
                left_row = context.row_context_from_context(joined, row)
                if key in buckets:
                    context.append_context_to_context(
                        context.cross_join_contexts(left_row, buckets[key]),
                        output)
                elif kind is tq_ast.JoinType.LEFT_OUTER:
                    # For a left outer join, we still want to put in a
                    # row with nulls on the right.
                    context.append_context_to_context(left_row, output)

            joined = output

    return joined
def evaluate_within(self, select_fields, group_set, ctx, within_clause):
    """Evaluate a list of select fields, one of which has a WITHIN or
    WITHIN RECORD clause and/or grouping by some of the values.

    Only WITHIN RECORD is implemented. It is evaluated by copying ``ctx``,
    adding a 1-based row-number column to the copy, and grouping by that
    column.

    Arguments:
        select_fields: A list of SelectField instances to evaluate.
        group_set: The groups (either fields in select_context or aliases
            referring to an element of select_fields) to group by. It is
            modified in place: the row-number column is appended to
            ``field_groups`` and aliases of the fields without a WITHIN
            clause are added to ``alias_groups``.
        ctx: The "source" context that the expressions can access when
            being evaluated.
        within_clause: The scope named by the WITHIN clause. Only the
            string "RECORD" is supported.

    Returns:
        A context with the results.

    Raises:
        NotImplementedError: If ``within_clause`` is not "RECORD", or if a
            selected field without a WITHIN clause has mode REPEATED.
    """
    if within_clause != "RECORD":
        # TODO: Implement for WITHIN clause
        # Fail explicitly: without the RECORD branch there is no context
        # to evaluate against (this path used to die on an unbound local).
        raise NotImplementedError(
            'WITHIN %s is not implemented; only WITHIN RECORD is '
            'supported' % (within_clause,))

    # Add an extra column of row number over which the grouping
    # will be done.
    ctx_with_primary_key = context.empty_context_from_template(ctx)
    context.append_context_to_context(ctx, ctx_with_primary_key)

    # The new column reuses the table name of the first existing column.
    # Indexing items() relies on Python 2 returning a list here.
    (table_name, _), _ = ctx_with_primary_key.columns.items()[0]
    row_nums = range(1, ctx_with_primary_key.num_rows + 1)
    row_nums_col = context.Column(
        type=tq_types.INT, mode=tq_modes.NULLABLE, values=row_nums)
    ctx_with_primary_key.columns[
        (table_name, 'row_numbers_column_primary_key')] = row_nums_col
    group_set.field_groups.append(
        typed_ast.ColumnRef(
            table_name, 'row_numbers_column_primary_key', tq_types.INT))

    if len(select_fields) > 1:
        # TODO: Implement WITHIN RECORD when one or more of the
        # selected fields (except the one in the WITHIN RECORD
        # clause) has mode = REPEATED.
        for select_field in select_fields:
            if select_field.within_clause is None:
                if select_field.expr.mode != tq_modes.REPEATED:
                    group_set.alias_groups.add(select_field.alias)
                else:
                    raise NotImplementedError(
                        'Cannot select fields having mode=REPEATED '
                        'for queries involving WITHIN RECORD')

    # Rebind the module-level trivial group set to a fresh, empty one.
    # NOTE(review): presumably needed because group_set may be that shared
    # instance, which was just mutated above -- confirm against callers.
    typed_ast.TRIVIAL_GROUP_SET = typed_ast.GroupSet(set(), [])
    return self.evaluate_groups(select_fields, group_set,
                                ctx_with_primary_key)
def eval_table_Join(self, table_expr):
    """Join ``table_expr.table1`` with ``table_expr.table2`` on their keys.

    Rows of the second table are bucketed by join key, then every row of
    the first table is paired with the bucket that shares its key.

    Arguments:
        table_expr: The join expression. This method reads its ``table1``,
            ``table2``, ``conditions`` and ``is_left_outer`` attributes.

    Returns:
        A context holding the joined rows.
    """
    left = self.evaluate_table_expr(table_expr.table1)
    right = self.evaluate_table_expr(table_expr.table2)

    left_refs = [cond.column1 for cond in table_expr.conditions]
    right_refs = [cond.column2 for cond in table_expr.conditions]

    # Map each join key of the right table to a context with its rows.
    right_by_key = {}
    for row in xrange(right.num_rows):
        key = self.get_join_key(right, right_refs, row)
        if key not in right_by_key:
            right_by_key[key] = context.empty_context_from_template(right)
        context.append_row_to_context(
            src_context=right, index=row, dest_context=right_by_key[key])

    # Start from an empty context shaped like (left x right).
    joined = context.cross_join_contexts(
        context.empty_context_from_template(left),
        context.empty_context_from_template(right),
    )

    for row in xrange(left.num_rows):
        key = self.get_join_key(left, left_refs, row)
        if key in right_by_key:
            left_row = context.row_context_from_context(left, row)
            matched = context.cross_join_contexts(left_row,
                                                  right_by_key[key])
            context.append_context_to_context(matched, joined)
        elif table_expr.is_left_outer:
            # Left outer join means that if we didn't find something, we
            # still put in a row with nulls on the right.
            left_row = context.row_context_from_context(left, row)
            context.append_context_to_context(left_row, joined)

    return joined