def test_multi_way_join(self):
    """Check the compiled AST for a JOIN followed by a LEFT JOIN.

    table1 is joined to table2 on t1.value = t2.value, and table3 is then
    left-joined on t2.value3 = t3.value. The expected Join node lists one
    (table, join type) pair and one list of JoinFields per joined table.
    """
    def int_columns(alias, *names):
        # Build a fresh list of (alias, column, type) triples each call.
        return [(alias, name, tq_types.INT) for name in names]

    def int_ref(alias, name):
        return typed_ast.ColumnRef(alias, name, tq_types.INT)

    query = (
        'SELECT 0 '
        'FROM table1 t1 JOIN table2 t2 ON t1.value = t2.value '
        'LEFT JOIN table3 t3 ON t2.value3 = t3.value'
    )

    selected_fields = [
        typed_ast.SelectField(
            typed_ast.Literal(0, tq_types.INT), 'f0_', None),
    ]

    base_table = typed_ast.Table(
        'table1',
        self.make_type_context(int_columns('t1', 'value', 'value2')))
    inner_joined = (
        typed_ast.Table(
            'table2',
            self.make_type_context(int_columns('t2', 'value', 'value3'))),
        tq_ast.JoinType.INNER,
    )
    left_joined = (
        typed_ast.Table(
            'table3',
            self.make_type_context(int_columns('t3', 'value'))),
        tq_ast.JoinType.LEFT_OUTER,
    )

    join_conditions = [
        [typed_ast.JoinFields(int_ref('t1', 'value'),
                              int_ref('t2', 'value'))],
        [typed_ast.JoinFields(int_ref('t2', 'value3'),
                              int_ref('t3', 'value'))],
    ]

    joined_type_ctx = self.make_type_context(
        int_columns('t1', 'value', 'value2')
        + int_columns('t2', 'value', 'value3')
        + int_columns('t3', 'value'))

    expected_join = typed_ast.Join(
        base=base_table,
        tables=[inner_joined, left_joined],
        conditions=join_conditions,
        type_ctx=joined_type_ctx,
    )

    expected_select = typed_ast.Select(
        select_fields=selected_fields,
        table=expected_join,
        where_expr=typed_ast.Literal(True, tq_types.BOOL),
        group_set=None,
        having_expr=typed_ast.Literal(True, tq_types.BOOL),
        orderings=None,
        limit=None,
        type_ctx=self.make_type_context(
            [(None, 'f0_', tq_types.INT)],
            self.make_type_context([])),
    )

    self.assert_compiled_select(query, expected_select)
def compile_join_field(expr, join_type):
    """Compile a single part of the join.

    This results in a list of one or more join fields, depending on
    whether or not multiple are ANDed together.

    Relies on ``self``, ``aliases`` and ``type_contexts`` from the
    enclosing scope: a column is matched to the first ``aliases[i]`` whose
    ``alias + "."`` prefixes its name, and is then compiled against
    ``type_contexts[i]``.

    Arguments:
        expr: The uncompiled join condition (must be None for cross joins).
        join_type: The tq_ast.JoinType of the join being compiled.

    Returns:
        ``[None]`` for a cross join; otherwise a list of
        typed_ast.JoinFields in which the column with the lower alias
        index always comes first.

    Raises:
        CompileError: if the condition is not an AND of equality
            comparisons between columns of two distinct tables, or if a
            column name is not prefixed by any known alias.
        AssertionError: if a cross join is given a condition.
    """
    if join_type is tq_ast.JoinType.CROSS:
        assert expr is None, (
            "Cross joins do not allow join conditions.")
        return [None]

    def alias_index(column_id):
        # Position of the first alias that prefixes the column name.
        # Raise a CompileError instead of letting a bare StopIteration
        # escape when nothing matches.
        for idx, alias in enumerate(aliases):
            if column_id.name.startswith(alias + "."):
                return idx
        raise CompileError(
            'JOIN condition field %s is not qualified with any of the '
            'joined table aliases: %s'
            % (column_id.name, ', '.join(aliases)))

    if isinstance(expr, tq_ast.BinaryOperator):
        if expr.operator == 'and':
            # Both recursive calls return lists, so concatenate directly.
            return (compile_join_field(expr.left, join_type) +
                    compile_join_field(expr.right, join_type))
        elif (expr.operator in ('=', '==') and
              isinstance(expr.left, tq_ast.ColumnId) and
              isinstance(expr.right, tq_ast.ColumnId)):
            # For evaluation, we want the ordering of the columns in
            # the JoinField to match the ordering of the join, left to
            # right, but bigquery allows either order. Thus we need to
            # reorder them if they're reversed.
            lhs_alias_idx = alias_index(expr.left)
            rhs_alias_idx = alias_index(expr.right)
            left_column_id = self.compile_ColumnId(
                expr.left, type_contexts[lhs_alias_idx])
            right_column_id = self.compile_ColumnId(
                expr.right, type_contexts[rhs_alias_idx])

            if lhs_alias_idx < rhs_alias_idx:
                return [
                    typed_ast.JoinFields(left_column_id, right_column_id)
                ]
            elif rhs_alias_idx < lhs_alias_idx:
                return [
                    typed_ast.JoinFields(right_column_id, left_column_id)
                ]
            # Fall through to the error case if the aliases are the
            # same for both sides.

    # Wrap expr in a 1-tuple: if expr is itself a tuple (or tuple
    # subclass), "% expr" would unpack it as the format arguments.
    raise CompileError('JOIN conditions must consist of an AND of = '
                       'comparisons between two field on distinct '
                       'tables. Got expression %s' % (expr,))
def compile_join_fields(self, type_ctx1, type_ctx2, alias1, alias2, expr):
    """Collect the joined column pairs from a join condition.

    Arguments:
        type_ctx1: TypeContext of the first (left) table in the join.
        type_ctx2: TypeContext of the second (right) table in the join.
        alias1: Alias of the first table.
        alias2: Alias of the second table.
        expr: Uncompiled tq_ast expression holding the join condition.

    Returns:
        A list of JoinFields, one per ``=`` comparison in the condition.

    Raises:
        CompileError: if the condition is anything other than ``=``
            comparisons between two column ids, combined with ``and``.
    """
    is_binary = isinstance(expr, tq_ast.BinaryOperator)

    if is_binary and expr.operator == 'and':
        # Flatten both operands of the AND into one list, left first.
        left_fields = self.compile_join_fields(
            type_ctx1, type_ctx2, alias1, alias2, expr.left)
        right_fields = self.compile_join_fields(
            type_ctx1, type_ctx2, alias1, alias2, expr.right)
        return left_fields + right_fields

    if (is_binary and
            expr.operator == '=' and
            isinstance(expr.left, tq_ast.ColumnId) and
            isinstance(expr.right, tq_ast.ColumnId)):
        first_id = expr.left
        second_id = expr.right
        # The left operand is assumed to belong to the first table unless
        # an alias prefix on either operand says the order is reversed.
        written_backwards = (
            first_id.name.startswith(alias2 + '.') or
            second_id.name.startswith(alias1 + '.'))
        if written_backwards:
            first_id, second_id = second_id, first_id
        first_ref = self.compile_ColumnId(first_id, type_ctx1)
        second_ref = self.compile_ColumnId(second_id, type_ctx2)
        return [typed_ast.JoinFields(first_ref, second_ref)]

    raise CompileError('JOIN conditions must consist of an AND of = '
                       'comparisons. Got expression {}'.format(expr))
def test_join_multiple_fields(self):
    """Check a two-table join whose ON clause ANDs two equalities.

    The second equality is written right-table-first
    (t2.value3 = t1.value2); the expected JoinFields still list the t1
    column before the t2 column.
    """
    def int_columns(alias, *names):
        # Build a fresh list of (alias, column, type) triples each call.
        return [(alias, name, tq_types.INT) for name in names]

    def int_ref(alias, name):
        return typed_ast.ColumnRef(alias, name, tq_types.INT)

    query = (
        'SELECT 0 '
        'FROM table1 t1 JOIN table2 t2 '
        'ON t1.value = t2.value AND t2.value3 = t1.value2'
    )

    selected_fields = [
        typed_ast.SelectField(typed_ast.Literal(0, tq_types.INT), 'f0_'),
    ]

    first_table = typed_ast.Table(
        'table1',
        self.make_type_context(int_columns('t1', 'value', 'value2')))
    second_table = typed_ast.Table(
        'table2',
        self.make_type_context(int_columns('t2', 'value', 'value3')))

    join_fields = [
        typed_ast.JoinFields(int_ref('t1', 'value'),
                             int_ref('t2', 'value')),
        typed_ast.JoinFields(int_ref('t1', 'value2'),
                             int_ref('t2', 'value3')),
    ]

    joined_type_ctx = self.make_type_context(
        int_columns('t1', 'value', 'value2')
        + int_columns('t2', 'value', 'value3'))

    expected_join = typed_ast.Join(
        first_table,
        second_table,
        join_fields,
        False,
        joined_type_ctx,
    )

    result_type_ctx = self.make_type_context(
        [(None, 'f0_', tq_types.INT)],
        self.make_type_context([]))

    expected_select = typed_ast.Select(
        selected_fields,
        expected_join,
        typed_ast.Literal(True, tq_types.BOOL),
        None,
        None,
        result_type_ctx,
    )

    self.assert_compiled_select(query, expected_select)
def test_simple_join(self):
    """Check the compiled AST for a single inner join on one column.

    The unqualified ``value2`` in the SELECT list is expected to compile
    to a reference to t1.value2.
    """
    def int_columns(alias, *names):
        # Build a fresh list of (alias, column, type) triples each call.
        return [(alias, name, tq_types.INT) for name in names]

    def int_ref(alias, name):
        return typed_ast.ColumnRef(alias, name, tq_types.INT)

    query = (
        'SELECT value2 '
        'FROM table1 t1 JOIN table2 t2 ON t1.value = t2.value'
    )

    selected_fields = [
        typed_ast.SelectField(int_ref('t1', 'value2'), 'value2', None),
    ]

    base_table = typed_ast.Table(
        'table1',
        self.make_type_context(int_columns('t1', 'value', 'value2')))
    joined_tables = [
        (typed_ast.Table(
            'table2',
            self.make_type_context(int_columns('t2', 'value', 'value3'))),
         tq_ast.JoinType.INNER),
    ]
    join_conditions = [
        [typed_ast.JoinFields(int_ref('t1', 'value'),
                              int_ref('t2', 'value'))],
    ]
    joined_type_ctx = self.make_type_context(
        int_columns('t1', 'value', 'value2')
        + int_columns('t2', 'value', 'value3'))

    expected_join = typed_ast.Join(
        base_table,
        joined_tables,
        join_conditions,
        joined_type_ctx,
    )

    where_expr = typed_ast.Literal(True, tq_types.BOOL)
    having_expr = typed_ast.Literal(True, tq_types.BOOL)
    result_type_ctx = self.make_type_context(
        [(None, 'value2', tq_types.INT)],
        self.make_type_context(int_columns('t1', 'value2')))

    expected_select = typed_ast.Select(
        selected_fields,
        expected_join,
        where_expr,
        None,
        having_expr,
        None,
        None,
        result_type_ctx,
    )

    self.assert_compiled_select(query, expected_select)