def column_ref_for_name(self, name):
    """Gets the full identifier for a column from any possible alias.

    Resolution is attempted in this order:
      1. `name` is itself a key of `self.columns`.
      2. Every way of splitting `name` at a dot into a
         (left_side, right_side) pair that is a key of `self.columns`,
         plus the (table, column) pair registered for `name` in
         `self.aliases`, if any. Exactly one candidate must be found.
      3. If there are no candidates, the lookup is delegated to
         `self.implicit_column_context` when it is set.

    Arguments:
        name: The (possibly dot-separated) column name or alias to
            resolve.

    Returns:
        The typed_ast.ColumnRef for the column, or whatever the implicit
        column context returns for `name`.

    Raises:
        exceptions.CompileError: If more than one candidate matches, or
            if nothing matches and there is no implicit column context.
    """
    if name in self.columns:
        # NOTE(review): every other ColumnRef call visible in this file
        # passes (table, column, type); confirm that this two-argument
        # form is intended.
        return typed_ast.ColumnRef(name, self.columns[name])

    candidates = []

    # Try all possible ways of splitting a dot-separated string.
    # The pattern is a raw string so that the backslash reaches the regex
    # engine without relying on an invalid string escape sequence.
    for match in re.finditer(r'\.', name):
        left_side = name[:match.start()]
        right_side = name[match.end():]
        result_type = self.columns.get((left_side, right_side))
        if result_type is not None:
            candidates.append(
                typed_ast.ColumnRef(left_side, right_side, result_type))

    if name in self.aliases:
        table, column = self.aliases[name]
        result_type = self.columns[(table, column)]
        candidates.append(typed_ast.ColumnRef(table, column, result_type))

    if len(candidates) == 1:
        return candidates[0]
    if candidates:
        raise exceptions.CompileError('Ambiguous field: {}'.format(name))
    if self.implicit_column_context is not None:
        return self.implicit_column_context.column_ref_for_name(name)
    raise exceptions.CompileError('Field not found: {}'.format(name))
def test_record_star(self):
    # `SELECT r1.*` is expected to expand into the r1.i and r1.s columns.
    expected_fields = [
        typed_ast.SelectField(
            typed_ast.ColumnRef('record_table', 'r1.i', tq_types.INT),
            'r1.i', None),
        typed_ast.SelectField(
            typed_ast.ColumnRef('record_table', 'r1.s', tq_types.STRING),
            'r1.s', None),
    ]
    source_table = typed_ast.Table(
        'record_table', self.record_table_type_ctx)
    where_expr = typed_ast.Literal(True, tq_types.BOOL)
    having_expr = typed_ast.Literal(True, tq_types.BOOL)

    nested_ctx = self.make_type_context([
        ('record_table', 'r1.i', tq_types.INT),
        ('record_table', 'r1.s', tq_types.STRING)
    ])
    result_ctx = self.make_type_context(
        [(None, 'r1.i', tq_types.INT), (None, 'r1.s', tq_types.STRING)],
        nested_ctx)

    expected = typed_ast.Select(
        select_fields=expected_fields,
        table=source_table,
        where_expr=where_expr,
        group_set=None,
        having_expr=having_expr,
        orderings=None,
        limit=None,
        type_ctx=result_ctx)
    self.assert_compiled_select('SELECT r1.* FROM record_table', expected)
def test_within_clause(self):
    # The COUNT field is expected to carry 'r1' as its within clause,
    # while the plain r1.s field carries None.
    query = ('SELECT r1.s, COUNT(r1.s) WITHIN r1 AS num_s_in_r1 '
             'FROM record_table')

    plain_field = typed_ast.SelectField(
        typed_ast.ColumnRef('record_table', 'r1.s', tq_types.STRING),
        'r1.s', None)
    count_call = typed_ast.FunctionCall(
        runtime.get_func('count'),
        [typed_ast.ColumnRef('record_table', 'r1.s', tq_types.STRING)],
        tq_types.INT)
    counted_field = typed_ast.SelectField(count_call, 'num_s_in_r1', 'r1')

    source_table = typed_ast.Table(
        'record_table', self.record_table_type_ctx)
    where_expr = typed_ast.Literal(True, tq_types.BOOL)
    grouping = typed_ast.GroupSet(set(), [])
    having_expr = typed_ast.Literal(True, tq_types.BOOL)
    result_ctx = self.make_type_context(
        [(None, 'r1.s', tq_types.STRING),
         (None, 'num_s_in_r1', tq_types.INT)],
        self.make_type_context([]))

    expected = typed_ast.Select(
        select_fields=[plain_field, counted_field],
        table=source_table,
        where_expr=where_expr,
        group_set=grouping,
        having_expr=having_expr,
        orderings=None,
        limit=None,
        type_ctx=result_ctx)
    self.assert_compiled_select(query, expected)
def test_subquery_aliases(self):
    # The outer query addresses the subquery's column through alias `t`.
    inner_fields = [
        typed_ast.SelectField(
            typed_ast.ColumnRef('table1', 'value', tq_types.INT),
            'value', None)
    ]
    inner_table = typed_ast.Table('table1', self.table1_type_ctx)
    inner_ctx = self.make_type_context(
        [(None, 'value', tq_types.INT)],
        self.make_type_context([('t', 'value', tq_types.INT)]))
    subquery = typed_ast.Select(
        inner_fields,
        inner_table,
        typed_ast.Literal(True, tq_types.BOOL),
        None,
        typed_ast.Literal(True, tq_types.BOOL),
        None,
        None,
        inner_ctx)

    outer_fields = [
        typed_ast.SelectField(
            typed_ast.ColumnRef('t', 'value', tq_types.INT),
            't.value', None)
    ]
    outer_ctx = self.make_type_context(
        [(None, 't.value', tq_types.INT)],
        self.make_type_context([('t', 'value', tq_types.INT)]))
    expected = typed_ast.Select(
        outer_fields,
        subquery,
        typed_ast.Literal(True, tq_types.BOOL),
        None,
        typed_ast.Literal(True, tq_types.BOOL),
        None,
        None,
        outer_ctx)

    self.assert_compiled_select(
        'SELECT t.value FROM (SELECT value FROM table1) t', expected)
def test_select_multiple_tables(self):
    # Union of columns should be taken, with no aliases.
    column_names = ('value', 'value2', 'value3')

    unioned_type_ctx = self.make_type_context(
        [(None, column, tq_types.INT) for column in column_names])

    expected_fields = [
        typed_ast.SelectField(
            typed_ast.ColumnRef(None, column, tq_types.INT), column, None)
        for column in column_names
    ]
    table_union = typed_ast.TableUnion(
        [
            typed_ast.Table('table1', self.table1_type_ctx),
            typed_ast.Table('table2', self.table2_type_ctx)
        ],
        unioned_type_ctx)
    where_expr = typed_ast.Literal(True, tq_types.BOOL)
    having_expr = typed_ast.Literal(True, tq_types.BOOL)

    nested_ctx = self.make_type_context(
        [(None, column, tq_types.INT) for column in column_names])
    result_ctx = self.make_type_context(
        [(None, column, tq_types.INT) for column in column_names],
        nested_ctx)

    expected = typed_ast.Select(
        expected_fields, table_union, where_expr, None, having_expr,
        None, None, result_ctx)
    self.assert_compiled_select(
        'SELECT value, value2, value3 FROM table1, table2', expected)
def test_implicitly_accessed_column(self):
    # The outer query selects table1.value even though the subquery only
    # exposes `foo` explicitly.
    increment = typed_ast.FunctionCall(
        runtime.get_binary_op('+'),
        [
            typed_ast.ColumnRef('table1', 'value', tq_types.INT),
            typed_ast.Literal(1, tq_types.INT)
        ],
        tq_types.INT)
    inner_ctx = self.make_type_context(
        [(None, 'foo', tq_types.INT)],
        self.make_type_context([('table1', 'value', tq_types.INT)]))
    subquery = typed_ast.Select(
        [typed_ast.SelectField(increment, 'foo', None)],
        typed_ast.Table('table1', self.table1_type_ctx),
        typed_ast.Literal(True, tq_types.BOOL),
        None,
        typed_ast.Literal(True, tq_types.BOOL),
        None,
        None,
        inner_ctx)

    outer_fields = [
        typed_ast.SelectField(
            typed_ast.ColumnRef('table1', 'value', tq_types.INT),
            'table1.value', None)
    ]
    outer_ctx = self.make_type_context(
        [(None, 'table1.value', tq_types.INT)],
        self.make_type_context([('table1', 'value', tq_types.INT)]))
    expected = typed_ast.Select(
        outer_fields,
        subquery,
        typed_ast.Literal(True, tq_types.BOOL),
        None,
        typed_ast.Literal(True, tq_types.BOOL),
        None,
        None,
        outer_ctx)

    self.assert_compiled_select(
        'SELECT table1.value FROM (SELECT value + 1 AS foo FROM table1)',
        expected)
def test_multi_way_join(self):
    # Two chained JOINs are expected to compile into a single Join node
    # holding one (table, join type) pair and one condition list per
    # joined table.
    query = ('SELECT 0 '
             'FROM table1 t1 JOIN table2 t2 ON t1.value = t2.value '
             'LEFT JOIN table3 t3 ON t2.value3 = t3.value')

    base_table = typed_ast.Table(
        'table1',
        self.make_type_context([
            ('t1', 'value', tq_types.INT),
            ('t1', 'value2', tq_types.INT),
        ]))
    inner_joined = typed_ast.Table(
        'table2',
        self.make_type_context([
            ('t2', 'value', tq_types.INT),
            ('t2', 'value3', tq_types.INT),
        ]))
    left_joined = typed_ast.Table(
        'table3',
        self.make_type_context([('t3', 'value', tq_types.INT)]))

    first_condition = [
        typed_ast.JoinFields(
            typed_ast.ColumnRef('t1', 'value', tq_types.INT),
            typed_ast.ColumnRef('t2', 'value', tq_types.INT))
    ]
    second_condition = [
        typed_ast.JoinFields(
            typed_ast.ColumnRef('t2', 'value3', tq_types.INT),
            typed_ast.ColumnRef('t3', 'value', tq_types.INT))
    ]
    join_ctx = self.make_type_context([
        ('t1', 'value', tq_types.INT),
        ('t1', 'value2', tq_types.INT),
        ('t2', 'value', tq_types.INT),
        ('t2', 'value3', tq_types.INT),
        ('t3', 'value', tq_types.INT),
    ])
    joined = typed_ast.Join(
        base=base_table,
        tables=[(inner_joined, tq_ast.JoinType.INNER),
                (left_joined, tq_ast.JoinType.LEFT_OUTER)],
        conditions=[first_condition, second_condition],
        type_ctx=join_ctx)

    zero_field = typed_ast.SelectField(
        typed_ast.Literal(0, tq_types.INT), 'f0_', None)
    result_ctx = self.make_type_context(
        [(None, 'f0_', tq_types.INT)], self.make_type_context([]))

    expected = typed_ast.Select(
        select_fields=[zero_field],
        table=joined,
        where_expr=typed_ast.Literal(True, tq_types.BOOL),
        group_set=None,
        having_expr=typed_ast.Literal(True, tq_types.BOOL),
        orderings=None,
        limit=None,
        type_ctx=result_ctx)
    self.assert_compiled_select(query, expected)
def test_having(self):
    # The HAVING comparison is expected to reference `value` with no
    # table (None), unlike the selected field itself.
    selected = typed_ast.SelectField(
        typed_ast.ColumnRef('table1', 'value', tq_types.INT),
        'value', None)
    source_table = typed_ast.Table('table1', self.table1_type_ctx)
    where_expr = typed_ast.Literal(True, tq_types.BOOL)
    having_expr = typed_ast.FunctionCall(
        runtime.get_binary_op('>'),
        [
            typed_ast.ColumnRef(None, 'value', tq_types.INT),
            typed_ast.Literal(3, tq_types.INT)
        ],
        tq_types.BOOL)
    result_ctx = self.make_type_context(
        [(None, 'value', tq_types.INT)],
        self.make_type_context([('table1', 'value', tq_types.INT)]))

    expected = typed_ast.Select(
        [selected], source_table, where_expr, None, having_expr,
        None, None, result_ctx)
    self.assert_compiled_select(
        'SELECT value FROM table1 HAVING value > 3', expected)
def test_select_star(self):
    # `SELECT *` on table1 is expected to expand to value and value2.
    column_names = ('value', 'value2')

    expected_fields = [
        typed_ast.SelectField(
            typed_ast.ColumnRef('table1', column, tq_types.INT),
            column, None)
        for column in column_names
    ]
    source_table = typed_ast.Table('table1', self.table1_type_ctx)
    where_expr = typed_ast.Literal(True, tq_types.BOOL)
    having_expr = typed_ast.Literal(True, tq_types.BOOL)

    nested_ctx = self.make_type_context(
        [('table1', column, tq_types.INT) for column in column_names])
    result_ctx = self.make_type_context(
        [(None, column, tq_types.INT) for column in column_names],
        nested_ctx)

    expected = typed_ast.Select(
        expected_fields, source_table, where_expr, None, having_expr,
        None, None, result_ctx)
    self.assert_compiled_select('SELECT * FROM table1', expected)
def test_select_grouped_and_non_grouped_fields(self):
    # GROUP BY value is expected to show up as an alias group, with the
    # unnamed SUM(...) field receiving the generated alias 'f0_'.
    grouped_field = typed_ast.SelectField(
        typed_ast.ColumnRef('table1', 'value', tq_types.INT),
        'value', None)
    sum_call = typed_ast.FunctionCall(
        runtime.get_func('sum'),
        [typed_ast.ColumnRef('table1', 'value2', tq_types.INT)],
        tq_types.INT)
    summed_field = typed_ast.SelectField(sum_call, 'f0_', None)

    source_table = typed_ast.Table('table1', self.table1_type_ctx)
    where_expr = typed_ast.Literal(True, tq_types.BOOL)
    grouping = typed_ast.GroupSet(alias_groups={'value'}, field_groups=[])
    having_expr = typed_ast.Literal(True, tq_types.BOOL)
    result_ctx = self.make_type_context(
        [(None, 'value', tq_types.INT), (None, 'f0_', tq_types.INT)],
        self.make_type_context([('table1', 'value', tq_types.INT)]))

    expected = typed_ast.Select(
        [grouped_field, summed_field], source_table, where_expr, grouping,
        having_expr, None, None, result_ctx)
    self.assert_compiled_select(
        'SELECT value, SUM(value2) FROM table1 GROUP BY value', expected)
def test_simple_join(self):
    # The unqualified `value2` is expected to resolve to alias t1.
    query = ('SELECT value2 '
             'FROM table1 t1 JOIN table2 t2 ON t1.value = t2.value')

    selected = typed_ast.SelectField(
        typed_ast.ColumnRef('t1', 'value2', tq_types.INT),
        'value2', None)

    base_table = typed_ast.Table(
        'table1',
        self.make_type_context([
            ('t1', 'value', tq_types.INT),
            ('t1', 'value2', tq_types.INT),
        ]))
    joined_table = typed_ast.Table(
        'table2',
        self.make_type_context([
            ('t2', 'value', tq_types.INT),
            ('t2', 'value3', tq_types.INT),
        ]))
    join_condition = [
        typed_ast.JoinFields(
            typed_ast.ColumnRef('t1', 'value', tq_types.INT),
            typed_ast.ColumnRef('t2', 'value', tq_types.INT))
    ]
    join_ctx = self.make_type_context([
        ('t1', 'value', tq_types.INT),
        ('t1', 'value2', tq_types.INT),
        ('t2', 'value', tq_types.INT),
        ('t2', 'value3', tq_types.INT),
    ])
    joined = typed_ast.Join(
        base_table,
        [(joined_table, tq_ast.JoinType.INNER)],
        [join_condition],
        join_ctx)

    where_expr = typed_ast.Literal(True, tq_types.BOOL)
    having_expr = typed_ast.Literal(True, tq_types.BOOL)
    result_ctx = self.make_type_context(
        [(None, 'value2', tq_types.INT)],
        self.make_type_context([('t1', 'value2', tq_types.INT)]))

    expected = typed_ast.Select(
        [selected], joined, where_expr, None, having_expr,
        None, None, result_ctx)
    self.assert_compiled_select(query, expected)
def test_multiple_select(self):
    # Explicit aliases ('foo', 'bar') are kept; the unnamed expressions
    # are expected to receive the generated aliases 'f0_' and 'f1_'.
    query = ('SELECT value * 3 AS foo, value, value + 1, value bar, '
             'value - 1 '
             'FROM table1')

    def value_ref():
        # A fresh reference to table1.value for every use.
        return typed_ast.ColumnRef('table1', 'value', tq_types.INT)

    def arithmetic(operator, operand):
        # `value <operator> <operand>` as an INT-typed function call.
        return typed_ast.FunctionCall(
            runtime.get_binary_op(operator),
            [value_ref(), typed_ast.Literal(operand, tq_types.INT)],
            tq_types.INT)

    expected_fields = [
        typed_ast.SelectField(arithmetic('*', 3), 'foo', None),
        typed_ast.SelectField(value_ref(), 'value', None),
        typed_ast.SelectField(arithmetic('+', 1), 'f0_', None),
        typed_ast.SelectField(value_ref(), 'bar', None),
        typed_ast.SelectField(arithmetic('-', 1), 'f1_', None)
    ]
    source_table = typed_ast.Table('table1', self.table1_type_ctx)
    where_expr = typed_ast.Literal(True, tq_types.BOOL)
    having_expr = typed_ast.Literal(True, tq_types.BOOL)

    output_aliases = ('foo', 'value', 'f0_', 'bar', 'f1_')
    result_ctx = self.make_type_context(
        [(None, alias, tq_types.INT) for alias in output_aliases],
        self.make_type_context([('table1', 'value', tq_types.INT)]))

    expected = typed_ast.Select(
        expected_fields, source_table, where_expr, None, having_expr,
        None, None, result_ctx)
    self.assert_compiled_select(query, expected)
def test_aggregates(self):
    # MAX and MIN without GROUP BY are expected to compile with an empty
    # GroupSet rather than group_set=None.
    max_field = typed_ast.SelectField(
        typed_ast.AggregateFunctionCall(
            runtime.get_func('max'),
            [typed_ast.ColumnRef('table1', 'value', tq_types.INT)],
            tq_types.INT),
        'f0_', None)
    min_field = typed_ast.SelectField(
        typed_ast.AggregateFunctionCall(
            runtime.get_func('min'),
            [typed_ast.ColumnRef('table1', 'value', tq_types.INT)],
            tq_types.INT),
        'f1_', None)

    source_table = typed_ast.Table('table1', self.table1_type_ctx)
    where_expr = typed_ast.Literal(True, tq_types.BOOL)
    grouping = typed_ast.GroupSet(set(), [])
    having_expr = typed_ast.Literal(True, tq_types.BOOL)
    result_ctx = self.make_type_context(
        [(None, 'f0_', tq_types.INT), (None, 'f1_', tq_types.INT)],
        self.make_type_context([]))

    expected = typed_ast.Select(
        [max_field, min_field], source_table, where_expr, grouping,
        having_expr, None, None, result_ctx)
    self.assert_compiled_select(
        'SELECT MAX(value), MIN(value) FROM table1', expected)
def evaluate_within(self, select_fields, group_set, ctx, within_clause):
    """Evaluate a list of select fields, one of which has a WITHIN or
    WITHIN RECORD clause and/or grouping by some of the values.

    Only WITHIN RECORD is implemented. It is evaluated by adding a column
    of row numbers to a copy of the source context, grouping by that
    column (and by the alias of every other selected field), and then
    delegating to evaluate_groups.

    Arguments:
        select_fields: A list of SelectField instances to evaluate.
        group_set: The groups (either fields in select_context or aliases
            referring to an element of select_fields) to group by. It is
            not modified; the extra groups are added to a private copy.
        ctx: The "source" context that the expressions can access when
            being evaluated.
        within_clause: The target of the WITHIN clause. Must be the
            string "RECORD".

    Returns:
        A context with the results.

    Raises:
        NotImplementedError: If within_clause is anything other than
            "RECORD", or if a selected field without a WITHIN clause has
            mode=REPEATED.
    """
    if within_clause != "RECORD":
        # TODO: Implement for WITHIN clause
        # Fail explicitly here; without this guard the source context
        # copy below is never created and the final call fails with an
        # UnboundLocalError instead.
        raise NotImplementedError(
            'WITHIN {} is not supported; only WITHIN RECORD is '
            'implemented'.format(within_clause))

    # Work on a private copy so that the caller's group set (which may be
    # shared between evaluations) is never mutated, even if an exception
    # is raised part-way through.
    group_set = typed_ast.GroupSet(
        alias_groups=set(group_set.alias_groups),
        field_groups=list(group_set.field_groups))

    # Add an extra column of row number over which the grouping
    # will be done.
    ctx_with_primary_key = context.empty_context_from_template(ctx)
    context.append_context_to_context(ctx, ctx_with_primary_key)

    # NOTE(review): this takes the first key of `columns`; since columns
    # is indexed by a 2-tuple just below, this may be a whole key rather
    # than only a table name -- confirm.
    table_name = next(iter(ctx_with_primary_key.columns))
    row_nums = list(
        six.moves.xrange(1, ctx_with_primary_key.num_rows + 1))
    row_nums_col = context.Column(
        type=tq_types.INT, mode=tq_modes.NULLABLE, values=row_nums)
    ctx_with_primary_key.columns[
        (table_name, 'row_numbers_column_primary_key')] = row_nums_col
    group_set.field_groups.append(
        typed_ast.ColumnRef(
            table_name, 'row_numbers_column_primary_key', tq_types.INT))

    if len(select_fields) > 1:
        # TODO: Implement WITHIN RECORD when one or more of the
        # selected fields (except the one in the WITHIN RECORD
        # clause) has mode = REPEATED.
        for select_field in select_fields:
            if select_field.within_clause is not None:
                continue
            if select_field.expr.mode == tq_modes.REPEATED:
                raise NotImplementedError(
                    'Cannot select fields having mode=REPEATED '
                    'for queries involving WITHIN RECORD')
            group_set.alias_groups.add(select_field.alias)

    # Rebind the module-level trivial group set to a fresh, empty
    # instance. NOTE(review): presumably a defensive reset in case the
    # shared instance picked up groups; verify whether it is still needed.
    typed_ast.TRIVIAL_GROUP_SET = typed_ast.GroupSet(set(), [])
    return self.evaluate_groups(
        select_fields, group_set, ctx_with_primary_key)
def test_order_by_field(self):
    # ORDER BY value2 DESC is expected to compile into a single Ordering
    # on ColumnId('value2') whose second argument is False.
    selected = typed_ast.SelectField(
        typed_ast.ColumnRef('table1', 'value', tq_types.INT),
        'value', None)
    source_table = typed_ast.Table('table1', self.table1_type_ctx)
    where_expr = typed_ast.Literal(True, tq_types.BOOL)
    having_expr = typed_ast.Literal(True, tq_types.BOOL)
    descending_value2 = tq_ast.Ordering(tq_ast.ColumnId('value2'), False)
    result_ctx = self.make_type_context(
        [(None, 'value', tq_types.INT)],
        self.make_type_context([('table1', 'value', tq_types.INT)]))

    expected = typed_ast.Select(
        select_fields=[selected],
        table=source_table,
        where_expr=where_expr,
        group_set=None,
        having_expr=having_expr,
        orderings=[descending_value2],
        limit=None,
        type_ctx=result_ctx)
    self.assert_compiled_select(
        'SELECT value FROM table1 ORDER BY value2 DESC', expected)