def test_select_grouped_and_non_grouped_fields(self):
     # Compiles a query that selects the GROUP BY column alongside SUM() of
     # a second column, and checks the resulting typed AST.
     value_field = typed_ast.SelectField(
         typed_ast.ColumnRef('table1', 'value', tq_types.INT), 'value')
     sum_call = typed_ast.FunctionCall(
         runtime.get_func('sum'),
         [typed_ast.ColumnRef('table1', 'value2', tq_types.INT)],
         tq_types.INT)
     sum_field = typed_ast.SelectField(sum_call, 'f0_')

     # 'value' is matched against the select aliases, so it is expected as
     # an alias group rather than a field group.
     expected_groups = typed_ast.GroupSet(
         alias_groups={'value'}, field_groups=[])

     # Output columns, with a nested context holding table1.value.
     nested_ctx = self.make_type_context(
         [('table1', 'value', tq_types.INT)])
     expected_ctx = self.make_type_context(
         [(None, 'value', tq_types.INT), (None, 'f0_', tq_types.INT)],
         nested_ctx)

     expected_select = typed_ast.Select(
         [value_field, sum_field],
         typed_ast.Table('table1', self.table1_type_ctx),
         typed_ast.Literal(True, tq_types.BOOL),
         expected_groups,
         None,
         expected_ctx)
     self.assert_compiled_select(
         'SELECT value, SUM(value2) FROM table1 GROUP BY value',
         expected_select)
 def test_aggregates(self):
     # MAX and MIN over the same column with no GROUP BY: both are expected
     # to compile to aggregate calls, with an empty group set.
     def aggregate_field(func_name, alias):
         # SelectField wrapping an INT aggregate call over table1.value.
         call = typed_ast.AggregateFunctionCall(
             runtime.get_func(func_name),
             [typed_ast.ColumnRef('table1', 'value', tq_types.INT)],
             tq_types.INT)
         return typed_ast.SelectField(call, alias)

     expected_fields = [
         aggregate_field('max', 'f0_'),
         aggregate_field('min', 'f1_'),
     ]
     expected_ctx = self.make_type_context(
         [(None, 'f0_', tq_types.INT), (None, 'f1_', tq_types.INT)],
         self.make_type_context([]))
     expected_select = typed_ast.Select(
         expected_fields,
         typed_ast.Table('table1', self.table1_type_ctx),
         typed_ast.Literal(True, tq_types.BOOL),
         typed_ast.GroupSet(set(), []),
         None,
         expected_ctx)
     self.assert_compiled_select(
         'SELECT MAX(value), MIN(value) FROM table1', expected_select)
 def test_within_clause(self):
     # A plain record column next to COUNT(...) WITHIN r1: the within
     # scope ('r1') is expected as the third SelectField argument.
     plain_field = typed_ast.SelectField(
         typed_ast.ColumnRef('record_table', 'r1.s', tq_types.STRING),
         'r1.s', None)
     count_call = typed_ast.FunctionCall(
         runtime.get_func('count'),
         [typed_ast.ColumnRef('record_table', 'r1.s', tq_types.STRING)],
         tq_types.INT)
     within_field = typed_ast.SelectField(count_call, 'num_s_in_r1', 'r1')

     expected_ctx = self.make_type_context(
         [(None, 'r1.s', tq_types.STRING),
          (None, 'num_s_in_r1', tq_types.INT)],
         self.make_type_context([]))
     expected_select = typed_ast.Select(
         select_fields=[plain_field, within_field],
         table=typed_ast.Table('record_table', self.record_table_type_ctx),
         where_expr=typed_ast.Literal(True, tq_types.BOOL),
         group_set=typed_ast.GroupSet(set(), []),
         having_expr=typed_ast.Literal(True, tq_types.BOOL),
         orderings=None,
         limit=None,
         type_ctx=expected_ctx)

     query = ('SELECT r1.s, COUNT(r1.s) WITHIN r1 AS num_s_in_r1 '
              'FROM record_table')
     self.assert_compiled_select(query, expected_select)
Exemple #4
0
    def compile_groups(self, groups, select_fields, aliases, table_ctx):
        """Work out which group set the query should be evaluated with.

        With an explicit GROUP BY, every entry is classified as either an
        alias group (its name matches one of the select aliases) or a field
        group (its name is resolved against the table's type context).
        Without a GROUP BY, the query is grouped only if one of the select
        fields contains an aggregate.

        Arguments:
            groups: None when the query has no GROUP BY; otherwise the
                GROUP BY entries, each exposing a `name` attribute.
            select_fields: The tq_ast.SelectField objects of the query
                being compiled.
            aliases: The aliases that will be assigned to the select fields.
            table_ctx: The TypeContext of the table expression in the
                SELECT.

        Returns:
            A typed_ast.GroupSet for an explicit GROUP BY,
            typed_ast.TRIVIAL_GROUP_SET for an aggregate query without a
            GROUP BY, or None when no grouping should happen at all.
        """
        if groups is not None:
            # Explicit GROUP BY: sort each entry into alias or field groups.
            known_aliases = set(aliases)
            grouped_aliases = set()
            grouped_fields = []
            for group in groups:
                name = group.name
                if name in known_aliases:
                    grouped_aliases.add(name)
                    continue
                # Will raise an exception if not found.
                # TODO: This doesn't perfectly match BigQuery's approach.
                # In BigQuery, grouping by my_table.my_value will make
                # either my_table.my_value or my_value valid ways of
                # referring to the group, whereas grouping by my_value will
                # make it so only my_value is a valid way of referring to
                # the group. The whole approach to implicit table
                # references could potentially be rethought.
                grouped_fields.append(table_ctx.column_ref_for_name(name))
            return typed_ast.GroupSet(grouped_aliases, grouped_fields)

        # No GROUP BY: a single aggregate anywhere in the select list makes
        # this an aggregate query, with every row in the same group.
        for field in select_fields:
            if self.expression_contains_aggregate(field.expr):
                return typed_ast.TRIVIAL_GROUP_SET

        # Don't do any grouping at all.
        return None
Exemple #5
0
    def evaluate_within(self, select_fields, group_set, ctx, within_clause):
        """Evaluate a list of select fields, one of which has a WITHIN or
        WITHIN RECORD clause and/or grouping by some of the values.

        Only WITHIN RECORD is currently supported. It is evaluated by
        building a new context from ctx with an extra row-number column and
        grouping on that column (plus the aliases of the select fields that
        have no WITHIN clause of their own).

        Arguments:
            select_fields: A list of SelectField instances to evaluate.
            group_set: The groups (either fields in select_context or aliases
                referring to an element of select_fields) to group by. It is
                not modified; the extra groups go into a private copy.
            ctx: The "source" context that the expressions can access when
                being evaluated.
            within_clause: The scope of the WITHIN clause. Only the string
                "RECORD" is handled.

        Returns:
            A context with the results.

        Raises:
            NotImplementedError: If within_clause is not "RECORD", or if a
                select field without a WITHIN clause has mode=REPEATED.
        """
        if within_clause != "RECORD":
            # TODO: Implement for WITHIN clause
            # Fail explicitly: no grouping context exists for this case.
            raise NotImplementedError(
                'WITHIN is only implemented for WITHIN RECORD, got '
                'WITHIN %s' % (within_clause,))

        # Add an extra column of row number over which the grouping
        # will be done.
        ctx_with_primary_key = context.empty_context_from_template(ctx)
        context.append_context_to_context(ctx, ctx_with_primary_key)

        # list(...) keeps these two lines working where items() is a view
        # and range() is lazy; on Python 2 the result is unchanged.
        (table_name, _), _ = list(ctx_with_primary_key.columns.items())[0]
        row_nums = list(range(1, ctx_with_primary_key.num_rows + 1))
        row_nums_col = context.Column(type=tq_types.INT,
                                      mode=tq_modes.NULLABLE,
                                      values=row_nums)
        ctx_with_primary_key.columns[(
            table_name, 'row_numbers_column_primary_key')] = row_nums_col

        # Group on copies so that the caller's group set (which may be the
        # shared typed_ast.TRIVIAL_GROUP_SET) is never mutated, even when
        # the NotImplementedError below is raised part-way through.
        alias_groups = set(group_set.alias_groups)
        field_groups = list(group_set.field_groups)
        field_groups.append(
            typed_ast.ColumnRef(table_name,
                                'row_numbers_column_primary_key',
                                tq_types.INT))

        if len(select_fields) > 1:
            # TODO: Implement WITHIN RECORD when one or more of the
            # selected fields (except the one in the WITHIN RECORD
            # clause) has mode = REPEATED.
            for select_field in select_fields:
                if select_field.within_clause is not None:
                    continue
                if select_field.expr.mode == tq_modes.REPEATED:
                    raise NotImplementedError(
                        'Cannot select fields having mode=REPEATED '
                        'for queries involving WITHIN RECORD')
                alias_groups.add(select_field.alias)

        within_group_set = typed_ast.GroupSet(alias_groups=alias_groups,
                                              field_groups=field_groups)
        return self.evaluate_groups(select_fields, within_group_set,
                                    ctx_with_primary_key)
 def test_group_by_alias(self):
     # Grouping by a name that only exists as a select alias: 'foo' is
     # expected in alias_groups, with no field groups.
     foo_field = typed_ast.SelectField(
         typed_ast.Literal(0, tq_types.INT), 'foo')
     expected_groups = typed_ast.GroupSet(
         alias_groups={'foo'}, field_groups=[])
     expected_ctx = self.make_type_context(
         [(None, 'foo', tq_types.INT)],
         self.make_type_context([]))
     expected_select = typed_ast.Select(
         [foo_field],
         typed_ast.Table('table1', self.table1_type_ctx),
         typed_ast.Literal(True, tq_types.BOOL),
         expected_groups,
         None,
         expected_ctx)
     self.assert_compiled_select(
         'SELECT 0 AS foo FROM table1 GROUP BY foo', expected_select)