Exemple #1
0
    def get_group_key(self, field_groups, alias_groups, select_context,
                      alias_group_result_context, index):
        """Build a one-row context holding the values of a single group key.

        Nothing is evaluated here: the value at ``index`` is copied out of
        source columns that have already been computed.

        Arguments:
            field_groups: A list of ColumnRefs for the field groups to use.
            alias_groups: A list of strings of alias groups to use.
            select_context: A context with the data for the table expression
                being selected from; field groups are read from it.
            alias_group_result_context: A context with the data for the
                grouped-by select fields; alias groups are read from it.
            index: The row index to read from each source column.

        Returns:
            A context with one row whose columns are the field groups
            followed by the alias groups.
        """
        def single_value_column(source_context, key):
            # Copy the type of the source column but keep only one value.
            picked = source_context.columns[key]
            # TODO(Samantha): This shouldn't just be nullable.
            return context.Column(type=picked.type,
                                  mode=tq_modes.NULLABLE,
                                  values=[picked.values[index]])

        key_columns = collections.OrderedDict()
        for group_ref in field_groups:
            key = (group_ref.table, group_ref.column)
            key_columns[key] = single_value_column(select_context, key)
        for alias_name in alias_groups:
            # Alias groups are stored without a table name.
            key = (None, alias_name)
            key_columns[key] = single_value_column(
                alias_group_result_context, key)
        return context.Context(1, key_columns, None)
    def setUp(self):
        """Create two empty INT tables, their type contexts and a name map."""
        table1_columns = collections.OrderedDict()
        table1_columns['value'] = context.Column(tq_types.INT, [])
        table1_columns['value2'] = context.Column(tq_types.INT, [])
        self.table1 = tinyquery.Table('table1', 0, table1_columns)
        self.table1_type_ctx = self.make_type_context([
            ('table1', 'value', tq_types.INT),
            ('table1', 'value2', tq_types.INT),
        ])

        table2_columns = collections.OrderedDict()
        table2_columns['value'] = context.Column(tq_types.INT, [])
        table2_columns['value3'] = context.Column(tq_types.INT, [])
        self.table2 = tinyquery.Table('table2', 0, table2_columns)
        self.table2_type_ctx = self.make_type_context([
            ('table2', 'value', tq_types.INT),
            ('table2', 'value3', tq_types.INT),
        ])

        # Lookup of the fixture tables by their table name.
        self.tables_by_name = {
            'table1': self.table1,
            'table2': self.table2,
        }
Exemple #3
0
 def evaluate_select_field(self, select_field, ctx):
     """Evaluate one typed select field into a ``(key, column)`` pair.

     The key is ``(None, alias)``; the column carries the type of the
     field's expression and the values produced by evaluating it in ctx.
     """
     assert isinstance(select_field, typed_ast.SelectField)
     expression = select_field.expr
     values = self.evaluate_expr(expression, ctx)
     column_key = (None, select_field.alias)
     return column_key, context.Column(expression.type, values)
Exemple #4
0
 def empty_context_from_select_fields(self, select_fields):
     """Return a zero-row context with one empty column per select field.

     Each column is keyed by ``(None, alias)`` and typed after the field's
     expression.
     """
     empty_columns = collections.OrderedDict()
     for field in select_fields:
         key = (None, field.alias)
         empty_columns[key] = context.Column(field.expr.type, [])
     return context.Context(0, empty_columns, None)
Exemple #5
0
 def load_empty_table_from_template(self, table_name, template_table):
     """Load a new zero-row table whose columns mirror a template table.

     Column names and types are taken from ``template_table``; every new
     column is created NULLABLE with no values.
     """
     empty_columns = collections.OrderedDict()
     for name, template_column in template_table.columns.iteritems():
         # TODO(Samantha): This shouldn't just be nullable.
         empty_columns[name] = context.Column(type=template_column.type,
                                              mode=tq_modes.NULLABLE,
                                              values=[])
     self.load_table_or_view(Table(table_name, 0, empty_columns))
 def make_context(self, name_type_values_triples):
     """Build a context from ``(name, type, values)`` triples.

     The row count is taken from the values of the first triple, and each
     column is keyed by ``(None, name)``.
     """
     num_rows = len(name_type_values_triples[0][2])
     columns = collections.OrderedDict()
     for name, col_type, values in name_type_values_triples:
         key = (None, name)
         columns[key] = context.Column(col_type, values)
     # No validation here: the Context constructor does all relevant
     # invariant checks.
     return context.Context(num_rows, columns, None)
Exemple #7
0
 def empty_context_from_select_fields(self, select_fields):
     """Return a zero-row context with one empty column per select field.

     Each column is keyed by ``(None, alias)``, typed after the field's
     expression, and created NULLABLE.
     """
     empty_columns = collections.OrderedDict()
     for field in select_fields:
         key = (None, field.alias)
         # TODO(Samantha): This shouldn't just be nullable
         empty_columns[key] = context.Column(type=field.expr.type,
                                             mode=tq_modes.NULLABLE,
                                             values=[])
     return context.Context(0, empty_columns, None)
Exemple #8
0
 def make_empty_table(self, table_name, raw_schema):
     """Create a zero-row Table with one empty column per schema field.

     ``raw_schema['fields']`` is iterated in order; each field contributes
     a column named ``field['name']`` whose type is ``field['type']``.
     """
     empty_columns = collections.OrderedDict()
     for field in raw_schema['fields']:
         # TODO: Handle the mode here. We should default to NULLABLE, but
         # allow other specifiers.
         # TODO: Validate that the type is legal. Currently we take
         # advantage of the fact that type names match the types defined in
         # tq_types.py.
         empty_column = context.Column(field['type'], [])
         empty_columns[field['name']] = empty_column
     return Table(table_name, 0, empty_columns)
 def setUp(self):
     """Load the fixture tables into a fresh TinyQuery instance."""
     self.tq = tinyquery.TinyQuery()

     def load(name, num_rows, named_columns):
         # Wrap the (name, column) pairs in a Table and register it.
         self.tq.load_table_or_view(
             tinyquery.Table(name, num_rows,
                             collections.OrderedDict(named_columns)))

     def ints(values):
         return context.Column(tq_types.INT, values)

     load('test_table', 5, [
         ('val1', ints([4, 1, 8, 1, 2])),
         ('val2', ints([8, 2, 4, 1, 6])),
     ])
     load('test_table_2', 2, [
         ('val3', ints([3, 8])),
         ('val2', ints([2, 7])),
     ])
     load('test_table_3', 5, [
         ('foo', ints([1, 2, 4, 5, 1])),
         ('bar', ints([2, 7, 3, 1, 1])),
     ])
     # Table containing missing (None) values.
     load('null_table', 4, [
         ('foo', ints([1, None, None, 5])),
     ])
     load('string_table', 2, [
         ('str', context.Column(tq_types.STRING, ['hello', 'world'])),
     ])
     # Table with a column but no rows.
     load('empty_table', 0, [
         ('foo', ints([])),
     ])
Exemple #10
0
    def evaluate_within(self, select_fields, group_set, ctx, within_clause):
        """Evaluate a list of select fields, one of which has a WITHIN or
        WITHIN RECORD clause and/or grouping by some of the values.

        Only WITHIN RECORD is implemented. It is evaluated by adding a
        synthetic row-number column to a copy of ``ctx`` and grouping by
        that column, so every source row forms its own group.

        Note that ``group_set`` is modified in place: the row-number column
        is appended to its field groups and, when several fields are
        selected, the aliases of the fields without a WITHIN clause are
        added to its alias groups.

        Arguments:
            select_fields: A list of SelectField instances to evaluate.
            group_set: The groups (either fields in select_context or aliases
                referring to an element of select_fields) to group by.
            ctx: The "source" context that the expressions can access when
                being evaluated.
            within_clause: The kind of WITHIN clause; only "RECORD" is
                supported.

        Returns:
            A context with the results.

        Raises:
            NotImplementedError: If ``within_clause`` is not "RECORD", or if
                a selected field without a WITHIN clause has mode REPEATED.
        """
        if within_clause != "RECORD":
            # TODO: Implement for WITHIN clause
            # Without this guard the code below would reach the final
            # return with no context bound and die with an
            # UnboundLocalError instead of a meaningful error.
            raise NotImplementedError(
                'WITHIN is only implemented for WITHIN RECORD')

        # Add an extra column of row number over which the grouping
        # will be done.
        ctx_with_primary_key = context.empty_context_from_template(ctx)
        context.append_context_to_context(ctx, ctx_with_primary_key)

        # Attach the synthetic column to the table of the first column.
        (table_name, _), _ = ctx_with_primary_key.columns.items()[0]
        row_nums = range(1, ctx_with_primary_key.num_rows + 1)
        row_nums_col = context.Column(type=tq_types.INT,
                                      mode=tq_modes.NULLABLE,
                                      values=row_nums)
        ctx_with_primary_key.columns[(
            table_name, 'row_numbers_column_primary_key')] = row_nums_col
        group_set.field_groups.append(
            typed_ast.ColumnRef(table_name,
                                'row_numbers_column_primary_key',
                                tq_types.INT))
        if len(select_fields) > 1:
            # TODO: Implement WITHIN RECORD when one or more of the
            # selected fields (except the one in the WITHIN RECORD
            # clause) has mode = REPEATED.
            for select_field in select_fields:
                if select_field.within_clause is None:
                    if select_field.expr.mode != tq_modes.REPEATED:
                        group_set.alias_groups.add(select_field.alias)
                    else:
                        raise NotImplementedError(
                            'Cannot select fields having mode=REPEATED '
                            'for queries involving WITHIN RECORD')
        # NOTE(review): the module-level trivial group set is replaced with
        # a fresh one, presumably because ``group_set`` may be that shared
        # instance and was mutated above -- confirm against the callers.
        typed_ast.TRIVIAL_GROUP_SET = typed_ast.GroupSet(set(), [])
        return self.evaluate_groups(select_fields, group_set,
                                    ctx_with_primary_key)
Exemple #11
0
 def make_columns(schema, name_prefix='', ever_repeated=False):
     """Flatten a schema's fields into the enclosing ``columns`` mapping.

     RECORD fields are not stored themselves; their sub-fields are added
     recursively under a dotted name (``parent.child``). Every other field
     becomes an empty column. ``columns`` is not defined in this function
     and is expected to come from the enclosing scope.

     Arguments:
         schema: A dict with a 'fields' list; each field is a dict with
             'name' and 'type' entries and an optional 'mode' entry.
         name_prefix: Prefix prepended to every field name at this level.
         ever_repeated: True if any enclosing RECORD was REPEATED, in
             which case the resulting columns are REPEATED as well.

     Raises:
         ValueError: If a non-RECORD field has an unknown type or mode.
     """
     for field in schema['fields']:
         prefixed_name = name_prefix + field['name']
         value_type = field['type'].upper()
         # A field without an explicit mode is treated as NULLABLE rather
         # than failing with a KeyError.
         mode = field.get('mode', 'NULLABLE').upper()
         if value_type == 'RECORD':
             make_columns(field,
                          name_prefix=(prefixed_name + '.'),
                          ever_repeated=(ever_repeated
                                         or mode == 'REPEATED'))
         # Type and Mode validation
         elif (value_type not in tq_types.TYPE_SET
               or mode not in tq_modes.MODE_SET):
             raise ValueError("Type or Mode given was invalid.")
         else:
             # A repeated ancestor makes the leaf repeated, whatever the
             # leaf's own mode says.
             final_mode = 'REPEATED' if ever_repeated else mode
             columns[prefixed_name] = context.Column(type=value_type,
                                                     mode=final_mode,
                                                     values=[])
Exemple #12
0
 def evaluate_Literal(self, literal, context_object):
     """Return a NULLABLE column repeating the literal once per row.

     The column has the literal's type and ``context_object.num_rows``
     copies of its value.
     """
     repeated = []
     for _ in xrange(context_object.num_rows):
         repeated.append(literal.value)
     return context.Column(
         type=literal.type, mode=tq_modes.NULLABLE, values=repeated)
Exemple #13
0
    def evaluate_orderings(self, overall_context, select_context,
                           ordering_col):
        """
        Reorder the rows of a select context according to ORDER BY columns.

        The rows of overall_context are sorted, and every column of
        select_context whose name matches a column of overall_context is
        replaced by the reordered values of that column.

        Arguments:
            overall_context: A context with the data that the select statement
                has access to.
            select_context: A context with the data remaining after earlier
                evaluations. It must not have an aggregate context.
            ordering_col: A list of order-by column objects having two
                properties: column_id containing the name of the column and
                is_ascending which is a boolean for the order in which the
                column has to be arranged (True for ascending and False for
                descending).

        Returns:
            select_context, with its matching columns reordered.
        """
        assert select_context.aggregate_context is None
        overall_columns = overall_context.columns

        # Column-major view of the data: one list of values per column.
        values_by_column = [
            column.values
            for ((_, column_name), column) in overall_columns.iteritems()]

        # Map the position of each order-by column to its sort direction,
        # in the order the columns were requested.
        directions = collections.OrderedDict()
        for order_by in ordering_col:
            wanted_name = order_by.column_id.name
            for position, ((_, column_name), column) in enumerate(
                    overall_columns.iteritems()):
                if column_name == wanted_name:
                    directions[position] = order_by.is_ascending
                    break

        # Switch to row-major form and apply the sorts from the least
        # significant key to the most significant one; list.sort is stable,
        # so earlier keys end up taking precedence.
        rows = [list(row) for row in zip(*values_by_column)]
        for position, ascending in reversed(list(directions.items())):
            rows.sort(key=lambda row: (row[position]),
                      reverse=not ascending)

        if rows:
            ordered_values = [list(values) for values in zip(*rows)]
        else:
            # With zero rows the row-major form no longer knows how many
            # columns there were, so transposing back would give an empty
            # list. There is nothing to reorder anyway; reuse the original
            # per-column lists.
            # TODO(colin): can we exit early if there's no data to order?
            ordered_values = values_by_column

        for select_key in select_context.columns:
            for position, (_, overall_name) in enumerate(overall_columns):
                if overall_name != select_key[1]:
                    continue
                current = select_context.columns[select_key]
                select_context.columns[select_key] = context.Column(
                    type=current.type,
                    mode=current.mode,
                    values=ordered_values[position])
                # Only the first column with a matching name is used.
                break

        return select_context
Exemple #14
0
 def load_empty_table_from_template(self, table_name, template_table):
     """Load a new zero-row table whose columns mirror a template table.

     Column names and types are copied from ``template_table``; the new
     columns hold no values.
     """
     empty_columns = collections.OrderedDict()
     for name, template_column in template_table.columns.iteritems():
         empty_columns[name] = context.Column(template_column.type, [])
     self.load_table_or_view(Table(table_name, 0, empty_columns))
    def setUp(self):
        """Create the fixture tables, their type contexts and a name map.

        All columns are NULLABLE. Only ``rainbow_table`` has rows; the
        other tables are empty.
        """
        def nullable(column_type, values):
            return context.Column(type=column_type,
                                  mode=tq_modes.NULLABLE,
                                  values=values)

        def empty(column_type):
            # A fresh list per column so no two columns share storage.
            return nullable(column_type, [])

        table1_columns = collections.OrderedDict()
        table1_columns['value'] = empty(tq_types.INT)
        table1_columns['value2'] = empty(tq_types.INT)
        self.table1 = tinyquery.Table('table1', 0, table1_columns)
        self.table1_type_ctx = self.make_type_context([
            ('table1', 'value', tq_types.INT),
            ('table1', 'value2', tq_types.INT),
        ])

        table2_columns = collections.OrderedDict()
        table2_columns['value'] = empty(tq_types.INT)
        table2_columns['value3'] = empty(tq_types.INT)
        self.table2 = tinyquery.Table('table2', 0, table2_columns)
        self.table2_type_ctx = self.make_type_context([
            ('table2', 'value', tq_types.INT),
            ('table2', 'value3', tq_types.INT),
        ])

        table3_columns = collections.OrderedDict()
        table3_columns['value'] = empty(tq_types.INT)
        self.table3 = tinyquery.Table('table3', 0, table3_columns)
        self.table3_type_ctx = self.make_type_context([
            ('table3', 'value', tq_types.INT),
        ])

        # One column per supported type, three rows each.
        rainbow_columns = collections.OrderedDict()
        rainbow_columns['ints'] = nullable(
            tq_types.INT, [-2147483649, -0, 2147483648])
        rainbow_columns['floats'] = nullable(
            tq_types.FLOAT, [1.41, 2.72, float('infinity')])
        rainbow_columns['bools'] = nullable(
            tq_types.BOOL, [True, False, True])
        rainbow_columns['strings'] = nullable(
            tq_types.STRING, ["infrared", "indigo", "ultraviolet"])
        rainbow_columns['times'] = nullable(
            tq_types.TIMESTAMP,
            [
                datetime.datetime(1969, 12, 31, 23, 59, 59),
                datetime.datetime(1999, 12, 31, 23, 59, 59),
                datetime.datetime(2038, 1, 19, 3, 14, 8),
            ])
        self.rainbow_table = tinyquery.Table(
            'rainbow_table', 3, rainbow_columns)
        self.rainbow_table_type_ctx = self.make_type_context([
            ('rainbow_table', 'ints', tq_types.INT),
            ('rainbow_table', 'floats', tq_types.FLOAT),
            ('rainbow_table', 'bools', tq_types.BOOL),
            ('rainbow_table', 'strings', tq_types.STRING),
            ('rainbow_table', 'times', tq_types.TIMESTAMP),
        ])

        # Column names with dots, as produced for fields inside records.
        record_columns = collections.OrderedDict()
        record_columns['r1.i'] = empty(tq_types.INT)
        record_columns['r1.s'] = empty(tq_types.STRING)
        record_columns['r2.i'] = empty(tq_types.INT)
        self.record_table = tinyquery.Table(
            'record_table', 0, record_columns)
        self.record_table_type_ctx = self.make_type_context([
            ('record_table', 'r1.i', tq_types.INT),
            ('record_table', 'r1.s', tq_types.STRING),
            ('record_table', 'r2.i', tq_types.INT),
        ])

        # Lookup of the fixture tables by their table name.
        self.tables_by_name = {
            'table1': self.table1,
            'table2': self.table2,
            'table3': self.table3,
            'rainbow_table': self.rainbow_table,
            'record_table': self.record_table,
        }