def get_group_key(self, field_groups, alias_groups, select_context,
                  alias_group_result_context, index):
    """Assemble a one-row context containing the group-key values of a row.

    Nothing is evaluated here: the values are simply picked, at position
    `index`, out of columns that already exist in the two given contexts.

    Arguments:
        field_groups: A list of ColumnRefs for the field groups to use.
            Each is looked up in select_context by (table, column).
        alias_groups: A list of strings of alias groups to use. Each is
            looked up in alias_group_result_context by (None, alias).
        select_context: A context with the data for the table expression
            being selected from.
        alias_group_result_context: A context with the data for the
            grouped-by select fields.
        index: The row index to use from each context.

    Returns:
        A context with exactly one row, holding the field-group columns
        first and the alias-group columns after them.
    """
    def single_value_column(source_context, key):
        # Copy the type of the source column and keep only one value.
        original = source_context.columns[key]
        # TODO(Samantha): This shouldn't just be nullable.
        return context.Column(
            type=original.type,
            mode=tq_modes.NULLABLE,
            values=[original.values[index]])

    key_columns = collections.OrderedDict()
    for ref in field_groups:
        key = (ref.table, ref.column)
        key_columns[key] = single_value_column(select_context, key)
    for alias in alias_groups:
        key = (None, alias)
        key_columns[key] = single_value_column(
            alias_group_result_context, key)
    return context.Context(1, key_columns, None)
def setUp(self):
    """Create two empty INT tables, their type contexts and a name lookup.

    Sets self.table1 / self.table2 (zero-row tables), the matching
    self.table1_type_ctx / self.table2_type_ctx built through
    self.make_type_context, and self.tables_by_name.
    """
    def empty_int_columns(*names):
        # Every column gets its own fresh, empty value list.
        return collections.OrderedDict(
            [(name, context.Column(tq_types.INT, [])) for name in names])

    self.table1 = tinyquery.Table(
        'table1', 0, empty_int_columns('value', 'value2'))
    self.table1_type_ctx = self.make_type_context([
        ('table1', 'value', tq_types.INT),
        ('table1', 'value2', tq_types.INT),
    ])

    self.table2 = tinyquery.Table(
        'table2', 0, empty_int_columns('value', 'value3'))
    self.table2_type_ctx = self.make_type_context([
        ('table2', 'value', tq_types.INT),
        ('table2', 'value3', tq_types.INT),
    ])

    self.tables_by_name = {
        'table1': self.table1,
        'table2': self.table2
    }
def evaluate_select_field(self, select_field, ctx):
    """Evaluate one typed select field into a (key, column) pair.

    Arguments:
        select_field: A typed_ast.SelectField; its expression is evaluated
            against ctx with self.evaluate_expr.
        ctx: The context the expression is evaluated in.

    Returns:
        A pair ((None, alias), column) where the column carries the type
        of the select field's expression and the evaluated values.
    """
    assert isinstance(select_field, typed_ast.SelectField)
    expr = select_field.expr
    values = self.evaluate_expr(expr, ctx)
    column = context.Column(expr.type, values)
    return (None, select_field.alias), column
def empty_context_from_select_fields(self, select_fields):
    """Return a zero-row context with one empty column per select field.

    Each column is keyed by (None, alias) and typed after the select
    field's expression.
    """
    empty_columns = collections.OrderedDict()
    for field in select_fields:
        empty_columns[(None, field.alias)] = context.Column(
            field.expr.type, [])
    return context.Context(0, empty_columns, None)
def load_empty_table_from_template(self, table_name, template_table):
    """Load a new zero-row table that mirrors the template's columns.

    Column names, order and types are taken from template_table; every
    new column is created empty and NULLABLE. The resulting table is
    handed to self.load_table_or_view.
    """
    empty_columns = collections.OrderedDict()
    for col_name, col in template_table.columns.iteritems():
        # TODO(Samantha): This shouldn't just be nullable.
        empty_columns[col_name] = context.Column(
            type=col.type, mode=tq_modes.NULLABLE, values=[])
    self.load_table_or_view(Table(table_name, 0, empty_columns))
def make_context(self, name_type_values_triples):
    """Build a context from (name, type, values) triples.

    The row count is the length of the first triple's value list, so at
    least one triple must be supplied. Columns are keyed by (None, name)
    in the order given.
    """
    first_triple = name_type_values_triples[0]
    num_rows = len(first_triple[2])

    columns = collections.OrderedDict()
    for name, col_type, values in name_type_values_triples:
        columns[(None, name)] = context.Column(col_type, values)

    # The constructor does all relevant invariant checks, so we don't have
    # to do that here.
    return context.Context(num_rows, columns, None)
def empty_context_from_select_fields(self, select_fields):
    """Return a zero-row context with one empty column per select field.

    Each column is keyed by (None, alias), typed after the select field's
    expression, and created with NULLABLE mode.
    """
    empty_columns = collections.OrderedDict()
    for field in select_fields:
        # TODO(Samantha): This shouldn't just be nullable
        empty_columns[(None, field.alias)] = context.Column(
            type=field.expr.type, mode=tq_modes.NULLABLE, values=[])
    return context.Context(0, empty_columns, None)
def make_empty_table(self, table_name, raw_schema):
    """Create a zero-row Table whose columns follow raw_schema['fields'].

    Each schema field contributes one empty column, named by the field's
    'name' and typed by its 'type', in schema order.
    """
    # TODO: Handle the mode here. We should default to NULLABLE, but
    # allow other specifiers.
    # TODO: Validate that the type is legal. Currently we take
    # advantage of the fact that type names match the types defined in
    # tq_types.py.
    empty_columns = collections.OrderedDict(
        (field['name'], context.Column(field['type'], []))
        for field in raw_schema['fields'])
    return Table(table_name, 0, empty_columns)
def setUp(self):
    """Create self.tq and load the fixture tables into it.

    Tables are loaded in the order listed below; each entry gives the
    table name, its row count and its (column name, type, values) specs.
    """
    fixtures = [
        ('test_table', 5, [
            ('val1', tq_types.INT, [4, 1, 8, 1, 2]),
            ('val2', tq_types.INT, [8, 2, 4, 1, 6]),
        ]),
        ('test_table_2', 2, [
            ('val3', tq_types.INT, [3, 8]),
            ('val2', tq_types.INT, [2, 7]),
        ]),
        ('test_table_3', 5, [
            ('foo', tq_types.INT, [1, 2, 4, 5, 1]),
            ('bar', tq_types.INT, [2, 7, 3, 1, 1]),
        ]),
        ('null_table', 4, [
            ('foo', tq_types.INT, [1, None, None, 5]),
        ]),
        ('string_table', 2, [
            ('str', tq_types.STRING, ['hello', 'world']),
        ]),
        ('empty_table', 0, [
            ('foo', tq_types.INT, []),
        ]),
    ]

    self.tq = tinyquery.TinyQuery()
    for table_name, num_rows, column_specs in fixtures:
        columns = collections.OrderedDict(
            [(col_name, context.Column(col_type, values))
             for col_name, col_type, values in column_specs])
        self.tq.load_table_or_view(
            tinyquery.Table(table_name, num_rows, columns))
def evaluate_within(self, select_fields, group_set, ctx, within_clause):
    """Evaluate a list of select fields, one of which has a WITHIN or
    WITHIN RECORD clause and/or grouping by some of the values.

    Only WITHIN RECORD is implemented. It is evaluated by adding a
    synthetic row-number column to a copy of ctx and grouping by it.

    Arguments:
        select_fields: A list of SelectField instances to evaluate.
        group_set: The groups (either fields in select_context or aliases
            referring to an element of select_fields) to group by.
            NOTE: this object is mutated: the row-number column is
            appended to its field_groups and aliases may be added to its
            alias_groups.
        ctx: The "source" context that the expressions can access when
            being evaluated.
        within_clause: The WITHIN target; only the string "RECORD" is
            supported.

    Returns:
        A context with the results.

    Raises:
        NotImplementedError: If within_clause is not "RECORD", or if a
            field without a WITHIN clause has mode REPEATED.
    """
    if within_clause != "RECORD":
        # TODO: Implement for WITHIN clause
        # Previously this path fell through to the return statement and
        # failed with an UnboundLocalError on ctx_with_primary_key.
        raise NotImplementedError(
            'WITHIN %r is not supported; only WITHIN RECORD is '
            'implemented' % (within_clause,))

    # Add an extra column of row number over which the grouping
    # will be done.
    ctx_with_primary_key = context.empty_context_from_template(ctx)
    context.append_context_to_context(ctx, ctx_with_primary_key)

    # The synthetic column is filed under the table name of the first
    # column of the context.
    (table_name, _), _ = ctx_with_primary_key.columns.items()[0]
    row_nums = range(1, ctx_with_primary_key.num_rows + 1)
    row_nums_col = context.Column(
        type=tq_types.INT, mode=tq_modes.NULLABLE, values=row_nums)
    ctx_with_primary_key.columns[
        (table_name, 'row_numbers_column_primary_key')] = row_nums_col
    group_set.field_groups.append(
        typed_ast.ColumnRef(
            table_name, 'row_numbers_column_primary_key', tq_types.INT))

    if len(select_fields) > 1:
        # TODO: Implement WITHIN RECORD when one or more of the
        # selected fields (except the one in the WITHIN RECORD
        # clause) has mode = REPEATED.
        for select_field in select_fields:
            if select_field.within_clause is not None:
                continue
            if select_field.expr.mode == tq_modes.REPEATED:
                raise NotImplementedError(
                    'Cannot select fields having mode=REPEATED '
                    'for queries involving WITHIN RECORD')
            group_set.alias_groups.add(select_field.alias)

    # Rebind the module-level trivial group set to a fresh, empty one.
    # NOTE(review): presumably group_set can be that shared object, which
    # was mutated above -- confirm against the callers.
    typed_ast.TRIVIAL_GROUP_SET = typed_ast.GroupSet(set(), [])
    return self.evaluate_groups(
        select_fields, group_set, ctx_with_primary_key)
def make_columns(schema, name_prefix='', ever_repeated=False):
    """Flatten schema['fields'] into empty columns, recursing into records.

    Leaf fields are stored into `columns`, a name this block does not
    define (presumably a mapping owned by the enclosing scope -- confirm).
    Nested record fields are stored under dotted names such as
    'outer.inner'.

    Arguments:
        schema: A dict with a 'fields' list; each field has a 'name', a
            'type' and optionally a 'mode' (case-insensitive).
        name_prefix: Prefix prepended to every field name at this level.
        ever_repeated: True if any enclosing record was REPEATED, in which
            case every leaf below it is stored as REPEATED.

    Raises:
        ValueError: If a non-record field has a type or mode that is not
            in tq_types.TYPE_SET / tq_modes.MODE_SET.
    """
    for field in schema['fields']:
        prefixed_name = name_prefix + field['name']
        value_type = field['type'].upper()
        # "mode" is optional in a schema field; treat a missing or null
        # mode as NULLABLE instead of failing with a KeyError.
        mode = (field.get('mode') or 'NULLABLE').upper()
        if value_type == 'RECORD':
            make_columns(
                field,
                name_prefix=(prefixed_name + '.'),
                ever_repeated=(ever_repeated or mode == 'REPEATED'))
        # Type and Mode validation
        elif (value_type not in tq_types.TYPE_SET or
                mode not in tq_modes.MODE_SET):
            raise ValueError("Type or Mode given was invalid.")
        else:
            final_mode = 'REPEATED' if ever_repeated else mode
            columns[prefixed_name] = context.Column(
                type=value_type, mode=final_mode, values=[])
def evaluate_Literal(self, literal, context_object):
    """Return a NULLABLE column repeating the literal once per context row.

    The column has the literal's type and context_object.num_rows copies
    of the literal's value.
    """
    repeated_value = [literal.value] * context_object.num_rows
    return context.Column(
        type=literal.type, mode=tq_modes.NULLABLE, values=repeated_value)
def evaluate_orderings(self, overall_context, select_context, ordering_col):
    """Order the rows of a context by a list of order-by columns.

    The rows of overall_context are sorted, and the reordered values are
    then written back into the columns of select_context whose names also
    appear in overall_context.

    Arguments:
        overall_context: A context with the data that the select
            statement has access to.
        select_context: A context with the data remaining after earlier
            evaluations. It must not have an aggregate context. Its
            columns are replaced in place.
        ordering_col: A list of order-by column objects having two
            properties: column_id, containing the name of the column,
            and is_ascending, a boolean for the order in which the
            column has to be arranged (True for ascending and False for
            descending).

    Returns:
        select_context, with the reordered column values.
    """
    assert select_context.aggregate_context is None

    values_by_column = [
        column.values for _, column in overall_context.columns.iteritems()]

    # Position of each order-by column within overall_context, mapped to
    # its direction. Only the first column with a matching name is used.
    direction_by_position = collections.OrderedDict()
    for ordering in ordering_col:
        wanted_name = ordering.column_id.name
        for position, (_, name) in enumerate(overall_context.columns):
            if name == wanted_name:
                direction_by_position[position] = ordering.is_ascending
                break

    # Switch from a list of columns to a list of rows so whole rows can
    # be sorted together.
    rows = [list(row) for row in zip(*values_by_column)]

    # Sort once per order-by column, starting with the last one, so that
    # the first order-by column ends up being the most significant.
    for position, ascending in reversed(
            list(direction_by_position.items())):
        rows.sort(key=lambda row: row[position], reverse=not ascending)

    # With 0 rows the row list is empty and no longer knows how many
    # columns there were, so transposing it back would lose the columns.
    # There is nothing to order in that case; reuse the original values.
    # TODO(colin): can we exit early if there's no data to order?
    if rows:
        ordered_values = [list(column) for column in zip(*rows)]
    else:
        ordered_values = values_by_column

    for key in select_context.columns:
        for position, (_, overall_name) in enumerate(
                overall_context.columns):
            if overall_name == key[1]:
                previous = select_context.columns[key]
                select_context.columns[key] = context.Column(
                    type=previous.type,
                    mode=previous.mode,
                    values=ordered_values[position])
                break

    return select_context
def load_empty_table_from_template(self, table_name, template_table):
    """Load a new zero-row table that mirrors the template's columns.

    Column names, order and types are taken from template_table; every
    new column is created empty. The resulting table is handed to
    self.load_table_or_view.
    """
    empty_columns = collections.OrderedDict()
    for col_name, col in template_table.columns.iteritems():
        empty_columns[col_name] = context.Column(col.type, [])
    self.load_table_or_view(Table(table_name, 0, empty_columns))
def setUp(self):
    """Create the table fixtures, their type contexts and a name lookup.

    Sets self.table1, self.table2, self.table3, self.rainbow_table and
    self.record_table, a matching *_type_ctx attribute for each (built
    through self.make_type_context), and self.tables_by_name. Every
    column is NULLABLE; only rainbow_table contains rows.
    """
    def nullable(col_type, values):
        return context.Column(
            type=col_type, mode=tq_modes.NULLABLE, values=values)

    def table_of(name, num_rows, column_specs):
        # column_specs is a list of (column name, type, values).
        return tinyquery.Table(
            name, num_rows,
            collections.OrderedDict(
                [(col_name, nullable(col_type, values))
                 for col_name, col_type, values in column_specs]))

    self.table1 = table_of('table1', 0, [
        ('value', tq_types.INT, []),
        ('value2', tq_types.INT, []),
    ])
    self.table1_type_ctx = self.make_type_context([
        ('table1', 'value', tq_types.INT),
        ('table1', 'value2', tq_types.INT),
    ])

    self.table2 = table_of('table2', 0, [
        ('value', tq_types.INT, []),
        ('value3', tq_types.INT, []),
    ])
    self.table2_type_ctx = self.make_type_context([
        ('table2', 'value', tq_types.INT),
        ('table2', 'value3', tq_types.INT),
    ])

    self.table3 = table_of('table3', 0, [
        ('value', tq_types.INT, []),
    ])
    self.table3_type_ctx = self.make_type_context([
        ('table3', 'value', tq_types.INT),
    ])

    self.rainbow_table = table_of('rainbow_table', 3, [
        ('ints', tq_types.INT, [-2147483649, -0, 2147483648]),
        ('floats', tq_types.FLOAT, [1.41, 2.72, float('infinity')]),
        ('bools', tq_types.BOOL, [True, False, True]),
        ('strings', tq_types.STRING,
         ["infrared", "indigo", "ultraviolet"]),
        ('times', tq_types.TIMESTAMP, [
            datetime.datetime(1969, 12, 31, 23, 59, 59),
            datetime.datetime(1999, 12, 31, 23, 59, 59),
            datetime.datetime(2038, 1, 19, 3, 14, 8),
        ]),
    ])
    self.rainbow_table_type_ctx = self.make_type_context([
        ('rainbow_table', 'ints', tq_types.INT),
        ('rainbow_table', 'floats', tq_types.FLOAT),
        ('rainbow_table', 'bools', tq_types.BOOL),
        ('rainbow_table', 'strings', tq_types.STRING),
        ('rainbow_table', 'times', tq_types.TIMESTAMP),
    ])

    self.record_table = table_of('record_table', 0, [
        ('r1.i', tq_types.INT, []),
        ('r1.s', tq_types.STRING, []),
        ('r2.i', tq_types.INT, []),
    ])
    self.record_table_type_ctx = self.make_type_context([
        ('record_table', 'r1.i', tq_types.INT),
        ('record_table', 'r1.s', tq_types.STRING),
        ('record_table', 'r2.i', tq_types.INT),
    ])

    self.tables_by_name = {
        'table1': self.table1,
        'table2': self.table2,
        'table3': self.table3,
        'rainbow_table': self.rainbow_table,
        'record_table': self.record_table,
    }