def test_is_aggregate_derived(self):
    columns, aggregates = qc.get_columns_and_aggregates(
        qc.EvalAnd(
            qc.EvalEqual(qe.PositionColumn(), qc.EvalConstant(42)),
            qc.EvalOr(
                qc.EvalNot(qc.EvalEqual(qe.DateColumn(),
                                        qc.EvalConstant(datetime.date(2014, 1, 1)))),
                qc.EvalConstant(False))))
    self.assertEqual((2, 0), (len(columns), len(aggregates)))

    columns, aggregates = qc.get_columns_and_aggregates(
        qc.EvalAnd(
            qc.EvalEqual(qe.PositionColumn(), qc.EvalConstant(42)),
            qc.EvalOr(
                qc.EvalNot(qc.EvalEqual(qe.DateColumn(),
                                        qc.EvalConstant(datetime.date(2014, 1, 1)))),
                # Aggregation node deep in the tree.
                qe.Sum([qc.EvalConstant(1)]))))
    self.assertEqual((2, 1), (len(columns), len(aggregates)))
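# For context, a minimal sketch of the traversal the test above exercises.
# This is illustrative only, not the library's implementation: it assumes
# the EvalNode base class exposes childnodes() and that qc.EvalColumn and
# qc.EvalAggregator are the two leaf categories of interest, which is why a
# Sum() node buried deep in the tree is still counted as an aggregate.
def _sketch_get_columns_and_aggregates(node, columns=None, aggregates=None):
    """Illustrative re-derivation; prefer qc.get_columns_and_aggregates()."""
    if columns is None:
        columns, aggregates = [], []
    if isinstance(node, qc.EvalAggregator):
        # Stop descending here: columns nested under an aggregate belong to it.
        aggregates.append(node)
    elif isinstance(node, qc.EvalColumn):
        columns.append(node)
    else:
        for child in node.childnodes():
            _sketch_get_columns_and_aggregates(child, columns, aggregates)
    return columns, aggregates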
def test_get_columns_and_aggregates(self):
    # Simple column.
    c_query = qe.PositionColumn()
    columns, aggregates = qc.get_columns_and_aggregates(c_query)
    self.assertEqual((1, 0), (len(columns), len(aggregates)))
    self.assertFalse(qc.is_aggregate(c_query))

    # Multiple columns.
    c_query = qc.EvalAnd(qe.PositionColumn(), qe.DateColumn())
    columns, aggregates = qc.get_columns_and_aggregates(c_query)
    self.assertEqual((2, 0), (len(columns), len(aggregates)))
    self.assertFalse(qc.is_aggregate(c_query))

    # Simple aggregate.
    c_query = qe.SumPosition([qe.PositionColumn()])
    columns, aggregates = qc.get_columns_and_aggregates(c_query)
    self.assertEqual((0, 1), (len(columns), len(aggregates)))
    self.assertTrue(qc.is_aggregate(c_query))

    # Multiple aggregates.
    c_query = qc.EvalAnd(qe.First([qe.AccountColumn()]),
                         qe.Last([qe.AccountColumn()]))
    columns, aggregates = qc.get_columns_and_aggregates(c_query)
    self.assertEqual((0, 2), (len(columns), len(aggregates)))
    self.assertTrue(qc.is_aggregate(c_query))

    # Simple non-aggregate function.
    c_query = qe.Length([qe.AccountColumn()])
    columns, aggregates = qc.get_columns_and_aggregates(c_query)
    self.assertEqual((1, 0), (len(columns), len(aggregates)))
    self.assertFalse(qc.is_aggregate(c_query))

    # Mix of columns and aggregates (this is how the illegal case is detected).
    c_query = qc.EvalAnd(qe.Length([qe.AccountColumn()]),
                         qe.SumPosition([qe.PositionColumn()]))
    columns, aggregates = qc.get_columns_and_aggregates(c_query)
    self.assertEqual((1, 1), (len(columns), len(aggregates)))
    self.assertTrue(qc.is_aggregate(c_query))
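# The final (1, 1) case above is the shape the compiler has to reject: an
# expression that refers to a raw column and an aggregate at the same time
# is ambiguous outside a GROUP BY. A minimal sketch of such a guard,
# assuming a CompilationError exception exists in query_compile (the helper
# name itself is hypothetical):
def _sketch_check_not_mixed(c_expr):
    """Raise if an expression mixes simple columns with aggregates."""
    columns, aggregates = qc.get_columns_and_aggregates(c_expr)
    if columns and aggregates:
        raise qc.CompilationError(
            "Expression mixes simple columns with aggregates: {}".format(c_expr))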
def execute_query(query, entries, options_map):
    """Given a compiled select statement, execute the query.

    Args:
      query: An instance of a query_compile.Query
      entries: A list of directives.
      options_map: A parser's options_map.
    Returns:
      A pair of:
        result_types: A list of (name, data-type) item pairs.
        result_rows: A list of ResultRow tuples of length and types described
          by 'result_types'.
    """
    # Figure out the result types that describe what we return.
    result_types = [(target.name, target.c_expr.dtype)
                    for target in query.c_targets
                    if target.name is not None]

    # Create a class for each final result.
    # pylint: disable=invalid-name
    ResultRow = collections.namedtuple(
        'ResultRow',
        [target.name for target in query.c_targets if target.name is not None])

    # Pre-compute lists of the expressions to evaluate.
    group_indexes = (set(query.group_indexes)
                     if query.group_indexes is not None
                     else query.group_indexes)

    # Indexes of the columns for result rows and order rows.
    result_indexes = [index
                      for index, c_target in enumerate(query.c_targets)
                      if c_target.name]
    order_indexes = query.order_indexes

    # Figure out if we need to compute balance.
    uses_balance = any(uses_balance_column(c_expr)
                       for c_expr in itertools.chain(
                           [c_target.c_expr for c_target in query.c_targets],
                           [query.c_where] if query.c_where else []))

    context = create_row_context(entries, options_map)

    # Filter the entries using the FROM clause.
    filt_entries = (filter_entries(query.c_from, entries, options_map, context)
                    if query.c_from is not None
                    else entries)

    # Dispatch between the non-aggregated queries and aggregated queries.
    c_where = query.c_where
    schwartz_rows = []

    # Precompute a list of expressions to be evaluated.
    c_target_exprs = [c_target.c_expr for c_target in query.c_targets]

    if query.group_indexes is None:
        # This is a non-aggregated query.

        # Iterate over all the postings once and produce schwartzian rows.
        for entry in misc_utils.filter_type(filt_entries, data.Transaction):
            context.entry = entry
            for posting in entry.postings:
                context.posting = posting
                if c_where is None or c_where(context):
                    # Compute the balance.
                    if uses_balance:
                        context.balance.add_position(posting)

                    # Evaluate all the values.
                    values = [c_expr(context) for c_expr in c_target_exprs]

                    # Compute result and sort-key objects.
                    result = ResultRow._make(values[index]
                                             for index in result_indexes)
                    sortkey = row_sortkey(order_indexes, values, c_target_exprs)
                    schwartz_rows.append((sortkey, result))
    else:
        # This is an aggregated query.

        # Precompute lists of non-aggregate and aggregate expressions to
        # evaluate. For aggregate targets, we hunt down the aggregate
        # sub-expressions to evaluate, to avoid recursion during iteration.
        c_nonaggregate_exprs = []
        c_aggregate_exprs = []
        for index, c_expr in enumerate(c_target_exprs):
            if index in group_indexes:
                c_nonaggregate_exprs.append(c_expr)
            else:
                _, aggregate_exprs = query_compile.get_columns_and_aggregates(
                    c_expr)
                c_aggregate_exprs.extend(aggregate_exprs)
        # Note: it is possible that there are no aggregates to compute here.
        # You could have all columns be non-aggregates and group-by the
        # entire list of columns.

        # Pre-allocate handles in aggregation nodes.
        allocator = Allocator()
        for c_expr in c_aggregate_exprs:
            c_expr.allocate(allocator)

        # Iterate over all the postings to evaluate the aggregates.
        agg_store = {}
        for entry in misc_utils.filter_type(filt_entries, data.Transaction):
            context.entry = entry
            for posting in entry.postings:
                context.posting = posting
                if c_where is None or c_where(context):
                    # Compute the balance.
                    if uses_balance:
                        context.balance.add_position(posting)

                    # Compute the non-aggregate expressions.
                    row_key = tuple(c_expr(context)
                                    for c_expr in c_nonaggregate_exprs)

                    # Get an appropriate store for the unique key of this row.
                    try:
                        store = agg_store[row_key]
                    except KeyError:
                        # This is a new row; create a new store.
                        store = allocator.create_store()
                        for c_expr in c_aggregate_exprs:
                            c_expr.initialize(store)
                        agg_store[row_key] = store

                    # Update the aggregate expressions.
                    for c_expr in c_aggregate_exprs:
                        c_expr.update(store, context)

        # Iterate over all the aggregations to produce the schwartzian rows.
        for key, store in agg_store.items():
            key_iter = iter(key)
            values = []

            # Finalize the store.
            for c_expr in c_aggregate_exprs:
                c_expr.finalize(store)
            context.store = store

            for index, c_expr in enumerate(c_target_exprs):
                if index in group_indexes:
                    value = next(key_iter)
                else:
                    value = c_expr(context)
                values.append(value)

            # Compute result and sort-key objects.
            result = ResultRow._make(values[index] for index in result_indexes)
            sortkey = row_sortkey(order_indexes, values, c_target_exprs)
            schwartz_rows.append((sortkey, result))

    # Order results if requested.
    if order_indexes is not None:
        schwartz_rows.sort(key=operator.itemgetter(0),
                           reverse=(query.ordering == 'DESC'))

    # Extract final results, in sorted order at this point.
    result_rows = [x[1] for x in schwartz_rows]

    # Apply distinct.
    if query.distinct:
        result_rows = list(misc_utils.uniquify(result_rows))

    # Apply limit.
    if query.limit is not None:
        result_rows = result_rows[:query.limit]

    # Flatten inventories if requested.
    if query.flatten:
        result_types, result_rows = flatten_results(result_types, result_rows)

    return (result_types, result_rows)
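# A minimal end-to-end sketch of driving execute_query(). The plumbing used
# here (query_parser.Parser, query_compile.compile and the query_env
# environment classes) is assumed from the surrounding query package and
# mirrors the high-level run_query() helper as I understand it; verify the
# exact signatures against your version before relying on this.
def _example_run_select(query_string, filename):
    """Illustrative only: parse, compile and execute one SELECT statement."""
    from beancount import loader
    from beancount.query import query_parser, query_env

    entries, _errors, options_map = loader.load_file(filename)
    statement = query_parser.Parser().parse(query_string)
    c_query = query_compile.compile(statement,
                                    query_env.TargetsEnvironment(),
                                    query_env.FilterPostingsEnvironment(),
                                    query_env.FilterEntriesEnvironment())
    return execute_query(c_query, entries, options_map)

# Example call (hypothetical ledger file):
#   rtypes, rrows = _example_run_select(
#       "SELECT account, sum(position) GROUP BY account", "ledger.beancount")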