Beispiel #1
0
def aggregate_options_map(options_map, other_options_map):
    """Aggregate some of the attributes of options map.

    Args:
      options_map: The target map in which we want to aggregate attributes.
        Note: This value is mutated in-place.
      other_options_map: A list of other options maps, some of whose values
        we'd like to see aggregated.
    """
    options_map = copy.copy(options_map)

    currencies = list(options_map["operating_currency"])
    for omap in other_options_map:
        currencies.extend(omap["operating_currency"])
    options_map["operating_currency"] = list(misc_utils.uniquify(currencies))

    # Produce a 'pythonpath' value for transformers.
    pythonpath = set()
    for omap in itertools.chain((options_map, ), other_options_map):
        if omap.get("insert_pythonpath", False):
            pythonpath.add(path.dirname(omap["filename"]))
    options_map["pythonpath"] = sorted(pythonpath)

    return options_map
Beispiel #2
0
 def test_uniquify_last(self):
     data = [('d', 9), ('b', 4), ('c', 8), ('c', 6), ('c', 7), ('a', 3),
             ('a', 1), ('a', 2), ('b', 5)]
     unique_data = misc_utils.uniquify(data, lambda x: x[0], last=True)
     self.assertEqual([('d', 9), ('c', 7), ('a', 2), ('b', 5)],
                      list(unique_data))
Beispiel #3
0
def execute_select(query, entries, options_map):
    """Given a compiled select statement, execute the query.

    Args:
      query: An instance of a query_compile.Query
      entries: A list of directives.
      options_map: A parser's option_map.
    Returns:
      A pair of:
        result_types: A list of (name, data-type) item pairs.
        result_rows: A list of ResultRow tuples of length and types described by
          'result_types'.
    """
    # Figure out the result types that describe what we return.
    result_types = [Column(target.name, target.c_expr.dtype)
                    for target in query.c_targets
                    if target.name is not None]

    # Pre-compute lists of the expressions to evaluate.
    group_indexes = (set(query.group_indexes)
                     if query.group_indexes is not None
                     else query.group_indexes)

    # Indexes of the columns for result rows and order rows.
    result_indexes = [index
                      for index, c_target in enumerate(query.c_targets)
                      if c_target.name]
    order_spec = query.order_spec

    # Figure out if we need to compute balance.
    uses_balance = any(uses_balance_column(c_expr)
                       for c_expr in itertools.chain(
                               [c_target.c_expr for c_target in query.c_targets],
                               [query.c_where] if query.c_where else []))

    context = create_row_context(entries, options_map)

    # Filter the entries using the FROM clause.
    filt_entries = (filter_entries(query.c_from, entries, options_map, context)
                    if query.c_from is not None else
                    entries)

    # Dispatch between the non-aggregated queries and aggregated queries.
    c_where = query.c_where
    rows = []

    # Precompute a list of expressions to be evaluated.
    c_target_exprs = [c_target.c_expr for c_target in query.c_targets]

    if query.group_indexes is None:
        # This is a non-aggregated query.

        # Iterate over all the postings once.
        for entry in misc_utils.filter_type(filt_entries, data.Transaction):
            context.entry = entry
            for posting in entry.postings:
                context.posting = posting
                if c_where is None or c_where(context):
                    # Compute the balance.
                    if uses_balance:
                        context.balance.add_position(posting)

                    # Evaluate all the values.
                    values = [c_expr(context) for c_expr in c_target_exprs]

                    rows.append(values)
    else:
        # This is an aggregated query.

        # Precompute lists of non-aggregate and aggregate expressions to
        # evaluate. For aggregate targets, we hunt down the aggregate
        # sub-expressions to evaluate, to avoid recursion during iteration.
        c_nonaggregate_exprs = []
        c_aggregate_exprs = []
        for index, c_expr in enumerate(c_target_exprs):
            if index in group_indexes:
                c_nonaggregate_exprs.append(c_expr)
            else:
                _, aggregate_exprs = query_compile.get_columns_and_aggregates(c_expr)
                c_aggregate_exprs.extend(aggregate_exprs)
        # Note: it is possible that there are no aggregates to compute here. You could
        # have all columns be non-aggregates and group-by the entire list of columns.

        # Pre-allocate handles in aggregation nodes.
        allocator = Allocator()
        for c_expr in c_aggregate_exprs:
            c_expr.allocate(allocator)

        # Iterate over all the postings to evaluate the aggregates.
        agg_store = {}
        for entry in misc_utils.filter_type(filt_entries, data.Transaction):
            context.entry = entry
            for posting in entry.postings:
                context.posting = posting
                if c_where is None or c_where(context):
                    # Compute the balance.
                    if uses_balance:
                        context.balance.add_position(posting)

                    # Compute the non-aggregate expressions.
                    row_key = tuple(c_expr(context)
                                    for c_expr in c_nonaggregate_exprs)

                    # Get an appropriate store for the unique key of this row.
                    try:
                        store = agg_store[row_key]
                    except KeyError:
                        # This is a row; create a new store.
                        store = allocator.create_store()
                        for c_expr in c_aggregate_exprs:
                            c_expr.initialize(store)
                        agg_store[row_key] = store

                    # Update the aggregate expressions.
                    for c_expr in c_aggregate_exprs:
                        c_expr.update(store, context)

        # Iterate over all the aggregations.
        for key, store in agg_store.items():
            key_iter = iter(key)
            values = []

            # Finalize the store.
            for c_expr in c_aggregate_exprs:
                c_expr.finalize(store)
            context.store = store

            for index, c_expr in enumerate(c_target_exprs):
                if index in group_indexes:
                    value = next(key_iter)
                else:
                    value = c_expr(context)
                values.append(value)

            # Skip row if HAVING clause expression is false.
            if query.having_index is not None:
                if not values[query.having_index]:
                    continue

            rows.append(values)

    # Order results if requested.
    if order_spec is not None:
        # Process the order-by clauses grouped by their ordering direction.
        for reverse, spec in itertools.groupby(reversed(order_spec), key=operator.itemgetter(1)):
            indexes = reversed([i[0] for i in spec])
            # The rows may contain None values: nullitemgetter()
            # replaces these with a special value that compares
            # smaller than anything else.
            rows.sort(key=nullitemgetter(*indexes), reverse=reverse)

    # Convert results into list of tuples.
    rows = [tuple(row[i] for i in result_indexes) for row in rows]

    # Apply distinct.
    if query.distinct:
        rows = list(misc_utils.uniquify(rows))

    # Apply limit.
    if query.limit is not None:
        rows = rows[:query.limit]

    return result_types, rows
Beispiel #4
0
def execute_query(query, entries, options_map):
    """Given a compiled select statement, execute the query.

    Args:
      query: An instance of a query_compile.Query
      entries: A list of directives.
      options_map: A parser's option_map.
    Returns:
      A pair of:
        result_types: A list of (name, data-type) item pairs.
        result_rows: A list of ResultRow tuples of length and types described by
          'result_types'.
    """
    # Figure out the result types that describe what we return.
    result_types = [(target.name, target.c_expr.dtype)
                    for target in query.c_targets if target.name is not None]

    # Create a class for each final result.
    # pylint: disable=invalid-name
    ResultRow = collections.namedtuple(
        'ResultRow',
        [target.name for target in query.c_targets if target.name is not None])

    # Pre-compute lists of the expressions to evaluate.
    group_indexes = (set(query.group_indexes) if query.group_indexes
                     is not None else query.group_indexes)

    # Indexes of the columns for result rows and order rows.
    result_indexes = [
        index for index, c_target in enumerate(query.c_targets)
        if c_target.name
    ]
    order_indexes = query.order_indexes

    # Figure out if we need to compute balance.
    uses_balance = any(
        uses_balance_column(c_expr) for c_expr in
        itertools.chain([c_target.c_expr for c_target in query.c_targets],
                        [query.c_where] if query.c_where else []))

    context = create_row_context(entries, options_map)

    # Filter the entries using the FROM clause.
    filt_entries = (filter_entries(query.c_from, entries, options_map, context)
                    if query.c_from is not None else entries)

    # Dispatch between the non-aggregated queries and aggregated queries.
    c_where = query.c_where
    schwartz_rows = []

    # Precompute a list of expressions to be evaluated.
    c_target_exprs = [c_target.c_expr for c_target in query.c_targets]

    if query.group_indexes is None:
        # This is a non-aggregated query.

        # Iterate over all the postings once and produce schwartzian rows.
        for entry in misc_utils.filter_type(filt_entries, data.Transaction):
            context.entry = entry
            for posting in entry.postings:
                context.posting = posting
                if c_where is None or c_where(context):
                    # Compute the balance.
                    if uses_balance:
                        context.balance.add_position(posting)

                    # Evaluate all the values.
                    values = [c_expr(context) for c_expr in c_target_exprs]

                    # Compute result and sort-key objects.
                    result = ResultRow._make(values[index]
                                             for index in result_indexes)
                    sortkey = row_sortkey(order_indexes, values,
                                          c_target_exprs)
                    schwartz_rows.append((sortkey, result))
    else:
        # This is an aggregated query.

        # Precompute lists of non-aggregate and aggregate expressions to
        # evaluate. For aggregate targets, we hunt down the aggregate
        # sub-expressions to evaluate, to avoid recursion during iteration.
        c_nonaggregate_exprs = []
        c_aggregate_exprs = []
        for index, c_expr in enumerate(c_target_exprs):
            if index in group_indexes:
                c_nonaggregate_exprs.append(c_expr)
            else:
                _, aggregate_exprs = query_compile.get_columns_and_aggregates(
                    c_expr)
                c_aggregate_exprs.extend(aggregate_exprs)
        # Note: it is possible that there are no aggregates to compute here. You could
        # have all columns be non-aggregates and group-by the entire list of columns.

        # Pre-allocate handles in aggregation nodes.
        allocator = Allocator()
        for c_expr in c_aggregate_exprs:
            c_expr.allocate(allocator)

        # Iterate over all the postings to evaluate the aggregates.
        agg_store = {}
        for entry in misc_utils.filter_type(filt_entries, data.Transaction):
            context.entry = entry
            for posting in entry.postings:
                context.posting = posting
                if c_where is None or c_where(context):
                    # Compute the balance.
                    if uses_balance:
                        context.balance.add_position(posting)

                    # Compute the non-aggregate expressions.
                    row_key = tuple(
                        c_expr(context) for c_expr in c_nonaggregate_exprs)

                    # Get an appropriate store for the unique key of this row.
                    try:
                        store = agg_store[row_key]
                    except KeyError:
                        # This is a row; create a new store.
                        store = allocator.create_store()
                        for c_expr in c_aggregate_exprs:
                            c_expr.initialize(store)
                        agg_store[row_key] = store

                    # Update the aggregate expressions.
                    for c_expr in c_aggregate_exprs:
                        c_expr.update(store, context)

        # Iterate over all the aggregations to produce the schwartzian rows.
        for key, store in agg_store.items():
            key_iter = iter(key)
            values = []

            # Finalize the store.
            for c_expr in c_aggregate_exprs:
                c_expr.finalize(store)
            context.store = store

            for index, c_expr in enumerate(c_target_exprs):
                if index in group_indexes:
                    value = next(key_iter)
                else:
                    value = c_expr(context)
                values.append(value)

            # Compute result and sort-key objects.
            result = ResultRow._make(values[index] for index in result_indexes)
            sortkey = row_sortkey(order_indexes, values, c_target_exprs)
            schwartz_rows.append((sortkey, result))

    # Order results if requested.
    if order_indexes is not None:
        schwartz_rows.sort(key=operator.itemgetter(0),
                           reverse=(query.ordering == 'DESC'))

    # Extract final results, in sorted order at this point.
    result_rows = [x[1] for x in schwartz_rows]

    # Apply distinct.
    if query.distinct:
        result_rows = list(misc_utils.uniquify(result_rows))

    # Apply limit.
    if query.limit is not None:
        result_rows = result_rows[:query.limit]

    # Flatten inventories if requested.
    if query.flatten:
        result_types, result_rows = flatten_results(result_types, result_rows)

    return (result_types, result_rows)