def test_large_compile():
    """
    Tests that compiling a large expression tree finishes
    within a reasonable amount of time.

    The tree is built by repeatedly adding a constant ``dummy`` column and
    left-joining the table with itself on that column, ``num_joins`` times.
    The test fails if ``table.compile()`` takes 10 seconds or longer.
    """
    # Function-local import keeps the block self-contained. perf_counter is
    # a monotonic clock, so the measured interval cannot be distorted by
    # wall-clock adjustments the way datetime.now() differences can.
    import time

    num_columns = 20
    num_joins = 7

    class MockBigQueryClient(bq.BigQueryClient):
        def __init__(self):
            # Intentionally does not call the parent constructor
            # (presumably so no real connection is required -- confirm).
            pass

    names = [f"col_{i}" for i in range(num_columns)]
    schema = ibis.Schema(names, ['string'] * num_columns)
    ibis_client = MockBigQueryClient()
    table = TableExpr(
        ops.SQLQueryResult("select * from t", schema, ibis_client)
    )
    for _ in range(num_joins):
        table = table.mutate(dummy=ibis.literal(""))
        table = table.left_join(table, ["dummy"])[[table]]

    start = time.perf_counter()
    table.compile()
    elapsed_seconds = time.perf_counter() - start
    assert elapsed_seconds < 10
def filter(table, predicates):
    """
    Keep only the rows of a table that satisfy boolean expressions

    Parameters
    ----------
    table : table expression
      The predicates are bound to this table before being applied
    predicates : boolean array expressions, or list thereof
      A single expression is first split with ``unwrap_ands``

    Returns
    -------
    filtered_expr : TableExpr
    """
    if isinstance(predicates, Expr):
        predicates = _L.unwrap_ands(predicates)

    # Bind every predicate first, then resolve analytic expressions, so
    # the two phases happen in the same order as a pair of loops would.
    bound = [ir.bind_expr(table, p) for p in util.promote_list(predicates)]
    resolved = [
        p.to_filter() if isinstance(p, ir.AnalyticExpr) else p
        for p in bound
    ]

    return TableExpr(_L.apply_filter(table, resolved))
def table(schema, name=None):
    """
    Build an unbound Ibis table to write expressions against. It must be
    bound to some physical table before it can be executed, which makes it
    handy for tests.

    Parameters
    ----------
    schema : ibis Schema
      A list is converted with ``Schema.from_tuples``; any other
      non-Schema value is converted with ``Schema.from_dict``
    name : string, default None
      Name for table

    Returns
    -------
    table : TableExpr
    """
    if isinstance(schema, Schema):
        resolved = schema
    elif isinstance(schema, list):
        resolved = Schema.from_tuples(schema)
    else:
        resolved = Schema.from_dict(schema)

    return TableExpr(_ops.UnboundTable(resolved, name=name))
def aggregate(table, metrics=None, by=None, having=None, **kwds):
    """
    Aggregate a table with a given set of reductions, with grouping
    expressions, and post-aggregation filters.

    Parameters
    ----------
    table : table expression
    metrics : expression or expression list
      Never modified in place; when keyword metrics are supplied a new
      list is built instead.
    by : optional, default None
      Grouping expressions
    having : optional, default None
      Post-aggregation filters
    **kwds : additional metrics given as ``name=expression``
      Each value is converted with ``table._ensure_expr``, named after its
      keyword, and appended after ``metrics`` in sorted keyword order.

    Returns
    -------
    agg_expr : TableExpr
    """
    if metrics is None:
        metrics = []

    named_metrics = [
        table._ensure_expr(value).name(key)
        for key, value in sorted(kwds.items())
    ]

    if named_metrics:
        # Build a fresh list rather than appending to the argument: the
        # caller's list must not be mutated, and a single expression (or a
        # tuple) has no ``append`` to call in the first place.
        if isinstance(metrics, (list, tuple)):
            metrics = list(metrics) + named_metrics
        else:
            metrics = [metrics] + named_metrics

    op = _ops.Aggregation(table, metrics, by=by, having=having)
    return TableExpr(op)
def join(left, right, predicates=(), how='inner'):
    """
    Relationally join two tables. The schema of the result is left
    unresolved.

    Parameters
    ----------
    left : TableExpr
    right : TableExpr
    predicates : join expression(s)
      A single expression is split with ``unwrap_ands``
    how : string, default 'inner'
      Matched case-insensitively against the known join kinds
      - 'inner': inner join
      - 'left': left join
      - 'outer': full outer join
      - 'semi' or 'left_semi': left semi join
      - 'anti': anti join

    Returns
    -------
    joined : TableExpr
      Note, schema is not materialized yet
    """
    # Look the join type up first so an unknown `how` fails before any
    # predicate processing takes place.
    join_type = _join_classes[how.lower()]

    conditions = predicates
    if isinstance(conditions, Expr):
        conditions = _L.unwrap_ands(conditions)

    return TableExpr(join_type(left, right, conditions))
def _table_materialize(table):
    """
    Resolve the schema of a joined table by selecting every field from
    every table. A table that reports itself as already materialized is
    returned unchanged.
    """
    if table._is_materialized():
        return table

    return TableExpr(_ops.MaterializedJoin(table))
def _table_view(self):
    """
    Produce a table expression with the same meaning as this one that is
    nonetheless treated as a separate relation when evaluated (e.g. in
    SQL).

    Self-referencing operations such as a self-join need this to obtain a
    second reference to the current table expression.

    Returns
    -------
    expr : TableExpr
    """
    self_ref = _ops.SelfReference(self)
    return TableExpr(self_ref)
def filter(table, predicates):
    """
    Keep only the rows of a table that satisfy boolean expressions

    Parameters
    ----------
    table : table expression
    predicates : boolean array expressions, or list thereof
      A single expression is first split with ``unwrap_ands``

    Returns
    -------
    filtered_expr : TableExpr
    """
    conditions = (
        _L.unwrap_ands(predicates)
        if isinstance(predicates, Expr)
        else predicates
    )
    return TableExpr(_L.apply_filter(table, conditions))
def _table_union(left, right, distinct=False):
    """
    Build the set union of two table expressions whose schemas are
    identical.

    Parameters
    ----------
    left : TableExpr
      The calling table
    right : TableExpr
    distinct : boolean, default False
      Only union distinct rows not occurring in the calling table (this
      can be very expensive, be careful)

    Returns
    -------
    union : TableExpr
    """
    return TableExpr(_ops.Union(left, right, distinct=distinct))
def _table_limit(table, n, offset=0):
    """
    Take the first n rows from the start of the table. The result may not
    be deterministic, depending on the implementation and on whether a
    sort is present.

    Parameters
    ----------
    table : table expression
    n : int
      Rows to include
    offset : int, default 0
      Number of rows to skip first

    Returns
    -------
    limited : TableExpr
    """
    return TableExpr(_ops.Limit(table, n, offset=offset))
def projection(table, exprs):
    """
    Build a new table expression made of the given column expressions
    taken from this table.

    Parameters
    ----------
    table : table expression
    exprs : column expression, or string, or list of column expressions
      and strings. If strings passed, must be columns in the table already

    Returns
    -------
    projection : TableExpr
    """
    import ibis.expr.analysis as L

    # A lone expression or column name is treated as a one-element list.
    scalar_types = (Expr,) + six.string_types
    if isinstance(exprs, scalar_types):
        exprs = [exprs]

    columns = [table._ensure_expr(item) for item in exprs]
    projector = L.Projector(table, columns)
    return TableExpr(projector.get_result())
def _table_sort_by(table, sort_exprs):
    """
    Order a table by the given column expressions and sort directions
    (ascending/descending)

    Parameters
    ----------
    table : table expression
    sort_exprs : sorting expressions
      Must be one of:
       - Column name or expression
       - Sort key, e.g. desc(col)
       - (column name, True (ascending) / False (descending))

    Examples
    --------
    sorted = table.sort_by([('a', True), ('b', False)])

    Returns
    -------
    sorted : TableExpr
    """
    return TableExpr(_ops.SortBy(table, sort_exprs))
def cross_join(*args, **kwargs):
    """
    Build the cross join (cartesian product) of a list of tables, with an
    optional set of prefixes for overlapping column names. All positional
    and keyword arguments are forwarded unchanged to ``CrossJoin``.

    Parameters
    ----------
    positional args: tables to join
    prefixes keyword : prefixes for each table
      Not yet implemented

    Examples
    --------
    >>> joined1 = ibis.cross_join(a, b, c, d, e)
    >>> joined2 = ibis.cross_join(a, b, c, prefixes=['a_', 'b_', 'c_'])

    Returns
    -------
    joined : TableExpr
      If prefixes not provided, the result schema is not yet materialized
    """
    return TableExpr(_ops.CrossJoin(*args, **kwargs))
def cross_join(left, right, prefixes=None):
    """
    Build the cross join of two tables.

    Parameters
    ----------
    left : table expression
    right : table expression
    prefixes : default None
      Accepted but not used by this implementation

    Returns
    -------
    joined : TableExpr
    """
    return TableExpr(_ops.CrossJoin(left, right))