コード例 #1
0
def test_large_compile():
    """
    Check that compiling a large expression tree finishes within a
    reasonable amount of time (under 10 seconds).

    The tree is built from a 20-column string table that is repeatedly
    mutated and left-joined with itself.
    """
    # Local import: perf_counter is a monotonic, high-resolution clock, so the
    # measured duration cannot be skewed by wall-clock adjustments the way
    # datetime.now() differences can.
    import time

    num_columns = 20
    num_joins = 7

    class MockBigQueryClient(bq.BigQueryClient):
        def __init__(self):
            # Deliberately skip the parent initializer
            # (presumably so no real connection/credentials are needed).
            pass

    names = [f"col_{i}" for i in range(num_columns)]
    schema = ibis.Schema(names, ['string'] * num_columns)
    ibis_client = MockBigQueryClient()
    table = TableExpr(
        ops.SQLQueryResult("select * from t", schema, ibis_client)
    )
    # Each round adds a constant column and self-joins on it, growing the tree.
    for _ in range(num_joins):
        table = table.mutate(dummy=ibis.literal(""))
        table = table.left_join(table, ["dummy"])[[table]]

    start = time.perf_counter()
    table.compile()
    elapsed = time.perf_counter() - start
    assert elapsed < 10, f"compile took {elapsed:.2f}s"
コード例 #2
0
def filter(table, predicates):
    """
    Keep only the rows of ``table`` that satisfy the given boolean
    expressions.

    Parameters
    ----------
    table : table expression
      Table to filter; predicates are bound against it
    predicates : boolean array expression, or list thereof
      A single expression is first passed through ``_L.unwrap_ands``

    Returns
    -------
    filtered_expr : TableExpr
    """
    if isinstance(predicates, Expr):
        predicates = _L.unwrap_ands(predicates)

    bound = [ir.bind_expr(table, p) for p in util.promote_list(predicates)]

    # Analytic expressions are converted with ``to_filter`` before use;
    # everything else is passed along unchanged.
    resolved = [
        p.to_filter() if isinstance(p, ir.AnalyticExpr) else p
        for p in bound
    ]

    return TableExpr(_L.apply_filter(table, resolved))
コード例 #3
0
def table(schema, name=None):
    """
    Build an unbound Ibis table to write expressions against. It cannot be
    executed until it is bound to some physical table.

    Handy for testing.

    Parameters
    ----------
    schema : ibis Schema, or list, or other
      A ``Schema`` is used as is; a list is converted with
      ``Schema.from_tuples``; anything else with ``Schema.from_dict``
    name : string, default None
      Name for table

    Returns
    -------
    table : TableExpr
    """
    if isinstance(schema, Schema):
        resolved = schema
    elif isinstance(schema, list):
        resolved = Schema.from_tuples(schema)
    else:
        resolved = Schema.from_dict(schema)

    return TableExpr(_ops.UnboundTable(resolved, name=name))
コード例 #4
0
def aggregate(table, metrics=None, by=None, having=None, **kwds):
    """
    Aggregate a table with a given set of reductions, with grouping
    expressions, and post-aggregation filters.

    Parameters
    ----------
    table : table expression
    metrics : expression or expression list
      Never modified in place: when keyword metrics are supplied they are
      appended to a copy.
    by : optional, default None
      Grouping expressions
    having : optional, default None
      Post-aggregation filters
    **kwds
      Additional metrics given as ``name=expr``. Each value is passed through
      ``table._ensure_expr`` and named after its keyword; they are added in
      sorted keyword order.

    Returns
    -------
    agg_expr : TableExpr
    """
    if metrics is None:
        metrics = []

    if kwds:
        # Work on a private list so the caller's argument is not mutated, and
        # so a single expression (or a tuple) can be combined with keyword
        # metrics instead of failing on ``.append``.
        if isinstance(metrics, (list, tuple)):
            metrics = list(metrics)
        else:
            metrics = [metrics]

        for k, v in sorted(kwds.items()):
            v = table._ensure_expr(v)
            metrics.append(v.name(k))

    op = _ops.Aggregation(table, metrics, by=by, having=having)
    return TableExpr(op)
コード例 #5
0
def join(left, right, predicates=(), how='inner'):
    """
    Relationally join two tables. The schema of the resulting table is not
    resolved here.

    Parameters
    ----------
    left : TableExpr
    right : TableExpr
    predicates : join expression(s)
      A single expression is first passed through ``_L.unwrap_ands``
    how : string, default 'inner'
      Lower-cased and looked up in ``_join_classes``
      - 'inner': inner join
      - 'left': left join
      - 'outer': full outer join
      - 'semi' or 'left_semi': left semi join
      - 'anti': anti join

    Returns
    -------
    joined : TableExpr
      Note, schema is not materialized yet
    """
    join_kind = how.lower()
    op_class = _join_classes[join_kind]

    if isinstance(predicates, Expr):
        predicates = _L.unwrap_ands(predicates)

    return TableExpr(op_class(left, right, predicates))
コード例 #6
0
ファイル: api.py プロジェクト: keflavich/ibis
def _table_materialize(table):
    """
    Force schema resolution for a joined table, selecting all fields from
    all tables.

    A table that already reports itself as materialized is returned
    unchanged; otherwise it is wrapped in a ``MaterializedJoin``.
    """
    if not table._is_materialized():
        return TableExpr(_ops.MaterializedJoin(table))
    return table
コード例 #7
0
ファイル: api.py プロジェクト: keflavich/ibis
def _table_view(self):
    """
    Produce a table expression that is semantically equivalent to this one
    but is treated as a distinct relation for evaluation purposes (e.g. in
    SQL).

    Use it whenever an operation refers to the same table twice, such as a
    self-join, to obtain a separate reference to the current table
    expression.

    Returns
    -------
    expr : TableExpr
    """
    self_ref = _ops.SelfReference(self)
    return TableExpr(self_ref)
コード例 #8
0
def filter(table, predicates):
    """
    Keep only the rows of ``table`` that satisfy the given boolean
    expressions.

    Parameters
    ----------
    table : table expression
      Table to filter
    predicates : boolean array expression, or list thereof
      A single expression is first passed through ``_L.unwrap_ands``;
      anything else is handed to ``_L.apply_filter`` as given

    Returns
    -------
    filtered_expr : TableExpr
    """
    if isinstance(predicates, Expr):
        conditions = _L.unwrap_ands(predicates)
    else:
        conditions = predicates

    return TableExpr(_L.apply_filter(table, conditions))
コード例 #9
0
def _table_union(left, right, distinct=False):
    """
    Build the set union of two table expressions that have identical
    schemas.

    Parameters
    ----------
    left : TableExpr
      The calling table
    right : TableExpr
    distinct : boolean, default False
        Only union distinct rows not occurring in the calling table (this
        can be very expensive, be careful)

    Returns
    -------
    union : TableExpr
    """
    return TableExpr(_ops.Union(left, right, distinct=distinct))
コード例 #10
0
def _table_limit(table, n, offset=0):
    """
    Take the first n rows from the beginning of the table. The result may
    not be deterministic, depending on the implementation and on whether a
    sort is present.

    Parameters
    ----------
    table : table expression
    n : int
      Rows to include
    offset : int, default 0
      Number of rows to skip first

    Returns
    -------
    limited : TableExpr
    """
    return TableExpr(_ops.Limit(table, n, offset=offset))
コード例 #11
0
ファイル: api.py プロジェクト: keflavich/ibis
def projection(table, exprs):
    """
    Build a new table expression made of the indicated column expressions
    from this table.

    Parameters
    ----------
    table : table expression
    exprs : column expression, or string, or list of column expressions and
      strings. If strings passed, must be columns in the table already.
      A lone expression or string is treated as a one-element list.

    Returns
    -------
    projection : TableExpr
    """
    import ibis.expr.analysis as L

    scalar_types = (Expr,) + six.string_types
    if isinstance(exprs, scalar_types):
        exprs = [exprs]

    # Each entry is converted to an expression via the table itself
    columns = []
    for item in exprs:
        columns.append(table._ensure_expr(item))

    projector = L.Projector(table, columns)
    return TableExpr(projector.get_result())
コード例 #12
0
def _table_sort_by(table, sort_exprs):
    """
    Order the table by the given column expressions and sort directions
    (ascending/descending).

    Parameters
    ----------
    table : table expression
    sort_exprs : sorting expressions
      Must be one of:
        - Column name or expression
        - Sort key, e.g. desc(col)
        - (column name, True (ascending) / False (descending))

    Examples
    --------
    sorted = table.sort_by([('a', True), ('b', False)])

    Returns
    -------
    sorted : TableExpr
    """
    return TableExpr(_ops.SortBy(table, sort_exprs))
コード例 #13
0
def cross_join(*args, **kwargs):
    """
    Build a cross join (cartesian product) over a list of tables, with an
    optional set of prefixes to apply to overlapping column names.

    All positional and keyword arguments are forwarded unchanged to
    ``_ops.CrossJoin``.

    Parameters
    ----------
    positional args: tables to join
    prefixes keyword : prefixes for each table
      Not yet implemented

    Examples
    --------
    >>> joined1 = ibis.cross_join(a, b, c, d, e)
    >>> joined2 = ibis.cross_join(a, b, c, prefixes=['a_', 'b_', 'c_'])

    Returns
    -------
    joined : TableExpr
      If prefixes not provided, the result schema is not yet materialized
    """
    return TableExpr(_ops.CrossJoin(*args, **kwargs))
コード例 #14
0
def cross_join(left, right, prefixes=None):
    """
    Build a cross join of two tables.

    Parameters
    ----------
    left : table expression
    right : table expression
    prefixes : optional, default None
      Accepted but not used by this function

    Returns
    -------
    joined : TableExpr
    """
    return TableExpr(_ops.CrossJoin(left, right))