def test_large_compile():
    """
    Check that compiling a repeatedly self-joined expression completes
    in under a minute.

    The expression starts from a 20-column, all-string query result and
    is mutated and left-joined with itself 7 times before compilation.
    """
    num_columns = 20
    num_joins = 7

    class MockCloudSpannerClient(cs_compile.CloudSpannerClient):
        # Replace the constructor so the parent ``__init__`` never runs.
        def __init__(self):
            pass

    column_names = [f"col_{i}" for i in range(num_columns)]
    column_types = ["string"] * num_columns
    query_result = ops.SQLQueryResult(
        "select * from t",
        ibis.Schema(column_names, column_types),
        MockCloudSpannerClient(),
    )
    table = TableExpr(query_result)
    for _ in range(num_joins):
        # Add a constant key column, self-join on it, then select
        # ``[[table]]`` from the join result.
        table = table.mutate(dummy=ibis.literal(""))
        table = table.left_join(table, ["dummy"])[[table]]

    # Only the compile call itself is timed.
    started = datetime.datetime.now()
    cs_compile.compile(table)
    elapsed = datetime.datetime.now() - started
    assert elapsed.total_seconds() < 60
Ejemplo n.º 2
0
 def __call__(self, expr: ir.TableExpr) -> ir.TableExpr:
     """Project ``expr`` onto the resolved form of ``self.columns``.

     When ``expr`` is a join, ``X`` is bound to the join's left operand
     and ``Y`` to its right operand; otherwise only ``X`` is bound, to
     ``expr`` itself.
     """
     node = expr.op()
     if not isinstance(node, ops.Join):
         scope = {X: expr}
     else:
         scope = {X: node.left, Y: node.right}

     # A single column may resolve to one expression or to several, so
     # each result is promoted to a list and flattened into one sequence.
     resolved = []
     for column in self.columns:
         resolved.extend(ibis.util.promote_list(column.resolve(expr, scope)))
     return expr.projection(resolved)
Ejemplo n.º 3
0
 def __call__(self, expr: ir.TableExpr) -> ir.TableExpr:
     """Project ``expr`` onto the expressions in ``self.mutations``.

     Each entry is resolved against ``expr`` (bound to ``X``) and named
     after its key in ``self.mutations``.
     """
     named_columns = []
     for name, column in self.mutations.items():
         resolved = column.resolve(expr, {X: expr})
         named_columns.append(resolved.name(name))
     return expr.projection(named_columns)
Ejemplo n.º 4
0
def join(left, right, predicates=(), how='inner'):
    """
    Build a relational join of two tables. The schema of the result is
    not resolved here.

    Parameters
    ----------
    left : TableExpr
    right : TableExpr
    predicates : join expression(s)
      A single expression is split into its AND-ed parts first.
    how : string, default 'inner'
      Lower-cased and used as a key into ``_join_classes``:
      - 'inner': inner join
      - 'left': left join
      - 'outer': full outer join
      - 'semi' or 'left_semi': left semi join
      - 'anti': anti join

    Returns
    -------
    joined : TableExpr
      Note, schema is not materialized yet
    """
    join_class = _join_classes[how.lower()]
    conditions = (_L.unwrap_ands(predicates)
                  if isinstance(predicates, Expr) else predicates)
    return TableExpr(join_class(left, right, conditions))
Ejemplo n.º 5
0
def filter(table, predicates):
    """
    Keep the rows of ``table`` that satisfy the given boolean expressions.

    Parameters
    ----------
    table : table expression
    predicates : boolean array expressions, or list thereof
      A single expression is split into its AND-ed parts first.

    Returns
    -------
    filtered_expr : TableExpr
    """
    if isinstance(predicates, Expr):
        predicates = _L.unwrap_ands(predicates)

    # Bind every predicate to the table before any of them is converted.
    bound = [ir.bind_expr(table, p) for p in util.promote_list(predicates)]

    # Analytic expressions are turned into filters; others pass through.
    resolved = [
        p.to_filter() if isinstance(p, ir.AnalyticExpr) else p
        for p in bound
    ]
    return TableExpr(_L.apply_filter(table, resolved))
Ejemplo n.º 6
0
def aggregate(table, metrics=None, by=None, having=None, **kwds):
    """
    Aggregate a table with a given set of reductions, with grouping
    expressions, and post-aggregation filters.

    Parameters
    ----------
    table : table expression
    metrics : expression or expression list
      The caller's object is never modified; keyword metrics are added
      to a private copy.
    by : optional, default None
      Grouping expressions
    having : optional, default None
      Post-aggregation filters
    **kwds
      Extra metrics given as ``name=expr``. Each value is passed through
      ``table._ensure_expr``, named after its keyword, and appended after
      ``metrics`` in sorted keyword order.

    Returns
    -------
    agg_expr : TableExpr
    """
    if metrics is None:
        metrics = []
    elif kwds:
        # Keyword metrics are appended below. Work on a fresh list so the
        # caller's list is not mutated, and so a tuple or a single
        # expression (neither of which has ``append``) also works.
        if isinstance(metrics, (list, tuple)):
            metrics = list(metrics)
        else:
            metrics = [metrics]

    # Sorted so the resulting metric order does not depend on keyword order.
    for k, v in sorted(kwds.items()):
        v = table._ensure_expr(v)
        metrics.append(v.name(k))

    op = _ops.Aggregation(table, metrics, by=by, having=having)
    return TableExpr(op)
Ejemplo n.º 7
0
def table(schema, name=None):
    """
    Build an unbound Ibis table to write expressions against. It cannot
    be executed until it is bound to some physical table.

    Useful for testing

    Parameters
    ----------
    schema : ibis Schema
      A non-Schema value is converted first: a list goes through
      ``Schema.from_tuples``, anything else through ``Schema.from_dict``.
    name : string, default None
      Name for table

    Returns
    -------
    table : TableExpr
    """
    if not isinstance(schema, Schema):
        if isinstance(schema, list):
            convert = Schema.from_tuples
        else:
            convert = Schema.from_dict
        schema = convert(schema)

    return TableExpr(_ops.UnboundTable(schema, name=name))
Ejemplo n.º 8
0
def _table_materialize(table):
    """
    Force schema resolution for a joined table, selecting all fields from
    all tables.

    A table that reports itself as already materialized is returned
    unchanged.
    """
    if not table._is_materialized():
        return TableExpr(_ops.MaterializedJoin(table))
    return table
Ejemplo n.º 9
0
 def handle_selection(self, ibis_table: TableExpr,
                      columns: List[Value]) -> TableExpr:
     """
     Project ``ibis_table`` onto the selected columns.

     :param ibis_table: Table expression to select from
     :param columns: Selected values; each is named with its own name
     :return: ``ibis_table`` unchanged when any column is named ``"*"``
         or when ``columns`` is empty, otherwise the projection
     """
     named_values = []
     for column in columns:
         column_name = column.get_name()
         if column_name == "*":
             # A star selection short-circuits: the whole table is kept.
             return ibis_table
         named_values.append(column.get_value().name(column_name))
     if not named_values:
         return ibis_table
     return ibis_table.projection(named_values)
Ejemplo n.º 10
0
 def union_distinct(
     self,
     expr1: TableExpr,
     expr2: TableExpr,
 ):
     """
     Union two table expressions with ``distinct=True``
     :param expr1: Left TableExpr
     :param expr2: Right TableExpr
     :return: Result of ``expr1.union(expr2, distinct=True)``
     """
     distinct_union = expr1.union(expr2, distinct=True)
     return distinct_union
Ejemplo n.º 11
0
def flatten(table: ir.TableExpr):
    """Flatten both operands of `table`'s operation with `flatten_union`.

    Parameters
    ----------
    table : TableExpr
        Expression whose operation has `left` and `right` operands.

    Returns
    -------
    list
        The flattened left operand followed by the flattened right one.
    """
    node = table.op()
    flattened = list(flatten_union(node.left))
    flattened.extend(flatten_union(node.right))
    return flattened
Ejemplo n.º 12
0
def flatten_difference(table: ir.TableExpr):
    """Flatten the operands of a difference operation.

    Parameters
    ----------
    table : TableExpr

    Returns
    -------
    Iterable
        For a difference, the `flatten_union` results of the left and
        right operands chained together; otherwise `[table]`.
    """
    node = table.op()
    if not isinstance(node, ops.Difference):
        return [table]
    left_parts = flatten_union(node.left)
    right_parts = flatten_union(node.right)
    return toolz.concatv(left_parts, right_parts)
Ejemplo n.º 13
0
def _table_view(self):
    """
    Wrap this table expression in a self-reference, giving a new
    expression that is semantically equivalent but is considered a
    distinct relation for evaluation purposes (e.g. in SQL).

    Use it for any self-referencing operation, such as a self-join, to
    obtain a second reference to the current table expression.

    Returns
    -------
    expr : TableExpr
    """
    self_reference = _ops.SelfReference(self)
    return TableExpr(self_reference)
Ejemplo n.º 14
0
def flatten_union(table: ir.TableExpr):
    """Recursively flatten nested union operations in `table`.

    Parameters
    ----------
    table : TableExpr

    Returns
    -------
    Iterable[Union[TableExpr, bool]]
        For a union: the flattened left operand, the union's `distinct`
        flag, then the flattened right operand. Otherwise `[table]`.
    """
    node = table.op()
    if not isinstance(node, ops.Union):
        return [table]
    left_parts = flatten_union(node.left)
    right_parts = flatten_union(node.right)
    return toolz.concatv(left_parts, [node.distinct], right_parts)
Ejemplo n.º 15
0
def filter(table, predicates):
    """
    Keep the rows of ``table`` that satisfy the given boolean expressions.

    Parameters
    ----------
    table : table expression
    predicates : boolean array expressions, or list thereof
      A single expression is split into its AND-ed parts first.

    Returns
    -------
    filtered_expr : TableExpr
    """
    conditions = (_L.unwrap_ands(predicates)
                  if isinstance(predicates, Expr) else predicates)
    return TableExpr(_L.apply_filter(table, conditions))
Ejemplo n.º 16
0
def _table_limit(table, n, offset=0):
    """
    Select the first n rows at the beginning of the table (may not be
    deterministic depending on implementation and presence of a sorting).

    Parameters
    ----------
    table : table expression
    n : int
      Rows to include
    offset : int, default 0
      Number of rows to skip first

    Returns
    -------
    limited : TableExpr
    """
    return TableExpr(_ops.Limit(table, n, offset=offset))
Ejemplo n.º 17
0
def _table_union(left, right, distinct=False):
    """
    Form the table set union of two table expressions having identical
    schemas.

    Parameters
    ----------
    left : TableExpr
    right : TableExpr
    distinct : boolean, default False
        Only union distinct rows not occurring in the calling table (this
        can be very expensive, be careful)

    Returns
    -------
    union : TableExpr
    """
    return TableExpr(_ops.Union(left, right, distinct=distinct))
Ejemplo n.º 18
0
 def handle_filtering(
     self,
     ibis_table: TableExpr,
     where_expr: Tree,
     internal_transformer: InternalTransformer,
 ):
     """
     Apply the where clause, if there is one, to the table
     :param ibis_table: Ibis expression table to manipulate
     :param where_expr: Syntax tree containing where clause, or None
     :param internal_transformer: Transformer to transform the where clauses
     :return: Filtered TableExpr, or ``ibis_table`` unchanged when
         ``where_expr`` is None
     """
     if where_expr is None:
         return ibis_table
     where_expression: WhereExpression = internal_transformer.transform(
         where_expr)
     predicate = where_expression.value.get_value()
     return ibis_table.filter(predicate)
Ejemplo n.º 19
0
def projection(table, exprs):
    """
    Build a new table expression holding the indicated column
    expressions from this table.

    Parameters
    ----------
    table : table expression
    exprs : column expression, or string, or list of column expressions and
      strings. If strings passed, must be columns in the table already

    Returns
    -------
    projection : TableExpr
    """
    import ibis.expr.analysis as L

    # A lone expression or column name is treated as a one-element list.
    single_item_types = (Expr,) + six.string_types
    if isinstance(exprs, single_item_types):
        exprs = [exprs]

    resolved = []
    for item in exprs:
        resolved.append(table._ensure_expr(item))
    return TableExpr(L.Projector(table, resolved).get_result())
Ejemplo n.º 20
0
def flatten_union(table: ir.TableExpr):
    """Recursively flatten nested union operations in `table`.

    Parameters
    ----------
    table : TableExpr

    Returns
    -------
    Iterable[Union[TableExpr, bool]]
        For a union: the flattened left operand, the union's `distinct`
        flag, then the flattened right operand. Otherwise `[table]`.
    """
    node = table.op()
    if not isinstance(node, ops.Union):
        return [table]
    # mypy sees `left` and `right` as `Argument` and rejects the calls,
    # although the same calls type-check inside `flatten`; hence the
    # ignores.
    left_parts = flatten_union(node.left)  # type: ignore
    right_parts = flatten_union(node.right)  # type: ignore
    return toolz.concatv(left_parts, [node.distinct], right_parts)
Ejemplo n.º 21
0
def _table_sort_by(table, sort_exprs):
    """
    Sort table by the indicated column expressions and sort orders
    (ascending/descending)

    Parameters
    ----------
    table : table expression
    sort_exprs : sorting expressions
      Must be one of:
        - Column name or expression
        - Sort key, e.g. desc(col)
        - (column name, True (ascending) / False (descending))

    Examples
    --------
    sorted = table.sort_by([('a', True), ('b', False)])

    Returns
    -------
    sorted : TableExpr
    """
    return TableExpr(_ops.SortBy(table, sort_exprs))
Ejemplo n.º 22
0
def cross_join(*args, **kwargs):
    """
    Build a cross join (cartesian product) amongst a list of tables, with
    an optional set of prefixes to apply to overlapping column names.

    All positional and keyword arguments are forwarded unchanged to
    ``_ops.CrossJoin``.

    Parameters
    ----------
    positional args: tables to join
    prefixes keyword : prefixes for each table
      Not yet implemented

    Examples
    --------
    >>> joined1 = ibis.cross_join(a, b, c, d, e)
    >>> joined2 = ibis.cross_join(a, b, c, prefixes=['a_', 'b_', 'c_'])

    Returns
    -------
    joined : TableExpr
      If prefixes not provided, the result schema is not yet materialized
    """
    return TableExpr(_ops.CrossJoin(*args, **kwargs))
Ejemplo n.º 23
0
def get_columns_with_alias(table: TableExpr, alias: str):
    """Return every column of ``table`` renamed to ``<alias>.<column name>``."""
    aliased = []
    for column_name, column in zip(table.columns,
                                   table.get_columns(table.columns)):
        aliased.append(column.name(f"{alias}.{column_name}"))
    return aliased
Ejemplo n.º 24
0
 def _handle_count_star(self, aggregate: Aggregate, relation: TableExpr):
     """
     Replace a ``CountStar`` aggregate value with ``relation.count()``.

     The aggregate is modified in place and also returned; aggregates
     with any other value are returned untouched.
     """
     if not isinstance(aggregate.value, CountStar):
         return aggregate
     aggregate.value = relation.count()
     return aggregate
Ejemplo n.º 25
0
 def __call__(self, left: ir.TableExpr) -> ir.TableExpr:
     """Join ``left`` to ``self.right`` on the resolved ``self.on``.

     The predicate is resolved with ``X`` bound to ``left`` and ``Y``
     bound to ``self.right``; the join type comes from ``self.how``.
     """
     other = self.right
     scope = {X: left, Y: other}
     predicate = self.on.resolve(left, scope)
     return left.join(other, predicate, how=self.how)
Ejemplo n.º 26
0
 def __call__(self, expr: ir.TableExpr) -> ir.TableExpr:
     """Sort ``expr`` by the resolved form of ``self.sort_keys``.

     Each key is resolved with ``X`` bound to ``expr``; a key may resolve
     to one expression or to several, so results are flattened into a
     single list.
     """
     resolved_keys = []
     for key in self.sort_keys:
         resolved_keys.extend(
             ibis.util.promote_list(key.resolve(expr, {X: expr})))
     return expr.sort_by(resolved_keys)
Ejemplo n.º 27
0
 def resolve(self, expr: ir.TableExpr, scope: Scope) -> ir.TableExpr:
     """Resolve ``self.expr`` and pass the result to ``expr.head``."""
     resolved = self.expr.resolve(expr, scope)
     return expr.head(resolved)
Ejemplo n.º 28
0
 def __call__(self, expr: ir.TableExpr) -> ir.TableExpr:
     """Mutate ``expr`` with the resolved entries of ``self.mutations``.

     Each column is resolved with ``X`` bound to ``expr`` and passed to
     ``expr.mutate`` as a keyword argument named after its key.
     """
     resolved = {}
     for name, column in self.mutations.items():
         resolved[name] = column.resolve(expr, {X: expr})
     return expr.mutate(**resolved)
Ejemplo n.º 29
0
    def handle_aggregation(
        self,
        aggregates: Dict[str, Aggregate],
        group_columns: List[GroupByColumn],
        table: TableExpr,
        having_expr: Tree,
        internal_transformer: InternalTransformer,
        selected_columns: List[Value],
    ):
        """
        Apply the group by / aggregate / having parts of a query to ``table``

        Three shapes are handled:
        - group columns only: the table is made distinct
        - aggregates only: the table is aggregated with the having filter
        - both: the table is grouped, filtered with having, aggregated, and
          group columns that were not selected are dropped

        With neither group columns nor aggregates the table is returned as is.

        :param aggregates: Aggregates to compute
        :param group_columns: Columns to group by
        :param table: Table expression to aggregate
        :param having_expr: Syntax tree of the having clause
        :param internal_transformer: Transformer passed on to the having
            handler
        :param selected_columns: Values listed in the select clause
        :return: Resulting TableExpr
        :raises NotImplementedError: group by with having but no aggregates
        :raises InvalidQueryException: group by without aggregates where a
            selected column is not among the group columns
        """
        selected_lower_names = {
            selected.get_name().lower()
            for selected in selected_columns
        }
        ibis_aggregates = self._get_aggregate_ibis_columns(aggregates, table)
        group_names = [
            group_column.get_name() for group_column in group_columns
        ]
        having = self._handle_having_expressions(
            having_expr,
            internal_transformer,
            table,
            aggregates,
            group_names,
        )

        has_groups = bool(group_columns)
        has_aggregates = bool(aggregates)

        if has_groups and not selected_lower_names:
            for group_column in group_columns:
                group_column.set_ibis_name_to_name()

        if has_groups and not has_aggregates:
            if having is not None:
                raise NotImplementedError(
                    "Group by, having, without aggregation not yet implemented")
            selected_names = [
                selected.get_name() for selected in selected_columns
            ]
            for column in selected_names:
                # NOTE(review): this tests a name string for membership in
                # a list of GroupByColumn objects, so it relies on how
                # GroupByColumn defines equality (not visible here) - confirm.
                if column not in group_columns:
                    raise InvalidQueryException(
                        self.format_column_needs_agg_or_group_msg(column))
            return table.distinct()

        if has_aggregates and not has_groups:
            return table.aggregate(ibis_aggregates, having=having)

        if has_aggregates and has_groups:
            grouped = table.group_by(
                [group_column.value for group_column in group_columns])
            if having is not None:
                grouped = grouped.having(having)
            table = grouped.aggregate(ibis_aggregates)

            # Group columns that were not selected must not appear in the
            # result.
            unselected = [
                group_column.group_by_name
                for group_column in group_columns
                if group_column.get_name().lower() not in selected_lower_names
            ]
            if unselected:
                table = table.drop(unselected)

        return table
Ejemplo n.º 30
0
def _get_all_columns(table: TableExpr):
    """Return ``table.get_columns`` applied to all of ``table.columns``."""
    all_names = table.columns
    return table.get_columns(all_names)