Example #1
def execute(
    self,
    expr: ir.Expr,
    params: Mapping[ir.Expr, object] = None,
    limit: str = 'default',
    **kwargs: Any,
):
    if isinstance(expr, ir.TableExpr):
        frame = self.compile(expr, params, **kwargs)
        table = _to_pyarrow_table(frame)
        return table.to_pandas()
    elif isinstance(expr, ir.ColumnExpr):
        # expression must be named for the projection
        expr = expr.name('tmp').to_projection()
        frame = self.compile(expr, params, **kwargs)
        table = _to_pyarrow_table(frame)
        return table['tmp'].to_pandas()
    elif isinstance(expr, ir.ScalarExpr):
        if expr.op().root_tables():
            # there are associated datafusion tables so convert the expr
            # to a selection which we can directly convert to a datafusion
            # plan
            expr = expr.name('tmp').to_projection()
            frame = self.compile(expr, params, **kwargs)
        else:
            # doesn't have any tables associated so create a plan from a
            # dummy datafusion table
            compiled = self.compile(expr, params, **kwargs)
            frame = self._context.empty_table().select(compiled)
        table = _to_pyarrow_table(frame)
        return table[0][0].as_py()
    else:
        raise com.IbisError(
            f"Cannot execute expression of type: {type(expr)}"
        )
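
The three branches above differ only in how the materialized pyarrow table is unwrapped: a table expression yields a pandas DataFrame, a named column yields a Series, and a scalar yields a plain Python value. A standalone sketch of those three unwrapping steps, assuming only pyarrow and pandas (no DataFusion context and no ibis expressions):

import pyarrow as pa

table = pa.table({'tmp': [1, 2, 3]})

df = table.to_pandas()           # TableExpr branch  -> pandas DataFrame
col = table['tmp'].to_pandas()   # ColumnExpr branch -> pandas Series
val = table[0][0].as_py()        # ScalarExpr branch -> plain Python value (1)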
Example #2
def coerce_to_output(
        result: Any,
        expr: ir.Expr,
        index: Optional[pd.Index] = None) -> Union[pd.Series, pd.DataFrame]:
    """ Cast the result to either a Series or DataFrame.

    This method casts result of an execution to a Series or DataFrame,
    depending on the type of the expression and shape of the result.

    Parameters
    ----------
    result: Any
        The result to cast
    expr: ibis.expr.types.Expr
        The expression associated with the result
    index: pd.Index
        Optional. If passed, scalar results will be broadcasted according
        to the index.

    Returns
    -------
    result: A Series or DataFrame
    """
    result_name = getattr(expr, '_name', None)

    if isinstance(expr, (ir.DestructColumn, ir.StructColumn)):
        return ibis.util.coerce_to_dataframe(result, expr.type().names)
    elif isinstance(expr, (ir.DestructScalar, ir.StructScalar)):
        # Two cases: for a groupby aggregate the result is a Series of
        # tuples/lists; for a non-grouped aggregate it is a single
        # tuple/list. Either way it coerces to a DataFrame.
        return ibis.util.coerce_to_dataframe(result, expr.type().names)
    elif isinstance(result, pd.Series):
        return result.rename(result_name)
    elif isinstance(result, np.ndarray):
        return pd.Series(result, name=result_name)
    elif isinstance(expr.op(), ops.Reduction):
        # We either wrap a scalar into a single element Series
        # or broadcast the scalar to a multi element Series
        if index is None:
            return pd.Series(result, name=result_name)
        else:
            return pd.Series(
                np.repeat(result, len(index)),
                index=index,
                name=result_name,
            )
    else:
        raise ValueError(f"Cannot coerce_to_output. Result: {result}")
Example #3
def maybe_wrap_scalar(result: Any, expr: ir.Expr) -> Any:
    """
    A partial implementation of `coerce_to_output` in the pandas backend.

    Currently only wraps scalars, but will change when udfs are added to the
    dask backend.
    """
    result_name = expr.get_name()
    if isinstance(result, dd.core.Scalar) and isinstance(
            expr.op(), ops.Reduction):
        # TODO - computation
        return dd.from_pandas(pd.Series(result.compute(), name=result_name),
                              npartitions=1)
    else:
        return result.rename(result_name)
Example #4
    def predicate(counts: Counter,
                  expr: ir.Expr) -> tuple[Sequence[ir.Table] | bool, None]:
        op = expr.op()

        if isinstance(op, ops.Join):
            return [op.left, op.right], None
        elif isinstance(op, ops.PhysicalTable):
            return lin.halt, None
        elif isinstance(op, ops.SelfReference):
            return lin.proceed, None
        elif isinstance(op, (ops.Selection, ops.Aggregation)):
            counts[op] += 1
            return [op.table], None
        elif isinstance(op, ops.TableNode):
            counts[op] += 1
            return lin.proceed, None
        elif isinstance(op, ops.TableColumn):
            return op.table.op() not in counts, None
        else:
            return lin.proceed, None
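
lin.halt and lin.proceed are sentinels interpreted by the traversal driver; the predicate itself never recurses. A toy driver showing how such return values could be consumed (an illustrative sketch only, not ibis's lineage implementation):

halt, proceed = object(), object()

def traverse(predicate, root, children_of):
    # The predicate steers the walk: a sentinel or boolean controls descent,
    # an explicit list names the children to visit next.
    stack = [root]
    while stack:
        node = stack.pop()
        control, _ = predicate(node)
        if control is halt or control is False:
            continue
        if control is proceed or control is True:
            stack.extend(children_of(node))
        else:
            stack.extend(control)

tree = {'a': ['b', 'c'], 'b': [], 'c': []}
traverse(lambda node: (proceed, None), 'a', tree.__getitem__)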
Example #5
def to_op_dag(expr: ir.Expr) -> Graph:
    """Convert `expr` into a directed acyclic graph.

    Parameters
    ----------
    expr
        An ibis expression

    Returns
    -------
    Graph
        A directed acyclic graph of ibis operations
    """
    stack = [expr.op()]
    dag = {}

    while stack:
        if (node := stack.pop()) not in dag:
            dag[node] = children = node._flat_ops
            stack.extend(children)

    return dag
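
The same stack-based construction works for any structure that can enumerate its children; a self-contained sketch using plain dictionary lookups in place of ibis operation nodes:

def to_dag(root, children_of):
    # Iterative depth-first walk: record each node's children exactly once,
    # then push them to be visited in turn.
    stack, dag = [root], {}
    while stack:
        if (node := stack.pop()) not in dag:
            dag[node] = children = children_of(node)
            stack.extend(children)
    return dag

tree = {'a': ('b', 'c'), 'b': ('c',), 'c': ()}
to_dag('a', tree.__getitem__)
# {'a': ('b', 'c'), 'c': (), 'b': ('c',)}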
Example #6
def coerce_to_output(
        result: Any,
        expr: ir.Expr,
        index: Optional[pd.Index] = None) -> Union[dd.Series, dd.DataFrame]:
    """Cast the result to either a Series of DataFrame, renaming as needed.

    Reimplementation of `coerce_to_output` in the pandas backend, but
    creates dask objects and adds special handling for dd.Scalars.

    Parameters
    ----------
    result: Any
        The result to cast
    expr: ibis.expr.types.Expr
        The expression associated with the result
    index: pd.Index
        Optional. If passed, scalar results will be broadcasted according
        to the index.

    Returns
    -------
    result: A `dd.Series` or `dd.DataFrame`

    Raises
    ------
    ValueError
        If unable to coerce result

    Examples
    --------
    For dataframe outputs, see ``_coerce_to_dataframe``. Examples below use
    pandas objects for legibility, but functionality is the same on dask
    objects.

    >>> coerce_to_output(pd.Series(1), expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr, [1,2,3])
    1    1
    2    1
    3    1
    Name: result, dtype: int64
    >>> coerce_to_output([1,2,3], expr)
    0    [1, 2, 3]
    Name: result, dtype: object
    """
    result_name = expr.get_name()
    dataframe_exprs = (
        ir.DestructColumn,
        ir.StructColumn,
        ir.DestructScalar,
        ir.StructScalar,
    )
    if isinstance(expr, dataframe_exprs):
        return _coerce_to_dataframe(result,
                                    expr.type().names,
                                    expr.type().types)
    elif isinstance(result, (pd.Series, dd.Series)):
        # Series from https://github.com/ibis-project/ibis/issues/2711
        return result.rename(result_name)
    elif isinstance(expr.op(), ops.Reduction):
        if isinstance(result, dd.core.Scalar):
            # wrap the scalar in a series
            out_dtype = _pandas_dtype_from_dd_scalar(result)
            out_len = 1 if index is None else len(index)
            meta = make_meta_series(dtype=out_dtype, name=result_name)
            # Specify `divisions` so that the created Dask object has
            # known divisions (to be concatenatable with Dask objects
            # created using `dd.from_pandas`)
            series = dd.from_delayed(
                _wrap_dd_scalar(result, result_name, out_len),
                meta=meta,
                divisions=(0, out_len - 1),
            )

            return series
        else:
            return dd.from_pandas(
                pd_util.coerce_to_output(result, expr, index),
                npartitions=1,
            )
    else:
        raise ValueError(f"Cannot coerce_to_output. Result: {result}")
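
The dd.Scalar branch relies on dd.from_delayed with an explicit meta and divisions so that the resulting Series has known divisions and can be concatenated with Dask objects created via dd.from_pandas. A self-contained approximation of that trick, with the ibis helpers (_wrap_dd_scalar, make_meta_series, _pandas_dtype_from_dd_scalar) replaced by inline stand-ins, so treat it as an assumption-level sketch:

import dask
import dask.dataframe as dd
import pandas as pd

ddf = dd.from_pandas(pd.DataFrame({'a': [1.0, 2.0, 3.0]}), npartitions=2)
scalar = ddf['a'].mean()  # a dd.core.Scalar

@dask.delayed
def _to_series(value, name):
    # Wrap the computed scalar value in a one-element pandas Series.
    return pd.Series([value], name=name)

meta = pd.Series([], dtype='float64', name='mean')
series = dd.from_delayed(
    [_to_series(scalar, 'mean')],
    meta=meta,
    divisions=(0, 0),  # single partition whose index runs from 0 to 0
)
series.compute()  # 0    2.0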
Example #7
def _is_row_order_preserving(expr: ir.Expr):
    if isinstance(expr.op(), (ops.Reduction, ops.WindowOp)):
        return (lin.halt, False)
    else:
        return (lin.proceed, True)
Example #8
def coerce_to_output(
        result: Any,
        expr: ir.Expr,
        index: Optional[pd.Index] = None) -> Union[pd.Series, pd.DataFrame]:
    """Cast the result to either a Series or DataFrame.

    This method casts result of an execution to a Series or DataFrame,
    depending on the type of the expression and shape of the result.

    Parameters
    ----------
    result: Any
        The result to cast
    expr: ibis.expr.types.Expr
        The expression associated with the result
    index: pd.Index
        Optional. If passed, scalar results will be broadcasted according
        to the index.

    Returns
    -------
    result: A Series or DataFrame

    Examples
    --------
    For dataframe outputs, see ``ibis.util.coerce_to_dataframe``.

    >>> coerce_to_output(pd.Series(1), expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr, [1,2,3])
    1    1
    2    1
    3    1
    Name: result, dtype: int64
    >>> coerce_to_output([1,2,3], expr)
    0    [1, 2, 3]
    Name: result, dtype: object
    """
    result_name = getattr(expr, '_name', None)

    if isinstance(expr, (ir.DestructColumn, ir.StructColumn)):
        return coerce_to_dataframe(result, expr.type())
    elif isinstance(expr, (ir.DestructScalar, ir.StructScalar)):
        # Two cases: for a groupby aggregate the result is a Series of
        # tuples/lists; for a non-grouped aggregate it is a single
        # tuple/list. Either way it coerces to a DataFrame.
        return coerce_to_dataframe(result, expr.type())
    elif isinstance(result, pd.Series):
        return result.rename(result_name)
    elif isinstance(expr.op(), ops.Reduction):
        if index is None:
            # Wrap `result` into a single-element Series.
            return pd.Series([result], name=result_name)
        else:
            # Broadcast `result` to a multi-element Series according to the
            # given `index`.
            return pd.Series(
                np.repeat(result, len(index)),
                index=index,
                name=result_name,
            )
    elif isinstance(result, np.ndarray):
        return pd.Series(result, name=result_name)
    else:
        raise ValueError(f"Cannot coerce_to_output. Result: {result}")
Example #9
def main_execute(expr: ir.Expr,
                 scope: Optional[Mapping] = None,
                 aggcontext: Optional[agg_ctx.AggregationContext] = None,
                 clients: Sequence[ibis.client.Client] = (),
                 params: Optional[Mapping] = None,
                 **kwargs: Any):
    """Execute an ibis expression against the pandas backend.

    Parameters
    ----------
    expr
        The ibis expression to execute.
    scope
        Mapping from already-computed operation nodes to their results.
    aggcontext
        Aggregation context controlling how reductions are evaluated;
        defaults to ``agg_ctx.Summarize()`` when not provided.
    clients
        Backend clients to execute against; discovered from ``expr`` when
        empty.
    params
        Mapping from scalar parameter expressions to their values.

    """
    toposorted, dependencies = toposort(expr)
    params = toolz.keymap(get_node, params if params is not None else {})

    # Add to scope the objects that have no dependencies and are not ibis
    # nodes. We have to filter out nodes for cases--such as zero argument
    # UDFs--that do not have any dependencies yet still need to be evaluated.
    full_scope = toolz.merge(
        scope if scope is not None else {},
        {
            key: key
            for key, parents in dependencies.items()
            if not parents and not isinstance(key, ops.Node)
        },
        params,
    )

    if not clients:
        clients = list(find_backends(expr))

    if aggcontext is None:
        aggcontext = agg_ctx.Summarize()

    # give backends a chance to inject scope if needed
    execute_first_scope = execute_first(expr.op(),
                                        *clients,
                                        scope=full_scope,
                                        aggcontext=aggcontext,
                                        **kwargs)
    full_scope = toolz.merge(full_scope, execute_first_scope)

    nodes = [node for node in toposorted if node not in full_scope]

    # compute the nodes that are not currently in scope
    for node in nodes:
        # allow clients to pre compute nodes as they like
        pre_executed_scope = pre_execute(node,
                                         *clients,
                                         scope=full_scope,
                                         aggcontext=aggcontext,
                                         **kwargs)
        # merge the existing scope with whatever was returned from pre_execute
        execute_scope = toolz.merge(full_scope, pre_executed_scope)

        # if after pre_execute our node is in scope, then there's nothing to do
        # in this iteration
        if node in execute_scope:
            full_scope = execute_scope
        else:
            # If we're evaluating a literal then we can be a bit quicker about
            # evaluating the dispatch graph
            if isinstance(node, ops.Literal):
                executor = execute_literal
            else:
                executor = execute_node

            # Gather the inputs we've already computed that the current node
            # depends on
            execute_args = [
                full_scope[get_node(arg)] for arg in dependencies[node]
            ]

            # execute the node with its inputs
            execute_node_result = executor(
                node,
                *execute_args,
                aggcontext=aggcontext,
                scope=execute_scope,
                clients=clients,
                **kwargs,
            )

            # last chance to perform any additional computation on the result
            # before it gets added to scope for the next node
            full_scope[node] = post_execute(
                node,
                execute_node_result,
                clients=clients,
                aggcontext=aggcontext,
                scope=full_scope,
            )

    # the last node in the toposorted graph is the root and maps to the desired
    # result in scope
    last_node = toposorted[-1]
    result = full_scope[last_node]
    return result
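
Stripped of the pre_execute and post_execute hooks, the core of the loop above is an interpreter over a topologically sorted DAG: each node is computed exactly once from the already-computed results of its dependencies, all of which live in a scope mapping. A toy version of that pattern, with plain strings standing in for ibis operation nodes (not ibis code):

def evaluate(toposorted, dependencies, scope, apply):
    # `toposorted` lists nodes so dependencies come before dependents;
    # `scope` starts with the leaf values and accumulates every result.
    for node in toposorted:
        if node not in scope:
            args = [scope[dep] for dep in dependencies[node]]
            scope[node] = apply(node, args)
    return scope[toposorted[-1]]

# (x + y) * 2 with x=3, y=4
deps = {'x': [], 'y': [], 'add': ['x', 'y'], 'mul': ['add']}
order = ['x', 'y', 'add', 'mul']
rules = {'add': lambda args: args[0] + args[1], 'mul': lambda args: args[0] * 2}
evaluate(order, deps, {'x': 3, 'y': 4}, lambda node, args: rules[node](args))  # 14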
Example #10
def _fmt_value_expr(expr: ir.Expr, *, aliases: Aliases) -> str:
    """Format a value expression.

    Forwards the call on to the specific operation dispatch rule.
    """
    return fmt_value(expr.op(), aliases=aliases)
Example #11
def needs_parens(expr: ir.Expr):
    op = expr.op()
    if isinstance(op, ops.Alias):
        op = op.arg.op()
    return isinstance(op, _NEEDS_PARENS_OPS)