Example no. 1
0
 def execute(
     self,
     expr: ir.Expr,
     timecontext: Mapping | None = None,
     params: Mapping[ir.Scalar, Any] | None = None,
     limit: str = 'default',
     **kwargs: Any,
 ) -> Any:
     """Execute an ibis expression and return a pandas-side result.

     Tables are returned as pandas DataFrames, columns as pandas
     Series, and scalars as plain Python values. Any other expression
     type raises ``com.IbisError``.
     """
     if isinstance(expr, types.Table):
         compiled = self.compile(expr, timecontext, params, **kwargs)
         return compiled.toPandas()
     if isinstance(expr, types.Column):
         # A projection requires the column to carry a name.
         named = expr if expr.has_name() else expr.name("tmp")
         frame = self.compile(
             named.to_projection(), timecontext, params, **kwargs
         ).toPandas()
         return frame[named.get_name()]
     if isinstance(expr, types.Scalar):
         compiled = self.compile(expr, timecontext, params, **kwargs)
         if isinstance(compiled, Column):
             # attach result column to a fake DataFrame and
             # select the result
             compiled = self._session.range(0, 1).select(compiled)
         return compiled.toPandas().iloc[0, 0]
     raise com.IbisError(
         f"Cannot execute expression of type: {type(expr)}"
     )
Example no. 2
0
File: util.py  Project: CGe0516/ibis
def maybe_wrap_scalar(result: Any, expr: ir.Expr) -> Any:
    """
    A partial implementation of `coerce_to_output` in the pandas backend.

    Only dask Scalar results of reductions are wrapped (into a one-row
    dask Series); everything else is simply renamed. This will change
    when udfs are added to the dask backend.
    """
    name = expr.get_name()
    is_dask_scalar = isinstance(result, dd.core.Scalar)
    if is_dask_scalar and isinstance(expr.op(), ops.Reduction):
        # TODO - computation
        wrapped = pd.Series(result.compute(), name=name)
        return dd.from_pandas(wrapped, npartitions=1)
    return result.rename(name)
Example no. 3
0
File: util.py  Project: jelitox/ibis
def coerce_to_output(
    result: Any,
    expr: ir.Expr,
    index: Optional[pd.Index] = None,
) -> Union[dd.Series, dd.DataFrame]:
    """Cast the result to either a Series or DataFrame, renaming as needed.

    Reimplementation of `coerce_to_output` in the pandas backend, but
    creates dask objects and adds special handling for dd.Scalars.

    Parameters
    ----------
    result: Any
        The result to cast
    expr: ibis.expr.types.Expr
        The expression associated with the result
    index: pd.Index
        Optional. If passed, scalar results will be broadcasted according
        to the index.

    Returns
    -------
    result: A `dd.Series` or `dd.DataFrame`

    Raises
    ------
    ValueError
        If unable to coerce result

    Examples
    --------
    For dataframe outputs, see ``_coerce_to_dataframe``. Examples below use
    pandas objects for legibility, but functionality is the same on dask
    objects.

    >>> coerce_to_output(pd.Series(1), expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr, [1,2,3])
    1    1
    2    1
    3    1
    Name: result, dtype: int64
    >>> coerce_to_output([1,2,3], expr)
    0    [1, 2, 3]
    Name: result, dtype: object
    """
    name = expr.get_name()

    # Struct-like expressions expand into a full DataFrame.
    struct_exprs = (
        ir.DestructColumn,
        ir.StructColumn,
        ir.DestructScalar,
        ir.StructScalar,
    )
    if isinstance(expr, struct_exprs):
        return _coerce_to_dataframe(
            result, expr.type().names, expr.type().types
        )

    if isinstance(result, (pd.Series, dd.Series)):
        # Series from https://github.com/ibis-project/ibis/issues/2711
        return result.rename(name)

    if isinstance(expr.op(), ops.Reduction):
        if not isinstance(result, dd.core.Scalar):
            # Delegate coercion to the pandas backend, then lift the
            # pandas object into a single-partition dask object.
            coerced = pd_util.coerce_to_output(result, expr, index)
            return dd.from_pandas(coerced, npartitions=1)
        # wrap the scalar in a series
        out_dtype = _pandas_dtype_from_dd_scalar(result)
        out_len = 1 if index is None else len(index)
        meta = make_meta_series(dtype=out_dtype, name=name)
        # Specify `divisions` so that the created Dask object has
        # known divisions (to be concatenatable with Dask objects
        # created using `dd.from_pandas`)
        return dd.from_delayed(
            _wrap_dd_scalar(result, name, out_len),
            meta=meta,
            divisions=(0, out_len - 1),
        )

    raise ValueError(f"Cannot coerce_to_output. Result: {result}")