def execute(
    self,
    expr: ir.Expr,
    timecontext: Mapping | None = None,
    params: Mapping[ir.Scalar, Any] | None = None,
    limit: str = 'default',
    **kwargs: Any,
) -> Any:
    """Execute an expression.

    Tables and columns are compiled and materialized through
    ``.toPandas()``; scalars are evaluated against a one-row frame when
    the compiled form is still a column expression.
    """
    if isinstance(expr, types.Table):
        return self.compile(expr, timecontext, params, **kwargs).toPandas()

    if isinstance(expr, types.Column):
        # A projection requires a named expression, so attach a
        # placeholder name when the column is anonymous.
        named = expr if expr.has_name() else expr.name("tmp")
        frame = self.compile(
            named.to_projection(), timecontext, params, **kwargs
        ).toPandas()
        return frame[named.get_name()]

    if isinstance(expr, types.Scalar):
        compiled = self.compile(expr, timecontext, params, **kwargs)
        if isinstance(compiled, Column):
            # Attach the result column to a one-row DataFrame so the
            # scalar can be pulled out of the materialized result.
            compiled = self._session.range(0, 1).select(compiled)
        return compiled.toPandas().iloc[0, 0]

    raise com.IbisError(
        f"Cannot execute expression of type: {type(expr)}"
    )
def maybe_wrap_scalar(result: Any, expr: ir.Expr) -> Any:
    """
    A partial implementation of `coerce_to_output` in the pandas backend.

    Currently only wraps scalars, but will change when udfs are added to the
    dask backend.
    """
    name = expr.get_name()
    is_dask_scalar = isinstance(result, dd.core.Scalar)
    if is_dask_scalar and isinstance(expr.op(), ops.Reduction):
        # TODO - computation
        wrapped = pd.Series(result.compute(), name=name)
        return dd.from_pandas(wrapped, npartitions=1)
    # Non-reduction (or non-dask-scalar) results are simply renamed.
    return result.rename(name)
def coerce_to_output(
    result: Any, expr: ir.Expr, index: Optional[pd.Index] = None
) -> Union[dd.Series, dd.DataFrame]:
    """Cast the result to either a Series or DataFrame, renaming as needed.

    Reimplementation of `coerce_to_output` in the pandas backend, but creates
    dask objects and adds special handling for dd.Scalars.

    Parameters
    ----------
    result: Any
        The result to cast
    expr: ibis.expr.types.Expr
        The expression associated with the result
    index: pd.Index
        Optional. If passed, scalar results will be broadcasted according
        to the index.

    Returns
    -------
    result: A `dd.Series` or `dd.DataFrame`

    Raises
    ------
    ValueError
        If unable to coerce result

    Examples
    --------
    For dataframe outputs, see ``_coerce_to_dataframe``. Examples below
    use pandas objects for legibility, but functionality is the same
    on dask objects.

    >>> coerce_to_output(pd.Series(1), expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr, [1,2,3])
    1    1
    2    1
    3    1
    Name: result, dtype: int64
    >>> coerce_to_output([1,2,3], expr)
    0    [1, 2, 3]
    Name: result, dtype: object
    """
    result_name = expr.get_name()

    struct_like = (
        ir.DestructColumn,
        ir.StructColumn,
        ir.DestructScalar,
        ir.StructScalar,
    )
    if isinstance(expr, struct_like):
        return _coerce_to_dataframe(
            result, expr.type().names, expr.type().types
        )

    if isinstance(result, (pd.Series, dd.Series)):
        # Series from https://github.com/ibis-project/ibis/issues/2711
        return result.rename(result_name)

    if isinstance(expr.op(), ops.Reduction):
        if not isinstance(result, dd.core.Scalar):
            # Delegate to the pandas backend's coercion, then lift the
            # result into a single-partition dask object.
            return dd.from_pandas(
                pd_util.coerce_to_output(result, expr, index),
                npartitions=1,
            )
        # Wrap the dd.Scalar in a series. Specify `divisions` so that
        # the created Dask object has known divisions (to be
        # concatenatable with Dask objects created using
        # `dd.from_pandas`).
        out_dtype = _pandas_dtype_from_dd_scalar(result)
        out_len = 1 if index is None else len(index)
        meta = make_meta_series(dtype=out_dtype, name=result_name)
        return dd.from_delayed(
            _wrap_dd_scalar(result, result_name, out_len),
            meta=meta,
            divisions=(0, out_len - 1),
        )

    raise ValueError(f"Cannot coerce_to_output. Result: {result}")