def coerce_to_output(
    result: Any, expr: ir.Expr, index: Optional[pd.Index] = None
) -> Union[pd.Series, pd.DataFrame]:
    """Cast the result to either a Series or DataFrame.

    This method casts the result of an execution to a Series or DataFrame,
    depending on the type of the expression and the shape of the result.

    Parameters
    ----------
    result: Any
        The result to cast
    expr: ibis.expr.types.Expr
        The expression associated with the result
    index: pd.Index
        Optional. If passed, scalar results will be broadcasted according
        to the index.

    Returns
    -------
    result: A Series or DataFrame

    Raises
    ------
    ValueError
        If ``result`` is neither a Series nor an ndarray and ``expr`` is
        neither a struct/destruct expression nor a reduction.
    """
    # Expressions without a `_name` attribute produce an unnamed Series.
    result_name = getattr(expr, '_name', None)

    if isinstance(expr, (ir.DestructColumn, ir.StructColumn)):
        return ibis.util.coerce_to_dataframe(result, expr.type().names)
    elif isinstance(expr, (ir.DestructScalar, ir.StructScalar)):
        # Two cases reach this branch: a grouped aggregate, where the
        # result is a Series of tuples/lists, and a non-grouped aggregate
        # (presumably a single tuple/list -- TODO confirm). Both are handed
        # to the same helper.
        return ibis.util.coerce_to_dataframe(result, expr.type().names)
    elif isinstance(result, pd.Series):
        return result.rename(result_name)
    elif isinstance(result, np.ndarray):
        return pd.Series(result, name=result_name)
    elif isinstance(expr.op(), ops.Reduction):
        if index is None:
            # Wrap the value in a one-element list so that it always
            # becomes exactly one row: passing a list-like result directly
            # would spread it over several rows, and passing None would
            # produce an empty Series.
            return pd.Series([result], name=result_name)
        else:
            # Broadcast the scalar to a multi-element Series aligned with
            # `index`.
            # NOTE(review): np.repeat flattens a list-like `result` before
            # repeating it, so this path is only safe for true scalars --
            # confirm callers never broadcast list-like reductions.
            return pd.Series(
                np.repeat(result, len(index)),
                index=index,
                name=result_name,
            )
    else:
        raise ValueError(f"Cannot coerce_to_output. Result: {result}")
def coerce_to_output(
    result: Any, expr: ir.Expr, index: Optional[pd.Index] = None
) -> Union[dd.Series, dd.DataFrame]:
    """Cast the result to either a Series or DataFrame, renaming as needed.

    Dask counterpart of the pandas backend's `coerce_to_output`: it builds
    dask objects and has a dedicated path for `dd.core.Scalar` results.

    Parameters
    ----------
    result: Any
        The result to cast
    expr: ibis.expr.types.Expr
        The expression associated with the result
    index: pd.Index
        Optional. If passed, scalar results will be broadcasted according
        to the index.

    Returns
    -------
    result: A `dd.Series` or `dd.DataFrame`

    Raises
    ------
    ValueError
        If unable to coerce result

    Examples
    --------
    For dataframe outputs, see ``_coerce_to_dataframe``. Examples below use
    pandas objects for legibility, but functionality is the same on dask
    objects.

    >>> coerce_to_output(pd.Series(1), expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr, [1,2,3])
    1    1
    2    1
    3    1
    Name: result, dtype: int64
    >>> coerce_to_output([1,2,3], expr)
    0    [1, 2, 3]
    Name: result, dtype: object
    """
    name = expr.get_name()
    struct_like_exprs = (
        ir.DestructColumn,
        ir.StructColumn,
        ir.DestructScalar,
        ir.StructScalar,
    )

    # Struct-like expressions become a DataFrame with one column per field.
    if isinstance(expr, struct_like_exprs):
        field_names = expr.type().names
        field_types = expr.type().types
        return _coerce_to_dataframe(result, field_names, field_types)

    # Already a Series (pandas or dask): only the name needs adjusting.
    # Series from https://github.com/ibis-project/ibis/issues/2711
    if isinstance(result, (pd.Series, dd.Series)):
        return result.rename(name)

    # Anything left must come from a reduction; otherwise give up.
    if not isinstance(expr.op(), ops.Reduction):
        raise ValueError(f"Cannot coerce_to_output. Result: {result}")

    # Non-dask scalar: let the pandas implementation build the Series and
    # convert it into a single-partition dask object.
    if not isinstance(result, dd.core.Scalar):
        pandas_output = pd_util.coerce_to_output(result, expr, index)
        return dd.from_pandas(pandas_output, npartitions=1)

    # Dask scalar: wrap it in a Series of length 1, or len(index) when
    # broadcasting.
    dtype = _pandas_dtype_from_dd_scalar(result)
    length = 1 if index is None else len(index)
    meta = make_meta_series(dtype=dtype, name=name)
    delayed_series = _wrap_dd_scalar(result, name, length)
    # Pass `divisions` explicitly so the resulting dask object has known
    # divisions and can be concatenated with objects created through
    # `dd.from_pandas`.
    return dd.from_delayed(
        delayed_series,
        meta=meta,
        divisions=(0, length - 1),
    )
def coerce_to_output(
    result: Any, expr: ir.Expr, index: Optional[pd.Index] = None
) -> Union[pd.Series, pd.DataFrame]:
    """Cast the result to either a Series or DataFrame.

    The output container is chosen from the type of the expression and the
    shape of the execution result.

    Parameters
    ----------
    result: Any
        The result to cast
    expr: ibis.expr.types.Expr
        The expression associated with the result
    index: pd.Index
        Optional. If passed, scalar results will be broadcasted according
        to the index.

    Returns
    -------
    result: A Series or DataFrame

    Raises
    ------
    ValueError
        If none of the supported expression/result combinations matches.

    Examples
    --------
    For dataframe outputs, see ``ibis.util.coerce_to_dataframe``.

    >>> coerce_to_output(pd.Series(1), expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr, [1,2,3])
    1    1
    2    1
    3    1
    Name: result, dtype: int64
    >>> coerce_to_output([1,2,3], expr)
    0    [1, 2, 3]
    Name: result, dtype: object
    """
    name = expr._safe_name

    # Struct-like columns and scalars are both delegated to the dataframe
    # helper. For the scalar kinds the result may be a Series of
    # tuples/lists (grouped aggregate) or come from a non-grouped
    # aggregate; the helper receives it unchanged either way.
    struct_like_exprs = (
        ir.DestructColumn,
        ir.StructColumn,
        ir.DestructScalar,
        ir.StructScalar,
    )
    if isinstance(expr, struct_like_exprs):
        return _coerce_to_dataframe(result, expr.type())

    # An existing Series only needs to carry the expression's name.
    if isinstance(result, pd.Series):
        return result.rename(name)

    if isinstance(expr, ir.ScalarExpr):
        if index is None:
            # No index given: a single-element Series holding `result`.
            return pd.Series([result], name=name)
        # Index given: repeat `result` once per index entry.
        broadcast_values = np.repeat(result, len(index))
        return pd.Series(broadcast_values, index=index, name=name)

    # Checked after the scalar case, so arrays produced by scalar
    # expressions are handled by the branch above.
    if isinstance(result, np.ndarray):
        return pd.Series(result, name=name)

    raise ValueError(f"Cannot coerce_to_output. Result: {result}")