def execute(
    self,
    expr: ir.Expr,
    timecontext: Mapping | None = None,
    params: Mapping[ir.Scalar, Any] | None = None,
    limit: str = 'default',
    **kwargs: Any,
) -> Any:
    """Execute an expression."""
    if isinstance(expr, types.Table):
        return self.compile(expr, timecontext, params, **kwargs).toPandas()
    elif isinstance(expr, types.Column):
        # expression must be named for the projection
        if not expr.has_name():
            expr = expr.name("tmp")
        return self.compile(
            expr.to_projection(), timecontext, params, **kwargs
        ).toPandas()[expr.get_name()]
    elif isinstance(expr, types.Scalar):
        compiled = self.compile(expr, timecontext, params, **kwargs)
        if isinstance(compiled, Column):
            # attach result column to a fake DataFrame and
            # select the result
            compiled = self._session.range(0, 1).select(compiled)
        return compiled.toPandas().iloc[0, 0]
    else:
        raise com.IbisError(
            f"Cannot execute expression of type: {type(expr)}"
        )
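# Hedged usage sketch -- not from the source: how the three branches above
# surface to a caller, assuming a connected PySpark backend `con` and a
# bound table `t` with an int column `a` (both names are illustrative).
def _example_execute_dispatch(con, t):
    df = con.execute(t)           # Table  -> pandas.DataFrame
    col = con.execute(t.a)        # Column -> pandas.Series named "a"
    val = con.execute(t.a.sum())  # Scalar -> plain Python scalar
    return df, col, val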
def execute(
    self,
    expr: ir.Expr,
    params: Mapping[ir.Expr, object] | None = None,
    limit: str = 'default',
    **kwargs: Any,
):
    if isinstance(expr, ir.TableExpr):
        frame = self.compile(expr, params, **kwargs)
        table = _to_pyarrow_table(frame)
        return table.to_pandas()
    elif isinstance(expr, ir.ColumnExpr):
        # expression must be named for the projection
        expr = expr.name('tmp').to_projection()
        frame = self.compile(expr, params, **kwargs)
        table = _to_pyarrow_table(frame)
        return table['tmp'].to_pandas()
    elif isinstance(expr, ir.ScalarExpr):
        if expr.op().root_tables():
            # there are associated datafusion tables so convert the expr
            # to a selection which we can directly convert to a datafusion
            # plan
            expr = expr.name('tmp').to_projection()
            frame = self.compile(expr, params, **kwargs)
        else:
            # doesn't have any tables associated so create a plan from a
            # dummy datafusion table
            compiled = self.compile(expr, params, **kwargs)
            frame = self._context.empty_table().select(compiled)
        table = _to_pyarrow_table(frame)
        return table[0][0].as_py()
    else:
        raise com.IbisError(
            f"Cannot execute expression of type: {type(expr)}"
        )
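# Hedged sketch of the two scalar paths above -- not from the source: a
# scalar rooted in a table compiles through a projection, while a pure
# literal is planned against the dummy empty table (`con` is assumed to be
# a connected datafusion backend, `t` one of its tables).
def _example_scalar_paths(con, t):
    import ibis

    rooted = con.execute(t.a.max())              # projection path
    literal = con.execute(ibis.literal(21) * 2)  # empty_table path -> 42
    return rooted, literal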
def coerce_to_output(
    result: Any, expr: ir.Expr, index: Optional[pd.Index] = None
) -> Union[pd.Series, pd.DataFrame]:
    """Cast the result to either a Series or DataFrame.

    This method casts the result of an execution to a Series or DataFrame,
    depending on the type of the expression and the shape of the result.

    Parameters
    ----------
    result: Any
        The result to cast
    expr: ibis.expr.types.Expr
        The expression associated with the result
    index: pd.Index
        Optional. If passed, scalar results will be broadcasted according
        to the index.

    Returns
    -------
    result: A Series or DataFrame
    """
    result_name = getattr(expr, '_name', None)

    if isinstance(expr, (ir.DestructColumn, ir.StructColumn)):
        return ibis.util.coerce_to_dataframe(result, expr.type().names)
    elif isinstance(expr, (ir.DestructScalar, ir.StructScalar)):
        # There are two cases: if this is a grouped aggregate, the result
        # is a Series of tuples/lists; if this is a non-grouped aggregate,
        # the result is a single tuple/list.
        return ibis.util.coerce_to_dataframe(result, expr.type().names)
    elif isinstance(result, pd.Series):
        return result.rename(result_name)
    elif isinstance(result, np.ndarray):
        return pd.Series(result, name=result_name)
    elif isinstance(expr.op(), ops.Reduction):
        # We either wrap a scalar into a single-element Series
        # or broadcast the scalar to a multi-element Series
        if index is None:
            return pd.Series(result, name=result_name)
        else:
            return pd.Series(
                np.repeat(result, len(index)),
                index=index,
                name=result_name,
            )
    else:
        raise ValueError(f"Cannot coerce_to_output. Result: {result}")
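# Hedged illustration -- not from the source: the reduction branch above
# either wraps a scalar or broadcasts it, mirroring the doctest examples
# that the dask variant of this function carries (`expr` is assumed to be
# a reduction expression whose name resolves to "result").
def _example_scalar_coercion(expr):
    wrapped = coerce_to_output(1, expr)                         # 1-element Series
    broadcast = coerce_to_output(1, expr, pd.Index([1, 2, 3]))  # len-3 Series
    return wrapped, broadcast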
def maybe_wrap_scalar(result: Any, expr: ir.Expr) -> Any:
    """A partial implementation of `coerce_to_output` in the pandas backend.

    Currently only wraps scalars, but will change when udfs are added to the
    dask backend.
    """
    result_name = expr.get_name()
    if isinstance(result, dd.core.Scalar) and isinstance(
        expr.op(), ops.Reduction
    ):
        # TODO - computation
        return dd.from_pandas(
            pd.Series(result.compute(), name=result_name), npartitions=1
        )
    else:
        return result.rename(result_name)
def resolve(self, expr: ir.Expr, scope: Scope) -> List[ir.ColumnExpr]:
    schema = expr.schema()
    columns = schema.names
    name_locs = schema._name_locs
    start = name_locs[self.start]
    stop = name_locs.get(self.stop, len(expr.columns) - 1) + 1  # inclusive
    return [expr[columns[i]] for i in range(start, stop)]
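# Hedged illustration of the index arithmetic above -- not from the
# source. With columns a, b, c, d and start='b', stop='c', the resolver
# selects both endpoints because of the trailing `+ 1`:
def _example_inclusive_stop():
    columns = ['a', 'b', 'c', 'd']
    name_locs = {c: i for i, c in enumerate(columns)}
    start = name_locs['b']
    stop = name_locs.get('c', len(columns) - 1) + 1  # inclusive, as above
    return columns[start:stop]  # -> ['b', 'c']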
def predicate(
    counts: Counter, expr: ir.Expr
) -> tuple[Sequence[ir.Table] | bool, None]:
    op = expr.op()
    if isinstance(op, ops.Join):
        return [op.left, op.right], None
    elif isinstance(op, ops.PhysicalTable):
        return lin.halt, None
    elif isinstance(op, ops.SelfReference):
        return lin.proceed, None
    elif isinstance(op, (ops.Selection, ops.Aggregation)):
        counts[op] += 1
        return [op.table], None
    elif isinstance(op, ops.TableNode):
        counts[op] += 1
        return lin.proceed, None
    elif isinstance(op, ops.TableColumn):
        return op.table.op() not in counts, None
    else:
        return lin.proceed, None
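# Hedged driver sketch -- not from the source: the predicate is meant to
# be partially applied over a shared Counter and fed to ibis's lineage
# traversal, so reused Selection/Aggregation nodes end up with counts > 1
# (the usual signal for extracting a common table expression).
def _example_count_tables(expr):
    import functools
    from collections import Counter

    counts: Counter = Counter()
    # Consume the generator purely for its side effect on `counts`.
    list(lin.traverse(functools.partial(predicate, counts), expr))
    return counts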
def to_op_dag(expr: ir.Expr) -> Graph:
    """Convert `expr` into a directed acyclic graph.

    Parameters
    ----------
    expr
        An ibis expression

    Returns
    -------
    Graph
        A directed acyclic graph of ibis operations
    """
    stack = [expr.op()]
    dag = {}

    while stack:
        if (node := stack.pop()) not in dag:
            dag[node] = children = node._flat_ops
            stack.extend(children)
    return dag
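# Hedged usage sketch -- not from the source: walks the mapping returned
# above (operation -> child operations) for a small unbound expression.
def _example_print_dag():
    import ibis

    expr = ibis.table([('a', 'int64')], name='t').a.sum()
    for node, children in to_op_dag(expr).items():
        print(type(node).__name__, [type(c).__name__ for c in children])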
def coerce_to_output(
    result: Any, expr: ir.Expr, index: Optional[pd.Index] = None
) -> Union[dd.Series, dd.DataFrame]:
    """Cast the result to either a Series or DataFrame, renaming as needed.

    Reimplementation of `coerce_to_output` in the pandas backend, but
    creates dask objects and adds special handling for dd.Scalars.

    Parameters
    ----------
    result: Any
        The result to cast
    expr: ibis.expr.types.Expr
        The expression associated with the result
    index: pd.Index
        Optional. If passed, scalar results will be broadcasted according
        to the index.

    Returns
    -------
    result: A `dd.Series` or `dd.DataFrame`

    Raises
    ------
    ValueError
        If unable to coerce result

    Examples
    --------
    For dataframe outputs, see ``_coerce_to_dataframe``. Examples below
    use pandas objects for legibility, but functionality is the same on
    dask objects.

    >>> coerce_to_output(pd.Series(1), expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr, [1,2,3])
    1    1
    2    1
    3    1
    Name: result, dtype: int64
    >>> coerce_to_output([1,2,3], expr)
    0    [1, 2, 3]
    Name: result, dtype: object
    """
    result_name = expr.get_name()
    dataframe_exprs = (
        ir.DestructColumn,
        ir.StructColumn,
        ir.DestructScalar,
        ir.StructScalar,
    )
    if isinstance(expr, dataframe_exprs):
        return _coerce_to_dataframe(
            result, expr.type().names, expr.type().types
        )
    elif isinstance(result, (pd.Series, dd.Series)):
        # Series from https://github.com/ibis-project/ibis/issues/2711
        return result.rename(result_name)
    elif isinstance(expr.op(), ops.Reduction):
        if isinstance(result, dd.core.Scalar):
            # wrap the scalar in a series
            out_dtype = _pandas_dtype_from_dd_scalar(result)
            out_len = 1 if index is None else len(index)
            meta = make_meta_series(dtype=out_dtype, name=result_name)
            # Specify `divisions` so that the created Dask object has
            # known divisions (to be concatenatable with Dask objects
            # created using `dd.from_pandas`)
            series = dd.from_delayed(
                _wrap_dd_scalar(result, result_name, out_len),
                meta=meta,
                divisions=(0, out_len - 1),
            )
            return series
        else:
            return dd.from_pandas(
                pd_util.coerce_to_output(result, expr, index), npartitions=1
            )
    else:
        raise ValueError(f"Cannot coerce_to_output. Result: {result}")
def _is_row_order_preserving(expr: ir.Expr):
    if isinstance(expr.op(), (ops.Reduction, ops.WindowOp)):
        return (lin.halt, False)
    else:
        return (lin.proceed, True)
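# Minimal consumer sketch -- an assumption, not from the source: ibis's
# lineage `traverse` yields the boolean produced for each visited node, so
# an expression preserves row order iff no reduction or window op is hit.
def _example_is_row_order_preserving(expr) -> bool:
    return all(lin.traverse(_is_row_order_preserving, expr))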
def coerce_to_output(
    result: Any, expr: ir.Expr, index: Optional[pd.Index] = None
) -> Union[pd.Series, pd.DataFrame]:
    """Cast the result to either a Series or DataFrame.

    This method casts the result of an execution to a Series or DataFrame,
    depending on the type of the expression and the shape of the result.

    Parameters
    ----------
    result: Any
        The result to cast
    expr: ibis.expr.types.Expr
        The expression associated with the result
    index: pd.Index
        Optional. If passed, scalar results will be broadcasted according
        to the index.

    Returns
    -------
    result: A Series or DataFrame

    Examples
    --------
    For dataframe outputs, see ``ibis.util.coerce_to_dataframe``.

    >>> coerce_to_output(pd.Series(1), expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr, [1,2,3])
    1    1
    2    1
    3    1
    Name: result, dtype: int64
    >>> coerce_to_output([1,2,3], expr)
    0    [1, 2, 3]
    Name: result, dtype: object
    """
    result_name = expr._safe_name

    if isinstance(expr, (ir.DestructColumn, ir.StructColumn)):
        return _coerce_to_dataframe(result, expr.type())
    elif isinstance(expr, (ir.DestructScalar, ir.StructScalar)):
        # There are two cases: if this is a grouped aggregate, the result
        # is a Series of tuples/lists; if this is a non-grouped aggregate,
        # the result is a single tuple/list.
        return _coerce_to_dataframe(result, expr.type())
    elif isinstance(result, pd.Series):
        return result.rename(result_name)
    elif isinstance(expr, ir.ScalarExpr):
        if index is None:
            # Wrap `result` into a single-element Series.
            return pd.Series([result], name=result_name)
        else:
            # Broadcast `result` to a multi-element Series according to
            # the given `index`.
            return pd.Series(
                np.repeat(result, len(index)),
                index=index,
                name=result_name,
            )
    elif isinstance(result, np.ndarray):
        return pd.Series(result, name=result_name)
    else:
        raise ValueError(f"Cannot coerce_to_output. Result: {result}")
def main_execute(
    expr: ir.Expr,
    scope: Optional[Mapping] = None,
    aggcontext: Optional[agg_ctx.AggregationContext] = None,
    clients: Sequence[ibis.client.Client] = (),
    params: Optional[Mapping] = None,
    **kwargs: Any,
):
    """Execute an ibis expression against the pandas backend.

    Parameters
    ----------
    expr
    scope
    aggcontext
    clients
    params
    """
    toposorted, dependencies = toposort(expr)
    params = toolz.keymap(get_node, params if params is not None else {})

    # Add to scope the objects that have no dependencies and are not ibis
    # nodes. We have to filter out nodes for cases--such as zero argument
    # UDFs--that do not have any dependencies yet still need to be
    # evaluated.
    full_scope = toolz.merge(
        scope if scope is not None else {},
        {
            key: key
            for key, parents in dependencies.items()
            if not parents and not isinstance(key, ops.Node)
        },
        params,
    )

    if not clients:
        clients = list(find_backends(expr))

    if aggcontext is None:
        aggcontext = agg_ctx.Summarize()

    # give backends a chance to inject scope if needed
    execute_first_scope = execute_first(
        expr.op(), *clients, scope=full_scope, aggcontext=aggcontext, **kwargs
    )
    full_scope = toolz.merge(full_scope, execute_first_scope)

    nodes = [node for node in toposorted if node not in full_scope]

    # compute the nodes that are not currently in scope
    for node in nodes:
        # allow clients to pre compute nodes as they like
        pre_executed_scope = pre_execute(
            node, *clients, scope=full_scope, aggcontext=aggcontext, **kwargs
        )
        # merge the existing scope with whatever was returned from
        # pre_execute
        execute_scope = toolz.merge(full_scope, pre_executed_scope)

        # if after pre_execute our node is in scope, then there's nothing
        # to do in this iteration
        if node in execute_scope:
            full_scope = execute_scope
        else:
            # If we're evaluating a literal then we can be a bit quicker
            # about evaluating the dispatch graph
            if isinstance(node, ops.Literal):
                executor = execute_literal
            else:
                executor = execute_node

            # Gather the inputs we've already computed that the current
            # node depends on
            execute_args = [
                full_scope[get_node(arg)] for arg in dependencies[node]
            ]

            # execute the node with its inputs
            execute_node_result = executor(
                node,
                *execute_args,
                aggcontext=aggcontext,
                scope=execute_scope,
                clients=clients,
                **kwargs,
            )

            # last chance to perform any additional computation on the
            # result before it gets added to scope for the next node
            full_scope[node] = post_execute(
                node,
                execute_node_result,
                clients=clients,
                aggcontext=aggcontext,
                scope=full_scope,
            )

    # the last node in the toposorted graph is the root and maps to the
    # desired result in scope
    last_node = toposorted[-1]
    result = full_scope[last_node]
    return result
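# Hedged end-to-end sketch -- not from the source: drives main_execute
# against an in-memory pandas table (`ibis.pandas.connect` is assumed to
# be available; the table and column names are illustrative).
def _example_main_execute():
    import pandas as pd
    import ibis

    con = ibis.pandas.connect({'t': pd.DataFrame({'a': [1, 2, 3]})})
    return main_execute(con.table('t').a.sum())  # -> 6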
def _fmt_value_expr(expr: ir.Expr, *, aliases: Aliases) -> str:
    """Format a value expression.

    Forwards the call on to the specific operation dispatch rule.
    """
    return fmt_value(expr.op(), aliases=aliases)
def needs_parens(expr: ir.Expr):
    op = expr.op()
    if isinstance(op, ops.Alias):
        op = op.arg.op()
    return isinstance(op, _NEEDS_PARENS_OPS)
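# Minimal sketch -- an assumption, not from the source: binary arithmetic
# ops are taken to be members of `_NEEDS_PARENS_OPS`, while a bare column
# reference is not; note the Alias wrapper is stripped before the check.
def _example_needs_parens(t):
    assert needs_parens((t.a + 1).name('x'))  # Alias over an arithmetic op
    assert not needs_parens(t.a)              # bare column -> no parens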