def is_rowid_zero_indexed(cls, data):
    """Tell whether the backend behind ``data`` is registered as zero-indexed.

    The backend is resolved through ``ibis.client.find_backends`` and
    ``validate_backends`` (exactly one backend is expected).  If that
    import or lookup raises for any reason, ``data._find_backend()`` is
    used instead.

    Parameters
    ----------
    cls
        Object exposing ``zero_indexed_backend_modules``, a container of
        module names.
    data
        Object whose backend should be inspected.

    Returns
    -------
    bool
        True when the module name of the backend's type is contained in
        ``cls.zero_indexed_backend_modules``.
    """
    try:
        from ibis.client import find_backends, validate_backends

        validated = validate_backends(list(find_backends(data)))
        # Unpacking enforces that exactly one backend was found; a
        # mismatch raises inside the ``try`` and triggers the fallback.
        (backend,) = validated
    except Exception:
        # Fallback lookup when the ibis.client helpers are missing or fail.
        backend = data._find_backend()

    backend_module = type(backend).__module__
    return backend_module in cls.zero_indexed_backend_modules
def execute_with_scope(expr, scope, aggcontext=None, clients=None, **kwargs):
    """Evaluate ``expr`` using the data supplied in ``scope``.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
        Expression to evaluate.
    scope : collections.Mapping
        Mapping from :class:`~ibis.expr.operations.Node` instances to
        concrete data such as a pandas DataFrame.
    aggcontext : Optional[ibis.pandas.aggcontext.AggregationContext]
        Aggregation context; ``agg_ctx.Summarize()`` is used when omitted.
    clients : Optional[list]
        Backends handed to the hooks; discovered with
        ``find_backends(expr)`` when omitted.
    **kwargs
        Forwarded unchanged to ``pre_execute``, ``post_execute`` and
        ``execute_until_in_scope``.

    Returns
    -------
    result : scalar, pd.Series, pd.DataFrame
    """
    root_op = expr.op()

    if clients is None:
        clients = list(find_backends(expr))
    if aggcontext is None:
        aggcontext = agg_ctx.Summarize()

    # pre_execute runs before anything is computed and before leaf nodes
    # are bound to data, so clients can supply their own data per leaf.
    client_scope = pre_execute(
        root_op, *clients, scope=scope, aggcontext=aggcontext, **kwargs
    )
    merged_scope = toolz.merge(scope, client_scope)

    # The post-execute hook is deliberately bound to the caller's ``scope``
    # rather than ``merged_scope``: it must see the scope of
    # execute_with_scope, not the one used by execute_until_in_scope.
    finalize = functools.partial(
        post_execute,
        scope=scope,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )

    return execute_until_in_scope(
        expr,
        merged_scope,
        aggcontext=aggcontext,
        clients=clients,
        post_execute_=finalize,
        **kwargs,
    )
def execute_with_scope(expr, scope, aggcontext=None, clients=None, **kwargs):
    """Run ``expr`` against the concrete data found in ``scope``.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
        The expression to run.
    scope : collections.Mapping
        Dictionary keyed by :class:`~ibis.expr.operations.Node` instances
        whose values are concrete data such as a pandas DataFrame.
    aggcontext : Optional[ibis.pandas.aggcontext.AggregationContext]
        Defaults to ``agg_ctx.Summarize()`` when not given.
    clients : Optional[list]
        Defaults to ``list(find_backends(expr))`` when not given.
    **kwargs
        Passed through to ``pre_execute``, ``post_execute`` and
        ``execute_until_in_scope``.

    Returns
    -------
    result : scalar, pd.Series, pd.DataFrame
    """
    op = expr.op()

    clients = list(find_backends(expr)) if clients is None else clients
    aggcontext = agg_ctx.Summarize() if aggcontext is None else aggcontext

    # Let clients intercept the expression up front -- before any
    # computation and before leaves are associated with data -- so that
    # they may provide their own data for each leaf.
    scope_from_clients = pre_execute(
        op,
        *clients,
        scope=scope,
        aggcontext=aggcontext,
        **kwargs,
    )
    execution_scope = toolz.merge(scope, scope_from_clients)

    # NOTE: ``scope`` (not ``execution_scope``) is bound on purpose, so
    # post_execute observes the scope of execute_with_scope instead of the
    # scope of execute_until_in_scope.
    post_hook = functools.partial(
        post_execute,
        scope=scope,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )

    outcome = execute_until_in_scope(
        expr,
        execution_scope,
        aggcontext=aggcontext,
        clients=clients,
        post_execute_=post_hook,
        **kwargs,
    )
    return outcome
def execute_with_scope(
    expr,
    scope: Scope,
    timecontext: Optional[TimeContext] = None,
    aggcontext=None,
    clients=None,
    **kwargs,
):
    """Evaluate ``expr`` with the data held in ``scope``.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
        Expression to evaluate.
    scope : Scope
        A Scope instance mapping
        :class:`~ibis.expr.operations.Node` subclass instances to concrete
        data such as a pandas DataFrame.
    timecontext : Optional[TimeContext]
        A ``(begin, end)`` tuple handed down from a parent Node to its
        children; see ``ibis/backends/pandas/execution/timecontext.py``
        for detailed usage.
    aggcontext : Optional[ibis.backends.pandas.aggcontext.AggregationContext]
        Defaults to ``agg_ctx.Summarize()`` when omitted.
    clients : Optional[list]
        Defaults to ``list(find_backends(expr))`` when omitted.
    **kwargs
        Forwarded to ``pre_execute``, ``post_execute`` and
        ``execute_until_in_scope``.

    Returns
    -------
    result : scalar, pd.Series, pd.DataFrame
    """
    root_op = expr.op()

    if clients is None:
        clients = list(find_backends(expr))
    if aggcontext is None:
        aggcontext = agg_ctx.Summarize()

    # Give clients the chance to intercept the expression before anything
    # is computed and before leaf nodes are tied to data, so they can
    # provide their own data for each leaf.
    client_scope = pre_execute(
        root_op,
        *clients,
        scope=scope,
        timecontext=timecontext,
        aggcontext=aggcontext,
        **kwargs,
    )
    merged_scope = scope.merge_scope(client_scope)

    # Bound to the incoming ``scope`` on purpose (not ``merged_scope``):
    # post_execute must see the scope of execute_with_scope rather than
    # the scope of execute_until_in_scope.
    finalize = functools.partial(
        post_execute,
        scope=scope,
        timecontext=timecontext,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )

    final_scope = execute_until_in_scope(
        expr,
        merged_scope,
        timecontext=timecontext,
        aggcontext=aggcontext,
        clients=clients,
        post_execute_=finalize,
        **kwargs,
    )
    # Pull the value recorded for the root op under this time context.
    return final_scope.get_value(root_op, timecontext)
def execute_with_scope(expr, scope, context=None, **kwargs):
    """Evaluate ``expr`` using the data supplied in ``scope``.

    Parameters
    ----------
    expr : ir.Expr
        Expression to evaluate.
    scope : dict
        Dictionary mapping :class:`~ibis.expr.types.Node` subclass
        instances to concrete data such as a pandas DataFrame.
    context
        Aggregation context; ``agg_ctx.Summarize()`` is used when omitted.
    **kwargs
        Forwarded to ``pre_execute``, ``execute_first``, ``execute`` and
        ``execute_node``.

    Returns
    -------
    result : scalar, pd.Series, pd.DataFrame
    """
    op = expr.op()

    # Run pre_execute once per backend so clients can intercept the
    # expression before anything is computed and before leaves are bound
    # to data; whatever they return is layered on top of ``scope``.
    run_pre_execute = functools.partial(pre_execute, op, scope=scope, **kwargs)
    scope = toolz.merge(scope, *map(run_pre_execute, find_backends(expr)))

    # Base case: the op is already computed (or is a leaf data node).
    if op in scope:
        return scope[op]

    if context is None:
        context = agg_ctx.Summarize()

    # Collect the data for every root table; a missing table means the
    # execute_first shortcut below cannot apply.
    try:
        leaf_data = [scope[table] for table in op.root_tables()]
    except KeyError:
        leaf_data = None

    if leaf_data is not None:
        try:
            # Special case: an execute_first definition matches this
            # operation together with its data leaves.
            return execute_first(
                op, *leaf_data, scope=scope, context=context, **kwargs
            )
        except NotImplementedError:
            pass

    # Recursively evaluate the arguments: expression-like arguments are
    # executed, other valid inputs are passed through untouched, and
    # anything not in _VALID_INPUT_TYPES is dropped.
    computed_args = []
    for arg in op.args:
        if not isinstance(arg, _VALID_INPUT_TYPES):
            continue
        if hasattr(arg, 'op'):
            computed_args.append(
                execute(arg, scope, context=context, **kwargs)
            )
        else:
            computed_args.append(arg)

    # Finally evaluate the op itself on its computed arguments.
    return execute_node(
        op, *computed_args, scope=scope, context=context, **kwargs
    )
def is_rowid_zero_indexed(cls, data):
    """Tell whether the backend behind ``data`` is registered as zero-indexed.

    Parameters
    ----------
    cls
        Object exposing ``zero_indexed_backend_modules``, a container of
        module names.
    data
        Object passed to ``ibis.client.find_backends``; exactly one
        validated backend is expected.

    Returns
    -------
    bool
        True when the module name of the backend's type is contained in
        ``cls.zero_indexed_backend_modules``.
    """
    from ibis.client import find_backends, validate_backends

    validated = validate_backends(list(find_backends(data)))
    # Single-element unpacking raises unless exactly one backend is present.
    (backend,) = validated

    backend_module = type(backend).__module__
    return backend_module in cls.zero_indexed_backend_modules
def main_execute(
    expr: ir.Expr,
    scope: Optional[Mapping] = None,
    aggcontext: Optional[agg_ctx.AggregationContext] = None,
    clients: Sequence[ibis.client.Client] = (),
    params: Optional[Mapping] = None,
    **kwargs: Any,
):
    """Execute an ibis expression against the pandas backend.

    Parameters
    ----------
    expr
        The expression to execute.
    scope
        Optional mapping of already-known values; treated as empty when
        ``None``.
    aggcontext
        Aggregation context; ``agg_ctx.Summarize()`` is used when ``None``.
    clients
        Backends handed to the execution hooks; when empty they are
        discovered with ``find_backends(expr)``.
    params
        Optional mapping merged into the scope after its keys have been
        passed through ``get_node``.
    **kwargs
        Forwarded to ``execute_first``, ``pre_execute`` and the node
        executor.

    Returns
    -------
    The value stored in scope for the last node of the topological
    ordering, i.e. the root of the expression.
    """
    ordered_nodes, dependencies = toposort(expr)

    if params is None:
        params = {}
    params = toolz.keymap(get_node, params)

    if scope is None:
        scope = {}

    # Objects that have no dependencies and are not ibis nodes evaluate to
    # themselves.  Nodes are filtered out because some of them -- such as
    # zero argument UDFs -- have no dependencies yet still need evaluating.
    self_valued = {}
    for key, parents in dependencies.items():
        if parents or isinstance(key, ops.Node):
            continue
        self_valued[key] = key

    full_scope = toolz.merge(scope, self_valued, params)

    if not clients:
        clients = list(find_backends(expr))
    if aggcontext is None:
        aggcontext = agg_ctx.Summarize()

    # Backends get a chance to inject scope before any node is evaluated.
    injected_scope = execute_first(
        expr.op(),
        *clients,
        scope=full_scope,
        aggcontext=aggcontext,
        **kwargs,
    )
    full_scope = toolz.merge(full_scope, injected_scope)

    # Snapshot the nodes still missing from scope *before* the loop starts;
    # the loop itself keeps adding to ``full_scope``.
    pending = [node for node in ordered_nodes if node not in full_scope]

    for node in pending:
        # Clients may pre-compute nodes however they like.
        hook_scope = pre_execute(
            node,
            *clients,
            scope=full_scope,
            aggcontext=aggcontext,
            **kwargs,
        )
        candidate_scope = toolz.merge(full_scope, hook_scope)

        # pre_execute already produced this node: adopt the merged scope
        # and move on to the next node.
        if node in candidate_scope:
            full_scope = candidate_scope
            continue

        # Literals take a quicker route through the dispatch graph.
        executor = (
            execute_literal if isinstance(node, ops.Literal) else execute_node
        )

        # Inputs this node depends on, all computed in earlier iterations.
        inputs = [full_scope[get_node(arg)] for arg in dependencies[node]]

        raw_result = executor(
            node,
            *inputs,
            aggcontext=aggcontext,
            scope=candidate_scope,
            clients=clients,
            **kwargs,
        )

        # Last chance to post-process the result before it is stored in
        # scope for the nodes that follow.
        full_scope[node] = post_execute(
            node,
            raw_result,
            clients=clients,
            aggcontext=aggcontext,
            scope=full_scope,
        )

    # The final node of the topological order is the root, and its entry
    # in scope is the requested result.
    return full_scope[ordered_nodes[-1]]