def execute_with_scope(expr, scope, context=None, **kwargs):
    """Execute `expr` using the data supplied in `scope`.

    Each backend found in `expr` is passed to ``pre_execute`` and the
    mappings it returns are merged on top of `scope` before evaluation
    starts.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
        The expression to execute.
    scope : collections.Mapping
        A mapping from :class:`~ibis.expr.operations.Node` subclass
        instances to concrete data such as a pandas DataFrame.
    context : Optional[ibis.pandas.aggcontext.AggregationContext]
        Aggregation context. ``agg_ctx.Summarize()`` is used when None.
    kwargs : Mapping
        Forwarded unchanged to ``pre_execute``, ``post_execute`` and
        ``execute_until_in_scope``.

    Returns
    -------
    result : scalar, pd.Series, pd.DataFrame
    """
    op = expr.op()
    clients = list(find_backends(expr))

    if context is None:
        context = agg_ctx.Summarize()

    # Let every client intercept the expression before anything is computed
    # *and* before leaf nodes are associated with data, so that clients can
    # provide their own data for each leaf.
    client_scopes = [
        pre_execute(op, client, scope=scope, context=context, **kwargs)
        for client in clients
    ]
    merged_scope = toolz.merge(scope, *client_scopes)

    # XXX: `scope`, not `merged_scope`, is bound here on purpose so that
    # post_execute sees the scope of execute_with_scope rather than the
    # scope of execute_until_in_scope.
    finalize = functools.partial(
        post_execute,
        scope=scope,
        context=context,
        clients=clients,
        **kwargs
    )

    return execute_until_in_scope(
        expr,
        merged_scope,
        context=context,
        clients=clients,
        post_execute_=finalize,
        **kwargs
    )
def __init__(self, path=None, create=False):
    """Set up the client and register the SQLite user-defined functions.

    Parameters
    ----------
    path : optional
        Stored as ``self.name``. When not None it is also handed to
        ``self.attach`` under the database name ``'default'``.
    create : bool
        Forwarded to ``self.attach`` as the ``create`` keyword.
    """
    engine = sa.create_engine('sqlite://')
    super(SQLiteClient, self).__init__(engine)

    self.name = path
    self.database_name = 'default'

    if path is not None:
        self.attach(self.database_name, path, create=create)

    # Scalar functions are registered first, then aggregates. Each entry is
    # bound to its registrar and run through the connection.
    registrations = (
        (_register_function, _SQLITE_UDF_REGISTRY),
        (_register_aggregate, _SQLITE_UDAF_REGISTRY),
    )
    for registrar, registry in registrations:
        for entry in registry:
            self.con.run_callable(functools.partial(registrar, entry))
def execute_count_distinct_series_groupby_mask(
    op, data, mask, aggcontext=None, **kwargs
):
    """Aggregate `data` with a masked ``pd.Series.nunique`` reduction.

    Parameters
    ----------
    op : object
        Unused here.
    data : object
        The data handed to ``aggcontext.agg``.
    mask : object
        Only ``mask.obj`` is read; it is bound as the first argument of
        ``_filtered_reduction``.
    aggcontext : object
        Must provide an ``agg`` method. The None default is not usable.
    kwargs : Mapping
        Accepted and ignored.

    Returns
    -------
    object
        Whatever ``aggcontext.agg`` returns.
    """
    distinct_count = functools.partial(
        _filtered_reduction, mask.obj, pd.Series.nunique
    )
    return aggcontext.agg(data, distinct_count)
def execute_until_in_scope(
    expr, scope, context=None, clients=None, post_execute_=None, **kwargs
):
    """Keep executing `expr` until its op has an entry in the scope.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
    scope : Mapping
    context : Optional[AggregationContext]
        Must not be None despite the default.
    clients : List[ibis.client.Client]
        Must not be None despite the default.
    post_execute_ : callable
        Must not be None despite the default. Forwarded to
        ``execute_bottom_up``.
    kwargs : Mapping

    Returns
    -------
    object
        The value stored in the scope under ``expr.op()``.
    """
    # these should never be None
    assert context is not None, 'context is None'
    assert clients is not None, 'clients is None'
    assert post_execute_ is not None, 'post_execute_ is None'

    while True:
        op = expr.op()

        # Done: the op has been computed, or it is a leaf data node.
        if op in scope:
            return scope[op]

        computed_scope = execute_bottom_up(
            expr,
            scope,
            context=context,
            post_execute_=post_execute_,
            **kwargs
        )

        # Each client sees the scope from *before* this round's results.
        # Note that `context` is not forwarded to pre_execute here.
        client_scopes = [
            pre_execute(op, client, scope=scope, **kwargs)
            for client in clients
        ]
        scope = toolz.merge(computed_scope, *client_scopes)
def execute_reduction_series_gb_mask(
    op, data, mask, aggcontext=None, **kwargs
):
    """Aggregate `data` with a masked reduction named after `op`'s type.

    The reduction is a method call whose name is the lower-cased class name
    of `op`.

    Parameters
    ----------
    op : object
        Only its type name is used.
    data : object
        The data handed to ``aggcontext.agg``.
    mask : object
        Only ``mask.obj`` is read; it is bound as the first argument of
        ``_filtered_reduction``.
    aggcontext : object
        Must provide an ``agg`` method. The None default is not usable.
    kwargs : Mapping
        Accepted and ignored.

    Returns
    -------
    object
        Whatever ``aggcontext.agg`` returns.
    """
    reduction_name = type(op).__name__.lower()
    reduce_method = operator.methodcaller(reduction_name)
    masked_reduction = functools.partial(
        _filtered_reduction, mask.obj, reduce_method
    )
    return aggcontext.agg(data, masked_reduction)
def execute_series_natural_log(op, data, **kwargs):
    """Compute the natural logarithm of `data`.

    Parameters
    ----------
    op : object
        The operation, used only on the object-dtype path.
    data : pd.Series
    kwargs : Mapping
        Forwarded to ``execute_node`` on the object-dtype path.

    Returns
    -------
    object
        ``np.log(data)``, or for object dtype the result of applying
        ``execute_node(op, element, **kwargs)`` to every element.
    """
    holds_objects = data.dtype == np.dtype(np.object_)
    if not holds_objects:
        return np.log(data)

    # Object dtype: dispatch each element through execute_node.
    per_element = functools.partial(execute_node, op, **kwargs)
    return data.apply(per_element)
def vectorize_object(op, arg, *args, **kwargs):
    """Apply ``execute_node`` for `op` element-wise over `arg` and `args`.

    Parameters
    ----------
    op : object
        Bound as the first argument of ``execute_node``.
    arg : pd.Series
        Supplies the index and name of the result.
    args : tuple
        Further positional inputs passed to the vectorized function.
    kwargs : Mapping
        Bound as keyword arguments of ``execute_node``.

    Returns
    -------
    pd.Series
        The vectorized results, carrying ``arg.index`` and ``arg.name``.
    """
    node_executor = functools.partial(execute_node, op, **kwargs)
    vectorized = np.vectorize(node_executor)
    values = vectorized(arg, *args)
    return pd.Series(values, index=arg.index, name=arg.name)
def call_numpy_ufunc(func, op, data, **kwargs):
    """Apply `func` to `data`, dispatching per element for object dtype.

    Parameters
    ----------
    func : callable
        Called as ``func(data)`` when `data` is not of object dtype.
    op : object
        The operation, used only on the object-dtype path.
    data : pd.Series
    kwargs : Mapping
        Forwarded to ``execute_node`` on the object-dtype path.

    Returns
    -------
    object
        ``func(data)``, or for object dtype the result of applying
        ``execute_node(op, element, **kwargs)`` to every element.
    """
    object_dtype = np.dtype(np.object_)
    return (
        data.apply(functools.partial(execute_node, op, **kwargs))
        if data.dtype == object_dtype
        else func(data)
    )
def execute_bottom_up(expr, scope, aggcontext=None, post_execute_=None,
                      clients=None, **kwargs):
    """Execute `expr` bottom-up.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
    scope : Mapping[ibis.expr.operations.Node, object]
    aggcontext : Optional[ibis.pandas.aggcontext.AggregationContext]
    post_execute_ : callable
        Must not be None despite the default. Called as
        ``post_execute_(op, result)`` on the computed result.
    clients : object
        Forwarded to the recursive calls and to ``execute_node``.
    kwargs : Dict[str, object]

    Returns
    -------
    result : Mapping[
        ibis.expr.operations.Node,
        Union[pandas.Series, pandas.DataFrame, scalar_types]
    ]
        A mapping from node to the computed result of that Node

    Raises
    ------
    com.UnboundExpressionError
        If the op has no computable inputs.
    """
    assert post_execute_ is not None, 'post_execute_ is None'

    op = expr.op()

    # Already in scope: hand the scope straight back; the caller passes it
    # back into execute_bottom_up, which then terminates.
    if op in scope:
        return scope

    # Literals are special-cased to avoid the overhead of dispatching
    # execute_node.
    if isinstance(op, ops.Literal):
        literal_value = execute_literal(
            op, op.value, expr.type(), aggcontext=aggcontext, **kwargs
        )
        return {op: literal_value}

    # Work out which inputs can be computed on. Things like expressions,
    # None, and scalar types are computable whereas ``list``s are not.
    computable_args = [
        arg for arg in op.inputs if is_computable_arg(op, arg)
    ]

    # Recursively compute each expression input; any other input maps to
    # itself.
    scopes = []
    for arg in computable_args:
        if hasattr(arg, 'op'):
            arg_scope = execute_bottom_up(
                arg,
                scope,
                aggcontext=aggcontext,
                post_execute_=post_execute_,
                clients=clients,
                **kwargs
            )
        else:
            arg_scope = {arg: arg}
        scopes.append(arg_scope)

    # Without any data there is nothing to compute on.
    if not scopes:
        raise com.UnboundExpressionError(
            'Unable to find data for expression:\n{}'.format(repr(expr)))

    # there should be exactly one dictionary per computable argument
    assert len(computable_args) == len(scopes)

    new_scope = toolz.merge(scopes)

    # Gather the computed inputs for this node's execute_node implementation.
    data = []
    for arg in computable_args:
        data.append(new_scope[arg.op()] if hasattr(arg, 'op') else arg)

    result = execute_node(
        op,
        *data,
        scope=scope,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs
    )
    return {op: post_execute_(op, result)}
def array_type(self):
    """Return ``self.column`` with ``dtype`` pre-bound to this object.

    Returns
    -------
    functools.partial
        Calling it invokes ``self.column(..., dtype=self)``.
    """
    column_factory = self.column
    return functools.partial(column_factory, dtype=self)
def scalar_type(self):
    """Return ``self.scalar`` with ``dtype`` pre-bound to this object.

    Returns
    -------
    functools.partial
        Calling it invokes ``self.scalar(..., dtype=self)``.
    """
    bound_keywords = {'dtype': self}
    return functools.partial(self.scalar, **bound_keywords)
def execute_with_scope(expr, scope, context=None, **kwargs):
    """Execute `expr` using the data supplied in `scope`.

    Parameters
    ----------
    expr : ir.Expr
        The expression to execute.
    scope : dict
        A dictionary mapping :class:`~ibis.expr.types.Node` subclass
        instances to concrete data such as a pandas DataFrame.
    context : optional
        Aggregation context. ``agg_ctx.Summarize()`` is used when None.
    kwargs : dict
        Forwarded to ``pre_execute``, ``execute_first``, ``execute`` and
        ``execute_node``.

    Returns
    -------
    result : scalar, pd.Series, pd.DataFrame
    """
    op = expr.op()

    # Let each backend intercept the expression before anything is computed
    # *and* before leaf nodes are associated with data, so that clients can
    # provide their own scope.
    backend_scopes = [
        pre_execute(op, backend, scope=scope, **kwargs)
        for backend in find_backends(expr)
    ]
    scope = toolz.merge(scope, *backend_scopes)

    # Base case: the op has been computed (or is a leaf data node).
    if op in scope:
        return scope[op]

    if context is None:
        context = agg_ctx.Summarize()

    # Look up the data of every root table; a missing one disables the
    # execute_first shortcut below.
    try:
        root_data = [scope[table] for table in op.root_tables()]
    except KeyError:
        root_data = None

    if root_data is not None:
        try:
            # Special case: there is an execute_first definition matching
            # the current operation and its data leaves.
            return execute_first(
                op, *root_data, scope=scope, context=context, **kwargs
            )
        except NotImplementedError:
            pass

    # Recursively compute the op's arguments. Inputs that are not valid
    # input types are skipped; non-expression inputs pass through as-is.
    computed_args = []
    for arg in op.args:
        if not isinstance(arg, _VALID_INPUT_TYPES):
            continue
        if hasattr(arg, 'op'):
            arg = execute(arg, scope, context=context, **kwargs)
        computed_args.append(arg)

    # Compute the op itself from its computed arguments.
    return execute_node(
        op, *computed_args, scope=scope, context=context, **kwargs
    )
def output_type(self):
    """Return ``Foo`` with ``dtype`` pre-bound to ``dt.int64``.

    Returns
    -------
    functools.partial
        Calling it invokes ``Foo(..., dtype=dt.int64)``.
    """
    bound_keywords = {'dtype': dt.int64}
    return functools.partial(Foo, **bound_keywords)
def execute_series_unary_op(op, data, **kwargs):
    """Apply the numpy function named after `op`'s type to `data`.

    The function is looked up on ``np`` by the lower-cased class name of
    `op`; the lookup happens before the dtype is inspected.

    Parameters
    ----------
    op : object
        Its type name selects the numpy function.
    data : pd.Series
    kwargs : Mapping
        Forwarded to ``execute_node`` on the object-dtype path.

    Returns
    -------
    object
        The numpy function applied to `data`, or for object dtype the
        result of applying ``execute_node(op, element, **kwargs)`` to every
        element.
    """
    numpy_name = type(op).__name__.lower()
    numpy_function = getattr(np, numpy_name)

    if data.dtype != np.dtype(np.object_):
        return numpy_function(data)

    # Object dtype: dispatch each element through execute_node.
    per_element = functools.partial(execute_node, op, **kwargs)
    return data.apply(per_element)
def execute_series_unary_op_negate(op, data, **kwargs):
    """Negate `data`.

    Parameters
    ----------
    op : object
        The operation, used only on the object-dtype path.
    data : pd.Series
    kwargs : Mapping
        Forwarded to ``execute_node`` on the object-dtype path.

    Returns
    -------
    object
        ``np.negative(data)``, or for object dtype the result of applying
        ``execute_node(op, element, **kwargs)`` to every element.
    """
    if not (data.dtype == np.dtype(np.object_)):
        return np.negative(data)

    # Object dtype: dispatch each element through execute_node.
    negate_element = functools.partial(execute_node, op, **kwargs)
    return data.apply(negate_element)