def wrapper(func):
    """Wrap ``func`` as a user-defined aggregate function (UDAF).

    Creates a new expression node class for ``func`` and registers two
    pandas execution rules for it: one for plain ``pd.Series`` inputs
    and one for grouped (``SeriesGroupBy``) inputs, including windowed
    aggregations.  Returns a wrapper that builds the node expression.
    """
    # Validate up front that input_type and the function signature agree.
    funcsig = valid_function_signature(input_type, func)

    # Dynamically create the node class; naming it after the wrapped
    # function keeps reprs and error messages readable.
    UDAFNode = type(
        func.__name__,
        (base_class, ),
        {
            'signature': sig.TypeSignature.from_dtypes(input_type),
            'output_type': output_type_method(output_type),
        })

    # An execution rule for a simple aggregate node
    @execute_node.register(
        UDAFNode,
        *udf_signature(input_type, pin=None, klass=pd.Series))
    def execute_udaf_node(op, *args, **kwargs):
        # Re-map the incoming positional/keyword arguments onto the
        # validated signature before calling the user's function.
        args, kwargs = arguments_from_signature(
            funcsig, *args, **kwargs)
        return func(*args, **kwargs)

    # An execution rule for a grouped aggregation node. This
    # includes aggregates applied over a window.
    nargs = len(input_type)
    # One signature per position that the SeriesGroupBy argument can
    # occupy ("pin"); every other argument position stays generic.
    group_by_signatures = [
        udf_signature(input_type, pin=pin, klass=SeriesGroupBy)
        for pin in range(nargs)
    ]

    # toolz.compose applies each execute_node.register(...) decorator in
    # turn, registering the same implementation for every signature.
    @toolz.compose(*(execute_node.register(UDAFNode, *types)
                     for types in group_by_signatures))
    def execute_udaf_node_groupby(op, *args, **kwargs):
        # construct a generator that yields the next group of data
        # for every argument excluding the first (pandas performs
        # the iteration for the first argument) for each argument
        # that is a SeriesGroupBy.
        #
        # If the argument is not a SeriesGroupBy then keep
        # repeating it until all groups are exhausted.
        aggcontext = kwargs.pop('aggcontext', None)
        assert aggcontext is not None, 'aggcontext is None'
        iters = ((data for _, data in arg) if isinstance(
            arg, SeriesGroupBy) else itertools.repeat(arg)
            for arg in args[1:])
        # NOTE(review): uses the raw inspect signature here, not the
        # validated `funcsig` from the enclosing scope — presumably
        # equivalent; confirm before consolidating.
        funcsig = signature(func)

        def aggregator(first, *rest, **kwargs):
            # map(next, *rest) gets the inputs for the next group
            # TODO: might be inefficient to do this on every call
            args, kwargs = arguments_from_signature(
                funcsig, first, *map(next, rest), **kwargs)
            return func(*args, **kwargs)

        # The aggregation context drives iteration over args[0] (the
        # pinned grouped argument); the remaining iterators are advanced
        # lazily inside `aggregator`.
        result = aggcontext.agg(args[0], aggregator, *iters, **kwargs)
        return result

    @functools.wraps(func)
    def wrapped(*args):
        # Build the node and convert it to an ibis expression.
        return UDAFNode(*args).to_expr()

    return wrapped
# Execution rules for ops.Where over the cross product of Series/scalar
# argument shapes.

@execute_node.register(ops.Where, pd.Series, pd.Series, pd.Series)
@execute_node.register(ops.Where, pd.Series, pd.Series, scalar_types)
def execute_node_where_series_series_series(op, cond, true, false, **kwargs):
    # No need to turn false into a series, pandas will broadcast it
    return true.where(cond, other=false)


# Series, scalar, Series
def execute_node_where_series_scalar_scalar(op, cond, true, false, **kwargs):
    # Broadcast the scalar `true` to the length of `cond` so there is a
    # Series to call .where() on; pandas broadcasts `false` itself.
    return pd.Series(np.repeat(true, len(cond))).where(cond, other=false)


# Series, scalar, scalar
# Register the implementation above once per concrete scalar type;
# rebinding the name keeps the final registered function accessible.
for scalar_type in scalar_types:
    execute_node_where_series_scalar_scalar = execute_node.register(
        ops.Where, pd.Series, scalar_type,
        scalar_type)(execute_node_where_series_scalar_scalar)


# scalar, Series, Series
@execute_node.register(ops.Where, boolean_types, pd.Series, pd.Series)
def execute_node_where_scalar_scalar_scalar(op, cond, true, false, **kwargs):
    # Note that it is not necessary to check that true and false are also
    # scalars. This allows users to do things like:
    # ibis.where(even_or_odd_bool, [2, 4, 6], [1, 3, 5])
    return true if cond else false


# scalar, scalar, scalar
for scalar_type in scalar_types:
    execute_node_where_scalar_scalar_scalar = execute_node.register(
        # NOTE(review): this chunk is truncated mid-statement here; the
        # registration call continues beyond this excerpt.
def wrapper(func):
    """Wrap ``func`` as an elementwise user-defined function (UDF).

    Creates a custom ``ops.ValueOp`` node class for ``func`` and
    registers pandas execution rules for plain ``pd.Series`` inputs,
    python scalar inputs, and grouped (``SeriesGroupBy``) inputs.
    Returns a wrapper that builds the node expression.
    """
    # validate that the input_type argument and the function signature
    # match
    funcsig = valid_function_signature(input_type, func)

    # generate a new custom node
    UDFNode = type(
        func.__name__,
        (ops.ValueOp, ),
        {
            'signature': sig.TypeSignature.from_dtypes(input_type),
            'output_type': output_type.column_type,
        },
    )

    # definitions

    # Define an execution rule for a simple elementwise Series
    # function
    @execute_node.register(
        UDFNode,
        *udf_signature(input_type, pin=None, klass=pd.Series))
    @execute_node.register(
        UDFNode,
        # Also register against raw python scalar types (plus None for
        # nullable arguments) derived from each ibis argument type.
        *(rule_to_python_type(argtype) + nullable(argtype)
          for argtype in input_type),
    )
    def execute_udf_node(op, *args, **kwargs):
        # Map the incoming arguments onto the validated signature
        # before calling the user's function.
        args, kwargs = arguments_from_signature(
            funcsig, *args, **kwargs)
        return func(*args, **kwargs)

    # Define an execution rule for elementwise operations on a
    # grouped Series
    nargs = len(input_type)
    # One signature per position the SeriesGroupBy argument can occupy.
    group_by_signatures = [
        udf_signature(input_type, pin=pin, klass=SeriesGroupBy)
        for pin in range(nargs)
    ]

    # toolz.compose applies each execute_node.register(...) decorator,
    # registering this implementation for every grouped signature.
    @toolz.compose(*(execute_node.register(UDFNode, *types)
                     for types in group_by_signatures))
    def execute_udf_node_groupby(op, *args, **kwargs):
        # Collect the grouper of every grouped argument.
        groupers = [
            grouper
            for grouper in (getattr(arg, 'grouper', None) for arg in args)
            if grouper is not None
        ]

        # all grouping keys must be identical
        assert all(groupers[0] == grouper for grouper in groupers[1:])

        # we're performing a scalar operation on grouped column, so
        # perform the operation directly on the underlying Series
        # and regroup after it's finished
        arguments = [getattr(arg, 'obj', arg) for arg in args]
        groupings = groupers[0].groupings
        args, kwargs = arguments_from_signature(
            signature(func), *arguments, **kwargs)
        return func(*args, **kwargs).groupby(groupings)

    @functools.wraps(func)
    def wrapped(*args):
        # Build the node and convert it to an ibis expression.
        return UDFNode(*args).to_expr()

    return wrapped
def pre_execute_elementwise_udf(op, *clients, scope=None, aggcontext=None,
                                **kwargs):
    """Register execution rules for elementwise UDFs.

    Parameters
    ----------
    op
        The ``ElementWiseVectorizedUDF`` node whose ``input_type``
        determines which execution signatures get registered.
    clients
        Backend clients participating in this execution (unused here).
    scope
        The current execution scope; returned unchanged.
    aggcontext
        Aggregation context; unused by elementwise UDFs but accepted so
        the signature matches sibling ``pre_execute`` rules.
        NOTE(review): this keyword was previously misspelled
        ``aggcontet``, which made the real ``aggcontext`` keyword fall
        silently into ``**kwargs``; the fix is behavior-compatible
        because the value is unused either way.

    Returns
    -------
    scope
        The ``scope`` argument, unmodified.
    """
    input_type = op.input_type

    # Define an execution rule for elementwise operations on a
    # grouped Series: one registration per position the SeriesGroupBy
    # argument can occupy.
    nargs = len(input_type)
    group_by_signatures = [
        udf_signature(input_type, pin=pin, klass=SeriesGroupBy)
        for pin in range(nargs)
    ]

    # toolz.compose applies each execute_node.register(...) decorator,
    # registering this implementation for every grouped signature.
    @toolz.compose(
        *(execute_node.register(ops.ElementWiseVectorizedUDF, *types)
          for types in group_by_signatures))
    def execute_udf_node_groupby(op, *args, **kwargs):
        func = op.func

        # Collect the grouper of every grouped argument.
        groupers = [
            grouper
            for grouper in (getattr(arg, 'grouper', None) for arg in args)
            if grouper is not None
        ]

        # all grouping keys must be identical
        assert all(groupers[0] == grouper for grouper in groupers[1:])

        # we're performing a scalar operation on grouped column, so
        # perform the operation directly on the underlying Series
        # and regroup after it's finished
        arguments = [getattr(arg, 'obj', arg) for arg in args]
        groupings = groupers[0].groupings
        args, kwargs = arguments_from_signature(
            signature(func), *arguments, **kwargs)
        return func(*args, **kwargs).groupby(groupings)

    # Define an execution rule for a simple elementwise Series
    # function
    @execute_node.register(
        ops.ElementWiseVectorizedUDF,
        *udf_signature(input_type, pin=None, klass=pd.Series),
    )
    @execute_node.register(
        ops.ElementWiseVectorizedUDF,
        *(rule_to_python_type(argtype) + nullable(argtype)
          for argtype in input_type),
    )
    def execute_udf_node(op, *args, **kwargs):
        func = op.func
        # The function lives on the node, so the signature must be
        # validated here at execution time, not at registration time.
        funcsig = valid_function_signature(input_type, func)
        args, kwargs = arguments_from_signature(funcsig, *args, **kwargs)
        return func(*args, **kwargs)

    return scope
# Execution rules for ops.Where over the cross product of Series/scalar
# argument shapes.

@execute_node.register(ops.Where, pd.Series, pd.Series, pd.Series)
@execute_node.register(ops.Where, pd.Series, pd.Series, scalar_types)
def execute_node_where_series_series_series(op, cond, true, false, **kwargs):
    # No need to turn false into a series, pandas will broadcast it
    return true.where(cond, other=false)


# Series, scalar, Series
def execute_node_where_series_scalar_scalar(op, cond, true, false, **kwargs):
    # Broadcast the scalar `true` to the length of `cond` so there is a
    # Series to call .where() on; pandas broadcasts `false` itself.
    return pd.Series(np.repeat(true, len(cond))).where(cond, other=false)


# Series, scalar, scalar
# Register the implementation above once per concrete scalar type;
# rebinding the name keeps the final registered function accessible.
for scalar_type in scalar_types:
    execute_node_where_series_scalar_scalar = execute_node.register(
        ops.Where, pd.Series, scalar_type, scalar_type
    )(execute_node_where_series_scalar_scalar)


# scalar, Series, Series
@execute_node.register(ops.Where, boolean_types, pd.Series, pd.Series)
def execute_node_where_scalar_scalar_scalar(op, cond, true, false, **kwargs):
    # Note that it is not necessary to check that true and false are also
    # scalars. This allows users to do things like:
    # ibis.where(even_or_odd_bool, [2, 4, 6], [1, 3, 5])
    return true if cond else false


# scalar, scalar, scalar
for scalar_type in scalar_types:
    execute_node_where_scalar_scalar_scalar = execute_node.register(
        # NOTE(review): this chunk is truncated mid-statement here; the
        # registration call continues beyond this excerpt.