예제 #1
0
        def wrapper(func):
            """Create a UDAF node class for ``func`` and register its rules.

            A new class named after ``func`` is created, execution rules for
            it are registered on ``execute_node``, and a callable that builds
            an expression from that node is returned.

            ``input_type``, ``output_type``, ``output_type_method`` and
            ``base_class`` come from the enclosing scope, which is not
            visible in this block.
            """
            # NOTE(review): presumably validates ``func`` against
            # ``input_type`` and returns its signature -- confirm in the
            # helper; the result is only used by the non-grouped rule below.
            funcsig = valid_function_signature(input_type, func)

            # Three-argument ``type`` call: builds a class named after the
            # wrapped function, deriving from ``base_class``, carrying the
            # two class attributes given in the dict.
            UDAFNode = type(
                func.__name__, (base_class, ), {
                    'signature': sig.TypeSignature.from_dtypes(input_type),
                    'output_type': output_type_method(output_type),
                })

            # An execution rule for a simple aggregate node
            @execute_node.register(UDAFNode,
                                   *udf_signature(input_type,
                                                  pin=None,
                                                  klass=pd.Series))
            def execute_udaf_node(op, *args, **kwargs):
                # Let the helper decide which args/kwargs ``func`` receives,
                # based on the signature captured above.
                args, kwargs = arguments_from_signature(
                    funcsig, *args, **kwargs)
                return func(*args, **kwargs)

            # An execution rule for a grouped aggregation node. This
            # includes aggregates applied over a window.
            nargs = len(input_type)
            # One signature per argument position; presumably ``pin`` marks
            # the position that must be a ``SeriesGroupBy`` -- confirm in
            # ``udf_signature``.
            group_by_signatures = [
                udf_signature(input_type, pin=pin, klass=SeriesGroupBy)
                for pin in range(nargs)
            ]

            # Compose all the ``register`` decorators into one so the same
            # function is registered under every grouped signature.
            @toolz.compose(*(execute_node.register(UDAFNode, *types)
                             for types in group_by_signatures))
            def execute_udaf_node_groupby(op, *args, **kwargs):
                # construct a generator that yields the next group of data
                # for every argument excluding the first (pandas performs
                # the iteration for the first argument) for each argument
                # that is a SeriesGroupBy.
                #
                # If the argument is not a SeriesGroupBy then keep
                # repeating it until all groups are exhausted.

                # ``pop`` removes the context from kwargs, so it is not
                # forwarded to ``agg`` below.
                aggcontext = kwargs.pop('aggcontext', None)
                assert aggcontext is not None, 'aggcontext is None'
                # Lazy: one iterator per argument after the first. Grouped
                # arguments yield only the group data (the key is dropped).
                iters = ((data for _, data in arg) if isinstance(
                    arg, SeriesGroupBy) else itertools.repeat(arg)
                         for arg in args[1:])
                # Local to this rule; it does not touch the ``funcsig`` of
                # the enclosing ``wrapper`` used by the non-grouped rule.
                funcsig = signature(func)

                def aggregator(first, *rest, **kwargs):
                    # map(next, rest) advances every iterator in ``rest``
                    # once, giving the inputs for the next group
                    # TODO: might be inefficient to do this on every call
                    args, kwargs = arguments_from_signature(
                        funcsig, first, *map(next, rest), **kwargs)
                    return func(*args, **kwargs)

                # ``*iters`` consumes the generator above, passing each
                # iterator positionally; presumably ``agg`` hands them to
                # ``aggregator`` as ``rest`` -- confirm in the aggregation
                # context implementation.
                result = aggcontext.agg(args[0], aggregator, *iters, **kwargs)
                return result

            # Public callable: carries ``func``'s metadata via ``wraps`` and
            # turns its arguments into an expression over the new node.
            @functools.wraps(func)
            def wrapped(*args):
                return UDAFNode(*args).to_expr()

            return wrapped
예제 #2
0
파일: generic.py 프로젝트: shshe/ibis
@execute_node.register(ops.Where, pd.Series, pd.Series, pd.Series)
@execute_node.register(ops.Where, pd.Series, pd.Series, scalar_types)
def execute_node_where_series_series_series(op, cond, true, false, **kwargs):
    """Evaluate ``ops.Where`` when the condition and true branch are Series.

    Values of ``true`` are kept wherever ``cond`` holds; ``false`` is used
    elsewhere.  ``false`` may be a Series or a scalar, matching the two
    registrations above.
    """
    # A scalar ``false`` is handed over as-is: pandas will broadcast it.
    selected = true.where(cond, other=false)
    return selected


# Series, scalar, Series
def execute_node_where_series_scalar_scalar(op, cond, true, false, **kwargs):
    """Evaluate ``ops.Where`` for a Series condition and a scalar true branch.

    Parameters
    ----------
    op
        The operation node being executed; not used here.
    cond : pd.Series
        Boolean mask; ``true`` is used where it holds.
    true
        Scalar repeated once per row of ``cond``.
    false
        Replacement used wherever ``cond`` does not hold.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    pd.Series
        One value per row of ``cond``, carrying ``cond``'s index.
    """
    # Build the repeated ``true`` column on cond's own index. ``Series.where``
    # aligns the mask by label, so leaving the default RangeIndex here would
    # mismatch any non-default index on ``cond`` and yield ``false``
    # everywhere.
    true_values = pd.Series(np.repeat(true, len(cond)), index=cond.index)
    return true_values.where(cond, other=false)


# Series, scalar, scalar
for scalar_type in scalar_types:
    # Register the shared implementation once per scalar type, for the
    # (Series, scalar_type, scalar_type) argument combination. Both scalar
    # positions use the same type, so this loop registers no mixed-type
    # pairs. The name is rebound to whatever ``register(...)`` returns, and
    # the next iteration decorates that result.
    execute_node_where_series_scalar_scalar = execute_node.register(
        ops.Where, pd.Series, scalar_type,
        scalar_type)(execute_node_where_series_scalar_scalar)


# scalar, Series, Series
@execute_node.register(ops.Where, boolean_types, pd.Series, pd.Series)
def execute_node_where_scalar_scalar_scalar(op, cond, true, false, **kwargs):
    """Evaluate ``ops.Where`` when the condition is a single boolean.

    Returns ``true`` unchanged when ``cond`` is truthy and ``false``
    unchanged otherwise.
    """
    # Neither branch is inspected, so there is no need to check that they
    # are scalars too. This lets users do things like:
    # ibis.where(even_or_odd_bool, [2, 4, 6], [1, 3, 5])
    if cond:
        return true
    return false


# scalar, scalar, scalar
for scalar_type in scalar_types:
    execute_node_where_scalar_scalar_scalar = execute_node.register(
예제 #3
0
파일: udf.py 프로젝트: wkusnierczyk/ibis
        def wrapper(func):
            """Create a UDF node class for ``func`` and register its rules.

            A node class named after ``func`` is created, elementwise
            execution rules for plain and grouped inputs are registered on
            ``execute_node``, and a callable building an expression from the
            node is returned.  ``input_type`` and ``output_type`` come from
            the enclosing scope.
            """
            # check the declared input types against the function signature
            funcsig = valid_function_signature(input_type, func)

            # build the custom node class, named after the wrapped function
            node_attrs = {
                'signature': sig.TypeSignature.from_dtypes(input_type),
                'output_type': output_type.column_type,
            }
            UDFNode = type(func.__name__, (ops.ValueOp, ), node_attrs)

            # argument types for the two non-grouped registrations
            series_types = udf_signature(
                input_type, pin=None, klass=pd.Series)
            python_types = [
                rule_to_python_type(argtype) + nullable(argtype)
                for argtype in input_type
            ]

            # rule for a simple elementwise call on non-grouped arguments
            @execute_node.register(UDFNode, *series_types)
            @execute_node.register(UDFNode, *python_types)
            def execute_udf_node(op, *args, **kwargs):
                call_args, call_kwargs = arguments_from_signature(
                    funcsig, *args, **kwargs)
                return func(*call_args, **call_kwargs)

            # rule for elementwise operations on grouped Series: one
            # signature per argument position
            group_by_signatures = [
                udf_signature(input_type, pin=position, klass=SeriesGroupBy)
                for position in range(len(input_type))
            ]
            register_grouped = toolz.compose(*(
                execute_node.register(UDFNode, *types)
                for types in group_by_signatures
            ))

            @register_grouped
            def execute_udf_node_groupby(op, *args, **kwargs):
                # collect the grouper of every argument that has one
                groupers = []
                for arg in args:
                    grouper = getattr(arg, 'grouper', None)
                    if grouper is not None:
                        groupers.append(grouper)

                # all grouping keys must be identical
                assert all(groupers[0] == other for other in groupers[1:])

                # this is a scalar operation on grouped columns: apply it
                # to the underlying Series and regroup the result afterwards
                underlying = [getattr(arg, 'obj', arg) for arg in args]
                groupings = groupers[0].groupings
                call_args, call_kwargs = arguments_from_signature(
                    signature(func), *underlying, **kwargs)
                ungrouped = func(*call_args, **call_kwargs)
                return ungrouped.groupby(groupings)

            @functools.wraps(func)
            def wrapped(*args):
                node = UDFNode(*args)
                return node.to_expr()

            return wrapped
예제 #4
0
def pre_execute_elementwise_udf(
    op, *clients, scope=None, aggcontet=None, **kwargs
):
    """Register execution rules for the elementwise UDF described by ``op``.

    Rules are registered on ``execute_node`` for
    ``ops.ElementWiseVectorizedUDF`` using ``op.input_type``: first for
    grouped inputs, then for plain Series / Python-typed inputs.

    ``clients``, ``aggcontet`` and ``kwargs`` are accepted but not used in
    the body.  NOTE(review): ``aggcontet`` looks like a misspelling of
    ``aggcontext`` -- confirm against callers before renaming.

    Returns ``scope`` unchanged.
    """
    input_type = op.input_type

    # Rule for elementwise operations on grouped Series: one signature per
    # argument position.
    grouped_signatures = [
        udf_signature(input_type, pin=position, klass=SeriesGroupBy)
        for position in range(len(input_type))
    ]
    register_grouped = toolz.compose(*(
        execute_node.register(ops.ElementWiseVectorizedUDF, *types)
        for types in grouped_signatures
    ))

    @register_grouped
    def execute_udf_node_groupby(op, *args, **kwargs):
        udf = op.func

        # collect the grouper of every argument that has one
        groupers = []
        for arg in args:
            grouper = getattr(arg, 'grouper', None)
            if grouper is not None:
                groupers.append(grouper)

        # all grouping keys must be identical
        assert all(groupers[0] == other for other in groupers[1:])

        # this is a scalar operation on grouped columns: apply it to the
        # underlying Series and regroup the result afterwards
        underlying = [getattr(arg, 'obj', arg) for arg in args]
        groupings = groupers[0].groupings
        call_args, call_kwargs = arguments_from_signature(
            signature(udf), *underlying, **kwargs)
        ungrouped = udf(*call_args, **call_kwargs)
        return ungrouped.groupby(groupings)

    # Rule for a simple elementwise call on non-grouped arguments.
    series_types = udf_signature(input_type, pin=None, klass=pd.Series)
    python_types = [
        rule_to_python_type(argtype) + nullable(argtype)
        for argtype in input_type
    ]

    @execute_node.register(ops.ElementWiseVectorizedUDF, *series_types)
    @execute_node.register(ops.ElementWiseVectorizedUDF, *python_types)
    def execute_udf_node(op, *args, **kwargs):
        udf = op.func
        udf_sig = valid_function_signature(input_type, udf)

        call_args, call_kwargs = arguments_from_signature(
            udf_sig, *args, **kwargs)
        return udf(*call_args, **call_kwargs)

    return scope
예제 #5
0
파일: generic.py 프로젝트: cloudera/ibis
@execute_node.register(ops.Where, pd.Series, pd.Series, pd.Series)
@execute_node.register(ops.Where, pd.Series, pd.Series, scalar_types)
def execute_node_where_series_series_series(op, cond, true, false, **kwargs):
    """Pick from ``true`` where ``cond`` holds and from ``false`` elsewhere.

    Registered for a Series condition and a Series true branch; the false
    branch may be a Series or a scalar.
    """
    # No conversion of a scalar ``false`` is needed: pandas will broadcast it.
    picked = true.where(cond, other=false)
    return picked


# Series, scalar, Series
def execute_node_where_series_scalar_scalar(op, cond, true, false, **kwargs):
    """Evaluate ``ops.Where`` for a Series condition and a scalar true branch.

    Parameters
    ----------
    op
        The operation node being executed; not used here.
    cond : pd.Series
        Boolean mask; ``true`` is used where it holds.
    true
        Scalar repeated once per row of ``cond``.
    false
        Replacement used wherever ``cond`` does not hold.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    pd.Series
        One value per row of ``cond``, carrying ``cond``'s index.
    """
    # Reuse cond's index for the repeated ``true`` column. ``Series.where``
    # aligns the mask by label; with a default RangeIndex a ``cond`` indexed
    # any other way would not line up and every row would fall back to
    # ``false``.
    repeated_true = pd.Series(np.repeat(true, len(cond)), index=cond.index)
    return repeated_true.where(cond, other=false)


# Series, scalar, scalar
for scalar_type in scalar_types:
    # Register the shared implementation once per scalar type, for the
    # (Series, scalar_type, scalar_type) argument combination. Both scalar
    # positions use the same type, so this loop registers no mixed-type
    # pairs. The name is rebound to whatever ``register(...)`` returns, and
    # the next iteration decorates that result.
    execute_node_where_series_scalar_scalar = execute_node.register(
        ops.Where, pd.Series, scalar_type, scalar_type
    )(execute_node_where_series_scalar_scalar)


# scalar, Series, Series
@execute_node.register(ops.Where, boolean_types, pd.Series, pd.Series)
def execute_node_where_scalar_scalar_scalar(op, cond, true, false, **kwargs):
    """Return ``true`` when the boolean ``cond`` is truthy, else ``false``.

    The chosen branch is returned as-is.
    """
    # The branches are never inspected, so checking that they are scalars
    # is unnecessary. This allows users to do things like:
    # ibis.where(even_or_odd_bool, [2, 4, 6], [1, 3, 5])
    if not cond:
        return false
    return true


# scalar, scalar, scalar
for scalar_type in scalar_types:
    execute_node_where_scalar_scalar_scalar = execute_node.register(