Esempio n. 1
0
File: udf.py Progetto: bochuxt/ibis
    def wrapper(func):
        """Register ``func`` as a pandas aggregation UDF and return its builder.

        A new ``ops.Reduction`` subclass named after ``func`` is created, two
        ``execute_node`` rules are registered for it (one for plain
        ``pd.Series`` arguments, one for ``SeriesGroupBy`` arguments), and a
        callable that builds an expression from that node is returned.

        ``input_type`` and ``output_type`` are taken from the enclosing scope.
        """
        # Inspect the user function exactly once. The signature cannot change
        # between executions, so both execution rules below share this object
        # instead of re-running ``signature(func)`` every time a node is
        # executed. This also surfaces an uninspectable callable at decoration
        # time rather than on first execution.
        funcsig = signature(func)

        # Node class created on the fly; its name mirrors the decorated
        # function's name.
        UDAFNode = type(
            func.__name__,
            (ops.Reduction,),
            dict(input_type=input_type, output_type=output_type.scalar_type)
        )

        # NOTE(review): pause_ordering presumably defers re-sorting of the
        # dispatch graph while the rules are registered -- confirm against
        # ibis.pandas.dispatch.
        with pause_ordering():

            # An execution rule for a simple aggregate node
            @execute_node.register(
                UDAFNode, *udf_signature(input_type, klass=pd.Series)
            )
            def execute_udaf_node(op, *args, **kwargs):
                # Reconcile the positional/keyword arguments handed over by
                # the executor with what ``func`` actually declares.
                args, kwargs = arguments_from_signature(
                    funcsig, *args, **kwargs
                )
                return func(*args, **kwargs)

            # An execution rule for a grouped aggregation node. This includes
            # aggregates applied over a window.
            @execute_node.register(
                UDAFNode, *udf_signature(input_type, klass=SeriesGroupBy)
            )
            def execute_udaf_node_groupby(op, *args, **kwargs):
                # ``context`` must be supplied by the caller as a keyword; it
                # is removed from ``kwargs`` so it is not forwarded to
                # ``func`` through ``context.agg``.
                context = kwargs.pop('context', None)
                assert context is not None, 'context is None'

                # Construct a generator that yields the next group of data
                # for every argument excluding the first (the first argument
                # is handed to ``context.agg`` directly, which drives the
                # iteration over it).
                #
                # An argument that is not a SeriesGroupBy is repeated
                # indefinitely so it lines up with every group.
                iters = (
                    (data for _, data in arg)
                    if isinstance(arg, SeriesGroupBy)
                    else itertools.repeat(arg) for arg in args[1:]
                )

                def aggregator(first, *rest, **kwargs):
                    # map(next, rest) pulls the inputs for the next group
                    # from each of the per-argument iterators.
                    # TODO: might be inefficient to do this on every call
                    args, kwargs = arguments_from_signature(
                        funcsig, first, *map(next, rest), **kwargs
                    )
                    return func(*args, **kwargs)

                return context.agg(args[0], aggregator, *iters, **kwargs)

        @check_matching_signature(input_type)
        @functools.wraps(func)
        def wrapped(*args):
            # Calling the decorated function builds an expression from the
            # custom node instead of executing ``func``.
            return UDAFNode(*args).to_expr()

        return wrapped
Esempio n. 2
0
File: udf.py Progetto: xmnlab/ibis
    def wrapper(func):
        """Register ``func`` as an elementwise pandas UDF and return its builder.

        A new ``ops.ValueOp`` subclass named after ``func`` is created, two
        ``execute_node`` rules are registered for it (one for plain
        ``pd.Series`` arguments, one for ``SeriesGroupBy`` arguments), and a
        callable that builds an expression from that node is returned.

        ``input_type`` and ``output_type`` are taken from the enclosing scope.
        """
        # Inspect the user function exactly once. The signature cannot change
        # between executions, so both execution rules below share this object
        # instead of re-running ``signature(func)`` on every node execution.
        # This also surfaces an uninspectable callable at decoration time
        # rather than on first execution.
        funcsig = signature(func)

        # generate a new custom node
        UDFNode = type(
            func.__name__,
            (ops.ValueOp,),
            {
                'signature': sig.TypeSignature.from_dtypes(input_type),
                'output_type': output_type.array_type
            }
        )

        # Don't reorder the multiple dispatch graph for each of these
        # definitions
        with pause_ordering():

            # Define an execution rule for a simple elementwise Series
            # function
            @execute_node.register(
                UDFNode, *udf_signature(input_type, klass=pd.Series)
            )
            def execute_udf_node(op, *args, **kwargs):
                # Reconcile the arguments handed over by the executor with
                # what ``func`` actually declares.
                args, kwargs = arguments_from_signature(
                    funcsig, *args, **kwargs
                )
                return func(*args, **kwargs)

            # Define an execution rule for elementwise operations on a grouped
            # Series
            @execute_node.register(
                UDFNode, *udf_signature(input_type, klass=SeriesGroupBy)
            )
            def execute_udf_node_groupby(op, *args, **kwargs):
                # Collect the ``grouper`` of every argument that has one;
                # arguments without that attribute are skipped.
                groupers = [
                    grouper for grouper in (
                        getattr(arg, 'grouper', None) for arg in args
                    ) if grouper is not None
                ]

                # all grouping keys must be identical
                assert all(groupers[0] == grouper for grouper in groupers[1:])

                # we're performing a scalar operation on grouped column, so
                # perform the operation directly on the underlying Series and
                # regroup after it's finished
                arguments = [getattr(arg, 'obj', arg) for arg in args]
                groupings = groupers[0].groupings
                args, kwargs = arguments_from_signature(
                    funcsig, *arguments, **kwargs
                )
                return func(*args, **kwargs).groupby(groupings)

        @check_matching_signature(input_type)
        @functools.wraps(func)
        def wrapped(*args):
            # Calling the decorated function builds an expression from the
            # custom node instead of executing ``func``.
            return UDFNode(*args).to_expr()

        return wrapped
Esempio n. 3
0
import pytest

import numpy as np

import pandas as pd
import pandas.util.testing as tm

import ibis
import ibis.expr.types as ir
import ibis.expr.datatypes as dt

from ibis.pandas.udf import udf, udaf, nullable
from ibis.pandas.dispatch import pause_ordering

with pause_ordering():

    @udf(input_type=[dt.string], output_type=dt.int64)
    def my_string_length(series, **kwargs):
        """Return twice the length of each string in ``series``."""
        # Doubling the plain length makes the result distinguishable from an
        # ordinary string-length computation.
        lengths = series.str.len()
        return lengths * 2

    @udaf(input_type=[dt.string], output_type=dt.int64)
    def my_string_length_sum(series, **kwargs):
        """Return the sum of the doubled string lengths in ``series``."""
        doubled_lengths = series.str.len() * 2
        return doubled_lengths.sum()

    @udaf(input_type=[dt.double, dt.double], output_type=dt.double)
    def my_corr(lhs, rhs, **kwargs):
        """Return the result of ``lhs.corr(rhs)``."""
        correlation = lhs.corr(rhs)
        return correlation

    @udf([dt.double], dt.double)
    def add_one(x):
        """Return ``x`` incremented by ``1.0``."""
        incremented = x + 1.0
        return incremented