Esempio n. 1
0
def _map(output_selector, *args):
    """Build an Apply step from a flexible positional argument list.

    After ``output_selector`` the accepted forms are:

        (function)
        (function, output_field)
        (input_selector, function)
        (input_selector, function, output_field)

    A missing input selector defaults to ``Fields.ALL`` and a missing
    output field to ``Fields.UNKNOWN``. In the two-argument form the first
    argument is taken to be the function when it is a plain Python function
    or when ``_any_instance`` accepts it against DecoratedFunction,
    cascading.operation.Function and cascading.operation.Filter.

    Returns ``Apply(input_selector, <resolved function>, output_selector)``.
    Raises Exception when called with zero or more than three extra arguments.
    """
    arg_count = len(args)
    if arg_count == 3:
        input_selector, function, output_field = args
    elif arg_count == 2:
        first, second = args
        function_comes_first = inspect.isfunction(first) or _any_instance(
            first,
            (DecoratedFunction, cascading.operation.Function, cascading.operation.Filter))
        if function_comes_first:
            # (function, output_field)
            input_selector, function, output_field = Fields.ALL, first, second
        else:
            # (input_selector, function)
            input_selector, function, output_field = first, second, Fields.UNKNOWN
    elif arg_count == 1:
        input_selector, function, output_field = Fields.ALL, args[0], Fields.UNKNOWN
    else:
        raise Exception('map_{add,replace} needs to be called with 1 to 3 parameters')

    if isinstance(function, DecoratedFunction):
        # Use the UDF's own decorators unless output fields were given here.
        operation = function
        if output_field != Fields.UNKNOWN:
            # Work on a copy so the caller's decorated function keeps its
            # original 'produces' setting.
            override = DecoratedFunction.decorate_function(function.decorators['function'])
            override.decorators = dict(function.decorators)
            override.decorators['produces'] = output_field
            operation = override
    elif inspect.isfunction(function):
        # Undecorated Python function: wrap it as a UDF producing output_field.
        operation = udf(produces=output_field)(function)
    else:
        # Anything else is handed to Apply untouched.
        operation = function
    return Apply(input_selector, operation, output_selector)
Esempio n. 2
0
def _map(output_selector, *args):
    """Turn a variable-length argument list into an Apply operation.

    Supported positional forms after ``output_selector``:

        (function)
        (function, output_field)
        (input_selector, function)
        (input_selector, function, output_field)

    When no input selector is given ``Fields.ALL`` is used; when no output
    field is given ``Fields.UNKNOWN`` is used. With two arguments, the first
    one counts as the function if ``inspect.isfunction`` is true for it or
    ``_any_instance`` accepts it against DecoratedFunction,
    cascading.operation.Function and cascading.operation.Filter.

    Returns the ``Apply`` built from the input selector, the resolved
    function and ``output_selector``. Raises Exception for any other number
    of extra arguments.
    """
    supplied = len(args)
    if supplied == 3:
        input_selector, function, output_field = args
    elif supplied == 2:
        head, tail = args
        head_is_function = inspect.isfunction(head) or _any_instance(
            head,
            (DecoratedFunction,
             cascading.operation.Function,
             cascading.operation.Filter))
        if head_is_function:
            # Function first, output fields second
            input_selector = Fields.ALL
            function = head
            output_field = tail
        else:
            # Input tuple argument selector first, function second
            input_selector = head
            function = tail
            output_field = Fields.UNKNOWN
    elif supplied == 1:
        input_selector = Fields.ALL
        function = args[0]
        output_field = Fields.UNKNOWN
    else:
        raise Exception(
            'map_{add,replace} needs to be called with 1 to 3 parameters')

    if isinstance(function, DecoratedFunction):
        # The UDF's decorators win unless output fields were passed in.
        resolved = function
        if output_field != Fields.UNKNOWN:
            # Copy the decorators so the original UDF is left unmodified.
            resolved = DecoratedFunction.decorate_function(
                function.decorators['function'])
            resolved.decorators = dict(function.decorators)
            resolved.decorators['produces'] = output_field
    elif inspect.isfunction(function):
        # Plain Python function: decorate it on the fly.
        resolved = udf(produces=output_field)(function)
    else:
        # Passed through to Apply as-is.
        resolved = function
    return Apply(input_selector, resolved, output_selector)
Esempio n. 3
0
def group_by(*args, **kwargs):
    """Create a delayed GroupBy pipe, optionally followed by an Every step.

    Supported positional forms:

        ()
        (grouping_fields)
        (grouping_fields, function)
        (grouping_fields, function, output_field)
        (grouping_fields, input_selector, function)
        (grouping_fields, input_selector, function, output_field)

    In the three-argument form the second argument is treated as the
    function when it is a plain Python function or an instance of
    DecoratedFunction, cascading.operation.Aggregator or
    cascading.operation.Buffer. A missing input selector defaults to
    ``Fields.ALL`` and a missing output field to ``Fields.UNKNOWN``.

    Keyword arguments are forwarded unchanged to ``GroupBy``.

    Returns a ``_DelayedInitialization`` wrapping a function that pipes a
    parent into ``GroupBy`` and, when a function was supplied, into
    ``Every``. Raises Exception when more than four positional arguments
    are given.
    """
    if len(args) == 0:
        grouping_fields = None
        parameters = ()
    elif len(args) == 1:
        grouping_fields = args[0]
        parameters = ()
    elif len(args) == 2:
        grouping_fields = args[0]
        parameters = (Fields.ALL, args[1], Fields.UNKNOWN)
    elif len(args) == 3:
        grouping_fields = args[0]
        if inspect.isfunction(args[1]) or isinstance(args[1], \
        (DecoratedFunction, cascading.operation.Aggregator, cascading.operation.Buffer)):
            # The first argument is an aggregator/buffer,
            # the second is the output fields
            parameters = (Fields.ALL, args[1], args[2])
        else:
            # The first argument is the input selector,
            # the second is the aggregator/buffer
            parameters = (args[1], args[2], Fields.UNKNOWN)
    elif len(args) == 4:
        grouping_fields = args[0]
        parameters = (args[1], args[2], args[3])
    else:
        # Calling with no arguments is a supported form (see the first
        # branch), so the valid range is 0 to 4, not 1 to 4.
        raise Exception('group_by needs to be called with 0 to 4 parameters')

    has_aggregator = bool(parameters)
    input_selector = None
    df = None
    if has_aggregator:
        (input_selector, function, output_field) = parameters
        if isinstance(function, DecoratedFunction):
            # By default we take everything from the UDF's decorators
            df = function
            if output_field != Fields.UNKNOWN:
                # But if output fields were specified here, use those on a
                # copy so the original UDF is not modified
                df = DecoratedFunction.decorate_function(
                    function.decorators['function'])
                df.decorators = dict(function.decorators)
                df.decorators['produces'] = output_field
        elif inspect.isfunction(function):
            df = udf(produces=output_field)(function)
        else:
            df = function

    def pipe(parent):
        # An empty/None grouping_fields means GroupBy gets only the kwargs.
        if grouping_fields:
            grouped = parent | GroupBy(grouping_fields, **kwargs)
        else:
            grouped = parent | GroupBy(**kwargs)
        if not has_aggregator:
            return grouped
        return grouped | Every(df, argument_selector=input_selector)

    return _DelayedInitialization(pipe)
Esempio n. 4
0
 def fun_decorator(function_or_callabledict):
     """Return a DecoratedFunction carrying the extra decorator parameters.

     If the argument is already a DecoratedFunction it is reused; otherwise
     it is wrapped with ``DecoratedFunction.decorate_function``. In both
     cases ``additional_parameters`` is merged into its ``decorators``.

     NOTE(review): ``additional_parameters`` is not defined in this block;
     presumably it is bound by the enclosing scope -- confirm.
     """
     decorated = function_or_callabledict
     if not isinstance(decorated, DecoratedFunction):
         # The original function comes next, so wrap it first
         decorated = DecoratedFunction.decorate_function(decorated)
     # Merge the attributes into the decorated function
     decorated.decorators.update(additional_parameters)
     return decorated
Esempio n. 5
0
 def fun_decorator(function_or_callabledict):
     """Merge ``additional_parameters`` into a (possibly new) DecoratedFunction.

     An existing DecoratedFunction is updated in place and returned; any
     other argument is first passed through
     ``DecoratedFunction.decorate_function``.

     NOTE(review): ``additional_parameters`` comes from outside this block,
     presumably the enclosing decorator factory -- confirm.
     """
     already_decorated = isinstance(function_or_callabledict, DecoratedFunction)
     if already_decorated:
         # Another decorator has wrapped this one before us
         result = function_or_callabledict
     else:
         # We are looking at the original function
         result = DecoratedFunction.decorate_function(function_or_callabledict)
     result.decorators.update(additional_parameters)
     return result
Esempio n. 6
0
def group_by(*args, **kwargs):
    """Create a delayed GroupBy pipe, optionally followed by an Every step.

    Supported positional forms:

        ()
        (grouping_fields)
        (grouping_fields, function)
        (grouping_fields, function, output_field)
        (grouping_fields, input_selector, function)
        (grouping_fields, input_selector, function, output_field)

    In the three-argument form the second argument is treated as the
    function when it is a plain Python function or an instance of
    DecoratedFunction, cascading.operation.Aggregator or
    cascading.operation.Buffer. A missing input selector defaults to
    ``Fields.ALL`` and a missing output field to ``Fields.UNKNOWN``.

    Keyword arguments are forwarded unchanged to ``GroupBy``.

    Returns a ``_DelayedInitialization`` wrapping a function that pipes a
    parent into ``GroupBy`` and, when a function was supplied, into
    ``Every``. Raises Exception when more than four positional arguments
    are given.
    """
    if len(args) == 0:
        grouping_fields = None
        parameters = ()
    elif len(args) == 1:
        grouping_fields = args[0]
        parameters = ()
    elif len(args) == 2:
        grouping_fields = args[0]
        parameters = (Fields.ALL, args[1], Fields.UNKNOWN)
    elif len(args) == 3:
        grouping_fields = args[0]
        if inspect.isfunction(args[1]) or isinstance(args[1], \
        (DecoratedFunction, cascading.operation.Aggregator, cascading.operation.Buffer)):
            # The first argument is an aggregator/buffer,
            # the second is the output fields
            parameters = (Fields.ALL, args[1], args[2])
        else:
            # The first argument is the input selector,
            # the second is the aggregator/buffer
            parameters = (args[1], args[2], Fields.UNKNOWN)
    elif len(args) == 4:
        grouping_fields = args[0]
        parameters = (args[1], args[2], args[3])
    else:
        # Zero arguments is handled above, so the accepted range is 0 to 4;
        # the message previously claimed 1 to 4.
        raise Exception('group_by needs to be called with 0 to 4 parameters')

    with_every = bool(parameters)
    input_selector = None
    df = None
    if with_every:
        (input_selector, function, output_field) = parameters
        if isinstance(function, DecoratedFunction):
            # By default we take everything from the UDF's decorators
            df = function
            if output_field != Fields.UNKNOWN:
                # But if output fields were specified here, use those on a
                # copy so the original UDF is not modified
                df = DecoratedFunction.decorate_function(function.decorators['function'])
                df.decorators = dict(function.decorators)
                df.decorators['produces'] = output_field
        elif inspect.isfunction(function):
            df = udf(produces=output_field)(function)
        else:
            df = function

    def pipe(parent):
        # A falsy grouping_fields means GroupBy is built from kwargs alone.
        if grouping_fields:
            grouped = parent | GroupBy(grouping_fields, **kwargs)
        else:
            grouped = parent | GroupBy(**kwargs)
        if with_every:
            return grouped | Every(df, argument_selector=input_selector)
        return grouped

    return _DelayedInitialization(pipe)
Esempio n. 7
0
 def fun_decorator(function_or_callabledict):
     """Return a DecoratedFunction updated with ``additional_parameters``.

     An argument that is already a DecoratedFunction is reused. Anything
     else is stored under the 'function' key of a fresh DecoratedFunction
     whose other decorator entries are seeded with the defaults listed
     below, before ``additional_parameters`` is merged in.

     NOTE(review): ``additional_parameters`` is not defined in this block;
     presumably it is bound by the enclosing scope -- confirm.
     """
     if isinstance(function_or_callabledict, DecoratedFunction):
         # Another decorator is next
         wrapped = function_or_callabledict
     else:
         # The original function comes next: start from default settings
         wrapped = DecoratedFunction()
         defaults = (
             ('function', function_or_callabledict),
             ('type', 'none'),
             ('input_conversion',
              CascadingBaseOperationWrapper.ConvertInputTuples.NONE),
             ('output_method',
              CascadingRecordProducerWrapper.OutputMethod.YIELDS_OR_RETURNS),
             ('output_type',
              CascadingRecordProducerWrapper.OutputType.AUTO),
             ('flow_process_pass_in',
              CascadingRecordProducerWrapper.FlowProcessPassIn.NO),
             ('args', None),
             ('kwargs', None),
         )
         # Assign one key at a time, in the listed order
         for key, value in defaults:
             wrapped.decorators[key] = value
     # Add the attributes to the decorated function
     wrapped.decorators.update(additional_parameters)
     return wrapped
Esempio n. 8
0
def filter_by(function):
    """Wrap ``function`` in a Filter, forcing its UDF type to 'filter'.

    A plain callable is decorated with ``udf(type='filter')``. A
    DecoratedFunction must already have type 'filter' or 'auto'; a copy of
    its decorators is made so the original keeps its own settings, and the
    copy's type is set to 'filter'.

    Raises Exception if a DecoratedFunction has any other type.
    """
    if not isinstance(function, DecoratedFunction):
        return Filter(udf(type='filter')(function))

    if function.decorators['type'] not in ('filter', 'auto'):
        raise Exception('Function is not a filter')

    # Re-decorate the underlying function and give it its own decorator dict
    # so that the caller's DecoratedFunction is not overwritten.
    as_filter = DecoratedFunction.decorate_function(function.decorators['function'])
    as_filter.decorators = dict(function.decorators)
    as_filter.decorators['type'] = 'filter'
    return Filter(as_filter)
Esempio n. 9
0
def filter_by(function):
    """Build a Filter from a plain callable or a DecoratedFunction.

    Plain callables go through ``udf(type='filter')``. For a
    DecoratedFunction the declared type must be 'filter' or 'auto';
    otherwise an Exception is raised. The decorators are copied onto a
    newly decorated function, whose type is then set to 'filter', leaving
    the argument's own decorators unchanged.
    """
    is_decorated = isinstance(function, DecoratedFunction)
    if is_decorated:
        declared_type = function.decorators['type']
        if declared_type not in ('filter', 'auto'):
            raise Exception('Function is not a filter')
        # Copy the settings rather than mutating the caller's object
        original = function.decorators['function']
        candidate = DecoratedFunction.decorate_function(original)
        candidate.decorators = dict(function.decorators)
        candidate.decorators['type'] = 'filter'
    else:
        candidate = udf(type='filter')(function)
    return Filter(candidate)