def _map(output_selector, *args): """Maps the given input fields to output fields.""" if len(args) == 1: (input_selector, function, output_field) = \ (Fields.ALL, args[0], Fields.UNKNOWN) elif len(args) == 2: if inspect.isfunction(args[0]) or _any_instance(args[0], \ (DecoratedFunction, cascading.operation.Function, cascading.operation.Filter)): # The first argument is a function, the second is the output fields (input_selector, function, output_field) = \ (Fields.ALL, args[0], args[1]) else: # The first argument is the input tuple argument selector, # the second one is the function (input_selector, function, output_field) = \ (args[0], args[1], Fields.UNKNOWN) elif len(args) == 3: (input_selector, function, output_field) = args else: raise Exception('map_{add,replace} needs to be called with 1 to 3 parameters') if isinstance(function, DecoratedFunction): # By default we take everything from the UDF's decorators df = function if output_field != Fields.UNKNOWN: # But if we specified the output fields for the map, use that df = DecoratedFunction.decorate_function(function.decorators['function']) df.decorators = dict(function.decorators) df.decorators['produces'] = output_field elif inspect.isfunction(function): df = udf(produces=output_field)(function) else: df = function return Apply(input_selector, df, output_selector)
def _map(output_selector, *args): """Maps the given input fields to output fields.""" if len(args) == 1: (input_selector, function, output_field) = \ (Fields.ALL, args[0], Fields.UNKNOWN) elif len(args) == 2: if inspect.isfunction(args[0]) or _any_instance(args[0], \ (DecoratedFunction, cascading.operation.Function, cascading.operation.Filter)): # The first argument is a function, the second is the output fields (input_selector, function, output_field) = \ (Fields.ALL, args[0], args[1]) else: # The first argument is the input tuple argument selector, # the second one is the function (input_selector, function, output_field) = \ (args[0], args[1], Fields.UNKNOWN) elif len(args) == 3: (input_selector, function, output_field) = args else: raise Exception( 'map_{add,replace} needs to be called with 1 to 3 parameters') if isinstance(function, DecoratedFunction): # By default we take everything from the UDF's decorators df = function if output_field != Fields.UNKNOWN: # But if we specified the output fields for the map, use that df = DecoratedFunction.decorate_function( function.decorators['function']) df.decorators = dict(function.decorators) df.decorators['produces'] = output_field elif inspect.isfunction(function): df = udf(produces=output_field)(function) else: df = function return Apply(input_selector, df, output_selector)
def group_by(*args, **kwargs):
    """Create a delayed GroupBy pipe, optionally followed by an Every.

    Accepted shapes for ``args``:

    * ``()`` -- GroupBy is built from the keyword arguments only.
    * ``(grouping_fields,)``
    * ``(grouping_fields, function)`` -- input selector is Fields.ALL and
      output fields are Fields.UNKNOWN.
    * ``(grouping_fields, function, output_field)`` -- chosen when the
      second item is a plain Python function, a DecoratedFunction, or a
      cascading.operation.Aggregator/Buffer.
    * ``(grouping_fields, input_selector, function)`` -- any other
      three-item call.
    * ``(grouping_fields, input_selector, function, output_field)``.

    Keyword arguments are forwarded unchanged to GroupBy.

    Returns a ``_DelayedInitialization`` wrapping a one-argument function
    that attaches the GroupBy (and the Every, when a function was given)
    to the parent it receives.

    Raises Exception when more than four positional arguments are passed.
    """
    argc = len(args)
    if argc > 4:
        # A call without positional arguments is valid (handled below), so
        # the accepted range is 0 to 4, not 1 to 4.
        raise Exception('group_by needs to be called with 0 to 4 parameters')

    grouping_fields = args[0] if args else None

    if argc <= 1:
        parameters = ()
    elif argc == 2:
        parameters = (Fields.ALL, args[1], Fields.UNKNOWN)
    elif argc == 3:
        if inspect.isfunction(args[1]) or isinstance(
                args[1],
                (DecoratedFunction, cascading.operation.Aggregator,
                 cascading.operation.Buffer)):
            # The second argument is an aggregator/buffer,
            # the third is the output fields
            parameters = (Fields.ALL, args[1], args[2])
        else:
            parameters = (args[1], args[2], Fields.UNKNOWN)
    else:
        parameters = (args[1], args[2], args[3])

    if not parameters:
        # Grouping only, no Every stage.
        def pipe(parent):
            if grouping_fields:
                return parent | GroupBy(grouping_fields, **kwargs)
            return parent | GroupBy(**kwargs)
        return _DelayedInitialization(pipe)

    (input_selector, function, output_field) = parameters
    if isinstance(function, DecoratedFunction):
        # By default we take everything from the UDF's decorators
        df = function
        if output_field != Fields.UNKNOWN:
            # Explicit output fields were given: carry them on a copy so the
            # caller's DecoratedFunction keeps its original decorators.
            df = DecoratedFunction.decorate_function(
                function.decorators['function'])
            df.decorators = dict(function.decorators)
            df.decorators['produces'] = output_field
    elif inspect.isfunction(function):
        df = udf(produces=output_field)(function)
    else:
        df = function

    def pipe(parent):
        if grouping_fields:
            grouped = parent | GroupBy(grouping_fields, **kwargs)
        else:
            grouped = parent | GroupBy(**kwargs)
        return grouped | Every(df, argument_selector=input_selector)
    return _DelayedInitialization(pipe)
def fun_decorator(function_or_callabledict):
    """Merge ``additional_parameters`` into a DecoratedFunction's decorators.

    If the argument is already a DecoratedFunction (another decorator was
    applied before this one) it is reused; otherwise it is wrapped with
    ``DecoratedFunction.decorate_function``.  ``additional_parameters`` is
    not defined here and is taken from the surrounding scope.

    Returns the DecoratedFunction whose ``decorators`` were updated.
    """
    already_decorated = isinstance(function_or_callabledict, DecoratedFunction)
    if already_decorated:
        # Another decorator is next in the chain
        target = function_or_callabledict
    else:
        # The original function comes next
        target = DecoratedFunction.decorate_function(function_or_callabledict)

    # Record this decorator's attributes on the decorated function
    target.decorators.update(additional_parameters)
    return target
def fun_decorator(function_or_callabledict):
    """Apply the surrounding scope's ``additional_parameters`` to a function.

    A DecoratedFunction argument is updated directly; any other argument is
    first passed through ``DecoratedFunction.decorate_function``.  The
    resulting object's ``decorators`` mapping is updated with
    ``additional_parameters`` and the object is returned.
    """
    if not isinstance(function_or_callabledict, DecoratedFunction):
        # The original function comes next: wrap it before adding attributes
        decorated = DecoratedFunction.decorate_function(
            function_or_callabledict)
    else:
        # Another decorator is next: keep using the same object
        decorated = function_or_callabledict
    decorated.decorators.update(additional_parameters)
    return decorated
def group_by(*args, **kwargs):
    """Return a delayed pipe that groups, and optionally aggregates, tuples.

    Positional arguments are interpreted by count:

    * 0 -- no grouping fields; GroupBy receives only the keyword arguments.
    * 1 -- ``grouping_fields``.
    * 2 -- ``grouping_fields, function`` (inputs Fields.ALL, outputs
      Fields.UNKNOWN).
    * 3 -- ``grouping_fields, function, output_field`` when the second item
      is a plain function, a DecoratedFunction or a
      cascading.operation.Aggregator/Buffer; otherwise
      ``grouping_fields, input_selector, function``.
    * 4 -- ``grouping_fields, input_selector, function, output_field``.

    All keyword arguments are passed through to GroupBy.

    Returns ``_DelayedInitialization(pipe)`` where ``pipe(parent)`` builds
    ``parent | GroupBy(...)`` and, if a function was supplied, pipes that
    into ``Every(function, argument_selector=input_selector)``.

    Raises Exception for more than four positional arguments.
    """
    if len(args) == 0:
        grouping_fields = None
        parameters = ()
    elif len(args) == 1:
        grouping_fields = args[0]
        parameters = ()
    elif len(args) == 2:
        grouping_fields = args[0]
        parameters = (Fields.ALL, args[1], Fields.UNKNOWN)
    elif len(args) == 3:
        grouping_fields = args[0]
        if inspect.isfunction(args[1]) or isinstance(
                args[1],
                (DecoratedFunction, cascading.operation.Aggregator,
                 cascading.operation.Buffer)):
            # The second argument is an aggregator/buffer,
            # the third is the output fields
            parameters = (Fields.ALL, args[1], args[2])
        else:
            parameters = (args[1], args[2], Fields.UNKNOWN)
    elif len(args) == 4:
        grouping_fields = args[0]
        parameters = (args[1], args[2], args[3])
    else:
        # The zero-argument form above is accepted, so the valid range
        # starts at 0.
        raise Exception('group_by needs to be called with 0 to 4 parameters')

    def _grouped(parent):
        # Shared by both pipe variants: attach the GroupBy to the parent.
        if grouping_fields:
            return parent | GroupBy(grouping_fields, **kwargs)
        return parent | GroupBy(**kwargs)

    if parameters:
        (input_selector, function, output_field) = parameters
        if isinstance(function, DecoratedFunction):
            # By default we take everything from the UDF's decorators
            df = function
            if output_field != Fields.UNKNOWN:
                # But if output fields were specified, use those on a copy
                # so the original decorators are left as they were
                df = DecoratedFunction.decorate_function(
                    function.decorators['function'])
                df.decorators = dict(function.decorators)
                df.decorators['produces'] = output_field
        elif inspect.isfunction(function):
            df = udf(produces=output_field)(function)
        else:
            df = function

        def pipe(parent):
            return _grouped(parent) | Every(
                df, argument_selector=input_selector)
    else:
        def pipe(parent):
            return _grouped(parent)

    return _DelayedInitialization(pipe)
def fun_decorator(function_or_callabledict):
    """Attach ``additional_parameters`` to a (possibly new) DecoratedFunction.

    A DecoratedFunction argument is reused.  Anything else is stored under
    the ``'function'`` key of a freshly created DecoratedFunction, whose
    ``decorators`` are first filled with the default settings listed below.
    ``additional_parameters`` (taken from the surrounding scope) is then
    merged on top, and the DecoratedFunction is returned.
    """
    if isinstance(function_or_callabledict, DecoratedFunction):
        # Another decorator is next
        decorated = function_or_callabledict
    else:
        # The original function comes next: start from the default settings
        decorated = DecoratedFunction()
        defaults = (
            ('function', function_or_callabledict),
            ('type', 'none'),
            ('input_conversion',
             CascadingBaseOperationWrapper.ConvertInputTuples.NONE),
            ('output_method',
             CascadingRecordProducerWrapper.OutputMethod.YIELDS_OR_RETURNS),
            ('output_type',
             CascadingRecordProducerWrapper.OutputType.AUTO),
            ('flow_process_pass_in',
             CascadingRecordProducerWrapper.FlowProcessPassIn.NO),
            ('args', None),
            ('kwargs', None),
        )
        for key, value in defaults:
            decorated.decorators[key] = value

    # Add the attributes to the decorated function
    decorated.decorators.update(additional_parameters)
    return decorated
def filter_by(function):
    """Return a Filter built from ``function``.

    A non-DecoratedFunction argument is wrapped with ``udf(type='filter')``.
    A DecoratedFunction must have a ``'type'`` decorator of ``'filter'`` or
    ``'auto'``; it is then copied and the copy's type is set to
    ``'filter'``.

    Raises Exception if a DecoratedFunction has any other type.
    """
    if not isinstance(function, DecoratedFunction):
        return Filter(udf(type='filter')(function))

    if function.decorators['type'] not in ('filter', 'auto'):
        raise Exception('Function is not a filter')

    # Work on a copy of the decorators so the original parameters are not
    # overwritten when the type is forced to 'filter'.
    as_filter = DecoratedFunction.decorate_function(
        function.decorators['function'])
    as_filter.decorators = dict(function.decorators)
    as_filter.decorators['type'] = 'filter'
    return Filter(as_filter)
def filter_by(function):
    """Wrap ``function`` so that it is treated as a filter.

    For a DecoratedFunction whose ``'type'`` decorator is ``'filter'`` or
    ``'auto'``, a new DecoratedFunction is made from the same underlying
    function with a copied ``decorators`` dict and ``'type'`` set to
    ``'filter'``.  Any other DecoratedFunction type raises Exception.
    Anything that is not a DecoratedFunction goes through
    ``udf(type='filter')``.

    Returns ``Filter(...)`` around the resulting object.
    """
    is_decorated = isinstance(function, DecoratedFunction)
    if is_decorated:
        declared_type = function.decorators['type']
        if declared_type not in ('filter', 'auto'):
            raise Exception('Function is not a filter')
        # Copy instead of mutating, so the caller's decorators stay intact
        filter_udf = DecoratedFunction.decorate_function(
            function.decorators['function'])
        filter_udf.decorators = dict(function.decorators)
        filter_udf.decorators['type'] = 'filter'
    else:
        to_filter_udf = udf(type='filter')
        filter_udf = to_filter_udf(function)
    return Filter(filter_udf)