def _map(output_selector, *args):
    """Build an Apply operation from a flexible positional argument list.

    Accepted forms of *args:
      (function)
      (function, output_field)
      (input_selector, function)
      (input_selector, function, output_field)

    With two arguments, the first one is taken to be the function when it is
    a plain Python function, or when _any_instance() accepts it for
    DecoratedFunction, cascading.operation.Function or
    cascading.operation.Filter; otherwise it is taken to be the input
    selector. A missing input selector defaults to Fields.ALL and a missing
    output field defaults to Fields.UNKNOWN.

    Returns Apply(input_selector, <resolved function>, output_selector).
    Raises Exception when *args does not hold 1 to 3 values.
    """
    arg_count = len(args)
    if arg_count not in (1, 2, 3):
        raise Exception('map_{add,replace} needs to be called with 1 to 3 parameters')

    if arg_count == 1:
        input_selector = Fields.ALL
        function = args[0]
        output_field = Fields.UNKNOWN
    elif arg_count == 3:
        input_selector, function, output_field = args
    else:
        first, second = args
        if inspect.isfunction(first) or _any_instance(
                first,
                (DecoratedFunction,
                 cascading.operation.Function,
                 cascading.operation.Filter)):
            # Called as (function, output_field)
            input_selector, function, output_field = Fields.ALL, first, second
        else:
            # Called as (input_selector, function)
            input_selector, function, output_field = first, second, Fields.UNKNOWN

    # Unless one of the branches below applies, the callable is passed on
    # to Apply untouched.
    operation = function
    if isinstance(function, DecoratedFunction):
        if output_field != Fields.UNKNOWN:
            # Explicit output fields override the UDF's own 'produces'
            # setting. Work on a fresh DecoratedFunction with a copied
            # decorators dict so the caller's object is not modified.
            operation = DecoratedFunction.decorate_function(
                function.decorators['function'])
            operation.decorators = dict(function.decorators)
            operation.decorators['produces'] = output_field
    elif inspect.isfunction(function):
        # Plain Python function: decorate it as a UDF on the fly
        operation = udf(produces=output_field)(function)

    return Apply(input_selector, operation, output_selector)
def _map(output_selector, *args):
    """Create the Apply that maps input fields to output fields.

    *args may be given as one of:
      (function)
      (function, output_field)
      (input_selector, function)
      (input_selector, function, output_field)

    For the two-argument form, the first argument counts as the function if
    inspect.isfunction() is true for it, or if _any_instance() matches it
    against DecoratedFunction, cascading.operation.Function or
    cascading.operation.Filter. Otherwise it is used as the input selector.
    Omitted values fall back to Fields.ALL (input selector) and
    Fields.UNKNOWN (output field).

    Returns the Apply built from the input selector, the resolved function
    and output_selector. Raises Exception for any other number of arguments.
    """
    count = len(args)
    if count == 3:
        input_selector, function, output_field = args
    elif count == 2:
        head, tail = args
        head_is_function = inspect.isfunction(head) or _any_instance(
            head,
            (DecoratedFunction,
             cascading.operation.Function,
             cascading.operation.Filter))
        if head_is_function:
            # (function, output_field)
            input_selector, function, output_field = Fields.ALL, head, tail
        else:
            # (input_selector, function)
            input_selector, function, output_field = head, tail, Fields.UNKNOWN
    elif count == 1:
        input_selector, function, output_field = (
            Fields.ALL, args[0], Fields.UNKNOWN)
    else:
        raise Exception(
            'map_{add,replace} needs to be called with 1 to 3 parameters')

    if not isinstance(function, DecoratedFunction):
        if inspect.isfunction(function):
            # Undecorated Python function: turn it into a UDF here
            function = udf(produces=output_field)(function)
        # Anything else is handed to Apply as it is
        return Apply(input_selector, function, output_selector)

    if output_field != Fields.UNKNOWN:
        # The caller named the output fields, so they replace the UDF's
        # 'produces' decorator. This is done on a new DecoratedFunction with
        # its own decorators dict, leaving the original untouched.
        source = function
        function = DecoratedFunction.decorate_function(
            source.decorators['function'])
        function.decorators = dict(source.decorators)
        function.decorators['produces'] = output_field
    return Apply(input_selector, function, output_selector)
def filter_by(function):
    """Wrap the given function in a Filter, treating it as a filter UDF.

    If function is not a DecoratedFunction, it is decorated with
    udf(type='filter') first. If it is one, its 'type' decorator must be
    'filter' or 'auto'; a new DecoratedFunction is then created with a copy
    of the decorators and 'type' set to 'filter'.

    Raises Exception if a DecoratedFunction has any other 'type'.
    """
    if not isinstance(function, DecoratedFunction):
        return Filter(udf(type='filter')(function))

    if function.decorators['type'] not in ('filter', 'auto'):
        raise Exception('Function is not a filter')

    # Copy the decorators onto a fresh DecoratedFunction so that forcing the
    # type to 'filter' does not overwrite the caller's original settings.
    as_filter = DecoratedFunction.decorate_function(
        function.decorators['function'])
    as_filter.decorators = dict(function.decorators)
    as_filter.decorators['type'] = 'filter'
    return Filter(as_filter)
def filter_by(function):
    """Return a Filter built around function, with its UDF type forced to 'filter'.

    A DecoratedFunction is accepted only when its 'type' decorator is
    'filter' or 'auto'; otherwise Exception('Function is not a filter') is
    raised. Any other value is passed through udf(type='filter').
    """
    already_decorated = isinstance(function, DecoratedFunction)
    if already_decorated:
        declared_type = function.decorators['type']
        if declared_type not in ('filter', 'auto'):
            raise Exception('Function is not a filter')
        # Build a separate DecoratedFunction with its own decorators dict;
        # the 'type' override must not leak into the original object.
        filter_udf = DecoratedFunction.decorate_function(
            function.decorators['function'])
        filter_udf.decorators = dict(function.decorators)
        filter_udf.decorators['type'] = 'filter'
    else:
        make_filter_udf = udf(type='filter')
        filter_udf = make_filter_udf(function)
    return Filter(filter_udf)