def _python_function_to_java(function):
    """Wrap a Python function in a serializable Java-side object.

    The function is handed to PythonFunctionWrapper, and the wrapper is
    tagged with RunningMode.LOCAL when the module-level running_mode equals
    'local', or with RunningMode.HADOOP for any other value.

    Arguments:
    function -- the Python function to wrap

    Returns the configured PythonFunctionWrapper instance.
    """
    java_function = PythonFunctionWrapper(function)
    modes = PythonFunctionWrapper.RunningMode
    # Anything other than an explicit 'local' is treated as a Hadoop run.
    selected_mode = modes.LOCAL if running_mode == 'local' else modes.HADOOP
    java_function.setRunningMode(selected_mode)
    return java_function
def _python_function_to_java(function):
    """Create the serializable Java object for a Python function.

    The function is wrapped in a PythonFunctionWrapper, and the wrapper's
    running mode is set to RunningMode.LOCAL when config['running.mode'] is
    'local', or to RunningMode.HADOOP for any other value.

    Arguments:
    function -- the Python function to wrap

    Returns the configured PythonFunctionWrapper instance.
    """
    # Wrapping must not execute user code: the function's source is neither
    # fetched with inspect.getsource() nor exec'ed/called here. Doing so ran
    # the function with a dummy argument as a side effect, and failed for
    # functions whose source is unavailable, indented, or not unary.
    wrapped_func = PythonFunctionWrapper(function)
    # NOTE(review): this reads config['running.mode'] whereas _wrap_function
    # reads the running_mode global -- confirm which is the intended source.
    if config['running.mode'] == 'local':
        wrapped_func.setRunningMode(PythonFunctionWrapper.RunningMode.LOCAL)
    else:
        wrapped_func.setRunningMode(PythonFunctionWrapper.RunningMode.HADOOP)
    return wrapped_func
def _wrap_function(function, casc_function_type):
    """Turn a Python callable into a Java operation object for Cascading.

    Cascading serializes the job pipeline before shipping it to the workers,
    so a Python function has to travel inside a Serializable Java wrapper
    and be rebuilt from source when it is deserialized on the other side.

    Arguments:
    function -- a Cascading Operation (returned untouched), a
      PyCascading-decorated Python function, or a plain Python function
    casc_function_type -- the Cascading Operation type whose operate method
      will end up calling the Python function

    Returns the Operation that was passed in, or a new casc_function_type
    instance carrying the wrapped Python function.
    """
    # Native Cascading operations need no wrapping at all.
    if isinstance(function, cascading.operation.Operation):
        return function

    if not isinstance(function, DecoratedFunction):
        # Plain Python function declared without decorators.
        operation = casc_function_type()
        target = function
    else:
        settings = function.decorators

        # Collect the constructor arguments for the operation.
        ctor_args = []
        if 'numargs_expected' in settings:
            ctor_args.append(settings['numargs_expected'])
        if 'produces' in settings and settings['produces']:
            ctor_args.append(coerce_to_fields(settings['produces']))

        # Instantiate the requested operation type (function or filter).
        operation = casc_function_type(*ctor_args)
        target = settings['function']

        operation.setConvertInputTuples(settings['input_conversion'])
        operation_kind = settings['type']
        if operation_kind in set(['map', 'reduce']):
            # Output settings are only applied to map and reduce operations.
            operation.setOutputMethod(settings['output_method'])
            operation.setOutputType(settings['output_type'])
        operation.setFlowProcessPassIn(settings['flow_process_pass_in'])
        operation.setContextArgs(settings['args'])
        operation.setContextKwArgs(settings['kwargs'])

    java_function = PythonFunctionWrapper(target)
    modes = PythonFunctionWrapper.RunningMode
    # Anything other than an explicit 'local' is treated as a Hadoop run.
    selected_mode = modes.LOCAL if running_mode == 'local' else modes.HADOOP
    java_function.setRunningMode(selected_mode)
    operation.setFunction(java_function)
    return operation