Example #1
def _python_function_to_java(function):
    """Create the serializable Java object for a Python function."""
    wrapped_func = PythonFunctionWrapper(function)
    if running_mode == 'local':
        wrapped_func.setRunningMode(PythonFunctionWrapper.RunningMode.LOCAL)
    else:
        wrapped_func.setRunningMode(PythonFunctionWrapper.RunningMode.HADOOP)
    return wrapped_func
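
To make the control flow of this helper easy to follow outside a Jython/Cascading environment, here is a minimal, self-contained sketch. The PythonFunctionWrapper class and the module-level running_mode flag below are hypothetical stand-ins for the real PyCascading/Java objects; the sketch only illustrates how the local/Hadoop mode switch behaves.

# Sketch with hypothetical stand-ins; the real wrapper is a Java class.
class _RunningMode(object):
    LOCAL = 'LOCAL'
    HADOOP = 'HADOOP'

class PythonFunctionWrapper(object):      # stub, not the real Java class
    RunningMode = _RunningMode

    def __init__(self, function):
        self.function = function
        self.mode = None

    def setRunningMode(self, mode):
        self.mode = mode

running_mode = 'local'                    # set by the framework in reality

def _python_function_to_java(function):
    wrapped_func = PythonFunctionWrapper(function)
    if running_mode == 'local':
        wrapped_func.setRunningMode(PythonFunctionWrapper.RunningMode.LOCAL)
    else:
        wrapped_func.setRunningMode(PythonFunctionWrapper.RunningMode.HADOOP)
    return wrapped_func

assert _python_function_to_java(len).mode == 'LOCAL'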
Example #2
def _python_function_to_java(function):
    """Create the serializable Java object for a Python function."""
    import inspect, py_compile
    from org.python.core import BytecodeLoader

    # Debugging aid: recover the function's source with inspect, re-execute
    # it, and call it once to check that the function can be rebuilt from
    # source, as a worker must do after deserialization.
    source = inspect.getsource(function)
    function_name = function.func_name
    print 'source:', source
    print 'name:', function_name
    exec(source)
    exec('print ' + function_name + '("hi there")')
    wrapped_func = PythonFunctionWrapper(function)
    if config['running.mode'] == 'local':
        wrapped_func.setRunningMode(PythonFunctionWrapper.RunningMode.LOCAL)
    else:
        wrapped_func.setRunningMode(PythonFunctionWrapper.RunningMode.HADOOP)
    return wrapped_func
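
The debug block in this variant demonstrates the round trip that the docstring of Example #3 describes: recovering a function's source with inspect and re-creating it by executing that source. A stdlib-only sketch of the same round trip, independent of PyCascading (greet is just a hypothetical example function; run this as a script so inspect.getsource can find the source file), looks like this:

import inspect

def greet(name):
    return 'hello, ' + name

# Recover the source text of the function...
source = inspect.getsource(greet)

# ...and rebuild an equivalent function by executing that source in a
# fresh namespace, much as a deserializing worker would have to do.
namespace = {}
exec(source, namespace)
rebuilt = namespace['greet']

assert rebuilt('world') == greet('world')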
Example #3
def _wrap_function(function, casc_function_type):
    """Wraps a Python function into a Serializable and callable Java object.
    
    This wrapping is necessary because Cascading serializes the job pipeline
    before it sends the job to the workers. Since Python is an interpreted
    language, we essentially have to reconstruct the Python function from its
    source on the receiving end when we deserialize it.

    Arguments:
    function -- either a Cascading Operation, a PyCascading-decorated Python
        function, or a native Python function
    casc_function_type -- the Cascading Operation type whose operate method
        will call this Python function
    """
    if isinstance(function, cascading.operation.Operation):
        return function
    if isinstance(function, DecoratedFunction):
        # Build the arguments for the constructor
        args = []
        decorators = function.decorators
        if 'numargs_expected' in decorators:
            args.append(decorators['numargs_expected'])
        if 'produces' in decorators and decorators['produces']:
            args.append(coerce_to_fields(decorators['produces']))
        # Create the appropriate type (function or filter)
        fw = casc_function_type(*args)
        function = decorators['function']
        fw.setConvertInputTuples(decorators['input_conversion'])
        if decorators['type'] in set(['map', 'reduce']):
            fw.setOutputMethod(decorators['output_method'])
            fw.setOutputType(decorators['output_type'])
            fw.setFlowProcessPassIn(decorators['flow_process_pass_in'])
        fw.setContextArgs(decorators['args'])
        fw.setContextKwArgs(decorators['kwargs'])
    else:
        # When function is a pure Python function, declared without decorators
        fw = casc_function_type()
    wrapped_func = PythonFunctionWrapper(function)
    if running_mode == 'local':
        wrapped_func.setRunningMode(PythonFunctionWrapper.RunningMode.LOCAL)
    else:
        wrapped_func.setRunningMode(PythonFunctionWrapper.RunningMode.HADOOP)
    fw.setFunction(wrapped_func)
    return fw
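
The DecoratedFunction branch above is where decorator metadata becomes constructor arguments and setter calls. A simplified, self-contained sketch of just the argument-building step (DecoratedFunction and coerce_to_fields here are hypothetical stand-ins, not the real PyCascading classes) shows the idea:

# Hypothetical stand-ins, used only to exercise the argument-building logic.
class DecoratedFunction(object):
    def __init__(self, decorators):
        self.decorators = decorators

def coerce_to_fields(produces):    # the real helper builds Cascading Fields
    return tuple(produces)

def _constructor_args(function):
    args = []
    decorators = function.decorators
    if 'numargs_expected' in decorators:
        args.append(decorators['numargs_expected'])
    if 'produces' in decorators and decorators['produces']:
        args.append(coerce_to_fields(decorators['produces']))
    return args

df = DecoratedFunction({'numargs_expected': 2, 'produces': ['word', 'count']})
assert _constructor_args(df) == [2, ('word', 'count')]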