Beispiel #1
0
def createHadoopWordcountDefinition():
    """Build a shell process definition for the 'wordcount' class of the examples jar.

    The definition exposes two command-line parameters, 'input file'
    (flagged as an input file) and 'output file' (flagged as a side
    effect), plus one ordering row with source 'input file' and target
    'output file'.

    Returns:
        The object returned by DefinitionModule.createShellProcessDefinition.
    """
    # Single ordering row: 'input file' -> 'output file'.
    orderingTable = DefinitionModule.createParameterOrderingTable()
    orderingRow = orderingTable.addRow()
    orderingRow.setColumn('source', 'input file')
    orderingRow.setColumn('target', 'output file')

    # TODO:
    # need to be able to customize this for each host
    jarExecutable = HadoopModule.JarExecutable()
    jarExecutable.stageable(False)
    jarExecutable.path([HadoopModule.getExecutablePath()])
    jarExecutable.jarFile([getExamplesJar()])
    jarExecutable.jarClass(['wordcount'])

    # Port attribute dictionaries, keyed by parameter name below.
    inputPort = {
        ParameterModule.PORT_ATTRIBUTE_COMMANDLINE: True,
        ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE: True,
    }
    outputPort = {
        ParameterModule.PORT_ATTRIBUTE_COMMANDLINE: True,
        ParameterModule.PORT_ATTRIBUTE_ISSIDEEFFECT: True,
    }
    ports = {
        'input file': inputPort,
        'output file': outputPort,
    }

    return DefinitionModule.createShellProcessDefinition(
        inputParameters=ports,
        parameterOrderings=orderingTable,
        executable=jarExecutable
    )
Beispiel #2
0
def createWordCountDefinition(dir=None):
    """Build the library definition for the 'wordcount.py' script.

    Args:
        dir: Directory holding 'wordcount.py'. It is concatenated with
            ['wordcount.py'], so it is expected to be a list of path
            components. When None, the current working directory (split
            on os.path.sep) followed by 'resources', 'testdata',
            'TestExecute' is used.

    Returns:
        The shell process definition, named 'wordcount mapper', given the
        id ID_WORDCOUNT and flagged as a library definition.
    """
    # Single ordering row: 'input file' -> 'output file'.
    orderingTable = DefinitionModule.createParameterOrderingTable()
    orderingRow = orderingTable.addRow()
    orderingRow.setColumn('source', 'input file')
    orderingRow.setColumn('target', 'output file')

    # Resolve the script location without touching the caller's list.
    if dir is not None:
        baseDir = dir
    else:
        baseDir = os.getcwd().split(os.path.sep) + [
            'resources', 'testdata', 'TestExecute']
    scriptPath = baseDir + ['wordcount.py']

    scriptExecutable = TaskCommandModule.Executable()
    scriptExecutable.stageable(True)
    scriptExecutable.path(scriptPath)
    scriptExecutable.staticArgs([])

    # Port attribute dictionaries, keyed by parameter name below.
    inputPort = {
        ParameterModule.PORT_ATTRIBUTE_COMMANDLINE: True,
        ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE: True,
    }
    outputPort = {
        ParameterModule.PORT_ATTRIBUTE_COMMANDLINE: True,
        ParameterModule.PORT_ATTRIBUTE_ISSIDEEFFECT: True,
    }
    ports = {
        'input file': inputPort,
        'output file': outputPort,
    }

    wordcountDefinition = DefinitionModule.createShellProcessDefinition(
        inputParameters=ports,
        parameterOrderings=orderingTable,
        executable=scriptExecutable
    )
    wordcountDefinition.name('wordcount mapper')
    wordcountDefinition.id(ID_WORDCOUNT)
    wordcountDefinition.isLibraryDefinition(True)

    return wordcountDefinition
Beispiel #3
0
def createHadoopStreamingDefinition():
    """Build a shell process definition that runs the streaming jar.

    Four command-line parameters are declared, each with a prefix flag:
    'input file' (-input), 'output file' (-output), 'mapper' (-mapper)
    and 'reducer' (-reducer). The ordering table chains them as
    'input file' -> 'output file' -> 'mapper' -> 'reducer'.

    Returns:
        The object returned by DefinitionModule.createShellProcessDefinition.
    """
    def _flaggedPort(roleAttribute, flag):
        # Command-line port carrying one extra role attribute and a prefix flag.
        return {
            ParameterModule.PORT_ATTRIBUTE_COMMANDLINE: True,
            roleAttribute: True,
            ParameterModule.PORT_ATTRIBUTE_COMMANDLINE_OPTIONS: {
                ParameterModule.COMMANDLINE_PREFIX_FLAG: [flag]
            },
        }

    # One ordering row per consecutive pair of parameters.
    orderingTable = DefinitionModule.createParameterOrderingTable()
    orderedPairs = (
        ('input file', 'output file'),
        ('output file', 'mapper'),
        ('mapper', 'reducer'),
    )
    for sourceName, targetName in orderedPairs:
        orderingRow = orderingTable.addRow()
        orderingRow.setColumn('source', sourceName)
        orderingRow.setColumn('target', targetName)

    # TODO:
    # need to be able to customize this for each host
    jarExecutable = HadoopModule.JarExecutable()
    jarExecutable.stageable(False)
    jarExecutable.path([HadoopModule.getExecutablePath()])
    jarExecutable.jarFile([getStreamingJar()])
    jarExecutable.jarClass([])

    # Only 'output file' is a side effect; the other three are input files.
    ports = {
        'input file': _flaggedPort(
            ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE, '-input'),
        'output file': _flaggedPort(
            ParameterModule.PORT_ATTRIBUTE_ISSIDEEFFECT, '-output'),
        'mapper': _flaggedPort(
            ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE, '-mapper'),
        'reducer': _flaggedPort(
            ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE, '-reducer'),
    }

    return DefinitionModule.createShellProcessDefinition(
        inputParameters=ports,
        parameterOrderings=orderingTable,
        executable=jarExecutable
    )
Beispiel #4
0
def createEchoDefinition():
    """Build a shell process definition named 'echo' that runs /bin/echo.

    The definition has a single command-line parameter, 'item to echo',
    no parameter orderings, and an executable that is not stageable and
    has no static arguments.

    Returns:
        The shell process definition after its name has been set.
    """
    echoExecutable = TaskCommandModule.Executable()
    echoExecutable.stageable(False)
    echoExecutable.path(['/bin/echo'])
    echoExecutable.staticArgs([])

    ports = {
        'item to echo': {
            ParameterModule.PORT_ATTRIBUTE_COMMANDLINE: True,
        },
    }

    echoDefinition = DefinitionModule.createShellProcessDefinition(
        inputParameters=ports,
        parameterOrderings=None,
        executable=echoExecutable
    )
    echoDefinition.name('echo')
    return echoDefinition
Beispiel #5
0
def createHadoopPipesDefinition():
    """Build a shell process definition backed by a pipes executable.

    The executable's pipes file is set to ['pipesProgram']. Two
    command-line parameters are declared: 'input file' (prefix flag
    -input, flagged as an input file) and 'output file' (prefix flag
    -output, flagged as a side effect), with one ordering row from
    'input file' to 'output file'.

    Returns:
        The object returned by DefinitionModule.createShellProcessDefinition.
    """
    # Single ordering row: 'input file' -> 'output file'.
    orderingTable = DefinitionModule.createParameterOrderingTable()
    orderingRow = orderingTable.addRow()
    orderingRow.setColumn('source', 'input file')
    orderingRow.setColumn('target', 'output file')

    # TODO:
    # need to be able to customize this for each host
    pipesExecutable = HadoopModule.PipesExecutable()
    pipesExecutable.stageable(False)
    pipesExecutable.path([HadoopModule.getExecutablePath()])
    pipesExecutable.pipesFile(['pipesProgram'])

    # Port attribute dictionaries, keyed by parameter name below.
    inputPort = {
        ParameterModule.PORT_ATTRIBUTE_COMMANDLINE: True,
        ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE: True,
        ParameterModule.PORT_ATTRIBUTE_COMMANDLINE_OPTIONS: {
            ParameterModule.COMMANDLINE_PREFIX_FLAG: ['-input']
        },
    }
    outputPort = {
        ParameterModule.PORT_ATTRIBUTE_COMMANDLINE: True,
        ParameterModule.PORT_ATTRIBUTE_ISSIDEEFFECT: True,
        ParameterModule.PORT_ATTRIBUTE_COMMANDLINE_OPTIONS: {
            ParameterModule.COMMANDLINE_PREFIX_FLAG: ['-output']
        },
    }
    ports = {
        'input file': inputPort,
        'output file': outputPort,
    }

    return DefinitionModule.createShellProcessDefinition(
        inputParameters=ports,
        parameterOrderings=orderingTable,
        executable=pipesExecutable
    )