Example #1
    def createNewAtomicPomset(self, name=None, 
                              executableObject=None,
                              commandBuilderType=None,
                              executeEnvironmentType=None,
                              *args, **kwds):

        newAtomicPomset = DefinitionModule.AtomicDefinition(*args, **kwds)
        # default to a short, generated name if none was provided
        if name is None:
            name = 'pomset %s' % uuid.uuid4().hex[:3]
        newAtomicPomset.name(name)

        # atomic pomsets execute their task through executeTaskInEnvironment
        newAtomicPomset.functionToExecute(
            DefinitionModule.executeTaskInEnvironment)

        newAtomicPomset.executable(executableObject)

        # create the parameter orderings
        parameterOrderings = DefinitionModule.createParameterOrderingTable()
        newAtomicPomset.parameterOrderingTable(parameterOrderings)

        # default both the command builder and the execution environment
        # to a local shell process
        if commandBuilderType is None:
            commandBuilderType = 'shell process'
        newAtomicPomset.commandBuilderType(commandBuilderType)

        if executeEnvironmentType is None:
            executeEnvironmentType = 'shell process'
        newAtomicPomset.executeEnvironmentType(executeEnvironmentType)

        newPomsetContext = ContextModule.wrapPomsetInContext(newAtomicPomset)
        
        return newPomsetContext
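
A minimal usage sketch for the method above. The builder instance and the
/bin/echo path are assumptions for illustration; the keyword arguments, the
TaskCommandModule.Executable mutators, and the 'shell process' defaults are
taken from the examples on this page.

# Hypothetical caller; builder is assumed to be an instance of the class
# that defines createNewAtomicPomset() above.
executable = TaskCommandModule.Executable()
executable.stageable(True)
executable.path(['/bin/echo'])    # illustrative executable, not from the source
executable.staticArgs([])

# commandBuilderType and executeEnvironmentType fall back to 'shell process'
pomsetContext = builder.createNewAtomicPomset(
    name='echo pomset',
    executableObject=executable)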
Example #2
def createHadoopWordcountDefinition():
    parameterOrdering = DefinitionModule.createParameterOrderingTable()
    row = parameterOrdering.addRow()
    row.setColumn('source', 'input file')
    row.setColumn('target', 'output file')

    # TODO:
    # need to be able to customize this for each host
    executable = HadoopModule.JarExecutable()
    executable.stageable(False)
    executable.path([HadoopModule.getExecutablePath()])
    executable.jarFile([getExamplesJar()])
    executable.jarClass(['wordcount'])
    
    definition = DefinitionModule.createShellProcessDefinition(
        inputParameters = {
            'input file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE:True,
            },
            'output file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISSIDEEFFECT:True,
            }
        },
        parameterOrderings = parameterOrdering,
        executable = executable
    )

    return definition
Example #3
def createWordCountDefinition(dir=None):
    
    parameterOrdering = DefinitionModule.createParameterOrderingTable()
    row = parameterOrdering.addRow()
    row.setColumn('source', 'input file')
    row.setColumn('target', 'output file')

    # build the executable path from a list of components; default to the
    # test data directory under the current working directory
    if dir is None:
        dir = os.getcwd().split(os.path.sep) + ['resources', 'testdata', 'TestExecute']
    command = dir + ['wordcount.py']
    executable = TaskCommandModule.Executable()
    executable.stageable(True)
    executable.path(command)
    executable.staticArgs([])
    
    definition = DefinitionModule.createShellProcessDefinition(
        inputParameters = {
            'input file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE:True,
            },
            'output file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISSIDEEFFECT:True,
            }
        },
        parameterOrderings = parameterOrdering,
        executable = executable
    )
    definition.name('wordcount mapper')
    definition.id(ID_WORDCOUNT)
    definition.isLibraryDefinition(True)
    
    return definition
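
A hedged sketch of calling the factory above. The directory components are
hypothetical; passing no argument falls back to the resources/testdata/TestExecute
directory under the current working directory, as the function body shows.

# Hypothetical call sites; dir is a list of path components that gets
# 'wordcount.py' appended to it.
localDefinition = createWordCountDefinition(dir=['', 'tmp', 'wordcount'])
defaultDefinition = createWordCountDefinition()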
Example #4
def createHadoopStreamingDefinition():
    # order the command-line arguments as: input file, output file, mapper, reducer
    parameterOrdering = DefinitionModule.createParameterOrderingTable()
    row = parameterOrdering.addRow()
    row.setColumn('source', 'input file')
    row.setColumn('target', 'output file')
    row = parameterOrdering.addRow()
    row.setColumn('source', 'output file')
    row.setColumn('target', 'mapper')
    row = parameterOrdering.addRow()
    row.setColumn('source', 'mapper')
    row.setColumn('target', 'reducer')

    
    # TODO:
    # need to be able to customize this for each host
    executable = HadoopModule.JarExecutable()
    executable.stageable(False)
    executable.path([HadoopModule.getExecutablePath()])
    executable.jarFile([getStreamingJar()])
    executable.jarClass([])
    
    
    definition = DefinitionModule.createShellProcessDefinition(
        inputParameters = {
            'input file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE:True,
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE_OPTIONS:{
                    ParameterModule.COMMANDLINE_PREFIX_FLAG:['-input']
                    },
            },
            'output file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISSIDEEFFECT:True,
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE_OPTIONS:{
                    ParameterModule.COMMANDLINE_PREFIX_FLAG:['-output']
                    },
            },
            'mapper':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE:True,
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE_OPTIONS:{
                    ParameterModule.COMMANDLINE_PREFIX_FLAG:['-mapper']
                    },
            },
            'reducer':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE:True,
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE_OPTIONS:{
                    ParameterModule.COMMANDLINE_PREFIX_FLAG:['-reducer']
                    },
            },
        },
        parameterOrderings = parameterOrdering,
        executable = executable
    )

    return definition
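
Given the COMMANDLINE_PREFIX_FLAG options above, values bound to the four
parameters are emitted behind the usual Hadoop Streaming flags, in the order
fixed by the parameter ordering table. A brief sketch, reusing only mutators
that appear elsewhere on this page and assumed to apply here as well:

# The definition prefixes its arguments with -input, -output, -mapper and
# -reducer, ordered as: input file, output file, mapper, reducer.
streamingDefinition = createHadoopStreamingDefinition()
streamingDefinition.name('hadoop streaming')
streamingDefinition.isLibraryDefinition(True)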
Example #5
def createHadoopPipesDefinition():

    parameterOrdering = DefinitionModule.createParameterOrderingTable()
    row = parameterOrdering.addRow()
    row.setColumn('source', 'input file')
    row.setColumn('target', 'output file')
    
    # TODO:
    # need to be able to customize this for each host
    # placeholder name for the C++ pipes binary
    command = ['pipesProgram']
    executable = HadoopModule.PipesExecutable()
    executable.stageable(False)
    executable.path([HadoopModule.getExecutablePath()])
    executable.pipesFile(command)
    
    
    definition = DefinitionModule.createShellProcessDefinition(
        inputParameters = {
            'input file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISINPUTFILE:True,
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE_OPTIONS:{
                    ParameterModule.COMMANDLINE_PREFIX_FLAG:['-input']
                    },
            },
            'output file':{
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE:True,
                ParameterModule.PORT_ATTRIBUTE_ISSIDEEFFECT:True,
                ParameterModule.PORT_ATTRIBUTE_COMMANDLINE_OPTIONS:{
                    ParameterModule.COMMANDLINE_PREFIX_FLAG:['-output']
                    },
            },
        },
        parameterOrderings = parameterOrdering,
        executable = executable
    )

    return definition