Example #1
parameters and some customized parameters). you could add your "helpers"
by inheriting from the Delegatee class and implementing some interfaces.
'''
process_id = __file__.lstrip('./')
Job.LOGGER = Logger("%s/log/%s.log" % ('/tmp', process_id), 'w')
'''
here we register a DFS delegatee and a Shell delegatee.
DFS already prepares some common commands to use: ls, cat, mv and copyFromLocal;
check the code for more.
'''
Job.DELEGATEES['my_dfs'] = DFS()
Job.DELEGATEES['my_shell'] = Shell()
'''
'''
wrapper = JobBlock(
    'entry job', '''
    this job demonstrates how to use delegatees, say DFS or Pig
''')
wrapper.add_plan(Job.INIT_JOB, Job.START, 'hadoop delegatee')
wrapper.add_plan('hadoop delegatee', Job.DONE, 'wrong command')
wrapper.add_plan('wrong command', Job.DONE, Job.LAST_JOB)
'''
prepare the jobs
'''
j = JobNode(id='hadoop delegatee',
            desc='''
    cat some file on the dfs (to run this tutorial, you have to prepare
    your own data on the dfs)
''')
j.set_callback(delegated_job)
wrapper.add_sub_job(j)
# ==
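For reference, a minimal sketch of what the delegated_job callback above might look like. The snippet only shows that the DFS delegatee prepares commands such as cat, so the lookup/call style below is an assumption for illustration, and the path is a placeholder:

def delegated_job(self):
    # look up the delegatee registered above; invoking its prepared 'cat'
    # command like this is a hypothetical usage, not confirmed API
    dfs = Job.DELEGATEES['my_dfs']
    dfs.cat('/user/me/some_file')  # placeholder path on the dfs
    return Job.DONE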
Example #2
if __name__ == '__main__':
    '''
    in this tutorial, we will introduce more features of configuration mechanism.
    '''
    process_id = __file__.lstrip('./')
    Job.LOGGER = Logger("%s/log/%s.log" % ('/tmp', process_id), 'w')
    '''
    we follow the previous example; remember Mr. Serious and Mr. Kidding?
    imagine this scenario:
    our jobs say hello to Mr. Serious and Mr. Kidding; we want to keep the
    result messages and pass them to Mr. Serious and Mr. Kidding separately.
    (Serious and Kidding are conceptualized as two jobs)
    '''
    wrapper = JobBlock(
        'entry job', '''
        this job demonstrates how to utilize variablized configuration
    ''')
    wrapper.add_plan(Job.INIT_JOB, Job.START, 'hello Serious')
    wrapper.add_plan('hello Serious', Job.DONE, 'hello Kidding')
    wrapper.add_plan('hello Kidding', Job.DONE, 'Serious')
    wrapper.add_plan('Serious', Job.DONE, 'Kidding')
    wrapper.add_plan('Kidding', Job.DONE, Job.LAST_JOB)
    '''
    same as the previous tutorial,
    but we declare the output, 'msg_to_[name]', which represents the message to be kept.
    the callbacks are also modified.
    '''
    # ==
    j_temp = JobNode(id='hello template', desc='say hello to someone')
    j_temp.need_input('msg', 'hello! Mr.[name]')
    j_temp.need_output('msg_to_[name]')
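A sketch of how the modified callback might fill the declared output. self.set_output is a hypothetical setter name assumed only for illustration; the snippet shows outputs being declared via need_output, not how they are written:

def hello_job(self):
    msg = self.get_input('msg')            # e.g. 'hello! Mr.Serious' after [name] substitution
    self.log(msg)
    self.set_output('msg_to_[name]', msg)  # hypothetical API for filling the declared output
    return Job.DONE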
Example #3
    """
    in this tutorial, we will introduce more features of configuration mechanism.
    """
    process_id = __file__.lstrip("./")
    Job.LOGGER = Logger("%s/log/%s.log" % ("/tmp", process_id), "w")

    """
    we follow the previous example; remember Mr. Serious and Mr. Kidding?
    imagine this scenario:
    our jobs say hello to Mr. Serious and Mr. Kidding; we want to keep the
    result messages and pass them to Mr. Serious and Mr. Kidding separately.
    (Serious and Kidding are conceptualized as two jobs)
    """
    wrapper = JobBlock(
        "entry job",
        """
        this job demonstrates how to utilize variablized configuration
    """,
    )
    wrapper.add_plan(Job.INIT_JOB, Job.START, "hello Serious")
    wrapper.add_plan("hello Serious", Job.DONE, "hello Kidding")
    wrapper.add_plan("hello Kidding", Job.DONE, "Serious")
    wrapper.add_plan("Serious", Job.DONE, "Kidding")
    wrapper.add_plan("Kidding", Job.DONE, Job.LAST_JOB)

    """
    same as the previous tutorial,
    but we declare the output, 'msg_to_[name]', which represents the message to be kept.
    the callbacks are also modified.
    """
    # ==
    j_temp = JobNode(id="hello template", desc="say hello to someone")
Example #4
    '''
    process_id = __file__.lstrip('./')
    Job.LOGGER = Logger("%s/log/%s.log" % ('/tmp', process_id), 'w')

    '''
    here we register a DFS delegatee and a Shell delegatee.
    DFS already prepares some common commands to use: ls, cat, mv and copyFromLocal;
    check the code for more.
    '''
    Job.DELEGATEES['my_dfs'] = DFS()
    Job.DELEGATEES['my_shell'] = Shell()

    '''
    '''
    wrapper = JobBlock('entry job', '''
        this job demonstrates how to use delegatees, say DFS or Pig
    ''')
    wrapper.add_plan(Job.INIT_JOB, Job.START, 'hadoop delegatee')
    wrapper.add_plan('hadoop delegatee', Job.DONE, 'wrong command')
    wrapper.add_plan('wrong command', Job.DONE, Job.LAST_JOB)

    '''
    prepare the jobs
    '''
    j = JobNode(id='hadoop delegatee', desc='''
        cat some file on the dfs (to run this tutorial, you have to prepare
        your own data on the dfs)
    ''')
    j.set_callback(delegated_job)
    wrapper.add_sub_job(j)
    # ==
Example #5
    return Job.DONE


if __name__ == '__main__':
    '''
    in this tutorial, we will introduce the configuration mechanism which helps
    you handle the communication of inputs and outputs between the jobs.
    '''
    process_id = __file__.lstrip('./')
    Job.LOGGER = Logger("%s/log/%s.log" % ('/tmp', process_id), 'w')
    '''
    we want to prepare two JobNodes, which say hello to the input name they get.
    most parts of the jobs are the same, except the input.
    '''
    wrapper = JobBlock(
        'entry job', '''
        this job demonstrates how to use the configuration mechanism for input data
    ''')
    wrapper.add_plan(Job.INIT_JOB, Job.START, 'hello Serious')
    wrapper.add_plan('hello Serious', Job.DONE, 'hello Kidding')
    wrapper.add_plan('hello Kidding', Job.DONE, Job.LAST_JOB)
    '''
    first, we build a template/prototype job for the hello jobs and assign
    a key-value pair input. the input can be accessed in the callback
    via self.get_input(<key_of_the_input>). note that we bracket the name in the
    config value: it's a variablized config, which we will explain later.
    '''
    # ==
    j_temp = JobNode(id='template', desc='say hello to someone')
    j_temp.need_input('msg', 'hello! Mr.[name]')
    j_temp.set_callback(hello_job)
    '''
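Following the note above, a minimal sketch of the hello_job callback, assuming self.log is the built-in logging helper mentioned in a later tutorial:

def hello_job(self):
    # read the configured input; '[name]' is expected to have been
    # substituted by the variablized-config mechanism by this point
    msg = self.get_input('msg')
    self.log(msg)
    return Job.DONE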
Example #6
if __name__ == '__main__':

    '''
    in this tutorial, we will introduce ParallelJobBlock, which allows you to
    execute multiple JobBlocks in parallel
    '''
    process_id = __file__.lstrip('./')
    Job.LOGGER = Logger("%s/log/%s.log" % ('/tmp', process_id), 'w')

    '''
    following the previous tutorial, we reuse the two existing JobNodes, but
    wrap them into one ParallelJobBlock
    '''
    wrapper = JobBlock('entry job', '''
        this job demonstrates how to execute jobs in parallel
    ''')
    wrapper.add_plan(Job.INIT_JOB, Job.START, 'para block1')
    wrapper.add_plan('para block1', Job.DONE, Job.LAST_JOB)

    '''
    first, following the usual top-down design strategy, we define a
    ParallelJobBlock, which is like the wrapper, with its own plan.
    however, adding parallel plans is a little different from JobBlock:
    we call add_papallel_plan and assign all the inner job IDs at the same time.
    no other plan is allowed in a ParallelJobBlock!
    '''
    # ==
    j = ParallelJobBlock(id='para block1', desc='para block1')
    j.add_papallel_plan('job0', 'job1')
    wrapper.add_sub_job(j)
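The snippet ends before the inner jobs are defined. A sketch of the missing part, assuming the two JobNodes are attached to the parallel block with add_sub_job (as the wrapper does elsewhere) and that normal_job is the simple logging callback from the earlier tutorials:

for job_id in ('job0', 'job1'):
    node = JobNode(id=job_id, desc='inner job %s' % job_id)
    node.set_callback(normal_job)
    j.add_sub_job(node)  # no add_plan here: the parallel plan above is the only plan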
Example #7
if __name__ == '__main__':

    '''
    in this tutorial, we will introduce the configuration mechanism which helps
    you handle the communication of inputs and outputs between the jobs.
    '''
    process_id = __file__.lstrip('./')
    Job.LOGGER = Logger("%s/log/%s.log" % ('/tmp', process_id), 'w')

    '''
    we want to prepare two JobNodes, which say hello to the input name they get.
    most parts of the jobs are the same, except the input.
    '''
    wrapper = JobBlock('entry job', '''
        this job demonstrates how to use the configuration mechanism for input data
    ''')
    wrapper.add_plan(Job.INIT_JOB, Job.START, 'hello Serious')
    wrapper.add_plan('hello Serious', Job.DONE, 'hello Kidding')
    wrapper.add_plan('hello Kidding', Job.DONE, Job.LAST_JOB)

    '''
    first, we build a template/prototype job for the hello jobs and assign
    a key-value pair input. the input can be accessed in the callback
    via self.get_input(<key_of_the_input>). note that we bracket the name in the
    config value: it's a variablized config, which we will explain later.
    '''
    # ==
    j_temp = JobNode(id='template', desc='say hello to someone')
    j_temp.need_input('msg', 'hello! Mr.[name]')
    j_temp.set_callback(hello_job)
Example #8
if __name__ == '__main__':
    '''
    in this tutorial, we want to introduce a handy tool for development:
    the dry_run mechanism
    '''
    process_id = __file__.lstrip('./')
    Job.LOGGER = Logger("%s/log/%s.log" % ('/tmp', process_id), 'w')
    '''
    imagine that you have a bunch of jobs in the flow (all right, we only demo
    with four jobs). under development, you want to check that the inputs/outputs
    you assigned are correct before running the process (because some jobs may
    cause *permanent* effects, say rm -rf?). the "dry run" mode provides a way
    to print out the commands and evaluate the outputs without actually executing them.
    '''
    wrapper = JobBlock(
        'entry job', '''
        this job demonstrates how to use the dry run mechanism
    ''')
    wrapper.add_plan(Job.INIT_JOB, Job.START, 'foo')
    wrapper.add_plan('foo', Job.DONE, 'block')
    wrapper.add_plan('block', Job.DONE, 'fob')
    wrapper.add_plan('fob', Job.DONE, Job.LAST_JOB)
    '''
    now, we enable a secret switch to put the whole process in dry run mode
    '''
    wrapper.set_dry_run(True)
    '''
    prepare the jobs
    '''
    j = JobNode(id='foo', desc=''' foo ''')
    j.set_callback(foo_job)
    wrapper.add_sub_job(j)
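A sketch of the foo_job callback above; its body is a placeholder (an assumption), since under dry run the framework is described as printing/evaluating instead of actually executing the work:

def foo_job(self):
    # in a real run this would do actual work; in dry run mode the process
    # only reports what would happen
    self.log('doing foo work')
    return Job.DONE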
Example #9
    process_id = __file__.lstrip("./")
    Job.LOGGER = Logger("%s/log/%s.log" % ("/tmp", process_id), "w")

    """
    then, we plan the process with two separate jobs, job0 and job1.
    each plan comprises a starting job, a state and an ending job.
    you may notice there are two special jobs at the very beginning and the end.
    they are part of the "syntax" of the plan; you should assign Job.INIT_JOB and
    Job.LAST_JOB for any of your processes.
    in this step, we just give some job names (ids) and compose their sequence
    without thinking about any implementation detail. that helps you focus on
    the planning.
    """
    wrapper = JobBlock(
        "entry job",
        """
        this job demonstrates how to add simple JobBlocks
    """,
    )
    wrapper.add_plan(Job.INIT_JOB, Job.START, "job0")
    wrapper.add_plan("job0", Job.DONE, "job1")
    wrapper.add_plan("job1", Job.DONE, Job.LAST_JOB)

    """
    now we start to plan the details of each job.
    each job should have an id and a paragraph of desc(ription), which will be
    generated into documentation, so you won't be bothered to prepare any other
    documents. this mechanism helps keep the code and its docs alive.
    the jobs we need here are very simple ones. let's say we want to print
    something in each job, so we don't need to prepare any input. (we leave this
    to the other tutorial codes.) so we assign a "callback" method, normal_job, to
    each job. you can check the callbacks at the beginning of this code.
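A minimal sketch of such a callback, assuming self.log is the built-in logging helper the companion tutorial mentions:

def normal_job(self):
    # print/log something and report success so the plan advances
    self.log('running a very simple job')
    return Job.DONE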
Example #10
    wait, do you notice that we are repeatedly typing the variable names?
    we don't like bad smells, even slight ones.
    so we introduce a helper in the config module, called pickup_from_dict;
    applying it with python's built-in map can ease your work
    '''
    configs_for_jobs = config.pickup_from_dict(
        CFG,
        [
            'a very long path blah blah',  # you can even
            'another very long path blah blah',  # make some tidy
            'yet another very long path blah blah',  # comment here
        ])
    # list() forces evaluation: under Python 3, map is lazy and the side
    # effects would otherwise never run
    list(map(lambda key: Job.set_global(key, CFG[key]), configs_for_jobs.keys()))

    wrapper = JobBlock(
        'entry job', '''
        this job demonstrates how to use the config management module
    ''')
    wrapper.add_plan(Job.INIT_JOB, Job.START, 'foo')
    wrapper.add_plan('foo', Job.DONE, Job.LAST_JOB)
    '''
    we can get the configs we just set as globals by giving the key without a
    value, or we can put them into some other input.

    here we also introduce another usage of output:
    in tutorial_04, we set the key of an output without a value; that's a kind
    of declaration stating 'we will put some value with that key as the output'
    (and later jobs can access it as input).
    this time, we do give a value to the output key, because we want the job to
    output something to the path we expect.
    '''
    j = JobNode(id='foo', desc=''' foo ''')
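The snippet stops right after creating the job; a sketch of the wiring the notes above describe. The input key mirrors the placeholder path above, while the output key and path are made-up illustrations:

# giving only the key pulls the value set earlier via Job.set_global
j.need_input('a very long path blah blah')
# an output *with* a value: we expect the job to write to this path
j.need_output('result path', '/tmp/result')  # hypothetical key and path
j.set_callback(foo_job)
wrapper.add_sub_job(j)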
Example #11
    '''
    in this tutorial, we want to introduce a handy tool for development:
    the dry_run mechanism
    '''
    process_id = __file__.lstrip('./')
    Job.LOGGER = Logger("%s/log/%s.log" % ('/tmp', process_id), 'w')

    '''
    imagine that you have a bunch of jobs in the flow (all right, we only demo
    with four jobs). under development, you want to check that the inputs/outputs
    you assigned are correct before running the process (because some jobs may
    cause *permanent* effects, say rm -rf?). the "dry run" mode provides a way
    to print out the commands and evaluate the outputs without actually executing them.
    '''
    wrapper = JobBlock('entry job', '''
        this job demonstrates how to use the dry run mechanism
    ''')
    wrapper.add_plan(Job.INIT_JOB, Job.START, 'foo')
    wrapper.add_plan('foo', Job.DONE, 'block')
    wrapper.add_plan('block', Job.DONE, 'fob')
    wrapper.add_plan('fob', Job.DONE, Job.LAST_JOB)

    '''
    now, we enable a secret switch to put the whole process in dry run mode
    '''
    wrapper.set_dry_run(True)

    '''
    prepare the jobs
    '''
    j = JobNode(id='foo', desc=''' foo ''')
Example #12
if __name__ == '__main__':

    '''
    in this tutorial, we will introduce JobBlock, which allows you to compose
    your JobNodes into a tree-style structure. with a hierarchical structure,
    you can organize your jobs better (from both the writer's and the reader's
    point of view)
    '''
    process_id = __file__.lstrip('./')
    Job.LOGGER = Logger("%s/log/%s.log" % ('/tmp', process_id), 'w')

    '''
    following the previous tutorial, we reuse the two existing JobNodes, but
    wrap them into one JobBlock
    '''
    wrapper = JobBlock('entry job', '''
        this job demonstrates how to organize your plan with JobBlocks
    ''')
    wrapper.add_plan(Job.INIT_JOB, Job.START, 'block1')
    wrapper.add_plan('block1', Job.DONE, Job.LAST_JOB)

    '''
    first, with the top-down design strategy, we define a JobBlock, which is
    like the wrapper, with its own plan.
    '''
    # ==
    j = JobBlock(id='block1', desc='block1')
    j.add_plan(Job.INIT_JOB, Job.START, 'job0')
    j.add_plan('job0', Job.DONE, 'job1')
    j.add_plan('job1', Job.DONE, Job.LAST_JOB)
    wrapper.add_sub_job(j)
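The snippet ends before job0 and job1 are defined. A sketch of the missing nodes, attached to the inner block so its own INIT_JOB/LAST_JOB plan can resolve them; normal_job is assumed to be the simple logging callback from the earlier tutorials:

j0 = JobNode(id='job0', desc='inner job 0')
j0.set_callback(normal_job)
j.add_sub_job(j0)

j1 = JobNode(id='job1', desc='inner job 1')
j1.set_callback(normal_job)
j.add_sub_job(j1)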
Example #13
we could use self.log as a built-in method in each job.
'''
process_id = __file__.lstrip('./')
Job.LOGGER = Logger("%s/log/%s.log" % ('/tmp', process_id), 'w')
'''
then, we plan the process with two separate jobs, job0 and job1.
each plan comprises a starting job, a state and an ending job.
you may notice there are two special jobs at the very beginning and the end.
they are part of the "syntax" of the plan; you should assign Job.INIT_JOB and
Job.LAST_JOB for any of your processes.
in this step, we just give some job names (ids) and compose their sequence
without thinking about any implementation detail. that helps you focus on
the planning.
'''
wrapper = JobBlock(
    'entry job', '''
    this job demonstrates how to add simple JobBlocks
''')
wrapper.add_plan(Job.INIT_JOB, Job.START, 'job0')
wrapper.add_plan('job0', Job.DONE, 'job1')
wrapper.add_plan('job1', Job.DONE, Job.LAST_JOB)
'''
now we start to plan the details of each job.
each job should have an id and a paragraph of desc(ription), which will be
generated into documentation, so you won't be bothered to prepare any other
documents. this mechanism helps keep the code and its docs alive.
the jobs we need here are very simple ones. let's say we want to print
something in each job, so we don't need to prepare any input. (we leave this
to the other tutorial codes.) so we assign a "callback" method, normal_job, to
each job. you can check the callbacks at the beginning of this code.
'''
# ==
Example #14
    return Job.DONE


if __name__ == '__main__':
    '''
    in this tutorial, we will introduce ParallelJobBlock, which allows you to
    execute multiple JobBlocks in parallel
    '''
    process_id = __file__.lstrip('./')
    Job.LOGGER = Logger("%s/log/%s.log" % ('/tmp', process_id), 'w')
    '''
    following the previous tutorial, we reuse the two existing JobNodes, but
    wrap them into one ParallelJobBlock
    '''
    wrapper = JobBlock(
        'entry job', '''
        this job demonstrates how to execute jobs in parallel
    ''')
    wrapper.add_plan(Job.INIT_JOB, Job.START, 'para block1')
    wrapper.add_plan('para block1', Job.DONE, Job.LAST_JOB)
    '''
    first, following the usual top-down design strategy, we define a
    ParallelJobBlock, which is like the wrapper, with its own plan.
    however, adding parallel plans is a little different from JobBlock:
    we call add_papallel_plan and assign all the inner job IDs at the same time.
    no other plan is allowed in a ParallelJobBlock!
    '''
    # ==
    j = ParallelJobBlock(id='para block1', desc='para block1')
    j.add_papallel_plan('job0', 'job1')
    wrapper.add_sub_job(j)
    '''