Beispiel #1
0
def build_project(project_name, global_props, project_props, jobs, files,
                  version):
    """Assemble a ``Project`` from property, job and file definitions.

    Args:
        project_name: Name handed to ``Project`` (also logged).
        global_props: Mapping of base properties. It is copied, so the
            caller's mapping is left unmodified.
        project_props: Mapping layered on top of ``global_props``; on a key
            clash the value from ``project_props`` wins.
        jobs: Mapping of job name to job definition. Each definition is
            wrapped in ``Job`` before being registered.
        files: Iterable of ``(file, target)`` pairs, each forwarded to
            ``Project.add_file``.
        version: Version handed to ``Project`` (also logged).

    Returns:
        The populated ``Project`` instance.
    """
    logger.info("Building workflow %s, version: %s.", project_name, version)

    project = Project(project_name, root=os.curdir, version=version)
    # Copy before merging: assigning ``global_props`` directly would alias it,
    # and the ``update`` below would then write the project-specific values
    # into the caller's dict (and into every project built from it later).
    project.properties = dict(global_props)
    project.properties.update(project_props)

    for job_name, job_definition in jobs.items():
        project.add_job(job_name, Job(job_definition))

    # ``source_path`` rather than ``file`` to avoid shadowing the builtin.
    for source_path, target in files:
        project.add_file(source_path, target)
    return project
Beispiel #2
0
#!/usr/bin/env python
# encoding: utf-8

"""
  Azkaban example projects configuration script.
   • Azkaban CLI syntax definition to configure all examples in this project
"""

from azkaban import Job, Project

# `root=__file__` passes this script's own path as the project root.
PROJECT = Project('azkaban_examples', root=__file__)
# Properties set at the project level are visible to every job.
PROJECT.properties = dict(project_1='project-val1')

JOBS = {
  # `basic_flow` example
  'basic_step_1.cmd':  Job({'type': 'command', 'command': 'echo "job: basic_step_1.cmd"'}),
  'basic_step_2.cmd':  Job({'type': 'command', 'command': 'echo "job: basic_step_2.cmd"', 'dependencies': 'basic_step_1.cmd'}),
  'basic_step_3.cmd':  Job({'type': 'command', 'command': 'echo "job: basic_step_3.cmd"', 'dependencies': 'basic_step_2.cmd'}),
  'basic_step_4.cmd':  Job({'type': 'command', 'command': 'echo "job: basic_step_4.cmd"', 'dependencies': 'basic_step_3.cmd'}),
  'basic_step_5.cmd':  Job({'type': 'command', 'command': 'echo "job: basic_step_5.cmd"', 'dependencies': 'basic_step_4.cmd'}),
  'basic_step_6.cmd':  Job({'type': 'command', 'command': 'echo "job: basic_step_6.cmd"', 'dependencies': 'basic_step_4.cmd'}),
  'basic_step_7.cmd':  Job({'type': 'command', 'command': 'echo "job: basic_step_7.cmd"', 'dependencies': 'basic_step_2.cmd'}),
  'basic_step_8.cmd':  Job({'type': 'command', 'command': 'echo "job: basic_step_8.cmd"', 'dependencies': 'basic_step_2.cmd'}),
  'basic_flow':        Job({'type': 'noop'   , 'dependencies': 'basic_step_5.cmd,basic_step_6.cmd,basic_step_7.cmd,basic_step_8.cmd'}),
  # `template_flow` example
  #   • Demonstrates using one flow as a "template" that is embedded in another flow and reused multiple times.
  #   • The only work performed by the job in this example template is to echo the variables it receives to the log.
  #     NOTE: We have to `chmod 777` our script to make sure Azkaban can run it.
Beispiel #3
0
production and test, without any job duplication.

"""

from azkaban import Job, Project
from getpass import getuser

# Production project
# ------------------
#
# This project is configured to run in a production environment (e.g. using a
# headless user with permissions to write to a specific directory).

# `root=__file__` passes this script's own path as the project root.
PROJECT = Project('azkabancli_sample', root=__file__)
# Project-level properties for the production configuration.
PROJECT.properties = {
    'user.to.proxy': 'production_user', 'hdfs.root': '/jobs/sample/',
}

# Job name -> Job definition.
JOBS = {
    'gather_data': Job({
        'type': 'hadoopJava',
        'job.class': 'sample.GatherData',
        # `${hdfs.root}` references the `hdfs.root` project property.
        'path.output': '${hdfs.root}data.avro',
    }),

    # ...
}

for name, job in JOBS.items():
Beispiel #4
0
#!/usr/bin/env python
# encoding: utf-8
"""
  Azkaban example projects configuration script.
   • Azkaban CLI syntax definition to configure all examples in this project
"""

from azkaban import Job, Project

# `root=__file__` passes this script's own path as the project root.
PROJECT = Project('azkaban_examples', root=__file__)
# Properties set at the project level are visible to every job.
PROJECT.properties = {
    'project_1': 'project-val1',
}

JOBS = {
    # `basic_flow` example
    'basic_step_1.cmd':
    Job({
        'type': 'command',
        'command': 'echo "job: basic_step_1.cmd"'
    }),
    'basic_step_2.cmd':
    Job({
        'type': 'command',
        'command': 'echo "job: basic_step_2.cmd"',
        'dependencies': 'basic_step_1.cmd'
    }),
    'basic_step_3.cmd':
    Job({
        'type': 'command',
        'command': 'echo "job: basic_step_3.cmd"',
        'dependencies': 'basic_step_2.cmd'
Beispiel #5
0
"""

from azkaban import Job, Project
from getpass import getuser


# Production project
# ------------------
#
# This project is configured to run in a production environment (e.g. using a
# headless user with permissions to write to a specific directory).

# `root=__file__` passes this script's own path as the project root.
PROJECT = Project('azkabancli_sample', root=__file__)
# Project-level properties for the production configuration.
PROJECT.properties = {
    'user.to.proxy': 'production_user',
    'hdfs.root': '/jobs/sample/',
}

# Job name -> Job definition.
JOBS = {
    'gather_data':
    Job({
        'type': 'hadoopJava',
        'job.class': 'sample.GatherData',
        # `${hdfs.root}` references the `hdfs.root` project property.
        'path.output': '${hdfs.root}data.avro',
    }),

    # ...
}