Example #1
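cancel_job kills a running Oozie workflow through the cluster's Oozie endpoint, then records the job's final status and end time.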
def cancel_job(job_execution_id):
    ctx = context.ctx()
    job_execution = conductor.job_execution_get(ctx, job_execution_id)
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)

    # Kill the running workflow via the cluster's Oozie endpoint.
    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    client.kill_job(job_execution.oozie_job_id)

    # Record the final Oozie status and the end time on the job execution.
    job_info = client.get_job_status(job_execution.oozie_job_id)
    update = {"info": job_info, "end_time": datetime.datetime.now()}
    job_execution = conductor.job_execution_update(ctx, job_execution, update)

    return job_execution
Example #2
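run_job validates the job's input and output data sources, stages the workflow files into HDFS, builds the workflow XML, and submits and starts the job through the Oozie client.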
def run_job(job_execution):
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    #TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    hdfs_user = plugin.get_hdfs_user()
    wf_dir = create_workflow_dir(u.get_jobtracker(cluster), job, hdfs_user)
    upload_job_files(u.get_jobtracker(cluster), wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    # Do other job type specific setup here, for example
    # uploading hive configuration
    creator.configure_workflow_if_needed(cluster, wf_dir)

    wf_xml = creator.get_workflow_xml(job_execution.job_configs, input_source,
                                      output_source)

    path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster), wf_dir,
                                            wf_xml, hdfs_user)

    # Build the JobTracker and NameNode URIs with hardcoded ports.
    jt_path = '%s:8021' % u.get_jobtracker(cluster).hostname
    nn_path = 'hdfs://%s:8020' % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": "hadoop",
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {
            'oozie_job_id': oozie_job_id,
            'start_time': datetime.datetime.now()
        })

    return job_execution
Example #3
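get_job_status fetches the current Oozie status for a job execution and, if the job has reached a terminal state, records an end time as well.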
def get_job_status(job_execution_id):
    ctx = context.ctx()
    job_execution = conductor.job_execution_get(ctx, job_execution_id)
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)

    if cluster is None or cluster.status != 'Active':
        return job_execution

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie")
    job_info = client.get_job_status(job_execution.oozie_job_id)
    update = {"info": job_info}
    # Only set an end time once the job has reached a terminal state.
    if job_info['status'] in terminated_job_states:
        update['end_time'] = datetime.datetime.now()

    job_execution = conductor.job_execution_update(ctx, job_execution, update)
    return job_execution
Example #4
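Another variant of get_job_status that returns early when no Oozie job id has been assigned yet and constructs the client with a reference to the Oozie server.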
def get_job_status(job_execution_id):
    ctx = context.ctx()
    job_execution = conductor.job_execution_get(ctx, job_execution_id)
    if job_execution.oozie_job_id is None:
        # We don't have an Oozie id yet for this job, that's okay
        return job_execution

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)

    if cluster is None or cluster.status != 'Active':
        return job_execution

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie",
                           _get_oozie_server(cluster))
    job_info = client.get_job_status(job_execution)
    update = {"info": job_info}
    if job_info['status'] in terminated_job_states:
        update['end_time'] = datetime.datetime.now()

    job_execution = conductor.job_execution_update(ctx, job_execution, update)
    return job_execution
Example #5
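A variant of run_job that receives the context as an argument and builds the workflow XML directly from the job type and its data sources.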
def run_job(ctx, job_execution):
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution.status

    job = conductor.job_get(ctx, job_execution.job_id)
    job_origin = conductor.job_origin_get(context.ctx(), job.job_origin_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    #TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    wf_dir = create_workflow_dir(u.get_jobtracker(cluster), job)
    upload_job_file(u.get_jobtracker(cluster), wf_dir, job_origin, job)

    wf_xml = build_workflow_for_job(job.type, input_source, output_source)
    path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster), wf_dir,
                                            wf_xml)

    jt_path = '%s:8021' % u.get_jobtracker(cluster).hostname
    nn_path = 'hdfs://%s:8020' % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": "hadoop",
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {
            'oozie_job_id': oozie_job_id,
            'start_time': datetime.datetime.now()
        })

    return job_execution
Example #6
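A fuller variant of run_job: Java jobs carry no data sources, HDFS data sources trigger extra cluster configuration, and the ResourceManager and NameNode URLs are read from the cluster info rather than hardcoded.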
def run_job(job_execution):
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    # Java jobs carry no input/output data sources.
    if not edp.compare_job_type(job.type, 'Java'):
        input_source = conductor.data_source_get(ctx, job_execution.input_id)
        output_source = conductor.data_source_get(ctx, job_execution.output_id)
    else:
        input_source = None
        output_source = None
    #TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    for data_source in [input_source, output_source]:
        if data_source and data_source.type == 'hdfs':
            h.configure_cluster_for_hdfs(cluster, data_source)

    hdfs_user = _get_hdfs_user(cluster)
    oozie_server = _get_oozie_server(cluster)
    wf_dir = create_workflow_dir(oozie_server, job, hdfs_user)
    upload_job_files(oozie_server, wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    # Do other job type specific setup here, for example
    # uploading hive configuration
    creator.configure_workflow_if_needed(cluster, wf_dir)

    wf_xml = creator.get_workflow_xml(job_execution, input_source,
                                      output_source)

    path_to_workflow = upload_workflow_file(oozie_server, wf_dir, wf_xml,
                                            hdfs_user)

    rm_path = _get_resource_manager_path(cluster)
    nn_path = cluster['info']['HDFS']['NameNode']

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/",
                           _get_oozie_server(cluster))
    job_parameters = {
        "jobTracker": rm_path,
        "nameNode": nn_path,
        "user.name": hdfs_user,
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters),
                                  job_execution)
    job_execution = conductor.job_execution_update(
        ctx, job_execution, {
            'oozie_job_id': oozie_job_id,
            'start_time': datetime.datetime.now()
        })
    client.run_job(job_execution, oozie_job_id)

    return job_execution
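
A minimal sketch of how these entry points could be driven end to end, assuming the run_job and get_job_status functions and the terminated_job_states set shown above. The wait_for_completion helper and its polling interval are hypothetical, and the sketch assumes the updated execution record exposes its id and the stored status dict as .id and .info.

import time

def wait_for_completion(job_execution, poll_interval=5):
    # Submit the job; run_job stores the Oozie id and start time
    # on the execution record (see Examples #2 and #6).
    job_execution = run_job(job_execution)

    # Poll Oozie until the job reaches a terminal state
    # (see Examples #3 and #4).
    while True:
        job_execution = get_job_status(job_execution.id)
        info = job_execution.info or {}
        if info.get('status') in terminated_job_states:
            return job_execution
        time.sleep(poll_interval)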