def cancel_job(job_execution_id):
    """Kill the Oozie job behind a job execution and record its final state.

    :param job_execution_id: id of the job execution to cancel
    :returns: the updated job execution object
    """
    ctx = context.ctx()
    job_execution = conductor.job_execution_get(ctx, job_execution_id)

    # The job may never have been submitted to Oozie (no id yet);
    # there is nothing to kill in that case.
    if job_execution.oozie_job_id is None:
        return job_execution

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    client.kill_job(job_execution.oozie_job_id)

    # Re-read the status after the kill so the stored info reflects
    # the job's terminal state.
    job_info = client.get_job_status(job_execution.oozie_job_id)
    update = {"info": job_info,
              "end_time": datetime.datetime.now()}
    job_execution = conductor.job_execution_update(ctx, job_execution,
                                                   update)
    return job_execution
def run_job(job_execution):
    """Submit a job execution to Oozie on its cluster.

    Uploads the workflow files to HDFS, builds the workflow XML and
    launches it through the Oozie client. If the cluster is not Active
    the execution is returned unchanged.

    :param job_execution: the job execution to launch
    :returns: the updated job execution object
    """
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    # Jobs can only be launched on a running cluster.
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    #TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    hdfs_user = plugin.get_hdfs_user()
    wf_dir = create_workflow_dir(u.get_jobtracker(cluster), job, hdfs_user)
    upload_job_files(u.get_jobtracker(cluster), wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    # Do other job type specific setup here, for example
    # uploading hive configuration
    creator.configure_workflow_if_needed(cluster, wf_dir)

    wf_xml = creator.get_workflow_xml(job_execution.job_configs,
                                      input_source, output_source)

    path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster),
                                            wf_dir, wf_xml, hdfs_user)

    jt_path = '%s:8021' % u.get_jobtracker(cluster).hostname
    nn_path = 'hdfs://%s:8020' % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        # Run the job as the plugin-reported hdfs user (the same user the
        # workflow files were uploaded as) instead of a hard-coded account.
        "user.name": hdfs_user,
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution,
        {'oozie_job_id': oozie_job_id,
         'start_time': datetime.datetime.now()})

    return job_execution
def get_job_status(job_execution_id):
    """Poll Oozie for the current status of a job execution and persist it.

    :param job_execution_id: id of the job execution to poll
    :returns: the (possibly updated) job execution object
    """
    ctx = context.ctx()
    job_execution = conductor.job_execution_get(ctx, job_execution_id)

    # The job may not have been submitted to Oozie yet; without an
    # Oozie id there is no status to query.
    if job_execution.oozie_job_id is None:
        return job_execution

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster is None or cluster.status != 'Active':
        return job_execution

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie")
    job_info = client.get_job_status(job_execution.oozie_job_id)
    update = {"info": job_info}
    # Record the end time once the job reaches a terminal state.
    if job_info['status'] in terminated_job_states:
        update['end_time'] = datetime.datetime.now()

    job_execution = conductor.job_execution_update(ctx, job_execution,
                                                   update)
    return job_execution
def get_job_status(job_execution_id):
    """Refresh a job execution's stored info from Oozie.

    Returns the execution unchanged when it has no Oozie id yet or when
    its cluster is missing/not Active; otherwise stores the latest Oozie
    status and, for terminal states, an end time.

    :param job_execution_id: id of the job execution to refresh
    :returns: the (possibly updated) job execution object
    """
    ctx = context.ctx()
    job_execution = conductor.job_execution_get(ctx, job_execution_id)

    # We don't have an Oozie id yet for this job, that's okay
    if job_execution.oozie_job_id is None:
        return job_execution

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster is None or cluster.status != 'Active':
        return job_execution

    oozie_url = cluster['info']['JobFlow']['Oozie'] + "/oozie"
    client = o.OozieClient(oozie_url, _get_oozie_server(cluster))
    job_info = client.get_job_status(job_execution)

    update = {"info": job_info}
    if job_info['status'] in terminated_job_states:
        update['end_time'] = datetime.datetime.now()

    return conductor.job_execution_update(ctx, job_execution, update)
def run_job(ctx, job_execution):
    """Submit a job execution to Oozie on its cluster.

    Uploads the job file and workflow XML to HDFS and launches the
    workflow through the Oozie client. If the cluster is not Active the
    execution is returned unchanged.

    :param ctx: request context to use for all conductor calls
    :param job_execution: the job execution to launch
    :returns: the updated job execution object
    """
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    # Jobs can only be launched on a running cluster. Return the
    # execution object itself, matching the success path below.
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    # Use the ctx we were given rather than building a fresh one.
    job_origin = conductor.job_origin_get(ctx, job.job_origin_id)
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    #TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    wf_dir = create_workflow_dir(u.get_jobtracker(cluster), job)
    upload_job_file(u.get_jobtracker(cluster), wf_dir, job_origin, job)

    wf_xml = build_workflow_for_job(job.type, input_source, output_source)
    path_to_workflow = upload_workflow_file(u.get_jobtracker(cluster),
                                            wf_dir, wf_xml)

    jt_path = '%s:8021' % u.get_jobtracker(cluster).hostname
    nn_path = 'hdfs://%s:8020' % u.get_namenode(cluster).hostname

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/")
    job_parameters = {
        "jobTracker": jt_path,
        "nameNode": nn_path,
        "user.name": "hadoop",
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters))
    client.run_job(oozie_job_id)
    job_execution = conductor.job_execution_update(
        ctx, job_execution,
        {'oozie_job_id': oozie_job_id,
         'start_time': datetime.datetime.now()})

    return job_execution
def run_job(job_execution):
    """Launch the given job execution on its cluster through Oozie.

    Prepares data sources (Java jobs have none), stages the workflow
    files into HDFS, submits the workflow to Oozie and records the Oozie
    id and start time. An execution on a non-Active cluster is returned
    unchanged.

    :param job_execution: the job execution to launch
    :returns: the updated job execution object
    """
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)

    # Java jobs carry no data sources; every other type reads both.
    input_source = None
    output_source = None
    if not edp.compare_job_type(job.type, 'Java'):
        input_source = conductor.data_source_get(ctx, job_execution.input_id)
        output_source = conductor.data_source_get(ctx,
                                                  job_execution.output_id)

    #TODO(nprivalova): should be removed after all features implemented
    validate(input_source, output_source, job)

    for src in (input_source, output_source):
        if src and src.type == 'hdfs':
            h.configure_cluster_for_hdfs(cluster, src)

    hdfs_user = _get_hdfs_user(cluster)
    oozie_server = _get_oozie_server(cluster)

    wf_dir = create_workflow_dir(oozie_server, job, hdfs_user)
    upload_job_files(oozie_server, wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    # Do other job type specific setup here, for example
    # uploading hive configuration
    creator.configure_workflow_if_needed(cluster, wf_dir)

    wf_xml = creator.get_workflow_xml(job_execution,
                                      input_source, output_source)
    path_to_workflow = upload_workflow_file(oozie_server, wf_dir,
                                            wf_xml, hdfs_user)

    rm_path = _get_resource_manager_path(cluster)
    nn_path = cluster['info']['HDFS']['NameNode']

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/",
                           _get_oozie_server(cluster))
    job_parameters = {
        "jobTracker": rm_path,
        "nameNode": nn_path,
        "user.name": hdfs_user,
        "oozie.wf.application.path": "%s%s" % (nn_path, path_to_workflow),
        "oozie.use.system.libpath": "true"
    }

    # Persist the Oozie id before starting the job so a status poll that
    # races the start still finds the id.
    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters),
                                  job_execution)
    job_execution = conductor.job_execution_update(
        ctx, job_execution,
        {'oozie_job_id': oozie_job_id,
         'start_time': datetime.datetime.now()})
    client.run_job(job_execution, oozie_job_id)

    return job_execution