def check_job_executor(data, job_id):
    """Validate a job-execution request before it is run.

    :param data: request dict; must carry 'cluster_id', and for non-Java
                 job types also 'input_id'/'output_id'
    :param job_id: id of the job to execute; resolved via the API
    :raises ex.InvalidDataException: when required fields are missing for
            the resolved job type
    """
    job = api.get_job(job_id)
    job_type, subtype = edp.split_job_type(job.type)

    # Verify the cluster exists before anything else dereferences its id.
    main_base.check_cluster_exists(data['cluster_id'])

    # Check if cluster contains Oozie service to run job
    main_base.check_edp_job_support(data['cluster_id'])

    # All types except Java require input and output objects
    if job_type == 'Java':
        if not _is_main_class_present(data):
            raise ex.InvalidDataException(_('Java job must '
                                            'specify edp.java.main_class'))
    else:
        # Delegate to the shared helper instead of duplicating the
        # existence/distinctness checks inline.
        check_data_sources(data, job)

        if job_type == 'MapReduce' and (
                subtype == 'Streaming' and not _streaming_present(data)):
            raise ex.InvalidDataException(_("%s job "
                                            "must specify streaming mapper "
                                            "and reducer") % job.type)
def check_data_sources(data, job):
    """Ensure the request names two distinct, existing data sources.

    :param data: request dict expected to hold 'input_id' and 'output_id'
    :param job: job object whose type is used in the error message
    :raises ex.InvalidDataException: when either id is absent
    """
    # Guard clause: both ids must be present before any lookup is attempted.
    if 'input_id' not in data or 'output_id' not in data:
        raise ex.InvalidDataException(_("%s job requires 'input_id' "
                                        "and 'output_id'") % job.type)

    input_id = data['input_id']
    output_id = data['output_id']

    b.check_data_source_exists(input_id)
    b.check_data_source_exists(output_id)
    b.check_data_sources_are_different(input_id, output_id)