def run_job(self, job_execution):
    ctx = context.ctx()
    job = conductor.job_get(ctx, job_execution.job_id)
    input_source, output_source = job_utils.get_data_sources(job_execution,
                                                             job)

    # If either data source lives in HDFS, the cluster must be
    # configured for HDFS access once before the job runs.
    for data_source in [input_source, output_source]:
        if data_source and data_source.type == 'hdfs':
            h.configure_cluster_for_hdfs(self.cluster, data_source)
            break

    hdfs_user = self.plugin.get_hdfs_user()

    # TODO(tmckay): this should probably be "get_namenode"
    # but that call does not exist in the plugin api now.
    # However, other engines may need it.
    oozie_server = self.plugin.get_oozie_server(self.cluster)

    # Stage the job: create a workflow directory in HDFS, upload the
    # job binaries, then generate and upload the workflow.xml.
    wf_dir = job_utils.create_hdfs_workflow_dir(oozie_server, job, hdfs_user)
    job_utils.upload_job_files_to_hdfs(oozie_server, wf_dir, job, hdfs_user)

    wf_xml = workflow_factory.get_workflow_xml(
        job, self.cluster, job_execution, input_source, output_source)

    path_to_workflow = self._upload_workflow_file(oozie_server, wf_dir,
                                                  wf_xml, hdfs_user)

    # Submit and start the job through the Oozie client.
    job_params = self._get_oozie_job_params(hdfs_user, path_to_workflow)
    client = self._get_client()
    oozie_job_id = client.add_job(x.create_hadoop_xml(job_params),
                                  job_execution)
    client.run_job(job_execution, oozie_job_id)

    # The initial status lookup is best-effort; a failure here should
    # not fail the submission itself.
    try:
        status = client.get_job_status(job_execution, oozie_job_id)['status']
    except Exception:
        status = None
    return (oozie_job_id, status, None)
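# For illustration only: _get_oozie_job_params (called above) presumably
# assembles the standard Oozie submission properties. A minimal sketch,
# assuming the engine knows the NameNode URI; the 'nn_uri' attribute
# below is hypothetical, not part of the original code.
def _get_oozie_job_params(self, hdfs_user, path_to_workflow):
    # Oozie requires at least the workflow application path and the
    # submitting user; create_hadoop_xml() above would turn this dict
    # into the <configuration> document the Oozie API expects.
    return {
        'oozie.wf.application.path': '%s%s' % (self.nn_uri,
                                               path_to_workflow),
        'user.name': hdfs_user,
    }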
def test_hdfs_upload_job_files(self, conductor_raw_data, remote_class,
                               dir_missing, helper, remote):
    remote_class.__exit__.return_value = 'closed'
    remote.return_value = remote_class
    helper.return_value = 'ok'
    dir_missing.return_value = False
    conductor_raw_data.return_value = 'ok'

    # A Pig job should upload its script into the workflow directory.
    job, _ = _create_all_stack(edp.JOB_TYPE_PIG)
    res = job_utils.upload_job_files_to_hdfs(mock.Mock(), 'job_prefix',
                                             job, 'hadoop')
    self.assertEqual(['job_prefix/script.pig'], res)

    # A MapReduce job should upload its jar into the lib/ subdirectory.
    job, _ = _create_all_stack(edp.JOB_TYPE_MAPREDUCE)
    res = job_utils.upload_job_files_to_hdfs(mock.Mock(), 'job_prefix',
                                             job, 'hadoop')
    self.assertEqual(['job_prefix/lib/main.jar'], res)

    remote.reset_mock()
    remote_class.reset_mock()
    helper.reset_mock()
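# Note: the five mock arguments to the test above are presumably injected
# by stacked @mock.patch decorators. Decorators apply bottom-up, so the
# bottom-most patch supplies the first argument (conductor_raw_data). A
# sketch of such a stack, with hypothetical target paths:
#
#   @mock.patch('sahara.utils.remote.get_remote')                   # -> remote
#   @mock.patch('sahara.service.edp.hdfs_helper.put_file_to_hdfs')  # -> helper
#   @mock.patch('sahara.service.edp.hdfs_helper._dir_missing')      # -> dir_missing
#   @mock.patch('sahara.utils.ssh_remote.InstanceInteropHelper')    # -> remote_class
#   @mock.patch('sahara.conductor.API.job_binary_internal_get_raw_data')
#   def test_hdfs_upload_job_files(self, conductor_raw_data, remote_class,
#                                  dir_missing, helper, remote):
#       ...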