def test_upload_job_files(self, get_remote, get_raw_binary):
    main_names = ["main1", "main2", "main3"]
    lib_names = ["lib1", "lib2", "lib3"]

    def make_data_objects(*args):
        objs = []
        for name in args:
            m = mock.Mock()
            m.name = name
            objs.append(m)
        return objs

    job = mock.Mock()
    job.name = "job"
    job.mains = make_data_objects(*main_names)
    job.libs = make_data_objects(*lib_names)

    # This is to mock "with remote.get_remote(instance) as r"
    remote_instance = mock.Mock()
    get_remote.return_value.__enter__ = mock.Mock(
        return_value=remote_instance)

    get_raw_binary.return_value = "data"

    paths = job_utils.upload_job_files("where", "/somedir", job,
                                       libs_subdir=False)
    self.assertEqual(paths,
                     ["/somedir/" + n for n in main_names + lib_names])
    for path in paths:
        remote_instance.write_file_to.assert_any_call(path, "data")
    remote_instance.write_file_to.reset_mock()

    paths = job_utils.upload_job_files("where", "/somedir", job,
                                       libs_subdir=True)
    remote_instance.execute_command.assert_called_with(
        "mkdir -p /somedir/libs")
    expected = ["/somedir/" + n for n in main_names]
    expected += ["/somedir/libs/" + n for n in lib_names]
    self.assertEqual(paths, expected)
    for path in paths:
        remote_instance.write_file_to.assert_any_call(path, "data")
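# The test above receives get_remote and get_raw_binary as mock arguments,
# which implies mock.patch decorators that are not shown. A minimal sketch,
# assuming hypothetical patch targets in sahara.service.edp.job_utils (the
# dotted paths must match wherever job_utils actually looks the names up):

from unittest import mock  # "import mock" on older Python 2 codebases

# mock.patch decorators apply bottom-up, so the bottom patch becomes the
# first mock argument after self, matching the signature above.
@mock.patch('sahara.service.edp.job_utils.get_raw_binary')
@mock.patch('sahara.service.edp.job_utils.remote.get_remote')
def test_upload_job_files(self, get_remote, get_raw_binary):
    ...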
def run_job(self, job_execution):
    ctx = context.ctx()
    job = conductor.job_get(ctx, job_execution.job_id)

    input_source, output_source = job_utils.get_data_sources(job_execution,
                                                             job)
    for data_source in [input_source, output_source]:
        if data_source and data_source.type == 'hdfs':
            h.configure_cluster_for_hdfs(self.cluster, data_source)
            break

    hdfs_user = self.plugin.get_hdfs_user()

    # TODO(tmckay): this should probably be "get_namenode"
    # but that call does not exist in the plugin api now.
    # However, other engines may need it.
    oozie_server = self.plugin.get_oozie_server(self.cluster)

    wf_dir = job_utils.create_workflow_dir(oozie_server, job, hdfs_user)
    job_utils.upload_job_files(oozie_server, wf_dir, job, hdfs_user)

    wf_xml = workflow_factory.get_workflow_xml(
        job, self.cluster, job_execution, input_source, output_source)

    path_to_workflow = self._upload_workflow_file(oozie_server, wf_dir,
                                                  wf_xml, hdfs_user)

    job_params = self._get_oozie_job_params(hdfs_user, path_to_workflow)

    client = self._get_client()
    oozie_job_id = client.add_job(x.create_hadoop_xml(job_params),
                                  job_execution)
    client.run_job(job_execution, oozie_job_id)
    return oozie_job_id
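# The _get_oozie_job_params helper used above is not shown. A minimal
# sketch, not the actual implementation: the property names are standard
# Oozie job properties, but get_name_node_uri is a hypothetical plugin
# call used here only to build a fully qualified workflow path.
def _get_oozie_job_params(self, hdfs_user, path_to_workflow):
    nn_uri = self.plugin.get_name_node_uri(self.cluster)
    return {
        "user.name": hdfs_user,
        "oozie.wf.application.path": "%s%s" % (nn_uri, path_to_workflow),
        "oozie.use.system.libpath": "true",
    }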
def run_job(self, job_execution):
    ctx = context.ctx()
    job = conductor.job_get(ctx, job_execution.job_id)

    proxy_configs = job_execution.job_configs.get('proxy_configs')

    # We'll always run the driver program on the master
    master = plugin_utils.get_instance(self.cluster, "master")

    # TODO(tmckay): wf_dir should probably be configurable.
    # The only requirement is that the dir is writable by the image user
    wf_dir = job_utils.create_workflow_dir(master, '/tmp/spark-edp', job,
                                           job_execution.id)
    paths = job_utils.upload_job_files(master, wf_dir, job,
                                       libs_subdir=False,
                                       proxy_configs=proxy_configs)

    # We can shorten the paths in this case since we'll run out of wf_dir
    paths = [os.path.basename(p) for p in paths]

    # TODO(tmckay): for now, paths[0] is always assumed to be the app
    # jar and we generate paths in order (mains, then libs).
    # When we have a Spark job type, we can require a "main" and set
    # the app jar explicitly to be "main"
    app_jar = paths.pop(0)

    # The rest of the paths will be passed with --jars
    additional_jars = ",".join(paths)
    if additional_jars:
        additional_jars = "--jars " + additional_jars

    # Launch the spark job using spark-submit and deploy_mode = client
    host = master.hostname()
    port = c_helper.get_config_value("Spark", "Master port", self.cluster)
    spark_submit = os.path.join(
        c_helper.get_config_value("Spark", "Spark home", self.cluster),
        "bin/spark-submit")

    job_class = job_execution.job_configs.configs["edp.java.main_class"]

    # TODO(tmckay): we need to clean up wf_dirs on long running clusters
    # TODO(tmckay): probably allow for general options to spark-submit
    args = " ".join(job_execution.job_configs.get('args', []))

    # The redirects of stdout and stderr will preserve output in the wf_dir
    cmd = "%s %s --class %s %s --master spark://%s:%s %s" % (
        spark_submit, app_jar, job_class, additional_jars, host, port, args)

    # If an exception is raised here, the job_manager will mark
    # the job failed and log the exception
    with remote.get_remote(master) as r:
        # Upload the command launch script
        launch = os.path.join(wf_dir, "launch_command")
        r.write_file_to(launch, self._job_script())
        r.execute_command("chmod +x %s" % launch)
        ret, stdout = r.execute_command(
            "cd %s; ./launch_command %s > /dev/null 2>&1 & echo $!"
            % (wf_dir, cmd))

    if ret == 0:
        # Success, we'll add the wf_dir in job_execution.extra and store
        # pid@instance_id as the job id
        # We know the job is running so return "RUNNING"
        return (stdout.strip() + "@" + master.id,
                edp.JOB_STATUS_RUNNING,
                {'spark-path': wf_dir})

    # Hmm, no exception but something failed.
    # Since we're using backgrounding with redirect, this is unlikely.
    raise e.EDPError(
        _("Spark job execution failed. Exit status = "
          "%(status)s, stdout = %(stdout)s") %
        {'status': ret, 'stdout': stdout})
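# Illustration only: sample values plugged into the cmd format string
# above, so the shape of the final spark-submit invocation is concrete.
# Every value below is made up.
spark_submit = "/opt/spark/bin/spark-submit"    # hypothetical Spark home
app_jar = "wordcount.jar"                       # first uploaded main
job_class = "org.example.WordCount"             # edp.java.main_class
additional_jars = "--jars dep1.jar,dep2.jar"    # remaining lib paths
host, port = "master-host", "7077"
args = "hdfs://input hdfs://output"

cmd = "%s %s --class %s %s --master spark://%s:%s %s" % (
    spark_submit, app_jar, job_class, additional_jars, host, port, args)
print(cmd)
# Prints (one line, shown wrapped here):
#   /opt/spark/bin/spark-submit wordcount.jar --class org.example.WordCount
#   --jars dep1.jar,dep2.jar --master spark://master-host:7077
#   hdfs://input hdfs://output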
def run_job(self, job_execution):
    ctx = context.ctx()
    job = conductor.job_get(ctx, job_execution.job_id)

    # We'll always run the driver program on the master
    master = plugin_utils.get_instance(self.cluster, "master")

    # TODO(tmckay): wf_dir should probably be configurable.
    # The only requirement is that the dir is writable by the image user
    wf_dir = job_utils.create_workflow_dir(master, '/tmp/spark-edp', job,
                                           job_execution.id)
    paths = job_utils.upload_job_files(master, wf_dir, job,
                                       libs_subdir=False)

    # We can shorten the paths in this case since we'll run out of wf_dir
    paths = [os.path.basename(p) for p in paths]

    # TODO(tmckay): for now, paths[0] is always assumed to be the app
    # jar and we generate paths in order (mains, then libs).
    # When we have a Spark job type, we can require a "main" and set
    # the app jar explicitly to be "main"
    app_jar = paths.pop(0)

    # The rest of the paths will be passed with --jars
    additional_jars = ",".join(paths)
    if additional_jars:
        additional_jars = "--jars " + additional_jars

    # Launch the spark job using spark-submit and deploy_mode = client
    host = master.hostname()
    port = c_helper.get_config_value("Spark", "Master port", self.cluster)
    spark_submit = os.path.join(
        c_helper.get_config_value("Spark", "Spark home", self.cluster),
        "bin/spark-submit")

    job_class = job_execution.job_configs.configs["edp.java.main_class"]

    # TODO(tmckay): we need to clean up wf_dirs on long running clusters
    # TODO(tmckay): probably allow for general options to spark-submit
    args = " ".join(job_execution.job_configs.get('args', []))

    # The redirects of stdout and stderr will preserve output in the wf_dir
    cmd = "%s %s --class %s %s --master spark://%s:%s %s" % (
        spark_submit, app_jar, job_class, additional_jars, host, port, args)

    # If an exception is raised here, the job_manager will mark
    # the job failed and log the exception
    with remote.get_remote(master) as r:
        # Upload the command launch script
        launch = os.path.join(wf_dir, "launch_command")
        r.write_file_to(launch, self._job_script())
        r.execute_command("chmod +x %s" % launch)
        ret, stdout = r.execute_command(
            "cd %s; ./launch_command %s > /dev/null 2>&1 & echo $!"
            % (wf_dir, cmd))

    if ret == 0:
        # Success, we'll add the wf_dir in job_execution.extra and store
        # pid@instance_id as the job id
        # We know the job is running so return "RUNNING"
        return (stdout.strip() + "@" + master.id,
                edp.JOB_STATUS_RUNNING,
                {'spark-path': wf_dir})

    # Hmm, no exception but something failed.
    # Since we're using backgrounding with redirect, this is unlikely.
    raise e.EDPError("Spark job execution failed. Exit status = %s, "
                     "stdout = %s" % (ret, stdout))
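# Both run_job variants upload self._job_script() as "launch_command" and
# then run it in the background. A minimal sketch of such a helper,
# assuming the script only needs to exec the command line it is given;
# the real helper may do more (for example, record the exit status).
def _job_script(self):
    # Because the caller runs "cd wf_dir; ./launch_command <cmd> ...",
    # the shell splits <cmd> into arguments and the stdout/stderr
    # redirects set up by the caller land in wf_dir.
    return "#!/bin/bash\nexec \"$@\"\n"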