Example #1
    def test_upload_job_files(self, get_remote, get_raw_binary):
        main_names = ["main1", "main2", "main3"]
        lib_names = ["lib1", "lib2", "lib3"]

        def make_data_objects(*args):
            objs = []
            for name in args:
                m = mock.Mock()
                m.name = name
                objs.append(m)
            return objs

        job = mock.Mock()
        job.name = "job"
        job.mains = make_data_objects(*main_names)
        job.libs = make_data_objects(*lib_names)

        # This is to mock "with remote.get_remote(instance) as r"
        remote_instance = mock.Mock()
        get_remote.return_value.__enter__ = mock.Mock(
            return_value=remote_instance)

        get_raw_binary.return_value = "data"
        paths = job_utils.upload_job_files("where",
                                           "/somedir",
                                           job,
                                           libs_subdir=False)
        self.assertEqual(paths,
                         ["/somedir/" + n for n in main_names + lib_names])
        for path in paths:
            remote_instance.write_file_to.assert_any_call(path, "data")
        remote_instance.write_file_to.reset_mock()

        paths = job_utils.upload_job_files("where",
                                           "/somedir",
                                           job,
                                           libs_subdir=True)
        remote_instance.execute_command.assert_called_with(
            "mkdir -p /somedir/libs")
        expected = ["/somedir/" + n for n in main_names]
        expected += ["/somedir/libs/" + n for n in lib_names]
        self.assertEqual(paths, expected)
        for path in paths:
            remote_instance.write_file_to.assert_any_call(path, "data")
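
A reading aid for the assertions above: the following is a hypothetical, simplified sketch of what upload_job_files has to do to satisfy this test. It is not the real sahara implementation (which, as the later examples show, also deals with an hdfs_user and proxy_configs), and get_remote / get_raw_binary are taken as parameters here purely to keep the sketch self-contained, whereas the test patches them as module-level helpers.

def upload_job_files(where, job_dir, job, libs_subdir=True,
                     get_remote=None, get_raw_binary=None):
    # Hypothetical sketch, inferred only from the assertions in the test above.
    uploaded_paths = []
    with get_remote(where) as r:
        # Mains always go directly into job_dir.
        for main in job.mains:
            path = job_dir + "/" + main.name
            r.write_file_to(path, get_raw_binary(main))
            uploaded_paths.append(path)

        # Libs go into job_dir/libs when libs_subdir is True,
        # otherwise they sit next to the mains.
        lib_dir = (job_dir + "/libs") if libs_subdir else job_dir
        if libs_subdir:
            r.execute_command("mkdir -p %s" % lib_dir)
        for lib in job.libs:
            path = lib_dir + "/" + lib.name
            r.write_file_to(path, get_raw_binary(lib))
            uploaded_paths.append(path)
    return uploaded_paths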
Example #2
    def run_job(self, job_execution):
        ctx = context.ctx()

        job = conductor.job_get(ctx, job_execution.job_id)
        input_source, output_source = job_utils.get_data_sources(job_execution,
                                                                 job)

        for data_source in [input_source, output_source]:
            if data_source and data_source.type == 'hdfs':
                h.configure_cluster_for_hdfs(self.cluster, data_source)
                break

        hdfs_user = self.plugin.get_hdfs_user()

        # TODO(tmckay): this should probably be "get_namenode"
        # but that call does not exist in the plugin api now.
        # However, other engines may need it.
        oozie_server = self.plugin.get_oozie_server(self.cluster)

        wf_dir = job_utils.create_workflow_dir(oozie_server, job, hdfs_user)
        job_utils.upload_job_files(oozie_server, wf_dir, job, hdfs_user)

        wf_xml = workflow_factory.get_workflow_xml(
            job, self.cluster, job_execution, input_source, output_source)

        path_to_workflow = self._upload_workflow_file(oozie_server, wf_dir,
                                                      wf_xml, hdfs_user)

        job_params = self._get_oozie_job_params(hdfs_user,
                                                path_to_workflow)

        client = self._get_client()
        oozie_job_id = client.add_job(x.create_hadoop_xml(job_params),
                                      job_execution)
        client.run_job(job_execution, oozie_job_id)
        return oozie_job_id
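
For reference, the parameters built here are handed to Oozie as Hadoop-style configuration XML. Below is a rough, hypothetical illustration of what _get_oozie_job_params and x.create_hadoop_xml might produce, using the standard Oozie workflow property names with made-up values; the exact keys sahara sets are not shown in this example.

# Hypothetical illustration only: these are the standard Oozie workflow
# properties, not necessarily the exact dict sahara builds.
job_params = {
    "user.name": "hadoop",                                  # the hdfs_user above
    "oozie.wf.application.path":
        "hdfs://namenode:8020/user/hadoop/job-wf",          # path_to_workflow above
}

# create_hadoop_xml renders a dict like this as the <configuration> document
# that Oozie's job submission API expects, roughly:
#
#   <configuration>
#     <property>
#       <name>oozie.wf.application.path</name>
#       <value>hdfs://namenode:8020/user/hadoop/job-wf</value>
#     </property>
#     ...
#   </configuration>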
Example #3
    def run_job(self, job_execution):
        ctx = context.ctx()
        job = conductor.job_get(ctx, job_execution.job_id)

        proxy_configs = job_execution.job_configs.get('proxy_configs')

        # We'll always run the driver program on the master
        master = plugin_utils.get_instance(self.cluster, "master")

        # TODO(tmckay): wf_dir should probably be configurable.
        # The only requirement is that the dir is writable by the image user
        wf_dir = job_utils.create_workflow_dir(master, '/tmp/spark-edp', job,
                                               job_execution.id)
        paths = job_utils.upload_job_files(master,
                                           wf_dir,
                                           job,
                                           libs_subdir=False,
                                           proxy_configs=proxy_configs)

        # We can shorten the paths in this case since we'll run from within wf_dir
        paths = [os.path.basename(p) for p in paths]

        # TODO(tmckay): for now, paths[0] is always assumed to be the app
        # jar and we generate paths in order (mains, then libs).
        # When we have a Spark job type, we can require a "main" and set
        # the app jar explicitly to be "main"
        app_jar = paths.pop(0)

        # The rest of the paths will be passed with --jars
        additional_jars = ",".join(paths)
        if additional_jars:
            additional_jars = "--jars " + additional_jars

        # Launch the spark job using spark-submit and deploy_mode = client
        host = master.hostname()
        port = c_helper.get_config_value("Spark", "Master port", self.cluster)
        spark_submit = os.path.join(
            c_helper.get_config_value("Spark", "Spark home", self.cluster),
            "bin/spark-submit")

        job_class = job_execution.job_configs.configs["edp.java.main_class"]

        # TODO(tmckay): we need to clean up wf_dirs on long running clusters
        # TODO(tmckay): probably allow for general options to spark-submit
        args = " ".join(job_execution.job_configs.get('args', []))

        # The redirects of stdout and stderr will preserve output in the wf_dir
        cmd = "%s %s --class %s %s --master spark://%s:%s %s" % (
            spark_submit, app_jar, job_class, additional_jars, host, port,
            args)

        # If an exception is raised here, the job_manager will mark
        # the job failed and log the exception
        with remote.get_remote(master) as r:
            # Upload the command launch script
            launch = os.path.join(wf_dir, "launch_command")
            r.write_file_to(launch, self._job_script())
            r.execute_command("chmod +x %s" % launch)
            ret, stdout = r.execute_command(
                "cd %s; ./launch_command %s > /dev/null 2>&1 & echo $!" %
                (wf_dir, cmd))

        if ret == 0:
            # Success, we'll add the wf_dir in job_execution.extra and store
            # pid@instance_id as the job id
            # We know the job is running so return "RUNNING"
            return (stdout.strip() + "@" + master.id, edp.JOB_STATUS_RUNNING, {
                'spark-path': wf_dir
            })

        # Hmm, no exception but something failed.
        # Since we're using backgrounding with redirect, this is unlikely.
        raise e.EDPError(
            _("Spark job execution failed. Exit status = "
              "%(status)s, stdout = %(stdout)s") % {
                  'status': ret,
                  'stdout': stdout
              })
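
With hypothetical values substituted for the cluster configuration and job settings, the format string above produces a command of the following shape (all values are illustrative, not taken from a real deployment).

# Illustrative values only; in the engine above these come from the cluster
# configuration, the uploaded job files and the job_execution record.
spark_submit = "/opt/spark/bin/spark-submit"
app_jar = "spark-wordcount.jar"
job_class = "org.example.WordCount"
additional_jars = "--jars helper-lib.jar"
host, port = "master-node", "7077"
args = "hdfs://namenode/input hdfs://namenode/output"

cmd = "%s %s --class %s %s --master spark://%s:%s %s" % (
    spark_submit, app_jar, job_class, additional_jars, host, port, args)
# cmd == "/opt/spark/bin/spark-submit spark-wordcount.jar "
#        "--class org.example.WordCount --jars helper-lib.jar "
#        "--master spark://master-node:7077 "
#        "hdfs://namenode/input hdfs://namenode/output"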
Example #4
    def run_job(self, job_execution):
        ctx = context.ctx()
        job = conductor.job_get(ctx, job_execution.job_id)

        # We'll always run the driver program on the master
        master = plugin_utils.get_instance(self.cluster, "master")

        # TODO(tmckay): wf_dir should probably be configurable.
        # The only requirement is that the dir is writable by the image user
        wf_dir = job_utils.create_workflow_dir(master, '/tmp/spark-edp', job,
                                               job_execution.id)
        paths = job_utils.upload_job_files(master, wf_dir, job,
                                           libs_subdir=False)

        # We can shorten the paths in this case since we'll run from within wf_dir
        paths = [os.path.basename(p) for p in paths]

        # TODO(tmckay): for now, paths[0] is always assumed to be the app
        # jar and we generate paths in order (mains, then libs).
        # When we have a Spark job type, we can require a "main" and set
        # the app jar explicitly to be "main"
        app_jar = paths.pop(0)

        # The rest of the paths will be passed with --jars
        additional_jars = ",".join(paths)
        if additional_jars:
            additional_jars = "--jars " + additional_jars

        # Launch the spark job using spark-submit and deploy_mode = client
        host = master.hostname()
        port = c_helper.get_config_value("Spark", "Master port", self.cluster)
        spark_submit = os.path.join(
            c_helper.get_config_value("Spark",
                                      "Spark home",
                                      self.cluster),
            "bin/spark-submit")

        job_class = job_execution.job_configs.configs["edp.java.main_class"]

        # TODO(tmckay): we need to clean up wf_dirs on long running clusters
        # TODO(tmckay): probably allow for general options to spark-submit
        args = " ".join(job_execution.job_configs.get('args', []))

        # The redirects of stdout and stderr will preserve output in the wf_dir
        cmd = "%s %s --class %s %s --master spark://%s:%s %s" % (
            spark_submit,
            app_jar,
            job_class,
            additional_jars,
            host,
            port,
            args)

        # If an exception is raised here, the job_manager will mark
        # the job failed and log the exception
        with remote.get_remote(master) as r:
            # Upload the command launch script
            launch = os.path.join(wf_dir, "launch_command")
            r.write_file_to(launch, self._job_script())
            r.execute_command("chmod +x %s" % launch)
            ret, stdout = r.execute_command(
                "cd %s; ./launch_command %s > /dev/null 2>&1 & echo $!"
                % (wf_dir, cmd))

        if ret == 0:
            # Success, we'll add the wf_dir in job_execution.extra and store
            # pid@instance_id as the job id
            # We know the job is running so return "RUNNING"
            return (stdout.strip() + "@" + master.id,
                    edp.JOB_STATUS_RUNNING,
                    {'spark-path': wf_dir})

        # Hmm, no exception but something failed.
        # Since we're using backgrounding with redirect, this is unlikely.
        raise e.EDPError("Spark job execution failed. Exit status = %s, "
                         "stdout = %s" % (ret, stdout))