Example #1
    def _create_master_and_slave_conf(self, conf_dir):
        """Configure master and create slaves configuration files."""

        defs_file = conf_dir + "/spark-defaults.conf"

        # Configure master
        spark_master = read_param_in_props_file(defs_file, "spark.master")

        if spark_master and spark_master.startswith("local"):
            logger.warn("Your default configuration executes Spark locally. "
                        "Note that unless otherwise specified when launching "
                        "your scripts, the distributed configuration will be "
                        "ignored.")
        else:

            if self.mode == STANDALONE_MODE:
                # Always override?
                spark_master = "spark://%s:%d" % (self.master.address,
                                                  self.port)

            elif self.mode == YARN_MODE:
                if spark_master:
                    if spark_master not in ["yarn-client", "yarn-cluster"]:
                        logger.warn("Provided spark.master is not compatible "
                                    "with YARN mode. Overriding with "
                                    "'yarn-client'")
                        spark_master = "yarn-client"
                else:
                    spark_master = "yarn-client"

            write_in_props_file(defs_file,
                                "spark.master", spark_master,
                                create_if_absent=True,
                                override=True)

        # Configure slaves
        with open(conf_dir + "/slaves", "w") as slaves_file:
            for s in self.hosts:
                slaves_file.write(s.address + "\n")
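
The two property-file helpers used above, read_param_in_props_file and write_in_props_file, come from the library's util module and their source is not part of this listing. Below is only a rough sketch, inferred from how they are called here, of what helpers with these signatures could look like for whitespace-separated files such as spark-defaults.conf; the default argument values and the exact return semantics are assumptions, not the library's actual implementation.

# Sketch of the property-file helpers assumed by the method above.
# NOT the library's code: behaviour is inferred from the call sites only.

def read_param_in_props_file(props_file, name):
    """Return the value of `name` in a whitespace-separated props file, or None."""
    try:
        with open(props_file) as f:
            for line in f:
                tokens = line.split(None, 1)
                if len(tokens) == 2 and tokens[0] == name:
                    return tokens[1].strip()
    except IOError:
        pass
    return None

def write_in_props_file(props_file, name, value,
                        create_if_absent=False, override=True):
    """Set `name` to `value`; return True if the property is present afterwards."""
    try:
        with open(props_file) as f:
            lines = f.readlines()
    except IOError:
        lines = []
    for i, line in enumerate(lines):
        tokens = line.split(None, 1)
        if tokens and tokens[0] == name:
            if override:
                lines[i] = "%s\t%s\n" % (name, value)
                with open(props_file, "w") as f:
                    f.writelines(lines)
            return True   # property lives in this file (possibly just updated)
    if not create_if_absent:
        return False      # not found and not allowed to create it
    with open(props_file, "a") as f:
        f.write("%s\t%s\n" % (name, value))
    return True
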
Example #2
    def change_conf(self, params, conf_file=None,
                    default_file=SPARK_CONF_FILE):
        """Modify Spark configuration. This method copies the configuration
        files from the first host of each g5k cluster conf dir into a local
        temporary dir, do all the changes in place and broadcast the new
        configuration files to all hosts.

        Args:
          params (dict of str:str):
            The parameters to be changed in the form key:value.
          conf_file (str, optional):
            The file where parameters should be set. If not specified, all
            files are checked for the parameter name and the parameter is set
            in the file where the property is found. If not found, the
            parameter is set in the default file.
          default_file (str, optional): The default conf file in which to set
            the parameter if it is not found. Only applies when conf_file is
            not set.
        """

        for cluster in self.hw.get_clusters():
            hosts = cluster.get_hosts()

            # Copy conf files from first host in the cluster
            action = Remote("ls " + self.conf_dir + "/*.conf", [hosts[0]])
            action.run()
            output = action.processes[0].stdout

            remote_conf_files = []
            for f in output.split():
                remote_conf_files.append(os.path.join(self.conf_dir, f))

            tmp_dir = "/tmp/mliroz_temp_spark/"
            if not os.path.exists(tmp_dir):
                os.makedirs(tmp_dir)

            action = Get([hosts[0]], remote_conf_files, tmp_dir)
            action.run()

            # Do replacements in temp file
            if conf_file:
                f = os.path.join(tmp_dir, conf_file)
                for name, value in params.iteritems():
                    write_in_props_file(f, name, value, True)
            else:
                temp_conf_files = [os.path.join(tmp_dir, f) for f in
                                   os.listdir(tmp_dir)]

                for name, value in params.iteritems():
                    for f in temp_conf_files:
                        if write_in_props_file(f, name, value):
                            break
                    else:
                        # Property not found - add it in SPARK_CONF_FILE
                        logger.info("Parameter with name " + name + " has not "
                                    "been found in any conf file. Setting it "
                                    "in " + default_file)
                        f = os.path.join(tmp_dir, default_file)
                        write_in_props_file(f, name, value, True)

            # Copy back the files to all hosts
            self._copy_conf(tmp_dir, hosts)
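
As a usage sketch (the cluster object and the property values below are invented for illustration; sc stands for an already-deployed Spark cluster object exposing change_conf, and the property names are standard Spark settings):

# Hypothetical usage: push two properties to every host, letting the method
# figure out which conf file each one belongs to.
sc.change_conf({
    "spark.executor.memory": "2g",
    "spark.serializer": "org.apache.spark.serializer.KryoSerializer"
})

# Hypothetical usage: force a property into a specific file instead.
sc.change_conf({"spark.driver.memory": "1g"},
               conf_file="spark-defaults.conf")
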
Example #3
    def _set_common_params(self, params, conf_dir, default_tuning=False):
        """Replace common parameters. Some user-specified values are
        overwritten.

        Args:
          params (dict):
            Already defined parameters over all the clusters.
          conf_dir (str):
            The path of the directory with the configuration files.
          default_tuning (bool, optional):
            Whether to use automatic tuning based on some best practices or
            leave the default parameters.
        """

        defs_file = conf_dir + "/spark-defaults.conf"

        # spark-env.sh
        command = "cat >> " + self.conf_dir + "/spark-env.sh << EOF\n"
        command += "SPARK_MASTER_PORT=" + str(self.port) + "\n"
        command += "EOF\n"
        action = Remote(command, self.hosts)
        action.run()

        # Get already set parameters
        global_params = params["global"]
        exec_mem = global_params["exec_mem"]
        exec_cores = global_params["exec_cores"]
        total_execs = global_params["total_execs"]

        # Log parameters
        if self.evs_log_dir:
            write_in_props_file(defs_file,
                                "spark.eventLog.enabled", "true",
                                create_if_absent=True,
                                override=True)

            write_in_props_file(defs_file,
                                "spark.eventLog.dir", self.evs_log_dir,
                                create_if_absent=True,
                                override=True)

        write_in_props_file(defs_file,
                            "spark.logConf", "true",
                            create_if_absent=True,
                            override=False)

        if default_tuning:

            write_in_props_file(defs_file,
                                "spark.executor.memory", "%dm" % exec_mem,
                                create_if_absent=True,
                                override=False)
            write_in_props_file(defs_file,
                                "spark.executor.cores", exec_cores,
                                create_if_absent=True,
                                override=False)
            write_in_props_file(defs_file,
                                "spark.executor.instances", total_execs,
                                create_if_absent=True,
                                override=False)
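
The method only reads params["global"] and the keys exec_mem, exec_cores and total_execs; a plausible shape for that argument, inferred from those lookups (the values are invented for illustration):

# Illustrative shape of the `params` argument consumed above; only the keys
# actually read by _set_common_params are shown, and the values are made up.
params = {
    "global": {
        "exec_mem": 2048,    # MB, written as "%dm" to spark.executor.memory
        "exec_cores": 4,     # written to spark.executor.cores
        "total_execs": 8,    # written to spark.executor.instances
    }
}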