예제 #1
0
    def change_conf(self, params, conf_file=None, default_file=MR_CONF_FILE):
        """Modify Hadoop configuration.

        For each cluster returned by ``self.hw.get_clusters()`` this method
        fetches the ``*.xml`` files found in ``self.conf_dir`` on the first
        host of the cluster into a local temporary directory, applies the
        changes in place and hands the directory to ``self._copy_conf`` so
        the new files are sent to all hosts of that cluster.

        Args:
          params (dict of str:str):
            The parameters to be changed in the form key:value.
          conf_file (str, optional):
            The file where parameters should be set. If not specified, all
            files are checked for the parameter name and the parameter is set
            in the file where the property is found. If not found, the
            parameter is set in the default file.
          default_file (str, optional): The default conf file where to set the
            parameter if not found. Only applies when conf_file is not set.
        """

        for cluster in self.hw.get_clusters():
            hosts = cluster.get_hosts()

            # List the XML conf files available on the first host
            action = Remote("ls " + self.conf_dir + "/*.xml", [hosts[0]])
            action.run()
            output = action.processes[0].stdout

            # os.path.join leaves already-absolute entries untouched
            remote_conf_files = [os.path.join(self.conf_dir, f)
                                 for f in output.split()]

            tmp_dir = "/tmp/mliroz_temp_hadoop/"
            if not os.path.exists(tmp_dir):
                os.makedirs(tmp_dir)

            # Copy conf files from first host in the cluster
            action = Get([hosts[0]], remote_conf_files, tmp_dir)
            action.run()

            # Do replacements in temp file.
            # NOTE: dict.items() is used instead of iteritems() so that the
            # method works under both Python 2 and Python 3.
            if conf_file:
                target = os.path.join(tmp_dir, conf_file)
                for name, value in params.items():
                    # Fourth argument is presumably create_if_absent
                    # (TODO confirm against replace_in_xml_file).
                    replace_in_xml_file(target, name, value, True)
            else:
                # Every entry currently present in the temp dir is scanned
                temp_conf_files = [os.path.join(tmp_dir, f) for f in
                                   os.listdir(tmp_dir)]
                fallback = os.path.join(tmp_dir, default_file)

                for name, value in params.items():
                    for f in temp_conf_files:
                        # A truthy result stops the search for this property
                        if replace_in_xml_file(f, name, value):
                            break
                    else:
                        # Property not found - add it in default_file
                        logger.info("Parameter with name " + name + " has not "
                                    "been found in any conf file. Setting it "
                                    "in " + default_file)
                        replace_in_xml_file(fallback, name, value, True)

            # Copy back the files to all hosts
            self._copy_conf(tmp_dir, hosts)
예제 #2
0
파일: hive.py 프로젝트: rwfazul/hadoop_g5k
    def _configure_servers(self, hosts=None):
        """Write the server-dependent properties into hive-site.xml.

           The file edited is "hive-site.xml" inside self.temp_conf_dir. The
           HDFS URI and job tracker address are built from self.hc (master
           address and ports); the warehouse and metastore locations come
           from self.warehouse_dir and self.metastore_dir.

           Args:
             hosts (list of Host, optional):
               The list of hosts to take into account in the configuration.
               Falls back to self.hosts when empty or not given. Note that
               this method does not read the list after resolving it.
        """

        if not hosts:
            hosts = self.hosts

        hive_site = os.path.join(self.temp_conf_dir, "hive-site.xml")

        def put(key, val):
            # Same positional flag (True) is passed for every property.
            replace_in_xml_file(hive_site, key, val, True)

        hdfs_uri = ("hdfs://" + self.hc.master.address + ":" +
                    str(self.hc.hdfs_port) + "/")
        put("fs.default.name", hdfs_uri)

        job_tracker = self.hc.master.address + ":" + str(self.hc.mapred_port)
        put("mapred.job.tracker", job_tracker)

        put("hive.metastore.warehouse.dir", self.warehouse_dir)

        # Embedded Derby database located at the metastore directory
        derby_url = ("jdbc:derby:;databaseName=" + self.metastore_dir +
                     ";create=true")
        put("javax.jdo.option.ConnectionURL", derby_url)
예제 #3
0
    def _set_common_params(self, params, conf_dir, default_tuning=False):
        """Set the properties shared by all clusters in the core and MapReduce
        configuration files found in conf_dir.

           Each property is created if absent. replace_if_present is not
           passed, so the default of replace_in_xml_file applies to already
           existing values.

           Args:
             params (str):
               Already defined parameters over all the clusters. Not read by
               this method.
             conf_dir (str):
               The path of the directory with the configuration files.
             default_tuning (bool, optional):
               Whether to use automatic tuning based on some best practices or
               leave the default parameters. Not read by this method.
        """

        def force_property(xml_path, key, val):
            # Every common property is created when missing.
            replace_in_xml_file(xml_path, key, val, create_if_absent=True)

        core_site = os.path.join(conf_dir, CORE_CONF_FILE)
        mapred_site = os.path.join(conf_dir, MR_CONF_FILE)

        # Core settings: file system URI, temp dir and topology script
        namenode_uri = "hdfs://%s:%d/" % (self.master.address, self.hdfs_port)
        force_property(core_site, "fs.default.name", namenode_uri)
        force_property(core_site, "hadoop.tmp.dir", self.hadoop_temp_dir)
        force_property(core_site, "topology.script.file.name",
                       self.conf_dir + "/topo.sh")

        # MapReduce settings: job tracker address
        job_tracker = "%s:%d" % (self.master.address, self.mapred_port)
        force_property(mapred_site, "mapred.job.tracker", job_tracker)
예제 #4
0
    def _set_cluster_params(self, cluster, params, conf_dir,
                            default_tuning=False):
        """Set the node manager resource limits of a cluster in the YARN
        configuration file found in conf_dir.

           Args:
             cluster (PhysicalCluster):
               The PhysicalCluster object to take into account in the
               configuration. Only its name is used, as a key into params.
             params (dict):
               Already defined parameters over all the clusters. The entry
               stored under the cluster name must provide "max_cont_mem" and
               "max_cont_cores".
             conf_dir (str):
               The path of the directory with the configuration files.
             default_tuning (bool, optional):
               Whether to use automatic tuning based on some best practices or
               leave the default parameters. Forwarded as replace_if_present.
        """

        yarn_site = os.path.join(conf_dir, YARN_CONF_FILE)

        cluster_params = params[cluster.get_name()]
        container_limits = (
            ("yarn.nodemanager.resource.memory-mb",
             cluster_params["max_cont_mem"]),
            ("yarn.nodemanager.resource.cpu-vcores",
             cluster_params["max_cont_cores"]),
        )

        # Properties are always created; existing ones are only replaced
        # when default tuning is requested.
        for prop, limit in container_limits:
            replace_in_xml_file(yarn_site, prop, str(limit),
                                create_if_absent=True,
                                replace_if_present=default_tuning)
예제 #5
0
    def _set_cluster_params(self, cluster, params, conf_dir,
                            default_tuning=False):
        """Set the task slot and child JVM settings of a cluster in the
        MapReduce configuration file found in conf_dir.

           Nothing is written unless default_tuning is set. The calls pass
           replace_if_present=False.

           Args:
             cluster (PhysicalCluster):
               The PhysicalCluster object to take into account in the
               configuration. Only its name is used, as a key into params.
             params (dict):
               Already defined parameters over all the clusters. The entry
               stored under the cluster name must provide "mem_per_slot_mb",
               "map_slots" and "red_slots"; they are read even when
               default_tuning is not set.
             conf_dir (str):
               The path of the directory with the configuration files.
             default_tuning (bool, optional):
               Whether to use automatic tuning based on some best practices or
               leave the default parameters.
        """

        slot_params = params[cluster.get_name()]
        heap_mb = slot_params["mem_per_slot_mb"]
        n_map = slot_params["map_slots"]
        n_red = slot_params["red_slots"]

        if not default_tuning:
            return

        mapred_site = os.path.join(conf_dir, MR_CONF_FILE)

        slot_limits = (
            ("mapred.tasktracker.map.tasks.maximum", n_map),
            ("mapred.tasktracker.reduce.tasks.maximum", n_red),
        )
        for prop, slots in slot_limits:
            replace_in_xml_file(mapred_site, prop, str(slots),
                                create_if_absent=True,
                                replace_if_present=False)

        # Heap size of the child JVMs, in megabytes
        replace_in_xml_file(mapred_site, "mapred.child.java.opts",
                            "-Xmx%dm" % heap_mb,
                            create_if_absent=True,
                            replace_if_present=False)
예제 #6
0
    def _set_common_params(self, params, conf_dir, default_tuning=False):
        """Replace common parameters. Some user-specified values are
        overwritten.

           The core, YARN and MapReduce configuration files found in conf_dir
           are edited. Server addresses, the framework name and the shuffle
           service are always written. The scheduler maximums replace
           existing values only when default_tuning is set, and the remaining
           memory, core and shuffle settings are only written when
           default_tuning is set.

           Args:
             params (dict):
               Already defined parameters over all the clusters. Its "global"
               entry must provide "sch_max_mem" and "sch_max_cores" and, when
               default_tuning is set, also "min_cont_mem", "map_mem",
               "red_mem", "map_java_heap", "red_java_heap", "io_sort_mb" and
               "io_sort_factor".
             conf_dir (str):
               The path of the directory with the configuration files.
             default_tuning (bool, optional):
               Whether to use automatic tuning based on some best practices or
               leave the default parameters.
        """

        core_file = os.path.join(conf_dir, CORE_CONF_FILE)
        yarn_file = os.path.join(conf_dir, YARN_CONF_FILE)
        mr_file = os.path.join(conf_dir, MR_CONF_FILE)

        global_params = params["global"]
        sch_max_mem = global_params["sch_max_mem"]
        sch_max_cores = global_params["sch_max_cores"]

        # General and HDFS
        replace_in_xml_file(core_file,
                            "fs.defaultFS",
                            "hdfs://%s:%d/" %
                            (self.master.address, self.hdfs_port),
                            create_if_absent=True,
                            replace_if_present=True)
        replace_in_xml_file(core_file,
                            "hadoop.tmp.dir",
                            self.hadoop_temp_dir,
                            create_if_absent=True,
                            replace_if_present=True)
        replace_in_xml_file(core_file,
                            "topology.script.file.name",
                            self.conf_dir + "/topo.sh",
                            create_if_absent=True,
                            replace_if_present=True)

        # YARN
        replace_in_xml_file(yarn_file,
                            "yarn.resourcemanager.hostname",
                            self.master.address,
                            create_if_absent=True)

        # Run MapReduce jobs on top of YARN
        replace_in_xml_file(mr_file,
                            "mapreduce.framework.name",
                            "yarn",
                            create_if_absent=True,
                            replace_if_present=True)

        replace_in_xml_file(yarn_file,
                            "yarn.nodemanager.aux-services",
                            "mapreduce_shuffle",
                            create_if_absent=True,
                            replace_if_present=True)

        # Scheduler maximums: existing values are only replaced when
        # default tuning is requested
        replace_in_xml_file(yarn_file,
                            "yarn.scheduler.maximum-allocation-mb",
                            str(sch_max_mem),
                            create_if_absent=True,
                            replace_if_present=default_tuning)
        replace_in_xml_file(yarn_file,
                            "yarn.scheduler.maximum-allocation-vcores",
                            str(sch_max_cores),
                            create_if_absent=True,
                            replace_if_present=default_tuning)

        if default_tuning:

            # YARN
            min_cont_mem = global_params["min_cont_mem"]
            replace_in_xml_file(yarn_file,
                                "yarn.scheduler.minimum-allocation-mb",
                                str(min_cont_mem),
                                create_if_absent=True,
                                replace_if_present=True)

            # MR memory settings
            map_mem = global_params["map_mem"]
            red_mem = global_params["red_mem"]
            map_java_heap = global_params["map_java_heap"]
            red_java_heap = global_params["red_java_heap"]

            replace_in_xml_file(mr_file,
                                "mapreduce.map.memory.mb",
                                str(map_mem),
                                create_if_absent=True,
                                replace_if_present=True)
            replace_in_xml_file(mr_file,
                                "mapreduce.map.java.opts",
                                "-Xmx%dm" % map_java_heap,
                                create_if_absent=True,
                                replace_if_present=True)

            replace_in_xml_file(mr_file,
                                "mapreduce.reduce.memory.mb",
                                str(red_mem),
                                create_if_absent=True,
                                replace_if_present=True)
            replace_in_xml_file(mr_file,
                                "mapreduce.reduce.java.opts",
                                "-Xmx%dm" % red_java_heap,
                                create_if_absent=True,
                                replace_if_present=True)

            # MR core settings
            replace_in_xml_file(mr_file,
                                "mapreduce.map.cpu.vcores",
                                "1",
                                create_if_absent=True,
                                replace_if_present=True)
            # The reducer counterpart of mapreduce.map.cpu.vcores is
            # mapreduce.reduce.cpu.vcores; "mapreduce.map.reduce.vcores" is
            # not a Hadoop property.
            replace_in_xml_file(mr_file,
                                "mapreduce.reduce.cpu.vcores",
                                "1",
                                create_if_absent=True,
                                replace_if_present=True)

            # MR shuffle
            io_sort_mb = global_params["io_sort_mb"]
            io_sort_factor = global_params["io_sort_factor"]

            replace_in_xml_file(mr_file,
                                "mapreduce.map.output.compress",
                                "true",
                                create_if_absent=True,
                                replace_if_present=True)
            replace_in_xml_file(mr_file,
                                "mapreduce.task.io.sort.mb",
                                str(io_sort_mb),
                                create_if_absent=True,
                                replace_if_present=True)
            replace_in_xml_file(mr_file,
                                "mapreduce.task.io.sort.factor",
                                str(io_sort_factor),
                                create_if_absent=True,
                                replace_if_present=True)