예제 #1
0
    def __init__(self, hosts, topo_list=None, config_file=None):
        """Create a new Hadoop cluster with the given hosts and topology.

        Args:
          hosts (list of Host):
            The hosts to be assigned a topology. Must not be empty: the
            first host is used as the master.
          topo_list (list of str, optional):
            The racks to be assigned to each host. len(hosts) should be equal to
            len(topo_list).
          config_file (str, optional):
            The path of the config file to be used. When not given, only the
            values in self.defaults are available.
        """

        # Load cluster properties, seeded with self.defaults
        config = ConfigParser(self.defaults)
        config.add_section("cluster")
        config.add_section("local")

        if config_file:
            # Use a context manager so the file handle is always closed
            with open(config_file) as config_fp:
                config.readfp(config_fp)

        self.base_dir = config.get("cluster", "hadoop_base_dir")
        self.conf_dir = config.get("cluster", "hadoop_conf_dir")
        self.logs_dir = config.get("cluster", "hadoop_logs_dir")
        self.hadoop_temp_dir = config.get("cluster", "hadoop_temp_dir")
        self.hdfs_port = config.getint("cluster", "hdfs_port")
        self.mapred_port = config.getint("cluster", "mapred_port")
        self.local_base_conf_dir = config.get("local", "local_base_conf_dir")

        self.bin_dir = self.base_dir + "/bin"
        # NOTE(review): sbin_dir points at the same "/bin" directory as
        # bin_dir -- confirm this is intended for this Hadoop layout.
        self.sbin_dir = self.base_dir + "/bin"

        # Configure master and slaves
        self.hosts = hosts
        self.master = hosts[0]

        # Create topology
        self.topology = HadoopTopology(hosts, topo_list)

        # Store cluster information: group the hosts by the cluster that
        # get_host_cluster reports for each of them
        self.host_clusters = {}
        for h in self.hosts:
            self.host_clusters.setdefault(get_host_cluster(h), []).append(h)

        # Create a string to display the topology: invert the host -> rack
        # mapping into rack -> [host addresses]
        racks = {}
        for host, rack in self.topology.topology.iteritems():
            racks.setdefault(rack, []).append(host.address)
        log_topo = ', '.join(
            style.user2(rack) + ': ' +
            ' '.join(style.host(addr.split('.')[0]) for addr in addrs)
            for rack, addrs in racks.iteritems())

        logger.info("Hadoop cluster created with master %s, hosts %s and topology %s",
                    style.host(self.master.address),
                    ' '.join([style.host(h.address.split('.')[0]) for h in self.hosts]),
                    log_topo)
예제 #2
0
    def __init__(self, hosts, topo_list=None, config_file=None):
        """Create a new Hadoop cluster with the given hosts and topology.

        Args:
          hosts (list of Host):
            The hosts to be assigned a topology. Must not be empty: the
            first host is used as the master.
          topo_list (list of str, optional):
            The racks to be assigned to each host. len(hosts) should be equal to
            len(topo_list).
          config_file (str, optional):
            The path of the config file to be used. When not given, only the
            values in self.defaults are available.
        """

        # Load cluster properties, seeded with self.defaults
        config = ConfigParser(self.defaults)
        config.add_section("cluster")
        config.add_section("local")

        if config_file:
            # Use a context manager so the file handle is always closed
            with open(config_file) as config_fp:
                config.readfp(config_fp)

        self.base_dir = config.get("cluster", "hadoop_base_dir")
        self.conf_dir = config.get("cluster", "hadoop_conf_dir")
        self.logs_dir = config.get("cluster", "hadoop_logs_dir")
        self.hadoop_temp_dir = config.get("cluster", "hadoop_temp_dir")
        self.hdfs_port = config.getint("cluster", "hdfs_port")
        self.mapred_port = config.getint("cluster", "mapred_port")
        self.local_base_conf_dir = config.get("local", "local_base_conf_dir")

        self.bin_dir = self.base_dir + "/bin"
        # NOTE(review): sbin_dir points at the same "/bin" directory as
        # bin_dir -- confirm this is intended for this Hadoop layout.
        self.sbin_dir = self.base_dir + "/bin"

        # Configure master and slaves
        self.hosts = hosts
        self.master = hosts[0]

        # Create topology
        self.topology = HadoopTopology(hosts, topo_list)

        # Store cluster information: group the hosts by the cluster that
        # get_host_cluster reports for each of them
        self.host_clusters = {}
        for h in self.hosts:
            self.host_clusters.setdefault(get_host_cluster(h), []).append(h)

        # Create a string to display the topology: invert the host -> rack
        # mapping into rack -> [host addresses]
        racks = {}
        for host, rack in self.topology.topology.iteritems():
            racks.setdefault(rack, []).append(host.address)
        log_topo = ', '.join(
            style.user2(rack) + ': ' +
            ' '.join(style.host(addr.split('.')[0]) for addr in addrs)
            for rack, addrs in racks.iteritems())

        logger.info("Hadoop cluster created with master %s, hosts %s and topology %s",
                    style.host(self.master.address),
                    ' '.join([style.host(h.address.split('.')[0]) for h in self.hosts]),
                    log_topo)
예제 #3
0
    def __init__(self, hosts, topo_list=None, config_file=None):
        """Create a new Hadoop cluster with the given hosts and topology.

        Args:
          hosts (list of Host):
            The hosts to be assigned a topology. Any iterable is accepted; it
            is copied into a list and the first host is used as the master.
          topo_list (list of str, optional):
            The racks to be assigned to each host. len(hosts) should be equal
            to len(topo_list).
          config_file (str, optional):
            The path of the config file to be used. When not given, only the
            values in self.defaults are available.

        Raises:
          IndexError: If hosts is empty.
        """

        # Load properties, seeded with self.defaults
        config = ConfigParser(self.defaults)
        config.add_section("cluster")
        config.add_section("local")

        if config_file:
            # Use a context manager so the file handle is always closed
            with open(config_file) as config_fp:
                config.readfp(config_fp)

        # Deployment properties
        self.local_base_conf_dir = config.get("local", "local_base_conf_dir")
        # Creates a fresh "hadoop-init-*" directory under /tmp on every call
        self.init_conf_dir = tempfile.mkdtemp("", "hadoop-init-", "/tmp")
        self.conf_mandatory_files = [CORE_CONF_FILE,
                                     HDFS_CONF_FILE,
                                     MR_CONF_FILE]

        # Node properties
        self.base_dir = config.get("cluster", "hadoop_base_dir")
        self.conf_dir = config.get("cluster", "hadoop_conf_dir")
        self.logs_dir = config.get("cluster", "hadoop_logs_dir")
        self.hadoop_temp_dir = config.get("cluster", "hadoop_temp_dir")
        self.hdfs_port = config.getint("cluster", "hdfs_port")
        self.mapred_port = config.getint("cluster", "mapred_port")

        self.bin_dir = self.base_dir + "/bin"
        # NOTE(review): sbin_dir points at the same "/bin" directory as
        # bin_dir -- confirm this is intended for this Hadoop layout.
        self.sbin_dir = self.base_dir + "/bin"

        self.java_home = None

        # Configure master and slaves
        self.hosts = list(hosts)
        self.master = self.hosts[0]

        # Create topology. Use the materialized list: if `hosts` was a
        # one-shot iterator it has already been consumed by list() above.
        self.topology = HadoopTopology(self.hosts, topo_list)

        # Store cluster information
        self.hw = hw_manager.make_deployment_hardware()
        self.hw.add_hosts(self.hosts)
        self.master_cluster = self.hw.get_host_cluster(self.master)

        # Create a string to display the topology: invert the host -> rack
        # mapping into rack -> [host addresses]
        racks = {}
        for host, rack in self.topology.topology.iteritems():
            racks.setdefault(rack, []).append(host.address)
        log_topo = ', '.join(
            style.user2(rack) + ': ' +
            ' '.join(style.host(addr.split('.')[0]) for addr in addrs)
            for rack, addrs in racks.iteritems())

        logger.info("Hadoop cluster created with master %s, hosts %s and "
                    "topology %s",
                    style.host(self.master.address),
                    ' '.join([style.host(h.address.split('.')[0])
                              for h in self.hosts]),
                    log_topo)
예제 #4
0
    def __init__(self, hosts, topo_list=None, config_file=None):
        """Create a new Hadoop cluster with the given hosts and topology.

        Args:
          hosts (list of Host):
            The hosts to be assigned a topology. Any iterable is accepted; it
            is copied into a list and the first host is used as the master.
          topo_list (list of str, optional):
            The racks to be assigned to each host. len(hosts) should be equal
            to len(topo_list).
          config_file (str, optional):
            The path of the config file to be used. When not given, only the
            values in self.defaults are available.

        Raises:
          IndexError: If hosts is empty.
        """

        # Load properties, seeded with self.defaults
        config = ConfigParser(self.defaults)
        config.add_section("cluster")
        config.add_section("local")

        if config_file:
            # Use a context manager so the file handle is always closed
            with open(config_file) as config_fp:
                config.readfp(config_fp)

        # Deployment properties
        self.local_base_conf_dir = config.get("local", "local_base_conf_dir")
        # Creates a fresh "hadoop-init-*" directory under /tmp on every call
        self.init_conf_dir = tempfile.mkdtemp("", "hadoop-init-", "/tmp")
        self.conf_mandatory_files = [
            CORE_CONF_FILE, HDFS_CONF_FILE, MR_CONF_FILE
        ]

        # Node properties
        self.base_dir = config.get("cluster", "hadoop_base_dir")
        self.conf_dir = config.get("cluster", "hadoop_conf_dir")
        self.logs_dir = config.get("cluster", "hadoop_logs_dir")
        self.hadoop_temp_dir = config.get("cluster", "hadoop_temp_dir")
        self.hdfs_port = config.getint("cluster", "hdfs_port")
        self.mapred_port = config.getint("cluster", "mapred_port")

        self.bin_dir = self.base_dir + "/bin"
        # NOTE(review): sbin_dir points at the same "/bin" directory as
        # bin_dir -- confirm this is intended for this Hadoop layout.
        self.sbin_dir = self.base_dir + "/bin"

        self.java_home = None

        # Configure master and slaves
        self.hosts = list(hosts)
        self.master = self.hosts[0]

        # Create topology. Use the materialized list: if `hosts` was a
        # one-shot iterator it has already been consumed by list() above.
        self.topology = HadoopTopology(self.hosts, topo_list)

        # Store cluster information
        self.hw = hw_manager.make_deployment_hardware()
        self.hw.add_hosts(self.hosts)
        self.master_cluster = self.hw.get_host_cluster(self.master)

        # Create a string to display the topology: invert the host -> rack
        # mapping into rack -> [host addresses]
        racks = {}
        for host, rack in self.topology.topology.iteritems():
            racks.setdefault(rack, []).append(host.address)
        log_topo = ', '.join(
            style.user2(rack) + ': ' +
            ' '.join(style.host(addr.split('.')[0]) for addr in addrs)
            for rack, addrs in racks.iteritems())

        logger.info(
            "Hadoop cluster created with master %s, hosts %s and "
            "topology %s", style.host(self.master.address),
            ' '.join([style.host(h.address.split('.')[0])
                      for h in self.hosts]), log_topo)