def __init__(self, hosts, topo_list=None, config_file=None):
    """Create a new Hadoop cluster with the given hosts and topology.

    Args:
      hosts (list of Host):
        The hosts to be assigned a topology. The first host becomes the
        master of the cluster.
      topo_list (list of str, optional):
        The racks to be assigned to each host. len(hosts) should be equal to
        len(topo_list).
      config_file (str, optional):
        The path of the config file to be used. When omitted, only the
        class-level defaults (self.defaults) are used.
    """

    # Load cluster properties. Both sections are created up front so the
    # lookups below can fall back to the defaults when no file is given.
    config = ConfigParser(self.defaults)
    config.add_section("cluster")
    config.add_section("local")

    if config_file:
        # Use a context manager so the file handle is closed as soon as
        # the parser has consumed it (it was previously left open).
        with open(config_file) as config_fp:
            config.readfp(config_fp)

    self.base_dir = config.get("cluster", "hadoop_base_dir")
    self.conf_dir = config.get("cluster", "hadoop_conf_dir")
    self.logs_dir = config.get("cluster", "hadoop_logs_dir")
    self.hadoop_temp_dir = config.get("cluster", "hadoop_temp_dir")
    self.hdfs_port = config.getint("cluster", "hdfs_port")
    self.mapred_port = config.getint("cluster", "mapred_port")
    self.local_base_conf_dir = config.get("local", "local_base_conf_dir")

    self.bin_dir = self.base_dir + "/bin"
    # NOTE(review): sbin_dir points at the same "bin" directory as bin_dir;
    # presumably intentional for this Hadoop layout - confirm.
    self.sbin_dir = self.base_dir + "/bin"

    # Configure master and slaves. Keep a private copy of the host list so
    # later changes to the caller's list do not affect the cluster, and so
    # any iterable of hosts is accepted.
    self.hosts = list(hosts)
    self.master = self.hosts[0]

    # Create topology
    self.topology = HadoopTopology(self.hosts, topo_list)

    # Store cluster information: group the hosts by the cluster they
    # belong to.
    self.host_clusters = {}
    for h in self.hosts:
        g5k_cluster = get_host_cluster(h)
        self.host_clusters.setdefault(g5k_cluster, []).append(h)

    # Create a string to display the topology: invert the host -> rack
    # mapping into rack -> [addresses].
    racks = {rack: [] for rack in self.topology.topology.values()}
    for host, rack in self.topology.topology.iteritems():
        racks[rack].append(host.address)

    # Only the first label of each address (up to the first dot) is shown.
    log_topo = ', '.join(
        style.user2(rack) + ': ' +
        ' '.join(style.host(addr.split('.')[0]) for addr in addrs)
        for rack, addrs in racks.iteritems())

    logger.info("Hadoop cluster created with master %s, hosts %s and "
                "topology %s",
                style.host(self.master.address),
                ' '.join(style.host(h.address.split('.')[0])
                         for h in self.hosts),
                log_topo)
def __init__(self, hosts, topo_list=None, config_file=None):
    """Create a new Hadoop cluster with the given hosts and topology.

    Args:
      hosts (list of Host):
        The hosts to be assigned a topology. The first host becomes the
        master of the cluster.
      topo_list (list of str, optional):
        The racks to be assigned to each host. len(hosts) should be equal to
        len(topo_list).
      config_file (str, optional):
        The path of the config file to be used. When omitted, only the
        class-level defaults (self.defaults) are used.
    """

    # Load properties. Both sections are created up front so the lookups
    # below can fall back to the defaults when no file is given.
    config = ConfigParser(self.defaults)
    config.add_section("cluster")
    config.add_section("local")

    if config_file:
        # Use a context manager so the file handle is closed as soon as
        # the parser has consumed it (it was previously left open).
        with open(config_file) as config_fp:
            config.readfp(config_fp)

    # Deployment properties
    self.local_base_conf_dir = config.get("local", "local_base_conf_dir")
    # A fresh temporary directory is created under /tmp at construction.
    self.init_conf_dir = tempfile.mkdtemp(suffix="", prefix="hadoop-init-",
                                          dir="/tmp")
    self.conf_mandatory_files = [CORE_CONF_FILE,
                                 HDFS_CONF_FILE,
                                 MR_CONF_FILE]

    # Node properties
    self.base_dir = config.get("cluster", "hadoop_base_dir")
    self.conf_dir = config.get("cluster", "hadoop_conf_dir")
    self.logs_dir = config.get("cluster", "hadoop_logs_dir")
    self.hadoop_temp_dir = config.get("cluster", "hadoop_temp_dir")
    self.hdfs_port = config.getint("cluster", "hdfs_port")
    self.mapred_port = config.getint("cluster", "mapred_port")

    self.bin_dir = self.base_dir + "/bin"
    # NOTE(review): sbin_dir points at the same "bin" directory as bin_dir;
    # presumably intentional for this Hadoop layout - confirm.
    self.sbin_dir = self.base_dir + "/bin"

    # Initialised to None here; nothing in this constructor sets it.
    self.java_home = None

    # Configure master and slaves (private copy of the host list)
    self.hosts = list(hosts)
    self.master = self.hosts[0]

    # Create topology. Use the materialised copy: if `hosts` was a one-shot
    # iterable it has already been consumed by list() above.
    self.topology = HadoopTopology(self.hosts, topo_list)

    # Store cluster information
    self.hw = hw_manager.make_deployment_hardware()
    self.hw.add_hosts(self.hosts)
    self.master_cluster = self.hw.get_host_cluster(self.master)

    # Create a string to display the topology: invert the host -> rack
    # mapping into rack -> [addresses].
    racks = {rack: [] for rack in self.topology.topology.values()}
    for host, rack in self.topology.topology.iteritems():
        racks[rack].append(host.address)

    # Only the first label of each address (up to the first dot) is shown.
    log_topo = ', '.join(
        style.user2(rack) + ': ' +
        ' '.join(style.host(addr.split('.')[0]) for addr in addrs)
        for rack, addrs in racks.iteritems())

    logger.info("Hadoop cluster created with master %s, hosts %s and "
                "topology %s",
                style.host(self.master.address),
                ' '.join(style.host(h.address.split('.')[0])
                         for h in self.hosts),
                log_topo)
def __init__(self, hosts, topo_list=None, config_file=None):
    """Create a new Hadoop cluster with the given hosts and topology.

    Args:
      hosts (list of Host):
        The hosts to be assigned a topology. The first host becomes the
        master of the cluster.
      topo_list (list of str, optional):
        The racks to be assigned to each host. len(hosts) should be equal to
        len(topo_list).
      config_file (str, optional):
        The path of the config file to be used. When omitted, only the
        class-level defaults (self.defaults) are used.
    """

    # Load properties. Both sections are created up front so the lookups
    # below can fall back to the defaults when no file is given.
    config = ConfigParser(self.defaults)
    config.add_section("cluster")
    config.add_section("local")

    if config_file:
        # Use a context manager so the file handle is closed as soon as
        # the parser has consumed it (it was previously left open).
        with open(config_file) as config_fp:
            config.readfp(config_fp)

    # Deployment properties
    self.local_base_conf_dir = config.get("local", "local_base_conf_dir")
    # A fresh temporary directory is created under /tmp at construction.
    self.init_conf_dir = tempfile.mkdtemp(suffix="", prefix="hadoop-init-",
                                          dir="/tmp")
    self.conf_mandatory_files = [
        CORE_CONF_FILE,
        HDFS_CONF_FILE,
        MR_CONF_FILE,
    ]

    # Node properties
    self.base_dir = config.get("cluster", "hadoop_base_dir")
    self.conf_dir = config.get("cluster", "hadoop_conf_dir")
    self.logs_dir = config.get("cluster", "hadoop_logs_dir")
    self.hadoop_temp_dir = config.get("cluster", "hadoop_temp_dir")
    self.hdfs_port = config.getint("cluster", "hdfs_port")
    self.mapred_port = config.getint("cluster", "mapred_port")

    self.bin_dir = self.base_dir + "/bin"
    # NOTE(review): sbin_dir points at the same "bin" directory as bin_dir;
    # presumably intentional for this Hadoop layout - confirm.
    self.sbin_dir = self.base_dir + "/bin"

    # Initialised to None here; nothing in this constructor sets it.
    self.java_home = None

    # Configure master and slaves (private copy of the host list)
    self.hosts = list(hosts)
    self.master = self.hosts[0]

    # Create topology. Use the materialised copy: if `hosts` was a one-shot
    # iterable it has already been consumed by list() above.
    self.topology = HadoopTopology(self.hosts, topo_list)

    # Store cluster information
    self.hw = hw_manager.make_deployment_hardware()
    self.hw.add_hosts(self.hosts)
    self.master_cluster = self.hw.get_host_cluster(self.master)

    # Create a string to display the topology: invert the host -> rack
    # mapping into rack -> [addresses].
    racks = {rack: [] for rack in self.topology.topology.values()}
    for host, rack in self.topology.topology.iteritems():
        racks[rack].append(host.address)

    # Only the first label of each address (up to the first dot) is shown.
    log_topo = ', '.join(
        style.user2(rack) + ': ' +
        ' '.join(style.host(addr.split('.')[0]) for addr in addrs)
        for rack, addrs in racks.iteritems()
    )

    logger.info(
        "Hadoop cluster created with master %s, hosts %s and "
        "topology %s",
        style.host(self.master.address),
        ' '.join(style.host(h.address.split('.')[0]) for h in self.hosts),
        log_topo
    )