def add_hosts(self, hosts):
    """Add the provided list of hosts to the deployment hardware.

    The hosts are grouped into a set of clusters depending on their G5k
    cluster. Hosts in a group are assumed to share hardware
    characteristics, but this is not checked.

    Args:
      hosts (list of Host):
        The list of hosts to add.
    """
    # Group hosts by their G5k cluster name.
    host_clusters = {}
    for h in hosts:
        g5k_cluster = get_host_cluster(h)
        # setdefault replaces the original append-or-create branches;
        # the leftover debug `print host_clusters` has been removed
        # (it was Python 2 print-statement syntax and debug noise).
        host_clusters.setdefault(g5k_cluster, []).append(h)

    for (cluster, cluster_hosts) in host_clusters.items():
        self.add_cluster(G5kPhysicalCluster(cluster, cluster_hosts))
def add_hosts(self, hosts):
    """Add the provided list of hosts to the deployment hardware.

    The hosts are partitioned by their G5k cluster; one physical cluster
    is registered per group. Hosts of the same group are supposed to
    share hardware characteristics, but this is not checked.

    Args:
      hosts (list of Host):
        The list of hosts to add.
    """
    groups = {}
    for host in hosts:
        cluster_name = get_host_cluster(host)
        if cluster_name not in groups:
            groups[cluster_name] = []
        groups[cluster_name].append(host)

    # Register one physical cluster per G5k cluster found.
    for (cluster_name, members) in groups.items():
        self.add_cluster(
            G5kPhysicalCluster(self._hw_manager, cluster_name, members))
def __init__(self, mode, config_file=None, hosts=None, hadoop_cluster=None):
    """Create a new Spark cluster.

    It can be created as a standalone cluster or on top of YARN.

    Args:
      mode (int):
        The cluster manager that is used (STANDALONE_MODE or YARN_MODE).
      config_file (str, optional):
        The path of the config file to be used.
      hosts (list of Host, optional):
        The hosts of the cluster (standalone operation).
      hadoop_cluster (HadoopCluster, optional):
        The Hadoop cluster to link.

    Raises:
      SparkException: If no hosts are given either directly or through a
        Hadoop cluster, or if YARN_MODE is requested without a Hadoop
        cluster reference.
    """

    # Load cluster properties
    config = ConfigParser(self.defaults)
    config.add_section("cluster")
    config.add_section("local")

    if config_file:
        # Use a context manager so the config file handle is closed
        # (the previous code leaked the file object from open()).
        with open(config_file) as conf_fd:
            config.readfp(conf_fd)

    # Deployment properties.
    # NOTE: local_base_conf_dir was previously read twice from the same
    # key; the redundant second read has been removed.
    self.local_base_conf_dir = config.get("local", "local_base_conf_dir")
    self.init_conf_dir = tempfile.mkdtemp("", "spark-init-", "/tmp")
    self.conf_mandatory_files = [SPARK_CONF_FILE]

    self.base_dir = config.get("cluster", "spark_base_dir")
    self.conf_dir = config.get("cluster", "spark_conf_dir")
    self.logs_dir = config.get("cluster", "spark_logs_dir")
    self.evs_log_dir = config.get("cluster", "spark_events_dir")
    self.work_dir = config.get("cluster", "spark_work_dir")
    self.port = config.getint("cluster", "spark_port")

    self.bin_dir = self.base_dir + "/bin"
    self.sbin_dir = self.base_dir + "/sbin"

    self.mode = mode
    self.java_home = None

    # Initialize hosts: either given directly or taken from the linked
    # Hadoop cluster.
    if hosts:
        self.hosts = hosts
        self.master = hosts[0]
    elif hadoop_cluster:
        self.hosts = hadoop_cluster.hosts
        self.master = hadoop_cluster.master
    else:
        logger.error("Hosts in the cluster must be specified either"
                     "directly or indirectly through a Hadoop cluster.")
        raise SparkException("Hosts in the cluster must be specified "
                             "either directly or indirectly through a "
                             "Hadoop cluster.")

    # Store cluster information
    self.hw = G5kDeploymentHardware()
    self.hw.add_hosts(self.hosts)
    self.master_cluster = self.hw.get_cluster(get_host_cluster(self.master))

    # Store reference to Hadoop cluster and check if mandatory
    self.hc = hadoop_cluster
    if not self.hc and self.mode == YARN_MODE:
        logger.error("When using a YARN_MODE mode, a reference to the "
                     "Hadoop cluster should be provided.")
        raise SparkException("When using a YARN_MODE mode, a reference "
                             "to the Hadoop cluster should be provided")

    if self.mode == STANDALONE_MODE:
        mode_text = "in standalone mode"
    else:
        mode_text = "on top of YARN"

    logger.info("Spark cluster created %s in hosts %s." +
                (" It is linked to a Hadoop cluster." if self.hc else ""),
                mode_text,
                ' '.join([style.host(h.address.split('.')[0])
                          for h in self.hosts]))
resources = get_oargrid_job_vm5k_resources(49509) sites = sorted([site for site in resources.keys() if site != 'global']) kavlan = resources['global']['kavlan'] hosts = [] for site in sites: hosts += map(lambda host: get_kavlan_host_name(host, kavlan), resources[site]['hosts']) for host in hosts: site = get_host_site(host) if state.find("./site[@id='" + site+ "']"): el_site = state.find("./site[@id='" + site+ "']") else: el_site = SubElement(state, 'site', attrib={'id': site}) cluster = get_host_cluster(host) if el_site.find("./cluster[@id='" + cluster + "']"): el_cluster = el_site.find("./cluster[@id='" + cluster+ "']") else: el_cluster = SubElement(el_site, 'cluster', attrib={'id': cluster}) SubElement(el_cluster, 'host', attrib={'id': host}) while True: for host, vms in list_vm(hosts).iteritems(): el_host = state.find(".//host/[@id='" + host + "']") for vm in vms: SubElement(el_host, 'vm', attrib={'id': vm['id']}) topology_plot(state, show=True) for vm in state.findall('vm'):