def _extract_configs_to_extra(self, cluster):
    sp_master = utils.get_instance(cluster, "master")
    sp_slaves = utils.get_instances(cluster, "slave")

    extra = dict()

    config_master = config_slaves = ''
    if sp_master is not None:
        config_master = c_helper.generate_spark_env_configs(cluster)

    if sp_slaves is not None:
        slavenames = []
        for slave in sp_slaves:
            slavenames.append(slave.hostname())
        config_slaves = c_helper.generate_spark_slaves_configs(slavenames)
    else:
        config_slaves = "\n"

    # Any node that might be used to run spark-submit will need
    # these libs for swift integration
    config_defaults = c_helper.generate_spark_executor_classpath(cluster)

    extra['job_cleanup'] = c_helper.generate_job_cleanup_config(cluster)
    extra['sp_master'] = config_master
    extra['sp_slaves'] = config_slaves
    extra['sp_defaults'] = config_defaults

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra['topology_data'] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra
def _extract_configs_to_extra(self, cluster):
    oozie = vu.get_oozie(cluster)
    hive = vu.get_hiveserver(cluster)

    extra = dict()

    if hive:
        extra['hive_mysql_passwd'] = self._generate_hive_mysql_password(
            cluster)

    for ng in cluster.node_groups:
        extra[ng.id] = {
            'xml': c_helper.generate_xml_configs(
                cluster, ng, extra['hive_mysql_passwd'] if hive else None),
            'setup_script': c_helper.generate_setup_script(
                ng.storage_paths(),
                c_helper.extract_environment_confs(ng.configuration()),
                append_oozie=(oozie and oozie.node_group.id == ng.id))
        }

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra['topology_data'] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra
def _extract_configs_to_extra(self, cluster):
    oozie = vu.get_oozie(cluster)
    hive = vu.get_hiveserver(cluster)

    extra = dict()

    if hive:
        extra['hive_mysql_passwd'] = six.text_type(uuid.uuid4())

    for ng in cluster.node_groups:
        extra[ng.id] = {
            'xml': c_helper.generate_xml_configs(
                cluster, ng, extra['hive_mysql_passwd'] if hive else None),
            'setup_script': c_helper.generate_setup_script(
                ng.storage_paths(),
                c_helper.extract_environment_confs(ng.configuration()),
                append_oozie=(oozie and oozie.node_group.id == ng.id)
            )
        }

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra['topology_data'] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra
def _extract_configs_to_extra(self, cluster):
    nn = utils.get_instance(cluster, "namenode")
    sp_master = utils.get_instance(cluster, "master")
    sp_slaves = utils.get_instances(cluster, "slave")

    extra = dict()

    config_master = config_slaves = ""
    if sp_master is not None:
        config_master = c_helper.generate_spark_env_configs(cluster)

    if sp_slaves is not None:
        slavenames = []
        for slave in sp_slaves:
            slavenames.append(slave.hostname())
        config_slaves = c_helper.generate_spark_slaves_configs(slavenames)
    else:
        config_slaves = "\n"

    for ng in cluster.node_groups:
        extra[ng.id] = {
            "xml": c_helper.generate_xml_configs(ng.configuration(),
                                                 ng.storage_paths(),
                                                 nn.hostname(), None),
            "setup_script": c_helper.generate_hadoop_setup_script(
                ng.storage_paths(),
                c_helper.extract_hadoop_environment_confs(
                    ng.configuration())),
            "sp_master": config_master,
            "sp_slaves": config_slaves,
        }

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra["topology_data"] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra
def _configure_rack_awareness(self, cluster):
    api = self.get_api_client(cluster)
    topology = t_helper.generate_topology_map(cluster,
                                              is_node_awareness=False)
    for host in api.get_all_hosts():
        host.rackId = topology[host.ipAddress]
        host.put_host()
def _get_topology_data(cluster):
    if not t_helper.is_data_locality_enabled():
        return {}

    LOG.warning("Node group awareness is not implemented in YARN yet "
                "so enable_hypervisor_awareness set to False "
                "explicitly")
    return t_helper.generate_topology_map(cluster, is_node_awareness=False)
def _configure_topology_for_cluster(self, cluster, servers):
    if CONF.enable_data_locality:
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        topology_str = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"
        for server in servers:
            server.configure_topology(topology_str)
def configure_topology_data(pctx, cluster):
    if c_helper.is_data_locality_enabled(pctx, cluster):
        LOG.info(_LI("Node group awareness is not implemented in YARN yet "
                     "so enable_hypervisor_awareness set to False "
                     "explicitly"))
        tpl_map = th.generate_topology_map(cluster, is_node_awareness=False)
        topology_data = "\n".join(
            [k + " " + v for k, v in tpl_map.items()]) + "\n"
        for ng in cluster.node_groups:
            for i in ng.instances:
                i.remote().write_file_to(HADOOP_CONF_DIR + "/topology.data",
                                         topology_data, run_as_root=True)
def _configure_topology_for_cluster(self, cluster, servers):
    if CONF.enable_data_locality:
        cpo.add_provisioning_step(
            cluster.id, _("Enable data locality for cluster"),
            len(servers))
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        topology_str = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"
        for server in servers:
            server.configure_topology(topology_str)
def _configure_topology_for_cluster(self, cluster, servers):
    if CONF.enable_data_locality:
        cpo.add_provisioning_step(cluster.id,
                                  _("Enable data locality for cluster"),
                                  len(servers))
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        topology_str = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"
        for server in servers:
            server.configure_topology(topology_str)
def configure_topology_data(cluster):
    if c_helper.is_data_locality_enabled(cluster):
        LOG.info("Node group awareness is not implemented in YARN yet "
                 "so enable_hypervisor_awareness set to False explicitly")
        tpl_map = th.generate_topology_map(cluster, is_node_awareness=False)
        topology_data = "\n".join(
            [k + " " + v for k, v in tpl_map.items()]) + "\n"
        for ng in cluster.node_groups:
            for i in ng.instances:
                i.remote().write_file_to(HADOOP_CONF_DIR + "/topology.data",
                                         topology_data, run_as_root=True)
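For reference, the "\n".join(...) expressions above serialize the topology map into the plain "name rack" format that the cluster's topology script later reads back from topology.data. A minimal illustration of the resulting file content, using hypothetical hostnames, IPs, and rack paths:

master-001 /rack1
10.0.0.11 /rack1
worker-001 /rack2
10.0.0.12 /rack2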
def _extract_configs_to_extra(self, cluster):
    nn = utils.get_instance(cluster, "namenode")
    sp_master = utils.get_instance(cluster, "master")
    sp_slaves = utils.get_instances(cluster, "slave")

    extra = dict()

    config_master = config_slaves = ''
    if sp_master is not None:
        config_master = c_helper.generate_spark_env_configs(cluster)

    if sp_slaves is not None:
        slavenames = []
        for slave in sp_slaves:
            slavenames.append(slave.hostname())
        config_slaves = c_helper.generate_spark_slaves_configs(slavenames)
    else:
        config_slaves = "\n"

    # Any node that might be used to run spark-submit will need
    # these libs for swift integration
    config_defaults = c_helper.generate_spark_executor_classpath(cluster)

    extra['job_cleanup'] = c_helper.generate_job_cleanup_config(cluster)

    for ng in cluster.node_groups:
        extra[ng.id] = {
            'xml': c_helper.generate_xml_configs(
                ng.configuration(), ng.storage_paths(), nn.hostname(), None
            ),
            'setup_script': c_helper.generate_hadoop_setup_script(
                ng.storage_paths(),
                c_helper.extract_hadoop_environment_confs(
                    ng.configuration())
            ),
            'sp_master': config_master,
            'sp_slaves': config_slaves,
            'sp_defaults': config_defaults
        }
        if "zeppelin" in ng.node_processes:
            extra[ng.id].update({
                "zeppelin_setup_script":
                    c_helper.generate_zeppelin_setup_script(sp_master)})

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra['topology_data'] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra
def _extract_configs_to_extra(self, cluster):
    nn = utils.get_instance(cluster, "namenode")
    sp_master = utils.get_instance(cluster, "master")
    sp_slaves = utils.get_instances(cluster, "slave")

    extra = dict()

    config_master = config_slaves = ''
    if sp_master is not None:
        config_master = c_helper.generate_spark_env_configs(cluster)

    if sp_slaves is not None:
        slavenames = []
        for slave in sp_slaves:
            slavenames.append(slave.hostname())
        config_slaves = c_helper.generate_spark_slaves_configs(slavenames)
    else:
        config_slaves = "\n"

    # Any node that might be used to run spark-submit will need
    # these libs for swift integration
    config_defaults = c_helper.generate_spark_executor_classpath(cluster)

    extra['job_cleanup'] = c_helper.generate_job_cleanup_config(cluster)

    for ng in cluster.node_groups:
        extra[ng.id] = {
            'xml': c_helper.generate_xml_configs(ng.configuration(),
                                                 ng.storage_paths(),
                                                 nn.hostname(), None),
            'setup_script': c_helper.generate_hadoop_setup_script(
                ng.storage_paths(),
                c_helper.extract_hadoop_environment_confs(
                    ng.configuration())),
            'sp_master': config_master,
            'sp_slaves': config_slaves,
            'sp_defaults': config_defaults
        }

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra['topology_data'] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra
def configure_topology_data(cluster, is_node_awareness):
    LOG.info(_LI('START: configuring topology data.'))
    if is_data_locality_enabled(cluster):
        LOG.debug('Data locality is enabled.')
        LOG.debug('Start generating topology map.')
        topology_map = th.generate_topology_map(cluster, is_node_awareness)
        LOG.debug('Done for generating topology map.')
        topology_data = cfu.to_file_content(topology_map, 'topology')
        for i in u.get_instances(cluster):
            LOG.debug(
                'Start writing to file: %s/topology.data', MAPR_HOME)
            i.remote().write_file_to(MAPR_HOME + "/topology.data",
                                     topology_data, run_as_root=True)
            LOG.debug('Done writing to file: %s/topology.data', MAPR_HOME)
    else:
        LOG.debug('Data locality is disabled.')
    LOG.info(_LI('END: configuring topology data.'))
def _extract_configs_to_extra(self, cluster):
    nn = utils.get_instance(cluster, "namenode")
    sp_master = utils.get_instance(cluster, "master")
    sp_slaves = utils.get_instances(cluster, "slave")

    extra = dict()

    config_master = config_slaves = ''
    if sp_master is not None:
        config_master = c_helper.generate_spark_env_configs(cluster)

    if sp_slaves is not None:
        slavenames = []
        for slave in sp_slaves:
            slavenames.append(slave.hostname())
        config_slaves = c_helper.generate_spark_slaves_configs(slavenames)
    else:
        config_slaves = "\n"

    for ng in cluster.node_groups:
        extra[ng.id] = {
            'xml': c_helper.generate_xml_configs(
                ng.configuration(),
                ng.storage_paths(),
                nn.hostname(), None,
            ),
            'setup_script': c_helper.generate_hadoop_setup_script(
                ng.storage_paths(),
                c_helper.extract_hadoop_environment_confs(
                    ng.configuration())),
            'sp_master': config_master,
            'sp_slaves': config_slaves
        }

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra['topology_data'] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra
def _configure_topology(self, context, instances):
    def write_file(instance, path, data):
        with instance.remote() as r:
            r.write_file_to(path, data, run_as_root=True)

    LOG.debug("Configuring cluster topology")
    is_node_aware = context.is_node_aware
    if is_node_aware:
        topo = th.generate_topology_map(context.cluster, is_node_aware)
        topo = "\n".join(["%s %s" % i for i in six.iteritems(topo)]) + "\n"
        data_path = "%s/topology.data" % context.mapr_home
        script = files.get_file_text(_TOPO_SCRIPT)
        script_path = "%s/topology.sh" % context.mapr_home
        util.execute_on_instances(instances, write_file, data_path, topo)
        util.execute_on_instances(instances, util.write_file, script_path,
                                  script, "+x", "root")
    else:
        LOG.debug("Data locality is disabled.")
    LOG.info(_LI("Cluster topology successfully configured"))
def _configure_topology(self, context, instances):
    def write_file(instance, path, data):
        with instance.remote() as r:
            r.write_file_to(path, data, run_as_root=True)

    LOG.debug('Configuring cluster topology')
    is_node_aware = context.is_node_aware
    if is_node_aware:
        topo = th.generate_topology_map(context.cluster, is_node_aware)
        topo = '\n'.join(['%s %s' % i for i in six.iteritems(topo)]) + '\n'
        data_path = '%s/topology.data' % context.mapr_home
        script = files.get_file_text(_TOPO_SCRIPT)
        script_path = '%s/topology.sh' % context.mapr_home
        util.execute_on_instances(instances, write_file, data_path, topo)
        util.execute_on_instances(
            instances, util.write_file, script_path, script, '+x', 'root')
    else:
        LOG.debug('Data locality is disabled.')
    LOG.info(_LI('Cluster topology successfully configured'))
def _configure_topology(self, context, instances):
    LOG.debug('Configuring cluster topology')
    is_node_aware = context.is_node_aware
    if is_node_aware:
        topo = th.generate_topology_map(context.cluster, is_node_aware)
        topo = '\n'.join(['%s %s' % i for i in six.iteritems(topo)])
        data_path = '%s/topology.data' % context.mapr_home
        script_path = '%s/topology.sh' % context.mapr_home
        files = {
            data_path: topo,
            script_path: f.get_file_text(_TOPO_SCRIPT),
        }
        chmod_cmd = 'chmod +x %s' % script_path
        for instance in instances:
            with instance.remote() as r:
                r.write_files_to(files, run_as_root=True)
                r.execute_command(chmod_cmd, run_as_root=True)
    else:
        LOG.debug('Data locality is disabled.')
    LOG.debug('Cluster topology successfully configured')
def _extract_configs_to_extra(self, cluster):
    st_master = utils.get_instance(cluster, "master")
    st_slaves = utils.get_instances(cluster, "slave")
    zk_servers = utils.get_instances(cluster, "zookeeper")

    extra = dict()

    # Initialize both so the per-node-group dict below never hits an
    # undefined name when there is no master instance.
    config_master = config_slaves = ''
    if st_master is not None:
        zknames = []
        if zk_servers is not None:
            for zk in zk_servers:
                zknames.append(zk.hostname())

        config_master = c_helper.generate_storm_config(
            cluster, st_master.hostname(), zknames)

    # FIGURE OUT HOW TO GET IPS
    for ng in cluster.node_groups:
        extra[ng.id] = {
            'setup_script': c_helper.generate_hosts_setup_script(
                ng.storage_paths(),
                c_helper.extract_hadoop_environment_confs(
                    ng.configuration())
            ),
            'sp_master': config_master,
            'sp_slaves': config_slaves
        }

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra['topology_data'] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra
def generate_topology_map(cluster, is_node_awareness, **kwargs):
    return t_helper.generate_topology_map(cluster, is_node_awareness)
def test_get_topology(self, swift_topology, compute_topology, novaclient):
    nova = mock.Mock()
    novaclient.return_value = nova
    r1 = mock.Mock()
    r1.hostId = "o1"
    r2 = mock.Mock()
    r2.hostId = "o1"
    r3 = mock.Mock()
    r3.hostId = "o2"
    nova.servers.get.side_effect = [r1, r2, r3, r1, r2, r3]

    swift_topology.return_value = {"s1": "/r1"}
    compute_topology.return_value = {"o1": "/r1", "o2": "/r2"}

    i1 = o.Instance()
    i1.instance_id = "i1"
    i1.instance_name = "i1"
    i1.internal_ip = "0.0.1.1"
    i1.management_ip = "1.1.1.1"
    i2 = o.Instance()
    i2.instance_id = "i2"
    i2.instance_name = "i2"
    i2.management_ip = "1.1.1.2"
    i2.internal_ip = "0.0.1.2"
    i3 = o.Instance()
    i3.instance_id = "i3"
    i3.instance_name = "i3"
    i3.internal_ip = "1.1.1.3"
    i3.management_ip = "0.0.1.3"

    ng1 = o.NodeGroup()
    ng1.name = "1"
    ng1.instances = [i1, i2]
    ng2 = o.NodeGroup()
    ng2.name = "2"
    ng2.instances = [i3]

    cluster = o.Cluster()
    cluster.node_groups = [ng1, ng2]

    top = th.generate_topology_map(cluster, False)
    self.assertEqual({
        "i1": "/r1",
        "1.1.1.1": "/r1",
        "0.0.1.1": "/r1",
        "i2": "/r1",
        "1.1.1.2": "/r1",
        "0.0.1.2": "/r1",
        "i3": "/r2",
        "1.1.1.3": "/r2",
        "0.0.1.3": "/r2",
        "s1": "/r1"
    }, top)

    top = th.generate_topology_map(cluster, True)
    self.assertEqual({
        "i1": "/r1/o1",
        "1.1.1.1": "/r1/o1",
        "0.0.1.1": "/r1/o1",
        "i2": "/r1/o1",
        "1.1.1.2": "/r1/o1",
        "0.0.1.2": "/r1/o1",
        "i3": "/r2/o2",
        "1.1.1.3": "/r2/o2",
        "0.0.1.3": "/r2/o2",
        "s1": "/r1"
    }, top)
def topology_map(self):
    return th.generate_topology_map(self.cluster, self.is_node_aware)
def test_get_topology(self, swift_topology, compute_topology, novaclient):
    nova = mock.Mock()
    novaclient.return_value = nova
    r1 = mock.Mock()
    r1.hostId = "o1"
    r2 = mock.Mock()
    r2.hostId = "o1"
    r3 = mock.Mock()
    r3.hostId = "o2"
    nova.servers.get.side_effect = [r1, r2, r3, r1, r2, r3]

    swift_topology.return_value = {"s1": "/r1"}
    compute_topology.return_value = {"o1": "/r1", "o2": "/r2"}

    i1 = o.Instance()
    i1.instance_id = "i1"
    i1.instance_name = "i1"
    i1.internal_ip = "0.0.1.1"
    i1.management_ip = "1.1.1.1"
    i2 = o.Instance()
    i2.instance_id = "i2"
    i2.instance_name = "i2"
    i2.management_ip = "1.1.1.2"
    i2.internal_ip = "0.0.1.2"
    i3 = o.Instance()
    i3.instance_id = "i3"
    i3.instance_name = "i3"
    i3.internal_ip = "1.1.1.3"
    i3.management_ip = "0.0.1.3"

    ng1 = o.NodeGroup()
    ng1.name = "1"
    ng1.instances = [i1, i2]
    ng2 = o.NodeGroup()
    ng2.name = "2"
    ng2.instances = [i3]

    cluster = o.Cluster()
    cluster.node_groups = [ng1, ng2]

    top = th.generate_topology_map(cluster, False)
    self.assertEqual(
        top, {
            "i1": "/r1",
            "1.1.1.1": "/r1",
            "0.0.1.1": "/r1",
            "i2": "/r1",
            "1.1.1.2": "/r1",
            "0.0.1.2": "/r1",
            "i3": "/r2",
            "1.1.1.3": "/r2",
            "0.0.1.3": "/r2",
            "s1": "/r1"
        })

    top = th.generate_topology_map(cluster, True)
    self.assertEqual(
        top, {
            "i1": "/r1/o1",
            "1.1.1.1": "/r1/o1",
            "0.0.1.1": "/r1/o1",
            "i2": "/r1/o1",
            "1.1.1.2": "/r1/o1",
            "0.0.1.2": "/r1/o1",
            "i3": "/r2/o2",
            "1.1.1.3": "/r2/o2",
            "0.0.1.3": "/r2/o2",
            "s1": "/r1"
        })
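The two tests above pin down the behavior every caller in this section relies on: each instance is reachable in the map under its name, management IP, and internal IP; swift nodes pass through unchanged; and node (hypervisor) awareness appends the host id to the rack path. A minimal, self-contained sketch of that mapping, inferred from the tests only — sketch_topology_map, and the swift_topo/compute_topo/host_ids inputs standing in for data the real helper reads from configuration files and from Nova, are hypothetical:

def sketch_topology_map(instances, swift_topo, compute_topo, host_ids,
                        is_node_awareness):
    """Map every instance name/IP (plus swift nodes) to a rack path."""
    topology = dict(swift_topo)  # swift entries are passed through as-is
    for name, management_ip, internal_ip in instances:
        host_id = host_ids[name]      # hypervisor hosting this instance
        rack = compute_topo[host_id]
        if is_node_awareness:
            rack += "/" + host_id     # e.g. "/r1" becomes "/r1/o1"
        for key in (name, management_ip, internal_ip):
            topology[key] = rack
    return topology


# Reproduces the node-aware expectations of the tests above:
instances = [("i1", "1.1.1.1", "0.0.1.1"),
             ("i2", "1.1.1.2", "0.0.1.2"),
             ("i3", "0.0.1.3", "1.1.1.3")]
host_ids = {"i1": "o1", "i2": "o1", "i3": "o2"}
print(sketch_topology_map(instances, {"s1": "/r1"},
                          {"o1": "/r1", "o2": "/r2"}, host_ids, True))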