def test_get_instances(self):
    self.assertEqual(len(u.get_instances(self.c1)), 5)
    self.assertEqual(u.get_instances(self.c1, 'wrong-process'), [])
    self.assertEqual(u.get_instances(self.c1, 'nn'),
                     self.ng1.instances)
    instances = list(self.ng2.instances)
    instances += self.ng3.instances
    self.assertEqual(u.get_instances(self.c1, 'dn'), instances)
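# Sketch of the helper exercised above (an assumption inferred from its
# usage in this test, not necessarily the library's actual implementation):
# return every instance in the cluster, or only the instances of node
# groups that run the given process.
def get_instances(cluster, node_process=None):
    instances = []
    for node_group in cluster.node_groups:
        if (node_process is None or
                node_process in node_group.node_processes):
            instances += node_group.instances
    return instances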
def _extract_configs_to_extra(self, cluster):
    nn = utils.get_instance(cluster, "namenode")
    sp_master = utils.get_instance(cluster, "master")
    sp_slaves = utils.get_instances(cluster, "slave")

    extra = dict()

    config_master = config_slaves = ""
    if sp_master is not None:
        config_master = c_helper.generate_spark_env_configs(cluster)

    if sp_slaves is not None:
        slavenames = []
        for slave in sp_slaves:
            slavenames.append(slave.hostname())
        config_slaves = c_helper.generate_spark_slaves_configs(slavenames)
    else:
        config_slaves = "\n"

    for ng in cluster.node_groups:
        extra[ng.id] = {
            "xml": c_helper.generate_xml_configs(
                ng.configuration(),
                ng.storage_paths(),
                nn.hostname(), None),
            "setup_script": c_helper.generate_hadoop_setup_script(
                ng.storage_paths(),
                c_helper.extract_hadoop_environment_confs(
                    ng.configuration())),
            "sp_master": config_master,
            "sp_slaves": config_slaves,
        }

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra["topology_data"] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra
def install_cluster(cluster):
    mng_instance = u.get_instance(cluster, 'manager')

    all_hosts = list(set([i.fqdn() for i in u.get_instances(cluster)]))

    client = c.IntelClient(mng_instance, cluster.name)

    LOG.info("Create cluster")
    client.cluster.create()

    LOG.info("Add nodes to cluster")
    rack = '/Default'
    client.nodes.add(all_hosts, rack, 'hadoop',
                     '/home/hadoop/.ssh/id_rsa')

    LOG.info("Install software")
    client.cluster.install_software(all_hosts)

    LOG.info("Configure services")
    _configure_services(client, cluster)

    LOG.info("Deploy cluster")
    client.nodes.config(force=True)

    LOG.info("Provisioning configs")
    # cinder and ephemeral drive support
    _configure_storage(client, cluster)
    # swift support
    _configure_swift(client, cluster)
    # user configs
    _add_user_params(client, cluster)

    LOG.info("Format HDFS")
    client.services.hdfs.format()
def _set_cluster_info(self, cluster):
    mng = u.get_instances(cluster, 'manager')[0]
    nn = u.get_namenode(cluster)
    jt = u.get_jobtracker(cluster)
    oozie = u.get_oozie(cluster)

    #TODO(alazarev) make port configurable (bug #1262895)
    info = {'IDH Manager': {
        'Web UI': 'https://%s:9443' % mng.management_ip
    }}

    if jt:
        #TODO(alazarev) make port configurable (bug #1262895)
        info['MapReduce'] = {
            'Web UI': 'http://%s:50030' % jt.management_ip
        }
        #TODO(alazarev) make port configurable (bug #1262895)
        info['MapReduce']['JobTracker'] = '%s:54311' % jt.hostname()
    if nn:
        #TODO(alazarev) make port configurable (bug #1262895)
        info['HDFS'] = {
            'Web UI': 'http://%s:50070' % nn.management_ip
        }
        #TODO(alazarev) make port configurable (bug #1262895)
        info['HDFS']['NameNode'] = 'hdfs://%s:8020' % nn.hostname()

    if oozie:
        #TODO(alazarev) make port configurable (bug #1262895)
        info['JobFlow'] = {
            'Oozie': 'http://%s:11000' % oozie.management_ip
        }

    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {'info': info})
def _clear_exclude_files(cluster):
    for instance in u.get_instances(cluster):
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo > %s/dn-exclude" hadoop' %
                HADOOP_CONF_DIR)
            r.execute_command(
                'sudo su - -c "echo > %s/nm-exclude" hadoop' %
                HADOOP_CONF_DIR)
def start_cluster(self, cluster):
    nn = vu.get_namenode(cluster)
    run.format_namenode(nn)
    run.start_hadoop_process(nn, 'namenode')

    for snn in vu.get_secondarynamenodes(cluster):
        run.start_hadoop_process(snn, 'secondarynamenode')

    rm = vu.get_resourcemanager(cluster)
    if rm:
        run.start_yarn_process(rm, 'resourcemanager')

    run.start_all_processes(utils.get_instances(cluster),
                            ['datanode', 'nodemanager'])

    run.await_datanodes(cluster)

    hs = vu.get_historyserver(cluster)
    if hs:
        run.start_historyserver(hs)

    oo = vu.get_oozie(cluster)
    if oo:
        run.start_oozie_process(self.pctx, oo)

    self._set_cluster_info(cluster)
def _setup_instances(self, cluster, instances=None):
    extra = self._extract_configs_to_extra(cluster)

    if instances is None:
        instances = utils.get_instances(cluster)

    self._push_configs_to_nodes(cluster, extra, instances)
def start_cluster(self, cluster): nn_instance = utils.get_instance(cluster, "namenode") sm_instance = utils.get_instance(cluster, "master") dn_instances = utils.get_instances(cluster, "datanode") # Start the name node with remote.get_remote(nn_instance) as r: run.format_namenode(r) run.start_processes(r, "namenode") # start the data nodes self._start_slave_datanode_processes(dn_instances) LOG.info("Hadoop services in cluster %s have been started" % cluster.name) with remote.get_remote(nn_instance) as r: r.execute_command("sudo -u hdfs hdfs dfs -mkdir -p /user/$USER/") r.execute_command(("sudo -u hdfs hdfs dfs -chown $USER " "/user/$USER/")) # start spark nodes if sm_instance: with remote.get_remote(sm_instance) as r: run.start_spark_master(r, self._spark_home(cluster)) LOG.info("Spark service at '%s' has been started", sm_instance.hostname()) LOG.info('Cluster %s has been started successfully' % cluster.name) self._set_cluster_info(cluster)
def _validate_existing_ng_scaling(self, cluster, existing):
    scalable_processes = self._get_scalable_processes()
    dn_to_delete = 0
    for ng in cluster.node_groups:
        if ng.id in existing:
            if (ng.count > existing[ng.id] and
                    "datanode" in ng.node_processes):
                dn_to_delete += ng.count - existing[ng.id]
            if not set(ng.node_processes).issubset(scalable_processes):
                raise ex.NodeGroupCannotBeScaled(
                    ng.name, _("Spark plugin cannot scale nodegroup"
                               " with processes: %s") %
                    " ".join(ng.node_processes))

    dn_amount = len(utils.get_instances(cluster, "datanode"))
    rep_factor = c_helper.get_config_value("HDFS", "dfs.replication",
                                           cluster)

    if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
        raise ex.ClusterCannotBeScaled(
            cluster.name,
            _("Spark plugin cannot shrink cluster because "
              "there would be not enough nodes for HDFS "
              "replicas (replication factor is %s)") % rep_factor)
def _push_configs_to_nodes(self, cluster, extra, new_instances):
    all_instances = utils.get_instances(cluster)
    with context.ThreadGroup() as tg:
        for instance in all_instances:
            if instance in new_instances:
                tg.spawn('spark-configure-%s' % instance.instance_name,
                         self._push_configs_to_new_node, cluster,
                         extra, instance)
def _get_cluster_hosts_information(host, cluster):
    for clust in conductor.cluster_get_all(context.ctx()):
        if clust.id == cluster.id:
            continue

        for i in u.get_instances(clust):
            if i.instance_name == host:
                return g.generate_etc_hosts(clust)

    return None
def decommission_nodes(self, cluster, instances):
    sls = utils.get_instances(cluster, "slave")
    dns = utils.get_instances(cluster, "datanode")
    decommission_dns = False
    decommission_sls = False

    for i in instances:
        if 'datanode' in i.node_group.node_processes:
            dns.remove(i)
            decommission_dns = True
        if 'slave' in i.node_group.node_processes:
            sls.remove(i)
            decommission_sls = True

    nn = utils.get_instance(cluster, "namenode")
    spark_master = utils.get_instance(cluster, "master")

    if decommission_sls:
        sc.decommission_sl(spark_master, instances, sls)
    if decommission_dns:
        sc.decommission_dn(nn, instances, dns)
def _push_configs_to_nodes(self, cluster, extra, new_instances):
    all_instances = utils.get_instances(cluster)
    new_ids = set([instance.id for instance in new_instances])
    with context.ThreadGroup() as tg:
        for instance in all_instances:
            if instance.id in new_ids:
                tg.spawn('vanilla-configure-%s' % instance.instance_name,
                         self._push_configs_to_new_node, cluster,
                         extra, instance)
            else:
                tg.spawn('vanilla-reconfigure-%s' % instance.instance_name,
                         self._push_configs_to_existing_node, cluster,
                         extra, instance)
def _update_exclude_files(cluster, instances):
    datanodes = _get_instances_with_service(instances, 'datanode')
    nodemanagers = _get_instances_with_service(instances, 'nodemanager')
    dn_hosts = u.generate_fqdn_host_names(datanodes)
    nm_hosts = u.generate_fqdn_host_names(nodemanagers)
    for instance in u.get_instances(cluster):
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/dn-exclude" hadoop' % (
                    dn_hosts, HADOOP_CONF_DIR))
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/nm-exclude" hadoop' % (
                    nm_hosts, HADOOP_CONF_DIR))
def _extract_configs_to_extra(self, cluster):
    st_master = utils.get_instance(cluster, "master")
    st_slaves = utils.get_instances(cluster, "slave")
    zk_servers = utils.get_instances(cluster, "zookeeper")

    extra = dict()

    # initialize both so the dict below never references undefined names
    config_master = config_slaves = ''
    if st_master is not None:
        if zk_servers is not None:
            zknames = []
            for zk in zk_servers:
                zknames.append(zk.hostname())

            config_master = c_helper.generate_storm_config(
                cluster, st_master.hostname(), zknames)

    # FIGURE OUT HOW TO GET IPS
    for ng in cluster.node_groups:
        extra[ng.id] = {
            'setup_script': c_helper.generate_hosts_setup_script(
                ng.storage_paths(),
                c_helper.extract_hadoop_environment_confs(
                    ng.configuration())),
            'sp_master': config_master,
            'sp_slaves': config_slaves
        }

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra['topology_data'] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra
def start_cluster(self, cluster):
    nn_instance = vu.get_namenode(cluster)
    with remote.get_remote(nn_instance) as r:
        run.format_namenode(r)
        run.start_processes(r, "namenode")

    for snn in vu.get_secondarynamenodes(cluster):
        run.start_processes(remote.get_remote(snn), "secondarynamenode")

    jt_instance = vu.get_jobtracker(cluster)
    if jt_instance:
        run.start_processes(remote.get_remote(jt_instance), "jobtracker")

    self._start_tt_dn_processes(utils.get_instances(cluster))

    self._await_datanodes(cluster)

    LOG.info(_LI("Hadoop services in cluster %s have been started"),
             cluster.name)

    oozie = vu.get_oozie(cluster)
    if oozie:
        with remote.get_remote(oozie) as r:
            if c_helper.is_mysql_enable(cluster):
                run.mysql_start(r, oozie)
                run.oozie_create_db(r)
            run.oozie_share_lib(r, nn_instance.hostname())
            run.start_oozie(r)
            LOG.info(_LI("Oozie service at '%s' has been started"),
                     nn_instance.hostname())

    hive_server = vu.get_hiveserver(cluster)
    if hive_server:
        with remote.get_remote(hive_server) as r:
            run.hive_create_warehouse_dir(r)
            run.hive_copy_shared_conf(
                r, edp.get_hive_shared_conf_path('hadoop'))

            if c_helper.is_mysql_enable(cluster):
                if not oozie or hive_server.hostname() != oozie.hostname():
                    run.mysql_start(r, hive_server)
                run.hive_create_db(r)
                run.hive_metastore_start(r)
                LOG.info(_LI("Hive Metastore server at %s has been "
                             "started"),
                         hive_server.hostname())

    LOG.info(_LI('Cluster %s has been started successfully'),
             cluster.name)
    self._set_cluster_info(cluster)
def _update_include_files(cluster):
    instances = u.get_instances(cluster)

    datanodes = vu.get_datanodes(cluster)
    nodemanagers = vu.get_nodemanagers(cluster)
    dn_hosts = u.generate_fqdn_host_names(datanodes)
    nm_hosts = u.generate_fqdn_host_names(nodemanagers)
    for instance in instances:
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/dn-include" hadoop' % (
                    dn_hosts, HADOOP_CONF_DIR))
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/nm-include" hadoop' % (
                    nm_hosts, HADOOP_CONF_DIR))
def start_cluster(self, cluster):
    sm_instance = utils.get_instance(cluster, "master")
    sl_instances = utils.get_instances(cluster, "slave")

    # start storm master
    if sm_instance:
        with remote.get_remote(sm_instance) as r:
            run.start_storm_master(r)
            LOG.info("Storm master at '%s' has been started",
                     sm_instance.hostname())

    # start storm slaves
    self._start_slave_processes(sl_instances)

    LOG.info('Cluster %s has been started successfully' % cluster.name)
    self._set_cluster_info(cluster)
def configure_cluster_for_hdfs(cluster, data_source):
    host = urlparse.urlparse(data_source.url).hostname

    etc_hosts_information = _get_cluster_hosts_information(host, cluster)
    if etc_hosts_information is None:
        # IP address hasn't been resolved, the last chance is for VM itself
        return

    # run through a shell so the redirections execute under sudo
    create_etc_host = ('sudo sh -c "cat /tmp/etc-hosts-update '
                       '/etc/hosts > /tmp/etc-hosts"')
    copy_etc_host = 'sudo sh -c "cat /tmp/etc-hosts > /etc/hosts"'

    for inst in u.get_instances(cluster):
        with inst.remote() as r:
            r.write_file_to("/tmp/etc-hosts-update", etc_hosts_information)
            r.execute_command(create_etc_host)
            r.execute_command(copy_etc_host)
def configure_cluster(cluster):
    instances = gu.get_instances(cluster)

    if not cmd.is_pre_installed_cdh(pu.get_manager(cluster).remote()):
        _configure_os(instances)
        _install_packages(instances, PACKAGES)

    _start_cloudera_agents(instances)
    _start_cloudera_manager(cluster)
    _await_agents(instances)
    _configure_manager(cluster)
    _create_services(cluster)
    _configure_services(cluster)
    _configure_instances(instances)
    cu.deploy_configs(cluster)
    if c_helper.is_swift_enabled(cluster):
        _configure_swift(instances)
def configure_cluster_for_hdfs(cluster, data_source):
    host = urlparse.urlparse(data_source.url).hostname

    etc_hosts_information = _get_cluster_hosts_information(host, cluster)
    if etc_hosts_information is None:
        # IP address hasn't been resolved, the last chance is for VM itself
        return

    update_etc_hosts_cmd = (
        'cat /tmp/etc-hosts-update /etc/hosts | '
        'sort | uniq > /tmp/etc-hosts && '
        'cat /tmp/etc-hosts > /etc/hosts && '
        'rm -f /tmp/etc-hosts /tmp/etc-hosts-update')

    for inst in u.get_instances(cluster):
        with inst.remote() as r:
            r.write_file_to('/tmp/etc-hosts-update', etc_hosts_information)
            r.execute_command(update_etc_hosts_cmd, run_as_root=True)
def _update_include_files(cluster, dec_instances=None):
    dec_instances = dec_instances or []
    dec_instances_ids = [instance.id for instance in dec_instances]

    instances = u.get_instances(cluster)

    inst_filter = lambda inst: inst.id not in dec_instances_ids

    datanodes = filter(inst_filter, vu.get_datanodes(cluster))
    nodemanagers = filter(inst_filter, vu.get_nodemanagers(cluster))
    dn_hosts = u.generate_fqdn_host_names(datanodes)
    nm_hosts = u.generate_fqdn_host_names(nodemanagers)
    for instance in instances:
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/dn-include" hadoop' % (
                    dn_hosts, HADOOP_CONF_DIR))
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/nm-include" hadoop' % (
                    nm_hosts, HADOOP_CONF_DIR))
def configure_cluster_for_hdfs(cluster, data_source):
    host = urlparse.urlparse(data_source.url).hostname

    etc_hosts_information = _get_cluster_hosts_information(host, cluster)
    if etc_hosts_information is None:
        # IP address hasn't been resolved, the last chance is for VM itself
        return

    etc_hosts_update = ('/tmp/etc-hosts-update.%s' %
                        six.text_type(uuid.uuid4()))
    tmp_etc_hosts = '/tmp/etc-hosts.%s' % six.text_type(uuid.uuid4())
    update_etc_hosts_cmd = (
        'cat %(etc_hosts_update)s /etc/hosts | '
        'sort | uniq > %(tmp_etc_hosts)s && '
        'cat %(tmp_etc_hosts)s > /etc/hosts && '
        'rm -f %(tmp_etc_hosts)s %(etc_hosts_update)s' %
        {'etc_hosts_update': etc_hosts_update,
         'tmp_etc_hosts': tmp_etc_hosts})

    for inst in u.get_instances(cluster):
        with inst.remote() as r:
            r.write_file_to(etc_hosts_update, etc_hosts_information)
            r.execute_command(update_etc_hosts_cmd, run_as_root=True)
def configure_cluster(self, cluster):
    instances = utils.get_instances(cluster)
    self._setup_instances(cluster, instances)
def configure_os(cluster):
    instances = u.get_instances(cluster)
    configure_os_from_instances(cluster, instances)
def _push_namenode_configs(self, cluster, r):
    r.write_file_to('/etc/hadoop/dn.incl',
                    utils.generate_fqdn_host_names(
                        utils.get_instances(cluster, "datanode")))
def get_nodemanagers(cluster):
    return u.get_instances(cluster, 'NODEMANAGER')

def get_datanodes(cluster):
    return u.get_instances(cluster, 'DATANODE')
def _setup_instances(self, cluster):
    extra = self._extract_configs_to_extra(cluster)

    instances = utils.get_instances(cluster)
    self._push_configs_to_nodes(cluster, extra, instances)
def get_nodemanagers(cluster):
    return u.get_instances(cluster, 'nodemanager')

def get_datanodes(cluster):
    return u.get_instances(cluster, 'datanode')

def get_tasktrackers(cluster):
    return u.get_instances(cluster, 'tasktracker')

def get_secondarynamenodes(cluster):
    return u.get_instances(cluster, 'secondarynamenode')