def start_cluster(self, cluster):
    """Bring up all Hadoop/YARN processes on the cluster, in dependency order.

    Order matters: the NameNode is formatted and started first, then the
    ResourceManager, then the DataNodes (waiting for them to register),
    then the NodeManagers, and finally the optional HistoryServer and
    Oozie services.  Cluster info is published once everything is up.
    """
    namenode = utils.get_namenode(cluster)
    run.format_namenode(namenode)
    run.start_hadoop_process(namenode, 'namenode')

    resourcemanager = utils.get_resourcemanager(cluster)
    run.start_yarn_process(resourcemanager, 'resourcemanager')

    for datanode in utils.get_datanodes(cluster):
        run.start_hadoop_process(datanode, 'datanode')
    # Block until the DataNodes have reported in before starting YARN workers.
    run.await_datanodes(cluster)

    for nodemanager in utils.get_nodemanagers(cluster):
        run.start_yarn_process(nodemanager, 'nodemanager')

    historyserver = utils.get_historyserver(cluster)
    if historyserver:
        run.start_historyserver(historyserver)

    oozie = utils.get_oozie(cluster)
    if oozie:
        run.start_oozie_process(oozie)

    self._set_cluster_info(cluster)
def decommission_nodes(cluster, instances):
    """Decommission *instances* from the cluster via the Intel manager.

    DataNode members are decommissioned through HDFS first (waiting for
    each to report decommissioned), then all target nodes are stopped,
    their hadoop services are awaited, and finally the nodes are deleted
    from the manager.

    :param cluster: cluster object the instances belong to
    :param instances: instance objects to remove
    """
    dec_hosts = [i.fqdn() for i in instances]
    dn_hosts = [dn.fqdn() for dn in u.get_datanodes(cluster)]
    nm_hosts = [nm.fqdn() for nm in u.get_nodemanagers(cluster)]

    client = c.IntelClient(u.get_instance(cluster, 'manager'), cluster.name)

    dec_dn_hosts = [dec_host for dec_host in dec_hosts
                    if dec_host in dn_hosts]

    if dec_dn_hosts:
        client.services.hdfs.decommission_nodes(dec_dn_hosts)

        # TODO(alazarev) make timeout configurable (bug #1262897)
        timeout = 14400  # 4 hours
        for host in dec_dn_hosts:
            # Bug fix: reset the elapsed counter per host; previously it
            # accumulated across hosts, so later hosts got little or no
            # share of the timeout the warning message promises.
            cur_time = 0
            while cur_time < timeout:
                # NOTE: 'Decomissioned' (sic) matches the status string the
                # Intel manager API returns; do not "fix" the spelling.
                if client.services.hdfs.get_datanode_status(
                        host) == 'Decomissioned':
                    break
                context.sleep(5)
                cur_time += 5
            else:
                LOG.warn("Failed to decomission node '%s' of cluster '%s' "
                         "in %s minutes" % (host, cluster.name, timeout / 60))

    client.nodes.stop(dec_hosts)

    # wait for the hadoop services on each node to stop
    # TODO(alazarev) make timeout configurable (bug #1262897)
    timeout = 600  # 10 minutes
    for instance in instances:
        # Bug fix (same as above): per-instance elapsed counter.
        cur_time = 0
        while cur_time < timeout:
            stopped = True
            if instance.fqdn() in dn_hosts:
                stopped = stopped and _is_hadoop_service_stopped(
                    instance, 'hadoop-hdfs-datanode')
            if instance.fqdn() in nm_hosts:
                stopped = stopped and _is_hadoop_service_stopped(
                    instance, 'hadoop-yarn-nodemanager')
            if stopped:
                break
            else:
                context.sleep(5)
                cur_time += 5
        else:
            LOG.warn("Failed to stop services on node '%s' of cluster '%s' "
                     "in %s minutes" % (instance, cluster.name, timeout / 60))

    for node in dec_hosts:
        LOG.info("Deleting node '%s' on cluster '%s'" % (node, cluster.name))
        client.nodes.delete(node)
def _configure_services(client, cluster):
    """Register services with the Intel manager and assign roles to hosts.

    Adds hdfs/yarn/oozie/pig/hive services as dictated by the cluster
    topology, then maps each role (NameNode, DataNode, ResourceManager,
    etc.) to the FQDNs of the instances that run it.

    :param client: IntelClient connected to the cluster's manager node
    :param cluster: cluster object describing the topology
    """
    # Hoist each topology lookup so every getter runs exactly once
    # (previously several were called twice per function invocation).
    nn = u.get_namenode(cluster)
    snn = u.get_secondarynamenodes(cluster)
    rm = u.get_resourcemanager(cluster)
    hs = u.get_historyserver(cluster)
    oozie = u.get_oozie(cluster)
    hive = u.get_hiveserver(cluster)

    # NOTE: like the original, this raises if there is no namenode.
    nn_host = nn.fqdn()
    snn_host = snn[0].fqdn() if snn else None
    rm_host = rm.fqdn() if rm else None
    hs_host = hs.fqdn() if hs else None
    dn_hosts = [dn.fqdn() for dn in u.get_datanodes(cluster)]
    nm_hosts = [tt.fqdn() for tt in u.get_nodemanagers(cluster)]
    oozie_host = oozie.fqdn() if oozie else None
    hive_host = hive.fqdn() if hive else None

    services = []
    if nn:
        services += ['hdfs']
    if rm:
        services += ['yarn']
    if oozie_host:
        services += ['oozie']
        services += ['pig']  # pig is bundled with oozie support
    if hive_host:
        services += ['hive']

    LOG.debug("Add services: %s" % ', '.join(services))
    client.services.add(services)

    LOG.debug("Assign roles to hosts")
    client.services.hdfs.add_nodes('PrimaryNameNode', [nn_host])
    client.services.hdfs.add_nodes('DataNode', dn_hosts)
    if snn:
        client.services.hdfs.add_nodes('SecondaryNameNode', [snn_host])
    if oozie_host:
        client.services.oozie.add_nodes('Oozie', [oozie_host])
    if hive_host:
        client.services.hive.add_nodes('HiveServer', [hive_host])
    if rm_host:
        client.services.yarn.add_nodes('ResourceManager', [rm_host])
        client.services.yarn.add_nodes('NodeManager', nm_hosts)
    if hs_host:
        client.services.yarn.add_nodes('HistoryServer', [hs_host])
def _update_include_files(cluster):
    """Push fresh dn-include/nm-include host lists to every cluster node.

    The include files list the FQDNs of the current DataNodes and
    NodeManagers; they are written under HADOOP_CONF_DIR as the hadoop
    user on each instance.
    """
    dn_hosts = u.generate_fqdn_host_names(u.get_datanodes(cluster))
    nm_hosts = u.generate_fqdn_host_names(u.get_nodemanagers(cluster))

    for instance in u.get_instances(cluster):
        with instance.remote() as remote:
            remote.execute_command(
                'sudo su - -c "echo \'%s\' > %s/dn-include" hadoop' % (
                    dn_hosts, HADOOP_CONF_DIR))
            remote.execute_command(
                'sudo su - -c "echo \'%s\' > %s/nm-include" hadoop' % (
                    nm_hosts, HADOOP_CONF_DIR))
def scale_cluster(cluster, instances):
    """Add *instances* to a running cluster through the Intel manager.

    New hosts are registered with the manager, software is installed,
    DataNode/NodeManager roles are assigned where applicable, cluster
    configs are re-provisioned (IDH 3.0.2 resets them on scale,
    bug #1300603), and the affected services are started.

    :param cluster: cluster object being scaled
    :param instances: newly provisioned instance objects
    """
    scale_ins_hosts = [i.fqdn() for i in instances]
    # Sets give O(1) membership tests instead of the original O(n) list scans.
    dn_hosts = {dn.fqdn() for dn in u.get_datanodes(cluster)}
    nm_hosts = {nm.fqdn() for nm in u.get_nodemanagers(cluster)}

    to_scale_dn = [host for host in scale_ins_hosts if host in dn_hosts]
    to_scale_nm = [host for host in scale_ins_hosts if host in nm_hosts]

    client = c.IntelClient(u.get_instance(cluster, 'manager'), cluster.name)
    rack = '/Default'
    client.nodes.add(scale_ins_hosts, rack, 'hadoop',
                     '/home/hadoop/.ssh/id_rsa')
    client.cluster.install_software(scale_ins_hosts)

    if to_scale_nm:
        client.services.yarn.add_nodes('NodeManager', to_scale_nm)
    if to_scale_dn:
        client.services.hdfs.add_nodes('DataNode', to_scale_dn)

    # IDH 3.0.2 reset cluster parameters (bug #1300603)
    # Restoring them back
    LOG.info("Provisioning configs")
    # cinder and ephemeral drive support
    _configure_storage(client, cluster)
    # swift support
    _configure_swift(client, cluster)
    # user configs
    _add_user_params(client, cluster)

    client.nodes.config()

    if to_scale_dn:
        client.services.hdfs.start()
    if to_scale_nm:
        client.services.yarn.start()