def test_get_datanodes(self):
    cl = tu.create_cluster('cl1', 't1', 'vanilla', '2.6.0',
                           [self.ng_manager, self.ng_namenode,
                            self.ng_datanode])
    datanodes = u.get_datanodes(cl)
    self.assertEqual(2, len(datanodes))
    self.assertEqual(set(['dn1', 'dn2']),
                     set([datanodes[0].instance_id,
                          datanodes[1].instance_id]))

    cl = tu.create_cluster('cl1', 't1', 'vanilla', '2.6.0',
                           [self.ng_manager])
    self.assertEqual([], u.get_datanodes(cl))

def test_get_datanodes(self):
    cl = tu.create_cluster(
        'cl1', 't1', 'vanilla', '1.2.1',
        [self.ng_manager, self.ng_namenode, self.ng_datanode])
    datanodes = u.get_datanodes(cl)
    self.assertEqual(2, len(datanodes))
    self.assertEqual(
        set(['dn1', 'dn2']),
        set([datanodes[0].instance_id, datanodes[1].instance_id]))

    cl = tu.create_cluster('cl1', 't1', 'vanilla', '1.2.1',
                           [self.ng_manager])
    self.assertEqual([], u.get_datanodes(cl))

def _validate_existing_ng_scaling(self, cluster, existing):
    scalable_processes = self._get_scalable_processes()
    dn_to_delete = 0
    # Count how many datanodes the resize would remove and verify that
    # every resized node group contains only scalable processes.
    for ng in cluster.node_groups:
        if ng.id in existing:
            if ng.count > existing[ng.id] and "datanode" in ng.node_processes:
                dn_to_delete += ng.count - existing[ng.id]
            if not set(ng.node_processes).issubset(scalable_processes):
                raise ex.NodeGroupCannotBeScaled(
                    ng.name,
                    _("Vanilla plugin cannot scale nodegroup"
                      " with processes: %s") % " ".join(ng.node_processes),
                )

    # Refuse to shrink HDFS below the configured replication factor.
    dn_amount = len(vu.get_datanodes(cluster))
    rep_factor = c_helper.get_config_value("HDFS", "dfs.replication", cluster)

    if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
        raise ex.ClusterCannotBeScaled(
            cluster.name,
            _(
                "Vanilla plugin cannot shrink cluster because "
                "it would be not enough nodes for replicas "
                "(replication factor is %s)"
            ) % rep_factor,
        )

def start_cluster(self, cluster):
    nn = vu.get_namenode(cluster)
    run.format_namenode(nn)
    run.start_hadoop_process(nn, 'namenode')

    for snn in vu.get_secondarynamenodes(cluster):
        run.start_hadoop_process(snn, 'secondarynamenode')

    rm = vu.get_resourcemanager(cluster)
    run.start_yarn_process(rm, 'resourcemanager')

    for dn in vu.get_datanodes(cluster):
        run.start_hadoop_process(dn, 'datanode')
    run.await_datanodes(cluster)

    for nm in vu.get_nodemanagers(cluster):
        run.start_yarn_process(nm, 'nodemanager')

    hs = vu.get_historyserver(cluster)
    if hs:
        run.start_historyserver(hs)

    oo = vu.get_oozie(cluster)
    if oo:
        run.start_oozie_process(oo)

    self._set_cluster_info(cluster)

def start_cluster(self, cluster): nn = vu.get_namenode(cluster) run.format_namenode(nn) run.start_hadoop_process(nn, "namenode") for snn in vu.get_secondarynamenodes(cluster): run.start_hadoop_process(snn, "secondarynamenode") rm = vu.get_resourcemanager(cluster) if rm: run.start_yarn_process(rm, "resourcemanager") for dn in vu.get_datanodes(cluster): run.start_hadoop_process(dn, "datanode") run.await_datanodes(cluster) for nm in vu.get_nodemanagers(cluster): run.start_yarn_process(nm, "nodemanager") hs = vu.get_historyserver(cluster) if hs: run.start_historyserver(hs) oo = vu.get_oozie(cluster) if oo: run.start_oozie_process(oo) self._set_cluster_info(cluster)
def await_datanodes(cluster):
    datanodes_count = len(vu.get_datanodes(cluster))
    if datanodes_count < 1:
        return

    l_message = _("Waiting on %s datanodes to start up") % datanodes_count
    with vu.get_namenode(cluster).remote() as r:
        poll_utils.plugin_option_poll(
            cluster, _check_datanodes_count,
            c_helper.DATANODES_STARTUP_TIMEOUT, l_message, 1, {
                'remote': r, 'count': datanodes_count})

def _await_datanodes(self, cluster):
    datanodes_count = len(vu.get_datanodes(cluster))
    if datanodes_count < 1:
        return

    l_message = _("Waiting on %s datanodes to start up") % datanodes_count
    LOG.info(l_message)
    with remote.get_remote(vu.get_namenode(cluster)) as r:
        poll_utils.plugin_option_poll(
            cluster, run.check_datanodes_count,
            c_helper.DATANODES_STARTUP_TIMEOUT, l_message, 1, {
                'remote': r, 'count': datanodes_count})

def _update_include_files(cluster):
    instances = u.get_instances(cluster)

    datanodes = vu.get_datanodes(cluster)
    nodemanagers = vu.get_nodemanagers(cluster)
    dn_hosts = u.generate_fqdn_host_names(datanodes)
    nm_hosts = u.generate_fqdn_host_names(nodemanagers)
    for instance in instances:
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/dn-include" hadoop' %
                (dn_hosts, HADOOP_CONF_DIR))
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/nm-include" hadoop' %
                (nm_hosts, HADOOP_CONF_DIR))

def _update_include_files(cluster):
    instances = u.get_instances(cluster)

    datanodes = vu.get_datanodes(cluster)
    nodemanagers = vu.get_nodemanagers(cluster)
    dn_hosts = u.generate_fqdn_host_names(datanodes)
    nm_hosts = u.generate_fqdn_host_names(nodemanagers)
    for instance in instances:
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/dn-include" hadoop' % (
                    dn_hosts, HADOOP_CONF_DIR))
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/nm-include" hadoop' % (
                    nm_hosts, HADOOP_CONF_DIR))

def _update_include_files(cluster, dec_instances=None):
    dec_instances = dec_instances or []
    dec_instances_ids = [instance.id for instance in dec_instances]

    instances = u.get_instances(cluster)

    # Exclude instances that are being decommissioned from the
    # dn-include/nm-include host lists pushed to every node.
    inst_filter = lambda inst: inst.id not in dec_instances_ids

    datanodes = filter(inst_filter, vu.get_datanodes(cluster))
    nodemanagers = filter(inst_filter, vu.get_nodemanagers(cluster))

    dn_hosts = u.generate_fqdn_host_names(datanodes)
    nm_hosts = u.generate_fqdn_host_names(nodemanagers)
    for instance in instances:
        with instance.remote() as r:
            r.execute_command("sudo su - -c \"echo '%s' > %s/dn-include\" "
                              "hadoop" % (dn_hosts, HADOOP_CONF_DIR))
            r.execute_command("sudo su - -c \"echo '%s' > %s/nm-include\" "
                              "hadoop" % (nm_hosts, HADOOP_CONF_DIR))

def await_datanodes(cluster): datanodes_count = len(vu.get_datanodes(cluster)) if datanodes_count < 1: return LOG.info("Waiting %s datanodes to start up" % datanodes_count) with vu.get_namenode(cluster).remote() as r: while True: if _check_datanodes_count(r, datanodes_count): LOG.info("Datanodes on cluster %s has been started" % cluster.name) return context.sleep(1) if not g.check_cluster_exists(cluster): LOG.info("Stop waiting datanodes on cluster %s since it has " "been deleted" % cluster.name) return
def await_datanodes(cluster): datanodes_count = len(vu.get_datanodes(cluster)) if datanodes_count < 1: return LOG.info("Waiting %s datanodes to start up" % datanodes_count) with vu.get_namenode(cluster).remote() as r: while True: if _check_datanodes_count(r, datanodes_count): LOG.info('Datanodes on cluster %s has been started' % cluster.name) return context.sleep(1) if not g.check_cluster_exists(cluster): LOG.info('Stop waiting datanodes on cluster %s since it has ' 'been deleted' % cluster.name) return
def _await_datanodes(self, cluster): datanodes_count = len(vu.get_datanodes(cluster)) if datanodes_count < 1: return LOG.info(_LI("Waiting %s datanodes to start up"), datanodes_count) with remote.get_remote(vu.get_namenode(cluster)) as r: while True: if run.check_datanodes_count(r, datanodes_count): LOG.info(_LI('Datanodes on cluster %s have been started'), cluster.name) return context.sleep(1) if not g.check_cluster_exists(cluster): LOG.info( _LI('Stop waiting datanodes on cluster %s since it has' ' been deleted'), cluster.name) return
def _update_include_files(cluster, dec_instances=None):
    dec_instances = dec_instances or []
    dec_instances_ids = [instance.id for instance in dec_instances]

    instances = u.get_instances(cluster)

    inst_filter = lambda inst: inst.id not in dec_instances_ids

    datanodes = filter(inst_filter, vu.get_datanodes(cluster))
    nodemanagers = filter(inst_filter, vu.get_nodemanagers(cluster))

    dn_hosts = u.generate_fqdn_host_names(datanodes)
    nm_hosts = u.generate_fqdn_host_names(nodemanagers)
    for instance in instances:
        with instance.remote() as r:
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/dn-include" hadoop' %
                (dn_hosts, HADOOP_CONF_DIR))
            r.execute_command(
                'sudo su - -c "echo \'%s\' > %s/nm-include" hadoop' %
                (nm_hosts, HADOOP_CONF_DIR))

def decommission_nodes(self, cluster, instances):
    tts = vu.get_tasktrackers(cluster)
    dns = vu.get_datanodes(cluster)
    decommission_dns = False
    decommission_tts = False

    # After this loop, dns and tts hold only the instances that will
    # remain in the cluster once decommissioning completes.
    for i in instances:
        if "datanode" in i.node_group.node_processes:
            dns.remove(i)
            decommission_dns = True
        if "tasktracker" in i.node_group.node_processes:
            tts.remove(i)
            decommission_tts = True

    nn = vu.get_namenode(cluster)
    jt = vu.get_jobtracker(cluster)

    if decommission_tts:
        sc.decommission_tt(jt, instances, tts)
    if decommission_dns:
        sc.decommission_dn(nn, instances, dns)

def validate_existing_ng_scaling(cluster, existing):
    scalable_processes = _get_scalable_processes()
    dn_to_delete = 0
    for ng in cluster.node_groups:
        if ng.id in existing:
            if ng.count > existing[ng.id] and "datanode" in ng.node_processes:
                dn_to_delete += ng.count - existing[ng.id]
            if not set(ng.node_processes).issubset(scalable_processes):
                msg = ("Vanilla plugin cannot scale nodegroup "
                       "with processes: %s")
                raise ex.NodeGroupCannotBeScaled(
                    ng.name, msg % ' '.join(ng.node_processes))

    dn_amount = len(vu.get_datanodes(cluster))
    rep_factor = c_helper.get_config_value('HDFS', 'dfs.replication', cluster)

    if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
        msg = ("Vanilla plugin cannot shrink cluster because it would be not "
               "enough nodes for replicas (replication factor is %s)")
        raise ex.ClusterCannotBeScaled(cluster.name, msg % rep_factor)

def _await_datanodes(self, cluster): datanodes_count = len(vu.get_datanodes(cluster)) if datanodes_count < 1: return LOG.info(_LI("Waiting %s datanodes to start up"), datanodes_count) with remote.get_remote(vu.get_namenode(cluster)) as r: while True: if run.check_datanodes_count(r, datanodes_count): LOG.info( _LI('Datanodes on cluster %s has been started'), cluster.name) return context.sleep(1) if not g.check_cluster_exists(cluster): LOG.info( _LI('Stop waiting datanodes on cluster %s since it has' ' been deleted'), cluster.name) return
def decommission_nodes(self, cluster, instances):
    tts = vu.get_tasktrackers(cluster)
    dns = vu.get_datanodes(cluster)
    decommission_dns = False
    decommission_tts = False

    for i in instances:
        if 'datanode' in i.node_group.node_processes:
            dns.remove(i)
            decommission_dns = True
        if 'tasktracker' in i.node_group.node_processes:
            tts.remove(i)
            decommission_tts = True

    nn = vu.get_namenode(cluster)
    jt = vu.get_jobtracker(cluster)

    if decommission_tts:
        sc.decommission_tt(jt, instances, tts)
    if decommission_dns:
        sc.decommission_dn(nn, instances, dns)

def validate_existing_ng_scaling(pctx, cluster, existing):
    scalable_processes = _get_scalable_processes()
    dn_to_delete = 0
    for ng in cluster.node_groups:
        if ng.id in existing:
            if ng.count > existing[ng.id] and "datanode" in ng.node_processes:
                dn_to_delete += ng.count - existing[ng.id]
            if not set(ng.node_processes).issubset(scalable_processes):
                msg = _("Vanilla plugin cannot scale nodegroup "
                        "with processes: %s")
                raise ex.NodeGroupCannotBeScaled(
                    ng.name, msg % ' '.join(ng.node_processes))

    dn_amount = len(vu.get_datanodes(cluster))
    rep_factor = cu.get_config_value(pctx, 'HDFS', 'dfs.replication',
                                     cluster)

    if dn_to_delete > 0 and dn_amount - dn_to_delete < rep_factor:
        msg = _("Vanilla plugin cannot shrink cluster because it would be "
                "not enough nodes for replicas (replication factor is %s)")
        raise ex.ClusterCannotBeScaled(
            cluster.name, msg % rep_factor)

def _push_namenode_configs(self, cluster, r):
    r.write_file_to(
        '/etc/hadoop/dn.incl',
        utils.generate_fqdn_host_names(vu.get_datanodes(cluster)))

def _push_namenode_configs(self, cluster, r): r.write_file_to("/etc/hadoop/dn.incl", utils.generate_fqdn_host_names(vu.get_datanodes(cluster)))