def scale_cluster(self, cluster, instances):
    """Set up extra instances and restart the Spark master around it.

    Sequence: stop Spark through the master's remote, set up the given
    instances, refresh the node list on the namenode, start the
    slave/datanode processes on the given instances, then start the Spark
    master again and log the restart.

    :param cluster: cluster whose "master" and "namenode" instances are
        looked up
    :param instances: instances to set up (presumably the ones added by the
        scaling operation; confirm against the caller)
    """
    spark_master = utils.get_instance(cluster, "master")
    master_remote = remote.get_remote(spark_master)

    # Spark stays stopped while the instances are being prepared.
    run.stop_spark(master_remote, self._spark_home(cluster))
    self._setup_instances(cluster, instances)

    namenode = utils.get_instance(cluster, "namenode")
    namenode_remote = remote.get_remote(namenode)
    run.refresh_nodes(namenode_remote, "dfsadmin")

    self._start_slave_datanode_processes(instances)

    run.start_spark_master(master_remote, self._spark_home(cluster))
    LOG.info(_LI("Spark master service at '%s' has been restarted"),
             spark_master.hostname())
def scale_cluster(self, cluster, instances):
    """Set up extra instances and restart the Spark master around it.

    Sequence: stop Spark through the master's remote, set up the given
    instances, refresh the node list on the namenode, start datanode
    processes on those instances whose node group lists "datanode", then
    start the Spark master again and log the restart.

    :param cluster: cluster whose "master" and "namenode" instances are
        looked up
    :param instances: instances to set up (presumably the ones added by the
        scaling operation; confirm against the caller)
    """
    spark_master = utils.get_instance(cluster, "master")
    master_remote = remote.get_remote(spark_master)

    # Spark stays stopped while the instances are being prepared.
    run.stop_spark(master_remote, self._spark_home(cluster))
    self._setup_instances(cluster, instances)

    namenode = utils.get_instance(cluster, "namenode")
    namenode_remote = remote.get_remote(namenode)
    run.refresh_nodes(namenode_remote, "dfsadmin")

    # Only instances whose node group runs a datanode get one started.
    datanode_instances = []
    for candidate in instances:
        if "datanode" in candidate.node_group.node_processes:
            datanode_instances.append(candidate)
    self._start_datanode_processes(datanode_instances)

    run.start_spark_master(master_remote, self._spark_home(cluster))
    LOG.info(_LI("Spark master service has been restarted"))
def scale_cluster(self, cluster, instances):
    """Set up extra instances and restart the Spark master around it.

    Sequence: stop Spark through the master's remote, set up the given
    instances, refresh the node list on the namenode, start datanode
    processes on those instances whose node group lists 'datanode', install
    SSL certificates on all given instances via ``swift_helper``, then start
    the Spark master again and log the restart.

    :param cluster: cluster whose "master" and "namenode" instances are
        looked up
    :param instances: instances to set up (presumably the ones added by the
        scaling operation; confirm against the caller)
    """

    def _runs_datanode(instance):
        # True when the instance's node group lists the datanode process.
        return 'datanode' in instance.node_group.node_processes

    spark_master = utils.get_instance(cluster, "master")
    master_remote = remote.get_remote(spark_master)

    # Spark stays stopped while the instances are being prepared.
    run.stop_spark(master_remote, self._spark_home(cluster))
    self._setup_instances(cluster, instances)

    namenode = utils.get_instance(cluster, "namenode")
    namenode_remote = remote.get_remote(namenode)
    run.refresh_nodes(namenode_remote, "dfsadmin")

    datanode_instances = [inst for inst in instances if _runs_datanode(inst)]
    self._start_datanode_processes(datanode_instances)

    # Certificates go to every given instance, not just the datanodes.
    swift_helper.install_ssl_certs(instances)

    run.start_spark_master(master_remote, self._spark_home(cluster))
    LOG.info("Spark master service has been restarted")
def decommission_dn(nn, inst_to_be_deleted, survived_inst):
    """Decommission datanodes and rewrite the include/exclude host files.

    Writes the FQDN host names of ``inst_to_be_deleted`` to
    ``/etc/hadoop/dn.excl`` on the namenode, refreshes the node list, then
    polls ``_is_decommissioned`` through ``poll_utils.plugin_option_poll``.
    Afterwards ``/etc/hadoop/dn.incl`` is rewritten with the host names of
    ``survived_inst`` and ``/etc/hadoop/dn.excl`` is emptied.

    :param nn: namenode instance; its ``cluster`` attribute is passed to the
        poller
    :param inst_to_be_deleted: instances whose datanodes are decommissioned
    :param survived_inst: instances that remain in the include file
    """
    with remote.get_remote(nn) as nn_remote:
        excluded_hosts = utils.generate_fqdn_host_names(inst_to_be_deleted)
        nn_remote.write_file_to('/etc/hadoop/dn.excl', excluded_hosts)

        run.refresh_nodes(remote.get_remote(nn), "dfsadmin")
        # Short pause before the first status poll.
        context.sleep(3)

        # Keyword arguments handed through to the polled function.
        poll_kwargs = {
            'r': nn_remote,
            'inst_to_be_deleted': inst_to_be_deleted,
        }
        poll_utils.plugin_option_poll(
            nn.cluster,
            _is_decommissioned,
            c_helper.DECOMMISSIONING_TIMEOUT,
            _("Decommission %s") % "DataNodes",
            3,
            poll_kwargs)

        included_hosts = utils.generate_fqdn_host_names(survived_inst)
        nn_remote.write_files_to({
            '/etc/hadoop/dn.incl': included_hosts,
            '/etc/hadoop/dn.excl': "",
        })
def scale_cluster(self, cluster, instances):
    """Set up extra instances and restart the Spark master around it.

    Sequence: stop Spark through the master's remote, set up the given
    instances, refresh the node list on the namenode, start datanode
    processes on those instances whose node group lists 'datanode', then
    start the Spark master again and log the restart with the master's
    hostname.

    :param cluster: cluster whose "master" and "namenode" instances are
        looked up
    :param instances: instances to set up (presumably the ones added by the
        scaling operation; confirm against the caller)
    """
    spark_master = utils.get_instance(cluster, "master")
    master_remote = remote.get_remote(spark_master)

    # Spark stays stopped while the instances are being prepared.
    run.stop_spark(master_remote, self._spark_home(cluster))
    self._setup_instances(cluster, instances)

    namenode = utils.get_instance(cluster, "namenode")
    namenode_remote = remote.get_remote(namenode)
    run.refresh_nodes(namenode_remote, "dfsadmin")

    # Only instances whose node group runs a datanode get one started.
    datanode_instances = []
    for candidate in instances:
        if 'datanode' in candidate.node_group.node_processes:
            datanode_instances.append(candidate)
    self._start_datanode_processes(datanode_instances)

    run.start_spark_master(master_remote, self._spark_home(cluster))

    message_template = _LI("Spark master service at {host} has been restarted")
    LOG.info(message_template.format(host=spark_master.hostname()))
def decommission_dn(nn, inst_to_be_deleted, survived_inst):
    """Decommission datanodes and wait until the DFS report confirms it.

    Writes the FQDN host names of ``inst_to_be_deleted`` to
    ``/etc/hadoop/dn.excl`` on the namenode, refreshes the node list and then
    polls ``hadoop dfsadmin -report`` every 3 seconds until none of the
    instances being deleted is listed with a "Decommission Status" other
    than "Decommissioned". On success ``/etc/hadoop/dn.incl`` is rewritten
    with the host names of ``survived_inst`` and ``/etc/hadoop/dn.excl`` is
    emptied.

    :param nn: namenode instance; ``nn.node_group.cluster`` selects the
        decommissioning timeout
    :param inst_to_be_deleted: instances whose datanodes are decommissioned;
        matched against report entries by ``internal_ip`` prefix
    :param survived_inst: instances that remain in the include file
    :raises ex.DecommissionError: if decommissioning is not confirmed within
        the configured timeout
    """
    with remote.get_remote(nn) as r:
        r.write_file_to('/etc/hadoop/dn.excl',
                        utils.generate_fqdn_host_names(inst_to_be_deleted))
        run.refresh_nodes(remote.get_remote(nn), "dfsadmin")
        context.sleep(3)

        timeout = c_helper.get_decommissioning_timeout(nn.node_group.cluster)
        s_time = timeutils.utcnow()
        all_found = False

        while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
            cmd = r.execute_command("sudo -u hdfs hadoop dfsadmin -report")
            # cmd[1] is handed to the report parser (presumably the
            # command's stdout; confirm against execute_command).
            datanodes_info = parse_dfs_report(cmd[1])

            # A node that does not appear in the report at all is not
            # counted as pending, matching the original scan.
            still_pending = any(
                dn["Name"].startswith(i.internal_ip)
                and dn["Decommission Status"] != "Decommissioned"
                for i in inst_to_be_deleted
                for dn in datanodes_info)
            all_found = not still_pending

            if all_found:
                r.write_files_to({
                    '/etc/hadoop/dn.incl':
                        utils.generate_fqdn_host_names(survived_inst),
                    '/etc/hadoop/dn.excl': "",
                })
                break
            context.sleep(3)

        if not all_found:
            # The exception used to be constructed but never raised, so a
            # timeout went unnoticed by callers.
            raise ex.DecommissionError(
                _("Cannot finish decommission of cluster %(cluster)s in "
                  "%(seconds)d seconds") %
                {"cluster": nn.node_group.cluster,
                 "seconds": timeout})