def _push_configs_to_new_node(self, cluster, extra, instance):
    ng_extra = extra[instance.node_group.id]

    files_supervisor = {
        '/etc/supervisor/supervisord.conf': ng_extra['slave_sv_conf']
    }
    files_storm = {
        '/usr/local/storm/conf/storm.yaml': ng_extra['st_instances']
    }
    files_zk = {
        '/opt/zookeeper/zookeeper/conf/zoo.cfg': ng_extra['zk_conf']
    }
    files_supervisor_master = {
        '/etc/supervisor/supervisord.conf': ng_extra['master_sv_conf']
    }
    file_pyleus_conf = {
        '/home/ubuntu/.pyleus.conf': ng_extra['pyleus_conf']
    }

    with utils.get_remote(instance) as r:
        node_processes = instance.node_group.node_processes
        # storm.yaml is needed on every node; the remaining files depend
        # on which processes run there.
        r.write_files_to(files_storm, run_as_root=True)
        if 'zookeeper' in node_processes:
            self._push_zk_configs(r, files_zk)
        if 'nimbus' in node_processes:
            self._push_supervisor_configs(r, files_supervisor_master)
            self._push_supervisor_configs(r, file_pyleus_conf)
        if 'supervisor' in node_processes:
            self._push_supervisor_configs(r, files_supervisor)
def scale_cluster(self, cluster, instances):
    master = utils.get_instance(cluster, "master")
    r_master = utils.get_remote(master)

    run.stop_spark(r_master, self._spark_home(cluster))

    self._setup_instances(cluster, instances)
    nn = utils.get_instance(cluster, "namenode")
    run.refresh_nodes(utils.get_remote(nn), "dfsadmin")
    dn_instances = [instance for instance in instances
                    if 'datanode' in instance.node_group.node_processes]
    self._start_datanode_processes(dn_instances)

    swift_helper.install_ssl_certs(instances)

    run.start_spark_master(r_master, self._spark_home(cluster))
    LOG.info("Spark master service has been restarted")
def decommission_dn(nn, inst_to_be_deleted, survived_inst):
    with utils.get_remote(nn) as r:
        r.write_file_to('/etc/hadoop/dn.excl',
                        utils.generate_fqdn_host_names(inst_to_be_deleted))
        run.refresh_nodes(utils.get_remote(nn), "dfsadmin")
        context.sleep(3)

        utils.plugin_option_poll(
            nn.cluster, _is_decommissioned, c_helper.DECOMMISSIONING_TIMEOUT,
            _("Decommission %s") % "DataNodes", 3,
            {'r': r, 'inst_to_be_deleted': inst_to_be_deleted})

        r.write_files_to({
            '/etc/hadoop/dn.incl':
                utils.generate_fqdn_host_names(survived_inst),
            '/etc/hadoop/dn.excl': ""
        })
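# _is_decommissioned is the predicate polled by plugin_option_poll above.
# A minimal sketch of what it could look like, assuming a hypothetical
# parse_dfs_report() helper that turns "hdfs dfsadmin -report" output into
# per-DataNode dicts (the helper and the exact report field names are
# assumptions, not the plugin's confirmed API):
def _is_decommissioned(r, inst_to_be_deleted):
    code, stdout = r.execute_command("sudo -u hdfs hadoop dfsadmin -report")
    datanodes_info = parse_dfs_report(stdout)  # hypothetical helper
    for inst in inst_to_be_deleted:
        for dn in datanodes_info:
            # Keep polling while any node slated for removal is still in a
            # state other than "Decommissioned".
            if (dn["Name"].startswith(inst.internal_ip) and
                    dn["Decommission Status"] != "Decommissioned"):
                return False
    return True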
def rebalance_topology(self, cluster):
    topology_names = self._get_running_topologies_names(cluster)
    master = utils.get_instance(cluster, "nimbus")

    for topology_name in topology_names:
        cmd = ('%(rebalance)s -c nimbus.host=%(host)s %(topology_name)s'
               % {
                   "rebalance": "/usr/local/storm/bin/storm rebalance",
                   "host": master.hostname(),
                   "topology_name": topology_name
               })

        with utils.get_remote(master) as r:
            ret, stdout = r.execute_command(cmd)
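# For a topology named "wordcount" (illustrative) on a nimbus host
# "nimbus-1", the command assembled above renders to:
#   /usr/local/storm/bin/storm rebalance -c nimbus.host=nimbus-1 wordcount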
def _push_configs_to_existing_node(self, cluster, extra, instance):
    node_processes = instance.node_group.node_processes
    need_update_hadoop = (c_helper.is_data_locality_enabled(cluster) or
                          'namenode' in node_processes)
    need_update_spark = ('master' in node_processes or
                         'slave' in node_processes)

    if need_update_spark:
        sp_home = self._spark_home(cluster)
        files = {
            os.path.join(sp_home, 'conf/spark-env.sh'): extra['sp_master'],
            os.path.join(sp_home, 'conf/slaves'): extra['sp_slaves'],
            os.path.join(sp_home,
                         'conf/spark-defaults.conf'): extra['sp_defaults']
        }
        r = utils.get_remote(instance)
        r.write_files_to(files)
        self._push_cleanup_job(r, cluster, extra, instance)
    if need_update_hadoop:
        with utils.get_remote(instance) as r:
            self._write_topology_data(r, cluster, extra)
            self._push_master_configs(r, cluster, extra, instance)
def _get_running_topologies_names(self, cluster):
    master = utils.get_instance(cluster, "nimbus")

    cmd = ("%(storm)s -c nimbus.host=%(host)s "
           "list | grep ACTIVE | awk '{print $1}'") % {
        "storm": "/usr/local/storm/bin/storm",
        "host": master.hostname()
    }

    with utils.get_remote(master) as r:
        ret, stdout = r.execute_command(cmd)
    names = stdout.split('\n')
    topology_names = names[0:len(names) - 1]
    return topology_names
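# Worked example of the parsing above: if the filtered "storm list" output
# is "topo1\ntopo2\n", then
#   "topo1\ntopo2\n".split('\n')  ->  ['topo1', 'topo2', '']
# and the names[0:len(names) - 1] slice drops the trailing empty string,
# yielding ['topo1', 'topo2'].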
def decommission_sl(master, inst_to_be_deleted, survived_inst):
    if survived_inst is not None:
        slavenames = []
        for slave in survived_inst:
            slavenames.append(slave.hostname())
        slaves_content = c_helper.generate_spark_slaves_configs(slavenames)
    else:
        slaves_content = "\n"

    cluster = master.cluster
    sp_home = utils.get_config_value_or_default("Spark", "Spark home",
                                                cluster)
    r_master = utils.get_remote(master)
    run.stop_spark(r_master, sp_home)

    # write new slaves file to master
    files = {os.path.join(sp_home, 'conf/slaves'): slaves_content}
    r_master.write_files_to(files)

    # write new slaves file to each survived slave as well
    # (guard against survived_inst being None, which the branch above allows)
    for i in survived_inst or []:
        with utils.get_remote(i) as r:
            r.write_files_to(files)

    run.start_spark_master(r_master, sp_home)
def _push_configs_to_existing_node(self, cluster, extra, instance):
    node_processes = instance.node_group.node_processes
    need_storm_update = ('nimbus' in node_processes or
                         'supervisor' in node_processes)
    need_zookeeper_update = 'zookeeper' in node_processes

    ng_extra = extra[instance.node_group.id]
    r = utils.get_remote(instance)

    if need_storm_update:
        storm_path = '/usr/local/storm/conf/storm.yaml'
        files_storm = {storm_path: ng_extra['st_instances']}
        r.write_files_to(files_storm)

    if need_zookeeper_update:
        zk_path = '/opt/zookeeper/zookeeper/conf/zoo.cfg'
        files_zookeeper = {zk_path: ng_extra['zk_conf']}
        self._push_zk_configs(r, files_zookeeper)
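# The _push_zk_configs and _push_supervisor_configs helpers used here and in
# _push_configs_to_new_node are thin wrappers over the remote file API. A
# minimal sketch under that assumption (the write-vs-append choice and the
# run_as_root flag are assumptions, not confirmed by this section):
def _push_zk_configs(self, r, files):
    # zoo.cfg lives under /opt/zookeeper, so root is needed to replace it.
    r.write_files_to(files, run_as_root=True)

def _push_supervisor_configs(self, r, files):
    # supervisord.conf is extended with program sections rather than
    # replaced wholesale.
    r.append_to_files(files, run_as_root=True)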
def start_cluster(self, cluster):
    nn_instance = utils.get_instance(cluster, "namenode")
    dn_instances = utils.get_instances(cluster, "datanode")

    # Start the name node
    self._start_namenode(nn_instance)

    # start the data nodes
    self._start_datanode_processes(dn_instances)
    run.await_datanodes(cluster)

    LOG.info("Hadoop services have been started")

    with utils.get_remote(nn_instance) as r:
        r.execute_command("sudo -u hdfs hdfs dfs -mkdir -p /user/$USER/")
        r.execute_command("sudo -u hdfs hdfs dfs -chown $USER "
                          "/user/$USER/")

    # start spark nodes
    self.start_spark(cluster)
    swift_helper.install_ssl_certs(utils.get_instances(cluster))

    LOG.info('Cluster has been started successfully')
    self._set_cluster_info(cluster)
def _push_configs_to_new_node(self, cluster, extra, instance):
    files_hadoop = {
        os.path.join(c_helper.HADOOP_CONF_DIR,
                     "core-site.xml"): extra['xml']['core-site'],
        os.path.join(c_helper.HADOOP_CONF_DIR,
                     "hdfs-site.xml"): extra['xml']['hdfs-site'],
    }

    sp_home = self._spark_home(cluster)
    files_spark = {
        os.path.join(sp_home, 'conf/spark-env.sh'): extra['sp_master'],
        os.path.join(sp_home, 'conf/slaves'): extra['sp_slaves'],
        os.path.join(sp_home,
                     'conf/spark-defaults.conf'): extra['sp_defaults']
    }

    files_init = {
        '/tmp/sahara-hadoop-init.sh': extra['setup_script'],
        'id_rsa': cluster.management_private_key,
        'authorized_keys': cluster.management_public_key
    }

    # pietro: This is required because the (secret) key is not stored in
    # .ssh which hinders password-less ssh required by spark scripts
    key_cmd = ('sudo cp $HOME/id_rsa $HOME/.ssh/; '
               'sudo chown $USER $HOME/.ssh/id_rsa; '
               'sudo chmod 600 $HOME/.ssh/id_rsa')

    storage_paths = instance.storage_paths()
    dn_path = ' '.join(c_helper.make_hadoop_path(storage_paths, '/dfs/dn'))
    nn_path = ' '.join(c_helper.make_hadoop_path(storage_paths, '/dfs/nn'))

    hdfs_dir_cmd = ('sudo mkdir -p %(nn_path)s %(dn_path)s && '
                    'sudo chown -R hdfs:hadoop %(nn_path)s %(dn_path)s && '
                    'sudo chmod 755 %(nn_path)s %(dn_path)s'
                    % {"nn_path": nn_path, "dn_path": dn_path})

    with utils.get_remote(instance) as r:
        r.execute_command('sudo chown -R $USER:$USER /etc/hadoop')
        r.execute_command('sudo chown -R $USER:$USER %s' % sp_home)
        r.write_files_to(files_hadoop)
        r.write_files_to(files_spark)
        r.write_files_to(files_init)
        r.execute_command('sudo chmod 0500 /tmp/sahara-hadoop-init.sh')
        r.execute_command('sudo /tmp/sahara-hadoop-init.sh '
                          '>> /tmp/sahara-hadoop-init.log 2>&1')
        r.execute_command(hdfs_dir_cmd)
        r.execute_command(key_cmd)

        if c_helper.is_data_locality_enabled(cluster):
            r.write_file_to(
                '/etc/hadoop/topology.sh',
                utils.get_file_text('plugins/spark/resources/topology.sh',
                                    'sahara_plugin_spark'))
            r.execute_command('sudo chmod +x /etc/hadoop/topology.sh')

        self._write_topology_data(r, cluster, extra)
        self._push_master_configs(r, cluster, extra, instance)
        self._push_cleanup_job(r, cluster, extra, instance)
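# c_helper.make_hadoop_path, used above to build the DataNode/NameNode
# directory lists, is not shown in this section. A minimal sketch of the
# assumed behavior, where every instance storage path gets the HDFS suffix:
def make_hadoop_path(base_dirs, suffix):
    # e.g. ['/mnt/disk1', '/mnt/disk2'] with suffix '/dfs/dn'
    #   -> ['/mnt/disk1/dfs/dn', '/mnt/disk2/dfs/dn']
    return [base_dir + suffix for base_dir in base_dirs]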
def _start_spark(self, cluster, sm_instance):
    with utils.get_remote(sm_instance) as r:
        run.start_spark_master(r, self._spark_home(cluster))
        LOG.info("Spark service has been started")
def _start_namenode(self, nn_instance):
    with utils.get_remote(nn_instance) as r:
        run.format_namenode(r)
        run.start_processes(r, "namenode")
def _start_storm_master(self, sm_instance):
    with utils.get_remote(sm_instance) as r:
        run.start_storm_nimbus_and_ui(r)
        LOG.info("Storm master at {host} has been started".format(
            host=sm_instance.hostname()))