def _push_configs_to_existing_node(self, cluster, extra, instance):
    node_processes = instance.node_group.node_processes
    need_update_hadoop = (c_helper.is_data_locality_enabled(cluster) or
                          'namenode' in node_processes)
    need_update_spark = ('master' in node_processes or
                         'slave' in node_processes)

    if need_update_spark:
        ng_extra = extra[instance.node_group.id]
        sp_home = self._spark_home(cluster)
        files = {
            os.path.join(sp_home, 'conf/spark-env.sh'): ng_extra['sp_master'],
            os.path.join(sp_home, 'conf/slaves'): ng_extra['sp_slaves'],
            os.path.join(sp_home,
                         'conf/spark-defaults.conf'): ng_extra['sp_defaults']
        }
        r = remote.get_remote(instance)
        r.write_files_to(files)
        self._push_cleanup_job(r, cluster, extra, instance)
    if need_update_hadoop:
        with remote.get_remote(instance) as r:
            self._write_topology_data(r, cluster, extra)
            self._push_master_configs(r, cluster, extra, instance)

def _extract_configs_to_extra(self, cluster):
    sp_master = utils.get_instance(cluster, "master")
    sp_slaves = utils.get_instances(cluster, "slave")

    extra = dict()

    config_master = config_slaves = ''
    if sp_master is not None:
        config_master = c_helper.generate_spark_env_configs(cluster)

    if sp_slaves is not None:
        slavenames = []
        for slave in sp_slaves:
            slavenames.append(slave.hostname())
        config_slaves = c_helper.generate_spark_slaves_configs(slavenames)
    else:
        config_slaves = "\n"

    # Any node that might be used to run spark-submit will need
    # these libs for swift integration
    config_defaults = c_helper.generate_spark_executor_classpath(cluster)

    extra['job_cleanup'] = c_helper.generate_job_cleanup_config(cluster)
    extra['sp_master'] = config_master
    extra['sp_slaves'] = config_slaves
    extra['sp_defaults'] = config_defaults

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra['topology_data'] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra

def _extract_configs_to_extra(self, cluster):
    nn = utils.get_instance(cluster, "namenode")
    sp_master = utils.get_instance(cluster, "master")
    sp_slaves = utils.get_instances(cluster, "slave")

    extra = dict()

    config_master = config_slaves = ""
    if sp_master is not None:
        config_master = c_helper.generate_spark_env_configs(cluster)

    if sp_slaves is not None:
        slavenames = []
        for slave in sp_slaves:
            slavenames.append(slave.hostname())
        config_slaves = c_helper.generate_spark_slaves_configs(slavenames)
    else:
        config_slaves = "\n"

    for ng in cluster.node_groups:
        extra[ng.id] = {
            "xml": c_helper.generate_xml_configs(ng.configuration(),
                                                 ng.storage_paths(),
                                                 nn.hostname(), None),
            "setup_script": c_helper.generate_hadoop_setup_script(
                ng.storage_paths(),
                c_helper.extract_hadoop_environment_confs(
                    ng.configuration())),
            "sp_master": config_master,
            "sp_slaves": config_slaves,
        }

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra["topology_data"] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra

def _push_configs_to_new_node(self, cluster, extra, instance):
    ng_extra = extra[instance.node_group.id]

    files_hadoop = {
        '/etc/hadoop/conf/core-site.xml': ng_extra['xml']['core-site'],
        '/etc/hadoop/conf/hdfs-site.xml': ng_extra['xml']['hdfs-site'],
    }

    sp_home = self._spark_home(cluster)
    files_spark = {
        os.path.join(sp_home, 'conf/spark-env.sh'): ng_extra['sp_master'],
        os.path.join(sp_home, 'conf/slaves'): ng_extra['sp_slaves']
    }

    files_init = {
        '/tmp/sahara-hadoop-init.sh': ng_extra['setup_script'],
        'id_rsa': cluster.management_private_key,
        'authorized_keys': cluster.management_public_key
    }

    # pietro: This is required because the (secret) key is not stored in
    # .ssh which hinders password-less ssh required by spark scripts
    key_cmd = ('sudo cp $HOME/id_rsa $HOME/.ssh/; '
               'sudo chown $USER $HOME/.ssh/id_rsa; '
               'sudo chmod 600 $HOME/.ssh/id_rsa')

    storage_paths = instance.node_group.storage_paths()
    dn_path = ' '.join(c_helper.make_hadoop_path(storage_paths, '/dfs/dn'))
    nn_path = ' '.join(c_helper.make_hadoop_path(storage_paths, '/dfs/nn'))

    hdfs_dir_cmd = ('sudo mkdir -p %(nn_path)s %(dn_path)s &&'
                    'sudo chown -R hdfs:hadoop %(nn_path)s %(dn_path)s &&'
                    'sudo chmod 755 %(nn_path)s %(dn_path)s' %
                    {"nn_path": nn_path, "dn_path": dn_path})

    with remote.get_remote(instance) as r:
        r.execute_command('sudo chown -R $USER:$USER /etc/hadoop')
        r.execute_command('sudo chown -R $USER:$USER %s' % sp_home)
        r.write_files_to(files_hadoop)
        r.write_files_to(files_spark)
        r.write_files_to(files_init)
        r.execute_command('sudo chmod 0500 /tmp/sahara-hadoop-init.sh')
        r.execute_command('sudo /tmp/sahara-hadoop-init.sh '
                          '>> /tmp/sahara-hadoop-init.log 2>&1')
        r.execute_command(hdfs_dir_cmd)
        r.execute_command(key_cmd)

        if c_helper.is_data_locality_enabled(cluster):
            r.write_file_to(
                '/etc/hadoop/topology.sh',
                f.get_file_text('plugins/spark/resources/topology.sh'))
            r.execute_command('sudo chmod +x /etc/hadoop/topology.sh')

        self._write_topology_data(r, cluster, extra)
        self._push_master_configs(r, cluster, extra, instance)
        self._push_cleanup_job(r, cluster, extra, instance)

def _push_configs_to_new_node(self, cluster, extra, instance):
    ng_extra = extra[instance.node_group.id]

    files_hadoop = {
        "/etc/hadoop/conf/core-site.xml": ng_extra["xml"]["core-site"],
        "/etc/hadoop/conf/hdfs-site.xml": ng_extra["xml"]["hdfs-site"],
    }

    sp_home = self._spark_home(cluster)
    files_spark = {
        os.path.join(sp_home, "conf/spark-env.sh"): ng_extra["sp_master"],
        os.path.join(sp_home, "conf/slaves"): ng_extra["sp_slaves"],
    }

    files_init = {
        "/tmp/sahara-hadoop-init.sh": ng_extra["setup_script"],
        "id_rsa": cluster.management_private_key,
        "authorized_keys": cluster.management_public_key,
    }

    # pietro: This is required because the (secret) key is not stored in
    # .ssh which hinders password-less ssh required by spark scripts
    key_cmd = ("sudo cp $HOME/id_rsa $HOME/.ssh/; "
               "sudo chown $USER $HOME/.ssh/id_rsa; "
               "sudo chmod 600 $HOME/.ssh/id_rsa")

    for ng in cluster.node_groups:
        dn_path = c_helper.extract_hadoop_path(ng.storage_paths(), "/dfs/dn")
        nn_path = c_helper.extract_hadoop_path(ng.storage_paths(), "/dfs/nn")
        hdfs_dir_cmd = ("sudo mkdir -p %s %s;"
                        "sudo chown -R hdfs:hadoop %s %s;"
                        "sudo chmod 755 %s %s;") % (nn_path, dn_path,
                                                    nn_path, dn_path,
                                                    nn_path, dn_path)

    with remote.get_remote(instance) as r:
        r.execute_command("sudo chown -R $USER:$USER /etc/hadoop")
        r.execute_command("sudo chown -R $USER:$USER %s" % sp_home)
        r.write_files_to(files_hadoop)
        r.write_files_to(files_spark)
        r.write_files_to(files_init)
        r.execute_command("sudo chmod 0500 /tmp/sahara-hadoop-init.sh")
        r.execute_command("sudo /tmp/sahara-hadoop-init.sh "
                          ">> /tmp/sahara-hadoop-init.log 2>&1")
        r.execute_command(hdfs_dir_cmd)
        r.execute_command(key_cmd)

        if c_helper.is_data_locality_enabled(cluster):
            r.write_file_to(
                "/etc/hadoop/topology.sh",
                f.get_file_text("plugins/spark/resources/topology.sh"))
            r.execute_command("sudo chmod +x /etc/hadoop/topology.sh")

        self._write_topology_data(r, cluster, extra)
        self._push_master_configs(r, cluster, extra, instance)

def _extract_configs_to_extra(self, cluster):
    nn = utils.get_instance(cluster, "namenode")
    sp_master = utils.get_instance(cluster, "master")
    sp_slaves = utils.get_instances(cluster, "slave")

    extra = dict()

    config_master = config_slaves = ''
    if sp_master is not None:
        config_master = c_helper.generate_spark_env_configs(cluster)

    if sp_slaves is not None:
        slavenames = []
        for slave in sp_slaves:
            slavenames.append(slave.hostname())
        config_slaves = c_helper.generate_spark_slaves_configs(slavenames)
    else:
        config_slaves = "\n"

    # Any node that might be used to run spark-submit will need
    # these libs for swift integration
    config_defaults = c_helper.generate_spark_executor_classpath(cluster)

    extra['job_cleanup'] = c_helper.generate_job_cleanup_config(cluster)
    for ng in cluster.node_groups:
        extra[ng.id] = {
            'xml': c_helper.generate_xml_configs(
                ng.configuration(),
                ng.storage_paths(),
                nn.hostname(), None
            ),
            'setup_script': c_helper.generate_hadoop_setup_script(
                ng.storage_paths(),
                c_helper.extract_hadoop_environment_confs(
                    ng.configuration())
            ),
            'sp_master': config_master,
            'sp_slaves': config_slaves,
            'sp_defaults': config_defaults
        }
        if "zeppelin" in ng.node_processes:
            extra[ng.id].update({
                "zeppelin_setup_script":
                    c_helper.generate_zeppelin_setup_script(sp_master)})

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra['topology_data'] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra

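# Illustrative sketch (not part of the plugin): the shape of the ``extra``
# dict returned by the variant above, with placeholder strings standing in
# for the generated file contents. The node-group id, hostnames and rack
# names below are hypothetical.
example_extra = {
    'job_cleanup': '<job cleanup config>',
    'some-node-group-id': {
        'xml': {'core-site': '<core-site.xml>', 'hdfs-site': '<hdfs-site.xml>'},
        'setup_script': '<hadoop setup script>',
        'sp_master': '<spark-env.sh contents>',
        'sp_slaves': 'slave-1\nslave-2\n',
        'sp_defaults': '<spark-defaults.conf contents>',
        # present only when the node group runs a "zeppelin" process
        'zeppelin_setup_script': '<zeppelin-conf.sh contents>',
    },
    # present only when data locality is enabled
    'topology_data': 'slave-1 /default-rack\nslave-2 /default-rack\n',
}
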
def _extract_configs_to_extra(self, cluster):
    nn = utils.get_instance(cluster, "namenode")
    sp_master = utils.get_instance(cluster, "master")
    sp_slaves = utils.get_instances(cluster, "slave")

    extra = dict()

    config_master = config_slaves = ''
    if sp_master is not None:
        config_master = c_helper.generate_spark_env_configs(cluster)

    if sp_slaves is not None:
        slavenames = []
        for slave in sp_slaves:
            slavenames.append(slave.hostname())
        config_slaves = c_helper.generate_spark_slaves_configs(slavenames)
    else:
        config_slaves = "\n"

    # Any node that might be used to run spark-submit will need
    # these libs for swift integration
    config_defaults = c_helper.generate_spark_executor_classpath(cluster)

    extra['job_cleanup'] = c_helper.generate_job_cleanup_config(cluster)
    for ng in cluster.node_groups:
        extra[ng.id] = {
            'xml': c_helper.generate_xml_configs(ng.configuration(),
                                                 ng.storage_paths(),
                                                 nn.hostname(), None),
            'setup_script': c_helper.generate_hadoop_setup_script(
                ng.storage_paths(),
                c_helper.extract_hadoop_environment_confs(
                    ng.configuration())),
            'sp_master': config_master,
            'sp_slaves': config_slaves,
            'sp_defaults': config_defaults
        }

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra['topology_data'] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra

def _extract_configs_to_extra(self, cluster):
    nn = utils.get_instance(cluster, "namenode")
    sp_master = utils.get_instance(cluster, "master")
    sp_slaves = utils.get_instances(cluster, "slave")

    extra = dict()

    config_master = config_slaves = ''
    if sp_master is not None:
        config_master = c_helper.generate_spark_env_configs(cluster)

    if sp_slaves is not None:
        slavenames = []
        for slave in sp_slaves:
            slavenames.append(slave.hostname())
        config_slaves = c_helper.generate_spark_slaves_configs(slavenames)
    else:
        config_slaves = "\n"

    for ng in cluster.node_groups:
        extra[ng.id] = {
            'xml': c_helper.generate_xml_configs(
                ng.configuration(),
                ng.storage_paths(),
                nn.hostname(), None,
            ),
            'setup_script': c_helper.generate_hadoop_setup_script(
                ng.storage_paths(),
                c_helper.extract_hadoop_environment_confs(
                    ng.configuration())),
            'sp_master': config_master,
            'sp_slaves': config_slaves
        }

    if c_helper.is_data_locality_enabled(cluster):
        topology_data = th.generate_topology_map(
            cluster, CONF.enable_hypervisor_awareness)
        extra['topology_data'] = "\n".join(
            [k + " " + v for k, v in topology_data.items()]) + "\n"

    return extra

def _push_configs_to_existing_node(self, cluster, extra, instance):
    node_processes = instance.node_group.node_processes
    need_update_hadoop = (c_helper.is_data_locality_enabled(cluster) or
                          "namenode" in node_processes)
    need_update_spark = ("master" in node_processes or
                         "slave" in node_processes)

    if need_update_spark:
        ng_extra = extra[instance.node_group.id]
        sp_home = self._spark_home(cluster)
        files = {
            os.path.join(sp_home, "conf/spark-env.sh"): ng_extra["sp_master"],
            os.path.join(sp_home, "conf/slaves"): ng_extra["sp_slaves"],
        }
        r = remote.get_remote(instance)
        r.write_files_to(files)
    if need_update_hadoop:
        with remote.get_remote(instance) as r:
            self._write_topology_data(r, cluster, extra)
            self._push_master_configs(r, cluster, extra, instance)

def _push_configs_to_existing_node(self, cluster, extra, instance):
    node_processes = instance.node_group.node_processes
    need_update_hadoop = (c_helper.is_data_locality_enabled(cluster) or
                          'namenode' in node_processes)
    need_update_spark = ('master' in node_processes or
                         'slave' in node_processes)

    if need_update_spark:
        ng_extra = extra[instance.node_group.id]
        files = {
            '/opt/spark/conf/spark-env.sh': ng_extra['sp_master'],
            '/opt/spark/conf/slaves': ng_extra['sp_slaves'],
        }
        r = remote.get_remote(instance)
        r.write_files_to(files)
    if need_update_hadoop:
        with remote.get_remote(instance) as r:
            self._write_topology_data(r, cluster, extra)
            self._push_master_configs(r, cluster, extra, instance)

def _write_topology_data(self, r, cluster, extra):
    if c_helper.is_data_locality_enabled(cluster):
        topology_data = extra['topology_data']
        r.write_file_to('/etc/hadoop/topology.data', topology_data)

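# Illustrative sketch (not part of the plugin): the /etc/hadoop/topology.data
# file written above is just the "key value" mapping built in
# _extract_configs_to_extra, one "<host-or-ip> <rack path>" pair per line,
# which /etc/hadoop/topology.sh then looks hosts up in. Hostnames, addresses
# and rack names below are hypothetical.
example_topology_data = (
    "slave-1 /rack1\n"
    "slave-2 /rack1\n"
    "10.0.0.12 /rack2\n"
)
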
def _push_configs_to_new_node(self, cluster, extra, instance):
    ng_extra = extra[instance.node_group.id]

    files_hadoop = {
        os.path.join(c_helper.HADOOP_CONF_DIR,
                     "core-site.xml"): ng_extra['xml']['core-site'],
        os.path.join(c_helper.HADOOP_CONF_DIR,
                     "hdfs-site.xml"): ng_extra['xml']['hdfs-site'],
    }

    sp_home = self._spark_home(cluster)
    files_spark = {
        os.path.join(sp_home, 'conf/spark-env.sh'): ng_extra['sp_master'],
        os.path.join(sp_home, 'conf/slaves'): ng_extra['sp_slaves'],
        os.path.join(sp_home,
                     'conf/spark-defaults.conf'): ng_extra['sp_defaults']
    }

    files_init = {
        '/tmp/sahara-hadoop-init.sh': ng_extra['setup_script'],
        'id_rsa': cluster.management_private_key,
        'authorized_keys': cluster.management_public_key
    }

    if 'zeppelin_setup_script' in ng_extra:
        files_init.update({
            '/tmp/zeppelin-conf.sh': ng_extra['zeppelin_setup_script']})

    # pietro: This is required because the (secret) key is not stored in
    # .ssh which hinders password-less ssh required by spark scripts
    key_cmd = ('sudo cp $HOME/id_rsa $HOME/.ssh/; '
               'sudo chown $USER $HOME/.ssh/id_rsa; '
               'sudo chmod 600 $HOME/.ssh/id_rsa')

    storage_paths = instance.node_group.storage_paths()
    dn_path = ' '.join(c_helper.make_hadoop_path(storage_paths, '/dfs/dn'))
    nn_path = ' '.join(c_helper.make_hadoop_path(storage_paths, '/dfs/nn'))

    hdfs_dir_cmd = ('sudo mkdir -p %(nn_path)s %(dn_path)s &&'
                    'sudo chown -R hdfs:hadoop %(nn_path)s %(dn_path)s &&'
                    'sudo chmod 755 %(nn_path)s %(dn_path)s' %
                    {"nn_path": nn_path, "dn_path": dn_path})

    with remote.get_remote(instance) as r:
        r.execute_command('sudo chown -R $USER:$USER /etc/hadoop')
        r.execute_command('sudo chown -R $USER:$USER %s' % sp_home)
        r.write_files_to(files_hadoop)
        r.write_files_to(files_spark)
        r.write_files_to(files_init)
        r.execute_command('sudo chmod 0500 /tmp/sahara-hadoop-init.sh')
        r.execute_command('sudo /tmp/sahara-hadoop-init.sh '
                          '>> /tmp/sahara-hadoop-init.log 2>&1')
        r.execute_command(hdfs_dir_cmd)
        r.execute_command(key_cmd)

        if c_helper.is_data_locality_enabled(cluster):
            r.write_file_to(
                '/etc/hadoop/topology.sh',
                f.get_file_text('plugins/spark/resources/topology.sh'))
            r.execute_command('sudo chmod +x /etc/hadoop/topology.sh')

        if 'zeppelin_setup_script' in ng_extra:
            r.execute_command('sudo chmod 0500 /tmp/zeppelin-conf.sh')
            r.execute_command('sudo /tmp/zeppelin-conf.sh '
                              '>> /tmp/zeppelin-conf.log 2>&1')

        self._write_topology_data(r, cluster, extra)
        self._push_master_configs(r, cluster, extra, instance)
        self._push_cleanup_job(r, cluster, extra, instance)

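# Sketch (assumption, not the plugin's confirmed implementation): the newer
# variants above resolve the Spark installation directory through
# self._spark_home() instead of the hard-coded /opt/spark of the older ones.
# A minimal helper along these lines would read the user-visible "Spark home"
# cluster config; the get_config_value_or_default lookup shown here is an
# assumption about how that config is fetched.
def _spark_home(self, cluster):
    # "Spark home" defaults, in the plugin's config definitions, to the
    # /opt/spark path that the older variants hard-code.
    return utils.get_config_value_or_default("Spark", "Spark home", cluster)
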
def _push_configs_to_new_node(self, cluster, extra, instance):
    ng_extra = extra[instance.node_group.id]

    files_hadoop = {
        '/etc/hadoop/conf/core-site.xml': ng_extra['xml']['core-site'],
        '/etc/hadoop/conf/hdfs-site.xml': ng_extra['xml']['hdfs-site'],
    }

    files_spark = {
        '/opt/spark/conf/spark-env.sh': ng_extra['sp_master'],
        '/opt/spark/conf/slaves': ng_extra['sp_slaves']
    }

    files_init = {
        '/tmp/sahara-hadoop-init.sh': ng_extra['setup_script'],
        'id_rsa': cluster.management_private_key,
        'authorized_keys': cluster.management_public_key
    }

    # pietro: This is required because the (secret) key is not stored in
    # .ssh which hinders password-less ssh required by spark scripts
    key_cmd = ('sudo cp $HOME/id_rsa $HOME/.ssh/; '
               'sudo chown $USER $HOME/.ssh/id_rsa; '
               'sudo chmod 600 $HOME/.ssh/id_rsa')

    for ng in cluster.node_groups:
        dn_path = c_helper.extract_hadoop_path(ng.storage_paths(),
                                               '/dfs/dn')
        nn_path = c_helper.extract_hadoop_path(ng.storage_paths(),
                                               '/dfs/nn')
        hdfs_dir_cmd = ('sudo mkdir -p %s %s;'
                        'sudo chown -R hdfs:hadoop %s %s;'
                        'sudo chmod 755 %s %s;'
                        % (nn_path, dn_path,
                           nn_path, dn_path,
                           nn_path, dn_path))

    with remote.get_remote(instance) as r:
        r.execute_command('sudo chown -R $USER:$USER /etc/hadoop')
        r.execute_command('sudo chown -R $USER:$USER /opt/spark')
        r.write_files_to(files_hadoop)
        r.write_files_to(files_spark)
        r.write_files_to(files_init)
        r.execute_command('sudo chmod 0500 /tmp/sahara-hadoop-init.sh')
        r.execute_command('sudo /tmp/sahara-hadoop-init.sh '
                          '>> /tmp/sahara-hadoop-init.log 2>&1')
        r.execute_command(hdfs_dir_cmd)
        r.execute_command(key_cmd)

        if c_helper.is_data_locality_enabled(cluster):
            r.write_file_to(
                '/etc/hadoop/topology.sh',
                f.get_file_text('plugins/spark/resources/topology.sh'))
            r.execute_command('sudo chmod +x /etc/hadoop/topology.sh')

        self._write_topology_data(r, cluster, extra)

def _push_configs_to_new_node(self, cluster, extra, instance):
    ng_extra = extra[instance.node_group.id]

    files_hadoop = {
        '/etc/hadoop/conf/core-site.xml': ng_extra['xml']['core-site'],
        '/etc/hadoop/conf/hdfs-site.xml': ng_extra['xml']['hdfs-site'],
    }

    sp_home = self._spark_home(cluster)
    files_spark = {
        os.path.join(sp_home, 'conf/spark-env.sh'): ng_extra['sp_master'],
        os.path.join(sp_home, 'conf/slaves'): ng_extra['sp_slaves']
    }

    files_init = {
        '/tmp/sahara-hadoop-init.sh': ng_extra['setup_script'],
        'id_rsa': cluster.management_private_key,
        'authorized_keys': cluster.management_public_key
    }

    # pietro: This is required because the (secret) key is not stored in
    # .ssh which hinders password-less ssh required by spark scripts
    key_cmd = ('sudo cp $HOME/id_rsa $HOME/.ssh/; '
               'sudo chown $USER $HOME/.ssh/id_rsa; '
               'sudo chmod 600 $HOME/.ssh/id_rsa')

    for ng in cluster.node_groups:
        dn_path = c_helper.extract_hadoop_path(ng.storage_paths(),
                                               '/dfs/dn')
        nn_path = c_helper.extract_hadoop_path(ng.storage_paths(),
                                               '/dfs/nn')
        hdfs_dir_cmd = (('sudo mkdir -p %s %s;'
                         'sudo chown -R hdfs:hadoop %s %s;'
                         'sudo chmod 755 %s %s;')
                        % (nn_path, dn_path,
                           nn_path, dn_path,
                           nn_path, dn_path))

    with remote.get_remote(instance) as r:
        r.execute_command('sudo chown -R $USER:$USER /etc/hadoop')
        r.execute_command('sudo chown -R $USER:$USER %s' % sp_home)
        r.write_files_to(files_hadoop)
        r.write_files_to(files_spark)
        r.write_files_to(files_init)
        r.execute_command('sudo chmod 0500 /tmp/sahara-hadoop-init.sh')
        r.execute_command('sudo /tmp/sahara-hadoop-init.sh '
                          '>> /tmp/sahara-hadoop-init.log 2>&1')
        r.execute_command(hdfs_dir_cmd)
        r.execute_command(key_cmd)

        if c_helper.is_data_locality_enabled(cluster):
            r.write_file_to(
                '/etc/hadoop/topology.sh',
                f.get_file_text('plugins/spark/resources/topology.sh'))
            r.execute_command('sudo chmod +x /etc/hadoop/topology.sh')

        self._write_topology_data(r, cluster, extra)
        self._push_master_configs(r, cluster, extra, instance)
