def setup_cluster(masters, slaves, opts, deploy_ssh_key):
    """Prepare the cluster nodes and start Spark on them.

    Steps, in order:
      1. If ``deploy_ssh_key`` is true, make sure the master has an RSA key
         pair (one is generated only when ``~/.ssh/id_rsa`` is missing) and
         copy the master's ``.ssh`` directory to every slave as a tar stream.
      2. Call ``utils.prepare_hosts`` for the master and slaves.
      3. Copy the default Spark configuration into ``GlobalVar.SPARK_CONF_DIR``
         on the local machine and push that directory to the master and to
         every slave with ``utils.do_scp``.
      4. Start the Spark master and workers via ``start_spark_cluster``.

    Args:
        masters: Sequence of master nodes; only the first entry is used.
        slaves: Iterable of slave (worker) nodes.
        opts: Options object passed through unchanged to the ``utils`` helpers.
        deploy_ssh_key: Whether to generate and distribute the cluster SSH key.
    """
    master = masters[0]

    if deploy_ssh_key:
        print("==> Generating cluster's SSH key on master...")
        # Only create a key when none exists, and authorize it for the same
        # account so that nodes sharing this .ssh directory trust each other.
        key_setup = (
            " [ -f ~/.ssh/id_rsa ] || "
            "(ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa && "
            "cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys) "
        )
        utils.do_ssh(master, opts, key_setup)
        dot_ssh_tar = utils.ssh_read(master, opts, ["tar", "c", ".ssh"])

        print("==> Transferring cluster's SSH key to slaves...")
        for slave in slaves:
            utils.ssh_write(slave, opts, ["tar", "x"], dot_ssh_tar)

    print("==> Updating /etc/hosts for each ECS instance...")
    utils.prepare_hosts(master, slaves, opts)

    print("==> Updating Spark default configuration...")
    # Refresh the local Spark conf directory from the bundled defaults before
    # it is shipped to the nodes.
    copy_status = os.system(" /bin/cp -r %s/spark/conf/* %s"
                            % (GlobalVar.DEFAULT_CONF_DIR, GlobalVar.SPARK_CONF_DIR))
    if copy_status != 0:
        # Stay best-effort (the original ignored the status entirely), but
        # make the failure visible instead of silently shipping old files.
        print("==> WARNING: copying the default Spark configuration failed (status %s)"
              % copy_status)

    utils.do_scp(master, opts, GlobalVar.SPARK_CONF_DIR, GlobalVar.SPARK_INSTALL_DIR)
    for slave in slaves:
        utils.do_scp(slave, opts, GlobalVar.SPARK_CONF_DIR, GlobalVar.SPARK_INSTALL_DIR)

    print("==> Starting spark cluster...")
    start_spark_cluster(master, slaves, opts)
def _deploy_cluster_ssh_key(master, slaves, opts):
    """Ensure the master has an RSA key and copy its ``.ssh`` dir to each slave."""
    print("==> Generating cluster's SSH key on master...")
    # The shell snippet is a no-op when ~/.ssh/id_rsa already exists.
    key_setup = """ [ -f ~/.ssh/id_rsa ] || (ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa && cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys) """
    utils.do_ssh(master, opts, key_setup)
    dot_ssh_tar = utils.ssh_read(master, opts, ['tar', 'c', '.ssh'])

    print("==> Transferring cluster's SSH key to slaves...")
    for slave in slaves:
        utils.ssh_write(slave, opts, ['tar', 'x'], dot_ssh_tar)


def _distribute_spark_conf(master, slaves, opts):
    """Refresh the local Spark conf from the defaults and push it to all nodes."""
    print("==> Updating Spark default configuration...")
    copy_cmd = " /bin/cp -r %s/spark/conf/* %s" % (GlobalVar.DEFAULT_CONF_DIR,
                                                   GlobalVar.SPARK_CONF_DIR)
    status = os.system(copy_cmd)
    if status != 0:
        # Previously the exit status was dropped; report it but keep going so
        # the function remains best-effort like before.
        print("==> WARNING: copying the default Spark configuration failed (status %s)"
              % status)

    # Master first, then the slaves, same order as the progress output implies.
    utils.do_scp(master, opts, GlobalVar.SPARK_CONF_DIR, GlobalVar.SPARK_INSTALL_DIR)
    for slave in slaves:
        utils.do_scp(slave, opts, GlobalVar.SPARK_CONF_DIR, GlobalVar.SPARK_INSTALL_DIR)


def setup_cluster(masters, slaves, opts, deploy_ssh_key):
    """Configure the cluster nodes and launch Spark.

    Optionally distributes an SSH key from the master to the slaves, then
    calls ``utils.prepare_hosts``, pushes the Spark configuration directory
    to every node and finally calls ``start_spark_cluster``.

    Args:
        masters: Sequence of master nodes; only ``masters[0]`` is used.
        slaves: Iterable of slave (worker) nodes.
        opts: Options object handed unchanged to the ``utils`` helpers.
        deploy_ssh_key: When true, generate/distribute the cluster SSH key.
    """
    master = masters[0]

    if deploy_ssh_key:
        _deploy_cluster_ssh_key(master, slaves, opts)

    print("==> Updating /etc/hosts for each ECS instance...")
    utils.prepare_hosts(master, slaves, opts)

    _distribute_spark_conf(master, slaves, opts)

    print("==> Starting spark cluster...")
    start_spark_cluster(master, slaves, opts)
def start_spark_cluster(master, slaves, opts):
    """Start the Spark master and then a worker on every slave.

    Host names are read from the ``'HostName'`` field returned by
    ``ecs.get_instance_info``. Each worker is started with its own host name
    and the master URL ``spark://<master host name>:7077``.

    Args:
        master: The master node.
        slaves: Iterable of slave (worker) nodes.
        opts: Options object passed through to ``utils.do_ssh``.
    """
    master_host = ecs.get_instance_info(master)['HostName']

    master_cmd = "%s/sbin/start-master.sh " % GlobalVar.SPARK_INSTALL_DIR
    utils.do_ssh(master, opts, str(master_cmd))

    for node in slaves:
        worker_host = ecs.get_instance_info(node)['HostName']
        worker_args = (GlobalVar.SPARK_INSTALL_DIR, worker_host, master_host)
        worker_cmd = "%s/sbin/start-slave.sh %s spark://%s:7077" % worker_args
        utils.do_ssh(node, opts, str(worker_cmd))

    print("==> Started spark cluster successfully!")
def stop_hdfs(masters, slaves, opts):
    """Stop the HDFS namenode on the first master and a datanode on each slave.

    Both commands invoke ``hadoop-daemon.sh`` from
    ``GlobalVar.HADOOP_INSTALL_DIR`` with ``GlobalVar.HADOOP_CONF_DIR`` as the
    ``--config`` directory and are run through ``utils.do_ssh``.

    Args:
        masters: Sequence of master nodes; only the first entry is used.
        slaves: Iterable of slave nodes.
        opts: Options object passed through to ``utils.do_ssh``.
    """
    print("==> Stopping namenode...")
    namenode_host = masters[0]
    hadoop_paths = (GlobalVar.HADOOP_INSTALL_DIR, GlobalVar.HADOOP_CONF_DIR)
    namenode_cmd = "%s/sbin/hadoop-daemon.sh --config %s --script hdfs stop namenode" % hadoop_paths
    utils.do_ssh(namenode_host, opts, namenode_cmd)

    print("==> Stopping datanodes...")
    # The command is the same for every slave, so it is built once.
    datanode_cmd = "%s/sbin/hadoop-daemon.sh --config %s --script hdfs stop datanode" % hadoop_paths
    for node in slaves:
        utils.do_ssh(node, opts, datanode_cmd)

    print("==> Stopped HDFS service successfully")
def stop_spark_cluster(masters, slaves, opts):
    """Stop the Spark master and then the Spark worker on every slave.

    The master is stopped with ``stop-master.sh``. Each worker is stopped with
    ``spark-daemon.sh stop org.apache.spark.deploy.worker.Worker`` followed by
    the slave's ``'HostName'`` as reported by ``ecs.get_instance_info``.

    Args:
        masters: Sequence of master nodes; only the first entry is used.
        slaves: Iterable of slave (worker) nodes.
        opts: Options object passed through to ``utils.do_ssh``.
    """
    head_node = masters[0]
    master_cmd = "%s/sbin/stop-master.sh " % GlobalVar.SPARK_INSTALL_DIR

    print("==> Stopping Spark Master...")
    utils.do_ssh(head_node, opts, str(master_cmd))

    print("==> Stopping Spark Slaves...")
    for node in slaves:
        hostname = ecs.get_instance_info(node)['HostName']
        worker_cmd = "%s/sbin/spark-daemon.sh stop org.apache.spark.deploy.worker.Worker %s" % (
            GlobalVar.SPARK_INSTALL_DIR, hostname)
        utils.do_ssh(node, opts, str(worker_cmd))
def stop_spark_cluster(masters, slaves, opts):
    """Shut down Spark: first the master, then one worker per slave.

    Commands are executed through ``utils.do_ssh``. The worker stop command
    receives the slave's ``"HostName"`` (looked up with
    ``ecs.get_instance_info``) as its trailing argument.

    Args:
        masters: Sequence of master nodes; only ``masters[0]`` is contacted.
        slaves: Iterable of slave (worker) nodes.
        opts: Options object forwarded to ``utils.do_ssh``.
    """
    def worker_stop_command(slave_node):
        # Looked up per slave, immediately before that slave is contacted.
        info = ecs.get_instance_info(slave_node)
        return "%s/sbin/spark-daemon.sh stop org.apache.spark.deploy.worker.Worker %s" % (
            GlobalVar.SPARK_INSTALL_DIR,
            info["HostName"],
        )

    first_master = masters[0]
    stop_master_cmd = "%s/sbin/stop-master.sh " % GlobalVar.SPARK_INSTALL_DIR
    print("==> Stopping Spark Master...")
    utils.do_ssh(first_master, opts, str(stop_master_cmd))

    print("==> Stopping Spark Slaves...")
    for slave_node in slaves:
        utils.do_ssh(slave_node, opts, str(worker_stop_command(slave_node)))
def start_spark_cluster(master, slaves, opts):
    """Bring up Spark: the master first, then a worker on each slave.

    Every worker is launched with its own ``"HostName"`` and the URL
    ``spark://<master HostName>:7077``; host names come from
    ``ecs.get_instance_info``. All commands run through ``utils.do_ssh``.

    Args:
        master: The master node.
        slaves: Iterable of slave (worker) nodes.
        opts: Options object forwarded to ``utils.do_ssh``.
    """
    def hostname_of(instance):
        return ecs.get_instance_info(instance)["HostName"]

    master_name = hostname_of(master)
    launch_master = "%s/sbin/start-master.sh " % GlobalVar.SPARK_INSTALL_DIR
    utils.do_ssh(master, opts, str(launch_master))

    for worker in slaves:
        # Resolve the worker's host name right before contacting it.
        worker_name = hostname_of(worker)
        launch_worker = "%s/sbin/start-slave.sh %s spark://%s:7077" % (
            GlobalVar.SPARK_INSTALL_DIR,
            worker_name,
            master_name,
        )
        utils.do_ssh(worker, opts, str(launch_worker))

    print("==> Started spark cluster successfully!")
def start_hdfs(master, slaves, opts):
    """Optionally format HDFS, then start the namenode and the datanodes.

    The user is asked twice on stdin whether to format HDFS; formatting only
    happens when both answers are exactly ``Y``. The second question is asked
    only if the first answer was ``Y``. Afterwards the namenode is started on
    ``master`` and a datanode on every slave, all through ``utils.do_ssh``.

    Args:
        master: The node that runs the namenode.
        slaves: Iterable of nodes that run datanodes.
        opts: Options object passed through to ``utils.do_ssh``.
    """
    utils.warning()

    first_prompt = ("If this is the first time, you need to format HDFS, otherwise you should not format it! \n"
                    "Format HDFS (Y/n): ")
    second_prompt = "Confirm to format HDFS? (Y/n): "

    # Short-circuit evaluation keeps the second prompt from appearing unless
    # the first answer was exactly 'Y'.
    format_confirmed = raw_input(first_prompt) == 'Y' and raw_input(second_prompt) == "Y"

    if format_confirmed:
        print("==> Formatting HDFS...")
        format_cmd = "%s/bin/hdfs namenode -format -force 2> /dev/null" % GlobalVar.HADOOP_INSTALL_DIR
        utils.do_ssh(master, opts, str(format_cmd))
    else:
        print("==> Not `Y`, skipping formatting HDFS...")

    hadoop_paths = (GlobalVar.HADOOP_INSTALL_DIR, GlobalVar.HADOOP_CONF_DIR)

    print("==> Starting namenode...")
    namenode_cmd = "%s/sbin/hadoop-daemon.sh --config %s --script hdfs start namenode" % hadoop_paths
    utils.do_ssh(master, opts, namenode_cmd)

    print("==> Starting datanode...")
    # Identical for every slave, so the command string is built once.
    datanode_cmd = "%s/sbin/hadoop-daemon.sh --config %s --script hdfs start datanode" % hadoop_paths
    for node in slaves:
        utils.do_ssh(node, opts, datanode_cmd)