def start_spark_cluster(master, slaves, opts): ins = ecs.get_instance_info(master) master_name = ins['HostName'] start_master = "%s/sbin/start-master.sh " % GlobalVar.SPARK_INSTALL_DIR utils.do_ssh(master, opts, str(start_master)) for slave in slaves: instance_info = ecs.get_instance_info(slave) worker_name = instance_info['HostName'] start_slave = "%s/sbin/start-slave.sh %s spark://%s:7077" \ % (GlobalVar.SPARK_INSTALL_DIR, worker_name, master_name) utils.do_ssh(slave, opts, str(start_slave)) print "==> Started spark cluster successfully!"
def start_spark_cluster(master, slaves, opts): ins = ecs.get_instance_info(master) master_name = ins["HostName"] start_master = "%s/sbin/start-master.sh " % GlobalVar.SPARK_INSTALL_DIR utils.do_ssh(master, opts, str(start_master)) for slave in slaves: instance_info = ecs.get_instance_info(slave) worker_name = instance_info["HostName"] start_slave = "%s/sbin/start-slave.sh %s spark://%s:7077" % ( GlobalVar.SPARK_INSTALL_DIR, worker_name, master_name, ) utils.do_ssh(slave, opts, str(start_slave)) print "==> Started spark cluster successfully!"
def start_in_client_mode(cluster_name, opts): # check cluster status trickly if utils.check_cluster_status(cluster_name, ['Running']): print "Cluster %s is `Running`, please `Stop` it first." % cluster_name sys.exit(1) do_validity_check(opts) (masters, slaves) = utils.get_masters_and_slaves(opts.mode) if len(masters + slaves) <= 0: print "There is no master or slave, check it first please." sys.exit(1) print "==> Restarting spark cluster..." if opts.pwd == "": opts.pwd = getpass.getpass("You need to provide the password for ECS instance:") spark.start_spark_cluster(masters[0], slaves, opts) if opts.enable_spark_notebook: spark_notebook.start_spark_notebook(masters, opts) if opts.enable_hue: hue.start_hue(masters, opts) if opts.enable_hdfs: hdfs.setup_hdfs(masters, slaves, opts) if opts.enable_slave_public_ip: utils.save_public_ips(masters, slaves) master_ip = ecs.get_instance_info(masters[0])['PublicIpAddress']['IpAddress'][0] utils.open_nginx(opts, masters) utils.end_of_startup(opts, master_ip, masters) # update cluster status os.system("echo Running > %s%s" % (GlobalVar.CLUSTER_STATUS, cluster_name))
def launch_in_client_mode(cluster_name, opts): # check cluster status trickly if utils.check_cluster_status(cluster_name, ['Running', 'Stopped']): print "Cluster %s has been launched, please `Destroy` it first." % cluster_name sys.exit(1) do_validity_check(opts) (masters, slaves) = utils.get_masters_and_slaves(opts.mode) if len(masters) <= 0: print >> stderr, "ERROR: You have to start as least 1 master" sys.exit(1) if len(slaves) <= 0: print >> stderr, "ERROR: You have to start as least 1 slave" sys.exit(1) # Now we only support single-node master. spark.setup_cluster(masters, slaves, opts, True) if opts.enable_spark_notebook: spark_notebook.start_spark_notebook(masters, opts) if opts.enable_hue: hue.start_hue(masters, opts) if opts.enable_hdfs: hdfs.setup_hdfs(masters, slaves, opts) if opts.enable_slave_public_ip: utils.save_public_ips(masters, slaves) master_ip = ecs.get_instance_info( masters[0])['PublicIpAddress']['IpAddress'][0] utils.open_nginx(opts, masters) utils.end_of_startup(opts, master_ip, masters) # update cluster status os.system("echo Running > %s%s" % (GlobalVar.CLUSTER_STATUS, cluster_name))
def launch_in_client_mode(cluster_name, opts): # check cluster status trickly if utils.check_cluster_status(cluster_name, ['Running', 'Stopped']): print "Cluster %s has been launched, please `Destroy` it first." % cluster_name sys.exit(1) do_validity_check(opts) (masters, slaves) = utils.get_masters_and_slaves(opts.mode) if len(masters) <= 0: print >> stderr, "ERROR: You have to start as least 1 master" sys.exit(1) if len(slaves) <= 0: print >> stderr, "ERROR: You have to start as least 1 slave" sys.exit(1) # Now we only support single-node master. spark.setup_cluster(masters, slaves, opts, True) if opts.enable_spark_notebook: spark_notebook.start_spark_notebook(masters, opts) if opts.enable_hue: hue.start_hue(masters, opts) if opts.enable_hdfs: hdfs.setup_hdfs(masters, slaves, opts) if opts.enable_slave_public_ip: utils.save_public_ips(masters, slaves) master_ip = ecs.get_instance_info(masters[0])['PublicIpAddress']['IpAddress'][0] utils.open_nginx(opts, masters) utils.end_of_startup(opts, master_ip, masters) # update cluster status os.system("echo Running > %s%s" % (GlobalVar.CLUSTER_STATUS, cluster_name))
def start_in_client_mode(cluster_name, opts): # check cluster status trickly if utils.check_cluster_status(cluster_name, ['Running']): print "Cluster %s is `Running`, please `Stop` it first." % cluster_name sys.exit(1) do_validity_check(opts) (masters, slaves) = utils.get_masters_and_slaves(opts.mode) if len(masters + slaves) <= 0: print "There is no master or slave, check it first please." sys.exit(1) print "==> Restarting spark cluster..." if opts.pwd == "": opts.pwd = getpass.getpass( "You need to provide the password for ECS instance:") spark.start_spark_cluster(masters[0], slaves, opts) if opts.enable_spark_notebook: spark_notebook.start_spark_notebook(masters, opts) if opts.enable_hue: hue.start_hue(masters, opts) if opts.enable_hdfs: hdfs.setup_hdfs(masters, slaves, opts) if opts.enable_slave_public_ip: utils.save_public_ips(masters, slaves) master_ip = ecs.get_instance_info( masters[0])['PublicIpAddress']['IpAddress'][0] utils.open_nginx(opts, masters) utils.end_of_startup(opts, master_ip, masters) # update cluster status os.system("echo Running > %s%s" % (GlobalVar.CLUSTER_STATUS, cluster_name))
def stop_spark_notebook(masters, opts): print "==> Stopping Spark Notebook..." master = masters[0] ins = ecs.get_instance_info(master) ip = ins['InnerIpAddress']['IpAddress'][0] stop_notebook = ' \" cd %s; cat RUNNING_PID | xargs -r kill -9; rm -f RUNNING_PID \" ' \ % GlobalVar.SPARK_NOTEBOOK_INSTALL_DIR os.system("sshpass -p %s ssh %s %s@%s %s" % (opts.pwd, " ".join(utils.ssh_args()), opts.user, ip, stop_notebook)) print "==> Stopped Spark Notebook service successfully..."
def start_spark_notebook(masters, opts): print "==> Starting Spark Notebook service..." master = masters[0] ins = ecs.get_instance_info(master) ip = ins['InnerIpAddress']['IpAddress'][0] launch_notebook = ' \" cd %s; nohup ./bin/spark-notebook -Dhttp.port=9090 > /dev/null 2>&1 & \" ' \ % GlobalVar.SPARK_NOTEBOOK_INSTALL_DIR os.system("sshpass -p %s ssh %s %s@%s %s" % (opts.pwd, " ".join(utils.ssh_args()), opts.user, ip, launch_notebook)) print "==> Started Spark Notebook service successfully..."
def start_spark_notebook(masters, opts): print "==> Starting Spark Notebook service..." master = masters[0] ins = ecs.get_instance_info(master) ip = ins['InnerIpAddress']['IpAddress'][0] launch_notebook = ' \" cd %s; nohup ./bin/spark-notebook -Dhttp.port=9090 > /dev/null 2>&1 & \" ' \ % GlobalVar.SPARK_NOTEBOOK_INSTALL_DIR os.system( "sshpass -p %s ssh %s %s@%s %s" % (opts.pwd, " ".join(utils.ssh_args()), opts.user, ip, launch_notebook)) print "==> Started Spark Notebook service successfully..."
def stop_spark_notebook(masters, opts): print "==> Stopping Spark Notebook..." master = masters[0] ins = ecs.get_instance_info(master) ip = ins['InnerIpAddress']['IpAddress'][0] stop_notebook = ' \" cd %s; cat RUNNING_PID | xargs -r kill -9; rm -f RUNNING_PID \" ' \ % GlobalVar.SPARK_NOTEBOOK_INSTALL_DIR os.system( "sshpass -p %s ssh %s %s@%s %s" % (opts.pwd, " ".join(utils.ssh_args()), opts.user, ip, stop_notebook)) print "==> Stopped Spark Notebook service successfully..."
def stop_hue(masters, opts):
    """Stop the HUE stack on the master: supervisor, Livy server, then hue.

    Each step is run as a separate sshpass/ssh invocation against the
    master's intranet IP, using the password and user from `opts`.
    """
    print "==> Stopping HUE service..."
    master = masters[0]
    ins = ecs.get_instance_info(master)
    ip = ins["InnerIpAddress"]["IpAddress"][0]
    # The quoting below is deliberate and layered: the outer quotes group the
    # remote command for the *local* shell; the inner quotes survive to the
    # remote shell. Do not reformat these strings.
    stop_hue_step1 = ' " pgrep supervisor | xargs -r kill -9 " '
    # \$2 keeps the '$' literal through the local shell so the remote awk
    # sees '{print $2}' (the PID column of ps -ef).
    stop_hue_step2 = " \" ps -ef | grep livy.server.Main | grep -v grep | awk '{print \$2}' | xargs -r kill -9 \" "
    stop_hue_step3 = ' " pgrep hue | xargs -r kill -9 " '
    os.system("sshpass -p %s ssh %s %s@%s %s" % (opts.pwd, " ".join(utils.ssh_args()), opts.user, ip, stop_hue_step1))
    os.system("sshpass -p %s ssh %s %s@%s %s" % (opts.pwd, " ".join(utils.ssh_args()), opts.user, ip, stop_hue_step2))
    os.system("sshpass -p %s ssh %s %s@%s %s" % (opts.pwd, " ".join(utils.ssh_args()), opts.user, ip, stop_hue_step3))
    print "==> Stopped HUE service successfully"
def stop_spark_cluster(masters, slaves, opts): master = masters[0] stop_master = "%s/sbin/stop-master.sh " % GlobalVar.SPARK_INSTALL_DIR print "==> Stopping Spark Master..." utils.do_ssh(master, opts, str(stop_master)) print "==> Stopping Spark Slaves..." for slave in slaves: instance_info = ecs.get_instance_info(slave) worker_name = instance_info['HostName'] stop_slave = "%s/sbin/spark-daemon.sh stop org.apache.spark.deploy.worker.Worker %s" \ % (GlobalVar.SPARK_INSTALL_DIR, worker_name) utils.do_ssh(slave, opts, str(stop_slave))
def stop_spark_cluster(masters, slaves, opts): master = masters[0] stop_master = "%s/sbin/stop-master.sh " % GlobalVar.SPARK_INSTALL_DIR print "==> Stopping Spark Master..." utils.do_ssh(master, opts, str(stop_master)) print "==> Stopping Spark Slaves..." for slave in slaves: instance_info = ecs.get_instance_info(slave) worker_name = instance_info["HostName"] stop_slave = "%s/sbin/spark-daemon.sh stop org.apache.spark.deploy.worker.Worker %s" % ( GlobalVar.SPARK_INSTALL_DIR, worker_name, ) utils.do_ssh(slave, opts, str(stop_slave))
def setup_hdfs(masters, slaves, opts): print "==> Updating Hadoop configuration for each ECS instance..." # copy default hadoop config os.system(" /bin/cp -r %s/hadoop/etc/hadoop/* %s/etc/hadoop/" % (GlobalVar.DEFAULT_CONF_DIR, GlobalVar.HADOOP_INSTALL_DIR)) master_intranet_ip = ecs.get_instance_info(masters[0])['InnerIpAddress']['IpAddress'][0] namenode = "hdfs://%s:9000" % master_intranet_ip utils.update_hadoop_configuration(namenode) utils.do_scp(masters[0], opts, GlobalVar.HADOOP_CONF_DIR, "%s/etc/" % GlobalVar.HADOOP_INSTALL_DIR) for slave in slaves: utils.do_scp(slave, opts, GlobalVar.HADOOP_CONF_DIR, "%s/etc/" % GlobalVar.HADOOP_INSTALL_DIR) print "==> Starting HDFS service..." start_hdfs(masters[0], slaves, opts) print "==> Started HDFS service successfully"
def stop_hue(masters, opts):
    """Stop the HUE stack on the master: supervisor, Livy server, then hue.

    Three separate sshpass/ssh invocations are issued against the master's
    intranet IP using the credentials carried in `opts`.
    """
    print "==> Stopping HUE service..."
    master = masters[0]
    ins = ecs.get_instance_info(master)
    ip = ins['InnerIpAddress']['IpAddress'][0]
    # Layered shell quoting is deliberate: the escaped double quotes group the
    # remote command for the local shell; \$2 keeps '$' literal so the remote
    # awk sees '{print $2}' (the PID column). Do not reformat these strings.
    stop_hue_step1 = ' \" pgrep supervisor | xargs -r kill -9 \" '
    stop_hue_step2 = ' \" ps -ef | grep livy.server.Main | grep -v grep | awk \'{print \$2}\' | xargs -r kill -9 \" '
    stop_hue_step3 = ' \" pgrep hue | xargs -r kill -9 \" '
    os.system(
        "sshpass -p %s ssh %s %s@%s %s" %
        (opts.pwd, " ".join(utils.ssh_args()), opts.user, ip, stop_hue_step1))
    os.system(
        "sshpass -p %s ssh %s %s@%s %s" %
        (opts.pwd, " ".join(utils.ssh_args()), opts.user, ip, stop_hue_step2))
    os.system(
        "sshpass -p %s ssh %s %s@%s %s" %
        (opts.pwd, " ".join(utils.ssh_args()), opts.user, ip, stop_hue_step3))
    print "==> Stopped HUE service successfully"
def start_hue(masters, opts):
    """Start HUE on the master: install default config, then launch the
    Livy server and the HUE supervisor as detached background processes.

    All three steps run remotely through sshpass/ssh with the credentials
    carried in `opts`.
    """
    print "==> Starting HUE service..."
    master = masters[0]
    ins = ecs.get_instance_info(master)
    ip = ins['InnerIpAddress']['IpAddress'][0]
    # Copy the bundled hue.ini into HUE's conf directory on the remote host.
    copy_command = ' \"/bin/cp -r %s/hue/desktop/conf/hue.ini %s/desktop/conf/ \"' \
        % (GlobalVar.DEFAULT_CONF_DIR, GlobalVar.HUE_INSTALL_DIR)
    # Both launch steps source the profile first so HUE's env vars are set;
    # nohup + '&' detaches the process from the ssh session.
    launch_hue_step1 = ' \"source /root/.bash_profile; cd %s/build/env/bin/; nohup ./hue livy_server > /dev/null 2>&1 & \" ' \
        % GlobalVar.HUE_INSTALL_DIR
    launch_hue_step2 = ' \"source /root/.bash_profile; cd %s/build/env/bin/; nohup ./supervisor > /dev/null 2>&1 & \" ' \
        % GlobalVar.HUE_INSTALL_DIR
    os.system(
        "sshpass -p %s ssh %s %s@%s %s" %
        (opts.pwd, " ".join(utils.ssh_args()), opts.user, ip, copy_command))
    os.system("sshpass -p %s ssh %s %s@%s %s" % (opts.pwd, " ".join(
        utils.ssh_args()), opts.user, ip, launch_hue_step1))
    os.system("sshpass -p %s ssh %s %s@%s %s" % (opts.pwd, " ".join(
        utils.ssh_args()), opts.user, ip, launch_hue_step2))
    print "==> Started HUE service successfully"
def start_hue(masters, opts):
    """Start HUE on the master node.

    Installs the default hue.ini, then launches the Livy server and the HUE
    supervisor remotely as detached background processes via sshpass/ssh.
    """
    print "==> Starting HUE service..."
    master = masters[0]
    ins = ecs.get_instance_info(master)
    ip = ins["InnerIpAddress"]["IpAddress"][0]
    # The embedded double quotes group each remote command for the local
    # shell; keep the quoting exactly as written.
    copy_command = ' "/bin/cp -r %s/hue/desktop/conf/hue.ini %s/desktop/conf/ "' % (
        GlobalVar.DEFAULT_CONF_DIR,
        GlobalVar.HUE_INSTALL_DIR,
    )
    # Source the profile for HUE's env vars; nohup + '&' detaches the process.
    launch_hue_step1 = (
        ' "source /root/.bash_profile; cd %s/build/env/bin/; nohup ./hue livy_server > /dev/null 2>&1 & " '
        % GlobalVar.HUE_INSTALL_DIR
    )
    launch_hue_step2 = (
        ' "source /root/.bash_profile; cd %s/build/env/bin/; nohup ./supervisor > /dev/null 2>&1 & " '
        % GlobalVar.HUE_INSTALL_DIR
    )
    os.system("sshpass -p %s ssh %s %s@%s %s" % (opts.pwd, " ".join(utils.ssh_args()), opts.user, ip, copy_command))
    os.system("sshpass -p %s ssh %s %s@%s %s" % (opts.pwd, " ".join(utils.ssh_args()), opts.user, ip, launch_hue_step1))
    os.system("sshpass -p %s ssh %s %s@%s %s" % (opts.pwd, " ".join(utils.ssh_args()), opts.user, ip, launch_hue_step2))
    print "==> Started HUE service successfully"
def destroy_in_client_mode(cluster_name, opts):
    """List the cluster's instances slated for termination.

    NOTE(review): this function looks truncated at this point in the file --
    `to_release` is populated but never consumed here, and the confirmation
    question is printed but no answer is read. Presumably the destroy logic
    continues past this view; confirm before relying on this listing alone.
    """
    do_validity_check(opts)
    (masters, slaves) = utils.get_masters_and_slaves(opts.mode)
    if len(masters + slaves) <= 0:
        print "There is no master or slave, check it first please."
        sys.exit(1)
    print "Are you sure you want to destroy the cluster %s?" % cluster_name
    print "The following instances will be terminated:"
    instances = masters + slaves
    # The gateway instance is shared infrastructure -- never terminate it.
    gateway = ecs.get_gateway_instance_info(opts)['InstanceId']
    if gateway in instances:
        instances.remove(gateway)
    to_release = []  # instance ids confirmed to still exist
    for ins in instances:
        try:
            instance_info = ecs.get_instance_info(ins)
            to_release.append(ins)
            print "> %s" % (instance_info['HostName'])
        except Exception, e:
            # NOTE(review): membership test against e.args requires the code
            # string to be an exact element of the args tuple; if the SDK
            # embeds it inside a longer message this branch never matches --
            # verify against the ECS client's exception shape.
            if 'InvalidInstanceId.NotFound' in e.args:
                print "> %s, invalid `InstanceId` not found, skip it." % ins
            else:
                raise e