def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
    """Deploy the spark-ec2 scripts to the master and run cluster setup.

    Modified version of the setup_cluster function (borrowed from
    spark-ec.py -- presumably Spark's EC2 launch script) in order to
    manually set the folder with the deploy code: the deploy templates are
    read from ``$SPARK_HOME/ec2/deploy.generic``.

    Args:
        conn: Connection handle; it is only passed through to deploy_files.
        master_nodes: Non-empty sequence of master instances. The first
            one's ``public_dns_name`` is used as the master host.
        slave_nodes: Iterable of slave instances exposing
            ``public_dns_name``.
        opts: Options object. ``hadoop_major_version`` and ``ganglia`` are
            read here, and the object is forwarded to every helper call.
        deploy_ssh_key: When true, create an SSH key on the master (only if
            it has none yet) and copy the master's ``.ssh`` directory to
            every slave.

    Raises:
        KeyError: If the SPARK_HOME environment variable is not set.
    """
    master = master_nodes[0].public_dns_name

    # NOTE: print is called with a single parenthesized argument throughout,
    # which produces the same output under Python 2's print statement and
    # Python 3's print function.
    if deploy_ssh_key:
        print("Generating cluster's SSH key on master...")
        # The "[ -f ... ] ||" guard keeps an existing key pair untouched.
        key_setup = (
            " [ -f ~/.ssh/id_rsa ] || (ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa"
            " && cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys) "
        )
        ssh(master, opts, key_setup)
        # Tar up the master's .ssh directory so it can be unpacked on slaves.
        dot_ssh_tar = ssh_read(master, opts, ["tar", "c", ".ssh"])

        print("Transferring cluster's SSH key to slaves...")
        for slave in slave_nodes:
            print(slave.public_dns_name)
            ssh_write(slave.public_dns_name, opts, ["tar", "x"], dot_ssh_tar)

    modules = [
        "spark",
        "shark",
        "ephemeral-hdfs",
        "persistent-hdfs",
        "mapreduce",
        "spark-standalone",
        "tachyon",
    ]

    if opts.hadoop_major_version == "1":
        # A list comprehension (rather than filter()) guarantees a real list,
        # so the append() below also works where filter() returns an iterator.
        modules = [name for name in modules if name != "mapreduce"]

    if opts.ganglia:
        modules.append("ganglia")

    # Start from a fresh checkout of the setup scripts on the master.
    ssh(
        master,
        opts,
        "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git"
        " -b v3",
    )

    print("Deploying files to master...")
    deploy_folder = os.path.join(
        os.environ["SPARK_HOME"], "ec2", "deploy.generic")
    deploy_files(conn, deploy_folder, opts, master_nodes, slave_nodes, modules)

    print("Running setup on master...")
    setup_spark_cluster(master, opts)
    print("Done!")
def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
    """Deploy the spark-ec2 scripts to the master and run cluster setup.

    Modified version of the setup_cluster function (borrowed from
    spark-ec.py -- presumably Spark's EC2 launch script) in order to
    manually set the folder with the deploy code: the deploy templates are
    read from ``$SPARK_HOME/ec2/deploy.generic``.

    The spark-ec2 branch cloned onto the master depends on the module-level
    ``spark_home_loose_version``: ``branch-1.3`` when it is at least 1.3.0,
    ``v4`` otherwise.

    Args:
        conn: Connection handle; it is only passed through to deploy_files.
        master_nodes: Non-empty sequence of master instances. The first
            one's ``public_dns_name`` is used as the master host.
        slave_nodes: Iterable of slave instances exposing
            ``public_dns_name``.
        opts: Options object. ``hadoop_major_version`` and ``ganglia`` are
            read here, and the object is forwarded to every helper call.
        deploy_ssh_key: When true, create an SSH key on the master (only if
            it has none yet) and copy the master's ``.ssh`` directory to
            every slave.

    Raises:
        KeyError: If the SPARK_HOME environment variable is not set.
    """
    master = master_nodes[0].public_dns_name

    # NOTE: print is called with a single parenthesized argument throughout,
    # which produces the same output under Python 2's print statement and
    # Python 3's print function.
    if deploy_ssh_key:
        print("Generating cluster's SSH key on master...")
        # The "[ -f ... ] ||" guard keeps an existing key pair untouched.
        key_setup = (
            " [ -f ~/.ssh/id_rsa ] || (ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa"
            " && cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys) "
        )
        ssh(master, opts, key_setup)
        # Tar up the master's .ssh directory so it can be unpacked on slaves.
        dot_ssh_tar = ssh_read(master, opts, ['tar', 'c', '.ssh'])

        print("Transferring cluster's SSH key to slaves...")
        for slave in slave_nodes:
            print(slave.public_dns_name)
            ssh_write(slave.public_dns_name, opts, ['tar', 'x'], dot_ssh_tar)

    modules = [
        'spark',
        'shark',
        'ephemeral-hdfs',
        'persistent-hdfs',
        'mapreduce',
        'spark-standalone',
        'tachyon',
    ]

    if opts.hadoop_major_version == "1":
        # A list comprehension (rather than filter()) guarantees a real list,
        # so the append() below also works where filter() returns an iterator.
        modules = [name for name in modules if name != "mapreduce"]

    if opts.ganglia:
        modules.append('ganglia')

    # Choose the spark-ec2 branch first so the clone command is written once.
    if spark_home_loose_version >= LooseVersion("1.3.0"):
        spark_ec2_branch = "branch-1.3"
    else:
        spark_ec2_branch = "v4"

    # Start from a fresh checkout of the setup scripts on the master.
    clone_command = (
        "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git "
        "-b {b}"
    ).format(b=spark_ec2_branch)
    ssh(master, opts, clone_command)

    print("Deploying files to master...")
    deploy_folder = os.path.join(
        os.environ['SPARK_HOME'], "ec2", "deploy.generic")
    deploy_files(conn, deploy_folder, opts, master_nodes, slave_nodes, modules)

    print("Running setup on master...")
    setup_spark_cluster(master, opts)
    print("Done!")