Example #1
0
def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
    """Modified version of the setup_cluster function (borrowed from spark-ec.py)
    in order to manually set the folder with the deploy code.

    Parameters
    ----------
    conn : EC2 connection object, passed through to deploy_files.
    master_nodes, slave_nodes : lists of instance objects; only their
        ``public_dns_name`` attribute is read here.
    opts : parsed options (reads ``hadoop_major_version`` and ``ganglia``).
    deploy_ssh_key : when truthy, generate an SSH key pair on the master
        and copy its ~/.ssh directory to every slave so nodes can reach
        each other without passwords.
    """
    master = master_nodes[0].public_dns_name
    if deploy_ssh_key:
        print("Generating cluster's SSH key on master...")
        # Idempotent: only creates the key pair if ~/.ssh/id_rsa is missing.
        key_setup = """
      [ -f ~/.ssh/id_rsa ] ||
        (ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa &&
         cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys)
        """
        ssh(master, opts, key_setup)
        # Ship the master's whole ~/.ssh directory as a tar stream.
        dot_ssh_tar = ssh_read(master, opts, ["tar", "c", ".ssh"])
        print("Transferring cluster's SSH key to slaves...")
        for slave in slave_nodes:
            print(slave.public_dns_name)
            ssh_write(slave.public_dns_name, opts, ["tar", "x"], dot_ssh_tar)

    modules = ["spark", "shark", "ephemeral-hdfs", "persistent-hdfs",
               "mapreduce", "spark-standalone", "tachyon"]

    if opts.hadoop_major_version == "1":
        # Use a list comprehension (not filter()) so `modules` stays a
        # list and the append() below also works on Python 3, where
        # filter() returns a lazy iterator without append().
        modules = [m for m in modules if m != "mapreduce"]

    if opts.ganglia:
        modules.append("ganglia")

    ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git -b v3")

    print("Deploying files to master...")
    deploy_folder = os.path.join(os.environ["SPARK_HOME"], "ec2", "deploy.generic")
    deploy_files(conn, deploy_folder, opts, master_nodes, slave_nodes, modules)

    print("Running setup on master...")
    setup_spark_cluster(master, opts)
    print("Done!")
Example #2
0
def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
    """
    Modified version of the setup_cluster function (borrowed from spark-ec.py)
    in order to manually set the folder with the deploy code.

    Parameters
    ----------
    conn : EC2 connection object, passed through to deploy_files.
    master_nodes, slave_nodes : lists of instance objects; only their
        ``public_dns_name`` attribute is read here.
    opts : parsed options (reads ``hadoop_major_version`` and ``ganglia``).
    deploy_ssh_key : when truthy, generate an SSH key pair on the master
        and copy its ~/.ssh directory to every slave so nodes can reach
        each other without passwords.
    """
    master = master_nodes[0].public_dns_name
    if deploy_ssh_key:
        print_status("Generating cluster's SSH key on master")
        # Idempotent: only creates the key pair if ~/.ssh/id_rsa is missing.
        key_setup = """
      [ -f ~/.ssh/id_rsa ] ||
        (ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa &&
         cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys)
        """
        ssh(master, opts, key_setup)
        print_success()
        with quiet():
            # Ship the master's whole ~/.ssh directory as a tar stream.
            dot_ssh_tar = ssh_read(master, opts, ['tar', 'c', '.ssh'])
        print_status("Transferring cluster's SSH key to slaves")
        with quiet():
            for slave in slave_nodes:
                ssh_write(slave.public_dns_name, opts, ['tar', 'x'],
                          dot_ssh_tar)
        print_success()

    modules = [
        'spark', 'shark', 'ephemeral-hdfs', 'persistent-hdfs', 'mapreduce',
        'spark-standalone', 'tachyon'
    ]

    if opts.hadoop_major_version == "1":
        # Use a list comprehension (not filter()) so `modules` stays a
        # list and the append() below also works on Python 3, where
        # filter() returns a lazy iterator without append().
        modules = [m for m in modules if m != "mapreduce"]

    if opts.ganglia:
        modules.append('ganglia')

    # Spark >= 1.3.0 expects the matching spark-ec2 branch; older
    # releases use the fixed "v4" tag.
    if spark_home_loose_version >= LooseVersion("1.3.0"):
        MESOS_SPARK_EC2_BRANCH = "branch-1.3"
        ssh(
            master, opts,
            "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git "
            "-b {b}".format(b=MESOS_SPARK_EC2_BRANCH))
    else:
        ssh(
            master, opts,
            "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git "
            "-b v4")

    print_status("Deploying files to master")
    deploy_folder = os.path.join(os.environ['SPARK_HOME'], "ec2",
                                 "deploy.generic")
    with quiet():
        deploy_files(conn, deploy_folder, opts, master_nodes, slave_nodes,
                     modules)
    print_success()

    print_status("Installing Spark (may take several minutes)")
    setup_spark_cluster(master, opts)
    print_success()
Example #3
0
def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
    """
    Modified version of the setup_cluster function (borrowed from spark-ec.py)
    in order to manually set the folder with the deploy code.

    Parameters
    ----------
    conn : EC2 connection object, passed through to deploy_files.
    master_nodes, slave_nodes : lists of instance objects; only their
        ``public_dns_name`` attribute is read here.
    opts : parsed options (reads ``hadoop_major_version``, ``ganglia``,
        ``spark_ec2_git_repo`` and ``spark_ec2_git_branch``).
    deploy_ssh_key : when truthy, generate an SSH key pair on the master
        and copy its ~/.ssh directory to every slave so nodes can reach
        each other without passwords.
    """
    master = master_nodes[0].public_dns_name
    if deploy_ssh_key:
        print_status("Generating cluster's SSH key on master")
        # Idempotent: only creates the key pair if ~/.ssh/id_rsa is missing.
        key_setup = """
      [ -f ~/.ssh/id_rsa ] ||
        (ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa &&
         cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys)
        """
        ssh(master, opts, key_setup)
        print_success()
        with quiet():
            # Ship the master's whole ~/.ssh directory as a tar stream.
            dot_ssh_tar = ssh_read(master, opts, ['tar', 'c', '.ssh'])
        print_status("Transferring cluster's SSH key to slaves")
        with quiet():
            for slave in slave_nodes:
                ssh_write(slave.public_dns_name, opts, ['tar', 'x'], dot_ssh_tar)
        print_success()

    modules = ['spark', 'shark', 'ephemeral-hdfs', 'persistent-hdfs',
               'mapreduce', 'spark-standalone', 'tachyon']

    if opts.hadoop_major_version == "1":
        # Use a list comprehension (not filter()) so `modules` stays a
        # list and the append() below also works on Python 3, where
        # filter() returns a lazy iterator without append().
        modules = [m for m in modules if m != "mapreduce"]

    if opts.ganglia:
        modules.append('ganglia')

    # Pick the spark-ec2 repo/branch matching the local Spark version:
    # >= 1.5.0 honours the user-configurable repo and branch options,
    # >= 1.3.0 pins the mesos branch-1.3, otherwise the fixed "v4" tag.
    if spark_home_loose_version >= LooseVersion("1.5.0"):
        ssh(host=master,
            opts=opts,
            command="rm -rf spark-ec2"
            + " && "
            + "git clone {r} -b {b} spark-ec2".format(r=opts.spark_ec2_git_repo,
                                                      b=opts.spark_ec2_git_branch)
            )
    elif spark_home_loose_version >= LooseVersion("1.3.0"):
        MESOS_SPARK_EC2_BRANCH = "branch-1.3"
        ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git "
                          "-b {b}".format(b=MESOS_SPARK_EC2_BRANCH))
    else:
        ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git "
                          "-b v4")

    print_status("Deploying files to master")
    deploy_folder = os.path.join(os.environ['SPARK_HOME'], "ec2", "deploy.generic")
    with quiet():
        deploy_files(conn, deploy_folder, opts, master_nodes, slave_nodes, modules)
    print_success()

    print_status("Installing Spark (may take several minutes)")
    setup_spark_cluster(master, opts)
    print_success()
Example #4
0
def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
    """Modified version of the setup_cluster function (borrowed from spark-ec.py)
    in order to manually set the folder with the deploy code.

    Parameters
    ----------
    conn : EC2 connection object, passed through to deploy_files.
    master_nodes, slave_nodes : lists of instance objects; only their
        ``public_dns_name`` attribute is read here.
    opts : parsed options (reads ``hadoop_major_version`` and ``ganglia``).
    deploy_ssh_key : when truthy, generate an SSH key pair on the master
        and copy its ~/.ssh directory to every slave so nodes can reach
        each other without passwords.
    """
    master = master_nodes[0].public_dns_name
    if deploy_ssh_key:
        print("Generating cluster's SSH key on master...")
        # Idempotent: only creates the key pair if ~/.ssh/id_rsa is missing.
        key_setup = """
      [ -f ~/.ssh/id_rsa ] ||
        (ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa &&
         cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys)
        """
        ssh(master, opts, key_setup)
        # Ship the master's whole ~/.ssh directory as a tar stream.
        dot_ssh_tar = ssh_read(master, opts, ['tar', 'c', '.ssh'])
        print("Transferring cluster's SSH key to slaves...")
        for slave in slave_nodes:
            print(slave.public_dns_name)
            ssh_write(slave.public_dns_name, opts, ['tar', 'x'], dot_ssh_tar)

    modules = [
        'spark', 'shark', 'ephemeral-hdfs', 'persistent-hdfs', 'mapreduce',
        'spark-standalone', 'tachyon'
    ]

    if opts.hadoop_major_version == "1":
        # Use a list comprehension (not filter()) so `modules` stays a
        # list and the append() below also works on Python 3, where
        # filter() returns a lazy iterator without append().
        modules = [m for m in modules if m != "mapreduce"]

    if opts.ganglia:
        modules.append('ganglia')

    ssh(
        master, opts,
        "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git -b v4"
    )

    print("Deploying files to master...")
    deploy_folder = os.path.join(os.environ['SPARK_HOME'], "ec2",
                                 "deploy.generic")
    deploy_files(conn, deploy_folder, opts, master_nodes, slave_nodes, modules)

    print("Running setup on master...")
    setup_spark_cluster(master, opts)
    print("Done!")