Python setup_spark_cluster Examples

Programming Language: Python

Namespace/Package Name: spark_ec2

Method/Function: setup_spark_cluster

Examples at hotexamples.com: 2

Python setup_spark_cluster - 2 examples found. These are the top-rated real-world Python examples of spark_ec2.setup_spark_cluster extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: ec2.py Project: rheiland/thunder

def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
    """Deploy the spark-ec2 scripts to the master node and run cluster setup.

    Modified version of the ``setup_cluster`` function borrowed from
    spark_ec2.py, changed so that the folder holding the deploy code is set
    manually to ``$SPARK_HOME/ec2/deploy.generic``.

    Args:
        conn: Connection object, passed through unchanged to ``deploy_files``.
        master_nodes: Sequence of master instances; only the first one's
            ``public_dns_name`` is used.
        slave_nodes: Iterable of slave instances exposing ``public_dns_name``.
        opts: Options object; ``hadoop_major_version`` and ``ganglia`` are
            read here, and it is forwarded to every helper call.
        deploy_ssh_key: When truthy, make sure the master has an RSA key
            pair and copy the master's ``.ssh`` directory to every slave.

    Raises:
        IndexError: If ``master_nodes`` is empty.
        KeyError: If the ``SPARK_HOME`` environment variable is not set.
    """
    master = master_nodes[0].public_dns_name
    if deploy_ssh_key:
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print("Generating cluster's SSH key on master...")
        # Only create a key pair if the master does not already have one.
        key_setup = """
      [ -f ~/.ssh/id_rsa ] ||
        (ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa &&
         cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys)
        """
        ssh(master, opts, key_setup)
        # Archive the master's .ssh directory once, then unpack that same
        # archive on every slave.
        dot_ssh_tar = ssh_read(master, opts, ["tar", "c", ".ssh"])
        print("Transferring cluster's SSH key to slaves...")
        for slave in slave_nodes:
            print(slave.public_dns_name)
            ssh_write(slave.public_dns_name, opts, ["tar", "x"], dot_ssh_tar)

    modules = ["spark", "shark", "ephemeral-hdfs", "persistent-hdfs",
               "mapreduce", "spark-standalone", "tachyon"]

    if opts.hadoop_major_version == "1":
        # A list comprehension (rather than filter()) guarantees a real list,
        # so the append() below also works on Python 3.
        modules = [name for name in modules if name != "mapreduce"]

    if opts.ganglia:
        modules.append("ganglia")

    # Start from a fresh checkout of the setup scripts on the master.
    ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git -b v3")

    print("Deploying files to master...")
    deploy_folder = os.path.join(os.environ["SPARK_HOME"], "ec2", "deploy.generic")
    deploy_files(conn, deploy_folder, opts, master_nodes, slave_nodes, modules)

    print("Running setup on master...")
    setup_spark_cluster(master, opts)
    print("Done!")

Example #2

Show file

File: ec2.py Project: kmader/thunder

def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
    """Deploy the spark-ec2 scripts to the master node and run cluster setup.

    Modified version of the ``setup_cluster`` function borrowed from
    spark_ec2.py, changed so that the folder holding the deploy code is set
    manually to ``$SPARK_HOME/ec2/deploy.generic``.

    The spark-ec2 branch cloned onto the master depends on the module-level
    ``spark_home_loose_version``: ``branch-1.3`` when it is at least 1.3.0,
    otherwise ``v4``.

    Args:
        conn: Connection object, passed through unchanged to ``deploy_files``.
        master_nodes: Sequence of master instances; only the first one's
            ``public_dns_name`` is used.
        slave_nodes: Iterable of slave instances exposing ``public_dns_name``.
        opts: Options object; ``hadoop_major_version`` and ``ganglia`` are
            read here, and it is forwarded to every helper call.
        deploy_ssh_key: When truthy, make sure the master has an RSA key
            pair and copy the master's ``.ssh`` directory to every slave.

    Raises:
        IndexError: If ``master_nodes`` is empty.
        KeyError: If the ``SPARK_HOME`` environment variable is not set.
    """
    master = master_nodes[0].public_dns_name
    if deploy_ssh_key:
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print("Generating cluster's SSH key on master...")
        # Only create a key pair if the master does not already have one.
        key_setup = """
      [ -f ~/.ssh/id_rsa ] ||
        (ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa &&
         cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys)
        """
        ssh(master, opts, key_setup)
        # Archive the master's .ssh directory once, then unpack that same
        # archive on every slave.
        dot_ssh_tar = ssh_read(master, opts, ['tar', 'c', '.ssh'])
        print("Transferring cluster's SSH key to slaves...")
        for slave in slave_nodes:
            print(slave.public_dns_name)
            ssh_write(slave.public_dns_name, opts, ['tar', 'x'], dot_ssh_tar)

    modules = [
        'spark', 'shark', 'ephemeral-hdfs', 'persistent-hdfs', 'mapreduce',
        'spark-standalone', 'tachyon'
    ]

    if opts.hadoop_major_version == "1":
        # A list comprehension (rather than filter()) guarantees a real list,
        # so the append() below also works on Python 3.
        modules = [name for name in modules if name != "mapreduce"]

    if opts.ganglia:
        modules.append('ganglia')

    # Choose the spark-ec2 branch once, then issue a single clone command
    # instead of repeating the ssh call in each version branch.
    if spark_home_loose_version >= LooseVersion("1.3.0"):
        spark_ec2_branch = "branch-1.3"
    else:
        spark_ec2_branch = "v4"
    ssh(
        master, opts,
        "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git "
        "-b {b}".format(b=spark_ec2_branch))

    print("Deploying files to master...")
    deploy_folder = os.path.join(os.environ['SPARK_HOME'], "ec2",
                                 "deploy.generic")
    deploy_files(conn, deploy_folder, opts, master_nodes, slave_nodes, modules)

    print("Running setup on master...")
    setup_spark_cluster(master, opts)
    print("Done!")