Ejemplo n.º 1
0
def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
    """Modified version of the setup_cluster function (borrowed from spark-ec.py)
    in order to manually set the folder with the deploy code"""
    master = master_nodes[0].public_dns_name
    if deploy_ssh_key:
        print "Generating cluster's SSH key on master..."
        key_setup = """
      [ -f ~/.ssh/id_rsa ] ||
        (ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa &&
         cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys)
        """
        ssh(master, opts, key_setup)
        dot_ssh_tar = ssh_read(master, opts, ["tar", "c", ".ssh"])
        print "Transferring cluster's SSH key to slaves..."
        for slave in slave_nodes:
            print slave.public_dns_name
            ssh_write(slave.public_dns_name, opts, ["tar", "x"], dot_ssh_tar)

    modules = ["spark", "shark", "ephemeral-hdfs", "persistent-hdfs", "mapreduce", "spark-standalone", "tachyon"]

    if opts.hadoop_major_version == "1":
        modules = filter(lambda x: x != "mapreduce", modules)

    if opts.ganglia:
        modules.append("ganglia")

    ssh(master, opts, "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git -b v3")

    print "Deploying files to master..."
    deploy_folder = os.path.join(os.environ["SPARK_HOME"], "ec2", "deploy.generic")
    deploy_files(conn, deploy_folder, opts, master_nodes, slave_nodes, modules)

    print "Running setup on master..."
    setup_spark_cluster(master, opts)
    print "Done!"
Ejemplo n.º 2
0
def load_data(master, opts):
    """ Load an example data set into a Spark EC2 cluster"""
    print "Transferring example data to the cluster..."
    ssh(master, opts, "/root/ephemeral-hdfs/bin/start-all.sh")
    time.sleep(10)
    (s3_access_key, s3_secret_key) = get_s3_keys()
    ssh(master, opts, "/root/ephemeral-hdfs/bin/hadoop distcp "
                              "s3n://" + s3_access_key + ":" + s3_secret_key +
                              "@thunder.datasets/test/iris.txt hdfs:///data")
    print "Done!"
Ejemplo n.º 3
0
def load_data(master, opts):
    """ Load an example data set into a Spark EC2 cluster"""
    print "Transferring example data to the cluster..."
    ssh(master, opts, "/root/ephemeral-hdfs/bin/start-all.sh")
    time.sleep(10)
    (s3_access_key, s3_secret_key) = get_s3_keys()
    ssh(
        master, opts, "/root/ephemeral-hdfs/bin/hadoop distcp "
        "s3n://" + s3_access_key + ":" + s3_secret_key +
        "@thunder.datasets/test/iris.txt hdfs:///data")
    print "Done!"
Ejemplo n.º 4
0
def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
    """Modified version of the setup_cluster function (borrowed from spark-ec.py)
    in order to manually set the folder with the deploy code"""
    master = master_nodes[0].public_dns_name
    if deploy_ssh_key:
        print "Generating cluster's SSH key on master..."
        key_setup = """
      [ -f ~/.ssh/id_rsa ] ||
        (ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa &&
         cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys)
        """
        ssh(master, opts, key_setup)
        dot_ssh_tar = ssh_read(master, opts, ['tar', 'c', '.ssh'])
        print "Transferring cluster's SSH key to slaves..."
        for slave in slave_nodes:
            print slave.public_dns_name
            ssh_write(slave.public_dns_name, opts, ['tar', 'x'], dot_ssh_tar)

    modules = [
        'spark', 'shark', 'ephemeral-hdfs', 'persistent-hdfs', 'mapreduce',
        'spark-standalone', 'tachyon'
    ]

    if opts.hadoop_major_version == "1":
        modules = filter(lambda x: x != "mapreduce", modules)

    if opts.ganglia:
        modules.append('ganglia')

    ssh(
        master, opts,
        "rm -rf spark-ec2 && git clone https://github.com/mesos/spark-ec2.git -b v2"
    )

    print "Deploying files to master..."
    deploy_folder = os.path.join(os.environ['SPARK_HOME'], "ec2",
                                 "deploy.generic")
    deploy_files(conn, deploy_folder, opts, master_nodes, slave_nodes, modules)

    print "Running setup on master..."
    setup_spark_cluster(master, opts)
    print "Done!"
Ejemplo n.º 5
0
def install_thunder(master, opts):
    """ Install Thunder and dependencies on a Spark EC2 cluster"""
    print "Installing Thunder on the cluster..."
    ssh(master, opts, "git clone https://github.com/freeman-lab/thunder.git")
    ssh(master, opts, "chmod u+x thunder/helper/ec2/setup.sh")
    ssh(master, opts, "thunder/helper/ec2/setup.sh")
    print "Done!"
Ejemplo n.º 6
0
def install_thunder(master, opts):
    """ Install Thunder and dependencies on a Spark EC2 cluster"""
    print "Installing Thunder on the cluster..."
    ssh(master, opts, "git clone https://github.com/freeman-lab/thunder.git")
    ssh(master, opts, "chmod u+x thunder/helper/ec2/setup.sh")
    ssh(master, opts, "thunder/helper/ec2/setup.sh")
    print "Done!"
Ejemplo n.º 7
0
def load_data(master, opts):
    """ 
    Load an example data set into a Spark EC2 cluster
    TODO: replace with URL once we've hosted public data
    """
    print "Transferring example data to the cluster..."
    ssh(master, opts, "/root/ephemeral-hdfs/bin/stop-all.sh")
    ssh(master, opts, "/root/ephemeral-hdfs/bin/start-all.sh")
    time.sleep(10)
    ssh(master, opts, "/root/ephemeral-hdfs/bin/hadoop distcp s3n://thunder.datasets/test/iris.txt hdfs:///data")
    print "\n\n"
    print "-------------------------------"
    print "Example data successfully loaded!"
    print "-------------------------------"
    print "\n"
Ejemplo n.º 8
0
def load_data(master, opts):
    """ 
    Load an example data set into a Spark EC2 cluster
    TODO: replace with URL once we've hosted public data
    """
    print "Transferring example data to the cluster..."
    ssh(master, opts, "/root/ephemeral-hdfs/bin/stop-all.sh")
    ssh(master, opts, "/root/ephemeral-hdfs/bin/start-all.sh")
    time.sleep(10)
    ssh(master, opts, "/root/ephemeral-hdfs/bin/hadoop distcp s3n://thunder.datasets/test/iris.txt hdfs:///data")
    print "\n\n"
    print "-------------------------------"
    print "Example data successfully loaded!"
    print "-------------------------------"
    print "\n"
Ejemplo n.º 9
0
def install_thunder(master, opts):
    """ Install Thunder and dependencies on a Spark EC2 cluster"""
    print "Installing Thunder on the cluster..."
    ssh(master, opts, "rm -rf thunder && git clone https://github.com/freeman-lab/thunder.git")
    ssh(master, opts, "chmod u+x thunder/python/bin/build")
    ssh(master, opts, "thunder/python/bin/build")
    ssh(master, opts, "source ~/.bash_profile && pip install mpld3")
    ssh(master, opts, "echo 'export SPARK_HOME=/root/spark' >> /root/.bash_profile")
    ssh(master, opts, "echo 'export PYTHONPATH=/root/thunder/python' >> /root/.bash_profile")
    ssh(master, opts, "echo 'export IPYTHON=1' >> /root/.bash_profile")
    ssh(master, opts, "echo 'export PATH=/root/thunder/python/bin:$PATH' >> /root/.bash_profile")
    print "\n\n"
    print "-------------------------------"
    print "Thunder successfully installed!"
    print "-------------------------------"
    print "\n"
Ejemplo n.º 10
0
def install_thunder(master, opts):
    """ Install Thunder and dependencies on a Spark EC2 cluster

    Runs a sequence of ssh commands on the cluster master that clones
    and builds Thunder, distributes its example data to the workers,
    installs pip and Python libraries plus IPython 1.1, sets the
    environment variables Thunder expects, tunes Spark configuration,
    and wires the user's AWS credentials into Hadoop and boto.
    """
    print "Installing Thunder on the cluster..."
    # download and build thunder
    ssh(master, opts, "rm -rf thunder && git clone https://github.com/freeman-lab/thunder.git")
    ssh(master, opts, "chmod u+x thunder/python/bin/build")
    ssh(master, opts, "thunder/python/bin/build")
    # copy local data examples to all workers (pssh runs the mkdir on
    # every host listed in the spark-ec2 slaves file)
    ssh(master, opts, "yum install -y pssh")
    ssh(master, opts, "pssh -h /root/spark-ec2/slaves mkdir -p /root/thunder/python/thunder/utils/data/")
    ssh(master, opts, "~/spark-ec2/copy-dir /root/thunder/python/thunder/utils/data/")
    # install pip
    # NOTE(review): plain-http download pinned to pip 1.1 with only an
    # md5 fragment for integrity -- consider an https source.
    ssh(master, opts, "wget http://pypi.python.org/packages/source/p/pip/pip-1.1.tar.gz"
                      "#md5=62a9f08dd5dc69d76734568a6c040508")
    ssh(master, opts, "tar -xvf pip*.gz")
    ssh(master, opts, "cd pip* && sudo python setup.py install")
    # install libraries
    ssh(master, opts, "source ~/.bash_profile && pip install mpld3 && pip install seaborn "
                      "&& pip install jinja2 && pip install -U scikit-learn")
    # install ipython 1.1 (replace whatever version the AMI shipped)
    ssh(master, opts, "pip uninstall -y ipython")
    ssh(master, opts, "git clone https://github.com/ipython/ipython.git")
    ssh(master, opts, "cd ipython && git checkout tags/rel-1.1.0")
    ssh(master, opts, "cd ipython && sudo python setup.py install")
    # set environmental variables
    ssh(master, opts, "echo 'export SPARK_HOME=/root/spark' >> /root/.bash_profile")
    ssh(master, opts, "echo 'export PYTHONPATH=/root/thunder/python' >> /root/.bash_profile")
    ssh(master, opts, "echo 'export IPYTHON=1' >> /root/.bash_profile")
    ssh(master, opts, "echo 'export PATH=/root/thunder/python/bin:$PATH' >> /root/.bash_profile")
    # customize spark configuration parameters
    ssh(master, opts, "echo 'spark.akka.frameSize=10000' >> /root/spark/conf/spark-defaults.conf")
    ssh(master, opts, "echo 'spark.kryoserializer.buffer.max.mb=1024' >> /root/spark/conf/spark-defaults.conf")
    ssh(master, opts, "echo 'export SPARK_DRIVER_MEMORY=20g' >> /root/spark/conf/spark-env.sh")
    # add AWS credentials to core-site.xml: the ACCESS/SECRET
    # placeholders are substituted with the real keys, then sed splices
    # the <property> elements in just before the closing
    # </configuration> line ("&" re-emits the matched line).
    configstring = "<property><name>fs.s3n.awsAccessKeyId</name><value>ACCESS</value></property><property>" \
                   "<name>fs.s3n.awsSecretAccessKey</name><value>SECRET</value></property>"
    access, secret = get_s3_keys()
    filled = configstring.replace('ACCESS', access).replace('SECRET', secret)
    ssh(master, opts, "sed -i'f' 's,.*</configuration>.*,"+filled+"&,' /root/ephemeral-hdfs/conf/core-site.xml")
    # add AWS credentials to ~/.boto
    credentialstring = "[Credentials]\naws_access_key_id = ACCESS\naws_secret_access_key = SECRET\n"
    credentialsfilled = credentialstring.replace('ACCESS', access).replace('SECRET', secret)
    ssh(master, opts, "printf '"+credentialsfilled+"' > /root/.boto")
    # push the filled-in .boto out to every slave
    ssh(master, opts, "pscp.pssh -h /root/spark-ec2/slaves /root/.boto /root/.boto")
    # configure requester pays
    ssh(master, opts, "touch /root/spark/conf/jets3t.properties")
    ssh(master, opts, "echo 'httpclient.requester-pays-buckets-enabled = true' >> /root/spark/conf/jets3t.properties")
    ssh(master, opts, "~/spark-ec2/copy-dir /root/spark/conf")

    print "\n\n"
    print "-------------------------------"
    print "Thunder successfully installed!"
    print "-------------------------------"
    print "\n"
Ejemplo n.º 11
0
def install_thunder(master, opts):
    """ Install Thunder and dependencies on a Spark EC2 cluster

    Clones and builds Thunder on the master, installs plotting
    libraries, persists the environment variables Thunder needs, and
    splices the user's S3 credentials into ephemeral-hdfs's
    core-site.xml so s3n:// paths are usable.
    """
    print "Installing Thunder on the cluster..."
    # fetch and build thunder on the master node
    ssh(master, opts, "rm -rf thunder && git clone https://github.com/freeman-lab/thunder.git")
    ssh(master, opts, "chmod u+x thunder/python/bin/build")
    ssh(master, opts, "thunder/python/bin/build")
    # plotting dependencies
    ssh(master, opts, "source ~/.bash_profile && pip install mpld3 && pip install seaborn")
    # drop the stock example notebook shipped on the AMI
    ssh(master, opts, "rm /root/pyspark_notebook_example.ipynb")
    # persist the environment Thunder expects across logins
    ssh(master, opts, "echo 'export SPARK_HOME=/root/spark' >> /root/.bash_profile")
    ssh(master, opts, "echo 'export PYTHONPATH=/root/thunder/python' >> /root/.bash_profile")
    ssh(master, opts, "echo 'export IPYTHON=1' >> /root/.bash_profile")
    ssh(master, opts, "echo 'export PATH=/root/thunder/python/bin:$PATH' >> /root/.bash_profile")
    # the ACCESS/SECRET placeholders are filled with the real keys, then
    # sed inserts the <property> elements just before the closing
    # </configuration> line ("&" re-emits the matched line)
    configstring = "<property><name>fs.s3n.awsAccessKeyId</name><value>ACCESS</value></property><property>" \
                   "<name>fs.s3n.awsSecretAccessKey</name><value>SECRET</value></property>"
    access, secret = get_s3_keys()
    filled = configstring.replace('ACCESS', access).replace('SECRET', secret)
    ssh(master, opts, "sed -i'f' 's,.*</configuration>.*,"+filled+"&,' /root/ephemeral-hdfs/conf/core-site.xml")
    print "\n\n"
    print "-------------------------------"
    print "Thunder successfully installed!"
    print "-------------------------------"
    print "\n"
Ejemplo n.º 12
0
def install_thunder(master, opts):
    """ Install Thunder and dependencies on a Spark EC2 cluster"""
    print "Installing Thunder on the cluster..."
    ssh(
        master, opts,
        "rm -rf thunder && git clone https://github.com/freeman-lab/thunder.git"
    )
    ssh(master, opts, "chmod u+x thunder/python/bin/build")
    ssh(master, opts, "thunder/python/bin/build")
    ssh(master, opts, "source ~/.bash_profile && pip install mpld3")
    ssh(master, opts,
        "echo 'export SPARK_HOME=/root/spark' >> /root/.bash_profile")
    ssh(
        master, opts,
        "echo 'export PYTHONPATH=/root/thunder/python' >> /root/.bash_profile")
    ssh(master, opts, "echo 'export IPYTHON=1' >> /root/.bash_profile")
    ssh(
        master, opts,
        "echo 'export PATH=/root/thunder/python/bin:$PATH' >> /root/.bash_profile"
    )
    print "\n\n"
    print "-------------------------------"
    print "Thunder successfully installed!"
    print "-------------------------------"
    print "\n"
Ejemplo n.º 13
0
def install_thunder(master, opts):
    """ Install Thunder and dependencies on a Spark EC2 cluster

    Clones and builds Thunder on the master, installs mpld3, removes the
    AMI's stock example notebook, persists Thunder's environment
    variables, and splices the user's S3 credentials into
    ephemeral-hdfs's core-site.xml so s3n:// paths are usable.
    """
    print "Installing Thunder on the cluster..."
    # fetch and build thunder on the master node
    ssh(master, opts, "rm -rf thunder && git clone https://github.com/freeman-lab/thunder.git")
    ssh(master, opts, "chmod u+x thunder/python/bin/build")
    ssh(master, opts, "thunder/python/bin/build")
    # plotting dependency for the examples
    ssh(master, opts, "source ~/.bash_profile && pip install mpld3")
    # drop the stock example notebook shipped on the AMI
    ssh(master, opts, "rm /root/pyspark_notebook_example.ipynb")
    # persist the environment Thunder expects across logins
    ssh(master, opts, "echo 'export SPARK_HOME=/root/spark' >> /root/.bash_profile")
    ssh(master, opts, "echo 'export PYTHONPATH=/root/thunder/python' >> /root/.bash_profile")
    ssh(master, opts, "echo 'export IPYTHON=1' >> /root/.bash_profile")
    ssh(master, opts, "echo 'export PATH=/root/thunder/python/bin:$PATH' >> /root/.bash_profile")
    # the ACCESS/SECRET placeholders are filled with the real keys, then
    # sed inserts the <property> elements just before the closing
    # </configuration> line ("&" re-emits the matched line)
    configstring = "<property><name>fs.s3n.awsAccessKeyId</name><value>ACCESS</value></property><property>" \
                   "<name>fs.s3n.awsSecretAccessKey</name><value>SECRET</value></property>"
    access, secret = get_s3_keys()
    filled = configstring.replace('ACCESS', access).replace('SECRET', secret)
    ssh(master, opts, "sed -i'f' 's,.*</configuration>.*,"+filled+"&,' /root/ephemeral-hdfs/conf/core-site.xml")
    print "\n\n"
    print "-------------------------------"
    print "Thunder successfully installed!"
    print "-------------------------------"
    print "\n"