Beispiel #1
0
def web_proxy(region, stack_name):
    """Open SSH tunnels to the cluster's web UIs and block on them.

    Forwards three remote ports to the same port number on localhost:
    7180 on the manager instance ("CM WebUI"), and 8088 ("YARN RM") and
    19888 ("YARN JobHistory") on the master instance.  One table row is
    printed per tunnel showing the instance's ``ip_address`` and
    ``private_ip_address``.

    The call then waits on the last tunnel.  Every tunnel that was opened
    is terminated on the way out, including when opening a later tunnel
    fails or the wait is interrupted.

    Args:
        region: forwarded to ``create_ec2_connection``.
        stack_name: forwarded to the instance lookup helpers.
    """
    ec2_conn = create_ec2_connection(region)
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    master_instance = get_master_instance(ec2_conn, stack_name)

    # (display name, instance to tunnel to, port); the same port number is
    # used on the remote side and on localhost.
    endpoints = [
        ("CM WebUI", manager_instance, 7180),
        ("YARN RM", master_instance, 8088),
        ("YARN JobHistory", master_instance, 19888),
    ]

    ts = "{0:<22}{1:<17}{2:<17}{3:<7}localhost:{4}"
    print(ts.format("name", "public", "private", "remote", "local"))

    tunnels = []
    try:
        # Tunnels are opened inside the try so that a failure part-way
        # through still terminates the ones already running.
        for name, instance, port in endpoints:
            tunnels.append(non_blocking_tunnel(instance, port, port))
            print(ts.format(name, instance.ip_address, instance.private_ip_address, port, port))

        # block on an arbitrary ssh tunnel
        tunnels[-1].wait()
    finally:
        for tunnel in tunnels:
            tunnel.terminate()
Beispiel #2
0
def describe(region, stack_name):
    """Print the ``ip_address`` of each node in the stack, one per line.

    Lines are emitted in the order launcher, manager, master, then one
    line per worker, each formatted as ``"<Role> <address>"``.

    Args:
        region: forwarded to ``create_ec2_connection``.
        stack_name: forwarded to the instance lookup helpers.
    """
    ec2_conn = create_ec2_connection(region)

    # A single pre-formatted argument keeps the output identical whether
    # ``print`` is the Python 2 statement or the Python 3 function.
    row = "{0} {1}"
    print(row.format("Launcher", get_launcher_instance(ec2_conn, stack_name).ip_address))
    print(row.format("Manager", get_manager_instance(ec2_conn, stack_name).ip_address))
    print(row.format("Master", get_master_instance(ec2_conn, stack_name).ip_address))
    for instance in get_worker_instances(ec2_conn, stack_name):
        print(row.format("Worker", instance.ip_address))
Beispiel #3
0
def install_env_vars(region, stack_name):
    """Append a ``MANAGER_HOST`` export to ec2-user's bash profile on the master.

    NOTE: the exported value is the manager's PRIVATE IP address
    (``private_ip_address``); the master itself is reached through its
    ``ip_address``.

    Args:
        region: forwarded to ``create_ec2_connection``.
        stack_name: forwarded to the instance lookup helpers.
    """
    conn = create_ec2_connection(region)
    master_ip = get_master_instance(conn, stack_name).ip_address
    manager_ip = get_manager_instance(conn, stack_name).private_ip_address

    profile_path = "/home/ec2-user/.bash_profile"
    export_line = "export MANAGER_HOST={0}".format(manager_ip)

    def do():
        # Runs on the master host via ``execute`` below.
        append(profile_path, export_line)

    execute(do, hosts=[master_ip])
Beispiel #4
0
def login(region, stack_name, node):
    """Open an interactive shell on one node of the stack.

    Args:
        region: forwarded to ``create_ec2_connection``.
        stack_name: forwarded to the instance lookup helper.
        node: which node to log into; one of ``"master"``, ``"manager"``
            or ``"launcher"``.

    Raises:
        EggoError: if ``node`` is not one of the recognised node types.
    """
    print("Logging into the {0} node...".format(node))
    conn = create_ec2_connection(region)

    # Node type -> helper that looks up the corresponding instance.
    lookups = (
        ("master", get_master_instance),
        ("manager", get_manager_instance),
        ("launcher", get_launcher_instance),
    )
    matching = [finder for kind, finder in lookups if node == kind]
    if not matching:
        raise EggoError('"{0}" is not a valid node type'.format(node))

    target = matching[0](conn, stack_name)
    execute(open_shell, hosts=[target.ip_address])
Beispiel #5
0
def config_cluster(region, stack_name):
    """Run the full post-provisioning configuration of the cluster.

    Executes each install step in sequence.  Most steps run on the master
    instance through ``execute``; ``install_env_vars``, ``install_java_8``
    and ``adjust_yarn_memory_limits`` take the region and stack name
    instead, and ``install_opencb`` is handed the host list directly.
    When all steps have finished, the elapsed time in whole minutes is
    printed.

    Args:
        region: forwarded to ``create_ec2_connection`` and to the
            region/stack-level steps.
        stack_name: forwarded to the instance lookup and to the
            region/stack-level steps.
    """
    start_time = datetime.now()

    ec2_conn = create_ec2_connection(region)
    master_host = get_master_instance(ec2_conn, stack_name).ip_address

    execute(install_private_key, hosts=[master_host])
    execute(create_hdfs_home, hosts=[master_host])
    install_env_vars(region, stack_name)
    install_java_8(region, stack_name)
    execute(install_dev_tools, hosts=[master_host])
    execute(install_git, hosts=[master_host])
    execute(install_maven, hosts=[master_host])
    execute(install_gradle, hosts=[master_host])
    execute(install_adam, hosts=[master_host])
    install_opencb([master_host])
    execute(install_hellbender, hosts=[master_host])
    execute(install_quince, hosts=[master_host])
    execute(install_eggo, hosts=[master_host])
    adjust_yarn_memory_limits(region, stack_name)

    elapsed = datetime.now() - start_time
    # ``timedelta.seconds`` is only the sub-day remainder, so fold the
    # ``days`` component in before converting to minutes.
    elapsed_seconds = elapsed.days * 24 * 60 * 60 + elapsed.seconds
    # Floor division yields whole minutes on both Python 2 and Python 3.
    print("Cluster configured. Took {t} minutes.".format(t=elapsed_seconds // 60))
Beispiel #6
0
def install_java_8(region, stack_name):
    """Replace the installed JDKs with JDK 1.8 on every cluster node.

    Follows the general protocol for upgrading to JDK 1.8 described here:
    http://www.cloudera.com/content/cloudera/en/documentation/core/v5-3-x/topics/cdh_cm_upgrading_to_jdk8.html

    Steps, in order:
      1. Through the CM API: stop the Cloudera Management Service, then
         stop the first cluster returned by ``get_all_clusters``.
      2. Stop the CM agents on all nodes, then the CM server on the manager.
      3. On all nodes: remove the existing JDK packages and directories,
         download and install the JDK 8 RPM, and append a ``JAVA_HOME``
         export to ec2-user's bash profile.
      4. Start the CM server, then the CM agents.
      5. Through the CM API: start the cluster, then the management service.

    "All nodes" means the worker, manager and master instances.  The CM API
    is reached at localhost:64999, which is tunnelled to port 7180 on the
    manager instance for the duration of each ``http_tunnel_ctx`` block.

    Args:
        region: forwarded to ``create_ec2_connection``.
        stack_name: forwarded to the instance lookup helpers.
    """
    ec2_conn = create_ec2_connection(region)
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    cluster_instances = get_worker_instances(ec2_conn, stack_name) + [
        manager_instance,
        get_master_instance(ec2_conn, stack_name),
    ]
    cluster_hosts = [i.ip_address for i in cluster_instances]

    # Connect to CM API
    cm_api = ApiResource("localhost", username="******", password="******", server_port=64999, version=9)
    cloudera_manager = cm_api.get_cloudera_manager()

    with http_tunnel_ctx(manager_instance, 7180, 64999):
        # Stop Cloudera Management Service
        print("Stopping Cloudera Management Service")
        mgmt_service = cloudera_manager.get_service()
        mgmt_service.stop().wait()

        # Stop cluster
        print("Stopping the cluster")
        clusters = cm_api.get_all_clusters()
        # ``cluster`` is reused after the JDK swap to start the cluster again.
        cluster = clusters.objects[0]
        cluster.stop().wait()

    # Stop all Cloudera Manager Agents
    @parallel
    def stop_cm_agents():
        sudo("service cloudera-scm-agent stop")

    execute(stop_cm_agents, hosts=cluster_hosts)

    # Stop the Cloudera Manager Server
    def stop_cm_server():
        sudo("service cloudera-scm-server stop")

    execute(stop_cm_server, hosts=[manager_instance.ip_address])

    # Cleanup other Java versions and install JDK 1.8
    @parallel
    def swap_jdks():
        # Remove every installed RPM whose name contains "jdk", then any
        # leftover 1.6 / 1.7 directories under /usr/java.
        sudo("rpm -qa | grep jdk | xargs rpm -e")
        sudo("rm -rf /usr/java/jdk1.6*")
        sudo("rm -rf /usr/java/jdk1.7*")
        # The cookie header is sent so the Oracle download proceeds
        # without an interactive licence prompt.
        run(
            "wget -O jdk-8-linux-x64.rpm --no-cookies --no-check-certificate "
            '--header "Cookie: oraclelicense=accept-securebackup-cookie" '
            "http://download.oracle.com/otn-pub/java/jdk/8u51-b16/"
            "jdk-8u51-linux-x64.rpm"
        )
        sudo("yum install -y jdk-8-linux-x64.rpm")
        append("/home/ec2-user/.bash_profile", 'export JAVA_HOME=`find /usr/java -name "jdk1.8*"`')

    execute(swap_jdks, hosts=cluster_hosts)

    # Start the Cloudera Manager Server
    def start_cm_server():
        sudo("service cloudera-scm-server start")

    execute(start_cm_server, hosts=[manager_instance.ip_address])

    # Start all Cloudera Manager Agents
    @parallel
    def start_cm_agents():
        sudo("service cloudera-scm-agent start")

    execute(start_cm_agents, hosts=cluster_hosts)

    with http_tunnel_ctx(manager_instance, 7180, 64999):
        # Start the cluster and the mgmt service
        print("Starting the cluster")
        cluster.start().wait()
        print("Starting the Cloudera Management Service")
        cloudera_manager = cm_api.get_cloudera_manager()
        mgmt_service = cloudera_manager.get_service()
        mgmt_service.start().wait()