Example #1
def describe(region, stack_name):
    ec2_conn = create_ec2_connection(region)
    print 'Launcher', get_launcher_instance(ec2_conn, stack_name).ip_address
    print 'Manager', get_manager_instance(ec2_conn, stack_name).ip_address
    print 'Master', get_master_instance(ec2_conn, stack_name).ip_address
    for instance in get_worker_instances(ec2_conn, stack_name):
        print 'Worker', instance.ip_address
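
The connection and lookup helpers used throughout these examples live elsewhere in the module. A minimal sketch of the pattern they likely follow with boto, assuming instances are tagged with the stack name and a node type (both tag keys here are hypothetical):

from boto import ec2

def create_ec2_connection(region):
    # boto's EC2 entry point for a named region
    return ec2.connect_to_region(region)

def _get_tagged_instances(ec2_conn, stack_name, node_type):
    # hypothetical tag keys; the real scheme is defined with the helpers
    reservations = ec2_conn.get_all_instances(
        filters={'tag:stack_name': stack_name,
                 'tag:node_type': node_type,
                 'instance-state-name': 'running'})
    return [i for r in reservations for i in r.instances]

def get_manager_instance(ec2_conn, stack_name):
    return _get_tagged_instances(ec2_conn, stack_name, 'manager')[0]
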
Example #2
def adjust_yarn_memory_limits(region, stack_name, restart=True):
    ec2_conn = create_ec2_connection(region)
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    with cm_tunnel_ctx(manager_instance) as local_port:
        cm_api = ApiResource('localhost', username='******', password='******',
                             server_port=local_port, version=9)
        cluster = list(cm_api.get_all_clusters())[0]
        host = list(cm_api.get_all_hosts())[0]  # all hosts same instance type
        yarn = filter(lambda x: x.type == 'YARN',
                      list(cluster.get_all_services()))[0]
        rm_cg = filter(lambda x: x.roleType == 'RESOURCEMANAGER',
                       list(yarn.get_all_role_config_groups()))[0]
        nm_cg = filter(lambda x: x.roleType == 'NODEMANAGER',
                       list(yarn.get_all_role_config_groups()))[0]
        # size container allocation to the host's full memory and vcore count
        rm_cg.update_config({
            'yarn_scheduler_maximum_allocation_mb': (
                int(host.totalPhysMemBytes / 1024. / 1024.)),
            'yarn_scheduler_maximum_allocation_vcores': host.numCores})
        nm_cg.update_config({
            'yarn_nodemanager_resource_memory_mb': (
                int(host.totalPhysMemBytes / 1024. / 1024.)),
            'yarn_nodemanager_resource_cpu_vcores': host.numCores})
        cluster.deploy_client_config().wait()
        if restart:
            cluster.restart().wait()
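
cm_tunnel_ctx is not shown, but its usage (yielding a local port that forwards to Cloudera Manager) suggests a context manager around an ssh -L tunnel to CM's default port 7180. A sketch under that assumption, borrowing get_ec2_private_key_file from Example #7:

import socket
import subprocess
from contextlib import contextmanager

@contextmanager
def cm_tunnel_ctx(manager_instance):
    # grab a free ephemeral local port
    sock = socket.socket()
    sock.bind(('localhost', 0))
    local_port = sock.getsockname()[1]
    sock.close()
    # forward local_port to CM on the manager's private address; a real
    # implementation would also wait for the tunnel to come up
    tunnel = subprocess.Popen(
        ['ssh', '-N', '-i', get_ec2_private_key_file(),
         '-L', '{0}:{1}:7180'.format(local_port,
                                     manager_instance.private_ip_address),
         'ec2-user@{0}'.format(manager_instance.ip_address)])
    try:
        yield local_port
    finally:
        tunnel.terminate()
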
Example #3
def provision(region, availability_zone, stack_name, cf_template_path,
              launcher_ami, launcher_instance_type, worker_instance_type,
              director_conf_path, cluster_ami, num_workers):
    start_time = datetime.now()

    # create cloudformation stack (VPC etc)
    cf_conn = create_cf_connection(region)
    create_cf_stack(cf_conn, stack_name, cf_template_path, availability_zone)

    # create launcher instance
    ec2_conn = create_ec2_connection(region)
    launcher_instance = create_launcher_instance(
        ec2_conn, cf_conn, stack_name, launcher_ami, launcher_instance_type)

    # run bootstrap on launcher
    execute(
        run_director_bootstrap,
        director_conf_path=director_conf_path, region=region,
        cluster_ami=cluster_ami, num_workers=num_workers,
        stack_name=stack_name, worker_instance_type=worker_instance_type,
        hosts=[launcher_instance.ip_address])

    end_time = datetime.now()
    print "Cluster has started. Took {t} minutes.".format(
        t=(end_time - start_time).seconds / 60)
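
A hypothetical invocation, with every argument value illustrative only:

provision(region='us-east-1', availability_zone='us-east-1a',
          stack_name='test-stack', cf_template_path='cf/template.json',
          launcher_ami='ami-00000000', launcher_instance_type='m3.medium',
          worker_instance_type='i2.xlarge',
          director_conf_path='director.conf',
          cluster_ami='ami-00000000', num_workers=3)
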
Example #4
def teardown(region, stack_name):
    # terminate Hadoop cluster (prompts for confirmation)
    ec2_conn = create_ec2_connection(region)
    execute(run_director_terminate,
            hosts=[get_launcher_instance(ec2_conn, stack_name).ip_address])

    # terminate launcher instance
    terminate_launcher_instance(ec2_conn, stack_name)

    # delete stack
    cf_conn = create_cf_connection(region)
    delete_stack(cf_conn, stack_name)
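
The CloudFormation helpers presumably mirror the EC2 ones. A minimal sketch with boto (create_cf_stack is omitted since its template handling isn't shown):

from boto import cloudformation

def create_cf_connection(region):
    return cloudformation.connect_to_region(region)

def delete_stack(cf_conn, stack_name):
    # CloudFormation deletion is asynchronous; this only initiates it
    cf_conn.delete_stack(stack_name)
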
Example #5
def login(region, stack_name, node):
    print('Logging into the {0} node...'.format(node))
    ec2_conn = create_ec2_connection(region)
    if node == 'master':
        hosts = [get_master_instance(ec2_conn, stack_name).ip_address]
    elif node == 'manager':
        hosts = [get_manager_instance(ec2_conn, stack_name).ip_address]
    elif node == 'launcher':
        hosts = [get_launcher_instance(ec2_conn, stack_name).ip_address]
    else:
        raise EggoError('"{0}" is not a valid node type'.format(node))
    execute(open_shell, hosts=hosts)
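
EggoError is project-specific; all this function needs is a plain Exception subclass, so presumably something like:

class EggoError(Exception):
    # assumed: a simple project-wide error type
    pass

# e.g. login('us-east-1', 'test-stack', 'master')
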
Example #6
def install_env_vars(region, stack_name):
    ec2_conn = create_ec2_connection(region)

    # get information about the cluster
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    with cm_tunnel_ctx(manager_instance) as local_port:
        env_vars = generate_eggo_env_vars('localhost', local_port, 'admin',
                                          'admin')
    env_var_exports = ['export {0}={1}'.format(k, v)
                       for (k, v) in env_vars.iteritems()]

    def do():
        append('/home/ec2-user/eggo_env_vars.sh', env_var_exports)
        append('/home/ec2-user/.bash_profile',
               'source /home/ec2-user/eggo_env_vars.sh')

    master_host = get_master_instance(ec2_conn, stack_name).ip_address
    execute(do, hosts=[master_host])
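
generate_eggo_env_vars is not shown; judging from its arguments, it connects to CM through the tunnel and derives environment variables from the cluster. A heavily hedged sketch (the returned variable name is hypothetical):

from cm_api.api_client import ApiResource

def generate_eggo_env_vars(cm_host, cm_port, username, password):
    cm_api = ApiResource(cm_host, username=username, password=password,
                         server_port=cm_port, version=9)
    cluster = list(cm_api.get_all_clusters())[0]
    # hypothetical variable; the real function likely derives several more
    return {'EGGO_CLUSTER_NAME': cluster.name}
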
Example #7
def web_proxy(region, stack_name):
    ec2_conn = create_ec2_connection(region)
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    master_instance = get_master_instance(ec2_conn, stack_name)
    worker_instances = get_worker_instances(ec2_conn, stack_name)

    tunnels = []
    ts = '{0:<22}{1:<17}{2:<17}{3:<7}localhost:{4}'
    print(ts.format('name', 'public', 'private', 'remote', 'local'))

    # CM
    tunnels.append(non_blocking_tunnel(manager_instance.ip_address,
                                       manager_instance.private_ip_address,
                                       7180, 7180, 'ec2-user',
                                       get_ec2_private_key_file()))
    print(ts.format(
        'CM WebUI', manager_instance.ip_address,
        manager_instance.private_ip_address, 7180, 7180))

    # YARN RM
    tunnels.append(non_blocking_tunnel(master_instance.ip_address,
                                       master_instance.private_ip_address,
                                       8088, 8088, 'ec2-user',
                                       get_ec2_private_key_file()))
    print(ts.format(
        'YARN RM', master_instance.ip_address,
        master_instance.private_ip_address, 8088, 8088))

    # YARN JobHistory
    tunnels.append(non_blocking_tunnel(master_instance.ip_address,
                                       master_instance.private_ip_address,
                                       19888, 19888, 'ec2-user',
                                       get_ec2_private_key_file()))
    print(ts.format(
        'YARN JobHistory', master_instance.ip_address,
        master_instance.private_ip_address, 19888, 19888))

    try:
        # block on an arbitrary ssh tunnel
        tunnels[-1].wait()
    finally:
        for tunnel in tunnels:
            tunnel.terminate()
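
non_blocking_tunnel is used with .wait() and .terminate(), which matches a subprocess.Popen handle. A sketch under that assumption:

import subprocess

def non_blocking_tunnel(public_ip, private_ip, remote_port, local_port,
                        user, private_key_file):
    # ssh -L: listen on local_port, forwarding to remote_port on the
    # private address, hopping through the instance's public address
    return subprocess.Popen(
        ['ssh', '-N', '-i', private_key_file,
         '-L', '{0}:{1}:{2}'.format(local_port, private_ip, remote_port),
         '{0}@{1}'.format(user, public_ip)])
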
Example #8
def config_cluster(region, stack_name, adam, adam_fork, adam_branch, opencb,
                   gatk, quince, quince_fork, quince_branch):
    start_time = datetime.now()

    ec2_conn = create_ec2_connection(region)
    master_host = get_master_instance(ec2_conn, stack_name).ip_address

    execute(install_private_key, hosts=[master_host])
    execute(create_hdfs_home, hosts=[master_host])
    # java 8 install will restart the cluster
    adjust_yarn_memory_limits(region, stack_name, restart=False)
    install_java_8(region, stack_name)

    # install software tools
    execute(install_dev_tools, hosts=[master_host])
    execute(install_git, hosts=[master_host])
    execute(install_maven, hosts=[master_host])
    execute(install_gradle, hosts=[master_host])
    execute(install_parquet_tools, hosts=[master_host])
    if adam:
        execute(install_adam, fork=adam_fork, branch=adam_branch,
                hosts=[master_host])
    if opencb:
        install_opencb([master_host])
    if gatk:
        execute(install_gatk, hosts=[master_host])
    if quince:
        execute(install_quince, fork=quince_fork, branch=quince_branch,
                hosts=[master_host])
    execute(install_eggo, hosts=[master_host])

    # install environment vars for use on the cluster
    install_env_vars(region, stack_name)

    end_time = datetime.now()
    print "Cluster configured. Took {t} minutes.".format(
        t=(end_time - start_time).seconds / 60)
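
A hypothetical call, with the fork and branch values illustrative:

config_cluster('us-east-1', 'test-stack',
               adam=True, adam_fork='bigdatagenomics', adam_branch='master',
               opencb=False, gatk=False,
               quince=True, quince_fork='bigdatagenomics',
               quince_branch='master')
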
Example #9
def install_java_8(region, stack_name):
    # following general protocol for upgrading to JDK 1.8 here:
    # http://www.cloudera.com/content/cloudera/en/documentation/core/v5-3-x/topics/cdh_cm_upgrading_to_jdk8.html
    ec2_conn = create_ec2_connection(region)
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    cluster_instances = (
        get_worker_instances(ec2_conn, stack_name) +
        [manager_instance, get_master_instance(ec2_conn, stack_name)])
    cluster_hosts = [i.ip_address for i in cluster_instances]

    with cm_tunnel_ctx(manager_instance) as local_port:
        # Connect to CM API
        cm_api = ApiResource('localhost', username='******', password='******',
                             server_port=local_port, version=9)
        cloudera_manager = cm_api.get_cloudera_manager()

        # Stop Cloudera Management Service
        print "Stopping Cloudera Management Service"
        mgmt_service = cloudera_manager.get_service()
        mgmt_service.stop().wait()

        # Stop cluster
        print "Stopping the cluster"
        clusters = cm_api.get_all_clusters()
        cluster = clusters.objects[0]
        cluster.stop().wait()

    # Stop all Cloudera Manager Agents
    @parallel
    def stop_cm_agents():
        sudo('service cloudera-scm-agent stop')
    execute(stop_cm_agents, hosts=cluster_hosts)

    # Stop the Cloudera Manager Server
    def stop_cm_server():
        sudo('service cloudera-scm-server stop')
    execute(stop_cm_server, hosts=[manager_instance.ip_address])

    # Cleanup other Java versions and install JDK 1.8
    @parallel
    def swap_jdks():
        sudo('rpm -qa | grep jdk | xargs rpm -e')
        sudo('rm -rf /usr/java/jdk1.6*')
        sudo('rm -rf /usr/java/jdk1.7*')
        run('wget -O jdk-8-linux-x64.rpm --no-cookies --no-check-certificate '
            '--header "Cookie: oraclelicense=accept-securebackup-cookie" '
            'http://download.oracle.com/otn-pub/java/jdk/8u51-b16/'
            'jdk-8u51-linux-x64.rpm')
        sudo('yum install -y jdk-8-linux-x64.rpm')
        append('/home/ec2-user/.bash_profile',
               'export JAVA_HOME=`find /usr/java -name "jdk1.8*"`')
    execute(swap_jdks, hosts=cluster_hosts)

    # Start the Cloudera Manager Server
    def start_cm_server():
        sudo('service cloudera-scm-server start')
    execute(start_cm_server, hosts=[manager_instance.ip_address])

    # Start all Cloudera Manager Agents
    @parallel
    def start_cm_agents():
        sudo('service cloudera-scm-agent start')
    execute(start_cm_agents, hosts=cluster_hosts)

    with cm_tunnel_ctx(manager_instance) as local_port:
        # Reconnect to the CM API; handles from the previous tunnel
        # (including `cluster`) point at a closed local port and are stale
        cm_api = ApiResource('localhost', username='******', password='******',
                             server_port=local_port, version=9)
        cloudera_manager = cm_api.get_cloudera_manager()

        # Start the cluster and the mgmt service
        print "Starting the cluster"
        cluster = cm_api.get_all_clusters().objects[0]
        cluster.start().wait()
        print "Starting the Cloudera Management Service"
        mgmt_service = cloudera_manager.get_service()
        mgmt_service.start().wait()
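
A follow-up check one might add after the swap (not part of the original) to confirm every node reports the new JVM:

from fabric.api import execute, parallel, run

@parallel
def check_java_version():
    # each host should report JDK 1.8 after the swap
    run('java -version')

# e.g. execute(check_java_version, hosts=cluster_hosts)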