def describe(region, stack_name):
    ec2_conn = create_ec2_connection(region)
    print 'Launcher', get_launcher_instance(ec2_conn, stack_name).ip_address
    print 'Manager', get_manager_instance(ec2_conn, stack_name).ip_address
    print 'Master', get_master_instance(ec2_conn, stack_name).ip_address
    for instance in get_worker_instances(ec2_conn, stack_name):
        print 'Worker', instance.ip_address
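# Illustrative usage of the tasks in this module (the region and stack name
# below are placeholders, not values from the original source):
#
#   describe('us-east-1', 'bdg-eggo')
#   login('us-east-1', 'bdg-eggo', 'master')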
def adjust_yarn_memory_limits(region, stack_name, restart=True):
    ec2_conn = create_ec2_connection(region)
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    with cm_tunnel_ctx(manager_instance) as local_port:
        cm_api = ApiResource('localhost', username='******',
                             password='******', server_port=local_port,
                             version=9)
        cluster = list(cm_api.get_all_clusters())[0]
        host = list(cm_api.get_all_hosts())[0]  # all hosts same instance type
        yarn = filter(lambda x: x.type == 'YARN',
                      list(cluster.get_all_services()))[0]
        rm_cg = filter(lambda x: x.roleType == 'RESOURCEMANAGER',
                       list(yarn.get_all_role_config_groups()))[0]
        nm_cg = filter(lambda x: x.roleType == 'NODEMANAGER',
                       list(yarn.get_all_role_config_groups()))[0]
        rm_cg.update_config({
            'yarn_scheduler_maximum_allocation_mb': (
                int(host.totalPhysMemBytes / 1024. / 1024.)),
            'yarn_scheduler_maximum_allocation_vcores': host.numCores})
        nm_cg.update_config({
            'yarn_nodemanager_resource_memory_mb': (
                int(host.totalPhysMemBytes / 1024. / 1024.)),
            'yarn_nodemanager_resource_cpu_vcores': host.numCores})
        cluster.deploy_client_config().wait()
        if restart:
            cluster.restart().wait()
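# `cm_tunnel_ctx` is defined elsewhere in this codebase. Below is a minimal
# sketch of what it is assumed to do, reusing `non_blocking_tunnel` and
# `get_ec2_private_key_file` exactly as they are called in `web_proxy` below.
# The name `_cm_tunnel_ctx_sketch` and the default port choice are
# assumptions, not the original implementation:
from contextlib import contextmanager

@contextmanager
def _cm_tunnel_ctx_sketch(manager_instance, local_port=7180):
    # forward a local port to the Cloudera Manager API port (7180) on the
    # manager node, and tear the tunnel down when the block exits
    tunnel = non_blocking_tunnel(manager_instance.ip_address,
                                 manager_instance.private_ip_address,
                                 7180, local_port, 'ec2-user',
                                 get_ec2_private_key_file())
    try:
        yield local_port
    finally:
        tunnel.terminate()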
def provision(region, availability_zone, stack_name, cf_template_path,
              launcher_ami, launcher_instance_type, worker_instance_type,
              director_conf_path, cluster_ami, num_workers):
    start_time = datetime.now()

    # create cloudformation stack (VPC etc)
    cf_conn = create_cf_connection(region)
    create_cf_stack(cf_conn, stack_name, cf_template_path, availability_zone)

    # create launcher instance
    ec2_conn = create_ec2_connection(region)
    launcher_instance = create_launcher_instance(
        ec2_conn, cf_conn, stack_name, launcher_ami, launcher_instance_type)

    # run bootstrap on launcher
    execute(
        run_director_bootstrap,
        director_conf_path=director_conf_path, region=region,
        cluster_ami=cluster_ami, num_workers=num_workers,
        stack_name=stack_name, worker_instance_type=worker_instance_type,
        hosts=[launcher_instance.ip_address])

    end_time = datetime.now()
    print "Cluster has started. Took {t} minutes.".format(
        t=(end_time - start_time).seconds / 60)
def teardown(region, stack_name):
    # terminate Hadoop cluster (prompts for confirmation)
    ec2_conn = create_ec2_connection(region)
    execute(run_director_terminate,
            hosts=[get_launcher_instance(ec2_conn, stack_name).ip_address])

    # terminate launcher instance
    terminate_launcher_instance(ec2_conn, stack_name)

    # delete stack
    cf_conn = create_cf_connection(region)
    delete_stack(cf_conn, stack_name)
def login(region, stack_name, node):
    print('Logging into the {0} node...'.format(node))
    ec2_conn = create_ec2_connection(region)
    if node == 'master':
        hosts = [get_master_instance(ec2_conn, stack_name).ip_address]
    elif node == 'manager':
        hosts = [get_manager_instance(ec2_conn, stack_name).ip_address]
    elif node == 'launcher':
        hosts = [get_launcher_instance(ec2_conn, stack_name).ip_address]
    else:
        raise EggoError('"{0}" is not a valid node type'.format(node))
    execute(open_shell, hosts=hosts)
def install_env_vars(region, stack_name):
    ec2_conn = create_ec2_connection(region)

    # get information about the cluster
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    with cm_tunnel_ctx(manager_instance) as local_port:
        env_vars = generate_eggo_env_vars('localhost', local_port, 'admin',
                                          'admin')
    env_var_exports = ['export {0}={1}'.format(k, v)
                       for (k, v) in env_vars.iteritems()]

    def do():
        append('/home/ec2-user/eggo_env_vars.sh', env_var_exports)
        append('/home/ec2-user/.bash_profile',
               'source /home/ec2-user/eggo_env_vars.sh')

    master_host = get_master_instance(ec2_conn, stack_name).ip_address
    execute(do, hosts=[master_host])
def web_proxy(region, stack_name):
    ec2_conn = create_ec2_connection(region)
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    master_instance = get_master_instance(ec2_conn, stack_name)
    worker_instances = get_worker_instances(ec2_conn, stack_name)

    tunnels = []
    ts = '{0:<22}{1:<17}{2:<17}{3:<7}localhost:{4}'
    print(ts.format('name', 'public', 'private', 'remote', 'local'))

    # CM
    tunnels.append(non_blocking_tunnel(manager_instance.ip_address,
                                       manager_instance.private_ip_address,
                                       7180, 7180, 'ec2-user',
                                       get_ec2_private_key_file()))
    print(ts.format('CM WebUI', manager_instance.ip_address,
                    manager_instance.private_ip_address, 7180, 7180))

    # YARN RM
    tunnels.append(non_blocking_tunnel(master_instance.ip_address,
                                       master_instance.private_ip_address,
                                       8088, 8088, 'ec2-user',
                                       get_ec2_private_key_file()))
    print(ts.format('YARN RM', master_instance.ip_address,
                    master_instance.private_ip_address, 8088, 8088))

    # YARN JobHistory
    tunnels.append(non_blocking_tunnel(master_instance.ip_address,
                                       master_instance.private_ip_address,
                                       19888, 19888, 'ec2-user',
                                       get_ec2_private_key_file()))
    print(ts.format('YARN JobHistory', master_instance.ip_address,
                    master_instance.private_ip_address, 19888, 19888))

    try:
        # block on an arbitrary ssh tunnel
        tunnels[-1].wait()
    finally:
        for tunnel in tunnels:
            tunnel.terminate()
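# With the `ts` template above, the table printed by `web_proxy` looks roughly
# like this (addresses are placeholders; note the header's last column renders
# as "localhost:local" because the template hardcodes the "localhost:" prefix):
#
#   name                  public           private          remote localhost:local
#   CM WebUI              54.0.0.10        10.0.0.10        7180   localhost:7180
#   YARN RM               54.0.0.11        10.0.0.11        8088   localhost:8088
#   YARN JobHistory       54.0.0.11        10.0.0.11        19888  localhost:19888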
def config_cluster(region, stack_name, adam, adam_fork, adam_branch, opencb,
                   gatk, quince, quince_fork, quince_branch):
    start_time = datetime.now()

    ec2_conn = create_ec2_connection(region)
    master_host = get_master_instance(ec2_conn, stack_name).ip_address
    execute(install_private_key, hosts=[master_host])
    execute(create_hdfs_home, hosts=[master_host])

    # java 8 install will restart the cluster
    adjust_yarn_memory_limits(region, stack_name, restart=False)
    install_java_8(region, stack_name)

    # install software tools
    execute(install_dev_tools, hosts=[master_host])
    execute(install_git, hosts=[master_host])
    execute(install_maven, hosts=[master_host])
    execute(install_gradle, hosts=[master_host])
    execute(install_parquet_tools, hosts=[master_host])
    if adam:
        execute(install_adam, fork=adam_fork, branch=adam_branch,
                hosts=[master_host])
    if opencb:
        install_opencb([master_host])
    if gatk:
        execute(install_gatk, hosts=[master_host])
    if quince:
        execute(install_quince, fork=quince_fork, branch=quince_branch,
                hosts=[master_host])
    execute(install_eggo, hosts=[master_host])

    # install environment vars for use on the cluster
    install_env_vars(region, stack_name)

    end_time = datetime.now()
    print "Cluster configured. Took {t} minutes.".format(
        t=(end_time - start_time).seconds / 60)
def install_java_8(region, stack_name):
    # following general protocol for upgrading to JDK 1.8 here:
    # http://www.cloudera.com/content/cloudera/en/documentation/core/v5-3-x/topics/cdh_cm_upgrading_to_jdk8.html
    ec2_conn = create_ec2_connection(region)
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    cluster_instances = (
        get_worker_instances(ec2_conn, stack_name) +
        [manager_instance, get_master_instance(ec2_conn, stack_name)])
    cluster_hosts = [i.ip_address for i in cluster_instances]

    with cm_tunnel_ctx(manager_instance) as local_port:
        # Connect to CM API
        cm_api = ApiResource('localhost', username='******',
                             password='******', server_port=local_port,
                             version=9)
        cloudera_manager = cm_api.get_cloudera_manager()

        # Stop Cloudera Management Service
        print "Stopping Cloudera Management Service"
        mgmt_service = cloudera_manager.get_service()
        mgmt_service.stop().wait()

        # Stop cluster
        print "Stopping the cluster"
        clusters = cm_api.get_all_clusters()
        cluster = clusters.objects[0]
        cluster.stop().wait()

    # Stop all Cloudera Manager Agents
    @parallel
    def stop_cm_agents():
        sudo('service cloudera-scm-agent stop')
    execute(stop_cm_agents, hosts=cluster_hosts)

    # Stop the Cloudera Manager Server
    def stop_cm_server():
        sudo('service cloudera-scm-server stop')
    execute(stop_cm_server, hosts=[manager_instance.ip_address])

    # Cleanup other Java versions and install JDK 1.8
    @parallel
    def swap_jdks():
        sudo('rpm -qa | grep jdk | xargs rpm -e')
        sudo('rm -rf /usr/java/jdk1.6*')
        sudo('rm -rf /usr/java/jdk1.7*')
        run('wget -O jdk-8-linux-x64.rpm --no-cookies --no-check-certificate '
            '--header "Cookie: oraclelicense=accept-securebackup-cookie" '
            'http://download.oracle.com/otn-pub/java/jdk/8u51-b16/'
            'jdk-8u51-linux-x64.rpm')
        sudo('yum install -y jdk-8-linux-x64.rpm')
        append('/home/ec2-user/.bash_profile',
               'export JAVA_HOME=`find /usr/java -name "jdk1.8*"`')
    execute(swap_jdks, hosts=cluster_hosts)

    # Start the Cloudera Manager Server
    def start_cm_server():
        sudo('service cloudera-scm-server start')
    execute(start_cm_server, hosts=[manager_instance.ip_address])

    # Start all Cloudera Manager Agents
    @parallel
    def start_cm_agents():
        sudo('service cloudera-scm-agent start')
    execute(start_cm_agents, hosts=cluster_hosts)

    with cm_tunnel_ctx(manager_instance) as local_port:
        # Reconnect to the CM API; the previous tunnel is closed, so re-fetch
        # the cluster handle rather than reusing the stale one bound to the
        # old connection
        cm_api = ApiResource('localhost', username='******',
                             password='******', server_port=local_port,
                             version=9)
        cloudera_manager = cm_api.get_cloudera_manager()

        # Start the cluster and the mgmt service
        print "Starting the cluster"
        cluster = cm_api.get_all_clusters().objects[0]
        cluster.start().wait()
        print "Starting the Cloudera Management Service"
        mgmt_service = cloudera_manager.get_service()
        mgmt_service.start().wait()
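# A hedged verification step (not part of the original flow): before starting
# the cluster back up, one could confirm every host actually picked up JDK 1.8.
# This uses only the Fabric primitives (`run`, `execute`, `@parallel`) already
# used above; the function name is an assumption, and the call is left
# commented out because `cluster_hosts` is local to `install_java_8`.
@parallel
def _verify_jdk8_sketch():
    # the glob is expanded by the remote shell
    run('/usr/java/jdk1.8*/bin/java -version')

# execute(_verify_jdk8_sketch, hosts=cluster_hosts)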