def web_proxy(region, stack_name):
    """Open SSH tunnels to the cluster web UIs and block until interrupted.

    A tunnel is started for the Cloudera Manager web UI on the manager
    instance and for the YARN ResourceManager and JobHistory UIs on the
    master instance.  Each tunnel uses the same port number on both ends.
    A table with the public/private addresses and ports is printed as the
    tunnels come up.

    The call then blocks on the last tunnel started.  On exit (including
    on an exception or Ctrl-C) every tunnel that was started is terminated.

    Args:
        region: passed to ``create_ec2_connection``.
        stack_name: name of the stack whose instances are looked up.
    """
    ec2_conn = create_ec2_connection(region)
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    master_instance = get_master_instance(ec2_conn, stack_name)

    # (display name, instance, port)
    endpoints = (
        ("CM WebUI", manager_instance, 7180),
        ("YARN RM", master_instance, 8088),
        ("YARN JobHistory", master_instance, 19888),
    )

    ts = "{0:<22}{1:<17}{2:<17}{3:<7}localhost:{4}"
    print(ts.format("name", "public", "private", "remote", "local"))

    tunnels = []
    try:
        # Tunnels are created inside the try so that a failure while starting
        # a later tunnel still tears down the ones already running.
        for name, instance, port in endpoints:
            tunnels.append(non_blocking_tunnel(instance, port, port))
            print(ts.format(name, instance.ip_address,
                            instance.private_ip_address, port, port))
        # block on an arbitrary ssh tunnel
        tunnels[-1].wait()
    finally:
        for tunnel in tunnels:
            tunnel.terminate()
def describe(region, stack_name):
    """Print the role and ``ip_address`` of every instance in the stack.

    Output is one ``<Role> <address>`` line each for the launcher, manager
    and master instances, followed by one ``Worker <address>`` line per
    worker instance.

    Args:
        region: passed to ``create_ec2_connection``.
        stack_name: name of the stack whose instances are looked up.
    """
    ec2_conn = create_ec2_connection(region)

    singleton_roles = (
        ("Launcher", get_launcher_instance),
        ("Manager", get_manager_instance),
        ("Master", get_master_instance),
    )
    # print() is called with a single pre-formatted string so the output is
    # the same whether ``print`` is the Python 2 statement or the function.
    for role, find_instance in singleton_roles:
        address = find_instance(ec2_conn, stack_name).ip_address
        print("{0} {1}".format(role, address))

    for instance in get_worker_instances(ec2_conn, stack_name):
        print("{0} {1}".format("Worker", instance.ip_address))
def install_env_vars(region, stack_name):
    """Export MANAGER_HOST in the ec2-user bash profile on the master node.

    NOTE: the value written is the manager's PRIVATE IP address, while the
    master node itself is reached through its ``ip_address``.

    Args:
        region: passed to ``create_ec2_connection``.
        stack_name: name of the stack whose instances are looked up.
    """
    conn = create_ec2_connection(region)
    master_address = get_master_instance(conn, stack_name).ip_address
    manager = get_manager_instance(conn, stack_name)
    export_line = "export MANAGER_HOST={0}".format(manager.private_ip_address)

    def do():
        append("/home/ec2-user/.bash_profile", export_line)

    execute(do, hosts=[master_address])
def login(region, stack_name, node):
    """Open an interactive shell on one node of the stack.

    Args:
        region: passed to ``create_ec2_connection``.
        stack_name: name of the stack whose instances are looked up.
        node: one of ``"master"``, ``"manager"`` or ``"launcher"``.

    Raises:
        EggoError: if ``node`` is not one of the recognised node types.
            The check happens after the EC2 connection has been created.
    """
    print("Logging into the {0} node...".format(node))
    ec2_conn = create_ec2_connection(region)

    # Node type -> function that locates the matching instance.
    finders = (
        ("master", get_master_instance),
        ("manager", get_manager_instance),
        ("launcher", get_launcher_instance),
    )
    for node_type, find_instance in finders:
        if node == node_type:
            address = find_instance(ec2_conn, stack_name).ip_address
            break
    else:
        raise EggoError('"{0}" is not a valid node type'.format(node))

    execute(open_shell, hosts=[address])
def config_cluster(region, stack_name):
    """Run the full post-provisioning configuration of the cluster.

    Most steps are Fabric tasks executed on the master node (addressed by
    its ``ip_address``).  ``install_env_vars``, ``install_java_8`` and
    ``adjust_yarn_memory_limits`` look up their own hosts from ``region``
    and ``stack_name``.  The steps run strictly in the order written below.
    When everything has finished, the elapsed time in whole minutes is
    printed.

    Args:
        region: passed to ``create_ec2_connection`` and the sub-steps.
        stack_name: name of the stack to configure.
    """
    start_time = datetime.now()

    ec2_conn = create_ec2_connection(region)
    master_host = get_master_instance(ec2_conn, stack_name).ip_address

    def on_master(*tasks):
        # Run each task, in order, against the master node only.
        for task in tasks:
            execute(task, hosts=[master_host])

    on_master(install_private_key, create_hdfs_home)
    install_env_vars(region, stack_name)
    install_java_8(region, stack_name)
    on_master(install_dev_tools, install_git, install_maven, install_gradle,
              install_adam)
    install_opencb([master_host])
    on_master(install_hellbender, install_quince, install_eggo)
    adjust_yarn_memory_limits(region, stack_name)

    end_time = datetime.now()
    # total_seconds() includes the ``days`` component that ``.seconds`` drops;
    # floor division keeps the whole-minute result on Python 2 and Python 3.
    minutes = int((end_time - start_time).total_seconds() // 60)
    print("Cluster configured. Took {t} minutes.".format(t=minutes))
def install_java_8(region, stack_name):
    """Replace the installed JDKs with JDK 1.8 on every cluster node.

    Follows the general protocol for upgrading to JDK 1.8 described here:
    http://www.cloudera.com/content/cloudera/en/documentation/core/v5-3-x/topics/cdh_cm_upgrading_to_jdk8.html

    Steps, in order:
      1. Through the CM API: stop the Cloudera Management Service, then stop
         the first cluster returned by ``get_all_clusters()``.
      2. Stop the CM agents on all nodes, then the CM server on the manager.
      3. On all nodes: remove existing JDK packages and directories, download
         and install the JDK 8 RPM, and append a JAVA_HOME export to the
         ec2-user bash profile.
      4. Start the CM server, then the CM agents.
      5. Through the CM API: start the cluster, then the management service.

    "All nodes" means the worker instances plus the manager and master
    instances, addressed by their ``ip_address``.

    Args:
        region: passed to ``create_ec2_connection``.
        stack_name: name of the stack whose instances are upgraded.
    """
    # The CM API client talks to localhost:<local_api_port>; presumably
    # http_tunnel_ctx forwards that port to <cm_port> on the manager
    # instance -- confirm against http_tunnel_ctx.
    cm_port = 7180
    local_api_port = 64999

    ec2_conn = create_ec2_connection(region)
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    cluster_instances = get_worker_instances(ec2_conn, stack_name) + [
        manager_instance,
        get_master_instance(ec2_conn, stack_name),
    ]
    cluster_hosts = [i.ip_address for i in cluster_instances]
    manager_hosts = [manager_instance.ip_address]

    # Connect to CM API
    cm_api = ApiResource("localhost", username="******", password="******",
                         server_port=local_api_port, version=9)
    cloudera_manager = cm_api.get_cloudera_manager()

    with http_tunnel_ctx(manager_instance, cm_port, local_api_port):
        # Stop Cloudera Management Service
        print("Stopping Cloudera Management Service")
        mgmt_service = cloudera_manager.get_service()
        mgmt_service.stop().wait()

        # Stop cluster
        print("Stopping the cluster")
        clusters = cm_api.get_all_clusters()
        cluster = clusters.objects[0]
        cluster.stop().wait()

    # Stop all Cloudera Manager Agents
    @parallel
    def stop_cm_agents():
        sudo("service cloudera-scm-agent stop")

    execute(stop_cm_agents, hosts=cluster_hosts)

    # Stop the Cloudera Manager Server
    def stop_cm_server():
        sudo("service cloudera-scm-server stop")

    execute(stop_cm_server, hosts=manager_hosts)

    # Cleanup other Java versions and install JDK 1.8
    @parallel
    def swap_jdks():
        sudo("rpm -qa | grep jdk | xargs rpm -e")
        sudo("rm -rf /usr/java/jdk1.6*")
        sudo("rm -rf /usr/java/jdk1.7*")
        # NOTE(review): the RPM is fetched over plain HTTP with
        # --no-check-certificate and is not checksum-verified before install.
        run(
            "wget -O jdk-8-linux-x64.rpm --no-cookies --no-check-certificate "
            '--header "Cookie: oraclelicense=accept-securebackup-cookie" '
            "http://download.oracle.com/otn-pub/java/jdk/8u51-b16/"
            "jdk-8u51-linux-x64.rpm"
        )
        sudo("yum install -y jdk-8-linux-x64.rpm")
        append("/home/ec2-user/.bash_profile",
               'export JAVA_HOME=`find /usr/java -name "jdk1.8*"`')

    execute(swap_jdks, hosts=cluster_hosts)

    # Start the Cloudera Manager Server
    def start_cm_server():
        sudo("service cloudera-scm-server start")

    execute(start_cm_server, hosts=manager_hosts)

    # Start all Cloudera Manager Agents
    @parallel
    def start_cm_agents():
        sudo("service cloudera-scm-agent start")

    execute(start_cm_agents, hosts=cluster_hosts)

    with http_tunnel_ctx(manager_instance, cm_port, local_api_port):
        # Start the cluster and the mgmt service
        print("Starting the cluster")
        cluster.start().wait()

        print("Starting the Cloudera Management Service")
        # The manager/service handles are re-fetched after the CM server
        # restart rather than reusing the ones obtained before the stop.
        cloudera_manager = cm_api.get_cloudera_manager()
        mgmt_service = cloudera_manager.get_service()
        mgmt_service.start().wait()