Ejemplo n.º 1
0
def end_of_startup(opts, master_ip, masters):
    master_name = ecs.get_instance_info(masters[0])['HostName']
    print """
+--------------------------------------------------------+
+        Spark Cluster Started Successfully!             +
+--------------------------------------------------------+
The Spark Cluster Configuration listed as following:

    Spark Cluster:

        Master Node IP:  %s
        Spark UI:        http://%s:8080
        Master URL:      spark://%s:7077

    """ % (master_ip, master_ip, master_name)

    if opts.enable_hdfs:
        print """
    HDFS NameNode URL:   hdfs://%s:9000
        """ % master_ip

    if opts.enable_spark_notebook:
        print """
    Spark Notebook:      http://%s:9090
        """ % master_ip
    if opts.enable_hue:
        print """
    Hue:                 http://%s:8888
        """ % master_ip
    print """
Ejemplo n.º 2
0
def ssh_read(instance_id, opts, command):
    """Run ``command`` on an ECS instance through ``sshpass`` + SSH.

    The target address is the first inner IP reported by
    ``ecs.get_instance_info`` for ``instance_id``; the login uses
    ``opts.user`` and ``opts.pwd``.

    Returns:
        Whatever ``_check_output`` returns for the assembled command line
        (presumably the captured output of the remote command).
    """
    info = ecs.get_instance_info(instance_id)
    inner_ip = info['InnerIpAddress']['IpAddress'][0]

    # Assemble: sshpass -p <pwd> <ssh command...> user@ip "<remote command>"
    argv = ["sshpass", "-p", opts.pwd] + ssh_command()
    argv.append('%s@%s' % (opts.user, inner_ip))
    argv.append(stringify_command(command))
    return _check_output(argv)
Ejemplo n.º 3
0
def do_ssh(instance_id, opts, command):
    """Run ``command`` on an ECS instance over SSH, retrying on failure.

    The target address is the first inner IP reported by
    ``ecs.get_instance_info`` for ``instance_id``; the login goes through
    ``sshpass`` with ``opts.user`` / ``opts.pwd``. A failed attempt is
    retried after a 10 second pause; the seventh consecutive failure is
    propagated to the caller.

    Returns:
        0, the exit status returned by ``subprocess.check_call``.

    Raises:
        UsageError: the last attempt exited with status 255.
        subprocess.CalledProcessError: the last attempt exited with any
            other non-zero status.
    """
    instance_info = ecs.get_instance_info(instance_id)
    ip = instance_info['InnerIpAddress']['IpAddress'][0]
    tries = 0
    while True:
        try:
            # check_call either returns 0 or raises CalledProcessError, so
            # no separate "non-zero result" check is needed here.
            return subprocess.check_call(
                ["sshpass", "-p", opts.pwd] + ssh_command() + [
                    '-t', '-t',
                    '%s@%s' % (opts.user, ip),
                    stringify_command(command)
                ])
        except subprocess.CalledProcessError as e:
            if tries > 5:
                # If this was an ssh failure, provide the user with hints.
                if e.returncode == 255:
                    raise UsageError(
                        "Failed to SSH to remote host {0}.\n".format(ip))
                # Bare raise keeps the original traceback intact.
                raise
            print >> stderr, \
                "Error executing remote command, retrying after 10 seconds."
            time.sleep(10)
            tries += 1
Ejemplo n.º 4
0
def is_cluster_ssh_available(cluster_instances, opts):
    """Tell whether ``is_ssh_available`` succeeds for every cluster instance.

    Each instance is checked on its first inner IP address. Checking stops at
    the first instance that fails, in which case ``False`` is returned; an
    empty ``cluster_instances`` yields ``True``.
    """
    def _reachable(instance_id):
        # Resolve the instance's inner IP, then probe it.
        details = ecs.get_instance_info(instance_id)
        inner_ip = details['InnerIpAddress']['IpAddress'][0]
        return is_ssh_available(inner_ip, opts, True)

    # all() short-circuits, so later instances are not queried after a failure.
    return all(_reachable(instance_id) for instance_id in cluster_instances)
Ejemplo n.º 5
0
def stop_nginx(opts, masters):
    """Stop nginx via ``do_stop_nginx`` and print the outcome.

    The first entry of ``masters`` is looked up through
    ``ecs.get_instance_info`` and its first public IP is handed to
    ``do_stop_nginx``. A result code of 1 is reported as success, anything
    else as failure.
    """
    print("==> Stopping nginx service...")
    master_info = ecs.get_instance_info(masters[0])
    public_ip = master_info['PublicIpAddress']['IpAddress'][0]

    if do_stop_nginx(opts, public_ip) == 1:
        outcome = "[success] stop nginx succcess ..."
    else:
        outcome = "[error] stop nginx failed ..."
    print(outcome)
Ejemplo n.º 6
0
def prepare_hosts(master, slaves, opts):
    """Write the cluster hosts file and hand it to ``update_hosts`` per node.

    One ``"<inner ip>  <hostname>"`` line is written for the master followed
    by one per slave into ``GlobalVar.SPARK_ECS_DIR/GlobalVar.CLUSTER_HOSTS``.
    Afterwards ``update_hosts`` is called for the master and then for every
    slave with ``GlobalVar.SPARK_ECS_DIR`` and ``"/root/"`` (presumably the
    local source directory and the remote destination -- verify against
    ``update_hosts``).

    Args:
        master: instance id of the master node.
        slaves: iterable of slave instance ids.
        opts: options object forwarded to ``update_hosts``.
    """
    # Materialise once so the same node order is used for both passes, even
    # if ``slaves`` is a one-shot iterable.
    nodes = [master] + list(slaves)
    hosts_path = GlobalVar.SPARK_ECS_DIR + "/" + GlobalVar.CLUSTER_HOSTS

    # The context manager closes the file even if an instance lookup fails.
    with open(hosts_path, 'w') as cluster_hosts:
        for node in nodes:
            instance_info = ecs.get_instance_info(node)
            host = instance_info['HostName']
            ip = instance_info['InnerIpAddress']['IpAddress'][0]
            cluster_hosts.write(ip + "  " + host + "\n")

    # The file must be complete and closed before it is distributed.
    for node in nodes:
        update_hosts(node, opts, GlobalVar.SPARK_ECS_DIR, "/root/")
Ejemplo n.º 7
0
def open_nginx(opts, masters):
    """Start nginx via ``start_nginx`` and print the outcome.

    ``start_nginx`` receives ``GlobalVar.CLUSTER_HOSTS`` and the first public
    IP of the first entry in ``masters``. A result code of 1 is reported as
    success, anything else as failure.
    """
    print("==> Starting nginx service...")
    hosts_file = GlobalVar.CLUSTER_HOSTS
    master_info = ecs.get_instance_info(masters[0])
    public_ip = master_info['PublicIpAddress']['IpAddress'][0]

    if start_nginx(opts, hosts_file, public_ip) == 1:
        outcome = "[success] start nginx succcess ..."
    else:
        outcome = "[error] start nginx failed ..."
    print(outcome)
Ejemplo n.º 8
0
def save_public_ips(masters, slaves):
    """Write ``"<public ip>  <hostname>"`` lines for all cluster nodes.

    The output file is ``GlobalVar.SPARK_ECS_DIR/GlobalVar.CLUSTER_HOSTS``
    with a ``-public`` suffix; masters are listed first, then slaves. Each
    node's hostname and first public IP come from ``ecs.get_instance_info``.

    Args:
        masters: list of master instance ids.
        slaves: list of slave instance ids.
    """
    public_hosts_path = (GlobalVar.SPARK_ECS_DIR + "/" +
                         GlobalVar.CLUSTER_HOSTS + "-public")

    # The context manager closes the file even if an instance lookup fails.
    with open(public_hosts_path, 'w') as cluster_hosts:
        for node in masters + slaves:
            instance_info = ecs.get_instance_info(node)
            host = instance_info['HostName']
            ip = instance_info['PublicIpAddress']['IpAddress'][0]
            cluster_hosts.write(ip + "  " + host + "\n")
Ejemplo n.º 9
0
def ssh_write(instance_id, opts, command, arguments):
    """Run ``command`` on an ECS instance over SSH, feeding it ``arguments``.

    ``arguments`` is written to the remote command's standard input, which is
    then closed. The target address is the first inner IP reported by
    ``ecs.get_instance_info``; the login goes through ``sshpass`` with
    ``opts.user`` / ``opts.pwd``. A non-zero exit status triggers a retry
    after 10 seconds.

    Raises:
        RuntimeError: the seventh consecutive attempt also exited non-zero.
    """
    info = ecs.get_instance_info(instance_id)
    inner_ip = info['InnerIpAddress']['IpAddress'][0]

    failures = 0
    while True:
        # Rebuild the command line for every attempt.
        argv = ["sshpass", "-p", opts.pwd] + ssh_command()
        argv.append('%s@%s' % (opts.user, inner_ip))
        argv.append(stringify_command(command))

        child = subprocess.Popen(argv, stdin=subprocess.PIPE)
        child.stdin.write(arguments)
        child.stdin.close()
        exit_status = child.wait()

        if exit_status == 0:
            return
        if failures > 5:
            raise RuntimeError("ssh_write failed with error %s" %
                               child.returncode)

        print >> stderr, \
            "Error {0} while executing remote command, retrying after 10 seconds".format(exit_status)
        time.sleep(10)
        failures += 1
Ejemplo n.º 10
0
def launch_cluster(opts, cluster_name):
    if opts.pwd == "":
        opts.pwd = getpass.getpass(
            """You need to provide a password for ECS instance.
If `CLIENT` mode, you just need to provide login machine's password.
If `CLUSTER` mode and `--include-gateway`, you just need to provide login machine's password.
If `CLUSTER` mode only, you need to set a new default password for each ECS instance.
Please set a password:""")

    if opts.ami is None:
        print "You need to specify an available ECS image, listed as following: \n"
        length = len(GlobalVar.AVAILABLE_SAPRK_VERSION)
        for idx in range(1, length + 1):
            id = "%s" % idx
            print idx, ': ', GlobalVar.AVAILABLE_SAPRK_VERSION[id]
        print
        msg = "Please choose an image No. (like: 1): "
        id = raw_input(msg)
        spark_version = GlobalVar.AVAILABLE_SAPRK_VERSION[id]
        opts.ami = GlobalVar.SPARK_IMAGES[(spark_version, opts.region)]

    if opts.instance_type is None:
        print "You need to specify the type of ECS instance, listed as following: \n\n" \
              "%-14s: %s" % ("type name",  "(cores, memory)")
        for instance_type in GlobalVar.ECS_INSTANCE_TYPE:
            print "%-14s: %s" % (instance_type,
                                 GlobalVar.ECS_INSTANCE_TYPE[instance_type])
        print
        msg = "Please choose an ECS instance type (like: ecs.t1.small): "
        opts.instance_type = str(raw_input(msg)).strip()

    print "==> Begin to launch Spark cluster..."
    print_shopping_list(opts)
    print "==> Setting internet security rules..."
    current_group_id = ecs.get_gateway_instance_info(
        opts)['SecurityGroupIds']['SecurityGroupId'][0]
    ecs.clear_security_group_rules(current_group_id, opts)
    authorized_address = opts.authorized_address
    ecs.authorize_security_group_in(current_group_id, 'tcp', "",
                                    authorized_address, '22/22', opts)
    ecs.authorize_security_group_out(current_group_id, 'tcp', "",
                                     authorized_address, '1/65535', opts)
    if opts.enable_slave_public_ip:
        ecs.authorize_security_group_in(current_group_id, 'tcp', "",
                                        authorized_address, '8080/8080', opts)
        ecs.authorize_security_group_in(current_group_id, 'tcp', "",
                                        authorized_address, '8081/8081', opts)
        ecs.authorize_security_group_in(current_group_id, 'tcp', "",
                                        authorized_address, '9000/9000', opts)

    print "==> Launching master and slaves..."
    # Launch slaves
    master_instances = []
    slave_instacens = []
    count = 0
    while (count < opts.slaves):
        slave_instance_name = cluster_name + "-slave-%s" % (count)
        slave_instance_id = ecs.launch_instance(
            opts,
            cluster_name,
            "slaves",
            opts.ami,
            opts.instance_type,
            current_group_id,
            slave_instance_name,
            opts.ibo,
            slave_instance_name,
            opts.pwd,
            open_public_ip=opts.enable_slave_public_ip)
        slave_instacens.append(slave_instance_id)
        count += 1

    if not opts.include_gateway:
        # Launch master
        master_instance_name = cluster_name + "-master"
        master_instance_id = ecs.launch_instance(opts,
                                                 cluster_name,
                                                 "masters",
                                                 opts.ami,
                                                 opts.instance_type,
                                                 current_group_id,
                                                 master_instance_name,
                                                 opts.ibo,
                                                 master_instance_name,
                                                 opts.pwd,
                                                 open_public_ip=True)
        master_instances.append(master_instance_id)
    else:
        gateway = ecs.get_gateway_instance_info(opts)['InstanceId']
        master_instances.append(gateway)
        save_masters_or_slaves(cluster_name, "masters", gateway)

    master_ip = ecs.get_instance_info(
        master_instances[0])['PublicIpAddress']['IpAddress'][0]

    return master_instances, slave_instacens, master_ip