Ejemplo n.º 1
0
def admit(server_principal: str) -> str:
    """Request admission for a server and return the decoded, stripped reply.

    The principal is first resolved to a hostname via ``config.get_fqdn``.
    Two mechanisms are then tried in order:

    1. If the configuration reports kerberos as enabled, the "bootstrap-token"
       request is issued through ``access.call_keyreq``.
    2. Otherwise, or if step 1 raised, the "keyinitadmit" command is run on the
       keyserver over ssh.

    Each failure prints a one-line notice; the full traceback is printed only
    when the SPIRE_DEBUG environment variable is set.

    Raises:
        command.MultipleExceptions: when both attempts raised.
        Exception: when only one attempt raised; chained to that error.
    """
    cfg = configuration.get_config()
    target_fqdn = cfg.get_fqdn(server_principal)

    failures = []

    def record_failure(notice, error):
        # Must be called from inside an ``except`` block so that
        # traceback.print_exc() can see the exception being handled.
        print(notice)
        if os.environ.get('SPIRE_DEBUG'):
            traceback.print_exc()
        failures.append(error)

    # First choice: kerberos-authenticated request (skipped when disabled).
    try:
        if cfg.is_kerberos_enabled():
            reply = access.call_keyreq("bootstrap-token", target_fqdn)
            return reply.decode().strip()
    except Exception as keyreq_error:
        record_failure('[keyreq failed, set SPIRE_DEBUG for traceback]',
                       keyreq_error)

    # Fallback: ask the keyserver directly over ssh.
    try:
        reply = ssh.check_ssh_output(cfg.keyserver, "keyinitadmit",
                                     target_fqdn)
        return reply.decode().strip()
    except Exception as ssh_error:
        record_failure('[keyinitadmit failed, set SPIRE_DEBUG for traceback]',
                       ssh_error)

    if len(failures) <= 1:
        raise Exception('admit failed') from failures[0]
    raise command.MultipleExceptions('admit failed', failures)
Ejemplo n.º 2
0
def check_user_grant():
    """Check that user-grant hands out a kubeconfig and that the kubeconfig works.

    The local grant-user key and certificate are copied to a master node, and
    user-grant is queried with curl from that node while a temporary DNS entry
    maps the user-grant domain to the service IP.  The returned kubeconfig must
    contain a known phrase, and is then used with kubectl to fetch the
    auto-created rolebinding, whose roleRef name must be "admin".

    Failures are reported through ``command.fail``.
    """
    config = configuration.get_config()

    # There is no load balancing yet, so the request has to originate from
    # *inside the cluster*: look up the service IP, push the local user
    # credentials to a node, and run curl there.
    # TODO: once load balancing is ready, make this whole thing much simpler

    # A master node is picked so that we are confident we are not talking to
    # the node hosting user-grant itself.  Nothing else requires a master; a
    # worker would normally be preferred, but this only runs against
    # non-production clusters, so it does not matter.
    relay_node = config.get_any_node("master")

    grant_ip = get_service_ip("user-grant")
    local_key, local_cert = authority.get_local_grant_user_paths()
    remote_key = "/etc/homeworld/testing/usergrant.key"
    remote_cert = "/etc/homeworld/testing/usergrant.pem"

    # Clear any stale copies, make sure the directory exists, then upload.
    ssh.check_ssh(relay_node, "rm", "-f", remote_key, remote_cert)
    ssh.check_ssh(relay_node, "mkdir", "-p", "/etc/homeworld/testing")
    for local_path, remote_path in ((local_key, remote_key),
                                    (local_cert, remote_cert)):
        ssh.check_scp_up(relay_node, local_path, remote_path)

    setup.modify_temporary_dns(relay_node,
                               {config.user_grant_domain: grant_ip})
    try:
        grant_url = "https://%s/" % config.user_grant_domain
        raw_reply = ssh.check_ssh_output(relay_node, "curl",
                                         "--key", remote_key,
                                         "--cert", remote_cert,
                                         grant_url)
        kubeconfig = raw_reply.decode()
    finally:
        # Always drop the temporary DNS override, even if curl failed.
        setup.modify_temporary_dns(relay_node, {})

    expected_phrase = "it allows authenticating to the Hyades cluster as you"
    if expected_phrase not in kubeconfig:
        command.fail(
            "invalid kubeconfig: did not see phrase " + repr(expected_phrase),
            "kubeconfig received read as follows: " + repr(kubeconfig))

    print("successfully retrieved kubeconfig from user-grant!")

    # We now hold a kubeconfig generated by user-grant; confirm it is usable
    # by fetching the auto-created rolebinding and sanity-checking it.
    with tempfile.TemporaryDirectory() as scratch_dir:
        granted_path = os.path.join(scratch_dir, "granted-kubeconfig")
        util.writefile(granted_path, kubeconfig.encode())

        kubectl_argv = [
            "hyperkube", "kubectl", "--kubeconfig", granted_path, "-o",
            "json", "get", "rolebindings",
            "auto-grant-" + authority.UPSTREAM_USER_NAME
        ]
        kubectl_output = subprocess.check_output(kubectl_argv)
        rolebinding = json.loads(kubectl_output.decode())

        bound_role = rolebinding.get("roleRef", {}).get("name")
        if bound_role != "admin":
            command.fail("rolebinding for user was not admin in %s" %
                         repr(rolebinding))

    print("autogenerated rolebinding for user",
          repr(authority.UPSTREAM_USER_NAME), "passed basic check!")
Ejemplo n.º 3
0
def admit(server_principal: str) -> str:
    """Request admission for a server and return the decoded, stripped reply.

    The principal is resolved to a hostname via ``config.get_fqdn``.  When the
    configuration reports kerberos as enabled, the "bootstrap-token" request
    goes through ``access.call_keyreq``; otherwise "keyinitadmit" is run on the
    keyserver over ssh with the keyserver config path, the keyserver's full
    hostname, the principal hostname and "bootstrap-keyinit" as arguments.
    """
    config = configuration.get_config()
    principal_hostname = config.get_fqdn(server_principal)

    if config.is_kerberos_enabled():
        reply = access.call_keyreq("bootstrap-token", principal_hostname)
        return reply.decode().strip()

    # No kerberos: run the admit command on the keyserver itself.
    keyserver_fqdn = config.keyserver.hostname + "." + config.external_domain
    reply = ssh.check_ssh_output(
        config.keyserver,
        "keyinitadmit",
        setup.CONFIG_DIR + "/keyserver.yaml",
        keyserver_fqdn,
        principal_hostname,
        "bootstrap-keyinit",
    )
    return reply.decode().strip()
Ejemplo n.º 4
0
 def safe_upload_keytab(node=node):
     """Upload the decrypted keytab to ``node`` without clobbering a different one.

     When ``overwrite_keytab`` is set, the keytab is uploaded unconditionally.
     Otherwise the remote keytab is read with ``cat``: if it matches the local
     one nothing is uploaded, if it differs ``command.fail`` is called, and if
     ``cat`` exits with status 1 (treated as "no keytab present") the local
     keytab is uploaded.  Any other ``cat`` exit status is reported through
     ``command.fail``.
     """
     if overwrite_keytab:
         ssh.upload_bytes(node, decrypted, KEYTAB_PATH)
         return

     try:
         remote_keytab = ssh.check_ssh_output(node, "cat", KEYTAB_PATH)
     except subprocess.CalledProcessError as cat_error:
         # cat exits with status 1 when the keytab file does not exist;
         # anything else is an unexpected failure.
         if cat_error.returncode != 1:
             command.fail(cat_error)
         print("no existing keytab found, uploading local keytab")
     else:
         if remote_keytab != decrypted:
             command.fail("existing keytab does not match local keytab")
         # remote keytab already matches the local one: nothing to do
         return

     ssh.upload_bytes(node, decrypted, KEYTAB_PATH)
Ejemplo n.º 5
0
 def talk():
     """Ping the listener's address from a fresh container on ``worker_talker``.

     Waits up to 25 seconds on ``event`` for the listener address to be
     published in ``found_address[0]``, then launches a container that pings
     that address once and echoes a success or failure marker.  The last line
     of the command output decides the outcome.

     Returns True when the success marker is seen; every other outcome is
     reported through ``command.fail``.
     """
     address_ready = event.wait(25)
     if not address_ready:
         command.fail("timed out while waiting for IPv4 address of listener")

     address = found_address[0]
     if address is None:
         command.fail("no address was specified by listener")

     ping_script = "ping -c 1 %s && echo 'PING RESULT SUCCESS' || echo 'PING RESULT FAIL'" % address
     rkt_argv = [
         "rkt", "run", "--net=rkt.kubernetes.io", "homeworld.mit.edu/debian",
         "--exec", "/bin/bash", "--", "-c", setup.escape_shell(ping_script),
     ]
     output = ssh.check_ssh_output(worker_talker, *rkt_argv)

     # Normalize line endings and drop NUL bytes before picking the last line.
     cleaned = output.replace(b"\r\n", b"\n").replace(b"\0", b'').strip()
     final_line = cleaned.split(b"\n")[-1]

     if b"PING RESULT FAIL" in final_line:
         command.fail("was not able to ping the target container; is flannel working?")
     elif b"PING RESULT SUCCESS" not in final_line:
         command.fail("could not launch container to test flannel properly")
     return True
Ejemplo n.º 6
0
def check_dns_function():
    """Run an nslookup from a container on a random worker and check the answer.

    A worker node is chosen at random, a container is launched on it that
    resolves kubernetes.default.svc.hyades.local against 172.28.0.2, and the
    last line of the output must end with "Address: 172.28.0.1".  Problems are
    reported through ``command.fail``.
    """
    config = configuration.get_config()

    worker_nodes = [candidate for candidate in config.nodes
                    if candidate.kind == "worker"]
    if not worker_nodes:
        command.fail("expected at least one worker node")
    worker = random.choice(worker_nodes)

    print("trying dns functionality test with", worker)

    lookup_script = "nslookup kubernetes.default.svc.hyades.local 172.28.0.2"
    rkt_argv = [
        "rkt", "run", "homeworld.mit.edu/debian",
        "--exec", "/bin/bash", "--", "-c", setup.escape_shell(lookup_script),
    ]
    output = ssh.check_ssh_output(worker, *rkt_argv)

    # Normalize line endings and drop NUL bytes before picking the last line.
    cleaned = output.replace(b"\r\n", b"\n").replace(b"\0", b'').strip()
    final_line = cleaned.split(b"\n")[-1]

    if not final_line.endswith(b"Address: 172.28.0.1"):
        command.fail("unexpected last line: %s" % repr(final_line.decode()))

    print("dns-addon seems to work!")
Ejemplo n.º 7
0
def check_aci_pull():
    """Pull and run a container on a random worker and check it can ping 8.8.8.8.

    The container is started with ``--pull-policy=update`` and echoes a success
    or failure marker depending on the ping result.  The last line of the
    output is inspected for those markers; any problem is reported through
    ``command.fail``.
    """
    config = configuration.get_config()

    worker_nodes = [candidate for candidate in config.nodes
                    if candidate.kind == "worker"]
    if not worker_nodes:
        command.fail("expected nonzero number of worker nodes")
    worker = random.choice(worker_nodes)
    print("trying container pulling on: %s" % worker)

    ping_script = "ping -c 1 8.8.8.8 && echo 'PING RESULT SUCCESS' || echo 'PING RESULT FAIL'"
    rkt_argv = [
        "rkt", "run", "--pull-policy=update", "homeworld.mit.edu/debian",
        "--exec", "/bin/bash", "--", "-c", setup.escape_shell(ping_script),
    ]
    output = ssh.check_ssh_output(worker, *rkt_argv)

    # Normalize line endings and drop NUL bytes before picking the last line.
    cleaned = output.replace(b"\r\n", b"\n").replace(b"\0", b'').strip()
    final_line = cleaned.split(b"\n")[-1]

    saw_failure = b"PING RESULT FAIL" in final_line
    saw_success = b"PING RESULT SUCCESS" in final_line

    if saw_failure:
        if saw_success:
            command.fail("should not have seen both success and failure markers in last line")
        command.fail("cluster network probably not up (could not ping 8.8.8.8)")
    elif not saw_success:
        command.fail("container does not seem to have launched properly; container launches are likely broken (line = %s)" % repr(final_line))

    print("container seems to be launched, with the correct network!")
Ejemplo n.º 8
0
def check_online(server=None):
    """Report which configured nodes answer an ssh round-trip.

    Args:
        server: optional selector; a node is selected when this equals its
            hostname, its ip, or its hostname joined to the configured
            external domain.  When None, every configured node is checked.

    Each selected node is asked to echo "round-trip" over ssh and a status line
    is printed for it.  ``command.fail`` is called when no node is selected or
    when any selected node is offline.
    """
    config = configuration.get_config()

    def matches_selector(node):
        # Accept the short hostname, the IP address or the full hostname.
        return (node.hostname == server
                or node.ip == server
                or node.hostname + "." + config.external_domain == server)

    if server is not None:
        found = [node for node in config.nodes if matches_selector(node)]
        if not found:
            command.fail("could not find server '%s' in setup.yaml" % server)
    else:
        found = config.nodes
        if not found:
            command.fail("no nodes configured")

    any_offline = False
    for node in found:
        try:
            reply = ssh.check_ssh_output(node, "echo", "round-trip")
        except subprocess.CalledProcessError:
            # A failing ssh command counts as the node being offline.
            is_online = False
        else:
            is_online = (reply == b"round-trip\n")

        if not is_online:
            any_offline = True

        status = "[ONLINE]" if is_online else "[OFFLINE]"
        print("NODE:", node.hostname.ljust(30), status.rjust(10))

    if any_offline:
        command.fail("not all nodes were online!")
    print("All nodes: [ONLINE]")