def admit(server_principal: str) -> str:
    """Obtain a bootstrap token for the given server principal.

    Prefers keyreq when Kerberos auth is enabled, then falls back to
    running keyinitadmit on the keyserver over SSH. Failures from both
    attempts are collected so the final exception reflects everything
    that went wrong.
    """
    config = configuration.get_config()
    principal_hostname = config.get_fqdn(server_principal)
    failures = []
    try:
        if config.is_kerberos_enabled():
            token = access.call_keyreq("bootstrap-token", principal_hostname)
            return token.decode().strip()
    except Exception as err:
        print('[keyreq failed, set SPIRE_DEBUG for traceback]')
        if os.environ.get('SPIRE_DEBUG'):
            traceback.print_exc()
        failures.append(err)
    try:
        raw = ssh.check_ssh_output(config.keyserver, "keyinitadmit", principal_hostname)
        return raw.decode().strip()
    except Exception as err:
        print('[keyinitadmit failed, set SPIRE_DEBUG for traceback]')
        if os.environ.get('SPIRE_DEBUG'):
            traceback.print_exc()
        failures.append(err)
    # both paths failed (or only the ssh path ran and failed)
    if len(failures) > 1:
        raise command.MultipleExceptions('admit failed', failures)
    raise Exception('admit failed') from failures[0]
def _fetch_kubeconfig_via_node(config, proxy_node):
    """Retrieve a kubeconfig from the user-grant service by curling it from proxy_node.

    Uploads the local grant-user key/cert to the node, points a temporary DNS
    entry for the user-grant domain at the service IP, and performs the HTTPS
    request from there. The temporary DNS override is always removed, even if
    the request fails.
    """
    service_ip = get_service_ip("user-grant")
    user_key, user_cert = authority.get_local_grant_user_paths()
    remote_key, remote_cert = "/etc/homeworld/testing/usergrant.key", "/etc/homeworld/testing/usergrant.pem"
    ssh.check_ssh(proxy_node, "rm", "-f", remote_key, remote_cert)
    ssh.check_ssh(proxy_node, "mkdir", "-p", "/etc/homeworld/testing")
    ssh.check_scp_up(proxy_node, user_key, remote_key)
    ssh.check_scp_up(proxy_node, user_cert, remote_cert)
    setup.modify_temporary_dns(proxy_node, {config.user_grant_domain: service_ip})
    try:
        return ssh.check_ssh_output(
            proxy_node, "curl", "--key", remote_key, "--cert", remote_cert,
            "https://%s/" % config.user_grant_domain).decode()
    finally:
        # always undo the temporary DNS override, even on curl failure
        setup.modify_temporary_dns(proxy_node, {})


def _check_granted_kubeconfig_rolebinding(kubeconfig):
    """Confirm the auto-created rolebinding for the granted kubeconfig grants admin.

    Writes the kubeconfig to a temp dir, queries the expected rolebinding with
    kubectl, and fails unless its roleRef names the "admin" role.
    """
    with tempfile.TemporaryDirectory() as workdir:
        kubeconfig_path = os.path.join(workdir, "granted-kubeconfig")
        util.writefile(kubeconfig_path, kubeconfig.encode())
        rolebinding = json.loads(
            subprocess.check_output([
                "hyperkube", "kubectl", "--kubeconfig", kubeconfig_path, "-o", "json",
                "get", "rolebindings", "auto-grant-" + authority.UPSTREAM_USER_NAME
            ]).decode())
    if rolebinding.get("roleRef", {}).get("name") != "admin":
        command.fail("rolebinding for user was not admin in %s" % repr(rolebinding))
    print("autogenerated rolebinding for user", repr(authority.UPSTREAM_USER_NAME), "passed basic check!")


def check_user_grant():
    "verify that user-grant and its kubeconfigs work"
    config = configuration.get_config()
    # because we don't yet have load balancing, we have to somehow get *inside the cluster* to test this.
    # that means figuring out the IP address for the user-grant service, uploading the local user cert to the master
    # node, and then authenticating to user-grant via curl on the master node. bluh.
    # TODO: once load balancing is ready, make this whole thing much simpler

    # we use a master node so we're confident we aren't connecting to the node where user-grant is hosted. there's
    # nothing about this that otherwise requires it; usually we'd choose a worker node to avoid running unnecessary code
    # on the master nodes, but this is entirely for testing in non-production clusters, so it doesn't matter.
    proxy_node = config.get_any_node("master")
    kubeconfig = _fetch_kubeconfig_via_node(config, proxy_node)

    magic_phrase = "it allows authenticating to the Hyades cluster as you"
    if magic_phrase not in kubeconfig:
        command.fail(
            "invalid kubeconfig: did not see phrase " + repr(magic_phrase),
            "kubeconfig received read as follows: " + repr(kubeconfig))
    print("successfully retrieved kubeconfig from user-grant!")

    # at this point, we have a kubeconfig generated by user-grant, and now we want to confirm that it works.
    # we'll confirm that the kubeconfig works by checking that the auto-created rolebinding passes the sniff test.
    _check_granted_kubeconfig_rolebinding(kubeconfig)
def admit(server_principal: str) -> str:
    """Obtain a bootstrap token for the given server principal.

    With Kerberos enabled, asks the keyserver via keyreq; otherwise runs
    keyinitadmit directly on the keyserver over SSH.
    """
    config = configuration.get_config()
    principal_hostname = config.get_fqdn(server_principal)
    if config.is_kerberos_enabled():
        token = access.call_keyreq("bootstrap-token", principal_hostname)
        return token.decode().strip()
    # no kerberos available: invoke keyinitadmit on the keyserver itself
    keyserver_hostname = "%s.%s" % (config.keyserver.hostname, config.external_domain)
    raw = ssh.check_ssh_output(
        config.keyserver, "keyinitadmit", setup.CONFIG_DIR + "/keyserver.yaml",
        keyserver_hostname, principal_hostname, "bootstrap-keyinit")
    return raw.decode().strip()
def safe_upload_keytab(node=node):
    """Upload the decrypted keytab to the node, unless a matching one is already there.

    With overwrite_keytab set, uploads unconditionally. Otherwise compares
    against any keytab already on the node and fails if they differ.
    """
    if overwrite_keytab:
        # forced overwrite: skip the remote comparison entirely
        ssh.upload_bytes(node, decrypted, KEYTAB_PATH)
        return
    try:
        remote_copy = ssh.check_ssh_output(node, "cat", KEYTAB_PATH)
    except subprocess.CalledProcessError as err:
        # if there is no existing keytab, cat will fail with error code 1
        if err.returncode != 1:
            command.fail(err)
        print("no existing keytab found, uploading local keytab")
    else:
        if remote_copy != decrypted:
            command.fail("existing keytab does not match local keytab")
        return  # existing keytab matches local keytab, no action required
    ssh.upload_bytes(node, decrypted, KEYTAB_PATH)
def talk():
    """Launch a container on the talker node and ping the listener's address.

    Waits up to 25 seconds for the listener to publish its IPv4 address,
    then runs a one-shot ping from inside a container on the flannel
    network and inspects the final line of output for the result marker.
    """
    if not event.wait(25):
        command.fail("timed out while waiting for IPv4 address of listener")
    target = found_address[0]
    if target is None:
        command.fail("no address was specified by listener")
    ping_script = "ping -c 1 %s && echo 'PING RESULT SUCCESS' || echo 'PING RESULT FAIL'" % target
    rkt_invocation = [
        "rkt", "run", "--net=rkt.kubernetes.io", "homeworld.mit.edu/debian",
        "--exec", "/bin/bash", "--", "-c", setup.escape_shell(ping_script)]
    output = ssh.check_ssh_output(worker_talker, *rkt_invocation)
    # normalize line endings, strip NULs, and keep only the final line
    cleaned = output.replace(b"\r\n", b"\n").replace(b"\0", b'').strip()
    final_line = cleaned.split(b"\n")[-1]
    if b"PING RESULT FAIL" in final_line:
        command.fail("was not able to ping the target container; is flannel working?")
    elif b"PING RESULT SUCCESS" not in final_line:
        command.fail("could not launch container to test flannel properly")
    return True
def check_dns_function():
    """Verify cluster DNS by running nslookup inside a container on a random worker.

    Queries the dns-addon (172.28.0.2) for the kubernetes service name and
    checks that the final output line reports the expected service address.
    """
    config = configuration.get_config()
    workers = [node for node in config.nodes if node.kind == "worker"]
    if not workers:
        command.fail("expected at least one worker node")
    chosen = random.choice(workers)
    print("trying dns functionality test with", chosen)
    lookup_script = "nslookup kubernetes.default.svc.hyades.local 172.28.0.2"
    rkt_invocation = [
        "rkt", "run", "homeworld.mit.edu/debian",
        "--exec", "/bin/bash", "--", "-c", setup.escape_shell(lookup_script)]
    output = ssh.check_ssh_output(chosen, *rkt_invocation)
    # normalize line endings, strip NULs, and keep only the final line
    final_line = output.replace(b"\r\n", b"\n").replace(b"\0", b'').strip().split(b"\n")[-1]
    if not final_line.endswith(b"Address: 172.28.0.1"):
        command.fail("unexpected last line: %s" % repr(final_line.decode()))
    print("dns-addon seems to work!")
def check_aci_pull():
    """Verify that a worker can pull a container image and launch it with networking.

    Forces an image pull (--pull-policy=update), pings 8.8.8.8 from inside
    the container, and inspects the final output line for the result marker.
    """
    config = configuration.get_config()
    workers = [node for node in config.nodes if node.kind == "worker"]
    if not workers:
        command.fail("expected nonzero number of worker nodes")
    chosen = random.choice(workers)
    print("trying container pulling on: %s" % chosen)
    probe_script = "ping -c 1 8.8.8.8 && echo 'PING RESULT SUCCESS' || echo 'PING RESULT FAIL'"
    rkt_invocation = [
        "rkt", "run", "--pull-policy=update", "homeworld.mit.edu/debian",
        "--exec", "/bin/bash", "--", "-c", setup.escape_shell(probe_script)]
    output = ssh.check_ssh_output(chosen, *rkt_invocation)
    # normalize line endings, strip NULs, and keep only the final line
    final_line = output.replace(b"\r\n", b"\n").replace(b"\0", b'').strip().split(b"\n")[-1]
    if b"PING RESULT FAIL" in final_line:
        # sanity check: the two markers should never appear together
        if b"PING RESULT SUCCESS" in final_line:
            command.fail("should not have seen both success and failure markers in last line")
        command.fail("cluster network probably not up (could not ping 8.8.8.8)")
    elif b"PING RESULT SUCCESS" not in final_line:
        command.fail("container does not seem to have launched properly; container launches are likely broken (line = %s)" % repr(final_line))
    print("container seems to be launched, with the correct network!")
def check_online(server=None):
    """Report online/offline status of cluster nodes via an SSH round-trip.

    With server=None every configured node is checked; otherwise only the
    node matching the given hostname, IP, or fully-qualified name. Fails
    if any checked node does not answer.
    """
    config = configuration.get_config()
    if server is None:
        candidates = config.nodes
        if not candidates:
            command.fail("no nodes configured")
    else:
        # accept short hostname, raw IP, or hostname.external_domain
        candidates = [
            node for node in config.nodes
            if server in (node.hostname, node.ip, node.hostname + "." + config.external_domain)]
        if not candidates:
            command.fail("could not find server '%s' in setup.yaml" % server)
    offline = []
    for node in candidates:
        try:
            reachable = ssh.check_ssh_output(node, "echo", "round-trip") == b"round-trip\n"
        except subprocess.CalledProcessError:
            reachable = False
        if not reachable:
            offline.append(node)
        print("NODE:", node.hostname.ljust(30), ("[ONLINE]" if reachable else "[OFFLINE]").rjust(10))
    if offline:
        command.fail("not all nodes were online!")
    print("All nodes: [ONLINE]")