def check_user_grant():
    "verify that user-grant and its kubeconfigs work"
    # Overall flow: fetch a kubeconfig from user-grant (via curl run on a
    # master node), sanity-check its contents, then use it with kubectl to
    # inspect the rolebinding that should have been created for the user.
    config = configuration.get_config()

    # There is no load balancing yet, so user-grant has to be reached from
    # *inside* the cluster: look up the service IP, upload the local user
    # key/cert to a node, and authenticate with curl from that node.
    # TODO: once load balancing is ready, make this whole thing much simpler

    # A master node is used as the proxy so that we can be confident we are
    # not connecting from the node where user-grant itself is hosted. Nothing
    # else requires a master; this only runs against non-production clusters.
    proxy = config.get_any_node("master")
    grant_ip = get_service_ip("user-grant")
    local_key, local_cert = authority.get_local_grant_user_paths()
    remote_key = "/etc/homeworld/testing/usergrant.key"
    remote_cert = "/etc/homeworld/testing/usergrant.pem"

    # clear out any stale credentials before uploading fresh ones
    ssh.check_ssh(proxy, "rm", "-f", remote_key, remote_cert)
    ssh.check_ssh(proxy, "mkdir", "-p", "/etc/homeworld/testing")
    for local_path, remote_path in ((local_key, remote_key), (local_cert, remote_cert)):
        ssh.check_scp_up(proxy, local_path, remote_path)

    grant_domain = config.user_grant_domain
    grant_url = "https://%s/" % grant_domain

    # temporarily point the user-grant domain at the service IP on the proxy;
    # the override is always removed again, even if the request fails
    setup.modify_temporary_dns(proxy, {grant_domain: grant_ip})
    try:
        raw_kubeconfig = ssh.check_ssh_output(
            proxy, "curl", "--key", remote_key, "--cert", remote_cert, grant_url)
        kubeconfig = raw_kubeconfig.decode()
    finally:
        setup.modify_temporary_dns(proxy, {})

    expected_phrase = "it allows authenticating to the Hyades cluster as you"
    if expected_phrase not in kubeconfig:
        command.fail(
            "invalid kubeconfig: did not see phrase " + repr(expected_phrase),
            "kubeconfig received read as follows: " + repr(kubeconfig))

    print("successfully retrieved kubeconfig from user-grant!")

    # We now hold a kubeconfig generated by user-grant; confirm that it works
    # by checking that the auto-created rolebinding passes the sniff test.
    with tempfile.TemporaryDirectory() as scratch:
        kubeconfig_path = os.path.join(scratch, "granted-kubeconfig")
        util.writefile(kubeconfig_path, kubeconfig.encode())

        kubectl_command = [
            "hyperkube", "kubectl", "--kubeconfig", kubeconfig_path,
            "-o", "json", "get", "rolebindings",
            "auto-grant-" + authority.UPSTREAM_USER_NAME,
        ]
        kubectl_output = subprocess.check_output(kubectl_command).decode()
        rolebinding = json.loads(kubectl_output)

        role_name = rolebinding.get("roleRef", {}).get("name")
        if role_name != "admin":
            command.fail("rolebinding for user was not admin in %s" % repr(rolebinding))

    print("autogenerated rolebinding for user", repr(authority.UPSTREAM_USER_NAME), "passed basic check!")
def check_certs_on_supervisor():
    "verify that the expected certificate files exist on every supervisor node"
    required_paths = (
        "/etc/homeworld/authorities/kubernetes.pem",
        "/etc/homeworld/keys/kubernetes-worker.pem",
    )
    config = configuration.get_config()
    for node in config.nodes:
        if node.kind != "supervisor":
            continue
        # `test -e` over ssh: check_ssh is relied on to report a missing file
        for path in required_paths:
            ssh.check_ssh(node, "test", "-e", path)
def launch_spec(spec_name):
    "upload the spec produced by get_single_kube_spec to the deployqueue on every supervisor node"
    config = configuration.get_config()
    spec_bytes = configuration.get_single_kube_spec(spec_name).encode()
    for node in config.nodes:
        if node.kind != "supervisor":
            continue
        ssh.check_ssh(node, "mkdir", "-p", DEPLOYQUEUE)
        # queue entries are named <unix time, whole seconds>.<spec name>
        queue_path = "%s/%d.%s" % (DEPLOYQUEUE, int(time.time()), spec_name)
        ssh.upload_bytes(node, spec_bytes, queue_path)
        print("Uploaded spec to deployqueue.")
def launch_spec(path, extra_kvs: dict = None, export=False):
    "render the spec at path, then either write it to a local file (export) or queue it on every supervisor"
    config = configuration.get_config()
    spec_bytes = configuration.get_single_kube_spec(path, extra_kvs).encode()

    # the file name is the path with its leading '//' stripped and every ':'
    # replaced by '-'
    assert path.startswith('//')
    yaml_name = path[2:].replace(":", "-")

    if export:
        # export mode: write the rendered spec locally and touch no nodes
        util.writefile(yaml_name, spec_bytes)
        return

    for node in config.nodes:
        if node.kind != "supervisor":
            continue
        ssh.check_ssh(node, "mkdir", "-p", DEPLOYQUEUE)
        # queue entries are named <unix time, fractional seconds>.<file name>
        queue_path = "%s/%f.%s" % (DEPLOYQUEUE, time.time(), yaml_name)
        ssh.upload_bytes(node, spec_bytes, queue_path)
        print("Uploaded spec to deployqueue.")
def ssh_raw(
    self,
    name: str,
    node: configuration.Node,
    script: str,
    in_directory: str = None,
    redirect_to: str = None,
) -> None:
    "register an operation that runs a raw shell script on a node over ssh"
    # name: label passed through to add_operation
    # node: node the script runs on (also forwarded as node=)
    # script: shell text, used as-is (not escaped here)
    # in_directory: if set, the script is prefixed with a `cd` into it
    # redirect_to: if set, the script's stdout is redirected to this path
    command_line = script
    if redirect_to:
        # wrap in a subshell so the redirect covers the whole script
        command_line = "(%s) >%s" % (command_line, escape_shell(redirect_to))
    if in_directory:
        # applied last, so the cd happens before the (possibly redirected) script
        command_line = "cd %s && %s" % (escape_shell(in_directory), command_line)
    # the ssh call is deferred: it only runs when the operation is executed
    self.add_operation(name, lambda: ssh.check_ssh(node, command_line), node=node)
def ssh_raw(
    ops,
    name: str,
    node: configuration.Node,
    script: str,
    in_directory: str = None,
    redirect_to: str = None,
) -> None:
    "register an operation on ops that runs a raw shell script on a node over ssh"
    # ops: object providing add_operation(name, callable)
    # name: operation label; every '@HOST' in it is replaced by node.hostname
    # node: node the script runs on
    # script: shell text, used as-is (not escaped here)
    # in_directory: if set, the script is prefixed with a `cd` into it
    # redirect_to: if set, the script's stdout is redirected to this path
    command_line = script
    if redirect_to:
        # wrap in a subshell so the redirect covers the whole script
        command_line = "(%s) >%s" % (command_line, escape_shell(redirect_to))
    if in_directory:
        # applied last, so the cd happens before the (possibly redirected) script
        command_line = "cd %s && %s" % (escape_shell(in_directory), command_line)
    operation_name = name.replace('@HOST', node.hostname)
    # the ssh call is deferred: it only runs when the operation is executed
    ops.add_operation(operation_name, lambda: ssh.check_ssh(node, command_line))
def check_certs_on_supervisor():
    "verify that certificates have been uploaded to the supervisor"
    # files that must exist on every node whose kind is "supervisor"
    required_paths = (
        "/etc/homeworld/authorities/kubernetes.pem",
        "/etc/homeworld/keys/kubernetes-supervisor.pem",
        "/etc/homeworld/ssl/homeworld.private.pem",
    )
    config = configuration.get_config()
    for node in config.nodes:
        if node.kind != "supervisor":
            continue
        # `test -e` over ssh: check_ssh is relied on to report a missing file
        for path in required_paths:
            ssh.check_ssh(node, "test", "-e", path)
def check_flannel_function():
    "check that a container on one worker node can ping a container on another worker node"
    # checking flannel functionality: a "listener" container reports its own
    # address, and a "talker" container on a different worker pings it.
    config = configuration.get_config()
    workers = [node for node in config.nodes if node.kind == "worker"]
    if len(workers) < 2:
        command.fail("expected at least two worker nodes")

    # pick two distinct workers at random
    talker = random.choice(workers)
    workers.remove(talker)
    listener = random.choice(workers)
    assert talker != listener

    print("trying flannel functionality test with", talker, "talking and", listener, "listening")

    # common prefix of every container launch below
    rkt_run = ["rkt", "run", "--net=rkt.kubernetes.io", "homeworld.mit.edu/debian"]

    print("checking launch on both systems...")
    # this is here to make sure both servers have pulled the relevant containers
    warmup_command = rkt_run + ["--", "-c", "/bin/true"]
    for worker in (talker, listener):
        ssh.check_ssh(worker, *warmup_command)

    print("ready -- this may take a minute... please be patient")

    listener_address = None  # filled in by listen(), read by talk()
    address_settled = threading.Event()  # set once the address is known, or listen() gave up

    def parse_address(first_line):
        # extract the bare IPv4 address from one line of `ip -o addr` output
        if "scope" not in first_line:
            command.fail("could not find scope line in ip addr output (%s)" % repr(first_line))
        fields = first_line.split(" ")
        if "inet" not in fields:
            command.fail("could not find inet address in ip addr output")
        address = fields[fields.index("inet") + 1]
        if not address.endswith("/24"):
            command.fail("expected address that ended in /24, not '%s'" % address)
        address = address[:-3]
        if address.count(".") != 3:
            command.fail("expected valid IPv4 address, not '%s'" % address)
        if not address.replace(".", "").isdigit():
            command.fail("expected valid IPv4 address, not '%s'" % address)
        return address

    def listen():
        nonlocal listener_address
        try:
            # print the container's eth0 address within 172.18/16, then stay
            # alive long enough for the talker to ping it
            container_command = "ip -o addr show dev eth0 to 172.18/16 primary && sleep 15"
            listen_command = rkt_run + ["--", "-c", setup.escape_shell(container_command)]
            ssh_argv = ssh.build_ssh(listener, *listen_command)
            with subprocess.Popen(ssh_argv, stdout=subprocess.PIPE, bufsize=1,
                                  universal_newlines=True) as process:
                listener_address = parse_address(process.stdout.readline())
                address_settled.set()
                process.communicate(timeout=20)
        finally:
            # always release the talker, even when no address was found
            address_settled.set()
        return True

    def talk():
        if not address_settled.wait(25):
            command.fail("timed out while waiting for IPv4 address of listener")
        target = listener_address
        if target is None:
            command.fail("no address was specified by listener")
        container_command = "ping -c 1 %s && echo 'PING RESULT SUCCESS' || echo 'PING RESULT FAIL'" % target
        talk_command = rkt_run + ["--exec", "/bin/bash", "--", "-c", setup.escape_shell(container_command)]
        output = ssh.check_ssh_output(talker, *talk_command)
        # normalize line endings and drop NUL bytes before taking the last line
        normalized = output.replace(b"\r\n", b"\n").replace(b"\0", b'').strip()
        last_line = normalized.split(b"\n")[-1]
        if b"PING RESULT FAIL" in last_line:
            command.fail("was not able to ping the target container; is flannel working?")
        elif b"PING RESULT SUCCESS" not in last_line:
            command.fail("could not launch container to test flannel properly")
        return True

    passed_1, passed_2 = parallel.parallel(listen, talk)
    assert passed_1 is True and passed_2 is True, "should have been checked by parallel already!"

    print("flannel seems to work!")
def check_supervisor_accessible(insecure: bool = False):
    "check whether the supervisor node is accessible over ssh"
    # runs the no-op `true` command on the configured keyserver node;
    # `insecure` is forwarded unchanged to ssh.check_ssh
    keyserver = configuration.get_config().keyserver
    ssh.check_ssh(keyserver, "true", insecure=insecure)
def modify_temporary_dns(node: configuration.Node, additional: dict) -> None:
    "replace the AUTO-TEMP-DNS entries in the node's /etc/hosts with the given hostname -> ip mappings"
    # node: node whose /etc/hosts is edited over ssh
    # additional: mapping of hostname -> ip; pass {} to only remove old entries

    # first drop every line previously tagged by this function
    strip_script = "grep -vF AUTO-TEMP-DNS /etc/hosts >/etc/hosts.new && mv /etc/hosts.new /etc/hosts"
    ssh.check_ssh(node, strip_script)

    # then append one tagged line per requested mapping
    tagged_lines = ["%s\t%s # AUTO-TEMP-DNS" % (ip, hostname) for hostname, ip in additional.items()]
    for hosts_line in tagged_lines:
        ssh.check_ssh(node, "echo %s >>/etc/hosts" % escape_shell(hosts_line))