예제 #1
0
def get_keypath() -> str:
    """Return the key file path stored in the KEY_ENV environment variable.

    A failure is reported through command.fail when the variable is unset or
    when the path it holds is not an existing regular file.
    """
    keypath = os.environ.get(KEY_ENV)
    if keypath is None:
        command.fail("no key specified in env var $HOMEWORLD_DISASTER")
    key_file_present = os.path.isfile(keypath)
    if not key_file_present:
        command.fail("no key found in file specified by $HOMEWORLD_DISASTER")
    return keypath
예제 #2
0
def download_and_verify_package_list(
        baseurl: str,
        dist: str = "homeworld",
        keyring_resource: str = "homeworld-archive-keyring.gpg"
) -> (str, dict):
    """Fetch the signed Release file of a distribution and its package index.

    The Release file is fetched together with its detached signature via
    fetch_signed_url. The SHA256 section of that file supplies the expected
    hash for the amd64 Packages index, which is then fetched and checked.

    Returns the base URL (trailing slashes removed) and the parsed package
    list keyed by "Package". Problems are reported through command.fail.
    """
    baseurl = baseurl.rstrip("/")
    dist_url = "/".join([baseurl, "dists", dist])
    packages_relpath = "main/binary-amd64/Packages"

    release = fetch_signed_url(dist_url + "/Release",
                               dist_url + "/Release.gpg",
                               keyring_resource)

    release_fields = parse_apt_kvs(release.decode())
    if "SHA256" not in release_fields:
        command.fail("cannot find section for sha256 hashes")

    hashes_by_path = parse_apt_hash_list(release_fields["SHA256"])
    if packages_relpath not in hashes_by_path:
        command.fail("could not find hash for %s" % packages_relpath)

    packages_url = "/".join([dist_url, packages_relpath])
    packages = fetch_url_and_check_hash(packages_url,
                                        hashes_by_path[packages_relpath])

    return baseurl, parse_apt_kv_list(packages.decode(), "Package")
예제 #3
0
def populate() -> None:
    """Copy the setup.yaml resource into the project directory.

    get_project is asked to create the project directory if it is missing.
    An already existing setup.yaml is reported through command.fail rather
    than being overwritten.
    """
    project_dir = get_project(create_dir_if_missing=True)
    destination = os.path.join(project_dir, "setup.yaml")
    if os.path.exists(destination):
        command.fail("setup.yaml already exists")
    resource.copy_to("setup.yaml", destination)
    print("filled out setup.yaml")
예제 #4
0
파일: keys.py 프로젝트: sipb/homeworld
def export_keytab(node, keytab_file):
    "decrypt and export the keytab for a particular server"
    # encrypted keytabs live in the project directory as keytab.<node>.crypt
    project_dir = configuration.get_project()
    encrypted_keytab = os.path.join(project_dir, "keytab.%s.crypt" % node)
    keytab_present = os.path.exists(encrypted_keytab)
    if not keytab_present:
        command.fail("no keytab for node %s" % node)
    keycrypt.gpg_decrypt_file(encrypted_keytab, keytab_file)
예제 #5
0
def _shred_directory_contents(directory: str) -> None:
    "overwrite every entry directly inside the given directory using shred"
    filenames = os.listdir(directory)
    # shred refuses to run without a file operand, so skip empty directories
    if filenames:
        subprocess.check_call(["shred", "--"] + filenames, cwd=directory)


def generate() -> None:
    """Generate the cluster authorities and pack them into authorities.tgz.

    The keygen binary is run inside a private temporary directory. Outputs
    ending in .pub or .pem are copied as-is; every other output is encrypted
    with keycrypt.gpg_encrypt_file. The result is packed with tar into the
    path returned by get_targz_path.

    Failures are reported through command.fail when the archive already
    exists or when the keygen binary cannot be found. The working copies are
    shredded even if one of the steps fails part-way.
    """
    authorities = get_targz_path(check_exists=False)
    if os.path.exists(authorities):
        command.fail("authorities.tgz already exists")
    # tempfile.TemporaryDirectory() creates the directory so that only the
    # creating user can access it (mode 0o700), which protects the private keys
    with tempfile.TemporaryDirectory() as d:
        certdir = os.path.join(d, "certdir")
        cryptdir = os.path.join(d, "cryptdir")
        keyserver_yaml = os.path.join(d, "keyserver.yaml")
        util.writefile(keyserver_yaml, configuration.get_keyserver_yaml().encode())
        os.mkdir(certdir)
        try:
            print("generating authorities...")
            try:
                # TODO: avoid having these touch disk
                subprocess.check_call(["keygen", keyserver_yaml, certdir, "supervisor-nodes"])
            except FileNotFoundError as e:
                if e.filename == "keygen":
                    command.fail("could not find keygen binary. is the homeworld-keyserver dependency installed?")
                else:
                    raise
            print("encrypting authorities...")
            os.mkdir(cryptdir)
            for filename in os.listdir(certdir):
                source = os.path.join(certdir, filename)
                target = os.path.join(cryptdir, filename)
                if filename.endswith((".pub", ".pem")):
                    # public keys; copy over without encryption
                    util.copy(source, target)
                else:
                    # private keys; encrypt when copying
                    keycrypt.gpg_encrypt_file(source, target)
        finally:
            # the plaintext private keys must never outlive this step, even
            # when keygen or the encryption fails part-way through
            _shred_directory_contents(certdir)
        try:
            print("packing authorities...")
            subprocess.check_call(["tar", "-C", cryptdir, "-czf", authorities, "."])
        finally:
            _shred_directory_contents(cryptdir)
예제 #6
0
def setup_keyserver(ops: Operations) -> None:
    """Use ops to set up the keyserver on every supervisor node.

    For each node whose kind is "supervisor": create the authority, statics
    and config directories, upload every decrypted authority key, upload the
    cluster config, machine list and keyserver config, then enable and
    restart keyserver.service.
    """
    config = configuration.get_config()
    for node in (n for n in config.nodes if n.kind == "supervisor"):
        ops.ssh_mkdir("create directories on @HOST", node, AUTHORITY_DIR,
                      STATICS_DIR, CONFIG_DIR)

        for name, data in authority.iterate_keys_decrypted():
            # TODO: keep these keys in memory
            if "/" in name:
                command.fail("found key in upload list with invalid filename")
            # TODO: avoid keeping these keys in memory for this long
            authority_path = os.path.join(AUTHORITY_DIR, name)
            ops.ssh_upload_bytes("upload authority %s to @HOST" % name, node,
                                 data, authority_path)

        # each file is rendered right before its own upload, in this order
        generated_files = (
            ("upload cluster config to @HOST",
             configuration.get_cluster_conf,
             STATICS_DIR + "/cluster.conf"),
            ("upload machine list to @HOST",
             configuration.get_machine_list_file,
             STATICS_DIR + "/machine.list"),
            ("upload keyserver config to @HOST",
             configuration.get_keyserver_yaml,
             CONFIG_DIR + "/keyserver.yaml"),
        )
        for description, render, destination in generated_files:
            ops.ssh_upload_bytes(description, node, render().encode(),
                                 destination)

        for description, action in (("enable keyserver on @HOST", "enable"),
                                    ("start keyserver on @HOST", "restart")):
            ops.ssh(description, node, "systemctl", action,
                    "keyserver.service")
예제 #7
0
 def boot_install(self, bootstrap_token):
     """Create the disk and boot with an "install" boot line.

     The boot line carries this node's IP address and the given bootstrap
     token. A truthy result from wait() is reported through command.fail.
     """
     self.create_disk()
     boot_arguments = "install netcfg/get_ipaddress=%s homeworld/asktoken=%s\n" % (
         self.node.ip, bootstrap_token)
     # TODO: do something better than a two-second delay to detect "boot:" prompt
     installer = self.boot_with_io("install", text=boot_arguments.encode(),
                                   delay=2.0)
     exit_status = installer.wait()
     if exit_status:
         command.fail("qemu virtual machine failed")
예제 #8
0
파일: verify.py 프로젝트: rsthomp/homeworld
def check_dns_function():
    """Run nslookup in a container on a random worker and check the answer.

    The lookup is started with rkt over ssh as root on the chosen worker.
    The last line of its output must end with "Address: 172.28.0.1";
    anything else is reported through command.fail.
    """
    config = configuration.Config.load_from_project()

    workers = [node for node in config.nodes if node.kind == "worker"]
    if not workers:
        command.fail("expected at least one worker node")
    worker = random.choice(workers)

    print("trying dns functionality test with", worker)

    container_command = "nslookup kubernetes.default.svc.hyades.local 172.28.0.2"
    remote_argv = [
        "rkt", "run", "homeworld.mit.edu/debian", "--exec", "/bin/bash", "--",
        "-c",
        setup.escape_shell(container_command),
    ]
    ssh_target = "root@%s.%s" % (worker.hostname, config.external_domain)
    output = subprocess.check_output(["ssh", ssh_target, "--"] + remote_argv)

    # normalize line endings and drop NUL bytes before taking the last line
    cleaned = output.replace(b"\r\n", b"\n").replace(b"\0", b"").strip()
    last_line = cleaned.rsplit(b"\n", 1)[-1]
    if not last_line.endswith(b"Address: 172.28.0.1"):
        command.fail("unexpected last line: %s" % repr(last_line.decode()))

    print("dns-addon seems to work!")
예제 #9
0
def access_ssh(no_add_to_agent: bool = False):
    """
    request SSH access to the cluster

    no_add_to_agent: do not add the resulting ssh key to ssh-agent
    """
    keypath = renew_ssh_cert()

    # show the freshly issued certificate to the user
    print("===== v CERTIFICATE DETAILS v =====")
    certificate_path = keypath + "-cert.pub"
    subprocess.check_call(["ssh-keygen", "-L", "-f", certificate_path])
    print("===== ^ CERTIFICATE DETAILS ^ =====")

    if no_add_to_agent:
        return

    # TODO: clear old identities
    try:
        ssh_add_output = subprocess.check_output(
            ["ssh-add", "--", keypath], stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError:
        command.fail(
            "*** ssh-add failed! ***",
            "ssh-add returned non-zero exit code. do you have an ssh-agent?")
    else:
        # if the user is using gnome, gnome-keyring might
        # masquerade as ssh-agent and provide a zero exit
        # code despite failing to add the certificate
        if b"add failed" in ssh_add_output:
            command.fail(
                "*** ssh-add failed! ***",
                "do you have an ssh-agent?\n(gnome-keyring does not count)")
예제 #10
0
파일: packages.py 프로젝트: sipb/homeworld
def fetch_url_and_check_hash(url: str, sha256hash: str) -> bytes:
    """Fetch url and return its data after checking the sha256 hash.

    A mismatch between util.sha256sum_data of the fetched data and
    sha256hash is reported through command.fail.
    """
    payload = fetch_url(url)
    actual_hash = util.sha256sum_data(payload)
    if actual_hash != sha256hash:
        details = (sha256hash, actual_hash, url)
        command.fail(
            "wrong hash: expected %s but got %s from url %s" % details)
    return payload
예제 #11
0
def get_known_hosts_path() -> str:
    """Return the path .ssh/known_hosts below the directory named by $HOME.

    An unset HOME variable is reported through command.fail.
    """
    home = os.environ.get("HOME")
    if home is None:
        message = "could not determine home directory, so could not find ~/.ssh/known_hosts"
        command.fail(message)
    return os.path.join(home, ".ssh", "known_hosts")
예제 #12
0
파일: verify.py 프로젝트: sipb/homeworld
def check_flannel():
    "verify that the flannel addon is functioning"

    config = configuration.get_config()
    # every node that is not a supervisor is counted
    node_count = sum(1 for node in config.nodes if node.kind != "supervisor")

    expect_prometheus_query_exact(
        'sum(kube_daemonset_status_number_ready{daemonset="kube-flannel-ds"})',
        node_count, "flannel pods are ready")

    collector_checks = (
        ("sum(flannel_collect_enum_check)",
         "flannel metrics collector is failing enumeration"),
        ("sum(flannel_collect_enum_dup_check)",
         "flannel metrics collector is encountering duplication"),
    )
    for query, description in collector_checks:
        expect_prometheus_query_bool(query, description)

    per_node_checks = (
        ('sum(flannel_collect_check)',
         "flannel metrics monitors are collecting"),
        ('sum(flannel_duplicate_check)',
         "flannel metrics monitors are avoiding duplication"),
        ('sum(flannel_monitor_check)',
         "flannel metrics monitors are monitoring successfully"),
    )
    for query, description in per_node_checks:
        expect_prometheus_query_exact(query, node_count, description)

    # the query result is compared against a threshold of 60
    recency_result = pull_prometheus_query(
        'time() - min(flannel_monitor_recency)')
    worst_recency = float(recency_result)
    if worst_recency > 60:
        command.fail(
            "flannel metrics monitors have not updated recently enough")

    # the expected total is the node count squared
    expect_prometheus_query_exact('sum(flannel_talk_check)', node_count ** 2,
                                  "flannel pings are successful")
    print("flannel seems to work!")
예제 #13
0
def edit() -> None:
    "open $EDITOR (defaults to nano) to edit the project's setup.yaml"
    project_dir = get_project()
    target = os.path.join(project_dir, "setup.yaml")
    if not os.path.exists(target):
        command.fail(
            "setup.yaml does not exist (run spire config populate first?)")
    # "--" keeps the editor from treating the path as an option
    editor_argv = [get_editor(), "--", target]
    subprocess.check_call(editor_argv)
예제 #14
0
파일: verify.py 프로젝트: sipb/homeworld
def expect_prometheus_query_exact(query, expected, description):
    """Check that the integer result of a prometheus query equals expected.

    description is worded like 'X are Y' so that it fits both failure
    messages. Too large or too small a result is reported through
    command.fail.
    """
    actual = int(pull_prometheus_query(query))
    if actual > expected:
        command.fail("too many %s" % description)
    elif actual < expected:
        command.fail("only %d/%d %s" % (actual, expected, description))
예제 #15
0
 def listen():
     """Start a container via ssh and record the address it reports on eth0.

     worker_listener, found_address and event come from the enclosing scope,
     which is not visible here. The first output line of the remote command
     must contain an "inet" entry ending in /24. The address without that
     suffix is stored in found_address[0] before event is set. event is set
     on every exit path.
     """
     try:
         container_command = "ip -o addr show dev eth0 to 172.18/16 primary && sleep 15"
         remote_argv = [
             "rkt", "run", "--net=rkt.kubernetes.io",
             "homeworld.mit.edu/debian", "--", "-c",
             setup.escape_shell(container_command),
         ]
         ssh_argv = ssh.build_ssh(worker_listener, *remote_argv)
         with subprocess.Popen(ssh_argv, stdout=subprocess.PIPE, bufsize=1,
                               universal_newlines=True) as listener:
             first_line = listener.stdout.readline()
             if "scope" not in first_line:
                 command.fail("could not find scope line in ip addr output (%s)" % repr(first_line))
             fields = first_line.split(" ")
             if "inet" not in fields:
                 command.fail("could not find inet address in ip addr output")
             # the field right after "inet" holds the address with its prefix
             cidr = fields[fields.index("inet") + 1]
             if not cidr.endswith("/24"):
                 command.fail("expected address that ended in /24, not '%s'" % cidr)
             address = cidr[:-len("/24")]
             if address.count(".") != 3:
                 command.fail("expected valid IPv4 address, not '%s'" % address)
             if not address.replace(".", "").isdigit():
                 command.fail("expected valid IPv4 address, not '%s'" % address)
             found_address[0] = address
             event.set()
             # wait for the remote command to finish, for at most 20 seconds
             listener.communicate(timeout=20)
     finally:
         event.set()
     return True
예제 #16
0
파일: verify.py 프로젝트: rowhit/homeworld
def check_kube_health():
    """Run a series of prometheus checks against the kubernetes cluster.

    After check_kube_init, the checks cover how many nodes are online,
    unschedulable and ready, and whether the default, kube-public and
    kube-system namespaces are active. Master hostnames are validated
    before being placed into a query.
    """
    check_kube_init()
    config = configuration.get_config()
    kube_node_count = sum(
        1 for node in config.nodes if node.kind != "supervisor")
    master_node_count = sum(
        1 for node in config.nodes if node.kind == "master")
    expect_prometheus_query_exact('sum(kube_node_info)', kube_node_count,
                                  "kubernetes nodes are online")

    master_hostnames = [
        node.hostname for node in config.nodes if node.kind == "master"
    ]
    master_pattern = "|".join(master_hostnames)
    # only letters, digits and dashes may be interpolated into the query
    for hostname in master_hostnames:
        if not hostname.replace("-", "").isalnum():
            command.fail(
                "invalid hostname for inclusion in prometheus monitoring rules: %s"
                % hostname)

    masters_unschedulable_query = (
        'sum(kube_node_spec_unschedulable{node=~"%s"})' % master_pattern)
    expect_prometheus_query_exact(masters_unschedulable_query,
                                  master_node_count,
                                  "master nodes are unschedulable")
    expect_prometheus_query_exact('sum(kube_node_spec_unschedulable)',
                                  master_node_count,
                                  "kubernetes nodes are unschedulable")
    expect_prometheus_query_exact(
        'sum(kube_node_status_condition{condition="Ready",status="true"})',
        kube_node_count, "kubernetes nodes are ready")

    namespaces = ["default", "kube-public", "kube-system"]
    namespace_query = (
        'sum(kube_namespace_status_phase{phase="Active",namespace=~"%s"})' %
        "|".join(namespaces))
    expect_prometheus_query_exact(namespace_query, len(namespaces),
                                  "namespaces are set up")
    print("kubernetes cluster passed cursory inspection!")
예제 #17
0
파일: verify.py 프로젝트: sipb/homeworld
def check_user_grant():
    "verify that user-grant and its kubeconfigs work"
    config = configuration.get_config()

    # there is no load balancing yet, so this test has to run from *inside the cluster*: look up the user-grant
    # service IP, upload the local user cert to a master node, and authenticate to user-grant with curl from there.
    # TODO: once load balancing is ready, make this whole thing much simpler

    # a master node is chosen so that we can be confident we are not on the node hosting user-grant. nothing else
    # requires it; a worker node would normally be preferred to keep unnecessary code off the masters, but this
    # is purely for testing non-production clusters, so it doesn't matter.
    proxy_node = config.get_any_node("master")

    service_ip = get_service_ip("user-grant")
    local_key, local_cert = authority.get_local_grant_user_paths()
    remote_key = "/etc/homeworld/testing/usergrant.key"
    remote_cert = "/etc/homeworld/testing/usergrant.pem"
    ssh.check_ssh(proxy_node, "rm", "-f", remote_key, remote_cert)
    ssh.check_ssh(proxy_node, "mkdir", "-p", "/etc/homeworld/testing")
    for local_path, remote_path in ((local_key, remote_key),
                                    (local_cert, remote_cert)):
        ssh.check_scp_up(proxy_node, local_path, remote_path)

    # point the user-grant domain at the service IP only for the request
    setup.modify_temporary_dns(proxy_node,
                               {config.user_grant_domain: service_ip})
    try:
        curl_output = ssh.check_ssh_output(
            proxy_node, "curl", "--key", remote_key, "--cert", remote_cert,
            "https://%s/" % config.user_grant_domain)
        kubeconfig = curl_output.decode()
    finally:
        setup.modify_temporary_dns(proxy_node, {})

    magic_phrase = "it allows authenticating to the Hyades cluster as you"
    if magic_phrase not in kubeconfig:
        command.fail(
            "invalid kubeconfig: did not see phrase " + repr(magic_phrase),
            "kubeconfig received read as follows: " + repr(kubeconfig))

    print("successfully retrieved kubeconfig from user-grant!")

    # user-grant has now handed us a kubeconfig; next, confirm that it actually works
    # by checking that the auto-created rolebinding passes the sniff test.

    with tempfile.TemporaryDirectory() as workdir:
        kubeconfig_path = os.path.join(workdir, "granted-kubeconfig")
        util.writefile(kubeconfig_path, kubeconfig.encode())

        kubectl_argv = [
            "hyperkube", "kubectl", "--kubeconfig", kubeconfig_path, "-o",
            "json", "get", "rolebindings",
            "auto-grant-" + authority.UPSTREAM_USER_NAME,
        ]
        rolebinding = json.loads(subprocess.check_output(kubectl_argv).decode())

        role_ref = rolebinding.get("roleRef", {})
        if role_ref.get("name") != "admin":
            command.fail("rolebinding for user was not admin in %s" %
                         repr(rolebinding))

    print("autogenerated rolebinding for user",
          repr(authority.UPSTREAM_USER_NAME), "passed basic check!")
예제 #18
0
파일: verify.py 프로젝트: sipb/homeworld
def check_keystatics():
    """Compare the served /static/cluster.conf with the expected one.

    The served copy comes from query.get_keyurl_data and the expected copy
    from configuration.get_cluster_conf. A mismatch according to
    compare_multiline is reported through command.fail.
    """
    served = query.get_keyurl_data("/static/cluster.conf")
    expected = configuration.get_cluster_conf()

    matches = compare_multiline(served, expected)
    if not matches:
        command.fail("MISMATCH: cluster.conf")

    print("pass: keyserver serving correct static files")
예제 #19
0
파일: keys.py 프로젝트: sipb/homeworld
def import_keytab(node, keytab_file):
    "import and encrypt a keytab for a particular server"

    config = configuration.get_config()
    if not config.has_node(node):
        command.fail("no such node: %s" % node)
    # the encrypted keytab is stored in the project as keytab.<node>.crypt
    project_dir = configuration.get_project()
    encrypted_keytab = os.path.join(project_dir, "keytab.%s.crypt" % node)
    keycrypt.gpg_encrypt_file(keytab_file, encrypted_keytab)
예제 #20
0
def get_project() -> str:
    """Return the project directory named by $HOMEWORLD_DIR.

    An unset variable, or a value that is not an existing directory, is
    reported through command.fail.
    """
    project_dir = os.environ.get("HOMEWORLD_DIR")
    if project_dir is None:
        command.fail("no HOMEWORLD_DIR environment variable declared")
    directory_exists = os.path.isdir(project_dir)
    if not directory_exists:
        message = "HOMEWORLD_DIR (%s) is not a directory that exists" % project_dir
        command.fail(message)
    return project_dir
예제 #21
0
def hosts_up(hosts):
    """Append one "ip<TAB>hostname" line per entry of hosts to /etc/hosts.

    hosts maps each hostname to its IP address. Because the tab character
    separates the two columns, a tab inside a hostname or inside the string
    form of an IP address is reported through command.fail before anything
    is written.
    """
    for hostname, ip in hosts.items():
        if "\t" in hostname:
            command.fail("expected no tabs in hostname %s" % repr(hostname))
        # checked explicitly rather than with assert, because assert
        # statements are removed when python runs with -O
        if "\t" in str(ip):
            command.fail("expected no tabs in ip address %s" % repr(str(ip)))
    sudo_append_to_file(
        "/etc/hosts",
        ["%s\t%s" % (ip, hostname) for hostname, ip in hosts.items()])
예제 #22
0
def populate() -> None:
    "initialize the cluster's setup.yaml with the template"
    # get_project is asked to create the project directory if it is missing
    project_dir = get_project(create_dir_if_missing=True)
    destination = os.path.join(project_dir, "setup.yaml")
    already_populated = os.path.exists(destination)
    if already_populated:
        command.fail("setup.yaml already exists")
    resource.extract("//spire/resources:setup.yaml", destination)
    print("filled out setup.yaml")
예제 #23
0
def get_apiserver_default() -> str:
    """Return the https URL (port 443) of the first configured master node.

    A configuration without any master node is reported through
    command.fail.
    """
    # TODO: this should be eliminated, because nothing should be specific to this one apiserver
    config = Config.load_from_project()
    masters = [node for node in config.nodes if node.kind == "master"]
    if len(masters) == 0:
        command.fail(
            "no apiserver to select, because no master nodes were configured")
    first_master = masters[0]
    return "https://%s:443" % first_master.ip
예제 #24
0
파일: verify.py 프로젝트: sipb/homeworld
def get_service_ip(service_name: str) -> str:
    """Return the clusterIP that kubectl reports for the given service.

    The value must contain exactly three dots and otherwise only digits.
    Anything else is reported through command.fail.
    """
    kubectl_args = [
        "get", "service", "-o=jsonpath={.spec.clusterIP}", "--", service_name
    ]
    raw_output = access.call_kubectl(kubectl_args, return_result=True)
    cluster_ip = raw_output.decode().strip()

    has_three_dots = cluster_ip.count(".") == 3
    only_digits = cluster_ip.replace(".", "").isdigit()
    if not (has_three_dots and only_digits):
        command.fail("invalid clusterIP for %s service: %s" %
                     (service_name, repr(cluster_ip)))
    return cluster_ip
예제 #25
0
def ssh_foreach(ops: setup.Operations, node_kind: str, *params: str):
    """Run a command over ssh on every node of the requested kind.

    node_kind is either "node", which selects every node, or one of
    configuration.Node.VALID_NODE_KINDS. Any other value is reported
    through command.fail with a usage message. params is the command to run.
    """
    config = configuration.get_config()
    valid_node_kinds = configuration.Node.VALID_NODE_KINDS
    select_all = node_kind == "node"
    if not select_all and node_kind not in valid_node_kinds:
        command.fail("usage: spire foreach {node," +
                     ",".join(valid_node_kinds) + "} command")
    for node in config.nodes:
        if select_all or node.kind == node_kind:
            ops.ssh("run command on @HOST", node, *params)
예제 #26
0
파일: keys.py 프로젝트: rsthomp/homeworld
def export_https(name, keyout, certout):
    """Export the https key and certificate stored for the given host.

    Only setup.REGISTRY_HOSTNAME is accepted as name; any other value is
    reported through command.fail. The key is decrypted to keyout and the
    certificate is copied to certout.
    """
    if name != setup.REGISTRY_HOSTNAME:
        command.fail("unexpected https host: %s" % name)

    encrypted_key = os.path.join(configuration.get_project(),
                                 "https.%s.key.crypt" % name)
    certificate = os.path.join(configuration.get_project(),
                               "https.%s.pem" % name)

    keycrypt.gpg_decrypt_file(encrypted_key, keyout)
    util.copy(certificate, certout)
예제 #27
0
파일: packages.py 프로젝트: sipb/homeworld
def fetch_signed_url(url: str, signature_url: str) -> bytes:
    """Fetch url and return its data after verifying the gpg signature.

    The signature is fetched from signature_url and checked against the
    "//upload:keyring.gpg" resource. A failed verification is reported
    through command.fail.
    """
    # the signature is fetched before the data it covers
    signature = fetch_url(signature_url)
    payload = fetch_url(url)

    keyring = resource.get("//upload:keyring.gpg")
    signature_ok = verify_gpg_signature(payload, signature, keyring)
    if not signature_ok:
        command.fail("signature verification FAILED on %s!" % url)

    return payload
예제 #28
0
파일: keys.py 프로젝트: rsthomp/homeworld
def check_pem_type(filepath, expect):
    """Check that the first line of a file is a PEM header of the given type.

    The first line must look like "-----BEGIN <type>-----", and <type> must
    equal expect. Either violation is reported through command.fail.
    """
    prefix = "-----BEGIN "
    suffix = "-----"
    with open(filepath, "r") as pem_file:
        first_line = pem_file.readline()
    stripped = first_line.rstrip()
    looks_like_pem = first_line.startswith(prefix) and stripped.endswith(suffix)
    if not looks_like_pem:
        command.fail("not a PEM file: %s" % filepath)
    # the type is whatever sits between the BEGIN marker and the dashes
    pem_header_type = stripped[len(prefix):-len(suffix)]
    if pem_header_type != expect:
        command.fail("incorrect PEM header: expected %s, not %s" %
                     (expect, pem_header_type))
예제 #29
0
파일: packages.py 프로젝트: sipb/homeworld
def parse_apt_hash_list(section):
    """Parse a newline-separated hash section into a {path: hash} dict.

    Every line must consist of exactly three fields separated by single
    spaces: the hash, a field that is ignored, and the path. Any other
    shape is reported through command.fail.
    """
    hashes_by_path = {}

    for entry in section.split("\n"):
        fields = entry.split(" ")
        # three fields means exactly two spaces in the line
        if len(fields) != 3:
            command.fail("found incorrectly formatted sha256 section")
        digest, _, path = fields
        hashes_by_path[path] = digest

    return hashes_by_path
예제 #30
0
def get_project(create_dir_if_missing=False) -> str:
    """Return the project directory named by $HOMEWORLD_DIR.

    When the directory does not exist it is created with os.mkdir if
    create_dir_if_missing is true; otherwise this is reported through
    command.fail. An unset variable is always reported through command.fail.
    """
    project_dir = os.environ.get("HOMEWORLD_DIR")
    if project_dir is None:
        command.fail("no HOMEWORLD_DIR environment variable declared")
    if os.path.isdir(project_dir):
        return project_dir
    if create_dir_if_missing:
        os.mkdir(project_dir)
    else:
        command.fail("HOMEWORLD_DIR (%s) is not a directory that exists" %
                     project_dir)
    return project_dir