Example #1
def remote_config_update(config, args, check_module=False):
    '''
    Update config files on the client end (infra/NFS nodes).
    ./ctl.py -s svc configupdate restfulapi
    ./ctl.py [-r storage_machine1 [-r storage_machine2]] -s svc configupdate storage_manager
    Runs with sudo by default.
    '''
    if check_module:
        # "jobmanager" is accepted below as an alias for "restfulapi"
        assert set(args.nargs[1:]) <= {
            "restfulapi", "storagemanager", "repairmanager", "dashboard",
            "jobmanager"
        }, "not supported"
    # this svc subcommand needs the node list (status.yaml) and the module file map
    if not os.path.exists(FILE_MAP_PATH):
        utils.render_template("template/cloud-config/file_map.yaml",
                              FILE_MAP_PATH, config)
    with open(FILE_MAP_PATH) as f:
        file_map = yaml.safe_load(f)  # plain yaml.load without a Loader is unsafe/deprecated
    for module in args.nargs[1:]:
        if module == "jobmanager":
            module = "restfulapi"
        if module in ["restfulapi", "dashboard", "repairmanager"]:
            # dynamic dispatch to the matching render_* function;
            # a dict-based alternative is sketched after this example
            render_func = eval("render_{}".format(module))
            render_func(config)
            infra_nodes, _ = load_node_list_by_role_from_config(
                config, ["infra"], False)
            for file_pair in file_map[module]:
                src_dst_list = [file_pair["src"], file_pair["dst"]]
                execute_in_parallel(config,
                                    infra_nodes,
                                    src_dst_list,
                                    True,
                                    copy2_wrapper,
                                    noSupressWarning=args.verbose)
        elif module == "storagemanager":
            nfs_nodes, _ = load_node_list_by_role_from_config(
                config, ["nfs"], False)
            for node in nfs_nodes:
                config["storage_manager"] = config["machines"][node][
                    "storage_manager"]
                render_storagemanager(config, node)
                src_dst_list = [
                    "./deploy/StorageManager/{}_storage_manager.yaml".format(
                        node), "/etc/StorageManager/config.yaml"
                ]
                args_list = (config["machines"][node]["fqdns"],
                             config["ssh_cert"], config["admin_username"],
                             src_dst_list, True, args.verbose)
                copy2_wrapper(args_list)
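
The eval()-based dispatch above executes arbitrary expression text; a minimal sketch of a dict-based alternative, with one-line stubs standing in for the render_* functions this module defines elsewhere:

def render_restfulapi(config): pass   # stub; the real module defines these
def render_dashboard(config): pass
def render_repairmanager(config): pass

RENDERERS = {
    "restfulapi": render_restfulapi,
    "dashboard": render_dashboard,
    "repairmanager": render_repairmanager,
}

module = "dashboard"   # hypothetical selection from args.nargs
RENDERERS[module]({})  # unknown names raise KeyError instead of reaching eval()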
Example #2
def dynamically_add_or_delete_around_a_num(config, args):
    # need some time for the newly added worker to register
    monitor_again_after = config.get("monitor_again_after", 10)
    while True:
        # TODO: no history of operations is kept here; maybe name the output script by timestamp?
        os.system("rm -f {}".format(args.output))
        config = load_config_based_on_command("dynamic_around")
        dynamic_worker_num = config.get("dynamic_worker_num", -1)
        if dynamic_worker_num < 0:
            print(
                "This round would be skipped. Please specify dynamic_worker_num in config."
            )
            os.system("sleep {}m".format(monitor_again_after))
            continue
        query_cmds = "get nodes -l worker=active --no-headers | awk '{print $1}'"
        k8s_worker_nodes = get_k8s_node_list_under_condition(
            config, args, query_cmds)
        worker_in_records, config = load_node_list_by_role_from_config(
            config, ["worker"], False)
        print("worker in records:\n", worker_in_records)
        print(
            "Dynamically scaling number of workers:\n {}/{} worker nodes registered in k8s, targeting {}"
            .format(len(k8s_worker_nodes), len(worker_in_records),
                    dynamic_worker_num))
        delta = dynamic_worker_num - len(worker_in_records)
        if delta > 0:
            add_n_machines(config, args, delta)
        elif delta < 0:
            delete_specified_or_cordoned_idling_nodes(config, args, -delta)
        os.system("sleep {}m".format(monitor_again_after))
Example #3
def create_nfs_nsg(config, args):
    nfs_nsg_name = config["azure_cluster"]["nfs_nsg_name"]
    resource_group = config["azure_cluster"]["resource_group"]
    nfs_ports = config["cloud_config_nsg_rules"]["nfs_ports"]
    nfs_nodes, config = load_node_list_by_role_from_config(config, ["nfs"])
    infra_nodes, config = load_node_list_by_role_from_config(config, ["infra"])
    # create a dedicated NSG only when some NFS nodes are not also infra nodes
    if set(nfs_nodes) - set(infra_nodes):
        cmd = """az network nsg create --resource-group {} --name {}""".format(
            resource_group, nfs_nsg_name)
        execute_or_dump_locally(cmd, args.verbose, args.dryrun, args.output)
    priority = 1700
    # set NSG rules for devs (and samba, since samba machines are all in corpnet)
    for tag in config["cloud_config_nsg_rules"]["service_tags"]:
        create_nsg_rule(resource_group, nfs_nsg_name, priority,
                        "NFS-Allow-Dev-{}".format(tag), nfs_ports, tag, args)
        priority += 1
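
create_nsg_rule is defined elsewhere; presumably it shells out to az network nsg rule create roughly as below (resource group, NSG name, rule name, ports, and tag values here are all illustrative):

rg, nsg = "my-rg", "my-nfs-nsg"  # hypothetical resource group and NSG
cmd = (
    "az network nsg rule create"
    " --resource-group {} --nsg-name {} --name {}"
    " --priority {} --destination-port-ranges {}"
    " --source-address-prefixes {} --access Allow"
).format(rg, nsg, "NFS-Allow-Dev-CorpNetPublic", 1700, "111 2049",
         "CorpNetPublic")
print(cmd)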
Example #4
def connect_to_machine(config, args):
    if args.nargs[0] in config['allroles']:
        target_role = args.nargs[0]
        index = int(args.nargs[1]) if len(args.nargs) > 1 else 0
        nodes, _ = load_node_list_by_role_from_config(config, [target_role])
        node = nodes[index]
    else:
        node = args.nargs[0]
        assert node in config["machines"], "unknown machine: {}".format(node)
    utils.SSH_connect(config["ssh_cert"],
                      config["machines"][node]["admin_username"],
                      config["machines"][node]["fqdns"])
Example #5
def get_multiple_machines(config, args):
    valid_roles = set(config['allroles']) & set(args.roles_or_machine)
    valid_machine_names = set(config['machines']) & set(args.roles_or_machine)
    invalid_rom = (set(args.roles_or_machine) - valid_roles -
                   valid_machine_names)
    if invalid_rom:
        print("Warning: invalid roles/machine names detected; the following "
              "names are neither valid role names nor machines in our "
              "cluster: " + ",".join(sorted(invalid_rom)))
    nodes, _ = load_node_list_by_role_from_config(config, list(valid_roles),
                                                  False)
    return nodes + list(valid_machine_names)
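
The set arithmetic is the whole trick: requested names are split into known roles, known machines, and leftovers. A standalone sketch with hypothetical data:

all_roles = {"infra", "worker", "nfs"}
machines = {"node-01", "node-02"}  # hypothetical machine inventory
requested = ["worker", "node-01", "typo"]

valid_roles = all_roles & set(requested)                          # {'worker'}
valid_machine_names = machines & set(requested)                   # {'node-01'}
invalid_rom = set(requested) - valid_roles - valid_machine_names  # {'typo'}
print(valid_roles, valid_machine_names, invalid_rom)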
Example #6
def run_kubectl(config, args, commands, need_output=False, dump_to_file=''):
    if not os.path.exists("./deploy/bin/kubectl"):
        print("Please make sure ./deploy/bin/kubectl exists; "
              "one way is to run ./ctl.py download")
        exit(-1)
    one_command = " ".join(commands)
    nodes, _ = load_node_list_by_role_from_config(config, ["infra"], False)
    master_node = random.choice(nodes)
    kube_command = "./deploy/bin/kubectl --server=https://{}:{} --certificate-authority={} --client-key={} --client-certificate={} {}".format(
        config["machines"][master_node]["fqdns"], config["k8sAPIport"],
        "./deploy/ssl/ca/ca.pem", "./deploy/ssl/kubelet/apiserver-key.pem",
        "./deploy/ssl/kubelet/apiserver.pem", one_command)
    if need_output:
        # dump the command to dump_to_file instead of args.output when we
        # don't want to mix k8s commands with other output
        output = utils.execute_or_dump_locally(kube_command, args.verbose,
                                               args.dryrun, dump_to_file)
        if not args.verbose:
            print(output)
        return output
    else:
        os.system(kube_command)
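
For reference, the assembled command line looks like the sketch below (host and port are hypothetical; the cert paths are the ones hard-coded above):

fqdn, port = "master0.example.com", "1443"  # hypothetical infra node and API port
kube_command = (
    "./deploy/bin/kubectl --server=https://{}:{}"
    " --certificate-authority=./deploy/ssl/ca/ca.pem"
    " --client-key=./deploy/ssl/kubelet/apiserver-key.pem"
    " --client-certificate=./deploy/ssl/kubelet/apiserver.pem {}"
).format(fqdn, port, "get nodes")
print(kube_command)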