예제 #1
0
def worker_setup(cluster: structures.cluster.Cluster) -> None:
    """Run the cluster's worker setup commands over SSH.

    Every entry of ``cluster.worker_setup_commands`` is executed in order
    through the callable yielded by ``SSHClusterCommandRunner.in_workers()``
    (presumably targeting the worker nodes only -- confirm against the
    ``ssh`` module). The output of each command is printed.

    Args:
        cluster: Cluster description providing the commands to run.
    """
    with ssh.SSHClusterCommandRunner(
            cluster=cluster, parallel=True).in_workers() as run_on_workers:
        for setup_command in cluster.worker_setup_commands:
            # The returned results are not needed here.
            run_on_workers(cmd=setup_command, print_output=True)
예제 #2
0
def check_clean_docker_cluster(cluster: structures.cluster.Cluster) -> None:
    """Ensure no container with the cluster's container name is listed.

    Runs ``docker ps`` filtered by container name through the callable
    yielded by ``SSHClusterCommandRunner.in_cluster()`` and collects every
    host whose command output is not empty.

    Args:
        cluster: Cluster description. ``cluster.docker.container_name`` is
            used as the container name unless it is the empty string, in
            which case ``cluster.cluster_name`` is used instead.

    Raises:
        RuntimeError: If ``docker ps`` printed something on at least one
            host, i.e. a matching container is already running there.
    """
    # Name of the docker containers
    cname = cluster.docker.container_name if cluster.docker.container_name != "" \
        else cluster.cluster_name

    with ssh.SSHClusterCommandRunner(cluster=cluster,
                                     parallel=True).in_cluster() as run:

        # Docker expects the Go template '{{.Names}}'. Inside an f-string each
        # literal brace must be doubled, hence the four braces on each side.
        ps = f"docker ps -f 'name={cname}' --format '{{{{.Names}}}}'"
        results = run(cmd=ps)

        # Any non-blank output means at least one matching container exists
        # on that host.
        hosts_with_existing_cname = [
            result.connection.host
            for result in results.values()
            if result.stdout.strip() != ""
        ]

        if hosts_with_existing_cname:
            raise RuntimeError(
                f"Container '{cname}' found already running in the following hosts: "
                f"[{', '.join(hosts_with_existing_cname)}]. Clean the cluster before "
                f"continuing.")
예제 #3
0
def initialize(cluster: structures.cluster.Cluster) -> None:
    """Run the cluster's initialization commands over SSH.

    Every entry of ``cluster.initialization_commands`` is executed in order
    through the callable yielded by ``SSHClusterCommandRunner.in_cluster()``
    (presumably targeting every node of the cluster -- confirm against the
    ``ssh`` module). The output of each command is printed.

    Args:
        cluster: Cluster description providing the commands to run.
    """
    with ssh.SSHClusterCommandRunner(
            cluster=cluster, parallel=True).in_cluster() as run_on_cluster:
        for command in cluster.initialization_commands:
            # The returned results are not needed here.
            run_on_cluster(cmd=command, print_output=True)
예제 #4
0
def head_start_ray(cluster: structures.cluster.Cluster) -> None:
    """Run the cluster's head "start ray" commands over SSH.

    Every entry of ``cluster.head_start_ray_commands`` is executed in order
    through the callable yielded by ``SSHClusterCommandRunner.in_head()``
    (presumably targeting the head node only -- confirm against the ``ssh``
    module). The output of each command is printed.

    Args:
        cluster: Cluster description providing the commands to run.
    """
    with ssh.SSHClusterCommandRunner(
            cluster=cluster, parallel=True).in_head() as run_on_head:
        for start_command in cluster.head_start_ray_commands:
            # The returned results are not needed here.
            run_on_head(cmd=start_command, print_output=True)
예제 #5
0
def stop_cluster_with_docker(cluster: structures.cluster.Cluster,
                             session: str) -> None:
    """Stop the labelled docker containers through the cluster SSH runner.

    Containers are selected with ``docker ps -q`` by label: the plain
    ``clusterize`` label when ``session`` is ``None``, otherwise
    ``clusterize.session=<session>``. The shell command only calls
    ``docker stop`` when the listing is non-empty and always ends with
    ``|| true``, so a failing stop does not fail the command.

    Args:
        cluster: Cluster description handed to the SSH runner.
        session: Session name used in the label filter, or ``None`` to match
            the plain ``clusterize`` label.
    """
    label = "clusterize" if session is None else f"clusterize.session={session}"
    list_ids = f"docker ps -q -f label={label}"

    with ssh.SSHClusterCommandRunner(cluster=cluster).in_cluster() as run_on_cluster:
        # Guard with `[ -n ... ]` so `docker stop` is never called without ids.
        stop_command = (
            f'[ -n "$({list_ids})" ] && docker stop $({list_ids}) || true'
        )
        run_on_cluster(cmd=stop_command)
예제 #6
0
def execute(args: Namespace) -> None:
    """Run a user-supplied command over SSH on the selected part of a cluster.

    The cluster definition is loaded from the YAML file referenced by the
    project data. The command line is built from ``args.command`` and
    ``args.args`` and, when ``args.docker`` is set, wrapped with
    ``utils.docker.wrap_in_docker`` for the session's container name.

    Args:
        args: Parsed command-line arguments. Reads ``project_dir``,
            ``session`` (defaulted in place to the project name when
            ``None``), ``command``, ``args``, ``docker`` and ``on``.

    Raises:
        RuntimeError: If no project data is found for ``args.project_dir``,
            or if ``args.on`` is not one of ``"HEAD"``, ``"WORKERS"`` or
            ``"CLUSTER"``.
    """
    project_data = utils.project.get_project_data(args.project_dir)

    if project_data is None:
        # project_data is None here, so report the directory that was
        # searched instead of dereferencing the missing object.
        raise RuntimeError(f"No project found in '{args.project_dir}'")

    with open(file=project_data.cluster, mode='r') as f:
        cls: structures.cluster.Cluster = structures.cluster.Cluster.from_yaml(
            data=f)

    if args.session is None:
        args.session = project_data.name

    command = args.command + " " + " ".join(args.args)

    if args.docker:
        cname = utils.docker.get_container_name(cluster=cls,
                                                session=args.session)
        command = utils.docker.wrap_in_docker(cmd=command,
                                              container_name=cname)

    # Pick the runner context matching the requested target.
    if args.on == "HEAD":
        with ssh.SSHClusterCommandRunner(cluster=cls,
                                         parallel=True).in_head() as run:
            run(cmd=command, print_output=True)

    elif args.on == "WORKERS":
        with ssh.SSHClusterCommandRunner(cluster=cls,
                                         parallel=True).in_workers() as run:
            run(cmd=command, print_output=True)

    elif args.on == "CLUSTER":
        with ssh.SSHClusterCommandRunner(cluster=cls,
                                         parallel=True).in_cluster() as run:
            run(cmd=command, print_output=True)

    else:
        raise RuntimeError(f"'{args.on}' not recognized")