def worker_setup(cluster: structures.cluster.Cluster) -> None:
    """Run every worker setup command of *cluster* through the workers runner.

    The commands are taken from ``cluster.worker_setup_commands`` and issued
    one after another, in list order, using the callable yielded by
    ``SSHClusterCommandRunner(...).in_workers()``. Command output is printed.

    Args:
        cluster: Cluster description providing the commands and the hosts.
    """
    runner = ssh.SSHClusterCommandRunner(cluster=cluster, parallel=True)
    with runner.in_workers() as run:
        for command in cluster.worker_setup_commands:
            run(cmd=command, print_output=True)
def check_clean_docker_cluster(cluster: structures.cluster.Cluster) -> None:
    """Fail if a container with the cluster's container name already exists.

    Runs ``docker ps`` filtered by the container name on the hosts reached
    through ``SSHClusterCommandRunner(...).in_cluster()`` and collects every
    host whose command output is not empty.

    Args:
        cluster: Cluster description. ``cluster.docker.container_name`` is
            used as the container name unless it is the empty string, in
            which case ``cluster.cluster_name`` is used instead.

    Raises:
        RuntimeError: If at least one host reports a matching container.
    """
    # Name of the docker containers
    cname = cluster.docker.container_name \
        if cluster.docker.container_name != "" \
        else cluster.cluster_name

    with ssh.SSHClusterCommandRunner(cluster=cluster,
                                     parallel=True).in_cluster() as run:
        # Braces are doubled twice: the f-string collapses '{{{{' to '{{', so
        # docker receives the Go template '{{.Names}}' rather than the literal
        # text '{.Names}'.
        # NOTE(review): docker's 'name' filter also matches partial names, so
        # containers whose name merely contains `cname` are reported as well.
        ps = f"docker ps -f 'name={cname}' --format '{{{{.Names}}}}'"
        results = run(cmd=ps)

    # Any non-blank stdout means docker listed at least one matching container
    # on that host.
    hosts_with_existing_cname = [
        result.connection.host
        for result in results.values()
        if result.stdout.strip()
    ]

    if hosts_with_existing_cname:
        raise RuntimeError(
            f"Container '{cname}' found already running in the following hosts: "
            f"[{', '.join(hosts_with_existing_cname)}]. Clean the cluster before "
            f"continuing.")
def initialize(cluster: structures.cluster.Cluster) -> None:
    """Run every initialization command of *cluster* through the cluster runner.

    The commands are taken from ``cluster.initialization_commands`` and issued
    one after another, in list order, using the callable yielded by
    ``SSHClusterCommandRunner(...).in_cluster()``. Command output is printed.

    Args:
        cluster: Cluster description providing the commands and the hosts.
    """
    runner = ssh.SSHClusterCommandRunner(cluster=cluster, parallel=True)
    with runner.in_cluster() as run:
        for command in cluster.initialization_commands:
            run(cmd=command, print_output=True)
def head_start_ray(cluster: structures.cluster.Cluster) -> None:
    """Run every head "start ray" command of *cluster* through the head runner.

    The commands are taken from ``cluster.head_start_ray_commands`` and issued
    one after another, in list order, using the callable yielded by
    ``SSHClusterCommandRunner(...).in_head()``. Command output is printed.

    Args:
        cluster: Cluster description providing the commands and the hosts.
    """
    runner = ssh.SSHClusterCommandRunner(cluster=cluster, parallel=True)
    with runner.in_head() as run:
        for command in cluster.head_start_ray_commands:
            run(cmd=command, print_output=True)
def stop_cluster_with_docker(cluster: structures.cluster.Cluster, session: str) -> None:
    """Stop the docker containers labelled as belonging to clusterize.

    Args:
        cluster: Cluster description used to build the SSH command runner.
        session: Session whose containers are stopped (label
            ``clusterize.session=<session>``). When ``None``, every container
            carrying the ``clusterize`` label is selected instead.
    """
    # Command that lists the ids of the containers to stop.
    list_ids = (
        "docker ps -q -f label=clusterize"
        if session is None
        else f"docker ps -q -f label=clusterize.session={session}"
    )

    # Only call `docker stop` when the listing is non-empty; the trailing
    # `|| true` keeps the overall command from reporting a failure.
    stop_cmd = f'[ -n "$({list_ids})" ] && docker stop $({list_ids}) || true'

    runner = ssh.SSHClusterCommandRunner(cluster=cluster)
    with runner.in_cluster() as run:
        run(cmd=stop_cmd)
def execute(args: Namespace) -> None:
    """Run a user-supplied command on the head, the workers or the whole cluster.

    Reads ``args.project_dir``, ``args.session``, ``args.command``,
    ``args.args``, ``args.docker`` and ``args.on``. When ``args.session`` is
    ``None`` it is set (on ``args`` itself) to the project's name.

    Args:
        args: Parsed command-line arguments.

    Raises:
        RuntimeError: If no project is found for ``args.project_dir`` or if
            ``args.on`` is not one of ``"HEAD"``, ``"WORKERS"``, ``"CLUSTER"``.
    """
    project_data = utils.project.get_project_data(args.project_dir)
    if project_data is None:
        # Report the directory that was searched; `project_data` is None here,
        # so it cannot be used to build the message.
        raise RuntimeError(f"No project found in '{args.project_dir}'")

    with open(file=project_data.cluster, mode='r') as f:
        cls: structures.cluster.Cluster = structures.cluster.Cluster.from_yaml(
            data=f)

    if args.session is None:
        args.session = project_data.name

    command = args.command + " " + " ".join(args.args)
    if args.docker:
        cname = utils.docker.get_container_name(cluster=cls,
                                                session=args.session)
        command = utils.docker.wrap_in_docker(cmd=command,
                                              container_name=cname)

    # Map the requested target onto the runner method that scopes the command.
    scope_methods = {
        "HEAD": "in_head",
        "WORKERS": "in_workers",
        "CLUSTER": "in_cluster",
    }
    scope_method = scope_methods.get(args.on)
    if scope_method is None:
        raise RuntimeError(f"'{args.on}' not recognized")

    runner = ssh.SSHClusterCommandRunner(cluster=cls, parallel=True)
    with getattr(runner, scope_method)() as run:
        run(cmd=command, print_output=True)