def test_docker_shm_override(run_option_type): process_runner = MockProcessRunner() provider = MockProvider() provider.create_node({}, {}, 1) cluster_name = "cluster" docker_config = { "container_name": "container", "image": "rayproject/ray:latest", run_option_type: ["--shm-size=80g"] } args = { "log_prefix": "prefix", "node_id": 0, "provider": provider, "auth_config": auth_config, "cluster_name": cluster_name, "process_runner": process_runner, "use_internal_ip": False, "docker_config": docker_config, } cmd_runner = DockerCommandRunner(**args) process_runner.respond_to_call("json .Config.Env", 2 * ["[]"]) cmd_runner.run_init(as_head=True, file_mounts={}, sync_run_yet=True) # Ensure the user-provided SHM size is used. process_runner.assert_has_call("1.2.3.4", pattern="--shm-size=80g") # Ensure that SHM auto detection is bypassed process_runner.assert_not_has_call("1.2.3.4", pattern="/proc/meminfo")
def test_rsync_without_exclude_and_filter(): process_runner = MockProcessRunner() provider = MockProvider() provider.create_node({}, {}, 1) cluster_name = "cluster" args = { "log_prefix": "prefix", "node_id": 0, "provider": provider, "auth_config": auth_config, "cluster_name": cluster_name, "process_runner": process_runner, "use_internal_ip": False, } cmd_runner = SSHCommandRunner(**args) local_mount = "/home/ubuntu/base/mount/" remote_mount = "/root/protected_mount/" process_runner.respond_to_call("docker inspect -f", ["true"]) cmd_runner.run_rsync_up(local_mount, remote_mount, options={ "docker_mount_if_possible": True, }) process_runner.assert_not_has_call("1.2.3.4", pattern="--exclude test") process_runner.assert_not_has_call("1.2.3.4", pattern="--filter dir-merge,- .ignore")
def testCommandPassing(self): t = "custom" config = MULTI_WORKER_CLUSTER.copy() config["available_node_types"]["p2.8xlarge"][ "worker_setup_commands"] = ["new_worker_setup_command"] config["available_node_types"]["p2.xlarge"][ "initialization_commands"] = ["new_worker_initialization_cmd"] config["available_node_types"]["p2.xlarge"]["resources"][t] = 1 # Commenting out this line causes the test case to fail?!?! config["min_workers"] = 0 config["max_workers"] = 10 config_path = self.write_config(config) self.provider = MockProvider() runner = MockProcessRunner() autoscaler = StandardAutoscaler( config_path, LoadMetrics(), max_failures=0, process_runner=runner, update_interval_s=0) assert len(self.provider.non_terminated_nodes({})) == 0 autoscaler.update() self.waitForNodes(0) autoscaler.request_resources([{"CPU": 1}]) autoscaler.update() self.waitForNodes(1) assert self.provider.mock_nodes[0].node_type == "m4.large" autoscaler.request_resources([{"GPU": 8}]) autoscaler.update() self.waitForNodes(2) assert self.provider.mock_nodes[1].node_type == "p2.8xlarge" autoscaler.request_resources([{"GPU": 1}] * 9) autoscaler.update() self.waitForNodes(3) assert self.provider.mock_nodes[2].node_type == "p2.xlarge" autoscaler.update() sleep(0.1) runner.assert_has_call(self.provider.mock_nodes[1].internal_ip, "new_worker_setup_command") runner.assert_not_has_call(self.provider.mock_nodes[1].internal_ip, "setup_cmd") runner.assert_not_has_call(self.provider.mock_nodes[1].internal_ip, "worker_setup_cmd") runner.assert_has_call(self.provider.mock_nodes[2].internal_ip, "new_worker_initialization_cmd") runner.assert_not_has_call(self.provider.mock_nodes[2].internal_ip, "init_cmd")
def test_docker_rsync(): process_runner = MockProcessRunner() provider = MockProvider() provider.create_node({}, {}, 1) cluster_name = "cluster" docker_config = {"container_name": "container"} args = { "log_prefix": "prefix", "node_id": 0, "provider": provider, "auth_config": auth_config, "cluster_name": cluster_name, "process_runner": process_runner, "use_internal_ip": False, "docker_config": docker_config, } cmd_runner = DockerCommandRunner(**args) local_mount = "/home/ubuntu/base/mount/" remote_mount = "/root/protected_mount/" docker_mount_prefix = get_docker_host_mount_location(cluster_name) remote_host_mount = f"{docker_mount_prefix}{remote_mount}" local_file = "/home/ubuntu/base-file" remote_file = "/root/protected-file" remote_host_file = f"{docker_mount_prefix}{remote_file}" process_runner.respond_to_call("docker inspect -f", ["true"]) cmd_runner.run_rsync_up(local_mount, remote_mount, options={"docker_mount_if_possible": True}) # Make sure we do not copy directly to raw destination process_runner.assert_not_has_call( "1.2.3.4", pattern=f"-avz {local_mount} [email protected]:{remote_mount}") process_runner.assert_not_has_call("1.2.3.4", pattern=f"mkdir -p {remote_mount}") # No docker cp for file_mounts process_runner.assert_not_has_call("1.2.3.4", pattern="docker cp") process_runner.assert_has_call( "1.2.3.4", pattern=f"-avz {local_mount} [email protected]:{remote_host_mount}") process_runner.clear_history() ############################## process_runner.respond_to_call("docker inspect -f", ["true"]) cmd_runner.run_rsync_up(local_file, remote_file, options={"docker_mount_if_possible": False}) # Make sure we do not copy directly to raw destination process_runner.assert_not_has_call( "1.2.3.4", pattern=f"-avz {local_file} [email protected]:{remote_file}") process_runner.assert_not_has_call("1.2.3.4", pattern=f"mkdir -p {remote_file}") process_runner.assert_has_call("1.2.3.4", pattern="docker cp") process_runner.assert_has_call( "1.2.3.4", pattern=f"-avz {local_file} [email protected]:{remote_host_file}") process_runner.clear_history() ############################## cmd_runner.run_rsync_down(remote_mount, local_mount, options={"docker_mount_if_possible": True}) process_runner.assert_not_has_call("1.2.3.4", pattern="docker cp") process_runner.assert_not_has_call( "1.2.3.4", pattern=f"-avz [email protected]:{remote_mount} {local_mount}") process_runner.assert_has_call( "1.2.3.4", pattern=f"-avz [email protected]:{remote_host_mount} {local_mount}") process_runner.clear_history() ############################## cmd_runner.run_rsync_down(remote_file, local_file, options={"docker_mount_if_possible": False}) process_runner.assert_has_call("1.2.3.4", pattern="docker cp") process_runner.assert_not_has_call( "1.2.3.4", pattern=f"-avz [email protected]:{remote_file} {local_file}") process_runner.assert_has_call( "1.2.3.4", pattern=f"-avz [email protected]:{remote_host_file} {local_file}")
def testDockerWorkers(self): config = MULTI_WORKER_CLUSTER.copy() config["available_node_types"]["p2.8xlarge"]["docker"] = { "worker_image": "p2.8x_image:latest", "worker_run_options": ["p2.8x-run-options"] } config["available_node_types"]["p2.xlarge"]["docker"] = { "worker_image": "p2x_image:nightly" } config["docker"]["worker_run_options"] = ["standard-run-options"] config["docker"]["image"] = "default-image:nightly" config["docker"]["worker_image"] = "default-image:nightly" # Commenting out this line causes the test case to fail?!?! config["min_workers"] = 0 config["max_workers"] = 10 config_path = self.write_config(config) self.provider = MockProvider() runner = MockProcessRunner() autoscaler = StandardAutoscaler(config_path, LoadMetrics(), max_failures=0, process_runner=runner, update_interval_s=0) assert len(self.provider.non_terminated_nodes({})) == 0 autoscaler.update() self.waitForNodes(0) autoscaler.request_resources([{"CPU": 1}]) autoscaler.update() self.waitForNodes(1) assert self.provider.mock_nodes[0].node_type == "m4.large" autoscaler.request_resources([{"GPU": 8}]) autoscaler.update() self.waitForNodes(2) assert self.provider.mock_nodes[1].node_type == "p2.8xlarge" autoscaler.request_resources([{"GPU": 1}] * 9) autoscaler.update() self.waitForNodes(3) assert self.provider.mock_nodes[2].node_type == "p2.xlarge" autoscaler.update() # Fill up m4, p2.8, p2 and request 2 more CPUs autoscaler.request_resources([{ "CPU": 2 }, { "CPU": 16 }, { "CPU": 32 }, { "CPU": 2 }]) autoscaler.update() self.waitForNodes(4) assert self.provider.mock_nodes[3].node_type == "m4.16xlarge" autoscaler.update() sleep(0.1) runner.assert_has_call(self.provider.mock_nodes[1].internal_ip, "p2.8x-run-options") runner.assert_has_call(self.provider.mock_nodes[1].internal_ip, "p2.8x_image:latest") runner.assert_not_has_call(self.provider.mock_nodes[1].internal_ip, "default-image:nightly") runner.assert_not_has_call(self.provider.mock_nodes[1].internal_ip, "standard-run-options") runner.assert_has_call(self.provider.mock_nodes[2].internal_ip, "p2x_image:nightly") runner.assert_has_call(self.provider.mock_nodes[2].internal_ip, "standard-run-options") runner.assert_not_has_call(self.provider.mock_nodes[2].internal_ip, "p2.8x-run-options") runner.assert_has_call(self.provider.mock_nodes[3].internal_ip, "default-image:nightly") runner.assert_has_call(self.provider.mock_nodes[3].internal_ip, "standard-run-options") runner.assert_not_has_call(self.provider.mock_nodes[3].internal_ip, "p2.8x-run-options") runner.assert_not_has_call(self.provider.mock_nodes[3].internal_ip, "p2x_image:nightly")