Example #1
    def testAutoscaling(self):
        """Test the following behaviors:

        1. Spinning up a Ray cluster
        2. Scaling up a Ray worker via autoscaler.sdk.request_resources()
        3. Scaling up by updating the CRD's minReplicas
        4. Scaling down by removing the resource request and reducing maxReplicas

        Items 1. and 2. protect the example in the documentation.
        Items 3. and 4. protect the autoscaler's ability to respond to Ray CR updates.

        Resources requested by this test are safely within the bounds of an m5.xlarge
        instance.

        The resource REQUESTS are:
        - One Ray head pod
            - Autoscaler: .25 CPU, .5 Gi memory
            - Ray node: .5 CPU, .5 Gi memory
        - Three Worker pods
            - Ray node: .5 CPU, .5 Gi memory
        Total: 2.25 CPU, 2.5 Gi memory.

        Including operator and system pods, the total CPU requested is around 3.

        The CPU LIMIT of each Ray container is 1.
        The `num-cpus` arg to Ray start is 1 for each Ray container; thus Ray
        accounts for 1 CPU per Ray node in the test.
        """
        # Cluster creation
        logger.info("Creating a RayCluster with no worker pods.")
        self._apply_ray_cr(min_replicas=0, replicas=0)

        logger.info("Confirming presence of head.")
        wait_for_pods(goal_num_pods=1, namespace="default")

        logger.info("Waiting for head pod to start Running.")
        wait_for_pod_to_start(
            pod_name_filter="raycluster-complete-head", namespace="default"
        )
        logger.info("Confirming Ray is up on the head pod.")
        wait_for_ray_health(
            pod_name_filter="raycluster-complete-head", namespace="default"
        )

        head_pod = get_pod(
            pod_name_filter="raycluster-complete-head", namespace="default"
        )
        assert head_pod, "Could not find the Ray head pod."
        # Scale-up
        logger.info("Scaling up to one worker via Ray resource request.")
        # The request for 2 cpus should give us a 1-cpu head (already present) and a
        # 1-cpu worker (will await scale-up).
        kubectl_exec_python_script(
            script_name="scale_up.py",
            pod=head_pod,
            container="ray-head",
            namespace="default",
        )
        # TODO (Dmitri) Use Ray Client and/or Ray Job submission API to submit
        # instead of `kubectl exec`.
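        # For reference, a minimal sketch of what a scale_up.py-style script could
        # contain (hypothetical; the actual script ships alongside this test):
        #
        #     import ray
        #     from ray.autoscaler.sdk import request_resources
        #
        #     ray.init("auto")
        #     # Ask the autoscaler for capacity totaling 2 CPUs.
        #     request_resources(num_cpus=2)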
        logger.info("Confirming number of workers.")
        wait_for_pods(goal_num_pods=2, namespace="default")

        logger.info("Scaling up to two workers by editing minReplicas.")
        # replicas=1 reflects the current number of workers
        # (which is what we expect to be already present in the Ray CR)
        self._apply_ray_cr(
            min_replicas=2,
            replicas=1,
            # Validate that replicas set on the Ray CR by the autoscaler
            # is indeed 1:
            validate_replicas=True,
        )
        logger.info("Confirming number of workers.")
        wait_for_pods(goal_num_pods=3, namespace="default")

        # GPU upscaling.
        # 1. Check that we haven't already spuriously started a fake GPU node.
        assert not any(
            "gpu" in pod_name for pod_name in get_pod_names(namespace="default")
        )
        # 2. Trigger GPU upscaling by requesting placement of a GPU actor.
        logger.info("Scheduling an Actor with GPU demands.")
        # Use Ray client to validate that it works against KubeRay.
        with ray_client_port_forward(
            head_service="raycluster-complete-head-svc", ray_namespace="gpu-test"
        ):
            gpu_actor_placement.main()
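        # Roughly, gpu_actor_placement.main() is expected to request a GPU actor
        # along these lines (hypothetical sketch; see gpu_actor_placement.py):
        #
        #     @ray.remote(num_gpus=1)
        #     class GPUActor:
        #         def where(self):
        #             return "on-a-gpu-node" if ray.get_gpu_ids() else "no-gpu"
        #
        #     actor = GPUActor.remote()  # The 1-GPU demand triggers upscaling.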
        # 3. Confirm new pod number and presence of fake GPU worker.
        logger.info("Confirming fake GPU worker up-scaling.")
        wait_for_pods(goal_num_pods=4, namespace="default")
        gpu_workers = [
            pod_name
            for pod_name in get_pod_names(namespace="default")
            if "gpu" in pod_name
        ]
        assert len(gpu_workers) == 1
        # 4. Confirm that the GPU actor is up and that Ray believes
        # the node the actor is on has a GPU.
        logger.info("Confirming GPU actor placement.")
        with ray_client_port_forward(
            head_service="raycluster-complete-head-svc", ray_namespace="gpu-test"
        ):
            out = gpu_actor_validation.main()
        # Confirms the actor was placed on a GPU-annotated node.
        # (See gpu_actor_validation.py for details.)
        assert "on-a-gpu-node" in out

        # Scale-down
        logger.info("Removing resource demands.")
        kubectl_exec_python_script(
            script_name="scale_down.py",
            pod=head_pod,
            container="ray-head",
            namespace="default",
        )
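        # scale_down.py presumably just cancels the earlier resource request,
        # e.g. (hypothetical sketch):
        #
        #     import ray
        #     from ray.autoscaler.sdk import request_resources
        #
        #     ray.init("auto")
        #     # Clearing the CPU request lets idle workers be scaled down.
        #     request_resources(num_cpus=0)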
        logger.info("Scaling down all workers by editing maxReplicas.")
        # TODO (Dmitri) Expose worker idleTimeout in KubeRay CRD, set it low,
        # and validate autoscaler-initiated idle timeout, instead of modifying the CR.
        # (replicas=2 reflects the current number of workers)
        self._apply_ray_cr(
            min_replicas=0,
            max_replicas=0,
            replicas=2,
            # Check that the replicas set on the Ray CR by the
            # autoscaler is indeed 2:
            validate_replicas=True,
        )
        logger.info("Confirming workers are gone.")
        wait_for_pods(goal_num_pods=1, namespace="default")

        # Check custom resource upscaling.
        # First, restore max replicas to allow worker upscaling.
        self._apply_ray_cr(
            min_replicas=0,
            max_replicas=10,
            replicas=0,
            # Check that the replicas set on the Ray CR by the
            # autoscaler is indeed 0:
            validate_replicas=True,
        )

        # Submit two {"Custom2": 3} bundles to upscale two workers with 5
        # Custom2 capacity each.
        logger.info("Scaling up workers with request for custom resources.")
        kubectl_exec_python_script(
            script_name="scale_up_custom.py",
            pod=head_pod,
            container="ray-head",
            namespace="default",
        )
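        # The custom-resource request could look roughly like this (hypothetical
        # sketch of scale_up_custom.py):
        #
        #     import ray
        #     from ray.autoscaler.sdk import request_resources
        #
        #     ray.init("auto")
        #     # Two {"Custom2": 3} bundles; each worker advertises 5 Custom2,
        #     # so the bundles cannot fit on a single worker.
        #     request_resources(bundles=[{"Custom2": 3}, {"Custom2": 3}])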
        logger.info("Confirming two workers have scaled up.")
        wait_for_pods(goal_num_pods=3, namespace="default")

        # Cluster deletion
        logger.info("Deleting Ray cluster.")
        subprocess.check_call(
            ["kubectl", "delete", "raycluster", "raycluster-complete"]
        )
        logger.info("Confirming Ray pods are gone.")
        wait_for_pods(goal_num_pods=0, namespace="default")
Example #2
    def testAutoscaling(self):
        """Test the following behaviors:

        1. Spinning up a Ray cluster
        2. Scaling up Ray workers via autoscaler.sdk.request_resources()
        3. Scaling up by updating the CRD's minReplicas
        4. Scaling down by removing the resource request and reducing maxReplicas
        5. Autoscaler recognizes GPU annotations and Ray custom resources
        6. Autoscaler and operator ignore pods marked for deletion

        Items 1. and 2. protect the example in the documentation.
        Items 3. and 4. protect the autoscaler's ability to respond to Ray CR updates.

        Tests the following modes of interaction with a Ray cluster on K8s:
        1. kubectl exec
        2. Ray Client
        3. Ray Job Submission

        Resources requested by this test are safely within the bounds of an m5.xlarge
        instance.

        The resource REQUESTS are:
        - One Ray head pod
            - Autoscaler: .25 CPU, .5 Gi memory
            - Ray node: .5 CPU, .5 Gi memory
        - Three Worker pods
            - Ray node: .5 CPU, .5 Gi memory
        Total: 2.25 CPU, 2.5 Gi memory.

        Including operator and system pods, the total CPU requested is around 3.

        The CPU LIMIT of each Ray container is 1.
        The `num-cpus` arg to Ray start is 1 for each Ray container; thus Ray
        accounts for 1 CPU per Ray node in the test.
        """
        # Cluster creation
        logger.info("Creating a RayCluster with no worker pods.")
        self._apply_ray_cr(min_replicas=0, replicas=0, max_replicas=3)

        logger.info("Confirming presence of head.")
        wait_for_pods(goal_num_pods=1, namespace=RAY_CLUSTER_NAMESPACE)

        logger.info("Waiting for head pod to start Running.")
        wait_for_pod_to_start(pod_name_filter=HEAD_POD_PREFIX,
                              namespace=RAY_CLUSTER_NAMESPACE)
        logger.info("Confirming Ray is up on the head pod.")
        wait_for_ray_health(pod_name_filter=HEAD_POD_PREFIX,
                            namespace=RAY_CLUSTER_NAMESPACE)

        head_pod = get_pod(pod_name_filter=HEAD_POD_PREFIX,
                           namespace=RAY_CLUSTER_NAMESPACE)
        assert head_pod, "Could not find the Ray head pod."
        # Scale-up
        logger.info("Scaling up to one worker via Ray resource request.")
        # The request for 2 cpus should give us a 1-cpu head (already present) and a
        # 1-cpu worker (will await scale-up).
        kubectl_exec_python_script(  # Interaction mode #1: `kubectl exec`
            script_name="scale_up.py",
            pod=head_pod,
            container="ray-head",
            namespace="default",
        )
        logger.info("Confirming number of workers.")
        wait_for_pods(goal_num_pods=2, namespace=RAY_CLUSTER_NAMESPACE)

        # Pods marked for deletion are ignored.
        logger.info(
            "Confirming that operator and autoscaler ignore pods marked for"
            " termination.")
        worker_pod = get_pod(pod_name_filter=CPU_WORKER_PREFIX,
                             namespace=RAY_CLUSTER_NAMESPACE)
        logger.info("Patching finalizer onto worker pod to block termination.")
        add_finalizer = {"metadata": {"finalizers": ["ray.io/test"]}}
        kubectl_patch(
            kind="pod",
            name=worker_pod,
            namespace=RAY_CLUSTER_NAMESPACE,
            patch=add_finalizer,
        )
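        # The patch above corresponds roughly to the following kubectl invocation
        # (illustrative only):
        #
        #     kubectl patch pod <worker-pod-name> -n <namespace> \
        #         -p '{"metadata": {"finalizers": ["ray.io/test"]}}'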
        logger.info("Marking worker for deletion.")
        kubectl_delete(kind="pod",
                       name=worker_pod,
                       namespace=RAY_CLUSTER_NAMESPACE,
                       wait=False)
        # Deletion of the worker hangs forever because of the finalizer.
        # We expect another pod to come up to replace it.
        logger.info(
            "Confirming another worker is up to replace the one marked for deletion."
        )
        wait_for_pods(goal_num_pods=3, namespace=RAY_CLUSTER_NAMESPACE)
        logger.info("Confirming NodeProvider ignores terminating nodes.")
        # 3 pods, 2 of which are not marked for deletion.
        assert self._non_terminated_nodes_count() == 2
        remove_finalizer = {"metadata": {"finalizers": []}}
        logger.info("Removing finalizer to allow deletion.")
        kubectl_patch(
            kind="pod",
            name=worker_pod,
            namespace="default",
            patch=remove_finalizer,
            patch_type="merge",
        )
        logger.info("Confirming worker deletion.")
        wait_for_pods(goal_num_pods=2, namespace=RAY_CLUSTER_NAMESPACE)

        # Ray CR updates.
        logger.info("Scaling up to two workers by editing minReplicas.")
        # replicas=1 reflects the current number of workers
        # (which is what we expect to be already present in the Ray CR)
        self._apply_ray_cr(
            min_replicas=2,
            replicas=1,
            # Validate that replicas set on the Ray CR by the autoscaler
            # is indeed 1:
            validate_replicas=True,
        )
        logger.info("Confirming number of workers.")
        wait_for_pods(goal_num_pods=3, namespace=RAY_CLUSTER_NAMESPACE)

        # GPU upscaling.
        # 1. Check that we haven't already spuriously started a fake GPU node.
        assert not any(
            "gpu" in pod_name
            for pod_name in get_pod_names(namespace=RAY_CLUSTER_NAMESPACE))
        # 2. Trigger GPU upscaling by requesting placement of a GPU actor.
        logger.info("Scheduling an Actor with GPU demands.")
        # Use Ray Client to validate that it works against KubeRay.
        with ray_client_port_forward(  # Interaction mode #2: Ray Client
                head_service=HEAD_SERVICE,
                ray_namespace="gpu-test"):
            gpu_actor_placement.main()
        # 3. Confirm new pod number and presence of fake GPU worker.
        logger.info("Confirming fake GPU worker up-scaling.")
        wait_for_pods(goal_num_pods=4, namespace=RAY_CLUSTER_NAMESPACE)
        gpu_workers = [
            pod_name
            for pod_name in get_pod_names(namespace=RAY_CLUSTER_NAMESPACE)
            if "gpu" in pod_name
        ]
        assert len(gpu_workers) == 1
        # 4. Confirm that the GPU actor is up and that Ray believes
        # the node the actor is on has a GPU.
        logger.info("Confirming GPU actor placement.")
        with ray_client_port_forward(head_service=HEAD_SERVICE,
                                     ray_namespace="gpu-test"):
            out = gpu_actor_validation.main()
        # Confirms the actor was placed on a GPU-annotated node.
        # (See gpu_actor_validation.py for details.)
        assert "on-a-gpu-node" in out

        # Scale-down
        logger.info("Removing resource demands.")
        kubectl_exec_python_script(
            script_name="scale_down.py",
            pod=head_pod,
            container="ray-head",
            namespace="default",
        )
        logger.info("Scaling down all workers by editing maxReplicas.")
        # TODO (Dmitri) Expose worker idleTimeout in KubeRay CRD, set it low,
        # and validate autoscaler-initiated idle timeout, instead of modifying the CR.
        # (replicas=2 reflects the current number of workers)
        self._apply_ray_cr(
            min_replicas=0,
            max_replicas=0,
            replicas=2,
            # Check that the replicas set on the Ray CR by the
            # autoscaler is indeed 2:
            validate_replicas=True,
        )
        logger.info("Confirming workers are gone.")
        wait_for_pods(goal_num_pods=1, namespace=RAY_CLUSTER_NAMESPACE)

        # Check custom resource upscaling.
        # First, restore max replicas to allow worker upscaling.
        self._apply_ray_cr(
            min_replicas=0,
            max_replicas=10,
            replicas=0,
            # Check that the replicas set on the Ray CR by the
            # autoscaler is indeed 0:
            validate_replicas=True,
        )

        # Submit two {"Custom2": 3} bundles to upscale two workers with 5
        # Custom2 capacity each.
        logger.info("Scaling up workers with request for custom resources.")
        job_logs = ray_job_submit(  # Interaction mode #3: Ray Job Submission
            script_name="scale_up_custom.py",
            head_service=HEAD_SERVICE,
        )
        assert job_logs == "Submitted custom scale request!\n"
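        # ray_job_submit presumably wraps Ray's Job Submission SDK; a bare-bones
        # equivalent (hypothetical, assuming the dashboard port is forwarded to
        # localhost:8265) would be:
        #
        #     from ray.job_submission import JobSubmissionClient
        #
        #     client = JobSubmissionClient("http://127.0.0.1:8265")
        #     job_id = client.submit_job(entrypoint="python scale_up_custom.py")
        #     # Poll client.get_job_status(job_id), then read the script's output
        #     # with client.get_job_logs(job_id).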

        logger.info("Confirming two workers have scaled up.")
        wait_for_pods(goal_num_pods=3, namespace=RAY_CLUSTER_NAMESPACE)

        # Cluster deletion
        logger.info("Deleting Ray cluster.")
        kubectl_delete(kind="raycluster",
                       name=RAY_CLUSTER_NAME,
                       namespace=RAY_CLUSTER_NAMESPACE)
        logger.info("Confirming Ray pods are gone.")
        wait_for_pods(goal_num_pods=0, namespace=RAY_CLUSTER_NAMESPACE)