Example #1
0
    def _apply_ray_cr(
        self,
        min_replicas=0,
        max_replicas=300,
        replicas=0,
        validate_replicas: bool = False,
    ) -> None:
        """Apply Ray CR config yaml, with configurable replica fields for the cpu
        workerGroup.

        The config is built by `self._get_ray_cr_config`, dumped to a temporary
        yaml file and applied with `kubectl apply -f`.

        If the CR does not yet exist, `replicas` can be set as desired.
        If the CR does already exist, the recommended usage is this:
            (1) Set `replicas` to what we currently expect it to be.
            (2) Set `validate_replicas` to True. We will then check that the
                replicas set on the CR coincides with `replicas`.

        Args:
            min_replicas: Passed through to `self._get_ray_cr_config`.
            max_replicas: Passed through to `self._get_ray_cr_config`.
            replicas: Passed through to `self._get_ray_cr_config`; also the value
                the existing CR is compared against when `validate_replicas`
                is True.
            validate_replicas: If True, fetch the existing CR and check the
                `replicas` field of its first worker group
                (`workerGroupSpecs[0]`) before applying.

        Raises:
            AssertionError: If `validate_replicas` is True and the replicas set
                on the existing CR differ from `replicas`.
            subprocess.CalledProcessError: If `kubectl apply` exits non-zero.
        """
        # Validate before creating the temp file so that a failed check does not
        # allocate anything.
        if validate_replicas:
            raycluster = get_raycluster("raycluster-complete", namespace="default")
            actual_replicas = raycluster["spec"]["workerGroupSpecs"][0]["replicas"]
            # Raise explicitly rather than using `assert`: the check must survive
            # `python -O`, and the message reports expected vs. actual.
            if actual_replicas != replicas:
                raise AssertionError(
                    "Expected worker replicas for raycluster-complete to be "
                    f"{replicas}, but the CR currently has {actual_replicas}."
                )
            logger.info(
                "Validated that worker replicas for raycluster-complete"
                f" is currently {replicas}."
            )

        cr_config = self._get_ray_cr_config(
            min_replicas=min_replicas, max_replicas=max_replicas, replicas=replicas
        )
        with tempfile.NamedTemporaryFile("w") as config_file:
            yaml.dump(cr_config, config_file)
            # kubectl reads the file by name, so buffered content must be
            # flushed to disk first.
            config_file.flush()
            subprocess.check_call(["kubectl", "apply", "-f", config_file.name])
Example #2
0
    def _apply_ray_cr(
        self,
        min_replicas=0,
        cpu_replicas=0,
        gpu_replicas=0,
        validate_replicas: bool = False,
    ) -> None:
        """Apply Ray CR config yaml, with configurable replica fields for the cpu
        and gpu workerGroups.

        The config is built by `self._get_ray_cr_config`, dumped to a temporary
        yaml file and applied with `kubectl apply -f`.

        If the CR does not yet exist, `cpu_replicas` and `gpu_replicas` can be
        set as desired.
        If the CR does already exist, the recommended usage is this:
            (1) Set `cpu_replicas` and `gpu_replicas` to what we currently expect
                them to be.
            (2) Set `validate_replicas` to True. We will then check that the
                replicas set on the CR coincide with `cpu_replicas` and
                `gpu_replicas`.

        Args:
            min_replicas: Passed through to `self._get_ray_cr_config`.
            cpu_replicas: Passed through to `self._get_ray_cr_config`; also the
                value `workerGroupSpecs[0]["replicas"]` of the existing CR is
                compared against when `validate_replicas` is True.
            gpu_replicas: Passed through to `self._get_ray_cr_config`; also the
                value `workerGroupSpecs[1]["replicas"]` of the existing CR is
                compared against when `validate_replicas` is True.
            validate_replicas: If True, fetch the existing CR and check both
                replica counts before applying.

        Raises:
            AssertionError: If `validate_replicas` is True and either replica
                count on the existing CR differs from the expected value.
            subprocess.CalledProcessError: If `kubectl apply` exits non-zero.
        """
        # Validate before creating the temp file so that a failed check does not
        # allocate anything.
        if validate_replicas:
            raycluster = get_raycluster(RAY_CLUSTER_NAME,
                                        namespace=RAY_CLUSTER_NAMESPACE)
            worker_groups = raycluster["spec"]["workerGroupSpecs"]
            # Raise explicitly rather than using `assert`: the checks must
            # survive `python -O`, and the messages report expected vs. actual.
            # The cpu group (index 0) is checked first, then gpu (index 1).
            actual_cpu_replicas = worker_groups[0]["replicas"]
            if actual_cpu_replicas != cpu_replicas:
                raise AssertionError(
                    f"Expected cpu worker replicas for {RAY_CLUSTER_NAME} to be "
                    f"{cpu_replicas}, but the CR currently has "
                    f"{actual_cpu_replicas}.")
            actual_gpu_replicas = worker_groups[1]["replicas"]
            if actual_gpu_replicas != gpu_replicas:
                raise AssertionError(
                    f"Expected gpu worker replicas for {RAY_CLUSTER_NAME} to be "
                    f"{gpu_replicas}, but the CR currently has "
                    f"{actual_gpu_replicas}.")
            logger.info(
                "Validated that cpu and gpu worker replicas for "
                f"{RAY_CLUSTER_NAME} are currently {cpu_replicas} and"
                f" {gpu_replicas}, respectively.")

        cr_config = self._get_ray_cr_config(
            min_replicas=min_replicas,
            cpu_replicas=cpu_replicas,
            gpu_replicas=gpu_replicas,
        )
        with tempfile.NamedTemporaryFile("w") as config_file:
            yaml.dump(cr_config, config_file)
            # kubectl reads the file by name, so buffered content must be
            # flushed to disk first.
            config_file.flush()
            subprocess.check_call(["kubectl", "apply", "-f", config_file.name])