Beispiel #1
0
def sanitize_resources(
    resources: Union[k8s_schemas.V1ResourceRequirements, Dict]
) -> Optional[k8s_schemas.V1ResourceRequirements]:
    """Normalize resource requirements so that every quantity is a string.

    Args:
        resources: Either a ``V1ResourceRequirements`` instance or a dict
            that may carry ``"limits"`` and/or ``"requests"`` mappings.

    Returns:
        A new ``V1ResourceRequirements`` whose ``limits`` and ``requests``
        values have been converted with ``str``, or ``None`` when
        ``resources`` is falsy.
    """

    def validate_resources(r_field: Optional[Dict]) -> Optional[Dict]:
        # Falsy sections (None or an empty mapping) are handed back untouched.
        if not r_field:
            return r_field
        # Build a fresh dict instead of rewriting the caller's mapping in
        # place, so sanitizing never alters the object that was passed in.
        return {key: str(value) for key, value in r_field.items()}

    if not resources:
        return None

    if isinstance(resources, dict):
        limits = resources.get("limits")
        requests = resources.get("requests")
    else:
        limits = resources.limits
        requests = resources.requests

    return k8s_schemas.V1ResourceRequirements(
        limits=validate_resources(limits),
        requests=validate_resources(requests),
    )
Beispiel #2
0
 def test_requests_gpu(self):
     """Expect ``requests_gpu`` to be False for a cpu-only limit and True
     when a ``gpu`` resource key appears in limits or requests."""
     cpu_only = k8s_schemas.V1ResourceRequirements(limits={"cpu": 1})
     amd_limit = k8s_schemas.V1ResourceRequirements(
         limits={"amd.com/gpu": 1})
     nvidia_request = k8s_schemas.V1ResourceRequirements(
         requests={"nvidia.com/gpu": 1})

     assert requests_gpu(cpu_only) is False
     assert requests_gpu(amd_limit) is True
     assert requests_gpu(nvidia_request) is True
Beispiel #3
0
 def test_requests_tpu(self):
     """Expect ``requests_tpu`` to be False for a cpu-only limit and True
     when a TPU resource key appears in limits or requests."""
     cpu_only = k8s_schemas.V1ResourceRequirements(limits={"cpu": 1})
     tpu_limit = k8s_schemas.V1ResourceRequirements(
         limits={"cloud-tpus.google.com/v2": 1})
     # The resource name must not end with ':'; use the same well-formed
     # key as the limits case so the fixture mirrors a real spec.
     tpu_request = k8s_schemas.V1ResourceRequirements(
         requests={"cloud-tpus.google.com/v2": 32})

     assert requests_tpu(cpu_only) is False
     assert requests_tpu(tpu_limit) is True
     assert requests_tpu(tpu_request) is True
Beispiel #4
0
def get_default_notification_container():
    """Build the default container that runs ``polyaxon notify``.

    The image tag is fixed to ``dev`` and the pull policy is
    ``PullPolicy.ALWAYS``. The double-brace arguments are passed through
    verbatim (presumably rendered later by the caller; confirm).
    """
    image = "polyaxon/polyaxon-events-handlers:{}".format("dev")
    notify_args = [
        "--kind={{kind}}",
        "--owner={{owner}}",
        "--project={{project}}",
        "--run_uuid={{run_uuid}}",
        "--run_name={{run_name}}",
        "--condition={{condition}}",
    ]
    resources = k8s_schemas.V1ResourceRequirements(
        limits=dict(cpu="0.5", memory="100Mi"),
        requests=dict(cpu="0.1", memory="20Mi"),
    )
    return V1Container(
        name=MAIN_JOB_CONTAINER,
        image=image,
        image_pull_policy=PullPolicy.ALWAYS.value,
        command=["polyaxon", "notify"],
        args=notify_args,
        resources=resources,
    )
Beispiel #5
0
def get_default_notification_container():
    """Build the default container that runs ``polyaxon notify``.

    The image is tagged with ``pkg.VERSION`` and pulled with
    ``PullPolicy.IF_NOT_PRESENT``. The double-brace arguments are passed
    through verbatim (presumably rendered later by the caller; confirm).
    """
    image = "polyaxon/polyaxon-events-handlers:{}".format(pkg.VERSION)
    notify_args = [
        "--kind={{kind}}",
        "--owner={{owner}}",
        "--project={{project}}",
        "--run-uuid={{run_uuid}}",
        "{{params.condition.as_arg}}",
        "{{params.run_name.as_arg}}",
    ]
    resources = k8s_schemas.V1ResourceRequirements(
        limits=dict(cpu="0.5", memory="100Mi"),
        requests=dict(cpu="0.1", memory="20Mi"),
    )
    return V1Container(
        name=MAIN_JOB_CONTAINER,
        image=image,
        image_pull_policy=PullPolicy.IF_NOT_PRESENT.value,
        command=["polyaxon", "notify"],
        args=notify_args,
        resources=resources,
    )
Beispiel #6
0
def get_batch_cleaner_container(
    store: V1ConnectionType,
    paths: List[str],
):
    """Build a container that runs ``polyaxon clean-artifacts`` on many paths.

    Args:
        store: Connection whose ``store_path`` is joined with each entry of
            ``paths`` and whose ``kind`` (underscores replaced by dashes) is
            passed to the command.
        paths: Sub-paths relative to ``store.store_path``; each one becomes
            a ``-sp=<path>`` flag.

    Returns:
        A ``V1Container`` that executes the assembled command through
        ``/bin/bash -c``.
    """
    sp_flags = " ".join(
        "-sp={}".format(os.path.join(store.store_path, relative))
        for relative in paths
    )
    store_kind = store.kind.replace("_", "-")
    shell_command = "polyaxon clean-artifacts {} {}".format(
        store_kind, sp_flags)

    resources = k8s_schemas.V1ResourceRequirements(
        limits=dict(cpu="0.5", memory="160Mi"),
        requests=dict(cpu="0.1", memory="80Mi"),
    )
    return V1Container(
        name=MAIN_JOB_CONTAINER,
        image="polyaxon/polyaxon-init:{}".format(pkg.VERSION),
        image_pull_policy=PullPolicy.IF_NOT_PRESENT.value,
        command=["/bin/bash", "-c"],
        args=[shell_command],
        resources=resources,
    )
    def test_get_main_container_simple_params(self):
        """Check that ``get_main_container`` carries simple parameters over.

        Only a container spec, volume mounts, a log level and a port are
        supplied; every other input is ``None``.
        """
        mounts = [
            k8s_schemas.V1VolumeMount(
                name="test", mount_path="/mount_test", read_only=True
            )
        ]
        resources = k8s_schemas.V1ResourceRequirements(
            requests=dict(cpu="1", memory="256Mi"),
            limits=dict(cpu="1", memory="256Mi"),
        )
        source_container = k8s_schemas.V1Container(
            name="main",
            image="job_docker_image",
            image_pull_policy="IfNotPresent",
            command=["cmd", "-p", "-c"],
            args=["arg1", "arg2"],
            resources=resources,
        )

        container = get_main_container(
            container_id="new-name",
            main_container=source_container,
            contexts=None,
            volume_mounts=mounts,
            log_level="info",
            artifacts_store=None,
            init=None,
            connection_by_names=None,
            connections=None,
            secrets=None,
            config_maps=None,
            kv_env_vars=None,
            env=None,
            ports=23,
            run_path=None,
        )

        # The container takes the requested id as its name; the remaining
        # fields of the source spec are expected to be carried over.
        assert container.name == "new-name"
        assert container.image == "job_docker_image"
        assert container.image_pull_policy == "IfNotPresent"
        assert container.command == ["cmd", "-p", "-c"]
        assert container.args == ["arg1", "arg2"]

        expected_ports = [k8s_schemas.V1ContainerPort(container_port=23)]
        assert container.ports == expected_ports

        expected_env = [
            get_env_var(name=POLYAXON_KEYS_LOG_LEVEL, value="info")
        ]
        assert container.env == expected_env
        assert container.env_from == []
        assert container.resources == resources
        assert container.volume_mounts == mounts
Beispiel #8
0
def get_init_resources() -> k8s_schemas.V1ResourceRequirements:
    """Return fixed resource requirements.

    Limits are 1 cpu / 200Mi memory and requests are 0.1 cpu / 20Mi memory.
    """
    limits = {"cpu": "1", "memory": "200Mi"}
    requests = {"cpu": "0.1", "memory": "20Mi"}
    return k8s_schemas.V1ResourceRequirements(limits=limits, requests=requests)
 def test_get_init_resources(self):
     """Check that ``get_init_resources`` returns the expected fixed spec."""
     expected = k8s_schemas.V1ResourceRequirements(
         limits=dict(cpu="1", memory="200Mi"),
         requests=dict(cpu="0.1", memory="20Mi"),
     )
     assert get_init_resources() == expected
Beispiel #10
0
    def test_get_resources_env_vars(self):
        """Check the env vars that ``get_resources_env_vars`` returns.

        Inputs without a gpu key must give a single
        ``NVIDIA_VISIBLE_DEVICES=none`` entry; a gpu request must give none.
        """
        without_gpu = (
            None,
            k8s_schemas.V1ResourceRequirements(limits={"cpu": 1}),
            k8s_schemas.V1ResourceRequirements(limits={"memory": 1}),
        )
        for resources in without_gpu:
            env_vars = get_resources_env_vars(resources)
            assert len(env_vars) == 1
            only_var = env_vars[0]
            assert only_var.name == "NVIDIA_VISIBLE_DEVICES"
            assert only_var.value == "none"

        gpu_request = k8s_schemas.V1ResourceRequirements(
            requests={"nvidia.com/gpu": 1})
        env_vars = get_resources_env_vars(gpu_request)
        assert len(env_vars) == 0
        assert env_vars == []
Beispiel #11
0
def get_default_tuner_container(command):
    """Build the default container for the ``polyaxon-hpsearch`` image.

    Args:
        command: Value forwarded unchanged as the container ``command``.

    Returns:
        A ``V1Container`` using the ``dev`` image tag, the
        ``PullPolicy.ALWAYS`` pull policy, and resource requests only
        (no limits).
    """
    image = "polyaxon/polyaxon-hpsearch:{}".format("dev")
    tuner_args = [
        "--parallel={{parallel}}",
        "--configs={{configs}}",
        "--metrics={{metrics}}",
    ]
    resources = k8s_schemas.V1ResourceRequirements(
        requests=dict(cpu="0.1", memory="180Mi"),
    )
    return V1Container(
        name=MAIN_JOB_CONTAINER,
        image=image,
        image_pull_policy=PullPolicy.ALWAYS.value,
        command=command,
        args=tuner_args,
        resources=resources,
    )
Beispiel #12
0
def requests_tpu(
        resources: Union[k8s_schemas.V1ResourceRequirements, Dict]) -> bool:
    """Tell whether any requested or limited resource name contains "tpu".

    Args:
        resources: A ``V1ResourceRequirements`` instance, or a dict of
            keyword arguments used to construct one.

    Returns:
        ``True`` if a key of ``requests`` or ``limits`` contains the
        substring ``"tpu"``; ``False`` otherwise, including when
        ``resources`` is falsy.
    """
    if not resources:
        return False

    if isinstance(resources, k8s_schemas.V1ResourceRequirements):
        spec = resources
    else:
        spec = k8s_schemas.V1ResourceRequirements(**resources)

    # Requests are inspected before limits; empty or missing sections are
    # skipped.
    for section in (spec.requests, spec.limits):
        if section and any("tpu" in name for name in section.keys()):
            return True

    return False
Beispiel #13
0
def get_default_tuner_container(command, bracket_iteration: int = None):
    """Build the default container for the ``polyaxon-hpsearch`` image.

    Args:
        command: Value forwarded unchanged as the container ``command``.
        bracket_iteration: When not ``None``, an extra
            ``{{params.bracket_iteration.as_arg}}`` argument is appended.
            Only its presence matters here; the value itself is not used.

    Returns:
        A ``V1Container`` tagged with ``pkg.VERSION``, pulled with
        ``PullPolicy.IF_NOT_PRESENT``, with resource requests only.
    """
    tuner_args = [
        "{{params.matrix.as_arg}}",
        "{{params.search.as_arg}}",
        "{{params.iteration.as_arg}}",
    ]
    if bracket_iteration is not None:
        tuner_args += ["{{params.bracket_iteration.as_arg}}"]

    image = "polyaxon/polyaxon-hpsearch:{}".format(pkg.VERSION)
    resources = k8s_schemas.V1ResourceRequirements(
        requests=dict(cpu="0.1", memory="180Mi"),
    )
    return V1Container(
        name=MAIN_JOB_CONTAINER,
        image=image,
        image_pull_policy=PullPolicy.IF_NOT_PRESENT.value,
        command=command,
        args=tuner_args,
        resources=resources,
    )
Beispiel #14
0
def get_default_cleaner_container(store: V1ConnectionType, run_path: str):
    """Build a container that runs ``polyaxon clean-artifacts`` on one path.

    Args:
        store: Connection whose ``store_path`` is joined with ``run_path``
            and whose ``kind`` (underscores replaced by dashes) is appended
            to the command.
        run_path: Path relative to ``store.store_path``.

    Returns:
        A ``V1Container`` whose single argument is ``--subpath=<joined path>``.
    """
    target = os.path.join(store.store_path, run_path)
    store_kind = store.kind.replace('_', '-')

    resources = k8s_schemas.V1ResourceRequirements(
        limits=dict(cpu="0.5", memory="100Mi"),
        requests=dict(cpu="0.1", memory="20Mi"),
    )
    return V1Container(
        name=MAIN_JOB_CONTAINER,
        image="polyaxon/polyaxon-init:{}".format(pkg.VERSION),
        image_pull_policy=PullPolicy.ALWAYS.value,
        command=["polyaxon", "clean-artifacts", store_kind],
        args=["--subpath={}".format(target)],
        resources=resources,
    )
Beispiel #15
0
def get_default_cleaner_container(store: V1ConnectionType, run_uuid: str,
                                  run_kind: str):
    """Build a container that runs ``polyaxon wait`` and then cleans artifacts.

    Args:
        store: Connection whose ``store_path`` is joined with ``run_uuid``
            and whose ``kind`` (underscores replaced by dashes) is passed
            to ``polyaxon clean-artifacts``.
        run_uuid: Used both as the sub-path under ``store.store_path`` and
            as the ``--uuid`` value of ``polyaxon wait``.
        run_kind: ``--kind`` value of ``polyaxon wait``.

    Returns:
        A ``V1Container`` that runs ``<wait> && <clean>`` through
        ``/bin/bash -c``, so the clean step only runs if the wait command
        exits successfully.
    """
    target = os.path.join(store.store_path, run_uuid)
    store_kind = store.kind.replace("_", "-")

    wait_cmd = "polyaxon wait --uuid={} --kind={}".format(run_uuid, run_kind)
    clean_cmd = "polyaxon clean-artifacts {} --subpath={}".format(
        store_kind, target)
    shell_command = "{} && {}".format(wait_cmd, clean_cmd)

    resources = k8s_schemas.V1ResourceRequirements(
        limits=dict(cpu="0.5", memory="160Mi"),
        requests=dict(cpu="0.1", memory="80Mi"),
    )
    return V1Container(
        name=MAIN_JOB_CONTAINER,
        image="polyaxon/polyaxon-init:{}".format(pkg.VERSION),
        image_pull_policy=PullPolicy.IF_NOT_PRESENT.value,
        command=["/bin/bash", "-c"],
        args=[shell_command],
        resources=resources,
    )