Exemple #1
0
    def test_job_is_finished(self):
        """job_is_finished must depend only on "True" job conditions —
        never on completion_time — treating both Complete and Failed
        conditions as terminal."""
        manager = JobManager(namespace="xyz", signer=Mock(), register=Mock())

        def job_with(conditions):
            # Helper: wrap a condition list in a V1Job status.
            return V1Job(status=V1JobStatus(conditions=conditions))

        # No conditions at all: not finished.
        assert not manager.job_is_finished(job_with([]))

        # A completion_time alone must not count as finished.
        timed = V1Job(
            status=V1JobStatus(conditions=[], completion_time=datetime.now())
        )
        assert not manager.job_is_finished(timed), "Completion time field is unchecked"

        # A condition of type Complete with status "True" is terminal.
        complete = job_with([V1JobCondition(status="True", type="Complete")])
        assert manager.job_is_finished(complete), "A complete job should be finished"

        # The same condition with status "False" must be ignored.
        false_complete = job_with([V1JobCondition(status="False", type="Complete")])
        assert not manager.job_is_finished(
            false_complete
        ), "False job status conditions should be ignored"

        # A "True" Failed condition is terminal as well.
        failed = job_with([V1JobCondition(status="True", type="Failed")])
        assert manager.job_is_finished(failed), "A failed job is finished"
Exemple #2
0
def test_main_failed_pods(mocker, main_mock):
    """main() should poll the job until it is no longer active, sleeping
    between polls, even when the terminal state reports a failed pod."""
    # Successive read_namespaced_job responses: active twice, then finished
    # with one failed pod.
    polled = [
        V1Job(status=V1JobStatus(active=1, succeeded=1)),
        V1Job(status=V1JobStatus(active=1, succeeded=1)),
        V1Job(status=V1JobStatus(active=0, succeeded=1, failed=1)),
    ]
    read_job = mocker.patch.object(
        main_mock.kubernetes_client_mock, 'read_namespaced_job')
    read_job.side_effect = polled

    main()

    # Config loaded once, namespace file opened twice, and one sleep between
    # each pair of consecutive polls.
    assert main_mock.kubernetes_config_load_mock.call_count == 1
    assert main_mock.builtins_open_mock.call_count == 2
    assert main_mock.time_sleep_mock.call_count == len(polled) - 1
Exemple #3
0
def test_waiting_for_benchmark_container():
    """A pending pod whose benchmark container is still waiting — even with
    the sidecar already running — infers PENDING_AT_BENCHMARK_CONTAINER."""
    benchmark = V1ContainerStatus(
        image="benchmarkai/hello-world",
        name="benchmark",
        image_id="",
        ready=False,
        restart_count=0,
        state=V1ContainerState(waiting=CONTAINER_STATE_WAITING),
    )
    sidecar = V1ContainerStatus(
        image="benchmarkai/metrics-pusher",
        name="sidecar",
        image_id="",
        ready=True,
        restart_count=0,
        state=V1ContainerState(running=V1ContainerStateRunning()),
    )
    pod_status = V1PodStatus(
        phase="Pending", container_statuses=[benchmark, sidecar])
    pod = V1Pod(metadata=V1ObjectMeta(name="pod-name"), status=pod_status)

    inferrer = SingleNodeStrategyKubernetesStatusInferrer(
        V1JobStatus(active=1), pods=[pod])
    assert inferrer.status() == BenchmarkJobStatus.PENDING_AT_BENCHMARK_CONTAINER
Exemple #4
0
def test_failed_at_sidecar_container_and_pod_terminated():
    """When the pod failed and only the sidecar terminated unsuccessfully,
    the failure is attributed to the sidecar container."""
    benchmark = V1ContainerStatus(
        image="benchmarkai/hello-world",
        name="benchmark",
        image_id="",
        ready=True,
        restart_count=0,
        state=V1ContainerState(
            terminated=CONTAINER_STATE_TERMINATED_AND_SUCCEEDED),
    )
    sidecar = V1ContainerStatus(
        image="benchmarkai/metrics-pusher",
        name="sidecar",
        image_id="",
        ready=True,
        restart_count=0,
        state=V1ContainerState(
            terminated=CONTAINER_STATE_TERMINATED_AND_FAILED),
    )
    pod_status = V1PodStatus(
        phase="Failed", container_statuses=[benchmark, sidecar])
    pod = V1Pod(metadata=V1ObjectMeta(name="pod-name"), status=pod_status)

    inferrer = SingleNodeStrategyKubernetesStatusInferrer(
        V1JobStatus(active=1), pods=[pod])
    assert inferrer.status() == BenchmarkJobStatus.FAILED_AT_SIDECAR_CONTAINER
Exemple #5
0
def test_failed_at_benchmark_container_but_sidecar_still_running():
    """A running pod whose benchmark container terminated with a failure
    while the sidecar keeps running is still reported as running."""
    benchmark = V1ContainerStatus(
        image="benchmarkai/hello-world",
        name="benchmark",
        image_id="",
        ready=False,
        restart_count=0,
        state=V1ContainerState(
            terminated=CONTAINER_STATE_TERMINATED_AND_FAILED),
    )
    sidecar = V1ContainerStatus(
        image="benchmarkai/metrics-pusher",
        name="sidecar",
        image_id="",
        ready=True,
        restart_count=0,
        state=V1ContainerState(running=V1ContainerStateRunning()),
    )
    pod_status = V1PodStatus(
        phase="Running", container_statuses=[benchmark, sidecar])
    pod = V1Pod(metadata=V1ObjectMeta(name="pod-name"), status=pod_status)

    inferrer = SingleNodeStrategyKubernetesStatusInferrer(
        V1JobStatus(active=1), pods=[pod])
    # TODO: Not sure if this is the status we want
    assert inferrer.status() == BenchmarkJobStatus.RUNNING_AT_MAIN_CONTAINERS
Exemple #6
0
def test_pod_scaling_nodes():
    """A pending pod marked Unschedulable implies the cluster is still
    scaling nodes to place it."""
    unschedulable = V1PodCondition(
        type="PodScheduled", reason="Unschedulable", status="False")
    pending_pod = V1Pod(
        status=V1PodStatus(phase="Pending", conditions=[unschedulable]))
    inferrer = SingleNodeStrategyKubernetesStatusInferrer(
        V1JobStatus(active=1), pods=[pending_pod])
    assert inferrer.status() == BenchmarkJobStatus.PENDING_NODE_SCALING
Exemple #7
0
def test_job_status():
    """Exercise JobStatus.is_terminal() and JobStatus.from_job_info()."""
    # Only these three states are terminal; every other state is not.
    terminal_names = {'FAILED', 'SUCCEEDED', 'UNAVAILABLE'}
    for member in JobStatus:
        assert member.is_terminal() == (member.name in terminal_names)

    def status_of(**fields):
        # Helper: map a V1JobStatus built from keyword fields to a JobStatus.
        return JobStatus.from_job_info(V1Job(status=V1JobStatus(**fields)))

    # Completed jobs: the succeeded counter decides success vs. failure.
    assert status_of(
        completion_time=datetime.now(), succeeded=1) == JobStatus.SUCCEEDED
    assert status_of(
        completion_time=datetime.now(), succeeded=0) == JobStatus.FAILED

    # Uncompleted jobs: active means running, inactive means pending.
    assert status_of(completion_time=None, active=1) == JobStatus.RUNNING
    assert status_of(completion_time=None, active=0) == JobStatus.PENDING

    # Missing information at every level maps to STATE_UNSPECIFIED.
    assert status_of() == JobStatus.STATE_UNSPECIFIED
    assert JobStatus.from_job_info(V1Job()) == JobStatus.STATE_UNSPECIFIED
    assert JobStatus.from_job_info(None) == JobStatus.STATE_UNSPECIFIED
Exemple #8
0
def test_init_containers_running():
    """A pending pod with a running init container infers
    RUNNING_AT_INIT_CONTAINERS."""
    puller = V1ContainerStatus(
        image="benchmarkai/data-puller",
        name="data-puller",
        image_id="",
        ready=True,
        restart_count=0,
        state=V1ContainerState(running=V1ContainerStateRunning()),
    )
    pod_status = V1PodStatus(phase="Pending", init_container_statuses=[puller])
    pod = V1Pod(metadata=V1ObjectMeta(name="pod-name"), status=pod_status)

    inferrer = SingleNodeStrategyKubernetesStatusInferrer(
        V1JobStatus(active=1), pods=[pod])
    assert inferrer.status() == BenchmarkJobStatus.RUNNING_AT_INIT_CONTAINERS
def test_delete_complete_jobs_raises_server_error(api: MagicMock,
                                                  batch_api: MagicMock):
    """A server-side error from delete_namespaced_job must propagate out of
    delete_complete_jobs rather than being swallowed."""
    batch_api.list_namespaced_job.return_value = V1JobList(items=[
        # delete because complete
        V1Job(
            metadata=V1ObjectMeta(
                name="flush-pv-1", uid="uid-flush-pv-1", resource_version="1"),
            status=V1JobStatus(
                conditions=[V1JobCondition(status="", type="Complete")]),
        ),
    ])

    def delete_job(name, namespace, body):
        # Simulate the API server rejecting the deletion.
        raise ApiException(reason="Server Error")

    batch_api.delete_namespaced_job.side_effect = delete_job

    with pytest.raises(ApiException):
        delete_complete_jobs(api, batch_api, "namespace")

    # BUG FIX: the original called `called_once_with`, which is not a Mock
    # assertion method — it just created a new mock attribute and verified
    # nothing. `assert_called_once_with` actually checks the call.
    batch_api.list_namespaced_job.assert_called_once_with("namespace")
Exemple #10
0
def main_mock(mocker):
    """Patch all external dependencies of main(): k8s config loading, env
    lookup, the namespace file, sleep, and the BatchV1Api client; return a
    holder exposing the interesting mocks."""
    mocker.patch('os.path.isfile').return_value = False
    config_load = mocker.patch('kubernetes.config.load_incluster_config')
    mocker.patch('main.getenv').return_value = "fake_job_name"

    # open(...) used as a context manager yields a file object whose read()
    # returns the fake namespace.
    namespace_file = mocker.MagicMock(read=lambda: 'fake-namespace')
    open_context = mocker.MagicMock(__enter__=lambda x: namespace_file)
    patched_open = mocker.patch('builtins.open')
    patched_open.return_value = open_context

    sleep_patch = mocker.patch('main.sleep')

    # By default the polled job is already done (inactive, one success).
    batch_client = mocker.MagicMock()
    mocker.patch('kubernetes.client.BatchV1Api').return_value = batch_client
    mocker.patch.object(batch_client, 'read_namespaced_job').return_value = V1Job(
        status=V1JobStatus(active=0, succeeded=1))

    class MainMock:
        kubernetes_config_load_mock = config_load
        builtins_open_mock = patched_open
        kubernetes_client_mock = batch_client
        time_sleep_mock = sleep_patch

    return MainMock
Exemple #11
0
    def test_info(self, status_p, create_p, get_p, punits_p):
        """End-to-end test of cubegens.info(): the happy path, a missing
        punit 'count' entry, a missing input configuration, the
        'input_configs' list variant, and unparsable processing output.

        NOTE(review): status_p/create_p/get_p/punits_p arrive via patch
        decorators (reverse order of declaration) — confirm against the
        decorators on this test class.
        """
        # Job status reports a "Complete" condition; creation and retrieval
        # return canned payloads with a finished, 100%-progress cubegen.
        status_p.return_value = V1Job(status=V1JobStatus(
            conditions=[V1JobCondition(type='Complete', status='ready')]))
        create_p.return_value = {
            'cubegen_id': 'id',
            'status': V1JobStatus().to_dict()
        }

        get_p.return_value = {
            'cubegen_id': 'id',
            'status': 'ready',
            'output': [_OUTPUT],
            'progress': 100
        }

        # Punits payload with both total_count and count present (happy path).
        punits_p.return_value = dict(punits=dict(total_count=1000), count=500)

        res = cubegens.info(user_id='drwho',
                            email='*****@*****.**',
                            body=_CFG,
                            token='fdsvdf')

        # Expected result: dataset descriptor parsed from _OUTPUT plus size
        # and cost estimations derived from the punits payload above.
        expected = {
            'dataset_descriptor': {
                'data_id': 'test_cube.zarr',
                'type_specifier': 'dataset',
                'crs': 'WGS84',
                'bbox': [-2.24, 51.99, -2.15, 52.05],
                'time_range': ['2020-12-01', '2021-02-28'],
                'time_period': '1D',
                'dims': {
                    'time': 90,
                    'lat': 674,
                    'lon': 1024
                },
                'spatial_res': 8.9e-05,
                'data_vars': {
                    'B02': {
                        'name': 'B02',
                        'dtype': 'float32',
                        'dims': ['time', 'lat', 'lon']
                    },
                    'CLM': {
                        'name': 'CLM',
                        'dtype': 'float32',
                        'dims': ['time', 'lat', 'lon']
                    }
                }
            },
            'size_estimation': {
                'image_size': [1024, 674],
                'tile_size': [512, 512],
                'num_variables': 5,
                'num_tiles': [2, 1],
                'num_requests': 900,
                'num_bytes': 1242316800
            },
            'cost_estimation': {
                'required': 540,
                'available': 500,
                'limit': 1000
            }
        }

        self.assertDictEqual(expected, res)

        # Punits payload without 'count' must raise a descriptive ApiError.
        punits_p.return_value = dict(punits=dict(total_count=1000))

        with self.assertRaises(api.ApiError) as e:
            cubegens.info(user_id='drwho',
                          email='*****@*****.**',
                          body=_CFG,
                          token='fdsvdf')

        self.assertEqual(
            "Error. Cannot handle punit data. Entry 'count' is missing.",
            str(e.exception))
        # Restore the valid punits payload for the following scenarios.
        punits_p.return_value = dict(punits=dict(total_count=1000), count=500)

        # A config without any input configuration is rejected.
        cfg = _CFG.copy()
        del cfg['input_config']

        with self.assertRaises(api.ApiError) as e:
            cubegens.info(user_id='drwho',
                          email='*****@*****.**',
                          body=cfg,
                          token='fdsvdf')

        self.assertEqual("Error. Invalid input configuration.",
                         str(e.exception))

        # The plural 'input_configs' list form must behave like the singular
        # 'input_config' and yield the same result.
        cfg = _CFG.copy()
        # noinspection PyTypeChecker
        cfg['input_configs'] = [_CFG['input_config']]

        res = cubegens.info(user_id='drwho',
                            email='*****@*****.**',
                            body=cfg,
                            token='fdsvdf')

        self.assertEqual(expected, res)

        # Corrupt the processing output so its JSON tail no longer parses;
        # info() should surface the underlying JSON decode error.
        output = _OUTPUT.replace("Awaiting", "Awaitingxxx")

        get_p.return_value = {
            'cubegen_id': 'id',
            'status': 'ready',
            'output': [output],
            'progress': 100
        }

        with self.assertRaises(api.ApiError) as e:
            cubegens.info(user_id='drwho',
                          email='*****@*****.**',
                          body=_CFG,
                          token='fdsvdf')

        self.assertEqual("Expecting value: line 2 column 1 (char 1)",
                         str(e.exception))
Exemple #12
0
    job_failed,
    job_complete,
    get_alphanumeric_unique_tag,
)
from fv3kube.utils import _handle_jobs
import kubernetes

from kubernetes.client import V1Job, V1JobStatus, V1ObjectMeta

# Shared V1JobStatus fixtures used by the job-state tests below.

# A job condition reporting failure (type="Failed", status="True").
failed_condition = kubernetes.client.V1JobCondition(type="Failed",
                                                    status="True")

# A job condition reporting successful completion.
# NOTE(review): name keeps the original "succesful" spelling since other
# tests may reference it by this name.
succesful_condition = kubernetes.client.V1JobCondition(type="Complete",
                                                       status="True")

# Terminal status: no active pods, a single Failed condition.
failed_status = V1JobStatus(active=None, conditions=[failed_condition])

# Terminal status: no active pods, a single Complete condition.
complete_status = V1JobStatus(active=None, conditions=[succesful_condition])

# In-progress status: one active pod, nothing completed or failed yet.
inprogress_status = V1JobStatus(active=1,
                                completion_time=None,
                                conditions=None,
                                failed=None,
                                succeeded=None)


def test_get_base_fv3config():
    """The versioned base configuration should load as a Mapping."""
    assert isinstance(get_base_fv3config("v0.3"), Mapping)
Exemple #13
0
def test_pod_in_running_phase():
    """A pod in the Running phase maps to RUNNING_AT_MAIN_CONTAINERS."""
    running_pod = V1Pod(status=V1PodStatus(phase="Running"))
    inferrer = SingleNodeStrategyKubernetesStatusInferrer(
        V1JobStatus(active=1), pods=[running_pod])
    assert inferrer.status() == BenchmarkJobStatus.RUNNING_AT_MAIN_CONTAINERS
Exemple #14
0
def test_no_pods_scheduled_for_k8s_job_yet():
    """An active job with an empty pod list means nothing is scheduled yet."""
    inferrer = SingleNodeStrategyKubernetesStatusInferrer(
        V1JobStatus(active=1), pods=[])
    assert inferrer.status() == BenchmarkJobStatus.NO_POD_SCHEDULED
Exemple #15
0
def test_job_status(failed, succeeded, conditions, expected_status):
    """Parametrized check: the inferrer maps (failed, succeeded, conditions)
    job-status combinations to the expected benchmark status."""
    k8s_status = V1JobStatus(
        succeeded=succeeded, failed=failed, conditions=conditions)
    inferrer = SingleNodeStrategyKubernetesStatusInferrer(k8s_status, pods=[])
    assert inferrer.status() == expected_status
def mock_job() -> V1Job:
    """Build a minimal V1Job fixture named "some-job" in "default"."""
    return V1Job(
        metadata=V1ObjectMeta(namespace="default", name="some-job"),
        spec=V1JobSpec(template=V1PodTemplate()),
        status=V1JobStatus(conditions=[]),
    )
def test_delete_complete_jobs(api: MagicMock, batch_api: MagicMock):
    """delete_complete_jobs should delete only completed flush-pv-* jobs,
    patch their PVs to reclaim-policy Delete, re-own their PVCs, and stay
    resilient to Conflict / Not Found races during deletion."""
    batch_api.list_namespaced_job.return_value = V1JobList(items=[
        # delete because complete
        *(V1Job(
            metadata=V1ObjectMeta(
                name=f"flush-pv-{i}",
                uid=f"uid-flush-pv-{i}",
                resource_version=f"{i}",
            ),
            status=V1JobStatus(
                conditions=[V1JobCondition(status="", type="Complete")]),
        ) for i in range(3)),
        # don't delete because already deleted
        V1Job(
            metadata=V1ObjectMeta(
                name="flush-pv-3",
                deletion_timestamp=datetime.utcnow(),
            ),
            status=V1JobStatus(
                conditions=[V1JobCondition(status="", type="Complete")]),
        ),
        # don't delete because not complete
        V1Job(
            metadata=V1ObjectMeta(name="flush-pv-4"),
            status=V1JobStatus(
                conditions=[V1JobCondition(status="", type="Pending")]),
        ),
        # don't delete because status does not contain conditions
        V1Job(metadata=V1ObjectMeta(name="flush-pv-5"), status=V1JobStatus()),
        # don't delete because not a flush job
        V1Job(
            metadata=V1ObjectMeta(name="cron-1"),
            status=V1JobStatus(
                conditions=[V1JobCondition(status="", type="Complete")]),
        ),
    ])

    def delete_job(name, namespace, body):
        # Simulate benign races: a Conflict on one job and Not Found on
        # another — neither should abort processing of the remaining jobs.
        if name == "flush-pv-1":
            raise ApiException(reason="Conflict")
        if name == "flush-pv-2":
            raise ApiException(reason="Not Found")

    batch_api.delete_namespaced_job.side_effect = delete_job

    delete_complete_jobs(api, batch_api, "namespace")

    batch_api.list_namespaced_job.assert_called_once_with("namespace")
    # Each of the three eligible jobs gets its PV patched to reclaim-policy
    # "Delete".
    assert [(f"pv-{i}", "Delete") for i in range(3)] == [(
        call.kwargs["name"],
        call.kwargs["body"].spec.persistent_volume_reclaim_policy,
    ) for call in api.patch_persistent_volume.call_args_list]
    # Each PVC is re-owned by its Job so that cascade deletion removes it.
    assert [(f"flush-pv-{i}", "namespace", [
        ("Job", f"flush-pv-{i}")
    ]) for i in range(3)] == [(
        call.kwargs["name"],
        call.kwargs["namespace"],
        [(ref.kind, ref.name)
         for ref in call.kwargs["body"].metadata.owner_references],
    ) for call in api.patch_namespaced_persistent_volume_claim.call_args_list]
    # Deletions carry uid/resourceVersion preconditions to avoid deleting a
    # job that changed since it was listed.
    assert [(f"flush-pv-{i}", "namespace", f"uid-flush-pv-{i}", f"{i}")
            for i in range(3)] == [(
                call.kwargs["name"],
                call.kwargs["namespace"],
                call.kwargs["body"].preconditions.uid,
                call.kwargs["body"].preconditions.resource_version,
            ) for call in batch_api.delete_namespaced_job.call_args_list]