def test_job_is_finished(self):
    manager = JobManager(namespace="xyz", signer=Mock(), register=Mock())

    job = V1Job(status=V1JobStatus(conditions=[]))
    assert not manager.job_is_finished(job)

    job = V1Job(status=V1JobStatus(conditions=[], completion_time=datetime.now()))
    assert not manager.job_is_finished(job), "Completion time field is unchecked"

    job = V1Job(
        status=V1JobStatus(
            conditions=[V1JobCondition(status="True", type="Complete")]
        )
    )
    assert manager.job_is_finished(job), "A complete job should be finished"

    job = V1Job(
        status=V1JobStatus(
            conditions=[V1JobCondition(status="False", type="Complete")]
        )
    )
    assert not manager.job_is_finished(
        job
    ), "False job status conditions should be ignored"

    job = V1Job(
        status=V1JobStatus(
            conditions=[V1JobCondition(status="True", type="Failed")]
        )
    )
    assert manager.job_is_finished(job), "A failed job is finished"

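# Illustrative sketch only: a minimal check consistent with the assertions above,
# assuming job_is_finished() looks at job.status.conditions and treats the job as
# finished when any condition with status "True" has type "Complete" or "Failed".
# The helper name is hypothetical; this is not the actual JobManager implementation.
def _example_job_is_finished(job: V1Job) -> bool:
    conditions = job.status.conditions or []
    return any(
        condition.status == "True" and condition.type in ("Complete", "Failed")
        for condition in conditions
    )
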
def test_main_failed_pods(mocker, main_mock):
    job_status_history = [
        V1Job(status=V1JobStatus(active=1, succeeded=1)),
        V1Job(status=V1JobStatus(active=1, succeeded=1)),
        V1Job(status=V1JobStatus(active=0, succeeded=1, failed=1)),
    ]
    mocker.patch.object(
        main_mock.kubernetes_client_mock, 'read_namespaced_job'
    ).side_effect = job_status_history

    main()

    assert main_mock.kubernetes_config_load_mock.call_count == 1
    assert main_mock.builtins_open_mock.call_count == 2
    assert main_mock.time_sleep_mock.call_count == len(job_status_history) - 1

def test_waiting_for_benchmark_container():
    status = V1PodStatus(
        phase="Pending",
        container_statuses=[
            V1ContainerStatus(
                image="benchmarkai/hello-world",
                name="benchmark",
                image_id="",
                ready=False,
                restart_count=0,
                state=V1ContainerState(waiting=CONTAINER_STATE_WAITING),
            ),
            V1ContainerStatus(
                image="benchmarkai/metrics-pusher",
                name="sidecar",
                image_id="",
                ready=True,
                restart_count=0,
                state=V1ContainerState(running=V1ContainerStateRunning()),
            ),
        ],
    )
    pod = V1Pod(metadata=V1ObjectMeta(name="pod-name"), status=status)
    inferrer = SingleNodeStrategyKubernetesStatusInferrer(V1JobStatus(active=1), pods=[pod])
    assert inferrer.status() == BenchmarkJobStatus.PENDING_AT_BENCHMARK_CONTAINER

def test_failed_at_sidecar_container_and_pod_terminated():
    status = V1PodStatus(
        phase="Failed",
        container_statuses=[
            V1ContainerStatus(
                image="benchmarkai/hello-world",
                name="benchmark",
                image_id="",
                ready=True,
                restart_count=0,
                state=V1ContainerState(
                    terminated=CONTAINER_STATE_TERMINATED_AND_SUCCEEDED),
            ),
            V1ContainerStatus(
                image="benchmarkai/metrics-pusher",
                name="sidecar",
                image_id="",
                ready=True,
                restart_count=0,
                state=V1ContainerState(
                    terminated=CONTAINER_STATE_TERMINATED_AND_FAILED),
            ),
        ],
    )
    pod = V1Pod(metadata=V1ObjectMeta(name="pod-name"), status=status)
    inferrer = SingleNodeStrategyKubernetesStatusInferrer(V1JobStatus(active=1), pods=[pod])
    assert inferrer.status() == BenchmarkJobStatus.FAILED_AT_SIDECAR_CONTAINER

def test_failed_at_benchmark_container_but_sidecar_still_running():
    status = V1PodStatus(
        phase="Running",
        container_statuses=[
            V1ContainerStatus(
                image="benchmarkai/hello-world",
                name="benchmark",
                image_id="",
                ready=False,
                restart_count=0,
                state=V1ContainerState(
                    terminated=CONTAINER_STATE_TERMINATED_AND_FAILED),
            ),
            V1ContainerStatus(
                image="benchmarkai/metrics-pusher",
                name="sidecar",
                image_id="",
                ready=True,
                restart_count=0,
                state=V1ContainerState(running=V1ContainerStateRunning()),
            ),
        ],
    )
    pod = V1Pod(metadata=V1ObjectMeta(name="pod-name"), status=status)
    inferrer = SingleNodeStrategyKubernetesStatusInferrer(V1JobStatus(active=1), pods=[pod])
    # TODO: Not sure if this is the status we want
    assert inferrer.status() == BenchmarkJobStatus.RUNNING_AT_MAIN_CONTAINERS

def test_pod_scaling_nodes():
    condition = V1PodCondition(type="PodScheduled", reason="Unschedulable", status="False")
    status = V1PodStatus(phase="Pending", conditions=[condition])
    inferrer = SingleNodeStrategyKubernetesStatusInferrer(
        V1JobStatus(active=1), pods=[V1Pod(status=status)])
    assert inferrer.status() == BenchmarkJobStatus.PENDING_NODE_SCALING

def test_job_status():
    for s in JobStatus:
        terminal = s.is_terminal()
        if s.name in ['FAILED', 'SUCCEEDED', 'UNAVAILABLE']:
            assert terminal
        else:
            assert not terminal

    # completed jobs
    status = V1JobStatus(completion_time=datetime.now(), succeeded=1)
    job_info = V1Job(status=status)
    job_status = JobStatus.from_job_info(job_info)
    assert job_status == JobStatus.SUCCEEDED

    status = V1JobStatus(completion_time=datetime.now(), succeeded=0)
    job_info = V1Job(status=status)
    job_status = JobStatus.from_job_info(job_info)
    assert job_status == JobStatus.FAILED

    # active jobs
    status = V1JobStatus(completion_time=None, active=1)
    job_info = V1Job(status=status)
    job_status = JobStatus.from_job_info(job_info)
    assert job_status == JobStatus.RUNNING

    # pending jobs
    status = V1JobStatus(completion_time=None, active=0)
    job_info = V1Job(status=status)
    job_status = JobStatus.from_job_info(job_info)
    assert job_status == JobStatus.PENDING

    # unknown state
    status = V1JobStatus()
    job_info = V1Job(status=status)
    job_status = JobStatus.from_job_info(job_info)
    assert job_status == JobStatus.STATE_UNSPECIFIED

    job_info = V1Job()
    job_status = JobStatus.from_job_info(job_info)
    assert job_status == JobStatus.STATE_UNSPECIFIED

    job_status = JobStatus.from_job_info(None)
    assert job_status == JobStatus.STATE_UNSPECIFIED

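# Illustrative sketch only: a hypothetical mapping that would satisfy the assertions
# above, not the real JobStatus.from_job_info(). Assumes the V1JobStatus fields drive
# the result: a completion_time with succeeded -> SUCCEEDED, a completion_time without
# success -> FAILED, active pods -> RUNNING, explicitly inactive -> PENDING, and
# anything else -> STATE_UNSPECIFIED.
def _example_status_from_job_info(job_info):
    if job_info is None or job_info.status is None:
        return JobStatus.STATE_UNSPECIFIED
    status = job_info.status
    if status.completion_time is not None:
        return JobStatus.SUCCEEDED if status.succeeded else JobStatus.FAILED
    if status.active:
        return JobStatus.RUNNING
    if status.active == 0:
        return JobStatus.PENDING
    return JobStatus.STATE_UNSPECIFIED
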
def test_init_containers_running():
    container_state = V1ContainerState(running=V1ContainerStateRunning())
    status = V1PodStatus(
        phase="Pending",
        init_container_statuses=[
            V1ContainerStatus(
                image="benchmarkai/data-puller",
                name="data-puller",
                image_id="",
                ready=True,
                restart_count=0,
                state=container_state,
            )
        ],
    )
    pod = V1Pod(metadata=V1ObjectMeta(name="pod-name"), status=status)
    inferrer = SingleNodeStrategyKubernetesStatusInferrer(V1JobStatus(active=1), pods=[pod])
    assert inferrer.status() == BenchmarkJobStatus.RUNNING_AT_INIT_CONTAINERS

def test_delete_complete_jobs_raises_server_error(api: MagicMock, batch_api: MagicMock):
    batch_api.list_namespaced_job.return_value = V1JobList(items=[
        # delete because complete
        V1Job(
            metadata=V1ObjectMeta(
                name="flush-pv-1", uid="uid-flush-pv-1", resource_version="1"),
            status=V1JobStatus(
                conditions=[V1JobCondition(status="", type="Complete")]),
        ),
    ])

    def delete_job(name, namespace, body):
        raise ApiException(reason="Server Error")

    batch_api.delete_namespaced_job.side_effect = delete_job

    with pytest.raises(ApiException):
        delete_complete_jobs(api, batch_api, "namespace")

    batch_api.list_namespaced_job.assert_called_once_with("namespace")

def main_mock(mocker):
    mocker.patch('os.path.isfile').return_value = False
    kubernetes_config_load = mocker.patch('kubernetes.config.load_incluster_config')
    mocker.patch('main.getenv').return_value = "fake_job_name"

    file_mock = mocker.MagicMock(read=lambda: 'fake-namespace')
    open_mock = mocker.MagicMock(__enter__=lambda x: file_mock)
    builtins_open = mocker.patch('builtins.open')
    builtins_open.return_value = open_mock

    main_sleep = mocker.patch('main.sleep')

    fake_k8s_client = mocker.MagicMock()
    mocker.patch('kubernetes.client.BatchV1Api').return_value = fake_k8s_client
    mocker.patch.object(fake_k8s_client, 'read_namespaced_job').return_value = V1Job(
        status=V1JobStatus(active=0, succeeded=1))

    class MainMock:
        kubernetes_config_load_mock = kubernetes_config_load
        builtins_open_mock = builtins_open
        kubernetes_client_mock = fake_k8s_client
        time_sleep_mock = main_sleep

    return MainMock

def test_info(self, status_p, create_p, get_p, punits_p):
    status_p.return_value = V1Job(status=V1JobStatus(
        conditions=[V1JobCondition(type='Complete', status='ready')]))
    create_p.return_value = {
        'cubegen_id': 'id',
        'status': V1JobStatus().to_dict()
    }
    get_p.return_value = {
        'cubegen_id': 'id',
        'status': 'ready',
        'output': [_OUTPUT],
        'progress': 100
    }
    punits_p.return_value = dict(punits=dict(total_count=1000), count=500)

    res = cubegens.info(user_id='drwho', email='*****@*****.**', body=_CFG, token='fdsvdf')

    expected = {
        'dataset_descriptor': {
            'data_id': 'test_cube.zarr',
            'type_specifier': 'dataset',
            'crs': 'WGS84',
            'bbox': [-2.24, 51.99, -2.15, 52.05],
            'time_range': ['2020-12-01', '2021-02-28'],
            'time_period': '1D',
            'dims': {'time': 90, 'lat': 674, 'lon': 1024},
            'spatial_res': 8.9e-05,
            'data_vars': {
                'B02': {'name': 'B02', 'dtype': 'float32', 'dims': ['time', 'lat', 'lon']},
                'CLM': {'name': 'CLM', 'dtype': 'float32', 'dims': ['time', 'lat', 'lon']}
            }
        },
        'size_estimation': {
            'image_size': [1024, 674],
            'tile_size': [512, 512],
            'num_variables': 5,
            'num_tiles': [2, 1],
            'num_requests': 900,
            'num_bytes': 1242316800
        },
        'cost_estimation': {
            'required': 540,
            'available': 500,
            'limit': 1000
        }
    }
    self.assertDictEqual(expected, res)

    punits_p.return_value = dict(punits=dict(total_count=1000))
    with self.assertRaises(api.ApiError) as e:
        cubegens.info(user_id='drwho', email='*****@*****.**', body=_CFG, token='fdsvdf')
    self.assertEqual("Error. Cannot handle punit data. Entry 'count' is missing.",
                     str(e.exception))

    punits_p.return_value = dict(punits=dict(total_count=1000), count=500)
    cfg = _CFG.copy()
    del cfg['input_config']
    with self.assertRaises(api.ApiError) as e:
        cubegens.info(user_id='drwho', email='*****@*****.**', body=cfg, token='fdsvdf')
    self.assertEqual("Error. Invalid input configuration.", str(e.exception))

    cfg = _CFG.copy()
    # noinspection PyTypeChecker
    cfg['input_configs'] = [_CFG['input_config']]
    res = cubegens.info(user_id='drwho', email='*****@*****.**', body=cfg, token='fdsvdf')
    self.assertEqual(expected, res)

    output = _OUTPUT.replace("Awaiting", "Awaitingxxx")
    get_p.return_value = {
        'cubegen_id': 'id',
        'status': 'ready',
        'output': [output],
        'progress': 100
    }
    with self.assertRaises(api.ApiError) as e:
        cubegens.info(user_id='drwho', email='*****@*****.**', body=_CFG, token='fdsvdf')
    self.assertEqual("Expecting value: line 2 column 1 (char 1)", str(e.exception))

    job_failed,
    job_complete,
    get_alphanumeric_unique_tag,
)
from fv3kube.utils import _handle_jobs
import kubernetes
from kubernetes.client import V1Job, V1JobStatus, V1ObjectMeta

failed_condition = kubernetes.client.V1JobCondition(type="Failed", status="True")
succesful_condition = kubernetes.client.V1JobCondition(type="Complete", status="True")

failed_status = V1JobStatus(active=None, conditions=[failed_condition])
complete_status = V1JobStatus(active=None, conditions=[succesful_condition])
inprogress_status = V1JobStatus(
    active=1, completion_time=None, conditions=None, failed=None, succeeded=None
)


def test_get_base_fv3config():
    config = get_base_fv3config("v0.3")
    assert isinstance(config, Mapping)

def test_pod_in_running_phase():
    pod = V1Pod(status=V1PodStatus(phase="Running"))
    inferrer = SingleNodeStrategyKubernetesStatusInferrer(V1JobStatus(active=1), pods=[pod])
    assert inferrer.status() == BenchmarkJobStatus.RUNNING_AT_MAIN_CONTAINERS

def test_no_pods_scheduled_for_k8s_job_yet():
    k8s_job_status = V1JobStatus(active=1)
    inferrer = SingleNodeStrategyKubernetesStatusInferrer(k8s_job_status, pods=[])
    assert inferrer.status() == BenchmarkJobStatus.NO_POD_SCHEDULED

def test_job_status(failed, succeeded, conditions, expected_status):
    job_status = V1JobStatus(succeeded=succeeded, failed=failed, conditions=conditions)
    inferrer = SingleNodeStrategyKubernetesStatusInferrer(job_status, pods=[])
    assert inferrer.status() == expected_status

def mock_job() -> V1Job:
    meta = V1ObjectMeta(namespace="default", name="some-job")
    spec = V1JobSpec(template=V1PodTemplate())
    status = V1JobStatus(conditions=[])
    return V1Job(metadata=meta, spec=spec, status=status)

def test_delete_complete_jobs(api: MagicMock, batch_api: MagicMock):
    batch_api.list_namespaced_job.return_value = V1JobList(items=[
        # delete because complete
        *(V1Job(
            metadata=V1ObjectMeta(
                name=f"flush-pv-{i}",
                uid=f"uid-flush-pv-{i}",
                resource_version=f"{i}",
            ),
            status=V1JobStatus(
                conditions=[V1JobCondition(status="", type="Complete")]),
        ) for i in range(3)),
        # don't delete because already deleted
        V1Job(
            metadata=V1ObjectMeta(
                name="flush-pv-3",
                deletion_timestamp=datetime.utcnow(),
            ),
            status=V1JobStatus(
                conditions=[V1JobCondition(status="", type="Complete")]),
        ),
        # don't delete because not complete
        V1Job(
            metadata=V1ObjectMeta(name="flush-pv-4"),
            status=V1JobStatus(
                conditions=[V1JobCondition(status="", type="Pending")]),
        ),
        # don't delete because status does not contain conditions
        V1Job(metadata=V1ObjectMeta(name="flush-pv-5"), status=V1JobStatus()),
        # don't delete because not a flush job
        V1Job(
            metadata=V1ObjectMeta(name="cron-1"),
            status=V1JobStatus(
                conditions=[V1JobCondition(status="", type="Complete")]),
        ),
    ])

    def delete_job(name, namespace, body):
        if name == "flush-pv-1":
            raise ApiException(reason="Conflict")
        if name == "flush-pv-2":
            raise ApiException(reason="Not Found")

    batch_api.delete_namespaced_job.side_effect = delete_job

    delete_complete_jobs(api, batch_api, "namespace")

    batch_api.list_namespaced_job.assert_called_once_with("namespace")

    assert [(f"pv-{i}", "Delete") for i in range(3)] == [(
        call.kwargs["name"],
        call.kwargs["body"].spec.persistent_volume_reclaim_policy,
    ) for call in api.patch_persistent_volume.call_args_list]

    assert [(f"flush-pv-{i}", "namespace", [("Job", f"flush-pv-{i}")]) for i in range(3)] == [(
        call.kwargs["name"],
        call.kwargs["namespace"],
        [(ref.kind, ref.name) for ref in call.kwargs["body"].metadata.owner_references],
    ) for call in api.patch_namespaced_persistent_volume_claim.call_args_list]

    assert [(f"flush-pv-{i}", "namespace", f"uid-flush-pv-{i}", f"{i}") for i in range(3)] == [(
        call.kwargs["name"],
        call.kwargs["namespace"],
        call.kwargs["body"].preconditions.uid,
        call.kwargs["body"].preconditions.resource_version,
    ) for call in batch_api.delete_namespaced_job.call_args_list]