# Shared imports assumed by the snippets below. Each snippet originates from
# its own test module; project-specific names (PodUnschedulableReason,
# KubernetesClusterConnector, Autoscaler, AutoscalingConfig, ClustermanResources,
# autoscaler_patches, delete_unschedulable_pods, delete_detached_pvcs,
# PvcCacheEntry, and the test strategy/inferrer classes) come from the code
# under test and are not reproduced here.
from datetime import datetime, timedelta
from unittest import mock
from unittest.mock import MagicMock

import behave
import pytest
from kubernetes.client import (
    V1Affinity,
    V1Container,
    V1JobStatus,
    V1NodeAffinity,
    V1NodeSelector,
    V1NodeSelectorRequirement,
    V1NodeSelectorTerm,
    V1ObjectMeta,
    V1ObjectReference,
    V1PersistentVolumeClaim,
    V1PersistentVolumeClaimList,
    V1PersistentVolumeClaimSpec,
    V1PersistentVolumeClaimVolumeSource,
    V1Pod,
    V1PodCondition,
    V1PodList,
    V1PodSpec,
    V1PodStatus,
    V1PreferredSchedulingTerm,
    V1ResourceRequirements,
    V1Volume,
)
from kubernetes.client.rest import ApiException


def pod_with_preferred_affinity():
    return V1Pod(
        status=V1PodStatus(
            phase='Pending',
            conditions=[
                V1PodCondition(status='False', type='PodScheduled', reason='Unschedulable')
            ],
        ),
        spec=V1PodSpec(
            containers=[
                V1Container(
                    name='container',
                    resources=V1ResourceRequirements(requests={'cpu': '1.5'}),
                )
            ],
            affinity=V1Affinity(node_affinity=V1NodeAffinity(
                required_during_scheduling_ignored_during_execution=V1NodeSelector(
                    node_selector_terms=[
                        V1NodeSelectorTerm(match_expressions=[
                            V1NodeSelectorRequirement(
                                key='clusterman.com/scheduler',
                                operator='Exists',
                            )
                        ])
                    ],
                ),
                preferred_during_scheduling_ignored_during_execution=[
                    V1PreferredSchedulingTerm(
                        weight=10,
                        preference=V1NodeSelectorTerm(match_expressions=[
                            V1NodeSelectorRequirement(
                                key='clusterman.com/pool',
                                operator='In',
                                values=['bar'],
                            )
                        ]),
                    )
                ],
            )),
        ),
    )
def test_delete_unschedulable_pods_raises_server_error(api: MagicMock):
    api.list_namespaced_pod.return_value = V1PodList(items=[
        V1Pod(
            metadata=V1ObjectMeta(
                name="web-0",
                namespace="default",
                uid="uid-web-0",
                resource_version="1",
                owner_references=[V1ObjectReference(kind="StatefulSet")],
            ),
            status=V1PodStatus(
                phase="Pending",
                conditions=[
                    V1PodCondition(
                        status="False",
                        type="PodScheduled",
                        reason="Unschedulable",
                        message='persistentvolumeclaim "queue-web-0" not found',
                    )
                ],
            ),
        ),
    ])

    def delete_pod(name, namespace, body):
        raise ApiException(reason="Server Error")

    api.delete_namespaced_pod.side_effect = delete_pod

    # Any error other than Conflict/Not Found should propagate to the caller
    with pytest.raises(ApiException):
        delete_unschedulable_pods(api, "namespace")

    api.list_namespaced_pod.assert_called_once_with("namespace")
def pending_pods():
    return [
        (
            V1Pod(
                metadata=V1ObjectMeta(name='pod1'),
                status=V1PodStatus(
                    phase='Pending',
                    conditions=[
                        V1PodCondition(status='False', type='PodScheduled', reason='Unschedulable')
                    ],
                ),
                spec=V1PodSpec(containers=[
                    V1Container(
                        name='container1',
                        resources=V1ResourceRequirements(requests={'cpu': '1.5', 'memory': '150MB'}),
                    ),
                    V1Container(
                        name='container2',
                        resources=V1ResourceRequirements(requests={'cpu': '1.5', 'memory': '350MB'}),
                    ),
                ]),
            ),
            PodUnschedulableReason.InsufficientResources,
        ),
        (
            V1Pod(
                metadata=V1ObjectMeta(name='pod2'),
                status=V1PodStatus(
                    phase='Pending',
                    conditions=[
                        V1PodCondition(status='False', type='PodScheduled', reason='Unschedulable')
                    ],
                ),
                spec=V1PodSpec(containers=[
                    V1Container(
                        name='container1',
                        resources=V1ResourceRequirements(requests={'cpu': '1.5'}),
                    ),
                    V1Container(
                        name='container2',
                        resources=V1ResourceRequirements(requests={'cpu': '1.5', 'memory': '300MB'}),
                    ),
                ]),
            ),
            PodUnschedulableReason.Unknown,
        ),
    ]
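# A hypothetical consumer of the fixture above, illustrating why each pod is
# paired with its expected classification. The mock_cluster_connector fixture
# and the _get_pod_unschedulable_reason call are assumptions based on how the
# connector is exercised in create_k8s_autoscaler below.
def test_get_pod_unschedulable_reason(mock_cluster_connector):
    for pod, expected_reason in pending_pods():
        assert mock_cluster_connector._get_pod_unschedulable_reason(pod) == expected_reason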
def test_pod_scaling_nodes():
    condition = V1PodCondition(type="PodScheduled", reason="Unschedulable", status="False")
    status = V1PodStatus(phase="Pending", conditions=[condition])
    inferrer = SingleNodeStrategyKubernetesStatusInferrer(
        V1JobStatus(active=1), pods=[V1Pod(status=status)]
    )
    assert inferrer.status() == BenchmarkJobStatus.PENDING_NODE_SCALING
def unschedulable_pod():
    return V1Pod(
        metadata=V1ObjectMeta(name='unschedulable_pod', annotations=dict(), owner_references=[]),
        status=V1PodStatus(
            phase='Pending',
            conditions=[
                V1PodCondition(status='False', type='PodScheduled', reason='Unschedulable')
            ],
        ),
        spec=V1PodSpec(
            containers=[
                V1Container(
                    name='container2',
                    resources=V1ResourceRequirements(requests={'cpu': '1.5'}),
                )
            ],
            node_selector={'clusterman.com/pool': 'bar'},
        ),
    )
def test_delete_unschedulable_pods(api: MagicMock):
    api.list_namespaced_pod.return_value = V1PodList(items=[
        V1Pod(
            metadata=V1ObjectMeta(
                name=f"web-{i}",
                namespace="default",
                uid=f"uid-web-{i}",
                resource_version=f"{i}",
                owner_references=[V1ObjectReference(kind="StatefulSet")],
            ),
            status=V1PodStatus(
                phase="Pending",
                conditions=[
                    V1PodCondition(
                        status="False",
                        type="PodScheduled",
                        reason="Unschedulable",
                        # web-0 through web-2 should be deleted; web-3 has the wrong message
                        message="" if i == 3 else f'persistentvolumeclaim "queue-web-{i}" not found',
                    )
                ],
            ),
        )
        for i in range(4)
    ])

    def delete_pod(name, namespace, body):
        # Conflict and Not Found responses should be tolerated, not re-raised
        if name == "web-1":
            raise ApiException(reason="Conflict")
        if name == "web-2":
            raise ApiException(reason="Not Found")

    api.delete_namespaced_pod.side_effect = delete_pod

    delete_unschedulable_pods(api, "namespace")

    assert [(f"web-{i}", "namespace", f"uid-web-{i}", f"{i}") for i in range(3)] == [
        (
            call.kwargs["name"],
            call.kwargs["namespace"],
            call.kwargs["body"].preconditions.uid,
            call.kwargs["body"].preconditions.resource_version,
        )
        for call in api.delete_namespaced_pod.call_args_list
    ]
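# For reference, the behavior pinned down by the two tests above can be
# satisfied by an implementation along these lines. This is a minimal sketch
# reconstructed from the assertions, not the project's actual code; the
# PVC_NOT_FOUND pattern and helper name are assumptions.
import re

from kubernetes.client import V1DeleteOptions, V1Preconditions

PVC_NOT_FOUND = re.compile(r'persistentvolumeclaim "[^"]+" not found')


def delete_unschedulable_pods_sketch(api, namespace):
    for pod in api.list_namespaced_pod(namespace).items:
        if pod.status.phase != "Pending":
            continue
        if not any(
            c.reason == "Unschedulable" and c.message and PVC_NOT_FOUND.search(c.message)
            for c in pod.status.conditions or []
        ):
            continue
        try:
            # Preconditions ensure only the exact pod revision we observed is
            # deleted; a pod recreated or updated in the meantime is left alone.
            api.delete_namespaced_pod(
                name=pod.metadata.name,
                namespace=namespace,
                body=V1DeleteOptions(
                    preconditions=V1Preconditions(
                        uid=pod.metadata.uid,
                        resource_version=pod.metadata.resource_version,
                    )
                ),
            )
        except ApiException as e:
            # Conflict / Not Found mean the pod changed or is already gone
            if e.reason not in ("Conflict", "Not Found"):
                raise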
def test_delete_detached_pvcs(api: MagicMock):
    api.list_namespaced_pod.return_value = V1PodList(items=[
        # pvc is attached
        V1Pod(
            spec=V1PodSpec(
                containers=[],
                volumes=[
                    V1Volume(
                        name="queue",
                        persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
                            claim_name="queue-web-3",
                        ),
                    )
                ],
            ),
        ),
        # pvc not attached because spec is missing
        V1Pod(),
        # pvc not attached because volumes are missing
        V1Pod(spec=V1PodSpec(containers=[])),
        # pvc not attached because volume is not persistent
        V1Pod(spec=V1PodSpec(containers=[], volumes=[V1Volume(name="queue")])),
        # pvc not attached because pod is unschedulable due to pvc
        V1Pod(
            metadata=V1ObjectMeta(
                name="web-0",
                namespace="default",
                uid="uid-web-0",
                resource_version="1",
                owner_references=[V1ObjectReference(kind="StatefulSet")],
            ),
            status=V1PodStatus(
                phase="Pending",
                conditions=[
                    V1PodCondition(
                        status="False",
                        type="PodScheduled",
                        reason="Unschedulable",
                        message='persistentvolumeclaim "queue-web-0" not found',
                    )
                ],
            ),
        ),
    ])
    api.list_namespaced_persistent_volume_claim.return_value = V1PersistentVolumeClaimList(items=[
        # should delete 0-2; 3 is in the attached set
        *(
            V1PersistentVolumeClaim(
                metadata=V1ObjectMeta(
                    name=f"queue-web-{i}",
                    uid=f"uid-queue-web-{i}",
                    resource_version=f"{i}",
                ),
                spec=V1PersistentVolumeClaimSpec(volume_name=f"pv-{i}"),
            )
            for i in range(4)
        ),
        # name does not start with claim prefix
        V1PersistentVolumeClaim(metadata=V1ObjectMeta(name="other-web-0")),
    ])

    def delete_pvc(name, namespace, body):
        if name == "queue-web-1":
            raise ApiException(reason="Conflict")
        if name == "queue-web-2":
            raise ApiException(reason="Not Found")

    api.delete_namespaced_persistent_volume_claim.side_effect = delete_pvc

    pvc_cleanup_delay = timedelta(microseconds=1)
    delay_complete = datetime.utcnow() - pvc_cleanup_delay
    cache = {
        # wrong pv name, should be overwritten
        "queue-web-0": PvcCacheEntry(pv="wrong", time=delay_complete),
        # no longer detached, should be removed
        "queue-web-3": PvcCacheEntry(pv="pv-3", time=delay_complete),
    }

    # First pass: detached PVCs are only recorded in the cache; nothing is
    # deleted until an entry has aged past pvc_cleanup_delay.
    delete_detached_pvcs(api, "namespace", "queue-", pvc_cleanup_delay, cache)

    api.list_namespaced_pod.assert_called_once_with("namespace")
    api.list_namespaced_persistent_volume_claim.assert_called_once_with("namespace")
    api.delete_namespaced_persistent_volume_claim.assert_not_called()
    assert {f"queue-web-{i}": f"pv-{i}" for i in range(3)} == {k: v.pv for k, v in cache.items()}

    api.list_namespaced_pod.reset_mock()
    api.list_namespaced_persistent_volume_claim.reset_mock()
    previous_cache = {**cache}

    # Second pass: the cached entries have now outlived the delay, so the
    # still-detached PVCs are deleted with uid/resourceVersion preconditions.
    delete_detached_pvcs(api, "namespace", "queue-", pvc_cleanup_delay, cache)

    api.list_namespaced_pod.assert_called_once_with("namespace")
    api.list_namespaced_persistent_volume_claim.assert_called_once_with("namespace")
    assert previous_cache == cache
    assert [
        (f"queue-web-{i}", "namespace", f"uid-queue-web-{i}", f"{i}")
        for i in range(3)
    ] == [
        (
            call.kwargs["name"],
            call.kwargs["namespace"],
            call.kwargs["body"].preconditions.uid,
            call.kwargs["body"].preconditions.resource_version,
        )
        for call in api.delete_namespaced_persistent_volume_claim.call_args_list
    ]
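# The cache above maps PVC name to an entry pairing the bound PV with the time
# the PVC was first seen detached; the two-pass delay means briefly-detached
# PVCs are spared. A minimal PvcCacheEntry consistent with this usage (the
# real definition may be a NamedTuple or differ in field order) is:
from dataclasses import dataclass


@dataclass
class PvcCacheEntry:
    pv: str          # volume the PVC was bound to when last seen detached
    time: datetime   # when the PVC was first observed detached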
def create_k8s_autoscaler(context, prevent_scale_down_after_capacity_loss=False):
    behave.use_fixture(autoscaler_patches, context)
    context.mock_cluster_connector.__class__ = KubernetesClusterConnector
    context.mock_cluster_connector.get_cluster_allocated_resources.return_value = ClustermanResources(
        cpus=context.allocated_cpus,
    )
    context.mock_cluster_connector._pending_pods = []
    if float(context.pending_cpus) > 0:
        # Route get_unschedulable_pods through the real implementation so it
        # filters the fake pending pods set up below.
        context.mock_cluster_connector.get_unschedulable_pods = \
            lambda: KubernetesClusterConnector.get_unschedulable_pods(context.mock_cluster_connector)
        context.mock_cluster_connector._get_pod_unschedulable_reason.side_effect = lambda pod: (
            PodUnschedulableReason.InsufficientResources
            if pod.metadata.name == 'pod1'
            else PodUnschedulableReason.Unknown
        )
        context.mock_cluster_connector._pending_pods = [
            V1Pod(
                metadata=V1ObjectMeta(name='pod1'),
                status=V1PodStatus(
                    phase='Pending',
                    conditions=[
                        V1PodCondition(status='False', type='PodScheduled', reason='Unschedulable')
                    ],
                ),
                spec=V1PodSpec(containers=[
                    V1Container(
                        name='container1',
                        resources=V1ResourceRequirements(requests={'cpu': context.pending_cpus}),
                    ),
                ]),
            ),
            V1Pod(
                metadata=V1ObjectMeta(name='pod2'),
                status=V1PodStatus(
                    phase='Pending',
                    conditions=[
                        V1PodCondition(status='False', type='PodScheduled', reason='Unschedulable')
                    ],
                ),
                spec=V1PodSpec(containers=[
                    V1Container(
                        name='container1',
                        resources=V1ResourceRequirements(requests={'cpu': context.pending_cpus}),
                    ),
                ]),
            ),
        ]
    context.autoscaler = Autoscaler(
        cluster='kube-test',
        pool='bar',
        apps=['bar'],
        scheduler='kubernetes',
        metrics_client=mock.Mock(),
        monitoring_enabled=False,
    )
    if prevent_scale_down_after_capacity_loss:
        context.autoscaler.autoscaling_config = AutoscalingConfig(
            excluded_resources=[],
            setpoint=0.7,
            target_capacity_margin=0.1,
            prevent_scale_down_after_capacity_loss=True,
            instance_loss_threshold=0,
        )
def test_delete_detached_pvcs(api: MagicMock):
    api.list_namespaced_pod.return_value = V1PodList(items=[
        # pvc is attached
        V1Pod(
            spec=V1PodSpec(
                containers=[],
                volumes=[
                    V1Volume(
                        name="queue",
                        persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
                            claim_name="queue-web-3",
                        ),
                    )
                ],
            ),
        ),
        # pvc not attached because spec is missing
        V1Pod(),
        # pvc not attached because volumes are missing
        V1Pod(spec=V1PodSpec(containers=[])),
        # pvc not attached because volume is not persistent
        V1Pod(spec=V1PodSpec(containers=[], volumes=[V1Volume(name="queue")])),
        # pvc not attached because pod is unschedulable due to pvc
        V1Pod(
            metadata=V1ObjectMeta(
                name="web-0",
                namespace="default",
                uid="uid-web-0",
                resource_version="1",
                owner_references=[V1ObjectReference(kind="StatefulSet")],
            ),
            status=V1PodStatus(
                phase="Pending",
                conditions=[
                    V1PodCondition(
                        status="False",
                        type="PodScheduled",
                        reason="Unschedulable",
                        message='persistentvolumeclaim "queue-web-0" not found',
                    )
                ],
            ),
        ),
    ])
    api.list_namespaced_persistent_volume_claim.return_value = V1PersistentVolumeClaimList(items=[
        # should delete 0-2; 3 is in the attached set
        *(
            V1PersistentVolumeClaim(
                metadata=V1ObjectMeta(
                    name=f"queue-web-{i}",
                    uid=f"uid-queue-web-{i}",
                    resource_version=f"{i}",
                ),
            )
            for i in range(4)
        ),
        # name does not start with claim prefix
        V1PersistentVolumeClaim(metadata=V1ObjectMeta(name="other-web-0")),
    ])

    def delete_pvc(name, namespace, body):
        if name == "queue-web-1":
            raise ApiException(reason="Conflict")
        if name == "queue-web-2":
            raise ApiException(reason="Not Found")

    api.delete_namespaced_persistent_volume_claim.side_effect = delete_pvc

    delete_detached_pvcs(api, "namespace", "queue-")

    api.list_namespaced_pod.assert_called_once_with("namespace")
    api.list_namespaced_persistent_volume_claim.assert_called_once_with("namespace")
    assert [
        (f"queue-web-{i}", "namespace", f"uid-queue-web-{i}", f"{i}")
        for i in range(3)
    ] == [
        (
            call.kwargs["name"],
            call.kwargs["namespace"],
            call.kwargs["body"].preconditions.uid,
            call.kwargs["body"].preconditions.resource_version,
        )
        for call in api.delete_namespaced_persistent_volume_claim.call_args_list
    ]
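# A sketch of the non-cached delete_detached_pvcs consistent with the test
# above: a claim counts as "attached" only when referenced by a pod volume
# (a PVC merely named in an Unschedulable message does not count), claims
# outside the prefix are skipped, deletes carry uid/resourceVersion
# preconditions, and Conflict/Not Found are tolerated. Reconstructed from the
# assertions, not the project's actual code; V1DeleteOptions and
# V1Preconditions are imported in the earlier sketch.
def delete_detached_pvcs_sketch(api, namespace, claim_prefix):
    attached = {
        volume.persistent_volume_claim.claim_name
        for pod in api.list_namespaced_pod(namespace).items
        for volume in ((pod.spec and pod.spec.volumes) or [])
        if volume.persistent_volume_claim
    }
    for pvc in api.list_namespaced_persistent_volume_claim(namespace).items:
        name = pvc.metadata.name
        if not name.startswith(claim_prefix) or name in attached:
            continue
        try:
            api.delete_namespaced_persistent_volume_claim(
                name=name,
                namespace=namespace,
                body=V1DeleteOptions(
                    preconditions=V1Preconditions(
                        uid=pvc.metadata.uid,
                        resource_version=pvc.metadata.resource_version,
                    )
                ),
            )
        except ApiException as e:
            if e.reason not in ("Conflict", "Not Found"):
                raise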