コード例 #1
0
def pod_with_preferred_affinity():
    """Build a pending, unschedulable pod carrying node-affinity rules.

    The pod has one container requesting 1.5 CPUs, a required node
    selector on the ``clusterman.com/scheduler`` label (``Exists``), and a
    preferred term of weight 10 for ``clusterman.com/pool`` in ``['bar']``.
    """
    unschedulable = V1PodCondition(
        status='False',
        type='PodScheduled',
        reason='Unschedulable',
    )
    container = V1Container(
        name='container',
        resources=V1ResourceRequirements(requests={'cpu': '1.5'}),
    )

    # Hard requirement: the node must carry the scheduler label.
    scheduler_label = V1NodeSelectorRequirement(
        key='clusterman.com/scheduler',
        operator='Exists',
    )
    required_selector = V1NodeSelector(node_selector_terms=[
        V1NodeSelectorTerm(match_expressions=[scheduler_label]),
    ])

    # Soft preference: nodes whose pool label is 'bar'.
    pool_label = V1NodeSelectorRequirement(
        key='clusterman.com/pool',
        operator='In',
        values=['bar'],
    )
    preferred_term = V1PreferredSchedulingTerm(
        weight=10,
        preference=V1NodeSelectorTerm(match_expressions=[pool_label]),
    )

    node_affinity = V1NodeAffinity(
        required_during_scheduling_ignored_during_execution=required_selector,
        preferred_during_scheduling_ignored_during_execution=[preferred_term],
    )
    return V1Pod(
        status=V1PodStatus(phase='Pending', conditions=[unschedulable]),
        spec=V1PodSpec(
            containers=[container],
            affinity=V1Affinity(node_affinity=node_affinity),
        ),
    )
コード例 #2
0
def test_delete_unschedulable_pods_raises_server_error(api: MagicMock):
    """An unexpected ``ApiException`` from the pod delete call must propagate.

    The mocked API lists a single pending StatefulSet pod whose condition
    reports a missing persistent volume claim, and every delete attempt
    raises ``ApiException(reason="Server Error")``.
    """
    api.list_namespaced_pod.return_value = V1PodList(items=[
        V1Pod(
            metadata=V1ObjectMeta(
                name="web-0",
                namespace="default",
                uid="uid-web-0",
                resource_version="1",
                owner_references=[V1ObjectReference(kind="StatefulSet")],
            ),
            status=V1PodStatus(
                phase="Pending",
                conditions=[
                    V1PodCondition(
                        status="Not Ready",
                        type="False",
                        reason="Unschedulable",
                        message='persistentvolumeclaim "queue-web-0" not found',
                    )
                ],
            ),
        ),
    ])

    def delete_pod(name, namespace, body):
        raise ApiException(reason="Server Error")

    api.delete_namespaced_pod.side_effect = delete_pod

    with pytest.raises(ApiException):
        delete_unschedulable_pods(api, "namespace")

    # `called_once_with` is not a Mock assertion: MagicMock auto-creates it as
    # a child mock, so calling it verifies nothing. Use the real assertion.
    api.list_namespaced_pod.assert_called_once_with("namespace")
コード例 #3
0
def pending_pods():
    """Return ``(pod, expected_reason)`` pairs for pending, unschedulable pods.

    Each pod has two containers named ``container1``. ``pod1`` requests CPU
    and ``memory`` on both containers and is paired with
    ``InsufficientResources``; ``pod2`` omits ``memory`` (one container uses
    the key ``mem`` instead) and is paired with ``Unknown``.
    """

    def _pending_pod(pod_name, *container_requests):
        # One container per requests dict, all sharing the same name.
        containers = [
            V1Container(
                name='container1',
                resources=V1ResourceRequirements(requests=requests),
            )
            for requests in container_requests
        ]
        unschedulable = V1PodCondition(
            status='False',
            type='PodScheduled',
            reason='Unschedulable',
        )
        return V1Pod(
            metadata=V1ObjectMeta(name=pod_name),
            status=V1PodStatus(phase='Pending', conditions=[unschedulable]),
            spec=V1PodSpec(containers=containers),
        )

    pod1 = _pending_pod(
        'pod1',
        {'cpu': '1.5', 'memory': '150MB'},
        {'cpu': '1.5', 'memory': '350MB'},
    )
    pod2 = _pending_pod(
        'pod2',
        {'cpu': '1.5'},
        {'cpu': '1.5', 'mem': '300MB'},
    )
    return [
        (pod1, PodUnschedulableReason.InsufficientResources),
        (pod2, PodUnschedulableReason.Unknown),
    ]
コード例 #4
0
def test_pod_scaling_nodes():
    """A pending pod marked Unschedulable yields PENDING_NODE_SCALING."""
    unschedulable_pod = V1Pod(
        status=V1PodStatus(
            phase="Pending",
            conditions=[
                V1PodCondition(
                    type="PodScheduled",
                    reason="Unschedulable",
                    status="False",
                )
            ],
        )
    )
    job_status = V1JobStatus(active=1)

    inferred = SingleNodeStrategyKubernetesStatusInferrer(
        job_status, pods=[unschedulable_pod]).status()

    assert inferred == BenchmarkJobStatus.PENDING_NODE_SCALING
コード例 #5
0
def unschedulable_pod():
    """Build a pending, unschedulable pod pinned to pool ``bar``.

    The pod is named ``unschedulable_pod``, has empty annotations and owner
    references, one container requesting 1.5 CPUs, and a node selector on
    ``clusterman.com/pool``.
    """
    metadata = V1ObjectMeta(
        name='unschedulable_pod',
        annotations={},
        owner_references=[],
    )
    status = V1PodStatus(
        phase='Pending',
        conditions=[
            V1PodCondition(
                status='False',
                type='PodScheduled',
                reason='Unschedulable',
            ),
        ],
    )
    container = V1Container(
        name='container2',
        resources=V1ResourceRequirements(requests={'cpu': '1.5'}),
    )
    spec = V1PodSpec(
        containers=[container],
        node_selector={'clusterman.com/pool': 'bar'},
    )
    return V1Pod(metadata=metadata, status=status, spec=spec)
コード例 #6
0
def test_delete_unschedulable_pods(api: MagicMock):
    """Deletes are attempted for web-0..web-2 only, with preconditions.

    Four pending pods are listed. Pods 0-2 report a missing persistent
    volume claim; pod 3 has an empty message and must not be deleted. The
    delete mock raises ``Conflict`` for web-1 and ``Not Found`` for web-2,
    and the call under test is expected to return without raising.
    """

    def pending_pod(index):
        # Pod 3 gets the "wrong" (empty) message; 0-2 name the missing claim.
        if index == 3:
            message = ""
        else:
            message = f'persistentvolumeclaim "queue-web-{index}" not found'
        return V1Pod(
            metadata=V1ObjectMeta(
                name=f"web-{index}",
                namespace="default",
                uid=f"uid-web-{index}",
                resource_version=f"{index}",
                owner_references=[V1ObjectReference(kind="StatefulSet")],
            ),
            status=V1PodStatus(
                phase="Pending",
                conditions=[
                    V1PodCondition(
                        status="Not Ready",
                        type="False",
                        reason="Unschedulable",
                        message=message,
                    )
                ],
            ),
        )

    api.list_namespaced_pod.return_value = V1PodList(
        items=[pending_pod(i) for i in range(4)])

    # Pod name -> ApiException reason raised by the mocked delete call.
    delete_failures = {"web-1": "Conflict", "web-2": "Not Found"}

    def delete_pod(name, namespace, body):
        if name in delete_failures:
            raise ApiException(reason=delete_failures[name])

    api.delete_namespaced_pod.side_effect = delete_pod
    delete_unschedulable_pods(api, "namespace")

    expected_calls = [
        (f"web-{i}", "namespace", f"uid-web-{i}", f"{i}") for i in range(3)
    ]
    actual_calls = []
    for call in api.delete_namespaced_pod.call_args_list:
        preconditions = call.kwargs["body"].preconditions
        actual_calls.append((
            call.kwargs["name"],
            call.kwargs["namespace"],
            preconditions.uid,
            preconditions.resource_version,
        ))
    assert expected_calls == actual_calls
コード例 #7
0
def test_delete_detached_pvcs(api: MagicMock):
    """Run ``delete_detached_pvcs`` twice against a shared cache.

    First pass: no claim is deleted, and the cache ends up mapping
    ``queue-web-0..2`` to ``pv-0..2`` (a stale ``queue-web-0`` entry is
    overwritten, the ``queue-web-3`` entry is dropped).

    Second pass: the cache is left unchanged and a delete is attempted for
    ``queue-web-0..2`` with uid / resource_version preconditions. The delete
    mock raises ``Conflict`` for ``queue-web-1`` and ``Not Found`` for
    ``queue-web-2``; the call under test is expected to return normally.
    """
    api.list_namespaced_pod.return_value = V1PodList(items=[
        # pvc is attached
        V1Pod(spec=V1PodSpec(
            containers=[],
            volumes=[
                V1Volume(
                    name="queue",
                    persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
                        claim_name="queue-web-3", ),
                )
            ],
        ), ),
        # pvc not attached because spec is missing
        V1Pod(),
        # pvc not attached because volumes are missing
        V1Pod(spec=V1PodSpec(containers=[], ), ),
        # pvc not attached because volume is not persistent
        V1Pod(spec=V1PodSpec(containers=[], volumes=[V1Volume(
            name="queue")]), ),
        # pvc not attached because pod is unschedulable due to pvc
        V1Pod(
            metadata=V1ObjectMeta(
                name="web-0",
                namespace="default",
                uid="uid-web-0",
                resource_version="1",
                owner_references=[V1ObjectReference(kind="StatefulSet")],
            ),
            status=V1PodStatus(
                phase="Pending",
                conditions=[
                    V1PodCondition(
                        status="Not Ready",
                        type="False",
                        reason="Unschedulable",
                        message='persistentvolumeclaim "queue-web-0" not found',
                    )
                ],
            ),
        ),
    ])
    api.list_namespaced_persistent_volume_claim.return_value = V1PersistentVolumeClaimList(
        items=[
            # should delete 0-2, 3 is in attached pvcs
            *(V1PersistentVolumeClaim(
                metadata=V1ObjectMeta(
                    name=f"queue-web-{i}",
                    uid=f"uid-queue-web-{i}",
                    resource_version=f"{i}",
                ),
                spec=V1PersistentVolumeClaimSpec(volume_name=f"pv-{i}"),
            ) for i in range(4)),
            # name does not start with claim prefix
            V1PersistentVolumeClaim(metadata=V1ObjectMeta(
                name="other-web-0"), ),
        ])

    # Simulate API failures for two of the claims; queue-web-0 deletes cleanly.
    def delete_pvc(name, namespace, body):
        if name == "queue-web-1":
            raise ApiException(reason="Conflict")
        if name == "queue-web-2":
            raise ApiException(reason="Not Found")

    api.delete_namespaced_persistent_volume_claim.side_effect = delete_pvc
    # Minimal delay; presumably short enough that entries recorded by the
    # first pass have aged past it by the second pass -- TODO confirm against
    # the delete_detached_pvcs implementation.
    pvc_cleanup_delay = timedelta(microseconds=1)
    # Timestamp that already lies one full delay in the past.
    delay_complete = datetime.utcnow() - pvc_cleanup_delay
    cache = {
        # wrong pv name, should be overwritten
        "queue-web-0": PvcCacheEntry(pv="wrong", time=delay_complete),
        # no longer detached, should be removed
        "queue-web-3": PvcCacheEntry(pv="pv-3", time=delay_complete),
    }

    # First pass: only the cache is updated, nothing is deleted.
    delete_detached_pvcs(api, "namespace", "queue-", pvc_cleanup_delay, cache)

    api.list_namespaced_pod.assert_called_once_with("namespace")
    api.list_namespaced_persistent_volume_claim.assert_called_once_with(
        "namespace")
    api.delete_namespaced_persistent_volume_claim.assert_not_called()
    assert {f"queue-web-{i}": f"pv-{i}"
            for i in range(3)} == {k: v.pv
                                   for k, v in cache.items()}
    # Reset the list mocks so the second pass can be asserted independently.
    api.list_namespaced_pod.reset_mock()
    api.list_namespaced_persistent_volume_claim.reset_mock()
    # Shallow snapshot used to check the second pass leaves the cache as-is.
    previous_cache = {**cache}

    # Second pass: deletes are attempted for the cached claims.
    delete_detached_pvcs(api, "namespace", "queue-", pvc_cleanup_delay, cache)

    api.list_namespaced_pod.assert_called_once_with("namespace")
    api.list_namespaced_persistent_volume_claim.assert_called_once_with(
        "namespace")
    assert previous_cache == cache
    assert [
        (f"queue-web-{i}", "namespace", f"uid-queue-web-{i}", f"{i}")
        for i in range(3)
    ] == [(
        call.kwargs["name"],
        call.kwargs["namespace"],
        call.kwargs["body"].preconditions.uid,
        call.kwargs["body"].preconditions.resource_version,
    ) for call in api.delete_namespaced_persistent_volume_claim.call_args_list]
コード例 #8
0
def create_k8s_autoscaler(context,
                          prevent_scale_down_after_capacity_loss=False):
    """Attach a Kubernetes-backed ``Autoscaler`` to the behave ``context``.

    The mocked cluster connector is made to look like a
    ``KubernetesClusterConnector`` and reports ``context.allocated_cpus`` as
    allocated. When ``context.pending_cpus`` is positive, two pending
    unschedulable pods (``pod1`` and ``pod2``), each requesting that many
    CPUs, are installed; ``pod1`` is reported as ``InsufficientResources``
    and any other pod as ``Unknown``.

    :param context: behave context providing ``mock_cluster_connector``,
        ``allocated_cpus`` and ``pending_cpus``.
    :param prevent_scale_down_after_capacity_loss: when true, replace the
        autoscaler's ``autoscaling_config`` with one that enables that
        option.
    """
    behave.use_fixture(autoscaler_patches, context)

    connector = context.mock_cluster_connector
    connector.__class__ = KubernetesClusterConnector
    connector.get_cluster_allocated_resources.return_value = ClustermanResources(
        cpus=context.allocated_cpus,
    )
    connector._pending_pods = []

    if float(context.pending_cpus) > 0:
        # Route through the real implementation, bound to the mock connector.
        connector.get_unschedulable_pods = (
            lambda: KubernetesClusterConnector.get_unschedulable_pods(context.mock_cluster_connector)
        )

        def _reason_for(pod):
            if pod.metadata.name == 'pod1':
                return PodUnschedulableReason.InsufficientResources
            return PodUnschedulableReason.Unknown

        connector._get_pod_unschedulable_reason.side_effect = _reason_for

        # Two pods that differ only by name.
        connector._pending_pods = [
            V1Pod(
                metadata=V1ObjectMeta(name=pod_name),
                status=V1PodStatus(
                    phase='Pending',
                    conditions=[
                        V1PodCondition(
                            status='False',
                            type='PodScheduled',
                            reason='Unschedulable',
                        ),
                    ],
                ),
                spec=V1PodSpec(containers=[
                    V1Container(
                        name='container1',
                        resources=V1ResourceRequirements(
                            requests={'cpu': context.pending_cpus}),
                    ),
                ]),
            )
            for pod_name in ('pod1', 'pod2')
        ]

    context.autoscaler = Autoscaler(
        cluster='kube-test',
        pool='bar',
        apps=['bar'],
        scheduler='kubernetes',
        metrics_client=mock.Mock(),
        monitoring_enabled=False,
    )

    if not prevent_scale_down_after_capacity_loss:
        return

    context.autoscaler.autoscaling_config = AutoscalingConfig(
        excluded_resources=[],
        setpoint=0.7,
        target_capacity_margin=0.1,
        prevent_scale_down_after_capacity_loss=True,
        instance_loss_threshold=0,
    )
コード例 #9
0
def test_delete_detached_pvcs(api: MagicMock):
    """Detached claims with the ``queue-`` prefix get a delete attempt.

    Claims ``queue-web-0..2`` are not referenced by any listed pod volume,
    so a delete is expected for each, carrying uid / resource_version
    preconditions. ``queue-web-3`` is attached to a pod and ``other-web-0``
    lacks the prefix, so neither is deleted. The delete mock raises
    ``Conflict`` for ``queue-web-1`` and ``Not Found`` for ``queue-web-2``;
    the call under test is expected to return normally.
    """
    api.list_namespaced_pod.return_value = V1PodList(
        items=[
            # pvc is attached
            V1Pod(
                spec=V1PodSpec(
                    containers=[],
                    volumes=[
                        V1Volume(
                            name="queue",
                            persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
                                claim_name="queue-web-3",
                            ),
                        )
                    ],
                ),
            ),
            # pvc not attached because spec is missing
            V1Pod(),
            # pvc not attached because volumes are missing
            V1Pod(spec=V1PodSpec(containers=[],),),
            # pvc not attached because volume is not persistent
            V1Pod(spec=V1PodSpec(containers=[], volumes=[V1Volume(name="queue")]),),
            # pvc not attached because pod is unschedulable due to pvc
            V1Pod(
                metadata=V1ObjectMeta(
                    name="web-0",
                    namespace="default",
                    uid="uid-web-0",
                    resource_version="1",
                    owner_references=[V1ObjectReference(kind="StatefulSet")],
                ),
                status=V1PodStatus(
                    phase="Pending",
                    conditions=[
                        V1PodCondition(
                            status="Not Ready",
                            type="False",
                            reason="Unschedulable",
                            message='persistentvolumeclaim "queue-web-0" not found',
                        )
                    ],
                ),
            ),
        ]
    )
    api.list_namespaced_persistent_volume_claim.return_value = V1PersistentVolumeClaimList(
        items=[
            # should delete 0-2, 3 is in attached pvcs
            *(
                V1PersistentVolumeClaim(
                    metadata=V1ObjectMeta(
                        name=f"queue-web-{i}",
                        uid=f"uid-queue-web-{i}",
                        resource_version=f"{i}",
                    ),
                )
                for i in range(4)
            ),
            # name does not start with claim prefix
            V1PersistentVolumeClaim(metadata=V1ObjectMeta(name="other-web-0"),),
        ]
    )

    # Simulate API failures for two of the claims; queue-web-0 deletes cleanly.
    def delete_pvc(name, namespace, body):
        if name == "queue-web-1":
            raise ApiException(reason="Conflict")
        if name == "queue-web-2":
            raise ApiException(reason="Not Found")

    api.delete_namespaced_persistent_volume_claim.side_effect = delete_pvc

    delete_detached_pvcs(api, "namespace", "queue-")

    # `called_once_with` is not a Mock assertion: MagicMock auto-creates it as
    # a child mock, so calling it verifies nothing. Use the real assertions.
    api.list_namespaced_pod.assert_called_once_with("namespace")
    api.list_namespaced_persistent_volume_claim.assert_called_once_with("namespace")
    assert [
        (f"queue-web-{i}", "namespace", f"uid-queue-web-{i}", f"{i}") for i in range(3)
    ] == [
        (
            call.kwargs["name"],
            call.kwargs["namespace"],
            call.kwargs["body"].preconditions.uid,
            call.kwargs["body"].preconditions.resource_version,
        )
        for call in api.delete_namespaced_persistent_volume_claim.call_args_list
    ]