Code Example #1
def wait_for_toleration_update(core_api, apps_api, count,
                               set_tolerations):  # NOQA
    updated = False

    for i in range(RETRY_COUNTS):
        time.sleep(RETRY_INTERVAL_LONG)
        updated = True

        da_list = apps_api.list_namespaced_daemon_set(LONGHORN_NAMESPACE).items
        for da in da_list:
            if da.status.updated_number_scheduled != count:
                updated = False
                break
        if not updated:
            continue

        dp_list = apps_api.list_namespaced_deployment(LONGHORN_NAMESPACE).items
        for dp in dp_list:
            if dp.status.updated_replicas != dp.spec.replicas:
                updated = False
                break
        if not updated:
            continue

        im_pod_list = core_api.list_namespaced_pod(
            LONGHORN_NAMESPACE,
            label_selector="longhorn.io/component=instance-manager").items
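        # Longhorn runs one engine and one replica instance-manager pod per
        # node, hence 2 * count pods are expected in total.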
        if len(im_pod_list) != 2 * count:
            updated = False
            continue

        for p in im_pod_list:
            if p.status.phase != "Running":
                updated = False
                break
        if not updated:
            continue

        pod_list = core_api.list_namespaced_pod(LONGHORN_NAMESPACE).items
        for p in pod_list:
            if p.status.phase != "Running" or \
                    not check_tolerations_set(p.spec.tolerations,
                                              set_tolerations):
                updated = False
                break
        if not updated:
            continue

        client = get_longhorn_api_client()  # NOQA
        images = client.list_engine_image()
        assert len(images) == 1
        if images[0].state != "ready":
            updated = False
            continue

        if updated:
            break

    assert updated
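
The helper check_tolerations_set is defined elsewhere in the test framework and
is not shown here. A minimal sketch of the shape it might take, assuming the
expected tolerations are dicts with key/value/operator/effect fields and the
pod's tolerations are kubernetes-client V1Toleration objects (both assumptions,
not the project's actual code):

def check_tolerations_set(current_toleration_list, set_tolerations):
    # Hypothetical sketch: collect the pod's tolerations as tuples and
    # require every expected toleration to appear among them.
    current = set()
    for t in current_toleration_list or []:
        current.add((t.key, t.value, t.operator, t.effect))

    for t in set_tolerations:
        if (t["key"], t["value"], t["operator"], t["effect"]) not in current:
            return False
    return True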
Code Example #2
def wait_for_priority_class_update(core_api,
                                   apps_api,
                                   count,
                                   priority_class=None):  # NOQA
    updated = False

    for i in range(RETRY_COUNTS):
        time.sleep(RETRY_INTERVAL_LONG)
        updated = True

        if not check_workload_update(core_api, apps_api, count):
            updated = False
            continue

        pod_list = core_api.list_namespaced_pod(LONGHORN_NAMESPACE).items
        for p in pod_list:
            if p.status.phase != "Running" or \
                    not check_priority_class(p, priority_class):
                updated = False
                break
        if not updated:
            continue

        if updated:
            break

    assert updated
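
check_priority_class is likewise defined elsewhere; a minimal sketch, assuming
it simply compares the pod's priorityClassName against the expected class name
(the None handling is an assumption):

def check_priority_class(pod, priority_class=None):
    # Hypothetical sketch: a pod matches when its priorityClassName equals
    # the expected class, or when both are unset.
    if priority_class is None:
        return pod.spec.priority_class_name is None
    return pod.spec.priority_class_name == priority_class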
Code Example #3
def check_workload_update(core_api, apps_api, count):  # NOQA
    da_list = apps_api.list_namespaced_daemon_set(LONGHORN_NAMESPACE).items
    for da in da_list:
        if da.status.updated_number_scheduled != count:
            return False

    dp_list = apps_api.list_namespaced_deployment(LONGHORN_NAMESPACE).items
    for dp in dp_list:
        if dp.status.updated_replicas != dp.spec.replicas:
            return False

    im_pod_list = core_api.list_namespaced_pod(
        LONGHORN_NAMESPACE,
        label_selector="longhorn.io/component=instance-manager").items
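    # Longhorn runs one engine and one replica instance-manager pod per node,
    # hence 2 * count pods are expected in total.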
    if len(im_pod_list) != 2 * count:
        return False

    for p in im_pod_list:
        if p.status.phase != "Running":
            return False

    client = get_longhorn_api_client()  # NOQA
    images = client.list_engine_image()
    assert len(images) == 1
    ei_state = get_engine_image_status_value(client, images[0].name)
    if images[0].state != ei_state:
        return False

    return True
Code Example #4
def wait_for_toleration_update(core_api, apps_api, count, set_tolerations):  # NOQA
    updated = False

    for i in range(RETRY_COUNTS):
        time.sleep(RETRY_INTERVAL_LONG)
        updated = True

        if not check_workload_update(core_api, apps_api, count):
            updated = False
            continue

        pod_list = core_api.list_namespaced_pod(LONGHORN_NAMESPACE).items
        for p in pod_list:
            if p.status.phase != "Running" or \
                    not check_tolerations_set(p.spec.tolerations,
                                              set_tolerations):
                updated = False
                break
        if not updated:
            continue

        if updated:
            break

    assert updated
Code Example #5
def wait_for_toleration_update(
        core_api,
        apps_api,
        count,  # NOQA
        expected_tolerations,
        chk_removed_tolerations=None):
    # Avoid a mutable default argument; fall back to an empty list here.
    if chk_removed_tolerations is None:
        chk_removed_tolerations = []
    not_managed_apps = [
        "csi-attacher",
        "csi-provisioner",
        "csi-resizer",
        "csi-snapshotter",
        "longhorn-csi-plugin",
        "longhorn-driver-deployer",
        "longhorn-manager",
        "longhorn-ui",
    ]
    updated = False
    for _ in range(RETRY_COUNTS):
        time.sleep(RETRY_INTERVAL_LONG)

        updated = True
        if not check_workload_update(core_api, apps_api, count):
            updated = False
            continue

        pod_list = core_api.list_namespaced_pod(LONGHORN_NAMESPACE).items
        for p in pod_list:
            managed_by = p.metadata.labels.get('longhorn.io/managed-by', '')
            if str(managed_by) != "longhorn-manager":
                continue
            app_name = str(p.metadata.labels.get('app', ''))
            assert app_name not in not_managed_apps

            if p.status.phase != "Running" \
                or not check_tolerations_set(p.spec.tolerations,
                                             expected_tolerations,
                                             chk_removed_tolerations):
                updated = False
                break
        if updated:
            break
    assert updated
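
A hypothetical call site for this variant, assuming tolerations are expressed
as dicts and that core_api, apps_api, and node_count are already set up; the
key/value strings are illustrative only:

# Hypothetical usage: wait until every Longhorn-managed pod carries the new
# toleration and no longer carries the removed one.
expected = [{"key": "key1", "value": "value1",
             "operator": "Equal", "effect": "NoSchedule"}]
removed = [{"key": "key0", "value": "value0",
            "operator": "Equal", "effect": "NoSchedule"}]
wait_for_toleration_update(core_api, apps_api, node_count,
                           expected, chk_removed_tolerations=removed)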
Code Example #6
File: test_infra.py Project: ttpcodes/longhorn-tests
def test_offline_node_with_attached_volume_and_pod(
        client, core_api, volume_name, make_deployment_with_pvc,
        reset_cluster_ready_status):  # NOQA
    """
    Test offline node with attached volume and pod

    1. Create the PV/PVC/Deployment manifest.
    2. Set the deployment's tolerationSeconds to 20 to speed up the test.
    3. Update the deployment's node affinity rule to avoid the current node.
    4. Create the volume, PV/PVC, and deployment.
    5. Find the pod in the deployment and write `test_data` into it.
    6. Shut down the node the pod is running on.
    7. Wait for the deployment to delete the pod.
        1. The deployment cannot delete the pod here because the kubelet
           doesn't respond.
    8. Force delete the terminating pod.
    9. Wait for the new pod to be created and the volume attached.
    10. Check `test_data` in the new pod.
    """
    toleration_seconds = 20

    apps_api = get_apps_api_client()
    cloudprovider = detect_cloudprovider()

    volume_name = generate_volume_name()
    pv_name = volume_name + "-pv"
    pvc_name = volume_name + "-pvc"
    deployment_name = volume_name + "-dep"

    longhorn_test_node_name = get_self_host_id()

    deployment_manifest = make_deployment_with_pvc(deployment_name, pvc_name)

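    # tolerationSeconds bounds how long the pod may keep running on a node
    # that is unreachable or not-ready before eviction (the default is 300s).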
    unreachable_toleration = {
        "key": "node.kubernetes.io/unreachable",
        "operator": "Exists",
        "effect": "NoExecute",
        "tolerationSeconds": toleration_seconds
    }

    not_ready_toleration = {
        "key": "node.kubernetes.io/not-ready",
        "operator": "Exists",
        "effect": "NoExecute",
        "tolerationSeconds": toleration_seconds
    }

    deployment_manifest["spec"]["template"]["spec"]["tolerations"] =\
        [unreachable_toleration, not_ready_toleration]

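    # Pin the workload away from the node running this test, so that shutting
    # down the pod's node does not take the test runner offline with it.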
    node_affinity_rules = {
        "nodeAffinity": {
            "requiredDuringSchedulingIgnoredDuringExecution": {
                "nodeSelectorTerms": [{
                    "matchExpressions": [{
                        "key": "kubernetes.io/hostname",
                        "operator": "NotIn",
                        "values": [longhorn_test_node_name]
                    }]
                }]
            }
        }
    }

    deployment_manifest["spec"]["template"]["spec"]["affinity"] =\
        node_affinity_rules

    longhorn_volume = create_and_check_volume(client, volume_name, size=SIZE)

    wait_for_volume_detached(client, volume_name)

    create_pv_for_volume(client, core_api, longhorn_volume, pv_name)

    create_pvc_for_volume(client, core_api, longhorn_volume, pvc_name)

    create_and_wait_deployment(apps_api, deployment_manifest)

    deployment_label_selector =\
        "name=" + deployment_manifest["metadata"]["labels"]["name"]

    deployment_pod_list =\
        core_api.list_namespaced_pod(namespace="default",
                                     label_selector=deployment_label_selector)

    assert len(deployment_pod_list.items) == 1

    pod_name = deployment_pod_list.items[0].metadata.name

    test_data = generate_random_data(VOLUME_RWTEST_SIZE)

    write_pod_volume_data(core_api, pod_name, test_data)

    node_name = deployment_pod_list.items[0].spec.node_name
    node = cloudprovider.node_id(node_name)

    cloudprovider.node_shutdown(node)

    k8s_node_down = wait_for_node_down_k8s(node_name, core_api)

    assert k8s_node_down

    client = get_longhorn_api_client()

    longhorn_node_down = wait_for_node_down_longhorn(node_name, client)
    assert longhorn_node_down

    time.sleep(toleration_seconds + 5)

    for i in range(TERMINATING_POD_RETRYS):
        deployment_pod_list =\
            core_api.list_namespaced_pod(
                namespace="default",
                label_selector=deployment_label_selector
            )

        terminating_pod_name = None
        for pod in deployment_pod_list.items:
            if pod.metadata.deletion_timestamp is not None:
                terminating_pod_name = pod.metadata.name
                break

        if terminating_pod_name is not None:
            break
        else:
            time.sleep(TERMINATING_POD_INTERVAL)

    assert terminating_pod_name is not None

    core_api.delete_namespaced_pod(namespace="default",
                                   name=terminating_pod_name,
                                   grace_period_seconds=0)

    delete_and_wait_pod(core_api, terminating_pod_name)

    deployment_pod_list =\
        core_api.list_namespaced_pod(
            namespace="default",
            label_selector=deployment_label_selector
        )

    assert len(deployment_pod_list.items) == 1

    wait_for_volume_detached(client, volume_name)
    wait_for_volume_healthy(client, volume_name)

    deployment_pod_list =\
        core_api.list_namespaced_pod(
            namespace="default",
            label_selector=deployment_label_selector
        )

    assert len(deployment_pod_list.items) == 1

    new_pod_name = deployment_pod_list.items[0].metadata.name

    wait_pod(new_pod_name)

    resp_data = read_volume_data(core_api, new_pod_name)

    assert test_data == resp_data
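
wait_for_node_down_k8s comes from the test framework; a minimal sketch of the
polling it might do, assuming a node counts as down once its Ready condition
stops reporting "True" (retry constants reused from the snippets above):

def wait_for_node_down_k8s(node_name, core_api):
    # Hypothetical sketch: poll the node's Ready condition until it turns
    # "False" or "Unknown" (the kubelet has stopped responding).
    for _ in range(RETRY_COUNTS):
        node = core_api.read_node(node_name)
        for condition in node.status.conditions:
            if condition.type == "Ready" and condition.status != "True":
                return True
        time.sleep(RETRY_INTERVAL_LONG)
    return False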
Code Example #7
def test_restore_rwo_volume_to_rwx(set_random_backupstore, client, core_api,
                                   volume_name, pvc, csi_pv, pod_make,
                                   make_deployment_with_pvc):  # NOQA
    """
    Test restoring a backup of an rwo volume to an rwx volume.

    1. Create a volume with 'accessMode' rwo.
    2. Create a PV and a PVC with access mode 'readwriteonce' and attach to the
       volume.
    3. Create a pod and attach to the PVC.
    4. Write some data into the pod and compute md5sum.
    5. Take a backup of the volume.
    6. Restore the backup with 'accessMode' rwx.
    7. Create PV and PVC and attach to 2 pods.
    8. Verify the data.
    """

    data_path = "/data/test"
    pod_name, pv_name, pvc_name, md5sum = \
        prepare_pod_with_data_in_mb(client, core_api, csi_pv, pvc,
                                    pod_make,
                                    volume_name,
                                    data_size_in_mb=DATA_SIZE_IN_MB_1,
                                    data_path=data_path)

    snap = create_snapshot(client, volume_name)
    volume = client.by_id_volume(volume_name)
    volume.snapshotBackup(name=snap.name)
    wait_for_backup_completion(client, volume_name, snap.name)
    bv, b1 = find_backup(client, volume_name, snap.name)

    restore_volume_name = 'restored-rwx-volume'
    restore_pv_name = restore_volume_name + "-pv"
    restore_pvc_name = restore_volume_name + "-pvc"

    client.create_volume(name=restore_volume_name,
                         size=str(1 * Gi),
                         numberOfReplicas=3,
                         fromBackup=b1.url,
                         accessMode='rwx')
    wait_for_volume_creation(client, restore_volume_name)
    restore_volume = wait_for_volume_detached(client, restore_volume_name)
    create_pv_for_volume(client, core_api, restore_volume, restore_pv_name)
    create_pvc_for_volume(client, core_api, restore_volume, restore_pvc_name)
    deployment = make_deployment_with_pvc('deployment-multi-pods-test',
                                          restore_pvc_name,
                                          replicas=2)
    apps_api = get_apps_api_client()
    create_and_wait_deployment(apps_api, deployment)

    deployment_label_selector = \
        "name=" + deployment["metadata"]["labels"]["name"]

    deployment_pod_list = \
        core_api.list_namespaced_pod(namespace="default",
                                     label_selector=deployment_label_selector)

    pod_name_1 = deployment_pod_list.items[0].metadata.name
    pod_name_2 = deployment_pod_list.items[1].metadata.name

    md5sum_pod1 = get_pod_data_md5sum(core_api, pod_name_1, data_path)
    md5sum_pod2 = get_pod_data_md5sum(core_api, pod_name_2, data_path)

    assert md5sum == md5sum_pod1 == md5sum_pod2
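
get_pod_data_md5sum is another framework helper; a sketch under the assumption
that it runs md5sum inside the pod via the same exec helper used elsewhere in
these examples:

def get_pod_data_md5sum(core_api, pod_name, path):
    # Hypothetical sketch: compute the checksum inside the pod and keep only
    # the hash portion of md5sum's "<hash>  <path>" output.
    output = exec_command_in_pod(core_api, 'md5sum ' + path,
                                 pod_name, 'default')
    return output.strip().split()[0]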
Code Example #8
def test_rwx_deployment_with_multi_pods(core_api, pvc,
                                        make_deployment_with_pvc):  # NOQA
    """
    Test deployment of 2 pods with the same PVC.

    1. Create a volume with 'accessMode' rwx.
    2. Create a PV and a PVC with access mode 'readwritemany' and attach to the
       volume.
    3. Create a deployment of 2 pods with the PVC created above.
    4. Wait for the 2 pods to come up healthy.
    5. Write data in both pods and compute md5sum.
    6. Check the data md5sum in the share manager pod.
    """

    pvc_name = 'pvc-deployment-multi-pods-test'
    pvc['metadata']['name'] = pvc_name
    pvc['spec']['storageClassName'] = 'longhorn'
    pvc['spec']['accessModes'] = ['ReadWriteMany']

    core_api.create_namespaced_persistent_volume_claim(body=pvc,
                                                       namespace='default')

    deployment = make_deployment_with_pvc('deployment-multi-pods-test',
                                          pvc_name,
                                          replicas=2)
    apps_api = get_apps_api_client()
    create_and_wait_deployment(apps_api, deployment)

    pv_name = get_volume_name(core_api, pvc_name)
    share_manager_name = 'share-manager-' + pv_name
    deployment_label_selector = "name=" + \
                                deployment["metadata"]["labels"]["name"]

    deployment_pod_list = \
        core_api.list_namespaced_pod(namespace="default",
                                     label_selector=deployment_label_selector)

    assert len(deployment_pod_list.items) == 2

    pod_name_1 = deployment_pod_list.items[0].metadata.name
    test_data_1 = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name_1, test_data_1, filename='test1')

    pod_name_2 = deployment_pod_list.items[1].metadata.name
    command = 'cat /data/test1'
    pod_data_2 = exec_command_in_pod(core_api, command, pod_name_2, 'default')

    assert test_data_1 == pod_data_2

    test_data_2 = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name_2, test_data_2, filename='test2')

    command = 'cat /export/' + pv_name + '/test1'
    share_manager_data_1 = exec_command_in_pod(core_api, command,
                                               share_manager_name,
                                               LONGHORN_NAMESPACE)
    assert test_data_1 == share_manager_data_1

    command = 'cat /export/' + pv_name + '/test2'
    share_manager_data_2 = exec_command_in_pod(core_api, command,
                                               share_manager_name,
                                               LONGHORN_NAMESPACE)

    assert test_data_2 == share_manager_data_2
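
exec_command_in_pod wraps the Kubernetes exec API; a minimal sketch using the
official Python client's stream helper (running the command through /bin/sh is
an assumption):

from kubernetes.stream import stream


def exec_command_in_pod(core_api, command, pod_name, namespace):
    # Hypothetical sketch: run a shell command in the pod and return its
    # stdout as a string.
    return stream(core_api.connect_get_namespaced_pod_exec,
                  pod_name, namespace,
                  command=['/bin/sh', '-c', command],
                  stderr=True, stdin=False, stdout=True, tty=False)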
Code Example #9
def test_rwx_statefulset_scale_down_up(core_api, statefulset):  # NOQA
    """
    Test scaling up and down of pods attached to an rwx volume.

    1. Create a StatefulSet of 2 pods with VolumeClaimTemplate where accessMode
       is 'RWX'.
    2. Wait for StatefulSet pods to come up healthy.
    3. Write data and compute md5sum in both pods.
    4. Delete the pods.
    5. Wait for the pods to be terminated.
    6. Verify the share manager pods are no longer available and the volume is
       detached.
    7. Recreate the pods.
    8. Wait for the new pods to come up.
    9. Check the data md5sum in the new pods.
    """

    statefulset_name = 'statefulset-rwx-scale-down-up-test'
    share_manager_name = []

    statefulset['metadata']['name'] = \
        statefulset['spec']['selector']['matchLabels']['app'] = \
        statefulset['spec']['serviceName'] = \
        statefulset['spec']['template']['metadata']['labels']['app'] = \
        statefulset_name
    statefulset['spec']['volumeClaimTemplates'][0]['spec']['storageClassName']\
        = 'longhorn'
    statefulset['spec']['volumeClaimTemplates'][0]['spec']['accessModes'] \
        = ['ReadWriteMany']

    create_and_wait_statefulset(statefulset)

    for i in range(2):
        pvc_name = \
            statefulset['spec']['volumeClaimTemplates'][0]['metadata']['name']\
            + '-' + statefulset_name + '-' + str(i)
        pv_name = get_volume_name(core_api, pvc_name)

        assert pv_name is not None

        share_manager_name.append('share-manager-' + pv_name)

        check_pod_existence(core_api,
                            share_manager_name[i],
                            namespace=LONGHORN_NAMESPACE)

    md5sum_pod = []
    for i in range(2):
        test_pod_name = statefulset_name + '-' + str(i)
        test_data = generate_random_data(VOLUME_RWTEST_SIZE)
        write_pod_volume_data(core_api, test_pod_name, test_data)
        md5sum_pod.append(test_data)

    statefulset['spec']['replicas'] = replicas = 0
    apps_api = get_apps_api_client()
    apps_api.patch_namespaced_stateful_set(
        name=statefulset_name,
        namespace='default',
        body={'spec': {
            'replicas': replicas
        }})
    for i in range(DEFAULT_STATEFULSET_TIMEOUT):
        s_set = apps_api.read_namespaced_stateful_set(
            name=statefulset['metadata']['name'], namespace='default')
        if s_set.status.ready_replicas == replicas or \
                (replicas == 0 and not s_set.status.ready_replicas):
            break
        time.sleep(DEFAULT_STATEFULSET_INTERVAL)

    pods = core_api.list_namespaced_pod(namespace=LONGHORN_NAMESPACE)

    found = False
    for item in pods.items:
        if item.metadata.name == share_manager_name[0] or \
                item.metadata.name == share_manager_name[1]:
            found = True
            break

    assert not found

    statefulset['spec']['replicas'] = replicas = 2
    apps_api = get_apps_api_client()
    apps_api.patch_namespaced_stateful_set(
        name=statefulset_name,
        namespace='default',
        body={'spec': {
            'replicas': replicas
        }})
    wait_statefulset(statefulset)

    for i in range(2):
        test_pod_name = statefulset_name + '-' + str(i)
        command = 'cat /data/test'
        pod_data = exec_command_in_pod(core_api, command, test_pod_name,
                                       'default')

        assert pod_data == md5sum_pod[i]
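
wait_statefulset blocks until the StatefulSet reports all replicas ready; a
minimal sketch, assuming the same timeout constants and default namespace used
above:

def wait_statefulset(statefulset):
    # Hypothetical sketch: poll the StatefulSet until ready_replicas matches
    # the desired replica count.
    apps_api = get_apps_api_client()
    name = statefulset['metadata']['name']
    replicas = statefulset['spec']['replicas']
    for _ in range(DEFAULT_STATEFULSET_TIMEOUT):
        s_set = apps_api.read_namespaced_stateful_set(name=name,
                                                      namespace='default')
        if s_set.status.ready_replicas == replicas:
            return
        time.sleep(DEFAULT_STATEFULSET_INTERVAL)
    assert False, "StatefulSet did not become ready in time"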