Example #1
def test_pv_creation(client, core_api):  # NOQA
    volume_name = "test-pv-creation"
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pv_name = "pv-" + volume_name
    create_pv_for_volume(client, core_api, volume, pv_name)

    # try to create one more pv for the volume
    pv_name_2 = "pv2-" + volume_name
    with pytest.raises(Exception) as e:
        volume.pvCreate(pvName=pv_name_2)
    assert "already exist" in str(e.value)

    ks = {
        'pvName': pv_name,
        'pvStatus': 'Available',
        'namespace': '',
        'pvcName': '',
        'lastPVCRefAt': '',
        'lastPodRefAt': '',
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    delete_and_wait_pv(core_api, pv_name)
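
Note the `pytest.raises` usage in these examples: the assertion on `e.value`
must sit after the `with` block, because the raising call exits the block
before any statement that follows it would run. A minimal self-contained
sketch of the idiom:

import pytest

def divide(a, b):
    return a / b

def test_divide_by_zero():
    # the raising call ends the `with` block; assert on e.value afterwards
    with pytest.raises(ZeroDivisionError) as e:
        divide(1, 0)
    assert "division" in str(e.value)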
Example #2
def test_pv_creation(client, core_api):  # NOQA
    """
    Test creating PV using Longhorn API

    1. Create volume
    2. Create PV for the volume
    3. Try to create another PV for the same volume. It should fail.
    4. Check Kubernetes Status for the volume since PV is created.
    """
    volume_name = "test-pv-creation"  # NOQA
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pv_name = "pv-" + volume_name
    create_pv_for_volume(client, core_api, volume, pv_name)

    # try to create one more pv for the volume
    pv_name_2 = "pv2-" + volume_name
    with pytest.raises(Exception) as e:
        volume.pvCreate(pvName=pv_name_2)
    assert "already exist" in str(e.value)

    ks = {
        'pvName': pv_name,
        'pvStatus': 'Available',
        'namespace': '',
        'pvcName': '',
        'lastPVCRefAt': '',
        'lastPodRefAt': '',
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    delete_and_wait_pv(core_api, pv_name)
Example #3
def test_delete_with_static_pv(client, core_api, volume_name):  # NOQA
    """
    Test that deleting a Volume with related static Persistent Volume and
    Persistent Volume Claim resources successfully deletes the Volume and
    cleans up those resources.

    1. Create a Volume in Longhorn.
    2. Create a static Persistent Volume and Persistent Volume Claim for the
    Volume through Longhorn.
    3. Wait for the Kubernetes Status to indicate the existence of these
    resources.
    4. Attempt deletion of the Volume.
    5. Verify that the Volume and its associated resources have been deleted.
    """
    volume = create_and_check_volume(client, volume_name)
    pv_name = 'pv-' + volume_name
    pvc_name = 'pvc-' + volume_name
    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)

    ks = {
        'pvName': pv_name,
        'pvStatus': 'Bound',
        'namespace': 'default',
        'pvcName': pvc_name,
        'lastPVCRefAt': '',
        'lastPodRefAt': '',
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)
    wait_delete_pv(core_api, pv_name)
    wait_delete_pvc(core_api, pvc_name)
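
`wait_delete_pv` and `wait_delete_pvc` are suite helpers. A plausible sketch
of such a poller on the official kubernetes Python client (helper name and
retry values are assumptions, not the suite's actual code):

import time

from kubernetes.client.rest import ApiException

def wait_for_pv_gone(core_api, pv_name, retries=60, interval=2):
    # poll until reading the PV returns 404, i.e. it has been deleted
    for _ in range(retries):
        try:
            core_api.read_persistent_volume(name=pv_name)
        except ApiException as err:
            if err.status == 404:
                return
            raise
        time.sleep(interval)
    raise AssertionError("PV %s was not deleted in time" % pv_name)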
Example #4
def test_xfs_pv_existing_volume(client, core_api, pod_manifest): # NOQA
    """
    Test create PV with existing XFS filesystem

    1. Create a volume
    2. Create PV/PVC for the existing volume, specify `xfs` as filesystem
    3. Attach the volume to the current node.
    4. Format it to `xfs`
    5. Create a POD using the volume

    FIXME: We should write data in step 4 and validate the data in step 5 to
    make sure the disk won't be reformatted
    """
    volume_name = generate_volume_name()

    volume = create_and_check_volume(client, volume_name)

    create_pv_for_volume(client, core_api, volume, volume_name, "xfs")

    create_pvc_for_volume(client, core_api, volume, volume_name)

    host_id = get_self_host_id()

    volume = volume.attach(hostId=host_id)

    volume = wait_for_volume_healthy(client, volume_name)

    cmd = ['mkfs.xfs', get_volume_endpoint(volume)]
    subprocess.check_call(cmd)

    volume = volume.detach()

    volume = wait_for_volume_detached(client, volume_name)

    pod_manifest['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": volume_name
        }
    }]

    create_and_wait_pod(core_api, pod_manifest)
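
A hedged sketch of what the FIXME validation could look like: after
`mkfs.xfs`, mount the block device on the host, write a marker file, unmount,
and later verify the marker from inside the pod. The mount point and marker
path here are illustrative assumptions:

import subprocess

def write_xfs_marker(device, mount_point="/mnt/xfs-check"):
    # mount the freshly formatted device, drop a marker, unmount again
    subprocess.check_call(["mkdir", "-p", mount_point])
    subprocess.check_call(["mount", device, mount_point])
    with open(mount_point + "/marker", "w") as f:
        f.write("keep-me")
    subprocess.check_call(["umount", mount_point])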
Example #5
def test_xfs_pv(client, core_api, pod_manifest): # NOQA
    """
    Test create PV with new XFS filesystem

    1. Create a volume
    2. Create a PV for the existing volume, specify `xfs` as filesystem
    3. Create PVC and Pod
    4. Make sure Pod is running.
    5. Write data into the pod and read back for validation.

    Note: The volume will be formatted to XFS filesystem by Kubernetes in this
    case.
    """
    volume_name = generate_volume_name()

    volume = create_and_check_volume(client, volume_name)

    create_pv_for_volume(client, core_api, volume, volume_name, "xfs")

    create_pvc_for_volume(client, core_api, volume, volume_name)

    pod_manifest['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": volume_name
        }
    }]

    pod_name = pod_manifest['metadata']['name']

    create_and_wait_pod(core_api, pod_manifest)

    test_data = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name, test_data)
    resp = read_volume_data(core_api, pod_name)
    assert resp == test_data
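
Passing "xfs" to `create_pv_for_volume` presumably lands in the PV's CSI
`fsType` field, which is why Kubernetes formats the volume on first mount
here. A hedged sketch of such a static PV built with the official client
(the driver name follows Longhorn's documented CSI driver; other values are
illustrative):

from kubernetes import client

def build_longhorn_pv(pv_name, volume_name, size, fs_type="xfs"):
    # static PV pointing at an existing Longhorn volume by name
    return client.V1PersistentVolume(
        metadata=client.V1ObjectMeta(name=pv_name),
        spec=client.V1PersistentVolumeSpec(
            capacity={"storage": size},
            access_modes=["ReadWriteOnce"],
            csi=client.V1CSIPersistentVolumeSource(
                driver="driver.longhorn.io",
                fs_type=fs_type,
                volume_handle=volume_name,
            ),
        ),
    )

The object would then be submitted with
`core_api.create_persistent_volume(body=build_longhorn_pv(...))`.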
Example #6
def test_allow_volume_creation_with_degraded_availability_csi(
        client, core_api, apps_api, make_deployment_with_pvc):  # NOQA
    """
    Test Allow Volume Creation with Degraded Availability (CSI)

    Requirement:
    1. Set `allow-volume-creation-with-degraded-availability` to true.
    2. Set `node-level-soft-anti-affinity` to false.

    Steps:
    1. Disable scheduling for node 3.
    2. Create a Deployment Pod with a volume and 3 replicas.
        1. After the volume is attached, scheduling error should be seen.
    3. Write data to the Pod.
    4. Scale down the deployment to 0 to detach the volume.
        1. Scheduled condition should become true.
    5. Scale up the deployment back to 1 and verify the data.
        1. Scheduled condition should become false.
    6. Enable the scheduling for node 3.
        1. Volume should start rebuilding on node 3 soon.
        2. Once the rebuilding starts, the scheduled condition should become
           true.
    7. Once the rebuild finishes, scale the deployment down and back up to
       verify the data.
    """
    setting = client.by_id_setting(common.SETTING_DEGRADED_AVAILABILITY)
    client.update(setting, value="true")

    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")

    nodes = client.list_node()
    node3 = nodes[2]
    client.update(node3, allowScheduling=False)

    vol = common.create_and_check_volume(client, generate_volume_name(),
                                         size=str(500 * Mi))

    pv_name = vol.name + "-pv"
    common.create_pv_for_volume(client, core_api, vol, pv_name)

    pvc_name = vol.name + "-pvc"
    common.create_pvc_for_volume(client, core_api, vol, pvc_name)

    deployment_name = vol.name + "-dep"
    deployment = make_deployment_with_pvc(deployment_name, pvc_name)
    deployment["spec"]["replicas"] = 3
    apps_api.create_namespaced_deployment(body=deployment, namespace='default')
    common.wait_for_volume_status(client, vol.name,
                                  common.VOLUME_FIELD_STATE,
                                  common.VOLUME_STATE_ATTACHED)
    common.wait_scheduling_failure(client, vol.name)

    data_path = "/data/test"
    pod = common.wait_and_get_any_deployment_pod(core_api, deployment_name)
    common.write_pod_volume_random_data(core_api, pod.metadata.name,
                                        data_path, common.DATA_SIZE_IN_MB_2)
    created_md5sum = get_pod_data_md5sum(core_api, pod.metadata.name,
                                         data_path)

    deployment['spec']['replicas'] = 0
    apps_api.patch_namespaced_deployment(body=deployment,
                                         namespace='default',
                                         name=deployment_name)
    vol = common.wait_for_volume_detached(client, vol.name)
    assert vol.conditions[VOLUME_CONDITION_SCHEDULED]['status'] == "True"

    deployment['spec']['replicas'] = 1
    apps_api.patch_namespaced_deployment(body=deployment,
                                         namespace='default',
                                         name=deployment_name)
    common.wait_for_volume_status(client, vol.name,
                                  common.VOLUME_FIELD_STATE,
                                  common.VOLUME_STATE_ATTACHED)
    common.wait_for_volume_condition_scheduled(client, vol.name, "status",
                                               common.CONDITION_STATUS_FALSE)
    pod = common.wait_and_get_any_deployment_pod(core_api, deployment_name)
    assert created_md5sum == get_pod_data_md5sum(core_api,
                                                 pod.metadata.name,
                                                 data_path)

    client.update(node3, allowScheduling=True)
    common.wait_for_rebuild_start(client, vol.name)
    vol = client.by_id_volume(vol.name)
    assert vol.conditions[VOLUME_CONDITION_SCHEDULED]['status'] == "True"
    common.wait_for_rebuild_complete(client, vol.name)

    deployment['spec']['replicas'] = 0
    apps_api.patch_namespaced_deployment(body=deployment,
                                         namespace='default',
                                         name=deployment_name)
    common.wait_for_volume_detached(client, vol.name)

    deployment['spec']['replicas'] = 1
    apps_api.patch_namespaced_deployment(body=deployment,
                                         namespace='default',
                                         name=deployment_name)
    common.wait_for_volume_status(client, vol.name,
                                  common.VOLUME_FIELD_STATE,
                                  common.VOLUME_STATE_ATTACHED)

    pod = common.wait_and_get_any_deployment_pod(core_api, deployment_name)
    assert created_md5sum == get_pod_data_md5sum(core_api,
                                                 pod.metadata.name,
                                                 data_path)
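
`get_pod_data_md5sum` is a suite helper; one plausible implementation on top
of the Kubernetes exec API (the real helper may differ):

from kubernetes.stream import stream

def pod_md5sum(core_api, pod_name, path, namespace="default"):
    # run md5sum inside the pod and return only the digest column
    cmd = ["/bin/sh", "-c", "md5sum %s | awk '{print $1}'" % path]
    out = stream(core_api.connect_get_namespaced_pod_exec,
                 pod_name, namespace, command=cmd,
                 stderr=True, stdin=False, stdout=True, tty=False)
    return out.strip()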
Example #7
def test_replica_auto_balance_zone_best_effort_with_data_locality(
        client, core_api, volume_name, pod):  # NOQA
    """
    Background:
    Given set `replica-soft-anti-affinity` to `true`.
    And set `replica-zone-soft-anti-affinity` to `true`.
    And set `default-data-locality` to `best-effort`.
    And set `replicaAutoBalance` to `best-effort`.
    And set node-1 to zone-1.
        set node-2 to zone-1.
        set node-3 to zone-2.
    And create volume with 2 replicas.
    And create pv for volume.
    And create pvc for volume.

    Scenario Outline: replica auto-balance zones with best-effort should
                      not remove pod local replicas when data locality is
                      enabled (best-effort).

    Given create and wait pod on <pod-node>.
    And disable scheduling and evict node-3.
    And count replicas on each node.
    And 1 replica running on <pod-node>.
        1 replica running on <duplicate-node>.
        0 replica running on node-3.

    When enable scheduling for node-3.
    Then count replicas on each node.
    And 1 replica running on <pod-node>.
        0 replica running on <duplicate-node>.
        1 replica running on node-3.
    And count replicas in each zone.
    And 1 replica running in zone-1.
        1 replica running in zone-2.
    And loop 3 times, waiting 5 seconds each time, and count replicas on each
        node to ensure no additional scheduling is happening.
        1 replica running on <pod-node>.
        0 replica running on <duplicate-node>.
        1 replica running on node-3.

    And delete pod.

    Examples:
        | pod-node | duplicate-node |
        | node-1   | node-2         |
        | node-2   | node-1         |
        | node-1   | node-2         |
    """

    common.update_setting(client, SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY,
                          "true")
    common.update_setting(client, SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY,
                          "true")
    common.update_setting(client, SETTING_DEFAULT_DATA_LOCALITY, "best-effort")
    common.update_setting(client, SETTING_REPLICA_AUTO_BALANCE, "best-effort")

    n1, n2, n3 = client.list_node()

    set_k8s_node_zone_label(core_api, n1.name, ZONE1)
    set_k8s_node_zone_label(core_api, n2.name, ZONE1)
    set_k8s_node_zone_label(core_api, n3.name, ZONE2)
    wait_longhorn_node_zone_updated(client)

    n_replicas = 2
    volume = create_and_check_volume(client,
                                     volume_name,
                                     num_of_replicas=n_replicas)
    common.create_pv_for_volume(client, core_api, volume, volume_name)
    common.create_pvc_for_volume(client, core_api, volume, volume_name)
    pod['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": volume_name
        }
    }]

    for i in range(1, 4):
        pod_node_name = n2.name if i % 2 == 0 else n1.name
        pod['spec']['nodeSelector'] = {"kubernetes.io/hostname": pod_node_name}
        common.create_and_wait_pod(core_api, pod)

        client.update(n3, allowScheduling=False, evictionRequested=True)

        duplicate_node = [n1.name, n2.name]
        duplicate_node.remove(pod_node_name)
        for _ in range(RETRY_COUNTS):
            pod_node_r_count = common.get_host_replica_count(client,
                                                             volume_name,
                                                             pod_node_name,
                                                             chk_running=True)
            duplicate_node_r_count = common.get_host_replica_count(
                client, volume_name, duplicate_node[0], chk_running=True)
            balance_node_r_count = common.get_host_replica_count(
                client, volume_name, n3.name, chk_running=False)

            if pod_node_r_count == duplicate_node_r_count == 1 and \
                    balance_node_r_count == 0:
                break

            time.sleep(RETRY_INTERVAL)
        assert pod_node_r_count == 1
        assert duplicate_node_r_count == 1
        assert balance_node_r_count == 0

        client.update(n3, allowScheduling=True)

        for _ in range(RETRY_COUNTS):
            pod_node_r_count = common.get_host_replica_count(client,
                                                             volume_name,
                                                             pod_node_name,
                                                             chk_running=True)
            duplicate_node_r_count = common.get_host_replica_count(
                client, volume_name, duplicate_node[0], chk_running=False)
            balance_node_r_count = common.get_host_replica_count(
                client, volume_name, n3.name, chk_running=True)

            if pod_node_r_count == balance_node_r_count == 1 and \
                    duplicate_node_r_count == 0:
                break

            time.sleep(RETRY_INTERVAL)
        assert pod_node_r_count == 1
        assert duplicate_node_r_count == 0
        assert balance_node_r_count == 1

        z1_r_count = get_zone_replica_count(client,
                                            volume_name,
                                            ZONE1,
                                            chk_running=True)
        z2_r_count = get_zone_replica_count(client,
                                            volume_name,
                                            ZONE2,
                                            chk_running=True)
        assert z1_r_count == z2_r_count == 1

        # loop 3 times, waiting 5 seconds each time, to ensure there is no
        # re-scheduling happening.
        for _ in range(3):
            time.sleep(5)
            assert pod_node_r_count == common.get_host_replica_count(
                client, volume_name, pod_node_name, chk_running=True)
            assert duplicate_node_r_count == common.get_host_replica_count(
                client, volume_name, duplicate_node[0], chk_running=False)
            assert balance_node_r_count == common.get_host_replica_count(
                client, volume_name, n3.name, chk_running=True)

        common.delete_and_wait_pod(core_api, pod['metadata']['name'])
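
`set_k8s_node_zone_label` most likely patches the well-known topology label
onto each node; a hedged sketch (label key per upstream Kubernetes, function
name assumed):

def set_node_zone(core_api, node_name, zone):
    # patch the standard zone label; Longhorn derives its zones from it
    body = {"metadata": {"labels": {"topology.kubernetes.io/zone": zone}}}
    core_api.patch_node(node_name, body)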
Example #8
def test_offline_node_with_attached_volume_and_pod(
        client, core_api, volume_name, make_deployment_with_pvc,
        reset_cluster_ready_status):  # NOQA
    """
    Test offline node with attached volume and pod

    1. Create PV/PVC/Deployment manifest.
    2. Update deployment's tolerations to 20 seconds to speed up test
    3. Update deployment's node affinity rule to avoid the current node
    4. Create volume, PV/PVC and deployment.
    5. Find the pod in the deployment and write `test_data` into it
    6. Shutdown the node pod is running on
    7. Wait for deployment to delete the pod
        1. The Deployment cannot delete the pod here because the kubelet
        doesn't respond
    8. Force delete the terminating pod
    9. Wait for the new pod to be created and the volume attached
    10. Check `test_data` in the new pod
    """
    toleration_seconds = 20

    apps_api = get_apps_api_client()
    cloudprovider = detect_cloudprovider()

    volume_name = generate_volume_name()
    pv_name = volume_name + "-pv"
    pvc_name = volume_name + "-pvc"
    deployment_name = volume_name + "-dep"

    longhorn_test_node_name = get_self_host_id()

    deployment_manifest = make_deployment_with_pvc(deployment_name, pvc_name)

    unreachable_toleration = {
        "key": "node.kubernetes.io/unreachable",
        "operator": "Exists",
        "effect": "NoExecute",
        "tolerationSeconds": toleration_seconds
    }

    not_ready_toleration = {
        "key": "node.kubernetes.io/not-ready",
        "operator": "Exists",
        "effect": "NoExecute",
        "tolerationSeconds": toleration_seconds
    }

    deployment_manifest["spec"]["template"]["spec"]["tolerations"] =\
        [unreachable_toleration, not_ready_toleration]

    node_affinity_roles = {
        "nodeAffinity": {
            "requiredDuringSchedulingIgnoredDuringExecution": {
                "nodeSelectorTerms": [{
                    "matchExpressions": [{
                        "key": "kubernetes.io/hostname",
                        "operator": "NotIn",
                        "values": [longhorn_test_node_name]
                    }]
                }]
            }
        }
    }

    deployment_manifest["spec"]["template"]["spec"]["affinity"] =\
        node_affinity_roles

    longhorn_volume = create_and_check_volume(client, volume_name, size=SIZE)

    wait_for_volume_detached(client, volume_name)

    create_pv_for_volume(client, core_api, longhorn_volume, pv_name)

    create_pvc_for_volume(client, core_api, longhorn_volume, pvc_name)

    create_and_wait_deployment(apps_api, deployment_manifest)

    deployment_label_selector =\
        "name=" + deployment_manifest["metadata"]["labels"]["name"]

    deployment_pod_list =\
        core_api.list_namespaced_pod(namespace="default",
                                     label_selector=deployment_label_selector)

    assert len(deployment_pod_list.items) == 1

    pod_name = deployment_pod_list.items[0].metadata.name

    test_data = generate_random_data(VOLUME_RWTEST_SIZE)

    write_pod_volume_data(core_api, pod_name, test_data)

    node_name = deployment_pod_list.items[0].spec.node_name
    node = cloudprovider.node_id(node_name)

    cloudprovider.node_shutdown(node)

    k8s_node_down = wait_for_node_down_k8s(node_name, core_api)

    assert k8s_node_down

    client = get_longhorn_api_client()

    longhorn_node_down = wait_for_node_down_longhorn(node_name, client)
    assert longhorn_node_down

    time.sleep(toleration_seconds + 5)

    for i in range(TERMINATING_POD_RETRYS):
        deployment_pod_list =\
            core_api.list_namespaced_pod(
                namespace="default",
                label_selector=deployment_label_selector
            )

        terminating_pod_name = None
        for pod in deployment_pod_list.items:
            if pod.metadata.deletion_timestamp is not None:
                terminating_pod_name = pod.metadata.name
                break

        if terminating_pod_name is not None:
            break
        else:
            time.sleep(TERMINATING_POD_INTERVAL)

    assert terminating_pod_name is not None

    core_api.delete_namespaced_pod(namespace="default",
                                   name=terminating_pod_name,
                                   grace_period_seconds=0)

    delete_and_wait_pod(core_api, terminating_pod_name)

    deployment_pod_list =\
        core_api.list_namespaced_pod(
            namespace="default",
            label_selector=deployment_label_selector
        )

    assert len(deployment_pod_list.items) == 1

    wait_for_volume_detached(client, volume_name)
    wait_for_volume_healthy(client, volume_name)

    deployment_pod_list =\
        core_api.list_namespaced_pod(
            namespace="default",
            label_selector=deployment_label_selector
        )

    assert len(deployment_pod_list.items) == 1

    new_pod_name = deployment_pod_list.items[0].metadata.name

    wait_pod(new_pod_name)

    resp_data = read_volume_data(core_api, new_pod_name)

    assert test_data == resp_data
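
`wait_for_node_down_k8s` presumably polls the node's Ready condition; a
minimal sketch of the check it would repeat (an assumed helper, not the
suite's code):

def node_is_down(core_api, node_name):
    # a node counts as down once its Ready condition is no longer "True"
    node = core_api.read_node(node_name)
    for cond in node.status.conditions or []:
        if cond.type == "Ready":
            return cond.status != "True"
    return True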
Example #9
def test_backup_kubernetes_status(client, core_api, pod):  # NOQA
    """
    Test that Backups have KubernetesStatus stored properly when there is an
    associated PersistentVolumeClaim and Pod.
    """
    host_id = get_self_host_id()
    static_sc_name = "longhorn-static-test"
    setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC)
    setting = client.update(setting, value=static_sc_name)
    assert setting["value"] == static_sc_name

    volume_name = "test-backup-kubernetes-status-pod"
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pod_name = "pod-" + volume_name
    pv_name = "pv-" + volume_name
    pvc_name = "pvc-" + volume_name
    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)
    ret = core_api.list_namespaced_persistent_volume_claim(namespace='default')
    pvc_found = False
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc_found = item
            break
    assert pvc_found
    assert pvc_found.spec.storage_class_name == static_sc_name

    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [{
        'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pvc_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    ks = {
        'lastPodRefAt': '',
        'lastPVCRefAt': '',
        'namespace': 'default',
        'pvcName': pvc_name,
        'pvName': pv_name,
        'pvStatus': 'Bound',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, volume_name, ks)
    volume = wait_for_volume_healthy(client, volume_name)

    # Create Backup manually instead of calling create_backup since Kubernetes
    # is not guaranteed to mount our Volume to the test host.
    snap = volume.snapshotCreate()
    volume.snapshotBackup(name=snap["name"])
    bv, b = find_backup(client, volume_name, snap["name"])
    new_b = bv.backupGet(name=b["name"])
    status = loads(new_b["labels"].get(KUBERNETES_STATUS_LABEL))
    assert status == ks

    restore_name = generate_volume_name()
    client.create_volume(name=restore_name,
                         size=SIZE,
                         numberOfReplicas=2,
                         fromBackup=b["url"])
    wait_for_volume_restoration_completed(client, restore_name)
    wait_for_volume_detached(client, restore_name)

    snapshot_created = b["snapshotCreated"]
    ks = {
        'lastPodRefAt': b["snapshotCreated"],
        'lastPVCRefAt': b["snapshotCreated"],
        'namespace': 'default',
        'pvcName': pvc_name,
        # Restoration should not apply PersistentVolume data.
        'pvName': '',
        'pvStatus': '',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, restore_name, ks)
    restore = client.by_id_volume(restore_name)
    # We need to compare LastPodRefAt and LastPVCRefAt manually since
    # wait_volume_kubernetes_status only checks for empty or non-empty state.
    assert restore["kubernetesStatus"]["lastPodRefAt"] == ks["lastPodRefAt"]
    assert restore["kubernetesStatus"]["lastPVCRefAt"] == ks["lastPVCRefAt"]

    bv.backupDelete(name=b["name"])
    client.delete(restore)
    wait_for_volume_delete(client, restore_name)
    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name)
    delete_and_wait_pv(core_api, pv_name)

    # With the Pod, PVC, and PV deleted, the Volume should have both Ref
    # fields set. Check that a new Backup and Restore will use this instead of
    # manually populating the Ref fields.
    ks = {
        'lastPodRefAt': 'NOT NULL',
        'lastPVCRefAt': 'NOT NULL',
        'namespace': 'default',
        'pvcName': pvc_name,
        'pvName': '',
        'pvStatus': '',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, volume_name, ks)
    volume = wait_for_volume_detached(client, volume_name)

    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)

    snap = volume.snapshotCreate()
    volume.snapshotBackup(name=snap["name"])
    bv, b = find_backup(client, volume_name, snap["name"])
    new_b = bv.backupGet(name=b["name"])
    status = loads(new_b["labels"].get(KUBERNETES_STATUS_LABEL))
    # Check each field manually, since we don't know what the LastPodRefAt or
    # the LastPVCRefAt will be. We just know it shouldn't be SnapshotCreated.
    assert status["lastPodRefAt"] != snapshot_created
    assert status["lastPVCRefAt"] != snapshot_created
    assert status["namespace"] == "default"
    assert status["pvcName"] == pvc_name
    assert status["pvName"] == ""
    assert status["pvStatus"] == ""
    assert status["workloadsStatus"] == [{
        'podName': pod_name,
        'podStatus': 'Running',
        'workloadName': '',
        'workloadType': ''
    }]

    restore_name = generate_volume_name()
    client.create_volume(name=restore_name,
                         size=SIZE,
                         numberOfReplicas=2,
                         fromBackup=b["url"])
    wait_for_volume_restoration_completed(client, restore_name)
    wait_for_volume_detached(client, restore_name)

    ks = {
        'lastPodRefAt': status["lastPodRefAt"],
        'lastPVCRefAt': status["lastPVCRefAt"],
        'namespace': 'default',
        'pvcName': pvc_name,
        'pvName': '',
        'pvStatus': '',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, restore_name, ks)
    restore = client.by_id_volume(restore_name)
    assert restore["kubernetesStatus"]["lastPodRefAt"] == ks["lastPodRefAt"]
    assert restore["kubernetesStatus"]["lastPVCRefAt"] == ks["lastPVCRefAt"]

    bv.backupDelete(name=b["name"])
    client.delete(restore)
    cleanup_volume(client, volume)
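
The `KUBERNETES_STATUS_LABEL` round trip above is plain JSON stored in a
backup label; a minimal sketch of the pack/unpack pair (the exact label key
string is an assumption):

from json import dumps, loads

KUBERNETES_STATUS_LABEL = "KubernetesStatus"  # assumed label key

def pack_status(ks):
    return {KUBERNETES_STATUS_LABEL: dumps(ks)}

def unpack_status(labels):
    return loads(labels[KUBERNETES_STATUS_LABEL])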
Example #10
def test_backup_kubernetes_status(set_random_backupstore, client, core_api,
                                  pod):  # NOQA
    """
    Test that Backups have KubernetesStatus stored properly when there is an
    associated PersistentVolumeClaim and Pod.

    1. Setup a random backupstore
    2. Set settings Longhorn Static StorageClass to `longhorn-static-test`
    3. Create a volume and PV/PVC. Verify the StorageClass of PVC
    4. Create a Pod using the PVC.
    5. Check volume's Kubernetes status to reflect PV/PVC/Pod correctly.
    6. Create a backup for the volume.
    7. Verify the labels of created backup reflect PV/PVC/Pod status.
    8. Restore the backup to a volume. Wait for restoration to complete.
    9. Check the volume's Kubernetes Status
        1. Make sure `lastPodRefAt` and `lastPVCRefAt` are the snapshot
           creation time
    10. Delete the backup and restored volume.
    11. Delete PV/PVC/Pod.
    12. Verify volume's Kubernetes Status updated to reflect history data.
    13. Attach the volume and create another backup. Verify the labels
    14. Verify the volume's Kubernetes status.
    15. Restore the previous backup to a new volume. Wait for restoration.
    16. Verify the restored volume's Kubernetes status.
        1. Make sure `lastPodRefAt` and `lastPVCRefAt` match the volume in
           step 12
    """

    host_id = get_self_host_id()
    static_sc_name = "longhorn-static-test"
    setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC)
    setting = client.update(setting, value=static_sc_name)
    assert setting.value == static_sc_name

    volume_name = "test-backup-kubernetes-status-pod"  # NOQA
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pod_name = "pod-" + volume_name
    pv_name = "pv-" + volume_name
    pvc_name = "pvc-" + volume_name
    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)
    ret = core_api.list_namespaced_persistent_volume_claim(namespace='default')
    pvc_found = False
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc_found = item
            break
    assert pvc_found
    assert pvc_found.spec.storage_class_name == static_sc_name

    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [{
        'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pvc_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    ks = {
        'lastPodRefAt': '',
        'lastPVCRefAt': '',
        'namespace': 'default',
        'pvcName': pvc_name,
        'pvName': pv_name,
        'pvStatus': 'Bound',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, volume_name, ks)
    volume = wait_for_volume_healthy(client, volume_name)

    # Create Backup manually instead of calling create_backup since Kubernetes
    # is not guaranteed to mount our Volume to the test host.
    snap = create_snapshot(client, volume_name)
    volume.snapshotBackup(name=snap.name)
    wait_for_backup_completion(client, volume_name, snap.name)
    _, b = find_backup(client, volume_name, snap.name)
    # Check backup label
    status = loads(b.labels.get(KUBERNETES_STATUS_LABEL))
    assert status == ks
    # Check backup volume label
    for _ in range(RETRY_COUNTS):
        bv = client.by_id_backupVolume(volume_name)
        if bv is not None and bv.labels is not None:
            break
        time.sleep(RETRY_INTERVAL)
    assert bv is not None and bv.labels is not None
    status = loads(bv.labels.get(KUBERNETES_STATUS_LABEL))
    assert status == ks

    restore_name = generate_volume_name()
    client.create_volume(name=restore_name,
                         size=SIZE,
                         numberOfReplicas=2,
                         fromBackup=b.url)
    wait_for_volume_restoration_completed(client, restore_name)
    wait_for_volume_detached(client, restore_name)

    snapshot_created = b.snapshotCreated
    ks = {
        'lastPodRefAt': b.snapshotCreated,
        'lastPVCRefAt': b.snapshotCreated,
        'namespace': 'default',
        'pvcName': pvc_name,
        # Restoration should not apply PersistentVolume data.
        'pvName': '',
        'pvStatus': '',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, restore_name, ks)
    restore = client.by_id_volume(restore_name)
    # We need to compare LastPodRefAt and LastPVCRefAt manually since
    # wait_volume_kubernetes_status only checks for empty or non-empty state.
    assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"]
    assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"]

    delete_backup(client, bv.name, b.name)
    client.delete(restore)
    wait_for_volume_delete(client, restore_name)
    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name)
    delete_and_wait_pv(core_api, pv_name)

    # With the Pod, PVC, and PV deleted, the Volume should have both Ref
    # fields set. Check that a new Backup and Restore will use this instead of
    # manually populating the Ref fields.
    ks = {
        'lastPodRefAt': 'NOT NULL',
        'lastPVCRefAt': 'NOT NULL',
        'namespace': 'default',
        'pvcName': pvc_name,
        'pvName': '',
        'pvStatus': '',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, volume_name, ks)
    volume = wait_for_volume_detached(client, volume_name)

    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)

    snap = create_snapshot(client, volume_name)
    volume.snapshotBackup(name=snap.name)
    volume = wait_for_backup_completion(client, volume_name, snap.name)
    bv, b = find_backup(client, volume_name, snap.name)
    new_b = bv.backupGet(name=b.name)
    status = loads(new_b.labels.get(KUBERNETES_STATUS_LABEL))
    # Check each field manually, since we don't know what the LastPodRefAt or
    # the LastPVCRefAt will be. We just know it shouldn't be SnapshotCreated.
    assert status['lastPodRefAt'] != snapshot_created
    assert status['lastPVCRefAt'] != snapshot_created
    assert status['namespace'] == "default"
    assert status['pvcName'] == pvc_name
    assert status['pvName'] == ""
    assert status['pvStatus'] == ""
    assert status['workloadsStatus'] == [{
        'podName': pod_name,
        'podStatus': 'Running',
        'workloadName': '',
        'workloadType': ''
    }]

    restore_name = generate_volume_name()
    client.create_volume(name=restore_name,
                         size=SIZE,
                         numberOfReplicas=2,
                         fromBackup=b.url)
    wait_for_volume_restoration_completed(client, restore_name)
    wait_for_volume_detached(client, restore_name)

    ks = {
        'lastPodRefAt': status['lastPodRefAt'],
        'lastPVCRefAt': status['lastPVCRefAt'],
        'namespace': 'default',
        'pvcName': pvc_name,
        'pvName': '',
        'pvStatus': '',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, restore_name, ks)
    restore = client.by_id_volume(restore_name)
    assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"]
    assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"]

    # cleanup
    backupstore_cleanup(client)
    client.delete(restore)
    cleanup_volume(client, volume)
Example #11
def test_recurring_job_kubernetes_status(client, core_api,
                                         volume_name):  # NOQA
    """
    Test RecurringJob properly backs up the KubernetesStatus

    1. Setup a random backupstore.
    2. Create a volume.
    3. Create a PV from the volume, and verify the PV status.
    4. Create a backup recurring job to run every 2 minutes.
    5. Verify the recurring job runs correctly.
    6. Verify the backup contains the Kubernetes Status labels
    """
    set_random_backupstore(client)
    host_id = get_self_host_id()
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    volume = common.wait_for_volume_detached(client, volume_name)

    pv_name = "pv-" + volume_name
    create_pv_for_volume(client, core_api, volume, pv_name)
    ks = {
        'pvName': pv_name,
        'pvStatus': 'Available',
        'namespace': '',
        'pvcName': '',
        'lastPVCRefAt': '',
        'lastPodRefAt': '',
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    # Simple Backup Job that runs every 2 minutes, retains 1.
    jobs = [{
        "name": RECURRING_JOB_NAME,
        "cron": "*/2 * * * *",
        "task": "backup",
        "retain": 1
    }]
    volume.recurringUpdate(jobs=jobs)
    volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    # 5 minutes
    time.sleep(300)
    snapshots = volume.snapshotList()
    count = 0
    for snapshot in snapshots:
        if snapshot.removed is False:
            count += 1
    # 1 from Backup, 1 from Volume Head.
    assert count == 2

    # Verify the Labels on the actual Backup.
    bv = client.by_id_backupVolume(volume_name)
    backups = bv.backupList().data
    assert len(backups) == 1

    b = bv.backupGet(name=backups[0].name)
    status = json.loads(b.labels.get(KUBERNETES_STATUS_LABEL))
    assert b.labels.get(RECURRING_JOB_LABEL) == RECURRING_JOB_NAME
    assert status == {
        'lastPodRefAt': '',
        'lastPVCRefAt': '',
        'namespace': '',
        'pvcName': '',
        'pvName': pv_name,
        'pvStatus': 'Available',
        'workloadsStatus': None
    }
    # Two Labels: KubernetesStatus and RecurringJob.
    assert len(b.labels) == 2

    cleanup_volume(client, volume)
    delete_and_wait_pv(core_api, pv_name)
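
The snapshot-count check above can be factored into a helper; a minimal
sketch assuming the same `snapshotList()` API the test uses:

def count_live_snapshots(volume):
    # volume-head counts too, hence the test expects backup snapshot + head
    return sum(1 for s in volume.snapshotList() if s.removed is False)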
Example #12
def restore_inc_test(client, core_api, volume_name, pod):  # NOQA
    std_volume = create_and_check_volume(client, volume_name, 2, SIZE)
    lht_host_id = get_self_host_id()
    std_volume.attach(hostId=lht_host_id)
    std_volume = common.wait_for_volume_healthy(client, volume_name)

    with pytest.raises(Exception) as e:
        std_volume.activate(frontend="blockdev")
    assert "already in active mode" in str(e.value)

    data0 = {'len': 4 * 1024, 'pos': 0}
    data0['content'] = common.generate_random_data(data0['len'])
    bv, backup0, _, data0 = create_backup(client, volume_name, data0)

    sb_volume0_name = "sb-0-" + volume_name
    sb_volume1_name = "sb-1-" + volume_name
    sb_volume2_name = "sb-2-" + volume_name
    client.create_volume(name=sb_volume0_name,
                         size=SIZE,
                         numberOfReplicas=2,
                         fromBackup=backup0['url'],
                         frontend="",
                         standby=True)
    client.create_volume(name=sb_volume1_name,
                         size=SIZE,
                         numberOfReplicas=2,
                         fromBackup=backup0['url'],
                         frontend="",
                         standby=True)
    client.create_volume(name=sb_volume2_name,
                         size=SIZE,
                         numberOfReplicas=2,
                         fromBackup=backup0['url'],
                         frontend="",
                         standby=True)
    common.wait_for_volume_restoration_completed(client, sb_volume0_name)
    common.wait_for_volume_restoration_completed(client, sb_volume1_name)
    common.wait_for_volume_restoration_completed(client, sb_volume2_name)

    sb_volume0 = common.wait_for_volume_healthy(client, sb_volume0_name)
    sb_volume1 = common.wait_for_volume_healthy(client, sb_volume1_name)
    sb_volume2 = common.wait_for_volume_healthy(client, sb_volume2_name)

    for i in range(RETRY_COUNTS):
        sb_volume0 = client.by_id_volume(sb_volume0_name)
        sb_volume1 = client.by_id_volume(sb_volume1_name)
        sb_volume2 = client.by_id_volume(sb_volume2_name)
        sb_engine0 = get_volume_engine(sb_volume0)
        sb_engine1 = get_volume_engine(sb_volume1)
        sb_engine2 = get_volume_engine(sb_volume2)
        if sb_volume0["lastBackup"] != backup0["name"] or \
                sb_volume1["lastBackup"] != backup0["name"] or \
                sb_volume2["lastBackup"] != backup0["name"] or \
                sb_engine0["lastRestoredBackup"] != backup0["name"] or \
                sb_engine1["lastRestoredBackup"] != backup0["name"] or \
                sb_engine2["lastRestoredBackup"] != backup0["name"]:
            time.sleep(RETRY_INTERVAL)
        else:
            break
    assert sb_volume0["standby"] is True
    assert sb_volume0["lastBackup"] == backup0["name"]
    assert sb_volume0["frontend"] == ""
    assert sb_volume0["disableFrontend"] is True
    assert sb_volume0["initialRestorationRequired"] is False
    sb_engine0 = get_volume_engine(sb_volume0)
    assert sb_engine0["lastRestoredBackup"] == backup0["name"]
    assert sb_engine0["requestedBackupRestore"] == backup0["name"]
    assert sb_volume1["standby"] is True
    assert sb_volume1["lastBackup"] == backup0["name"]
    assert sb_volume1["frontend"] == ""
    assert sb_volume1["disableFrontend"] is True
    assert sb_volume1["initialRestorationRequired"] is False
    sb_engine1 = get_volume_engine(sb_volume1)
    assert sb_engine1["lastRestoredBackup"] == backup0["name"]
    assert sb_engine1["requestedBackupRestore"] == backup0["name"]
    assert sb_volume2["standby"] is True
    assert sb_volume2["lastBackup"] == backup0["name"]
    assert sb_volume2["frontend"] == ""
    assert sb_volume2["disableFrontend"] is True
    assert sb_volume2["initialRestorationRequired"] is False
    sb_engine2 = get_volume_engine(sb_volume2)
    assert sb_engine2["lastRestoredBackup"] == backup0["name"]
    assert sb_engine2["requestedBackupRestore"] == backup0["name"]

    sb0_snaps = sb_volume0.snapshotList()
    assert len(sb0_snaps) == 2
    sb0_snap = None
    for s in sb0_snaps:
        if s['name'] != "volume-head":
            sb0_snap = s
    assert sb0_snap
    with pytest.raises(Exception) as e:
        sb_volume0.snapshotCreate()
    assert "cannot create snapshot for standby volume" in str(e.value)
    with pytest.raises(Exception) as e:
        sb_volume0.snapshotRevert(name=sb0_snap["name"])
    assert "cannot revert snapshot for standby volume" in str(e.value)
    with pytest.raises(Exception) as e:
        sb_volume0.snapshotDelete(name=sb0_snap["name"])
    assert "cannot delete snapshot for standby volume" in str(e.value)
    with pytest.raises(Exception) as e:
        sb_volume0.snapshotBackup(name=sb0_snap["name"])
    assert "cannot create backup for standby volume" in str(e.value)
    with pytest.raises(Exception) as e:
        sb_volume0.pvCreate(pvName=sb_volume0_name)
    assert "cannot create PV for standby volume" in str(e.value)
    with pytest.raises(Exception) as e:
        sb_volume0.pvcCreate(pvcName=sb_volume0_name)
    assert "cannot create PVC for standby volume" in str(e.value)
    setting = client.by_id_setting(common.SETTING_BACKUP_TARGET)
    with pytest.raises(Exception) as e:
        client.update(setting, value="random.backup.target")
    assert "cannot modify BackupTarget " \
           "since there are existing standby volumes" in str(e.value)
    with pytest.raises(Exception) as e:
        sb_volume0.activate(frontend="wrong_frontend")
    assert "invalid frontend" in str(e.value)

    activate_standby_volume(client, sb_volume0_name)
    sb_volume0 = client.by_id_volume(sb_volume0_name)
    sb_volume0.attach(hostId=lht_host_id)
    sb_volume0 = common.wait_for_volume_healthy(client, sb_volume0_name)
    check_volume_data(sb_volume0, data0, False)

    zero_string = b'\x00'.decode('utf-8')
    _, backup1, _, data1 = create_backup(client, volume_name, {
        'len': 2 * 1024,
        'pos': 0,
        'content': zero_string * 2 * 1024
    })
    # use this API to update the `lastBackup` field
    client.list_backupVolume()
    check_volume_last_backup(client, sb_volume1_name, backup1['name'])
    activate_standby_volume(client, sb_volume1_name)
    sb_volume1 = client.by_id_volume(sb_volume1_name)
    sb_volume1.attach(hostId=lht_host_id)
    sb_volume1 = common.wait_for_volume_healthy(client, sb_volume1_name)
    data0_modified = {
        'len': data0['len'] - data1['len'],
        'pos': data1['len'],
        'content': data0['content'][data1['len']:],
    }
    check_volume_data(sb_volume1, data0_modified, False)
    check_volume_data(sb_volume1, data1)

    data2 = {'len': 1 * 1024 * 1024, 'pos': 0}
    data2['content'] = common.generate_random_data(data2['len'])
    _, backup2, _, data2 = create_backup(client, volume_name, data2)
    client.list_backupVolume()
    check_volume_last_backup(client, sb_volume2_name, backup2['name'])
    activate_standby_volume(client, sb_volume2_name)
    sb_volume2 = client.by_id_volume(sb_volume2_name)
    sb_volume2.attach(hostId=lht_host_id)
    sb_volume2 = common.wait_for_volume_healthy(client, sb_volume2_name)
    check_volume_data(sb_volume2, data2)

    # allocate this activated volume to a pod
    sb_volume2.detach()
    sb_volume2 = common.wait_for_volume_detached(client, sb_volume2_name)

    create_pv_for_volume(client, core_api, sb_volume2, sb_volume2_name)
    create_pvc_for_volume(client, core_api, sb_volume2, sb_volume2_name)

    sb_volume2_pod_name = "pod-" + sb_volume2_name
    pod['metadata']['name'] = sb_volume2_pod_name
    pod['spec']['volumes'] = [{
        'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': sb_volume2_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    sb_volume2 = client.by_id_volume(sb_volume2_name)
    k_status = sb_volume2["kubernetesStatus"]
    workloads = k_status['workloadsStatus']
    assert k_status['pvName'] == sb_volume2_name
    assert k_status['pvStatus'] == 'Bound'
    assert len(workloads) == 1
    for i in range(RETRY_COUNTS):
        if workloads[0]['podStatus'] == 'Running':
            break
        time.sleep(RETRY_INTERVAL)
        sb_volume2 = client.by_id_volume(sb_volume2_name)
        k_status = sb_volume2["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        assert len(workloads) == 1
    assert workloads[0]['podName'] == sb_volume2_pod_name
    assert workloads[0]['podStatus'] == 'Running'
    assert not workloads[0]['workloadName']
    assert not workloads[0]['workloadType']
    assert k_status['namespace'] == 'default'
    assert k_status['pvcName'] == sb_volume2_name
    assert not k_status['lastPVCRefAt']
    assert not k_status['lastPodRefAt']

    delete_and_wait_pod(core_api, sb_volume2_pod_name)
    delete_and_wait_pvc(core_api, sb_volume2_name)
    delete_and_wait_pv(core_api, sb_volume2_name)

    # cleanup
    std_volume.detach()
    sb_volume0.detach()
    sb_volume1.detach()
    std_volume = common.wait_for_volume_detached(client, volume_name)
    sb_volume0 = common.wait_for_volume_detached(client, sb_volume0_name)
    sb_volume1 = common.wait_for_volume_detached(client, sb_volume1_name)
    sb_volume2 = common.wait_for_volume_detached(client, sb_volume2_name)

    bv.backupDelete(name=backup2["name"])
    bv.backupDelete(name=backup1["name"])
    bv.backupDelete(name=backup0["name"])

    client.delete(std_volume)
    client.delete(sb_volume0)
    client.delete(sb_volume1)
    client.delete(sb_volume2)

    wait_for_volume_delete(client, volume_name)
    wait_for_volume_delete(client, sb_volume0_name)
    wait_for_volume_delete(client, sb_volume1_name)
    wait_for_volume_delete(client, sb_volume2_name)

    volumes = client.list_volume()
    assert len(volumes) == 0
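
`check_volume_last_backup` is presumably a retry loop over the volume's
`lastBackup` field; a hedged sketch (helper name and retry values assumed):

import time

def wait_last_backup(client, volume_name, backup_name,
                     retries=150, interval=2):
    # poll until the controller propagates lastBackup to the volume
    for _ in range(retries):
        v = client.by_id_volume(volume_name)
        if v.lastBackup == backup_name:
            return v
        time.sleep(interval)
    raise AssertionError("lastBackup never became %s" % backup_name)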
Example #13
def test_engine_live_upgrade_with_intensive_data_writing(
        client, core_api, volume_name, pod_make):  # NOQA
    """
    Test engine live upgrade with intensive data writing

    1. Deploy a compatible new engine image
    2. Create a volume (with the old default engine image) with PV/PVC/Pod
       and wait for the pod to be deployed.
    3. Write data to a tmp file in the pod and get the md5sum
    4. Upgrade the volume to the new engine image without waiting.
    5. Keep copying data from the tmp file to the volume
       during the live upgrade.
    6. Wait until the upgrade completes, then verify the volume engine image
       changed
    7. Wait for the new replica mode update, then check the engine status.
    8. Verify that the engine's and all replicas' engine images changed
    9. Verify the reference count of the new engine image changed
    10. Check the existing data.
        Then write new data to the upgraded volume and get the md5sum.
    11. Delete the pod and wait for the volume to detach.
        Then check the engine's and replicas' engine images again.
    12. Recreate the pod.
    13. Check that the attached volume's state is `healthy`
        rather than `degraded`.
    14. Check the data.
    """
    default_img = common.get_default_engine_image(client)
    default_img_name = default_img.name
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    cli_v = default_img.cliAPIVersion
    cli_minv = default_img.cliAPIMinVersion
    ctl_v = default_img.controllerAPIVersion
    ctl_minv = default_img.controllerAPIMinVersion
    data_v = default_img.dataFormatVersion
    data_minv = default_img.dataFormatMinVersion
    engine_upgrade_image = common.get_upgrade_test_image(
        cli_v, cli_minv, ctl_v, ctl_minv, data_v, data_minv)

    new_img = client.create_engine_image(image=engine_upgrade_image)
    new_img_name = new_img.name
    ei_status_value = get_engine_image_status_value(client, new_img_name)
    new_img = wait_for_engine_image_state(client, new_img_name,
                                          ei_status_value)
    assert new_img.refCount == 0
    assert new_img.noRefSince != ""

    default_img = common.get_default_engine_image(client)
    default_img_name = default_img.name

    pod_name = volume_name + "-pod"
    pv_name = volume_name + "-pv"
    pvc_name = volume_name + "-pvc"

    pod = pod_make(name=pod_name)
    volume = create_and_check_volume(client,
                                     volume_name,
                                     num_of_replicas=3,
                                     size=str(1 * Gi))
    original_engine_image = volume.engineImage
    assert original_engine_image != engine_upgrade_image

    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)
    pod['spec']['volumes'] = [create_pvc_spec(pvc_name)]
    create_and_wait_pod(core_api, pod)

    volume = client.by_id_volume(volume_name)
    assert volume.engineImage == original_engine_image
    assert volume.currentImage == original_engine_image
    engine = get_volume_engine(volume)
    assert engine.engineImage == original_engine_image
    assert engine.currentImage == original_engine_image
    for replica in volume.replicas:
        assert replica.engineImage == original_engine_image
        assert replica.currentImage == original_engine_image

    data_path0 = "/tmp/test"
    data_path1 = "/data/test1"
    write_pod_volume_random_data(core_api, pod_name, data_path0,
                                 RANDOM_DATA_SIZE_LARGE)
    original_md5sum1 = get_pod_data_md5sum(core_api, pod_name, data_path0)

    volume.engineUpgrade(image=engine_upgrade_image)
    # Keep writing data to the volume during the live upgrade
    copy_pod_volume_data(core_api, pod_name, data_path0, data_path1)

    # Wait for live upgrade complete
    wait_for_volume_current_image(client, volume_name, engine_upgrade_image)
    volume = wait_for_volume_replicas_mode(client, volume_name, "RW")
    engine = get_volume_engine(volume)
    assert engine.engineImage == engine_upgrade_image
    check_volume_endpoint(volume)

    wait_for_engine_image_ref_count(client, default_img_name, 0)
    wait_for_engine_image_ref_count(client, new_img_name, 1)

    volume_file_md5sum1 = get_pod_data_md5sum(core_api, pod_name, data_path1)
    assert volume_file_md5sum1 == original_md5sum1

    data_path2 = "/data/test2"
    write_pod_volume_random_data(core_api, pod_name, data_path2,
                                 RANDOM_DATA_SIZE_SMALL)
    original_md5sum2 = get_pod_data_md5sum(core_api, pod_name, data_path2)

    delete_and_wait_pod(core_api, pod_name)
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume.replicas) == 3
    assert volume.engineImage == engine_upgrade_image
    engine = get_volume_engine(volume)
    assert engine.engineImage == engine_upgrade_image
    for replica in volume.replicas:
        assert replica.engineImage == engine_upgrade_image

    create_and_wait_pod(core_api, pod)
    common.wait_for_volume_healthy(client, volume_name)

    volume_file_md5sum1 = get_pod_data_md5sum(core_api, pod_name, data_path1)
    assert volume_file_md5sum1 == original_md5sum1
    volume_file_md5sum2 = get_pod_data_md5sum(core_api, pod_name, data_path2)
    assert volume_file_md5sum2 == original_md5sum2
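
`wait_for_engine_image_ref_count` can be pictured as a similar poll over the
image's `refCount`; a hedged sketch that assumes the client exposes
`by_id_engine_image` (retry values are also assumptions):

import time

def wait_engine_image_refcount(client, image_name, count,
                               retries=150, interval=2):
    # poll until exactly `count` volumes reference the engine image
    for _ in range(retries):
        img = client.by_id_engine_image(image_name)
        if img.refCount == count:
            return img
        time.sleep(interval)
    raise AssertionError("refCount of %s never reached %d"
                         % (image_name, count))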
Example #14
def test_pvc_creation_with_default_sc_set(client, core_api, storage_class,
                                          pod):  # NOQA
    # set default storage class
    storage_class['metadata']['annotations'] = \
        {"storageclass.kubernetes.io/is-default-class": "true"}
    create_storage_class(storage_class)

    static_sc_name = "longhorn-static-test"
    setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC)
    setting = client.update(setting, value=static_sc_name)
    assert setting["value"] == static_sc_name

    volume_name = "test-pvc-creation-with-sc"
    pod_name = "pod-" + volume_name
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pv_name = "pv-" + volume_name
    pvc_name = "pvc-" + volume_name
    pvc_name_extra = "pvc-" + volume_name + "-extra"

    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)

    ret = core_api.list_namespaced_persistent_volume_claim(namespace='default')
    pvc_found = False
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc_found = item
            break
    assert pvc_found
    assert pvc_found.spec.storage_class_name == static_sc_name

    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [{
        'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pvc_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    ks = {
        'pvName': pv_name,
        'pvStatus': 'Bound',
        'namespace': 'default',
        'pvcName': pvc_name,
        'lastPVCRefAt': '',
        'lastPodRefAt': '',
        'workloadsStatus': [
            {
                'podName': pod_name,
                'podStatus': 'Running',
                'workloadName': '',
                'workloadType': '',
            },
        ],
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name)

    # try to reuse the pv
    volume = wait_for_volume_detached(client, volume_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name_extra)
    pod['spec']['volumes'][0]['persistentVolumeClaim']['claimName'] = \
        pvc_name_extra
    create_and_wait_pod(core_api, pod)

    ks['pvcName'] = pvc_name_extra
    wait_volume_kubernetes_status(client, volume_name, ks)

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name_extra)
    delete_and_wait_pv(core_api, pv_name)

    # without default storage class
    delete_storage_class(storage_class['metadata']['name'])
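    # With no default StorageClass present, creating the static PV/PVC below
    # should still succeed and still use the Longhorn static StorageClass.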

    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)

    pvc2 = None
    ret = core_api.list_namespaced_persistent_volume_claim(namespace='default')
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc2 = item
            break
    assert pvc2
    assert pvc2.spec.storage_class_name == static_sc_name

    delete_and_wait_pvc(core_api, pvc_name)
    delete_and_wait_pv(core_api, pv_name)
Example No. 15
def test_recurring_jobs_for_detached_volume(set_random_backupstore, client,
                                            core_api, apps_api, volume_name,
                                            make_deployment_with_pvc):  # NOQA
    """
    Test recurring jobs for detached volume

    Context:
    In the current Longhorn implementation, users cannot do recurring
    backup when volumes are detached.
    This feature gives the users an option to do recurring backup even when
    volumes are detached.
    longhorn/longhorn#1509

    Steps:
    1.  Change the setting allow-recurring-job-while-volume-detached to true.
    2.  Create and attach volume, write 50MB data to the volume.
    3.  Detach the volume.
    4.  Set a recurring backup job for the volume to run every minute.
    5.  In a 2-minute retry loop, verify that there is exactly 1 new backup.
    6.  Delete the recurring job.
    7.  Create a PV and PVC from the volume.
    8.  Create a deployment of 1 pod using the PVC.
    9.  Write 400MB data to the volume from the pod.
    10. Scale down the deployment. Wait until the volume is detached.
    11. Set a recurring backup job to run every 2 minutes.
    12. Wait until the recurring backup starts, then scale the deployment up
        to 1 pod.
    13. Verify that during the recurring backup, the volume's frontend is
        disabled, and the pod cannot start.
    14. Wait for the recurring backup to finish.
        Delete the recurring job.
    15. In a 10-minute retry loop, verify that the pod can eventually start.
    16. Change the setting allow-recurring-job-while-volume-detached to false.
    17. Cleanup.
    """
    recurring_job_setting = \
        client.by_id_setting(SETTING_RECURRING_JOB_WHILE_VOLUME_DETACHED)
    client.update(recurring_job_setting, value="true")

    vol = common.create_and_check_volume(client, volume_name, size=str(1 * Gi))

    lht_hostId = get_self_host_id()
    vol.attach(hostId=lht_hostId)
    vol = wait_for_volume_healthy(client, vol.name)

    data = {
        'pos': 0,
        'content': common.generate_random_data(50 * Mi),
    }
    common.write_volume_data(vol, data)

    # Give some time for the data to flush to disk
    time.sleep(15)

    vol.detach(hostId="")
    vol = common.wait_for_volume_detached(client, vol.name)

    jobs = [{
        "name": RECURRING_JOB_NAME,
        "cron": "*/1 * * * *",
        "task": "backup",
        "retain": 1
    }]
    vol.recurringUpdate(jobs=jobs)
    common.wait_for_backup_completion(client, vol.name)
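    # The job above backs up every minute (cron "*/1 * * * *") and retains a
    # single backup, so poll for ~2 minutes to verify that exactly one backup
    # exists throughout.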
    for _ in range(4):
        bv = client.by_id_backupVolume(vol.name)
        backups = bv.backupList().data
        assert len(backups) == 1
        time.sleep(30)

    vol.recurringUpdate(jobs=[])

    pv_name = volume_name + "-pv"
    common.create_pv_for_volume(client, core_api, vol, pv_name)

    pvc_name = volume_name + "-pvc"
    common.create_pvc_for_volume(client, core_api, vol, pvc_name)

    deployment_name = volume_name + "-dep"
    deployment = make_deployment_with_pvc(deployment_name, pvc_name)
    common.create_and_wait_deployment(apps_api, deployment)

    size_mb = 400
    pod_names = common.get_deployment_pod_names(core_api, deployment)
    write_pod_volume_random_data(core_api, pod_names[0], "/data/test", size_mb)

    deployment['spec']['replicas'] = 0
    apps_api.patch_namespaced_deployment(body=deployment,
                                         namespace='default',
                                         name=deployment["metadata"]["name"])

    vol = common.wait_for_volume_detached(client, vol.name)

    jobs = [{
        "name": RECURRING_JOB_NAME,
        "cron": "*/2 * * * *",
        "task": "backup",
        "retain": 1
    }]
    vol.recurringUpdate(jobs=jobs)

    common.wait_for_backup_to_start(client, vol.name)
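    # While the backup of the detached volume runs, the volume's frontend
    # stays disabled, so the pod created by scaling up below cannot start
    # until the backup completes.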

    deployment['spec']['replicas'] = 1
    apps_api.patch_namespaced_deployment(body=deployment,
                                         namespace='default',
                                         name=deployment["metadata"]["name"])

    deployment_label_name = deployment["metadata"]["labels"]["name"]
    common.wait_pod_auto_attach_after_first_backup_completion(
        client, core_api, vol.name, deployment_label_name)

    vol.recurringUpdate(jobs=[])

    pod_names = common.get_deployment_pod_names(core_api, deployment)
    common.wait_for_pod_phase(core_api, pod_names[0], pod_phase="Running")
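
The 2-minute polling block above (re-checking that exactly one backup exists) can be factored into a reusable helper. A minimal sketch, not part of the original suite, assuming the same Longhorn client API and the time module used in this file:

def assert_backup_count_stable(client, volume_name, expected_count,
                               rounds=4, interval=30):
    # Re-check the backup list several times so a late, unexpected backup
    # would still be caught within rounds * interval seconds.
    for _ in range(rounds):
        bv = client.by_id_backupVolume(volume_name)
        backups = bv.backupList().data
        assert len(backups) == expected_count
        time.sleep(interval)
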
def test_pvc_creation_with_default_sc_set(client, core_api, storage_class,
                                          pod):  # NOQA
    """
    Test creating PVC with default StorageClass set

    The goal is to make sure the newly created PV/PVC won't use the default
    StorageClass, and that the PV/PVC can still be created when there is no
    default StorageClass.

    1. Create a StorageClass and set it to be the default StorageClass
    2. Update static StorageClass to `longhorn-static-test`
    3. Create volume then PV/PVC.
    4. Make sure the newly created PV/PVC use the StorageClass
    `longhorn-static-test`
    5. Create pod with PVC.
    6. Verify volume's Kubernetes Status
    7. Remove PVC and Pod.
    8. Verify volume's Kubernetes Status only contains current PV and history
    9. Wait for volume to detach (since pod is deleted)
    10. Reuse the volume on a new pod. Wait for the pod to start
    11. Verify volume's Kubernetes Status reflects the new pod.
    12. Delete PV/PVC/Pod.
    13. Verify volume's Kubernetes Status only contains history
    14. Delete the default StorageClass.
    15. Create PV/PVC for the volume.
    16. Make sure the PV's StorageClass is static StorageClass
    """
    # set default storage class
    storage_class['metadata']['annotations'] = \
        {"storageclass.kubernetes.io/is-default-class": "true"}
    create_storage_class(storage_class)

    static_sc_name = "longhorn-static-test"
    setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC)
    setting = client.update(setting, value=static_sc_name)
    assert setting.value == static_sc_name

    volume_name = "test-pvc-creation-with-sc"  # NOQA
    pod_name = "pod-" + volume_name
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pv_name = "pv-" + volume_name
    pvc_name = "pvc-" + volume_name
    pvc_name_extra = "pvc-" + volume_name + "-extra"

    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)

    pvc_found = None
    ret = core_api.list_namespaced_persistent_volume_claim(namespace='default')
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc_found = item
            break
    assert pvc_found
    assert pvc_found.spec.storage_class_name == static_sc_name

    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [{
        'name':
        pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pvc_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    ks = {
        'pvName': pv_name,
        'pvStatus': 'Bound',
        'namespace': 'default',
        'pvcName': pvc_name,
        'lastPVCRefAt': '',
        'lastPodRefAt': '',
        'workloadsStatus': [
            {
                'podName': pod_name,
                'podStatus': 'Running',
                'workloadName': '',
                'workloadType': '',
            },
        ],
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name)

    ks = {
        'pvName': pv_name,
        'pvStatus': 'Released',
        'namespace': 'default',
        'pvcName': pvc_name,
        'lastPVCRefAt': 'not empty',
        'lastPodRefAt': 'not empty',
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    # try to reuse the pv
    volume = wait_for_volume_detached(client, volume_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name_extra)
    pod['spec']['volumes'][0]['persistentVolumeClaim']['claimName'] = \
        pvc_name_extra
    create_and_wait_pod(core_api, pod)

    ks = {
        'pvName': pv_name,
        'pvStatus': 'Bound',
        'namespace': 'default',
        'pvcName': pvc_name_extra,
        'lastPVCRefAt': '',
        'lastPodRefAt': '',
        'workloadsStatus': [
            {
                'podName': pod_name,
                'podStatus': 'Running',
                'workloadName': '',
                'workloadType': '',
            },
        ],
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name_extra)
    delete_and_wait_pv(core_api, pv_name)

    ks = {
        'pvName': '',
        'pvStatus': '',
        'namespace': 'default',
        'pvcName': pvc_name_extra,
        'lastPVCRefAt': 'not empty',
        'lastPodRefAt': 'not empty',
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    # without default storage class
    delete_storage_class(storage_class['metadata']['name'])

    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)

    pvc2 = None
    ret = core_api.list_namespaced_persistent_volume_claim(namespace='default')
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc2 = item
            break
    assert pvc2
    assert pvc2.spec.storage_class_name == static_sc_name

    delete_and_wait_pvc(core_api, pvc_name)
    delete_and_wait_pv(core_api, pv_name)
Example No. 17
def generate_load(request):

    index = get_random_suffix()

    longhorn_api_client = get_longhorn_api_client()
    k8s_api_client = get_core_api_client()

    check_and_set_backupstore(longhorn_api_client)

    volume_name = STRESS_VOLUME_NAME_PREFIX + index
    pv_name = STRESS_PV_NAME_PREFIX + index
    pvc_name = STRESS_PVC_NAME_PREFIX + index
    pod_name = STRESS_POD_NAME_PREFIX + index

    atexit.register(remove_datafile, pod_name)
    atexit.register(delete_and_wait_longhorn, longhorn_api_client, volume_name)
    atexit.register(delete_and_wait_pv, k8s_api_client, pv_name)
    atexit.register(delete_and_wait_pvc, k8s_api_client, pvc_name)
    atexit.register(delete_and_wait_pod, k8s_api_client, pod_name)
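    # atexit handlers run in reverse registration order, so cleanup proceeds
    # pod -> PVC -> PV -> Longhorn volume -> data file at exit.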

    longhorn_volume = create_and_check_volume(longhorn_api_client,
                                              volume_name,
                                              size=VOLUME_SIZE)

    wait_for_volume_detached(longhorn_api_client, volume_name)

    pod_manifest = generate_pod_with_pvc_manifest(pod_name, pvc_name)

    create_pv_for_volume(longhorn_api_client, k8s_api_client, longhorn_volume,
                         pv_name)

    create_pvc_for_volume(longhorn_api_client, k8s_api_client, longhorn_volume,
                          pvc_name)

    create_and_wait_pod(k8s_api_client, pod_manifest)

    snapshots_md5sum = dict()

    write_data(k8s_api_client, pod_name)
    create_recurring_jobs(longhorn_api_client, volume_name)

    for _ in range(N_RANDOM_ACTIONS):
        action = randrange(0, 8)
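        # Dispatch on the randomly chosen action; each branch logs its
        # start/end time around one stress operation.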

        if action == 0:
            print("write data started: " + time_now(), end=', ')
            write_data(k8s_api_client, pod_name)
            print("ended: " + time_now())

        elif action == 1:
            print("delete data started: " + time_now(), end=', ')
            delete_data(k8s_api_client, pod_name)
            print("ended: " + time_now())

        elif action == 2:
            print("create snapshot started: " + time_now(), end=', ')
            snapshot_create_and_record_md5sum(longhorn_api_client,
                                              k8s_api_client, volume_name,
                                              pod_name, snapshots_md5sum)
            print("ended: " + time_now())

        elif action == 3:
            print("delete random snapshot  started: " + time_now(), end=', ')
            delete_random_snapshot(longhorn_api_client, volume_name,
                                   snapshots_md5sum)
            print("ended: " + time_now())

        elif action == 4:
            print("revert random snapshot started: " + time_now(), end=', ')
            revert_random_snapshot(longhorn_api_client, k8s_api_client,
                                   volume_name, pod_manifest, snapshots_md5sum)
            print("ended: " + time_now())

        elif action == 5:
            print("create backup started: " + time_now(), end=', ')
            backup_create_and_record_md5sum(longhorn_api_client,
                                            k8s_api_client, volume_name,
                                            pod_name, snapshots_md5sum)
            print("ended: " + time_now())

        elif action == 6:
            print("delete replica started: " + time_now(), end=', ')
            delete_replica(longhorn_api_client, volume_name)
            print("ended: " + time_now())

        elif action == 7:
            print("restore random backup started: " + time_now(), end=', ')
            restore_and_check_random_backup(longhorn_api_client,
                                            k8s_api_client, volume_name,
                                            pod_name, snapshots_md5sum)

            print("ended: " + time_now())

    clean_volume_backups(longhorn_api_client, volume_name)
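
The if/elif ladder above maps each random index to one stress action. A dispatch-table variant is a common refactor; a minimal sketch, not part of the original suite, assuming the same action helpers, randrange, and time_now() used in this file:

def run_random_action(actions):
    # actions: list of (label, zero-argument callable) pairs; one pair is
    # picked uniformly at random, like randrange(0, 8) above.
    label, action_fn = actions[randrange(0, len(actions))]
    print(label + " started: " + time_now(), end=', ')
    action_fn()
    print("ended: " + time_now())
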
Example No. 18
def test_restore_rwo_volume_to_rwx(set_random_backupstore, client, core_api,
                                   volume_name, pvc, csi_pv, pod_make,
                                   make_deployment_with_pvc):  # NOQA
    """
    Test restoring an RWO volume to an RWX volume.

    1. Create a volume with 'accessMode' rwo.
    2. Create a PV and a PVC with access mode 'readwriteonce' and attach them
       to the volume.
    3. Create a pod and attach to the PVC.
    4. Write some data into the pod and compute md5sum.
    5. Take a backup of the volume.
    6. Restore the backup with 'accessMode' rwx.
    7. Create PV and PVC and attach to 2 pods.
    8. Verify the data.
    """

    data_path = "/data/test"
    pod_name, pv_name, pvc_name, md5sum = \
        prepare_pod_with_data_in_mb(client, core_api, csi_pv, pvc,
                                    pod_make,
                                    volume_name,
                                    data_size_in_mb=DATA_SIZE_IN_MB_1,
                                    data_path=data_path)

    snap = create_snapshot(client, volume_name)
    volume = client.by_id_volume(volume_name)
    volume.snapshotBackup(name=snap.name)
    wait_for_backup_completion(client, volume_name, snap.name)
    bv, b1 = find_backup(client, volume_name, snap.name)

    restore_volume_name = 'restored-rwx-volume'
    restore_pv_name = restore_volume_name + "-pv"
    restore_pvc_name = restore_volume_name + "-pvc"

    client.create_volume(name=restore_volume_name,
                         size=str(1 * Gi),
                         numberOfReplicas=3,
                         fromBackup=b1.url,
                         accessMode='rwx')
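    # Longhorn serves RWX volumes via an NFS share-manager, so both pods of
    # the deployment below can mount the same restored volume.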
    wait_for_volume_creation(client, restore_volume_name)
    restore_volume = wait_for_volume_detached(client, restore_volume_name)
    create_pv_for_volume(client, core_api, restore_volume, restore_pv_name)
    create_pvc_for_volume(client, core_api, restore_volume, restore_pvc_name)
    deployment = make_deployment_with_pvc('deployment-multi-pods-test',
                                          restore_pvc_name,
                                          replicas=2)
    apps_api = get_apps_api_client()
    create_and_wait_deployment(apps_api, deployment)

    deployment_label_selector = \
        "name=" + deployment["metadata"]["labels"]["name"]

    deployment_pod_list = \
        core_api.list_namespaced_pod(namespace="default",
                                     label_selector=deployment_label_selector)

    pod_name_1 = deployment_pod_list.items[0].metadata.name
    pod_name_2 = deployment_pod_list.items[1].metadata.name

    md5sum_pod1 = get_pod_data_md5sum(core_api, pod_name_1, data_path)
    md5sum_pod2 = get_pod_data_md5sum(core_api, pod_name_2, data_path)

    assert md5sum == md5sum_pod1 == md5sum_pod2
Example No. 19
def test_data_locality_basic(client, core_api, volume_name, pod,
                             settings_reset):  # NOQA
    """
    Test data locality basic feature

    Context:

    Data Locality feature allows users to have an option to keep a local
    replica on the same node as the consuming pod.
    Longhorn currently supports 2 modes:
    - disabled: Longhorn does not try to keep a local replica
    - best-effort: Longhorn tries to keep a local replica

    See manual tests at:
    https://github.com/longhorn/longhorn/issues/1045#issuecomment-680706283

    Steps:

    Case 1: Test that Longhorn builds a local replica on the engine node

    1. Create a volume(1) with 1 replica and dataLocality set to disabled
    2. Find the node where the replica is located.
       Call this node replica-node
    3. Attach the volume to a node other than replica-node.
       Call this node engine-node
    4. Write 200MB of data to volume(1)
    5. Use a retry loop to verify that Longhorn does not create
       a replica on the engine-node
    6. Update dataLocality to best-effort for volume(1)
    7. Use a retry loop to verify that Longhorn creates and rebuilds
       a replica on the engine-node and removes the other replica
    8. Detach volume(1) and attach it to a different node.
       Call the new node new-engine-node and the old
       node old-engine-node
    9. Wait for volume(1) to finish attaching
    10. Use a retry loop to verify that Longhorn creates and rebuilds
       a replica on the new-engine-node and removes the replica on
       old-engine-node

    Case 2: Test that Longhorn prioritizes deleting replicas on the same node

    1. Add the tag AVAIL to node-1 and node-2
    2. Set node soft anti-affinity to `true`.
    3. Create a volume(2) with 3 replicas and dataLocality set to best-effort
    4. Use a retry loop to verify that all 3 replicas are on node-1 and
        node-2, no replica is on node-3
    5. Attach volume(2) to node-3
    6. Use a retry loop to verify that there is no replica on node-3 and
        we can still read/write to volume(2)
    7. Find the node which contains 2 replicas.
        Call this node most-replica-node
    8. Set the replica count to 2 for volume(2)
    9. Verify that Longhorn removes one replica from most-replica-node

    Case 3: Test that the volume is not corrupted if there is an unexpected
    detachment during building local replica

    1. Remove the tag AVAIL from node-1 and node-2
       Set node soft anti-affinity to `false`.
    2. Create a volume(3) with 1 replica and dataLocality set to best-effort
    3. Attach volume(3) to node-3.
    4. Use a retry loop to verify that volume(3) has only 1 replica on node-3
    5. Write 800MB of data to volume(3)
    6. Detach volume(3)
    7. Attach volume(3) to node-1
    8. Use a retry loop to:
        Wait until volume(3) finishes attaching.
        Wait until Longhorn starts rebuilding a replica on node-1
        Immediately detach volume(3)
    9. Verify that the replica on node-1 is in ERR state.
    10. Attach volume(3) to node-1
    11. Wait until volume(3) finishes attaching.
    12. Use a retry loop to verify that Longhorn cleans up the ERR replica,
        rebuilds a new replica on node-1, and removes the replica on node-3

    Case 4: Make sure a failed-to-schedule local replica doesn't block the
    creation of other replicas.

    1. Disable scheduling for node-3
    2. Create a vol with 1 replica, `dataLocality = best-effort`.
        The replica is scheduled on a node (say node-1)
    3. Attach vol to node-3. There is a fail-to-schedule
        replica with Spec.HardNodeAffinity=node-3
    4. Increase numberOfReplica to 3. Verify that the replica set contains:
        one on node-1, one on node-2,  one failed replica
        with Spec.HardNodeAffinity=node-3.
    5. Decrease numberOfReplica to 2. Verify that the replica set contains:
        one on node-1, one on node-2,  one failed replica
        with Spec.HardNodeAffinity=node-3.
    6. Decrease numberOfReplica to 1. Verify that the replica set contains:
        one on node-1 or node-2,  one failed replica
        with Spec.HardNodeAffinity=node-3.
    7. Decrease numberOfReplica to 2. Verify that the replica set contains:
        one on node-1, one on node-2, one failed replica
        with Spec.HardNodeAffinity=node-3.
    8. Turn off data locality by setting `dataLocality=disabled` for the vol.
        Verify that the replica set contains: one on node-1, one on node-2

    9. Clean up
    """

    # Case 1: Test that Longhorn builds a local replica on the engine node

    nodes = client.list_node()

    default_data_locality_setting = \
        client.by_id_setting(SETTING_DEFAULT_DATA_LOCALITY)
    try:
        client.update(default_data_locality_setting, value="disabled")
    except Exception as e:
        print("Exception when update Default Data Locality setting",
              default_data_locality_setting, e)

    volume1_name = volume_name + "-1"
    volume1_size = str(500 * Mi)
    volume1_data_path = "/data/test"
    pv1_name = volume1_name + "-pv"
    pvc1_name = volume1_name + "-pvc"
    pod1_name = volume1_name + "-pod"
    pod1 = pod

    pod1['metadata']['name'] = pod1_name

    volume1 = create_and_check_volume(client,
                                      volume1_name,
                                      num_of_replicas=1,
                                      size=volume1_size)

    volume1 = client.by_id_volume(volume1_name)
    create_pv_for_volume(client, core_api, volume1, pv1_name)
    create_pvc_for_volume(client, core_api, volume1, pvc1_name)

    volume1 = client.by_id_volume(volume1_name)
    volume1_replica_node = volume1.replicas[0]['hostId']

    volume1_attached_node = None
    for node in nodes:
        if node.name != volume1_replica_node:
            volume1_attached_node = node.name
            break

    assert volume1_attached_node is not None

    pod1['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": pvc1_name
        }
    }]

    pod1['spec']['nodeSelector'] = \
        {"kubernetes.io/hostname": volume1_attached_node}
    create_and_wait_pod(core_api, pod1)

    write_pod_volume_random_data(core_api, pod1_name, volume1_data_path,
                                 DATA_SIZE_IN_MB_2)

    for i in range(10):
        volume1 = client.by_id_volume(volume1_name)
        assert len(volume1.replicas) == 1
        assert volume1.replicas[0]['hostId'] != volume1_attached_node
        time.sleep(1)

    volume1 = client.by_id_volume(volume1_name)
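    # Switching to best-effort should make Longhorn rebuild a replica on the
    # attached node and then remove the remote one; the retry loop below
    # waits for that migration.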
    volume1.updateDataLocality(dataLocality="best-effort")

    for _ in range(RETRY_COUNTS):
        volume1 = client.by_id_volume(volume1_name)
        assert volume1[VOLUME_FIELD_ROBUSTNESS] == VOLUME_ROBUSTNESS_HEALTHY
        if len(volume1.replicas) == 1 and \
                volume1.replicas[0]['hostId'] == volume1_attached_node:
            break
        time.sleep(RETRY_INTERVAL)
    assert len(volume1.replicas) == 1
    assert volume1.replicas[0]['hostId'] == volume1_attached_node

    delete_and_wait_pod(core_api, pod1_name)
    volume1 = wait_for_volume_detached(client, volume1_name)

    volume1_replica_node = volume1.replicas[0]['hostId']

    volume1_attached_node = None
    for node in nodes:
        if node.name != volume1_replica_node:
            volume1_attached_node = node.name
            break

    assert volume1_attached_node is not None

    pod1['spec']['nodeSelector'] = \
        {"kubernetes.io/hostname": volume1_attached_node}
    create_and_wait_pod(core_api, pod1)
    for _ in range(RETRY_COUNTS):
        volume1 = client.by_id_volume(volume1_name)
        assert volume1[VOLUME_FIELD_ROBUSTNESS] == VOLUME_ROBUSTNESS_HEALTHY
        if len(volume1.replicas) == 1 and \
                volume1.replicas[0]['hostId'] == volume1_attached_node:
            break
        time.sleep(RETRY_INTERVAL)
    assert len(volume1.replicas) == 1
    assert volume1.replicas[0]['hostId'] == volume1_attached_node
    delete_and_wait_pod(core_api, pod1_name)
    wait_for_volume_detached(client, volume1_name)

    # Case 2: Test that Longhorn prioritizes deleting replicas on the same node

    node1 = nodes[0]
    node2 = nodes[1]
    node3 = nodes[2]

    client.update(node1, allowScheduling=True, tags=["AVAIL"])
    client.update(node2, allowScheduling=True, tags=["AVAIL"])

    replica_node_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    try:
        client.update(replica_node_soft_anti_affinity_setting, value="true")
    except Exception as e:
        print(
            "Exception when update "
            "Replica Node Level Soft Anti-Affinity setting",
            replica_node_soft_anti_affinity_setting, e)

    volume2_name = volume_name + "-2"
    volume2_size = str(500 * Mi)
    pv2_name = volume2_name + "-pv"
    pvc2_name = volume2_name + "-pvc"
    pod2_name = volume2_name + "-pod"
    pod2 = pod

    pod2['metadata']['name'] = pod2_name

    volume2 = client.create_volume(name=volume2_name,
                                   size=volume2_size,
                                   numberOfReplicas=3,
                                   nodeSelector=["AVAIL"],
                                   dataLocality="best-effort")

    volume2 = wait_for_volume_detached(client, volume2_name)
    volume2 = client.by_id_volume(volume2_name)
    create_pv_for_volume(client, core_api, volume2, pv2_name)
    create_pvc_for_volume(client, core_api, volume2, pvc2_name)

    volume2 = client.by_id_volume(volume2_name)

    pod2['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": pvc2_name
        }
    }]

    pod2['spec']['nodeSelector'] = {"kubernetes.io/hostname": node3.name}
    create_and_wait_pod(core_api, pod2)

    volume2 = wait_for_volume_healthy(client, volume2_name)

    for replica in volume2.replicas:
        assert replica["hostId"] != node3.name

    volume2.updateReplicaCount(replicaCount=2)

    # 2 healthy replicas and 1 replica that failed to schedule.
    # The failed-to-schedule replica is the local replica on node3.
    volume2 = wait_for_volume_replica_count(client, volume2_name, 3)
    volume2 = client.by_id_volume(volume2_name)

    volume2_healthy_replicas = []
    for replica in volume2.replicas:
        if replica.running is True:
            volume2_healthy_replicas.append(replica)

    assert len(volume2_healthy_replicas) == 2

    volume2_rep1 = volume2_healthy_replicas[0]
    volume2_rep2 = volume2_healthy_replicas[1]
    assert volume2_rep1["hostId"] != volume2_rep2["hostId"]
    delete_and_wait_pod(core_api, pod2_name)
    wait_for_volume_detached(client, volume2_name)

    # Case 3: Test that the volume is not corrupted if there is an unexpected
    # detachment during building local replica

    client.update(node1, allowScheduling=True, tags=[])
    client.update(node2, allowScheduling=True, tags=[])

    replica_node_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    try:
        client.update(replica_node_soft_anti_affinity_setting, value="false")
    except Exception as e:
        print(
            "Exception when update "
            "Replica Node Level Soft Anti-Affinity setting",
            replica_node_soft_anti_affinity_setting, e)

    volume3_name = volume_name + "-3"
    volume3_size = str(1 * Gi)
    volume3_data_path = "/data/test"
    pv3_name = volume3_name + "-pv"
    pvc3_name = volume3_name + "-pvc"
    pod3_name = volume3_name + "-pod"
    pod3 = pod

    pod3['metadata']['name'] = pod3_name

    volume3 = client.create_volume(name=volume3_name,
                                   size=volume3_size,
                                   numberOfReplicas=1)

    volume3 = wait_for_volume_detached(client, volume3_name)
    volume3 = client.by_id_volume(volume3_name)
    create_pv_for_volume(client, core_api, volume3, pv3_name)
    create_pvc_for_volume(client, core_api, volume3, pvc3_name)

    volume3 = client.by_id_volume(volume3_name)

    pod3['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": pvc3_name
        }
    }]

    pod3['spec']['nodeSelector'] = {"kubernetes.io/hostname": node3.name}
    create_and_wait_pod(core_api, pod3)
    volume3 = wait_for_volume_healthy(client, volume3_name)

    write_pod_volume_random_data(core_api, pod3_name, volume3_data_path,
                                 DATA_SIZE_IN_MB_4)

    volume3.updateDataLocality(dataLocality="best-effort")
    volume3 = client.by_id_volume(volume3_name)

    if volume3.replicas[0]['hostId'] != node3.name:
        wait_for_rebuild_start(client, volume3_name)
        volume3 = client.by_id_volume(volume3_name)
        assert len(volume3.replicas) == 2
        wait_for_rebuild_complete(client, volume3_name)

    volume3 = wait_for_volume_replica_count(client, volume3_name, 1)
    assert volume3.replicas[0]["hostId"] == node3.name

    delete_and_wait_pod(core_api, pod3_name)

    pod3['spec']['nodeSelector'] = {"kubernetes.io/hostname": node1.name}
    create_and_wait_pod(core_api, pod3)

    wait_for_rebuild_start(client, volume3_name)
    crash_engine_process_with_sigkill(client, core_api, volume3_name)
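    # Killing the engine mid-rebuild simulates an unexpected detachment; the
    # half-built local replica should be discarded, leaving only the intact
    # replica on node3 (asserted below).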
    delete_and_wait_pod(core_api, pod3_name)
    wait_for_volume_detached(client, volume3_name)
    volume3 = client.by_id_volume(volume3_name)
    assert len(volume3.replicas) == 1
    assert volume3.replicas[0]["hostId"] == node3.name

    create_and_wait_pod(core_api, pod3)
    wait_for_rebuild_start(client, volume3_name)
    volume3 = client.by_id_volume(volume3_name)
    assert len(volume3.replicas) == 2
    wait_for_rebuild_complete(client, volume3_name)

    # Wait for deletion of extra replica
    volume3 = wait_for_volume_replica_count(client, volume3_name, 1)
    assert volume3.replicas[0]["hostId"] == node1.name
    assert volume3.replicas[0]["mode"] == "RW"
    assert volume3.replicas[0]["running"] is True

    delete_and_wait_pod(core_api, pod3_name)
    wait_for_volume_detached(client, volume3_name)

    # Case 4: Make sure a failed-to-schedule local replica doesn't block the
    # creation of other replicas.

    replica_node_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    try:
        client.update(replica_node_soft_anti_affinity_setting, value="false")
    except Exception as e:
        print(
            "Exception when update "
            "Replica Node Level Soft Anti-Affinity setting",
            replica_node_soft_anti_affinity_setting, e)

    client.update(node3, allowScheduling=False)

    volume4_name = volume_name + "-4"
    volume4_size = str(1 * Gi)

    volume4 = client.create_volume(name=volume4_name,
                                   size=volume4_size,
                                   numberOfReplicas=1,
                                   dataLocality="best-effort")

    volume4 = wait_for_volume_detached(client, volume4_name)
    volume4 = client.by_id_volume(volume4_name)

    volume4_replica_name = volume4.replicas[0]["name"]

    volume4.attach(hostId=node3.name)
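    # node3 has scheduling disabled, so the local replica that data locality
    # tries to place there stays failed, with Spec.HardNodeAffinity=node-3.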

    wait_for_volume_healthy(client, volume4_name)

    volume4 = client.by_id_volume(volume4_name)
    assert len(volume4.replicas) == 2

    for replica in volume4.replicas:
        if replica["name"] == volume4_replica_name:
            assert replica["running"] is True
            assert replica["mode"] == "RW"
        else:
            assert replica["running"] is False
            assert replica["mode"] == ""

    assert volume4.conditions.scheduled.reason == \
        "LocalReplicaSchedulingFailure"

    volume4 = volume4.updateReplicaCount(replicaCount=3)

    volume4 = wait_for_volume_degraded(client, volume4_name)

    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_failed_replica_count = 0

    for replica in volume4.replicas:
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == "":
            v4_failed_replica_count += 1

    assert v4_node1_replica_count == 1
    assert v4_node2_replica_count == 1
    assert v4_failed_replica_count > 0

    volume4 = volume4.updateReplicaCount(replicaCount=2)

    volume4 = wait_for_volume_replica_count(client, volume4_name, 3)

    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_failed_replica_count = 0

    for replica in volume4.replicas:
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == "":
            v4_failed_replica_count += 1

    assert v4_node1_replica_count == 1
    assert v4_node2_replica_count == 1
    assert v4_failed_replica_count > 0

    volume4 = volume4.updateReplicaCount(replicaCount=1)

    volume4 = wait_for_volume_replica_count(client, volume4_name, 2)

    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_failed_replica_count = 0

    for replica in volume4.replicas:
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == "":
            v4_failed_replica_count += 1

    assert v4_node1_replica_count + v4_node2_replica_count == 1
    assert v4_failed_replica_count == 1

    volume4 = volume4.updateDataLocality(dataLocality="disabled")
    volume4 = volume4.updateReplicaCount(replicaCount=2)

    running_replica_count = 0
    for _ in range(RETRY_COUNTS):
        volume4 = client.by_id_volume(volume4_name)
        running_replica_count = 0
        for r in volume4.replicas:
            if r.failedAt == "" and r.running is True:
                running_replica_count += 1
        if running_replica_count == 2:
            break
        time.sleep(RETRY_INTERVAL)
    assert running_replica_count == 2

    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_node3_replica_count = 0

    for replica in volume4.replicas:
        wait_for_replica_running(client, volume4_name, replica["name"])
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == node3.name:
            v4_node3_replica_count += 1
    assert v4_node1_replica_count == 1
    assert v4_node2_replica_count == 1
    assert v4_node3_replica_count == 0
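
Case 4 tallies replicas per node three times with the same loop; a minimal helper sketch, not part of the original suite, assuming replica objects expose hostId as above (a failed-to-schedule replica reports an empty hostId):

def count_replicas_by_host(volume, node_names):
    # Returns {node_name: count}; the "" bucket collects failed-to-schedule
    # replicas, which carry no assigned host.
    counts = {name: 0 for name in list(node_names) + [""]}
    for replica in volume.replicas:
        host_id = replica["hostId"]
        if host_id in counts:
            counts[host_id] += 1
    return counts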