예제 #1
0
def test_replica_scheduler_update_over_provisioning(client):  # NOQA
    nodes = client.list_node()
    lht_hostId = get_self_host_id()
    expect_node_disk = {}
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            if disk["path"] == DEFAULT_DISK_PATH:
                expect_disk = disk
                expect_disk["fsid"] = fsid
                expect_node_disk[node["name"]] = expect_disk

    over_provisioning_setting = client.by_id_setting(
        SETTING_STORAGE_OVER_PROVISIONING_PERCENTAGE)
    old_provisioning_setting = over_provisioning_setting["value"]

    # set storage over provisioning percentage to 0
    # to test all replica couldn't be scheduled
    over_provisioning_setting = client.update(over_provisioning_setting,
                                              value="0")
    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name,
                                  size=SIZE, numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_FALSE)

    # set storage over provisioning percentage to 100
    over_provisioning_setting = client.update(over_provisioning_setting,
                                              value="100")

    # check volume status
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_TRUE)
    volume = common.wait_for_volume_detached(client, vol_name)
    assert volume["state"] == "detached"
    assert volume["created"] != ""

    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, vol_name)

    node_hosts = []
    for node in nodes:
        node_hosts.append(node["name"])
    # check all replica should be scheduled to default disk
    for replica in volume["replicas"]:
        id = replica["hostId"]
        assert id != ""
        assert replica["running"]
        expect_disk = expect_node_disk[id]
        assert replica["diskID"] == expect_disk["fsid"]
        assert expect_disk["path"] in replica["dataPath"]
        node_hosts = filter(lambda x: x != id, node_hosts)
    assert len(node_hosts) == 0

    # clean volume and disk
    cleanup_volume(client, vol_name)
    client.update(over_provisioning_setting,
                  value=old_provisioning_setting)
예제 #2
0
def test_soft_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Test that volumes with Soft Anti-Affinity work as expected.

    With Soft Anti-Affinity, a new replica should still be scheduled on a node
    with an existing replica, which will result in "Healthy" state but limited
    redundancy.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="true")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume["replicas"])
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
예제 #3
0
def test_soft_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Test that volumes with Soft Anti-Affinity can detach and reattach to a
    node properly.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="true")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume["replicas"])
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 3

    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
예제 #4
0
def test_replica_scheduler_exceed_over_provisioning(client):  # NOQA
    over_provisioning_setting = client.by_id_setting(
        SETTING_STORAGE_OVER_PROVISIONING_PERCENTAGE)
    old_provisioning_setting = over_provisioning_setting["value"]
    # set storage over provisioning percentage to 100
    over_provisioning_setting = client.update(over_provisioning_setting,
                                              value="100")

    # test exceed over provisioning limit couldn't be scheduled
    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            disk["storageReserved"] = \
                disk["storageMaximum"] - 1*Gi
        update_disks = get_update_disks(disks)
        node = node.diskUpdate(disks=update_disks)
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_status(client, node["name"],
                                 fsid, "storageReserved",
                                 disk["storageMaximum"] - 1*Gi)

    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name,
                                  size=str(2*Gi), numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_FALSE)
    client.delete(volume)
    common.wait_for_volume_delete(client, vol_name)
    client.update(over_provisioning_setting, value=old_provisioning_setting)
예제 #5
0
def test_replica_scheduler_update_over_provisioning(client):  # NOQA
    nodes = client.list_node()
    lht_hostId = get_self_host_id()
    expect_node_disk = {}
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            if disk["path"] == DEFAULT_DISK_PATH:
                expect_disk = disk
                expect_disk["fsid"] = fsid
                expect_node_disk[node["name"]] = expect_disk

    over_provisioning_setting = client.by_id_setting(
        SETTING_STORAGE_OVER_PROVISIONING_PERCENTAGE)
    old_provisioning_setting = over_provisioning_setting["value"]

    # set storage over provisioning percentage to 0
    # to test all replica couldn't be scheduled
    over_provisioning_setting = client.update(over_provisioning_setting,
                                              value="0")
    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name,
                                  size=SIZE, numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_FALSE)

    # set storage over provisioning percentage to 100
    over_provisioning_setting = client.update(over_provisioning_setting,
                                              value="100")

    # check volume status
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_TRUE)
    volume = common.wait_for_volume_detached(client, vol_name)
    assert volume["state"] == "detached"
    assert volume["created"] != ""

    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, vol_name)

    node_hosts = []
    for node in nodes:
        node_hosts.append(node["name"])
    # check all replica should be scheduled to default disk
    for replica in volume["replicas"]:
        id = replica["hostId"]
        assert id != ""
        assert replica["running"]
        expect_disk = expect_node_disk[id]
        assert replica["diskID"] == expect_disk["fsid"]
        assert expect_disk["path"] in replica["dataPath"]
        node_hosts = filter(lambda x: x != id, node_hosts)
    assert len(node_hosts) == 0

    # clean volume and disk
    cleanup_volume(client, vol_name)
    client.update(over_provisioning_setting,
                  value=old_provisioning_setting)
예제 #6
0
def test_replica_scheduler_exceed_over_provisioning(client):  # NOQA
    over_provisioning_setting = client.by_id_setting(
        SETTING_STORAGE_OVER_PROVISIONING_PERCENTAGE)
    old_provisioning_setting = over_provisioning_setting["value"]
    # set storage over provisioning percentage to 100
    over_provisioning_setting = client.update(over_provisioning_setting,
                                              value="100")

    # test exceed over provisioning limit couldn't be scheduled
    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            disk["storageReserved"] = \
                disk["storageMaximum"] - 1*Gi
        update_disks = get_update_disks(disks)
        node = node.diskUpdate(disks=update_disks)
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_status(client, node["name"], fsid, "storageReserved",
                                 disk["storageMaximum"] - 1 * Gi)

    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name,
                                  size=str(2 * Gi),
                                  numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(
        client, vol_name, "status", CONDITION_STATUS_FALSE)
    client.delete(volume)
    common.wait_for_volume_delete(client, vol_name)
    client.update(over_provisioning_setting, value=old_provisioning_setting)
예제 #7
0
def test_replica_scheduler_just_under_over_provisioning(client):  # NOQA
    over_provisioning_setting = client.by_id_setting(
        SETTING_STORAGE_OVER_PROVISIONING_PERCENTAGE)
    old_provisioning_setting = over_provisioning_setting["value"]
    # set storage over provisioning percentage to 100
    over_provisioning_setting = client.update(over_provisioning_setting,
                                              value="100")

    lht_hostId = get_self_host_id()
    nodes = client.list_node()
    expect_node_disk = {}
    max_size_array = []
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            if disk["path"] == DEFAULT_DISK_PATH:
                expect_disk = disk
                expect_disk["fsid"] = fsid
                expect_node_disk[node["name"]] = expect_disk
                max_size_array.append(disk["storageMaximum"])
            disk["storageReserved"] = 0
            update_disks = get_update_disks(disks)
            node = node.diskUpdate(disks=update_disks)
            disks = node["disks"]
            for fsid, disk in disks.iteritems():
                wait_for_disk_status(client, node["name"], fsid,
                                     "storageReserved", 0)

    max_size = min(max_size_array)
    # test just under over provisioning limit could be scheduled
    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name,
                                  size=str(max_size),
                                  numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(
        client, vol_name, "status", CONDITION_STATUS_TRUE)
    volume = common.wait_for_volume_detached(client, vol_name)
    assert volume["state"] == "detached"
    assert volume["created"] != ""

    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, vol_name)
    nodes = client.list_node()
    node_hosts = []
    for node in nodes:
        node_hosts.append(node["name"])
    # check all replica should be scheduled to default disk
    for replica in volume["replicas"]:
        id = replica["hostId"]
        assert id != ""
        assert replica["running"]
        expect_disk = expect_node_disk[id]
        assert replica["diskID"] == expect_disk["fsid"]
        assert expect_disk["path"] in replica["dataPath"]
        node_hosts = filter(lambda x: x != id, node_hosts)
    assert len(node_hosts) == 0

    # clean volume and disk
    cleanup_volume(client, vol_name)
    client.update(over_provisioning_setting, value=old_provisioning_setting)
예제 #8
0
def csi_backup_test(client, core_api, csi_pv, pvc, pod_make, base_image=""):  # NOQA
    pod_name = 'csi-backup-test'
    create_and_wait_csi_pod(pod_name, client, core_api, csi_pv, pvc, pod_make,
                            base_image, "")
    test_data = generate_random_data(VOLUME_RWTEST_SIZE)

    setting = client.by_id_setting(common.SETTING_BACKUP_TARGET)
    # test backupTarget for multiple settings
    backupstores = common.get_backupstore_url()
    i = 1
    for backupstore in backupstores:
        if common.is_backupTarget_s3(backupstore):
            backupsettings = backupstore.split("$")
            setting = client.update(setting, value=backupsettings[0])
            assert setting.value == backupsettings[0]

            credential = client.by_id_setting(
                    common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value=backupsettings[1])
            assert credential.value == backupsettings[1]
        else:
            setting = client.update(setting, value=backupstore)
            assert setting.value == backupstore
            credential = client.by_id_setting(
                    common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value="")
            assert credential.value == ""

        backupstore_test(client, core_api, csi_pv, pvc, pod_make, pod_name,
                         base_image, test_data, i)
        i += 1
예제 #9
0
def guaranteed_engine_cpu_setting_check(client, core_api, setting,  # NOQA
                                        val, cpu_val,  # NOQA
                                        instance_managers,  # NOQA
                                        state, desire):  # NOQA
    """
    We check if instance managers are in the desired state with
    correct setting
    desire is for reflect the state we are looking for.
    If desire is True, meanning we need the state to be the same.
    Otherwise, we are looking for the state to be different.
    e.g. 'Pending', 'OutofCPU', 'Terminating' they are all 'Not Running'.
    """
    # Update guaranteed engine cpu setting
    client.update(setting, value=val)

    # Give sometime to k8s to update the instance manager status
    time.sleep(6 * RETRY_INTERVAL)

    for im in instance_managers:
        wait_for_instance_manager_desire_state(client, core_api,
                                               im.name, state, desire)

    if desire:
        # Verify guaranteed CPU set correctly
        for im in instance_managers:
            pod = core_api.read_namespaced_pod(name=im.name,
                                               namespace=LONGHORN_NAMESPACE)
            if cpu_val:
                assert (pod.spec.containers[0].resources.requests['cpu'] ==
                        cpu_val)
            else:
                assert (not pod.spec.containers[0].resources.requests)
예제 #10
0
def csi_backup_test(client, core_api, csi_pv, pvc, pod_make, base_image=""):  # NOQA
    pod_name = 'csi-backup-test'
    create_and_wait_csi_pod(pod_name, client, core_api, csi_pv, pvc, pod_make,
                            base_image, "")
    test_data = generate_random_data(VOLUME_RWTEST_SIZE)

    setting = client.by_id_setting(common.SETTING_BACKUP_TARGET)
    # test backupTarget for multiple settings
    backupstores = common.get_backupstore_url()
    i = 1
    for backupstore in backupstores:
        if common.is_backupTarget_s3(backupstore):
            backupsettings = backupstore.split("$")
            setting = client.update(setting, value=backupsettings[0])
            assert setting["value"] == backupsettings[0]

            credential = client.by_id_setting(
                    common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value=backupsettings[1])
            assert credential["value"] == backupsettings[1]
        else:
            setting = client.update(setting, value=backupstore)
            assert setting["value"] == backupstore
            credential = client.by_id_setting(
                    common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value="")
            assert credential["value"] == ""

        backupstore_test(client, core_api, csi_pv, pvc, pod_make, pod_name,
                         base_image, test_data, i)
        i += 1
예제 #11
0
def reset_settings():
    yield
    client = get_longhorn_api_client()  # NOQA
    host_id = get_self_host_id()
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=True)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="true")
예제 #12
0
def test_node_evicted(client, volume_name, request):  # NOQA
    """
    Test the orphan CR is deleted in background but on-disk data still exists
    if the node is evicted
    1. Create a new-disk for holding valid and invalid orphaned replica
       directories
    2. Create a volume and attach to the current node
    3. Create a valid orphaned replica directories by copying the active
       replica directory
    4. Clean up volume
    5. Verify orphan list contains the valid orphaned replica directory
    6. Evict the node containing the orphaned replica directory
    7. Verify the orphan CR is deleted in background, but the on-disk
       orphaned replica directory still exists
    8. Disable node eviction
    9. Verify the orphan CR is created again and the on-disk orphaned replica
       directory still exists
    """

    disk_names = ["vol-disk-" + generate_random_id(4)]

    # Step 1
    lht_hostId = get_self_host_id()
    cleanup_node_disks(client, lht_hostId)
    disk_paths = crate_disks_on_host(client, disk_names, request)

    # Step 2
    volume = create_volume_with_replica_on_host(client, volume_name)

    # Step 3
    create_orphaned_directories_on_host(volume, disk_paths, 1)

    # Step 4
    cleanup_volume_by_name(client, volume_name)

    # Step 5
    assert wait_for_orphan_count(client, 1, 180) == 1

    # Step 6: request node eviction
    node = client.by_id_node(lht_hostId)
    client.update(node, allowScheduling=False, evictionRequested=True)
    node = wait_for_node_update(client, lht_hostId, "allowScheduling", False)

    # Step 7
    assert wait_for_orphan_count(client, 0, 180) == 0
    assert wait_for_file_count(os.path.join(disk_paths[0], "replicas"), 1,
                               180) == 1

    # Step 8: Disable node eviction
    node = client.by_id_node(lht_hostId)
    client.update(node, allowScheduling=True, evictionRequested=False)
    node = wait_for_node_update(client, lht_hostId, "allowScheduling", True)

    # Step 9
    assert wait_for_orphan_count(client, 1, 180) == 1
    assert wait_for_file_count(os.path.join(disk_paths[0], "replicas"), 1,
                               180) == 1
예제 #13
0
def test_hard_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity are still able to detach and
    reattach to a node properly, even in degraded state.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Detach the volume.
    7. Verify that volume only have 2 replicas
        1. Unhealthy replica will be removed upon detach.
    8. Attach the volume again.
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
        3. Verify only two of three replicas of volume are healthy.
        4. Verify the remaining replica doesn't have `replica.HostID`, meaning
        it's unscheduled
    9. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume.replicas) == 2

    volume.attach(hostId=host_id)
    # Make sure we're still not getting another successful replica.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    assert sum([
        1 for replica in volume.replicas
        if replica.running and replica.mode == "RW"
    ]) == 2
    assert sum([1 for replica in volume.replicas if not replica.hostId]) == 1
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
예제 #14
0
def test_hard_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity work as expected.

    With Hard Anti-Affinity, scheduling on nodes with existing replicas should
    be forbidden, resulting in "Degraded" state.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
        3. Verify only two of three replicas of volume are healthy.
        4. Verify the remaining replica doesn't have `replica.HostID`, meaning
        it's unscheduled
    6. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    # Instead of waiting for timeout and lengthening the tests a significant
    # amount we can make sure the scheduling isn't working by making sure the
    # volume becomes Degraded and reports a scheduling error.
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    # While there are three replicas that should exist to meet the Volume's
    # request, only two of those volumes should actually be Healthy.
    volume = client.by_id_volume(volume_name)
    assert sum([
        1 for replica in volume.replicas
        if replica.running and replica.mode == "RW"
    ]) == 2
    # Confirm that the final volume is an unscheduled volume.
    assert sum([1 for replica in volume.replicas if not replica.hostId]) == 1
    # Three replicas in total should still exist.
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
예제 #15
0
def test_replica_rebuild_per_volume_limit(client, core_api, storage_class,
                                          sts_name, statefulset):  # NOQA
    """
    Test the volume always only have one replica scheduled for rebuild

    1. Set soft anti-affinity to `true`.
    2. Create a volume with 1 replica.
    3. Attach the volume and write a few hundreds MB data to it.
    4. Scale the volume replica to 5.
    5. Constantly checking the volume replica list to make sure there should be
       only 1 replica in WO state.
    6. Wait for the volume to complete rebuilding. Then remove 4 of the 5
       replicas.
    7. Monitoring the volume replica list again.
    8. Once the rebuild was completed again, verify the data checksum.
    """
    replica_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(replica_soft_anti_affinity_setting, value="true")

    data_path = '/data/test'
    storage_class['parameters']['numberOfReplicas'] = "1"
    vol_name, pod_name, md5sum = \
        common.prepare_statefulset_with_data_in_mb(
            client, core_api, statefulset, sts_name, storage_class,
            data_path=data_path, data_size_in_mb=DATA_SIZE_IN_MB_2)

    # Scale the volume replica to 5
    r_count = 5
    vol = client.by_id_volume(vol_name)
    vol.updateReplicaCount(replicaCount=r_count)

    vol = common.wait_for_volume_replicas_mode(client,
                                               vol_name,
                                               'RW',
                                               replica_count=r_count)

    # Delete 4 volume replicas
    del vol.replicas[0]
    for r in vol.replicas:
        vol.replicaRemove(name=r.name)

    r_count = 1
    common.wait_for_volume_replicas_mode(client,
                                         vol_name,
                                         'RW',
                                         replica_count=r_count)

    assert md5sum == common.get_pod_data_md5sum(core_api, pod_name, data_path)
예제 #16
0
def test_provisioner_tags(client, core_api, node_default_tags, storage_class,
                          pvc, pod):  # NOQA
    """
    Test that a StorageClass can properly provision a volume with requested
    Tags.

    Test prerequisite:
      - set Replica Node Level Soft Anti-Affinity enabled

    1. Use `node_default_tags` to add default tags to nodes.
    2. Create a StorageClass with disk and node tag set.
    3. Create PVC and Pod.
    4. Verify the volume has the correct parameters and tags.
    """

    replica_node_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(replica_node_soft_anti_affinity_setting, value="true")

    # Prepare pod and volume specs.
    pod_name = 'provisioner-tags-test'
    tag_spec = {
        "disk": ["ssd", "nvme"],
        "expected": 1,
        "node": ["storage", "main"]
    }

    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [create_pvc_spec(pvc['metadata']['name'])]
    pvc['spec']['storageClassName'] = DEFAULT_STORAGECLASS_NAME
    storage_class['metadata']['name'] = DEFAULT_STORAGECLASS_NAME
    storage_class['parameters']['diskSelector'] = 'ssd,nvme'
    storage_class['parameters']['nodeSelector'] = 'storage,main'
    volume_size = DEFAULT_VOLUME_SIZE * Gi

    create_storage(core_api, storage_class, pvc)
    create_and_wait_pod(core_api, pod)
    pvc_volume_name = get_volume_name(core_api, pvc['metadata']['name'])

    # Confirm that the volume has all the correct parameters we gave it.
    volumes = client.list_volume()
    assert len(volumes) == 1
    assert volumes.data[0].name == pvc_volume_name
    assert volumes.data[0].size == str(volume_size)
    assert volumes.data[0].numberOfReplicas == \
        int(storage_class['parameters']['numberOfReplicas'])
    assert volumes.data[0].state == "attached"
    check_volume_replicas(volumes.data[0], tag_spec, node_default_tags)
예제 #17
0
def test_soft_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Test that volumes with Soft Anti-Affinity can detach and reattach to a
    node properly.

    1. Create a volume and attach to the current node.
    2. Generate and write `data` to the volume
    3. Set `soft anti-affinity` to true
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
    6. Wait for the new replica to be rebuilt
    7. Detach the volume.
    8. Verify there are 3 replicas
    9. Attach the volume again. Verify there are still 3 replicas
    10. Verify the `data`.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="true")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = list(map(lambda replica: replica.name, volume.replicas))
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume.replicas) == 3

    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
예제 #18
0
def test_node_controller_sync_disk_state(client):  # NOQA
    # update StorageMinimalAvailablePercentage to test Disk State
    setting = client.by_id_setting(
        SETTING_STORAGE_MINIMAL_AVAILABLE_PERCENTAGE)
    old_minimal_available_percentage = setting["value"]
    setting = client.update(setting, value="100")
    assert setting["value"] == "100"
    nodes = client.list_node()
    # wait for node controller to update disk state
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_conditions(client, node["name"],
                                     fsid, DISK_CONDITION_SCHEDULABLE,
                                     CONDITION_STATUS_FALSE)

    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_FALSE

    setting = client.update(setting, value=old_minimal_available_percentage)
    assert setting["value"] == old_minimal_available_percentage
    # wait for node controller to update disk state
    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_conditions(client, node["name"],
                                     fsid, DISK_CONDITION_SCHEDULABLE,
                                     CONDITION_STATUS_TRUE)

    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE
예제 #19
0
def test_orphan_auto_deletion(client, volume_name, request):  # NOQA
    """
    Test orphaned dirs creation and background deletion in multiple disks
    1. Create a new disks for holding orphaned replica directories
    2. Create a volume and attach to the current node
    3. Create orphaned replica directories by copying the active
       replica directory
    4. Clean up volume
    5. Verify orphan list contains the orphan CRs for replica
       directories
    6. Enable the orphan-auto-deletion setting
    7. Verify orphan list is empty and the orphaned directory is
       deleted in background
    8. Clean up disk
    """
    disk_names = ["vol-disk-" + generate_random_id(4)]

    # Step 1
    lht_hostId = get_self_host_id()
    cleanup_node_disks(client, lht_hostId)
    disk_paths = crate_disks_on_host(client, disk_names, request)

    # Step 2
    volume = create_volume_with_replica_on_host(client, volume_name)

    # Step 3
    create_orphaned_directories_on_host(volume, disk_paths, 1)

    # Step 4
    cleanup_volume_by_name(client, volume_name)

    # Step 5
    assert wait_for_orphan_count(client, 1, 180) == 1

    # Step 6: enable orphan auto deletion
    setting = client.by_id_setting(SETTING_ORPHAN_AUTO_DELETION)
    client.update(setting, value="true")

    # Step 7
    assert wait_for_orphan_count(client, 0, 180) == 0
    assert wait_for_file_count(os.path.join(disk_paths[0], "replicas"), 0,
                               180) == 0
예제 #20
0
def test_node_controller_sync_disk_state(client):  # NOQA
    # update StorageMinimalAvailablePercentage to test Disk State
    setting = client.by_id_setting(
        SETTING_STORAGE_MINIMAL_AVAILABLE_PERCENTAGE)
    old_minimal_available_percentage = setting["value"]
    setting = client.update(setting, value="100")
    assert setting["value"] == "100"
    nodes = client.list_node()
    # wait for node controller to update disk state
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_conditions(client, node["name"], fsid,
                                     DISK_CONDITION_SCHEDULABLE,
                                     CONDITION_STATUS_FALSE)

    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_FALSE

    setting = client.update(setting, value=old_minimal_available_percentage)
    assert setting["value"] == old_minimal_available_percentage
    # wait for node controller to update disk state
    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_conditions(client, node["name"], fsid,
                                     DISK_CONDITION_SCHEDULABLE,
                                     CONDITION_STATUS_TRUE)

    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE
예제 #21
0
def test_update_node(client):  # NOQA
    # test node update
    nodes = client.list_node()
    assert len(nodes) > 0

    lht_hostId = get_self_host_id()
    node = client.by_id_node(lht_hostId)
    node = client.update(node, allowScheduling=False)
    node = common.wait_for_node_update(client, lht_hostId, "allowScheduling",
                                       False)
    assert not node["allowScheduling"]
    node = client.by_id_node(lht_hostId)
    assert not node["allowScheduling"]

    node = client.update(node, allowScheduling=True)
    node = common.wait_for_node_update(client, lht_hostId, "allowScheduling",
                                       True)
    assert node["allowScheduling"]
    node = client.by_id_node(lht_hostId)
    assert node["allowScheduling"]
예제 #22
0
def test_update_node(client):  # NOQA
    # test node update
    nodes = client.list_node()
    assert len(nodes) > 0

    lht_hostId = get_self_host_id()
    node = client.by_id_node(lht_hostId)
    node = client.update(node, allowScheduling=False)
    node = common.wait_for_node_update(client, lht_hostId,
                                       "allowScheduling", False)
    assert not node["allowScheduling"]
    node = client.by_id_node(lht_hostId)
    assert not node["allowScheduling"]

    node = client.update(node, allowScheduling=True)
    node = common.wait_for_node_update(client, lht_hostId,
                                       "allowScheduling", True)
    assert node["allowScheduling"]
    node = client.by_id_node(lht_hostId)
    assert node["allowScheduling"]
예제 #23
0
def test_hard_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity work as expected.

    With Hard Anti-Affinity, scheduling on nodes with existing replicas should
    be forbidden, resulting in "Degraded" state.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    # Instead of waiting for timeout and lengthening the tests a significant
    # amount we can make sure the scheduling isn't working by making sure the
    # volume becomes Degraded and reports a scheduling error.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    # While there are three replicas that should exist to meet the Volume's
    # request, only two of those volumes should actually be Healthy.
    assert sum([
        1 for replica in volume["replicas"]
        if replica["running"] and replica["mode"] == "RW"
    ]) == 2
    # Confirm that the final volume is an unscheduled volume.
    assert sum([1 for replica in volume["replicas"]
                if not replica["hostId"]]) == 1
    # Three replicas in total should still exist.
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
예제 #24
0
def test_hard_anti_affinity_live_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas live once
    a valid node is available.

    If no nodes without existing replicas are available, the volume should
    remain in "Degraded" state. However, once one is available, the replica
    should now be scheduled successfully, with the volume returning to
    "Healthy" state.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume["replicas"])
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    # Allow scheduling on host node again
    client.update(node, allowScheduling=True)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
예제 #25
0
def test_hard_anti_affinity_offline_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas during
    the attaching process once a valid node is available.

    Once a new replica has been built as part of the attaching process, the
    volume should be Healthy again.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume["replicas"])
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    client.update(node, allowScheduling=True)
    volume.attach(hostId=host_id)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
예제 #26
0
def test_replica_zone_anti_affinity(client, core_api, volume_name,
                                    k8s_node_zone_tags):  # NOQA
    """
    Test replica scheduler with zone anti-affinity

    1. Set zone anti-affinity to hard.
    2. Label nodes 1 & 2 with same zone label "zone1".
    Label node 3 with zone label "zone2".
    3. Create a volume with 3 replicas.
    4. Wait for volume condition `scheduled` to be false.
    5. Label node 2 with zone label "zone3".
    6. Wait for volume condition `scheduled` to be success.
    7. Clear the volume.
    8. Set zone anti-affinity to soft.
    9. Change the zone labels on node 1 & 2 & 3 to "zone1".
    10. Create a volume.
    11. Wait for volume condition `scheduled` to be success.
    12. Clean up the replica count, the zone labels and the volume.
    """

    wait_longhorn_node_zone_updated(client)

    replica_node_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(replica_node_soft_anti_affinity_setting, value="false")

    replica_zone_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY)
    client.update(replica_zone_soft_anti_affinity_setting, value="false")

    volume = create_and_check_volume(client, volume_name)

    lh_nodes = client.list_node()

    count = 0
    for node in lh_nodes:
        count += 1
        set_k8s_node_zone_label(core_api, node.name, "lh-zone" + str(count))

    wait_longhorn_node_zone_updated(client)

    wait_for_volume_condition_scheduled(client, volume_name, "status",
                                        CONDITION_STATUS_TRUE)

    replica_zone_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY)
    client.update(replica_zone_soft_anti_affinity_setting, value="true")

    volume = client.by_id_volume(volume_name)
    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    for node in lh_nodes:
        set_k8s_node_zone_label(core_api, node.name, "lh-zone1")

    wait_longhorn_node_zone_updated(client)

    volume = create_and_check_volume(client, volume_name)
    wait_for_volume_condition_scheduled(client, volume_name, "status",
                                        CONDITION_STATUS_TRUE)
예제 #27
0
def test_hard_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity are still able to detach and
    reattach to a node properly, even in degraded state.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 2

    volume.attach(hostId=host_id)
    # Make sure we're still not getting another successful replica.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    assert sum([
        1 for replica in volume["replicas"]
        if replica["running"] and replica["mode"] == "RW"
    ]) == 2
    assert sum([1 for replica in volume["replicas"]
                if not replica["hostId"]]) == 1
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
예제 #28
0
def test_soft_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Test that volumes with Soft Anti-Affinity work as expected.

    With Soft Anti-Affinity, a new replica should still be scheduled on a node
    with an existing replica, which will result in "Healthy" state but limited
    redundancy.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to true
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
    6. Wait for the volume to complete rebuild. Volume should have 3 replicas.
    7. Verify `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="true")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = list(map(lambda replica: replica.name, volume.replicas))
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
예제 #29
0
def test_hard_anti_affinity_offline_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas during
    the attaching process once a valid node is available.

    Once a new replica has been built as part of the attaching process, the
    volume should be Healthy again.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Detach the volume.
    7. Enable current node's scheduling.
    8. Attach the volume again.
    9. Wait for volume to become healthy with 3 replicas
    10. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume.replicas)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    client.update(node, allowScheduling=True)
    volume.attach(hostId=host_id)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
예제 #30
0
def test_hard_anti_affinity_live_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas live once
    a valid node is available.

    If no nodes without existing replicas are available, the volume should
    remain in "Degraded" state. However, once one is available, the replica
    should now be scheduled successfully, with the volume returning to
    "Healthy" state.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Enable the current node's scheduling
    7. Wait for volume to start rebuilding and become healthy again
    8. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume.replicas)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    # Allow scheduling on host node again
    client.update(node, allowScheduling=True)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
예제 #31
0
def test_replica_scheduler_just_under_over_provisioning(client):  # NOQA
    over_provisioning_setting = client.by_id_setting(
        SETTING_STORAGE_OVER_PROVISIONING_PERCENTAGE)
    old_provisioning_setting = over_provisioning_setting["value"]
    # set storage over provisioning percentage to 100
    over_provisioning_setting = client.update(over_provisioning_setting,
                                              value="100")

    lht_hostId = get_self_host_id()
    nodes = client.list_node()
    expect_node_disk = {}
    max_size_array = []
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            if disk["path"] == DEFAULT_DISK_PATH:
                expect_disk = disk
                expect_disk["fsid"] = fsid
                expect_node_disk[node["name"]] = expect_disk
                max_size_array.append(disk["storageMaximum"])
            disk["storageReserved"] = 0
            update_disks = get_update_disks(disks)
            node = node.diskUpdate(disks=update_disks)
            disks = node["disks"]
            for fsid, disk in disks.iteritems():
                wait_for_disk_status(client, node["name"],
                                     fsid, "storageReserved", 0)

    max_size = min(max_size_array)
    # test just under over provisioning limit could be scheduled
    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name,
                                  size=str(max_size),
                                  numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_TRUE)
    volume = common.wait_for_volume_detached(client, vol_name)
    assert volume["state"] == "detached"
    assert volume["created"] != ""

    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, vol_name)
    nodes = client.list_node()
    node_hosts = []
    for node in nodes:
        node_hosts.append(node["name"])
    # check all replica should be scheduled to default disk
    for replica in volume["replicas"]:
        id = replica["hostId"]
        assert id != ""
        assert replica["running"]
        expect_disk = expect_node_disk[id]
        assert replica["diskID"] == expect_disk["fsid"]
        assert expect_disk["path"] in replica["dataPath"]
        node_hosts = filter(lambda x: x != id, node_hosts)
    assert len(node_hosts) == 0

    # clean volume and disk
    cleanup_volume(client, vol_name)
    client.update(over_provisioning_setting, value=old_provisioning_setting)
예제 #32
0
def test_allow_volume_creation_with_degraded_availability_csi(
        client, core_api, apps_api, make_deployment_with_pvc):  # NOQA
    """
    Test Allow Volume Creation with Degraded Availability (CSI)

    Requirement:
    1. Set `allow-volume-creation-with-degraded-availability` to true.
    2. Set `node-level-soft-anti-affinity` to false.

    Steps:
    1. Disable scheduling for node 3.
    2. Create a Deployment Pod with a volume and 3 replicas.
        1. After the volume is attached, scheduling error should be seen.
    3. Write data to the Pod.
    4. Scale down the deployment to 0 to detach the volume.
        1. Scheduled condition should become true.
    5. Scale up the deployment back to 1 and verify the data.
        1. Scheduled condition should become false.
    6. Enable the scheduling for node 3.
        1. Volume should start rebuilding on the node 3 soon.
        2. Once the rebuilding starts, the scheduled condition should become
           true.
    7. Once rebuild finished, scale down and back the deployment to verify
       the data.
    """
    setting = client.by_id_setting(common.SETTING_DEGRADED_AVAILABILITY)
    client.update(setting, value="true")

    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")

    nodes = client.list_node()
    node3 = nodes[2]
    client.update(node3, allowScheduling=False)

    vol = common.create_and_check_volume(client, generate_volume_name(),
                                         size=str(500 * Mi))

    pv_name = vol.name + "-pv"
    common.create_pv_for_volume(client, core_api, vol, pv_name)

    pvc_name = vol.name + "-pvc"
    common.create_pvc_for_volume(client, core_api, vol, pvc_name)

    deployment_name = vol.name + "-dep"
    deployment = make_deployment_with_pvc(deployment_name, pvc_name)
    deployment["spec"]["replicas"] = 3
    apps_api.create_namespaced_deployment(body=deployment, namespace='default')
    common.wait_for_volume_status(client, vol.name,
                                  common.VOLUME_FIELD_STATE,
                                  common.VOLUME_STATE_ATTACHED)
    common.wait_scheduling_failure(client, vol.name)

    data_path = "/data/test"
    pod = common.wait_and_get_any_deployment_pod(core_api, deployment_name)
    common.write_pod_volume_random_data(core_api, pod.metadata.name,
                                        data_path, common.DATA_SIZE_IN_MB_2)
    created_md5sum = get_pod_data_md5sum(core_api, pod.metadata.name,
                                         data_path)

    deployment['spec']['replicas'] = 0
    apps_api.patch_namespaced_deployment(body=deployment,
                                         namespace='default',
                                         name=deployment_name)
    vol = common.wait_for_volume_detached(client, vol.name)
    assert vol.conditions[VOLUME_CONDITION_SCHEDULED]['status'] == "True"

    deployment['spec']['replicas'] = 1
    apps_api.patch_namespaced_deployment(body=deployment,
                                         namespace='default',
                                         name=deployment_name)
    common.wait_for_volume_status(client, vol.name,
                                  common.VOLUME_FIELD_STATE,
                                  common.VOLUME_STATE_ATTACHED)
    common.wait_for_volume_condition_scheduled(client, vol.name, "status",
                                               common.CONDITION_STATUS_FALSE)
    pod = common.wait_and_get_any_deployment_pod(core_api, deployment_name)
    assert created_md5sum == get_pod_data_md5sum(core_api,
                                                 pod.metadata.name,
                                                 data_path)

    client.update(node3, allowScheduling=True)
    common.wait_for_rebuild_start(client, vol.name)
    vol = client.by_id_volume(vol.name)
    assert vol.conditions[VOLUME_CONDITION_SCHEDULED]['status'] == "True"
    common.wait_for_rebuild_complete(client, vol.name)

    deployment['spec']['replicas'] = 0
    apps_api.patch_namespaced_deployment(body=deployment,
                                         namespace='default',
                                         name=deployment_name)
    common.wait_for_volume_detached(client, vol.name)

    deployment['spec']['replicas'] = 1
    apps_api.patch_namespaced_deployment(body=deployment,
                                         namespace='default',
                                         name=deployment_name)
    common.wait_for_volume_status(client, vol.name,
                                  common.VOLUME_FIELD_STATE,
                                  common.VOLUME_STATE_ATTACHED)

    pod = common.wait_and_get_any_deployment_pod(core_api, deployment_name)
    assert created_md5sum == get_pod_data_md5sum(core_api,
                                                 pod.metadata.name,
                                                 data_path)
예제 #33
0
def test_csi_expansion_with_replica_failure(client, core_api, storage_class, pvc, pod_manifest):  # NOQA
    """
    Test expansion success but with one replica expansion failure

    1. Create a new `storage_class` with `allowVolumeExpansion` set
    2. Create PVC and Pod with dynamic provisioned volume from the StorageClass
    3. Create an empty directory with expansion snapshot tmp meta file path
       for one replica so that the replica expansion will fail
    4. Generate `test_data` and write to the pod
    5. Delete the pod and wait for volume detachment
    6. Update pvc.spec.resources to expand the volume
    7. Check expansion result using Longhorn API. There will be expansion error
       caused by the failed replica but overall the expansion should succeed.
    8. Create a new pod and
       check if the volume will reuse the failed replica during rebuilding.
    9. Validate the volume content, then check if data writing looks fine
    """
    replenish_wait_setting = \
        client.by_id_setting(SETTING_REPLICA_REPLENISHMENT_WAIT_INTERVAL)
    client.update(replenish_wait_setting, value="600")

    create_storage_class(storage_class)

    pod_name = 'csi-expansion-with-replica-failure-test'
    pvc_name = pod_name + "-pvc"
    pvc['metadata']['name'] = pvc_name
    pvc['spec']['storageClassName'] = storage_class['metadata']['name']
    create_pvc(pvc)

    pod_manifest['metadata']['name'] = pod_name
    pod_manifest['spec']['volumes'] = [{
        'name':
            pod_manifest['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {'claimName': pvc_name},
    }]
    create_and_wait_pod(core_api, pod_manifest)

    expand_size = str(EXPANDED_VOLUME_SIZE*Gi)
    pv = wait_and_get_pv_for_pvc(core_api, pvc_name)
    assert pv.status.phase == "Bound"
    volume_name = pv.spec.csi.volume_handle
    volume = client.by_id_volume(volume_name)
    failed_replica = volume.replicas[0]
    fail_replica_expansion(client, core_api,
                           volume_name, expand_size, [failed_replica])

    test_data = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name, test_data)

    delete_and_wait_pod(core_api, pod_name)
    wait_for_volume_detached(client, volume_name)

    # There will be replica expansion error info
    # but the expansion should succeed.
    pvc['spec']['resources'] = {
        'requests': {
            'storage': size_to_string(EXPANDED_VOLUME_SIZE*Gi)
        }
    }
    expand_and_wait_for_pvc(core_api, pvc)
    wait_for_expansion_failure(client, volume_name)
    wait_for_volume_expansion(client, volume_name)
    volume = client.by_id_volume(volume_name)
    assert volume.state == "detached"
    assert volume.size == expand_size
    for r in volume.replicas:
        if r.name == failed_replica.name:
            assert r.failedAt != ""
        else:
            assert r.failedAt == ""

    # Check if the failed replica will be reused during rebuilding,
    # and if the volume still works fine.
    create_and_wait_pod(core_api, pod_manifest)
    volume = wait_for_volume_healthy(client, volume_name)
    for r in volume.replicas:
        assert r.mode == "RW"
    resp = read_volume_data(core_api, pod_name)
    assert resp == test_data
    test_data = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name, test_data)
    resp = read_volume_data(core_api, pod_name)
    assert resp == test_data
예제 #34
0
def test_replica_auto_balance_zone_best_effort_with_uneven_node_in_zones(
        client, core_api, volume_name, pod):  # NOQA
    """
    Given set `replica-soft-anti-affinity` to `true`.
    And set `replica-zone-soft-anti-affinity` to `true`.
    And set `replicaAutoBalance` to `best-effort`.
    And set node-1 to zone-1.
        set node-2 to zone-1.
        set node-3 to zone-1.
        set node-4 to zone-2.
        set node-5 to zone-2.
    And disable scheduling for node-2.
        disable scheduling for node-3.
        disable scheduling for node-4.
        disable scheduling for node-5.
    And create volume with 4 replicas.
    And attach the volume to node-1.

    Scenario: replica auto-balance zones with best-effort should balance
              replicas in zone.

    Given 4 replica running on node-1.
          0 replica running on node-2.
          0 replica running on node-3.
          0 replica running on node-4.
          0 replica running on node-5.

    When enable scheduling for node-4.
    Then count replicas on each zones.
    And 2 replica running on zode-1.
        2 replica running on zode-2.

    When enable scheduling for node-2.
         enable scheduling for node-3.
    Then count replicas on each nodes.
    And 1 replica running on node-1.
        1 replica running on node-2.
        1 replica running on node-3.
        1 replica running on node-4.
        0 replica running on node-5.

    When enable scheduling for node-5.
    Then count replicas on each zones.
    And 2 replica running on zode-1.
        2 replica running on zode-2.
    """

    common.update_setting(client, SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY,
                          "true")
    common.update_setting(client, SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY,
                          "true")
    common.update_setting(client, SETTING_DEFAULT_DATA_LOCALITY, "best-effort")
    common.update_setting(client, SETTING_REPLICA_AUTO_BALANCE, "best-effort")

    n1, n2, n3, n4, n5 = client.list_node()

    set_k8s_node_zone_label(core_api, n1.name, ZONE1)
    set_k8s_node_zone_label(core_api, n2.name, ZONE1)
    set_k8s_node_zone_label(core_api, n3.name, ZONE1)
    set_k8s_node_zone_label(core_api, n4.name, ZONE2)
    set_k8s_node_zone_label(core_api, n5.name, ZONE2)
    wait_longhorn_node_zone_updated(client)

    client.update(n2, allowScheduling=False)
    client.update(n3, allowScheduling=False)
    client.update(n4, allowScheduling=False)
    client.update(n5, allowScheduling=False)

    n_replicas = 4
    volume = create_and_check_volume(client,
                                     volume_name,
                                     num_of_replicas=n_replicas)
    volume.attach(hostId=n1.name)

    for _ in range(RETRY_COUNTS):
        n1_r_count = common.get_host_replica_count(client,
                                                   volume_name,
                                                   n1.name,
                                                   chk_running=True)
        n2_r_count = common.get_host_replica_count(client,
                                                   volume_name,
                                                   n2.name,
                                                   chk_running=False)
        n3_r_count = common.get_host_replica_count(client,
                                                   volume_name,
                                                   n3.name,
                                                   chk_running=False)
        n4_r_count = common.get_host_replica_count(client,
                                                   volume_name,
                                                   n4.name,
                                                   chk_running=False)
        n5_r_count = common.get_host_replica_count(client,
                                                   volume_name,
                                                   n5.name,
                                                   chk_running=False)

        if n1_r_count == 4 and \
                n2_r_count == n3_r_count == n4_r_count == n5_r_count == 0:
            break

        time.sleep(RETRY_INTERVAL)
    assert n1_r_count == 4
    assert n2_r_count == 0
    assert n3_r_count == 0
    assert n4_r_count == 0
    assert n5_r_count == 0

    client.update(n4, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        z1_r_count = get_zone_replica_count(client,
                                            volume_name,
                                            ZONE1,
                                            chk_running=True)
        z2_r_count = get_zone_replica_count(client,
                                            volume_name,
                                            ZONE2,
                                            chk_running=True)

        if z1_r_count == z2_r_count == 2:
            break

        time.sleep(RETRY_INTERVAL)

    assert z1_r_count == 2
    assert z2_r_count == 2

    client.update(n2, allowScheduling=True)
    client.update(n3, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        n1_r_count = common.get_host_replica_count(client,
                                                   volume_name,
                                                   n1.name,
                                                   chk_running=True)
        n2_r_count = common.get_host_replica_count(client,
                                                   volume_name,
                                                   n2.name,
                                                   chk_running=True)
        n3_r_count = common.get_host_replica_count(client,
                                                   volume_name,
                                                   n3.name,
                                                   chk_running=True)
        n4_r_count = common.get_host_replica_count(client,
                                                   volume_name,
                                                   n4.name,
                                                   chk_running=True)
        n5_r_count = common.get_host_replica_count(client,
                                                   volume_name,
                                                   n5.name,
                                                   chk_running=False)

        if n1_r_count == n2_r_count == n3_r_count == n4_r_count == 1 and \
                n5_r_count == 0:
            break

        time.sleep(RETRY_INTERVAL)
    assert n1_r_count == 1
    assert n2_r_count == 1
    assert n3_r_count == 1
    assert n4_r_count == 1
    assert n5_r_count == 0

    client.update(n5, allowScheduling=True)

    for _ in range(RETRY_COUNTS):
        z1_r_count = get_zone_replica_count(client,
                                            volume_name,
                                            ZONE1,
                                            chk_running=True)
        z2_r_count = get_zone_replica_count(client,
                                            volume_name,
                                            ZONE2,
                                            chk_running=True)

        if z1_r_count == z2_r_count == 2:
            break

        time.sleep(RETRY_INTERVAL)

    assert z1_r_count == 2
    assert z2_r_count == 2
예제 #35
0
def ha_backup_deletion_recovery_test(client, volume_name, size, base_image=""):  # NOQA
    volume = client.create_volume(name=volume_name, size=size,
                                  numberOfReplicas=2, baseImage=base_image)
    volume = common.wait_for_volume_detached(client, volume_name)

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    setting = client.by_id_setting(common.SETTING_BACKUP_TARGET)
    # test backupTarget for multiple settings
    backupstores = common.get_backupstore_url()
    for backupstore in backupstores:
        if common.is_backupTarget_s3(backupstore):
            backupsettings = backupstore.split("$")
            setting = client.update(setting, value=backupsettings[0])
            assert setting["value"] == backupsettings[0]

            credential = client.by_id_setting(
                    common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value=backupsettings[1])
            assert credential["value"] == backupsettings[1]
        else:
            setting = client.update(setting, value=backupstore)
            assert setting["value"] == backupstore
            credential = client.by_id_setting(
                    common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value="")
            assert credential["value"] == ""

        data = write_volume_random_data(volume)
        snap2 = volume.snapshotCreate()
        volume.snapshotCreate()

        volume.snapshotBackup(name=snap2["name"])

        _, b = common.find_backup(client, volume_name, snap2["name"])

        res_name = common.generate_volume_name()
        res_volume = client.create_volume(name=res_name, size=size,
                                          numberOfReplicas=2,
                                          fromBackup=b["url"])
        res_volume = common.wait_for_volume_detached(client, res_name)
        res_volume = res_volume.attach(hostId=host_id)
        res_volume = common.wait_for_volume_healthy(client, res_name)
        check_volume_data(res_volume, data)

        snapshots = res_volume.snapshotList()
        # only the backup snapshot + volume-head
        assert len(snapshots) == 2
        backup_snapshot = ""
        for snap in snapshots:
            if snap["name"] != "volume-head":
                backup_snapshot = snap["name"]
        assert backup_snapshot != ""

        res_volume.snapshotCreate()
        snapshots = res_volume.snapshotList()
        assert len(snapshots) == 3

        res_volume.snapshotDelete(name=backup_snapshot)
        res_volume.snapshotPurge()
        snapshots = res_volume.snapshotList()
        assert len(snapshots) == 2

        ha_rebuild_replica_test(client, res_name)

        res_volume = res_volume.detach()
        res_volume = common.wait_for_volume_detached(client, res_name)

        client.delete(res_volume)
        common.wait_for_volume_delete(client, res_name)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    common.wait_for_volume_delete(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 0
def test_pvc_creation_with_default_sc_set(client, core_api, storage_class,
                                          pod):  # NOQA
    """
    Test creating PVC with default StorageClass set

    The target is to make sure the newly create PV/PVC won't use default
    StorageClass, and if there is no default StorageClass, PV/PVC can still be
    created.

    1. Create a StorageClass and set it to be the default StorageClass
    2. Update static StorageClass to `longhorn-static-test`
    3. Create volume then PV/PVC.
    4. Make sure the newly created PV/PVC using StorageClass
    `longhorn-static-test`
    5. Create pod with PVC.
    6. Verify volume's Kubernetes Status
    7. Remove PVC and Pod.
    8. Verify volume's Kubernetes Status only contains current PV and history
    9. Wait for volume to detach (since pod is deleted)
    10. Reuse the volume on a new pod. Wait for the pod to start
    11. Verify volume's Kubernetes Status reflect the new pod.
    12. Delete PV/PVC/Pod.
    13. Verify volume's Kubernetes Status only contains history
    14. Delete the default StorageClass.
    15. Create PV/PVC for the volume.
    16. Make sure the PV's StorageClass is static StorageClass
    """
    # set default storage class
    storage_class['metadata']['annotations'] = \
        {"storageclass.kubernetes.io/is-default-class": "true"}
    create_storage_class(storage_class)

    static_sc_name = "longhorn-static-test"
    setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC)
    setting = client.update(setting, value=static_sc_name)
    assert setting.value == static_sc_name

    volume_name = "test-pvc-creation-with-sc"  # NOQA
    pod_name = "pod-" + volume_name
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pv_name = "pv-" + volume_name
    pvc_name = "pvc-" + volume_name
    pvc_name_extra = "pvc-" + volume_name + "-extra"

    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)

    ret = core_api.list_namespaced_persistent_volume_claim(namespace='default')
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc_found = item
            break
    assert pvc_found
    assert pvc_found.spec.storage_class_name == static_sc_name

    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [{
        'name':
        pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pvc_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    ks = {
        'pvName':
        pv_name,
        'pvStatus':
        'Bound',
        'namespace':
        'default',
        'pvcName':
        pvc_name,
        'lastPVCRefAt':
        '',
        'lastPodRefAt':
        '',
        'workloadsStatus': [
            {
                'podName': pod_name,
                'podStatus': 'Running',
                'workloadName': '',
                'workloadType': '',
            },
        ],
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name)

    ks = {
        'pvName': pv_name,
        'pvStatus': 'Released',
        'namespace': 'default',
        'pvcName': pvc_name,
        'lastPVCRefAt': 'not empty',
        'lastPodRefAt': 'not empty',
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    # try to reuse the pv
    volume = wait_for_volume_detached(client, volume_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name_extra)
    pod['spec']['volumes'][0]['persistentVolumeClaim']['claimName'] = \
        pvc_name_extra
    create_and_wait_pod(core_api, pod)

    ks = {
        'pvName':
        pv_name,
        'pvStatus':
        'Bound',
        'namespace':
        'default',
        'pvcName':
        pvc_name_extra,
        'lastPVCRefAt':
        '',
        'lastPodRefAt':
        '',
        'workloadsStatus': [
            {
                'podName': pod_name,
                'podStatus': 'Running',
                'workloadName': '',
                'workloadType': '',
            },
        ],
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name_extra)
    delete_and_wait_pv(core_api, pv_name)

    ks = {
        'pvName': '',
        'pvStatus': '',
        'namespace': 'default',
        'pvcName': pvc_name_extra,
        'lastPVCRefAt': 'not empty',
        'lastPodRefAt': 'not empty',
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    # without default storage class
    delete_storage_class(storage_class['metadata']['name'])

    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)

    ret = core_api.list_namespaced_persistent_volume_claim(namespace='default')
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc2 = item
            break
    assert pvc2
    assert pvc2.spec.storage_class_name == static_sc_name

    delete_and_wait_pvc(core_api, pvc_name)
    delete_and_wait_pv(core_api, pv_name)
def test_backup_kubernetes_status(set_random_backupstore, client, core_api,
                                  pod):  # NOQA
    """
    Test that Backups have KubernetesStatus stored properly when there is an
    associated PersistentVolumeClaim and Pod.

    1. Setup a random backupstore
    2. Set settings Longhorn Static StorageClass to `longhorn-static-test`
    3. Create a volume and PV/PVC. Verify the StorageClass of PVC
    4. Create a Pod using the PVC.
    5. Check volume's Kubernetes status to reflect PV/PVC/Pod correctly.
    6. Create a backup for the volume.
    7. Verify the labels of created backup reflect PV/PVC/Pod status.
    8. Restore the backup to a volume. Wait for restoration to complete.
    9. Check the volume's Kubernetes Status
        1. Make sure the `lastPodRefAt` and `lastPVCRefAt` is snapshot created
    time
    10. Delete the backup and restored volume.
    11. Delete PV/PVC/Pod.
    12. Verify volume's Kubernetes Status updated to reflect history data.
    13. Attach the volume and create another backup. Verify the labels
    14. Verify the volume's Kubernetes status.
    15. Restore the previous backup to a new volume. Wait for restoration.
    16. Verify the restored volume's Kubernetes status.
        1. Make sure `lastPodRefAt` and `lastPVCRefAt` matched volume on step
        12
    """

    host_id = get_self_host_id()
    static_sc_name = "longhorn-static-test"
    setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC)
    setting = client.update(setting, value=static_sc_name)
    assert setting.value == static_sc_name

    volume_name = "test-backup-kubernetes-status-pod"  # NOQA
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pod_name = "pod-" + volume_name
    pv_name = "pv-" + volume_name
    pvc_name = "pvc-" + volume_name
    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)
    ret = core_api.list_namespaced_persistent_volume_claim(namespace='default')
    pvc_found = False
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc_found = item
            break
    assert pvc_found
    assert pvc_found.spec.storage_class_name == static_sc_name

    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [{
        'name':
        pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pvc_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    ks = {
        'lastPodRefAt':
        '',
        'lastPVCRefAt':
        '',
        'namespace':
        'default',
        'pvcName':
        pvc_name,
        'pvName':
        pv_name,
        'pvStatus':
        'Bound',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, volume_name, ks)
    volume = wait_for_volume_healthy(client, volume_name)

    # Create Backup manually instead of calling create_backup since Kubernetes
    # is not guaranteed to mount our Volume to the test host.
    snap = create_snapshot(client, volume_name)
    volume.snapshotBackup(name=snap.name)
    wait_for_backup_completion(client, volume_name, snap.name)
    _, b = find_backup(client, volume_name, snap.name)
    # Check backup label
    status = loads(b.labels.get(KUBERNETES_STATUS_LABEL))
    assert status == ks
    # Check backup volume label
    for _ in range(RETRY_COUNTS):
        bv = client.by_id_backupVolume(volume_name)
        if bv is not None and bv.labels is not None:
            break
        time.sleep(RETRY_INTERVAL)
    assert bv is not None and bv.labels is not None
    status = loads(bv.labels.get(KUBERNETES_STATUS_LABEL))
    assert status == ks

    restore_name = generate_volume_name()
    client.create_volume(name=restore_name,
                         size=SIZE,
                         numberOfReplicas=2,
                         fromBackup=b.url)
    wait_for_volume_restoration_completed(client, restore_name)
    wait_for_volume_detached(client, restore_name)

    snapshot_created = b.snapshotCreated
    ks = {
        'lastPodRefAt':
        b.snapshotCreated,
        'lastPVCRefAt':
        b.snapshotCreated,
        'namespace':
        'default',
        'pvcName':
        pvc_name,
        # Restoration should not apply PersistentVolume data.
        'pvName':
        '',
        'pvStatus':
        '',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, restore_name, ks)
    restore = client.by_id_volume(restore_name)
    # We need to compare LastPodRefAt and LastPVCRefAt manually since
    # wait_volume_kubernetes_status only checks for empty or non-empty state.
    assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"]
    assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"]

    delete_backup(client, bv.name, b.name)
    client.delete(restore)
    wait_for_volume_delete(client, restore_name)
    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name)
    delete_and_wait_pv(core_api, pv_name)

    # With the Pod, PVC, and PV deleted, the Volume should have both Ref
    # fields set. Check that a new Backup and Restore will use this instead of
    # manually populating the Ref fields.
    ks = {
        'lastPodRefAt':
        'NOT NULL',
        'lastPVCRefAt':
        'NOT NULL',
        'namespace':
        'default',
        'pvcName':
        pvc_name,
        'pvName':
        '',
        'pvStatus':
        '',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, volume_name, ks)
    volume = wait_for_volume_detached(client, volume_name)

    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)

    snap = create_snapshot(client, volume_name)
    volume.snapshotBackup(name=snap.name)
    volume = wait_for_backup_completion(client, volume_name, snap.name)
    bv, b = find_backup(client, volume_name, snap.name)
    new_b = bv.backupGet(name=b.name)
    status = loads(new_b.labels.get(KUBERNETES_STATUS_LABEL))
    # Check each field manually, we have no idea what the LastPodRefAt or the
    # LastPVCRefAt will be. We just know it shouldn't be SnapshotCreated.
    assert status['lastPodRefAt'] != snapshot_created
    assert status['lastPVCRefAt'] != snapshot_created
    assert status['namespace'] == "default"
    assert status['pvcName'] == pvc_name
    assert status['pvName'] == ""
    assert status['pvStatus'] == ""
    assert status['workloadsStatus'] == [{
        'podName': pod_name,
        'podStatus': 'Running',
        'workloadName': '',
        'workloadType': ''
    }]

    restore_name = generate_volume_name()
    client.create_volume(name=restore_name,
                         size=SIZE,
                         numberOfReplicas=2,
                         fromBackup=b.url)
    wait_for_volume_restoration_completed(client, restore_name)
    wait_for_volume_detached(client, restore_name)

    ks = {
        'lastPodRefAt':
        status['lastPodRefAt'],
        'lastPVCRefAt':
        status['lastPVCRefAt'],
        'namespace':
        'default',
        'pvcName':
        pvc_name,
        'pvName':
        '',
        'pvStatus':
        '',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, restore_name, ks)
    restore = client.by_id_volume(restore_name)
    assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"]
    assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"]

    # cleanup
    backupstore_cleanup(client)
    client.delete(restore)
    cleanup_volume(client, volume)
예제 #38
0
def test_replica_scheduler_update_minimal_available(client):  # NOQA
    minimal_available_setting = client.by_id_setting(
        SETTING_STORAGE_MINIMAL_AVAILABLE_PERCENTAGE)
    old_minimal_setting = minimal_available_setting["value"]

    nodes = client.list_node()
    expect_node_disk = {}
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            if disk["path"] == DEFAULT_DISK_PATH:
                expect_disk = disk
                expect_disk["fsid"] = fsid
                expect_node_disk[node["name"]] = expect_disk

    # set storage minimal available percentage to 100
    # to test all replica couldn't be scheduled
    minimal_available_setting = client.update(minimal_available_setting,
                                              value="100")
    # wait for disks state
    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_conditions(client, node["name"],
                                     fsid, DISK_CONDITION_SCHEDULABLE,
                                     CONDITION_STATUS_FALSE)

    lht_hostId = get_self_host_id()
    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name,
                                  size=SIZE, numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_FALSE)

    # set storage minimal available percentage to default value(10)
    minimal_available_setting = client.update(minimal_available_setting,
                                              value=old_minimal_setting)
    # wait for disks state
    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_conditions(client, node["name"],
                                     fsid, DISK_CONDITION_SCHEDULABLE,
                                     CONDITION_STATUS_TRUE)
    # check volume status
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_TRUE)
    volume = common.wait_for_volume_detached(client, vol_name)
    assert volume["state"] == "detached"
    assert volume["created"] != ""

    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, vol_name)
    nodes = client.list_node()
    node_hosts = []
    for node in nodes:
        node_hosts.append(node["name"])
    # check all replica should be scheduled to default disk
    for replica in volume["replicas"]:
        id = replica["hostId"]
        assert id != ""
        assert replica["running"]
        expect_disk = expect_node_disk[id]
        assert replica["diskID"] == expect_disk["fsid"]
        assert expect_disk["path"] in replica["dataPath"]
        node_hosts = filter(lambda x: x != id, node_hosts)
    assert len(node_hosts) == 0

    # clean volume and disk
    cleanup_volume(client, vol_name)