Example #1
def test_hard_anti_affinity_live_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas live once
    a valid node is available.

    If no nodes without existing replicas are available, the volume should
    remain in "Degraded" state. However, once one is available, the replica
    should now be scheduled successfully, with the volume returning to
    "Healthy" state.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume["replicas"])
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    # Allow scheduling on host node again
    client.update(node, allowScheduling=True)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
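
The examples in this section call a `get_host_replica` helper that is not shown here. A minimal sketch of what it could look like, assuming replica objects expose a `hostId` field (an assumption; the Longhorn test suite's actual helper may differ):

def get_host_replica(volume, host_id):
    # Return the replica scheduled on the given host; the tests above assume
    # exactly one such replica exists while the volume is attached there.
    for replica in volume["replicas"]:
        if replica["hostId"] == host_id:
            return replica
    raise AssertionError("no replica found on host " + host_id)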
Example #2
import random
from random import randrange

# Module-level flag: decided once per run, at random, whether to wait for
# rebuilds after deleting a replica.
WAIT_REPLICA_REBUILD = None


def delete_replica(client, volume_name):
    volume = client.by_id_volume(volume_name)

    replica_count = len(volume.replicas)

    healthy_replica_count = 0
    for replica in volume.replicas:
        if replica.running is True and replica.mode == "RW":
            healthy_replica_count += 1

    # return if there is no or only one healthy replica left
    if healthy_replica_count == 1:
        print("skipped, only one healthy replica found", end=" ")
        return

    if healthy_replica_count == 0:
        print("skipped, no healthy replicas found", end=" ")
        return

    replica_id = randrange(0, replica_count)

    replica_name = volume["replicas"][replica_id]["name"]

    volume.replicaRemove(name=replica_name)

    wait_for_volume_degraded(client, volume_name)

    global WAIT_REPLICA_REBUILD
    if WAIT_REPLICA_REBUILD is None:
        WAIT_REPLICA_REBUILD = bool(random.getrandbits(1))

    if WAIT_REPLICA_REBUILD is True:
        wait_for_volume_replica_count(client, volume_name, replica_count)
        replica_names = map(lambda replica: replica.name, volume["replicas"])
        wait_new_replica_ready(client, volume_name, replica_names)
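
`delete_replica` decides once per run, via the `WAIT_REPLICA_REBUILD` global, whether it waits for the rebuild to finish, so callers should settle the volume back to a known state between invocations. A hypothetical caller, assuming an attached, healthy volume (the function name and loop count are illustrative, not part of the original suite):

def exercise_replica_deletion(client, volume_name, iterations=3):
    # Repeatedly delete a random replica; since delete_replica may or may
    # not have waited for the rebuild, explicitly wait for Healthy each time.
    for _ in range(iterations):
        delete_replica(client, volume_name)
        wait_for_volume_healthy(client, volume_name)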
Example #3
def test_hard_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity are still able to detach and
    reattach to a node properly, even in degraded state.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Detach the volume.
    7. Verify that the volume only has 2 replicas
        1. Unhealthy replica will be removed upon detach.
    8. Attach the volume again.
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
        3. Verify only two of three replicas of volume are healthy.
        4. Verify the remaining replica doesn't have `replica.HostID`, meaning
        it's unscheduled
    9. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume.replicas) == 2

    volume.attach(hostId=host_id)
    # Make sure we're still not getting another successful replica.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    assert sum([
        1 for replica in volume.replicas
        if replica.running and replica.mode == "RW"
    ]) == 2
    assert sum([1 for replica in volume.replicas if not replica.hostId]) == 1
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Example #4
def test_hard_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity work as expected.

    With Hard Anti-Affinity, scheduling on nodes with existing replicas should
    be forbidden, resulting in "Degraded" state.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
        3. Verify only two of three replicas of volume are healthy.
        4. Verify the remaining replica doesn't have `replica.HostID`, meaning
        it's unscheduled
    6. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    # Instead of waiting for a timeout and lengthening the test a significant
    # amount, we can make sure scheduling isn't working by checking that the
    # volume becomes Degraded and reports a scheduling error.
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    # While there should be three replicas to meet the volume's request,
    # only two of those replicas should actually be Healthy.
    volume = client.by_id_volume(volume_name)
    assert sum([
        1 for replica in volume.replicas
        if replica.running and replica.mode == "RW"
    ]) == 2
    # Confirm that the remaining replica is unscheduled.
    assert sum([1 for replica in volume.replicas if not replica.hostId]) == 1
    # Three replicas in total should still exist.
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
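
The `sum([...])` idiom used above to count healthy replicas recurs throughout these examples. A small helper could factor it out; this is a sketch, not an existing utility in the suite:

def count_healthy_replicas(volume):
    # A replica counts as healthy when its process is running in
    # read-write ("RW") mode.
    return sum(1 for replica in volume.replicas
               if replica.running and replica.mode == "RW")

With it, the assertion above reduces to assert count_healthy_replicas(volume) == 2.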
Example #5
def test_volume_update_replica_count(clients, volume_name):  # NOQA
    # Grab an arbitrary (host_id, client) pair; dict.iteritems() is
    # Python 2 only, so use items() instead.
    host_id, client = next(iter(clients.items()))

    replica_count = 3
    volume = create_and_check_volume(client, volume_name, replica_count)

    volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    replica_count = 5
    volume = volume.updateReplicaCount(replicaCount=replica_count)
    volume = common.wait_for_volume_degraded(client, volume_name)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == replica_count

    old_replica_count = replica_count
    replica_count = 2
    volume = volume.updateReplicaCount(replicaCount=replica_count)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == old_replica_count

    volume.replicaRemove(name=volume["replicas"][0]["name"])
    volume.replicaRemove(name=volume["replicas"][1]["name"])
    volume.replicaRemove(name=volume["replicas"][2]["name"])

    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                  replica_count)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == replica_count

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)
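
These tests lean heavily on the `wait_for_*` helpers from `common`. Their implementations are not shown here, but they presumably follow the same polling pattern that Example #12 below spells out inline with `RETRY_COUNTS` and `RETRY_INTERVAL`; a generic sketch of that pattern (names and defaults are illustrative, not the suite's actual API):

import time


def wait_for(predicate, retries=300, interval=2):
    # Poll the predicate until it returns True or the retries run out.
    for _ in range(retries):
        if predicate():
            return True
        time.sleep(interval)
    return False

A condition such as "the volume has N replicas" can then be expressed as wait_for(lambda: len(client.by_id_volume(volume_name).replicas) == n).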
Example #6
def test_hard_anti_affinity_offline_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas during
    the attaching process once a valid node is available.

    Once a new replica has been built as part of the attaching process, the
    volume should be Healthy again.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume["replicas"])
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    client.update(node, allowScheduling=True)
    volume.attach(hostId=host_id)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Example #7
def test_hard_anti_affinity_live_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas live once
    a valid node is available.

    If no nodes without existing replicas are available, the volume should
    remain in "Degraded" state. However, once one is available, the replica
    should now be scheduled successfully, with the volume returning to
    "Healthy" state.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Enable the current node's scheduling
    7. Wait for volume to start rebuilding and become healthy again
    8. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume.replicas)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    # Allow scheduling on host node again
    client.update(node, allowScheduling=True)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Example #8
def test_hard_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity are still able to detach and
    reattach to a node properly, even in degraded state.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 2

    volume.attach(hostId=host_id)
    # Make sure we're still not getting another successful replica.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    assert sum([
        1 for replica in volume["replicas"]
        if replica["running"] and replica["mode"] == "RW"
    ]) == 2
    assert sum([1 for replica in volume["replicas"]
                if not replica["hostId"]]) == 1
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Example #9
def test_hard_anti_affinity_offline_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas during
    the attaching process once a valid node is available.

    Once a new replica has been built as part of the attaching process, the
    volume should be Healthy again.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Detach the volume.
    7. Enable current node's scheduling.
    8. Attach the volume again.
    9. Wait for volume to become healthy with 3 replicas
    10. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume.replicas)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    client.update(node, allowScheduling=True)
    volume.attach(hostId=host_id)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Example #10
def test_hard_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity work as expected.

    With Hard Anti-Affinity, scheduling on nodes with existing replicas should
    be forbidden, resulting in "Degraded" state.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    # Instead of waiting for a timeout and lengthening the test a significant
    # amount, we can make sure scheduling isn't working by checking that the
    # volume becomes Degraded and reports a scheduling error.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    # While there should be three replicas to meet the volume's request,
    # only two of those replicas should actually be Healthy.
    assert sum([
        1 for replica in volume["replicas"]
        if replica["running"] and replica["mode"] == "RW"
    ]) == 2
    # Confirm that the remaining replica is unscheduled.
    assert sum([1 for replica in volume["replicas"]
                if not replica["hostId"]]) == 1
    # Three replicas in total should still exist.
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Example #11
def test_zone_tags(client, core_api, volume_name, k8s_node_zone_tags):  # NOQA
    """
    Test the anti-affinity zone feature

    1. Add Kubernetes zone labels to the nodes
        1. Only two zones now: zone1 and zone2
    2. Create a volume with two replicas
    3. Verify that zone1 and zone2 each have one replica.
    4. Remove a random replica and wait for the volume to finish rebuilding
    5. Verify that zone1 and zone2 each have one replica.
    6. Repeat steps 4-5 a few times.
    7. Update the volume to 3 replicas, make sure they're scheduled on 3 nodes
    8. Remove a random replica and wait for the volume to finish rebuilding
    9. Make sure replicas are on different nodes
    10. Repeat steps 8-9 a few times
    """

    wait_longhorn_node_zone_updated(client)

    volume = create_and_check_volume(client, volume_name, num_of_replicas=2)

    host_id = get_self_host_id()

    volume.attach(hostId=host_id)

    volume = wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)

    zone1_replica_count = get_zone_replica_count(client, volume_name, ZONE1)
    zone2_replica_count = get_zone_replica_count(client, volume_name, ZONE2)

    assert zone1_replica_count == zone2_replica_count

    for i in range(randrange(3, 5)):
        volume = client.by_id_volume(volume_name)

        replica_count = len(volume.replicas)
        assert replica_count == 2

        replica_id = randrange(0, replica_count)

        replica_name = volume.replicas[replica_id].name

        volume.replicaRemove(name=replica_name)

        wait_for_volume_degraded(client, volume_name)

        wait_for_volume_healthy(client, volume_name)

        wait_for_volume_replica_count(client, volume_name, replica_count)

        volume = client.by_id_volume(volume_name)

        replica_names = map(lambda replica: replica.name, volume["replicas"])

        wait_new_replica_ready(client, volume_name, replica_names)

        zone1_replica_count = \
            get_zone_replica_count(client, volume_name, ZONE1)
        zone2_replica_count = \
            get_zone_replica_count(client, volume_name, ZONE2)

        assert zone1_replica_count == zone2_replica_count

    volume.updateReplicaCount(replicaCount=3)

    wait_for_volume_degraded(client, volume_name)

    wait_for_volume_replica_count(client, volume_name, 3)

    wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)

    lh_node_names = list(map(lambda node: node.name, client.list_node()))

    for replica in volume.replicas:
        lh_node_names.remove(replica.hostId)

    assert lh_node_names == []

    for i in range(randrange(3, 5)):
        volume = client.by_id_volume(volume_name)

        replica_count = len(volume.replicas)
        assert replica_count == 3

        replica_id = randrange(0, replica_count)

        replica_name = volume.replicas[replica_id].name

        volume.replicaRemove(name=replica_name)

        wait_for_volume_degraded(client, volume_name)

        wait_for_volume_healthy(client, volume_name)

        wait_for_volume_replica_count(client, volume_name, replica_count)

        volume = client.by_id_volume(volume_name)

        lh_node_names = list(map(lambda node: node.name, client.list_node()))

        for replica in volume.replicas:
            lh_node_names.remove(replica.hostId)

        assert lh_node_names == []
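
`get_zone_replica_count` is not defined in this section. A sketch of what it could look like, assuming Longhorn node objects expose their zone as `node.zone` (an assumption; the suite's helper may differ):

def get_zone_replica_count(client, volume_name, zone_name):
    # Count the volume's replicas whose host node sits in the given zone.
    volume = client.by_id_volume(volume_name)
    node_zone = {node.name: node.zone for node in client.list_node()}
    return sum(1 for replica in volume.replicas
               if node_zone.get(replica.hostId) == zone_name)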
Example #12
def test_data_locality_basic(client, core_api, volume_name, pod,
                             settings_reset):  # NOQA
    """
    Test data locality basic feature

    Context:

    The Data Locality feature gives users the option to keep a local
    replica on the same node as the consuming pod.
    Longhorn currently supports 2 modes:
    - disabled: Longhorn does not try to keep a local replica
    - best-effort: Longhorn tries to keep a local replica

    See manual tests at:
    https://github.com/longhorn/longhorn/issues/1045#issuecomment-680706283

    Steps:

    Case 1: Test that Longhorn builds a local replica on the engine node

    1. Create a volume(1) with 1 replica and dataLocality set to disabled
    2. Find the node where the replica is located.
       Let's call this node replica-node
    3. Attach the volume to a node different from replica-node.
       Let's call this node engine-node
    4. Write 200MB of data to volume(1)
    5. Use a retry loop to verify that Longhorn does not create
       a replica on the engine-node
    6. Update dataLocality to best-effort for volume(1)
    7. Use a retry loop to verify that Longhorn creates and rebuilds
       a replica on the engine-node and removes the other replica
    8. Detach volume(1) and attach it to a different node.
       Let's call the new node new-engine-node and the old
       node old-engine-node
    9. Wait for volume(1) to finish attaching
    10. Use a retry loop to verify that Longhorn creates and rebuilds
       a replica on the new-engine-node and removes the replica on
       old-engine-node

    Case 2: Test that Longhorn prioritizes deleting replicas on the same node

    1. Add the tag AVAIL to node-1 and node-2
    2. Set node soft anti-affinity to `true`.
    3. Create a volume(2) with 3 replicas and dataLocality set to best-effort
    4. Use a retry loop to verify that all 3 replicas are on node-1 and
        node-2, and that no replica is on node-3
    5. Attach volume(2) to node-3
    6. Use a retry loop to verify that there is no replica on node-3 and
        we can still read/write to volume(2)
    7. Find the node which contains 2 replicas.
        Let's call this node most-replica-node
    8. Set the replica count to 2 for volume(2)
    9. Verify that Longhorn removes one replica from most-replica-node

    Case 3: Test that the volume is not corrupted if there is an unexpected
    detachment during building local replica

    1. Remove the tag AVAIL from node-1 and node-2
       Set node soft anti-affinity to `false`.
    2. Create a volume(3) with 1 replica and dataLocality set to best-effort
    3. Attach volume(3) to node-3.
    4. Use a retry loop to verify that volume(3) has only 1 replica on node-3
    5. Write 800MB data to volume(3)
    6. Detach volume(3)
    7. Attach volume(3) to node-1
    8. Use a retry loop to:
        Wait until volume(3) finishes attaching.
        Wait until Longhorn starts rebuilding a replica on node-1
        Immediately detach volume(3)
    9. Verify that the replica on node-1 is in ERR state.
    10. Attach volume(3) to node-1
    11. Wait until volume(3) finishes attaching.
    12. Use a retry loop to verify that Longhorn cleans up the ERR replica,
        rebuilds a new replica on node-1, and removes the replica on node-3

    Case 4: Make sure a failed-to-schedule local replica doesn't block the
    creation of other replicas.

    1. Disable scheduling for node-3
    2. Create a vol with 1 replica, `dataLocality = best-effort`.
        The replica is scheduled on a node (say node-1)
    3. Attach vol to node-3. There is a failed-to-schedule
        replica with Spec.HardNodeAffinity=node-3
    4. Increase numberOfReplica to 3. Verify that the replica set contains:
        one on node-1, one on node-2,  one failed replica
        with Spec.HardNodeAffinity=node-3.
    5. Decrease numberOfReplica to 2. Verify that the replica set contains:
        one on node-1, one on node-2,  one failed replica
        with Spec.HardNodeAffinity=node-3.
    6. Decrease numberOfReplica to 1. Verify that the replica set contains:
        one on node-1 or node-2,  one failed replica
        with Spec.HardNodeAffinity=node-3.
    7. Decrease numberOfReplica to 2. Verify that the replica set contains:
        one on node-1, one on node-2, one failed replica
        with Spec.HardNodeAffinity=node-3.
    8. Turn off data locality by setting `dataLocality=disabled` for the vol.
        Verify that the replica set contains: one on node-1, one on node-2

    9. Clean up
    """

    # Case 1: Test that Longhorn builds a local replica on the engine node

    nodes = client.list_node()

    default_data_locality_setting = \
        client.by_id_setting(SETTING_DEFAULT_DATA_LOCALITY)
    try:
        client.update(default_data_locality_setting, value="disabled")
    except Exception as e:
        print("Exception when update Default Data Locality setting",
              default_data_locality_setting, e)

    volume1_name = volume_name + "-1"
    volume1_size = str(500 * Mi)
    volume1_data_path = "/data/test"
    pv1_name = volume1_name + "-pv"
    pvc1_name = volume1_name + "-pvc"
    pod1_name = volume1_name + "-pod"
    pod1 = pod

    pod1['metadata']['name'] = pod1_name

    volume1 = create_and_check_volume(client,
                                      volume1_name,
                                      num_of_replicas=1,
                                      size=volume1_size)

    volume1 = client.by_id_volume(volume1_name)
    create_pv_for_volume(client, core_api, volume1, pv1_name)
    create_pvc_for_volume(client, core_api, volume1, pvc1_name)

    volume1 = client.by_id_volume(volume1_name)
    volume1_replica_node = volume1.replicas[0]['hostId']

    volume1_attached_node = None
    for node in nodes:
        if node.name != volume1_replica_node:
            volume1_attached_node = node.name
            break

    assert volume1_attached_node is not None

    pod1['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": pvc1_name
        }
    }]

    pod1['spec']['nodeSelector'] = \
        {"kubernetes.io/hostname": volume1_attached_node}
    create_and_wait_pod(core_api, pod1)

    write_pod_volume_random_data(core_api, pod1_name, volume1_data_path,
                                 DATA_SIZE_IN_MB_2)

    for i in range(10):
        volume1 = client.by_id_volume(volume1_name)
        assert len(volume1.replicas) == 1
        assert volume1.replicas[0]['hostId'] != volume1_attached_node
        time.sleep(1)

    volume1 = client.by_id_volume(volume1_name)
    volume1.updateDataLocality(dataLocality="best-effort")

    for _ in range(RETRY_COUNTS):
        volume1 = client.by_id_volume(volume1_name)
        assert volume1[VOLUME_FIELD_ROBUSTNESS] == VOLUME_ROBUSTNESS_HEALTHY
        if len(volume1.replicas) == 1 and \
                volume1.replicas[0]['hostId'] == volume1_attached_node:
            break
        time.sleep(RETRY_INTERVAL)
    assert len(volume1.replicas) == 1
    assert volume1.replicas[0]['hostId'] == volume1_attached_node

    delete_and_wait_pod(core_api, pod1_name)
    volume1 = wait_for_volume_detached(client, volume1_name)

    volume1_replica_node = volume1.replicas[0]['hostId']

    volume1_attached_node = None
    for node in nodes:
        if node.name != volume1_replica_node:
            volume1_attached_node = node.name
            break

    assert volume1_attached_node is not None

    pod1['spec']['nodeSelector'] = \
        {"kubernetes.io/hostname": volume1_attached_node}
    create_and_wait_pod(core_api, pod1)
    for _ in range(RETRY_COUNTS):
        volume1 = client.by_id_volume(volume1_name)
        assert volume1[VOLUME_FIELD_ROBUSTNESS] == VOLUME_ROBUSTNESS_HEALTHY
        if len(volume1.replicas) == 1 and \
                volume1.replicas[0]['hostId'] == volume1_attached_node:
            break
        time.sleep(RETRY_INTERVAL)
    assert len(volume1.replicas) == 1
    assert volume1.replicas[0]['hostId'] == volume1_attached_node
    delete_and_wait_pod(core_api, pod1_name)
    wait_for_volume_detached(client, volume1_name)

    # Case 2: Test that Longhorn prioritizes deleting replicas on the same node

    node1 = nodes[0]
    node2 = nodes[1]
    node3 = nodes[2]

    client.update(node1, allowScheduling=True, tags=["AVAIL"])
    client.update(node2, allowScheduling=True, tags=["AVAIL"])

    replica_node_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    try:
        client.update(replica_node_soft_anti_affinity_setting, value="true")
    except Exception as e:
        print(
            "Exception when update "
            "Replica Node Level Soft Anti-Affinity setting",
            replica_node_soft_anti_affinity_setting, e)

    volume2_name = volume_name + "-2"
    volume2_size = str(500 * Mi)
    pv2_name = volume2_name + "-pv"
    pvc2_name = volume2_name + "-pvc"
    pod2_name = volume2_name + "-pod"
    pod2 = pod

    pod2['metadata']['name'] = pod2_name

    volume2 = client.create_volume(name=volume2_name,
                                   size=volume2_size,
                                   numberOfReplicas=3,
                                   nodeSelector=["AVAIL"],
                                   dataLocality="best-effort")

    volume2 = wait_for_volume_detached(client, volume2_name)
    volume2 = client.by_id_volume(volume2_name)
    create_pv_for_volume(client, core_api, volume2, pv2_name)
    create_pvc_for_volume(client, core_api, volume2, pvc2_name)

    volume2 = client.by_id_volume(volume2_name)

    pod2['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": pvc2_name
        }
    }]

    pod2['spec']['nodeSelector'] = {"kubernetes.io/hostname": node3.name}
    create_and_wait_pod(core_api, pod2)

    volume2 = wait_for_volume_healthy(client, volume2_name)

    for replica in volume2.replicas:
        assert replica["hostId"] != node3.name

    volume2.updateReplicaCount(replicaCount=2)

    # 2 healthy replicas and 1 replica that failed to schedule
    # The failed-to-schedule replica is the local replica on node3
    volume2 = wait_for_volume_replica_count(client, volume2_name, 3)
    volume2 = client.by_id_volume(volume2_name)

    volume2_healthy_replicas = []
    for replica in volume2.replicas:
        if replica.running is True:
            volume2_healthy_replicas.append(replica)

    assert len(volume2_healthy_replicas) == 2

    volume2_rep1 = volume2_healthy_replicas[0]
    volume2_rep2 = volume2_healthy_replicas[1]
    assert volume2_rep1["hostId"] != volume2_rep2["hostId"]
    delete_and_wait_pod(core_api, pod2_name)
    wait_for_volume_detached(client, volume2_name)

    # Case 3: Test that the volume is not corrupted if there is an unexpected
    # detachment during building local replica

    client.update(node1, allowScheduling=True, tags=[])
    client.update(node2, allowScheduling=True, tags=[])

    replica_node_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    try:
        client.update(replica_node_soft_anti_affinity_setting, value="false")
    except Exception as e:
        print(
            "Exception when update "
            "Replica Node Level Soft Anti-Affinity setting",
            replica_node_soft_anti_affinity_setting, e)

    volume3_name = volume_name + "-3"
    volume3_size = str(1 * Gi)
    volume3_data_path = "/data/test"
    pv3_name = volume3_name + "-pv"
    pvc3_name = volume3_name + "-pvc"
    pod3_name = volume3_name + "-pod"
    pod3 = pod

    pod3['metadata']['name'] = pod3_name

    volume3 = client.create_volume(name=volume3_name,
                                   size=volume3_size,
                                   numberOfReplicas=1)

    volume3 = wait_for_volume_detached(client, volume3_name)
    volume3 = client.by_id_volume(volume3_name)
    create_pv_for_volume(client, core_api, volume3, pv3_name)
    create_pvc_for_volume(client, core_api, volume3, pvc3_name)

    volume3 = client.by_id_volume(volume3_name)

    pod3['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": pvc3_name
        }
    }]

    pod3['spec']['nodeSelector'] = {"kubernetes.io/hostname": node3.name}
    create_and_wait_pod(core_api, pod3)
    volume3 = wait_for_volume_healthy(client, volume3_name)

    write_pod_volume_random_data(core_api, pod3_name, volume3_data_path,
                                 DATA_SIZE_IN_MB_4)

    volume3.updateDataLocality(dataLocality="best-effort")
    volume3 = client.by_id_volume(volume3_name)

    if volume3.replicas[0]['hostId'] != node3.name:
        wait_for_rebuild_start(client, volume3_name)
        volume3 = client.by_id_volume(volume3_name)
        assert len(volume3.replicas) == 2
        wait_for_rebuild_complete(client, volume3_name)

    volume3 = wait_for_volume_replica_count(client, volume3_name, 1)
    assert volume3.replicas[0]["hostId"] == node3.name

    delete_and_wait_pod(core_api, pod3_name)

    pod3['spec']['nodeSelector'] = {"kubernetes.io/hostname": node1.name}
    create_and_wait_pod(core_api, pod3)

    wait_for_rebuild_start(client, volume3_name)
    crash_engine_process_with_sigkill(client, core_api, volume3_name)
    delete_and_wait_pod(core_api, pod3_name)
    wait_for_volume_detached(client, volume3_name)
    volume3 = client.by_id_volume(volume3_name)
    assert len(volume3.replicas) == 1
    assert volume3.replicas[0]["hostId"] == node3.name

    create_and_wait_pod(core_api, pod3)
    wait_for_rebuild_start(client, volume3_name)
    volume3 = client.by_id_volume(volume3_name)
    assert len(volume3.replicas) == 2
    wait_for_rebuild_complete(client, volume3_name)

    # Wait for deletion of extra replica
    volume3 = wait_for_volume_replica_count(client, volume3_name, 1)
    assert volume3.replicas[0]["hostId"] == node1.name
    assert volume3.replicas[0]["mode"] == "RW"
    assert volume3.replicas[0]["running"] is True

    delete_and_wait_pod(core_api, pod3_name)
    wait_for_volume_detached(client, volume3_name)

    # Case 4: Make sure a failed-to-schedule local replica doesn't block
    # the creation of other replicas.

    replica_node_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    try:
        client.update(replica_node_soft_anti_affinity_setting, value="false")
    except Exception as e:
        print(
            "Exception when update "
            "Replica Node Level Soft Anti-Affinity setting",
            replica_node_soft_anti_affinity_setting, e)

    client.update(node3, allowScheduling=False)

    volume4_name = volume_name + "-4"
    volume4_size = str(1 * Gi)

    volume4 = client.create_volume(name=volume4_name,
                                   size=volume4_size,
                                   numberOfReplicas=1,
                                   dataLocality="best-effort")

    volume4 = wait_for_volume_detached(client, volume4_name)
    volume4 = client.by_id_volume(volume4_name)

    volume4_replica_name = volume4.replicas[0]["name"]

    volume4.attach(hostId=node3.name)

    wait_for_volume_healthy(client, volume4_name)

    volume4 = client.by_id_volume(volume4_name)
    assert len(volume4.replicas) == 2

    for replica in volume4.replicas:
        if replica["name"] == volume4_replica_name:
            assert replica["running"] is True
            assert replica["mode"] == "RW"
        else:
            assert replica["running"] is False
            assert replica["mode"] == ""

    assert volume4.conditions.scheduled.reason == \
        "LocalReplicaSchedulingFailure"

    volume4 = volume4.updateReplicaCount(replicaCount=3)

    volume4 = wait_for_volume_degraded(client, volume4_name)

    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_failed_replica_count = 0

    for replica in volume4.replicas:
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == "":
            v4_failed_replica_count += 1

    assert v4_node1_replica_count == 1
    assert v4_node2_replica_count == 1
    assert v4_failed_replica_count > 0

    volume4 = volume4.updateReplicaCount(replicaCount=2)

    volume4 = wait_for_volume_replica_count(client, volume4_name, 3)

    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_failed_replica_count = 0

    for replica in volume4.replicas:
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == "":
            v4_failed_replica_count += 1

    assert v4_node1_replica_count == 1
    assert v4_node2_replica_count == 1
    assert v4_failed_replica_count > 0

    volume4 = volume4.updateReplicaCount(replicaCount=1)

    volume4 = wait_for_volume_replica_count(client, volume4_name, 2)

    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_failed_replica_count = 0

    for replica in volume4.replicas:
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == "":
            v4_failed_replica_count += 1

    assert v4_node1_replica_count + v4_node2_replica_count == 1
    assert v4_failed_replica_count == 1

    volume4 = volume4.updateDataLocality(dataLocality="disabled")
    volume4 = volume4.updateReplicaCount(replicaCount=2)

    running_replica_count = 0
    for _ in range(RETRY_COUNTS):
        volume4 = client.by_id_volume(volume4_name)
        running_replica_count = 0
        for r in volume4.replicas:
            if r.failedAt == "" and r.running is True:
                running_replica_count += 1
        if running_replica_count == 2:
            break
        time.sleep(RETRY_INTERVAL)
    assert running_replica_count == 2

    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_node3_replica_count = 0

    for replica in volume4.replicas:
        wait_for_replica_running(client, volume4_name, replica["name"])
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == node3.name:
            v4_node3_replica_count += 1
    assert v4_node1_replica_count == 1
    assert v4_node2_replica_count == 1
    assert v4_node3_replica_count == 0
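
Case 4 runs the same per-node tally three times. A refactoring sketch (not part of the original test) that collapses each tally into a single call:

from collections import Counter


def replica_count_by_host(volume):
    # Map hostId -> replica count; an empty hostId marks a replica that
    # failed to schedule.
    return Counter(replica["hostId"] for replica in volume.replicas)

Each assertion block then reduces to, e.g., counts = replica_count_by_host(volume4) followed by assert counts[node1.name] == 1 and counts[""] >= 1.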