def test_hard_anti_affinity_live_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas live once
    a valid node is available.

    If no nodes without existing replicas are available, the volume should
    remain in "Degraded" state. However, once one is available, the replica
    should now be scheduled successfully, with the volume returning to
    "Healthy" state.

    1. Create a volume and attach it to the current node.
    2. Write random `data` to the volume.
    3. Set the replica soft anti-affinity setting to "false".
    4. Disable scheduling on the current node.
    5. Remove the replica on the current node.
        1. Wait for the volume to become degraded.
        2. Wait for the volume to report a scheduling failure.
    6. Enable scheduling on the current node again.
    7. Wait for a new replica to be ready and the volume to become healthy.
    8. Check volume `data`.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)

    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)

    # Build a real list rather than a lazy map object: on Python 3 a map
    # iterator is exhausted after a single pass, so it cannot be consulted
    # repeatedly while polling for the new replica.
    replica_names = [replica.name for replica in volume["replicas"]]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    # Allow scheduling on host node again
    client.update(node, allowScheduling=True)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    check_volume_data(volume, data)
    cleanup_volume(client, volume)
def delete_replica(client, volume_name):
    """
    Remove one randomly chosen replica from a volume and optionally wait for
    a replacement replica.

    The removal is skipped, with a short message printed to stdout, unless at
    least two replicas are healthy (running and in "RW" mode). Whether to
    wait for the rebuild is controlled by the module-level
    WAIT_REPLICA_REBUILD flag: the first time it is found to be None it is
    set to a random boolean, and that value is used from then on.

    :param client: API client used to look up the volume.
    :param volume_name: Name of the volume to remove a replica from.
    :return: None.
    """
    global WAIT_REPLICA_REBUILD

    volume = client.by_id_volume(volume_name)
    replica_count = len(volume.replicas)
    healthy_replica_count = sum(
        1 for replica in volume.replicas
        if replica.running is True and replica.mode == "RW")

    # return if there is no or only one healthy replica left
    if healthy_replica_count == 1:
        print("skipped, only one healthy replica found", end=" ")
        return
    if healthy_replica_count == 0:
        print("skipped, no healthy replicas found", end=" ")
        return

    # The candidate is drawn from all replicas, healthy or not.
    replica_id = randrange(0, replica_count)
    replica_name = volume["replicas"][replica_id]["name"]
    volume.replicaRemove(name=replica_name)
    wait_for_volume_degraded(client, volume_name)

    if WAIT_REPLICA_REBUILD is None:
        WAIT_REPLICA_REBUILD = bool(random.getrandbits(1))

    if WAIT_REPLICA_REBUILD is True:
        wait_for_volume_replica_count(client, volume_name, replica_count)
        # `volume` is still the snapshot taken before the removal, so these
        # are the pre-removal names. Use a list, not a lazy map object: a
        # Python 3 map iterator is exhausted after one pass and cannot be
        # consulted repeatedly while polling.
        replica_names = [replica.name for replica in volume["replicas"]]
        wait_new_replica_ready(client, volume_name, replica_names)
def test_hard_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity are still able to detach and
    reattach to a node properly, even in degraded state.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Detach the volume.
    7. Verify that the volume only has 2 replicas
        1. Unhealthy replica will be removed upon detach.
    8. Attach the volume again.
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
        3. Verify only two of three replicas of volume are healthy.
        4. Verify the remaining replica doesn't have `replica.HostID`,
           meaning it's unscheduled
    9. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    self_node_id = get_self_host_id()
    volume.attach(hostId=self_node_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    written = write_volume_random_data(volume)

    # Turn soft anti-affinity off, then take this node out of scheduling.
    anti_affinity = client.by_id_setting(
        SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(anti_affinity, value="false")
    self_node = client.by_id_node(self_node_id)
    client.update(self_node, allowScheduling=False)

    local_replica = get_host_replica(volume, self_node_id)
    volume.replicaRemove(name=local_replica.name)
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume.replicas) == 2

    volume.attach(hostId=self_node_id)
    # Make sure we're still not getting another successful replica.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    healthy = [
        rep for rep in volume.replicas
        if rep.running and rep.mode == "RW"
    ]
    assert len(healthy) == 2
    unscheduled = [rep for rep in volume.replicas if not rep.hostId]
    assert len(unscheduled) == 1
    assert len(volume.replicas) == 3

    check_volume_data(volume, written)
    cleanup_volume(client, volume)
def test_hard_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity work as expected.

    With Hard Anti-Affinity, scheduling on nodes with existing replicas should
    be forbidden, resulting in "Degraded" state.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
        3. Verify only two of three replicas of volume are healthy.
        4. Verify the remaining replica doesn't have `replica.HostID`, meaning
           it's unscheduled
    6. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    local_node_id = get_self_host_id()
    volume.attach(hostId=local_node_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    payload = write_volume_random_data(volume)

    soft_anti_affinity = client.by_id_setting(
        SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(soft_anti_affinity, value="false")
    local_node = client.by_id_node(local_node_id)
    client.update(local_node, allowScheduling=False)

    local_replica = get_host_replica(volume, local_node_id)
    volume.replicaRemove(name=local_replica.name)

    # Rather than sitting through a timeout, treat "Degraded plus a reported
    # scheduling error" as the evidence that no replacement was scheduled.
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    volume = client.by_id_volume(volume_name)

    # Three replicas exist to satisfy the volume's request, but only two of
    # them should be healthy.
    healthy = [
        rep for rep in volume.replicas
        if rep.running and rep.mode == "RW"
    ]
    assert len(healthy) == 2

    # The remaining one must be the unscheduled replica.
    without_host = [rep for rep in volume.replicas if not rep.hostId]
    assert len(without_host) == 1

    # Three replicas in total should still exist.
    assert len(volume.replicas) == 3

    check_volume_data(volume, payload)
    cleanup_volume(client, volume)
def test_volume_update_replica_count(clients, volume_name):  # NOQA
    """
    Test raising and lowering the replica count of an attached volume.

    1. Create a volume with 3 replicas and attach it to one of the hosts.
    2. Update the replica count to 5.
        1. Wait for the volume to become degraded, then healthy.
        2. Verify the volume has 5 replicas.
    3. Update the replica count to 2.
        1. Wait for the volume to be healthy.
        2. Verify the volume still has 5 replicas.
    4. Remove three replicas and wait until the volume reports 2 replicas
       and is healthy.
    5. Delete the volume and wait for the deletion to finish.
    """
    # Any host works for this test, so take the first (host_id, client) pair.
    # items() is available on both Python 2 and 3, unlike iteritems().
    host_id, client = next(iter(clients.items()))

    replica_count = 3
    volume = create_and_check_volume(client, volume_name, replica_count)

    volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    replica_count = 5
    volume = volume.updateReplicaCount(replicaCount=replica_count)
    volume = common.wait_for_volume_degraded(client, volume_name)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == replica_count

    old_replica_count = replica_count
    replica_count = 2
    volume = volume.updateReplicaCount(replicaCount=replica_count)
    volume = common.wait_for_volume_healthy(client, volume_name)
    # Lowering the count alone leaves the existing replicas in place.
    assert len(volume["replicas"]) == old_replica_count

    # All three names come from the same snapshot of the volume.
    for index in range(3):
        volume.replicaRemove(name=volume["replicas"][index]["name"])

    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                  replica_count)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == replica_count

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)
def test_hard_anti_affinity_offline_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas during
    the attaching process once a valid node is available.

    Once a new replica has been built as part of the attaching process, the
    volume should be Healthy again.

    1. Create a volume and attach it to the current node.
    2. Write random `data` to the volume.
    3. Set the replica soft anti-affinity setting to "false".
    4. Disable scheduling on the current node.
    5. Remove the replica on the current node.
        1. Wait for the volume to become degraded.
        2. Wait for the volume to report a scheduling failure.
    6. Detach the volume.
    7. Enable scheduling on the current node again.
    8. Attach the volume again.
    9. Wait for a new replica to be ready and the volume to become healthy
       with 3 replicas.
    10. Check volume `data`.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)

    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)

    # Build a real list rather than a lazy map object: on Python 3 a map
    # iterator is exhausted after a single pass, so it cannot be consulted
    # repeatedly while polling for the new replica.
    replica_names = [replica.name for replica in volume["replicas"]]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)

    # Scheduling is re-enabled while the volume is detached, so the rebuild
    # happens as part of the next attach.
    client.update(node, allowScheduling=True)
    volume.attach(hostId=host_id)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    check_volume_data(volume, data)
    cleanup_volume(client, volume)
def test_hard_anti_affinity_live_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas live once
    a valid node is available.

    If no nodes without existing replicas are available, the volume should
    remain in "Degraded" state. However, once one is available, the replica
    should now be scheduled successfully, with the volume returning to
    "Healthy" state.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Enable the current node's scheduling
    7. Wait for volume to start rebuilding and become healthy again
    8. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)

    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)

    # Build a real list rather than a lazy map object: on Python 3 a map
    # iterator is exhausted after a single pass, so it cannot be consulted
    # repeatedly while polling for the new replica.
    replica_names = [replica.name for replica in volume.replicas]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    # Allow scheduling on host node again
    client.update(node, allowScheduling=True)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    check_volume_data(volume, data)
    cleanup_volume(client, volume)
def test_hard_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity are still able to detach and
    reattach to a node properly, even in degraded state.
    """
    volume = create_and_check_volume(client, volume_name)
    this_host = get_self_host_id()
    volume.attach(hostId=this_host)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    expected_data = write_volume_random_data(volume)

    # Turn soft anti-affinity off, then stop scheduling onto this host.
    soft_setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(soft_setting, value="false")
    this_node = client.by_id_node(this_host)
    client.update(this_node, allowScheduling=False)

    replica_on_host = get_host_replica(volume, this_host)
    volume.replicaRemove(name=replica_on_host["name"])
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 2

    volume.attach(hostId=this_host)
    # Make sure we're still not getting another successful replica.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    healthy = [
        rep for rep in volume["replicas"]
        if rep["running"] and rep["mode"] == "RW"
    ]
    assert len(healthy) == 2
    unscheduled = [rep for rep in volume["replicas"] if not rep["hostId"]]
    assert len(unscheduled) == 1
    assert len(volume["replicas"]) == 3

    check_volume_data(volume, expected_data)
    cleanup_volume(client, volume)
def test_hard_anti_affinity_offline_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas during
    the attaching process once a valid node is available.

    Once a new replica has been built as part of the attaching process, the
    volume should be Healthy again.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Detach the volume.
    7. Enable current node's scheduling.
    8. Attach the volume again.
    9. Wait for volume to become healthy with 3 replicas
    10. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)

    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)

    # Build a real list rather than a lazy map object: on Python 3 a map
    # iterator is exhausted after a single pass, so it cannot be consulted
    # repeatedly while polling for the new replica.
    replica_names = [replica.name for replica in volume.replicas]
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)

    # Scheduling is re-enabled while the volume is detached, so the rebuild
    # happens as part of the next attach.
    client.update(node, allowScheduling=True)
    volume.attach(hostId=host_id)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    check_volume_data(volume, data)
    cleanup_volume(client, volume)
def test_hard_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity work as expected.

    With Hard Anti-Affinity, scheduling on nodes with existing replicas should
    be forbidden, resulting in "Degraded" state.

    1. Create a volume and attach it to the current node.
    2. Write random `data` to the volume.
    3. Set the replica soft anti-affinity setting to "false".
    4. Disable scheduling on the current node.
    5. Remove the replica on the current node.
        1. Wait for the volume to become degraded.
        2. Wait for the volume to report a scheduling failure.
        3. Verify exactly two replicas are running in "RW" mode.
        4. Verify exactly one replica has no `hostId`.
        5. Verify three replicas exist in total.
    6. Check volume `data`.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)

    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)

    host_replica = get_host_replica(volume, host_id)
    volume.replicaRemove(name=host_replica["name"])

    # Instead of waiting for timeout and lengthening the tests a significant
    # amount we can make sure the scheduling isn't working by making sure the
    # volume becomes Degraded and reports a scheduling error.
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)

    # Read the volume only after both waits have finished, so the assertions
    # below see the state in which the scheduling failure was reported rather
    # than the earlier snapshot returned by the degraded wait.
    volume = client.by_id_volume(volume_name)

    # While there are three replicas that should exist to meet the Volume's
    # request, only two of those volumes should actually be Healthy.
    assert sum(
        1 for replica in volume["replicas"]
        if replica["running"] and replica["mode"] == "RW"
    ) == 2

    # Confirm that the final volume is an unscheduled volume.
    assert sum(
        1 for replica in volume["replicas"] if not replica["hostId"]
    ) == 1

    # Three replicas in total should still exist.
    assert len(volume["replicas"]) == 3

    check_volume_data(volume, data)
    cleanup_volume(client, volume)
def test_zone_tags(client, core_api, volume_name, k8s_node_zone_tags):  # NOQA
    """
    Test anti affinity zone feature

    1. Add Kubernetes zone labels to the nodes
        1. Only two zones now: zone1 and zone2
    2. Create a volume with two replicas
    3. Verify zone1 and zone2 hold the same number of replicas.
    4. Remove a random replica and wait for volume to finish rebuild
    5. Verify zone1 and zone2 hold the same number of replicas.
    6. Repeat step 4-5 a few times.
    7. Update volume to 3 replicas and verify every Longhorn node hosts
       exactly one of them
    8. Remove a random replica and wait for volume to finish rebuild
    9. Verify every Longhorn node hosts exactly one replica
    10. Repeat step 8-9 a few times
    """

    def _assert_zones_balanced():
        # Both zones must report the same replica count for the volume.
        in_zone1 = get_zone_replica_count(client, volume_name, ZONE1)
        in_zone2 = get_zone_replica_count(client, volume_name, ZONE2)
        assert in_zone1 == in_zone2

    def _remove_random_replica(expected_count):
        # Drop one random replica, wait for the volume to go degraded and
        # back to healthy with the same replica count, then return a freshly
        # fetched volume.
        current = client.by_id_volume(volume_name)
        count = len(current.replicas)
        assert count == expected_count
        victim_index = randrange(0, count)
        victim_name = current.replicas[victim_index].name
        current.replicaRemove(name=victim_name)
        wait_for_volume_degraded(client, volume_name)
        wait_for_volume_healthy(client, volume_name)
        wait_for_volume_replica_count(client, volume_name, count)
        return client.by_id_volume(volume_name)

    def _assert_one_replica_on_every_node(current):
        # Strike each replica's host off the node list; nothing may be left.
        # list.remove raises ValueError if a host appears twice or is unknown.
        remaining = [lh_node.name for lh_node in client.list_node()]
        for rep in current.replicas:
            remaining.remove(rep.hostId)
        assert not remaining

    wait_longhorn_node_zone_updated(client)

    volume = create_and_check_volume(client, volume_name, num_of_replicas=2)
    self_host = get_self_host_id()
    volume.attach(hostId=self_host)
    volume = wait_for_volume_healthy(client, volume_name)
    volume = client.by_id_volume(volume_name)
    _assert_zones_balanced()

    for _ in range(randrange(3, 5)):
        volume = _remove_random_replica(2)
        # NOTE(review): these names are read after the rebuild and passed as
        # a lazy map object; confirm what wait_new_replica_ready expects
        # before turning this into a list.
        current_names = map(lambda rep: rep.name, volume["replicas"])
        wait_new_replica_ready(client, volume_name, current_names)
        _assert_zones_balanced()

    volume.updateReplicaCount(replicaCount=3)
    wait_for_volume_degraded(client, volume_name)
    wait_for_volume_replica_count(client, volume_name, 3)
    wait_for_volume_healthy(client, volume_name)
    volume = client.by_id_volume(volume_name)
    _assert_one_replica_on_every_node(volume)

    for _ in range(randrange(3, 5)):
        volume = _remove_random_replica(3)
        _assert_one_replica_on_every_node(volume)
def test_data_locality_basic(client, core_api, volume_name, pod, settings_reset):  # NOQA
    """
    Test data locality basic feature

    Context:

    Data Locality feature allows users to have an option to keep a local
    replica on the same node as the consuming pod.
    Longhorn is currently supporting 2 modes:
    - disabled: Longhorn does not try to keep a local replica
    - best-effort: Longhorn tries to keep a local replica

    See manual tests at:
    https://github.com/longhorn/longhorn/issues/1045#issuecomment-680706283

    Steps:

    Case 1: Test that Longhorn builds a local replica on the engine node

    1. Create a volume(1) with 1 replica and dataLocality set to disabled
    2. Find the node where the replica is located. Let's call this node
       replica-node
    3. Attach the volume to a node different from replica-node. Let's call
       this node engine-node
    4. Write 200MB data to volume(1)
    5. Use a retry loop to verify that Longhorn does not create a replica
       on the engine-node
    6. Update dataLocality to best-effort for volume(1)
    7. Use a retry loop to verify that Longhorn creates and rebuilds a
       replica on the engine-node and removes the other replica
    8. Detach the volume(1) and attach it to a different node.
       Let's call the new node new-engine-node and the old
       node old-engine-node
    9. Wait for volume(1) to finish attaching
    10. Use a retry loop to verify that Longhorn creates and rebuilds a
        replica on the new-engine-node and removes the replica on
        old-engine-node

    Case 2: Test that Longhorn prioritizes deleting replicas on the same node

    1. Add the tag AVAIL to node-1 and node-2
    2. Set node soft anti-affinity to `true`.
    3. Create a volume(2) with 3 replicas and dataLocality set to best-effort
    4. Use a retry loop to verify that all 3 replicas are on node-1 and
       node-2, no replica is on node-3
    5. Attach volume(2) to node-3
    6. Use a retry loop to verify that there is no replica on node-3 and
       we can still read/write to volume(2)
    7. Find the node which contains 2 replicas.
       Let's call this node most-replica-node
    8. Set the replica count to 2 for volume(2)
    9. Verify that Longhorn removes one replica from most-replica-node

    Case 3: Test that the volume is not corrupted if there is an unexpected
    detachment during building local replica

    1. Remove the tag AVAIL from node-1 and node-2
       Set node soft anti-affinity to `false`.
    2. Create a volume(3) with 1 replica and dataLocality set to best-effort
    3. Attach volume(3) to node-3.
    4. Use a retry loop to verify that volume(3) has only 1 replica on node-3
    5. Write 800MB data to volume(3)
    6. Detach volume(3)
    7. Attach volume(3) to node-1
    8. Use a retry loop to:
        Wait until volume(3) finishes attaching.
        Wait until Longhorn starts rebuilding a replica on node-1
        Immediately detach volume(3)
    9. Verify that the replica on node-1 is in ERR state.
    10. Attach volume(3) to node-1
    11. Wait until volume(3) finishes attaching.
    12. Use a retry loop to verify that Longhorn cleans up the ERR replica,
        rebuilds a new replica on node-1, and removes the replica on node-3

    NOTE(review): the code for Case 3 departs from this outline in places:
    volume(3) is created without dataLocality and switched to best-effort
    after the write, and after the interrupted rebuild the test asserts that
    only the node-3 replica remains. Confirm which description is intended.

    Case 4: Make sure a failed-to-schedule local replica doesn't block the
    creation of other replicas.

    1. Disable scheduling for node-3
    2. Create a vol with 1 replica, `dataLocality = best-effort`.
       The replica is scheduled on a node (say node-1)
    3. Attach vol to node-3. There is a fail-to-schedule
       replica with Spec.HardNodeAffinity=node-3
    4. Increase numberOfReplica to 3. Verify that the replica set contains:
       one on node-1, one on node-2, one failed replica
       with Spec.HardNodeAffinity=node-3.
    5. Decrease numberOfReplica to 2. Verify that the replica set contains:
       one on node-1, one on node-2, one failed replica
       with Spec.HardNodeAffinity=node-3.
    6. Decrease numberOfReplica to 1. Verify that the replica set contains:
       one on node-1 or node-2, one failed replica
       with Spec.HardNodeAffinity=node-3.
    7. Increase numberOfReplica to 2. Verify that the replica set contains:
       one on node-1, one on node-2, one failed replica
       with Spec.HardNodeAffinity=node-3.
    8. Turn off data locality by setting `dataLocality=disabled` for the vol.
       Verify that the replica set contains: one on node-1, one on node-2

    NOTE(review): the code performs steps 7 and 8 in the opposite order
    (dataLocality is disabled before the replica count is raised to 2) and
    checks only the final replica set.

    9. clean up
    """

    # Case 1: Test that Longhorn builds a local replica on the engine node

    nodes = client.list_node()

    default_data_locality_setting = \
        client.by_id_setting(SETTING_DEFAULT_DATA_LOCALITY)
    # A failure to update the setting is only printed; the test carries on.
    try:
        client.update(default_data_locality_setting,
                      value="disabled")
    except Exception as e:
        print("Exception when update Default Data Locality setting",
              default_data_locality_setting, e)

    volume1_name = volume_name + "-1"
    volume1_size = str(500 * Mi)
    volume1_data_path = "/data/test"
    pv1_name = volume1_name + "-pv"
    pvc1_name = volume1_name + "-pvc"
    pod1_name = volume1_name + "-pod"
    # pod1 (and pod2/pod3 further down) alias the same `pod` dict, which is
    # mutated in place for each case.
    pod1 = pod

    pod1['metadata']['name'] = pod1_name

    volume1 = create_and_check_volume(client,
                                      volume1_name,
                                      num_of_replicas=1,
                                      size=volume1_size)

    volume1 = client.by_id_volume(volume1_name)
    create_pv_for_volume(client, core_api, volume1, pv1_name)
    create_pvc_for_volume(client, core_api, volume1, pvc1_name)

    volume1 = client.by_id_volume(volume1_name)
    volume1_replica_node = volume1.replicas[0]['hostId']

    # Pick the first node that does not hold the replica as the engine node.
    volume1_attached_node = None
    for node in nodes:
        if node.name != volume1_replica_node:
            volume1_attached_node = node.name
            break

    assert volume1_attached_node is not None

    pod1['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": pvc1_name
        }
    }]

    pod1['spec']['nodeSelector'] = \
        {"kubernetes.io/hostname": volume1_attached_node}
    create_and_wait_pod(core_api, pod1)

    write_pod_volume_random_data(core_api,
                                 pod1_name,
                                 volume1_data_path,
                                 DATA_SIZE_IN_MB_2)

    # With data locality disabled, check ten times (one second apart) that
    # the single replica stays off the engine node.
    for i in range(10):
        volume1 = client.by_id_volume(volume1_name)
        assert len(volume1.replicas) == 1
        assert volume1.replicas[0]['hostId'] != volume1_attached_node
        time.sleep(1)

    volume1 = client.by_id_volume(volume1_name)
    volume1.updateDataLocality(dataLocality="best-effort")

    # Poll until the only replica is the one on the engine node; the volume
    # must report healthy robustness on every poll.
    for _ in range(RETRY_COUNTS):
        volume1 = client.by_id_volume(volume1_name)
        assert volume1[VOLUME_FIELD_ROBUSTNESS] == VOLUME_ROBUSTNESS_HEALTHY
        if len(volume1.replicas) == 1 and \
                volume1.replicas[0]['hostId'] == volume1_attached_node:
            break
        time.sleep(RETRY_INTERVAL)
    assert len(volume1.replicas) == 1
    assert volume1.replicas[0]['hostId'] == volume1_attached_node

    delete_and_wait_pod(core_api, pod1_name)
    volume1 = wait_for_volume_detached(client, volume1_name)
    volume1_replica_node = volume1.replicas[0]['hostId']

    # Move the pod to a node other than the one now holding the replica.
    volume1_attached_node = None
    for node in nodes:
        if node.name != volume1_replica_node:
            volume1_attached_node = node.name
            break

    assert volume1_attached_node is not None

    pod1['spec']['nodeSelector'] = \
        {"kubernetes.io/hostname": volume1_attached_node}
    create_and_wait_pod(core_api, pod1)

    # Same polling as above, now against the new engine node.
    for _ in range(RETRY_COUNTS):
        volume1 = client.by_id_volume(volume1_name)
        assert volume1[VOLUME_FIELD_ROBUSTNESS] == VOLUME_ROBUSTNESS_HEALTHY
        if len(volume1.replicas) == 1 and \
                volume1.replicas[0]['hostId'] == volume1_attached_node:
            break
        time.sleep(RETRY_INTERVAL)
    assert len(volume1.replicas) == 1
    assert volume1.replicas[0]['hostId'] == volume1_attached_node

    delete_and_wait_pod(core_api, pod1_name)
    wait_for_volume_detached(client, volume1_name)

    # Case 2: Test that Longhorn prioritizes deleting replicas on the same node

    # Indexing assumes the node list has at least three entries.
    node1 = nodes[0]
    node2 = nodes[1]
    node3 = nodes[2]

    client.update(node1, allowScheduling=True, tags=["AVAIL"])
    client.update(node2, allowScheduling=True, tags=["AVAIL"])

    replica_node_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    try:
        client.update(replica_node_soft_anti_affinity_setting,
                      value="true")
    except Exception as e:
        print(
            "Exception when update "
            "Replica Node Level Soft Anti-Affinity setting",
            replica_node_soft_anti_affinity_setting, e)

    volume2_name = volume_name + "-2"
    volume2_size = str(500 * Mi)
    pv2_name = volume2_name + "-pv"
    pvc2_name = volume2_name + "-pvc"
    pod2_name = volume2_name + "-pod"
    pod2 = pod

    pod2['metadata']['name'] = pod2_name

    volume2 = client.create_volume(name=volume2_name,
                                   size=volume2_size,
                                   numberOfReplicas=3,
                                   nodeSelector=["AVAIL"],
                                   dataLocality="best-effort")

    volume2 = wait_for_volume_detached(client, volume2_name)
    volume2 = client.by_id_volume(volume2_name)
    create_pv_for_volume(client, core_api, volume2, pv2_name)
    create_pvc_for_volume(client, core_api, volume2, pvc2_name)

    volume2 = client.by_id_volume(volume2_name)

    pod2['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": pvc2_name
        }
    }]

    pod2['spec']['nodeSelector'] = {"kubernetes.io/hostname": node3.name}
    create_and_wait_pod(core_api, pod2)

    volume2 = wait_for_volume_healthy(client, volume2_name)

    # node3 does not carry the AVAIL tag, so no replica may land on it.
    for replica in volume2.replicas:
        assert replica["hostId"] != node3.name

    volume2.updateReplicaCount(replicaCount=2)

    # 2 Healthy replicas and 1 replica failed to schedule
    # The failed to schedule replica is the local replica on node3
    volume2 = wait_for_volume_replica_count(client, volume2_name, 3)
    volume2 = client.by_id_volume(volume2_name)

    volume2_healthy_replicas = []
    for replica in volume2.replicas:
        if replica.running is True:
            volume2_healthy_replicas.append(replica)

    assert len(volume2_healthy_replicas) == 2

    # The two running replicas must sit on different nodes.
    volume2_rep1 = volume2_healthy_replicas[0]
    volume2_rep2 = volume2_healthy_replicas[1]
    assert volume2_rep1["hostId"] != volume2_rep2["hostId"]
    delete_and_wait_pod(core_api, pod2_name)
    wait_for_volume_detached(client, volume2_name)

    # Case 3: Test that the volume is not corrupted if there is an unexpected
    # detachment during building local replica

    client.update(node1, allowScheduling=True, tags=[])
    client.update(node2, allowScheduling=True, tags=[])

    replica_node_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    try:
        client.update(replica_node_soft_anti_affinity_setting,
                      value="false")
    except Exception as e:
        print(
            "Exception when update "
            "Replica Node Level Soft Anti-Affinity setting",
            replica_node_soft_anti_affinity_setting, e)

    volume3_name = volume_name + "-3"
    volume3_size = str(1 * Gi)
    volume3_data_path = "/data/test"
    pv3_name = volume3_name + "-pv"
    pvc3_name = volume3_name + "-pvc"
    pod3_name = volume3_name + "-pod"
    pod3 = pod

    pod3['metadata']['name'] = pod3_name

    volume3 = client.create_volume(name=volume3_name,
                                   size=volume3_size,
                                   numberOfReplicas=1)

    volume3 = wait_for_volume_detached(client, volume3_name)
    volume3 = client.by_id_volume(volume3_name)
    create_pv_for_volume(client, core_api, volume3, pv3_name)
    create_pvc_for_volume(client, core_api, volume3, pvc3_name)

    volume3 = client.by_id_volume(volume3_name)

    pod3['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": pvc3_name
        }
    }]

    pod3['spec']['nodeSelector'] = {"kubernetes.io/hostname": node3.name}
    create_and_wait_pod(core_api, pod3)
    volume3 = wait_for_volume_healthy(client, volume3_name)

    write_pod_volume_random_data(core_api,
                                 pod3_name,
                                 volume3_data_path,
                                 DATA_SIZE_IN_MB_4)

    volume3.updateDataLocality(dataLocality="best-effort")
    volume3 = client.by_id_volume(volume3_name)

    # Only when the replica is not already on node3 is a rebuild expected.
    # NOTE(review): extent of this `if` block reconstructed from a source
    # whose indentation was lost - confirm against the original file.
    if volume3.replicas[0]['hostId'] != node3.name:
        wait_for_rebuild_start(client, volume3_name)
        volume3 = client.by_id_volume(volume3_name)
        assert len(volume3.replicas) == 2
        wait_for_rebuild_complete(client, volume3_name)

    volume3 = wait_for_volume_replica_count(client, volume3_name, 1)
    assert volume3.replicas[0]["hostId"] == node3.name

    delete_and_wait_pod(core_api, pod3_name)

    # Re-create the pod on node1 and interrupt the rebuild that follows.
    pod3['spec']['nodeSelector'] = {"kubernetes.io/hostname": node1.name}
    create_and_wait_pod(core_api, pod3)

    wait_for_rebuild_start(client, volume3_name)
    crash_engine_process_with_sigkill(client, core_api, volume3_name)
    delete_and_wait_pod(core_api, pod3_name)
    wait_for_volume_detached(client, volume3_name)
    volume3 = client.by_id_volume(volume3_name)
    # After the interrupted rebuild only the node3 replica is expected.
    assert len(volume3.replicas) == 1
    assert volume3.replicas[0]["hostId"] == node3.name

    create_and_wait_pod(core_api, pod3)
    wait_for_rebuild_start(client, volume3_name)
    volume3 = client.by_id_volume(volume3_name)
    assert len(volume3.replicas) == 2
    wait_for_rebuild_complete(client, volume3_name)

    # Wait for deletion of extra replica
    volume3 = wait_for_volume_replica_count(client, volume3_name, 1)
    assert volume3.replicas[0]["hostId"] == node1.name
    assert volume3.replicas[0]["mode"] == "RW"
    assert volume3.replicas[0]["running"] is True

    delete_and_wait_pod(core_api, pod3_name)
    wait_for_volume_detached(client, volume3_name)

    # Case 4: Make sure failed to schedule local replica doesn't block the
    # the creation of other replicas.

    replica_node_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    try:
        client.update(replica_node_soft_anti_affinity_setting,
                      value="false")
    except Exception as e:
        print(
            "Exception when update "
            "Replica Node Level Soft Anti-Affinity setting",
            replica_node_soft_anti_affinity_setting, e)

    client.update(node3, allowScheduling=False)

    volume4_name = volume_name + "-4"
    volume4_size = str(1 * Gi)

    volume4 = client.create_volume(name=volume4_name,
                                   size=volume4_size,
                                   numberOfReplicas=1,
                                   dataLocality="best-effort")

    volume4 = wait_for_volume_detached(client, volume4_name)
    volume4 = client.by_id_volume(volume4_name)
    # Remember the original replica so it can be told apart from the extra
    # replica that shows up after attaching to node3.
    volume4_replica_name = volume4.replicas[0]["name"]

    volume4.attach(hostId=node3.name)

    wait_for_volume_healthy(client, volume4_name)

    volume4 = client.by_id_volume(volume4_name)
    assert len(volume4.replicas) == 2

    # The original replica runs in RW mode; the other one is not running
    # and has an empty mode.
    for replica in volume4.replicas:
        if replica["name"] == volume4_replica_name:
            assert replica["running"] is True
            assert replica["mode"] == "RW"
        else:
            assert replica["running"] is False
            assert replica["mode"] == ""

    assert volume4.conditions.scheduled.reason == \
        "LocalReplicaSchedulingFailure"

    volume4 = volume4.updateReplicaCount(replicaCount=3)

    volume4 = wait_for_volume_degraded(client, volume4_name)

    # Count replicas per node; an empty hostId marks an unscheduled replica.
    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_failed_replica_count = 0

    for replica in volume4.replicas:
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == "":
            v4_failed_replica_count += 1

    assert v4_node1_replica_count == 1
    assert v4_node2_replica_count == 1
    assert v4_failed_replica_count > 0

    volume4 = volume4.updateReplicaCount(replicaCount=2)

    # Three replica objects are expected here: two scheduled ones plus the
    # unscheduled one.
    volume4 = wait_for_volume_replica_count(client, volume4_name, 3)

    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_failed_replica_count = 0

    for replica in volume4.replicas:
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == "":
            v4_failed_replica_count += 1

    assert v4_node1_replica_count == 1
    assert v4_node2_replica_count == 1
    assert v4_failed_replica_count > 0

    volume4 = volume4.updateReplicaCount(replicaCount=1)

    volume4 = wait_for_volume_replica_count(client, volume4_name, 2)

    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_failed_replica_count = 0

    for replica in volume4.replicas:
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == "":
            v4_failed_replica_count += 1

    # Exactly one scheduled replica (on node1 or node2) plus one unscheduled.
    assert v4_node1_replica_count + v4_node2_replica_count == 1
    assert v4_failed_replica_count == 1

    volume4 = volume4.updateDataLocality(dataLocality="disabled")
    volume4 = volume4.updateReplicaCount(replicaCount=2)

    # Poll until exactly two replicas are running with an empty failedAt.
    running_replica_count = 0
    for _ in range(RETRY_COUNTS):
        volume4 = client.by_id_volume(volume4_name)
        running_replica_count = 0
        for r in volume4.replicas:
            if r.failedAt == "" and r.running is True:
                running_replica_count += 1
        if running_replica_count == 2:
            break
        time.sleep(RETRY_INTERVAL)
    assert running_replica_count == 2

    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_node3_replica_count = 0

    for replica in volume4.replicas:
        wait_for_replica_running(client, volume4_name, replica["name"])
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == node3.name:
            v4_node3_replica_count += 1
    # With data locality disabled, no replica is expected on node3.
    assert v4_node1_replica_count == 1
    assert v4_node2_replica_count == 1
    assert v4_node3_replica_count == 0