Example no. 1
import random
from random import randrange

# wait_for_volume_degraded, wait_for_volume_replica_count and
# wait_new_replica_ready are assumed to be provided by the test suite's
# common helpers.
def delete_replica(client, volume_name):
    volume = client.by_id_volume(volume_name)

    replica_count = len(volume.replicas)

    healthy_replica_count = 0
    for replica in volume.replicas:
        if replica.running is True and replica.mode == "RW":
            healthy_replica_count += 1

    # return if there is no or only one healthy replica left
    if healthy_replica_count == 1:
        print("skipped, only one healthy replica found", end=" ")
        return

    if healthy_replica_count == 0:
        print("skipped, no healthy replicas found", end=" ")
        return

    replica_id = randrange(0, replica_count)

    replica_name = volume["replicas"][replica_id]["name"]

    volume.replicaRemove(name=replica_name)

    wait_for_volume_degraded(client, volume_name)

    global WAIT_REPLICA_REBUILD
    if WAIT_REPLICA_REBUILD is None:
        WAIT_REPLICA_REBUILD = bool(random.getrandbits(1))

    if WAIT_REPLICA_REBUILD is True:
        wait_for_volume_replica_count(client, volume_name, replica_count)
        replica_names = map(lambda replica: replica.name, volume["replicas"])
        wait_new_replica_ready(client, volume_name, replica_names)
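
A minimal usage sketch (not part of the original suite), assuming delete_replica above and the wait_for_volume_healthy helper used elsewhere in these excerpts are available in the same module: it deletes a random replica a few times and lets Longhorn rebuild in between.

def exercise_random_replica_deletion(client, volume_name, iterations=3):
    # Hypothetical helper, for illustration only: repeatedly delete a
    # random replica and wait until Longhorn reports the volume healthy
    # again before the next round.
    for _ in range(iterations):
        delete_replica(client, volume_name)
        wait_for_volume_healthy(client, volume_name)
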
Example no. 2
def test_volume_update_replica_count(clients, volume_name):  # NOQA
    for host_id, client in clients.items():
        break

    replica_count = 3
    volume = create_and_check_volume(client, volume_name, replica_count)

    volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    replica_count = 5
    volume = volume.updateReplicaCount(replicaCount=replica_count)
    volume = common.wait_for_volume_degraded(client, volume_name)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == replica_count

    old_replica_count = replica_count
    replica_count = 2
    volume = volume.updateReplicaCount(replicaCount=replica_count)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == old_replica_count

    volume.replicaRemove(name=volume["replicas"][0]["name"])
    volume.replicaRemove(name=volume["replicas"][1]["name"])
    volume.replicaRemove(name=volume["replicas"][2]["name"])

    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                  replica_count)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == replica_count

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)
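
# The healthy-replica check in delete_replica above (running and in "RW"
# mode) recurs in several of these tests; a small illustrative helper, not
# part of the suite, that factors it out:
def count_healthy_replicas(volume):
    # Illustrative only: a replica counts as healthy when it is running
    # and in read-write mode, mirroring the check in delete_replica above.
    return sum(1 for replica in volume.replicas
               if replica.running is True and replica.mode == "RW")
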
def engine_live_upgrade_rollback_test(client,
                                      volume_name,
                                      base_image=""):  # NOQA
    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    cli_v = default_img["cliAPIVersion"]
    cli_minv = default_img["cliAPIMinVersion"]
    ctl_v = default_img["controllerAPIVersion"]
    ctl_minv = default_img["controllerAPIMinVersion"]
    data_v = default_img["dataFormatVersion"]
    data_minv = default_img["dataFormatMinVersion"]
    wrong_engine_upgrade_image = common.get_compatibility_test_image(
        cli_v, cli_minv, ctl_v, ctl_minv, data_v, data_minv)
    new_img = client.create_engine_image(image=wrong_engine_upgrade_image)
    new_img_name = new_img["name"]
    new_img = wait_for_engine_image_state(client, new_img_name, "ready")
    assert new_img["refCount"] == 0
    assert new_img["noRefSince"] != ""

    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]

    volume = client.create_volume(name=volume_name,
                                  size=SIZE,
                                  numberOfReplicas=2,
                                  baseImage=base_image)
    volume = common.wait_for_volume_detached(client, volume_name)
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 1)
    assert volume["baseImage"] == base_image

    original_engine_image = volume["engineImage"]
    assert original_engine_image != wrong_engine_upgrade_image

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    data = write_volume_random_data(volume)

    volume.engineUpgrade(image=wrong_engine_upgrade_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == wrong_engine_upgrade_image
    assert volume["currentImage"] == original_engine_image

    with pytest.raises(Exception):
        # this will timeout
        wait_for_volume_current_image(client, volume_name,
                                      wrong_engine_upgrade_image)

    # rollback
    volume.engineUpgrade(image=original_engine_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           original_engine_image)
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image
    assert engine["currentImage"] == original_engine_image

    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                  REPLICA_COUNT)

    check_volume_data(volume, data)

    assert volume["state"] == common.VOLUME_STATE_ATTACHED
    assert volume["robustness"] == common.VOLUME_ROBUSTNESS_HEALTHY

    # try again, this time let's try detach
    volume.engineUpgrade(image=wrong_engine_upgrade_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == wrong_engine_upgrade_image
    assert volume["currentImage"] == original_engine_image

    with pytest.raises(Exception):
        # this will timeout
        wait_for_volume_current_image(client, volume_name,
                                      wrong_engine_upgrade_image)

    volume = volume.detach()
    volume = wait_for_volume_current_image(client, volume_name,
                                           wrong_engine_upgrade_image)
    # all the images would be updated
    assert volume["engineImage"] == wrong_engine_upgrade_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == wrong_engine_upgrade_image
    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                  REPLICA_COUNT)
    for replica in volume["replicas"]:
        assert replica["engineImage"] == wrong_engine_upgrade_image

    # upgrade to the correct image when offline
    volume.engineUpgrade(image=original_engine_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == original_engine_image

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image
    assert engine["currentImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image
        assert replica["currentImage"] == original_engine_image

    check_volume_data(volume, data)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    client.delete(new_img)
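
The wait_for_* helpers used above are imported from the suite's common module (as the common. prefix elsewhere suggests) and are not shown here; conceptually they are retry loops over the Longhorn API. A plausible, purely illustrative sketch of one of them, reusing the RETRY_COUNTS and RETRY_INTERVAL constants that appear later in these excerpts:

import time

def wait_for_volume_current_image_sketch(client, volume_name, image):
    # Illustrative only, not the suite's actual implementation: poll the
    # volume until its currentImage matches the expected engine image, or
    # fail once the retry budget is exhausted.
    for _ in range(RETRY_COUNTS):
        volume = client.by_id_volume(volume_name)
        if volume["currentImage"] == image:
            return volume
        time.sleep(RETRY_INTERVAL)
    raise AssertionError(
        "volume %s never reached engine image %s" % (volume_name, image))
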
Example no. 4
def test_zone_tags(client, core_api, volume_name, k8s_node_zone_tags):  # NOQA
    """
    Test the zone anti-affinity feature

    1. Add Kubernetes zone labels to the nodes
        1. Only two zones now: zone1 and zone2
    2. Create a volume with two replicas
    3. Verify that zone1 and zone2 each have one replica.
    4. Remove a random replica and wait for volume to finish rebuild
    5. Verify that zone1 and zone2 each have one replica.
    6. Repeat steps 4-5 a few times.
    7. Update volume to 3 replicas, make sure they're scheduled on 3 nodes
    8. Remove a random replica and wait for volume to finish rebuild
    9. Make sure replicas are on different nodes
    10. Repeat steps 8-9 a few times
    """

    wait_longhorn_node_zone_updated(client)

    volume = create_and_check_volume(client, volume_name, num_of_replicas=2)

    host_id = get_self_host_id()

    volume.attach(hostId=host_id)

    volume = wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)

    zone1_replica_count = get_zone_replica_count(client, volume_name, ZONE1)
    zone2_replica_count = get_zone_replica_count(client, volume_name, ZONE2)

    assert zone1_replica_count == zone2_replica_count

    for i in range(randrange(3, 5)):
        volume = client.by_id_volume(volume_name)

        replica_count = len(volume.replicas)
        assert replica_count == 2

        replica_id = randrange(0, replica_count)

        replica_name = volume.replicas[replica_id].name

        volume.replicaRemove(name=replica_name)

        wait_for_volume_degraded(client, volume_name)

        wait_for_volume_healthy(client, volume_name)

        wait_for_volume_replica_count(client, volume_name, replica_count)

        volume = client.by_id_volume(volume_name)

        replica_names = map(lambda replica: replica.name, volume["replicas"])

        wait_new_replica_ready(client, volume_name, replica_names)

        zone1_replica_count = \
            get_zone_replica_count(client, volume_name, ZONE1)
        zone2_replica_count = \
            get_zone_replica_count(client, volume_name, ZONE2)

        assert zone1_replica_count == zone2_replica_count

    volume.updateReplicaCount(replicaCount=3)

    wait_for_volume_degraded(client, volume_name)

    wait_for_volume_replica_count(client, volume_name, 3)

    wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)

    lh_node_names = list(map(lambda node: node.name, client.list_node()))

    for replica in volume.replicas:
        lh_node_names.remove(replica.hostId)

    assert lh_node_names == []

    for i in range(randrange(3, 5)):
        volume = client.by_id_volume(volume_name)

        replica_count = len(volume.replicas)
        assert replica_count == 3

        replica_id = randrange(0, replica_count)

        replica_name = volume.replicas[replica_id].name

        volume.replicaRemove(name=replica_name)

        wait_for_volume_degraded(client, volume_name)

        wait_for_volume_healthy(client, volume_name)

        wait_for_volume_replica_count(client, volume_name, replica_count)

        volume = client.by_id_volume(volume_name)

        lh_node_names = list(map(lambda node: node.name, client.list_node()))

        for replica in volume.replicas:
            lh_node_names.remove(replica.hostId)

        assert lh_node_names == []
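
get_zone_replica_count is not shown in this excerpt; a hedged sketch of what it plausibly does follows. The zone attribute on the node object is an assumption about how the Longhorn client exposes zone information.

def get_zone_replica_count_sketch(client, volume_name, zone_name):
    # Illustrative only: count the volume's replicas whose host node is
    # assigned to the given zone (node.zone is assumed here).
    volume = client.by_id_volume(volume_name)
    nodes = {node.name: node for node in client.list_node()}
    count = 0
    for replica in volume.replicas:
        node = nodes.get(replica.hostId)
        if node is not None and node.zone == zone_name:
            count += 1
    return count
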
Example no. 5
def engine_live_upgrade_rollback_test(client,
                                      core_api,
                                      volume_name,
                                      backing_image=""):  # NOQA
    default_img = common.get_default_engine_image(client)
    default_img_name = default_img.name
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    cli_v = default_img.cliAPIVersion
    cli_minv = default_img.cliAPIMinVersion
    ctl_v = default_img.controllerAPIVersion
    ctl_minv = default_img.controllerAPIMinVersion
    data_v = default_img.dataFormatVersion
    data_minv = default_img.dataFormatMinVersion
    wrong_engine_upgrade_image = common.get_compatibility_test_image(
        cli_v, cli_minv, ctl_v, ctl_minv, data_v, data_minv)
    new_img = client.create_engine_image(image=wrong_engine_upgrade_image)
    new_img_name = new_img.name
    ei_status_value = get_engine_image_status_value(client, new_img_name)
    new_img = wait_for_engine_image_state(client, new_img_name,
                                          ei_status_value)
    assert new_img.refCount == 0
    assert new_img.noRefSince != ""

    default_img = common.get_default_engine_image(client)
    default_img_name = default_img.name

    client.create_volume(name=volume_name,
                         size=SIZE,
                         numberOfReplicas=2,
                         backingImage=backing_image)
    volume = common.wait_for_volume_detached(client, volume_name)
    wait_for_engine_image_ref_count(client, default_img_name, 1)
    assert volume.backingImage == backing_image

    original_engine_image = volume.engineImage
    assert original_engine_image != wrong_engine_upgrade_image

    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    data = write_volume_random_data(volume)

    volume.engineUpgrade(image=wrong_engine_upgrade_image)
    volume = client.by_id_volume(volume.name)
    assert volume.engineImage == wrong_engine_upgrade_image
    assert volume.currentImage == original_engine_image

    with pytest.raises(Exception):
        # this will timeout
        wait_for_volume_current_image(client, volume_name,
                                      wrong_engine_upgrade_image)

    # rollback
    volume.engineUpgrade(image=original_engine_image)
    wait_for_volume_current_image(client, volume_name, original_engine_image)
    volume = wait_for_volume_replicas_mode(client, volume_name, "RW")
    assert volume.engineImage == original_engine_image
    assert volume.currentImage == original_engine_image
    engine = get_volume_engine(volume)
    assert engine.engineImage == original_engine_image
    assert engine.currentImage == original_engine_image

    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                  REPLICA_COUNT)

    check_volume_data(volume, data)

    assert volume.state == common.VOLUME_STATE_ATTACHED
    assert volume.robustness == common.VOLUME_ROBUSTNESS_HEALTHY

    # try again, this time let's try detach
    volume.engineUpgrade(image=wrong_engine_upgrade_image)
    volume = client.by_id_volume(volume.name)
    assert volume.engineImage == wrong_engine_upgrade_image
    assert volume.currentImage == original_engine_image

    with pytest.raises(Exception):
        # this will timeout
        wait_for_volume_current_image(client, volume_name,
                                      wrong_engine_upgrade_image)

    volume.detach(hostId="")
    volume = wait_for_volume_current_image(client, volume_name,
                                           wrong_engine_upgrade_image)
    # all the images would be updated
    assert volume.engineImage == wrong_engine_upgrade_image
    engine = get_volume_engine(volume)
    assert engine.engineImage == wrong_engine_upgrade_image
    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                  REPLICA_COUNT)
    for replica in volume.replicas:
        assert replica.engineImage == wrong_engine_upgrade_image

    # upgrade to the correct image when offline
    volume.engineUpgrade(image=original_engine_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           original_engine_image)
    volume = client.by_id_volume(volume.name)
    assert volume.engineImage == original_engine_image

    volume.attach(hostId=host_id)
    common.wait_for_volume_healthy(client, volume_name)
    volume = wait_for_volume_replicas_mode(client, volume_name, "RW")
    assert volume.engineImage == original_engine_image
    assert volume.currentImage == original_engine_image
    engine = get_volume_engine(volume)
    assert engine.engineImage == original_engine_image
    assert engine.currentImage == original_engine_image
    check_volume_endpoint(volume)
    for replica in volume.replicas:
        assert replica.engineImage == original_engine_image
        assert replica.currentImage == original_engine_image

    check_volume_data(volume, data)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    client.delete(new_img)
    wait_for_engine_image_deletion(client, core_api, new_img.name)
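
# wait_for_volume_replicas_mode is used above but not shown; an
# illustrative sketch under the same RETRY_COUNTS/RETRY_INTERVAL
# assumptions as the previous sketch:
import time

def wait_for_volume_replicas_mode_sketch(client, volume_name, mode):
    # Illustrative only: poll until every replica of the volume reports
    # the expected mode (e.g. "RW"), then return the refreshed volume.
    for _ in range(RETRY_COUNTS):
        volume = client.by_id_volume(volume_name)
        replicas = volume.replicas
        if replicas and all(r.mode == mode for r in replicas):
            return volume
        time.sleep(RETRY_INTERVAL)
    raise AssertionError(
        "replicas of %s never all reached mode %s" % (volume_name, mode))
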
def test_data_locality_basic(client, core_api, volume_name, pod,
                             settings_reset):  # NOQA
    """
    Test data locality basic feature

    Context:

    The Data Locality feature gives users the option to keep a local
    replica on the same node as the consuming pod.
    Longhorn currently supports 2 modes:
    - disabled: Longhorn does not try to keep a local replica
    - best-effort: Longhorn tries to keep a local replica

    See manual tests at:
    https://github.com/longhorn/longhorn/issues/1045#issuecomment-680706283

    Steps:

    Case 1: Test that Longhorn builds a local replica on the engine node

    1. Create a volume(1) with 1 replica and dataLocality set to disabled
    2. Find the node where the replica is located.
       Let's call this node replica-node
    3. Attach the volume to a node different from replica-node.
       Let's call this node engine-node
    4. Write 200MB data to volume(1)
    5. Use a retry loop to verify that Longhorn does not create
       a replica on the engine-node
    6. Update dataLocality to best-effort for volume(1)
    7. Use a retry loop to verify that Longhorn creates and rebuilds
       a replica on the engine-node and removes the other replica
    8. Detach the volume(1) and attach it to a different node.
       Let's call the new node new-engine-node and the old
       node old-engine-node
    9. Wait for volume(1) to finish attaching
    10. Use a retry loop to verify that Longhorn creates and rebuilds
       a replica on the new-engine-node and removes the replica on
       old-engine-node

    Case 2: Test that Longhorn prioritizes deleting replicas on the same node

    1. Add the tag AVAIL to node-1 and node-2
    2. Set node soft anti-affinity to `true`.
    3. Create a volume(2) with 3 replicas and dataLocality set to best-effort
    4. Use a retry loop to verify that all 3 replicas are on node-1 and
        node-2, no replica is on node-3
    5. Attach volume(2) to node-3
    6. Use a retry loop to verify that there is no replica on node-3 and
        we can still read/write to volume(2)
    7. Find the node which contains 2 replicas.
        Let's call this node most-replica-node
    8. Set the replica count to 2 for volume(2)
    9. Verify that Longhorn removes one replica from most-replica-node

    Case 3: Test that the volume is not corrupted if there is an unexpected
    detachment while building the local replica

    1. Remove the tag AVAIL from node-1 and node-2
       Set node soft anti-affinity to `false`.
    2. Create a volume(3) with 1 replica and dataLocality set to best-effort
    3. Attach volume(3) to node-3.
    4. Use a retry loop to verify that volume(3) has only 1 replica on node-3
    5. Write 800MB data to volume(3)
    6. Detach volume(3)
    7. Attach volume(3) to node-1
    8. Use a retry loop to:
        Wait until volume(3) finishes attaching.
        Wait until Longhorn starts rebuilding a replica on node-1
        Immediately detach volume(3)
    9. Verify that the replica on node-1 is in ERR state.
    10. Attach volume(3) to node-1
    11. Wait until volume(3) finishes attaching.
    12. Use a retry loop to verify that Longhorn cleans up the ERR replica,
        rebuilds a new replica on node-1, and removes the replica on node-3

    Case 4: Make sure a failed-to-schedule local replica doesn't block
    the creation of other replicas.

    1. Disable scheduling for node-3
    2. Create a vol with 1 replica, `dataLocality = best-effort`.
        The replica is scheduled on a node (say node-1)
    3. Attach vol to node-3. There is a fail-to-schedule
        replica with Spec.HardNodeAffinity=node-3
    4. Increase numberOfReplica to 3. Verify that the replica set contains:
        one on node-1, one on node-2,  one failed replica
        with Spec.HardNodeAffinity=node-3.
    5. Decrease numberOfReplica to 2. Verify that the replica set contains:
        one on node-1, one on node-2,  one failed replica
        with Spec.HardNodeAffinity=node-3.
    6. Decrease numberOfReplica to 1. Verify that the replica set contains:
        one on node-1 or node-2,  one failed replica
        with Spec.HardNodeAffinity=node-3.
    7. Decrease numberOfReplica to 2. Verify that the replica set contains:
        one on node-1, one on node-2, one failed replica
        with Spec.HardNodeAffinity=node-3.
    8. Turn off data locality by setting `dataLocality=disabled` for the vol.
        Verify that the replica set contains: one on node-1, one on node-2

    9. clean up
    """

    # Case 1: Test that Longhorn builds a local replica on the engine node

    nodes = client.list_node()

    default_data_locality_setting = \
        client.by_id_setting(SETTING_DEFAULT_DATA_LOCALITY)
    try:
        client.update(default_data_locality_setting, value="disabled")
    except Exception as e:
        print("Exception when update Default Data Locality setting",
              default_data_locality_setting, e)

    volume1_name = volume_name + "-1"
    volume1_size = str(500 * Mi)
    volume1_data_path = "/data/test"
    pv1_name = volume1_name + "-pv"
    pvc1_name = volume1_name + "-pvc"
    pod1_name = volume1_name + "-pod"
    pod1 = pod

    pod1['metadata']['name'] = pod1_name

    volume1 = create_and_check_volume(client,
                                      volume1_name,
                                      num_of_replicas=1,
                                      size=volume1_size)

    volume1 = client.by_id_volume(volume1_name)
    create_pv_for_volume(client, core_api, volume1, pv1_name)
    create_pvc_for_volume(client, core_api, volume1, pvc1_name)

    volume1 = client.by_id_volume(volume1_name)
    volume1_replica_node = volume1.replicas[0]['hostId']

    volume1_attached_node = None
    for node in nodes:
        if node.name != volume1_replica_node:
            volume1_attached_node = node.name
            break

    assert volume1_attached_node is not None

    pod1['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": pvc1_name
        }
    }]

    pod1['spec']['nodeSelector'] = \
        {"kubernetes.io/hostname": volume1_attached_node}
    create_and_wait_pod(core_api, pod1)

    write_pod_volume_random_data(core_api, pod1_name, volume1_data_path,
                                 DATA_SIZE_IN_MB_2)

    for i in range(10):
        volume1 = client.by_id_volume(volume1_name)
        assert len(volume1.replicas) == 1
        assert volume1.replicas[0]['hostId'] != volume1_attached_node
        time.sleep(1)

    volume1 = client.by_id_volume(volume1_name)
    volume1.updateDataLocality(dataLocality="best-effort")

    for _ in range(RETRY_COUNTS):
        volume1 = client.by_id_volume(volume1_name)
        assert volume1[VOLUME_FIELD_ROBUSTNESS] == VOLUME_ROBUSTNESS_HEALTHY
        if len(volume1.replicas) == 1 and \
                volume1.replicas[0]['hostId'] == volume1_attached_node:
            break
        time.sleep(RETRY_INTERVAL)
    assert len(volume1.replicas) == 1
    assert volume1.replicas[0]['hostId'] == volume1_attached_node

    delete_and_wait_pod(core_api, pod1_name)
    volume1 = wait_for_volume_detached(client, volume1_name)

    volume1_replica_node = volume1.replicas[0]['hostId']

    volume1_attached_node = None
    for node in nodes:
        if node.name != volume1_replica_node:
            volume1_attached_node = node.name
            break

    assert volume1_attached_node is not None

    pod1['spec']['nodeSelector'] = \
        {"kubernetes.io/hostname": volume1_attached_node}
    create_and_wait_pod(core_api, pod1)
    for _ in range(RETRY_COUNTS):
        volume1 = client.by_id_volume(volume1_name)
        assert volume1[VOLUME_FIELD_ROBUSTNESS] == VOLUME_ROBUSTNESS_HEALTHY
        if len(volume1.replicas) == 1 and \
                volume1.replicas[0]['hostId'] == volume1_attached_node:
            break
        time.sleep(RETRY_INTERVAL)
    assert len(volume1.replicas) == 1
    assert volume1.replicas[0]['hostId'] == volume1_attached_node
    delete_and_wait_pod(core_api, pod1_name)
    wait_for_volume_detached(client, volume1_name)

    # Case 2: Test that Longhorn prioritizes deleting replicas on the same node

    node1 = nodes[0]
    node2 = nodes[1]
    node3 = nodes[2]

    client.update(node1, allowScheduling=True, tags=["AVAIL"])
    client.update(node2, allowScheduling=True, tags=["AVAIL"])

    replica_node_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    try:
        client.update(replica_node_soft_anti_affinity_setting, value="true")
    except Exception as e:
        print(
            "Exception when update "
            "Replica Node Level Soft Anti-Affinity setting",
            replica_node_soft_anti_affinity_setting, e)

    volume2_name = volume_name + "-2"
    volume2_size = str(500 * Mi)
    pv2_name = volume2_name + "-pv"
    pvc2_name = volume2_name + "-pvc"
    pod2_name = volume2_name + "-pod"
    pod2 = pod

    pod2['metadata']['name'] = pod2_name

    volume2 = client.create_volume(name=volume2_name,
                                   size=volume2_size,
                                   numberOfReplicas=3,
                                   nodeSelector=["AVAIL"],
                                   dataLocality="best-effort")

    volume2 = wait_for_volume_detached(client, volume2_name)
    volume2 = client.by_id_volume(volume2_name)
    create_pv_for_volume(client, core_api, volume2, pv2_name)
    create_pvc_for_volume(client, core_api, volume2, pvc2_name)

    volume2 = client.by_id_volume(volume2_name)

    pod2['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": pvc2_name
        }
    }]

    pod2['spec']['nodeSelector'] = {"kubernetes.io/hostname": node3.name}
    create_and_wait_pod(core_api, pod2)

    volume2 = wait_for_volume_healthy(client, volume2_name)

    for replica in volume2.replicas:
        assert replica["hostId"] != node3.name

    volume2.updateReplicaCount(replicaCount=2)

    # 2 healthy replicas and 1 replica that failed to schedule
    # The failed-to-schedule replica is the local replica on node3
    volume2 = wait_for_volume_replica_count(client, volume2_name, 3)
    volume2 = client.by_id_volume(volume2_name)

    volume2_healthy_replicas = []
    for replica in volume2.replicas:
        if replica.running is True:
            volume2_healthy_replicas.append(replica)

    assert len(volume2_healthy_replicas) == 2

    volume2_rep1 = volume2_healthy_replicas[0]
    volume2_rep2 = volume2_healthy_replicas[1]
    assert volume2_rep1["hostId"] != volume2_rep2["hostId"]
    delete_and_wait_pod(core_api, pod2_name)
    wait_for_volume_detached(client, volume2_name)

    # Case 3: Test that the volume is not corrupted if there is an unexpected
    # detachment while building the local replica

    client.update(node1, allowScheduling=True, tags=[])
    client.update(node2, allowScheduling=True, tags=[])

    replica_node_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    try:
        client.update(replica_node_soft_anti_affinity_setting, value="false")
    except Exception as e:
        print(
            "Exception when update "
            "Replica Node Level Soft Anti-Affinity setting",
            replica_node_soft_anti_affinity_setting, e)

    volume3_name = volume_name + "-3"
    volume3_size = str(1 * Gi)
    volume3_data_path = "/data/test"
    pv3_name = volume3_name + "-pv"
    pvc3_name = volume3_name + "-pvc"
    pod3_name = volume3_name + "-pod"
    pod3 = pod

    pod3['metadata']['name'] = pod3_name

    volume3 = client.create_volume(name=volume3_name,
                                   size=volume3_size,
                                   numberOfReplicas=1)

    volume3 = wait_for_volume_detached(client, volume3_name)
    volume3 = client.by_id_volume(volume3_name)
    create_pv_for_volume(client, core_api, volume3, pv3_name)
    create_pvc_for_volume(client, core_api, volume3, pvc3_name)

    volume3 = client.by_id_volume(volume3_name)

    pod3['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": pvc3_name
        }
    }]

    pod3['spec']['nodeSelector'] = {"kubernetes.io/hostname": node3.name}
    create_and_wait_pod(core_api, pod3)
    volume3 = wait_for_volume_healthy(client, volume3_name)

    write_pod_volume_random_data(core_api, pod3_name, volume3_data_path,
                                 DATA_SIZE_IN_MB_4)

    volume3.updateDataLocality(dataLocality="best-effort")
    volume3 = client.by_id_volume(volume3_name)

    if volume3.replicas[0]['hostId'] != node3.name:
        wait_for_rebuild_start(client, volume3_name)
        volume3 = client.by_id_volume(volume3_name)
        assert len(volume3.replicas) == 2
        wait_for_rebuild_complete(client, volume3_name)

    volume3 = wait_for_volume_replica_count(client, volume3_name, 1)
    assert volume3.replicas[0]["hostId"] == node3.name

    delete_and_wait_pod(core_api, pod3_name)

    pod3['spec']['nodeSelector'] = {"kubernetes.io/hostname": node1.name}
    create_and_wait_pod(core_api, pod3)

    wait_for_rebuild_start(client, volume3_name)
    crash_engine_process_with_sigkill(client, core_api, volume3_name)
    delete_and_wait_pod(core_api, pod3_name)
    wait_for_volume_detached(client, volume3_name)
    volume3 = client.by_id_volume(volume3_name)
    assert len(volume3.replicas) == 1
    assert volume3.replicas[0]["hostId"] == node3.name

    create_and_wait_pod(core_api, pod3)
    wait_for_rebuild_start(client, volume3_name)
    volume3 = client.by_id_volume(volume3_name)
    assert len(volume3.replicas) == 2
    wait_for_rebuild_complete(client, volume3_name)

    # Wait for deletion of extra replica
    volume3 = wait_for_volume_replica_count(client, volume3_name, 1)
    assert volume3.replicas[0]["hostId"] == node1.name
    assert volume3.replicas[0]["mode"] == "RW"
    assert volume3.replicas[0]["running"] is True

    delete_and_wait_pod(core_api, pod3_name)
    wait_for_volume_detached(client, volume3_name)

    # Case 4: Make sure a failed-to-schedule local replica doesn't block
    # the creation of other replicas.

    replica_node_soft_anti_affinity_setting = \
        client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    try:
        client.update(replica_node_soft_anti_affinity_setting, value="false")
    except Exception as e:
        print(
            "Exception when update "
            "Replica Node Level Soft Anti-Affinity setting",
            replica_node_soft_anti_affinity_setting, e)

    client.update(node3, allowScheduling=False)

    volume4_name = volume_name + "-4"
    volume4_size = str(1 * Gi)

    volume4 = client.create_volume(name=volume4_name,
                                   size=volume4_size,
                                   numberOfReplicas=1,
                                   dataLocality="best-effort")

    volume4 = wait_for_volume_detached(client, volume4_name)
    volume4 = client.by_id_volume(volume4_name)

    volume4_replica_name = volume4.replicas[0]["name"]

    volume4.attach(hostId=node3.name)

    wait_for_volume_healthy(client, volume4_name)

    volume4 = client.by_id_volume(volume4_name)
    assert len(volume4.replicas) == 2

    for replica in volume4.replicas:
        if replica["name"] == volume4_replica_name:
            assert replica["running"] is True
            assert replica["mode"] == "RW"
        else:
            assert replica["running"] is False
            assert replica["mode"] == ""

    assert volume4.conditions.scheduled.reason == \
        "LocalReplicaSchedulingFailure"

    volume4 = volume4.updateReplicaCount(replicaCount=3)

    volume4 = wait_for_volume_degraded(client, volume4_name)

    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_failed_replica_count = 0

    for replica in volume4.replicas:
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == "":
            v4_failed_replica_count += 1

    assert v4_node1_replica_count == 1
    assert v4_node2_replica_count == 1
    assert v4_failed_replica_count > 0

    volume4 = volume4.updateReplicaCount(replicaCount=2)

    volume4 = wait_for_volume_replica_count(client, volume4_name, 3)

    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_failed_replica_count = 0

    for replica in volume4.replicas:
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == "":
            v4_failed_replica_count += 1

    assert v4_node1_replica_count == 1
    assert v4_node2_replica_count == 1
    assert v4_failed_replica_count > 0

    volume4 = volume4.updateReplicaCount(replicaCount=1)

    volume4 = wait_for_volume_replica_count(client, volume4_name, 2)

    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_failed_replica_count = 0

    for replica in volume4.replicas:
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == "":
            v4_failed_replica_count += 1

    assert v4_node1_replica_count + v4_node2_replica_count == 1
    assert v4_failed_replica_count == 1

    volume4 = volume4.updateDataLocality(dataLocality="disabled")
    volume4 = volume4.updateReplicaCount(replicaCount=2)

    running_replica_count = 0
    for _ in range(RETRY_COUNTS):
        volume4 = client.by_id_volume(volume4_name)
        running_replica_count = 0
        for r in volume4.replicas:
            if r.failedAt == "" and r.running is True:
                running_replica_count += 1
        if running_replica_count == 2:
            break
        time.sleep(RETRY_INTERVAL)
    assert running_replica_count == 2

    v4_node1_replica_count = 0
    v4_node2_replica_count = 0
    v4_node3_replica_count = 0

    for replica in volume4.replicas:
        wait_for_replica_running(client, volume4_name, replica["name"])
        if replica["hostId"] == node1.name:
            v4_node1_replica_count += 1
        elif replica["hostId"] == node2.name:
            v4_node2_replica_count += 1
        elif replica["hostId"] == node3.name:
            v4_node3_replica_count += 1
    assert v4_node1_replica_count == 1
    assert v4_node2_replica_count == 1
    assert v4_node3_replica_count == 0
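
The manual retry loop near the end of Case 4 (counting running replicas) is a pattern these tests repeat; a small, purely illustrative generic helper that captures it, again assuming the RETRY_COUNTS and RETRY_INTERVAL constants from the suite:

import time

def wait_for_condition_sketch(check):
    # Illustrative only: call check() until it returns a truthy value or
    # the retry budget is exhausted, and return that value.
    for _ in range(RETRY_COUNTS):
        result = check()
        if result:
            return result
        time.sleep(RETRY_INTERVAL)
    raise AssertionError("condition was not met within the retry budget")

# Usage against the running-replica count from Case 4 above (illustrative):
# wait_for_condition_sketch(
#     lambda: len([r for r in client.by_id_volume(volume4_name).replicas
#                  if r.failedAt == "" and r.running is True]) == 2)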