Exemple #1
0
def test_replica_scheduler_large_volume_fit_small_disk(client):  # NOQA
    nodes = client.list_node()
    # create a small size disk on current node
    lht_hostId = get_self_host_id()
    node = client.by_id_node(lht_hostId)
    small_disk_path = create_host_disk(client, "vol-small", SIZE, lht_hostId)
    small_disk = {"path": small_disk_path, "allowScheduling": True}
    update_disks = get_update_disks(node["disks"])
    update_disks.append(small_disk)
    node = node.diskUpdate(disks=update_disks)
    node = common.wait_for_disk_update(client, lht_hostId, len(update_disks))
    assert len(node["disks"]) == len(update_disks)

    unexpected_disk = {}
    for fsid, disk in node["disks"].iteritems():
        if disk["path"] == small_disk_path:
            unexpected_disk["fsid"] = fsid
            unexpected_disk["path"] = disk["path"]
            break

    # volume is too large to fill into small size disk on current node
    vol_name = common.generate_volume_name()
    volume = create_volume(client, vol_name, str(Gi), lht_hostId, len(nodes))

    nodes = client.list_node()
    node_hosts = []
    for node in nodes:
        node_hosts.append(node["name"])

    # check replica on current node shouldn't schedule to small disk
    for replica in volume["replicas"]:
        id = replica["hostId"]
        assert id != ""
        assert replica["running"]
        if id == lht_hostId:
            assert replica["diskID"] != unexpected_disk["fsid"]
            assert replica["dataPath"] != unexpected_disk["path"]
        node_hosts = filter(lambda x: x != id, node_hosts)
    assert len(node_hosts) == 0

    cleanup_volume(client, vol_name)

    # cleanup test disks
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    disk = disks[unexpected_disk["fsid"]]
    disk["allowScheduling"] = False
    update_disks = get_update_disks(disks)
    node = node.diskUpdate(disks=update_disks)
    node = wait_for_disk_status(client, lht_hostId, unexpected_disk["fsid"],
                                "allowScheduling", False)
    disks = node["disks"]
    disk = disks[unexpected_disk["fsid"]]
    assert not disk["allowScheduling"]
    disks.pop(unexpected_disk["fsid"])
    update_disks = get_update_disks(disks)
    node.diskUpdate(disks=update_disks)
    cleanup_host_disk(client, 'vol-small')
def test_node_evicted(client, volume_name, request):  # NOQA
    """
    Test the orphan CR is deleted in background but on-disk data still exists
    if the node is evicted
    1. Create a new-disk for holding valid and invalid orphaned replica
       directories
    2. Create a volume and attach to the current node
    3. Create a valid orphaned replica directories by copying the active
       replica directory
    4. Clean up volume
    5. Verify orphan list contains the valid orphaned replica directory
    6. Evict the node containing the orphaned replica directory
    7. Verify the orphan CR is deleted in background, but the on-disk
       orphaned replica directory still exists
    8. Disable node eviction
    9. Verify the orphan CR is created again and the on-disk orphaned replica
       directory still exists
    """

    disk_names = ["vol-disk-" + generate_random_id(4)]

    # Step 1
    lht_hostId = get_self_host_id()
    cleanup_node_disks(client, lht_hostId)
    disk_paths = crate_disks_on_host(client, disk_names, request)

    # Step 2
    volume = create_volume_with_replica_on_host(client, volume_name)

    # Step 3
    create_orphaned_directories_on_host(volume, disk_paths, 1)

    # Step 4
    cleanup_volume_by_name(client, volume_name)

    # Step 5
    assert wait_for_orphan_count(client, 1, 180) == 1

    # Step 6: request node eviction
    node = client.by_id_node(lht_hostId)
    client.update(node, allowScheduling=False, evictionRequested=True)
    node = wait_for_node_update(client, lht_hostId, "allowScheduling", False)

    # Step 7
    assert wait_for_orphan_count(client, 0, 180) == 0
    assert wait_for_file_count(os.path.join(disk_paths[0], "replicas"), 1,
                               180) == 1

    # Step 8: Disable node eviction
    node = client.by_id_node(lht_hostId)
    client.update(node, allowScheduling=True, evictionRequested=False)
    node = wait_for_node_update(client, lht_hostId, "allowScheduling", True)

    # Step 9
    assert wait_for_orphan_count(client, 1, 180) == 1
    assert wait_for_file_count(os.path.join(disk_paths[0], "replicas"), 1,
                               180) == 1
Exemple #3
0
def test_node_controller_sync_storage_available(client):  # NOQA
    lht_hostId = get_self_host_id()
    # create a disk to test storageAvailable
    node = client.by_id_node(lht_hostId)
    test_disk_path = create_host_disk(client, "vol-test", SIZE, lht_hostId)
    test_disk = {"path": test_disk_path, "allowScheduling": True}
    update_disks = get_update_disks(node["disks"])
    update_disks.append(test_disk)
    node = node.diskUpdate(disks=update_disks)
    node = common.wait_for_disk_update(client, lht_hostId, len(update_disks))
    assert len(node["disks"]) == len(update_disks)

    # write specified byte data into disk
    test_file_path = os.path.join(test_disk_path, TEST_FILE)
    if os.path.exists(test_file_path):
        os.remove(test_file_path)
    cmd = ['dd', 'if=/dev/zero', 'of=' + test_file_path, 'bs=1M', 'count=1']
    subprocess.check_call(cmd)
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    # wait for node controller update disk status
    expect_disk = {}
    free, total = common.get_host_disk_size(test_disk_path)
    for fsid, disk in disks.iteritems():
        if disk["path"] == test_disk_path:
            node = wait_for_disk_status(client, lht_hostId, fsid,
                                        "storageAvailable", free)
            expect_disk = node["disks"][fsid]
            break

    assert expect_disk["storageAvailable"] == free

    os.remove(test_file_path)
    # cleanup test disks
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    wait_fsid = ''
    for fsid, disk in disks.iteritems():
        if disk["path"] == test_disk_path:
            wait_fsid = fsid
            disk["allowScheduling"] = False

    update_disks = get_update_disks(disks)
    node = node.diskUpdate(disks=update_disks)
    node = wait_for_disk_status(client, lht_hostId, wait_fsid,
                                "allowScheduling", False)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] == test_disk_path:
            disks.pop(fsid)
            break
    update_disks = get_update_disks(disks)
    node = node.diskUpdate(disks=update_disks)
    node = wait_for_disk_update(client, lht_hostId, len(update_disks))
    assert len(node["disks"]) == len(update_disks)
    cleanup_host_disk(client, 'vol-test')
Exemple #4
0
def test_node_controller_sync_storage_available(client):  # NOQA
    lht_hostId = get_self_host_id()
    # create a disk to test storageAvailable
    node = client.by_id_node(lht_hostId)
    test_disk_path = create_host_disk(client, "vol-test", SIZE, lht_hostId)
    test_disk = {"path": test_disk_path, "allowScheduling": True}
    update_disks = get_update_disks(node["disks"])
    update_disks.append(test_disk)
    node = node.diskUpdate(disks=update_disks)
    node = common.wait_for_disk_update(client, lht_hostId, len(update_disks))
    assert len(node["disks"]) == len(update_disks)

    # write specified byte data into disk
    test_file_path = os.path.join(test_disk_path, TEST_FILE)
    if os.path.exists(test_file_path):
        os.remove(test_file_path)
    cmd = ['dd', 'if=/dev/zero', 'of=' + test_file_path, 'bs=1M', 'count=1']
    subprocess.check_call(cmd)
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    # wait for node controller update disk status
    expect_disk = {}
    free, total = common.get_host_disk_size(test_disk_path)
    for fsid, disk in disks.iteritems():
        if disk["path"] == test_disk_path:
            node = wait_for_disk_status(client, lht_hostId, fsid,
                                        "storageAvailable", free)
            expect_disk = node["disks"][fsid]
            break

    assert expect_disk["storageAvailable"] == free

    os.remove(test_file_path)
    # cleanup test disks
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    wait_fsid = ''
    for fsid, disk in disks.iteritems():
        if disk["path"] == test_disk_path:
            wait_fsid = fsid
            disk["allowScheduling"] = False

    update_disks = get_update_disks(disks)
    node = node.diskUpdate(disks=update_disks)
    node = wait_for_disk_status(client, lht_hostId, wait_fsid,
                                "allowScheduling", False)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] == test_disk_path:
            disks.pop(fsid)
            break
    update_disks = get_update_disks(disks)
    node = node.diskUpdate(disks=update_disks)
    node = wait_for_disk_update(client, lht_hostId, len(update_disks))
    assert len(node["disks"]) == len(update_disks)
    cleanup_host_disk(client, 'vol-test')
Exemple #5
0
def test_replica_scheduler_no_disks(client):  # NOQA
    nodes = client.list_node()
    # delete all disks on each node
    for node in nodes:
        disks = node["disks"]
        name = node["name"]
        # set allowScheduling to false
        for fsid, disk in disks.iteritems():
            disk["allowScheduling"] = False
        update_disks = get_update_disks(disks)
        node = node.diskUpdate(disks=update_disks)
        for fsid, disk in node["disks"].iteritems():
            # wait for node controller update disk status
            wait_for_disk_status(client, name, fsid, "allowScheduling", False)
            wait_for_disk_status(client, name, fsid, "storageScheduled", 0)

        node = client.by_id_node(name)
        for fsid, disk in node["disks"].iteritems():
            assert not disk["allowScheduling"]
        node = node.diskUpdate(disks=[])
        node = common.wait_for_disk_update(client, name, 0)
        assert len(node["disks"]) == 0

    # test there's no disk fit for volume
    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name,
                                  size=SIZE,
                                  numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(
        client, vol_name, "status", CONDITION_STATUS_FALSE)
    client.delete(volume)
    common.wait_for_volume_delete(client, vol_name)
Exemple #6
0
def test_replica_scheduler_no_disks(client):  # NOQA
    nodes = client.list_node()
    # delete all disks on each node
    for node in nodes:
        disks = node["disks"]
        name = node["name"]
        # set allowScheduling to false
        for fsid, disk in disks.iteritems():
            disk["allowScheduling"] = False
        update_disks = get_update_disks(disks)
        node = node.diskUpdate(disks=update_disks)
        for fsid, disk in node["disks"].iteritems():
            # wait for node controller update disk status
            wait_for_disk_status(client, name, fsid,
                                 "allowScheduling", False)
            wait_for_disk_status(client, name, fsid,
                                 "storageScheduled", 0)

        node = client.by_id_node(name)
        for fsid, disk in node["disks"].iteritems():
            assert not disk["allowScheduling"]
        node = node.diskUpdate(disks=[])
        node = common.wait_for_disk_update(client, name, 0)
        assert len(node["disks"]) == 0

    # test there's no disk fit for volume
    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name,
                                  size=SIZE, numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_FALSE)
    client.delete(volume)
    common.wait_for_volume_delete(client, vol_name)
Exemple #7
0
def test_soft_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Test that volumes with Soft Anti-Affinity can detach and reattach to a
    node properly.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="true")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume["replicas"])
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 3

    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Exemple #8
0
def test_soft_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Test that volumes with Soft Anti-Affinity work as expected.

    With Soft Anti-Affinity, a new replica should still be scheduled on a node
    with an existing replica, which will result in "Healthy" state but limited
    redundancy.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="true")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume["replicas"])
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Exemple #9
0
def test_hard_anti_affinity_offline_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas during
    the attaching process once a valid node is available.

    Once a new replica has been built as part of the attaching process, the
    volume should be Healthy again.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume["replicas"])
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    client.update(node, allowScheduling=True)
    volume.attach(hostId=host_id)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Exemple #10
0
def test_hard_anti_affinity_live_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas live once
    a valid node is available.

    If no nodes without existing replicas are available, the volume should
    remain in "Degraded" state. However, once one is available, the replica
    should now be scheduled successfully, with the volume returning to
    "Healthy" state.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume["replicas"])
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    # Allow scheduling on host node again
    client.update(node, allowScheduling=True)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Exemple #11
0
def reset_settings():
    yield
    client = get_longhorn_api_client()  # NOQA
    host_id = get_self_host_id()
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=True)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="true")
Exemple #12
0
def test_hard_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity are still able to detach and
    reattach to a node properly, even in degraded state.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Detach the volume.
    7. Verify that volume only have 2 replicas
        1. Unhealthy replica will be removed upon detach.
    8. Attach the volume again.
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
        3. Verify only two of three replicas of volume are healthy.
        4. Verify the remaining replica doesn't have `replica.HostID`, meaning
        it's unscheduled
    9. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume.replicas) == 2

    volume.attach(hostId=host_id)
    # Make sure we're still not getting another successful replica.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    assert sum([
        1 for replica in volume.replicas
        if replica.running and replica.mode == "RW"
    ]) == 2
    assert sum([1 for replica in volume.replicas if not replica.hostId]) == 1
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Exemple #13
0
def get_zone_replica_count(client, volume_name, zone_name):  # NOQA
    volume = client.by_id_volume(volume_name)

    zone_replica_count = 0
    for replica in volume.replicas:
        replica_host_id = replica.hostId
        replica_host_zone = client.by_id_node(replica_host_id).zone
        if replica_host_zone == zone_name:
            zone_replica_count += 1
    return zone_replica_count
Exemple #14
0
def test_update_node(client):  # NOQA
    # test node update
    nodes = client.list_node()
    assert len(nodes) > 0

    lht_hostId = get_self_host_id()
    node = client.by_id_node(lht_hostId)
    node = client.update(node, allowScheduling=False)
    node = common.wait_for_node_update(client, lht_hostId, "allowScheduling",
                                       False)
    assert not node["allowScheduling"]
    node = client.by_id_node(lht_hostId)
    assert not node["allowScheduling"]

    node = client.update(node, allowScheduling=True)
    node = common.wait_for_node_update(client, lht_hostId, "allowScheduling",
                                       True)
    assert node["allowScheduling"]
    node = client.by_id_node(lht_hostId)
    assert node["allowScheduling"]
Exemple #15
0
def test_update_node(client):  # NOQA
    # test node update
    nodes = client.list_node()
    assert len(nodes) > 0

    lht_hostId = get_self_host_id()
    node = client.by_id_node(lht_hostId)
    node = client.update(node, allowScheduling=False)
    node = common.wait_for_node_update(client, lht_hostId,
                                       "allowScheduling", False)
    assert not node["allowScheduling"]
    node = client.by_id_node(lht_hostId)
    assert not node["allowScheduling"]

    node = client.update(node, allowScheduling=True)
    node = common.wait_for_node_update(client, lht_hostId,
                                       "allowScheduling", True)
    assert node["allowScheduling"]
    node = client.by_id_node(lht_hostId)
    assert node["allowScheduling"]
def get_zone_replica_count(client, volume_name, zone_name, chk_running=False): # NOQA
    volume = client.by_id_volume(volume_name)

    zone_replica_count = 0
    for replica in volume.replicas:
        if chk_running and not replica.running:
            continue
        replica_host_id = replica.hostId
        replica_host_zone = client.by_id_node(replica_host_id).zone
        if replica_host_zone == zone_name:
            zone_replica_count += 1
    return zone_replica_count
def test_tag_basic(client):  # NOQA
    """
    Test that applying Tags to Nodes/Disks and retrieving them work as
    expected. Ensures that Tags are properly validated when updated.

    1. Generate tags and apply to the disk and nodes
    2. Make sure the tags are applied
    3. Try to apply invalid tags to the disk and node. Action will fail.
    """
    host_id = get_self_host_id()
    node = client.by_id_node(host_id)
    disks = get_update_disks(node.disks)
    assert len(node.disks) == 1
    assert len(node.disks[list(node.disks)[0]].tags) == 0, f" disks = {disks}"
    assert len(node.tags) == 0

    unsorted_disk, sorted_disk = generate_unordered_tag_names()
    unsorted_node, sorted_node = generate_unordered_tag_names()
    update_disks = get_update_disks(node.disks)
    update_disks[list(update_disks)[0]].tags = unsorted_disk
    node = update_node_disks(client, node.name, disks=update_disks)
    disks = get_update_disks(node.disks)
    assert disks[list(disks)[0]].tags == sorted_disk

    node = set_node_tags(client, node, unsorted_node)
    assert node.tags == sorted_node

    improper_tag_cases = [
        [""],  # Empty string
        [" "],  # Whitespace
        ["/"],  # Leading /
        [","],  # Illegal character
    ]
    for tags in improper_tag_cases:
        with pytest.raises(Exception) as e:
            set_node_tags(client, node, tags)
        assert "at least one error encountered while validating tags" in \
               str(e.value)
        with pytest.raises(Exception) as e:
            update_disks = get_update_disks(node.disks)
            update_disks[list(update_disks)[0]].tags = tags
            update_node_disks(client, node.name, disks=update_disks)
        assert "at least one error encountered while validating tags" in \
               str(e.value)

    update_disks = get_update_disks(node.disks)
    update_disks[list(update_disks)[0]].tags = []
    node = update_node_disks(client, node.name, disks=update_disks)
    disks = get_update_disks(node.disks)
    assert len(node.disks[list(node.disks)[0]].tags) == 0, f"disks = {disks}"

    node = set_node_tags(client, node)
    assert len(node.tags) == 0
Exemple #18
0
def test_hard_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity work as expected.

    With Hard Anti-Affinity, scheduling on nodes with existing replicas should
    be forbidden, resulting in "Degraded" state.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
        3. Verify only two of three replicas of volume are healthy.
        4. Verify the remaining replica doesn't have `replica.HostID`, meaning
        it's unscheduled
    6. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    # Instead of waiting for timeout and lengthening the tests a significant
    # amount we can make sure the scheduling isn't working by making sure the
    # volume becomes Degraded and reports a scheduling error.
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    # While there are three replicas that should exist to meet the Volume's
    # request, only two of those volumes should actually be Healthy.
    volume = client.by_id_volume(volume_name)
    assert sum([
        1 for replica in volume.replicas
        if replica.running and replica.mode == "RW"
    ]) == 2
    # Confirm that the final volume is an unscheduled volume.
    assert sum([1 for replica in volume.replicas if not replica.hostId]) == 1
    # Three replicas in total should still exist.
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Exemple #19
0
def wait_longhorn_node_zone_updated(client):  # NOQA

    lh_nodes = client.list_node()
    node_names = map(lambda node: node.name, lh_nodes)

    for node_name in node_names:
        for j in range(RETRY_COUNTS):
            lh_node = client.by_id_node(node_name)
            if lh_node.zone != '':
                break
            time.sleep(RETRY_INTERVAL)

        assert lh_node.zone != ''
Exemple #20
0
def test_tag_basic(client):  # NOQA
    """
    Test that applying Tags to Nodes/Disks and retrieving them work as
    expected. Ensures that Tags are properly validated when updated.
    """
    host_id = get_self_host_id()
    node = client.by_id_node(host_id)
    disks = get_update_disks(node["disks"])
    assert len(node["disks"]) == 1
    assert disks[0]["tags"] is None
    assert node["tags"] is None

    unsorted_disk, sorted_disk = generate_unordered_tag_names()
    unsorted_node, sorted_node = generate_unordered_tag_names()
    update_disks = get_update_disks(node["disks"])
    update_disks[0]["tags"] = unsorted_disk
    node = node.diskUpdate(disks=update_disks)
    disks = get_update_disks(node["disks"])
    assert disks[0]["tags"] == sorted_disk

    node = set_node_tags(client, node, unsorted_node)
    assert node["tags"] == sorted_node

    improper_tag_cases = [
        [""],   # Empty string
        [" "],  # Whitespace
        ["/"],  # Leading /
        [","],  # Illegal character
    ]
    for tags in improper_tag_cases:
        with pytest.raises(Exception) as e:
            set_node_tags(client, node, tags)
        assert "at least one error encountered while validating tags" in \
               str(e.value)
        with pytest.raises(Exception) as e:
            update_disks = get_update_disks(node["disks"])
            update_disks[0]["tags"] = tags
            node.diskUpdate(disks=update_disks)
        assert "at least one error encountered while validating tags" in \
               str(e.value)

    update_disks = get_update_disks(node["disks"])
    update_disks[0]["tags"] = []
    node = node.diskUpdate(disks=update_disks)
    disks = get_update_disks(node["disks"])
    assert disks[0]["tags"] is None

    node = set_node_tags(client, node)
    assert node["tags"] is None
def delete_extra_disks_on_host(client, disk_names):  # NOQA
    lht_hostId = get_self_host_id()

    node = client.by_id_node(lht_hostId)
    update_disk = get_update_disks(node.disks)

    for disk_name in disk_names:
        update_disk[disk_name].allowScheduling = False
        update_disk[disk_name].evictionRequested = True

    node = node.diskUpdate(disks=update_disk)

    for disk_name in disk_names:
        wait_for_disk_status(client, lht_hostId, disk_name, "storageScheduled",
                             0)
Exemple #22
0
def test_hard_anti_affinity_offline_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas during
    the attaching process once a valid node is available.

    Once a new replica has been built as part of the attaching process, the
    volume should be Healthy again.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Detach the volume.
    7. Enable current node's scheduling.
    8. Attach the volume again.
    9. Wait for volume to become healthy with 3 replicas
    10. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume.replicas)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    client.update(node, allowScheduling=True)
    volume.attach(hostId=host_id)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Exemple #23
0
def test_tag_scheduling_on_update(client, node_default_tags, volume_name):  # NOQA
    """
    Test that Replicas get scheduled if a Node/Disk disks updated with the
    proper Tags.
    """
    tag_spec = {
        "disk": ["ssd", "m2"],
        "expected": 1,
        "node": ["main", "fallback"]
    }
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=3,
                         diskSelector=tag_spec["disk"],
                         nodeSelector=tag_spec["node"])
    volume = wait_for_volume_detached(client, volume_name)
    assert volume["diskSelector"] == tag_spec["disk"]
    assert volume["nodeSelector"] == tag_spec["node"]

    wait_scheduling_failure(client, volume_name)

    host_id = get_self_host_id()
    node = client.by_id_node(host_id)
    update_disks = get_update_disks(node["disks"])
    update_disks[0]["tags"] = tag_spec["disk"]
    node = node.diskUpdate(disks=update_disks)
    set_node_tags(client, node, tag_spec["node"])
    scheduled = False
    for i in range(RETRY_COUNTS):
        v = client.by_id_volume(volume_name)
        if v["conditions"]["scheduled"]["status"] == "True":
            scheduled = True
        if scheduled:
            break
        sleep(RETRY_INTERVAL)
    assert scheduled

    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    nodes = client.list_node()
    node_mapping = {node["id"]: {
        "disk": get_update_disks(node["disks"])[0]["tags"],
        "node": node["tags"]
    } for node in nodes}
    assert len(volume["replicas"]) == 3
    check_volume_replicas(volume, tag_spec, node_mapping)

    cleanup_volume(client, volume)
Exemple #24
0
def test_hard_anti_affinity_live_rebuild(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity can build new replicas live once
    a valid node is available.

    If no nodes without existing replicas are available, the volume should
    remain in "Degraded" state. However, once one is available, the replica
    should now be scheduled successfully, with the volume returning to
    "Healthy" state.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to false
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
        1. Verify volume will be in degraded state.
        2. Verify volume reports condition `scheduled == false`
    6. Enable the current node's scheduling
    7. Wait for volume to start rebuilding and become healthy again
    8. Check volume `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = map(lambda replica: replica.name, volume.replicas)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    # Allow scheduling on host node again
    client.update(node, allowScheduling=True)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Exemple #25
0
def test_soft_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Test that volumes with Soft Anti-Affinity can detach and reattach to a
    node properly.

    1. Create a volume and attach to the current node.
    2. Generate and write `data` to the volume
    3. Set `soft anti-affinity` to true
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
    6. Wait for the new replica to be rebuilt
    7. Detach the volume.
    8. Verify there are 3 replicas
    9. Attach the volume again. Verify there are still 3 replicas
    10. Verify the `data`.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="true")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = list(map(lambda replica: replica.name, volume.replicas))
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume.replicas) == 3

    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Exemple #26
0
def test_hard_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity work as expected.

    With Hard Anti-Affinity, scheduling on nodes with existing replicas should
    be forbidden, resulting in "Degraded" state.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    # Instead of waiting for timeout and lengthening the tests a significant
    # amount we can make sure the scheduling isn't working by making sure the
    # volume becomes Degraded and reports a scheduling error.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    # While there are three replicas that should exist to meet the Volume's
    # request, only two of those volumes should actually be Healthy.
    assert sum([
        1 for replica in volume["replicas"]
        if replica["running"] and replica["mode"] == "RW"
    ]) == 2
    # Confirm that the final volume is an unscheduled volume.
    assert sum([1 for replica in volume["replicas"]
                if not replica["hostId"]]) == 1
    # Three replicas in total should still exist.
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Exemple #27
0
def test_hard_anti_affinity_detach(client, volume_name):  # NOQA
    """
    Test that volumes with Hard Anti-Affinity are still able to detach and
    reattach to a node properly, even in degraded state.
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_SOFT_ANTI_AFFINITY)
    client.update(setting, value="false")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica["name"])
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    volume.detach()
    volume = wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 2

    volume.attach(hostId=host_id)
    # Make sure we're still not getting another successful replica.
    volume = wait_for_volume_degraded(client, volume_name)
    wait_scheduling_failure(client, volume_name)
    assert sum([
        1 for replica in volume["replicas"]
        if replica["running"] and replica["mode"] == "RW"
    ]) == 2
    assert sum([1 for replica in volume["replicas"]
                if not replica["hostId"]]) == 1
    assert len(volume["replicas"]) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
Exemple #28
0
def test_soft_anti_affinity_scheduling(client, volume_name):  # NOQA
    """
    Test that volumes with Soft Anti-Affinity work as expected.

    With Soft Anti-Affinity, a new replica should still be scheduled on a node
    with an existing replica, which will result in "Healthy" state but limited
    redundancy.

    1. Create a volume and attach to the current node
    2. Generate and write `data` to the volume.
    3. Set `soft anti-affinity` to true
    4. Disable current node's scheduling.
    5. Remove the replica on the current node
    6. Wait for the volume to complete rebuild. Volume should have 3 replicas.
    7. Verify `data`
    """
    volume = create_and_check_volume(client, volume_name)
    host_id = get_self_host_id()
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3

    data = write_volume_random_data(volume)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="true")
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=False)
    replica_names = list(map(lambda replica: replica.name, volume.replicas))
    host_replica = get_host_replica(volume, host_id)

    volume.replicaRemove(name=host_replica.name)
    wait_new_replica_ready(client, volume_name, replica_names)
    volume = wait_for_volume_healthy(client, volume_name)
    assert len(volume.replicas) == 3
    check_volume_data(volume, data)

    cleanup_volume(client, volume)
def crate_disks_on_host(client, disk_names, request):  # NOQA
    disk_paths = []

    lht_hostId = get_self_host_id()
    node = client.by_id_node(lht_hostId)
    update_disks = get_update_disks(node.disks)

    for name in disk_names:
        disk_path = create_host_disk(client, name, str(Gi), lht_hostId)
        disk = {"path": disk_path, "allowScheduling": True}
        update_disks[name] = disk
        disk_paths.append(disk_path)

    node = node.diskUpdate(disks=update_disks)
    node = wait_for_disk_update(client, node.name, len(update_disks))

    def finalizer():
        delete_extra_disks_on_host(client, disk_names)
        for disk_name in disk_names:
            cleanup_host_disks(client, disk_name)

    request.addfinalizer(finalizer)

    return disk_paths
Exemple #30
0
def test_node_disk_update(client):  # NOQA
    lht_hostId = get_self_host_id()
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    # test add same disk by different mount path exception
    with pytest.raises(Exception) as e:
        disk = {
            "path": "/var/lib",
            "allowScheduling": True,
            "storageReserved": 2 * Gi
        }
        update_disk = get_update_disks(disks)
        update_disk.append(disk)
        node = node.diskUpdate(disks=update_disk)
    assert "the same file system" in str(e.value)

    # test delete disk exception
    with pytest.raises(Exception) as e:
        node.diskUpdate(disks=[])
    assert "disable the disk" in str(e.value)

    # test storageReserved invalid exception
    with pytest.raises(Exception) as e:
        for fsid, disk in disks.iteritems():
            disk["storageReserved"] = disk["storageMaximum"] + 1 * Gi
        update_disk = get_update_disks(disks)
        node.diskUpdate(disks=update_disk)
    assert "storageReserved setting of disk" in str(e.value)

    # create multiple disks for node
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    disk_path1 = create_host_disk(client, 'vol-disk-1', str(Gi), lht_hostId)
    disk1 = {"path": disk_path1, "allowScheduling": True}
    disk_path2 = create_host_disk(client, 'vol-disk-2', str(Gi), lht_hostId)
    disk2 = {"path": disk_path2, "allowScheduling": True}

    update_disk = get_update_disks(disks)
    # add new disk for node
    update_disk.append(disk1)
    update_disk.append(disk2)

    # save disks to node
    node = node.diskUpdate(disks=update_disk)
    node = common.wait_for_disk_update(client, lht_hostId, len(update_disk))
    assert len(node["disks"]) == len(update_disk)
    node = client.by_id_node(lht_hostId)
    assert len(node["disks"]) == len(update_disk)

    # update disk
    disks = node["disks"]
    update_disk = get_update_disks(disks)
    for disk in update_disk:
        # keep default disk for other tests
        if disk["path"] == disk_path1 or disk["path"] == disk_path2:
            disk["allowScheduling"] = False
            disk["storageReserved"] = SMALL_DISK_SIZE
    node = node.diskUpdate(disks=update_disk)
    disks = node["disks"]
    # wait for node controller to update disk status
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1 or disk["path"] == disk_path2:
            wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling",
                                 False)
            wait_for_disk_status(client, lht_hostId, fsid, "storageReserved",
                                 SMALL_DISK_SIZE)
            free, total = common.get_host_disk_size(disk_path1)
            wait_for_disk_status(client, lht_hostId, fsid, "storageAvailable",
                                 free)

    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for key, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            assert not disk["allowScheduling"]
            assert disk["storageReserved"] == SMALL_DISK_SIZE
            assert disk["storageScheduled"] == 0
            free, total = common.get_host_disk_size(disk_path1)
            assert disk["storageMaximum"] == total
            assert disk["storageAvailable"] == free
        elif disk["path"] == disk_path2:
            assert not disk["allowScheduling"]
            assert disk["storageReserved"] == SMALL_DISK_SIZE
            assert disk["storageScheduled"] == 0
            free, total = common.get_host_disk_size(disk_path2)
            assert disk["storageMaximum"] == total
            assert disk["storageAvailable"] == free

    # delete other disks, just remain default disk
    update_disk = get_update_disks(disks)
    remain_disk = []
    for disk in update_disk:
        if disk["path"] != disk_path1 and disk["path"] != disk_path2:
            remain_disk.append(disk)
    node = node.diskUpdate(disks=remain_disk)
    node = wait_for_disk_update(client, lht_hostId, len(remain_disk))
    assert len(node["disks"]) == len(remain_disk)
    # cleanup disks
    cleanup_host_disk(client, 'vol-disk-1', 'vol-disk-2')
Exemple #31
0
def test_setting_backing_image_auto_cleanup(client, core_api,
                                            volume_name):  # NOQA
    """
    Test that the Backing Image Cleanup Wait Interval setting works correctly.

    The default value of setting `BackingImageCleanupWaitInterval` is 60.

    1. Create a backing image.
    2. Create multiple volumes using the backing image.
    3. Attach all volumes, Then:
        1. Wait for all volumes can become running.
        2. Verify the correct in all volumes.
        3. Verify the backing image disk status map.
        4. Verify the only backing image file in each disk is reused by
           multiple replicas. The backing image file path is
           `<Data path>/<The backing image name>/backing`
    4. Unschedule test node to guarantee when replica removed from test node,
       no new replica can be rebuilt on the test node.
    5. Remove all replicas in one disk.
       Wait for 50 seconds.
       Then verify nothing changes in the backing image disk state map
       (before the cleanup wait interval is passed).
    6. Modify `BackingImageCleanupWaitInterval` to a small value. Then verify:
        1. The download state of the disk containing no replica becomes
           terminating first, and the entry will be removed from the map later.
        2. The related backing image file is removed.
        3. The download state of other disks keep unchanged.
           All volumes still work fine.
    7. Delete all volumes. Verify that there will only remain 1 entry in the
       backing image disk map
    8. Delete the backing image.
    """

    # Step 1
    create_backing_image_with_matching_url(client, BACKING_IMAGE_NAME,
                                           BACKING_IMAGE_QCOW2_URL)

    # Step 2
    volume_names = [volume_name + "-1", volume_name + "-2", volume_name + "-3"]

    for volume_name in volume_names:
        volume = create_and_check_volume(client, volume_name, 3,
                                         str(BACKING_IMAGE_EXT4_SIZE),
                                         BACKING_IMAGE_NAME)

    # Step 3
    lht_host_id = get_self_host_id()
    for volume_name in volume_names:
        volume = client.by_id_volume(volume_name)
        volume.attach(hostId=lht_host_id)
        wait_for_volume_healthy(client, volume_name)
        assert volume.backingImage == BACKING_IMAGE_NAME

    backing_image = client.by_id_backing_image(BACKING_IMAGE_NAME)
    assert len(backing_image.diskFileStatusMap) == 3
    for disk_id, status in iter(backing_image.diskFileStatusMap.items()):
        assert status.state == "ready"

    backing_images_in_disk = os.listdir("/var/lib/longhorn/backing-images")
    assert len(backing_images_in_disk) == 1
    assert os.path.exists("/var/lib/longhorn/backing-images/{}/backing".format(
        backing_images_in_disk[0]))
    assert os.path.exists(
        "/var/lib/longhorn/backing-images/{}/backing.cfg".format(
            backing_images_in_disk[0]))

    # Step 4
    current_host = client.by_id_node(id=lht_host_id)
    client.update(current_host, allowScheduling=False)
    wait_for_node_update(client, lht_host_id, "allowScheduling", False)

    # Step 5
    for volume_name in volume_names:
        volume = client.by_id_volume(volume_name)
        for replica in volume.replicas:
            if replica.hostId == lht_host_id:
                replica_name = replica.name
        volume.replicaRemove(name=replica_name)
    # This wait interval should be smaller than the setting value.
    # Otherwise, the backing image files may be cleaned up.
    time.sleep(int(BACKING_IMAGE_CLEANUP_WAIT_INTERVAL))
    check_backing_image_disk_map_status(client, BACKING_IMAGE_NAME, 3, "ready")

    # Step 6
    update_setting(client, "backing-image-cleanup-wait-interval", "1")
    check_backing_image_disk_map_status(client, BACKING_IMAGE_NAME, 2, "ready")

    backing_images_in_disk = os.listdir("/var/lib/longhorn/backing-images")
    assert len(backing_images_in_disk) == 0

    # Step 7
    for volume_name in volume_names:
        volume = client.by_id_volume(volume_name)
        client.delete(volume)
        wait_for_volume_delete(client, volume_name)

    check_backing_image_disk_map_status(client, BACKING_IMAGE_NAME, 1, "ready")
Exemple #32
0
def test_instance_manager_cpu_reservation(client, core_api):  # NOQA
    """
    Test if the CPU requests of instance manager pods are controlled by
    the settings and the node specs correctly.

    1. Try to change the deprecated setting `Guaranteed Engine CPU`.
       --> The setting update should fail.
    2. Pick up node 1, set `node.engineManagerCPURequest` and
       `node.replicaManagerCPURequest` to 150 and 250, respectively.
       --> The IM pods on this node will be restarted. And the CPU requests
       of these IM pods matches the above milli value.
    3. Change the new settings `Guaranteed Engine Manager CPU` and
       `Guaranteed Replica Manager CPU` to 10 and 20, respectively.
       Then wait for all IM pods except for the pods on node 1 restarting.
       --> The CPU requests of the restarted IM pods equals to
           the new setting value multiply the kube node allocatable CPU.
    4. Set the both new settings to 0.
       --> All IM pods except for the pod on node 1 will be restarted without
        CPU requests.
    5. Set the fields on node 1 to 0.
       --> The IM pods on node 1 will be restarted without CPU requests.
    6. Set the both new settings to 2 random values,
       and the sum of the 2 values is small than 40.
       Then wait for all IM pods restarting.
       --> The CPU requests of all IM pods equals to
           the new setting value multiply the kube node allocatable CPU.
    7. Set the both new settings to 2 random values,
       and the single value or the sum of the 2 values is greater than 40.
       --> The setting update should fail.
    8. Create a volume, verify everything works as normal

    Note: use fixture to restore the setting into the original state
    """

    instance_managers = client.list_instance_manager()
    deprecated_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_CPU)
    with pytest.raises(Exception) as e:
        client.update(deprecated_setting, value="0.1")

    host_node_name = get_self_host_id()
    host_node = client.by_id_node(host_node_name)
    other_ems, other_rms = [], []
    for im in instance_managers:
        if im.managerType == "engine":
            if im.nodeID == host_node_name:
                em_on_host = im
            else:
                other_ems.append(im)
        else:
            if im.nodeID == host_node_name:
                rm_on_host = im
            else:
                other_rms.append(im)
    assert em_on_host and rm_on_host
    host_kb_node = core_api.read_node(host_node_name)
    if host_kb_node.status.allocatable["cpu"].endswith('m'):
        allocatable_millicpu = int(host_kb_node.status.allocatable["cpu"][:-1])
    else:
        allocatable_millicpu = int(
            host_kb_node.status.allocatable["cpu"]) * 1000

    client.update(host_node,
                  allowScheduling=True,
                  engineManagerCPURequest=150,
                  replicaManagerCPURequest=250)
    time.sleep(5)
    guaranteed_engine_cpu_setting_check(client, core_api, [em_on_host],
                                        "Running", True, "150m")
    guaranteed_engine_cpu_setting_check(client, core_api, [rm_on_host],
                                        "Running", True, "250m")

    em_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_MANAGER_CPU)
    client.update(em_setting, value="10")
    rm_setting = client.by_id_setting(SETTING_GUARANTEED_REPLICA_MANAGER_CPU)
    client.update(rm_setting, value="20")
    time.sleep(5)
    guaranteed_engine_cpu_setting_check(
        client, core_api, other_ems, "Running", True,
        str(int(allocatable_millicpu * 10 / 100)) + "m")
    guaranteed_engine_cpu_setting_check(
        client, core_api, other_rms, "Running", True,
        str(int(allocatable_millicpu * 20 / 100)) + "m")

    em_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_MANAGER_CPU)
    client.update(em_setting, value="0")
    rm_setting = client.by_id_setting(SETTING_GUARANTEED_REPLICA_MANAGER_CPU)
    client.update(rm_setting, value="0")
    time.sleep(5)
    guaranteed_engine_cpu_setting_check(client, core_api, other_ems, "Running",
                                        True, "")
    guaranteed_engine_cpu_setting_check(client, core_api, other_rms, "Running",
                                        True, "")

    ems, rms = other_ems, other_rms
    ems.append(em_on_host)
    rms.append(rm_on_host)

    host_node = client.by_id_node(host_node_name)
    client.update(host_node,
                  allowScheduling=True,
                  engineManagerCPURequest=0,
                  replicaManagerCPURequest=0)
    time.sleep(5)
    guaranteed_engine_cpu_setting_check(client, core_api, ems, "Running", True,
                                        "")
    guaranteed_engine_cpu_setting_check(client, core_api, rms, "Running", True,
                                        "")

    client.update(em_setting, value="20")
    rm_setting = client.by_id_setting(SETTING_GUARANTEED_REPLICA_MANAGER_CPU)
    client.update(rm_setting, value="15")
    time.sleep(5)
    guaranteed_engine_cpu_setting_check(
        client, core_api, ems, "Running", True,
        str(int(allocatable_millicpu * 20 / 100)) + "m")
    guaranteed_engine_cpu_setting_check(
        client, core_api, rms, "Running", True,
        str(int(allocatable_millicpu * 15 / 100)) + "m")

    with pytest.raises(Exception) as e:
        client.update(em_setting, value="41")
    assert "should be between 0 to 40" in \
           str(e.value)

    em_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_MANAGER_CPU)
    with pytest.raises(Exception) as e:
        client.update(em_setting, value="35")
    assert "The sum should not be smaller than 0% or greater than 40%" in \
           str(e.value)

    # Create a volume to test
    vol_name = generate_volume_name()
    volume = create_and_check_volume(client, vol_name)
    volume.attach(hostId=get_self_host_id())
    volume = wait_for_volume_healthy(client, vol_name)
    assert len(volume.replicas) == 3
    data = write_volume_random_data(volume)
    check_volume_data(volume, data)
    cleanup_volume(client, volume)
Exemple #33
0
def test_node_delete_umount_disks(client):  # NOQA
    # create test disks for node
    disk_volume_name = 'vol-disk-1'
    lht_hostId = get_self_host_id()
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    disk_path1 = create_host_disk(client, disk_volume_name, str(Gi),
                                  lht_hostId)
    disk1 = {
        "path": disk_path1,
        "allowScheduling": True,
        "storageReserved": SMALL_DISK_SIZE
    }

    update_disk = get_update_disks(disks)
    for disk in update_disk:
        disk["allowScheduling"] = False
    # add new disk for node
    update_disk.append(disk1)
    # save disks to node
    node = node.diskUpdate(disks=update_disk)
    node = common.wait_for_disk_update(client, lht_hostId, len(update_disk))
    assert len(node["disks"]) == len(update_disk)
    node = client.by_id_node(lht_hostId)
    assert len(node["disks"]) == len(update_disk)

    disks = node["disks"]
    # wait for node controller to update disk status
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling",
                                 True)
            wait_for_disk_status(client, lht_hostId, fsid, "storageReserved",
                                 SMALL_DISK_SIZE)
            free, total = common.get_host_disk_size(disk_path1)
            wait_for_disk_status(client, lht_hostId, fsid, "storageAvailable",
                                 free)
            wait_for_disk_status(client, lht_hostId, fsid, "storageMaximum",
                                 total)

    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for key, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            assert disk["allowScheduling"]
            assert disk["storageReserved"] == SMALL_DISK_SIZE
            assert disk["storageScheduled"] == 0
            free, total = common.get_host_disk_size(disk_path1)
            assert disk["storageMaximum"] == total
            assert disk["storageAvailable"] == free
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_TRUE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE
        else:
            assert not disk["allowScheduling"]

    # create a volume
    nodes = client.list_node()
    vol_name = common.generate_volume_name()
    volume = create_volume(client, vol_name, str(SMALL_DISK_SIZE), lht_hostId,
                           len(nodes))
    replicas = volume["replicas"]
    for replica in replicas:
        id = replica["hostId"]
        assert id != ""
        assert replica["running"]
        if id == lht_hostId:
            assert replica["dataPath"].startswith(disk_path1)

    # umount the disk
    mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name)
    common.umount_disk(mount_path)

    # wait for update node status
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling",
                                 False)
            wait_for_disk_status(client, lht_hostId, fsid, "storageMaximum", 0)
            wait_for_disk_conditions(client, lht_hostId, fsid,
                                     DISK_CONDITION_READY,
                                     CONDITION_STATUS_FALSE)

    # check result
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    update_disks = []
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            assert not disk["allowScheduling"]
            assert disk["storageMaximum"] == 0
            assert disk["storageAvailable"] == 0
            assert disk["storageReserved"] == SMALL_DISK_SIZE
            assert disk["storageScheduled"] == SMALL_DISK_SIZE
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_FALSE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_FALSE
        else:
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_TRUE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE
            update_disks.append(disk)

    # delete umount disk exception
    with pytest.raises(Exception) as e:
        node.diskUpdate(disks=update_disks)
    assert "disable the disk" in str(e.value)

    # update other disks
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] != disk_path1:
            disk["allowScheduling"] = True
    test_update = get_update_disks(disks)
    node = node.diskUpdate(disks=test_update)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] != disk_path1:
            wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling",
                                 True)
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] != disk_path1:
            assert disk["allowScheduling"]

    # mount the disk back
    mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name)
    disk_volume = client.by_id_volume(disk_volume_name)
    dev = get_volume_endpoint(disk_volume)
    common.mount_disk(dev, mount_path)

    # wait for update node status
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling",
                                 False)
            wait_for_disk_conditions(client, lht_hostId, fsid,
                                     DISK_CONDITION_READY,
                                     CONDITION_STATUS_TRUE)

    # check result
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            free, total = common.get_host_disk_size(disk_path1)
            assert not disk["allowScheduling"]
            assert disk["storageMaximum"] == total
            assert disk["storageAvailable"] == free
            assert disk["storageReserved"] == SMALL_DISK_SIZE
            assert disk["storageScheduled"] == SMALL_DISK_SIZE
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_TRUE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE
        else:
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_TRUE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE

    # delete volume and umount disk
    cleanup_volume(client, vol_name)
    mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name)
    common.umount_disk(mount_path)

    # wait for update node status
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling",
                                 False)
            wait_for_disk_status(client, lht_hostId, fsid, "storageScheduled",
                                 0)
            wait_for_disk_status(client, lht_hostId, fsid, "storageMaximum", 0)

    # test delete the umount disk
    node = client.by_id_node(lht_hostId)
    node.diskUpdate(disks=update_disks)
    node = common.wait_for_disk_update(client, lht_hostId, len(update_disks))
    assert len(node["disks"]) == len(update_disks)
    cmd = ['rm', '-r', mount_path]
    subprocess.check_call(cmd)
Exemple #34
0
def test_replica_cleanup(client):  # NOQA
    nodes = client.list_node()
    lht_hostId = get_self_host_id()

    node = client.by_id_node(lht_hostId)
    extra_disk_path = create_host_disk(client, "extra-disk",
                                       "10G", lht_hostId)
    extra_disk = {"path": extra_disk_path, "allowScheduling": True}
    update_disks = get_update_disks(node["disks"])
    update_disks.append(extra_disk)
    node = node.diskUpdate(disks=update_disks)
    node = common.wait_for_disk_update(client, lht_hostId,
                                       len(update_disks))
    assert len(node["disks"]) == len(update_disks)

    extra_disk_fsid = ""
    for fsid, disk in node["disks"].iteritems():
        if disk["path"] == extra_disk_path:
            extra_disk_fsid = fsid
            break

    for node in nodes:
        # disable all the disks except the ones on the current node
        if node["name"] == lht_hostId:
            continue
        for fsid, disk in node["disks"].iteritems():
            break
        disk["allowScheduling"] = False
        update_disks = get_update_disks(node["disks"])
        node.diskUpdate(disks=update_disks)
        node = wait_for_disk_status(client, node["name"],
                                    fsid,
                                    "allowScheduling", False)

    vol_name = common.generate_volume_name()
    # more replicas, make sure both default and extra disk will get one
    volume = create_volume(client, vol_name, str(Gi), lht_hostId, 5)
    data_paths = []
    for replica in volume["replicas"]:
        data_paths.append(replica["dataPath"])

    # data path should exist now
    for data_path in data_paths:
        assert exec_nsenter("ls {}".format(data_path))

    cleanup_volume(client, vol_name)

    # data path should be gone due to the cleanup of replica
    for data_path in data_paths:
        with pytest.raises(subprocess.CalledProcessError):
            exec_nsenter("ls {}".format(data_path))

    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    disk = disks[extra_disk_fsid]
    disk["allowScheduling"] = False
    update_disks = get_update_disks(disks)
    node = node.diskUpdate(disks=update_disks)
    node = wait_for_disk_status(client, lht_hostId,
                                extra_disk_fsid,
                                "allowScheduling", False)
    wait_for_disk_status(client, lht_hostId, extra_disk_fsid,
                         "storageScheduled", 0)

    disks = node["disks"]
    disk = disks[extra_disk_fsid]
    assert not disk["allowScheduling"]
    disks.pop(extra_disk_fsid)
    update_disks = get_update_disks(disks)
    node.diskUpdate(disks=update_disks)
    node = common.wait_for_disk_update(client, lht_hostId,
                                       len(update_disks))

    cleanup_host_disk(client, 'extra-disk')
Exemple #35
0
def test_node_delete_umount_disks(client):  # NOQA
    # create test disks for node
    disk_volume_name = 'vol-disk-1'
    lht_hostId = get_self_host_id()
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    disk_path1 = create_host_disk(client, disk_volume_name,
                                  str(Gi), lht_hostId)
    disk1 = {"path": disk_path1, "allowScheduling": True,
             "storageReserved": SMALL_DISK_SIZE}

    update_disk = get_update_disks(disks)
    for disk in update_disk:
        disk["allowScheduling"] = False
    # add new disk for node
    update_disk.append(disk1)
    # save disks to node
    node = node.diskUpdate(disks=update_disk)
    node = common.wait_for_disk_update(client, lht_hostId,
                                       len(update_disk))
    assert len(node["disks"]) == len(update_disk)
    node = client.by_id_node(lht_hostId)
    assert len(node["disks"]) == len(update_disk)

    disks = node["disks"]
    # wait for node controller to update disk status
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "allowScheduling", True)
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "storageReserved", SMALL_DISK_SIZE)
            free, total = common.get_host_disk_size(disk_path1)
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "storageAvailable", free)
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "storageMaximum", total)

    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for key, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            assert disk["allowScheduling"]
            assert disk["storageReserved"] == SMALL_DISK_SIZE
            assert disk["storageScheduled"] == 0
            free, total = common.get_host_disk_size(disk_path1)
            assert disk["storageMaximum"] == total
            assert disk["storageAvailable"] == free
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_TRUE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE
        else:
            assert not disk["allowScheduling"]

    # create a volume
    nodes = client.list_node()
    vol_name = common.generate_volume_name()
    volume = create_volume(client, vol_name, str(SMALL_DISK_SIZE),
                           lht_hostId, len(nodes))
    replicas = volume["replicas"]
    for replica in replicas:
        id = replica["hostId"]
        assert id != ""
        assert replica["running"]
        if id == lht_hostId:
            assert replica["dataPath"].startswith(disk_path1)

    # umount the disk
    mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name)
    common.umount_disk(mount_path)

    # wait for update node status
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            wait_for_disk_status(client, lht_hostId,
                                 fsid, "allowScheduling", False)
            wait_for_disk_status(client, lht_hostId,
                                 fsid, "storageMaximum", 0)
            wait_for_disk_conditions(client, lht_hostId, fsid,
                                     DISK_CONDITION_READY,
                                     CONDITION_STATUS_FALSE)

    # check result
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    update_disks = []
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            assert not disk["allowScheduling"]
            assert disk["storageMaximum"] == 0
            assert disk["storageAvailable"] == 0
            assert disk["storageReserved"] == SMALL_DISK_SIZE
            assert disk["storageScheduled"] == SMALL_DISK_SIZE
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_FALSE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_FALSE
        else:
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_TRUE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE
            update_disks.append(disk)

    # delete umount disk exception
    with pytest.raises(Exception) as e:
        node.diskUpdate(disks=update_disks)
    assert "disable the disk" in str(e.value)

    # update other disks
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] != disk_path1:
            disk["allowScheduling"] = True
    test_update = get_update_disks(disks)
    node = node.diskUpdate(disks=test_update)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] != disk_path1:
            wait_for_disk_status(client, lht_hostId,
                                 fsid, "allowScheduling", True)
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] != disk_path1:
            assert disk["allowScheduling"]

    # mount the disk back
    mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name)
    disk_volume = client.by_id_volume(disk_volume_name)
    dev = get_volume_endpoint(disk_volume)
    common.mount_disk(dev, mount_path)

    # wait for update node status
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            wait_for_disk_status(client, lht_hostId,
                                 fsid, "allowScheduling", False)
            wait_for_disk_conditions(client, lht_hostId, fsid,
                                     DISK_CONDITION_READY,
                                     CONDITION_STATUS_TRUE)

    # check result
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            free, total = common.get_host_disk_size(disk_path1)
            assert not disk["allowScheduling"]
            assert disk["storageMaximum"] == total
            assert disk["storageAvailable"] == free
            assert disk["storageReserved"] == SMALL_DISK_SIZE
            assert disk["storageScheduled"] == SMALL_DISK_SIZE
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_TRUE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE
        else:
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_TRUE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE

    # delete volume and umount disk
    cleanup_volume(client, vol_name)
    mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name)
    common.umount_disk(mount_path)

    # wait for update node status
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            wait_for_disk_status(client, lht_hostId,
                                 fsid, "allowScheduling", False)
            wait_for_disk_status(client, lht_hostId,
                                 fsid, "storageScheduled", 0)
            wait_for_disk_status(client, lht_hostId,
                                 fsid, "storageMaximum", 0)

    # test delete the umount disk
    node = client.by_id_node(lht_hostId)
    node.diskUpdate(disks=update_disks)
    node = common.wait_for_disk_update(client, lht_hostId,
                                       len(update_disks))
    assert len(node["disks"]) == len(update_disks)
    cmd = ['rm', '-r', mount_path]
    subprocess.check_call(cmd)
Exemple #36
0
def test_node_disk_update(client):  # NOQA
    lht_hostId = get_self_host_id()
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    # test add same disk by different mount path exception
    with pytest.raises(Exception) as e:
        disk = {"path": "/var/lib", "allowScheduling": True,
                "storageReserved": 2 * Gi}
        update_disk = get_update_disks(disks)
        update_disk.append(disk)
        node = node.diskUpdate(disks=update_disk)
    assert "the same file system" in str(e.value)

    # test delete disk exception
    with pytest.raises(Exception) as e:
        node.diskUpdate(disks=[])
    assert "disable the disk" in str(e.value)

    # test storageReserved invalid exception
    with pytest.raises(Exception) as e:
        for fsid, disk in disks.iteritems():
            disk["storageReserved"] = disk["storageMaximum"] + 1*Gi
        update_disk = get_update_disks(disks)
        node.diskUpdate(disks=update_disk)
    assert "storageReserved setting of disk" in str(e.value)

    # create multiple disks for node
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    disk_path1 = create_host_disk(client, 'vol-disk-1',
                                  str(Gi), lht_hostId)
    disk1 = {"path": disk_path1, "allowScheduling": True}
    disk_path2 = create_host_disk(client, 'vol-disk-2',
                                  str(Gi), lht_hostId)
    disk2 = {"path": disk_path2, "allowScheduling": True}

    update_disk = get_update_disks(disks)
    # add new disk for node
    update_disk.append(disk1)
    update_disk.append(disk2)

    # save disks to node
    node = node.diskUpdate(disks=update_disk)
    node = common.wait_for_disk_update(client, lht_hostId,
                                       len(update_disk))
    assert len(node["disks"]) == len(update_disk)
    node = client.by_id_node(lht_hostId)
    assert len(node["disks"]) == len(update_disk)

    # update disk
    disks = node["disks"]
    update_disk = get_update_disks(disks)
    for disk in update_disk:
        # keep default disk for other tests
        if disk["path"] == disk_path1 or disk["path"] == disk_path2:
            disk["allowScheduling"] = False
            disk["storageReserved"] = SMALL_DISK_SIZE
    node = node.diskUpdate(disks=update_disk)
    disks = node["disks"]
    # wait for node controller to update disk status
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1 or disk["path"] == disk_path2:
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "allowScheduling", False)
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "storageReserved", SMALL_DISK_SIZE)
            free, total = common.get_host_disk_size(disk_path1)
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "storageAvailable", free)

    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for key, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            assert not disk["allowScheduling"]
            assert disk["storageReserved"] == SMALL_DISK_SIZE
            assert disk["storageScheduled"] == 0
            free, total = common.get_host_disk_size(disk_path1)
            assert disk["storageMaximum"] == total
            assert disk["storageAvailable"] == free
        elif disk["path"] == disk_path2:
            assert not disk["allowScheduling"]
            assert disk["storageReserved"] == SMALL_DISK_SIZE
            assert disk["storageScheduled"] == 0
            free, total = common.get_host_disk_size(disk_path2)
            assert disk["storageMaximum"] == total
            assert disk["storageAvailable"] == free

    # delete other disks, just remain default disk
    update_disk = get_update_disks(disks)
    remain_disk = []
    for disk in update_disk:
        if disk["path"] != disk_path1 and disk["path"] != disk_path2:
            remain_disk.append(disk)
    node = node.diskUpdate(disks=remain_disk)
    node = wait_for_disk_update(client, lht_hostId,
                                len(remain_disk))
    assert len(node["disks"]) == len(remain_disk)
    # cleanup disks
    cleanup_host_disk(client, 'vol-disk-1', 'vol-disk-2')
Exemple #37
0
def test_replica_scheduler_large_volume_fit_small_disk(client):  # NOQA
    nodes = client.list_node()
    # create a small size disk on current node
    lht_hostId = get_self_host_id()
    node = client.by_id_node(lht_hostId)
    small_disk_path = create_host_disk(client, "vol-small",
                                       SIZE, lht_hostId)
    small_disk = {"path": small_disk_path, "allowScheduling": True}
    update_disks = get_update_disks(node["disks"])
    update_disks.append(small_disk)
    node = node.diskUpdate(disks=update_disks)
    node = common.wait_for_disk_update(client, lht_hostId,
                                       len(update_disks))
    assert len(node["disks"]) == len(update_disks)

    unexpected_disk = {}
    for fsid, disk in node["disks"].iteritems():
        if disk["path"] == small_disk_path:
            unexpected_disk["fsid"] = fsid
            unexpected_disk["path"] = disk["path"]
            break

    # volume is too large to fill into small size disk on current node
    vol_name = common.generate_volume_name()
    volume = create_volume(client, vol_name, str(Gi), lht_hostId, len(nodes))

    nodes = client.list_node()
    node_hosts = []
    for node in nodes:
        node_hosts.append(node["name"])

    # check replica on current node shouldn't schedule to small disk
    for replica in volume["replicas"]:
        id = replica["hostId"]
        assert id != ""
        assert replica["running"]
        if id == lht_hostId:
            assert replica["diskID"] != unexpected_disk["fsid"]
            assert replica["dataPath"] != unexpected_disk["path"]
        node_hosts = filter(lambda x: x != id, node_hosts)
    assert len(node_hosts) == 0

    cleanup_volume(client, vol_name)

    # cleanup test disks
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    disk = disks[unexpected_disk["fsid"]]
    disk["allowScheduling"] = False
    update_disks = get_update_disks(disks)
    node = node.diskUpdate(disks=update_disks)
    node = wait_for_disk_status(client, lht_hostId,
                                unexpected_disk["fsid"],
                                "allowScheduling", False)
    disks = node["disks"]
    disk = disks[unexpected_disk["fsid"]]
    assert not disk["allowScheduling"]
    disks.pop(unexpected_disk["fsid"])
    update_disks = get_update_disks(disks)
    node.diskUpdate(disks=update_disks)
    cleanup_host_disk(client, 'vol-small')