Example #1
def test_node_evicted(client, volume_name, request):  # NOQA
    """
    Test that the orphan CR is deleted in the background but the on-disk data
    still exists if the node is evicted
    1. Create a new disk for holding valid and invalid orphaned replica
       directories
    2. Create a volume and attach to the current node
    3. Create a valid orphaned replica directory by copying the active
       replica directory
    4. Clean up volume
    5. Verify orphan list contains the valid orphaned replica directory
    6. Evict the node containing the orphaned replica directory
    7. Verify the orphan CR is deleted in the background, but the on-disk
       orphaned replica directory still exists
    8. Disable node eviction
    9. Verify the orphan CR is created again and the on-disk orphaned replica
       directory still exists
    """

    disk_names = ["vol-disk-" + generate_random_id(4)]

    # Step 1
    lht_hostId = get_self_host_id()
    cleanup_node_disks(client, lht_hostId)
    disk_paths = crate_disks_on_host(client, disk_names, request)

    # Step 2
    volume = create_volume_with_replica_on_host(client, volume_name)

    # Step 3
    create_orphaned_directories_on_host(volume, disk_paths, 1)

    # Step 4
    cleanup_volume_by_name(client, volume_name)

    # Step 5
    assert wait_for_orphan_count(client, 1, 180) == 1

    # Step 6: request node eviction
    node = client.by_id_node(lht_hostId)
    client.update(node, allowScheduling=False, evictionRequested=True)
    node = wait_for_node_update(client, lht_hostId, "allowScheduling", False)

    # Step 7
    assert wait_for_orphan_count(client, 0, 180) == 0
    assert wait_for_file_count(os.path.join(disk_paths[0], "replicas"), 1,
                               180) == 1

    # Step 8: Disable node eviction
    node = client.by_id_node(lht_hostId)
    client.update(node, allowScheduling=True, evictionRequested=False)
    node = wait_for_node_update(client, lht_hostId, "allowScheduling", True)

    # Step 9
    assert wait_for_orphan_count(client, 1, 180) == 1
    assert wait_for_file_count(os.path.join(disk_paths[0], "replicas"), 1,
                               180) == 1
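
The assertions above lean on polling helpers such as wait_for_orphan_count and wait_for_file_count from the shared test utilities. Below is a minimal sketch of that polling pattern, assuming the client exposes list_orphan() and using an illustrative one-second retry loop; the real helpers may differ.

import os
import time


def wait_for_orphan_count(client, expected_count, timeout=180):
    # Poll the orphan CR list until it reaches the expected size or the
    # timeout expires; return the last observed count either way.
    # Sketch only: assumes the client exposes list_orphan().
    count = -1
    for _ in range(timeout):
        count = len(client.list_orphan())
        if count == expected_count:
            break
        time.sleep(1)
    return count


def wait_for_file_count(path, expected_count, timeout=180):
    # Same polling pattern for on-disk entries, e.g. the orphaned replica
    # directories under <disk path>/replicas.
    count = -1
    for _ in range(timeout):
        count = len(os.listdir(path))
        if count == expected_count:
            break
        time.sleep(1)
    return count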
Example #2
def test_volume_scheduling_failure(clients, volume_name):  # NOQA
    '''
    Test that scheduling fails when scheduling is disabled for all the nodes.
    Also test that a volume that failed scheduling cannot be attached.
    '''
    client = get_random_client(clients)
    nodes = client.list_node()
    assert len(nodes) > 0

    for node in nodes:
        node = client.update(node, allowScheduling=False)
        node = common.wait_for_node_update(client, node["id"],
                                           "allowScheduling", False)

    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=3)

    volume = common.wait_for_volume_condition_scheduled(client, volume_name,
                                                        "status",
                                                        CONDITION_STATUS_FALSE)
    volume = common.wait_for_volume_detached(client, volume_name)
    self_node = get_self_host_id()
    with pytest.raises(Exception) as e:
        volume.attach(hostId=self_node)
    assert "not scheduled" in str(e.value)

    for node in nodes:
        node = client.update(node, allowScheduling=True)
        node = common.wait_for_node_update(client, node["id"],
                                           "allowScheduling", True)

    volume = common.wait_for_volume_condition_scheduled(client, volume_name,
                                                        "status",
                                                        CONDITION_STATUS_TRUE)
    volume = common.wait_for_volume_detached(client, volume_name)
    volume = volume.attach(hostId=self_node)
    volume = common.wait_for_volume_healthy(client, volume_name)
    endpoint = get_volume_endpoint(volume)
    assert endpoint != ""
    volume_rw_test(endpoint)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)
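
volume_rw_test(endpoint) above exercises the attached block device end to end. A minimal sketch of such a write-then-read-back check, assuming the endpoint is a readable and writable block device path; the offsets and sizes here are illustrative, not the real helper's values.

import os
import random


def volume_rw_test(dev):
    # Write a small block of random data at a 512-byte-aligned offset,
    # then read it back and verify the content matches. Sketch only; the
    # real helper in common.py may behave differently.
    data = bytes(random.getrandbits(8) for _ in range(256))
    offset = random.randint(0, 1024) * 512
    with open(dev, 'wb') as f:
        f.seek(offset)
        f.write(data)
        f.flush()
        os.fsync(f.fileno())
    with open(dev, 'rb') as f:
        f.seek(offset)
        assert f.read(len(data)) == data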
Example #3
def test_update_node(client):  # NOQA
    # test node update
    nodes = client.list_node()
    assert len(nodes) > 0

    lht_hostId = get_self_host_id()
    node = client.by_id_node(lht_hostId)
    node = client.update(node, allowScheduling=False)
    node = common.wait_for_node_update(client, lht_hostId, "allowScheduling",
                                       False)
    assert not node["allowScheduling"]
    node = client.by_id_node(lht_hostId)
    assert not node["allowScheduling"]

    node = client.update(node, allowScheduling=True)
    node = common.wait_for_node_update(client, lht_hostId, "allowScheduling",
                                       True)
    assert node["allowScheduling"]
    node = client.by_id_node(lht_hostId)
    assert node["allowScheduling"]
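
common.wait_for_node_update is a poll-until-field-matches helper. A minimal sketch, under the assumption that it simply re-reads the node until the named field has the expected value; the 180-iteration, one-second retry loop is illustrative.

import time


def wait_for_node_update(client, node_id, field, expected_value):
    # Re-fetch the node until the given field reaches the expected value,
    # then return the refreshed node object. Sketch only.
    node = client.by_id_node(node_id)
    for _ in range(180):
        if node[field] == expected_value:
            break
        time.sleep(1)
        node = client.by_id_node(node_id)
    assert node[field] == expected_value
    return node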
Example #4
def test_update_node(client):  # NOQA
    # test node update
    nodes = client.list_node()
    assert len(nodes) > 0

    lht_hostId = get_self_host_id()
    node = client.by_id_node(lht_hostId)
    node = client.update(node, allowScheduling=False)
    node = common.wait_for_node_update(client, lht_hostId,
                                       "allowScheduling", False)
    assert not node["allowScheduling"]
    node = client.by_id_node(lht_hostId)
    assert not node["allowScheduling"]

    node = client.update(node, allowScheduling=True)
    node = common.wait_for_node_update(client, lht_hostId,
                                       "allowScheduling", True)
    assert node["allowScheduling"]
    node = client.by_id_node(lht_hostId)
    assert node["allowScheduling"]
Example #5
def test_setting_backing_image_auto_cleanup(client, core_api,
                                            volume_name):  # NOQA
    """
    Test that the Backing Image Cleanup Wait Interval setting works correctly.

    The default value of setting `BackingImageCleanupWaitInterval` is 60.

    1. Create a backing image.
    2. Create multiple volumes using the backing image.
    3. Attach all volumes. Then:
        1. Wait for all volumes to become running.
        2. Verify the data is correct in all volumes.
        3. Verify the backing image disk status map.
        4. Verify the only backing image file in each disk is reused by
           multiple replicas. The backing image file path is
           `<Data path>/<The backing image name>/backing`
    4. Unschedule the test node to guarantee that, when a replica is removed
       from the test node, no new replica can be rebuilt on it.
    5. Remove all replicas in one disk.
       Wait for 50 seconds.
       Then verify nothing changes in the backing image disk state map
       (before the cleanup wait interval is passed).
    6. Modify `BackingImageCleanupWaitInterval` to a small value. Then verify:
        1. The download state of the disk containing no replica becomes
           terminating first, and the entry will be removed from the map later.
        2. The related backing image file is removed.
        3. The download states of the other disks remain unchanged.
           All volumes still work fine.
    7. Delete all volumes. Verify that only 1 entry remains in the
       backing image disk map.
    8. Delete the backing image.
    """

    # Step 1
    create_backing_image_with_matching_url(client, BACKING_IMAGE_NAME,
                                           BACKING_IMAGE_QCOW2_URL)

    # Step 2
    volume_names = [volume_name + "-1", volume_name + "-2", volume_name + "-3"]

    for volume_name in volume_names:
        volume = create_and_check_volume(client, volume_name, 3,
                                         str(BACKING_IMAGE_EXT4_SIZE),
                                         BACKING_IMAGE_NAME)

    # Step 3
    lht_host_id = get_self_host_id()
    for volume_name in volume_names:
        volume = client.by_id_volume(volume_name)
        volume.attach(hostId=lht_host_id)
        wait_for_volume_healthy(client, volume_name)
        assert volume.backingImage == BACKING_IMAGE_NAME

    backing_image = client.by_id_backing_image(BACKING_IMAGE_NAME)
    assert len(backing_image.diskFileStatusMap) == 3
    for disk_id, status in backing_image.diskFileStatusMap.items():
        assert status.state == "ready"

    backing_images_in_disk = os.listdir("/var/lib/longhorn/backing-images")
    assert len(backing_images_in_disk) == 1
    assert os.path.exists("/var/lib/longhorn/backing-images/{}/backing".format(
        backing_images_in_disk[0]))
    assert os.path.exists(
        "/var/lib/longhorn/backing-images/{}/backing.cfg".format(
            backing_images_in_disk[0]))

    # Step 4
    current_host = client.by_id_node(id=lht_host_id)
    client.update(current_host, allowScheduling=False)
    wait_for_node_update(client, lht_host_id, "allowScheduling", False)

    # Step 5
    for volume_name in volume_names:
        volume = client.by_id_volume(volume_name)
        for replica in volume.replicas:
            if replica.hostId == lht_host_id:
                replica_name = replica.name
        volume.replicaRemove(name=replica_name)
    # This wait interval should be smaller than the setting value.
    # Otherwise, the backing image files may be cleaned up.
    time.sleep(int(BACKING_IMAGE_CLEANUP_WAIT_INTERVAL))
    check_backing_image_disk_map_status(client, BACKING_IMAGE_NAME, 3, "ready")

    # Step 6
    update_setting(client, "backing-image-cleanup-wait-interval", "1")
    check_backing_image_disk_map_status(client, BACKING_IMAGE_NAME, 2, "ready")

    backing_images_in_disk = os.listdir("/var/lib/longhorn/backing-images")
    assert len(backing_images_in_disk) == 0

    # Step 7
    for volume_name in volume_names:
        volume = client.by_id_volume(volume_name)
        client.delete(volume)
        wait_for_volume_delete(client, volume_name)

    check_backing_image_disk_map_status(client, BACKING_IMAGE_NAME, 1, "ready")
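
check_backing_image_disk_map_status, used in Steps 5 through 7 above, is another polling assertion. A minimal sketch, assuming it waits until the backing image's diskFileStatusMap has the expected number of entries and every entry reports the expected state; the retry loop is illustrative.

import time


def check_backing_image_disk_map_status(client, bi_name,
                                        expected_count, expected_state):
    # Wait until the backing image reports exactly expected_count disk
    # entries and every entry is in expected_state, then assert on the
    # final observation. Sketch only; the real helper lives in the shared
    # test utilities.
    for _ in range(180):
        backing_image = client.by_id_backing_image(bi_name)
        status_map = backing_image.diskFileStatusMap
        if len(status_map) == expected_count and all(
                s.state == expected_state for s in status_map.values()):
            break
        time.sleep(1)
    assert len(backing_image.diskFileStatusMap) == expected_count
    for status in backing_image.diskFileStatusMap.values():
        assert status.state == expected_state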