def test_tag_scheduling_failure(client, node_default_tags):  # NOQA
    """
    Test that scheduling fails if no Nodes/Disks with the requested Tags are
    available.

    Case 1:
    Validate that the API call fails if nonexistent tags are specified for the
    volume.

    Case 2:
    1. Specify existing tags that no node or disk can satisfy.
    2. Validate that the volume fails to be scheduled.
    """
    invalid_tag_cases = [
        # Only one Disk Tag exists.
        {
            "disk": ["doesnotexist", "ssd"],
            "node": []
        },
        # Only one Node Tag exists.
        {
            "disk": [],
            "node": ["doesnotexist", "main"]
        }
    ]
    for tags in invalid_tag_cases:
        volume_name = generate_volume_name()  # NOQA
        with pytest.raises(Exception) as e:
            client.create_volume(name=volume_name, size=SIZE,
                                 numberOfReplicas=3,
                                 diskSelector=tags["disk"],
                                 nodeSelector=tags["node"])
        assert "does not exist" in str(e.value)

    unsatisfied_tag_cases = [
        {
            "disk": [],
            "node": ["main", "fallback"]
        },
        {
            "disk": ["ssd", "m2"],
            "node": []
        }
    ]
    for tags in unsatisfied_tag_cases:
        volume_name = generate_volume_name()
        client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=3,
                             diskSelector=tags["disk"],
                             nodeSelector=tags["node"])
        volume = wait_for_volume_detached(client, volume_name)
        assert volume.diskSelector == tags["disk"]
        assert volume.nodeSelector == tags["node"]
        wait_scheduling_failure(client, volume_name)

        client.delete(volume)
        wait_for_volume_delete(client, volume.name)
        volumes = client.list_volume()
        assert len(volumes) == 0
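# Illustrative sketch only, not the real helper from common.py: one way a wait
# like wait_scheduling_failure(client, volume_name) could poll for the failed
# scheduling condition, reusing the VOLUME_CONDITION_SCHEDULED and
# CONDITION_STATUS_FALSE constants seen elsewhere in this suite.
def wait_scheduling_failure_sketch(client, volume_name, retries=60, interval=2):
    import time
    for _ in range(retries):
        volume = client.by_id_volume(volume_name)
        if volume.conditions[VOLUME_CONDITION_SCHEDULED]["status"] == \
                CONDITION_STATUS_FALSE:
            return
        time.sleep(interval)
    assert False, "volume %s never reported a scheduling failure" % volume_name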
def test_replica_scheduler_exceed_over_provisioning(client):  # NOQA
    over_provisioning_setting = client.by_id_setting(
        SETTING_STORAGE_OVER_PROVISIONING_PERCENTAGE)
    old_provisioning_setting = over_provisioning_setting["value"]
    # set storage over provisioning percentage to 100
    over_provisioning_setting = client.update(over_provisioning_setting,
                                              value="100")

    # test exceed over provisioning limit couldn't be scheduled
    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            disk["storageReserved"] = disk["storageMaximum"] - 1 * Gi
        update_disks = get_update_disks(disks)
        node = node.diskUpdate(disks=update_disks)
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_status(client, node["name"], fsid,
                                 "storageReserved",
                                 disk["storageMaximum"] - 1 * Gi)

    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name, size=str(2 * Gi),
                                  numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(
        client, vol_name, "status", CONDITION_STATUS_FALSE)

    client.delete(volume)
    common.wait_for_volume_delete(client, vol_name)
    client.update(over_provisioning_setting, value=old_provisioning_setting)
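# Simplified sketch (an assumption, not the scheduler's actual code) of why the
# 2 Gi volume above cannot be scheduled: with the over-provisioning percentage
# at 100 and storageReserved raised to storageMaximum - 1 Gi, each disk offers
# roughly 1 Gi of schedulable space, less than the 2 Gi one replica needs.
# Already-scheduled storage is ignored here for brevity.
def schedulable_bytes_sketch(storage_maximum, storage_reserved,
                             over_provisioning_pct=100):
    return (storage_maximum - storage_reserved) * over_provisioning_pct // 100

# e.g. schedulable_bytes_sketch(240 * Gi, 240 * Gi - 1 * Gi) == 1 * Gi < 2 * Gi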
def test_replica_scheduler_no_disks(client): # NOQA nodes = client.list_node() # delete all disks on each node for node in nodes: disks = node["disks"] name = node["name"] # set allowScheduling to false for fsid, disk in disks.iteritems(): disk["allowScheduling"] = False update_disks = get_update_disks(disks) node = node.diskUpdate(disks=update_disks) for fsid, disk in node["disks"].iteritems(): # wait for node controller update disk status wait_for_disk_status(client, name, fsid, "allowScheduling", False) wait_for_disk_status(client, name, fsid, "storageScheduled", 0) node = client.by_id_node(name) for fsid, disk in node["disks"].iteritems(): assert not disk["allowScheduling"] node = node.diskUpdate(disks=[]) node = common.wait_for_disk_update(client, name, 0) assert len(node["disks"]) == 0 # test there's no disk fit for volume vol_name = common.generate_volume_name() volume = client.create_volume(name=vol_name, size=SIZE, numberOfReplicas=len(nodes)) volume = common.wait_for_volume_condition_scheduled( client, vol_name, "status", CONDITION_STATUS_FALSE) client.delete(volume) common.wait_for_volume_delete(client, vol_name)
def test_replica_scheduler_update_over_provisioning(client): # NOQA nodes = client.list_node() lht_hostId = get_self_host_id() expect_node_disk = {} for node in nodes: disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == DEFAULT_DISK_PATH: expect_disk = disk expect_disk["fsid"] = fsid expect_node_disk[node["name"]] = expect_disk over_provisioning_setting = client.by_id_setting( SETTING_STORAGE_OVER_PROVISIONING_PERCENTAGE) old_provisioning_setting = over_provisioning_setting["value"] # set storage over provisioning percentage to 0 # to test all replica couldn't be scheduled over_provisioning_setting = client.update(over_provisioning_setting, value="0") vol_name = common.generate_volume_name() volume = client.create_volume(name=vol_name, size=SIZE, numberOfReplicas=len(nodes)) volume = common.wait_for_volume_condition_scheduled(client, vol_name, "status", CONDITION_STATUS_FALSE) # set storage over provisioning percentage to 100 over_provisioning_setting = client.update(over_provisioning_setting, value="100") # check volume status volume = common.wait_for_volume_condition_scheduled(client, vol_name, "status", CONDITION_STATUS_TRUE) volume = common.wait_for_volume_detached(client, vol_name) assert volume["state"] == "detached" assert volume["created"] != "" volume.attach(hostId=lht_hostId) volume = common.wait_for_volume_healthy(client, vol_name) node_hosts = [] for node in nodes: node_hosts.append(node["name"]) # check all replica should be scheduled to default disk for replica in volume["replicas"]: id = replica["hostId"] assert id != "" assert replica["running"] expect_disk = expect_node_disk[id] assert replica["diskID"] == expect_disk["fsid"] assert expect_disk["path"] in replica["dataPath"] node_hosts = filter(lambda x: x != id, node_hosts) assert len(node_hosts) == 0 # clean volume and disk cleanup_volume(client, vol_name) client.update(over_provisioning_setting, value=old_provisioning_setting)
def test_replica_scheduler_just_under_over_provisioning(client): # NOQA over_provisioning_setting = client.by_id_setting( SETTING_STORAGE_OVER_PROVISIONING_PERCENTAGE) old_provisioning_setting = over_provisioning_setting["value"] # set storage over provisioning percentage to 100 over_provisioning_setting = client.update(over_provisioning_setting, value="100") lht_hostId = get_self_host_id() nodes = client.list_node() expect_node_disk = {} max_size_array = [] for node in nodes: disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == DEFAULT_DISK_PATH: expect_disk = disk expect_disk["fsid"] = fsid expect_node_disk[node["name"]] = expect_disk max_size_array.append(disk["storageMaximum"]) disk["storageReserved"] = 0 update_disks = get_update_disks(disks) node = node.diskUpdate(disks=update_disks) disks = node["disks"] for fsid, disk in disks.iteritems(): wait_for_disk_status(client, node["name"], fsid, "storageReserved", 0) max_size = min(max_size_array) # test just under over provisioning limit could be scheduled vol_name = common.generate_volume_name() volume = client.create_volume(name=vol_name, size=str(max_size), numberOfReplicas=len(nodes)) volume = common.wait_for_volume_condition_scheduled( client, vol_name, "status", CONDITION_STATUS_TRUE) volume = common.wait_for_volume_detached(client, vol_name) assert volume["state"] == "detached" assert volume["created"] != "" volume.attach(hostId=lht_hostId) volume = common.wait_for_volume_healthy(client, vol_name) nodes = client.list_node() node_hosts = [] for node in nodes: node_hosts.append(node["name"]) # check all replica should be scheduled to default disk for replica in volume["replicas"]: id = replica["hostId"] assert id != "" assert replica["running"] expect_disk = expect_node_disk[id] assert replica["diskID"] == expect_disk["fsid"] assert expect_disk["path"] in replica["dataPath"] node_hosts = filter(lambda x: x != id, node_hosts) assert len(node_hosts) == 0 # clean volume and disk cleanup_volume(client, vol_name) client.update(over_provisioning_setting, value=old_provisioning_setting)
def csi_io_test(client, core_api, csi_pv, pvc, pod_make, base_image=""): # NOQA pv_name = generate_volume_name() pod_name = 'csi-io-test' create_and_wait_csi_pod_named_pv(pv_name, pod_name, client, core_api, csi_pv, pvc, pod_make, base_image, "") test_data = generate_random_data(VOLUME_RWTEST_SIZE) write_volume_data(core_api, pod_name, test_data) delete_and_wait_pod(core_api, pod_name) common.wait_for_volume_detached(client, csi_pv['metadata']['name']) pod_name = 'csi-io-test-2' pod = pod_make(name=pod_name) pod['spec']['volumes'] = [ create_pvc_spec(pv_name) ] csi_pv['metadata']['name'] = pv_name csi_pv['spec']['csi']['volumeHandle'] = pv_name pvc['metadata']['name'] = pv_name pvc['spec']['volumeName'] = pv_name update_storageclass_references(CSI_PV_TEST_STORAGE_NAME, csi_pv, pvc) create_and_wait_pod(core_api, pod) resp = read_volume_data(core_api, pod_name) assert resp == test_data
def test_empty_backup_volume(clients):  # NOQA
    for host_id, client in clients.iteritems():
        break
    lht_hostId = get_self_host_id()

    volName = generate_volume_name()
    volume = create_and_check_volume(client, volName)

    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, volName)

    bv, b1, snap1, _ = create_backup(client, volName)

    bv.backupDelete(name=b1["name"])
    common.wait_for_backup_delete(b1["name"], bv)

    backup_list = bv.backupList()
    assert len(backup_list) == 0

    # test the empty backup volume can recreate backup
    _, b2, snap2, _ = create_backup(client, volName)

    # test the empty backup volume is still deletable
    bv.backupDelete(name=b2["name"])
    common.wait_for_backup_delete(b2["name"], bv)

    bv = client.by_id_backupVolume(volName)
    client.delete(bv)
    common.wait_for_backup_volume_delete(client, volName)

    cleanup_volume(client, volume)
def csi_io_test(client, core_api, csi_pv, pvc, pod_make, base_image=""): # NOQA pv_name = generate_volume_name() pod_name = 'csi-io-test' create_and_wait_csi_pod_named_pv(pv_name, pod_name, client, core_api, csi_pv, pvc, pod_make, base_image, "") test_data = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, pod_name, test_data) delete_and_wait_pod(core_api, pod_name) common.wait_for_volume_detached(client, csi_pv['metadata']['name']) pod_name = 'csi-io-test-2' pod = pod_make(name=pod_name) pod['spec']['volumes'] = [ create_pvc_spec(pv_name) ] csi_pv['metadata']['name'] = pv_name csi_pv['spec']['csi']['volumeHandle'] = pv_name pvc['metadata']['name'] = pv_name pvc['spec']['volumeName'] = pv_name update_storageclass_references(CSI_PV_TEST_STORAGE_NAME, csi_pv, pvc) create_and_wait_pod(core_api, pod) resp = read_volume_data(core_api, pod_name) assert resp == test_data
def test_replica_scheduler_large_volume_fit_small_disk(client): # NOQA nodes = client.list_node() # create a small size disk on current node lht_hostId = get_self_host_id() node = client.by_id_node(lht_hostId) small_disk_path = create_host_disk(client, "vol-small", SIZE, lht_hostId) small_disk = {"path": small_disk_path, "allowScheduling": True} update_disks = get_update_disks(node["disks"]) update_disks.append(small_disk) node = node.diskUpdate(disks=update_disks) node = common.wait_for_disk_update(client, lht_hostId, len(update_disks)) assert len(node["disks"]) == len(update_disks) unexpected_disk = {} for fsid, disk in node["disks"].iteritems(): if disk["path"] == small_disk_path: unexpected_disk["fsid"] = fsid unexpected_disk["path"] = disk["path"] break # volume is too large to fill into small size disk on current node vol_name = common.generate_volume_name() volume = create_volume(client, vol_name, str(Gi), lht_hostId, len(nodes)) nodes = client.list_node() node_hosts = [] for node in nodes: node_hosts.append(node["name"]) # check replica on current node shouldn't schedule to small disk for replica in volume["replicas"]: id = replica["hostId"] assert id != "" assert replica["running"] if id == lht_hostId: assert replica["diskID"] != unexpected_disk["fsid"] assert replica["dataPath"] != unexpected_disk["path"] node_hosts = filter(lambda x: x != id, node_hosts) assert len(node_hosts) == 0 cleanup_volume(client, vol_name) # cleanup test disks node = client.by_id_node(lht_hostId) disks = node["disks"] disk = disks[unexpected_disk["fsid"]] disk["allowScheduling"] = False update_disks = get_update_disks(disks) node = node.diskUpdate(disks=update_disks) node = wait_for_disk_status(client, lht_hostId, unexpected_disk["fsid"], "allowScheduling", False) disks = node["disks"] disk = disks[unexpected_disk["fsid"]] assert not disk["allowScheduling"] disks.pop(unexpected_disk["fsid"]) update_disks = get_update_disks(disks) node.diskUpdate(disks=update_disks) cleanup_host_disk(client, 'vol-small')
def test_csi_expansion_with_size_round_up(client, core_api):  # NOQA
    """
    Test that expanding a Longhorn volume rounds the size up.

    1. Create a Longhorn volume with size '1Gi'.
    2. Attach, write data, and detach.
    3. Expand the volume size to '2000000000' (~2G) and check that the size is
       rounded up to '2000683008'.
    4. Attach, write data, and detach.
    5. Expand the volume size to '2Gi' and check that the size is '2147483648'.
    6. Attach, write data, and detach.
    """
    volume_name = generate_volume_name()
    volume = create_and_check_volume(client, volume_name, 2, str(1 * Gi))

    self_hostId = get_self_host_id()
    volume.attach(hostId=self_hostId, disableFrontend=False)
    volume = wait_for_volume_healthy(client, volume_name)
    test_data = write_volume_random_data(volume)
    volume.detach(hostId="")
    volume = wait_for_volume_detached(client, volume_name)

    volume.expand(size="2000000000")
    wait_for_volume_expansion(client, volume_name)
    volume = client.by_id_volume(volume_name)
    assert volume.size == "2000683008"

    self_hostId = get_self_host_id()
    volume.attach(hostId=self_hostId, disableFrontend=False)
    volume = wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, test_data, False)
    test_data = write_volume_random_data(volume)
    volume.detach(hostId="")
    volume = wait_for_volume_detached(client, volume_name)

    volume.expand(size=str(2 * Gi))
    wait_for_volume_expansion(client, volume_name)
    volume = client.by_id_volume(volume_name)
    assert volume.size == "2147483648"

    self_hostId = get_self_host_id()
    volume.attach(hostId=self_hostId, disableFrontend=False)
    volume = wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, test_data, False)
    volume.detach(hostId="")
    volume = wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)
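# Illustrative note on the expected sizes above: the rounded values match
# rounding the requested size up to the next multiple of 2 MiB. The 2 MiB
# granularity is an assumption inferred from the numbers in this test, not a
# documented constant.
def expected_rounded_size_sketch(requested_bytes, granularity=2 * 1024 * 1024):
    # round up to the next multiple of the granularity
    return ((requested_bytes + granularity - 1) // granularity) * granularity

# expected_rounded_size_sketch(2000000000) == 2000683008
# expected_rounded_size_sketch(2 * 1024 ** 3) == 2147483648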
def backupstore_test(client, host_id, volname, size): volume = client.by_id_volume(volname) volume.snapshotCreate() data = write_volume_random_data(volume) snap2 = volume.snapshotCreate() volume.snapshotCreate() volume.snapshotBackup(name=snap2["name"]) bv, b = common.find_backup(client, volname, snap2["name"]) new_b = bv.backupGet(name=b["name"]) assert new_b["name"] == b["name"] assert new_b["url"] == b["url"] assert new_b["snapshotName"] == b["snapshotName"] assert new_b["snapshotCreated"] == b["snapshotCreated"] assert new_b["created"] == b["created"] assert new_b["volumeName"] == b["volumeName"] assert new_b["volumeSize"] == b["volumeSize"] assert new_b["volumeCreated"] == b["volumeCreated"] # test restore restoreName = generate_volume_name() volume = client.create_volume(name=restoreName, size=size, numberOfReplicas=2, fromBackup=b["url"]) volume = common.wait_for_volume_detached(client, restoreName) assert volume["name"] == restoreName assert volume["size"] == size assert volume["numberOfReplicas"] == 2 assert volume["state"] == "detached" volume = volume.attach(hostId=host_id) volume = common.wait_for_volume_healthy(client, restoreName) check_volume_data(volume, data) volume = volume.detach() volume = common.wait_for_volume_detached(client, restoreName) client.delete(volume) volume = wait_for_volume_delete(client, restoreName) bv.backupDelete(name=b["name"]) backups = bv.backupList() found = False for b in backups: if b["snapshotName"] == snap2["name"]: found = True break assert not found
def test_tag_scheduling(client, node_default_tags):  # NOQA
    """
    Test that scheduling succeeds if there are available Nodes/Disks with the
    requested Tags.
    """
    host_id = get_self_host_id()
    tag_specs = [
        # Select all Nodes.
        {
            "disk": [],
            "expected": 3,
            "node": []
        },
        # Selector works with AND on Disk Tags.
        {
            "disk": ["ssd", "nvme"],
            "expected": 2,
            "node": []
        },
        # Selector works with AND on Node Tags.
        {
            "disk": [],
            "expected": 2,
            "node": ["main", "storage"]
        },
        # Selector works based on combined Disk AND Node selector.
        {
            "disk": ["ssd", "nvme"],
            "expected": 1,
            "node": ["storage", "main"]
        }
    ]
    for specs in tag_specs:
        volume_name = generate_volume_name()  # NOQA
        client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=3,
                             diskSelector=specs["disk"],
                             nodeSelector=specs["node"])
        volume = wait_for_volume_detached(client, volume_name)
        assert volume["diskSelector"] == specs["disk"]
        assert volume["nodeSelector"] == specs["node"]

        volume.attach(hostId=host_id)
        volume = wait_for_volume_healthy(client, volume_name)
        assert len(volume["replicas"]) == 3
        check_volume_replicas(volume, specs, node_default_tags)

        cleanup_volume(client, volume)
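# Hypothetical sketch of the per-replica check performed by
# check_volume_replicas: every replica should land on a node whose tags contain
# all requested node tags, and on a disk whose tags contain all requested disk
# tags. The helper and argument names here are illustrative only.
def replica_matches_tags_sketch(replica_node_tags, replica_disk_tags, specs):
    return set(specs["node"]).issubset(set(replica_node_tags)) and \
        set(specs["disk"]).issubset(set(replica_disk_tags))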
def test_deleting_backup_volume(clients):  # NOQA
    for host_id, client in clients.iteritems():
        break
    lht_hostId = get_self_host_id()

    volName = generate_volume_name()
    volume = create_and_check_volume(client, volName)

    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, volName)

    bv, _, snap1, _ = create_backup(client, volName)
    _, _, snap2, _ = create_backup(client, volName)

    bv = client.by_id_backupVolume(volName)
    client.delete(bv)
    common.wait_for_backup_volume_delete(client, volName)
    cleanup_volume(client, volume)
def test_xfs_pv_existing_volume(client, core_api, pod_manifest):  # NOQA
    """
    Test creating a PV on top of an existing XFS filesystem

    1. Create a volume
    2. Create PV/PVC for the existing volume, specify `xfs` as filesystem
    3. Attach the volume to the current node.
    4. Format it to `xfs`
    5. Create a POD using the volume

    FIXME: We should write data in step 4 and validate the data in step 5 to
    make sure the disk won't be reformatted
    """
    volume_name = generate_volume_name()
    volume = create_and_check_volume(client, volume_name)

    create_pv_for_volume(client, core_api, volume, volume_name, "xfs")
    create_pvc_for_volume(client, core_api, volume, volume_name)

    host_id = get_self_host_id()

    volume = volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)

    cmd = ['mkfs.xfs', get_volume_endpoint(volume)]
    subprocess.check_call(cmd)

    volume = volume.detach()
    volume = wait_for_volume_detached(client, volume_name)

    pod_manifest['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": volume_name
        }
    }]
    create_and_wait_pod(core_api, pod_manifest)
def backupstore_test(client, host_id, volname, size): bv, b, snap2, data = create_backup(client, volname) # test restore restoreName = generate_volume_name() volume = client.create_volume(name=restoreName, size=size, numberOfReplicas=2, fromBackup=b["url"]) volume = common.wait_for_volume_restoration_completed(client, restoreName) volume = common.wait_for_volume_detached(client, restoreName) assert volume["name"] == restoreName assert volume["size"] == size assert volume["numberOfReplicas"] == 2 assert volume["state"] == "detached" assert volume["initialRestorationRequired"] is False volume = volume.attach(hostId=host_id) volume = common.wait_for_volume_healthy(client, restoreName) check_volume_data(volume, data) volume = volume.detach() volume = common.wait_for_volume_detached(client, restoreName) bv.backupDelete(name=b["name"]) backups = bv.backupList() found = False for b in backups: if b["snapshotName"] == snap2["name"]: found = True break assert not found volume = wait_for_volume_status(client, volume["name"], "lastBackup", "") assert volume["lastBackupAt"] == "" client.delete(volume) volume = wait_for_volume_delete(client, restoreName)
def test_node_controller_sync_storage_scheduled(client): # NOQA lht_hostId = get_self_host_id() nodes = client.list_node() for node in nodes: for fsid, disk in node["disks"].iteritems(): # wait for node controller update disk status wait_for_disk_status(client, node["name"], fsid, "storageScheduled", 0) # create a volume and test update StorageScheduled of each node vol_name = common.generate_volume_name() volume = create_volume(client, vol_name, str(SMALL_DISK_SIZE), lht_hostId, len(nodes)) replicas = volume["replicas"] for replica in replicas: id = replica["hostId"] assert id != "" assert replica["running"] # wait for node controller to update disk status for node in nodes: disks = node["disks"] for fsid, disk in disks.iteritems(): wait_for_disk_status(client, node["name"], fsid, "storageScheduled", SMALL_DISK_SIZE) nodes = client.list_node() for node in nodes: disks = node["disks"] for replica in replicas: if replica["hostId"] == node["name"]: disk = disks[replica["diskID"]] conditions = disk["conditions"] assert disk["storageScheduled"] == SMALL_DISK_SIZE assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \ CONDITION_STATUS_TRUE break # clean volumes cleanup_volume(client, vol_name)
def test_restoration_required_field(clients):  # NOQA
    for host_id, client in clients.iteritems():
        break
    volname = generate_volume_name()

    volume = client.create_volume(name=volname, size=SIZE,
                                  numberOfReplicas=3)
    volume = common.wait_for_volume_detached(client, volname)
    assert volume["initialRestorationRequired"] is False

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volname)
    assert volume["initialRestorationRequired"] is False

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volname)
    assert volume["initialRestorationRequired"] is False

    client.delete(volume)
    volume = wait_for_volume_delete(client, volname)

    volumes = client.list_volume()
    assert len(volumes) == 0
def test_xfs_pv(client, core_api, pod_manifest):  # NOQA
    """
    Test creating a PV with a new XFS filesystem

    1. Create a volume
    2. Create a PV for the existing volume, specify `xfs` as filesystem
    3. Create PVC and Pod
    4. Make sure Pod is running.
    5. Write data into the pod and read it back for validation.

    Note: The volume will be formatted to the XFS filesystem by Kubernetes in
    this case.
    """
    volume_name = generate_volume_name()
    volume = create_and_check_volume(client, volume_name)

    create_pv_for_volume(client, core_api, volume, volume_name, "xfs")
    create_pvc_for_volume(client, core_api, volume, volume_name)

    pod_manifest['spec']['volumes'] = [{
        "name": "pod-data",
        "persistentVolumeClaim": {
            "claimName": volume_name
        }
    }]
    pod_name = pod_manifest['metadata']['name']
    create_and_wait_pod(core_api, pod_manifest)

    test_data = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name, test_data)
    resp = read_volume_data(core_api, pod_name)
    assert resp == test_data
def test_allow_volume_creation_with_degraded_availability_csi( client, core_api, apps_api, make_deployment_with_pvc): # NOQA """ Test Allow Volume Creation with Degraded Availability (CSI) Requirement: 1. Set `allow-volume-creation-with-degraded-availability` to true. 2. Set `node-level-soft-anti-affinity` to false. Steps: 1. Disable scheduling for node 3. 2. Create a Deployment Pod with a volume and 3 replicas. 1. After the volume is attached, scheduling error should be seen. 3. Write data to the Pod. 4. Scale down the deployment to 0 to detach the volume. 1. Scheduled condition should become true. 5. Scale up the deployment back to 1 and verify the data. 1. Scheduled condition should become false. 6. Enable the scheduling for node 3. 1. Volume should start rebuilding on the node 3 soon. 2. Once the rebuilding starts, the scheduled condition should become true. 7. Once rebuild finished, scale down and back the deployment to verify the data. """ setting = client.by_id_setting(common.SETTING_DEGRADED_AVAILABILITY) client.update(setting, value="true") setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY) client.update(setting, value="false") nodes = client.list_node() node3 = nodes[2] client.update(node3, allowScheduling=False) vol = common.create_and_check_volume(client, generate_volume_name(), size=str(500 * Mi)) pv_name = vol.name + "-pv" common.create_pv_for_volume(client, core_api, vol, pv_name) pvc_name = vol.name + "-pvc" common.create_pvc_for_volume(client, core_api, vol, pvc_name) deployment_name = vol.name + "-dep" deployment = make_deployment_with_pvc(deployment_name, pvc_name) deployment["spec"]["replicas"] = 3 apps_api.create_namespaced_deployment(body=deployment, namespace='default') common.wait_for_volume_status(client, vol.name, common.VOLUME_FIELD_STATE, common.VOLUME_STATE_ATTACHED) common.wait_scheduling_failure(client, vol.name) data_path = "/data/test" pod = common.wait_and_get_any_deployment_pod(core_api, deployment_name) common.write_pod_volume_random_data(core_api, pod.metadata.name, data_path, common.DATA_SIZE_IN_MB_2) created_md5sum = get_pod_data_md5sum(core_api, pod.metadata.name, data_path) deployment['spec']['replicas'] = 0 apps_api.patch_namespaced_deployment(body=deployment, namespace='default', name=deployment_name) vol = common.wait_for_volume_detached(client, vol.name) assert vol.conditions[VOLUME_CONDITION_SCHEDULED]['status'] == "True" deployment['spec']['replicas'] = 1 apps_api.patch_namespaced_deployment(body=deployment, namespace='default', name=deployment_name) common.wait_for_volume_status(client, vol.name, common.VOLUME_FIELD_STATE, common.VOLUME_STATE_ATTACHED) common.wait_for_volume_condition_scheduled(client, vol.name, "status", common.CONDITION_STATUS_FALSE) pod = common.wait_and_get_any_deployment_pod(core_api, deployment_name) assert created_md5sum == get_pod_data_md5sum(core_api, pod.metadata.name, data_path) client.update(node3, allowScheduling=True) common.wait_for_rebuild_start(client, vol.name) vol = client.by_id_volume(vol.name) assert vol.conditions[VOLUME_CONDITION_SCHEDULED]['status'] == "True" common.wait_for_rebuild_complete(client, vol.name) deployment['spec']['replicas'] = 0 apps_api.patch_namespaced_deployment(body=deployment, namespace='default', name=deployment_name) common.wait_for_volume_detached(client, vol.name) deployment['spec']['replicas'] = 1 apps_api.patch_namespaced_deployment(body=deployment, namespace='default', name=deployment_name) common.wait_for_volume_status(client, vol.name, 
common.VOLUME_FIELD_STATE, common.VOLUME_STATE_ATTACHED) pod = common.wait_and_get_any_deployment_pod(core_api, deployment_name) assert created_md5sum == get_pod_data_md5sum(core_api, pod.metadata.name, data_path)
def test_instance_manager_cpu_reservation(client, core_api): # NOQA """ Test if the CPU requests of instance manager pods are controlled by the settings and the node specs correctly. 1. Try to change the deprecated setting `Guaranteed Engine CPU`. --> The setting update should fail. 2. Pick up node 1, set `node.engineManagerCPURequest` and `node.replicaManagerCPURequest` to 150 and 250, respectively. --> The IM pods on this node will be restarted. And the CPU requests of these IM pods matches the above milli value. 3. Change the new settings `Guaranteed Engine Manager CPU` and `Guaranteed Replica Manager CPU` to 10 and 20, respectively. Then wait for all IM pods except for the pods on node 1 restarting. --> The CPU requests of the restarted IM pods equals to the new setting value multiply the kube node allocatable CPU. 4. Set the both new settings to 0. --> All IM pods except for the pod on node 1 will be restarted without CPU requests. 5. Set the fields on node 1 to 0. --> The IM pods on node 1 will be restarted without CPU requests. 6. Set the both new settings to 2 random values, and the sum of the 2 values is small than 40. Then wait for all IM pods restarting. --> The CPU requests of all IM pods equals to the new setting value multiply the kube node allocatable CPU. 7. Set the both new settings to 2 random values, and the single value or the sum of the 2 values is greater than 40. --> The setting update should fail. 8. Create a volume, verify everything works as normal Note: use fixture to restore the setting into the original state """ instance_managers = client.list_instance_manager() deprecated_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_CPU) with pytest.raises(Exception) as e: client.update(deprecated_setting, value="0.1") host_node_name = get_self_host_id() host_node = client.by_id_node(host_node_name) other_ems, other_rms = [], [] for im in instance_managers: if im.managerType == "engine": if im.nodeID == host_node_name: em_on_host = im else: other_ems.append(im) else: if im.nodeID == host_node_name: rm_on_host = im else: other_rms.append(im) assert em_on_host and rm_on_host host_kb_node = core_api.read_node(host_node_name) if host_kb_node.status.allocatable["cpu"].endswith('m'): allocatable_millicpu = int(host_kb_node.status.allocatable["cpu"][:-1]) else: allocatable_millicpu = int( host_kb_node.status.allocatable["cpu"]) * 1000 client.update(host_node, allowScheduling=True, engineManagerCPURequest=150, replicaManagerCPURequest=250) time.sleep(5) guaranteed_engine_cpu_setting_check(client, core_api, [em_on_host], "Running", True, "150m") guaranteed_engine_cpu_setting_check(client, core_api, [rm_on_host], "Running", True, "250m") em_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_MANAGER_CPU) client.update(em_setting, value="10") rm_setting = client.by_id_setting(SETTING_GUARANTEED_REPLICA_MANAGER_CPU) client.update(rm_setting, value="20") time.sleep(5) guaranteed_engine_cpu_setting_check( client, core_api, other_ems, "Running", True, str(int(allocatable_millicpu * 10 / 100)) + "m") guaranteed_engine_cpu_setting_check( client, core_api, other_rms, "Running", True, str(int(allocatable_millicpu * 20 / 100)) + "m") em_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_MANAGER_CPU) client.update(em_setting, value="0") rm_setting = client.by_id_setting(SETTING_GUARANTEED_REPLICA_MANAGER_CPU) client.update(rm_setting, value="0") time.sleep(5) guaranteed_engine_cpu_setting_check(client, core_api, other_ems, "Running", True, "") 
guaranteed_engine_cpu_setting_check(client, core_api, other_rms, "Running", True, "") ems, rms = other_ems, other_rms ems.append(em_on_host) rms.append(rm_on_host) host_node = client.by_id_node(host_node_name) client.update(host_node, allowScheduling=True, engineManagerCPURequest=0, replicaManagerCPURequest=0) time.sleep(5) guaranteed_engine_cpu_setting_check(client, core_api, ems, "Running", True, "") guaranteed_engine_cpu_setting_check(client, core_api, rms, "Running", True, "") client.update(em_setting, value="20") rm_setting = client.by_id_setting(SETTING_GUARANTEED_REPLICA_MANAGER_CPU) client.update(rm_setting, value="15") time.sleep(5) guaranteed_engine_cpu_setting_check( client, core_api, ems, "Running", True, str(int(allocatable_millicpu * 20 / 100)) + "m") guaranteed_engine_cpu_setting_check( client, core_api, rms, "Running", True, str(int(allocatable_millicpu * 15 / 100)) + "m") with pytest.raises(Exception) as e: client.update(em_setting, value="41") assert "should be between 0 to 40" in \ str(e.value) em_setting = client.by_id_setting(SETTING_GUARANTEED_ENGINE_MANAGER_CPU) with pytest.raises(Exception) as e: client.update(em_setting, value="35") assert "The sum should not be smaller than 0% or greater than 40%" in \ str(e.value) # Create a volume to test vol_name = generate_volume_name() volume = create_and_check_volume(client, vol_name) volume.attach(hostId=get_self_host_id()) volume = wait_for_volume_healthy(client, vol_name) assert len(volume.replicas) == 3 data = write_volume_random_data(volume) check_volume_data(volume, data) cleanup_volume(client, volume)
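# Sketch of the CPU request expected in the assertions above: each setting is a
# percentage of the Kubernetes node's allocatable CPU, converted to a
# millicore string, mirroring str(int(allocatable_millicpu * pct / 100)) + "m".
def expected_im_cpu_request_sketch(allocatable_millicpu, setting_percentage):
    return str(int(allocatable_millicpu * setting_percentage / 100)) + "m"

# e.g. with 2000m allocatable and the engine-manager setting at 20, each
# engine instance manager pod should request "400m".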
def test_node_delete_umount_disks(client): # NOQA # create test disks for node disk_volume_name = 'vol-disk-1' lht_hostId = get_self_host_id() node = client.by_id_node(lht_hostId) disks = node["disks"] disk_path1 = create_host_disk(client, disk_volume_name, str(Gi), lht_hostId) disk1 = { "path": disk_path1, "allowScheduling": True, "storageReserved": SMALL_DISK_SIZE } update_disk = get_update_disks(disks) for disk in update_disk: disk["allowScheduling"] = False # add new disk for node update_disk.append(disk1) # save disks to node node = node.diskUpdate(disks=update_disk) node = common.wait_for_disk_update(client, lht_hostId, len(update_disk)) assert len(node["disks"]) == len(update_disk) node = client.by_id_node(lht_hostId) assert len(node["disks"]) == len(update_disk) disks = node["disks"] # wait for node controller to update disk status for fsid, disk in disks.iteritems(): if disk["path"] == disk_path1: wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling", True) wait_for_disk_status(client, lht_hostId, fsid, "storageReserved", SMALL_DISK_SIZE) free, total = common.get_host_disk_size(disk_path1) wait_for_disk_status(client, lht_hostId, fsid, "storageAvailable", free) wait_for_disk_status(client, lht_hostId, fsid, "storageMaximum", total) node = client.by_id_node(lht_hostId) disks = node["disks"] for key, disk in disks.iteritems(): if disk["path"] == disk_path1: assert disk["allowScheduling"] assert disk["storageReserved"] == SMALL_DISK_SIZE assert disk["storageScheduled"] == 0 free, total = common.get_host_disk_size(disk_path1) assert disk["storageMaximum"] == total assert disk["storageAvailable"] == free conditions = disk["conditions"] assert conditions[DISK_CONDITION_READY]["status"] == \ CONDITION_STATUS_TRUE assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \ CONDITION_STATUS_TRUE else: assert not disk["allowScheduling"] # create a volume nodes = client.list_node() vol_name = common.generate_volume_name() volume = create_volume(client, vol_name, str(SMALL_DISK_SIZE), lht_hostId, len(nodes)) replicas = volume["replicas"] for replica in replicas: id = replica["hostId"] assert id != "" assert replica["running"] if id == lht_hostId: assert replica["dataPath"].startswith(disk_path1) # umount the disk mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name) common.umount_disk(mount_path) # wait for update node status node = client.by_id_node(lht_hostId) disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == disk_path1: wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling", False) wait_for_disk_status(client, lht_hostId, fsid, "storageMaximum", 0) wait_for_disk_conditions(client, lht_hostId, fsid, DISK_CONDITION_READY, CONDITION_STATUS_FALSE) # check result node = client.by_id_node(lht_hostId) disks = node["disks"] update_disks = [] for fsid, disk in disks.iteritems(): if disk["path"] == disk_path1: assert not disk["allowScheduling"] assert disk["storageMaximum"] == 0 assert disk["storageAvailable"] == 0 assert disk["storageReserved"] == SMALL_DISK_SIZE assert disk["storageScheduled"] == SMALL_DISK_SIZE conditions = disk["conditions"] assert conditions[DISK_CONDITION_READY]["status"] == \ CONDITION_STATUS_FALSE assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \ CONDITION_STATUS_FALSE else: conditions = disk["conditions"] assert conditions[DISK_CONDITION_READY]["status"] == \ CONDITION_STATUS_TRUE assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \ CONDITION_STATUS_TRUE update_disks.append(disk) # delete umount disk 
exception with pytest.raises(Exception) as e: node.diskUpdate(disks=update_disks) assert "disable the disk" in str(e.value) # update other disks disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] != disk_path1: disk["allowScheduling"] = True test_update = get_update_disks(disks) node = node.diskUpdate(disks=test_update) disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] != disk_path1: wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling", True) node = client.by_id_node(lht_hostId) disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] != disk_path1: assert disk["allowScheduling"] # mount the disk back mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name) disk_volume = client.by_id_volume(disk_volume_name) dev = get_volume_endpoint(disk_volume) common.mount_disk(dev, mount_path) # wait for update node status node = client.by_id_node(lht_hostId) disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == disk_path1: wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling", False) wait_for_disk_conditions(client, lht_hostId, fsid, DISK_CONDITION_READY, CONDITION_STATUS_TRUE) # check result node = client.by_id_node(lht_hostId) disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == disk_path1: free, total = common.get_host_disk_size(disk_path1) assert not disk["allowScheduling"] assert disk["storageMaximum"] == total assert disk["storageAvailable"] == free assert disk["storageReserved"] == SMALL_DISK_SIZE assert disk["storageScheduled"] == SMALL_DISK_SIZE conditions = disk["conditions"] assert conditions[DISK_CONDITION_READY]["status"] == \ CONDITION_STATUS_TRUE assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \ CONDITION_STATUS_TRUE else: conditions = disk["conditions"] assert conditions[DISK_CONDITION_READY]["status"] == \ CONDITION_STATUS_TRUE assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \ CONDITION_STATUS_TRUE # delete volume and umount disk cleanup_volume(client, vol_name) mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name) common.umount_disk(mount_path) # wait for update node status node = client.by_id_node(lht_hostId) disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == disk_path1: wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling", False) wait_for_disk_status(client, lht_hostId, fsid, "storageScheduled", 0) wait_for_disk_status(client, lht_hostId, fsid, "storageMaximum", 0) # test delete the umount disk node = client.by_id_node(lht_hostId) node.diskUpdate(disks=update_disks) node = common.wait_for_disk_update(client, lht_hostId, len(update_disks)) assert len(node["disks"]) == len(update_disks) cmd = ['rm', '-r', mount_path] subprocess.check_call(cmd)
def test_replica_scheduler_update_minimal_available(client): # NOQA minimal_available_setting = client.by_id_setting( SETTING_STORAGE_MINIMAL_AVAILABLE_PERCENTAGE) old_minimal_setting = minimal_available_setting["value"] nodes = client.list_node() expect_node_disk = {} for node in nodes: disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == DEFAULT_DISK_PATH: expect_disk = disk expect_disk["fsid"] = fsid expect_node_disk[node["name"]] = expect_disk # set storage minimal available percentage to 100 # to test all replica couldn't be scheduled minimal_available_setting = client.update(minimal_available_setting, value="100") # wait for disks state nodes = client.list_node() for node in nodes: disks = node["disks"] for fsid, disk in disks.iteritems(): wait_for_disk_conditions(client, node["name"], fsid, DISK_CONDITION_SCHEDULABLE, CONDITION_STATUS_FALSE) lht_hostId = get_self_host_id() vol_name = common.generate_volume_name() volume = client.create_volume(name=vol_name, size=SIZE, numberOfReplicas=len(nodes)) volume = common.wait_for_volume_condition_scheduled( client, vol_name, "status", CONDITION_STATUS_FALSE) # set storage minimal available percentage to default value(10) minimal_available_setting = client.update(minimal_available_setting, value=old_minimal_setting) # wait for disks state nodes = client.list_node() for node in nodes: disks = node["disks"] for fsid, disk in disks.iteritems(): wait_for_disk_conditions(client, node["name"], fsid, DISK_CONDITION_SCHEDULABLE, CONDITION_STATUS_TRUE) # check volume status volume = common.wait_for_volume_condition_scheduled( client, vol_name, "status", CONDITION_STATUS_TRUE) volume = common.wait_for_volume_detached(client, vol_name) assert volume["state"] == "detached" assert volume["created"] != "" volume.attach(hostId=lht_hostId) volume = common.wait_for_volume_healthy(client, vol_name) nodes = client.list_node() node_hosts = [] for node in nodes: node_hosts.append(node["name"]) # check all replica should be scheduled to default disk for replica in volume["replicas"]: id = replica["hostId"] assert id != "" assert replica["running"] expect_disk = expect_node_disk[id] assert replica["diskID"] == expect_disk["fsid"] assert expect_disk["path"] in replica["dataPath"] node_hosts = filter(lambda x: x != id, node_hosts) assert len(node_hosts) == 0 # clean volume and disk cleanup_volume(client, vol_name)
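# Simplified sketch (an assumption about the scheduler, not code from it) of
# why a minimal-available percentage of 100 turns every disk unschedulable in
# the test above: a disk stays schedulable only while its free space exceeds
# storageMaximum * minimal_available_pct / 100, which nothing can satisfy at
# 100 percent.
def disk_schedulable_sketch(storage_available, storage_maximum,
                            minimal_available_pct):
    return storage_available > storage_maximum * minimal_available_pct / 100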
def create_and_wait_csi_pod(pod_name, client, core_api, csi_pv, pvc, pod_make,
                            base_image, from_backup):  # NOQA
    pv_name = generate_volume_name()
    create_and_wait_csi_pod_named_pv(pv_name, pod_name, client, core_api,
                                     csi_pv, pvc, pod_make, base_image,
                                     from_backup)
def ha_backup_deletion_recovery_test(client, volume_name, size, base_image=""): # NOQA volume = client.create_volume(name=volume_name, size=size, numberOfReplicas=2, baseImage=base_image) volume = common.wait_for_volume_detached(client, volume_name) host_id = get_self_host_id() volume = volume.attach(hostId=host_id) volume = common.wait_for_volume_healthy(client, volume_name) setting = client.by_id_setting(common.SETTING_BACKUP_TARGET) # test backupTarget for multiple settings backupstores = common.get_backupstore_url() for backupstore in backupstores: if common.is_backupTarget_s3(backupstore): backupsettings = backupstore.split("$") setting = client.update(setting, value=backupsettings[0]) assert setting["value"] == backupsettings[0] credential = client.by_id_setting( common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET) credential = client.update(credential, value=backupsettings[1]) assert credential["value"] == backupsettings[1] else: setting = client.update(setting, value=backupstore) assert setting["value"] == backupstore credential = client.by_id_setting( common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET) credential = client.update(credential, value="") assert credential["value"] == "" data = write_volume_random_data(volume) snap2 = volume.snapshotCreate() volume.snapshotCreate() volume.snapshotBackup(name=snap2["name"]) _, b = common.find_backup(client, volume_name, snap2["name"]) res_name = common.generate_volume_name() res_volume = client.create_volume(name=res_name, size=size, numberOfReplicas=2, fromBackup=b["url"]) res_volume = common.wait_for_volume_detached(client, res_name) res_volume = res_volume.attach(hostId=host_id) res_volume = common.wait_for_volume_healthy(client, res_name) check_volume_data(res_volume, data) snapshots = res_volume.snapshotList() # only the backup snapshot + volume-head assert len(snapshots) == 2 backup_snapshot = "" for snap in snapshots: if snap["name"] != "volume-head": backup_snapshot = snap["name"] assert backup_snapshot != "" res_volume.snapshotCreate() snapshots = res_volume.snapshotList() assert len(snapshots) == 3 res_volume.snapshotDelete(name=backup_snapshot) res_volume.snapshotPurge() snapshots = res_volume.snapshotList() assert len(snapshots) == 2 ha_rebuild_replica_test(client, res_name) res_volume = res_volume.detach() res_volume = common.wait_for_volume_detached(client, res_name) client.delete(res_volume) common.wait_for_volume_delete(client, res_name) volume = volume.detach() volume = common.wait_for_volume_detached(client, volume_name) client.delete(volume) common.wait_for_volume_delete(client, volume_name) volumes = client.list_volume() assert len(volumes) == 0
def test_replica_scheduler_too_large_volume_fit_any_disks(client): # NOQA nodes = client.list_node() lht_hostId = get_self_host_id() expect_node_disk = {} for node in nodes: disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == DEFAULT_DISK_PATH: expect_disk = disk expect_disk["fsid"] = fsid expect_node_disk[node["name"]] = expect_disk disk["storageReserved"] = disk["storageMaximum"] update_disks = get_update_disks(disks) node.diskUpdate(disks=update_disks) # volume is too large to fill into any disks volume_size = 4 * Gi vol_name = common.generate_volume_name() client.create_volume(name=vol_name, size=str(volume_size), numberOfReplicas=len(nodes)) volume = common.wait_for_volume_condition_scheduled( client, vol_name, "status", CONDITION_STATUS_FALSE) # Reduce StorageReserved of each default disk so that each node can fit # only one replica. needed_for_scheduling = int( volume_size * 1.5 * 100 / int(DEFAULT_STORAGE_OVER_PROVISIONING_PERCENTAGE)) nodes = client.list_node() for node in nodes: disks = node["disks"] update_disks = get_update_disks(disks) for disk in update_disks: disk["storageReserved"] = \ disk["storageMaximum"] - needed_for_scheduling node = node.diskUpdate(disks=update_disks) disks = node["disks"] for fsid, disk in disks.iteritems(): wait_for_disk_status( client, node["name"], fsid, "storageReserved", disk["storageMaximum"] - needed_for_scheduling) # check volume status volume = common.wait_for_volume_condition_scheduled( client, vol_name, "status", CONDITION_STATUS_TRUE) volume = common.wait_for_volume_detached(client, vol_name) assert volume["state"] == "detached" assert volume["created"] != "" volume.attach(hostId=lht_hostId) volume = common.wait_for_volume_healthy(client, vol_name) nodes = client.list_node() node_hosts = [] for node in nodes: node_hosts.append(node["name"]) # check all replica should be scheduled to default disk for replica in volume["replicas"]: id = replica["hostId"] assert id != "" assert replica["running"] expect_disk = expect_node_disk[id] assert replica["diskID"] == expect_disk["fsid"] assert expect_disk["path"] in replica["dataPath"] node_hosts = filter(lambda x: x != id, node_hosts) assert len(node_hosts) == 0 # clean volume and disk cleanup_volume(client, vol_name)
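# Note on needed_for_scheduling above: each replica of the 4 Gi volume needs
# about size * 1.5 bytes of schedulable space (the 1.5 factor follows the
# test), and schedulable space scales with the over-provisioning percentage,
# hence the division by DEFAULT_STORAGE_OVER_PROVISIONING_PERCENTAGE.
def required_unreserved_bytes_sketch(volume_size, over_provisioning_pct):
    return int(volume_size * 1.5 * 100 / over_provisioning_pct)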
def backup_test(client, host_id, volname): volume = client.by_id_volume(volname) volume.snapshotCreate() w_data = generate_random_data(VOLUME_RWTEST_SIZE) start_pos = generate_random_pos(VOLUME_RWTEST_SIZE) l_data = volume_write(volume["endpoint"], start_pos, w_data) snap2 = volume.snapshotCreate() volume.snapshotCreate() volume.snapshotBackup(name=snap2["name"]) found = False for i in range(100): bvs = client.list_backupVolume() for bv in bvs: if bv["name"] == volname: found = True break if found: break time.sleep(1) assert found found = False for i in range(20): backups = bv.backupList() for b in backups: if b["snapshotName"] == snap2["name"]: found = True break if found: break time.sleep(1) assert found new_b = bv.backupGet(name=b["name"]) assert new_b["name"] == b["name"] assert new_b["url"] == b["url"] assert new_b["snapshotName"] == b["snapshotName"] assert new_b["snapshotCreated"] == b["snapshotCreated"] assert new_b["created"] == b["created"] assert new_b["volumeName"] == b["volumeName"] assert new_b["volumeSize"] == b["volumeSize"] assert new_b["volumeCreated"] == b["volumeCreated"] # test restore restoreName = generate_volume_name() volume = client.create_volume(name=restoreName, size=SIZE, numberOfReplicas=2, fromBackup=b["url"]) volume = common.wait_for_volume_detached(client, restoreName) assert volume["name"] == restoreName assert volume["size"] == SIZE assert volume["numberOfReplicas"] == 2 assert volume["state"] == "detached" volume = volume.attach(hostId=host_id) volume = common.wait_for_volume_healthy(client, restoreName) r_data = volume_read(volume["endpoint"], start_pos, l_data) assert r_data == w_data volume = volume.detach() volume = common.wait_for_volume_detached(client, restoreName) client.delete(volume) volume = wait_for_volume_delete(client, restoreName) bv.backupDelete(name=b["name"]) backups = bv.backupList() found = False for b in backups: if b["snapshotName"] == snap2["name"]: found = True break assert not found
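# The two polling loops in backup_test above repeat the same retry pattern; a
# generic sketch of it (illustrative only, the suite's real wait helpers live
# in common.py):
def poll_until_sketch(predicate, retries=100, interval=1):
    import time
    for _ in range(retries):
        if predicate():
            return True
        time.sleep(interval)
    return False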
def test_replica_cleanup(client): # NOQA nodes = client.list_node() lht_hostId = get_self_host_id() node = client.by_id_node(lht_hostId) extra_disk_path = create_host_disk(client, "extra-disk", "10G", lht_hostId) extra_disk = {"path": extra_disk_path, "allowScheduling": True} update_disks = get_update_disks(node["disks"]) update_disks.append(extra_disk) node = node.diskUpdate(disks=update_disks) node = common.wait_for_disk_update(client, lht_hostId, len(update_disks)) assert len(node["disks"]) == len(update_disks) extra_disk_fsid = "" for fsid, disk in node["disks"].iteritems(): if disk["path"] == extra_disk_path: extra_disk_fsid = fsid break for node in nodes: # disable all the disks except the ones on the current node if node["name"] == lht_hostId: continue for fsid, disk in node["disks"].iteritems(): break disk["allowScheduling"] = False update_disks = get_update_disks(node["disks"]) node.diskUpdate(disks=update_disks) node = wait_for_disk_status(client, node["name"], fsid, "allowScheduling", False) vol_name = common.generate_volume_name() # more replicas, make sure both default and extra disk will get one volume = create_volume(client, vol_name, str(Gi), lht_hostId, 5) data_paths = [] for replica in volume["replicas"]: data_paths.append(replica["dataPath"]) # data path should exist now for data_path in data_paths: assert exec_nsenter("ls {}".format(data_path)) cleanup_volume(client, vol_name) # data path should be gone due to the cleanup of replica for data_path in data_paths: with pytest.raises(subprocess.CalledProcessError): exec_nsenter("ls {}".format(data_path)) node = client.by_id_node(lht_hostId) disks = node["disks"] disk = disks[extra_disk_fsid] disk["allowScheduling"] = False update_disks = get_update_disks(disks) node = node.diskUpdate(disks=update_disks) node = wait_for_disk_status(client, lht_hostId, extra_disk_fsid, "allowScheduling", False) wait_for_disk_status(client, lht_hostId, extra_disk_fsid, "storageScheduled", 0) disks = node["disks"] disk = disks[extra_disk_fsid] assert not disk["allowScheduling"] disks.pop(extra_disk_fsid) update_disks = get_update_disks(disks) node.diskUpdate(disks=update_disks) node = common.wait_for_disk_update(client, lht_hostId, len(update_disks)) cleanup_host_disk(client, 'extra-disk')
def test_offline_node_with_attached_volume_and_pod( client, core_api, volume_name, make_deployment_with_pvc, reset_cluster_ready_status): # NOQA """ Test offline node with attached volume and pod 1. Create PV/PVC/Deployment manifest. 2. Update deployment's tolerations to 20 seconds to speed up test 3. Update deployment's node affinity rule to avoid the current node 4. Create volume, PV/PVC and deployment. 5. Find the pod in the deployment and write `test_data` into it 6. Shutdown the node pod is running on 7. Wait for deployment to delete the pod 1. Deployment cannot delete the pod here because kubelet doesn't response 8. Force delete the terminating pod 9. Wait for the new pod to be created and the volume attached 10. Check `test_data` in the new pod """ toleration_seconds = 20 apps_api = get_apps_api_client() cloudprovider = detect_cloudprovider() volume_name = generate_volume_name() pv_name = volume_name + "-pv" pvc_name = volume_name + "-pvc" deployment_name = volume_name + "-dep" longhorn_test_node_name = get_self_host_id() deployment_manifest = make_deployment_with_pvc(deployment_name, pvc_name) unreachable_toleration = { "key": "node.kubernetes.io/unreachable", "operator": "Exists", "effect": "NoExecute", "tolerationSeconds": toleration_seconds } not_ready_toleration = { "key": "node.kubernetes.io/not-ready", "operator": "Exists", "effect": "NoExecute", "tolerationSeconds": toleration_seconds } deployment_manifest["spec"]["template"]["spec"]["tolerations"] =\ [unreachable_toleration, not_ready_toleration] node_affinity_roles = { "nodeAffinity": { "requiredDuringSchedulingIgnoredDuringExecution": { "nodeSelectorTerms": [{ "matchExpressions": [{ "key": "kubernetes.io/hostname", "operator": "NotIn", "values": [longhorn_test_node_name] }] }] } } } deployment_manifest["spec"]["template"]["spec"]["affinity"] =\ node_affinity_roles longhorn_volume = create_and_check_volume(client, volume_name, size=SIZE) wait_for_volume_detached(client, volume_name) create_pv_for_volume(client, core_api, longhorn_volume, pv_name) create_pvc_for_volume(client, core_api, longhorn_volume, pvc_name) create_and_wait_deployment(apps_api, deployment_manifest) deployment_label_selector =\ "name=" + deployment_manifest["metadata"]["labels"]["name"] deployment_pod_list =\ core_api.list_namespaced_pod(namespace="default", label_selector=deployment_label_selector) assert deployment_pod_list.items.__len__() == 1 pod_name = deployment_pod_list.items[0].metadata.name test_data = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, pod_name, test_data) node_name = deployment_pod_list.items[0].spec.node_name node = cloudprovider.node_id(node_name) cloudprovider.node_shutdown(node) k8s_node_down = wait_for_node_down_k8s(node_name, core_api) assert k8s_node_down client = get_longhorn_api_client() longhorn_node_down = wait_for_node_down_longhorn(node_name, client) assert longhorn_node_down time.sleep(toleration_seconds + 5) for i in range(TERMINATING_POD_RETRYS): deployment_pod_list =\ core_api.list_namespaced_pod( namespace="default", label_selector=deployment_label_selector ) terminating_pod_name = None for pod in deployment_pod_list.items: if pod.metadata.__getattribute__("deletion_timestamp") is not None: terminating_pod_name = pod.metadata.name break if terminating_pod_name is not None: break else: time.sleep(TERMINATING_POD_INTERVAL) assert terminating_pod_name is not None core_api.delete_namespaced_pod(namespace="default", name=terminating_pod_name, grace_period_seconds=0) 
delete_and_wait_pod(core_api, terminating_pod_name) deployment_pod_list =\ core_api.list_namespaced_pod( namespace="default", label_selector=deployment_label_selector ) assert deployment_pod_list.items.__len__() == 1 wait_for_volume_detached(client, volume_name) wait_for_volume_healthy(client, volume_name) deployment_pod_list =\ core_api.list_namespaced_pod( namespace="default", label_selector=deployment_label_selector ) assert deployment_pod_list.items.__len__() == 1 new_pod_name = deployment_pod_list.items[0].metadata.name wait_pod(new_pod_name) resp_data = read_volume_data(core_api, new_pod_name) assert test_data == resp_data
def test_csi_minimal_volume_size( client, core_api, csi_pv, pvc, pod_make): # NOQA """ Test CSI Minimal Volume Size 1. Create a PVC requesting size 5MiB. Check the PVC requested size is 5MiB and capacity size get is 10MiB. 2. Remove the PVC. 3. Create a PVC requesting size 10MiB. Check the PVC requested size and capacity size get are both 10MiB. 4. Create a pod to use this PVC. 5. Write some data to the volume and read it back to compare. """ vol_name = generate_volume_name() create_and_check_volume(client, vol_name, size=str(100*Mi)) low_storage = str(5*Mi) min_storage = str(10*Mi) pv_name = vol_name + "-pv" csi_pv['metadata']['name'] = pv_name csi_pv['spec']['csi']['volumeHandle'] = vol_name csi_pv['spec']['capacity']['storage'] = min_storage core_api.create_persistent_volume(csi_pv) pvc_name = vol_name + "-pvc" pvc['metadata']['name'] = pvc_name pvc['spec']['volumeName'] = pv_name pvc['spec']['resources']['requests']['storage'] = low_storage pvc['spec']['storageClassName'] = '' core_api.create_namespaced_persistent_volume_claim(body=pvc, namespace='default') claim = common.wait_for_pvc_phase(core_api, pvc_name, "Bound") assert claim.spec.resources.requests['storage'] == low_storage assert claim.status.capacity['storage'] == min_storage common.delete_and_wait_pvc(core_api, pvc_name) common.delete_and_wait_pv(core_api, pv_name) wait_for_volume_detached(client, vol_name) core_api.create_persistent_volume(csi_pv) pvc['spec']['resources']['requests']['storage'] = min_storage core_api.create_namespaced_persistent_volume_claim(body=pvc, namespace='default') claim = common.wait_for_pvc_phase(core_api, pvc_name, "Bound") assert claim.spec.resources.requests['storage'] == min_storage assert claim.status.capacity['storage'] == min_storage pod_name = vol_name + '-pod' pod = pod_make(name=pod_name) pod['spec']['volumes'] = [create_pvc_spec(pvc_name)] create_and_wait_pod(core_api, pod) test_data = "longhorn-integration-test" test_file = "test" write_pod_volume_data(core_api, pod_name, test_data, test_file) read_data = read_volume_data(core_api, pod_name, test_file) assert read_data == test_data
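# Minimal sketch of the capacity clamp asserted above: a request below 10 MiB
# is reported with 10 MiB of capacity, while the requested size itself is
# preserved on the PVC. Whether the clamp is literally a max() is an
# assumption for illustration.
def reported_capacity_sketch(requested_bytes, minimum=10 * 1024 * 1024):
    return max(requested_bytes, minimum)

# reported_capacity_sketch(5 * Mi) == 10 * Mi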
def test_replica_scheduler_just_under_over_provisioning(client): # NOQA over_provisioning_setting = client.by_id_setting( SETTING_STORAGE_OVER_PROVISIONING_PERCENTAGE) old_provisioning_setting = over_provisioning_setting["value"] # set storage over provisioning percentage to 100 over_provisioning_setting = client.update(over_provisioning_setting, value="100") lht_hostId = get_self_host_id() nodes = client.list_node() expect_node_disk = {} max_size_array = [] for node in nodes: disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == DEFAULT_DISK_PATH: expect_disk = disk expect_disk["fsid"] = fsid expect_node_disk[node["name"]] = expect_disk max_size_array.append(disk["storageMaximum"]) disk["storageReserved"] = 0 update_disks = get_update_disks(disks) node = node.diskUpdate(disks=update_disks) disks = node["disks"] for fsid, disk in disks.iteritems(): wait_for_disk_status(client, node["name"], fsid, "storageReserved", 0) max_size = min(max_size_array) # test just under over provisioning limit could be scheduled vol_name = common.generate_volume_name() volume = client.create_volume(name=vol_name, size=str(max_size), numberOfReplicas=len(nodes)) volume = common.wait_for_volume_condition_scheduled(client, vol_name, "status", CONDITION_STATUS_TRUE) volume = common.wait_for_volume_detached(client, vol_name) assert volume["state"] == "detached" assert volume["created"] != "" volume.attach(hostId=lht_hostId) volume = common.wait_for_volume_healthy(client, vol_name) nodes = client.list_node() node_hosts = [] for node in nodes: node_hosts.append(node["name"]) # check all replica should be scheduled to default disk for replica in volume["replicas"]: id = replica["hostId"] assert id != "" assert replica["running"] expect_disk = expect_node_disk[id] assert replica["diskID"] == expect_disk["fsid"] assert expect_disk["path"] in replica["dataPath"] node_hosts = filter(lambda x: x != id, node_hosts) assert len(node_hosts) == 0 # clean volume and disk cleanup_volume(client, vol_name) client.update(over_provisioning_setting, value=old_provisioning_setting)
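

# The test above relies on how much schedulable space a disk exposes
# under over-provisioning. A rough sketch of that calculation, assuming
# the simplified formula below; the authoritative check lives in
# longhorn-manager and may differ in detail.
def schedulable_bytes(storage_maximum, storage_reserved,
                      storage_scheduled, over_provisioning_pct):
    # Capacity left after the reservation, scaled by the
    # over-provisioning percentage, minus space already promised to
    # existing replicas.
    usable = (storage_maximum - storage_reserved) * \
        over_provisioning_pct // 100
    return usable - storage_scheduled


# With 100% over-provisioning and storageReserved set to 0, a replica
# of size min(storageMaximum) still fits on every node, which is what
# the test asserts.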
def test_upgrade(upgrade_image_tag, settings_reset, volume_name, pod_make,
                 statefulset, storage_class):  # NOQA
    """
    Test Longhorn upgrade

    Prerequisite:
      - Disable Auto Salvage Setting

    1. Find the upgrade image tag
    2. Create a volume, generate and write data into the volume.
    3. Create a Pod using a volume, generate and write data
    4. Create a StatefulSet with 2 replicas,
       generate and write data to their volumes
    5. Keep all volumes attached
    6. Upgrade Longhorn system.
    7. Check Pod and StatefulSet didn't restart after upgrade
    8. Check all volumes' data
    9. Write data to StatefulSet pods, and attached volume
    10. Check data written to StatefulSet pods, and attached volume.
    11. Detach the volume, and delete the Pod and StatefulSet to detach
        their volumes
    12. Upgrade all volumes' engine images.
    13. Attach the volume, and recreate the Pod and StatefulSet
    14. Check all volumes' data
    """
    new_ei_name = "longhornio/longhorn-engine:" + upgrade_image_tag

    client = get_longhorn_api_client()
    core_api = get_core_api_client()
    host_id = get_self_host_id()
    pod_data_path = "/data/test"

    pod_volume_name = generate_volume_name()

    auto_salvage_setting = client.by_id_setting(SETTING_AUTO_SALVAGE)
    setting = client.update(auto_salvage_setting, value="false")

    assert setting.name == SETTING_AUTO_SALVAGE
    assert setting.value == "false"

    # Create Volume attached to a node.
    volume1 = create_and_check_volume(client, volume_name, size=SIZE)
    volume1.attach(hostId=host_id)
    volume1 = wait_for_volume_healthy(client, volume_name)
    volume1_data = write_volume_random_data(volume1)

    # Create Volume used by Pod
    pod_name, pv_name, pvc_name, pod_md5sum = \
        prepare_pod_with_data_in_mb(client, core_api, pod_make,
                                    pod_volume_name,
                                    data_path=pod_data_path,
                                    add_liveness_prope=False)

    # Create multiple volumes used by StatefulSet
    statefulset_name = 'statefulset-upgrade-test'
    update_statefulset_manifests(statefulset,
                                 storage_class,
                                 statefulset_name)
    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)
    statefulset_pod_info = get_statefulset_pod_info(core_api, statefulset)

    for sspod_info in statefulset_pod_info:
        sspod_info['data'] = generate_random_data(VOLUME_RWTEST_SIZE)
        write_pod_volume_data(core_api,
                              sspod_info['pod_name'],
                              sspod_info['data'])

    # upgrade Longhorn
    assert longhorn_upgrade(upgrade_image_tag)

    client = get_longhorn_api_client()

    # wait for 1 minute before checking pod restarts
    time.sleep(60)

    pod = core_api.read_namespaced_pod(name=pod_name,
                                       namespace='default')
    assert pod.status.container_statuses[0].restart_count == 0

    for sspod_info in statefulset_pod_info:
        sspod = core_api.read_namespaced_pod(name=sspod_info['pod_name'],
                                             namespace='default')
        assert \
            sspod.status.container_statuses[0].restart_count == 0

    for sspod_info in statefulset_pod_info:
        resp = read_volume_data(core_api, sspod_info['pod_name'])
        assert resp == sspod_info['data']

    res_pod_md5sum = get_pod_data_md5sum(core_api, pod_name, pod_data_path)
    assert res_pod_md5sum == pod_md5sum

    check_volume_data(volume1, volume1_data)

    for sspod_info in statefulset_pod_info:
        sspod_info['data'] = generate_random_data(VOLUME_RWTEST_SIZE)
        write_pod_volume_data(core_api,
                              sspod_info['pod_name'],
                              sspod_info['data'])

    for sspod_info in statefulset_pod_info:
        resp = read_volume_data(core_api, sspod_info['pod_name'])
        assert resp == sspod_info['data']

    volume1 = client.by_id_volume(volume_name)
    volume1_data = write_volume_random_data(volume1)
    check_volume_data(volume1, volume1_data)

    statefulset['spec']['replicas'] = replicas = 0
    apps_api = get_apps_api_client()
apps_api.patch_namespaced_stateful_set( name=statefulset_name, namespace='default', body={ 'spec': { 'replicas': replicas } }) delete_and_wait_pod(core_api, pod_name) volume = client.by_id_volume(volume_name) volume.detach() volumes = client.list_volume() for v in volumes: wait_for_volume_detached(client, v.name) engineimages = client.list_engine_image() for ei in engineimages: if ei.image == new_ei_name: new_ei = ei volumes = client.list_volume() for v in volumes: volume = client.by_id_volume(v.name) volume.engineUpgrade(image=new_ei.image) statefulset['spec']['replicas'] = replicas = 2 apps_api = get_apps_api_client() apps_api.patch_namespaced_stateful_set( name=statefulset_name, namespace='default', body={ 'spec': { 'replicas': replicas } }) wait_statefulset(statefulset) pod = pod_make(name=pod_name) pod['spec']['volumes'] = [create_pvc_spec(pvc_name)] create_and_wait_pod(core_api, pod) volume1 = client.by_id_volume(volume_name) volume1.attach(hostId=host_id) volume1 = wait_for_volume_healthy(client, volume_name) for sspod_info in statefulset_pod_info: resp = read_volume_data(core_api, sspod_info['pod_name']) assert resp == sspod_info['data'] res_pod_md5sum = get_pod_data_md5sum(core_api, pod_name, pod_data_path) assert res_pod_md5sum == pod_md5sum check_volume_data(volume1, volume1_data)
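

# test_upgrade scales the StatefulSet down and back up by patching
# spec.replicas directly. A minimal sketch of that pattern as a helper;
# the function name is an assumption and not part of the original
# suite.
def scale_statefulset(apps_api, name, replicas, namespace='default'):
    # Patch only the replica count; the rest of the spec is untouched.
    apps_api.patch_namespaced_stateful_set(
        name=name,
        namespace=namespace,
        body={'spec': {'replicas': replicas}})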
def test_replica_scheduler_update_minimal_available(client): # NOQA minimal_available_setting = client.by_id_setting( SETTING_STORAGE_MINIMAL_AVAILABLE_PERCENTAGE) old_minimal_setting = minimal_available_setting["value"] nodes = client.list_node() expect_node_disk = {} for node in nodes: disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == DEFAULT_DISK_PATH: expect_disk = disk expect_disk["fsid"] = fsid expect_node_disk[node["name"]] = expect_disk # set storage minimal available percentage to 100 # to test all replica couldn't be scheduled minimal_available_setting = client.update(minimal_available_setting, value="100") # wait for disks state nodes = client.list_node() for node in nodes: disks = node["disks"] for fsid, disk in disks.iteritems(): wait_for_disk_conditions(client, node["name"], fsid, DISK_CONDITION_SCHEDULABLE, CONDITION_STATUS_FALSE) lht_hostId = get_self_host_id() vol_name = common.generate_volume_name() volume = client.create_volume(name=vol_name, size=SIZE, numberOfReplicas=len(nodes)) volume = common.wait_for_volume_condition_scheduled(client, vol_name, "status", CONDITION_STATUS_FALSE) # set storage minimal available percentage to default value(10) minimal_available_setting = client.update(minimal_available_setting, value=old_minimal_setting) # wait for disks state nodes = client.list_node() for node in nodes: disks = node["disks"] for fsid, disk in disks.iteritems(): wait_for_disk_conditions(client, node["name"], fsid, DISK_CONDITION_SCHEDULABLE, CONDITION_STATUS_TRUE) # check volume status volume = common.wait_for_volume_condition_scheduled(client, vol_name, "status", CONDITION_STATUS_TRUE) volume = common.wait_for_volume_detached(client, vol_name) assert volume["state"] == "detached" assert volume["created"] != "" volume.attach(hostId=lht_hostId) volume = common.wait_for_volume_healthy(client, vol_name) nodes = client.list_node() node_hosts = [] for node in nodes: node_hosts.append(node["name"]) # check all replica should be scheduled to default disk for replica in volume["replicas"]: id = replica["hostId"] assert id != "" assert replica["running"] expect_disk = expect_node_disk[id] assert replica["diskID"] == expect_disk["fsid"] assert expect_disk["path"] in replica["dataPath"] node_hosts = filter(lambda x: x != id, node_hosts) assert len(node_hosts) == 0 # clean volume and disk cleanup_volume(client, vol_name)
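

# The minimal-available test flips the disk's schedulable condition by
# changing the storage-minimal-available-percentage setting. A sketch
# of the rule it exercises, assuming the simplified form below; the
# authoritative check lives in longhorn-manager.
def disk_is_schedulable(storage_available, storage_maximum,
                        minimal_available_pct):
    # A disk stops being schedulable once its free space drops to or
    # below the configured percentage of its total capacity.
    threshold = storage_maximum * minimal_available_pct // 100
    return storage_available > threshold


# At 100% every disk fails this check, so the volume cannot be
# scheduled; back at the default value the default disks pass again.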
def ha_backup_deletion_recovery_test(client, volume_name, size, base_image=""): # NOQA volume = client.create_volume(name=volume_name, size=size, numberOfReplicas=2, baseImage=base_image) volume = common.wait_for_volume_detached(client, volume_name) host_id = get_self_host_id() volume = volume.attach(hostId=host_id) volume = common.wait_for_volume_healthy(client, volume_name) setting = client.by_id_setting(common.SETTING_BACKUP_TARGET) # test backupTarget for multiple settings backupstores = common.get_backupstore_url() for backupstore in backupstores: if common.is_backupTarget_s3(backupstore): backupsettings = backupstore.split("$") setting = client.update(setting, value=backupsettings[0]) assert setting["value"] == backupsettings[0] credential = client.by_id_setting( common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET) credential = client.update(credential, value=backupsettings[1]) assert credential["value"] == backupsettings[1] else: setting = client.update(setting, value=backupstore) assert setting["value"] == backupstore credential = client.by_id_setting( common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET) credential = client.update(credential, value="") assert credential["value"] == "" data = write_volume_random_data(volume) snap2 = volume.snapshotCreate() volume.snapshotCreate() volume.snapshotBackup(name=snap2["name"]) _, b = common.find_backup(client, volume_name, snap2["name"]) res_name = common.generate_volume_name() res_volume = client.create_volume(name=res_name, size=size, numberOfReplicas=2, fromBackup=b["url"]) res_volume = common.wait_for_volume_restoration_completed( client, res_name) res_volume = common.wait_for_volume_detached(client, res_name) res_volume = res_volume.attach(hostId=host_id) res_volume = common.wait_for_volume_healthy(client, res_name) check_volume_data(res_volume, data) snapshots = res_volume.snapshotList() # only the backup snapshot + volume-head assert len(snapshots) == 2 backup_snapshot = "" for snap in snapshots: if snap["name"] != "volume-head": backup_snapshot = snap["name"] assert backup_snapshot != "" res_volume.snapshotCreate() snapshots = res_volume.snapshotList() assert len(snapshots) == 3 res_volume.snapshotDelete(name=backup_snapshot) res_volume.snapshotPurge() snapshots = res_volume.snapshotList() assert len(snapshots) == 2 ha_rebuild_replica_test(client, res_name) res_volume = res_volume.detach() res_volume = common.wait_for_volume_detached(client, res_name) client.delete(res_volume) common.wait_for_volume_delete(client, res_name) cleanup_volume(client, volume)
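

# ha_backup_deletion_recovery_test configures an S3 backup target from
# a "$"-separated string of the form "<target-url>$<credential-secret>",
# while non-S3 targets carry no secret. A small sketch of that parsing
# step on its own; the helper name is an assumption for illustration.
def split_backupstore_setting(backupstore):
    if "$" in backupstore:
        target, credential_secret = backupstore.split("$", 1)
    else:
        target, credential_secret = backupstore, ""
    return target, credential_secret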
def test_replica_scheduler_too_large_volume_fit_any_disks(client): # NOQA nodes = client.list_node() lht_hostId = get_self_host_id() expect_node_disk = {} for node in nodes: disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == DEFAULT_DISK_PATH: expect_disk = disk expect_disk["fsid"] = fsid expect_node_disk[node["name"]] = expect_disk disk["storageReserved"] = disk["storageMaximum"] update_disks = get_update_disks(disks) node.diskUpdate(disks=update_disks) # volume is too large to fill into any disks vol_name = common.generate_volume_name() volume = client.create_volume(name=vol_name, size=str(4*Gi), numberOfReplicas=len(nodes)) volume = common.wait_for_volume_condition_scheduled(client, vol_name, "status", CONDITION_STATUS_FALSE) # reduce StorageReserved of each default disk nodes = client.list_node() for node in nodes: disks = node["disks"] update_disks = get_update_disks(disks) for disk in update_disks: disk["storageReserved"] = 0 node = node.diskUpdate(disks=update_disks) disks = node["disks"] for fsid, disk in disks.iteritems(): wait_for_disk_status(client, node["name"], fsid, "storageReserved", 0) # check volume status volume = common.wait_for_volume_condition_scheduled(client, vol_name, "status", CONDITION_STATUS_TRUE) volume = common.wait_for_volume_detached(client, vol_name) assert volume["state"] == "detached" assert volume["created"] != "" volume.attach(hostId=lht_hostId) volume = common.wait_for_volume_healthy(client, vol_name) nodes = client.list_node() node_hosts = [] for node in nodes: node_hosts.append(node["name"]) # check all replica should be scheduled to default disk for replica in volume["replicas"]: id = replica["hostId"] assert id != "" assert replica["running"] expect_disk = expect_node_disk[id] assert replica["diskID"] == expect_disk["fsid"] assert expect_disk["path"] in replica["dataPath"] node_hosts = filter(lambda x: x != id, node_hosts) assert len(node_hosts) == 0 # clean volume and disk cleanup_volume(client, vol_name)
def test_backup_kubernetes_status(set_random_backupstore, client, core_api, pod): # NOQA """ Test that Backups have KubernetesStatus stored properly when there is an associated PersistentVolumeClaim and Pod. 1. Setup a random backupstore 2. Set settings Longhorn Static StorageClass to `longhorn-static-test` 3. Create a volume and PV/PVC. Verify the StorageClass of PVC 4. Create a Pod using the PVC. 5. Check volume's Kubernetes status to reflect PV/PVC/Pod correctly. 6. Create a backup for the volume. 7. Verify the labels of created backup reflect PV/PVC/Pod status. 8. Restore the backup to a volume. Wait for restoration to complete. 9. Check the volume's Kubernetes Status 1. Make sure the `lastPodRefAt` and `lastPVCRefAt` is snapshot created time 10. Delete the backup and restored volume. 11. Delete PV/PVC/Pod. 12. Verify volume's Kubernetes Status updated to reflect history data. 13. Attach the volume and create another backup. Verify the labels 14. Verify the volume's Kubernetes status. 15. Restore the previous backup to a new volume. Wait for restoration. 16. Verify the restored volume's Kubernetes status. 1. Make sure `lastPodRefAt` and `lastPVCRefAt` matched volume on step 12 """ host_id = get_self_host_id() static_sc_name = "longhorn-static-test" setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC) setting = client.update(setting, value=static_sc_name) assert setting.value == static_sc_name volume_name = "test-backup-kubernetes-status-pod" # NOQA client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2) volume = wait_for_volume_detached(client, volume_name) pod_name = "pod-" + volume_name pv_name = "pv-" + volume_name pvc_name = "pvc-" + volume_name create_pv_for_volume(client, core_api, volume, pv_name) create_pvc_for_volume(client, core_api, volume, pvc_name) ret = core_api.list_namespaced_persistent_volume_claim(namespace='default') pvc_found = False for item in ret.items: if item.metadata.name == pvc_name: pvc_found = item break assert pvc_found assert pvc_found.spec.storage_class_name == static_sc_name pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pvc_name, }, }] create_and_wait_pod(core_api, pod) ks = { 'lastPodRefAt': '', 'lastPVCRefAt': '', 'namespace': 'default', 'pvcName': pvc_name, 'pvName': pv_name, 'pvStatus': 'Bound', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, volume_name, ks) volume = wait_for_volume_healthy(client, volume_name) # Create Backup manually instead of calling create_backup since Kubernetes # is not guaranteed to mount our Volume to the test host. 
snap = create_snapshot(client, volume_name) volume.snapshotBackup(name=snap.name) wait_for_backup_completion(client, volume_name, snap.name) _, b = find_backup(client, volume_name, snap.name) # Check backup label status = loads(b.labels.get(KUBERNETES_STATUS_LABEL)) assert status == ks # Check backup volume label for _ in range(RETRY_COUNTS): bv = client.by_id_backupVolume(volume_name) if bv is not None and bv.labels is not None: break time.sleep(RETRY_INTERVAL) assert bv is not None and bv.labels is not None status = loads(bv.labels.get(KUBERNETES_STATUS_LABEL)) assert status == ks restore_name = generate_volume_name() client.create_volume(name=restore_name, size=SIZE, numberOfReplicas=2, fromBackup=b.url) wait_for_volume_restoration_completed(client, restore_name) wait_for_volume_detached(client, restore_name) snapshot_created = b.snapshotCreated ks = { 'lastPodRefAt': b.snapshotCreated, 'lastPVCRefAt': b.snapshotCreated, 'namespace': 'default', 'pvcName': pvc_name, # Restoration should not apply PersistentVolume data. 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, restore_name, ks) restore = client.by_id_volume(restore_name) # We need to compare LastPodRefAt and LastPVCRefAt manually since # wait_volume_kubernetes_status only checks for empty or non-empty state. assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"] assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"] delete_backup(client, bv.name, b.name) client.delete(restore) wait_for_volume_delete(client, restore_name) delete_and_wait_pod(core_api, pod_name) delete_and_wait_pvc(core_api, pvc_name) delete_and_wait_pv(core_api, pv_name) # With the Pod, PVC, and PV deleted, the Volume should have both Ref # fields set. Check that a new Backup and Restore will use this instead of # manually populating the Ref fields. ks = { 'lastPodRefAt': 'NOT NULL', 'lastPVCRefAt': 'NOT NULL', 'namespace': 'default', 'pvcName': pvc_name, 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, volume_name, ks) volume = wait_for_volume_detached(client, volume_name) volume.attach(hostId=host_id) volume = wait_for_volume_healthy(client, volume_name) snap = create_snapshot(client, volume_name) volume.snapshotBackup(name=snap.name) volume = wait_for_backup_completion(client, volume_name, snap.name) bv, b = find_backup(client, volume_name, snap.name) new_b = bv.backupGet(name=b.name) status = loads(new_b.labels.get(KUBERNETES_STATUS_LABEL)) # Check each field manually, we have no idea what the LastPodRefAt or the # LastPVCRefAt will be. We just know it shouldn't be SnapshotCreated. 
assert status['lastPodRefAt'] != snapshot_created assert status['lastPVCRefAt'] != snapshot_created assert status['namespace'] == "default" assert status['pvcName'] == pvc_name assert status['pvName'] == "" assert status['pvStatus'] == "" assert status['workloadsStatus'] == [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] restore_name = generate_volume_name() client.create_volume(name=restore_name, size=SIZE, numberOfReplicas=2, fromBackup=b.url) wait_for_volume_restoration_completed(client, restore_name) wait_for_volume_detached(client, restore_name) ks = { 'lastPodRefAt': status['lastPodRefAt'], 'lastPVCRefAt': status['lastPVCRefAt'], 'namespace': 'default', 'pvcName': pvc_name, 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, restore_name, ks) restore = client.by_id_volume(restore_name) assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"] assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"] # cleanup backupstore_cleanup(client) client.delete(restore) cleanup_volume(client, volume)
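

# The backup's Kubernetes status travels as a JSON string stored under
# the KUBERNETES_STATUS_LABEL key on both the backup and the backup
# volume. A minimal sketch of reading it back, assuming a plain label
# mapping; the helper name is illustrative only.
def load_kubernetes_status(labels, key):
    from json import loads

    raw = labels.get(key)
    # A missing label means no Kubernetes status was recorded.
    return loads(raw) if raw else None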
def test_node_delete_umount_disks(client): # NOQA # create test disks for node disk_volume_name = 'vol-disk-1' lht_hostId = get_self_host_id() node = client.by_id_node(lht_hostId) disks = node["disks"] disk_path1 = create_host_disk(client, disk_volume_name, str(Gi), lht_hostId) disk1 = {"path": disk_path1, "allowScheduling": True, "storageReserved": SMALL_DISK_SIZE} update_disk = get_update_disks(disks) for disk in update_disk: disk["allowScheduling"] = False # add new disk for node update_disk.append(disk1) # save disks to node node = node.diskUpdate(disks=update_disk) node = common.wait_for_disk_update(client, lht_hostId, len(update_disk)) assert len(node["disks"]) == len(update_disk) node = client.by_id_node(lht_hostId) assert len(node["disks"]) == len(update_disk) disks = node["disks"] # wait for node controller to update disk status for fsid, disk in disks.iteritems(): if disk["path"] == disk_path1: wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling", True) wait_for_disk_status(client, lht_hostId, fsid, "storageReserved", SMALL_DISK_SIZE) free, total = common.get_host_disk_size(disk_path1) wait_for_disk_status(client, lht_hostId, fsid, "storageAvailable", free) wait_for_disk_status(client, lht_hostId, fsid, "storageMaximum", total) node = client.by_id_node(lht_hostId) disks = node["disks"] for key, disk in disks.iteritems(): if disk["path"] == disk_path1: assert disk["allowScheduling"] assert disk["storageReserved"] == SMALL_DISK_SIZE assert disk["storageScheduled"] == 0 free, total = common.get_host_disk_size(disk_path1) assert disk["storageMaximum"] == total assert disk["storageAvailable"] == free conditions = disk["conditions"] assert conditions[DISK_CONDITION_READY]["status"] == \ CONDITION_STATUS_TRUE assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \ CONDITION_STATUS_TRUE else: assert not disk["allowScheduling"] # create a volume nodes = client.list_node() vol_name = common.generate_volume_name() volume = create_volume(client, vol_name, str(SMALL_DISK_SIZE), lht_hostId, len(nodes)) replicas = volume["replicas"] for replica in replicas: id = replica["hostId"] assert id != "" assert replica["running"] if id == lht_hostId: assert replica["dataPath"].startswith(disk_path1) # umount the disk mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name) common.umount_disk(mount_path) # wait for update node status node = client.by_id_node(lht_hostId) disks = node["disks"] for fsid, disk in disks.iteritems(): if disk["path"] == disk_path1: wait_for_disk_status(client, lht_hostId, fsid, "allowScheduling", False) wait_for_disk_status(client, lht_hostId, fsid, "storageMaximum", 0) wait_for_disk_conditions(client, lht_hostId, fsid, DISK_CONDITION_READY, CONDITION_STATUS_FALSE) # check result node = client.by_id_node(lht_hostId) disks = node["disks"] update_disks = [] for fsid, disk in disks.iteritems(): if disk["path"] == disk_path1: assert not disk["allowScheduling"] assert disk["storageMaximum"] == 0 assert disk["storageAvailable"] == 0 assert disk["storageReserved"] == SMALL_DISK_SIZE assert disk["storageScheduled"] == SMALL_DISK_SIZE conditions = disk["conditions"] assert conditions[DISK_CONDITION_READY]["status"] == \ CONDITION_STATUS_FALSE assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \ CONDITION_STATUS_FALSE else: conditions = disk["conditions"] assert conditions[DISK_CONDITION_READY]["status"] == \ CONDITION_STATUS_TRUE assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \ CONDITION_STATUS_TRUE update_disks.append(disk) # delete umount disk 
    with pytest.raises(Exception) as e:
        node.diskUpdate(disks=update_disks)
    assert "disable the disk" in str(e.value)

    # update other disks
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] != disk_path1:
            disk["allowScheduling"] = True
    test_update = get_update_disks(disks)
    node = node.diskUpdate(disks=test_update)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] != disk_path1:
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "allowScheduling", True)
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] != disk_path1:
            assert disk["allowScheduling"]

    # mount the disk back
    mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name)
    disk_volume = client.by_id_volume(disk_volume_name)
    dev = get_volume_endpoint(disk_volume)
    common.mount_disk(dev, mount_path)

    # wait for update node status
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "allowScheduling", False)
            wait_for_disk_conditions(client, lht_hostId, fsid,
                                     DISK_CONDITION_READY,
                                     CONDITION_STATUS_TRUE)

    # check result
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            free, total = common.get_host_disk_size(disk_path1)
            assert not disk["allowScheduling"]
            assert disk["storageMaximum"] == total
            assert disk["storageAvailable"] == free
            assert disk["storageReserved"] == SMALL_DISK_SIZE
            assert disk["storageScheduled"] == SMALL_DISK_SIZE
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_TRUE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE
        else:
            conditions = disk["conditions"]
            assert conditions[DISK_CONDITION_READY]["status"] == \
                CONDITION_STATUS_TRUE
            assert conditions[DISK_CONDITION_SCHEDULABLE]["status"] == \
                CONDITION_STATUS_TRUE

    # delete volume and umount disk
    cleanup_volume(client, vol_name)
    mount_path = os.path.join(DIRECTORY_PATH, disk_volume_name)
    common.umount_disk(mount_path)

    # wait for update node status
    node = client.by_id_node(lht_hostId)
    disks = node["disks"]
    for fsid, disk in disks.iteritems():
        if disk["path"] == disk_path1:
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "allowScheduling", False)
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "storageScheduled", 0)
            wait_for_disk_status(client, lht_hostId, fsid,
                                 "storageMaximum", 0)

    # test delete the umount disk
    node = client.by_id_node(lht_hostId)
    node.diskUpdate(disks=update_disks)
    node = common.wait_for_disk_update(client, lht_hostId,
                                       len(update_disks))
    assert len(node["disks"]) == len(update_disks)
    cmd = ['rm', '-r', mount_path]
    subprocess.check_call(cmd)
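

# test_node_delete_umount_disks drives common.umount_disk() and
# common.mount_disk() to take the extra disk offline and back online.
# A rough sketch of what such helpers might look like, assuming they
# simply shell out to umount/mount; the real helpers in common.py may
# behave differently.
def _umount_disk_sketch(mount_path):
    import subprocess

    subprocess.check_call(['umount', mount_path])


def _mount_disk_sketch(device, mount_path):
    import subprocess

    subprocess.check_call(['mkdir', '-p', mount_path])
    subprocess.check_call(['mount', device, mount_path])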