def test_recurring_job_in_volume_creation(clients, volume_name):  # NOQA
    """
    Test creating a volume with recurring jobs attached at creation time.

    1. Creating a volume with duplicate recurring jobs must fail.
    2. Create the volume with a valid job list, attach it, and wait long
       enough for the jobs to run.
    3. Verify the jobs produced the expected snapshots/backups.
    4. Clean up the volume.
    """
    # grab an arbitrary (host_id, client) pair from the fixture;
    # dict.items() replaces the Python 2-only dict.iteritems()
    for host_id, client in clients.items():  # NOQA
        break

    # error when creating volume with duplicate jobs
    with pytest.raises(Exception) as e:
        client.create_volume(name=volume_name, size=SIZE,
                             numberOfReplicas=2,
                             recurringJobs=create_jobs1() + create_jobs1())
    assert "duplicate job" in str(e.value)

    client.create_volume(name=volume_name, size=SIZE,
                         numberOfReplicas=2, recurringJobs=create_jobs1())
    volume = common.wait_for_volume_detached(client, volume_name)

    volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    # 5 minutes: give the cron-driven jobs time to fire
    time.sleep(300)
    check_jobs1_result(volume)

    volume = volume.detach()
    common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 0
def test_pv_creation(client, core_api):  # NOQA
    """
    Test creating PV using Longhorn API

    1. Create volume
    2. Create PV for the volume
    3. Try to create another PV for the same volume. It should fail.
    4. Check Kubernetes Status for the volume since PV is created.
    """
    vol_name = "test-pv-creation"  # NOQA
    client.create_volume(name=vol_name, size=SIZE, numberOfReplicas=2)
    volume = wait_for_volume_detached(client, vol_name)

    primary_pv = "pv-" + vol_name
    create_pv_for_volume(client, core_api, volume, primary_pv)

    # a second PV for the same volume must be rejected
    duplicate_pv = "pv2-" + vol_name
    with pytest.raises(Exception) as e:
        volume.pvCreate(pvName=duplicate_pv)
    assert "already exist" in str(e.value)

    expected_ks = {
        'pvName': primary_pv,
        'pvStatus': 'Available',
        'namespace': '',
        'pvcName': '',
        'lastPVCRefAt': '',
        'lastPodRefAt': '',
    }
    wait_volume_kubernetes_status(client, vol_name, expected_ks)

    delete_and_wait_pv(core_api, primary_pv)
def test_pv_creation(client, core_api):  # NOQA
    """
    Create a PV for a Longhorn volume, confirm a duplicate PV is rejected,
    and check the volume's Kubernetes status reflects the new PV.
    """
    vol_name = "test-pv-creation"
    client.create_volume(name=vol_name, size=SIZE, numberOfReplicas=2)
    volume = wait_for_volume_detached(client, vol_name)

    first_pv = "pv-" + vol_name
    create_pv_for_volume(client, core_api, volume, first_pv)

    # try to create one more pv for the volume — must fail
    second_pv = "pv2-" + vol_name
    with pytest.raises(Exception) as e:
        volume.pvCreate(pvName=second_pv)
    assert "already exist" in str(e.value)

    status = {
        'pvName': first_pv,
        'pvStatus': 'Available',
        'namespace': '',
        'pvcName': '',
        'lastPVCRefAt': '',
        'lastPodRefAt': '',
    }
    wait_volume_kubernetes_status(client, vol_name, status)

    delete_and_wait_pv(core_api, first_pv)
def recurring_job_labels_test(client, labels, volume_name, size=SIZE, backing_image=""):  # NOQA
    """
    Helper: verify user-supplied labels on a recurring backup job end up on
    the resulting backup, plus the job-name label added by Longhorn.

    Fix: the `iter()` wrapper around `labels.items()` was redundant —
    dict views are already iterable.
    """
    host_id = get_self_host_id()
    client.create_volume(name=volume_name, size=size,
                         numberOfReplicas=2, backingImage=backing_image)
    volume = common.wait_for_volume_detached(client, volume_name)

    # Simple Backup Job that runs every 1 minute, retains 1.
    jobs = [{
        "name": RECURRING_JOB_NAME,
        "cron": "*/1 * * * *",
        "task": "backup",
        "retain": 1,
        "labels": labels
    }]
    volume.recurringUpdate(jobs=jobs)

    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)
    write_volume_random_data(volume)

    # 1 minutes 15s
    time.sleep(75)

    # update the job with an extra label and let it run once more
    labels["we-added-this-label"] = "definitely"
    jobs[0]["labels"] = labels
    volume = volume.recurringUpdate(jobs=jobs)
    volume = wait_for_volume_healthy(client, volume_name)
    write_volume_random_data(volume)

    # 2 minutes 15s
    time.sleep(135)

    snapshots = volume.snapshotList()
    count = 0
    for snapshot in snapshots:
        if snapshot.removed is False:
            count += 1
    # 1 from Backup, 1 from Volume Head.
    assert count == 2

    # Verify the Labels on the actual Backup.
    bv = client.by_id_backupVolume(volume_name)
    backups = bv.backupList().data
    assert len(backups) == 1

    b = bv.backupGet(name=backups[0].name)
    # iterate the dict view directly (the iter() wrapper was redundant)
    for key, val in labels.items():
        assert b.labels.get(key) == val
    assert b.labels.get(RECURRING_JOB_LABEL) == RECURRING_JOB_NAME
    # One extra Label from RecurringJob.
    assert len(b.labels) == len(labels) + 1
    if backing_image:
        assert b.volumeBackingImageName == \
            backing_image
        assert b.volumeBackingImageURL != ""

    cleanup_volume(client, volume)
def test_tag_scheduling_failure(client, node_default_tags):  # NOQA
    """
    Test that scheduling fails if no Nodes/Disks with the requested Tags are
    available.

    Case 1: Validate that if specifying nonexist tags in volume, API call
    will fail.

    Case 2:
    1. Specify existing but no node or disk can unsatisfied tags.
    2. Validate the volume will failed the scheduling
    """
    # Case 1: selectors naming tags that exist nowhere — creation is rejected
    nonexistent_tag_cases = [
        # Only one Disk Tag exists.
        {"disk": ["doesnotexist", "ssd"], "node": []},
        # Only one Node Tag exists.
        {"disk": [], "node": ["doesnotexist", "main"]},
    ]
    for case in nonexistent_tag_cases:
        volume_name = generate_volume_name()  # NOQA
        with pytest.raises(Exception) as e:
            client.create_volume(name=volume_name, size=SIZE,
                                 numberOfReplicas=3,
                                 diskSelector=case["disk"],
                                 nodeSelector=case["node"])
        assert "does not exist" in str(e.value)

    # Case 2: all tags exist, but no node/disk satisfies the whole selector
    unsatisfiable_cases = [
        {"disk": [], "node": ["main", "fallback"]},
        {"disk": ["ssd", "m2"], "node": []},
    ]
    for case in unsatisfiable_cases:
        volume_name = generate_volume_name()
        client.create_volume(name=volume_name, size=SIZE,
                             numberOfReplicas=3,
                             diskSelector=case["disk"],
                             nodeSelector=case["node"])
        volume = wait_for_volume_detached(client, volume_name)
        assert volume.diskSelector == case["disk"]
        assert volume.nodeSelector == case["node"]
        wait_scheduling_failure(client, volume_name)

        client.delete(volume)
        wait_for_volume_delete(client, volume.name)
        volumes = client.list_volume()
        assert len(volumes) == 0
def test_pv_creation(client, core_api):  # NOQA
    """
    Test creating a PV for a volume via the Longhorn API and verify the
    volume's Kubernetes status fields.

    Fix: the original status-polling loop read ``k_status`` once before the
    loop and never refreshed it inside, so it either broke on the first pass
    or slept through every retry; the volume is now re-fetched per iteration.
    """
    volume_name = "test-pv-creation"
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pv_name = "pv-" + volume_name
    volume.pvCreate(pvName=pv_name)
    for i in range(RETRY_COUNTS):
        if check_pv_existence(core_api, pv_name):
            break
        time.sleep(RETRY_INTERVAL)
    assert check_pv_existence(core_api, pv_name)

    # poll until the Kubernetes status reports the PV as Available,
    # refreshing the volume on every iteration (bug fix — previously the
    # status was fetched only once, before the loop)
    for i in range(RETRY_COUNTS):
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        if k_status['pvName'] and k_status['pvStatus'] == 'Available':
            break
        time.sleep(RETRY_INTERVAL)

    volume = client.by_id_volume(volume_name)
    k_status = volume["kubernetesStatus"]
    workloads = k_status['workloadsStatus']
    assert k_status['pvName'] == pv_name
    assert k_status['pvStatus'] == 'Available'
    assert not k_status['namespace']
    assert not k_status['pvcName']
    assert not workloads
    assert not k_status['lastPVCRefAt']
    assert not k_status['lastPodRefAt']

    # try to create one more pv for the volume
    pv_name_2 = "pv2-" + volume_name
    with pytest.raises(Exception) as e:
        volume.pvCreate(pvName=pv_name_2)
    assert "already exist" in str(e.value)

    # the failed duplicate creation must not have disturbed the status
    volume = client.by_id_volume(volume_name)
    k_status = volume["kubernetesStatus"]
    workloads = k_status['workloadsStatus']
    assert k_status['pvName'] == pv_name
    assert k_status['pvStatus'] == 'Available'
    assert not k_status['namespace']
    assert not k_status['pvcName']
    assert not workloads
    assert not k_status['lastPVCRefAt']
    assert not k_status['lastPodRefAt']

    delete_and_wait_pv(core_api, pv_name)
def recurring_job_labels_test(client, labels, volume_name, size=SIZE, base_image=""):  # NOQA
    """
    Helper: verify labels on a recurring backup job propagate to the backup,
    including the RecurringJob label and (optionally) the BaseImage label.

    Fix: ``labels.iteritems()`` is Python 2-only; replaced with
    ``labels.items()`` so the helper runs on Python 3.
    """
    host_id = get_self_host_id()
    client.create_volume(name=volume_name, size=size, numberOfReplicas=2)
    volume = common.wait_for_volume_detached(client, volume_name)

    # Simple Backup Job that runs every 2 minutes, retains 1.
    jobs = [{
        "name": RECURRING_JOB_NAME,
        "cron": "*/2 * * * *",
        "task": "backup",
        "retain": 1,
        "labels": labels
    }]
    volume.recurringUpdate(jobs=jobs)

    volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    # 5 minutes
    time.sleep(300)
    snapshots = volume.snapshotList()
    count = 0
    for snapshot in snapshots:
        if snapshot["removed"] is False:
            count += 1
    # 1 from Backup, 1 from Volume Head.
    assert count == 2

    # Verify the Labels on the actual Backup.
    bv = client.by_id_backupVolume(volume_name)
    backups = bv.backupList()
    assert len(backups) == 1

    b = bv.backupGet(name=backups[0]["name"])
    # dict.items() replaces the Python 2-only dict.iteritems()
    for key, val in labels.items():
        assert b["labels"].get(key) == val
    assert b["labels"].get(RECURRING_JOB_LABEL) == RECURRING_JOB_NAME
    if base_image:
        assert b["labels"].get(BASE_IMAGE_LABEL) == base_image
        # One extra Label from the BaseImage being set.
        assert len(b["labels"]) == len(labels) + 2
    else:
        # At least one extra Label from RecurringJob.
        assert len(b["labels"]) == len(labels) + 1

    cleanup_volume(client, volume)
def test_recurring_job_in_volume_creation(set_random_backupstore, client, volume_name):  # NOQA
    """
    Test create volume with recurring jobs

    1. Create volume with recurring jobs though Longhorn API
    2. Verify the recurring jobs run correctly
    """
    self_host = get_self_host_id()

    # creating a volume with duplicated job entries must be rejected
    with pytest.raises(Exception) as e:
        client.create_volume(name=volume_name, size=SIZE,
                             numberOfReplicas=2,
                             recurringJobs=create_jobs1() + create_jobs1())
    assert "duplicate job" in str(e.value)

    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2,
                         recurringJobs=create_jobs1())
    volume = common.wait_for_volume_detached(client, volume_name)
    volume.attach(hostId=self_host)
    volume = wait_for_volume_healthy(client, volume_name)

    # wait until the beginning of an even minute
    wait_until_begin_of_an_even_minute()
    # wait until the 10th second of an even minute
    # to avoid writing data at the same time backup is taking
    time.sleep(10)

    write_volume_random_data(volume)
    time.sleep(150)  # 2.5 minutes
    write_volume_random_data(volume)
    time.sleep(150)  # 2.5 minutes

    check_jobs1_result(volume)

    volume = volume.detach(hostId="")
    common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 0
def test_replica_scheduler_exceed_over_provisioning(client):  # NOQA
    """
    Verify a volume exceeding the over-provisioning limit cannot be scheduled.

    Fix: ``disks.iteritems()`` is Python 2-only; replaced with
    ``disks.items()``.
    """
    over_provisioning_setting = client.by_id_setting(
        SETTING_STORAGE_OVER_PROVISIONING_PERCENTAGE)
    old_provisioning_setting = over_provisioning_setting["value"]
    # set storage over provisioning percentage to 100
    over_provisioning_setting = client.update(over_provisioning_setting,
                                              value="100")

    # test exceed over provisioning limit couldn't be scheduled
    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        # dict.items() replaces the Python 2-only dict.iteritems()
        for fsid, disk in disks.items():
            disk["storageReserved"] = \
                disk["storageMaximum"] - 1*Gi
        update_disks = get_update_disks(disks)
        node = node.diskUpdate(disks=update_disks)
        disks = node["disks"]
        for fsid, disk in disks.items():
            wait_for_disk_status(client, node["name"],
                                 fsid, "storageReserved",
                                 disk["storageMaximum"] - 1*Gi)

    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name, size=str(2*Gi),
                                  numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_FALSE)
    client.delete(volume)
    common.wait_for_volume_delete(client, vol_name)
    client.update(over_provisioning_setting,
                  value=old_provisioning_setting)
def test_replica_scheduler_no_disks(client):  # NOQA
    """
    Verify that a volume cannot be scheduled when every node's disks have
    been removed.

    Fix: ``iteritems()`` is Python 2-only; replaced with ``items()``.
    """
    nodes = client.list_node()
    # delete all disks on each node
    for node in nodes:
        disks = node["disks"]
        name = node["name"]
        # set allowScheduling to false
        # (dict.items() replaces the Python 2-only dict.iteritems())
        for fsid, disk in disks.items():
            disk["allowScheduling"] = False
        update_disks = get_update_disks(disks)
        node = node.diskUpdate(disks=update_disks)
        for fsid, disk in node["disks"].items():
            # wait for node controller update disk status
            wait_for_disk_status(client, name, fsid,
                                 "allowScheduling", False)
            wait_for_disk_status(client, name, fsid,
                                 "storageScheduled", 0)
        node = client.by_id_node(name)
        for fsid, disk in node["disks"].items():
            assert not disk["allowScheduling"]
        node = node.diskUpdate(disks=[])
        node = common.wait_for_disk_update(client, name, 0)
        assert len(node["disks"]) == 0

    # test there's no disk fit for volume
    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name, size=SIZE,
                                  numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(
        client, vol_name, "status", CONDITION_STATUS_FALSE)
    client.delete(volume)
    common.wait_for_volume_delete(client, vol_name)
def test_ha_prohibit_deleting_last_replica(client, volume_name):  # NOQA
    """
    Verify that removing the sole replica of a single-replica volume is
    rejected by the API.
    """
    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=1)
    volume = common.wait_for_volume_detached(client, volume_name)

    # sanity-check the freshly created volume
    assert volume["name"] == volume_name
    assert volume["size"] == SIZE
    assert volume["numberOfReplicas"] == 1
    assert volume["state"] == "detached"
    assert volume["created"] != ""

    self_host = get_self_host_id()
    volume = volume.attach(hostId=self_host)
    volume = common.wait_for_volume_healthy(client, volume_name)

    assert len(volume["replicas"]) == 1
    only_replica = volume["replicas"][0]

    # deleting the last healthy replica must be refused
    with pytest.raises(Exception) as e:
        volume.replicaRemove(name=only_replica["name"])
    assert "no other healthy replica available" in str(e.value)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    common.wait_for_volume_delete(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 0
def ha_simple_recovery_test(client, volume_name, size, base_image=""):  # NOQA
    """
    Helper: create a 2-replica volume, run the replica-rebuild scenario on
    it, then detach and delete the volume.
    """
    volume = client.create_volume(name=volume_name, size=size,
                                  numberOfReplicas=2, baseImage=base_image)
    volume = common.wait_for_volume_detached(client, volume_name)

    # verify the created volume matches what was requested
    assert volume["name"] == volume_name
    assert volume["size"] == size
    assert volume["numberOfReplicas"] == 2
    assert volume["state"] == "detached"
    assert volume["created"] != ""
    assert volume["baseImage"] == base_image

    volume = volume.attach(hostId=get_self_host_id())
    volume = common.wait_for_volume_healthy(client, volume_name)

    # exercise replica rebuild against the healthy volume
    ha_rebuild_replica_test(client, volume_name)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    common.wait_for_volume_delete(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 0
def test_replica_scheduler_exceed_over_provisioning(client):  # NOQA
    """
    Verify a volume exceeding the over-provisioning limit cannot be scheduled.

    Fix: ``disks.iteritems()`` is Python 2-only; replaced with
    ``disks.items()``.
    """
    over_provisioning_setting = client.by_id_setting(
        SETTING_STORAGE_OVER_PROVISIONING_PERCENTAGE)
    old_provisioning_setting = over_provisioning_setting["value"]
    # set storage over provisioning percentage to 100
    over_provisioning_setting = client.update(over_provisioning_setting,
                                              value="100")

    # test exceed over provisioning limit couldn't be scheduled
    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        # dict.items() replaces the Python 2-only dict.iteritems()
        for fsid, disk in disks.items():
            disk["storageReserved"] = \
                disk["storageMaximum"] - 1*Gi
        update_disks = get_update_disks(disks)
        node = node.diskUpdate(disks=update_disks)
        disks = node["disks"]
        for fsid, disk in disks.items():
            wait_for_disk_status(client, node["name"], fsid,
                                 "storageReserved",
                                 disk["storageMaximum"] - 1 * Gi)

    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name, size=str(2 * Gi),
                                  numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(
        client, vol_name, "status", CONDITION_STATUS_FALSE)
    client.delete(volume)
    common.wait_for_volume_delete(client, vol_name)
    client.update(over_provisioning_setting,
                  value=old_provisioning_setting)
def test_replica_scheduler_just_under_over_provisioning(client):  # NOQA
    """
    Verify a volume sized just under the over-provisioning limit can be
    scheduled, and that every replica lands on the default disk.

    Fixes:
    - ``disks.iteritems()`` is Python 2-only; replaced with ``disks.items()``.
    - ``filter(lambda ...)`` returns a lazy iterator on Python 3, breaking the
      later ``len()`` check; replaced with a list comprehension.
    - local ``id`` shadowed the builtin; renamed to ``host``.
    """
    over_provisioning_setting = client.by_id_setting(
        SETTING_STORAGE_OVER_PROVISIONING_PERCENTAGE)
    old_provisioning_setting = over_provisioning_setting["value"]
    # set storage over provisioning percentage to 100
    over_provisioning_setting = client.update(over_provisioning_setting,
                                              value="100")

    lht_hostId = get_self_host_id()
    nodes = client.list_node()
    expect_node_disk = {}
    max_size_array = []
    for node in nodes:
        disks = node["disks"]
        # dict.items() replaces the Python 2-only dict.iteritems()
        for fsid, disk in disks.items():
            if disk["path"] == DEFAULT_DISK_PATH:
                expect_disk = disk
                expect_disk["fsid"] = fsid
                expect_node_disk[node["name"]] = expect_disk
                max_size_array.append(disk["storageMaximum"])
            disk["storageReserved"] = 0
        update_disks = get_update_disks(disks)
        node = node.diskUpdate(disks=update_disks)
        disks = node["disks"]
        for fsid, disk in disks.items():
            wait_for_disk_status(client, node["name"], fsid,
                                 "storageReserved", 0)

    max_size = min(max_size_array)
    # test just under over provisioning limit could be scheduled
    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name, size=str(max_size),
                                  numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(
        client, vol_name, "status", CONDITION_STATUS_TRUE)
    volume = common.wait_for_volume_detached(client, vol_name)
    assert volume["state"] == "detached"
    assert volume["created"] != ""

    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, vol_name)
    nodes = client.list_node()
    node_hosts = []
    for node in nodes:
        node_hosts.append(node["name"])
    # check all replica should be scheduled to default disk
    for replica in volume["replicas"]:
        host = replica["hostId"]
        assert host != ""
        assert replica["running"]
        expect_disk = expect_node_disk[host]
        assert replica["diskID"] == expect_disk["fsid"]
        assert expect_disk["path"] in replica["dataPath"]
        # list comprehension instead of filter(): on Python 3 filter()
        # yields an iterator, which would break len() below
        node_hosts = [h for h in node_hosts if h != host]
    assert len(node_hosts) == 0

    # clean volume and disk
    cleanup_volume(client, vol_name)
    client.update(over_provisioning_setting,
                  value=old_provisioning_setting)
def test_replica_scheduler_update_over_provisioning(client):  # NOQA
    """
    Verify scheduling fails at 0% over-provisioning and succeeds once the
    setting is raised to 100%, with all replicas on the default disk.

    Fixes:
    - ``disks.iteritems()`` is Python 2-only; replaced with ``disks.items()``.
    - ``filter(lambda ...)`` returns a lazy iterator on Python 3, breaking the
      later ``len()`` check; replaced with a list comprehension.
    - local ``id`` shadowed the builtin; renamed to ``host``.
    """
    nodes = client.list_node()
    lht_hostId = get_self_host_id()
    expect_node_disk = {}
    for node in nodes:
        disks = node["disks"]
        # dict.items() replaces the Python 2-only dict.iteritems()
        for fsid, disk in disks.items():
            if disk["path"] == DEFAULT_DISK_PATH:
                expect_disk = disk
                expect_disk["fsid"] = fsid
                expect_node_disk[node["name"]] = expect_disk

    over_provisioning_setting = client.by_id_setting(
        SETTING_STORAGE_OVER_PROVISIONING_PERCENTAGE)
    old_provisioning_setting = over_provisioning_setting["value"]

    # set storage over provisioning percentage to 0
    # to test all replica couldn't be scheduled
    over_provisioning_setting = client.update(over_provisioning_setting,
                                              value="0")
    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name, size=SIZE,
                                  numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_FALSE)

    # set storage over provisioning percentage to 100
    over_provisioning_setting = client.update(over_provisioning_setting,
                                              value="100")

    # check volume status
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_TRUE)
    volume = common.wait_for_volume_detached(client, vol_name)
    assert volume["state"] == "detached"
    assert volume["created"] != ""

    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, vol_name)
    node_hosts = []
    for node in nodes:
        node_hosts.append(node["name"])
    # check all replica should be scheduled to default disk
    for replica in volume["replicas"]:
        host = replica["hostId"]
        assert host != ""
        assert replica["running"]
        expect_disk = expect_node_disk[host]
        assert replica["diskID"] == expect_disk["fsid"]
        assert expect_disk["path"] in replica["dataPath"]
        # list comprehension instead of filter(): on Python 3 filter()
        # yields an iterator, which would break len() below
        node_hosts = [h for h in node_hosts if h != host]
    assert len(node_hosts) == 0

    # clean volume and disk
    cleanup_volume(client, vol_name)
    client.update(over_provisioning_setting,
                  value=old_provisioning_setting)
def test_replica_scheduler_no_disks(client):  # NOQA
    """
    Verify that a volume cannot be scheduled when every node's disks have
    been removed.

    Fix: ``iteritems()`` is Python 2-only; replaced with ``items()``.
    """
    nodes = client.list_node()
    # delete all disks on each node
    for node in nodes:
        disks = node["disks"]
        name = node["name"]
        # set allowScheduling to false
        # (dict.items() replaces the Python 2-only dict.iteritems())
        for fsid, disk in disks.items():
            disk["allowScheduling"] = False
        update_disks = get_update_disks(disks)
        node = node.diskUpdate(disks=update_disks)
        for fsid, disk in node["disks"].items():
            # wait for node controller update disk status
            wait_for_disk_status(client, name, fsid,
                                 "allowScheduling", False)
            wait_for_disk_status(client, name, fsid,
                                 "storageScheduled", 0)
        node = client.by_id_node(name)
        for fsid, disk in node["disks"].items():
            assert not disk["allowScheduling"]
        node = node.diskUpdate(disks=[])
        node = common.wait_for_disk_update(client, name, 0)
        assert len(node["disks"]) == 0

    # test there's no disk fit for volume
    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name, size=SIZE,
                                  numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_FALSE)
    client.delete(volume)
    common.wait_for_volume_delete(client, vol_name)
def test_tag_scheduling(client, node_default_tags):  # NOQA
    """
    Test that scheduling succeeds if there are available Nodes/Disks with the
    requested Tags.
    """
    self_host = get_self_host_id()
    tag_specs = [
        # Select all Nodes.
        {"disk": [], "expected": 3, "node": []},
        # Selector works with AND on Disk Tags.
        {"disk": ["ssd", "nvme"], "expected": 2, "node": []},
        # Selector works with AND on Node Tags.
        {"disk": [], "expected": 2, "node": ["main", "storage"]},
        # Selector works based on combined Disk AND Node selector.
        {"disk": ["ssd", "nvme"], "expected": 1, "node": ["storage", "main"]},
    ]

    for spec in tag_specs:
        volume_name = generate_volume_name()  # NOQA
        client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=3,
                             diskSelector=spec["disk"],
                             nodeSelector=spec["node"])
        volume = wait_for_volume_detached(client, volume_name)
        assert volume["diskSelector"] == spec["disk"]
        assert volume["nodeSelector"] == spec["node"]

        volume.attach(hostId=self_host)
        volume = wait_for_volume_healthy(client, volume_name)
        assert len(volume["replicas"]) == 3
        check_volume_replicas(volume, spec, node_default_tags)

        cleanup_volume(client, volume)
def test_tag_scheduling_on_update(client, node_default_tags, volume_name):  # NOQA
    """
    Test that Replicas get scheduled if a Node/Disk disks updated with the
    proper Tags.
    """
    spec = {
        "disk": ["ssd", "m2"],
        "expected": 1,
        "node": ["main", "fallback"]
    }
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=3,
                         diskSelector=spec["disk"], nodeSelector=spec["node"])
    volume = wait_for_volume_detached(client, volume_name)
    assert volume["diskSelector"] == spec["disk"]
    assert volume["nodeSelector"] == spec["node"]

    # no node/disk carries these tags yet, so scheduling must fail first
    wait_scheduling_failure(client, volume_name)

    self_host = get_self_host_id()
    node = client.by_id_node(self_host)
    disk_updates = get_update_disks(node["disks"])
    disk_updates[0]["tags"] = spec["disk"]
    node = node.diskUpdate(disks=disk_updates)
    set_node_tags(client, node, spec["node"])

    # poll until the scheduled condition flips to True
    scheduled = False
    for _ in range(RETRY_COUNTS):
        v = client.by_id_volume(volume_name)
        if v["conditions"]["scheduled"]["status"] == "True":
            scheduled = True
            break
        sleep(RETRY_INTERVAL)
    assert scheduled

    volume.attach(hostId=self_host)
    volume = wait_for_volume_healthy(client, volume_name)

    node_mapping = {}
    for node in client.list_node():
        node_mapping[node["id"]] = {
            "disk": get_update_disks(node["disks"])[0]["tags"],
            "node": node["tags"],
        }

    assert len(volume["replicas"]) == 3
    check_volume_replicas(volume, spec, node_mapping)

    cleanup_volume(client, volume)
def test_recurring_job_in_volume_creation(clients, volume_name):  # NOQA
    """
    Test create volume with recurring jobs

    1. Create volume with recurring jobs though Longhorn API
    2. Verify the recurring jobs run correctly

    Fix: the ``iter()`` wrapper around ``clients.items()`` was redundant —
    dict views are already iterable.
    """
    # grab an arbitrary (host_id, client) pair from the fixture
    for host_id, client in clients.items():  # NOQA
        break

    set_random_backupstore(client)

    # error when creating volume with duplicate jobs
    with pytest.raises(Exception) as e:
        client.create_volume(name=volume_name, size=SIZE,
                             numberOfReplicas=2,
                             recurringJobs=create_jobs1() + create_jobs1())
    assert "duplicate job" in str(e.value)

    client.create_volume(name=volume_name, size=SIZE,
                         numberOfReplicas=2, recurringJobs=create_jobs1())
    volume = common.wait_for_volume_detached(client, volume_name)

    volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    # 5 minutes
    time.sleep(300)
    check_jobs1_result(volume)

    volume = volume.detach()
    common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 0
def test_ha_salvage(client, volume_name):  # NOQA
    """
    Fault every replica of a 2-replica volume, salvage both replicas, and
    verify the previously written data survives.
    """
    # get a random client
    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=2)
    volume = common.wait_for_volume_detached(client, volume_name)
    assert volume["name"] == volume_name
    assert volume["size"] == SIZE
    assert volume["numberOfReplicas"] == 2
    assert volume["state"] == "detached"
    assert volume["created"] != ""

    self_host = get_self_host_id()
    volume = volume.attach(hostId=self_host)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 2
    replica_names = [r["name"] for r in volume["replicas"][:2]]

    data = write_random_data(volume["endpoint"])

    # kill every replica pod so the whole volume faults
    common.k8s_delete_replica_pods_for_volume(volume_name)
    volume = common.wait_for_volume_faulted(client, volume_name)
    assert len(volume["replicas"]) == 2
    assert volume["replicas"][0]["failedAt"] != ""
    assert volume["replicas"][1]["failedAt"] != ""

    # salvaging must clear the failure timestamps on both replicas
    volume.salvage(names=replica_names)
    volume = common.wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 2
    assert volume["replicas"][0]["failedAt"] == ""
    assert volume["replicas"][1]["failedAt"] == ""

    volume = volume.attach(hostId=self_host)
    volume = common.wait_for_volume_healthy(client, volume_name)
    check_data(volume["endpoint"], data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    common.wait_for_volume_delete(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 0
def ha_salvage_test(client, volume_name, base_image=""):  # NOQA
    """
    Helper: fault all replicas of a 2-replica volume (optionally with a base
    image), salvage them, and verify the written data is intact.
    """
    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=2, baseImage=base_image)
    volume = common.wait_for_volume_detached(client, volume_name)
    assert volume["name"] == volume_name
    assert volume["size"] == SIZE
    assert volume["numberOfReplicas"] == 2
    assert volume["state"] == "detached"
    assert volume["created"] != ""
    assert volume["baseImage"] == base_image

    self_host = get_self_host_id()
    volume = volume.attach(hostId=self_host)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert len(volume["replicas"]) == 2
    replica_names = [r["name"] for r in volume["replicas"][:2]]

    data = write_volume_random_data(volume)

    # kill every replica pod so the whole volume faults
    common.k8s_delete_replica_pods_for_volume(volume_name)
    volume = common.wait_for_volume_faulted(client, volume_name)
    assert len(volume["replicas"]) == 2
    assert volume["replicas"][0]["failedAt"] != ""
    assert volume["replicas"][1]["failedAt"] != ""

    # salvaging must clear the failure timestamps on both replicas
    volume.salvage(names=replica_names)
    volume = common.wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == 2
    assert volume["replicas"][0]["failedAt"] == ""
    assert volume["replicas"][1]["failedAt"] == ""

    volume = volume.attach(hostId=self_host)
    volume = common.wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    common.wait_for_volume_delete(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 0
def test_recurring_job(clients, volume_name):  # NOQA
    """
    Verify recurring jobs run and that updating the job list takes effect.

    Fix: ``clients.iteritems()`` is Python 2-only; replaced with
    ``clients.items()``.
    """
    # grab an arbitrary (host_id, client) pair from the fixture
    for host_id, client in clients.items():  # NOQA
        break

    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=2)
    volume = common.wait_for_volume_detached(client, volume_name)

    jobs = create_jobs1()
    volume.recurringUpdate(jobs=jobs)

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    # 5 minutes
    time.sleep(300)
    check_jobs1_result(volume)

    job_backup2 = {
        "name": "backup2",
        "cron": "* * * * *",
        "task": "backup",
        "retain": 2
    }
    volume.recurringUpdate(jobs=[jobs[0], job_backup2])

    # 5 minutes
    time.sleep(300)

    snapshots = volume.snapshotList()
    count = 0
    for snapshot in snapshots:
        if snapshot["removed"] is False:
            count += 1
    # 2 from job_snap, 1 from job_backup, 2 from job_backup2, 1 volume-head
    assert count == 6

    volume = volume.detach()
    common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 0
def test_recurring_job(clients, volume_name):  # NOQA
    """
    Verify recurring jobs run and that updating the job list takes effect.

    Fix: ``clients.iteritems()`` is Python 2-only; replaced with
    ``clients.items()``.
    """
    # grab an arbitrary (host_id, client) pair from the fixture
    for host_id, client in clients.items():  # NOQA
        break

    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=2)
    volume = common.wait_for_volume_detached(client, volume_name)

    jobs = create_jobs1()
    volume.recurringUpdate(jobs=jobs)

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    # 5 minutes
    time.sleep(300)
    check_jobs1_result(volume)

    job_backup2 = {"name": "backup2", "cron": "* * * * *",
                   "task": "backup", "retain": 2}
    volume.recurringUpdate(jobs=[jobs[0], job_backup2])

    # 5 minutes
    time.sleep(300)

    snapshots = volume.snapshotList()
    count = 0
    for snapshot in snapshots:
        if snapshot["removed"] is False:
            count += 1
    # 2 from job_snap, 1 from job_backup, 2 from job_backup2, 1 volume-head
    assert count == 6

    volume = volume.detach()
    common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 0
def create_volume(client, vol_name, size, node_id, r_num):  # NOQA
    """
    Helper: create a volume, validate its initial state, attach it to the
    given node, and return the healthy volume object.
    """
    new_vol = client.create_volume(name=vol_name, size=size,
                                   numberOfReplicas=r_num)
    assert new_vol["numberOfReplicas"] == r_num
    assert new_vol["frontend"] == "blockdev"

    new_vol = common.wait_for_volume_detached(client, vol_name)
    assert len(new_vol["replicas"]) == r_num
    assert new_vol["state"] == "detached"
    assert new_vol["created"] != ""

    # the volume fetched by id must agree with the one just created
    fetched = client.by_id_volume(vol_name)
    assert fetched["name"] == new_vol["name"]
    assert fetched["size"] == new_vol["size"]
    assert fetched["numberOfReplicas"] == new_vol["numberOfReplicas"]
    assert fetched["state"] == new_vol["state"]
    assert fetched["created"] == new_vol["created"]

    new_vol.attach(hostId=node_id)
    new_vol = common.wait_for_volume_healthy(client, vol_name)
    return new_vol
def test_recurring_jobs_maximum_retain(client, core_api, volume_name):  # NOQA
    """
    Test recurring jobs' maximum retain

    1. Create two jobs, with retain 30 and 21.
    2. Try to apply the jobs to a volume. It should fail.
    3. Reduce retain to 30 and 20.
    4. Now the jobs can be applied the volume.
    """
    volume = client.create_volume(name=volume_name)
    volume = common.wait_for_volume_detached(client, volume_name)

    jobs = create_jobs1()
    # push the combined retain count just over the 50-snapshot cap
    jobs[0]['retain'] = 30
    jobs[1]['retain'] = 21

    volume = volume.attach(hostId=get_self_host_id())
    volume = common.wait_for_volume_healthy(client, volume_name)

    with pytest.raises(Exception) as e:
        volume.recurringUpdate(jobs=jobs)
    assert "Job Can\\'t retain more than 50 snapshots" in str(e.value)

    # dropping one retain back under the cap makes the update succeed
    jobs[1]['retain'] = 20
    volume = volume.recurringUpdate(jobs=jobs)

    assert len(volume.recurringJobs) == 2
    assert volume.recurringJobs[0]['retain'] == 30
    assert volume.recurringJobs[1]['retain'] == 20
def test_replica_scheduler_update_minimal_available(client):  # NOQA
    """
    Test scheduling against the storage-minimal-available-percentage setting.

    NOTE(review): a second definition with this exact name appears later in
    this file; Python keeps only the last one, so this copy is shadowed at
    import time — confirm which version is intended to survive.

    1. Record each node's default disk.
    2. Set minimal-available to 100% so no disk is schedulable; a new
       volume must fail to schedule.
    3. Restore the previous setting; the disks become schedulable and the
       volume gets scheduled.
    4. Attach and verify one replica per node, each on the default disk.
    """
    minimal_available_setting = client.by_id_setting(
        SETTING_STORAGE_MINIMAL_AVAILABLE_PERCENTAGE)
    old_minimal_setting = minimal_available_setting["value"]

    nodes = client.list_node()
    expect_node_disk = {}
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            if disk["path"] == DEFAULT_DISK_PATH:
                expect_disk = disk
                expect_disk["fsid"] = fsid
                expect_node_disk[node["name"]] = expect_disk

    # set storage minimal available percentage to 100
    # to test all replica couldn't be scheduled
    minimal_available_setting = client.update(minimal_available_setting,
                                              value="100")
    # wait for disks state
    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_conditions(client, node["name"], fsid,
                                     DISK_CONDITION_SCHEDULABLE,
                                     CONDITION_STATUS_FALSE)

    lht_hostId = get_self_host_id()
    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name, size=SIZE,
                                  numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(
        client, vol_name, "status", CONDITION_STATUS_FALSE)

    # set storage minimal available percentage to default value(10)
    minimal_available_setting = client.update(minimal_available_setting,
                                              value=old_minimal_setting)
    # wait for disks state
    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_conditions(client, node["name"], fsid,
                                     DISK_CONDITION_SCHEDULABLE,
                                     CONDITION_STATUS_TRUE)

    # check volume status
    volume = common.wait_for_volume_condition_scheduled(
        client, vol_name, "status", CONDITION_STATUS_TRUE)
    volume = common.wait_for_volume_detached(client, vol_name)
    assert volume["state"] == "detached"
    assert volume["created"] != ""

    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, vol_name)
    nodes = client.list_node()
    node_hosts = []
    for node in nodes:
        node_hosts.append(node["name"])
    # check all replica should be scheduled to default disk
    for replica in volume["replicas"]:
        id = replica["hostId"]
        assert id != ""
        assert replica["running"]
        expect_disk = expect_node_disk[id]
        assert replica["diskID"] == expect_disk["fsid"]
        assert expect_disk["path"] in replica["dataPath"]
        # Removing each host as it is seen proves one replica per node.
        node_hosts = filter(lambda x: x != id, node_hosts)
    assert len(node_hosts) == 0

    # clean volume and disk
    cleanup_volume(client, vol_name)
def test_pvc_creation(client, core_api, pod):  # NOQA
    """
    Test creating a PVC for a Longhorn volume.

    1. Create a volume; creating a PVC before any PV exists must fail.
    2. Create the PV; wait until the volume's kubernetesStatus reports the
       PV as 'Available' with no PVC/workload references.
    3. Create the PVC; wait until the status reports it as 'Bound' in the
       'default' namespace.
    4. Run a pod using the PVC and verify the workload status turns
       'Running'.
    5. Clean up the pod, PVC and PV.

    Fix: the original retry loops polled a `k_status` snapshot taken once
    before the loop and never refreshed inside it, so each wait degenerated
    into "sleep RETRY_COUNTS intervals, then check once". Every loop now
    re-fetches the volume per iteration.
    """
    volume_name = "test-pvc-creation"
    client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pv_name = "pv-" + volume_name
    pvc_name = "pvc-" + volume_name
    pod_name = "pod-" + volume_name

    # try to create pvc without pv for the volume
    with pytest.raises(Exception) as e:
        volume.pvcCreate(namespace="default", pvcName=pvc_name)
    # NOTE(review): "connot" matches the server-side message verbatim
    # (including its typo); keep in sync with the manager.
    assert "connot find existing PV for volume" in str(e.value)

    volume.pvCreate(pvName=pv_name)
    for i in range(RETRY_COUNTS):
        if check_pv_existence(core_api, pv_name):
            break
        time.sleep(RETRY_INTERVAL)
    assert check_pv_existence(core_api, pv_name)

    # Wait until the PV is reflected in the volume's Kubernetes status.
    for i in range(RETRY_COUNTS):
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        if k_status['pvName'] and k_status['pvStatus'] == 'Available':
            break
        time.sleep(RETRY_INTERVAL)
    assert k_status['pvName'] == pv_name
    assert k_status['pvStatus'] == 'Available'
    assert not k_status['namespace']
    assert not k_status['pvcName']
    assert not k_status['workloadsStatus']
    assert not k_status['lastPVCRefAt']
    assert not k_status['lastPodRefAt']

    volume.pvcCreate(namespace="default", pvcName=pvc_name)
    for i in range(RETRY_COUNTS):
        if check_pvc_existence(core_api, pvc_name):
            break
        time.sleep(RETRY_INTERVAL)
    assert check_pvc_existence(core_api, pvc_name)

    # Wait until the PVC binding shows up in the volume's status.
    for i in range(RETRY_COUNTS):
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        if k_status['pvcName'] and k_status['namespace']:
            break
        time.sleep(RETRY_INTERVAL)
    assert k_status['pvName'] == pv_name
    assert k_status['pvStatus'] == 'Bound'
    assert k_status['namespace'] == "default"
    assert k_status['pvcName'] == pvc_name
    assert not k_status['workloadsStatus']
    assert not k_status['lastPVCRefAt']
    assert not k_status['lastPodRefAt']

    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [{
        'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pvc_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    # Wait until the pod is reported Running in the workload status.
    for i in range(RETRY_COUNTS):
        volume = client.by_id_volume(volume_name)
        k_status = volume["kubernetesStatus"]
        workloads = k_status['workloadsStatus']
        if workloads and workloads[0]['podStatus'] == 'Running':
            break
        time.sleep(RETRY_INTERVAL)
    assert k_status['pvName'] == pv_name
    assert k_status['pvStatus'] == 'Bound'
    assert len(workloads) == 1
    assert workloads[0]['podName'] == pod_name
    assert workloads[0]['podStatus'] == 'Running'
    assert not workloads[0]['workloadName']
    assert not workloads[0]['workloadType']
    assert k_status['namespace'] == 'default'
    assert k_status['pvcName'] == pvc_name
    assert not k_status['lastPVCRefAt']
    assert not k_status['lastPodRefAt']

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name)
    wait_delete_pv(core_api, pv_name)
def ha_backup_deletion_recovery_test(client, volume_name, size, base_image=""):  # NOQA
    """
    Backup, restore and snapshot-deletion recovery exercised against every
    configured backupstore.

    For each backupstore URL:
    1. Point the backup-target setting at it (S3 URLs carry a credential
       secret after a '$' separator; other targets clear the credential).
    2. Write random data, snapshot, back the snapshot up.
    3. Restore the backup into a fresh volume and verify the data.
    4. Delete the backup snapshot from the restored volume, purge, then run
       the replica-rebuild test on it.
    5. Delete the restored volume.
    Finally the source volume is detached and deleted.
    """
    volume = client.create_volume(name=volume_name, size=size,
                                  numberOfReplicas=2, baseImage=base_image)
    volume = common.wait_for_volume_detached(client, volume_name)

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    setting = client.by_id_setting(common.SETTING_BACKUP_TARGET)
    # test backupTarget for multiple settings
    backupstores = common.get_backupstore_url()
    for backupstore in backupstores:
        if common.is_backupTarget_s3(backupstore):
            # S3 entries are "<url>$<credential-secret-name>".
            backupsettings = backupstore.split("$")
            setting = client.update(setting, value=backupsettings[0])
            assert setting["value"] == backupsettings[0]

            credential = client.by_id_setting(
                common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value=backupsettings[1])
            assert credential["value"] == backupsettings[1]
        else:
            setting = client.update(setting, value=backupstore)
            assert setting["value"] == backupstore
            credential = client.by_id_setting(
                common.SETTING_BACKUP_TARGET_CREDENTIAL_SECRET)
            credential = client.update(credential, value="")
            assert credential["value"] == ""

        data = write_volume_random_data(volume)
        snap2 = volume.snapshotCreate()
        volume.snapshotCreate()

        volume.snapshotBackup(name=snap2["name"])

        _, b = common.find_backup(client, volume_name, snap2["name"])

        res_name = common.generate_volume_name()
        res_volume = client.create_volume(name=res_name, size=size,
                                          numberOfReplicas=2,
                                          fromBackup=b["url"])
        res_volume = common.wait_for_volume_detached(client, res_name)
        res_volume = res_volume.attach(hostId=host_id)
        res_volume = common.wait_for_volume_healthy(client, res_name)
        check_volume_data(res_volume, data)

        snapshots = res_volume.snapshotList()
        # only the backup snapshot + volume-head
        assert len(snapshots) == 2
        backup_snapshot = ""
        for snap in snapshots:
            if snap["name"] != "volume-head":
                backup_snapshot = snap["name"]
        assert backup_snapshot != ""

        res_volume.snapshotCreate()
        snapshots = res_volume.snapshotList()
        assert len(snapshots) == 3

        # Deleting the snapshot the restore came from must not hurt the
        # restored volume once it is purged.
        res_volume.snapshotDelete(name=backup_snapshot)
        res_volume.snapshotPurge()
        snapshots = res_volume.snapshotList()
        assert len(snapshots) == 2

        ha_rebuild_replica_test(client, res_name)

        res_volume = res_volume.detach()
        res_volume = common.wait_for_volume_detached(client, res_name)

        client.delete(res_volume)
        common.wait_for_volume_delete(client, res_name)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    common.wait_for_volume_delete(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 0
def test_replica_scheduler_too_large_volume_fit_any_disks(client):  # NOQA
    """
    Test that a too-large volume cannot be scheduled until reserved space
    is released, and then lands one replica per node on the default disks.

    NOTE(review): a function with this exact name is defined again later in
    this file (a simpler variant that zeroes storageReserved); Python keeps
    only the last definition, so this copy is shadowed — confirm which
    version is intended.

    1. Reserve all storage on every disk; a 4 Gi volume must fail to
       schedule.
    2. Lower storageReserved so each node can fit exactly one replica.
    3. Verify the volume schedules, attaches healthy, and each replica is
       on a different node's default disk.
    """
    nodes = client.list_node()
    lht_hostId = get_self_host_id()
    expect_node_disk = {}
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            if disk["path"] == DEFAULT_DISK_PATH:
                expect_disk = disk
                expect_disk["fsid"] = fsid
                expect_node_disk[node["name"]] = expect_disk
            # Reserve everything so nothing can be scheduled anywhere.
            disk["storageReserved"] = disk["storageMaximum"]
        update_disks = get_update_disks(disks)
        node.diskUpdate(disks=update_disks)

    # volume is too large to fill into any disks
    volume_size = 4 * Gi
    vol_name = common.generate_volume_name()
    client.create_volume(name=vol_name, size=str(volume_size),
                         numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(
        client, vol_name, "status", CONDITION_STATUS_FALSE)

    # Reduce StorageReserved of each default disk so that each node can fit
    # only one replica.
    needed_for_scheduling = int(
        volume_size * 1.5 * 100 /
        int(DEFAULT_STORAGE_OVER_PROVISIONING_PERCENTAGE))
    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        update_disks = get_update_disks(disks)
        for disk in update_disks:
            disk["storageReserved"] = \
                disk["storageMaximum"] - needed_for_scheduling
        node = node.diskUpdate(disks=update_disks)
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_status(
                client, node["name"], fsid, "storageReserved",
                disk["storageMaximum"] - needed_for_scheduling)

    # check volume status
    volume = common.wait_for_volume_condition_scheduled(
        client, vol_name, "status", CONDITION_STATUS_TRUE)
    volume = common.wait_for_volume_detached(client, vol_name)
    assert volume["state"] == "detached"
    assert volume["created"] != ""
    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, vol_name)
    nodes = client.list_node()
    node_hosts = []
    for node in nodes:
        node_hosts.append(node["name"])
    # check all replica should be scheduled to default disk
    for replica in volume["replicas"]:
        id = replica["hostId"]
        assert id != ""
        assert replica["running"]
        expect_disk = expect_node_disk[id]
        assert replica["diskID"] == expect_disk["fsid"]
        assert expect_disk["path"] in replica["dataPath"]
        # Removing each host as it is seen proves one replica per node.
        node_hosts = filter(lambda x: x != id, node_hosts)
    assert len(node_hosts) == 0

    # clean volume and disk
    cleanup_volume(client, vol_name)
def engine_live_upgrade_rollback_test(client, volume_name, base_image=""):  # NOQA
    """
    Test rolling back from a failed live engine upgrade.

    1. Register a deliberately incompatible engine image.
    2. Live-upgrade an attached volume to it; the upgrade must stall
       (currentImage stays on the original) and the wait must time out.
    3. Roll back to the original image and verify engine, replicas and data.
    4. Repeat the bad upgrade, then detach: offline the images do switch.
    5. Upgrade back to the correct image offline, reattach, verify
       everything, then clean up.
    """
    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    cli_v = default_img["cliAPIVersion"]
    cli_minv = default_img["cliAPIMinVersion"]
    ctl_v = default_img["controllerAPIVersion"]
    ctl_minv = default_img["controllerAPIMinVersion"]
    data_v = default_img["dataFormatVersion"]
    data_minv = default_img["dataFormatMinVersion"]
    # Build an image advertising the same versions but incompatible in fact.
    wrong_engine_upgrade_image = common.get_compatibility_test_image(
        cli_v, cli_minv, ctl_v, ctl_minv, data_v, data_minv)
    new_img = client.create_engine_image(image=wrong_engine_upgrade_image)
    new_img_name = new_img["name"]
    new_img = wait_for_engine_image_state(client, new_img_name, "ready")
    assert new_img["refCount"] == 0
    assert new_img["noRefSince"] != ""

    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]

    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=2, baseImage=base_image)
    volume = common.wait_for_volume_detached(client, volume_name)
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 1)

    assert volume["baseImage"] == base_image

    original_engine_image = volume["engineImage"]
    assert original_engine_image != wrong_engine_upgrade_image

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    data = write_volume_random_data(volume)

    volume.engineUpgrade(image=wrong_engine_upgrade_image)
    volume = client.by_id_volume(volume["name"])
    # Desired image flips immediately; the running image must not.
    assert volume["engineImage"] == wrong_engine_upgrade_image
    assert volume["currentImage"] == original_engine_image

    with pytest.raises(Exception):
        # this will timeout
        wait_for_volume_current_image(client, volume_name,
                                      wrong_engine_upgrade_image)

    # rollback
    volume.engineUpgrade(image=original_engine_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           original_engine_image)
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image
    assert engine["currentImage"] == original_engine_image

    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                 REPLICA_COUNT)

    check_volume_data(volume, data)

    assert volume["state"] == common.VOLUME_STATE_ATTACHED
    assert volume["robustness"] == common.VOLUME_ROBUSTNESS_HEALTHY

    # try again, this time let's try detach
    volume.engineUpgrade(image=wrong_engine_upgrade_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == wrong_engine_upgrade_image
    assert volume["currentImage"] == original_engine_image

    with pytest.raises(Exception):
        # this will timeout
        wait_for_volume_current_image(client, volume_name,
                                      wrong_engine_upgrade_image)

    volume = volume.detach()
    volume = wait_for_volume_current_image(client, volume_name,
                                           wrong_engine_upgrade_image)
    # all the images would be updated
    assert volume["engineImage"] == wrong_engine_upgrade_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == wrong_engine_upgrade_image
    volume = common.wait_for_volume_replica_count(client, volume_name,
                                                 REPLICA_COUNT)
    for replica in volume["replicas"]:
        assert replica["engineImage"] == wrong_engine_upgrade_image

    # upgrade to the correct image when offline
    volume.engineUpgrade(image=original_engine_image)
    volume = client.by_id_volume(volume["name"])
    assert volume["engineImage"] == original_engine_image

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image
    assert engine["currentImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image
        assert replica["currentImage"] == original_engine_image

    check_volume_data(volume, data)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    client.delete(new_img)
def test_replica_scheduler_just_under_over_provisioning(client):  # NOQA
    """
    Test that a volume sized exactly at the smallest node's capacity is
    schedulable when over-provisioning is 100% and nothing is reserved.

    1. Set over-provisioning to 100 and storageReserved to 0 on all disks.
    2. Create a volume whose size equals the smallest default disk's
       storageMaximum; it must schedule and attach healthy.
    3. Verify one replica per node on the default disks, then restore the
       old setting.
    """
    over_provisioning_setting = client.by_id_setting(
        SETTING_STORAGE_OVER_PROVISIONING_PERCENTAGE)
    old_provisioning_setting = over_provisioning_setting["value"]
    # set storage over provisioning percentage to 100
    over_provisioning_setting = client.update(over_provisioning_setting,
                                              value="100")

    lht_hostId = get_self_host_id()
    nodes = client.list_node()
    expect_node_disk = {}
    max_size_array = []
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            if disk["path"] == DEFAULT_DISK_PATH:
                expect_disk = disk
                expect_disk["fsid"] = fsid
                expect_node_disk[node["name"]] = expect_disk
                max_size_array.append(disk["storageMaximum"])
            disk["storageReserved"] = 0
        update_disks = get_update_disks(disks)
        node = node.diskUpdate(disks=update_disks)
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_status(client, node["name"],
                                 fsid, "storageReserved", 0)

    # The tightest node bounds what can be scheduled everywhere.
    max_size = min(max_size_array)
    # test just under over provisioning limit could be scheduled
    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name, size=str(max_size),
                                  numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_TRUE)
    volume = common.wait_for_volume_detached(client, vol_name)
    assert volume["state"] == "detached"
    assert volume["created"] != ""

    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, vol_name)
    nodes = client.list_node()
    node_hosts = []
    for node in nodes:
        node_hosts.append(node["name"])
    # check all replica should be scheduled to default disk
    for replica in volume["replicas"]:
        id = replica["hostId"]
        assert id != ""
        assert replica["running"]
        expect_disk = expect_node_disk[id]
        assert replica["diskID"] == expect_disk["fsid"]
        assert expect_disk["path"] in replica["dataPath"]
        # Removing each host as it is seen proves one replica per node.
        node_hosts = filter(lambda x: x != id, node_hosts)
    assert len(node_hosts) == 0

    # clean volume and disk
    cleanup_volume(client, vol_name)
    client.update(over_provisioning_setting,
                  value=old_provisioning_setting)
def test_replica_scheduler_too_large_volume_fit_any_disks(client):  # NOQA
    """
    Test that a 4 Gi volume cannot be scheduled while all storage is
    reserved, and schedules once storageReserved is cleared.

    NOTE(review): this redefines a function of the same name earlier in the
    file (which reduced storageReserved by a computed amount instead of
    zeroing it); this later definition is the one Python keeps — confirm
    the earlier copy is intentionally dead.
    """
    nodes = client.list_node()
    lht_hostId = get_self_host_id()
    expect_node_disk = {}
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            if disk["path"] == DEFAULT_DISK_PATH:
                expect_disk = disk
                expect_disk["fsid"] = fsid
                expect_node_disk[node["name"]] = expect_disk
            # Reserve everything so nothing can be scheduled anywhere.
            disk["storageReserved"] = disk["storageMaximum"]
        update_disks = get_update_disks(disks)
        node.diskUpdate(disks=update_disks)

    # volume is too large to fill into any disks
    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name, size=str(4*Gi),
                                  numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_FALSE)

    # reduce StorageReserved of each default disk
    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        update_disks = get_update_disks(disks)
        for disk in update_disks:
            disk["storageReserved"] = 0
        node = node.diskUpdate(disks=update_disks)
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_status(client, node["name"], fsid,
                                 "storageReserved", 0)

    # check volume status
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_TRUE)
    volume = common.wait_for_volume_detached(client, vol_name)
    assert volume["state"] == "detached"
    assert volume["created"] != ""
    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, vol_name)
    nodes = client.list_node()
    node_hosts = []
    for node in nodes:
        node_hosts.append(node["name"])
    # check all replica should be scheduled to default disk
    for replica in volume["replicas"]:
        id = replica["hostId"]
        assert id != ""
        assert replica["running"]
        expect_disk = expect_node_disk[id]
        assert replica["diskID"] == expect_disk["fsid"]
        assert expect_disk["path"] in replica["dataPath"]
        # Removing each host as it is seen proves one replica per node.
        node_hosts = filter(lambda x: x != id, node_hosts)
    assert len(node_hosts) == 0

    # clean volume and disk
    cleanup_volume(client, vol_name)
def test_ha_simple_recovery(client, volume_name):  # NOQA
    """
    Test simple HA recovery.

    1. Create and attach a 2-replica volume; write random data.
    2. Remove one replica and wait for a replacement replica to appear.
    3. Wait for the volume to become healthy again, verify the surviving
       replica is still present and the data is intact.
    4. Detach and delete the volume.

    Fix: the rebuild-wait loop slept on `RETRY_ITERVAL`, a misspelling of
    the `RETRY_INTERVAL` constant used everywhere else in this file, which
    would raise NameError the first time the new replica was not found
    immediately.
    """
    # get a random client
    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=2)
    volume = common.wait_for_volume_detached(client, volume_name)
    assert volume["name"] == volume_name
    assert volume["size"] == SIZE
    assert volume["numberOfReplicas"] == 2
    assert volume["state"] == "detached"
    assert volume["created"] != ""

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)
    assert volume["endpoint"] == DEV_PATH + volume_name

    assert len(volume["replicas"]) == 2
    replica0 = volume["replicas"][0]
    assert replica0["name"] != ""

    replica1 = volume["replicas"][1]
    assert replica1["name"] != ""

    data = write_random_data(volume["endpoint"])

    volume = volume.replicaRemove(name=replica0["name"])

    # wait until we saw a replica starts rebuilding
    new_replica_found = False
    for i in range(RETRY_COUNTS):
        v = client.by_id_volume(volume_name)
        for r in v["replicas"]:
            if r["name"] != replica0["name"] and \
                    r["name"] != replica1["name"]:
                new_replica_found = True
                break
        if new_replica_found:
            break
        time.sleep(RETRY_INTERVAL)
    assert new_replica_found

    volume = common.wait_for_volume_healthy(client, volume_name)

    volume = client.by_id_volume(volume_name)
    assert volume["state"] == common.VOLUME_STATE_ATTACHED
    assert volume["robustness"] == common.VOLUME_ROBUSTNESS_HEALTHY
    assert len(volume["replicas"]) >= 2

    # The replica we did NOT remove must have survived the rebuild.
    found = False
    for replica in volume["replicas"]:
        if replica["name"] == replica1["name"]:
            found = True
            break
    assert found

    check_data(volume['endpoint'], data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    client.delete(volume)
    common.wait_for_volume_delete(client, volume_name)

    volumes = client.list_volume()
    assert len(volumes) == 0
def test_replica_scheduler_update_minimal_available(client):  # NOQA
    """
    Test scheduling against the storage-minimal-available-percentage setting.

    NOTE(review): this is a duplicate of an identically-named function
    defined earlier in the file; this later definition is the one Python
    keeps — confirm the earlier copy is intentionally dead.

    1. Record each node's default disk.
    2. Set minimal-available to 100% so no disk is schedulable; a new
       volume must fail to schedule.
    3. Restore the previous setting; disks become schedulable and the
       volume gets scheduled.
    4. Attach and verify one replica per node, each on the default disk.
    """
    minimal_available_setting = client.by_id_setting(
        SETTING_STORAGE_MINIMAL_AVAILABLE_PERCENTAGE)
    old_minimal_setting = minimal_available_setting["value"]

    nodes = client.list_node()
    expect_node_disk = {}
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            if disk["path"] == DEFAULT_DISK_PATH:
                expect_disk = disk
                expect_disk["fsid"] = fsid
                expect_node_disk[node["name"]] = expect_disk

    # set storage minimal available percentage to 100
    # to test all replica couldn't be scheduled
    minimal_available_setting = client.update(minimal_available_setting,
                                              value="100")
    # wait for disks state
    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_conditions(client, node["name"], fsid,
                                     DISK_CONDITION_SCHEDULABLE,
                                     CONDITION_STATUS_FALSE)

    lht_hostId = get_self_host_id()
    vol_name = common.generate_volume_name()
    volume = client.create_volume(name=vol_name, size=SIZE,
                                  numberOfReplicas=len(nodes))
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_FALSE)

    # set storage minimal available percentage to default value(10)
    minimal_available_setting = client.update(minimal_available_setting,
                                              value=old_minimal_setting)
    # wait for disks state
    nodes = client.list_node()
    for node in nodes:
        disks = node["disks"]
        for fsid, disk in disks.iteritems():
            wait_for_disk_conditions(client, node["name"], fsid,
                                     DISK_CONDITION_SCHEDULABLE,
                                     CONDITION_STATUS_TRUE)

    # check volume status
    volume = common.wait_for_volume_condition_scheduled(client, vol_name,
                                                        "status",
                                                        CONDITION_STATUS_TRUE)
    volume = common.wait_for_volume_detached(client, vol_name)
    assert volume["state"] == "detached"
    assert volume["created"] != ""

    volume.attach(hostId=lht_hostId)
    volume = common.wait_for_volume_healthy(client, vol_name)
    nodes = client.list_node()
    node_hosts = []
    for node in nodes:
        node_hosts.append(node["name"])
    # check all replica should be scheduled to default disk
    for replica in volume["replicas"]:
        id = replica["hostId"]
        assert id != ""
        assert replica["running"]
        expect_disk = expect_node_disk[id]
        assert replica["diskID"] == expect_disk["fsid"]
        assert expect_disk["path"] in replica["dataPath"]
        # Removing each host as it is seen proves one replica per node.
        node_hosts = filter(lambda x: x != id, node_hosts)
    assert len(node_hosts) == 0

    # clean volume and disk
    cleanup_volume(client, vol_name)
def test_statefulset_restore(client, core_api, storage_class,  # NOQA
                             statefulset):  # NOQA
    """
    Test that data can be restored into volumes usable by a StatefulSet.

    1. Create a StatefulSet with VolumeClaimTemplate and Longhorn.
    2. Wait for pods to run.
    3. Create a backup for each pod.
    4. Delete the StatefulSet, including the Longhorn volumes.
    5. Create volumes and PV/PVC using previous backups from each Pod.
        1. PVs will be created using the previous names.
        2. PVCs will be created using previous name + "-2" due to
           statefulset has a naming policy for what should be PVC name for
           them.
    6. Create a new StatefulSet using the previous name + "-2"
    7. Wait for pods to be up.
    8. Verify the pods contain the previous backed up data
    """
    statefulset_name = 'statefulset-restore-test'
    update_statefulset_manifests(statefulset, storage_class, statefulset_name)

    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)

    pod_info = get_statefulset_pod_info(core_api, statefulset)
    create_and_test_backups(core_api, client, pod_info)

    delete_and_wait_statefulset(core_api, client, statefulset)

    csi = check_csi(core_api)

    # StatefulSet fixture already cleans these up, use the manifests instead
    # of the fixtures to avoid issues during teardown.
    pv = {
        'apiVersion': 'v1',
        'kind': 'PersistentVolume',
        'metadata': {
            'name': ''
        },
        'spec': {
            'capacity': {
                'storage': size_to_string(DEFAULT_VOLUME_SIZE * Gi)
            },
            'volumeMode': 'Filesystem',
            'accessModes': ['ReadWriteOnce'],
            'persistentVolumeReclaimPolicy': 'Delete',
            'storageClassName': DEFAULT_STORAGECLASS_NAME
        }
    }

    pvc = {
        'apiVersion': 'v1',
        'kind': 'PersistentVolumeClaim',
        'metadata': {
            'name': ''
        },
        'spec': {
            'accessModes': ['ReadWriteOnce'],
            'resources': {
                'requests': {
                    'storage': size_to_string(DEFAULT_VOLUME_SIZE * Gi)
                }
            },
            'storageClassName': DEFAULT_STORAGECLASS_NAME
        }
    }

    # This test only supports the CSI deployment.
    assert csi

    pv['spec']['csi'] = {
        'driver': 'driver.longhorn.io',
        'fsType': 'ext4',
        'volumeAttributes': {
            'numberOfReplicas':
                storage_class['parameters']['numberOfReplicas'],
            'staleReplicaTimeout':
                storage_class['parameters']['staleReplicaTimeout']
        },
        'volumeHandle': ''
    }

    # Make sure that volumes still work even if the Pod and StatefulSet
    # names are different.
    for pod in pod_info:
        pod['pod_name'] = pod['pod_name'].replace(
            'statefulset-restore-test', 'statefulset-restore-test-2')
        pod['pvc_name'] = pod['pvc_name'].replace(
            'statefulset-restore-test', 'statefulset-restore-test-2')

        # Restore each backup into a volume named after the new PVC.
        pv['metadata']['name'] = pod['pvc_name']
        client.create_volume(
            name=pod['pvc_name'],
            size=size_to_string(DEFAULT_VOLUME_SIZE * Gi),
            numberOfReplicas=int(
                storage_class['parameters']['numberOfReplicas']),
            fromBackup=pod['backup_snapshot']['url'])
        wait_for_volume_detached(client, pod['pvc_name'])

        pv['spec']['csi']['volumeHandle'] = pod['pvc_name']
        core_api.create_persistent_volume(pv)

        pvc['metadata']['name'] = pod['pvc_name']
        pvc['spec']['volumeName'] = pod['pvc_name']
        core_api.create_namespaced_persistent_volume_claim(
            body=pvc, namespace='default')

    statefulset_name = 'statefulset-restore-test-2'
    update_statefulset_manifests(statefulset, storage_class, statefulset_name)
    create_and_wait_statefulset(statefulset)

    for pod in pod_info:
        resp = read_volume_data(core_api, pod['pod_name'])
        assert resp == pod['data']
def test_pvc_creation_with_default_sc_set(client, core_api, storage_class, pod):  # NOQA
    """
    Test creating PVC with default StorageClass set

    The target is to make sure the newly create PV/PVC won't use default
    StorageClass, and if there is no default StorageClass, PV/PVC can still
    be created.

    1. Create a StorageClass and set it to be the default StorageClass
    2. Update static StorageClass to `longhorn-static-test`
    3. Create volume then PV/PVC.
    4. Make sure the newly created PV/PVC using StorageClass
       `longhorn-static-test`
    5. Create pod with PVC.
    6. Verify volume's Kubernetes Status
    7. Remove PVC and Pod.
    8. Verify volume's Kubernetes Status only contains current PV and history
    9. Wait for volume to detach (since pod is deleted)
    10. Reuse the volume on a new pod. Wait for the pod to start
    11. Verify volume's Kubernetes Status reflect the new pod.
    12. Delete PV/PVC/Pod.
    13. Verify volume's Kubernetes Status only contains history
    14. Delete the default StorageClass.
    15. Create PV/PVC for the volume.
    16. Make sure the PV's StorageClass is static StorageClass

    Fix: `pvc_found`/`pvc2` were referenced after their search loops without
    being initialized, so a missing PVC raised NameError instead of a clean
    assertion failure; both are now pre-initialized to False.
    """
    # set default storage class
    storage_class['metadata']['annotations'] = \
        {"storageclass.kubernetes.io/is-default-class": "true"}
    create_storage_class(storage_class)

    static_sc_name = "longhorn-static-test"
    setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC)
    setting = client.update(setting, value=static_sc_name)
    assert setting.value == static_sc_name

    volume_name = "test-pvc-creation-with-sc"  # NOQA
    pod_name = "pod-" + volume_name
    client.create_volume(name=volume_name, size=SIZE,
                         numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pv_name = "pv-" + volume_name
    pvc_name = "pvc-" + volume_name
    pvc_name_extra = "pvc-" + volume_name + "-extra"

    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)

    ret = core_api.list_namespaced_persistent_volume_claim(
        namespace='default')
    # Initialize so a missing PVC fails the assert, not with NameError.
    pvc_found = False
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc_found = item
            break
    assert pvc_found
    assert pvc_found.spec.storage_class_name == static_sc_name

    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [{
        'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pvc_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    ks = {
        'pvName': pv_name,
        'pvStatus': 'Bound',
        'namespace': 'default',
        'pvcName': pvc_name,
        'lastPVCRefAt': '',
        'lastPodRefAt': '',
        'workloadsStatus': [
            {
                'podName': pod_name,
                'podStatus': 'Running',
                'workloadName': '',
                'workloadType': '',
            },
        ],
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name)

    ks = {
        'pvName': pv_name,
        'pvStatus': 'Released',
        'namespace': 'default',
        'pvcName': pvc_name,
        'lastPVCRefAt': 'not empty',
        'lastPodRefAt': 'not empty',
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    # try to reuse the pv
    volume = wait_for_volume_detached(client, volume_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name_extra)
    pod['spec']['volumes'][0]['persistentVolumeClaim']['claimName'] = \
        pvc_name_extra
    create_and_wait_pod(core_api, pod)

    ks = {
        'pvName': pv_name,
        'pvStatus': 'Bound',
        'namespace': 'default',
        'pvcName': pvc_name_extra,
        'lastPVCRefAt': '',
        'lastPodRefAt': '',
        'workloadsStatus': [
            {
                'podName': pod_name,
                'podStatus': 'Running',
                'workloadName': '',
                'workloadType': '',
            },
        ],
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name_extra)
    delete_and_wait_pv(core_api, pv_name)

    ks = {
        'pvName': '',
        'pvStatus': '',
        'namespace': 'default',
        'pvcName': pvc_name_extra,
        'lastPVCRefAt': 'not empty',
        'lastPodRefAt': 'not empty',
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    # without default storage class
    delete_storage_class(storage_class['metadata']['name'])

    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)

    ret = core_api.list_namespaced_persistent_volume_claim(
        namespace='default')
    # Same initialization fix as above.
    pvc2 = False
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc2 = item
            break
    assert pvc2
    assert pvc2.spec.storage_class_name == static_sc_name

    delete_and_wait_pvc(core_api, pvc_name)
    delete_and_wait_pv(core_api, pv_name)
def test_backup_kubernetes_status(set_random_backupstore, client, core_api, pod):  # NOQA
    """
    Test that Backups have KubernetesStatus stored properly when there is an
    associated PersistentVolumeClaim and Pod.

    1. Setup a random backupstore
    2. Set settings Longhorn Static StorageClass to `longhorn-static-test`
    3. Create a volume and PV/PVC. Verify the StorageClass of PVC
    4. Create a Pod using the PVC.
    5. Check volume's Kubernetes status to reflect PV/PVC/Pod correctly.
    6. Create a backup for the volume.
    7. Verify the labels of created backup reflect PV/PVC/Pod status.
    8. Restore the backup to a volume. Wait for restoration to complete.
    9. Check the volume's Kubernetes Status
        1. Make sure the `lastPodRefAt` and `lastPVCRefAt` is snapshot created
           time
    10. Delete the backup and restored volume.
    11. Delete PV/PVC/Pod.
    12. Verify volume's Kubernetes Status updated to reflect history data.
    13. Attach the volume and create another backup. Verify the labels
    14. Verify the volume's Kubernetes status.
    15. Restore the previous backup to a new volume. Wait for restoration.
    16. Verify the restored volume's Kubernetes status.
        1. Make sure `lastPodRefAt` and `lastPVCRefAt` matched volume on
           step 12
    """
    host_id = get_self_host_id()
    static_sc_name = "longhorn-static-test"
    # Point the Longhorn static StorageClass setting at our test name so the
    # PVCs created below are expected to carry it.
    setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC)
    setting = client.update(setting, value=static_sc_name)
    assert setting.value == static_sc_name

    volume_name = "test-backup-kubernetes-status-pod"  # NOQA
    client.create_volume(name=volume_name, size=SIZE,
                         numberOfReplicas=2)
    volume = wait_for_volume_detached(client, volume_name)

    pod_name = "pod-" + volume_name
    pv_name = "pv-" + volume_name
    pvc_name = "pvc-" + volume_name
    create_pv_for_volume(client, core_api, volume, pv_name)
    create_pvc_for_volume(client, core_api, volume, pvc_name)
    ret = core_api.list_namespaced_persistent_volume_claim(namespace='default')
    # Locate our PVC in the listing; `pvc_found` becomes the PVC object (or
    # stays False so the assert below fails if it never appeared).
    pvc_found = False
    for item in ret.items:
        if item.metadata.name == pvc_name:
            pvc_found = item
            break
    assert pvc_found
    assert pvc_found.spec.storage_class_name == static_sc_name

    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [{
        'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'],
        'persistentVolumeClaim': {
            'claimName': pvc_name,
        },
    }]
    create_and_wait_pod(core_api, pod)

    # Expected KubernetesStatus while the PV is bound and the Pod is running.
    ks = {
        'lastPodRefAt': '',
        'lastPVCRefAt': '',
        'namespace': 'default',
        'pvcName': pvc_name,
        'pvName': pv_name,
        'pvStatus': 'Bound',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    volume = wait_for_volume_healthy(client, volume_name)

    # Create Backup manually instead of calling create_backup since Kubernetes
    # is not guaranteed to mount our Volume to the test host.
    snap = create_snapshot(client, volume_name)
    volume.snapshotBackup(name=snap.name)
    wait_for_backup_completion(client, volume_name, snap.name)
    _, b = find_backup(client, volume_name, snap.name)
    # Check backup label
    status = loads(b.labels.get(KUBERNETES_STATUS_LABEL))
    assert status == ks
    # Check backup volume label
    # The backup volume's labels are populated asynchronously, so poll until
    # they show up (or the retry budget is exhausted).
    for _ in range(RETRY_COUNTS):
        bv = client.by_id_backupVolume(volume_name)
        if bv is not None and bv.labels is not None:
            break
        time.sleep(RETRY_INTERVAL)
    assert bv is not None and bv.labels is not None
    status = loads(bv.labels.get(KUBERNETES_STATUS_LABEL))
    assert status == ks

    restore_name = generate_volume_name()
    client.create_volume(name=restore_name, size=SIZE,
                         numberOfReplicas=2,
                         fromBackup=b.url)
    wait_for_volume_restoration_completed(client, restore_name)
    wait_for_volume_detached(client, restore_name)

    # Remember the backup's snapshot timestamp; the second backup later in the
    # test must NOT carry this value in its Ref fields.
    snapshot_created = b.snapshotCreated
    ks = {
        'lastPodRefAt': b.snapshotCreated,
        'lastPVCRefAt': b.snapshotCreated,
        'namespace': 'default',
        'pvcName': pvc_name,
        # Restoration should not apply PersistentVolume data.
        'pvName': '',
        'pvStatus': '',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, restore_name, ks)

    restore = client.by_id_volume(restore_name)
    # We need to compare LastPodRefAt and LastPVCRefAt manually since
    # wait_volume_kubernetes_status only checks for empty or non-empty state.
    assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"]
    assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"]

    delete_backup(client, bv.name, b.name)
    client.delete(restore)
    wait_for_volume_delete(client, restore_name)

    delete_and_wait_pod(core_api, pod_name)
    delete_and_wait_pvc(core_api, pvc_name)
    delete_and_wait_pv(core_api, pv_name)

    # With the Pod, PVC, and PV deleted, the Volume should have both Ref
    # fields set. Check that a new Backup and Restore will use this instead of
    # manually populating the Ref fields.
    # NOTE(review): 'NOT NULL' appears to act only as a non-empty placeholder
    # for wait_volume_kubernetes_status (it never compares the exact string);
    # other tests use 'not empty' for the same purpose — confirm in common.py.
    ks = {
        'lastPodRefAt': 'NOT NULL',
        'lastPVCRefAt': 'NOT NULL',
        'namespace': 'default',
        'pvcName': pvc_name,
        'pvName': '',
        'pvStatus': '',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, volume_name, ks)

    volume = wait_for_volume_detached(client, volume_name)
    volume.attach(hostId=host_id)
    volume = wait_for_volume_healthy(client, volume_name)

    snap = create_snapshot(client, volume_name)
    volume.snapshotBackup(name=snap.name)
    volume = wait_for_backup_completion(client, volume_name, snap.name)
    bv, b = find_backup(client, volume_name, snap.name)
    new_b = bv.backupGet(name=b.name)
    status = loads(new_b.labels.get(KUBERNETES_STATUS_LABEL))
    # Check each field manually, we have no idea what the LastPodRefAt or the
    # LastPVCRefAt will be. We just know it shouldn't be SnapshotCreated.
    assert status['lastPodRefAt'] != snapshot_created
    assert status['lastPVCRefAt'] != snapshot_created
    assert status['namespace'] == "default"
    assert status['pvcName'] == pvc_name
    assert status['pvName'] == ""
    assert status['pvStatus'] == ""
    assert status['workloadsStatus'] == [{
        'podName': pod_name,
        'podStatus': 'Running',
        'workloadName': '',
        'workloadType': ''
    }]

    restore_name = generate_volume_name()
    client.create_volume(name=restore_name, size=SIZE,
                         numberOfReplicas=2,
                         fromBackup=b.url)
    wait_for_volume_restoration_completed(client, restore_name)
    wait_for_volume_detached(client, restore_name)

    # The restored volume must inherit the Ref fields recorded in the backup's
    # KubernetesStatus label rather than the snapshot-created timestamp.
    ks = {
        'lastPodRefAt': status['lastPodRefAt'],
        'lastPVCRefAt': status['lastPVCRefAt'],
        'namespace': 'default',
        'pvcName': pvc_name,
        'pvName': '',
        'pvStatus': '',
        'workloadsStatus': [{
            'podName': pod_name,
            'podStatus': 'Running',
            'workloadName': '',
            'workloadType': ''
        }]
    }
    wait_volume_kubernetes_status(client, restore_name, ks)

    restore = client.by_id_volume(restore_name)
    assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"]
    assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"]

    # cleanup
    backupstore_cleanup(client)
    client.delete(restore)
    cleanup_volume(client, volume)
def engine_live_upgrade_test(client, volume_name, base_image=""):  # NOQA
    """
    Live (attached) engine upgrade round-trip.

    Creates a volume on the default engine image, writes data, upgrades the
    engine to a freshly registered test image while the volume stays attached,
    verifies data and image references, then downgrades back to the original
    image and verifies again.

    :param client: Longhorn API client fixture.
    :param volume_name: name for the test volume.
    :param base_image: optional base image for the volume.
    """
    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]
    # No volume exists yet, so the default image must have zero references.
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    cli_v = default_img["cliAPIVersion"]
    cli_minv = default_img["cliAPIMinVersion"]
    ctl_v = default_img["controllerAPIVersion"]
    ctl_minv = default_img["controllerAPIMinVersion"]
    data_v = default_img["dataFormatVersion"]
    data_minv = default_img["dataFormatMinVersion"]
    # Build a compatible upgrade image from the default image's API versions.
    engine_upgrade_image = common.get_upgrade_test_image(
        cli_v, cli_minv, ctl_v, ctl_minv, data_v, data_minv)

    new_img = client.create_engine_image(image=engine_upgrade_image)
    new_img_name = new_img["name"]
    new_img = wait_for_engine_image_state(client, new_img_name, "ready")
    assert new_img["refCount"] == 0
    assert new_img["noRefSince"] != ""

    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]

    # BUG FIX: was hard-coded numberOfReplicas=2, but the assertions below
    # (and the matching offline-upgrade test) expect REPLICA_COUNT replicas.
    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=REPLICA_COUNT,
                                  baseImage=base_image)
    volume = common.wait_for_volume_detached(client, volume_name)
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 1)

    assert volume["name"] == volume_name
    assert volume["baseImage"] == base_image

    original_engine_image = volume["engineImage"]
    assert original_engine_image != engine_upgrade_image

    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    # Engine and all replicas must still be running the original image.
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image
    assert engine["currentImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image
        assert replica["currentImage"] == original_engine_image

    data = write_volume_random_data(volume)

    # Live upgrade: volume remains attached while the engine image changes.
    volume.engineUpgrade(image=engine_upgrade_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           engine_upgrade_image)
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == engine_upgrade_image

    # Reference counts migrate from the default image to the new one.
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    new_img = wait_for_engine_image_ref_count(client, new_img_name, 1)

    count = 0
    # old replica may be in deletion process
    for replica in volume["replicas"]:
        if replica["currentImage"] == engine_upgrade_image:
            count += 1
    assert count == REPLICA_COUNT

    check_volume_data(volume, data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == REPLICA_COUNT
    assert volume["engineImage"] == engine_upgrade_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == engine_upgrade_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == engine_upgrade_image

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    assert volume["engineImage"] == engine_upgrade_image
    assert volume["currentImage"] == engine_upgrade_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == engine_upgrade_image
    assert engine["currentImage"] == engine_upgrade_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == engine_upgrade_image
        assert replica["currentImage"] == engine_upgrade_image

    # Make sure detaching didn't somehow interfere with the data.
    check_volume_data(volume, data)

    # Live downgrade back to the original engine image.
    volume.engineUpgrade(image=original_engine_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           original_engine_image)
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image

    default_img = wait_for_engine_image_ref_count(client, default_img_name, 1)
    new_img = wait_for_engine_image_ref_count(client, new_img_name, 0)

    assert volume["engineImage"] == original_engine_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image

    count = 0
    # old replica may be in deletion process
    for replica in volume["replicas"]:
        if replica["engineImage"] == original_engine_image:
            count += 1
    assert count == REPLICA_COUNT

    check_volume_data(volume, data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)
    assert len(volume["replicas"]) == REPLICA_COUNT

    assert volume["engineImage"] == original_engine_image
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    client.delete(new_img)
def test_restore_rwo_volume_to_rwx(set_random_backupstore, client, core_api, volume_name, pvc, csi_pv, pod_make, make_deployment_with_pvc):  # NOQA
    """
    Test restoring a rwo to a rwx volume.

    1. Create a volume with 'accessMode' rwo.
    2. Create a PV and a PVC with access mode 'readwriteonce' and attach to
       the volume.
    3. Create a pod and attach to the PVC.
    4. Write some data into the pod and compute md5sum.
    5. Take a backup of the volume.
    6. Restore the backup with 'accessMode' rwx.
    7. Create PV and PVC and attach to 2 pods.
    8. Verify the data.
    """
    test_data_path = "/data/test"
    # Provision a RWO volume, mount it in a pod and seed it with data.
    src_pod, src_pv, src_pvc, expected_md5 = prepare_pod_with_data_in_mb(
        client, core_api, csi_pv, pvc, pod_make, volume_name,
        data_size_in_mb=DATA_SIZE_IN_MB_1, data_path=test_data_path)

    # Back up the seeded volume from a fresh snapshot.
    backup_snap = create_snapshot(client, volume_name)
    source_volume = client.by_id_volume(volume_name)
    source_volume.snapshotBackup(name=backup_snap.name)
    wait_for_backup_completion(client, volume_name, backup_snap.name)
    backup_volume, backup = find_backup(client, volume_name, backup_snap.name)

    restored_name = 'restored-rwx-volume'
    restored_pv = restored_name + "-pv"
    restored_pvc = restored_name + "-pvc"
    # Restore the backup into a new volume, this time with shared access.
    client.create_volume(name=restored_name, size=str(1 * Gi),
                         numberOfReplicas=3, fromBackup=backup.url,
                         accessMode='rwx')
    wait_for_volume_creation(client, restored_name)
    restored_volume = wait_for_volume_detached(client, restored_name)
    create_pv_for_volume(client, core_api, restored_volume, restored_pv)
    create_pvc_for_volume(client, core_api, restored_volume, restored_pvc)

    # Mount the RWX volume from two pods of one deployment simultaneously.
    deployment = make_deployment_with_pvc('deployment-multi-pods-test',
                                          restored_pvc, replicas=2)
    apps_api = get_apps_api_client()
    create_and_wait_deployment(apps_api, deployment)

    selector = f"name={deployment['metadata']['labels']['name']}"
    pods = core_api.list_namespaced_pod(namespace="default",
                                        label_selector=selector)
    first_pod = pods.items[0].metadata.name
    second_pod = pods.items[1].metadata.name

    # Both pods must see exactly the data written to the original RWO volume.
    first_md5 = get_pod_data_md5sum(core_api, first_pod, test_data_path)
    second_md5 = get_pod_data_md5sum(core_api, second_pod, test_data_path)
    assert first_md5 == expected_md5
    assert second_md5 == expected_md5
def engine_offline_upgrade_test(client, volume_name, base_image=""):  # NOQA
    """
    Offline (detached) engine upgrade round-trip.

    Registers a compatible test engine image, writes data to a volume on the
    default image, upgrades the engine while the volume is detached, verifies
    image references and data after reattach, then downgrades back to the
    original image and verifies again.

    :param client: Longhorn API client fixture.
    :param volume_name: name for the test volume.
    :param base_image: optional base image for the volume.
    """
    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]
    # No volume exists yet, so the default image must have zero references.
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    cli_v = default_img["cliAPIVersion"]
    cli_minv = default_img["cliAPIMinVersion"]
    ctl_v = default_img["controllerAPIVersion"]
    ctl_minv = default_img["controllerAPIMinVersion"]
    data_v = default_img["dataFormatVersion"]
    data_minv = default_img["dataFormatMinVersion"]
    # Build a compatible upgrade image from the default image's API versions.
    engine_upgrade_image = common.get_upgrade_test_image(
        cli_v, cli_minv, ctl_v, ctl_minv, data_v, data_minv)

    new_img = client.create_engine_image(image=engine_upgrade_image)
    new_img_name = new_img["name"]
    new_img = wait_for_engine_image_state(client, new_img_name, "ready")
    assert new_img["refCount"] == 0
    assert new_img["noRefSince"] != ""

    default_img = common.get_default_engine_image(client)
    default_img_name = default_img["name"]

    volume = client.create_volume(name=volume_name, size=SIZE,
                                  numberOfReplicas=REPLICA_COUNT,
                                  baseImage=base_image)
    volume = common.wait_for_volume_detached(client, volume_name)
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 1)

    original_engine_image = default_img["image"]
    assert volume["name"] == volume_name
    assert volume["engineImage"] == original_engine_image
    assert volume["currentImage"] == original_engine_image
    assert volume["baseImage"] == base_image

    # Before our upgrade, write data to the volume first.
    host_id = get_self_host_id()
    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)
    data = write_volume_random_data(volume)
    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    # Offline upgrade: the volume is detached while the engine image changes.
    volume.engineUpgrade(image=engine_upgrade_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           engine_upgrade_image)
    # Reference counts migrate from the default image to the new one.
    default_img = wait_for_engine_image_ref_count(client, default_img_name, 0)
    new_img = wait_for_engine_image_ref_count(client, new_img_name, 1)

    # cannot delete a image in use
    with pytest.raises(Exception) as e:
        client.delete(new_img)
    assert "while being used" in str(e.value)

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    # Engine and every replica must now run the upgraded image.
    engine = get_volume_engine(volume)
    assert engine["engineImage"] == engine_upgrade_image
    assert engine["currentImage"] == engine_upgrade_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == engine_upgrade_image
        assert replica["currentImage"] == engine_upgrade_image

    check_volume_data(volume, data)

    volume = volume.detach()
    volume = common.wait_for_volume_detached(client, volume_name)

    # Downgrade back to the original engine image, again while detached.
    volume.engineUpgrade(image=original_engine_image)
    volume = wait_for_volume_current_image(client, volume_name,
                                           original_engine_image)
    engine = get_volume_engine(volume)
    assert volume["engineImage"] == original_engine_image
    assert engine["engineImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image

    default_img = wait_for_engine_image_ref_count(client, default_img_name, 1)
    new_img = wait_for_engine_image_ref_count(client, new_img_name, 0)

    volume = volume.attach(hostId=host_id)
    volume = common.wait_for_volume_healthy(client, volume_name)

    engine = get_volume_engine(volume)
    assert engine["engineImage"] == original_engine_image
    assert engine["currentImage"] == original_engine_image
    for replica in volume["replicas"]:
        assert replica["engineImage"] == original_engine_image
        assert replica["currentImage"] == original_engine_image

    # Data must survive the full upgrade/downgrade round-trip.
    check_volume_data(volume, data)

    client.delete(volume)
    wait_for_volume_delete(client, volume_name)

    client.delete(new_img)
def test_statefulset_restore(client, core_api, storage_class,  # NOQA
                             statefulset):  # NOQA
    """
    Test that data can be restored into volumes usable by a StatefulSet.

    Creates a StatefulSet, backs up its volumes, tears it down, then
    pre-provisions restored volumes (with matching PV/PVCs under a NEW name)
    and verifies a second StatefulSet adopts them and sees the original data.
    """
    statefulset_name = 'statefulset-restore-test'
    update_statefulset_manifests(statefulset, storage_class, statefulset_name)

    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)

    # Back up each replica's volume; pod_info carries pod/pvc names, written
    # data, and the backup snapshot URL for later restoration.
    pod_info = get_statefulset_pod_info(core_api, statefulset)
    create_and_test_backups(core_api, client, pod_info)

    delete_and_wait_statefulset(core_api, client, statefulset)

    # Provisioning path differs between the CSI driver and flexvolume.
    csi = check_csi(core_api)

    # StatefulSet fixture already cleans these up, use the manifests instead of
    # the fixtures to avoid issues during teardown.
    pv = {
        'apiVersion': 'v1',
        'kind': 'PersistentVolume',
        'metadata': {
            'name': ''
        },
        'spec': {
            'capacity': {
                'storage': size_to_string(DEFAULT_VOLUME_SIZE * Gi)
            },
            'volumeMode': 'Filesystem',
            'accessModes': ['ReadWriteOnce'],
            'persistentVolumeReclaimPolicy': 'Delete',
            'storageClassName': DEFAULT_STORAGECLASS_NAME
        }
    }

    pvc = {
        'apiVersion': 'v1',
        'kind': 'PersistentVolumeClaim',
        'metadata': {
            'name': ''
        },
        'spec': {
            'accessModes': [
                'ReadWriteOnce'
            ],
            'resources': {
                'requests': {
                    'storage': size_to_string(DEFAULT_VOLUME_SIZE * Gi)
                }
            },
            'storageClassName': DEFAULT_STORAGECLASS_NAME
        }
    }

    if csi:
        pv['spec']['csi'] = {
            'driver': 'io.rancher.longhorn',
            'fsType': 'ext4',
            'volumeAttributes': {
                'numberOfReplicas':
                    storage_class['parameters']['numberOfReplicas'],
                'staleReplicaTimeout':
                    storage_class['parameters']['staleReplicaTimeout']
            },
            'volumeHandle': ''
        }
    else:
        pv['spec']['flexVolume'] = {
            'driver': 'rancher.io/longhorn',
            'fsType': 'ext4',
            'options': {
                'numberOfReplicas':
                    storage_class['parameters']['numberOfReplicas'],
                'staleReplicaTimeout':
                    storage_class['parameters']['staleReplicaTimeout'],
                'fromBackup': '',
                'size': size_to_string(DEFAULT_VOLUME_SIZE * Gi)
            }
        }

    # Make sure that volumes still work even if the Pod and StatefulSet names
    # are different.
    for pod in pod_info:
        # Rename to the second StatefulSet's naming scheme so the restored
        # PVCs are adopted by it rather than by the deleted one.
        pod['pod_name'] = pod['pod_name'].replace('statefulset-restore-test',
                                                  'statefulset-restore-test-2')
        pod['pvc_name'] = pod['pvc_name'].replace('statefulset-restore-test',
                                                  'statefulset-restore-test-2')

        pv['metadata']['name'] = pod['pvc_name']

        if csi:
            # CSI: restore the Longhorn volume explicitly, then point the PV's
            # volumeHandle at it.
            client.create_volume(
                name=pod['pvc_name'],
                size=size_to_string(DEFAULT_VOLUME_SIZE * Gi),
                numberOfReplicas=int(
                    storage_class['parameters']['numberOfReplicas']),
                fromBackup=pod['backup_snapshot']['url'])
            wait_for_volume_detached(client, pod['pvc_name'])

            pv['spec']['csi']['volumeHandle'] = pod['pvc_name']
        else:
            # Flexvolume: the driver restores from backup on first attach.
            pv['spec']['flexVolume']['options']['fromBackup'] = \
                pod['backup_snapshot']['url']

        core_api.create_persistent_volume(pv)

        pvc['metadata']['name'] = pod['pvc_name']
        pvc['spec']['volumeName'] = pod['pvc_name']

        core_api.create_namespaced_persistent_volume_claim(
            body=pvc, namespace='default')

    statefulset_name = 'statefulset-restore-test-2'
    update_statefulset_manifests(statefulset, storage_class, statefulset_name)
    create_and_wait_statefulset(statefulset)

    # Each new pod must read back exactly the data captured in its backup.
    for pod in pod_info:
        resp = read_volume_data(core_api, pod['pod_name'])
        assert resp == pod['data']