def test_provisioner_mount(client, core_api, storage_class, pvc, pod):  # NOQA
    """
    Test that a StorageClass provisioned volume can be created, mounted,
    unmounted, and deleted properly on the Kubernetes cluster.

    Fixtures are torn down in the reverse order in which they are specified as
    parameters. Take caution when reordering test fixtures.
    """
    # Prepare pod and volume specs.
    pod_name = 'provisioner-mount-test'
    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [
        create_pvc_spec(pvc['metadata']['name'])
    ]
    pvc['spec']['storageClassName'] = DEFAULT_STORAGECLASS_NAME
    storage_class['metadata']['name'] = DEFAULT_STORAGECLASS_NAME
    volume_size = DEFAULT_VOLUME_SIZE * Gi

    create_storage(core_api, storage_class, pvc)
    create_and_wait_pod(core_api, pod)
    pvc_volume_name = get_volume_name(core_api, pvc['metadata']['name'])

    # Confirm that the volume has all the correct parameters we gave it.
    volumes = client.list_volume()
    assert len(volumes) == 1
    assert volumes[0]["name"] == pvc_volume_name
    assert volumes[0]["size"] == str(volume_size)
    assert volumes[0]["numberOfReplicas"] == \
        int(storage_class['parameters']['numberOfReplicas'])
    assert volumes[0]["state"] == "attached"
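# For context, the `storage_class` and `pvc` fixtures used above are plain
# Kubernetes manifest dicts that `create_storage` submits through the API.
# Below is a minimal sketch of what such manifests might look like; the
# provisioner string and parameter values are illustrative assumptions, not
# the suite's actual fixture defaults.
example_storage_class = {
    'apiVersion': 'storage.k8s.io/v1',
    'kind': 'StorageClass',
    'metadata': {'name': 'longhorn-provisioner-test'},
    'provisioner': 'driver.longhorn.io',  # assumed Longhorn CSI provisioner
    'parameters': {
        'numberOfReplicas': '3',
        'staleReplicaTimeout': '30',
    },
}

example_pvc = {
    'apiVersion': 'v1',
    'kind': 'PersistentVolumeClaim',
    'metadata': {'name': 'provisioner-test-pvc'},
    'spec': {
        'accessModes': ['ReadWriteOnce'],
        'storageClassName': 'longhorn-provisioner-test',
        'resources': {'requests': {'storage': '2Gi'}},
    },
}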
def test_provisioner_io(client, core_api, storage_class, pvc, pod):  # NOQA
    """
    Test that input and output on a StorageClass provisioned
    PersistentVolumeClaim works as expected.

    Fixtures are torn down in the reverse order in which they are specified as
    parameters. Take caution when reordering test fixtures.
    """
    # Prepare pod and volume specs.
    pod_name = 'provisioner-io-test'
    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [
        create_pvc_spec(pvc['metadata']['name'])
    ]
    pvc['spec']['storageClassName'] = DEFAULT_STORAGECLASS_NAME
    storage_class['metadata']['name'] = DEFAULT_STORAGECLASS_NAME
    test_data = generate_random_data(VOLUME_RWTEST_SIZE)

    create_storage(core_api, storage_class, pvc)
    create_and_wait_pod(core_api, pod)
    pvc_volume_name = get_volume_name(core_api, pvc['metadata']['name'])
    write_volume_data(core_api, pod_name, test_data)
    delete_and_wait_pod(core_api, pod_name)
    common.wait_for_volume_detached(client, pvc_volume_name)

    # Reuse the provisioned volume from a second pod and verify the data.
    pod_name = 'provisioner-io-test-2'
    pod['metadata']['name'] = pod_name
    create_and_wait_pod(core_api, pod)

    resp = read_volume_data(core_api, pod_name)
    assert resp == test_data
def csi_io_test(client, core_api, csi_pv, pvc, pod_make, base_image=""): # NOQA pv_name = generate_volume_name() pod_name = 'csi-io-test' create_and_wait_csi_pod_named_pv(pv_name, pod_name, client, core_api, csi_pv, pvc, pod_make, base_image, "") test_data = generate_random_data(VOLUME_RWTEST_SIZE) write_volume_data(core_api, pod_name, test_data) delete_and_wait_pod(core_api, pod_name) common.wait_for_volume_detached(client, csi_pv['metadata']['name']) pod_name = 'csi-io-test-2' pod = pod_make(name=pod_name) pod['spec']['volumes'] = [ create_pvc_spec(pv_name) ] csi_pv['metadata']['name'] = pv_name csi_pv['spec']['csi']['volumeHandle'] = pv_name pvc['metadata']['name'] = pv_name pvc['spec']['volumeName'] = pv_name update_storageclass_references(CSI_PV_TEST_STORAGE_NAME, csi_pv, pvc) create_and_wait_pod(core_api, pod) resp = read_volume_data(core_api, pod_name) assert resp == test_data
def create_and_wait_csi_pod_named_pv(pv_name, pod_name, client, core_api, csi_pv, pvc, pod_make, base_image, from_backup): # NOQA pod = pod_make(name=pod_name) pod['spec']['volumes'] = [ create_pvc_spec(pv_name) ] csi_pv['metadata']['name'] = pv_name csi_pv['spec']['csi']['volumeHandle'] = pv_name csi_pv['spec']['csi']['volumeAttributes']['fromBackup'] = from_backup pvc['metadata']['name'] = pv_name pvc['spec']['volumeName'] = pv_name update_storageclass_references(CSI_PV_TEST_STORAGE_NAME, csi_pv, pvc) create_pv_storage(core_api, client, csi_pv, pvc, base_image, from_backup) create_and_wait_pod(core_api, pod)
def flexvolume_mount_test(client, core_api, flexvolume, pod, volume_size): # NOQA pod_name = 'flexvolume-mount-test' pod['metadata']['name'] = pod_name pod['spec']['containers'][0]['volumeMounts'][0]['name'] = \ flexvolume['name'] pod['spec']['volumes'] = [ flexvolume ] create_and_wait_pod(core_api, pod) volumes = client.list_volume() assert len(volumes) == 1 assert volumes[0]["name"] == flexvolume['name'] assert volumes[0]["size"] == str(volume_size) assert volumes[0]["numberOfReplicas"] == int( flexvolume["flexVolume"]["options"]["numberOfReplicas"]) assert volumes[0]["state"] == "attached"
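# The `flexvolume` fixture referenced above is a pod volume entry whose
# options mirror Longhorn volume settings. A rough sketch of its assumed
# shape follows; the driver string and option values are illustrative, not
# the fixture's actual defaults.
example_flexvolume = {
    'name': 'flexvolume-mount-test-vol',
    'flexVolume': {
        'driver': 'rancher.io/longhorn',  # assumed legacy FlexVolume driver
        'fsType': 'ext4',
        'options': {
            'size': '2Gi',
            'numberOfReplicas': '2',
            'staleReplicaTimeout': '20',
        },
    },
}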
def flexvolume_io_test(client, core_api, flexvolume, pod): # NOQA pod_name = 'flexvolume-io-test' pod['metadata']['name'] = pod_name pod['spec']['containers'][0]['volumeMounts'][0]['name'] = \ flexvolume['name'] pod['spec']['volumes'] = [ flexvolume ] test_data = generate_random_data(VOLUME_RWTEST_SIZE) create_and_wait_pod(core_api, pod) common.write_volume_data(core_api, pod_name, test_data) delete_and_wait_pod(core_api, pod_name) wait_for_volume_detached(client, flexvolume["name"]) pod_name = 'volume-driver-io-test-2' pod['metadata']['name'] = pod_name create_and_wait_pod(core_api, pod) resp = read_volume_data(core_api, pod_name) assert resp == test_data
def test_provisioner_params(client, core_api, storage_class, pvc, pod):  # NOQA
    """
    Test that substituting different StorageClass parameters is reflected in
    the resulting volume.

    Fixtures are torn down in the reverse order in which they are specified as
    parameters. Take caution when reordering test fixtures.
    """
    # Prepare pod and volume specs.
    pod_name = 'provisioner-params-test'
    volume_size = 2 * Gi
    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [
        create_pvc_spec(pvc['metadata']['name'])
    ]
    pvc['spec']['resources']['requests']['storage'] = \
        size_to_string(volume_size)
    pvc['spec']['storageClassName'] = DEFAULT_STORAGECLASS_NAME
    storage_class['metadata']['name'] = DEFAULT_STORAGECLASS_NAME
    storage_class['parameters'] = {
        'numberOfReplicas': '2',
        'staleReplicaTimeout': '20'
    }

    create_storage(core_api, storage_class, pvc)
    create_and_wait_pod(core_api, pod)
    pvc_volume_name = get_volume_name(core_api, pvc['metadata']['name'])

    # Confirm that the volume has all the correct parameters we gave it.
    volumes = client.list_volume()
    assert len(volumes) == 1
    assert volumes[0]["name"] == pvc_volume_name
    assert volumes[0]["size"] == str(volume_size)
    assert volumes[0]["numberOfReplicas"] == \
        int(storage_class['parameters']['numberOfReplicas'])
    assert volumes[0]["state"] == "attached"
def test_provisioner_tags(client, core_api, node_default_tags, storage_class, pvc, pod): # NOQA """ Test that a StorageClass can properly provision a volume with requested Tags. """ # Prepare pod and volume specs. pod_name = 'provisioner-tags-test' tag_spec = { "disk": ["ssd", "nvme"], "expected": 1, "node": ["storage", "main"] } pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [create_pvc_spec(pvc['metadata']['name'])] pvc['spec']['storageClassName'] = DEFAULT_STORAGECLASS_NAME storage_class['metadata']['name'] = DEFAULT_STORAGECLASS_NAME storage_class['parameters']['diskSelector'] = 'ssd,nvme' storage_class['parameters']['nodeSelector'] = 'storage,main' volume_size = DEFAULT_VOLUME_SIZE * Gi create_storage(core_api, storage_class, pvc) create_and_wait_pod(core_api, pod) pvc_volume_name = get_volume_name(core_api, pvc['metadata']['name']) # Confirm that the volume has all the correct parameters we gave it. volumes = client.list_volume() assert len(volumes) == 1 assert volumes[0]["name"] == pvc_volume_name assert volumes[0]["size"] == str(volume_size) assert volumes[0]["numberOfReplicas"] == \ int(storage_class['parameters']['numberOfReplicas']) assert volumes[0]["state"] == "attached" check_volume_replicas(volumes[0], tag_spec, node_default_tags)
def test_rwx_parallel_writing(core_api, statefulset, pod):  # NOQA
    """
    Test parallel writing of data

    1. Create a StatefulSet of 1 pod with VolumeClaimTemplate where accessMode
       is 'RWX'.
    2. Wait for the StatefulSet to come up healthy.
    3. Create another pod using the PVC created by the first StatefulSet.
    4. Wait for the pod to come up running.
    5. In parallel, write 800 MB of data into `file 1` from the StatefulSet
       pod and 500 MB of data into `file 2` from the second pod.
    6. Compute the md5sum of both files.
    7. Check the data md5sum in the share manager pod volume.
    """
    statefulset_name = 'statefulset-rwx-parallel-writing-test'
    statefulset['metadata']['name'] = \
        statefulset['spec']['selector']['matchLabels']['app'] = \
        statefulset['spec']['serviceName'] = \
        statefulset['spec']['template']['metadata']['labels']['app'] = \
        statefulset_name
    statefulset['spec']['replicas'] = 1
    statefulset['spec']['volumeClaimTemplates'][0]['spec']['storageClassName']\
        = 'longhorn'
    statefulset['spec']['volumeClaimTemplates'][0]['spec']['accessModes'] \
        = ['ReadWriteMany']

    create_and_wait_statefulset(statefulset)

    statefulset_pod_name = statefulset_name + '-0'
    pvc_name = \
        statefulset['spec']['volumeClaimTemplates'][0]['metadata']['name'] \
        + '-' + statefulset_name + '-0'
    pv_name = get_volume_name(core_api, pvc_name)
    share_manager_name = 'share-manager-' + pv_name

    pod_name = 'pod-parallel-write-test'
    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [create_pvc_spec(pvc_name)]
    create_and_wait_pod(core_api, pod)

    with Pool(2) as p:
        p.map(write_data_into_pod,
              [statefulset_pod_name + ':/data/test1',
               pod_name + ':/data/test2'])

    md5sum1 = get_pod_data_md5sum(core_api, statefulset_pod_name,
                                  'data/test1')
    md5sum2 = get_pod_data_md5sum(core_api, pod_name, 'data/test2')

    command1 = 'md5sum /export' + '/' + pv_name + '/' + 'test1' + \
               " | awk '{print $1}'"
    share_manager_data1 = exec_command_in_pod(core_api, command1,
                                              share_manager_name,
                                              LONGHORN_NAMESPACE)
    assert md5sum1 == share_manager_data1

    command2 = 'md5sum /export' + '/' + pv_name + '/' + 'test2' + \
               " | awk '{print $1}'"
    share_manager_data2 = exec_command_in_pod(core_api, command2,
                                              share_manager_name,
                                              LONGHORN_NAMESPACE)
    assert md5sum2 == share_manager_data2
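# The Pool.map call above hands each worker a string of the form
# "<pod name>:<absolute path inside the pod>". The following is a hypothetical
# sketch of a compatible worker; the suite's real write_data_into_pod helper
# may be implemented differently.
from common import get_core_api_client, write_pod_volume_random_data  # assumed imports


def write_data_into_pod(pod_and_path, data_size_in_mb=500):
    # Split the encoded argument back into pod name and target path.
    pod_name, path = pod_and_path.split(':', 1)
    # Each worker process builds its own API client instead of sharing one
    # across the fork.
    api = get_core_api_client()
    write_pod_volume_random_data(api, pod_name, path, data_size_in_mb)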
def generate_load(request): index = get_random_suffix() longhorn_api_client = get_longhorn_api_client() k8s_api_client = get_core_api_client() check_and_set_backupstore(longhorn_api_client) volume_name = STRESS_VOLUME_NAME_PREFIX + index pv_name = STRESS_PV_NAME_PREFIX + index pvc_name = STRESS_PVC_NAME_PREFIX + index pod_name = STRESS_POD_NAME_PREFIX + index atexit.register(remove_datafile, pod_name) atexit.register(delete_and_wait_longhorn, longhorn_api_client, volume_name) atexit.register(delete_and_wait_pv, k8s_api_client, pv_name) atexit.register(delete_and_wait_pvc, k8s_api_client, pvc_name) atexit.register(delete_and_wait_pod, k8s_api_client, pod_name) longhorn_volume = create_and_check_volume(longhorn_api_client, volume_name, size=VOLUME_SIZE) wait_for_volume_detached(longhorn_api_client, volume_name) pod_manifest = generate_pod_with_pvc_manifest(pod_name, pvc_name) create_pv_for_volume(longhorn_api_client, k8s_api_client, longhorn_volume, pv_name) create_pvc_for_volume(longhorn_api_client, k8s_api_client, longhorn_volume, pvc_name) create_and_wait_pod(k8s_api_client, pod_manifest) snapshots_md5sum = dict() write_data(k8s_api_client, pod_name) create_recurring_jobs(longhorn_api_client, volume_name) global N_RANDOM_ACTIONS for round in range(N_RANDOM_ACTIONS): action = randrange(0, 8) if action == 0: print("write data started: " + time_now(), end=', ') write_data(k8s_api_client, pod_name) print("ended: " + time_now()) elif action == 1: print("delete data started: " + time_now(), end=', ') delete_data(k8s_api_client, pod_name) print("ended: " + time_now()) elif action == 2: print("create snapshot started: " + time_now(), end=', ') snapshot_create_and_record_md5sum(longhorn_api_client, k8s_api_client, volume_name, pod_name, snapshots_md5sum) print("ended: " + time_now()) elif action == 3: print("delete random snapshot started: " + time_now(), end=', ') delete_random_snapshot(longhorn_api_client, volume_name, snapshots_md5sum) print("ended: " + time_now()) elif action == 4: print("revert random snapshot started: " + time_now(), end=', ') revert_random_snapshot(longhorn_api_client, k8s_api_client, volume_name, pod_manifest, snapshots_md5sum) print("ended: " + time_now()) elif action == 5: print("create backup started: " + time_now(), end=', ') backup_create_and_record_md5sum(longhorn_api_client, k8s_api_client, volume_name, pod_name, snapshots_md5sum) print("ended: " + time_now()) elif action == 6: print("delete replica started: " + time_now(), end=', ') delete_replica(longhorn_api_client, volume_name) print("ended: " + time_now()) elif action == 7: print("restore random backup started: " + time_now(), end=', ') restore_and_check_random_backup(longhorn_api_client, k8s_api_client, volume_name, pod_name, snapshots_md5sum) print("ended: " + time_now()) clean_volume_backups(longhorn_api_client, volume_name)
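# The if/elif ladder above dispatches on a random integer. If the action set
# grows, the same behavior can be expressed as a lookup table; this is only a
# sketch and assumes the time_now() helper and the action functions used above
# are in scope.
from random import randrange


def run_random_actions(actions, n_actions):
    """actions: list of (label, zero-argument callable) pairs.

    Example entry: ('write data', lambda: write_data(k8s_api_client, pod_name))
    """
    for _ in range(n_actions):
        label, action = actions[randrange(len(actions))]
        print(label + " started: " + time_now(), end=', ')
        action()
        print("ended: " + time_now())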
def test_csi_volumesnapshot_restore_existing_backup( set_random_backupstore, # NOQA client, # NOQA core_api, # NOQA volume_name, # NOQA csi_pv, # NOQA pvc, # NOQA pod_make, # NOQA volumesnapshotclass, # NOQA volumesnapshotcontent, volumesnapshot, # NOQA volsnapshotclass_delete_policy, # NOQA backup_is_deleted): # NOQA """ Test retention of a backup while deleting the associated `VolumeSnapshot` via the csi snapshotter Context: We want to allow the user to programmatically create/restore/delete longhorn backups via the csi snapshot mechanism ref: https://kubernetes.io/docs/concepts/storage/volume-snapshots/ Setup: 1. Make sure your cluster contains the below crds https://github.com/kubernetes-csi/external-snapshotter /tree/master/client/config/crd 2. Make sure your cluster contains the snapshot controller https://github.com/kubernetes-csi/external-snapshotter /tree/master/deploy/kubernetes/snapshot-controller Steps: 1. create new snapshotClass with deletionPolicy set to Retain 2. call csi_volumesnapshot_creation_test(snapshotClass=custom) 3. call csi_volumesnapshot_restore_test() 4. call csi_volumesnapshot_deletion_test(deletionPolicy='Retain'): 5. cleanup """ csisnapclass = \ volumesnapshotclass(name="snapshotclass", deletepolicy=volsnapshotclass_delete_policy) pod_name, pv_name, pvc_name, md5sum = \ prepare_pod_with_data_in_mb(client, core_api, csi_pv, pvc, pod_make, volume_name, data_path="/data/test") volume = client.by_id_volume(volume_name) snap = create_snapshot(client, volume_name) volume.snapshotBackup(name=snap.name) wait_for_backup_completion(client, volume_name, snap.name) bv, b = find_backup(client, volume_name, snap.name) csivolsnap_name = volume_name + "-volumesnapshot" csivolsnap_namespace = "default" volsnapcontent = \ volumesnapshotcontent("volsnapcontent", csisnapclass["metadata"]["name"], "Delete", "bs://" + volume_name + "/" + b.name, csivolsnap_name, csivolsnap_namespace) csivolsnap = volumesnapshot(csivolsnap_name, csivolsnap_namespace, csisnapclass["metadata"]["name"], "volumeSnapshotContentName", volsnapcontent["metadata"]["name"]) restore_pvc_name = pvc["metadata"]["name"] + "-restore" restore_pvc_size = pvc["spec"]["resources"]["requests"]["storage"] restore_csi_volume_snapshot(core_api, client, csivolsnap, restore_pvc_name, restore_pvc_size) restore_pod = pod_make() restore_pod_name = restore_pod["metadata"]["name"] restore_pod['spec']['volumes'] = [create_pvc_spec(restore_pvc_name)] create_and_wait_pod(core_api, restore_pod) restore_md5sum = \ get_pod_data_md5sum(core_api, restore_pod_name, path="/data/test") assert restore_md5sum == md5sum # Delete volumeSnapshot test delete_volumesnapshot(csivolsnap["metadata"]["name"], "default") if backup_is_deleted is False: find_backup(client, volume_name, b["snapshotName"]) else: wait_for_backup_delete(client, volume_name, b["name"])
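# Assumed shape of the object the `volumesnapshotcontent` fixture builds above;
# field names follow the snapshot.storage.k8s.io/v1 API and the names below are
# placeholders. The key detail is the `bs://<volume>/<backup>` snapshotHandle,
# which points the CSI snapshotter at a pre-existing Longhorn backup.
example_volumesnapshotcontent = {
    'apiVersion': 'snapshot.storage.k8s.io/v1',
    'kind': 'VolumeSnapshotContent',
    'metadata': {'name': 'volsnapcontent'},
    'spec': {
        'volumeSnapshotClassName': 'snapshotclass',
        'driver': 'driver.longhorn.io',  # assumed Longhorn CSI driver name
        'deletionPolicy': 'Delete',
        'source': {
            'snapshotHandle': 'bs://test-volume/backup-abc123',
        },
        'volumeSnapshotRef': {
            'name': 'test-volume-volumesnapshot',
            'namespace': 'default',
        },
    },
}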
def test_cloning_interrupted(client, core_api, pvc, pod, clone_pvc, clone_pod): # NOQA """ 1. Create a PVC: ```yaml apiVersion: v1 kind: PersistentVolumeClaim metadata: name: source-pvc spec: storageClassName: longhorn accessModes: - ReadWriteOnce resources: requests: storage: 3Gi ``` 2. Specify the `source-pvc` in a pod yaml and start the pod 3. Wait for the pod to be running, write 500MB of data to the mount path of the volume 4. Clone a volume by creating the PVC: ```yaml apiVersion: v1 kind: PersistentVolumeClaim metadata: name: cloned-pvc spec: storageClassName: longhorn dataSource: name: source-pvc kind: PersistentVolumeClaim accessModes: - ReadWriteOnce resources: requests: storage: 3Gi ``` 5. Wait for the `CloneStatus.State` in `cloned-pvc` to be `initiated` 6. Kill all replicas process of the `source-pvc` 7. Wait for the `CloneStatus.State` in `cloned-pvc` to be `failed` 8. Clean up `clone-pvc` 9. Redeploy `cloned-pvc` and clone pod 10. In 3-min retry loop, verify cloned pod become running 11. `cloned-pvc` has the same data as `source-pvc` 12. In 2-min retry loop, verify the volume of the `clone-pvc` eventually becomes healthy. """ # Step-1 source_pvc_name = 'source-pvc' + generate_random_suffix() pvc['metadata']['name'] = source_pvc_name pvc['spec']['storageClassName'] = 'longhorn' core_api.create_namespaced_persistent_volume_claim( body=pvc, namespace='default') wait_for_pvc_phase(core_api, source_pvc_name, "Bound") # Step-2 pod_name = 'source-pod' + generate_random_suffix() pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [create_pvc_spec(source_pvc_name)] create_and_wait_pod(core_api, pod) # Step-3 write_pod_volume_random_data(core_api, pod_name, '/data/test', DATA_SIZE_IN_MB_3) source_data = get_pod_data_md5sum(core_api, pod_name, '/data/test') source_volume_name = get_volume_name(core_api, source_pvc_name) # Step-4 clone_pvc_name = 'clone-pvc' + generate_random_suffix() clone_pvc['metadata']['name'] = clone_pvc_name clone_pvc['spec']['storageClassName'] = 'longhorn' clone_pvc['spec']['dataSource'] = { 'name': source_pvc_name, 'kind': 'PersistentVolumeClaim' } core_api.create_namespaced_persistent_volume_claim( body=clone_pvc, namespace='default') # Step-5 clone_volume_name = get_clone_volume_name(client, source_volume_name) wait_for_volume_clone_status(client, clone_volume_name, VOLUME_FIELD_STATE, 'initiated') # Step-6 crash_replica_processes(client, core_api, source_volume_name) # Step-7 wait_for_volume_faulted(client, source_volume_name) wait_for_volume_clone_status(client, clone_volume_name, VOLUME_FIELD_STATE, 'failed') # Step-8 delete_and_wait_pvc(core_api, clone_pvc_name) # Step-9 clone_pvc_name = 'clone-pvc-2' + generate_random_suffix() clone_pvc['metadata']['name'] = clone_pvc_name clone_pvc['spec']['storageClassName'] = 'longhorn' clone_pvc['spec']['dataSource'] = { 'name': source_pvc_name, 'kind': 'PersistentVolumeClaim' } core_api.create_namespaced_persistent_volume_claim( body=clone_pvc, namespace='default') wait_for_pvc_phase(core_api, clone_pvc_name, "Bound") # Step-9 clone_pod_name = 'clone-pod' + generate_random_suffix() clone_pod['metadata']['name'] = clone_pod_name clone_pod['spec']['volumes'] = [create_pvc_spec(clone_pvc_name)] create_and_wait_pod(core_api, clone_pod) # Step-10 clone_volume_name = get_volume_name(core_api, clone_pvc_name) wait_for_volume_clone_status(client, clone_volume_name, VOLUME_FIELD_STATE, VOLUME_FIELD_CLONE_COMPLETED) # Step-11 clone_data = get_pod_data_md5sum(core_api, clone_pod_name, '/data/test') assert source_data 
== clone_data # Step-12 wait_for_volume_healthy(client, clone_volume_name)
def test_kubernetes_status( client, core_api, storage_class, # NOQA statefulset, csi_pv, pvc, pod): # NOQA statefulset_name = 'kubernetes-status-test' update_statefulset_manifests(statefulset, storage_class, statefulset_name) storage_class['reclaimPolicy'] = 'Retain' create_storage_class(storage_class) create_and_wait_statefulset(statefulset) pod_info = get_statefulset_pod_info(core_api, statefulset) volume_info = [p['pv_name'] for p in pod_info] extra_pod_name = 'extra-pod-using-' + volume_info[1] pod['metadata']['name'] = extra_pod_name p2 = core_api.read_namespaced_pod(name=pod_info[1]['pod_name'], namespace='default') pod['spec']['nodeName'] = p2.spec.node_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pod_info[1]['pvc_name'], }, }] create_and_wait_pod(core_api, pod) for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == p['pv_name'] assert k_status['pvStatus'] == 'Bound' assert k_status['namespace'] == 'default' assert k_status['pvcName'] == p['pvc_name'] assert not k_status['lastPVCRefAt'] assert not k_status['lastPodRefAt'] if i == 0: assert len(workloads) == 1 assert workloads[0]['podName'] == p['pod_name'] assert workloads[0]['workloadName'] == statefulset_name assert workloads[0]['workloadType'] == 'StatefulSet' for _ in range(RETRY_COUNTS): if workloads[0]['podStatus'] == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert workloads[0]['podStatus'] == 'Running' if i == 1: assert len(k_status['workloadsStatus']) == 2 if workloads[0]['podName'] == pod_info[i]['pod_name']: assert workloads[1]['podName'] == extra_pod_name assert workloads[0]['workloadName'] == statefulset_name assert workloads[0]['workloadType'] == 'StatefulSet' assert not workloads[1]['workloadName'] assert not workloads[1]['workloadType'] else: assert workloads[1]['podName'] == pod_info[i]['pod_name'] assert workloads[0]['podName'] == extra_pod_name assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] assert workloads[1]['workloadName'] == statefulset_name assert workloads[1]['workloadType'] == 'StatefulSet' for _ in range(RETRY_COUNTS): if workloads[0]['podStatus'] == 'Running' and \ workloads[1]['podStatus'] == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert len(workloads) == 2 assert workloads[0]['podStatus'] == 'Running' assert workloads[1]['podStatus'] == 'Running' ks_list = [{}, {}] delete_and_wait_statefulset_only(core_api, statefulset) # the extra pod is still using the 2nd volume for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] ks_list[i]['pvName'] = p['pv_name'] ks_list[i]['pvStatus'] = 'Bound' ks_list[i]['namespace'] = 'default' ks_list[i]['pvcName'] = p['pvc_name'] ks_list[i]['lastPVCRefAt'] = '' if i == 0: ks_list[i]['lastPodRefAt'] = 'not empty' ks_list[i]['workloadsStatus'] = [ { 'podName': p['pod_name'], 'podStatus': 'Running', 'workloadName': statefulset_name, 'workloadType': 'StatefulSet', }, ] if i == 1: ks_list[i]['lastPodRefAt'] = '' ks_list[i]['workloadsStatus'] = [{ 'podName': extra_pod_name, 'podStatus': 'Running', 'workloadName': '', 
'workloadType': '', }] wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # deleted extra_pod, all volumes have no workload delete_and_wait_pod(core_api, pod['metadata']['name']) for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] ks_list[i]['lastPodRefAt'] = 'not empty' wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # deleted pvc only. for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] delete_and_wait_pvc(core_api, p['pvc_name']) ks_list[i]['pvStatus'] = 'Released' ks_list[i]['lastPVCRefAt'] = 'not empty' wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # deleted pv only. for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] delete_and_wait_pv(core_api, p['pv_name']) ks_list[i]['pvName'] = '' ks_list[i]['pvStatus'] = '' wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # reuse that volume for p, volume_name in zip(pod_info, volume_info): p['pod_name'] = p['pod_name'].replace('kubernetes-status-test', 'kubernetes-status-test-reuse') p['pvc_name'] = p['pvc_name'].replace('kubernetes-status-test', 'kubernetes-status-test-reuse') p['pv_name'] = p['pvc_name'] csi_pv['metadata']['name'] = p['pv_name'] csi_pv['spec']['csi']['volumeHandle'] = volume_name core_api.create_persistent_volume(csi_pv) pvc['metadata']['name'] = p['pvc_name'] pvc['spec']['volumeName'] = p['pv_name'] core_api.create_namespaced_persistent_volume_claim(body=pvc, namespace='default') pod['metadata']['name'] = p['pod_name'] pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': p['pvc_name'], }, }] create_and_wait_pod(core_api, pod) ks = { 'pvName': p['pv_name'], 'pvStatus': 'Bound', 'namespace': 'default', 'pvcName': p['pvc_name'], 'lastPVCRefAt': '', 'lastPodRefAt': '', 'workloadsStatus': [ { 'podName': p['pod_name'], 'podStatus': 'Running', 'workloadName': '', 'workloadType': '', }, ], } wait_volume_kubernetes_status(client, volume_name, ks) delete_and_wait_pod(core_api, p['pod_name']) # Since persistentVolumeReclaimPolicy of csi_pv is `Delete`, # we don't need to delete bounded pv manually delete_and_wait_pvc(core_api, p['pvc_name']) wait_delete_pv(core_api, p['pv_name'])
def test_replica_auto_balance_zone_best_effort_with_data_locality( client, core_api, volume_name, pod): # NOQA """ Background: Given set `replica-soft-anti-affinity` to `true`. And set `replica-zone-soft-anti-affinity` to `true`. And set `default-data-locality` to `best-effort`. And set `replicaAutoBalance` to `best-effort`. And set node-1 to zone-1. set node-2 to zone-1. set node-3 to zone-2. And create volume with 2 replicas. And create pv for volume. And create pvc for volume. Scenario Outline: replica auto-balance zones with best-effort should not remove pod local replicas when data locality is enabled (best-effort). Given create and wait pod on <pod-node>. And disable scheduling and evict node-3. And count replicas on each nodes. And 1 replica running on <pod-node>. 1 replica running on <duplicate-node>. 0 replica running on node-3. When enable scheduling for node-3. Then count replicas on each nodes. And 1 replica running on <pod-node>. 0 replica running on <duplicate-node>. 1 replica running on node-3. And count replicas in each zones. And 1 replica running in zone-1. 1 replica running in zone-2. And loop 3 times with each wait 5 seconds and count replicas on each nodes. To ensure no addition scheduling is happening. 1 replica running on <pod-node>. 0 replica running on <duplicate-node>. 1 replica running on node-3. And delete pod. Examples: | pod-node | duplicate-node | | node-1 | node-2 | | node-2 | node-1 | | node-1 | node-2 | """ common.update_setting(client, SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY, "true") common.update_setting(client, SETTING_REPLICA_ZONE_SOFT_ANTI_AFFINITY, "true") common.update_setting(client, SETTING_DEFAULT_DATA_LOCALITY, "best-effort") common.update_setting(client, SETTING_REPLICA_AUTO_BALANCE, "best-effort") n1, n2, n3 = client.list_node() set_k8s_node_zone_label(core_api, n1.name, ZONE1) set_k8s_node_zone_label(core_api, n2.name, ZONE1) set_k8s_node_zone_label(core_api, n3.name, ZONE2) wait_longhorn_node_zone_updated(client) n_replicas = 2 volume = create_and_check_volume(client, volume_name, num_of_replicas=n_replicas) common.create_pv_for_volume(client, core_api, volume, volume_name) common.create_pvc_for_volume(client, core_api, volume, volume_name) pod['spec']['volumes'] = [{ "name": "pod-data", "persistentVolumeClaim": { "claimName": volume_name } }] for i in range(1, 4): pod_node_name = n2.name if i % 2 == 0 else n1.name pod['spec']['nodeSelector'] = {"kubernetes.io/hostname": pod_node_name} common.create_and_wait_pod(core_api, pod) client.update(n3, allowScheduling=False, evictionRequested=True) duplicate_node = [n1.name, n2.name] duplicate_node.remove(pod_node_name) for _ in range(RETRY_COUNTS): pod_node_r_count = common.get_host_replica_count(client, volume_name, pod_node_name, chk_running=True) duplicate_node_r_count = common.get_host_replica_count( client, volume_name, duplicate_node[0], chk_running=True) balance_node_r_count = common.get_host_replica_count( client, volume_name, n3.name, chk_running=False) if pod_node_r_count == duplicate_node_r_count == 1 and \ balance_node_r_count == 0: break time.sleep(RETRY_INTERVAL) assert pod_node_r_count == 1 assert duplicate_node_r_count == 1 assert balance_node_r_count == 0 client.update(n3, allowScheduling=True) for _ in range(RETRY_COUNTS): pod_node_r_count = common.get_host_replica_count(client, volume_name, pod_node_name, chk_running=True) duplicate_node_r_count = common.get_host_replica_count( client, volume_name, duplicate_node[0], chk_running=False) balance_node_r_count = 
common.get_host_replica_count( client, volume_name, n3.name, chk_running=True) if pod_node_r_count == balance_node_r_count == 1 and \ duplicate_node_r_count == 0: break time.sleep(RETRY_INTERVAL) assert pod_node_r_count == 1 assert duplicate_node_r_count == 0 assert balance_node_r_count == 1 z1_r_count = get_zone_replica_count(client, volume_name, ZONE1, chk_running=True) z2_r_count = get_zone_replica_count(client, volume_name, ZONE2, chk_running=True) assert z1_r_count == z2_r_count == 1 # loop 3 times and each to wait 5 seconds to ensure there is no # re-scheduling happening. for _ in range(3): time.sleep(5) assert pod_node_r_count == common.get_host_replica_count( client, volume_name, pod_node_name, chk_running=True) assert duplicate_node_r_count == common.get_host_replica_count( client, volume_name, duplicate_node[0], chk_running=False) assert balance_node_r_count == common.get_host_replica_count( client, volume_name, n3.name, chk_running=True) common.delete_and_wait_pod(core_api, pod['metadata']['name'])
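# Hypothetical sketch of how a per-zone replica count could be derived from the
# Longhorn API objects used above; the suite's real get_zone_replica_count
# helper in common.py may differ in its details.
def count_zone_replicas(client, volume_name, zone_name, chk_running=True):
    volume = client.by_id_volume(volume_name)
    # Map Longhorn node name -> zone reported by the node object.
    node_zone = {n.name: n.zone for n in client.list_node()}
    count = 0
    for r in volume.replicas:
        if chk_running and not r.running:
            continue
        if node_zone.get(r.hostId) == zone_name:
            count += 1
    return count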
def restore_inc_test(client, core_api, volume_name, pod): # NOQA std_volume = create_and_check_volume(client, volume_name, 2, SIZE) lht_host_id = get_self_host_id() std_volume.attach(hostId=lht_host_id) std_volume = common.wait_for_volume_healthy(client, volume_name) with pytest.raises(Exception) as e: std_volume.activate(frontend="blockdev") assert "already in active mode" in str(e.value) data0 = {'len': 4 * 1024, 'pos': 0} data0['content'] = common.generate_random_data(data0['len']) bv, backup0, _, data0 = create_backup(client, volume_name, data0) sb_volume0_name = "sb-0-" + volume_name sb_volume1_name = "sb-1-" + volume_name sb_volume2_name = "sb-2-" + volume_name client.create_volume(name=sb_volume0_name, size=SIZE, numberOfReplicas=2, fromBackup=backup0['url'], frontend="", standby=True) client.create_volume(name=sb_volume1_name, size=SIZE, numberOfReplicas=2, fromBackup=backup0['url'], frontend="", standby=True) client.create_volume(name=sb_volume2_name, size=SIZE, numberOfReplicas=2, fromBackup=backup0['url'], frontend="", standby=True) common.wait_for_volume_restoration_completed(client, sb_volume0_name) common.wait_for_volume_restoration_completed(client, sb_volume1_name) common.wait_for_volume_restoration_completed(client, sb_volume2_name) sb_volume0 = common.wait_for_volume_healthy(client, sb_volume0_name) sb_volume1 = common.wait_for_volume_healthy(client, sb_volume1_name) sb_volume2 = common.wait_for_volume_healthy(client, sb_volume2_name) for i in range(RETRY_COUNTS): sb_volume0 = client.by_id_volume(sb_volume0_name) sb_volume1 = client.by_id_volume(sb_volume1_name) sb_volume2 = client.by_id_volume(sb_volume2_name) sb_engine0 = get_volume_engine(sb_volume0) sb_engine1 = get_volume_engine(sb_volume1) sb_engine2 = get_volume_engine(sb_volume2) if sb_volume0["lastBackup"] != backup0["name"] or \ sb_volume1["lastBackup"] != backup0["name"] or \ sb_volume2["lastBackup"] != backup0["name"] or \ sb_engine0["lastRestoredBackup"] != backup0["name"] or \ sb_engine1["lastRestoredBackup"] != backup0["name"] or \ sb_engine2["lastRestoredBackup"] != backup0["name"]: time.sleep(RETRY_INTERVAL) else: break assert sb_volume0["standby"] is True assert sb_volume0["lastBackup"] == backup0["name"] assert sb_volume0["frontend"] == "" assert sb_volume0["disableFrontend"] is True assert sb_volume0["initialRestorationRequired"] is False sb_engine0 = get_volume_engine(sb_volume0) assert sb_engine0["lastRestoredBackup"] == backup0["name"] assert sb_engine0["requestedBackupRestore"] == backup0["name"] assert sb_volume1["standby"] is True assert sb_volume1["lastBackup"] == backup0["name"] assert sb_volume1["frontend"] == "" assert sb_volume1["disableFrontend"] is True assert sb_volume1["initialRestorationRequired"] is False sb_engine1 = get_volume_engine(sb_volume1) assert sb_engine1["lastRestoredBackup"] == backup0["name"] assert sb_engine1["requestedBackupRestore"] == backup0["name"] assert sb_volume2["standby"] is True assert sb_volume2["lastBackup"] == backup0["name"] assert sb_volume2["frontend"] == "" assert sb_volume2["disableFrontend"] is True assert sb_volume2["initialRestorationRequired"] is False sb_engine2 = get_volume_engine(sb_volume2) assert sb_engine2["lastRestoredBackup"] == backup0["name"] assert sb_engine2["requestedBackupRestore"] == backup0["name"] sb0_snaps = sb_volume0.snapshotList() assert len(sb0_snaps) == 2 for s in sb0_snaps: if s['name'] != "volume-head": sb0_snap = s assert sb0_snaps with pytest.raises(Exception) as e: sb_volume0.snapshotCreate() assert "cannot create 
snapshot for standby volume" in str(e.value) with pytest.raises(Exception) as e: sb_volume0.snapshotRevert(name=sb0_snap["name"]) assert "cannot revert snapshot for standby volume" in str(e.value) with pytest.raises(Exception) as e: sb_volume0.snapshotDelete(name=sb0_snap["name"]) assert "cannot delete snapshot for standby volume" in str(e.value) with pytest.raises(Exception) as e: sb_volume0.snapshotBackup(name=sb0_snap["name"]) assert "cannot create backup for standby volume" in str(e.value) with pytest.raises(Exception) as e: sb_volume0.pvCreate(pvName=sb_volume0_name) assert "cannot create PV for standby volume" in str(e.value) with pytest.raises(Exception) as e: sb_volume0.pvcCreate(pvcName=sb_volume0_name) assert "cannot create PVC for standby volume" in str(e.value) setting = client.by_id_setting(common.SETTING_BACKUP_TARGET) with pytest.raises(Exception) as e: client.update(setting, value="random.backup.target") assert "cannot modify BackupTarget " \ "since there are existing standby volumes" in str(e.value) with pytest.raises(Exception) as e: sb_volume0.activate(frontend="wrong_frontend") assert "invalid frontend" in str(e.value) activate_standby_volume(client, sb_volume0_name) sb_volume0 = client.by_id_volume(sb_volume0_name) sb_volume0.attach(hostId=lht_host_id) sb_volume0 = common.wait_for_volume_healthy(client, sb_volume0_name) check_volume_data(sb_volume0, data0, False) zero_string = b'\x00'.decode('utf-8') _, backup1, _, data1 = create_backup(client, volume_name, { 'len': 2 * 1024, 'pos': 0, 'content': zero_string * 2 * 1024 }) # use this api to update field `last backup` client.list_backupVolume() check_volume_last_backup(client, sb_volume1_name, backup1['name']) activate_standby_volume(client, sb_volume1_name) sb_volume1 = client.by_id_volume(sb_volume1_name) sb_volume1.attach(hostId=lht_host_id) sb_volume1 = common.wait_for_volume_healthy(client, sb_volume1_name) data0_modified = { 'len': data0['len'] - data1['len'], 'pos': data1['len'], 'content': data0['content'][data1['len']:], } check_volume_data(sb_volume1, data0_modified, False) check_volume_data(sb_volume1, data1) data2 = {'len': 1 * 1024 * 1024, 'pos': 0} data2['content'] = common.generate_random_data(data2['len']) _, backup2, _, data2 = create_backup(client, volume_name, data2) client.list_backupVolume() check_volume_last_backup(client, sb_volume2_name, backup2['name']) activate_standby_volume(client, sb_volume2_name) sb_volume2 = client.by_id_volume(sb_volume2_name) sb_volume2.attach(hostId=lht_host_id) sb_volume2 = common.wait_for_volume_healthy(client, sb_volume2_name) check_volume_data(sb_volume2, data2) # allocated this active volume to a pod sb_volume2.detach() sb_volume2 = common.wait_for_volume_detached(client, sb_volume2_name) create_pv_for_volume(client, core_api, sb_volume2, sb_volume2_name) create_pvc_for_volume(client, core_api, sb_volume2, sb_volume2_name) sb_volume2_pod_name = "pod-" + sb_volume2_name pod['metadata']['name'] = sb_volume2_pod_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': sb_volume2_name, }, }] create_and_wait_pod(core_api, pod) sb_volume2 = client.by_id_volume(sb_volume2_name) k_status = sb_volume2["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == sb_volume2_name assert k_status['pvStatus'] == 'Bound' assert len(workloads) == 1 for i in range(RETRY_COUNTS): if workloads[0]['podStatus'] == 'Running': break time.sleep(RETRY_INTERVAL) sb_volume2 = 
client.by_id_volume(sb_volume2_name) k_status = sb_volume2["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert len(workloads) == 1 assert workloads[0]['podName'] == sb_volume2_pod_name assert workloads[0]['podStatus'] == 'Running' assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] assert k_status['namespace'] == 'default' assert k_status['pvcName'] == sb_volume2_name assert not k_status['lastPVCRefAt'] assert not k_status['lastPodRefAt'] delete_and_wait_pod(core_api, sb_volume2_pod_name) delete_and_wait_pvc(core_api, sb_volume2_name) delete_and_wait_pv(core_api, sb_volume2_name) # cleanup std_volume.detach() sb_volume0.detach() sb_volume1.detach() std_volume = common.wait_for_volume_detached(client, volume_name) sb_volume0 = common.wait_for_volume_detached(client, sb_volume0_name) sb_volume1 = common.wait_for_volume_detached(client, sb_volume1_name) sb_volume2 = common.wait_for_volume_detached(client, sb_volume2_name) bv.backupDelete(name=backup2["name"]) bv.backupDelete(name=backup1["name"]) bv.backupDelete(name=backup0["name"]) client.delete(std_volume) client.delete(sb_volume0) client.delete(sb_volume1) client.delete(sb_volume2) wait_for_volume_delete(client, volume_name) wait_for_volume_delete(client, sb_volume0_name) wait_for_volume_delete(client, sb_volume1_name) wait_for_volume_delete(client, sb_volume2_name) volumes = client.list_volume() assert len(volumes) == 0
def test_kubernetes_status(client, core_api, storage_class, # NOQA statefulset, csi_pv, pvc, pod): # NOQA statefulset_name = 'kubernetes-status-test' update_statefulset_manifests(statefulset, storage_class, statefulset_name) storage_class['reclaimPolicy'] = 'Retain' create_storage_class(storage_class) create_and_wait_statefulset(statefulset) pod_info = get_statefulset_pod_info(core_api, statefulset) volume_info = [p['pv_name'] for p in pod_info] extra_pod_name = 'extra-pod-using-' + volume_info[1] pod['metadata']['name'] = extra_pod_name p2 = core_api.read_namespaced_pod(name=pod_info[1]['pod_name'], namespace='default') pod['spec']['nodeName'] = p2.spec.node_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pod_info[1]['pvc_name'], }, }] create_and_wait_pod(core_api, pod) for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == p['pv_name'] assert k_status['pvStatus'] == 'Bound' assert k_status['namespace'] == 'default' assert k_status['pvcName'] == p['pvc_name'] assert not k_status['lastPVCRefAt'] assert not k_status['lastPodRefAt'] if i == 0: assert len(workloads) == 1 assert workloads[0]['podName'] == p['pod_name'] assert workloads[0]['workloadName'] == statefulset_name assert workloads[0]['workloadType'] == 'StatefulSet' for _ in range(RETRY_COUNTS): if workloads[0]['podStatus'] == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert workloads[0]['podStatus'] == 'Running' if i == 1: assert len(k_status['workloadsStatus']) == 2 if workloads[0]['podName'] == pod_info[i]['pod_name']: assert workloads[1]['podName'] == extra_pod_name assert workloads[0]['workloadName'] == statefulset_name assert workloads[0]['workloadType'] == 'StatefulSet' assert not workloads[1]['workloadName'] assert not workloads[1]['workloadType'] else: assert workloads[1]['podName'] == pod_info[i]['pod_name'] assert workloads[0]['podName'] == extra_pod_name assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] assert workloads[1]['workloadName'] == statefulset_name assert workloads[1]['workloadType'] == 'StatefulSet' for _ in range(RETRY_COUNTS): if workloads[0]['podStatus'] == 'Running' and \ workloads[1]['podStatus'] == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert len(workloads) == 2 assert workloads[0]['podStatus'] == 'Running' assert workloads[1]['podStatus'] == 'Running' # the extra pod is still using the 2nd volume delete_and_wait_statefulset_only(core_api, statefulset) for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == p['pv_name'] assert k_status['pvStatus'] == 'Bound' assert k_status['namespace'] == 'default' assert k_status['pvcName'] == p['pvc_name'] assert not k_status['lastPVCRefAt'] assert len(workloads) == 1 if i == 0: assert workloads[0]['podName'] == p['pod_name'] assert workloads[0]['workloadName'] == statefulset_name assert workloads[0]['workloadType'] == 'StatefulSet' assert k_status['lastPodRefAt'] if i 
== 1: assert workloads[0]['podName'] == extra_pod_name assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] assert not k_status['lastPodRefAt'] # deleted extra_pod, all volumes have no workload delete_and_wait_pod(core_api, pod['metadata']['name']) for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == p['pv_name'] assert k_status['pvStatus'] == 'Bound' assert k_status['namespace'] == 'default' assert k_status['pvcName'] == p['pvc_name'] assert not k_status['lastPVCRefAt'] assert k_status['lastPodRefAt'] assert len(workloads) == 1 if i == 0: assert workloads[0]['podName'] == p['pod_name'] assert workloads[0]['workloadName'] == statefulset_name assert workloads[0]['workloadType'] == 'StatefulSet' if i == 1: assert workloads[0]['podName'] == extra_pod_name assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] # deleted pvc only. for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] delete_and_wait_pvc(core_api, p['pvc_name']) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] for _ in range(RETRY_COUNTS): if k_status['pvStatus'] == 'Released': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == p['pv_name'] assert k_status['pvStatus'] == 'Released' assert k_status['namespace'] == 'default' assert k_status['pvcName'] == p['pvc_name'] assert k_status['lastPVCRefAt'] assert k_status['lastPodRefAt'] assert len(workloads) == 1 if i == 0: assert workloads[0]['podName'] == p['pod_name'] assert workloads[0]['workloadName'] == statefulset_name assert workloads[0]['workloadType'] == 'StatefulSet' if i == 1: assert workloads[0]['podName'] == extra_pod_name assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] # deleted pv only. 
for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] delete_and_wait_pv(core_api, p['pv_name']) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == '' assert k_status['pvStatus'] == '' assert k_status['namespace'] == 'default' assert k_status['pvcName'] == p['pvc_name'] assert k_status['lastPVCRefAt'] assert k_status['lastPodRefAt'] assert len(workloads) == 1 if i == 0: assert workloads[0]['podName'] == p['pod_name'] assert workloads[0]['workloadName'] == statefulset_name assert workloads[0]['workloadType'] == 'StatefulSet' if i == 1: assert workloads[0]['podName'] == extra_pod_name assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] # reuse that volume for p, volume_name in zip(pod_info, volume_info): p['pod_name'] = p['pod_name'].replace('kubernetes-status-test', 'kubernetes-status-test-reuse') p['pvc_name'] = p['pvc_name'].replace('kubernetes-status-test', 'kubernetes-status-test-reuse') p['pv_name'] = p['pvc_name'] csi_pv['metadata']['name'] = p['pv_name'] csi_pv['spec']['csi']['volumeHandle'] = volume_name core_api.create_persistent_volume(csi_pv) pvc['metadata']['name'] = p['pvc_name'] pvc['spec']['volumeName'] = p['pv_name'] core_api.create_namespaced_persistent_volume_claim( body=pvc, namespace='default') pod['metadata']['name'] = p['pod_name'] pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': p['pvc_name'], }, }] create_and_wait_pod(core_api, pod) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert len(workloads) == 1 assert k_status['pvName'] == p['pv_name'] for _ in range(RETRY_COUNTS): if k_status['pvStatus'] == 'Bound': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert len(workloads) == 1 assert k_status['pvStatus'] == 'Bound' for _ in range(RETRY_COUNTS): if workloads[0]['podStatus'] == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert len(workloads) == 1 assert workloads[0]['podStatus'] == 'Running' assert workloads[0]['podName'] == p['pod_name'] assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] assert k_status['namespace'] == 'default' assert k_status['pvcName'] == p['pvc_name'] assert not k_status['lastPVCRefAt'] assert not k_status['lastPodRefAt'] delete_and_wait_pod(core_api, p['pod_name']) # Since persistentVolumeReclaimPolicy of csi_pv is `Delete`, # we don't need to delete bounded pv manually delete_and_wait_pvc(core_api, p['pvc_name']) wait_delete_pv(core_api, p['pv_name'])
def test_pvc_creation(client, core_api, pod): # NOQA volume_name = "test-pvc-creation" client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2) volume = wait_for_volume_detached(client, volume_name) pv_name = "pv-" + volume_name pvc_name = "pvc-" + volume_name pod_name = "pod-" + volume_name # try to create pvc without pv for the volume with pytest.raises(Exception) as e: volume.pvcCreate(namespace="default", pvcName=pvc_name) assert "connot find existing PV for volume" in str(e.value) volume.pvCreate(pvName=pv_name) for i in range(RETRY_COUNTS): if check_pv_existence(core_api, pv_name): break time.sleep(RETRY_INTERVAL) assert check_pv_existence(core_api, pv_name) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] for i in range(RETRY_COUNTS): if k_status['pvName'] and k_status['pvStatus'] == 'Available': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] assert k_status['pvName'] == pv_name assert k_status['pvStatus'] == 'Available' assert not k_status['namespace'] assert not k_status['pvcName'] assert not k_status['workloadsStatus'] assert not k_status['lastPVCRefAt'] assert not k_status['lastPodRefAt'] volume.pvcCreate(namespace="default", pvcName=pvc_name) for i in range(RETRY_COUNTS): if check_pvc_existence(core_api, pvc_name): break time.sleep(RETRY_INTERVAL) assert check_pvc_existence(core_api, pvc_name) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] for i in range(RETRY_COUNTS): if k_status['pvcName'] and k_status['namespace']: break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] assert k_status['pvName'] == pv_name assert k_status['pvStatus'] == 'Bound' assert k_status['namespace'] == "default" assert k_status['pvcName'] == pvc_name assert not k_status['workloadsStatus'] assert not k_status['lastPVCRefAt'] assert not k_status['lastPodRefAt'] pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pvc_name, }, }] create_and_wait_pod(core_api, pod) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert k_status['pvName'] == pv_name assert k_status['pvStatus'] == 'Bound' assert len(workloads) == 1 for i in range(RETRY_COUNTS): if workloads[0]['podStatus'] == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume["kubernetesStatus"] workloads = k_status['workloadsStatus'] assert len(workloads) == 1 assert workloads[0]['podName'] == pod_name assert workloads[0]['podStatus'] == 'Running' assert not workloads[0]['workloadName'] assert not workloads[0]['workloadType'] assert k_status['namespace'] == 'default' assert k_status['pvcName'] == pvc_name assert not k_status['lastPVCRefAt'] assert not k_status['lastPodRefAt'] delete_and_wait_pod(core_api, pod_name) delete_and_wait_pvc(core_api, pvc_name) wait_delete_pv(core_api, pv_name)
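# The retry loops above poll check_pv_existence / check_pvc_existence until the
# objects appear. Below is a hypothetical sketch of such an existence check
# using the Kubernetes Python client; the suite's real helper may differ.
from kubernetes.client.rest import ApiException


def pv_exists(core_api, pv_name):
    try:
        core_api.read_persistent_volume(name=pv_name)
        return True
    except ApiException as e:
        # 404 means the PV has not been created (or was deleted); anything else
        # is an unexpected API error and is re-raised.
        if e.status == 404:
            return False
        raise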
def test_engine_live_upgrade_with_intensive_data_writing( client, core_api, volume_name, pod_make): # NOQA """ Test engine live upgrade with intensive data writing 1. Deploy a compatible new engine image 2. Create a volume(with the old default engine image) with /PV/PVC/Pod and wait for pod to be deployed. 3. Write data to a tmp file in the pod and get the md5sum 4. Upgrade the volume to the new engine image without waiting. 5. Keep copying data from the tmp file to the volume during the live upgrade. 6. Wait until the upgrade completed, verify the volume engine image changed 7. Wait for new replica mode update then check the engine status. 8. Verify all engine and replicas' engine image changed 9. Verify the reference count of the new engine image changed 10. Check the existing data. Then write new data to the upgraded volume and get the md5sum. 11. Delete the pod and wait for the volume detached. Then check engine and replicas's engine image again. 12. Recreate the pod. 13. Check if the attached volume is state `healthy` rather than `degraded`. 14. Check the data. """ default_img = common.get_default_engine_image(client) default_img_name = default_img.name default_img = wait_for_engine_image_ref_count(client, default_img_name, 0) cli_v = default_img.cliAPIVersion cli_minv = default_img.cliAPIMinVersion ctl_v = default_img.controllerAPIVersion ctl_minv = default_img.controllerAPIMinVersion data_v = default_img.dataFormatVersion data_minv = default_img.dataFormatMinVersion engine_upgrade_image = common.get_upgrade_test_image( cli_v, cli_minv, ctl_v, ctl_minv, data_v, data_minv) new_img = client.create_engine_image(image=engine_upgrade_image) new_img_name = new_img.name ei_status_value = get_engine_image_status_value(client, new_img_name) new_img = wait_for_engine_image_state(client, new_img_name, ei_status_value) assert new_img.refCount == 0 assert new_img.noRefSince != "" default_img = common.get_default_engine_image(client) default_img_name = default_img.name pod_name = volume_name + "-pod" pv_name = volume_name + "-pv" pvc_name = volume_name + "-pvc" pod = pod_make(name=pod_name) volume = create_and_check_volume(client, volume_name, num_of_replicas=3, size=str(1 * Gi)) original_engine_image = volume.engineImage assert original_engine_image != engine_upgrade_image create_pv_for_volume(client, core_api, volume, pv_name) create_pvc_for_volume(client, core_api, volume, pvc_name) pod['spec']['volumes'] = [create_pvc_spec(pvc_name)] create_and_wait_pod(core_api, pod) volume = client.by_id_volume(volume_name) assert volume.engineImage == original_engine_image assert volume.currentImage == original_engine_image engine = get_volume_engine(volume) assert engine.engineImage == original_engine_image assert engine.currentImage == original_engine_image for replica in volume.replicas: assert replica.engineImage == original_engine_image assert replica.currentImage == original_engine_image data_path0 = "/tmp/test" data_path1 = "/data/test1" write_pod_volume_random_data(core_api, pod_name, data_path0, RANDOM_DATA_SIZE_LARGE) original_md5sum1 = get_pod_data_md5sum(core_api, pod_name, data_path0) volume.engineUpgrade(image=engine_upgrade_image) # Keep writing data to the volume during the live upgrade copy_pod_volume_data(core_api, pod_name, data_path0, data_path1) # Wait for live upgrade complete wait_for_volume_current_image(client, volume_name, engine_upgrade_image) volume = wait_for_volume_replicas_mode(client, volume_name, "RW") engine = get_volume_engine(volume) assert engine.engineImage == 
engine_upgrade_image check_volume_endpoint(volume) wait_for_engine_image_ref_count(client, default_img_name, 0) wait_for_engine_image_ref_count(client, new_img_name, 1) volume_file_md5sum1 = get_pod_data_md5sum(core_api, pod_name, data_path1) assert volume_file_md5sum1 == original_md5sum1 data_path2 = "/data/test2" write_pod_volume_random_data(core_api, pod_name, data_path2, RANDOM_DATA_SIZE_SMALL) original_md5sum2 = get_pod_data_md5sum(core_api, pod_name, data_path2) delete_and_wait_pod(core_api, pod_name) volume = wait_for_volume_detached(client, volume_name) assert len(volume.replicas) == 3 assert volume.engineImage == engine_upgrade_image engine = get_volume_engine(volume) assert engine.engineImage == engine_upgrade_image for replica in volume.replicas: assert replica.engineImage == engine_upgrade_image create_and_wait_pod(core_api, pod) common.wait_for_volume_healthy(client, volume_name) volume_file_md5sum1 = get_pod_data_md5sum(core_api, pod_name, data_path1) assert volume_file_md5sum1 == original_md5sum1 volume_file_md5sum2 = get_pod_data_md5sum(core_api, pod_name, data_path2) assert volume_file_md5sum2 == original_md5sum2
def test_rwx_multi_statefulset_with_same_pvc(core_api, pvc, statefulset, pod):  # NOQA
    """
    Test writing of data into a volume from multiple pods using the same PVC

    1. Create a PVC with access mode 'ReadWriteMany' using the 'longhorn'
       StorageClass, which dynamically provisions an RWX volume.
    2. Deploy a StatefulSet of 2 pods using the PVC created above.
    3. Wait for both pods to come up.
    4. Create a pod using the same PVC.
    5. Wait for the pod to come up running.
    6. Write data from each of the three pods and read it back from the others.
    7. Read the files directly in the share manager pod and verify the content.
    """
    pvc_name = 'pvc-multi-pods-test'
    statefulset_name = 'statefulset-rwx-same-pvc-test'
    pod_name = 'pod-rwx-same-pvc-test'

    pvc['metadata']['name'] = pvc_name
    pvc['spec']['storageClassName'] = 'longhorn'
    pvc['spec']['accessModes'] = ['ReadWriteMany']

    core_api.create_namespaced_persistent_volume_claim(body=pvc,
                                                       namespace='default')

    statefulset['metadata']['name'] = \
        statefulset['spec']['selector']['matchLabels']['app'] = \
        statefulset['spec']['serviceName'] = \
        statefulset['spec']['template']['metadata']['labels']['app'] = \
        statefulset_name
    statefulset['spec']['template']['spec']['volumes'] = \
        [create_pvc_spec(pvc_name)]
    del statefulset['spec']['volumeClaimTemplates']

    create_and_wait_statefulset(statefulset)

    pv_name = get_volume_name(core_api, pvc_name)
    share_manager_name = 'share-manager-' + pv_name
    test_data = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, statefulset_name + '-0', test_data,
                          filename='test1')

    assert test_data == read_volume_data(core_api, statefulset_name + '-1',
                                         filename='test1')

    pod['metadata']['name'] = pod_name
    pod['spec']['volumes'] = [create_pvc_spec(pvc_name)]
    create_and_wait_pod(core_api, pod)

    assert test_data == read_volume_data(core_api, pod_name, filename='test1')

    test_data_2 = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name, test_data_2, filename='test2')

    command1 = 'cat /export' + '/' + pv_name + '/' + 'test1'
    command2 = 'cat /export' + '/' + pv_name + '/' + 'test2'

    assert test_data == exec_command_in_pod(core_api, command1,
                                            share_manager_name,
                                            LONGHORN_NAMESPACE)
    assert test_data_2 == exec_command_in_pod(core_api, command2,
                                              share_manager_name,
                                              LONGHORN_NAMESPACE)
def test_csi_block_volume(client, core_api, storage_class, pvc, pod_manifest): # NOQA """ Test CSI feature: raw block volume 1. Create a PVC with `volumeMode = Block` 2. Create a pod using the PVC to dynamic provision a volume 3. Verify the pod creation 4. Generate `test_data` and write to the block volume directly in the pod 5. Read the data back for validation 6. Delete the pod and create `pod2` to use the same volume 7. Validate the data in `pod2` is consistent with `test_data` """ pod_name = 'csi-block-volume-test' pvc_name = pod_name + "-pvc" device_path = "/dev/longhorn/longhorn-test-blk" storage_class['reclaimPolicy'] = 'Retain' pvc['metadata']['name'] = pvc_name pvc['spec']['volumeMode'] = 'Block' pvc['spec']['storageClassName'] = storage_class['metadata']['name'] pvc['spec']['resources'] = { 'requests': { 'storage': size_to_string(1 * Gi) } } pod_manifest['metadata']['name'] = pod_name pod_manifest['spec']['volumes'] = [{ 'name': 'longhorn-blk', 'persistentVolumeClaim': { 'claimName': pvc_name, }, }] pod_manifest['spec']['containers'][0]['volumeMounts'] = [] pod_manifest['spec']['containers'][0]['volumeDevices'] = [ {'name': 'longhorn-blk', 'devicePath': device_path} ] create_storage_class(storage_class) create_pvc(pvc) pv_name = wait_and_get_pv_for_pvc(core_api, pvc_name).metadata.name create_and_wait_pod(core_api, pod_manifest) test_data = generate_random_data(VOLUME_RWTEST_SIZE) test_offset = random.randint(0, VOLUME_RWTEST_SIZE) write_pod_block_volume_data( core_api, pod_name, test_data, test_offset, device_path) returned_data = read_pod_block_volume_data( core_api, pod_name, len(test_data), test_offset, device_path ) assert test_data == returned_data md5_sum = get_pod_data_md5sum( core_api, pod_name, device_path) delete_and_wait_pod(core_api, pod_name) common.wait_for_volume_detached(client, pv_name) pod_name_2 = 'csi-block-volume-test-reuse' pod_manifest['metadata']['name'] = pod_name_2 create_and_wait_pod(core_api, pod_manifest) returned_data = read_pod_block_volume_data( core_api, pod_name_2, len(test_data), test_offset, device_path ) assert test_data == returned_data md5_sum_2 = get_pod_data_md5sum( core_api, pod_name_2, device_path) assert md5_sum == md5_sum_2 delete_and_wait_pod(core_api, pod_name_2) delete_and_wait_pvc(core_api, pvc_name) delete_and_wait_pv(core_api, pv_name)
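# Rough sketch of one way to write `data` at a byte offset of the raw block
# device from inside the pod; the suite's write_pod_block_volume_data helper
# may be implemented differently (for example via the Kubernetes exec stream
# API rather than a shell pipeline). The naive quoting below only works for
# data without quotes or newlines, which is fine for generate_random_data.
def write_block_data_sketch(core_api, pod_name, data, offset, device_path):
    command = ("printf '%s' '" + data + "' | "
               "dd of=" + device_path +
               " bs=1 seek=" + str(offset) + " conv=notrunc")
    # exec_command_in_pod runs the command through a shell inside the pod.
    return exec_command_in_pod(core_api, command, pod_name, 'default')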
def test_backup_kubernetes_status(client, core_api, pod): # NOQA """ Test that Backups have KubernetesStatus stored properly when there is an associated PersistentVolumeClaim and Pod. """ host_id = get_self_host_id() static_sc_name = "longhorn-static-test" setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC) setting = client.update(setting, value=static_sc_name) assert setting["value"] == static_sc_name volume_name = "test-backup-kubernetes-status-pod" client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2) volume = wait_for_volume_detached(client, volume_name) pod_name = "pod-" + volume_name pv_name = "pv-" + volume_name pvc_name = "pvc-" + volume_name create_pv_for_volume(client, core_api, volume, pv_name) create_pvc_for_volume(client, core_api, volume, pvc_name) ret = core_api.list_namespaced_persistent_volume_claim(namespace='default') pvc_found = False for item in ret.items: if item.metadata.name == pvc_name: pvc_found = item break assert pvc_found assert pvc_found.spec.storage_class_name == static_sc_name pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pvc_name, }, }] create_and_wait_pod(core_api, pod) ks = { 'lastPodRefAt': '', 'lastPVCRefAt': '', 'namespace': 'default', 'pvcName': pvc_name, 'pvName': pv_name, 'pvStatus': 'Bound', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, volume_name, ks) volume = wait_for_volume_healthy(client, volume_name) # Create Backup manually instead of calling create_backup since Kubernetes # is not guaranteed to mount our Volume to the test host. snap = volume.snapshotCreate() volume.snapshotBackup(name=snap["name"]) bv, b = find_backup(client, volume_name, snap["name"]) new_b = bv.backupGet(name=b["name"]) status = loads(new_b["labels"].get(KUBERNETES_STATUS_LABEL)) assert status == ks restore_name = generate_volume_name() client.create_volume(name=restore_name, size=SIZE, numberOfReplicas=2, fromBackup=b["url"]) wait_for_volume_restoration_completed(client, restore_name) wait_for_volume_detached(client, restore_name) snapshot_created = b["snapshotCreated"] ks = { 'lastPodRefAt': b["snapshotCreated"], 'lastPVCRefAt': b["snapshotCreated"], 'namespace': 'default', 'pvcName': pvc_name, # Restoration should not apply PersistentVolume data. 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, restore_name, ks) restore = client.by_id_volume(restore_name) # We need to compare LastPodRefAt and LastPVCRefAt manually since # wait_volume_kubernetes_status only checks for empty or non-empty state. assert restore["kubernetesStatus"]["lastPodRefAt"] == ks["lastPodRefAt"] assert restore["kubernetesStatus"]["lastPVCRefAt"] == ks["lastPVCRefAt"] bv.backupDelete(name=b["name"]) client.delete(restore) wait_for_volume_delete(client, restore_name) delete_and_wait_pod(core_api, pod_name) delete_and_wait_pvc(core_api, pvc_name) delete_and_wait_pv(core_api, pv_name) # With the Pod, PVC, and PV deleted, the Volume should have both Ref # fields set. Check that a new Backup and Restore will use this instead of # manually populating the Ref fields. 
ks = { 'lastPodRefAt': 'NOT NULL', 'lastPVCRefAt': 'NOT NULL', 'namespace': 'default', 'pvcName': pvc_name, 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, volume_name, ks) volume = wait_for_volume_detached(client, volume_name) volume.attach(hostId=host_id) volume = wait_for_volume_healthy(client, volume_name) snap = volume.snapshotCreate() volume.snapshotBackup(name=snap["name"]) bv, b = find_backup(client, volume_name, snap["name"]) new_b = bv.backupGet(name=b["name"]) status = loads(new_b["labels"].get(KUBERNETES_STATUS_LABEL)) # Check each field manually, we have no idea what the LastPodRefAt or the # LastPVCRefAt will be. We just know it shouldn't be SnapshotCreated. assert status["lastPodRefAt"] != snapshot_created assert status["lastPVCRefAt"] != snapshot_created assert status["namespace"] == "default" assert status["pvcName"] == pvc_name assert status["pvName"] == "" assert status["pvStatus"] == "" assert status["workloadsStatus"] == [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] restore_name = generate_volume_name() client.create_volume(name=restore_name, size=SIZE, numberOfReplicas=2, fromBackup=b["url"]) wait_for_volume_restoration_completed(client, restore_name) wait_for_volume_detached(client, restore_name) ks = { 'lastPodRefAt': status["lastPodRefAt"], 'lastPVCRefAt': status["lastPVCRefAt"], 'namespace': 'default', 'pvcName': pvc_name, 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, restore_name, ks) restore = client.by_id_volume(restore_name) assert restore["kubernetesStatus"]["lastPodRefAt"] == ks["lastPodRefAt"] assert restore["kubernetesStatus"]["lastPVCRefAt"] == ks["lastPVCRefAt"] bv.backupDelete(name=b["name"]) client.delete(restore) cleanup_volume(client, volume)
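# The expected `ks` dicts above use sentinel values such as 'NOT NULL' / 'not empty'
# for timestamps that cannot be predicted, and the in-test comment notes that
# wait_volume_kubernetes_status() only checks empty vs. non-empty for those fields.
# The polling sketch below mirrors that assumed behaviour; the loop bounds and the
# workloadsStatus handling are illustrative, not the exact common.py helper.
import time


def kubernetes_status_matches_sketch(actual, expected):
    for field, want in expected.items():
        if field == 'workloadsStatus':
            continue  # workload entries are compared separately by the real helper
        got = actual.get(field) if isinstance(actual, dict) \
            else getattr(actual, field)
        if want in ('NOT NULL', 'not empty'):
            if not got:
                return False
        elif got != want:
            return False
    return True


def wait_volume_kubernetes_status_sketch(client, volume_name, expected,
                                         retries=300, interval=2):
    for _ in range(retries):
        volume = client.by_id_volume(volume_name)
        if kubernetes_status_matches_sketch(volume.kubernetesStatus, expected):
            return volume
        time.sleep(interval)
    assert False, "kubernetesStatus never matched for %s" % volume_name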
def test_csi_expansion_with_replica_failure(client, core_api, storage_class, pvc, pod_manifest): # NOQA """ Test expansion success but with one replica expansion failure 1. Create a new `storage_class` with `allowVolumeExpansion` set 2. Create PVC and Pod with dynamic provisioned volume from the StorageClass 3. Create an empty directory with expansion snapshot tmp meta file path for one replica so that the replica expansion will fail 4. Generate `test_data` and write to the pod 5. Delete the pod and wait for volume detachment 6. Update pvc.spec.resources to expand the volume 7. Check expansion result using Longhorn API. There will be expansion error caused by the failed replica but overall the expansion should succeed. 8. Create a new pod and check if the volume will rebuild the failed replica 9. Validate the volume content, then check if data writing looks fine """ create_storage_class(storage_class) pod_name = 'csi-expansion-with-replica-failure-test' pvc_name = pod_name + "-pvc" pvc['metadata']['name'] = pvc_name pvc['spec']['storageClassName'] = storage_class['metadata']['name'] create_pvc(pvc) pod_manifest['metadata']['name'] = pod_name pod_manifest['spec']['volumes'] = [{ 'name': pod_manifest['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': {'claimName': pvc_name}, }] create_and_wait_pod(core_api, pod_manifest) expand_size = str(EXPANDED_VOLUME_SIZE*Gi) pv = wait_and_get_pv_for_pvc(core_api, pvc_name) assert pv.status.phase == "Bound" volume_name = pv.spec.csi.volume_handle volume = client.by_id_volume(volume_name) failed_replica = volume.replicas[0] fail_replica_expansion(client, core_api, volume_name, expand_size, [failed_replica]) test_data = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, pod_name, test_data) delete_and_wait_pod(core_api, pod_name) wait_for_volume_detached(client, volume_name) # There will be replica expansion error info # but the expansion should succeed. pvc['spec']['resources'] = { 'requests': { 'storage': size_to_string(EXPANDED_VOLUME_SIZE*Gi) } } expand_and_wait_for_pvc(core_api, pvc) wait_for_expansion_failure(client, volume_name) wait_for_volume_expansion(client, volume_name) volume = client.by_id_volume(volume_name) assert volume.state == "detached" assert volume.size == expand_size for r in volume.replicas: if r.name == failed_replica.name: assert r.failedAt != "" else: assert r.failedAt == "" # Check if the replica will be rebuilded # and if the volume still works fine. create_and_wait_pod(core_api, pod_manifest) volume = wait_for_volume_healthy(client, volume_name) for r in volume.replicas: if r.name == failed_replica.name: assert r.mode == "" else: assert r.mode == "RW" resp = read_volume_data(core_api, pod_name) assert resp == test_data test_data = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, pod_name, test_data) resp = read_volume_data(core_api, pod_name) assert resp == test_data
def test_cloning_basic(client, core_api, pvc, pod, clone_pvc, clone_pod, storage_class_name='longhorn'): # NOQA """ 1. Create a PVC: ```yaml apiVersion: v1 kind: PersistentVolumeClaim metadata: name: source-pvc spec: storageClassName: longhorn accessModes: - ReadWriteOnce resources: requests: storage: 3Gi ``` 2. Specify the `source-pvc` in a pod yaml and start the pod 3. Wait for the pod to be running, write some data to the mount path of the volume 4. Clone a volume by creating the PVC: ```yaml apiVersion: v1 kind: PersistentVolumeClaim metadata: name: cloned-pvc spec: storageClassName: longhorn dataSource: name: source-pvc kind: PersistentVolumeClaim accessModes: - ReadWriteOnce resources: requests: storage: 3Gi ``` 5. Wait for the `CloneStatus.State` in `cloned-pvc` to be `completed` 6. Clone volume should get detached after cloning completion, wait for it. 7. Specify the `cloned-pvc` in a cloned pod yaml and deploy the cloned pod 8. In 3-min retry loop, wait for the cloned pod to be running 9. Verify the data in `cloned-pvc` is the same as in `source-pvc` 10. In 2-min retry loop, verify the volume of the `clone-pvc` eventually becomes healthy """ # Step-1 source_pvc_name = 'source-pvc' + generate_random_suffix() pvc['metadata']['name'] = source_pvc_name pvc['spec']['storageClassName'] = storage_class_name core_api.create_namespaced_persistent_volume_claim( body=pvc, namespace='default') wait_for_pvc_phase(core_api, source_pvc_name, "Bound") # Step-2 pod_name = 'source-pod' + generate_random_suffix() pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [create_pvc_spec(source_pvc_name)] create_and_wait_pod(core_api, pod) # Step-3 write_pod_volume_random_data(core_api, pod_name, '/data/test', DATA_SIZE_IN_MB_2) source_data = get_pod_data_md5sum(core_api, pod_name, '/data/test') # Step-4 clone_pvc_name = 'clone-pvc' + generate_random_suffix() clone_pvc['metadata']['name'] = clone_pvc_name clone_pvc['spec']['storageClassName'] = storage_class_name clone_pvc['spec']['dataSource'] = { 'name': source_pvc_name, 'kind': 'PersistentVolumeClaim' } core_api.create_namespaced_persistent_volume_claim( body=clone_pvc, namespace='default') wait_for_pvc_phase(core_api, clone_pvc_name, "Bound") # Step-5 clone_volume_name = get_volume_name(core_api, clone_pvc_name) wait_for_volume_clone_status(client, clone_volume_name, VOLUME_FIELD_STATE, VOLUME_FIELD_CLONE_COMPLETED) # Step-6 wait_for_volume_detached(client, clone_volume_name) # Step-7,8 clone_pod_name = 'clone-pod' + generate_random_suffix() clone_pod['metadata']['name'] = clone_pod_name clone_pod['spec']['volumes'] = [create_pvc_spec(clone_pvc_name)] create_and_wait_pod(core_api, clone_pod) clone_data = get_pod_data_md5sum(core_api, clone_pod_name, '/data/test') # Step-9 assert source_data == clone_data # Step-10 wait_for_volume_healthy(client, clone_volume_name)
def test_pvc_creation_with_default_sc_set(client, core_api, storage_class, pod): # NOQA """ Test creating PVC with default StorageClass set The target is to make sure the newly create PV/PVC won't use default StorageClass, and if there is no default StorageClass, PV/PVC can still be created. 1. Create a StorageClass and set it to be the default StorageClass 2. Update static StorageClass to `longhorn-static-test` 3. Create volume then PV/PVC. 4. Make sure the newly created PV/PVC using StorageClass `longhorn-static-test` 5. Create pod with PVC. 6. Verify volume's Kubernetes Status 7. Remove PVC and Pod. 8. Verify volume's Kubernetes Status only contains current PV and history 9. Wait for volume to detach (since pod is deleted) 10. Reuse the volume on a new pod. Wait for the pod to start 11. Verify volume's Kubernetes Status reflect the new pod. 12. Delete PV/PVC/Pod. 13. Verify volume's Kubernetes Status only contains history 14. Delete the default StorageClass. 15. Create PV/PVC for the volume. 16. Make sure the PV's StorageClass is static StorageClass """ # set default storage class storage_class['metadata']['annotations'] = \ {"storageclass.kubernetes.io/is-default-class": "true"} create_storage_class(storage_class) static_sc_name = "longhorn-static-test" setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC) setting = client.update(setting, value=static_sc_name) assert setting.value == static_sc_name volume_name = "test-pvc-creation-with-sc" # NOQA pod_name = "pod-" + volume_name client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2) volume = wait_for_volume_detached(client, volume_name) pv_name = "pv-" + volume_name pvc_name = "pvc-" + volume_name pvc_name_extra = "pvc-" + volume_name + "-extra" create_pv_for_volume(client, core_api, volume, pv_name) create_pvc_for_volume(client, core_api, volume, pvc_name) ret = core_api.list_namespaced_persistent_volume_claim(namespace='default') for item in ret.items: if item.metadata.name == pvc_name: pvc_found = item break assert pvc_found assert pvc_found.spec.storage_class_name == static_sc_name pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pvc_name, }, }] create_and_wait_pod(core_api, pod) ks = { 'pvName': pv_name, 'pvStatus': 'Bound', 'namespace': 'default', 'pvcName': pvc_name, 'lastPVCRefAt': '', 'lastPodRefAt': '', 'workloadsStatus': [ { 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '', }, ], } wait_volume_kubernetes_status(client, volume_name, ks) delete_and_wait_pod(core_api, pod_name) delete_and_wait_pvc(core_api, pvc_name) ks = { 'pvName': pv_name, 'pvStatus': 'Released', 'namespace': 'default', 'pvcName': pvc_name, 'lastPVCRefAt': 'not empty', 'lastPodRefAt': 'not empty', } wait_volume_kubernetes_status(client, volume_name, ks) # try to reuse the pv volume = wait_for_volume_detached(client, volume_name) create_pvc_for_volume(client, core_api, volume, pvc_name_extra) pod['spec']['volumes'][0]['persistentVolumeClaim']['claimName'] = \ pvc_name_extra create_and_wait_pod(core_api, pod) ks = { 'pvName': pv_name, 'pvStatus': 'Bound', 'namespace': 'default', 'pvcName': pvc_name_extra, 'lastPVCRefAt': '', 'lastPodRefAt': '', 'workloadsStatus': [ { 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '', }, ], } wait_volume_kubernetes_status(client, volume_name, ks) delete_and_wait_pod(core_api, pod_name) delete_and_wait_pvc(core_api, 
pvc_name_extra) delete_and_wait_pv(core_api, pv_name) ks = { 'pvName': '', 'pvStatus': '', 'namespace': 'default', 'pvcName': pvc_name_extra, 'lastPVCRefAt': 'not empty', 'lastPodRefAt': 'not empty', } wait_volume_kubernetes_status(client, volume_name, ks) # without default storage class delete_storage_class(storage_class['metadata']['name']) create_pv_for_volume(client, core_api, volume, pv_name) create_pvc_for_volume(client, core_api, volume, pvc_name) ret = core_api.list_namespaced_persistent_volume_claim(namespace='default') pvc2 = False for item in ret.items: if item.metadata.name == pvc_name: pvc2 = item break assert pvc2 assert pvc2.spec.storage_class_name == static_sc_name delete_and_wait_pvc(core_api, pvc_name) delete_and_wait_pv(core_api, pv_name)
def test_csi_minimal_volume_size( client, core_api, csi_pv, pvc, pod_make): # NOQA """ Test CSI Minimal Volume Size 1. Create a PVC requesting size 5MiB. Check the PVC requested size is 5MiB and capacity size get is 10MiB. 2. Remove the PVC. 3. Create a PVC requesting size 10MiB. Check the PVC requested size and capacity size get are both 10MiB. 4. Create a pod to use this PVC. 5. Write some data to the volume and read it back to compare. """ vol_name = generate_volume_name() create_and_check_volume(client, vol_name, size=str(100*Mi)) low_storage = str(5*Mi) min_storage = str(10*Mi) pv_name = vol_name + "-pv" csi_pv['metadata']['name'] = pv_name csi_pv['spec']['csi']['volumeHandle'] = vol_name csi_pv['spec']['capacity']['storage'] = min_storage core_api.create_persistent_volume(csi_pv) pvc_name = vol_name + "-pvc" pvc['metadata']['name'] = pvc_name pvc['spec']['volumeName'] = pv_name pvc['spec']['resources']['requests']['storage'] = low_storage pvc['spec']['storageClassName'] = '' core_api.create_namespaced_persistent_volume_claim(body=pvc, namespace='default') claim = common.wait_for_pvc_phase(core_api, pvc_name, "Bound") assert claim.spec.resources.requests['storage'] == low_storage assert claim.status.capacity['storage'] == min_storage common.delete_and_wait_pvc(core_api, pvc_name) common.delete_and_wait_pv(core_api, pv_name) wait_for_volume_detached(client, vol_name) core_api.create_persistent_volume(csi_pv) pvc['spec']['resources']['requests']['storage'] = min_storage core_api.create_namespaced_persistent_volume_claim(body=pvc, namespace='default') claim = common.wait_for_pvc_phase(core_api, pvc_name, "Bound") assert claim.spec.resources.requests['storage'] == min_storage assert claim.status.capacity['storage'] == min_storage pod_name = vol_name + '-pod' pod = pod_make(name=pod_name) pod['spec']['volumes'] = [create_pvc_spec(pvc_name)] create_and_wait_pod(core_api, pod) test_data = "longhorn-integration-test" test_file = "test" write_pod_volume_data(core_api, pod_name, test_data, test_file) read_data = read_volume_data(core_api, pod_name, test_file) assert read_data == test_data
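# The assertions above capture Longhorn's minimum-volume-size behaviour: a PVC
# requesting less than 10MiB still gets a 10MiB volume, while the original request is
# preserved on the claim spec. A one-line illustration of that rounding rule (plain
# arithmetic mirroring the expected values above, not a Longhorn API call):
def expected_longhorn_capacity_sketch(requested_bytes, minimum=10 * Mi):
    return max(requested_bytes, minimum)


# e.g. expected_longhorn_capacity_sketch(5 * Mi) == 10 * Mi, matching the
# claim.status.capacity check performed in the test above.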
def test_csi_offline_expansion(client, core_api, storage_class, pvc, pod_manifest): # NOQA """ Test CSI feature: offline expansion 1. Create a new `storage_class` with `allowVolumeExpansion` set 2. Create PVC and Pod with dynamic provisioned volume from the StorageClass 3. Generate `test_data` and write to the pod 4. Delete the pod 5. Update pvc.spec.resources to expand the volume 6. Verify the volume expansion done using Longhorn API 7. Create a new pod and validate the volume content """ create_storage_class(storage_class) pod_name = 'csi-offline-expand-volume-test' pvc_name = pod_name + "-pvc" pvc['metadata']['name'] = pvc_name pvc['spec']['storageClassName'] = storage_class['metadata']['name'] create_pvc(pvc) pod_manifest['metadata']['name'] = pod_name pod_manifest['spec']['volumes'] = [{ 'name': pod_manifest['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': {'claimName': pvc_name}, }] create_and_wait_pod(core_api, pod_manifest) test_data = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, pod_name, test_data) delete_and_wait_pod(core_api, pod_name) pv = wait_and_get_pv_for_pvc(core_api, pvc_name) assert pv.status.phase == "Bound" volume_name = pv.spec.csi.volume_handle wait_for_volume_detached(client, volume_name) pvc['spec']['resources'] = { 'requests': { 'storage': size_to_string(EXPANDED_VOLUME_SIZE*Gi) } } expand_and_wait_for_pvc(core_api, pvc) wait_for_volume_expansion(client, volume_name) volume = client.by_id_volume(volume_name) assert volume.state == "detached" assert volume.size == str(EXPANDED_VOLUME_SIZE*Gi) pod_manifest['metadata']['name'] = pod_name pod_manifest['spec']['volumes'] = [{ 'name': pod_manifest['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': {'claimName': pvc_name}, }] create_and_wait_pod(core_api, pod_manifest) resp = read_volume_data(core_api, pod_name) assert resp == test_data volume = client.by_id_volume(volume_name) engine = get_volume_engine(volume) assert volume.size == str(EXPANDED_VOLUME_SIZE*Gi) assert volume.size == engine.size
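# Offline expansion above is driven entirely through the PVC: the test updates
# pvc.spec.resources and calls expand_and_wait_for_pvc(), which presumably patches the
# claim and then waits for the grown size. The patch half can be done with the
# official kubernetes client as sketched below; the function name is illustrative,
# not the exact common.py implementation.
def request_pvc_expansion_sketch(core_api, pvc_manifest, namespace='default'):
    name = pvc_manifest['metadata']['name']
    size = pvc_manifest['spec']['resources']['requests']['storage']
    # Asking Kubernetes/Longhorn to expand is just a patch of spec.resources;
    # the real helper follows this with a wait on the reported capacity.
    core_api.patch_namespaced_persistent_volume_claim(
        name=name, namespace=namespace,
        body={'spec': {'resources': {'requests': {'storage': size}}}})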
def test_upgrade(upgrade_image_tag, settings_reset, volume_name, pod_make, statefulset, storage_class): # NOQA """ Test Longhorn upgrade Prerequisite: - Disable Auto Salvage Setting 1. Find the upgrade image tag 2. Create a volume, generate and write data into the volume. 3. Create a Pod using a volume, generate and write data 4. Create a StatefulSet with 2 replicas, generate and write data to their volumes 5. Keep all volumes attached 6. Upgrade Longhorn system. 7. Check Pod and StatefulSet didn't restart after upgrade 8. Check All volumes data 9. Write data to StatefulSet pods, and Attached volume 10. Check data written to StatefulSet pods, and attached volume. 11. Detach the volume, and Delete Pod, and StatefulSet to detach theirvolumes 12. Upgrade all volumes engine images. 13. Attach the volume, and recreate Pod, and StatefulSet 14. Check All volumes data """ new_ei_name = "longhornio/longhorn-engine:" + upgrade_image_tag client = get_longhorn_api_client() core_api = get_core_api_client() host_id = get_self_host_id() pod_data_path = "/data/test" pod_volume_name = generate_volume_name() auto_salvage_setting = client.by_id_setting(SETTING_AUTO_SALVAGE) setting = client.update(auto_salvage_setting, value="false") assert setting.name == SETTING_AUTO_SALVAGE assert setting.value == "false" # Create Volume attached to a node. volume1 = create_and_check_volume(client, volume_name, size=SIZE) volume1.attach(hostId=host_id) volume1 = wait_for_volume_healthy(client, volume_name) volume1_data = write_volume_random_data(volume1) # Create Volume used by Pod pod_name, pv_name, pvc_name, pod_md5sum = \ prepare_pod_with_data_in_mb(client, core_api, pod_make, pod_volume_name, data_path=pod_data_path, add_liveness_prope=False) # Create multiple volumes used by StatefulSet statefulset_name = 'statefulset-upgrade-test' update_statefulset_manifests(statefulset, storage_class, statefulset_name) create_storage_class(storage_class) create_and_wait_statefulset(statefulset) statefulset_pod_info = get_statefulset_pod_info(core_api, statefulset) for sspod_info in statefulset_pod_info: sspod_info['data'] = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, sspod_info['pod_name'], sspod_info['data']) # upgrade Longhorn assert longhorn_upgrade(upgrade_image_tag) client = get_longhorn_api_client() # wait for 1 minute before checking pod restarts time.sleep(60) pod = core_api.read_namespaced_pod(name=pod_name, namespace='default') assert pod.status.container_statuses[0].restart_count == 0 for sspod_info in statefulset_pod_info: sspod = core_api.read_namespaced_pod(name=sspod_info['pod_name'], namespace='default') assert \ sspod.status.container_statuses[0].restart_count == 0 for sspod_info in statefulset_pod_info: resp = read_volume_data(core_api, sspod_info['pod_name']) assert resp == sspod_info['data'] res_pod_md5sum = get_pod_data_md5sum(core_api, pod_name, pod_data_path) assert res_pod_md5sum == pod_md5sum check_volume_data(volume1, volume1_data) for sspod_info in statefulset_pod_info: sspod_info['data'] = generate_random_data(VOLUME_RWTEST_SIZE) write_pod_volume_data(core_api, sspod_info['pod_name'], sspod_info['data']) for sspod_info in statefulset_pod_info: resp = read_volume_data(core_api, sspod_info['pod_name']) assert resp == sspod_info['data'] volume1 = client.by_id_volume(volume_name) volume1_data = write_volume_random_data(volume1) check_volume_data(volume1, volume1_data) statefulset['spec']['replicas'] = replicas = 0 apps_api = get_apps_api_client() 
apps_api.patch_namespaced_stateful_set( name=statefulset_name, namespace='default', body={ 'spec': { 'replicas': replicas } }) delete_and_wait_pod(core_api, pod_name) volume = client.by_id_volume(volume_name) volume.detach() volumes = client.list_volume() for v in volumes: wait_for_volume_detached(client, v.name) engineimages = client.list_engine_image() for ei in engineimages: if ei.image == new_ei_name: new_ei = ei volumes = client.list_volume() for v in volumes: volume = client.by_id_volume(v.name) volume.engineUpgrade(image=new_ei.image) statefulset['spec']['replicas'] = replicas = 2 apps_api = get_apps_api_client() apps_api.patch_namespaced_stateful_set( name=statefulset_name, namespace='default', body={ 'spec': { 'replicas': replicas } }) wait_statefulset(statefulset) pod = pod_make(name=pod_name) pod['spec']['volumes'] = [create_pvc_spec(pvc_name)] create_and_wait_pod(core_api, pod) volume1 = client.by_id_volume(volume_name) volume1.attach(hostId=host_id) volume1 = wait_for_volume_healthy(client, volume_name) for sspod_info in statefulset_pod_info: resp = read_volume_data(core_api, sspod_info['pod_name']) assert resp == sspod_info['data'] res_pod_md5sum = get_pod_data_md5sum(core_api, pod_name, pod_data_path) assert res_pod_md5sum == pod_md5sum check_volume_data(volume1, volume1_data)
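# After volume.engineUpgrade() above, the test relies on Longhorn to switch each
# detached volume to the new engine image. A rough polling sketch of how one could
# wait for that switch is shown below; `currentImage` is assumed to be the status
# field the Longhorn API exposes for this, and the helper itself is illustrative.
import time


def wait_for_engine_image_sketch(client, volume_name, image,
                                 retries=300, interval=2):
    for _ in range(retries):
        volume = client.by_id_volume(volume_name)
        if getattr(volume, 'currentImage', None) == image:
            return volume
        time.sleep(interval)
    assert False, "%s never switched to engine image %s" % (volume_name, image)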
def test_kubernetes_status( client, core_api, storage_class, # NOQA statefulset, csi_pv, pvc, pod): # NOQA """ Test Volume feature: Kubernetes Status 1. Create StorageClass with `reclaimPolicy = Retain` 2. Create a statefulset `kubernetes-status-test` with the StorageClass 1. The statefulset has scale of 2. 3. Get the volume name from the SECOND pod of the StateufulSet pod and create an `extra_pod` with the same volume on the same node 4. Check the volumes that used by the StatefulSet 1. The volume used by the FIRST StatefulSet pod will have one workload 2. The volume used by the SECOND StatefulSet pod will have two workloads 3. Validate related status, e.g. pv/pod name/state, workload name/type 5. Check the volumes again 1. PV/PVC should still be bound 2. The volume used by the FIRST pod should have history data 3. The volume used by the SECOND and extra pod should have current data point to the extra pod 6. Delete the extra pod 1. Now all the volume's should only have history data(`lastPodRefAt` set) 7. Delete the PVC 1. PVC should be updated with status `Released` and become history data 8. Delete PV 1. All the Kubernetes status information should be cleaned up. 9. Reuse the two Longhorn volumes to create new pods 1. Since the `reclaimPolicy == Retain`, volume won't be deleted by Longhorn 2. Check the Kubernetes status now updated, with pod info but empty workload 3. Default Longhorn Static StorageClass will remove the PV with PVC, but leave Longhorn volume """ statefulset_name = 'kubernetes-status-test' update_statefulset_manifests(statefulset, storage_class, statefulset_name) storage_class['reclaimPolicy'] = 'Retain' create_storage_class(storage_class) create_and_wait_statefulset(statefulset) pod_info = get_statefulset_pod_info(core_api, statefulset) volume_info = [p['pv_name'] for p in pod_info] extra_pod_name = 'extra-pod-using-' + volume_info[1] pod['metadata']['name'] = extra_pod_name p2 = core_api.read_namespaced_pod(name=pod_info[1]['pod_name'], namespace='default') pod['spec']['nodeName'] = p2.spec.node_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pod_info[1]['pvc_name'], }, }] create_and_wait_pod(core_api, pod) for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] # NOQA volume = client.by_id_volume(volume_name) k_status = volume.kubernetesStatus workloads = k_status.workloadsStatus assert k_status.pvName == p['pv_name'] assert k_status.pvStatus == 'Bound' assert k_status.namespace == 'default' assert k_status.pvcName == p['pvc_name'] assert not k_status.lastPVCRefAt assert not k_status.lastPodRefAt if i == 0: assert len(workloads) == 1 assert workloads[0].podName == p['pod_name'] assert workloads[0].workloadName == statefulset_name assert workloads[0].workloadType == 'StatefulSet' for _ in range(RETRY_COUNTS): if workloads[0].podStatus == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume.kubernetesStatus workloads = k_status.workloadsStatus assert workloads[0].podStatus == 'Running' if i == 1: assert len(k_status.workloadsStatus) == 2 if workloads[0].podName == pod_info[i]['pod_name']: assert workloads[1].podName == extra_pod_name assert workloads[0].workloadName == statefulset_name assert workloads[0].workloadType == 'StatefulSet' assert not workloads[1].workloadName assert not workloads[1].workloadType else: assert workloads[1].podName == pod_info[i]['pod_name'] assert workloads[0].podName == 
extra_pod_name assert not workloads[0].workloadName assert not workloads[0].workloadType assert workloads[1].workloadName == statefulset_name assert workloads[1].workloadType == 'StatefulSet' for _ in range(RETRY_COUNTS): if workloads[0].podStatus == 'Running' and \ workloads[1].podStatus == 'Running': break time.sleep(RETRY_INTERVAL) volume = client.by_id_volume(volume_name) k_status = volume.kubernetesStatus workloads = k_status.workloadsStatus assert len(workloads) == 2 assert workloads[0].podStatus == 'Running' assert workloads[1].podStatus == 'Running' ks_list = [{}, {}] delete_and_wait_statefulset_only(core_api, statefulset) # the extra pod is still using the 2nd volume for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] ks_list[i]['pvName'] = p['pv_name'] ks_list[i]['pvStatus'] = 'Bound' ks_list[i]['namespace'] = 'default' ks_list[i]['pvcName'] = p['pvc_name'] ks_list[i]['lastPVCRefAt'] = '' if i == 0: ks_list[i]['lastPodRefAt'] = 'not empty' ks_list[i]['workloadsStatus'] = [ { 'podName': p['pod_name'], 'podStatus': 'Running', 'workloadName': statefulset_name, 'workloadType': 'StatefulSet', }, ] if i == 1: ks_list[i]['lastPodRefAt'] = '' ks_list[i]['workloadsStatus'] = [{ 'podName': extra_pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '', }] wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # deleted extra_pod, all volumes have no workload delete_and_wait_pod(core_api, pod['metadata']['name']) for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] ks_list[i]['lastPodRefAt'] = 'not empty' wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # deleted pvc only. for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] delete_and_wait_pvc(core_api, p['pvc_name']) ks_list[i]['pvStatus'] = 'Released' ks_list[i]['lastPVCRefAt'] = 'not empty' wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # deleted pv only. 
for i in range(len(volume_info)): p, volume_name = pod_info[i], volume_info[i] delete_and_wait_pv(core_api, p['pv_name']) ks_list[i]['pvName'] = '' ks_list[i]['pvStatus'] = '' wait_volume_kubernetes_status(client, volume_name, ks_list[i]) # reuse that volume for p, volume_name in zip(pod_info, volume_info): p['pod_name'] = p['pod_name'].replace('kubernetes-status-test', 'kubernetes-status-test-reuse') p['pvc_name'] = p['pvc_name'].replace('kubernetes-status-test', 'kubernetes-status-test-reuse') p['pv_name'] = p['pvc_name'] csi_pv['metadata']['name'] = p['pv_name'] csi_pv['spec']['csi']['volumeHandle'] = volume_name csi_pv['spec']['storageClassName'] = \ DEFAULT_LONGHORN_STATIC_STORAGECLASS_NAME core_api.create_persistent_volume(csi_pv) pvc['metadata']['name'] = p['pvc_name'] pvc['spec']['volumeName'] = p['pv_name'] pvc['spec']['storageClassName'] = \ DEFAULT_LONGHORN_STATIC_STORAGECLASS_NAME core_api.create_namespaced_persistent_volume_claim(body=pvc, namespace='default') pod['metadata']['name'] = p['pod_name'] pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': p['pvc_name'], }, }] create_and_wait_pod(core_api, pod) ks = { 'pvName': p['pv_name'], 'pvStatus': 'Bound', 'namespace': 'default', 'pvcName': p['pvc_name'], 'lastPVCRefAt': '', 'lastPodRefAt': '', 'workloadsStatus': [ { 'podName': p['pod_name'], 'podStatus': 'Running', 'workloadName': '', 'workloadType': '', }, ], } wait_volume_kubernetes_status(client, volume_name, ks) delete_and_wait_pod(core_api, p['pod_name']) # Since persistentVolumeReclaimPolicy of csi_pv is `Delete`, # we don't need to delete bounded pv manually delete_and_wait_pvc(core_api, p['pvc_name']) wait_delete_pv(core_api, p['pv_name'])
def test_csi_volumesnapshot_basic(set_random_backupstore,  # NOQA
                                  volumesnapshotclass,  # NOQA
                                  volumesnapshot,  # NOQA
                                  client,  # NOQA
                                  core_api,  # NOQA
                                  volume_name,  # NOQA
                                  csi_pv,  # NOQA
                                  pvc,  # NOQA
                                  pod_make,  # NOQA
                                  volsnapshotclass_delete_policy,  # NOQA
                                  backup_is_deleted):  # NOQA
    """
    Test creation / restoration / deletion of a backup via the csi snapshotter

    Context:

    We want to allow the user to programmatically create/restore/delete
    longhorn backups via the csi snapshot mechanism
    ref: https://kubernetes.io/docs/concepts/storage/volume-snapshots/

    Setup:

    1. Make sure your cluster contains the CRDs below
       https://github.com/kubernetes-csi/external-snapshotter
       /tree/master/client/config/crd
    2. Make sure your cluster contains the snapshot controller
       https://github.com/kubernetes-csi/external-snapshotter
       /tree/master/deploy/kubernetes/snapshot-controller

    Steps:

    def csi_volumesnapshot_creation_test(snapshotClass=longhorn|custom):
    1. create volume(1)
    2. write data to volume(1)
    3. create a kubernetes `VolumeSnapshot` object
       the `VolumeSnapshot.uuid` will be used to identify a
       **longhorn snapshot** and the associated `VolumeSnapshotContent` object
    4. check creation of a new longhorn snapshot named `snapshot-uuid`
    5. check for `VolumeSnapshotContent` named `snapcontent-uuid`
    6. wait for `VolumeSnapshotContent.readyToUse` flag to be set to **true**
    7. check for backup existence on the backupstore

    # the csi snapshot restore sets the fromBackup field same as
    # the StorageClass based restore approach.
    def csi_volumesnapshot_restore_test():
    8. create a `PersistentVolumeClaim` object where the `dataSource` field
       references the `VolumeSnapshot` object by name
    9. verify creation of a new volume(2) bound to the pvc created in step(8)
    10. verify data of new volume(2) equals data from backup
        (ie old data above)

    # default longhorn snapshot class is set to Delete
    # add a second test with a custom snapshot class with deletionPolicy
    # set to Retain you can reuse these methods for that and other tests
    def csi_volumesnapshot_deletion_test(deletionPolicy='Delete|Retain'):
    11. delete `VolumeSnapshot` object
    12. if deletionPolicy == Delete:
        13. verify deletion of `VolumeSnapshot` and `VolumeSnapshotContent`
            objects
        14. verify deletion of backup from backupstore
    12. if deletionPolicy == Retain:
        13. verify deletion of `VolumeSnapshot`
        14. verify retention of `VolumeSnapshotContent`
            and backup on backupstore
    15.
cleanup """ csisnapclass = \ volumesnapshotclass(name="snapshotclass", deletepolicy=volsnapshotclass_delete_policy) pod_name, pv_name, pvc_name, md5sum = \ prepare_pod_with_data_in_mb(client, core_api, csi_pv, pvc, pod_make, volume_name, data_path="/data/test") # Create volumeSnapshot test csivolsnap = volumesnapshot(volume_name + "-volumesnapshot", "default", csisnapclass["metadata"]["name"], "persistentVolumeClaimName", pvc_name) volume = client.by_id_volume(volume_name) for i in range(RETRY_COUNTS): snapshots = volume.snapshotList() if len(snapshots) == 2: break time.sleep(RETRY_INTERVAL) lh_snapshot = None snapshots = volume.snapshotList() for snapshot in snapshots: if snapshot["name"] == "snapshot-" + csivolsnap["metadata"]["uid"]: lh_snapshot = snapshot assert lh_snapshot is not None wait_for_volumesnapshot_ready(csivolsnap["metadata"]["name"], csivolsnap["metadata"]["namespace"]) bv1, b = find_backup(client, volume_name, lh_snapshot["name"]) assert b["snapshotName"] == lh_snapshot["name"] restore_pvc_name = pvc["metadata"]["name"] + "-restore" restore_pvc_size = pvc["spec"]["resources"]["requests"]["storage"] restore_csi_volume_snapshot(core_api, client, csivolsnap, restore_pvc_name, restore_pvc_size) restore_pod = pod_make() restore_pod_name = restore_pod["metadata"]["name"] restore_pod['spec']['volumes'] = [create_pvc_spec(restore_pvc_name)] create_and_wait_pod(core_api, restore_pod) restore_md5sum = \ get_pod_data_md5sum(core_api, restore_pod_name, path="/data/test") assert restore_md5sum == md5sum # Delete volumeSnapshot test delete_volumesnapshot(csivolsnap["metadata"]["name"], "default") if backup_is_deleted is False: find_backup(client, volume_name, b["snapshotName"]) else: wait_for_backup_delete(client, volume_name, b["name"])
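# The volumesnapshotclass/volumesnapshot fixtures used above wrap the CSI snapshot
# CRDs. For reference, a VolumeSnapshot pointing at a PVC can be created directly with
# the CustomObjectsApi as sketched below; the group/plural names come from the
# external-snapshotter project, while the API version (v1 vs v1beta1) depends on the
# cluster, so treat this as an illustrative sketch rather than the fixture's code.
from kubernetes import client as k8s_client


def create_volumesnapshot_sketch(name, pvc_name, snapclass_name,
                                 namespace='default', version='v1'):
    body = {
        'apiVersion': 'snapshot.storage.k8s.io/' + version,
        'kind': 'VolumeSnapshot',
        'metadata': {'name': name, 'namespace': namespace},
        'spec': {
            'volumeSnapshotClassName': snapclass_name,
            'source': {'persistentVolumeClaimName': pvc_name},
        },
    }
    api = k8s_client.CustomObjectsApi()
    return api.create_namespaced_custom_object(
        group='snapshot.storage.k8s.io', version=version,
        namespace=namespace, plural='volumesnapshots', body=body)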
def test_backup_kubernetes_status(set_random_backupstore, client, core_api, pod): # NOQA """ Test that Backups have KubernetesStatus stored properly when there is an associated PersistentVolumeClaim and Pod. 1. Setup a random backupstore 2. Set settings Longhorn Static StorageClass to `longhorn-static-test` 3. Create a volume and PV/PVC. Verify the StorageClass of PVC 4. Create a Pod using the PVC. 5. Check volume's Kubernetes status to reflect PV/PVC/Pod correctly. 6. Create a backup for the volume. 7. Verify the labels of created backup reflect PV/PVC/Pod status. 8. Restore the backup to a volume. Wait for restoration to complete. 9. Check the volume's Kubernetes Status 1. Make sure the `lastPodRefAt` and `lastPVCRefAt` is snapshot created time 10. Delete the backup and restored volume. 11. Delete PV/PVC/Pod. 12. Verify volume's Kubernetes Status updated to reflect history data. 13. Attach the volume and create another backup. Verify the labels 14. Verify the volume's Kubernetes status. 15. Restore the previous backup to a new volume. Wait for restoration. 16. Verify the restored volume's Kubernetes status. 1. Make sure `lastPodRefAt` and `lastPVCRefAt` matched volume on step 12 """ host_id = get_self_host_id() static_sc_name = "longhorn-static-test" setting = client.by_id_setting(SETTING_DEFAULT_LONGHORN_STATIC_SC) setting = client.update(setting, value=static_sc_name) assert setting.value == static_sc_name volume_name = "test-backup-kubernetes-status-pod" # NOQA client.create_volume(name=volume_name, size=SIZE, numberOfReplicas=2) volume = wait_for_volume_detached(client, volume_name) pod_name = "pod-" + volume_name pv_name = "pv-" + volume_name pvc_name = "pvc-" + volume_name create_pv_for_volume(client, core_api, volume, pv_name) create_pvc_for_volume(client, core_api, volume, pvc_name) ret = core_api.list_namespaced_persistent_volume_claim(namespace='default') pvc_found = False for item in ret.items: if item.metadata.name == pvc_name: pvc_found = item break assert pvc_found assert pvc_found.spec.storage_class_name == static_sc_name pod['metadata']['name'] = pod_name pod['spec']['volumes'] = [{ 'name': pod['spec']['containers'][0]['volumeMounts'][0]['name'], 'persistentVolumeClaim': { 'claimName': pvc_name, }, }] create_and_wait_pod(core_api, pod) ks = { 'lastPodRefAt': '', 'lastPVCRefAt': '', 'namespace': 'default', 'pvcName': pvc_name, 'pvName': pv_name, 'pvStatus': 'Bound', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, volume_name, ks) volume = wait_for_volume_healthy(client, volume_name) # Create Backup manually instead of calling create_backup since Kubernetes # is not guaranteed to mount our Volume to the test host. 
snap = create_snapshot(client, volume_name) volume.snapshotBackup(name=snap.name) wait_for_backup_completion(client, volume_name, snap.name) _, b = find_backup(client, volume_name, snap.name) # Check backup label status = loads(b.labels.get(KUBERNETES_STATUS_LABEL)) assert status == ks # Check backup volume label for _ in range(RETRY_COUNTS): bv = client.by_id_backupVolume(volume_name) if bv is not None and bv.labels is not None: break time.sleep(RETRY_INTERVAL) assert bv is not None and bv.labels is not None status = loads(bv.labels.get(KUBERNETES_STATUS_LABEL)) assert status == ks restore_name = generate_volume_name() client.create_volume(name=restore_name, size=SIZE, numberOfReplicas=2, fromBackup=b.url) wait_for_volume_restoration_completed(client, restore_name) wait_for_volume_detached(client, restore_name) snapshot_created = b.snapshotCreated ks = { 'lastPodRefAt': b.snapshotCreated, 'lastPVCRefAt': b.snapshotCreated, 'namespace': 'default', 'pvcName': pvc_name, # Restoration should not apply PersistentVolume data. 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, restore_name, ks) restore = client.by_id_volume(restore_name) # We need to compare LastPodRefAt and LastPVCRefAt manually since # wait_volume_kubernetes_status only checks for empty or non-empty state. assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"] assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"] delete_backup(client, bv.name, b.name) client.delete(restore) wait_for_volume_delete(client, restore_name) delete_and_wait_pod(core_api, pod_name) delete_and_wait_pvc(core_api, pvc_name) delete_and_wait_pv(core_api, pv_name) # With the Pod, PVC, and PV deleted, the Volume should have both Ref # fields set. Check that a new Backup and Restore will use this instead of # manually populating the Ref fields. ks = { 'lastPodRefAt': 'NOT NULL', 'lastPVCRefAt': 'NOT NULL', 'namespace': 'default', 'pvcName': pvc_name, 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, volume_name, ks) volume = wait_for_volume_detached(client, volume_name) volume.attach(hostId=host_id) volume = wait_for_volume_healthy(client, volume_name) snap = create_snapshot(client, volume_name) volume.snapshotBackup(name=snap.name) volume = wait_for_backup_completion(client, volume_name, snap.name) bv, b = find_backup(client, volume_name, snap.name) new_b = bv.backupGet(name=b.name) status = loads(new_b.labels.get(KUBERNETES_STATUS_LABEL)) # Check each field manually, we have no idea what the LastPodRefAt or the # LastPVCRefAt will be. We just know it shouldn't be SnapshotCreated. 
assert status['lastPodRefAt'] != snapshot_created assert status['lastPVCRefAt'] != snapshot_created assert status['namespace'] == "default" assert status['pvcName'] == pvc_name assert status['pvName'] == "" assert status['pvStatus'] == "" assert status['workloadsStatus'] == [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] restore_name = generate_volume_name() client.create_volume(name=restore_name, size=SIZE, numberOfReplicas=2, fromBackup=b.url) wait_for_volume_restoration_completed(client, restore_name) wait_for_volume_detached(client, restore_name) ks = { 'lastPodRefAt': status['lastPodRefAt'], 'lastPVCRefAt': status['lastPVCRefAt'], 'namespace': 'default', 'pvcName': pvc_name, 'pvName': '', 'pvStatus': '', 'workloadsStatus': [{ 'podName': pod_name, 'podStatus': 'Running', 'workloadName': '', 'workloadType': '' }] } wait_volume_kubernetes_status(client, restore_name, ks) restore = client.by_id_volume(restore_name) assert restore.kubernetesStatus.lastPodRefAt == ks["lastPodRefAt"] assert restore.kubernetesStatus.lastPVCRefAt == ks["lastPVCRefAt"] # cleanup backupstore_cleanup(client) client.delete(restore) cleanup_volume(client, volume)
def test_data_locality_basic(client, core_api, volume_name, pod, settings_reset):  # NOQA
    """
    Test data locality basic feature

    Context:

    The Data Locality feature gives users the option to keep a local
    replica on the same node as the consuming pod.
    Longhorn currently supports 2 modes:
    - disabled: Longhorn does not try to keep a local replica
    - best-effort: Longhorn tries to keep a local replica

    See manual tests at:
    https://github.com/longhorn/longhorn/issues/1045#issuecomment-680706283

    Steps:

    Case 1: Test that Longhorn builds a local replica on the engine node

    1. Create a volume(1) with 1 replica and dataLocality set to disabled
    2. Find the node where the replica is located.
       Let's call this node replica-node
    3. Attach the volume to a node other than replica-node.
       Let's call this node engine-node
    4. Write 200MB data to volume(1)
    5. Use a retry loop to verify that Longhorn does not create
       a replica on the engine-node
    6. Update dataLocality to best-effort for volume(1)
    7. Use a retry loop to verify that Longhorn creates and rebuilds
       a replica on the engine-node and removes the other replica
    8. Detach volume(1) and attach it to a different node.
       Let's call the new node new-engine-node and the old node
       old-engine-node
    9. Wait for volume(1) to finish attaching
    10. Use a retry loop to verify that Longhorn creates and rebuilds
        a replica on the new-engine-node and removes the replica on
        old-engine-node

    Case 2: Test that Longhorn prioritizes deleting replicas on the same node

    1. Add the tag AVAIL to node-1 and node-2
    2. Set node soft anti-affinity to `true`.
    3. Create a volume(2) with 3 replicas and dataLocality set to best-effort
    4. Use a retry loop to verify that all 3 replicas are on node-1 and
       node-2, no replica is on node-3
    5. Attach volume(2) to node-3
    6. Use a retry loop to verify that there is no replica on node-3 and
       we can still read/write to volume(2)
    7. Find the node which contains 2 replicas.
       Let's call this node most-replica-node
    8. Set the replica count to 2 for volume(2)
    9. Verify that Longhorn removes one replica from most-replica-node

    Case 3: Test that the volume is not corrupted if there is an unexpected
    detachment during building local replica

    1. Remove the tag AVAIL from node-1 and node-2
       Set node soft anti-affinity to `false`.
    2. Create a volume(3) with 1 replica and dataLocality set to best-effort
    3. Attach volume(3) to node-3.
    4. Use a retry loop to verify that volume(3) has only 1 replica on node-3
    5. Write 800MB data to volume(3)
    6. Detach volume(3)
    7. Attach volume(3) to node-1
    8. Use a retry loop to:
       Wait until volume(3) finishes attaching.
       Wait until Longhorn starts rebuilding a replica on node-1
       Immediately detach volume(3)
    9. Verify that the replica on node-1 is in ERR state.
    10. Attach volume(3) to node-1
    11. Wait until volume(3) finishes attaching.
    12. Use a retry loop to verify that Longhorn cleans up the ERR replica,
        rebuilds a new replica on node-1, and removes the replica on node-3

    Case 4: Make sure a failed-to-schedule local replica doesn't block the
    creation of other replicas.

    1. Disable scheduling for node-3
    2. Create a vol with 1 replica, `dataLocality = best-effort`.
       The replica is scheduled on a node (say node-1)
    3. Attach vol to node-3. There is a fail-to-schedule replica
       with Spec.HardNodeAffinity=node-3
    4. Increase numberOfReplica to 3. Verify that the replica set contains:
       one on node-1, one on node-2, one failed replica with
       Spec.HardNodeAffinity=node-3.
    5. Decrease numberOfReplica to 2.
Verify that the replica set contains: one on node-1, one on node-2, one failed replica with Spec.HardNodeAffinity=node-3. 6. Decrease numberOfReplica to 1. Verify that the replica set contains: one on node-1 or node-2, one failed replica with Spec.HardNodeAffinity=node-3. 7. Decrease numberOfReplica to 2. Verify that the replica set contains: one on node-1, one on node-2, one failed replica with Spec.HardNodeAffinity=node-3. 8. Turn off data locality by set `dataLocality=disabled` for the vol. Verify that the replica set contains: one on node-1, one on node-2 9. clean up """ # Case 1: Test that Longhorn builds a local replica on the engine node nodes = client.list_node() default_data_locality_setting = \ client.by_id_setting(SETTING_DEFAULT_DATA_LOCALITY) try: client.update(default_data_locality_setting, value="disabled") except Exception as e: print("Exception when update Default Data Locality setting", default_data_locality_setting, e) volume1_name = volume_name + "-1" volume1_size = str(500 * Mi) volume1_data_path = "/data/test" pv1_name = volume1_name + "-pv" pvc1_name = volume1_name + "-pvc" pod1_name = volume1_name + "-pod" pod1 = pod pod1['metadata']['name'] = pod1_name volume1 = create_and_check_volume(client, volume1_name, num_of_replicas=1, size=volume1_size) volume1 = client.by_id_volume(volume1_name) create_pv_for_volume(client, core_api, volume1, pv1_name) create_pvc_for_volume(client, core_api, volume1, pvc1_name) volume1 = client.by_id_volume(volume1_name) volume1_replica_node = volume1.replicas[0]['hostId'] volume1_attached_node = None for node in nodes: if node.name != volume1_replica_node: volume1_attached_node = node.name break assert volume1_attached_node is not None pod1['spec']['volumes'] = [{ "name": "pod-data", "persistentVolumeClaim": { "claimName": pvc1_name } }] pod1['spec']['nodeSelector'] = \ {"kubernetes.io/hostname": volume1_attached_node} create_and_wait_pod(core_api, pod1) write_pod_volume_random_data(core_api, pod1_name, volume1_data_path, DATA_SIZE_IN_MB_2) for i in range(10): volume1 = client.by_id_volume(volume1_name) assert len(volume1.replicas) == 1 assert volume1.replicas[0]['hostId'] != volume1_attached_node time.sleep(1) volume1 = client.by_id_volume(volume1_name) volume1.updateDataLocality(dataLocality="best-effort") for _ in range(RETRY_COUNTS): volume1 = client.by_id_volume(volume1_name) assert volume1[VOLUME_FIELD_ROBUSTNESS] == VOLUME_ROBUSTNESS_HEALTHY if len(volume1.replicas) == 1 and \ volume1.replicas[0]['hostId'] == volume1_attached_node: break time.sleep(RETRY_INTERVAL) assert len(volume1.replicas) == 1 assert volume1.replicas[0]['hostId'] == volume1_attached_node delete_and_wait_pod(core_api, pod1_name) volume1 = wait_for_volume_detached(client, volume1_name) volume1_replica_node = volume1.replicas[0]['hostId'] volume1_attached_node = None for node in nodes: if node.name != volume1_replica_node: volume1_attached_node = node.name break assert volume1_attached_node is not None pod1['spec']['nodeSelector'] = \ {"kubernetes.io/hostname": volume1_attached_node} create_and_wait_pod(core_api, pod1) for _ in range(RETRY_COUNTS): volume1 = client.by_id_volume(volume1_name) assert volume1[VOLUME_FIELD_ROBUSTNESS] == VOLUME_ROBUSTNESS_HEALTHY if len(volume1.replicas) == 1 and \ volume1.replicas[0]['hostId'] == volume1_attached_node: break time.sleep(RETRY_INTERVAL) assert len(volume1.replicas) == 1 assert volume1.replicas[0]['hostId'] == volume1_attached_node delete_and_wait_pod(core_api, pod1_name) wait_for_volume_detached(client, volume1_name) # 
Case 2: Test that Longhorn prioritizes deleting replicas on the same node node1 = nodes[0] node2 = nodes[1] node3 = nodes[2] client.update(node1, allowScheduling=True, tags=["AVAIL"]) client.update(node2, allowScheduling=True, tags=["AVAIL"]) replica_node_soft_anti_affinity_setting = \ client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY) try: client.update(replica_node_soft_anti_affinity_setting, value="true") except Exception as e: print( "Exception when update " "Replica Node Level Soft Anti-Affinity setting", replica_node_soft_anti_affinity_setting, e) volume2_name = volume_name + "-2" volume2_size = str(500 * Mi) pv2_name = volume2_name + "-pv" pvc2_name = volume2_name + "-pvc" pod2_name = volume2_name + "-pod" pod2 = pod pod2['metadata']['name'] = pod2_name volume2 = client.create_volume(name=volume2_name, size=volume2_size, numberOfReplicas=3, nodeSelector=["AVAIL"], dataLocality="best-effort") volume2 = wait_for_volume_detached(client, volume2_name) volume2 = client.by_id_volume(volume2_name) create_pv_for_volume(client, core_api, volume2, pv2_name) create_pvc_for_volume(client, core_api, volume2, pvc2_name) volume2 = client.by_id_volume(volume2_name) pod2['spec']['volumes'] = [{ "name": "pod-data", "persistentVolumeClaim": { "claimName": pvc2_name } }] pod2['spec']['nodeSelector'] = {"kubernetes.io/hostname": node3.name} create_and_wait_pod(core_api, pod2) volume2 = wait_for_volume_healthy(client, volume2_name) for replica in volume2.replicas: assert replica["hostId"] != node3.name volume2.updateReplicaCount(replicaCount=2) # 2 Healthy replicas and 1 replica failed to schedule # The failed to schedule replica is the local replica on node3 volume2 = wait_for_volume_replica_count(client, volume2_name, 3) volume2 = client.by_id_volume(volume2_name) volume2_healthy_replicas = [] for replica in volume2.replicas: if replica.running is True: volume2_healthy_replicas.append(replica) assert len(volume2_healthy_replicas) == 2 volume2_rep1 = volume2_healthy_replicas[0] volume2_rep2 = volume2_healthy_replicas[1] assert volume2_rep1["hostId"] != volume2_rep2["hostId"] delete_and_wait_pod(core_api, pod2_name) wait_for_volume_detached(client, volume2_name) # Case 3: Test that the volume is not corrupted if there is an unexpected # detachment during building local replica client.update(node1, allowScheduling=True, tags=[]) client.update(node2, allowScheduling=True, tags=[]) replica_node_soft_anti_affinity_setting = \ client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY) try: client.update(replica_node_soft_anti_affinity_setting, value="false") except Exception as e: print( "Exception when update " "Replica Node Level Soft Anti-Affinity setting", replica_node_soft_anti_affinity_setting, e) volume3_name = volume_name + "-3" volume3_size = str(1 * Gi) volume3_data_path = "/data/test" pv3_name = volume3_name + "-pv" pvc3_name = volume3_name + "-pvc" pod3_name = volume3_name + "-pod" pod3 = pod pod3['metadata']['name'] = pod3_name volume3 = client.create_volume(name=volume3_name, size=volume3_size, numberOfReplicas=1) volume3 = wait_for_volume_detached(client, volume3_name) volume3 = client.by_id_volume(volume3_name) create_pv_for_volume(client, core_api, volume3, pv3_name) create_pvc_for_volume(client, core_api, volume3, pvc3_name) volume3 = client.by_id_volume(volume3_name) pod3['spec']['volumes'] = [{ "name": "pod-data", "persistentVolumeClaim": { "claimName": pvc3_name } }] pod3['spec']['nodeSelector'] = {"kubernetes.io/hostname": node3.name} create_and_wait_pod(core_api, pod3) 
volume3 = wait_for_volume_healthy(client, volume3_name) write_pod_volume_random_data(core_api, pod3_name, volume3_data_path, DATA_SIZE_IN_MB_4) volume3.updateDataLocality(dataLocality="best-effort") volume3 = client.by_id_volume(volume3_name) if volume3.replicas[0]['hostId'] != node3.name: wait_for_rebuild_start(client, volume3_name) volume3 = client.by_id_volume(volume3_name) assert len(volume3.replicas) == 2 wait_for_rebuild_complete(client, volume3_name) volume3 = wait_for_volume_replica_count(client, volume3_name, 1) assert volume3.replicas[0]["hostId"] == node3.name delete_and_wait_pod(core_api, pod3_name) pod3['spec']['nodeSelector'] = {"kubernetes.io/hostname": node1.name} create_and_wait_pod(core_api, pod3) wait_for_rebuild_start(client, volume3_name) crash_engine_process_with_sigkill(client, core_api, volume3_name) delete_and_wait_pod(core_api, pod3_name) wait_for_volume_detached(client, volume3_name) volume3 = client.by_id_volume(volume3_name) assert len(volume3.replicas) == 1 assert volume3.replicas[0]["hostId"] == node3.name create_and_wait_pod(core_api, pod3) wait_for_rebuild_start(client, volume3_name) volume3 = client.by_id_volume(volume3_name) assert len(volume3.replicas) == 2 wait_for_rebuild_complete(client, volume3_name) # Wait for deletion of extra replica volume3 = wait_for_volume_replica_count(client, volume3_name, 1) assert volume3.replicas[0]["hostId"] == node1.name assert volume3.replicas[0]["mode"] == "RW" assert volume3.replicas[0]["running"] is True delete_and_wait_pod(core_api, pod3_name) wait_for_volume_detached(client, volume3_name) # Case 4: Make sure failed to schedule local replica doesn't block the # the creation of other replicas. replica_node_soft_anti_affinity_setting = \ client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY) try: client.update(replica_node_soft_anti_affinity_setting, value="false") except Exception as e: print( "Exception when update " "Replica Node Level Soft Anti-Affinity setting", replica_node_soft_anti_affinity_setting, e) client.update(node3, allowScheduling=False) volume4_name = volume_name + "-4" volume4_size = str(1 * Gi) volume4 = client.create_volume(name=volume4_name, size=volume4_size, numberOfReplicas=1, dataLocality="best-effort") volume4 = wait_for_volume_detached(client, volume4_name) volume4 = client.by_id_volume(volume4_name) volume4_replica_name = volume4.replicas[0]["name"] volume4.attach(hostId=node3.name) wait_for_volume_healthy(client, volume4_name) volume4 = client.by_id_volume(volume4_name) assert len(volume4.replicas) == 2 for replica in volume4.replicas: if replica["name"] == volume4_replica_name: assert replica["running"] is True assert replica["mode"] == "RW" else: assert replica["running"] is False assert replica["mode"] == "" assert volume4.conditions.scheduled.reason == \ "LocalReplicaSchedulingFailure" volume4 = volume4.updateReplicaCount(replicaCount=3) volume4 = wait_for_volume_degraded(client, volume4_name) v4_node1_replica_count = 0 v4_node2_replica_count = 0 v4_failed_replica_count = 0 for replica in volume4.replicas: if replica["hostId"] == node1.name: v4_node1_replica_count += 1 elif replica["hostId"] == node2.name: v4_node2_replica_count += 1 elif replica["hostId"] == "": v4_failed_replica_count += 1 assert v4_node1_replica_count == 1 assert v4_node2_replica_count == 1 assert v4_failed_replica_count > 0 volume4 = volume4.updateReplicaCount(replicaCount=2) volume4 = wait_for_volume_replica_count(client, volume4_name, 3) v4_node1_replica_count = 0 v4_node2_replica_count = 0 
v4_failed_replica_count = 0 for replica in volume4.replicas: if replica["hostId"] == node1.name: v4_node1_replica_count += 1 elif replica["hostId"] == node2.name: v4_node2_replica_count += 1 elif replica["hostId"] == "": v4_failed_replica_count += 1 assert v4_node1_replica_count == 1 assert v4_node2_replica_count == 1 assert v4_failed_replica_count > 0 volume4 = volume4.updateReplicaCount(replicaCount=1) volume4 = wait_for_volume_replica_count(client, volume4_name, 2) v4_node1_replica_count = 0 v4_node2_replica_count = 0 v4_failed_replica_count = 0 for replica in volume4.replicas: if replica["hostId"] == node1.name: v4_node1_replica_count += 1 elif replica["hostId"] == node2.name: v4_node2_replica_count += 1 elif replica["hostId"] == "": v4_failed_replica_count += 1 assert v4_node1_replica_count + v4_node2_replica_count == 1 assert v4_failed_replica_count == 1 volume4 = volume4.updateDataLocality(dataLocality="disabled") volume4 = volume4.updateReplicaCount(replicaCount=2) running_replica_count = 0 for _ in range(RETRY_COUNTS): volume4 = client.by_id_volume(volume4_name) running_replica_count = 0 for r in volume4.replicas: if r.failedAt == "" and r.running is True: running_replica_count += 1 if running_replica_count == 2: break time.sleep(RETRY_INTERVAL) assert running_replica_count == 2 v4_node1_replica_count = 0 v4_node2_replica_count = 0 v4_node3_replica_count = 0 for replica in volume4.replicas: wait_for_replica_running(client, volume4_name, replica["name"]) if replica["hostId"] == node1.name: v4_node1_replica_count += 1 elif replica["hostId"] == node2.name: v4_node2_replica_count += 1 elif replica["hostId"] == node3.name: v4_node3_replica_count += 1 assert v4_node1_replica_count == 1 assert v4_node2_replica_count == 1 assert v4_node3_replica_count == 0
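# Several checks above rely on wait_for_volume_replica_count() letting the replica set
# settle before asserting on hostIds. A simplified polling sketch of that kind of
# helper is shown here; the real one in common.py may apply additional checks, and the
# retry bounds below are illustrative defaults.
import time


def wait_for_replica_count_sketch(client, volume_name, count,
                                  retries=300, interval=2):
    for _ in range(retries):
        volume = client.by_id_volume(volume_name)
        if len(volume.replicas) == count:
            return volume
        time.sleep(interval)
    assert False, "%s never settled at %d replicas" % (volume_name, count)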