def wait_for_toleration_update(core_api, apps_api, count, set_tolerations):  # NOQA
    """
    Poll the Longhorn namespace until every workload check passes.

    Each attempt first sleeps RETRY_INTERVAL_LONG; at most RETRY_COUNTS
    attempts are made. An attempt passes when all of the following hold:

    1. every daemon set reports `updated_number_scheduled == count`
    2. every deployment reports `updated_replicas == spec.replicas`
    3. there are exactly `2 * count` instance-manager pods, all "Running"
    4. every pod in the namespace is "Running" and
       `check_tolerations_set(pod.spec.tolerations, set_tolerations)`
       is truthy
    5. the single engine image is in state "ready"

    Raises AssertionError if no attempt passes, or if the number of
    engine images is not exactly one.
    """

    def rollout_finished():
        # Checks run in a fixed order and stop at the first failure, so
        # later API calls are skipped once an earlier check has failed.
        daemon_sets = apps_api.list_namespaced_daemon_set(
            LONGHORN_NAMESPACE).items
        if any(ds.status.updated_number_scheduled != count
               for ds in daemon_sets):
            return False

        deployments = apps_api.list_namespaced_deployment(
            LONGHORN_NAMESPACE).items
        if any(dep.status.updated_replicas != dep.spec.replicas
               for dep in deployments):
            return False

        im_pods = core_api.list_namespaced_pod(
            LONGHORN_NAMESPACE,
            label_selector="longhorn.io/component=instance-manager").items
        if len(im_pods) != 2 * count:
            return False
        if any(pod.status.phase != "Running" for pod in im_pods):
            return False

        all_pods = core_api.list_namespaced_pod(LONGHORN_NAMESPACE).items
        for pod in all_pods:
            if pod.status.phase != "Running":
                return False
            if not check_tolerations_set(pod.spec.tolerations,
                                         set_tolerations):
                return False

        client = get_longhorn_api_client()  # NOQA
        engine_images = client.list_engine_image()
        assert len(engine_images) == 1
        if engine_images[0].state != "ready":
            return False
        return True

    updated = False
    for _ in range(RETRY_COUNTS):
        time.sleep(RETRY_INTERVAL_LONG)
        updated = rollout_finished()
        if updated:
            break
    assert updated
def wait_for_priority_class_update(core_api, apps_api, count, priority_class=None):  # NOQA
    """
    Poll until the Longhorn workloads are updated and the pod check passes.

    Each attempt first sleeps RETRY_INTERVAL_LONG; at most RETRY_COUNTS
    attempts are made. An attempt passes when
    `check_workload_update(core_api, apps_api, count)` is truthy and every
    pod in the Longhorn namespace is either "Running" or accepted by
    `check_priority_class(pod, priority_class)`.

    Raises AssertionError if no attempt passes.
    """
    updated = False
    for _ in range(RETRY_COUNTS):
        time.sleep(RETRY_INTERVAL_LONG)

        if not check_workload_update(core_api, apps_api, count):
            updated = False
            continue

        pods = core_api.list_namespaced_pod(LONGHORN_NAMESPACE).items
        # NOTE(review): a pod only fails this check when it is BOTH not
        # "Running" AND rejected by check_priority_class, so the priority
        # class of a Running pod is never inspected here. Confirm that
        # this is the intended condition.
        updated = all(
            pod.status.phase == "Running" or
            check_priority_class(pod, priority_class)
            for pod in pods)
        if updated:
            break
    assert updated
def check_workload_update(core_api, apps_api, count):  # NOQA
    """
    Report whether the Longhorn workloads currently look fully updated.

    Returns False as soon as one of these checks fails, True otherwise:

    1. every daemon set reports `updated_number_scheduled == count`
    2. every deployment reports `updated_replicas == spec.replicas`
    3. there are exactly `2 * count` instance-manager pods, all "Running"
    4. the state of the single engine image equals the value returned by
       `get_engine_image_status_value(client, <image name>)`

    Raises AssertionError if the number of engine images is not one.
    """
    daemon_sets = apps_api.list_namespaced_daemon_set(
        LONGHORN_NAMESPACE).items
    if any(ds.status.updated_number_scheduled != count
           for ds in daemon_sets):
        return False

    deployments = apps_api.list_namespaced_deployment(
        LONGHORN_NAMESPACE).items
    if any(dep.status.updated_replicas != dep.spec.replicas
           for dep in deployments):
        return False

    instance_managers = core_api.list_namespaced_pod(
        LONGHORN_NAMESPACE,
        label_selector="longhorn.io/component=instance-manager").items
    if len(instance_managers) != 2 * count:
        return False
    if any(pod.status.phase != "Running" for pod in instance_managers):
        return False

    client = get_longhorn_api_client()  # NOQA
    engine_images = client.list_engine_image()
    assert len(engine_images) == 1
    engine_image = engine_images[0]
    expected_state = get_engine_image_status_value(client,
                                                   engine_image.name)
    if engine_image.state != expected_state:
        return False
    return True
def wait_for_toleration_update(core_api, apps_api, count, set_tolerations):  # NOQA
    """
    Poll until the Longhorn workloads are updated and every pod passes
    the toleration check.

    Each attempt first sleeps RETRY_INTERVAL_LONG; at most RETRY_COUNTS
    attempts are made. An attempt passes when
    `check_workload_update(core_api, apps_api, count)` is truthy and every
    pod in the Longhorn namespace is "Running" with
    `check_tolerations_set(pod.spec.tolerations, set_tolerations)` truthy.

    Raises AssertionError if no attempt passes.
    """
    updated = False
    for _ in range(RETRY_COUNTS):
        time.sleep(RETRY_INTERVAL_LONG)

        if not check_workload_update(core_api, apps_api, count):
            updated = False
            continue

        pods = core_api.list_namespaced_pod(LONGHORN_NAMESPACE).items
        # all() stops at the first pod that is not ready, like the
        # explicit break it replaces.
        updated = all(
            pod.status.phase == "Running" and
            check_tolerations_set(pod.spec.tolerations, set_tolerations)
            for pod in pods)
        if updated:
            break
    assert updated
def wait_for_toleration_update(core_api, apps_api, count,  # NOQA
                               expected_tolerations,
                               chk_removed_tolerations=None):
    """
    Poll until the Longhorn-managed pods pass the toleration check.

    Each attempt first sleeps RETRY_INTERVAL_LONG; at most RETRY_COUNTS
    attempts are made. An attempt passes when
    `check_workload_update(core_api, apps_api, count)` is truthy and every
    pod labelled `longhorn.io/managed-by=longhorn-manager` is "Running"
    with `check_tolerations_set(...)` truthy. Pods without that label
    value (including pods with no labels at all) are skipped.

    :param core_api: Kubernetes core API client used to list pods.
    :param apps_api: Kubernetes apps API client, forwarded to
        `check_workload_update`.
    :param count: forwarded to `check_workload_update`.
    :param expected_tolerations: forwarded to `check_tolerations_set`.
    :param chk_removed_tolerations: forwarded to `check_tolerations_set`;
        `None` (the default) is replaced by a fresh empty list.

    Raises AssertionError if no attempt passes, or if a managed pod
    carries an `app` label listed in `not_managed_apps`.
    """
    # A fresh list per call: a `[]` default would be one object shared by
    # every call and handed to check_tolerations_set each time.
    if chk_removed_tolerations is None:
        chk_removed_tolerations = []

    # Apps that must never carry the managed-by=longhorn-manager label.
    not_managed_apps = [
        "csi-attacher",
        "csi-provisioner",
        "csi-resizer",
        "csi-snapshotter",
        "longhorn-csi-plugin",
        "longhorn-driver-deployer",
        "longhorn-manager",
        "longhorn-ui",
    ]

    updated = False
    for _ in range(RETRY_COUNTS):
        time.sleep(RETRY_INTERVAL_LONG)

        updated = True
        if not check_workload_update(core_api, apps_api, count):
            updated = False
            continue

        pod_list = core_api.list_namespaced_pod(LONGHORN_NAMESPACE).items
        for p in pod_list:
            # The Kubernetes client reports `labels` as None for a pod
            # without labels; treat that as "no labels".
            labels = p.metadata.labels or {}

            managed_by = labels.get('longhorn.io/managed-by', '')
            if str(managed_by) != "longhorn-manager":
                continue

            app_name = str(labels.get('app', ''))
            assert app_name not in not_managed_apps

            if p.status.phase != "Running" \
                    or not check_tolerations_set(p.spec.tolerations,
                                                 expected_tolerations,
                                                 chk_removed_tolerations):
                updated = False
                break

        if updated:
            break

    assert updated
def test_offline_node_with_attached_volume_and_pod(
        client, core_api, volume_name, make_deployment_with_pvc,
        reset_cluster_ready_status):  # NOQA
    """
    Test offline node with attached volume and pod

    1. Build the PV/PVC/Deployment manifest.
    2. Set the deployment's unreachable/not-ready tolerations to
       20 seconds to speed up the test.
    3. Add a node affinity rule that keeps the pod off the current node.
    4. Create the volume, PV/PVC and deployment.
    5. Find the deployment's pod and write `test_data` into it.
    6. Shut down the node the pod is running on.
    7. Wait for the pod to be marked for deletion.
        1. The deployment cannot finish deleting the pod here because
           the kubelet does not respond.
    8. Force delete the terminating pod.
    9. Wait for the new pod to be created and the volume attached.
    10. Check `test_data` in the new pod.
    """
    toleration_seconds = 20

    apps_api = get_apps_api_client()
    cloudprovider = detect_cloudprovider()

    # The fixture-provided name is replaced by a freshly generated one.
    volume_name = generate_volume_name()
    pv_name = volume_name + "-pv"
    pvc_name = volume_name + "-pvc"
    deployment_name = volume_name + "-dep"

    longhorn_test_node_name = get_self_host_id()

    deployment_manifest = make_deployment_with_pvc(deployment_name, pvc_name)
    pod_spec = deployment_manifest["spec"]["template"]["spec"]

    pod_spec["tolerations"] = [
        {
            "key": taint_key,
            "operator": "Exists",
            "effect": "NoExecute",
            "tolerationSeconds": toleration_seconds
        }
        for taint_key in ("node.kubernetes.io/unreachable",
                          "node.kubernetes.io/not-ready")
    ]

    pod_spec["affinity"] = {
        "nodeAffinity": {
            "requiredDuringSchedulingIgnoredDuringExecution": {
                "nodeSelectorTerms": [{
                    "matchExpressions": [{
                        "key": "kubernetes.io/hostname",
                        "operator": "NotIn",
                        "values": [longhorn_test_node_name]
                    }]
                }]
            }
        }
    }

    longhorn_volume = create_and_check_volume(client, volume_name, size=SIZE)
    wait_for_volume_detached(client, volume_name)

    create_pv_for_volume(client, core_api, longhorn_volume, pv_name)
    create_pvc_for_volume(client, core_api, longhorn_volume, pvc_name)
    create_and_wait_deployment(apps_api, deployment_manifest)

    deployment_label_selector = \
        "name=" + deployment_manifest["metadata"]["labels"]["name"]

    def list_deployment_pods():
        # Pods of the test deployment in the "default" namespace.
        return core_api.list_namespaced_pod(
            namespace="default",
            label_selector=deployment_label_selector)

    deployment_pod_list = list_deployment_pods()
    assert len(deployment_pod_list.items) == 1

    original_pod = deployment_pod_list.items[0]
    pod_name = original_pod.metadata.name
    test_data = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name, test_data)

    node_name = original_pod.spec.node_name
    node = cloudprovider.node_id(node_name)
    cloudprovider.node_shutdown(node)

    k8s_node_down = wait_for_node_down_k8s(node_name, core_api)
    assert k8s_node_down

    client = get_longhorn_api_client()
    longhorn_node_down = wait_for_node_down_longhorn(node_name, client)
    assert longhorn_node_down

    # Let the NoExecute tolerations expire before looking for the
    # terminating pod.
    time.sleep(toleration_seconds + 5)

    for _ in range(TERMINATING_POD_RETRYS):
        # A pod with a deletion timestamp has been marked for deletion.
        terminating_pod_name = next(
            (pod.metadata.name
             for pod in list_deployment_pods().items
             if pod.metadata.deletion_timestamp is not None),
            None)
        if terminating_pod_name is not None:
            break
        time.sleep(TERMINATING_POD_INTERVAL)

    assert terminating_pod_name is not None

    core_api.delete_namespaced_pod(namespace="default",
                                   name=terminating_pod_name,
                                   grace_period_seconds=0)
    delete_and_wait_pod(core_api, terminating_pod_name)

    deployment_pod_list = list_deployment_pods()
    assert len(deployment_pod_list.items) == 1

    wait_for_volume_detached(client, volume_name)
    wait_for_volume_healthy(client, volume_name)

    deployment_pod_list = list_deployment_pods()
    assert len(deployment_pod_list.items) == 1

    new_pod_name = deployment_pod_list.items[0].metadata.name
    wait_pod(new_pod_name)

    resp_data = read_volume_data(core_api, new_pod_name)
    assert test_data == resp_data
def test_restore_rwo_volume_to_rwx(set_random_backupstore, client, core_api, volume_name, pvc, csi_pv, pod_make, make_deployment_with_pvc):  # NOQA
    """
    Test restoring a rwo volume backup into a rwx volume.

    1. Prepare a pod with data written at `/data/test` and keep the
       md5sum of that data.
    2. Take a snapshot of the volume and back it up.
    3. Restore the backup into a new volume with 'accessMode' rwx.
    4. Create a PV and a PVC for the restored volume.
    5. Create a deployment of 2 pods using the PVC.
    6. Verify the data md5sum in both pods matches the original.
    """
    data_path = "/data/test"

    # Only the checksum is needed from the prepared pod.
    _, _, _, md5sum = prepare_pod_with_data_in_mb(
        client, core_api, csi_pv, pvc, pod_make, volume_name,
        data_size_in_mb=DATA_SIZE_IN_MB_1, data_path=data_path)

    snap = create_snapshot(client, volume_name)
    volume = client.by_id_volume(volume_name)
    volume.snapshotBackup(name=snap.name)
    wait_for_backup_completion(client, volume_name, snap.name)
    _, b1 = find_backup(client, volume_name, snap.name)

    restore_volume_name = 'restored-rwx-volume'
    restore_pv_name = restore_volume_name + "-pv"
    restore_pvc_name = restore_volume_name + "-pvc"

    client.create_volume(name=restore_volume_name,
                         size=str(1 * Gi),
                         numberOfReplicas=3,
                         fromBackup=b1.url,
                         accessMode='rwx')
    wait_for_volume_creation(client, restore_volume_name)
    restore_volume = wait_for_volume_detached(client, restore_volume_name)

    create_pv_for_volume(client, core_api, restore_volume, restore_pv_name)
    create_pvc_for_volume(client, core_api, restore_volume, restore_pvc_name)

    deployment = make_deployment_with_pvc('deployment-multi-pods-test',
                                          restore_pvc_name,
                                          replicas=2)
    apps_api = get_apps_api_client()
    create_and_wait_deployment(apps_api, deployment)

    deployment_label_selector = \
        "name=" + deployment["metadata"]["labels"]["name"]
    deployment_pods = core_api.list_namespaced_pod(
        namespace="default",
        label_selector=deployment_label_selector).items

    # Resolve both pod names before reading any data.
    pod_names = [deployment_pods[0].metadata.name,
                 deployment_pods[1].metadata.name]
    pod_md5sums = [get_pod_data_md5sum(core_api, name, data_path)
                   for name in pod_names]

    assert md5sum == pod_md5sums[0] == pod_md5sums[1]
def test_rwx_deployment_with_multi_pods(core_api, pvc, make_deployment_with_pvc):  # NOQA
    """
    Test a deployment of 2 pods sharing the same PVC.

    1. Create a PVC with storage class 'longhorn' and access mode
       'ReadWriteMany'.
    2. Create a deployment of 2 pods using that PVC and wait for it.
    3. Write `test1` from the first pod and read it from the second.
    4. Write `test2` from the second pod.
    5. Read both files from the share manager pod under
       `/export/<pv name>/` and compare them with the written data.
    """
    pvc_name = 'pvc-deployment-multi-pods-test'
    pvc['metadata']['name'] = pvc_name
    pvc_spec = pvc['spec']
    pvc_spec['storageClassName'] = 'longhorn'
    pvc_spec['accessModes'] = ['ReadWriteMany']

    core_api.create_namespaced_persistent_volume_claim(
        body=pvc, namespace='default')

    deployment = make_deployment_with_pvc(
        'deployment-multi-pods-test', pvc_name, replicas=2)
    apps_api = get_apps_api_client()
    create_and_wait_deployment(apps_api, deployment)

    pv_name = get_volume_name(core_api, pvc_name)
    share_manager_name = 'share-manager-' + pv_name

    deployment_label_selector = \
        "name=" + deployment["metadata"]["labels"]["name"]
    deployment_pods = core_api.list_namespaced_pod(
        namespace="default",
        label_selector=deployment_label_selector).items
    assert len(deployment_pods) == 2

    pod_name_1 = deployment_pods[0].metadata.name
    test_data_1 = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name_1, test_data_1,
                          filename='test1')

    # Data written through the first pod must be visible in the second.
    pod_name_2 = deployment_pods[1].metadata.name
    pod_data_2 = exec_command_in_pod(core_api, 'cat /data/test1',
                                     pod_name_2, 'default')
    assert test_data_1 == pod_data_2

    test_data_2 = generate_random_data(VOLUME_RWTEST_SIZE)
    write_pod_volume_data(core_api, pod_name_2, test_data_2,
                          filename='test2')

    # Both files are also read back from the share manager pod.
    for filename, expected_data in (('test1', test_data_1),
                                    ('test2', test_data_2)):
        command = 'cat /export/{}/{}'.format(pv_name, filename)
        share_manager_data = exec_command_in_pod(
            core_api, command, share_manager_name, LONGHORN_NAMESPACE)
        assert expected_data == share_manager_data
def test_rwx_statefulset_scale_down_up(core_api, statefulset):  # NOQA
    """
    Test scaling down and up of pods attached to rwx volumes.

    1. Create a StatefulSet of 2 pods with a VolumeClaimTemplate whose
       access mode is 'ReadWriteMany'.
    2. Wait for the StatefulSet and check that a share manager pod
       exists for each volume.
    3. Write random data in both pods and remember it.
    4. Scale the StatefulSet down to 0 replicas.
    5. Wait for the ready replica count to drop to 0.
    6. Verify the share manager pods are no longer present.
    7. Scale the StatefulSet back up to 2 replicas.
    8. Wait for the StatefulSet.
    9. Check the data in the new pods against what was written.
    """
    statefulset_name = 'statefulset-rwx-scale-down-up-test'
    share_manager_name = []

    statefulset['metadata']['name'] = statefulset_name
    statefulset['spec']['selector']['matchLabels']['app'] = statefulset_name
    statefulset['spec']['serviceName'] = statefulset_name
    statefulset['spec']['template']['metadata']['labels']['app'] = \
        statefulset_name

    claim_spec = statefulset['spec']['volumeClaimTemplates'][0]['spec']
    claim_spec['storageClassName'] = 'longhorn'
    claim_spec['accessModes'] = ['ReadWriteMany']

    create_and_wait_statefulset(statefulset)

    def scale_statefulset(replicas):
        # Record the replica count in the manifest, patch the live
        # object, and return the API client used for the patch.
        statefulset['spec']['replicas'] = replicas
        api = get_apps_api_client()
        api.patch_namespaced_stateful_set(
            name=statefulset_name,
            namespace='default',
            body={'spec': {'replicas': replicas}})
        return api

    claim_name = \
        statefulset['spec']['volumeClaimTemplates'][0]['metadata']['name']
    for ordinal in range(2):
        pvc_name = \
            claim_name + '-' + statefulset_name + '-' + str(ordinal)
        pv_name = get_volume_name(core_api, pvc_name)
        assert pv_name is not None

        share_manager_name.append('share-manager-' + pv_name)
        check_pod_existence(core_api, share_manager_name[ordinal],
                            namespace=LONGHORN_NAMESPACE)

    # Holds the raw data written to each pod, indexed by pod ordinal.
    md5sum_pod = []
    for ordinal in range(2):
        test_pod_name = statefulset_name + '-' + str(ordinal)
        test_data = generate_random_data(VOLUME_RWTEST_SIZE)
        write_pod_volume_data(core_api, test_pod_name, test_data)
        md5sum_pod.append(test_data)

    replicas = 0
    apps_api = scale_statefulset(replicas)

    for _ in range(DEFAULT_STATEFULSET_TIMEOUT):
        s_set = apps_api.read_namespaced_stateful_set(
            name=statefulset['metadata']['name'],
            namespace='default')
        ready_replicas = s_set.status.ready_replicas
        # A falsy ready_replicas also counts as scaled to zero.
        if ready_replicas == replicas or \
                (replicas == 0 and not ready_replicas):
            break
        time.sleep(DEFAULT_STATEFULSET_INTERVAL)

    pods = core_api.list_namespaced_pod(namespace=LONGHORN_NAMESPACE)
    found = any(
        item.metadata.name == share_manager_name[0] or
        item.metadata.name == share_manager_name[1]
        for item in pods.items)
    assert not found

    replicas = 2
    apps_api = scale_statefulset(replicas)
    wait_statefulset(statefulset)

    for ordinal in range(2):
        test_pod_name = statefulset_name + '-' + str(ordinal)
        pod_data = exec_command_in_pod(core_api, 'cat /data/test',
                                       test_pod_name, 'default')
        assert pod_data == md5sum_pod[ordinal]