Example #1
def pytest_collection_modifyitems(config, items):
    if not config.getoption(ENABLE_RECURRING_JOB_OPT):
        skip_upgrade = pytest.mark.skip(reason="need " +
                                        ENABLE_RECURRING_JOB_OPT +
                                        " option to run")
        for item in items:
            if "recurring_job" in item.keywords:
                item.add_marker(skip_upgrade)

    c = Configuration()
    c.assert_hostname = False
    Configuration.set_default(c)
    k8sconfig.load_incluster_config()
    api = k8sclient.CoreV1Api()

    try:
        api.read_namespaced_pod(
            name='csi-provisioner-0', namespace='longhorn-system')
        skip_upgrade = pytest.mark.skip(reason="environment is not using " +
                                               "flexvolume")

        for item in items:
            if "flexvolume" in item.keywords:
                item.add_marker(skip_upgrade)
    except ApiException as e:
        if e.status == 404:
            skip_upgrade = pytest.mark.skip(reason="environment is not " +
                                                   "using csi")

            for item in items:
                if "csi" in item.keywords:
                    item.add_marker(skip_upgrade)

    all_nodes_support_mount_propagation = True
    for node in get_longhorn_api_client().list_node():
        node = wait_for_node_mountpropagation_condition(
            get_longhorn_api_client(), node["name"])
        if "conditions" not in node.keys():
            all_nodes_support_mount_propagation = False
        else:
            conditions = node["conditions"]
            for key, condition in conditions.items():
                if key == NODE_CONDITION_MOUNTPROPAGATION and \
                        condition["status"] != CONDITION_STATUS_TRUE:
                    all_nodes_support_mount_propagation = False
                    break
        if not all_nodes_support_mount_propagation:
            break

    if not all_nodes_support_mount_propagation:
        skip_upgrade = pytest.mark.skip(reason="environment does not " +
                                               "support base image")
        skip_node = pytest.mark.skip(reason="environment does not " +
                                            "support mount disk")

        for item in items:
            if "baseimage" in item.keywords:
                item.add_marker(skip_upgrade)
            elif "mountdisk" in item.keywords:
                item.add_marker(skip_node)
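
The collection hook above (and the similar one in the next example) skips tests by checking for pytest markers such as "recurring_job", "csi", "flexvolume", "baseimage", and "mountdisk" in item.keywords. A minimal sketch of how a test would opt into one of those markers so the hook can match it (the test name and body here are illustrative, not part of the suite):

import pytest

@pytest.mark.recurring_job  # shows up in item.keywords at collection time
def test_volume_recurring_job():
    ...
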
Example #2
def pytest_collection_modifyitems(config, items):
    c = Configuration()
    c.assert_hostname = False
    Configuration.set_default(c)
    k8sconfig.load_incluster_config()
    core_api = k8sclient.CoreV1Api()

    check_longhorn(core_api)

    if config.getoption(SKIP_RECURRING_JOB_OPT):
        skip_upgrade = pytest.mark.skip(reason="remove " +
                                               SKIP_RECURRING_JOB_OPT +
                                               " option to run")
        for item in items:
            if "recurring_job" in item.keywords:
                item.add_marker(skip_upgrade)

    using_csi = check_csi(core_api)
    if using_csi:
        skip_upgrade = pytest.mark.skip(reason="environment is not using " +
                                               "flexvolume")
        for item in items:
            if "flexvolume" in item.keywords:
                item.add_marker(skip_upgrade)

    else:
        skip_upgrade = pytest.mark.skip(reason="environment is not " +
                                               "using csi")
        for item in items:
            if "csi" in item.keywords:
                item.add_marker(skip_upgrade)

    all_nodes_support_mount_propagation = True
    for node in get_longhorn_api_client().list_node():
        node = wait_for_node_mountpropagation_condition(
            get_longhorn_api_client(), node["name"])
        if "conditions" not in node.keys():
            all_nodes_support_mount_propagation = False
        else:
            conditions = node["conditions"]
            for key, condition in conditions.items():
                if key == NODE_CONDITION_MOUNTPROPAGATION and \
                        condition["status"] != CONDITION_STATUS_TRUE:
                    all_nodes_support_mount_propagation = False
                    break
        if not all_nodes_support_mount_propagation:
            break

    if not all_nodes_support_mount_propagation:
        skip_upgrade = pytest.mark.skip(reason="environment does not " +
                                               "support base image")
        skip_node = pytest.mark.skip(reason="environment does not " +
                                            "support mount disk")

        for item in items:
            if "baseimage" in item.keywords:
                item.add_marker(skip_upgrade)
            elif "mountdisk" in item.keywords:
                item.add_marker(skip_node)
Example #3
def test_offline_node(reset_cluster_ready_status):
    """
    Test offline node

    1. Bring down one of the nodes in the Kubernetes cluster (avoid the
       current node)
    2. Make sure the Longhorn node state becomes `down`
    """
    node_worker_label = 'node-role.kubernetes.io/worker'
    pod_lable_selector = "longhorn-test=test-job"
    node_controlplane_label = 'node-role.kubernetes.io/control-plane'
    node_ip_annotation = "flannel.alpha.coreos.com/public-ip"

    k8s_api_client = get_core_api_client()
    longhorn_api_client = get_longhorn_api_client()
    cloudprovider = detect_cloudprovider()

    for pod in k8s_api_client.list_namespaced_pod(
            'default', label_selector=pod_lable_selector).items:
        if pod.metadata.name == "longhorn-test":
            longhorn_test_node_name = pod.spec.node_name

    k3s = is_infra_k3s()

    for node_item in k8s_api_client.list_node().items:
        if k3s is True:
            if node_controlplane_label not in node_item.metadata.labels:
                node_name = node_item.metadata.name
                node_ip = node_item.metadata.annotations[node_ip_annotation]
                if node_name == longhorn_test_node_name:
                    continue
                else:
                    node = cloudprovider.instance_id_by_ip(node_ip)
                    break
        else:
            if node_worker_label in node_item.metadata.labels and \
                    node_item.metadata.labels[node_worker_label] == 'true':
                node_name = node_item.metadata.name
                if node_name == longhorn_test_node_name:
                    continue
                else:
                    node = cloudprovider.instance_id(node_name)
                    break

    print(f'==> stop node: {node_name}')

    cloudprovider.instance_stop(node)
    wait_for_node_down_aws(cloudprovider, node)
    k8s_node_down = wait_for_node_down_k8s(node_name, k8s_api_client)

    assert k8s_node_down

    longhorn_api_client = get_longhorn_api_client()
    longhorn_node_down = wait_for_node_down_longhorn(node_name,
                                                     longhorn_api_client)

    assert longhorn_node_down
    def finalizer():
        node_worker_label = 'node-role.kubernetes.io/worker'

        k8s_api_client = get_core_api_client()
        longhorn_api_client = get_longhorn_api_client()
        cloudprovider = detect_cloudprovider()

        for node_item in k8s_api_client.list_node().items:
            if node_worker_label in node_item.metadata.labels and \
                    node_item.metadata.labels[node_worker_label] == 'true':
                node_name = node_item.metadata.name

                if is_node_ready_k8s(node_name, k8s_api_client) is False:
                    node = cloudprovider.node_id(node_name)

                    cloudprovider.node_start(node)

                    node_up_k8s = wait_for_node_up_k8s(node_name,
                                                       k8s_api_client)

                    assert node_up_k8s

                else:
                    continue

                node_up_longhorn =\
                    wait_for_node_up_longhorn(node_name,
                                              longhorn_api_client)

                assert node_up_longhorn
Example #5
def reset_disk_settings():
    yield
    api = get_longhorn_api_client()
    setting = api.by_id_setting(CREATE_DEFAULT_DISK_SETTING)
    api.update(setting, value="false")
    setting = api.by_id_setting(DEFAULT_DATA_PATH_SETTING)
    api.update(setting, value=DEFAULT_DISK_PATH)
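
The snippet above reads like the teardown half of a pytest yield fixture: everything after the bare yield runs once the dependent test has finished. A minimal sketch of how such a fixture is typically declared and consumed (the decorator and the consuming test are assumptions, not part of the original snippet):

import pytest

@pytest.fixture
def reset_disk_settings():
    yield  # the test body runs here
    # teardown: restore the disk-related settings as shown above

def test_uses_default_disk(reset_disk_settings):
    ...
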
Example #6
def test_offline_node(reset_cluster_ready_status):
    node_worker_label = 'node-role.kubernetes.io/worker'
    pod_lable_selector = "longhorn-test=test-job"

    k8s_api_client = get_core_api_client()
    longhorn_api_client = get_longhorn_api_client()
    cloudprovider = detect_cloudprovider()

    for pod in k8s_api_client.list_namespaced_pod(
            'default', label_selector=pod_lable_selector).items:
        if pod.metadata.name == "longhorn-test":
            longhorn_test_node_name = pod.spec.node_name

    for node_item in k8s_api_client.list_node().items:
        if node_worker_label in node_item.metadata.labels and \
                node_item.metadata.labels[node_worker_label] == 'true':
            node_name = node_item.metadata.name
            if node_name == longhorn_test_node_name:
                continue
            else:
                break

    node = cloudprovider.node_id(node_name)

    cloudprovider.node_shutdown(node)

    k8s_node_down = wait_for_node_down_k8s(node_name, k8s_api_client)

    assert k8s_node_down

    longhorn_node_down = wait_for_node_down_longhorn(node_name,
                                                     longhorn_api_client)

    assert longhorn_node_down
Example #7
def check_workload_update(core_api, apps_api, count):  # NOQA
    da_list = apps_api.list_namespaced_daemon_set(LONGHORN_NAMESPACE).items
    for da in da_list:
        if da.status.updated_number_scheduled != count:
            return False

    dp_list = apps_api.list_namespaced_deployment(LONGHORN_NAMESPACE).items
    for dp in dp_list:
        if dp.status.updated_replicas != dp.spec.replicas:
            return False

    im_pod_list = core_api.list_namespaced_pod(
        LONGHORN_NAMESPACE,
        label_selector="longhorn.io/component=instance-manager").items
    if len(im_pod_list) != 2 * count:
        return False

    for p in im_pod_list:
        if p.status.phase != "Running":
            return False

    client = get_longhorn_api_client()  # NOQA
    images = client.list_engine_image()
    assert len(images) == 1
    ei_state = get_engine_image_status_value(client, images[0].name)
    if images[0].state != ei_state:
        return False

    return True
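
check_workload_update returns a single boolean snapshot of the rollout state, so a caller would normally poll it. A minimal sketch of such a wrapper, assuming the same RETRY_COUNTS and RETRY_INTERVAL_LONG constants used in the next example:

import time

def wait_for_workload_update(core_api, apps_api, count):
    # Poll until every workload reports the expected update, or give up.
    for _ in range(RETRY_COUNTS):
        if check_workload_update(core_api, apps_api, count):
            return True
        time.sleep(RETRY_INTERVAL_LONG)
    return False
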
Example #8
def wait_for_toleration_update(core_api, apps_api, count,
                               set_tolerations):  # NOQA
    updated = False

    for i in range(RETRY_COUNTS):
        time.sleep(RETRY_INTERVAL_LONG)
        updated = True

        da_list = apps_api.list_namespaced_daemon_set(LONGHORN_NAMESPACE).items
        for da in da_list:
            if da.status.updated_number_scheduled != count:
                updated = False
                break
        if not updated:
            continue

        dp_list = apps_api.list_namespaced_deployment(LONGHORN_NAMESPACE).items
        for dp in dp_list:
            if dp.status.updated_replicas != dp.spec.replicas:
                updated = False
                break
        if not updated:
            continue

        im_pod_list = core_api.list_namespaced_pod(
            LONGHORN_NAMESPACE,
            label_selector="longhorn.io/component=instance-manager").items
        if len(im_pod_list) != 2 * count:
            updated = False
            continue

        for p in im_pod_list:
            if p.status.phase != "Running":
                updated = False
                break
        if not updated:
            continue

        pod_list = core_api.list_namespaced_pod(LONGHORN_NAMESPACE).items
        for p in pod_list:
            if p.status.phase != "Running" or \
                    not check_tolerations_set(p.spec.tolerations,
                                              set_tolerations):
                updated = False
                break
        if not updated:
            continue

        client = get_longhorn_api_client()  # NOQA
        images = client.list_engine_image()
        assert len(images) == 1
        if images[0].state != "ready":
            updated = False
            continue

        if updated:
            break

    assert updated
Example #9
def reset_settings():
    yield
    client = get_longhorn_api_client()  # NOQA
    host_id = get_self_host_id()
    node = client.by_id_node(host_id)
    client.update(node, allowScheduling=True)
    setting = client.by_id_setting(SETTING_REPLICA_NODE_SOFT_ANTI_AFFINITY)
    client.update(setting, value="true")
Example #10
def test_offline_node(reset_cluster_ready_status):
    """
    Test offline node

    1. Bring down one of the nodes in the Kubernetes cluster (avoid the
       current node)
    2. Make sure the Longhorn node state becomes `down`
    """
    node_worker_label = 'node-role.kubernetes.io/worker'
    pod_lable_selector = "longhorn-test=test-job"

    k8s_api_client = get_core_api_client()
    longhorn_api_client = get_longhorn_api_client()
    cloudprovider = detect_cloudprovider()

    for pod in k8s_api_client.list_namespaced_pod(
            'default', label_selector=pod_lable_selector).items:
        if pod.metadata.name == "longhorn-test":
            longhorn_test_node_name = pod.spec.node_name

    for node_item in k8s_api_client.list_node().items:
        if node_worker_label in node_item.metadata.labels and \
                node_item.metadata.labels[node_worker_label] == 'true':
            node_name = node_item.metadata.name
            if node_name == longhorn_test_node_name:
                continue
            else:
                break

    node = cloudprovider.instance_id(node_name)

    cloudprovider.instance_stop(node)

    k8s_node_down = wait_for_node_down_k8s(node_name, k8s_api_client)

    assert k8s_node_down

    longhorn_api_client = get_longhorn_api_client()
    longhorn_node_down = wait_for_node_down_longhorn(node_name,
                                                     longhorn_api_client)

    assert longhorn_node_down
Example #11
def wait_for_longhorn_node_ready():
    client = get_longhorn_api_client()  # NOQA

    ei = get_default_engine_image(client)
    ei_name = ei["name"]
    ei_state = get_engine_image_status_value(client, ei_name)
    wait_for_engine_image_state(client, ei_name, ei_state)

    node = get_self_host_id()
    wait_for_node_up_longhorn(node, client)

    return client, node
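
Because wait_for_longhorn_node_ready returns both a fresh client and the current node ID, callers unpack the pair before re-attaching volumes, as the toleration and priority-class tests later on this page do:

client, node = wait_for_longhorn_node_ready()
volume = client.by_id_volume(volume_name)  # volume_name defined by the caller
volume.attach(hostId=node)
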
Example #12
def reset_default_disk_label():
    yield
    k8sapi = get_core_api_client()
    lhapi = get_longhorn_api_client()
    nodes = lhapi.list_node()
    for node in nodes:
        k8sapi.patch_node(
            node["id"],
            {"metadata": {
                "labels": {
                    # Patching the label value to None removes the label
                    # from the node.
                    CREATE_DEFAULT_DISK_LABEL: None
                }
            }})
Example #13
def reset_cluster_ready_status(request):
    yield
    node_worker_label = 'node-role.kubernetes.io/worker'
    node_controlplane_label = 'node-role.kubernetes.io/control-plane'
    node_ip_annotation = "flannel.alpha.coreos.com/public-ip"

    k8s_api_client = get_core_api_client()
    longhorn_api_client = get_longhorn_api_client()
    cloudprovider = detect_cloudprovider()

    k3s = is_infra_k3s()

    print('==> test completed! reset cluster ready status ...')

    for node_item in k8s_api_client.list_node().items:

        if k3s is True:
            if node_controlplane_label not in node_item.metadata.labels:
                node_name = node_item.metadata.name
                node_ip = node_item.metadata.annotations[node_ip_annotation]
                node = cloudprovider.instance_id_by_ip(node_ip)
            else:
                continue

        else:
            if node_worker_label in node_item.metadata.labels and \
                    node_item.metadata.labels[node_worker_label] == 'true':
                node_name = node_item.metadata.name
                node = cloudprovider.instance_id(node_name)
            else:
                continue

        if is_node_ready_k8s(node_name, k8s_api_client) is False:

            cloudprovider.instance_start(node)
            print(f'==> wait for aws node {node_name} up ...')
            aws_node_up = wait_for_node_up_aws(cloudprovider, node)
            assert aws_node_up, f'expect aws node {node_name} up'
            node_up_k8s = wait_for_node_up_k8s(node_name, k8s_api_client)

            assert node_up_k8s

        else:
            continue

        node_up_longhorn =\
            wait_for_node_up_longhorn(node_name, longhorn_api_client)

        assert node_up_longhorn
Example #14
def test_reset_env():
    k8s_api_client = get_core_api_client()
    k8s_storage_client = get_storage_api_client()
    longhorn_api_client = get_longhorn_api_client()

    pod_list = k8s_api_client.list_namespaced_pod("default")
    for pod in pod_list.items:
        if STRESS_POD_NAME_PREFIX in pod.metadata.name:
            delete_and_wait_pod(k8s_api_client, pod.metadata.name)

    pvc_list = \
        k8s_api_client.list_namespaced_persistent_volume_claim("default")
    for pvc in pvc_list.items:
        if STRESS_PVC_NAME_PREFIX in pvc.metadata.name:
            delete_and_wait_pvc(k8s_api_client, pvc.metadata.name)

    pv_list = k8s_api_client.list_persistent_volume()
    for pv in pv_list.items:
        pv_name = pv.metadata.name
        if STRESS_PV_NAME_PREFIX in pv_name:
            try:
                delete_and_wait_pv(k8s_api_client, pv_name)
            except AssertionError:
                volumeattachment_list = \
                    k8s_storage_client.list_volume_attachment()
                for volumeattachment in volumeattachment_list.items:
                    volume_attachment_name = \
                        volumeattachment.spec.source.persistent_volume_name
                    if volume_attachment_name == pv_name:
                        delete_and_wait_volume_attachment(
                            k8s_storage_client,
                            volume_attachment_name
                        )
                        delete_and_wait_pv(k8s_api_client, pv.metadata.name)

    volume_list = \
        longhorn_api_client.list_volume()
    for volume in volume_list.data:
        if STRESS_VOLUME_NAME_PREFIX in volume.name:
            delete_and_wait_longhorn(longhorn_api_client, volume.name)
Example #15
def test_setting_toleration_extra(core_api, apps_api):  # NOQA
    """
    Steps:
    1. Set Kubernetes Taint Toleration to:
       `ex.com/foobar:NoExecute;ex.com/foobar:NoSchedule`.
    2. Verify that all system components have the 2 tolerations
       `ex.com/foobar:NoExecute; ex.com/foobar:NoSchedule`.
       Verify that UI, manager, and driver deployer don't restart and
       don't have the tolerations.
    3. Set Kubernetes Taint Toleration to:
       `node-role.kubernetes.io/controlplane=true:NoSchedule`.
    4. Verify that all system components have the toleration
       `node-role.kubernetes.io/controlplane=true:NoSchedule`,
       and don't have the 2 tolerations
       `ex.com/foobar:NoExecute;ex.com/foobar:NoSchedule`.
       Verify that UI, manager, and driver deployer don't restart and
       don't have the toleration.
    5. Set Kubernetes Taint Toleration to special value:
       `:`.
    6. Verify that all system components have the toleration with
       `operator: Exists` and other field of the toleration are empty.
       Verify that all system components don't have the toleration
       `node-role.kubernetes.io/controlplane=true:NoSchedule`.
       Verify that UI, manager, and driver deployer don't restart and
       don't have the toleration.
    7. Clear Kubernetes Taint Toleration

    Note: system components are workloads other than UI, manager, driver
    deployer
    """
    settings = [
        {
            "value":
            "ex.com/foobar:NoExecute;ex.com/foobar:NoSchedule",
            "expect": [
                {
                    "key": "ex.com/foobar",
                    "value": None,
                    "operator": "Exists",
                    "effect": "NoExecute"
                },
                {
                    "key": "ex.com/foobar",
                    "value": None,
                    "operator": "Exists",
                    "effect": "NoSchedule"
                },
            ],
        },
        {
            "value":
            "node-role.kubernetes.io/controlplane=true:NoSchedule",
            "expect": [
                {
                    "key": "node-role.kubernetes.io/controlplane",
                    "value": "true",
                    "operator": "Equal",
                    "effect": "NoSchedule"
                },
            ],
        },
        # Skip this special toleration for now because it makes
        # Longhorn deploy manager pods on control/etcd nodes
        # and the control/etcd nodes become "down" after the test
        # clears this toleration.
        # We will enable this test once we implement logic for
        # deleting failed nodes.
        # {
        #     "value": ":",
        #     "expect": [
        #         {
        #             "key": None,
        #             "value": None,
        #             "operator": "Exists",
        #             "effect": None,
        #         },
        #     ]
        # },
        {
            "value": "",
            "expect": [],
        },
    ]

    chk_removed_tolerations = []
    for setting in settings:
        client = get_longhorn_api_client()  # NOQA
        taint_toleration = client.by_id_setting(SETTING_TAINT_TOLERATION)
        updated = client.update(taint_toleration, value=setting["value"])
        assert updated.value == setting["value"]

        node_count = len(client.list_node())
        wait_for_toleration_update(core_api, apps_api, node_count,
                                   setting["expect"], chk_removed_tolerations)
        chk_removed_tolerations = setting["expect"]
Example #16
def test_offline_node_with_attached_volume_and_pod(
        client, core_api, volume_name, make_deployment_with_pvc,
        reset_cluster_ready_status):  # NOQA
    """
    Test offline node with attached volume and pod

    1. Create PV/PVC/Deployment manifest.
    2. Update deployment's tolerations to 20 seconds to speed up test
    3. Update deployment's node affinity rule to avoid the current node
    4. Create volume, PV/PVC and deployment.
    5. Find the pod in the deployment and write `test_data` into it
    6. Shut down the node the pod is running on
    7. Wait for the deployment to delete the pod
        1. The deployment cannot delete the pod here because kubelet doesn't
           respond
    8. Force delete the terminating pod
    9. Wait for the new pod to be created and the volume attached
    10. Check `test_data` in the new pod
    """
    toleration_seconds = 20

    apps_api = get_apps_api_client()
    cloudprovider = detect_cloudprovider()

    volume_name = generate_volume_name()
    pv_name = volume_name + "-pv"
    pvc_name = volume_name + "-pvc"
    deployment_name = volume_name + "-dep"

    longhorn_test_node_name = get_self_host_id()

    deployment_manifest = make_deployment_with_pvc(deployment_name, pvc_name)

    unreachable_toleration = {
        "key": "node.kubernetes.io/unreachable",
        "operator": "Exists",
        "effect": "NoExecute",
        "tolerationSeconds": toleration_seconds
    }

    not_ready_toleration = {
        "key": "node.kubernetes.io/not-ready",
        "operator": "Exists",
        "effect": "NoExecute",
        "tolerationSeconds": toleration_seconds
    }

    deployment_manifest["spec"]["template"]["spec"]["tolerations"] =\
        [unreachable_toleration, not_ready_toleration]

    node_affinity_roles = {
        "nodeAffinity": {
            "requiredDuringSchedulingIgnoredDuringExecution": {
                "nodeSelectorTerms": [{
                    "matchExpressions": [{
                        "key": "kubernetes.io/hostname",
                        "operator": "NotIn",
                        "values": [longhorn_test_node_name]
                    }]
                }]
            }
        }
    }

    deployment_manifest["spec"]["template"]["spec"]["affinity"] =\
        node_affinity_roles

    longhorn_volume = create_and_check_volume(client, volume_name, size=SIZE)

    wait_for_volume_detached(client, volume_name)

    create_pv_for_volume(client, core_api, longhorn_volume, pv_name)

    create_pvc_for_volume(client, core_api, longhorn_volume, pvc_name)

    create_and_wait_deployment(apps_api, deployment_manifest)

    deployment_label_selector =\
        "name=" + deployment_manifest["metadata"]["labels"]["name"]

    deployment_pod_list =\
        core_api.list_namespaced_pod(namespace="default",
                                     label_selector=deployment_label_selector)

    assert len(deployment_pod_list.items) == 1

    pod_name = deployment_pod_list.items[0].metadata.name

    test_data = generate_random_data(VOLUME_RWTEST_SIZE)

    write_pod_volume_data(core_api, pod_name, test_data)

    node_name = deployment_pod_list.items[0].spec.node_name
    node = cloudprovider.node_id(node_name)

    cloudprovider.node_shutdown(node)

    k8s_node_down = wait_for_node_down_k8s(node_name, core_api)

    assert k8s_node_down

    client = get_longhorn_api_client()

    longhorn_node_down = wait_for_node_down_longhorn(node_name, client)
    assert longhorn_node_down

    time.sleep(toleration_seconds + 5)

    for i in range(TERMINATING_POD_RETRYS):
        deployment_pod_list =\
            core_api.list_namespaced_pod(
                namespace="default",
                label_selector=deployment_label_selector
            )

        terminating_pod_name = None
        for pod in deployment_pod_list.items:
            if pod.metadata.deletion_timestamp is not None:
                terminating_pod_name = pod.metadata.name
                break

        if terminating_pod_name is not None:
            break
        else:
            time.sleep(TERMINATING_POD_INTERVAL)

    assert terminating_pod_name is not None

    core_api.delete_namespaced_pod(namespace="default",
                                   name=terminating_pod_name,
                                   grace_period_seconds=0)

    delete_and_wait_pod(core_api, terminating_pod_name)

    deployment_pod_list =\
        core_api.list_namespaced_pod(
            namespace="default",
            label_selector=deployment_label_selector
        )

    assert len(deployment_pod_list.items) == 1

    wait_for_volume_detached(client, volume_name)
    wait_for_volume_healthy(client, volume_name)

    deployment_pod_list =\
        core_api.list_namespaced_pod(
            namespace="default",
            label_selector=deployment_label_selector
        )

    assert len(deployment_pod_list.items) == 1

    new_pod_name = deployment_pod_list.items[0].metadata.name

    wait_pod(new_pod_name)

    resp_data = read_volume_data(core_api, new_pod_name)

    assert test_data == resp_data
Example #17
def minio_get_backup_volume_prefix(volume_name):
    client = get_longhorn_api_client()
    backupstore_bv_path = backup_volume_path(volume_name)
    backupstore_path = minio_get_backupstore_path(client)
    return backupstore_path + backupstore_bv_path
Example #18
def test_setting_toleration():
    """
    Test toleration setting

    1. Verify that Kubernetes default tolerations cannot be used for the
       Longhorn setting
    2. Use "key1=value1:NoSchedule; key2:NoExecute" as toleration.
    3. Create a volume and attach it.
    4. Verify that the toleration setting cannot be updated while any volume
       is attached
    5. Generate and write `data1` into the volume
    6. Detach the volume.
    7. Update the `toleration` setting to the new toleration.
    8. Wait for all the Longhorn components to restart with new toleration
    9. Attach the volume again and verify the volume `data1`.
    10. Generate and write `data2` to the volume.
    11. Detach the volume.
    12. Clean the `toleration` setting.
    13. Wait for all the Longhorn components to restart with no toleration
    14. Attach the volume and validate `data2`.
    15. Generate and write `data3` to the volume.
    """
    client = get_longhorn_api_client()  # NOQA
    apps_api = get_apps_api_client()  # NOQA
    core_api = get_core_api_client()  # NOQA
    count = len(client.list_node())

    setting = client.by_id_setting(SETTING_TAINT_TOLERATION)

    with pytest.raises(Exception) as e:
        client.update(setting,
                      value=KUBERNETES_DEFAULT_TOLERATION + ":NoSchedule")
    assert "is considered as the key of Kubernetes default tolerations" \
           in str(e.value)
    with pytest.raises(Exception) as e:
        client.update(setting,
                      value="key1=value1:NoSchedule; key2:InvalidEffect")
    assert 'invalid effect' in str(e.value)

    setting_value_str = "key1=value1:NoSchedule; key2:NoExecute"
    setting_value_dict = \
        {"key1": {"key": "key1", "value": "value1",
                  "operator": "Equal", "effect": "NoSchedule"},
         "key2": {"key": "key2", "value": None,
                  "operator": "Exists", "effect": "NoExecute"}, }

    volume_name = "test-toleration-vol"  # NOQA
    volume = create_and_check_volume(client, volume_name)
    volume.attach(hostId=get_self_host_id())
    volume = wait_for_volume_healthy(client, volume_name)
    with pytest.raises(Exception) as e:
        client.update(setting, value=setting_value_str)
    assert 'cannot modify toleration setting before all volumes are detached' \
           in str(e.value)

    data1 = write_volume_random_data(volume)
    check_volume_data(volume, data1)

    volume.detach()
    wait_for_volume_detached(client, volume_name)

    setting = client.update(setting, value=setting_value_str)
    assert setting.value == setting_value_str
    wait_for_toleration_update(core_api, apps_api, count, setting_value_dict)

    client, node = wait_for_longhorn_node_ready()

    volume = client.by_id_volume(volume_name)
    volume.attach(hostId=node)
    volume = wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, data1)
    data2 = write_volume_random_data(volume)
    check_volume_data(volume, data2)
    volume.detach()
    wait_for_volume_detached(client, volume_name)

    # cleanup
    setting_value_str = ""
    setting_value_dict = {}
    setting = client.by_id_setting(SETTING_TAINT_TOLERATION)
    setting = client.update(setting, value=setting_value_str)
    assert setting.value == setting_value_str
    wait_for_toleration_update(core_api, apps_api, count, setting_value_dict)

    client, node = wait_for_longhorn_node_ready()

    volume = client.by_id_volume(volume_name)
    volume.attach(hostId=node)
    volume = wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, data2)
    data3 = write_volume_random_data(volume)
    check_volume_data(volume, data3)

    cleanup_volume(client, volume)
Example #19
def pytest_collection_modifyitems(config, items):
    c = Configuration()
    c.assert_hostname = False
    Configuration.set_default(c)
    k8sconfig.load_incluster_config()
    core_api = k8sclient.CoreV1Api()

    check_longhorn(core_api)

    include_base_image = config.getoption(INCLUDE_BASE_IMAGE_OPT)
    if not include_base_image:
        skip_base_image = pytest.mark.skip(reason="set " +
                                                  INCLUDE_BASE_IMAGE_OPT +
                                                  " option to run")
        for item in items:
            if "baseimage" in item.keywords:
                item.add_marker(skip_base_image)

    if config.getoption(SKIP_RECURRING_JOB_OPT):
        skip_upgrade = pytest.mark.skip(reason="remove " +
                                               SKIP_RECURRING_JOB_OPT +
                                               " option to run")
        for item in items:
            if "recurring_job" in item.keywords:
                item.add_marker(skip_upgrade)

    csi_expansion_enabled = check_csi_expansion(core_api)
    if not csi_expansion_enabled:
        skip_csi_expansion = pytest.mark.skip(reason="environment is not " +
                                                     "using csi expansion")
        for item in items:
            if "csi_expansion" in item.keywords:
                item.add_marker(skip_csi_expansion)

    all_nodes_support_mount_propagation = True
    for node in get_longhorn_api_client().list_node():
        node = wait_for_node_mountpropagation_condition(
            get_longhorn_api_client(), node.name)
        if "conditions" not in node.keys():
            all_nodes_support_mount_propagation = False
        else:
            conditions = node.conditions
            for key, condition in conditions.items():
                if key == NODE_CONDITION_MOUNTPROPAGATION and \
                        condition.status != CONDITION_STATUS_TRUE:
                    all_nodes_support_mount_propagation = False
                    break
        if not all_nodes_support_mount_propagation:
            break

    if not all_nodes_support_mount_propagation:
        skip_upgrade = pytest.mark.skip(reason="environment does not " +
                                               "support base image")
        skip_node = pytest.mark.skip(reason="environment does not " +
                                            "support mount disk")

        for item in items:
            # Don't need to add skip marker for Base Image twice.
            if include_base_image and "baseimage" in item.keywords:
                item.add_marker(skip_upgrade)
            elif "mountdisk" in item.keywords:
                item.add_marker(skip_node)

    if not config.getoption(INCLUDE_INFRA_OPT):
        skip_infra = pytest.mark.skip(reason="include " +
                                      INCLUDE_INFRA_OPT +
                                      " option to run")

        for item in items:
            if "infra" in item.keywords:
                item.add_marker(skip_infra)

    if not config.getoption(INCLUDE_STRESS_OPT):
        skip_stress = pytest.mark.skip(reason="include " +
                                       INCLUDE_STRESS_OPT +
                                       " option to run")

        for item in items:
            if "stress" in item.keywords:
                item.add_marker(skip_stress)

    if not config.getoption(INCLUDE_UPGRADE_OPT):
        skip_upgrade = pytest.mark.skip(reason="include " +
                                        INCLUDE_UPGRADE_OPT +
                                        " option to run")

        for item in items:
            if "upgrade" in item.keywords:
                item.add_marker(skip_upgrade)
Example #20
def test_setting_priority_class(core_api, apps_api, scheduling_api,
                                priority_class, volume_name):  # NOQA
    """
    Test that the Priority Class setting is validated and utilized correctly.

    1. Verify that the name of a non-existent Priority Class cannot be used
    for the Setting.
    2. Create a new Priority Class in Kubernetes.
    3. Create and attach a Volume.
    4. Verify that the Priority Class Setting cannot be updated with an
    attached Volume.
    5. Generate and write `data1`.
    6. Detach the Volume.
    7. Update the Priority Class Setting to the new Priority Class.
    8. Wait for all the Longhorn system components to restart with the new
       Priority Class.
    9. Verify that UI, manager, and driver deployer don't have Priority Class
    10. Attach the Volume and verify `data1`.
    11. Generate and write `data2`.
    12. Unset the Priority Class Setting.
    13. Wait for all the Longhorn system components to restart with the new
        Priority Class.
    14. Verify that UI, manager, and driver deployer don't have Priority Class
    15. Attach the Volume and verify `data2`.
    16. Generate and write `data3`.

    Note: system components are workloads other than UI, manager, driver
     deployer
    """
    client = get_longhorn_api_client()  # NOQA
    count = len(client.list_node())
    name = priority_class['metadata']['name']
    setting = client.by_id_setting(SETTING_PRIORITY_CLASS)

    with pytest.raises(Exception) as e:
        client.update(setting, value=name)
    assert 'failed to get priority class ' in str(e.value)

    scheduling_api.create_priority_class(priority_class)

    volume = create_and_check_volume(client, volume_name)
    volume.attach(hostId=get_self_host_id())
    volume = wait_for_volume_healthy(client, volume_name)

    with pytest.raises(Exception) as e:
        client.update(setting, value=name)
    assert 'cannot modify priority class setting before all volumes are ' \
           'detached' in str(e.value)

    data1 = write_volume_random_data(volume)
    check_volume_data(volume, data1)

    volume.detach(hostId="")
    wait_for_volume_detached(client, volume_name)

    setting = client.update(setting, value=name)
    assert setting.value == name

    wait_for_priority_class_update(core_api, apps_api, count, priority_class)

    client, node = wait_for_longhorn_node_ready()

    volume = client.by_id_volume(volume_name)
    volume.attach(hostId=node)
    volume = wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, data1)
    data2 = write_volume_random_data(volume)
    check_volume_data(volume, data2)
    volume.detach(hostId="")
    wait_for_volume_detached(client, volume_name)

    setting = client.by_id_setting(SETTING_PRIORITY_CLASS)
    setting = client.update(setting, value='')
    assert setting.value == ''
    wait_for_priority_class_update(core_api, apps_api, count)

    client, node = wait_for_longhorn_node_ready()

    volume = client.by_id_volume(volume_name)
    volume.attach(hostId=node)
    volume = wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, data2)
    data3 = write_volume_random_data(volume)
    check_volume_data(volume, data3)

    cleanup_volume(client, volume)
Example #21
def test_upgrade(upgrade_image_tag, settings_reset, volume_name, pod_make, statefulset, storage_class):  # NOQA
    """
    Test Longhorn upgrade

    Prerequisite:
      - Disable Auto Salvage Setting

    1. Find the upgrade image tag
    2. Create a volume, generate and write data into the volume.
    3. Create a Pod using a volume, generate and write data
    4. Create a StatefulSet with 2 replicas,
       generate and write data to their volumes
    5. Keep all volumes attached
    6. Upgrade Longhorn system.
    7. Check Pod and StatefulSet didn't restart after upgrade
    8. Check All volumes data
    9. Write data to the StatefulSet pods and the attached volume
    10. Check the data written to the StatefulSet pods and the attached
        volume.
    11. Detach the volume, and delete the Pod and
        StatefulSet to detach their volumes
    12. Upgrade all volumes engine images.
    13. Attach the volume, and recreate Pod, and StatefulSet
    14. Check All volumes data
    """
    new_ei_name = "longhornio/longhorn-engine:" + upgrade_image_tag

    client = get_longhorn_api_client()
    core_api = get_core_api_client()
    host_id = get_self_host_id()
    pod_data_path = "/data/test"

    pod_volume_name = generate_volume_name()

    auto_salvage_setting = client.by_id_setting(SETTING_AUTO_SALVAGE)
    setting = client.update(auto_salvage_setting, value="false")

    assert setting.name == SETTING_AUTO_SALVAGE
    assert setting.value == "false"

    # Create Volume attached to a node.
    volume1 = create_and_check_volume(client,
                                      volume_name,
                                      size=SIZE)
    volume1.attach(hostId=host_id)
    volume1 = wait_for_volume_healthy(client, volume_name)
    volume1_data = write_volume_random_data(volume1)

    # Create Volume used by Pod
    pod_name, pv_name, pvc_name, pod_md5sum = \
        prepare_pod_with_data_in_mb(client, core_api,
                                    pod_make, pod_volume_name,
                                    data_path=pod_data_path,
                                    add_liveness_prope=False)

    # Create multiple volumes used by StatefulSet
    statefulset_name = 'statefulset-upgrade-test'
    update_statefulset_manifests(statefulset,
                                 storage_class,
                                 statefulset_name)
    create_storage_class(storage_class)
    create_and_wait_statefulset(statefulset)
    statefulset_pod_info = get_statefulset_pod_info(core_api, statefulset)

    for sspod_info in statefulset_pod_info:
        sspod_info['data'] = generate_random_data(VOLUME_RWTEST_SIZE)
        write_pod_volume_data(core_api,
                              sspod_info['pod_name'],
                              sspod_info['data'])

    # upgrade Longhorn
    assert longhorn_upgrade(upgrade_image_tag)

    client = get_longhorn_api_client()

    # wait for 1 minute before checking pod restarts
    time.sleep(60)

    pod = core_api.read_namespaced_pod(name=pod_name,
                                       namespace='default')
    assert pod.status.container_statuses[0].restart_count == 0

    for sspod_info in statefulset_pod_info:
        sspod = core_api.read_namespaced_pod(name=sspod_info['pod_name'],
                                             namespace='default')
        assert \
            sspod.status.container_statuses[0].restart_count == 0

    for sspod_info in statefulset_pod_info:
        resp = read_volume_data(core_api, sspod_info['pod_name'])
        assert resp == sspod_info['data']

    res_pod_md5sum = get_pod_data_md5sum(core_api, pod_name, pod_data_path)
    assert res_pod_md5sum == pod_md5sum

    check_volume_data(volume1, volume1_data)

    for sspod_info in statefulset_pod_info:
        sspod_info['data'] = generate_random_data(VOLUME_RWTEST_SIZE)
        write_pod_volume_data(core_api,
                              sspod_info['pod_name'],
                              sspod_info['data'])

    for sspod_info in statefulset_pod_info:
        resp = read_volume_data(core_api, sspod_info['pod_name'])
        assert resp == sspod_info['data']

    volume1 = client.by_id_volume(volume_name)
    volume1_data = write_volume_random_data(volume1)
    check_volume_data(volume1, volume1_data)

    statefulset['spec']['replicas'] = replicas = 0
    apps_api = get_apps_api_client()

    apps_api.patch_namespaced_stateful_set(
        name=statefulset_name,
        namespace='default',
        body={
            'spec': {
                'replicas': replicas
            }
        })

    delete_and_wait_pod(core_api, pod_name)

    volume = client.by_id_volume(volume_name)
    volume.detach()

    volumes = client.list_volume()

    for v in volumes:
        wait_for_volume_detached(client, v.name)

    engineimages = client.list_engine_image()

    for ei in engineimages:
        if ei.image == new_ei_name:
            new_ei = ei

    volumes = client.list_volume()

    for v in volumes:
        volume = client.by_id_volume(v.name)
        volume.engineUpgrade(image=new_ei.image)

    statefulset['spec']['replicas'] = replicas = 2
    apps_api = get_apps_api_client()

    apps_api.patch_namespaced_stateful_set(
        name=statefulset_name,
        namespace='default',
        body={
            'spec': {
                'replicas': replicas
            }
        })

    wait_statefulset(statefulset)

    pod = pod_make(name=pod_name)
    pod['spec']['volumes'] = [create_pvc_spec(pvc_name)]
    create_and_wait_pod(core_api, pod)

    volume1 = client.by_id_volume(volume_name)
    volume1.attach(hostId=host_id)
    volume1 = wait_for_volume_healthy(client, volume_name)

    for sspod_info in statefulset_pod_info:
        resp = read_volume_data(core_api, sspod_info['pod_name'])
        assert resp == sspod_info['data']

    res_pod_md5sum = get_pod_data_md5sum(core_api, pod_name, pod_data_path)
    assert res_pod_md5sum == pod_md5sum

    check_volume_data(volume1, volume1_data)
Example #22
def test_setting_toleration():
    """
    Test toleration setting

    1.  Set `taint-toleration` to "key1=value1:NoSchedule; key2:InvalidEffect".
    2.  Verify the request fails.
    3.  Create a volume and attach it.
    4.  Set `taint-toleration` to "key1=value1:NoSchedule; key2:NoExecute".
    5.  Verify that the toleration setting cannot be updated while any
        volume is attached.
    6.  Generate and write `data1` into the volume.
    7.  Detach the volume.
    8.  Set `taint-toleration` to "key1=value1:NoSchedule; key2:NoExecute".
    9.  Wait for all the Longhorn system components to restart with new
        toleration.
    10. Verify that UI, manager, and driver deployer don't restart and
        don't have the new toleration.
    11. Attach the volume again and verify the volume `data1`.
    12. Generate and write `data2` to the volume.
    13. Detach the volume.
    14. Clean the `toleration` setting.
    15. Wait for all the Longhorn system components to restart with no
        toleration.
    16. Attach the volume and validate `data2`.
    17. Generate and write `data3` to the volume.
    """
    client = get_longhorn_api_client()  # NOQA
    apps_api = get_apps_api_client()  # NOQA
    core_api = get_core_api_client()  # NOQA
    count = len(client.list_node())

    setting = client.by_id_setting(SETTING_TAINT_TOLERATION)

    with pytest.raises(Exception) as e:
        client.update(setting,
                      value="key1=value1:NoSchedule; key2:InvalidEffect")
    assert 'invalid effect' in str(e.value)

    volume_name = "test-toleration-vol"  # NOQA
    volume = create_and_check_volume(client, volume_name)
    volume.attach(hostId=get_self_host_id())
    volume = wait_for_volume_healthy(client, volume_name)

    setting_value_str = "key1=value1:NoSchedule; key2:NoExecute"
    setting_value_dicts = [
        {
            "key": "key1",
            "value": "value1",
            "operator": "Equal",
            "effect": "NoSchedule"
        },
        {
            "key": "key2",
            "value": None,
            "operator": "Exists",
            "effect": "NoExecute"
        },
    ]
    with pytest.raises(Exception) as e:
        client.update(setting, value=setting_value_str)
    assert 'cannot modify toleration setting before all volumes are detached' \
           in str(e.value)

    data1 = write_volume_random_data(volume)
    check_volume_data(volume, data1)

    volume.detach(hostId="")
    wait_for_volume_detached(client, volume_name)

    setting = client.update(setting, value=setting_value_str)
    assert setting.value == setting_value_str
    wait_for_toleration_update(core_api, apps_api, count, setting_value_dicts)

    client, node = wait_for_longhorn_node_ready()

    volume = client.by_id_volume(volume_name)
    volume.attach(hostId=node)
    volume = wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, data1)
    data2 = write_volume_random_data(volume)
    check_volume_data(volume, data2)
    volume.detach(hostId="")
    wait_for_volume_detached(client, volume_name)

    # cleanup
    setting_value_str = ""
    setting_value_dicts = []
    setting = client.by_id_setting(SETTING_TAINT_TOLERATION)
    setting = client.update(setting, value=setting_value_str)
    assert setting.value == setting_value_str
    wait_for_toleration_update(core_api, apps_api, count, setting_value_dicts)

    client, node = wait_for_longhorn_node_ready()

    volume = client.by_id_volume(volume_name)
    volume.attach(hostId=node)
    volume = wait_for_volume_healthy(client, volume_name)
    check_volume_data(volume, data2)
    data3 = write_volume_random_data(volume)
    check_volume_data(volume, data3)

    cleanup_volume(client, volume)
Example #23
def generate_load(request):

    index = get_random_suffix()

    longhorn_api_client = get_longhorn_api_client()
    k8s_api_client = get_core_api_client()

    check_and_set_backupstore(longhorn_api_client)

    volume_name = STRESS_VOLUME_NAME_PREFIX + index
    pv_name = STRESS_PV_NAME_PREFIX + index
    pvc_name = STRESS_PVC_NAME_PREFIX + index
    pod_name = STRESS_POD_NAME_PREFIX + index

    atexit.register(remove_datafile, pod_name)
    atexit.register(delete_and_wait_longhorn, longhorn_api_client, volume_name)
    atexit.register(delete_and_wait_pv, k8s_api_client, pv_name)
    atexit.register(delete_and_wait_pvc, k8s_api_client, pvc_name)
    atexit.register(delete_and_wait_pod, k8s_api_client, pod_name)

    longhorn_volume = create_and_check_volume(longhorn_api_client,
                                              volume_name,
                                              size=VOLUME_SIZE)

    wait_for_volume_detached(longhorn_api_client, volume_name)

    pod_manifest = generate_pod_with_pvc_manifest(pod_name, pvc_name)

    create_pv_for_volume(longhorn_api_client, k8s_api_client, longhorn_volume,
                         pv_name)

    create_pvc_for_volume(longhorn_api_client, k8s_api_client, longhorn_volume,
                          pvc_name)

    create_and_wait_pod(k8s_api_client, pod_manifest)

    snapshots_md5sum = dict()

    write_data(k8s_api_client, pod_name)
    create_recurring_jobs(longhorn_api_client, volume_name)

    global N_RANDOM_ACTIONS
    for round in range(N_RANDOM_ACTIONS):
        action = randrange(0, 8)

        if action == 0:
            print("write data started: " + time_now(), end=', ')
            write_data(k8s_api_client, pod_name)
            print("ended: " + time_now())

        elif action == 1:
            print("delete data started: " + time_now(), end=', ')
            delete_data(k8s_api_client, pod_name)
            print("ended: " + time_now())

        elif action == 2:
            print("create snapshot started: " + time_now(), end=', ')
            snapshot_create_and_record_md5sum(longhorn_api_client,
                                              k8s_api_client, volume_name,
                                              pod_name, snapshots_md5sum)
            print("ended: " + time_now())

        elif action == 3:
            print("delete random snapshot  started: " + time_now(), end=', ')
            delete_random_snapshot(longhorn_api_client, volume_name,
                                   snapshots_md5sum)
            print("ended: " + time_now())

        elif action == 4:
            print("revert random snapshot started: " + time_now(), end=', ')
            revert_random_snapshot(longhorn_api_client, k8s_api_client,
                                   volume_name, pod_manifest, snapshots_md5sum)
            print("ended: " + time_now())

        elif action == 5:
            print("create backup started: " + time_now(), end=', ')
            backup_create_and_record_md5sum(longhorn_api_client,
                                            k8s_api_client, volume_name,
                                            pod_name, snapshots_md5sum)
            print("ended: " + time_now())

        elif action == 6:
            print("delete replica started: " + time_now(), end=', ')
            delete_replica(longhorn_api_client, volume_name)
            print("ended: " + time_now())

        elif action == 7:
            print("restore random backup started: " + time_now(), end=', ')
            restore_and_check_random_backup(longhorn_api_client,
                                            k8s_api_client, volume_name,
                                            pod_name, snapshots_md5sum)

            print("ended: " + time_now())

    clean_volume_backups(longhorn_api_client, volume_name)