Example 1
def do_test() -> bool:
    """Exercise the Akri Validating Webhook.

    After confirming the cluster reached its expected initial state, this
    test:
      1. dumps the Webhook's Deployment, ReplicaSet and Pod for debugging,
      2. creates a well-formed Akri Configuration (the webhook must admit it),
      3. deletes that Configuration,
      4. creates a malformed Configuration with ``resources`` in the wrong
         location (the webhook must reject it with an ApiException),
      5. appends the Webhook's logs to WEBHOOK_LOG_PATH.

    Returns:
        bool: True when the webhook admitted the valid Configuration and
        rejected the invalid one, False otherwise.
    """
    kubeconfig_path = shared_test_code.get_kubeconfig_path()
    print("Loading k8s config: {}".format(kubeconfig_path))
    config.load_kube_config(config_file=kubeconfig_path)

    # Get kubectl command
    kubectl_cmd = shared_test_code.get_kubectl_command()

    # Ensure Helm Akri installation applied CRDs and set up Agent and Controller
    print("Checking for CRDs")
    if not shared_test_code.crds_applied():
        print("CRDs not applied by helm chart")
        return False

    print("Checking for initial Akri state")

    if not shared_test_code.check_akri_state(1, 1, 0, 0, 0, 0):
        print("Akri not running in expected state")
        run("sudo {kubectl} get pods,services,akric,akrii --show-labels".
            format(kubectl=kubectl_cmd))
        return False

    # Enumerate Webhook resources up front so a later failure is easier to diagnose
    print("Debugging:")

    print("Deployment:")
    run("sudo {kubectl} describe deployment/{service} "
        "--namespace={namespace}".format(kubectl=kubectl_cmd,
                                         service=WEBHOOK_NAME,
                                         namespace=NAMESPACE))

    print("ReplicaSet:")
    run("sudo {kubectl} describe replicaset "
        "--selector=app={service} "
        "--namespace={namespace}".format(kubectl=kubectl_cmd,
                                         service=WEBHOOK_NAME,
                                         namespace=NAMESPACE))

    print("Pod:")
    run("sudo {kubectl} describe pod "
        "--selector=app={service} "
        "--namespace={namespace}".format(kubectl=kubectl_cmd,
                                         service=WEBHOOK_NAME,
                                         namespace=NAMESPACE))

    # Apply Valid Akri Configuration
    print("Applying Valid Akri Configuration")

    # Use the template and place resources in the correct location.
    # NOTE(review): this aliases (does not copy) TEMPLATE, so the shared
    # template dict is mutated in place here and again below for the
    # invalid case.
    body = TEMPLATE
    body["spec"]["brokerSpec"]["brokerPodSpec"]["containers"][0][
        "resources"] = RESOURCES

    api = client.CustomObjectsApi()
    api.create_namespaced_custom_object(group=GROUP,
                                        version=VERSION,
                                        namespace=NAMESPACE,
                                        plural="configurations",
                                        body=body)

    # Read the Configuration back to confirm it was admitted and stored
    print("Retrieving Akri Configuration")
    akri_config = api.get_namespaced_custom_object(group=GROUP,
                                                   version=VERSION,
                                                   name=NAME,
                                                   namespace=NAMESPACE,
                                                   plural="configurations")
    print(akri_config)

    # Delete the valid Configuration before testing the invalid one
    api.delete_namespaced_custom_object(
        group=GROUP,
        version=VERSION,
        name=NAME,
        namespace=NAMESPACE,
        plural="configurations",
        body=client.V1DeleteOptions(),
    )

    # Apply Invalid Akri Configuration — the webhook should reject it
    res = False
    try:
        print("Applying Invalid (!) Akri Configuration")

        # Use the template but(!) place resources in an incorrect location
        body = TEMPLATE
        body["spec"]["brokerSpec"]["brokerPodSpec"]["resources"] = RESOURCES

        api.create_namespaced_custom_object(group=GROUP,
                                            version=VERSION,
                                            namespace=NAMESPACE,
                                            plural="configurations",
                                            body=body)
    except ApiException as e:
        print(
            "As expected, Invalid Akri Configuration generates API Exception")
        # BUGFIX: the placeholders were previously passed to print() as
        # extra arguments instead of being formatted into the string.
        print("Status Code: {} [{}]".format(e.status, e.reason))
        print("Response: {}".format(e.body))
        res = True
    else:
        print("Expected APIException but none was thrown. This is an error!")

        # Debugging: check the Webhook's logs
        print("Webhook logs")
        run("sudo {kubectl} logs deployment/{service} --namespace={namespace}".
            format(kubectl=kubectl_cmd,
                   service=WEBHOOK_NAME,
                   namespace=NAMESPACE))

        res = False

    # Save Webhook logs for post-run inspection
    run("{kubectl} logs deployment/{service} --namespace={namespace} >> {file}"
        .format(kubectl=kubectl_cmd,
                service=WEBHOOK_NAME,
                namespace=NAMESPACE,
                file=WEBHOOK_LOG_PATH))

    print("Akri Validating Webhook test: {}".format(
        "Success" if res else "Failure"))
    return res
Example 2
def do_test():
    """Verify the Controller restores a deleted broker pod.

    Confirms the expected initial Akri state, deletes one of the two
    debug-echo broker pods, waits until two broker pods are running again,
    and checks that the restored pod carries a new uid (i.e. it was truly
    recreated rather than the deletion having been a no-op).

    Returns:
        bool: True on success, False on the first failed check.
    """
    kubeconfig_path = shared_test_code.get_kubeconfig_path()
    print("Loading k8s config: {}".format(kubeconfig_path))
    config.load_kube_config(config_file=kubeconfig_path)

    # Get kubectl command
    kubectl_cmd = shared_test_code.get_kubectl_command()

    # Ensure Helm Akri installation applied CRDs and set up agent and controller
    print("Checking for CRDs")
    if not shared_test_code.crds_applied():
        print("CRDs not applied by helm chart")
        return False

    print("Checking for initial Akri state")
    if not shared_test_code.check_akri_state(1, 1, 2, 2, 1, 2):
        print("Akri not running in expected state")
        os.system('sudo {} get pods,services,akric,akrii --show-labels'.format(
            kubectl_cmd))
        return False

    # Create kube client
    v1 = client.CoreV1Api()

    # Delete one of the broker pods
    broker_pod_selector = "{}={}".format(
        shared_test_code.CONFIGURATION_LABEL_NAME,
        shared_test_code.DEBUG_ECHO_NAME)
    brokers_info = shared_test_code.get_running_pod_names_and_uids(
        broker_pod_selector)
    if len(brokers_info) != 2:
        # BUGFIX: the placeholder was previously passed to print() as a
        # second argument instead of being formatted into the message.
        print("Expected to find 2 broker pods but found: {}".format(
            len(brokers_info)))
        os.system('sudo {} get pods,services,akric,akrii --show-labels'.format(
            kubectl_cmd))
        return False

    # There is a possible race condition here between when the `kubectl delete pod` returns,
    # when check_broker_pod_state validates that the pod is gone, and when the check_akri_state
    # validates that the broker pod has been restarted

    broker_pod_name = sorted(brokers_info.keys())[0]
    delete_pod_command = 'sudo {} delete pod {}'.format(
        kubectl_cmd, broker_pod_name)
    print("Deleting broker pod: {}".format(delete_pod_command))
    os.system(delete_pod_command)

    # Wait for there to be 2 broker pods again
    if not shared_test_code.check_broker_pods_state(v1, 2):
        print(
            "Akri not running in expected state after broker pod restoration should have happened"
        )
        os.system('sudo {} get pods,services,akric,akrii --show-labels'.format(
            kubectl_cmd))
        return False

    restored_brokers_info = shared_test_code.get_running_pod_names_and_uids(
        broker_pod_selector)
    if len(restored_brokers_info) != 2:
        print("Expected to find 2 broker pods but found: {}".format(
            len(restored_brokers_info)))
        # BUGFIX: '.foramt' was a typo that raised AttributeError on this
        # failure path; corrected to '.format'.
        os.system('sudo {} get pods,services,akric,akrii --show-labels'.format(
            kubectl_cmd))
        return False

    # Make sure that the deleted broker uid differs from the restored broker
    # pod uid, signifying that the Pod was restarted.
    # NOTE(review): this assumes the restored pod reuses the deleted pod's
    # name (deterministic broker pod naming); otherwise the lookup below
    # would raise KeyError — confirm against shared_test_code.
    print("Restored broker pod uid should differ from original broker pod uid")
    if brokers_info[broker_pod_name] == restored_brokers_info[broker_pod_name]:
        print(
            "Restored broker pod uid [{}] should differ from original broker pod uid [{}]"
            .format(brokers_info[broker_pod_name],
                    restored_brokers_info[broker_pod_name]))
        os.system('sudo {} get pods,services,akric,akrii --show-labels'.format(
            kubectl_cmd))
        return False

    return True
Example 3
def do_test():
    """Validate the debug-echo device offline/online lifecycle.

    Marks the debug-echo device OFFLINE inside the Agent pod and checks
    that Akri tears the brokers and services down, brings the device back
    ONLINE and checks they are restored, verifies the Agent logged at
    least one successful crictl slot reconciliation, then deletes the
    Configuration and confirms cleanup. Returns True on success, False on
    the first failed check.
    """
    kubeconfig = shared_test_code.get_kubeconfig_path()
    print("Loading k8s config: {}".format(kubeconfig))
    config.load_kube_config(config_file=kubeconfig)

    # Resolve the kubectl binary to use for shell commands
    kubectl = shared_test_code.get_kubectl_command()

    def dump_cluster_state():
        # Show pods/services/configurations/instances to aid debugging
        os.system('sudo {} get pods,services,akric,akrii --show-labels'.format(
            kubectl))

    def set_device_availability(state):
        # Overwrite the debug-echo availability file inside the Agent pod;
        # the inner quotes rely on shell quote concatenation.
        os.system(
            'sudo {} exec -i {} -- /bin/bash -c "echo "{}" > /tmp/debug-echo-availability.txt"'
            .format(kubectl, agent_pod, state))

    # The Helm chart must have installed the CRDs before anything else
    print("Checking for CRDs")
    if not shared_test_code.crds_applied():
        print("CRDs not applied by helm chart")
        return False

    print("Checking for initial Akri state")
    if not shared_test_code.check_akri_state(1, 1, 2, 2, 1, 2):
        print("Akri not running in expected state")
        dump_cluster_state()
        return False

    agent_pod = shared_test_code.agent_pod_name

    # Offline scenario: the device disappears, so Akri should tear down
    print("Writing to Agent pod {} that device offline".format(agent_pod))
    set_device_availability("OFFLINE")

    print("Checking Akri state after taking device offline")
    if not shared_test_code.check_akri_state(1, 1, 0, 0, 0, 0):
        print("Akri not running in expected state after taking device offline")
        dump_cluster_state()
        return False

    # Online scenario: the device returns, so Akri should rebuild
    print("Writing to Agent pod {} that device online".format(agent_pod))
    set_device_availability("ONLINE")

    print("Checking Akri state after bringing device back online")
    if not shared_test_code.check_akri_state(1, 1, 2, 2, 1, 2):
        print(
            "Akri not running in expected state after bringing device back online"
        )
        dump_cluster_state()
        return False

    # Slot reconciliation must have logged at least one success; the final
    # `grep -v 0` makes the pipeline exit non-zero when the count is zero.
    print("Check logs for Agent slot-reconciliation for pod {}".format(
        agent_pod))
    reconciliation_status = os.system(
        'sudo {} logs {} | grep "get_node_slots - crictl called successfully" | wc -l | grep -v 0'
        .format(kubectl, agent_pod))
    if reconciliation_status != 0:
        print(
            "Akri failed to successfully connect to crictl via the CRI socket")
        return False

    # Cleanup scenario: removing the Configuration should remove everything
    print("Deleting Akri configuration: {}".format(
        shared_test_code.DEBUG_ECHO_NAME))
    os.system("sudo {} delete akric {}".format(
        kubectl, shared_test_code.DEBUG_ECHO_NAME))

    print("Checking Akri state after deleting configuration")
    if not shared_test_code.check_akri_state(1, 1, 0, 0, 0, 0):
        print(
            "Akri not running in expected state after deleting configuration")
        dump_cluster_state()
        return False

    return True
Example 4
def do_test():
    """End-to-end Akri test: device lifecycle, slot reconciliation, broker restoration.

    Steps:
      1. Confirm CRDs are applied and Akri is in its expected initial state.
      2. Take the debug-echo device OFFLINE and verify brokers/services are
         removed; bring it back ONLINE and verify they are restored.
      3. Check the Agent logs show a successful crictl slot reconciliation.
      4. Delete one broker pod and verify the Controller recreates it with
         a new uid.
      5. Delete the Configuration and verify cleanup.

    Returns:
        bool: True on success, False on the first failed check.
    """
    kubeconfig_path = shared_test_code.get_kubeconfig_path()
    print("Loading k8s config: {}".format(kubeconfig_path))
    config.load_kube_config(config_file=kubeconfig_path)

    # Get kubectl command
    kubectl_cmd = shared_test_code.get_kubectl_command()

    def dump_cluster_state():
        # Helper: dump pods/services/configurations/instances for debugging
        os.system('sudo {} get pods,services,akric,akrii --show-labels'.format(
            kubectl_cmd))

    # Ensure Helm Akri installation applied CRDs and set up agent and controller
    print("Checking for CRDs")
    if not shared_test_code.crds_applied():
        print("CRDs not applied by helm chart")
        return False

    print("Checking for initial Akri state")
    if not shared_test_code.check_akri_state(1, 1, 2, 2, 1, 2):
        print("Akri not running in expected state")
        dump_cluster_state()
        return False

    #
    # Check agent responds to dynamic offline/online resource
    #
    print("Writing to Agent pod {} that device offline".format(
        shared_test_code.agent_pod_name))
    os.system(
        'sudo {} exec -i {} -- /bin/sh -c "echo "OFFLINE" > /tmp/debug-echo-availability.txt"'
        .format(kubectl_cmd, shared_test_code.agent_pod_name))

    print("Checking Akri state after taking device offline")
    if not shared_test_code.check_akri_state(1, 1, 0, 0, 0, 0):
        print("Akri not running in expected state after taking device offline")
        dump_cluster_state()
        return False

    # Do back online scenario
    print("Writing to Agent pod {} that device online".format(
        shared_test_code.agent_pod_name))
    os.system(
        'sudo {} exec -i {} -- /bin/sh -c "echo "ONLINE" > /tmp/debug-echo-availability.txt"'
        .format(kubectl_cmd, shared_test_code.agent_pod_name))

    print("Checking Akri state after bringing device back online")
    if not shared_test_code.check_akri_state(1, 1, 2, 2, 1, 2):
        print(
            "Akri not running in expected state after bringing device back online"
        )
        dump_cluster_state()
        return False

    #
    # Check that slot reconciliation is working on agent
    #
    print("Check logs for Agent slot-reconciliation for pod {}".format(
        shared_test_code.agent_pod_name))
    temporary_agent_log_path = "/tmp/agent_log.txt"
    # Fetching logs can fail transiently; retry up to three times
    for attempt in range(3):
        log_result = subprocess.run('sudo {} logs {} > {}'.format(
            kubectl_cmd, shared_test_code.agent_pod_name,
            temporary_agent_log_path),
                                    shell=True)
        if log_result.returncode == 0:
            print("Successfully stored Agent logs in {}".format(
                temporary_agent_log_path))
            break
        print(
            "Failed to get logs from {} pod with result {} on attempt {} of 3".
            format(shared_test_code.agent_pod_name, log_result, attempt))
        if attempt == 2:
            return False
    grep_result = subprocess.run([
        'grep', "get_node_slots - crictl called successfully",
        temporary_agent_log_path
    ])
    if grep_result.returncode != 0:
        # BUGFIX: the placeholder was previously passed to print() as an
        # extra argument instead of being formatted into the message.
        print(
            "Akri failed to successfully connect to crictl via the CRI socket with return value of {}"
            .format(grep_result))
        # Log information to understand why error occurred
        dump_cluster_state()
        os.system('grep get_node_slots {}'.format(temporary_agent_log_path))
        return False

    #
    # Check that broker is recreated if it is deleted
    #
    broker_pod_selector = "{}={}".format(
        shared_test_code.CONFIGURATION_LABEL_NAME,
        shared_test_code.DEBUG_ECHO_NAME)
    brokers_info = shared_test_code.get_running_pod_names_and_uids(
        broker_pod_selector)
    if len(brokers_info) != 2:
        print("Expected to find 2 broker pods but found: {}".format(
            len(brokers_info)))
        dump_cluster_state()
        return False

    # There is a possible race condition here between when the `kubectl delete pod` returns,
    # when check_broker_pod_state validates that the pod is gone, and when the check_akri_state
    # validates that the broker pod has been restarted

    broker_pod_name = sorted(brokers_info.keys())[0]
    delete_pod_command = 'sudo {} delete pod {}'.format(
        kubectl_cmd, broker_pod_name)
    print("Deleting broker pod: {}".format(delete_pod_command))
    os.system(delete_pod_command)

    # Create kube client
    v1 = client.CoreV1Api()

    # Wait for there to be 2 broker pods again
    if not shared_test_code.check_broker_pods_state(v1, 2):
        print(
            "Akri not running in expected state after broker pod restoration should have happened"
        )
        dump_cluster_state()
        return False

    restored_brokers_info = shared_test_code.get_running_pod_names_and_uids(
        broker_pod_selector)
    if len(restored_brokers_info) != 2:
        print("Expected to find 2 broker pods but found: {}".format(
            len(restored_brokers_info)))
        dump_cluster_state()
        return False

    # Make sure that the deleted broker uid differs from the restored broker
    # pod uid, signifying that the Pod was restarted.
    # NOTE(review): assumes the restored pod reuses the deleted pod's name
    # (deterministic naming); otherwise this lookup raises KeyError —
    # confirm against shared_test_code.
    print("Restored broker pod uid should differ from original broker pod uid")
    if brokers_info[broker_pod_name] == restored_brokers_info[broker_pod_name]:
        print(
            "Restored broker pod uid [{}] should differ from original broker pod uid [{}]"
            .format(brokers_info[broker_pod_name],
                    restored_brokers_info[broker_pod_name]))
        dump_cluster_state()
        return False

    # Do cleanup scenario
    print("Deleting Akri configuration: {}".format(
        shared_test_code.DEBUG_ECHO_NAME))
    os.system("sudo {} delete akric {}".format(
        kubectl_cmd, shared_test_code.DEBUG_ECHO_NAME))

    print("Checking Akri state after deleting configuration")
    if not shared_test_code.check_akri_state(1, 1, 0, 0, 0, 0):
        print(
            "Akri not running in expected state after deleting configuration")
        dump_cluster_state()
        return False

    return True