Example 1
def test_svc_lb_create(kubectl, monitor):
    svc = "test_svc_lb"
    svc_file = PWD + "/testdata/" + svc + ".yml"

    svc_name = "{}-{:08d}".format(svc.replace("_", "-"),
                                  random.randint(1, 99999999))
    kube_obj["name"] = svc_name
    kube_obj["kind"] = KubeApiObjKind.SERVICE
    kube_obj["validator"] = wait_for_svc_lb_validator

    waiter = KubeObjWaiter()
    monitor.wait_for_kube_object(kube_obj=kube_obj,
                                 timeout=DEFAULT_SVC_CREATION_TIMEOUT,
                                 waiter=waiter)

    with open(svc_file, "r") as f:
        data = f.read()
    yaml_obj = [obj for obj in yaml.load_all(data)]
    assert len(yaml_obj) == 1, "Loaded more than 1 yaml obj {}".format(
        yaml_obj)
    swagger_obj = yaml_to_swagger(yaml_obj[0])
    swagger_obj.metadata.name = svc_name

    create_svc_with_retry(kubectl, swagger_obj)
    waiter.wait()

    delete_svc_with_retry(kubectl, swagger_obj)

    if waiter.result != KubeObjStatusCode.OK:
        logger.info("Service created with status %s, events:\n%s",
                    waiter.result, pformat(waiter.details))
    test_result = waiter.result == KubeObjStatusCode.OK
    assert test_result
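
This test mutates a module-level kube_obj dict and uses a wait_for_svc_lb_validator that the snippet does not define. As a rough sketch only, assuming the monitor matches events by kind/name and passes the watched object's status dict to the validator (as validator_func does in Example 6), the shared pieces might look like the following; the defaults and the field checked by the validator are illustrative, not the project's actual definitions:

# Hypothetical module-level template mutated by each test; the real defaults
# are not shown in these examples.
kube_obj = {
    "kind": None,       # set by each test, e.g. KubeApiObjKind.SERVICE
    "name": None,       # set by each test
    "validator": None,  # set by each test
}

def wait_for_svc_lb_validator(svc_status):
    # Assumed condition: the service is ready once a load-balancer ingress appears.
    return bool(svc_status.get("loadBalancer", {}).get("ingress"))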
Example 2
def test_pod_create_insufficient_resource(kubectl, monitor):
    pod = "test_pod_insufficient_resource"
    pod_file = PWD + "/testdata/" + pod + ".yml"

    pod_name = "{}-{:08d}".format(pod.replace("_", "-"),
                                  random.randint(1, 99999999))
    kube_obj["name"] = pod_name
    waiter = KubeObjWaiter()
    monitor.wait_for_kube_object(kube_obj=kube_obj,
                                 timeout=DEFAULT_POD_CREATION_TIMEOUT,
                                 waiter=waiter)

    with open(pod_file, "r") as f:
        data = f.read()
    yaml_obj = [obj for obj in yaml.load_all(data)]
    assert len(yaml_obj) == 1, "Loaded more than 1 yaml obj {}".format(
        yaml_obj)
    swagger_obj = yaml_to_swagger(yaml_obj[0])
    swagger_obj.metadata.name = pod_name

    create_pod_with_retry(kubectl, swagger_obj)

    waiter.wait()

    delete_pod_with_retry(kubectl, swagger_obj)

    if waiter.result != KubeObjStatusCode.ERR_INSUFFICIENT_RESOURCE:
        logger.info("Pod created with status %s, events:\n%s", waiter.result,
                    pformat(waiter.details))
    test_result = waiter.result == KubeObjStatusCode.ERR_INSUFFICIENT_RESOURCE
    assert test_result
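
The create_pod_with_retry and delete_pod_with_retry helpers are not included in these snippets. A hypothetical sketch of such a helper is below; the kubectl.api.create_namespaced_pod call and its argument order are assumptions about the test harness, not confirmed API:

def create_pod_with_retry(kubectl, swagger_obj, attempts=5, delay=2):
    # Hypothetical helper: retry transient API errors when creating the pod.
    for i in range(attempts):
        try:
            return kubectl.api.create_namespaced_pod(swagger_obj, TEST_NAMESPACE)
        except Exception:
            if i == attempts - 1:
                raise
            time.sleep(delay)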
Example 3
def test_pod_create_timeout(monitor):
    # just don't create a pod
    pod = "test_pod_timeout"

    pod_name = "{}-{:08d}".format(pod.replace("_", "-"),
                                  random.randint(1, 99999999))
    kube_obj["name"] = pod_name
    waiter = KubeObjWaiter()
    monitor.wait_for_kube_object(kube_obj=kube_obj, timeout=10, waiter=waiter)

    waiter.wait()

    if waiter.result != KubeObjStatusCode.ERR_PLAT_TASK_CREATE_TIMEOUT:
        logger.info("Pod created with status %s, events:\n%s", waiter.result,
                    pformat(waiter.details))
    test_result = waiter.result == KubeObjStatusCode.ERR_PLAT_TASK_CREATE_TIMEOUT
    assert test_result
Example 4
    def stop_one(self, name, namespace=AXNameSpaces.AXSYS):
        time.sleep(
            random.randint(0, AXPlatformConfigDefaults.ObjectOperationJitter))
        logger.info("Deleting %s in namespace %s ...", name, namespace)
        start = time.time()
        kube_obj = self._kube_objects[name]
        kube_obj.namespace = namespace
        kube_obj.replacing = self._replacing
        assert isinstance(kube_obj, KubeObject)

        result = {
            "name": name,
            "code": [],
            "events": [],
            "failed": False,
            "duration": ""
        }

        # Don't delete if object does not exist
        if not kube_obj.exists():
            result["code"] += [
                "{:.25s}:{}".format(name, KubeObjStatusCode.DELETED)
            ]
            result["duration"] = str(round(time.time() - start, 2))
            return result

        monitor_info = kube_obj.get_delete_monitor_info()
        if monitor_info:
            # use monitor
            waiters = []

            # Create and register waiters for all objects that can be monitored
            for m in monitor_info:
                wait_info = {
                    "kind": KubeKindToKubeApiObjKind[m.kube_kind],
                    "name": m.name,
                    "validator": m.validator
                }
                waiter = KubeObjWaiter()
                waiters.append((waiter, wait_info))
                AXKubeMonitor().wait_for_kube_object(
                    wait_info, AXPlatformConfigDefaults.ObjDeleteWaitTimeout,
                    waiter)

            # Call kubectl delete
            kube_obj.delete()

            # Wait on all waiters to retrieve status and events
            for waiter, wait_info in waiters:
                waiter.wait()
                result["events"] += waiter.details
                if waiter.result in (KubeObjStatusCode.OK, KubeObjStatusCode.WARN):
                    result["code"].append("{:.25s}:{}".format(
                        wait_info["name"], KubeObjStatusCode.DELETED))
                    logger.info("Successfully deleted %s in %s with code %s.",
                                wait_info["name"], name, result["code"])
                else:
                    result["failed"] = True
                    result["code"].append("{:.25s}:{}".format(
                        wait_info["name"], KubeObjStatusCode.UNKNOWN))
                    logger.error(
                        "Failed to delete %s in %s with code %s. Events: %s",
                        wait_info["name"], name, result["code"],
                        str(waiter.details))

            # Poll once to confirm all components from this Kubernetes config file are gone,
            # in case the config file contains objects that cannot be monitored, e.g. a svc without an ELB
            if kube_obj.exists():
                logger.error("Object %s deleted but still exists", name)
                result["failed"] = True
                result["code"].append("{:.25s}:{}".format(
                    name, KubeObjStatusCode.UNKNOWN))
                result["events"].append(
                    "Object {} deleted but still exists.".format(name))
            result["duration"] = str(round(time.time() - start, 2))
            logger.info("Successfully deleted %s.", name)
            return result
        else:
            # use polling
            kube_obj.delete()
            return self._poll_till_not_exists(
                name=name,
                kube_obj=kube_obj,
                start_time=start,
                poll_interval=AXPlatformConfigDefaults.ObjDeletePollInterval,
                poll_max_retry=AXPlatformConfigDefaults.ObjDeletePollMaxRetry,
                rst=result)
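
_poll_till_not_exists is referenced above but not shown. A hypothetical sketch, assuming it simply re-checks kube_obj.exists() until the object disappears or retries run out, and fills in the same result dict:

    def _poll_till_not_exists(self, name, kube_obj, start_time, poll_interval,
                              poll_max_retry, rst):
        # Hypothetical polling fallback used when no monitor info is available.
        for _ in range(poll_max_retry):
            if not kube_obj.exists():
                rst["code"].append(
                    "{:.25s}:{}".format(name, KubeObjStatusCode.DELETED))
                rst["duration"] = str(round(time.time() - start_time, 2))
                return rst
            time.sleep(poll_interval)
        rst["failed"] = True
        rst["code"].append(
            "{:.25s}:{}".format(name, KubeObjStatusCode.UNKNOWN))
        rst["events"].append("Object {} still exists after polling".format(name))
        rst["duration"] = str(round(time.time() - start_time, 2))
        return rst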
Example 5
    def start_one(self, name, namespace=AXNameSpaces.AXSYS):
        time.sleep(
            random.randint(0, AXPlatformConfigDefaults.ObjectOperationJitter))
        logger.info("Creating %s in namespace %s ...", name, namespace)
        start = time.time()
        kube_obj = self._kube_objects[name]

        # Update namespace and replacing, as they may have changed during platform start
        kube_obj.namespace = namespace
        kube_obj.replacing = self._replacing

        assert isinstance(kube_obj, KubeObject)
        result = {
            "name": name,
            "code": [],
            "events": [],
            "failed": False,
            "duration": ""
        }
        if kube_obj.healthy():
            result["code"] += [
                "{:.25s}:{}".format(name, KubeObjStatusCode.OBJ_EXISTS)
            ]
            result["duration"] = str(round(time.time() - start, 2))
            return result

        # A previous platform start might have failed, leaving some components created
        # but not healthy (e.g. in CrashLoopBackoff). In this case, we delete the existing
        # object and try to create a new one
        if kube_obj.exists():
            logger.warning(
                "Object %s exists but not healthy. Deleting object for idempotency ...",
                name)
            self.stop_one(name, namespace)

        assert not kube_obj.exists(), \
            "Kube object {} still exists after deletion. Not expected".format(name)

        monitor_info = kube_obj.get_create_monitor_info()
        if monitor_info:
            # use monitor
            waiters = []

            # Create and register waiters for all objects that can be monitored
            for m in monitor_info:
                wait_info = {
                    "kind": KubeKindToKubeApiObjKind[m.kube_kind],
                    "name": m.name,
                    "validator": m.validator
                }
                waiter = KubeObjWaiter()
                waiters.append((waiter, wait_info))
                AXKubeMonitor().wait_for_kube_object(
                    wait_info, AXPlatformConfigDefaults.ObjCreateWaitTimeout,
                    waiter)

            # Call kubectl create
            kube_obj.create()

            # Wait on all waiters to retrieve status and events
            for waiter, wait_info in waiters:
                waiter.wait()
                result["events"] += waiter.details
                result["code"].append("{:.25s}:{}".format(
                    wait_info["name"], waiter.result))
                if waiter.result in (KubeObjStatusCode.OK, KubeObjStatusCode.WARN):
                    logger.info("Successfully created %s with code %s.",
                                wait_info["name"], waiter.result)
                else:
                    result["failed"] = True
                    logger.error(
                        "Failed to create %s in %s with code %s. Events: %s",
                        wait_info["name"], namespace, waiter.result,
                        str(waiter.details))
                    if not self._debug:
                        logger.info("Deleting %s due to creation failure",
                                    name)
                        del_rst = self.stop_one(name, namespace)
                        result["code"] += del_rst["code"]
                        result["events"] += del_rst["events"]
                        result["duration"] = str(round(time.time() - start, 2))
                        return result

            # Poll extra if required (for PetSets and Deployments with multiple replicas)
            if kube_obj.extra_poll:
                logger.info(
                    "Polling till healthy to make sure rest of components of %s are up and running ...",
                    name)
                create_rst = self._poll_till_healthy(
                    name=name,
                    kube_obj=kube_obj,
                    start_time=start,
                    poll_interval=AXPlatformConfigDefaults.ObjCreateExtraPollInterval,
                    poll_max_retry=AXPlatformConfigDefaults.ObjCreateExtraPollMaxRetry,
                    rst=result)
                if create_rst["failed"] and not self._debug:
                    logger.info("Deleting %s due to creation failure", name)
                    del_rst = self.stop_one(name, namespace)
                    create_rst["code"] += del_rst["code"]
                    create_rst["events"] += del_rst["events"]
                    create_rst["duration"] = str(round(time.time() - start, 2))
                return create_rst

            # Poll once to confirm all components from this Kubernetes config file exist,
            # in case the config file contains objects that cannot be monitored, e.g. a svc
            # without an ELB. This is really not expected, so we don't delete the object
            if not kube_obj.healthy():
                logger.error(
                    "Object %s created but is not healthy. This is NOT EXPECTED, please check manually.",
                    name)
                result["code"].append("{:.25s}:{}".format(
                    name, KubeObjStatusCode.UNHEALTHY))
                result["failed"] = True
                result["events"].append(
                    "Object {} created byt is not healthy".format(name))
            result["duration"] = str(round(time.time() - start, 2))

            if not result["failed"]:
                logger.info("Successfully created object %s.", name)
            return result
        else:
            # use polling
            kube_obj.create()
            create_rst = self._poll_till_healthy(
                name=name,
                kube_obj=kube_obj,
                start_time=start,
                poll_interval=AXPlatformConfigDefaults.ObjCreatePollInterval,
                poll_max_retry=AXPlatformConfigDefaults.ObjCreatePollMaxRetry,
                rst=result)
            if create_rst["failed"] and not self._debug:
                logger.info("Deleting %s due to creation failure", name)
                del_rst = self.stop_one(name, namespace)
                create_rst["code"] += del_rst["code"]
                create_rst["events"] += del_rst["events"]
                create_rst["duration"] = str(round(time.time() - start, 2))
            return create_rst
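
_poll_till_healthy is likewise not shown; a comparable hypothetical sketch, assuming it polls kube_obj.healthy() and fills in the result dict the same way:

    def _poll_till_healthy(self, name, kube_obj, start_time, poll_interval,
                           poll_max_retry, rst):
        # Hypothetical sketch: wait until every component of the object reports healthy.
        for _ in range(poll_max_retry):
            if kube_obj.healthy():
                rst["code"].append(
                    "{:.25s}:{}".format(name, KubeObjStatusCode.OK))
                rst["duration"] = str(round(time.time() - start_time, 2))
                return rst
            time.sleep(poll_interval)
        rst["failed"] = True
        rst["code"].append(
            "{:.25s}:{}".format(name, KubeObjStatusCode.UNHEALTHY))
        rst["events"].append("Object {} did not become healthy in time".format(name))
        rst["duration"] = str(round(time.time() - start_time, 2))
        return rst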
Example 6
    def stop(self, jobname=None):
        """
        NOTE: This function assumes that a pod is already running.
        This process kills the user command so that artifacts collection can occur
        Once this is done, the pod will be completed. This call will return when
        pod is completed. Note: pod is not deleted (just completed)
        """
        def get_container_status(s, container_name, name):
            if isinstance(s, dict):
                try:
                    c_status = s.get("containerStatuses", None)
                    for c in c_status or []:
                        n = c.get("name", None)
                        if n == container_name:
                            return c
                except Exception:
                    logger.exception(
                        "cannot get_container_status for [%s] [%s]", name,
                        container_name)

            return None

        def get_container_state(s, container_name, name):
            container_status = get_container_status(s,
                                                    container_name,
                                                    name=name)
            container_states = ["waiting", "running", "terminated"]
            if isinstance(container_status, dict):
                if "state" in container_status:
                    for state_string in container_states:
                        if state_string in container_status["state"]:
                            # found a recognized container state; return it
                            logger.debug("state=%s for [%s] [%s]",
                                         state_string, name, container_name)
                            return state_string
                    logger.error("unknown state for [%s] [%s]: %s", name,
                                 container_name, s)
                    return None
                else:
                    # No state
                    logger.error("no state for [%s] [%s]: %s", name,
                                 container_name, s)
                    return None
            else:
                # no status
                logger.error("no status for [%s] [%s]: %s", name,
                             container_name, s)
                return None

        def get_pod_phase(s):
            if isinstance(s, dict):
                return s.get("phase", None)
            else:
                return None

        def validator_func(pod_status):
            # always return true for any event
            return True

        def send_kill_signal_to_main_container():
            ax_command_path = "/ax-execu-host/art"
            busybox_command_path = os.path.join(ax_command_path,
                                                "busybox-i686")
            bash_path = os.path.join(ax_command_path, "ax_bash_ax")
            touch_command = "{} {}".format(busybox_command_path, "touch")
            pgrep_command = "{} {}".format(busybox_command_path, "pgrep")
            xargs_command = "{} {}".format(busybox_command_path, "xargs")
            kill_command = os.path.join(ax_command_path, "ax_kill_ax")
            cat_command = os.path.join(ax_command_path, "ax_cat_ax")

            # execute command to initiate user command kill
            # This command may or may not execute properly if the container is already dying or dead
            # but it does not matter to us here since we will have a waiter. This command will ensure
            # that if a container is running, it will start the process of terminating
            # TODO: we may have pods that are started programmatically that do not have artifacts later
            # HACK HACK
            cmd = [
                bash_path, "-c",
                "{touch} {scratch_path}/.ax_delete ;  {kill} -9 `{cat} {scratch_path}/.ax_pid` "
                .format(touch=touch_command,
                        scratch_path=ARTIFACTS_CONTAINER_SCRATCH_PATH,
                        pgrep=pgrep_command,
                        xargs=xargs_command,
                        kill=kill_command,
                        cat=cat_command)
            ]
            logger.debug(
                "Try gracefully stop main container in [%s][%s]. cmd=%s",
                jobname, self.name, cmd)
            output = self.exec_commands(cmd)
            logger.debug("Kill output:\n%s", output)

        main_name = self.get_main_container_name()
        wait_name = SIDEKICK_WAIT_CONTAINER_NAME

        logger.debug("About to stop pod [%s][%s]", jobname, self.name)

        count = 0
        while True:
            count += 1
            if count > 180:
                logger.warning("Pod [%s][%s] too many lopps, abort. count=%s",
                               jobname, self.name, count)
                return False
            obj = {
                "kind": "pods",
                "name": jobname if jobname else self.name,
                "validator": validator_func
            }
            waiter = KubeObjWaiter()
            monitor = AXKubeMonitor()
            monitor.wait_for_kube_object(obj,
                                         timeout=DELETE_WAITER_WAIT_TIMEOUT,
                                         waiter=waiter)

            # read status here
            read_count = 0
            while True:
                read_count += 1
                if read_count > 180:
                    logger.warning(
                        "Pod [%s][%s] too many retry, abort. count=%s",
                        jobname, self.name, count)
                    return False
                try:
                    status = self.client.api.read_namespaced_pod_status(
                        self.namespace, self.name).status
                    assert isinstance(status, swagger_client.V1PodStatus)
                    status_dict = swagger_client.ApiClient(
                    ).sanitize_for_serialization(status)
                    break
                except Exception:
                    # xxx todo: what if self.name is not there?
                    logger.exception(
                        "exception in get status for Pod [%s][%s] retry=%s count=%s",
                        jobname, self.name, read_count, count)
                    time.sleep(10)
                    continue

            main_container_state = get_container_state(status_dict, main_name,
                                                       self.name)
            wait_container_state = get_container_state(status_dict, wait_name,
                                                       self.name)
            pod_phase = get_pod_phase(status_dict)
            logger.debug("Pod [%s][%s] phase=%s. main=%s, wait=%s count=%s",
                         jobname, self.name, pod_phase, main_container_state,
                         wait_container_state, count)

            if main_container_state == "waiting":
                logger.debug("Pod [%s][%s] main in %s count=%s", jobname,
                             self.name, main_container_state, count)
            elif main_container_state == "running":
                logger.debug("Pod [%s][%s] main in %s count=%s", jobname,
                             self.name, main_container_state, count)
                send_kill_signal_to_main_container()
            elif main_container_state is None:
                if pod_phase == "Pending":
                    logger.debug("Pod [%s][%s] in %s phase count=%s", jobname,
                                 self.name, pod_phase, count)
                else:
                    logger.warning(
                        "Pod [%s][%s] unknown main container state, abort. %s count=%s",
                        jobname, self.name, status_dict, count)
                    return False
            else:
                assert main_container_state == "terminated", "bad state {}".format(
                    main_container_state)
                if wait_container_state in ["waiting", "running"]:
                    logger.debug("Pod [%s][%s] wait in %s count=%s", jobname,
                                 self.name, wait_container_state, count)
                elif wait_container_state == "terminated":
                    logger.debug(
                        "Pod [%s][%s] all containers are terminated. stop() done. count=%s",
                        jobname, self.name, count)
                    return True
                else:
                    logger.warning(
                        "Pod [%s][%s] unknown wait container state, abort. %s. count=%s",
                        jobname, self.name, status_dict, count)
                    return False

            logger.debug("Pod [%s][%s] wait for new event. count=%s", jobname,
                         self.name, count)
            waiter.wait()
            if waiter.result != KubeObjStatusCode.OK:
                logger.info("Pod [%s][%s] waiter return %s, events: %s",
                            jobname, self.name, waiter.result, waiter.details)
            else:
                logger.debug("Pod [%s][%s] waiter return ok count=%s", jobname,
                             self.name, count)
Example 7
def test_volume_create_delete(kubectl, monitor, kubepoll):
    pvc = "test_pvc"
    pvc_label = "app=testpvc"
    pvc_file = PWD + "/testdata/" + pvc + ".yml"
    pvc_name = "{}-{:08d}".format(pvc.replace("_", "-"),
                                  random.randint(1, 99999999))
    kube_obj["name"] = pvc_name
    kube_obj["kind"] = KubeApiObjKind.PVC
    kube_obj["validator"] = wait_for_pvc_validator

    waiter = KubeObjWaiter()
    monitor.wait_for_kube_object(kube_obj=kube_obj,
                                 timeout=DEFAULT_PVC_CREATION_TIMEOUT,
                                 waiter=waiter)

    with open(pvc_file, "r") as f:
        data = f.read()
    yaml_obj = [obj for obj in yaml.load_all(data)]
    assert len(yaml_obj) == 1, "Loaded more than 1 yaml obj {}".format(
        yaml_obj)
    swagger_obj = yaml_to_swagger(yaml_obj[0])
    swagger_obj.metadata.name = pvc_name
    # Manually patch the access mode, as the swagger client mistakenly interprets it as a map
    swagger_obj.spec.access_modes = ["ReadWriteOnce"]

    create_pvc_with_retry(kubectl, swagger_obj)

    waiter.wait()

    try:
        if waiter.result != KubeObjStatusCode.OK:
            logger.info("PVC created with status %s, events:\n%s",
                        waiter.result, pformat(waiter.details))
        test_result = waiter.result == KubeObjStatusCode.OK
        assert test_result

        pvcs = kubepoll.poll_kubernetes_sync(KubeKind.PVC, TEST_NAMESPACE,
                                             pvc_label)
        pvc = None
        for p in pvcs.items:
            if p.metadata.name == pvc_name:
                pvc = p
                break

        assert pvc

        kube_obj["name"] = pvc.spec.volume_name
        kube_obj["kind"] = KubeApiObjKind.PV
        kube_obj["validator"] = pv_release_validator

        waiter = KubeObjWaiter()
        monitor.wait_for_kube_object(kube_obj=kube_obj,
                                     timeout=DEFAULT_PV_DELETE_TIMEOUT,
                                     waiter=waiter)

        delete_pvc_with_retry(kubectl, swagger_obj)
        waiter.wait()

        if waiter.result != KubeObjStatusCode.OK:
            logger.info("PV released with status %s, events:\n%s",
                        waiter.result, pformat(waiter.details))
        test_result = waiter.result in (KubeObjStatusCode.OK, KubeObjStatusCode.WARN)
        assert test_result
    except Exception:
        # Clean up the PVC before re-raising the original exception
        delete_pvc_with_retry(kubectl, swagger_obj)
        raise
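
The pv_release_validator used above is not defined in this snippet. Assuming validators receive the watched object's status dict (as in Example 6), a plausible but purely illustrative sketch treats the PV as released once its phase is no longer Bound:

def pv_release_validator(pv_status):
    # Assumed condition: the PV backing the deleted PVC has been released or recycled.
    return pv_status.get("phase") in ("Released", "Available")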