Beispiel #1
0
async def _execute(
    lifecycle: Callable,
    registry: registries.BaseRegistry,
    cause: Cause,
    retry_on_errors: bool = True,
) -> None:
    """
    Call the next handler(s) from the chain of the handlers.

    Keep the record on the progression of the handlers in the object's status,
    and use it on the next invocation to determined which handler(s) to call.

    This routine is used both for the global handlers (via global registry),
    and for the sub-handlers (via a simple registry of the current handler).

    Raises `HandlerChildrenRetry` if there are children handlers to be executed
    on the next call, and implicitly provokes such a call by making the changes
    to the status fields (on the handler progression and number of retries).

    Exits normally if all handlers for this cause are fully done.
    """
    logger = cause.logger

    # Filter and select the handlers to be executed right now, on this event reaction cycle.
    handlers = registry.get_handlers(cause=cause)
    handlers_done = [
        h for h in handlers if status.is_finished(body=cause.body, handler=h)
    ]
    handlers_wait = [
        h for h in handlers if status.is_sleeping(body=cause.body, handler=h)
    ]
    handlers_todo = [
        h for h in handlers if status.is_awakened(body=cause.body, handler=h)
    ]
    handlers_plan = [
        h
        for h in await invocation.invoke(lifecycle, handlers_todo, cause=cause)
    ]
    handlers_left = [
        h for h in handlers_todo if h.id not in {h.id
                                                 for h in handlers_plan}
    ]

    # Set the timestamps -- even if not executed on this event, but just got registered.
    for handler in handlers:
        if not status.is_started(body=cause.body, handler=handler):
            status.set_start_time(body=cause.body,
                                  patch=cause.patch,
                                  handler=handler)

    # Execute all planned (selected) handlers in one event reaction cycle, even if there are few.
    for handler in handlers_plan:

        # Restore the handler's progress status. It can be useful in the handlers.
        retry = status.get_retry_count(body=cause.body, handler=handler)
        started = status.get_start_time(body=cause.body,
                                        handler=handler,
                                        patch=cause.patch)
        runtime = datetime.datetime.utcnow() - started

        # The exceptions are handled locally and are not re-raised, to keep the operator running.
        try:
            logger.debug(f"Invoking handler {handler.id!r}.")

            if handler.timeout is not None and runtime.total_seconds(
            ) > handler.timeout:
                raise HandlerTimeoutError(
                    f"Handler {handler.id!r} has timed out after {runtime}.")

            result = await _call_handler(
                handler,
                cause=cause,
                retry=retry,
                started=started,
                runtime=runtime,
                lifecycle=
                lifecycle,  # just a default for the sub-handlers, not used directly.
            )

        # Unfinished children cause the regular retry, but with less logging and event reporting.
        except HandlerChildrenRetry as e:
            logger.info(
                f"Handler {handler.id!r} has unfinished sub-handlers. Will retry soon."
            )
            status.set_retry_time(body=cause.body,
                                  patch=cause.patch,
                                  handler=handler,
                                  delay=e.delay)
            handlers_left.append(handler)

        # Definitely retriable error, no matter what is the error-reaction mode.
        except HandlerRetryError as e:
            logger.exception(
                f"Handler {handler.id!r} failed with an retry exception. Will retry."
            )
            events.exception(
                cause.body,
                message=f"Handler {handler.id!r} failed. Will retry.")
            status.set_retry_time(body=cause.body,
                                  patch=cause.patch,
                                  handler=handler,
                                  delay=e.delay)
            handlers_left.append(handler)

        # Definitely fatal error, no matter what is the error-reaction mode.
        except HandlerFatalError as e:
            logger.exception(
                f"Handler {handler.id!r} failed with an fatal exception. Will stop."
            )
            events.exception(
                cause.body,
                message=f"Handler {handler.id!r} failed. Will stop.")
            status.store_failure(body=cause.body,
                                 patch=cause.patch,
                                 handler=handler,
                                 exc=e)
            # TODO: report the handling failure somehow (beside logs/events). persistent status?

        # Regular errors behave as either retriable or fatal depending on the error-reaction mode.
        except Exception as e:
            if retry_on_errors:
                logger.exception(
                    f"Handler {handler.id!r} failed with an exception. Will retry."
                )
                events.exception(
                    cause.body,
                    message=f"Handler {handler.id!r} failed. Will retry.")
                status.set_retry_time(body=cause.body,
                                      patch=cause.patch,
                                      handler=handler,
                                      delay=DEFAULT_RETRY_DELAY)
                handlers_left.append(handler)
            else:
                logger.exception(
                    f"Handler {handler.id!r} failed with an exception. Will stop."
                )
                events.exception(
                    cause.body,
                    message=f"Handler {handler.id!r} failed. Will stop.")
                status.store_failure(body=cause.body,
                                     patch=cause.patch,
                                     handler=handler,
                                     exc=e)
                # TODO: report the handling failure somehow (beside logs/events). persistent status?

        # No errors means the handler should be excluded from future runs in this reaction cycle.
        else:
            logger.info(f"Handler {handler.id!r} succeeded.")
            events.info(cause.body,
                        reason='Success',
                        message=f"Handler {handler.id!r} succeeded.")
            status.store_success(body=cause.body,
                                 patch=cause.patch,
                                 handler=handler,
                                 result=result)

    # Provoke the retry of the handling cycle if there were any unfinished handlers,
    # either because they were not selected by the lifecycle, or failed and need a retry.
    if handlers_left:
        raise HandlerChildrenRetry(delay=None)

    # If there are delayed handlers, block this object's cycle; but do keep-alives every few mins.
    # Other (non-delayed) handlers will continue as normlally, due to raise few lines above.
    # Other objects will continue as normally in their own handling asyncio tasks.
    if handlers_wait:
        times = [
            status.get_awake_time(body=cause.body, handler=handler)
            for handler in handlers_wait
        ]
        until = min(times)  # the soonest awake datetime.
        delay = (until - datetime.datetime.utcnow()).total_seconds()
        delay = max(0, min(WAITING_KEEPALIVE_INTERVAL, delay))
        raise HandlerChildrenRetry(delay=delay)
Beispiel #2
0
async def custom_object_handler(
    lifecycle: Callable,
    registry: registries.BaseRegistry,
    resource: registries.Resource,
    event: dict,
    freeze: asyncio.Event,
) -> None:
    """
    Handle a single custom object low-level watch-event.

    Convert the low-level events, as provided by the watching/queueing tasks,
    to the high-level causes, and then call the cause-handling logic.

    All the internally provoked changes are intercepted, do not create causes,
    and therefore do not call the handling logic.
    """
    etyp = event['type']  # e.g. ADDED, MODIFIED, DELETED.
    body = event['object']

    # Each object has its own prefixed logger, to distinguish parallel handling.
    logger = ObjectLogger(
        logging.getLogger(__name__),
        extra=dict(
            namespace=body.get('metadata', {}).get('namespace', 'default'),
            name=body.get('metadata',
                          {}).get('name',
                                  body.get('metadata', {}).get('uid', None)),
        ))

    # Object patch accumulator. Populated by the methods. Applied in the end of the handler.
    patch = {}
    delay = None

    # If the global freeze is set for the processing (i.e. other operator overrides), do nothing.
    if freeze.is_set():
        logger.debug("Ignoring the events due to freeze.")

    # The object was really deleted from the cluster. But we do not care anymore.
    elif etyp == 'DELETED':
        logger.debug("Deleted, really deleted, and we are notified.")

    # The finalizer has been just removed. We are fully done.
    elif finalizers.is_deleted(body) and not finalizers.has_finalizers(body):
        logger.debug(
            "Deletion event, but we are done with it, but we do not care.")

    elif finalizers.is_deleted(body):
        logger.debug("Deletion event: %r", body)
        cause = Cause(resource=resource,
                      event=registries.DELETE,
                      body=body,
                      patch=patch,
                      logger=logger)
        try:
            await execute(lifecycle=lifecycle, registry=registry, cause=cause)
        except HandlerChildrenRetry as e:
            # on the top-level, no patches -- it is pre-patched.
            delay = e.delay
        else:
            logger.info(f"All handlers succeeded for deletion.")
            events.info(cause.body,
                        reason='Success',
                        message=f"All handlers succeeded for deletion.")
            logger.debug(
                "Removing the finalizer, thus allowing the actual deletion.")
            finalizers.remove_finalizers(body=body, patch=patch)

    # For a fresh new object, first block it from accidental deletions without our permission.
    # The actual handler will be called on the next call.
    elif not finalizers.has_finalizers(body):
        logger.debug("First appearance: %r", body)
        logger.debug(
            "Adding the finalizer, thus preventing the actual deletion.")
        finalizers.append_finalizers(body=body, patch=patch)

    # For the object seen for the first time (i.e. just-created), call the creation handlers,
    # then mark the state as if it was seen when the creation has finished.
    elif not lastseen.has_state(body):
        logger.debug("Creation event: %r", body)
        cause = Cause(resource=resource,
                      event=registries.CREATE,
                      body=body,
                      patch=patch,
                      logger=logger)
        try:
            await execute(lifecycle=lifecycle, registry=registry, cause=cause)
        except HandlerChildrenRetry as e:
            # on the top-level, no patches -- it is pre-patched.
            delay = e.delay
        else:
            logger.info(f"All handlers succeeded for creation.")
            events.info(cause.body,
                        reason='Success',
                        message=f"All handlers succeeded for creation.")
            status.purge_progress(body=body, patch=patch)
            lastseen.refresh_state(body=body, patch=patch)

    # The previous step triggers one more patch operation without actual change. Ignore it.
    # Either the last-seen state or the status field has changed.
    elif not lastseen.is_state_changed(body):
        pass

    # And what is left, is the update operation on one of the useful fields of the existing object.
    else:
        old, new, diff = lastseen.get_state_diffs(body)
        logger.debug("Update event: %r", diff)
        cause = Cause(resource=resource,
                      event=registries.UPDATE,
                      body=body,
                      patch=patch,
                      logger=logger,
                      old=old,
                      new=new,
                      diff=diff)
        try:
            await execute(lifecycle=lifecycle, registry=registry, cause=cause)
        except HandlerChildrenRetry as e:
            # on the top-level, no patches -- it is pre-patched.
            delay = e.delay
        else:
            logger.info(f"All handlers succeeded for update.")
            events.info(cause.body,
                        reason='Success',
                        message=f"All handlers succeeded for update.")
            status.purge_progress(body=body, patch=patch)
            lastseen.refresh_state(body=body, patch=patch)

    # Provoke a dummy change to trigger the reactor after sleep.
    # TODO: reimplement via the handler delayed statuses properly.
    if delay and not patch:
        patch.setdefault('kopf',
                         {})['dummy'] = datetime.datetime.utcnow().isoformat()

    # Whatever was done, apply the accumulated changes to the object.
    # But only once, to reduce the number of API calls and the generated irrelevant events.
    if patch:
        logger.debug("Patching with: %r", patch)
        patching.patch_obj(resource=resource, patch=patch, body=body)

    # Sleep strictly after patching, never before -- to keep the status proper.
    if delay:
        logger.info(f"Sleeping for {delay} seconds for the delayed handlers.")
        await asyncio.sleep(delay)
Beispiel #3
0
async def handle_cause(
    lifecycle: Callable,
    registry: registries.BaseRegistry,
    cause: causation.Cause,
):
    """
    Handle a detected cause, as part of the bigger handler routine.
    """
    logger = cause.logger
    patch = cause.patch  # TODO get rid of this alias
    body = cause.body  # TODO get rid of this alias
    delay = None
    done = None

    # Regular causes invoke the handlers.
    if cause.event in [causation.CREATE, causation.UPDATE, causation.DELETE]:
        title = causation.TITLES.get(cause.event, repr(cause.event))
        logger.debug(f"{title.capitalize()} event: %r", body)
        try:
            await execute(lifecycle=lifecycle, registry=registry, cause=cause)
        except HandlerChildrenRetry as e:
            # on the top-level, no patches -- it is pre-patched.
            delay = e.delay
            done = False
        else:
            logger.info(f"All handlers succeeded for {title}.")
            events.info(cause.body,
                        reason='Success',
                        message=f"All handlers succeeded for {title}.")
            done = True

    # Regular causes also do some implicit post-handling when all handlers are done.
    if done:
        status.purge_progress(body=body, patch=patch)
        lastseen.refresh_state(body=body, patch=patch)
        if cause.event == causation.DELETE:
            logger.debug(
                "Removing the finalizer, thus allowing the actual deletion.")
            finalizers.remove_finalizers(body=body, patch=patch)

    # Informational causes just print the log lines.
    if cause.event == causation.NEW:
        logger.debug("First appearance: %r", body)

    if cause.event == causation.GONE:
        logger.debug("Deleted, really deleted, and we are notified.")

    if cause.event == causation.FREE:
        logger.debug(
            "Deletion event, but we are done with it, and we do not care.")

    if cause.event == causation.NOOP:
        logger.debug(
            "Something has changed, but we are not interested (state is the same)."
        )

    # For the case of a newly created object, lock it to this operator.
    # TODO: make it conditional.
    if cause.event == causation.NEW:
        logger.debug(
            "Adding the finalizer, thus preventing the actual deletion.")
        finalizers.append_finalizers(body=body, patch=patch)

    # The delay is then consumed by the main handling routine (in different ways).
    return delay