Example #1
async def spawn_resource_daemons(
    *,
    settings: configuration.OperatorSettings,
    handlers: Sequence[handlers_.ResourceSpawningHandler],
    daemons: MutableMapping[handlers_.HandlerId, containers.Daemon],
    cause: causation.ResourceSpawningCause,
    memory: containers.ResourceMemory,
) -> Collection[float]:
    """
    Ensure that all daemons are spawned for this individual resource.

    This function can be called multiple times over multiple handling cycles
    (though usually it is called on the first-seen occasion), so it must be
    idempotent: repeated calls must not duplicate its side effects.
    """
    if memory.live_fresh_body is None:  # for type-checking; "not None" is ensured in processing.
        raise RuntimeError(
            "A daemon is spawned with None as body. This is a bug. Please report."
        )
    for handler in handlers:
        if handler.id not in daemons:
            stopper = primitives.DaemonStopper()
            daemon_cause = causation.DaemonCause(
                resource=cause.resource,
                indices=cause.indices,
                logger=cause.logger,
                body=memory.live_fresh_body,
                memo=memory.memo,
                patch=patches.Patch(),  # not the same as the one-shot spawning patch!
                stopper=stopper,  # for checking (passed to kwargs)
            )
            daemon = containers.Daemon(
                stopper=stopper,  # for stopping (outside of causes)
                handler=handler,
                logger=loggers.LocalObjectLogger(body=cause.body, settings=settings),
                task=aiotasks.create_task(
                    _runner(
                        settings=settings,
                        daemons=daemons,  # for self-garbage-collection
                        handler=handler,
                        cause=daemon_cause,
                        memory=memory,
                    ),
                    name=f'runner of {handler.id}'
                ),  # sometimes, daemons; sometimes, timers.
            )
            daemons[handler.id] = daemon
    return []
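
The idempotency promised in the docstring comes from keying the `daemons` mapping by handler id and skipping ids that are already present. A minimal self-contained sketch of that spawn-once pattern, with hypothetical names (`fake_runner`, `spawn_once`) that are not part of Kopf's API:

import asyncio
from typing import Dict, Iterable

async def fake_runner(handler_id: str) -> None:
    await asyncio.sleep(3600)  # stands in for a long-lived daemon body

async def spawn_once(handler_ids: Iterable[str],
                     daemons: Dict[str, asyncio.Task]) -> None:
    for handler_id in handler_ids:
        if handler_id not in daemons:  # the same idempotency check as above
            daemons[handler_id] = asyncio.create_task(
                fake_runner(handler_id), name=f'runner of {handler_id}')

async def main() -> None:
    daemons: Dict[str, asyncio.Task] = {}
    await spawn_once(['d1', 'd2'], daemons)
    await spawn_once(['d1', 'd2'], daemons)  # a repeated call adds nothing
    assert len(daemons) == 2
    for task in daemons.values():
        task.cancel()
    await asyncio.gather(*daemons.values(), return_exceptions=True)

asyncio.run(main())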
Example #2
async def serve_admission_request(
    # Required for all webhook servers, meaningless without it:
    request: reviews.Request,
    *,
    # Optional for webhook servers that can recognise this information:
    headers: Optional[Mapping[str, str]] = None,
    sslpeer: Optional[Mapping[str, Any]] = None,
    webhook: Optional[ids.HandlerId] = None,
    reason: Optional[handlers.WebhookType] = None,  # TODO: undocumented: requires typing clarity!
    # Injected by partial() from spawn_tasks():
    settings: configuration.OperatorSettings,
    memories: containers.ResourceMemories,
    memobase: ephemera.AnyMemo,
    registry: registries.OperatorRegistry,
    insights: references.Insights,
    indices: ephemera.Indices,
) -> reviews.Response:
    """
    The actual and the only implementation of the `WebhookFn` protocol.

    This function is passed to all webhook servers/tunnels to be called
    whenever a new admission request is received.

    Some parameters are provided by the framework itself via partial binding,
    so that the resulting function matches the `WebhookFn` protocol. Other
    parameters are passed by the webhook servers when they call the function.
    """

    # Reconstruct the cause specially for web handlers.
    resource = find_resource(request=request, insights=insights)
    operation = request.get('request', {}).get('operation')
    userinfo = request.get('request', {}).get('userInfo')
    new_body = request.get('request', {}).get('object')
    old_body = request.get('request', {}).get('oldObject')
    raw_body = new_body if new_body is not None else old_body
    if userinfo is None:
        raise MissingDataError(
            "User info is missing from the admission request.")
    if raw_body is None:
        raise MissingDataError(
            "Either old or new object is missing from the admission request.")

    memory = await memories.recall(raw_body,
                                   memo=memobase,
                                   ephemeral=(operation == 'CREATE'))
    body = bodies.Body(raw_body)
    patch = patches.Patch()
    warnings: List[str] = []
    cause = causation.ResourceWebhookCause(
        resource=resource,
        indices=indices,
        logger=loggers.LocalObjectLogger(body=body, settings=settings),
        patch=patch,
        memo=memory.memo,
        body=body,
        userinfo=userinfo,
        warnings=warnings,
        operation=operation,
        dryrun=bool(request.get('request', {}).get('dryRun')),
        sslpeer=sslpeer if sslpeer is not None else {},  # ensure a mapping even if not provided.
        headers=headers if headers is not None else {},  # ensure a mapping even if not provided.
        webhook=webhook,
        reason=reason,
    )

    # Retrieve the handlers to be executed; maybe only one if the webhook server provides a hint.
    handlers_ = registry._resource_webhooks.get_handlers(cause)
    state = states.State.from_scratch().with_handlers(handlers_)
    outcomes = await handling.execute_handlers_once(
        lifecycle=lifecycles.all_at_once,
        settings=settings,
        handlers=handlers_,
        cause=cause,
        state=state,
        default_errors=handlers.ErrorsMode.PERMANENT,
    )

    # Construct the response as per Kubernetes's conventions and expectations.
    response = build_response(
        request=request,
        outcomes=outcomes,
        warnings=warnings,
        jsonpatch=patch.as_json_patch(),
    )
    return response
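
The docstring notes that some parameters are injected by `partial()` from `spawn_tasks()`. A hedged sketch of that binding: the framework pre-binds its own keyword arguments so the remaining callable matches the `WebhookFn` protocol. The operator-side objects are assumed to already exist at startup; the wiring below is an illustration, not Kopf's actual `spawn_tasks()` code.

import functools

# Assumed to be constructed during operator startup (illustrative only):
# settings, memories, memobase, registry, insights, indices.
webhook_fn = functools.partial(
    serve_admission_request,
    settings=settings,
    memories=memories,
    memobase=memobase,
    registry=registry,
    insights=insights,
    indices=indices,
)

# A webhook server then calls it with only the request-specific arguments:
# response = await webhook_fn(request, headers=headers, sslpeer=sslpeer)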
Example #3
async def execute_handler_once(
    settings: configuration.OperatorSettings,
    handler: handlers_.BaseHandler,
    cause: causation.BaseCause,
    state: states.HandlerState,
    lifecycle: Optional[lifecycles.LifeCycleFn] = None,
    default_errors: handlers_.ErrorsMode = handlers_.ErrorsMode.TEMPORARY,
) -> states.HandlerOutcome:
    """
    Execute one and only one handler for one and only one time.

    *Execution* means not just *calling* the handler in a properly set context
    (see `_call_handler`), but also interpreting its result and errors, and
    wrapping them into a `HandlerOutcome` object -- to be stored in the state.

    The *execution* can be long -- depending on how the handler is implemented.
    For daemons, it is normal to run for hours and days if needed.
    This is different from the regular handlers, which are supposed
    to be finished as soon as possible.

    This method is not supposed to raise any exceptions from the handlers:
    exceptions mean the failure of execution itself.
    """
    errors_mode = handler.errors if handler.errors is not None else default_errors
    backoff = handler.backoff if handler.backoff is not None else DEFAULT_RETRY_DELAY

    # Prevent successes/failures from posting k8s-events for resource-watching causes.
    logger: Union[logging.Logger, logging.LoggerAdapter]
    if isinstance(cause, causation.ResourceWatchingCause):
        logger = loggers.LocalObjectLogger(body=cause.body, settings=settings)
    else:
        logger = cause.logger

    # Mutable accumulator for all the sub-handlers of any level deep; populated in `kopf.execute`.
    subrefs: Set[handlers_.HandlerId] = set()

    # The exceptions are handled locally and are not re-raised, to keep the operator running.
    try:
        logger.debug(f"{handler} is invoked.")

        if handler.timeout is not None and state.runtime.total_seconds() >= handler.timeout:
            raise HandlerTimeoutError(
                f"{handler} has timed out after {state.runtime}.")

        if handler.retries is not None and state.retries >= handler.retries:
            raise HandlerRetriesError(
                f"{handler} has exceeded {state.retries} retries.")

        result = await invoke_handler(
            handler,
            cause=cause,
            retry=state.retries,
            started=state.started,
            runtime=state.runtime,
            settings=settings,
            lifecycle=lifecycle,  # just a default for the sub-handlers, not used directly.
            subrefs=subrefs,
        )

    # The cancellations are an expected way of stopping the handler. Especially for daemons.
    except asyncio.CancelledError:
        logger.warning(f"{handler} is cancelled. Will escalate.")
        raise

    # Unfinished children cause the regular retry, but with less logging and event reporting.
    except HandlerChildrenRetry as e:
        logger.debug(
            f"{handler} has unfinished sub-handlers. Will retry soon.")
        return states.HandlerOutcome(final=False,
                                     exception=e,
                                     delay=e.delay,
                                     subrefs=subrefs)

    # Definitely a temporary error, regardless of the error strictness.
    except TemporaryError as e:
        logger.error(f"{handler} failed temporarily: %s", str(e) or repr(e))
        return states.HandlerOutcome(final=False,
                                     exception=e,
                                     delay=e.delay,
                                     subrefs=subrefs)

    # Same as permanent errors below, but with better logging for our internal cases.
    except HandlerTimeoutError as e:
        logger.error(f"%s", str(e) or repr(e))  # already formatted
        return states.HandlerOutcome(final=True, exception=e, subrefs=subrefs)
        # TODO: report the handling failure somehow (beside logs/events). persistent status?

    # Definitely a permanent error, regardless of the error strictness.
    except PermanentError as e:
        logger.error(f"{handler} failed permanently: %s", str(e) or repr(e))
        return states.HandlerOutcome(final=True, exception=e, subrefs=subrefs)
        # TODO: report the handling failure somehow (beside logs/events). persistent status?

    # Regular errors behave as either temporary or permanent depending on the error strictness.
    except Exception as e:
        if errors_mode == handlers_.ErrorsMode.IGNORED:
            logger.exception(
                f"{handler} failed with an exception. Will ignore.")
            return states.HandlerOutcome(final=True, subrefs=subrefs)
        elif errors_mode == handlers_.ErrorsMode.TEMPORARY:
            logger.exception(
                f"{handler} failed with an exception. Will retry.")
            return states.HandlerOutcome(final=False,
                                         exception=e,
                                         delay=backoff,
                                         subrefs=subrefs)
        elif errors_mode == handlers_.ErrorsMode.PERMANENT:
            logger.exception(f"{handler} failed with an exception. Will stop.")
            return states.HandlerOutcome(final=True,
                                         exception=e,
                                         subrefs=subrefs)
            # TODO: report the handling failure somehow (beside logs/events). persistent status?
        else:
            raise RuntimeError(f"Unknown mode for errors: {errors_mode!r}")

    # No errors means the handler should be excluded from future runs in this reaction cycle.
    else:
        logger.info(f"{handler} succeeded.")
        return states.HandlerOutcome(final=True,
                                     result=result,
                                     subrefs=subrefs)
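
For context, the exception branches above map onto Kopf's public error classes as raised from user-side handlers. A short illustrative handler (the resource group/version/plural names are made up for the example):

import kopf

@kopf.on.create('example.com', 'v1', 'kopfexamples')
def create_fn(retry: int, spec: kopf.Spec, **_):
    if retry < 3:
        # Handled above as TemporaryError: a non-final outcome, retried after `delay`.
        raise kopf.TemporaryError("A dependency is not ready yet.", delay=10)
    if not spec.get('field'):
        # Handled above as PermanentError: a final outcome, never retried.
        raise kopf.PermanentError("The spec is invalid; giving up.")
    return {'message': 'created'}  # success: a final outcome with a result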
Example #4
async def process_resource_event(
        lifecycle: lifecycles.LifeCycleFn,
        indexers: indexing.OperatorIndexers,
        registry: registries.OperatorRegistry,
        settings: configuration.OperatorSettings,
        memories: containers.ResourceMemories,
        memobase: ephemera.AnyMemo,
        resource: references.Resource,
        raw_event: bodies.RawEvent,
        event_queue: posting.K8sEventQueue,
        stream_pressure: Optional[asyncio.Event] = None,  # None for tests
        resource_indexed: Optional[primitives.Toggle] = None,  # None for tests & observation
        operator_indexed: Optional[primitives.ToggleSet] = None,  # None for tests & observation
) -> None:
    """
    Handle a single custom object low-level watch-event.

    Convert the low-level events, as provided by the watching/queueing tasks,
    to the high-level causes, and then call the cause-handling logic.
    """

    # Recall what is stored about that object. Share it in little portions with the consumers.
    # And immediately forget it if the object is deleted from the cluster
    # (but keep the local `memory` reference for this processing cycle).
    raw_type, raw_body = raw_event['type'], raw_event['object']
    memory = await memories.recall(raw_body,
                                   noticed_by_listing=(raw_type is None),
                                   memo=memobase)
    if memory.live_fresh_body is not None:
        memory.live_fresh_body._replace_with(raw_body)
    if raw_type == 'DELETED':
        await memories.forget(raw_body)

    # Convert to a heavy mapping-view wrapper only now, when heavy processing begins.
    # Raw-event streaming, queueing, and batching use regular lightweight dicts.
    # Why here? 1. Before it splits into multiple causes & handlers for the same object's body;
    # 2. After it is batched (queueing); 3. While the "raw" parsed JSON is still known;
    # 4. Same as where a patch object of a similar wrapping semantics is created.
    body = memory.live_fresh_body if memory.live_fresh_body is not None else bodies.Body(raw_body)
    patch = patches.Patch()

    # Throttle the non-handler-related errors. The regular event watching/batching continues
    # to prevent queue overfilling, but the processing is skipped (events are ignored).
    # Choice of place: late enough to have a per-resource memory for a throttler; also, a logger.
    # But early enough to catch environment errors from K8s API, and from most of the complex code.
    async with effects.throttled(
        throttler=memory.error_throttler,
        logger=loggers.LocalObjectLogger(body=body, settings=settings),
        delays=settings.batching.error_delays,
        wakeup=stream_pressure,
    ) as should_run:
        if should_run:

            # Each object has its own prefixed logger, to distinguish parallel handling.
            logger = loggers.ObjectLogger(body=body, settings=settings)
            posting.event_queue_loop_var.set(asyncio.get_running_loop())
            posting.event_queue_var.set(event_queue)  # till the end of this object's task.

            # [Pre-]populate the indices. This must be lightweight.
            await indexing.index_resource(
                registry=registry,
                indexers=indexers,
                settings=settings,
                resource=resource,
                raw_event=raw_event,
                body=body,
                memory=memory,
                logger=loggers.TerseObjectLogger(body=body, settings=settings),
            )

            # Wait for all other individual resources and all other resource kinds' lists to finish.
            # If this one has changed while waiting for the global readiness, let it be reprocessed.
            if operator_indexed is not None and resource_indexed is not None:
                await operator_indexed.drop_toggle(resource_indexed)
            if operator_indexed is not None:
                await operator_indexed.wait_for(True)  # other resource kinds & objects.
            if stream_pressure is not None and stream_pressure.is_set():
                return

            # Do the magic -- do the job.
            delays, matched = await process_resource_causes(
                lifecycle=lifecycle,
                indexers=indexers,
                registry=registry,
                settings=settings,
                resource=resource,
                raw_event=raw_event,
                body=body,
                patch=patch,
                memory=memory,
                logger=logger,
            )

            # Whatever was done, apply the accumulated changes to the object, or sleep-n-touch for delays.
            # But only once, to reduce the number of API calls and the generated irrelevant events.
            # And only if the object is at least supposed to exist (not "GONE"), even if it actually does not.
            if raw_event['type'] != 'DELETED':
                applied = await effects.apply(
                    settings=settings,
                    resource=resource,
                    body=body,
                    patch=patch,
                    logger=logger,
                    delays=delays,
                    stream_pressure=stream_pressure,
                )
                if applied and matched:
                    logger.debug("Handling cycle is finished, waiting for new changes since now.")
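
`effects.throttled` is used above as an async context manager that yields a should-run flag and absorbs non-handler errors instead of letting them crash the watch-loop. A simplified sketch of that pattern (the real implementation also escalates over the configured delays and yields False while throttling is active; this version is an illustration only, not Kopf's actual code):

import asyncio
import contextlib
from typing import AsyncIterator, Optional, Sequence

@contextlib.asynccontextmanager
async def throttled(*, delays: Sequence[float],
                    wakeup: Optional[asyncio.Event] = None) -> AsyncIterator[bool]:
    try:
        yield True  # no throttling in this sketch: let the caller process the event
    except Exception:
        # On an environment error, pause for the configured delay (or until
        # woken up by new stream pressure), but keep the surrounding loop alive.
        delay = delays[0] if delays else 60.0
        with contextlib.suppress(asyncio.TimeoutError):
            await asyncio.wait_for((wakeup or asyncio.Event()).wait(), timeout=delay)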