async def touch_command(
        *,
        lifetime: Optional[int],
        insights: references.Insights,
        identity: Identity,
        settings: configuration.OperatorSettings,
) -> None:
    await asyncio.wait({
        insights.ready_namespaces.wait(),
        insights.ready_resources.wait(),
    })
    selector = guess_selector(settings=settings)
    resource = insights.backbone.get(selector) if selector else None
    if resource is None:
        raise RuntimeError(f"Cannot find the peering resource {selector}.")
    await aiotasks.wait({
        aiotasks.create_guarded_task(
            name="peering command", finishable=True, logger=logger,
            coro=touch(
                namespace=namespace,
                resource=resource,
                identity=identity,
                settings=settings,
                lifetime=lifetime))
        for namespace in insights.namespaces
    })
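# Usage sketch (an assumption for illustration, not the actual CLI wiring): since
# touch_command() waits for the insights to be populated, it is not awaited directly,
# but is passed as the private `_command=` coroutine of spawn_tasks() (defined further
# below), sharing the same insights/identity/settings objects with the operator tasks.
async def _touch_example(identity: Identity, settings: configuration.OperatorSettings) -> None:
    insights = references.Insights()
    tasks = await spawn_tasks(  # assumed to be importable from the running module
        clusterwide=True, insights=insights, identity=identity, settings=settings,
        _command=touch_command(lifetime=None, insights=insights,
                               identity=identity, settings=settings))
    await asyncio.wait(tasks)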
async def test_guard_escalates_on_failure(finishable):
    task = create_guarded_task(coro=fail("boo!"), name='this task', finishable=finishable)
    await asyncio.wait([task], timeout=0.01)  # let it start & react
    with pytest.raises(Error):
        await task
async def test_guard_logs_on_cancellation(assert_logs, caplog):
    caplog.set_level(0)
    logger = logging.getLogger()
    task = create_guarded_task(coro=delay(1), name='this task', logger=logger)
    await asyncio.wait([task], timeout=0.01)  # let it start
    task.cancel()
    await asyncio.wait([task], timeout=0.01)  # let it react
    assert_logs(["This task is cancelled"])
async def test_guard_logs_on_failure(assert_logs, caplog):
    caplog.set_level(0)
    logger = logging.getLogger()
    task = create_guarded_task(coro=fail("boo!"), name='this task', logger=logger)
    await asyncio.wait([task], timeout=0.01)  # let it start & react
    assert_logs(["This task has failed: boo!"])
async def test_guard_escalates_on_cancellation(cancellable):
    task = create_guarded_task(coro=delay(1), name='this task', cancellable=cancellable)
    await asyncio.wait([task], timeout=0.01)  # let it start
    task.cancel()
    await asyncio.wait([task], timeout=0.01)  # let it react
    with pytest.raises(asyncio.CancelledError):
        await task
async def test_guard_waits_for_the_flag():
    flag = asyncio.Event()
    task = create_guarded_task(coro=sample(), name='this task', flag=flag)
    await asyncio.wait([task], timeout=0.01)  # let it start
    assert not task.done()
    flag.set()
    await asyncio.wait([task], timeout=0.01)  # let it react
    assert task.done()
async def spawn_missing_peerings(
        *,
        settings: configuration.OperatorSettings,
        identity: peering.Identity,
        resources: Collection[references.Resource],
        namespaces: Collection[references.Namespace],
        ensemble: Ensemble,
) -> None:
    for resource, namespace in itertools.product(resources, namespaces):
        dkey = EnsembleKey(resource=resource, namespace=namespace)
        if dkey not in ensemble.peering_tasks:
            what = f"{settings.peering.name}@{namespace}"
            is_preactivated = settings.peering.mandatory
            conflicts_found = await ensemble.operator_paused.make_toggle(
                is_preactivated, name=what)
            ensemble.conflicts_found[dkey] = conflicts_found
            ensemble.pinging_tasks[dkey] = aiotasks.create_guarded_task(
                name=f"peering keep-alive for {what}", logger=logger, cancellable=True,
                coro=peering.keepalive(
                    namespace=namespace,
                    resource=resource,
                    settings=settings,
                    identity=identity))
            ensemble.peering_tasks[dkey] = aiotasks.create_guarded_task(
                name=f"peering observer for {what}", logger=logger, cancellable=True,
                coro=queueing.watcher(
                    settings=settings,
                    resource=resource,
                    namespace=namespace,
                    processor=functools.partial(
                        peering.process_peering_event,
                        conflicts_found=conflicts_found,
                        namespace=namespace,
                        resource=resource,
                        settings=settings,
                        identity=identity)))

    # Ensure that all guarded tasks got control for a moment to enter the guard.
    await asyncio.sleep(0)
async def test_guard_is_silent_when_cancellable(assert_logs, caplog):
    caplog.set_level(0)
    logger = logging.getLogger()
    task = create_guarded_task(coro=delay(1), name='this task', logger=logger, cancellable=True)
    await asyncio.wait([task], timeout=0.01)  # let it start
    task.cancel()
    await asyncio.wait([task], timeout=0.01)  # let it react
    assert_logs([], prohibited=["This task is cancelled"])
    assert not caplog.messages
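# The guard tests above depend on helpers and fixtures defined elsewhere in the test
# suite: `assert_logs` comes from the suite's conftest, `caplog` from pytest, and the
# coroutines/fixtures below are a minimal sketch of what the rest is assumed to look
# like (the names match the tests; the import path and parametrisation are assumptions).

import asyncio
import logging

import pytest

from kopf._cogs.aiokits.aiotasks import create_guarded_task  # import path assumed


class Error(Exception):
    """A sample exception for the failure tests."""


async def sample() -> None:
    pass  # completes immediately; used for the flag test


async def fail(msg: str) -> None:
    raise Error(msg)


async def delay(n: float) -> None:
    await asyncio.sleep(n)


# `finishable` and `cancellable` are assumed to be boolean fixtures/params:
@pytest.fixture(params=[True, False], ids=['finishable', 'not-finishable'])
def finishable(request):
    return request.param


@pytest.fixture(params=[True, False], ids=['cancellable', 'not-cancellable'])
def cancellable(request):
    return request.param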
async def spawn_missing_watchers(
        *,
        processor: queueing.WatchStreamProcessor,
        settings: configuration.OperatorSettings,
        resources: Collection[references.Resource],
        indexable: Collection[references.Resource],
        namespaces: Collection[references.Namespace],
        ensemble: Ensemble,
) -> None:
    # Block the operator globally until specialised per-resource-kind blockers are created.
    # NB: Must be created before the point of parallelisation!
    operator_blocked = await ensemble.operator_indexed.make_toggle(
        name="orchestration blocker")

    # Spawn watchers and create the specialised per-resource-kind blockers.
    for resource, namespace in itertools.product(resources, namespaces):
        namespace = namespace if resource.namespaced else None
        dkey = EnsembleKey(resource=resource, namespace=namespace)
        if dkey not in ensemble.watcher_tasks:
            what = f"{resource}@{namespace}"
            resource_indexed: Optional[primitives.Toggle] = None
            if resource in indexable:
                resource_indexed = await ensemble.operator_indexed.make_toggle(name=what)
            ensemble.watcher_tasks[dkey] = aiotasks.create_guarded_task(
                name=f"watcher for {what}", logger=logger, cancellable=True,
                coro=queueing.watcher(
                    operator_paused=ensemble.operator_paused,
                    operator_indexed=ensemble.operator_indexed,
                    resource_indexed=resource_indexed,
                    settings=settings,
                    resource=resource,
                    namespace=namespace,
                    processor=functools.partial(processor, resource=resource)))

    # Unblock globally, let the specialised per-resource-kind blockers hold the readiness.
    await ensemble.operator_indexed.drop_toggle(operator_blocked)

    # Ensure that all guarded tasks got control for a moment to enter the guard.
    await asyncio.sleep(0)
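# Both spawn_missing_peerings() and spawn_missing_watchers() above coordinate through
# toggle-sets: a per-dimension toggle is created under a shared gate, and the gate's
# overall state is a reduction (any()/all()) over its toggles. The classes below are a
# simplified stand-in for kopf's primitives.Toggle/ToggleSet, just to show the pattern;
# the real ones are awaitable and wake up their waiters on every state change.

from typing import Callable, Iterable, Set


class Toggle:
    def __init__(self, state: bool = False, *, name: str = '') -> None:
        self._state = state
        self.name = name

    def is_on(self) -> bool:
        return self._state

    async def turn_to(self, state: bool) -> None:
        self._state = state


class ToggleSet:
    def __init__(self, fn: Callable[[Iterable[bool]], bool]) -> None:
        self._fn = fn
        self._toggles: Set[Toggle] = set()

    def is_on(self) -> bool:
        return self._fn(t.is_on() for t in self._toggles)

    async def make_toggle(self, state: bool = False, *, name: str = '') -> Toggle:
        toggle = Toggle(state, name=name)
        self._toggles.add(toggle)
        return toggle

    async def drop_toggle(self, toggle: Toggle) -> None:
        self._toggles.discard(toggle)


async def _toggle_demo() -> None:
    # "The operator is paused if ANY peering observer has found conflicts":
    operator_paused = ToggleSet(any)
    conflicts_found = await operator_paused.make_toggle(name="peering@ns-a")
    assert not operator_paused.is_on()
    await conflicts_found.turn_to(True)
    assert operator_paused.is_on()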
async def spawn_tasks(
        *,
        lifecycle: Optional[lifecycles.LifeCycleFn] = None,
        indexers: Optional[indexing.OperatorIndexers] = None,
        registry: Optional[registries.OperatorRegistry] = None,
        settings: Optional[configuration.OperatorSettings] = None,
        memories: Optional[containers.ResourceMemories] = None,
        insights: Optional[references.Insights] = None,
        identity: Optional[peering.Identity] = None,
        standalone: Optional[bool] = None,
        priority: Optional[int] = None,
        peering_name: Optional[str] = None,
        liveness_endpoint: Optional[str] = None,
        clusterwide: bool = False,
        namespaces: Collection[references.NamespacePattern] = (),
        namespace: Optional[references.NamespacePattern] = None,  # deprecated
        stop_flag: Optional[primitives.Flag] = None,
        ready_flag: Optional[primitives.Flag] = None,
        vault: Optional[credentials.Vault] = None,
        memo: Optional[ephemera.AnyMemo] = None,
        _command: Optional[Coroutine[None, None, None]] = None,
) -> Collection[aiotasks.Task]:
    """
    Spawn all the tasks needed to run the operator.

    The tasks are properly inter-connected with the synchronisation primitives.
    """
    loop = asyncio.get_running_loop()

    if namespaces and namespace:
        raise TypeError("Either namespaces= or namespace= can be passed. Got both.")
    elif namespace:
        warnings.warn("namespace= is deprecated; use namespaces=[...]", DeprecationWarning)
        namespaces = [namespace]

    if clusterwide and namespaces:
        raise TypeError("The operator can be either cluster-wide or namespaced, not both.")
    if not clusterwide and not namespaces:
        warnings.warn("Absence of either namespaces or cluster-wide flag will become an error soon."
                      " For now, switching to the cluster-wide mode for backward compatibility.",
                      FutureWarning)
        clusterwide = True

    # All tasks of the operator are synced via these primitives and structures:
    lifecycle = lifecycle if lifecycle is not None else lifecycles.get_default_lifecycle()
    registry = registry if registry is not None else registries.get_default_registry()
    settings = settings if settings is not None else configuration.OperatorSettings()
    memories = memories if memories is not None else containers.ResourceMemories()
    indexers = indexers if indexers is not None else indexing.OperatorIndexers()
    insights = insights if insights is not None else references.Insights()
    identity = identity if identity is not None else peering.detect_own_id(manual=False)
    vault = vault if vault is not None else credentials.Vault()
    memo = memo if memo is not None else ephemera.Memo()
    event_queue: posting.K8sEventQueue = asyncio.Queue()
    signal_flag: aiotasks.Future = asyncio.Future()
    started_flag: asyncio.Event = asyncio.Event()
    operator_paused = primitives.ToggleSet(any)
    tasks: MutableSequence[aiotasks.Task] = []

    # Map kwargs into the settings object.
    settings.peering.clusterwide = clusterwide
    if peering_name is not None:
        settings.peering.mandatory = True
        settings.peering.name = peering_name
    if standalone is not None:
        settings.peering.standalone = standalone
    if priority is not None:
        settings.peering.priority = priority

    # Prepopulate indexers with empty indices -- to make them available to the startup handlers.
    indexers.ensure(registry._resource_indexing.get_all_handlers())

    # Global credentials store for this operator, also for CRD-reading & peering mode detection.
    auth.vault_var.set(vault)

    # Special case: pass the settings container through the user-side handlers (no explicit args).
    # Toolkits have to keep the original operator context somehow, and the only way is contextvars.
    posting.settings_var.set(settings)

    # A few common background forever-running infrastructural tasks (irregular root tasks).
    tasks.append(aiotasks.create_task(
        name="stop-flag checker",
        coro=_stop_flag_checker(
            signal_flag=signal_flag,
            stop_flag=stop_flag)))
    tasks.append(aiotasks.create_task(
        name="ultimate termination",
        coro=_ultimate_termination(
            settings=settings,
            stop_flag=stop_flag)))
    tasks.append(aiotasks.create_task(
        name="startup/cleanup activities",
        coro=_startup_cleanup_activities(
            root_tasks=tasks,  # used as a "live" view, populated later.
            ready_flag=ready_flag,
            started_flag=started_flag,
            registry=registry,
            settings=settings,
            indices=indexers.indices,
            vault=vault,
            memo=memo)))  # to purge & finalize the caches in the end.

    # Kill all the daemons gracefully when the operator exits (so that they are not "hung").
    tasks.append(aiotasks.create_guarded_task(
        name="daemon killer", flag=started_flag, logger=logger,
        coro=daemons.daemon_killer(
            settings=settings,
            memories=memories,
            operator_paused=operator_paused)))

    # Keeping the credentials fresh and valid via the authentication handlers on demand.
    tasks.append(aiotasks.create_guarded_task(
        name="credentials retriever", flag=started_flag, logger=logger,
        coro=activities.authenticator(
            registry=registry,
            settings=settings,
            indices=indexers.indices,
            vault=vault,
            memo=memo)))

    # K8s-event posting. Events are queued in-memory and posted in the background.
    # NB: currently, it is a global task, but can be made per-resource or per-object.
    tasks.append(aiotasks.create_guarded_task(
        name="poster of events", flag=started_flag, logger=logger,
        coro=posting.poster(
            backbone=insights.backbone,
            event_queue=event_queue)))

    # Liveness probing -- so that Kubernetes would know that the operator is alive.
    if liveness_endpoint:
        tasks.append(aiotasks.create_guarded_task(
            name="health reporter", flag=started_flag, logger=logger,
            coro=probing.health_reporter(
                registry=registry,
                settings=settings,
                endpoint=liveness_endpoint,
                indices=indexers.indices,
                memo=memo)))

    # Permanent observation of what resource kinds and namespaces are available in the cluster.
    # Spawn and cancel dimensional tasks as they come and go; dimensions = resources x namespaces.
    tasks.append(aiotasks.create_guarded_task(
        name="resource observer", flag=started_flag, logger=logger,
        coro=observation.resource_observer(
            insights=insights,
            registry=registry,
            settings=settings)))
    tasks.append(aiotasks.create_guarded_task(
        name="namespace observer", flag=started_flag, logger=logger,
        coro=observation.namespace_observer(
            clusterwide=clusterwide,
            namespaces=namespaces,
            insights=insights,
            settings=settings)))

    # An explicit command is a hack for the CLI to run coroutines in an operator-like environment.
    # If not specified, then use the normal resource processing. It is not exposed publicly (yet).
    if _command is not None:
        tasks.append(aiotasks.create_guarded_task(
            name="the command", flag=started_flag, logger=logger, finishable=True,
            coro=_command))
    else:
        tasks.append(aiotasks.create_guarded_task(
            name="multidimensional multitasker", flag=started_flag, logger=logger,
            coro=orchestration.ochestrator(  # sic: the function is named "ochestrator" upstream.
                settings=settings,
                insights=insights,
                identity=identity,
                operator_paused=operator_paused,
                processor=functools.partial(
                    processing.process_resource_event,
                    lifecycle=lifecycle,
                    registry=registry,
                    settings=settings,
                    indexers=indexers,
                    memories=memories,
                    memobase=memo,
                    event_queue=event_queue))))

    # Ensure that all guarded tasks got control for a moment to enter the guard.
    await asyncio.sleep(0)

    # On Ctrl+C or pod termination, cancel all tasks gracefully.
    if threading.current_thread() is threading.main_thread():
        # Handle NotImplementedError when run on Windows, since asyncio supports Unix signals only.
        try:
            loop.add_signal_handler(signal.SIGINT, signal_flag.set_result, signal.SIGINT)
            loop.add_signal_handler(signal.SIGTERM, signal_flag.set_result, signal.SIGTERM)
        except NotImplementedError:
            logger.warning("OS signals are ignored: can't add signal handler in Windows.")
    else:
        logger.warning("OS signals are ignored: running not in the main thread.")

    return tasks
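# Usage sketch: spawn_tasks() only creates the root tasks; something must await them.
# Kopf's public wrappers (kopf.operator() / kopf.run()) do that with proper guarded
# waiting and cancellation; the minimal loop below is an illustration-only substitute.

import asyncio
import threading


async def _operator_main(stop_flag: threading.Event) -> None:
    tasks = await spawn_tasks(clusterwide=True, stop_flag=stop_flag)
    await asyncio.wait(tasks)  # kopf itself waits with error escalation & graceful cancel


def _run_operator() -> None:
    stop_flag = threading.Event()  # primitives.Flag accepts threading.Event, among others
    asyncio.run(_operator_main(stop_flag))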
async def spawn_tasks(
        *,
        lifecycle: Optional[lifecycles.LifeCycleFn] = None,
        registry: Optional[registries.OperatorRegistry] = None,
        settings: Optional[configuration.OperatorSettings] = None,
        memories: Optional[containers.ResourceMemories] = None,
        standalone: Optional[bool] = None,
        priority: Optional[int] = None,
        peering_name: Optional[str] = None,
        liveness_endpoint: Optional[str] = None,
        namespace: Optional[str] = None,
        stop_flag: Optional[primitives.Flag] = None,
        ready_flag: Optional[primitives.Flag] = None,
        vault: Optional[credentials.Vault] = None,
) -> Collection[aiotasks.Task]:
    """
    Spawn all the tasks needed to run the operator.

    The tasks are properly inter-connected with the synchronisation primitives.
    """
    loop = asyncio.get_running_loop()

    # The freezer and the registry are scoped to this whole task-set, to sync them all.
    lifecycle = lifecycle if lifecycle is not None else lifecycles.get_default_lifecycle()
    registry = registry if registry is not None else registries.get_default_registry()
    settings = settings if settings is not None else configuration.OperatorSettings()
    memories = memories if memories is not None else containers.ResourceMemories()
    vault = vault if vault is not None else global_vault  # module-level default, if any
    vault = vault if vault is not None else credentials.Vault()
    event_queue: posting.K8sEventQueue = asyncio.Queue()
    freeze_name = f"{peering_name!r}@{namespace}" if namespace else f"cluster-wide {peering_name!r}"
    freeze_checker = primitives.ToggleSet()
    freeze_toggle = await freeze_checker.make_toggle(name=freeze_name)
    signal_flag: aiotasks.Future = asyncio.Future()
    started_flag: asyncio.Event = asyncio.Event()
    tasks: MutableSequence[aiotasks.Task] = []

    # Map kwargs into the settings object.
    if peering_name is not None:
        settings.peering.mandatory = True
        settings.peering.name = peering_name
    if standalone is not None:
        settings.peering.standalone = standalone
    if priority is not None:
        settings.peering.priority = priority

    # Global credentials store for this operator, also for CRD-reading & peering mode detection.
    auth.vault_var.set(vault)

    # Special case: pass the settings container through the user-side handlers (no explicit args).
    # Toolkits have to keep the original operator context somehow, and the only way is contextvars.
    posting.settings_var.set(settings)

    # A few common background forever-running infrastructural tasks (irregular root tasks).
    tasks.append(aiotasks.create_task(
        name="stop-flag checker",
        coro=_stop_flag_checker(
            signal_flag=signal_flag,
            stop_flag=stop_flag)))
    tasks.append(aiotasks.create_task(
        name="ultimate termination",
        coro=_ultimate_termination(
            settings=settings,
            stop_flag=stop_flag)))
    tasks.append(aiotasks.create_task(
        name="startup/cleanup activities",
        coro=_startup_cleanup_activities(
            root_tasks=tasks,  # used as a "live" view, populated later.
            ready_flag=ready_flag,
            started_flag=started_flag,
            registry=registry,
            settings=settings,
            vault=vault)))  # to purge & finalize the caches in the end.

    # Kill all the daemons gracefully when the operator exits (so that they are not "hung").
    tasks.append(aiotasks.create_guarded_task(
        name="daemon killer", flag=started_flag, logger=logger,
        coro=daemons.daemon_killer(
            settings=settings,
            memories=memories)))

    # Keeping the credentials fresh and valid via the authentication handlers on demand.
    tasks.append(aiotasks.create_guarded_task(
        name="credentials retriever", flag=started_flag, logger=logger,
        coro=activities.authenticator(
            registry=registry,
            settings=settings,
            vault=vault)))

    # K8s-event posting. Events are queued in-memory and posted in the background.
    # NB: currently, it is a global task, but can be made per-resource or per-object.
    tasks.append(aiotasks.create_guarded_task(
        name="poster of events", flag=started_flag, logger=logger,
        coro=posting.poster(event_queue=event_queue)))

    # Liveness probing -- so that Kubernetes would know that the operator is alive.
    if liveness_endpoint:
        tasks.append(aiotasks.create_guarded_task(
            name="health reporter", flag=started_flag, logger=logger,
            coro=probing.health_reporter(
                registry=registry,
                settings=settings,
                endpoint=liveness_endpoint)))

    # Monitor the peers, unless explicitly disabled.
    if await peering.detect_presence(namespace=namespace, settings=settings):
        identity = peering.detect_own_id(manual=False)
        tasks.append(aiotasks.create_guarded_task(
            name="peering keepalive", flag=started_flag, logger=logger,
            coro=peering.keepalive(
                namespace=namespace,
                settings=settings,
                identity=identity)))
        tasks.append(aiotasks.create_guarded_task(
            name="watcher of peering", flag=started_flag, logger=logger,
            coro=queueing.watcher(
                namespace=namespace,
                settings=settings,
                resource=peering.guess_resource(namespace=namespace),
                processor=functools.partial(
                    peering.process_peering_event,
                    namespace=namespace,
                    settings=settings,
                    identity=identity,
                    freeze_toggle=freeze_toggle))))

    # Resource event handling, only once for every known resource (de-duplicated).
    for resource in registry.resources:
        tasks.append(aiotasks.create_guarded_task(
            name=f"watcher of {resource.name}", flag=started_flag, logger=logger,
            coro=queueing.watcher(
                namespace=namespace,
                settings=settings,
                resource=resource,
                freeze_checker=freeze_checker,
                processor=functools.partial(
                    processing.process_resource_event,
                    lifecycle=lifecycle,
                    registry=registry,
                    settings=settings,
                    memories=memories,
                    resource=resource,
                    event_queue=event_queue))))

    # On Ctrl+C or pod termination, cancel all tasks gracefully.
    if threading.current_thread() is threading.main_thread():
        # Handle NotImplementedError when run on Windows, since asyncio supports Unix signals only.
        try:
            loop.add_signal_handler(signal.SIGINT, signal_flag.set_result, signal.SIGINT)
            loop.add_signal_handler(signal.SIGTERM, signal_flag.set_result, signal.SIGTERM)
        except NotImplementedError:
            logger.warning("OS signals are ignored: can't add signal handler in Windows.")
    else:
        logger.warning("OS signals are ignored: running not in the main thread.")

    return tasks
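# The signal wiring above converts OS signals into a resolved future, which the
# "stop-flag checker" task then treats as a stop request. A standalone sketch of the
# same pattern (illustrative only, not kopf code):

import asyncio
import signal


async def _await_termination() -> None:
    loop = asyncio.get_running_loop()
    signal_flag: asyncio.Future = loop.create_future()
    try:
        loop.add_signal_handler(signal.SIGINT, signal_flag.set_result, signal.SIGINT)
        loop.add_signal_handler(signal.SIGTERM, signal_flag.set_result, signal.SIGTERM)
    except NotImplementedError:
        pass  # e.g. Windows or a non-main thread: no signal handling, wait forever.
    received = await signal_flag
    print(f"Terminating on signal {received!r}")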