async def report_status(
    self,
    command: Command,
    volumes: bool = True,
    containers: bool = True,
):
    """Reports the status of the node to an actor.

    Parameters
    ----------
    command
        The command that is requesting the status.
    volumes
        Whether to report the volumes connected to the node Docker engine.
    containers
        Whether to report the containers running. Only reports running
        containers whose ancestor matches the ``config['image']``.

    Notes
    -----
    Outputs the ``node`` keyword, with format
    ``node={node_name, addr, daemon_addr, node_alive, docker_alive}``.
    If the node has no client, only a warning is issued and nothing else
    is reported. If the node does not ping back or the Docker client is not
    connected, the ``node`` keyword is still output, the client is closed,
    and no containers or volumes are reported.

    If ``containers=True``, outputs the ``container`` keyword with
    format ``container={node_name, container_short_id}``. The short id is
    replaced with ``"NA"`` unless exactly one matching container is running.

    If ``volumes=True``, reports the ``volume`` keyword with format
    ``volume={node_name, volume, present, device}``. ``device`` is ``"NA"``
    when the volume is missing or does not define a ``device`` option.

    """

    status = [self.name, self.addr, self.daemon_addr, False, False]
    config = command.actor.config

    if not self.client:
        command.warning(text=f"Node {self.addr} has no client.")
        return

    if not (await self.ping(timeout=config["ping_timeout"])):
        command.warning(text=f"Node {self.addr} is not pinging back.")
        command.info(node=status)
        if self.client:
            self.client.close()
        return

    status[3] = True  # The NUC is responding.

    if not (await self.client_alive()):
        command.warning(text=f"Docker client on node {self.addr} is not connected.")
        command.info(node=status)
        if self.client:
            self.client.close()
        return

    status[4] = True  # The Docker client is responding.
    command.info(node=status)

    if containers:
        # Match on the image name without the tag, prefixed by the registry
        # when one is configured.
        image = config["image"].split(":")[0]
        if config["registry"]:
            image = config["registry"] + "/" + image

        container_list: List[Any] = await self._run(
            self.client.containers.list,
            all=True,
            filters={"ancestor": image, "status": "running"},
        )

        if not container_list:
            command.warning(text=f"No containers running on {self.addr}.")
            command.debug(container=[self.name, "NA"])
        elif len(container_list) > 1:
            command.warning(
                text=f"Multiple containers with image {image} "
                f"running on node {self.addr}."
            )
            command.debug(container=[self.name, "NA"])
        else:
            command.debug(container=[self.name, container_list[0].short_id])

    if volumes:
        for vname in config["volumes"]:
            volume: Any = await self.get_volume(vname)
            if volume is False:
                command.warning(text=f"Volume {vname} not present in {self.name}.")
                command.debug(volume=[self.name, vname, False, "NA"])
                continue

            # ``Options`` may be missing or null for volumes created without
            # driver options; report "NA" instead of raising in that case.
            options = volume.attrs.get("Options") or {}
            command.debug(
                volume=[self.name, vname, True, options.get("device", "NA")]
            )
async def reconnect(
    command: Command,
    nodes: Dict[str, Node],
    names: str,
    category: str,
    force: bool,
):
    """Recreates volumes and restarts the Docker containers.

    The nodes to act on are chosen with ``select_nodes(nodes, category,
    names)``. For each of them the attached camera device is stopped if it
    is connected, the container is stopped, the configured volumes are
    (re)created, and the container is started again. After a short wait the
    devices whose containers are running are restarted and their status is
    reported. The command is finished at the end.

    Parameters
    ----------
    command
        The command driving the reconnection; used for all the output.
    nodes
        Mapping of the available nodes, passed to ``select_nodes``.
    names
        Node names, passed to ``select_nodes``.
    category
        Node category, passed to ``select_nodes``.
    force
        Passed as ``force`` when creating the volumes.

    """

    actor = command.actor
    assert actor

    config = actor.config

    async def reconnect_node(node):
        """Recreates the volumes and restarts the container of one node.

        Runs as a coroutine, concurrently with the other selected nodes.
        """

        try:
            await node.connect()
            reachable = await node.connected()
        except ConnectionError:
            reachable = False

        if not reachable:
            command.warning(
                text=f"Node {node.name} is not pinging back or "
                "the Docker daemon is not running. Try "
                "rebooting the computer."
            )
            return

        # Volumes attached to a running container cannot be removed, so
        # the container must go first.
        await node.stop_container(
            config["container_name"] + f"-{node.name}",
            config["image"],
            force=True,
            command=command,
        )

        for vname, vconfig in config["volumes"].items():
            await node.create_volume(
                vname,
                driver=vconfig["driver"],
                opts=vconfig["opts"],
                force=force,
                command=command,
            )

        new_container_name = actor.get_container_name(node)
        image = config["image"]
        volume_names = list(config["volumes"])
        registry = config["registry"]
        node_port = config["nodes"][actor.observatory][node.name]["port"]
        environment = {
            "ACTOR_NAME": node.name,
            "OBSERVATORY": actor.observatory,
        }

        return await node.run_container(
            new_container_name,
            image,
            volumes=volume_names,
            privileged=True,
            registry=registry,
            ports=[node_port],
            envs=environment,
            force=True,
            command=command,
        )

    selected = select_nodes(nodes, category, names)

    # Release the devices before touching the containers; otherwise we
    # get weird hangups.
    for node in selected:
        camera = actor.flicameras[node.name]
        if camera.is_connected():
            await camera.stop()

    await asyncio.gather(*(reconnect_node(node) for node in selected))

    command.info(text="Waiting 5 seconds before reconnecting the devices ...")
    await asyncio.sleep(5)

    for node in selected:
        container_name = config["container_name"] + f"-{node.name}"
        running = await node.is_container_running(container_name)
        if not running:
            continue

        camera = actor.flicameras[node.name]
        await camera.restart()

        if not camera.is_connected():
            command.warning(text=f"{node.name}: failed to connect to device.")
            continue

        port = camera.port
        await node.report_status(command)
        command.debug(text=f"{node.name}: reconnected to device on port {port}.")

    command.finish()