async def _reap_pools(self) -> List[AbcPool]:
    current_gen = self._reap_calls
    self._reap_calls += 1
    collected = []
    try:
        for addr, h in tuple(self._nodes.items()):
            if h.generation < current_gen:
                h.pool.close()
                # cleanup collections
                self._erase_addr(addr)
                collected.append(h.pool)
    except Exception as e:
        logger.error("Unexpected error while collecting outdated pools: %r", e)

    if collected:
        for pool in collected:
            try:
                await pool.wait_closed()
            except (asyncio.CancelledError, SystemExit, KeyboardInterrupt):
                raise
            except BaseException as e:
                logger.error("Unexpected error while closing pool: %r", e)
        logger.info("%d idle connection pools reaped", len(collected))

    return collected
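
# A hedged sketch (the actual task is not shown in this excerpt) of a periodic reaper
# that would drive _reap_pools(): every interval it closes pools whose generation was
# presumably not bumped since the previous pass. The function name and the default
# interval are assumptions for illustration only.
async def _reaper_sketch(pooler, interval: float = 60.0) -> None:
    while True:
        await asyncio.sleep(interval)
        await pooler._reap_pools()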
async def _load_state(self, reload_id: int) -> ClusterState:
    commands: Optional[CommandsRegistry] = None

    init_addrs = self._get_init_addrs(reload_id)
    state = await self._fetch_state(init_addrs)

    # initialize a connection pool for every master node in the new state
    for node in state._data.masters:
        await self._pooler.ensure_pool(node.addr)

    # choose a random master node and load command specs from it
    pool = await self._pooler.ensure_pool(state.random_master().addr)
    # fetch commands only on the first cluster state load
    if reload_id == 1:
        async with async_timeout.timeout(self._execute_timeout):
            raw_commands = await pool.execute(b"COMMAND", encoding="utf-8")
        commands = create_registry(raw_commands)
        logger.debug("Found %d supported commands in cluster", commands.size())

    # assign initial cluster state and commands
    self._state = state
    if commands is not None:
        self._commands = commands

    if logger.isEnabledFor(logging.INFO):
        logger.info(
            "Loaded state: %s (reload_id=%d)",
            state.repr_stats(),
            reload_id,
        )

    return self._state
async def _execute_retry_slowdown(self, attempt: int, max_attempts: int) -> None:
    # first two tries run immediately
    if attempt <= 1:
        return

    delay = retry_backoff(attempt - 1, self._retry_min_delay, self._retry_max_delay)
    logger.info("[%d/%d] Retry was slowed down by %.02fms", attempt, max_attempts, delay * 1000)
    await asyncio.sleep(delay)
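
# A hedged sketch of the retry_backoff() helper used above (its real implementation is
# not shown in this excerpt). Assumes capped exponential backoff with full jitter, a
# common choice: the upper bound doubles per attempt up to max_delay, and a random
# delay between min_delay and that bound is returned to spread retries out.
import random


def retry_backoff_sketch(attempt: int, min_delay: float, max_delay: float) -> float:
    upper = min(max_delay, min_delay * (2 ** attempt))
    return random.uniform(min_delay, upper)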
async def _init(self) -> None:
    logger.info(
        "Initialize cluster with %d startup nodes: %r",
        len(self._startup_nodes),
        self._startup_nodes,
    )

    await self.reload_state()

    logger.info("Cluster successfully initialized")
def _make_execute_props(
    self,
    state: ClusterState,
    ctx: ExecuteContext,
    fail_props: Optional[ExecuteFailProps] = None,
) -> ExecuteProps:
    exec_props = ExecuteProps()

    node_addr: Address

    if fail_props:
        # re-raise the exception to simplify classification
        # instead of many isinstance conditions
        try:
            raise fail_props.error
        except self._connection_errors:
            if ctx.attempt <= 2 and ctx.slot is not None:
                replica = state.random_slot_replica(ctx.slot)
                if replica is not None:
                    node_addr = replica.addr
                else:
                    node_addr = state.random_node().addr
            else:
                node_addr = state.random_node().addr
        except MovedError as e:
            node_addr = Address(e.info.host, e.info.port)
        except AskError as e:
            node_addr = Address(e.info.host, e.info.port)
            exec_props.asking = e.info.ask
        except (ClusterDownError, TryAgainError, LoadingError, ProtocolError):
            node_addr = state.random_node().addr
        except Exception as e:
            # we should usually never get here
            logger.exception("Uncaught exception on execute: %r", e)
            raise
        logger.info("New node to execute: %s", node_addr)
    else:
        if ctx.slot is not None:
            try:
                node = state.slot_master(ctx.slot)
            except UncoveredSlotError:
                logger.warning("No node found by slot %d", ctx.slot)

                # the cluster is probably corrupted and we need
                # to try to recover the cluster state
                exec_props.reload_state_required = True
                node = state.random_master()
            node_addr = node.addr
        else:
            node_addr = state.random_master().addr
        logger.debug("Selected node for command: %s", node_addr)

    exec_props.node_addr = node_addr

    return exec_props
async def _try_execute(
    self, ctx: ExecuteContext, props: ExecuteProps, fail_props: Optional[ExecuteFailProps]
) -> Any:
    node_addr = props.node_addr

    attempt_log_prefix = ""
    if ctx.attempt > 1:
        attempt_log_prefix = f"[{ctx.attempt}/{ctx.max_attempts}] "

    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("%sExecute %r on %s", attempt_log_prefix, ctx.cmd_for_repr(), node_addr)

    pool = await self._pooler.ensure_pool(node_addr)
    pool_size = pool.size
    if pool_size >= pool.maxsize and pool.freesize == 0:
        logger.warning(
            "ConnectionPool to %s size limit reached (minsize:%s, maxsize:%s, current:%s)",
            node_addr,
            pool.minsize,
            pool.maxsize,
            pool_size,
        )

    if props.asking:
        logger.info("Send ASKING to %s for command %r", node_addr, ctx.cmd_name)
        result = await self._conn_execute(
            pool,
            ctx.cmd,
            ctx.kwargs,
            timeout=self._attempt_timeout,
            asking=True,
        )
    else:
        if ctx.cmd_info.is_blocking():
            result = await self._conn_execute(
                pool,
                ctx.cmd,
                ctx.kwargs,
                timeout=self._attempt_timeout,
            )
        else:
            result = await self._pool_execute(
                pool,
                ctx.cmd,
                ctx.kwargs,
                timeout=self._attempt_timeout,
            )

    return result
async def close_only(self, addrs: Sequence[Address]) -> None:
    collected = []
    for addr in addrs:
        if addr not in self._nodes:
            continue
        holder = self._nodes[addr]
        self._erase_addr(addr)
        holder.pool.close()
        collected.append(holder.pool)

    if collected:
        await asyncio.wait([p.wait_closed() for p in collected])
        logger.info("%d connection pools were closed", len(collected))
async def _state_reloader(self) -> None:
    while True:
        auto_reload = False
        try:
            await asyncio.wait_for(self._reload_event.wait(), self._reload_interval)
        except asyncio.TimeoutError:
            auto_reload = True

        self._reload_count += 1
        reload_id = self._reload_count

        if auto_reload:
            logger.info("Start cluster state auto reload (%d)", reload_id)
        else:
            logger.info("Start loading cluster state (%d)", reload_id)

        try:
            await self._load_state(reload_id)
        except asyncio.CancelledError:
            raise
        except network_errors + (RedisError,) as e:
            logger.warning("Unable to load cluster state: %r (%d)", e, reload_id)
        except Exception as e:
            logger.exception(
                "Unexpected error while loading cluster state: %r (%d)", e, reload_id
            )
        else:
            logger.info("Cluster state successfully loaded (%d)", reload_id)

        await asyncio.sleep(0.1)
        self._reload_event.clear()
async def close(self) -> None:
    if self._closed:
        return

    self._closed = True

    if self._reaper_task:
        self._reaper_task.cancel()
        await asyncio.wait([self._reaper_task])

    addrs = tuple(self._nodes.keys())
    pools = tuple(h.pool for h in self._nodes.values())
    self._nodes.clear()
    self._pubsub_channels.clear()
    self._pubsub_addrs.clear()

    if addrs:
        logger.info("Closing connection pools for: %s", addrs)
        for pool in pools:
            pool.close()
        await asyncio.wait([pool.wait_closed() for pool in pools])
async def _fetch_state(self, addrs: Sequence[Address]) -> ClusterState:
    if len(addrs) == 0:
        raise RuntimeError("no addrs to fetch cluster state")

    last_err: Optional[BaseException] = None

    if len(addrs) > 10:
        # take at least the first ten addrs;
        # the addrs are probably already randomized
        addrs = addrs[: max(10, len(addrs) // 2)]

    logger.debug("Trying to obtain cluster state from addrs: %r", addrs)

    # take the first successful cluster slots response
    for addr in addrs:
        logger.info("Obtain cluster state from %s", addr)
        try:
            pool = await self._pooler.ensure_pool(addr)
            async with async_timeout.timeout(self._execute_timeout):
                # ensure one connection behaviour
                async with pool.get() as conn:
                    raw_cluster_info: str = await conn.execute(
                        b"CLUSTER", b"INFO", encoding="utf-8"
                    )
                    cluster_info = parse_info(raw_cluster_info)
                    slots_resp = await conn.execute(b"CLUSTER", b"SLOTS", encoding="utf-8")
        except asyncio.TimeoutError as e:
            last_err = e
            logger.warning("Getting cluster state from %s timed out", addr)
            continue
        except Exception as e:
            last_err = e
            logger.warning("Unable to get cluster state from %s: %r", addr, e)
            continue

        if cluster_info[CLUSTER_INFO_STATE_KEY] != NodeClusterState.OK.value:
            logger.warning(
                'Node %s returned non-"ok" cluster state "%s". Trying next node',
                addr,
                cluster_info[CLUSTER_INFO_STATE_KEY],
            )
            continue

        logger.debug(
            "Cluster state successfully loaded from %s: info:%r slots:%r",
            addr,
            cluster_info,
            slots_resp,
        )
        break
    else:
        if last_err is not None:
            logger.error("No available hosts to load cluster slots. Tried hosts: %r", addrs)
            raise last_err

    state = create_cluster_state(slots_resp, cluster_info, addr)

    if state.state is not NodeClusterState.OK:
        logger.warning(
            (
                "Cluster is probably broken. Tried %d nodes and "
                'applied non-"ok" (%s) cluster state from %s'
            ),
            len(addrs),
            state.state.value,
            addr,
        )

    return state
async def _on_execute_fail(self, ctx: ExecuteContext, fail_props: ExecuteFailProps) -> None:
    # classify the error for logging and
    # set a mark to reload cluster state if needed
    try:
        raise fail_props.error
    except network_errors as e:
        logger.warning("Connection problem with %s: %r", fail_props.node_addr, e)
        self._manager.require_reload_state()
    except closed_errors as e:
        logger.warning("Connection is closed: %r", e)
        self._manager.require_reload_state()
    except ConnectTimeoutError as e:
        logger.warning("Connect to node timed out: %s", e)
        self._manager.require_reload_state()
    except ClusterDownError as e:
        logger.warning("Cluster is down: %s", e)
        self._manager.require_reload_state()
    except TryAgainError as e:
        logger.warning("Try again error: %s", e)
        self._manager.require_reload_state()
    except MovedError as e:
        logger.info("MOVED reply: %s", e)
        self._manager.require_reload_state()
    except AskError as e:
        logger.info("ASK reply: %s", e)
    except LoadingError as e:
        logger.warning("Cluster node %s is loading: %s", fail_props.node_addr, e)
        self._manager.require_reload_state()
    except ProtocolError as e:
        logger.warning("Redis protocol error: %s", e)
        self._manager.require_reload_state()
    except ReplyError as e:
        # all other reply errors must be propagated to the caller
        logger.warning("Reply error: %s", e)
        raise
    except asyncio.TimeoutError:
        is_readonly = ctx.cmd_info.is_readonly()
        if is_readonly:
            logger.warning(
                "Read-only command %s to %s timed out", ctx.cmd_name, fail_props.node_addr
            )
        else:
            logger.warning(
                "Non-idempotent command %s to %s timed out. Aborting command",
                ctx.cmd_name,
                fail_props.node_addr,
            )

        # the node is probably down
        self._manager.require_reload_state()

        # abort non-idempotent commands
        if not is_readonly:
            raise
    except Exception as e:
        logger.exception("Unexpected error: %r", e)
        raise

    if ctx.attempt >= ctx.max_attempts:
        raise fail_props.error

    # slow down retry calls
    await self._execute_retry_slowdown(ctx.attempt, ctx.max_attempts)
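
# A hedged, illustrative sketch (not part of this excerpt) of how the methods above are
# typically driven: _make_execute_props picks the target node, _try_execute runs the
# command on it, and _on_execute_fail classifies the failure, requests a state reload
# when needed, and either re-raises or backs off before the next attempt.
# Assumptions: the ExecuteFailProps keyword constructor, the mutable ctx.attempt
# counter, and passing the current ClusterState in as an argument are for illustration
# only; the real driver presumably re-reads the cluster state on every attempt.
async def _execute_with_retries_sketch(cluster, state, ctx) -> Any:
    fail_props: Optional[ExecuteFailProps] = None
    while True:
        ctx.attempt += 1  # assumed to start at 0 before the first attempt
        props = cluster._make_execute_props(state, ctx, fail_props)
        if props.reload_state_required:
            cluster._manager.require_reload_state()
        try:
            return await cluster._try_execute(ctx, props, fail_props)
        except asyncio.CancelledError:
            raise
        except Exception as e:
            # record the failure; _on_execute_fail either re-raises
            # (fatal error or attempts exhausted) or sleeps before the retry
            fail_props = ExecuteFailProps(node_addr=props.node_addr, error=e)
            await cluster._on_execute_fail(ctx, fail_props)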