Example 1
    async def _reap_pools(self) -> List[AbcPool]:
        current_gen = self._reap_calls
        self._reap_calls += 1

        collected = []
        try:
            for addr, h in tuple(self._nodes.items()):
                if h.generation < current_gen:
                    h.pool.close()

                    # cleanup collections
                    self._erase_addr(addr)

                    collected.append(h.pool)
        except Exception as e:
            logger.error("Unexpected error while collect outdate pools: %r", e)

        if collected:
            for pool in collected:
                try:
                    await pool.wait_closed()
                except (asyncio.CancelledError, SystemExit, KeyboardInterrupt):
                    raise
                except BaseException as e:
                    logger.error("Unexpected error while pool closing: %r", e)

            logger.info("%d idle connections pools reaped", len(collected))

        return collected
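
Note on the generation check above: it only works together with bookkeeping on the pooler side. The sketch below is an assumption (not the library's actual pooler) of how ensure_pool could stamp a holder with the current reap counter on every use, so that only pools untouched for a full reap cycle satisfy h.generation < current_gen.

# Minimal sketch; the real holder and pooler classes differ.
from dataclasses import dataclass
from typing import Any, Dict, Tuple

Address = Tuple[str, int]  # assumed address shape for this sketch


@dataclass
class _PoolHolder:
    pool: Any        # the connection pool object
    generation: int  # last reap generation in which the pool was used


class _PoolerSketch:
    def __init__(self) -> None:
        self._nodes: Dict[Address, _PoolHolder] = {}
        self._reap_calls = 0

    async def ensure_pool(self, addr: Address) -> Any:
        holder = self._nodes.get(addr)
        if holder is None:
            holder = _PoolHolder(pool=await self._create_pool(addr), generation=self._reap_calls)
            self._nodes[addr] = holder
        # stamp the holder so the reaper keeps this pool for another cycle
        holder.generation = self._reap_calls
        return holder.pool

    async def _create_pool(self, addr: Address) -> Any:
        raise NotImplementedError  # backend-specific in the real library
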
Example 2
    async def _load_state(self, reload_id: int) -> ClusterState:
        commands: Optional[CommandsRegistry] = None

        init_addrs = self._get_init_addrs(reload_id)
        state = await self._fetch_state(init_addrs)

        # initialize a connection pool for every master node in the new state
        for node in state._data.masters:
            await self._pooler.ensure_pool(node.addr)

        # choose a random master node and load command specs from it
        pool = await self._pooler.ensure_pool(state.random_master().addr)
        # fetch commands only on the first cluster state load
        if reload_id == 1:
            async with async_timeout.timeout(self._execute_timeout):
                raw_commands = await pool.execute(b"COMMAND", encoding="utf-8")
            commands = create_registry(raw_commands)
            logger.debug("Found %d supported commands in cluster", commands.size())

        # assign initial cluster state and commands
        self._state = state
        if commands is not None:
            self._commands = commands

        if logger.isEnabledFor(logging.INFO):
            logger.info(
                "Loaded state: %s (reload_id=%d)",
                state.repr_stats(),
                reload_id,
            )

        return self._state
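
The COMMAND reply parsed by create_registry is an array with one entry per command, each starting with the command name, arity, flags, and key positions. The registry below is only an illustrative assumption; the real CommandsRegistry exposes more than size().

# Minimal sketch of building a registry from a COMMAND reply; the names and
# shape here are assumptions, only the reply layout follows the Redis docs.
from typing import Any, Dict, Sequence


class _CommandsRegistrySketch:
    def __init__(self, commands: Dict[str, Sequence[Any]]) -> None:
        self._commands = commands

    def size(self) -> int:
        return len(self._commands)


def create_registry_sketch(raw_commands: Sequence[Sequence[Any]]) -> _CommandsRegistrySketch:
    # each entry: [name, arity, flags, first_key, last_key, key_step, ...]
    return _CommandsRegistrySketch({entry[0].lower(): entry for entry in raw_commands})
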
Example 3
    async def _execute_retry_slowdown(self, attempt: int, max_attempts: int) -> None:
        # first two tries run immediately
        if attempt <= 1:
            return

        delay = retry_backoff(attempt - 1, self._retry_min_delay, self._retry_max_delay)
        logger.info("[%d/%d] Retry was slowed down by %.02fms", attempt, max_attempts, delay * 1000)
        await asyncio.sleep(delay)
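
retry_backoff is assumed here to be an exponential backoff with jitter, clamped between the configured minimum and maximum delays; the sketch below illustrates that assumption and is not the library's exact implementation.

import random


def retry_backoff_sketch(retry: int, min_delay: float, max_delay: float) -> float:
    # exponential growth capped at max_delay
    upper = min(max_delay, min_delay * (2 ** retry))
    # full jitter: pick a random delay between min_delay and the capped bound
    return random.uniform(min_delay, upper)
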
Example 4
    async def _init(self) -> None:
        logger.info(
            "Initialize cluster with %d startup nodes: %r",
            len(self._startup_nodes),
            self._startup_nodes,
        )

        await self.reload_state()

        logger.info("Cluster successful initialized")
Example 5
    def _make_execute_props(
        self,
        state: ClusterState,
        ctx: ExecuteContext,
        fail_props: Optional[ExecuteFailProps] = None,
    ) -> ExecuteProps:
        exec_props = ExecuteProps()

        node_addr: Address

        if fail_props:
            # re-raise the exception to simplify classification
            # instead of a long chain of isinstance checks
            try:
                raise fail_props.error
            except self._connection_errors:
                if ctx.attempt <= 2 and ctx.slot is not None:
                    replica = state.random_slot_replica(ctx.slot)
                    if replica is not None:
                        node_addr = replica.addr
                    else:
                        node_addr = state.random_node().addr
                else:
                    node_addr = state.random_node().addr
            except MovedError as e:
                node_addr = Address(e.info.host, e.info.port)
            except AskError as e:
                node_addr = Address(e.info.host, e.info.port)
                exec_props.asking = e.info.ask
            except (ClusterDownError, TryAgainError, LoadingError, ProtocolError):
                node_addr = state.random_node().addr
            except Exception as e:
                # normally we should never get here
                logger.exception("Uncaught exception on execute: %r", e)
                raise
            logger.info("New node to execute: %s", node_addr)
        else:
            if ctx.slot is not None:
                try:
                    node = state.slot_master(ctx.slot)
                except UncoveredSlotError:
                    logger.warning("No node found by slot %d", ctx.slot)

                    # the cluster is probably corrupted and
                    # we need to try to recover its state
                    exec_props.reload_state_required = True
                    node = state.random_master()
                node_addr = node.addr
            else:
                node_addr = state.random_master().addr
            logger.debug("Defined node to command: %s", node_addr)

        exec_props.node_addr = node_addr

        return exec_props
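
For reference, ExecuteProps is used above as a simple result container. A minimal sketch consistent with that usage (the field defaults are assumptions):

from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class ExecutePropsSketch:
    # address of the node chosen for the next attempt (an Address in the real code)
    node_addr: Optional[Any] = None
    # send ASKING before the command; set when an ASK redirect was received
    asking: bool = False
    # ask the manager to reload cluster state, e.g. after an uncovered slot
    reload_state_required: bool = False
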
Example 6
    async def _try_execute(
        self, ctx: ExecuteContext, props: ExecuteProps, fail_props: Optional[ExecuteFailProps]
    ) -> Any:
        node_addr = props.node_addr

        attempt_log_prefix = ""
        if ctx.attempt > 1:
            attempt_log_prefix = f"[{ctx.attempt}/{ctx.max_attempts}] "

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("%sExecute %r on %s", attempt_log_prefix, ctx.cmd_for_repr(), node_addr)

        pool = await self._pooler.ensure_pool(node_addr)

        pool_size = pool.size
        if pool_size >= pool.maxsize and pool.freesize == 0:
            logger.warning(
                "ConnectionPool to %s size limit reached (minsize:%s, maxsize:%s, current:%s])",
                node_addr,
                pool.minsize,
                pool.maxsize,
                pool_size,
            )

        if props.asking:
            logger.info("Send ASKING to %s for command %r", node_addr, ctx.cmd_name)

            result = await self._conn_execute(
                pool,
                ctx.cmd,
                ctx.kwargs,
                timeout=self._attempt_timeout,
                asking=True,
            )
        else:
            if ctx.cmd_info.is_blocking():
                result = await self._conn_execute(
                    pool,
                    ctx.cmd,
                    ctx.kwargs,
                    timeout=self._attempt_timeout,
                )
            else:
                result = await self._pool_execute(
                    pool,
                    ctx.cmd,
                    ctx.kwargs,
                    timeout=self._attempt_timeout,
                )

        return result
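
The ASKING branch matters because a Redis Cluster ASK redirect only applies to the connection that sends ASKING immediately before the command. The helper below is a sketch of what a connection-level execute supporting that could look like; the real _conn_execute is internal to the library and its signature is assumed.

from typing import Any, Dict, Sequence

import async_timeout


async def _conn_execute_sketch(
    pool: Any,
    args: Sequence[Any],
    kwargs: Dict[str, Any],
    *,
    timeout: float,
    asking: bool = False,
) -> Any:
    async with async_timeout.timeout(timeout):
        # pin a single connection so ASKING and the command share it
        async with pool.get() as conn:
            if asking:
                await conn.execute(b"ASKING")
            return await conn.execute(*args, **kwargs)
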
Example 7
    async def close_only(self, addrs: Sequence[Address]) -> None:
        collected = []
        for addr in addrs:
            if addr not in self._nodes:
                continue

            holder = self._nodes[addr]

            self._erase_addr(addr)

            holder.pool.close()
            collected.append(holder.pool)

        if collected:
            await asyncio.wait([p.wait_closed() for p in collected])
            logger.info("%d connections pools was closed", len(collected))
Example 8
    async def _state_reloader(self) -> None:
        while True:
            auto_reload = False

            try:
                await asyncio.wait_for(self._reload_event.wait(), self._reload_interval)
            except asyncio.TimeoutError:
                auto_reload = True

            self._reload_count += 1
            reload_id = self._reload_count

            if auto_reload:
                logger.info("Start cluster state auto reload (%d)", reload_id)
            else:
                logger.info("Start loading cluster state (%d)", reload_id)

            try:
                await self._load_state(reload_id)
            except asyncio.CancelledError:
                raise
            except network_errors + (RedisError,) as e:
                logger.warning("Unable to load cluster state: %r (%d)", e, reload_id)
            except Exception as e:
                logger.exception(
                    "Unexpected error while loading cluster state: %r (%d)", e, reload_id
                )
            else:
                logger.info("Cluster state successful loaded (%d)", reload_id)

            await asyncio.sleep(0.1)
            self._reload_event.clear()
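
The loop above wakes up either on the periodic timeout or when the reload event is set. A much-simplified sketch of how that event could be driven (in the real manager, reload_state presumably also waits for the load to complete):

import asyncio


class _ReloadTriggerSketch:
    def __init__(self, reload_interval: float) -> None:
        self._reload_interval = reload_interval
        self._reload_event = asyncio.Event()
        self._reload_count = 0

    def require_reload_state(self) -> None:
        # wake _state_reloader without waiting for the next interval
        self._reload_event.set()
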
Example 9
    async def close(self) -> None:
        if self._closed:
            return

        self._closed = True

        if self._reaper_task:
            self._reaper_task.cancel()
            await asyncio.wait([self._reaper_task])

        addrs = tuple(self._nodes.keys())
        pools = tuple(h.pool for h in self._nodes.values())
        self._nodes.clear()
        self._pubsub_channels.clear()
        self._pubsub_addrs.clear()

        if addrs:
            logger.info("Close connections pools for: %s", addrs)
            for pool in pools:
                pool.close()

            await asyncio.wait([pool.wait_closed() for pool in pools])
Example 10
    async def _fetch_state(self, addrs: Sequence[Address]) -> ClusterState:
        if len(addrs) == 0:
            raise RuntimeError("no addrs to fetch cluster state")

        last_err: Optional[BaseException] = None

        if len(addrs) > 10:
            # keep at least the first ten addrs (half of them if there are many);
            # the list of addrs is probably already randomized
            addrs = addrs[: max(10, len(addrs) // 2)]

        logger.debug("Trying to obtain cluster state from addrs: %r", addrs)

        # get first successful cluster slots response
        for addr in addrs:
            logger.info("Obtain cluster state from %s", addr)
            try:
                pool = await self._pooler.ensure_pool(addr)
                async with async_timeout.timeout(self._execute_timeout):
                    # run both commands on the same connection
                    async with pool.get() as conn:
                        raw_cluster_info: str = await conn.execute(
                            b"CLUSTER", b"INFO", encoding="utf-8"
                        )
                        cluster_info = parse_info(raw_cluster_info)
                        slots_resp = await conn.execute(b"CLUSTER", b"SLOTS", encoding="utf-8")

            except asyncio.TimeoutError as e:
                last_err = e
                logger.warning("Getting cluster state from %s is timed out", addr)
                continue
            except Exception as e:
                last_err = e
                logger.warning("Unable to get cluster state from %s: %r", addr, e)
                continue

            if cluster_info[CLUSTER_INFO_STATE_KEY] != NodeClusterState.OK.value:
                logger.warning(
                    'Node %s returned a non-"ok" cluster state "%s". Trying next node',
                    addr,
                    cluster_info[CLUSTER_INFO_STATE_KEY],
                )
                continue

            logger.debug(
                "Cluster state successful loaded from %s: info:%r slots:%r",
                addr,
                cluster_info,
                slots_resp,
            )

            break
        else:
            if last_err is not None:
                logger.error("No available hosts to load cluster slots. Tried hosts: %r", addrs)
                raise last_err

        state = create_cluster_state(slots_resp, cluster_info, addr)

        if state.state is not NodeClusterState.OK:
            logger.warning(
                (
                    "Cluster probably broken. Tried %d nodes and "
                    'apply not "ok" (%s) cluster state from %s'
                ),
                len(addrs),
                state.state.value,
                addr,
            )

        return state
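
create_cluster_state builds the ClusterState from the CLUSTER SLOTS reply, which is an array of [start_slot, end_slot, master, replica, ...] entries where each node is [host, port, ...]. The parser below is a reduced sketch that only maps slot ranges to master addresses; the real function produces a much richer state object.

from typing import Any, Dict, Sequence, Tuple


def parse_slots_sketch(
    slots_resp: Sequence[Sequence[Any]],
) -> Dict[Tuple[int, int], Tuple[str, int]]:
    ranges: Dict[Tuple[int, int], Tuple[str, int]] = {}
    for entry in slots_resp:
        start_slot, end_slot = int(entry[0]), int(entry[1])
        # entry[2] is the master node: [host, port, ...]; replica entries
        # follow and are ignored in this sketch
        master_host, master_port = entry[2][0], int(entry[2][1])
        ranges[(start_slot, end_slot)] = (master_host, master_port)
    return ranges
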
Example 11
    async def _on_execute_fail(self, ctx: ExecuteContext, fail_props: ExecuteFailProps) -> None:
        # classify the error for logging and
        # mark the cluster state for reload if needed
        try:
            raise fail_props.error
        except network_errors as e:
            logger.warning("Connection problem with %s: %r", fail_props.node_addr, e)
            self._manager.require_reload_state()
        except closed_errors as e:
            logger.warning("Connection is closed: %r", e)
            self._manager.require_reload_state()
        except ConnectTimeoutError as e:
            logger.warning("Connect to node is timed out: %s", e)
            self._manager.require_reload_state()
        except ClusterDownError as e:
            logger.warning("Cluster is down: %s", e)
            self._manager.require_reload_state()
        except TryAgainError as e:
            logger.warning("Try again error: %s", e)
            self._manager.require_reload_state()
        except MovedError as e:
            logger.info("MOVED reply: %s", e)
            self._manager.require_reload_state()
        except AskError as e:
            logger.info("ASK reply: %s", e)
        except LoadingError as e:
            logger.warning("Cluster node %s is loading: %s", fail_props.node_addr, e)
            self._manager.require_reload_state()
        except ProtocolError as e:
            logger.warning("Redis protocol error: %s", e)
            self._manager.require_reload_state()
        except ReplyError as e:
            # all other reply errors must be propagated to the caller
            logger.warning("Reply error: %s", e)
            raise
        except asyncio.TimeoutError:
            is_readonly = ctx.cmd_info.is_readonly()
            if is_readonly:
                logger.warning(
                    "Read-Only command %s to %s is timed out", ctx.cmd_name, fail_props.node_addr
                )
            else:
                logger.warning(
                    "Non-idempotent command %s to %s is timed out. " "Abort command",
                    ctx.cmd_name,
                    fail_props.node_addr,
                )

            # node probably down
            self._manager.require_reload_state()

            # abort non-idempotent commands
            if not is_readonly:
                raise

        except Exception as e:
            logger.exception("Unexpected error: %r", e)
            raise

        if ctx.attempt >= ctx.max_attempts:
            raise fail_props.error

        # slowdown retry calls
        await self._execute_retry_slowdown(ctx.attempt, ctx.max_attempts)
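
Putting the last few examples together, the retry flow implied by _make_execute_props, _try_execute and _on_execute_fail looks roughly like the sketch below. It is an illustration only: the real execute() differs, and the ExecuteFailProps constructor and the ctx.attempt handling are assumptions.

import asyncio
from typing import Any, Optional


async def _execute_sketch(self, ctx: Any) -> Any:
    # assumes ctx.attempt starts at 0 and ExecuteFailProps(node_addr=..., error=...)
    fail_props: Optional[Any] = None
    while True:
        ctx.attempt += 1
        props = self._make_execute_props(self._state, ctx, fail_props)
        if props.reload_state_required:
            self._manager.require_reload_state()
        try:
            return await self._try_execute(ctx, props, fail_props)
        except asyncio.CancelledError:
            raise
        except Exception as e:
            fail_props = ExecuteFailProps(node_addr=props.node_addr, error=e)
            # _on_execute_fail re-raises fatal errors or when attempts are exhausted,
            # otherwise it sleeps (retry slowdown) and we loop for another attempt
            await self._on_execute_fail(ctx, fail_props)
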