Ejemplo n.º 1
0
    async def _load_state(self, reload_id: int) -> ClusterState:
        """Fetch a fresh cluster state, warm up master pools and apply it.

        On the very first load (``reload_id == 1``) also fetches the
        ``COMMAND`` specs from a random master and builds the commands
        registry.

        :param reload_id: monotonically increasing reload counter.
        :return: the state that was applied to ``self._state``.
        """
        addrs = self._get_init_addrs(reload_id)
        fetched_state = await self._fetch_state(addrs)

        # warm up a connections pool for every master in the new state
        for master_node in fetched_state._data.masters:
            await self._pooler.ensure_pool(master_node.addr)

        # pick an arbitrary master to talk to
        master_pool = await self._pooler.ensure_pool(fetched_state.random_master().addr)

        # command specs are loaded only on the very first state load
        registry: Optional[CommandsRegistry] = None
        if reload_id == 1:
            async with async_timeout.timeout(self._execute_timeout):
                raw_commands = await master_pool.execute(b"COMMAND", encoding="utf-8")
            registry = create_registry(raw_commands)
            logger.debug("Found %d supported commands in cluster", registry.size())

        # publish the freshly built state (and the registry, when present)
        self._state = fetched_state
        if registry is not None:
            self._commands = registry

        if logger.isEnabledFor(logging.INFO):
            logger.info(
                "Loaded state: %s (reload_id=%d)",
                fetched_state.repr_stats(),
                reload_id,
            )

        return self._state
Ejemplo n.º 2
0
    def _make_execute_props(
        self,
        state: ClusterState,
        ctx: ExecuteContext,
        fail_props: Optional[ExecuteFailProps] = None,
    ) -> ExecuteProps:
        """Decide which node the command in *ctx* must be sent to.

        :param state: current cluster topology snapshot.
        :param ctx: execution context (command, slot, attempt counters).
        :param fail_props: failure details of the previous attempt,
            or ``None`` on the first try.
        :return: props carrying the target node address plus the
            ``asking`` / ``reload_state_required`` flags when applicable.
        """
        exec_props = ExecuteProps()

        node_addr: Address

        if fail_props:
            # Re-raise the previous error so it can be classified with
            # except clauses instead of many isinstance conditions.
            try:
                raise fail_props.error
            except self._connection_errors:
                # On the first couple of attempts prefer a replica that
                # serves the same slot; otherwise fall back to any node.
                if ctx.attempt <= 2 and ctx.slot is not None:
                    replica = state.random_slot_replica(ctx.slot)
                    if replica is not None:
                        node_addr = replica.addr
                    else:
                        node_addr = state.random_node().addr
                else:
                    node_addr = state.random_node().addr
            except MovedError as e:
                # slot was migrated: follow the redirect target
                node_addr = Address(e.info.host, e.info.port)
            except AskError as e:
                # slot is migrating: retry on the target node with ASKING
                node_addr = Address(e.info.host, e.info.port)
                exec_props.asking = e.info.ask
            except (ClusterDownError, TryAgainError, LoadingError, ProtocolError):
                # transient cluster-wide conditions: just try another node
                node_addr = state.random_node().addr
            except Exception as e:
                # usually should never get here
                logger.exception("Uncaught exception on execute: %r", e)
                raise
            logger.info("New node to execute: %s", node_addr)
        else:
            if ctx.slot is not None:
                try:
                    node = state.slot_master(ctx.slot)
                except UncoveredSlotError:
                    logger.warning("No node found by slot %d", ctx.slot)

                    # probably the cluster is corrupted and we need to
                    # try to recover the cluster state
                    exec_props.reload_state_required = True
                    node = state.random_master()
                node_addr = node.addr
            else:
                # keyless command: any master will do
                node_addr = state.random_master().addr
            logger.debug("Defined node to command: %s", node_addr)

        exec_props.node_addr = node_addr

        return exec_props
Ejemplo n.º 3
0
    async def _try_execute(
        self, ctx: ExecuteContext, props: ExecuteProps, fail_props: Optional[ExecuteFailProps]
    ) -> Any:
        """Run a single attempt of the command in *ctx* on ``props.node_addr``."""
        addr = props.node_addr

        prefix = f"[{ctx.attempt}/{ctx.max_attempts}] " if ctx.attempt > 1 else ""

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("%sExecute %r on %s", prefix, ctx.cmd_for_repr(), addr)

        pool = await self._pooler.ensure_pool(addr)

        # warn when the pool is saturated: full-sized with no free connections
        current_size = pool.size
        if current_size >= pool.maxsize and pool.freesize == 0:
            logger.warning(
                "ConnectionPool to %s size limit reached (minsize:%s, maxsize:%s, current:%s])",
                addr,
                pool.minsize,
                pool.maxsize,
                current_size,
            )

        if props.asking:
            # ASK redirect: the command must be prefixed with ASKING on a
            # dedicated connection
            logger.info("Send ASKING to %s for command %r", addr, ctx.cmd_name)
            return await self._conn_execute(
                pool,
                ctx.cmd,
                ctx.kwargs,
                timeout=self._attempt_timeout,
                asking=True,
            )

        if ctx.cmd_info.is_blocking():
            # blocking commands hold a single dedicated connection
            return await self._conn_execute(
                pool,
                ctx.cmd,
                ctx.kwargs,
                timeout=self._attempt_timeout,
            )

        # regular commands go through the pool's shared execute path
        return await self._pool_execute(
            pool,
            ctx.cmd,
            ctx.kwargs,
            timeout=self._attempt_timeout,
        )
Ejemplo n.º 4
0
    async def ensure_pool(self, addr: Address) -> AbcPool:
        """Return the connections pool for *addr*, creating it if necessary."""

        # a closed pool is useless: drop its holder so it gets recreated
        if addr in self._nodes and self._nodes[addr].pool.closed:
            self._erase_addr(addr)

        if addr not in self._nodes:
            # double-checked under the per-address creation lock so that
            # concurrent callers create at most one pool for this address
            async with self._creation_lock(addr):
                if addr not in self._nodes:
                    logger.debug("Create connections pool for %s", addr)
                    new_pool = await self._create_pool((addr.host, addr.port))
                    self._nodes[addr] = PoolHolder(new_pool, self._reap_calls)
                    self._pubsub_addrs[addr] = set()

        entry = self._nodes[addr]
        # refresh the holder's generation to the current reap counter
        entry.generation = self._reap_calls
        return entry.pool
Ejemplo n.º 5
0
    async def execute(self, *args, **kwargs) -> Any:
        """Execute redis command."""

        ctx = self._make_exec_context(args, kwargs)

        # a command with keys is bound to a specific hash slot
        keys = self._extract_command_keys(ctx.cmd_info, ctx.cmd)
        if keys:
            ctx.slot = self.determine_slot(*keys)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("Determined slot for %r is %d", ctx.cmd_for_repr(), ctx.slot)

        exec_fail_props: Optional[ExecuteFailProps] = None

        # retry loop: each iteration is one attempt against a node chosen
        # from the current cluster state and the previous failure, if any
        while ctx.attempt < ctx.max_attempts:
            self._check_closed()

            ctx.attempt += 1

            state = await self._manager.get_state()
            exec_props = self._make_execute_props(state, ctx, exec_fail_props)

            if exec_props.reload_state_required:
                self._manager.require_reload_state()

            node_addr = exec_props.node_addr

            # reset previous execute fail properties
            prev_exec_fail_props = exec_fail_props
            exec_fail_props = None

            try:
                result = await self._try_execute(ctx, exec_props, prev_exec_fail_props)
            except asyncio.CancelledError:
                # never swallow task cancellation
                raise
            except Exception as e:
                exec_fail_props = ExecuteFailProps(
                    node_addr=node_addr,
                    error=e,
                )

            if exec_fail_props:
                # NOTE(review): presumably _on_execute_fail raises once the
                # attempts are exhausted; otherwise `result` would be unbound
                # at the return below — confirm against its implementation.
                await self._on_execute_fail(ctx, exec_fail_props)
                continue

            break

        return result
Ejemplo n.º 6
0
    async def _fetch_state(self, addrs: Sequence[Address]) -> ClusterState:
        """Build a cluster state from the first responsive node in *addrs*.

        Queries ``CLUSTER INFO`` and ``CLUSTER SLOTS`` on candidates one by
        one, preferring a node that reports an "ok" cluster state.

        :param addrs: candidate node addresses to query.
        :raises RuntimeError: if *addrs* is empty.
        :raises BaseException: the last connection/timeout error when every
            queried node failed with an error.
        """
        if len(addrs) == 0:
            raise RuntimeError("no addrs to fetch cluster state")

        last_err: Optional[BaseException] = None

        if len(addrs) > 10:
            # cap the candidate list: keep at least ten addrs, up to half
            # of them; addrs are presumably already randomized upstream
            addrs = addrs[: max(10, len(addrs) // 2)]

        logger.debug("Trying to obtain cluster state from addrs: %r", addrs)

        # get first successful cluster slots response
        for addr in addrs:
            logger.info("Obtain cluster state from %s", addr)
            try:
                pool = await self._pooler.ensure_pool(addr)
                async with async_timeout.timeout(self._execute_timeout):
                    # ensure one connection behaviour: both commands must go
                    # over the same connection for a consistent snapshot
                    async with pool.get() as conn:
                        raw_cluster_info: str = await conn.execute(
                            b"CLUSTER", b"INFO", encoding="utf-8"
                        )
                        cluster_info = parse_info(raw_cluster_info)
                        slots_resp = await conn.execute(b"CLUSTER", b"SLOTS", encoding="utf-8")

            except asyncio.TimeoutError as e:
                last_err = e
                logger.warning("Getting cluster state from %s is timed out", addr)
                continue
            except Exception as e:
                last_err = e
                logger.warning("Unable to get cluster state from %s: %r", addr, e)
                continue

            # a node that answers but reports a non-"ok" state is skipped,
            # though its response stays bound and may be used below if it
            # turns out to be the last candidate
            if cluster_info[CLUSTER_INFO_STATE_KEY] != NodeClusterState.OK.value:
                logger.warning(
                    'Node %s was return not "ok" cluster state "%s". Try next node',
                    addr,
                    cluster_info[CLUSTER_INFO_STATE_KEY],
                )
                continue

            logger.debug(
                "Cluster state successful loaded from %s: info:%r slots:%r",
                addr,
                cluster_info,
                slots_resp,
            )

            break
        else:
            # loop exhausted without a break: if the last candidate errored,
            # re-raise that error; otherwise fall through and build a state
            # from the last (non-"ok") node's response
            if last_err is not None:
                logger.error("No available hosts to load cluster slots. Tried hosts: %r", addrs)
                raise last_err

        # `addr`, `slots_resp` and `cluster_info` refer to the node the loop
        # stopped at (or the last queried one on fall-through)
        state = create_cluster_state(slots_resp, cluster_info, addr)

        if state.state is not NodeClusterState.OK:
            logger.warning(
                (
                    "Cluster probably broken. Tried %d nodes and "
                    'apply not "ok" (%s) cluster state from %s'
                ),
                len(addrs),
                state.state.value,
                addr,
            )

        return state