예제 #1
0
    def on_POSITION(self, conn: IReplicationConnection, cmd: PositionCommand) -> None:
        """Queue an incoming POSITION command for later processing.

        A POSITION whose instance name equals this instance's own name is
        just our own message echoed back; it is dropped without being logged
        or queued.

        Args:
            conn: The connection the command arrived on.
            cmd: The POSITION command that was received.
        """
        is_own_echo = cmd.instance_name == self._instance_name
        if not is_own_echo:
            logger.debug("Handling '%s %s'", cmd.NAME, cmd.to_line())
            self._add_command_to_stream_queue(conn, cmd)
예제 #2
0
    async def on_REPLICATE(self, conn: AbstractConnection, cmd: ReplicateCommand):
        """Announce the current position of every known stream.

        Positions are only announced by the writer of the streams, which this
        code identifies via ``self._is_master``; any other process does
        nothing here.

        Args:
            conn: The connection the REPLICATE arrived on (not used here).
            cmd: The REPLICATE command that was received (not used here).
        """
        if self._is_master:
            for name, stream in self._streams.items():
                token = stream.current_token()
                announcement = PositionCommand(name, self._instance_name, token)
                self.send_command(announcement)
예제 #3
0
    def send_positions_to_connection(self, conn: AbstractConnection):
        """Send the current position of each stream this process is source of.

        One POSITION command is issued for every stream returned by
        ``get_streams_to_replicate``.

        NOTE: ``conn`` is not referenced in this body; the commands go out
        through ``self.send_command``.
        """
        for source in self.get_streams_to_replicate():
            position = PositionCommand(
                source.NAME,
                self._instance_name,
                source.current_token(self._instance_name),
            )
            self.send_command(position)
예제 #4
0
    def send_positions_to_connection(self, conn: IReplicationConnection):
        """Send the current position of each stream this process is source of.

        One POSITION command is issued for every stream returned by
        ``get_streams_to_replicate``, with the stream's current token used
        for both token arguments.

        NOTE: ``conn`` is not referenced in this body; the commands go out
        through ``self.send_command``.
        """
        for source in self.get_streams_to_replicate():
            # The current token doubles as the "previous" token (instead of
            # the stream's last_token): rows may have been written between
            # the last token and the current one, because we may be racing
            # with the background process that sends replication updates.
            token = source.current_token(self._instance_name)
            position = PositionCommand(
                source.NAME, self._instance_name, token, token
            )
            self.send_command(position)
예제 #5
0
    async def _run_notifier_loop(self):
        """Push out stream updates until no further pokes are outstanding.

        ``is_looping`` is set for the duration of the call. On the way out,
        whether normally or through an exception, both ``pending_updates``
        and ``is_looping`` are reset to ``False``.

        Raises:
            Exception: whatever ``stream.get_updates()`` raised, after the
                failure has been logged.
        """
        self.is_looping = True

        try:
            # A poke that lands while we are part-way through the streams sets
            # `pending_updates` again, so we go round once more instead of
            # missing an update on a stream we have already looked at.
            while self.pending_updates:
                self.pending_updates = False

                with Measure(self.clock, "repl.stream.get_updates"):
                    streams_to_check = self.streams

                    if self._replication_torture_level is not None:
                        # Ordering between streams is not guaranteed, so
                        # torture mode deliberately scrambles it.
                        streams_to_check = list(streams_to_check)
                        random.shuffle(streams_to_check)

                    for stream in streams_to_check:
                        if stream.last_token == stream.current_token(
                            self._instance_name
                        ):
                            # Nothing new on this stream.
                            continue

                        if self._replication_torture_level:
                            delay = self._replication_torture_level / 1000.0
                            await self.clock.sleep(delay)

                        # Remember where we were before fetching; it is sent
                        # as the first token of any POSITION emitted below.
                        prev_token = stream.last_token

                        logger.debug(
                            "Getting stream: %s: %s -> %s",
                            stream.NAME,
                            stream.last_token,
                            stream.current_token(self._instance_name),
                        )
                        try:
                            fetched = await stream.get_updates()
                            rows, new_token, was_limited = fetched
                            # A limited fetch means another pass is needed.
                            self.pending_updates |= was_limited
                        except Exception:
                            logger.info("Failed to handle stream %s", stream.NAME)
                            raise

                        logger.debug("Sending %d updates", len(rows))

                        if not rows:
                            # The token moved on but there is nothing to send,
                            # so tell other workers about the new position.
                            #
                            # XXX: Only done for the events stream. Other
                            # streams (e.g. account data) share their "current
                            # token" with each other, which makes sending a
                            # POSITION for them *not* safe.
                            if stream.NAME == EventsStream.NAME:
                                logger.info(
                                    "Sending position: %s -> %s",
                                    stream.NAME,
                                    new_token,
                                )
                                position = PositionCommand(
                                    stream.NAME,
                                    self._instance_name,
                                    prev_token,
                                    new_token,
                                )
                                self.command_handler.send_command(position)
                            continue

                        logger.info(
                            "Streaming: %s -> %s", stream.NAME, rows[-1][0]
                        )
                        stream_updates_counter.labels(stream.NAME).inc(len(rows))

                        # Some streams yield several rows for one stream ID and
                        # those must go out as a batch: every row of such a run
                        # except the last carries a None token. See
                        # RdataCommand for more details.
                        for token, row in _batch_updates(rows):
                            try:
                                self.command_handler.stream_update(
                                    stream.NAME, token, row
                                )
                            except Exception:
                                logger.exception("Failed to replicate")

            logger.debug("No more pending updates, breaking poke loop")
        finally:
            self.pending_updates = False
            self.is_looping = False
예제 #6
0
파일: handler.py 프로젝트: lfaraone/synapse
    async def on_POSITION(self, conn: AbstractConnection, cmd: PositionCommand):
        """Catch a stream up to the position announced in a POSITION command.

        POSITION commands carrying our own instance name are echoes and are
        ignored, as are those naming a stream we do not know about (the
        latter is logged as an error). Otherwise any rows between our current
        token and ``cmd.token`` are fetched and fed through ``on_rdata``, the
        replication data handler is told about the new position, and the
        stream is recorded as connected on ``conn``.

        Args:
            conn: The connection the command arrived on.
            cmd: The POSITION command that was received.
        """
        if cmd.instance_name == self._instance_name:
            # Our own POSITION echoed back to us; nothing to do.
            return

        logger.info("Handling '%s %s'", cmd.NAME, cmd.to_line())

        stream_name = cmd.stream_name
        target_stream = self._streams.get(stream_name)
        if not target_stream:
            logger.error("Got POSITION for unknown stream: %s", stream_name)
            return

        # The linearizer guards the catch-up in case the replication
        # connection reconnects underneath us.
        with await self._position_linearizer.queue(stream_name):
            # While catching up, the stream does not count as connected on
            # any connection.
            for connected in self._streams_by_connection.values():
                connected.discard(stream_name)

            # Any pending batch is dropped: the fetch below retrieves every
            # row of the batch anyway.
            self._pending_batches.pop(stream_name, [])

            # Where we had previously streamed up to.
            position = target_stream.current_token(cmd.instance_name)

            # Equal tokens mean we are already up to date; otherwise keep
            # fetching until the stream reports nothing more is missing.
            behind = cmd.token != position
            while behind:
                logger.info(
                    "Fetching replication rows for '%s' between %i and %i",
                    stream_name,
                    position,
                    cmd.token,
                )
                result = await target_stream.get_updates_since(
                    cmd.instance_name, position, cmd.token
                )
                fetched, position, behind = result

                # TODO: add some tests for this

                # Some streams return several rows sharing one stream ID;
                # those are handed on one batch at a time.
                for batch_token, batch_rows in _batch_updates(fetched):
                    parsed = [target_stream.parse_row(raw) for raw in batch_rows]
                    await self.on_rdata(
                        stream_name, cmd.instance_name, batch_token, parsed
                    )

            logger.info("Caught up with stream '%s' to %i", stream_name, cmd.token)

            # Caught up to the advertised position: let the handler know.
            await self._replication_data_handler.on_position(
                cmd.stream_name, cmd.instance_name, cmd.token
            )

            self._streams_by_connection.setdefault(conn, set()).add(stream_name)
예제 #7
0
    async def subscribe_to_stream(self, stream_name, token):
        """Subscribe the remote to a stream.

        This involves checking whether the remote has missed anything since
        ``token`` and sending those updates down if so. Updates that arrive
        for the stream in the meantime are queued, and are sent once the
        missed updates have gone out.

        Any exception is logged and reported to the remote via ``send_error``
        rather than propagated. The stream is always removed from
        ``connecting_streams`` on the way out.
        """
        self.replication_streams.discard(stream_name)
        self.connecting_streams.add(stream_name)

        try:
            # Fetch whatever the remote has missed and send it down.
            missed, current_token = await self.streamer.get_stream_updates(
                stream_name, token
            )
            for entry in missed:
                self.send_command(RdataCommand(stream_name, entry[0], entry[1]))

            # A POSITION makes sure the remote holds an up-to-date token,
            # which matters most when nothing was sent above.
            self.send_command(PositionCommand(stream_name, current_token))

            # Now flush the updates that were queued while subscribing.
            #
            # A None token marks a row as part of a batch: several updates
            # sharing one token, where only the final row carries the real
            # token. Rows are therefore collected until a non-None token
            # turns up, and the whole batch is then treated as having that
            # token.
            queued = self.pending_rdata.pop(stream_name, [])
            batch = []
            for rdata_token, rdata_row in queued:
                if rdata_token is None:
                    batch.append(rdata_row)
                    continue

                if rdata_token <= current_token:
                    # This update (or batch) is no newer than current_token,
                    # so it is discarded by the reset below.
                    pass
                else:
                    batch.append(rdata_row)
                    for queued_row in batch:
                        self.send_command(
                            RdataCommand(stream_name, rdata_token, queued_row)
                        )

                # Sent or discarded, the batch starts afresh.
                batch = []

            # They're now fully subscribed
            self.replication_streams.add(stream_name)
        except Exception as e:
            logger.exception("[%s] Failed to handle REPLICATE command", self.id())
            self.send_error("failed to handle replicate: %r", e)
        finally:
            self.connecting_streams.discard(stream_name)