Ejemplo n.º 1
0
    def _transaction_transmission_loop(self, destination):
        pending_pdus = []
        try:
            self.pending_transactions[destination] = 1

            # This will throw if we wouldn't retry. We do this here so we fail
            # quickly, but we will later check this again in the http client,
            # hence why we throw the result away.
            yield get_retry_limiter(destination, self.clock, self.store)

            pending_pdus = []
            while True:
                device_message_edus, device_stream_id, dev_list_id = (
                    yield self._get_new_device_messages(destination))

                # BEGIN CRITICAL SECTION
                #
                # In order to avoid a race condition, we need to make sure that
                # the following code (from popping the queues up to the point
                # where we decide if we actually have any pending messages) is
                # atomic - otherwise new PDUs or EDUs might arrive in the
                # meantime, but not get sent because we hold the
                # pending_transactions flag.

                pending_pdus = self.pending_pdus_by_dest.pop(destination, [])
                pending_edus = self.pending_edus_by_dest.pop(destination, [])
                pending_presence = self.pending_presence_by_dest.pop(
                    destination, {})
                pending_failures = self.pending_failures_by_dest.pop(
                    destination, [])

                pending_edus.extend(
                    self.pending_edus_keyed_by_dest.pop(destination,
                                                        {}).values())

                pending_edus.extend(device_message_edus)
                if pending_presence:
                    pending_edus.append(
                        Edu(
                            origin=self.server_name,
                            destination=destination,
                            edu_type="m.presence",
                            content={
                                "push": [
                                    format_user_presence_state(
                                        presence, self.clock.time_msec())
                                    for presence in pending_presence.values()
                                ]
                            },
                        ))

                if pending_pdus:
                    logger.debug(
                        "TX [%s] len(pending_pdus_by_dest[dest]) = %d",
                        destination, len(pending_pdus))

                if not pending_pdus and not pending_edus and not pending_failures:
                    logger.debug("TX [%s] Nothing to send", destination)
                    self.last_device_stream_id_by_dest[destination] = (
                        device_stream_id)
                    return

                # END CRITICAL SECTION

                success = yield self._send_new_transaction(
                    destination,
                    pending_pdus,
                    pending_edus,
                    pending_failures,
                )
                if success:
                    sent_transactions_counter.inc()
                    # Remove the acknowledged device messages from the database
                    # Only bother if we actually sent some device messages
                    if device_message_edus:
                        yield self.store.delete_device_msgs_for_remote(
                            destination, device_stream_id)
                        logger.info("Marking as sent %r %r", destination,
                                    dev_list_id)
                        yield self.store.mark_as_sent_devices_by_remote(
                            destination, dev_list_id)

                    self.last_device_stream_id_by_dest[
                        destination] = device_stream_id
                    self.last_device_list_stream_id_by_dest[
                        destination] = dev_list_id
                else:
                    break
        except NotRetryingDestination as e:
            logger.debug(
                "TX [%s] not ready for retry yet (next retry at %s) - "
                "dropping transaction for now",
                destination,
                datetime.datetime.fromtimestamp(
                    (e.retry_last_ts + e.retry_interval) / 1000.0),
            )
        except FederationDeniedError as e:
            logger.info(e)
        except Exception as e:
            logger.warn(
                "TX [%s] Failed to send transaction: %s",
                destination,
                e,
            )
            for p, _ in pending_pdus:
                logger.info("Failed to send event %s to %s", p.event_id,
                            destination)
        finally:
            # We want to be *very* sure we delete this after we stop processing
            self.pending_transactions.pop(destination, None)
Ejemplo n.º 2
0
    def _transaction_transmission_loop(self):
        pending_pdus = []
        try:
            self.transmission_loop_running = True

            # This will throw if we wouldn't retry. We do this here so we fail
            # quickly, but we will later check this again in the http client,
            # hence why we throw the result away.
            yield get_retry_limiter(self._destination, self._clock,
                                    self._store)

            pending_pdus = []
            while True:
                device_message_edus, device_stream_id, dev_list_id = (
                    yield self._get_new_device_messages())

                # BEGIN CRITICAL SECTION
                #
                # In order to avoid a race condition, we need to make sure that
                # the following code (from popping the queues up to the point
                # where we decide if we actually have any pending messages) is
                # atomic - otherwise new PDUs or EDUs might arrive in the
                # meantime, but not get sent because we hold the
                # transmission_loop_running flag.

                pending_pdus = self._pending_pdus

                # We can only include at most 50 PDUs per transactions
                pending_pdus, self._pending_pdus = pending_pdus[:
                                                                50], pending_pdus[
                                                                    50:]

                pending_edus = []

                pending_edus.extend(self._get_rr_edus(force_flush=False))

                # We can only include at most 100 EDUs per transactions
                pending_edus.extend(
                    self._pop_pending_edus(100 - len(pending_edus)))

                pending_edus.extend(self._pending_edus_keyed.values())

                self._pending_edus_keyed = {}

                pending_edus.extend(device_message_edus)

                pending_presence = self._pending_presence
                self._pending_presence = {}
                if pending_presence:
                    pending_edus.append(
                        Edu(
                            origin=self._server_name,
                            destination=self._destination,
                            edu_type="m.presence",
                            content={
                                "push": [
                                    format_user_presence_state(
                                        presence, self._clock.time_msec())
                                    for presence in pending_presence.values()
                                ]
                            },
                        ))

                if pending_pdus:
                    logger.debug(
                        "TX [%s] len(pending_pdus_by_dest[dest]) = %d",
                        self._destination, len(pending_pdus))

                if not pending_pdus and not pending_edus:
                    logger.debug("TX [%s] Nothing to send", self._destination)
                    self._last_device_stream_id = device_stream_id
                    return

                # if we've decided to send a transaction anyway, and we have room, we
                # may as well send any pending RRs
                if len(pending_edus) < 100:
                    pending_edus.extend(self._get_rr_edus(force_flush=True))

                # END CRITICAL SECTION

                success = yield self._transaction_manager.send_new_transaction(
                    self._destination, pending_pdus, pending_edus)
                if success:
                    sent_transactions_counter.inc()
                    sent_edus_counter.inc(len(pending_edus))
                    for edu in pending_edus:
                        sent_edus_by_type.labels(edu.edu_type).inc()
                    # Remove the acknowledged device messages from the database
                    # Only bother if we actually sent some device messages
                    if device_message_edus:
                        yield self._store.delete_device_msgs_for_remote(
                            self._destination, device_stream_id)
                        logger.info("Marking as sent %r %r", self._destination,
                                    dev_list_id)
                        yield self._store.mark_as_sent_devices_by_remote(
                            self._destination, dev_list_id)

                    self._last_device_stream_id = device_stream_id
                    self._last_device_list_stream_id = dev_list_id
                else:
                    break
        except NotRetryingDestination as e:
            logger.debug(
                "TX [%s] not ready for retry yet (next retry at %s) - "
                "dropping transaction for now",
                self._destination,
                datetime.datetime.fromtimestamp(
                    (e.retry_last_ts + e.retry_interval) / 1000.0),
            )
        except FederationDeniedError as e:
            logger.info(e)
        except HttpResponseException as e:
            logger.warning(
                "TX [%s] Received %d response to transaction: %s",
                self._destination,
                e.code,
                e,
            )
        except RequestSendFailed as e:
            logger.warning("TX [%s] Failed to send transaction: %s",
                           self._destination, e)

            for p, _ in pending_pdus:
                logger.info("Failed to send event %s to %s", p.event_id,
                            self._destination)
        except Exception:
            logger.exception(
                "TX [%s] Failed to send transaction",
                self._destination,
            )
            for p, _ in pending_pdus:
                logger.info("Failed to send event %s to %s", p.event_id,
                            self._destination)
        finally:
            # We want to be *very* sure we clear this after we stop processing
            self.transmission_loop_running = False
Ejemplo n.º 3
0
    async def _transaction_transmission_loop(self) -> None:
        pending_pdus = []  # type: List[EventBase]
        try:
            self.transmission_loop_running = True

            # This will throw if we wouldn't retry. We do this here so we fail
            # quickly, but we will later check this again in the http client,
            # hence why we throw the result away.
            await get_retry_limiter(self._destination, self._clock,
                                    self._store)

            pending_pdus = []
            while True:
                # We have to keep 2 free slots for presence and rr_edus
                limit = MAX_EDUS_PER_TRANSACTION - 2

                device_update_edus, dev_list_id = await self._get_device_update_edus(
                    limit)

                limit -= len(device_update_edus)

                (
                    to_device_edus,
                    device_stream_id,
                ) = await self._get_to_device_message_edus(limit)

                pending_edus = device_update_edus + to_device_edus

                # BEGIN CRITICAL SECTION
                #
                # In order to avoid a race condition, we need to make sure that
                # the following code (from popping the queues up to the point
                # where we decide if we actually have any pending messages) is
                # atomic - otherwise new PDUs or EDUs might arrive in the
                # meantime, but not get sent because we hold the
                # transmission_loop_running flag.

                pending_pdus = self._pending_pdus

                # We can only include at most 50 PDUs per transactions
                pending_pdus, self._pending_pdus = pending_pdus[:
                                                                50], pending_pdus[
                                                                    50:]

                pending_edus.extend(self._get_rr_edus(force_flush=False))
                pending_presence = self._pending_presence
                self._pending_presence = {}
                if pending_presence:
                    pending_edus.append(
                        Edu(
                            origin=self._server_name,
                            destination=self._destination,
                            edu_type="m.presence",
                            content={
                                "push": [
                                    format_user_presence_state(
                                        presence, self._clock.time_msec())
                                    for presence in pending_presence.values()
                                ]
                            },
                        ))

                pending_edus.extend(
                    self._pop_pending_edus(MAX_EDUS_PER_TRANSACTION -
                                           len(pending_edus)))
                while (len(pending_edus) < MAX_EDUS_PER_TRANSACTION
                       and self._pending_edus_keyed):
                    _, val = self._pending_edus_keyed.popitem()
                    pending_edus.append(val)

                if pending_pdus:
                    logger.debug(
                        "TX [%s] len(pending_pdus_by_dest[dest]) = %d",
                        self._destination,
                        len(pending_pdus),
                    )

                if not pending_pdus and not pending_edus:
                    logger.debug("TX [%s] Nothing to send", self._destination)
                    self._last_device_stream_id = device_stream_id
                    return

                # if we've decided to send a transaction anyway, and we have room, we
                # may as well send any pending RRs
                if len(pending_edus) < MAX_EDUS_PER_TRANSACTION:
                    pending_edus.extend(self._get_rr_edus(force_flush=True))

                # END CRITICAL SECTION

                success = await self._transaction_manager.send_new_transaction(
                    self._destination, pending_pdus, pending_edus)
                if success:
                    sent_transactions_counter.inc()
                    sent_edus_counter.inc(len(pending_edus))
                    for edu in pending_edus:
                        sent_edus_by_type.labels(edu.edu_type).inc()
                    # Remove the acknowledged device messages from the database
                    # Only bother if we actually sent some device messages
                    if to_device_edus:
                        await self._store.delete_device_msgs_for_remote(
                            self._destination, device_stream_id)

                    # also mark the device updates as sent
                    if device_update_edus:
                        logger.info("Marking as sent %r %r", self._destination,
                                    dev_list_id)
                        await self._store.mark_as_sent_devices_by_remote(
                            self._destination, dev_list_id)

                    self._last_device_stream_id = device_stream_id
                    self._last_device_list_stream_id = dev_list_id
                else:
                    break
        except NotRetryingDestination as e:
            logger.debug(
                "TX [%s] not ready for retry yet (next retry at %s) - "
                "dropping transaction for now",
                self._destination,
                datetime.datetime.fromtimestamp(
                    (e.retry_last_ts + e.retry_interval) / 1000.0),
            )

            if e.retry_interval > 60 * 60 * 1000:
                # we won't retry for another hour!
                # (this suggests a significant outage)
                # We drop pending PDUs and EDUs because otherwise they will
                # rack up indefinitely.
                # Note that:
                # - the EDUs that are being dropped here are those that we can
                #   afford to drop (specifically, only typing notifications,
                #   read receipts and presence updates are being dropped here)
                # - Other EDUs such as to_device messages are queued with a
                #   different mechanism
                # - this is all volatile state that would be lost if the
                #   federation sender restarted anyway

                # dropping read receipts is a bit sad but should be solved
                # through another mechanism, because this is all volatile!
                self._pending_pdus = []
                self._pending_edus = []
                self._pending_edus_keyed = {}
                self._pending_presence = {}
                self._pending_rrs = {}
        except FederationDeniedError as e:
            logger.info(e)
        except HttpResponseException as e:
            logger.warning(
                "TX [%s] Received %d response to transaction: %s",
                self._destination,
                e.code,
                e,
            )
        except RequestSendFailed as e:
            logger.warning("TX [%s] Failed to send transaction: %s",
                           self._destination, e)

            for p in pending_pdus:
                logger.info("Failed to send event %s to %s", p.event_id,
                            self._destination)
        except Exception:
            logger.exception("TX [%s] Failed to send transaction",
                             self._destination)
            for p in pending_pdus:
                logger.info("Failed to send event %s to %s", p.event_id,
                            self._destination)
        finally:
            # We want to be *very* sure we clear this after we stop processing
            self.transmission_loop_running = False
Ejemplo n.º 4
0
    async def _catch_up_transmission_loop(self) -> None:
        first_catch_up_check = self._last_successful_stream_ordering is None

        if first_catch_up_check:
            # first catchup so get last_successful_stream_ordering from database
            self._last_successful_stream_ordering = (
                await
                self._store.get_destination_last_successful_stream_ordering(
                    self._destination))

        _tmp_last_successful_stream_ordering = self._last_successful_stream_ordering
        if _tmp_last_successful_stream_ordering is None:
            # if it's still None, then this means we don't have the information
            # in our database ­ we haven't successfully sent a PDU to this server
            # (at least since the introduction of the feature tracking
            # last_successful_stream_ordering).
            # Sadly, this means we can't do anything here as we don't know what
            # needs catching up — so catching up is futile; let's stop.
            self._catching_up = False
            return

        last_successful_stream_ordering: int = _tmp_last_successful_stream_ordering

        # get at most 50 catchup room/PDUs
        while True:
            event_ids = await self._store.get_catch_up_room_event_ids(
                self._destination, last_successful_stream_ordering)

            if not event_ids:
                # No more events to catch up on, but we can't ignore the chance
                # of a race condition, so we check that no new events have been
                # skipped due to us being in catch-up mode

                if self._catchup_last_skipped > last_successful_stream_ordering:
                    # another event has been skipped because we were in catch-up mode
                    continue

                # we are done catching up!
                self._catching_up = False
                break

            if first_catch_up_check:
                # as this is our check for needing catch-up, we may have PDUs in
                # the queue from before we *knew* we had to do catch-up, so
                # clear those out now.
                self._start_catching_up()

            # fetch the relevant events from the event store
            # - redacted behaviour of REDACT is fine, since we only send metadata
            #   of redacted events to the destination.
            # - don't need to worry about rejected events as we do not actively
            #   forward received events over federation.
            catchup_pdus = await self._store.get_events_as_list(event_ids)
            if not catchup_pdus:
                raise AssertionError(
                    "No events retrieved when we asked for %r. "
                    "This should not happen." % event_ids)

            # We send transactions with events from one room only, as its likely
            # that the remote will have to do additional processing, which may
            # take some time. It's better to give it small amounts of work
            # rather than risk the request timing out and repeatedly being
            # retried, and not making any progress.
            #
            # Note: `catchup_pdus` will have exactly one PDU per room.
            for pdu in catchup_pdus:
                # The PDU from the DB will be the last PDU in the room from
                # *this server* that wasn't sent to the remote. However, other
                # servers may have sent lots of events since then, and we want
                # to try and tell the remote only about the *latest* events in
                # the room. This is so that it doesn't get inundated by events
                # from various parts of the DAG, which all need to be processed.
                #
                # Note: this does mean that in large rooms a server coming back
                # online will get sent the same events from all the different
                # servers, but the remote will correctly deduplicate them and
                # handle it only once.

                # Step 1, fetch the current extremities
                extrems = await self._store.get_prev_events_for_room(
                    pdu.room_id)

                if pdu.event_id in extrems:
                    # If the event is in the extremities, then great! We can just
                    # use that without having to do further checks.
                    room_catchup_pdus = [pdu]
                else:
                    # If not, fetch the extremities and figure out which we can
                    # send.
                    extrem_events = await self._store.get_events_as_list(
                        extrems)

                    new_pdus = []
                    for p in extrem_events:
                        # We pulled this from the DB, so it'll be non-null
                        assert p.internal_metadata.stream_ordering

                        # Filter out events that happened before the remote went
                        # offline
                        if (p.internal_metadata.stream_ordering <
                                last_successful_stream_ordering):
                            continue

                        # Filter out events where the server is not in the room,
                        # e.g. it may have left/been kicked. *Ideally* we'd pull
                        # out the kick and send that, but it's a rare edge case
                        # so we don't bother for now (the server that sent the
                        # kick should send it out if its online).
                        hosts = await self._state.get_hosts_in_room_at_events(
                            p.room_id, [p.event_id])
                        if self._destination not in hosts:
                            continue

                        new_pdus.append(p)

                    # If we've filtered out all the extremities, fall back to
                    # sending the original event. This should ensure that the
                    # server gets at least some of missed events (especially if
                    # the other sending servers are up).
                    if new_pdus:
                        room_catchup_pdus = new_pdus
                    else:
                        room_catchup_pdus = [pdu]

                logger.info("Catching up rooms to %s: %r", self._destination,
                            pdu.room_id)

                await self._transaction_manager.send_new_transaction(
                    self._destination, room_catchup_pdus, [])

                sent_transactions_counter.inc()

                # We pulled this from the DB, so it'll be non-null
                assert pdu.internal_metadata.stream_ordering

                # Note that we mark the last successful stream ordering as that
                # from the *original* PDU, rather than the PDU(s) we actually
                # send. This is because we use it to mark our position in the
                # queue of missed PDUs to process.
                last_successful_stream_ordering = pdu.internal_metadata.stream_ordering

                self._last_successful_stream_ordering = last_successful_stream_ordering
                await self._store.set_destination_last_successful_stream_ordering(
                    self._destination, last_successful_stream_ordering)
Ejemplo n.º 5
0
    async def _transaction_transmission_loop(self) -> None:
        pending_pdus: List[EventBase] = []
        try:
            self.transmission_loop_running = True

            # This will throw if we wouldn't retry. We do this here so we fail
            # quickly, but we will later check this again in the http client,
            # hence why we throw the result away.
            await get_retry_limiter(self._destination, self._clock,
                                    self._store)

            if self._catching_up:
                # we potentially need to catch-up first
                await self._catch_up_transmission_loop()
                if self._catching_up:
                    # not caught up yet
                    return

            pending_pdus = []
            while True:
                self._new_data_to_send = False

                async with _TransactionQueueManager(self) as (
                        pending_pdus,
                        pending_edus,
                ):
                    if not pending_pdus and not pending_edus:
                        logger.debug("TX [%s] Nothing to send",
                                     self._destination)

                        # If we've gotten told about new things to send during
                        # checking for things to send, we try looking again.
                        # Otherwise new PDUs or EDUs might arrive in the meantime,
                        # but not get sent because we hold the
                        # `transmission_loop_running` flag.
                        if self._new_data_to_send:
                            continue
                        else:
                            return

                    if pending_pdus:
                        logger.debug(
                            "TX [%s] len(pending_pdus_by_dest[dest]) = %d",
                            self._destination,
                            len(pending_pdus),
                        )

                    await self._transaction_manager.send_new_transaction(
                        self._destination, pending_pdus, pending_edus)

                    sent_transactions_counter.inc()
                    sent_edus_counter.inc(len(pending_edus))
                    for edu in pending_edus:
                        sent_edus_by_type.labels(edu.edu_type).inc()

        except NotRetryingDestination as e:
            logger.debug(
                "TX [%s] not ready for retry yet (next retry at %s) - "
                "dropping transaction for now",
                self._destination,
                datetime.datetime.fromtimestamp(
                    (e.retry_last_ts + e.retry_interval) / 1000.0),
            )

            if e.retry_interval > 60 * 60 * 1000:
                # we won't retry for another hour!
                # (this suggests a significant outage)
                # We drop pending EDUs because otherwise they will
                # rack up indefinitely.
                # (Dropping PDUs is already performed by `_start_catching_up`.)
                # Note that:
                # - the EDUs that are being dropped here are those that we can
                #   afford to drop (specifically, only typing notifications,
                #   read receipts and presence updates are being dropped here)
                # - Other EDUs such as to_device messages are queued with a
                #   different mechanism
                # - this is all volatile state that would be lost if the
                #   federation sender restarted anyway

                # dropping read receipts is a bit sad but should be solved
                # through another mechanism, because this is all volatile!
                self._pending_edus = []
                self._pending_edus_keyed = {}
                self._pending_presence = {}
                self._pending_rrs = {}

                self._start_catching_up()
        except FederationDeniedError as e:
            logger.info(e)
        except HttpResponseException as e:
            logger.warning(
                "TX [%s] Received %d response to transaction: %s",
                self._destination,
                e.code,
                e,
            )

        except RequestSendFailed as e:
            logger.warning("TX [%s] Failed to send transaction: %s",
                           self._destination, e)

            for p in pending_pdus:
                logger.info("Failed to send event %s to %s", p.event_id,
                            self._destination)
        except Exception:
            logger.exception("TX [%s] Failed to send transaction",
                             self._destination)
            for p in pending_pdus:
                logger.info("Failed to send event %s to %s", p.event_id,
                            self._destination)
        finally:
            # We want to be *very* sure we clear this after we stop processing
            self.transmission_loop_running = False
    async def _catch_up_transmission_loop(self) -> None:
        first_catch_up_check = self._last_successful_stream_ordering is None

        if first_catch_up_check:
            # first catchup so get last_successful_stream_ordering from database
            self._last_successful_stream_ordering = await self._store.get_destination_last_successful_stream_ordering(
                self._destination)

        if self._last_successful_stream_ordering is None:
            # if it's still None, then this means we don't have the information
            # in our database ­ we haven't successfully sent a PDU to this server
            # (at least since the introduction of the feature tracking
            # last_successful_stream_ordering).
            # Sadly, this means we can't do anything here as we don't know what
            # needs catching up — so catching up is futile; let's stop.
            self._catching_up = False
            return

        # get at most 50 catchup room/PDUs
        while True:
            event_ids = await self._store.get_catch_up_room_event_ids(
                self._destination,
                self._last_successful_stream_ordering,
            )

            if not event_ids:
                # No more events to catch up on, but we can't ignore the chance
                # of a race condition, so we check that no new events have been
                # skipped due to us being in catch-up mode

                if self._catchup_last_skipped > self._last_successful_stream_ordering:
                    # another event has been skipped because we were in catch-up mode
                    continue

                # we are done catching up!
                self._catching_up = False
                break

            if first_catch_up_check:
                # as this is our check for needing catch-up, we may have PDUs in
                # the queue from before we *knew* we had to do catch-up, so
                # clear those out now.
                self._start_catching_up()

            # fetch the relevant events from the event store
            # - redacted behaviour of REDACT is fine, since we only send metadata
            #   of redacted events to the destination.
            # - don't need to worry about rejected events as we do not actively
            #   forward received events over federation.
            catchup_pdus = await self._store.get_events_as_list(event_ids)
            if not catchup_pdus:
                raise AssertionError(
                    "No events retrieved when we asked for %r. "
                    "This should not happen." % event_ids)

            if logger.isEnabledFor(logging.INFO):
                rooms = [p.room_id for p in catchup_pdus]
                logger.info("Catching up rooms to %s: %r", self._destination,
                            rooms)

            success = await self._transaction_manager.send_new_transaction(
                self._destination, catchup_pdus, [])

            if not success:
                return

            sent_transactions_counter.inc()
            final_pdu = catchup_pdus[-1]
            self._last_successful_stream_ordering = cast(
                int, final_pdu.internal_metadata.stream_ordering)
            await self._store.set_destination_last_successful_stream_ordering(
                self._destination, self._last_successful_stream_ordering)
Ejemplo n.º 7
0
    def _transaction_transmission_loop(self, destination):
        pending_pdus = []
        try:
            self.pending_transactions[destination] = 1

            # This will throw if we wouldn't retry. We do this here so we fail
            # quickly, but we will later check this again in the http client,
            # hence why we throw the result away.
            yield get_retry_limiter(destination, self.clock, self.store)

            pending_pdus = []
            while True:
                device_message_edus, device_stream_id, dev_list_id = (
                    yield self._get_new_device_messages(destination)
                )

                # BEGIN CRITICAL SECTION
                #
                # In order to avoid a race condition, we need to make sure that
                # the following code (from popping the queues up to the point
                # where we decide if we actually have any pending messages) is
                # atomic - otherwise new PDUs or EDUs might arrive in the
                # meantime, but not get sent because we hold the
                # pending_transactions flag.

                pending_pdus = self.pending_pdus_by_dest.pop(destination, [])

                # We can only include at most 50 PDUs per transactions
                pending_pdus, leftover_pdus = pending_pdus[:50], pending_pdus[50:]
                if leftover_pdus:
                    self.pending_pdus_by_dest[destination] = leftover_pdus

                pending_edus = self.pending_edus_by_dest.pop(destination, [])

                # We can only include at most 100 EDUs per transactions
                pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:]
                if leftover_edus:
                    self.pending_edus_by_dest[destination] = leftover_edus

                pending_presence = self.pending_presence_by_dest.pop(destination, {})

                pending_edus.extend(
                    self.pending_edus_keyed_by_dest.pop(destination, {}).values()
                )

                pending_edus.extend(device_message_edus)
                if pending_presence:
                    pending_edus.append(
                        Edu(
                            origin=self.server_name,
                            destination=destination,
                            edu_type="m.presence",
                            content={
                                "push": [
                                    format_user_presence_state(
                                        presence, self.clock.time_msec()
                                    )
                                    for presence in pending_presence.values()
                                ]
                            },
                        )
                    )

                if pending_pdus:
                    logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d",
                                 destination, len(pending_pdus))

                if not pending_pdus and not pending_edus:
                    logger.debug("TX [%s] Nothing to send", destination)
                    self.last_device_stream_id_by_dest[destination] = (
                        device_stream_id
                    )
                    return

                # END CRITICAL SECTION

                success = yield self._send_new_transaction(
                    destination, pending_pdus, pending_edus,
                )
                if success:
                    sent_transactions_counter.inc()
                    # Remove the acknowledged device messages from the database
                    # Only bother if we actually sent some device messages
                    if device_message_edus:
                        yield self.store.delete_device_msgs_for_remote(
                            destination, device_stream_id
                        )
                        logger.info("Marking as sent %r %r", destination, dev_list_id)
                        yield self.store.mark_as_sent_devices_by_remote(
                            destination, dev_list_id
                        )

                    self.last_device_stream_id_by_dest[destination] = device_stream_id
                    self.last_device_list_stream_id_by_dest[destination] = dev_list_id
                else:
                    break
        except NotRetryingDestination as e:
            logger.debug(
                "TX [%s] not ready for retry yet (next retry at %s) - "
                "dropping transaction for now",
                destination,
                datetime.datetime.fromtimestamp(
                    (e.retry_last_ts + e.retry_interval) / 1000.0
                ),
            )
        except FederationDeniedError as e:
            logger.info(e)
        except Exception as e:
            logger.warn(
                "TX [%s] Failed to send transaction: %s",
                destination,
                e,
            )
            for p, _ in pending_pdus:
                logger.info("Failed to send event %s to %s", p.event_id,
                            destination)
        finally:
            # We want to be *very* sure we delete this after we stop processing
            self.pending_transactions.pop(destination, None)
Ejemplo n.º 8
0
    def _transaction_transmission_loop(self):
        pending_pdus = []
        try:
            self.transmission_loop_running = True

            # This will throw if we wouldn't retry. We do this here so we fail
            # quickly, but we will later check this again in the http client,
            # hence why we throw the result away.
            yield get_retry_limiter(self._destination, self._clock, self._store)

            pending_pdus = []
            while True:
                device_message_edus, device_stream_id, dev_list_id = (
                    # We have to keep 2 free slots for presence and rr_edus
                    yield self._get_new_device_messages(MAX_EDUS_PER_TRANSACTION - 2)
                )

                # BEGIN CRITICAL SECTION
                #
                # In order to avoid a race condition, we need to make sure that
                # the following code (from popping the queues up to the point
                # where we decide if we actually have any pending messages) is
                # atomic - otherwise new PDUs or EDUs might arrive in the
                # meantime, but not get sent because we hold the
                # transmission_loop_running flag.

                pending_pdus = self._pending_pdus

                # We can only include at most 50 PDUs per transactions
                pending_pdus, self._pending_pdus = pending_pdus[:50], pending_pdus[50:]

                pending_edus = []

                # We can only include at most 100 EDUs per transactions
                # rr_edus and pending_presence take at most one slot each
                pending_edus.extend(self._get_rr_edus(force_flush=False))
                pending_presence = self._pending_presence
                self._pending_presence = {}
                if pending_presence:
                    pending_edus.append(
                        Edu(
                            origin=self._server_name,
                            destination=self._destination,
                            edu_type="m.presence",
                            content={
                                "push": [
                                    format_user_presence_state(
                                        presence, self._clock.time_msec()
                                    )
                                    for presence in pending_presence.values()
                                ]
                            },
                        )
                    )

                pending_edus.extend(device_message_edus)
                pending_edus.extend(
                    self._pop_pending_edus(MAX_EDUS_PER_TRANSACTION - len(pending_edus))
                )
                while (
                    len(pending_edus) < MAX_EDUS_PER_TRANSACTION
                    and self._pending_edus_keyed
                ):
                    _, val = self._pending_edus_keyed.popitem()
                    pending_edus.append(val)

                if pending_pdus:
                    logger.debug(
                        "TX [%s] len(pending_pdus_by_dest[dest]) = %d",
                        self._destination,
                        len(pending_pdus),
                    )

                if not pending_pdus and not pending_edus:
                    logger.debug("TX [%s] Nothing to send", self._destination)
                    self._last_device_stream_id = device_stream_id
                    return

                # if we've decided to send a transaction anyway, and we have room, we
                # may as well send any pending RRs
                if len(pending_edus) < MAX_EDUS_PER_TRANSACTION:
                    pending_edus.extend(self._get_rr_edus(force_flush=True))

                # END CRITICAL SECTION

                success = yield self._transaction_manager.send_new_transaction(
                    self._destination, pending_pdus, pending_edus
                )
                if success:
                    sent_transactions_counter.inc()
                    sent_edus_counter.inc(len(pending_edus))
                    for edu in pending_edus:
                        sent_edus_by_type.labels(edu.edu_type).inc()
                    # Remove the acknowledged device messages from the database
                    # Only bother if we actually sent some device messages
                    if device_message_edus:
                        yield self._store.delete_device_msgs_for_remote(
                            self._destination, device_stream_id
                        )
                        logger.info(
                            "Marking as sent %r %r", self._destination, dev_list_id
                        )
                        yield self._store.mark_as_sent_devices_by_remote(
                            self._destination, dev_list_id
                        )

                    self._last_device_stream_id = device_stream_id
                    self._last_device_list_stream_id = dev_list_id
                else:
                    break
        except NotRetryingDestination as e:
            logger.debug(
                "TX [%s] not ready for retry yet (next retry at %s) - "
                "dropping transaction for now",
                self._destination,
                datetime.datetime.fromtimestamp(
                    (e.retry_last_ts + e.retry_interval) / 1000.0
                ),
            )
        except FederationDeniedError as e:
            logger.info(e)
        except HttpResponseException as e:
            logger.warning(
                "TX [%s] Received %d response to transaction: %s",
                self._destination,
                e.code,
                e,
            )
        except RequestSendFailed as e:
            logger.warning(
                "TX [%s] Failed to send transaction: %s", self._destination, e
            )

            for p, _ in pending_pdus:
                logger.info(
                    "Failed to send event %s to %s", p.event_id, self._destination
                )
        except Exception:
            logger.exception("TX [%s] Failed to send transaction", self._destination)
            for p, _ in pending_pdus:
                logger.info(
                    "Failed to send event %s to %s", p.event_id, self._destination
                )
        finally:
            # We want to be *very* sure we clear this after we stop processing
            self.transmission_loop_running = False