def _transaction_transmission_loop(self, destination):
    pending_pdus = []
    try:
        self.pending_transactions[destination] = 1

        # This will throw if we wouldn't retry. We do this here so we fail
        # quickly, but we will later check this again in the http client,
        # hence we throw the result away.
        yield get_retry_limiter(destination, self.clock, self.store)

        pending_pdus = []
        while True:
            device_message_edus, device_stream_id, dev_list_id = (
                yield self._get_new_device_messages(destination)
            )

            # BEGIN CRITICAL SECTION
            #
            # In order to avoid a race condition, we need to make sure that
            # the following code (from popping the queues up to the point
            # where we decide if we actually have any pending messages) is
            # atomic - otherwise new PDUs or EDUs might arrive in the
            # meantime, but not get sent because we hold the
            # pending_transactions flag.

            pending_pdus = self.pending_pdus_by_dest.pop(destination, [])
            pending_edus = self.pending_edus_by_dest.pop(destination, [])
            pending_presence = self.pending_presence_by_dest.pop(destination, {})
            pending_failures = self.pending_failures_by_dest.pop(destination, [])

            pending_edus.extend(
                self.pending_edus_keyed_by_dest.pop(destination, {}).values()
            )

            pending_edus.extend(device_message_edus)
            if pending_presence:
                pending_edus.append(
                    Edu(
                        origin=self.server_name,
                        destination=destination,
                        edu_type="m.presence",
                        content={
                            "push": [
                                format_user_presence_state(
                                    presence, self.clock.time_msec()
                                )
                                for presence in pending_presence.values()
                            ]
                        },
                    )
                )

            if pending_pdus:
                logger.debug(
                    "TX [%s] len(pending_pdus_by_dest[dest]) = %d",
                    destination,
                    len(pending_pdus),
                )

            if not pending_pdus and not pending_edus and not pending_failures:
                logger.debug("TX [%s] Nothing to send", destination)
                self.last_device_stream_id_by_dest[destination] = device_stream_id
                return

            # END CRITICAL SECTION

            success = yield self._send_new_transaction(
                destination, pending_pdus, pending_edus, pending_failures
            )
            if success:
                sent_transactions_counter.inc()
                # Remove the acknowledged device messages from the database.
                # Only bother if we actually sent some device messages.
                if device_message_edus:
                    yield self.store.delete_device_msgs_for_remote(
                        destination, device_stream_id
                    )
                    logger.info("Marking as sent %r %r", destination, dev_list_id)
                    yield self.store.mark_as_sent_devices_by_remote(
                        destination, dev_list_id
                    )

                self.last_device_stream_id_by_dest[destination] = device_stream_id
                self.last_device_list_stream_id_by_dest[destination] = dev_list_id
            else:
                break
    except NotRetryingDestination as e:
        logger.debug(
            "TX [%s] not ready for retry yet (next retry at %s) - "
            "dropping transaction for now",
            destination,
            datetime.datetime.fromtimestamp(
                (e.retry_last_ts + e.retry_interval) / 1000.0
            ),
        )
    except FederationDeniedError as e:
        logger.info(e)
    except Exception as e:
        logger.warning(
            "TX [%s] Failed to send transaction: %s",
            destination,
            e,
        )
        for p, _ in pending_pdus:
            logger.info("Failed to send event %s to %s", p.event_id, destination)
    finally:
        # We want to be *very* sure we delete this after we stop processing
        self.pending_transactions.pop(destination, None)
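
# For context: the loop above is started lazily whenever new data is queued
# for a destination. A minimal sketch of such a kick-off helper, matching the
# flag-based locking visible in the loop; the name `_attempt_new_transaction`
# and the `run_in_background` scheduler are assumptions, not necessarily the
# real call sites:
def _attempt_new_transaction(self, destination):
    # `pending_transactions` doubles as a per-destination lock: if a
    # transmission loop is already running for this destination, anything
    # just queued will be picked up by that loop's next iteration.
    if destination in self.pending_transactions:
        return
    run_in_background(self._transaction_transmission_loop, destination)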

def _transaction_transmission_loop(self):
    pending_pdus = []
    try:
        self.transmission_loop_running = True

        # This will throw if we wouldn't retry. We do this here so we fail
        # quickly, but we will later check this again in the http client,
        # hence we throw the result away.
        yield get_retry_limiter(self._destination, self._clock, self._store)

        pending_pdus = []
        while True:
            device_message_edus, device_stream_id, dev_list_id = (
                yield self._get_new_device_messages()
            )

            # BEGIN CRITICAL SECTION
            #
            # In order to avoid a race condition, we need to make sure that
            # the following code (from popping the queues up to the point
            # where we decide if we actually have any pending messages) is
            # atomic - otherwise new PDUs or EDUs might arrive in the
            # meantime, but not get sent because we hold the
            # transmission_loop_running flag.

            pending_pdus = self._pending_pdus

            # We can only include at most 50 PDUs per transaction
            pending_pdus, self._pending_pdus = pending_pdus[:50], pending_pdus[50:]

            pending_edus = []

            pending_edus.extend(self._get_rr_edus(force_flush=False))

            # We can only include at most 100 EDUs per transaction
            pending_edus.extend(self._pop_pending_edus(100 - len(pending_edus)))

            pending_edus.extend(self._pending_edus_keyed.values())
            self._pending_edus_keyed = {}

            pending_edus.extend(device_message_edus)

            pending_presence = self._pending_presence
            self._pending_presence = {}
            if pending_presence:
                pending_edus.append(
                    Edu(
                        origin=self._server_name,
                        destination=self._destination,
                        edu_type="m.presence",
                        content={
                            "push": [
                                format_user_presence_state(
                                    presence, self._clock.time_msec()
                                )
                                for presence in pending_presence.values()
                            ]
                        },
                    )
                )

            if pending_pdus:
                logger.debug(
                    "TX [%s] len(pending_pdus_by_dest[dest]) = %d",
                    self._destination,
                    len(pending_pdus),
                )

            if not pending_pdus and not pending_edus:
                logger.debug("TX [%s] Nothing to send", self._destination)
                self._last_device_stream_id = device_stream_id
                return

            # if we've decided to send a transaction anyway, and we have room, we
            # may as well send any pending RRs
            if len(pending_edus) < 100:
                pending_edus.extend(self._get_rr_edus(force_flush=True))

            # END CRITICAL SECTION

            success = yield self._transaction_manager.send_new_transaction(
                self._destination, pending_pdus, pending_edus
            )
            if success:
                sent_transactions_counter.inc()
                sent_edus_counter.inc(len(pending_edus))
                for edu in pending_edus:
                    sent_edus_by_type.labels(edu.edu_type).inc()
                # Remove the acknowledged device messages from the database.
                # Only bother if we actually sent some device messages.
                if device_message_edus:
                    yield self._store.delete_device_msgs_for_remote(
                        self._destination, device_stream_id
                    )
                    logger.info(
                        "Marking as sent %r %r", self._destination, dev_list_id
                    )
                    yield self._store.mark_as_sent_devices_by_remote(
                        self._destination, dev_list_id
                    )

                self._last_device_stream_id = device_stream_id
                self._last_device_list_stream_id = dev_list_id
            else:
                break
    except NotRetryingDestination as e:
        logger.debug(
            "TX [%s] not ready for retry yet (next retry at %s) - "
            "dropping transaction for now",
            self._destination,
            datetime.datetime.fromtimestamp(
                (e.retry_last_ts + e.retry_interval) / 1000.0
            ),
        )
    except FederationDeniedError as e:
        logger.info(e)
    except HttpResponseException as e:
        logger.warning(
            "TX [%s] Received %d response to transaction: %s",
            self._destination,
            e.code,
            e,
        )
    except RequestSendFailed as e:
        logger.warning(
            "TX [%s] Failed to send transaction: %s", self._destination, e
        )
        for p, _ in pending_pdus:
            logger.info(
                "Failed to send event %s to %s", p.event_id, self._destination
            )
    except Exception:
        logger.exception("TX [%s] Failed to send transaction", self._destination)
        for p, _ in pending_pdus:
            logger.info(
                "Failed to send event %s to %s", p.event_id, self._destination
            )
    finally:
        # We want to be *very* sure we clear this after we stop processing
        self.transmission_loop_running = False

async def _transaction_transmission_loop(self) -> None:
    pending_pdus = []  # type: List[EventBase]
    try:
        self.transmission_loop_running = True

        # This will throw if we wouldn't retry. We do this here so we fail
        # quickly, but we will later check this again in the http client,
        # hence we throw the result away.
        await get_retry_limiter(self._destination, self._clock, self._store)

        pending_pdus = []
        while True:
            # We have to keep 2 free slots for presence and rr_edus
            limit = MAX_EDUS_PER_TRANSACTION - 2

            device_update_edus, dev_list_id = await self._get_device_update_edus(
                limit
            )

            limit -= len(device_update_edus)

            (
                to_device_edus,
                device_stream_id,
            ) = await self._get_to_device_message_edus(limit)

            pending_edus = device_update_edus + to_device_edus

            # BEGIN CRITICAL SECTION
            #
            # In order to avoid a race condition, we need to make sure that
            # the following code (from popping the queues up to the point
            # where we decide if we actually have any pending messages) is
            # atomic - otherwise new PDUs or EDUs might arrive in the
            # meantime, but not get sent because we hold the
            # transmission_loop_running flag.

            pending_pdus = self._pending_pdus

            # We can only include at most 50 PDUs per transaction
            pending_pdus, self._pending_pdus = pending_pdus[:50], pending_pdus[50:]

            pending_edus.extend(self._get_rr_edus(force_flush=False))
            pending_presence = self._pending_presence
            self._pending_presence = {}
            if pending_presence:
                pending_edus.append(
                    Edu(
                        origin=self._server_name,
                        destination=self._destination,
                        edu_type="m.presence",
                        content={
                            "push": [
                                format_user_presence_state(
                                    presence, self._clock.time_msec()
                                )
                                for presence in pending_presence.values()
                            ]
                        },
                    )
                )

            pending_edus.extend(
                self._pop_pending_edus(MAX_EDUS_PER_TRANSACTION - len(pending_edus))
            )
            while (
                len(pending_edus) < MAX_EDUS_PER_TRANSACTION
                and self._pending_edus_keyed
            ):
                _, val = self._pending_edus_keyed.popitem()
                pending_edus.append(val)

            if pending_pdus:
                logger.debug(
                    "TX [%s] len(pending_pdus_by_dest[dest]) = %d",
                    self._destination,
                    len(pending_pdus),
                )

            if not pending_pdus and not pending_edus:
                logger.debug("TX [%s] Nothing to send", self._destination)
                self._last_device_stream_id = device_stream_id
                return

            # if we've decided to send a transaction anyway, and we have room, we
            # may as well send any pending RRs
            if len(pending_edus) < MAX_EDUS_PER_TRANSACTION:
                pending_edus.extend(self._get_rr_edus(force_flush=True))

            # END CRITICAL SECTION

            success = await self._transaction_manager.send_new_transaction(
                self._destination, pending_pdus, pending_edus
            )
            if success:
                sent_transactions_counter.inc()
                sent_edus_counter.inc(len(pending_edus))
                for edu in pending_edus:
                    sent_edus_by_type.labels(edu.edu_type).inc()
                # Remove the acknowledged device messages from the database.
                # Only bother if we actually sent some device messages.
                if to_device_edus:
                    await self._store.delete_device_msgs_for_remote(
                        self._destination, device_stream_id
                    )

                # also mark the device updates as sent
                if device_update_edus:
                    logger.info(
                        "Marking as sent %r %r", self._destination, dev_list_id
                    )
                    await self._store.mark_as_sent_devices_by_remote(
                        self._destination, dev_list_id
                    )

                self._last_device_stream_id = device_stream_id
                self._last_device_list_stream_id = dev_list_id
            else:
                break
    except NotRetryingDestination as e:
        logger.debug(
            "TX [%s] not ready for retry yet (next retry at %s) - "
            "dropping transaction for now",
            self._destination,
            datetime.datetime.fromtimestamp(
                (e.retry_last_ts + e.retry_interval) / 1000.0
            ),
        )

        if e.retry_interval > 60 * 60 * 1000:
            # we won't retry for another hour!
            # (this suggests a significant outage)
            # We drop pending PDUs and EDUs because otherwise they will
            # rack up indefinitely.
            # Note that:
            # - the EDUs that are being dropped here are those that we can
            #   afford to drop (specifically, only typing notifications,
            #   read receipts and presence updates are being dropped here)
            # - Other EDUs such as to_device messages are queued with a
            #   different mechanism
            # - this is all volatile state that would be lost if the
            #   federation sender restarted anyway

            # dropping read receipts is a bit sad but should be solved
            # through another mechanism, because this is all volatile!
            self._pending_pdus = []
            self._pending_edus = []
            self._pending_edus_keyed = {}
            self._pending_presence = {}
            self._pending_rrs = {}
    except FederationDeniedError as e:
        logger.info(e)
    except HttpResponseException as e:
        logger.warning(
            "TX [%s] Received %d response to transaction: %s",
            self._destination,
            e.code,
            e,
        )
    except RequestSendFailed as e:
        logger.warning(
            "TX [%s] Failed to send transaction: %s", self._destination, e
        )
        for p in pending_pdus:
            logger.info(
                "Failed to send event %s to %s", p.event_id, self._destination
            )
    except Exception:
        logger.exception("TX [%s] Failed to send transaction", self._destination)
        for p in pending_pdus:
            logger.info(
                "Failed to send event %s to %s", p.event_id, self._destination
            )
    finally:
        # We want to be *very* sure we clear this after we stop processing
        self.transmission_loop_running = False
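
# The EDU budget referenced above. The Matrix federation API caps a single
# transaction at 50 PDUs and 100 EDUs, which matches the slicing of
# `pending_pdus` to 50 and the arithmetic against this constant:
MAX_EDUS_PER_TRANSACTION = 100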

async def _catch_up_transmission_loop(self) -> None:
    first_catch_up_check = self._last_successful_stream_ordering is None

    if first_catch_up_check:
        # first catch-up, so get last_successful_stream_ordering from the database
        self._last_successful_stream_ordering = (
            await self._store.get_destination_last_successful_stream_ordering(
                self._destination
            )
        )

    _tmp_last_successful_stream_ordering = self._last_successful_stream_ordering
    if _tmp_last_successful_stream_ordering is None:
        # if it's still None, then this means we don't have the information
        # in our database: we haven't successfully sent a PDU to this server
        # (at least since the introduction of the feature tracking
        # last_successful_stream_ordering).
        # Sadly, this means we can't do anything here as we don't know what
        # needs catching up, so catching up is futile; let's stop.
        self._catching_up = False
        return

    last_successful_stream_ordering: int = _tmp_last_successful_stream_ordering

    # get at most 50 catchup room/PDUs
    while True:
        event_ids = await self._store.get_catch_up_room_event_ids(
            self._destination, last_successful_stream_ordering
        )

        if not event_ids:
            # No more events to catch up on, but we can't ignore the chance
            # of a race condition, so we check that no new events have been
            # skipped due to us being in catch-up mode

            if self._catchup_last_skipped > last_successful_stream_ordering:
                # another event has been skipped because we were in catch-up mode
                continue

            # we are done catching up!
            self._catching_up = False
            break

        if first_catch_up_check:
            # as this is our check for needing catch-up, we may have PDUs in
            # the queue from before we *knew* we had to do catch-up, so
            # clear those out now.
            self._start_catching_up()

        # fetch the relevant events from the event store
        # - redacted behaviour of REDACT is fine, since we only send metadata
        #   of redacted events to the destination.
        # - don't need to worry about rejected events as we do not actively
        #   forward received events over federation.
        catchup_pdus = await self._store.get_events_as_list(event_ids)
        if not catchup_pdus:
            raise AssertionError(
                "No events retrieved when we asked for %r. "
                "This should not happen." % event_ids
            )

        # We send transactions with events from one room only, as it's likely
        # that the remote will have to do additional processing, which may
        # take some time. It's better to give it small amounts of work
        # rather than risk the request timing out and repeatedly being
        # retried, and not making any progress.
        #
        # Note: `catchup_pdus` will have exactly one PDU per room.
        for pdu in catchup_pdus:
            # The PDU from the DB will be the last PDU in the room from
            # *this server* that wasn't sent to the remote. However, other
            # servers may have sent lots of events since then, and we want
            # to try and tell the remote only about the *latest* events in
            # the room. This is so that it doesn't get inundated by events
            # from various parts of the DAG, which all need to be processed.
            #
            # Note: this does mean that in large rooms a server coming back
            # online will get sent the same events from all the different
            # servers, but the remote will correctly deduplicate them and
            # handle it only once.

            # Step 1, fetch the current extremities
            extrems = await self._store.get_prev_events_for_room(pdu.room_id)

            if pdu.event_id in extrems:
                # If the event is in the extremities, then great! We can just
                # use that without having to do further checks.
                room_catchup_pdus = [pdu]
            else:
                # If not, fetch the extremities and figure out which we can
                # send.
                extrem_events = await self._store.get_events_as_list(extrems)

                new_pdus = []
                for p in extrem_events:
                    # We pulled this from the DB, so it'll be non-null
                    assert p.internal_metadata.stream_ordering

                    # Filter out events that happened before the remote went
                    # offline
                    if (
                        p.internal_metadata.stream_ordering
                        < last_successful_stream_ordering
                    ):
                        continue

                    # Filter out events where the server is not in the room,
                    # e.g. it may have left/been kicked. *Ideally* we'd pull
                    # out the kick and send that, but it's a rare edge case
                    # so we don't bother for now (the server that sent the
                    # kick should send it out if it's online).
                    hosts = await self._state.get_hosts_in_room_at_events(
                        p.room_id, [p.event_id]
                    )
                    if self._destination not in hosts:
                        continue

                    new_pdus.append(p)

                # If we've filtered out all the extremities, fall back to
                # sending the original event. This should ensure that the
                # server gets at least some of the missed events (especially
                # if the other sending servers are up).
                if new_pdus:
                    room_catchup_pdus = new_pdus
                else:
                    room_catchup_pdus = [pdu]

            logger.info(
                "Catching up rooms to %s: %r", self._destination, pdu.room_id
            )

            await self._transaction_manager.send_new_transaction(
                self._destination, room_catchup_pdus, []
            )

            sent_transactions_counter.inc()

            # We pulled this from the DB, so it'll be non-null
            assert pdu.internal_metadata.stream_ordering

            # Note that we mark the last successful stream ordering as that
            # from the *original* PDU, rather than the PDU(s) we actually
            # send. This is because we use it to mark our position in the
            # queue of missed PDUs to process.
            last_successful_stream_ordering = pdu.internal_metadata.stream_ordering

            self._last_successful_stream_ordering = last_successful_stream_ordering
            await self._store.set_destination_last_successful_stream_ordering(
                self._destination, last_successful_stream_ordering
            )
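
# A minimal sketch of the `_start_catching_up` helper called above, assuming
# it does exactly what the surrounding comments describe: flag the destination
# as being in catch-up mode and throw away the normal PDU queue (those events
# are recovered later via `get_catch_up_room_event_ids`):
def _start_catching_up(self) -> None:
    self._catching_up = True
    self._pending_pdus = []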

async def _transaction_transmission_loop(self) -> None:
    pending_pdus: List[EventBase] = []
    try:
        self.transmission_loop_running = True

        # This will throw if we wouldn't retry. We do this here so we fail
        # quickly, but we will later check this again in the http client,
        # hence we throw the result away.
        await get_retry_limiter(self._destination, self._clock, self._store)

        if self._catching_up:
            # we potentially need to catch up first
            await self._catch_up_transmission_loop()
            if self._catching_up:
                # not caught up yet
                return

        pending_pdus = []
        while True:
            self._new_data_to_send = False

            async with _TransactionQueueManager(self) as (
                pending_pdus,
                pending_edus,
            ):
                if not pending_pdus and not pending_edus:
                    logger.debug("TX [%s] Nothing to send", self._destination)

                    # If we've been told about new things to send during
                    # checking for things to send, we try looking again.
                    # Otherwise new PDUs or EDUs might arrive in the meantime,
                    # but not get sent because we hold the
                    # `transmission_loop_running` flag.
                    if self._new_data_to_send:
                        continue
                    else:
                        return

                if pending_pdus:
                    logger.debug(
                        "TX [%s] len(pending_pdus_by_dest[dest]) = %d",
                        self._destination,
                        len(pending_pdus),
                    )

                await self._transaction_manager.send_new_transaction(
                    self._destination, pending_pdus, pending_edus
                )

                sent_transactions_counter.inc()
                sent_edus_counter.inc(len(pending_edus))
                for edu in pending_edus:
                    sent_edus_by_type.labels(edu.edu_type).inc()
    except NotRetryingDestination as e:
        logger.debug(
            "TX [%s] not ready for retry yet (next retry at %s) - "
            "dropping transaction for now",
            self._destination,
            datetime.datetime.fromtimestamp(
                (e.retry_last_ts + e.retry_interval) / 1000.0
            ),
        )

        if e.retry_interval > 60 * 60 * 1000:
            # we won't retry for another hour!
            # (this suggests a significant outage)
            # We drop pending EDUs because otherwise they will
            # rack up indefinitely.
            # (Dropping PDUs is already performed by `_start_catching_up`.)
            # Note that:
            # - the EDUs that are being dropped here are those that we can
            #   afford to drop (specifically, only typing notifications,
            #   read receipts and presence updates are being dropped here)
            # - Other EDUs such as to_device messages are queued with a
            #   different mechanism
            # - this is all volatile state that would be lost if the
            #   federation sender restarted anyway

            # dropping read receipts is a bit sad but should be solved
            # through another mechanism, because this is all volatile!
            self._pending_edus = []
            self._pending_edus_keyed = {}
            self._pending_presence = {}
            self._pending_rrs = {}

            self._start_catching_up()
    except FederationDeniedError as e:
        logger.info(e)
    except HttpResponseException as e:
        logger.warning(
            "TX [%s] Received %d response to transaction: %s",
            self._destination,
            e.code,
            e,
        )
    except RequestSendFailed as e:
        logger.warning(
            "TX [%s] Failed to send transaction: %s", self._destination, e
        )
        for p in pending_pdus:
            logger.info(
                "Failed to send event %s to %s", p.event_id, self._destination
            )
    except Exception:
        logger.exception("TX [%s] Failed to send transaction", self._destination)
        for p in pending_pdus:
            logger.info(
                "Failed to send event %s to %s", p.event_id, self._destination
            )
    finally:
        # We want to be *very* sure we clear this after we stop processing
        self.transmission_loop_running = False
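
# `_TransactionQueueManager` above replaces the inline BEGIN/END CRITICAL
# SECTION of the older loops with an async context manager. A simplified,
# runnable sketch of its shape; the real manager also assembles device,
# presence and receipt EDUs and advances the stream tokens on a clean exit,
# and everything here beyond the attribute names visible in the loops is an
# assumption:
class _TransactionQueueManager:
    def __init__(self, queue) -> None:
        self._queue = queue
        self._pdus = []
        self._edus = []

    async def __aenter__(self):
        # Atomically pop up to 50 PDUs and 100 EDUs, as the older loops did.
        self._pdus, self._queue._pending_pdus = (
            self._queue._pending_pdus[:50],
            self._queue._pending_pdus[50:],
        )
        self._edus = self._queue._pop_pending_edus(MAX_EDUS_PER_TRANSACTION)
        return self._pdus, self._edus

    async def __aexit__(self, exc_type, exc, tb):
        if exc_type is not None:
            # One plausible design: requeue the PDUs so a later run retries
            # them rather than losing them on a failed send.
            self._queue._pending_pdus = self._pdus + self._queue._pending_pdus
        return False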

async def _catch_up_transmission_loop(self) -> None:
    first_catch_up_check = self._last_successful_stream_ordering is None

    if first_catch_up_check:
        # first catch-up, so get last_successful_stream_ordering from the database
        self._last_successful_stream_ordering = (
            await self._store.get_destination_last_successful_stream_ordering(
                self._destination
            )
        )

    if self._last_successful_stream_ordering is None:
        # if it's still None, then this means we don't have the information
        # in our database: we haven't successfully sent a PDU to this server
        # (at least since the introduction of the feature tracking
        # last_successful_stream_ordering).
        # Sadly, this means we can't do anything here as we don't know what
        # needs catching up, so catching up is futile; let's stop.
        self._catching_up = False
        return

    # get at most 50 catchup room/PDUs
    while True:
        event_ids = await self._store.get_catch_up_room_event_ids(
            self._destination,
            self._last_successful_stream_ordering,
        )

        if not event_ids:
            # No more events to catch up on, but we can't ignore the chance
            # of a race condition, so we check that no new events have been
            # skipped due to us being in catch-up mode

            if self._catchup_last_skipped > self._last_successful_stream_ordering:
                # another event has been skipped because we were in catch-up mode
                continue

            # we are done catching up!
            self._catching_up = False
            break

        if first_catch_up_check:
            # as this is our check for needing catch-up, we may have PDUs in
            # the queue from before we *knew* we had to do catch-up, so
            # clear those out now.
            self._start_catching_up()

        # fetch the relevant events from the event store
        # - redacted behaviour of REDACT is fine, since we only send metadata
        #   of redacted events to the destination.
        # - don't need to worry about rejected events as we do not actively
        #   forward received events over federation.
        catchup_pdus = await self._store.get_events_as_list(event_ids)
        if not catchup_pdus:
            raise AssertionError(
                "No events retrieved when we asked for %r. "
                "This should not happen." % event_ids
            )

        if logger.isEnabledFor(logging.INFO):
            rooms = [p.room_id for p in catchup_pdus]
            logger.info("Catching up rooms to %s: %r", self._destination, rooms)

        success = await self._transaction_manager.send_new_transaction(
            self._destination, catchup_pdus, []
        )

        if not success:
            return

        sent_transactions_counter.inc()
        final_pdu = catchup_pdus[-1]
        self._last_successful_stream_ordering = cast(
            int, final_pdu.internal_metadata.stream_ordering
        )
        await self._store.set_destination_last_successful_stream_ordering(
            self._destination, self._last_successful_stream_ordering
        )
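
# For context: `_catchup_last_skipped`, checked in both catch-up loops, is
# advanced whenever a PDU is deliberately *not* queued because the destination
# is in catch-up mode. A sketch of that producer-side check; the method name
# `send_pdu` is an assumption:
def send_pdu(self, pdu: EventBase) -> None:
    if not self._catching_up or self._last_successful_stream_ordering is None:
        # not catching up (or we don't yet know whether we need to), so
        # queue the PDU for the normal transmission loop
        self._pending_pdus.append(pdu)
    else:
        # in catch-up mode: skip the queue and record how far we skipped,
        # so the catch-up loop knows there is more to do
        assert pdu.internal_metadata.stream_ordering
        self._catchup_last_skipped = pdu.internal_metadata.stream_ordering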

def _transaction_transmission_loop(self, destination):
    pending_pdus = []
    try:
        self.pending_transactions[destination] = 1

        # This will throw if we wouldn't retry. We do this here so we fail
        # quickly, but we will later check this again in the http client,
        # hence we throw the result away.
        yield get_retry_limiter(destination, self.clock, self.store)

        pending_pdus = []
        while True:
            device_message_edus, device_stream_id, dev_list_id = (
                yield self._get_new_device_messages(destination)
            )

            # BEGIN CRITICAL SECTION
            #
            # In order to avoid a race condition, we need to make sure that
            # the following code (from popping the queues up to the point
            # where we decide if we actually have any pending messages) is
            # atomic - otherwise new PDUs or EDUs might arrive in the
            # meantime, but not get sent because we hold the
            # pending_transactions flag.

            pending_pdus = self.pending_pdus_by_dest.pop(destination, [])

            # We can only include at most 50 PDUs per transaction
            pending_pdus, leftover_pdus = pending_pdus[:50], pending_pdus[50:]
            if leftover_pdus:
                self.pending_pdus_by_dest[destination] = leftover_pdus

            pending_edus = self.pending_edus_by_dest.pop(destination, [])

            # We can only include at most 100 EDUs per transaction
            pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:]
            if leftover_edus:
                self.pending_edus_by_dest[destination] = leftover_edus

            pending_presence = self.pending_presence_by_dest.pop(destination, {})

            pending_edus.extend(
                self.pending_edus_keyed_by_dest.pop(destination, {}).values()
            )

            pending_edus.extend(device_message_edus)
            if pending_presence:
                pending_edus.append(
                    Edu(
                        origin=self.server_name,
                        destination=destination,
                        edu_type="m.presence",
                        content={
                            "push": [
                                format_user_presence_state(
                                    presence, self.clock.time_msec()
                                )
                                for presence in pending_presence.values()
                            ]
                        },
                    )
                )

            if pending_pdus:
                logger.debug(
                    "TX [%s] len(pending_pdus_by_dest[dest]) = %d",
                    destination,
                    len(pending_pdus),
                )

            if not pending_pdus and not pending_edus:
                logger.debug("TX [%s] Nothing to send", destination)
                self.last_device_stream_id_by_dest[destination] = device_stream_id
                return

            # END CRITICAL SECTION

            success = yield self._send_new_transaction(
                destination, pending_pdus, pending_edus
            )
            if success:
                sent_transactions_counter.inc()
                # Remove the acknowledged device messages from the database.
                # Only bother if we actually sent some device messages.
                if device_message_edus:
                    yield self.store.delete_device_msgs_for_remote(
                        destination, device_stream_id
                    )
                    logger.info("Marking as sent %r %r", destination, dev_list_id)
                    yield self.store.mark_as_sent_devices_by_remote(
                        destination, dev_list_id
                    )

                self.last_device_stream_id_by_dest[destination] = device_stream_id
                self.last_device_list_stream_id_by_dest[destination] = dev_list_id
            else:
                break
    except NotRetryingDestination as e:
        logger.debug(
            "TX [%s] not ready for retry yet (next retry at %s) - "
            "dropping transaction for now",
            destination,
            datetime.datetime.fromtimestamp(
                (e.retry_last_ts + e.retry_interval) / 1000.0
            ),
        )
    except FederationDeniedError as e:
        logger.info(e)
    except Exception as e:
        logger.warning(
            "TX [%s] Failed to send transaction: %s",
            destination,
            e,
        )
        for p, _ in pending_pdus:
            logger.info("Failed to send event %s to %s", p.event_id, destination)
    finally:
        # We want to be *very* sure we delete this after we stop processing
        self.pending_transactions.pop(destination, None)

def _transaction_transmission_loop(self):
    pending_pdus = []
    try:
        self.transmission_loop_running = True

        # This will throw if we wouldn't retry. We do this here so we fail
        # quickly, but we will later check this again in the http client,
        # hence we throw the result away.
        yield get_retry_limiter(self._destination, self._clock, self._store)

        pending_pdus = []
        while True:
            device_message_edus, device_stream_id, dev_list_id = (
                # We have to keep 2 free slots for presence and rr_edus
                yield self._get_new_device_messages(MAX_EDUS_PER_TRANSACTION - 2)
            )

            # BEGIN CRITICAL SECTION
            #
            # In order to avoid a race condition, we need to make sure that
            # the following code (from popping the queues up to the point
            # where we decide if we actually have any pending messages) is
            # atomic - otherwise new PDUs or EDUs might arrive in the
            # meantime, but not get sent because we hold the
            # transmission_loop_running flag.

            pending_pdus = self._pending_pdus

            # We can only include at most 50 PDUs per transaction
            pending_pdus, self._pending_pdus = pending_pdus[:50], pending_pdus[50:]

            pending_edus = []

            # We can only include at most 100 EDUs per transaction
            # rr_edus and pending_presence take at most one slot each
            pending_edus.extend(self._get_rr_edus(force_flush=False))
            pending_presence = self._pending_presence
            self._pending_presence = {}
            if pending_presence:
                pending_edus.append(
                    Edu(
                        origin=self._server_name,
                        destination=self._destination,
                        edu_type="m.presence",
                        content={
                            "push": [
                                format_user_presence_state(
                                    presence, self._clock.time_msec()
                                )
                                for presence in pending_presence.values()
                            ]
                        },
                    )
                )
            pending_edus.extend(device_message_edus)
            pending_edus.extend(
                self._pop_pending_edus(MAX_EDUS_PER_TRANSACTION - len(pending_edus))
            )
            while (
                len(pending_edus) < MAX_EDUS_PER_TRANSACTION
                and self._pending_edus_keyed
            ):
                _, val = self._pending_edus_keyed.popitem()
                pending_edus.append(val)

            if pending_pdus:
                logger.debug(
                    "TX [%s] len(pending_pdus_by_dest[dest]) = %d",
                    self._destination,
                    len(pending_pdus),
                )

            if not pending_pdus and not pending_edus:
                logger.debug("TX [%s] Nothing to send", self._destination)
                self._last_device_stream_id = device_stream_id
                return

            # if we've decided to send a transaction anyway, and we have room, we
            # may as well send any pending RRs
            if len(pending_edus) < MAX_EDUS_PER_TRANSACTION:
                pending_edus.extend(self._get_rr_edus(force_flush=True))

            # END CRITICAL SECTION

            success = yield self._transaction_manager.send_new_transaction(
                self._destination, pending_pdus, pending_edus
            )
            if success:
                sent_transactions_counter.inc()
                sent_edus_counter.inc(len(pending_edus))
                for edu in pending_edus:
                    sent_edus_by_type.labels(edu.edu_type).inc()
                # Remove the acknowledged device messages from the database.
                # Only bother if we actually sent some device messages.
                if device_message_edus:
                    yield self._store.delete_device_msgs_for_remote(
                        self._destination, device_stream_id
                    )
                    logger.info(
                        "Marking as sent %r %r", self._destination, dev_list_id
                    )
                    yield self._store.mark_as_sent_devices_by_remote(
                        self._destination, dev_list_id
                    )

                self._last_device_stream_id = device_stream_id
                self._last_device_list_stream_id = dev_list_id
            else:
                break
    except NotRetryingDestination as e:
        logger.debug(
            "TX [%s] not ready for retry yet (next retry at %s) - "
            "dropping transaction for now",
            self._destination,
            datetime.datetime.fromtimestamp(
                (e.retry_last_ts + e.retry_interval) / 1000.0
            ),
        )
    except FederationDeniedError as e:
        logger.info(e)
    except HttpResponseException as e:
        logger.warning(
            "TX [%s] Received %d response to transaction: %s",
            self._destination,
            e.code,
            e,
        )
    except RequestSendFailed as e:
        logger.warning(
            "TX [%s] Failed to send transaction: %s", self._destination, e
        )
        for p, _ in pending_pdus:
            logger.info(
                "Failed to send event %s to %s", p.event_id, self._destination
            )
    except Exception:
        logger.exception("TX [%s] Failed to send transaction", self._destination)
        for p, _ in pending_pdus:
            logger.info(
                "Failed to send event %s to %s", p.event_id, self._destination
            )
    finally:
        # We want to be *very* sure we clear this after we stop processing
        self.transmission_loop_running = False
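
# The counters incremented throughout these loops are Prometheus metrics.
# A minimal sketch of their definitions; the metric names are assumptions
# modelled on the `labels(edu.edu_type)` usage above:
from prometheus_client import Counter

sent_transactions_counter = Counter(
    "synapse_federation_client_sent_transactions", ""
)
sent_edus_counter = Counter("synapse_federation_client_sent_edus", "")
sent_edus_by_type = Counter(
    "synapse_federation_client_sent_edus_by_type", "", ["type"]
)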