class DeviceListEduUpdater(object): "Handles incoming device list updates from federation and updates the DB" def __init__(self, hs, device_handler): self.store = hs.get_datastore() self.federation = hs.get_federation_client() self.clock = hs.get_clock() self.device_handler = device_handler self._remote_edu_linearizer = Linearizer(name="remote_device_list") # user_id -> list of updates waiting to be handled. self._pending_updates = {} # Recently seen stream ids. We don't bother keeping these in the DB, # but they're useful to have them about to reduce the number of spurious # resyncs. self._seen_updates = ExpiringCache( cache_name="device_update_edu", clock=self.clock, max_len=10000, expiry_ms=30 * 60 * 1000, iterable=True, ) @defer.inlineCallbacks def incoming_device_list_update(self, origin, edu_content): """Called on incoming device list update from federation. Responsible for parsing the EDU and adding to pending updates list. """ user_id = edu_content.pop("user_id") device_id = edu_content.pop("device_id") stream_id = str(edu_content.pop("stream_id")) # They may come as ints prev_ids = edu_content.pop("prev_id", []) prev_ids = [str(p) for p in prev_ids] # They may come as ints if get_domain_from_id(user_id) != origin: # TODO: Raise? logger.warning("Got device list update edu for %r from %r", user_id, origin) return room_ids = yield self.store.get_rooms_for_user(user_id) if not room_ids: # We don't share any rooms with this user. Ignore update, as we # probably won't get any further updates. return self._pending_updates.setdefault(user_id, []).append( (device_id, stream_id, prev_ids, edu_content)) yield self._handle_device_updates(user_id) @measure_func("_incoming_device_list_update") @defer.inlineCallbacks def _handle_device_updates(self, user_id): "Actually handle pending updates." with (yield self._remote_edu_linearizer.queue(user_id)): pending_updates = self._pending_updates.pop(user_id, []) if not pending_updates: # This can happen since we batch updates return # Given a list of updates we check if we need to resync. This # happens if we've missed updates. resync = yield self._need_to_do_resync(user_id, pending_updates) if resync: # Fetch all devices for the user. origin = get_domain_from_id(user_id) try: result = yield self.federation.query_user_devices( origin, user_id) except NotRetryingDestination: # TODO: Remember that we are now out of sync and try again # later logger.warn( "Failed to handle device list update for %s," " we're not retrying the remote", user_id, ) # We abort on exceptions rather than accepting the update # as otherwise synapse will 'forget' that its device list # is out of date. If we bail then we will retry the resync # next time we get a device list update for this user_id. # This makes it more likely that the device lists will # eventually become consistent. return except FederationDeniedError as e: logger.info(e) return except Exception: # TODO: Remember that we are now out of sync and try again # later logger.exception( "Failed to handle device list update for %s", user_id) return stream_id = result["stream_id"] devices = result["devices"] yield self.store.update_remote_device_list_cache( user_id, devices, stream_id, ) device_ids = [device["device_id"] for device in devices] yield self.device_handler.notify_device_update( user_id, device_ids) else: # Simply update the single device, since we know that is the only # change (becuase of the single prev_id matching the current cache) for device_id, stream_id, prev_ids, content in pending_updates: yield self.store.update_remote_device_list_cache_entry( user_id, device_id, content, stream_id, ) yield self.device_handler.notify_device_update( user_id, [device_id for device_id, _, _, _ in pending_updates]) self._seen_updates.setdefault(user_id, set()).update( stream_id for _, stream_id, _, _ in pending_updates) @defer.inlineCallbacks def _need_to_do_resync(self, user_id, updates): """Given a list of updates for a user figure out if we need to do a full resync, or whether we have enough data that we can just apply the delta. """ seen_updates = self._seen_updates.get(user_id, set()) extremity = yield self.store.get_device_list_last_stream_id_for_remote( user_id) stream_id_in_updates = set() # stream_ids in updates list for _, stream_id, prev_ids, _ in updates: if not prev_ids: # We always do a resync if there are no previous IDs defer.returnValue(True) for prev_id in prev_ids: if prev_id == extremity: continue elif prev_id in seen_updates: continue elif prev_id in stream_id_in_updates: continue else: defer.returnValue(True) stream_id_in_updates.add(stream_id) defer.returnValue(False)
class DeviceListUpdater: "Handles incoming device list updates from federation and updates the DB" def __init__(self, hs, device_handler): self.store = hs.get_datastore() self.federation = hs.get_federation_client() self.clock = hs.get_clock() self.device_handler = device_handler self._remote_edu_linearizer = Linearizer(name="remote_device_list") # user_id -> list of updates waiting to be handled. self._pending_updates = {} # Recently seen stream ids. We don't bother keeping these in the DB, # but they're useful to have them about to reduce the number of spurious # resyncs. self._seen_updates = ExpiringCache( cache_name="device_update_edu", clock=self.clock, max_len=10000, expiry_ms=30 * 60 * 1000, iterable=True, ) # Attempt to resync out of sync device lists every 30s. self._resync_retry_in_progress = False self.clock.looping_call( run_as_background_process, 30 * 1000, func=self._maybe_retry_device_resync, desc="_maybe_retry_device_resync", ) @trace async def incoming_device_list_update(self, origin, edu_content): """Called on incoming device list update from federation. Responsible for parsing the EDU and adding to pending updates list. """ set_tag("origin", origin) set_tag("edu_content", edu_content) user_id = edu_content.pop("user_id") device_id = edu_content.pop("device_id") stream_id = str(edu_content.pop("stream_id")) # They may come as ints prev_ids = edu_content.pop("prev_id", []) prev_ids = [str(p) for p in prev_ids] # They may come as ints if get_domain_from_id(user_id) != origin: # TODO: Raise? logger.warning( "Got device list update edu for %r/%r from %r", user_id, device_id, origin, ) set_tag("error", True) log_kv({ "message": "Got a device list update edu from a user and " "device which does not match the origin of the request.", "user_id": user_id, "device_id": device_id, }) return room_ids = await self.store.get_rooms_for_user(user_id) if not room_ids: # We don't share any rooms with this user. Ignore update, as we # probably won't get any further updates. set_tag("error", True) log_kv({ "message": "Got an update from a user for which " "we don't share any rooms", "other user_id": user_id, }) logger.warning( "Got device list update edu for %r/%r, but don't share a room", user_id, device_id, ) return logger.debug("Received device list update for %r/%r", user_id, device_id) self._pending_updates.setdefault(user_id, []).append( (device_id, stream_id, prev_ids, edu_content)) await self._handle_device_updates(user_id) @measure_func("_incoming_device_list_update") async def _handle_device_updates(self, user_id): "Actually handle pending updates." with (await self._remote_edu_linearizer.queue(user_id)): pending_updates = self._pending_updates.pop(user_id, []) if not pending_updates: # This can happen since we batch updates return for device_id, stream_id, prev_ids, content in pending_updates: logger.debug( "Handling update %r/%r, ID: %r, prev: %r ", user_id, device_id, stream_id, prev_ids, ) # Given a list of updates we check if we need to resync. This # happens if we've missed updates. resync = await self._need_to_do_resync(user_id, pending_updates) if logger.isEnabledFor(logging.INFO): logger.info( "Received device list update for %s, requiring resync: %s. Devices: %s", user_id, resync, ", ".join(u[0] for u in pending_updates), ) if resync: await self.user_device_resync(user_id) else: # Simply update the single device, since we know that is the only # change (because of the single prev_id matching the current cache) for device_id, stream_id, prev_ids, content in pending_updates: await self.store.update_remote_device_list_cache_entry( user_id, device_id, content, stream_id) await self.device_handler.notify_device_update( user_id, [device_id for device_id, _, _, _ in pending_updates]) self._seen_updates.setdefault(user_id, set()).update( stream_id for _, stream_id, _, _ in pending_updates) async def _need_to_do_resync(self, user_id, updates): """Given a list of updates for a user figure out if we need to do a full resync, or whether we have enough data that we can just apply the delta. """ seen_updates = self._seen_updates.get(user_id, set()) extremity = await self.store.get_device_list_last_stream_id_for_remote( user_id) logger.debug("Current extremity for %r: %r", user_id, extremity) stream_id_in_updates = set() # stream_ids in updates list for _, stream_id, prev_ids, _ in updates: if not prev_ids: # We always do a resync if there are no previous IDs return True for prev_id in prev_ids: if prev_id == extremity: continue elif prev_id in seen_updates: continue elif prev_id in stream_id_in_updates: continue else: return True stream_id_in_updates.add(stream_id) return False @trace async def _maybe_retry_device_resync(self): """Retry to resync device lists that are out of sync, except if another retry is in progress. """ if self._resync_retry_in_progress: return try: # Prevent another call of this function to retry resyncing device lists so # we don't send too many requests. self._resync_retry_in_progress = True # Get all of the users that need resyncing. need_resync = await self.store.get_user_ids_requiring_device_list_resync( ) # Iterate over the set of user IDs. for user_id in need_resync: try: # Try to resync the current user's devices list. result = await self.user_device_resync( user_id=user_id, mark_failed_as_stale=False, ) # user_device_resync only returns a result if it managed to # successfully resync and update the database. Updating the table # of users requiring resync isn't necessary here as # user_device_resync already does it (through # self.store.update_remote_device_list_cache). if result: logger.debug( "Successfully resynced the device list for %s", user_id, ) except Exception as e: # If there was an issue resyncing this user, e.g. if the remote # server sent a malformed result, just log the error instead of # aborting all the subsequent resyncs. logger.debug( "Could not resync the device list for %s: %s", user_id, e, ) finally: # Allow future calls to retry resyncinc out of sync device lists. self._resync_retry_in_progress = False async def user_device_resync( self, user_id: str, mark_failed_as_stale: bool = True) -> Optional[dict]: """Fetches all devices for a user and updates the device cache with them. Args: user_id: The user's id whose device_list will be updated. mark_failed_as_stale: Whether to mark the user's device list as stale if the attempt to resync failed. Returns: A dict with device info as under the "devices" in the result of this request: https://matrix.org/docs/spec/server_server/r0.1.2#get-matrix-federation-v1-user-devices-userid """ logger.debug("Attempting to resync the device list for %s", user_id) log_kv({"message": "Doing resync to update device list."}) # Fetch all devices for the user. origin = get_domain_from_id(user_id) try: result = await self.federation.query_user_devices(origin, user_id) except NotRetryingDestination: if mark_failed_as_stale: # Mark the remote user's device list as stale so we know we need to retry # it later. await self.store.mark_remote_user_device_cache_as_stale(user_id ) return except (RequestSendFailed, HttpResponseException) as e: logger.warning( "Failed to handle device list update for %s: %s", user_id, e, ) if mark_failed_as_stale: # Mark the remote user's device list as stale so we know we need to retry # it later. await self.store.mark_remote_user_device_cache_as_stale(user_id ) # We abort on exceptions rather than accepting the update # as otherwise synapse will 'forget' that its device list # is out of date. If we bail then we will retry the resync # next time we get a device list update for this user_id. # This makes it more likely that the device lists will # eventually become consistent. return except FederationDeniedError as e: set_tag("error", True) log_kv({"reason": "FederationDeniedError"}) logger.info(e) return except Exception as e: set_tag("error", True) log_kv({ "message": "Exception raised by federation request", "exception": e }) logger.exception("Failed to handle device list update for %s", user_id) if mark_failed_as_stale: # Mark the remote user's device list as stale so we know we need to retry # it later. await self.store.mark_remote_user_device_cache_as_stale(user_id ) return log_kv({"result": result}) stream_id = result["stream_id"] devices = result["devices"] # Get the master key and the self-signing key for this user if provided in the # response (None if not in the response). # The response will not contain the user signing key, as this key is only used by # its owner, thus it doesn't make sense to send it over federation. master_key = result.get("master_key") self_signing_key = result.get("self_signing_key") # If the remote server has more than ~1000 devices for this user # we assume that something is going horribly wrong (e.g. a bot # that logs in and creates a new device every time it tries to # send a message). Maintaining lots of devices per user in the # cache can cause serious performance issues as if this request # takes more than 60s to complete, internal replication from the # inbound federation worker to the synapse master may time out # causing the inbound federation to fail and causing the remote # server to retry, causing a DoS. So in this scenario we give # up on storing the total list of devices and only handle the # delta instead. if len(devices) > 1000: logger.warning( "Ignoring device list snapshot for %s as it has >1K devs (%d)", user_id, len(devices), ) devices = [] for device in devices: logger.debug( "Handling resync update %r/%r, ID: %r", user_id, device["device_id"], stream_id, ) await self.store.update_remote_device_list_cache( user_id, devices, stream_id) device_ids = [device["device_id"] for device in devices] # Handle cross-signing keys. cross_signing_device_ids = await self.process_cross_signing_key_update( user_id, master_key, self_signing_key, ) device_ids = device_ids + cross_signing_device_ids await self.device_handler.notify_device_update(user_id, device_ids) # We clobber the seen updates since we've re-synced from a given # point. self._seen_updates[user_id] = {stream_id} return result async def process_cross_signing_key_update( self, user_id: str, master_key: Optional[Dict[str, Any]], self_signing_key: Optional[Dict[str, Any]], ) -> list: """Process the given new master and self-signing key for the given remote user. Args: user_id: The ID of the user these keys are for. master_key: The dict of the cross-signing master key as returned by the remote server. self_signing_key: The dict of the cross-signing self-signing key as returned by the remote server. Return: The device IDs for the given keys. """ device_ids = [] if master_key: await self.store.set_e2e_cross_signing_key(user_id, "master", master_key) _, verify_key = get_verify_key_from_cross_signing_key(master_key) # verify_key is a VerifyKey from signedjson, which uses # .version to denote the portion of the key ID after the # algorithm and colon, which is the device ID device_ids.append(verify_key.version) if self_signing_key: await self.store.set_e2e_cross_signing_key(user_id, "self_signing", self_signing_key) _, verify_key = get_verify_key_from_cross_signing_key( self_signing_key) device_ids.append(verify_key.version) return device_ids
class DeviceListEduUpdater(object): "Handles incoming device list updates from federation and updates the DB" def __init__(self, hs, device_handler): self.store = hs.get_datastore() self.federation = hs.get_federation_client() self.clock = hs.get_clock() self.device_handler = device_handler self._remote_edu_linearizer = Linearizer(name="remote_device_list") # user_id -> list of updates waiting to be handled. self._pending_updates = {} # Recently seen stream ids. We don't bother keeping these in the DB, # but they're useful to have them about to reduce the number of spurious # resyncs. self._seen_updates = ExpiringCache( cache_name="device_update_edu", clock=self.clock, max_len=10000, expiry_ms=30 * 60 * 1000, iterable=True, ) @defer.inlineCallbacks def incoming_device_list_update(self, origin, edu_content): """Called on incoming device list update from federation. Responsible for parsing the EDU and adding to pending updates list. """ user_id = edu_content.pop("user_id") device_id = edu_content.pop("device_id") stream_id = str(edu_content.pop("stream_id")) # They may come as ints prev_ids = edu_content.pop("prev_id", []) prev_ids = [str(p) for p in prev_ids] # They may come as ints if get_domain_from_id(user_id) != origin: # TODO: Raise? logger.warning("Got device list update edu for %r from %r", user_id, origin) return room_ids = yield self.store.get_rooms_for_user(user_id) if not room_ids: # We don't share any rooms with this user. Ignore update, as we # probably won't get any further updates. return self._pending_updates.setdefault(user_id, []).append( (device_id, stream_id, prev_ids, edu_content) ) yield self._handle_device_updates(user_id) @measure_func("_incoming_device_list_update") @defer.inlineCallbacks def _handle_device_updates(self, user_id): "Actually handle pending updates." with (yield self._remote_edu_linearizer.queue(user_id)): pending_updates = self._pending_updates.pop(user_id, []) if not pending_updates: # This can happen since we batch updates return # Given a list of updates we check if we need to resync. This # happens if we've missed updates. resync = yield self._need_to_do_resync(user_id, pending_updates) if resync: # Fetch all devices for the user. origin = get_domain_from_id(user_id) try: result = yield self.federation.query_user_devices(origin, user_id) except NotRetryingDestination: # TODO: Remember that we are now out of sync and try again # later logger.warn( "Failed to handle device list update for %s," " we're not retrying the remote", user_id, ) # We abort on exceptions rather than accepting the update # as otherwise synapse will 'forget' that its device list # is out of date. If we bail then we will retry the resync # next time we get a device list update for this user_id. # This makes it more likely that the device lists will # eventually become consistent. return except FederationDeniedError as e: logger.info(e) return except Exception: # TODO: Remember that we are now out of sync and try again # later logger.exception( "Failed to handle device list update for %s", user_id ) return stream_id = result["stream_id"] devices = result["devices"] yield self.store.update_remote_device_list_cache( user_id, devices, stream_id, ) device_ids = [device["device_id"] for device in devices] yield self.device_handler.notify_device_update(user_id, device_ids) else: # Simply update the single device, since we know that is the only # change (because of the single prev_id matching the current cache) for device_id, stream_id, prev_ids, content in pending_updates: yield self.store.update_remote_device_list_cache_entry( user_id, device_id, content, stream_id, ) yield self.device_handler.notify_device_update( user_id, [device_id for device_id, _, _, _ in pending_updates] ) self._seen_updates.setdefault(user_id, set()).update( stream_id for _, stream_id, _, _ in pending_updates ) @defer.inlineCallbacks def _need_to_do_resync(self, user_id, updates): """Given a list of updates for a user figure out if we need to do a full resync, or whether we have enough data that we can just apply the delta. """ seen_updates = self._seen_updates.get(user_id, set()) extremity = yield self.store.get_device_list_last_stream_id_for_remote( user_id ) stream_id_in_updates = set() # stream_ids in updates list for _, stream_id, prev_ids, _ in updates: if not prev_ids: # We always do a resync if there are no previous IDs defer.returnValue(True) for prev_id in prev_ids: if prev_id == extremity: continue elif prev_id in seen_updates: continue elif prev_id in stream_id_in_updates: continue else: defer.returnValue(True) stream_id_in_updates.add(stream_id) defer.returnValue(False)
class DeviceListUpdater(object): "Handles incoming device list updates from federation and updates the DB" def __init__(self, hs, device_handler): self.store = hs.get_datastore() self.federation = hs.get_federation_client() self.clock = hs.get_clock() self.device_handler = device_handler self._remote_edu_linearizer = Linearizer(name="remote_device_list") # user_id -> list of updates waiting to be handled. self._pending_updates = {} # Recently seen stream ids. We don't bother keeping these in the DB, # but they're useful to have them about to reduce the number of spurious # resyncs. self._seen_updates = ExpiringCache( cache_name="device_update_edu", clock=self.clock, max_len=10000, expiry_ms=30 * 60 * 1000, iterable=True, ) @trace @defer.inlineCallbacks def incoming_device_list_update(self, origin, edu_content): """Called on incoming device list update from federation. Responsible for parsing the EDU and adding to pending updates list. """ set_tag("origin", origin) set_tag("edu_content", edu_content) user_id = edu_content.pop("user_id") device_id = edu_content.pop("device_id") stream_id = str(edu_content.pop("stream_id")) # They may come as ints prev_ids = edu_content.pop("prev_id", []) prev_ids = [str(p) for p in prev_ids] # They may come as ints if get_domain_from_id(user_id) != origin: # TODO: Raise? logger.warning( "Got device list update edu for %r/%r from %r", user_id, device_id, origin, ) set_tag("error", True) log_kv( { "message": "Got a device list update edu from a user and " "device which does not match the origin of the request.", "user_id": user_id, "device_id": device_id, } ) return room_ids = yield self.store.get_rooms_for_user(user_id) if not room_ids: # We don't share any rooms with this user. Ignore update, as we # probably won't get any further updates. set_tag("error", True) log_kv( { "message": "Got an update from a user for which " "we don't share any rooms", "other user_id": user_id, } ) logger.warning( "Got device list update edu for %r/%r, but don't share a room", user_id, device_id, ) return logger.debug("Received device list update for %r/%r", user_id, device_id) self._pending_updates.setdefault(user_id, []).append( (device_id, stream_id, prev_ids, edu_content) ) yield self._handle_device_updates(user_id) @measure_func("_incoming_device_list_update") @defer.inlineCallbacks def _handle_device_updates(self, user_id): "Actually handle pending updates." with (yield self._remote_edu_linearizer.queue(user_id)): pending_updates = self._pending_updates.pop(user_id, []) if not pending_updates: # This can happen since we batch updates return for device_id, stream_id, prev_ids, content in pending_updates: logger.debug( "Handling update %r/%r, ID: %r, prev: %r ", user_id, device_id, stream_id, prev_ids, ) # Given a list of updates we check if we need to resync. This # happens if we've missed updates. resync = yield self._need_to_do_resync(user_id, pending_updates) if logger.isEnabledFor(logging.INFO): logger.info( "Received device list update for %s, requiring resync: %s. Devices: %s", user_id, resync, ", ".join(u[0] for u in pending_updates), ) if resync: yield self.user_device_resync(user_id) else: # Simply update the single device, since we know that is the only # change (because of the single prev_id matching the current cache) for device_id, stream_id, prev_ids, content in pending_updates: yield self.store.update_remote_device_list_cache_entry( user_id, device_id, content, stream_id ) yield self.device_handler.notify_device_update( user_id, [device_id for device_id, _, _, _ in pending_updates] ) self._seen_updates.setdefault(user_id, set()).update( stream_id for _, stream_id, _, _ in pending_updates ) @defer.inlineCallbacks def _need_to_do_resync(self, user_id, updates): """Given a list of updates for a user figure out if we need to do a full resync, or whether we have enough data that we can just apply the delta. """ seen_updates = self._seen_updates.get(user_id, set()) extremity = yield self.store.get_device_list_last_stream_id_for_remote(user_id) logger.debug("Current extremity for %r: %r", user_id, extremity) stream_id_in_updates = set() # stream_ids in updates list for _, stream_id, prev_ids, _ in updates: if not prev_ids: # We always do a resync if there are no previous IDs return True for prev_id in prev_ids: if prev_id == extremity: continue elif prev_id in seen_updates: continue elif prev_id in stream_id_in_updates: continue else: return True stream_id_in_updates.add(stream_id) return False @defer.inlineCallbacks def user_device_resync(self, user_id): """Fetches all devices for a user and updates the device cache with them. Args: user_id (str): The user's id whose device_list will be updated. Returns: Deferred[dict]: a dict with device info as under the "devices" in the result of this request: https://matrix.org/docs/spec/server_server/r0.1.2#get-matrix-federation-v1-user-devices-userid """ log_kv({"message": "Doing resync to update device list."}) # Fetch all devices for the user. origin = get_domain_from_id(user_id) try: result = yield self.federation.query_user_devices(origin, user_id) except (NotRetryingDestination, RequestSendFailed, HttpResponseException): # TODO: Remember that we are now out of sync and try again # later logger.warning("Failed to handle device list update for %s", user_id) # We abort on exceptions rather than accepting the update # as otherwise synapse will 'forget' that its device list # is out of date. If we bail then we will retry the resync # next time we get a device list update for this user_id. # This makes it more likely that the device lists will # eventually become consistent. return except FederationDeniedError as e: set_tag("error", True) log_kv({"reason": "FederationDeniedError"}) logger.info(e) return except Exception as e: # TODO: Remember that we are now out of sync and try again # later set_tag("error", True) log_kv( {"message": "Exception raised by federation request", "exception": e} ) logger.exception("Failed to handle device list update for %s", user_id) return log_kv({"result": result}) stream_id = result["stream_id"] devices = result["devices"] # If the remote server has more than ~1000 devices for this user # we assume that something is going horribly wrong (e.g. a bot # that logs in and creates a new device every time it tries to # send a message). Maintaining lots of devices per user in the # cache can cause serious performance issues as if this request # takes more than 60s to complete, internal replication from the # inbound federation worker to the synapse master may time out # causing the inbound federation to fail and causing the remote # server to retry, causing a DoS. So in this scenario we give # up on storing the total list of devices and only handle the # delta instead. if len(devices) > 1000: logger.warning( "Ignoring device list snapshot for %s as it has >1K devs (%d)", user_id, len(devices), ) devices = [] for device in devices: logger.debug( "Handling resync update %r/%r, ID: %r", user_id, device["device_id"], stream_id, ) yield self.store.update_remote_device_list_cache(user_id, devices, stream_id) device_ids = [device["device_id"] for device in devices] yield self.device_handler.notify_device_update(user_id, device_ids) # We clobber the seen updates since we've re-synced from a given # point. self._seen_updates[user_id] = set([stream_id]) defer.returnValue(result)
class DeviceListEduUpdater(object): "Handles incoming device list updates from federation and updates the DB" def __init__(self, hs, device_handler): self.store = hs.get_datastore() self.federation = hs.get_federation_client() self.clock = hs.get_clock() self.device_handler = device_handler self._remote_edu_linearizer = Linearizer(name="remote_device_list") # user_id -> list of updates waiting to be handled. self._pending_updates = {} # Recently seen stream ids. We don't bother keeping these in the DB, # but they're useful to have them about to reduce the number of spurious # resyncs. self._seen_updates = ExpiringCache( cache_name="device_update_edu", clock=self.clock, max_len=10000, expiry_ms=30 * 60 * 1000, iterable=True, ) @defer.inlineCallbacks def incoming_device_list_update(self, origin, edu_content): """Called on incoming device list update from federation. Responsible for parsing the EDU and adding to pending updates list. """ user_id = edu_content.pop("user_id") device_id = edu_content.pop("device_id") stream_id = str(edu_content.pop("stream_id")) # They may come as ints prev_ids = edu_content.pop("prev_id", []) prev_ids = [str(p) for p in prev_ids] # They may come as ints if get_domain_from_id(user_id) != origin: # TODO: Raise? logger.warning( "Got device list update edu for %r/%r from %r", user_id, device_id, origin, ) return room_ids = yield self.store.get_rooms_for_user(user_id) if not room_ids: # We don't share any rooms with this user. Ignore update, as we # probably won't get any further updates. logger.warning( "Got device list update edu for %r/%r, but don't share a room", user_id, device_id, ) return logger.debug( "Received device list update for %r/%r", user_id, device_id, ) self._pending_updates.setdefault(user_id, []).append( (device_id, stream_id, prev_ids, edu_content) ) yield self._handle_device_updates(user_id) @measure_func("_incoming_device_list_update") @defer.inlineCallbacks def _handle_device_updates(self, user_id): "Actually handle pending updates." with (yield self._remote_edu_linearizer.queue(user_id)): pending_updates = self._pending_updates.pop(user_id, []) if not pending_updates: # This can happen since we batch updates return for device_id, stream_id, prev_ids, content in pending_updates: logger.debug( "Handling update %r/%r, ID: %r, prev: %r ", user_id, device_id, stream_id, prev_ids, ) # Given a list of updates we check if we need to resync. This # happens if we've missed updates. resync = yield self._need_to_do_resync(user_id, pending_updates) logger.debug("Need to re-sync devices for %r? %r", user_id, resync) if resync: # Fetch all devices for the user. origin = get_domain_from_id(user_id) try: result = yield self.federation.query_user_devices(origin, user_id) except ( NotRetryingDestination, RequestSendFailed, HttpResponseException, ): # TODO: Remember that we are now out of sync and try again # later logger.warn( "Failed to handle device list update for %s", user_id, ) # We abort on exceptions rather than accepting the update # as otherwise synapse will 'forget' that its device list # is out of date. If we bail then we will retry the resync # next time we get a device list update for this user_id. # This makes it more likely that the device lists will # eventually become consistent. return except FederationDeniedError as e: logger.info(e) return except Exception: # TODO: Remember that we are now out of sync and try again # later logger.exception( "Failed to handle device list update for %s", user_id ) return stream_id = result["stream_id"] devices = result["devices"] # If the remote server has more than ~1000 devices for this user # we assume that something is going horribly wrong (e.g. a bot # that logs in and creates a new device every time it tries to # send a message). Maintaining lots of devices per user in the # cache can cause serious performance issues as if this request # takes more than 60s to complete, internal replication from the # inbound federation worker to the synapse master may time out # causing the inbound federation to fail and causing the remote # server to retry, causing a DoS. So in this scenario we give # up on storing the total list of devices and only handle the # delta instead. if len(devices) > 1000: logger.warn( "Ignoring device list snapshot for %s as it has >1K devs (%d)", user_id, len(devices) ) devices = [] for device in devices: logger.debug( "Handling resync update %r/%r, ID: %r", user_id, device["device_id"], stream_id, ) yield self.store.update_remote_device_list_cache( user_id, devices, stream_id, ) device_ids = [device["device_id"] for device in devices] yield self.device_handler.notify_device_update(user_id, device_ids) # We clobber the seen updates since we've re-synced from a given # point. self._seen_updates[user_id] = set([stream_id]) else: # Simply update the single device, since we know that is the only # change (because of the single prev_id matching the current cache) for device_id, stream_id, prev_ids, content in pending_updates: yield self.store.update_remote_device_list_cache_entry( user_id, device_id, content, stream_id, ) yield self.device_handler.notify_device_update( user_id, [device_id for device_id, _, _, _ in pending_updates] ) self._seen_updates.setdefault(user_id, set()).update( stream_id for _, stream_id, _, _ in pending_updates ) @defer.inlineCallbacks def _need_to_do_resync(self, user_id, updates): """Given a list of updates for a user figure out if we need to do a full resync, or whether we have enough data that we can just apply the delta. """ seen_updates = self._seen_updates.get(user_id, set()) extremity = yield self.store.get_device_list_last_stream_id_for_remote( user_id ) logger.debug( "Current extremity for %r: %r", user_id, extremity, ) stream_id_in_updates = set() # stream_ids in updates list for _, stream_id, prev_ids, _ in updates: if not prev_ids: # We always do a resync if there are no previous IDs defer.returnValue(True) for prev_id in prev_ids: if prev_id == extremity: continue elif prev_id in seen_updates: continue elif prev_id in stream_id_in_updates: continue else: defer.returnValue(True) stream_id_in_updates.add(stream_id) defer.returnValue(False)