async def _handle_received_pdu(self, origin: str, pdu: EventBase) -> None:
    """Process a PDU received in a federation /send/ transaction.

    If the event is invalid, then this method throws a FederationError.
    (The error will then be logged and sent back to the sender (which
    probably won't do anything with it), and other events in the
    transaction will be processed as normal).

    It is likely that we'll then receive other events which refer to
    this rejected_event in their prev_events, etc.  When that happens,
    we'll attempt to fetch the rejected event again, which will presumably
    fail, so those second-generation events will also get rejected.

    Eventually, we get to the point where there are more than 10 events
    between any new events and the original rejected event. Since we
    only try to backfill 10 events deep on received pdu, we then accept the
    new event, possibly introducing a discontinuity in the DAG, with new
    forward extremities, so normal service is approximately returned,
    until we try to backfill across the discontinuity.

    Args:
        origin: server which sent the pdu
        pdu: received pdu

    Raises: FederationError if the signatures / hash do not match, or
        if the event was unacceptable for any other reason (eg, too large,
        too many prev_events, couldn't find the prev_events)
    """
    # check that it's actually being sent from a valid destination to
    # workaround bug #1753 in 0.18.5 and 0.18.6
    if origin != get_domain_from_id(pdu.sender):
        # We continue to accept join events from any server; this is
        # necessary for the federation join dance to work correctly.
        # (When we join over federation, the "helper" server is
        # responsible for sending out the join event, rather than the
        # origin. See bug #1893. This is also true for some third party
        # invites).
        if not (
            pdu.type == "m.room.member"
            and pdu.content
            and pdu.content.get("membership", None)
            in (Membership.JOIN, Membership.INVITE)
        ):
            logger.info(
                "Discarding PDU %s from invalid origin %s", pdu.event_id, origin
            )
            return
        else:
            logger.info("Accepting join PDU %s from %s", pdu.event_id, origin)

    # We've already checked that we know the room version by this point
    room_version = await self.store.get_room_version(pdu.room_id)

    # Check signature.
    try:
        pdu = await self._check_sigs_and_hash(room_version, pdu)
    except SynapseError as e:
        raise FederationError("ERROR", e.code, e.msg, affected=pdu.event_id)

    await self.handler.on_receive_pdu(origin, pdu, sent_to_us_directly=True)

async def _handle_received_pdu(self, origin: str, pdu: EventBase) -> None:
    """Process a PDU received in a federation /send/ transaction.

    If the event is invalid, then this method throws a FederationError.
    (The error will then be logged and sent back to the sender (which
    probably won't do anything with it), and other events in the
    transaction will be processed as normal).

    It is likely that we'll then receive other events which refer to
    this rejected_event in their prev_events, etc.  When that happens,
    we'll attempt to fetch the rejected event again, which will presumably
    fail, so those second-generation events will also get rejected.

    Eventually, we get to the point where there are more than 10 events
    between any new events and the original rejected event. Since we
    only try to backfill 10 events deep on received pdu, we then accept the
    new event, possibly introducing a discontinuity in the DAG, with new
    forward extremities, so normal service is approximately returned,
    until we try to backfill across the discontinuity.

    Args:
        origin: server which sent the pdu
        pdu: received pdu

    Raises: FederationError if the signatures / hash do not match, or
        if the event was unacceptable for any other reason (eg, too large,
        too many prev_events, couldn't find the prev_events)
    """
    # We've already checked that we know the room version by this point
    room_version = await self.store.get_room_version(pdu.room_id)

    # Check signature.
    try:
        pdu = await self._check_sigs_and_hash(room_version, pdu)
    except SynapseError as e:
        raise FederationError("ERROR", e.code, e.msg, affected=pdu.event_id)

    # Add the event to our staging area
    await self.store.insert_received_event_to_staging(origin, pdu)

    # Try and acquire the processing lock for the room, if we get it start a
    # background process for handling the events in the room.
    lock = await self.store.try_acquire_lock(
        _INBOUND_EVENT_HANDLING_LOCK_NAME, pdu.room_id
    )
    if lock:
        self._process_incoming_pdus_in_room_inner(
            pdu.room_id, room_version, lock, origin, pdu
        )

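# --- Illustrative sketch ---------------------------------------------------
# A minimal, self-contained sketch (plain asyncio, in-memory structures) of
# the pattern described in the comments above: every received event is first
# written to a per-room staging queue, and whichever task manages to take the
# room's processing lock drains that queue in the background. The names below
# (staging, room_locks, handle_received, process_room) are hypothetical
# stand-ins, not Synapse's actual storage or locking API.
import asyncio
from collections import defaultdict, deque

staging: dict[str, deque] = defaultdict(deque)          # room_id -> queued events
room_locks: dict[str, asyncio.Lock] = defaultdict(asyncio.Lock)

async def handle_received(room_id: str, event: dict) -> None:
    # Always stage the event first, so nothing is lost if another task is
    # already processing this room.
    staging[room_id].append(event)

    lock = room_locks[room_id]
    if lock.locked():
        return  # someone else is already draining this room's queue

    # We won the race: drain the staging queue in the background. A redundant
    # drainer (launched by a concurrent call) simply finds the queue empty.
    asyncio.create_task(process_room(room_id, lock))

async def process_room(room_id: str, lock: asyncio.Lock) -> None:
    async with lock:
        while staging[room_id]:
            event = staging[room_id].popleft()
            # ... signature checks / persistence would happen here ...
            print("processing", room_id, event)
# ---------------------------------------------------------------------------
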
def _handle_received_pdu(self, origin, pdu):
    """ Process a PDU received in a federation /send/ transaction.

    Args:
        origin (str): server which sent the pdu
        pdu (FrozenEvent): received pdu

    Returns (Deferred): completes with None

    Raises: FederationError if the signatures / hash do not match
    """
    # check that it's actually being sent from a valid destination to
    # workaround bug #1753 in 0.18.5 and 0.18.6
    if origin != get_domain_from_id(pdu.event_id):
        # We continue to accept join events from any server; this is
        # necessary for the federation join dance to work correctly.
        # (When we join over federation, the "helper" server is
        # responsible for sending out the join event, rather than the
        # origin. See bug #1893).
        if not (
            pdu.type == 'm.room.member' and
            pdu.content and
            pdu.content.get("membership", None) == 'join'
        ):
            logger.info(
                "Discarding PDU %s from invalid origin %s",
                pdu.event_id, origin
            )
            return
        else:
            logger.info("Accepting join PDU %s from %s", pdu.event_id, origin)

    # Check signature.
    try:
        pdu = yield self._check_sigs_and_hash(pdu)
    except SynapseError as e:
        raise FederationError(
            "ERROR",
            e.code,
            e.msg,
            affected=pdu.event_id,
        )

    yield self.handler.on_receive_pdu(origin, pdu, get_missing=True)

def _handle_received_pdu(self, origin, pdu):
    """ Process a PDU received in a federation /send/ transaction.

    Args:
        origin (str): server which sent the pdu
        pdu (FrozenEvent): received pdu

    Returns (Deferred): completes with None

    Raises: FederationError if the signatures / hash do not match
    """
    # Check signature.
    try:
        pdu = yield self._check_sigs_and_hash(pdu)
    except SynapseError as e:
        raise FederationError(
            "ERROR",
            e.code,
            e.msg,
            affected=pdu.event_id,
        )

    yield self.handler.on_receive_pdu(origin, pdu, get_missing=True)

def _handle_new_pdu(self, origin, pdu, get_missing=True):
    # We reprocess pdus when we have seen them only as outliers
    existing = yield self._get_persisted_pdu(
        origin, pdu.event_id, do_auth=False
    )

    # FIXME: Currently we fetch an event again when we already have it
    # if it has been marked as an outlier.

    already_seen = (
        existing and (
            not existing.internal_metadata.is_outlier()
            or pdu.internal_metadata.is_outlier()
        )
    )
    if already_seen:
        logger.debug("Already seen pdu %s", pdu.event_id)
        return

    # Check signature.
    try:
        pdu = yield self._check_sigs_and_hash(pdu)
    except SynapseError as e:
        raise FederationError(
            "ERROR",
            e.code,
            e.msg,
            affected=pdu.event_id,
        )

    state = None

    auth_chain = []

    have_seen = yield self.store.have_events(
        [ev for ev, _ in pdu.prev_events]
    )

    fetch_state = False

    # Get missing pdus if necessary.
    if not pdu.internal_metadata.is_outlier():
        # We only backfill backwards to the min depth.
        min_depth = yield self.handler.get_min_depth_for_context(
            pdu.room_id
        )

        logger.debug(
            "_handle_new_pdu min_depth for %s: %d",
            pdu.room_id, min_depth
        )

        prevs = {e_id for e_id, _ in pdu.prev_events}
        seen = set(have_seen.keys())

        if min_depth and pdu.depth < min_depth:
            # This is so that we don't notify the user about this
            # message, to work around the fact that some events will
            # reference really really old events we really don't want to
            # send to the clients.
            pdu.internal_metadata.outlier = True
        elif min_depth and pdu.depth > min_depth:
            if get_missing and prevs - seen:
                latest = yield self.store.get_latest_event_ids_in_room(
                    pdu.room_id
                )

                # We add the prev events that we have seen to the latest
                # list to ensure the remote server doesn't give them to us
                latest = set(latest)
                latest |= seen

                missing_events = yield self.get_missing_events(
                    origin,
                    pdu.room_id,
                    earliest_events_ids=list(latest),
                    latest_events=[pdu],
                    limit=10,
                    min_depth=min_depth,
                )

                # We want to sort these by depth so we process them and
                # tell clients about them in order.
                missing_events.sort(key=lambda x: x.depth)

                for e in missing_events:
                    yield self._handle_new_pdu(
                        origin,
                        e,
                        get_missing=False
                    )

                have_seen = yield self.store.have_events(
                    [ev for ev, _ in pdu.prev_events]
                )

        prevs = {e_id for e_id, _ in pdu.prev_events}
        seen = set(have_seen.keys())
        if prevs - seen:
            fetch_state = True

    if fetch_state:
        # We need to get the state at this event, since we haven't
        # processed all the prev events.
        logger.debug(
            "_handle_new_pdu getting state for %s",
            pdu.room_id
        )
        try:
            state, auth_chain = yield self.get_state_for_room(
                origin,
                pdu.room_id,
                pdu.event_id,
            )
        except:
            logger.warn("Failed to get state for event: %s", pdu.event_id)

    yield self.handler.on_receive_pdu(
        origin,
        pdu,
        backfilled=False,
        state=state,
        auth_chain=auth_chain,
    )

def on_receive_pdu(self, origin, pdu, backfilled, state=None, auth_chain=None):
    """ Called by the ReplicationLayer when we have a new pdu. We need to
    do auth checks and put it through the StateHandler.
    """
    event = pdu

    logger.debug("Got event: %s", event.event_id)

    # If we are currently in the process of joining this room, then we
    # queue up events for later processing.
    if event.room_id in self.room_queues:
        self.room_queues[event.room_id].append((pdu, origin))
        return

    logger.debug("Processing event: %s", event.event_id)

    redacted_event = prune_event(event)
    redacted_pdu_json = redacted_event.get_pdu_json()

    try:
        yield self.keyring.verify_json_for_server(
            event.origin, redacted_pdu_json
        )
    except SynapseError as e:
        logger.warn(
            "Signature check failed for %s redacted to %s",
            encode_canonical_json(pdu.get_pdu_json()),
            encode_canonical_json(redacted_pdu_json),
        )
        raise FederationError(
            "ERROR",
            e.code,
            e.msg,
            affected=event.event_id,
        )

    if not check_event_content_hash(event):
        logger.warn(
            "Event content has been tampered, redacting %s, %s",
            event.event_id, encode_canonical_json(event.get_dict())
        )
        event = redacted_event

    logger.debug("Event: %s", event)

    # FIXME (erikj): Awful hack to make the case where we are not currently
    # in the room work
    current_state = None
    is_in_room = yield self.auth.check_host_in_room(
        event.room_id,
        self.server_name
    )
    if not is_in_room and not event.internal_metadata.outlier:
        logger.debug("Got event for room we're not in.")

        replication = self.replication_layer

        if not state:
            state, auth_chain = yield replication.get_state_for_context(
                origin,
                context=event.room_id,
                event_id=event.event_id,
            )

        if not auth_chain:
            auth_chain = yield replication.get_event_auth(
                origin,
                context=event.room_id,
                event_id=event.event_id,
            )

        for e in auth_chain:
            e.internal_metadata.outlier = True
            try:
                yield self._handle_new_event(e, fetch_auth_from=origin)
            except:
                logger.exception(
                    "Failed to handle auth event %s",
                    e.event_id,
                )

        current_state = state

    if state:
        for e in state:
            logging.info("A :) %r", e)
            e.internal_metadata.outlier = True
            try:
                yield self._handle_new_event(e)
            except:
                logger.exception(
                    "Failed to handle state event %s",
                    e.event_id,
                )

    try:
        yield self._handle_new_event(
            event,
            state=state,
            backfilled=backfilled,
            current_state=current_state,
        )
    except AuthError as e:
        raise FederationError(
            "ERROR",
            e.code,
            e.msg,
            affected=event.event_id,
        )

    # if we're receiving valid events from an origin,
    # it's probably a good idea to mark it as not in retry-state
    # for sending (although this is a bit of a leap)
    retry_timings = yield self.store.get_destination_retry_timings(origin)
    if retry_timings and retry_timings.retry_last_ts:
        self.store.set_destination_retry_timings(origin, 0, 0)

    room = yield self.store.get_room(event.room_id)

    if not room:
        try:
            yield self.store.store_room(
                room_id=event.room_id,
                room_creator_user_id="",
                is_public=False,
            )
        except StoreError:
            logger.exception("Failed to store room.")

    if not backfilled:
        extra_users = []
        if event.type == EventTypes.Member:
            target_user_id = event.state_key
            target_user = self.hs.parse_userid(target_user_id)
            extra_users.append(target_user)

        yield self.notifier.on_new_room_event(
            event, extra_users=extra_users
        )

    if event.type == EventTypes.Member:
        if event.membership == Membership.JOIN:
            user = self.hs.parse_userid(event.state_key)
            yield self.distributor.fire(
                "user_joined_room", user=user, room_id=event.room_id
            )

def on_receive_pdu(self, origin, pdu, backfilled, state=None, auth_chain=None):
    """ Called by the ReplicationLayer when we have a new pdu. We need to
    do auth checks and put it through the StateHandler.
    """
    event = pdu

    logger.debug("Got event: %s", event.event_id)

    # If we are currently in the process of joining this room, then we
    # queue up events for later processing.
    if event.room_id in self.room_queues:
        self.room_queues[event.room_id].append((pdu, origin))
        return

    logger.debug("Processing event: %s", event.event_id)

    logger.debug("Event: %s", event)

    # FIXME (erikj): Awful hack to make the case where we are not currently
    # in the room work
    current_state = None
    is_in_room = yield self.auth.check_host_in_room(
        event.room_id,
        self.server_name
    )
    if not is_in_room and not event.internal_metadata.is_outlier():
        logger.debug("Got event for room we're not in.")
        current_state = state

    event_ids = set()
    if state:
        event_ids |= {e.event_id for e in state}
    if auth_chain:
        event_ids |= {e.event_id for e in auth_chain}

    seen_ids = set(
        (yield self.store.have_events(event_ids)).keys()
    )

    if state and auth_chain is not None:
        # If we have any state or auth_chain given to us by the replication
        # layer, then we should handle them (if we haven't before.)
        for e in itertools.chain(auth_chain, state):
            if e.event_id in seen_ids:
                continue

            e.internal_metadata.outlier = True
            try:
                auth_ids = [e_id for e_id, _ in e.auth_events]
                auth = {
                    (e.type, e.state_key): e for e in auth_chain
                    if e.event_id in auth_ids
                }
                yield self._handle_new_event(
                    origin, e, auth_events=auth
                )
                seen_ids.add(e.event_id)
            except:
                logger.exception(
                    "Failed to handle state event %s",
                    e.event_id,
                )

    try:
        yield self._handle_new_event(
            origin,
            event,
            state=state,
            backfilled=backfilled,
            current_state=current_state,
        )
    except AuthError as e:
        raise FederationError(
            "ERROR",
            e.code,
            e.msg,
            affected=event.event_id,
        )

    # if we're receiving valid events from an origin,
    # it's probably a good idea to mark it as not in retry-state
    # for sending (although this is a bit of a leap)
    retry_timings = yield self.store.get_destination_retry_timings(origin)
    if retry_timings and retry_timings.retry_last_ts:
        self.store.set_destination_retry_timings(origin, 0, 0)

    room = yield self.store.get_room(event.room_id)

    if not room:
        try:
            yield self.store.store_room(
                room_id=event.room_id,
                room_creator_user_id="",
                is_public=False,
            )
        except StoreError:
            logger.exception("Failed to store room.")

    if not backfilled:
        extra_users = []
        if event.type == EventTypes.Member:
            target_user_id = event.state_key
            target_user = UserID.from_string(target_user_id)
            extra_users.append(target_user)

        yield self.notifier.on_new_room_event(
            event, extra_users=extra_users
        )

    if event.type == EventTypes.Member:
        if event.membership == Membership.JOIN:
            user = UserID.from_string(event.state_key)
            yield self.distributor.fire(
                "user_joined_room", user=user, room_id=event.room_id
            )

def _handle_new_pdu(self, origin, pdu, max_recursion=10):
    # We reprocess pdus when we have seen them only as outliers
    existing = yield self._get_persisted_pdu(
        origin, pdu.event_id, do_auth=False
    )

    # FIXME: Currently we fetch an event again when we already have it
    # if it has been marked as an outlier.

    already_seen = (
        existing and (
            not existing.internal_metadata.is_outlier()
            or pdu.internal_metadata.is_outlier()
        )
    )
    if already_seen:
        logger.debug("Already seen pdu %s", pdu.event_id)
        return

    # Check signature.
    try:
        pdu = yield self._check_sigs_and_hash(pdu)
    except SynapseError as e:
        raise FederationError(
            "ERROR",
            e.code,
            e.msg,
            affected=pdu.event_id,
        )

    state = None

    auth_chain = []

    have_seen = yield self.store.have_events(
        [ev for ev, _ in pdu.prev_events]
    )

    fetch_state = False

    # Get missing pdus if necessary.
    if not pdu.internal_metadata.is_outlier():
        # We only backfill backwards to the min depth.
        min_depth = yield self.handler.get_min_depth_for_context(
            pdu.room_id
        )

        logger.debug(
            "_handle_new_pdu min_depth for %s: %d",
            pdu.room_id, min_depth
        )

        if min_depth and pdu.depth < min_depth:
            # This is so that we don't notify the user about this
            # message, to work around the fact that some events will
            # reference really really old events we really don't want to
            # send to the clients.
            pdu.internal_metadata.outlier = True
        elif min_depth and pdu.depth > min_depth and max_recursion > 0:
            for event_id, hashes in pdu.prev_events:
                if event_id not in have_seen:
                    logger.debug(
                        "_handle_new_pdu requesting pdu %s",
                        event_id
                    )

                    try:
                        new_pdu = yield self.federation_client.get_pdu(
                            [origin, pdu.origin],
                            event_id=event_id,
                        )

                        if new_pdu:
                            yield self._handle_new_pdu(
                                origin,
                                new_pdu,
                                max_recursion=max_recursion - 1
                            )

                            logger.debug("Processed pdu %s", event_id)
                        else:
                            logger.warn("Failed to get PDU %s", event_id)
                            fetch_state = True
                    except:
                        # TODO(erikj): Do some more intelligent retries.
                        logger.exception("Failed to get PDU")
                        fetch_state = True
        else:
            prevs = {e_id for e_id, _ in pdu.prev_events}
            seen = set(have_seen.keys())
            if prevs - seen:
                fetch_state = True
    else:
        fetch_state = True

    if fetch_state:
        # We need to get the state at this event, since we haven't
        # processed all the prev events.
        logger.debug(
            "_handle_new_pdu getting state for %s",
            pdu.room_id
        )
        try:
            state, auth_chain = yield self.get_state_for_room(
                origin,
                pdu.room_id,
                pdu.event_id,
            )
        except:
            logger.warn("Failed to get state for event: %s", pdu.event_id)

    yield self.handler.on_receive_pdu(
        origin,
        pdu,
        backfilled=False,
        state=state,
        auth_chain=auth_chain,
    )

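# --- Illustrative sketch ---------------------------------------------------
# A small sketch (not Synapse's implementation) of the bounded recursion used
# in the version above: walk back through prev_events we have not seen,
# fetching each one, but stop when max_recursion is exhausted or when we
# would go below the room's minimum depth; the caller then falls back to
# fetching the room state instead. All names here are hypothetical stand-ins
# for the real storage and federation calls.
from typing import Awaitable, Callable, Iterable, Optional, Tuple

async def backfill_prev_events(
    event_id: str,
    depth: int,
    min_depth: int,
    get_prev_event_ids: Callable[[str], Awaitable[Iterable[str]]],
    have_event: Callable[[str], Awaitable[bool]],
    fetch_event: Callable[[str], Awaitable[Optional[Tuple[str, int]]]],
    max_recursion: int = 10,
) -> None:
    if max_recursion <= 0 or depth <= min_depth:
        # Give up walking backwards; fetching full room state is the fallback.
        return

    for prev_id in await get_prev_event_ids(event_id):
        if await have_event(prev_id):
            continue
        fetched = await fetch_event(prev_id)  # returns (event_id, depth) or None
        if fetched is None:
            continue
        fetched_id, fetched_depth = fetched
        await backfill_prev_events(
            fetched_id,
            fetched_depth,
            min_depth,
            get_prev_event_ids,
            have_event,
            fetch_event,
            max_recursion=max_recursion - 1,
        )
# ---------------------------------------------------------------------------
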
def _handle_new_pdu(self, origin, pdu, get_missing=True):
    # We reprocess pdus when we have seen them only as outliers
    existing = yield self._get_persisted_pdu(
        origin, pdu.event_id, do_auth=False
    )

    # FIXME: Currently we fetch an event again when we already have it
    # if it has been marked as an outlier.

    already_seen = (
        existing and (
            not existing.internal_metadata.is_outlier()
            or pdu.internal_metadata.is_outlier()
        )
    )
    if already_seen:
        logger.debug("Already seen pdu %s", pdu.event_id)
        return

    # Check signature.
    try:
        pdu = yield self._check_sigs_and_hash(pdu)
    except SynapseError as e:
        raise FederationError(
            "ERROR",
            e.code,
            e.msg,
            affected=pdu.event_id,
        )

    state = None

    auth_chain = []

    have_seen = yield self.store.have_events(
        [ev for ev, _ in pdu.prev_events]
    )

    fetch_state = False

    # Get missing pdus if necessary.
    if not pdu.internal_metadata.is_outlier():
        # We only backfill backwards to the min depth.
        min_depth = yield self.handler.get_min_depth_for_context(
            pdu.room_id
        )

        logger.debug(
            "_handle_new_pdu min_depth for %s: %d",
            pdu.room_id, min_depth
        )

        prevs = {e_id for e_id, _ in pdu.prev_events}
        seen = set(have_seen.keys())

        if min_depth and pdu.depth < min_depth:
            # This is so that we don't notify the user about this
            # message, to work around the fact that some events will
            # reference really really old events we really don't want to
            # send to the clients.
            pdu.internal_metadata.outlier = True
        elif min_depth and pdu.depth > min_depth:
            if get_missing and prevs - seen:
                # If we're missing stuff, ensure we only fetch stuff one
                # at a time.
                logger.info(
                    "Acquiring lock for room %r to fetch %d missing events: %r...",
                    pdu.room_id, len(prevs - seen), list(prevs - seen)[:5],
                )
                with (yield self._room_pdu_linearizer.queue(pdu.room_id)):
                    logger.info(
                        "Acquired lock for room %r to fetch %d missing events",
                        pdu.room_id, len(prevs - seen),
                    )

                    # We recalculate seen, since it may have changed.
                    have_seen = yield self.store.have_events(prevs)
                    seen = set(have_seen.keys())

                    if prevs - seen:
                        latest = yield self.store.get_latest_event_ids_in_room(
                            pdu.room_id
                        )

                        # We add the prev events that we have seen to the latest
                        # list to ensure the remote server doesn't give them to us
                        latest = set(latest)
                        latest |= seen

                        logger.info(
                            "Missing %d events for room %r: %r...",
                            len(prevs - seen), pdu.room_id,
                            list(prevs - seen)[:5]
                        )

                        # XXX: we set timeout to 10s to help workaround
                        # https://github.com/matrix-org/synapse/issues/1733.
                        # The reason is to avoid holding the linearizer lock
                        # whilst processing inbound /send transactions, causing
                        # FDs to stack up and block other inbound transactions
                        # which empirically can currently take up to 30 minutes.
                        #
                        # N.B. this explicitly disables retry attempts.
                        #
                        # N.B. this also increases our chances of falling back to
                        # fetching fresh state for the room if the missing event
                        # can't be found, which slightly reduces our security.
                        # it may also increase our DAG extremity count for the room,
                        # causing additional state resolution?  See #1760.
                        # However, fetching state doesn't hold the linearizer lock
                        # apparently.
                        #
                        # see https://github.com/matrix-org/synapse/pull/1744
                        missing_events = yield self.get_missing_events(
                            origin,
                            pdu.room_id,
                            earliest_events_ids=list(latest),
                            latest_events=[pdu],
                            limit=10,
                            min_depth=min_depth,
                            timeout=10000,
                        )

                        # We want to sort these by depth so we process them and
                        # tell clients about them in order.
                        missing_events.sort(key=lambda x: x.depth)

                        for e in missing_events:
                            yield self._handle_new_pdu(
                                origin,
                                e,
                                get_missing=False
                            )

                        have_seen = yield self.store.have_events(
                            [ev for ev, _ in pdu.prev_events]
                        )

        prevs = {e_id for e_id, _ in pdu.prev_events}
        seen = set(have_seen.keys())
        if prevs - seen:
            logger.info(
                "Still missing %d events for room %r: %r...",
                len(prevs - seen), pdu.room_id, list(prevs - seen)[:5]
            )
            fetch_state = True

    if fetch_state:
        # We need to get the state at this event, since we haven't
        # processed all the prev events.
        logger.debug(
            "_handle_new_pdu getting state for %s",
            pdu.room_id
        )
        try:
            state, auth_chain = yield self.get_state_for_room(
                origin,
                pdu.room_id,
                pdu.event_id,
            )
        except:
            logger.exception("Failed to get state for event: %s", pdu.event_id)

    yield self.handler.on_receive_pdu(
        origin,
        pdu,
        state=state,
        auth_chain=auth_chain,
    )

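# --- Illustrative sketch ---------------------------------------------------
# A minimal sketch (plain asyncio, illustrative names) of the "linearizer"
# idea used in the version above: concurrent handlers for the same room queue
# behind a per-room lock before fetching missing prev_events, and re-check
# what is still missing once they hold the lock, since an earlier holder may
# already have fetched the same events. This is not Synapse's Linearizer API.
import asyncio
from collections import defaultdict
from typing import Awaitable, Callable, Iterable, Set

room_fetch_locks: dict[str, asyncio.Lock] = defaultdict(asyncio.Lock)

async def fetch_missing_prev_events(
    room_id: str,
    prev_event_ids: Iterable[str],
    have_event: Callable[[str], Awaitable[bool]],
    fetch_event: Callable[[str], Awaitable[None]],
) -> None:
    async with room_fetch_locks[room_id]:
        # Recalculate what is missing now that we hold the lock; a previous
        # holder may have already pulled in some of these events.
        still_missing: Set[str] = set()
        for event_id in prev_event_ids:
            if not await have_event(event_id):
                still_missing.add(event_id)

        for event_id in sorted(still_missing):
            await fetch_event(event_id)
# ---------------------------------------------------------------------------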