def test_state_filter_difference_simple_cases(self):
    """
    Covers the trivial inputs to the StateFilter approx_difference that the
    more in-depth tests do not exercise explicitly.
    """
    # Each row holds the three positional arguments handed to
    # assert_difference; every filter is constructed fresh.
    trivial_cases = (
        (StateFilter.all(), StateFilter.all(), StateFilter.none()),
        (StateFilter.all(), StateFilter.none(), StateFilter.all()),
    )
    for first, second, third in trivial_cases:
        self.assert_difference(first, second, third)
def get_state_ids_for_events(self, event_ids, state_filter=StateFilter.all()):
    """
    Look up, for each requested event, the state at that event expressed as
    event IDs (rather than the state events themselves).

    Args:
        event_ids(list(str)): events whose state should be returned
        state_filter (StateFilter): The state filter used to fetch state
            from the database.

    Returns:
        A deferred dict from event_id -> (type, state_key) -> event_id
    """
    group_by_event = yield self._get_state_group_for_events(event_ids)

    # Several events may share a state group, so only fetch each group once.
    wanted_groups = set(itervalues(group_by_event))
    state_by_group = yield self._get_state_for_groups(wanted_groups, state_filter)

    state_by_event = {}
    for ev_id, group in iteritems(group_by_event):
        state_by_event[ev_id] = state_by_group[group]

    requested = {}
    for ev_id in event_ids:
        requested[ev_id] = state_by_event[ev_id]
    return requested
def get_state_for_events(self, event_ids, state_filter=StateFilter.all()):
    """Fetch the state (as full events) at each of the given events.

    Args:
        event_ids (list[string])
        state_filter (StateFilter): The state filter used to fetch state
            from the database.

    Returns:
        deferred: A dict of (event_id) -> (type, state_key) -> [state_events]
    """
    group_by_event = yield self._get_state_group_for_events(event_ids)

    wanted_groups = set(itervalues(group_by_event))
    state_ids_by_group = yield self._get_state_for_groups(wanted_groups, state_filter)

    # Collect every state event ID referenced by any of the groups.
    state_event_ids = []
    for state_ids in itervalues(state_ids_by_group):
        state_event_ids.extend(itervalues(state_ids))

    fetched = yield self.get_events(state_event_ids, get_prev_content=False)

    state_by_event = {}
    for ev_id, group in iteritems(group_by_event):
        resolved = {}
        for key, state_event_id in iteritems(state_ids_by_group[group]):
            # State entries whose event was not returned by get_events are
            # left out of the result.
            if state_event_id in fetched:
                resolved[key] = fetched[state_event_id]
        state_by_event[ev_id] = resolved

    requested = {}
    for ev_id in event_ids:
        requested[ev_id] = state_by_event[ev_id]
    return requested
def get_state_for_event(self, event_id, state_filter=StateFilter.all()):
    """
    Fetch the state dict for a single event by delegating to
    get_state_for_events with a one-element list.

    Args:
        event_id(str): event whose state should be returned
        state_filter (StateFilter): The state filter used to fetch state
            from the database.

    Returns:
        A deferred dict from (type, state_key) -> state_event
    """
    per_event_state = yield self.get_state_for_events([event_id], state_filter)
    state_at_event = per_event_state[event_id]
    return state_at_event
async def get_partial_filtered_current_state_ids(
    self, room_id: str, state_filter: Optional[StateFilter] = None
) -> StateMap[str]:
    """Get the filtered current state event IDs for a room, read from the
    current_state_events table.

    This may not be as up-to-date as the result of doing a fresh state
    resolution as per state_handler.get_current_state

    This may be the partial state if we're lazy joining the room.

    Args:
        room_id
        state_filter: The state filter used to fetch state from the
            database. A falsy value is treated as StateFilter.all().

    Returns:
        Map from type/state_key to event ID.
    """
    effective_filter = state_filter or StateFilter.all()
    where_clause, where_args = effective_filter.make_sql_filter_clause()

    if not where_clause:
        # We delegate to the cached version
        return await self.get_partial_current_state_ids(room_id)

    def _get_filtered_current_state_ids_txn(
        txn: LoggingTransaction,
    ) -> StateMap[str]:
        query = """ SELECT type, state_key, event_id FROM current_state_events WHERE room_id = ? """
        if where_clause:
            query += " AND (%s)" % (where_clause,)

        # room_id binds the first placeholder; the filter's own arguments
        # follow in order.
        txn.execute(query, [room_id, *where_args])

        return {
            (intern_string(typ), intern_string(state_key)): event_id
            for typ, state_key, event_id in txn
        }

    return await self.db_pool.runInteraction(
        "get_filtered_current_state_ids", _get_filtered_current_state_ids_txn
    )
def get_state_for_groups(
    self, groups: Iterable[int], state_filter: Optional[StateFilter] = None
) -> Awaitable[Dict[int, MutableStateMap[str]]]:
    """Gets the state at each of a list of state groups, optionally
    filtering by type/state_key

    Thin wrapper that forwards to the state store's _get_state_for_groups.

    Args:
        groups: list of state groups for which we want to get the state.
        state_filter: The state filter used to fetch state from the
            database. A falsy value is treated as StateFilter.all().

    Returns:
        Dict of state group to state map.
    """
    fetch_state = self.stores.state._get_state_for_groups
    effective_filter = state_filter or StateFilter.all()
    return fetch_state(groups, effective_filter)
def get_filtered_current_state_ids(self, room_id, state_filter=StateFilter.all()):
    """Get the filtered current state event IDs for a room, read from the
    current_state_events table.

    This may not be as up-to-date as the result of doing a fresh state
    resolution as per state_handler.get_current_state

    Args:
        room_id (str)
        state_filter (StateFilter): The state filter used to fetch state
            from the database.

    Returns:
        Deferred[dict[tuple[str, str], str]]: Map from type/state_key to
        event ID.
    """
    where_clause, where_args = state_filter.make_sql_filter_clause()

    if not where_clause:
        # We delegate to the cached version
        return self.get_current_state_ids(room_id)

    def _get_filtered_current_state_ids_txn(txn):
        query = """ SELECT type, state_key, event_id FROM current_state_events WHERE room_id = ? """
        if where_clause:
            query += " AND (%s)" % (where_clause,)

        # room_id binds the first placeholder; the filter's own arguments
        # follow in order.
        params = [room_id]
        params.extend(where_args)
        txn.execute(query, params)

        return dict(
            ((intern_string(typ), intern_string(state_key)), event_id)
            for typ, state_key, event_id in txn
        )

    return self.db.runInteraction(
        "get_filtered_current_state_ids", _get_filtered_current_state_ids_txn
    )
async def get_state_for_events(
    self, event_ids: Collection[str], state_filter: Optional[StateFilter] = None
) -> Dict[str, StateMap[EventBase]]:
    """Fetch the state (as full events) at each of the given events.

    Args:
        event_ids: The events to fetch the state of.
        state_filter: The state filter used to fetch state. A falsy value
            is treated as StateFilter.all().

    Returns:
        A dict of (event_id) -> (type, state_key) -> [state_events]

    Raises:
        RuntimeError if we don't have a state group for one or more of the
            events (ie they are outliers or unknown)
    """
    # Waiting for full state is only skipped when a filter was supplied and
    # that filter reports it does not need it.
    await_full_state = bool(
        not state_filter or state_filter.must_await_full_state(self._is_mine_id)
    )

    group_by_event = await self.get_state_group_for_events(
        event_ids, await_full_state=await_full_state
    )

    wanted_groups = set(group_by_event.values())
    state_ids_by_group = await self.stores.state._get_state_for_groups(
        wanted_groups, state_filter or StateFilter.all()
    )

    # Collect every state event ID referenced by any of the groups.
    state_event_ids = []
    for state_ids in state_ids_by_group.values():
        state_event_ids.extend(state_ids.values())

    fetched = await self.stores.main.get_events(
        state_event_ids, get_prev_content=False
    )

    state_by_event = {}
    for ev_id, group in group_by_event.items():
        resolved = {}
        for key, state_event_id in state_ids_by_group[group].items():
            # State entries whose event was not returned by get_events are
            # left out of the result.
            if state_event_id in fetched:
                resolved[key] = fetched[state_event_id]
        state_by_event[ev_id] = resolved

    return {ev_id: state_by_event[ev_id] for ev_id in event_ids}
async def get_state_ids_for_event(
    self, event_id: str, state_filter: Optional[StateFilter] = None
) -> StateMap[str]:
    """
    Fetch the state dict (as event IDs) for a single event by delegating to
    get_state_ids_for_events with a one-element list.

    Args:
        event_id: event whose state should be returned
        state_filter: The state filter used to fetch state from the
            database. A falsy value is treated as StateFilter.all().

    Returns:
        A dict from (type, state_key) -> state_event_id

    Raises:
        RuntimeError if we don't have a state group for the event (ie it is
            an outlier or is unknown)
    """
    per_event_state = await self.get_state_ids_for_events(
        [event_id],
        state_filter or StateFilter.all(),
    )
    state_at_event = per_event_state[event_id]
    return state_at_event
async def get_state_ids_for_events(
    self,
    event_ids: Collection[str],
    state_filter: Optional[StateFilter] = None,
) -> Dict[str, StateMap[str]]:
    """
    Look up, for each requested event, the state at that event expressed as
    event IDs (rather than the state events themselves).

    Args:
        event_ids: events whose state should be returned
        state_filter: The state filter used to fetch state from the
            database. A falsy value is treated as StateFilter.all().

    Returns:
        A dict from event_id -> (type, state_key) -> event_id

    Raises:
        RuntimeError if we don't have a state group for one or more of the
            events (ie they are outliers or unknown)
    """
    # Waiting for full state is only skipped when a filter was supplied and
    # that filter reports it does not need it.
    await_full_state = bool(
        not state_filter or state_filter.must_await_full_state(self._is_mine_id)
    )

    group_by_event = await self.get_state_group_for_events(
        event_ids, await_full_state=await_full_state
    )

    wanted_groups = set(group_by_event.values())
    state_by_group = await self.stores.state._get_state_for_groups(
        wanted_groups, state_filter or StateFilter.all()
    )

    state_by_event = {}
    for ev_id, group in group_by_event.items():
        state_by_event[ev_id] = state_by_group[group]

    requested = {}
    for ev_id in event_ids:
        requested[ev_id] = state_by_event[ev_id]
    return requested
def get_event_context(self, user, room_id, event_id, limit, event_filter):
    """Retrieves events, pagination tokens and state around a given event
    in a room.

    Args:
        user (UserID)
        room_id (str)
        event_id (str)
        limit (int): The maximum number of events to return in total
            (excluding state).
        event_filter (Filter|None): the filter to apply to the events
            returned (excluding the target event_id)

    Returns:
        dict, or None if the event isn't found
    """
    # Split the budget around the target event; any odd remainder goes to
    # the "after" side.
    before_limit = math.floor(limit / 2.0)
    after_limit = limit - before_limit

    room_members = yield self.store.get_users_in_room(room_id)
    is_peeking = user.to_string() not in room_members

    def filter_evts(events):
        return filter_events_for_client(
            self.storage, user.to_string(), events, is_peeking=is_peeking
        )

    event = yield self.store.get_event(
        event_id, get_prev_content=True, allow_none=True
    )
    if not event:
        return None

    filtered = yield (filter_evts([event]))
    if not filtered:
        raise AuthError(403, "You don't have permission to access that event.")

    results = yield self.store.get_events_around(
        room_id, event_id, before_limit, after_limit, event_filter
    )

    window_keys = ("events_before", "events_after")

    if event_filter:
        for key in window_keys:
            results[key] = event_filter.filter(results[key])

    for key in window_keys:
        results[key] = yield filter_evts(results[key])

    # filter_evts can return a pruned event in case the user is allowed to see that
    # there's something there but not see the content, so use the event that's in
    # `filtered` rather than the event we retrieved from the datastore.
    results["event"] = filtered[0]

    events_after = results["events_after"]
    last_event_id = events_after[-1].event_id if events_after else event_id

    if event_filter and event_filter.lazy_load_members():
        surrounding = itertools.chain(
            results["events_before"],
            (results["event"],),
            results["events_after"],
        )
        state_filter = StateFilter.from_lazy_load_member_list(
            ev.sender for ev in surrounding
        )
    else:
        state_filter = StateFilter.all()

    # XXX: why do we return the state as of the last event rather than the
    # first? Shouldn't we be consistent with /sync?
    # https://github.com/matrix-org/matrix-doc/issues/687

    state = yield self.state_store.get_state_for_events(
        [last_event_id], state_filter=state_filter
    )

    state_events = list(state[last_event_id].values())
    if event_filter:
        state_events = event_filter.filter(state_events)

    results["state"] = yield filter_evts(state_events)

    # We use a dummy token here as we only care about the room portion of
    # the token, which we replace.
    token = StreamToken.START

    for key in ("start", "end"):
        results[key] = token.copy_and_replace("room_key", results[key]).to_string()

    return results
def _get_state_for_groups(self, groups: Iterable[int], state_filter: StateFilter = StateFilter.all()):
    """Gets the state at each of a list of state groups, optionally
    filtering by type/state_key

    The member and non-member caches are consulted first; any group that
    either cache reports as incomplete is then fetched from the database,
    inserted into the caches, and its entry in the result replaced.

    Args:
        groups: list of state groups for which we want to get the state.
            NOTE(review): this is iterated more than once below (two cache
            lookups plus the merge loop), so a one-shot iterator would be
            exhausted early - presumably callers pass a collection; confirm.
        state_filter: The state filter used to fetch state from the
            database.

    Returns:
        Deferred[Dict[int, StateMap[str]]]: Dict of state group to state
        map.
    """
    # The two caches are queried with the matching half of the filter.
    member_filter, non_member_filter = state_filter.get_member_split()

    # Now we look them up in the member and non-member caches
    (
        non_member_state,
        incomplete_groups_nm,
    ) = yield self._get_state_for_groups_using_cache(
        groups, self._state_group_cache, state_filter=non_member_filter)

    (
        member_state,
        incomplete_groups_m,
    ) = yield self._get_state_for_groups_using_cache(
        groups, self._state_group_members_cache, state_filter=member_filter)

    # Merge the member entries into the non-member result, per group.
    state = dict(non_member_state)
    for group in groups:
        state[group].update(member_state[group])

    # Now fetch any missing groups from the database

    # A group needs a database fetch if either cache was incomplete for it.
    incomplete_groups = incomplete_groups_m | incomplete_groups_nm

    if not incomplete_groups:
        return state

    # NOTE(review): the cache sequence numbers are read before the database
    # fetch and handed to _insert_into_cache afterwards - presumably so the
    # cache can detect invalidations that raced with the read; confirm.
    cache_sequence_nm = self._state_group_cache.sequence
    cache_sequence_m = self._state_group_members_cache.sequence

    # Help the cache hit ratio by expanding the filter a bit
    db_state_filter = state_filter.return_expanded()

    group_to_state_dict = yield self._get_state_groups_from_groups(
        list(incomplete_groups), state_filter=db_state_filter)

    # Now lets update the caches
    self._insert_into_cache(
        group_to_state_dict,
        db_state_filter,
        cache_seq_num_members=cache_sequence_m,
        cache_seq_num_non_members=cache_sequence_nm,
    )

    # And finally update the result dict, by filtering out any extra
    # stuff we pulled out of the database.
    for group, group_state_dict in iteritems(group_to_state_dict):
        # We just replace any existing entries, as we will have loaded
        # everything we need from the database anyway.
        state[group] = state_filter.filter_state(group_state_dict)

    return state
def get_state_events(
    self,
    user_id,
    room_id,
    state_filter=StateFilter.all(),
    at_token=None,
    is_guest=False,
):
    """Retrieve all state events for a given room. If the user is
    joined to the room then return the current state. If the user has
    left the room return the state events from when they left. If an explicit
    'at' parameter is passed, return the state events as of that event, if
    visible.

    Args:
        user_id(str): The user requesting state events.
        room_id(str): The room ID to get all state events from.
        state_filter (StateFilter): The state filter used to fetch state
            from the database.
        at_token(StreamToken|None): the stream token at which we are
            requesting the state. If the user is not allowed to view the
            state as of that stream token, we raise a 403 SynapseError. If
            None, returns the current state based on the
            current_state_events table.
        is_guest(bool): whether this user is a guest
    Returns:
        A list of dicts representing state events. [{}, {}, {}]
    Raises:
        NotFoundError (404) if the at token does not yield an event

        AuthError (403) if the user doesn't have permission to view
        members of this room.
    """
    if at_token:
        # FIXME this claims to get the state at a stream position, but
        # get_recent_events_for_room operates by topo ordering. This therefore
        # does not reliably give you the state at the given stream position.
        # (https://github.com/matrix-org/synapse/issues/3305)
        last_events, _ = yield self.store.get_recent_events_for_room(
            room_id, end_token=at_token.room_key, limit=1
        )

        if not last_events:
            raise NotFoundError("Can't find event for token %s" % (at_token,))

        visible_events = yield filter_events_for_client(
            self.storage, user_id, last_events, filter_send_to_client=False
        )

        event = last_events[0]
        if visible_events:
            room_state = yield self.state_store.get_state_for_events(
                [event.event_id], state_filter=state_filter
            )
            room_state = room_state[event.event_id]
        else:
            raise AuthError(
                403,
                "User %s not allowed to view events in room %s at token %s"
                % (user_id, room_id, at_token),
            )
    else:
        (
            membership,
            membership_event_id,
        ) = yield self.auth.check_user_in_room_or_world_readable(
            room_id, user_id, allow_departed_users=True
        )

        if membership == Membership.JOIN:
            # Joined (or world-readable) rooms are served from the current
            # state table.
            state_ids = yield self.store.get_filtered_current_state_ids(
                room_id, state_filter=state_filter
            )
            room_state = yield self.store.get_events(state_ids.values())
        elif membership == Membership.LEAVE:
            # Departed users see the state as of their leave event.
            room_state = yield self.state_store.get_state_for_events(
                [membership_event_id], state_filter=state_filter
            )
            room_state = room_state[membership_event_id]
        else:
            # Safeguard: the membership check is presumably only meant to
            # hand back JOIN or LEAVE. Any other value would leave
            # `room_state` unbound and surface as an UnboundLocalError
            # below, so refuse the request explicitly instead.
            raise AuthError(
                403,
                "User %s not allowed to view events in room %s"
                % (user_id, room_id),
            )

    now = self.clock.time_msec()
    events = yield self._event_serializer.serialize_events(
        room_state.values(),
        now,
        # We don't bother bundling aggregations in when asked for state
        # events, as clients won't use them.
        bundle_aggregations=False,
    )
    return events
def test_return_expanded(self):
    """
    Exercises return_expanded(), the function that widens StateFilters to
    include more state types (for the sake of cache hit rate).
    """

    def restricted(types):
        # Filter limited to exactly `types`.
        return StateFilter.freeze(types, include_others=False)

    def with_others(types):
        # Filter for `types` plus every type not listed.
        return StateFilter.freeze(types, include_others=True)

    self.assertEqual(StateFilter.all().return_expanded(), StateFilter.all())
    self.assertEqual(StateFilter.none().return_expanded(), StateFilter.none())

    # Filters that must come back unchanged. Each entry is a factory so that
    # both sides of the comparison are built from fresh objects.
    unchanged_type_maps = (
        # Concrete-only state filters stay the same
        # (Case: mixed filter)
        lambda: {
            EventTypes.Member: {"@wombat:test", "@alicia:test"},
            "some.other.state.type": {""},
        },
        # Concrete-only state filters stay the same
        # (Case: non-member-only filter)
        lambda: {"some.other.state.type": {""}},
        # Concrete-only state filters stay the same
        # (Case: member-only filter)
        lambda: {EventTypes.Member: {"@wombat:test", "@alicia:test"}},
        # Wildcard member-only state filters stay the same
        lambda: {EventTypes.Member: None},
    )
    for make_types in unchanged_type_maps:
        self.assertEqual(
            restricted(make_types()).return_expanded(),
            restricted(make_types()),
        )

    # Filters that must be widened: (input type map, type map expected
    # alongside include_others=True).
    widened_cases = (
        # If there is a wildcard in the non-member portion of the filter,
        # it's expanded to include ALL non-member events.
        # (Case: mixed filter)
        (
            lambda: {
                EventTypes.Member: {"@wombat:test", "@alicia:test"},
                "some.other.state.type": None,
            },
            lambda: {EventTypes.Member: {"@wombat:test", "@alicia:test"}},
        ),
        # If there is a wildcard in the non-member portion of the filter,
        # it's expanded to include ALL non-member events.
        # (Case: non-member-only filter)
        (
            lambda: {"some.other.state.type": None},
            lambda: {EventTypes.Member: set()},
        ),
        (
            lambda: {
                "some.other.state.type": None,
                "yet.another.state.type": {"wombat"},
            },
            lambda: {EventTypes.Member: set()},
        ),
    )
    for make_input, make_expected in widened_cases:
        self.assertEqual(
            restricted(make_input()).return_expanded(),
            with_others(make_expected()),
        )
def _get_state_groups_from_groups_txn(self, txn, groups, state_filter=StateFilter.all()):
    """Fetch the state for each of the given state groups inside an
    existing transaction, walking each group's chain of previous groups.

    On PostgreSQL a single recursive query per group is used; on other
    engines the chain of `state_group_edges` is followed one group at a
    time.

    Args:
        txn: the database transaction/cursor to run the queries on.
        groups: the state groups to look up.
        state_filter (StateFilter): restricts which (type, state_key)
            entries are fetched.

    Returns:
        dict mapping each requested group to a dict of
        (type, state_key) -> event_id.
    """
    # Every requested group gets an entry, even if no rows are found for it.
    results = {group: {} for group in groups}

    where_clause, where_args = state_filter.make_sql_filter_clause()

    # Unless the filter clause is empty, we're going to append it after an
    # existing where clause
    if where_clause:
        where_clause = " AND (%s)" % (where_clause, )

    if isinstance(self.database_engine, PostgresEngine):
        # Temporarily disable sequential scans in this transaction. This is
        # a temporary hack until we can add the right indices in
        txn.execute("SET LOCAL enable_seqscan=off")

        # The below query walks the state_group tree so that the "state"
        # table includes all state_groups in the tree. It then joins
        # against `state_groups_state` to fetch the latest state.
        # It assumes that previous state groups are always numerically
        # lesser.
        # The PARTITION is used to get the event_id in the greatest state
        # group for the given type, state_key.
        # This may return multiple rows per (type, state_key), but last_value
        # should be the same.
        sql = """ WITH RECURSIVE state(state_group) AS ( VALUES(?::bigint) UNION ALL SELECT prev_state_group FROM state_group_edges e, state s WHERE s.state_group = e.state_group ) SELECT DISTINCT type, state_key, last_value(event_id) OVER ( PARTITION BY type, state_key ORDER BY state_group ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) AS event_id FROM state_groups_state WHERE state_group IN ( SELECT state_group FROM state ) """

        for group in groups:
            # The group ID binds the VALUES(?) placeholder; the filter's
            # arguments bind the appended clause.
            args = [group]
            args.extend(where_args)

            txn.execute(sql + where_clause, args)
            for row in txn:
                typ, state_key, event_id = row
                key = (typ, state_key)
                results[group][key] = event_id
    else:
        max_entries_returned = state_filter.max_entries_returned()

        # We don't use WITH RECURSIVE on sqlite3 as there are distributions
        # that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
        for group in groups:
            next_group = group

            while next_group:
                # We did this before by getting the list of group ids, and
                # then passing that list to sqlite to get latest event for
                # each (type, state_key). However, that was terribly slow
                # without the right indices (which we can't add until
                # after we finish deduping state, which requires this func)
                args = [next_group]
                args.extend(where_args)

                txn.execute(
                    "SELECT type, state_key, event_id FROM state_groups_state"
                    " WHERE state_group = ? " + where_clause,
                    args,
                )
                # Entries already recorded for this group are kept: rows
                # found further down the chain never overwrite them.
                results[group].update(
                    ((typ, state_key), event_id)
                    for typ, state_key, event_id in txn
                    if (typ, state_key) not in results[group])

                # If the number of entries in the (type,state_key)->event_id dict
                # matches the number of (type,state_keys) types we were searching
                # for, then we must have found them all, so no need to go walk
                # further down the tree... UNLESS our types filter contained
                # wildcards (i.e. Nones) in which case we have to do an exhaustive
                # search
                if (max_entries_returned is not None and len(results[group]) == max_entries_returned):
                    break

                # Step to the previous group; a None result ends the walk.
                next_group = self.db.simple_select_one_onecol_txn(
                    txn,
                    table="state_group_edges",
                    keyvalues={"state_group": next_group},
                    retcol="prev_state_group",
                    allow_none=True,
                )

    return results
def get_state_events(
    self,
    user_id,
    room_id,
    state_filter=StateFilter.all(),
    at_token=None,
    is_guest=False,
):
    """Retrieve all state events for a given room. If the user is
    joined to the room then return the current state. If the user has
    left the room return the state events from when they left. If an explicit
    'at' parameter is passed, return the state events as of that event, if
    visible.

    Args:
        user_id(str): The user requesting state events.
        room_id(str): The room ID to get all state events from.
        state_filter (StateFilter): The state filter used to fetch state
            from the database.
        at_token(StreamToken|None): the stream token at which we are
            requesting the state. If the user is not allowed to view the
            state as of that stream token, we raise a 403 SynapseError. If
            None, returns the current state based on the
            current_state_events table.
        is_guest(bool): whether this user is a guest
    Returns:
        A list of dicts representing state events. [{}, {}, {}]
    Raises:
        NotFoundError (404) if the at token does not yield an event

        AuthError (403) if the user doesn't have permission to view
        members of this room.
    """
    if at_token:
        # FIXME this claims to get the state at a stream position, but
        # get_recent_events_for_room operates by topo ordering. This therefore
        # does not reliably give you the state at the given stream position.
        # (https://github.com/matrix-org/synapse/issues/3305)
        last_events, _ = yield self.store.get_recent_events_for_room(
            room_id,
            end_token=at_token.room_key,
            limit=1,
        )

        if not last_events:
            raise NotFoundError("Can't find event for token %s" % (at_token,))

        visible_events = yield filter_events_for_client(
            self.store,
            user_id,
            last_events,
        )

        event = last_events[0]
        if visible_events:
            room_state = yield self.store.get_state_for_events(
                [event.event_id],
                state_filter=state_filter,
            )
            room_state = room_state[event.event_id]
        else:
            raise AuthError(
                403,
                "User %s not allowed to view events in room %s at token %s"
                % (
                    user_id,
                    room_id,
                    at_token,
                ),
            )
    else:
        membership, membership_event_id = (
            yield self.auth.check_in_room_or_world_readable(
                room_id,
                user_id,
            )
        )

        if membership == Membership.JOIN:
            # Joined (or world-readable) rooms are served from the current
            # state table.
            state_ids = yield self.store.get_filtered_current_state_ids(
                room_id,
                state_filter=state_filter,
            )
            room_state = yield self.store.get_events(state_ids.values())
        elif membership == Membership.LEAVE:
            # Departed users see the state as of their leave event.
            room_state = yield self.store.get_state_for_events(
                [membership_event_id],
                state_filter=state_filter,
            )
            room_state = room_state[membership_event_id]
        else:
            # Safeguard: the membership check is presumably only meant to
            # hand back JOIN or LEAVE. Any other value would leave
            # `room_state` unbound and surface as an UnboundLocalError
            # below, so refuse the request explicitly instead.
            raise AuthError(
                403,
                "User %s not allowed to view events in room %s"
                % (user_id, room_id),
            )

    now = self.clock.time_msec()
    events = yield self._event_serializer.serialize_events(
        room_state.values(),
        now,
    )
    defer.returnValue(events)
def get_event_context(self, user, room_id, event_id, limit, event_filter):
    """Retrieves events, pagination tokens and state around a given event
    in a room.

    Args:
        user (UserID)
        room_id (str)
        event_id (str)
        limit (int): The maximum number of events to return in total
            (excluding state).
        event_filter (Filter|None): the filter to apply to the events
            returned (excluding the target event_id)

    Returns:
        dict, or None if the event isn't found

    Raises:
        AuthError (403) if the visibility filter removes the target event
        for this user.
    """
    # Split the budget around the target event; any odd remainder goes to
    # the "after" side.
    before_limit = math.floor(limit / 2.)
    after_limit = limit - before_limit

    users = yield self.store.get_users_in_room(room_id)
    is_peeking = user.to_string() not in users

    def filter_evts(events):
        return filter_events_for_client(
            self.store,
            user.to_string(),
            events,
            is_peeking=is_peeking
        )

    event = yield self.store.get_event(event_id, get_prev_content=True, allow_none=True)
    if not event:
        defer.returnValue(None)
        return

    filtered = yield(filter_evts([event]))
    if not filtered:
        raise AuthError(
            403,
            "You don't have permission to access that event."
        )

    results = yield self.store.get_events_around(
        room_id, event_id, before_limit, after_limit, event_filter
    )

    results["events_before"] = yield filter_evts(results["events_before"])
    results["events_after"] = yield filter_evts(results["events_after"])
    # filter_evts can return a pruned event in case the user is allowed to
    # see that there's something there but not see the content, so use the
    # event that's in `filtered` rather than the event we retrieved from the
    # datastore.
    results["event"] = filtered[0]

    if results["events_after"]:
        last_event_id = results["events_after"][-1].event_id
    else:
        last_event_id = event_id

    if event_filter and event_filter.lazy_load_members():
        # Only the senders of the returned events are passed to the
        # lazy-load member filter.
        state_filter = StateFilter.from_lazy_load_member_list(
            ev.sender
            for ev in itertools.chain(
                results["events_before"],
                (results["event"],),
                results["events_after"],
            )
        )
    else:
        state_filter = StateFilter.all()

    # XXX: why do we return the state as of the last event rather than the
    # first? Shouldn't we be consistent with /sync?
    # https://github.com/matrix-org/matrix-doc/issues/687

    state = yield self.store.get_state_for_events(
        [last_event_id], state_filter=state_filter,
    )
    results["state"] = list(state[last_event_id].values())

    # We use a dummy token here as we only care about the room portion of
    # the token, which we replace.
    token = StreamToken.START

    results["start"] = token.copy_and_replace(
        "room_key", results["start"]
    ).to_string()

    results["end"] = token.copy_and_replace(
        "room_key", results["end"]
    ).to_string()

    defer.returnValue(results)