def _add_device_change_to_stream_txn( self, txn: LoggingTransaction, user_id: str, device_ids: Collection[str], stream_ids: List[str], ): txn.call_after( self._device_list_stream_cache.entity_has_changed, user_id, stream_ids[-1], ) min_stream_id = stream_ids[0] # Delete older entries in the table, as we really only care about # when the latest change happened. txn.executemany( """ DELETE FROM device_lists_stream WHERE user_id = ? AND device_id = ? AND stream_id < ? """, [(user_id, device_id, min_stream_id) for device_id in device_ids], ) self.db.simple_insert_many_txn( txn, table="device_lists_stream", values=[ {"stream_id": stream_id, "user_id": user_id, "device_id": device_id} for stream_id, device_id in zip(stream_ids, device_ids) ], )
def purged_chain_cover_txn(txn: LoggingTransaction) -> int:
    """Remove auth-chain rows whose backing event has been purged.

    Scans one batch of `event_auth_chains` (bounded by the enclosing
    scope's `current_event_id` and `batch_size`), deletes the rows whose
    event no longer exists, records background-update progress, and
    returns the number of rows scanned (0 once the table is exhausted).
    """
    # The event ID from events will be null if the chain ID / sequence
    # number points to a purged event.
    sql = """
        SELECT event_id, chain_id, sequence_number, e.event_id IS NOT NULL
        FROM event_auth_chains
        LEFT JOIN events AS e USING (event_id)
        WHERE event_id > ? ORDER BY event_auth_chains.event_id ASC LIMIT ?
    """
    txn.execute(sql, (current_event_id, batch_size))

    rows = txn.fetchall()
    if not rows:
        return 0

    # Gather the purged events' IDs, plus their chain ID / sequence
    # number pairs.
    purged_event_ids = []
    purged_chain_tuples = []
    last_event_id = ""
    for last_event_id, chain_id, sequence_number, has_event in rows:
        if has_event:
            continue
        purged_event_ids.append((last_event_id,))
        purged_chain_tuples.append((chain_id, sequence_number))

    # Delete the unreferenced auth chains from event_auth_chain_links and
    # event_auth_chains.
    txn.executemany(
        """
        DELETE FROM event_auth_chains
        WHERE event_id = ?
        """,
        purged_event_ids,
    )
    # We should also delete matching target_*, but there is no index on
    # target_chain_id. Hopefully any purged events are due to a room
    # being fully purged and they will be removed from the origin_*
    # searches.
    txn.executemany(
        """
        DELETE FROM event_auth_chain_links
        WHERE origin_chain_id = ? AND origin_sequence_number = ?
        """,
        purged_chain_tuples,
    )

    # Remember how far we got so the next batch resumes after the last
    # row we looked at.
    self.db_pool.updates._background_update_progress_txn(
        txn, "purged_chain_cover", {"current_event_id": last_event_id}
    )

    return len(rows)
def insert_many_txn(
    self,
    txn: LoggingTransaction,
    table: str,
    headers: List[str],
    rows: List[Tuple],
) -> None:
    """Bulk-insert `rows` into `table` within the given transaction.

    Args:
        txn: The database transaction to run the insert in.
        table: Name of the destination table.
        headers: Column names, in the same order as the values in each row.
        rows: The rows to insert, one tuple of values per row.

    Raises:
        Re-raises whatever `txn.executemany` raises, after logging which
        table the failed insert targeted.
    """
    # NOTE(review): uses the "%s" DB-API paramstyle, so this presumably
    # targets a psycopg2/PostgreSQL connection rather than sqlite3's "?".
    sql = "INSERT INTO %s (%s) VALUES (%s)" % (
        table,
        # join() accepts the list directly; the original wrapped it in a
        # redundant generator expression.
        ", ".join(headers),
        ", ".join("%s" for _ in headers),
    )

    try:
        txn.executemany(sql, rows)
    except Exception:
        logger.exception("Failed to insert: %s", table)
        raise
def insert(txn: LoggingTransaction) -> None:
    """Copy the current batch of `event_search` rows (from the enclosing
    scope) into the destination store, rebuilding the tsvector column,
    then record how far the port has progressed."""
    sql = (
        "INSERT INTO event_search (event_id, room_id, key,"
        " sender, vector, origin_server_ts, stream_ordering)"
        " VALUES (?,?,?,?,to_tsvector('english', ?),?,?)"
    )

    # Pair each raw row up with its column names, dropping (and logging)
    # any row whose search value contains a NUL byte.
    kept = []
    for raw in rows:
        entry = dict(zip(headers, raw))
        if "\0" in entry["value"]:
            logger.warning("dropping search row %s", entry)
        else:
            kept.append(entry)

    txn.executemany(
        sql,
        [
            (
                entry["event_id"],
                entry["room_id"],
                entry["key"],
                entry["sender"],
                entry["value"],
                entry["origin_server_ts"],
                entry["stream_ordering"],
            )
            for entry in kept
        ],
    )

    # Persist the new forward/backward chunk positions so the port can
    # resume from here.
    self.postgres_store.db_pool.simple_update_one_txn(
        txn,
        table="port_from_sqlite3",
        keyvalues={"table_name": "event_search"},
        updatevalues={
            "forward_rowid": forward_chunk,
            "backward_rowid": backward_chunk,
        },
    )
def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[int]:
    """Delete all database rows associated with `room_id`.

    Args:
        txn: The database transaction to run the deletes in.
        room_id: The room to purge.

    Returns:
        The state group IDs that were referenced by the room's events;
        the caller is responsible for purging that state separately.
    """
    # First, fetch all the state groups that should be deleted, before
    # we delete that information.
    txn.execute(
        """
        SELECT DISTINCT state_group FROM events
        INNER JOIN event_to_state_groups USING(event_id)
        WHERE events.room_id = ?
        """,
        (room_id, ),
    )

    state_groups = [row[0] for row in txn]

    # Get all the auth chains that are referenced by events that are to be
    # deleted.
    txn.execute(
        """
        SELECT chain_id, sequence_number FROM events
        LEFT JOIN event_auth_chains USING (event_id)
        WHERE room_id = ?
        """,
        (room_id, ),
    )
    referenced_chain_id_tuples = list(txn)

    logger.info("[purge] removing events from event_auth_chain_links")
    txn.executemany(
        """
        DELETE FROM event_auth_chain_links WHERE
        origin_chain_id = ? AND origin_sequence_number = ?
        """,
        referenced_chain_id_tuples,
    )

    # Now we delete tables which lack an index on room_id but have one on event_id
    for table in (
        "event_auth",
        "event_edges",
        "event_json",
        "event_push_actions_staging",
        "event_relations",
        "event_to_state_groups",
        "event_auth_chains",
        "event_auth_chain_to_calculate",
        "redactions",
        "rejections",
        "state_events",
    ):
        logger.info("[purge] removing %s from %s", room_id, table)

        # Table name is interpolated from the fixed tuple above, not from
        # user input; only room_id is a bound parameter.
        txn.execute(
            """
            DELETE FROM %s WHERE event_id IN (
              SELECT event_id FROM events WHERE room_id=?
            )
            """
            % (table, ),
            (room_id, ),
        )

    # next, the tables with an index on room_id (or no useful index)
    for table in (
        "current_state_events",
        "destination_rooms",
        "event_backward_extremities",
        "event_forward_extremities",
        "event_push_actions",
        "event_search",
        "partial_state_events",
        "events",
        "federation_inbound_events_staging",
        "local_current_membership",
        "partial_state_rooms_servers",
        "partial_state_rooms",
        "receipts_graph",
        "receipts_linearized",
        "room_aliases",
        "room_depth",
        "room_memberships",
        "room_stats_state",
        "room_stats_current",
        "room_stats_earliest_token",
        "stream_ordering_to_exterm",
        "users_in_public_rooms",
        "users_who_share_private_rooms",
        # no useful index, but let's clear them anyway
        "appservice_room_list",
        "e2e_room_keys",
        "event_push_summary",
        "pusher_throttle",
        "room_account_data",
        "room_tags",
        # "rooms" happens last, to keep the foreign keys in the other tables
        # happy
        "rooms",
    ):
        logger.info("[purge] removing %s from %s", room_id, table)
        txn.execute("DELETE FROM %s WHERE room_id=?" % (table, ), (room_id, ))

    # Other tables we do NOT need to clear out:
    #
    #  - blocked_rooms
    #    This is important, to make sure that we don't accidentally rejoin a blocked
    #    room after it was purged
    #
    #  - user_directory
    #    This has a room_id column, but it is unused
    #

    # Other tables that we might want to consider clearing out include:
    #
    #  - event_reports
    #       Given that these are intended for abuse management my initial
    #       inclination is to leave them in place.
    #
    #  - current_state_delta_stream
    #  - ex_outlier_stream
    #  - room_tags_revisions
    #       The problem with these is that they are largeish and there is no room_id
    #       index on them. In any case we should be clearing out 'stream' tables
    #       periodically anyway (#5888)

    # TODO: we could probably usefully do a bunch more cache invalidation here

    # XXX: as with purge_history, this is racy, but no worse than other races
    #   that already exist.
    self._invalidate_cache_and_stream(txn, self.have_seen_event, (room_id, ))

    logger.info("[purge] done")

    return state_groups