def test_emulated_tuple_comparison(self):
    """Check the clause built when the engine lacks tuple comparison.

    With `supports_tuple_comparison=False` the three-column comparison
    is expected to be expanded into nested `>=` / `>` conditions, with
    every value except the last one bound twice.
    """
    engine = _stub_db_engine(supports_tuple_comparison=False)
    columns = [("a", 1), ("b", 2), ("c", 3)]

    sql_fragment, bound_values = make_tuple_comparison_clause(engine, columns)

    expected_fragment = "(a >= ? AND (a > ? OR (b >= ? AND (b > ? OR c > ?))))"
    self.assertEqual(sql_fragment, expected_fragment)
    self.assertEqual(bound_values, [1, 1, 2, 2, 3])
def _devices_last_seen_update_txn(txn):
    """Copy the latest `user_ips` details onto the next batch of devices.

    Selects up to `batch_size` devices ordered after
    `(last_user_id, last_device_id)`, writes their `last_seen`, `ip` and
    `user_agent` into `devices`, and records the last processed
    user/device pair as the background update's progress.

    Returns:
        The number of rows fetched (0 when there was nothing to do).
    """
    # The SELECT is made of two parts:
    #
    #   * The inner query picks the next N devices (in user_id, device_id
    #     order) and joins against user_ips to compute the maximum
    #     last_seen for each of them.
    #   * The outer query joins user_ips once more on
    #     user/device/last_seen to pull out the matching ip and
    #     user_agent. Ideally that yields one row per device; if it
    #     yields more we simply update the same device row several
    #     times, which is harmless.
    where_clause, where_args = make_tuple_comparison_clause(
        self.database_engine,
        [("user_id", last_user_id), ("device_id", last_device_id)],
    )

    select_sql = """
        SELECT
            last_seen, ip, user_agent, user_id, device_id
        FROM (
            SELECT
                user_id, device_id, MAX(u.last_seen) AS last_seen
            FROM devices
            INNER JOIN user_ips AS u USING (user_id, device_id)
            WHERE %(where_clause)s
            GROUP BY user_id, device_id
            ORDER BY user_id ASC, device_id ASC
            LIMIT ?
        ) c
        INNER JOIN user_ips AS u USING (user_id, device_id, last_seen)
    """ % {
        "where_clause": where_clause
    }
    select_params = where_args + [batch_size]
    txn.execute(select_sql, select_params)

    fetched = txn.fetchall()
    if not fetched:
        return 0

    # Each fetched row already has the column order the UPDATE expects:
    # (last_seen, ip, user_agent, user_id, device_id).
    update_sql = """
        UPDATE devices
        SET last_seen = ?, ip = ?, user_agent = ?
        WHERE user_id = ? AND device_id = ?
    """
    txn.execute_batch(update_sql, fetched)

    # Remember where this batch ended so the next one resumes after it.
    _seen, _ip, _agent, final_user_id, final_device_id = fetched[-1]
    progress = {
        "last_user_id": final_user_id,
        "last_device_id": final_device_id,
    }
    self.db.updates._background_update_progress_txn(
        txn, "devices_last_seen", progress
    )

    return len(fetched)
def _txn(txn):
    """Process one batch of duplicated `device_lists_outbound_pokes` rows.

    Selects up to `batch_size` key groups (grouped by `KEY_COLS`, ordered
    after `last_row`) that occur more than once. For each one it issues a
    delete keyed on `KEY_COLS` and re-inserts the selected row with
    `sent` set to False. The last handled row is stored as the
    background update's progress.

    Returns:
        The number of key groups handled in this batch.
    """
    table = "device_lists_outbound_pokes"
    key_list = ",".join(KEY_COLS)

    clause, args = make_tuple_comparison_clause(
        self.db_pool.engine,
        [(col, last_row[col]) for col in KEY_COLS],
    )

    sql = """
        SELECT stream_id, destination, user_id, device_id, MAX(ts) AS ts
        FROM device_lists_outbound_pokes
        WHERE %s
        GROUP BY %s
        HAVING count(*) > 1
        ORDER BY %s
        LIMIT ?
    """ % (
        clause,  # WHERE
        key_list,  # GROUP BY
        key_list,  # ORDER BY
    )
    txn.execute(sql, args + [batch_size])
    duplicates = self.db_pool.cursor_to_dict(txn)

    last_handled = None
    for entry in duplicates:
        key_values = {col: entry[col] for col in KEY_COLS}
        self.db_pool.simple_delete_txn(txn, table, key_values)

        # Put back a single copy, flagged as not yet sent.
        entry["sent"] = False
        self.db_pool.simple_insert_txn(txn, table, entry)

        last_handled = entry

    # Only record progress when the batch actually contained something.
    if last_handled:
        self.db_pool.updates._background_update_progress_txn(
            txn,
            BG_UPDATE_REMOVE_DUP_OUTBOUND_POKES,
            {"last_row": last_handled},
        )

    return len(duplicates)
def test_native_tuple_comparison(self):
    """Check the clause built when the engine supports tuple comparison.

    With `supports_tuple_comparison=True` the two columns are expected to
    be compared as a single `(a,b) > (?,?)` expression, each value bound
    once.
    """
    engine = _stub_db_engine(supports_tuple_comparison=True)
    columns = [("a", 1), ("b", 2)]

    sql_fragment, bound_values = make_tuple_comparison_clause(engine, columns)

    self.assertEqual(sql_fragment, "(a,b) > (?,?)")
    self.assertEqual(bound_values, [1, 2])
def _calculate_chain_cover_txn(
    self,
    txn: Cursor,
    last_room_id: str,
    last_depth: int,
    last_stream: int,
    batch_size: Optional[int],
    single_room: bool,
) -> _CalculateChainCover:
    """Compute the chain cover for the next batch of events.

    Events are walked in `(room_id, depth, stream)` order, starting
    strictly after the position given by the `last_*` arguments.

    Args:
        txn: The database cursor to run the queries on.
        last_room_id, last_depth, last_stream: The `(room_id, depth,
            stream)` position to fetch results after.
        batch_size: Upper bound on the number of events to process, or
            None for no limit.
        single_room: If true, restrict the query to `last_room_id`.

    Returns:
        A `_CalculateChainCover` holding the new position, the number of
        events processed and the rooms that were fully processed.
    """
    # Fetch the next events (with no chain cover computed or queued yet),
    # in topological order. Conceptually this is a
    # `(room_id, topological_ordering, stream_ordering) > (?,?,?)`
    # comparison, which older SQLite versions do not support directly,
    # hence the helper.
    tuple_clause, tuple_args = make_tuple_comparison_clause(
        [
            ("events.room_id", last_room_id),
            ("topological_ordering", last_depth),
            ("stream_ordering", last_stream),
        ],
    )

    # The bind values must be appended in the same order as their
    # placeholders appear in the query: room filter first, limit last.
    if single_room:
        extra_clause = "AND events.room_id = ?"
        tuple_args.append(last_room_id)
    else:
        extra_clause = ""

    if batch_size is not None:
        limit_clause = "LIMIT ?"
        tuple_args.append(batch_size)
    else:
        limit_clause = ""

    sql = """
        SELECT
            event_id, state_events.type, state_events.state_key,
            topological_ordering, stream_ordering,
            events.room_id
        FROM events
        INNER JOIN state_events USING (event_id)
        LEFT JOIN event_auth_chains USING (event_id)
        LEFT JOIN event_auth_chain_to_calculate USING (event_id)
        WHERE event_auth_chains.event_id IS NULL
            AND event_auth_chain_to_calculate.event_id IS NULL
            AND %(tuple_cmp)s
            %(extra)s
        ORDER BY events.room_id, topological_ordering, stream_ordering
        %(limit)s
    """ % {
        "tuple_cmp": tuple_clause,
        "limit": limit_clause,
        "extra": extra_clause,
    }

    txn.execute(sql, tuple_args)
    rows = txn.fetchall()

    # Reshape the result into the mappings `_add_chain_cover_index` takes.
    # Row layout: (event_id, type, state_key, depth, stream, room_id).
    event_to_room_id = {}
    event_to_types = {}
    for fetched in rows:
        event_id = fetched[0]
        event_to_room_id[event_id] = fetched[5]
        event_to_types[event_id] = (fetched[1], fetched[2])

    # Work out the position we have now processed up to. With no rows the
    # depth/stream stay where they were and the room id becomes empty.
    if rows:
        final_row = rows[-1]
        new_last_depth: int = final_row[3]
        new_last_stream: int = final_row[4]
        new_last_room_id: str = final_row[5]
    else:
        new_last_depth = last_depth
        new_last_stream = last_stream
        new_last_room_id = ""

    # Collect, per room, the last depth/stream_ordering processed -- for
    # every room except the final one in the batch, which we are probably
    # still in the middle of. These are the rooms whose
    # `has_auth_chain_index` bit can now safely be flipped.
    finished_rooms = {}
    for fetched in rows:
        room_id = fetched[5]
        if room_id != new_last_room_id:
            finished_rooms[room_id] = (fetched[3], fetched[4])

    # The room we started from also counts as finished if it did not show
    # up in the results at all: every event in it has been processed.
    if last_room_id != new_last_room_id:
        if last_room_id not in finished_rooms:
            finished_rooms[last_room_id] = (last_depth, last_stream)

    count = len(rows)

    # Look up the auth events of everything we fetched.
    auth_events = self.db_pool.simple_select_many_txn(
        txn,
        table="event_auth",
        column="event_id",
        iterable=event_to_room_id,
        keyvalues={},
        retcols=("event_id", "auth_id"),
    )

    event_to_auth_chain: Dict[str, List[str]] = {}
    for auth_row in auth_events:
        chain = event_to_auth_chain.setdefault(auth_row["event_id"], [])
        chain.append(auth_row["auth_id"])

    # Calculate and persist the chain cover index for this set of events.
    #
    # Annoyingly this reaches into the persist event store so that its
    # chain cover calculation can be reused here.
    PersistEventsStore._add_chain_cover_index(
        txn,
        self.db_pool,
        self.event_chain_id_gen,
        event_to_room_id,
        event_to_types,
        event_to_auth_chain,
    )

    return _CalculateChainCover(
        room_id=new_last_room_id,
        depth=new_last_depth,
        stream=new_last_stream,
        processed_count=count,
        finished_room_map=finished_rooms,
    )
def test_native_tuple_comparison(self):
    """Check that two columns yield a single tuple comparison clause.

    The expected output is the `(a,b) > (?,?)` expression with each
    value bound exactly once, in column order.
    """
    columns = [("a", 1), ("b", 2)]

    sql_fragment, bound_values = make_tuple_comparison_clause(columns)

    self.assertEqual(sql_fragment, "(a,b) > (?,?)")
    self.assertEqual(bound_values, [1, 2])