def test_not_upgraded_current_schema_version_with_outstanding_deltas(self):
    """
    Test that workers don't start if the DB is on the current schema version,
    but there are still outstanding delta migrations to run.
    """
    db_pool = self.hs.get_datastore().db_pool
    db_conn = LoggingDatabaseConnection(
        db_pool._db_pool.connect(),
        db_pool.engine,
        "tests",
    )

    # Set the schema version of the database to the current version
    cur = db_conn.cursor()
    cur.execute("UPDATE schema_version SET version = ?", (SCHEMA_VERSION,))
    db_conn.commit()

    # Patch `os.listdir` here to make synapse think that there is a migration
    # file ready to be run.
    # Note that we can't patch this function for the whole method, else Synapse
    # will try to find the file when building the database initially.
    with mock.patch("os.listdir", mock.Mock(side_effect=fake_listdir)):
        with self.assertRaises(PrepareDatabaseException):
            # Synapse should think that there is an outstanding migration file due to
            # the `os.listdir` patch above.
            #
            # We expect Synapse to raise an exception to indicate the master process
            # needs to apply this migration file.
            prepare_database(db_conn, db_pool.engine, self.hs.config)
def __init__(
    self,
    database: DatabasePool,
    db_conn: LoggingDatabaseConnection,
    hs: "HomeServer",
):
    super().__init__(database, db_conn, hs)

    # These get correctly set by _find_stream_orderings_for_times_txn
    self.stream_ordering_month_ago: Optional[int] = None
    self.stream_ordering_day_ago: Optional[int] = None

    cur = db_conn.cursor(txn_name="_find_stream_orderings_for_times_txn")
    self._find_stream_orderings_for_times_txn(cur)
    cur.close()

    self.find_stream_orderings_looping_call = self._clock.looping_call(
        self._find_stream_orderings_for_times, 10 * 60 * 1000
    )

    self._rotate_delay = 3
    self._rotate_count = 10000
    self._doing_notif_rotation = False
    if hs.config.worker.run_background_tasks:
        self._rotate_notif_loop = self._clock.looping_call(
            self._rotate_notifs, 30 * 60 * 1000
        )
def create_index(conn: LoggingDatabaseConnection) -> None:
    conn.rollback()

    # we have to set autocommit, because postgres refuses to
    # CREATE INDEX CONCURRENTLY without it.
    conn.set_session(autocommit=True)

    try:
        c = conn.cursor()

        # if we skipped the conversion to GIST, we may already/still
        # have an event_search_fts_idx; unfortunately postgres 9.4
        # doesn't support CREATE INDEX IF NOT EXISTS so we just catch the
        # exception and ignore it.
        import psycopg2

        try:
            c.execute(
                "CREATE INDEX CONCURRENTLY event_search_fts_idx"
                " ON event_search USING GIN (vector)"
            )
        except psycopg2.ProgrammingError as e:
            logger.warning(
                "Ignoring error %r when trying to switch from GIST to GIN", e
            )

        # we should now be able to delete the GIST index.
        c.execute("DROP INDEX IF EXISTS event_search_fts_idx_gist")
    finally:
        conn.set_session(autocommit=False)
def __init__(
    self,
    database: DatabasePool,
    db_conn: LoggingDatabaseConnection,
    hs: "HomeServer",
):
    super().__init__(database, db_conn, hs)

    # These get correctly set by _find_stream_orderings_for_times_txn
    self.stream_ordering_month_ago: Optional[int] = None
    self.stream_ordering_day_ago: Optional[int] = None

    cur = db_conn.cursor(txn_name="_find_stream_orderings_for_times_txn")
    self._find_stream_orderings_for_times_txn(cur)
    cur.close()

    self.find_stream_orderings_looping_call = self._clock.looping_call(
        self._find_stream_orderings_for_times, 10 * 60 * 1000
    )

    self._rotate_delay = 3
    self._rotate_count = 10000
    self._doing_notif_rotation = False
    if hs.config.worker.run_background_tasks:
        self._rotate_notif_loop = self._clock.looping_call(
            self._rotate_notifs, 30 * 1000
        )

    self.db_pool.updates.register_background_index_update(
        "event_push_summary_unique_index",
        index_name="event_push_summary_unique_index",
        table="event_push_summary",
        columns=["user_id", "room_id"],
        unique=True,
        replaces_index="event_push_summary_user_rm",
    )
def reindex_txn(conn: LoggingDatabaseConnection) -> None:
    conn.rollback()
    if isinstance(self.database_engine, PostgresEngine):
        # postgres insists on autocommit for the index
        conn.set_session(autocommit=True)
        try:
            txn = conn.cursor()
            txn.execute(
                "CREATE INDEX CONCURRENTLY state_groups_state_type_idx"
                " ON state_groups_state(state_group, type, state_key)"
            )
            txn.execute("DROP INDEX IF EXISTS state_groups_state_id")
        finally:
            conn.set_session(autocommit=False)
    else:
        # SQLite has no CONCURRENTLY option, so just build the index directly.
        txn = conn.cursor()
        txn.execute(
            "CREATE INDEX state_groups_state_type_idx"
            " ON state_groups_state(state_group, type, state_key)"
        )
        txn.execute("DROP INDEX IF EXISTS state_groups_state_id")
def check_consistency(
    self,
    db_conn: LoggingDatabaseConnection,
    table: str,
    id_column: str,
    positive: bool = True,
):
    txn = db_conn.cursor(txn_name="sequence.check_consistency")

    # First we get the current max ID from the table.
    table_sql = "SELECT GREATEST(%(agg)s(%(id)s), 0) FROM %(table)s" % {
        "id": id_column,
        "table": table,
        "agg": "MAX" if positive else "-MIN",
    }

    txn.execute(table_sql)
    row = txn.fetchone()
    if not row:
        # Table is empty, so nothing to do.
        txn.close()
        return

    # Now we fetch the current value from the sequence and compare with the
    # above.
    max_stream_id = row[0]
    txn.execute(
        "SELECT last_value, is_called FROM %(seq)s" % {"seq": self._sequence_name}
    )
    last_value, is_called = txn.fetchone()
    txn.close()

    # If `is_called` is False then `last_value` is actually the value that
    # will be generated next, so we decrement to get the true "last value".
    if not is_called:
        last_value -= 1

    if max_stream_id > last_value:
        logger.warning(
            "Postgres sequence %s is behind table %s: %d < %d",
            self._sequence_name,
            table,
            last_value,
            max_stream_id,
        )
        raise IncorrectDatabaseSetup(
            _INCONSISTENT_SEQUENCE_ERROR
            % {"seq": self._sequence_name, "table": table, "max_id_sql": table_sql}
        )
def create_index(conn: LoggingDatabaseConnection) -> None:
    conn.rollback()
    conn.set_session(autocommit=True)
    c = conn.cursor()

    # We create with NULLS FIRST so that when we search *backwards*
    # we get the ones with non null origin_server_ts *first*
    c.execute(
        "CREATE INDEX CONCURRENTLY event_search_room_order ON event_search("
        "room_id, origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)"
    )
    c.execute(
        "CREATE INDEX CONCURRENTLY event_search_order ON event_search("
        "origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)"
    )
    conn.set_session(autocommit=False)
def test_not_upgraded_old_schema_version(self):
    """Test that workers don't start if the DB has an older schema version"""
    db_pool = self.hs.get_datastore().db_pool
    db_conn = LoggingDatabaseConnection(
        db_pool._db_pool.connect(),
        db_pool.engine,
        "tests",
    )
    cur = db_conn.cursor()
    cur.execute("UPDATE schema_version SET version = ?", (SCHEMA_VERSION - 1,))
    db_conn.commit()

    with self.assertRaises(PrepareDatabaseException):
        prepare_database(db_conn, db_pool.engine, self.hs.config)
def test_rolling_back(self):
    """Test that workers can start if the DB is a newer schema version"""
    db_pool = self.hs.get_datastore().db_pool
    db_conn = LoggingDatabaseConnection(
        db_pool._db_pool.connect(),
        db_pool.engine,
        "tests",
    )
    cur = db_conn.cursor()
    cur.execute("UPDATE schema_version SET version = ?", (SCHEMA_VERSION + 1,))
    db_conn.commit()

    prepare_database(db_conn, db_pool.engine, self.hs.config)
def _load_current_id(
    db_conn: LoggingDatabaseConnection, table: str, column: str, step: int = 1
) -> int:
    # debug logging for https://github.com/matrix-org/synapse/issues/7968
    logger.info("initialising stream generator for %s(%s)", table, column)
    cur = db_conn.cursor(txn_name="_load_current_id")
    if step == 1:
        cur.execute("SELECT MAX(%s) FROM %s" % (column, table))
    else:
        cur.execute("SELECT MIN(%s) FROM %s" % (column, table))
    result = cur.fetchone()
    assert result is not None
    (val,) = result
    cur.close()
    current_id = int(val) if val else step
    return (max if step > 0 else min)(current_id, step)
def _load_current_id(
    db_conn: LoggingDatabaseConnection, table: str, column: str, step: int = 1
) -> int:
    cur = db_conn.cursor(txn_name="_load_current_id")
    if step == 1:
        cur.execute("SELECT MAX(%s) FROM %s" % (column, table))
    else:
        cur.execute("SELECT MIN(%s) FROM %s" % (column, table))
    result = cur.fetchone()
    assert result is not None
    (val,) = result
    cur.close()
    current_id = int(val) if val else step
    res = (max if step > 0 else min)(current_id, step)
    logger.info("Initialising stream generator for %s(%s): %i", table, column, res)
    return res
def __init__(
    self,
    database: DatabasePool,
    db_conn: LoggingDatabaseConnection,
    hs: "HomeServer",
):
    super().__init__(database, db_conn, hs)

    # Used by `_get_joined_hosts` to ensure only one thing mutates the cache
    # at a time. Keyed by room_id.
    self._joined_host_linearizer = Linearizer("_JoinedHostsCache")

    # Is the current_state_events.membership up to date? Or is the
    # background update still running?
    self._current_state_events_membership_up_to_date = False

    txn = db_conn.cursor(
        txn_name="_check_safe_current_state_events_membership_updated"
    )
    self._check_safe_current_state_events_membership_updated_txn(txn)
    txn.close()

    if (
        self.hs.config.worker.run_background_tasks
        and self.hs.config.metrics.metrics_flags.known_servers
    ):
        self._known_servers_count = 1
        self.hs.get_clock().looping_call(
            self._count_known_servers,
            60 * 1000,
        )
        self.hs.get_clock().call_later(
            1,
            self._count_known_servers,
        )
        LaterGauge(
            "synapse_federation_known_servers",
            "",
            [],
            lambda: self._known_servers_count,
        )
def prepare_database(
    db_conn: LoggingDatabaseConnection,
    database_engine: BaseDatabaseEngine,
    config: Optional[HomeServerConfig],
    databases: Collection[str] = ("main", "state"),
):
    """Prepares a physical database for usage. Will either create all necessary tables
    or upgrade from an older schema version.

    If `config` is None then prepare_database will assert that no upgrade is
    necessary, *or* will create a fresh database if the database is empty.

    Args:
        db_conn:
        database_engine:
        config: application config, or None if we are connecting to an existing
            database which we expect to be configured already
        databases: The name of the databases that will be used
            with this physical database. Defaults to all databases.
    """

    try:
        cur = db_conn.cursor(txn_name="prepare_database")

        # sqlite does not automatically start transactions for DDL / SELECT statements,
        # so we start one before running anything. This ensures that any upgrades
        # are either applied completely, or not at all.
        #
        # (psycopg2 automatically starts a transaction as soon as we run any statements
        # at all, so this is redundant but harmless there.)
        cur.execute("BEGIN TRANSACTION")

        logger.info("%r: Checking existing schema version", databases)
        version_info = _get_or_create_schema_state(cur, database_engine)

        if version_info:
            user_version, delta_files, upgraded = version_info
            logger.info(
                "%r: Existing schema is %i (+%i deltas)",
                databases,
                user_version,
                len(delta_files),
            )

            # config should only be None when we are preparing an in-memory SQLite db,
            # which should be empty.
            if config is None:
                raise ValueError(
                    "config==None in prepare_database, but database is not empty"
                )

            # if it's a worker app, refuse to upgrade the database, to avoid multiple
            # workers doing it at once.
            if config.worker_app is not None and user_version != SCHEMA_VERSION:
                raise UpgradeDatabaseException(
                    OUTDATED_SCHEMA_ON_WORKER_ERROR % (SCHEMA_VERSION, user_version)
                )

            _upgrade_existing_database(
                cur,
                user_version,
                delta_files,
                upgraded,
                database_engine,
                config,
                databases=databases,
            )
        else:
            logger.info("%r: Initialising new database", databases)

            # if it's a worker app, refuse to upgrade the database, to avoid multiple
            # workers doing it at once.
            if config and config.worker_app is not None:
                raise UpgradeDatabaseException(EMPTY_DATABASE_ON_WORKER_ERROR)

            _setup_new_database(cur, database_engine, databases=databases)

        # check if any of our configured dynamic modules want a database
        if config is not None:
            _apply_module_schemas(cur, database_engine, config)

        cur.close()
        db_conn.commit()
    except Exception:
        db_conn.rollback()
        raise
def f(conn: LoggingDatabaseConnection) -> None:
    txn = conn.cursor()
    txn.execute("DROP INDEX IF EXISTS user_ips_user_ip")
    txn.close()
def _load_current_ids(
    self,
    db_conn: LoggingDatabaseConnection,
    tables: List[Tuple[str, str, str]],
) -> None:
    cur = db_conn.cursor(txn_name="_load_current_ids")

    # Load the current positions of all writers for the stream.
    if self._writers:
        # We delete any stale entries in the positions table. This is
        # important if we add back a writer after a long time; we want to
        # consider that a "new" writer, rather than using the old stale
        # entry here.
        sql = """
            DELETE FROM stream_positions
            WHERE
                stream_name = ?
                AND instance_name != ALL(?)
        """
        cur.execute(sql, (self._stream_name, self._writers))

        sql = """
            SELECT instance_name, stream_id FROM stream_positions
            WHERE stream_name = ?
        """
        cur.execute(sql, (self._stream_name,))

        self._current_positions = {
            instance: stream_id * self._return_factor
            for instance, stream_id in cur
            if instance in self._writers
        }

    # We set the `_persisted_upto_position` to be the minimum of all current
    # positions. If empty we use the max stream ID from the DB table.
    min_stream_id = min(self._current_positions.values(), default=None)

    if min_stream_id is None:
        # We add a GREATEST here to ensure that the result is always
        # positive. (This can be a problem for e.g. backfill streams where
        # the server has never backfilled).
        max_stream_id = 1
        for table, _, id_column in tables:
            sql = """
                SELECT GREATEST(COALESCE(%(agg)s(%(id)s), 1), 1)
                FROM %(table)s
            """ % {
                "id": id_column,
                "table": table,
                "agg": "MAX" if self._positive else "-MIN",
            }
            cur.execute(sql)
            result = cur.fetchone()
            assert result is not None
            (stream_id,) = result

            max_stream_id = max(max_stream_id, stream_id)

        self._persisted_upto_position = max_stream_id
    else:
        # If we have a min_stream_id then we pull out everything greater
        # than it from the DB so that we can prefill
        # `_known_persisted_positions` and get a more accurate
        # `_persisted_upto_position`.
        #
        # We also check if any of the later rows are from this instance, in
        # which case we use that for this instance's current position. This
        # is to handle the case where we didn't finish persisting to the
        # stream positions table before restart (or the stream position
        # table otherwise got out of date).

        self._persisted_upto_position = min_stream_id

        rows: List[Tuple[str, int]] = []
        for table, instance_column, id_column in tables:
            sql = """
                SELECT %(instance)s, %(id)s FROM %(table)s
                WHERE ? %(cmp)s %(id)s
            """ % {
                "id": id_column,
                "table": table,
                "instance": instance_column,
                "cmp": "<=" if self._positive else ">=",
            }
            cur.execute(sql, (min_stream_id * self._return_factor,))

            # Cast safety: this corresponds to the types returned by the query above.
            rows.extend(cast(Iterable[Tuple[str, int]], cur))

        # Sort so that we handle rows in order for each instance.
        rows.sort()

        with self._lock:
            for (
                instance,
                stream_id,
            ) in rows:
                stream_id = self._return_factor * stream_id
                self._add_persisted_position(stream_id)

                if instance == self._instance_name:
                    self._current_positions[instance] = stream_id

    cur.close()
def reindex_txn(conn: LoggingDatabaseConnection) -> None:
    txn = conn.cursor()
    txn.execute("DROP INDEX IF EXISTS device_inbox_stream_id")
    txn.close()