Example #1
    def test_not_upgraded_current_schema_version_with_outstanding_deltas(self):
        """
        Test that workers don't start if the DB is on the current schema version,
        but there are still outstanding delta migrations to run.
        """
        db_pool = self.hs.get_datastore().db_pool
        db_conn = LoggingDatabaseConnection(
            db_pool._db_pool.connect(),
            db_pool.engine,
            "tests",
        )

        # Set the schema version of the database to the current version
        cur = db_conn.cursor()
        cur.execute("UPDATE schema_version SET version = ?",
                    (SCHEMA_VERSION, ))

        db_conn.commit()

        # Patch `os.listdir` here to make Synapse think that there is a migration
        # file ready to be run.
        # Note that we can't patch this function for the whole method, else Synapse
        # will try to find the file when building the database initially.
        with mock.patch("os.listdir", mock.Mock(side_effect=fake_listdir)):
            with self.assertRaises(PrepareDatabaseException):
                # Synapse should think that there is an outstanding migration file due to
                # the patched 'os.listdir' above.
                #
                # We expect Synapse to raise an exception to indicate the master process
                # needs to apply this migration file.
                prepare_database(db_conn, db_pool.engine, self.hs.config)
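
The `fake_listdir` passed as `side_effect` above is defined elsewhere in the test module. A minimal sketch of such a helper, where the delta file name is purely illustrative:

from typing import List

def fake_listdir(filepath: str) -> List[str]:
    """A fake os.listdir reporting one outstanding delta for the current version."""
    if filepath.endswith("full_schemas"):
        # Pretend the full-schema directory for the current version exists.
        return [str(SCHEMA_VERSION)]
    # Hypothetical delta file name; anything matching the naming scheme works.
    return ["99_pending_delta.sql"]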
Example #2
def setupdb():
    # If we're using PostgreSQL, set up the db once
    if USE_POSTGRES_FOR_TESTS:
        # create a PostgresEngine
        db_engine = create_engine({"name": "psycopg2", "args": {}})

        # connect to postgres to create the base database.
        db_conn = db_engine.module.connect(
            user=POSTGRES_USER,
            host=POSTGRES_HOST,
            port=POSTGRES_PORT,
            password=POSTGRES_PASSWORD,
            dbname=POSTGRES_DBNAME_FOR_INITIAL_CREATE,
        )
        db_conn.autocommit = True
        cur = db_conn.cursor()
        cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB, ))
        cur.execute(
            "CREATE DATABASE %s ENCODING 'UTF8' LC_COLLATE='C' LC_CTYPE='C' "
            "template=template0;" % (POSTGRES_BASE_DB, ))
        cur.close()
        db_conn.close()

        # Set up the schema in the newly-created db
        db_conn = db_engine.module.connect(
            database=POSTGRES_BASE_DB,
            user=POSTGRES_USER,
            host=POSTGRES_HOST,
            port=POSTGRES_PORT,
            password=POSTGRES_PASSWORD,
        )
        db_conn = LoggingDatabaseConnection(db_conn, db_engine, "tests")
        prepare_database(db_conn, db_engine, None)
        db_conn.close()

        def _cleanup():
            db_conn = db_engine.module.connect(
                user=POSTGRES_USER,
                host=POSTGRES_HOST,
                port=POSTGRES_PORT,
                password=POSTGRES_PASSWORD,
                dbname=POSTGRES_DBNAME_FOR_INITIAL_CREATE,
            )
            db_conn.autocommit = True
            cur = db_conn.cursor()
            cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB, ))
            cur.close()
            db_conn.close()

        atexit.register(_cleanup)
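
The `autocommit = True` above is needed because PostgreSQL refuses to run CREATE DATABASE or DROP DATABASE inside a transaction block, and psycopg2 implicitly opens a transaction on the first statement. A standalone sketch of the failure mode (connection parameters are placeholders):

import psycopg2

conn = psycopg2.connect(user="postgres", host="localhost")  # placeholder credentials
cur = conn.cursor()
try:
    # psycopg2 has already issued an implicit BEGIN here, so this raises.
    cur.execute("CREATE DATABASE demo_db;")
except psycopg2.errors.ActiveSqlTransaction:
    conn.rollback()
    conn.autocommit = True  # statements now run outside any transaction
    conn.cursor().execute("CREATE DATABASE demo_db;")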
Example #3
    def test_not_upgraded_old_schema_version(self):
        """Test that workers don't start if the DB has an older schema version"""
        db_pool = self.hs.get_datastore().db_pool
        db_conn = LoggingDatabaseConnection(
            db_pool._db_pool.connect(),
            db_pool.engine,
            "tests",
        )

        cur = db_conn.cursor()
        cur.execute("UPDATE schema_version SET version = ?", (SCHEMA_VERSION - 1,))

        db_conn.commit()

        with self.assertRaises(PrepareDatabaseException):
            prepare_database(db_conn, db_pool.engine, self.hs.config)
Example #4
    def __init__(
        self,
        database: DatabasePool,
        db_conn: LoggingDatabaseConnection,
        hs: "HomeServer",
    ):
        super().__init__(database, db_conn, hs)

        # These get correctly set by _find_stream_orderings_for_times_txn
        self.stream_ordering_month_ago: Optional[int] = None
        self.stream_ordering_day_ago: Optional[int] = None

        cur = db_conn.cursor(txn_name="_find_stream_orderings_for_times_txn")
        self._find_stream_orderings_for_times_txn(cur)
        cur.close()

        self.find_stream_orderings_looping_call = self._clock.looping_call(
            self._find_stream_orderings_for_times, 10 * 60 * 1000)

        self._rotate_delay = 3
        self._rotate_count = 10000
        self._doing_notif_rotation = False
        if hs.config.worker.run_background_tasks:
            self._rotate_notif_loop = self._clock.looping_call(
                self._rotate_notifs, 30 * 60 * 1000)
Example #5
    def __init__(
        self,
        database: DatabasePool,
        db_conn: LoggingDatabaseConnection,
        hs: "HomeServer",
    ):
        super().__init__(database, db_conn, hs)

        # These get correctly set by _find_stream_orderings_for_times_txn
        self.stream_ordering_month_ago: Optional[int] = None
        self.stream_ordering_day_ago: Optional[int] = None

        cur = db_conn.cursor(txn_name="_find_stream_orderings_for_times_txn")
        self._find_stream_orderings_for_times_txn(cur)
        cur.close()

        self.find_stream_orderings_looping_call = self._clock.looping_call(
            self._find_stream_orderings_for_times, 10 * 60 * 1000)

        self._rotate_delay = 3
        self._rotate_count = 10000
        self._doing_notif_rotation = False
        if hs.config.worker.run_background_tasks:
            self._rotate_notif_loop = self._clock.looping_call(
                self._rotate_notifs, 30 * 1000)

        self.db_pool.updates.register_background_index_update(
            "event_push_summary_unique_index",
            index_name="event_push_summary_unique_index",
            table="event_push_summary",
            columns=["user_id", "room_id"],
            unique=True,
            replaces_index="event_push_summary_user_rm",
        )
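
register_background_index_update only registers the Python-side handler; the update is actually scheduled by a schema delta inserting a row into the background_updates table. A hedged sketch of the matching delta (the ordering value and file path are illustrative):

# e.g. in a delta file such as delta/XX/YY_event_push_summary_unique.sql:
cur.execute(
    "INSERT INTO background_updates (ordering, update_name, progress_json)"
    " VALUES (7002, 'event_push_summary_unique_index', '{}')"
)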
Example #6
    def test_rolling_back(self):
        """Test that workers can start if the DB is a newer schema version"""

        db_pool = self.hs.get_datastore().db_pool
        db_conn = LoggingDatabaseConnection(
            db_pool._db_pool.connect(),
            db_pool.engine,
            "tests",
        )

        cur = db_conn.cursor()
        cur.execute("UPDATE schema_version SET version = ?", (SCHEMA_VERSION + 1,))

        db_conn.commit()

        prepare_database(db_conn, db_pool.engine, self.hs.config)
Example #7
        def create_index(conn: LoggingDatabaseConnection) -> None:
            conn.rollback()

            # we have to set autocommit, because postgres refuses to
            # CREATE INDEX CONCURRENTLY without it.
            conn.set_session(autocommit=True)

            try:
                c = conn.cursor()

                # if we skipped the conversion to GIST, we may already/still
                # have an event_search_fts_idx; unfortunately postgres 9.4
                # doesn't support CREATE INDEX IF NOT EXISTS so we just catch
                # the exception and ignore it.
                import psycopg2

                try:
                    c.execute("CREATE INDEX CONCURRENTLY event_search_fts_idx"
                              " ON event_search USING GIN (vector)")
                except psycopg2.ProgrammingError as e:
                    logger.warning(
                        "Ignoring error %r when trying to switch from GIST to GIN",
                        e)

                # we should now be able to delete the GIST index.
                c.execute("DROP INDEX IF EXISTS event_search_fts_idx_gist")
            finally:
                conn.set_session(autocommit=False)
Example #8
    def check_consistency(
        self,
        db_conn: LoggingDatabaseConnection,
        table: str,
        id_column: str,
        positive: bool = True,
    ):
        txn = db_conn.cursor(txn_name="sequence.check_consistency")

        # First we get the current max ID from the table.
        table_sql = "SELECT GREATEST(%(agg)s(%(id)s), 0) FROM %(table)s" % {
            "id": id_column,
            "table": table,
            "agg": "MAX" if positive else "-MIN",
        }

        txn.execute(table_sql)
        row = txn.fetchone()
        if not row:
            # Table is empty, so nothing to do.
            txn.close()
            return

        # Now we fetch the current value from the sequence and compare with the
        # above.
        max_stream_id = row[0]
        txn.execute("SELECT last_value, is_called FROM %(seq)s" %
                    {"seq": self._sequence_name})
        last_value, is_called = txn.fetchone()
        txn.close()

        # If `is_called` is False then `last_value` is actually the value that
        # will be generated next, so we decrement to get the true "last value".
        if not is_called:
            last_value -= 1

        if max_stream_id > last_value:
            logger.warning(
                "Postgres sequence %s is behind table %s: %d < %d",
                self._sequence_name,
                table,
                last_value,
                max_stream_id,
            )
            raise IncorrectDatabaseSetup(
                _INCONSISTENT_SEQUENCE_ERROR % {
                    "seq": self._sequence_name,
                    "table": table,
                    "max_id_sql": table_sql
                })
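
When IncorrectDatabaseSetup is raised here, the usual remedy is to advance the sequence to the table's maximum ID, mirroring the table_sql built above. A hedged repair sketch (the sequence, table, and column names are examples only):

txn = db_conn.cursor(txn_name="sequence.repair")
# setval() moves the sequence forward so nextval() resumes past the table max.
txn.execute(
    "SELECT setval('%(seq)s', (SELECT GREATEST(MAX(%(id)s), 1) FROM %(table)s))"
    % {"seq": "state_group_id_seq", "id": "id", "table": "state_groups"}
)
txn.close()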
Example #9
def _load_current_id(db_conn: LoggingDatabaseConnection,
                     table: str,
                     column: str,
                     step: int = 1) -> int:
    # debug logging for https://github.com/matrix-org/synapse/issues/7968
    logger.info("initialising stream generator for %s(%s)", table, column)
    cur = db_conn.cursor(txn_name="_load_current_id")
    if step == 1:
        cur.execute("SELECT MAX(%s) FROM %s" % (column, table))
    else:
        cur.execute("SELECT MIN(%s) FROM %s" % (column, table))
    result = cur.fetchone()
    assert result is not None
    (val, ) = result
    cur.close()
    current_id = int(val) if val else step
    return (max if step > 0 else min)(current_id, step)
Example #10
        def reindex_txn(conn: LoggingDatabaseConnection) -> None:
            conn.rollback()
            if isinstance(self.database_engine, PostgresEngine):
                # postgres insists on autocommit for the index
                conn.set_session(autocommit=True)
                try:
                    txn = conn.cursor()
                    txn.execute(
                        "CREATE INDEX CONCURRENTLY state_groups_state_type_idx"
                        " ON state_groups_state(state_group, type, state_key)")
                    txn.execute("DROP INDEX IF EXISTS state_groups_state_id")
                finally:
                    conn.set_session(autocommit=False)
            else:
                txn = conn.cursor()
                txn.execute(
                    "CREATE INDEX state_groups_state_type_idx"
                    " ON state_groups_state(state_group, type, state_key)")
                txn.execute("DROP INDEX IF EXISTS state_groups_state_id")
Example #11
            def create_index(conn: LoggingDatabaseConnection) -> None:
                conn.rollback()
                conn.set_session(autocommit=True)
                c = conn.cursor()

                # We create with NULLS FIRST so that when we search *backwards*
                # we get the ones with non null origin_server_ts *first*
                c.execute(
                    "CREATE INDEX CONCURRENTLY event_search_room_order ON event_search("
                    "room_id, origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)"
                )
                c.execute(
                    "CREATE INDEX CONCURRENTLY event_search_order ON event_search("
                    "origin_server_ts NULLS FIRST, stream_ordering NULLS FIRST)"
                )
                conn.set_session(autocommit=False)
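
The NULLS FIRST ordering matters because Postgres satisfies a descending ORDER BY by scanning the index backwards, at which point the NULLs come out last. A hedged illustration of the query shape such an index serves (the column list and pagination details are assumed):

sql = (
    "SELECT room_id, origin_server_ts, stream_ordering FROM event_search"
    " WHERE room_id = ?"
    # A backwards scan of an ASC NULLS FIRST index yields DESC NULLS LAST,
    # i.e. rows with a real origin_server_ts come before the NULL ones.
    " ORDER BY origin_server_ts DESC NULLS LAST, stream_ordering DESC NULLS LAST"
    " LIMIT ?"
)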
Example #12
def _load_current_id(db_conn: LoggingDatabaseConnection,
                     table: str,
                     column: str,
                     step: int = 1) -> int:
    cur = db_conn.cursor(txn_name="_load_current_id")
    if step == 1:
        cur.execute("SELECT MAX(%s) FROM %s" % (column, table))
    else:
        cur.execute("SELECT MIN(%s) FROM %s" % (column, table))
    result = cur.fetchone()
    assert result is not None
    (val, ) = result
    cur.close()
    current_id = int(val) if val else step
    res = (max if step > 0 else min)(current_id, step)
    logger.info("Initialising stream generator for %s(%s): %i", table, column,
                res)
    return res
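
The final (max if step > 0 else min)(current_id, step) clamps the result so a forward stream (step=1) never starts below 1 and a backward stream (negative step) never starts above its step. A few worked cases:

step = 1
assert (max if step > 0 else min)(7, step) == 7    # existing max is kept
assert (max if step > 0 else min)(1, step) == 1    # empty table starts at 1

step = -1
assert (max if step > 0 else min)(-5, step) == -5  # existing min is kept
assert (max if step > 0 else min)(-1, step) == -1  # empty table starts at -1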
Example #13
    def __init__(
        self,
        database: DatabasePool,
        db_conn: LoggingDatabaseConnection,
        hs: "HomeServer",
    ):
        super().__init__(database, db_conn, hs)

        # Used by `_get_joined_hosts` to ensure only one thing mutates the cache
        # at a time. Keyed by room_id.
        self._joined_host_linearizer = Linearizer("_JoinedHostsCache")

        # Is the current_state_events.membership up to date? Or is the
        # background update still running?
        self._current_state_events_membership_up_to_date = False

        txn = db_conn.cursor(
            txn_name="_check_safe_current_state_events_membership_updated")
        self._check_safe_current_state_events_membership_updated_txn(txn)
        txn.close()

        if (self.hs.config.worker.run_background_tasks
                and self.hs.config.metrics.metrics_flags.known_servers):
            self._known_servers_count = 1
            self.hs.get_clock().looping_call(
                self._count_known_servers,
                60 * 1000,
            )
            self.hs.get_clock().call_later(
                1,
                self._count_known_servers,
            )
            LaterGauge(
                "synapse_federation_known_servers",
                "",
                [],
                lambda: self._known_servers_count,
            )
Example #14
def prepare_database(
        db_conn: LoggingDatabaseConnection,
        database_engine: BaseDatabaseEngine,
        config: Optional[HomeServerConfig],
        databases: Collection[str] = ("main", "state"),
):
    """Prepares a physical database for usage. Will either create all necessary tables
    or upgrade from an older schema version.

    If `config` is None then prepare_database will assert that no upgrade is
    necessary, *or* will create a fresh database if the database is empty.

    Args:
        db_conn: an open connection to the database to prepare
        database_engine: the database engine for that connection
        config:
            application config, or None if we are connecting to an existing
            database which we expect to be configured already
        databases: The name of the databases that will be used
            with this physical database. Defaults to all databases.
    """

    try:
        cur = db_conn.cursor(txn_name="prepare_database")

        # sqlite does not automatically start transactions for DDL / SELECT statements,
        # so we start one before running anything. This ensures that any upgrades
        # are either applied completely, or not at all.
        #
        # (psycopg2 automatically starts a transaction as soon as we run any statements
        # at all, so this is redundant but harmless there.)
        cur.execute("BEGIN TRANSACTION")

        logger.info("%r: Checking existing schema version", databases)
        version_info = _get_or_create_schema_state(cur, database_engine)

        if version_info:
            user_version, delta_files, upgraded = version_info
            logger.info(
                "%r: Existing schema is %i (+%i deltas)",
                databases,
                user_version,
                len(delta_files),
            )

            # config should only be None when we are preparing an in-memory SQLite db,
            # which should be empty.
            if config is None:
                raise ValueError(
                    "config==None in prepare_database, but database is not empty"
                )

            # if it's a worker app, refuse to upgrade the database, to avoid multiple
            # workers doing it at once.
            if config.worker_app is not None and user_version != SCHEMA_VERSION:
                raise UpgradeDatabaseException(
                    OUTDATED_SCHEMA_ON_WORKER_ERROR %
                    (SCHEMA_VERSION, user_version))

            _upgrade_existing_database(
                cur,
                user_version,
                delta_files,
                upgraded,
                database_engine,
                config,
                databases=databases,
            )
        else:
            logger.info("%r: Initialising new database", databases)

            # if it's a worker app, refuse to upgrade the database, to avoid multiple
            # workers doing it at once.
            if config and config.worker_app is not None:
                raise UpgradeDatabaseException(EMPTY_DATABASE_ON_WORKER_ERROR)

            _setup_new_database(cur, database_engine, databases=databases)

        # check if any of our configured dynamic modules want a database
        if config is not None:
            _apply_module_schemas(cur, database_engine, config)

        cur.close()
        db_conn.commit()
    except Exception:
        db_conn.rollback()
        raise
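
A minimal sketch of driving prepare_database against a fresh in-memory SQLite database, taking the config=None path described in the docstring (import paths assume a Synapse checkout):

import sqlite3

from synapse.storage.database import LoggingDatabaseConnection
from synapse.storage.engines import create_engine
from synapse.storage.prepare_database import prepare_database

db_engine = create_engine({"name": "sqlite3", "args": {}})
db_conn = LoggingDatabaseConnection(
    sqlite3.connect(":memory:"), db_engine, "example"
)
# config=None is acceptable because the database is empty, so a fresh
# schema is created rather than an upgrade being attempted.
prepare_database(db_conn, db_engine, None)
db_conn.close()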
Example #15
    def _load_current_ids(
        self,
        db_conn: LoggingDatabaseConnection,
        tables: List[Tuple[str, str, str]],
    ) -> None:
        cur = db_conn.cursor(txn_name="_load_current_ids")

        # Load the current positions of all writers for the stream.
        if self._writers:
            # We delete any stale entries in the positions table. This is
            # important if we add back a writer after a long time; we want to
            # consider that a "new" writer, rather than using the old stale
            # entry here.
            sql = """
                DELETE FROM stream_positions
                WHERE
                    stream_name = ?
                    AND instance_name != ALL(?)
            """
            cur.execute(sql, (self._stream_name, self._writers))

            sql = """
                SELECT instance_name, stream_id FROM stream_positions
                WHERE stream_name = ?
            """
            cur.execute(sql, (self._stream_name, ))

            self._current_positions = {
                instance: stream_id * self._return_factor
                for instance, stream_id in cur if instance in self._writers
            }

        # We set the `_persisted_upto_position` to be the minimum of all current
        # positions. If empty we use the max stream ID from the DB table.
        min_stream_id = min(self._current_positions.values(), default=None)

        if min_stream_id is None:
            # We add a GREATEST here to ensure that the result is always
            # positive. (This can be a problem for e.g. backfill streams where
            # the server has never backfilled).
            max_stream_id = 1
            for table, _, id_column in tables:
                sql = """
                    SELECT GREATEST(COALESCE(%(agg)s(%(id)s), 1), 1)
                    FROM %(table)s
                """ % {
                    "id": id_column,
                    "table": table,
                    "agg": "MAX" if self._positive else "-MIN",
                }
                cur.execute(sql)
                result = cur.fetchone()
                assert result is not None
                (stream_id, ) = result

                max_stream_id = max(max_stream_id, stream_id)

            self._persisted_upto_position = max_stream_id
        else:
            # If we have a min_stream_id then we pull out everything greater
            # than it from the DB so that we can prefill
            # `_known_persisted_positions` and get a more accurate
            # `_persisted_upto_position`.
            #
            # We also check if any of the later rows are from this instance, in
            # which case we use that for this instance's current position. This
            # is to handle the case where we didn't finish persisting to the
            # stream positions table before restart (or the stream position
            # table otherwise got out of date).

            self._persisted_upto_position = min_stream_id

            rows: List[Tuple[str, int]] = []
            for table, instance_column, id_column in tables:
                sql = """
                    SELECT %(instance)s, %(id)s FROM %(table)s
                    WHERE ? %(cmp)s %(id)s
                """ % {
                    "id": id_column,
                    "table": table,
                    "instance": instance_column,
                    "cmp": "<=" if self._positive else ">=",
                }
                cur.execute(sql, (min_stream_id * self._return_factor, ))

                # Cast safety: this corresponds to the types returned by the query above.
                rows.extend(cast(Iterable[Tuple[str, int]], cur))

            # Sort so that we handle rows in order for each instance.
            rows.sort()

            with self._lock:
                for instance, stream_id in rows:
                    stream_id = self._return_factor * stream_id
                    self._add_persisted_position(stream_id)

                    if instance == self._instance_name:
                        self._current_positions[instance] = stream_id

        cur.close()
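
A small worked illustration of the bootstrap above: with known writer positions, _persisted_upto_position starts at their minimum, which later rows then refine; with no positions at all, the per-table GREATEST/COALESCE scan supplies the fallback.

current_positions = {"writer1": 7, "writer2": 3}
assert min(current_positions.values(), default=None) == 3  # safe lower bound

# No writers recorded yet: fall back to scanning the tables for a max.
assert min({}.values(), default=None) is None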
Example #16
        def f(conn: LoggingDatabaseConnection) -> None:
            txn = conn.cursor()
            txn.execute("DROP INDEX IF EXISTS user_ips_user_ip")
            txn.close()
Example #17
        def reindex_txn(conn: LoggingDatabaseConnection) -> None:
            txn = conn.cursor()
            txn.execute("DROP INDEX IF EXISTS device_inbox_stream_id")
            txn.close()
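
Helpers like f and reindex_txn take a whole connection rather than a transaction so that they can manage their own transaction boundaries (rollback, autocommit). They are typically driven through the pool's connection runner; a hedged sketch of the surrounding background-update handler (the handler and update names are illustrative):

async def _drop_device_inbox_index(self, progress: dict, batch_size: int) -> int:
    def reindex_txn(conn: LoggingDatabaseConnection) -> None:
        txn = conn.cursor()
        txn.execute("DROP INDEX IF EXISTS device_inbox_stream_id")
        txn.close()

    # runWithConnection hands reindex_txn a raw connection rather than a txn.
    await self.db_pool.runWithConnection(reindex_txn)
    await self.db_pool.updates._end_background_update("drop_device_inbox_index")
    return 1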