def _enable_rssyncapplystop(self, secondary):
    # Enable the "rsSyncApplyStop" failpoint on the secondary to prevent it from
    # applying any oplog entries while the test is running.
    client = secondary.mongo_client()
    try:
        client.admin.command(
            bson.SON([("configureFailPoint", "rsSyncApplyStop"), ("mode", "alwaysOn")]))
    except pymongo.errors.OperationFailure as err:
        self.logger.exception("Unable to disable oplog application on the mongod on port %d",
                              secondary.port)
        raise errors.ServerFailure(
            "Unable to disable oplog application on the mongod on port {}: {}".format(
                secondary.port, err.args[0]))
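# A minimal standalone sketch of the failpoint toggle pattern used above, assuming a
# hypothetical local mongod; the port value and helper name are illustrative, not part
# of the fixture code.
import bson
import pymongo

def set_failpoint(port, name, mode):
    """Set the named failpoint to the given mode ("alwaysOn" or "off")."""
    client = pymongo.MongoClient("localhost", port)
    # The configureFailPoint command runs against the admin database; bson.SON keeps
    # the command name as the first field, as the server's command parser expects.
    client.admin.command(bson.SON([("configureFailPoint", name), ("mode", mode)]))

# set_failpoint(20000, "rsSyncApplyStop", "alwaysOn")  # pause oplog application
# set_failpoint(20000, "rsSyncApplyStop", "off")       # resume oplog application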
def _get_primary_url(self):
    no_primary_err = errors.ServerFailure("No primary found")

    for node in self._rs_fixture.nodes:
        try:
            is_master = node.mongo_client().admin.command("isMaster")["ismaster"]
        except pymongo.errors.AutoReconnect:
            raise no_primary_err

        if is_master:
            return node.get_driver_connection_url()

    raise no_primary_err
def pause(self):
    """Pause the thread after test."""
    self.__lifecycle.mark_test_finished()

    # Wait until we are no longer executing splits.
    self._is_idle_evt.wait()
    # Check if the thread is alive in case it has thrown an exception while running.
    self._check_thread()

    # Check that the fixture is still running.
    if not self._shard_split_fixture.is_running():
        raise errors.ServerFailure(
            f"ShardSplitFixture with pids {self._shard_split_fixture.pids()} expected to be"
            " running in ContinuousShardSplit, but wasn't.")
def _kill_secondaries(self):
    for secondary in self.fixture.get_secondaries():
        # Disable the "rsSyncApplyStop" failpoint on the secondary to have it resume
        # applying oplog entries.
        client = utils.new_mongo_client(port=secondary.port)
        try:
            client.admin.command(
                bson.SON([("configureFailPoint", "rsSyncApplyStop"), ("mode", "off")]))
        except pymongo.errors.OperationFailure as err:
            self.logger.exception(
                "Unable to re-enable oplog application on the mongod on port %d",
                secondary.port)
            raise errors.ServerFailure(
                "Unable to re-enable oplog application on the mongod on port %d: %s" %
                (secondary.port, err.args[0]))

        # Wait a little bit for the secondary to start applying oplog entries so that we are
        # more likely to kill the mongod process while it is partway into applying a batch.
        time.sleep(0.1)

        # Check that the secondary is still running before forcibly terminating it. This
        # ensures we still detect some cases in which the secondary has already crashed.
        if not secondary.is_running():
            raise errors.ServerFailure(
                "mongod on port %d was expected to be running in"
                " PeriodicKillSecondaries.after_test(), but wasn't." % (secondary.port))

        self.hook_test_case.logger.info(
            "Killing the secondary on port %d..." % (secondary.port))
        secondary.mongod.stop(kill=True)

    # Teardown may or may not be considered a success as a result of killing a secondary,
    # so we ignore the return value of Fixture.teardown().
    self.fixture.teardown()
def _await_secondary_state(self, secondary):
    client = secondary.mongo_client()
    try:
        client.admin.command(
            bson.SON([
                ("replSetTest", 1),
                ("waitForMemberState", 2),  # 2 = SECONDARY
                ("timeoutMillis", fixture.ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60 * 1000),
            ]))
    except pymongo.errors.OperationFailure as err:
        self.hook_test_case.logger.exception(
            "mongod on port %d failed to reach state SECONDARY after %d seconds",
            secondary.port, fixture.ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60)
        raise errors.ServerFailure(
            "mongod on port {} failed to reach state SECONDARY after {} seconds: {}".format(
                secondary.port, fixture.ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60,
                err.args[0]))
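# A hedged sketch of an alternative way to wait for SECONDARY state: poll
# replSetGetStatus until the node reports myState == 2. The port and timeout values
# are placeholders; the fixture above uses the test-only replSetTest command instead.
import time
import pymongo

def wait_for_secondary_state(port, timeout_secs=600):
    client = pymongo.MongoClient("localhost", port)
    deadline = time.time() + timeout_secs
    while time.time() < deadline:
        # "myState" uses the replica set member state codes; 2 means SECONDARY.
        if client.admin.command("replSetGetStatus")["myState"] == 2:
            return
        time.sleep(0.1)  # Wait a little bit before polling again.
    raise AssertionError("node on port {} never reached state SECONDARY".format(port))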
def pause(self):
    """Pause the thread."""
    self.__lifecycle.mark_test_finished()

    # Wait until we are no longer executing stepdowns.
    self._is_idle_evt.wait()
    # Check if the thread is alive in case it has thrown an exception while running.
    self._check_thread()
    # Wait until all the replica sets have primaries.
    self._await_primaries()
    # Wait for mongos to retarget the primary for each shard and the config server.
    self._do_wait_for_mongos_retarget()

    # Check that the fixtures are still running.
    for rs_fixture in self._rs_fixtures:
        if not rs_fixture.is_running():
            raise errors.ServerFailure(
                "ReplicaSetFixture with pids {} expected to be running in"
                " ContinuousStepdown, but wasn't.".format(rs_fixture.pids()))
    for mongos_fixture in self._mongos_fixtures:
        if not mongos_fixture.is_running():
            raise errors.ServerFailure(
                "MongoSFixture with pids {} expected to be running in"
                " ContinuousStepdown, but wasn't.".format(mongos_fixture.pids()))
def _do_teardown(self):
    if self.mongos is None:
        self.logger.warning("The mongos fixture has not been set up yet.")
        return  # Teardown is still a success even if nothing is running.

    self.logger.info("Stopping mongos on port %d with pid %d...", self.port,
                     self.mongos.pid)
    if not self.is_running():
        exit_code = self.mongos.poll()
        msg = ("mongos on port {:d} was expected to be running, but wasn't. "
               "Process exited with code {:d}").format(self.port, exit_code)
        self.logger.warning(msg)
        raise errors.ServerFailure(msg)

    self.mongos.stop()
    exit_code = self.mongos.wait()

    if exit_code == 0:
        self.logger.info("Successfully stopped the mongos on port {:d}".format(self.port))
    else:
        self.logger.warning("Stopped the mongos on port {:d}. "
                            "Process exited with code {:d}.".format(self.port, exit_code))
        raise errors.ServerFailure(
            "mongos on port {:d} with pid {:d} exited with code {:d}".format(
                self.port, self.mongos.pid, exit_code))
def _disable_rssyncapplystop(self, secondary):
    # Disable the "rsSyncApplyStop" failpoint on the secondary to have it resume applying
    # oplog entries.
    client = secondary.mongo_client()
    try:
        client.admin.command(
            bson.SON([("configureFailPoint", "rsSyncApplyStop"), ("mode", "off")]))
    except pymongo.errors.OperationFailure as err:
        self.logger.exception(
            "Unable to re-enable oplog application on the mongod on port %d",
            secondary.port)
        raise errors.ServerFailure(
            "Unable to re-enable oplog application on the mongod on port {}: {}".format(
                secondary.port, err.args[0]))
def _do_teardown(self, mode=None):
    if self.mongos is None:
        self.logger.warning("The mongos fixture has not been set up yet.")
        return  # Teardown is still a success even if nothing is running.

    if mode == interface.TeardownMode.ABORT:
        self.logger.info(
            "Attempting to send SIGABRT from resmoke to mongos on port %d with pid %d...",
            self.port, self.mongos.pid)
    else:
        self.logger.info("Stopping mongos on port %d with pid %d...", self.port,
                         self.mongos.pid)

    if not self.is_running():
        exit_code = self.mongos.poll()
        msg = ("mongos on port {:d} was expected to be running, but wasn't. "
               "Process exited with code {:d}").format(self.port, exit_code)
        self.logger.warning(msg)
        raise errors.ServerFailure(msg)

    self.mongos.stop(mode=mode)
    exit_code = self.mongos.wait()

    # Python's subprocess module reports death by signal as the negative signal number.
    # pylint: disable=invalid-unary-operand-type
    if exit_code == 0 or (mode is not None and exit_code == -(mode.value)):
        self.logger.info("Successfully stopped the mongos on port {:d}".format(self.port))
    else:
        self.logger.warning("Stopped the mongos on port {:d}. "
                            "Process exited with code {:d}.".format(self.port, exit_code))
        raise errors.ServerFailure(
            "mongos on port {:d} with pid {:d} exited with code {:d}".format(
                self.port, self.mongos.pid, exit_code))
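# A self-contained demonstration of the exit-code convention the branch above relies
# on: on POSIX, Python's subprocess module reports death by signal as the negative
# signal number, which is why exit_code == -(mode.value) indicates a clean stop by
# the requested signal. Uses only the standard library.
import signal
import subprocess

proc = subprocess.Popen(["sleep", "60"])
proc.send_signal(signal.SIGABRT)
exit_code = proc.wait()
# A process killed by SIGABRT has returncode -6 on POSIX systems.
assert exit_code == -signal.SIGABRT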
def pause(self):
    """Pause the thread after test."""
    self.__lifecycle.mark_test_finished()

    # Wait until we are no longer executing migrations.
    self._is_idle_evt.wait()
    # Check if the thread is alive in case it has thrown an exception while running.
    self._check_thread()

    # Check that the fixture is still running.
    if not self._tenant_migration_fixture.is_running():
        raise errors.ServerFailure(
            "TenantMigrationFixture with pids {} expected to be running in"
            " ContinuousTenantMigration, but wasn't".format(
                self._tenant_migration_fixture.pids()))
def await_ready(self):
    """Block until the fixture can be used for testing."""
    deadline = time.time() + standalone.MongoDFixture.AWAIT_READY_TIMEOUT_SECS

    # Wait until the mongos is accepting connections. The retry logic is necessary to
    # support versions of PyMongo <3.0 that immediately raise a ConnectionFailure if a
    # connection cannot be established.
    while True:
        # Check whether the mongos exited for some reason.
        exit_code = self.mongos.poll()
        if exit_code is not None:
            raise errors.ServerFailure(
                "Could not connect to mongos on port {}, process ended"
                " unexpectedly with code {}.".format(self.port, exit_code))

        try:
            # Use a shorter connection timeout to more closely satisfy the requested
            # deadline.
            client = self.mongo_client(timeout_millis=500)
            client.admin.command("ping")
            break
        except pymongo.errors.ConnectionFailure:
            remaining = deadline - time.time()
            if remaining <= 0.0:
                raise errors.ServerFailure(
                    "Failed to connect to mongos on port {} after {} seconds".format(
                        self.port, standalone.MongoDFixture.AWAIT_READY_TIMEOUT_SECS))

            self.logger.info("Waiting to connect to mongos on port %d.", self.port)
            time.sleep(0.1)  # Wait a little bit before trying again.

    self.logger.info("Successfully contacted the mongos on port %d.", self.port)
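# A condensed sketch of the same connect-with-deadline loop, reduced to its core; the
# port, timeout, and function name are illustrative rather than taken from the fixture.
import time
import pymongo

def wait_until_reachable(port, timeout_secs=60):
    deadline = time.time() + timeout_secs
    while True:
        try:
            # A short server selection timeout makes each attempt fail fast so the
            # overall deadline stays accurate.
            client = pymongo.MongoClient("localhost", port, serverSelectionTimeoutMS=500)
            client.admin.command("ping")
            return client
        except pymongo.errors.ConnectionFailure:
            if time.time() >= deadline:
                raise
            time.sleep(0.1)  # Wait a little bit before trying again.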
def _disable_abort(self, donor_primary_client, donor_primary_port, donor_primary_rs_name):
    try:
        donor_primary_client.admin.command(
            bson.SON([("configureFailPoint", "abortTenantMigrationAfterBlockingStarts"),
                      ("mode", "off")]))
    except pymongo.errors.OperationFailure as err:
        self.logger.exception(
            "Unable to disable the failpoint to make migrations abort on donor primary on"
            " port %d of replica set '%s'.", donor_primary_port, donor_primary_rs_name)
        raise errors.ServerFailure(
            "Unable to disable the failpoint to make migrations abort on donor primary on"
            " port {} of replica set '{}': {}".format(donor_primary_port,
                                                      donor_primary_rs_name, err.args[0]))
def __restart_init_sync(self, test_report, sync_node, sync_node_conn):
    if self.use_resync:
        self.hook_test_case.logger.info("Calling resync on initial sync node...")
        cmd = bson.SON([("resync", 1), ("wait", 0)])
        sync_node_conn.admin.command(cmd)
    else:
        # Tear down and restart the initial sync node to start initial sync again.
        if not sync_node.teardown():
            raise errors.ServerFailure("%s did not exit cleanly" % (sync_node))

        self.hook_test_case.logger.info("Starting the initial sync node back up again...")
        sync_node.setup()
        sync_node.await_ready()
def __init__(  # pylint: disable=too-many-arguments,too-many-locals
        self, logger, job_num, mongos_executable=None, mongos_options=None,
        mongod_executable=None, mongod_options=None, dbpath_prefix=None,
        preserve_dbpath=False, num_shards=1, num_rs_nodes_per_shard=1, num_mongos=1,
        enable_sharding=None, enable_balancer=True, enable_autosplit=True,
        auth_options=None, configsvr_options=None, shard_options=None,
        mixed_bin_versions=None):
    """Initialize ShardedClusterFixture with different options for the cluster processes."""
    interface.Fixture.__init__(self, logger, job_num, dbpath_prefix=dbpath_prefix)

    if "dbpath" in mongod_options:
        raise ValueError("Cannot specify mongod_options.dbpath")

    self.mongos_executable = mongos_executable
    self.mongos_options = utils.default_if_none(mongos_options, {})
    self.mongod_options = utils.default_if_none(mongod_options, {})
    self.mongod_executable = mongod_executable
    self.mongod_options["set_parameters"] = mongod_options.get("set_parameters", {}).copy()
    self.mongod_options["set_parameters"]["migrationLockAcquisitionMaxWaitMS"] = \
        mongod_options["set_parameters"].get("migrationLockAcquisitionMaxWaitMS", 30000)
    self.preserve_dbpath = preserve_dbpath
    # Use the 'num_shards' and 'num_rs_nodes_per_shard' values from the command line if
    # they exist.
    num_shards_option = config.NUM_SHARDS
    self.num_shards = num_shards if not num_shards_option else num_shards_option
    num_rs_nodes_per_shard_option = config.NUM_REPLSET_NODES
    self.num_rs_nodes_per_shard = (num_rs_nodes_per_shard
                                   if not num_rs_nodes_per_shard_option else
                                   num_rs_nodes_per_shard_option)
    self.num_mongos = num_mongos
    self.enable_sharding = utils.default_if_none(enable_sharding, [])
    self.enable_balancer = enable_balancer
    self.enable_autosplit = enable_autosplit
    self.auth_options = auth_options
    self.configsvr_options = utils.default_if_none(configsvr_options, {})
    self.shard_options = utils.default_if_none(shard_options, {})
    self.mixed_bin_versions = utils.default_if_none(mixed_bin_versions,
                                                    config.MIXED_BIN_VERSIONS)

    if self.mixed_bin_versions is not None and num_rs_nodes_per_shard is not None:
        num_mongods = self.num_shards * self.num_rs_nodes_per_shard
        if len(self.mixed_bin_versions) != num_mongods:
            msg = ("The number of binary versions specified: {} does not match the number"
                   " of nodes in the sharded cluster: {}.").format(
                       len(self.mixed_bin_versions), num_mongods)
            raise errors.ServerFailure(msg)

    self._dbpath_prefix = os.path.join(self._dbpath_prefix, config.FIXTURE_SUBDIR)

    self.configsvr = None
    self.mongos = []
    self.shards = []
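# A hypothetical instantiation sketch. It shows that the command-line overrides win:
# config.NUM_SHARDS and config.NUM_REPLSET_NODES, when set, replace the num_shards
# and num_rs_nodes_per_shard arguments. The logger and argument values below are
# placeholders, not taken from the original code.
import logging

fixture = ShardedClusterFixture(
    logging.getLogger("sharded_cluster_fixture"), job_num=0,
    num_shards=2,              # overridden by config.NUM_SHARDS if it is set
    num_rs_nodes_per_shard=3,  # overridden by config.NUM_REPLSET_NODES if it is set
    mongod_options={"set_parameters": {}},
    enable_balancer=False)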
def _restart_and_clear_fixture(self):
    # We restart the fixture after setting 'preserve_dbpath' back to its original value in
    # order to clear the contents of the data directory if desired. The CleanEveryN hook
    # cannot be used in combination with the PeriodicKillSecondaries hook because we may
    # attempt to call Fixture.teardown() while the "rsSyncApplyStop" failpoint is still
    # enabled on the secondaries, causing them to exit with a non-zero return code.
    self.logger.info("Finished verifying data consistency, stopping the fixture...")

    try:
        self.fixture.teardown()
    except errors.ServerFailure:
        raise errors.ServerFailure(
            "{} did not exit cleanly after verifying data consistency".format(self.fixture))

    self.logger.info("Starting the fixture back up again with no data...")

    self.fixture.setup()
    self.fixture.await_ready()
def setup(self):
    """Set up the sharded cluster."""
    if "port" not in self.mongos_options:
        self.mongos_options["port"] = core.network.PortAllocator.next_fixture_port(
            self.job_num)
    self.port = self.mongos_options["port"]

    mongos = core.programs.mongos_program(self.logger, executable=self.mongos_executable,
                                          **self.mongos_options)
    try:
        self.logger.info("Starting mongos on port %d...\n%s", self.port,
                         mongos.as_command())
        mongos.start()
        self.logger.info("mongos started on port %d with pid %d.", self.port, mongos.pid)
    except Exception as err:
        msg = "Failed to start mongos on port {:d}: {}".format(self.port, err)
        self.logger.exception(msg)
        raise errors.ServerFailure(msg)

    self.mongos = mongos
def _add_node_to_repl_set(self, client, repl_config, member_index, members):
    self.logger.info("Adding in node %d: %s", member_index, members[member_index - 1])
    retryable_error_codes = (
        ReplicaSetFixture._NEW_REPLICA_SET_CONFIGURATION_INCOMPATIBLE,
        ReplicaSetFixture._CURRENT_CONFIG_NOT_COMMITTED_YET,
        ReplicaSetFixture._CONFIGURATION_IN_PROGRESS,
        ReplicaSetFixture._NODE_NOT_FOUND,
        ReplicaSetFixture._INTERRUPTED_DUE_TO_REPL_STATE_CHANGE,
    )
    while True:
        try:
            # 'newlyAdded' removal reconfigs could bump the version. Get the current
            # version to be safe.
            curr_version = client.admin.command(
                {"replSetGetConfig": 1})["config"]["version"]
            repl_config["version"] = curr_version + 1
            repl_config["members"] = members[:member_index]
            self.logger.info("Issuing replSetReconfig command: %s", repl_config)
            client.admin.command({
                "replSetReconfig": repl_config,
                "maxTimeMS": self.AWAIT_REPL_TIMEOUT_MINS * 60 * 1000
            })
            break
        except pymongo.errors.OperationFailure as err:
            # These error codes may be transient, and so we retry the reconfig with a
            # (potentially) higher config version. We should not receive these codes
            # indefinitely.
            if err.code not in retryable_error_codes:
                msg = ("Operation failure while setting up the"
                       " replica set fixture: {}").format(err)
                self.logger.error(msg)
                raise errors.ServerFailure(msg)

            msg = "Retrying failed attempt to add new node to fixture: {}".format(err)
            self.logger.error(msg)
            time.sleep(0.1)  # Wait a little bit before trying again.
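# A minimal sketch of the safe-reconfig pattern above, outside the fixture: re-read
# the committed config version immediately before each attempt so that a concurrent
# 'newlyAdded' removal reconfig cannot leave us with a stale version. The client
# construction and port are placeholders.
import pymongo

client = pymongo.MongoClient("localhost", 20000)
repl_config = client.admin.command({"replSetGetConfig": 1})["config"]
repl_config["version"] += 1  # a concurrent reconfig forces a retry with a fresh read
client.admin.command({"replSetReconfig": repl_config, "maxTimeMS": 60 * 1000})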
def before_test(self, test, test_report):
    if self._start_time is not None:
        # The "rsSyncApplyStop" failpoint is already enabled.
        return

    # Enable the "rsSyncApplyStop" failpoint on each of the secondaries to prevent them
    # from applying any oplog entries while the test is running.
    for secondary in self.fixture.get_secondaries():
        client = utils.new_mongo_client(port=secondary.port)
        try:
            client.admin.command(
                bson.SON([("configureFailPoint", "rsSyncApplyStop"), ("mode", "alwaysOn")]))
        except pymongo.errors.OperationFailure as err:
            self.logger.exception(
                "Unable to disable oplog application on the mongod on port %d",
                secondary.port)
            raise errors.ServerFailure(
                "Unable to disable oplog application on the mongod on port %d: %s" %
                (secondary.port, err.args[0]))

    self._start_time = time.time()
def _do_teardown(self, mode=None):
    self.logger.info("Stopping all members of the replica set...")

    running_at_start = self.is_running()
    if not running_at_start:
        self.logger.info("All members of the replica set were expected to be running, "
                         "but weren't.")

    teardown_handler = interface.FixtureTeardownHandler(self.logger)

    if self.initial_sync_node:
        teardown_handler.teardown(self.initial_sync_node, "initial sync node", mode=mode)

    # Terminate the secondaries first to reduce noise in the logs.
    for node in reversed(self.nodes):
        teardown_handler.teardown(node, "replica set member on port %d" % node.port,
                                  mode=mode)

    if teardown_handler.was_successful():
        self.logger.info("Successfully stopped all members of the replica set.")
    else:
        self.logger.error("Stopping the replica set fixture failed.")
        raise errors.ServerFailure(teardown_handler.get_error_message())
def retry_until_wtimeout(self, insert_fn):
    """
    Given a callback function representing an insert operation on the primary, handle any
    connection failures, and keep retrying the operation for up to
    'AWAIT_REPL_TIMEOUT_MINS' minutes.

    The insert operation callback should take an argument for the number of remaining
    seconds to provide as the timeout for the operation.
    """
    deadline = time.time() + ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60

    while True:
        try:
            remaining = deadline - time.time()
            insert_fn(remaining)
            break
        except pymongo.errors.ConnectionFailure:
            remaining = deadline - time.time()
            if remaining <= 0.0:
                raise errors.ServerFailure("Failed to connect to {}".format(
                    self.get_driver_connection_url()))
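# A hypothetical caller sketch for retry_until_wtimeout(): the callback receives the
# seconds remaining until the replication deadline and should use them as the write
# timeout for its own operation. 'primary_client' and 'repl_fixture' are placeholder
# names, not part of the original code.
import pymongo

def _insert_marker(remaining_secs):
    coll = primary_client["test"]["markers"].with_options(
        write_concern=pymongo.WriteConcern(w=2, wtimeout=int(remaining_secs * 1000)))
    coll.insert_one({"marker": 1})

repl_fixture.retry_until_wtimeout(_insert_marker)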
def _check_secondaries_and_restart_fixture(self):
    preserve_dbpaths = []
    for node in self.fixture.nodes:
        preserve_dbpaths.append(node.preserve_dbpath)
        node.preserve_dbpath = True

    for secondary in self.fixture.get_secondaries():
        self._check_invariants_as_standalone(secondary)

        self.logger.info(
            "Restarting the secondary on port %d as a replica set node with"
            " its data files intact...", secondary.port)
        # Start the 'secondary' mongod back up as part of the replica set and wait for it
        # to reach state SECONDARY.
        secondary.setup()
        self.logger.info(fixture.create_fixture_table(self.fixture))
        secondary.await_ready()
        self._await_secondary_state(secondary)

        try:
            secondary.teardown()
        except errors.ServerFailure:
            raise errors.ServerFailure(
                "{} did not exit cleanly after reconciling the end of its oplog".format(
                    secondary))

    self.logger.info("Starting the fixture back up again with its data files intact for"
                     " final validation...")

    try:
        self.fixture.setup()
        self.logger.info(fixture.create_fixture_table(self.fixture))
        self.fixture.await_ready()
    finally:
        for (i, node) in enumerate(self.fixture.nodes):
            node.preserve_dbpath = preserve_dbpaths[i]
def _kill_secondaries(self):
    for secondary in self.fixture.get_secondaries():
        # Disable the "rsSyncApplyStop" failpoint on the secondary to have it resume
        # applying oplog entries.
        self._disable_rssyncapplystop(secondary)

        # Wait a little bit for the secondary to start applying oplog entries so that we are
        # more likely to kill the mongod process while it is partway into applying a batch.
        time.sleep(0.1)

        # Check that the secondary is still running before forcibly terminating it. This
        # ensures we still detect some cases in which the secondary has already crashed.
        if not secondary.is_running():
            raise errors.ServerFailure(
                "mongod on port {} was expected to be running in"
                " PeriodicKillSecondaries.after_test(), but wasn't.".format(secondary.port))

        self.hook_test_case.logger.info("Killing the secondary on port %d...",
                                        secondary.port)
        secondary.mongod.stop(kill=True)

    # Teardown may or may not be considered a success as a result of killing a secondary,
    # so we ignore the return value of Fixture.teardown().
    self.fixture.teardown()
def _get_recipient_primary(self, split_opts, timeout_secs=None):
    if timeout_secs is None:
        timeout_secs = self._shard_split_fixture.AWAIT_REPL_TIMEOUT_MINS * 60

    nodes = split_opts.get_recipient_nodes()
    start = time.time()
    clients = {}
    while True:
        for node in nodes:
            now = time.time()
            if (now - start) >= timeout_secs:
                msg = (f"Timed out while waiting for a primary on replica set"
                       f" '{split_opts.recipient_set_name}'.")
                self.logger.error(msg)
                raise errors.ServerFailure(msg)

            try:
                if node.port not in clients:
                    clients[node.port] = self._create_client(node)

                client = clients[node.port]
                is_master = client.admin.command("isMaster")["ismaster"]
                if is_master:
                    return node
            except pymongo.errors.ConnectionFailure:
                continue
def _check_thread(self):
    """Throw an error if the thread is not running."""
    if not self.is_alive():
        msg = "Tenant migration thread is not running."
        self.logger.error(msg)
        raise errors.ServerFailure(msg)
def __init__(  # pylint: disable=too-many-arguments,too-many-locals
        self, logger, job_num, mongod_executable=None, mongod_options=None,
        dbpath_prefix=None, preserve_dbpath=False, num_nodes=2,
        start_initial_sync_node=False, write_concern_majority_journal_default=None,
        auth_options=None, replset_config_options=None, voting_secondaries=True,
        all_nodes_electable=False, use_replica_set_connection_string=None,
        linear_chain=False, mixed_bin_versions=None, default_read_concern=None,
        default_write_concern=None, shard_logging_prefix=None,
        replicaset_logging_prefix=None):
    """Initialize ReplicaSetFixture."""
    interface.ReplFixture.__init__(self, logger, job_num, dbpath_prefix=dbpath_prefix)

    self.mongod_executable = mongod_executable
    self.mongod_options = make_historic(utils.default_if_none(mongod_options, {}))
    self.preserve_dbpath = preserve_dbpath
    self.start_initial_sync_node = start_initial_sync_node
    self.write_concern_majority_journal_default = write_concern_majority_journal_default
    self.auth_options = auth_options
    self.replset_config_options = make_historic(
        utils.default_if_none(replset_config_options, {}))
    self.voting_secondaries = voting_secondaries
    self.all_nodes_electable = all_nodes_electable
    self.use_replica_set_connection_string = use_replica_set_connection_string
    self.default_read_concern = default_read_concern
    self.default_write_concern = default_write_concern
    self.mixed_bin_versions = utils.default_if_none(mixed_bin_versions,
                                                    config.MIXED_BIN_VERSIONS)
    self.mixed_bin_versions_config = self.mixed_bin_versions
    self.shard_logging_prefix = shard_logging_prefix
    self.replicaset_logging_prefix = replicaset_logging_prefix

    # Use the values given from the command line if they exist for linear_chain and
    # num_nodes.
    linear_chain_option = utils.default_if_none(config.LINEAR_CHAIN, linear_chain)
    self.linear_chain = linear_chain_option if linear_chain_option else linear_chain
    num_replset_nodes = config.NUM_REPLSET_NODES
    self.num_nodes = num_replset_nodes if num_replset_nodes else num_nodes

    if self.mixed_bin_versions is not None:
        mongod_executable = utils.default_if_none(self.mongod_executable,
                                                  config.MONGOD_EXECUTABLE,
                                                  config.DEFAULT_MONGOD_EXECUTABLE)
        latest_mongod = mongod_executable
        # The last-lts binary is currently expected to live in '/data/multiversion', which
        # is part of the PATH.
        is_config_svr = ("configsvr" in self.replset_config_options
                         and self.replset_config_options["configsvr"])
        if not is_config_svr:
            self.mixed_bin_versions = [
                latest_mongod if (x == "new") else LAST_LTS_MONGOD_BINARY
                for x in self.mixed_bin_versions
            ]
        else:
            # Our documented recommended path for upgrading shards lets us assume that
            # config server nodes will always be fully upgraded before shard nodes.
            self.mixed_bin_versions = [latest_mongod, latest_mongod]
        num_versions = len(self.mixed_bin_versions)
        if num_versions != self.num_nodes and not is_config_svr:
            msg = ("The number of binary versions specified: {} does not match the number"
                   " of nodes in the replica set: {}.").format(num_versions, self.num_nodes)
            raise errors.ServerFailure(msg)

    # By default, we only use a replica set connection string if all nodes are capable of
    # being elected primary.
    if self.use_replica_set_connection_string is None:
        self.use_replica_set_connection_string = self.all_nodes_electable

    if self.default_write_concern is True:
        self.default_write_concern = make_historic({
            "w": "majority",
            # Use a "signature" value that won't typically match a value assigned in normal
            # use. This way the wtimeout set by this override is distinguishable in the
            # server logs.
            "wtimeout": 5 * 60 * 1000 + 321,  # 300321ms
        })

    # Set the default oplogSize to 511MB.
    self.mongod_options.setdefault("oplogSize", 511)

    # The dbpath in mongod_options is used as the dbpath prefix for replica set members and
    # takes precedence over other settings. The ShardedClusterFixture uses this parameter
    # to create replica sets and assign their dbpath structure explicitly.
    if "dbpath" in self.mongod_options:
        self._dbpath_prefix = self.mongod_options.pop("dbpath")
    else:
        self._dbpath_prefix = os.path.join(self._dbpath_prefix, config.FIXTURE_SUBDIR)

    self.nodes = []
    self.replset_name = None
    self.initial_sync_node = None
    self.initial_sync_node_idx = -1
def setup(self):  # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    """Set up the replica set."""
    self.replset_name = self.mongod_options.get("replSet", "rs")
    if not self.nodes:
        for i in range(self.num_nodes):
            node = self._new_mongod(i, self.replset_name)
            self.nodes.append(node)

    for i in range(self.num_nodes):
        steady_state_constraint_param = "oplogApplicationEnforcesSteadyStateConstraints"
        # TODO (SERVER-52985): Set steady state constraint parameters on last-lts nodes.
        if (steady_state_constraint_param not in
                self.nodes[i].mongod_options["set_parameters"]
                and self.mixed_bin_versions is not None
                and self.mixed_bin_versions[i] == "new"):
            self.nodes[i].mongod_options["set_parameters"][
                steady_state_constraint_param] = True
        if self.linear_chain and i > 0:
            self.nodes[i].mongod_options["set_parameters"][
                "failpoint.forceSyncSourceCandidate"] = make_historic({
                    "mode": "alwaysOn",
                    "data": {
                        "hostAndPort": self.nodes[i - 1].get_internal_connection_string()
                    }
                })
        self.nodes[i].setup()

    if self.start_initial_sync_node:
        if not self.initial_sync_node:
            self.initial_sync_node_idx = len(self.nodes)
            self.initial_sync_node = self._new_mongod(self.initial_sync_node_idx,
                                                      self.replset_name)
        self.initial_sync_node.setup()
        self.initial_sync_node.await_ready()

    if self.mixed_bin_versions:
        for i in range(self.num_nodes):
            if self.nodes[i].mongod_executable != self.mixed_bin_versions[i]:
                msg = (f"Executable of node{i}: {self.nodes[i].mongod_executable} does not"
                       f" match the executable assigned by mixedBinVersions:"
                       f" {self.mixed_bin_versions[i]}.")
                raise errors.ServerFailure(msg)

    # We only need to wait to connect to the first node of the replica set because we first
    # initiate it as a single node replica set.
    self.nodes[0].await_ready()

    # Initiate the replica set.
    members = []
    for (i, node) in enumerate(self.nodes):
        member_info = {"_id": i, "host": node.get_internal_connection_string()}
        if i > 0:
            if not self.all_nodes_electable:
                member_info["priority"] = 0
            if i >= 7 or not self.voting_secondaries:
                # Only 7 nodes in a replica set can vote, so the other members must still
                # be non-voting when this fixture is configured to have voting secondaries.
                member_info["votes"] = 0
        members.append(member_info)
    if self.initial_sync_node:
        members.append({
            "_id": self.initial_sync_node_idx,
            "host": self.initial_sync_node.get_internal_connection_string(),
            "priority": 0,
            "hidden": 1,
            "votes": 0
        })

    repl_config = {"_id": self.replset_name, "protocolVersion": 1}
    client = self.nodes[0].mongo_client()
    self.auth(client, self.auth_options)

    if client.local.system.replset.count():
        # Skip initiating the replica set if there is an existing configuration.
        return

    if self.write_concern_majority_journal_default is not None:
        repl_config["writeConcernMajorityJournalDefault"] = \
            self.write_concern_majority_journal_default
    else:
        server_status = client.admin.command({"serverStatus": 1})
        cmd_line_opts = client.admin.command({"getCmdLineOpts": 1})
        if not (server_status["storageEngine"]["persistent"]
                and cmd_line_opts["parsed"].get("storage", {}).get("journal", {}).get(
                    "enabled", True)):
            repl_config["writeConcernMajorityJournalDefault"] = False

    if self.replset_config_options.get("configsvr", False):
        repl_config["configsvr"] = True
    if self.replset_config_options.get("settings"):
        replset_settings = self.replset_config_options["settings"]
        repl_config["settings"] = replset_settings

    # Increase the election timeout to 24 hours to prevent spurious elections.
    repl_config.setdefault("settings", {})
    if "electionTimeoutMillis" not in repl_config["settings"]:
        repl_config["settings"]["electionTimeoutMillis"] = 24 * 60 * 60 * 1000

    # Start up a single node replica set then reconfigure to the correct size (if the
    # config contains more than 1 node), so the primary is elected more quickly.
    repl_config["members"] = [members[0]]
    self.logger.info("Issuing replSetInitiate command: %s", repl_config)
    self._initiate_repl_set(client, repl_config)
    self._await_primary()

    if self.mixed_bin_versions is not None:
        if self.mixed_bin_versions[0] == "new":
            fcv_response = client.admin.command(
                {"getParameter": 1, "featureCompatibilityVersion": 1})
            fcv = fcv_response["featureCompatibilityVersion"]["version"]
            if fcv != ReplicaSetFixture._LATEST_FCV:
                msg = "Server returned FCV{} when we expected FCV{}.".format(
                    fcv, ReplicaSetFixture._LATEST_FCV)
                raise errors.ServerFailure(msg)

        # Initiating a replica set with a single node will use "latest" FCV. This will
        # cause IncompatibleServerVersion errors if additional "last-lts" binary version
        # nodes are subsequently added to the set, since such nodes cannot set their FCV
        # to "latest". Therefore, we make sure the primary is "last-lts" FCV before adding
        # in nodes of different binary versions to the replica set.
        client.admin.command(
            {"setFeatureCompatibilityVersion": ReplicaSetFixture._LAST_LTS_FCV})

    if self.nodes[1:]:
        # Wait to connect to each of the secondaries before running the replSetReconfig
        # command.
        for node in self.nodes[1:]:
            node.await_ready()
        # Add in the members one at a time, since non-force reconfigs can only add/remove
        # a single voting member at a time.
        for ind in range(2, len(members) + 1):
            self._add_node_to_repl_set(client, repl_config, ind, members)

    self._await_secondaries()
    self._await_newly_added_removals()
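# For reference, a hypothetical replSetInitiate payload in the shape setup() builds:
# protocolVersion 1, a single starting member, and a 24-hour election timeout so that
# test-induced stalls do not trigger spurious elections. The host string is made up.
repl_config = {
    "_id": "rs",
    "protocolVersion": 1,
    "settings": {"electionTimeoutMillis": 24 * 60 * 60 * 1000},
    "members": [{"_id": 0, "host": "localhost:20000"}],
}
# client.admin.command({"replSetInitiate": repl_config})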
def _check_invariants_as_standalone(self, secondary):
    # We remove the --replSet option in order to start the node as a standalone.
    replset_name = secondary.mongod_options.pop("replSet")

    try:
        secondary.setup()
        secondary.await_ready()

        client = secondary.mongo_client()
        minvalid_doc = client.local["replset.minvalid"].find_one()
        oplog_truncate_after_doc = client.local["replset.oplogTruncateAfterPoint"].find_one()
        checkpoint_timestamp_doc = client.local["replset.checkpointTimestamp"].find_one()
        latest_oplog_doc = client.local["oplog.rs"].find_one(
            sort=[("$natural", pymongo.DESCENDING)])

        null_ts = bson.Timestamp(0, 0)

        # The oplog could be empty during initial sync. If so, we default it to null.
        latest_oplog_entry_ts = null_ts
        if latest_oplog_doc is not None:
            latest_oplog_entry_ts = latest_oplog_doc.get("ts")
            if latest_oplog_entry_ts is None:
                raise errors.ServerFailure(
                    "Latest oplog entry had no 'ts' field: {}".format(latest_oplog_doc))

        # The "oplogTruncateAfterPoint" document may not exist at startup. If so, we
        # default it to null.
        oplog_truncate_after_ts = null_ts
        if oplog_truncate_after_doc is not None:
            oplog_truncate_after_ts = oplog_truncate_after_doc.get(
                "oplogTruncateAfterPoint", null_ts)

        # The "checkpointTimestamp" document may not exist at startup. If so, we default
        # it to null.
        checkpoint_timestamp = null_ts
        if checkpoint_timestamp_doc is not None:
            checkpoint_timestamp = checkpoint_timestamp_doc.get("checkpointTimestamp")
            if checkpoint_timestamp is None:
                raise errors.ServerFailure(
                    "Checkpoint timestamp document had no 'checkpointTimestamp'"
                    " field: {}".format(checkpoint_timestamp_doc))

        # checkpointTimestamp <= top of oplog
        # If the oplog is empty, the checkpoint timestamp should also be null.
        if not checkpoint_timestamp <= latest_oplog_entry_ts:
            raise errors.ServerFailure(
                "The condition checkpointTimestamp <= top of oplog ({} <= {}) doesn't hold:"
                " checkpointTimestamp document={}, latest oplog entry={}".format(
                    checkpoint_timestamp, latest_oplog_entry_ts, checkpoint_timestamp_doc,
                    latest_oplog_doc))

        if minvalid_doc is not None:
            applied_through_ts = minvalid_doc.get("begin", {}).get("ts", null_ts)
            minvalid_ts = minvalid_doc.get("ts", null_ts)

            # The "appliedThrough" value should always equal the "checkpointTimestamp".
            # The writes to "appliedThrough" are given the timestamp of the end of the
            # batch, and batch boundaries are the only valid timestamps in which we could
            # take checkpoints, so if you see a non-null appliedThrough in a stable
            # checkpoint it must be at the same timestamp as the checkpoint.
            if (checkpoint_timestamp != null_ts and applied_through_ts != null_ts
                    and checkpoint_timestamp != applied_through_ts):
                raise errors.ServerFailure(
                    "The condition checkpointTimestamp ({}) == appliedThrough ({})"
                    " doesn't hold: minValid document={},"
                    " checkpointTimestamp document={}, last oplog entry={}".format(
                        checkpoint_timestamp, applied_through_ts, minvalid_doc,
                        checkpoint_timestamp_doc, latest_oplog_doc))

            if applied_through_ts == null_ts:
                # We clear "appliedThrough" to represent having applied through the top of
                # the oplog in PRIMARY state or immediately after "rollback via refetch".
                # If we are using a storage engine that supports "recover to a checkpoint,"
                # then we will have a "checkpointTimestamp" and we should use that as our
                # "appliedThrough" (similarly to why we assert their equality above).
                # If both are null, then we are in PRIMARY state on a storage engine that
                # does not support "recover to a checkpoint" or in RECOVERING immediately
                # after "rollback via refetch". Since we do not update "minValid" in
                # PRIMARY state, we leave "appliedThrough" as null so that the invariants
                # below hold, rather than substituting the latest oplog entry for the
                # "appliedThrough" value.
                applied_through_ts = checkpoint_timestamp

            if minvalid_ts == null_ts:
                # The server treats the "ts" field in the minValid document as missing
                # when its value is the null timestamp.
                minvalid_ts = applied_through_ts

            if latest_oplog_entry_ts == null_ts:
                # If the oplog is empty, we treat the "minValid" as the latest oplog entry.
                latest_oplog_entry_ts = minvalid_ts

            if oplog_truncate_after_ts == null_ts:
                # The server treats the "oplogTruncateAfterPoint" field as missing when
                # its value is the null timestamp. When it is null, the oplog is complete
                # and should not be truncated, so it is effectively the top of the oplog.
                oplog_truncate_after_ts = latest_oplog_entry_ts

            # Check the ordering invariants before the secondary has reconciled the end of
            # its oplog.
            # The "oplogTruncateAfterPoint" is set to the first timestamp of each batch of
            # oplog entries before they are written to the oplog. Thus, it can be ahead
            # of the top of the oplog before any oplog entries are written, and behind it
            # after some are written. Thus, we cannot compare it to the top of the oplog.

            # appliedThrough <= minValid
            # appliedThrough represents the end of the previous batch, so it is always the
            # earliest.
            if not applied_through_ts <= minvalid_ts:
                raise errors.ServerFailure(
                    "The condition appliedThrough <= minValid ({} <= {}) doesn't hold:"
                    " minValid document={}, latest oplog entry={}".format(
                        applied_through_ts, minvalid_ts, minvalid_doc, latest_oplog_doc))

            # minValid <= oplogTruncateAfterPoint
            # This is true because this hook is never run after a rollback. Thus, we only
            # move "minValid" to the end of each batch after the batch is written to the
            # oplog. We reset the "oplogTruncateAfterPoint" to null before we move
            # "minValid" from the end of the previous batch to the end of the current
            # batch. Thus "minValid" must be less than or equal to the
            # "oplogTruncateAfterPoint".
            if not minvalid_ts <= oplog_truncate_after_ts:
                raise errors.ServerFailure(
                    "The condition minValid <= oplogTruncateAfterPoint ({} <= {}) doesn't"
                    " hold: minValid document={}, oplogTruncateAfterPoint document={},"
                    " latest oplog entry={}".format(
                        minvalid_ts, oplog_truncate_after_ts, minvalid_doc,
                        oplog_truncate_after_doc, latest_oplog_doc))

            # minValid <= latest oplog entry
            # "minValid" is set to the end of a batch after the batch is written to the
            # oplog. Thus it is always less than or equal to the top of the oplog.
            if not minvalid_ts <= latest_oplog_entry_ts:
                raise errors.ServerFailure(
                    "The condition minValid <= top of oplog ({} <= {}) doesn't"
                    " hold: minValid document={}, latest oplog entry={}".format(
                        minvalid_ts, latest_oplog_entry_ts, minvalid_doc,
                        latest_oplog_doc))

        try:
            secondary.teardown()
        except errors.ServerFailure:
            raise errors.ServerFailure(
                "{} did not exit cleanly after being started up as a standalone".format(
                    secondary))
    except pymongo.errors.OperationFailure as err:
        self.logger.exception(
            "Failed to read the minValid document, the oplogTruncateAfterPoint document,"
            " the checkpointTimestamp document, or the latest oplog entry from the mongod"
            " on port %d", secondary.port)
        raise errors.ServerFailure(
            "Failed to read the minValid document, the oplogTruncateAfterPoint document,"
            " the checkpointTimestamp document, or the latest oplog entry from the mongod"
            " on port {}: {}".format(secondary.port, err.args[0]))
    finally:
        # Set the secondary's options back to their original values.
        secondary.mongod_options["replSet"] = replset_name
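# A self-contained illustration of the ordering invariants checked above, using
# bson.Timestamp values directly. The concrete timestamps are made up; the point is
# that Timestamp supports the comparisons the checks rely on, and that the chain
# appliedThrough <= minValid <= oplogTruncateAfterPoint (and minValid <= top of
# oplog) must hold on an uncleanly stopped secondary.
import bson

applied_through_ts = bson.Timestamp(100, 1)       # end of the previously applied batch
minvalid_ts = bson.Timestamp(100, 5)              # end of the current batch
oplog_truncate_after_ts = bson.Timestamp(100, 5)  # first ts of the batch being written
latest_oplog_entry_ts = bson.Timestamp(100, 5)    # top of the oplog

assert applied_through_ts <= minvalid_ts <= oplog_truncate_after_ts
assert minvalid_ts <= latest_oplog_entry_ts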
def _check_invariants_as_standalone(self, secondary):  # pylint: disable=too-many-branches
    # We remove the --replSet option in order to start the node as a standalone.
    replset_name = secondary.mongod_options.pop("replSet")

    try:
        secondary.setup()
        secondary.await_ready()

        client = secondary.mongo_client()
        minvalid_doc = client.local["replset.minvalid"].find_one()
        oplog_truncate_after_doc = client.local["replset.oplogTruncateAfterPoint"].find_one()
        self.logger.info("minValid: {}, oplogTruncateAfterPoint: {}".format(
            minvalid_doc, oplog_truncate_after_doc))
        latest_oplog_doc = client.local["oplog.rs"].find_one(
            sort=[("$natural", pymongo.DESCENDING)])

        null_ts = bson.Timestamp(0, 0)

        # The oplog could be empty during initial sync. If so, we default it to null.
        latest_oplog_entry_ts = null_ts
        if latest_oplog_doc is not None:
            latest_oplog_entry_ts = latest_oplog_doc.get("ts")
            if latest_oplog_entry_ts is None:
                raise errors.ServerFailure(
                    "Latest oplog entry had no 'ts' field: {}".format(latest_oplog_doc))

        # The "oplogTruncateAfterPoint" document may not exist at startup. If so, we
        # default it to null.
        oplog_truncate_after_ts = null_ts
        if oplog_truncate_after_doc is not None:
            oplog_truncate_after_ts = oplog_truncate_after_doc.get(
                "oplogTruncateAfterPoint", null_ts)

        if minvalid_doc is not None:
            applied_through_ts = minvalid_doc.get("begin", {}).get("ts", null_ts)
            minvalid_ts = minvalid_doc.get("ts", null_ts)

            if minvalid_ts == null_ts:
                # The server treats the "ts" field in the minValid document as missing
                # when its value is the null timestamp.
                minvalid_ts = applied_through_ts

            if latest_oplog_entry_ts == null_ts:
                # If the oplog is empty, we treat the "minValid" as the latest oplog entry.
                latest_oplog_entry_ts = minvalid_ts

            if oplog_truncate_after_ts == null_ts:
                # The server treats the "oplogTruncateAfterPoint" field as missing when
                # its value is the null timestamp. When it is null, the oplog is complete
                # and should not be truncated, so it is effectively the top of the oplog.
                oplog_truncate_after_ts = latest_oplog_entry_ts

            # Check the ordering invariants before the secondary has reconciled the end of
            # its oplog.
            # The "oplogTruncateAfterPoint" is set to the first timestamp of each batch of
            # oplog entries before they are written to the oplog. Thus, it can be ahead
            # of the top of the oplog before any oplog entries are written, and behind it
            # after some are written. Thus, we cannot compare it to the top of the oplog.

            # appliedThrough <= minValid
            # appliedThrough represents the end of the previous batch, so it is always the
            # earliest.
            if not applied_through_ts <= minvalid_ts:
                raise errors.ServerFailure(
                    "The condition appliedThrough <= minValid ({} <= {}) doesn't hold:"
                    " minValid document={}, latest oplog entry={}".format(
                        applied_through_ts, minvalid_ts, minvalid_doc, latest_oplog_doc))

            # minValid <= oplogTruncateAfterPoint
            # This is true because this hook is never run after a rollback. Thus, we only
            # move "minValid" to the end of each batch after the batch is written to the
            # oplog. We reset the "oplogTruncateAfterPoint" to null before we move
            # "minValid" from the end of the previous batch to the end of the current
            # batch. Thus "minValid" must be less than or equal to the
            # "oplogTruncateAfterPoint".
            if not minvalid_ts <= oplog_truncate_after_ts:
                raise errors.ServerFailure(
                    "The condition minValid <= oplogTruncateAfterPoint ({} <= {}) doesn't"
                    " hold: minValid document={}, oplogTruncateAfterPoint document={},"
                    " latest oplog entry={}".format(
                        minvalid_ts, oplog_truncate_after_ts, minvalid_doc,
                        oplog_truncate_after_doc, latest_oplog_doc))

            # minValid <= latest oplog entry
            # "minValid" is set to the end of a batch after the batch is written to the
            # oplog. Thus it is always less than or equal to the top of the oplog.
            if not minvalid_ts <= latest_oplog_entry_ts:
                raise errors.ServerFailure(
                    "The condition minValid <= top of oplog ({} <= {}) doesn't"
                    " hold: minValid document={}, latest oplog entry={}".format(
                        minvalid_ts, latest_oplog_entry_ts, minvalid_doc,
                        latest_oplog_doc))

        try:
            secondary.teardown()
        except errors.ServerFailure:
            raise errors.ServerFailure(
                "{} did not exit cleanly after being started up as a standalone".format(
                    secondary))
    except pymongo.errors.OperationFailure as err:
        self.logger.exception(
            "Failed to read the minValid document, the oplogTruncateAfterPoint document,"
            " or the latest oplog entry from the mongod on port %d", secondary.port)
        raise errors.ServerFailure(
            "Failed to read the minValid document, the oplogTruncateAfterPoint document,"
            " or the latest oplog entry from the mongod on port {}: {}".format(
                secondary.port, err.args[0]))
    finally:
        # Set the secondary's options back to their original values.
        secondary.mongod_options["replSet"] = replset_name
def _check_thread(self):
    if not self._stepdown_thread.is_alive():
        msg = "The stepdown thread is not running."
        self.logger.error(msg)
        raise errors.ServerFailure(msg)
def _check_invariants_as_standalone(self, secondary):
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    # We remove the --replSet option in order to start the node as a standalone.
    replset_name = secondary.mongod_options.pop("replSet")
    self.logger.info(
        "Restarting the secondary on port %d as a standalone node with"
        " its data files intact...", secondary.port)

    try:
        secondary.setup()
        secondary.await_ready()

        client = secondary.mongo_client()
        minvalid_doc = client.local["replset.minvalid"].find_one()
        oplog_truncate_after_doc = client.local["replset.oplogTruncateAfterPoint"].find_one()
        recovery_timestamp_res = client.admin.command("replSetTest",
                                                      getLastStableRecoveryTimestamp=True)
        latest_oplog_doc = client.local["oplog.rs"].find_one(
            sort=[("$natural", pymongo.DESCENDING)])

        self.logger.info("Checking invariants: minValid: {}, oplogTruncateAfterPoint: {},"
                         " stable recovery timestamp: {}, latest oplog doc: {}".format(
                             minvalid_doc, oplog_truncate_after_doc, recovery_timestamp_res,
                             latest_oplog_doc))

        null_ts = bson.Timestamp(0, 0)

        # We wait for a stable recovery timestamp at setup, so we must have an oplog.
        latest_oplog_entry_ts = null_ts
        if latest_oplog_doc is None:
            raise errors.ServerFailure("No latest oplog entry")
        latest_oplog_entry_ts = latest_oplog_doc.get("ts")
        if latest_oplog_entry_ts is None:
            raise errors.ServerFailure(
                "Latest oplog entry had no 'ts' field: {}".format(latest_oplog_doc))

        # The "oplogTruncateAfterPoint" document may not exist at startup. If so, we
        # default it to null.
        oplog_truncate_after_ts = null_ts
        if oplog_truncate_after_doc is not None:
            oplog_truncate_after_ts = oplog_truncate_after_doc.get(
                "oplogTruncateAfterPoint", null_ts)

        # The "lastStableRecoveryTimestamp" field is present if the storage engine supports
        # "recover to a timestamp". If it's a null timestamp on a durable storage engine,
        # that means we do not yet have a stable checkpoint timestamp and must be
        # restarting at the top of the oplog. Since we wait for a stable recovery timestamp
        # at test fixture setup, we should never encounter a null timestamp here.
        recovery_timestamp = recovery_timestamp_res.get("lastStableRecoveryTimestamp")
        if recovery_timestamp == null_ts:
            raise errors.ServerFailure(
                "Received null stable recovery timestamp {}".format(recovery_timestamp_res))
        # On a storage engine that doesn't support "recover to a timestamp", we default to
        # null.
        if recovery_timestamp is None:
            recovery_timestamp = null_ts

        # last stable recovery timestamp <= top of oplog
        if not recovery_timestamp <= latest_oplog_entry_ts:
            raise errors.ServerFailure("The condition last stable recovery timestamp <= top"
                                       " of oplog ({} <= {}) doesn't hold:"
                                       " getLastStableRecoveryTimestamp result={},"
                                       " latest oplog entry={}".format(
                                           recovery_timestamp, latest_oplog_entry_ts,
                                           recovery_timestamp_res, latest_oplog_doc))

        if minvalid_doc is not None:
            applied_through_ts = minvalid_doc.get("begin", {}).get("ts", null_ts)
            minvalid_ts = minvalid_doc.get("ts", null_ts)

            # The "appliedThrough" value should always equal the "last stable recovery
            # timestamp", AKA the stable checkpoint for durable engines, on server restart.
            #
            # The written "appliedThrough" time is updated with the latest timestamp at
            # the end of each batch application, and batch boundaries are the only valid
            # stable timestamps on secondaries. Therefore, a non-null appliedThrough
            # timestamp must equal the checkpoint timestamp, because any stable timestamp
            # that the checkpoint could use includes an equal persisted appliedThrough
            # timestamp.
            if (recovery_timestamp != null_ts and applied_through_ts != null_ts
                    and recovery_timestamp != applied_through_ts):
                raise errors.ServerFailure(
                    "The condition last stable recovery timestamp ({}) == appliedThrough"
                    " ({}) doesn't hold: minValid document={},"
                    " getLastStableRecoveryTimestamp result={}, last oplog entry={}".format(
                        recovery_timestamp, applied_through_ts, minvalid_doc,
                        recovery_timestamp_res, latest_oplog_doc))

            if applied_through_ts == null_ts:
                # We clear "appliedThrough" to represent having applied through the top of
                # the oplog in PRIMARY state or immediately after "rollback via refetch".
                # If we are using a storage engine that supports "recover to a timestamp,"
                # then we will have a "last stable recovery timestamp" and we should use
                # that as our "appliedThrough" (similarly to why we assert their equality
                # above).
                # If both are null, then we are in PRIMARY state on a storage engine that
                # does not support "recover to a timestamp" or in RECOVERING immediately
                # after "rollback via refetch". Since we do not update "minValid" in
                # PRIMARY state, we leave "appliedThrough" as null so that the invariants
                # below hold, rather than substituting the latest oplog entry for the
                # "appliedThrough" value.
                applied_through_ts = recovery_timestamp

            if minvalid_ts == null_ts:
                # The server treats the "ts" field in the minValid document as missing
                # when its value is the null timestamp.
                minvalid_ts = applied_through_ts

            if latest_oplog_entry_ts == null_ts:
                # If the oplog is empty, we treat the "minValid" as the latest oplog entry.
                latest_oplog_entry_ts = minvalid_ts

            if oplog_truncate_after_ts == null_ts:
                # The server treats the "oplogTruncateAfterPoint" field as missing when
                # its value is the null timestamp. When it is null, the oplog is complete
                # and should not be truncated, so it is effectively the top of the oplog.
                oplog_truncate_after_ts = latest_oplog_entry_ts

            # Check the ordering invariants before the secondary has reconciled the end of
            # its oplog.
            # The "oplogTruncateAfterPoint" is set to the first timestamp of each batch of
            # oplog entries before they are written to the oplog. Thus, it can be ahead
            # of the top of the oplog before any oplog entries are written, and behind it
            # after some are written. Thus, we cannot compare it to the top of the oplog.

            # appliedThrough <= minValid
            # appliedThrough represents the end of the previous batch, so it is always the
            # earliest.
            if applied_through_ts > minvalid_ts:
                raise errors.ServerFailure(
                    "The condition appliedThrough <= minValid ({} <= {}) doesn't hold:"
                    " minValid document={}, latest oplog entry={}".format(
                        applied_through_ts, minvalid_ts, minvalid_doc, latest_oplog_doc))

            # minValid <= oplogTruncateAfterPoint
            # This is true because this hook is never run after a rollback. Thus, we only
            # move "minValid" to the end of each batch after the batch is written to the
            # oplog. We reset the "oplogTruncateAfterPoint" to null before we move
            # "minValid" from the end of the previous batch to the end of the current
            # batch. Thus "minValid" must be less than or equal to the
            # "oplogTruncateAfterPoint".
            if minvalid_ts > oplog_truncate_after_ts:
                raise errors.ServerFailure(
                    "The condition minValid <= oplogTruncateAfterPoint ({} <= {}) doesn't"
                    " hold: minValid document={}, oplogTruncateAfterPoint document={},"
                    " latest oplog entry={}".format(minvalid_ts, oplog_truncate_after_ts,
                                                    minvalid_doc, oplog_truncate_after_doc,
                                                    latest_oplog_doc))

            # minValid <= latest oplog entry
            # "minValid" is set to the end of a batch after the batch is written to the
            # oplog. Thus it is always less than or equal to the top of the oplog.
            if minvalid_ts > latest_oplog_entry_ts:
                raise errors.ServerFailure(
                    "The condition minValid <= top of oplog ({} <= {}) doesn't"
                    " hold: minValid document={}, latest oplog entry={}".format(
                        minvalid_ts, latest_oplog_entry_ts, minvalid_doc,
                        latest_oplog_doc))

        try:
            secondary.teardown()
        except errors.ServerFailure:
            raise errors.ServerFailure(
                "{} did not exit cleanly after being started up as a standalone".format(
                    secondary))
    except pymongo.errors.OperationFailure as err:
        self.logger.exception(
            "Failed to read the minValid document, the oplogTruncateAfterPoint document,"
            " the last stable recovery timestamp, or the latest oplog entry from the"
            " mongod on port %d", secondary.port)
        raise errors.ServerFailure(
            "Failed to read the minValid document, the oplogTruncateAfterPoint document,"
            " the last stable recovery timestamp, or the latest oplog entry from the"
            " mongod on port {}: {}".format(secondary.port, err.args[0]))
    finally:
        # Set the secondary's options back to their original values.
        secondary.mongod_options["replSet"] = replset_name
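# A minimal sketch of fetching the last stable recovery timestamp the same way the
# hook above does; the port is a placeholder. The field is absent on storage engines
# that do not support "recover to a timestamp", which is why the code defaults it to
# the null timestamp.
import pymongo

client = pymongo.MongoClient("localhost", 20000)
res = client.admin.command("replSetTest", getLastStableRecoveryTimestamp=True)
recovery_timestamp = res.get("lastStableRecoveryTimestamp")  # may be None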