def _enable_rssyncapplystop(self, secondary):
    # Enable the "rsSyncApplyStop" failpoint on the secondary to prevent it from
    # applying any oplog entries while the test is running.
    client = secondary.mongo_client()
    try:
        client.admin.command(
            bson.SON([("configureFailPoint", "rsSyncApplyStop"), ("mode", "alwaysOn")]))
    except pymongo.errors.OperationFailure as err:
        self.logger.exception("Unable to disable oplog application on the mongod on port %d",
                              secondary.port)
        raise errors.ServerFailure(
            "Unable to disable oplog application on the mongod on port {}: {}".format(
                secondary.port, err.args[0]))
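# A minimal standalone sketch of the failpoint toggle pattern used above, assuming a
# hypothetical local mongod; the port value and helper name are illustrative, not part
# of the fixture code.
import bson
import pymongo

def set_failpoint(port, name, mode):
    """Set the named failpoint to the given mode ("alwaysOn" or "off")."""
    client = pymongo.MongoClient("localhost", port)
    # The configureFailPoint command runs against the admin database; bson.SON keeps
    # the command name as the first field, as the server's command parser expects.
    client.admin.command(bson.SON([("configureFailPoint", name), ("mode", mode)]))

# set_failpoint(20000, "rsSyncApplyStop", "alwaysOn")  # pause oplog application
# set_failpoint(20000, "rsSyncApplyStop", "off")       # resume oplog application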
def _get_primary_url(self):
    no_primary_err = errors.ServerFailure("No primary found")

    for node in self._rs_fixture.nodes:
        try:
            is_master = node.mongo_client().admin.command("isMaster")["ismaster"]
        except pymongo.errors.AutoReconnect:
            raise no_primary_err

        if is_master:
            return node.get_driver_connection_url()

    raise no_primary_err
def pause(self):
    """Pause the thread after test."""
    self.__lifecycle.mark_test_finished()

    # Wait until we are no longer executing splits.
    self._is_idle_evt.wait()
    # Check if the thread is alive in case it has thrown an exception while running.
    self._check_thread()

    # Check that the fixture is still running.
    if not self._shard_split_fixture.is_running():
        raise errors.ServerFailure(
            f"ShardSplitFixture with pids {self._shard_split_fixture.pids()} expected to be"
            " running in ContinuousShardSplit, but wasn't.")
def _kill_secondaries(self):
    for secondary in self.fixture.get_secondaries():
        # Disable the "rsSyncApplyStop" failpoint on the secondary to have it resume
        # applying oplog entries.
        client = utils.new_mongo_client(port=secondary.port)
        try:
            client.admin.command(
                bson.SON([("configureFailPoint", "rsSyncApplyStop"), ("mode", "off")]))
        except pymongo.errors.OperationFailure as err:
            self.logger.exception(
                "Unable to re-enable oplog application on the mongod on port %d",
                secondary.port)
            raise errors.ServerFailure(
                "Unable to re-enable oplog application on the mongod on port %d: %s" %
                (secondary.port, err.args[0]))

        # Wait a little bit for the secondary to start applying oplog entries so that we are
        # more likely to kill the mongod process while it is partway into applying a batch.
        time.sleep(0.1)

        # Check that the secondary is still running before forcibly terminating it. This
        # ensures we still detect some cases in which the secondary has already crashed.
        if not secondary.is_running():
            raise errors.ServerFailure(
                "mongod on port %d was expected to be running in"
                " PeriodicKillSecondaries.after_test(), but wasn't." % (secondary.port))

        self.hook_test_case.logger.info(
            "Killing the secondary on port %d..." % (secondary.port))
        secondary.mongod.stop(kill=True)

    # Teardown may or may not be considered a success as a result of killing a secondary,
    # so we ignore the return value of Fixture.teardown().
    self.fixture.teardown()
def _await_secondary_state(self, secondary):
    client = secondary.mongo_client()
    try:
        client.admin.command(
            bson.SON([
                ("replSetTest", 1),
                ("waitForMemberState", 2),  # 2 = SECONDARY
                ("timeoutMillis", fixture.ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60 * 1000),
            ]))
    except pymongo.errors.OperationFailure as err:
        self.hook_test_case.logger.exception(
            "mongod on port %d failed to reach state SECONDARY after %d seconds",
            secondary.port, fixture.ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60)
        raise errors.ServerFailure(
            "mongod on port {} failed to reach state SECONDARY after {} seconds: {}".format(
                secondary.port, fixture.ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60,
                err.args[0]))
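# A hedged sketch of an alternative way to wait for SECONDARY state: poll
# replSetGetStatus until the node reports myState == 2. The port and timeout values
# are placeholders; the fixture above uses the test-only replSetTest command instead.
import time
import pymongo

def wait_for_secondary_state(port, timeout_secs=600):
    client = pymongo.MongoClient("localhost", port)
    deadline = time.time() + timeout_secs
    while time.time() < deadline:
        # "myState" uses the replica set member state codes; 2 means SECONDARY.
        if client.admin.command("replSetGetStatus")["myState"] == 2:
            return
        time.sleep(0.1)  # Wait a little bit before polling again.
    raise AssertionError("node on port {} never reached state SECONDARY".format(port))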
def pause(self):
    """Pause the thread."""
    self.__lifecycle.mark_test_finished()

    # Wait until we are no longer executing stepdowns.
    self._is_idle_evt.wait()
    # Check if the thread is alive in case it has thrown an exception while running.
    self._check_thread()
    # Wait until all the replica sets have primaries.
    self._await_primaries()
    # Wait for mongos to retarget the primary for each shard and the config server.
    self._do_wait_for_mongos_retarget()

    # Check that the fixtures are still running.
    for rs_fixture in self._rs_fixtures:
        if not rs_fixture.is_running():
            raise errors.ServerFailure(
                "ReplicaSetFixture with pids {} expected to be running in"
                " ContinuousStepdown, but wasn't.".format(rs_fixture.pids()))
    for mongos_fixture in self._mongos_fixtures:
        if not mongos_fixture.is_running():
            raise errors.ServerFailure(
                "MongoSFixture with pids {} expected to be running in"
                " ContinuousStepdown, but wasn't.".format(mongos_fixture.pids()))
def _do_teardown(self):
    if self.mongos is None:
        self.logger.warning("The mongos fixture has not been set up yet.")
        return  # Teardown is still a success even if nothing is running.

    self.logger.info("Stopping mongos on port %d with pid %d...", self.port,
                     self.mongos.pid)
    if not self.is_running():
        exit_code = self.mongos.poll()
        msg = ("mongos on port {:d} was expected to be running, but wasn't. "
               "Process exited with code {:d}").format(self.port, exit_code)
        self.logger.warning(msg)
        raise errors.ServerFailure(msg)

    self.mongos.stop()
    exit_code = self.mongos.wait()

    if exit_code == 0:
        self.logger.info("Successfully stopped the mongos on port {:d}".format(self.port))
    else:
        self.logger.warning("Stopped the mongos on port {:d}. "
                            "Process exited with code {:d}.".format(self.port, exit_code))
        raise errors.ServerFailure(
            "mongos on port {:d} with pid {:d} exited with code {:d}".format(
                self.port, self.mongos.pid, exit_code))
def _disable_rssyncapplystop(self, secondary):
    # Disable the "rsSyncApplyStop" failpoint on the secondary to have it resume applying
    # oplog entries.
    client = secondary.mongo_client()
    try:
        client.admin.command(
            bson.SON([("configureFailPoint", "rsSyncApplyStop"), ("mode", "off")]))
    except pymongo.errors.OperationFailure as err:
        self.logger.exception(
            "Unable to re-enable oplog application on the mongod on port %d",
            secondary.port)
        raise errors.ServerFailure(
            "Unable to re-enable oplog application on the mongod on port {}: {}".format(
                secondary.port, err.args[0]))
def _do_teardown(self, mode=None):
    if self.mongos is None:
        self.logger.warning("The mongos fixture has not been set up yet.")
        return  # Teardown is still a success even if nothing is running.

    if mode == interface.TeardownMode.ABORT:
        self.logger.info(
            "Attempting to send SIGABRT from resmoke to mongos on port %d with pid %d...",
            self.port, self.mongos.pid)
    else:
        self.logger.info("Stopping mongos on port %d with pid %d...", self.port,
                         self.mongos.pid)

    if not self.is_running():
        exit_code = self.mongos.poll()
        msg = ("mongos on port {:d} was expected to be running, but wasn't. "
               "Process exited with code {:d}").format(self.port, exit_code)
        self.logger.warning(msg)
        raise errors.ServerFailure(msg)

    self.mongos.stop(mode=mode)
    exit_code = self.mongos.wait()

    # Python's subprocess module reports death by signal as the negative signal number.
    # pylint: disable=invalid-unary-operand-type
    if exit_code == 0 or (mode is not None and exit_code == -(mode.value)):
        self.logger.info("Successfully stopped the mongos on port {:d}".format(self.port))
    else:
        self.logger.warning("Stopped the mongos on port {:d}. "
                            "Process exited with code {:d}.".format(self.port, exit_code))
        raise errors.ServerFailure(
            "mongos on port {:d} with pid {:d} exited with code {:d}".format(
                self.port, self.mongos.pid, exit_code))
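# A self-contained demonstration of the exit-code convention the branch above relies
# on: on POSIX, Python's subprocess module reports death by signal as the negative
# signal number, which is why exit_code == -(mode.value) indicates a clean stop by
# the requested signal. Uses only the standard library.
import signal
import subprocess

proc = subprocess.Popen(["sleep", "60"])
proc.send_signal(signal.SIGABRT)
exit_code = proc.wait()
# A process killed by SIGABRT has returncode -6 on POSIX systems.
assert exit_code == -signal.SIGABRT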
def pause(self):
    """Pause the thread after test."""
    self.__lifecycle.mark_test_finished()

    # Wait until we are no longer executing migrations.
    self._is_idle_evt.wait()
    # Check if the thread is alive in case it has thrown an exception while running.
    self._check_thread()

    # Check that the fixture is still running.
    if not self._tenant_migration_fixture.is_running():
        raise errors.ServerFailure(
            "TenantMigrationFixture with pids {} expected to be running in"
            " ContinuousTenantMigration, but wasn't".format(
                self._tenant_migration_fixture.pids()))
def await_ready(self):
    """Block until the fixture can be used for testing."""
    deadline = time.time() + standalone.MongoDFixture.AWAIT_READY_TIMEOUT_SECS

    # Wait until the mongos is accepting connections. The retry logic is necessary to
    # support versions of PyMongo <3.0 that immediately raise a ConnectionFailure if a
    # connection cannot be established.
    while True:
        # Check whether the mongos exited for some reason.
        exit_code = self.mongos.poll()
        if exit_code is not None:
            raise errors.ServerFailure(
                "Could not connect to mongos on port {}, process ended"
                " unexpectedly with code {}.".format(self.port, exit_code))

        try:
            # Use a shorter connection timeout to more closely satisfy the requested
            # deadline.
            client = self.mongo_client(timeout_millis=500)
            client.admin.command("ping")
            break
        except pymongo.errors.ConnectionFailure:
            remaining = deadline - time.time()
            if remaining <= 0.0:
                raise errors.ServerFailure(
                    "Failed to connect to mongos on port {} after {} seconds".format(
                        self.port, standalone.MongoDFixture.AWAIT_READY_TIMEOUT_SECS))

            self.logger.info("Waiting to connect to mongos on port %d.", self.port)
            time.sleep(0.1)  # Wait a little bit before trying again.

    self.logger.info("Successfully contacted the mongos on port %d.", self.port)
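# A condensed sketch of the same connect-with-deadline loop, reduced to its core; the
# port, timeout, and function name are illustrative rather than taken from the fixture.
import time
import pymongo

def wait_until_reachable(port, timeout_secs=60):
    deadline = time.time() + timeout_secs
    while True:
        try:
            # A short server selection timeout makes each attempt fail fast so the
            # overall deadline stays accurate.
            client = pymongo.MongoClient("localhost", port, serverSelectionTimeoutMS=500)
            client.admin.command("ping")
            return client
        except pymongo.errors.ConnectionFailure:
            if time.time() >= deadline:
                raise
            time.sleep(0.1)  # Wait a little bit before trying again.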
def _disable_abort(self, donor_primary_client, donor_primary_port, donor_primary_rs_name):
    try:
        donor_primary_client.admin.command(
            bson.SON([("configureFailPoint", "abortTenantMigrationAfterBlockingStarts"),
                      ("mode", "off")]))
    except pymongo.errors.OperationFailure as err:
        self.logger.exception(
            "Unable to disable the failpoint to make migrations abort on donor primary on"
            " port %d of replica set '%s'.", donor_primary_port, donor_primary_rs_name)
        raise errors.ServerFailure(
            "Unable to disable the failpoint to make migrations abort on donor primary on"
            " port {} of replica set '{}': {}".format(donor_primary_port,
                                                      donor_primary_rs_name, err.args[0]))
def __restart_init_sync(self, test_report, sync_node, sync_node_conn):
    if self.use_resync:
        self.hook_test_case.logger.info("Calling resync on initial sync node...")
        cmd = bson.SON([("resync", 1), ("wait", 0)])
        sync_node_conn.admin.command(cmd)
    else:
        # Tear down and restart the initial sync node to start initial sync again.
        if not sync_node.teardown():
            raise errors.ServerFailure("%s did not exit cleanly" % (sync_node))

        self.hook_test_case.logger.info("Starting the initial sync node back up again...")
        sync_node.setup()
        sync_node.await_ready()
def __init__(  # pylint: disable=too-many-arguments,too-many-locals
        self, logger, job_num, mongos_executable=None, mongos_options=None,
        mongod_executable=None, mongod_options=None, dbpath_prefix=None,
        preserve_dbpath=False, num_shards=1, num_rs_nodes_per_shard=1, num_mongos=1,
        enable_sharding=None, enable_balancer=True, enable_autosplit=True,
        auth_options=None, configsvr_options=None, shard_options=None,
        mixed_bin_versions=None):
    """Initialize ShardedClusterFixture with different options for the cluster processes."""
    interface.Fixture.__init__(self, logger, job_num, dbpath_prefix=dbpath_prefix)

    if "dbpath" in mongod_options:
        raise ValueError("Cannot specify mongod_options.dbpath")

    self.mongos_executable = mongos_executable
    self.mongos_options = utils.default_if_none(mongos_options, {})
    self.mongod_options = utils.default_if_none(mongod_options, {})
    self.mongod_executable = mongod_executable
    self.mongod_options["set_parameters"] = mongod_options.get("set_parameters", {}).copy()
    self.mongod_options["set_parameters"]["migrationLockAcquisitionMaxWaitMS"] = \
        mongod_options["set_parameters"].get("migrationLockAcquisitionMaxWaitMS", 30000)
    self.preserve_dbpath = preserve_dbpath
    # Use the 'num_shards' and 'num_rs_nodes_per_shard' values from the command line if
    # they exist.
    num_shards_option = config.NUM_SHARDS
    self.num_shards = num_shards if not num_shards_option else num_shards_option
    num_rs_nodes_per_shard_option = config.NUM_REPLSET_NODES
    self.num_rs_nodes_per_shard = (num_rs_nodes_per_shard
                                   if not num_rs_nodes_per_shard_option else
                                   num_rs_nodes_per_shard_option)
    self.num_mongos = num_mongos
    self.enable_sharding = utils.default_if_none(enable_sharding, [])
    self.enable_balancer = enable_balancer
    self.enable_autosplit = enable_autosplit
    self.auth_options = auth_options
    self.configsvr_options = utils.default_if_none(configsvr_options, {})
    self.shard_options = utils.default_if_none(shard_options, {})
    self.mixed_bin_versions = utils.default_if_none(mixed_bin_versions,
                                                    config.MIXED_BIN_VERSIONS)

    if self.mixed_bin_versions is not None and num_rs_nodes_per_shard is not None:
        num_mongods = self.num_shards * self.num_rs_nodes_per_shard
        if len(self.mixed_bin_versions) != num_mongods:
            msg = ("The number of binary versions specified: {} does not match the number"
                   " of nodes in the sharded cluster: {}.").format(
                       len(self.mixed_bin_versions), num_mongods)
            raise errors.ServerFailure(msg)

    self._dbpath_prefix = os.path.join(self._dbpath_prefix, config.FIXTURE_SUBDIR)

    self.configsvr = None
    self.mongos = []
    self.shards = []
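# A hypothetical instantiation sketch. It shows that the command-line overrides win:
# config.NUM_SHARDS and config.NUM_REPLSET_NODES, when set, replace the num_shards
# and num_rs_nodes_per_shard arguments. The logger and argument values below are
# placeholders, not taken from the original code.
import logging

fixture = ShardedClusterFixture(
    logging.getLogger("sharded_cluster_fixture"), job_num=0,
    num_shards=2,              # overridden by config.NUM_SHARDS if it is set
    num_rs_nodes_per_shard=3,  # overridden by config.NUM_REPLSET_NODES if it is set
    mongod_options={"set_parameters": {}},
    enable_balancer=False)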
def _restart_and_clear_fixture(self):
    # We restart the fixture after setting 'preserve_dbpath' back to its original value in
    # order to clear the contents of the data directory if desired. The CleanEveryN hook
    # cannot be used in combination with the PeriodicKillSecondaries hook because we may
    # attempt to call Fixture.teardown() while the "rsSyncApplyStop" failpoint is still
    # enabled on the secondaries, causing them to exit with a non-zero return code.
    self.logger.info("Finished verifying data consistency, stopping the fixture...")

    try:
        self.fixture.teardown()
    except errors.ServerFailure:
        raise errors.ServerFailure(
            "{} did not exit cleanly after verifying data consistency".format(self.fixture))

    self.logger.info("Starting the fixture back up again with no data...")

    self.fixture.setup()
    self.fixture.await_ready()
def setup(self):
    """Set up the sharded cluster."""
    if "port" not in self.mongos_options:
        self.mongos_options["port"] = core.network.PortAllocator.next_fixture_port(
            self.job_num)
    self.port = self.mongos_options["port"]

    mongos = core.programs.mongos_program(self.logger, executable=self.mongos_executable,
                                          **self.mongos_options)
    try:
        self.logger.info("Starting mongos on port %d...\n%s", self.port,
                         mongos.as_command())
        mongos.start()
        self.logger.info("mongos started on port %d with pid %d.", self.port, mongos.pid)
    except Exception as err:
        msg = "Failed to start mongos on port {:d}: {}".format(self.port, err)
        self.logger.exception(msg)
        raise errors.ServerFailure(msg)

    self.mongos = mongos
def _add_node_to_repl_set(self, client, repl_config, member_index, members):
    self.logger.info("Adding in node %d: %s", member_index, members[member_index - 1])
    retryable_error_codes = (
        ReplicaSetFixture._NEW_REPLICA_SET_CONFIGURATION_INCOMPATIBLE,
        ReplicaSetFixture._CURRENT_CONFIG_NOT_COMMITTED_YET,
        ReplicaSetFixture._CONFIGURATION_IN_PROGRESS,
        ReplicaSetFixture._NODE_NOT_FOUND,
        ReplicaSetFixture._INTERRUPTED_DUE_TO_REPL_STATE_CHANGE,
    )
    while True:
        try:
            # 'newlyAdded' removal reconfigs could bump the version. Get the current
            # version to be safe.
            curr_version = client.admin.command(
                {"replSetGetConfig": 1})["config"]["version"]
            repl_config["version"] = curr_version + 1
            repl_config["members"] = members[:member_index]
            self.logger.info("Issuing replSetReconfig command: %s", repl_config)
            client.admin.command({
                "replSetReconfig": repl_config,
                "maxTimeMS": self.AWAIT_REPL_TIMEOUT_MINS * 60 * 1000
            })
            break
        except pymongo.errors.OperationFailure as err:
            # These error codes may be transient, and so we retry the reconfig with a
            # (potentially) higher config version. We should not receive these codes
            # indefinitely.
            if err.code not in retryable_error_codes:
                msg = ("Operation failure while setting up the"
                       " replica set fixture: {}").format(err)
                self.logger.error(msg)
                raise errors.ServerFailure(msg)

            msg = "Retrying failed attempt to add new node to fixture: {}".format(err)
            self.logger.error(msg)
            time.sleep(0.1)  # Wait a little bit before trying again.
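# A minimal sketch of the safe-reconfig pattern above, outside the fixture: re-read
# the committed config version immediately before each attempt so that a concurrent
# 'newlyAdded' removal reconfig cannot leave us with a stale version. The client
# construction and port are placeholders.
import pymongo

client = pymongo.MongoClient("localhost", 20000)
repl_config = client.admin.command({"replSetGetConfig": 1})["config"]
repl_config["version"] += 1  # a concurrent reconfig forces a retry with a fresh read
client.admin.command({"replSetReconfig": repl_config, "maxTimeMS": 60 * 1000})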
def before_test(self, test, test_report):
    if self._start_time is not None:
        # The "rsSyncApplyStop" failpoint is already enabled.
        return

    # Enable the "rsSyncApplyStop" failpoint on each of the secondaries to prevent them
    # from applying any oplog entries while the test is running.
    for secondary in self.fixture.get_secondaries():
        client = utils.new_mongo_client(port=secondary.port)
        try:
            client.admin.command(
                bson.SON([("configureFailPoint", "rsSyncApplyStop"), ("mode", "alwaysOn")]))
        except pymongo.errors.OperationFailure as err:
            self.logger.exception(
                "Unable to disable oplog application on the mongod on port %d",
                secondary.port)
            raise errors.ServerFailure(
                "Unable to disable oplog application on the mongod on port %d: %s" %
                (secondary.port, err.args[0]))

    self._start_time = time.time()
def _do_teardown(self, mode=None):
    self.logger.info("Stopping all members of the replica set...")

    running_at_start = self.is_running()
    if not running_at_start:
        self.logger.info("All members of the replica set were expected to be running, "
                         "but weren't.")

    teardown_handler = interface.FixtureTeardownHandler(self.logger)

    if self.initial_sync_node:
        teardown_handler.teardown(self.initial_sync_node, "initial sync node", mode=mode)

    # Terminate the secondaries first to reduce noise in the logs.
    for node in reversed(self.nodes):
        teardown_handler.teardown(node, "replica set member on port %d" % node.port,
                                  mode=mode)

    if teardown_handler.was_successful():
        self.logger.info("Successfully stopped all members of the replica set.")
    else:
        self.logger.error("Stopping the replica set fixture failed.")
        raise errors.ServerFailure(teardown_handler.get_error_message())
def retry_until_wtimeout(self, insert_fn):
    """
    Given a callback function representing an insert operation on the primary, handle any
    connection failures, and keep retrying the operation for up to
    'AWAIT_REPL_TIMEOUT_MINS' minutes.

    The insert operation callback should take an argument for the number of remaining
    seconds to provide as the timeout for the operation.
    """
    deadline = time.time() + ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60

    while True:
        try:
            remaining = deadline - time.time()
            insert_fn(remaining)
            break
        except pymongo.errors.ConnectionFailure:
            remaining = deadline - time.time()
            if remaining <= 0.0:
                raise errors.ServerFailure("Failed to connect to {}".format(
                    self.get_driver_connection_url()))
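# A hypothetical caller sketch for retry_until_wtimeout(): the callback receives the
# seconds remaining until the replication deadline and should use them as the write
# timeout for its own operation. 'primary_client' and 'repl_fixture' are placeholder
# names, not part of the original code.
import pymongo

def _insert_marker(remaining_secs):
    coll = primary_client["test"]["markers"].with_options(
        write_concern=pymongo.WriteConcern(w=2, wtimeout=int(remaining_secs * 1000)))
    coll.insert_one({"marker": 1})

repl_fixture.retry_until_wtimeout(_insert_marker)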
def _check_secondaries_and_restart_fixture(self):
    preserve_dbpaths = []
    for node in self.fixture.nodes:
        preserve_dbpaths.append(node.preserve_dbpath)
        node.preserve_dbpath = True

    for secondary in self.fixture.get_secondaries():
        self._check_invariants_as_standalone(secondary)

        self.logger.info(
            "Restarting the secondary on port %d as a replica set node with"
            " its data files intact...", secondary.port)
        # Start the 'secondary' mongod back up as part of the replica set and wait for it
        # to reach state SECONDARY.
        secondary.setup()
        self.logger.info(fixture.create_fixture_table(self.fixture))
        secondary.await_ready()
        self._await_secondary_state(secondary)

        try:
            secondary.teardown()
        except errors.ServerFailure:
            raise errors.ServerFailure(
                "{} did not exit cleanly after reconciling the end of its oplog".format(
                    secondary))

    self.logger.info("Starting the fixture back up again with its data files intact for"
                     " final validation...")

    try:
        self.fixture.setup()
        self.logger.info(fixture.create_fixture_table(self.fixture))
        self.fixture.await_ready()
    finally:
        for (i, node) in enumerate(self.fixture.nodes):
            node.preserve_dbpath = preserve_dbpaths[i]
def _kill_secondaries(self):
    for secondary in self.fixture.get_secondaries():
        # Disable the "rsSyncApplyStop" failpoint on the secondary to have it resume
        # applying oplog entries.
        self._disable_rssyncapplystop(secondary)

        # Wait a little bit for the secondary to start applying oplog entries so that we are
        # more likely to kill the mongod process while it is partway into applying a batch.
        time.sleep(0.1)

        # Check that the secondary is still running before forcibly terminating it. This
        # ensures we still detect some cases in which the secondary has already crashed.
        if not secondary.is_running():
            raise errors.ServerFailure(
                "mongod on port {} was expected to be running in"
                " PeriodicKillSecondaries.after_test(), but wasn't.".format(secondary.port))

        self.hook_test_case.logger.info("Killing the secondary on port %d...",
                                        secondary.port)
        secondary.mongod.stop(kill=True)

    # Teardown may or may not be considered a success as a result of killing a secondary,
    # so we ignore the return value of Fixture.teardown().
    self.fixture.teardown()
def _get_recipient_primary(self, split_opts, timeout_secs=None):
    if timeout_secs is None:
        timeout_secs = self._shard_split_fixture.AWAIT_REPL_TIMEOUT_MINS * 60

    nodes = split_opts.get_recipient_nodes()
    start = time.time()
    clients = {}
    while True:
        for node in nodes:
            now = time.time()
            if (now - start) >= timeout_secs:
                msg = (f"Timed out while waiting for a primary on replica set"
                       f" '{split_opts.recipient_set_name}'.")
                self.logger.error(msg)
                raise errors.ServerFailure(msg)

            try:
                if node.port not in clients:
                    clients[node.port] = self._create_client(node)

                client = clients[node.port]
                is_master = client.admin.command("isMaster")["ismaster"]
                if is_master:
                    return node
            except pymongo.errors.ConnectionFailure:
                continue
def _check_thread(self):
    """Throw an error if the thread is not running."""
    if not self.is_alive():
        msg = "Tenant migration thread is not running."
        self.logger.error(msg)
        raise errors.ServerFailure(msg)
def __init__(  # pylint: disable=too-many-arguments,too-many-locals
        self, logger, job_num, mongod_executable=None, mongod_options=None,
        dbpath_prefix=None, preserve_dbpath=False, num_nodes=2,
        start_initial_sync_node=False, write_concern_majority_journal_default=None,
        auth_options=None, replset_config_options=None, voting_secondaries=True,
        all_nodes_electable=False, use_replica_set_connection_string=None,
        linear_chain=False, mixed_bin_versions=None, default_read_concern=None,
        default_write_concern=None, shard_logging_prefix=None,
        replicaset_logging_prefix=None):
    """Initialize ReplicaSetFixture."""
    interface.ReplFixture.__init__(self, logger, job_num, dbpath_prefix=dbpath_prefix)

    self.mongod_executable = mongod_executable
    self.mongod_options = make_historic(utils.default_if_none(mongod_options, {}))
    self.preserve_dbpath = preserve_dbpath
    self.start_initial_sync_node = start_initial_sync_node
    self.write_concern_majority_journal_default = write_concern_majority_journal_default
    self.auth_options = auth_options
    self.replset_config_options = make_historic(
        utils.default_if_none(replset_config_options, {}))
    self.voting_secondaries = voting_secondaries
    self.all_nodes_electable = all_nodes_electable
    self.use_replica_set_connection_string = use_replica_set_connection_string
    self.default_read_concern = default_read_concern
    self.default_write_concern = default_write_concern
    self.mixed_bin_versions = utils.default_if_none(mixed_bin_versions,
                                                    config.MIXED_BIN_VERSIONS)
    self.mixed_bin_versions_config = self.mixed_bin_versions
    self.shard_logging_prefix = shard_logging_prefix
    self.replicaset_logging_prefix = replicaset_logging_prefix

    # Use the values given from the command line if they exist for linear_chain and
    # num_nodes.
    linear_chain_option = utils.default_if_none(config.LINEAR_CHAIN, linear_chain)
    self.linear_chain = linear_chain_option if linear_chain_option else linear_chain
    num_replset_nodes = config.NUM_REPLSET_NODES
    self.num_nodes = num_replset_nodes if num_replset_nodes else num_nodes

    if self.mixed_bin_versions is not None:
        mongod_executable = utils.default_if_none(self.mongod_executable,
                                                  config.MONGOD_EXECUTABLE,
                                                  config.DEFAULT_MONGOD_EXECUTABLE)
        latest_mongod = mongod_executable
        # The last-lts binary is currently expected to live in '/data/multiversion', which
        # is part of the PATH.
        is_config_svr = ("configsvr" in self.replset_config_options
                         and self.replset_config_options["configsvr"])
        if not is_config_svr:
            self.mixed_bin_versions = [
                latest_mongod if (x == "new") else LAST_LTS_MONGOD_BINARY
                for x in self.mixed_bin_versions
            ]
        else:
            # Our documented recommended path for upgrading shards lets us assume that
            # config server nodes will always be fully upgraded before shard nodes.
            self.mixed_bin_versions = [latest_mongod, latest_mongod]
        num_versions = len(self.mixed_bin_versions)
        if num_versions != self.num_nodes and not is_config_svr:
            msg = ("The number of binary versions specified: {} does not match the number"
                   " of nodes in the replica set: {}.").format(num_versions, self.num_nodes)
            raise errors.ServerFailure(msg)

    # By default, we only use a replica set connection string if all nodes are capable of
    # being elected primary.
    if self.use_replica_set_connection_string is None:
        self.use_replica_set_connection_string = self.all_nodes_electable

    if self.default_write_concern is True:
        self.default_write_concern = make_historic({
            "w": "majority",
            # Use a "signature" value that won't typically match a value assigned in normal
            # use. This way the wtimeout set by this override is distinguishable in the
            # server logs.
            "wtimeout": 5 * 60 * 1000 + 321,  # 300321ms
        })

    # Set the default oplogSize to 511MB.
    self.mongod_options.setdefault("oplogSize", 511)

    # The dbpath in mongod_options is used as the dbpath prefix for replica set members and
    # takes precedence over other settings. The ShardedClusterFixture uses this parameter
    # to create replica sets and assign their dbpath structure explicitly.
    if "dbpath" in self.mongod_options:
        self._dbpath_prefix = self.mongod_options.pop("dbpath")
    else:
        self._dbpath_prefix = os.path.join(self._dbpath_prefix, config.FIXTURE_SUBDIR)

    self.nodes = []
    self.replset_name = None
    self.initial_sync_node = None
    self.initial_sync_node_idx = -1
def setup(self):  # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    """Set up the replica set."""
    self.replset_name = self.mongod_options.get("replSet", "rs")
    if not self.nodes:
        for i in range(self.num_nodes):
            node = self._new_mongod(i, self.replset_name)
            self.nodes.append(node)

    for i in range(self.num_nodes):
        steady_state_constraint_param = "oplogApplicationEnforcesSteadyStateConstraints"
        # TODO (SERVER-52985): Set steady state constraint parameters on last-lts nodes.
        if (steady_state_constraint_param not in
                self.nodes[i].mongod_options["set_parameters"]
                and self.mixed_bin_versions is not None
                and self.mixed_bin_versions[i] == "new"):
            self.nodes[i].mongod_options["set_parameters"][
                steady_state_constraint_param] = True
        if self.linear_chain and i > 0:
            self.nodes[i].mongod_options["set_parameters"][
                "failpoint.forceSyncSourceCandidate"] = make_historic({
                    "mode": "alwaysOn",
                    "data": {
                        "hostAndPort": self.nodes[i - 1].get_internal_connection_string()
                    }
                })
        self.nodes[i].setup()

    if self.start_initial_sync_node:
        if not self.initial_sync_node:
            self.initial_sync_node_idx = len(self.nodes)
            self.initial_sync_node = self._new_mongod(self.initial_sync_node_idx,
                                                      self.replset_name)
        self.initial_sync_node.setup()
        self.initial_sync_node.await_ready()

    if self.mixed_bin_versions:
        for i in range(self.num_nodes):
            if self.nodes[i].mongod_executable != self.mixed_bin_versions[i]:
                msg = (f"Executable of node{i}: {self.nodes[i].mongod_executable} does not"
                       f" match the executable assigned by mixedBinVersions:"
                       f" {self.mixed_bin_versions[i]}.")
                raise errors.ServerFailure(msg)

    # We only need to wait to connect to the first node of the replica set because we first
    # initiate it as a single node replica set.
    self.nodes[0].await_ready()

    # Initiate the replica set.
    members = []
    for (i, node) in enumerate(self.nodes):
        member_info = {"_id": i, "host": node.get_internal_connection_string()}
        if i > 0:
            if not self.all_nodes_electable:
                member_info["priority"] = 0
            if i >= 7 or not self.voting_secondaries:
                # Only 7 nodes in a replica set can vote, so the other members must still
                # be non-voting when this fixture is configured to have voting secondaries.
                member_info["votes"] = 0
        members.append(member_info)
    if self.initial_sync_node:
        members.append({
            "_id": self.initial_sync_node_idx,
            "host": self.initial_sync_node.get_internal_connection_string(),
            "priority": 0,
            "hidden": 1,
            "votes": 0
        })

    repl_config = {"_id": self.replset_name, "protocolVersion": 1}
    client = self.nodes[0].mongo_client()
    self.auth(client, self.auth_options)

    if client.local.system.replset.count():
        # Skip initiating the replica set if there is an existing configuration.
        return

    if self.write_concern_majority_journal_default is not None:
        repl_config["writeConcernMajorityJournalDefault"] = \
            self.write_concern_majority_journal_default
    else:
        server_status = client.admin.command({"serverStatus": 1})
        cmd_line_opts = client.admin.command({"getCmdLineOpts": 1})
        if not (server_status["storageEngine"]["persistent"]
                and cmd_line_opts["parsed"].get("storage", {}).get("journal", {}).get(
                    "enabled", True)):
            repl_config["writeConcernMajorityJournalDefault"] = False

    if self.replset_config_options.get("configsvr", False):
        repl_config["configsvr"] = True
    if self.replset_config_options.get("settings"):
        replset_settings = self.replset_config_options["settings"]
        repl_config["settings"] = replset_settings

    # Increase the election timeout to 24 hours to prevent spurious elections.
    repl_config.setdefault("settings", {})
    if "electionTimeoutMillis" not in repl_config["settings"]:
        repl_config["settings"]["electionTimeoutMillis"] = 24 * 60 * 60 * 1000

    # Start up a single node replica set then reconfigure to the correct size (if the
    # config contains more than 1 node), so the primary is elected more quickly.
    repl_config["members"] = [members[0]]
    self.logger.info("Issuing replSetInitiate command: %s", repl_config)
    self._initiate_repl_set(client, repl_config)
    self._await_primary()

    if self.mixed_bin_versions is not None:
        if self.mixed_bin_versions[0] == "new":
            fcv_response = client.admin.command(
                {"getParameter": 1, "featureCompatibilityVersion": 1})
            fcv = fcv_response["featureCompatibilityVersion"]["version"]
            if fcv != ReplicaSetFixture._LATEST_FCV:
                msg = "Server returned FCV{} when we expected FCV{}.".format(
                    fcv, ReplicaSetFixture._LATEST_FCV)
                raise errors.ServerFailure(msg)

        # Initiating a replica set with a single node will use "latest" FCV. This will
        # cause IncompatibleServerVersion errors if additional "last-lts" binary version
        # nodes are subsequently added to the set, since such nodes cannot set their FCV
        # to "latest". Therefore, we make sure the primary is "last-lts" FCV before adding
        # in nodes of different binary versions to the replica set.
        client.admin.command(
            {"setFeatureCompatibilityVersion": ReplicaSetFixture._LAST_LTS_FCV})

    if self.nodes[1:]:
        # Wait to connect to each of the secondaries before running the replSetReconfig
        # command.
        for node in self.nodes[1:]:
            node.await_ready()
        # Add in the members one at a time, since non-force reconfigs can only add/remove
        # a single voting member at a time.
        for ind in range(2, len(members) + 1):
            self._add_node_to_repl_set(client, repl_config, ind, members)

    self._await_secondaries()
    self._await_newly_added_removals()
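# For reference, a hypothetical replSetInitiate payload in the shape setup() builds:
# protocolVersion 1, a single starting member, and a 24-hour election timeout so that
# test-induced stalls do not trigger spurious elections. The host string is made up.
repl_config = {
    "_id": "rs",
    "protocolVersion": 1,
    "settings": {"electionTimeoutMillis": 24 * 60 * 60 * 1000},
    "members": [{"_id": 0, "host": "localhost:20000"}],
}
# client.admin.command({"replSetInitiate": repl_config})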
def _check_invariants_as_standalone(self, secondary):
    # We remove the --replSet option in order to start the node as a standalone.
    replset_name = secondary.mongod_options.pop("replSet")

    try:
        secondary.setup()
        secondary.await_ready()

        client = secondary.mongo_client()
        minvalid_doc = client.local["replset.minvalid"].find_one()
        oplog_truncate_after_doc = client.local["replset.oplogTruncateAfterPoint"].find_one()
        checkpoint_timestamp_doc = client.local["replset.checkpointTimestamp"].find_one()
        latest_oplog_doc = client.local["oplog.rs"].find_one(
            sort=[("$natural", pymongo.DESCENDING)])

        null_ts = bson.Timestamp(0, 0)

        # The oplog could be empty during initial sync. If so, we default it to null.
        latest_oplog_entry_ts = null_ts
        if latest_oplog_doc is not None:
            latest_oplog_entry_ts = latest_oplog_doc.get("ts")
            if latest_oplog_entry_ts is None:
                raise errors.ServerFailure(
                    "Latest oplog entry had no 'ts' field: {}".format(latest_oplog_doc))

        # The "oplogTruncateAfterPoint" document may not exist at startup. If so, we
        # default it to null.
        oplog_truncate_after_ts = null_ts
        if oplog_truncate_after_doc is not None:
            oplog_truncate_after_ts = oplog_truncate_after_doc.get(
                "oplogTruncateAfterPoint", null_ts)

        # The "checkpointTimestamp" document may not exist at startup. If so, we default
        # it to null.
        checkpoint_timestamp = null_ts
        if checkpoint_timestamp_doc is not None:
            checkpoint_timestamp = checkpoint_timestamp_doc.get("checkpointTimestamp")
            if checkpoint_timestamp is None:
                raise errors.ServerFailure(
                    "Checkpoint timestamp document had no 'checkpointTimestamp'"
                    " field: {}".format(checkpoint_timestamp_doc))

        # checkpointTimestamp <= top of oplog
        # If the oplog is empty, the checkpoint timestamp should also be null.
        if not checkpoint_timestamp <= latest_oplog_entry_ts:
            raise errors.ServerFailure(
                "The condition checkpointTimestamp <= top of oplog ({} <= {}) doesn't hold:"
                " checkpointTimestamp document={}, latest oplog entry={}".format(
                    checkpoint_timestamp, latest_oplog_entry_ts, checkpoint_timestamp_doc,
                    latest_oplog_doc))

        if minvalid_doc is not None:
            applied_through_ts = minvalid_doc.get("begin", {}).get("ts", null_ts)
            minvalid_ts = minvalid_doc.get("ts", null_ts)

            # The "appliedThrough" value should always equal the "checkpointTimestamp".
            # The writes to "appliedThrough" are given the timestamp of the end of the
            # batch, and batch boundaries are the only valid timestamps in which we could
            # take checkpoints, so if you see a non-null appliedThrough in a stable
            # checkpoint it must be at the same timestamp as the checkpoint.
            if (checkpoint_timestamp != null_ts and applied_through_ts != null_ts
                    and checkpoint_timestamp != applied_through_ts):
                raise errors.ServerFailure(
                    "The condition checkpointTimestamp ({}) == appliedThrough ({})"
                    " doesn't hold: minValid document={},"
                    " checkpointTimestamp document={}, last oplog entry={}".format(
                        checkpoint_timestamp, applied_through_ts, minvalid_doc,
                        checkpoint_timestamp_doc, latest_oplog_doc))

            if applied_through_ts == null_ts:
                # We clear "appliedThrough" to represent having applied through the top of
                # the oplog in PRIMARY state or immediately after "rollback via refetch".
                # If we are using a storage engine that supports "recover to a checkpoint,"
                # then we will have a "checkpointTimestamp" and we should use that as our
                # "appliedThrough" (similarly to why we assert their equality above).
                # If both are null, then we are in PRIMARY state on a storage engine that
                # does not support "recover to a checkpoint" or in RECOVERING immediately
                # after "rollback via refetch". Since we do not update "minValid" in
                # PRIMARY state, we leave "appliedThrough" as null so that the invariants
                # below hold, rather than substituting the latest oplog entry for the
                # "appliedThrough" value.
                applied_through_ts = checkpoint_timestamp

            if minvalid_ts == null_ts:
                # The server treats the "ts" field in the minValid document as missing
                # when its value is the null timestamp.
                minvalid_ts = applied_through_ts

            if latest_oplog_entry_ts == null_ts:
                # If the oplog is empty, we treat the "minValid" as the latest oplog entry.
                latest_oplog_entry_ts = minvalid_ts

            if oplog_truncate_after_ts == null_ts:
                # The server treats the "oplogTruncateAfterPoint" field as missing when
                # its value is the null timestamp. When it is null, the oplog is complete
                # and should not be truncated, so it is effectively the top of the oplog.
                oplog_truncate_after_ts = latest_oplog_entry_ts

            # Check the ordering invariants before the secondary has reconciled the end of
            # its oplog.
            # The "oplogTruncateAfterPoint" is set to the first timestamp of each batch of
            # oplog entries before they are written to the oplog. Thus, it can be ahead
            # of the top of the oplog before any oplog entries are written, and behind it
            # after some are written. Thus, we cannot compare it to the top of the oplog.

            # appliedThrough <= minValid
            # appliedThrough represents the end of the previous batch, so it is always the
            # earliest.
            if not applied_through_ts <= minvalid_ts:
                raise errors.ServerFailure(
                    "The condition appliedThrough <= minValid ({} <= {}) doesn't hold:"
                    " minValid document={}, latest oplog entry={}".format(
                        applied_through_ts, minvalid_ts, minvalid_doc, latest_oplog_doc))

            # minValid <= oplogTruncateAfterPoint
            # This is true because this hook is never run after a rollback. Thus, we only
            # move "minValid" to the end of each batch after the batch is written to the
            # oplog. We reset the "oplogTruncateAfterPoint" to null before we move
            # "minValid" from the end of the previous batch to the end of the current
            # batch. Thus "minValid" must be less than or equal to the
            # "oplogTruncateAfterPoint".
            if not minvalid_ts <= oplog_truncate_after_ts:
                raise errors.ServerFailure(
                    "The condition minValid <= oplogTruncateAfterPoint ({} <= {}) doesn't"
                    " hold: minValid document={}, oplogTruncateAfterPoint document={},"
                    " latest oplog entry={}".format(
                        minvalid_ts, oplog_truncate_after_ts, minvalid_doc,
                        oplog_truncate_after_doc, latest_oplog_doc))

            # minValid <= latest oplog entry
            # "minValid" is set to the end of a batch after the batch is written to the
            # oplog. Thus it is always less than or equal to the top of the oplog.
            if not minvalid_ts <= latest_oplog_entry_ts:
                raise errors.ServerFailure(
                    "The condition minValid <= top of oplog ({} <= {}) doesn't"
                    " hold: minValid document={}, latest oplog entry={}".format(
                        minvalid_ts, latest_oplog_entry_ts, minvalid_doc,
                        latest_oplog_doc))

        try:
            secondary.teardown()
        except errors.ServerFailure:
            raise errors.ServerFailure(
                "{} did not exit cleanly after being started up as a standalone".format(
                    secondary))
    except pymongo.errors.OperationFailure as err:
        self.logger.exception(
            "Failed to read the minValid document, the oplogTruncateAfterPoint document,"
            " the checkpointTimestamp document, or the latest oplog entry from the mongod"
            " on port %d", secondary.port)
        raise errors.ServerFailure(
            "Failed to read the minValid document, the oplogTruncateAfterPoint document,"
            " the checkpointTimestamp document, or the latest oplog entry from the mongod"
            " on port {}: {}".format(secondary.port, err.args[0]))
    finally:
        # Set the secondary's options back to their original values.
        secondary.mongod_options["replSet"] = replset_name
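# A self-contained illustration of the ordering invariants checked above, using
# bson.Timestamp values directly. The concrete timestamps are made up; the point is
# that Timestamp supports the comparisons the checks rely on, and that the chain
# appliedThrough <= minValid <= oplogTruncateAfterPoint (and minValid <= top of
# oplog) must hold on an uncleanly stopped secondary.
import bson

applied_through_ts = bson.Timestamp(100, 1)       # end of the previously applied batch
minvalid_ts = bson.Timestamp(100, 5)              # end of the current batch
oplog_truncate_after_ts = bson.Timestamp(100, 5)  # first ts of the batch being written
latest_oplog_entry_ts = bson.Timestamp(100, 5)    # top of the oplog

assert applied_through_ts <= minvalid_ts <= oplog_truncate_after_ts
assert minvalid_ts <= latest_oplog_entry_ts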
def _check_invariants_as_standalone(self, secondary):  # pylint: disable=too-many-branches
    # We remove the --replSet option in order to start the node as a standalone.
    replset_name = secondary.mongod_options.pop("replSet")

    try:
        secondary.setup()
        secondary.await_ready()

        client = secondary.mongo_client()
        minvalid_doc = client.local["replset.minvalid"].find_one()
        oplog_truncate_after_doc = client.local["replset.oplogTruncateAfterPoint"].find_one()
        self.logger.info("minValid: {}, oplogTruncateAfterPoint: {}".format(
            minvalid_doc, oplog_truncate_after_doc))
        latest_oplog_doc = client.local["oplog.rs"].find_one(
            sort=[("$natural", pymongo.DESCENDING)])

        null_ts = bson.Timestamp(0, 0)

        # The oplog could be empty during initial sync. If so, we default it to null.
        latest_oplog_entry_ts = null_ts
        if latest_oplog_doc is not None:
            latest_oplog_entry_ts = latest_oplog_doc.get("ts")
            if latest_oplog_entry_ts is None:
                raise errors.ServerFailure(
                    "Latest oplog entry had no 'ts' field: {}".format(latest_oplog_doc))

        # The "oplogTruncateAfterPoint" document may not exist at startup. If so, we
        # default it to null.
        oplog_truncate_after_ts = null_ts
        if oplog_truncate_after_doc is not None:
            oplog_truncate_after_ts = oplog_truncate_after_doc.get(
                "oplogTruncateAfterPoint", null_ts)

        if minvalid_doc is not None:
            applied_through_ts = minvalid_doc.get("begin", {}).get("ts", null_ts)
            minvalid_ts = minvalid_doc.get("ts", null_ts)

            if minvalid_ts == null_ts:
                # The server treats the "ts" field in the minValid document as missing
                # when its value is the null timestamp.
                minvalid_ts = applied_through_ts

            if latest_oplog_entry_ts == null_ts:
                # If the oplog is empty, we treat the "minValid" as the latest oplog entry.
                latest_oplog_entry_ts = minvalid_ts

            if oplog_truncate_after_ts == null_ts:
                # The server treats the "oplogTruncateAfterPoint" field as missing when
                # its value is the null timestamp. When it is null, the oplog is complete
                # and should not be truncated, so it is effectively the top of the oplog.
                oplog_truncate_after_ts = latest_oplog_entry_ts

            # Check the ordering invariants before the secondary has reconciled the end of
            # its oplog.
            # The "oplogTruncateAfterPoint" is set to the first timestamp of each batch of
            # oplog entries before they are written to the oplog. Thus, it can be ahead
            # of the top of the oplog before any oplog entries are written, and behind it
            # after some are written. Thus, we cannot compare it to the top of the oplog.

            # appliedThrough <= minValid
            # appliedThrough represents the end of the previous batch, so it is always the
            # earliest.
            if not applied_through_ts <= minvalid_ts:
                raise errors.ServerFailure(
                    "The condition appliedThrough <= minValid ({} <= {}) doesn't hold:"
                    " minValid document={}, latest oplog entry={}".format(
                        applied_through_ts, minvalid_ts, minvalid_doc, latest_oplog_doc))

            # minValid <= oplogTruncateAfterPoint
            # This is true because this hook is never run after a rollback. Thus, we only
            # move "minValid" to the end of each batch after the batch is written to the
            # oplog. We reset the "oplogTruncateAfterPoint" to null before we move
            # "minValid" from the end of the previous batch to the end of the current
            # batch. Thus "minValid" must be less than or equal to the
            # "oplogTruncateAfterPoint".
            if not minvalid_ts <= oplog_truncate_after_ts:
                raise errors.ServerFailure(
                    "The condition minValid <= oplogTruncateAfterPoint ({} <= {}) doesn't"
                    " hold: minValid document={}, oplogTruncateAfterPoint document={},"
                    " latest oplog entry={}".format(
                        minvalid_ts, oplog_truncate_after_ts, minvalid_doc,
                        oplog_truncate_after_doc, latest_oplog_doc))

            # minValid <= latest oplog entry
            # "minValid" is set to the end of a batch after the batch is written to the
            # oplog. Thus it is always less than or equal to the top of the oplog.
            if not minvalid_ts <= latest_oplog_entry_ts:
                raise errors.ServerFailure(
                    "The condition minValid <= top of oplog ({} <= {}) doesn't"
                    " hold: minValid document={}, latest oplog entry={}".format(
                        minvalid_ts, latest_oplog_entry_ts, minvalid_doc,
                        latest_oplog_doc))

        try:
            secondary.teardown()
        except errors.ServerFailure:
            raise errors.ServerFailure(
                "{} did not exit cleanly after being started up as a standalone".format(
                    secondary))
    except pymongo.errors.OperationFailure as err:
        self.logger.exception(
            "Failed to read the minValid document, the oplogTruncateAfterPoint document,"
            " or the latest oplog entry from the mongod on port %d", secondary.port)
        raise errors.ServerFailure(
            "Failed to read the minValid document, the oplogTruncateAfterPoint document,"
            " or the latest oplog entry from the mongod on port {}: {}".format(
                secondary.port, err.args[0]))
    finally:
        # Set the secondary's options back to their original values.
        secondary.mongod_options["replSet"] = replset_name
def _check_thread(self):
    if not self._stepdown_thread.is_alive():
        msg = "The stepdown thread is not running."
        self.logger.error(msg)
        raise errors.ServerFailure(msg)
def _check_invariants_as_standalone(self, secondary):
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    # We remove the --replSet option in order to start the node as a standalone.
    replset_name = secondary.mongod_options.pop("replSet")
    self.logger.info(
        "Restarting the secondary on port %d as a standalone node with"
        " its data files intact...", secondary.port)

    try:
        secondary.setup()
        secondary.await_ready()

        client = secondary.mongo_client()
        minvalid_doc = client.local["replset.minvalid"].find_one()
        oplog_truncate_after_doc = client.local["replset.oplogTruncateAfterPoint"].find_one()
        recovery_timestamp_res = client.admin.command("replSetTest",
                                                      getLastStableRecoveryTimestamp=True)
        latest_oplog_doc = client.local["oplog.rs"].find_one(
            sort=[("$natural", pymongo.DESCENDING)])

        self.logger.info("Checking invariants: minValid: {}, oplogTruncateAfterPoint: {},"
                         " stable recovery timestamp: {}, latest oplog doc: {}".format(
                             minvalid_doc, oplog_truncate_after_doc, recovery_timestamp_res,
                             latest_oplog_doc))

        null_ts = bson.Timestamp(0, 0)

        # We wait for a stable recovery timestamp at setup, so we must have an oplog.
        latest_oplog_entry_ts = null_ts
        if latest_oplog_doc is None:
            raise errors.ServerFailure("No latest oplog entry")
        latest_oplog_entry_ts = latest_oplog_doc.get("ts")
        if latest_oplog_entry_ts is None:
            raise errors.ServerFailure(
                "Latest oplog entry had no 'ts' field: {}".format(latest_oplog_doc))

        # The "oplogTruncateAfterPoint" document may not exist at startup. If so, we
        # default it to null.
        oplog_truncate_after_ts = null_ts
        if oplog_truncate_after_doc is not None:
            oplog_truncate_after_ts = oplog_truncate_after_doc.get(
                "oplogTruncateAfterPoint", null_ts)

        # The "lastStableRecoveryTimestamp" field is present if the storage engine supports
        # "recover to a timestamp". If it's a null timestamp on a durable storage engine,
        # that means we do not yet have a stable checkpoint timestamp and must be
        # restarting at the top of the oplog. Since we wait for a stable recovery timestamp
        # at test fixture setup, we should never encounter a null timestamp here.
        recovery_timestamp = recovery_timestamp_res.get("lastStableRecoveryTimestamp")
        if recovery_timestamp == null_ts:
            raise errors.ServerFailure(
                "Received null stable recovery timestamp {}".format(recovery_timestamp_res))
        # On a storage engine that doesn't support "recover to a timestamp", we default to
        # null.
        if recovery_timestamp is None:
            recovery_timestamp = null_ts

        # last stable recovery timestamp <= top of oplog
        if not recovery_timestamp <= latest_oplog_entry_ts:
            raise errors.ServerFailure("The condition last stable recovery timestamp <= top"
                                       " of oplog ({} <= {}) doesn't hold:"
                                       " getLastStableRecoveryTimestamp result={},"
                                       " latest oplog entry={}".format(
                                           recovery_timestamp, latest_oplog_entry_ts,
                                           recovery_timestamp_res, latest_oplog_doc))

        if minvalid_doc is not None:
            applied_through_ts = minvalid_doc.get("begin", {}).get("ts", null_ts)
            minvalid_ts = minvalid_doc.get("ts", null_ts)

            # The "appliedThrough" value should always equal the "last stable recovery
            # timestamp", AKA the stable checkpoint for durable engines, on server restart.
            #
            # The written "appliedThrough" time is updated with the latest timestamp at
            # the end of each batch application, and batch boundaries are the only valid
            # stable timestamps on secondaries. Therefore, a non-null appliedThrough
            # timestamp must equal the checkpoint timestamp, because any stable timestamp
            # that the checkpoint could use includes an equal persisted appliedThrough
            # timestamp.
            if (recovery_timestamp != null_ts and applied_through_ts != null_ts
                    and recovery_timestamp != applied_through_ts):
                raise errors.ServerFailure(
                    "The condition last stable recovery timestamp ({}) == appliedThrough"
                    " ({}) doesn't hold: minValid document={},"
                    " getLastStableRecoveryTimestamp result={}, last oplog entry={}".format(
                        recovery_timestamp, applied_through_ts, minvalid_doc,
                        recovery_timestamp_res, latest_oplog_doc))

            if applied_through_ts == null_ts:
                # We clear "appliedThrough" to represent having applied through the top of
                # the oplog in PRIMARY state or immediately after "rollback via refetch".
                # If we are using a storage engine that supports "recover to a timestamp,"
                # then we will have a "last stable recovery timestamp" and we should use
                # that as our "appliedThrough" (similarly to why we assert their equality
                # above).
                # If both are null, then we are in PRIMARY state on a storage engine that
                # does not support "recover to a timestamp" or in RECOVERING immediately
                # after "rollback via refetch". Since we do not update "minValid" in
                # PRIMARY state, we leave "appliedThrough" as null so that the invariants
                # below hold, rather than substituting the latest oplog entry for the
                # "appliedThrough" value.
                applied_through_ts = recovery_timestamp

            if minvalid_ts == null_ts:
                # The server treats the "ts" field in the minValid document as missing
                # when its value is the null timestamp.
                minvalid_ts = applied_through_ts

            if latest_oplog_entry_ts == null_ts:
                # If the oplog is empty, we treat the "minValid" as the latest oplog entry.
                latest_oplog_entry_ts = minvalid_ts

            if oplog_truncate_after_ts == null_ts:
                # The server treats the "oplogTruncateAfterPoint" field as missing when
                # its value is the null timestamp. When it is null, the oplog is complete
                # and should not be truncated, so it is effectively the top of the oplog.
                oplog_truncate_after_ts = latest_oplog_entry_ts

            # Check the ordering invariants before the secondary has reconciled the end of
            # its oplog.
            # The "oplogTruncateAfterPoint" is set to the first timestamp of each batch of
            # oplog entries before they are written to the oplog. Thus, it can be ahead
            # of the top of the oplog before any oplog entries are written, and behind it
            # after some are written. Thus, we cannot compare it to the top of the oplog.

            # appliedThrough <= minValid
            # appliedThrough represents the end of the previous batch, so it is always the
            # earliest.
            if applied_through_ts > minvalid_ts:
                raise errors.ServerFailure(
                    "The condition appliedThrough <= minValid ({} <= {}) doesn't hold:"
                    " minValid document={}, latest oplog entry={}".format(
                        applied_through_ts, minvalid_ts, minvalid_doc, latest_oplog_doc))

            # minValid <= oplogTruncateAfterPoint
            # This is true because this hook is never run after a rollback. Thus, we only
            # move "minValid" to the end of each batch after the batch is written to the
            # oplog. We reset the "oplogTruncateAfterPoint" to null before we move
            # "minValid" from the end of the previous batch to the end of the current
            # batch. Thus "minValid" must be less than or equal to the
            # "oplogTruncateAfterPoint".
            if minvalid_ts > oplog_truncate_after_ts:
                raise errors.ServerFailure(
                    "The condition minValid <= oplogTruncateAfterPoint ({} <= {}) doesn't"
                    " hold: minValid document={}, oplogTruncateAfterPoint document={},"
                    " latest oplog entry={}".format(minvalid_ts, oplog_truncate_after_ts,
                                                    minvalid_doc, oplog_truncate_after_doc,
                                                    latest_oplog_doc))

            # minValid <= latest oplog entry
            # "minValid" is set to the end of a batch after the batch is written to the
            # oplog. Thus it is always less than or equal to the top of the oplog.
            if minvalid_ts > latest_oplog_entry_ts:
                raise errors.ServerFailure(
                    "The condition minValid <= top of oplog ({} <= {}) doesn't"
                    " hold: minValid document={}, latest oplog entry={}".format(
                        minvalid_ts, latest_oplog_entry_ts, minvalid_doc,
                        latest_oplog_doc))

        try:
            secondary.teardown()
        except errors.ServerFailure:
            raise errors.ServerFailure(
                "{} did not exit cleanly after being started up as a standalone".format(
                    secondary))
    except pymongo.errors.OperationFailure as err:
        self.logger.exception(
            "Failed to read the minValid document, the oplogTruncateAfterPoint document,"
            " the last stable recovery timestamp, or the latest oplog entry from the"
            " mongod on port %d", secondary.port)
        raise errors.ServerFailure(
            "Failed to read the minValid document, the oplogTruncateAfterPoint document,"
            " the last stable recovery timestamp, or the latest oplog entry from the"
            " mongod on port {}: {}".format(secondary.port, err.args[0]))
    finally:
        # Set the secondary's options back to their original values.
        secondary.mongod_options["replSet"] = replset_name
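# A minimal sketch of fetching the last stable recovery timestamp the same way the
# hook above does; the port is a placeholder. The field is absent on storage engines
# that do not support "recover to a timestamp", which is why the code defaults it to
# the null timestamp.
import pymongo

client = pymongo.MongoClient("localhost", 20000)
res = client.admin.command("replSetTest", getLastStableRecoveryTimestamp=True)
recovery_timestamp = res.get("lastStableRecoveryTimestamp")  # may be None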