def retry_until_wtimeout(self, insert_fn):
    """Retry until wtimeout reached.

    Given a callback function representing an insert operation on the primary, handle any
    connection failures, and keep retrying the operation for up to
    'AWAIT_REPL_TIMEOUT_MINS' minutes.

    The insert operation callback should take an argument for the number of remaining
    seconds to provide as the timeout for the operation.
    """
    deadline = time.time() + ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60

    while True:
        try:
            remaining = deadline - time.time()
            insert_fn(remaining)
            break
        except pymonger.errors.ConnectionFailure:
            remaining = deadline - time.time()
            if remaining <= 0.0:
                message = "Failed to connect to {} within {} minutes".format(
                    self.get_driver_connection_url(), ReplFixture.AWAIT_REPL_TIMEOUT_MINS)
                self.logger.error(message)
                raise errors.ServerFailure(message)
        except pymonger.errors.WTimeoutError:
            message = "Replication of write operation timed out."
            self.logger.error(message)
            raise errors.ServerFailure(message)
        except pymonger.errors.PyMongerError as err:
            message = "Write operation on {} failed: {}".format(
                self.get_driver_connection_url(), err)
            raise errors.ServerFailure(message)
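# Illustrative sketch (not part of the fixture code above): retry_until_wtimeout() expects a
# callback that accepts the number of remaining seconds and uses it as the write concern
# timeout for the insert. The helper below shows one plausible way to build such a callback;
# the database name "resmoke", the collection name "await_repl", and the 'primary_client' and
# 'num_nodes' parameters are hypothetical placeholders, and the sketch assumes the
# module-level 'pymonger' import used elsewhere in this file.
def _example_make_insert_fn(primary_client, num_nodes):
    def insert_fn(remaining_secs):
        # Convert the remaining time to milliseconds for the write concern's wtimeout.
        remaining_millis = int(round(remaining_secs * 1000))
        write_concern = pymonger.WriteConcern(w=num_nodes, wtimeout=remaining_millis)
        coll = primary_client.resmoke.get_collection("await_repl", write_concern=write_concern)
        coll.insert_one({"awaiting": "repl"})
    return insert_fn
# A fixture could then pass the resulting callback to self.retry_until_wtimeout(insert_fn).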
def _do_teardown(self):
    if self.mongers is None:
        self.logger.warning("The mongers fixture has not been set up yet.")
        return  # Teardown is still a success even if nothing is running.

    self.logger.info("Stopping mongers on port %d with pid %d...", self.port, self.mongers.pid)
    if not self.is_running():
        exit_code = self.mongers.poll()
        msg = ("mongers on port {:d} was expected to be running, but wasn't. "
               "Process exited with code {:d}").format(self.port, exit_code)
        self.logger.warning(msg)
        raise errors.ServerFailure(msg)

    self.mongers.stop()
    exit_code = self.mongers.wait()

    if exit_code == 0:
        self.logger.info("Successfully stopped the mongers on port {:d}".format(self.port))
    else:
        self.logger.warning("Stopped the mongers on port {:d}. "
                            "Process exited with code {:d}.".format(self.port, exit_code))
        raise errors.ServerFailure(
            "mongers on port {:d} with pid {:d} exited with code {:d}".format(
                self.port, self.mongers.pid, exit_code))
def await_ready(self):
    """Block until the fixture can be used for testing."""
    deadline = time.time() + MongerDFixture.AWAIT_READY_TIMEOUT_SECS

    # Wait until the mongerd is accepting connections. The retry logic is necessary to support
    # versions of PyMonger <3.0 that immediately raise a ConnectionFailure if a connection
    # cannot be established.
    while True:
        # Check whether the mongerd exited for some reason.
        exit_code = self.mongerd.poll()
        if exit_code is not None:
            raise errors.ServerFailure(
                "Could not connect to mongerd on port {}, process ended"
                " unexpectedly with code {}.".format(self.port, exit_code))

        try:
            # Use a shorter connection timeout to more closely satisfy the requested deadline.
            client = self.monger_client(timeout_millis=500)
            client.admin.command("ping")
            break
        except pymonger.errors.ConnectionFailure:
            remaining = deadline - time.time()
            if remaining <= 0.0:
                raise errors.ServerFailure(
                    "Failed to connect to mongerd on port {} after {} seconds".format(
                        self.port, MongerDFixture.AWAIT_READY_TIMEOUT_SECS))

            self.logger.info("Waiting to connect to mongerd on port %d.", self.port)
            time.sleep(0.1)  # Wait a little bit before trying again.

    self.logger.info("Successfully contacted the mongerd on port %d.", self.port)
def setup(self):
    """Set up the sharded cluster."""
    if "port" not in self.mongers_options:
        self.mongers_options["port"] = core.network.PortAllocator.next_fixture_port(self.job_num)
    self.port = self.mongers_options["port"]

    mongers = core.programs.mongers_program(
        self.logger, executable=self.mongers_executable, **self.mongers_options)
    try:
        self.logger.info("Starting mongers on port %d...\n%s", self.port, mongers.as_command())
        mongers.start()
        self.logger.info("mongers started on port %d with pid %d.", self.port, mongers.pid)
    except Exception as err:
        msg = "Failed to start mongers on port {:d}: {}".format(self.port, err)
        self.logger.exception(msg)
        raise errors.ServerFailure(msg)

    self.mongers = mongers
def _do_teardown(self):
    """Shut down the sharded cluster."""
    self.logger.info("Stopping all members of the sharded cluster...")

    running_at_start = self.is_running()
    if not running_at_start:
        self.logger.warning("All members of the sharded cluster were expected to be running, "
                            "but weren't.")

    if self.enable_balancer:
        self.stop_balancer()

    teardown_handler = interface.FixtureTeardownHandler(self.logger)

    if self.configsvr is not None:
        teardown_handler.teardown(self.configsvr, "config server")

    for mongers in self.mongers:
        teardown_handler.teardown(mongers, "mongers")

    for shard in self.shards:
        teardown_handler.teardown(shard, "shard")

    if teardown_handler.was_successful():
        self.logger.info("Successfully stopped all members of the sharded cluster.")
    else:
        self.logger.error("Stopping the sharded cluster fixture failed.")
        raise errors.ServerFailure(teardown_handler.get_error_message())
def _check_secondaries_and_restart_fixture(self):
    preserve_dbpaths = []
    for node in self.fixture.nodes:
        preserve_dbpaths.append(node.preserve_dbpath)
        node.preserve_dbpath = True

    for secondary in self.fixture.get_secondaries():
        self._check_invariants_as_standalone(secondary)

        self.logger.info("Restarting the secondary on port %d as a replica set node with"
                         " its data files intact...", secondary.port)
        # Start the 'secondary' mongerd back up as part of the replica set and wait for it to
        # reach state SECONDARY.
        secondary.setup()
        secondary.await_ready()
        self._await_secondary_state(secondary)

        try:
            secondary.teardown()
        except errors.ServerFailure:
            raise errors.ServerFailure(
                "{} did not exit cleanly after reconciling the end of its oplog".format(
                    secondary))

    self.logger.info("Starting the fixture back up again with its data files intact for final"
                     " validation...")

    try:
        self.fixture.setup()
        self.fixture.await_ready()
    finally:
        for (i, node) in enumerate(self.fixture.nodes):
            node.preserve_dbpath = preserve_dbpaths[i]
def _kill_secondaries(self):
    for secondary in self.fixture.get_secondaries():
        # Disable the "rsSyncApplyStop" failpoint on the secondary to have it resume applying
        # oplog entries.
        self._hook._disable_rssyncapplystop(secondary)  # pylint: disable=protected-access

        # Wait a little bit for the secondary to start applying oplog entries so that we are
        # more likely to kill the mongerd process while it is partway into applying a batch.
        time.sleep(0.1)

        # Check that the secondary is still running before forcibly terminating it. This
        # ensures we still detect some cases in which the secondary has already crashed.
        if not secondary.is_running():
            raise errors.ServerFailure(
                "mongerd on port {} was expected to be running in"
                " PeriodicKillSecondaries.after_test(), but wasn't.".format(secondary.port))

        self.logger.info("Killing the secondary on port %d...", secondary.port)
        secondary.mongerd.stop(kill=True)

    try:
        self.fixture.teardown()
    except errors.ServerFailure:
        # Teardown may or may not be considered a success as a result of killing a secondary,
        # so we ignore ServerFailure raised during teardown.
        pass
def setup(self):
    """Set up the mongerd."""
    if not self.preserve_dbpath and os.path.lexists(self._dbpath):
        utils.rmtree(self._dbpath, ignore_errors=False)

    try:
        os.makedirs(self._dbpath)
    except os.error:
        # Directory already exists.
        pass

    if "port" not in self.mongerd_options:
        self.mongerd_options["port"] = core.network.PortAllocator.next_fixture_port(self.job_num)
    self.port = self.mongerd_options["port"]

    mongerd = core.programs.mongerd_program(
        self.logger, executable=self.mongerd_executable, **self.mongerd_options)
    try:
        self.logger.info("Starting mongerd on port %d...\n%s", self.port, mongerd.as_command())
        mongerd.start()
        self.logger.info("mongerd started on port %d with pid %d.", self.port, mongerd.pid)
    except Exception as err:
        msg = "Failed to start mongerd on port {:d}: {}".format(self.port, err)
        self.logger.exception(msg)
        raise errors.ServerFailure(msg)

    self.mongerd = mongerd
def _enable_rssyncapplystop(self, secondary):
    # Enable the "rsSyncApplyStop" failpoint on the secondary to prevent it from applying any
    # oplog entries while the test is running.
    client = secondary.monger_client()
    try:
        client.admin.command(
            bson.SON([("configureFailPoint", "rsSyncApplyStop"), ("mode", "alwaysOn")]))
    except pymonger.errors.OperationFailure as err:
        self.logger.exception("Unable to disable oplog application on the mongerd on port %d",
                              secondary.port)
        raise errors.ServerFailure(
            "Unable to disable oplog application on the mongerd on port {}: {}".format(
                secondary.port, err.args[0]))
def _disable_rssyncapplystop(self, secondary):
    # Disable the "rsSyncApplyStop" failpoint on the secondary to have it resume applying
    # oplog entries.
    client = secondary.monger_client()
    try:
        client.admin.command(
            bson.SON([("configureFailPoint", "rsSyncApplyStop"), ("mode", "off")]))
    except pymonger.errors.OperationFailure as err:
        self.logger.exception("Unable to re-enable oplog application on the mongerd on port %d",
                              secondary.port)
        raise errors.ServerFailure(
            "Unable to re-enable oplog application on the mongerd on port {}: {}".format(
                secondary.port, err.args[0]))
def _await_secondary_state(self, secondary):
    client = secondary.monger_client()
    try:
        client.admin.command(
            bson.SON([
                ("replSetTest", 1),
                ("waitForMemberState", 2),  # 2 = SECONDARY
                ("timeoutMillis", fixture.ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60 * 1000),
            ]))
    except pymonger.errors.OperationFailure as err:
        self.logger.exception(
            "mongerd on port %d failed to reach state SECONDARY after %d seconds",
            secondary.port, fixture.ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60)
        raise errors.ServerFailure(
            "mongerd on port {} failed to reach state SECONDARY after {} seconds: {}".format(
                secondary.port, fixture.ReplFixture.AWAIT_REPL_TIMEOUT_MINS * 60, err.args[0]))
def _restart_and_clear_fixture(self):
    # We restart the fixture after setting 'preserve_dbpath' back to its original value in
    # order to clear the contents of the data directory if desired. The CleanEveryN hook
    # cannot be used in combination with the PeriodicKillSecondaries hook because we may
    # attempt to call Fixture.teardown() while the "rsSyncApplyStop" failpoint is still
    # enabled on the secondaries, causing them to exit with a non-zero return code.
    self.logger.info("Finished verifying data consistency, stopping the fixture...")

    try:
        self.fixture.teardown()
    except errors.ServerFailure:
        raise errors.ServerFailure(
            "{} did not exit cleanly after verifying data consistency".format(self.fixture))

    self.logger.info("Starting the fixture back up again with no data...")

    self.fixture.setup()
    self.fixture.await_ready()
def _check_invariants_as_standalone(self, secondary):  # pylint: disable=too-many-locals
    # pylint: disable=too-many-branches,too-many-statements
    # We remove the --replSet option in order to start the node as a standalone.
    replset_name = secondary.mongerd_options.pop("replSet")

    self.logger.info("Restarting the secondary on port %d as a standalone node with"
                     " its data files intact...", secondary.port)

    try:
        secondary.setup()
        secondary.await_ready()

        client = secondary.monger_client()
        minvalid_doc = client.local["replset.minvalid"].find_one()
        oplog_truncate_after_doc = client.local["replset.oplogTruncateAfterPoint"].find_one()
        recovery_timestamp_res = client.admin.command("replSetTest",
                                                      getLastStableRecoveryTimestamp=True)
        latest_oplog_doc = client.local["oplog.rs"].find_one(
            sort=[("$natural", pymonger.DESCENDING)])

        self.logger.info("Checking invariants: minValid: {}, oplogTruncateAfterPoint: {},"
                         " stable recovery timestamp: {}, latest oplog doc: {}".format(
                             minvalid_doc, oplog_truncate_after_doc, recovery_timestamp_res,
                             latest_oplog_doc))

        null_ts = bson.Timestamp(0, 0)

        # We wait for a stable recovery timestamp at setup, so we must have an oplog.
        latest_oplog_entry_ts = null_ts
        if latest_oplog_doc is None:
            raise errors.ServerFailure("No latest oplog entry")
        latest_oplog_entry_ts = latest_oplog_doc.get("ts")
        if latest_oplog_entry_ts is None:
            raise errors.ServerFailure(
                "Latest oplog entry had no 'ts' field: {}".format(latest_oplog_doc))

        # The "oplogTruncateAfterPoint" document may not exist at startup. If so, we default
        # it to null.
        oplog_truncate_after_ts = null_ts
        if oplog_truncate_after_doc is not None:
            oplog_truncate_after_ts = oplog_truncate_after_doc.get("oplogTruncateAfterPoint",
                                                                   null_ts)

        # The "lastStableRecoveryTimestamp" field is present if the storage engine supports
        # "recover to a timestamp". If it's a null timestamp on a durable storage engine, that
        # means we do not yet have a stable checkpoint timestamp and must be restarting at the
        # top of the oplog. Since we wait for a stable recovery timestamp at test fixture
        # setup, we should never encounter a null timestamp here.
        recovery_timestamp = recovery_timestamp_res.get("lastStableRecoveryTimestamp")
        if recovery_timestamp == null_ts:
            raise errors.ServerFailure(
                "Received null stable recovery timestamp {}".format(recovery_timestamp_res))
        # On a storage engine that doesn't support "recover to a timestamp", we default to
        # null.
        if recovery_timestamp is None:
            recovery_timestamp = null_ts

        # last stable recovery timestamp <= top of oplog
        if not recovery_timestamp <= latest_oplog_entry_ts:
            raise errors.ServerFailure("The condition last stable recovery timestamp <= top"
                                       " of oplog ({} <= {}) doesn't hold:"
                                       " getLastStableRecoveryTimestamp result={},"
                                       " latest oplog entry={}".format(
                                           recovery_timestamp, latest_oplog_entry_ts,
                                           recovery_timestamp_res, latest_oplog_doc))

        if minvalid_doc is not None:
            applied_through_ts = minvalid_doc.get("begin", {}).get("ts", null_ts)
            minvalid_ts = minvalid_doc.get("ts", null_ts)

            # The "appliedThrough" value should always equal the "last stable recovery
            # timestamp", AKA the stable checkpoint for durable engines, on server restart.
            #
            # The written "appliedThrough" time is updated with the latest timestamp at the
            # end of each batch application, and batch boundaries are the only valid stable
            # timestamps on secondaries. Therefore, a non-null appliedThrough timestamp must
            # equal the checkpoint timestamp, because any stable timestamp that the checkpoint
            # could use includes an equal persisted appliedThrough timestamp.
            if (recovery_timestamp != null_ts and applied_through_ts != null_ts
                    and recovery_timestamp != applied_through_ts):
                raise errors.ServerFailure(
                    "The condition last stable recovery timestamp ({}) == appliedThrough ({})"
                    " doesn't hold: minValid document={},"
                    " getLastStableRecoveryTimestamp result={}, last oplog entry={}".format(
                        recovery_timestamp, applied_through_ts, minvalid_doc,
                        recovery_timestamp_res, latest_oplog_doc))

            if applied_through_ts == null_ts:
                # We clear "appliedThrough" to represent having applied through the top of
                # the oplog in PRIMARY state or immediately after "rollback via refetch".
                # If we are using a storage engine that supports "recover to a timestamp,"
                # then we will have a "last stable recovery timestamp" and we should use that
                # as our "appliedThrough" (similarly to why we assert their equality above).
                # If both are null, then we are in PRIMARY state on a storage engine that does
                # not support "recover to a timestamp" or in RECOVERING immediately after
                # "rollback via refetch". Since we do not update "minValid" in PRIMARY state,
                # we leave "appliedThrough" as null so that the invariants below hold, rather
                # than substituting the latest oplog entry for the "appliedThrough" value.
                applied_through_ts = recovery_timestamp

            if minvalid_ts == null_ts:
                # The server treats the "ts" field in the minValid document as missing when
                # its value is the null timestamp.
                minvalid_ts = applied_through_ts

            if latest_oplog_entry_ts == null_ts:
                # If the oplog is empty, we treat the "minValid" as the latest oplog entry.
                latest_oplog_entry_ts = minvalid_ts

            if oplog_truncate_after_ts == null_ts:
                # The server treats the "oplogTruncateAfterPoint" field as missing when its
                # value is the null timestamp. When it is null, the oplog is complete and
                # should not be truncated, so it is effectively the top of the oplog.
                oplog_truncate_after_ts = latest_oplog_entry_ts

            # Check the ordering invariants before the secondary has reconciled the end of
            # its oplog.
            # The "oplogTruncateAfterPoint" is set to the first timestamp of each batch of
            # oplog entries before they are written to the oplog. Thus, it can be ahead of
            # the top of the oplog before any oplog entries are written, and behind it after
            # some are written. Thus, we cannot compare it to the top of the oplog.

            # appliedThrough <= minValid
            # appliedThrough represents the end of the previous batch, so it is always the
            # earliest.
            if applied_through_ts > minvalid_ts:
                raise errors.ServerFailure(
                    "The condition appliedThrough <= minValid ({} <= {}) doesn't hold:"
                    " minValid document={}, latest oplog entry={}".format(
                        applied_through_ts, minvalid_ts, minvalid_doc, latest_oplog_doc))

            # minValid <= oplogTruncateAfterPoint
            # This is true because this hook is never run after a rollback. Thus, we only
            # move "minValid" to the end of each batch after the batch is written to the
            # oplog. We reset the "oplogTruncateAfterPoint" to null before we move "minValid"
            # from the end of the previous batch to the end of the current batch. Thus
            # "minValid" must be less than or equal to the "oplogTruncateAfterPoint".
            if minvalid_ts > oplog_truncate_after_ts:
                raise errors.ServerFailure(
                    "The condition minValid <= oplogTruncateAfterPoint ({} <= {}) doesn't"
                    " hold: minValid document={}, oplogTruncateAfterPoint document={},"
                    " latest oplog entry={}".format(minvalid_ts, oplog_truncate_after_ts,
                                                    minvalid_doc, oplog_truncate_after_doc,
                                                    latest_oplog_doc))

            # minValid <= latest oplog entry
            # "minValid" is set to the end of a batch after the batch is written to the oplog.
            # Thus it is always less than or equal to the top of the oplog.
            if minvalid_ts > latest_oplog_entry_ts:
                raise errors.ServerFailure(
                    "The condition minValid <= top of oplog ({} <= {}) doesn't"
                    " hold: minValid document={}, latest oplog entry={}".format(
                        minvalid_ts, latest_oplog_entry_ts, minvalid_doc, latest_oplog_doc))

        try:
            secondary.teardown()
        except errors.ServerFailure:
            raise errors.ServerFailure(
                "{} did not exit cleanly after being started up as a standalone".format(
                    secondary))
    except pymonger.errors.OperationFailure as err:
        self.logger.exception(
            "Failed to read the minValid document, the oplogTruncateAfterPoint document,"
            " the last stable recovery timestamp, or the latest oplog entry from the"
            " mongerd on port %d", secondary.port)
        raise errors.ServerFailure(
            "Failed to read the minValid document, the oplogTruncateAfterPoint document,"
            " the last stable recovery timestamp, or the latest oplog entry from the"
            " mongerd on port {}: {}".format(secondary.port, err.args[0]))
    finally:
        # Set the secondary's options back to their original values.
        secondary.mongerd_options["replSet"] = replset_name
def _step_down(self, rs_fixture):
    try:
        primary = rs_fixture.get_primary(timeout_secs=self._stepdown_interval_secs)
    except errors.ServerFailure:
        # We ignore the ServerFailure exception because it means a primary wasn't available.
        # We'll try again after self._stepdown_interval_secs seconds.
        return

    secondaries = rs_fixture.get_secondaries()

    # Check that the fixture is still running before stepping down or killing the primary.
    # This ensures we still detect some cases in which the fixture has already crashed.
    if not rs_fixture.is_running():
        raise errors.ServerFailure("ReplicaSetFixture {} expected to be running in"
                                   " ContinuousStepdown, but wasn't.".format(
                                       rs_fixture.replset_name))

    if self._terminate:
        should_kill = self._kill and random.choice([True, False])
        action = "Killing" if should_kill else "Terminating"
        self.logger.info("%s the primary on port %d of replica set '%s'.", action,
                         primary.port, rs_fixture.replset_name)

        # We send the mongerd process the signal to exit but don't immediately wait for it to
        # exit because clean shutdown may take a while and we want to restore write
        # availability as quickly as possible.
        primary.mongerd.stop(kill=should_kill)
    elif not self._stepdown_via_heartbeats:
        self.logger.info("Stepping down the primary on port %d of replica set '%s'.",
                         primary.port, rs_fixture.replset_name)

        try:
            client = primary.monger_client()
            client.admin.command(
                bson.SON([
                    ("replSetStepDown", self._stepdown_duration_secs),
                    ("force", True),
                ]))
        except pymonger.errors.AutoReconnect:
            # AutoReconnect exceptions are expected as connections are closed during stepdown.
            pass
        except pymonger.errors.PyMongerError:
            self.logger.exception(
                "Error while stepping down the primary on port %d of replica set '%s'.",
                primary.port, rs_fixture.replset_name)
            raise

    # If we want to step up secondaries instead, we have skipped stepping down the primary
    # above. Here, we simply need to pick an arbitrary secondary to run for election, which
    # will lead to an unconditional stepdown on the primary.
    #
    # If we have terminated/killed/stepped down the primary above, write availability has been
    # lost. We pick an arbitrary secondary to run for election immediately in order to avoid a
    # long period where the replica set doesn't have write availability. If none of the
    # secondaries are eligible, or their election attempt fails, then we'll run the
    # replSetStepUp command on 'primary' to ensure we have write availability sooner than the
    # self._stepdown_duration_secs duration expires.
    while secondaries:
        chosen = random.choice(secondaries)

        self.logger.info("Attempting to step up the secondary on port %d of replica set '%s'.",
                         chosen.port, rs_fixture.replset_name)

        try:
            client = chosen.monger_client()
            client.admin.command("replSetStepUp")
            break
        except pymonger.errors.OperationFailure:
            # OperationFailure exceptions are expected when the election attempt fails due to
            # not receiving enough votes. This can happen when the 'chosen' secondary's opTime
            # is behind that of other secondaries. We handle this by attempting to elect a
            # different secondary.
            self.logger.info("Failed to step up the secondary on port %d of replica set '%s'.",
                             chosen.port, rs_fixture.replset_name)
            secondaries.remove(chosen)

    if self._terminate:
        self.logger.info("Waiting for the old primary on port %d of replica set '%s' to exit.",
                         primary.port, rs_fixture.replset_name)

        primary.mongerd.wait()

        self.logger.info("Attempting to restart the old primary on port %d of replica set '%s'.",
                         primary.port, rs_fixture.replset_name)

        # Restart the mongerd on the old primary and wait until we can contact it again. Keep
        # the original preserve_dbpath to restore after restarting the mongerd.
        original_preserve_dbpath = primary.preserve_dbpath
        primary.preserve_dbpath = True
        try:
            primary.setup()
            primary.await_ready()
        finally:
            primary.preserve_dbpath = original_preserve_dbpath
    elif not self._stepdown_via_heartbeats:
        # If we chose to step up a secondary instead, the primary was never stepped down via
        # the replSetStepDown command and thus will not have a stepdown period. So we can skip
        # running {replSetFreeze: 0}. Otherwise, the replSetStepDown command run earlier
        # introduced a stepdown period on the former primary and so we have to run the
        # {replSetFreeze: 0} command to ensure the former primary is electable in the next
        # round of _step_down().
        client = primary.monger_client()
        client.admin.command({"replSetFreeze": 0})
    elif secondaries:
        # We successfully stepped up a secondary, so wait for the former primary to step down
        # via heartbeats. We need to wait for the former primary to step down to complete this
        # stepdown round and to avoid races between the ContinuousStepdown hook and other test
        # hooks that may depend on the health of the replica set.
        self.logger.info("Successfully stepped up the secondary on port %d of replica set '%s'.",
                         chosen.port, rs_fixture.replset_name)
        while True:
            try:
                client = primary.monger_client()
                is_secondary = client.admin.command("isMaster")["secondary"]
                if is_secondary:
                    break
            except pymonger.errors.AutoReconnect:
                pass
            self.logger.info("Waiting for primary on port %d of replica set '%s' to step down.",
                             primary.port, rs_fixture.replset_name)
            time.sleep(0.2)  # Wait a little bit before trying again.

        self.logger.info("Primary on port %d of replica set '%s' stepped down.", primary.port,
                         rs_fixture.replset_name)

    if not secondaries:
        # If we failed to step up one of the secondaries, then we run the replSetStepUp to try
        # and elect the former primary again. This way we don't need to wait
        # self._stepdown_duration_secs seconds to restore write availability to the cluster.
        # Since the former primary may have been killed, we need to wait until it has been
        # restarted by retrying replSetStepUp.
        retry_time_secs = rs_fixture.AWAIT_REPL_TIMEOUT_MINS * 60
        retry_start_time = time.time()
        while True:
            try:
                client = primary.monger_client()
                client.admin.command("replSetStepUp")
                break
            except pymonger.errors.OperationFailure:
                self._wait(0.2)
            if time.time() - retry_start_time > retry_time_secs:
                raise errors.ServerFailure(
                    "The old primary on port {} of replica set {} did not step up in"
                    " {} seconds.".format(client.port, rs_fixture.replset_name,
                                          retry_time_secs))

    # Bump the counter for the chosen secondary to indicate that the replSetStepUp command
    # executed successfully.
    key = "{}/{}".format(rs_fixture.replset_name,
                         chosen.get_internal_connection_string() if secondaries else "none")
    self._step_up_stats[key] += 1
def _check_thread(self):
    if not self._stepdown_thread.is_alive():
        msg = "The stepdown thread is not running."
        self.logger.error(msg)
        raise errors.ServerFailure(msg)
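# Illustrative usage (hypothetical wiring, not taken from this file): the hook would typically
# call _check_thread() at the start of its per-test entry points so that a stepdown thread
# that has died fails the run immediately rather than silently stopping stepdowns, e.g.:
#
#     def before_test(self, test, test_report):
#         self._check_thread()
#         ...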