def test_start_with_mount_failures(self) -> None:
    """Restart EdenFS with three checkouts and make two of them fail to mount.

    Mount initialization is blocked via fault injection during startup.  The
    two extra checkouts are then unblocked with an injected error, while the
    original checkout is unblocked normally.  Afterwards only the original
    checkout should be RUNNING and the ``startup_mount_failures`` counter
    should read 2.
    """
    # Create two additional checkouts of the same repository.
    extra_mounts = []
    for dir_name in ("extra_mount_1", "extra_mount_2"):
        checkout_path = os.path.join(self.mounts_dir, dir_name)
        self.eden.clone(self.repo_name, checkout_path)
        extra_mounts.append(checkout_path)
    first_extra, second_extra = extra_mounts

    self.assertEqual(
        {self.mount: "RUNNING", first_extra: "RUNNING", second_extra: "RUNNING"},
        self.eden.list_cmd_simple(),
    )

    # Restart EdenFS with every mount blocked by the fault injector.
    self.eden.shutdown()
    blocked_mount_args = [
        "--enable_fault_injection",
        "--fault_injection_block_mounts",
    ]
    self.eden.spawn_nowait(extra_args=blocked_mount_args)

    # Do not proceed until all three mounts have begun initializing.
    self._wait_until_initializing(num_mounts=3)

    with self.eden.get_thrift_client() as client:
        # Nothing has been unblocked yet: every mount is still INITIALIZING
        # and the daemon itself still reports STARTING.
        self.assertEqual(
            {
                self.mount: "INITIALIZING",
                first_extra: "INITIALIZING",
                second_extra: "INITIALIZING",
            },
            self.eden.list_cmd_simple(),
        )
        self.assertEqual(fb303_status.STARTING, client.getStatus())

        # Release the two extra checkouts with an injected error so that
        # their mount attempts fail.
        fail_extra_mounts = UnblockFaultArg(
            keyClass="mount",
            keyValueRegex=".*/extra_mount.*",
            errorType="runtime_error",
            errorMessage="PC LOAD LETTER",
        )
        client.unblockFault(fail_extra_mounts)

        # Release the original checkout without any error.
        allow_primary_mount = UnblockFaultArg(
            keyClass="mount", keyValueRegex=re.escape(self.mount)
        )
        client.unblockFault(allow_primary_mount)

        # Startup is finished once the daemon reports itself alive.
        self._wait_until_alive(client)

    self.assertEqual(
        {
            self.mount: "RUNNING",
            first_extra: "NOT_RUNNING",
            second_extra: "NOT_RUNNING",
        },
        self.eden.list_cmd_simple(),
    )

    # Both failed remounts must be reflected in the startup counter.
    with self.eden.get_thrift_client() as client:
        mount_failures = client.getCounter("startup_mount_failures")
        self.assertEqual(2, mount_failures)
def test_start_blocked_mount_init(self) -> None:
    """Check daemon and mount state while mount initialization is blocked.

    EdenFS is restarted with mounts blocked by fault injection.  While
    blocked, the mount should list as INITIALIZING and the daemon should
    report STARTING; after unblocking, the mount should be RUNNING and the
    daemon ALIVE.
    """
    self.eden.shutdown()
    blocked_mount_args = [
        "--enable_fault_injection",
        "--fault_injection_block_mounts",
    ]
    self.eden.spawn_nowait(extra_args=blocked_mount_args)

    # Hold off until the mount point has started initializing.
    self._wait_until_initializing()

    with self.eden.get_thrift_client_legacy() as client:
        # The mount is still blocked, so it must list as INITIALIZING and
        # the daemon must not have finished starting.
        self.assertEqual({self.mount: "INITIALIZING"}, self.eden.list_cmd_simple())
        self.assertEqual(fb303_status.STARTING, client.getDaemonInfo().status)

        # Release every blocked mount, then wait for both the mount and the
        # daemon to settle.
        release_all_mounts = UnblockFaultArg(keyClass="mount", keyValueRegex=".*")
        client.unblockFault(release_all_mounts)
        self._wait_for_mount_running(client)
        self._wait_until_alive(client)
        self.assertEqual(fb303_status.ALIVE, client.getDaemonInfo().status)

    self.assertEqual({self.mount: "RUNNING"}, self.eden.list_cmd_simple())
def test_start_blocked_mount_init(self) -> None:
    """Check daemon and mount state while mount initialization is blocked.

    EdenFS is restarted with mounts blocked by fault injection.  While
    blocked, the mount should list as INITIALIZING and the daemon should
    report STARTING; after unblocking, the mount should be RUNNING and the
    daemon ALIVE.
    """
    self.eden.shutdown()
    self.eden.spawn_nowait(
        extra_args=["--enable_fault_injection", "--fault_injection_block_mounts"]
    )

    # Wait for eden to report the mount point in the listMounts() output
    def is_initializing() -> Optional[bool]:
        try:
            with self.eden.get_thrift_client() as client:
                if self.eden.get_mount_state(Path(self.mount), client) is not None:
                    return True
        except (EdenNotRunningError, TException):
            # The daemon is not answering thrift calls (yet).  Fall through
            # to the exit check below so that a daemon that has already died
            # fails the test immediately instead of after the poll timeout.
            pass

        edenfs_process = self.eden._process
        assert edenfs_process is not None
        # poll() returns None only while the process is still running.  Compare
        # against None explicitly: an exit status of 0 is falsy and would
        # otherwise be mistaken for "still running".
        # NOTE(review): assumes spawn_nowait() keeps edenfs in the foreground,
        # so any exit of this process means the daemon is gone -- confirm.
        if edenfs_process.poll() is not None:
            self.fail("eden exited before becoming healthy")
        return None

    poll_until(is_initializing, timeout=60)

    with self.eden.get_thrift_client() as client:
        # Since we blocked mount initialization the mount should still
        # report as INITIALIZING, and edenfs should report itself STARTING
        self.assertEqual({self.mount: "INITIALIZING"}, self.eden.list_cmd_simple())
        self.assertEqual(fb_status.STARTING, client.getStatus())

        # Unblock mounting and wait for the mount to transition to running
        client.unblockFault(UnblockFaultArg(keyClass="mount", keyValueRegex=".*"))
        self._wait_for_mount_running(client)
        self.assertEqual(fb_status.ALIVE, client.getStatus())

    self.assertEqual({self.mount: "RUNNING"}, self.eden.list_cmd_simple())
def test_stop_during_takeover(self) -> None:
    """Check that stop requests are rejected while a graceful restart is pending.

    A fault is injected that blocks the server shutdown step of a takeover,
    then a graceful restart is started in a separate process.  While the
    daemon reports STOPPING, a normal restart, a normal shutdown, and a
    signal-based stop are each expected to be rejected.  Finally the fault is
    unblocked and the graceful restart is allowed to complete.
    """
    # block graceful restart
    with self.eden.get_thrift_client_legacy() as client:
        client.injectFault(
            FaultDefinition(
                keyClass="takeover", keyValueRegex="server_shutdown", block=True
            )
        )

        self.eden.wait_for_is_healthy()

        # Run a graceful restart
        # This won't succeed until we unblock the shutdown.
        p = Process(target=self.eden.graceful_restart)
        p.start()

        # Wait for the state to be shutting down
        def state_shutting_down() -> Optional[bool]:
            if not p.is_alive():
                raise Exception(
                    "eden restart --graceful command finished while "
                    "graceful restart was still blocked"
                )
            # Compare by value, not identity: the status arrives over thrift
            # and is not guaranteed to be the same object as the constant.
            if client.getDaemonInfo().status == fb303_status.STOPPING:
                return True
            return None

        poll_until(state_shutting_down, timeout=60)

        # Normal restart should be rejected while a graceful restart
        # is in progress
        self.assert_restart_fails_with_in_progress_graceful_restart(client)

        # Normal shutdown should be rejected while a graceful restart
        # is in progress
        self.assert_shutdown_fails_with_in_progress_graceful_restart(client)

        # Getting SIGTERM should not kill process while a graceful restart is in
        # progress
        # NOTE(review): the comment above says SIGTERM but the helper is named
        # "sigkill" -- confirm which signal the helper actually sends.
        self.assert_sigkill_fails_with_in_progress_graceful_restart(client)

        # Unblock the server shutdown and wait for the graceful restart to complete.
        client.unblockFault(
            UnblockFaultArg(keyClass="takeover", keyValueRegex="server_shutdown")
        )

        p.join()
def test_mount_init_state(self) -> None:
    """Check the states a mount passes through when ``eden mount`` is blocked.

    The checkout is unmounted, a blocking fault is injected for mounts, and
    the ``mount`` CLI command is started in the background.  The mount should
    list as INITIALIZING while blocked and as RUNNING once unblocked.
    """
    self.eden.run_cmd("unmount", self.mount)
    self.assertEqual({self.mount: "NOT_RUNNING"}, self.eden.list_cmd_simple())

    with self.eden.get_thrift_client_legacy() as client:
        client.injectFault(
            FaultDefinition(keyClass="mount", keyValueRegex=".*", block=True)
        )

        # Start "eden mount" in the background; it cannot finish until the
        # injected fault is released.
        cli_args, cli_env = self.eden.get_edenfsctl_cmd_env("mount", self.mount)
        mount_cli = subprocess.Popen(cli_args, env=cli_env)

        def mount_is_reported() -> Optional[bool]:
            # Succeed as soon as edenfs knows about the mount at all.
            reported_state = self.eden.get_mount_state(Path(self.mount), client)
            if reported_state is not None:
                return True
            # Keep polling for as long as the CLI command is still running.
            if mount_cli.poll() is None:
                return None
            raise Exception(
                f"eden mount command finished (with status "
                f"{mount_cli.returncode}) while mounting was "
                f"still blocked"
            )

        poll_until(mount_is_reported, timeout=30)
        self.assertEqual({self.mount: "INITIALIZING"}, self.eden.list_cmd_simple())

        # While the mount is initializing, most thrift calls that touch it
        # are expected to be refused.
        self._assert_thrift_calls_fail_during_mount_init(client)

        # Release the fault and let the mount finish coming up.
        release_all_mounts = UnblockFaultArg(keyClass="mount", keyValueRegex=".*")
        client.unblockFault(release_all_mounts)
        self._wait_for_mount_running(client)
        self.assertEqual({self.mount: "RUNNING"}, self.eden.list_cmd_simple())

        mount_cli.wait()
def test_start_no_mount_wait(self) -> None:
    """Check that ``--noWaitForMounts`` reports ALIVE while a mount is blocked.

    EdenFS is restarted with mounts blocked by fault injection and with
    ``--noWaitForMounts``.  The daemon should report ALIVE even though the
    mount still lists as INITIALIZING; once unblocked the mount should become
    RUNNING.
    """
    self.eden.shutdown()
    startup_flags = [
        "--noWaitForMounts",
        "--enable_fault_injection",
        "--fault_injection_block_mounts",
    ]
    self.eden.start(extra_args=startup_flags)
    self.assertEqual({self.mount: "INITIALIZING"}, self.eden.list_cmd_simple())

    with self.eden.get_thrift_client() as client:
        # The daemon is already alive although the mount is still blocked.
        self.assertEqual(fb303_status.ALIVE, client.getStatus())

        # Release the mount and wait for it to come up.
        release_all_mounts = UnblockFaultArg(keyClass="mount", keyValueRegex=".*")
        client.unblockFault(release_all_mounts)
        self._wait_for_mount_running(client)

    self.assertEqual({self.mount: "RUNNING"}, self.eden.list_cmd_simple())
def test_mount_init_state(self) -> None:
    """Check the states a mount passes through when ``eden mount`` is blocked.

    The checkout is unmounted, a blocking fault is injected for mounts, and
    the ``mount`` CLI command is started in the background.  The mount should
    list as INITIALIZING while blocked and as RUNNING once unblocked, and the
    CLI command should then exit with status 0.
    """
    self.eden.run_cmd("unmount", self.mount)
    self.assertEqual({self.mount: "NOT_RUNNING"}, self.eden.list_cmd_simple())

    with self.eden.get_thrift_client() as client:
        client.injectFault(
            FaultDefinition(keyClass="mount", keyValueRegex=".*", block=True)
        )

        # Start "eden mount" in the background; it cannot finish until the
        # injected fault is released.
        cli_args = self.eden.get_eden_cli_args("mount", self.mount)
        mount_cli = subprocess.Popen(cli_args)

        def mount_is_reported() -> Optional[bool]:
            # Succeed as soon as edenfs knows about the mount at all.
            reported_state = self.eden.get_mount_state(Path(self.mount), client)
            if reported_state is not None:
                return True
            # Keep polling for as long as the CLI command is still running.
            if mount_cli.poll() is None:
                return None
            raise Exception(
                f"eden mount command finished (with status "
                f"{mount_cli.returncode}) while mounting was "
                f"still blocked"
            )

        poll_until(mount_is_reported, timeout=30)
        self.assertEqual({self.mount: "INITIALIZING"}, self.eden.list_cmd_simple())

        # Release the fault and poll until the mount reports RUNNING.
        release_all_mounts = UnblockFaultArg(keyClass="mount", keyValueRegex=".*")
        client.unblockFault(release_all_mounts)

        def mount_is_running() -> Optional[bool]:
            current_state = self.eden.get_mount_state(Path(self.mount), client)
            return True if current_state == MountState.RUNNING else None

        poll_until(mount_is_running, timeout=30)
        self.assertEqual({self.mount: "RUNNING"}, self.eden.list_cmd_simple())
        self.assertEqual(0, mount_cli.wait())
def test_mount_state_during_unmount_with_in_progress_checkout(self) -> None:
    """Check the listed mount state when unmounting during a blocked checkout.

    A fault is injected that blocks inode checkout, a checkout to a new
    commit is started in one process, and an unmount is started in another.
    While the checkout is blocked, ``eden list`` should show the mount as
    SHUTTING_DOWN and must not show it as "(not mounted)".
    """
    mounts = self.eden.run_cmd("list")
    self.assertEqual(f"{self.mount}\n", mounts)

    self.backing_repo.write_file("foo/bar.txt", "new contents")
    new_commit = self.backing_repo.commit("Update foo/bar.txt")

    with self.eden.get_thrift_client() as client:
        client.injectFault(
            FaultDefinition(keyClass="inodeCheckout", keyValueRegex=".*", block=True)
        )

        # Run a checkout
        p1 = Process(target=self.repo.update, args=(new_commit,))
        p1.start()

        hg_parent = self.hg("log", "-r.", "-T{node}")

        # Ensure the checkout has started
        def checkout_in_progress() -> Optional[bool]:
            try:
                client.getScmStatusV2(
                    GetScmStatusParams(
                        mountPoint=bytes(self.mount, encoding="utf-8"),
                        commit=bytes(hg_parent, encoding="utf-8"),
                        listIgnored=False,
                    )
                )
            except EdenError as ex:
                if ex.errorType == EdenErrorType.CHECKOUT_IN_PROGRESS:
                    return True
                # Any other error is unexpected; re-raise it with its
                # original traceback intact.
                raise
            return None

        util.poll_until(checkout_in_progress, timeout=30)

        p2 = Process(target=self.eden.unmount, args=(self.mount,))
        p2.start()

        # Wait for the state to be shutting down
        def state_shutting_down() -> Optional[bool]:
            listing = self.eden.run_cmd("list")
            print(listing)
            if "SHUTTING_DOWN" in listing:
                return True
            if "(not mounted)" in listing:
                self.fail(
                    "mount should not list status as not mounted while "
                    "checkout is in progress"
                )
            return None

        util.poll_until(state_shutting_down, timeout=30)

        # Unblock the server shutdown and wait for the checkout to complete.
        client.unblockFault(
            UnblockFaultArg(keyClass="inodeCheckout", keyValueRegex=".*")
        )

        # join the checkout before the unmount because the unmount call
        # won't finish until the checkout has finished
        p1.join()
        p2.join()