def test_stop_during_takeover(self) -> None:
    """Check that stop/restart requests are refused mid graceful restart.

    A blocking fault is injected on the ("takeover", "server_shutdown")
    key, a graceful restart is launched in a child process, and once the
    daemon reports STOPPING the test runs the three
    ``assert_*_fails_with_in_progress_graceful_restart`` helpers before
    releasing the fault and joining the child.
    """
    with self.eden.get_thrift_client_legacy() as client:
        # Hold the graceful restart at the server-shutdown step.
        shutdown_fault = FaultDefinition(
            keyClass="takeover", keyValueRegex="server_shutdown", block=True
        )
        client.injectFault(shutdown_fault)

        self.eden.wait_for_is_healthy()

        # Kick off the graceful restart in a child process; it cannot
        # finish until the fault above is released.
        restart_proc = Process(target=self.eden.graceful_restart)
        restart_proc.start()

        def daemon_is_stopping() -> Optional[bool]:
            """Return True once the daemon reports STOPPING, else None."""
            if restart_proc.is_alive():
                status = client.getDaemonInfo().status
                # NOTE(review): identity comparison kept from the original;
                # confirm fb303_status members are singletons.
                return True if status is fb303_status.STOPPING else None
            # The restart command must still be running while blocked.
            raise Exception(
                "eden restart --graceful command finished while "
                "graceful restart was still blocked"
            )

        poll_until(daemon_is_stopping, timeout=60)

        # A regular restart must be rejected during the graceful restart.
        self.assert_restart_fails_with_in_progress_graceful_restart(client)

        # A regular shutdown must be rejected as well.
        self.assert_shutdown_fails_with_in_progress_graceful_restart(client)

        # A termination signal must not take the process down either.
        # NOTE(review): the original comment said SIGTERM while the helper
        # is named for SIGKILL -- confirm which signal is actually sent.
        self.assert_sigkill_fails_with_in_progress_graceful_restart(client)

        # Release the fault so the graceful restart can run to completion.
        release = UnblockFaultArg(
            keyClass="takeover", keyValueRegex="server_shutdown"
        )
        client.unblockFault(release)

        restart_proc.join()
def test_mount_init_state(self) -> None:
    """Check the mount states reported while ``eden mount`` is blocked.

    The mount is first unmounted (expected state: NOT_RUNNING). A blocking
    fault is injected for keyClass "mount", then the ``eden mount`` CLI
    command is started as a subprocess. While the fault is in place the
    mount must be listed as INITIALIZING and thrift access is checked via
    ``_assert_thrift_calls_fail_during_mount_init``. After the fault is
    released the mount must reach RUNNING and the CLI command must exit
    with status 0.
    """
    self.eden.run_cmd("unmount", self.mount)
    self.assertEqual({self.mount: "NOT_RUNNING"}, self.eden.list_cmd_simple())

    with self.eden.get_thrift_client_legacy() as client:
        fault = FaultDefinition(keyClass="mount", keyValueRegex=".*", block=True)
        client.injectFault(fault)

        # Run the "eden mount" CLI command.
        # This won't succeed until we unblock the mount.
        mount_cmd, edenfsctl_env = self.eden.get_edenfsctl_cmd_env(
            "mount", self.mount
        )
        mount_proc = subprocess.Popen(mount_cmd, env=edenfsctl_env)

        # Wait for the new mount to be reported by edenfs
        def mount_started() -> Optional[bool]:
            """Return True once edenfs reports a state for the mount."""
            if self.eden.get_mount_state(Path(self.mount), client) is not None:
                return True
            # The CLI exiting while the fault is still in place means the
            # block did not take effect (or the command failed early).
            if mount_proc.poll() is not None:
                raise Exception(
                    f"eden mount command finished (with status "
                    f"{mount_proc.returncode}) while mounting was "
                    f"still blocked"
                )
            return None

        poll_until(mount_started, timeout=30)
        self.assertEqual({self.mount: "INITIALIZING"}, self.eden.list_cmd_simple())

        # Most thrift calls to access the mount should be disallowed while it is
        # still initializing.
        self._assert_thrift_calls_fail_during_mount_init(client)

        # Unblock mounting and wait for the mount to transition to running
        client.unblockFault(UnblockFaultArg(keyClass="mount", keyValueRegex=".*"))

        self._wait_for_mount_running(client)
        self.assertEqual({self.mount: "RUNNING"}, self.eden.list_cmd_simple())

        # Check the CLI's exit status instead of discarding it, so a failing
        # "eden mount" command cannot go unnoticed.
        self.assertEqual(0, mount_proc.wait())
def test_takeover_with_tree_inode_loading_from_local_store(self) -> None:
    """
    Gracefully restart edenfs while a tree inode load is still pending.

    Local-store reads are slowed down with injected delay faults, a
    directory listing is issued to start an asynchronous tree inode load,
    and then a graceful restart is requested. The restart must not
    deadlock.
    """
    graceful_restart_startup_time = 5.0
    delay_ms = int(graceful_restart_startup_time * 1000)
    delayed_key_classes = ("local store get single", "local store get batch")

    with self.eden.get_thrift_client() as client:
        for key_class in delayed_key_classes:
            delay_fault = FaultDefinition(
                keyClass=key_class,
                keyValueRegex=".*",
                delayMilliseconds=delay_ms,
                count=100,
            )
            client.injectFault(delay_fault)

    # Start loading a tree inode from the local store via a prefetch, so
    # that the local store is in use during takeover. Per the original
    # author's notes (true "at the time of writing"):
    #   * os.listdir("foo") makes edenfs prefetch the tree inodes of foo/*,
    #     which is what listing the mount root exploits here.
    #   * Loading through a FUSE request (e.g. os.stat) was rejected because
    #     edenfs would wait for that request to finish before starting the
    #     inode shutdown procedure.
    #   * 'edenfsctl prefetch' was rejected because it does not prefetch
    #     tree inodes asynchronously.
    os.listdir(self.mount)

    self.eden.graceful_restart()
def test_mount_init_state(self) -> None:
    """Walk a mount through NOT_RUNNING -> INITIALIZING -> RUNNING.

    The mount is unmounted, a blocking fault for keyClass "mount" is
    injected, and ``eden mount`` is launched as a subprocess. The listed
    state is asserted at each step, and the subprocess must finally exit
    with status 0.
    """
    self.eden.run_cmd("unmount", self.mount)
    self.assertEqual({self.mount: "NOT_RUNNING"}, self.eden.list_cmd_simple())

    mount_path = Path(self.mount)

    with self.eden.get_thrift_client() as client:
        client.injectFault(
            FaultDefinition(keyClass="mount", keyValueRegex=".*", block=True)
        )

        # Launch the "eden mount" CLI command; it stays blocked until the
        # fault injected above is released.
        cli_proc = subprocess.Popen(self.eden.get_eden_cli_args("mount", self.mount))

        def mount_started() -> Optional[bool]:
            """Return True once edenfs reports any state for the mount."""
            if self.eden.get_mount_state(mount_path, client) is None:
                # Not reported yet: the CLI must still be running.
                if cli_proc.poll() is not None:
                    raise Exception(
                        f"eden mount command finished (with status "
                        f"{cli_proc.returncode}) while mounting was "
                        f"still blocked"
                    )
                return None
            return True

        poll_until(mount_started, timeout=30)
        self.assertEqual({self.mount: "INITIALIZING"}, self.eden.list_cmd_simple())

        # Release the fault and wait for the RUNNING state.
        release = UnblockFaultArg(keyClass="mount", keyValueRegex=".*")
        client.unblockFault(release)

        def mount_running() -> Optional[bool]:
            """Return True once the mount state equals MountState.RUNNING."""
            current = self.eden.get_mount_state(mount_path, client)
            return True if current == MountState.RUNNING else None

        poll_until(mount_running, timeout=30)
        self.assertEqual({self.mount: "RUNNING"}, self.eden.list_cmd_simple())

        exit_status = cli_proc.wait()
        self.assertEqual(0, exit_status)
def test_mount_state_during_unmount_with_in_progress_checkout(self) -> None:
    """Check the listed mount state when unmounting during a checkout.

    A checkout to a new commit is started in a child process and held by a
    blocking "inodeCheckout" fault. An unmount is then started in a second
    child process. While the checkout is blocked, ``eden list`` must show
    SHUTTING_DOWN and must never show "(not mounted)". Finally the fault
    is released and both child processes are joined.
    """
    self.assertEqual(f"{self.mount}\n", self.eden.run_cmd("list"))

    self.backing_repo.write_file("foo/bar.txt", "new contents")
    new_commit = self.backing_repo.commit("Update foo/bar.txt")

    with self.eden.get_thrift_client() as client:
        checkout_fault = FaultDefinition(
            keyClass="inodeCheckout", keyValueRegex=".*", block=True
        )
        client.injectFault(checkout_fault)

        # Start the checkout; the fault keeps it from completing.
        checkout_proc = Process(target=self.repo.update, args=(new_commit,))
        checkout_proc.start()

        hg_parent = self.hg("log", "-r.", "-T{node}")
        status_params = GetScmStatusParams(
            mountPoint=bytes(self.mount, encoding="utf-8"),
            commit=bytes(hg_parent, encoding="utf-8"),
            listIgnored=False,
        )

        def checkout_in_progress() -> Optional[bool]:
            """Return True once a status query fails with CHECKOUT_IN_PROGRESS."""
            try:
                client.getScmStatusV2(status_params)
            except EdenError as ex:
                if ex.errorType == EdenErrorType.CHECKOUT_IN_PROGRESS:
                    return True
                # Any other EdenError is unexpected; propagate it.
                raise ex
            return None

        util.poll_until(checkout_in_progress, timeout=30)

        unmount_proc = Process(target=self.eden.unmount, args=(self.mount,))
        unmount_proc.start()

        def state_shutting_down() -> Optional[bool]:
            """Return True once the listing shows SHUTTING_DOWN."""
            listing = self.eden.run_cmd("list")
            print(listing)
            if "SHUTTING_DOWN" in listing:
                return True
            if "(not mounted)" in listing:
                self.fail(
                    "mount should not list status as not mounted while "
                    "checkout is in progress"
                )
            return None

        util.poll_until(state_shutting_down, timeout=30)

        # Release the blocked checkout.
        client.unblockFault(
            UnblockFaultArg(keyClass="inodeCheckout", keyValueRegex=".*")
        )

        # Join the checkout first: the unmount call will not finish until
        # the checkout has finished.
        checkout_proc.join()
        unmount_proc.join()