class TempCluster:
    def __init__(self, build_dir: str):
        self.build_dir = Path(build_dir).resolve()
        assert self.build_dir.exists(), "{} does not exist".format(build_dir)
        assert self.build_dir.is_dir(), "{} is not a directory".format(build_dir)
        tmp_dir = self.build_dir.joinpath(
            'tmp',
            ''.join(choice(LocalCluster.valid_letters_for_secret) for _ in range(16)))
        tmp_dir.mkdir(parents=True)
        self.cluster = LocalCluster(
            tmp_dir,
            self.build_dir.joinpath('bin', 'fdbserver'),
            self.build_dir.joinpath('bin', 'fdbmonitor'),
            self.build_dir.joinpath('bin', 'fdbcli'))
        self.log = self.cluster.log
        self.etc = self.cluster.etc
        self.data = self.cluster.data
        self.tmp_dir = tmp_dir

    def __enter__(self):
        self.cluster.__enter__()
        self.cluster.create_database()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.cluster.__exit__(exc_type, exc_value, traceback)
        shutil.rmtree(self.tmp_dir)

class TempCluster:
    def __init__(self, build_dir: str, process_number: int = 1, port: str = None):
        self.build_dir = Path(build_dir).resolve()
        assert self.build_dir.exists(), "{} does not exist".format(build_dir)
        assert self.build_dir.is_dir(), "{} is not a directory".format(build_dir)
        tmp_dir = self.build_dir.joinpath(
            "tmp",
            "".join(choice(LocalCluster.valid_letters_for_secret) for _ in range(16)),
        )
        tmp_dir.mkdir(parents=True)
        self.cluster = LocalCluster(
            tmp_dir,
            self.build_dir.joinpath("bin", "fdbserver"),
            self.build_dir.joinpath("bin", "fdbmonitor"),
            self.build_dir.joinpath("bin", "fdbcli"),
            process_number,
            port=port,
        )
        self.log = self.cluster.log
        self.etc = self.cluster.etc
        self.data = self.cluster.data
        self.tmp_dir = tmp_dir

    def __enter__(self):
        self.cluster.__enter__()
        self.cluster.create_database()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.cluster.__exit__(exc_type, exc_value, traceback)
        shutil.rmtree(self.tmp_dir)

    def close(self):
        self.cluster.__exit__(None, None, None)
        shutil.rmtree(self.tmp_dir)
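
# Usage sketch (illustrative, not part of the original file): TempCluster is a
# context manager, so a test can bring up a throwaway cluster and rely on
# __exit__ for teardown even when the test body raises. The fdbcli smoke test
# below assumes LocalCluster exposes the binary path as `fdbcli_binary` and
# the cluster file as `cluster_file`, as the upgrade-test code further down
# also assumes.
def run_fdbcli_smoke_test(build_dir):
    import subprocess

    with TempCluster(build_dir) as tmp:
        # Query minimal cluster status through fdbcli as a quick smoke test.
        result = subprocess.run(
            [
                str(tmp.cluster.fdbcli_binary),
                "-C", str(tmp.cluster.cluster_file),
                "--exec", "status minimal",
            ],
            capture_output=True,
            text=True,
        )
        print(result.stdout)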

class UpgradeTest:
    def __init__(self, args):
        self.build_dir = Path(args.build_dir).resolve()
        assert self.build_dir.exists(), "{} does not exist".format(args.build_dir)
        assert self.build_dir.is_dir(), "{} is not a directory".format(args.build_dir)
        self.upgrade_path = args.upgrade_path
        self.used_versions = set(self.upgrade_path).difference(set(CLUSTER_ACTIONS))
        for version in self.used_versions:
            assert version in SUPPORTED_VERSIONS, "Unsupported version or cluster action {}".format(version)
        self.platform = platform.machine()
        assert self.platform in SUPPORTED_PLATFORMS, "Unsupported platform {}".format(self.platform)
        self.tmp_dir = self.build_dir.joinpath("tmp", random_secret_string(16))
        self.tmp_dir.mkdir(parents=True)
        self.download_dir = self.build_dir.joinpath("tmp", "old_binaries")
        self.local_binary_repo = Path(LOCAL_OLD_BINARY_REPO)
        if not self.local_binary_repo.exists():
            self.local_binary_repo = None
        self.download_old_binaries()
        self.create_external_lib_dir()
        init_version = self.upgrade_path[0]
        self.cluster = LocalCluster(
            self.tmp_dir,
            self.binary_path(init_version, "fdbserver"),
            self.binary_path(init_version, "fdbmonitor"),
            self.binary_path(init_version, "fdbcli"),
            args.process_number,
            create_config=False,
            redundancy=args.redundancy)
        self.cluster.create_cluster_file()
        self.configure_version(init_version)
        self.log = self.cluster.log
        self.etc = self.cluster.etc
        self.data = self.cluster.data
        self.input_pipe_path = self.tmp_dir.joinpath("input.{}".format(random_secret_string(8)))
        self.output_pipe_path = self.tmp_dir.joinpath("output.{}".format(random_secret_string(8)))
        os.mkfifo(self.input_pipe_path)
        os.mkfifo(self.output_pipe_path)
        self.progress_event = Event()
        self.api_version = None
        self.tester_retcode = None
        self.tester_proc = None
        self.output_pipe = None
        self.tester_bin = None
        self.ctrl_pipe = None

    # Check if the binaries for the given version are available in the local old binaries repository
    def version_in_local_repo(self, version):
        return self.local_binary_repo is not None and self.local_binary_repo.joinpath(version).exists()

    def binary_path(self, version, bin_name):
        if version == CURRENT_VERSION:
            return self.build_dir.joinpath("bin", bin_name)
        elif self.version_in_local_repo(version):
            return self.local_binary_repo.joinpath(version, "bin", "{}-{}".format(bin_name, version))
        else:
            return self.download_dir.joinpath(version, bin_name)

    def lib_dir(self, version):
        if version == CURRENT_VERSION:
            return self.build_dir.joinpath("lib")
        else:
            return self.download_dir.joinpath(version)

    # Download an old binary of a given version from a remote repository
    def download_old_binary(self, version, target_bin_name, remote_bin_name, make_executable):
        local_file = self.download_dir.joinpath(version, target_bin_name)
        if local_file.exists():
            return

        # Download to a temporary file and then replace the target file atomically
        # to avoid consistency errors in case multiple tests download the same
        # file in parallel
        local_file_tmp = Path("{}.{}".format(str(local_file), random_secret_string(8)))
        self.download_dir.joinpath(version).mkdir(parents=True, exist_ok=True)
        remote_file = "{}{}/{}".format(FDB_DOWNLOAD_ROOT, version, remote_bin_name)
        remote_sha256 = "{}.sha256".format(remote_file)
        local_sha256 = Path("{}.sha256".format(local_file_tmp))

        for attempt_cnt in range(MAX_DOWNLOAD_ATTEMPTS + 1):
            if attempt_cnt == MAX_DOWNLOAD_ATTEMPTS:
                assert False, "Failed to download {} after {} attempts".format(
                    local_file_tmp, MAX_DOWNLOAD_ATTEMPTS)
            try:
                print("Downloading '{}' to '{}'...".format(remote_file, local_file_tmp))
                request.urlretrieve(remote_file, local_file_tmp)
                print("Downloading '{}' to '{}'...".format(remote_sha256, local_sha256))
                request.urlretrieve(remote_sha256, local_sha256)
                print("Download complete")
            except Exception as e:
                print("Retrying on error:", e)
                continue
            assert local_file_tmp.exists(), "{} does not exist".format(local_file_tmp)
            assert local_sha256.exists(), "{} does not exist".format(local_sha256)
            expected_checksum = read_to_str(local_sha256)
            actual_checksum = compute_sha256(local_file_tmp)
            if expected_checksum == actual_checksum:
                print("Checksum OK")
                break
            print("Checksum mismatch. Expected: {} Actual: {}".format(expected_checksum, actual_checksum))

        os.rename(local_file_tmp, local_file)
        os.remove(local_sha256)
        if make_executable:
            make_executable_path(local_file)

    # Copy a client library file from the local old binaries repository.
    # The file needs to be renamed to libfdb_c.so, because it is loaded with this name by fdbcli
    def copy_clientlib_from_local_repo(self, version):
        dest_lib_file = self.download_dir.joinpath(version, "libfdb_c.so")
        if dest_lib_file.exists():
            return
        # Avoid race conditions in case of parallel test execution by first copying to a temporary file
        # and then renaming it atomically
        dest_file_tmp = Path("{}.{}".format(str(dest_lib_file), random_secret_string(8)))
        src_lib_file = self.local_binary_repo.joinpath(version, "lib", "libfdb_c-{}.so".format(version))
        assert src_lib_file.exists(), "Missing file {} in the local old binaries repository".format(src_lib_file)
        self.download_dir.joinpath(version).mkdir(parents=True, exist_ok=True)
        shutil.copyfile(src_lib_file, dest_file_tmp)
        os.rename(dest_file_tmp, dest_lib_file)
        assert dest_lib_file.exists(), "{} does not exist".format(dest_lib_file)

    # Download all old binaries required for testing the specified upgrade path
    def download_old_binaries(self):
        for version in self.used_versions:
            if version == CURRENT_VERSION:
                continue
            if self.version_in_local_repo(version):
                self.copy_clientlib_from_local_repo(version)
                continue
            self.download_old_binary(version, "fdbserver", "fdbserver.{}".format(self.platform), True)
            self.download_old_binary(version, "fdbmonitor", "fdbmonitor.{}".format(self.platform), True)
            self.download_old_binary(version, "fdbcli", "fdbcli.{}".format(self.platform), True)
            self.download_old_binary(version, "libfdb_c.so", "libfdb_c.{}.so".format(self.platform), False)

    # Create a directory for external client libraries for MVC and fill it
    # with the libraries necessary for the specified upgrade path
    def create_external_lib_dir(self):
        self.external_lib_dir = self.tmp_dir.joinpath("client_libs")
        self.external_lib_dir.mkdir(parents=True)
        for version in self.used_versions:
            src_file_path = self.lib_dir(version).joinpath("libfdb_c.so")
            assert src_file_path.exists(), "{} does not exist".format(src_file_path)
            target_file_path = self.external_lib_dir.joinpath("libfdb_c.{}.so".format(version))
            shutil.copyfile(src_file_path, target_file_path)

    # Perform a health check of the cluster: use the fdbcli status command to check whether the number of
    # server processes and their versions are as expected
    def health_check(self, timeout_sec=HEALTH_CHECK_TIMEOUT_SEC):
        retries = 0
        while retries < timeout_sec:
            retries += 1
            status = self.cluster.get_status()
            if "processes" not in status["cluster"]:
                print("Health check: no processes found. Retrying")
                time.sleep(1)
                continue
            num_proc = len(status["cluster"]["processes"])
            if num_proc != self.cluster.process_number:
                print("Health check: {} of {} processes found. Retrying".format(
                    num_proc, self.cluster.process_number))
                time.sleep(1)
                continue
            for (_, proc_stat) in status["cluster"]["processes"].items():
                proc_ver = proc_stat["version"]
                assert proc_ver == self.cluster_version, "Process version: expected: {}, actual: {}".format(
                    self.cluster_version, proc_ver)
            print("Health check: OK")
            return
        assert False, "Health check: Failed"

    # Create and save a cluster configuration for the given version
    def configure_version(self, version):
        self.cluster.fdbmonitor_binary = self.binary_path(version, "fdbmonitor")
        self.cluster.fdbserver_binary = self.binary_path(version, "fdbserver")
        self.cluster.fdbcli_binary = self.binary_path(version, "fdbcli")
        self.cluster.set_env_var = "LD_LIBRARY_PATH", self.lib_dir(version)
        if version_before(version, "7.1.0"):
            self.cluster.use_legacy_conf_syntax = True
        self.cluster.save_config()
        self.cluster_version = version

    # Upgrade the cluster to the given version
    def upgrade_to(self, version):
        print("Upgrading to version {}".format(version))
        self.cluster.stop_cluster()
        self.configure_version(version)
        self.cluster.ensure_ports_released()
        self.cluster.start_cluster()
        print("Upgraded to {}".format(version))

    def __enter__(self):
        print("Starting cluster version {}".format(self.cluster_version))
        self.cluster.start_cluster()
        self.cluster.create_database(enable_tenants=False)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.cluster.stop_cluster()
        shutil.rmtree(self.tmp_dir)

    # Determine the FDB API version matching the upgrade path
    def determine_api_version(self):
        self.api_version = api_version_from_str(CURRENT_VERSION)
        for version in self.used_versions:
            self.api_version = min(api_version_from_str(version), self.api_version)

    # Start the tester to generate the workload specified by the test file
    def exec_workload(self, test_file):
        self.tester_retcode = 1
        try:
            self.determine_api_version()
            cmd_args = [
                self.tester_bin,
                "--cluster-file", self.cluster.cluster_file,
                "--test-file", test_file,
                "--external-client-dir", self.external_lib_dir,
                "--disable-local-client",
                "--input-pipe", self.input_pipe_path,
                "--output-pipe", self.output_pipe_path,
                "--api-version", str(self.api_version),
                "--log",
                "--log-dir", self.log,
                "--tmp-dir", self.tmp_dir,
                "--transaction-retry-limit", str(TRANSACTION_RETRY_LIMIT),
                "--stats-interval", str(TESTER_STATS_INTERVAL_SEC * 1000),
            ]
            if RUN_WITH_GDB:
                cmd_args = ["gdb", "-ex", "run", "--args"] + cmd_args
            print("Executing test command: {}".format(" ".join([str(c) for c in cmd_args])))
            self.tester_proc = subprocess.Popen(cmd_args, stdout=sys.stdout, stderr=sys.stderr)
            self.tester_retcode = self.tester_proc.wait()
            self.tester_proc = None
            if self.tester_retcode != 0:
                print("Tester failed with return code {}".format(self.tester_retcode))
        except Exception:
            print("Execution of test workload failed")
            print(traceback.format_exc())
        finally:
            # If the tester failed to initialize, other threads of the test may stay
            # blocked on trying to open the named pipes
            if self.ctrl_pipe is None or self.output_pipe is None:
                print("Tester failed before initializing named pipes. Aborting the test")
                os._exit(1)

    # Perform a progress check: trigger it and wait until it is completed
    def progress_check(self):
        self.progress_event.clear()
        os.write(self.ctrl_pipe, b"CHECK\n")
        self.progress_event.wait(None if RUN_WITH_GDB else PROGRESS_CHECK_TIMEOUT_SEC)
        if self.progress_event.is_set():
            print("Progress check: OK")
        else:
            assert False, "Progress check failed after upgrade to version {}".format(self.cluster_version)

    # The main function of a thread for reading and processing
    # the notifications received from the tester
    def output_pipe_reader(self):
        try:
            print("Opening pipe {} for reading".format(self.output_pipe_path))
            self.output_pipe = open(self.output_pipe_path, "r")
            for line in self.output_pipe:
                msg = line.strip()
                print("Received {}".format(msg))
                if msg == "CHECK_OK":
                    self.progress_event.set()
            self.output_pipe.close()
        except Exception as e:
            print("Error while reading output pipe", e)
            print(traceback.format_exc())

    # Execute the upgrade test workflow according to the specified
    # upgrade path: perform the upgrade steps and check success after each step
    def exec_upgrade_test(self):
        print("Opening pipe {} for writing".format(self.input_pipe_path))
        self.ctrl_pipe = os.open(self.input_pipe_path, os.O_WRONLY)
        try:
            self.health_check()
            self.progress_check()
            random_sleep(0.0, 2.0)
            for entry in self.upgrade_path[1:]:
                if entry == "wiggle":
                    self.cluster.cluster_wiggle()
                else:
                    assert entry in self.used_versions, "Unexpected entry in the upgrade path: {}".format(entry)
                    self.upgrade_to(entry)
                self.health_check()
                self.progress_check()
            os.write(self.ctrl_pipe, b"STOP\n")
        finally:
            os.close(self.ctrl_pipe)

    # Kill the tester process if it is still alive
    def kill_tester_if_alive(self, workload_thread):
        if not workload_thread.is_alive():
            return
        if self.tester_proc is not None:
            try:
                print("Killing the tester process")
                self.tester_proc.kill()
                workload_thread.join(5)
            except Exception:
                print("Failed to kill the tester process")

    # The main method implementing the test:
    # - Start a thread generating the workload using a tester binary
    # - Start a thread reading notifications from the tester
    # - Trigger the upgrade steps and checks in the main thread
    def exec_test(self, args):
        self.tester_bin = self.build_dir.joinpath("bin", "fdb_c_api_tester")
        assert self.tester_bin.exists(), "{} does not exist".format(self.tester_bin)
        self.tester_proc = None
        test_retcode = 1
        try:
            workload_thread = Thread(target=self.exec_workload, args=(args.test_file,))
            workload_thread.start()
            reader_thread = Thread(target=self.output_pipe_reader)
            reader_thread.start()
            self.exec_upgrade_test()
            test_retcode = 0
        except Exception:
            print("Upgrade test failed")
            print(traceback.format_exc())
            self.kill_tester_if_alive(workload_thread)
        finally:
            workload_thread.join(5)
            reader_thread.join(5)
            self.kill_tester_if_alive(workload_thread)
        if test_retcode == 0:
            test_retcode = self.tester_retcode
        return test_retcode

    def grep_logs_for_events(self, severity):
        return (subprocess.getoutput(
            "grep -r 'Severity=\"{}\"' {}".format(severity, self.cluster.log.as_posix()))
            .rstrip().splitlines())

    # Check the cluster log for errors
    def check_cluster_logs(self, error_limit=100):
        sev40s = self.grep_logs_for_events(40)
        err_cnt = 0
        for line in sev40s:
            # When running under ASAN we expect to see this message. Boost coroutine should be using the
            # correct ASAN annotations, so it shouldn't produce any false positives.
            if line.endswith(
                    "WARNING: ASan doesn't fully support makecontext/swapcontext functions and may produce false "
                    "positives in some cases!"):
                continue
            if err_cnt < error_limit:
                print(line)
            err_cnt += 1

        if err_cnt > 0:
            print(">>>>>>>>>>>>>>>>>>>> Found {} severity 40 events - the test fails".format(err_cnt))
        else:
            print("No errors found in logs")
        return err_cnt == 0

    # Check the server and client logs for warnings and dump them
    def dump_warnings_in_logs(self, limit=100):
        sev30s = self.grep_logs_for_events(30)
        if len(sev30s) == 0:
            print("No warnings found in logs")
        else:
            print(">>>>>>>>>>>>>>>>>>>> Found {} severity 30 events (warnings):".format(len(sev30s)))
            for line in sev30s[:limit]:
                print(line)

    # Dump the last cluster configuration and cluster logs
    def dump_cluster_logs(self):
        for etc_file in glob.glob(os.path.join(self.cluster.etc, "*")):
            print(">>>>>>>>>>>>>>>>>>>> Contents of {}:".format(etc_file))
            with open(etc_file, "r") as f:
                print(f.read())
        for log_file in glob.glob(os.path.join(self.cluster.log, "*")):
            print(">>>>>>>>>>>>>>>>>>>> Contents of {}:".format(log_file))
            with open(log_file, "r") as f:
                print(f.read())
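
# Driver sketch (illustrative, not the original test runner's entry point):
# the class above reads args.build_dir, args.upgrade_path, args.test_file,
# args.process_number, and args.redundancy, so a minimal argparse front end
# only needs those options. The flag names below are assumptions derived from
# those attribute reads; the real runner may define more options and different
# defaults.
if __name__ == "__main__":
    import argparse
    import sys

    parser = argparse.ArgumentParser(description="Run a FoundationDB upgrade test")
    parser.add_argument("--build-dir", required=True)
    parser.add_argument("--upgrade-path", nargs="+", required=True)
    parser.add_argument("--test-file", required=True)
    parser.add_argument("--process-number", type=int, default=1)
    parser.add_argument("--redundancy", default="single")
    args = parser.parse_args()

    # Logs live under the test's tmp dir, so inspect them before __exit__
    # tears the cluster down and removes that directory.
    with UpgradeTest(args) as test:
        errcode = test.exec_test(args)
        if not test.check_cluster_logs():
            errcode = errcode or 1
        test.dump_warnings_in_logs()
    sys.exit(errcode)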

class UpgradeTest:
    def __init__(self, build_dir: str, upgrade_path: list, process_number: int = 1, port: str = None):
        self.build_dir = Path(build_dir).resolve()
        assert self.build_dir.exists(), "{} does not exist".format(build_dir)
        assert self.build_dir.is_dir(), "{} is not a directory".format(build_dir)
        self.upgrade_path = upgrade_path
        for version in upgrade_path:
            assert version in SUPPORTED_VERSIONS, "Unsupported version {}".format(version)
        self.platform = platform.machine()
        assert self.platform in SUPPORTED_PLATFORMS, "Unsupported platform {}".format(self.platform)
        self.tmp_dir = self.build_dir.joinpath("tmp", random_secret_string(16))
        self.tmp_dir.mkdir(parents=True)
        self.download_dir = self.build_dir.joinpath("tmp", "old_binaries")
        self.download_old_binaries()
        self.create_external_lib_dir()
        init_version = upgrade_path[0]
        self.cluster = LocalCluster(
            self.tmp_dir,
            self.binary_path(init_version, "fdbserver"),
            self.binary_path(init_version, "fdbmonitor"),
            self.binary_path(init_version, "fdbcli"),
            process_number,
            port=port,
            create_config=False)
        self.cluster.create_cluster_file()
        self.configure_version(init_version)
        self.log = self.cluster.log
        self.etc = self.cluster.etc
        self.data = self.cluster.data
        self.input_pipe_path = self.tmp_dir.joinpath("input.{}".format(random_secret_string(8)))
        self.output_pipe_path = self.tmp_dir.joinpath("output.{}".format(random_secret_string(8)))
        os.mkfifo(self.input_pipe_path)
        os.mkfifo(self.output_pipe_path)
        self.progress_event = Event()

    def binary_path(self, version, bin_name):
        if version == CURRENT_VERSION:
            return self.build_dir.joinpath("bin", bin_name)
        else:
            return self.download_dir.joinpath(version, bin_name)

    def lib_dir(self, version):
        if version == CURRENT_VERSION:
            return self.build_dir.joinpath("lib")
        else:
            return self.download_dir.joinpath(version)

    # Download an old binary of a given version from a remote repository
    def download_old_binary(self, version, target_bin_name, remote_bin_name, make_binary_executable):
        local_file = self.binary_path(version, target_bin_name)
        if local_file.exists():
            return
        self.download_dir.joinpath(version).mkdir(parents=True, exist_ok=True)
        remote_file = "{}{}/{}".format(FDB_DOWNLOAD_ROOT, version, remote_bin_name)
        print("Downloading '{}' to '{}'...".format(remote_file, local_file))
        request.urlretrieve(remote_file, local_file)
        print("Download complete")
        assert local_file.exists(), "{} does not exist".format(local_file)
        if make_binary_executable:
            make_executable(local_file)

    # Download all old binaries required for testing the specified upgrade path
    def download_old_binaries(self):
        for version in self.upgrade_path:
            if version == CURRENT_VERSION:
                continue
            self.download_old_binary(version, "fdbserver", "fdbserver.{}".format(self.platform), True)
            self.download_old_binary(version, "fdbmonitor", "fdbmonitor.{}".format(self.platform), True)
            self.download_old_binary(version, "fdbcli", "fdbcli.{}".format(self.platform), True)
            self.download_old_binary(version, "libfdb_c.so", "libfdb_c.{}.so".format(self.platform), False)

    # Create a directory for external client libraries for MVC and fill it
    # with the libraries necessary for the specified upgrade path
    def create_external_lib_dir(self):
        self.external_lib_dir = self.tmp_dir.joinpath("client_libs")
        self.external_lib_dir.mkdir(parents=True)
        for version in self.upgrade_path:
            src_file_path = self.lib_dir(version).joinpath("libfdb_c.so")
            assert src_file_path.exists(), "{} does not exist".format(src_file_path)
            target_file_path = self.external_lib_dir.joinpath("libfdb_c.{}.so".format(version))
            shutil.copyfile(src_file_path, target_file_path)

    # Perform a health check of the cluster: use the fdbcli status command to check whether the number of
    # server processes and their versions are as expected
    def health_check(self, timeout_sec=HEALTH_CHECK_TIMEOUT_SEC):
        retries = 0
        while retries < timeout_sec:
            retries += 1
            status = self.cluster.get_status()
            if "processes" not in status["cluster"]:
                print("Health check: no processes found. Retrying")
                time.sleep(1)
                continue
            num_proc = len(status["cluster"]["processes"])
            if num_proc < self.cluster.process_number:
                print("Health check: {} of {} processes found. Retrying".format(
                    num_proc, self.cluster.process_number))
                time.sleep(1)
                continue
            assert num_proc == self.cluster.process_number, "Number of processes: expected: {}, actual: {}".format(
                self.cluster.process_number, num_proc)
            for (_, proc_stat) in status["cluster"]["processes"].items():
                proc_ver = proc_stat["version"]
                assert proc_ver == self.cluster_version, "Process version: expected: {}, actual: {}".format(
                    self.cluster_version, proc_ver)
            print("Health check: OK")
            return
        assert False, "Health check: Failed"

    # Create and save a cluster configuration for the given version
    def configure_version(self, version):
        self.cluster.fdbmonitor_binary = self.binary_path(version, "fdbmonitor")
        self.cluster.fdbserver_binary = self.binary_path(version, "fdbserver")
        self.cluster.fdbcli_binary = self.binary_path(version, "fdbcli")
        self.cluster.set_env_var = "LD_LIBRARY_PATH", self.lib_dir(version)
        if version_before(version, "7.1.0"):
            self.cluster.use_legacy_conf_syntax = True
        self.cluster.save_config()
        self.cluster_version = version

    # Upgrade the cluster to the given version
    def upgrade_to(self, version):
        print("Upgrading to version {}".format(version))
        self.cluster.stop_cluster()
        self.configure_version(version)
        self.cluster.ensure_ports_released()
        self.cluster.start_cluster()
        print("Upgraded to {}".format(version))

    def __enter__(self):
        print("Starting cluster version {}".format(self.cluster_version))
        self.cluster.start_cluster()
        self.cluster.create_database(enable_tenants=False)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.cluster.stop_cluster()
        shutil.rmtree(self.tmp_dir)

    # Determine the FDB API version matching the upgrade path
    def determine_api_version(self):
        self.api_version = api_version_from_str(CURRENT_VERSION)
        for version in self.upgrade_path:
            self.api_version = min(api_version_from_str(version), self.api_version)

    # Start the tester to generate the workload specified by the test file
    def exec_workload(self, test_file):
        self.tester_retcode = 1
        try:
            self.determine_api_version()
            cmd_args = [
                self.tester_bin,
                '--cluster-file', self.cluster.cluster_file,
                '--test-file', test_file,
                '--external-client-dir', self.external_lib_dir,
                '--disable-local-client',
                '--input-pipe', self.input_pipe_path,
                '--output-pipe', self.output_pipe_path,
                '--api-version', str(self.api_version),
                '--log',
                '--log-dir', self.log,
                '--transaction-retry-limit', str(TRANSACTION_RETRY_LIMIT),
            ]
            if RUN_WITH_GDB:
                cmd_args = ['gdb', '-ex', 'run', '--args'] + cmd_args
            print("Executing test command: {}".format(" ".join([str(c) for c in cmd_args])))
            self.tester_proc = subprocess.Popen(cmd_args, stdout=sys.stdout, stderr=sys.stderr)
            self.tester_retcode = self.tester_proc.wait()
            self.tester_proc = None
            if self.tester_retcode != 0:
                print("Tester failed with return code {}".format(self.tester_retcode))
        except Exception:
            print("Execution of test workload failed")
            print(traceback.format_exc())

    # Perform a progress check: trigger it and wait until it is completed
    def progress_check(self, ctrl_pipe):
        self.progress_event.clear()
        os.write(ctrl_pipe, b"CHECK\n")
        self.progress_event.wait(None if RUN_WITH_GDB else PROGRESS_CHECK_TIMEOUT_SEC)
        if self.progress_event.is_set():
            print("Progress check: OK")
        else:
            assert False, "Progress check failed after upgrade to version {}".format(self.cluster_version)

    # The main function of a thread for reading and processing
    # the notifications received from the tester
    def output_pipe_reader(self):
        try:
            print("Opening pipe {} for reading".format(self.output_pipe_path))
            self.output_pipe = open(self.output_pipe_path, 'r')
            for line in self.output_pipe:
                msg = line.strip()
                print("Received {}".format(msg))
                if msg == "CHECK_OK":
                    self.progress_event.set()
            self.output_pipe.close()
        except Exception as e:
            print("Error while reading output pipe", e)
            print(traceback.format_exc())

    # Execute the upgrade test workflow according to the specified
    # upgrade path: perform the upgrade steps and check success after each step
    def exec_upgrade_test(self):
        print("Opening pipe {} for writing".format(self.input_pipe_path))
        ctrl_pipe = os.open(self.input_pipe_path, os.O_WRONLY)
        try:
            self.health_check()
            self.progress_check(ctrl_pipe)
            for version in self.upgrade_path[1:]:
                random_sleep(0.0, 2.0)
                self.upgrade_to(version)
                self.health_check()
                self.progress_check(ctrl_pipe)
            os.write(ctrl_pipe, b"STOP\n")
        finally:
            os.close(ctrl_pipe)

    # Kill the tester process if it is still alive
    def kill_tester_if_alive(self, workload_thread):
        if not workload_thread.is_alive():
            return
        if self.tester_proc is not None:
            try:
                print("Killing the tester process")
                self.tester_proc.kill()
                workload_thread.join(5)
            except Exception:
                print("Failed to kill the tester process")

    # The main method implementing the test:
    # - Start a thread generating the workload using a tester binary
    # - Start a thread reading notifications from the tester
    # - Trigger the upgrade steps and checks in the main thread
    def exec_test(self, args):
        self.tester_bin = self.build_dir.joinpath("bin", "fdb_c_api_tester")
        assert self.tester_bin.exists(), "{} does not exist".format(self.tester_bin)
        self.tester_proc = None
        test_retcode = 1
        try:
            workload_thread = Thread(target=self.exec_workload, args=(args.test_file,))
            workload_thread.start()
            reader_thread = Thread(target=self.output_pipe_reader)
            reader_thread.start()
            self.exec_upgrade_test()
            test_retcode = 0
        except Exception:
            print("Upgrade test failed")
            print(traceback.format_exc())
            self.kill_tester_if_alive(workload_thread)
        finally:
            workload_thread.join(5)
            reader_thread.join(5)
            self.kill_tester_if_alive(workload_thread)
        if test_retcode == 0:
            test_retcode = self.tester_retcode
        return test_retcode

    # Check the cluster log for errors
    def check_cluster_logs(self, error_limit=100):
        sev40s = (subprocess.getoutput(
            "grep -r 'Severity=\"40\"' {}".format(self.cluster.log.as_posix()))
            .rstrip().splitlines())
        err_cnt = 0
        for line in sev40s:
            # When running under ASAN we expect to see this message. Boost coroutine should be using the
            # correct ASAN annotations, so it shouldn't produce any false positives.
            if line.endswith(
                    "WARNING: ASan doesn't fully support makecontext/swapcontext functions and may produce false "
                    "positives in some cases!"):
                continue
            if err_cnt < error_limit:
                print(line)
            err_cnt += 1

        if err_cnt > 0:
            print(">>>>>>>>>>>>>>>>>>>> Found {} severity 40 events - the test fails".format(err_cnt))
        else:
            print("No errors found in logs")
        return err_cnt == 0

    # Dump the last cluster configuration and cluster logs
    def dump_cluster_logs(self):
        for etc_file in glob.glob(os.path.join(self.cluster.etc, "*")):
            print(">>>>>>>>>>>>>>>>>>>> Contents of {}:".format(etc_file))
            with open(etc_file, "r") as f:
                print(f.read())
        for log_file in glob.glob(os.path.join(self.cluster.log, "*")):
            print(">>>>>>>>>>>>>>>>>>>> Contents of {}:".format(log_file))
            with open(log_file, "r") as f:
                print(f.read())
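
# Sketch of the small helpers the classes above rely on (hypothetical
# stand-ins, shown only to make the snippets self-contained; the real test
# runner ships its own implementations, and module-level constants such as
# FDB_DOWNLOAD_ROOT or HEALTH_CHECK_TIMEOUT_SEC are likewise defined there).
import hashlib
import os
import random
import stat
import string
import time


def random_secret_string(length):
    # Random alphanumeric token used for temporary directories and pipe names.
    return "".join(random.choice(string.ascii_letters + string.digits) for _ in range(length))


def version_from_str(version_str):
    # "7.1.0" -> (7, 1, 0), so versions compare component-wise.
    return tuple(int(part) for part in version_str.split("."))


def version_before(version, cmp_version):
    return version_from_str(version) < version_from_str(cmp_version)


def api_version_from_str(version_str):
    # Map a version string such as "7.1.0" to an FDB API version such as 710.
    ver = version_str.split(".")
    assert len(ver) >= 2, "Invalid version string {}".format(version_str)
    return int(ver[0]) * 100 + int(ver[1]) * 10


def random_sleep(min_sec, max_sec):
    time.sleep(random.uniform(min_sec, max_sec))


def read_to_str(filename):
    with open(filename, "r") as f:
        return f.read()


def compute_sha256(filename):
    # Stream the file through SHA-256 in chunks to bound memory use.
    hash_function = hashlib.sha256()
    with open(filename, "rb") as f:
        for chunk in iter(lambda: f.read(128 * 1024), b""):
            hash_function.update(chunk)
    return hash_function.hexdigest()


def make_executable_path(path):
    # Add the execute bit to a downloaded binary.
    os.chmod(path, os.stat(path).st_mode | stat.S_IEXEC)


# The older class version calls this helper under a different name.
make_executable = make_executable_path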