def set_needs_restart(self) -> None:
    """Indicate that the cluster instance needs restart."""
    with locking.FileLockIfXdist(self.cluster_lock):
        self._log(f"c{self.cluster_instance_num}: called `set_needs_restart`")
        helpers.touch(self.instance_dir / f"{RESTART_NEEDED_GLOB}_{self.worker_id}")
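
# --- Illustration (not part of the framework) ---------------------------------
# A minimal, self-contained sketch of the status-file signaling pattern used by
# `set_needs_restart` and the other methods below: workers coordinate by touching
# files named "<glob_prefix>_<worker_id>" in a shared per-instance directory, and
# other workers discover the state with `glob`. The directory and file names here
# are hypothetical and chosen only for the demo.
from pathlib import Path
import tempfile


def _demo_status_files() -> None:
    shared_dir = Path(tempfile.mkdtemp(prefix="status_demo_"))

    # worker "gw1" signals that a restart is needed (analogous to `set_needs_restart`)
    (shared_dir / "restart_needed_gw1").touch()

    # any worker can later check whether some worker requested a restart
    restart_requested = bool(list(shared_dir.glob("restart_needed_*")))
    print(f"restart requested: {restart_requested}")


if __name__ == "__main__":
    _demo_status_files()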
def _marked_select_instance(self, cget_status: ClusterGetStatus) -> bool:
    """Select this cluster instance for running marked tests if possible."""
    marked_running_my_here = list(
        cget_status.instance_dir.glob(f"{TEST_CURR_MARK_GLOB}_{cget_status.mark}_*")
    )
    marked_running_my_anywhere = list(
        self.cm.pytest_tmp_dir.glob(
            f"{CLUSTER_DIR_TEMPLATE}*/{TEST_CURR_MARK_GLOB}_{cget_status.mark}_*"
        )
    )
    if not marked_running_my_here and marked_running_my_anywhere:
        self.cm._log(
            f"c{cget_status.instance_num}: tests marked with my mark '{cget_status.mark}' "
            "already running on other cluster instance, cannot run"
        )
        return False

    marked_starting_my_here = list(
        cget_status.instance_dir.glob(f"{TEST_MARK_STARTING_GLOB}_{cget_status.mark}_*")
    )
    marked_starting_my_anywhere = list(
        self.cm.pytest_tmp_dir.glob(
            f"{CLUSTER_DIR_TEMPLATE}*/{TEST_MARK_STARTING_GLOB}_{cget_status.mark}_*"
        )
    )
    if not marked_starting_my_here and marked_starting_my_anywhere:
        self.cm._log(
            f"c{cget_status.instance_num}: tests marked with my mark '{cget_status.mark}' "
            "starting on other cluster instance, cannot run"
        )
        return False

    if marked_running_my_here or marked_starting_my_here:
        cget_status.selected_instance = cget_status.instance_num
        self.cm._log(
            f"c{cget_status.instance_num}: locking to this cluster instance, "
            f"it has my mark '{cget_status.mark}'"
        )
    elif cget_status.marked_running_sfiles or cget_status.marked_starting_sfiles:
        self.cm._log(
            f"c{cget_status.instance_num}: tests marked with other mark starting "
            f"or running, I have different mark '{cget_status.mark}'"
        )
        return False
    else:
        # No marked tests are running yet. Indicate that it is planned to start marked tests as
        # soon as possible (when all currently running tests are finished or the cluster is
        # restarted).
        cget_status.selected_instance = cget_status.instance_num
        mark_starting_file = (
            cget_status.instance_dir
            / f"{TEST_MARK_STARTING_GLOB}_{cget_status.mark}_{self.cm.worker_id}"
        )
        if not mark_starting_file.exists():
            self.cm._log(f"c{cget_status.instance_num}: initialized mark '{cget_status.mark}'")
            helpers.touch(mark_starting_file)

    return True
def testenv_setup_teardown(
    tmp_path_factory: TempdirFactory, worker_id: str, request: FixtureRequest
) -> Generator[None, None, None]:
    pytest_root_tmp = temptools.get_pytest_root_tmp(tmp_path_factory)

    with locking.FileLockIfXdist(f"{pytest_root_tmp}/{cluster_management.CLUSTER_LOCK}"):
        # save environment info for Allure
        if not list(pytest_root_tmp.glob(".started_session_*")):
            _save_env_for_allure(request.config)

        helpers.touch(pytest_root_tmp / f".started_session_{worker_id}")

    yield

    with locking.FileLockIfXdist(f"{pytest_root_tmp}/{cluster_management.CLUSTER_LOCK}"):
        # save CLI coverage to dir specified by `--cli-coverage-dir`
        cluster_manager_obj = cluster_management.ClusterManager(
            tmp_path_factory=tmp_path_factory, worker_id=worker_id, pytest_config=request.config
        )
        cluster_manager_obj.save_worker_cli_coverage()

        # perform cleanup if this is the last running pytest worker
        (pytest_root_tmp / f".started_session_{worker_id}").unlink()
        if not list(pytest_root_tmp.glob(".started_session_*")):
            # perform testnet cleanup
            _testnet_cleanup(pytest_root_tmp=pytest_root_tmp)

            if configuration.DEV_CLUSTER_RUNNING:
                # save cluster artifacts
                artifacts_base_dir = request.config.getoption("--artifacts-base-dir")
                if artifacts_base_dir:
                    state_dir = cluster_nodes.get_cluster_env().state_dir
                    artifacts.save_cluster_artifacts(
                        save_dir=pytest_root_tmp, state_dir=state_dir
                    )
            else:
                # stop all cluster instances, save artifacts
                _stop_all_cluster_instances(
                    tmp_path_factory=tmp_path_factory,
                    worker_id=worker_id,
                    pytest_config=request.config,
                )

            # copy collected artifacts to dir specified by `--artifacts-base-dir`
            artifacts.copy_artifacts(
                pytest_tmp_dir=pytest_root_tmp, pytest_config=request.config
            )
def stop_all_clusters(self) -> None:
    """Stop all cluster instances."""
    self._log("called `stop_all_clusters`")

    # don't stop cluster if it was started outside of test framework
    if configuration.DEV_CLUSTER_RUNNING:
        LOGGER.warning("Ignoring request to stop clusters as 'DEV_CLUSTER_RUNNING' is set.")
        return

    work_dir = cluster_nodes.get_cluster_env().work_dir
    for instance_num in range(self.num_of_instances):
        instance_dir = self.pytest_tmp_dir / f"{CLUSTER_DIR_TEMPLATE}{instance_num}"
        if (
            not (instance_dir / CLUSTER_RUNNING_FILE).exists()
            or (instance_dir / CLUSTER_STOPPED_FILE).exists()
        ):
            self._log(f"c{instance_num}: cluster instance not running")
            continue

        state_dir = work_dir / f"{cluster_nodes.STATE_CLUSTER}{instance_num}"

        stop_script = state_dir / cluster_scripts.STOP_SCRIPT
        if not stop_script.exists():
            self._log(f"c{instance_num}: stop script doesn't exist!")
            continue

        self._log(f"c{instance_num}: stopping cluster instance with `{stop_script}`")
        try:
            helpers.run_command(str(stop_script))
        except Exception as err:
            self._log(f"c{instance_num}: failed to stop cluster:\n{err}")

        artifacts.save_start_script_coverage(
            log_file=state_dir / CLUSTER_START_CMDS_LOG,
            pytest_config=self.pytest_config,
        )
        artifacts.save_cluster_artifacts(save_dir=self.pytest_tmp_dir, state_dir=state_dir)

        shutil.rmtree(state_dir, ignore_errors=True)

        helpers.touch(instance_dir / CLUSTER_STOPPED_FILE)
        self._log(f"c{instance_num}: stopped cluster instance")
def _init_restart(self, cget_status: ClusterGetStatus) -> bool:
    """Initialize restart on this cluster instance."""
    # restart already initialized
    if cget_status.restart_here:
        return True

    # restart is needed when a custom start command was specified and the test is a marked test
    # or a singleton
    initial_marked_test = bool(cget_status.mark and not cget_status.marked_running_sfiles)
    singleton_test = Resources.CLUSTER in cget_status.lock_resources
    new_cmd_restart = bool(cget_status.start_cmd and (initial_marked_test or singleton_test))
    will_restart = new_cmd_restart or self._is_restart_needed(cget_status.instance_num)
    if not will_restart:
        return True

    # if tests are running on the instance, we cannot restart, therefore we cannot continue
    if cget_status.started_tests_sfiles:
        self.cm._log(f"c{cget_status.instance_num}: tests are running, cannot restart")
        return False

    self.cm._log(f"c{cget_status.instance_num}: setting 'restart in progress'")

    # Cluster restart will be performed by this worker.
    # By setting `restart_here`, we make sure this worker continues on this cluster instance
    # after the restart is finished. It is important because the `start_cmd` used for starting
    # the cluster instance might be specific to the test.
    cget_status.restart_here = True
    cget_status.selected_instance = cget_status.instance_num

    restart_in_progress_file = (
        cget_status.instance_dir / f"{RESTART_IN_PROGRESS_GLOB}_{self.cm.worker_id}"
    )
    if not restart_in_progress_file.exists():
        helpers.touch(restart_in_progress_file)

    return True
def _on_marked_test_stop(self, instance_num: int) -> None:
    """Perform actions after marked tests are finished."""
    self.cm._log(f"c{instance_num}: in `_on_marked_test_stop`")
    instance_dir = self.cm.pytest_tmp_dir / f"{CLUSTER_DIR_TEMPLATE}{instance_num}"

    # set cluster instance to be restarted if needed
    restart_after_mark_files = list(instance_dir.glob(f"{RESTART_AFTER_MARK_GLOB}_*"))
    if restart_after_mark_files:
        for f in restart_after_mark_files:
            f.unlink()
        self.cm._log(
            f"c{instance_num}: in `_on_marked_test_stop`, creating 'restart needed' file"
        )
        helpers.touch(instance_dir / f"{RESTART_NEEDED_GLOB}_{self.cm.worker_id}")

    # remove file that indicates that tests with the mark are running
    marked_running_sfiles = list(instance_dir.glob(f"{TEST_CURR_MARK_GLOB}_*"))
    if marked_running_sfiles:
        marked_running_sfiles[0].unlink()
def _create_test_status_files(self, cget_status: ClusterGetStatus) -> None:
    """Create status files for the test that is about to start on this cluster instance."""
    # this test is the first marked test
    if cget_status.mark and not cget_status.marked_running_sfiles:
        self.cm._log(f"c{cget_status.instance_num}: starting '{cget_status.mark}' tests")
        helpers.touch(
            self.cm.instance_dir
            / f"{TEST_CURR_MARK_GLOB}_{cget_status.mark}_{self.cm.worker_id}"
        )
        for sf in cget_status.marked_starting_sfiles:
            sf.unlink()

    # create status file for each in-use resource
    for r in cget_status.use_resources:
        helpers.touch(self.cm.instance_dir / f"{RESOURCE_IN_USE_GLOB}_{r}_{self.cm.worker_id}")

    # create status file for each locked resource
    for r in cget_status.lock_resources:
        helpers.touch(self.cm.instance_dir / f"{RESOURCE_LOCKED_GLOB}_{r}_{self.cm.worker_id}")

    # cleanup = cluster restart after the test (or group of tests) is finished
    if cget_status.cleanup:
        # cleanup after a group of tests that are marked with a marker
        if cget_status.mark:
            self.cm._log(f"c{cget_status.instance_num}: cleanup and mark")
            helpers.touch(
                self.cm.instance_dir / f"{RESTART_AFTER_MARK_GLOB}_{self.cm.worker_id}"
            )
        # cleanup after a single test (e.g. singleton)
        else:
            self.cm._log(f"c{cget_status.instance_num}: cleanup and not mark")
            helpers.touch(
                self.cm.instance_dir / f"{RESTART_NEEDED_GLOB}_{self.cm.worker_id}"
            )

    self.cm._log(f"c{self.cm.cluster_instance_num}: creating 'test running' status file")
    test_running_file = self.cm.instance_dir / f"{TEST_RUNNING_GLOB}_{self.cm.worker_id}"
    helpers.touch(test_running_file)
def _restart(self, start_cmd: str = "", stop_cmd: str = "") -> bool:  # noqa: C901
    """Restart cluster.

    Not called under global lock!
    """
    # pylint: disable=too-many-branches
    cluster_running_file = self.cm.instance_dir / CLUSTER_RUNNING_FILE

    # don't restart cluster if it was started outside of test framework
    if configuration.DEV_CLUSTER_RUNNING:
        self.cm._log(
            f"c{self.cm.cluster_instance_num}: ignoring restart, dev cluster is running"
        )
        if cluster_running_file.exists():
            LOGGER.warning(
                "Ignoring requested cluster restart as 'DEV_CLUSTER_RUNNING' is set."
            )
        else:
            helpers.touch(cluster_running_file)
        return True

    # fail if cluster restart is forbidden and the cluster was already started
    if configuration.FORBID_RESTART and cluster_running_file.exists():
        raise RuntimeError("Cannot restart cluster when 'FORBID_RESTART' is set.")

    self.cm._log(
        f"c{self.cm.cluster_instance_num}: called `_restart`, start_cmd='{start_cmd}', "
        f"stop_cmd='{stop_cmd}'"
    )

    startup_files = cluster_nodes.get_cluster_type().cluster_scripts.prepare_scripts_files(
        destdir=self.cm._create_startup_files_dir(self.cm.cluster_instance_num),
        instance_num=self.cm.cluster_instance_num,
        start_script=start_cmd,
        stop_script=stop_cmd,
    )

    state_dir = cluster_nodes.get_cluster_env().state_dir

    self.cm._log(
        f"c{self.cm.cluster_instance_num}: in `_restart`, new files "
        f"start_cmd='{startup_files.start_script}', "
        f"stop_cmd='{startup_files.stop_script}'"
    )

    excp: Optional[Exception] = None
    for i in range(2):
        if i > 0:
            self.cm._log(
                f"c{self.cm.cluster_instance_num}: failed to start cluster:\n{excp}\nretrying"
            )
            time.sleep(0.2)

        try:
            LOGGER.info(f"Stopping cluster with `{startup_files.stop_script}`.")
            helpers.run_command(str(startup_files.stop_script))
        except Exception as err:
            self.cm._log(f"c{self.cm.cluster_instance_num}: failed to stop cluster:\n{err}")

        # save artifacts only when produced during this test run
        if cluster_running_file.exists():
            artifacts.save_start_script_coverage(
                log_file=state_dir / CLUSTER_START_CMDS_LOG,
                pytest_config=self.cm.pytest_config,
            )
            artifacts.save_cluster_artifacts(
                save_dir=self.cm.pytest_tmp_dir, state_dir=state_dir
            )

        shutil.rmtree(state_dir, ignore_errors=True)

        with contextlib.suppress(Exception):
            _kill_supervisor(self.cm.cluster_instance_num)

        try:
            cluster_obj = cluster_nodes.start_cluster(
                cmd=str(startup_files.start_script), args=startup_files.start_script_args
            )
        except Exception as err:
            LOGGER.error(f"Failed to start cluster: {err}")
            excp = err
        else:
            break
    else:
        self.cm._log(
            f"c{self.cm.cluster_instance_num}: failed to start cluster:\n{excp}\ncluster dead"
        )
        if not configuration.IS_XDIST:
            pytest.exit(msg=f"Failed to start cluster, exception: {excp}", returncode=1)
        helpers.touch(self.cm.instance_dir / CLUSTER_DEAD_FILE)
        return False

    # Create temp dir for faucet addresses data.
    # Pytest's mktemp adds a number to the end of the dir name, so keep the trailing '_'
    # as separator. Resulting dir name is e.g. 'addrs_data_ci3_0'.
    tmp_path = Path(
        self.cm.tmp_path_factory.mktemp(f"addrs_data_ci{self.cm.cluster_instance_num}_")
    )

    # setup faucet addresses
    cluster_nodes.setup_test_addrs(cluster_obj=cluster_obj, destination_dir=tmp_path)

    # create file that indicates that the cluster is running
    if not cluster_running_file.exists():
        helpers.touch(cluster_running_file)

    return True
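
# --- Illustration (not part of the framework) ---------------------------------
# The retry loop in `_restart` relies on Python's `for ... else`: the `else`
# branch runs only when the loop finishes without hitting `break`, i.e. when
# every start attempt failed. A minimal sketch of the same retry shape, with a
# hypothetical `start` callable standing in for `cluster_nodes.start_cluster`:
from typing import Callable, Optional


def _demo_retry(start: Callable[[], None], attempts: int = 2) -> bool:
    last_exc: Optional[Exception] = None
    for _ in range(attempts):
        try:
            start()
        except Exception as exc:  # demo only; the framework logs and retries
            last_exc = exc
        else:
            break  # success, skip the `else` clause below
    else:
        print(f"all {attempts} attempts failed: {last_exc}")
        return False
    return True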