Example #1
0
    def stop_all_clusters(self) -> None:
        """Stop every cluster instance that is currently marked as running.

        An instance counts as running when its directory contains the
        `CLUSTER_RUNNING_FILE` marker and does not contain the
        `CLUSTER_STOPPED_FILE` marker. For each such instance the stop script
        is executed (a failure is only logged), the start-script coverage and
        the cluster artifacts are saved, and the `CLUSTER_STOPPED_FILE` marker
        is created.
        """
        self._log("called `stop_all_clusters`")

        for instance_num in range(self.num_of_instances):
            instance_dir = self.lock_dir / f"{CLUSTER_DIR_TEMPLATE}{instance_num}"

            # running == has the "running" marker and lacks the "stopped" marker
            is_running = (
                (instance_dir / CLUSTER_RUNNING_FILE).exists()
                and not (instance_dir / CLUSTER_STOPPED_FILE).exists()
            )
            if not is_running:
                self._log(f"cluster instance {instance_num} not running")
                continue

            cluster_scripts = cluster_nodes.get_cluster_type().cluster_scripts
            scripts_dir = self._create_startup_files_dir(instance_num)
            scripts = cluster_scripts.prepare_scripts_files(
                destdir=scripts_dir,
                instance_num=instance_num,
            )

            # switch the environment to this instance before reading its state dir
            cluster_nodes.set_cluster_env(instance_num)
            self._log(
                f"stopping cluster instance {instance_num} with `{scripts.stop_script}`"
            )
            state_dir = cluster_nodes.get_cluster_env().state_dir

            # a failed stop must not prevent saving artifacts of this instance
            # or stopping the remaining instances
            try:
                cluster_nodes.stop_cluster(cmd=str(scripts.stop_script))
            except Exception as err:
                LOGGER.error(f"While stopping cluster: {err}")

            start_cmds_log = state_dir / CLUSTER_START_CMDS_LOG
            cli_coverage.save_start_script_coverage(
                log_file=start_cmds_log,
                pytest_config=self.pytest_config,
            )
            cluster_nodes.save_cluster_artifacts(
                artifacts_dir=self.pytest_tmp_dir,
                clean=True,
            )

            # create the "stopped" marker (existing content is left untouched)
            with open(instance_dir / CLUSTER_STOPPED_FILE, "a"):
                pass
            self._log(f"stopped cluster instance {instance_num}")
Example #2
0
    def _restart(self,
                 start_cmd: str = "",
                 stop_cmd: str = "") -> bool:  # noqa: C901
        """Restart cluster.

        Not called under global lock!

        The cluster is stopped (best effort), artifacts produced during this
        test run are saved, the supervisor is killed (best effort) and the
        cluster is started again. The whole sequence is attempted at most
        twice.

        Args:
            start_cmd: Passed to `prepare_scripts_files` as `start_script`.
            stop_cmd: Passed to `prepare_scripts_files` as `stop_script`.

        Returns:
            True when the cluster was started, or when `DEV_CLUSTER_RUNNING`
            is set and the restart is skipped. False when both start attempts
            failed and `helpers.IS_XDIST` is set; `CLUSTER_DEAD_FILE` is
            created in that case. When `helpers.IS_XDIST` is not set, the
            pytest session is terminated through `pytest.exit` instead.

        Raises:
            RuntimeError: When `FORBID_RESTART` is set and the cluster running
                file already exists.
        """
        # pylint: disable=too-many-branches
        cluster_running_file = self.cm.instance_dir / CLUSTER_RUNNING_FILE

        # don't restart cluster if it was started outside of test framework
        if DEV_CLUSTER_RUNNING:
            if cluster_running_file.exists():
                LOGGER.warning(
                    "Ignoring requested cluster restart as 'DEV_CLUSTER_RUNNING' is set."
                )
            else:
                open(cluster_running_file, "a").close()
            return True

        # fail if cluster restart is forbidden and it was already started
        if FORBID_RESTART and cluster_running_file.exists():
            raise RuntimeError(
                "Cannot restart cluster when 'FORBID_RESTART' is set.")

        self.cm._log(
            f"c{self.cm.cluster_instance_num}: called `_restart`, start_cmd='{start_cmd}', "
            f"stop_cmd='{stop_cmd}'")

        startup_files = cluster_nodes.get_cluster_type(
        ).cluster_scripts.prepare_scripts_files(
            destdir=self.cm._create_startup_files_dir(
                self.cm.cluster_instance_num),
            instance_num=self.cm.cluster_instance_num,
            start_script=start_cmd,
            stop_script=stop_cmd,
        )

        state_dir = cluster_nodes.get_cluster_env().state_dir

        self.cm._log(
            f"c{self.cm.cluster_instance_num}: in `_restart`, new files "
            f"start_cmd='{startup_files.start_script}', "
            f"stop_cmd='{startup_files.stop_script}'")

        # exception raised by the most recent failed start attempt
        excp: Optional[Exception] = None
        for i in range(2):
            if i > 0:
                self.cm._log(
                    f"c{self.cm.cluster_instance_num}: failed to start cluster:\n{excp}\nretrying"
                )
                time.sleep(0.2)

            # stopping is best effort, a failure must not block the start
            try:
                cluster_nodes.stop_cluster(cmd=str(startup_files.stop_script))
            except Exception as err:
                self.cm._log(
                    f"c{self.cm.cluster_instance_num}: failed to stop cluster:\n{err}"
                )

            # save artifacts only when produced during this test run
            if cluster_running_file.exists():
                cli_coverage.save_start_script_coverage(
                    log_file=state_dir / CLUSTER_START_CMDS_LOG,
                    pytest_config=self.cm.pytest_config,
                )
                self._restart_save_cluster_artifacts(clean=True)

            # killing the supervisor is best effort as well, but record the
            # failure instead of silently discarding it
            try:
                _kill_supervisor(self.cm.cluster_instance_num)
            except Exception as kill_err:
                self.cm._log(
                    f"c{self.cm.cluster_instance_num}: `_kill_supervisor` failed:\n{kill_err}"
                )

            try:
                cluster_obj = cluster_nodes.start_cluster(
                    cmd=str(startup_files.start_script),
                    args=startup_files.start_script_args)
            except Exception as err:
                LOGGER.error(f"Failed to start cluster: {err}")
                excp = err
            else:
                break
        else:
            # reached only when no attempt succeeded (the loop never hit `break`)
            self.cm._log(
                f"c{self.cm.cluster_instance_num}: failed to start cluster:\n{excp}\ncluster dead"
            )
            if not helpers.IS_XDIST:
                pytest.exit(msg=f"Failed to start cluster, exception: {excp}",
                            returncode=1)
            open(self.cm.instance_dir / CLUSTER_DEAD_FILE, "a").close()
            return False

        # setup faucet addresses
        tmp_path = Path(self.cm.tmp_path_factory.mktemp("addrs_data"))
        cluster_nodes.setup_test_addrs(cluster_obj, tmp_path)

        # create file that indicates that the cluster is running
        if not cluster_running_file.exists():
            open(cluster_running_file, "a").close()

        return True